001/* ***** BEGIN LICENSE BLOCK *****
002 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
003 *
004 * The contents of this file are subject to the Mozilla Public License Version
005 * 1.1 (the "License"); you may not use this file except in compliance with
006 * the License. You may obtain a copy of the License at
007 * http://www.mozilla.org/MPL/
008 *
009 * Software distributed under the License is distributed on an "AS IS" basis,
010 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
011 * for the specific language governing rights and limitations under the
012 * License.
013 *
014 * The Original Code is JTransforms.
015 *
016 * The Initial Developer of the Original Code is
017 * Piotr Wendykier, Emory University.
018 * Portions created by the Initial Developer are Copyright (C) 2007-2009
019 * the Initial Developer. All Rights Reserved.
020 *
021 * Alternatively, the contents of this file may be used under the terms of
022 * either the GNU General Public License Version 2 or later (the "GPL"), or
023 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
024 * in which case the provisions of the GPL or the LGPL are applicable instead
025 * of those above. If you wish to allow use of your version of this file only
026 * under the terms of either the GPL or the LGPL, and not to allow others to
027 * use your version of this file under the terms of the MPL, indicate your
028 * decision by deleting the provisions above and replace them with the notice
029 * and other provisions required by the GPL or the LGPL. If you do not delete
030 * the provisions above, a recipient may use your version of this file under
031 * the terms of any one of the MPL, the GPL or the LGPL.
032 *
033 * ***** END LICENSE BLOCK ***** */
034
035package edu.emory.mathcs.jtransforms.fft;
036
037import java.util.concurrent.Future;
038
039import edu.emory.mathcs.utils.ConcurrencyUtils;
040
041/**
042 * Computes 3D Discrete Fourier Transform (DFT) of complex and real, double
043 * precision data. The sizes of all three dimensions can be arbitrary numbers.
044 * This is a parallel implementation of split-radix and mixed-radix algorithms
045 * optimized for SMP systems. <br>
046 * <br>
047 * Part of the code is derived from General Purpose FFT Package written by Takuya Ooura
048 * (http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html)
049 * 
050 * @author Piotr Wendykier (piotr.wendykier@gmail.com)
051 * 
052 */
053public class DoubleFFT_3D {
054
055    private int slices;
056
057    private int rows;
058
059    private int columns;
060
061    private int sliceStride;
062
063    private int rowStride;
064
065    private double[] t;
066
067    private DoubleFFT_1D fftSlices, fftRows, fftColumns;
068
069    private int oldNthreads;
070
071    private int nt;
072
073    private boolean isPowerOfTwo = false;
074
075    private boolean useThreads = false;
076
077    /**
078     * Creates new instance of DoubleFFT_3D.
079     * 
080     * @param slices
081     *            number of slices
082     * @param rows
083     *            number of rows
084     * @param columns
085     *            number of columns
086     * 
087     */
088    public DoubleFFT_3D(int slices, int rows, int columns) {
089        if (slices <= 1 || rows <= 1 || columns <= 1) {
090            throw new IllegalArgumentException("slices, rows and columns must be greater than 1");
091        }
092        this.slices = slices;
093        this.rows = rows;
094        this.columns = columns;
095        this.sliceStride = rows * columns;
096        this.rowStride = columns;
097        if (slices * rows * columns >= ConcurrencyUtils.getThreadsBeginN_3D()) {
098            this.useThreads = true;
099        }
100        if (ConcurrencyUtils.isPowerOf2(slices) && ConcurrencyUtils.isPowerOf2(rows) && ConcurrencyUtils.isPowerOf2(columns)) {
101            isPowerOfTwo = true;
102            oldNthreads = ConcurrencyUtils.getNumberOfThreads();
103            nt = slices;
104            if (nt < rows) {
105                nt = rows;
106            }
107            nt *= 8;
108            if (oldNthreads > 1) {
109                nt *= oldNthreads;
110            }
111            if (2 * columns == 4) {
112                nt >>= 1;
113            } else if (2 * columns < 4) {
114                nt >>= 2;
115            }
116            t = new double[nt];
117        }
118        fftSlices = new DoubleFFT_1D(slices);
119        if (slices == rows) {
120            fftRows = fftSlices;
121        } else {
122            fftRows = new DoubleFFT_1D(rows);
123        }
124        if (slices == columns) {
125            fftColumns = fftSlices;
126        } else if (rows == columns) {
127            fftColumns = fftRows;
128        } else {
129            fftColumns = new DoubleFFT_1D(columns);
130        }
131
132    }
133
134    /**
135     * Computes 3D forward DFT of complex data leaving the result in
136     * <code>a</code>. The data is stored in 1D array addressed in slice-major,
137     * then row-major, then column-major, in order of significance, i.e. element
138     * (i,j,k) of 3D array x[slices][rows][2*columns] is stored in a[i*sliceStride +
139     * j*rowStride + k], where sliceStride = rows * 2 * columns and rowStride = 2 * columns.
140     * Complex number is stored as two double values in sequence: the real and
141     * imaginary part, i.e. the input array must be of size slices*rows*2*columns. The
142     * physical layout of the input data is as follows:
143     * 
144     * <pre>
145     * a[k1*sliceStride + k2*rowStride + 2*k3] = Re[k1][k2][k3], 
146     * a[k1*sliceStride + k2*rowStride + 2*k3+1] = Im[k1][k2][k3], 0&lt;=k1&lt;slices, 0&lt;=k2&lt;rows, 0&lt;=k3&lt;columns,
147     * </pre>
148     * 
149     * @param a
150     *            data to transform
151     */
152    public void complexForward(final double[] a) {
153        int nthreads = ConcurrencyUtils.getNumberOfThreads();
154        if (isPowerOfTwo) {
155            int oldn3 = columns;
156            columns = 2 * columns;
157
158            sliceStride = rows * columns;
159            rowStride = columns;
160
161            if (nthreads != oldNthreads) {
162                nt = slices;
163                if (nt < rows) {
164                    nt = rows;
165                }
166                nt *= 8;
167                if (nthreads > 1) {
168                    nt *= nthreads;
169                }
170                if (columns == 4) {
171                    nt >>= 1;
172                } else if (columns < 4) {
173                    nt >>= 2;
174                }
175                t = new double[nt];
176                oldNthreads = nthreads;
177            }
178            if ((nthreads > 1) && useThreads) {
179                xdft3da_subth2(0, -1, a, true);
180                cdft3db_subth(-1, a, true);
181            } else {
182                xdft3da_sub2(0, -1, a, true);
183                cdft3db_sub(-1, a, true);
184            }
185            columns = oldn3;
186            sliceStride = rows * columns;
187            rowStride = columns;
188        } else {
189            sliceStride = 2 * rows * columns;
190            rowStride = 2 * columns;
191            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
192                Future<?>[] futures = new Future[nthreads];
193                int p = slices / nthreads;
194                for (int l = 0; l < nthreads; l++) {
195                    final int firstSlice = l * p;
196                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
197                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
198                        public void run() {
199                            for (int s = firstSlice; s < lastSlice; s++) {
200                                int idx1 = s * sliceStride;
201                                for (int r = 0; r < rows; r++) {
202                                    fftColumns.complexForward(a, idx1 + r * rowStride);
203                                }
204                            }
205                        }
206                    });
207                }
208                ConcurrencyUtils.waitForCompletion(futures);
209
210                for (int l = 0; l < nthreads; l++) {
211                    final int firstSlice = l * p;
212                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
213
214                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
215                        public void run() {
216                            double[] temp = new double[2 * rows];
217                            for (int s = firstSlice; s < lastSlice; s++) {
218                                int idx1 = s * sliceStride;
219                                for (int c = 0; c < columns; c++) {
220                                    int idx2 = 2 * c;
221                                    for (int r = 0; r < rows; r++) {
222                                        int idx3 = idx1 + idx2 + r * rowStride;
223                                        int idx4 = 2 * r;
224                                        temp[idx4] = a[idx3];
225                                        temp[idx4 + 1] = a[idx3 + 1];
226                                    }
227                                    fftRows.complexForward(temp);
228                                    for (int r = 0; r < rows; r++) {
229                                        int idx3 = idx1 + idx2 + r * rowStride;
230                                        int idx4 = 2 * r;
231                                        a[idx3] = temp[idx4];
232                                        a[idx3 + 1] = temp[idx4 + 1];
233                                    }
234                                }
235                            }
236                        }
237                    });
238                }
239                ConcurrencyUtils.waitForCompletion(futures);
240
241                p = rows / nthreads;
242                for (int l = 0; l < nthreads; l++) {
243                    final int firstRow = l * p;
244                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
245
246                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
247                        public void run() {
248                            double[] temp = new double[2 * slices];
249                            for (int r = firstRow; r < lastRow; r++) {
250                                int idx1 = r * rowStride;
251                                for (int c = 0; c < columns; c++) {
252                                    int idx2 = 2 * c;
253                                    for (int s = 0; s < slices; s++) {
254                                        int idx3 = s * sliceStride + idx1 + idx2;
255                                        int idx4 = 2 * s;
256                                        temp[idx4] = a[idx3];
257                                        temp[idx4 + 1] = a[idx3 + 1];
258                                    }
259                                    fftSlices.complexForward(temp);
260                                    for (int s = 0; s < slices; s++) {
261                                        int idx3 = s * sliceStride + idx1 + idx2;
262                                        int idx4 = 2 * s;
263                                        a[idx3] = temp[idx4];
264                                        a[idx3 + 1] = temp[idx4 + 1];
265                                    }
266                                }
267                            }
268                        }
269                    });
270                }
271                ConcurrencyUtils.waitForCompletion(futures);
272
273            } else {
274                for (int s = 0; s < slices; s++) {
275                    int idx1 = s * sliceStride;
276                    for (int r = 0; r < rows; r++) {
277                        fftColumns.complexForward(a, idx1 + r * rowStride);
278                    }
279                }
280
281                double[] temp = new double[2 * rows];
282                for (int s = 0; s < slices; s++) {
283                    int idx1 = s * sliceStride;
284                    for (int c = 0; c < columns; c++) {
285                        int idx2 = 2 * c;
286                        for (int r = 0; r < rows; r++) {
287                            int idx3 = idx1 + idx2 + r * rowStride;
288                            int idx4 = 2 * r;
289                            temp[idx4] = a[idx3];
290                            temp[idx4 + 1] = a[idx3 + 1];
291                        }
292                        fftRows.complexForward(temp);
293                        for (int r = 0; r < rows; r++) {
294                            int idx3 = idx1 + idx2 + r * rowStride;
295                            int idx4 = 2 * r;
296                            a[idx3] = temp[idx4];
297                            a[idx3 + 1] = temp[idx4 + 1];
298                        }
299                    }
300                }
301
302                temp = new double[2 * slices];
303                for (int r = 0; r < rows; r++) {
304                    int idx1 = r * rowStride;
305                    for (int c = 0; c < columns; c++) {
306                        int idx2 = 2 * c;
307                        for (int s = 0; s < slices; s++) {
308                            int idx3 = s * sliceStride + idx1 + idx2;
309                            int idx4 = 2 * s;
310                            temp[idx4] = a[idx3];
311                            temp[idx4 + 1] = a[idx3 + 1];
312                        }
313                        fftSlices.complexForward(temp);
314                        for (int s = 0; s < slices; s++) {
315                            int idx3 = s * sliceStride + idx1 + idx2;
316                            int idx4 = 2 * s;
317                            a[idx3] = temp[idx4];
318                            a[idx3 + 1] = temp[idx4 + 1];
319                        }
320                    }
321                }
322            }
323            sliceStride = rows * columns;
324            rowStride = columns;
325        }
326    }
327
328    /**
329     * Computes 3D forward DFT of complex data leaving the result in
330     * <code>a</code>. The data is stored in 3D array. Complex data is
331     * represented by 2 double values in sequence: the real and imaginary part,
332     * i.e. the input array must be of size slices by rows by 2*columns. The physical
333     * layout of the input data is as follows:
334     * 
335     * <pre>
336     * a[k1][k2][2*k3] = Re[k1][k2][k3], 
337     * a[k1][k2][2*k3+1] = Im[k1][k2][k3], 0&lt;=k1&lt;slices, 0&lt;=k2&lt;rows, 0&lt;=k3&lt;columns,
338     * </pre>
339     * 
340     * @param a
341     *            data to transform
342     */
343    public void complexForward(final double[][][] a) {
344        int nthreads = ConcurrencyUtils.getNumberOfThreads();
345        if (isPowerOfTwo) {
346            int oldn3 = columns;
347            columns = 2 * columns;
348
349            sliceStride = rows * columns;
350            rowStride = columns;
351
352            if (nthreads != oldNthreads) {
353                nt = slices;
354                if (nt < rows) {
355                    nt = rows;
356                }
357                nt *= 8;
358                if (nthreads > 1) {
359                    nt *= nthreads;
360                }
361                if (columns == 4) {
362                    nt >>= 1;
363                } else if (columns < 4) {
364                    nt >>= 2;
365                }
366                t = new double[nt];
367                oldNthreads = nthreads;
368            }
369            if ((nthreads > 1) && useThreads) {
370                xdft3da_subth2(0, -1, a, true);
371                cdft3db_subth(-1, a, true);
372            } else {
373                xdft3da_sub2(0, -1, a, true);
374                cdft3db_sub(-1, a, true);
375            }
376            columns = oldn3;
377            sliceStride = rows * columns;
378            rowStride = columns;
379        } else {
380            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
381                Future<?>[] futures = new Future[nthreads];
382                int p = slices / nthreads;
383                for (int l = 0; l < nthreads; l++) {
384                    final int firstSlice = l * p;
385                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
386                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
387                        public void run() {
388                            for (int s = firstSlice; s < lastSlice; s++) {
389                                for (int r = 0; r < rows; r++) {
390                                    fftColumns.complexForward(a[s][r]);
391                                }
392                            }
393                        }
394                    });
395                }
396                ConcurrencyUtils.waitForCompletion(futures);
397
398                for (int l = 0; l < nthreads; l++) {
399                    final int firstSlice = l * p;
400                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
401
402                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
403                        public void run() {
404                            double[] temp = new double[2 * rows];
405                            for (int s = firstSlice; s < lastSlice; s++) {
406                                for (int c = 0; c < columns; c++) {
407                                    int idx2 = 2 * c;
408                                    for (int r = 0; r < rows; r++) {
409                                        int idx4 = 2 * r;
410                                        temp[idx4] = a[s][r][idx2];
411                                        temp[idx4 + 1] = a[s][r][idx2 + 1];
412                                    }
413                                    fftRows.complexForward(temp);
414                                    for (int r = 0; r < rows; r++) {
415                                        int idx4 = 2 * r;
416                                        a[s][r][idx2] = temp[idx4];
417                                        a[s][r][idx2 + 1] = temp[idx4 + 1];
418                                    }
419                                }
420                            }
421                        }
422                    });
423                }
424                ConcurrencyUtils.waitForCompletion(futures);
425
426                p = rows / nthreads;
427                for (int l = 0; l < nthreads; l++) {
428                    final int firstRow = l * p;
429                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
430
431                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
432                        public void run() {
433                            double[] temp = new double[2 * slices];
434                            for (int r = firstRow; r < lastRow; r++) {
435                                for (int c = 0; c < columns; c++) {
436                                    int idx2 = 2 * c;
437                                    for (int s = 0; s < slices; s++) {
438                                        int idx4 = 2 * s;
439                                        temp[idx4] = a[s][r][idx2];
440                                        temp[idx4 + 1] = a[s][r][idx2 + 1];
441                                    }
442                                    fftSlices.complexForward(temp);
443                                    for (int s = 0; s < slices; s++) {
444                                        int idx4 = 2 * s;
445                                        a[s][r][idx2] = temp[idx4];
446                                        a[s][r][idx2 + 1] = temp[idx4 + 1];
447                                    }
448                                }
449                            }
450                        }
451                    });
452                }
453                ConcurrencyUtils.waitForCompletion(futures);
454
455            } else {
456                for (int s = 0; s < slices; s++) {
457                    for (int r = 0; r < rows; r++) {
458                        fftColumns.complexForward(a[s][r]);
459                    }
460                }
461
462                double[] temp = new double[2 * rows];
463                for (int s = 0; s < slices; s++) {
464                    for (int c = 0; c < columns; c++) {
465                        int idx2 = 2 * c;
466                        for (int r = 0; r < rows; r++) {
467                            int idx4 = 2 * r;
468                            temp[idx4] = a[s][r][idx2];
469                            temp[idx4 + 1] = a[s][r][idx2 + 1];
470                        }
471                        fftRows.complexForward(temp);
472                        for (int r = 0; r < rows; r++) {
473                            int idx4 = 2 * r;
474                            a[s][r][idx2] = temp[idx4];
475                            a[s][r][idx2 + 1] = temp[idx4 + 1];
476                        }
477                    }
478                }
479
480                temp = new double[2 * slices];
481                for (int r = 0; r < rows; r++) {
482                    for (int c = 0; c < columns; c++) {
483                        int idx2 = 2 * c;
484                        for (int s = 0; s < slices; s++) {
485                            int idx4 = 2 * s;
486                            temp[idx4] = a[s][r][idx2];
487                            temp[idx4 + 1] = a[s][r][idx2 + 1];
488                        }
489                        fftSlices.complexForward(temp);
490                        for (int s = 0; s < slices; s++) {
491                            int idx4 = 2 * s;
492                            a[s][r][idx2] = temp[idx4];
493                            a[s][r][idx2 + 1] = temp[idx4 + 1];
494                        }
495                    }
496                }
497            }
498        }
499    }
500
501    /**
502     * Computes 3D inverse DFT of complex data leaving the result in
503     * <code>a</code>. The data is stored in a 1D array addressed in
504     * slice-major, then row-major, then column-major, in order of significance,
505     * i.e. element (i,j,k) of 3-d array x[slices][rows][2*columns] is stored in
506     * a[i*sliceStride + j*rowStride + k], where sliceStride = rows * 2 * columns and
507     * rowStride = 2 * columns. Complex number is stored as two double values in
508     * sequence: the real and imaginary part, i.e. the input array must be of
509     * size slices*rows*2*columns. The physical layout of the input data is as follows:
510     * 
511     * <pre>
512     * a[k1*sliceStride + k2*rowStride + 2*k3] = Re[k1][k2][k3], 
513     * a[k1*sliceStride + k2*rowStride + 2*k3+1] = Im[k1][k2][k3], 0&lt;=k1&lt;slices, 0&lt;=k2&lt;rows, 0&lt;=k3&lt;columns,
514     * </pre>
515     * 
516     * @param a
517     *            data to transform
518     * @param scale
519     *            if true then scaling is performed
520     */
521    public void complexInverse(final double[] a, final boolean scale) {
522
523        int nthreads = ConcurrencyUtils.getNumberOfThreads();
524
525        if (isPowerOfTwo) {
526            int oldn3 = columns;
527            columns = 2 * columns;
528            sliceStride = rows * columns;
529            rowStride = columns;
530            if (nthreads != oldNthreads) {
531                nt = slices;
532                if (nt < rows) {
533                    nt = rows;
534                }
535                nt *= 8;
536                if (nthreads > 1) {
537                    nt *= nthreads;
538                }
539                if (columns == 4) {
540                    nt >>= 1;
541                } else if (columns < 4) {
542                    nt >>= 2;
543                }
544                t = new double[nt];
545                oldNthreads = nthreads;
546            }
547            if ((nthreads > 1) && useThreads) {
548                xdft3da_subth2(0, 1, a, scale);
549                cdft3db_subth(1, a, scale);
550            } else {
551                xdft3da_sub2(0, 1, a, scale);
552                cdft3db_sub(1, a, scale);
553            }
554            columns = oldn3;
555            sliceStride = rows * columns;
556            rowStride = columns;
557        } else {
558            sliceStride = 2 * rows * columns;
559            rowStride = 2 * columns;
560            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
561                Future<?>[] futures = new Future[nthreads];
562                int p = slices / nthreads;
563                for (int l = 0; l < nthreads; l++) {
564                    final int firstSlice = l * p;
565                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
566
567                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
568                        public void run() {
569                            for (int s = firstSlice; s < lastSlice; s++) {
570                                int idx1 = s * sliceStride;
571                                for (int r = 0; r < rows; r++) {
572                                    fftColumns.complexInverse(a, idx1 + r * rowStride, scale);
573                                }
574                            }
575                        }
576                    });
577                }
578                ConcurrencyUtils.waitForCompletion(futures);
579
580                for (int l = 0; l < nthreads; l++) {
581                    final int firstSlice = l * p;
582                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
583
584                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
585                        public void run() {
586                            double[] temp = new double[2 * rows];
587                            for (int s = firstSlice; s < lastSlice; s++) {
588                                int idx1 = s * sliceStride;
589                                for (int c = 0; c < columns; c++) {
590                                    int idx2 = 2 * c;
591                                    for (int r = 0; r < rows; r++) {
592                                        int idx3 = idx1 + idx2 + r * rowStride;
593                                        int idx4 = 2 * r;
594                                        temp[idx4] = a[idx3];
595                                        temp[idx4 + 1] = a[idx3 + 1];
596                                    }
597                                    fftRows.complexInverse(temp, scale);
598                                    for (int r = 0; r < rows; r++) {
599                                        int idx3 = idx1 + idx2 + r * rowStride;
600                                        int idx4 = 2 * r;
601                                        a[idx3] = temp[idx4];
602                                        a[idx3 + 1] = temp[idx4 + 1];
603                                    }
604                                }
605                            }
606                        }
607                    });
608                }
609                ConcurrencyUtils.waitForCompletion(futures);
610
611                p = rows / nthreads;
612                for (int l = 0; l < nthreads; l++) {
613                    final int firstRow = l * p;
614                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
615
616                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
617                        public void run() {
618                            double[] temp = new double[2 * slices];
619                            for (int r = firstRow; r < lastRow; r++) {
620                                int idx1 = r * rowStride;
621                                for (int c = 0; c < columns; c++) {
622                                    int idx2 = 2 * c;
623                                    for (int s = 0; s < slices; s++) {
624                                        int idx3 = s * sliceStride + idx1 + idx2;
625                                        int idx4 = 2 * s;
626                                        temp[idx4] = a[idx3];
627                                        temp[idx4 + 1] = a[idx3 + 1];
628                                    }
629                                    fftSlices.complexInverse(temp, scale);
630                                    for (int s = 0; s < slices; s++) {
631                                        int idx3 = s * sliceStride + idx1 + idx2;
632                                        int idx4 = 2 * s;
633                                        a[idx3] = temp[idx4];
634                                        a[idx3 + 1] = temp[idx4 + 1];
635                                    }
636                                }
637                            }
638                        }
639                    });
640                }
641                ConcurrencyUtils.waitForCompletion(futures);
642
643            } else {
644                for (int s = 0; s < slices; s++) {
645                    int idx1 = s * sliceStride;
646                    for (int r = 0; r < rows; r++) {
647                        fftColumns.complexInverse(a, idx1 + r * rowStride, scale);
648                    }
649                }
650                double[] temp = new double[2 * rows];
651                for (int s = 0; s < slices; s++) {
652                    int idx1 = s * sliceStride;
653                    for (int c = 0; c < columns; c++) {
654                        int idx2 = 2 * c;
655                        for (int r = 0; r < rows; r++) {
656                            int idx3 = idx1 + idx2 + r * rowStride;
657                            int idx4 = 2 * r;
658                            temp[idx4] = a[idx3];
659                            temp[idx4 + 1] = a[idx3 + 1];
660                        }
661                        fftRows.complexInverse(temp, scale);
662                        for (int r = 0; r < rows; r++) {
663                            int idx3 = idx1 + idx2 + r * rowStride;
664                            int idx4 = 2 * r;
665                            a[idx3] = temp[idx4];
666                            a[idx3 + 1] = temp[idx4 + 1];
667                        }
668                    }
669                }
670                temp = new double[2 * slices];
671                for (int r = 0; r < rows; r++) {
672                    int idx1 = r * rowStride;
673                    for (int c = 0; c < columns; c++) {
674                        int idx2 = 2 * c;
675                        for (int s = 0; s < slices; s++) {
676                            int idx3 = s * sliceStride + idx1 + idx2;
677                            int idx4 = 2 * s;
678                            temp[idx4] = a[idx3];
679                            temp[idx4 + 1] = a[idx3 + 1];
680                        }
681                        fftSlices.complexInverse(temp, scale);
682                        for (int s = 0; s < slices; s++) {
683                            int idx3 = s * sliceStride + idx1 + idx2;
684                            int idx4 = 2 * s;
685                            a[idx3] = temp[idx4];
686                            a[idx3 + 1] = temp[idx4 + 1];
687                        }
688                    }
689                }
690            }
691            sliceStride = rows * columns;
692            rowStride = columns;
693        }
694    }
695
696    /**
697     * Computes 3D inverse DFT of complex data leaving the result in
698     * <code>a</code>. The data is stored in a 3D array. Complex data is
699     * represented by 2 double values in sequence: the real and imaginary part,
700     * i.e. the input array must be of size slices by rows by 2*columns. The physical
701     * layout of the input data is as follows:
702     * 
703     * <pre>
704     * a[k1][k2][2*k3] = Re[k1][k2][k3], 
705     * a[k1][k2][2*k3+1] = Im[k1][k2][k3], 0&lt;=k1&lt;slices, 0&lt;=k2&lt;rows, 0&lt;=k3&lt;columns,
706     * </pre>
707     * 
708     * @param a
709     *            data to transform
710     * @param scale
711     *            if true then scaling is performed
712     */
713    public void complexInverse(final double[][][] a, final boolean scale) {
714        int nthreads = ConcurrencyUtils.getNumberOfThreads();
715        if (isPowerOfTwo) {
716            int oldn3 = columns;
717            columns = 2 * columns;
718            sliceStride = rows * columns;
719            rowStride = columns;
720            if (nthreads != oldNthreads) {
721                nt = slices;
722                if (nt < rows) {
723                    nt = rows;
724                }
725                nt *= 8;
726                if (nthreads > 1) {
727                    nt *= nthreads;
728                }
729                if (columns == 4) {
730                    nt >>= 1;
731                } else if (columns < 4) {
732                    nt >>= 2;
733                }
734                t = new double[nt];
735                oldNthreads = nthreads;
736            }
737            if ((nthreads > 1) && useThreads) {
738                xdft3da_subth2(0, 1, a, scale);
739                cdft3db_subth(1, a, scale);
740            } else {
741                xdft3da_sub2(0, 1, a, scale);
742                cdft3db_sub(1, a, scale);
743            }
744            columns = oldn3;
745            sliceStride = rows * columns;
746            rowStride = columns;
747        } else {
748            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
749                Future<?>[] futures = new Future[nthreads];
750                int p = slices / nthreads;
751                for (int l = 0; l < nthreads; l++) {
752                    final int firstSlice = l * p;
753                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
754
755                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
756                        public void run() {
757                            for (int s = firstSlice; s < lastSlice; s++) {
758                                for (int r = 0; r < rows; r++) {
759                                    fftColumns.complexInverse(a[s][r], scale);
760                                }
761                            }
762                        }
763                    });
764                }
765                ConcurrencyUtils.waitForCompletion(futures);
766
767                for (int l = 0; l < nthreads; l++) {
768                    final int firstSlice = l * p;
769                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
770
771                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
772                        public void run() {
773                            double[] temp = new double[2 * rows];
774                            for (int s = firstSlice; s < lastSlice; s++) {
775                                for (int c = 0; c < columns; c++) {
776                                    int idx2 = 2 * c;
777                                    for (int r = 0; r < rows; r++) {
778                                        int idx4 = 2 * r;
779                                        temp[idx4] = a[s][r][idx2];
780                                        temp[idx4 + 1] = a[s][r][idx2 + 1];
781                                    }
782                                    fftRows.complexInverse(temp, scale);
783                                    for (int r = 0; r < rows; r++) {
784                                        int idx4 = 2 * r;
785                                        a[s][r][idx2] = temp[idx4];
786                                        a[s][r][idx2 + 1] = temp[idx4 + 1];
787                                    }
788                                }
789                            }
790                        }
791                    });
792                }
793                ConcurrencyUtils.waitForCompletion(futures);
794
795                p = rows / nthreads;
796                for (int l = 0; l < nthreads; l++) {
797                    final int firstRow = l * p;
798                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
799
800                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
801                        public void run() {
802                            double[] temp = new double[2 * slices];
803                            for (int r = firstRow; r < lastRow; r++) {
804                                for (int c = 0; c < columns; c++) {
805                                    int idx2 = 2 * c;
806                                    for (int s = 0; s < slices; s++) {
807                                        int idx4 = 2 * s;
808                                        temp[idx4] = a[s][r][idx2];
809                                        temp[idx4 + 1] = a[s][r][idx2 + 1];
810                                    }
811                                    fftSlices.complexInverse(temp, scale);
812                                    for (int s = 0; s < slices; s++) {
813                                        int idx4 = 2 * s;
814                                        a[s][r][idx2] = temp[idx4];
815                                        a[s][r][idx2 + 1] = temp[idx4 + 1];
816                                    }
817                                }
818                            }
819                        }
820                    });
821                }
822                ConcurrencyUtils.waitForCompletion(futures);
823
824            } else {
825                for (int s = 0; s < slices; s++) {
826                    for (int r = 0; r < rows; r++) {
827                        fftColumns.complexInverse(a[s][r], scale);
828                    }
829                }
830                double[] temp = new double[2 * rows];
831                for (int s = 0; s < slices; s++) {
832                    for (int c = 0; c < columns; c++) {
833                        int idx2 = 2 * c;
834                        for (int r = 0; r < rows; r++) {
835                            int idx4 = 2 * r;
836                            temp[idx4] = a[s][r][idx2];
837                            temp[idx4 + 1] = a[s][r][idx2 + 1];
838                        }
839                        fftRows.complexInverse(temp, scale);
840                        for (int r = 0; r < rows; r++) {
841                            int idx4 = 2 * r;
842                            a[s][r][idx2] = temp[idx4];
843                            a[s][r][idx2 + 1] = temp[idx4 + 1];
844                        }
845                    }
846                }
847                temp = new double[2 * slices];
848                for (int r = 0; r < rows; r++) {
849                    for (int c = 0; c < columns; c++) {
850                        int idx2 = 2 * c;
851                        for (int s = 0; s < slices; s++) {
852                            int idx4 = 2 * s;
853                            temp[idx4] = a[s][r][idx2];
854                            temp[idx4 + 1] = a[s][r][idx2 + 1];
855                        }
856                        fftSlices.complexInverse(temp, scale);
857                        for (int s = 0; s < slices; s++) {
858                            int idx4 = 2 * s;
859                            a[s][r][idx2] = temp[idx4];
860                            a[s][r][idx2 + 1] = temp[idx4 + 1];
861                        }
862                    }
863                }
864            }
865        }
866    }
867
868    /**
869     * Computes 3D forward DFT of real data leaving the result in <code>a</code>
870     * . This method only works when the sizes of all three dimensions are
871     * power-of-two numbers. The data is stored in a 1D array addressed in
872     * slice-major, then row-major, then column-major, in order of significance,
873     * i.e. element (i,j,k) of 3-d array x[slices][rows][2*columns] is stored in
874     * a[i*sliceStride + j*rowStride + k], where sliceStride = rows * 2 * columns and
875     * rowStride = 2 * columns. The physical layout of the output data is as follows:
876     * 
877     * <pre>
878     * a[k1*sliceStride + k2*rowStride + 2*k3] = Re[k1][k2][k3]
879     *                 = Re[(slices-k1)%slices][(rows-k2)%rows][columns-k3], 
880     * a[k1*sliceStride + k2*rowStride + 2*k3+1] = Im[k1][k2][k3]
881     *                   = -Im[(slices-k1)%slices][(rows-k2)%rows][columns-k3], 
882     *     0&lt;=k1&lt;slices, 0&lt;=k2&lt;rows, 0&lt;k3&lt;columns/2, 
883     * a[k1*sliceStride + k2*rowStride] = Re[k1][k2][0]
884     *              = Re[(slices-k1)%slices][rows-k2][0], 
885     * a[k1*sliceStride + k2*rowStride + 1] = Im[k1][k2][0]
886     *              = -Im[(slices-k1)%slices][rows-k2][0], 
887     * a[k1*sliceStride + (rows-k2)*rowStride + 1] = Re[(slices-k1)%slices][k2][columns/2]
888     *                 = Re[k1][rows-k2][columns/2], 
889     * a[k1*sliceStride + (rows-k2)*rowStride] = -Im[(slices-k1)%slices][k2][columns/2]
890     *                 = Im[k1][rows-k2][columns/2], 
891     *     0&lt;=k1&lt;slices, 0&lt;k2&lt;rows/2, 
892     * a[k1*sliceStride] = Re[k1][0][0]
893     *             = Re[slices-k1][0][0], 
894     * a[k1*sliceStride + 1] = Im[k1][0][0]
895     *             = -Im[slices-k1][0][0], 
896     * a[k1*sliceStride + (rows/2)*rowStride] = Re[k1][rows/2][0]
897     *                = Re[slices-k1][rows/2][0], 
898     * a[k1*sliceStride + (rows/2)*rowStride + 1] = Im[k1][rows/2][0]
899     *                = -Im[slices-k1][rows/2][0], 
900     * a[(slices-k1)*sliceStride + 1] = Re[k1][0][columns/2]
901     *                = Re[slices-k1][0][columns/2], 
902     * a[(slices-k1)*sliceStride] = -Im[k1][0][columns/2]
903     *                = Im[slices-k1][0][columns/2], 
904     * a[(slices-k1)*sliceStride + (rows/2)*rowStride + 1] = Re[k1][rows/2][columns/2]
905     *                   = Re[slices-k1][rows/2][columns/2], 
906     * a[(slices-k1)*sliceStride + (rows/2) * rowStride] = -Im[k1][rows/2][columns/2]
907     *                   = Im[slices-k1][rows/2][columns/2], 
908     *     0&lt;k1&lt;slices/2, 
909     * a[0] = Re[0][0][0], 
910     * a[1] = Re[0][0][columns/2], 
911     * a[(rows/2)*rowStride] = Re[0][rows/2][0], 
912     * a[(rows/2)*rowStride + 1] = Re[0][rows/2][columns/2], 
913     * a[(slices/2)*sliceStride] = Re[slices/2][0][0], 
914     * a[(slices/2)*sliceStride + 1] = Re[slices/2][0][columns/2], 
915     * a[(slices/2)*sliceStride + (rows/2)*rowStride] = Re[slices/2][rows/2][0], 
916     * a[(slices/2)*sliceStride + (rows/2)*rowStride + 1] = Re[slices/2][rows/2][columns/2]
917     * </pre>
918     * 
919     * 
920     * This method computes only half of the elements of the real transform. The
921     * other half satisfies the symmetry condition. If you want the full real
922     * forward transform, use <code>realForwardFull</code>. To get back the
923     * original data, use <code>realInverse</code> on the output of this method.
924     * 
925     * @param a
926     *            data to transform
927     */
928    public void realForward(double[] a) {
929        if (isPowerOfTwo == false) {
930            throw new IllegalArgumentException("slices, rows and columns must be power of two numbers");
931        } else {
932            int nthreads = ConcurrencyUtils.getNumberOfThreads();
933            if (nthreads != oldNthreads) {
934                nt = slices;
935                if (nt < rows) {
936                    nt = rows;
937                }
938                nt *= 8;
939                if (nthreads > 1) {
940                    nt *= nthreads;
941                }
942                if (columns == 4) {
943                    nt >>= 1;
944                } else if (columns < 4) {
945                    nt >>= 2;
946                }
947                t = new double[nt];
948                oldNthreads = nthreads;
949            }
950            if ((nthreads > 1) && useThreads) {
951                xdft3da_subth1(1, -1, a, true);
952                cdft3db_subth(-1, a, true);
953                rdft3d_sub(1, a);
954            } else {
955                xdft3da_sub1(1, -1, a, true);
956                cdft3db_sub(-1, a, true);
957                rdft3d_sub(1, a);
958            }
959        }
960    }
961
962    /**
963     * Computes 3D forward DFT of real data leaving the result in <code>a</code>
964     * . This method only works when the sizes of all three dimensions are
965     * power-of-two numbers. The data is stored in a 3D array. The physical
966     * layout of the output data is as follows:
967     * 
968     * <pre>
969     * a[k1][k2][2*k3] = Re[k1][k2][k3]
970     *                 = Re[(slices-k1)%slices][(rows-k2)%rows][columns-k3], 
971     * a[k1][k2][2*k3+1] = Im[k1][k2][k3]
972     *                   = -Im[(slices-k1)%slices][(rows-k2)%rows][columns-k3], 
973     *     0&lt;=k1&lt;slices, 0&lt;=k2&lt;rows, 0&lt;k3&lt;columns/2, 
974     * a[k1][k2][0] = Re[k1][k2][0]
975     *              = Re[(slices-k1)%slices][rows-k2][0], 
976     * a[k1][k2][1] = Im[k1][k2][0]
977     *              = -Im[(slices-k1)%slices][rows-k2][0], 
978     * a[k1][rows-k2][1] = Re[(slices-k1)%slices][k2][columns/2]
979     *                 = Re[k1][rows-k2][columns/2], 
980     * a[k1][rows-k2][0] = -Im[(slices-k1)%slices][k2][columns/2]
981     *                 = Im[k1][rows-k2][columns/2], 
982     *     0&lt;=k1&lt;slices, 0&lt;k2&lt;rows/2, 
983     * a[k1][0][0] = Re[k1][0][0]
984     *             = Re[slices-k1][0][0], 
985     * a[k1][0][1] = Im[k1][0][0]
986     *             = -Im[slices-k1][0][0], 
987     * a[k1][rows/2][0] = Re[k1][rows/2][0]
988     *                = Re[slices-k1][rows/2][0], 
989     * a[k1][rows/2][1] = Im[k1][rows/2][0]
990     *                = -Im[slices-k1][rows/2][0], 
991     * a[slices-k1][0][1] = Re[k1][0][columns/2]
992     *                = Re[slices-k1][0][columns/2], 
993     * a[slices-k1][0][0] = -Im[k1][0][columns/2]
994     *                = Im[slices-k1][0][columns/2], 
995     * a[slices-k1][rows/2][1] = Re[k1][rows/2][columns/2]
996     *                   = Re[slices-k1][rows/2][columns/2], 
997     * a[slices-k1][rows/2][0] = -Im[k1][rows/2][columns/2]
998     *                   = Im[slices-k1][rows/2][columns/2], 
999     *     0&lt;k1&lt;slices/2, 
1000     * a[0][0][0] = Re[0][0][0], 
1001     * a[0][0][1] = Re[0][0][columns/2], 
1002     * a[0][rows/2][0] = Re[0][rows/2][0], 
1003     * a[0][rows/2][1] = Re[0][rows/2][columns/2], 
1004     * a[slices/2][0][0] = Re[slices/2][0][0], 
1005     * a[slices/2][0][1] = Re[slices/2][0][columns/2], 
1006     * a[slices/2][rows/2][0] = Re[slices/2][rows/2][0], 
1007     * a[slices/2][rows/2][1] = Re[slices/2][rows/2][columns/2]
1008     * </pre>
1009     * 
1010     * 
1011     * This method computes only half of the elements of the real transform. The
1012     * other half satisfies the symmetry condition. If you want the full real
1013     * forward transform, use <code>realForwardFull</code>. To get back the
1014     * original data, use <code>realInverse</code> on the output of this method.
1015     * 
1016     * @param a
1017     *            data to transform
1018     */
1019    public void realForward(double[][][] a) {
1020        if (isPowerOfTwo == false) {
1021            throw new IllegalArgumentException("slices, rows and columns must be power of two numbers");
1022        } else {
1023            int nthreads = ConcurrencyUtils.getNumberOfThreads();
1024            if (nthreads != oldNthreads) {
1025                nt = slices;
1026                if (nt < rows) {
1027                    nt = rows;
1028                }
1029                nt *= 8;
1030                if (nthreads > 1) {
1031                    nt *= nthreads;
1032                }
1033                if (columns == 4) {
1034                    nt >>= 1;
1035                } else if (columns < 4) {
1036                    nt >>= 2;
1037                }
1038                t = new double[nt];
1039                oldNthreads = nthreads;
1040            }
1041            if ((nthreads > 1) && useThreads) {
1042                xdft3da_subth1(1, -1, a, true);
1043                cdft3db_subth(-1, a, true);
1044                rdft3d_sub(1, a);
1045            } else {
1046                xdft3da_sub1(1, -1, a, true);
1047                cdft3db_sub(-1, a, true);
1048                rdft3d_sub(1, a);
1049            }
1050        }
1051    }
1052
1053    /**
1054     * Computes 3D forward DFT of real data leaving the result in <code>a</code>
1055     * . This method computes full real forward transform, i.e. you will get the
1056     * same result as from <code>complexForward</code> called with all imaginary
1057     * part equal 0. Because the result is stored in <code>a</code>, the input
1058     * array must be of size slices*rows*2*columns, with only the first slices*rows*columns elements
1059     * filled with real data. To get back the original data, use
1060     * <code>complexInverse</code> on the output of this method.
1061     * 
1062     * @param a
1063     *            data to transform
1064     */
1065    public void realForwardFull(double[] a) {
1066        if (isPowerOfTwo) {
1067            int nthreads = ConcurrencyUtils.getNumberOfThreads();
1068            if (nthreads != oldNthreads) {
1069                nt = slices;
1070                if (nt < rows) {
1071                    nt = rows;
1072                }
1073                nt *= 8;
1074                if (nthreads > 1) {
1075                    nt *= nthreads;
1076                }
1077                if (columns == 4) {
1078                    nt >>= 1;
1079                } else if (columns < 4) {
1080                    nt >>= 2;
1081                }
1082                t = new double[nt];
1083                oldNthreads = nthreads;
1084            }
1085            if ((nthreads > 1) && useThreads) {
1086                xdft3da_subth2(1, -1, a, true);
1087                cdft3db_subth(-1, a, true);
1088                rdft3d_sub(1, a);
1089            } else {
1090                xdft3da_sub2(1, -1, a, true);
1091                cdft3db_sub(-1, a, true);
1092                rdft3d_sub(1, a);
1093            }
1094            fillSymmetric(a);
1095        } else {
1096            mixedRadixRealForwardFull(a);
1097        }
1098    }
1099
1100    /**
1101     * Computes 3D forward DFT of real data leaving the result in <code>a</code>
1102     * . This method computes full real forward transform, i.e. you will get the
1103     * same result as from <code>complexForward</code> called with all imaginary
1104     * part equal 0. Because the result is stored in <code>a</code>, the input
1105     * array must be of size slices by rows by 2*columns, with only the first slices by rows by
1106     * columns elements filled with real data. To get back the original data, use
1107     * <code>complexInverse</code> on the output of this method.
1108     * 
1109     * @param a
1110     *            data to transform
1111     */
1112    public void realForwardFull(double[][][] a) {
1113        if (isPowerOfTwo) {
1114            int nthreads = ConcurrencyUtils.getNumberOfThreads();
1115            if (nthreads != oldNthreads) {
1116                nt = slices;
1117                if (nt < rows) {
1118                    nt = rows;
1119                }
1120                nt *= 8;
1121                if (nthreads > 1) {
1122                    nt *= nthreads;
1123                }
1124                if (columns == 4) {
1125                    nt >>= 1;
1126                } else if (columns < 4) {
1127                    nt >>= 2;
1128                }
1129                t = new double[nt];
1130                oldNthreads = nthreads;
1131            }
1132            if ((nthreads > 1) && useThreads) {
1133                xdft3da_subth2(1, -1, a, true);
1134                cdft3db_subth(-1, a, true);
1135                rdft3d_sub(1, a);
1136            } else {
1137                xdft3da_sub2(1, -1, a, true);
1138                cdft3db_sub(-1, a, true);
1139                rdft3d_sub(1, a);
1140            }
1141            fillSymmetric(a);
1142        } else {
1143            mixedRadixRealForwardFull(a);
1144        }
1145    }
1146
1147    /**
1148     * Computes 3D inverse DFT of real data leaving the result in <code>a</code>
1149     * . This method only works when the sizes of all three dimensions are
1150     * power-of-two numbers. The data is stored in a 1D array addressed in
1151     * slice-major, then row-major, then column-major, in order of significance,
1152     * i.e. element (i,j,k) of 3-d array x[slices][rows][2*columns] is stored in
1153     * a[i*sliceStride + j*rowStride + k], where sliceStride = rows * 2 * columns and
1154     * rowStride = 2 * columns. The physical layout of the input data has to be as
1155     * follows:
1156     * 
1157     * <pre>
1158     * a[k1*sliceStride + k2*rowStride + 2*k3] = Re[k1][k2][k3]
1159     *                 = Re[(slices-k1)%slices][(rows-k2)%rows][columns-k3], 
1160     * a[k1*sliceStride + k2*rowStride + 2*k3+1] = Im[k1][k2][k3]
1161     *                   = -Im[(slices-k1)%slices][(rows-k2)%rows][columns-k3], 
1162     *     0&lt;=k1&lt;slices, 0&lt;=k2&lt;rows, 0&lt;k3&lt;columns/2, 
1163     * a[k1*sliceStride + k2*rowStride] = Re[k1][k2][0]
1164     *              = Re[(slices-k1)%slices][rows-k2][0], 
1165     * a[k1*sliceStride + k2*rowStride + 1] = Im[k1][k2][0]
1166     *              = -Im[(slices-k1)%slices][rows-k2][0], 
1167     * a[k1*sliceStride + (rows-k2)*rowStride + 1] = Re[(slices-k1)%slices][k2][columns/2]
1168     *                 = Re[k1][rows-k2][columns/2], 
1169     * a[k1*sliceStride + (rows-k2)*rowStride] = -Im[(slices-k1)%slices][k2][columns/2]
1170     *                 = Im[k1][rows-k2][columns/2], 
1171     *     0&lt;=k1&lt;slices, 0&lt;k2&lt;rows/2, 
1172     * a[k1*sliceStride] = Re[k1][0][0]
1173     *             = Re[slices-k1][0][0], 
1174     * a[k1*sliceStride + 1] = Im[k1][0][0]
1175     *             = -Im[slices-k1][0][0], 
1176     * a[k1*sliceStride + (rows/2)*rowStride] = Re[k1][rows/2][0]
1177     *                = Re[slices-k1][rows/2][0], 
1178     * a[k1*sliceStride + (rows/2)*rowStride + 1] = Im[k1][rows/2][0]
1179     *                = -Im[slices-k1][rows/2][0], 
1180     * a[(slices-k1)*sliceStride + 1] = Re[k1][0][columns/2]
1181     *                = Re[slices-k1][0][columns/2], 
1182     * a[(slices-k1)*sliceStride] = -Im[k1][0][columns/2]
1183     *                = Im[slices-k1][0][columns/2], 
1184     * a[(slices-k1)*sliceStride + (rows/2)*rowStride + 1] = Re[k1][rows/2][columns/2]
1185     *                   = Re[slices-k1][rows/2][columns/2], 
1186     * a[(slices-k1)*sliceStride + (rows/2) * rowStride] = -Im[k1][rows/2][columns/2]
1187     *                   = Im[slices-k1][rows/2][columns/2], 
1188     *     0&lt;k1&lt;slices/2, 
1189     * a[0] = Re[0][0][0], 
1190     * a[1] = Re[0][0][columns/2], 
1191     * a[(rows/2)*rowStride] = Re[0][rows/2][0], 
1192     * a[(rows/2)*rowStride + 1] = Re[0][rows/2][columns/2], 
1193     * a[(slices/2)*sliceStride] = Re[slices/2][0][0], 
1194     * a[(slices/2)*sliceStride + 1] = Re[slices/2][0][columns/2], 
1195     * a[(slices/2)*sliceStride + (rows/2)*rowStride] = Re[slices/2][rows/2][0], 
1196     * a[(slices/2)*sliceStride + (rows/2)*rowStride + 1] = Re[slices/2][rows/2][columns/2]
1197     * </pre>
1198     * 
1199     * This method computes only half of the elements of the real transform. The
1200     * other half satisfies the symmetry condition. If you want the full real
1201     * inverse transform, use <code>realInverseFull</code>.
1202     * 
1203     * @param a
1204     *            data to transform
1205     * 
1206     * @param scale
1207     *            if true then scaling is performed
1208     */
1209    public void realInverse(double[] a, boolean scale) {
1210        if (isPowerOfTwo == false) {
1211            throw new IllegalArgumentException("slices, rows and columns must be power of two numbers");
1212        } else {
1213            int nthreads = ConcurrencyUtils.getNumberOfThreads();
1214            if (nthreads != oldNthreads) {
1215                nt = slices;
1216                if (nt < rows) {
1217                    nt = rows;
1218                }
1219                nt *= 8;
1220                if (nthreads > 1) {
1221                    nt *= nthreads;
1222                }
1223                if (columns == 4) {
1224                    nt >>= 1;
1225                } else if (columns < 4) {
1226                    nt >>= 2;
1227                }
1228                t = new double[nt];
1229                oldNthreads = nthreads;
1230            }
1231            if ((nthreads > 1) && useThreads) {
1232                rdft3d_sub(-1, a);
1233                cdft3db_subth(1, a, scale);
1234                xdft3da_subth1(1, 1, a, scale);
1235            } else {
1236                rdft3d_sub(-1, a);
1237                cdft3db_sub(1, a, scale);
1238                xdft3da_sub1(1, 1, a, scale);
1239            }
1240        }
1241    }
1242
1243    /**
1244     * Computes 3D inverse DFT of real data leaving the result in <code>a</code>
1245     * . This method only works when the sizes of all three dimensions are
1246     * power-of-two numbers. The data is stored in a 3D array. The physical
1247     * layout of the input data has to be as follows:
1248     * 
1249     * <pre>
1250     * a[k1][k2][2*k3] = Re[k1][k2][k3]
1251     *                 = Re[(slices-k1)%slices][(rows-k2)%rows][columns-k3], 
1252     * a[k1][k2][2*k3+1] = Im[k1][k2][k3]
1253     *                   = -Im[(slices-k1)%slices][(rows-k2)%rows][columns-k3], 
1254     *     0&lt;=k1&lt;slices, 0&lt;=k2&lt;rows, 0&lt;k3&lt;columns/2, 
1255     * a[k1][k2][0] = Re[k1][k2][0]
1256     *              = Re[(slices-k1)%slices][rows-k2][0], 
1257     * a[k1][k2][1] = Im[k1][k2][0]
1258     *              = -Im[(slices-k1)%slices][rows-k2][0], 
1259     * a[k1][rows-k2][1] = Re[(slices-k1)%slices][k2][columns/2]
1260     *                 = Re[k1][rows-k2][columns/2], 
1261     * a[k1][rows-k2][0] = -Im[(slices-k1)%slices][k2][columns/2]
1262     *                 = Im[k1][rows-k2][columns/2], 
1263     *     0&lt;=k1&lt;slices, 0&lt;k2&lt;rows/2, 
1264     * a[k1][0][0] = Re[k1][0][0]
1265     *             = Re[slices-k1][0][0], 
1266     * a[k1][0][1] = Im[k1][0][0]
1267     *             = -Im[slices-k1][0][0], 
1268     * a[k1][rows/2][0] = Re[k1][rows/2][0]
1269     *                = Re[slices-k1][rows/2][0], 
1270     * a[k1][rows/2][1] = Im[k1][rows/2][0]
1271     *                = -Im[slices-k1][rows/2][0], 
1272     * a[slices-k1][0][1] = Re[k1][0][columns/2]
1273     *                = Re[slices-k1][0][columns/2], 
1274     * a[slices-k1][0][0] = -Im[k1][0][columns/2]
1275     *                = Im[slices-k1][0][columns/2], 
1276     * a[slices-k1][rows/2][1] = Re[k1][rows/2][columns/2]
1277     *                   = Re[slices-k1][rows/2][columns/2], 
1278     * a[slices-k1][rows/2][0] = -Im[k1][rows/2][columns/2]
1279     *                   = Im[slices-k1][rows/2][columns/2], 
1280     *     0&lt;k1&lt;slices/2, 
1281     * a[0][0][0] = Re[0][0][0], 
1282     * a[0][0][1] = Re[0][0][columns/2], 
1283     * a[0][rows/2][0] = Re[0][rows/2][0], 
1284     * a[0][rows/2][1] = Re[0][rows/2][columns/2], 
1285     * a[slices/2][0][0] = Re[slices/2][0][0], 
1286     * a[slices/2][0][1] = Re[slices/2][0][columns/2], 
1287     * a[slices/2][rows/2][0] = Re[slices/2][rows/2][0], 
1288     * a[slices/2][rows/2][1] = Re[slices/2][rows/2][columns/2]
1289     * </pre>
1290     * 
1291     * This method computes only half of the elements of the real transform. The
1292     * other half satisfies the symmetry condition. If you want the full real
1293     * inverse transform, use <code>realInverseFull</code>.
1294     * 
1295     * @param a
1296     *            data to transform
1297     * 
1298     * @param scale
1299     *            if true then scaling is performed
1300     */
1301    public void realInverse(double[][][] a, boolean scale) {
1302        if (isPowerOfTwo == false) {
1303            throw new IllegalArgumentException("slices, rows and columns must be power of two numbers");
1304        } else {
1305            int nthreads = ConcurrencyUtils.getNumberOfThreads();
1306            if (nthreads != oldNthreads) {
1307                nt = slices;
1308                if (nt < rows) {
1309                    nt = rows;
1310                }
1311                nt *= 8;
1312                if (nthreads > 1) {
1313                    nt *= nthreads;
1314                }
1315                if (columns == 4) {
1316                    nt >>= 1;
1317                } else if (columns < 4) {
1318                    nt >>= 2;
1319                }
1320                t = new double[nt];
1321                oldNthreads = nthreads;
1322            }
1323            if ((nthreads > 1) && useThreads) {
1324                rdft3d_sub(-1, a);
1325                cdft3db_subth(1, a, scale);
1326                xdft3da_subth1(1, 1, a, scale);
1327            } else {
1328                rdft3d_sub(-1, a);
1329                cdft3db_sub(1, a, scale);
1330                xdft3da_sub1(1, 1, a, scale);
1331            }
1332        }
1333    }
1334
1335    /**
1336     * Computes 3D inverse DFT of real data leaving the result in <code>a</code>
1337     * . This method computes full real inverse transform, i.e. you will get the
1338     * same result as from <code>complexInverse</code> called with all imaginary
1339     * part equal 0. Because the result is stored in <code>a</code>, the input
1340     * array must be of size slices*rows*2*columns, with only the first slices*rows*columns elements
1341     * filled with real data.
1342     * 
1343     * @param a
1344     *            data to transform
1345     * @param scale
1346     *            if true then scaling is performed
1347     */
1348    public void realInverseFull(double[] a, boolean scale) {
1349        if (isPowerOfTwo) {
1350            int nthreads = ConcurrencyUtils.getNumberOfThreads();
1351            if (nthreads != oldNthreads) {
1352                nt = slices;
1353                if (nt < rows) {
1354                    nt = rows;
1355                }
1356                nt *= 8;
1357                if (nthreads > 1) {
1358                    nt *= nthreads;
1359                }
1360                if (columns == 4) {
1361                    nt >>= 1;
1362                } else if (columns < 4) {
1363                    nt >>= 2;
1364                }
1365                t = new double[nt];
1366                oldNthreads = nthreads;
1367            }
1368            if ((nthreads > 1) && useThreads) {
1369                xdft3da_subth2(1, 1, a, scale);
1370                cdft3db_subth(1, a, scale);
1371                rdft3d_sub(1, a);
1372            } else {
1373                xdft3da_sub2(1, 1, a, scale);
1374                cdft3db_sub(1, a, scale);
1375                rdft3d_sub(1, a);
1376            }
1377            fillSymmetric(a);
1378        } else {
1379            mixedRadixRealInverseFull(a, scale);
1380        }
1381    }
1382
1383    /**
1384     * Computes 3D inverse DFT of real data leaving the result in <code>a</code>
1385     * . This method computes full real inverse transform, i.e. you will get the
1386     * same result as from <code>complexInverse</code> called with all imaginary
1387     * part equal 0. Because the result is stored in <code>a</code>, the input
1388     * array must be of size slices by rows by 2*columns, with only the first slices by rows by
1389     * columns elements filled with real data.
1390     * 
1391     * @param a
1392     *            data to transform
1393     * @param scale
1394     *            if true then scaling is performed
1395     */
1396    public void realInverseFull(double[][][] a, boolean scale) {
1397        if (isPowerOfTwo) {
1398            int nthreads = ConcurrencyUtils.getNumberOfThreads();
1399            if (nthreads != oldNthreads) {
1400                nt = slices;
1401                if (nt < rows) {
1402                    nt = rows;
1403                }
1404                nt *= 8;
1405                if (nthreads > 1) {
1406                    nt *= nthreads;
1407                }
1408                if (columns == 4) {
1409                    nt >>= 1;
1410                } else if (columns < 4) {
1411                    nt >>= 2;
1412                }
1413                t = new double[nt];
1414                oldNthreads = nthreads;
1415            }
1416            if ((nthreads > 1) && useThreads) {
1417                xdft3da_subth2(1, 1, a, scale);
1418                cdft3db_subth(1, a, scale);
1419                rdft3d_sub(1, a);
1420            } else {
1421                xdft3da_sub2(1, 1, a, scale);
1422                cdft3db_sub(1, a, scale);
1423                rdft3d_sub(1, a);
1424            }
1425            fillSymmetric(a);
1426        } else {
1427            mixedRadixRealInverseFull(a, scale);
1428        }
1429    }
1430
1431    /* -------- child routines -------- */
1432
1433    private void mixedRadixRealForwardFull(final double[][][] a) {
1434        double[] temp = new double[2 * rows];
1435        int ldimn2 = rows / 2 + 1;
1436        final int newn3 = 2 * columns;
1437        final int n2d2;
1438        if (rows % 2 == 0) {
1439            n2d2 = rows / 2;
1440        } else {
1441            n2d2 = (rows + 1) / 2;
1442        }
1443
1444        int nthreads = ConcurrencyUtils.getNumberOfThreads();
1445        if ((nthreads > 1) && useThreads && (slices >= nthreads) && (columns >= nthreads) && (ldimn2 >= nthreads)) {
1446            Future<?>[] futures = new Future[nthreads];
1447            int p = slices / nthreads;
1448            for (int l = 0; l < nthreads; l++) {
1449                final int firstSlice = l * p;
1450                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
1451
1452                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1453                    public void run() {
1454                        for (int s = firstSlice; s < lastSlice; s++) {
1455                            for (int r = 0; r < rows; r++) {
1456                                fftColumns.realForwardFull(a[s][r]);
1457                            }
1458                        }
1459                    }
1460                });
1461            }
1462            ConcurrencyUtils.waitForCompletion(futures);
1463
1464            for (int l = 0; l < nthreads; l++) {
1465                final int firstSlice = l * p;
1466                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
1467
1468                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1469                    public void run() {
1470                        double[] temp = new double[2 * rows];
1471
1472                        for (int s = firstSlice; s < lastSlice; s++) {
1473                            for (int c = 0; c < columns; c++) {
1474                                int idx2 = 2 * c;
1475                                for (int r = 0; r < rows; r++) {
1476                                    int idx4 = 2 * r;
1477                                    temp[idx4] = a[s][r][idx2];
1478                                    temp[idx4 + 1] = a[s][r][idx2 + 1];
1479                                }
1480                                fftRows.complexForward(temp);
1481                                for (int r = 0; r < rows; r++) {
1482                                    int idx4 = 2 * r;
1483                                    a[s][r][idx2] = temp[idx4];
1484                                    a[s][r][idx2 + 1] = temp[idx4 + 1];
1485                                }
1486                            }
1487                        }
1488                    }
1489                });
1490            }
1491            ConcurrencyUtils.waitForCompletion(futures);
1492
1493            p = ldimn2 / nthreads;
1494            for (int l = 0; l < nthreads; l++) {
1495                final int firstRow = l * p;
1496                final int lastRow = (l == (nthreads - 1)) ? ldimn2 : firstRow + p;
1497
1498                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1499                    public void run() {
1500                        double[] temp = new double[2 * slices];
1501
1502                        for (int r = firstRow; r < lastRow; r++) {
1503                            for (int c = 0; c < columns; c++) {
1504                                int idx1 = 2 * c;
1505                                for (int s = 0; s < slices; s++) {
1506                                    int idx2 = 2 * s;
1507                                    temp[idx2] = a[s][r][idx1];
1508                                    temp[idx2 + 1] = a[s][r][idx1 + 1];
1509                                }
1510                                fftSlices.complexForward(temp);
1511                                for (int s = 0; s < slices; s++) {
1512                                    int idx2 = 2 * s;
1513                                    a[s][r][idx1] = temp[idx2];
1514                                    a[s][r][idx1 + 1] = temp[idx2 + 1];
1515                                }
1516                            }
1517                        }
1518                    }
1519                });
1520            }
1521            ConcurrencyUtils.waitForCompletion(futures);
1522            p = slices / nthreads;
1523            for (int l = 0; l < nthreads; l++) {
1524                final int firstSlice = l * p;
1525                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
1526
1527                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1528                    public void run() {
1529
1530                        for (int s = firstSlice; s < lastSlice; s++) {
1531                            int idx2 = (slices - s) % slices;
1532                            for (int r = 1; r < n2d2; r++) {
1533                                int idx4 = rows - r;
1534                                for (int c = 0; c < columns; c++) {
1535                                    int idx1 = 2 * c;
1536                                    int idx3 = newn3 - idx1;
1537                                    a[idx2][idx4][idx3 % newn3] = a[s][r][idx1];
1538                                    a[idx2][idx4][(idx3 + 1) % newn3] = -a[s][r][idx1 + 1];
1539                                }
1540                            }
1541                        }
1542                    }
1543                });
1544            }
1545            ConcurrencyUtils.waitForCompletion(futures);
1546        } else {
1547
1548            for (int s = 0; s < slices; s++) {
1549                for (int r = 0; r < rows; r++) {
1550                    fftColumns.realForwardFull(a[s][r]);
1551                }
1552            }
1553
1554            for (int s = 0; s < slices; s++) {
1555                for (int c = 0; c < columns; c++) {
1556                    int idx2 = 2 * c;
1557                    for (int r = 0; r < rows; r++) {
1558                        int idx4 = 2 * r;
1559                        temp[idx4] = a[s][r][idx2];
1560                        temp[idx4 + 1] = a[s][r][idx2 + 1];
1561                    }
1562                    fftRows.complexForward(temp);
1563                    for (int r = 0; r < rows; r++) {
1564                        int idx4 = 2 * r;
1565                        a[s][r][idx2] = temp[idx4];
1566                        a[s][r][idx2 + 1] = temp[idx4 + 1];
1567                    }
1568                }
1569            }
1570
1571            temp = new double[2 * slices];
1572
1573            for (int r = 0; r < ldimn2; r++) {
1574                for (int c = 0; c < columns; c++) {
1575                    int idx1 = 2 * c;
1576                    for (int s = 0; s < slices; s++) {
1577                        int idx2 = 2 * s;
1578                        temp[idx2] = a[s][r][idx1];
1579                        temp[idx2 + 1] = a[s][r][idx1 + 1];
1580                    }
1581                    fftSlices.complexForward(temp);
1582                    for (int s = 0; s < slices; s++) {
1583                        int idx2 = 2 * s;
1584                        a[s][r][idx1] = temp[idx2];
1585                        a[s][r][idx1 + 1] = temp[idx2 + 1];
1586                    }
1587                }
1588            }
1589
1590            for (int s = 0; s < slices; s++) {
1591                int idx2 = (slices - s) % slices;
1592                for (int r = 1; r < n2d2; r++) {
1593                    int idx4 = rows - r;
1594                    for (int c = 0; c < columns; c++) {
1595                        int idx1 = 2 * c;
1596                        int idx3 = newn3 - idx1;
1597                        a[idx2][idx4][idx3 % newn3] = a[s][r][idx1];
1598                        a[idx2][idx4][(idx3 + 1) % newn3] = -a[s][r][idx1 + 1];
1599                    }
1600                }
1601            }
1602
1603        }
1604    }
1605
1606    private void mixedRadixRealInverseFull(final double[][][] a, final boolean scale) {
1607        double[] temp = new double[2 * rows];
1608        int ldimn2 = rows / 2 + 1;
1609        final int newn3 = 2 * columns;
1610        final int n2d2;
1611        if (rows % 2 == 0) {
1612            n2d2 = rows / 2;
1613        } else {
1614            n2d2 = (rows + 1) / 2;
1615        }
1616
1617        int nthreads = ConcurrencyUtils.getNumberOfThreads();
1618        if ((nthreads > 1) && useThreads && (slices >= nthreads) && (columns >= nthreads) && (ldimn2 >= nthreads)) {
1619            Future<?>[] futures = new Future[nthreads];
1620            int p = slices / nthreads;
1621            for (int l = 0; l < nthreads; l++) {
1622                final int firstSlice = l * p;
1623                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
1624
1625                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1626                    public void run() {
1627                        for (int s = firstSlice; s < lastSlice; s++) {
1628                            for (int r = 0; r < rows; r++) {
1629                                fftColumns.realInverseFull(a[s][r], scale);
1630                            }
1631                        }
1632                    }
1633                });
1634            }
1635            ConcurrencyUtils.waitForCompletion(futures);
1636
1637            for (int l = 0; l < nthreads; l++) {
1638                final int firstSlice = l * p;
1639                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
1640
1641                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1642                    public void run() {
1643                        double[] temp = new double[2 * rows];
1644
1645                        for (int s = firstSlice; s < lastSlice; s++) {
1646                            for (int c = 0; c < columns; c++) {
1647                                int idx2 = 2 * c;
1648                                for (int r = 0; r < rows; r++) {
1649                                    int idx4 = 2 * r;
1650                                    temp[idx4] = a[s][r][idx2];
1651                                    temp[idx4 + 1] = a[s][r][idx2 + 1];
1652                                }
1653                                fftRows.complexInverse(temp, scale);
1654                                for (int r = 0; r < rows; r++) {
1655                                    int idx4 = 2 * r;
1656                                    a[s][r][idx2] = temp[idx4];
1657                                    a[s][r][idx2 + 1] = temp[idx4 + 1];
1658                                }
1659                            }
1660                        }
1661                    }
1662                });
1663            }
1664            ConcurrencyUtils.waitForCompletion(futures);
1665
1666            p = ldimn2 / nthreads;
1667            for (int l = 0; l < nthreads; l++) {
1668                final int firstRow = l * p;
1669                final int lastRow = (l == (nthreads - 1)) ? ldimn2 : firstRow + p;
1670                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1671                    public void run() {
1672                        double[] temp = new double[2 * slices];
1673
1674                        for (int r = firstRow; r < lastRow; r++) {
1675                            for (int c = 0; c < columns; c++) {
1676                                int idx1 = 2 * c;
1677                                for (int s = 0; s < slices; s++) {
1678                                    int idx2 = 2 * s;
1679                                    temp[idx2] = a[s][r][idx1];
1680                                    temp[idx2 + 1] = a[s][r][idx1 + 1];
1681                                }
1682                                fftSlices.complexInverse(temp, scale);
1683                                for (int s = 0; s < slices; s++) {
1684                                    int idx2 = 2 * s;
1685                                    a[s][r][idx1] = temp[idx2];
1686                                    a[s][r][idx1 + 1] = temp[idx2 + 1];
1687                                }
1688                            }
1689                        }
1690                    }
1691                });
1692            }
1693            ConcurrencyUtils.waitForCompletion(futures);
1694            p = slices / nthreads;
1695            for (int l = 0; l < nthreads; l++) {
1696                final int firstSlice = l * p;
1697                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
1698
1699                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1700                    public void run() {
1701
1702                        for (int s = firstSlice; s < lastSlice; s++) {
1703                            int idx2 = (slices - s) % slices;
1704                            for (int r = 1; r < n2d2; r++) {
1705                                int idx4 = rows - r;
1706                                for (int c = 0; c < columns; c++) {
1707                                    int idx1 = 2 * c;
1708                                    int idx3 = newn3 - idx1;
1709                                    a[idx2][idx4][idx3 % newn3] = a[s][r][idx1];
1710                                    a[idx2][idx4][(idx3 + 1) % newn3] = -a[s][r][idx1 + 1];
1711                                }
1712                            }
1713                        }
1714                    }
1715                });
1716            }
1717            ConcurrencyUtils.waitForCompletion(futures);
1718        } else {
1719
1720            for (int s = 0; s < slices; s++) {
1721                for (int r = 0; r < rows; r++) {
1722                    fftColumns.realInverseFull(a[s][r], scale);
1723                }
1724            }
1725
1726            for (int s = 0; s < slices; s++) {
1727                for (int c = 0; c < columns; c++) {
1728                    int idx2 = 2 * c;
1729                    for (int r = 0; r < rows; r++) {
1730                        int idx4 = 2 * r;
1731                        temp[idx4] = a[s][r][idx2];
1732                        temp[idx4 + 1] = a[s][r][idx2 + 1];
1733                    }
1734                    fftRows.complexInverse(temp, scale);
1735                    for (int r = 0; r < rows; r++) {
1736                        int idx4 = 2 * r;
1737                        a[s][r][idx2] = temp[idx4];
1738                        a[s][r][idx2 + 1] = temp[idx4 + 1];
1739                    }
1740                }
1741            }
1742
1743            temp = new double[2 * slices];
1744
1745            for (int r = 0; r < ldimn2; r++) {
1746                for (int c = 0; c < columns; c++) {
1747                    int idx1 = 2 * c;
1748                    for (int s = 0; s < slices; s++) {
1749                        int idx2 = 2 * s;
1750                        temp[idx2] = a[s][r][idx1];
1751                        temp[idx2 + 1] = a[s][r][idx1 + 1];
1752                    }
1753                    fftSlices.complexInverse(temp, scale);
1754                    for (int s = 0; s < slices; s++) {
1755                        int idx2 = 2 * s;
1756                        a[s][r][idx1] = temp[idx2];
1757                        a[s][r][idx1 + 1] = temp[idx2 + 1];
1758                    }
1759                }
1760            }
1761
1762            for (int s = 0; s < slices; s++) {
1763                int idx2 = (slices - s) % slices;
1764                for (int r = 1; r < n2d2; r++) {
1765                    int idx4 = rows - r;
1766                    for (int c = 0; c < columns; c++) {
1767                        int idx1 = 2 * c;
1768                        int idx3 = newn3 - idx1;
1769                        a[idx2][idx4][idx3 % newn3] = a[s][r][idx1];
1770                        a[idx2][idx4][(idx3 + 1) % newn3] = -a[s][r][idx1 + 1];
1771                    }
1772                }
1773            }
1774
1775        }
1776    }
1777
1778    private void mixedRadixRealForwardFull(final double[] a) {
1779        final int twon3 = 2 * columns;
1780        double[] temp = new double[twon3];
1781        int ldimn2 = rows / 2 + 1;
1782        final int n2d2;
1783        if (rows % 2 == 0) {
1784            n2d2 = rows / 2;
1785        } else {
1786            n2d2 = (rows + 1) / 2;
1787        }
1788
1789        final int twoSliceStride = 2 * sliceStride;
1790        final int twoRowStride = 2 * rowStride;
1791        int n1d2 = slices / 2;
1792        int nthreads = ConcurrencyUtils.getNumberOfThreads();
1793        if ((nthreads > 1) && useThreads && (n1d2 >= nthreads) && (columns >= nthreads) && (ldimn2 >= nthreads)) {
1794            Future<?>[] futures = new Future[nthreads];
1795            int p = n1d2 / nthreads;
1796            for (int l = 0; l < nthreads; l++) {
1797                final int firstSlice = slices - 1 - l * p;
1798                final int lastSlice = (l == (nthreads - 1)) ? n1d2 + 1 : firstSlice - p;
1799                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1800                    public void run() {
1801                        double[] temp = new double[twon3];
1802                        for (int s = firstSlice; s >= lastSlice; s--) {
1803                            int idx1 = s * sliceStride;
1804                            int idx2 = s * twoSliceStride;
1805                            for (int r = rows - 1; r >= 0; r--) {
1806                                System.arraycopy(a, idx1 + r * rowStride, temp, 0, columns);
1807                                fftColumns.realForwardFull(temp);
1808                                System.arraycopy(temp, 0, a, idx2 + r * twoRowStride, twon3);
1809                            }
1810                        }
1811                    }
1812                });
1813            }
1814            ConcurrencyUtils.waitForCompletion(futures);
1815
1816            final double[][][] temp2 = new double[n1d2 + 1][rows][twon3];
1817
1818            for (int l = 0; l < nthreads; l++) {
1819                final int firstSlice = l * p;
1820                final int lastSlice = (l == (nthreads - 1)) ? n1d2 + 1 : firstSlice + p;
1821                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1822                    public void run() {
1823                        for (int s = firstSlice; s < lastSlice; s++) {
1824                            int idx1 = s * sliceStride;
1825                            for (int r = 0; r < rows; r++) {
1826                                System.arraycopy(a, idx1 + r * rowStride, temp2[s][r], 0, columns);
1827                                fftColumns.realForwardFull(temp2[s][r]);
1828                            }
1829                        }
1830                    }
1831                });
1832            }
1833            ConcurrencyUtils.waitForCompletion(futures);
1834
1835            for (int l = 0; l < nthreads; l++) {
1836                final int firstSlice = l * p;
1837                final int lastSlice = (l == (nthreads - 1)) ? n1d2 + 1 : firstSlice + p;
1838                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1839                    public void run() {
1840                        for (int s = firstSlice; s < lastSlice; s++) {
1841                            int idx1 = s * twoSliceStride;
1842                            for (int r = 0; r < rows; r++) {
1843                                System.arraycopy(temp2[s][r], 0, a, idx1 + r * twoRowStride, twon3);
1844                            }
1845                        }
1846                    }
1847                });
1848            }
1849            ConcurrencyUtils.waitForCompletion(futures);
1850
1851            p = slices / nthreads;
1852            for (int l = 0; l < nthreads; l++) {
1853                final int firstSlice = l * p;
1854                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
1855                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1856                    public void run() {
1857                        double[] temp = new double[2 * rows];
1858
1859                        for (int s = firstSlice; s < lastSlice; s++) {
1860                            int idx1 = s * twoSliceStride;
1861                            for (int c = 0; c < columns; c++) {
1862                                int idx2 = 2 * c;
1863                                for (int r = 0; r < rows; r++) {
1864                                    int idx3 = idx1 + r * twoRowStride + idx2;
1865                                    int idx4 = 2 * r;
1866                                    temp[idx4] = a[idx3];
1867                                    temp[idx4 + 1] = a[idx3 + 1];
1868                                }
1869                                fftRows.complexForward(temp);
1870                                for (int r = 0; r < rows; r++) {
1871                                    int idx3 = idx1 + r * twoRowStride + idx2;
1872                                    int idx4 = 2 * r;
1873                                    a[idx3] = temp[idx4];
1874                                    a[idx3 + 1] = temp[idx4 + 1];
1875                                }
1876                            }
1877                        }
1878                    }
1879                });
1880            }
1881            ConcurrencyUtils.waitForCompletion(futures);
1882
1883            p = ldimn2 / nthreads;
1884            for (int l = 0; l < nthreads; l++) {
1885                final int firstRow = l * p;
1886                final int lastRow = (l == (nthreads - 1)) ? ldimn2 : firstRow + p;
1887                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1888                    public void run() {
1889                        double[] temp = new double[2 * slices];
1890
1891                        for (int r = firstRow; r < lastRow; r++) {
1892                            int idx3 = r * twoRowStride;
1893                            for (int c = 0; c < columns; c++) {
1894                                int idx1 = 2 * c;
1895                                for (int s = 0; s < slices; s++) {
1896                                    int idx2 = 2 * s;
1897                                    int idx4 = s * twoSliceStride + idx3 + idx1;
1898                                    temp[idx2] = a[idx4];
1899                                    temp[idx2 + 1] = a[idx4 + 1];
1900                                }
1901                                fftSlices.complexForward(temp);
1902                                for (int s = 0; s < slices; s++) {
1903                                    int idx2 = 2 * s;
1904                                    int idx4 = s * twoSliceStride + idx3 + idx1;
1905                                    a[idx4] = temp[idx2];
1906                                    a[idx4 + 1] = temp[idx2 + 1];
1907                                }
1908                            }
1909                        }
1910                    }
1911                });
1912            }
1913            ConcurrencyUtils.waitForCompletion(futures);
1914            p = slices / nthreads;
1915            for (int l = 0; l < nthreads; l++) {
1916                final int firstSlice = l * p;
1917                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
1918                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1919                    public void run() {
1920
1921                        for (int s = firstSlice; s < lastSlice; s++) {
1922                            int idx2 = (slices - s) % slices;
1923                            int idx5 = idx2 * twoSliceStride;
1924                            int idx6 = s * twoSliceStride;
1925                            for (int r = 1; r < n2d2; r++) {
1926                                int idx4 = rows - r;
1927                                int idx7 = idx4 * twoRowStride;
1928                                int idx8 = r * twoRowStride;
1929                                int idx9 = idx5 + idx7;
1930                                for (int c = 0; c < columns; c++) {
1931                                    int idx1 = 2 * c;
1932                                    int idx3 = twon3 - idx1;
1933                                    int idx10 = idx6 + idx8 + idx1;
1934                                    a[idx9 + idx3 % twon3] = a[idx10];
1935                                    a[idx9 + (idx3 + 1) % twon3] = -a[idx10 + 1];
1936                                }
1937                            }
1938                        }
1939                    }
1940                });
1941            }
1942            ConcurrencyUtils.waitForCompletion(futures);
1943        } else {
1944
1945            for (int s = slices - 1; s >= 0; s--) {
1946                int idx1 = s * sliceStride;
1947                int idx2 = s * twoSliceStride;
1948                for (int r = rows - 1; r >= 0; r--) {
1949                    System.arraycopy(a, idx1 + r * rowStride, temp, 0, columns);
1950                    fftColumns.realForwardFull(temp);
1951                    System.arraycopy(temp, 0, a, idx2 + r * twoRowStride, twon3);
1952                }
1953            }
1954
1955            temp = new double[2 * rows];
1956
1957            for (int s = 0; s < slices; s++) {
1958                int idx1 = s * twoSliceStride;
1959                for (int c = 0; c < columns; c++) {
1960                    int idx2 = 2 * c;
1961                    for (int r = 0; r < rows; r++) {
1962                        int idx4 = 2 * r;
1963                        int idx3 = idx1 + r * twoRowStride + idx2;
1964                        temp[idx4] = a[idx3];
1965                        temp[idx4 + 1] = a[idx3 + 1];
1966                    }
1967                    fftRows.complexForward(temp);
1968                    for (int r = 0; r < rows; r++) {
1969                        int idx4 = 2 * r;
1970                        int idx3 = idx1 + r * twoRowStride + idx2;
1971                        a[idx3] = temp[idx4];
1972                        a[idx3 + 1] = temp[idx4 + 1];
1973                    }
1974                }
1975            }
1976
1977            temp = new double[2 * slices];
1978
1979            for (int r = 0; r < ldimn2; r++) {
1980                int idx3 = r * twoRowStride;
1981                for (int c = 0; c < columns; c++) {
1982                    int idx1 = 2 * c;
1983                    for (int s = 0; s < slices; s++) {
1984                        int idx2 = 2 * s;
1985                        int idx4 = s * twoSliceStride + idx3 + idx1;
1986                        temp[idx2] = a[idx4];
1987                        temp[idx2 + 1] = a[idx4 + 1];
1988                    }
1989                    fftSlices.complexForward(temp);
1990                    for (int s = 0; s < slices; s++) {
1991                        int idx2 = 2 * s;
1992                        int idx4 = s * twoSliceStride + idx3 + idx1;
1993                        a[idx4] = temp[idx2];
1994                        a[idx4 + 1] = temp[idx2 + 1];
1995                    }
1996                }
1997            }
1998
1999            for (int s = 0; s < slices; s++) {
2000                int idx2 = (slices - s) % slices;
2001                int idx5 = idx2 * twoSliceStride;
2002                int idx6 = s * twoSliceStride;
2003                for (int r = 1; r < n2d2; r++) {
2004                    int idx4 = rows - r;
2005                    int idx7 = idx4 * twoRowStride;
2006                    int idx8 = r * twoRowStride;
2007                    int idx9 = idx5 + idx7;
2008                    for (int c = 0; c < columns; c++) {
2009                        int idx1 = 2 * c;
2010                        int idx3 = twon3 - idx1;
2011                        int idx10 = idx6 + idx8 + idx1;
2012                        a[idx9 + idx3 % twon3] = a[idx10];
2013                        a[idx9 + (idx3 + 1) % twon3] = -a[idx10 + 1];
2014                    }
2015                }
2016            }
2017
2018        }
2019    }
2020
2021    private void mixedRadixRealInverseFull(final double[] a, final boolean scale) {
2022        final int twon3 = 2 * columns;
2023        double[] temp = new double[twon3];
2024        int ldimn2 = rows / 2 + 1;
2025        final int n2d2;
2026        if (rows % 2 == 0) {
2027            n2d2 = rows / 2;
2028        } else {
2029            n2d2 = (rows + 1) / 2;
2030        }
2031
2032        final int twoSliceStride = 2 * sliceStride;
2033        final int twoRowStride = 2 * rowStride;
2034        int n1d2 = slices / 2;
2035
2036        int nthreads = ConcurrencyUtils.getNumberOfThreads();
2037        if ((nthreads > 1) && useThreads && (n1d2 >= nthreads) && (columns >= nthreads) && (ldimn2 >= nthreads)) {
2038            Future<?>[] futures = new Future[nthreads];
2039            int p = n1d2 / nthreads;
2040            for (int l = 0; l < nthreads; l++) {
2041                final int firstSlice = slices - 1 - l * p;
2042                final int lastSlice = (l == (nthreads - 1)) ? n1d2 + 1 : firstSlice - p;
2043                futures[l] = ConcurrencyUtils.submit(new Runnable() {
2044                    public void run() {
2045                        double[] temp = new double[twon3];
2046                        for (int s = firstSlice; s >= lastSlice; s--) {
2047                            int idx1 = s * sliceStride;
2048                            int idx2 = s * twoSliceStride;
2049                            for (int r = rows - 1; r >= 0; r--) {
2050                                System.arraycopy(a, idx1 + r * rowStride, temp, 0, columns);
2051                                fftColumns.realInverseFull(temp, scale);
2052                                System.arraycopy(temp, 0, a, idx2 + r * twoRowStride, twon3);
2053                            }
2054                        }
2055                    }
2056                });
2057            }
2058            ConcurrencyUtils.waitForCompletion(futures);
2059
2060            final double[][][] temp2 = new double[n1d2 + 1][rows][twon3];
2061
2062            for (int l = 0; l < nthreads; l++) {
2063                final int firstSlice = l * p;
2064                final int lastSlice = (l == (nthreads - 1)) ? n1d2 + 1 : firstSlice + p;
2065                futures[l] = ConcurrencyUtils.submit(new Runnable() {
2066                    public void run() {
2067                        for (int s = firstSlice; s < lastSlice; s++) {
2068                            int idx1 = s * sliceStride;
2069                            for (int r = 0; r < rows; r++) {
2070                                System.arraycopy(a, idx1 + r * rowStride, temp2[s][r], 0, columns);
2071                                fftColumns.realInverseFull(temp2[s][r], scale);
2072                            }
2073                        }
2074                    }
2075                });
2076            }
2077            ConcurrencyUtils.waitForCompletion(futures);
2078
2079            for (int l = 0; l < nthreads; l++) {
2080                final int firstSlice = l * p;
2081                final int lastSlice = (l == (nthreads - 1)) ? n1d2 + 1 : firstSlice + p;
2082                futures[l] = ConcurrencyUtils.submit(new Runnable() {
2083                    public void run() {
2084                        for (int s = firstSlice; s < lastSlice; s++) {
2085                            int idx1 = s * twoSliceStride;
2086                            for (int r = 0; r < rows; r++) {
2087                                System.arraycopy(temp2[s][r], 0, a, idx1 + r * twoRowStride, twon3);
2088                            }
2089                        }
2090                    }
2091                });
2092            }
2093            ConcurrencyUtils.waitForCompletion(futures);
2094
2095            p = slices / nthreads;
2096            for (int l = 0; l < nthreads; l++) {
2097                final int firstSlice = l * p;
2098                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
2099                futures[l] = ConcurrencyUtils.submit(new Runnable() {
2100                    public void run() {
2101                        double[] temp = new double[2 * rows];
2102
2103                        for (int s = firstSlice; s < lastSlice; s++) {
2104                            int idx1 = s * twoSliceStride;
2105                            for (int c = 0; c < columns; c++) {
2106                                int idx2 = 2 * c;
2107                                for (int r = 0; r < rows; r++) {
2108                                    int idx3 = idx1 + r * twoRowStride + idx2;
2109                                    int idx4 = 2 * r;
2110                                    temp[idx4] = a[idx3];
2111                                    temp[idx4 + 1] = a[idx3 + 1];
2112                                }
2113                                fftRows.complexInverse(temp, scale);
2114                                for (int r = 0; r < rows; r++) {
2115                                    int idx3 = idx1 + r * twoRowStride + idx2;
2116                                    int idx4 = 2 * r;
2117                                    a[idx3] = temp[idx4];
2118                                    a[idx3 + 1] = temp[idx4 + 1];
2119                                }
2120                            }
2121                        }
2122                    }
2123                });
2124            }
2125            ConcurrencyUtils.waitForCompletion(futures);
2126
2127            p = ldimn2 / nthreads;
2128            for (int l = 0; l < nthreads; l++) {
2129                final int firstRow = l * p;
2130                final int lastRow = (l == (nthreads - 1)) ? ldimn2 : firstRow + p;
2131                futures[l] = ConcurrencyUtils.submit(new Runnable() {
2132                    public void run() {
2133                        double[] temp = new double[2 * slices];
2134
2135                        for (int r = firstRow; r < lastRow; r++) {
2136                            int idx3 = r * twoRowStride;
2137                            for (int c = 0; c < columns; c++) {
2138                                int idx1 = 2 * c;
2139                                for (int s = 0; s < slices; s++) {
2140                                    int idx2 = 2 * s;
2141                                    int idx4 = s * twoSliceStride + idx3 + idx1;
2142                                    temp[idx2] = a[idx4];
2143                                    temp[idx2 + 1] = a[idx4 + 1];
2144                                }
2145                                fftSlices.complexInverse(temp, scale);
2146                                for (int s = 0; s < slices; s++) {
2147                                    int idx2 = 2 * s;
2148                                    int idx4 = s * twoSliceStride + idx3 + idx1;
2149                                    a[idx4] = temp[idx2];
2150                                    a[idx4 + 1] = temp[idx2 + 1];
2151                                }
2152                            }
2153                        }
2154                    }
2155                });
2156            }
2157            ConcurrencyUtils.waitForCompletion(futures);
2158
2159            p = slices / nthreads;
2160            for (int l = 0; l < nthreads; l++) {
2161                final int firstSlice = l * p;
2162                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
2163                futures[l] = ConcurrencyUtils.submit(new Runnable() {
2164                    public void run() {
2165
2166                        for (int s = firstSlice; s < lastSlice; s++) {
2167                            int idx2 = (slices - s) % slices;
2168                            int idx5 = idx2 * twoSliceStride;
2169                            int idx6 = s * twoSliceStride;
2170                            for (int r = 1; r < n2d2; r++) {
2171                                int idx4 = rows - r;
2172                                int idx7 = idx4 * twoRowStride;
2173                                int idx8 = r * twoRowStride;
2174                                int idx9 = idx5 + idx7;
2175                                for (int c = 0; c < columns; c++) {
2176                                    int idx1 = 2 * c;
2177                                    int idx3 = twon3 - idx1;
2178                                    int idx10 = idx6 + idx8 + idx1;
2179                                    a[idx9 + idx3 % twon3] = a[idx10];
2180                                    a[idx9 + (idx3 + 1) % twon3] = -a[idx10 + 1];
2181                                }
2182                            }
2183                        }
2184                    }
2185                });
2186            }
2187            ConcurrencyUtils.waitForCompletion(futures);
2188        } else {
2189
2190            for (int s = slices - 1; s >= 0; s--) {
2191                int idx1 = s * sliceStride;
2192                int idx2 = s * twoSliceStride;
2193                for (int r = rows - 1; r >= 0; r--) {
2194                    System.arraycopy(a, idx1 + r * rowStride, temp, 0, columns);
2195                    fftColumns.realInverseFull(temp, scale);
2196                    System.arraycopy(temp, 0, a, idx2 + r * twoRowStride, twon3);
2197                }
2198            }
2199
2200            temp = new double[2 * rows];
2201
2202            for (int s = 0; s < slices; s++) {
2203                int idx1 = s * twoSliceStride;
2204                for (int c = 0; c < columns; c++) {
2205                    int idx2 = 2 * c;
2206                    for (int r = 0; r < rows; r++) {
2207                        int idx4 = 2 * r;
2208                        int idx3 = idx1 + r * twoRowStride + idx2;
2209                        temp[idx4] = a[idx3];
2210                        temp[idx4 + 1] = a[idx3 + 1];
2211                    }
2212                    fftRows.complexInverse(temp, scale);
2213                    for (int r = 0; r < rows; r++) {
2214                        int idx4 = 2 * r;
2215                        int idx3 = idx1 + r * twoRowStride + idx2;
2216                        a[idx3] = temp[idx4];
2217                        a[idx3 + 1] = temp[idx4 + 1];
2218                    }
2219                }
2220            }
2221
2222            temp = new double[2 * slices];
2223
2224            for (int r = 0; r < ldimn2; r++) {
2225                int idx3 = r * twoRowStride;
2226                for (int c = 0; c < columns; c++) {
2227                    int idx1 = 2 * c;
2228                    for (int s = 0; s < slices; s++) {
2229                        int idx2 = 2 * s;
2230                        int idx4 = s * twoSliceStride + idx3 + idx1;
2231                        temp[idx2] = a[idx4];
2232                        temp[idx2 + 1] = a[idx4 + 1];
2233                    }
2234                    fftSlices.complexInverse(temp, scale);
2235                    for (int s = 0; s < slices; s++) {
2236                        int idx2 = 2 * s;
2237                        int idx4 = s * twoSliceStride + idx3 + idx1;
2238                        a[idx4] = temp[idx2];
2239                        a[idx4 + 1] = temp[idx2 + 1];
2240                    }
2241                }
2242            }
2243
2244            for (int s = 0; s < slices; s++) {
2245                int idx2 = (slices - s) % slices;
2246                int idx5 = idx2 * twoSliceStride;
2247                int idx6 = s * twoSliceStride;
2248                for (int r = 1; r < n2d2; r++) {
2249                    int idx4 = rows - r;
2250                    int idx7 = idx4 * twoRowStride;
2251                    int idx8 = r * twoRowStride;
2252                    int idx9 = idx5 + idx7;
2253                    for (int c = 0; c < columns; c++) {
2254                        int idx1 = 2 * c;
2255                        int idx3 = twon3 - idx1;
2256                        int idx10 = idx6 + idx8 + idx1;
2257                        a[idx9 + idx3 % twon3] = a[idx10];
2258                        a[idx9 + (idx3 + 1) % twon3] = -a[idx10 + 1];
2259                    }
2260                }
2261            }
2262
2263        }
2264    }
2265
2266    private void xdft3da_sub1(int icr, int isgn, double[] a, boolean scale) {
2267        int idx0, idx1, idx2, idx3, idx4, idx5;
2268
2269        if (isgn == -1) {
2270            for (int s = 0; s < slices; s++) {
2271                idx0 = s * sliceStride;
2272                if (icr == 0) {
2273                    for (int r = 0; r < rows; r++) {
2274                        fftColumns.complexForward(a, idx0 + r * rowStride);
2275                    }
2276                } else {
2277                    for (int r = 0; r < rows; r++) {
2278                        fftColumns.realForward(a, idx0 + r * rowStride);
2279                    }
2280                }
2281                if (columns > 4) {
2282                    for (int c = 0; c < columns; c += 8) {
2283                        for (int r = 0; r < rows; r++) {
2284                            idx1 = idx0 + r * rowStride + c;
2285                            idx2 = 2 * r;
2286                            idx3 = 2 * rows + 2 * r;
2287                            idx4 = idx3 + 2 * rows;
2288                            idx5 = idx4 + 2 * rows;
2289                            t[idx2] = a[idx1];
2290                            t[idx2 + 1] = a[idx1 + 1];
2291                            t[idx3] = a[idx1 + 2];
2292                            t[idx3 + 1] = a[idx1 + 3];
2293                            t[idx4] = a[idx1 + 4];
2294                            t[idx4 + 1] = a[idx1 + 5];
2295                            t[idx5] = a[idx1 + 6];
2296                            t[idx5 + 1] = a[idx1 + 7];
2297                        }
2298                        fftRows.complexForward(t, 0);
2299                        fftRows.complexForward(t, 2 * rows);
2300                        fftRows.complexForward(t, 4 * rows);
2301                        fftRows.complexForward(t, 6 * rows);
2302                        for (int r = 0; r < rows; r++) {
2303                            idx1 = idx0 + r * rowStride + c;
2304                            idx2 = 2 * r;
2305                            idx3 = 2 * rows + 2 * r;
2306                            idx4 = idx3 + 2 * rows;
2307                            idx5 = idx4 + 2 * rows;
2308                            a[idx1] = t[idx2];
2309                            a[idx1 + 1] = t[idx2 + 1];
2310                            a[idx1 + 2] = t[idx3];
2311                            a[idx1 + 3] = t[idx3 + 1];
2312                            a[idx1 + 4] = t[idx4];
2313                            a[idx1 + 5] = t[idx4 + 1];
2314                            a[idx1 + 6] = t[idx5];
2315                            a[idx1 + 7] = t[idx5 + 1];
2316                        }
2317                    }
2318                } else if (columns == 4) {
2319                    for (int r = 0; r < rows; r++) {
2320                        idx1 = idx0 + r * rowStride;
2321                        idx2 = 2 * r;
2322                        idx3 = 2 * rows + 2 * r;
2323                        t[idx2] = a[idx1];
2324                        t[idx2 + 1] = a[idx1 + 1];
2325                        t[idx3] = a[idx1 + 2];
2326                        t[idx3 + 1] = a[idx1 + 3];
2327                    }
2328                    fftRows.complexForward(t, 0);
2329                    fftRows.complexForward(t, 2 * rows);
2330                    for (int r = 0; r < rows; r++) {
2331                        idx1 = idx0 + r * rowStride;
2332                        idx2 = 2 * r;
2333                        idx3 = 2 * rows + 2 * r;
2334                        a[idx1] = t[idx2];
2335                        a[idx1 + 1] = t[idx2 + 1];
2336                        a[idx1 + 2] = t[idx3];
2337                        a[idx1 + 3] = t[idx3 + 1];
2338                    }
2339                } else if (columns == 2) {
2340                    for (int r = 0; r < rows; r++) {
2341                        idx1 = idx0 + r * rowStride;
2342                        idx2 = 2 * r;
2343                        t[idx2] = a[idx1];
2344                        t[idx2 + 1] = a[idx1 + 1];
2345                    }
2346                    fftRows.complexForward(t, 0);
2347                    for (int r = 0; r < rows; r++) {
2348                        idx1 = idx0 + r * rowStride;
2349                        idx2 = 2 * r;
2350                        a[idx1] = t[idx2];
2351                        a[idx1 + 1] = t[idx2 + 1];
2352                    }
2353                }
2354            }
2355        } else {
2356            for (int s = 0; s < slices; s++) {
2357                idx0 = s * sliceStride;
2358                if (icr == 0) {
2359                    for (int r = 0; r < rows; r++) {
2360                        fftColumns.complexInverse(a, idx0 + r * rowStride, scale);
2361                    }
2362                }
2363                if (columns > 4) {
2364                    for (int c = 0; c < columns; c += 8) {
2365                        for (int r = 0; r < rows; r++) {
2366                            idx1 = idx0 + r * rowStride + c;
2367                            idx2 = 2 * r;
2368                            idx3 = 2 * rows + 2 * r;
2369                            idx4 = idx3 + 2 * rows;
2370                            idx5 = idx4 + 2 * rows;
2371                            t[idx2] = a[idx1];
2372                            t[idx2 + 1] = a[idx1 + 1];
2373                            t[idx3] = a[idx1 + 2];
2374                            t[idx3 + 1] = a[idx1 + 3];
2375                            t[idx4] = a[idx1 + 4];
2376                            t[idx4 + 1] = a[idx1 + 5];
2377                            t[idx5] = a[idx1 + 6];
2378                            t[idx5 + 1] = a[idx1 + 7];
2379                        }
2380                        fftRows.complexInverse(t, 0, scale);
2381                        fftRows.complexInverse(t, 2 * rows, scale);
2382                        fftRows.complexInverse(t, 4 * rows, scale);
2383                        fftRows.complexInverse(t, 6 * rows, scale);
2384                        for (int r = 0; r < rows; r++) {
2385                            idx1 = idx0 + r * rowStride + c;
2386                            idx2 = 2 * r;
2387                            idx3 = 2 * rows + 2 * r;
2388                            idx4 = idx3 + 2 * rows;
2389                            idx5 = idx4 + 2 * rows;
2390                            a[idx1] = t[idx2];
2391                            a[idx1 + 1] = t[idx2 + 1];
2392                            a[idx1 + 2] = t[idx3];
2393                            a[idx1 + 3] = t[idx3 + 1];
2394                            a[idx1 + 4] = t[idx4];
2395                            a[idx1 + 5] = t[idx4 + 1];
2396                            a[idx1 + 6] = t[idx5];
2397                            a[idx1 + 7] = t[idx5 + 1];
2398                        }
2399                    }
2400                } else if (columns == 4) {
2401                    for (int r = 0; r < rows; r++) {
2402                        idx1 = idx0 + r * rowStride;
2403                        idx2 = 2 * r;
2404                        idx3 = 2 * rows + 2 * r;
2405                        t[idx2] = a[idx1];
2406                        t[idx2 + 1] = a[idx1 + 1];
2407                        t[idx3] = a[idx1 + 2];
2408                        t[idx3 + 1] = a[idx1 + 3];
2409                    }
2410                    fftRows.complexInverse(t, 0, scale);
2411                    fftRows.complexInverse(t, 2 * rows, scale);
2412                    for (int r = 0; r < rows; r++) {
2413                        idx1 = idx0 + r * rowStride;
2414                        idx2 = 2 * r;
2415                        idx3 = 2 * rows + 2 * r;
2416                        a[idx1] = t[idx2];
2417                        a[idx1 + 1] = t[idx2 + 1];
2418                        a[idx1 + 2] = t[idx3];
2419                        a[idx1 + 3] = t[idx3 + 1];
2420                    }
2421                } else if (columns == 2) {
2422                    for (int r = 0; r < rows; r++) {
2423                        idx1 = idx0 + r * rowStride;
2424                        idx2 = 2 * r;
2425                        t[idx2] = a[idx1];
2426                        t[idx2 + 1] = a[idx1 + 1];
2427                    }
2428                    fftRows.complexInverse(t, 0, scale);
2429                    for (int r = 0; r < rows; r++) {
2430                        idx1 = idx0 + r * rowStride;
2431                        idx2 = 2 * r;
2432                        a[idx1] = t[idx2];
2433                        a[idx1 + 1] = t[idx2 + 1];
2434                    }
2435                }
2436                if (icr != 0) {
2437                    for (int r = 0; r < rows; r++) {
2438                        fftColumns.realInverse(a, idx0 + r * rowStride, scale);
2439                    }
2440                }
2441            }
2442        }
2443    }
2444
2445    private void xdft3da_sub2(int icr, int isgn, double[] a, boolean scale) {
2446        int idx0, idx1, idx2, idx3, idx4, idx5;
2447
2448        if (isgn == -1) {
2449            for (int s = 0; s < slices; s++) {
2450                idx0 = s * sliceStride;
2451                if (icr == 0) {
2452                    for (int r = 0; r < rows; r++) {
2453                        fftColumns.complexForward(a, idx0 + r * rowStride);
2454                    }
2455                } else {
2456                    for (int r = 0; r < rows; r++) {
2457                        fftColumns.realForward(a, idx0 + r * rowStride);
2458                    }
2459                }
2460                if (columns > 4) {
2461                    for (int c = 0; c < columns; c += 8) {
2462                        for (int r = 0; r < rows; r++) {
2463                            idx1 = idx0 + r * rowStride + c;
2464                            idx2 = 2 * r;
2465                            idx3 = 2 * rows + 2 * r;
2466                            idx4 = idx3 + 2 * rows;
2467                            idx5 = idx4 + 2 * rows;
2468                            t[idx2] = a[idx1];
2469                            t[idx2 + 1] = a[idx1 + 1];
2470                            t[idx3] = a[idx1 + 2];
2471                            t[idx3 + 1] = a[idx1 + 3];
2472                            t[idx4] = a[idx1 + 4];
2473                            t[idx4 + 1] = a[idx1 + 5];
2474                            t[idx5] = a[idx1 + 6];
2475                            t[idx5 + 1] = a[idx1 + 7];
2476                        }
2477                        fftRows.complexForward(t, 0);
2478                        fftRows.complexForward(t, 2 * rows);
2479                        fftRows.complexForward(t, 4 * rows);
2480                        fftRows.complexForward(t, 6 * rows);
2481                        for (int r = 0; r < rows; r++) {
2482                            idx1 = idx0 + r * rowStride + c;
2483                            idx2 = 2 * r;
2484                            idx3 = 2 * rows + 2 * r;
2485                            idx4 = idx3 + 2 * rows;
2486                            idx5 = idx4 + 2 * rows;
2487                            a[idx1] = t[idx2];
2488                            a[idx1 + 1] = t[idx2 + 1];
2489                            a[idx1 + 2] = t[idx3];
2490                            a[idx1 + 3] = t[idx3 + 1];
2491                            a[idx1 + 4] = t[idx4];
2492                            a[idx1 + 5] = t[idx4 + 1];
2493                            a[idx1 + 6] = t[idx5];
2494                            a[idx1 + 7] = t[idx5 + 1];
2495                        }
2496                    }
2497                } else if (columns == 4) {
2498                    for (int r = 0; r < rows; r++) {
2499                        idx1 = idx0 + r * rowStride;
2500                        idx2 = 2 * r;
2501                        idx3 = 2 * rows + 2 * r;
2502                        t[idx2] = a[idx1];
2503                        t[idx2 + 1] = a[idx1 + 1];
2504                        t[idx3] = a[idx1 + 2];
2505                        t[idx3 + 1] = a[idx1 + 3];
2506                    }
2507                    fftRows.complexForward(t, 0);
2508                    fftRows.complexForward(t, 2 * rows);
2509                    for (int r = 0; r < rows; r++) {
2510                        idx1 = idx0 + r * rowStride;
2511                        idx2 = 2 * r;
2512                        idx3 = 2 * rows + 2 * r;
2513                        a[idx1] = t[idx2];
2514                        a[idx1 + 1] = t[idx2 + 1];
2515                        a[idx1 + 2] = t[idx3];
2516                        a[idx1 + 3] = t[idx3 + 1];
2517                    }
2518                } else if (columns == 2) {
2519                    for (int r = 0; r < rows; r++) {
2520                        idx1 = idx0 + r * rowStride;
2521                        idx2 = 2 * r;
2522                        t[idx2] = a[idx1];
2523                        t[idx2 + 1] = a[idx1 + 1];
2524                    }
2525                    fftRows.complexForward(t, 0);
2526                    for (int r = 0; r < rows; r++) {
2527                        idx1 = idx0 + r * rowStride;
2528                        idx2 = 2 * r;
2529                        a[idx1] = t[idx2];
2530                        a[idx1 + 1] = t[idx2 + 1];
2531                    }
2532                }
2533            }
2534        } else {
2535            for (int s = 0; s < slices; s++) {
2536                idx0 = s * sliceStride;
2537                if (icr == 0) {
2538                    for (int r = 0; r < rows; r++) {
2539                        fftColumns.complexInverse(a, idx0 + r * rowStride, scale);
2540                    }
2541                } else {
2542                    for (int r = 0; r < rows; r++) {
2543                        fftColumns.realInverse2(a, idx0 + r * rowStride, scale);
2544                    }
2545                }
2546                if (columns > 4) {
2547                    for (int c = 0; c < columns; c += 8) {
2548                        for (int r = 0; r < rows; r++) {
2549                            idx1 = idx0 + r * rowStride + c;
2550                            idx2 = 2 * r;
2551                            idx3 = 2 * rows + 2 * r;
2552                            idx4 = idx3 + 2 * rows;
2553                            idx5 = idx4 + 2 * rows;
2554                            t[idx2] = a[idx1];
2555                            t[idx2 + 1] = a[idx1 + 1];
2556                            t[idx3] = a[idx1 + 2];
2557                            t[idx3 + 1] = a[idx1 + 3];
2558                            t[idx4] = a[idx1 + 4];
2559                            t[idx4 + 1] = a[idx1 + 5];
2560                            t[idx5] = a[idx1 + 6];
2561                            t[idx5 + 1] = a[idx1 + 7];
2562                        }
2563                        fftRows.complexInverse(t, 0, scale);
2564                        fftRows.complexInverse(t, 2 * rows, scale);
2565                        fftRows.complexInverse(t, 4 * rows, scale);
2566                        fftRows.complexInverse(t, 6 * rows, scale);
2567                        for (int r = 0; r < rows; r++) {
2568                            idx1 = idx0 + r * rowStride + c;
2569                            idx2 = 2 * r;
2570                            idx3 = 2 * rows + 2 * r;
2571                            idx4 = idx3 + 2 * rows;
2572                            idx5 = idx4 + 2 * rows;
2573                            a[idx1] = t[idx2];
2574                            a[idx1 + 1] = t[idx2 + 1];
2575                            a[idx1 + 2] = t[idx3];
2576                            a[idx1 + 3] = t[idx3 + 1];
2577                            a[idx1 + 4] = t[idx4];
2578                            a[idx1 + 5] = t[idx4 + 1];
2579                            a[idx1 + 6] = t[idx5];
2580                            a[idx1 + 7] = t[idx5 + 1];
2581                        }
2582                    }
2583                } else if (columns == 4) {
2584                    for (int r = 0; r < rows; r++) {
2585                        idx1 = idx0 + r * rowStride;
2586                        idx2 = 2 * r;
2587                        idx3 = 2 * rows + 2 * r;
2588                        t[idx2] = a[idx1];
2589                        t[idx2 + 1] = a[idx1 + 1];
2590                        t[idx3] = a[idx1 + 2];
2591                        t[idx3 + 1] = a[idx1 + 3];
2592                    }
2593                    fftRows.complexInverse(t, 0, scale);
2594                    fftRows.complexInverse(t, 2 * rows, scale);
2595                    for (int r = 0; r < rows; r++) {
2596                        idx1 = idx0 + r * rowStride;
2597                        idx2 = 2 * r;
2598                        idx3 = 2 * rows + 2 * r;
2599                        a[idx1] = t[idx2];
2600                        a[idx1 + 1] = t[idx2 + 1];
2601                        a[idx1 + 2] = t[idx3];
2602                        a[idx1 + 3] = t[idx3 + 1];
2603                    }
2604                } else if (columns == 2) {
2605                    for (int r = 0; r < rows; r++) {
2606                        idx1 = idx0 + r * rowStride;
2607                        idx2 = 2 * r;
2608                        t[idx2] = a[idx1];
2609                        t[idx2 + 1] = a[idx1 + 1];
2610                    }
2611                    fftRows.complexInverse(t, 0, scale);
2612                    for (int r = 0; r < rows; r++) {
2613                        idx1 = idx0 + r * rowStride;
2614                        idx2 = 2 * r;
2615                        a[idx1] = t[idx2];
2616                        a[idx1 + 1] = t[idx2 + 1];
2617                    }
2618                }
2619            }
2620        }
2621    }
2622
2623    private void xdft3da_sub1(int icr, int isgn, double[][][] a, boolean scale) {
2624        int idx2, idx3, idx4, idx5;
2625
2626        if (isgn == -1) {
2627            for (int s = 0; s < slices; s++) {
2628                if (icr == 0) {
2629                    for (int r = 0; r < rows; r++) {
2630                        fftColumns.complexForward(a[s][r]);
2631                    }
2632                } else {
2633                    for (int r = 0; r < rows; r++) {
2634                        fftColumns.realForward(a[s][r], 0);
2635                    }
2636                }
2637                if (columns > 4) {
2638                    for (int c = 0; c < columns; c += 8) {
2639                        for (int r = 0; r < rows; r++) {
2640                            idx2 = 2 * r;
2641                            idx3 = 2 * rows + 2 * r;
2642                            idx4 = idx3 + 2 * rows;
2643                            idx5 = idx4 + 2 * rows;
2644                            t[idx2] = a[s][r][c];
2645                            t[idx2 + 1] = a[s][r][c + 1];
2646                            t[idx3] = a[s][r][c + 2];
2647                            t[idx3 + 1] = a[s][r][c + 3];
2648                            t[idx4] = a[s][r][c + 4];
2649                            t[idx4 + 1] = a[s][r][c + 5];
2650                            t[idx5] = a[s][r][c + 6];
2651                            t[idx5 + 1] = a[s][r][c + 7];
2652                        }
2653                        fftRows.complexForward(t, 0);
2654                        fftRows.complexForward(t, 2 * rows);
2655                        fftRows.complexForward(t, 4 * rows);
2656                        fftRows.complexForward(t, 6 * rows);
2657                        for (int r = 0; r < rows; r++) {
2658                            idx2 = 2 * r;
2659                            idx3 = 2 * rows + 2 * r;
2660                            idx4 = idx3 + 2 * rows;
2661                            idx5 = idx4 + 2 * rows;
2662                            a[s][r][c] = t[idx2];
2663                            a[s][r][c + 1] = t[idx2 + 1];
2664                            a[s][r][c + 2] = t[idx3];
2665                            a[s][r][c + 3] = t[idx3 + 1];
2666                            a[s][r][c + 4] = t[idx4];
2667                            a[s][r][c + 5] = t[idx4 + 1];
2668                            a[s][r][c + 6] = t[idx5];
2669                            a[s][r][c + 7] = t[idx5 + 1];
2670                        }
2671                    }
2672                } else if (columns == 4) {
2673                    for (int r = 0; r < rows; r++) {
2674                        idx2 = 2 * r;
2675                        idx3 = 2 * rows + 2 * r;
2676                        t[idx2] = a[s][r][0];
2677                        t[idx2 + 1] = a[s][r][1];
2678                        t[idx3] = a[s][r][2];
2679                        t[idx3 + 1] = a[s][r][3];
2680                    }
2681                    fftRows.complexForward(t, 0);
2682                    fftRows.complexForward(t, 2 * rows);
2683                    for (int r = 0; r < rows; r++) {
2684                        idx2 = 2 * r;
2685                        idx3 = 2 * rows + 2 * r;
2686                        a[s][r][0] = t[idx2];
2687                        a[s][r][1] = t[idx2 + 1];
2688                        a[s][r][2] = t[idx3];
2689                        a[s][r][3] = t[idx3 + 1];
2690                    }
2691                } else if (columns == 2) {
2692                    for (int r = 0; r < rows; r++) {
2693                        idx2 = 2 * r;
2694                        t[idx2] = a[s][r][0];
2695                        t[idx2 + 1] = a[s][r][1];
2696                    }
2697                    fftRows.complexForward(t, 0);
2698                    for (int r = 0; r < rows; r++) {
2699                        idx2 = 2 * r;
2700                        a[s][r][0] = t[idx2];
2701                        a[s][r][1] = t[idx2 + 1];
2702                    }
2703                }
2704            }
2705        } else {
2706            for (int s = 0; s < slices; s++) {
2707                if (icr == 0) {
2708                    for (int r = 0; r < rows; r++) {
2709                        fftColumns.complexInverse(a[s][r], scale);
2710                    }
2711                }
2712                if (columns > 4) {
2713                    for (int c = 0; c < columns; c += 8) {
2714                        for (int r = 0; r < rows; r++) {
2715                            idx2 = 2 * r;
2716                            idx3 = 2 * rows + 2 * r;
2717                            idx4 = idx3 + 2 * rows;
2718                            idx5 = idx4 + 2 * rows;
2719                            t[idx2] = a[s][r][c];
2720                            t[idx2 + 1] = a[s][r][c + 1];
2721                            t[idx3] = a[s][r][c + 2];
2722                            t[idx3 + 1] = a[s][r][c + 3];
2723                            t[idx4] = a[s][r][c + 4];
2724                            t[idx4 + 1] = a[s][r][c + 5];
2725                            t[idx5] = a[s][r][c + 6];
2726                            t[idx5 + 1] = a[s][r][c + 7];
2727                        }
2728                        fftRows.complexInverse(t, 0, scale);
2729                        fftRows.complexInverse(t, 2 * rows, scale);
2730                        fftRows.complexInverse(t, 4 * rows, scale);
2731                        fftRows.complexInverse(t, 6 * rows, scale);
2732                        for (int r = 0; r < rows; r++) {
2733                            idx2 = 2 * r;
2734                            idx3 = 2 * rows + 2 * r;
2735                            idx4 = idx3 + 2 * rows;
2736                            idx5 = idx4 + 2 * rows;
2737                            a[s][r][c] = t[idx2];
2738                            a[s][r][c + 1] = t[idx2 + 1];
2739                            a[s][r][c + 2] = t[idx3];
2740                            a[s][r][c + 3] = t[idx3 + 1];
2741                            a[s][r][c + 4] = t[idx4];
2742                            a[s][r][c + 5] = t[idx4 + 1];
2743                            a[s][r][c + 6] = t[idx5];
2744                            a[s][r][c + 7] = t[idx5 + 1];
2745                        }
2746                    }
2747                } else if (columns == 4) {
2748                    for (int r = 0; r < rows; r++) {
2749                        idx2 = 2 * r;
2750                        idx3 = 2 * rows + 2 * r;
2751                        t[idx2] = a[s][r][0];
2752                        t[idx2 + 1] = a[s][r][1];
2753                        t[idx3] = a[s][r][2];
2754                        t[idx3 + 1] = a[s][r][3];
2755                    }
2756                    fftRows.complexInverse(t, 0, scale);
2757                    fftRows.complexInverse(t, 2 * rows, scale);
2758                    for (int r = 0; r < rows; r++) {
2759                        idx2 = 2 * r;
2760                        idx3 = 2 * rows + 2 * r;
2761                        a[s][r][0] = t[idx2];
2762                        a[s][r][1] = t[idx2 + 1];
2763                        a[s][r][2] = t[idx3];
2764                        a[s][r][3] = t[idx3 + 1];
2765                    }
2766                } else if (columns == 2) {
2767                    for (int r = 0; r < rows; r++) {
2768                        idx2 = 2 * r;
2769                        t[idx2] = a[s][r][0];
2770                        t[idx2 + 1] = a[s][r][1];
2771                    }
2772                    fftRows.complexInverse(t, 0, scale);
2773                    for (int r = 0; r < rows; r++) {
2774                        idx2 = 2 * r;
2775                        a[s][r][0] = t[idx2];
2776                        a[s][r][1] = t[idx2 + 1];
2777                    }
2778                }
2779                if (icr != 0) {
2780                    for (int r = 0; r < rows; r++) {
2781                        fftColumns.realInverse(a[s][r], scale);
2782                    }
2783                }
2784            }
2785        }
2786    }
2787
2788    private void xdft3da_sub2(int icr, int isgn, double[][][] a, boolean scale) {
2789        int idx2, idx3, idx4, idx5;
2790
2791        if (isgn == -1) {
2792            for (int s = 0; s < slices; s++) {
2793                if (icr == 0) {
2794                    for (int r = 0; r < rows; r++) {
2795                        fftColumns.complexForward(a[s][r]);
2796                    }
2797                } else {
2798                    for (int r = 0; r < rows; r++) {
2799                        fftColumns.realForward(a[s][r]);
2800                    }
2801                }
2802                if (columns > 4) {
2803                    for (int c = 0; c < columns; c += 8) {
2804                        for (int r = 0; r < rows; r++) {
2805                            idx2 = 2 * r;
2806                            idx3 = 2 * rows + 2 * r;
2807                            idx4 = idx3 + 2 * rows;
2808                            idx5 = idx4 + 2 * rows;
2809                            t[idx2] = a[s][r][c];
2810                            t[idx2 + 1] = a[s][r][c + 1];
2811                            t[idx3] = a[s][r][c + 2];
2812                            t[idx3 + 1] = a[s][r][c + 3];
2813                            t[idx4] = a[s][r][c + 4];
2814                            t[idx4 + 1] = a[s][r][c + 5];
2815                            t[idx5] = a[s][r][c + 6];
2816                            t[idx5 + 1] = a[s][r][c + 7];
2817                        }
2818                        fftRows.complexForward(t, 0);
2819                        fftRows.complexForward(t, 2 * rows);
2820                        fftRows.complexForward(t, 4 * rows);
2821                        fftRows.complexForward(t, 6 * rows);
2822                        for (int r = 0; r < rows; r++) {
2823                            idx2 = 2 * r;
2824                            idx3 = 2 * rows + 2 * r;
2825                            idx4 = idx3 + 2 * rows;
2826                            idx5 = idx4 + 2 * rows;
2827                            a[s][r][c] = t[idx2];
2828                            a[s][r][c + 1] = t[idx2 + 1];
2829                            a[s][r][c + 2] = t[idx3];
2830                            a[s][r][c + 3] = t[idx3 + 1];
2831                            a[s][r][c + 4] = t[idx4];
2832                            a[s][r][c + 5] = t[idx4 + 1];
2833                            a[s][r][c + 6] = t[idx5];
2834                            a[s][r][c + 7] = t[idx5 + 1];
2835                        }
2836                    }
2837                } else if (columns == 4) {
2838                    for (int r = 0; r < rows; r++) {
2839                        idx2 = 2 * r;
2840                        idx3 = 2 * rows + 2 * r;
2841                        t[idx2] = a[s][r][0];
2842                        t[idx2 + 1] = a[s][r][1];
2843                        t[idx3] = a[s][r][2];
2844                        t[idx3 + 1] = a[s][r][3];
2845                    }
2846                    fftRows.complexForward(t, 0);
2847                    fftRows.complexForward(t, 2 * rows);
2848                    for (int r = 0; r < rows; r++) {
2849                        idx2 = 2 * r;
2850                        idx3 = 2 * rows + 2 * r;
2851                        a[s][r][0] = t[idx2];
2852                        a[s][r][1] = t[idx2 + 1];
2853                        a[s][r][2] = t[idx3];
2854                        a[s][r][3] = t[idx3 + 1];
2855                    }
2856                } else if (columns == 2) {
2857                    for (int r = 0; r < rows; r++) {
2858                        idx2 = 2 * r;
2859                        t[idx2] = a[s][r][0];
2860                        t[idx2 + 1] = a[s][r][1];
2861                    }
2862                    fftRows.complexForward(t, 0);
2863                    for (int r = 0; r < rows; r++) {
2864                        idx2 = 2 * r;
2865                        a[s][r][0] = t[idx2];
2866                        a[s][r][1] = t[idx2 + 1];
2867                    }
2868                }
2869            }
2870        } else {
2871            for (int s = 0; s < slices; s++) {
2872                if (icr == 0) {
2873                    for (int r = 0; r < rows; r++) {
2874                        fftColumns.complexInverse(a[s][r], scale);
2875                    }
2876                } else {
2877                    for (int r = 0; r < rows; r++) {
2878                        fftColumns.realInverse2(a[s][r], 0, scale);
2879                    }
2880                }
2881                if (columns > 4) {
2882                    for (int c = 0; c < columns; c += 8) {
2883                        for (int r = 0; r < rows; r++) {
2884                            idx2 = 2 * r;
2885                            idx3 = 2 * rows + 2 * r;
2886                            idx4 = idx3 + 2 * rows;
2887                            idx5 = idx4 + 2 * rows;
2888                            t[idx2] = a[s][r][c];
2889                            t[idx2 + 1] = a[s][r][c + 1];
2890                            t[idx3] = a[s][r][c + 2];
2891                            t[idx3 + 1] = a[s][r][c + 3];
2892                            t[idx4] = a[s][r][c + 4];
2893                            t[idx4 + 1] = a[s][r][c + 5];
2894                            t[idx5] = a[s][r][c + 6];
2895                            t[idx5 + 1] = a[s][r][c + 7];
2896                        }
2897                        fftRows.complexInverse(t, 0, scale);
2898                        fftRows.complexInverse(t, 2 * rows, scale);
2899                        fftRows.complexInverse(t, 4 * rows, scale);
2900                        fftRows.complexInverse(t, 6 * rows, scale);
2901                        for (int r = 0; r < rows; r++) {
2902                            idx2 = 2 * r;
2903                            idx3 = 2 * rows + 2 * r;
2904                            idx4 = idx3 + 2 * rows;
2905                            idx5 = idx4 + 2 * rows;
2906                            a[s][r][c] = t[idx2];
2907                            a[s][r][c + 1] = t[idx2 + 1];
2908                            a[s][r][c + 2] = t[idx3];
2909                            a[s][r][c + 3] = t[idx3 + 1];
2910                            a[s][r][c + 4] = t[idx4];
2911                            a[s][r][c + 5] = t[idx4 + 1];
2912                            a[s][r][c + 6] = t[idx5];
2913                            a[s][r][c + 7] = t[idx5 + 1];
2914                        }
2915                    }
2916                } else if (columns == 4) {
2917                    for (int r = 0; r < rows; r++) {
2918                        idx2 = 2 * r;
2919                        idx3 = 2 * rows + 2 * r;
2920                        t[idx2] = a[s][r][0];
2921                        t[idx2 + 1] = a[s][r][1];
2922                        t[idx3] = a[s][r][2];
2923                        t[idx3 + 1] = a[s][r][3];
2924                    }
2925                    fftRows.complexInverse(t, 0, scale);
2926                    fftRows.complexInverse(t, 2 * rows, scale);
2927                    for (int r = 0; r < rows; r++) {
2928                        idx2 = 2 * r;
2929                        idx3 = 2 * rows + 2 * r;
2930                        a[s][r][0] = t[idx2];
2931                        a[s][r][1] = t[idx2 + 1];
2932                        a[s][r][2] = t[idx3];
2933                        a[s][r][3] = t[idx3 + 1];
2934                    }
2935                } else if (columns == 2) {
2936                    for (int r = 0; r < rows; r++) {
2937                        idx2 = 2 * r;
2938                        t[idx2] = a[s][r][0];
2939                        t[idx2 + 1] = a[s][r][1];
2940                    }
2941                    fftRows.complexInverse(t, 0, scale);
2942                    for (int r = 0; r < rows; r++) {
2943                        idx2 = 2 * r;
2944                        a[s][r][0] = t[idx2];
2945                        a[s][r][1] = t[idx2 + 1];
2946                    }
2947                }
2948            }
2949        }
2950    }
2951
2952    private void cdft3db_sub(int isgn, double[] a, boolean scale) {
2953        int idx0, idx1, idx2, idx3, idx4, idx5;
2954
2955        if (isgn == -1) {
2956            if (columns > 4) {
2957                for (int r = 0; r < rows; r++) {
2958                    idx0 = r * rowStride;
2959                    for (int c = 0; c < columns; c += 8) {
2960                        for (int s = 0; s < slices; s++) {
2961                            idx1 = s * sliceStride + idx0 + c;
2962                            idx2 = 2 * s;
2963                            idx3 = 2 * slices + 2 * s;
2964                            idx4 = idx3 + 2 * slices;
2965                            idx5 = idx4 + 2 * slices;
2966                            t[idx2] = a[idx1];
2967                            t[idx2 + 1] = a[idx1 + 1];
2968                            t[idx3] = a[idx1 + 2];
2969                            t[idx3 + 1] = a[idx1 + 3];
2970                            t[idx4] = a[idx1 + 4];
2971                            t[idx4 + 1] = a[idx1 + 5];
2972                            t[idx5] = a[idx1 + 6];
2973                            t[idx5 + 1] = a[idx1 + 7];
2974                        }
2975                        fftSlices.complexForward(t, 0);
2976                        fftSlices.complexForward(t, 2 * slices);
2977                        fftSlices.complexForward(t, 4 * slices);
2978                        fftSlices.complexForward(t, 6 * slices);
2979                        for (int s = 0; s < slices; s++) {
2980                            idx1 = s * sliceStride + idx0 + c;
2981                            idx2 = 2 * s;
2982                            idx3 = 2 * slices + 2 * s;
2983                            idx4 = idx3 + 2 * slices;
2984                            idx5 = idx4 + 2 * slices;
2985                            a[idx1] = t[idx2];
2986                            a[idx1 + 1] = t[idx2 + 1];
2987                            a[idx1 + 2] = t[idx3];
2988                            a[idx1 + 3] = t[idx3 + 1];
2989                            a[idx1 + 4] = t[idx4];
2990                            a[idx1 + 5] = t[idx4 + 1];
2991                            a[idx1 + 6] = t[idx5];
2992                            a[idx1 + 7] = t[idx5 + 1];
2993                        }
2994                    }
2995                }
2996            } else if (columns == 4) {
2997                for (int r = 0; r < rows; r++) {
2998                    idx0 = r * rowStride;
2999                    for (int s = 0; s < slices; s++) {
3000                        idx1 = s * sliceStride + idx0;
3001                        idx2 = 2 * s;
3002                        idx3 = 2 * slices + 2 * s;
3003                        t[idx2] = a[idx1];
3004                        t[idx2 + 1] = a[idx1 + 1];
3005                        t[idx3] = a[idx1 + 2];
3006                        t[idx3 + 1] = a[idx1 + 3];
3007                    }
3008                    fftSlices.complexForward(t, 0);
3009                    fftSlices.complexForward(t, 2 * slices);
3010                    for (int s = 0; s < slices; s++) {
3011                        idx1 = s * sliceStride + idx0;
3012                        idx2 = 2 * s;
3013                        idx3 = 2 * slices + 2 * s;
3014                        a[idx1] = t[idx2];
3015                        a[idx1 + 1] = t[idx2 + 1];
3016                        a[idx1 + 2] = t[idx3];
3017                        a[idx1 + 3] = t[idx3 + 1];
3018                    }
3019                }
3020            } else if (columns == 2) {
3021                for (int r = 0; r < rows; r++) {
3022                    idx0 = r * rowStride;
3023                    for (int s = 0; s < slices; s++) {
3024                        idx1 = s * sliceStride + idx0;
3025                        idx2 = 2 * s;
3026                        t[idx2] = a[idx1];
3027                        t[idx2 + 1] = a[idx1 + 1];
3028                    }
3029                    fftSlices.complexForward(t, 0);
3030                    for (int s = 0; s < slices; s++) {
3031                        idx1 = s * sliceStride + idx0;
3032                        idx2 = 2 * s;
3033                        a[idx1] = t[idx2];
3034                        a[idx1 + 1] = t[idx2 + 1];
3035                    }
3036                }
3037            }
3038        } else {
3039            if (columns > 4) {
3040                for (int r = 0; r < rows; r++) {
3041                    idx0 = r * rowStride;
3042                    for (int c = 0; c < columns; c += 8) {
3043                        for (int s = 0; s < slices; s++) {
3044                            idx1 = s * sliceStride + idx0 + c;
3045                            idx2 = 2 * s;
3046                            idx3 = 2 * slices + 2 * s;
3047                            idx4 = idx3 + 2 * slices;
3048                            idx5 = idx4 + 2 * slices;
3049                            t[idx2] = a[idx1];
3050                            t[idx2 + 1] = a[idx1 + 1];
3051                            t[idx3] = a[idx1 + 2];
3052                            t[idx3 + 1] = a[idx1 + 3];
3053                            t[idx4] = a[idx1 + 4];
3054                            t[idx4 + 1] = a[idx1 + 5];
3055                            t[idx5] = a[idx1 + 6];
3056                            t[idx5 + 1] = a[idx1 + 7];
3057                        }
3058                        fftSlices.complexInverse(t, 0, scale);
3059                        fftSlices.complexInverse(t, 2 * slices, scale);
3060                        fftSlices.complexInverse(t, 4 * slices, scale);
3061                        fftSlices.complexInverse(t, 6 * slices, scale);
3062                        for (int s = 0; s < slices; s++) {
3063                            idx1 = s * sliceStride + idx0 + c;
3064                            idx2 = 2 * s;
3065                            idx3 = 2 * slices + 2 * s;
3066                            idx4 = idx3 + 2 * slices;
3067                            idx5 = idx4 + 2 * slices;
3068                            a[idx1] = t[idx2];
3069                            a[idx1 + 1] = t[idx2 + 1];
3070                            a[idx1 + 2] = t[idx3];
3071                            a[idx1 + 3] = t[idx3 + 1];
3072                            a[idx1 + 4] = t[idx4];
3073                            a[idx1 + 5] = t[idx4 + 1];
3074                            a[idx1 + 6] = t[idx5];
3075                            a[idx1 + 7] = t[idx5 + 1];
3076                        }
3077                    }
3078                }
3079            } else if (columns == 4) {
3080                for (int r = 0; r < rows; r++) {
3081                    idx0 = r * rowStride;
3082                    for (int s = 0; s < slices; s++) {
3083                        idx1 = s * sliceStride + idx0;
3084                        idx2 = 2 * s;
3085                        idx3 = 2 * slices + 2 * s;
3086                        t[idx2] = a[idx1];
3087                        t[idx2 + 1] = a[idx1 + 1];
3088                        t[idx3] = a[idx1 + 2];
3089                        t[idx3 + 1] = a[idx1 + 3];
3090                    }
3091                    fftSlices.complexInverse(t, 0, scale);
3092                    fftSlices.complexInverse(t, 2 * slices, scale);
3093                    for (int s = 0; s < slices; s++) {
3094                        idx1 = s * sliceStride + idx0;
3095                        idx2 = 2 * s;
3096                        idx3 = 2 * slices + 2 * s;
3097                        a[idx1] = t[idx2];
3098                        a[idx1 + 1] = t[idx2 + 1];
3099                        a[idx1 + 2] = t[idx3];
3100                        a[idx1 + 3] = t[idx3 + 1];
3101                    }
3102                }
3103            } else if (columns == 2) {
3104                for (int r = 0; r < rows; r++) {
3105                    idx0 = r * rowStride;
3106                    for (int s = 0; s < slices; s++) {
3107                        idx1 = s * sliceStride + idx0;
3108                        idx2 = 2 * s;
3109                        t[idx2] = a[idx1];
3110                        t[idx2 + 1] = a[idx1 + 1];
3111                    }
3112                    fftSlices.complexInverse(t, 0, scale);
3113                    for (int s = 0; s < slices; s++) {
3114                        idx1 = s * sliceStride + idx0;
3115                        idx2 = 2 * s;
3116                        a[idx1] = t[idx2];
3117                        a[idx1 + 1] = t[idx2 + 1];
3118                    }
3119                }
3120            }
3121        }
3122    }
3123
3124    private void cdft3db_sub(int isgn, double[][][] a, boolean scale) {
3125        int idx2, idx3, idx4, idx5;
3126
3127        if (isgn == -1) {
3128            if (columns > 4) {
3129                for (int r = 0; r < rows; r++) {
3130                    for (int c = 0; c < columns; c += 8) {
3131                        for (int s = 0; s < slices; s++) {
3132                            idx2 = 2 * s;
3133                            idx3 = 2 * slices + 2 * s;
3134                            idx4 = idx3 + 2 * slices;
3135                            idx5 = idx4 + 2 * slices;
3136                            t[idx2] = a[s][r][c];
3137                            t[idx2 + 1] = a[s][r][c + 1];
3138                            t[idx3] = a[s][r][c + 2];
3139                            t[idx3 + 1] = a[s][r][c + 3];
3140                            t[idx4] = a[s][r][c + 4];
3141                            t[idx4 + 1] = a[s][r][c + 5];
3142                            t[idx5] = a[s][r][c + 6];
3143                            t[idx5 + 1] = a[s][r][c + 7];
3144                        }
3145                        fftSlices.complexForward(t, 0);
3146                        fftSlices.complexForward(t, 2 * slices);
3147                        fftSlices.complexForward(t, 4 * slices);
3148                        fftSlices.complexForward(t, 6 * slices);
3149                        for (int s = 0; s < slices; s++) {
3150                            idx2 = 2 * s;
3151                            idx3 = 2 * slices + 2 * s;
3152                            idx4 = idx3 + 2 * slices;
3153                            idx5 = idx4 + 2 * slices;
3154                            a[s][r][c] = t[idx2];
3155                            a[s][r][c + 1] = t[idx2 + 1];
3156                            a[s][r][c + 2] = t[idx3];
3157                            a[s][r][c + 3] = t[idx3 + 1];
3158                            a[s][r][c + 4] = t[idx4];
3159                            a[s][r][c + 5] = t[idx4 + 1];
3160                            a[s][r][c + 6] = t[idx5];
3161                            a[s][r][c + 7] = t[idx5 + 1];
3162                        }
3163                    }
3164                }
3165            } else if (columns == 4) {
3166                for (int r = 0; r < rows; r++) {
3167                    for (int s = 0; s < slices; s++) {
3168                        idx2 = 2 * s;
3169                        idx3 = 2 * slices + 2 * s;
3170                        t[idx2] = a[s][r][0];
3171                        t[idx2 + 1] = a[s][r][1];
3172                        t[idx3] = a[s][r][2];
3173                        t[idx3 + 1] = a[s][r][3];
3174                    }
3175                    fftSlices.complexForward(t, 0);
3176                    fftSlices.complexForward(t, 2 * slices);
3177                    for (int s = 0; s < slices; s++) {
3178                        idx2 = 2 * s;
3179                        idx3 = 2 * slices + 2 * s;
3180                        a[s][r][0] = t[idx2];
3181                        a[s][r][1] = t[idx2 + 1];
3182                        a[s][r][2] = t[idx3];
3183                        a[s][r][3] = t[idx3 + 1];
3184                    }
3185                }
3186            } else if (columns == 2) {
3187                for (int r = 0; r < rows; r++) {
3188                    for (int s = 0; s < slices; s++) {
3189                        idx2 = 2 * s;
3190                        t[idx2] = a[s][r][0];
3191                        t[idx2 + 1] = a[s][r][1];
3192                    }
3193                    fftSlices.complexForward(t, 0);
3194                    for (int s = 0; s < slices; s++) {
3195                        idx2 = 2 * s;
3196                        a[s][r][0] = t[idx2];
3197                        a[s][r][1] = t[idx2 + 1];
3198                    }
3199                }
3200            }
3201        } else {
3202            if (columns > 4) {
3203                for (int r = 0; r < rows; r++) {
3204                    for (int c = 0; c < columns; c += 8) {
3205                        for (int s = 0; s < slices; s++) {
3206                            idx2 = 2 * s;
3207                            idx3 = 2 * slices + 2 * s;
3208                            idx4 = idx3 + 2 * slices;
3209                            idx5 = idx4 + 2 * slices;
3210                            t[idx2] = a[s][r][c];
3211                            t[idx2 + 1] = a[s][r][c + 1];
3212                            t[idx3] = a[s][r][c + 2];
3213                            t[idx3 + 1] = a[s][r][c + 3];
3214                            t[idx4] = a[s][r][c + 4];
3215                            t[idx4 + 1] = a[s][r][c + 5];
3216                            t[idx5] = a[s][r][c + 6];
3217                            t[idx5 + 1] = a[s][r][c + 7];
3218                        }
3219                        fftSlices.complexInverse(t, 0, scale);
3220                        fftSlices.complexInverse(t, 2 * slices, scale);
3221                        fftSlices.complexInverse(t, 4 * slices, scale);
3222                        fftSlices.complexInverse(t, 6 * slices, scale);
3223                        for (int s = 0; s < slices; s++) {
3224                            idx2 = 2 * s;
3225                            idx3 = 2 * slices + 2 * s;
3226                            idx4 = idx3 + 2 * slices;
3227                            idx5 = idx4 + 2 * slices;
3228                            a[s][r][c] = t[idx2];
3229                            a[s][r][c + 1] = t[idx2 + 1];
3230                            a[s][r][c + 2] = t[idx3];
3231                            a[s][r][c + 3] = t[idx3 + 1];
3232                            a[s][r][c + 4] = t[idx4];
3233                            a[s][r][c + 5] = t[idx4 + 1];
3234                            a[s][r][c + 6] = t[idx5];
3235                            a[s][r][c + 7] = t[idx5 + 1];
3236                        }
3237                    }
3238                }
3239            } else if (columns == 4) {
3240                for (int r = 0; r < rows; r++) {
3241                    for (int s = 0; s < slices; s++) {
3242                        idx2 = 2 * s;
3243                        idx3 = 2 * slices + 2 * s;
3244                        t[idx2] = a[s][r][0];
3245                        t[idx2 + 1] = a[s][r][1];
3246                        t[idx3] = a[s][r][2];
3247                        t[idx3 + 1] = a[s][r][3];
3248                    }
3249                    fftSlices.complexInverse(t, 0, scale);
3250                    fftSlices.complexInverse(t, 2 * slices, scale);
3251                    for (int s = 0; s < slices; s++) {
3252                        idx2 = 2 * s;
3253                        idx3 = 2 * slices + 2 * s;
3254                        a[s][r][0] = t[idx2];
3255                        a[s][r][1] = t[idx2 + 1];
3256                        a[s][r][2] = t[idx3];
3257                        a[s][r][3] = t[idx3 + 1];
3258                    }
3259                }
3260            } else if (columns == 2) {
3261                for (int r = 0; r < rows; r++) {
3262                    for (int s = 0; s < slices; s++) {
3263                        idx2 = 2 * s;
3264                        t[idx2] = a[s][r][0];
3265                        t[idx2 + 1] = a[s][r][1];
3266                    }
3267                    fftSlices.complexInverse(t, 0, scale);
3268                    for (int s = 0; s < slices; s++) {
3269                        idx2 = 2 * s;
3270                        a[s][r][0] = t[idx2];
3271                        a[s][r][1] = t[idx2 + 1];
3272                    }
3273                }
3274            }
3275        }
3276    }
3277
3278    private void xdft3da_subth1(final int icr, final int isgn, final double[] a, final boolean scale) {
3279        int nt, i;
3280        final int nthreads = Math.min(ConcurrencyUtils.getNumberOfThreads(), slices);
3281        nt = 8 * rows;
3282        if (columns == 4) {
3283            nt >>= 1;
3284        } else if (columns < 4) {
3285            nt >>= 2;
3286        }
3287        Future<?>[] futures = new Future[nthreads];
3288        for (i = 0; i < nthreads; i++) {
3289            final int n0 = i;
3290            final int startt = nt * i;
3291            futures[i] = ConcurrencyUtils.submit(new Runnable() {
3292                public void run() {
3293                    int idx0, idx1, idx2, idx3, idx4, idx5;
3294
3295                    if (isgn == -1) {
3296                        for (int s = n0; s < slices; s += nthreads) {
3297                            idx0 = s * sliceStride;
3298                            if (icr == 0) {
3299                                for (int r = 0; r < rows; r++) {
3300                                    fftColumns.complexForward(a, idx0 + r * rowStride);
3301                                }
3302                            } else {
3303                                for (int r = 0; r < rows; r++) {
3304                                    fftColumns.realForward(a, idx0 + r * rowStride);
3305                                }
3306                            }
3307                            if (columns > 4) {
3308                                for (int c = 0; c < columns; c += 8) {
3309                                    for (int r = 0; r < rows; r++) {
3310                                        idx1 = idx0 + r * rowStride + c;
3311                                        idx2 = startt + 2 * r;
3312                                        idx3 = startt + 2 * rows + 2 * r;
3313                                        idx4 = idx3 + 2 * rows;
3314                                        idx5 = idx4 + 2 * rows;
3315                                        t[idx2] = a[idx1];
3316                                        t[idx2 + 1] = a[idx1 + 1];
3317                                        t[idx3] = a[idx1 + 2];
3318                                        t[idx3 + 1] = a[idx1 + 3];
3319                                        t[idx4] = a[idx1 + 4];
3320                                        t[idx4 + 1] = a[idx1 + 5];
3321                                        t[idx5] = a[idx1 + 6];
3322                                        t[idx5 + 1] = a[idx1 + 7];
3323                                    }
3324                                    fftRows.complexForward(t, startt);
3325                                    fftRows.complexForward(t, startt + 2 * rows);
3326                                    fftRows.complexForward(t, startt + 4 * rows);
3327                                    fftRows.complexForward(t, startt + 6 * rows);
3328                                    for (int r = 0; r < rows; r++) {
3329                                        idx1 = idx0 + r * rowStride + c;
3330                                        idx2 = startt + 2 * r;
3331                                        idx3 = startt + 2 * rows + 2 * r;
3332                                        idx4 = idx3 + 2 * rows;
3333                                        idx5 = idx4 + 2 * rows;
3334                                        a[idx1] = t[idx2];
3335                                        a[idx1 + 1] = t[idx2 + 1];
3336                                        a[idx1 + 2] = t[idx3];
3337                                        a[idx1 + 3] = t[idx3 + 1];
3338                                        a[idx1 + 4] = t[idx4];
3339                                        a[idx1 + 5] = t[idx4 + 1];
3340                                        a[idx1 + 6] = t[idx5];
3341                                        a[idx1 + 7] = t[idx5 + 1];
3342                                    }
3343                                }
3344                            } else if (columns == 4) {
3345                                for (int r = 0; r < rows; r++) {
3346                                    idx1 = idx0 + r * rowStride;
3347                                    idx2 = startt + 2 * r;
3348                                    idx3 = startt + 2 * rows + 2 * r;
3349                                    t[idx2] = a[idx1];
3350                                    t[idx2 + 1] = a[idx1 + 1];
3351                                    t[idx3] = a[idx1 + 2];
3352                                    t[idx3 + 1] = a[idx1 + 3];
3353                                }
3354                                fftRows.complexForward(t, startt);
3355                                fftRows.complexForward(t, startt + 2 * rows);
3356                                for (int r = 0; r < rows; r++) {
3357                                    idx1 = idx0 + r * rowStride;
3358                                    idx2 = startt + 2 * r;
3359                                    idx3 = startt + 2 * rows + 2 * r;
3360                                    a[idx1] = t[idx2];
3361                                    a[idx1 + 1] = t[idx2 + 1];
3362                                    a[idx1 + 2] = t[idx3];
3363                                    a[idx1 + 3] = t[idx3 + 1];
3364                                }
3365                            } else if (columns == 2) {
3366                                for (int r = 0; r < rows; r++) {
3367                                    idx1 = idx0 + r * rowStride;
3368                                    idx2 = startt + 2 * r;
3369                                    t[idx2] = a[idx1];
3370                                    t[idx2 + 1] = a[idx1 + 1];
3371                                }
3372                                fftRows.complexForward(t, startt);
3373                                for (int r = 0; r < rows; r++) {
3374                                    idx1 = idx0 + r * rowStride;
3375                                    idx2 = startt + 2 * r;
3376                                    a[idx1] = t[idx2];
3377                                    a[idx1 + 1] = t[idx2 + 1];
3378                                }
3379                            }
3380
3381                        }
3382                    } else {
3383                        for (int s = n0; s < slices; s += nthreads) {
3384                            idx0 = s * sliceStride;
3385                            if (icr == 0) {
3386                                for (int r = 0; r < rows; r++) {
3387                                    fftColumns.complexInverse(a, idx0 + r * rowStride, scale);
3388                                }
3389                            }
3390                            if (columns > 4) {
3391                                for (int c = 0; c < columns; c += 8) {
3392                                    for (int r = 0; r < rows; r++) {
3393                                        idx1 = idx0 + r * rowStride + c;
3394                                        idx2 = startt + 2 * r;
3395                                        idx3 = startt + 2 * rows + 2 * r;
3396                                        idx4 = idx3 + 2 * rows;
3397                                        idx5 = idx4 + 2 * rows;
3398                                        t[idx2] = a[idx1];
3399                                        t[idx2 + 1] = a[idx1 + 1];
3400                                        t[idx3] = a[idx1 + 2];
3401                                        t[idx3 + 1] = a[idx1 + 3];
3402                                        t[idx4] = a[idx1 + 4];
3403                                        t[idx4 + 1] = a[idx1 + 5];
3404                                        t[idx5] = a[idx1 + 6];
3405                                        t[idx5 + 1] = a[idx1 + 7];
3406                                    }
3407                                    fftRows.complexInverse(t, startt, scale);
3408                                    fftRows.complexInverse(t, startt + 2 * rows, scale);
3409                                    fftRows.complexInverse(t, startt + 4 * rows, scale);
3410                                    fftRows.complexInverse(t, startt + 6 * rows, scale);
3411                                    for (int r = 0; r < rows; r++) {
3412                                        idx1 = idx0 + r * rowStride + c;
3413                                        idx2 = startt + 2 * r;
3414                                        idx3 = startt + 2 * rows + 2 * r;
3415                                        idx4 = idx3 + 2 * rows;
3416                                        idx5 = idx4 + 2 * rows;
3417                                        a[idx1] = t[idx2];
3418                                        a[idx1 + 1] = t[idx2 + 1];
3419                                        a[idx1 + 2] = t[idx3];
3420                                        a[idx1 + 3] = t[idx3 + 1];
3421                                        a[idx1 + 4] = t[idx4];
3422                                        a[idx1 + 5] = t[idx4 + 1];
3423                                        a[idx1 + 6] = t[idx5];
3424                                        a[idx1 + 7] = t[idx5 + 1];
3425                                    }
3426                                }
3427                            } else if (columns == 4) {
3428                                for (int r = 0; r < rows; r++) {
3429                                    idx1 = idx0 + r * rowStride;
3430                                    idx2 = startt + 2 * r;
3431                                    idx3 = startt + 2 * rows + 2 * r;
3432                                    t[idx2] = a[idx1];
3433                                    t[idx2 + 1] = a[idx1 + 1];
3434                                    t[idx3] = a[idx1 + 2];
3435                                    t[idx3 + 1] = a[idx1 + 3];
3436                                }
3437                                fftRows.complexInverse(t, startt, scale);
3438                                fftRows.complexInverse(t, startt + 2 * rows, scale);
3439                                for (int r = 0; r < rows; r++) {
3440                                    idx1 = idx0 + r * rowStride;
3441                                    idx2 = startt + 2 * r;
3442                                    idx3 = startt + 2 * rows + 2 * r;
3443                                    a[idx1] = t[idx2];
3444                                    a[idx1 + 1] = t[idx2 + 1];
3445                                    a[idx1 + 2] = t[idx3];
3446                                    a[idx1 + 3] = t[idx3 + 1];
3447                                }
3448                            } else if (columns == 2) {
3449                                for (int r = 0; r < rows; r++) {
3450                                    idx1 = idx0 + r * rowStride;
3451                                    idx2 = startt + 2 * r;
3452                                    t[idx2] = a[idx1];
3453                                    t[idx2 + 1] = a[idx1 + 1];
3454                                }
3455                                fftRows.complexInverse(t, startt, scale);
3456                                for (int r = 0; r < rows; r++) {
3457                                    idx1 = idx0 + r * rowStride;
3458                                    idx2 = startt + 2 * r;
3459                                    a[idx1] = t[idx2];
3460                                    a[idx1 + 1] = t[idx2 + 1];
3461                                }
3462                            }
3463                            if (icr != 0) {
3464                                for (int r = 0; r < rows; r++) {
3465                                    fftColumns.realInverse(a, idx0 + r
3466                                            * rowStride, scale);
3467                                }
3468                            }
3469                        }
3470                    }
3471                }
3472            });
3473        }
3474        ConcurrencyUtils.waitForCompletion(futures);
3475    }
3476
3477    private void xdft3da_subth2(final int icr, final int isgn, final double[] a, final boolean scale) {
3478        int nt, i;
3479
3480        final int nthreads = Math.min(ConcurrencyUtils.getNumberOfThreads(), slices);
3481        nt = 8 * rows;
3482        if (columns == 4) {
3483            nt >>= 1;
3484        } else if (columns < 4) {
3485            nt >>= 2;
3486        }
3487        Future<?>[] futures = new Future[nthreads];
3488        for (i = 0; i < nthreads; i++) {
3489            final int n0 = i;
3490            final int startt = nt * i;
3491            futures[i] = ConcurrencyUtils.submit(new Runnable() {
3492                public void run() {
3493                    int idx0, idx1, idx2, idx3, idx4, idx5;
3494
3495                    if (isgn == -1) {
3496                        for (int s = n0; s < slices; s += nthreads) {
3497                            idx0 = s * sliceStride;
3498                            if (icr == 0) {
3499                                for (int r = 0; r < rows; r++) {
3500                                    fftColumns.complexForward(a, idx0 + r * rowStride);
3501                                }
3502                            } else {
3503                                for (int r = 0; r < rows; r++) {
3504                                    fftColumns.realForward(a, idx0 + r * rowStride);
3505                                }
3506                            }
3507                            if (columns > 4) {
3508                                for (int c = 0; c < columns; c += 8) {
3509                                    for (int r = 0; r < rows; r++) {
3510                                        idx1 = idx0 + r * rowStride + c;
3511                                        idx2 = startt + 2 * r;
3512                                        idx3 = startt + 2 * rows + 2 * r;
3513                                        idx4 = idx3 + 2 * rows;
3514                                        idx5 = idx4 + 2 * rows;
3515                                        t[idx2] = a[idx1];
3516                                        t[idx2 + 1] = a[idx1 + 1];
3517                                        t[idx3] = a[idx1 + 2];
3518                                        t[idx3 + 1] = a[idx1 + 3];
3519                                        t[idx4] = a[idx1 + 4];
3520                                        t[idx4 + 1] = a[idx1 + 5];
3521                                        t[idx5] = a[idx1 + 6];
3522                                        t[idx5 + 1] = a[idx1 + 7];
3523                                    }
3524                                    fftRows.complexForward(t, startt);
3525                                    fftRows.complexForward(t, startt + 2 * rows);
3526                                    fftRows.complexForward(t, startt + 4 * rows);
3527                                    fftRows.complexForward(t, startt + 6 * rows);
3528                                    for (int r = 0; r < rows; r++) {
3529                                        idx1 = idx0 + r * rowStride + c;
3530                                        idx2 = startt + 2 * r;
3531                                        idx3 = startt + 2 * rows + 2 * r;
3532                                        idx4 = idx3 + 2 * rows;
3533                                        idx5 = idx4 + 2 * rows;
3534                                        a[idx1] = t[idx2];
3535                                        a[idx1 + 1] = t[idx2 + 1];
3536                                        a[idx1 + 2] = t[idx3];
3537                                        a[idx1 + 3] = t[idx3 + 1];
3538                                        a[idx1 + 4] = t[idx4];
3539                                        a[idx1 + 5] = t[idx4 + 1];
3540                                        a[idx1 + 6] = t[idx5];
3541                                        a[idx1 + 7] = t[idx5 + 1];
3542                                    }
3543                                }
3544                            } else if (columns == 4) {
3545                                for (int r = 0; r < rows; r++) {
3546                                    idx1 = idx0 + r * rowStride;
3547                                    idx2 = startt + 2 * r;
3548                                    idx3 = startt + 2 * rows + 2 * r;
3549                                    t[idx2] = a[idx1];
3550                                    t[idx2 + 1] = a[idx1 + 1];
3551                                    t[idx3] = a[idx1 + 2];
3552                                    t[idx3 + 1] = a[idx1 + 3];
3553                                }
3554                                fftRows.complexForward(t, startt);
3555                                fftRows.complexForward(t, startt + 2 * rows);
3556                                for (int r = 0; r < rows; r++) {
3557                                    idx1 = idx0 + r * rowStride;
3558                                    idx2 = startt + 2 * r;
3559                                    idx3 = startt + 2 * rows + 2 * r;
3560                                    a[idx1] = t[idx2];
3561                                    a[idx1 + 1] = t[idx2 + 1];
3562                                    a[idx1 + 2] = t[idx3];
3563                                    a[idx1 + 3] = t[idx3 + 1];
3564                                }
3565                            } else if (columns == 2) {
3566                                for (int r = 0; r < rows; r++) {
3567                                    idx1 = idx0 + r * rowStride;
3568                                    idx2 = startt + 2 * r;
3569                                    t[idx2] = a[idx1];
3570                                    t[idx2 + 1] = a[idx1 + 1];
3571                                }
3572                                fftRows.complexForward(t, startt);
3573                                for (int r = 0; r < rows; r++) {
3574                                    idx1 = idx0 + r * rowStride;
3575                                    idx2 = startt + 2 * r;
3576                                    a[idx1] = t[idx2];
3577                                    a[idx1 + 1] = t[idx2 + 1];
3578                                }
3579                            }
3580
3581                        }
3582                    } else {
3583                        for (int s = n0; s < slices; s += nthreads) {
3584                            idx0 = s * sliceStride;
3585                            if (icr == 0) {
3586                                for (int r = 0; r < rows; r++) {
3587                                    fftColumns.complexInverse(a, idx0 + r * rowStride, scale);
3588                                }
3589                            } else {
3590                                for (int r = 0; r < rows; r++) {
3591                                    fftColumns.realInverse2(a, idx0 + r * rowStride, scale);
3592                                }
3593                            }
3594                            if (columns > 4) {
3595                                for (int c = 0; c < columns; c += 8) {
3596                                    for (int r = 0; r < rows; r++) {
3597                                        idx1 = idx0 + r * rowStride + c;
3598                                        idx2 = startt + 2 * r;
3599                                        idx3 = startt + 2 * rows + 2 * r;
3600                                        idx4 = idx3 + 2 * rows;
3601                                        idx5 = idx4 + 2 * rows;
3602                                        t[idx2] = a[idx1];
3603                                        t[idx2 + 1] = a[idx1 + 1];
3604                                        t[idx3] = a[idx1 + 2];
3605                                        t[idx3 + 1] = a[idx1 + 3];
3606                                        t[idx4] = a[idx1 + 4];
3607                                        t[idx4 + 1] = a[idx1 + 5];
3608                                        t[idx5] = a[idx1 + 6];
3609                                        t[idx5 + 1] = a[idx1 + 7];
3610                                    }
3611                                    fftRows.complexInverse(t, startt, scale);
3612                                    fftRows.complexInverse(t, startt + 2 * rows, scale);
3613                                    fftRows.complexInverse(t, startt + 4 * rows, scale);
3614                                    fftRows.complexInverse(t, startt + 6 * rows, scale);
3615                                    for (int r = 0; r < rows; r++) {
3616                                        idx1 = idx0 + r * rowStride + c;
3617                                        idx2 = startt + 2 * r;
3618                                        idx3 = startt + 2 * rows + 2 * r;
3619                                        idx4 = idx3 + 2 * rows;
3620                                        idx5 = idx4 + 2 * rows;
3621                                        a[idx1] = t[idx2];
3622                                        a[idx1 + 1] = t[idx2 + 1];
3623                                        a[idx1 + 2] = t[idx3];
3624                                        a[idx1 + 3] = t[idx3 + 1];
3625                                        a[idx1 + 4] = t[idx4];
3626                                        a[idx1 + 5] = t[idx4 + 1];
3627                                        a[idx1 + 6] = t[idx5];
3628                                        a[idx1 + 7] = t[idx5 + 1];
3629                                    }
3630                                }
3631                            } else if (columns == 4) {
3632                                for (int r = 0; r < rows; r++) {
3633                                    idx1 = idx0 + r * rowStride;
3634                                    idx2 = startt + 2 * r;
3635                                    idx3 = startt + 2 * rows + 2 * r;
3636                                    t[idx2] = a[idx1];
3637                                    t[idx2 + 1] = a[idx1 + 1];
3638                                    t[idx3] = a[idx1 + 2];
3639                                    t[idx3 + 1] = a[idx1 + 3];
3640                                }
3641                                fftRows.complexInverse(t, startt, scale);
3642                                fftRows.complexInverse(t, startt + 2 * rows, scale);
3643                                for (int r = 0; r < rows; r++) {
3644                                    idx1 = idx0 + r * rowStride;
3645                                    idx2 = startt + 2 * r;
3646                                    idx3 = startt + 2 * rows + 2 * r;
3647                                    a[idx1] = t[idx2];
3648                                    a[idx1 + 1] = t[idx2 + 1];
3649                                    a[idx1 + 2] = t[idx3];
3650                                    a[idx1 + 3] = t[idx3 + 1];
3651                                }
3652                            } else if (columns == 2) {
3653                                for (int r = 0; r < rows; r++) {
3654                                    idx1 = idx0 + r * rowStride;
3655                                    idx2 = startt + 2 * r;
3656                                    t[idx2] = a[idx1];
3657                                    t[idx2 + 1] = a[idx1 + 1];
3658                                }
3659                                fftRows.complexInverse(t, startt, scale);
3660                                for (int r = 0; r < rows; r++) {
3661                                    idx1 = idx0 + r * rowStride;
3662                                    idx2 = startt + 2 * r;
3663                                    a[idx1] = t[idx2];
3664                                    a[idx1 + 1] = t[idx2 + 1];
3665                                }
3666                            }
3667                        }
3668                    }
3669                }
3670            });
3671        }
3672        ConcurrencyUtils.waitForCompletion(futures);
3673    }
3674
3675    private void xdft3da_subth1(final int icr, final int isgn, final double[][][] a, final boolean scale) {
3676        int nt, i;
3677
3678        final int nthreads = Math.min(ConcurrencyUtils.getNumberOfThreads(), slices);
3679        nt = 8 * rows;
3680        if (columns == 4) {
3681            nt >>= 1;
3682        } else if (columns < 4) {
3683            nt >>= 2;
3684        }
3685        Future<?>[] futures = new Future[nthreads];
3686        for (i = 0; i < nthreads; i++) {
3687            final int n0 = i;
3688            final int startt = nt * i;
3689            futures[i] = ConcurrencyUtils.submit(new Runnable() {
3690                public void run() {
3691                    int idx2, idx3, idx4, idx5;
3692
3693                    if (isgn == -1) {
3694                        for (int s = n0; s < slices; s += nthreads) {
3695                            if (icr == 0) {
3696                                for (int r = 0; r < rows; r++) {
3697                                    fftColumns.complexForward(a[s][r]);
3698                                }
3699                            } else {
3700                                for (int r = 0; r < rows; r++) {
3701                                    fftColumns.realForward(a[s][r], 0);
3702                                }
3703                            }
3704                            if (columns > 4) {
3705                                for (int c = 0; c < columns; c += 8) {
3706                                    for (int r = 0; r < rows; r++) {
3707                                        idx2 = startt + 2 * r;
3708                                        idx3 = startt + 2 * rows + 2 * r;
3709                                        idx4 = idx3 + 2 * rows;
3710                                        idx5 = idx4 + 2 * rows;
3711                                        t[idx2] = a[s][r][c];
3712                                        t[idx2 + 1] = a[s][r][c + 1];
3713                                        t[idx3] = a[s][r][c + 2];
3714                                        t[idx3 + 1] = a[s][r][c + 3];
3715                                        t[idx4] = a[s][r][c + 4];
3716                                        t[idx4 + 1] = a[s][r][c + 5];
3717                                        t[idx5] = a[s][r][c + 6];
3718                                        t[idx5 + 1] = a[s][r][c + 7];
3719                                    }
3720                                    fftRows.complexForward(t, startt);
3721                                    fftRows.complexForward(t, startt + 2 * rows);
3722                                    fftRows.complexForward(t, startt + 4 * rows);
3723                                    fftRows.complexForward(t, startt + 6 * rows);
3724                                    for (int r = 0; r < rows; r++) {
3725                                        idx2 = startt + 2 * r;
3726                                        idx3 = startt + 2 * rows + 2 * r;
3727                                        idx4 = idx3 + 2 * rows;
3728                                        idx5 = idx4 + 2 * rows;
3729                                        a[s][r][c] = t[idx2];
3730                                        a[s][r][c + 1] = t[idx2 + 1];
3731                                        a[s][r][c + 2] = t[idx3];
3732                                        a[s][r][c + 3] = t[idx3 + 1];
3733                                        a[s][r][c + 4] = t[idx4];
3734                                        a[s][r][c + 5] = t[idx4 + 1];
3735                                        a[s][r][c + 6] = t[idx5];
3736                                        a[s][r][c + 7] = t[idx5 + 1];
3737                                    }
3738                                }
3739                            } else if (columns == 4) {
3740                                for (int r = 0; r < rows; r++) {
3741                                    idx2 = startt + 2 * r;
3742                                    idx3 = startt + 2 * rows + 2 * r;
3743                                    t[idx2] = a[s][r][0];
3744                                    t[idx2 + 1] = a[s][r][1];
3745                                    t[idx3] = a[s][r][2];
3746                                    t[idx3 + 1] = a[s][r][3];
3747                                }
3748                                fftRows.complexForward(t, startt);
3749                                fftRows.complexForward(t, startt + 2 * rows);
3750                                for (int r = 0; r < rows; r++) {
3751                                    idx2 = startt + 2 * r;
3752                                    idx3 = startt + 2 * rows + 2 * r;
3753                                    a[s][r][0] = t[idx2];
3754                                    a[s][r][1] = t[idx2 + 1];
3755                                    a[s][r][2] = t[idx3];
3756                                    a[s][r][3] = t[idx3 + 1];
3757                                }
3758                            } else if (columns == 2) {
3759                                for (int r = 0; r < rows; r++) {
3760                                    idx2 = startt + 2 * r;
3761                                    t[idx2] = a[s][r][0];
3762                                    t[idx2 + 1] = a[s][r][1];
3763                                }
3764                                fftRows.complexForward(t, startt);
3765                                for (int r = 0; r < rows; r++) {
3766                                    idx2 = startt + 2 * r;
3767                                    a[s][r][0] = t[idx2];
3768                                    a[s][r][1] = t[idx2 + 1];
3769                                }
3770                            }
3771
3772                        }
3773                    } else {
3774                        for (int s = n0; s < slices; s += nthreads) {
3775                            if (icr == 0) {
3776                                for (int r = 0; r < rows; r++) {
3777                                    fftColumns.complexInverse(a[s][r], scale);
3778                                }
3779                            }
3780                            if (columns > 4) {
3781                                for (int c = 0; c < columns; c += 8) {
3782                                    for (int r = 0; r < rows; r++) {
3783                                        idx2 = startt + 2 * r;
3784                                        idx3 = startt + 2 * rows + 2 * r;
3785                                        idx4 = idx3 + 2 * rows;
3786                                        idx5 = idx4 + 2 * rows;
3787                                        t[idx2] = a[s][r][c];
3788                                        t[idx2 + 1] = a[s][r][c + 1];
3789                                        t[idx3] = a[s][r][c + 2];
3790                                        t[idx3 + 1] = a[s][r][c + 3];
3791                                        t[idx4] = a[s][r][c + 4];
3792                                        t[idx4 + 1] = a[s][r][c + 5];
3793                                        t[idx5] = a[s][r][c + 6];
3794                                        t[idx5 + 1] = a[s][r][c + 7];
3795                                    }
3796                                    fftRows.complexInverse(t, startt, scale);
3797                                    fftRows.complexInverse(t, startt + 2 * rows, scale);
3798                                    fftRows.complexInverse(t, startt + 4 * rows, scale);
3799                                    fftRows.complexInverse(t, startt + 6 * rows, scale);
3800                                    for (int r = 0; r < rows; r++) {
3801                                        idx2 = startt + 2 * r;
3802                                        idx3 = startt + 2 * rows + 2 * r;
3803                                        idx4 = idx3 + 2 * rows;
3804                                        idx5 = idx4 + 2 * rows;
3805                                        a[s][r][c] = t[idx2];
3806                                        a[s][r][c + 1] = t[idx2 + 1];
3807                                        a[s][r][c + 2] = t[idx3];
3808                                        a[s][r][c + 3] = t[idx3 + 1];
3809                                        a[s][r][c + 4] = t[idx4];
3810                                        a[s][r][c + 5] = t[idx4 + 1];
3811                                        a[s][r][c + 6] = t[idx5];
3812                                        a[s][r][c + 7] = t[idx5 + 1];
3813                                    }
3814                                }
3815                            } else if (columns == 4) {
3816                                for (int r = 0; r < rows; r++) {
3817                                    idx2 = startt + 2 * r;
3818                                    idx3 = startt + 2 * rows + 2 * r;
3819                                    t[idx2] = a[s][r][0];
3820                                    t[idx2 + 1] = a[s][r][1];
3821                                    t[idx3] = a[s][r][2];
3822                                    t[idx3 + 1] = a[s][r][3];
3823                                }
3824                                fftRows.complexInverse(t, startt, scale);
3825                                fftRows.complexInverse(t, startt + 2 * rows, scale);
3826                                for (int r = 0; r < rows; r++) {
3827                                    idx2 = startt + 2 * r;
3828                                    idx3 = startt + 2 * rows + 2 * r;
3829                                    a[s][r][0] = t[idx2];
3830                                    a[s][r][1] = t[idx2 + 1];
3831                                    a[s][r][2] = t[idx3];
3832                                    a[s][r][3] = t[idx3 + 1];
3833                                }
3834                            } else if (columns == 2) {
3835                                for (int r = 0; r < rows; r++) {
3836                                    idx2 = startt + 2 * r;
3837                                    t[idx2] = a[s][r][0];
3838                                    t[idx2 + 1] = a[s][r][1];
3839                                }
3840                                fftRows.complexInverse(t, startt, scale);
3841                                for (int r = 0; r < rows; r++) {
3842                                    idx2 = startt + 2 * r;
3843                                    a[s][r][0] = t[idx2];
3844                                    a[s][r][1] = t[idx2 + 1];
3845                                }
3846                            }
3847                            if (icr != 0) {
3848                                for (int r = 0; r < rows; r++) {
3849                                    fftColumns.realInverse(a[s][r], scale);
3850                                }
3851                            }
3852                        }
3853                    }
3854                }
3855            });
3856        }
3857        ConcurrencyUtils.waitForCompletion(futures);
3858    }
3859
3860    private void xdft3da_subth2(final int icr, final int isgn, final double[][][] a, final boolean scale) {
3861        int nt, i;
3862
3863        final int nthreads = Math.min(ConcurrencyUtils.getNumberOfThreads(), slices);
3864        nt = 8 * rows;
3865        if (columns == 4) {
3866            nt >>= 1;
3867        } else if (columns < 4) {
3868            nt >>= 2;
3869        }
3870        Future<?>[] futures = new Future[nthreads];
3871        for (i = 0; i < nthreads; i++) {
3872            final int n0 = i;
3873            final int startt = nt * i;
3874            futures[i] = ConcurrencyUtils.submit(new Runnable() {
3875                public void run() {
3876                    int idx2, idx3, idx4, idx5;
3877
3878                    if (isgn == -1) {
3879                        for (int s = n0; s < slices; s += nthreads) {
3880                            if (icr == 0) {
3881                                for (int r = 0; r < rows; r++) {
3882                                    fftColumns.complexForward(a[s][r]);
3883                                }
3884                            } else {
3885                                for (int r = 0; r < rows; r++) {
3886                                    fftColumns.realForward(a[s][r]);
3887                                }
3888                            }
3889                            if (columns > 4) {
3890                                for (int c = 0; c < columns; c += 8) {
3891                                    for (int r = 0; r < rows; r++) {
3892                                        idx2 = startt + 2 * r;
3893                                        idx3 = startt + 2 * rows + 2 * r;
3894                                        idx4 = idx3 + 2 * rows;
3895                                        idx5 = idx4 + 2 * rows;
3896                                        t[idx2] = a[s][r][c];
3897                                        t[idx2 + 1] = a[s][r][c + 1];
3898                                        t[idx3] = a[s][r][c + 2];
3899                                        t[idx3 + 1] = a[s][r][c + 3];
3900                                        t[idx4] = a[s][r][c + 4];
3901                                        t[idx4 + 1] = a[s][r][c + 5];
3902                                        t[idx5] = a[s][r][c + 6];
3903                                        t[idx5 + 1] = a[s][r][c + 7];
3904                                    }
3905                                    fftRows.complexForward(t, startt);
3906                                    fftRows.complexForward(t, startt + 2 * rows);
3907                                    fftRows.complexForward(t, startt + 4 * rows);
3908                                    fftRows.complexForward(t, startt + 6 * rows);
3909                                    for (int r = 0; r < rows; r++) {
3910                                        idx2 = startt + 2 * r;
3911                                        idx3 = startt + 2 * rows + 2 * r;
3912                                        idx4 = idx3 + 2 * rows;
3913                                        idx5 = idx4 + 2 * rows;
3914                                        a[s][r][c] = t[idx2];
3915                                        a[s][r][c + 1] = t[idx2 + 1];
3916                                        a[s][r][c + 2] = t[idx3];
3917                                        a[s][r][c + 3] = t[idx3 + 1];
3918                                        a[s][r][c + 4] = t[idx4];
3919                                        a[s][r][c + 5] = t[idx4 + 1];
3920                                        a[s][r][c + 6] = t[idx5];
3921                                        a[s][r][c + 7] = t[idx5 + 1];
3922                                    }
3923                                }
3924                            } else if (columns == 4) {
3925                                for (int r = 0; r < rows; r++) {
3926                                    idx2 = startt + 2 * r;
3927                                    idx3 = startt + 2 * rows + 2 * r;
3928                                    t[idx2] = a[s][r][0];
3929                                    t[idx2 + 1] = a[s][r][1];
3930                                    t[idx3] = a[s][r][2];
3931                                    t[idx3 + 1] = a[s][r][3];
3932                                }
3933                                fftRows.complexForward(t, startt);
3934                                fftRows.complexForward(t, startt + 2 * rows);
3935                                for (int r = 0; r < rows; r++) {
3936                                    idx2 = startt + 2 * r;
3937                                    idx3 = startt + 2 * rows + 2 * r;
3938                                    a[s][r][0] = t[idx2];
3939                                    a[s][r][1] = t[idx2 + 1];
3940                                    a[s][r][2] = t[idx3];
3941                                    a[s][r][3] = t[idx3 + 1];
3942                                }
3943                            } else if (columns == 2) {
3944                                for (int r = 0; r < rows; r++) {
3945                                    idx2 = startt + 2 * r;
3946                                    t[idx2] = a[s][r][0];
3947                                    t[idx2 + 1] = a[s][r][1];
3948                                }
3949                                fftRows.complexForward(t, startt);
3950                                for (int r = 0; r < rows; r++) {
3951                                    idx2 = startt + 2 * r;
3952                                    a[s][r][0] = t[idx2];
3953                                    a[s][r][1] = t[idx2 + 1];
3954                                }
3955                            }
3956
3957                        }
3958                    } else {
3959                        for (int s = n0; s < slices; s += nthreads) {
3960                            if (icr == 0) {
3961                                for (int r = 0; r < rows; r++) {
3962                                    fftColumns.complexInverse(a[s][r], scale);
3963                                }
3964                            } else {
3965                                for (int r = 0; r < rows; r++) {
3966                                    fftColumns.realInverse2(a[s][r], 0, scale);
3967                                }
3968                            }
3969                            if (columns > 4) {
3970                                for (int c = 0; c < columns; c += 8) {
3971                                    for (int r = 0; r < rows; r++) {
3972                                        idx2 = startt + 2 * r;
3973                                        idx3 = startt + 2 * rows + 2 * r;
3974                                        idx4 = idx3 + 2 * rows;
3975                                        idx5 = idx4 + 2 * rows;
3976                                        t[idx2] = a[s][r][c];
3977                                        t[idx2 + 1] = a[s][r][c + 1];
3978                                        t[idx3] = a[s][r][c + 2];
3979                                        t[idx3 + 1] = a[s][r][c + 3];
3980                                        t[idx4] = a[s][r][c + 4];
3981                                        t[idx4 + 1] = a[s][r][c + 5];
3982                                        t[idx5] = a[s][r][c + 6];
3983                                        t[idx5 + 1] = a[s][r][c + 7];
3984                                    }
3985                                    fftRows.complexInverse(t, startt, scale);
3986                                    fftRows.complexInverse(t, startt + 2 * rows, scale);
3987                                    fftRows.complexInverse(t, startt + 4 * rows, scale);
3988                                    fftRows.complexInverse(t, startt + 6 * rows, scale);
3989                                    for (int r = 0; r < rows; r++) {
3990                                        idx2 = startt + 2 * r;
3991                                        idx3 = startt + 2 * rows + 2 * r;
3992                                        idx4 = idx3 + 2 * rows;
3993                                        idx5 = idx4 + 2 * rows;
3994                                        a[s][r][c] = t[idx2];
3995                                        a[s][r][c + 1] = t[idx2 + 1];
3996                                        a[s][r][c + 2] = t[idx3];
3997                                        a[s][r][c + 3] = t[idx3 + 1];
3998                                        a[s][r][c + 4] = t[idx4];
3999                                        a[s][r][c + 5] = t[idx4 + 1];
4000                                        a[s][r][c + 6] = t[idx5];
4001                                        a[s][r][c + 7] = t[idx5 + 1];
4002                                    }
4003                                }
4004                            } else if (columns == 4) {
4005                                for (int r = 0; r < rows; r++) {
4006                                    idx2 = startt + 2 * r;
4007                                    idx3 = startt + 2 * rows + 2 * r;
4008                                    t[idx2] = a[s][r][0];
4009                                    t[idx2 + 1] = a[s][r][1];
4010                                    t[idx3] = a[s][r][2];
4011                                    t[idx3 + 1] = a[s][r][3];
4012                                }
4013                                fftRows.complexInverse(t, startt, scale);
4014                                fftRows.complexInverse(t, startt + 2 * rows, scale);
4015                                for (int r = 0; r < rows; r++) {
4016                                    idx2 = startt + 2 * r;
4017                                    idx3 = startt + 2 * rows + 2 * r;
4018                                    a[s][r][0] = t[idx2];
4019                                    a[s][r][1] = t[idx2 + 1];
4020                                    a[s][r][2] = t[idx3];
4021                                    a[s][r][3] = t[idx3 + 1];
4022                                }
4023                            } else if (columns == 2) {
4024                                for (int r = 0; r < rows; r++) {
4025                                    idx2 = startt + 2 * r;
4026                                    t[idx2] = a[s][r][0];
4027                                    t[idx2 + 1] = a[s][r][1];
4028                                }
4029                                fftRows.complexInverse(t, startt, scale);
4030                                for (int r = 0; r < rows; r++) {
4031                                    idx2 = startt + 2 * r;
4032                                    a[s][r][0] = t[idx2];
4033                                    a[s][r][1] = t[idx2 + 1];
4034                                }
4035                            }
4036                        }
4037                    }
4038                }
4039            });
4040        }
4041        ConcurrencyUtils.waitForCompletion(futures);
4042    }
4043
4044    private void cdft3db_subth(final int isgn, final double[] a, final boolean scale) {
4045        int nt, i;
4046
4047        final int nthreads = Math.min(ConcurrencyUtils.getNumberOfThreads(), rows);
4048        nt = 8 * slices;
4049        if (columns == 4) {
4050            nt >>= 1;
4051        } else if (columns < 4) {
4052            nt >>= 2;
4053        }
4054        Future<?>[] futures = new Future[nthreads];
4055        for (i = 0; i < nthreads; i++) {
4056            final int n0 = i;
4057            final int startt = nt * i;
4058            futures[i] = ConcurrencyUtils.submit(new Runnable() {
4059
4060                public void run() {
4061                    int idx0, idx1, idx2, idx3, idx4, idx5;
4062
4063                    if (isgn == -1) {
4064                        if (columns > 4) {
4065                            for (int r = n0; r < rows; r += nthreads) {
4066                                idx0 = r * rowStride;
4067                                for (int c = 0; c < columns; c += 8) {
4068                                    for (int s = 0; s < slices; s++) {
4069                                        idx1 = s * sliceStride + idx0 + c;
4070                                        idx2 = startt + 2 * s;
4071                                        idx3 = startt + 2 * slices + 2 * s;
4072                                        idx4 = idx3 + 2 * slices;
4073                                        idx5 = idx4 + 2 * slices;
4074                                        t[idx2] = a[idx1];
4075                                        t[idx2 + 1] = a[idx1 + 1];
4076                                        t[idx3] = a[idx1 + 2];
4077                                        t[idx3 + 1] = a[idx1 + 3];
4078                                        t[idx4] = a[idx1 + 4];
4079                                        t[idx4 + 1] = a[idx1 + 5];
4080                                        t[idx5] = a[idx1 + 6];
4081                                        t[idx5 + 1] = a[idx1 + 7];
4082                                    }
4083                                    fftSlices.complexForward(t, startt);
4084                                    fftSlices.complexForward(t, startt + 2 * slices);
4085                                    fftSlices.complexForward(t, startt + 4 * slices);
4086                                    fftSlices.complexForward(t, startt + 6 * slices);
4087                                    for (int s = 0; s < slices; s++) {
4088                                        idx1 = s * sliceStride + idx0 + c;
4089                                        idx2 = startt + 2 * s;
4090                                        idx3 = startt + 2 * slices + 2 * s;
4091                                        idx4 = idx3 + 2 * slices;
4092                                        idx5 = idx4 + 2 * slices;
4093                                        a[idx1] = t[idx2];
4094                                        a[idx1 + 1] = t[idx2 + 1];
4095                                        a[idx1 + 2] = t[idx3];
4096                                        a[idx1 + 3] = t[idx3 + 1];
4097                                        a[idx1 + 4] = t[idx4];
4098                                        a[idx1 + 5] = t[idx4 + 1];
4099                                        a[idx1 + 6] = t[idx5];
4100                                        a[idx1 + 7] = t[idx5 + 1];
4101                                    }
4102                                }
4103                            }
4104                        } else if (columns == 4) {
4105                            for (int r = n0; r < rows; r += nthreads) {
4106                                idx0 = r * rowStride;
4107                                for (int s = 0; s < slices; s++) {
4108                                    idx1 = s * sliceStride + idx0;
4109                                    idx2 = startt + 2 * s;
4110                                    idx3 = startt + 2 * slices + 2 * s;
4111                                    t[idx2] = a[idx1];
4112                                    t[idx2 + 1] = a[idx1 + 1];
4113                                    t[idx3] = a[idx1 + 2];
4114                                    t[idx3 + 1] = a[idx1 + 3];
4115                                }
4116                                fftSlices.complexForward(t, startt);
4117                                fftSlices.complexForward(t, startt + 2 * slices);
4118                                for (int s = 0; s < slices; s++) {
4119                                    idx1 = s * sliceStride + idx0;
4120                                    idx2 = startt + 2 * s;
4121                                    idx3 = startt + 2 * slices + 2 * s;
4122                                    a[idx1] = t[idx2];
4123                                    a[idx1 + 1] = t[idx2 + 1];
4124                                    a[idx1 + 2] = t[idx3];
4125                                    a[idx1 + 3] = t[idx3 + 1];
4126                                }
4127                            }
4128                        } else if (columns == 2) {
4129                            for (int r = n0; r < rows; r += nthreads) {
4130                                idx0 = r * rowStride;
4131                                for (int s = 0; s < slices; s++) {
4132                                    idx1 = s * sliceStride + idx0;
4133                                    idx2 = startt + 2 * s;
4134                                    t[idx2] = a[idx1];
4135                                    t[idx2 + 1] = a[idx1 + 1];
4136                                }
4137                                fftSlices.complexForward(t, startt);
4138                                for (int s = 0; s < slices; s++) {
4139                                    idx1 = s * sliceStride + idx0;
4140                                    idx2 = startt + 2 * s;
4141                                    a[idx1] = t[idx2];
4142                                    a[idx1 + 1] = t[idx2 + 1];
4143                                }
4144                            }
4145                        }
4146                    } else {
4147                        if (columns > 4) {
4148                            for (int r = n0; r < rows; r += nthreads) {
4149                                idx0 = r * rowStride;
4150                                for (int c = 0; c < columns; c += 8) {
4151                                    for (int s = 0; s < slices; s++) {
4152                                        idx1 = s * sliceStride + idx0 + c;
4153                                        idx2 = startt + 2 * s;
4154                                        idx3 = startt + 2 * slices + 2 * s;
4155                                        idx4 = idx3 + 2 * slices;
4156                                        idx5 = idx4 + 2 * slices;
4157                                        t[idx2] = a[idx1];
4158                                        t[idx2 + 1] = a[idx1 + 1];
4159                                        t[idx3] = a[idx1 + 2];
4160                                        t[idx3 + 1] = a[idx1 + 3];
4161                                        t[idx4] = a[idx1 + 4];
4162                                        t[idx4 + 1] = a[idx1 + 5];
4163                                        t[idx5] = a[idx1 + 6];
4164                                        t[idx5 + 1] = a[idx1 + 7];
4165                                    }
4166                                    fftSlices.complexInverse(t, startt, scale);
4167                                    fftSlices.complexInverse(t, startt + 2 * slices, scale);
4168                                    fftSlices.complexInverse(t, startt + 4 * slices, scale);
4169                                    fftSlices.complexInverse(t, startt + 6 * slices, scale);
4170                                    for (int s = 0; s < slices; s++) {
4171                                        idx1 = s * sliceStride + idx0 + c;
4172                                        idx2 = startt + 2 * s;
4173                                        idx3 = startt + 2 * slices + 2 * s;
4174                                        idx4 = idx3 + 2 * slices;
4175                                        idx5 = idx4 + 2 * slices;
4176                                        a[idx1] = t[idx2];
4177                                        a[idx1 + 1] = t[idx2 + 1];
4178                                        a[idx1 + 2] = t[idx3];
4179                                        a[idx1 + 3] = t[idx3 + 1];
4180                                        a[idx1 + 4] = t[idx4];
4181                                        a[idx1 + 5] = t[idx4 + 1];
4182                                        a[idx1 + 6] = t[idx5];
4183                                        a[idx1 + 7] = t[idx5 + 1];
4184                                    }
4185                                }
4186                            }
4187                        } else if (columns == 4) {
4188                            for (int r = n0; r < rows; r += nthreads) {
4189                                idx0 = r * rowStride;
4190                                for (int s = 0; s < slices; s++) {
4191                                    idx1 = s * sliceStride + idx0;
4192                                    idx2 = startt + 2 * s;
4193                                    idx3 = startt + 2 * slices + 2 * s;
4194                                    t[idx2] = a[idx1];
4195                                    t[idx2 + 1] = a[idx1 + 1];
4196                                    t[idx3] = a[idx1 + 2];
4197                                    t[idx3 + 1] = a[idx1 + 3];
4198                                }
4199                                fftSlices.complexInverse(t, startt, scale);
4200                                fftSlices.complexInverse(t, startt + 2 * slices, scale);
4201                                for (int s = 0; s < slices; s++) {
4202                                    idx1 = s * sliceStride + idx0;
4203                                    idx2 = startt + 2 * s;
4204                                    idx3 = startt + 2 * slices + 2 * s;
4205                                    a[idx1] = t[idx2];
4206                                    a[idx1 + 1] = t[idx2 + 1];
4207                                    a[idx1 + 2] = t[idx3];
4208                                    a[idx1 + 3] = t[idx3 + 1];
4209                                }
4210                            }
4211                        } else if (columns == 2) {
4212                            for (int r = n0; r < rows; r += nthreads) {
4213                                idx0 = r * rowStride;
4214                                for (int s = 0; s < slices; s++) {
4215                                    idx1 = s * sliceStride + idx0;
4216                                    idx2 = startt + 2 * s;
4217                                    t[idx2] = a[idx1];
4218                                    t[idx2 + 1] = a[idx1 + 1];
4219                                }
4220                                fftSlices.complexInverse(t, startt, scale);
4221                                for (int s = 0; s < slices; s++) {
4222                                    idx1 = s * sliceStride + idx0;
4223                                    idx2 = startt + 2 * s;
4224                                    a[idx1] = t[idx2];
4225                                    a[idx1 + 1] = t[idx2 + 1];
4226                                }
4227                            }
4228                        }
4229                    }
4230
4231                }
4232            });
4233        }
4234        ConcurrencyUtils.waitForCompletion(futures);
4235    }
4236
4237    private void cdft3db_subth(final int isgn, final double[][][] a, final boolean scale) {
4238        int nt, i;
4239
4240        final int nthreads = Math.min(ConcurrencyUtils.getNumberOfThreads(), rows);
4241        nt = 8 * slices;
4242        if (columns == 4) {
4243            nt >>= 1;
4244        } else if (columns < 4) {
4245            nt >>= 2;
4246        }
4247        Future<?>[] futures = new Future[nthreads];
4248        for (i = 0; i < nthreads; i++) {
4249            final int n0 = i;
4250            final int startt = nt * i;
4251            futures[i] = ConcurrencyUtils.submit(new Runnable() {
4252
4253                public void run() {
4254                    int idx2, idx3, idx4, idx5;
4255
4256                    if (isgn == -1) {
4257                        if (columns > 4) {
4258                            for (int r = n0; r < rows; r += nthreads) {
4259                                for (int c = 0; c < columns; c += 8) {
4260                                    for (int s = 0; s < slices; s++) {
4261                                        idx2 = startt + 2 * s;
4262                                        idx3 = startt + 2 * slices + 2 * s;
4263                                        idx4 = idx3 + 2 * slices;
4264                                        idx5 = idx4 + 2 * slices;
4265                                        t[idx2] = a[s][r][c];
4266                                        t[idx2 + 1] = a[s][r][c + 1];
4267                                        t[idx3] = a[s][r][c + 2];
4268                                        t[idx3 + 1] = a[s][r][c + 3];
4269                                        t[idx4] = a[s][r][c + 4];
4270                                        t[idx4 + 1] = a[s][r][c + 5];
4271                                        t[idx5] = a[s][r][c + 6];
4272                                        t[idx5 + 1] = a[s][r][c + 7];
4273                                    }
4274                                    fftSlices.complexForward(t, startt);
4275                                    fftSlices.complexForward(t, startt + 2 * slices);
4276                                    fftSlices.complexForward(t, startt + 4 * slices);
4277                                    fftSlices.complexForward(t, startt + 6 * slices);
4278                                    for (int s = 0; s < slices; s++) {
4279                                        idx2 = startt + 2 * s;
4280                                        idx3 = startt + 2 * slices + 2 * s;
4281                                        idx4 = idx3 + 2 * slices;
4282                                        idx5 = idx4 + 2 * slices;
4283                                        a[s][r][c] = t[idx2];
4284                                        a[s][r][c + 1] = t[idx2 + 1];
4285                                        a[s][r][c + 2] = t[idx3];
4286                                        a[s][r][c + 3] = t[idx3 + 1];
4287                                        a[s][r][c + 4] = t[idx4];
4288                                        a[s][r][c + 5] = t[idx4 + 1];
4289                                        a[s][r][c + 6] = t[idx5];
4290                                        a[s][r][c + 7] = t[idx5 + 1];
4291                                    }
4292                                }
4293                            }
4294                        } else if (columns == 4) {
4295                            for (int r = n0; r < rows; r += nthreads) {
4296                                for (int s = 0; s < slices; s++) {
4297                                    idx2 = startt + 2 * s;
4298                                    idx3 = startt + 2 * slices + 2 * s;
4299                                    t[idx2] = a[s][r][0];
4300                                    t[idx2 + 1] = a[s][r][1];
4301                                    t[idx3] = a[s][r][2];
4302                                    t[idx3 + 1] = a[s][r][3];
4303                                }
4304                                fftSlices.complexForward(t, startt);
4305                                fftSlices.complexForward(t, startt + 2 * slices);
4306                                for (int s = 0; s < slices; s++) {
4307                                    idx2 = startt + 2 * s;
4308                                    idx3 = startt + 2 * slices + 2 * s;
4309                                    a[s][r][0] = t[idx2];
4310                                    a[s][r][1] = t[idx2 + 1];
4311                                    a[s][r][2] = t[idx3];
4312                                    a[s][r][3] = t[idx3 + 1];
4313                                }
4314                            }
4315                        } else if (columns == 2) {
4316                            for (int r = n0; r < rows; r += nthreads) {
4317                                for (int s = 0; s < slices; s++) {
4318                                    idx2 = startt + 2 * s;
4319                                    t[idx2] = a[s][r][0];
4320                                    t[idx2 + 1] = a[s][r][1];
4321                                }
4322                                fftSlices.complexForward(t, startt);
4323                                for (int s = 0; s < slices; s++) {
4324                                    idx2 = startt + 2 * s;
4325                                    a[s][r][0] = t[idx2];
4326                                    a[s][r][1] = t[idx2 + 1];
4327                                }
4328                            }
4329                        }
4330                    } else {
4331                        if (columns > 4) {
4332                            for (int r = n0; r < rows; r += nthreads) {
4333                                for (int c = 0; c < columns; c += 8) {
4334                                    for (int s = 0; s < slices; s++) {
4335                                        idx2 = startt + 2 * s;
4336                                        idx3 = startt + 2 * slices + 2 * s;
4337                                        idx4 = idx3 + 2 * slices;
4338                                        idx5 = idx4 + 2 * slices;
4339                                        t[idx2] = a[s][r][c];
4340                                        t[idx2 + 1] = a[s][r][c + 1];
4341                                        t[idx3] = a[s][r][c + 2];
4342                                        t[idx3 + 1] = a[s][r][c + 3];
4343                                        t[idx4] = a[s][r][c + 4];
4344                                        t[idx4 + 1] = a[s][r][c + 5];
4345                                        t[idx5] = a[s][r][c + 6];
4346                                        t[idx5 + 1] = a[s][r][c + 7];
4347                                    }
4348                                    fftSlices.complexInverse(t, startt, scale);
4349                                    fftSlices.complexInverse(t, startt + 2 * slices, scale);
4350                                    fftSlices.complexInverse(t, startt + 4 * slices, scale);
4351                                    fftSlices.complexInverse(t, startt + 6 * slices, scale);
4352                                    for (int s = 0; s < slices; s++) {
4353                                        idx2 = startt + 2 * s;
4354                                        idx3 = startt + 2 * slices + 2 * s;
4355                                        idx4 = idx3 + 2 * slices;
4356                                        idx5 = idx4 + 2 * slices;
4357                                        a[s][r][c] = t[idx2];
4358                                        a[s][r][c + 1] = t[idx2 + 1];
4359                                        a[s][r][c + 2] = t[idx3];
4360                                        a[s][r][c + 3] = t[idx3 + 1];
4361                                        a[s][r][c + 4] = t[idx4];
4362                                        a[s][r][c + 5] = t[idx4 + 1];
4363                                        a[s][r][c + 6] = t[idx5];
4364                                        a[s][r][c + 7] = t[idx5 + 1];
4365                                    }
4366                                }
4367                            }
4368                        } else if (columns == 4) {
4369                            for (int r = n0; r < rows; r += nthreads) {
4370                                for (int s = 0; s < slices; s++) {
4371                                    idx2 = startt + 2 * s;
4372                                    idx3 = startt + 2 * slices + 2 * s;
4373                                    t[idx2] = a[s][r][0];
4374                                    t[idx2 + 1] = a[s][r][1];
4375                                    t[idx3] = a[s][r][2];
4376                                    t[idx3 + 1] = a[s][r][3];
4377                                }
4378                                fftSlices.complexInverse(t, startt, scale);
4379                                fftSlices.complexInverse(t, startt + 2 * slices, scale);
4380                                for (int s = 0; s < slices; s++) {
4381                                    idx2 = startt + 2 * s;
4382                                    idx3 = startt + 2 * slices + 2 * s;
4383                                    a[s][r][0] = t[idx2];
4384                                    a[s][r][1] = t[idx2 + 1];
4385                                    a[s][r][2] = t[idx3];
4386                                    a[s][r][3] = t[idx3 + 1];
4387                                }
4388                            }
4389                        } else if (columns == 2) {
4390                            for (int r = n0; r < rows; r += nthreads) {
4391                                for (int s = 0; s < slices; s++) {
4392                                    idx2 = startt + 2 * s;
4393                                    t[idx2] = a[s][r][0];
4394                                    t[idx2 + 1] = a[s][r][1];
4395                                }
4396                                fftSlices.complexInverse(t, startt, scale);
4397                                for (int s = 0; s < slices; s++) {
4398                                    idx2 = startt + 2 * s;
4399                                    a[s][r][0] = t[idx2];
4400                                    a[s][r][1] = t[idx2 + 1];
4401                                }
4402                            }
4403                        }
4404                    }
4405
4406                }
4407            });
4408        }
4409        ConcurrencyUtils.waitForCompletion(futures);
4410    }
4411
4412    private void rdft3d_sub(int isgn, double[] a) {
4413        int n1h, n2h, i, j, k, l, idx1, idx2, idx3, idx4;
4414        double xi;
4415
4416        n1h = slices >> 1;
4417        n2h = rows >> 1;
4418        if (isgn < 0) {
4419            for (i = 1; i < n1h; i++) {
4420                j = slices - i;
4421                idx1 = i * sliceStride;
4422                idx2 = j * sliceStride;
4423                idx3 = i * sliceStride + n2h * rowStride;
4424                idx4 = j * sliceStride + n2h * rowStride;
4425                xi = a[idx1] - a[idx2];
4426                a[idx1] += a[idx2];
4427                a[idx2] = xi;
4428                xi = a[idx2 + 1] - a[idx1 + 1];
4429                a[idx1 + 1] += a[idx2 + 1];
4430                a[idx2 + 1] = xi;
4431                xi = a[idx3] - a[idx4];
4432                a[idx3] += a[idx4];
4433                a[idx4] = xi;
4434                xi = a[idx4 + 1] - a[idx3 + 1];
4435                a[idx3 + 1] += a[idx4 + 1];
4436                a[idx4 + 1] = xi;
4437                for (k = 1; k < n2h; k++) {
4438                    l = rows - k;
4439                    idx1 = i * sliceStride + k * rowStride;
4440                    idx2 = j * sliceStride + l * rowStride;
4441                    xi = a[idx1] - a[idx2];
4442                    a[idx1] += a[idx2];
4443                    a[idx2] = xi;
4444                    xi = a[idx2 + 1] - a[idx1 + 1];
4445                    a[idx1 + 1] += a[idx2 + 1];
4446                    a[idx2 + 1] = xi;
4447                    idx3 = j * sliceStride + k * rowStride;
4448                    idx4 = i * sliceStride + l * rowStride;
4449                    xi = a[idx3] - a[idx4];
4450                    a[idx3] += a[idx4];
4451                    a[idx4] = xi;
4452                    xi = a[idx4 + 1] - a[idx3 + 1];
4453                    a[idx3 + 1] += a[idx4 + 1];
4454                    a[idx4 + 1] = xi;
4455                }
4456            }
4457            for (k = 1; k < n2h; k++) {
4458                l = rows - k;
4459                idx1 = k * rowStride;
4460                idx2 = l * rowStride;
4461                xi = a[idx1] - a[idx2];
4462                a[idx1] += a[idx2];
4463                a[idx2] = xi;
4464                xi = a[idx2 + 1] - a[idx1 + 1];
4465                a[idx1 + 1] += a[idx2 + 1];
4466                a[idx2 + 1] = xi;
4467                idx3 = n1h * sliceStride + k * rowStride;
4468                idx4 = n1h * sliceStride + l * rowStride;
4469                xi = a[idx3] - a[idx4];
4470                a[idx3] += a[idx4];
4471                a[idx4] = xi;
4472                xi = a[idx4 + 1] - a[idx3 + 1];
4473                a[idx3 + 1] += a[idx4 + 1];
4474                a[idx4 + 1] = xi;
4475            }
4476        } else {
4477            for (i = 1; i < n1h; i++) {
4478                j = slices - i;
4479                idx1 = j * sliceStride;
4480                idx2 = i * sliceStride;
4481                a[idx1] = 0.5f * (a[idx2] - a[idx1]);
4482                a[idx2] -= a[idx1];
4483                a[idx1 + 1] = 0.5f * (a[idx2 + 1] + a[idx1 + 1]);
4484                a[idx2 + 1] -= a[idx1 + 1];
4485                idx3 = j * sliceStride + n2h * rowStride;
4486                idx4 = i * sliceStride + n2h * rowStride;
4487                a[idx3] = 0.5f * (a[idx4] - a[idx3]);
4488                a[idx4] -= a[idx3];
4489                a[idx3 + 1] = 0.5f * (a[idx4 + 1] + a[idx3 + 1]);
4490                a[idx4 + 1] -= a[idx3 + 1];
4491                for (k = 1; k < n2h; k++) {
4492                    l = rows - k;
4493                    idx1 = j * sliceStride + l * rowStride;
4494                    idx2 = i * sliceStride + k * rowStride;
4495                    a[idx1] = 0.5f * (a[idx2] - a[idx1]);
4496                    a[idx2] -= a[idx1];
4497                    a[idx1 + 1] = 0.5f * (a[idx2 + 1] + a[idx1 + 1]);
4498                    a[idx2 + 1] -= a[idx1 + 1];
4499                    idx3 = i * sliceStride + l * rowStride;
4500                    idx4 = j * sliceStride + k * rowStride;
4501                    a[idx3] = 0.5f * (a[idx4] - a[idx3]);
4502                    a[idx4] -= a[idx3];
4503                    a[idx3 + 1] = 0.5f * (a[idx4 + 1] + a[idx3 + 1]);
4504                    a[idx4 + 1] -= a[idx3 + 1];
4505                }
4506            }
4507            for (k = 1; k < n2h; k++) {
4508                l = rows - k;
4509                idx1 = l * rowStride;
4510                idx2 = k * rowStride;
4511                a[idx1] = 0.5f * (a[idx2] - a[idx1]);
4512                a[idx2] -= a[idx1];
4513                a[idx1 + 1] = 0.5f * (a[idx2 + 1] + a[idx1 + 1]);
4514                a[idx2 + 1] -= a[idx1 + 1];
4515                idx3 = n1h * sliceStride + l * rowStride;
4516                idx4 = n1h * sliceStride + k * rowStride;
4517                a[idx3] = 0.5f * (a[idx4] - a[idx3]);
4518                a[idx4] -= a[idx3];
4519                a[idx3 + 1] = 0.5f * (a[idx4 + 1] + a[idx3 + 1]);
4520                a[idx4 + 1] -= a[idx3 + 1];
4521            }
4522        }
4523    }
4524
4525    private void rdft3d_sub(int isgn, double[][][] a) {
4526        int n1h, n2h, i, j, k, l;
4527        double xi;
4528
4529        n1h = slices >> 1;
4530        n2h = rows >> 1;
4531        if (isgn < 0) {
4532            for (i = 1; i < n1h; i++) {
4533                j = slices - i;
4534                xi = a[i][0][0] - a[j][0][0];
4535                a[i][0][0] += a[j][0][0];
4536                a[j][0][0] = xi;
4537                xi = a[j][0][1] - a[i][0][1];
4538                a[i][0][1] += a[j][0][1];
4539                a[j][0][1] = xi;
4540                xi = a[i][n2h][0] - a[j][n2h][0];
4541                a[i][n2h][0] += a[j][n2h][0];
4542                a[j][n2h][0] = xi;
4543                xi = a[j][n2h][1] - a[i][n2h][1];
4544                a[i][n2h][1] += a[j][n2h][1];
4545                a[j][n2h][1] = xi;
4546                for (k = 1; k < n2h; k++) {
4547                    l = rows - k;
4548                    xi = a[i][k][0] - a[j][l][0];
4549                    a[i][k][0] += a[j][l][0];
4550                    a[j][l][0] = xi;
4551                    xi = a[j][l][1] - a[i][k][1];
4552                    a[i][k][1] += a[j][l][1];
4553                    a[j][l][1] = xi;
4554                    xi = a[j][k][0] - a[i][l][0];
4555                    a[j][k][0] += a[i][l][0];
4556                    a[i][l][0] = xi;
4557                    xi = a[i][l][1] - a[j][k][1];
4558                    a[j][k][1] += a[i][l][1];
4559                    a[i][l][1] = xi;
4560                }
4561            }
4562            for (k = 1; k < n2h; k++) {
4563                l = rows - k;
4564                xi = a[0][k][0] - a[0][l][0];
4565                a[0][k][0] += a[0][l][0];
4566                a[0][l][0] = xi;
4567                xi = a[0][l][1] - a[0][k][1];
4568                a[0][k][1] += a[0][l][1];
4569                a[0][l][1] = xi;
4570                xi = a[n1h][k][0] - a[n1h][l][0];
4571                a[n1h][k][0] += a[n1h][l][0];
4572                a[n1h][l][0] = xi;
4573                xi = a[n1h][l][1] - a[n1h][k][1];
4574                a[n1h][k][1] += a[n1h][l][1];
4575                a[n1h][l][1] = xi;
4576            }
4577        } else {
4578            for (i = 1; i < n1h; i++) {
4579                j = slices - i;
4580                a[j][0][0] = 0.5f * (a[i][0][0] - a[j][0][0]);
4581                a[i][0][0] -= a[j][0][0];
4582                a[j][0][1] = 0.5f * (a[i][0][1] + a[j][0][1]);
4583                a[i][0][1] -= a[j][0][1];
4584                a[j][n2h][0] = 0.5f * (a[i][n2h][0] - a[j][n2h][0]);
4585                a[i][n2h][0] -= a[j][n2h][0];
4586                a[j][n2h][1] = 0.5f * (a[i][n2h][1] + a[j][n2h][1]);
4587                a[i][n2h][1] -= a[j][n2h][1];
4588                for (k = 1; k < n2h; k++) {
4589                    l = rows - k;
4590                    a[j][l][0] = 0.5f * (a[i][k][0] - a[j][l][0]);
4591                    a[i][k][0] -= a[j][l][0];
4592                    a[j][l][1] = 0.5f * (a[i][k][1] + a[j][l][1]);
4593                    a[i][k][1] -= a[j][l][1];
4594                    a[i][l][0] = 0.5f * (a[j][k][0] - a[i][l][0]);
4595                    a[j][k][0] -= a[i][l][0];
4596                    a[i][l][1] = 0.5f * (a[j][k][1] + a[i][l][1]);
4597                    a[j][k][1] -= a[i][l][1];
4598                }
4599            }
4600            for (k = 1; k < n2h; k++) {
4601                l = rows - k;
4602                a[0][l][0] = 0.5f * (a[0][k][0] - a[0][l][0]);
4603                a[0][k][0] -= a[0][l][0];
4604                a[0][l][1] = 0.5f * (a[0][k][1] + a[0][l][1]);
4605                a[0][k][1] -= a[0][l][1];
4606                a[n1h][l][0] = 0.5f * (a[n1h][k][0] - a[n1h][l][0]);
4607                a[n1h][k][0] -= a[n1h][l][0];
4608                a[n1h][l][1] = 0.5f * (a[n1h][k][1] + a[n1h][l][1]);
4609                a[n1h][k][1] -= a[n1h][l][1];
4610            }
4611        }
4612    }
4613
4614    private void fillSymmetric(final double[][][] a) {
4615        final int twon3 = 2 * columns;
4616        final int n2d2 = rows / 2;
4617        int n1d2 = slices / 2;
4618        int nthreads = ConcurrencyUtils.getNumberOfThreads();
4619        if ((nthreads > 1) && useThreads && (slices >= nthreads)) {
4620            Future<?>[] futures = new Future[nthreads];
4621            int p = slices / nthreads;
4622            for (int l = 0; l < nthreads; l++) {
4623                final int firstSlice = l * p;
4624                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
4625                futures[l] = ConcurrencyUtils.submit(new Runnable() {
4626                    public void run() {
4627                        for (int s = firstSlice; s < lastSlice; s++) {
4628                            int idx1 = (slices - s) % slices;
4629                            for (int r = 0; r < rows; r++) {
4630                                int idx2 = (rows - r) % rows;
4631                                for (int c = 1; c < columns; c += 2) {
4632                                    int idx3 = twon3 - c;
4633                                    a[idx1][idx2][idx3] = -a[s][r][c + 2];
4634                                    a[idx1][idx2][idx3 - 1] = a[s][r][c + 1];
4635                                }
4636                            }
4637                        }
4638                    }
4639                });
4640            }
4641            ConcurrencyUtils.waitForCompletion(futures);
4642
4643            // ---------------------------------------------
4644
4645            for (int l = 0; l < nthreads; l++) {
4646                final int firstSlice = l * p;
4647                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
4648                futures[l] = ConcurrencyUtils.submit(new Runnable() {
4649                    public void run() {
4650                        for (int s = firstSlice; s < lastSlice; s++) {
4651                            int idx1 = (slices - s) % slices;
4652                            for (int r = 1; r < n2d2; r++) {
4653                                int idx2 = rows - r;
4654                                a[idx1][r][columns] = a[s][idx2][1];
4655                                a[s][idx2][columns] = a[s][idx2][1];
4656                                a[idx1][r][columns + 1] = -a[s][idx2][0];
4657                                a[s][idx2][columns + 1] = a[s][idx2][0];
4658                            }
4659                        }
4660                    }
4661                });
4662            }
4663            ConcurrencyUtils.waitForCompletion(futures);
4664
4665            for (int l = 0; l < nthreads; l++) {
4666                final int firstSlice = l * p;
4667                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
4668                futures[l] = ConcurrencyUtils.submit(new Runnable() {
4669                    public void run() {
4670                        for (int s = firstSlice; s < lastSlice; s++) {
4671                            int idx1 = (slices - s) % slices;
4672                            for (int r = 1; r < n2d2; r++) {
4673                                int idx2 = rows - r;
4674                                a[idx1][idx2][0] = a[s][r][0];
4675                                a[idx1][idx2][1] = -a[s][r][1];
4676                            }
4677                        }
4678                    }
4679                });
4680            }
4681            ConcurrencyUtils.waitForCompletion(futures);
4682
4683        } else {
4684
4685            for (int s = 0; s < slices; s++) {
4686                int idx1 = (slices - s) % slices;
4687                for (int r = 0; r < rows; r++) {
4688                    int idx2 = (rows - r) % rows;
4689                    for (int c = 1; c < columns; c += 2) {
4690                        int idx3 = twon3 - c;
4691                        a[idx1][idx2][idx3] = -a[s][r][c + 2];
4692                        a[idx1][idx2][idx3 - 1] = a[s][r][c + 1];
4693                    }
4694                }
4695            }
4696
4697            // ---------------------------------------------
4698
4699            for (int s = 0; s < slices; s++) {
4700                int idx1 = (slices - s) % slices;
4701                for (int r = 1; r < n2d2; r++) {
4702                    int idx2 = rows - r;
4703                    a[idx1][r][columns] = a[s][idx2][1];
4704                    a[s][idx2][columns] = a[s][idx2][1];
4705                    a[idx1][r][columns + 1] = -a[s][idx2][0];
4706                    a[s][idx2][columns + 1] = a[s][idx2][0];
4707                }
4708            }
4709
4710            for (int s = 0; s < slices; s++) {
4711                int idx1 = (slices - s) % slices;
4712                for (int r = 1; r < n2d2; r++) {
4713                    int idx2 = rows - r;
4714                    a[idx1][idx2][0] = a[s][r][0];
4715                    a[idx1][idx2][1] = -a[s][r][1];
4716                }
4717            }
4718        }
4719
4720        // ----------------------------------------------------------
4721
4722        for (int s = 1; s < n1d2; s++) {
4723            int idx1 = slices - s;
4724            a[s][0][columns] = a[idx1][0][1];
4725            a[idx1][0][columns] = a[idx1][0][1];
4726            a[s][0][columns + 1] = -a[idx1][0][0];
4727            a[idx1][0][columns + 1] = a[idx1][0][0];
4728            a[s][n2d2][columns] = a[idx1][n2d2][1];
4729            a[idx1][n2d2][columns] = a[idx1][n2d2][1];
4730            a[s][n2d2][columns + 1] = -a[idx1][n2d2][0];
4731            a[idx1][n2d2][columns + 1] = a[idx1][n2d2][0];
4732            a[idx1][0][0] = a[s][0][0];
4733            a[idx1][0][1] = -a[s][0][1];
4734            a[idx1][n2d2][0] = a[s][n2d2][0];
4735            a[idx1][n2d2][1] = -a[s][n2d2][1];
4736
4737        }
4738        // ----------------------------------------
4739
4740        a[0][0][columns] = a[0][0][1];
4741        a[0][0][1] = 0;
4742        a[0][n2d2][columns] = a[0][n2d2][1];
4743        a[0][n2d2][1] = 0;
4744        a[n1d2][0][columns] = a[n1d2][0][1];
4745        a[n1d2][0][1] = 0;
4746        a[n1d2][n2d2][columns] = a[n1d2][n2d2][1];
4747        a[n1d2][n2d2][1] = 0;
4748        a[n1d2][0][columns + 1] = 0;
4749        a[n1d2][n2d2][columns + 1] = 0;
4750    }
4751
4752    private void fillSymmetric(final double[] a) {
4753        final int twon3 = 2 * columns;
4754        final int n2d2 = rows / 2;
4755        int n1d2 = slices / 2;
4756
4757        final int twoSliceStride = rows * twon3;
4758        final int twoRowStride = twon3;
4759
4760        int idx1, idx2, idx3, idx4, idx5, idx6;
4761
4762        for (int s = (slices - 1); s >= 1; s--) {
4763            idx3 = s * sliceStride;
4764            idx4 = 2 * idx3;
4765            for (int r = 0; r < rows; r++) {
4766                idx5 = r * rowStride;
4767                idx6 = 2 * idx5;
4768                for (int c = 0; c < columns; c += 2) {
4769                    idx1 = idx3 + idx5 + c;
4770                    idx2 = idx4 + idx6 + c;
4771                    a[idx2] = a[idx1];
4772                    a[idx1] = 0;
4773                    idx1++;
4774                    idx2++;
4775                    a[idx2] = a[idx1];
4776                    a[idx1] = 0;
4777                }
4778            }
4779        }
4780
4781        for (int r = 1; r < rows; r++) {
4782            idx3 = (rows - r) * rowStride;
4783            idx4 = (rows - r) * twoRowStride;
4784            for (int c = 0; c < columns; c += 2) {
4785                idx1 = idx3 + c;
4786                idx2 = idx4 + c;
4787                a[idx2] = a[idx1];
4788                a[idx1] = 0;
4789                idx1++;
4790                idx2++;
4791                a[idx2] = a[idx1];
4792                a[idx1] = 0;
4793            }
4794        }
4795
4796        int nthreads = ConcurrencyUtils.getNumberOfThreads();
4797        if ((nthreads > 1) && useThreads && (slices >= nthreads)) {
4798            Future<?>[] futures = new Future[nthreads];
4799            int p = slices / nthreads;
4800            for (int l = 0; l < nthreads; l++) {
4801                final int firstSlice = l * p;
4802                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
4803                futures[l] = ConcurrencyUtils.submit(new Runnable() {
4804                    public void run() {
4805                        for (int s = firstSlice; s < lastSlice; s++) {
4806                            int idx3 = ((slices - s) % slices) * twoSliceStride;
4807                            int idx5 = s * twoSliceStride;
4808                            for (int r = 0; r < rows; r++) {
4809                                int idx4 = ((rows - r) % rows) * twoRowStride;
4810                                int idx6 = r * twoRowStride;
4811                                for (int c = 1; c < columns; c += 2) {
4812                                    int idx1 = idx3 + idx4 + twon3 - c;
4813                                    int idx2 = idx5 + idx6 + c;
4814                                    a[idx1] = -a[idx2 + 2];
4815                                    a[idx1 - 1] = a[idx2 + 1];
4816                                }
4817                            }
4818                        }
4819                    }
4820                });
4821            }
4822            ConcurrencyUtils.waitForCompletion(futures);
4823
4824            // ---------------------------------------------
4825
4826            for (int l = 0; l < nthreads; l++) {
4827                final int firstSlice = l * p;
4828                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
4829                futures[l] = ConcurrencyUtils.submit(new Runnable() {
4830                    public void run() {
4831                        for (int s = firstSlice; s < lastSlice; s++) {
4832                            int idx5 = ((slices - s) % slices) * twoSliceStride;
4833                            int idx6 = s * twoSliceStride;
4834                            for (int r = 1; r < n2d2; r++) {
4835                                int idx4 = idx6 + (rows - r) * twoRowStride;
4836                                int idx1 = idx5 + r * twoRowStride + columns;
4837                                int idx2 = idx4 + columns;
4838                                int idx3 = idx4 + 1;
4839                                a[idx1] = a[idx3];
4840                                a[idx2] = a[idx3];
4841                                a[idx1 + 1] = -a[idx4];
4842                                a[idx2 + 1] = a[idx4];
4843
4844                            }
4845                        }
4846                    }
4847                });
4848            }
4849            ConcurrencyUtils.waitForCompletion(futures);
4850            for (int l = 0; l < nthreads; l++) {
4851                final int firstSlice = l * p;
4852                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
4853                futures[l] = ConcurrencyUtils.submit(new Runnable() {
4854                    public void run() {
4855                        for (int s = firstSlice; s < lastSlice; s++) {
4856                            int idx3 = ((slices - s) % slices) * twoSliceStride;
4857                            int idx4 = s * twoSliceStride;
4858                            for (int r = 1; r < n2d2; r++) {
4859                                int idx1 = idx3 + (rows - r) * twoRowStride;
4860                                int idx2 = idx4 + r * twoRowStride;
4861                                a[idx1] = a[idx2];
4862                                a[idx1 + 1] = -a[idx2 + 1];
4863
4864                            }
4865                        }
4866                    }
4867                });
4868            }
4869            ConcurrencyUtils.waitForCompletion(futures);
4870        } else {
4871
4872            // -----------------------------------------------
4873            for (int s = 0; s < slices; s++) {
4874                idx3 = ((slices - s) % slices) * twoSliceStride;
4875                idx5 = s * twoSliceStride;
4876                for (int r = 0; r < rows; r++) {
4877                    idx4 = ((rows - r) % rows) * twoRowStride;
4878                    idx6 = r * twoRowStride;
4879                    for (int c = 1; c < columns; c += 2) {
4880                        idx1 = idx3 + idx4 + twon3 - c;
4881                        idx2 = idx5 + idx6 + c;
4882                        a[idx1] = -a[idx2 + 2];
4883                        a[idx1 - 1] = a[idx2 + 1];
4884                    }
4885                }
4886            }
4887
4888            // ---------------------------------------------
4889
4890            for (int s = 0; s < slices; s++) {
4891                idx5 = ((slices - s) % slices) * twoSliceStride;
4892                idx6 = s * twoSliceStride;
4893                for (int r = 1; r < n2d2; r++) {
4894                    idx4 = idx6 + (rows - r) * twoRowStride;
4895                    idx1 = idx5 + r * twoRowStride + columns;
4896                    idx2 = idx4 + columns;
4897                    idx3 = idx4 + 1;
4898                    a[idx1] = a[idx3];
4899                    a[idx2] = a[idx3];
4900                    a[idx1 + 1] = -a[idx4];
4901                    a[idx2 + 1] = a[idx4];
4902
4903                }
4904            }
4905
4906            for (int s = 0; s < slices; s++) {
4907                idx3 = ((slices - s) % slices) * twoSliceStride;
4908                idx4 = s * twoSliceStride;
4909                for (int r = 1; r < n2d2; r++) {
4910                    idx1 = idx3 + (rows - r) * twoRowStride;
4911                    idx2 = idx4 + r * twoRowStride;
4912                    a[idx1] = a[idx2];
4913                    a[idx1 + 1] = -a[idx2 + 1];
4914
4915                }
4916            }
4917        }
4918
4919        // ----------------------------------------------------------
4920
4921        for (int s = 1; s < n1d2; s++) {
4922            idx1 = s * twoSliceStride;
4923            idx2 = (slices - s) * twoSliceStride;
4924            idx3 = n2d2 * twoRowStride;
4925            idx4 = idx1 + idx3;
4926            idx5 = idx2 + idx3;
4927            a[idx1 + columns] = a[idx2 + 1];
4928            a[idx2 + columns] = a[idx2 + 1];
4929            a[idx1 + columns + 1] = -a[idx2];
4930            a[idx2 + columns + 1] = a[idx2];
4931            a[idx4 + columns] = a[idx5 + 1];
4932            a[idx5 + columns] = a[idx5 + 1];
4933            a[idx4 + columns + 1] = -a[idx5];
4934            a[idx5 + columns + 1] = a[idx5];
4935            a[idx2] = a[idx1];
4936            a[idx2 + 1] = -a[idx1 + 1];
4937            a[idx5] = a[idx4];
4938            a[idx5 + 1] = -a[idx4 + 1];
4939
4940        }
4941
4942        // ----------------------------------------
4943
4944        a[columns] = a[1];
4945        a[1] = 0;
4946        idx1 = n2d2 * twoRowStride;
4947        idx2 = n1d2 * twoSliceStride;
4948        idx3 = idx1 + idx2;
4949        a[idx1 + columns] = a[idx1 + 1];
4950        a[idx1 + 1] = 0;
4951        a[idx2 + columns] = a[idx2 + 1];
4952        a[idx2 + 1] = 0;
4953        a[idx3 + columns] = a[idx3 + 1];
4954        a[idx3 + 1] = 0;
4955        a[idx2 + columns + 1] = 0;
4956        a[idx3 + columns + 1] = 0;
4957    }
4958}