001/* ***** BEGIN LICENSE BLOCK *****
002 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
003 *
004 * The contents of this file are subject to the Mozilla Public License Version
005 * 1.1 (the "License"); you may not use this file except in compliance with
006 * the License. You may obtain a copy of the License at
007 * http://www.mozilla.org/MPL/
008 *
009 * Software distributed under the License is distributed on an "AS IS" basis,
010 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
011 * for the specific language governing rights and limitations under the
012 * License.
013 *
014 * The Original Code is JTransforms.
015 *
016 * The Initial Developer of the Original Code is
017 * Piotr Wendykier, Emory University.
018 * Portions created by the Initial Developer are Copyright (C) 2007-2009
019 * the Initial Developer. All Rights Reserved.
020 *
021 * Alternatively, the contents of this file may be used under the terms of
022 * either the GNU General Public License Version 2 or later (the "GPL"), or
023 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
024 * in which case the provisions of the GPL or the LGPL are applicable instead
025 * of those above. If you wish to allow use of your version of this file only
026 * under the terms of either the GPL or the LGPL, and not to allow others to
027 * use your version of this file under the terms of the MPL, indicate your
028 * decision by deleting the provisions above and replace them with the notice
029 * and other provisions required by the GPL or the LGPL. If you do not delete
030 * the provisions above, a recipient may use your version of this file under
031 * the terms of any one of the MPL, the GPL or the LGPL.
032 *
033 * ***** END LICENSE BLOCK ***** */
034
035package edu.emory.mathcs.jtransforms.fft;
036
037import java.util.concurrent.Future;
038
039import edu.emory.mathcs.utils.ConcurrencyUtils;
040
041/**
042 * Computes 2D Discrete Fourier Transform (DFT) of complex and real, double
043 * precision data. The sizes of both dimensions can be arbitrary numbers. This
044 * is a parallel implementation of split-radix and mixed-radix algorithms
045 * optimized for SMP systems. <br>
046 * <br>
047 * Part of the code is derived from General Purpose FFT Package written by Takuya Ooura
048 * (http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html)
049 * 
050 * @author Piotr Wendykier (piotr.wendykier@gmail.com)
051 * 
052 */
053public class DoubleFFT_2D {
054
055    private int rows;
056
057    private int columns;
058
059    private double[] t;
060
061    private DoubleFFT_1D fftColumns, fftRows;
062
063    private int oldNthreads;
064
065    private int nt;
066
067    private boolean isPowerOfTwo = false;
068
069    private boolean useThreads = false;
070
071    /**
072     * Creates new instance of DoubleFFT_2D.
073     * 
074     * @param rows
075     *            number of rows
076     * @param columns
077     *            number of columns
078     */
079    public DoubleFFT_2D(int rows, int columns) {
080        if (rows <= 1 || columns <= 1) {
081            throw new IllegalArgumentException("rows and columns must be greater than 1");
082        }
083        this.rows = rows;
084        this.columns = columns;
085        if (rows * columns >= ConcurrencyUtils.getThreadsBeginN_2D()) {
086            this.useThreads = true;
087        }
088        if (ConcurrencyUtils.isPowerOf2(rows) && ConcurrencyUtils.isPowerOf2(columns)) {
089            isPowerOfTwo = true;
090            oldNthreads = ConcurrencyUtils.getNumberOfThreads();
091            nt = 8 * oldNthreads * rows;
092            if (2 * columns == 4 * oldNthreads) {
093                nt >>= 1;
094            } else if (2 * columns < 4 * oldNthreads) {
095                nt >>= 2;
096            }
097            t = new double[nt];
098        }
099        fftRows = new DoubleFFT_1D(rows);
100        if (rows == columns) {
101            fftColumns = fftRows;
102        } else {
103            fftColumns = new DoubleFFT_1D(columns);
104        }
105    }
106
107    /**
108     * Computes 2D forward DFT of complex data leaving the result in
109     * <code>a</code>. The data is stored in 1D array in row-major order.
110     * Complex number is stored as two double values in sequence: the real and
111     * imaginary part, i.e. the input array must be of size rows*2*columns. The
112     * physical layout of the input data has to be as follows:<br>
113     * 
114     * <pre>
115     * a[k1*2*columns+2*k2] = Re[k1][k2], 
116     * a[k1*2*columns+2*k2+1] = Im[k1][k2], 0&lt;=k1&lt;rows, 0&lt;=k2&lt;columns,
117     * </pre>
118     * 
119     * @param a
120     *            data to transform
121     */
122    public void complexForward(final double[] a) {
123        int nthreads = ConcurrencyUtils.getNumberOfThreads();
124        if (isPowerOfTwo) {
125            int oldn2 = columns;
126            columns = 2 * columns;
127            if (nthreads != oldNthreads) {
128                nt = 8 * nthreads * rows;
129                if (columns == 4 * nthreads) {
130                    nt >>= 1;
131                } else if (columns < 4 * nthreads) {
132                    nt >>= 2;
133                }
134                t = new double[nt];
135                oldNthreads = nthreads;
136            }
137            if ((nthreads > 1) && useThreads) {
138                xdft2d0_subth1(0, -1, a, true);
139                cdft2d_subth(-1, a, true);
140            } else {
141                for (int r = 0; r < rows; r++) {
142                    fftColumns.complexForward(a, r * columns);
143                }
144                cdft2d_sub(-1, a, true);
145            }
146            columns = oldn2;
147        } else {
148            final int rowStride = 2 * columns;
149            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
150                Future<?>[] futures = new Future[nthreads];
151                int p = rows / nthreads;
152                for (int l = 0; l < nthreads; l++) {
153                    final int firstRow = l * p;
154                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
155                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
156                        public void run() {
157                            for (int r = firstRow; r < lastRow; r++) {
158                                fftColumns.complexForward(a, r * rowStride);
159                            }
160                        }
161                    });
162                }
163                ConcurrencyUtils.waitForCompletion(futures);
164                p = columns / nthreads;
165                for (int l = 0; l < nthreads; l++) {
166                    final int firstColumn = l * p;
167                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
168                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
169                        public void run() {
170                            double[] temp = new double[2 * rows];
171                            for (int c = firstColumn; c < lastColumn; c++) {
172                                int idx0 = 2 * c;
173                                for (int r = 0; r < rows; r++) {
174                                    int idx1 = 2 * r;
175                                    int idx2 = r * rowStride + idx0;
176                                    temp[idx1] = a[idx2];
177                                    temp[idx1 + 1] = a[idx2 + 1];
178                                }
179                                fftRows.complexForward(temp);
180                                for (int r = 0; r < rows; r++) {
181                                    int idx1 = 2 * r;
182                                    int idx2 = r * rowStride + idx0;
183                                    a[idx2] = temp[idx1];
184                                    a[idx2 + 1] = temp[idx1 + 1];
185                                }
186                            }
187                        }
188                    });
189                }
190                ConcurrencyUtils.waitForCompletion(futures);
191            } else {
192                for (int r = 0; r < rows; r++) {
193                    fftColumns.complexForward(a, r * rowStride);
194                }
195                double[] temp = new double[2 * rows];
196                for (int c = 0; c < columns; c++) {
197                    int idx0 = 2 * c;
198                    for (int r = 0; r < rows; r++) {
199                        int idx1 = 2 * r;
200                        int idx2 = r * rowStride + idx0;
201                        temp[idx1] = a[idx2];
202                        temp[idx1 + 1] = a[idx2 + 1];
203                    }
204                    fftRows.complexForward(temp);
205                    for (int r = 0; r < rows; r++) {
206                        int idx1 = 2 * r;
207                        int idx2 = r * rowStride + idx0;
208                        a[idx2] = temp[idx1];
209                        a[idx2 + 1] = temp[idx1 + 1];
210                    }
211                }
212            }
213        }
214    }
215
216    /**
217     * Computes 2D forward DFT of complex data leaving the result in
218     * <code>a</code>. The data is stored in 2D array. Complex data is
219     * represented by 2 double values in sequence: the real and imaginary part,
220     * i.e. the input array must be of size rows by 2*columns. The physical
221     * layout of the input data has to be as follows:<br>
222     * 
223     * <pre>
224     * a[k1][2*k2] = Re[k1][k2], 
225     * a[k1][2*k2+1] = Im[k1][k2], 0&lt;=k1&lt;rows, 0&lt;=k2&lt;columns,
226     * </pre>
227     * 
228     * @param a
229     *            data to transform
230     */
231    public void complexForward(final double[][] a) {
232        int nthreads = ConcurrencyUtils.getNumberOfThreads();
233        if (isPowerOfTwo) {
234            int oldn2 = columns;
235            columns = 2 * columns;
236            if (nthreads != oldNthreads) {
237                nt = 8 * nthreads * rows;
238                if (columns == 4 * nthreads) {
239                    nt >>= 1;
240                } else if (columns < 4 * nthreads) {
241                    nt >>= 2;
242                }
243                t = new double[nt];
244                oldNthreads = nthreads;
245            }
246            if ((nthreads > 1) && useThreads) {
247                xdft2d0_subth1(0, -1, a, true);
248                cdft2d_subth(-1, a, true);
249            } else {
250                for (int r = 0; r < rows; r++) {
251                    fftColumns.complexForward(a[r]);
252                }
253                cdft2d_sub(-1, a, true);
254            }
255            columns = oldn2;
256        } else {
257            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
258                Future<?>[] futures = new Future[nthreads];
259                int p = rows / nthreads;
260                for (int l = 0; l < nthreads; l++) {
261                    final int firstRow = l * p;
262                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
263                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
264                        public void run() {
265                            for (int r = firstRow; r < lastRow; r++) {
266                                fftColumns.complexForward(a[r]);
267                            }
268                        }
269                    });
270                }
271                ConcurrencyUtils.waitForCompletion(futures);
272                p = columns / nthreads;
273                for (int l = 0; l < nthreads; l++) {
274                    final int firstColumn = l * p;
275                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
276                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
277                        public void run() {
278                            double[] temp = new double[2 * rows];
279                            for (int c = firstColumn; c < lastColumn; c++) {
280                                int idx1 = 2 * c;
281                                for (int r = 0; r < rows; r++) {
282                                    int idx2 = 2 * r;
283                                    temp[idx2] = a[r][idx1];
284                                    temp[idx2 + 1] = a[r][idx1 + 1];
285                                }
286                                fftRows.complexForward(temp);
287                                for (int r = 0; r < rows; r++) {
288                                    int idx2 = 2 * r;
289                                    a[r][idx1] = temp[idx2];
290                                    a[r][idx1 + 1] = temp[idx2 + 1];
291                                }
292                            }
293                        }
294                    });
295                }
296                ConcurrencyUtils.waitForCompletion(futures);
297            } else {
298                for (int r = 0; r < rows; r++) {
299                    fftColumns.complexForward(a[r]);
300                }
301                double[] temp = new double[2 * rows];
302                for (int c = 0; c < columns; c++) {
303                    int idx1 = 2 * c;
304                    for (int r = 0; r < rows; r++) {
305                        int idx2 = 2 * r;
306                        temp[idx2] = a[r][idx1];
307                        temp[idx2 + 1] = a[r][idx1 + 1];
308                    }
309                    fftRows.complexForward(temp);
310                    for (int r = 0; r < rows; r++) {
311                        int idx2 = 2 * r;
312                        a[r][idx1] = temp[idx2];
313                        a[r][idx1 + 1] = temp[idx2 + 1];
314                    }
315                }
316            }
317        }
318    }
319
320    /**
321     * Computes 2D inverse DFT of complex data leaving the result in
322     * <code>a</code>. The data is stored in 1D array in row-major order.
323     * Complex number is stored as two double values in sequence: the real and
324     * imaginary part, i.e. the input array must be of size rows*2*columns. The
325     * physical layout of the input data has to be as follows:<br>
326     * 
327     * <pre>
328     * a[k1*2*columns+2*k2] = Re[k1][k2], 
329     * a[k1*2*columns+2*k2+1] = Im[k1][k2], 0&lt;=k1&lt;rows, 0&lt;=k2&lt;columns,
330     * </pre>
331     * 
332     * @param a
333     *            data to transform
334     * @param scale
335     *            if true then scaling is performed
336     * 
337     */
338    public void complexInverse(final double[] a, final boolean scale) {
339        int nthreads = ConcurrencyUtils.getNumberOfThreads();
340        if (isPowerOfTwo) {
341            int oldn2 = columns;
342            columns = 2 * columns;
343            if (nthreads != oldNthreads) {
344                nt = 8 * nthreads * rows;
345                if (columns == 4 * nthreads) {
346                    nt >>= 1;
347                } else if (columns < 4 * nthreads) {
348                    nt >>= 2;
349                }
350                t = new double[nt];
351                oldNthreads = nthreads;
352            }
353            if ((nthreads > 1) && useThreads) {
354                xdft2d0_subth1(0, 1, a, scale);
355                cdft2d_subth(1, a, scale);
356            } else {
357
358                for (int r = 0; r < rows; r++) {
359                    fftColumns.complexInverse(a, r * columns, scale);
360                }
361                cdft2d_sub(1, a, scale);
362            }
363            columns = oldn2;
364        } else {
365            final int rowspan = 2 * columns;
366            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
367                Future<?>[] futures = new Future[nthreads];
368                int p = rows / nthreads;
369                for (int l = 0; l < nthreads; l++) {
370                    final int firstRow = l * p;
371                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
372                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
373                        public void run() {
374                            for (int r = firstRow; r < lastRow; r++) {
375                                fftColumns.complexInverse(a, r * rowspan, scale);
376                            }
377                        }
378                    });
379                }
380                ConcurrencyUtils.waitForCompletion(futures);
381                p = columns / nthreads;
382                for (int l = 0; l < nthreads; l++) {
383                    final int firstColumn = l * p;
384                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
385                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
386                        public void run() {
387                            double[] temp = new double[2 * rows];
388                            for (int c = firstColumn; c < lastColumn; c++) {
389                                int idx1 = 2 * c;
390                                for (int r = 0; r < rows; r++) {
391                                    int idx2 = 2 * r;
392                                    int idx3 = r * rowspan + idx1;
393                                    temp[idx2] = a[idx3];
394                                    temp[idx2 + 1] = a[idx3 + 1];
395                                }
396                                fftRows.complexInverse(temp, scale);
397                                for (int r = 0; r < rows; r++) {
398                                    int idx2 = 2 * r;
399                                    int idx3 = r * rowspan + idx1;
400                                    a[idx3] = temp[idx2];
401                                    a[idx3 + 1] = temp[idx2 + 1];
402                                }
403                            }
404                        }
405                    });
406                }
407                ConcurrencyUtils.waitForCompletion(futures);
408            } else {
409                for (int r = 0; r < rows; r++) {
410                    fftColumns.complexInverse(a, r * rowspan, scale);
411                }
412                double[] temp = new double[2 * rows];
413                for (int c = 0; c < columns; c++) {
414                    int idx1 = 2 * c;
415                    for (int r = 0; r < rows; r++) {
416                        int idx2 = 2 * r;
417                        int idx3 = r * rowspan + idx1;
418                        temp[idx2] = a[idx3];
419                        temp[idx2 + 1] = a[idx3 + 1];
420                    }
421                    fftRows.complexInverse(temp, scale);
422                    for (int r = 0; r < rows; r++) {
423                        int idx2 = 2 * r;
424                        int idx3 = r * rowspan + idx1;
425                        a[idx3] = temp[idx2];
426                        a[idx3 + 1] = temp[idx2 + 1];
427                    }
428                }
429            }
430        }
431    }
432
433    /**
434     * Computes 2D inverse DFT of complex data leaving the result in
435     * <code>a</code>. The data is stored in 2D array. Complex data is
436     * represented by 2 double values in sequence: the real and imaginary part,
437     * i.e. the input array must be of size rows by 2*columns. The physical
438     * layout of the input data has to be as follows:<br>
439     * 
440     * <pre>
441     * a[k1][2*k2] = Re[k1][k2], 
442     * a[k1][2*k2+1] = Im[k1][k2], 0&lt;=k1&lt;rows, 0&lt;=k2&lt;columns,
443     * </pre>
444     * 
445     * @param a
446     *            data to transform
447     * @param scale
448     *            if true then scaling is performed
449     * 
450     */
451    public void complexInverse(final double[][] a, final boolean scale) {
452        int nthreads = ConcurrencyUtils.getNumberOfThreads();
453        if (isPowerOfTwo) {
454            int oldn2 = columns;
455            columns = 2 * columns;
456
457            if (nthreads != oldNthreads) {
458                nt = 8 * nthreads * rows;
459                if (columns == 4 * nthreads) {
460                    nt >>= 1;
461                } else if (columns < 4 * nthreads) {
462                    nt >>= 2;
463                }
464                t = new double[nt];
465                oldNthreads = nthreads;
466            }
467            if ((nthreads > 1) && useThreads) {
468                xdft2d0_subth1(0, 1, a, scale);
469                cdft2d_subth(1, a, scale);
470            } else {
471
472                for (int r = 0; r < rows; r++) {
473                    fftColumns.complexInverse(a[r], scale);
474                }
475                cdft2d_sub(1, a, scale);
476            }
477            columns = oldn2;
478        } else {
479            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
480                Future<?>[] futures = new Future[nthreads];
481                int p = rows / nthreads;
482                for (int l = 0; l < nthreads; l++) {
483                    final int firstRow = l * p;
484                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
485                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
486                        public void run() {
487                            for (int r = firstRow; r < lastRow; r++) {
488                                fftColumns.complexInverse(a[r], scale);
489                            }
490                        }
491                    });
492                }
493                ConcurrencyUtils.waitForCompletion(futures);
494                p = columns / nthreads;
495                for (int l = 0; l < nthreads; l++) {
496                    final int firstColumn = l * p;
497                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
498                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
499                        public void run() {
500                            double[] temp = new double[2 * rows];
501                            for (int c = firstColumn; c < lastColumn; c++) {
502                                int idx1 = 2 * c;
503                                for (int r = 0; r < rows; r++) {
504                                    int idx2 = 2 * r;
505                                    temp[idx2] = a[r][idx1];
506                                    temp[idx2 + 1] = a[r][idx1 + 1];
507                                }
508                                fftRows.complexInverse(temp, scale);
509                                for (int r = 0; r < rows; r++) {
510                                    int idx2 = 2 * r;
511                                    a[r][idx1] = temp[idx2];
512                                    a[r][idx1 + 1] = temp[idx2 + 1];
513                                }
514                            }
515                        }
516                    });
517                }
518                ConcurrencyUtils.waitForCompletion(futures);
519            } else {
520                for (int r = 0; r < rows; r++) {
521                    fftColumns.complexInverse(a[r], scale);
522                }
523                double[] temp = new double[2 * rows];
524                for (int c = 0; c < columns; c++) {
525                    int idx1 = 2 * c;
526                    for (int r = 0; r < rows; r++) {
527                        int idx2 = 2 * r;
528                        temp[idx2] = a[r][idx1];
529                        temp[idx2 + 1] = a[r][idx1 + 1];
530                    }
531                    fftRows.complexInverse(temp, scale);
532                    for (int r = 0; r < rows; r++) {
533                        int idx2 = 2 * r;
534                        a[r][idx1] = temp[idx2];
535                        a[r][idx1 + 1] = temp[idx2 + 1];
536                    }
537                }
538            }
539        }
540    }
541
542    /**
543     * Computes 2D forward DFT of real data leaving the result in <code>a</code>
544     * . This method only works when the sizes of both dimensions are
545     * power-of-two numbers. The physical layout of the output data is as
546     * follows:
547     * 
548     * <pre>
549     * a[k1*columns+2*k2] = Re[k1][k2] = Re[rows-k1][columns-k2], 
550     * a[k1*columns+2*k2+1] = Im[k1][k2] = -Im[rows-k1][columns-k2], 
551     *       0&lt;k1&lt;rows, 0&lt;k2&lt;columns/2, 
552     * a[2*k2] = Re[0][k2] = Re[0][columns-k2], 
553     * a[2*k2+1] = Im[0][k2] = -Im[0][columns-k2], 
554     *       0&lt;k2&lt;columns/2, 
555     * a[k1*columns] = Re[k1][0] = Re[rows-k1][0], 
556     * a[k1*columns+1] = Im[k1][0] = -Im[rows-k1][0], 
557     * a[(rows-k1)*columns+1] = Re[k1][columns/2] = Re[rows-k1][columns/2], 
558     * a[(rows-k1)*columns] = -Im[k1][columns/2] = Im[rows-k1][columns/2], 
559     *       0&lt;k1&lt;rows/2, 
560     * a[0] = Re[0][0], 
561     * a[1] = Re[0][columns/2], 
562     * a[(rows/2)*columns] = Re[rows/2][0], 
563     * a[(rows/2)*columns+1] = Re[rows/2][columns/2]
564     * </pre>
565     * 
566     * This method computes only half of the elements of the real transform. The
567     * other half satisfies the symmetry condition. If you want the full real
568     * forward transform, use <code>realForwardFull</code>. To get back the
569     * original data, use <code>realInverse</code> on the output of this method.
570     * 
571     * @param a
572     *            data to transform
573     */
574    public void realForward(double[] a) {
575        if (isPowerOfTwo == false) {
576            throw new IllegalArgumentException("rows and columns must be power of two numbers");
577        } else {
578            int nthreads;
579
580            nthreads = ConcurrencyUtils.getNumberOfThreads();
581            if (nthreads != oldNthreads) {
582                nt = 8 * nthreads * rows;
583                if (columns == 4 * nthreads) {
584                    nt >>= 1;
585                } else if (columns < 4 * nthreads) {
586                    nt >>= 2;
587                }
588                t = new double[nt];
589                oldNthreads = nthreads;
590            }
591            if ((nthreads > 1) && useThreads) {
592                xdft2d0_subth1(1, 1, a, true);
593                cdft2d_subth(-1, a, true);
594                rdft2d_sub(1, a);
595            } else {
596                for (int r = 0; r < rows; r++) {
597                    fftColumns.realForward(a, r * columns);
598                }
599                cdft2d_sub(-1, a, true);
600                rdft2d_sub(1, a);
601            }
602        }
603    }
604
605    /**
606     * Computes 2D forward DFT of real data leaving the result in <code>a</code>
607     * . This method only works when the sizes of both dimensions are
608     * power-of-two numbers. The physical layout of the output data is as
609     * follows:
610     * 
611     * <pre>
612     * a[k1][2*k2] = Re[k1][k2] = Re[rows-k1][columns-k2], 
613     * a[k1][2*k2+1] = Im[k1][k2] = -Im[rows-k1][columns-k2], 
614     *       0&lt;k1&lt;rows, 0&lt;k2&lt;columns/2, 
615     * a[0][2*k2] = Re[0][k2] = Re[0][columns-k2], 
616     * a[0][2*k2+1] = Im[0][k2] = -Im[0][columns-k2], 
617     *       0&lt;k2&lt;columns/2, 
618     * a[k1][0] = Re[k1][0] = Re[rows-k1][0], 
619     * a[k1][1] = Im[k1][0] = -Im[rows-k1][0], 
620     * a[rows-k1][1] = Re[k1][columns/2] = Re[rows-k1][columns/2], 
621     * a[rows-k1][0] = -Im[k1][columns/2] = Im[rows-k1][columns/2], 
622     *       0&lt;k1&lt;rows/2, 
623     * a[0][0] = Re[0][0], 
624     * a[0][1] = Re[0][columns/2], 
625     * a[rows/2][0] = Re[rows/2][0], 
626     * a[rows/2][1] = Re[rows/2][columns/2]
627     * </pre>
628     * 
629     * This method computes only half of the elements of the real transform. The
630     * other half satisfies the symmetry condition. If you want the full real
631     * forward transform, use <code>realForwardFull</code>. To get back the
632     * original data, use <code>realInverse</code> on the output of this method.
633     * 
634     * @param a
635     *            data to transform
636     */
637    public void realForward(double[][] a) {
638        if (isPowerOfTwo == false) {
639            throw new IllegalArgumentException("rows and columns must be power of two numbers");
640        } else {
641            int nthreads;
642
643            nthreads = ConcurrencyUtils.getNumberOfThreads();
644            if (nthreads != oldNthreads) {
645                nt = 8 * nthreads * rows;
646                if (columns == 4 * nthreads) {
647                    nt >>= 1;
648                } else if (columns < 4 * nthreads) {
649                    nt >>= 2;
650                }
651                t = new double[nt];
652                oldNthreads = nthreads;
653            }
654            if ((nthreads > 1) && useThreads) {
655                xdft2d0_subth1(1, 1, a, true);
656                cdft2d_subth(-1, a, true);
657                rdft2d_sub(1, a);
658            } else {
659                for (int r = 0; r < rows; r++) {
660                    fftColumns.realForward(a[r]);
661                }
662                cdft2d_sub(-1, a, true);
663                rdft2d_sub(1, a);
664            }
665        }
666    }
667
668    /**
669     * Computes 2D forward DFT of real data leaving the result in <code>a</code>
670     * . This method computes full real forward transform, i.e. you will get the
671     * same result as from <code>complexForward</code> called with all imaginary
672     * part equal 0. Because the result is stored in <code>a</code>, the input
673     * array must be of size rows*2*columns, with only the first rows*columns
674     * elements filled with real data. To get back the original data, use
675     * <code>complexInverse</code> on the output of this method.
676     * 
677     * @param a
678     *            data to transform
679     */
680    public void realForwardFull(double[] a) {
681        if (isPowerOfTwo) {
682            int nthreads;
683
684            nthreads = ConcurrencyUtils.getNumberOfThreads();
685            if (nthreads != oldNthreads) {
686                nt = 8 * nthreads * rows;
687                if (columns == 4 * nthreads) {
688                    nt >>= 1;
689                } else if (columns < 4 * nthreads) {
690                    nt >>= 2;
691                }
692                t = new double[nt];
693                oldNthreads = nthreads;
694            }
695            if ((nthreads > 1) && useThreads) {
696                xdft2d0_subth1(1, 1, a, true);
697                cdft2d_subth(-1, a, true);
698                rdft2d_sub(1, a);
699            } else {
700                for (int r = 0; r < rows; r++) {
701                    fftColumns.realForward(a, r * columns);
702                }
703                cdft2d_sub(-1, a, true);
704                rdft2d_sub(1, a);
705            }
706            fillSymmetric(a);
707        } else {
708            mixedRadixRealForwardFull(a);
709        }
710    }
711
712    /**
713     * Computes 2D forward DFT of real data leaving the result in <code>a</code>
714     * . This method computes full real forward transform, i.e. you will get the
715     * same result as from <code>complexForward</code> called with all imaginary
716     * part equal 0. Because the result is stored in <code>a</code>, the input
717     * array must be of size rows by 2*columns, with only the first rows by
718     * columns elements filled with real data. To get back the original data,
719     * use <code>complexInverse</code> on the output of this method.
720     * 
721     * @param a
722     *            data to transform
723     */
724    public void realForwardFull(double[][] a) {
725        if (isPowerOfTwo) {
726            int nthreads;
727
728            nthreads = ConcurrencyUtils.getNumberOfThreads();
729            if (nthreads != oldNthreads) {
730                nt = 8 * nthreads * rows;
731                if (columns == 4 * nthreads) {
732                    nt >>= 1;
733                } else if (columns < 4 * nthreads) {
734                    nt >>= 2;
735                }
736                t = new double[nt];
737                oldNthreads = nthreads;
738            }
739            if ((nthreads > 1) && useThreads) {
740                xdft2d0_subth1(1, 1, a, true);
741                cdft2d_subth(-1, a, true);
742                rdft2d_sub(1, a);
743            } else {
744                for (int r = 0; r < rows; r++) {
745                    fftColumns.realForward(a[r]);
746                }
747                cdft2d_sub(-1, a, true);
748                rdft2d_sub(1, a);
749            }
750            fillSymmetric(a);
751        } else {
752            mixedRadixRealForwardFull(a);
753        }
754    }
755
756    /**
757     * Computes 2D inverse DFT of real data leaving the result in <code>a</code>
758     * . This method only works when the sizes of both dimensions are
759     * power-of-two numbers. The physical layout of the input data has to be as
760     * follows:
761     * 
762     * <pre>
763     * a[k1*columns+2*k2] = Re[k1][k2] = Re[rows-k1][columns-k2], 
764     * a[k1*columns+2*k2+1] = Im[k1][k2] = -Im[rows-k1][columns-k2], 
765     *       0&lt;k1&lt;rows, 0&lt;k2&lt;columns/2, 
766     * a[2*k2] = Re[0][k2] = Re[0][columns-k2], 
767     * a[2*k2+1] = Im[0][k2] = -Im[0][columns-k2], 
768     *       0&lt;k2&lt;columns/2, 
769     * a[k1*columns] = Re[k1][0] = Re[rows-k1][0], 
770     * a[k1*columns+1] = Im[k1][0] = -Im[rows-k1][0], 
771     * a[(rows-k1)*columns+1] = Re[k1][columns/2] = Re[rows-k1][columns/2], 
772     * a[(rows-k1)*columns] = -Im[k1][columns/2] = Im[rows-k1][columns/2], 
773     *       0&lt;k1&lt;rows/2, 
774     * a[0] = Re[0][0], 
775     * a[1] = Re[0][columns/2], 
776     * a[(rows/2)*columns] = Re[rows/2][0], 
777     * a[(rows/2)*columns+1] = Re[rows/2][columns/2]
778     * </pre>
779     * 
780     * This method computes only half of the elements of the real transform. The
781     * other half satisfies the symmetry condition. If you want the full real
782     * inverse transform, use <code>realInverseFull</code>.
783     * 
784     * @param a
785     *            data to transform
786     * 
787     * @param scale
788     *            if true then scaling is performed
789     */
790    public void realInverse(double[] a, boolean scale) {
791        if (isPowerOfTwo == false) {
792            throw new IllegalArgumentException("rows and columns must be power of two numbers");
793        } else {
794            int nthreads;
795            nthreads = ConcurrencyUtils.getNumberOfThreads();
796            if (nthreads != oldNthreads) {
797                nt = 8 * nthreads * rows;
798                if (columns == 4 * nthreads) {
799                    nt >>= 1;
800                } else if (columns < 4 * nthreads) {
801                    nt >>= 2;
802                }
803                t = new double[nt];
804                oldNthreads = nthreads;
805            }
806            if ((nthreads > 1) && useThreads) {
807                rdft2d_sub(-1, a);
808                cdft2d_subth(1, a, scale);
809                xdft2d0_subth1(1, -1, a, scale);
810            } else {
811                rdft2d_sub(-1, a);
812                cdft2d_sub(1, a, scale);
813                for (int r = 0; r < rows; r++) {
814                    fftColumns.realInverse(a, r * columns, scale);
815                }
816            }
817        }
818    }
819
820    /**
821     * Computes 2D inverse DFT of real data leaving the result in <code>a</code>
822     * . This method only works when the sizes of both dimensions are
823     * power-of-two numbers. The physical layout of the input data has to be as
824     * follows:
825     * 
826     * <pre>
827     * a[k1][2*k2] = Re[k1][k2] = Re[rows-k1][columns-k2], 
828     * a[k1][2*k2+1] = Im[k1][k2] = -Im[rows-k1][columns-k2], 
829     *       0&lt;k1&lt;rows, 0&lt;k2&lt;columns/2, 
830     * a[0][2*k2] = Re[0][k2] = Re[0][columns-k2], 
831     * a[0][2*k2+1] = Im[0][k2] = -Im[0][columns-k2], 
832     *       0&lt;k2&lt;columns/2, 
833     * a[k1][0] = Re[k1][0] = Re[rows-k1][0], 
834     * a[k1][1] = Im[k1][0] = -Im[rows-k1][0], 
835     * a[rows-k1][1] = Re[k1][columns/2] = Re[rows-k1][columns/2], 
836     * a[rows-k1][0] = -Im[k1][columns/2] = Im[rows-k1][columns/2], 
837     *       0&lt;k1&lt;rows/2, 
838     * a[0][0] = Re[0][0], 
839     * a[0][1] = Re[0][columns/2], 
840     * a[rows/2][0] = Re[rows/2][0], 
841     * a[rows/2][1] = Re[rows/2][columns/2]
842     * </pre>
843     * 
844     * This method computes only half of the elements of the real transform. The
845     * other half satisfies the symmetry condition. If you want the full real
846     * inverse transform, use <code>realInverseFull</code>.
847     * 
848     * @param a
849     *            data to transform
850     * 
851     * @param scale
852     *            if true then scaling is performed
853     */
854    public void realInverse(double[][] a, boolean scale) {
855        if (isPowerOfTwo == false) {
856            throw new IllegalArgumentException("rows and columns must be power of two numbers");
857        } else {
858            int nthreads;
859
860            nthreads = ConcurrencyUtils.getNumberOfThreads();
861            if (nthreads != oldNthreads) {
862                nt = 8 * nthreads * rows;
863                if (columns == 4 * nthreads) {
864                    nt >>= 1;
865                } else if (columns < 4 * nthreads) {
866                    nt >>= 2;
867                }
868                t = new double[nt];
869                oldNthreads = nthreads;
870            }
871            if ((nthreads > 1) && useThreads) {
872                rdft2d_sub(-1, a);
873                cdft2d_subth(1, a, scale);
874                xdft2d0_subth1(1, -1, a, scale);
875            } else {
876                rdft2d_sub(-1, a);
877                cdft2d_sub(1, a, scale);
878                for (int r = 0; r < rows; r++) {
879                    fftColumns.realInverse(a[r], scale);
880                }
881            }
882        }
883    }
884
885    /**
886     * Computes 2D inverse DFT of real data leaving the result in <code>a</code>
887     * . This method computes full real inverse transform, i.e. you will get the
888     * same result as from <code>complexInverse</code> called with all imaginary
889     * part equal 0. Because the result is stored in <code>a</code>, the input
890     * array must be of size rows*2*columns, with only the first rows*columns
891     * elements filled with real data.
892     * 
893     * @param a
894     *            data to transform
895     * 
896     * @param scale
897     *            if true then scaling is performed
898     */
899    public void realInverseFull(double[] a, boolean scale) {
900        if (isPowerOfTwo) {
901            int nthreads;
902
903            nthreads = ConcurrencyUtils.getNumberOfThreads();
904            if (nthreads != oldNthreads) {
905                nt = 8 * nthreads * rows;
906                if (columns == 4 * nthreads) {
907                    nt >>= 1;
908                } else if (columns < 4 * nthreads) {
909                    nt >>= 2;
910                }
911                t = new double[nt];
912                oldNthreads = nthreads;
913            }
914            if ((nthreads > 1) && useThreads) {
915                xdft2d0_subth2(1, -1, a, scale);
916                cdft2d_subth(1, a, scale);
917                rdft2d_sub(1, a);
918            } else {
919                for (int r = 0; r < rows; r++) {
920                    fftColumns.realInverse2(a, r * columns, scale);
921                }
922                cdft2d_sub(1, a, scale);
923                rdft2d_sub(1, a);
924            }
925            fillSymmetric(a);
926        } else {
927            mixedRadixRealInverseFull(a, scale);
928        }
929    }
930
931    /**
932     * Computes 2D inverse DFT of real data leaving the result in <code>a</code>
933     * . This method computes full real inverse transform, i.e. you will get the
934     * same result as from <code>complexInverse</code> called with all imaginary
935     * part equal 0. Because the result is stored in <code>a</code>, the input
936     * array must be of size rows by 2*columns, with only the first rows by
937     * columns elements filled with real data.
938     * 
939     * @param a
940     *            data to transform
941     * 
942     * @param scale
943     *            if true then scaling is performed
944     */
945    public void realInverseFull(double[][] a, boolean scale) {
946        if (isPowerOfTwo) {
947            int nthreads;
948
949            nthreads = ConcurrencyUtils.getNumberOfThreads();
950            if (nthreads != oldNthreads) {
951                nt = 8 * nthreads * rows;
952                if (columns == 4 * nthreads) {
953                    nt >>= 1;
954                } else if (columns < 4 * nthreads) {
955                    nt >>= 2;
956                }
957                t = new double[nt];
958                oldNthreads = nthreads;
959            }
960            if ((nthreads > 1) && useThreads) {
961                xdft2d0_subth2(1, -1, a, scale);
962                cdft2d_subth(1, a, scale);
963                rdft2d_sub(1, a);
964            } else {
965                for (int r = 0; r < rows; r++) {
966                    fftColumns.realInverse2(a[r], 0, scale);
967                }
968                cdft2d_sub(1, a, scale);
969                rdft2d_sub(1, a);
970            }
971            fillSymmetric(a);
972        } else {
973            mixedRadixRealInverseFull(a, scale);
974        }
975    }
976
977    private void mixedRadixRealForwardFull(final double[][] a) {
978        final int n2d2 = columns / 2 + 1;
979        final double[][] temp = new double[n2d2][2 * rows];
980
981        int nthreads = ConcurrencyUtils.getNumberOfThreads();
982        if ((nthreads > 1) && useThreads && (rows >= nthreads) && (n2d2 - 2 >= nthreads)) {
983            Future<?>[] futures = new Future[nthreads];
984            int p = rows / nthreads;
985            for (int l = 0; l < nthreads; l++) {
986                final int firstRow = l * p;
987                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
988                futures[l] = ConcurrencyUtils.submit(new Runnable() {
989                    public void run() {
990                        for (int i = firstRow; i < lastRow; i++) {
991                            fftColumns.realForward(a[i]);
992                        }
993                    }
994                });
995            }
996            ConcurrencyUtils.waitForCompletion(futures);
997
998            for (int r = 0; r < rows; r++) {
999                temp[0][r] = a[r][0]; //first column is always real
1000            }
1001            fftRows.realForwardFull(temp[0]);
1002
1003            p = (n2d2 - 2) / nthreads;
1004            for (int l = 0; l < nthreads; l++) {
1005                final int firstColumn = 1 + l * p;
1006                final int lastColumn = (l == (nthreads - 1)) ? n2d2 - 1 : firstColumn + p;
1007                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1008                    public void run() {
1009                        for (int c = firstColumn; c < lastColumn; c++) {
1010                            int idx2 = 2 * c;
1011                            for (int r = 0; r < rows; r++) {
1012                                int idx1 = 2 * r;
1013                                temp[c][idx1] = a[r][idx2];
1014                                temp[c][idx1 + 1] = a[r][idx2 + 1];
1015                            }
1016                            fftRows.complexForward(temp[c]);
1017                        }
1018                    }
1019                });
1020            }
1021            ConcurrencyUtils.waitForCompletion(futures);
1022
1023            if ((columns % 2) == 0) {
1024                for (int r = 0; r < rows; r++) {
1025                    temp[n2d2 - 1][r] = a[r][1];
1026                    //imaginary part = 0;
1027                }
1028                fftRows.realForwardFull(temp[n2d2 - 1]);
1029
1030            } else {
1031                for (int r = 0; r < rows; r++) {
1032                    int idx1 = 2 * r;
1033                    int idx2 = n2d2 - 1;
1034                    temp[idx2][idx1] = a[r][2 * idx2];
1035                    temp[idx2][idx1 + 1] = a[r][1];
1036                }
1037                fftRows.complexForward(temp[n2d2 - 1]);
1038
1039            }
1040
1041            p = rows / nthreads;
1042            for (int l = 0; l < nthreads; l++) {
1043                final int firstRow = l * p;
1044                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1045                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1046                    public void run() {
1047                        for (int r = firstRow; r < lastRow; r++) {
1048                            int idx1 = 2 * r;
1049                            for (int c = 0; c < n2d2; c++) {
1050                                int idx2 = 2 * c;
1051                                a[r][idx2] = temp[c][idx1];
1052                                a[r][idx2 + 1] = temp[c][idx1 + 1];
1053                            }
1054                        }
1055                    }
1056                });
1057            }
1058            ConcurrencyUtils.waitForCompletion(futures);
1059
1060            for (int l = 0; l < nthreads; l++) {
1061                final int firstRow = 1 + l * p;
1062                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1063                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1064                    public void run() {
1065                        for (int r = firstRow; r < lastRow; r++) {
1066                            int idx3 = rows - r;
1067                            for (int c = n2d2; c < columns; c++) {
1068                                int idx1 = 2 * c;
1069                                int idx2 = 2 * (columns - c);
1070                                a[0][idx1] = a[0][idx2];
1071                                a[0][idx1 + 1] = -a[0][idx2 + 1];
1072                                a[r][idx1] = a[idx3][idx2];
1073                                a[r][idx1 + 1] = -a[idx3][idx2 + 1];
1074                            }
1075                        }
1076                    }
1077                });
1078            }
1079            ConcurrencyUtils.waitForCompletion(futures);
1080
1081        } else {
1082            for (int r = 0; r < rows; r++) {
1083                fftColumns.realForward(a[r]);
1084            }
1085
1086            for (int r = 0; r < rows; r++) {
1087                temp[0][r] = a[r][0]; //first column is always real
1088            }
1089            fftRows.realForwardFull(temp[0]);
1090
1091            for (int c = 1; c < n2d2 - 1; c++) {
1092                int idx2 = 2 * c;
1093                for (int r = 0; r < rows; r++) {
1094                    int idx1 = 2 * r;
1095                    temp[c][idx1] = a[r][idx2];
1096                    temp[c][idx1 + 1] = a[r][idx2 + 1];
1097                }
1098                fftRows.complexForward(temp[c]);
1099            }
1100
1101            if ((columns % 2) == 0) {
1102                for (int r = 0; r < rows; r++) {
1103                    temp[n2d2 - 1][r] = a[r][1];
1104                    //imaginary part = 0;
1105                }
1106                fftRows.realForwardFull(temp[n2d2 - 1]);
1107
1108            } else {
1109                for (int r = 0; r < rows; r++) {
1110                    int idx1 = 2 * r;
1111                    int idx2 = n2d2 - 1;
1112                    temp[idx2][idx1] = a[r][2 * idx2];
1113                    temp[idx2][idx1 + 1] = a[r][1];
1114                }
1115                fftRows.complexForward(temp[n2d2 - 1]);
1116
1117            }
1118
1119            for (int r = 0; r < rows; r++) {
1120                int idx1 = 2 * r;
1121                for (int c = 0; c < n2d2; c++) {
1122                    int idx2 = 2 * c;
1123                    a[r][idx2] = temp[c][idx1];
1124                    a[r][idx2 + 1] = temp[c][idx1 + 1];
1125                }
1126            }
1127
1128            //fill symmetric
1129            for (int r = 1; r < rows; r++) {
1130                int idx3 = rows - r;
1131                for (int c = n2d2; c < columns; c++) {
1132                    int idx1 = 2 * c;
1133                    int idx2 = 2 * (columns - c);
1134                    a[0][idx1] = a[0][idx2];
1135                    a[0][idx1 + 1] = -a[0][idx2 + 1];
1136                    a[r][idx1] = a[idx3][idx2];
1137                    a[r][idx1 + 1] = -a[idx3][idx2 + 1];
1138                }
1139            }
1140        }
1141    }
1142
1143    private void mixedRadixRealForwardFull(final double[] a) {
1144        final int rowStride = 2 * columns;
1145        final int n2d2 = columns / 2 + 1;
1146        final double[][] temp = new double[n2d2][2 * rows];
1147
1148        int nthreads = ConcurrencyUtils.getNumberOfThreads();
1149        if ((nthreads > 1) && useThreads && (rows >= nthreads) && (n2d2 - 2 >= nthreads)) {
1150            Future<?>[] futures = new Future[nthreads];
1151            int p = rows / nthreads;
1152            for (int l = 0; l < nthreads; l++) {
1153                final int firstRow = l * p;
1154                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1155                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1156                    public void run() {
1157                        for (int i = firstRow; i < lastRow; i++) {
1158                            fftColumns.realForward(a, i * columns);
1159                        }
1160                    }
1161                });
1162            }
1163            ConcurrencyUtils.waitForCompletion(futures);
1164
1165            for (int r = 0; r < rows; r++) {
1166                temp[0][r] = a[r * columns]; //first column is always real
1167            }
1168            fftRows.realForwardFull(temp[0]);
1169
1170            p = (n2d2 - 2) / nthreads;
1171            for (int l = 0; l < nthreads; l++) {
1172                final int firstColumn = 1 + l * p;
1173                final int lastColumn = (l == (nthreads - 1)) ? n2d2 - 1 : firstColumn + p;
1174                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1175                    public void run() {
1176                        for (int c = firstColumn; c < lastColumn; c++) {
1177                            int idx0 = 2 * c;
1178                            for (int r = 0; r < rows; r++) {
1179                                int idx1 = 2 * r;
1180                                int idx2 = r * columns + idx0;
1181                                temp[c][idx1] = a[idx2];
1182                                temp[c][idx1 + 1] = a[idx2 + 1];
1183                            }
1184                            fftRows.complexForward(temp[c]);
1185                        }
1186                    }
1187                });
1188            }
1189            ConcurrencyUtils.waitForCompletion(futures);
1190            
1191            if ((columns % 2) == 0) {
1192                for (int r = 0; r < rows; r++) {
1193                    temp[n2d2 - 1][r] = a[r * columns + 1];
1194                    //imaginary part = 0;
1195                }
1196                fftRows.realForwardFull(temp[n2d2 - 1]);
1197
1198            } else {
1199                for (int r = 0; r < rows; r++) {
1200                    int idx1 = 2 * r;
1201                    int idx2 = r * columns;
1202                    int idx3 = n2d2 - 1;
1203                    temp[idx3][idx1] = a[idx2 + 2 * idx3];
1204                    temp[idx3][idx1 + 1] = a[idx2 + 1];
1205                }
1206                fftRows.complexForward(temp[n2d2 - 1]);
1207            }
1208
1209            p = rows / nthreads;
1210            for (int l = 0; l < nthreads; l++) {
1211                final int firstRow = l * p;
1212                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1213                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1214                    public void run() {
1215                        for (int r = firstRow; r < lastRow; r++) {
1216                            int idx1 = 2 * r;
1217                            for (int c = 0; c < n2d2; c++) {
1218                                int idx0 = 2 * c;
1219                                int idx2 = r * rowStride + idx0;
1220                                a[idx2] = temp[c][idx1];
1221                                a[idx2 + 1] = temp[c][idx1 + 1];
1222                            }
1223                        }
1224                    }
1225                });
1226            }
1227            ConcurrencyUtils.waitForCompletion(futures);
1228            
1229            for (int l = 0; l < nthreads; l++) {
1230                final int firstRow = 1 + l * p;
1231                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1232                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1233                    public void run() {
1234                        for (int r = firstRow; r < lastRow; r++) {
1235                            int idx5 = r * rowStride;
1236                            int idx6 = (rows - r + 1) * rowStride;
1237                            for (int c = n2d2; c < columns; c++) {
1238                                int idx1 = 2 * c;
1239                                int idx2 = 2 * (columns - c);
1240                                a[idx1] = a[idx2];
1241                                a[idx1 + 1] = -a[idx2 + 1];
1242                                int idx3 = idx5 + idx1;
1243                                int idx4 = idx6 - idx1;
1244                                a[idx3] = a[idx4];
1245                                a[idx3 + 1] = -a[idx4 + 1];
1246                            }
1247                        }
1248                    }
1249                });
1250            }
1251            ConcurrencyUtils.waitForCompletion(futures);
1252            
1253        } else {
1254            for (int r = 0; r < rows; r++) {
1255                fftColumns.realForward(a, r * columns);
1256            }
1257            for (int r = 0; r < rows; r++) {
1258                temp[0][r] = a[r * columns]; //first column is always real
1259            }
1260            fftRows.realForwardFull(temp[0]);
1261
1262            for (int c = 1; c < n2d2 - 1; c++) {
1263                int idx0 = 2 * c;
1264                for (int r = 0; r < rows; r++) {
1265                    int idx1 = 2 * r;
1266                    int idx2 = r * columns + idx0;
1267                    temp[c][idx1] = a[idx2];
1268                    temp[c][idx1 + 1] = a[idx2 + 1];
1269                }
1270                fftRows.complexForward(temp[c]);
1271            }
1272
1273            if ((columns % 2) == 0) {
1274                for (int r = 0; r < rows; r++) {
1275                    temp[n2d2 - 1][r] = a[r * columns + 1];
1276                    //imaginary part = 0;
1277                }
1278                fftRows.realForwardFull(temp[n2d2 - 1]);
1279
1280            } else {
1281                for (int r = 0; r < rows; r++) {
1282                    int idx1 = 2 * r;
1283                    int idx2 = r * columns;
1284                    int idx3 = n2d2 - 1;
1285                    temp[idx3][idx1] = a[idx2 + 2 * idx3];
1286                    temp[idx3][idx1 + 1] = a[idx2 + 1];
1287                }
1288                fftRows.complexForward(temp[n2d2 - 1]);
1289            }
1290
1291            for (int r = 0; r < rows; r++) {
1292                int idx1 = 2 * r;
1293                for (int c = 0; c < n2d2; c++) {
1294                    int idx0 = 2 * c;
1295                    int idx2 = r * rowStride + idx0;
1296                    a[idx2] = temp[c][idx1];
1297                    a[idx2 + 1] = temp[c][idx1 + 1];
1298                }
1299            }
1300
1301            //fill symmetric
1302            for (int r = 1; r < rows; r++) {
1303                int idx5 = r * rowStride;
1304                int idx6 = (rows - r + 1) * rowStride;
1305                for (int c = n2d2; c < columns; c++) {
1306                    int idx1 = 2 * c;
1307                    int idx2 = 2 * (columns - c);
1308                    a[idx1] = a[idx2];
1309                    a[idx1 + 1] = -a[idx2 + 1];
1310                    int idx3 = idx5 + idx1;
1311                    int idx4 = idx6 - idx1;
1312                    a[idx3] = a[idx4];
1313                    a[idx3 + 1] = -a[idx4 + 1];
1314                }
1315            }
1316        }
1317    }
1318
1319    private void mixedRadixRealInverseFull(final double[][] a, final boolean scale) {
1320        final int n2d2 = columns / 2 + 1;
1321        final double[][] temp = new double[n2d2][2 * rows];
1322
1323        int nthreads = ConcurrencyUtils.getNumberOfThreads();
1324        if ((nthreads > 1) && useThreads && (rows >= nthreads) && (n2d2 - 2 >= nthreads)) {
1325            Future<?>[] futures = new Future[nthreads];
1326            int p = rows / nthreads;
1327            for (int l = 0; l < nthreads; l++) {
1328                final int firstRow = l * p;
1329                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1330                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1331                    public void run() {
1332                        for (int i = firstRow; i < lastRow; i++) {
1333                            fftColumns.realInverse2(a[i], 0, scale);
1334                        }
1335                    }
1336                });
1337            }
1338            ConcurrencyUtils.waitForCompletion(futures);
1339
1340            for (int r = 0; r < rows; r++) {
1341                temp[0][r] = a[r][0]; //first column is always real
1342            }
1343            fftRows.realInverseFull(temp[0], scale);
1344
1345            p = (n2d2 - 2) / nthreads;
1346            for (int l = 0; l < nthreads; l++) {
1347                final int firstColumn = 1 + l * p;
1348                final int lastColumn = (l == (nthreads - 1)) ? n2d2 - 1 : firstColumn + p;
1349                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1350                    public void run() {
1351                        for (int c = firstColumn; c < lastColumn; c++) {
1352                            int idx2 = 2 * c;
1353                            for (int r = 0; r < rows; r++) {
1354                                int idx1 = 2 * r;
1355                                temp[c][idx1] = a[r][idx2];
1356                                temp[c][idx1 + 1] = a[r][idx2 + 1];
1357                            }
1358                            fftRows.complexInverse(temp[c], scale);
1359                        }
1360                    }
1361                });
1362            }
1363            ConcurrencyUtils.waitForCompletion(futures);
1364
1365            if ((columns % 2) == 0) {
1366                for (int r = 0; r < rows; r++) {
1367                    temp[n2d2 - 1][r] = a[r][1];
1368                    //imaginary part = 0;
1369                }
1370                fftRows.realInverseFull(temp[n2d2 - 1], scale);
1371
1372            } else {
1373                for (int r = 0; r < rows; r++) {
1374                    int idx1 = 2 * r;
1375                    int idx2 = n2d2 - 1;
1376                    temp[idx2][idx1] = a[r][2 * idx2];
1377                    temp[idx2][idx1 + 1] = a[r][1];
1378                }
1379                fftRows.complexInverse(temp[n2d2 - 1], scale);
1380
1381            }
1382
1383            p = rows / nthreads;
1384            for (int l = 0; l < nthreads; l++) {
1385                final int firstRow = l * p;
1386                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1387                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1388                    public void run() {
1389                        for (int r = firstRow; r < lastRow; r++) {
1390                            int idx1 = 2 * r;
1391                            for (int c = 0; c < n2d2; c++) {
1392                                int idx2 = 2 * c;
1393                                a[r][idx2] = temp[c][idx1];
1394                                a[r][idx2 + 1] = temp[c][idx1 + 1];
1395                            }
1396                        }
1397                    }
1398                });
1399            }
1400            ConcurrencyUtils.waitForCompletion(futures);
1401
1402            for (int l = 0; l < nthreads; l++) {
1403                final int firstRow = 1 + l * p;
1404                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1405                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1406                    public void run() {
1407                        for (int r = firstRow; r < lastRow; r++) {
1408                            int idx3 = rows - r;
1409                            for (int c = n2d2; c < columns; c++) {
1410                                int idx1 = 2 * c;
1411                                int idx2 = 2 * (columns - c);
1412                                a[0][idx1] = a[0][idx2];
1413                                a[0][idx1 + 1] = -a[0][idx2 + 1];
1414                                a[r][idx1] = a[idx3][idx2];
1415                                a[r][idx1 + 1] = -a[idx3][idx2 + 1];
1416                            }
1417                        }
1418                    }
1419                });
1420            }
1421            ConcurrencyUtils.waitForCompletion(futures);
1422
1423        } else {
1424            for (int r = 0; r < rows; r++) {
1425                fftColumns.realInverse2(a[r], 0, scale);
1426            }
1427
1428            for (int r = 0; r < rows; r++) {
1429                temp[0][r] = a[r][0]; //first column is always real
1430            }
1431            fftRows.realInverseFull(temp[0], scale);
1432
1433            for (int c = 1; c < n2d2 - 1; c++) {
1434                int idx2 = 2 * c;
1435                for (int r = 0; r < rows; r++) {
1436                    int idx1 = 2 * r;
1437                    temp[c][idx1] = a[r][idx2];
1438                    temp[c][idx1 + 1] = a[r][idx2 + 1];
1439                }
1440                fftRows.complexInverse(temp[c], scale);
1441            }
1442
1443            if ((columns % 2) == 0) {
1444                for (int r = 0; r < rows; r++) {
1445                    temp[n2d2 - 1][r] = a[r][1];
1446                    //imaginary part = 0;
1447                }
1448                fftRows.realInverseFull(temp[n2d2 - 1], scale);
1449
1450            } else {
1451                for (int r = 0; r < rows; r++) {
1452                    int idx1 = 2 * r;
1453                    int idx2 = n2d2 - 1;
1454                    temp[idx2][idx1] = a[r][2 * idx2];
1455                    temp[idx2][idx1 + 1] = a[r][1];
1456                }
1457                fftRows.complexInverse(temp[n2d2 - 1], scale);
1458
1459            }
1460
1461            for (int r = 0; r < rows; r++) {
1462                int idx1 = 2 * r;
1463                for (int c = 0; c < n2d2; c++) {
1464                    int idx2 = 2 * c;
1465                    a[r][idx2] = temp[c][idx1];
1466                    a[r][idx2 + 1] = temp[c][idx1 + 1];
1467                }
1468            }
1469
1470            //fill symmetric
1471            for (int r = 1; r < rows; r++) {
1472                int idx3 = rows - r;
1473                for (int c = n2d2; c < columns; c++) {
1474                    int idx1 = 2 * c;
1475                    int idx2 = 2 * (columns - c);
1476                    a[0][idx1] = a[0][idx2];
1477                    a[0][idx1 + 1] = -a[0][idx2 + 1];
1478                    a[r][idx1] = a[idx3][idx2];
1479                    a[r][idx1 + 1] = -a[idx3][idx2 + 1];
1480                }
1481            }
1482        }
1483    }
1484
1485    private void mixedRadixRealInverseFull(final double[] a, final boolean scale) {
1486        final int rowStride = 2 * columns;
1487        final int n2d2 = columns / 2 + 1;
1488        final double[][] temp = new double[n2d2][2 * rows];
1489
1490        int nthreads = ConcurrencyUtils.getNumberOfThreads();
1491        if ((nthreads > 1) && useThreads && (rows >= nthreads) && (n2d2 - 2 >= nthreads)) {
1492            Future<?>[] futures = new Future[nthreads];
1493            int p = rows / nthreads;
1494            for (int l = 0; l < nthreads; l++) {
1495                final int firstRow = l * p;
1496                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1497                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1498                    public void run() {
1499                        for (int i = firstRow; i < lastRow; i++) {
1500                            fftColumns.realInverse2(a, i * columns, scale);
1501                        }
1502                    }
1503                });
1504            }
1505            ConcurrencyUtils.waitForCompletion(futures);
1506
1507            for (int r = 0; r < rows; r++) {
1508                temp[0][r] = a[r * columns]; //first column is always real
1509            }
1510            fftRows.realInverseFull(temp[0], scale);
1511
1512            p = (n2d2 - 2) / nthreads;
1513            for (int l = 0; l < nthreads; l++) {
1514                final int firstColumn = 1 + l * p;
1515                final int lastColumn = (l == (nthreads - 1)) ? n2d2 - 1 : firstColumn + p;
1516                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1517                    public void run() {
1518                        for (int c = firstColumn; c < lastColumn; c++) {
1519                            int idx0 = 2 * c;
1520                            for (int r = 0; r < rows; r++) {
1521                                int idx1 = 2 * r;
1522                                int idx2 = r * columns + idx0;
1523                                temp[c][idx1] = a[idx2];
1524                                temp[c][idx1 + 1] = a[idx2 + 1];
1525                            }
1526                            fftRows.complexInverse(temp[c], scale);
1527                        }
1528                    }
1529                });
1530            }
1531            ConcurrencyUtils.waitForCompletion(futures);
1532
1533            if ((columns % 2) == 0) {
1534                for (int r = 0; r < rows; r++) {
1535                    temp[n2d2 - 1][r] = a[r * columns + 1];
1536                    //imaginary part = 0;
1537                }
1538                fftRows.realInverseFull(temp[n2d2 - 1], scale);
1539
1540            } else {
1541                for (int r = 0; r < rows; r++) {
1542                    int idx1 = 2 * r;
1543                    int idx2 = r * columns;
1544                    int idx3 = n2d2 - 1;
1545                    temp[idx3][idx1] = a[idx2 + 2 * idx3];
1546                    temp[idx3][idx1 + 1] = a[idx2 + 1];
1547                }
1548                fftRows.complexInverse(temp[n2d2 - 1], scale);
1549            }
1550
1551            p = rows / nthreads;
1552            for (int l = 0; l < nthreads; l++) {
1553                final int firstRow = l * p;
1554                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1555                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1556                    public void run() {
1557                        for (int r = firstRow; r < lastRow; r++) {
1558                            int idx1 = 2 * r;
1559                            for (int c = 0; c < n2d2; c++) {
1560                                int idx0 = 2 * c;
1561                                int idx2 = r * rowStride + idx0;
1562                                a[idx2] = temp[c][idx1];
1563                                a[idx2 + 1] = temp[c][idx1 + 1];
1564                            }
1565                        }
1566                    }
1567                });
1568            }
1569            ConcurrencyUtils.waitForCompletion(futures);
1570
1571            for (int l = 0; l < nthreads; l++) {
1572                final int firstRow = 1 + l * p;
1573                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1574                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1575                    public void run() {
1576                        for (int r = firstRow; r < lastRow; r++) {
1577                            int idx5 = r * rowStride;
1578                            int idx6 = (rows - r + 1) * rowStride;
1579                            for (int c = n2d2; c < columns; c++) {
1580                                int idx1 = 2 * c;
1581                                int idx2 = 2 * (columns - c);
1582                                a[idx1] = a[idx2];
1583                                a[idx1 + 1] = -a[idx2 + 1];
1584                                int idx3 = idx5 + idx1;
1585                                int idx4 = idx6 - idx1;
1586                                a[idx3] = a[idx4];
1587                                a[idx3 + 1] = -a[idx4 + 1];
1588                            }
1589                        }
1590                    }
1591                });
1592            }
1593            ConcurrencyUtils.waitForCompletion(futures);
1594        } else {
1595            for (int r = 0; r < rows; r++) {
1596                fftColumns.realInverse2(a, r * columns, scale);
1597            }
1598            for (int r = 0; r < rows; r++) {
1599                temp[0][r] = a[r * columns]; //first column is always real
1600            }
1601            fftRows.realInverseFull(temp[0], scale);
1602
1603            for (int c = 1; c < n2d2 - 1; c++) {
1604                int idx0 = 2 * c;
1605                for (int r = 0; r < rows; r++) {
1606                    int idx1 = 2 * r;
1607                    int idx2 = r * columns + idx0;
1608                    temp[c][idx1] = a[idx2];
1609                    temp[c][idx1 + 1] = a[idx2 + 1];
1610                }
1611                fftRows.complexInverse(temp[c], scale);
1612            }
1613
1614            if ((columns % 2) == 0) {
1615                for (int r = 0; r < rows; r++) {
1616                    temp[n2d2 - 1][r] = a[r * columns + 1];
1617                    //imaginary part = 0;
1618                }
1619                fftRows.realInverseFull(temp[n2d2 - 1], scale);
1620
1621            } else {
1622                for (int r = 0; r < rows; r++) {
1623                    int idx1 = 2 * r;
1624                    int idx2 = r * columns;
1625                    int idx3 = n2d2 - 1;
1626                    temp[idx3][idx1] = a[idx2 + 2 * idx3];
1627                    temp[idx3][idx1 + 1] = a[idx2 + 1];
1628                }
1629                fftRows.complexInverse(temp[n2d2 - 1], scale);
1630            }
1631
1632            for (int r = 0; r < rows; r++) {
1633                int idx1 = 2 * r;
1634                for (int c = 0; c < n2d2; c++) {
1635                    int idx0 = 2 * c;
1636                    int idx2 = r * rowStride + idx0;
1637                    a[idx2] = temp[c][idx1];
1638                    a[idx2 + 1] = temp[c][idx1 + 1];
1639                }
1640            }
1641
1642            //fill symmetric
1643            for (int r = 1; r < rows; r++) {
1644                int idx5 = r * rowStride;
1645                int idx6 = (rows - r + 1) * rowStride;
1646                for (int c = n2d2; c < columns; c++) {
1647                    int idx1 = 2 * c;
1648                    int idx2 = 2 * (columns - c);
1649                    a[idx1] = a[idx2];
1650                    a[idx1 + 1] = -a[idx2 + 1];
1651                    int idx3 = idx5 + idx1;
1652                    int idx4 = idx6 - idx1;
1653                    a[idx3] = a[idx4];
1654                    a[idx3 + 1] = -a[idx4 + 1];
1655                }
1656            }
1657        }
1658    }
1659
1660    private void rdft2d_sub(int isgn, double[] a) {
1661        int n1h, j;
1662        double xi;
1663        int idx1, idx2;
1664
1665        n1h = rows >> 1;
1666        if (isgn < 0) {
1667            for (int i = 1; i < n1h; i++) {
1668                j = rows - i;
1669                idx1 = i * columns;
1670                idx2 = j * columns;
1671                xi = a[idx1] - a[idx2];
1672                a[idx1] += a[idx2];
1673                a[idx2] = xi;
1674                xi = a[idx2 + 1] - a[idx1 + 1];
1675                a[idx1 + 1] += a[idx2 + 1];
1676                a[idx2 + 1] = xi;
1677            }
1678        } else {
1679            for (int i = 1; i < n1h; i++) {
1680                j = rows - i;
1681                idx1 = i * columns;
1682                idx2 = j * columns;
1683                a[idx2] = 0.5f * (a[idx1] - a[idx2]);
1684                a[idx1] -= a[idx2];
1685                a[idx2 + 1] = 0.5f * (a[idx1 + 1] + a[idx2 + 1]);
1686                a[idx1 + 1] -= a[idx2 + 1];
1687            }
1688        }
1689    }
1690
1691    private void rdft2d_sub(int isgn, double[][] a) {
1692        int n1h, j;
1693        double xi;
1694
1695        n1h = rows >> 1;
1696        if (isgn < 0) {
1697            for (int i = 1; i < n1h; i++) {
1698                j = rows - i;
1699                xi = a[i][0] - a[j][0];
1700                a[i][0] += a[j][0];
1701                a[j][0] = xi;
1702                xi = a[j][1] - a[i][1];
1703                a[i][1] += a[j][1];
1704                a[j][1] = xi;
1705            }
1706        } else {
1707            for (int i = 1; i < n1h; i++) {
1708                j = rows - i;
1709                a[j][0] = 0.5f * (a[i][0] - a[j][0]);
1710                a[i][0] -= a[j][0];
1711                a[j][1] = 0.5f * (a[i][1] + a[j][1]);
1712                a[i][1] -= a[j][1];
1713            }
1714        }
1715    }
1716
1717    private void cdft2d_sub(int isgn, double[] a, boolean scale) {
1718        int idx1, idx2, idx3, idx4, idx5;
1719        if (isgn == -1) {
1720            if (columns > 4) {
1721                for (int c = 0; c < columns; c += 8) {
1722                    for (int r = 0; r < rows; r++) {
1723                        idx1 = r * columns + c;
1724                        idx2 = 2 * r;
1725                        idx3 = 2 * rows + 2 * r;
1726                        idx4 = idx3 + 2 * rows;
1727                        idx5 = idx4 + 2 * rows;
1728                        t[idx2] = a[idx1];
1729                        t[idx2 + 1] = a[idx1 + 1];
1730                        t[idx3] = a[idx1 + 2];
1731                        t[idx3 + 1] = a[idx1 + 3];
1732                        t[idx4] = a[idx1 + 4];
1733                        t[idx4 + 1] = a[idx1 + 5];
1734                        t[idx5] = a[idx1 + 6];
1735                        t[idx5 + 1] = a[idx1 + 7];
1736                    }
1737                    fftRows.complexForward(t, 0);
1738                    fftRows.complexForward(t, 2 * rows);
1739                    fftRows.complexForward(t, 4 * rows);
1740                    fftRows.complexForward(t, 6 * rows);
1741                    for (int r = 0; r < rows; r++) {
1742                        idx1 = r * columns + c;
1743                        idx2 = 2 * r;
1744                        idx3 = 2 * rows + 2 * r;
1745                        idx4 = idx3 + 2 * rows;
1746                        idx5 = idx4 + 2 * rows;
1747                        a[idx1] = t[idx2];
1748                        a[idx1 + 1] = t[idx2 + 1];
1749                        a[idx1 + 2] = t[idx3];
1750                        a[idx1 + 3] = t[idx3 + 1];
1751                        a[idx1 + 4] = t[idx4];
1752                        a[idx1 + 5] = t[idx4 + 1];
1753                        a[idx1 + 6] = t[idx5];
1754                        a[idx1 + 7] = t[idx5 + 1];
1755                    }
1756                }
1757            } else if (columns == 4) {
1758                for (int r = 0; r < rows; r++) {
1759                    idx1 = r * columns;
1760                    idx2 = 2 * r;
1761                    idx3 = 2 * rows + 2 * r;
1762                    t[idx2] = a[idx1];
1763                    t[idx2 + 1] = a[idx1 + 1];
1764                    t[idx3] = a[idx1 + 2];
1765                    t[idx3 + 1] = a[idx1 + 3];
1766                }
1767                fftRows.complexForward(t, 0);
1768                fftRows.complexForward(t, 2 * rows);
1769                for (int r = 0; r < rows; r++) {
1770                    idx1 = r * columns;
1771                    idx2 = 2 * r;
1772                    idx3 = 2 * rows + 2 * r;
1773                    a[idx1] = t[idx2];
1774                    a[idx1 + 1] = t[idx2 + 1];
1775                    a[idx1 + 2] = t[idx3];
1776                    a[idx1 + 3] = t[idx3 + 1];
1777                }
1778            } else if (columns == 2) {
1779                for (int r = 0; r < rows; r++) {
1780                    idx1 = r * columns;
1781                    idx2 = 2 * r;
1782                    t[idx2] = a[idx1];
1783                    t[idx2 + 1] = a[idx1 + 1];
1784                }
1785                fftRows.complexForward(t, 0);
1786                for (int r = 0; r < rows; r++) {
1787                    idx1 = r * columns;
1788                    idx2 = 2 * r;
1789                    a[idx1] = t[idx2];
1790                    a[idx1 + 1] = t[idx2 + 1];
1791                }
1792            }
1793        } else {
1794            if (columns > 4) {
1795                for (int c = 0; c < columns; c += 8) {
1796                    for (int r = 0; r < rows; r++) {
1797                        idx1 = r * columns + c;
1798                        idx2 = 2 * r;
1799                        idx3 = 2 * rows + 2 * r;
1800                        idx4 = idx3 + 2 * rows;
1801                        idx5 = idx4 + 2 * rows;
1802                        t[idx2] = a[idx1];
1803                        t[idx2 + 1] = a[idx1 + 1];
1804                        t[idx3] = a[idx1 + 2];
1805                        t[idx3 + 1] = a[idx1 + 3];
1806                        t[idx4] = a[idx1 + 4];
1807                        t[idx4 + 1] = a[idx1 + 5];
1808                        t[idx5] = a[idx1 + 6];
1809                        t[idx5 + 1] = a[idx1 + 7];
1810                    }
1811                    fftRows.complexInverse(t, 0, scale);
1812                    fftRows.complexInverse(t, 2 * rows, scale);
1813                    fftRows.complexInverse(t, 4 * rows, scale);
1814                    fftRows.complexInverse(t, 6 * rows, scale);
1815                    for (int r = 0; r < rows; r++) {
1816                        idx1 = r * columns + c;
1817                        idx2 = 2 * r;
1818                        idx3 = 2 * rows + 2 * r;
1819                        idx4 = idx3 + 2 * rows;
1820                        idx5 = idx4 + 2 * rows;
1821                        a[idx1] = t[idx2];
1822                        a[idx1 + 1] = t[idx2 + 1];
1823                        a[idx1 + 2] = t[idx3];
1824                        a[idx1 + 3] = t[idx3 + 1];
1825                        a[idx1 + 4] = t[idx4];
1826                        a[idx1 + 5] = t[idx4 + 1];
1827                        a[idx1 + 6] = t[idx5];
1828                        a[idx1 + 7] = t[idx5 + 1];
1829                    }
1830                }
1831            } else if (columns == 4) {
1832                for (int r = 0; r < rows; r++) {
1833                    idx1 = r * columns;
1834                    idx2 = 2 * r;
1835                    idx3 = 2 * rows + 2 * r;
1836                    t[idx2] = a[idx1];
1837                    t[idx2 + 1] = a[idx1 + 1];
1838                    t[idx3] = a[idx1 + 2];
1839                    t[idx3 + 1] = a[idx1 + 3];
1840                }
1841                fftRows.complexInverse(t, 0, scale);
1842                fftRows.complexInverse(t, 2 * rows, scale);
1843                for (int r = 0; r < rows; r++) {
1844                    idx1 = r * columns;
1845                    idx2 = 2 * r;
1846                    idx3 = 2 * rows + 2 * r;
1847                    a[idx1] = t[idx2];
1848                    a[idx1 + 1] = t[idx2 + 1];
1849                    a[idx1 + 2] = t[idx3];
1850                    a[idx1 + 3] = t[idx3 + 1];
1851                }
1852            } else if (columns == 2) {
1853                for (int r = 0; r < rows; r++) {
1854                    idx1 = r * columns;
1855                    idx2 = 2 * r;
1856                    t[idx2] = a[idx1];
1857                    t[idx2 + 1] = a[idx1 + 1];
1858                }
1859                fftRows.complexInverse(t, 0, scale);
1860                for (int r = 0; r < rows; r++) {
1861                    idx1 = r * columns;
1862                    idx2 = 2 * r;
1863                    a[idx1] = t[idx2];
1864                    a[idx1 + 1] = t[idx2 + 1];
1865                }
1866            }
1867        }
1868    }
1869
1870    private void cdft2d_sub(int isgn, double[][] a, boolean scale) {
1871        int idx2, idx3, idx4, idx5;
1872        if (isgn == -1) {
1873            if (columns > 4) {
1874                for (int c = 0; c < columns; c += 8) {
1875                    for (int r = 0; r < rows; r++) {
1876                        idx2 = 2 * r;
1877                        idx3 = 2 * rows + 2 * r;
1878                        idx4 = idx3 + 2 * rows;
1879                        idx5 = idx4 + 2 * rows;
1880                        t[idx2] = a[r][c];
1881                        t[idx2 + 1] = a[r][c + 1];
1882                        t[idx3] = a[r][c + 2];
1883                        t[idx3 + 1] = a[r][c + 3];
1884                        t[idx4] = a[r][c + 4];
1885                        t[idx4 + 1] = a[r][c + 5];
1886                        t[idx5] = a[r][c + 6];
1887                        t[idx5 + 1] = a[r][c + 7];
1888                    }
1889                    fftRows.complexForward(t, 0);
1890                    fftRows.complexForward(t, 2 * rows);
1891                    fftRows.complexForward(t, 4 * rows);
1892                    fftRows.complexForward(t, 6 * rows);
1893                    for (int r = 0; r < rows; r++) {
1894                        idx2 = 2 * r;
1895                        idx3 = 2 * rows + 2 * r;
1896                        idx4 = idx3 + 2 * rows;
1897                        idx5 = idx4 + 2 * rows;
1898                        a[r][c] = t[idx2];
1899                        a[r][c + 1] = t[idx2 + 1];
1900                        a[r][c + 2] = t[idx3];
1901                        a[r][c + 3] = t[idx3 + 1];
1902                        a[r][c + 4] = t[idx4];
1903                        a[r][c + 5] = t[idx4 + 1];
1904                        a[r][c + 6] = t[idx5];
1905                        a[r][c + 7] = t[idx5 + 1];
1906                    }
1907                }
1908            } else if (columns == 4) {
1909                for (int r = 0; r < rows; r++) {
1910                    idx2 = 2 * r;
1911                    idx3 = 2 * rows + 2 * r;
1912                    t[idx2] = a[r][0];
1913                    t[idx2 + 1] = a[r][1];
1914                    t[idx3] = a[r][2];
1915                    t[idx3 + 1] = a[r][3];
1916                }
1917                fftRows.complexForward(t, 0);
1918                fftRows.complexForward(t, 2 * rows);
1919                for (int r = 0; r < rows; r++) {
1920                    idx2 = 2 * r;
1921                    idx3 = 2 * rows + 2 * r;
1922                    a[r][0] = t[idx2];
1923                    a[r][1] = t[idx2 + 1];
1924                    a[r][2] = t[idx3];
1925                    a[r][3] = t[idx3 + 1];
1926                }
1927            } else if (columns == 2) {
1928                for (int r = 0; r < rows; r++) {
1929                    idx2 = 2 * r;
1930                    t[idx2] = a[r][0];
1931                    t[idx2 + 1] = a[r][1];
1932                }
1933                fftRows.complexForward(t, 0);
1934                for (int r = 0; r < rows; r++) {
1935                    idx2 = 2 * r;
1936                    a[r][0] = t[idx2];
1937                    a[r][1] = t[idx2 + 1];
1938                }
1939            }
1940        } else {
1941            if (columns > 4) {
1942                for (int c = 0; c < columns; c += 8) {
1943                    for (int r = 0; r < rows; r++) {
1944                        idx2 = 2 * r;
1945                        idx3 = 2 * rows + 2 * r;
1946                        idx4 = idx3 + 2 * rows;
1947                        idx5 = idx4 + 2 * rows;
1948                        t[idx2] = a[r][c];
1949                        t[idx2 + 1] = a[r][c + 1];
1950                        t[idx3] = a[r][c + 2];
1951                        t[idx3 + 1] = a[r][c + 3];
1952                        t[idx4] = a[r][c + 4];
1953                        t[idx4 + 1] = a[r][c + 5];
1954                        t[idx5] = a[r][c + 6];
1955                        t[idx5 + 1] = a[r][c + 7];
1956                    }
1957                    fftRows.complexInverse(t, 0, scale);
1958                    fftRows.complexInverse(t, 2 * rows, scale);
1959                    fftRows.complexInverse(t, 4 * rows, scale);
1960                    fftRows.complexInverse(t, 6 * rows, scale);
1961                    for (int r = 0; r < rows; r++) {
1962                        idx2 = 2 * r;
1963                        idx3 = 2 * rows + 2 * r;
1964                        idx4 = idx3 + 2 * rows;
1965                        idx5 = idx4 + 2 * rows;
1966                        a[r][c] = t[idx2];
1967                        a[r][c + 1] = t[idx2 + 1];
1968                        a[r][c + 2] = t[idx3];
1969                        a[r][c + 3] = t[idx3 + 1];
1970                        a[r][c + 4] = t[idx4];
1971                        a[r][c + 5] = t[idx4 + 1];
1972                        a[r][c + 6] = t[idx5];
1973                        a[r][c + 7] = t[idx5 + 1];
1974                    }
1975                }
1976            } else if (columns == 4) {
1977                for (int r = 0; r < rows; r++) {
1978                    idx2 = 2 * r;
1979                    idx3 = 2 * rows + 2 * r;
1980                    t[idx2] = a[r][0];
1981                    t[idx2 + 1] = a[r][1];
1982                    t[idx3] = a[r][2];
1983                    t[idx3 + 1] = a[r][3];
1984                }
1985                fftRows.complexInverse(t, 0, scale);
1986                fftRows.complexInverse(t, 2 * rows, scale);
1987                for (int r = 0; r < rows; r++) {
1988                    idx2 = 2 * r;
1989                    idx3 = 2 * rows + 2 * r;
1990                    a[r][0] = t[idx2];
1991                    a[r][1] = t[idx2 + 1];
1992                    a[r][2] = t[idx3];
1993                    a[r][3] = t[idx3 + 1];
1994                }
1995            } else if (columns == 2) {
1996                for (int r = 0; r < rows; r++) {
1997                    idx2 = 2 * r;
1998                    t[idx2] = a[r][0];
1999                    t[idx2 + 1] = a[r][1];
2000                }
2001                fftRows.complexInverse(t, 0, scale);
2002                for (int r = 0; r < rows; r++) {
2003                    idx2 = 2 * r;
2004                    a[r][0] = t[idx2];
2005                    a[r][1] = t[idx2 + 1];
2006                }
2007            }
2008        }
2009    }
2010
2011    private void xdft2d0_subth1(final int icr, final int isgn, final double[] a, final boolean scale) {
2012        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
2013
2014        Future<?>[] futures = new Future[nthreads];
2015        for (int i = 0; i < nthreads; i++) {
2016            final int n0 = i;
2017            futures[i] = ConcurrencyUtils.submit(new Runnable() {
2018                public void run() {
2019                    if (icr == 0) {
2020                        if (isgn == -1) {
2021                            for (int r = n0; r < rows; r += nthreads) {
2022                                fftColumns.complexForward(a, r * columns);
2023                            }
2024                        } else {
2025                            for (int r = n0; r < rows; r += nthreads) {
2026                                fftColumns.complexInverse(a, r * columns, scale);
2027                            }
2028                        }
2029                    } else {
2030                        if (isgn == 1) {
2031                            for (int r = n0; r < rows; r += nthreads) {
2032                                fftColumns.realForward(a, r * columns);
2033                            }
2034                        } else {
2035                            for (int r = n0; r < rows; r += nthreads) {
2036                                fftColumns.realInverse(a, r * columns, scale);
2037                            }
2038                        }
2039                    }
2040                }
2041            });
2042        }
2043        ConcurrencyUtils.waitForCompletion(futures);
2044    }
2045
2046    private void xdft2d0_subth2(final int icr, final int isgn, final double[] a, final boolean scale) {
2047        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
2048
2049        Future<?>[] futures = new Future[nthreads];
2050        for (int i = 0; i < nthreads; i++) {
2051            final int n0 = i;
2052            futures[i] = ConcurrencyUtils.submit(new Runnable() {
2053                public void run() {
2054                    if (icr == 0) {
2055                        if (isgn == -1) {
2056                            for (int r = n0; r < rows; r += nthreads) {
2057                                fftColumns.complexForward(a, r * columns);
2058                            }
2059                        } else {
2060                            for (int r = n0; r < rows; r += nthreads) {
2061                                fftColumns.complexInverse(a, r * columns, scale);
2062                            }
2063                        }
2064                    } else {
2065                        if (isgn == 1) {
2066                            for (int r = n0; r < rows; r += nthreads) {
2067                                fftColumns.realForward(a, r * columns);
2068                            }
2069                        } else {
2070                            for (int r = n0; r < rows; r += nthreads) {
2071                                fftColumns.realInverse2(a, r * columns, scale);
2072                            }
2073                        }
2074                    }
2075                }
2076            });
2077        }
2078        ConcurrencyUtils.waitForCompletion(futures);
2079    }
2080
2081    private void xdft2d0_subth1(final int icr, final int isgn, final double[][] a, final boolean scale) {
2082        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
2083
2084        Future<?>[] futures = new Future[nthreads];
2085        for (int i = 0; i < nthreads; i++) {
2086            final int n0 = i;
2087            futures[i] = ConcurrencyUtils.submit(new Runnable() {
2088                public void run() {
2089                    if (icr == 0) {
2090                        if (isgn == -1) {
2091                            for (int r = n0; r < rows; r += nthreads) {
2092                                fftColumns.complexForward(a[r]);
2093                            }
2094                        } else {
2095                            for (int r = n0; r < rows; r += nthreads) {
2096                                fftColumns.complexInverse(a[r], scale);
2097                            }
2098                        }
2099                    } else {
2100                        if (isgn == 1) {
2101                            for (int r = n0; r < rows; r += nthreads) {
2102                                fftColumns.realForward(a[r]);
2103                            }
2104                        } else {
2105                            for (int r = n0; r < rows; r += nthreads) {
2106                                fftColumns.realInverse(a[r], scale);
2107                            }
2108                        }
2109                    }
2110                }
2111            });
2112        }
2113        ConcurrencyUtils.waitForCompletion(futures);
2114    }
2115
2116    private void xdft2d0_subth2(final int icr, final int isgn, final double[][] a, final boolean scale) {
2117        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
2118
2119        Future<?>[] futures = new Future[nthreads];
2120        for (int i = 0; i < nthreads; i++) {
2121            final int n0 = i;
2122            futures[i] = ConcurrencyUtils.submit(new Runnable() {
2123                public void run() {
2124                    if (icr == 0) {
2125                        if (isgn == -1) {
2126                            for (int r = n0; r < rows; r += nthreads) {
2127                                fftColumns.complexForward(a[r]);
2128                            }
2129                        } else {
2130                            for (int r = n0; r < rows; r += nthreads) {
2131                                fftColumns.complexInverse(a[r], scale);
2132                            }
2133                        }
2134                    } else {
2135                        if (isgn == 1) {
2136                            for (int r = n0; r < rows; r += nthreads) {
2137                                fftColumns.realForward(a[r]);
2138                            }
2139                        } else {
2140                            for (int r = n0; r < rows; r += nthreads) {
2141                                fftColumns.realInverse2(a[r], 0, scale);
2142                            }
2143                        }
2144                    }
2145                }
2146            });
2147        }
2148        ConcurrencyUtils.waitForCompletion(futures);
2149    }
2150
2151    private void cdft2d_subth(final int isgn, final double[] a, final boolean scale) {
2152        int nthread = ConcurrencyUtils.getNumberOfThreads();
2153        int nt = 8 * rows;
2154        if (columns == 4 * nthread) {
2155            nt >>= 1;
2156        } else if (columns < 4 * nthread) {
2157            nthread = columns >> 1;
2158            nt >>= 2;
2159        }
2160        Future<?>[] futures = new Future[nthread];
2161        final int nthreads = nthread;
2162        for (int i = 0; i < nthread; i++) {
2163            final int n0 = i;
2164            final int startt = nt * i;
2165            futures[i] = ConcurrencyUtils.submit(new Runnable() {
2166                public void run() {
2167                    int idx1, idx2, idx3, idx4, idx5;
2168                    if (isgn == -1) {
2169                        if (columns > 4 * nthreads) {
2170                            for (int c = 8 * n0; c < columns; c += 8 * nthreads) {
2171                                for (int r = 0; r < rows; r++) {
2172                                    idx1 = r * columns + c;
2173                                    idx2 = startt + 2 * r;
2174                                    idx3 = startt + 2 * rows + 2 * r;
2175                                    idx4 = idx3 + 2 * rows;
2176                                    idx5 = idx4 + 2 * rows;
2177                                    t[idx2] = a[idx1];
2178                                    t[idx2 + 1] = a[idx1 + 1];
2179                                    t[idx3] = a[idx1 + 2];
2180                                    t[idx3 + 1] = a[idx1 + 3];
2181                                    t[idx4] = a[idx1 + 4];
2182                                    t[idx4 + 1] = a[idx1 + 5];
2183                                    t[idx5] = a[idx1 + 6];
2184                                    t[idx5 + 1] = a[idx1 + 7];
2185                                }
2186                                fftRows.complexForward(t, startt);
2187                                fftRows.complexForward(t, startt + 2 * rows);
2188                                fftRows.complexForward(t, startt + 4 * rows);
2189                                fftRows.complexForward(t, startt + 6 * rows);
2190                                for (int r = 0; r < rows; r++) {
2191                                    idx1 = r * columns + c;
2192                                    idx2 = startt + 2 * r;
2193                                    idx3 = startt + 2 * rows + 2 * r;
2194                                    idx4 = idx3 + 2 * rows;
2195                                    idx5 = idx4 + 2 * rows;
2196                                    a[idx1] = t[idx2];
2197                                    a[idx1 + 1] = t[idx2 + 1];
2198                                    a[idx1 + 2] = t[idx3];
2199                                    a[idx1 + 3] = t[idx3 + 1];
2200                                    a[idx1 + 4] = t[idx4];
2201                                    a[idx1 + 5] = t[idx4 + 1];
2202                                    a[idx1 + 6] = t[idx5];
2203                                    a[idx1 + 7] = t[idx5 + 1];
2204                                }
2205                            }
2206                        } else if (columns == 4 * nthreads) {
2207                            for (int r = 0; r < rows; r++) {
2208                                idx1 = r * columns + 4 * n0;
2209                                idx2 = startt + 2 * r;
2210                                idx3 = startt + 2 * rows + 2 * r;
2211                                t[idx2] = a[idx1];
2212                                t[idx2 + 1] = a[idx1 + 1];
2213                                t[idx3] = a[idx1 + 2];
2214                                t[idx3 + 1] = a[idx1 + 3];
2215                            }
2216                            fftRows.complexForward(t, startt);
2217                            fftRows.complexForward(t, startt + 2 * rows);
2218                            for (int r = 0; r < rows; r++) {
2219                                idx1 = r * columns + 4 * n0;
2220                                idx2 = startt + 2 * r;
2221                                idx3 = startt + 2 * rows + 2 * r;
2222                                a[idx1] = t[idx2];
2223                                a[idx1 + 1] = t[idx2 + 1];
2224                                a[idx1 + 2] = t[idx3];
2225                                a[idx1 + 3] = t[idx3 + 1];
2226                            }
2227                        } else if (columns == 2 * nthreads) {
2228                            for (int r = 0; r < rows; r++) {
2229                                idx1 = r * columns + 2 * n0;
2230                                idx2 = startt + 2 * r;
2231                                t[idx2] = a[idx1];
2232                                t[idx2 + 1] = a[idx1 + 1];
2233                            }
2234                            fftRows.complexForward(t, startt);
2235                            for (int r = 0; r < rows; r++) {
2236                                idx1 = r * columns + 2 * n0;
2237                                idx2 = startt + 2 * r;
2238                                a[idx1] = t[idx2];
2239                                a[idx1 + 1] = t[idx2 + 1];
2240                            }
2241                        }
2242                    } else {
2243                        if (columns > 4 * nthreads) {
2244                            for (int c = 8 * n0; c < columns; c += 8 * nthreads) {
2245                                for (int r = 0; r < rows; r++) {
2246                                    idx1 = r * columns + c;
2247                                    idx2 = startt + 2 * r;
2248                                    idx3 = startt + 2 * rows + 2 * r;
2249                                    idx4 = idx3 + 2 * rows;
2250                                    idx5 = idx4 + 2 * rows;
2251                                    t[idx2] = a[idx1];
2252                                    t[idx2 + 1] = a[idx1 + 1];
2253                                    t[idx3] = a[idx1 + 2];
2254                                    t[idx3 + 1] = a[idx1 + 3];
2255                                    t[idx4] = a[idx1 + 4];
2256                                    t[idx4 + 1] = a[idx1 + 5];
2257                                    t[idx5] = a[idx1 + 6];
2258                                    t[idx5 + 1] = a[idx1 + 7];
2259                                }
2260                                fftRows.complexInverse(t, startt, scale);
2261                                fftRows.complexInverse(t, startt + 2 * rows, scale);
2262                                fftRows.complexInverse(t, startt + 4 * rows, scale);
2263                                fftRows.complexInverse(t, startt + 6 * rows, scale);
2264                                for (int r = 0; r < rows; r++) {
2265                                    idx1 = r * columns + c;
2266                                    idx2 = startt + 2 * r;
2267                                    idx3 = startt + 2 * rows + 2 * r;
2268                                    idx4 = idx3 + 2 * rows;
2269                                    idx5 = idx4 + 2 * rows;
2270                                    a[idx1] = t[idx2];
2271                                    a[idx1 + 1] = t[idx2 + 1];
2272                                    a[idx1 + 2] = t[idx3];
2273                                    a[idx1 + 3] = t[idx3 + 1];
2274                                    a[idx1 + 4] = t[idx4];
2275                                    a[idx1 + 5] = t[idx4 + 1];
2276                                    a[idx1 + 6] = t[idx5];
2277                                    a[idx1 + 7] = t[idx5 + 1];
2278                                }
2279                            }
2280                        } else if (columns == 4 * nthreads) {
2281                            for (int r = 0; r < rows; r++) {
2282                                idx1 = r * columns + 4 * n0;
2283                                idx2 = startt + 2 * r;
2284                                idx3 = startt + 2 * rows + 2 * r;
2285                                t[idx2] = a[idx1];
2286                                t[idx2 + 1] = a[idx1 + 1];
2287                                t[idx3] = a[idx1 + 2];
2288                                t[idx3 + 1] = a[idx1 + 3];
2289                            }
2290                            fftRows.complexInverse(t, startt, scale);
2291                            fftRows.complexInverse(t, startt + 2 * rows, scale);
2292                            for (int r = 0; r < rows; r++) {
2293                                idx1 = r * columns + 4 * n0;
2294                                idx2 = startt + 2 * r;
2295                                idx3 = startt + 2 * rows + 2 * r;
2296                                a[idx1] = t[idx2];
2297                                a[idx1 + 1] = t[idx2 + 1];
2298                                a[idx1 + 2] = t[idx3];
2299                                a[idx1 + 3] = t[idx3 + 1];
2300                            }
2301                        } else if (columns == 2 * nthreads) {
2302                            for (int r = 0; r < rows; r++) {
2303                                idx1 = r * columns + 2 * n0;
2304                                idx2 = startt + 2 * r;
2305                                t[idx2] = a[idx1];
2306                                t[idx2 + 1] = a[idx1 + 1];
2307                            }
2308                            fftRows.complexInverse(t, startt, scale);
2309                            for (int r = 0; r < rows; r++) {
2310                                idx1 = r * columns + 2 * n0;
2311                                idx2 = startt + 2 * r;
2312                                a[idx1] = t[idx2];
2313                                a[idx1 + 1] = t[idx2 + 1];
2314                            }
2315                        }
2316                    }
2317                }
2318            });
2319        }
2320        ConcurrencyUtils.waitForCompletion(futures);
2321    }
2322
2323    private void cdft2d_subth(final int isgn, final double[][] a, final boolean scale) {
2324        int nthread = ConcurrencyUtils.getNumberOfThreads();
2325        int nt = 8 * rows;
2326        if (columns == 4 * nthread) {
2327            nt >>= 1;
2328        } else if (columns < 4 * nthread) {
2329            nthread = columns >> 1;
2330            nt >>= 2;
2331        }
2332        Future<?>[] futures = new Future[nthread];
2333        final int nthreads = nthread;
2334        for (int i = 0; i < nthreads; i++) {
2335            final int n0 = i;
2336            final int startt = nt * i;
2337            futures[i] = ConcurrencyUtils.submit(new Runnable() {
2338                public void run() {
2339                    int idx2, idx3, idx4, idx5;
2340                    if (isgn == -1) {
2341                        if (columns > 4 * nthreads) {
2342                            for (int c = 8 * n0; c < columns; c += 8 * nthreads) {
2343                                for (int r = 0; r < rows; r++) {
2344                                    idx2 = startt + 2 * r;
2345                                    idx3 = startt + 2 * rows + 2 * r;
2346                                    idx4 = idx3 + 2 * rows;
2347                                    idx5 = idx4 + 2 * rows;
2348                                    t[idx2] = a[r][c];
2349                                    t[idx2 + 1] = a[r][c + 1];
2350                                    t[idx3] = a[r][c + 2];
2351                                    t[idx3 + 1] = a[r][c + 3];
2352                                    t[idx4] = a[r][c + 4];
2353                                    t[idx4 + 1] = a[r][c + 5];
2354                                    t[idx5] = a[r][c + 6];
2355                                    t[idx5 + 1] = a[r][c + 7];
2356                                }
2357                                fftRows.complexForward(t, startt);
2358                                fftRows.complexForward(t, startt + 2 * rows);
2359                                fftRows.complexForward(t, startt + 4 * rows);
2360                                fftRows.complexForward(t, startt + 6 * rows);
2361                                for (int r = 0; r < rows; r++) {
2362                                    idx2 = startt + 2 * r;
2363                                    idx3 = startt + 2 * rows + 2 * r;
2364                                    idx4 = idx3 + 2 * rows;
2365                                    idx5 = idx4 + 2 * rows;
2366                                    a[r][c] = t[idx2];
2367                                    a[r][c + 1] = t[idx2 + 1];
2368                                    a[r][c + 2] = t[idx3];
2369                                    a[r][c + 3] = t[idx3 + 1];
2370                                    a[r][c + 4] = t[idx4];
2371                                    a[r][c + 5] = t[idx4 + 1];
2372                                    a[r][c + 6] = t[idx5];
2373                                    a[r][c + 7] = t[idx5 + 1];
2374                                }
2375                            }
2376                        } else if (columns == 4 * nthreads) {
2377                            for (int r = 0; r < rows; r++) {
2378                                idx2 = startt + 2 * r;
2379                                idx3 = startt + 2 * rows + 2 * r;
2380                                t[idx2] = a[r][4 * n0];
2381                                t[idx2 + 1] = a[r][4 * n0 + 1];
2382                                t[idx3] = a[r][4 * n0 + 2];
2383                                t[idx3 + 1] = a[r][4 * n0 + 3];
2384                            }
2385                            fftRows.complexForward(t, startt);
2386                            fftRows.complexForward(t, startt + 2 * rows);
2387                            for (int r = 0; r < rows; r++) {
2388                                idx2 = startt + 2 * r;
2389                                idx3 = startt + 2 * rows + 2 * r;
2390                                a[r][4 * n0] = t[idx2];
2391                                a[r][4 * n0 + 1] = t[idx2 + 1];
2392                                a[r][4 * n0 + 2] = t[idx3];
2393                                a[r][4 * n0 + 3] = t[idx3 + 1];
2394                            }
2395                        } else if (columns == 2 * nthreads) {
2396                            for (int r = 0; r < rows; r++) {
2397                                idx2 = startt + 2 * r;
2398                                t[idx2] = a[r][2 * n0];
2399                                t[idx2 + 1] = a[r][2 * n0 + 1];
2400                            }
2401                            fftRows.complexForward(t, startt);
2402                            for (int r = 0; r < rows; r++) {
2403                                idx2 = startt + 2 * r;
2404                                a[r][2 * n0] = t[idx2];
2405                                a[r][2 * n0 + 1] = t[idx2 + 1];
2406                            }
2407                        }
2408                    } else {
2409                        if (columns > 4 * nthreads) {
2410                            for (int c = 8 * n0; c < columns; c += 8 * nthreads) {
2411                                for (int r = 0; r < rows; r++) {
2412                                    idx2 = startt + 2 * r;
2413                                    idx3 = startt + 2 * rows + 2 * r;
2414                                    idx4 = idx3 + 2 * rows;
2415                                    idx5 = idx4 + 2 * rows;
2416                                    t[idx2] = a[r][c];
2417                                    t[idx2 + 1] = a[r][c + 1];
2418                                    t[idx3] = a[r][c + 2];
2419                                    t[idx3 + 1] = a[r][c + 3];
2420                                    t[idx4] = a[r][c + 4];
2421                                    t[idx4 + 1] = a[r][c + 5];
2422                                    t[idx5] = a[r][c + 6];
2423                                    t[idx5 + 1] = a[r][c + 7];
2424                                }
2425                                fftRows.complexInverse(t, startt, scale);
2426                                fftRows.complexInverse(t, startt + 2 * rows, scale);
2427                                fftRows.complexInverse(t, startt + 4 * rows, scale);
2428                                fftRows.complexInverse(t, startt + 6 * rows, scale);
2429                                for (int r = 0; r < rows; r++) {
2430                                    idx2 = startt + 2 * r;
2431                                    idx3 = startt + 2 * rows + 2 * r;
2432                                    idx4 = idx3 + 2 * rows;
2433                                    idx5 = idx4 + 2 * rows;
2434                                    a[r][c] = t[idx2];
2435                                    a[r][c + 1] = t[idx2 + 1];
2436                                    a[r][c + 2] = t[idx3];
2437                                    a[r][c + 3] = t[idx3 + 1];
2438                                    a[r][c + 4] = t[idx4];
2439                                    a[r][c + 5] = t[idx4 + 1];
2440                                    a[r][c + 6] = t[idx5];
2441                                    a[r][c + 7] = t[idx5 + 1];
2442                                }
2443                            }
2444                        } else if (columns == 4 * nthreads) {
2445                            for (int r = 0; r < rows; r++) {
2446                                idx2 = startt + 2 * r;
2447                                idx3 = startt + 2 * rows + 2 * r;
2448                                t[idx2] = a[r][4 * n0];
2449                                t[idx2 + 1] = a[r][4 * n0 + 1];
2450                                t[idx3] = a[r][4 * n0 + 2];
2451                                t[idx3 + 1] = a[r][4 * n0 + 3];
2452                            }
2453                            fftRows.complexInverse(t, startt, scale);
2454                            fftRows.complexInverse(t, startt + 2 * rows, scale);
2455                            for (int r = 0; r < rows; r++) {
2456                                idx2 = startt + 2 * r;
2457                                idx3 = startt + 2 * rows + 2 * r;
2458                                a[r][4 * n0] = t[idx2];
2459                                a[r][4 * n0 + 1] = t[idx2 + 1];
2460                                a[r][4 * n0 + 2] = t[idx3];
2461                                a[r][4 * n0 + 3] = t[idx3 + 1];
2462                            }
2463                        } else if (columns == 2 * nthreads) {
2464                            for (int r = 0; r < rows; r++) {
2465                                idx2 = startt + 2 * r;
2466                                t[idx2] = a[r][2 * n0];
2467                                t[idx2 + 1] = a[r][2 * n0 + 1];
2468                            }
2469                            fftRows.complexInverse(t, startt, scale);
2470                            for (int r = 0; r < rows; r++) {
2471                                idx2 = startt + 2 * r;
2472                                a[r][2 * n0] = t[idx2];
2473                                a[r][2 * n0 + 1] = t[idx2 + 1];
2474                            }
2475                        }
2476                    }
2477                }
2478            });
2479        }
2480        ConcurrencyUtils.waitForCompletion(futures);
2481    }
2482
2483    private void fillSymmetric(final double[] a) {
2484        final int twon2 = 2 * columns;
2485        int idx1, idx2, idx3, idx4;
2486        int n1d2 = rows / 2;
2487
2488        for (int r = (rows - 1); r >= 1; r--) {
2489            idx1 = r * columns;
2490            idx2 = 2 * idx1;
2491            for (int c = 0; c < columns; c += 2) {
2492                a[idx2 + c] = a[idx1 + c];
2493                a[idx1 + c] = 0;
2494                a[idx2 + c + 1] = a[idx1 + c + 1];
2495                a[idx1 + c + 1] = 0;
2496            }
2497        }
2498        int nthreads = ConcurrencyUtils.getNumberOfThreads();
2499        if ((nthreads > 1) && useThreads && (n1d2 >= nthreads)) {
2500            Future<?>[] futures = new Future[nthreads];
2501            int l1k = n1d2 / nthreads;
2502            final int newn2 = 2 * columns;
2503            for (int i = 0; i < nthreads; i++) {
2504                final int l1offa, l1stopa, l2offa, l2stopa;
2505                if (i == 0)
2506                    l1offa = i * l1k + 1;
2507                else {
2508                    l1offa = i * l1k;
2509                }
2510                l1stopa = i * l1k + l1k;
2511                l2offa = i * l1k;
2512                if (i == nthreads - 1) {
2513                    l2stopa = i * l1k + l1k + 1;
2514                } else {
2515                    l2stopa = i * l1k + l1k;
2516                }
2517                futures[i] = ConcurrencyUtils.submit(new Runnable() {
2518                    public void run() {
2519                        int idx1, idx2, idx3, idx4;
2520
2521                        for (int r = l1offa; r < l1stopa; r++) {
2522                            idx1 = r * newn2;
2523                            idx2 = (rows - r) * newn2;
2524                            idx3 = idx1 + columns;
2525                            a[idx3] = a[idx2 + 1];
2526                            a[idx3 + 1] = -a[idx2];
2527                        }
2528                        for (int r = l1offa; r < l1stopa; r++) {
2529                            idx1 = r * newn2;
2530                            idx3 = (rows - r + 1) * newn2;
2531                            for (int c = columns + 2; c < newn2; c += 2) {
2532                                idx2 = idx3 - c;
2533                                idx4 = idx1 + c;
2534                                a[idx4] = a[idx2];
2535                                a[idx4 + 1] = -a[idx2 + 1];
2536
2537                            }
2538                        }
2539                        for (int r = l2offa; r < l2stopa; r++) {
2540                            idx3 = ((rows - r) % rows) * newn2;
2541                            idx4 = r * newn2;
2542                            for (int c = 0; c < newn2; c += 2) {
2543                                idx1 = idx3 + (newn2 - c) % newn2;
2544                                idx2 = idx4 + c;
2545                                a[idx1] = a[idx2];
2546                                a[idx1 + 1] = -a[idx2 + 1];
2547                            }
2548                        }
2549                    }
2550                });
2551            }
2552            ConcurrencyUtils.waitForCompletion(futures);
2553
2554        } else {
2555
2556            for (int r = 1; r < n1d2; r++) {
2557                idx2 = r * twon2;
2558                idx3 = (rows - r) * twon2;
2559                a[idx2 + columns] = a[idx3 + 1];
2560                a[idx2 + columns + 1] = -a[idx3];
2561            }
2562
2563            for (int r = 1; r < n1d2; r++) {
2564                idx2 = r * twon2;
2565                idx3 = (rows - r + 1) * twon2;
2566                for (int c = columns + 2; c < twon2; c += 2) {
2567                    a[idx2 + c] = a[idx3 - c];
2568                    a[idx2 + c + 1] = -a[idx3 - c + 1];
2569
2570                }
2571            }
2572            for (int r = 0; r <= rows / 2; r++) {
2573                idx1 = r * twon2;
2574                idx4 = ((rows - r) % rows) * twon2;
2575                for (int c = 0; c < twon2; c += 2) {
2576                    idx2 = idx1 + c;
2577                    idx3 = idx4 + (twon2 - c) % twon2;
2578                    a[idx3] = a[idx2];
2579                    a[idx3 + 1] = -a[idx2 + 1];
2580                }
2581            }
2582        }
2583        a[columns] = -a[1];
2584        a[1] = 0;
2585        idx1 = n1d2 * twon2;
2586        a[idx1 + columns] = -a[idx1 + 1];
2587        a[idx1 + 1] = 0;
2588        a[idx1 + columns + 1] = 0;
2589    }
2590
2591    private void fillSymmetric(final double[][] a) {
2592        final int newn2 = 2 * columns;
2593        int n1d2 = rows / 2;
2594
2595        int nthreads = ConcurrencyUtils.getNumberOfThreads();
2596        if ((nthreads > 1) && useThreads && (n1d2 >= nthreads)) {
2597            Future<?>[] futures = new Future[nthreads];
2598            int l1k = n1d2 / nthreads;
2599            for (int i = 0; i < nthreads; i++) {
2600                final int l1offa, l1stopa, l2offa, l2stopa;
2601                if (i == 0)
2602                    l1offa = i * l1k + 1;
2603                else {
2604                    l1offa = i * l1k;
2605                }
2606                l1stopa = i * l1k + l1k;
2607                l2offa = i * l1k;
2608                if (i == nthreads - 1) {
2609                    l2stopa = i * l1k + l1k + 1;
2610                } else {
2611                    l2stopa = i * l1k + l1k;
2612                }
2613                futures[i] = ConcurrencyUtils.submit(new Runnable() {
2614                    public void run() {
2615                        int idx1, idx2;
2616                        for (int r = l1offa; r < l1stopa; r++) {
2617                            idx1 = rows - r;
2618                            a[r][columns] = a[idx1][1];
2619                            a[r][columns + 1] = -a[idx1][0];
2620                        }
2621                        for (int r = l1offa; r < l1stopa; r++) {
2622                            idx1 = rows - r;
2623                            for (int c = columns + 2; c < newn2; c += 2) {
2624                                idx2 = newn2 - c;
2625                                a[r][c] = a[idx1][idx2];
2626                                a[r][c + 1] = -a[idx1][idx2 + 1];
2627
2628                            }
2629                        }
2630                        for (int r = l2offa; r < l2stopa; r++) {
2631                            idx1 = (rows - r) % rows;
2632                            for (int c = 0; c < newn2; c = c + 2) {
2633                                idx2 = (newn2 - c) % newn2;
2634                                a[idx1][idx2] = a[r][c];
2635                                a[idx1][idx2 + 1] = -a[r][c + 1];
2636                            }
2637                        }
2638                    }
2639                });
2640            }
2641            ConcurrencyUtils.waitForCompletion(futures);
2642        } else {
2643
2644            for (int r = 1; r < n1d2; r++) {
2645                int idx1 = rows - r;
2646                a[r][columns] = a[idx1][1];
2647                a[r][columns + 1] = -a[idx1][0];
2648            }
2649            for (int r = 1; r < n1d2; r++) {
2650                int idx1 = rows - r;
2651                for (int c = columns + 2; c < newn2; c += 2) {
2652                    int idx2 = newn2 - c;
2653                    a[r][c] = a[idx1][idx2];
2654                    a[r][c + 1] = -a[idx1][idx2 + 1];
2655                }
2656            }
2657            for (int r = 0; r <= rows / 2; r++) {
2658                int idx1 = (rows - r) % rows;
2659                for (int c = 0; c < newn2; c += 2) {
2660                    int idx2 = (newn2 - c) % newn2;
2661                    a[idx1][idx2] = a[r][c];
2662                    a[idx1][idx2 + 1] = -a[r][c + 1];
2663                }
2664            }
2665        }
2666        a[0][columns] = -a[0][1];
2667        a[0][1] = 0;
2668        a[n1d2][columns] = -a[n1d2][1];
2669        a[n1d2][1] = 0;
2670        a[n1d2][columns + 1] = 0;
2671    }
2672}