001/* ***** BEGIN LICENSE BLOCK *****
002 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
003 *
004 * The contents of this file are subject to the Mozilla Public License Version
005 * 1.1 (the "License"); you may not use this file except in compliance with
006 * the License. You may obtain a copy of the License at
007 * http://www.mozilla.org/MPL/
008 *
009 * Software distributed under the License is distributed on an "AS IS" basis,
010 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
011 * for the specific language governing rights and limitations under the
012 * License.
013 *
014 * The Original Code is JTransforms.
015 *
016 * The Initial Developer of the Original Code is
017 * Piotr Wendykier, Emory University.
018 * Portions created by the Initial Developer are Copyright (C) 2007-2009
019 * the Initial Developer. All Rights Reserved.
020 *
021 * Alternatively, the contents of this file may be used under the terms of
022 * either the GNU General Public License Version 2 or later (the "GPL"), or
023 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
024 * in which case the provisions of the GPL or the LGPL are applicable instead
025 * of those above. If you wish to allow use of your version of this file only
026 * under the terms of either the GPL or the LGPL, and not to allow others to
027 * use your version of this file under the terms of the MPL, indicate your
028 * decision by deleting the provisions above and replace them with the notice
029 * and other provisions required by the GPL or the LGPL. If you do not delete
030 * the provisions above, a recipient may use your version of this file under
031 * the terms of any one of the MPL, the GPL or the LGPL.
032 *
033 * ***** END LICENSE BLOCK ***** */
034
035package edu.emory.mathcs.jtransforms.fft;
036
037import java.util.concurrent.Future;
038
039import edu.emory.mathcs.utils.ConcurrencyUtils;
040
041/**
042 * Computes 2D Discrete Fourier Transform (DFT) of complex and real, single
043 * precision data. The sizes of both dimensions can be arbitrary numbers. This
044 * is a parallel implementation of split-radix and mixed-radix algorithms
045 * optimized for SMP systems. <br>
046 * <br>
047 * Part of the code is derived from General Purpose FFT Package written by Takuya Ooura
048 * (http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html)
049 * 
050 * @author Piotr Wendykier (piotr.wendykier@gmail.com)
051 * 
052 */
053public strictfp class FloatFFT_2D {
054
055    private int rows;
056
057    private int columns;
058
059    private float[] t;
060
061    private FloatFFT_1D fftColumns, fftRows;
062
063    private int oldNthreads;
064
065    private int nt;
066
067    private boolean isPowerOfTwo = false;
068
069    private boolean useThreads = false;
070
071    /**
072     * Creates new instance of FloatFFT_2D.
073     * 
074     * @param rows
075     *            number of rows
076     * @param columns
077     *            number of columns
078     */
079    public FloatFFT_2D(int rows, int columns) {
080        if (rows <= 1 || columns <= 1) {
081            throw new IllegalArgumentException("rows and columns must be greater than 1");
082        }
083        this.rows = rows;
084        this.columns = columns;
085        if (rows * columns >= ConcurrencyUtils.getThreadsBeginN_2D()) {
086            this.useThreads = true;
087        }
088        if (ConcurrencyUtils.isPowerOf2(rows) && ConcurrencyUtils.isPowerOf2(columns)) {
089            isPowerOfTwo = true;
090            oldNthreads = ConcurrencyUtils.getNumberOfThreads();
091            nt = 8 * oldNthreads * rows;
092            if (2 * columns == 4 * oldNthreads) {
093                nt >>= 1;
094            } else if (2 * columns < 4 * oldNthreads) {
095                nt >>= 2;
096            }
097            t = new float[nt];
098        }
099        fftRows = new FloatFFT_1D(rows);
100        if (rows == columns) {
101            fftColumns = fftRows;
102        } else {
103            fftColumns = new FloatFFT_1D(columns);
104        }
105    }
106
107    /**
108     * Computes 2D forward DFT of complex data leaving the result in
109     * <code>a</code>. The data is stored in 1D array in row-major order.
110     * Complex number is stored as two float values in sequence: the real and
111     * imaginary part, i.e. the input array must be of size rows*2*columns. The
112     * physical layout of the input data has to be as follows:<br>
113     * 
114     * <pre>
115     * a[k1*2*columns+2*k2] = Re[k1][k2], 
116     * a[k1*2*columns+2*k2+1] = Im[k1][k2], 0&lt;=k1&lt;rows, 0&lt;=k2&lt;columns,
117     * </pre>
118     * 
119     * @param a
120     *            data to transform
121     */
122    public void complexForward(final float[] a) {
123        int nthreads = ConcurrencyUtils.getNumberOfThreads();
124        if (isPowerOfTwo) {
125            int oldn2 = columns;
126            columns = 2 * columns;
127            if (nthreads != oldNthreads) {
128                nt = 8 * nthreads * rows;
129                if (columns == 4 * nthreads) {
130                    nt >>= 1;
131                } else if (columns < 4 * nthreads) {
132                    nt >>= 2;
133                }
134                t = new float[nt];
135                oldNthreads = nthreads;
136            }
137            if ((nthreads > 1) && useThreads) {
138                xdft2d0_subth1(0, -1, a, true);
139                cdft2d_subth(-1, a, true);
140            } else {
141                for (int r = 0; r < rows; r++) {
142                    fftColumns.complexForward(a, r * columns);
143                }
144                cdft2d_sub(-1, a, true);
145            }
146            columns = oldn2;
147        } else {
148            final int rowStride = 2 * columns;
149            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
150                Future<?>[] futures = new Future[nthreads];
151                int p = rows / nthreads;
152                for (int l = 0; l < nthreads; l++) {
153                    final int firstRow = l * p;
154                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
155                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
156                        public void run() {
157                            for (int r = firstRow; r < lastRow; r++) {
158                                fftColumns.complexForward(a, r * rowStride);
159                            }
160                        }
161                    });
162                }
163                ConcurrencyUtils.waitForCompletion(futures);
164                p = columns / nthreads;
165                for (int l = 0; l < nthreads; l++) {
166                    final int firstColumn = l * p;
167                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
168                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
169                        public void run() {
170                            float[] temp = new float[2 * rows];
171                            for (int c = firstColumn; c < lastColumn; c++) {
172                                int idx0 = 2 * c;
173                                for (int r = 0; r < rows; r++) {
174                                    int idx1 = 2 * r;
175                                    int idx2 = r * rowStride + idx0;
176                                    temp[idx1] = a[idx2];
177                                    temp[idx1 + 1] = a[idx2 + 1];
178                                }
179                                fftRows.complexForward(temp);
180                                for (int r = 0; r < rows; r++) {
181                                    int idx1 = 2 * r;
182                                    int idx2 = r * rowStride + idx0;
183                                    a[idx2] = temp[idx1];
184                                    a[idx2 + 1] = temp[idx1 + 1];
185                                }
186                            }
187                        }
188                    });
189                }
190                ConcurrencyUtils.waitForCompletion(futures);
191            } else {
192                for (int r = 0; r < rows; r++) {
193                    fftColumns.complexForward(a, r * rowStride);
194                }
195                float[] temp = new float[2 * rows];
196                for (int c = 0; c < columns; c++) {
197                    int idx0 = 2 * c;
198                    for (int r = 0; r < rows; r++) {
199                        int idx1 = 2 * r;
200                        int idx2 = r * rowStride + idx0;
201                        temp[idx1] = a[idx2];
202                        temp[idx1 + 1] = a[idx2 + 1];
203                    }
204                    fftRows.complexForward(temp);
205                    for (int r = 0; r < rows; r++) {
206                        int idx1 = 2 * r;
207                        int idx2 = r * rowStride + idx0;
208                        a[idx2] = temp[idx1];
209                        a[idx2 + 1] = temp[idx1 + 1];
210                    }
211                }
212            }
213        }
214    }
215
216    /**
217     * Computes 2D forward DFT of complex data leaving the result in
218     * <code>a</code>. The data is stored in 2D array. Complex data is
219     * represented by 2 float values in sequence: the real and imaginary part,
220     * i.e. the input array must be of size rows by 2*columns. The physical
221     * layout of the input data has to be as follows:<br>
222     * 
223     * <pre>
224     * a[k1][2*k2] = Re[k1][k2], 
225     * a[k1][2*k2+1] = Im[k1][k2], 0&lt;=k1&lt;rows, 0&lt;=k2&lt;columns,
226     * </pre>
227     * 
228     * @param a
229     *            data to transform
230     */
231    public void complexForward(final float[][] a) {
232        int nthreads = ConcurrencyUtils.getNumberOfThreads();
233        if (isPowerOfTwo) {
234            int oldn2 = columns;
235            columns = 2 * columns;
236            if (nthreads != oldNthreads) {
237                nt = 8 * nthreads * rows;
238                if (columns == 4 * nthreads) {
239                    nt >>= 1;
240                } else if (columns < 4 * nthreads) {
241                    nt >>= 2;
242                }
243                t = new float[nt];
244                oldNthreads = nthreads;
245            }
246            if ((nthreads > 1) && useThreads) {
247                xdft2d0_subth1(0, -1, a, true);
248                cdft2d_subth(-1, a, true);
249            } else {
250                for (int r = 0; r < rows; r++) {
251                    fftColumns.complexForward(a[r]);
252                }
253                cdft2d_sub(-1, a, true);
254            }
255            columns = oldn2;
256        } else {
257            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
258                Future<?>[] futures = new Future[nthreads];
259                int p = rows / nthreads;
260                for (int l = 0; l < nthreads; l++) {
261                    final int firstRow = l * p;
262                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
263                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
264                        public void run() {
265                            for (int r = firstRow; r < lastRow; r++) {
266                                fftColumns.complexForward(a[r]);
267                            }
268                        }
269                    });
270                }
271                ConcurrencyUtils.waitForCompletion(futures);
272                p = columns / nthreads;
273                for (int l = 0; l < nthreads; l++) {
274                    final int firstColumn = l * p;
275                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
276                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
277                        public void run() {
278                            float[] temp = new float[2 * rows];
279                            for (int c = firstColumn; c < lastColumn; c++) {
280                                int idx1 = 2 * c;
281                                for (int r = 0; r < rows; r++) {
282                                    int idx2 = 2 * r;
283                                    temp[idx2] = a[r][idx1];
284                                    temp[idx2 + 1] = a[r][idx1 + 1];
285                                }
286                                fftRows.complexForward(temp);
287                                for (int r = 0; r < rows; r++) {
288                                    int idx2 = 2 * r;
289                                    a[r][idx1] = temp[idx2];
290                                    a[r][idx1 + 1] = temp[idx2 + 1];
291                                }
292                            }
293                        }
294                    });
295                }
296                ConcurrencyUtils.waitForCompletion(futures);
297            } else {
298                for (int r = 0; r < rows; r++) {
299                    fftColumns.complexForward(a[r]);
300                }
301                float[] temp = new float[2 * rows];
302                for (int c = 0; c < columns; c++) {
303                    int idx1 = 2 * c;
304                    for (int r = 0; r < rows; r++) {
305                        int idx2 = 2 * r;
306                        temp[idx2] = a[r][idx1];
307                        temp[idx2 + 1] = a[r][idx1 + 1];
308                    }
309                    fftRows.complexForward(temp);
310                    for (int r = 0; r < rows; r++) {
311                        int idx2 = 2 * r;
312                        a[r][idx1] = temp[idx2];
313                        a[r][idx1 + 1] = temp[idx2 + 1];
314                    }
315                }
316            }
317        }
318    }
319
320    /**
321     * Computes 2D inverse DFT of complex data leaving the result in
322     * <code>a</code>. The data is stored in 1D array in row-major order.
323     * Complex number is stored as two float values in sequence: the real and
324     * imaginary part, i.e. the input array must be of size rows*2*columns. The
325     * physical layout of the input data has to be as follows:<br>
326     * 
327     * <pre>
328     * a[k1*2*columns+2*k2] = Re[k1][k2], 
329     * a[k1*2*columns+2*k2+1] = Im[k1][k2], 0&lt;=k1&lt;rows, 0&lt;=k2&lt;columns,
330     * </pre>
331     * 
332     * @param a
333     *            data to transform
334     * @param scale
335     *            if true then scaling is performed
336     * 
337     */
338    public void complexInverse(final float[] a, final boolean scale) {
339        int nthreads = ConcurrencyUtils.getNumberOfThreads();
340        if (isPowerOfTwo) {
341            int oldn2 = columns;
342            columns = 2 * columns;
343            if (nthreads != oldNthreads) {
344                nt = 8 * nthreads * rows;
345                if (columns == 4 * nthreads) {
346                    nt >>= 1;
347                } else if (columns < 4 * nthreads) {
348                    nt >>= 2;
349                }
350                t = new float[nt];
351                oldNthreads = nthreads;
352            }
353            if ((nthreads > 1) && useThreads) {
354                xdft2d0_subth1(0, 1, a, scale);
355                cdft2d_subth(1, a, scale);
356            } else {
357
358                for (int r = 0; r < rows; r++) {
359                    fftColumns.complexInverse(a, r * columns, scale);
360                }
361                cdft2d_sub(1, a, scale);
362            }
363            columns = oldn2;
364        } else {
365            final int rowspan = 2 * columns;
366            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
367                Future<?>[] futures = new Future[nthreads];
368                int p = rows / nthreads;
369                for (int l = 0; l < nthreads; l++) {
370                    final int firstRow = l * p;
371                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
372                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
373                        public void run() {
374                            for (int r = firstRow; r < lastRow; r++) {
375                                fftColumns.complexInverse(a, r * rowspan, scale);
376                            }
377                        }
378                    });
379                }
380                ConcurrencyUtils.waitForCompletion(futures);
381                p = columns / nthreads;
382                for (int l = 0; l < nthreads; l++) {
383                    final int firstColumn = l * p;
384                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
385                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
386                        public void run() {
387                            float[] temp = new float[2 * rows];
388                            for (int c = firstColumn; c < lastColumn; c++) {
389                                int idx1 = 2 * c;
390                                for (int r = 0; r < rows; r++) {
391                                    int idx2 = 2 * r;
392                                    int idx3 = r * rowspan + idx1;
393                                    temp[idx2] = a[idx3];
394                                    temp[idx2 + 1] = a[idx3 + 1];
395                                }
396                                fftRows.complexInverse(temp, scale);
397                                for (int r = 0; r < rows; r++) {
398                                    int idx2 = 2 * r;
399                                    int idx3 = r * rowspan + idx1;
400                                    a[idx3] = temp[idx2];
401                                    a[idx3 + 1] = temp[idx2 + 1];
402                                }
403                            }
404                        }
405                    });
406                }
407                ConcurrencyUtils.waitForCompletion(futures);
408            } else {
409                for (int r = 0; r < rows; r++) {
410                    fftColumns.complexInverse(a, r * rowspan, scale);
411                }
412                float[] temp = new float[2 * rows];
413                for (int c = 0; c < columns; c++) {
414                    int idx1 = 2 * c;
415                    for (int r = 0; r < rows; r++) {
416                        int idx2 = 2 * r;
417                        int idx3 = r * rowspan + idx1;
418                        temp[idx2] = a[idx3];
419                        temp[idx2 + 1] = a[idx3 + 1];
420                    }
421                    fftRows.complexInverse(temp, scale);
422                    for (int r = 0; r < rows; r++) {
423                        int idx2 = 2 * r;
424                        int idx3 = r * rowspan + idx1;
425                        a[idx3] = temp[idx2];
426                        a[idx3 + 1] = temp[idx2 + 1];
427                    }
428                }
429            }
430        }
431    }
432
433    /**
434     * Computes 2D inverse DFT of complex data leaving the result in
435     * <code>a</code>. The data is stored in 2D array. Complex data is
436     * represented by 2 float values in sequence: the real and imaginary part,
437     * i.e. the input array must be of size rows by 2*columns. The physical
438     * layout of the input data has to be as follows:<br>
439     * 
440     * <pre>
441     * a[k1][2*k2] = Re[k1][k2], 
442     * a[k1][2*k2+1] = Im[k1][k2], 0&lt;=k1&lt;rows, 0&lt;=k2&lt;columns,
443     * </pre>
444     * 
445     * @param a
446     *            data to transform
447     * @param scale
448     *            if true then scaling is performed
449     * 
450     */
451    public void complexInverse(final float[][] a, final boolean scale) {
452        int nthreads = ConcurrencyUtils.getNumberOfThreads();
453        if (isPowerOfTwo) {
454            int oldn2 = columns;
455            columns = 2 * columns;
456
457            if (nthreads != oldNthreads) {
458                nt = 8 * nthreads * rows;
459                if (columns == 4 * nthreads) {
460                    nt >>= 1;
461                } else if (columns < 4 * nthreads) {
462                    nt >>= 2;
463                }
464                t = new float[nt];
465                oldNthreads = nthreads;
466            }
467            if ((nthreads > 1) && useThreads) {
468                xdft2d0_subth1(0, 1, a, scale);
469                cdft2d_subth(1, a, scale);
470            } else {
471
472                for (int r = 0; r < rows; r++) {
473                    fftColumns.complexInverse(a[r], scale);
474                }
475                cdft2d_sub(1, a, scale);
476            }
477            columns = oldn2;
478        } else {
479            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
480                Future<?>[] futures = new Future[nthreads];
481                int p = rows / nthreads;
482                for (int l = 0; l < nthreads; l++) {
483                    final int firstRow = l * p;
484                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
485                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
486                        public void run() {
487                            for (int r = firstRow; r < lastRow; r++) {
488                                fftColumns.complexInverse(a[r], scale);
489                            }
490                        }
491                    });
492                }
493                ConcurrencyUtils.waitForCompletion(futures);
494                p = columns / nthreads;
495                for (int l = 0; l < nthreads; l++) {
496                    final int firstColumn = l * p;
497                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
498                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
499                        public void run() {
500                            float[] temp = new float[2 * rows];
501                            for (int c = firstColumn; c < lastColumn; c++) {
502                                int idx1 = 2 * c;
503                                for (int r = 0; r < rows; r++) {
504                                    int idx2 = 2 * r;
505                                    temp[idx2] = a[r][idx1];
506                                    temp[idx2 + 1] = a[r][idx1 + 1];
507                                }
508                                fftRows.complexInverse(temp, scale);
509                                for (int r = 0; r < rows; r++) {
510                                    int idx2 = 2 * r;
511                                    a[r][idx1] = temp[idx2];
512                                    a[r][idx1 + 1] = temp[idx2 + 1];
513                                }
514                            }
515                        }
516                    });
517                }
518                ConcurrencyUtils.waitForCompletion(futures);
519            } else {
520                for (int r = 0; r < rows; r++) {
521                    fftColumns.complexInverse(a[r], scale);
522                }
523                float[] temp = new float[2 * rows];
524                for (int c = 0; c < columns; c++) {
525                    int idx1 = 2 * c;
526                    for (int r = 0; r < rows; r++) {
527                        int idx2 = 2 * r;
528                        temp[idx2] = a[r][idx1];
529                        temp[idx2 + 1] = a[r][idx1 + 1];
530                    }
531                    fftRows.complexInverse(temp, scale);
532                    for (int r = 0; r < rows; r++) {
533                        int idx2 = 2 * r;
534                        a[r][idx1] = temp[idx2];
535                        a[r][idx1 + 1] = temp[idx2 + 1];
536                    }
537                }
538            }
539        }
540    }
541
542    /**
543     * Computes 2D forward DFT of real data leaving the result in <code>a</code>
544     * . This method only works when the sizes of both dimensions are
545     * power-of-two numbers. The physical layout of the output data is as
546     * follows:
547     * 
548     * <pre>
549     * a[k1*columns+2*k2] = Re[k1][k2] = Re[rows-k1][columns-k2], 
550     * a[k1*columns+2*k2+1] = Im[k1][k2] = -Im[rows-k1][columns-k2], 
551     *       0&lt;k1&lt;rows, 0&lt;k2&lt;columns/2, 
552     * a[2*k2] = Re[0][k2] = Re[0][columns-k2], 
553     * a[2*k2+1] = Im[0][k2] = -Im[0][columns-k2], 
554     *       0&lt;k2&lt;columns/2, 
555     * a[k1*columns] = Re[k1][0] = Re[rows-k1][0], 
556     * a[k1*columns+1] = Im[k1][0] = -Im[rows-k1][0], 
557     * a[(rows-k1)*columns+1] = Re[k1][columns/2] = Re[rows-k1][columns/2], 
558     * a[(rows-k1)*columns] = -Im[k1][columns/2] = Im[rows-k1][columns/2], 
559     *       0&lt;k1&lt;rows/2, 
560     * a[0] = Re[0][0], 
561     * a[1] = Re[0][columns/2], 
562     * a[(rows/2)*columns] = Re[rows/2][0], 
563     * a[(rows/2)*columns+1] = Re[rows/2][columns/2]
564     * </pre>
565     * 
566     * This method computes only half of the elements of the real transform. The
567     * other half satisfies the symmetry condition. If you want the full real
568     * forward transform, use <code>realForwardFull</code>. To get back the
569     * original data, use <code>realInverse</code> on the output of this method.
570     * 
571     * @param a
572     *            data to transform
573     */
574    public void realForward(float[] a) {
575        if (isPowerOfTwo == false) {
576            throw new IllegalArgumentException("rows and columns must be power of two numbers");
577        } else {
578            int nthreads;
579
580            nthreads = ConcurrencyUtils.getNumberOfThreads();
581            if (nthreads != oldNthreads) {
582                nt = 8 * nthreads * rows;
583                if (columns == 4 * nthreads) {
584                    nt >>= 1;
585                } else if (columns < 4 * nthreads) {
586                    nt >>= 2;
587                }
588                t = new float[nt];
589                oldNthreads = nthreads;
590            }
591            if ((nthreads > 1) && useThreads) {
592                xdft2d0_subth1(1, 1, a, true);
593                cdft2d_subth(-1, a, true);
594                rdft2d_sub(1, a);
595            } else {
596                for (int r = 0; r < rows; r++) {
597                    fftColumns.realForward(a, r * columns);
598                }
599                cdft2d_sub(-1, a, true);
600                rdft2d_sub(1, a);
601            }
602        }
603    }
604
605    /**
606     * Computes 2D forward DFT of real data leaving the result in <code>a</code>
607     * . This method only works when the sizes of both dimensions are
608     * power-of-two numbers. The physical layout of the output data is as
609     * follows:
610     * 
611     * <pre>
612     * a[k1][2*k2] = Re[k1][k2] = Re[rows-k1][columns-k2], 
613     * a[k1][2*k2+1] = Im[k1][k2] = -Im[rows-k1][columns-k2], 
614     *       0&lt;k1&lt;rows, 0&lt;k2&lt;columns/2, 
615     * a[0][2*k2] = Re[0][k2] = Re[0][columns-k2], 
616     * a[0][2*k2+1] = Im[0][k2] = -Im[0][columns-k2], 
617     *       0&lt;k2&lt;columns/2, 
618     * a[k1][0] = Re[k1][0] = Re[rows-k1][0], 
619     * a[k1][1] = Im[k1][0] = -Im[rows-k1][0], 
620     * a[rows-k1][1] = Re[k1][columns/2] = Re[rows-k1][columns/2], 
621     * a[rows-k1][0] = -Im[k1][columns/2] = Im[rows-k1][columns/2], 
622     *       0&lt;k1&lt;rows/2, 
623     * a[0][0] = Re[0][0], 
624     * a[0][1] = Re[0][columns/2], 
625     * a[rows/2][0] = Re[rows/2][0], 
626     * a[rows/2][1] = Re[rows/2][columns/2]
627     * </pre>
628     * 
629     * This method computes only half of the elements of the real transform. The
630     * other half satisfies the symmetry condition. If you want the full real
631     * forward transform, use <code>realForwardFull</code>. To get back the
632     * original data, use <code>realInverse</code> on the output of this method.
633     * 
634     * @param a
635     *            data to transform
636     */
637    public void realForward(float[][] a) {
638        if (isPowerOfTwo == false) {
639            throw new IllegalArgumentException("rows and columns must be power of two numbers");
640        } else {
641            int nthreads;
642
643            nthreads = ConcurrencyUtils.getNumberOfThreads();
644            if (nthreads != oldNthreads) {
645                nt = 8 * nthreads * rows;
646                if (columns == 4 * nthreads) {
647                    nt >>= 1;
648                } else if (columns < 4 * nthreads) {
649                    nt >>= 2;
650                }
651                t = new float[nt];
652                oldNthreads = nthreads;
653            }
654            if ((nthreads > 1) && useThreads) {
655                xdft2d0_subth1(1, 1, a, true);
656                cdft2d_subth(-1, a, true);
657                rdft2d_sub(1, a);
658            } else {
659                for (int r = 0; r < rows; r++) {
660                    fftColumns.realForward(a[r]);
661                }
662                cdft2d_sub(-1, a, true);
663                rdft2d_sub(1, a);
664            }
665        }
666    }
667
668    /**
669     * Computes 2D forward DFT of real data leaving the result in <code>a</code>
670     * . This method computes full real forward transform, i.e. you will get the
671     * same result as from <code>complexForward</code> called with all imaginary
672     * part equal 0. Because the result is stored in <code>a</code>, the input
673     * array must be of size rows*2*columns, with only the first rows*columns
674     * elements filled with real data. To get back the original data, use
675     * <code>complexInverse</code> on the output of this method.
676     * 
677     * @param a
678     *            data to transform
679     */
680    public void realForwardFull(float[] a) {
681        if (isPowerOfTwo) {
682            int nthreads;
683
684            nthreads = ConcurrencyUtils.getNumberOfThreads();
685            if (nthreads != oldNthreads) {
686                nt = 8 * nthreads * rows;
687                if (columns == 4 * nthreads) {
688                    nt >>= 1;
689                } else if (columns < 4 * nthreads) {
690                    nt >>= 2;
691                }
692                t = new float[nt];
693                oldNthreads = nthreads;
694            }
695            if ((nthreads > 1) && useThreads) {
696                xdft2d0_subth1(1, 1, a, true);
697                cdft2d_subth(-1, a, true);
698                rdft2d_sub(1, a);
699            } else {
700                for (int r = 0; r < rows; r++) {
701                    fftColumns.realForward(a, r * columns);
702                }
703                cdft2d_sub(-1, a, true);
704                rdft2d_sub(1, a);
705            }
706            fillSymmetric(a);
707        } else {
708            mixedRadixRealForwardFull(a);
709        }
710    }
711
712    /**
713     * Computes 2D forward DFT of real data leaving the result in <code>a</code>
714     * . This method computes full real forward transform, i.e. you will get the
715     * same result as from <code>complexForward</code> called with all imaginary
716     * part equal 0. Because the result is stored in <code>a</code>, the input
717     * array must be of size rows by 2*columns, with only the first rows by
718     * columns elements filled with real data. To get back the original data,
719     * use <code>complexInverse</code> on the output of this method.
720     * 
721     * @param a
722     *            data to transform
723     */
724    public void realForwardFull(float[][] a) {
725        if (isPowerOfTwo) {
726            int nthreads;
727
728            nthreads = ConcurrencyUtils.getNumberOfThreads();
729            if (nthreads != oldNthreads) {
730                nt = 8 * nthreads * rows;
731                if (columns == 4 * nthreads) {
732                    nt >>= 1;
733                } else if (columns < 4 * nthreads) {
734                    nt >>= 2;
735                }
736                t = new float[nt];
737                oldNthreads = nthreads;
738            }
739            if ((nthreads > 1) && useThreads) {
740                xdft2d0_subth1(1, 1, a, true);
741                cdft2d_subth(-1, a, true);
742                rdft2d_sub(1, a);
743            } else {
744                for (int r = 0; r < rows; r++) {
745                    fftColumns.realForward(a[r]);
746                }
747                cdft2d_sub(-1, a, true);
748                rdft2d_sub(1, a);
749            }
750            fillSymmetric(a);
751        } else {
752            mixedRadixRealForwardFull(a);
753        }
754    }
755
756    /**
757     * Computes 2D inverse DFT of real data leaving the result in <code>a</code>
758     * . This method only works when the sizes of both dimensions are
759     * power-of-two numbers. The physical layout of the input data has to be as
760     * follows:
761     * 
762     * <pre>
763     * a[k1*columns+2*k2] = Re[k1][k2] = Re[rows-k1][columns-k2], 
764     * a[k1*columns+2*k2+1] = Im[k1][k2] = -Im[rows-k1][columns-k2], 
765     *       0&lt;k1&lt;rows, 0&lt;k2&lt;columns/2, 
766     * a[2*k2] = Re[0][k2] = Re[0][columns-k2], 
767     * a[2*k2+1] = Im[0][k2] = -Im[0][columns-k2], 
768     *       0&lt;k2&lt;columns/2, 
769     * a[k1*columns] = Re[k1][0] = Re[rows-k1][0], 
770     * a[k1*columns+1] = Im[k1][0] = -Im[rows-k1][0], 
771     * a[(rows-k1)*columns+1] = Re[k1][columns/2] = Re[rows-k1][columns/2], 
772     * a[(rows-k1)*columns] = -Im[k1][columns/2] = Im[rows-k1][columns/2], 
773     *       0&lt;k1&lt;rows/2, 
774     * a[0] = Re[0][0], 
775     * a[1] = Re[0][columns/2], 
776     * a[(rows/2)*columns] = Re[rows/2][0], 
777     * a[(rows/2)*columns+1] = Re[rows/2][columns/2]
778     * </pre>
779     * 
780     * This method computes only half of the elements of the real transform. The
781     * other half satisfies the symmetry condition. If you want the full real
782     * inverse transform, use <code>realInverseFull</code>.
783     * 
784     * @param a
785     *            data to transform
786     * 
787     * @param scale
788     *            if true then scaling is performed
789     */
790    public void realInverse(float[] a, boolean scale) {
791        if (isPowerOfTwo == false) {
792            throw new IllegalArgumentException("rows and columns must be power of two numbers");
793        } else {
794            int nthreads;
795            nthreads = ConcurrencyUtils.getNumberOfThreads();
796            if (nthreads != oldNthreads) {
797                nt = 8 * nthreads * rows;
798                if (columns == 4 * nthreads) {
799                    nt >>= 1;
800                } else if (columns < 4 * nthreads) {
801                    nt >>= 2;
802                }
803                t = new float[nt];
804                oldNthreads = nthreads;
805            }
806            if ((nthreads > 1) && useThreads) {
807                rdft2d_sub(-1, a);
808                cdft2d_subth(1, a, scale);
809                xdft2d0_subth1(1, -1, a, scale);
810            } else {
811                rdft2d_sub(-1, a);
812                cdft2d_sub(1, a, scale);
813                for (int r = 0; r < rows; r++) {
814                    fftColumns.realInverse(a, r * columns, scale);
815                }
816            }
817        }
818    }
819
820    /**
821     * Computes 2D inverse DFT of real data leaving the result in <code>a</code>
822     * . This method only works when the sizes of both dimensions are
823     * power-of-two numbers. The physical layout of the input data has to be as
824     * follows:
825     * 
826     * <pre>
827     * a[k1][2*k2] = Re[k1][k2] = Re[rows-k1][columns-k2], 
828     * a[k1][2*k2+1] = Im[k1][k2] = -Im[rows-k1][columns-k2], 
829     *       0&lt;k1&lt;rows, 0&lt;k2&lt;columns/2, 
830     * a[0][2*k2] = Re[0][k2] = Re[0][columns-k2], 
831     * a[0][2*k2+1] = Im[0][k2] = -Im[0][columns-k2], 
832     *       0&lt;k2&lt;columns/2, 
833     * a[k1][0] = Re[k1][0] = Re[rows-k1][0], 
834     * a[k1][1] = Im[k1][0] = -Im[rows-k1][0], 
835     * a[rows-k1][1] = Re[k1][columns/2] = Re[rows-k1][columns/2], 
836     * a[rows-k1][0] = -Im[k1][columns/2] = Im[rows-k1][columns/2], 
837     *       0&lt;k1&lt;rows/2, 
838     * a[0][0] = Re[0][0], 
839     * a[0][1] = Re[0][columns/2], 
840     * a[rows/2][0] = Re[rows/2][0], 
841     * a[rows/2][1] = Re[rows/2][columns/2]
842     * </pre>
843     * 
844     * This method computes only half of the elements of the real transform. The
845     * other half satisfies the symmetry condition. If you want the full real
846     * inverse transform, use <code>realInverseFull</code>.
847     * 
848     * @param a
849     *            data to transform
850     * 
851     * @param scale
852     *            if true then scaling is performed
853     */
854    public void realInverse(float[][] a, boolean scale) {
855        if (isPowerOfTwo == false) {
856            throw new IllegalArgumentException("rows and columns must be power of two numbers");
857        } else {
858            int nthreads;
859
860            nthreads = ConcurrencyUtils.getNumberOfThreads();
861            if (nthreads != oldNthreads) {
862                nt = 8 * nthreads * rows;
863                if (columns == 4 * nthreads) {
864                    nt >>= 1;
865                } else if (columns < 4 * nthreads) {
866                    nt >>= 2;
867                }
868                t = new float[nt];
869                oldNthreads = nthreads;
870            }
871            if ((nthreads > 1) && useThreads) {
872                rdft2d_sub(-1, a);
873                cdft2d_subth(1, a, scale);
874                xdft2d0_subth1(1, -1, a, scale);
875            } else {
876                rdft2d_sub(-1, a);
877                cdft2d_sub(1, a, scale);
878                for (int r = 0; r < rows; r++) {
879                    fftColumns.realInverse(a[r], scale);
880                }
881            }
882        }
883    }
884
885    /**
886     * Computes 2D inverse DFT of real data leaving the result in <code>a</code>
887     * . This method computes full real inverse transform, i.e. you will get the
888     * same result as from <code>complexInverse</code> called with all imaginary
889     * part equal 0. Because the result is stored in <code>a</code>, the input
890     * array must be of size rows*2*columns, with only the first rows*columns
891     * elements filled with real data.
892     * 
893     * @param a
894     *            data to transform
895     * 
896     * @param scale
897     *            if true then scaling is performed
898     */
899    public void realInverseFull(float[] a, boolean scale) {
900        if (isPowerOfTwo) {
901            int nthreads;
902
903            nthreads = ConcurrencyUtils.getNumberOfThreads();
904            if (nthreads != oldNthreads) {
905                nt = 8 * nthreads * rows;
906                if (columns == 4 * nthreads) {
907                    nt >>= 1;
908                } else if (columns < 4 * nthreads) {
909                    nt >>= 2;
910                }
911                t = new float[nt];
912                oldNthreads = nthreads;
913            }
914            if ((nthreads > 1) && useThreads) {
915                xdft2d0_subth2(1, -1, a, scale);
916                cdft2d_subth(1, a, scale);
917                rdft2d_sub(1, a);
918            } else {
919                for (int r = 0; r < rows; r++) {
920                    fftColumns.realInverse2(a, r * columns, scale);
921                }
922                cdft2d_sub(1, a, scale);
923                rdft2d_sub(1, a);
924            }
925            fillSymmetric(a);
926        } else {
927            mixedRadixRealInverseFull(a, scale);
928        }
929    }
930
931    /**
932     * Computes 2D inverse DFT of real data leaving the result in <code>a</code>
933     * . This method computes full real inverse transform, i.e. you will get the
934     * same result as from <code>complexInverse</code> called with all imaginary
935     * part equal 0. Because the result is stored in <code>a</code>, the input
936     * array must be of size rows by 2*columns, with only the first rows by
937     * columns elements filled with real data.
938     * 
939     * @param a
940     *            data to transform
941     * 
942     * @param scale
943     *            if true then scaling is performed
944     */
945    public void realInverseFull(float[][] a, boolean scale) {
946        if (isPowerOfTwo) {
947            int nthreads;
948
949            nthreads = ConcurrencyUtils.getNumberOfThreads();
950            if (nthreads != oldNthreads) {
951                nt = 8 * nthreads * rows;
952                if (columns == 4 * nthreads) {
953                    nt >>= 1;
954                } else if (columns < 4 * nthreads) {
955                    nt >>= 2;
956                }
957                t = new float[nt];
958                oldNthreads = nthreads;
959            }
960            if ((nthreads > 1) && useThreads) {
961                xdft2d0_subth2(1, -1, a, scale);
962                cdft2d_subth(1, a, scale);
963                rdft2d_sub(1, a);
964            } else {
965                for (int r = 0; r < rows; r++) {
966                    fftColumns.realInverse2(a[r], 0, scale);
967                }
968                cdft2d_sub(1, a, scale);
969                rdft2d_sub(1, a);
970            }
971            fillSymmetric(a);
972        } else {
973            mixedRadixRealInverseFull(a, scale);
974        }
975    }
976
977    private void mixedRadixRealForwardFull(final float[][] a) {
978        final int n2d2 = columns / 2 + 1;
979        final float[][] temp = new float[n2d2][2 * rows];
980
981        int nthreads = ConcurrencyUtils.getNumberOfThreads();
982        if ((nthreads > 1) && useThreads && (rows >= nthreads) && (n2d2 - 2 >= nthreads)) {
983            Future<?>[] futures = new Future[nthreads];
984            int p = rows / nthreads;
985            for (int l = 0; l < nthreads; l++) {
986                final int firstRow = l * p;
987                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
988                futures[l] = ConcurrencyUtils.submit(new Runnable() {
989                    public void run() {
990                        for (int i = firstRow; i < lastRow; i++) {
991                            fftColumns.realForward(a[i]);
992                        }
993                    }
994                });
995            }
996            ConcurrencyUtils.waitForCompletion(futures);
997
998            for (int r = 0; r < rows; r++) {
999                temp[0][r] = a[r][0]; //first column is always real
1000            }
1001            fftRows.realForwardFull(temp[0]);
1002
1003            p = (n2d2 - 2) / nthreads;
1004            for (int l = 0; l < nthreads; l++) {
1005                final int firstColumn = 1 + l * p;
1006                final int lastColumn = (l == (nthreads - 1)) ? n2d2 - 1 : firstColumn + p;
1007                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1008                    public void run() {
1009                        for (int c = firstColumn; c < lastColumn; c++) {
1010                            int idx2 = 2 * c;
1011                            for (int r = 0; r < rows; r++) {
1012                                int idx1 = 2 * r;
1013                                temp[c][idx1] = a[r][idx2];
1014                                temp[c][idx1 + 1] = a[r][idx2 + 1];
1015                            }
1016                            fftRows.complexForward(temp[c]);
1017                        }
1018                    }
1019                });
1020            }
1021            ConcurrencyUtils.waitForCompletion(futures);
1022
1023            if ((columns % 2) == 0) {
1024                for (int r = 0; r < rows; r++) {
1025                    temp[n2d2 - 1][r] = a[r][1];
1026                    //imaginary part = 0;
1027                }
1028                fftRows.realForwardFull(temp[n2d2 - 1]);
1029
1030            } else {
1031                for (int r = 0; r < rows; r++) {
1032                    int idx1 = 2 * r;
1033                    int idx2 = n2d2 - 1;
1034                    temp[idx2][idx1] = a[r][2 * idx2];
1035                    temp[idx2][idx1 + 1] = a[r][1];
1036                }
1037                fftRows.complexForward(temp[n2d2 - 1]);
1038
1039            }
1040
1041            p = rows / nthreads;
1042            for (int l = 0; l < nthreads; l++) {
1043                final int firstRow = l * p;
1044                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1045                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1046                    public void run() {
1047                        for (int r = firstRow; r < lastRow; r++) {
1048                            int idx1 = 2 * r;
1049                            for (int c = 0; c < n2d2; c++) {
1050                                int idx2 = 2 * c;
1051                                a[r][idx2] = temp[c][idx1];
1052                                a[r][idx2 + 1] = temp[c][idx1 + 1];
1053                            }
1054                        }
1055                    }
1056                });
1057            }
1058            ConcurrencyUtils.waitForCompletion(futures);
1059
1060            for (int l = 0; l < nthreads; l++) {
1061                final int firstRow = 1 + l * p;
1062                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1063                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1064                    public void run() {
1065                        for (int r = firstRow; r < lastRow; r++) {
1066                            int idx3 = rows - r;
1067                            for (int c = n2d2; c < columns; c++) {
1068                                int idx1 = 2 * c;
1069                                int idx2 = 2 * (columns - c);
1070                                a[0][idx1] = a[0][idx2];
1071                                a[0][idx1 + 1] = -a[0][idx2 + 1];
1072                                a[r][idx1] = a[idx3][idx2];
1073                                a[r][idx1 + 1] = -a[idx3][idx2 + 1];
1074                            }
1075                        }
1076                    }
1077                });
1078            }
1079            ConcurrencyUtils.waitForCompletion(futures);
1080
1081        } else {
1082            for (int r = 0; r < rows; r++) {
1083                fftColumns.realForward(a[r]);
1084            }
1085
1086            for (int r = 0; r < rows; r++) {
1087                temp[0][r] = a[r][0]; //first column is always real
1088            }
1089            fftRows.realForwardFull(temp[0]);
1090
1091            for (int c = 1; c < n2d2 - 1; c++) {
1092                int idx2 = 2 * c;
1093                for (int r = 0; r < rows; r++) {
1094                    int idx1 = 2 * r;
1095                    temp[c][idx1] = a[r][idx2];
1096                    temp[c][idx1 + 1] = a[r][idx2 + 1];
1097                }
1098                fftRows.complexForward(temp[c]);
1099            }
1100
1101            if ((columns % 2) == 0) {
1102                for (int r = 0; r < rows; r++) {
1103                    temp[n2d2 - 1][r] = a[r][1];
1104                    //imaginary part = 0;
1105                }
1106                fftRows.realForwardFull(temp[n2d2 - 1]);
1107
1108            } else {
1109                for (int r = 0; r < rows; r++) {
1110                    int idx1 = 2 * r;
1111                    int idx2 = n2d2 - 1;
1112                    temp[idx2][idx1] = a[r][2 * idx2];
1113                    temp[idx2][idx1 + 1] = a[r][1];
1114                }
1115                fftRows.complexForward(temp[n2d2 - 1]);
1116
1117            }
1118
1119            for (int r = 0; r < rows; r++) {
1120                int idx1 = 2 * r;
1121                for (int c = 0; c < n2d2; c++) {
1122                    int idx2 = 2 * c;
1123                    a[r][idx2] = temp[c][idx1];
1124                    a[r][idx2 + 1] = temp[c][idx1 + 1];
1125                }
1126            }
1127
1128            //fill symmetric
1129            for (int r = 1; r < rows; r++) {
1130                int idx3 = rows - r;
1131                for (int c = n2d2; c < columns; c++) {
1132                    int idx1 = 2 * c;
1133                    int idx2 = 2 * (columns - c);
1134                    a[0][idx1] = a[0][idx2];
1135                    a[0][idx1 + 1] = -a[0][idx2 + 1];
1136                    a[r][idx1] = a[idx3][idx2];
1137                    a[r][idx1 + 1] = -a[idx3][idx2 + 1];
1138                }
1139            }
1140        }
1141    }
1142
1143    private void mixedRadixRealForwardFull(final float[] a) {
1144        final int rowStride = 2 * columns;
1145        final int n2d2 = columns / 2 + 1;
1146        final float[][] temp = new float[n2d2][2 * rows];
1147
1148        int nthreads = ConcurrencyUtils.getNumberOfThreads();
1149        if ((nthreads > 1) && useThreads && (rows >= nthreads) && (n2d2 - 2 >= nthreads)) {
1150            Future<?>[] futures = new Future[nthreads];
1151            int p = rows / nthreads;
1152            for (int l = 0; l < nthreads; l++) {
1153                final int firstRow = l * p;
1154                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1155                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1156                    public void run() {
1157                        for (int i = firstRow; i < lastRow; i++) {
1158                            fftColumns.realForward(a, i * columns);
1159                        }
1160                    }
1161                });
1162            }
1163            ConcurrencyUtils.waitForCompletion(futures);
1164
1165            for (int r = 0; r < rows; r++) {
1166                temp[0][r] = a[r * columns]; //first column is always real
1167            }
1168            fftRows.realForwardFull(temp[0]);
1169
1170            p = (n2d2 - 2) / nthreads;
1171            for (int l = 0; l < nthreads; l++) {
1172                final int firstColumn = 1 + l * p;
1173                final int lastColumn = (l == (nthreads - 1)) ? n2d2 - 1 : firstColumn + p;
1174                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1175                    public void run() {
1176                        for (int c = firstColumn; c < lastColumn; c++) {
1177                            int idx0 = 2 * c;
1178                            for (int r = 0; r < rows; r++) {
1179                                int idx1 = 2 * r;
1180                                int idx2 = r * columns + idx0;
1181                                temp[c][idx1] = a[idx2];
1182                                temp[c][idx1 + 1] = a[idx2 + 1];
1183                            }
1184                            fftRows.complexForward(temp[c]);
1185                        }
1186                    }
1187                });
1188            }
1189            ConcurrencyUtils.waitForCompletion(futures);
1190
1191            if ((columns % 2) == 0) {
1192                for (int r = 0; r < rows; r++) {
1193                    temp[n2d2 - 1][r] = a[r * columns + 1];
1194                    //imaginary part = 0;
1195                }
1196                fftRows.realForwardFull(temp[n2d2 - 1]);
1197
1198            } else {
1199                for (int r = 0; r < rows; r++) {
1200                    int idx1 = 2 * r;
1201                    int idx2 = r * columns;
1202                    int idx3 = n2d2 - 1;
1203                    temp[idx3][idx1] = a[idx2 + 2 * idx3];
1204                    temp[idx3][idx1 + 1] = a[idx2 + 1];
1205                }
1206                fftRows.complexForward(temp[n2d2 - 1]);
1207            }
1208
1209            p = rows / nthreads;
1210            for (int l = 0; l < nthreads; l++) {
1211                final int firstRow = l * p;
1212                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1213                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1214                    public void run() {
1215                        for (int r = firstRow; r < lastRow; r++) {
1216                            int idx1 = 2 * r;
1217                            for (int c = 0; c < n2d2; c++) {
1218                                int idx0 = 2 * c;
1219                                int idx2 = r * rowStride + idx0;
1220                                a[idx2] = temp[c][idx1];
1221                                a[idx2 + 1] = temp[c][idx1 + 1];
1222                            }
1223                        }
1224                    }
1225                });
1226            }
1227            ConcurrencyUtils.waitForCompletion(futures);
1228
1229            for (int l = 0; l < nthreads; l++) {
1230                final int firstRow = 1 + l * p;
1231                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1232                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1233                    public void run() {
1234                        for (int r = firstRow; r < lastRow; r++) {
1235                            int idx5 = r * rowStride;
1236                            int idx6 = (rows - r + 1) * rowStride;
1237                            for (int c = n2d2; c < columns; c++) {
1238                                int idx1 = 2 * c;
1239                                int idx2 = 2 * (columns - c);
1240                                a[idx1] = a[idx2];
1241                                a[idx1 + 1] = -a[idx2 + 1];
1242                                int idx3 = idx5 + idx1;
1243                                int idx4 = idx6 - idx1;
1244                                a[idx3] = a[idx4];
1245                                a[idx3 + 1] = -a[idx4 + 1];
1246                            }
1247                        }
1248                    }
1249                });
1250            }
1251            ConcurrencyUtils.waitForCompletion(futures);
1252        } else {
1253            for (int r = 0; r < rows; r++) {
1254                fftColumns.realForward(a, r * columns);
1255            }
1256            for (int r = 0; r < rows; r++) {
1257                temp[0][r] = a[r * columns]; //first column is always real
1258            }
1259            fftRows.realForwardFull(temp[0]);
1260
1261            for (int c = 1; c < n2d2 - 1; c++) {
1262                int idx0 = 2 * c;
1263                for (int r = 0; r < rows; r++) {
1264                    int idx1 = 2 * r;
1265                    int idx2 = r * columns + idx0;
1266                    temp[c][idx1] = a[idx2];
1267                    temp[c][idx1 + 1] = a[idx2 + 1];
1268                }
1269                fftRows.complexForward(temp[c]);
1270            }
1271
1272            if ((columns % 2) == 0) {
1273                for (int r = 0; r < rows; r++) {
1274                    temp[n2d2 - 1][r] = a[r * columns + 1];
1275                    //imaginary part = 0;
1276                }
1277                fftRows.realForwardFull(temp[n2d2 - 1]);
1278
1279            } else {
1280                for (int r = 0; r < rows; r++) {
1281                    int idx1 = 2 * r;
1282                    int idx2 = r * columns;
1283                    int idx3 = n2d2 - 1;
1284                    temp[idx3][idx1] = a[idx2 + 2 * idx3];
1285                    temp[idx3][idx1 + 1] = a[idx2 + 1];
1286                }
1287                fftRows.complexForward(temp[n2d2 - 1]);
1288            }
1289
1290            for (int r = 0; r < rows; r++) {
1291                int idx1 = 2 * r;
1292                for (int c = 0; c < n2d2; c++) {
1293                    int idx0 = 2 * c;
1294                    int idx2 = r * rowStride + idx0;
1295                    a[idx2] = temp[c][idx1];
1296                    a[idx2 + 1] = temp[c][idx1 + 1];
1297                }
1298            }
1299
1300            //fill symmetric
1301            for (int r = 1; r < rows; r++) {
1302                int idx5 = r * rowStride;
1303                int idx6 = (rows - r + 1) * rowStride;
1304                for (int c = n2d2; c < columns; c++) {
1305                    int idx1 = 2 * c;
1306                    int idx2 = 2 * (columns - c);
1307                    a[idx1] = a[idx2];
1308                    a[idx1 + 1] = -a[idx2 + 1];
1309                    int idx3 = idx5 + idx1;
1310                    int idx4 = idx6 - idx1;
1311                    a[idx3] = a[idx4];
1312                    a[idx3 + 1] = -a[idx4 + 1];
1313                }
1314            }
1315        }
1316    }
1317
1318    private void mixedRadixRealInverseFull(final float[][] a, final boolean scale) {
1319        final int n2d2 = columns / 2 + 1;
1320        final float[][] temp = new float[n2d2][2 * rows];
1321
1322        int nthreads = ConcurrencyUtils.getNumberOfThreads();
1323        if ((nthreads > 1) && useThreads && (rows >= nthreads) && (n2d2 - 2 >= nthreads)) {
1324            Future<?>[] futures = new Future[nthreads];
1325            int p = rows / nthreads;
1326            for (int l = 0; l < nthreads; l++) {
1327                final int firstRow = l * p;
1328                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1329                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1330                    public void run() {
1331                        for (int i = firstRow; i < lastRow; i++) {
1332                            fftColumns.realInverse2(a[i], 0, scale);
1333                        }
1334                    }
1335                });
1336            }
1337            ConcurrencyUtils.waitForCompletion(futures);
1338
1339            for (int r = 0; r < rows; r++) {
1340                temp[0][r] = a[r][0]; //first column is always real
1341            }
1342            fftRows.realInverseFull(temp[0], scale);
1343
1344            p = (n2d2 - 2) / nthreads;
1345            for (int l = 0; l < nthreads; l++) {
1346                final int firstColumn = 1 + l * p;
1347                final int lastColumn = (l == (nthreads - 1)) ? n2d2 - 1 : firstColumn + p;
1348                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1349                    public void run() {
1350                        for (int c = firstColumn; c < lastColumn; c++) {
1351                            int idx2 = 2 * c;
1352                            for (int r = 0; r < rows; r++) {
1353                                int idx1 = 2 * r;
1354                                temp[c][idx1] = a[r][idx2];
1355                                temp[c][idx1 + 1] = a[r][idx2 + 1];
1356                            }
1357                            fftRows.complexInverse(temp[c], scale);
1358                        }
1359                    }
1360                });
1361            }
1362            ConcurrencyUtils.waitForCompletion(futures);
1363
1364            if ((columns % 2) == 0) {
1365                for (int r = 0; r < rows; r++) {
1366                    temp[n2d2 - 1][r] = a[r][1];
1367                    //imaginary part = 0;
1368                }
1369                fftRows.realInverseFull(temp[n2d2 - 1], scale);
1370
1371            } else {
1372                for (int r = 0; r < rows; r++) {
1373                    int idx1 = 2 * r;
1374                    int idx2 = n2d2 - 1;
1375                    temp[idx2][idx1] = a[r][2 * idx2];
1376                    temp[idx2][idx1 + 1] = a[r][1];
1377                }
1378                fftRows.complexInverse(temp[n2d2 - 1], scale);
1379
1380            }
1381
1382            p = rows / nthreads;
1383            for (int l = 0; l < nthreads; l++) {
1384                final int firstRow = l * p;
1385                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1386                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1387                    public void run() {
1388                        for (int r = firstRow; r < lastRow; r++) {
1389                            int idx1 = 2 * r;
1390                            for (int c = 0; c < n2d2; c++) {
1391                                int idx2 = 2 * c;
1392                                a[r][idx2] = temp[c][idx1];
1393                                a[r][idx2 + 1] = temp[c][idx1 + 1];
1394                            }
1395                        }
1396                    }
1397                });
1398            }
1399            ConcurrencyUtils.waitForCompletion(futures);
1400
1401            for (int l = 0; l < nthreads; l++) {
1402                final int firstRow = 1 + l * p;
1403                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1404                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1405                    public void run() {
1406                        for (int r = firstRow; r < lastRow; r++) {
1407                            int idx3 = rows - r;
1408                            for (int c = n2d2; c < columns; c++) {
1409                                int idx1 = 2 * c;
1410                                int idx2 = 2 * (columns - c);
1411                                a[0][idx1] = a[0][idx2];
1412                                a[0][idx1 + 1] = -a[0][idx2 + 1];
1413                                a[r][idx1] = a[idx3][idx2];
1414                                a[r][idx1 + 1] = -a[idx3][idx2 + 1];
1415                            }
1416                        }
1417                    }
1418                });
1419            }
1420            ConcurrencyUtils.waitForCompletion(futures);
1421
1422        } else {
1423            for (int r = 0; r < rows; r++) {
1424                fftColumns.realInverse2(a[r], 0, scale);
1425            }
1426
1427            for (int r = 0; r < rows; r++) {
1428                temp[0][r] = a[r][0]; //first column is always real
1429            }
1430            fftRows.realInverseFull(temp[0], scale);
1431
1432            for (int c = 1; c < n2d2 - 1; c++) {
1433                int idx2 = 2 * c;
1434                for (int r = 0; r < rows; r++) {
1435                    int idx1 = 2 * r;
1436                    temp[c][idx1] = a[r][idx2];
1437                    temp[c][idx1 + 1] = a[r][idx2 + 1];
1438                }
1439                fftRows.complexInverse(temp[c], scale);
1440            }
1441
1442            if ((columns % 2) == 0) {
1443                for (int r = 0; r < rows; r++) {
1444                    temp[n2d2 - 1][r] = a[r][1];
1445                    //imaginary part = 0;
1446                }
1447                fftRows.realInverseFull(temp[n2d2 - 1], scale);
1448
1449            } else {
1450                for (int r = 0; r < rows; r++) {
1451                    int idx1 = 2 * r;
1452                    int idx2 = n2d2 - 1;
1453                    temp[idx2][idx1] = a[r][2 * idx2];
1454                    temp[idx2][idx1 + 1] = a[r][1];
1455                }
1456                fftRows.complexInverse(temp[n2d2 - 1], scale);
1457
1458            }
1459
1460            for (int r = 0; r < rows; r++) {
1461                int idx1 = 2 * r;
1462                for (int c = 0; c < n2d2; c++) {
1463                    int idx2 = 2 * c;
1464                    a[r][idx2] = temp[c][idx1];
1465                    a[r][idx2 + 1] = temp[c][idx1 + 1];
1466                }
1467            }
1468
1469            //fill symmetric
1470            for (int r = 1; r < rows; r++) {
1471                int idx3 = rows - r;
1472                for (int c = n2d2; c < columns; c++) {
1473                    int idx1 = 2 * c;
1474                    int idx2 = 2 * (columns - c);
1475                    a[0][idx1] = a[0][idx2];
1476                    a[0][idx1 + 1] = -a[0][idx2 + 1];
1477                    a[r][idx1] = a[idx3][idx2];
1478                    a[r][idx1 + 1] = -a[idx3][idx2 + 1];
1479                }
1480            }
1481        }
1482    }
1483
1484    private void mixedRadixRealInverseFull(final float[] a, final boolean scale) {
1485        final int rowStride = 2 * columns;
1486        final int n2d2 = columns / 2 + 1;
1487        final float[][] temp = new float[n2d2][2 * rows];
1488
1489        int nthreads = ConcurrencyUtils.getNumberOfThreads();
1490        if ((nthreads > 1) && useThreads && (rows >= nthreads) && (n2d2 - 2 >= nthreads)) {
1491            Future<?>[] futures = new Future[nthreads];
1492            int p = rows / nthreads;
1493            for (int l = 0; l < nthreads; l++) {
1494                final int firstRow = l * p;
1495                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1496                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1497                    public void run() {
1498                        for (int i = firstRow; i < lastRow; i++) {
1499                            fftColumns.realInverse2(a, i * columns, scale);
1500                        }
1501                    }
1502                });
1503            }
1504            ConcurrencyUtils.waitForCompletion(futures);
1505
1506            for (int r = 0; r < rows; r++) {
1507                temp[0][r] = a[r * columns]; //first column is always real
1508            }
1509            fftRows.realInverseFull(temp[0], scale);
1510
1511            p = (n2d2 - 2) / nthreads;
1512            for (int l = 0; l < nthreads; l++) {
1513                final int firstColumn = 1 + l * p;
1514                final int lastColumn = (l == (nthreads - 1)) ? n2d2 - 1 : firstColumn + p;
1515                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1516                    public void run() {
1517                        for (int c = firstColumn; c < lastColumn; c++) {
1518                            int idx0 = 2 * c;
1519                            for (int r = 0; r < rows; r++) {
1520                                int idx1 = 2 * r;
1521                                int idx2 = r * columns + idx0;
1522                                temp[c][idx1] = a[idx2];
1523                                temp[c][idx1 + 1] = a[idx2 + 1];
1524                            }
1525                            fftRows.complexInverse(temp[c], scale);
1526                        }
1527                    }
1528                });
1529            }
1530            ConcurrencyUtils.waitForCompletion(futures);
1531
1532            if ((columns % 2) == 0) {
1533                for (int r = 0; r < rows; r++) {
1534                    temp[n2d2 - 1][r] = a[r * columns + 1];
1535                    //imaginary part = 0;
1536                }
1537                fftRows.realInverseFull(temp[n2d2 - 1], scale);
1538
1539            } else {
1540                for (int r = 0; r < rows; r++) {
1541                    int idx1 = 2 * r;
1542                    int idx2 = r * columns;
1543                    int idx3 = n2d2 - 1;
1544                    temp[idx3][idx1] = a[idx2 + 2 * idx3];
1545                    temp[idx3][idx1 + 1] = a[idx2 + 1];
1546                }
1547                fftRows.complexInverse(temp[n2d2 - 1], scale);
1548            }
1549
1550            p = rows / nthreads;
1551            for (int l = 0; l < nthreads; l++) {
1552                final int firstRow = l * p;
1553                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1554                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1555                    public void run() {
1556                        for (int r = firstRow; r < lastRow; r++) {
1557                            int idx1 = 2 * r;
1558                            for (int c = 0; c < n2d2; c++) {
1559                                int idx0 = 2 * c;
1560                                int idx2 = r * rowStride + idx0;
1561                                a[idx2] = temp[c][idx1];
1562                                a[idx2 + 1] = temp[c][idx1 + 1];
1563                            }
1564                        }
1565                    }
1566                });
1567            }
1568            ConcurrencyUtils.waitForCompletion(futures);
1569
1570            for (int l = 0; l < nthreads; l++) {
1571                final int firstRow = 1 + l * p;
1572                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1573                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1574                    public void run() {
1575                        for (int r = firstRow; r < lastRow; r++) {
1576                            int idx5 = r * rowStride;
1577                            int idx6 = (rows - r + 1) * rowStride;
1578                            for (int c = n2d2; c < columns; c++) {
1579                                int idx1 = 2 * c;
1580                                int idx2 = 2 * (columns - c);
1581                                a[idx1] = a[idx2];
1582                                a[idx1 + 1] = -a[idx2 + 1];
1583                                int idx3 = idx5 + idx1;
1584                                int idx4 = idx6 - idx1;
1585                                a[idx3] = a[idx4];
1586                                a[idx3 + 1] = -a[idx4 + 1];
1587                            }
1588                        }
1589                    }
1590                });
1591            }
1592            ConcurrencyUtils.waitForCompletion(futures);
1593        } else {
1594            for (int r = 0; r < rows; r++) {
1595                fftColumns.realInverse2(a, r * columns, scale);
1596            }
1597            for (int r = 0; r < rows; r++) {
1598                temp[0][r] = a[r * columns]; //first column is always real
1599            }
1600            fftRows.realInverseFull(temp[0], scale);
1601
1602            for (int c = 1; c < n2d2 - 1; c++) {
1603                int idx0 = 2 * c;
1604                for (int r = 0; r < rows; r++) {
1605                    int idx1 = 2 * r;
1606                    int idx2 = r * columns + idx0;
1607                    temp[c][idx1] = a[idx2];
1608                    temp[c][idx1 + 1] = a[idx2 + 1];
1609                }
1610                fftRows.complexInverse(temp[c], scale);
1611            }
1612
1613            if ((columns % 2) == 0) {
1614                for (int r = 0; r < rows; r++) {
1615                    temp[n2d2 - 1][r] = a[r * columns + 1];
1616                    //imaginary part = 0;
1617                }
1618                fftRows.realInverseFull(temp[n2d2 - 1], scale);
1619
1620            } else {
1621                for (int r = 0; r < rows; r++) {
1622                    int idx1 = 2 * r;
1623                    int idx2 = r * columns;
1624                    int idx3 = n2d2 - 1;
1625                    temp[idx3][idx1] = a[idx2 + 2 * idx3];
1626                    temp[idx3][idx1 + 1] = a[idx2 + 1];
1627                }
1628                fftRows.complexInverse(temp[n2d2 - 1], scale);
1629            }
1630
1631            for (int r = 0; r < rows; r++) {
1632                int idx1 = 2 * r;
1633                for (int c = 0; c < n2d2; c++) {
1634                    int idx0 = 2 * c;
1635                    int idx2 = r * rowStride + idx0;
1636                    a[idx2] = temp[c][idx1];
1637                    a[idx2 + 1] = temp[c][idx1 + 1];
1638                }
1639            }
1640
1641            //fill symmetric
1642            for (int r = 1; r < rows; r++) {
1643                int idx5 = r * rowStride;
1644                int idx6 = (rows - r + 1) * rowStride;
1645                for (int c = n2d2; c < columns; c++) {
1646                    int idx1 = 2 * c;
1647                    int idx2 = 2 * (columns - c);
1648                    a[idx1] = a[idx2];
1649                    a[idx1 + 1] = -a[idx2 + 1];
1650                    int idx3 = idx5 + idx1;
1651                    int idx4 = idx6 - idx1;
1652                    a[idx3] = a[idx4];
1653                    a[idx3 + 1] = -a[idx4 + 1];
1654                }
1655            }
1656        }
1657    }
1658
1659    private void rdft2d_sub(int isgn, float[] a) {
1660        int n1h, j;
1661        float xi;
1662        int idx1, idx2;
1663
1664        n1h = rows >> 1;
1665        if (isgn < 0) {
1666            for (int i = 1; i < n1h; i++) {
1667                j = rows - i;
1668                idx1 = i * columns;
1669                idx2 = j * columns;
1670                xi = a[idx1] - a[idx2];
1671                a[idx1] += a[idx2];
1672                a[idx2] = xi;
1673                xi = a[idx2 + 1] - a[idx1 + 1];
1674                a[idx1 + 1] += a[idx2 + 1];
1675                a[idx2 + 1] = xi;
1676            }
1677        } else {
1678            for (int i = 1; i < n1h; i++) {
1679                j = rows - i;
1680                idx1 = i * columns;
1681                idx2 = j * columns;
1682                a[idx2] = 0.5f * (a[idx1] - a[idx2]);
1683                a[idx1] -= a[idx2];
1684                a[idx2 + 1] = 0.5f * (a[idx1 + 1] + a[idx2 + 1]);
1685                a[idx1 + 1] -= a[idx2 + 1];
1686            }
1687        }
1688    }
1689
1690    private void rdft2d_sub(int isgn, float[][] a) {
1691        int n1h, j;
1692        float xi;
1693
1694        n1h = rows >> 1;
1695        if (isgn < 0) {
1696            for (int i = 1; i < n1h; i++) {
1697                j = rows - i;
1698                xi = a[i][0] - a[j][0];
1699                a[i][0] += a[j][0];
1700                a[j][0] = xi;
1701                xi = a[j][1] - a[i][1];
1702                a[i][1] += a[j][1];
1703                a[j][1] = xi;
1704            }
1705        } else {
1706            for (int i = 1; i < n1h; i++) {
1707                j = rows - i;
1708                a[j][0] = 0.5f * (a[i][0] - a[j][0]);
1709                a[i][0] -= a[j][0];
1710                a[j][1] = 0.5f * (a[i][1] + a[j][1]);
1711                a[i][1] -= a[j][1];
1712            }
1713        }
1714    }
1715
1716    private void cdft2d_sub(int isgn, float[] a, boolean scale) {
1717        int idx1, idx2, idx3, idx4, idx5;
1718        if (isgn == -1) {
1719            if (columns > 4) {
1720                for (int c = 0; c < columns; c += 8) {
1721                    for (int r = 0; r < rows; r++) {
1722                        idx1 = r * columns + c;
1723                        idx2 = 2 * r;
1724                        idx3 = 2 * rows + 2 * r;
1725                        idx4 = idx3 + 2 * rows;
1726                        idx5 = idx4 + 2 * rows;
1727                        t[idx2] = a[idx1];
1728                        t[idx2 + 1] = a[idx1 + 1];
1729                        t[idx3] = a[idx1 + 2];
1730                        t[idx3 + 1] = a[idx1 + 3];
1731                        t[idx4] = a[idx1 + 4];
1732                        t[idx4 + 1] = a[idx1 + 5];
1733                        t[idx5] = a[idx1 + 6];
1734                        t[idx5 + 1] = a[idx1 + 7];
1735                    }
1736                    fftRows.complexForward(t, 0);
1737                    fftRows.complexForward(t, 2 * rows);
1738                    fftRows.complexForward(t, 4 * rows);
1739                    fftRows.complexForward(t, 6 * rows);
1740                    for (int r = 0; r < rows; r++) {
1741                        idx1 = r * columns + c;
1742                        idx2 = 2 * r;
1743                        idx3 = 2 * rows + 2 * r;
1744                        idx4 = idx3 + 2 * rows;
1745                        idx5 = idx4 + 2 * rows;
1746                        a[idx1] = t[idx2];
1747                        a[idx1 + 1] = t[idx2 + 1];
1748                        a[idx1 + 2] = t[idx3];
1749                        a[idx1 + 3] = t[idx3 + 1];
1750                        a[idx1 + 4] = t[idx4];
1751                        a[idx1 + 5] = t[idx4 + 1];
1752                        a[idx1 + 6] = t[idx5];
1753                        a[idx1 + 7] = t[idx5 + 1];
1754                    }
1755                }
1756            } else if (columns == 4) {
1757                for (int r = 0; r < rows; r++) {
1758                    idx1 = r * columns;
1759                    idx2 = 2 * r;
1760                    idx3 = 2 * rows + 2 * r;
1761                    t[idx2] = a[idx1];
1762                    t[idx2 + 1] = a[idx1 + 1];
1763                    t[idx3] = a[idx1 + 2];
1764                    t[idx3 + 1] = a[idx1 + 3];
1765                }
1766                fftRows.complexForward(t, 0);
1767                fftRows.complexForward(t, 2 * rows);
1768                for (int r = 0; r < rows; r++) {
1769                    idx1 = r * columns;
1770                    idx2 = 2 * r;
1771                    idx3 = 2 * rows + 2 * r;
1772                    a[idx1] = t[idx2];
1773                    a[idx1 + 1] = t[idx2 + 1];
1774                    a[idx1 + 2] = t[idx3];
1775                    a[idx1 + 3] = t[idx3 + 1];
1776                }
1777            } else if (columns == 2) {
1778                for (int r = 0; r < rows; r++) {
1779                    idx1 = r * columns;
1780                    idx2 = 2 * r;
1781                    t[idx2] = a[idx1];
1782                    t[idx2 + 1] = a[idx1 + 1];
1783                }
1784                fftRows.complexForward(t, 0);
1785                for (int r = 0; r < rows; r++) {
1786                    idx1 = r * columns;
1787                    idx2 = 2 * r;
1788                    a[idx1] = t[idx2];
1789                    a[idx1 + 1] = t[idx2 + 1];
1790                }
1791            }
1792        } else {
1793            if (columns > 4) {
1794                for (int c = 0; c < columns; c += 8) {
1795                    for (int r = 0; r < rows; r++) {
1796                        idx1 = r * columns + c;
1797                        idx2 = 2 * r;
1798                        idx3 = 2 * rows + 2 * r;
1799                        idx4 = idx3 + 2 * rows;
1800                        idx5 = idx4 + 2 * rows;
1801                        t[idx2] = a[idx1];
1802                        t[idx2 + 1] = a[idx1 + 1];
1803                        t[idx3] = a[idx1 + 2];
1804                        t[idx3 + 1] = a[idx1 + 3];
1805                        t[idx4] = a[idx1 + 4];
1806                        t[idx4 + 1] = a[idx1 + 5];
1807                        t[idx5] = a[idx1 + 6];
1808                        t[idx5 + 1] = a[idx1 + 7];
1809                    }
1810                    fftRows.complexInverse(t, 0, scale);
1811                    fftRows.complexInverse(t, 2 * rows, scale);
1812                    fftRows.complexInverse(t, 4 * rows, scale);
1813                    fftRows.complexInverse(t, 6 * rows, scale);
1814                    for (int r = 0; r < rows; r++) {
1815                        idx1 = r * columns + c;
1816                        idx2 = 2 * r;
1817                        idx3 = 2 * rows + 2 * r;
1818                        idx4 = idx3 + 2 * rows;
1819                        idx5 = idx4 + 2 * rows;
1820                        a[idx1] = t[idx2];
1821                        a[idx1 + 1] = t[idx2 + 1];
1822                        a[idx1 + 2] = t[idx3];
1823                        a[idx1 + 3] = t[idx3 + 1];
1824                        a[idx1 + 4] = t[idx4];
1825                        a[idx1 + 5] = t[idx4 + 1];
1826                        a[idx1 + 6] = t[idx5];
1827                        a[idx1 + 7] = t[idx5 + 1];
1828                    }
1829                }
1830            } else if (columns == 4) {
1831                for (int r = 0; r < rows; r++) {
1832                    idx1 = r * columns;
1833                    idx2 = 2 * r;
1834                    idx3 = 2 * rows + 2 * r;
1835                    t[idx2] = a[idx1];
1836                    t[idx2 + 1] = a[idx1 + 1];
1837                    t[idx3] = a[idx1 + 2];
1838                    t[idx3 + 1] = a[idx1 + 3];
1839                }
1840                fftRows.complexInverse(t, 0, scale);
1841                fftRows.complexInverse(t, 2 * rows, scale);
1842                for (int r = 0; r < rows; r++) {
1843                    idx1 = r * columns;
1844                    idx2 = 2 * r;
1845                    idx3 = 2 * rows + 2 * r;
1846                    a[idx1] = t[idx2];
1847                    a[idx1 + 1] = t[idx2 + 1];
1848                    a[idx1 + 2] = t[idx3];
1849                    a[idx1 + 3] = t[idx3 + 1];
1850                }
1851            } else if (columns == 2) {
1852                for (int r = 0; r < rows; r++) {
1853                    idx1 = r * columns;
1854                    idx2 = 2 * r;
1855                    t[idx2] = a[idx1];
1856                    t[idx2 + 1] = a[idx1 + 1];
1857                }
1858                fftRows.complexInverse(t, 0, scale);
1859                for (int r = 0; r < rows; r++) {
1860                    idx1 = r * columns;
1861                    idx2 = 2 * r;
1862                    a[idx1] = t[idx2];
1863                    a[idx1 + 1] = t[idx2 + 1];
1864                }
1865            }
1866        }
1867    }
1868
1869    private void cdft2d_sub(int isgn, float[][] a, boolean scale) {
1870        int idx2, idx3, idx4, idx5;
1871        if (isgn == -1) {
1872            if (columns > 4) {
1873                for (int c = 0; c < columns; c += 8) {
1874                    for (int r = 0; r < rows; r++) {
1875                        idx2 = 2 * r;
1876                        idx3 = 2 * rows + 2 * r;
1877                        idx4 = idx3 + 2 * rows;
1878                        idx5 = idx4 + 2 * rows;
1879                        t[idx2] = a[r][c];
1880                        t[idx2 + 1] = a[r][c + 1];
1881                        t[idx3] = a[r][c + 2];
1882                        t[idx3 + 1] = a[r][c + 3];
1883                        t[idx4] = a[r][c + 4];
1884                        t[idx4 + 1] = a[r][c + 5];
1885                        t[idx5] = a[r][c + 6];
1886                        t[idx5 + 1] = a[r][c + 7];
1887                    }
1888                    fftRows.complexForward(t, 0);
1889                    fftRows.complexForward(t, 2 * rows);
1890                    fftRows.complexForward(t, 4 * rows);
1891                    fftRows.complexForward(t, 6 * rows);
1892                    for (int r = 0; r < rows; r++) {
1893                        idx2 = 2 * r;
1894                        idx3 = 2 * rows + 2 * r;
1895                        idx4 = idx3 + 2 * rows;
1896                        idx5 = idx4 + 2 * rows;
1897                        a[r][c] = t[idx2];
1898                        a[r][c + 1] = t[idx2 + 1];
1899                        a[r][c + 2] = t[idx3];
1900                        a[r][c + 3] = t[idx3 + 1];
1901                        a[r][c + 4] = t[idx4];
1902                        a[r][c + 5] = t[idx4 + 1];
1903                        a[r][c + 6] = t[idx5];
1904                        a[r][c + 7] = t[idx5 + 1];
1905                    }
1906                }
1907            } else if (columns == 4) {
1908                for (int r = 0; r < rows; r++) {
1909                    idx2 = 2 * r;
1910                    idx3 = 2 * rows + 2 * r;
1911                    t[idx2] = a[r][0];
1912                    t[idx2 + 1] = a[r][1];
1913                    t[idx3] = a[r][2];
1914                    t[idx3 + 1] = a[r][3];
1915                }
1916                fftRows.complexForward(t, 0);
1917                fftRows.complexForward(t, 2 * rows);
1918                for (int r = 0; r < rows; r++) {
1919                    idx2 = 2 * r;
1920                    idx3 = 2 * rows + 2 * r;
1921                    a[r][0] = t[idx2];
1922                    a[r][1] = t[idx2 + 1];
1923                    a[r][2] = t[idx3];
1924                    a[r][3] = t[idx3 + 1];
1925                }
1926            } else if (columns == 2) {
1927                for (int r = 0; r < rows; r++) {
1928                    idx2 = 2 * r;
1929                    t[idx2] = a[r][0];
1930                    t[idx2 + 1] = a[r][1];
1931                }
1932                fftRows.complexForward(t, 0);
1933                for (int r = 0; r < rows; r++) {
1934                    idx2 = 2 * r;
1935                    a[r][0] = t[idx2];
1936                    a[r][1] = t[idx2 + 1];
1937                }
1938            }
1939        } else {
1940            if (columns > 4) {
1941                for (int c = 0; c < columns; c += 8) {
1942                    for (int r = 0; r < rows; r++) {
1943                        idx2 = 2 * r;
1944                        idx3 = 2 * rows + 2 * r;
1945                        idx4 = idx3 + 2 * rows;
1946                        idx5 = idx4 + 2 * rows;
1947                        t[idx2] = a[r][c];
1948                        t[idx2 + 1] = a[r][c + 1];
1949                        t[idx3] = a[r][c + 2];
1950                        t[idx3 + 1] = a[r][c + 3];
1951                        t[idx4] = a[r][c + 4];
1952                        t[idx4 + 1] = a[r][c + 5];
1953                        t[idx5] = a[r][c + 6];
1954                        t[idx5 + 1] = a[r][c + 7];
1955                    }
1956                    fftRows.complexInverse(t, 0, scale);
1957                    fftRows.complexInverse(t, 2 * rows, scale);
1958                    fftRows.complexInverse(t, 4 * rows, scale);
1959                    fftRows.complexInverse(t, 6 * rows, scale);
1960                    for (int r = 0; r < rows; r++) {
1961                        idx2 = 2 * r;
1962                        idx3 = 2 * rows + 2 * r;
1963                        idx4 = idx3 + 2 * rows;
1964                        idx5 = idx4 + 2 * rows;
1965                        a[r][c] = t[idx2];
1966                        a[r][c + 1] = t[idx2 + 1];
1967                        a[r][c + 2] = t[idx3];
1968                        a[r][c + 3] = t[idx3 + 1];
1969                        a[r][c + 4] = t[idx4];
1970                        a[r][c + 5] = t[idx4 + 1];
1971                        a[r][c + 6] = t[idx5];
1972                        a[r][c + 7] = t[idx5 + 1];
1973                    }
1974                }
1975            } else if (columns == 4) {
1976                for (int r = 0; r < rows; r++) {
1977                    idx2 = 2 * r;
1978                    idx3 = 2 * rows + 2 * r;
1979                    t[idx2] = a[r][0];
1980                    t[idx2 + 1] = a[r][1];
1981                    t[idx3] = a[r][2];
1982                    t[idx3 + 1] = a[r][3];
1983                }
1984                fftRows.complexInverse(t, 0, scale);
1985                fftRows.complexInverse(t, 2 * rows, scale);
1986                for (int r = 0; r < rows; r++) {
1987                    idx2 = 2 * r;
1988                    idx3 = 2 * rows + 2 * r;
1989                    a[r][0] = t[idx2];
1990                    a[r][1] = t[idx2 + 1];
1991                    a[r][2] = t[idx3];
1992                    a[r][3] = t[idx3 + 1];
1993                }
1994            } else if (columns == 2) {
1995                for (int r = 0; r < rows; r++) {
1996                    idx2 = 2 * r;
1997                    t[idx2] = a[r][0];
1998                    t[idx2 + 1] = a[r][1];
1999                }
2000                fftRows.complexInverse(t, 0, scale);
2001                for (int r = 0; r < rows; r++) {
2002                    idx2 = 2 * r;
2003                    a[r][0] = t[idx2];
2004                    a[r][1] = t[idx2 + 1];
2005                }
2006            }
2007        }
2008    }
2009
2010    private void xdft2d0_subth1(final int icr, final int isgn, final float[] a, final boolean scale) {
2011        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
2012
2013        Future<?>[] futures = new Future[nthreads];
2014        for (int i = 0; i < nthreads; i++) {
2015            final int n0 = i;
2016            futures[i] = ConcurrencyUtils.submit(new Runnable() {
2017                public void run() {
2018                    if (icr == 0) {
2019                        if (isgn == -1) {
2020                            for (int r = n0; r < rows; r += nthreads) {
2021                                fftColumns.complexForward(a, r * columns);
2022                            }
2023                        } else {
2024                            for (int r = n0; r < rows; r += nthreads) {
2025                                fftColumns.complexInverse(a, r * columns, scale);
2026                            }
2027                        }
2028                    } else {
2029                        if (isgn == 1) {
2030                            for (int r = n0; r < rows; r += nthreads) {
2031                                fftColumns.realForward(a, r * columns);
2032                            }
2033                        } else {
2034                            for (int r = n0; r < rows; r += nthreads) {
2035                                fftColumns.realInverse(a, r * columns, scale);
2036                            }
2037                        }
2038                    }
2039                }
2040            });
2041        }
2042        ConcurrencyUtils.waitForCompletion(futures);
2043    }
2044
2045    private void xdft2d0_subth2(final int icr, final int isgn, final float[] a, final boolean scale) {
2046        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
2047
2048        Future<?>[] futures = new Future[nthreads];
2049        for (int i = 0; i < nthreads; i++) {
2050            final int n0 = i;
2051            futures[i] = ConcurrencyUtils.submit(new Runnable() {
2052                public void run() {
2053                    if (icr == 0) {
2054                        if (isgn == -1) {
2055                            for (int r = n0; r < rows; r += nthreads) {
2056                                fftColumns.complexForward(a, r * columns);
2057                            }
2058                        } else {
2059                            for (int r = n0; r < rows; r += nthreads) {
2060                                fftColumns.complexInverse(a, r * columns, scale);
2061                            }
2062                        }
2063                    } else {
2064                        if (isgn == 1) {
2065                            for (int r = n0; r < rows; r += nthreads) {
2066                                fftColumns.realForward(a, r * columns);
2067                            }
2068                        } else {
2069                            for (int r = n0; r < rows; r += nthreads) {
2070                                fftColumns.realInverse2(a, r * columns, scale);
2071                            }
2072                        }
2073                    }
2074                }
2075            });
2076        }
2077        ConcurrencyUtils.waitForCompletion(futures);
2078    }
2079
2080    private void xdft2d0_subth1(final int icr, final int isgn, final float[][] a, final boolean scale) {
2081        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
2082
2083        Future<?>[] futures = new Future[nthreads];
2084        for (int i = 0; i < nthreads; i++) {
2085            final int n0 = i;
2086            futures[i] = ConcurrencyUtils.submit(new Runnable() {
2087                public void run() {
2088                    if (icr == 0) {
2089                        if (isgn == -1) {
2090                            for (int r = n0; r < rows; r += nthreads) {
2091                                fftColumns.complexForward(a[r]);
2092                            }
2093                        } else {
2094                            for (int r = n0; r < rows; r += nthreads) {
2095                                fftColumns.complexInverse(a[r], scale);
2096                            }
2097                        }
2098                    } else {
2099                        if (isgn == 1) {
2100                            for (int r = n0; r < rows; r += nthreads) {
2101                                fftColumns.realForward(a[r]);
2102                            }
2103                        } else {
2104                            for (int r = n0; r < rows; r += nthreads) {
2105                                fftColumns.realInverse(a[r], scale);
2106                            }
2107                        }
2108                    }
2109                }
2110            });
2111        }
2112        ConcurrencyUtils.waitForCompletion(futures);
2113    }
2114
2115    private void xdft2d0_subth2(final int icr, final int isgn, final float[][] a, final boolean scale) {
2116        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
2117
2118        Future<?>[] futures = new Future[nthreads];
2119        for (int i = 0; i < nthreads; i++) {
2120            final int n0 = i;
2121            futures[i] = ConcurrencyUtils.submit(new Runnable() {
2122                public void run() {
2123                    if (icr == 0) {
2124                        if (isgn == -1) {
2125                            for (int r = n0; r < rows; r += nthreads) {
2126                                fftColumns.complexForward(a[r]);
2127                            }
2128                        } else {
2129                            for (int r = n0; r < rows; r += nthreads) {
2130                                fftColumns.complexInverse(a[r], scale);
2131                            }
2132                        }
2133                    } else {
2134                        if (isgn == 1) {
2135                            for (int r = n0; r < rows; r += nthreads) {
2136                                fftColumns.realForward(a[r]);
2137                            }
2138                        } else {
2139                            for (int r = n0; r < rows; r += nthreads) {
2140                                fftColumns.realInverse2(a[r], 0, scale);
2141                            }
2142                        }
2143                    }
2144                }
2145            });
2146        }
2147        ConcurrencyUtils.waitForCompletion(futures);
2148    }
2149
2150    private void cdft2d_subth(final int isgn, final float[] a, final boolean scale) {
2151        int nthread = ConcurrencyUtils.getNumberOfThreads();
2152        int nt = 8 * rows;
2153        if (columns == 4 * nthread) {
2154            nt >>= 1;
2155        } else if (columns < 4 * nthread) {
2156            nthread = columns >> 1;
2157            nt >>= 2;
2158        }
2159        Future<?>[] futures = new Future[nthread];
2160        final int nthreads = nthread;
2161        for (int i = 0; i < nthread; i++) {
2162            final int n0 = i;
2163            final int startt = nt * i;
2164            futures[i] = ConcurrencyUtils.submit(new Runnable() {
2165                public void run() {
2166                    int idx1, idx2, idx3, idx4, idx5;
2167                    if (isgn == -1) {
2168                        if (columns > 4 * nthreads) {
2169                            for (int c = 8 * n0; c < columns; c += 8 * nthreads) {
2170                                for (int r = 0; r < rows; r++) {
2171                                    idx1 = r * columns + c;
2172                                    idx2 = startt + 2 * r;
2173                                    idx3 = startt + 2 * rows + 2 * r;
2174                                    idx4 = idx3 + 2 * rows;
2175                                    idx5 = idx4 + 2 * rows;
2176                                    t[idx2] = a[idx1];
2177                                    t[idx2 + 1] = a[idx1 + 1];
2178                                    t[idx3] = a[idx1 + 2];
2179                                    t[idx3 + 1] = a[idx1 + 3];
2180                                    t[idx4] = a[idx1 + 4];
2181                                    t[idx4 + 1] = a[idx1 + 5];
2182                                    t[idx5] = a[idx1 + 6];
2183                                    t[idx5 + 1] = a[idx1 + 7];
2184                                }
2185                                fftRows.complexForward(t, startt);
2186                                fftRows.complexForward(t, startt + 2 * rows);
2187                                fftRows.complexForward(t, startt + 4 * rows);
2188                                fftRows.complexForward(t, startt + 6 * rows);
2189                                for (int r = 0; r < rows; r++) {
2190                                    idx1 = r * columns + c;
2191                                    idx2 = startt + 2 * r;
2192                                    idx3 = startt + 2 * rows + 2 * r;
2193                                    idx4 = idx3 + 2 * rows;
2194                                    idx5 = idx4 + 2 * rows;
2195                                    a[idx1] = t[idx2];
2196                                    a[idx1 + 1] = t[idx2 + 1];
2197                                    a[idx1 + 2] = t[idx3];
2198                                    a[idx1 + 3] = t[idx3 + 1];
2199                                    a[idx1 + 4] = t[idx4];
2200                                    a[idx1 + 5] = t[idx4 + 1];
2201                                    a[idx1 + 6] = t[idx5];
2202                                    a[idx1 + 7] = t[idx5 + 1];
2203                                }
2204                            }
2205                        } else if (columns == 4 * nthreads) {
2206                            for (int r = 0; r < rows; r++) {
2207                                idx1 = r * columns + 4 * n0;
2208                                idx2 = startt + 2 * r;
2209                                idx3 = startt + 2 * rows + 2 * r;
2210                                t[idx2] = a[idx1];
2211                                t[idx2 + 1] = a[idx1 + 1];
2212                                t[idx3] = a[idx1 + 2];
2213                                t[idx3 + 1] = a[idx1 + 3];
2214                            }
2215                            fftRows.complexForward(t, startt);
2216                            fftRows.complexForward(t, startt + 2 * rows);
2217                            for (int r = 0; r < rows; r++) {
2218                                idx1 = r * columns + 4 * n0;
2219                                idx2 = startt + 2 * r;
2220                                idx3 = startt + 2 * rows + 2 * r;
2221                                a[idx1] = t[idx2];
2222                                a[idx1 + 1] = t[idx2 + 1];
2223                                a[idx1 + 2] = t[idx3];
2224                                a[idx1 + 3] = t[idx3 + 1];
2225                            }
2226                        } else if (columns == 2 * nthreads) {
2227                            for (int r = 0; r < rows; r++) {
2228                                idx1 = r * columns + 2 * n0;
2229                                idx2 = startt + 2 * r;
2230                                t[idx2] = a[idx1];
2231                                t[idx2 + 1] = a[idx1 + 1];
2232                            }
2233                            fftRows.complexForward(t, startt);
2234                            for (int r = 0; r < rows; r++) {
2235                                idx1 = r * columns + 2 * n0;
2236                                idx2 = startt + 2 * r;
2237                                a[idx1] = t[idx2];
2238                                a[idx1 + 1] = t[idx2 + 1];
2239                            }
2240                        }
2241                    } else {
2242                        if (columns > 4 * nthreads) {
2243                            for (int c = 8 * n0; c < columns; c += 8 * nthreads) {
2244                                for (int r = 0; r < rows; r++) {
2245                                    idx1 = r * columns + c;
2246                                    idx2 = startt + 2 * r;
2247                                    idx3 = startt + 2 * rows + 2 * r;
2248                                    idx4 = idx3 + 2 * rows;
2249                                    idx5 = idx4 + 2 * rows;
2250                                    t[idx2] = a[idx1];
2251                                    t[idx2 + 1] = a[idx1 + 1];
2252                                    t[idx3] = a[idx1 + 2];
2253                                    t[idx3 + 1] = a[idx1 + 3];
2254                                    t[idx4] = a[idx1 + 4];
2255                                    t[idx4 + 1] = a[idx1 + 5];
2256                                    t[idx5] = a[idx1 + 6];
2257                                    t[idx5 + 1] = a[idx1 + 7];
2258                                }
2259                                fftRows.complexInverse(t, startt, scale);
2260                                fftRows.complexInverse(t, startt + 2 * rows, scale);
2261                                fftRows.complexInverse(t, startt + 4 * rows, scale);
2262                                fftRows.complexInverse(t, startt + 6 * rows, scale);
2263                                for (int r = 0; r < rows; r++) {
2264                                    idx1 = r * columns + c;
2265                                    idx2 = startt + 2 * r;
2266                                    idx3 = startt + 2 * rows + 2 * r;
2267                                    idx4 = idx3 + 2 * rows;
2268                                    idx5 = idx4 + 2 * rows;
2269                                    a[idx1] = t[idx2];
2270                                    a[idx1 + 1] = t[idx2 + 1];
2271                                    a[idx1 + 2] = t[idx3];
2272                                    a[idx1 + 3] = t[idx3 + 1];
2273                                    a[idx1 + 4] = t[idx4];
2274                                    a[idx1 + 5] = t[idx4 + 1];
2275                                    a[idx1 + 6] = t[idx5];
2276                                    a[idx1 + 7] = t[idx5 + 1];
2277                                }
2278                            }
2279                        } else if (columns == 4 * nthreads) {
2280                            for (int r = 0; r < rows; r++) {
2281                                idx1 = r * columns + 4 * n0;
2282                                idx2 = startt + 2 * r;
2283                                idx3 = startt + 2 * rows + 2 * r;
2284                                t[idx2] = a[idx1];
2285                                t[idx2 + 1] = a[idx1 + 1];
2286                                t[idx3] = a[idx1 + 2];
2287                                t[idx3 + 1] = a[idx1 + 3];
2288                            }
2289                            fftRows.complexInverse(t, startt, scale);
2290                            fftRows.complexInverse(t, startt + 2 * rows, scale);
2291                            for (int r = 0; r < rows; r++) {
2292                                idx1 = r * columns + 4 * n0;
2293                                idx2 = startt + 2 * r;
2294                                idx3 = startt + 2 * rows + 2 * r;
2295                                a[idx1] = t[idx2];
2296                                a[idx1 + 1] = t[idx2 + 1];
2297                                a[idx1 + 2] = t[idx3];
2298                                a[idx1 + 3] = t[idx3 + 1];
2299                            }
2300                        } else if (columns == 2 * nthreads) {
2301                            for (int r = 0; r < rows; r++) {
2302                                idx1 = r * columns + 2 * n0;
2303                                idx2 = startt + 2 * r;
2304                                t[idx2] = a[idx1];
2305                                t[idx2 + 1] = a[idx1 + 1];
2306                            }
2307                            fftRows.complexInverse(t, startt, scale);
2308                            for (int r = 0; r < rows; r++) {
2309                                idx1 = r * columns + 2 * n0;
2310                                idx2 = startt + 2 * r;
2311                                a[idx1] = t[idx2];
2312                                a[idx1 + 1] = t[idx2 + 1];
2313                            }
2314                        }
2315                    }
2316                }
2317            });
2318        }
2319        ConcurrencyUtils.waitForCompletion(futures);
2320    }
2321
2322    private void cdft2d_subth(final int isgn, final float[][] a, final boolean scale) {
2323        int nthread = ConcurrencyUtils.getNumberOfThreads();
2324        int nt = 8 * rows;
2325        if (columns == 4 * nthread) {
2326            nt >>= 1;
2327        } else if (columns < 4 * nthread) {
2328            nthread = columns >> 1;
2329            nt >>= 2;
2330        }
2331        Future<?>[] futures = new Future[nthread];
2332        final int nthreads = nthread;
2333        for (int i = 0; i < nthreads; i++) {
2334            final int n0 = i;
2335            final int startt = nt * i;
2336            futures[i] = ConcurrencyUtils.submit(new Runnable() {
2337                public void run() {
2338                    int idx2, idx3, idx4, idx5;
2339                    if (isgn == -1) {
2340                        if (columns > 4 * nthreads) {
2341                            for (int c = 8 * n0; c < columns; c += 8 * nthreads) {
2342                                for (int r = 0; r < rows; r++) {
2343                                    idx2 = startt + 2 * r;
2344                                    idx3 = startt + 2 * rows + 2 * r;
2345                                    idx4 = idx3 + 2 * rows;
2346                                    idx5 = idx4 + 2 * rows;
2347                                    t[idx2] = a[r][c];
2348                                    t[idx2 + 1] = a[r][c + 1];
2349                                    t[idx3] = a[r][c + 2];
2350                                    t[idx3 + 1] = a[r][c + 3];
2351                                    t[idx4] = a[r][c + 4];
2352                                    t[idx4 + 1] = a[r][c + 5];
2353                                    t[idx5] = a[r][c + 6];
2354                                    t[idx5 + 1] = a[r][c + 7];
2355                                }
2356                                fftRows.complexForward(t, startt);
2357                                fftRows.complexForward(t, startt + 2 * rows);
2358                                fftRows.complexForward(t, startt + 4 * rows);
2359                                fftRows.complexForward(t, startt + 6 * rows);
2360                                for (int r = 0; r < rows; r++) {
2361                                    idx2 = startt + 2 * r;
2362                                    idx3 = startt + 2 * rows + 2 * r;
2363                                    idx4 = idx3 + 2 * rows;
2364                                    idx5 = idx4 + 2 * rows;
2365                                    a[r][c] = t[idx2];
2366                                    a[r][c + 1] = t[idx2 + 1];
2367                                    a[r][c + 2] = t[idx3];
2368                                    a[r][c + 3] = t[idx3 + 1];
2369                                    a[r][c + 4] = t[idx4];
2370                                    a[r][c + 5] = t[idx4 + 1];
2371                                    a[r][c + 6] = t[idx5];
2372                                    a[r][c + 7] = t[idx5 + 1];
2373                                }
2374                            }
2375                        } else if (columns == 4 * nthreads) {
2376                            for (int r = 0; r < rows; r++) {
2377                                idx2 = startt + 2 * r;
2378                                idx3 = startt + 2 * rows + 2 * r;
2379                                t[idx2] = a[r][4 * n0];
2380                                t[idx2 + 1] = a[r][4 * n0 + 1];
2381                                t[idx3] = a[r][4 * n0 + 2];
2382                                t[idx3 + 1] = a[r][4 * n0 + 3];
2383                            }
2384                            fftRows.complexForward(t, startt);
2385                            fftRows.complexForward(t, startt + 2 * rows);
2386                            for (int r = 0; r < rows; r++) {
2387                                idx2 = startt + 2 * r;
2388                                idx3 = startt + 2 * rows + 2 * r;
2389                                a[r][4 * n0] = t[idx2];
2390                                a[r][4 * n0 + 1] = t[idx2 + 1];
2391                                a[r][4 * n0 + 2] = t[idx3];
2392                                a[r][4 * n0 + 3] = t[idx3 + 1];
2393                            }
2394                        } else if (columns == 2 * nthreads) {
2395                            for (int r = 0; r < rows; r++) {
2396                                idx2 = startt + 2 * r;
2397                                t[idx2] = a[r][2 * n0];
2398                                t[idx2 + 1] = a[r][2 * n0 + 1];
2399                            }
2400                            fftRows.complexForward(t, startt);
2401                            for (int r = 0; r < rows; r++) {
2402                                idx2 = startt + 2 * r;
2403                                a[r][2 * n0] = t[idx2];
2404                                a[r][2 * n0 + 1] = t[idx2 + 1];
2405                            }
2406                        }
2407                    } else {
2408                        if (columns > 4 * nthreads) {
2409                            for (int c = 8 * n0; c < columns; c += 8 * nthreads) {
2410                                for (int r = 0; r < rows; r++) {
2411                                    idx2 = startt + 2 * r;
2412                                    idx3 = startt + 2 * rows + 2 * r;
2413                                    idx4 = idx3 + 2 * rows;
2414                                    idx5 = idx4 + 2 * rows;
2415                                    t[idx2] = a[r][c];
2416                                    t[idx2 + 1] = a[r][c + 1];
2417                                    t[idx3] = a[r][c + 2];
2418                                    t[idx3 + 1] = a[r][c + 3];
2419                                    t[idx4] = a[r][c + 4];
2420                                    t[idx4 + 1] = a[r][c + 5];
2421                                    t[idx5] = a[r][c + 6];
2422                                    t[idx5 + 1] = a[r][c + 7];
2423                                }
2424                                fftRows.complexInverse(t, startt, scale);
2425                                fftRows.complexInverse(t, startt + 2 * rows, scale);
2426                                fftRows.complexInverse(t, startt + 4 * rows, scale);
2427                                fftRows.complexInverse(t, startt + 6 * rows, scale);
2428                                for (int r = 0; r < rows; r++) {
2429                                    idx2 = startt + 2 * r;
2430                                    idx3 = startt + 2 * rows + 2 * r;
2431                                    idx4 = idx3 + 2 * rows;
2432                                    idx5 = idx4 + 2 * rows;
2433                                    a[r][c] = t[idx2];
2434                                    a[r][c + 1] = t[idx2 + 1];
2435                                    a[r][c + 2] = t[idx3];
2436                                    a[r][c + 3] = t[idx3 + 1];
2437                                    a[r][c + 4] = t[idx4];
2438                                    a[r][c + 5] = t[idx4 + 1];
2439                                    a[r][c + 6] = t[idx5];
2440                                    a[r][c + 7] = t[idx5 + 1];
2441                                }
2442                            }
2443                        } else if (columns == 4 * nthreads) {
2444                            for (int r = 0; r < rows; r++) {
2445                                idx2 = startt + 2 * r;
2446                                idx3 = startt + 2 * rows + 2 * r;
2447                                t[idx2] = a[r][4 * n0];
2448                                t[idx2 + 1] = a[r][4 * n0 + 1];
2449                                t[idx3] = a[r][4 * n0 + 2];
2450                                t[idx3 + 1] = a[r][4 * n0 + 3];
2451                            }
2452                            fftRows.complexInverse(t, startt, scale);
2453                            fftRows.complexInverse(t, startt + 2 * rows, scale);
2454                            for (int r = 0; r < rows; r++) {
2455                                idx2 = startt + 2 * r;
2456                                idx3 = startt + 2 * rows + 2 * r;
2457                                a[r][4 * n0] = t[idx2];
2458                                a[r][4 * n0 + 1] = t[idx2 + 1];
2459                                a[r][4 * n0 + 2] = t[idx3];
2460                                a[r][4 * n0 + 3] = t[idx3 + 1];
2461                            }
2462                        } else if (columns == 2 * nthreads) {
2463                            for (int r = 0; r < rows; r++) {
2464                                idx2 = startt + 2 * r;
2465                                t[idx2] = a[r][2 * n0];
2466                                t[idx2 + 1] = a[r][2 * n0 + 1];
2467                            }
2468                            fftRows.complexInverse(t, startt, scale);
2469                            for (int r = 0; r < rows; r++) {
2470                                idx2 = startt + 2 * r;
2471                                a[r][2 * n0] = t[idx2];
2472                                a[r][2 * n0 + 1] = t[idx2 + 1];
2473                            }
2474                        }
2475                    }
2476                }
2477            });
2478        }
2479        ConcurrencyUtils.waitForCompletion(futures);
2480    }
2481
2482    private void fillSymmetric(final float[] a) {
2483        final int twon2 = 2 * columns;
2484        int idx1, idx2, idx3, idx4;
2485        int n1d2 = rows / 2;
2486
2487        for (int r = (rows - 1); r >= 1; r--) {
2488            idx1 = r * columns;
2489            idx2 = 2 * idx1;
2490            for (int c = 0; c < columns; c += 2) {
2491                a[idx2 + c] = a[idx1 + c];
2492                a[idx1 + c] = 0;
2493                a[idx2 + c + 1] = a[idx1 + c + 1];
2494                a[idx1 + c + 1] = 0;
2495            }
2496        }
2497        int nthreads = ConcurrencyUtils.getNumberOfThreads();
2498        if ((nthreads > 1) && useThreads && (n1d2 >= nthreads)) {
2499            Future<?>[] futures = new Future[nthreads];
2500            int l1k = n1d2 / nthreads;
2501            final int newn2 = 2 * columns;
2502            for (int i = 0; i < nthreads; i++) {
2503                final int l1offa, l1stopa, l2offa, l2stopa;
2504                if (i == 0)
2505                    l1offa = i * l1k + 1;
2506                else {
2507                    l1offa = i * l1k;
2508                }
2509                l1stopa = i * l1k + l1k;
2510                l2offa = i * l1k;
2511                if (i == nthreads - 1) {
2512                    l2stopa = i * l1k + l1k + 1;
2513                } else {
2514                    l2stopa = i * l1k + l1k;
2515                }
2516                futures[i] = ConcurrencyUtils.submit(new Runnable() {
2517                    public void run() {
2518                        int idx1, idx2, idx3, idx4;
2519
2520                        for (int r = l1offa; r < l1stopa; r++) {
2521                            idx1 = r * newn2;
2522                            idx2 = (rows - r) * newn2;
2523                            idx3 = idx1 + columns;
2524                            a[idx3] = a[idx2 + 1];
2525                            a[idx3 + 1] = -a[idx2];
2526                        }
2527                        for (int r = l1offa; r < l1stopa; r++) {
2528                            idx1 = r * newn2;
2529                            idx3 = (rows - r + 1) * newn2;
2530                            for (int c = columns + 2; c < newn2; c += 2) {
2531                                idx2 = idx3 - c;
2532                                idx4 = idx1 + c;
2533                                a[idx4] = a[idx2];
2534                                a[idx4 + 1] = -a[idx2 + 1];
2535
2536                            }
2537                        }
2538                        for (int r = l2offa; r < l2stopa; r++) {
2539                            idx3 = ((rows - r) % rows) * newn2;
2540                            idx4 = r * newn2;
2541                            for (int c = 0; c < newn2; c += 2) {
2542                                idx1 = idx3 + (newn2 - c) % newn2;
2543                                idx2 = idx4 + c;
2544                                a[idx1] = a[idx2];
2545                                a[idx1 + 1] = -a[idx2 + 1];
2546                            }
2547                        }
2548                    }
2549                });
2550            }
2551            ConcurrencyUtils.waitForCompletion(futures);
2552
2553        } else {
2554
2555            for (int r = 1; r < n1d2; r++) {
2556                idx2 = r * twon2;
2557                idx3 = (rows - r) * twon2;
2558                a[idx2 + columns] = a[idx3 + 1];
2559                a[idx2 + columns + 1] = -a[idx3];
2560            }
2561
2562            for (int r = 1; r < n1d2; r++) {
2563                idx2 = r * twon2;
2564                idx3 = (rows - r + 1) * twon2;
2565                for (int c = columns + 2; c < twon2; c += 2) {
2566                    a[idx2 + c] = a[idx3 - c];
2567                    a[idx2 + c + 1] = -a[idx3 - c + 1];
2568
2569                }
2570            }
2571            for (int r = 0; r <= rows / 2; r++) {
2572                idx1 = r * twon2;
2573                idx4 = ((rows - r) % rows) * twon2;
2574                for (int c = 0; c < twon2; c += 2) {
2575                    idx2 = idx1 + c;
2576                    idx3 = idx4 + (twon2 - c) % twon2;
2577                    a[idx3] = a[idx2];
2578                    a[idx3 + 1] = -a[idx2 + 1];
2579                }
2580            }
2581        }
2582        a[columns] = -a[1];
2583        a[1] = 0;
2584        idx1 = n1d2 * twon2;
2585        a[idx1 + columns] = -a[idx1 + 1];
2586        a[idx1 + 1] = 0;
2587        a[idx1 + columns + 1] = 0;
2588    }
2589
2590    private void fillSymmetric(final float[][] a) {
2591        final int newn2 = 2 * columns;
2592        int n1d2 = rows / 2;
2593
2594        int nthreads = ConcurrencyUtils.getNumberOfThreads();
2595        if ((nthreads > 1) && useThreads && (n1d2 >= nthreads)) {
2596            Future<?>[] futures = new Future[nthreads];
2597            int l1k = n1d2 / nthreads;
2598            for (int i = 0; i < nthreads; i++) {
2599                final int l1offa, l1stopa, l2offa, l2stopa;
2600                if (i == 0)
2601                    l1offa = i * l1k + 1;
2602                else {
2603                    l1offa = i * l1k;
2604                }
2605                l1stopa = i * l1k + l1k;
2606                l2offa = i * l1k;
2607                if (i == nthreads - 1) {
2608                    l2stopa = i * l1k + l1k + 1;
2609                } else {
2610                    l2stopa = i * l1k + l1k;
2611                }
2612                futures[i] = ConcurrencyUtils.submit(new Runnable() {
2613                    public void run() {
2614                        int idx1, idx2;
2615                        for (int r = l1offa; r < l1stopa; r++) {
2616                            idx1 = rows - r;
2617                            a[r][columns] = a[idx1][1];
2618                            a[r][columns + 1] = -a[idx1][0];
2619                        }
2620                        for (int r = l1offa; r < l1stopa; r++) {
2621                            idx1 = rows - r;
2622                            for (int c = columns + 2; c < newn2; c += 2) {
2623                                idx2 = newn2 - c;
2624                                a[r][c] = a[idx1][idx2];
2625                                a[r][c + 1] = -a[idx1][idx2 + 1];
2626
2627                            }
2628                        }
2629                        for (int r = l2offa; r < l2stopa; r++) {
2630                            idx1 = (rows - r) % rows;
2631                            for (int c = 0; c < newn2; c = c + 2) {
2632                                idx2 = (newn2 - c) % newn2;
2633                                a[idx1][idx2] = a[r][c];
2634                                a[idx1][idx2 + 1] = -a[r][c + 1];
2635                            }
2636                        }
2637                    }
2638                });
2639            }
2640            ConcurrencyUtils.waitForCompletion(futures);
2641        } else {
2642
2643            for (int r = 1; r < n1d2; r++) {
2644                int idx1 = rows - r;
2645                a[r][columns] = a[idx1][1];
2646                a[r][columns + 1] = -a[idx1][0];
2647            }
2648            for (int r = 1; r < n1d2; r++) {
2649                int idx1 = rows - r;
2650                for (int c = columns + 2; c < newn2; c += 2) {
2651                    int idx2 = newn2 - c;
2652                    a[r][c] = a[idx1][idx2];
2653                    a[r][c + 1] = -a[idx1][idx2 + 1];
2654                }
2655            }
2656            for (int r = 0; r <= rows / 2; r++) {
2657                int idx1 = (rows - r) % rows;
2658                for (int c = 0; c < newn2; c += 2) {
2659                    int idx2 = (newn2 - c) % newn2;
2660                    a[idx1][idx2] = a[r][c];
2661                    a[idx1][idx2 + 1] = -a[r][c + 1];
2662                }
2663            }
2664        }
2665        a[0][columns] = -a[0][1];
2666        a[0][1] = 0;
2667        a[n1d2][columns] = -a[n1d2][1];
2668        a[n1d2][1] = 0;
2669        a[n1d2][columns + 1] = 0;
2670    }
2671}