001/* ***** BEGIN LICENSE BLOCK *****
002 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
003 *
004 * The contents of this file are subject to the Mozilla Public License Version
005 * 1.1 (the "License"); you may not use this file except in compliance with
006 * the License. You may obtain a copy of the License at
007 * http://www.mozilla.org/MPL/
008 *
009 * Software distributed under the License is distributed on an "AS IS" basis,
010 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
011 * for the specific language governing rights and limitations under the
012 * License.
013 *
014 * The Original Code is JTransforms.
015 *
016 * The Initial Developer of the Original Code is
017 * Piotr Wendykier, Emory University.
018 * Portions created by the Initial Developer are Copyright (C) 2007-2009
019 * the Initial Developer. All Rights Reserved.
020 *
021 * Alternatively, the contents of this file may be used under the terms of
022 * either the GNU General Public License Version 2 or later (the "GPL"), or
023 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
024 * in which case the provisions of the GPL or the LGPL are applicable instead
025 * of those above. If you wish to allow use of your version of this file only
026 * under the terms of either the GPL or the LGPL, and not to allow others to
027 * use your version of this file under the terms of the MPL, indicate your
028 * decision by deleting the provisions above and replace them with the notice
029 * and other provisions required by the GPL or the LGPL. If you do not delete
030 * the provisions above, a recipient may use your version of this file under
031 * the terms of any one of the MPL, the GPL or the LGPL.
032 *
033 * ***** END LICENSE BLOCK ***** */
034
035package edu.emory.mathcs.jtransforms.dht;
036
037import java.util.concurrent.Future;
038
039import edu.emory.mathcs.utils.ConcurrencyUtils;
040
041/**
042 * Computes 2D Discrete Hartley Transform (DHT) of real, single precision data.
043 * The sizes of both dimensions can be arbitrary numbers. This is a parallel
044 * implementation optimized for SMP systems.<br>
045 * <br>
046 * Part of code is derived from General Purpose FFT Package written by Takuya Ooura
047 * (http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html)
048 * 
049 * @author Piotr Wendykier (piotr.wendykier@gmail.com)
050 * 
051 */
052public class FloatDHT_2D {
053
054    private int rows;
055
056    private int columns;
057
058    private float[] t;
059
060    private FloatDHT_1D dhtColumns, dhtRows;
061
062    private int oldNthreads;
063
064    private int nt;
065
066    private boolean isPowerOfTwo = false;
067
068    private boolean useThreads = false;
069
070    /**
071     * Creates new instance of FloatDHT_2D.
072     * 
073     * @param rows
074     *            number of rows
075     * @param column
076     *            number of columns
077     */
078    public FloatDHT_2D(int rows, int column) {
079        if (rows <= 1 || column <= 1) {
080            throw new IllegalArgumentException("rows and columns must be greater than 1");
081        }
082        this.rows = rows;
083        this.columns = column;
084        if (rows * column >= ConcurrencyUtils.getThreadsBeginN_2D()) {
085            this.useThreads = true;
086        }
087        if (ConcurrencyUtils.isPowerOf2(rows) && ConcurrencyUtils.isPowerOf2(column)) {
088            isPowerOfTwo = true;
089            oldNthreads = ConcurrencyUtils.getNumberOfThreads();
090            nt = 4 * oldNthreads * rows;
091            if (column == 2 * oldNthreads) {
092                nt >>= 1;
093            } else if (column < 2 * oldNthreads) {
094                nt >>= 2;
095            }
096            t = new float[nt];
097        }
098        dhtColumns = new FloatDHT_1D(column);
099        if (column == rows) {
100            dhtRows = dhtColumns;
101        } else {
102            dhtRows = new FloatDHT_1D(rows);
103        }
104    }
105
106    /**
107     * Computes 2D real, forward DHT leaving the result in <code>a</code>. The
108     * data is stored in 1D array in row-major order.
109     * 
110     * @param a
111     *            data to transform
112     */
113    public void forward(final float[] a) {
114        int nthreads = ConcurrencyUtils.getNumberOfThreads();
115        if (isPowerOfTwo) {
116            if (nthreads != oldNthreads) {
117                nt = 4 * nthreads * rows;
118                if (columns == 2 * nthreads) {
119                    nt >>= 1;
120                } else if (columns < 2 * nthreads) {
121                    nt >>= 2;
122                }
123                t = new float[nt];
124                oldNthreads = nthreads;
125            }
126            if ((nthreads > 1) && useThreads) {
127                ddxt2d_subth(-1, a, true);
128                ddxt2d0_subth(-1, a, true);
129            } else {
130                ddxt2d_sub(-1, a, true);
131                for (int i = 0; i < rows; i++) {
132                    dhtColumns.forward(a, i * columns);
133                }
134            }
135            yTransform(a);
136        } else {
137            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
138                Future<?>[] futures = new Future[nthreads];
139                int p = rows / nthreads;
140                for (int l = 0; l < nthreads; l++) {
141                    final int firstRow = l * p;
142                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
143                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
144                        public void run() {
145                            for (int i = firstRow; i < lastRow; i++) {
146                                dhtColumns.forward(a, i * columns);
147                            }
148                        }
149                    });
150                }
151                ConcurrencyUtils.waitForCompletion(futures);
152                p = columns / nthreads;
153                for (int l = 0; l < nthreads; l++) {
154                    final int firstColumn = l * p;
155                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
156                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
157                        public void run() {
158                            float[] temp = new float[rows];
159                            for (int c = firstColumn; c < lastColumn; c++) {
160                                for (int r = 0; r < rows; r++) {
161                                    temp[r] = a[r * columns + c];
162                                }
163                                dhtRows.forward(temp);
164                                for (int r = 0; r < rows; r++) {
165                                    a[r * columns + c] = temp[r];
166                                }
167                            }
168                        }
169                    });
170                }
171                ConcurrencyUtils.waitForCompletion(futures);
172
173            } else {
174                for (int i = 0; i < rows; i++) {
175                    dhtColumns.forward(a, i * columns);
176                }
177                float[] temp = new float[rows];
178                for (int c = 0; c < columns; c++) {
179                    for (int r = 0; r < rows; r++) {
180                        temp[r] = a[r * columns + c];
181                    }
182                    dhtRows.forward(temp);
183                    for (int r = 0; r < rows; r++) {
184                        a[r * columns + c] = temp[r];
185                    }
186                }
187            }
188            yTransform(a);
189        }
190    }
191
192    /**
193     * Computes 2D real, forward DHT leaving the result in <code>a</code>. The
194     * data is stored in 2D array.
195     * 
196     * @param a
197     *            data to transform
198     */
199    public void forward(final float[][] a) {
200        int nthreads = ConcurrencyUtils.getNumberOfThreads();
201        if (isPowerOfTwo) {
202            if (nthreads != oldNthreads) {
203                nt = 4 * nthreads * rows;
204                if (columns == 2 * nthreads) {
205                    nt >>= 1;
206                } else if (columns < 2 * nthreads) {
207                    nt >>= 2;
208                }
209                t = new float[nt];
210                oldNthreads = nthreads;
211            }
212            if ((nthreads > 1) && useThreads) {
213                ddxt2d_subth(-1, a, true);
214                ddxt2d0_subth(-1, a, true);
215            } else {
216                ddxt2d_sub(-1, a, true);
217                for (int i = 0; i < rows; i++) {
218                    dhtColumns.forward(a[i]);
219                }
220            }
221            y_transform(a);
222        } else {
223            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
224                Future<?>[] futures = new Future[nthreads];
225                int p = rows / nthreads;
226                for (int l = 0; l < nthreads; l++) {
227                    final int firstRow = l * p;
228                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
229                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
230                        public void run() {
231                            for (int i = firstRow; i < lastRow; i++) {
232                                dhtColumns.forward(a[i]);
233                            }
234                        }
235                    });
236                }
237                ConcurrencyUtils.waitForCompletion(futures);
238
239                p = columns / nthreads;
240                for (int l = 0; l < nthreads; l++) {
241                    final int firstColumn = l * p;
242                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
243                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
244                        public void run() {
245                            float[] temp = new float[rows];
246                            for (int c = firstColumn; c < lastColumn; c++) {
247                                for (int r = 0; r < rows; r++) {
248                                    temp[r] = a[r][c];
249                                }
250                                dhtRows.forward(temp);
251                                for (int r = 0; r < rows; r++) {
252                                    a[r][c] = temp[r];
253                                }
254                            }
255                        }
256                    });
257                }
258                ConcurrencyUtils.waitForCompletion(futures);
259
260            } else {
261                for (int i = 0; i < rows; i++) {
262                    dhtColumns.forward(a[i]);
263                }
264                float[] temp = new float[rows];
265                for (int c = 0; c < columns; c++) {
266                    for (int r = 0; r < rows; r++) {
267                        temp[r] = a[r][c];
268                    }
269                    dhtRows.forward(temp);
270                    for (int r = 0; r < rows; r++) {
271                        a[r][c] = temp[r];
272                    }
273                }
274            }
275            y_transform(a);
276        }
277    }
278
279    /**
280     * Computes 2D real, inverse DHT leaving the result in <code>a</code>. The
281     * data is stored in 1D array in row-major order.
282     * 
283     * @param a
284     *            data to transform
285     * @param scale
286     *            if true then scaling is performed
287     */
288    public void inverse(final float[] a, final boolean scale) {
289        int nthreads = ConcurrencyUtils.getNumberOfThreads();
290        if (isPowerOfTwo) {
291            if (nthreads != oldNthreads) {
292                nt = 4 * nthreads * rows;
293                if (columns == 2 * nthreads) {
294                    nt >>= 1;
295                } else if (columns < 2 * nthreads) {
296                    nt >>= 2;
297                }
298                t = new float[nt];
299                oldNthreads = nthreads;
300            }
301            if ((nthreads > 1) && useThreads) {
302                ddxt2d_subth(1, a, scale);
303                ddxt2d0_subth(1, a, scale);
304            } else {
305                ddxt2d_sub(1, a, scale);
306                for (int i = 0; i < rows; i++) {
307                    dhtColumns.inverse(a, i * columns, scale);
308                }
309            }
310            yTransform(a);
311        } else {
312            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
313                Future<?>[] futures = new Future[nthreads];
314                int p = rows / nthreads;
315                for (int l = 0; l < nthreads; l++) {
316                    final int firstRow = l * p;
317                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
318                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
319                        public void run() {
320                            for (int i = firstRow; i < lastRow; i++) {
321                                dhtColumns.inverse(a, i * columns, scale);
322                            }
323                        }
324                    });
325                }
326                ConcurrencyUtils.waitForCompletion(futures);
327
328                p = columns / nthreads;
329                for (int l = 0; l < nthreads; l++) {
330                    final int firstColumn = l * p;
331                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
332                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
333                        public void run() {
334                            float[] temp = new float[rows];
335                            for (int c = firstColumn; c < lastColumn; c++) {
336                                for (int r = 0; r < rows; r++) {
337                                    temp[r] = a[r * columns + c];
338                                }
339                                dhtRows.inverse(temp, scale);
340                                for (int r = 0; r < rows; r++) {
341                                    a[r * columns + c] = temp[r];
342                                }
343                            }
344                        }
345                    });
346                }
347                ConcurrencyUtils.waitForCompletion(futures);
348
349            } else {
350                for (int i = 0; i < rows; i++) {
351                    dhtColumns.inverse(a, i * columns, scale);
352                }
353                float[] temp = new float[rows];
354                for (int c = 0; c < columns; c++) {
355                    for (int r = 0; r < rows; r++) {
356                        temp[r] = a[r * columns + c];
357                    }
358                    dhtRows.inverse(temp, scale);
359                    for (int r = 0; r < rows; r++) {
360                        a[r * columns + c] = temp[r];
361                    }
362                }
363            }
364            yTransform(a);
365        }
366    }
367
368    /**
369     * Computes 2D real, inverse DHT leaving the result in <code>a</code>. The
370     * data is stored in 2D array.
371     * 
372     * @param a
373     *            data to transform
374     * @param scale
375     *            if true then scaling is performed
376     */
377    public void inverse(final float[][] a, final boolean scale) {
378        int nthreads = ConcurrencyUtils.getNumberOfThreads();
379        if (isPowerOfTwo) {
380            if (nthreads != oldNthreads) {
381                nt = 4 * nthreads * rows;
382                if (columns == 2 * nthreads) {
383                    nt >>= 1;
384                } else if (columns < 2 * nthreads) {
385                    nt >>= 2;
386                }
387                t = new float[nt];
388                oldNthreads = nthreads;
389            }
390            if ((nthreads > 1) && useThreads) {
391                ddxt2d_subth(1, a, scale);
392                ddxt2d0_subth(1, a, scale);
393            } else {
394                ddxt2d_sub(1, a, scale);
395                for (int i = 0; i < rows; i++) {
396                    dhtColumns.inverse(a[i], scale);
397                }
398            }
399            y_transform(a);
400        } else {
401            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
402                Future<?>[] futures = new Future[nthreads];
403                int p = rows / nthreads;
404                for (int l = 0; l < nthreads; l++) {
405                    final int firstRow = l * p;
406                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
407                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
408                        public void run() {
409                            for (int i = firstRow; i < lastRow; i++) {
410                                dhtColumns.inverse(a[i], scale);
411                            }
412                        }
413                    });
414                }
415                ConcurrencyUtils.waitForCompletion(futures);
416
417                p = columns / nthreads;
418                for (int l = 0; l < nthreads; l++) {
419                    final int firstColumn = l * p;
420                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
421                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
422                        public void run() {
423                            float[] temp = new float[rows];
424                            for (int c = firstColumn; c < lastColumn; c++) {
425                                for (int r = 0; r < rows; r++) {
426                                    temp[r] = a[r][c];
427                                }
428                                dhtRows.inverse(temp, scale);
429                                for (int r = 0; r < rows; r++) {
430                                    a[r][c] = temp[r];
431                                }
432                            }
433                        }
434                    });
435                }
436                ConcurrencyUtils.waitForCompletion(futures);
437
438            } else {
439                for (int i = 0; i < rows; i++) {
440                    dhtColumns.inverse(a[i], scale);
441                }
442                float[] temp = new float[rows];
443                for (int c = 0; c < columns; c++) {
444                    for (int r = 0; r < rows; r++) {
445                        temp[r] = a[r][c];
446                    }
447                    dhtRows.inverse(temp, scale);
448                    for (int r = 0; r < rows; r++) {
449                        a[r][c] = temp[r];
450                    }
451                }
452            }
453            y_transform(a);
454        }
455    }
456
457    private void ddxt2d_subth(final int isgn, final float[] a, final boolean scale) {
458        int nthread = ConcurrencyUtils.getNumberOfThreads();
459        int nt = 4 * rows;
460        if (columns == 2 * nthread) {
461            nt >>= 1;
462        } else if (columns < 2 * nthread) {
463            nthread = columns;
464            nt >>= 2;
465        }
466        final int nthreads = nthread;
467        Future<?>[] futures = new Future[nthreads];
468
469        for (int i = 0; i < nthreads; i++) {
470            final int n0 = i;
471            final int startt = nt * i;
472            futures[i] = ConcurrencyUtils.submit(new Runnable() {
473                public void run() {
474                    int idx1, idx2;
475                    if (columns > 2 * nthreads) {
476                        if (isgn == -1) {
477                            for (int c = 4 * n0; c < columns; c += 4 * nthreads) {
478                                for (int r = 0; r < rows; r++) {
479                                    idx1 = r * columns + c;
480                                    idx2 = startt + rows + r;
481                                    t[startt + r] = a[idx1];
482                                    t[idx2] = a[idx1 + 1];
483                                    t[idx2 + rows] = a[idx1 + 2];
484                                    t[idx2 + 2 * rows] = a[idx1 + 3];
485                                }
486                                dhtRows.forward(t, startt);
487                                dhtRows.forward(t, startt + rows);
488                                dhtRows.forward(t, startt + 2 * rows);
489                                dhtRows.forward(t, startt + 3 * rows);
490                                for (int r = 0; r < rows; r++) {
491                                    idx1 = r * columns + c;
492                                    idx2 = startt + rows + r;
493                                    a[idx1] = t[startt + r];
494                                    a[idx1 + 1] = t[idx2];
495                                    a[idx1 + 2] = t[idx2 + rows];
496                                    a[idx1 + 3] = t[idx2 + 2 * rows];
497                                }
498                            }
499                        } else {
500                            for (int c = 4 * n0; c < columns; c += 4 * nthreads) {
501                                for (int r = 0; r < rows; r++) {
502                                    idx1 = r * columns + c;
503                                    idx2 = startt + rows + r;
504                                    t[startt + r] = a[idx1];
505                                    t[idx2] = a[idx1 + 1];
506                                    t[idx2 + rows] = a[idx1 + 2];
507                                    t[idx2 + 2 * rows] = a[idx1 + 3];
508                                }
509                                dhtRows.inverse(t, startt, scale);
510                                dhtRows.inverse(t, startt + rows, scale);
511                                dhtRows.inverse(t, startt + 2 * rows, scale);
512                                dhtRows.inverse(t, startt + 3 * rows, scale);
513                                for (int r = 0; r < rows; r++) {
514                                    idx1 = r * columns + c;
515                                    idx2 = startt + rows + r;
516                                    a[idx1] = t[startt + r];
517                                    a[idx1 + 1] = t[idx2];
518                                    a[idx1 + 2] = t[idx2 + rows];
519                                    a[idx1 + 3] = t[idx2 + 2 * rows];
520                                }
521                            }
522                        }
523                    } else if (columns == 2 * nthreads) {
524                        for (int r = 0; r < rows; r++) {
525                            idx1 = r * columns + 2 * n0;
526                            idx2 = startt + r;
527                            t[idx2] = a[idx1];
528                            t[idx2 + rows] = a[idx1 + 1];
529                        }
530                        if (isgn == -1) {
531                            dhtRows.forward(t, startt);
532                            dhtRows.forward(t, startt + rows);
533                        } else {
534                            dhtRows.inverse(t, startt, scale);
535                            dhtRows.inverse(t, startt + rows, scale);
536                        }
537                        for (int r = 0; r < rows; r++) {
538                            idx1 = r * columns + 2 * n0;
539                            idx2 = startt + r;
540                            a[idx1] = t[idx2];
541                            a[idx1 + 1] = t[idx2 + rows];
542                        }
543                    } else if (columns == nthreads) {
544                        for (int r = 0; r < rows; r++) {
545                            t[startt + r] = a[r * columns + n0];
546                        }
547                        if (isgn == -1) {
548                            dhtRows.forward(t, startt);
549                        } else {
550                            dhtRows.inverse(t, startt, scale);
551                        }
552                        for (int r = 0; r < rows; r++) {
553                            a[r * columns + n0] = t[startt + r];
554                        }
555                    }
556                }
557            });
558        }
559        ConcurrencyUtils.waitForCompletion(futures);
560    }
561
562    private void ddxt2d_subth(final int isgn, final float[][] a, final boolean scale) {
563        int nthread = ConcurrencyUtils.getNumberOfThreads();
564        int nt = 4 * rows;
565        if (columns == 2 * nthread) {
566            nt >>= 1;
567        } else if (columns < 2 * nthread) {
568            nthread = columns;
569            nt >>= 2;
570        }
571        final int nthreads = nthread;
572        Future<?>[] futures = new Future[nthreads];
573
574        for (int i = 0; i < nthreads; i++) {
575            final int n0 = i;
576            final int startt = nt * i;
577            futures[i] = ConcurrencyUtils.submit(new Runnable() {
578                public void run() {
579                    int idx2;
580                    if (columns > 2 * nthreads) {
581                        if (isgn == -1) {
582                            for (int c = 4 * n0; c < columns; c += 4 * nthreads) {
583                                for (int r = 0; r < rows; r++) {
584                                    idx2 = startt + rows + r;
585                                    t[startt + r] = a[r][c];
586                                    t[idx2] = a[r][c + 1];
587                                    t[idx2 + rows] = a[r][c + 2];
588                                    t[idx2 + 2 * rows] = a[r][c + 3];
589                                }
590                                dhtRows.forward(t, startt);
591                                dhtRows.forward(t, startt + rows);
592                                dhtRows.forward(t, startt + 2 * rows);
593                                dhtRows.forward(t, startt + 3 * rows);
594                                for (int r = 0; r < rows; r++) {
595                                    idx2 = startt + rows + r;
596                                    a[r][c] = t[startt + r];
597                                    a[r][c + 1] = t[idx2];
598                                    a[r][c + 2] = t[idx2 + rows];
599                                    a[r][c + 3] = t[idx2 + 2 * rows];
600                                }
601                            }
602                        } else {
603                            for (int c = 4 * n0; c < columns; c += 4 * nthreads) {
604                                for (int r = 0; r < rows; r++) {
605                                    idx2 = startt + rows + r;
606                                    t[startt + r] = a[r][c];
607                                    t[idx2] = a[r][c + 1];
608                                    t[idx2 + rows] = a[r][c + 2];
609                                    t[idx2 + 2 * rows] = a[r][c + 3];
610                                }
611                                dhtRows.inverse(t, startt, scale);
612                                dhtRows.inverse(t, startt + rows, scale);
613                                dhtRows.inverse(t, startt + 2 * rows, scale);
614                                dhtRows.inverse(t, startt + 3 * rows, scale);
615                                for (int r = 0; r < rows; r++) {
616                                    idx2 = startt + rows + r;
617                                    a[r][c] = t[startt + r];
618                                    a[r][c + 1] = t[idx2];
619                                    a[r][c + 2] = t[idx2 + rows];
620                                    a[r][c + 3] = t[idx2 + 2 * rows];
621                                }
622                            }
623                        }
624                    } else if (columns == 2 * nthreads) {
625                        for (int r = 0; r < rows; r++) {
626                            idx2 = startt + r;
627                            t[idx2] = a[r][2 * n0];
628                            t[idx2 + rows] = a[r][2 * n0 + 1];
629                        }
630                        if (isgn == -1) {
631                            dhtRows.forward(t, startt);
632                            dhtRows.forward(t, startt + rows);
633                        } else {
634                            dhtRows.inverse(t, startt, scale);
635                            dhtRows.inverse(t, startt + rows, scale);
636                        }
637                        for (int r = 0; r < rows; r++) {
638                            idx2 = startt + r;
639                            a[r][2 * n0] = t[idx2];
640                            a[r][2 * n0 + 1] = t[idx2 + rows];
641                        }
642                    } else if (columns == nthreads) {
643                        for (int r = 0; r < rows; r++) {
644                            t[startt + r] = a[r][n0];
645                        }
646                        if (isgn == -1) {
647                            dhtRows.forward(t, startt);
648                        } else {
649                            dhtRows.inverse(t, startt, scale);
650                        }
651                        for (int r = 0; r < rows; r++) {
652                            a[r][n0] = t[startt + r];
653                        }
654                    }
655                }
656            });
657        }
658        ConcurrencyUtils.waitForCompletion(futures);
659    }
660
661    private void ddxt2d0_subth(final int isgn, final float[] a, final boolean scale) {
662        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
663
664        Future<?>[] futures = new Future[nthreads];
665
666        for (int i = 0; i < nthreads; i++) {
667            final int n0 = i;
668            futures[i] = ConcurrencyUtils.submit(new Runnable() {
669
670                public void run() {
671                    if (isgn == -1) {
672                        for (int r = n0; r < rows; r += nthreads) {
673                            dhtColumns.forward(a, r * columns);
674                        }
675                    } else {
676                        for (int r = n0; r < rows; r += nthreads) {
677                            dhtColumns.inverse(a, r * columns, scale);
678                        }
679                    }
680                }
681            });
682        }
683        ConcurrencyUtils.waitForCompletion(futures);
684    }
685
686    private void ddxt2d0_subth(final int isgn, final float[][] a, final boolean scale) {
687        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
688
689        Future<?>[] futures = new Future[nthreads];
690
691        for (int i = 0; i < nthreads; i++) {
692            final int n0 = i;
693            futures[i] = ConcurrencyUtils.submit(new Runnable() {
694
695                public void run() {
696                    if (isgn == -1) {
697                        for (int r = n0; r < rows; r += nthreads) {
698                            dhtColumns.forward(a[r]);
699                        }
700                    } else {
701                        for (int r = n0; r < rows; r += nthreads) {
702                            dhtColumns.inverse(a[r], scale);
703                        }
704                    }
705                }
706            });
707        }
708        ConcurrencyUtils.waitForCompletion(futures);
709    }
710
711    private void ddxt2d_sub(int isgn, float[] a, boolean scale) {
712        int idx1, idx2;
713
714        if (columns > 2) {
715            if (isgn == -1) {
716                for (int c = 0; c < columns; c += 4) {
717                    for (int r = 0; r < rows; r++) {
718                        idx1 = r * columns + c;
719                        idx2 = rows + r;
720                        t[r] = a[idx1];
721                        t[idx2] = a[idx1 + 1];
722                        t[idx2 + rows] = a[idx1 + 2];
723                        t[idx2 + 2 * rows] = a[idx1 + 3];
724                    }
725                    dhtRows.forward(t, 0);
726                    dhtRows.forward(t, rows);
727                    dhtRows.forward(t, 2 * rows);
728                    dhtRows.forward(t, 3 * rows);
729                    for (int r = 0; r < rows; r++) {
730                        idx1 = r * columns + c;
731                        idx2 = rows + r;
732                        a[idx1] = t[r];
733                        a[idx1 + 1] = t[idx2];
734                        a[idx1 + 2] = t[idx2 + rows];
735                        a[idx1 + 3] = t[idx2 + 2 * rows];
736                    }
737                }
738            } else {
739                for (int c = 0; c < columns; c += 4) {
740                    for (int r = 0; r < rows; r++) {
741                        idx1 = r * columns + c;
742                        idx2 = rows + r;
743                        t[r] = a[idx1];
744                        t[idx2] = a[idx1 + 1];
745                        t[idx2 + rows] = a[idx1 + 2];
746                        t[idx2 + 2 * rows] = a[idx1 + 3];
747                    }
748                    dhtRows.inverse(t, 0, scale);
749                    dhtRows.inverse(t, rows, scale);
750                    dhtRows.inverse(t, 2 * rows, scale);
751                    dhtRows.inverse(t, 3 * rows, scale);
752                    for (int r = 0; r < rows; r++) {
753                        idx1 = r * columns + c;
754                        idx2 = rows + r;
755                        a[idx1] = t[r];
756                        a[idx1 + 1] = t[idx2];
757                        a[idx1 + 2] = t[idx2 + rows];
758                        a[idx1 + 3] = t[idx2 + 2 * rows];
759                    }
760                }
761            }
762        } else if (columns == 2) {
763            for (int r = 0; r < rows; r++) {
764                idx1 = r * columns;
765                t[r] = a[idx1];
766                t[rows + r] = a[idx1 + 1];
767            }
768            if (isgn == -1) {
769                dhtRows.forward(t, 0);
770                dhtRows.forward(t, rows);
771            } else {
772                dhtRows.inverse(t, 0, scale);
773                dhtRows.inverse(t, rows, scale);
774            }
775            for (int r = 0; r < rows; r++) {
776                idx1 = r * columns;
777                a[idx1] = t[r];
778                a[idx1 + 1] = t[rows + r];
779            }
780        }
781    }
782
783    private void ddxt2d_sub(int isgn, float[][] a, boolean scale) {
784        int idx2;
785
786        if (columns > 2) {
787            if (isgn == -1) {
788                for (int c = 0; c < columns; c += 4) {
789                    for (int r = 0; r < rows; r++) {
790                        idx2 = rows + r;
791                        t[r] = a[r][c];
792                        t[idx2] = a[r][c + 1];
793                        t[idx2 + rows] = a[r][c + 2];
794                        t[idx2 + 2 * rows] = a[r][c + 3];
795                    }
796                    dhtRows.forward(t, 0);
797                    dhtRows.forward(t, rows);
798                    dhtRows.forward(t, 2 * rows);
799                    dhtRows.forward(t, 3 * rows);
800                    for (int r = 0; r < rows; r++) {
801                        idx2 = rows + r;
802                        a[r][c] = t[r];
803                        a[r][c + 1] = t[idx2];
804                        a[r][c + 2] = t[idx2 + rows];
805                        a[r][c + 3] = t[idx2 + 2 * rows];
806                    }
807                }
808            } else {
809                for (int c = 0; c < columns; c += 4) {
810                    for (int r = 0; r < rows; r++) {
811                        idx2 = rows + r;
812                        t[r] = a[r][c];
813                        t[idx2] = a[r][c + 1];
814                        t[idx2 + rows] = a[r][c + 2];
815                        t[idx2 + 2 * rows] = a[r][c + 3];
816                    }
817                    dhtRows.inverse(t, 0, scale);
818                    dhtRows.inverse(t, rows, scale);
819                    dhtRows.inverse(t, 2 * rows, scale);
820                    dhtRows.inverse(t, 3 * rows, scale);
821                    for (int r = 0; r < rows; r++) {
822                        idx2 = rows + r;
823                        a[r][c] = t[r];
824                        a[r][c + 1] = t[idx2];
825                        a[r][c + 2] = t[idx2 + rows];
826                        a[r][c + 3] = t[idx2 + 2 * rows];
827                    }
828                }
829            }
830        } else if (columns == 2) {
831            for (int r = 0; r < rows; r++) {
832                t[r] = a[r][0];
833                t[rows + r] = a[r][1];
834            }
835            if (isgn == -1) {
836                dhtRows.forward(t, 0);
837                dhtRows.forward(t, rows);
838            } else {
839                dhtRows.inverse(t, 0, scale);
840                dhtRows.inverse(t, rows, scale);
841            }
842            for (int r = 0; r < rows; r++) {
843                a[r][0] = t[r];
844                a[r][1] = t[rows + r];
845            }
846        }
847    }
848
849    private void yTransform(float[] a) {
850        int mRow, mCol, idx1, idx2;
851        float A, B, C, D, E;
852        for (int r = 0; r <= rows / 2; r++) {
853            mRow = (rows - r) % rows;
854            idx1 = r * columns;
855            idx2 = mRow * columns;
856            for (int c = 0; c <= columns / 2; c++) {
857                mCol = (columns - c) % columns;
858                A = a[idx1 + c];
859                B = a[idx2 + c];
860                C = a[idx1 + mCol];
861                D = a[idx2 + mCol];
862                E = ((A + D) - (B + C)) / 2;
863                a[idx1 + c] = A - E;
864                a[idx2 + c] = B + E;
865                a[idx1 + mCol] = C + E;
866                a[idx2 + mCol] = D - E;
867            }
868        }
869    }
870
871    private void y_transform(float[][] a) {
872        int mRow, mCol;
873        float A, B, C, D, E;
874        for (int r = 0; r <= rows / 2; r++) {
875            mRow = (rows - r) % rows;
876            for (int c = 0; c <= columns / 2; c++) {
877                mCol = (columns - c) % columns;
878                A = a[r][c];
879                B = a[mRow][c];
880                C = a[r][mCol];
881                D = a[mRow][mCol];
882                E = ((A + D) - (B + C)) / 2;
883                a[r][c] = A - E;
884                a[mRow][c] = B + E;
885                a[r][mCol] = C + E;
886                a[mRow][mCol] = D - E;
887            }
888        }
889    }
890
891}