001/* ***** BEGIN LICENSE BLOCK *****
002 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
003 *
004 * The contents of this file are subject to the Mozilla Public License Version
005 * 1.1 (the "License"); you may not use this file except in compliance with
006 * the License. You may obtain a copy of the License at
007 * http://www.mozilla.org/MPL/
008 *
009 * Software distributed under the License is distributed on an "AS IS" basis,
010 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
011 * for the specific language governing rights and limitations under the
012 * License.
013 *
014 * The Original Code is JTransforms.
015 *
016 * The Initial Developer of the Original Code is
017 * Piotr Wendykier, Emory University.
018 * Portions created by the Initial Developer are Copyright (C) 2007-2009
019 * the Initial Developer. All Rights Reserved.
020 *
021 * Alternatively, the contents of this file may be used under the terms of
022 * either the GNU General Public License Version 2 or later (the "GPL"), or
023 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
024 * in which case the provisions of the GPL or the LGPL are applicable instead
025 * of those above. If you wish to allow use of your version of this file only
026 * under the terms of either the GPL or the LGPL, and not to allow others to
027 * use your version of this file under the terms of the MPL, indicate your
028 * decision by deleting the provisions above and replace them with the notice
029 * and other provisions required by the GPL or the LGPL. If you do not delete
030 * the provisions above, a recipient may use your version of this file under
031 * the terms of any one of the MPL, the GPL or the LGPL.
032 *
033 * ***** END LICENSE BLOCK ***** */
034
035package edu.emory.mathcs.jtransforms.dst;
036
037import java.util.concurrent.Future;
038
039import edu.emory.mathcs.utils.ConcurrencyUtils;
040
041/**
042 * Computes 2D Discrete Sine Transform (DST) of single precision data. The sizes
043 * of both dimensions can be arbitrary numbers. This is a parallel
044 * implementation optimized for SMP systems.<br>
045 * <br>
 * Part of the code is derived from the General Purpose FFT Package written by
 * Takuya Ooura (http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html).
048 * 
049 * @author Piotr Wendykier (piotr.wendykier@gmail.com)
050 * 
051 */
052public class FloatDST_2D {
053
054    private int rows;
055
056    private int columns;
057
058    private float[] t;
059
060    private FloatDST_1D dstColumns, dstRows;
061
062    private int oldNthreads;
063
064    private int nt;
065
066    private boolean isPowerOfTwo = false;
067
068    private boolean useThreads = false;
069
070    /**
071     * Creates new instance of FloatDST_2D.
072     * 
073     * @param rows
074     *            number of rows
075     * @param columns
076     *            number of columns
077     */
078    public FloatDST_2D(int rows, int columns) {
079        if (rows <= 1 || columns <= 1) {
080            throw new IllegalArgumentException("rows and columns must be greater than 1");
081        }
082        this.rows = rows;
083        this.columns = columns;
084        if (rows * columns >= ConcurrencyUtils.getThreadsBeginN_2D()) {
085            useThreads = true;
086        }
087        if (ConcurrencyUtils.isPowerOf2(rows) && ConcurrencyUtils.isPowerOf2(columns)) {
088            isPowerOfTwo = true;
089            oldNthreads = ConcurrencyUtils.getNumberOfThreads();
090            nt = 4 * oldNthreads * rows;
091            if (columns == 2 * oldNthreads) {
092                nt >>= 1;
093            } else if (columns < 2 * oldNthreads) {
094                nt >>= 2;
095            }
096            t = new float[nt];
097        }
098        dstColumns = new FloatDST_1D(columns);
099        if (columns == rows) {
100            dstRows = dstColumns;
101        } else {
102            dstRows = new FloatDST_1D(rows);
103        }
104    }
105
106    /**
107     * Computes 2D forward DST (DST-II) leaving the result in <code>a</code>.
108     * The data is stored in 1D array in row-major order.
109     * 
110     * @param a
111     *            data to transform
112     * @param scale
113     *            if true then scaling is performed
114     */
115    public void forward(final float[] a, final boolean scale) {
116        int nthreads = ConcurrencyUtils.getNumberOfThreads();
117        if (isPowerOfTwo) {
118            if (nthreads != oldNthreads) {
119                nt = 4 * nthreads * rows;
120                if (columns == 2 * nthreads) {
121                    nt >>= 1;
122                } else if (columns < 2 * nthreads) {
123                    nt >>= 2;
124                }
125                t = new float[nt];
126                oldNthreads = nthreads;
127            }
128            if ((nthreads > 1) && useThreads) {
129                ddxt2d_subth(-1, a, scale);
130                ddxt2d0_subth(-1, a, scale);
131            } else {
132                ddxt2d_sub(-1, a, scale);
133                for (int i = 0; i < rows; i++) {
134                    dstColumns.forward(a, i * columns, scale);
135                }
136            }
137        } else {
138            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
139                Future<?>[] futures = new Future[nthreads];
140                int p = rows / nthreads;
141                for (int l = 0; l < nthreads; l++) {
142                    final int firstRow = l * p;
143                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
144                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
145                        public void run() {
146                            for (int i = firstRow; i < lastRow; i++) {
147                                dstColumns.forward(a, i * columns, scale);
148                            }
149                        }
150                    });
151                }
152                ConcurrencyUtils.waitForCompletion(futures);
153                p = columns / nthreads;
154                for (int l = 0; l < nthreads; l++) {
155                    final int firstColumn = l * p;
156                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
157                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
158                        public void run() {
159                            float[] temp = new float[rows];
160                            for (int c = firstColumn; c < lastColumn; c++) {
161                                for (int r = 0; r < rows; r++) {
162                                    temp[r] = a[r * columns + c];
163                                }
164                                dstRows.forward(temp, scale);
165                                for (int r = 0; r < rows; r++) {
166                                    a[r * columns + c] = temp[r];
167                                }
168                            }
169                        }
170                    });
171                }
172                ConcurrencyUtils.waitForCompletion(futures);
173            } else {
174                for (int i = 0; i < rows; i++) {
175                    dstColumns.forward(a, i * columns, scale);
176                }
177                float[] temp = new float[rows];
178                for (int c = 0; c < columns; c++) {
179                    for (int r = 0; r < rows; r++) {
180                        temp[r] = a[r * columns + c];
181                    }
182                    dstRows.forward(temp, scale);
183                    for (int r = 0; r < rows; r++) {
184                        a[r * columns + c] = temp[r];
185                    }
186                }
187            }
188        }
189    }
190
191    /**
192     * Computes 2D forward DST (DST-II) leaving the result in <code>a</code>.
193     * The data is stored in 2D array.
194     * 
195     * @param a
196     *            data to transform
197     * @param scale
198     *            if true then scaling is performed
199     */
200    public void forward(final float[][] a, final boolean scale) {
201        int nthreads = ConcurrencyUtils.getNumberOfThreads();
202        if (isPowerOfTwo) {
203            if (nthreads != oldNthreads) {
204                nt = 4 * nthreads * rows;
205                if (columns == 2 * nthreads) {
206                    nt >>= 1;
207                } else if (columns < 2 * nthreads) {
208                    nt >>= 2;
209                }
210                t = new float[nt];
211                oldNthreads = nthreads;
212            }
213            if ((nthreads > 1) && useThreads) {
214                ddxt2d_subth(-1, a, scale);
215                ddxt2d0_subth(-1, a, scale);
216            } else {
217                ddxt2d_sub(-1, a, scale);
218                for (int i = 0; i < rows; i++) {
219                    dstColumns.forward(a[i], scale);
220                }
221            }
222        } else {
223            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
224                Future<?>[] futures = new Future[nthreads];
225                int p = rows / nthreads;
226                for (int l = 0; l < nthreads; l++) {
227                    final int firstRow = l * p;
228                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
229                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
230                        public void run() {
231                            for (int i = firstRow; i < lastRow; i++) {
232                                dstColumns.forward(a[i], scale);
233                            }
234                        }
235                    });
236                }
237                ConcurrencyUtils.waitForCompletion(futures);
238                p = columns / nthreads;
239                for (int l = 0; l < nthreads; l++) {
240                    final int firstColumn = l * p;
241                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
242                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
243                        public void run() {
244                            float[] temp = new float[rows];
245                            for (int c = firstColumn; c < lastColumn; c++) {
246                                for (int r = 0; r < rows; r++) {
247                                    temp[r] = a[r][c];
248                                }
249                                dstRows.forward(temp, scale);
250                                for (int r = 0; r < rows; r++) {
251                                    a[r][c] = temp[r];
252                                }
253                            }
254                        }
255                    });
256                }
257                ConcurrencyUtils.waitForCompletion(futures);
258            } else {
259                for (int i = 0; i < rows; i++) {
260                    dstColumns.forward(a[i], scale);
261                }
262                float[] temp = new float[rows];
263                for (int c = 0; c < columns; c++) {
264                    for (int r = 0; r < rows; r++) {
265                        temp[r] = a[r][c];
266                    }
267                    dstRows.forward(temp, scale);
268                    for (int r = 0; r < rows; r++) {
269                        a[r][c] = temp[r];
270                    }
271                }
272            }
273        }
274    }
275
276    /**
277     * Computes 2D inverse DST (DST-III) leaving the result in <code>a</code>.
278     * The data is stored in 1D array in row-major order.
279     * 
280     * @param a
281     *            data to transform
282     * @param scale
283     *            if true then scaling is performed
284     */
285    public void inverse(final float[] a, final boolean scale) {
286        int nthreads = ConcurrencyUtils.getNumberOfThreads();
287        if (isPowerOfTwo) {
288            if (nthreads != oldNthreads) {
289                nt = 4 * nthreads * rows;
290                if (columns == 2 * nthreads) {
291                    nt >>= 1;
292                } else if (columns < 2 * nthreads) {
293                    nt >>= 2;
294                }
295                t = new float[nt];
296                oldNthreads = nthreads;
297            }
298            if ((nthreads > 1) && useThreads) {
299                ddxt2d_subth(1, a, scale);
300                ddxt2d0_subth(1, a, scale);
301            } else {
302                ddxt2d_sub(1, a, scale);
303                for (int i = 0; i < rows; i++) {
304                    dstColumns.inverse(a, i * columns, scale);
305                }
306            }
307        } else {
308            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
309                Future<?>[] futures = new Future[nthreads];
310                int p = rows / nthreads;
311                for (int l = 0; l < nthreads; l++) {
312                    final int firstRow = l * p;
313                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
314                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
315                        public void run() {
316                            for (int i = firstRow; i < lastRow; i++) {
317                                dstColumns.inverse(a, i * columns, scale);
318                            }
319                        }
320                    });
321                }
322                ConcurrencyUtils.waitForCompletion(futures);
323                p = columns / nthreads;
324                for (int l = 0; l < nthreads; l++) {
325                    final int firstColumn = l * p;
326                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
327                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
328                        public void run() {
329                            float[] temp = new float[rows];
330                            for (int c = firstColumn; c < lastColumn; c++) {
331                                for (int r = 0; r < rows; r++) {
332                                    temp[r] = a[r * columns + c];
333                                }
334                                dstRows.inverse(temp, scale);
335                                for (int r = 0; r < rows; r++) {
336                                    a[r * columns + c] = temp[r];
337                                }
338                            }
339                        }
340                    });
341                }
342                ConcurrencyUtils.waitForCompletion(futures);
343            } else {
344                for (int i = 0; i < rows; i++) {
345                    dstColumns.inverse(a, i * columns, scale);
346                }
347                float[] temp = new float[rows];
348                for (int c = 0; c < columns; c++) {
349                    for (int r = 0; r < rows; r++) {
350                        temp[r] = a[r * columns + c];
351                    }
352                    dstRows.inverse(temp, scale);
353                    for (int r = 0; r < rows; r++) {
354                        a[r * columns + c] = temp[r];
355                    }
356                }
357            }
358        }
359    }
360
361    /**
362     * Computes 2D inverse DST (DST-III) leaving the result in <code>a</code>.
363     * The data is stored in 2D array.
364     * 
365     * @param a
366     *            data to transform
367     * @param scale
368     *            if true then scaling is performed
369     */
370    public void inverse(final float[][] a, final boolean scale) {
371        int nthreads = ConcurrencyUtils.getNumberOfThreads();
372        if (isPowerOfTwo) {
373            if (nthreads != oldNthreads) {
374                nt = 4 * nthreads * rows;
375                if (columns == 2 * nthreads) {
376                    nt >>= 1;
377                } else if (columns < 2 * nthreads) {
378                    nt >>= 2;
379                }
380                t = new float[nt];
381                oldNthreads = nthreads;
382            }
383            if ((nthreads > 1) && useThreads) {
384                ddxt2d_subth(1, a, scale);
385                ddxt2d0_subth(1, a, scale);
386            } else {
387                ddxt2d_sub(1, a, scale);
388                for (int i = 0; i < rows; i++) {
389                    dstColumns.inverse(a[i], scale);
390                }
391            }
392        } else {
393            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
394                Future<?>[] futures = new Future[nthreads];
395                int p = rows / nthreads;
396                for (int l = 0; l < nthreads; l++) {
397                    final int firstRow = l * p;
398                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
399                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
400                        public void run() {
401                            for (int i = firstRow; i < lastRow; i++) {
402                                dstColumns.inverse(a[i], scale);
403                            }
404                        }
405                    });
406                }
407                ConcurrencyUtils.waitForCompletion(futures);
408                p = columns / nthreads;
409                for (int l = 0; l < nthreads; l++) {
410                    final int firstColumn = l * p;
411                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
412                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
413                        public void run() {
414                            float[] temp = new float[rows];
415                            for (int c = firstColumn; c < lastColumn; c++) {
416                                for (int r = 0; r < rows; r++) {
417                                    temp[r] = a[r][c];
418                                }
419                                dstRows.inverse(temp, scale);
420                                for (int r = 0; r < rows; r++) {
421                                    a[r][c] = temp[r];
422                                }
423                            }
424                        }
425                    });
426                }
427                ConcurrencyUtils.waitForCompletion(futures);
428            } else {
429                for (int i = 0; i < rows; i++) {
430                    dstColumns.inverse(a[i], scale);
431                }
432                float[] temp = new float[rows];
433                for (int c = 0; c < columns; c++) {
434                    for (int r = 0; r < rows; r++) {
435                        temp[r] = a[r][c];
436                    }
437                    dstRows.inverse(temp, scale);
438                    for (int r = 0; r < rows; r++) {
439                        a[r][c] = temp[r];
440                    }
441                }
442            }
443        }
444    }
445
446    private void ddxt2d_subth(final int isgn, final float[] a, final boolean scale) {
447        int nthread = ConcurrencyUtils.getNumberOfThreads();
448        int nt = 4 * rows;
449        if (columns == 2 * nthread) {
450            nt >>= 1;
451        } else if (columns < 2 * nthread) {
452            nthread = columns;
453            nt >>= 2;
454        }
455        final int nthreads = nthread;
456        Future<?>[] futures = new Future[nthreads];
457
458        for (int i = 0; i < nthreads; i++) {
459            final int n0 = i;
460            final int startt = nt * i;
461            futures[i] = ConcurrencyUtils.submit(new Runnable() {
462                public void run() {
463                    int idx1, idx2;
464                    if (columns > 2 * nthreads) {
465                        if (isgn == -1) {
466                            for (int c = 4 * n0; c < columns; c += 4 * nthreads) {
467                                for (int r = 0; r < rows; r++) {
468                                    idx1 = r * columns + c;
469                                    idx2 = startt + rows + r;
470                                    t[startt + r] = a[idx1];
471                                    t[idx2] = a[idx1 + 1];
472                                    t[idx2 + rows] = a[idx1 + 2];
473                                    t[idx2 + 2 * rows] = a[idx1 + 3];
474                                }
475                                dstRows.forward(t, startt, scale);
476                                dstRows.forward(t, startt + rows, scale);
477                                dstRows.forward(t, startt + 2 * rows, scale);
478                                dstRows.forward(t, startt + 3 * rows, scale);
479                                for (int r = 0; r < rows; r++) {
480                                    idx1 = r * columns + c;
481                                    idx2 = startt + rows + r;
482                                    a[idx1] = t[startt + r];
483                                    a[idx1 + 1] = t[idx2];
484                                    a[idx1 + 2] = t[idx2 + rows];
485                                    a[idx1 + 3] = t[idx2 + 2 * rows];
486                                }
487                            }
488                        } else {
489                            for (int c = 4 * n0; c < columns; c += 4 * nthreads) {
490                                for (int r = 0; r < rows; r++) {
491                                    idx1 = r * columns + c;
492                                    idx2 = startt + rows + r;
493                                    t[startt + r] = a[idx1];
494                                    t[idx2] = a[idx1 + 1];
495                                    t[idx2 + rows] = a[idx1 + 2];
496                                    t[idx2 + 2 * rows] = a[idx1 + 3];
497                                }
498                                dstRows.inverse(t, startt, scale);
499                                dstRows.inverse(t, startt + rows, scale);
500                                dstRows.inverse(t, startt + 2 * rows, scale);
501                                dstRows.inverse(t, startt + 3 * rows, scale);
502                                for (int r = 0; r < rows; r++) {
503                                    idx1 = r * columns + c;
504                                    idx2 = startt + rows + r;
505                                    a[idx1] = t[startt + r];
506                                    a[idx1 + 1] = t[idx2];
507                                    a[idx1 + 2] = t[idx2 + rows];
508                                    a[idx1 + 3] = t[idx2 + 2 * rows];
509                                }
510                            }
511                        }
512                    } else if (columns == 2 * nthreads) {
513                        for (int r = 0; r < rows; r++) {
514                            idx1 = r * columns + 2 * n0;
515                            idx2 = startt + r;
516                            t[idx2] = a[idx1];
517                            t[idx2 + rows] = a[idx1 + 1];
518                        }
519                        if (isgn == -1) {
520                            dstRows.forward(t, startt, scale);
521                            dstRows.forward(t, startt + rows, scale);
522                        } else {
523                            dstRows.inverse(t, startt, scale);
524                            dstRows.inverse(t, startt + rows, scale);
525                        }
526                        for (int r = 0; r < rows; r++) {
527                            idx1 = r * columns + 2 * n0;
528                            idx2 = startt + r;
529                            a[idx1] = t[idx2];
530                            a[idx1 + 1] = t[idx2 + rows];
531                        }
532                    } else if (columns == nthreads) {
533                        for (int r = 0; r < rows; r++) {
534                            t[startt + r] = a[r * columns + n0];
535                        }
536                        if (isgn == -1) {
537                            dstRows.forward(t, startt, scale);
538                        } else {
539                            dstRows.inverse(t, startt, scale);
540                        }
541                        for (int r = 0; r < rows; r++) {
542                            a[r * columns + n0] = t[startt + r];
543                        }
544                    }
545                }
546            });
547        }
548        ConcurrencyUtils.waitForCompletion(futures);
549    }
550
551    private void ddxt2d_subth(final int isgn, final float[][] a, final boolean scale) {
552        int nthread = ConcurrencyUtils.getNumberOfThreads();
553        int nt = 4 * rows;
554        if (columns == 2 * nthread) {
555            nt >>= 1;
556        } else if (columns < 2 * nthread) {
557            nthread = columns;
558            nt >>= 2;
559        }
560        final int nthreads = nthread;
561        Future<?>[] futures = new Future[nthreads];
562
563        for (int i = 0; i < nthreads; i++) {
564            final int n0 = i;
565            final int startt = nt * i;
566            futures[i] = ConcurrencyUtils.submit(new Runnable() {
567                public void run() {
568                    int idx2;
569                    if (columns > 2 * nthreads) {
570                        if (isgn == -1) {
571                            for (int c = 4 * n0; c < columns; c += 4 * nthreads) {
572                                for (int r = 0; r < rows; r++) {
573                                    idx2 = startt + rows + r;
574                                    t[startt + r] = a[r][c];
575                                    t[idx2] = a[r][c + 1];
576                                    t[idx2 + rows] = a[r][c + 2];
577                                    t[idx2 + 2 * rows] = a[r][c + 3];
578                                }
579                                dstRows.forward(t, startt, scale);
580                                dstRows.forward(t, startt + rows, scale);
581                                dstRows.forward(t, startt + 2 * rows, scale);
582                                dstRows.forward(t, startt + 3 * rows, scale);
583                                for (int r = 0; r < rows; r++) {
584                                    idx2 = startt + rows + r;
585                                    a[r][c] = t[startt + r];
586                                    a[r][c + 1] = t[idx2];
587                                    a[r][c + 2] = t[idx2 + rows];
588                                    a[r][c + 3] = t[idx2 + 2 * rows];
589                                }
590                            }
591                        } else {
592                            for (int c = 4 * n0; c < columns; c += 4 * nthreads) {
593                                for (int r = 0; r < rows; r++) {
594                                    idx2 = startt + rows + r;
595                                    t[startt + r] = a[r][c];
596                                    t[idx2] = a[r][c + 1];
597                                    t[idx2 + rows] = a[r][c + 2];
598                                    t[idx2 + 2 * rows] = a[r][c + 3];
599                                }
600                                dstRows.inverse(t, startt, scale);
601                                dstRows.inverse(t, startt + rows, scale);
602                                dstRows.inverse(t, startt + 2 * rows, scale);
603                                dstRows.inverse(t, startt + 3 * rows, scale);
604                                for (int r = 0; r < rows; r++) {
605                                    idx2 = startt + rows + r;
606                                    a[r][c] = t[startt + r];
607                                    a[r][c + 1] = t[idx2];
608                                    a[r][c + 2] = t[idx2 + rows];
609                                    a[r][c + 3] = t[idx2 + 2 * rows];
610                                }
611                            }
612                        }
613                    } else if (columns == 2 * nthreads) {
614                        for (int r = 0; r < rows; r++) {
615                            idx2 = startt + r;
616                            t[idx2] = a[r][2 * n0];
617                            t[idx2 + rows] = a[r][2 * n0 + 1];
618                        }
619                        if (isgn == -1) {
620                            dstRows.forward(t, startt, scale);
621                            dstRows.forward(t, startt + rows, scale);
622                        } else {
623                            dstRows.inverse(t, startt, scale);
624                            dstRows.inverse(t, startt + rows, scale);
625                        }
626                        for (int r = 0; r < rows; r++) {
627                            idx2 = startt + r;
628                            a[r][2 * n0] = t[idx2];
629                            a[r][2 * n0 + 1] = t[idx2 + rows];
630                        }
631                    } else if (columns == nthreads) {
632                        for (int r = 0; r < rows; r++) {
633                            t[startt + r] = a[r][n0];
634                        }
635                        if (isgn == -1) {
636                            dstRows.forward(t, startt, scale);
637                        } else {
638                            dstRows.inverse(t, startt, scale);
639                        }
640                        for (int r = 0; r < rows; r++) {
641                            a[r][n0] = t[startt + r];
642                        }
643                    }
644                }
645            });
646        }
647        ConcurrencyUtils.waitForCompletion(futures);
648    }
649
650    private void ddxt2d0_subth(final int isgn, final float[] a, final boolean scale) {
651        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
652
653        Future<?>[] futures = new Future[nthreads];
654
655        for (int i = 0; i < nthreads; i++) {
656            final int n0 = i;
657            futures[i] = ConcurrencyUtils.submit(new Runnable() {
658
659                public void run() {
660                    if (isgn == -1) {
661                        for (int r = n0; r < rows; r += nthreads) {
662                            dstColumns.forward(a, r * columns, scale);
663                        }
664                    } else {
665                        for (int r = n0; r < rows; r += nthreads) {
666                            dstColumns.inverse(a, r * columns, scale);
667                        }
668                    }
669                }
670            });
671        }
672        ConcurrencyUtils.waitForCompletion(futures);
673    }
674
675    private void ddxt2d0_subth(final int isgn, final float[][] a, final boolean scale) {
676        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
677
678        Future<?>[] futures = new Future[nthreads];
679
680        for (int i = 0; i < nthreads; i++) {
681            final int n0 = i;
682            futures[i] = ConcurrencyUtils.submit(new Runnable() {
683
684                public void run() {
685                    if (isgn == -1) {
686                        for (int r = n0; r < rows; r += nthreads) {
687                            dstColumns.forward(a[r], scale);
688                        }
689                    } else {
690                        for (int r = n0; r < rows; r += nthreads) {
691                            dstColumns.inverse(a[r], scale);
692                        }
693                    }
694                }
695            });
696        }
697        ConcurrencyUtils.waitForCompletion(futures);
698    }
699
700    private void ddxt2d_sub(int isgn, float[] a, boolean scale) {
701        int idx1, idx2;
702
703        if (columns > 2) {
704            if (isgn == -1) {
705                for (int c = 0; c < columns; c += 4) {
706                    for (int r = 0; r < rows; r++) {
707                        idx1 = r * columns + c;
708                        idx2 = rows + r;
709                        t[r] = a[idx1];
710                        t[idx2] = a[idx1 + 1];
711                        t[idx2 + rows] = a[idx1 + 2];
712                        t[idx2 + 2 * rows] = a[idx1 + 3];
713                    }
714                    dstRows.forward(t, 0, scale);
715                    dstRows.forward(t, rows, scale);
716                    dstRows.forward(t, 2 * rows, scale);
717                    dstRows.forward(t, 3 * rows, scale);
718                    for (int r = 0; r < rows; r++) {
719                        idx1 = r * columns + c;
720                        idx2 = rows + r;
721                        a[idx1] = t[r];
722                        a[idx1 + 1] = t[idx2];
723                        a[idx1 + 2] = t[idx2 + rows];
724                        a[idx1 + 3] = t[idx2 + 2 * rows];
725                    }
726                }
727            } else {
728                for (int c = 0; c < columns; c += 4) {
729                    for (int r = 0; r < rows; r++) {
730                        idx1 = r * columns + c;
731                        idx2 = rows + r;
732                        t[r] = a[idx1];
733                        t[idx2] = a[idx1 + 1];
734                        t[idx2 + rows] = a[idx1 + 2];
735                        t[idx2 + 2 * rows] = a[idx1 + 3];
736                    }
737                    dstRows.inverse(t, 0, scale);
738                    dstRows.inverse(t, rows, scale);
739                    dstRows.inverse(t, 2 * rows, scale);
740                    dstRows.inverse(t, 3 * rows, scale);
741                    for (int r = 0; r < rows; r++) {
742                        idx1 = r * columns + c;
743                        idx2 = rows + r;
744                        a[idx1] = t[r];
745                        a[idx1 + 1] = t[idx2];
746                        a[idx1 + 2] = t[idx2 + rows];
747                        a[idx1 + 3] = t[idx2 + 2 * rows];
748                    }
749                }
750            }
751        } else if (columns == 2) {
752            for (int r = 0; r < rows; r++) {
753                idx1 = r * columns;
754                t[r] = a[idx1];
755                t[rows + r] = a[idx1 + 1];
756            }
757            if (isgn == -1) {
758                dstRows.forward(t, 0, scale);
759                dstRows.forward(t, rows, scale);
760            } else {
761                dstRows.inverse(t, 0, scale);
762                dstRows.inverse(t, rows, scale);
763            }
764            for (int r = 0; r < rows; r++) {
765                idx1 = r * columns;
766                a[idx1] = t[r];
767                a[idx1 + 1] = t[rows + r];
768            }
769        }
770    }
771
772    private void ddxt2d_sub(int isgn, float[][] a, boolean scale) {
773        int idx2;
774
775        if (columns > 2) {
776            if (isgn == -1) {
777                for (int c = 0; c < columns; c += 4) {
778                    for (int r = 0; r < rows; r++) {
779                        idx2 = rows + r;
780                        t[r] = a[r][c];
781                        t[idx2] = a[r][c + 1];
782                        t[idx2 + rows] = a[r][c + 2];
783                        t[idx2 + 2 * rows] = a[r][c + 3];
784                    }
785                    dstRows.forward(t, 0, scale);
786                    dstRows.forward(t, rows, scale);
787                    dstRows.forward(t, 2 * rows, scale);
788                    dstRows.forward(t, 3 * rows, scale);
789                    for (int r = 0; r < rows; r++) {
790                        idx2 = rows + r;
791                        a[r][c] = t[r];
792                        a[r][c + 1] = t[idx2];
793                        a[r][c + 2] = t[idx2 + rows];
794                        a[r][c + 3] = t[idx2 + 2 * rows];
795                    }
796                }
797            } else {
798                for (int c = 0; c < columns; c += 4) {
799                    for (int r = 0; r < rows; r++) {
800                        idx2 = rows + r;
801                        t[r] = a[r][c];
802                        t[idx2] = a[r][c + 1];
803                        t[idx2 + rows] = a[r][c + 2];
804                        t[idx2 + 2 * rows] = a[r][c + 3];
805                    }
806                    dstRows.inverse(t, 0, scale);
807                    dstRows.inverse(t, rows, scale);
808                    dstRows.inverse(t, 2 * rows, scale);
809                    dstRows.inverse(t, 3 * rows, scale);
810                    for (int r = 0; r < rows; r++) {
811                        idx2 = rows + r;
812                        a[r][c] = t[r];
813                        a[r][c + 1] = t[idx2];
814                        a[r][c + 2] = t[idx2 + rows];
815                        a[r][c + 3] = t[idx2 + 2 * rows];
816                    }
817                }
818            }
819        } else if (columns == 2) {
820            for (int r = 0; r < rows; r++) {
821                t[r] = a[r][0];
822                t[rows + r] = a[r][1];
823            }
824            if (isgn == -1) {
825                dstRows.forward(t, 0, scale);
826                dstRows.forward(t, rows, scale);
827            } else {
828                dstRows.inverse(t, 0, scale);
829                dstRows.inverse(t, rows, scale);
830            }
831            for (int r = 0; r < rows; r++) {
832                a[r][0] = t[r];
833                a[r][1] = t[rows + r];
834            }
835        }
836    }
837}