001/* ***** BEGIN LICENSE BLOCK *****
002 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
003 *
004 * The contents of this file are subject to the Mozilla Public License Version
005 * 1.1 (the "License"); you may not use this file except in compliance with
006 * the License. You may obtain a copy of the License at
007 * http://www.mozilla.org/MPL/
008 *
009 * Software distributed under the License is distributed on an "AS IS" basis,
010 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
011 * for the specific language governing rights and limitations under the
012 * License.
013 *
014 * The Original Code is JTransforms.
015 *
016 * The Initial Developer of the Original Code is
017 * Piotr Wendykier, Emory University.
018 * Portions created by the Initial Developer are Copyright (C) 2007-2009
019 * the Initial Developer. All Rights Reserved.
020 *
021 * Alternatively, the contents of this file may be used under the terms of
022 * either the GNU General Public License Version 2 or later (the "GPL"), or
023 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
024 * in which case the provisions of the GPL or the LGPL are applicable instead
025 * of those above. If you wish to allow use of your version of this file only
026 * under the terms of either the GPL or the LGPL, and not to allow others to
027 * use your version of this file under the terms of the MPL, indicate your
028 * decision by deleting the provisions above and replace them with the notice
029 * and other provisions required by the GPL or the LGPL. If you do not delete
030 * the provisions above, a recipient may use your version of this file under
031 * the terms of any one of the MPL, the GPL or the LGPL.
032 *
033 * ***** END LICENSE BLOCK ***** */
034
035package edu.emory.mathcs.jtransforms.dst;
036
037import java.util.concurrent.Future;
038
039import edu.emory.mathcs.utils.ConcurrencyUtils;
040
041/**
042 * Computes 3D Discrete Sine Transform (DST) of single precision data. The sizes
043 * of all three dimensions can be arbitrary numbers. This is a parallel
044 * implementation optimized for SMP systems.<br>
045 * <br>
046 * Part of code is derived from General Purpose FFT Package written by Takuya Ooura
047 * (http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html)
048 * 
049 * @author Piotr Wendykier (piotr.wendykier@gmail.com)
050 * 
051 */
052public class FloatDST_3D {
053
054    private int slices;
055
056    private int rows;
057
058    private int columns;
059
060    private int sliceStride;
061
062    private int rowStride;
063
064    private float[] t;
065
066    private FloatDST_1D dstSlices, dstRows, dstColumns;
067
068    private int oldNthreads;
069
070    private int nt;
071
072    private boolean isPowerOfTwo = false;
073
074    private boolean useThreads = false;
075
076    /**
077     * Creates new instance of FloatDST_3D.
078     * 
079     * @param slices
080     *            number of slices
081     * @param rows
082     *            number of rows
083     * @param columns
084     *            number of columns
085     */
086    public FloatDST_3D(int slices, int rows, int columns) {
087        if (slices <= 1 || rows <= 1 || columns <= 1) {
088            throw new IllegalArgumentException("slices, rows and columns must be greater than 1");
089        }
090        this.slices = slices;
091        this.rows = rows;
092        this.columns = columns;
093        this.sliceStride = rows * columns;
094        this.rowStride = columns;
095        if (slices * rows * columns >= ConcurrencyUtils.getThreadsBeginN_3D()) {
096            this.useThreads = true;
097        }
098        if (ConcurrencyUtils.isPowerOf2(slices) && ConcurrencyUtils.isPowerOf2(rows) && ConcurrencyUtils.isPowerOf2(columns)) {
099            isPowerOfTwo = true;
100            oldNthreads = ConcurrencyUtils.getNumberOfThreads();
101            nt = slices;
102            if (nt < rows) {
103                nt = rows;
104            }
105            nt *= 4;
106            if (oldNthreads > 1) {
107                nt *= oldNthreads;
108            }
109            if (columns == 2) {
110                nt >>= 1;
111            }
112            t = new float[nt];
113        }
114        dstSlices = new FloatDST_1D(slices);
115        if (slices == rows) {
116            dstRows = dstSlices;
117        } else {
118            dstRows = new FloatDST_1D(rows);
119        }
120        if (slices == columns) {
121            dstColumns = dstSlices;
122        } else if (rows == columns) {
123            dstColumns = dstRows;
124        } else {
125            dstColumns = new FloatDST_1D(columns);
126        }
127    }
128
129    /**
130     * Computes the 3D forward DST (DST-II) leaving the result in <code>a</code>
131     * . The data is stored in 1D array addressed in slice-major, then
132     * row-major, then column-major, in order of significance, i.e. the element
133     * (i,j,k) of 3D array x[slices][rows][columns] is stored in a[i*sliceStride
134     * + j*rowStride + k], where sliceStride = rows * columns and rowStride =
135     * columns.
136     * 
137     * @param a
138     *            data to transform
139     * @param scale
140     *            if true then scaling is performed
141     */
142    public void forward(final float[] a, final boolean scale) {
143        int nthreads = ConcurrencyUtils.getNumberOfThreads();
144        if (isPowerOfTwo) {
145            if (nthreads != oldNthreads) {
146                nt = slices;
147                if (nt < rows) {
148                    nt = rows;
149                }
150                nt *= 4;
151                if (nthreads > 1) {
152                    nt *= nthreads;
153                }
154                if (columns == 2) {
155                    nt >>= 1;
156                }
157                t = new float[nt];
158                oldNthreads = nthreads;
159            }
160            if ((nthreads > 1) && useThreads) {
161                ddxt3da_subth(-1, a, scale);
162                ddxt3db_subth(-1, a, scale);
163            } else {
164                ddxt3da_sub(-1, a, scale);
165                ddxt3db_sub(-1, a, scale);
166            }
167        } else {
168            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
169                Future<?>[] futures = new Future[nthreads];
170                int p = slices / nthreads;
171                for (int l = 0; l < nthreads; l++) {
172                    final int firstSlice = l * p;
173                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
174                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
175                        public void run() {
176                            for (int s = firstSlice; s < lastSlice; s++) {
177                                int idx1 = s * sliceStride;
178                                for (int r = 0; r < rows; r++) {
179                                    dstColumns.forward(a, idx1 + r * rowStride, scale);
180                                }
181                            }
182                        }
183                    });
184                }
185                ConcurrencyUtils.waitForCompletion(futures);
186
187                for (int l = 0; l < nthreads; l++) {
188                    final int firstSlice = l * p;
189                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
190                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
191                        public void run() {
192                            float[] temp = new float[rows];
193                            for (int s = firstSlice; s < lastSlice; s++) {
194                                int idx1 = s * sliceStride;
195                                for (int c = 0; c < columns; c++) {
196                                    for (int r = 0; r < rows; r++) {
197                                        int idx3 = idx1 + r * rowStride + c;
198                                        temp[r] = a[idx3];
199                                    }
200                                    dstRows.forward(temp, scale);
201                                    for (int r = 0; r < rows; r++) {
202                                        int idx3 = idx1 + r * rowStride + c;
203                                        a[idx3] = temp[r];
204                                    }
205                                }
206                            }
207                        }
208                    });
209                }
210                ConcurrencyUtils.waitForCompletion(futures);
211
212                p = rows / nthreads;
213                for (int l = 0; l < nthreads; l++) {
214                    final int firstRow = l * p;
215                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
216                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
217                        public void run() {
218                            float[] temp = new float[slices];
219                            for (int r = firstRow; r < lastRow; r++) {
220                                int idx1 = r * rowStride;
221                                for (int c = 0; c < columns; c++) {
222                                    for (int s = 0; s < slices; s++) {
223                                        int idx3 = s * sliceStride + idx1 + c;
224                                        temp[s] = a[idx3];
225                                    }
226                                    dstSlices.forward(temp, scale);
227                                    for (int s = 0; s < slices; s++) {
228                                        int idx3 = s * sliceStride + idx1 + c;
229                                        a[idx3] = temp[s];
230                                    }
231                                }
232                            }
233                        }
234                    });
235                }
236                ConcurrencyUtils.waitForCompletion(futures);
237
238            } else {
239                for (int s = 0; s < slices; s++) {
240                    int idx1 = s * sliceStride;
241                    for (int r = 0; r < rows; r++) {
242                        dstColumns.forward(a, idx1 + r * rowStride, scale);
243                    }
244                }
245                float[] temp = new float[rows];
246                for (int s = 0; s < slices; s++) {
247                    int idx1 = s * sliceStride;
248                    for (int c = 0; c < columns; c++) {
249                        for (int r = 0; r < rows; r++) {
250                            int idx3 = idx1 + r * rowStride + c;
251                            temp[r] = a[idx3];
252                        }
253                        dstRows.forward(temp, scale);
254                        for (int r = 0; r < rows; r++) {
255                            int idx3 = idx1 + r * rowStride + c;
256                            a[idx3] = temp[r];
257                        }
258                    }
259                }
260                temp = new float[slices];
261                for (int r = 0; r < rows; r++) {
262                    int idx1 = r * rowStride;
263                    for (int c = 0; c < columns; c++) {
264                        for (int s = 0; s < slices; s++) {
265                            int idx3 = s * sliceStride + idx1 + c;
266                            temp[s] = a[idx3];
267                        }
268                        dstSlices.forward(temp, scale);
269                        for (int s = 0; s < slices; s++) {
270                            int idx3 = s * sliceStride + idx1 + c;
271                            a[idx3] = temp[s];
272                        }
273                    }
274                }
275            }
276        }
277    }
278
279    /**
280     * Computes the 3D forward DST (DST-II) leaving the result in <code>a</code>
281     * . The data is stored in 3D array.
282     * 
283     * @param a
284     *            data to transform
285     * @param scale
286     *            if true then scaling is performed
287     */
288    public void forward(final float[][][] a, final boolean scale) {
289        int nthreads = ConcurrencyUtils.getNumberOfThreads();
290        if (isPowerOfTwo) {
291            if (nthreads != oldNthreads) {
292                nt = slices;
293                if (nt < rows) {
294                    nt = rows;
295                }
296                nt *= 4;
297                if (nthreads > 1) {
298                    nt *= nthreads;
299                }
300                if (columns == 2) {
301                    nt >>= 1;
302                }
303                t = new float[nt];
304                oldNthreads = nthreads;
305            }
306            if ((nthreads > 1) && useThreads) {
307                ddxt3da_subth(-1, a, scale);
308                ddxt3db_subth(-1, a, scale);
309            } else {
310                ddxt3da_sub(-1, a, scale);
311                ddxt3db_sub(-1, a, scale);
312            }
313        } else {
314            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
315                Future<?>[] futures = new Future[nthreads];
316                int p = slices / nthreads;
317                for (int l = 0; l < nthreads; l++) {
318                    final int firstSlice = l * p;
319                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
320                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
321                        public void run() {
322                            for (int s = firstSlice; s < lastSlice; s++) {
323                                for (int r = 0; r < rows; r++) {
324                                    dstColumns.forward(a[s][r], scale);
325                                }
326                            }
327                        }
328                    });
329                }
330                ConcurrencyUtils.waitForCompletion(futures);
331
332                for (int l = 0; l < nthreads; l++) {
333                    final int firstSlice = l * p;
334                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
335                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
336                        public void run() {
337                            float[] temp = new float[rows];
338                            for (int s = firstSlice; s < lastSlice; s++) {
339                                for (int c = 0; c < columns; c++) {
340                                    for (int r = 0; r < rows; r++) {
341                                        temp[r] = a[s][r][c];
342                                    }
343                                    dstRows.forward(temp, scale);
344                                    for (int r = 0; r < rows; r++) {
345                                        a[s][r][c] = temp[r];
346                                    }
347                                }
348                            }
349                        }
350                    });
351                }
352                ConcurrencyUtils.waitForCompletion(futures);
353
354                p = rows / nthreads;
355                for (int l = 0; l < nthreads; l++) {
356                    final int firstRow = l * p;
357                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
358                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
359                        public void run() {
360                            float[] temp = new float[slices];
361                            for (int r = firstRow; r < lastRow; r++) {
362                                for (int c = 0; c < columns; c++) {
363                                    for (int s = 0; s < slices; s++) {
364                                        temp[s] = a[s][r][c];
365                                    }
366                                    dstSlices.forward(temp, scale);
367                                    for (int s = 0; s < slices; s++) {
368                                        a[s][r][c] = temp[s];
369                                    }
370                                }
371                            }
372                        }
373                    });
374                }
375                ConcurrencyUtils.waitForCompletion(futures);
376
377            } else {
378                for (int s = 0; s < slices; s++) {
379                    for (int r = 0; r < rows; r++) {
380                        dstColumns.forward(a[s][r], scale);
381                    }
382                }
383                float[] temp = new float[rows];
384                for (int s = 0; s < slices; s++) {
385                    for (int c = 0; c < columns; c++) {
386                        for (int r = 0; r < rows; r++) {
387                            temp[r] = a[s][r][c];
388                        }
389                        dstRows.forward(temp, scale);
390                        for (int r = 0; r < rows; r++) {
391                            a[s][r][c] = temp[r];
392                        }
393                    }
394                }
395                temp = new float[slices];
396                for (int r = 0; r < rows; r++) {
397                    for (int c = 0; c < columns; c++) {
398                        for (int s = 0; s < slices; s++) {
399                            temp[s] = a[s][r][c];
400                        }
401                        dstSlices.forward(temp, scale);
402                        for (int s = 0; s < slices; s++) {
403                            a[s][r][c] = temp[s];
404                        }
405                    }
406                }
407            }
408        }
409    }
410
411    /**
412     * Computes the 3D inverse DST (DST-III) leaving the result in
413     * <code>a</code>. The data is stored in 1D array addressed in slice-major,
414     * then row-major, then column-major, in order of significance, i.e. the
415     * element (i,j,k) of 3D array x[slices][rows][columns] is stored in
416     * a[i*sliceStride + j*rowStride + k], where sliceStride = rows * columns
417     * and rowStride = columns.
418     * 
419     * @param a
420     *            data to transform
421     * @param scale
422     *            if true then scaling is performed
423     */
424    public void inverse(final float[] a, final boolean scale) {
425        int nthreads = ConcurrencyUtils.getNumberOfThreads();
426        if (isPowerOfTwo) {
427            if (nthreads != oldNthreads) {
428                nt = slices;
429                if (nt < rows) {
430                    nt = rows;
431                }
432                nt *= 4;
433                if (nthreads > 1) {
434                    nt *= nthreads;
435                }
436                if (columns == 2) {
437                    nt >>= 1;
438                }
439                t = new float[nt];
440                oldNthreads = nthreads;
441            }
442            if ((nthreads > 1) && useThreads) {
443                ddxt3da_subth(1, a, scale);
444                ddxt3db_subth(1, a, scale);
445            } else {
446                ddxt3da_sub(1, a, scale);
447                ddxt3db_sub(1, a, scale);
448            }
449        } else {
450            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
451                Future<?>[] futures = new Future[nthreads];
452                int p = slices / nthreads;
453                for (int l = 0; l < nthreads; l++) {
454                    final int firstSlice = l * p;
455                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
456                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
457                        public void run() {
458                            for (int s = firstSlice; s < lastSlice; s++) {
459                                int idx1 = s * sliceStride;
460                                for (int r = 0; r < rows; r++) {
461                                    dstColumns.inverse(a, idx1 + r * rowStride, scale);
462                                }
463                            }
464                        }
465                    });
466                }
467                ConcurrencyUtils.waitForCompletion(futures);
468
469                for (int l = 0; l < nthreads; l++) {
470                    final int firstSlice = l * p;
471                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
472                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
473                        public void run() {
474                            float[] temp = new float[rows];
475                            for (int s = firstSlice; s < lastSlice; s++) {
476                                int idx1 = s * sliceStride;
477                                for (int c = 0; c < columns; c++) {
478                                    for (int r = 0; r < rows; r++) {
479                                        int idx3 = idx1 + r * rowStride + c;
480                                        temp[r] = a[idx3];
481                                    }
482                                    dstRows.inverse(temp, scale);
483                                    for (int r = 0; r < rows; r++) {
484                                        int idx3 = idx1 + r * rowStride + c;
485                                        a[idx3] = temp[r];
486                                    }
487                                }
488                            }
489                        }
490                    });
491                }
492                ConcurrencyUtils.waitForCompletion(futures);
493
494                p = rows / nthreads;
495                for (int l = 0; l < nthreads; l++) {
496                    final int firstRow = l * p;
497                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
498                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
499                        public void run() {
500                            float[] temp = new float[slices];
501                            for (int r = firstRow; r < lastRow; r++) {
502                                int idx1 = r * rowStride;
503                                for (int c = 0; c < columns; c++) {
504                                    for (int s = 0; s < slices; s++) {
505                                        int idx3 = s * sliceStride + idx1 + c;
506                                        temp[s] = a[idx3];
507                                    }
508                                    dstSlices.inverse(temp, scale);
509                                    for (int s = 0; s < slices; s++) {
510                                        int idx3 = s * sliceStride + idx1 + c;
511                                        a[idx3] = temp[s];
512                                    }
513                                }
514                            }
515                        }
516                    });
517                }
518                ConcurrencyUtils.waitForCompletion(futures);
519
520            } else {
521                for (int s = 0; s < slices; s++) {
522                    int idx1 = s * sliceStride;
523                    for (int r = 0; r < rows; r++) {
524                        dstColumns.inverse(a, idx1 + r * rowStride, scale);
525                    }
526                }
527                float[] temp = new float[rows];
528                for (int s = 0; s < slices; s++) {
529                    int idx1 = s * sliceStride;
530                    for (int c = 0; c < columns; c++) {
531                        for (int r = 0; r < rows; r++) {
532                            int idx3 = idx1 + r * rowStride + c;
533                            temp[r] = a[idx3];
534                        }
535                        dstRows.inverse(temp, scale);
536                        for (int r = 0; r < rows; r++) {
537                            int idx3 = idx1 + r * rowStride + c;
538                            a[idx3] = temp[r];
539                        }
540                    }
541                }
542                temp = new float[slices];
543                for (int r = 0; r < rows; r++) {
544                    int idx1 = r * rowStride;
545                    for (int c = 0; c < columns; c++) {
546                        for (int s = 0; s < slices; s++) {
547                            int idx3 = s * sliceStride + idx1 + c;
548                            temp[s] = a[idx3];
549                        }
550                        dstSlices.inverse(temp, scale);
551                        for (int s = 0; s < slices; s++) {
552                            int idx3 = s * sliceStride + idx1 + c;
553                            a[idx3] = temp[s];
554                        }
555                    }
556                }
557            }
558        }
559
560    }
561
562    /**
563     * Computes the 3D inverse DST (DST-III) leaving the result in
564     * <code>a</code>. The data is stored in 3D array.
565     * 
566     * @param a
567     *            data to transform
568     * @param scale
569     *            if true then scaling is performed
570     */
571    public void inverse(final float[][][] a, final boolean scale) {
572        int nthreads = ConcurrencyUtils.getNumberOfThreads();
573        if (isPowerOfTwo) {
574            if (nthreads != oldNthreads) {
575                nt = slices;
576                if (nt < rows) {
577                    nt = rows;
578                }
579                nt *= 4;
580                if (nthreads > 1) {
581                    nt *= nthreads;
582                }
583                if (columns == 2) {
584                    nt >>= 1;
585                }
586                t = new float[nt];
587                oldNthreads = nthreads;
588            }
589            if ((nthreads > 1) && useThreads) {
590                ddxt3da_subth(1, a, scale);
591                ddxt3db_subth(1, a, scale);
592            } else {
593                ddxt3da_sub(1, a, scale);
594                ddxt3db_sub(1, a, scale);
595            }
596        } else {
597            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
598                Future<?>[] futures = new Future[nthreads];
599                int p = slices / nthreads;
600                for (int l = 0; l < nthreads; l++) {
601                    final int firstSlice = l * p;
602                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
603                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
604                        public void run() {
605                            for (int s = firstSlice; s < lastSlice; s++) {
606                                for (int r = 0; r < rows; r++) {
607                                    dstColumns.inverse(a[s][r], scale);
608                                }
609                            }
610                        }
611                    });
612                }
613                ConcurrencyUtils.waitForCompletion(futures);
614
615                for (int l = 0; l < nthreads; l++) {
616                    final int firstSlice = l * p;
617                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
618                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
619                        public void run() {
620                            float[] temp = new float[rows];
621                            for (int s = firstSlice; s < lastSlice; s++) {
622                                for (int c = 0; c < columns; c++) {
623                                    for (int r = 0; r < rows; r++) {
624                                        temp[r] = a[s][r][c];
625                                    }
626                                    dstRows.inverse(temp, scale);
627                                    for (int r = 0; r < rows; r++) {
628                                        a[s][r][c] = temp[r];
629                                    }
630                                }
631                            }
632                        }
633                    });
634                }
635                ConcurrencyUtils.waitForCompletion(futures);
636
637                p = rows / nthreads;
638                for (int l = 0; l < nthreads; l++) {
639                    final int firstRow = l * p;
640                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
641                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
642                        public void run() {
643                            float[] temp = new float[slices];
644                            for (int r = firstRow; r < lastRow; r++) {
645                                for (int c = 0; c < columns; c++) {
646                                    for (int s = 0; s < slices; s++) {
647                                        temp[s] = a[s][r][c];
648                                    }
649                                    dstSlices.inverse(temp, scale);
650                                    for (int s = 0; s < slices; s++) {
651                                        a[s][r][c] = temp[s];
652                                    }
653                                }
654                            }
655                        }
656                    });
657                }
658                ConcurrencyUtils.waitForCompletion(futures);
659
660            } else {
661                for (int s = 0; s < slices; s++) {
662                    for (int r = 0; r < rows; r++) {
663                        dstColumns.inverse(a[s][r], scale);
664                    }
665                }
666                float[] temp = new float[rows];
667                for (int s = 0; s < slices; s++) {
668                    for (int c = 0; c < columns; c++) {
669                        for (int r = 0; r < rows; r++) {
670                            temp[r] = a[s][r][c];
671                        }
672                        dstRows.inverse(temp, scale);
673                        for (int r = 0; r < rows; r++) {
674                            a[s][r][c] = temp[r];
675                        }
676                    }
677                }
678                temp = new float[slices];
679                for (int r = 0; r < rows; r++) {
680                    for (int c = 0; c < columns; c++) {
681                        for (int s = 0; s < slices; s++) {
682                            temp[s] = a[s][r][c];
683                        }
684                        dstSlices.inverse(temp, scale);
685                        for (int s = 0; s < slices; s++) {
686                            a[s][r][c] = temp[s];
687                        }
688                    }
689                }
690            }
691        }
692    }
693
694    private void ddxt3da_sub(int isgn, float[] a, boolean scale) {
695        int idx0, idx1, idx2;
696
697        if (isgn == -1) {
698            for (int s = 0; s < slices; s++) {
699                idx0 = s * sliceStride;
700                for (int r = 0; r < rows; r++) {
701                    dstColumns.forward(a, idx0 + r * rowStride, scale);
702                }
703                if (columns > 2) {
704                    for (int c = 0; c < columns; c += 4) {
705                        for (int r = 0; r < rows; r++) {
706                            idx1 = idx0 + r * rowStride + c;
707                            idx2 = rows + r;
708                            t[r] = a[idx1];
709                            t[idx2] = a[idx1 + 1];
710                            t[idx2 + rows] = a[idx1 + 2];
711                            t[idx2 + 2 * rows] = a[idx1 + 3];
712                        }
713                        dstRows.forward(t, 0, scale);
714                        dstRows.forward(t, rows, scale);
715                        dstRows.forward(t, 2 * rows, scale);
716                        dstRows.forward(t, 3 * rows, scale);
717                        for (int r = 0; r < rows; r++) {
718                            idx1 = idx0 + r * rowStride + c;
719                            idx2 = rows + r;
720                            a[idx1] = t[r];
721                            a[idx1 + 1] = t[idx2];
722                            a[idx1 + 2] = t[idx2 + rows];
723                            a[idx1 + 3] = t[idx2 + 2 * rows];
724                        }
725                    }
726                } else if (columns == 2) {
727                    for (int r = 0; r < rows; r++) {
728                        idx1 = idx0 + r * rowStride;
729                        t[r] = a[idx1];
730                        t[rows + r] = a[idx1 + 1];
731                    }
732                    dstRows.forward(t, 0, scale);
733                    dstRows.forward(t, rows, scale);
734                    for (int r = 0; r < rows; r++) {
735                        idx1 = idx0 + r * rowStride;
736                        a[idx1] = t[r];
737                        a[idx1 + 1] = t[rows + r];
738                    }
739                }
740            }
741        } else {
742            for (int s = 0; s < slices; s++) {
743                idx0 = s * sliceStride;
744                for (int r = 0; r < rows; r++) {
745                    dstColumns.inverse(a, idx0 + r * rowStride, scale);
746                }
747                if (columns > 2) {
748                    for (int c = 0; c < columns; c += 4) {
749                        for (int r = 0; r < rows; r++) {
750                            idx1 = idx0 + r * rowStride + c;
751                            idx2 = rows + r;
752                            t[r] = a[idx1];
753                            t[idx2] = a[idx1 + 1];
754                            t[idx2 + rows] = a[idx1 + 2];
755                            t[idx2 + 2 * rows] = a[idx1 + 3];
756                        }
757                        dstRows.inverse(t, 0, scale);
758                        dstRows.inverse(t, rows, scale);
759                        dstRows.inverse(t, 2 * rows, scale);
760                        dstRows.inverse(t, 3 * rows, scale);
761                        for (int r = 0; r < rows; r++) {
762                            idx1 = idx0 + r * rowStride + c;
763                            idx2 = rows + r;
764                            a[idx1] = t[r];
765                            a[idx1 + 1] = t[idx2];
766                            a[idx1 + 2] = t[idx2 + rows];
767                            a[idx1 + 3] = t[idx2 + 2 * rows];
768                        }
769                    }
770                } else if (columns == 2) {
771                    for (int r = 0; r < rows; r++) {
772                        idx1 = idx0 + r * rowStride;
773                        t[r] = a[idx1];
774                        t[rows + r] = a[idx1 + 1];
775                    }
776                    dstRows.inverse(t, 0, scale);
777                    dstRows.inverse(t, rows, scale);
778                    for (int r = 0; r < rows; r++) {
779                        idx1 = idx0 + r * rowStride;
780                        a[idx1] = t[r];
781                        a[idx1 + 1] = t[rows + r];
782                    }
783                }
784            }
785        }
786    }
787
788    private void ddxt3da_sub(int isgn, float[][][] a, boolean scale) {
789        int idx2;
790
791        if (isgn == -1) {
792            for (int s = 0; s < slices; s++) {
793                for (int r = 0; r < rows; r++) {
794                    dstColumns.forward(a[s][r], scale);
795                }
796                if (columns > 2) {
797                    for (int c = 0; c < columns; c += 4) {
798                        for (int r = 0; r < rows; r++) {
799                            idx2 = rows + r;
800                            t[r] = a[s][r][c];
801                            t[idx2] = a[s][r][c + 1];
802                            t[idx2 + rows] = a[s][r][c + 2];
803                            t[idx2 + 2 * rows] = a[s][r][c + 3];
804                        }
805                        dstRows.forward(t, 0, scale);
806                        dstRows.forward(t, rows, scale);
807                        dstRows.forward(t, 2 * rows, scale);
808                        dstRows.forward(t, 3 * rows, scale);
809                        for (int r = 0; r < rows; r++) {
810                            idx2 = rows + r;
811                            a[s][r][c] = t[r];
812                            a[s][r][c + 1] = t[idx2];
813                            a[s][r][c + 2] = t[idx2 + rows];
814                            a[s][r][c + 3] = t[idx2 + 2 * rows];
815                        }
816                    }
817                } else if (columns == 2) {
818                    for (int r = 0; r < rows; r++) {
819                        t[r] = a[s][r][0];
820                        t[rows + r] = a[s][r][1];
821                    }
822                    dstRows.forward(t, 0, scale);
823                    dstRows.forward(t, rows, scale);
824                    for (int r = 0; r < rows; r++) {
825                        a[s][r][0] = t[r];
826                        a[s][r][1] = t[rows + r];
827                    }
828                }
829            }
830        } else {
831            for (int s = 0; s < slices; s++) {
832                for (int r = 0; r < rows; r++) {
833                    dstColumns.inverse(a[s][r], scale);
834                }
835                if (columns > 2) {
836                    for (int c = 0; c < columns; c += 4) {
837                        for (int r = 0; r < rows; r++) {
838                            idx2 = rows + r;
839                            t[r] = a[s][r][c];
840                            t[idx2] = a[s][r][c + 1];
841                            t[idx2 + rows] = a[s][r][c + 2];
842                            t[idx2 + 2 * rows] = a[s][r][c + 3];
843                        }
844                        dstRows.inverse(t, 0, scale);
845                        dstRows.inverse(t, rows, scale);
846                        dstRows.inverse(t, 2 * rows, scale);
847                        dstRows.inverse(t, 3 * rows, scale);
848                        for (int r = 0; r < rows; r++) {
849                            idx2 = rows + r;
850                            a[s][r][c] = t[r];
851                            a[s][r][c + 1] = t[idx2];
852                            a[s][r][c + 2] = t[idx2 + rows];
853                            a[s][r][c + 3] = t[idx2 + 2 * rows];
854                        }
855                    }
856                } else if (columns == 2) {
857                    for (int r = 0; r < rows; r++) {
858                        t[r] = a[s][r][0];
859                        t[rows + r] = a[s][r][1];
860                    }
861                    dstRows.inverse(t, 0, scale);
862                    dstRows.inverse(t, rows, scale);
863                    for (int r = 0; r < rows; r++) {
864                        a[s][r][0] = t[r];
865                        a[s][r][1] = t[rows + r];
866                    }
867                }
868            }
869        }
870    }
871
872    private void ddxt3db_sub(int isgn, float[] a, boolean scale) {
873        int idx0, idx1, idx2;
874
875        if (isgn == -1) {
876            if (columns > 2) {
877                for (int r = 0; r < rows; r++) {
878                    idx0 = r * rowStride;
879                    for (int c = 0; c < columns; c += 4) {
880                        for (int s = 0; s < slices; s++) {
881                            idx1 = s * sliceStride + idx0 + c;
882                            idx2 = slices + s;
883                            t[s] = a[idx1];
884                            t[idx2] = a[idx1 + 1];
885                            t[idx2 + slices] = a[idx1 + 2];
886                            t[idx2 + 2 * slices] = a[idx1 + 3];
887                        }
888                        dstSlices.forward(t, 0, scale);
889                        dstSlices.forward(t, slices, scale);
890                        dstSlices.forward(t, 2 * slices, scale);
891                        dstSlices.forward(t, 3 * slices, scale);
892                        for (int s = 0; s < slices; s++) {
893                            idx1 = s * sliceStride + idx0 + c;
894                            idx2 = slices + s;
895                            a[idx1] = t[s];
896                            a[idx1 + 1] = t[idx2];
897                            a[idx1 + 2] = t[idx2 + slices];
898                            a[idx1 + 3] = t[idx2 + 2 * slices];
899                        }
900                    }
901                }
902            } else if (columns == 2) {
903                for (int r = 0; r < rows; r++) {
904                    idx0 = r * rowStride;
905                    for (int s = 0; s < slices; s++) {
906                        idx1 = s * sliceStride + idx0;
907                        t[s] = a[idx1];
908                        t[slices + s] = a[idx1 + 1];
909                    }
910                    dstSlices.forward(t, 0, scale);
911                    dstSlices.forward(t, slices, scale);
912                    for (int s = 0; s < slices; s++) {
913                        idx1 = s * sliceStride + idx0;
914                        a[idx1] = t[s];
915                        a[idx1 + 1] = t[slices + s];
916                    }
917                }
918            }
919        } else {
920            if (columns > 2) {
921                for (int r = 0; r < rows; r++) {
922                    idx0 = r * rowStride;
923                    for (int c = 0; c < columns; c += 4) {
924                        for (int s = 0; s < slices; s++) {
925                            idx1 = s * sliceStride + idx0 + c;
926                            idx2 = slices + s;
927                            t[s] = a[idx1];
928                            t[idx2] = a[idx1 + 1];
929                            t[idx2 + slices] = a[idx1 + 2];
930                            t[idx2 + 2 * slices] = a[idx1 + 3];
931                        }
932                        dstSlices.inverse(t, 0, scale);
933                        dstSlices.inverse(t, slices, scale);
934                        dstSlices.inverse(t, 2 * slices, scale);
935                        dstSlices.inverse(t, 3 * slices, scale);
936
937                        for (int s = 0; s < slices; s++) {
938                            idx1 = s * sliceStride + idx0 + c;
939                            idx2 = slices + s;
940                            a[idx1] = t[s];
941                            a[idx1 + 1] = t[idx2];
942                            a[idx1 + 2] = t[idx2 + slices];
943                            a[idx1 + 3] = t[idx2 + 2 * slices];
944                        }
945                    }
946                }
947            } else if (columns == 2) {
948                for (int r = 0; r < rows; r++) {
949                    idx0 = r * rowStride;
950                    for (int s = 0; s < slices; s++) {
951                        idx1 = s * sliceStride + idx0;
952                        t[s] = a[idx1];
953                        t[slices + s] = a[idx1 + 1];
954                    }
955                    dstSlices.inverse(t, 0, scale);
956                    dstSlices.inverse(t, slices, scale);
957                    for (int s = 0; s < slices; s++) {
958                        idx1 = s * sliceStride + idx0;
959                        a[idx1] = t[s];
960                        a[idx1 + 1] = t[slices + s];
961                    }
962                }
963            }
964        }
965    }
966
967    private void ddxt3db_sub(int isgn, float[][][] a, boolean scale) {
968        int idx2;
969
970        if (isgn == -1) {
971            if (columns > 2) {
972                for (int r = 0; r < rows; r++) {
973                    for (int c = 0; c < columns; c += 4) {
974                        for (int s = 0; s < slices; s++) {
975                            idx2 = slices + s;
976                            t[s] = a[s][r][c];
977                            t[idx2] = a[s][r][c + 1];
978                            t[idx2 + slices] = a[s][r][c + 2];
979                            t[idx2 + 2 * slices] = a[s][r][c + 3];
980                        }
981                        dstSlices.forward(t, 0, scale);
982                        dstSlices.forward(t, slices, scale);
983                        dstSlices.forward(t, 2 * slices, scale);
984                        dstSlices.forward(t, 3 * slices, scale);
985                        for (int s = 0; s < slices; s++) {
986                            idx2 = slices + s;
987                            a[s][r][c] = t[s];
988                            a[s][r][c + 1] = t[idx2];
989                            a[s][r][c + 2] = t[idx2 + slices];
990                            a[s][r][c + 3] = t[idx2 + 2 * slices];
991                        }
992                    }
993                }
994            } else if (columns == 2) {
995                for (int r = 0; r < rows; r++) {
996                    for (int s = 0; s < slices; s++) {
997                        t[s] = a[s][r][0];
998                        t[slices + s] = a[s][r][1];
999                    }
1000                    dstSlices.forward(t, 0, scale);
1001                    dstSlices.forward(t, slices, scale);
1002                    for (int s = 0; s < slices; s++) {
1003                        a[s][r][0] = t[s];
1004                        a[s][r][1] = t[slices + s];
1005                    }
1006                }
1007            }
1008        } else {
1009            if (columns > 2) {
1010                for (int r = 0; r < rows; r++) {
1011                    for (int c = 0; c < columns; c += 4) {
1012                        for (int s = 0; s < slices; s++) {
1013                            idx2 = slices + s;
1014                            t[s] = a[s][r][c];
1015                            t[idx2] = a[s][r][c + 1];
1016                            t[idx2 + slices] = a[s][r][c + 2];
1017                            t[idx2 + 2 * slices] = a[s][r][c + 3];
1018                        }
1019                        dstSlices.inverse(t, 0, scale);
1020                        dstSlices.inverse(t, slices, scale);
1021                        dstSlices.inverse(t, 2 * slices, scale);
1022                        dstSlices.inverse(t, 3 * slices, scale);
1023
1024                        for (int s = 0; s < slices; s++) {
1025                            idx2 = slices + s;
1026                            a[s][r][c] = t[s];
1027                            a[s][r][c + 1] = t[idx2];
1028                            a[s][r][c + 2] = t[idx2 + slices];
1029                            a[s][r][c + 3] = t[idx2 + 2 * slices];
1030                        }
1031                    }
1032                }
1033            } else if (columns == 2) {
1034                for (int r = 0; r < rows; r++) {
1035                    for (int s = 0; s < slices; s++) {
1036                        t[s] = a[s][r][0];
1037                        t[slices + s] = a[s][r][1];
1038                    }
1039                    dstSlices.inverse(t, 0, scale);
1040                    dstSlices.inverse(t, slices, scale);
1041                    for (int s = 0; s < slices; s++) {
1042                        a[s][r][0] = t[s];
1043                        a[s][r][1] = t[slices + s];
1044                    }
1045                }
1046            }
1047        }
1048    }
1049
1050    private void ddxt3da_subth(final int isgn, final float[] a, final boolean scale) {
1051        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > slices ? slices : ConcurrencyUtils.getNumberOfThreads();
1052
1053        int nt = 4 * rows;
1054        if (columns == 2) {
1055            nt >>= 1;
1056        }
1057        Future<?>[] futures = new Future[nthreads];
1058
1059        for (int i = 0; i < nthreads; i++) {
1060            final int n0 = i;
1061            final int startt = nt * i;
1062            futures[i] = ConcurrencyUtils.submit(new Runnable() {
1063
1064                public void run() {
1065                    int idx0, idx1, idx2;
1066                    if (isgn == -1) {
1067                        for (int s = n0; s < slices; s += nthreads) {
1068                            idx0 = s * sliceStride;
1069                            for (int r = 0; r < rows; r++) {
1070                                dstColumns.forward(a, idx0 + r * rowStride, scale);
1071                            }
1072                            if (columns > 2) {
1073                                for (int c = 0; c < columns; c += 4) {
1074                                    for (int r = 0; r < rows; r++) {
1075                                        idx1 = idx0 + r * rowStride + c;
1076                                        idx2 = startt + rows + r;
1077                                        t[startt + r] = a[idx1];
1078                                        t[idx2] = a[idx1 + 1];
1079                                        t[idx2 + rows] = a[idx1 + 2];
1080                                        t[idx2 + 2 * rows] = a[idx1 + 3];
1081                                    }
1082                                    dstRows.forward(t, startt, scale);
1083                                    dstRows.forward(t, startt + rows, scale);
1084                                    dstRows.forward(t, startt + 2 * rows, scale);
1085                                    dstRows.forward(t, startt + 3 * rows, scale);
1086                                    for (int r = 0; r < rows; r++) {
1087                                        idx1 = idx0 + r * rowStride + c;
1088                                        idx2 = startt + rows + r;
1089                                        a[idx1] = t[startt + r];
1090                                        a[idx1 + 1] = t[idx2];
1091                                        a[idx1 + 2] = t[idx2 + rows];
1092                                        a[idx1 + 3] = t[idx2 + 2 * rows];
1093                                    }
1094                                }
1095                            } else if (columns == 2) {
1096                                for (int r = 0; r < rows; r++) {
1097                                    idx1 = idx0 + r * rowStride;
1098                                    t[startt + r] = a[idx1];
1099                                    t[startt + rows + r] = a[idx1 + 1];
1100                                }
1101                                dstRows.forward(t, startt, scale);
1102                                dstRows.forward(t, startt + rows, scale);
1103                                for (int r = 0; r < rows; r++) {
1104                                    idx1 = idx0 + r * rowStride;
1105                                    a[idx1] = t[startt + r];
1106                                    a[idx1 + 1] = t[startt + rows + r];
1107                                }
1108                            }
1109                        }
1110                    } else {
1111                        for (int s = n0; s < slices; s += nthreads) {
1112                            idx0 = s * sliceStride;
1113                            for (int r = 0; r < rows; r++) {
1114                                dstColumns.inverse(a, idx0 + r * rowStride, scale);
1115                            }
1116                            if (columns > 2) {
1117                                for (int c = 0; c < columns; c += 4) {
1118                                    for (int r = 0; r < rows; r++) {
1119                                        idx1 = idx0 + r * rowStride + c;
1120                                        idx2 = startt + rows + r;
1121                                        t[startt + r] = a[idx1];
1122                                        t[idx2] = a[idx1 + 1];
1123                                        t[idx2 + rows] = a[idx1 + 2];
1124                                        t[idx2 + 2 * rows] = a[idx1 + 3];
1125                                    }
1126                                    dstRows.inverse(t, startt, scale);
1127                                    dstRows.inverse(t, startt + rows, scale);
1128                                    dstRows.inverse(t, startt + 2 * rows, scale);
1129                                    dstRows.inverse(t, startt + 3 * rows, scale);
1130                                    for (int r = 0; r < rows; r++) {
1131                                        idx1 = idx0 + r * rowStride + c;
1132                                        idx2 = startt + rows + r;
1133                                        a[idx1] = t[startt + r];
1134                                        a[idx1 + 1] = t[idx2];
1135                                        a[idx1 + 2] = t[idx2 + rows];
1136                                        a[idx1 + 3] = t[idx2 + 2 * rows];
1137                                    }
1138                                }
1139                            } else if (columns == 2) {
1140                                for (int r = 0; r < rows; r++) {
1141                                    idx1 = idx0 + r * rowStride;
1142                                    t[startt + r] = a[idx1];
1143                                    t[startt + rows + r] = a[idx1 + 1];
1144                                }
1145                                dstRows.inverse(t, startt, scale);
1146                                dstRows.inverse(t, startt + rows, scale);
1147                                for (int r = 0; r < rows; r++) {
1148                                    idx1 = idx0 + r * rowStride;
1149                                    a[idx1] = t[startt + r];
1150                                    a[idx1 + 1] = t[startt + rows + r];
1151                                }
1152                            }
1153                        }
1154                    }
1155                }
1156            });
1157        }
1158        ConcurrencyUtils.waitForCompletion(futures);
1159    }
1160
1161    private void ddxt3da_subth(final int isgn, final float[][][] a, final boolean scale) {
1162        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > slices ? slices : ConcurrencyUtils.getNumberOfThreads();
1163        int nt = 4 * rows;
1164        if (columns == 2) {
1165            nt >>= 1;
1166        }
1167        Future<?>[] futures = new Future[nthreads];
1168
1169        for (int i = 0; i < nthreads; i++) {
1170            final int n0 = i;
1171            final int startt = nt * i;
1172            futures[i] = ConcurrencyUtils.submit(new Runnable() {
1173
1174                public void run() {
1175                    int idx2;
1176                    if (isgn == -1) {
1177                        for (int s = n0; s < slices; s += nthreads) {
1178                            for (int r = 0; r < rows; r++) {
1179                                dstColumns.forward(a[s][r], scale);
1180                            }
1181                            if (columns > 2) {
1182                                for (int c = 0; c < columns; c += 4) {
1183                                    for (int r = 0; r < rows; r++) {
1184                                        idx2 = startt + rows + r;
1185                                        t[startt + r] = a[s][r][c];
1186                                        t[idx2] = a[s][r][c + 1];
1187                                        t[idx2 + rows] = a[s][r][c + 2];
1188                                        t[idx2 + 2 * rows] = a[s][r][c + 3];
1189                                    }
1190                                    dstRows.forward(t, startt, scale);
1191                                    dstRows.forward(t, startt + rows, scale);
1192                                    dstRows.forward(t, startt + 2 * rows, scale);
1193                                    dstRows.forward(t, startt + 3 * rows, scale);
1194                                    for (int r = 0; r < rows; r++) {
1195                                        idx2 = startt + rows + r;
1196                                        a[s][r][c] = t[startt + r];
1197                                        a[s][r][c + 1] = t[idx2];
1198                                        a[s][r][c + 2] = t[idx2 + rows];
1199                                        a[s][r][c + 3] = t[idx2 + 2 * rows];
1200                                    }
1201                                }
1202                            } else if (columns == 2) {
1203                                for (int r = 0; r < rows; r++) {
1204                                    t[startt + r] = a[s][r][0];
1205                                    t[startt + rows + r] = a[s][r][1];
1206                                }
1207                                dstRows.forward(t, startt, scale);
1208                                dstRows.forward(t, startt + rows, scale);
1209                                for (int r = 0; r < rows; r++) {
1210                                    a[s][r][0] = t[startt + r];
1211                                    a[s][r][1] = t[startt + rows + r];
1212                                }
1213                            }
1214                        }
1215                    } else {
1216                        for (int s = n0; s < slices; s += nthreads) {
1217                            for (int r = 0; r < rows; r++) {
1218                                dstColumns.inverse(a[s][r], scale);
1219                            }
1220                            if (columns > 2) {
1221                                for (int c = 0; c < columns; c += 4) {
1222                                    for (int r = 0; r < rows; r++) {
1223                                        idx2 = startt + rows + r;
1224                                        t[startt + r] = a[s][r][c];
1225                                        t[idx2] = a[s][r][c + 1];
1226                                        t[idx2 + rows] = a[s][r][c + 2];
1227                                        t[idx2 + 2 * rows] = a[s][r][c + 3];
1228                                    }
1229                                    dstRows.inverse(t, startt, scale);
1230                                    dstRows.inverse(t, startt + rows, scale);
1231                                    dstRows.inverse(t, startt + 2 * rows, scale);
1232                                    dstRows.inverse(t, startt + 3 * rows, scale);
1233                                    for (int r = 0; r < rows; r++) {
1234                                        idx2 = startt + rows + r;
1235                                        a[s][r][c] = t[startt + r];
1236                                        a[s][r][c + 1] = t[idx2];
1237                                        a[s][r][c + 2] = t[idx2 + rows];
1238                                        a[s][r][c + 3] = t[idx2 + 2 * rows];
1239                                    }
1240                                }
1241                            } else if (columns == 2) {
1242                                for (int r = 0; r < rows; r++) {
1243                                    t[startt + r] = a[s][r][0];
1244                                    t[startt + rows + r] = a[s][r][1];
1245                                }
1246                                dstRows.inverse(t, startt, scale);
1247                                dstRows.inverse(t, startt + rows, scale);
1248                                for (int r = 0; r < rows; r++) {
1249                                    a[s][r][0] = t[startt + r];
1250                                    a[s][r][1] = t[startt + rows + r];
1251                                }
1252                            }
1253                        }
1254                    }
1255                }
1256            });
1257        }
1258        ConcurrencyUtils.waitForCompletion(futures);
1259    }
1260
1261    private void ddxt3db_subth(final int isgn, final float[] a, final boolean scale) {
1262        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
1263        int nt = 4 * slices;
1264        if (columns == 2) {
1265            nt >>= 1;
1266        }
1267        Future<?>[] futures = new Future[nthreads];
1268
1269        for (int i = 0; i < nthreads; i++) {
1270            final int n0 = i;
1271            final int startt = nt * i;
1272            futures[i] = ConcurrencyUtils.submit(new Runnable() {
1273
1274                public void run() {
1275                    int idx0, idx1, idx2;
1276                    if (isgn == -1) {
1277                        if (columns > 2) {
1278                            for (int r = n0; r < rows; r += nthreads) {
1279                                idx0 = r * rowStride;
1280                                for (int c = 0; c < columns; c += 4) {
1281                                    for (int s = 0; s < slices; s++) {
1282                                        idx1 = s * sliceStride + idx0 + c;
1283                                        idx2 = startt + slices + s;
1284                                        t[startt + s] = a[idx1];
1285                                        t[idx2] = a[idx1 + 1];
1286                                        t[idx2 + slices] = a[idx1 + 2];
1287                                        t[idx2 + 2 * slices] = a[idx1 + 3];
1288                                    }
1289                                    dstSlices.forward(t, startt, scale);
1290                                    dstSlices.forward(t, startt + slices, scale);
1291                                    dstSlices.forward(t, startt + 2 * slices, scale);
1292                                    dstSlices.forward(t, startt + 3 * slices, scale);
1293                                    for (int s = 0; s < slices; s++) {
1294                                        idx1 = s * sliceStride + idx0 + c;
1295                                        idx2 = startt + slices + s;
1296                                        a[idx1] = t[startt + s];
1297                                        a[idx1 + 1] = t[idx2];
1298                                        a[idx1 + 2] = t[idx2 + slices];
1299                                        a[idx1 + 3] = t[idx2 + 2 * slices];
1300                                    }
1301                                }
1302                            }
1303                        } else if (columns == 2) {
1304                            for (int r = n0; r < rows; r += nthreads) {
1305                                idx0 = r * rowStride;
1306                                for (int s = 0; s < slices; s++) {
1307                                    idx1 = s * sliceStride + idx0;
1308                                    t[startt + s] = a[idx1];
1309                                    t[startt + slices + s] = a[idx1 + 1];
1310                                }
1311                                dstSlices.forward(t, startt, scale);
1312                                dstSlices.forward(t, startt + slices, scale);
1313                                for (int s = 0; s < slices; s++) {
1314                                    idx1 = s * sliceStride + idx0;
1315                                    a[idx1] = t[startt + s];
1316                                    a[idx1 + 1] = t[startt + slices + s];
1317                                }
1318                            }
1319                        }
1320                    } else {
1321                        if (columns > 2) {
1322                            for (int r = n0; r < rows; r += nthreads) {
1323                                idx0 = r * rowStride;
1324                                for (int c = 0; c < columns; c += 4) {
1325                                    for (int s = 0; s < slices; s++) {
1326                                        idx1 = s * sliceStride + idx0 + c;
1327                                        idx2 = startt + slices + s;
1328                                        t[startt + s] = a[idx1];
1329                                        t[idx2] = a[idx1 + 1];
1330                                        t[idx2 + slices] = a[idx1 + 2];
1331                                        t[idx2 + 2 * slices] = a[idx1 + 3];
1332                                    }
1333                                    dstSlices.inverse(t, startt, scale);
1334                                    dstSlices.inverse(t, startt + slices, scale);
1335                                    dstSlices.inverse(t, startt + 2 * slices, scale);
1336                                    dstSlices.inverse(t, startt + 3 * slices, scale);
1337                                    for (int s = 0; s < slices; s++) {
1338                                        idx1 = s * sliceStride + idx0 + c;
1339                                        idx2 = startt + slices + s;
1340                                        a[idx1] = t[startt + s];
1341                                        a[idx1 + 1] = t[idx2];
1342                                        a[idx1 + 2] = t[idx2 + slices];
1343                                        a[idx1 + 3] = t[idx2 + 2 * slices];
1344                                    }
1345                                }
1346                            }
1347                        } else if (columns == 2) {
1348                            for (int r = n0; r < rows; r += nthreads) {
1349                                idx0 = r * rowStride;
1350                                for (int s = 0; s < slices; s++) {
1351                                    idx1 = s * sliceStride + idx0;
1352                                    t[startt + s] = a[idx1];
1353                                    t[startt + slices + s] = a[idx1 + 1];
1354                                }
1355                                dstSlices.inverse(t, startt, scale);
1356                                dstSlices.inverse(t, startt + slices, scale);
1357
1358                                for (int s = 0; s < slices; s++) {
1359                                    idx1 = s * sliceStride + idx0;
1360                                    a[idx1] = t[startt + s];
1361                                    a[idx1 + 1] = t[startt + slices + s];
1362                                }
1363                            }
1364                        }
1365                    }
1366                }
1367            });
1368        }
1369        ConcurrencyUtils.waitForCompletion(futures);
1370    }
1371
1372    private void ddxt3db_subth(final int isgn, final float[][][] a, final boolean scale) {
1373        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
1374        int nt = 4 * slices;
1375        if (columns == 2) {
1376            nt >>= 1;
1377        }
1378        Future<?>[] futures = new Future[nthreads];
1379
1380        for (int i = 0; i < nthreads; i++) {
1381            final int n0 = i;
1382            final int startt = nt * i;
1383            futures[i] = ConcurrencyUtils.submit(new Runnable() {
1384
1385                public void run() {
1386                    int idx2;
1387                    if (isgn == -1) {
1388                        if (columns > 2) {
1389                            for (int r = n0; r < rows; r += nthreads) {
1390                                for (int c = 0; c < columns; c += 4) {
1391                                    for (int s = 0; s < slices; s++) {
1392                                        idx2 = startt + slices + s;
1393                                        t[startt + s] = a[s][r][c];
1394                                        t[idx2] = a[s][r][c + 1];
1395                                        t[idx2 + slices] = a[s][r][c + 2];
1396                                        t[idx2 + 2 * slices] = a[s][r][c + 3];
1397                                    }
1398                                    dstSlices.forward(t, startt, scale);
1399                                    dstSlices.forward(t, startt + slices, scale);
1400                                    dstSlices.forward(t, startt + 2 * slices, scale);
1401                                    dstSlices.forward(t, startt + 3 * slices, scale);
1402                                    for (int s = 0; s < slices; s++) {
1403                                        idx2 = startt + slices + s;
1404                                        a[s][r][c] = t[startt + s];
1405                                        a[s][r][c + 1] = t[idx2];
1406                                        a[s][r][c + 2] = t[idx2 + slices];
1407                                        a[s][r][c + 3] = t[idx2 + 2 * slices];
1408                                    }
1409                                }
1410                            }
1411                        } else if (columns == 2) {
1412                            for (int r = n0; r < rows; r += nthreads) {
1413                                for (int s = 0; s < slices; s++) {
1414                                    t[startt + s] = a[s][r][0];
1415                                    t[startt + slices + s] = a[s][r][1];
1416                                }
1417                                dstSlices.forward(t, startt, scale);
1418                                dstSlices.forward(t, startt + slices, scale);
1419                                for (int s = 0; s < slices; s++) {
1420                                    a[s][r][0] = t[startt + s];
1421                                    a[s][r][1] = t[startt + slices + s];
1422                                }
1423                            }
1424                        }
1425                    } else {
1426                        if (columns > 2) {
1427                            for (int r = n0; r < rows; r += nthreads) {
1428                                for (int c = 0; c < columns; c += 4) {
1429                                    for (int s = 0; s < slices; s++) {
1430                                        idx2 = startt + slices + s;
1431                                        t[startt + s] = a[s][r][c];
1432                                        t[idx2] = a[s][r][c + 1];
1433                                        t[idx2 + slices] = a[s][r][c + 2];
1434                                        t[idx2 + 2 * slices] = a[s][r][c + 3];
1435                                    }
1436                                    dstSlices.inverse(t, startt, scale);
1437                                    dstSlices.inverse(t, startt + slices, scale);
1438                                    dstSlices.inverse(t, startt + 2 * slices, scale);
1439                                    dstSlices.inverse(t, startt + 3 * slices, scale);
1440                                    for (int s = 0; s < slices; s++) {
1441                                        idx2 = startt + slices + s;
1442                                        a[s][r][c] = t[startt + s];
1443                                        a[s][r][c + 1] = t[idx2];
1444                                        a[s][r][c + 2] = t[idx2 + slices];
1445                                        a[s][r][c + 3] = t[idx2 + 2 * slices];
1446                                    }
1447                                }
1448                            }
1449                        } else if (columns == 2) {
1450                            for (int r = n0; r < rows; r += nthreads) {
1451                                for (int s = 0; s < slices; s++) {
1452                                    t[startt + s] = a[s][r][0];
1453                                    t[startt + slices + s] = a[s][r][1];
1454                                }
1455                                dstSlices.inverse(t, startt, scale);
1456                                dstSlices.inverse(t, startt + slices, scale);
1457
1458                                for (int s = 0; s < slices; s++) {
1459                                    a[s][r][0] = t[startt + s];
1460                                    a[s][r][1] = t[startt + slices + s];
1461                                }
1462                            }
1463                        }
1464                    }
1465                }
1466            });
1467        }
1468        ConcurrencyUtils.waitForCompletion(futures);
1469    }
1470}