001/* ***** BEGIN LICENSE BLOCK *****
002 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
003 *
004 * The contents of this file are subject to the Mozilla Public License Version
005 * 1.1 (the "License"); you may not use this file except in compliance with
006 * the License. You may obtain a copy of the License at
007 * http://www.mozilla.org/MPL/
008 *
009 * Software distributed under the License is distributed on an "AS IS" basis,
010 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
011 * for the specific language governing rights and limitations under the
012 * License.
013 *
014 * The Original Code is JTransforms.
015 *
016 * The Initial Developer of the Original Code is
017 * Piotr Wendykier, Emory University.
018 * Portions created by the Initial Developer are Copyright (C) 2007-2009
019 * the Initial Developer. All Rights Reserved.
020 *
021 * Alternatively, the contents of this file may be used under the terms of
022 * either the GNU General Public License Version 2 or later (the "GPL"), or
023 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
024 * in which case the provisions of the GPL or the LGPL are applicable instead
025 * of those above. If you wish to allow use of your version of this file only
026 * under the terms of either the GPL or the LGPL, and not to allow others to
027 * use your version of this file under the terms of the MPL, indicate your
028 * decision by deleting the provisions above and replace them with the notice
029 * and other provisions required by the GPL or the LGPL. If you do not delete
030 * the provisions above, a recipient may use your version of this file under
031 * the terms of any one of the MPL, the GPL or the LGPL.
032 *
033 * ***** END LICENSE BLOCK ***** */
034
035package edu.emory.mathcs.jtransforms.dct;
036
037import java.util.concurrent.Future;
038
039import edu.emory.mathcs.utils.ConcurrencyUtils;
040
041/**
042 * Computes 3D Discrete Cosine Transform (DCT) of double precision data. The
043 * sizes of all three dimensions can be arbitrary numbers. This is a parallel
044 * implementation of split-radix and mixed-radix algorithms optimized for SMP
045 * systems. <br>
046 * <br>
047 * Part of the code is derived from General Purpose FFT Package written by Takuya Ooura
048 * (http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html)
049 * 
050 * @author Piotr Wendykier (piotr.wendykier@gmail.com)
051 * 
052 */
053public class DoubleDCT_3D {
054
055    private int slices;
056
057    private int rows;
058
059    private int columns;
060
061    private int sliceStride;
062
063    private int rowStride;
064
065    private double[] t;
066
067    private DoubleDCT_1D dctSlices, dctRows, dctColumns;
068
069    private int oldNthreads;
070
071    private int nt;
072
073    private boolean isPowerOfTwo = false;
074
075    private boolean useThreads = false;
076
077    /**
078     * Creates new instance of DoubleDCT_3D.
079     * 
080     * @param slices
081     *            number of slices
082     * @param rows
083     *            number of rows
084     * @param columns
085     *            number of columns
086     */
087    public DoubleDCT_3D(int slices, int rows, int columns) {
088        if (slices <= 1 || rows <= 1 || columns <= 1) {
089            throw new IllegalArgumentException("slices, rows and columns must be greater than 1");
090        }
091        this.slices = slices;
092        this.rows = rows;
093        this.columns = columns;
094        this.sliceStride = rows * columns;
095        this.rowStride = columns;
096        if (slices * rows * columns >= ConcurrencyUtils.getThreadsBeginN_3D()) {
097            this.useThreads = true;
098        }
099        if (ConcurrencyUtils.isPowerOf2(slices) && ConcurrencyUtils.isPowerOf2(rows) && ConcurrencyUtils.isPowerOf2(columns)) {
100            isPowerOfTwo = true;
101
102            oldNthreads = ConcurrencyUtils.getNumberOfThreads();
103            nt = slices;
104            if (nt < rows) {
105                nt = rows;
106            }
107            nt *= 4;
108            if (oldNthreads > 1) {
109                nt *= oldNthreads;
110            }
111            if (columns == 2) {
112                nt >>= 1;
113            }
114            t = new double[nt];
115        }
116        dctSlices = new DoubleDCT_1D(slices);
117        if (slices == rows) {
118            dctRows = dctSlices;
119        } else {
120            dctRows = new DoubleDCT_1D(rows);
121        }
122        if (slices == columns) {
123            dctColumns = dctSlices;
124        } else if (rows == columns) {
125            dctColumns = dctRows;
126        } else {
127            dctColumns = new DoubleDCT_1D(columns);
128        }
129    }
130
131    /**
132     * Computes the 3D forward DCT (DCT-II) leaving the result in <code>a</code>
133     * . The data is stored in 1D array addressed in slice-major, then
134     * row-major, then column-major, in order of significance, i.e. the element
135     * (i,j,k) of 3D array x[slices][rows][columns] is stored in a[i*sliceStride
136     * + j*rowStride + k], where sliceStride = rows * columns and rowStride =
137     * columns.
138     * 
139     * @param a
140     *            data to transform
141     * @param scale
142     *            if true then scaling is performed
143     */
144    public void forward(final double[] a, final boolean scale) {
145        int nthreads = ConcurrencyUtils.getNumberOfThreads();
146        if (isPowerOfTwo) {
147            if (nthreads != oldNthreads) {
148                nt = slices;
149                if (nt < rows) {
150                    nt = rows;
151                }
152                nt *= 4;
153                if (nthreads > 1) {
154                    nt *= nthreads;
155                }
156                if (columns == 2) {
157                    nt >>= 1;
158                }
159                t = new double[nt];
160                oldNthreads = nthreads;
161            }
162            if ((nthreads > 1) && useThreads) {
163                ddxt3da_subth(-1, a, scale);
164                ddxt3db_subth(-1, a, scale);
165            } else {
166                ddxt3da_sub(-1, a, scale);
167                ddxt3db_sub(-1, a, scale);
168            }
169        } else {
170            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
171                Future<?>[] futures = new Future[nthreads];
172                int p = slices / nthreads;
173                for (int l = 0; l < nthreads; l++) {
174                    final int firstSlice = l * p;
175                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
176                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
177                        public void run() {
178                            for (int s = firstSlice; s < lastSlice; s++) {
179                                int idx1 = s * sliceStride;
180                                for (int r = 0; r < rows; r++) {
181                                    dctColumns.forward(a, idx1 + r * rowStride, scale);
182                                }
183                            }
184                        }
185                    });
186                }
187                ConcurrencyUtils.waitForCompletion(futures);
188
189                for (int l = 0; l < nthreads; l++) {
190                    final int firstSlice = l * p;
191                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
192                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
193                        public void run() {
194                            double[] temp = new double[rows];
195                            for (int s = firstSlice; s < lastSlice; s++) {
196                                int idx1 = s * sliceStride;
197                                for (int c = 0; c < columns; c++) {
198                                    for (int r = 0; r < rows; r++) {
199                                        int idx3 = idx1 + r * rowStride + c;
200                                        temp[r] = a[idx3];
201                                    }
202                                    dctRows.forward(temp, scale);
203                                    for (int r = 0; r < rows; r++) {
204                                        int idx3 = idx1 + r * rowStride + c;
205                                        a[idx3] = temp[r];
206                                    }
207                                }
208                            }
209                        }
210                    });
211                }
212                ConcurrencyUtils.waitForCompletion(futures);
213
214                p = rows / nthreads;
215                for (int l = 0; l < nthreads; l++) {
216                    final int firstRow = l * p;
217                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
218                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
219                        public void run() {
220                            double[] temp = new double[slices];
221                            for (int r = firstRow; r < lastRow; r++) {
222                                int idx1 = r * rowStride;
223                                for (int c = 0; c < columns; c++) {
224                                    for (int s = 0; s < slices; s++) {
225                                        int idx3 = s * sliceStride + idx1 + c;
226                                        temp[s] = a[idx3];
227                                    }
228                                    dctSlices.forward(temp, scale);
229                                    for (int s = 0; s < slices; s++) {
230                                        int idx3 = s * sliceStride + idx1 + c;
231                                        a[idx3] = temp[s];
232                                    }
233                                }
234                            }
235                        }
236                    });
237                }
238                ConcurrencyUtils.waitForCompletion(futures);
239
240            } else {
241                for (int s = 0; s < slices; s++) {
242                    int idx1 = s * sliceStride;
243                    for (int r = 0; r < rows; r++) {
244                        dctColumns.forward(a, idx1 + r * rowStride, scale);
245                    }
246                }
247                double[] temp = new double[rows];
248                for (int s = 0; s < slices; s++) {
249                    int idx1 = s * sliceStride;
250                    for (int c = 0; c < columns; c++) {
251                        for (int r = 0; r < rows; r++) {
252                            int idx3 = idx1 + r * rowStride + c;
253                            temp[r] = a[idx3];
254                        }
255                        dctRows.forward(temp, scale);
256                        for (int r = 0; r < rows; r++) {
257                            int idx3 = idx1 + r * rowStride + c;
258                            a[idx3] = temp[r];
259                        }
260                    }
261                }
262                temp = new double[slices];
263                for (int r = 0; r < rows; r++) {
264                    int idx1 = r * rowStride;
265                    for (int c = 0; c < columns; c++) {
266                        for (int s = 0; s < slices; s++) {
267                            int idx3 = s * sliceStride + idx1 + c;
268                            temp[s] = a[idx3];
269                        }
270                        dctSlices.forward(temp, scale);
271                        for (int s = 0; s < slices; s++) {
272                            int idx3 = s * sliceStride + idx1 + c;
273                            a[idx3] = temp[s];
274                        }
275                    }
276                }
277            }
278        }
279    }
280
281    /**
282     * Computes the 3D forward DCT (DCT-II) leaving the result in <code>a</code>
283     * . The data is stored in 3D array
284     * 
285     * @param a
286     *            data to transform
287     * @param scale
288     *            if true then scaling is performed
289     */
290    public void forward(final double[][][] a, final boolean scale) {
291        int nthreads = ConcurrencyUtils.getNumberOfThreads();
292        if (isPowerOfTwo) {
293            if (nthreads != oldNthreads) {
294                nt = slices;
295                if (nt < rows) {
296                    nt = rows;
297                }
298                nt *= 4;
299                if (nthreads > 1) {
300                    nt *= nthreads;
301                }
302                if (columns == 2) {
303                    nt >>= 1;
304                }
305                t = new double[nt];
306                oldNthreads = nthreads;
307            }
308            if ((nthreads > 1) && useThreads) {
309                ddxt3da_subth(-1, a, scale);
310                ddxt3db_subth(-1, a, scale);
311            } else {
312                ddxt3da_sub(-1, a, scale);
313                ddxt3db_sub(-1, a, scale);
314            }
315        } else {
316            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
317                Future<?>[] futures = new Future[nthreads];
318                int p = slices / nthreads;
319                for (int l = 0; l < nthreads; l++) {
320                    final int firstSlice = l * p;
321                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
322                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
323                        public void run() {
324                            for (int s = firstSlice; s < lastSlice; s++) {
325                                for (int r = 0; r < rows; r++) {
326                                    dctColumns.forward(a[s][r], scale);
327                                }
328                            }
329                        }
330                    });
331                }
332                ConcurrencyUtils.waitForCompletion(futures);
333
334                for (int l = 0; l < nthreads; l++) {
335                    final int firstSlice = l * p;
336                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
337                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
338                        public void run() {
339                            double[] temp = new double[rows];
340                            for (int s = firstSlice; s < lastSlice; s++) {
341                                for (int c = 0; c < columns; c++) {
342                                    for (int r = 0; r < rows; r++) {
343                                        temp[r] = a[s][r][c];
344                                    }
345                                    dctRows.forward(temp, scale);
346                                    for (int r = 0; r < rows; r++) {
347                                        a[s][r][c] = temp[r];
348                                    }
349                                }
350                            }
351                        }
352                    });
353                }
354                ConcurrencyUtils.waitForCompletion(futures);
355
356                p = rows / nthreads;
357                for (int l = 0; l < nthreads; l++) {
358                    final int firstRow = l * p;
359                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
360                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
361                        public void run() {
362                            double[] temp = new double[slices];
363                            for (int r = firstRow; r < lastRow; r++) {
364                                for (int c = 0; c < columns; c++) {
365                                    for (int s = 0; s < slices; s++) {
366                                        temp[s] = a[s][r][c];
367                                    }
368                                    dctSlices.forward(temp, scale);
369                                    for (int s = 0; s < slices; s++) {
370                                        a[s][r][c] = temp[s];
371                                    }
372                                }
373                            }
374                        }
375                    });
376                }
377                ConcurrencyUtils.waitForCompletion(futures);
378
379            } else {
380                for (int s = 0; s < slices; s++) {
381                    for (int r = 0; r < rows; r++) {
382                        dctColumns.forward(a[s][r], scale);
383                    }
384                }
385                double[] temp = new double[rows];
386                for (int s = 0; s < slices; s++) {
387                    for (int c = 0; c < columns; c++) {
388                        for (int r = 0; r < rows; r++) {
389                            temp[r] = a[s][r][c];
390                        }
391                        dctRows.forward(temp, scale);
392                        for (int r = 0; r < rows; r++) {
393                            a[s][r][c] = temp[r];
394                        }
395                    }
396                }
397                temp = new double[slices];
398                for (int r = 0; r < rows; r++) {
399                    for (int c = 0; c < columns; c++) {
400                        for (int s = 0; s < slices; s++) {
401                            temp[s] = a[s][r][c];
402                        }
403                        dctSlices.forward(temp, scale);
404                        for (int s = 0; s < slices; s++) {
405                            a[s][r][c] = temp[s];
406                        }
407                    }
408                }
409            }
410        }
411    }
412
413    /**
414     * Computes the 3D inverse DCT (DCT-III) leaving the result in
415     * <code>a</code>. The data is stored in 1D array addressed in slice-major,
416     * then row-major, then column-major, in order of significance, i.e. the
417     * element (i,j,k) of 3D array x[slices][rows][columns] is stored in
418     * a[i*sliceStride + j*rowStride + k], where sliceStride = rows * columns
419     * and rowStride = columns.
420     * 
421     * @param a
422     *            data to transform
423     * @param scale
424     *            if true then scaling is performed
425     */
426    public void inverse(final double[] a, final boolean scale) {
427        int nthreads = ConcurrencyUtils.getNumberOfThreads();
428        if (isPowerOfTwo) {
429            if (nthreads != oldNthreads) {
430                nt = slices;
431                if (nt < rows) {
432                    nt = rows;
433                }
434                nt *= 4;
435                if (nthreads > 1) {
436                    nt *= nthreads;
437                }
438                if (columns == 2) {
439                    nt >>= 1;
440                }
441                t = new double[nt];
442                oldNthreads = nthreads;
443            }
444            if ((nthreads > 1) && useThreads) {
445                ddxt3da_subth(1, a, scale);
446                ddxt3db_subth(1, a, scale);
447            } else {
448                ddxt3da_sub(1, a, scale);
449                ddxt3db_sub(1, a, scale);
450            }
451        } else {
452            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
453                Future<?>[] futures = new Future[nthreads];
454                int p = slices / nthreads;
455                for (int l = 0; l < nthreads; l++) {
456                    final int firstSlice = l * p;
457                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
458                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
459                        public void run() {
460                            for (int s = firstSlice; s < lastSlice; s++) {
461                                int idx1 = s * sliceStride;
462                                for (int r = 0; r < rows; r++) {
463                                    dctColumns.inverse(a, idx1 + r * rowStride, scale);
464                                }
465                            }
466                        }
467                    });
468                }
469                ConcurrencyUtils.waitForCompletion(futures);
470
471                for (int l = 0; l < nthreads; l++) {
472                    final int firstSlice = l * p;
473                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
474                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
475                        public void run() {
476                            double[] temp = new double[rows];
477                            for (int s = firstSlice; s < lastSlice; s++) {
478                                int idx1 = s * sliceStride;
479                                for (int c = 0; c < columns; c++) {
480                                    for (int r = 0; r < rows; r++) {
481                                        int idx3 = idx1 + r * rowStride + c;
482                                        temp[r] = a[idx3];
483                                    }
484                                    dctRows.inverse(temp, scale);
485                                    for (int r = 0; r < rows; r++) {
486                                        int idx3 = idx1 + r * rowStride + c;
487                                        a[idx3] = temp[r];
488                                    }
489                                }
490                            }
491                        }
492                    });
493                }
494                ConcurrencyUtils.waitForCompletion(futures);
495
496                p = rows / nthreads;
497                for (int l = 0; l < nthreads; l++) {
498                    final int firstRow = l * p;
499                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
500                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
501                        public void run() {
502                            double[] temp = new double[slices];
503                            for (int r = firstRow; r < lastRow; r++) {
504                                int idx1 = r * rowStride;
505                                for (int c = 0; c < columns; c++) {
506                                    for (int s = 0; s < slices; s++) {
507                                        int idx3 = s * sliceStride + idx1 + c;
508                                        temp[s] = a[idx3];
509                                    }
510                                    dctSlices.inverse(temp, scale);
511                                    for (int s = 0; s < slices; s++) {
512                                        int idx3 = s * sliceStride + idx1 + c;
513                                        a[idx3] = temp[s];
514                                    }
515                                }
516                            }
517                        }
518                    });
519                }
520                ConcurrencyUtils.waitForCompletion(futures);
521
522            } else {
523                for (int s = 0; s < slices; s++) {
524                    int idx1 = s * sliceStride;
525                    for (int r = 0; r < rows; r++) {
526                        dctColumns.inverse(a, idx1 + r * rowStride, scale);
527                    }
528                }
529                double[] temp = new double[rows];
530                for (int s = 0; s < slices; s++) {
531                    int idx1 = s * sliceStride;
532                    for (int c = 0; c < columns; c++) {
533                        for (int r = 0; r < rows; r++) {
534                            int idx3 = idx1 + r * rowStride + c;
535                            temp[r] = a[idx3];
536                        }
537                        dctRows.inverse(temp, scale);
538                        for (int r = 0; r < rows; r++) {
539                            int idx3 = idx1 + r * rowStride + c;
540                            a[idx3] = temp[r];
541                        }
542                    }
543                }
544                temp = new double[slices];
545                for (int r = 0; r < rows; r++) {
546                    int idx1 = r * rowStride;
547                    for (int c = 0; c < columns; c++) {
548                        for (int s = 0; s < slices; s++) {
549                            int idx3 = s * sliceStride + idx1 + c;
550                            temp[s] = a[idx3];
551                        }
552                        dctSlices.inverse(temp, scale);
553                        for (int s = 0; s < slices; s++) {
554                            int idx3 = s * sliceStride + idx1 + c;
555                            a[idx3] = temp[s];
556                        }
557                    }
558                }
559            }
560        }
561    }
562
563    /**
564     * Computes the 3D inverse DCT (DCT-III) leaving the result in
565     * <code>a</code>. The data is stored in 3D array.
566     * 
567     * @param a
568     *            data to transform
569     * @param scale
570     *            if true then scaling is performed
571     */
572    public void inverse(final double[][][] a, final boolean scale) {
573        int nthreads = ConcurrencyUtils.getNumberOfThreads();
574        if (isPowerOfTwo) {
575            if (nthreads != oldNthreads) {
576                nt = slices;
577                if (nt < rows) {
578                    nt = rows;
579                }
580                nt *= 4;
581                if (nthreads > 1) {
582                    nt *= nthreads;
583                }
584                if (columns == 2) {
585                    nt >>= 1;
586                }
587                t = new double[nt];
588                oldNthreads = nthreads;
589            }
590            if ((nthreads > 1) && useThreads) {
591                ddxt3da_subth(1, a, scale);
592                ddxt3db_subth(1, a, scale);
593            } else {
594                ddxt3da_sub(1, a, scale);
595                ddxt3db_sub(1, a, scale);
596            }
597        } else {
598            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
599                Future<?>[] futures = new Future[nthreads];
600                int p = slices / nthreads;
601                for (int l = 0; l < nthreads; l++) {
602                    final int firstSlice = l * p;
603                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
604                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
605                        public void run() {
606                            for (int s = firstSlice; s < lastSlice; s++) {
607                                for (int r = 0; r < rows; r++) {
608                                    dctColumns.inverse(a[s][r], scale);
609                                }
610                            }
611                        }
612                    });
613                }
614                ConcurrencyUtils.waitForCompletion(futures);
615
616                for (int l = 0; l < nthreads; l++) {
617                    final int firstSlice = l * p;
618                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
619                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
620                        public void run() {
621                            double[] temp = new double[rows];
622                            for (int s = firstSlice; s < lastSlice; s++) {
623                                for (int c = 0; c < columns; c++) {
624                                    for (int r = 0; r < rows; r++) {
625                                        temp[r] = a[s][r][c];
626                                    }
627                                    dctRows.inverse(temp, scale);
628                                    for (int r = 0; r < rows; r++) {
629                                        a[s][r][c] = temp[r];
630                                    }
631                                }
632                            }
633                        }
634                    });
635                }
636                ConcurrencyUtils.waitForCompletion(futures);
637
638                p = rows / nthreads;
639                for (int l = 0; l < nthreads; l++) {
640                    final int firstRow = l * p;
641                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
642                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
643                        public void run() {
644                            double[] temp = new double[slices];
645                            for (int r = firstRow; r < lastRow; r++) {
646                                for (int c = 0; c < columns; c++) {
647                                    for (int s = 0; s < slices; s++) {
648                                        temp[s] = a[s][r][c];
649                                    }
650                                    dctSlices.inverse(temp, scale);
651                                    for (int s = 0; s < slices; s++) {
652                                        a[s][r][c] = temp[s];
653                                    }
654                                }
655                            }
656                        }
657                    });
658                }
659                ConcurrencyUtils.waitForCompletion(futures);
660
661            } else {
662                for (int s = 0; s < slices; s++) {
663                    for (int r = 0; r < rows; r++) {
664                        dctColumns.inverse(a[s][r], scale);
665                    }
666                }
667                double[] temp = new double[rows];
668                for (int s = 0; s < slices; s++) {
669                    for (int c = 0; c < columns; c++) {
670                        for (int r = 0; r < rows; r++) {
671                            temp[r] = a[s][r][c];
672                        }
673                        dctRows.inverse(temp, scale);
674                        for (int r = 0; r < rows; r++) {
675                            a[s][r][c] = temp[r];
676                        }
677                    }
678                }
679                temp = new double[slices];
680                for (int r = 0; r < rows; r++) {
681                    for (int c = 0; c < columns; c++) {
682                        for (int s = 0; s < slices; s++) {
683                            temp[s] = a[s][r][c];
684                        }
685                        dctSlices.inverse(temp, scale);
686                        for (int s = 0; s < slices; s++) {
687                            a[s][r][c] = temp[s];
688                        }
689                    }
690                }
691            }
692        }
693    }
694
695    private void ddxt3da_sub(int isgn, double[] a, boolean scale) {
696        int idx0, idx1, idx2;
697
698        if (isgn == -1) {
699            for (int s = 0; s < slices; s++) {
700                idx0 = s * sliceStride;
701                for (int r = 0; r < rows; r++) {
702                    dctColumns.forward(a, idx0 + r * rowStride, scale);
703                }
704                if (columns > 2) {
705                    for (int c = 0; c < columns; c += 4) {
706                        for (int r = 0; r < rows; r++) {
707                            idx1 = idx0 + r * rowStride + c;
708                            idx2 = rows + r;
709                            t[r] = a[idx1];
710                            t[idx2] = a[idx1 + 1];
711                            t[idx2 + rows] = a[idx1 + 2];
712                            t[idx2 + 2 * rows] = a[idx1 + 3];
713                        }
714                        dctRows.forward(t, 0, scale);
715                        dctRows.forward(t, rows, scale);
716                        dctRows.forward(t, 2 * rows, scale);
717                        dctRows.forward(t, 3 * rows, scale);
718                        for (int r = 0; r < rows; r++) {
719                            idx1 = idx0 + r * rowStride + c;
720                            idx2 = rows + r;
721                            a[idx1] = t[r];
722                            a[idx1 + 1] = t[idx2];
723                            a[idx1 + 2] = t[idx2 + rows];
724                            a[idx1 + 3] = t[idx2 + 2 * rows];
725                        }
726                    }
727                } else if (columns == 2) {
728                    for (int r = 0; r < rows; r++) {
729                        idx1 = idx0 + r * rowStride;
730                        t[r] = a[idx1];
731                        t[rows + r] = a[idx1 + 1];
732                    }
733                    dctRows.forward(t, 0, scale);
734                    dctRows.forward(t, rows, scale);
735                    for (int r = 0; r < rows; r++) {
736                        idx1 = idx0 + r * rowStride;
737                        a[idx1] = t[r];
738                        a[idx1 + 1] = t[rows + r];
739                    }
740                }
741            }
742        } else {
743            for (int s = 0; s < slices; s++) {
744                idx0 = s * sliceStride;
745                for (int r = 0; r < rows; r++) {
746                    dctColumns.inverse(a, idx0 + r * rowStride, scale);
747                }
748                if (columns > 2) {
749                    for (int c = 0; c < columns; c += 4) {
750                        for (int r = 0; r < rows; r++) {
751                            idx1 = idx0 + r * rowStride + c;
752                            idx2 = rows + r;
753                            t[r] = a[idx1];
754                            t[idx2] = a[idx1 + 1];
755                            t[idx2 + rows] = a[idx1 + 2];
756                            t[idx2 + 2 * rows] = a[idx1 + 3];
757                        }
758                        dctRows.inverse(t, 0, scale);
759                        dctRows.inverse(t, rows, scale);
760                        dctRows.inverse(t, 2 * rows, scale);
761                        dctRows.inverse(t, 3 * rows, scale);
762                        for (int r = 0; r < rows; r++) {
763                            idx1 = idx0 + r * rowStride + c;
764                            idx2 = rows + r;
765                            a[idx1] = t[r];
766                            a[idx1 + 1] = t[idx2];
767                            a[idx1 + 2] = t[idx2 + rows];
768                            a[idx1 + 3] = t[idx2 + 2 * rows];
769                        }
770                    }
771                } else if (columns == 2) {
772                    for (int r = 0; r < rows; r++) {
773                        idx1 = idx0 + r * rowStride;
774                        t[r] = a[idx1];
775                        t[rows + r] = a[idx1 + 1];
776                    }
777                    dctRows.inverse(t, 0, scale);
778                    dctRows.inverse(t, rows, scale);
779                    for (int r = 0; r < rows; r++) {
780                        idx1 = idx0 + r * rowStride;
781                        a[idx1] = t[r];
782                        a[idx1 + 1] = t[rows + r];
783                    }
784                }
785            }
786        }
787    }
788
789    private void ddxt3da_sub(int isgn, double[][][] a, boolean scale) {
790        int idx2;
791
792        if (isgn == -1) {
793            for (int s = 0; s < slices; s++) {
794                for (int r = 0; r < rows; r++) {
795                    dctColumns.forward(a[s][r], scale);
796                }
797                if (columns > 2) {
798                    for (int c = 0; c < columns; c += 4) {
799                        for (int r = 0; r < rows; r++) {
800                            idx2 = rows + r;
801                            t[r] = a[s][r][c];
802                            t[idx2] = a[s][r][c + 1];
803                            t[idx2 + rows] = a[s][r][c + 2];
804                            t[idx2 + 2 * rows] = a[s][r][c + 3];
805                        }
806                        dctRows.forward(t, 0, scale);
807                        dctRows.forward(t, rows, scale);
808                        dctRows.forward(t, 2 * rows, scale);
809                        dctRows.forward(t, 3 * rows, scale);
810                        for (int r = 0; r < rows; r++) {
811                            idx2 = rows + r;
812                            a[s][r][c] = t[r];
813                            a[s][r][c + 1] = t[idx2];
814                            a[s][r][c + 2] = t[idx2 + rows];
815                            a[s][r][c + 3] = t[idx2 + 2 * rows];
816                        }
817                    }
818                } else if (columns == 2) {
819                    for (int r = 0; r < rows; r++) {
820                        t[r] = a[s][r][0];
821                        t[rows + r] = a[s][r][1];
822                    }
823                    dctRows.forward(t, 0, scale);
824                    dctRows.forward(t, rows, scale);
825                    for (int r = 0; r < rows; r++) {
826                        a[s][r][0] = t[r];
827                        a[s][r][1] = t[rows + r];
828                    }
829                }
830            }
831        } else {
832            for (int s = 0; s < slices; s++) {
833                for (int r = 0; r < rows; r++) {
834                    dctColumns.inverse(a[s][r], scale);
835                }
836                if (columns > 2) {
837                    for (int c = 0; c < columns; c += 4) {
838                        for (int r = 0; r < rows; r++) {
839                            idx2 = rows + r;
840                            t[r] = a[s][r][c];
841                            t[idx2] = a[s][r][c + 1];
842                            t[idx2 + rows] = a[s][r][c + 2];
843                            t[idx2 + 2 * rows] = a[s][r][c + 3];
844                        }
845                        dctRows.inverse(t, 0, scale);
846                        dctRows.inverse(t, rows, scale);
847                        dctRows.inverse(t, 2 * rows, scale);
848                        dctRows.inverse(t, 3 * rows, scale);
849                        for (int r = 0; r < rows; r++) {
850                            idx2 = rows + r;
851                            a[s][r][c] = t[r];
852                            a[s][r][c + 1] = t[idx2];
853                            a[s][r][c + 2] = t[idx2 + rows];
854                            a[s][r][c + 3] = t[idx2 + 2 * rows];
855                        }
856                    }
857                } else if (columns == 2) {
858                    for (int r = 0; r < rows; r++) {
859                        t[r] = a[s][r][0];
860                        t[rows + r] = a[s][r][1];
861                    }
862                    dctRows.inverse(t, 0, scale);
863                    dctRows.inverse(t, rows, scale);
864                    for (int r = 0; r < rows; r++) {
865                        a[s][r][0] = t[r];
866                        a[s][r][1] = t[rows + r];
867                    }
868                }
869            }
870        }
871    }
872
873    private void ddxt3db_sub(int isgn, double[] a, boolean scale) {
874        int idx0, idx1, idx2;
875
876        if (isgn == -1) {
877            if (columns > 2) {
878                for (int r = 0; r < rows; r++) {
879                    idx0 = r * rowStride;
880                    for (int c = 0; c < columns; c += 4) {
881                        for (int s = 0; s < slices; s++) {
882                            idx1 = s * sliceStride + idx0 + c;
883                            idx2 = slices + s;
884                            t[s] = a[idx1];
885                            t[idx2] = a[idx1 + 1];
886                            t[idx2 + slices] = a[idx1 + 2];
887                            t[idx2 + 2 * slices] = a[idx1 + 3];
888                        }
889                        dctSlices.forward(t, 0, scale);
890                        dctSlices.forward(t, slices, scale);
891                        dctSlices.forward(t, 2 * slices, scale);
892                        dctSlices.forward(t, 3 * slices, scale);
893                        for (int s = 0; s < slices; s++) {
894                            idx1 = s * sliceStride + idx0 + c;
895                            idx2 = slices + s;
896                            a[idx1] = t[s];
897                            a[idx1 + 1] = t[idx2];
898                            a[idx1 + 2] = t[idx2 + slices];
899                            a[idx1 + 3] = t[idx2 + 2 * slices];
900                        }
901                    }
902                }
903            } else if (columns == 2) {
904                for (int r = 0; r < rows; r++) {
905                    idx0 = r * rowStride;
906                    for (int s = 0; s < slices; s++) {
907                        idx1 = s * sliceStride + idx0;
908                        t[s] = a[idx1];
909                        t[slices + s] = a[idx1 + 1];
910                    }
911                    dctSlices.forward(t, 0, scale);
912                    dctSlices.forward(t, slices, scale);
913                    for (int s = 0; s < slices; s++) {
914                        idx1 = s * sliceStride + idx0;
915                        a[idx1] = t[s];
916                        a[idx1 + 1] = t[slices + s];
917                    }
918                }
919            }
920        } else {
921            if (columns > 2) {
922                for (int r = 0; r < rows; r++) {
923                    idx0 = r * rowStride;
924                    for (int c = 0; c < columns; c += 4) {
925                        for (int s = 0; s < slices; s++) {
926                            idx1 = s * sliceStride + idx0 + c;
927                            idx2 = slices + s;
928                            t[s] = a[idx1];
929                            t[idx2] = a[idx1 + 1];
930                            t[idx2 + slices] = a[idx1 + 2];
931                            t[idx2 + 2 * slices] = a[idx1 + 3];
932                        }
933                        dctSlices.inverse(t, 0, scale);
934                        dctSlices.inverse(t, slices, scale);
935                        dctSlices.inverse(t, 2 * slices, scale);
936                        dctSlices.inverse(t, 3 * slices, scale);
937
938                        for (int s = 0; s < slices; s++) {
939                            idx1 = s * sliceStride + idx0 + c;
940                            idx2 = slices + s;
941                            a[idx1] = t[s];
942                            a[idx1 + 1] = t[idx2];
943                            a[idx1 + 2] = t[idx2 + slices];
944                            a[idx1 + 3] = t[idx2 + 2 * slices];
945                        }
946                    }
947                }
948            } else if (columns == 2) {
949                for (int r = 0; r < rows; r++) {
950                    idx0 = r * rowStride;
951                    for (int s = 0; s < slices; s++) {
952                        idx1 = s * sliceStride + idx0;
953                        t[s] = a[idx1];
954                        t[slices + s] = a[idx1 + 1];
955                    }
956                    dctSlices.inverse(t, 0, scale);
957                    dctSlices.inverse(t, slices, scale);
958                    for (int s = 0; s < slices; s++) {
959                        idx1 = s * sliceStride + idx0;
960                        a[idx1] = t[s];
961                        a[idx1 + 1] = t[slices + s];
962                    }
963                }
964            }
965        }
966    }
967
968    private void ddxt3db_sub(int isgn, double[][][] a, boolean scale) {
969        int idx2;
970
971        if (isgn == -1) {
972            if (columns > 2) {
973                for (int r = 0; r < rows; r++) {
974                    for (int c = 0; c < columns; c += 4) {
975                        for (int s = 0; s < slices; s++) {
976                            idx2 = slices + s;
977                            t[s] = a[s][r][c];
978                            t[idx2] = a[s][r][c + 1];
979                            t[idx2 + slices] = a[s][r][c + 2];
980                            t[idx2 + 2 * slices] = a[s][r][c + 3];
981                        }
982                        dctSlices.forward(t, 0, scale);
983                        dctSlices.forward(t, slices, scale);
984                        dctSlices.forward(t, 2 * slices, scale);
985                        dctSlices.forward(t, 3 * slices, scale);
986                        for (int s = 0; s < slices; s++) {
987                            idx2 = slices + s;
988                            a[s][r][c] = t[s];
989                            a[s][r][c + 1] = t[idx2];
990                            a[s][r][c + 2] = t[idx2 + slices];
991                            a[s][r][c + 3] = t[idx2 + 2 * slices];
992                        }
993                    }
994                }
995            } else if (columns == 2) {
996                for (int r = 0; r < rows; r++) {
997                    for (int s = 0; s < slices; s++) {
998                        t[s] = a[s][r][0];
999                        t[slices + s] = a[s][r][1];
1000                    }
1001                    dctSlices.forward(t, 0, scale);
1002                    dctSlices.forward(t, slices, scale);
1003                    for (int s = 0; s < slices; s++) {
1004                        a[s][r][0] = t[s];
1005                        a[s][r][1] = t[slices + s];
1006                    }
1007                }
1008            }
1009        } else {
1010            if (columns > 2) {
1011                for (int r = 0; r < rows; r++) {
1012                    for (int c = 0; c < columns; c += 4) {
1013                        for (int s = 0; s < slices; s++) {
1014                            idx2 = slices + s;
1015                            t[s] = a[s][r][c];
1016                            t[idx2] = a[s][r][c + 1];
1017                            t[idx2 + slices] = a[s][r][c + 2];
1018                            t[idx2 + 2 * slices] = a[s][r][c + 3];
1019                        }
1020                        dctSlices.inverse(t, 0, scale);
1021                        dctSlices.inverse(t, slices, scale);
1022                        dctSlices.inverse(t, 2 * slices, scale);
1023                        dctSlices.inverse(t, 3 * slices, scale);
1024
1025                        for (int s = 0; s < slices; s++) {
1026                            idx2 = slices + s;
1027                            a[s][r][c] = t[s];
1028                            a[s][r][c + 1] = t[idx2];
1029                            a[s][r][c + 2] = t[idx2 + slices];
1030                            a[s][r][c + 3] = t[idx2 + 2 * slices];
1031                        }
1032                    }
1033                }
1034            } else if (columns == 2) {
1035                for (int r = 0; r < rows; r++) {
1036                    for (int s = 0; s < slices; s++) {
1037                        t[s] = a[s][r][0];
1038                        t[slices + s] = a[s][r][1];
1039                    }
1040                    dctSlices.inverse(t, 0, scale);
1041                    dctSlices.inverse(t, slices, scale);
1042                    for (int s = 0; s < slices; s++) {
1043                        a[s][r][0] = t[s];
1044                        a[s][r][1] = t[slices + s];
1045                    }
1046                }
1047            }
1048        }
1049    }
1050
1051    private void ddxt3da_subth(final int isgn, final double[] a, final boolean scale) {
1052        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > slices ? slices : ConcurrencyUtils.getNumberOfThreads();
1053        int nt = 4 * rows;
1054        if (columns == 2) {
1055            nt >>= 1;
1056        }
1057        Future<?>[] futures = new Future[nthreads];
1058
1059        for (int i = 0; i < nthreads; i++) {
1060            final int n0 = i;
1061            final int startt = nt * i;
1062            futures[i] = ConcurrencyUtils.submit(new Runnable() {
1063
1064                public void run() {
1065                    int idx0, idx1, idx2;
1066                    if (isgn == -1) {
1067                        for (int s = n0; s < slices; s += nthreads) {
1068                            idx0 = s * sliceStride;
1069                            for (int r = 0; r < rows; r++) {
1070                                dctColumns.forward(a, idx0 + r * rowStride, scale);
1071                            }
1072                            if (columns > 2) {
1073                                for (int c = 0; c < columns; c += 4) {
1074                                    for (int r = 0; r < rows; r++) {
1075                                        idx1 = idx0 + r * rowStride + c;
1076                                        idx2 = startt + rows + r;
1077                                        t[startt + r] = a[idx1];
1078                                        t[idx2] = a[idx1 + 1];
1079                                        t[idx2 + rows] = a[idx1 + 2];
1080                                        t[idx2 + 2 * rows] = a[idx1 + 3];
1081                                    }
1082                                    dctRows.forward(t, startt, scale);
1083                                    dctRows.forward(t, startt + rows, scale);
1084                                    dctRows.forward(t, startt + 2 * rows, scale);
1085                                    dctRows.forward(t, startt + 3 * rows, scale);
1086                                    for (int r = 0; r < rows; r++) {
1087                                        idx1 = idx0 + r * rowStride + c;
1088                                        idx2 = startt + rows + r;
1089                                        a[idx1] = t[startt + r];
1090                                        a[idx1 + 1] = t[idx2];
1091                                        a[idx1 + 2] = t[idx2 + rows];
1092                                        a[idx1 + 3] = t[idx2 + 2 * rows];
1093                                    }
1094                                }
1095                            } else if (columns == 2) {
1096                                for (int r = 0; r < rows; r++) {
1097                                    idx1 = idx0 + r * rowStride;
1098                                    t[startt + r] = a[idx1];
1099                                    t[startt + rows + r] = a[idx1 + 1];
1100                                }
1101                                dctRows.forward(t, startt, scale);
1102                                dctRows.forward(t, startt + rows, scale);
1103                                for (int r = 0; r < rows; r++) {
1104                                    idx1 = idx0 + r * rowStride;
1105                                    a[idx1] = t[startt + r];
1106                                    a[idx1 + 1] = t[startt + rows + r];
1107                                }
1108                            }
1109                        }
1110                    } else {
1111                        for (int s = n0; s < slices; s += nthreads) {
1112                            idx0 = s * sliceStride;
1113                            for (int r = 0; r < rows; r++) {
1114                                dctColumns.inverse(a, idx0 + r * rowStride, scale);
1115                            }
1116                            if (columns > 2) {
1117                                for (int c = 0; c < columns; c += 4) {
1118                                    for (int j = 0; j < rows; j++) {
1119                                        idx1 = idx0 + j * rowStride + c;
1120                                        idx2 = startt + rows + j;
1121                                        t[startt + j] = a[idx1];
1122                                        t[idx2] = a[idx1 + 1];
1123                                        t[idx2 + rows] = a[idx1 + 2];
1124                                        t[idx2 + 2 * rows] = a[idx1 + 3];
1125                                    }
1126                                    dctRows.inverse(t, startt, scale);
1127                                    dctRows.inverse(t, startt + rows, scale);
1128                                    dctRows.inverse(t, startt + 2 * rows, scale);
1129                                    dctRows.inverse(t, startt + 3 * rows, scale);
1130                                    for (int j = 0; j < rows; j++) {
1131                                        idx1 = idx0 + j * rowStride + c;
1132                                        idx2 = startt + rows + j;
1133                                        a[idx1] = t[startt + j];
1134                                        a[idx1 + 1] = t[idx2];
1135                                        a[idx1 + 2] = t[idx2 + rows];
1136                                        a[idx1 + 3] = t[idx2 + 2 * rows];
1137                                    }
1138                                }
1139                            } else if (columns == 2) {
1140                                for (int r = 0; r < rows; r++) {
1141                                    idx1 = idx0 + r * rowStride;
1142                                    t[startt + r] = a[idx1];
1143                                    t[startt + rows + r] = a[idx1 + 1];
1144                                }
1145                                dctRows.inverse(t, startt, scale);
1146                                dctRows.inverse(t, startt + rows, scale);
1147                                for (int r = 0; r < rows; r++) {
1148                                    idx1 = idx0 + r * rowStride;
1149                                    a[idx1] = t[startt + r];
1150                                    a[idx1 + 1] = t[startt + rows + r];
1151                                }
1152                            }
1153                        }
1154                    }
1155                }
1156            });
1157        }
1158        ConcurrencyUtils.waitForCompletion(futures);
1159    }
1160
1161    private void ddxt3da_subth(final int isgn, final double[][][] a, final boolean scale) {
1162        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > slices ? slices : ConcurrencyUtils.getNumberOfThreads();
1163        int nt = 4 * rows;
1164        if (columns == 2) {
1165            nt >>= 1;
1166        }
1167        Future<?>[] futures = new Future[nthreads];
1168
1169        for (int i = 0; i < nthreads; i++) {
1170            final int n0 = i;
1171            final int startt = nt * i;
1172            futures[i] = ConcurrencyUtils.submit(new Runnable() {
1173
1174                public void run() {
1175                    int idx2;
1176                    if (isgn == -1) {
1177                        for (int s = n0; s < slices; s += nthreads) {
1178                            for (int r = 0; r < rows; r++) {
1179                                dctColumns.forward(a[s][r], scale);
1180                            }
1181                            if (columns > 2) {
1182                                for (int c = 0; c < columns; c += 4) {
1183                                    for (int r = 0; r < rows; r++) {
1184                                        idx2 = startt + rows + r;
1185                                        t[startt + r] = a[s][r][c];
1186                                        t[idx2] = a[s][r][c + 1];
1187                                        t[idx2 + rows] = a[s][r][c + 2];
1188                                        t[idx2 + 2 * rows] = a[s][r][c + 3];
1189                                    }
1190                                    dctRows.forward(t, startt, scale);
1191                                    dctRows.forward(t, startt + rows, scale);
1192                                    dctRows.forward(t, startt + 2 * rows, scale);
1193                                    dctRows.forward(t, startt + 3 * rows, scale);
1194                                    for (int r = 0; r < rows; r++) {
1195                                        idx2 = startt + rows + r;
1196                                        a[s][r][c] = t[startt + r];
1197                                        a[s][r][c + 1] = t[idx2];
1198                                        a[s][r][c + 2] = t[idx2 + rows];
1199                                        a[s][r][c + 3] = t[idx2 + 2 * rows];
1200                                    }
1201                                }
1202                            } else if (columns == 2) {
1203                                for (int r = 0; r < rows; r++) {
1204                                    t[startt + r] = a[s][r][0];
1205                                    t[startt + rows + r] = a[s][r][1];
1206                                }
1207                                dctRows.forward(t, startt, scale);
1208                                dctRows.forward(t, startt + rows, scale);
1209                                for (int r = 0; r < rows; r++) {
1210                                    a[s][r][0] = t[startt + r];
1211                                    a[s][r][1] = t[startt + rows + r];
1212                                }
1213                            }
1214                        }
1215                    } else {
1216                        for (int s = n0; s < slices; s += nthreads) {
1217                            for (int r = 0; r < rows; r++) {
1218                                dctColumns.inverse(a[s][r], scale);
1219                            }
1220                            if (columns > 2) {
1221                                for (int c = 0; c < columns; c += 4) {
1222                                    for (int r = 0; r < rows; r++) {
1223                                        idx2 = startt + rows + r;
1224                                        t[startt + r] = a[s][r][c];
1225                                        t[idx2] = a[s][r][c + 1];
1226                                        t[idx2 + rows] = a[s][r][c + 2];
1227                                        t[idx2 + 2 * rows] = a[s][r][c + 3];
1228                                    }
1229                                    dctRows.inverse(t, startt, scale);
1230                                    dctRows.inverse(t, startt + rows, scale);
1231                                    dctRows.inverse(t, startt + 2 * rows, scale);
1232                                    dctRows.inverse(t, startt + 3 * rows, scale);
1233                                    for (int r = 0; r < rows; r++) {
1234                                        idx2 = startt + rows + r;
1235                                        a[s][r][c] = t[startt + r];
1236                                        a[s][r][c + 1] = t[idx2];
1237                                        a[s][r][c + 2] = t[idx2 + rows];
1238                                        a[s][r][c + 3] = t[idx2 + 2 * rows];
1239                                    }
1240                                }
1241                            } else if (columns == 2) {
1242                                for (int r = 0; r < rows; r++) {
1243                                    t[startt + r] = a[s][r][0];
1244                                    t[startt + rows + r] = a[s][r][1];
1245                                }
1246                                dctRows.inverse(t, startt, scale);
1247                                dctRows.inverse(t, startt + rows, scale);
1248                                for (int r = 0; r < rows; r++) {
1249                                    a[s][r][0] = t[startt + r];
1250                                    a[s][r][1] = t[startt + rows + r];
1251                                }
1252                            }
1253                        }
1254                    }
1255                }
1256            });
1257        }
1258        ConcurrencyUtils.waitForCompletion(futures);
1259    }
1260
1261    private void ddxt3db_subth(final int isgn, final double[] a, final boolean scale) {
1262        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
1263        int nt = 4 * slices;
1264        if (columns == 2) {
1265            nt >>= 1;
1266        }
1267        Future<?>[] futures = new Future[nthreads];
1268        for (int i = 0; i < nthreads; i++) {
1269            final int n0 = i;
1270            final int startt = nt * i;
1271            futures[i] = ConcurrencyUtils.submit(new Runnable() {
1272
1273                public void run() {
1274                    int idx0, idx1, idx2;
1275                    if (isgn == -1) {
1276                        if (columns > 2) {
1277                            for (int r = n0; r < rows; r += nthreads) {
1278                                idx0 = r * rowStride;
1279                                for (int c = 0; c < columns; c += 4) {
1280                                    for (int s = 0; s < slices; s++) {
1281                                        idx1 = s * sliceStride + idx0 + c;
1282                                        idx2 = startt + slices + s;
1283                                        t[startt + s] = a[idx1];
1284                                        t[idx2] = a[idx1 + 1];
1285                                        t[idx2 + slices] = a[idx1 + 2];
1286                                        t[idx2 + 2 * slices] = a[idx1 + 3];
1287                                    }
1288                                    dctSlices.forward(t, startt, scale);
1289                                    dctSlices.forward(t, startt + slices, scale);
1290                                    dctSlices.forward(t, startt + 2 * slices, scale);
1291                                    dctSlices.forward(t, startt + 3 * slices, scale);
1292                                    for (int s = 0; s < slices; s++) {
1293                                        idx1 = s * sliceStride + idx0 + c;
1294                                        idx2 = startt + slices + s;
1295                                        a[idx1] = t[startt + s];
1296                                        a[idx1 + 1] = t[idx2];
1297                                        a[idx1 + 2] = t[idx2 + slices];
1298                                        a[idx1 + 3] = t[idx2 + 2 * slices];
1299                                    }
1300                                }
1301                            }
1302                        } else if (columns == 2) {
1303                            for (int r = n0; r < rows; r += nthreads) {
1304                                idx0 = r * rowStride;
1305                                for (int s = 0; s < slices; s++) {
1306                                    idx1 = s * sliceStride + idx0;
1307                                    t[startt + s] = a[idx1];
1308                                    t[startt + slices + s] = a[idx1 + 1];
1309                                }
1310                                dctSlices.forward(t, startt, scale);
1311                                dctSlices.forward(t, startt + slices, scale);
1312                                for (int s = 0; s < slices; s++) {
1313                                    idx1 = s * sliceStride + idx0;
1314                                    a[idx1] = t[startt + s];
1315                                    a[idx1 + 1] = t[startt + slices + s];
1316                                }
1317                            }
1318                        }
1319                    } else {
1320                        if (columns > 2) {
1321                            for (int r = n0; r < rows; r += nthreads) {
1322                                idx0 = r * rowStride;
1323                                for (int c = 0; c < columns; c += 4) {
1324                                    for (int s = 0; s < slices; s++) {
1325                                        idx1 = s * sliceStride + idx0 + c;
1326                                        idx2 = startt + slices + s;
1327                                        t[startt + s] = a[idx1];
1328                                        t[idx2] = a[idx1 + 1];
1329                                        t[idx2 + slices] = a[idx1 + 2];
1330                                        t[idx2 + 2 * slices] = a[idx1 + 3];
1331                                    }
1332                                    dctSlices.inverse(t, startt, scale);
1333                                    dctSlices.inverse(t, startt + slices, scale);
1334                                    dctSlices.inverse(t, startt + 2 * slices, scale);
1335                                    dctSlices.inverse(t, startt + 3 * slices, scale);
1336                                    for (int s = 0; s < slices; s++) {
1337                                        idx1 = s * sliceStride + idx0 + c;
1338                                        idx2 = startt + slices + s;
1339                                        a[idx1] = t[startt + s];
1340                                        a[idx1 + 1] = t[idx2];
1341                                        a[idx1 + 2] = t[idx2 + slices];
1342                                        a[idx1 + 3] = t[idx2 + 2 * slices];
1343                                    }
1344                                }
1345                            }
1346                        } else if (columns == 2) {
1347                            for (int r = n0; r < rows; r += nthreads) {
1348                                idx0 = r * rowStride;
1349                                for (int s = 0; s < slices; s++) {
1350                                    idx1 = s * sliceStride + idx0;
1351                                    t[startt + s] = a[idx1];
1352                                    t[startt + slices + s] = a[idx1 + 1];
1353                                }
1354                                dctSlices.inverse(t, startt, scale);
1355                                dctSlices.inverse(t, startt + slices, scale);
1356
1357                                for (int s = 0; s < slices; s++) {
1358                                    idx1 = s * sliceStride + idx0;
1359                                    a[idx1] = t[startt + s];
1360                                    a[idx1 + 1] = t[startt + slices + s];
1361                                }
1362                            }
1363                        }
1364                    }
1365                }
1366            });
1367        }
1368        ConcurrencyUtils.waitForCompletion(futures);
1369    }
1370
1371    private void ddxt3db_subth(final int isgn, final double[][][] a, final boolean scale) {
1372        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
1373        int nt = 4 * slices;
1374        if (columns == 2) {
1375            nt >>= 1;
1376        }
1377        Future<?>[] futures = new Future[nthreads];
1378
1379        for (int i = 0; i < nthreads; i++) {
1380            final int n0 = i;
1381            final int startt = nt * i;
1382            futures[i] = ConcurrencyUtils.submit(new Runnable() {
1383
1384                public void run() {
1385                    int idx2;
1386                    if (isgn == -1) {
1387                        if (columns > 2) {
1388                            for (int r = n0; r < rows; r += nthreads) {
1389                                for (int c = 0; c < columns; c += 4) {
1390                                    for (int s = 0; s < slices; s++) {
1391                                        idx2 = startt + slices + s;
1392                                        t[startt + s] = a[s][r][c];
1393                                        t[idx2] = a[s][r][c + 1];
1394                                        t[idx2 + slices] = a[s][r][c + 2];
1395                                        t[idx2 + 2 * slices] = a[s][r][c + 3];
1396                                    }
1397                                    dctSlices.forward(t, startt, scale);
1398                                    dctSlices.forward(t, startt + slices, scale);
1399                                    dctSlices.forward(t, startt + 2 * slices, scale);
1400                                    dctSlices.forward(t, startt + 3 * slices, scale);
1401                                    for (int s = 0; s < slices; s++) {
1402                                        idx2 = startt + slices + s;
1403                                        a[s][r][c] = t[startt + s];
1404                                        a[s][r][c + 1] = t[idx2];
1405                                        a[s][r][c + 2] = t[idx2 + slices];
1406                                        a[s][r][c + 3] = t[idx2 + 2 * slices];
1407                                    }
1408                                }
1409                            }
1410                        } else if (columns == 2) {
1411                            for (int r = n0; r < rows; r += nthreads) {
1412                                for (int s = 0; s < slices; s++) {
1413                                    t[startt + s] = a[s][r][0];
1414                                    t[startt + slices + s] = a[s][r][1];
1415                                }
1416                                dctSlices.forward(t, startt, scale);
1417                                dctSlices.forward(t, startt + slices, scale);
1418                                for (int s = 0; s < slices; s++) {
1419                                    a[s][r][0] = t[startt + s];
1420                                    a[s][r][1] = t[startt + slices + s];
1421                                }
1422                            }
1423                        }
1424                    } else {
1425                        if (columns > 2) {
1426                            for (int r = n0; r < rows; r += nthreads) {
1427                                for (int c = 0; c < columns; c += 4) {
1428                                    for (int s = 0; s < slices; s++) {
1429                                        idx2 = startt + slices + s;
1430                                        t[startt + s] = a[s][r][c];
1431                                        t[idx2] = a[s][r][c + 1];
1432                                        t[idx2 + slices] = a[s][r][c + 2];
1433                                        t[idx2 + 2 * slices] = a[s][r][c + 3];
1434                                    }
1435                                    dctSlices.inverse(t, startt, scale);
1436                                    dctSlices.inverse(t, startt + slices, scale);
1437                                    dctSlices.inverse(t, startt + 2 * slices, scale);
1438                                    dctSlices.inverse(t, startt + 3 * slices, scale);
1439                                    for (int s = 0; s < slices; s++) {
1440                                        idx2 = startt + slices + s;
1441                                        a[s][r][c] = t[startt + s];
1442                                        a[s][r][c + 1] = t[idx2];
1443                                        a[s][r][c + 2] = t[idx2 + slices];
1444                                        a[s][r][c + 3] = t[idx2 + 2 * slices];
1445                                    }
1446                                }
1447                            }
1448                        } else if (columns == 2) {
1449                            for (int r = n0; r < rows; r += nthreads) {
1450                                for (int s = 0; s < slices; s++) {
1451                                    t[startt + s] = a[s][r][0];
1452                                    t[startt + slices + s] = a[s][r][1];
1453                                }
1454                                dctSlices.inverse(t, startt, scale);
1455                                dctSlices.inverse(t, startt + slices, scale);
1456
1457                                for (int s = 0; s < slices; s++) {
1458                                    a[s][r][0] = t[startt + s];
1459                                    a[s][r][1] = t[startt + slices + s];
1460                                }
1461                            }
1462                        }
1463                    }
1464                }
1465            });
1466        }
1467        ConcurrencyUtils.waitForCompletion(futures);
1468    }
1469}