001/* ***** BEGIN LICENSE BLOCK *****
002 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
003 *
004 * The contents of this file are subject to the Mozilla Public License Version
005 * 1.1 (the "License"); you may not use this file except in compliance with
006 * the License. You may obtain a copy of the License at
007 * http://www.mozilla.org/MPL/
008 *
009 * Software distributed under the License is distributed on an "AS IS" basis,
010 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
011 * for the specific language governing rights and limitations under the
012 * License.
013 *
014 * The Original Code is JTransforms.
015 *
016 * The Initial Developer of the Original Code is
017 * Piotr Wendykier, Emory University.
018 * Portions created by the Initial Developer are Copyright (C) 2007-2009
019 * the Initial Developer. All Rights Reserved.
020 *
021 * Alternatively, the contents of this file may be used under the terms of
022 * either the GNU General Public License Version 2 or later (the "GPL"), or
023 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
024 * in which case the provisions of the GPL or the LGPL are applicable instead
025 * of those above. If you wish to allow use of your version of this file only
026 * under the terms of either the GPL or the LGPL, and not to allow others to
027 * use your version of this file under the terms of the MPL, indicate your
028 * decision by deleting the provisions above and replace them with the notice
029 * and other provisions required by the GPL or the LGPL. If you do not delete
030 * the provisions above, a recipient may use your version of this file under
031 * the terms of any one of the MPL, the GPL or the LGPL.
032 *
033 * ***** END LICENSE BLOCK ***** */
034
035package edu.emory.mathcs.jtransforms.dht;
036
037import java.util.concurrent.Future;
038
039import edu.emory.mathcs.utils.ConcurrencyUtils;
040
041/**
042 * Computes 3D Discrete Hartley Transform (DHT) of real, double precision data.
043 * The sizes of all three dimensions can be arbitrary numbers. This is a
044 * parallel implementation optimized for SMP systems.<br>
045 * <br>
046 * Part of code is derived from General Purpose FFT Package written by Takuya Ooura
047 * (http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html)
048 * 
049 * @author Piotr Wendykier (piotr.wendykier@gmail.com)
050 * 
051 */
052public class DoubleDHT_3D {
053
054    private int slices;
055
056    private int rows;
057
058    private int columns;
059
060    private int sliceStride;
061
062    private int rowStride;
063
064    private double[] t;
065
066    private DoubleDHT_1D dhtSlices, dhtRows, dhtColumns;
067
068    private int oldNthreads;
069
070    private int nt;
071
072    private boolean isPowerOfTwo = false;
073
074    private boolean useThreads = false;
075
076    /**
077     * Creates new instance of DoubleDHT_3D.
078     * 
079     * @param slices
080     *            number of slices
081     * @param rows
082     *            number of rows
083     * @param columns
084     *            number of columns
085     */
086    public DoubleDHT_3D(int slices, int rows, int columns) {
087        if (slices <= 1 || rows <= 1 || columns <= 1) {
088            throw new IllegalArgumentException("slices, rows and columns must be greater than 1");
089        }
090        this.slices = slices;
091        this.rows = rows;
092        this.columns = columns;
093        this.sliceStride = rows * columns;
094        this.rowStride = columns;
095        if (slices * rows * columns >= ConcurrencyUtils.getThreadsBeginN_3D()) {
096            this.useThreads = true;
097        }
098        if (ConcurrencyUtils.isPowerOf2(slices) && ConcurrencyUtils.isPowerOf2(rows) && ConcurrencyUtils.isPowerOf2(columns)) {
099            isPowerOfTwo = true;
100            oldNthreads = ConcurrencyUtils.getNumberOfThreads();
101            nt = slices;
102            if (nt < rows) {
103                nt = rows;
104            }
105            nt *= 4;
106            if (oldNthreads > 1) {
107                nt *= oldNthreads;
108            }
109            if (columns == 2) {
110                nt >>= 1;
111            }
112            t = new double[nt];
113        }
114        dhtSlices = new DoubleDHT_1D(slices);
115        if (slices == rows) {
116            dhtRows = dhtSlices;
117        } else {
118            dhtRows = new DoubleDHT_1D(rows);
119        }
120        if (slices == columns) {
121            dhtColumns = dhtSlices;
122        } else if (rows == columns) {
123            dhtColumns = dhtRows;
124        } else {
125            dhtColumns = new DoubleDHT_1D(columns);
126        }
127    }
128
129    /**
130     * Computes the 3D real, forward DHT leaving the result in <code>a</code>.
131     * The data is stored in 1D array addressed in slice-major, then row-major,
132     * then column-major, in order of significance, i.e. the element (i,j,k) of
133     * 3D array x[slices][rows][columns] is stored in a[i*sliceStride +
134     * j*rowStride + k], where sliceStride = rows * columns and rowStride =
135     * columns.
136     * 
137     * @param a
138     *            data to transform
139     */
140    public void forward(final double[] a) {
141        int nthreads = ConcurrencyUtils.getNumberOfThreads();
142        if (isPowerOfTwo) {
143            if (nthreads != oldNthreads) {
144                nt = slices;
145                if (nt < rows) {
146                    nt = rows;
147                }
148                nt *= 4;
149                if (nthreads > 1) {
150                    nt *= nthreads;
151                }
152                if (columns == 2) {
153                    nt >>= 1;
154                }
155                t = new double[nt];
156                oldNthreads = nthreads;
157            }
158            if ((nthreads > 1) && useThreads) {
159                ddxt3da_subth(-1, a, true);
160                ddxt3db_subth(-1, a, true);
161            } else {
162                ddxt3da_sub(-1, a, true);
163                ddxt3db_sub(-1, a, true);
164            }
165            yTransform(a);
166        } else {
167            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
168                Future<?>[] futures = new Future[nthreads];
169                int p = slices / nthreads;
170                for (int l = 0; l < nthreads; l++) {
171                    final int firstSlice = l * p;
172                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
173                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
174                        public void run() {
175                            for (int s = firstSlice; s < lastSlice; s++) {
176                                int idx1 = s * sliceStride;
177                                for (int r = 0; r < rows; r++) {
178                                    dhtColumns.forward(a, idx1 + r * rowStride);
179                                }
180                            }
181                        }
182                    });
183                }
184                ConcurrencyUtils.waitForCompletion(futures);
185
186                for (int l = 0; l < nthreads; l++) {
187                    final int firstSlice = l * p;
188                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
189                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
190                        public void run() {
191                            double[] temp = new double[rows];
192                            for (int s = firstSlice; s < lastSlice; s++) {
193                                int idx1 = s * sliceStride;
194                                for (int c = 0; c < columns; c++) {
195                                    for (int r = 0; r < rows; r++) {
196                                        int idx3 = idx1 + r * rowStride + c;
197                                        temp[r] = a[idx3];
198                                    }
199                                    dhtRows.forward(temp);
200                                    for (int r = 0; r < rows; r++) {
201                                        int idx3 = idx1 + r * rowStride + c;
202                                        a[idx3] = temp[r];
203                                    }
204                                }
205                            }
206                        }
207                    });
208                }
209                ConcurrencyUtils.waitForCompletion(futures);
210
211                p = rows / nthreads;
212                for (int l = 0; l < nthreads; l++) {
213                    final int startRow = l * p;
214                    final int stopRow;
215                    if (l == nthreads - 1) {
216                        stopRow = rows;
217                    } else {
218                        stopRow = startRow + p;
219                    }
220                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
221                        public void run() {
222                            double[] temp = new double[slices];
223                            for (int r = startRow; r < stopRow; r++) {
224                                int idx1 = r * rowStride;
225                                for (int c = 0; c < columns; c++) {
226                                    for (int s = 0; s < slices; s++) {
227                                        int idx3 = s * sliceStride + idx1 + c;
228                                        temp[s] = a[idx3];
229                                    }
230                                    dhtSlices.forward(temp);
231                                    for (int s = 0; s < slices; s++) {
232                                        int idx3 = s * sliceStride + idx1 + c;
233                                        a[idx3] = temp[s];
234                                    }
235                                }
236                            }
237                        }
238                    });
239                }
240                ConcurrencyUtils.waitForCompletion(futures);
241
242            } else {
243                for (int s = 0; s < slices; s++) {
244                    int idx1 = s * sliceStride;
245                    for (int r = 0; r < rows; r++) {
246                        dhtColumns.forward(a, idx1 + r * rowStride);
247                    }
248                }
249                double[] temp = new double[rows];
250                for (int s = 0; s < slices; s++) {
251                    int idx1 = s * sliceStride;
252                    for (int c = 0; c < columns; c++) {
253                        for (int r = 0; r < rows; r++) {
254                            int idx3 = idx1 + r * rowStride + c;
255                            temp[r] = a[idx3];
256                        }
257                        dhtRows.forward(temp);
258                        for (int r = 0; r < rows; r++) {
259                            int idx3 = idx1 + r * rowStride + c;
260                            a[idx3] = temp[r];
261                        }
262                    }
263                }
264                temp = new double[slices];
265                for (int r = 0; r < rows; r++) {
266                    int idx1 = r * rowStride;
267                    for (int c = 0; c < columns; c++) {
268                        for (int s = 0; s < slices; s++) {
269                            int idx3 = s * sliceStride + idx1 + c;
270                            temp[s] = a[idx3];
271                        }
272                        dhtSlices.forward(temp);
273                        for (int s = 0; s < slices; s++) {
274                            int idx3 = s * sliceStride + idx1 + c;
275                            a[idx3] = temp[s];
276                        }
277                    }
278                }
279            }
280            yTransform(a);
281        }
282    }
283
284    /**
285     * Computes the 3D real, forward DHT leaving the result in <code>a</code>.
286     * The data is stored in 3D array.
287     * 
288     * @param a
289     *            data to transform
290     */
291    public void forward(final double[][][] a) {
292        int nthreads = ConcurrencyUtils.getNumberOfThreads();
293        if (isPowerOfTwo) {
294            if (nthreads != oldNthreads) {
295                nt = slices;
296                if (nt < rows) {
297                    nt = rows;
298                }
299                nt *= 4;
300                if (nthreads > 1) {
301                    nt *= nthreads;
302                }
303                if (columns == 2) {
304                    nt >>= 1;
305                }
306                t = new double[nt];
307                oldNthreads = nthreads;
308            }
309            if ((nthreads > 1) && useThreads) {
310                ddxt3da_subth(-1, a, true);
311                ddxt3db_subth(-1, a, true);
312            } else {
313                ddxt3da_sub(-1, a, true);
314                ddxt3db_sub(-1, a, true);
315            }
316            yTransform(a);
317        } else {
318            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
319                Future<?>[] futures = new Future[nthreads];
320                int p = slices / nthreads;
321                for (int l = 0; l < nthreads; l++) {
322                    final int firstSlice = l * p;
323                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
324                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
325                        public void run() {
326                            for (int s = firstSlice; s < lastSlice; s++) {
327                                for (int r = 0; r < rows; r++) {
328                                    dhtColumns.forward(a[s][r]);
329                                }
330                            }
331                        }
332                    });
333                }
334                ConcurrencyUtils.waitForCompletion(futures);
335
336                for (int l = 0; l < nthreads; l++) {
337                    final int firstSlice = l * p;
338                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
339                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
340                        public void run() {
341                            double[] temp = new double[rows];
342                            for (int s = firstSlice; s < lastSlice; s++) {
343                                for (int c = 0; c < columns; c++) {
344                                    for (int r = 0; r < rows; r++) {
345                                        temp[r] = a[s][r][c];
346                                    }
347                                    dhtRows.forward(temp);
348                                    for (int r = 0; r < rows; r++) {
349                                        a[s][r][c] = temp[r];
350                                    }
351                                }
352                            }
353                        }
354                    });
355                }
356                ConcurrencyUtils.waitForCompletion(futures);
357
358                p = rows / nthreads;
359                for (int l = 0; l < nthreads; l++) {
360                    final int firstRow = l * p;
361                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
362                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
363                        public void run() {
364                            double[] temp = new double[slices];
365                            for (int r = firstRow; r < lastRow; r++) {
366                                for (int c = 0; c < columns; c++) {
367                                    for (int s = 0; s < slices; s++) {
368                                        temp[s] = a[s][r][c];
369                                    }
370                                    dhtSlices.forward(temp);
371                                    for (int s = 0; s < slices; s++) {
372                                        a[s][r][c] = temp[s];
373                                    }
374                                }
375                            }
376                        }
377                    });
378                }
379                ConcurrencyUtils.waitForCompletion(futures);
380
381            } else {
382                for (int s = 0; s < slices; s++) {
383                    for (int r = 0; r < rows; r++) {
384                        dhtColumns.forward(a[s][r]);
385                    }
386                }
387                double[] temp = new double[rows];
388                for (int s = 0; s < slices; s++) {
389                    for (int c = 0; c < columns; c++) {
390                        for (int r = 0; r < rows; r++) {
391                            temp[r] = a[s][r][c];
392                        }
393                        dhtRows.forward(temp);
394                        for (int r = 0; r < rows; r++) {
395                            a[s][r][c] = temp[r];
396                        }
397                    }
398                }
399                temp = new double[slices];
400                for (int r = 0; r < rows; r++) {
401                    for (int c = 0; c < columns; c++) {
402                        for (int s = 0; s < slices; s++) {
403                            temp[s] = a[s][r][c];
404                        }
405                        dhtSlices.forward(temp);
406                        for (int s = 0; s < slices; s++) {
407                            a[s][r][c] = temp[s];
408                        }
409                    }
410                }
411            }
412            yTransform(a);
413        }
414    }
415
416    /**
417     * Computes the 3D real, inverse DHT leaving the result in <code>a</code>.
418     * The data is stored in 1D array addressed in slice-major, then row-major,
419     * then column-major, in order of significance, i.e. the element (i,j,k) of
420     * 3D array x[slices][rows][columns] is stored in a[i*sliceStride +
421     * j*rowStride + k], where sliceStride = rows * columns and rowStride =
422     * columns.
423     * 
424     * @param a
425     *            data to transform
426     * @param scale
427     *            if true then scaling is performed
428     */
429    public void inverse(final double[] a, final boolean scale) {
430        int nthreads = ConcurrencyUtils.getNumberOfThreads();
431        if (isPowerOfTwo) {
432            if (nthreads != oldNthreads) {
433                nt = slices;
434                if (nt < rows) {
435                    nt = rows;
436                }
437                nt *= 4;
438                if (nthreads > 1) {
439                    nt *= nthreads;
440                }
441                if (columns == 2) {
442                    nt >>= 1;
443                }
444                t = new double[nt];
445                oldNthreads = nthreads;
446            }
447            if ((nthreads > 1) && useThreads) {
448                ddxt3da_subth(1, a, scale);
449                ddxt3db_subth(1, a, scale);
450            } else {
451                ddxt3da_sub(1, a, scale);
452                ddxt3db_sub(1, a, scale);
453            }
454            yTransform(a);
455        } else {
456            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
457                Future<?>[] futures = new Future[nthreads];
458                int p = slices / nthreads;
459                for (int l = 0; l < nthreads; l++) {
460                    final int firstSlice = l * p;
461                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
462                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
463                        public void run() {
464                            for (int s = firstSlice; s < lastSlice; s++) {
465                                int idx1 = s * sliceStride;
466                                for (int r = 0; r < rows; r++) {
467                                    dhtColumns.inverse(a, idx1 + r * rowStride, scale);
468                                }
469                            }
470                        }
471                    });
472                }
473                ConcurrencyUtils.waitForCompletion(futures);
474
475                for (int l = 0; l < nthreads; l++) {
476                    final int firstSlice = l * p;
477                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
478                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
479                        public void run() {
480                            double[] temp = new double[rows];
481                            for (int s = firstSlice; s < lastSlice; s++) {
482                                int idx1 = s * sliceStride;
483                                for (int c = 0; c < columns; c++) {
484                                    for (int r = 0; r < rows; r++) {
485                                        int idx3 = idx1 + r * rowStride + c;
486                                        temp[r] = a[idx3];
487                                    }
488                                    dhtRows.inverse(temp, scale);
489                                    for (int r = 0; r < rows; r++) {
490                                        int idx3 = idx1 + r * rowStride + c;
491                                        a[idx3] = temp[r];
492                                    }
493                                }
494                            }
495                        }
496                    });
497                }
498                ConcurrencyUtils.waitForCompletion(futures);
499
500                p = rows / nthreads;
501                for (int l = 0; l < nthreads; l++) {
502                    final int firstRow = l * p;
503                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
504                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
505                        public void run() {
506                            double[] temp = new double[slices];
507                            for (int r = firstRow; r < lastRow; r++) {
508                                int idx1 = r * rowStride;
509                                for (int c = 0; c < columns; c++) {
510                                    for (int s = 0; s < slices; s++) {
511                                        int idx3 = s * sliceStride + idx1 + c;
512                                        temp[s] = a[idx3];
513                                    }
514                                    dhtSlices.inverse(temp, scale);
515                                    for (int s = 0; s < slices; s++) {
516                                        int idx3 = s * sliceStride + idx1 + c;
517                                        a[idx3] = temp[s];
518                                    }
519                                }
520                            }
521                        }
522                    });
523                }
524                ConcurrencyUtils.waitForCompletion(futures);
525
526            } else {
527                for (int s = 0; s < slices; s++) {
528                    int idx1 = s * sliceStride;
529                    for (int r = 0; r < rows; r++) {
530                        dhtColumns.inverse(a, idx1 + r * rowStride, scale);
531                    }
532                }
533                double[] temp = new double[rows];
534                for (int s = 0; s < slices; s++) {
535                    int idx1 = s * sliceStride;
536                    for (int c = 0; c < columns; c++) {
537                        for (int r = 0; r < rows; r++) {
538                            int idx3 = idx1 + r * rowStride + c;
539                            temp[r] = a[idx3];
540                        }
541                        dhtRows.inverse(temp, scale);
542                        for (int r = 0; r < rows; r++) {
543                            int idx3 = idx1 + r * rowStride + c;
544                            a[idx3] = temp[r];
545                        }
546                    }
547                }
548                temp = new double[slices];
549                for (int r = 0; r < rows; r++) {
550                    int idx1 = r * rowStride;
551                    for (int c = 0; c < columns; c++) {
552                        for (int s = 0; s < slices; s++) {
553                            int idx3 = s * sliceStride + idx1 + c;
554                            temp[s] = a[idx3];
555                        }
556                        dhtSlices.inverse(temp, scale);
557                        for (int s = 0; s < slices; s++) {
558                            int idx3 = s * sliceStride + idx1 + c;
559                            a[idx3] = temp[s];
560                        }
561                    }
562                }
563            }
564            yTransform(a);
565        }
566    }
567
568    /**
569     * Computes the 3D real, inverse DHT leaving the result in <code>a</code>.
570     * The data is stored in 3D array.
571     * 
572     * @param a
573     *            data to transform
574     * @param scale
575     *            if true then scaling is performed
576     */
577    public void inverse(final double[][][] a, final boolean scale) {
578        int nthreads = ConcurrencyUtils.getNumberOfThreads();
579        if (isPowerOfTwo) {
580            if (nthreads != oldNthreads) {
581                nt = slices;
582                if (nt < rows) {
583                    nt = rows;
584                }
585                nt *= 4;
586                if (nthreads > 1) {
587                    nt *= nthreads;
588                }
589                if (columns == 2) {
590                    nt >>= 1;
591                }
592                t = new double[nt];
593                oldNthreads = nthreads;
594            }
595            if ((nthreads > 1) && useThreads) {
596                ddxt3da_subth(1, a, scale);
597                ddxt3db_subth(1, a, scale);
598            } else {
599                ddxt3da_sub(1, a, scale);
600                ddxt3db_sub(1, a, scale);
601            }
602            yTransform(a);
603        } else {
604            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
605                Future<?>[] futures = new Future[nthreads];
606                int p = slices / nthreads;
607                for (int l = 0; l < nthreads; l++) {
608                    final int firstSlice = l * p;
609                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
610                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
611                        public void run() {
612                            for (int s = firstSlice; s < lastSlice; s++) {
613                                for (int r = 0; r < rows; r++) {
614                                    dhtColumns.inverse(a[s][r], scale);
615                                }
616                            }
617                        }
618                    });
619                }
620                ConcurrencyUtils.waitForCompletion(futures);
621
622                for (int l = 0; l < nthreads; l++) {
623                    final int firstSlice = l * p;
624                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
625                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
626                        public void run() {
627                            double[] temp = new double[rows];
628                            for (int s = firstSlice; s < lastSlice; s++) {
629                                for (int c = 0; c < columns; c++) {
630                                    for (int r = 0; r < rows; r++) {
631                                        temp[r] = a[s][r][c];
632                                    }
633                                    dhtRows.inverse(temp, scale);
634                                    for (int r = 0; r < rows; r++) {
635                                        a[s][r][c] = temp[r];
636                                    }
637                                }
638                            }
639                        }
640                    });
641                }
642                ConcurrencyUtils.waitForCompletion(futures);
643
644                p = rows / nthreads;
645                for (int l = 0; l < nthreads; l++) {
646                    final int firstRow = l * p;
647                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
648                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
649                        public void run() {
650                            double[] temp = new double[slices];
651                            for (int r = firstRow; r < lastRow; r++) {
652                                for (int c = 0; c < columns; c++) {
653                                    for (int s = 0; s < slices; s++) {
654                                        temp[s] = a[s][r][c];
655                                    }
656                                    dhtSlices.inverse(temp, scale);
657                                    for (int s = 0; s < slices; s++) {
658                                        a[s][r][c] = temp[s];
659                                    }
660                                }
661                            }
662                        }
663                    });
664                }
665                ConcurrencyUtils.waitForCompletion(futures);
666
667            } else {
668                for (int s = 0; s < slices; s++) {
669                    for (int r = 0; r < rows; r++) {
670                        dhtColumns.inverse(a[s][r], scale);
671                    }
672                }
673                double[] temp = new double[rows];
674                for (int s = 0; s < slices; s++) {
675                    for (int c = 0; c < columns; c++) {
676                        for (int r = 0; r < rows; r++) {
677                            temp[r] = a[s][r][c];
678                        }
679                        dhtRows.inverse(temp, scale);
680                        for (int r = 0; r < rows; r++) {
681                            a[s][r][c] = temp[r];
682                        }
683                    }
684                }
685                temp = new double[slices];
686                for (int r = 0; r < rows; r++) {
687                    for (int c = 0; c < columns; c++) {
688                        for (int s = 0; s < slices; s++) {
689                            temp[s] = a[s][r][c];
690                        }
691                        dhtSlices.inverse(temp, scale);
692                        for (int s = 0; s < slices; s++) {
693                            a[s][r][c] = temp[s];
694                        }
695                    }
696                }
697            }
698            yTransform(a);
699        }
700    }
701
702    private void ddxt3da_sub(int isgn, double[] a, boolean scale) {
703        int idx0, idx1, idx2;
704
705        if (isgn == -1) {
706            for (int s = 0; s < slices; s++) {
707                idx0 = s * sliceStride;
708                for (int r = 0; r < rows; r++) {
709                    dhtColumns.forward(a, idx0 + r * rowStride);
710                }
711                if (columns > 2) {
712                    for (int c = 0; c < columns; c += 4) {
713                        for (int r = 0; r < rows; r++) {
714                            idx1 = idx0 + r * rowStride + c;
715                            idx2 = rows + r;
716                            t[r] = a[idx1];
717                            t[idx2] = a[idx1 + 1];
718                            t[idx2 + rows] = a[idx1 + 2];
719                            t[idx2 + 2 * rows] = a[idx1 + 3];
720                        }
721                        dhtRows.forward(t, 0);
722                        dhtRows.forward(t, rows);
723                        dhtRows.forward(t, 2 * rows);
724                        dhtRows.forward(t, 3 * rows);
725                        for (int r = 0; r < rows; r++) {
726                            idx1 = idx0 + r * rowStride + c;
727                            idx2 = rows + r;
728                            a[idx1] = t[r];
729                            a[idx1 + 1] = t[idx2];
730                            a[idx1 + 2] = t[idx2 + rows];
731                            a[idx1 + 3] = t[idx2 + 2 * rows];
732                        }
733                    }
734                } else if (columns == 2) {
735                    for (int r = 0; r < rows; r++) {
736                        idx1 = idx0 + r * rowStride;
737                        t[r] = a[idx1];
738                        t[rows + r] = a[idx1 + 1];
739                    }
740                    dhtRows.forward(t, 0);
741                    dhtRows.forward(t, rows);
742                    for (int r = 0; r < rows; r++) {
743                        idx1 = idx0 + r * rowStride;
744                        a[idx1] = t[r];
745                        a[idx1 + 1] = t[rows + r];
746                    }
747                }
748            }
749        } else {
750            for (int s = 0; s < slices; s++) {
751                idx0 = s * sliceStride;
752                for (int r = 0; r < rows; r++) {
753                    dhtColumns.inverse(a, idx0 + r * rowStride, scale);
754                }
755                if (columns > 2) {
756                    for (int c = 0; c < columns; c += 4) {
757                        for (int r = 0; r < rows; r++) {
758                            idx1 = idx0 + r * rowStride + c;
759                            idx2 = rows + r;
760                            t[r] = a[idx1];
761                            t[idx2] = a[idx1 + 1];
762                            t[idx2 + rows] = a[idx1 + 2];
763                            t[idx2 + 2 * rows] = a[idx1 + 3];
764                        }
765                        dhtRows.inverse(t, 0, scale);
766                        dhtRows.inverse(t, rows, scale);
767                        dhtRows.inverse(t, 2 * rows, scale);
768                        dhtRows.inverse(t, 3 * rows, scale);
769                        for (int r = 0; r < rows; r++) {
770                            idx1 = idx0 + r * rowStride + c;
771                            idx2 = rows + r;
772                            a[idx1] = t[r];
773                            a[idx1 + 1] = t[idx2];
774                            a[idx1 + 2] = t[idx2 + rows];
775                            a[idx1 + 3] = t[idx2 + 2 * rows];
776                        }
777                    }
778                } else if (columns == 2) {
779                    for (int r = 0; r < rows; r++) {
780                        idx1 = idx0 + r * rowStride;
781                        t[r] = a[idx1];
782                        t[rows + r] = a[idx1 + 1];
783                    }
784                    dhtRows.inverse(t, 0, scale);
785                    dhtRows.inverse(t, rows, scale);
786                    for (int r = 0; r < rows; r++) {
787                        idx1 = idx0 + r * rowStride;
788                        a[idx1] = t[r];
789                        a[idx1 + 1] = t[rows + r];
790                    }
791                }
792            }
793        }
794    }
795
796    private void ddxt3da_sub(int isgn, double[][][] a, boolean scale) {
797        int idx2;
798
799        if (isgn == -1) {
800            for (int s = 0; s < slices; s++) {
801                for (int r = 0; r < rows; r++) {
802                    dhtColumns.forward(a[s][r]);
803                }
804                if (columns > 2) {
805                    for (int c = 0; c < columns; c += 4) {
806                        for (int r = 0; r < rows; r++) {
807                            idx2 = rows + r;
808                            t[r] = a[s][r][c];
809                            t[idx2] = a[s][r][c + 1];
810                            t[idx2 + rows] = a[s][r][c + 2];
811                            t[idx2 + 2 * rows] = a[s][r][c + 3];
812                        }
813                        dhtRows.forward(t, 0);
814                        dhtRows.forward(t, rows);
815                        dhtRows.forward(t, 2 * rows);
816                        dhtRows.forward(t, 3 * rows);
817                        for (int r = 0; r < rows; r++) {
818                            idx2 = rows + r;
819                            a[s][r][c] = t[r];
820                            a[s][r][c + 1] = t[idx2];
821                            a[s][r][c + 2] = t[idx2 + rows];
822                            a[s][r][c + 3] = t[idx2 + 2 * rows];
823                        }
824                    }
825                } else if (columns == 2) {
826                    for (int r = 0; r < rows; r++) {
827                        t[r] = a[s][r][0];
828                        t[rows + r] = a[s][r][1];
829                    }
830                    dhtRows.forward(t, 0);
831                    dhtRows.forward(t, rows);
832                    for (int r = 0; r < rows; r++) {
833                        a[s][r][0] = t[r];
834                        a[s][r][1] = t[rows + r];
835                    }
836                }
837            }
838        } else {
839            for (int s = 0; s < slices; s++) {
840                for (int r = 0; r < rows; r++) {
841                    dhtColumns.inverse(a[s][r], scale);
842                }
843                if (columns > 2) {
844                    for (int c = 0; c < columns; c += 4) {
845                        for (int r = 0; r < rows; r++) {
846                            idx2 = rows + r;
847                            t[r] = a[s][r][c];
848                            t[idx2] = a[s][r][c + 1];
849                            t[idx2 + rows] = a[s][r][c + 2];
850                            t[idx2 + 2 * rows] = a[s][r][c + 3];
851                        }
852                        dhtRows.inverse(t, 0, scale);
853                        dhtRows.inverse(t, rows, scale);
854                        dhtRows.inverse(t, 2 * rows, scale);
855                        dhtRows.inverse(t, 3 * rows, scale);
856                        for (int r = 0; r < rows; r++) {
857                            idx2 = rows + r;
858                            a[s][r][c] = t[r];
859                            a[s][r][c + 1] = t[idx2];
860                            a[s][r][c + 2] = t[idx2 + rows];
861                            a[s][r][c + 3] = t[idx2 + 2 * rows];
862                        }
863                    }
864                } else if (columns == 2) {
865                    for (int r = 0; r < rows; r++) {
866                        t[r] = a[s][r][0];
867                        t[rows + r] = a[s][r][1];
868                    }
869                    dhtRows.inverse(t, 0, scale);
870                    dhtRows.inverse(t, rows, scale);
871                    for (int r = 0; r < rows; r++) {
872                        a[s][r][0] = t[r];
873                        a[s][r][1] = t[rows + r];
874                    }
875                }
876            }
877        }
878    }
879
880    private void ddxt3db_sub(int isgn, double[] a, boolean scale) {
881        int idx0, idx1, idx2;
882
883        if (isgn == -1) {
884            if (columns > 2) {
885                for (int r = 0; r < rows; r++) {
886                    idx0 = r * rowStride;
887                    for (int c = 0; c < columns; c += 4) {
888                        for (int s = 0; s < slices; s++) {
889                            idx1 = s * sliceStride + idx0 + c;
890                            idx2 = slices + s;
891                            t[s] = a[idx1];
892                            t[idx2] = a[idx1 + 1];
893                            t[idx2 + slices] = a[idx1 + 2];
894                            t[idx2 + 2 * slices] = a[idx1 + 3];
895                        }
896                        dhtSlices.forward(t, 0);
897                        dhtSlices.forward(t, slices);
898                        dhtSlices.forward(t, 2 * slices);
899                        dhtSlices.forward(t, 3 * slices);
900                        for (int s = 0; s < slices; s++) {
901                            idx1 = s * sliceStride + idx0 + c;
902                            idx2 = slices + s;
903                            a[idx1] = t[s];
904                            a[idx1 + 1] = t[idx2];
905                            a[idx1 + 2] = t[idx2 + slices];
906                            a[idx1 + 3] = t[idx2 + 2 * slices];
907                        }
908                    }
909                }
910            } else if (columns == 2) {
911                for (int r = 0; r < rows; r++) {
912                    idx0 = r * rowStride;
913                    for (int s = 0; s < slices; s++) {
914                        idx1 = s * sliceStride + idx0;
915                        t[s] = a[idx1];
916                        t[slices + s] = a[idx1 + 1];
917                    }
918                    dhtSlices.forward(t, 0);
919                    dhtSlices.forward(t, slices);
920                    for (int s = 0; s < slices; s++) {
921                        idx1 = s * sliceStride + idx0;
922                        a[idx1] = t[s];
923                        a[idx1 + 1] = t[slices + s];
924                    }
925                }
926            }
927        } else {
928            if (columns > 2) {
929                for (int r = 0; r < rows; r++) {
930                    idx0 = r * rowStride;
931                    for (int c = 0; c < columns; c += 4) {
932                        for (int s = 0; s < slices; s++) {
933                            idx1 = s * sliceStride + idx0 + c;
934                            idx2 = slices + s;
935                            t[s] = a[idx1];
936                            t[idx2] = a[idx1 + 1];
937                            t[idx2 + slices] = a[idx1 + 2];
938                            t[idx2 + 2 * slices] = a[idx1 + 3];
939                        }
940                        dhtSlices.inverse(t, 0, scale);
941                        dhtSlices.inverse(t, slices, scale);
942                        dhtSlices.inverse(t, 2 * slices, scale);
943                        dhtSlices.inverse(t, 3 * slices, scale);
944
945                        for (int s = 0; s < slices; s++) {
946                            idx1 = s * sliceStride + idx0 + c;
947                            idx2 = slices + s;
948                            a[idx1] = t[s];
949                            a[idx1 + 1] = t[idx2];
950                            a[idx1 + 2] = t[idx2 + slices];
951                            a[idx1 + 3] = t[idx2 + 2 * slices];
952                        }
953                    }
954                }
955            } else if (columns == 2) {
956                for (int r = 0; r < rows; r++) {
957                    idx0 = r * rowStride;
958                    for (int s = 0; s < slices; s++) {
959                        idx1 = s * sliceStride + idx0;
960                        t[s] = a[idx1];
961                        t[slices + s] = a[idx1 + 1];
962                    }
963                    dhtSlices.inverse(t, 0, scale);
964                    dhtSlices.inverse(t, slices, scale);
965                    for (int s = 0; s < slices; s++) {
966                        idx1 = s * sliceStride + idx0;
967                        a[idx1] = t[s];
968                        a[idx1 + 1] = t[slices + s];
969                    }
970                }
971            }
972        }
973    }
974
975    private void ddxt3db_sub(int isgn, double[][][] a, boolean scale) {
976        int idx2;
977
978        if (isgn == -1) {
979            if (columns > 2) {
980                for (int r = 0; r < rows; r++) {
981                    for (int c = 0; c < columns; c += 4) {
982                        for (int s = 0; s < slices; s++) {
983                            idx2 = slices + s;
984                            t[s] = a[s][r][c];
985                            t[idx2] = a[s][r][c + 1];
986                            t[idx2 + slices] = a[s][r][c + 2];
987                            t[idx2 + 2 * slices] = a[s][r][c + 3];
988                        }
989                        dhtSlices.forward(t, 0);
990                        dhtSlices.forward(t, slices);
991                        dhtSlices.forward(t, 2 * slices);
992                        dhtSlices.forward(t, 3 * slices);
993                        for (int s = 0; s < slices; s++) {
994                            idx2 = slices + s;
995                            a[s][r][c] = t[s];
996                            a[s][r][c + 1] = t[idx2];
997                            a[s][r][c + 2] = t[idx2 + slices];
998                            a[s][r][c + 3] = t[idx2 + 2 * slices];
999                        }
1000                    }
1001                }
1002            } else if (columns == 2) {
1003                for (int r = 0; r < rows; r++) {
1004                    for (int s = 0; s < slices; s++) {
1005                        t[s] = a[s][r][0];
1006                        t[slices + s] = a[s][r][1];
1007                    }
1008                    dhtSlices.forward(t, 0);
1009                    dhtSlices.forward(t, slices);
1010                    for (int s = 0; s < slices; s++) {
1011                        a[s][r][0] = t[s];
1012                        a[s][r][1] = t[slices + s];
1013                    }
1014                }
1015            }
1016        } else {
1017            if (columns > 2) {
1018                for (int r = 0; r < rows; r++) {
1019                    for (int c = 0; c < columns; c += 4) {
1020                        for (int s = 0; s < slices; s++) {
1021                            idx2 = slices + s;
1022                            t[s] = a[s][r][c];
1023                            t[idx2] = a[s][r][c + 1];
1024                            t[idx2 + slices] = a[s][r][c + 2];
1025                            t[idx2 + 2 * slices] = a[s][r][c + 3];
1026                        }
1027                        dhtSlices.inverse(t, 0, scale);
1028                        dhtSlices.inverse(t, slices, scale);
1029                        dhtSlices.inverse(t, 2 * slices, scale);
1030                        dhtSlices.inverse(t, 3 * slices, scale);
1031
1032                        for (int s = 0; s < slices; s++) {
1033                            idx2 = slices + s;
1034                            a[s][r][c] = t[s];
1035                            a[s][r][c + 1] = t[idx2];
1036                            a[s][r][c + 2] = t[idx2 + slices];
1037                            a[s][r][c + 3] = t[idx2 + 2 * slices];
1038                        }
1039                    }
1040                }
1041            } else if (columns == 2) {
1042                for (int r = 0; r < rows; r++) {
1043                    for (int s = 0; s < slices; s++) {
1044                        t[s] = a[s][r][0];
1045                        t[slices + s] = a[s][r][1];
1046                    }
1047                    dhtSlices.inverse(t, 0, scale);
1048                    dhtSlices.inverse(t, slices, scale);
1049                    for (int s = 0; s < slices; s++) {
1050                        a[s][r][0] = t[s];
1051                        a[s][r][1] = t[slices + s];
1052                    }
1053                }
1054            }
1055        }
1056    }
1057
1058    private void ddxt3da_subth(final int isgn, final double[] a, final boolean scale) {
1059        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > slices ? slices : ConcurrencyUtils.getNumberOfThreads();
1060        int nt = 4 * rows;
1061        if (columns == 2) {
1062            nt >>= 1;
1063        }
1064        Future<?>[] futures = new Future[nthreads];
1065
1066        for (int i = 0; i < nthreads; i++) {
1067            final int n0 = i;
1068            final int startt = nt * i;
1069            futures[i] = ConcurrencyUtils.submit(new Runnable() {
1070
1071                public void run() {
1072                    int idx0, idx1, idx2;
1073                    if (isgn == -1) {
1074                        for (int s = n0; s < slices; s += nthreads) {
1075                            idx0 = s * sliceStride;
1076                            for (int r = 0; r < rows; r++) {
1077                                dhtColumns.forward(a, idx0 + r * rowStride);
1078                            }
1079                            if (columns > 2) {
1080                                for (int c = 0; c < columns; c += 4) {
1081                                    for (int r = 0; r < rows; r++) {
1082                                        idx1 = idx0 + r * rowStride + c;
1083                                        idx2 = startt + rows + r;
1084                                        t[startt + r] = a[idx1];
1085                                        t[idx2] = a[idx1 + 1];
1086                                        t[idx2 + rows] = a[idx1 + 2];
1087                                        t[idx2 + 2 * rows] = a[idx1 + 3];
1088                                    }
1089                                    dhtRows.forward(t, startt);
1090                                    dhtRows.forward(t, startt + rows);
1091                                    dhtRows.forward(t, startt + 2 * rows);
1092                                    dhtRows.forward(t, startt + 3 * rows);
1093                                    for (int r = 0; r < rows; r++) {
1094                                        idx1 = idx0 + r * rowStride + c;
1095                                        idx2 = startt + rows + r;
1096                                        a[idx1] = t[startt + r];
1097                                        a[idx1 + 1] = t[idx2];
1098                                        a[idx1 + 2] = t[idx2 + rows];
1099                                        a[idx1 + 3] = t[idx2 + 2 * rows];
1100                                    }
1101                                }
1102                            } else if (columns == 2) {
1103                                for (int r = 0; r < rows; r++) {
1104                                    idx1 = idx0 + r * rowStride;
1105                                    t[startt + r] = a[idx1];
1106                                    t[startt + rows + r] = a[idx1 + 1];
1107                                }
1108                                dhtRows.forward(t, startt);
1109                                dhtRows.forward(t, startt + rows);
1110                                for (int r = 0; r < rows; r++) {
1111                                    idx1 = idx0 + r * rowStride;
1112                                    a[idx1] = t[startt + r];
1113                                    a[idx1 + 1] = t[startt + rows + r];
1114                                }
1115                            }
1116                        }
1117                    } else {
1118                        for (int s = n0; s < slices; s += nthreads) {
1119                            idx0 = s * sliceStride;
1120                            for (int r = 0; r < rows; r++) {
1121                                dhtColumns.inverse(a, idx0 + r * rowStride, scale);
1122                            }
1123                            if (columns > 2) {
1124                                for (int c = 0; c < columns; c += 4) {
1125                                    for (int r = 0; r < rows; r++) {
1126                                        idx1 = idx0 + r * rowStride + c;
1127                                        idx2 = startt + rows + r;
1128                                        t[startt + r] = a[idx1];
1129                                        t[idx2] = a[idx1 + 1];
1130                                        t[idx2 + rows] = a[idx1 + 2];
1131                                        t[idx2 + 2 * rows] = a[idx1 + 3];
1132                                    }
1133                                    dhtRows.inverse(t, startt, scale);
1134                                    dhtRows.inverse(t, startt + rows, scale);
1135                                    dhtRows.inverse(t, startt + 2 * rows, scale);
1136                                    dhtRows.inverse(t, startt + 3 * rows, scale);
1137                                    for (int r = 0; r < rows; r++) {
1138                                        idx1 = idx0 + r * rowStride + c;
1139                                        idx2 = startt + rows + r;
1140                                        a[idx1] = t[startt + r];
1141                                        a[idx1 + 1] = t[idx2];
1142                                        a[idx1 + 2] = t[idx2 + rows];
1143                                        a[idx1 + 3] = t[idx2 + 2 * rows];
1144                                    }
1145                                }
1146                            } else if (columns == 2) {
1147                                for (int r = 0; r < rows; r++) {
1148                                    idx1 = idx0 + r * rowStride;
1149                                    t[startt + r] = a[idx1];
1150                                    t[startt + rows + r] = a[idx1 + 1];
1151                                }
1152                                dhtRows.inverse(t, startt, scale);
1153                                dhtRows.inverse(t, startt + rows, scale);
1154                                for (int r = 0; r < rows; r++) {
1155                                    idx1 = idx0 + r * rowStride;
1156                                    a[idx1] = t[startt + r];
1157                                    a[idx1 + 1] = t[startt + rows + r];
1158                                }
1159                            }
1160                        }
1161                    }
1162                }
1163            });
1164        }
1165        ConcurrencyUtils.waitForCompletion(futures);
1166    }
1167
1168    private void ddxt3da_subth(final int isgn, final double[][][] a, final boolean scale) {
1169        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > slices ? slices : ConcurrencyUtils.getNumberOfThreads();
1170        int nt = 4 * rows;
1171        if (columns == 2) {
1172            nt >>= 1;
1173        }
1174        Future<?>[] futures = new Future[nthreads];
1175
1176        for (int i = 0; i < nthreads; i++) {
1177            final int n0 = i;
1178            final int startt = nt * i;
1179            futures[i] = ConcurrencyUtils.submit(new Runnable() {
1180
1181                public void run() {
1182                    int idx2;
1183                    if (isgn == -1) {
1184                        for (int s = n0; s < slices; s += nthreads) {
1185                            for (int r = 0; r < rows; r++) {
1186                                dhtColumns.forward(a[s][r]);
1187                            }
1188                            if (columns > 2) {
1189                                for (int c = 0; c < columns; c += 4) {
1190                                    for (int r = 0; r < rows; r++) {
1191                                        idx2 = startt + rows + r;
1192                                        t[startt + r] = a[s][r][c];
1193                                        t[idx2] = a[s][r][c + 1];
1194                                        t[idx2 + rows] = a[s][r][c + 2];
1195                                        t[idx2 + 2 * rows] = a[s][r][c + 3];
1196                                    }
1197                                    dhtRows.forward(t, startt);
1198                                    dhtRows.forward(t, startt + rows);
1199                                    dhtRows.forward(t, startt + 2 * rows);
1200                                    dhtRows.forward(t, startt + 3 * rows);
1201                                    for (int r = 0; r < rows; r++) {
1202                                        idx2 = startt + rows + r;
1203                                        a[s][r][c] = t[startt + r];
1204                                        a[s][r][c + 1] = t[idx2];
1205                                        a[s][r][c + 2] = t[idx2 + rows];
1206                                        a[s][r][c + 3] = t[idx2 + 2 * rows];
1207                                    }
1208                                }
1209                            } else if (columns == 2) {
1210                                for (int r = 0; r < rows; r++) {
1211                                    t[startt + r] = a[s][r][0];
1212                                    t[startt + rows + r] = a[s][r][1];
1213                                }
1214                                dhtRows.forward(t, startt);
1215                                dhtRows.forward(t, startt + rows);
1216                                for (int r = 0; r < rows; r++) {
1217                                    a[s][r][0] = t[startt + r];
1218                                    a[s][r][1] = t[startt + rows + r];
1219                                }
1220                            }
1221                        }
1222                    } else {
1223                        for (int s = n0; s < slices; s += nthreads) {
1224                            for (int r = 0; r < rows; r++) {
1225                                dhtColumns.inverse(a[s][r], scale);
1226                            }
1227                            if (columns > 2) {
1228                                for (int c = 0; c < columns; c += 4) {
1229                                    for (int r = 0; r < rows; r++) {
1230                                        idx2 = startt + rows + r;
1231                                        t[startt + r] = a[s][r][c];
1232                                        t[idx2] = a[s][r][c + 1];
1233                                        t[idx2 + rows] = a[s][r][c + 2];
1234                                        t[idx2 + 2 * rows] = a[s][r][c + 3];
1235                                    }
1236                                    dhtRows.inverse(t, startt, scale);
1237                                    dhtRows.inverse(t, startt + rows, scale);
1238                                    dhtRows.inverse(t, startt + 2 * rows, scale);
1239                                    dhtRows.inverse(t, startt + 3 * rows, scale);
1240                                    for (int r = 0; r < rows; r++) {
1241                                        idx2 = startt + rows + r;
1242                                        a[s][r][c] = t[startt + r];
1243                                        a[s][r][c + 1] = t[idx2];
1244                                        a[s][r][c + 2] = t[idx2 + rows];
1245                                        a[s][r][c + 3] = t[idx2 + 2 * rows];
1246                                    }
1247                                }
1248                            } else if (columns == 2) {
1249                                for (int r = 0; r < rows; r++) {
1250                                    t[startt + r] = a[s][r][0];
1251                                    t[startt + rows + r] = a[s][r][1];
1252                                }
1253                                dhtRows.inverse(t, startt, scale);
1254                                dhtRows.inverse(t, startt + rows, scale);
1255                                for (int r = 0; r < rows; r++) {
1256                                    a[s][r][0] = t[startt + r];
1257                                    a[s][r][1] = t[startt + rows + r];
1258                                }
1259                            }
1260                        }
1261                    }
1262                }
1263            });
1264        }
1265        ConcurrencyUtils.waitForCompletion(futures);
1266    }
1267
1268    private void ddxt3db_subth(final int isgn, final double[] a, final boolean scale) {
1269        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
1270        int nt = 4 * slices;
1271        if (columns == 2) {
1272            nt >>= 1;
1273        }
1274        Future<?>[] futures = new Future[nthreads];
1275
1276        for (int i = 0; i < nthreads; i++) {
1277            final int n0 = i;
1278            final int startt = nt * i;
1279            futures[i] = ConcurrencyUtils.submit(new Runnable() {
1280
1281                public void run() {
1282                    int idx0, idx1, idx2;
1283                    if (isgn == -1) {
1284                        if (columns > 2) {
1285                            for (int r = n0; r < rows; r += nthreads) {
1286                                idx0 = r * rowStride;
1287                                for (int c = 0; c < columns; c += 4) {
1288                                    for (int s = 0; s < slices; s++) {
1289                                        idx1 = s * sliceStride + idx0 + c;
1290                                        idx2 = startt + slices + s;
1291                                        t[startt + s] = a[idx1];
1292                                        t[idx2] = a[idx1 + 1];
1293                                        t[idx2 + slices] = a[idx1 + 2];
1294                                        t[idx2 + 2 * slices] = a[idx1 + 3];
1295                                    }
1296                                    dhtSlices.forward(t, startt);
1297                                    dhtSlices.forward(t, startt + slices);
1298                                    dhtSlices.forward(t, startt + 2 * slices);
1299                                    dhtSlices.forward(t, startt + 3 * slices);
1300                                    for (int s = 0; s < slices; s++) {
1301                                        idx1 = s * sliceStride + idx0 + c;
1302                                        idx2 = startt + slices + s;
1303                                        a[idx1] = t[startt + s];
1304                                        a[idx1 + 1] = t[idx2];
1305                                        a[idx1 + 2] = t[idx2 + slices];
1306                                        a[idx1 + 3] = t[idx2 + 2 * slices];
1307                                    }
1308                                }
1309                            }
1310                        } else if (columns == 2) {
1311                            for (int r = n0; r < rows; r += nthreads) {
1312                                idx0 = r * rowStride;
1313                                for (int s = 0; s < slices; s++) {
1314                                    idx1 = s * sliceStride + idx0;
1315                                    t[startt + s] = a[idx1];
1316                                    t[startt + slices + s] = a[idx1 + 1];
1317                                }
1318                                dhtSlices.forward(t, startt);
1319                                dhtSlices.forward(t, startt + slices);
1320                                for (int s = 0; s < slices; s++) {
1321                                    idx1 = s * sliceStride + idx0;
1322                                    a[idx1] = t[startt + s];
1323                                    a[idx1 + 1] = t[startt + slices + s];
1324                                }
1325                            }
1326                        }
1327                    } else {
1328                        if (columns > 2) {
1329                            for (int r = n0; r < rows; r += nthreads) {
1330                                idx0 = r * rowStride;
1331                                for (int c = 0; c < columns; c += 4) {
1332                                    for (int s = 0; s < slices; s++) {
1333                                        idx1 = s * sliceStride + idx0 + c;
1334                                        idx2 = startt + slices + s;
1335                                        t[startt + s] = a[idx1];
1336                                        t[idx2] = a[idx1 + 1];
1337                                        t[idx2 + slices] = a[idx1 + 2];
1338                                        t[idx2 + 2 * slices] = a[idx1 + 3];
1339                                    }
1340                                    dhtSlices.inverse(t, startt, scale);
1341                                    dhtSlices.inverse(t, startt + slices, scale);
1342                                    dhtSlices.inverse(t, startt + 2 * slices, scale);
1343                                    dhtSlices.inverse(t, startt + 3 * slices, scale);
1344                                    for (int s = 0; s < slices; s++) {
1345                                        idx1 = s * sliceStride + idx0 + c;
1346                                        idx2 = startt + slices + s;
1347                                        a[idx1] = t[startt + s];
1348                                        a[idx1 + 1] = t[idx2];
1349                                        a[idx1 + 2] = t[idx2 + slices];
1350                                        a[idx1 + 3] = t[idx2 + 2 * slices];
1351                                    }
1352                                }
1353                            }
1354                        } else if (columns == 2) {
1355                            for (int r = n0; r < rows; r += nthreads) {
1356                                idx0 = r * rowStride;
1357                                for (int s = 0; s < slices; s++) {
1358                                    idx1 = s * sliceStride + idx0;
1359                                    t[startt + s] = a[idx1];
1360                                    t[startt + slices + s] = a[idx1 + 1];
1361                                }
1362                                dhtSlices.inverse(t, startt, scale);
1363                                dhtSlices.inverse(t, startt + slices, scale);
1364                                for (int s = 0; s < slices; s++) {
1365                                    idx1 = s * sliceStride + idx0;
1366                                    a[idx1] = t[startt + s];
1367                                    a[idx1 + 1] = t[startt + slices + s];
1368                                }
1369                            }
1370                        }
1371                    }
1372                }
1373            });
1374        }
1375        ConcurrencyUtils.waitForCompletion(futures);
1376    }
1377
1378    private void ddxt3db_subth(final int isgn, final double[][][] a, final boolean scale) {
1379        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
1380        int nt = 4 * slices;
1381        if (columns == 2) {
1382            nt >>= 1;
1383        }
1384        Future<?>[] futures = new Future[nthreads];
1385
1386        for (int i = 0; i < nthreads; i++) {
1387            final int n0 = i;
1388            final int startt = nt * i;
1389            futures[i] = ConcurrencyUtils.submit(new Runnable() {
1390
1391                public void run() {
1392                    int idx2;
1393                    if (isgn == -1) {
1394                        if (columns > 2) {
1395                            for (int r = n0; r < rows; r += nthreads) {
1396                                for (int c = 0; c < columns; c += 4) {
1397                                    for (int s = 0; s < slices; s++) {
1398                                        idx2 = startt + slices + s;
1399                                        t[startt + s] = a[s][r][c];
1400                                        t[idx2] = a[s][r][c + 1];
1401                                        t[idx2 + slices] = a[s][r][c + 2];
1402                                        t[idx2 + 2 * slices] = a[s][r][c + 3];
1403                                    }
1404                                    dhtSlices.forward(t, startt);
1405                                    dhtSlices.forward(t, startt + slices);
1406                                    dhtSlices.forward(t, startt + 2 * slices);
1407                                    dhtSlices.forward(t, startt + 3 * slices);
1408                                    for (int s = 0; s < slices; s++) {
1409                                        idx2 = startt + slices + s;
1410                                        a[s][r][c] = t[startt + s];
1411                                        a[s][r][c + 1] = t[idx2];
1412                                        a[s][r][c + 2] = t[idx2 + slices];
1413                                        a[s][r][c + 3] = t[idx2 + 2 * slices];
1414                                    }
1415                                }
1416                            }
1417                        } else if (columns == 2) {
1418                            for (int r = n0; r < rows; r += nthreads) {
1419                                for (int s = 0; s < slices; s++) {
1420                                    t[startt + s] = a[s][r][0];
1421                                    t[startt + slices + s] = a[s][r][1];
1422                                }
1423                                dhtSlices.forward(t, startt);
1424                                dhtSlices.forward(t, startt + slices);
1425                                for (int s = 0; s < slices; s++) {
1426                                    a[s][r][0] = t[startt + s];
1427                                    a[s][r][1] = t[startt + slices + s];
1428                                }
1429                            }
1430                        }
1431                    } else {
1432                        if (columns > 2) {
1433                            for (int r = n0; r < rows; r += nthreads) {
1434                                for (int c = 0; c < columns; c += 4) {
1435                                    for (int s = 0; s < slices; s++) {
1436                                        idx2 = startt + slices + s;
1437                                        t[startt + s] = a[s][r][c];
1438                                        t[idx2] = a[s][r][c + 1];
1439                                        t[idx2 + slices] = a[s][r][c + 2];
1440                                        t[idx2 + 2 * slices] = a[s][r][c + 3];
1441                                    }
1442                                    dhtSlices.inverse(t, startt, scale);
1443                                    dhtSlices.inverse(t, startt + slices, scale);
1444                                    dhtSlices.inverse(t, startt + 2 * slices, scale);
1445                                    dhtSlices.inverse(t, startt + 3 * slices, scale);
1446                                    for (int s = 0; s < slices; s++) {
1447                                        idx2 = startt + slices + s;
1448                                        a[s][r][c] = t[startt + s];
1449                                        a[s][r][c + 1] = t[idx2];
1450                                        a[s][r][c + 2] = t[idx2 + slices];
1451                                        a[s][r][c + 3] = t[idx2 + 2 * slices];
1452                                    }
1453                                }
1454                            }
1455                        } else if (columns == 2) {
1456                            for (int r = n0; r < rows; r += nthreads) {
1457                                for (int s = 0; s < slices; s++) {
1458                                    t[startt + s] = a[s][r][0];
1459                                    t[startt + slices + s] = a[s][r][1];
1460                                }
1461                                dhtSlices.inverse(t, startt, scale);
1462                                dhtSlices.inverse(t, startt + slices, scale);
1463
1464                                for (int s = 0; s < slices; s++) {
1465                                    a[s][r][0] = t[startt + s];
1466                                    a[s][r][1] = t[startt + slices + s];
1467                                }
1468                            }
1469                        }
1470                    }
1471                }
1472            });
1473        }
1474        ConcurrencyUtils.waitForCompletion(futures);
1475    }
1476
1477    private void yTransform(double[] a) {
1478        double A, B, C, D, E, F, G, H;
1479        int cC, rC, sC;
1480        int idx1, idx2, idx3, idx4, idx5, idx6, idx7, idx8, idx9, idx10, idx11, idx12;
1481        for (int s = 0; s <= slices / 2; s++) {
1482            sC = (slices - s) % slices;
1483            idx9 = s * sliceStride;
1484            idx10 = sC * sliceStride;
1485            for (int r = 0; r <= rows / 2; r++) {
1486                rC = (rows - r) % rows;
1487                idx11 = r * rowStride;
1488                idx12 = rC * rowStride;
1489                for (int c = 0; c <= columns / 2; c++) {
1490                    cC = (columns - c) % columns;
1491                    idx1 = idx9 + idx12 + c;
1492                    idx2 = idx9 + idx11 + cC;
1493                    idx3 = idx10 + idx11 + c;
1494                    idx4 = idx10 + idx12 + cC;
1495                    idx5 = idx10 + idx12 + c;
1496                    idx6 = idx10 + idx11 + cC;
1497                    idx7 = idx9 + idx11 + c;
1498                    idx8 = idx9 + idx12 + cC;
1499                    A = a[idx1];
1500                    B = a[idx2];
1501                    C = a[idx3];
1502                    D = a[idx4];
1503                    E = a[idx5];
1504                    F = a[idx6];
1505                    G = a[idx7];
1506                    H = a[idx8];
1507                    a[idx7] = (A + B + C - D) / 2;
1508                    a[idx3] = (E + F + G - H) / 2;
1509                    a[idx1] = (G + H + E - F) / 2;
1510                    a[idx5] = (C + D + A - B) / 2;
1511                    a[idx2] = (H + G + F - E) / 2;
1512                    a[idx6] = (D + C + B - A) / 2;
1513                    a[idx8] = (B + A + D - C) / 2;
1514                    a[idx4] = (F + E + H - G) / 2;
1515                }
1516            }
1517        }
1518    }
1519
1520    private void yTransform(double[][][] a) {
1521        double A, B, C, D, E, F, G, H;
1522        int cC, rC, sC;
1523        for (int s = 0; s <= slices / 2; s++) {
1524            sC = (slices - s) % slices;
1525            for (int r = 0; r <= rows / 2; r++) {
1526                rC = (rows - r) % rows;
1527                for (int c = 0; c <= columns / 2; c++) {
1528                    cC = (columns - c) % columns;
1529                    A = a[s][rC][c];
1530                    B = a[s][r][cC];
1531                    C = a[sC][r][c];
1532                    D = a[sC][rC][cC];
1533                    E = a[sC][rC][c];
1534                    F = a[sC][r][cC];
1535                    G = a[s][r][c];
1536                    H = a[s][rC][cC];
1537                    a[s][r][c] = (A + B + C - D) / 2;
1538                    a[sC][r][c] = (E + F + G - H) / 2;
1539                    a[s][rC][c] = (G + H + E - F) / 2;
1540                    a[sC][rC][c] = (C + D + A - B) / 2;
1541                    a[s][r][cC] = (H + G + F - E) / 2;
1542                    a[sC][r][cC] = (D + C + B - A) / 2;
1543                    a[s][rC][cC] = (B + A + D - C) / 2;
1544                    a[sC][rC][cC] = (F + E + H - G) / 2;
1545                }
1546            }
1547        }
1548    }
1549
1550}