/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is JTransforms.
 *
 * The Initial Developer of the Original Code is
 * Piotr Wendykier, Emory University.
 * Portions created by the Initial Developer are Copyright (C) 2007-2009
 * the Initial Developer. All Rights Reserved.
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */
034
035package edu.emory.mathcs.jtransforms.dct;
036
037import java.util.concurrent.Future;
038
039import edu.emory.mathcs.utils.ConcurrencyUtils;
040
/**
 * Computes the 2D Discrete Cosine Transform (DCT) of single precision data.
 * The sizes of both dimensions can be arbitrary numbers greater than 1. This
 * is a parallel implementation of split-radix and mixed-radix algorithms
 * optimized for SMP systems. <br>
 * <br>
 * Part of the code is derived from the General Purpose FFT Package written by
 * Takuya Ooura (http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html)
 * 
 * @author Piotr Wendykier (piotr.wendykier@gmail.com)
 * 
 */
053public class FloatDCT_2D {
054
055    private int rows;
056
057    private int columns;
058
059    private float[] t;
060
061    private FloatDCT_1D dctColumns, dctRows;
062
063    private int nt;
064
065    private int oldNthreads;
066
067    private boolean isPowerOfTwo = false;
068
069    private boolean useThreads = false;
070
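    /**
     * Creates a new instance of FloatDCT_2D.
     * 
     * @param rows
     *            number of rows; must be greater than 1
     * @param columns
     *            number of columns; must be greater than 1
     * @throws IllegalArgumentException
     *             if <code>rows</code> or <code>columns</code> is not greater
     *             than 1
     */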
079    public FloatDCT_2D(int rows, int columns) {
080        if (rows <= 1 || columns <= 1) {
081            throw new IllegalArgumentException("rows and columns must be greater than 1");
082        }
083        this.rows = rows;
084        this.columns = columns;
085        if (rows * columns >= ConcurrencyUtils.getThreadsBeginN_2D()) {
086            this.useThreads = true;
087        }
088        if (ConcurrencyUtils.isPowerOf2(rows) && ConcurrencyUtils.isPowerOf2(columns)) {
089            isPowerOfTwo = true;
090            oldNthreads = ConcurrencyUtils.getNumberOfThreads();
091            nt = 4 * oldNthreads * rows;
092            if (columns == 2 * oldNthreads) {
093                nt >>= 1;
094            } else if (columns < 2 * oldNthreads) {
095                nt >>= 2;
096            }
097            t = new float[nt];
098        }
099        dctColumns = new FloatDCT_1D(columns);
100        if (columns == rows) {
101            dctRows = dctColumns;
102        } else {
103            dctRows = new FloatDCT_1D(rows);
104        }
105    }
106
    /**
     * Computes the 2D forward DCT (DCT-II) in place, leaving the result in
     * <code>a</code>. The data is stored in a 1D array in row-major order,
     * i.e. element (r, c) is at index <code>r * columns + c</code>.
     * 
     * @param a
     *            data to transform
     * @param scale
     *            if true then scaling is performed
     */
116    public void forward(final float[] a, final boolean scale) {
117        int nthreads = ConcurrencyUtils.getNumberOfThreads();
118        if (isPowerOfTwo) {
119            if (nthreads != oldNthreads) {
120                nt = 4 * nthreads * rows;
121                if (columns == 2 * nthreads) {
122                    nt >>= 1;
123                } else if (columns < 2 * nthreads) {
124                    nt >>= 2;
125                }
126                t = new float[nt];
127                oldNthreads = nthreads;
128            }
129            if ((nthreads > 1) && useThreads) {
130                ddxt2d_subth(-1, a, scale);
131                ddxt2d0_subth(-1, a, scale);
132            } else {
133                ddxt2d_sub(-1, a, scale);
134                for (int i = 0; i < rows; i++) {
135                    dctColumns.forward(a, i * columns, scale);
136                }
137            }
138        } else {
139            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
140                Future<?>[] futures = new Future[nthreads];
141                int p = rows / nthreads;
142                for (int l = 0; l < nthreads; l++) {
143                    final int firstRow = l * p;
144                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
145                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
146                        public void run() {
147                            for (int r = firstRow; r < lastRow; r++) {
148                                dctColumns.forward(a, r * columns, scale);
149                            }
150                        }
151                    });
152                }
153                ConcurrencyUtils.waitForCompletion(futures);
154                p = columns / nthreads;
155                for (int l = 0; l < nthreads; l++) {
156                    final int firstColumn = l * p;
157                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
158                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
159                        public void run() {
160                            float[] temp = new float[rows];
161                            for (int c = firstColumn; c < lastColumn; c++) {
162                                for (int r = 0; r < rows; r++) {
163                                    temp[r] = a[r * columns + c];
164                                }
165                                dctRows.forward(temp, scale);
166                                for (int r = 0; r < rows; r++) {
167                                    a[r * columns + c] = temp[r];
168                                }
169                            }
170                        }
171                    });
172                }
173                ConcurrencyUtils.waitForCompletion(futures);
174            } else {
175                for (int i = 0; i < rows; i++) {
176                    dctColumns.forward(a, i * columns, scale);
177                }
178                float[] temp = new float[rows];
179                for (int c = 0; c < columns; c++) {
180                    for (int r = 0; r < rows; r++) {
181                        temp[r] = a[r * columns + c];
182                    }
183                    dctRows.forward(temp, scale);
184                    for (int r = 0; r < rows; r++) {
185                        a[r * columns + c] = temp[r];
186                    }
187                }
188            }
189        }
190    }
191
    /**
     * Computes the 2D forward DCT (DCT-II) in place, leaving the result in
     * <code>a</code>. The data is stored in a 2D array indexed as
     * <code>a[row][column]</code>.
     * 
     * @param a
     *            data to transform
     * @param scale
     *            if true then scaling is performed
     */
201    public void forward(final float[][] a, final boolean scale) {
202        int nthreads = ConcurrencyUtils.getNumberOfThreads();
203        if (isPowerOfTwo) {
204            if (nthreads != oldNthreads) {
205                nt = 4 * nthreads * rows;
206                if (columns == 2 * nthreads) {
207                    nt >>= 1;
208                } else if (columns < 2 * nthreads) {
209                    nt >>= 2;
210                }
211                t = new float[nt];
212                oldNthreads = nthreads;
213            }
214            if ((nthreads > 1) && useThreads) {
215                ddxt2d_subth(-1, a, scale);
216                ddxt2d0_subth(-1, a, scale);
217            } else {
218                ddxt2d_sub(-1, a, scale);
219                for (int i = 0; i < rows; i++) {
220                    dctColumns.forward(a[i], scale);
221                }
222            }
223        } else {
224            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
225                Future<?>[] futures = new Future[nthreads];
226                int p = rows / nthreads;
227                for (int l = 0; l < nthreads; l++) {
228                    final int firstRow = l * p;
229                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
230                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
231                        public void run() {
232                            for (int i = firstRow; i < lastRow; i++) {
233                                dctColumns.forward(a[i], scale);
234                            }
235                        }
236                    });
237                }
238                ConcurrencyUtils.waitForCompletion(futures);
239                p = columns / nthreads;
240                for (int l = 0; l < nthreads; l++) {
241                    final int firstColumn = l * p;
242                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
243                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
244                        public void run() {
245                            float[] temp = new float[rows];
246                            for (int c = firstColumn; c < lastColumn; c++) {
247                                for (int r = 0; r < rows; r++) {
248                                    temp[r] = a[r][c];
249                                }
250                                dctRows.forward(temp, scale);
251                                for (int r = 0; r < rows; r++) {
252                                    a[r][c] = temp[r];
253                                }
254                            }
255                        }
256                    });
257                }
258                ConcurrencyUtils.waitForCompletion(futures);
259            } else {
260                for (int i = 0; i < rows; i++) {
261                    dctColumns.forward(a[i], scale);
262                }
263                float[] temp = new float[rows];
264                for (int c = 0; c < columns; c++) {
265                    for (int r = 0; r < rows; r++) {
266                        temp[r] = a[r][c];
267                    }
268                    dctRows.forward(temp, scale);
269                    for (int r = 0; r < rows; r++) {
270                        a[r][c] = temp[r];
271                    }
272                }
273            }
274        }
275    }
276
    /**
     * Computes the 2D inverse DCT (DCT-III) in place, leaving the result in
     * <code>a</code>. The data is stored in a 1D array in row-major order,
     * i.e. element (r, c) is at index <code>r * columns + c</code>.
     * 
     * @param a
     *            data to transform
     * @param scale
     *            if true then scaling is performed
     */
286    public void inverse(final float[] a, final boolean scale) {
287        int nthreads = ConcurrencyUtils.getNumberOfThreads();
288        if (isPowerOfTwo) {
289            if (nthreads != oldNthreads) {
290                nt = 4 * nthreads * rows;
291                if (columns == 2 * nthreads) {
292                    nt >>= 1;
293                } else if (columns < 2 * nthreads) {
294                    nt >>= 2;
295                }
296                t = new float[nt];
297                oldNthreads = nthreads;
298            }
299            if ((nthreads > 1) && useThreads) {
300                ddxt2d_subth(1, a, scale);
301                ddxt2d0_subth(1, a, scale);
302            } else {
303                ddxt2d_sub(1, a, scale);
304                for (int i = 0; i < rows; i++) {
305                    dctColumns.inverse(a, i * columns, scale);
306                }
307            }
308        } else {
309            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
310                Future<?>[] futures = new Future[nthreads];
311                int p = rows / nthreads;
312                for (int l = 0; l < nthreads; l++) {
313                    final int firstRow = l * p;
314                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
315                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
316                        public void run() {
317                            for (int i = firstRow; i < lastRow; i++) {
318                                dctColumns.inverse(a, i * columns, scale);
319                            }
320                        }
321                    });
322                }
323                ConcurrencyUtils.waitForCompletion(futures);
324                p = columns / nthreads;
325                for (int l = 0; l < nthreads; l++) {
326                    final int firstColumn = l * p;
327                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
328                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
329                        public void run() {
330                            float[] temp = new float[rows];
331                            for (int c = firstColumn; c < lastColumn; c++) {
332                                for (int r = 0; r < rows; r++) {
333                                    temp[r] = a[r * columns + c];
334                                }
335                                dctRows.inverse(temp, scale);
336                                for (int r = 0; r < rows; r++) {
337                                    a[r * columns + c] = temp[r];
338                                }
339                            }
340                        }
341                    });
342                }
343                ConcurrencyUtils.waitForCompletion(futures);
344            } else {
345                for (int i = 0; i < rows; i++) {
346                    dctColumns.inverse(a, i * columns, scale);
347                }
348                float[] temp = new float[rows];
349                for (int c = 0; c < columns; c++) {
350                    for (int r = 0; r < rows; r++) {
351                        temp[r] = a[r * columns + c];
352                    }
353                    dctRows.inverse(temp, scale);
354                    for (int r = 0; r < rows; r++) {
355                        a[r * columns + c] = temp[r];
356                    }
357                }
358            }
359        }
360    }
361
    /**
     * Computes the 2D inverse DCT (DCT-III) in place, leaving the result in
     * <code>a</code>. The data is stored in a 2D array indexed as
     * <code>a[row][column]</code>.
     * 
     * @param a
     *            data to transform
     * @param scale
     *            if true then scaling is performed
     */
371    public void inverse(final float[][] a, final boolean scale) {
372        int nthreads = ConcurrencyUtils.getNumberOfThreads();
373        if (isPowerOfTwo) {
374            if (nthreads != oldNthreads) {
375                nt = 4 * nthreads * rows;
376                if (columns == 2 * nthreads) {
377                    nt >>= 1;
378                } else if (columns < 2 * nthreads) {
379                    nt >>= 2;
380                }
381                t = new float[nt];
382                oldNthreads = nthreads;
383            }
384            if ((nthreads > 1) && useThreads) {
385                ddxt2d_subth(1, a, scale);
386                ddxt2d0_subth(1, a, scale);
387            } else {
388                ddxt2d_sub(1, a, scale);
389                for (int i = 0; i < rows; i++) {
390                    dctColumns.inverse(a[i], scale);
391                }
392            }
393        } else {
394            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
395                Future<?>[] futures = new Future[nthreads];
396                int p = rows / nthreads;
397                for (int l = 0; l < nthreads; l++) {
398                    final int firstRow = l * p;
399                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
400                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
401                        public void run() {
402                            for (int i = firstRow; i < lastRow; i++) {
403                                dctColumns.inverse(a[i], scale);
404                            }
405                        }
406                    });
407                }
408                ConcurrencyUtils.waitForCompletion(futures);
409                p = columns / nthreads;
410                for (int l = 0; l < nthreads; l++) {
411                    final int firstColumn = l * p;
412                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
413                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
414                        public void run() {
415                            float[] temp = new float[rows];
416                            for (int c = firstColumn; c < lastColumn; c++) {
417                                for (int r = 0; r < rows; r++) {
418                                    temp[r] = a[r][c];
419                                }
420                                dctRows.inverse(temp, scale);
421                                for (int r = 0; r < rows; r++) {
422                                    a[r][c] = temp[r];
423                                }
424                            }
425                        }
426                    });
427                }
428                ConcurrencyUtils.waitForCompletion(futures);
429            } else {
430                for (int r = 0; r < rows; r++) {
431                    dctColumns.inverse(a[r], scale);
432                }
433                float[] temp = new float[rows];
434                for (int c = 0; c < columns; c++) {
435                    for (int r = 0; r < rows; r++) {
436                        temp[r] = a[r][c];
437                    }
438                    dctRows.inverse(temp, scale);
439                    for (int r = 0; r < rows; r++) {
440                        a[r][c] = temp[r];
441                    }
442                }
443            }
444        }
445    }
446
447    private void ddxt2d_subth(final int isgn, final float[] a, final boolean scale) {
448        int nthread = ConcurrencyUtils.getNumberOfThreads();
449        int nt = 4 * rows;
450        if (columns == 2 * nthread) {
451            nt >>= 1;
452        } else if (columns < 2 * nthread) {
453            nthread = columns;
454            nt >>= 2;
455        }
456        final int nthreads = nthread;
457        Future<?>[] futures = new Future[nthread];
458
459        for (int i = 0; i < nthread; i++) {
460            final int n0 = i;
461            final int startt = nt * i;
462            futures[i] = ConcurrencyUtils.submit(new Runnable() {
463                public void run() {
464                    int idx1, idx2;
465                    if (columns > 2 * nthreads) {
466                        if (isgn == -1) {
467                            for (int c = 4 * n0; c < columns; c += 4 * nthreads) {
468                                for (int r = 0; r < rows; r++) {
469                                    idx1 = r * columns + c;
470                                    idx2 = startt + rows + r;
471                                    t[startt + r] = a[idx1];
472                                    t[idx2] = a[idx1 + 1];
473                                    t[idx2 + rows] = a[idx1 + 2];
474                                    t[idx2 + 2 * rows] = a[idx1 + 3];
475                                }
476                                dctRows.forward(t, startt, scale);
477                                dctRows.forward(t, startt + rows, scale);
478                                dctRows.forward(t, startt + 2 * rows, scale);
479                                dctRows.forward(t, startt + 3 * rows, scale);
480                                for (int r = 0; r < rows; r++) {
481                                    idx1 = r * columns + c;
482                                    idx2 = startt + rows + r;
483                                    a[idx1] = t[startt + r];
484                                    a[idx1 + 1] = t[idx2];
485                                    a[idx1 + 2] = t[idx2 + rows];
486                                    a[idx1 + 3] = t[idx2 + 2 * rows];
487                                }
488                            }
489                        } else {
490                            for (int c = 4 * n0; c < columns; c += 4 * nthreads) {
491                                for (int r = 0; r < rows; r++) {
492                                    idx1 = r * columns + c;
493                                    idx2 = startt + rows + r;
494                                    t[startt + r] = a[idx1];
495                                    t[idx2] = a[idx1 + 1];
496                                    t[idx2 + rows] = a[idx1 + 2];
497                                    t[idx2 + 2 * rows] = a[idx1 + 3];
498                                }
499                                dctRows.inverse(t, startt, scale);
500                                dctRows.inverse(t, startt + rows, scale);
501                                dctRows.inverse(t, startt + 2 * rows, scale);
502                                dctRows.inverse(t, startt + 3 * rows, scale);
503                                for (int r = 0; r < rows; r++) {
504                                    idx1 = r * columns + c;
505                                    idx2 = startt + rows + r;
506                                    a[idx1] = t[startt + r];
507                                    a[idx1 + 1] = t[idx2];
508                                    a[idx1 + 2] = t[idx2 + rows];
509                                    a[idx1 + 3] = t[idx2 + 2 * rows];
510                                }
511                            }
512                        }
513                    } else if (columns == 2 * nthreads) {
514                        for (int r = 0; r < rows; r++) {
515                            idx1 = r * columns + 2 * n0;
516                            idx2 = startt + r;
517                            t[idx2] = a[idx1];
518                            t[idx2 + rows] = a[idx1 + 1];
519                        }
520                        if (isgn == -1) {
521                            dctRows.forward(t, startt, scale);
522                            dctRows.forward(t, startt + rows, scale);
523                        } else {
524                            dctRows.inverse(t, startt, scale);
525                            dctRows.inverse(t, startt + rows, scale);
526                        }
527                        for (int r = 0; r < rows; r++) {
528                            idx1 = r * columns + 2 * n0;
529                            idx2 = startt + r;
530                            a[idx1] = t[idx2];
531                            a[idx1 + 1] = t[idx2 + rows];
532                        }
533                    } else if (columns == nthreads) {
534                        for (int r = 0; r < rows; r++) {
535                            t[startt + r] = a[r * columns + n0];
536                        }
537                        if (isgn == -1) {
538                            dctRows.forward(t, startt, scale);
539                        } else {
540                            dctRows.inverse(t, startt, scale);
541                        }
542                        for (int r = 0; r < rows; r++) {
543                            a[r * columns + n0] = t[startt + r];
544                        }
545                    }
546                }
547            });
548        }
549        ConcurrencyUtils.waitForCompletion(futures);
550    }
551
552    private void ddxt2d_subth(final int isgn, final float[][] a, final boolean scale) {
553        int nthread = ConcurrencyUtils.getNumberOfThreads();
554        int nt = 4 * rows;
555        if (columns == 2 * nthread) {
556            nt >>= 1;
557        } else if (columns < 2 * nthread) {
558            nthread = columns;
559            nt >>= 2;
560        }
561        final int nthreads = nthread;
562        Future<?>[] futures = new Future[nthread];
563
564        for (int i = 0; i < nthread; i++) {
565            final int n0 = i;
566            final int startt = nt * i;
567            futures[i] = ConcurrencyUtils.submit(new Runnable() {
568                public void run() {
569                    int idx2;
570                    if (columns > 2 * nthreads) {
571                        if (isgn == -1) {
572                            for (int c = 4 * n0; c < columns; c += 4 * nthreads) {
573                                for (int r = 0; r < rows; r++) {
574                                    idx2 = startt + rows + r;
575                                    t[startt + r] = a[r][c];
576                                    t[idx2] = a[r][c + 1];
577                                    t[idx2 + rows] = a[r][c + 2];
578                                    t[idx2 + 2 * rows] = a[r][c + 3];
579                                }
580                                dctRows.forward(t, startt, scale);
581                                dctRows.forward(t, startt + rows, scale);
582                                dctRows.forward(t, startt + 2 * rows, scale);
583                                dctRows.forward(t, startt + 3 * rows, scale);
584                                for (int r = 0; r < rows; r++) {
585                                    idx2 = startt + rows + r;
586                                    a[r][c] = t[startt + r];
587                                    a[r][c + 1] = t[idx2];
588                                    a[r][c + 2] = t[idx2 + rows];
589                                    a[r][c + 3] = t[idx2 + 2 * rows];
590                                }
591                            }
592                        } else {
593                            for (int c = 4 * n0; c < columns; c += 4 * nthreads) {
594                                for (int r = 0; r < rows; r++) {
595                                    idx2 = startt + rows + r;
596                                    t[startt + r] = a[r][c];
597                                    t[idx2] = a[r][c + 1];
598                                    t[idx2 + rows] = a[r][c + 2];
599                                    t[idx2 + 2 * rows] = a[r][c + 3];
600                                }
601                                dctRows.inverse(t, startt, scale);
602                                dctRows.inverse(t, startt + rows, scale);
603                                dctRows.inverse(t, startt + 2 * rows, scale);
604                                dctRows.inverse(t, startt + 3 * rows, scale);
605                                for (int r = 0; r < rows; r++) {
606                                    idx2 = startt + rows + r;
607                                    a[r][c] = t[startt + r];
608                                    a[r][c + 1] = t[idx2];
609                                    a[r][c + 2] = t[idx2 + rows];
610                                    a[r][c + 3] = t[idx2 + 2 * rows];
611                                }
612                            }
613                        }
614                    } else if (columns == 2 * nthreads) {
615                        for (int r = 0; r < rows; r++) {
616                            idx2 = startt + r;
617                            t[idx2] = a[r][2 * n0];
618                            t[idx2 + rows] = a[r][2 * n0 + 1];
619                        }
620                        if (isgn == -1) {
621                            dctRows.forward(t, startt, scale);
622                            dctRows.forward(t, startt + rows, scale);
623                        } else {
624                            dctRows.inverse(t, startt, scale);
625                            dctRows.inverse(t, startt + rows, scale);
626                        }
627                        for (int r = 0; r < rows; r++) {
628                            idx2 = startt + r;
629                            a[r][2 * n0] = t[idx2];
630                            a[r][2 * n0 + 1] = t[idx2 + rows];
631                        }
632                    } else if (columns == nthreads) {
633                        for (int r = 0; r < rows; r++) {
634                            t[startt + r] = a[r][n0];
635                        }
636                        if (isgn == -1) {
637                            dctRows.forward(t, startt, scale);
638                        } else {
639                            dctRows.inverse(t, startt, scale);
640                        }
641                        for (int r = 0; r < rows; r++) {
642                            a[r][n0] = t[startt + r];
643                        }
644                    }
645                }
646            });
647        }
648        ConcurrencyUtils.waitForCompletion(futures);
649    }
650
651    private void ddxt2d0_subth(final int isgn, final float[] a, final boolean scale) {
652        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
653
654        Future<?>[] futures = new Future[nthreads];
655
656        for (int i = 0; i < nthreads; i++) {
657            final int n0 = i;
658            futures[i] = ConcurrencyUtils.submit(new Runnable() {
659
660                public void run() {
661                    if (isgn == -1) {
662                        for (int r = n0; r < rows; r += nthreads) {
663                            dctColumns.forward(a, r * columns, scale);
664                        }
665                    } else {
666                        for (int r = n0; r < rows; r += nthreads) {
667                            dctColumns.inverse(a, r * columns, scale);
668                        }
669                    }
670                }
671            });
672        }
673        ConcurrencyUtils.waitForCompletion(futures);
674    }
675
676    private void ddxt2d0_subth(final int isgn, final float[][] a, final boolean scale) {
677        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
678
679        Future<?>[] futures = new Future[nthreads];
680
681        for (int i = 0; i < nthreads; i++) {
682            final int n0 = i;
683            futures[i] = ConcurrencyUtils.submit(new Runnable() {
684
685                public void run() {
686                    if (isgn == -1) {
687                        for (int r = n0; r < rows; r += nthreads) {
688                            dctColumns.forward(a[r], scale);
689                        }
690                    } else {
691                        for (int r = n0; r < rows; r += nthreads) {
692                            dctColumns.inverse(a[r], scale);
693                        }
694                    }
695                }
696            });
697        }
698        ConcurrencyUtils.waitForCompletion(futures);
699    }
700
701    private void ddxt2d_sub(int isgn, float[] a, boolean scale) {
702        int idx1, idx2;
703
704        if (columns > 2) {
705            if (isgn == -1) {
706                for (int c = 0; c < columns; c += 4) {
707                    for (int r = 0; r < rows; r++) {
708                        idx1 = r * columns + c;
709                        idx2 = rows + r;
710                        t[r] = a[idx1];
711                        t[idx2] = a[idx1 + 1];
712                        t[idx2 + rows] = a[idx1 + 2];
713                        t[idx2 + 2 * rows] = a[idx1 + 3];
714                    }
715                    dctRows.forward(t, 0, scale);
716                    dctRows.forward(t, rows, scale);
717                    dctRows.forward(t, 2 * rows, scale);
718                    dctRows.forward(t, 3 * rows, scale);
719                    for (int r = 0; r < rows; r++) {
720                        idx1 = r * columns + c;
721                        idx2 = rows + r;
722                        a[idx1] = t[r];
723                        a[idx1 + 1] = t[idx2];
724                        a[idx1 + 2] = t[idx2 + rows];
725                        a[idx1 + 3] = t[idx2 + 2 * rows];
726                    }
727                }
728            } else {
729                for (int c = 0; c < columns; c += 4) {
730                    for (int r = 0; r < rows; r++) {
731                        idx1 = r * columns + c;
732                        idx2 = rows + r;
733                        t[r] = a[idx1];
734                        t[idx2] = a[idx1 + 1];
735                        t[idx2 + rows] = a[idx1 + 2];
736                        t[idx2 + 2 * rows] = a[idx1 + 3];
737                    }
738                    dctRows.inverse(t, 0, scale);
739                    dctRows.inverse(t, rows, scale);
740                    dctRows.inverse(t, 2 * rows, scale);
741                    dctRows.inverse(t, 3 * rows, scale);
742                    for (int r = 0; r < rows; r++) {
743                        idx1 = r * columns + c;
744                        idx2 = rows + r;
745                        a[idx1] = t[r];
746                        a[idx1 + 1] = t[idx2];
747                        a[idx1 + 2] = t[idx2 + rows];
748                        a[idx1 + 3] = t[idx2 + 2 * rows];
749                    }
750                }
751            }
752        } else if (columns == 2) {
753            for (int r = 0; r < rows; r++) {
754                idx1 = r * columns;
755                t[r] = a[idx1];
756                t[rows + r] = a[idx1 + 1];
757            }
758            if (isgn == -1) {
759                dctRows.forward(t, 0, scale);
760                dctRows.forward(t, rows, scale);
761            } else {
762                dctRows.inverse(t, 0, scale);
763                dctRows.inverse(t, rows, scale);
764            }
765            for (int r = 0; r < rows; r++) {
766                idx1 = r * columns;
767                a[idx1] = t[r];
768                a[idx1 + 1] = t[rows + r];
769            }
770        }
771    }
772
773    private void ddxt2d_sub(int isgn, float[][] a, boolean scale) {
774        int idx2;
775
776        if (columns > 2) {
777            if (isgn == -1) {
778                for (int c = 0; c < columns; c += 4) {
779                    for (int r = 0; r < rows; r++) {
780                        idx2 = rows + r;
781                        t[r] = a[r][c];
782                        t[idx2] = a[r][c + 1];
783                        t[idx2 + rows] = a[r][c + 2];
784                        t[idx2 + 2 * rows] = a[r][c + 3];
785                    }
786                    dctRows.forward(t, 0, scale);
787                    dctRows.forward(t, rows, scale);
788                    dctRows.forward(t, 2 * rows, scale);
789                    dctRows.forward(t, 3 * rows, scale);
790                    for (int r = 0; r < rows; r++) {
791                        idx2 = rows + r;
792                        a[r][c] = t[r];
793                        a[r][c + 1] = t[idx2];
794                        a[r][c + 2] = t[idx2 + rows];
795                        a[r][c + 3] = t[idx2 + 2 * rows];
796                    }
797                }
798            } else {
799                for (int c = 0; c < columns; c += 4) {
800                    for (int r = 0; r < rows; r++) {
801                        idx2 = rows + r;
802                        t[r] = a[r][c];
803                        t[idx2] = a[r][c + 1];
804                        t[idx2 + rows] = a[r][c + 2];
805                        t[idx2 + 2 * rows] = a[r][c + 3];
806                    }
807                    dctRows.inverse(t, 0, scale);
808                    dctRows.inverse(t, rows, scale);
809                    dctRows.inverse(t, 2 * rows, scale);
810                    dctRows.inverse(t, 3 * rows, scale);
811                    for (int r = 0; r < rows; r++) {
812                        idx2 = rows + r;
813                        a[r][c] = t[r];
814                        a[r][c + 1] = t[idx2];
815                        a[r][c + 2] = t[idx2 + rows];
816                        a[r][c + 3] = t[idx2 + 2 * rows];
817                    }
818                }
819            }
820        } else if (columns == 2) {
821            for (int r = 0; r < rows; r++) {
822                t[r] = a[r][0];
823                t[rows + r] = a[r][1];
824            }
825            if (isgn == -1) {
826                dctRows.forward(t, 0, scale);
827                dctRows.forward(t, rows, scale);
828            } else {
829                dctRows.inverse(t, 0, scale);
830                dctRows.inverse(t, rows, scale);
831            }
832            for (int r = 0; r < rows; r++) {
833                a[r][0] = t[r];
834                a[r][1] = t[rows + r];
835            }
836        }
837    }
838}