001/* ***** BEGIN LICENSE BLOCK ***** 002 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 003 * 004 * The contents of this file are subject to the Mozilla Public License Version 005 * 1.1 (the "License"); you may not use this file except in compliance with 006 * the License. You may obtain a copy of the License at 007 * http://www.mozilla.org/MPL/ 008 * 009 * Software distributed under the License is distributed on an "AS IS" basis, 010 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 011 * for the specific language governing rights and limitations under the 012 * License. 013 * 014 * The Original Code is JTransforms. 015 * 016 * The Initial Developer of the Original Code is 017 * Piotr Wendykier, Emory University. 018 * Portions created by the Initial Developer are Copyright (C) 2007-2009 019 * the Initial Developer. All Rights Reserved. 020 * 021 * Alternatively, the contents of this file may be used under the terms of 022 * either the GNU General Public License Version 2 or later (the "GPL"), or 023 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 024 * in which case the provisions of the GPL or the LGPL are applicable instead 025 * of those above. If you wish to allow use of your version of this file only 026 * under the terms of either the GPL or the LGPL, and not to allow others to 027 * use your version of this file under the terms of the MPL, indicate your 028 * decision by deleting the provisions above and replace them with the notice 029 * and other provisions required by the GPL or the LGPL. If you do not delete 030 * the provisions above, a recipient may use your version of this file under 031 * the terms of any one of the MPL, the GPL or the LGPL. 032 * 033 * ***** END LICENSE BLOCK ***** */ 034 035package edu.emory.mathcs.jtransforms.dct; 036 037import java.util.concurrent.Future; 038 039import edu.emory.mathcs.utils.ConcurrencyUtils; 040 041/** 042 * Computes 2D Discrete Cosine Transform (DCT) of single precision data. The 043 * sizes of both dimensions can be arbitrary numbers. This is a parallel 044 * implementation of split-radix and mixed-radix algorithms optimized for SMP 045 * systems. <br> 046 * <br> 047 * Part of the code is derived from General Purpose FFT Package written by Takuya Ooura 048 * (http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html) 049 * 050 * @author Piotr Wendykier (piotr.wendykier@gmail.com) 051 * 052 */ 053public class FloatDCT_2D { 054 055 private int rows; 056 057 private int columns; 058 059 private float[] t; 060 061 private FloatDCT_1D dctColumns, dctRows; 062 063 private int nt; 064 065 private int oldNthreads; 066 067 private boolean isPowerOfTwo = false; 068 069 private boolean useThreads = false; 070 071 /** 072 * Creates new instance of FloatDCT_2D. 073 * 074 * @param rows 075 * number of rows 076 * @param columns 077 * number of columns 078 */ 079 public FloatDCT_2D(int rows, int columns) { 080 if (rows <= 1 || columns <= 1) { 081 throw new IllegalArgumentException("rows and columns must be greater than 1"); 082 } 083 this.rows = rows; 084 this.columns = columns; 085 if (rows * columns >= ConcurrencyUtils.getThreadsBeginN_2D()) { 086 this.useThreads = true; 087 } 088 if (ConcurrencyUtils.isPowerOf2(rows) && ConcurrencyUtils.isPowerOf2(columns)) { 089 isPowerOfTwo = true; 090 oldNthreads = ConcurrencyUtils.getNumberOfThreads(); 091 nt = 4 * oldNthreads * rows; 092 if (columns == 2 * oldNthreads) { 093 nt >>= 1; 094 } else if (columns < 2 * oldNthreads) { 095 nt >>= 2; 096 } 097 t = new float[nt]; 098 } 099 dctColumns = new FloatDCT_1D(columns); 100 if (columns == rows) { 101 dctRows = dctColumns; 102 } else { 103 dctRows = new FloatDCT_1D(rows); 104 } 105 } 106 107 /** 108 * Computes 2D forward DCT (DCT-II) leaving the result in <code>a</code>. 109 * The data is stored in 1D array in row-major order. 110 * 111 * @param a 112 * data to transform 113 * @param scale 114 * if true then scaling is performed 115 */ 116 public void forward(final float[] a, final boolean scale) { 117 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 118 if (isPowerOfTwo) { 119 if (nthreads != oldNthreads) { 120 nt = 4 * nthreads * rows; 121 if (columns == 2 * nthreads) { 122 nt >>= 1; 123 } else if (columns < 2 * nthreads) { 124 nt >>= 2; 125 } 126 t = new float[nt]; 127 oldNthreads = nthreads; 128 } 129 if ((nthreads > 1) && useThreads) { 130 ddxt2d_subth(-1, a, scale); 131 ddxt2d0_subth(-1, a, scale); 132 } else { 133 ddxt2d_sub(-1, a, scale); 134 for (int i = 0; i < rows; i++) { 135 dctColumns.forward(a, i * columns, scale); 136 } 137 } 138 } else { 139 if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) { 140 Future<?>[] futures = new Future[nthreads]; 141 int p = rows / nthreads; 142 for (int l = 0; l < nthreads; l++) { 143 final int firstRow = l * p; 144 final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p; 145 futures[l] = ConcurrencyUtils.submit(new Runnable() { 146 public void run() { 147 for (int r = firstRow; r < lastRow; r++) { 148 dctColumns.forward(a, r * columns, scale); 149 } 150 } 151 }); 152 } 153 ConcurrencyUtils.waitForCompletion(futures); 154 p = columns / nthreads; 155 for (int l = 0; l < nthreads; l++) { 156 final int firstColumn = l * p; 157 final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p; 158 futures[l] = ConcurrencyUtils.submit(new Runnable() { 159 public void run() { 160 float[] temp = new float[rows]; 161 for (int c = firstColumn; c < lastColumn; c++) { 162 for (int r = 0; r < rows; r++) { 163 temp[r] = a[r * columns + c]; 164 } 165 dctRows.forward(temp, scale); 166 for (int r = 0; r < rows; r++) { 167 a[r * columns + c] = temp[r]; 168 } 169 } 170 } 171 }); 172 } 173 ConcurrencyUtils.waitForCompletion(futures); 174 } else { 175 for (int i = 0; i < rows; i++) { 176 dctColumns.forward(a, i * columns, scale); 177 } 178 float[] temp = new float[rows]; 179 for (int c = 0; c < columns; c++) { 180 for (int r = 0; r < rows; r++) { 181 temp[r] = a[r * columns + c]; 182 } 183 dctRows.forward(temp, scale); 184 for (int r = 0; r < rows; r++) { 185 a[r * columns + c] = temp[r]; 186 } 187 } 188 } 189 } 190 } 191 192 /** 193 * Computes 2D forward DCT (DCT-II) leaving the result in <code>a</code>. 194 * The data is stored in 2D array. 195 * 196 * @param a 197 * data to transform 198 * @param scale 199 * if true then scaling is performed 200 */ 201 public void forward(final float[][] a, final boolean scale) { 202 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 203 if (isPowerOfTwo) { 204 if (nthreads != oldNthreads) { 205 nt = 4 * nthreads * rows; 206 if (columns == 2 * nthreads) { 207 nt >>= 1; 208 } else if (columns < 2 * nthreads) { 209 nt >>= 2; 210 } 211 t = new float[nt]; 212 oldNthreads = nthreads; 213 } 214 if ((nthreads > 1) && useThreads) { 215 ddxt2d_subth(-1, a, scale); 216 ddxt2d0_subth(-1, a, scale); 217 } else { 218 ddxt2d_sub(-1, a, scale); 219 for (int i = 0; i < rows; i++) { 220 dctColumns.forward(a[i], scale); 221 } 222 } 223 } else { 224 if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) { 225 Future<?>[] futures = new Future[nthreads]; 226 int p = rows / nthreads; 227 for (int l = 0; l < nthreads; l++) { 228 final int firstRow = l * p; 229 final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p; 230 futures[l] = ConcurrencyUtils.submit(new Runnable() { 231 public void run() { 232 for (int i = firstRow; i < lastRow; i++) { 233 dctColumns.forward(a[i], scale); 234 } 235 } 236 }); 237 } 238 ConcurrencyUtils.waitForCompletion(futures); 239 p = columns / nthreads; 240 for (int l = 0; l < nthreads; l++) { 241 final int firstColumn = l * p; 242 final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p; 243 futures[l] = ConcurrencyUtils.submit(new Runnable() { 244 public void run() { 245 float[] temp = new float[rows]; 246 for (int c = firstColumn; c < lastColumn; c++) { 247 for (int r = 0; r < rows; r++) { 248 temp[r] = a[r][c]; 249 } 250 dctRows.forward(temp, scale); 251 for (int r = 0; r < rows; r++) { 252 a[r][c] = temp[r]; 253 } 254 } 255 } 256 }); 257 } 258 ConcurrencyUtils.waitForCompletion(futures); 259 } else { 260 for (int i = 0; i < rows; i++) { 261 dctColumns.forward(a[i], scale); 262 } 263 float[] temp = new float[rows]; 264 for (int c = 0; c < columns; c++) { 265 for (int r = 0; r < rows; r++) { 266 temp[r] = a[r][c]; 267 } 268 dctRows.forward(temp, scale); 269 for (int r = 0; r < rows; r++) { 270 a[r][c] = temp[r]; 271 } 272 } 273 } 274 } 275 } 276 277 /** 278 * Computes 2D inverse DCT (DCT-III) leaving the result in <code>a</code>. 279 * The data is stored in 1D array in row-major order. 280 * 281 * @param a 282 * data to transform 283 * @param scale 284 * if true then scaling is performed 285 */ 286 public void inverse(final float[] a, final boolean scale) { 287 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 288 if (isPowerOfTwo) { 289 if (nthreads != oldNthreads) { 290 nt = 4 * nthreads * rows; 291 if (columns == 2 * nthreads) { 292 nt >>= 1; 293 } else if (columns < 2 * nthreads) { 294 nt >>= 2; 295 } 296 t = new float[nt]; 297 oldNthreads = nthreads; 298 } 299 if ((nthreads > 1) && useThreads) { 300 ddxt2d_subth(1, a, scale); 301 ddxt2d0_subth(1, a, scale); 302 } else { 303 ddxt2d_sub(1, a, scale); 304 for (int i = 0; i < rows; i++) { 305 dctColumns.inverse(a, i * columns, scale); 306 } 307 } 308 } else { 309 if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) { 310 Future<?>[] futures = new Future[nthreads]; 311 int p = rows / nthreads; 312 for (int l = 0; l < nthreads; l++) { 313 final int firstRow = l * p; 314 final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p; 315 futures[l] = ConcurrencyUtils.submit(new Runnable() { 316 public void run() { 317 for (int i = firstRow; i < lastRow; i++) { 318 dctColumns.inverse(a, i * columns, scale); 319 } 320 } 321 }); 322 } 323 ConcurrencyUtils.waitForCompletion(futures); 324 p = columns / nthreads; 325 for (int l = 0; l < nthreads; l++) { 326 final int firstColumn = l * p; 327 final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p; 328 futures[l] = ConcurrencyUtils.submit(new Runnable() { 329 public void run() { 330 float[] temp = new float[rows]; 331 for (int c = firstColumn; c < lastColumn; c++) { 332 for (int r = 0; r < rows; r++) { 333 temp[r] = a[r * columns + c]; 334 } 335 dctRows.inverse(temp, scale); 336 for (int r = 0; r < rows; r++) { 337 a[r * columns + c] = temp[r]; 338 } 339 } 340 } 341 }); 342 } 343 ConcurrencyUtils.waitForCompletion(futures); 344 } else { 345 for (int i = 0; i < rows; i++) { 346 dctColumns.inverse(a, i * columns, scale); 347 } 348 float[] temp = new float[rows]; 349 for (int c = 0; c < columns; c++) { 350 for (int r = 0; r < rows; r++) { 351 temp[r] = a[r * columns + c]; 352 } 353 dctRows.inverse(temp, scale); 354 for (int r = 0; r < rows; r++) { 355 a[r * columns + c] = temp[r]; 356 } 357 } 358 } 359 } 360 } 361 362 /** 363 * Computes 2D inverse DCT (DCT-III) leaving the result in <code>a</code>. 364 * The data is stored in 2D array. 365 * 366 * @param a 367 * data to transform 368 * @param scale 369 * if true then scaling is performed 370 */ 371 public void inverse(final float[][] a, final boolean scale) { 372 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 373 if (isPowerOfTwo) { 374 if (nthreads != oldNthreads) { 375 nt = 4 * nthreads * rows; 376 if (columns == 2 * nthreads) { 377 nt >>= 1; 378 } else if (columns < 2 * nthreads) { 379 nt >>= 2; 380 } 381 t = new float[nt]; 382 oldNthreads = nthreads; 383 } 384 if ((nthreads > 1) && useThreads) { 385 ddxt2d_subth(1, a, scale); 386 ddxt2d0_subth(1, a, scale); 387 } else { 388 ddxt2d_sub(1, a, scale); 389 for (int i = 0; i < rows; i++) { 390 dctColumns.inverse(a[i], scale); 391 } 392 } 393 } else { 394 if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) { 395 Future<?>[] futures = new Future[nthreads]; 396 int p = rows / nthreads; 397 for (int l = 0; l < nthreads; l++) { 398 final int firstRow = l * p; 399 final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p; 400 futures[l] = ConcurrencyUtils.submit(new Runnable() { 401 public void run() { 402 for (int i = firstRow; i < lastRow; i++) { 403 dctColumns.inverse(a[i], scale); 404 } 405 } 406 }); 407 } 408 ConcurrencyUtils.waitForCompletion(futures); 409 p = columns / nthreads; 410 for (int l = 0; l < nthreads; l++) { 411 final int firstColumn = l * p; 412 final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p; 413 futures[l] = ConcurrencyUtils.submit(new Runnable() { 414 public void run() { 415 float[] temp = new float[rows]; 416 for (int c = firstColumn; c < lastColumn; c++) { 417 for (int r = 0; r < rows; r++) { 418 temp[r] = a[r][c]; 419 } 420 dctRows.inverse(temp, scale); 421 for (int r = 0; r < rows; r++) { 422 a[r][c] = temp[r]; 423 } 424 } 425 } 426 }); 427 } 428 ConcurrencyUtils.waitForCompletion(futures); 429 } else { 430 for (int r = 0; r < rows; r++) { 431 dctColumns.inverse(a[r], scale); 432 } 433 float[] temp = new float[rows]; 434 for (int c = 0; c < columns; c++) { 435 for (int r = 0; r < rows; r++) { 436 temp[r] = a[r][c]; 437 } 438 dctRows.inverse(temp, scale); 439 for (int r = 0; r < rows; r++) { 440 a[r][c] = temp[r]; 441 } 442 } 443 } 444 } 445 } 446 447 private void ddxt2d_subth(final int isgn, final float[] a, final boolean scale) { 448 int nthread = ConcurrencyUtils.getNumberOfThreads(); 449 int nt = 4 * rows; 450 if (columns == 2 * nthread) { 451 nt >>= 1; 452 } else if (columns < 2 * nthread) { 453 nthread = columns; 454 nt >>= 2; 455 } 456 final int nthreads = nthread; 457 Future<?>[] futures = new Future[nthread]; 458 459 for (int i = 0; i < nthread; i++) { 460 final int n0 = i; 461 final int startt = nt * i; 462 futures[i] = ConcurrencyUtils.submit(new Runnable() { 463 public void run() { 464 int idx1, idx2; 465 if (columns > 2 * nthreads) { 466 if (isgn == -1) { 467 for (int c = 4 * n0; c < columns; c += 4 * nthreads) { 468 for (int r = 0; r < rows; r++) { 469 idx1 = r * columns + c; 470 idx2 = startt + rows + r; 471 t[startt + r] = a[idx1]; 472 t[idx2] = a[idx1 + 1]; 473 t[idx2 + rows] = a[idx1 + 2]; 474 t[idx2 + 2 * rows] = a[idx1 + 3]; 475 } 476 dctRows.forward(t, startt, scale); 477 dctRows.forward(t, startt + rows, scale); 478 dctRows.forward(t, startt + 2 * rows, scale); 479 dctRows.forward(t, startt + 3 * rows, scale); 480 for (int r = 0; r < rows; r++) { 481 idx1 = r * columns + c; 482 idx2 = startt + rows + r; 483 a[idx1] = t[startt + r]; 484 a[idx1 + 1] = t[idx2]; 485 a[idx1 + 2] = t[idx2 + rows]; 486 a[idx1 + 3] = t[idx2 + 2 * rows]; 487 } 488 } 489 } else { 490 for (int c = 4 * n0; c < columns; c += 4 * nthreads) { 491 for (int r = 0; r < rows; r++) { 492 idx1 = r * columns + c; 493 idx2 = startt + rows + r; 494 t[startt + r] = a[idx1]; 495 t[idx2] = a[idx1 + 1]; 496 t[idx2 + rows] = a[idx1 + 2]; 497 t[idx2 + 2 * rows] = a[idx1 + 3]; 498 } 499 dctRows.inverse(t, startt, scale); 500 dctRows.inverse(t, startt + rows, scale); 501 dctRows.inverse(t, startt + 2 * rows, scale); 502 dctRows.inverse(t, startt + 3 * rows, scale); 503 for (int r = 0; r < rows; r++) { 504 idx1 = r * columns + c; 505 idx2 = startt + rows + r; 506 a[idx1] = t[startt + r]; 507 a[idx1 + 1] = t[idx2]; 508 a[idx1 + 2] = t[idx2 + rows]; 509 a[idx1 + 3] = t[idx2 + 2 * rows]; 510 } 511 } 512 } 513 } else if (columns == 2 * nthreads) { 514 for (int r = 0; r < rows; r++) { 515 idx1 = r * columns + 2 * n0; 516 idx2 = startt + r; 517 t[idx2] = a[idx1]; 518 t[idx2 + rows] = a[idx1 + 1]; 519 } 520 if (isgn == -1) { 521 dctRows.forward(t, startt, scale); 522 dctRows.forward(t, startt + rows, scale); 523 } else { 524 dctRows.inverse(t, startt, scale); 525 dctRows.inverse(t, startt + rows, scale); 526 } 527 for (int r = 0; r < rows; r++) { 528 idx1 = r * columns + 2 * n0; 529 idx2 = startt + r; 530 a[idx1] = t[idx2]; 531 a[idx1 + 1] = t[idx2 + rows]; 532 } 533 } else if (columns == nthreads) { 534 for (int r = 0; r < rows; r++) { 535 t[startt + r] = a[r * columns + n0]; 536 } 537 if (isgn == -1) { 538 dctRows.forward(t, startt, scale); 539 } else { 540 dctRows.inverse(t, startt, scale); 541 } 542 for (int r = 0; r < rows; r++) { 543 a[r * columns + n0] = t[startt + r]; 544 } 545 } 546 } 547 }); 548 } 549 ConcurrencyUtils.waitForCompletion(futures); 550 } 551 552 private void ddxt2d_subth(final int isgn, final float[][] a, final boolean scale) { 553 int nthread = ConcurrencyUtils.getNumberOfThreads(); 554 int nt = 4 * rows; 555 if (columns == 2 * nthread) { 556 nt >>= 1; 557 } else if (columns < 2 * nthread) { 558 nthread = columns; 559 nt >>= 2; 560 } 561 final int nthreads = nthread; 562 Future<?>[] futures = new Future[nthread]; 563 564 for (int i = 0; i < nthread; i++) { 565 final int n0 = i; 566 final int startt = nt * i; 567 futures[i] = ConcurrencyUtils.submit(new Runnable() { 568 public void run() { 569 int idx2; 570 if (columns > 2 * nthreads) { 571 if (isgn == -1) { 572 for (int c = 4 * n0; c < columns; c += 4 * nthreads) { 573 for (int r = 0; r < rows; r++) { 574 idx2 = startt + rows + r; 575 t[startt + r] = a[r][c]; 576 t[idx2] = a[r][c + 1]; 577 t[idx2 + rows] = a[r][c + 2]; 578 t[idx2 + 2 * rows] = a[r][c + 3]; 579 } 580 dctRows.forward(t, startt, scale); 581 dctRows.forward(t, startt + rows, scale); 582 dctRows.forward(t, startt + 2 * rows, scale); 583 dctRows.forward(t, startt + 3 * rows, scale); 584 for (int r = 0; r < rows; r++) { 585 idx2 = startt + rows + r; 586 a[r][c] = t[startt + r]; 587 a[r][c + 1] = t[idx2]; 588 a[r][c + 2] = t[idx2 + rows]; 589 a[r][c + 3] = t[idx2 + 2 * rows]; 590 } 591 } 592 } else { 593 for (int c = 4 * n0; c < columns; c += 4 * nthreads) { 594 for (int r = 0; r < rows; r++) { 595 idx2 = startt + rows + r; 596 t[startt + r] = a[r][c]; 597 t[idx2] = a[r][c + 1]; 598 t[idx2 + rows] = a[r][c + 2]; 599 t[idx2 + 2 * rows] = a[r][c + 3]; 600 } 601 dctRows.inverse(t, startt, scale); 602 dctRows.inverse(t, startt + rows, scale); 603 dctRows.inverse(t, startt + 2 * rows, scale); 604 dctRows.inverse(t, startt + 3 * rows, scale); 605 for (int r = 0; r < rows; r++) { 606 idx2 = startt + rows + r; 607 a[r][c] = t[startt + r]; 608 a[r][c + 1] = t[idx2]; 609 a[r][c + 2] = t[idx2 + rows]; 610 a[r][c + 3] = t[idx2 + 2 * rows]; 611 } 612 } 613 } 614 } else if (columns == 2 * nthreads) { 615 for (int r = 0; r < rows; r++) { 616 idx2 = startt + r; 617 t[idx2] = a[r][2 * n0]; 618 t[idx2 + rows] = a[r][2 * n0 + 1]; 619 } 620 if (isgn == -1) { 621 dctRows.forward(t, startt, scale); 622 dctRows.forward(t, startt + rows, scale); 623 } else { 624 dctRows.inverse(t, startt, scale); 625 dctRows.inverse(t, startt + rows, scale); 626 } 627 for (int r = 0; r < rows; r++) { 628 idx2 = startt + r; 629 a[r][2 * n0] = t[idx2]; 630 a[r][2 * n0 + 1] = t[idx2 + rows]; 631 } 632 } else if (columns == nthreads) { 633 for (int r = 0; r < rows; r++) { 634 t[startt + r] = a[r][n0]; 635 } 636 if (isgn == -1) { 637 dctRows.forward(t, startt, scale); 638 } else { 639 dctRows.inverse(t, startt, scale); 640 } 641 for (int r = 0; r < rows; r++) { 642 a[r][n0] = t[startt + r]; 643 } 644 } 645 } 646 }); 647 } 648 ConcurrencyUtils.waitForCompletion(futures); 649 } 650 651 private void ddxt2d0_subth(final int isgn, final float[] a, final boolean scale) { 652 final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads(); 653 654 Future<?>[] futures = new Future[nthreads]; 655 656 for (int i = 0; i < nthreads; i++) { 657 final int n0 = i; 658 futures[i] = ConcurrencyUtils.submit(new Runnable() { 659 660 public void run() { 661 if (isgn == -1) { 662 for (int r = n0; r < rows; r += nthreads) { 663 dctColumns.forward(a, r * columns, scale); 664 } 665 } else { 666 for (int r = n0; r < rows; r += nthreads) { 667 dctColumns.inverse(a, r * columns, scale); 668 } 669 } 670 } 671 }); 672 } 673 ConcurrencyUtils.waitForCompletion(futures); 674 } 675 676 private void ddxt2d0_subth(final int isgn, final float[][] a, final boolean scale) { 677 final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads(); 678 679 Future<?>[] futures = new Future[nthreads]; 680 681 for (int i = 0; i < nthreads; i++) { 682 final int n0 = i; 683 futures[i] = ConcurrencyUtils.submit(new Runnable() { 684 685 public void run() { 686 if (isgn == -1) { 687 for (int r = n0; r < rows; r += nthreads) { 688 dctColumns.forward(a[r], scale); 689 } 690 } else { 691 for (int r = n0; r < rows; r += nthreads) { 692 dctColumns.inverse(a[r], scale); 693 } 694 } 695 } 696 }); 697 } 698 ConcurrencyUtils.waitForCompletion(futures); 699 } 700 701 private void ddxt2d_sub(int isgn, float[] a, boolean scale) { 702 int idx1, idx2; 703 704 if (columns > 2) { 705 if (isgn == -1) { 706 for (int c = 0; c < columns; c += 4) { 707 for (int r = 0; r < rows; r++) { 708 idx1 = r * columns + c; 709 idx2 = rows + r; 710 t[r] = a[idx1]; 711 t[idx2] = a[idx1 + 1]; 712 t[idx2 + rows] = a[idx1 + 2]; 713 t[idx2 + 2 * rows] = a[idx1 + 3]; 714 } 715 dctRows.forward(t, 0, scale); 716 dctRows.forward(t, rows, scale); 717 dctRows.forward(t, 2 * rows, scale); 718 dctRows.forward(t, 3 * rows, scale); 719 for (int r = 0; r < rows; r++) { 720 idx1 = r * columns + c; 721 idx2 = rows + r; 722 a[idx1] = t[r]; 723 a[idx1 + 1] = t[idx2]; 724 a[idx1 + 2] = t[idx2 + rows]; 725 a[idx1 + 3] = t[idx2 + 2 * rows]; 726 } 727 } 728 } else { 729 for (int c = 0; c < columns; c += 4) { 730 for (int r = 0; r < rows; r++) { 731 idx1 = r * columns + c; 732 idx2 = rows + r; 733 t[r] = a[idx1]; 734 t[idx2] = a[idx1 + 1]; 735 t[idx2 + rows] = a[idx1 + 2]; 736 t[idx2 + 2 * rows] = a[idx1 + 3]; 737 } 738 dctRows.inverse(t, 0, scale); 739 dctRows.inverse(t, rows, scale); 740 dctRows.inverse(t, 2 * rows, scale); 741 dctRows.inverse(t, 3 * rows, scale); 742 for (int r = 0; r < rows; r++) { 743 idx1 = r * columns + c; 744 idx2 = rows + r; 745 a[idx1] = t[r]; 746 a[idx1 + 1] = t[idx2]; 747 a[idx1 + 2] = t[idx2 + rows]; 748 a[idx1 + 3] = t[idx2 + 2 * rows]; 749 } 750 } 751 } 752 } else if (columns == 2) { 753 for (int r = 0; r < rows; r++) { 754 idx1 = r * columns; 755 t[r] = a[idx1]; 756 t[rows + r] = a[idx1 + 1]; 757 } 758 if (isgn == -1) { 759 dctRows.forward(t, 0, scale); 760 dctRows.forward(t, rows, scale); 761 } else { 762 dctRows.inverse(t, 0, scale); 763 dctRows.inverse(t, rows, scale); 764 } 765 for (int r = 0; r < rows; r++) { 766 idx1 = r * columns; 767 a[idx1] = t[r]; 768 a[idx1 + 1] = t[rows + r]; 769 } 770 } 771 } 772 773 private void ddxt2d_sub(int isgn, float[][] a, boolean scale) { 774 int idx2; 775 776 if (columns > 2) { 777 if (isgn == -1) { 778 for (int c = 0; c < columns; c += 4) { 779 for (int r = 0; r < rows; r++) { 780 idx2 = rows + r; 781 t[r] = a[r][c]; 782 t[idx2] = a[r][c + 1]; 783 t[idx2 + rows] = a[r][c + 2]; 784 t[idx2 + 2 * rows] = a[r][c + 3]; 785 } 786 dctRows.forward(t, 0, scale); 787 dctRows.forward(t, rows, scale); 788 dctRows.forward(t, 2 * rows, scale); 789 dctRows.forward(t, 3 * rows, scale); 790 for (int r = 0; r < rows; r++) { 791 idx2 = rows + r; 792 a[r][c] = t[r]; 793 a[r][c + 1] = t[idx2]; 794 a[r][c + 2] = t[idx2 + rows]; 795 a[r][c + 3] = t[idx2 + 2 * rows]; 796 } 797 } 798 } else { 799 for (int c = 0; c < columns; c += 4) { 800 for (int r = 0; r < rows; r++) { 801 idx2 = rows + r; 802 t[r] = a[r][c]; 803 t[idx2] = a[r][c + 1]; 804 t[idx2 + rows] = a[r][c + 2]; 805 t[idx2 + 2 * rows] = a[r][c + 3]; 806 } 807 dctRows.inverse(t, 0, scale); 808 dctRows.inverse(t, rows, scale); 809 dctRows.inverse(t, 2 * rows, scale); 810 dctRows.inverse(t, 3 * rows, scale); 811 for (int r = 0; r < rows; r++) { 812 idx2 = rows + r; 813 a[r][c] = t[r]; 814 a[r][c + 1] = t[idx2]; 815 a[r][c + 2] = t[idx2 + rows]; 816 a[r][c + 3] = t[idx2 + 2 * rows]; 817 } 818 } 819 } 820 } else if (columns == 2) { 821 for (int r = 0; r < rows; r++) { 822 t[r] = a[r][0]; 823 t[rows + r] = a[r][1]; 824 } 825 if (isgn == -1) { 826 dctRows.forward(t, 0, scale); 827 dctRows.forward(t, rows, scale); 828 } else { 829 dctRows.inverse(t, 0, scale); 830 dctRows.inverse(t, rows, scale); 831 } 832 for (int r = 0; r < rows; r++) { 833 a[r][0] = t[r]; 834 a[r][1] = t[rows + r]; 835 } 836 } 837 } 838}