001/* ***** BEGIN LICENSE BLOCK ***** 002 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 003 * 004 * The contents of this file are subject to the Mozilla Public License Version 005 * 1.1 (the "License"); you may not use this file except in compliance with 006 * the License. You may obtain a copy of the License at 007 * http://www.mozilla.org/MPL/ 008 * 009 * Software distributed under the License is distributed on an "AS IS" basis, 010 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 011 * for the specific language governing rights and limitations under the 012 * License. 013 * 014 * The Original Code is JTransforms. 015 * 016 * The Initial Developer of the Original Code is 017 * Piotr Wendykier, Emory University. 018 * Portions created by the Initial Developer are Copyright (C) 2007-2009 019 * the Initial Developer. All Rights Reserved. 020 * 021 * Alternatively, the contents of this file may be used under the terms of 022 * either the GNU General Public License Version 2 or later (the "GPL"), or 023 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 024 * in which case the provisions of the GPL or the LGPL are applicable instead 025 * of those above. If you wish to allow use of your version of this file only 026 * under the terms of either the GPL or the LGPL, and not to allow others to 027 * use your version of this file under the terms of the MPL, indicate your 028 * decision by deleting the provisions above and replace them with the notice 029 * and other provisions required by the GPL or the LGPL. If you do not delete 030 * the provisions above, a recipient may use your version of this file under 031 * the terms of any one of the MPL, the GPL or the LGPL. 032 * 033 * ***** END LICENSE BLOCK ***** */ 034 035package edu.emory.mathcs.jtransforms.dst; 036 037import java.util.concurrent.Future; 038 039import edu.emory.mathcs.utils.ConcurrencyUtils; 040 041/** 042 * Computes 2D Discrete Sine Transform (DST) of single precision data. The sizes 043 * of both dimensions can be arbitrary numbers. This is a parallel 044 * implementation optimized for SMP systems.<br> 045 * <br> 046 * Part of code is derived from General Purpose FFT Package written by Takuya Ooura 047 * (http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html) 048 * 049 * @author Piotr Wendykier (piotr.wendykier@gmail.com) 050 * 051 */ 052public class FloatDST_2D { 053 054 private int rows; 055 056 private int columns; 057 058 private float[] t; 059 060 private FloatDST_1D dstColumns, dstRows; 061 062 private int oldNthreads; 063 064 private int nt; 065 066 private boolean isPowerOfTwo = false; 067 068 private boolean useThreads = false; 069 070 /** 071 * Creates new instance of FloatDST_2D. 072 * 073 * @param rows 074 * number of rows 075 * @param columns 076 * number of columns 077 */ 078 public FloatDST_2D(int rows, int columns) { 079 if (rows <= 1 || columns <= 1) { 080 throw new IllegalArgumentException("rows and columns must be greater than 1"); 081 } 082 this.rows = rows; 083 this.columns = columns; 084 if (rows * columns >= ConcurrencyUtils.getThreadsBeginN_2D()) { 085 useThreads = true; 086 } 087 if (ConcurrencyUtils.isPowerOf2(rows) && ConcurrencyUtils.isPowerOf2(columns)) { 088 isPowerOfTwo = true; 089 oldNthreads = ConcurrencyUtils.getNumberOfThreads(); 090 nt = 4 * oldNthreads * rows; 091 if (columns == 2 * oldNthreads) { 092 nt >>= 1; 093 } else if (columns < 2 * oldNthreads) { 094 nt >>= 2; 095 } 096 t = new float[nt]; 097 } 098 dstColumns = new FloatDST_1D(columns); 099 if (columns == rows) { 100 dstRows = dstColumns; 101 } else { 102 dstRows = new FloatDST_1D(rows); 103 } 104 } 105 106 /** 107 * Computes 2D forward DST (DST-II) leaving the result in <code>a</code>. 108 * The data is stored in 1D array in row-major order. 109 * 110 * @param a 111 * data to transform 112 * @param scale 113 * if true then scaling is performed 114 */ 115 public void forward(final float[] a, final boolean scale) { 116 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 117 if (isPowerOfTwo) { 118 if (nthreads != oldNthreads) { 119 nt = 4 * nthreads * rows; 120 if (columns == 2 * nthreads) { 121 nt >>= 1; 122 } else if (columns < 2 * nthreads) { 123 nt >>= 2; 124 } 125 t = new float[nt]; 126 oldNthreads = nthreads; 127 } 128 if ((nthreads > 1) && useThreads) { 129 ddxt2d_subth(-1, a, scale); 130 ddxt2d0_subth(-1, a, scale); 131 } else { 132 ddxt2d_sub(-1, a, scale); 133 for (int i = 0; i < rows; i++) { 134 dstColumns.forward(a, i * columns, scale); 135 } 136 } 137 } else { 138 if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) { 139 Future<?>[] futures = new Future[nthreads]; 140 int p = rows / nthreads; 141 for (int l = 0; l < nthreads; l++) { 142 final int firstRow = l * p; 143 final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p; 144 futures[l] = ConcurrencyUtils.submit(new Runnable() { 145 public void run() { 146 for (int i = firstRow; i < lastRow; i++) { 147 dstColumns.forward(a, i * columns, scale); 148 } 149 } 150 }); 151 } 152 ConcurrencyUtils.waitForCompletion(futures); 153 p = columns / nthreads; 154 for (int l = 0; l < nthreads; l++) { 155 final int firstColumn = l * p; 156 final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p; 157 futures[l] = ConcurrencyUtils.submit(new Runnable() { 158 public void run() { 159 float[] temp = new float[rows]; 160 for (int c = firstColumn; c < lastColumn; c++) { 161 for (int r = 0; r < rows; r++) { 162 temp[r] = a[r * columns + c]; 163 } 164 dstRows.forward(temp, scale); 165 for (int r = 0; r < rows; r++) { 166 a[r * columns + c] = temp[r]; 167 } 168 } 169 } 170 }); 171 } 172 ConcurrencyUtils.waitForCompletion(futures); 173 } else { 174 for (int i = 0; i < rows; i++) { 175 dstColumns.forward(a, i * columns, scale); 176 } 177 float[] temp = new float[rows]; 178 for (int c = 0; c < columns; c++) { 179 for (int r = 0; r < rows; r++) { 180 temp[r] = a[r * columns + c]; 181 } 182 dstRows.forward(temp, scale); 183 for (int r = 0; r < rows; r++) { 184 a[r * columns + c] = temp[r]; 185 } 186 } 187 } 188 } 189 } 190 191 /** 192 * Computes 2D forward DST (DST-II) leaving the result in <code>a</code>. 193 * The data is stored in 2D array. 194 * 195 * @param a 196 * data to transform 197 * @param scale 198 * if true then scaling is performed 199 */ 200 public void forward(final float[][] a, final boolean scale) { 201 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 202 if (isPowerOfTwo) { 203 if (nthreads != oldNthreads) { 204 nt = 4 * nthreads * rows; 205 if (columns == 2 * nthreads) { 206 nt >>= 1; 207 } else if (columns < 2 * nthreads) { 208 nt >>= 2; 209 } 210 t = new float[nt]; 211 oldNthreads = nthreads; 212 } 213 if ((nthreads > 1) && useThreads) { 214 ddxt2d_subth(-1, a, scale); 215 ddxt2d0_subth(-1, a, scale); 216 } else { 217 ddxt2d_sub(-1, a, scale); 218 for (int i = 0; i < rows; i++) { 219 dstColumns.forward(a[i], scale); 220 } 221 } 222 } else { 223 if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) { 224 Future<?>[] futures = new Future[nthreads]; 225 int p = rows / nthreads; 226 for (int l = 0; l < nthreads; l++) { 227 final int firstRow = l * p; 228 final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p; 229 futures[l] = ConcurrencyUtils.submit(new Runnable() { 230 public void run() { 231 for (int i = firstRow; i < lastRow; i++) { 232 dstColumns.forward(a[i], scale); 233 } 234 } 235 }); 236 } 237 ConcurrencyUtils.waitForCompletion(futures); 238 p = columns / nthreads; 239 for (int l = 0; l < nthreads; l++) { 240 final int firstColumn = l * p; 241 final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p; 242 futures[l] = ConcurrencyUtils.submit(new Runnable() { 243 public void run() { 244 float[] temp = new float[rows]; 245 for (int c = firstColumn; c < lastColumn; c++) { 246 for (int r = 0; r < rows; r++) { 247 temp[r] = a[r][c]; 248 } 249 dstRows.forward(temp, scale); 250 for (int r = 0; r < rows; r++) { 251 a[r][c] = temp[r]; 252 } 253 } 254 } 255 }); 256 } 257 ConcurrencyUtils.waitForCompletion(futures); 258 } else { 259 for (int i = 0; i < rows; i++) { 260 dstColumns.forward(a[i], scale); 261 } 262 float[] temp = new float[rows]; 263 for (int c = 0; c < columns; c++) { 264 for (int r = 0; r < rows; r++) { 265 temp[r] = a[r][c]; 266 } 267 dstRows.forward(temp, scale); 268 for (int r = 0; r < rows; r++) { 269 a[r][c] = temp[r]; 270 } 271 } 272 } 273 } 274 } 275 276 /** 277 * Computes 2D inverse DST (DST-III) leaving the result in <code>a</code>. 278 * The data is stored in 1D array in row-major order. 279 * 280 * @param a 281 * data to transform 282 * @param scale 283 * if true then scaling is performed 284 */ 285 public void inverse(final float[] a, final boolean scale) { 286 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 287 if (isPowerOfTwo) { 288 if (nthreads != oldNthreads) { 289 nt = 4 * nthreads * rows; 290 if (columns == 2 * nthreads) { 291 nt >>= 1; 292 } else if (columns < 2 * nthreads) { 293 nt >>= 2; 294 } 295 t = new float[nt]; 296 oldNthreads = nthreads; 297 } 298 if ((nthreads > 1) && useThreads) { 299 ddxt2d_subth(1, a, scale); 300 ddxt2d0_subth(1, a, scale); 301 } else { 302 ddxt2d_sub(1, a, scale); 303 for (int i = 0; i < rows; i++) { 304 dstColumns.inverse(a, i * columns, scale); 305 } 306 } 307 } else { 308 if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) { 309 Future<?>[] futures = new Future[nthreads]; 310 int p = rows / nthreads; 311 for (int l = 0; l < nthreads; l++) { 312 final int firstRow = l * p; 313 final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p; 314 futures[l] = ConcurrencyUtils.submit(new Runnable() { 315 public void run() { 316 for (int i = firstRow; i < lastRow; i++) { 317 dstColumns.inverse(a, i * columns, scale); 318 } 319 } 320 }); 321 } 322 ConcurrencyUtils.waitForCompletion(futures); 323 p = columns / nthreads; 324 for (int l = 0; l < nthreads; l++) { 325 final int firstColumn = l * p; 326 final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p; 327 futures[l] = ConcurrencyUtils.submit(new Runnable() { 328 public void run() { 329 float[] temp = new float[rows]; 330 for (int c = firstColumn; c < lastColumn; c++) { 331 for (int r = 0; r < rows; r++) { 332 temp[r] = a[r * columns + c]; 333 } 334 dstRows.inverse(temp, scale); 335 for (int r = 0; r < rows; r++) { 336 a[r * columns + c] = temp[r]; 337 } 338 } 339 } 340 }); 341 } 342 ConcurrencyUtils.waitForCompletion(futures); 343 } else { 344 for (int i = 0; i < rows; i++) { 345 dstColumns.inverse(a, i * columns, scale); 346 } 347 float[] temp = new float[rows]; 348 for (int c = 0; c < columns; c++) { 349 for (int r = 0; r < rows; r++) { 350 temp[r] = a[r * columns + c]; 351 } 352 dstRows.inverse(temp, scale); 353 for (int r = 0; r < rows; r++) { 354 a[r * columns + c] = temp[r]; 355 } 356 } 357 } 358 } 359 } 360 361 /** 362 * Computes 2D inverse DST (DST-III) leaving the result in <code>a</code>. 363 * The data is stored in 2D array. 364 * 365 * @param a 366 * data to transform 367 * @param scale 368 * if true then scaling is performed 369 */ 370 public void inverse(final float[][] a, final boolean scale) { 371 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 372 if (isPowerOfTwo) { 373 if (nthreads != oldNthreads) { 374 nt = 4 * nthreads * rows; 375 if (columns == 2 * nthreads) { 376 nt >>= 1; 377 } else if (columns < 2 * nthreads) { 378 nt >>= 2; 379 } 380 t = new float[nt]; 381 oldNthreads = nthreads; 382 } 383 if ((nthreads > 1) && useThreads) { 384 ddxt2d_subth(1, a, scale); 385 ddxt2d0_subth(1, a, scale); 386 } else { 387 ddxt2d_sub(1, a, scale); 388 for (int i = 0; i < rows; i++) { 389 dstColumns.inverse(a[i], scale); 390 } 391 } 392 } else { 393 if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) { 394 Future<?>[] futures = new Future[nthreads]; 395 int p = rows / nthreads; 396 for (int l = 0; l < nthreads; l++) { 397 final int firstRow = l * p; 398 final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p; 399 futures[l] = ConcurrencyUtils.submit(new Runnable() { 400 public void run() { 401 for (int i = firstRow; i < lastRow; i++) { 402 dstColumns.inverse(a[i], scale); 403 } 404 } 405 }); 406 } 407 ConcurrencyUtils.waitForCompletion(futures); 408 p = columns / nthreads; 409 for (int l = 0; l < nthreads; l++) { 410 final int firstColumn = l * p; 411 final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p; 412 futures[l] = ConcurrencyUtils.submit(new Runnable() { 413 public void run() { 414 float[] temp = new float[rows]; 415 for (int c = firstColumn; c < lastColumn; c++) { 416 for (int r = 0; r < rows; r++) { 417 temp[r] = a[r][c]; 418 } 419 dstRows.inverse(temp, scale); 420 for (int r = 0; r < rows; r++) { 421 a[r][c] = temp[r]; 422 } 423 } 424 } 425 }); 426 } 427 ConcurrencyUtils.waitForCompletion(futures); 428 } else { 429 for (int i = 0; i < rows; i++) { 430 dstColumns.inverse(a[i], scale); 431 } 432 float[] temp = new float[rows]; 433 for (int c = 0; c < columns; c++) { 434 for (int r = 0; r < rows; r++) { 435 temp[r] = a[r][c]; 436 } 437 dstRows.inverse(temp, scale); 438 for (int r = 0; r < rows; r++) { 439 a[r][c] = temp[r]; 440 } 441 } 442 } 443 } 444 } 445 446 private void ddxt2d_subth(final int isgn, final float[] a, final boolean scale) { 447 int nthread = ConcurrencyUtils.getNumberOfThreads(); 448 int nt = 4 * rows; 449 if (columns == 2 * nthread) { 450 nt >>= 1; 451 } else if (columns < 2 * nthread) { 452 nthread = columns; 453 nt >>= 2; 454 } 455 final int nthreads = nthread; 456 Future<?>[] futures = new Future[nthreads]; 457 458 for (int i = 0; i < nthreads; i++) { 459 final int n0 = i; 460 final int startt = nt * i; 461 futures[i] = ConcurrencyUtils.submit(new Runnable() { 462 public void run() { 463 int idx1, idx2; 464 if (columns > 2 * nthreads) { 465 if (isgn == -1) { 466 for (int c = 4 * n0; c < columns; c += 4 * nthreads) { 467 for (int r = 0; r < rows; r++) { 468 idx1 = r * columns + c; 469 idx2 = startt + rows + r; 470 t[startt + r] = a[idx1]; 471 t[idx2] = a[idx1 + 1]; 472 t[idx2 + rows] = a[idx1 + 2]; 473 t[idx2 + 2 * rows] = a[idx1 + 3]; 474 } 475 dstRows.forward(t, startt, scale); 476 dstRows.forward(t, startt + rows, scale); 477 dstRows.forward(t, startt + 2 * rows, scale); 478 dstRows.forward(t, startt + 3 * rows, scale); 479 for (int r = 0; r < rows; r++) { 480 idx1 = r * columns + c; 481 idx2 = startt + rows + r; 482 a[idx1] = t[startt + r]; 483 a[idx1 + 1] = t[idx2]; 484 a[idx1 + 2] = t[idx2 + rows]; 485 a[idx1 + 3] = t[idx2 + 2 * rows]; 486 } 487 } 488 } else { 489 for (int c = 4 * n0; c < columns; c += 4 * nthreads) { 490 for (int r = 0; r < rows; r++) { 491 idx1 = r * columns + c; 492 idx2 = startt + rows + r; 493 t[startt + r] = a[idx1]; 494 t[idx2] = a[idx1 + 1]; 495 t[idx2 + rows] = a[idx1 + 2]; 496 t[idx2 + 2 * rows] = a[idx1 + 3]; 497 } 498 dstRows.inverse(t, startt, scale); 499 dstRows.inverse(t, startt + rows, scale); 500 dstRows.inverse(t, startt + 2 * rows, scale); 501 dstRows.inverse(t, startt + 3 * rows, scale); 502 for (int r = 0; r < rows; r++) { 503 idx1 = r * columns + c; 504 idx2 = startt + rows + r; 505 a[idx1] = t[startt + r]; 506 a[idx1 + 1] = t[idx2]; 507 a[idx1 + 2] = t[idx2 + rows]; 508 a[idx1 + 3] = t[idx2 + 2 * rows]; 509 } 510 } 511 } 512 } else if (columns == 2 * nthreads) { 513 for (int r = 0; r < rows; r++) { 514 idx1 = r * columns + 2 * n0; 515 idx2 = startt + r; 516 t[idx2] = a[idx1]; 517 t[idx2 + rows] = a[idx1 + 1]; 518 } 519 if (isgn == -1) { 520 dstRows.forward(t, startt, scale); 521 dstRows.forward(t, startt + rows, scale); 522 } else { 523 dstRows.inverse(t, startt, scale); 524 dstRows.inverse(t, startt + rows, scale); 525 } 526 for (int r = 0; r < rows; r++) { 527 idx1 = r * columns + 2 * n0; 528 idx2 = startt + r; 529 a[idx1] = t[idx2]; 530 a[idx1 + 1] = t[idx2 + rows]; 531 } 532 } else if (columns == nthreads) { 533 for (int r = 0; r < rows; r++) { 534 t[startt + r] = a[r * columns + n0]; 535 } 536 if (isgn == -1) { 537 dstRows.forward(t, startt, scale); 538 } else { 539 dstRows.inverse(t, startt, scale); 540 } 541 for (int r = 0; r < rows; r++) { 542 a[r * columns + n0] = t[startt + r]; 543 } 544 } 545 } 546 }); 547 } 548 ConcurrencyUtils.waitForCompletion(futures); 549 } 550 551 private void ddxt2d_subth(final int isgn, final float[][] a, final boolean scale) { 552 int nthread = ConcurrencyUtils.getNumberOfThreads(); 553 int nt = 4 * rows; 554 if (columns == 2 * nthread) { 555 nt >>= 1; 556 } else if (columns < 2 * nthread) { 557 nthread = columns; 558 nt >>= 2; 559 } 560 final int nthreads = nthread; 561 Future<?>[] futures = new Future[nthreads]; 562 563 for (int i = 0; i < nthreads; i++) { 564 final int n0 = i; 565 final int startt = nt * i; 566 futures[i] = ConcurrencyUtils.submit(new Runnable() { 567 public void run() { 568 int idx2; 569 if (columns > 2 * nthreads) { 570 if (isgn == -1) { 571 for (int c = 4 * n0; c < columns; c += 4 * nthreads) { 572 for (int r = 0; r < rows; r++) { 573 idx2 = startt + rows + r; 574 t[startt + r] = a[r][c]; 575 t[idx2] = a[r][c + 1]; 576 t[idx2 + rows] = a[r][c + 2]; 577 t[idx2 + 2 * rows] = a[r][c + 3]; 578 } 579 dstRows.forward(t, startt, scale); 580 dstRows.forward(t, startt + rows, scale); 581 dstRows.forward(t, startt + 2 * rows, scale); 582 dstRows.forward(t, startt + 3 * rows, scale); 583 for (int r = 0; r < rows; r++) { 584 idx2 = startt + rows + r; 585 a[r][c] = t[startt + r]; 586 a[r][c + 1] = t[idx2]; 587 a[r][c + 2] = t[idx2 + rows]; 588 a[r][c + 3] = t[idx2 + 2 * rows]; 589 } 590 } 591 } else { 592 for (int c = 4 * n0; c < columns; c += 4 * nthreads) { 593 for (int r = 0; r < rows; r++) { 594 idx2 = startt + rows + r; 595 t[startt + r] = a[r][c]; 596 t[idx2] = a[r][c + 1]; 597 t[idx2 + rows] = a[r][c + 2]; 598 t[idx2 + 2 * rows] = a[r][c + 3]; 599 } 600 dstRows.inverse(t, startt, scale); 601 dstRows.inverse(t, startt + rows, scale); 602 dstRows.inverse(t, startt + 2 * rows, scale); 603 dstRows.inverse(t, startt + 3 * rows, scale); 604 for (int r = 0; r < rows; r++) { 605 idx2 = startt + rows + r; 606 a[r][c] = t[startt + r]; 607 a[r][c + 1] = t[idx2]; 608 a[r][c + 2] = t[idx2 + rows]; 609 a[r][c + 3] = t[idx2 + 2 * rows]; 610 } 611 } 612 } 613 } else if (columns == 2 * nthreads) { 614 for (int r = 0; r < rows; r++) { 615 idx2 = startt + r; 616 t[idx2] = a[r][2 * n0]; 617 t[idx2 + rows] = a[r][2 * n0 + 1]; 618 } 619 if (isgn == -1) { 620 dstRows.forward(t, startt, scale); 621 dstRows.forward(t, startt + rows, scale); 622 } else { 623 dstRows.inverse(t, startt, scale); 624 dstRows.inverse(t, startt + rows, scale); 625 } 626 for (int r = 0; r < rows; r++) { 627 idx2 = startt + r; 628 a[r][2 * n0] = t[idx2]; 629 a[r][2 * n0 + 1] = t[idx2 + rows]; 630 } 631 } else if (columns == nthreads) { 632 for (int r = 0; r < rows; r++) { 633 t[startt + r] = a[r][n0]; 634 } 635 if (isgn == -1) { 636 dstRows.forward(t, startt, scale); 637 } else { 638 dstRows.inverse(t, startt, scale); 639 } 640 for (int r = 0; r < rows; r++) { 641 a[r][n0] = t[startt + r]; 642 } 643 } 644 } 645 }); 646 } 647 ConcurrencyUtils.waitForCompletion(futures); 648 } 649 650 private void ddxt2d0_subth(final int isgn, final float[] a, final boolean scale) { 651 final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads(); 652 653 Future<?>[] futures = new Future[nthreads]; 654 655 for (int i = 0; i < nthreads; i++) { 656 final int n0 = i; 657 futures[i] = ConcurrencyUtils.submit(new Runnable() { 658 659 public void run() { 660 if (isgn == -1) { 661 for (int r = n0; r < rows; r += nthreads) { 662 dstColumns.forward(a, r * columns, scale); 663 } 664 } else { 665 for (int r = n0; r < rows; r += nthreads) { 666 dstColumns.inverse(a, r * columns, scale); 667 } 668 } 669 } 670 }); 671 } 672 ConcurrencyUtils.waitForCompletion(futures); 673 } 674 675 private void ddxt2d0_subth(final int isgn, final float[][] a, final boolean scale) { 676 final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads(); 677 678 Future<?>[] futures = new Future[nthreads]; 679 680 for (int i = 0; i < nthreads; i++) { 681 final int n0 = i; 682 futures[i] = ConcurrencyUtils.submit(new Runnable() { 683 684 public void run() { 685 if (isgn == -1) { 686 for (int r = n0; r < rows; r += nthreads) { 687 dstColumns.forward(a[r], scale); 688 } 689 } else { 690 for (int r = n0; r < rows; r += nthreads) { 691 dstColumns.inverse(a[r], scale); 692 } 693 } 694 } 695 }); 696 } 697 ConcurrencyUtils.waitForCompletion(futures); 698 } 699 700 private void ddxt2d_sub(int isgn, float[] a, boolean scale) { 701 int idx1, idx2; 702 703 if (columns > 2) { 704 if (isgn == -1) { 705 for (int c = 0; c < columns; c += 4) { 706 for (int r = 0; r < rows; r++) { 707 idx1 = r * columns + c; 708 idx2 = rows + r; 709 t[r] = a[idx1]; 710 t[idx2] = a[idx1 + 1]; 711 t[idx2 + rows] = a[idx1 + 2]; 712 t[idx2 + 2 * rows] = a[idx1 + 3]; 713 } 714 dstRows.forward(t, 0, scale); 715 dstRows.forward(t, rows, scale); 716 dstRows.forward(t, 2 * rows, scale); 717 dstRows.forward(t, 3 * rows, scale); 718 for (int r = 0; r < rows; r++) { 719 idx1 = r * columns + c; 720 idx2 = rows + r; 721 a[idx1] = t[r]; 722 a[idx1 + 1] = t[idx2]; 723 a[idx1 + 2] = t[idx2 + rows]; 724 a[idx1 + 3] = t[idx2 + 2 * rows]; 725 } 726 } 727 } else { 728 for (int c = 0; c < columns; c += 4) { 729 for (int r = 0; r < rows; r++) { 730 idx1 = r * columns + c; 731 idx2 = rows + r; 732 t[r] = a[idx1]; 733 t[idx2] = a[idx1 + 1]; 734 t[idx2 + rows] = a[idx1 + 2]; 735 t[idx2 + 2 * rows] = a[idx1 + 3]; 736 } 737 dstRows.inverse(t, 0, scale); 738 dstRows.inverse(t, rows, scale); 739 dstRows.inverse(t, 2 * rows, scale); 740 dstRows.inverse(t, 3 * rows, scale); 741 for (int r = 0; r < rows; r++) { 742 idx1 = r * columns + c; 743 idx2 = rows + r; 744 a[idx1] = t[r]; 745 a[idx1 + 1] = t[idx2]; 746 a[idx1 + 2] = t[idx2 + rows]; 747 a[idx1 + 3] = t[idx2 + 2 * rows]; 748 } 749 } 750 } 751 } else if (columns == 2) { 752 for (int r = 0; r < rows; r++) { 753 idx1 = r * columns; 754 t[r] = a[idx1]; 755 t[rows + r] = a[idx1 + 1]; 756 } 757 if (isgn == -1) { 758 dstRows.forward(t, 0, scale); 759 dstRows.forward(t, rows, scale); 760 } else { 761 dstRows.inverse(t, 0, scale); 762 dstRows.inverse(t, rows, scale); 763 } 764 for (int r = 0; r < rows; r++) { 765 idx1 = r * columns; 766 a[idx1] = t[r]; 767 a[idx1 + 1] = t[rows + r]; 768 } 769 } 770 } 771 772 private void ddxt2d_sub(int isgn, float[][] a, boolean scale) { 773 int idx2; 774 775 if (columns > 2) { 776 if (isgn == -1) { 777 for (int c = 0; c < columns; c += 4) { 778 for (int r = 0; r < rows; r++) { 779 idx2 = rows + r; 780 t[r] = a[r][c]; 781 t[idx2] = a[r][c + 1]; 782 t[idx2 + rows] = a[r][c + 2]; 783 t[idx2 + 2 * rows] = a[r][c + 3]; 784 } 785 dstRows.forward(t, 0, scale); 786 dstRows.forward(t, rows, scale); 787 dstRows.forward(t, 2 * rows, scale); 788 dstRows.forward(t, 3 * rows, scale); 789 for (int r = 0; r < rows; r++) { 790 idx2 = rows + r; 791 a[r][c] = t[r]; 792 a[r][c + 1] = t[idx2]; 793 a[r][c + 2] = t[idx2 + rows]; 794 a[r][c + 3] = t[idx2 + 2 * rows]; 795 } 796 } 797 } else { 798 for (int c = 0; c < columns; c += 4) { 799 for (int r = 0; r < rows; r++) { 800 idx2 = rows + r; 801 t[r] = a[r][c]; 802 t[idx2] = a[r][c + 1]; 803 t[idx2 + rows] = a[r][c + 2]; 804 t[idx2 + 2 * rows] = a[r][c + 3]; 805 } 806 dstRows.inverse(t, 0, scale); 807 dstRows.inverse(t, rows, scale); 808 dstRows.inverse(t, 2 * rows, scale); 809 dstRows.inverse(t, 3 * rows, scale); 810 for (int r = 0; r < rows; r++) { 811 idx2 = rows + r; 812 a[r][c] = t[r]; 813 a[r][c + 1] = t[idx2]; 814 a[r][c + 2] = t[idx2 + rows]; 815 a[r][c + 3] = t[idx2 + 2 * rows]; 816 } 817 } 818 } 819 } else if (columns == 2) { 820 for (int r = 0; r < rows; r++) { 821 t[r] = a[r][0]; 822 t[rows + r] = a[r][1]; 823 } 824 if (isgn == -1) { 825 dstRows.forward(t, 0, scale); 826 dstRows.forward(t, rows, scale); 827 } else { 828 dstRows.inverse(t, 0, scale); 829 dstRows.inverse(t, rows, scale); 830 } 831 for (int r = 0; r < rows; r++) { 832 a[r][0] = t[r]; 833 a[r][1] = t[rows + r]; 834 } 835 } 836 } 837}