001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.image.feature.dense.gradient.dsift; 031 032import org.openimaj.feature.local.list.LocalFeatureList; 033import org.openimaj.feature.local.list.MemoryLocalFeatureList; 034import org.openimaj.image.FImage; 035import org.openimaj.image.processing.convolution.FImageConvolveSeparable; 036import org.openimaj.image.processing.convolution.FImageGradients; 037import org.openimaj.math.geometry.shape.Rectangle; 038import org.openimaj.util.array.ArrayUtils; 039 040/** 041 * Implementation of a dense SIFT feature extractor for {@link FImage}s. 042 * Extracts upright SIFT features at a single scale on a grid. 043 * <p> 044 * Implementation inspired by the <a 045 * href="http://www.vlfeat.org/api/dsift.html#dsift-usage">VLFeat extractor</a>. 046 * <p> 047 * <b>Implementation Notes</b>. The analyser is not thread-safe, however, it is 048 * safe to reuse the analyser. In multi-threaded environments, a separate 049 * instance must be made for each thread. Internally, this implementation 050 * allocates memory for the gradient images, and if possible re-uses these 051 * between calls. Re-use requires that the input image is the same size between 052 * calls to the analyser. 053 * 054 * @see "http://www.vlfeat.org/api/dsift.html#dsift-usage" 055 * 056 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 057 * 058 */ 059public class DenseSIFT extends AbstractDenseSIFT<FImage> { 060 static class WorkingData { 061 /** 062 * minimum X bound for sampling the descriptors (inclusive) 063 */ 064 protected int boundMinX; 065 066 /** 067 * maximum X bound for sampling the descriptors (inclusive) 068 */ 069 protected int boundMaxX; 070 071 /** 072 * minimum Y bound for sampling the descriptors (inclusive) 073 */ 074 protected int boundMinY; 075 076 /** 077 * maximum X bound for sampling the descriptors (inclusive) 078 */ 079 protected int boundMaxY; 080 081 /** 082 * Quantised orientation/gradient magnitude data. Each element 083 * corresponds to an angular bin of the orientations, and the individual 084 * images correspond to the gradient magnitudes for the respective 085 * orientations. 086 */ 087 protected FImage[] gradientMagnitudes; 088 089 /** 090 * Setup the required space for holding gradient maps and descriptors 091 * 092 * @param image 093 * the image being analysed 094 */ 095 protected void setupWorkingSpace(FImage image, DenseSIFT dsift) { 096 if (gradientMagnitudes == null) { 097 gradientMagnitudes = new FImage[dsift.numOriBins]; 098 } 099 100 if (gradientMagnitudes[0] == null || gradientMagnitudes[0].width != image.width 101 || gradientMagnitudes[0].height != image.height) 102 { 103 for (int i = 0; i < dsift.numOriBins; i++) 104 gradientMagnitudes[i] = new FImage(image.width, image.height); 105 } 106 107 final int rangeX = boundMaxX - boundMinX - (dsift.numBinsX - 1) * dsift.binWidth; 108 final int rangeY = boundMaxY - boundMinY - (dsift.numBinsY - 1) * dsift.binHeight; 109 110 final int numWindowsX = (rangeX >= 0) ? rangeX / dsift.stepX + 1 : 0; 111 final int numWindowsY = (rangeY >= 0) ? rangeY / dsift.stepY + 1 : 0; 112 113 final int numFeatures = numWindowsX * numWindowsY; 114 115 dsift.descriptors = new float[numFeatures][dsift.numOriBins * dsift.numBinsX * dsift.numBinsY]; 116 dsift.energies = new float[numFeatures]; 117 } 118 } 119 120 /** 121 * Step size of sampling window in x-direction (in pixels) 122 */ 123 protected int stepX = 5; 124 125 /** 126 * Step size of sampling window in y-direction (in pixels) 127 */ 128 protected int stepY = 5; 129 130 /** 131 * Width of a single bin of the sampling window (in pixels). Sampling window 132 * width is this multiplied by #numBinX. 133 */ 134 protected int binWidth = 5; 135 136 /** 137 * Height of a single bin of the sampling window (in pixels). Sampling 138 * window height is this multiplied by #numBinY. 139 */ 140 protected int binHeight = 5; 141 142 /** 143 * Number of spatial bins in the X direction 144 */ 145 protected int numBinsX = 4; 146 147 /** 148 * Number of spatial bins in the Y direction 149 */ 150 protected int numBinsY = 4; 151 152 /** The number of orientation bins */ 153 protected int numOriBins = 8; 154 155 /** 156 * Size of the Gaussian window (in relative to of the size of a bin) 157 */ 158 protected float gaussianWindowSize = 2f; 159 160 /** 161 * Threshold for clipping the SIFT features 162 */ 163 protected float valueThreshold = 0.2f; 164 165 protected volatile WorkingData data = new WorkingData(); 166 167 /** 168 * Extracted descriptors 169 */ 170 protected volatile float[][] descriptors; 171 172 /** 173 * Descriptor energies 174 */ 175 protected volatile float[] energies; 176 177 /** 178 * Construct with the default configuration: standard SIFT geometry (4x4x8), 179 * 5px x 5px spatial bins, 5px step size, gaussian window size of 2 and 180 * value threshold of 0.2. 181 */ 182 public DenseSIFT() { 183 } 184 185 /** 186 * Construct with the given step size (for both x and y) and binSize. All 187 * other values are the defaults. 188 * 189 * @param step 190 * the step size 191 * @param binSize 192 * the spatial bin size 193 */ 194 public DenseSIFT(int step, int binSize) { 195 this.binWidth = binSize; 196 this.binHeight = binSize; 197 this.stepX = step; 198 this.stepY = step; 199 } 200 201 /** 202 * Construct with the given configuration. The gaussian window size is set 203 * to 2, and value threshold to 0.2. 204 * 205 * @param stepX 206 * step size in x direction 207 * @param stepY 208 * step size in y direction 209 * @param binWidth 210 * width of spatial bins 211 * @param binHeight 212 * height of spatial bins 213 * @param numBinsX 214 * number of bins in x direction for each descriptor 215 * @param numBinsY 216 * number of bins in y direction for each descriptor 217 * @param numOriBins 218 * number of orientation bins for each descriptor 219 */ 220 public DenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY, int numOriBins) { 221 this(stepX, stepY, binWidth, binHeight, numBinsX, numBinsY, numOriBins, 2f); 222 } 223 224 /** 225 * Construct with the given configuration. The value threshold is set to 226 * 0.2. 227 * 228 * @param stepX 229 * step size in x direction 230 * @param stepY 231 * step size in y direction 232 * @param binWidth 233 * width of spatial bins 234 * @param binHeight 235 * height of spatial bins 236 * @param numBinsX 237 * number of bins in x direction for each descriptor 238 * @param numBinsY 239 * number of bins in y direction for each descriptor 240 * @param numOriBins 241 * number of orientation bins for each descriptor 242 * @param gaussianWindowSize 243 * the size of the gaussian weighting window 244 */ 245 public DenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY, int numOriBins, 246 float gaussianWindowSize) 247 { 248 this(stepX, stepY, binWidth, binHeight, numBinsX, numBinsY, numOriBins, gaussianWindowSize, 0.2f); 249 } 250 251 /** 252 * Construct with the given configuration. The value threshold is set to 253 * 0.2. 254 * 255 * @param stepX 256 * step size in x direction 257 * @param stepY 258 * step size in y direction 259 * @param binWidth 260 * width of spatial bins 261 * @param binHeight 262 * height of spatial bins 263 * @param numBinsX 264 * number of bins in x direction for each descriptor 265 * @param numBinsY 266 * number of bins in y direction for each descriptor 267 * @param numOriBins 268 * number of orientation bins for each descriptor 269 * @param gaussianWindowSize 270 * the size of the gaussian weighting window 271 * @param valueThreshold 272 * the threshold for clipping features 273 */ 274 public DenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY, int numOriBins, 275 float gaussianWindowSize, float valueThreshold) 276 { 277 this.binWidth = binWidth; 278 this.binHeight = binHeight; 279 this.stepX = stepX; 280 this.stepY = stepY; 281 this.numBinsX = numBinsX; 282 this.numBinsY = numBinsY; 283 this.numOriBins = numOriBins; 284 this.gaussianWindowSize = gaussianWindowSize; 285 this.valueThreshold = valueThreshold; 286 } 287 288 private float[] buildKernel(int binSize, int numBins, int binIndex, float windowSize) { 289 final int kernelSize = 2 * binSize - 1; 290 final float[] kernel = new float[kernelSize]; 291 final float delta = binSize * (binIndex - 0.5F * (numBins - 1)); 292 293 final float sigma = binSize * windowSize; 294 295 for (int x = -binSize + 1, i = 0; x <= +binSize - 1; x++, i++) { 296 final float z = (x - delta) / sigma; 297 298 kernel[i] = (1.0F - Math.abs(x) / binSize) * ((binIndex >= 0) ? (float) Math.exp(-0.5F * z * z) : 1.0F); 299 } 300 301 return kernel; 302 } 303 304 /** 305 * Extract the DSIFT features 306 */ 307 protected void extractFeatures() { 308 final int frameSizeX = binWidth * (numBinsX - 1) + 1; 309 final int frameSizeY = binHeight * (numBinsY - 1) + 1; 310 311 for (int biny = 0; biny < numBinsY; biny++) { 312 final float[] yker = buildKernel(binHeight, numBinsY, biny, gaussianWindowSize); 313 314 for (int binx = 0; binx < numBinsX; binx++) { 315 final float[] xker = buildKernel(binWidth, numBinsX, binx, gaussianWindowSize); 316 317 for (int bint = 0; bint < numOriBins; bint++) { 318 final FImage conv = data.gradientMagnitudes[bint].process(new FImageConvolveSeparable(xker, yker)); 319 final float[][] src = conv.pixels; 320 321 final int descriptorOffset = bint + binx * numOriBins + biny * (numBinsX * numOriBins); 322 int descriptorIndex = 0; 323 324 for (int framey = data.boundMinY; framey <= data.boundMaxY - frameSizeY + 1; framey += stepY) 325 { 326 for (int framex = data.boundMinX; framex <= data.boundMaxX - frameSizeX + 1; framex += stepX) 327 { 328 descriptors[descriptorIndex][descriptorOffset] = 329 src[framey + biny * binHeight][framex + binx * binWidth]; 330 descriptorIndex++; 331 } 332 } 333 } 334 } 335 } 336 } 337 338 @Override 339 public void analyseImage(FImage image, Rectangle bounds) { 340 if (data == null) 341 data = new WorkingData(); 342 343 data.boundMinX = (int) bounds.x; 344 data.boundMaxX = (int) (bounds.width - 1); 345 data.boundMinY = (int) bounds.y; 346 data.boundMaxY = (int) (bounds.height - 1); 347 348 data.setupWorkingSpace(image, this); 349 350 FImageGradients.gradientMagnitudesAndQuantisedOrientations(image, data.gradientMagnitudes); 351 352 extractFeatures(); 353 354 normaliseDescriptors(); 355 } 356 357 private void normaliseDescriptors() { 358 final int frameSizeX = binWidth * (numBinsX - 1) + 1; 359 final int frameSizeY = binHeight * (numBinsY - 1) + 1; 360 final float energyNorm = frameSizeX * frameSizeY; 361 362 for (int j = 0; j < descriptors.length; j++) { 363 final float[] arr = descriptors[j]; 364 365 energies[j] = ArrayUtils.sumValues(arr) / energyNorm; 366 367 ArrayUtils.normalise(arr); 368 369 boolean changed = false; 370 for (int i = 0; i < arr.length; i++) { 371 if (arr[i] > valueThreshold) { 372 arr[i] = valueThreshold; 373 changed = true; 374 } 375 } 376 377 if (changed) 378 ArrayUtils.normalise(arr); 379 } 380 } 381 382 @Override 383 public LocalFeatureList<FloatDSIFTKeypoint> getFloatKeypoints() { 384 final MemoryLocalFeatureList<FloatDSIFTKeypoint> keys = new MemoryLocalFeatureList<FloatDSIFTKeypoint>(numOriBins 385 * numBinsX * numBinsY, descriptors.length); 386 387 final int frameSizeX = binWidth * (numBinsX - 1) + 1; 388 final int frameSizeY = binHeight * (numBinsY - 1) + 1; 389 390 final float deltaCenterX = 0.5F * binWidth * (numBinsX - 1); 391 final float deltaCenterY = 0.5F * binHeight * (numBinsY - 1); 392 393 for (int framey = data.boundMinY, i = 0; framey <= data.boundMaxY - frameSizeY + 1; framey += stepY) { 394 for (int framex = data.boundMinX; framex <= data.boundMaxX - frameSizeX + 1; framex += stepX, i++) { 395 keys.add(new FloatDSIFTKeypoint(framex + deltaCenterX, framey + deltaCenterY, descriptors[i], energies[i])); 396 } 397 } 398 399 return keys; 400 } 401 402 @Override 403 public LocalFeatureList<ByteDSIFTKeypoint> getByteKeypoints() { 404 final MemoryLocalFeatureList<ByteDSIFTKeypoint> keys = new MemoryLocalFeatureList<ByteDSIFTKeypoint>(numOriBins 405 * numBinsX * numBinsY, descriptors.length); 406 407 final int frameSizeX = binWidth * (numBinsX - 1) + 1; 408 final int frameSizeY = binHeight * (numBinsY - 1) + 1; 409 410 final float deltaCenterX = 0.5F * binWidth * (numBinsX - 1); 411 final float deltaCenterY = 0.5F * binHeight * (numBinsY - 1); 412 413 for (int framey = data.boundMinY, i = 0; framey <= data.boundMaxY - frameSizeY + 1; framey += stepY) { 414 for (int framex = data.boundMinX; framex <= data.boundMaxX - frameSizeX + 1; framex += stepX, i++) { 415 keys.add(new ByteDSIFTKeypoint(framex + deltaCenterX, framey + deltaCenterY, descriptors[i], energies[i])); 416 } 417 } 418 419 return keys; 420 } 421 422 @Override 423 public LocalFeatureList<FloatDSIFTKeypoint> getFloatKeypoints(float energyThreshold) { 424 final MemoryLocalFeatureList<FloatDSIFTKeypoint> keys = new MemoryLocalFeatureList<FloatDSIFTKeypoint>(numOriBins 425 * numBinsX * numBinsY); 426 427 final int frameSizeX = binWidth * (numBinsX - 1) + 1; 428 final int frameSizeY = binHeight * (numBinsY - 1) + 1; 429 430 final float deltaCenterX = 0.5F * binWidth * (numBinsX - 1); 431 final float deltaCenterY = 0.5F * binHeight * (numBinsY - 1); 432 433 for (int framey = data.boundMinY, i = 0; framey <= data.boundMaxY - frameSizeY + 1; framey += stepY) { 434 for (int framex = data.boundMinX; framex <= data.boundMaxX - frameSizeX + 1; framex += stepX, i++) { 435 if (energies[i] >= energyThreshold) 436 keys.add(new FloatDSIFTKeypoint(framex + deltaCenterX, framey + deltaCenterY, descriptors[i], 437 energies[i])); 438 } 439 } 440 441 return keys; 442 } 443 444 @Override 445 public LocalFeatureList<ByteDSIFTKeypoint> getByteKeypoints(float energyThreshold) { 446 final MemoryLocalFeatureList<ByteDSIFTKeypoint> keys = new MemoryLocalFeatureList<ByteDSIFTKeypoint>(numOriBins 447 * numBinsX * numBinsY); 448 449 final int frameSizeX = binWidth * (numBinsX - 1) + 1; 450 final int frameSizeY = binHeight * (numBinsY - 1) + 1; 451 452 final float deltaCenterX = 0.5F * binWidth * (numBinsX - 1); 453 final float deltaCenterY = 0.5F * binHeight * (numBinsY - 1); 454 455 for (int framey = data.boundMinY, i = 0; framey <= data.boundMaxY - frameSizeY + 1; framey += stepY) { 456 for (int framex = data.boundMinX; framex <= data.boundMaxX - frameSizeX + 1; framex += stepX, i++) { 457 if (energies[i] >= energyThreshold) 458 keys.add(new ByteDSIFTKeypoint(framex + deltaCenterX, framey + deltaCenterY, descriptors[i], 459 energies[i])); 460 } 461 } 462 463 return keys; 464 } 465 466 /** 467 * Get the computed raw dense SIFT descriptors from the previous call to 468 * {@link #analyseImage(FImage)} or {@link #analyseImage(FImage, Rectangle)} 469 * . 470 * 471 * @return the descriptors. 472 */ 473 @Override 474 public float[][] getDescriptors() { 475 return descriptors; 476 } 477 478 @Override 479 public DenseSIFT clone() { 480 final DenseSIFT clone = (DenseSIFT) super.clone(); 481 482 clone.descriptors = null; 483 clone.energies = null; 484 clone.data = null; 485 486 return clone; 487 } 488 489 @Override 490 public void setBinWidth(int size) { 491 this.binWidth = size; 492 } 493 494 @Override 495 public void setBinHeight(int size) { 496 this.binHeight = size; 497 } 498 499 @Override 500 public int getBinWidth() { 501 return binWidth; 502 } 503 504 @Override 505 public int getBinHeight() { 506 return binHeight; 507 } 508 509 @Override 510 public int getNumBinsX() { 511 return numBinsX; 512 } 513 514 @Override 515 public int getNumBinsY() { 516 return numBinsY; 517 } 518 519 @Override 520 public int getNumOriBins() { 521 return numOriBins; 522 } 523}