001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.image.feature.dense.gradient.dsift; 031 032import org.openimaj.image.FImage; 033import org.openimaj.image.processing.convolution.FTriangleFilter; 034 035/** 036 * Implementation of an approximate dense SIFT feature extractor. Extracts 037 * approximate upright SIFT features at a single scale on a grid. Implementation 038 * is approximate because instead of using an exact Gaussian weighting, samples 039 * are weighted using a flat windowing function for speed, and then after 040 * accumulation are re-weighted by the average of the Gaussian window over the 041 * spatial support of the sampling region. The end result is that the extracted 042 * features are similar to the exact dense SIFT implementation, but computation 043 * is much faster. 044 * <p> 045 * Implementation directly based on the <a 046 * href="http://www.vlfeat.org/api/dsift.html#dsift-usage">VLFeat extractor</a>. 047 * <p> 048 * <b>Implementation Notes</b>. The analyser is not thread-safe, however, it is 049 * safe to reuse the analyser. In multi-threaded environments, a separate 050 * instance must be made for each thread. Internally, this implementation 051 * allocates memory for the gradient images, and if possible re-uses these 052 * between calls. Re-use requires that the input image is the same size between 053 * calls to the analyser. 054 * 055 * @see "http://www.vlfeat.org/api/dsift.html#dsift-usage" 056 * 057 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 058 * 059 */ 060public class ApproximateDenseSIFT extends DenseSIFT { 061 /** 062 * Construct with the default configuration: standard SIFT geometry (4x4x8), 063 * 5px x 5px spatial bins, 5px step size, gaussian window size of 2 and 064 * value threshold of 0.2. 065 */ 066 public ApproximateDenseSIFT() { 067 super(); 068 } 069 070 /** 071 * Construct with the given step size (for both x and y) and binSize. All 072 * other values are the defaults. 073 * 074 * @param step 075 * the step size 076 * @param binSize 077 * the spatial bin size 078 */ 079 public ApproximateDenseSIFT(int step, int binSize) { 080 super(step, binSize); 081 } 082 083 /** 084 * Construct with the given configuration. The gaussian window size is set 085 * to 2, and value threshold to 0.2. 086 * 087 * @param stepX 088 * step size in x direction 089 * @param stepY 090 * step size in y direction 091 * @param binWidth 092 * width of spatial bins 093 * @param binHeight 094 * height of spatial bins 095 * @param numBinsX 096 * number of bins in x direction for each descriptor 097 * @param numBinsY 098 * number of bins in y direction for each descriptor 099 * @param numOriBins 100 * number of orientation bins for each descriptor 101 */ 102 public ApproximateDenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY, 103 int numOriBins) 104 { 105 super(stepX, stepY, binWidth, binHeight, numBinsX, numBinsY, numOriBins); 106 } 107 108 /** 109 * Construct with the given configuration. The value threshold is set to 110 * 0.2. 111 * 112 * @param stepX 113 * step size in x direction 114 * @param stepY 115 * step size in y direction 116 * @param binWidth 117 * width of spatial bins 118 * @param binHeight 119 * height of spatial bins 120 * @param numBinsX 121 * number of bins in x direction for each descriptor 122 * @param numBinsY 123 * number of bins in y direction for each descriptor 124 * @param numOriBins 125 * number of orientation bins for each descriptor 126 * @param gaussianWindowSize 127 * the size of the gaussian weighting window 128 */ 129 public ApproximateDenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY, 130 int numOriBins, 131 float gaussianWindowSize) 132 { 133 super(stepX, stepY, binWidth, binHeight, numBinsX, numBinsY, numOriBins, gaussianWindowSize); 134 } 135 136 /** 137 * Construct with the given configuration. The value threshold is set to 138 * 0.2. 139 * 140 * @param stepX 141 * step size in x direction 142 * @param stepY 143 * step size in y direction 144 * @param binWidth 145 * width of spatial bins 146 * @param binHeight 147 * height of spatial bins 148 * @param numBinsX 149 * number of bins in x direction for each descriptor 150 * @param numBinsY 151 * number of bins in y direction for each descriptor 152 * @param numOriBins 153 * number of orientation bins for each descriptor 154 * @param gaussianWindowSize 155 * the size of the gaussian weighting window 156 * @param valueThreshold 157 * the threshold for clipping features 158 */ 159 public ApproximateDenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY, 160 int numOriBins, 161 float gaussianWindowSize, float valueThreshold) 162 { 163 super(stepX, stepY, binWidth, binHeight, numBinsX, numBinsY, numOriBins, gaussianWindowSize, valueThreshold); 164 } 165 166 private float computeWindowMean(int binSize, int numBins, int binIndex, double windowSize) 167 { 168 final float delta = binSize * (binIndex - 0.5F * (numBins - 1)); 169 /* float sigma = 0.5F * ((numBins - 1) * binSize + 1) ; */ 170 final float sigma = binSize * (float) windowSize; 171 int x; 172 173 float acc = 0.0f; 174 for (x = -binSize + 1; x <= +binSize - 1; ++x) { 175 final float z = (x - delta) / sigma; 176 acc += ((binIndex >= 0) ? (float) Math.exp(-0.5F * z * z) : 1.0F); 177 } 178 return acc /= (2 * binSize - 1); 179 } 180 181 @Override 182 protected void extractFeatures() 183 { 184 final int frameSizeX = binWidth * (numBinsX - 1) + 1; 185 final int frameSizeY = binHeight * (numBinsY - 1) + 1; 186 187 for (int bint = 0; bint < numOriBins; bint++) { 188 final FImage conv = data.gradientMagnitudes[bint].process(new FTriangleFilter(binWidth, binHeight)); 189 final float[][] src = conv.pixels; 190 191 for (int biny = 0; biny < numBinsY; biny++) { 192 193 // This approximate version of DSIFT does not use a proper 194 // Gaussian weighting scheme for the gradients that are 195 // accumulated on the spatial bins. Instead each spatial bins is 196 // accumulated based on the triangular kernel only, equivalent 197 // to bilinear interpolation plus a flat, rather than Gaussian, 198 // window. Eventually, however, the magnitude of the spatial 199 // bins in the SIFT descriptor is reweighted by the average of 200 // the Gaussian window on each bin. 201 float wy = computeWindowMean(binHeight, numBinsY, biny, gaussianWindowSize); 202 203 // The triangular convolution functions convolve by a triangular 204 // kernel with unit integral; instead for SIFT the triangular 205 // kernel should have unit height. This is compensated for by 206 // multiplying by the bin size: 207 wy *= binHeight; 208 209 for (int binx = 0; binx < numBinsX; ++binx) { 210 float wx = computeWindowMean(binWidth, numBinsX, binx, gaussianWindowSize); 211 wx *= binWidth; 212 final float w = wx * wy; 213 214 final int descriptorOffset = bint + binx * numOriBins + biny * (numBinsX * numOriBins); 215 int descriptorIndex = 0; 216 217 for (int framey = data.boundMinY; framey <= data.boundMaxY - frameSizeY + 1; framey += stepY) { 218 for (int framex = data.boundMinX; framex <= data.boundMaxX - frameSizeX + 1; framex += stepX) { 219 descriptors[descriptorIndex][descriptorOffset] = w 220 * src[framey + biny * binHeight][framex + binx * binWidth]; 221 descriptorIndex++; 222 } 223 } 224 } 225 } 226 } 227 } 228 229 @Override 230 public ApproximateDenseSIFT clone() { 231 return (ApproximateDenseSIFT) super.clone(); 232 } 233}