001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.image.feature.dense.gradient.dsift;
031
032import org.openimaj.feature.local.list.LocalFeatureList;
033import org.openimaj.feature.local.list.MemoryLocalFeatureList;
034import org.openimaj.image.FImage;
035import org.openimaj.image.processing.convolution.FImageConvolveSeparable;
036import org.openimaj.image.processing.convolution.FImageGradients;
037import org.openimaj.math.geometry.shape.Rectangle;
038import org.openimaj.util.array.ArrayUtils;
039
040/**
041 * Implementation of a dense SIFT feature extractor for {@link FImage}s.
042 * Extracts upright SIFT features at a single scale on a grid.
043 * <p>
044 * Implementation inspired by the <a
045 * href="http://www.vlfeat.org/api/dsift.html#dsift-usage">VLFeat extractor</a>.
046 * <p>
047 * <b>Implementation Notes</b>. The analyser is not thread-safe, however, it is
048 * safe to reuse the analyser. In multi-threaded environments, a separate
049 * instance must be made for each thread. Internally, this implementation
050 * allocates memory for the gradient images, and if possible re-uses these
051 * between calls. Re-use requires that the input image is the same size between
052 * calls to the analyser.
053 * 
054 * @see "http://www.vlfeat.org/api/dsift.html#dsift-usage"
055 * 
056 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
057 * 
058 */
059public class DenseSIFT extends AbstractDenseSIFT<FImage> {
060        static class WorkingData {
061                /**
062                 * minimum X bound for sampling the descriptors (inclusive)
063                 */
064                protected int boundMinX;
065
066                /**
067                 * maximum X bound for sampling the descriptors (inclusive)
068                 */
069                protected int boundMaxX;
070
071                /**
072                 * minimum Y bound for sampling the descriptors (inclusive)
073                 */
074                protected int boundMinY;
075
076                /**
077                 * maximum X bound for sampling the descriptors (inclusive)
078                 */
079                protected int boundMaxY;
080
081                /**
082                 * Quantised orientation/gradient magnitude data. Each element
083                 * corresponds to an angular bin of the orientations, and the individual
084                 * images correspond to the gradient magnitudes for the respective
085                 * orientations.
086                 */
087                protected FImage[] gradientMagnitudes;
088
089                /**
090                 * Setup the required space for holding gradient maps and descriptors
091                 * 
092                 * @param image
093                 *            the image being analysed
094                 */
095                protected void setupWorkingSpace(FImage image, DenseSIFT dsift) {
096                        if (gradientMagnitudes == null) {
097                                gradientMagnitudes = new FImage[dsift.numOriBins];
098                        }
099
100                        if (gradientMagnitudes[0] == null || gradientMagnitudes[0].width != image.width
101                                        || gradientMagnitudes[0].height != image.height)
102                        {
103                                for (int i = 0; i < dsift.numOriBins; i++)
104                                        gradientMagnitudes[i] = new FImage(image.width, image.height);
105                        }
106
107                        final int rangeX = boundMaxX - boundMinX - (dsift.numBinsX - 1) * dsift.binWidth;
108                        final int rangeY = boundMaxY - boundMinY - (dsift.numBinsY - 1) * dsift.binHeight;
109
110                        final int numWindowsX = (rangeX >= 0) ? rangeX / dsift.stepX + 1 : 0;
111                        final int numWindowsY = (rangeY >= 0) ? rangeY / dsift.stepY + 1 : 0;
112
113                        final int numFeatures = numWindowsX * numWindowsY;
114
115                        dsift.descriptors = new float[numFeatures][dsift.numOriBins * dsift.numBinsX * dsift.numBinsY];
116                        dsift.energies = new float[numFeatures];
117                }
118        }
119
120        /**
121         * Step size of sampling window in x-direction (in pixels)
122         */
123        protected int stepX = 5;
124
125        /**
126         * Step size of sampling window in y-direction (in pixels)
127         */
128        protected int stepY = 5;
129
130        /**
131         * Width of a single bin of the sampling window (in pixels). Sampling window
132         * width is this multiplied by #numBinX.
133         */
134        protected int binWidth = 5;
135
136        /**
137         * Height of a single bin of the sampling window (in pixels). Sampling
138         * window height is this multiplied by #numBinY.
139         */
140        protected int binHeight = 5;
141
142        /**
143         * Number of spatial bins in the X direction
144         */
145        protected int numBinsX = 4;
146
147        /**
148         * Number of spatial bins in the Y direction
149         */
150        protected int numBinsY = 4;
151
152        /** The number of orientation bins */
153        protected int numOriBins = 8;
154
155        /**
156         * Size of the Gaussian window (in relative to of the size of a bin)
157         */
158        protected float gaussianWindowSize = 2f;
159
160        /**
161         * Threshold for clipping the SIFT features
162         */
163        protected float valueThreshold = 0.2f;
164
165        protected volatile WorkingData data = new WorkingData();
166
167        /**
168         * Extracted descriptors
169         */
170        protected volatile float[][] descriptors;
171
172        /**
173         * Descriptor energies
174         */
175        protected volatile float[] energies;
176
177        /**
178         * Construct with the default configuration: standard SIFT geometry (4x4x8),
179         * 5px x 5px spatial bins, 5px step size, gaussian window size of 2 and
180         * value threshold of 0.2.
181         */
182        public DenseSIFT() {
183        }
184
185        /**
186         * Construct with the given step size (for both x and y) and binSize. All
187         * other values are the defaults.
188         * 
189         * @param step
190         *            the step size
191         * @param binSize
192         *            the spatial bin size
193         */
194        public DenseSIFT(int step, int binSize) {
195                this.binWidth = binSize;
196                this.binHeight = binSize;
197                this.stepX = step;
198                this.stepY = step;
199        }
200
201        /**
202         * Construct with the given configuration. The gaussian window size is set
203         * to 2, and value threshold to 0.2.
204         * 
205         * @param stepX
206         *            step size in x direction
207         * @param stepY
208         *            step size in y direction
209         * @param binWidth
210         *            width of spatial bins
211         * @param binHeight
212         *            height of spatial bins
213         * @param numBinsX
214         *            number of bins in x direction for each descriptor
215         * @param numBinsY
216         *            number of bins in y direction for each descriptor
217         * @param numOriBins
218         *            number of orientation bins for each descriptor
219         */
220        public DenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY, int numOriBins) {
221                this(stepX, stepY, binWidth, binHeight, numBinsX, numBinsY, numOriBins, 2f);
222        }
223
224        /**
225         * Construct with the given configuration. The value threshold is set to
226         * 0.2.
227         * 
228         * @param stepX
229         *            step size in x direction
230         * @param stepY
231         *            step size in y direction
232         * @param binWidth
233         *            width of spatial bins
234         * @param binHeight
235         *            height of spatial bins
236         * @param numBinsX
237         *            number of bins in x direction for each descriptor
238         * @param numBinsY
239         *            number of bins in y direction for each descriptor
240         * @param numOriBins
241         *            number of orientation bins for each descriptor
242         * @param gaussianWindowSize
243         *            the size of the gaussian weighting window
244         */
245        public DenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY, int numOriBins,
246                        float gaussianWindowSize)
247        {
248                this(stepX, stepY, binWidth, binHeight, numBinsX, numBinsY, numOriBins, gaussianWindowSize, 0.2f);
249        }
250
251        /**
252         * Construct with the given configuration. The value threshold is set to
253         * 0.2.
254         * 
255         * @param stepX
256         *            step size in x direction
257         * @param stepY
258         *            step size in y direction
259         * @param binWidth
260         *            width of spatial bins
261         * @param binHeight
262         *            height of spatial bins
263         * @param numBinsX
264         *            number of bins in x direction for each descriptor
265         * @param numBinsY
266         *            number of bins in y direction for each descriptor
267         * @param numOriBins
268         *            number of orientation bins for each descriptor
269         * @param gaussianWindowSize
270         *            the size of the gaussian weighting window
271         * @param valueThreshold
272         *            the threshold for clipping features
273         */
274        public DenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY, int numOriBins,
275                        float gaussianWindowSize, float valueThreshold)
276        {
277                this.binWidth = binWidth;
278                this.binHeight = binHeight;
279                this.stepX = stepX;
280                this.stepY = stepY;
281                this.numBinsX = numBinsX;
282                this.numBinsY = numBinsY;
283                this.numOriBins = numOriBins;
284                this.gaussianWindowSize = gaussianWindowSize;
285                this.valueThreshold = valueThreshold;
286        }
287
288        private float[] buildKernel(int binSize, int numBins, int binIndex, float windowSize) {
289                final int kernelSize = 2 * binSize - 1;
290                final float[] kernel = new float[kernelSize];
291                final float delta = binSize * (binIndex - 0.5F * (numBins - 1));
292
293                final float sigma = binSize * windowSize;
294
295                for (int x = -binSize + 1, i = 0; x <= +binSize - 1; x++, i++) {
296                        final float z = (x - delta) / sigma;
297
298                        kernel[i] = (1.0F - Math.abs(x) / binSize) * ((binIndex >= 0) ? (float) Math.exp(-0.5F * z * z) : 1.0F);
299                }
300
301                return kernel;
302        }
303
304        /**
305         * Extract the DSIFT features
306         */
307        protected void extractFeatures() {
308                final int frameSizeX = binWidth * (numBinsX - 1) + 1;
309                final int frameSizeY = binHeight * (numBinsY - 1) + 1;
310
311                for (int biny = 0; biny < numBinsY; biny++) {
312                        final float[] yker = buildKernel(binHeight, numBinsY, biny, gaussianWindowSize);
313
314                        for (int binx = 0; binx < numBinsX; binx++) {
315                                final float[] xker = buildKernel(binWidth, numBinsX, binx, gaussianWindowSize);
316
317                                for (int bint = 0; bint < numOriBins; bint++) {
318                                        final FImage conv = data.gradientMagnitudes[bint].process(new FImageConvolveSeparable(xker, yker));
319                                        final float[][] src = conv.pixels;
320
321                                        final int descriptorOffset = bint + binx * numOriBins + biny * (numBinsX * numOriBins);
322                                        int descriptorIndex = 0;
323
324                                        for (int framey = data.boundMinY; framey <= data.boundMaxY - frameSizeY + 1; framey += stepY)
325                                        {
326                                                for (int framex = data.boundMinX; framex <= data.boundMaxX - frameSizeX + 1; framex += stepX)
327                                                {
328                                                        descriptors[descriptorIndex][descriptorOffset] =
329                                                                        src[framey + biny * binHeight][framex + binx * binWidth];
330                                                        descriptorIndex++;
331                                                }
332                                        }
333                                }
334                        }
335                }
336        }
337
338        @Override
339        public void analyseImage(FImage image, Rectangle bounds) {
340                if (data == null)
341                        data = new WorkingData();
342
343                data.boundMinX = (int) bounds.x;
344                data.boundMaxX = (int) (bounds.width - 1);
345                data.boundMinY = (int) bounds.y;
346                data.boundMaxY = (int) (bounds.height - 1);
347
348                data.setupWorkingSpace(image, this);
349
350                FImageGradients.gradientMagnitudesAndQuantisedOrientations(image, data.gradientMagnitudes);
351
352                extractFeatures();
353
354                normaliseDescriptors();
355        }
356
357        private void normaliseDescriptors() {
358                final int frameSizeX = binWidth * (numBinsX - 1) + 1;
359                final int frameSizeY = binHeight * (numBinsY - 1) + 1;
360                final float energyNorm = frameSizeX * frameSizeY;
361
362                for (int j = 0; j < descriptors.length; j++) {
363                        final float[] arr = descriptors[j];
364
365                        energies[j] = ArrayUtils.sumValues(arr) / energyNorm;
366
367                        ArrayUtils.normalise(arr);
368
369                        boolean changed = false;
370                        for (int i = 0; i < arr.length; i++) {
371                                if (arr[i] > valueThreshold) {
372                                        arr[i] = valueThreshold;
373                                        changed = true;
374                                }
375                        }
376
377                        if (changed)
378                                ArrayUtils.normalise(arr);
379                }
380        }
381
382        @Override
383        public LocalFeatureList<FloatDSIFTKeypoint> getFloatKeypoints() {
384                final MemoryLocalFeatureList<FloatDSIFTKeypoint> keys = new MemoryLocalFeatureList<FloatDSIFTKeypoint>(numOriBins
385                                * numBinsX * numBinsY, descriptors.length);
386
387                final int frameSizeX = binWidth * (numBinsX - 1) + 1;
388                final int frameSizeY = binHeight * (numBinsY - 1) + 1;
389
390                final float deltaCenterX = 0.5F * binWidth * (numBinsX - 1);
391                final float deltaCenterY = 0.5F * binHeight * (numBinsY - 1);
392
393                for (int framey = data.boundMinY, i = 0; framey <= data.boundMaxY - frameSizeY + 1; framey += stepY) {
394                        for (int framex = data.boundMinX; framex <= data.boundMaxX - frameSizeX + 1; framex += stepX, i++) {
395                                keys.add(new FloatDSIFTKeypoint(framex + deltaCenterX, framey + deltaCenterY, descriptors[i], energies[i]));
396                        }
397                }
398
399                return keys;
400        }
401
402        @Override
403        public LocalFeatureList<ByteDSIFTKeypoint> getByteKeypoints() {
404                final MemoryLocalFeatureList<ByteDSIFTKeypoint> keys = new MemoryLocalFeatureList<ByteDSIFTKeypoint>(numOriBins
405                                * numBinsX * numBinsY, descriptors.length);
406
407                final int frameSizeX = binWidth * (numBinsX - 1) + 1;
408                final int frameSizeY = binHeight * (numBinsY - 1) + 1;
409
410                final float deltaCenterX = 0.5F * binWidth * (numBinsX - 1);
411                final float deltaCenterY = 0.5F * binHeight * (numBinsY - 1);
412
413                for (int framey = data.boundMinY, i = 0; framey <= data.boundMaxY - frameSizeY + 1; framey += stepY) {
414                        for (int framex = data.boundMinX; framex <= data.boundMaxX - frameSizeX + 1; framex += stepX, i++) {
415                                keys.add(new ByteDSIFTKeypoint(framex + deltaCenterX, framey + deltaCenterY, descriptors[i], energies[i]));
416                        }
417                }
418
419                return keys;
420        }
421
422        @Override
423        public LocalFeatureList<FloatDSIFTKeypoint> getFloatKeypoints(float energyThreshold) {
424                final MemoryLocalFeatureList<FloatDSIFTKeypoint> keys = new MemoryLocalFeatureList<FloatDSIFTKeypoint>(numOriBins
425                                * numBinsX * numBinsY);
426
427                final int frameSizeX = binWidth * (numBinsX - 1) + 1;
428                final int frameSizeY = binHeight * (numBinsY - 1) + 1;
429
430                final float deltaCenterX = 0.5F * binWidth * (numBinsX - 1);
431                final float deltaCenterY = 0.5F * binHeight * (numBinsY - 1);
432
433                for (int framey = data.boundMinY, i = 0; framey <= data.boundMaxY - frameSizeY + 1; framey += stepY) {
434                        for (int framex = data.boundMinX; framex <= data.boundMaxX - frameSizeX + 1; framex += stepX, i++) {
435                                if (energies[i] >= energyThreshold)
436                                        keys.add(new FloatDSIFTKeypoint(framex + deltaCenterX, framey + deltaCenterY, descriptors[i],
437                                                        energies[i]));
438                        }
439                }
440
441                return keys;
442        }
443
444        @Override
445        public LocalFeatureList<ByteDSIFTKeypoint> getByteKeypoints(float energyThreshold) {
446                final MemoryLocalFeatureList<ByteDSIFTKeypoint> keys = new MemoryLocalFeatureList<ByteDSIFTKeypoint>(numOriBins
447                                * numBinsX * numBinsY);
448
449                final int frameSizeX = binWidth * (numBinsX - 1) + 1;
450                final int frameSizeY = binHeight * (numBinsY - 1) + 1;
451
452                final float deltaCenterX = 0.5F * binWidth * (numBinsX - 1);
453                final float deltaCenterY = 0.5F * binHeight * (numBinsY - 1);
454
455                for (int framey = data.boundMinY, i = 0; framey <= data.boundMaxY - frameSizeY + 1; framey += stepY) {
456                        for (int framex = data.boundMinX; framex <= data.boundMaxX - frameSizeX + 1; framex += stepX, i++) {
457                                if (energies[i] >= energyThreshold)
458                                        keys.add(new ByteDSIFTKeypoint(framex + deltaCenterX, framey + deltaCenterY, descriptors[i],
459                                                        energies[i]));
460                        }
461                }
462
463                return keys;
464        }
465
466        /**
467         * Get the computed raw dense SIFT descriptors from the previous call to
468         * {@link #analyseImage(FImage)} or {@link #analyseImage(FImage, Rectangle)}
469         * .
470         * 
471         * @return the descriptors.
472         */
473        @Override
474        public float[][] getDescriptors() {
475                return descriptors;
476        }
477
478        @Override
479        public DenseSIFT clone() {
480                final DenseSIFT clone = (DenseSIFT) super.clone();
481
482                clone.descriptors = null;
483                clone.energies = null;
484                clone.data = null;
485
486                return clone;
487        }
488
489        @Override
490        public void setBinWidth(int size) {
491                this.binWidth = size;
492        }
493
494        @Override
495        public void setBinHeight(int size) {
496                this.binHeight = size;
497        }
498
499        @Override
500        public int getBinWidth() {
501                return binWidth;
502        }
503
504        @Override
505        public int getBinHeight() {
506                return binHeight;
507        }
508
509        @Override
510        public int getNumBinsX() {
511                return numBinsX;
512        }
513
514        @Override
515        public int getNumBinsY() {
516                return numBinsY;
517        }
518
519        @Override
520        public int getNumOriBins() {
521                return numOriBins;
522        }
523}