Source code

001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.image.feature.dense.gradient.dsift;
031
032import org.openimaj.image.FImage;
033import org.openimaj.image.processing.convolution.FTriangleFilter;
034
035/**
036 * Implementation of an approximate dense SIFT feature extractor. Extracts
037 * approximate upright SIFT features at a single scale on a grid. Implementation
038 * is approximate because instead of using an exact Gaussian weighting, samples
039 * are weighted using a flat windowing function for speed, and then after
040 * accumulation are re-weighted by the average of the Gaussian window over the
041 * spatial support of the sampling region. The end result is that the extracted
042 * features are similar to the exact dense SIFT implementation, but computation
043 * is much faster.
044 * <p>
045 * Implementation directly based on the <a
046 * href="http://www.vlfeat.org/api/dsift.html#dsift-usage">VLFeat extractor</a>.
047 * <p>
048 * <b>Implementation Notes</b>. The analyser is not thread-safe, however, it is
049 * safe to reuse the analyser. In multi-threaded environments, a separate
050 * instance must be made for each thread. Internally, this implementation
051 * allocates memory for the gradient images, and if possible re-uses these
052 * between calls. Re-use requires that the input image is the same size between
053 * calls to the analyser.
054 * 
055 * @see "http://www.vlfeat.org/api/dsift.html#dsift-usage"
056 * 
057 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
058 * 
059 */
060public class ApproximateDenseSIFT extends DenseSIFT {
061        /**
062         * Construct with the default configuration: standard SIFT geometry (4x4x8),
063         * 5px x 5px spatial bins, 5px step size, gaussian window size of 2 and
064         * value threshold of 0.2.
065         */
066        public ApproximateDenseSIFT() {
067                super();
068        }
069
070        /**
071         * Construct with the given step size (for both x and y) and binSize. All
072         * other values are the defaults.
073         * 
074         * @param step
075         *            the step size
076         * @param binSize
077         *            the spatial bin size
078         */
079        public ApproximateDenseSIFT(int step, int binSize) {
080                super(step, binSize);
081        }
082
083        /**
084         * Construct with the given configuration. The gaussian window size is set
085         * to 2, and value threshold to 0.2.
086         * 
087         * @param stepX
088         *            step size in x direction
089         * @param stepY
090         *            step size in y direction
091         * @param binWidth
092         *            width of spatial bins
093         * @param binHeight
094         *            height of spatial bins
095         * @param numBinsX
096         *            number of bins in x direction for each descriptor
097         * @param numBinsY
098         *            number of bins in y direction for each descriptor
099         * @param numOriBins
100         *            number of orientation bins for each descriptor
101         */
102        public ApproximateDenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY,
103                        int numOriBins)
104        {
105                super(stepX, stepY, binWidth, binHeight, numBinsX, numBinsY, numOriBins);
106        }
107
108        /**
109         * Construct with the given configuration. The value threshold is set to
110         * 0.2.
111         * 
112         * @param stepX
113         *            step size in x direction
114         * @param stepY
115         *            step size in y direction
116         * @param binWidth
117         *            width of spatial bins
118         * @param binHeight
119         *            height of spatial bins
120         * @param numBinsX
121         *            number of bins in x direction for each descriptor
122         * @param numBinsY
123         *            number of bins in y direction for each descriptor
124         * @param numOriBins
125         *            number of orientation bins for each descriptor
126         * @param gaussianWindowSize
127         *            the size of the gaussian weighting window
128         */
129        public ApproximateDenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY,
130                        int numOriBins,
131                        float gaussianWindowSize)
132        {
133                super(stepX, stepY, binWidth, binHeight, numBinsX, numBinsY, numOriBins, gaussianWindowSize);
134        }
135
136        /**
137         * Construct with the given configuration. The value threshold is set to
138         * 0.2.
139         * 
140         * @param stepX
141         *            step size in x direction
142         * @param stepY
143         *            step size in y direction
144         * @param binWidth
145         *            width of spatial bins
146         * @param binHeight
147         *            height of spatial bins
148         * @param numBinsX
149         *            number of bins in x direction for each descriptor
150         * @param numBinsY
151         *            number of bins in y direction for each descriptor
152         * @param numOriBins
153         *            number of orientation bins for each descriptor
154         * @param gaussianWindowSize
155         *            the size of the gaussian weighting window
156         * @param valueThreshold
157         *            the threshold for clipping features
158         */
159        public ApproximateDenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY,
160                        int numOriBins,
161                        float gaussianWindowSize, float valueThreshold)
162        {
163                super(stepX, stepY, binWidth, binHeight, numBinsX, numBinsY, numOriBins, gaussianWindowSize, valueThreshold);
164        }
165
166        private float computeWindowMean(int binSize, int numBins, int binIndex, double windowSize)
167        {
168                final float delta = binSize * (binIndex - 0.5F * (numBins - 1));
169                /* float sigma = 0.5F * ((numBins - 1) * binSize + 1) ; */
170                final float sigma = binSize * (float) windowSize;
171                int x;
172
173                float acc = 0.0f;
174                for (x = -binSize + 1; x <= +binSize - 1; ++x) {
175                        final float z = (x - delta) / sigma;
176                        acc += ((binIndex >= 0) ? (float) Math.exp(-0.5F * z * z) : 1.0F);
177                }
178                return acc /= (2 * binSize - 1);
179        }
180
181        @Override
182        protected void extractFeatures()
183        {
184                final int frameSizeX = binWidth * (numBinsX - 1) + 1;
185                final int frameSizeY = binHeight * (numBinsY - 1) + 1;
186
187                for (int bint = 0; bint < numOriBins; bint++) {
188                        final FImage conv = data.gradientMagnitudes[bint].process(new FTriangleFilter(binWidth, binHeight));
189                        final float[][] src = conv.pixels;
190
191                        for (int biny = 0; biny < numBinsY; biny++) {
192
193                                // This approximate version of DSIFT does not use a proper
194                                // Gaussian weighting scheme for the gradients that are
195                                // accumulated on the spatial bins. Instead each spatial bins is
196                                // accumulated based on the triangular kernel only, equivalent
197                                // to bilinear interpolation plus a flat, rather than Gaussian,
198                                // window. Eventually, however, the magnitude of the spatial
199                                // bins in the SIFT descriptor is reweighted by the average of
200                                // the Gaussian window on each bin.
201                                float wy = computeWindowMean(binHeight, numBinsY, biny, gaussianWindowSize);
202
203                                // The triangular convolution functions convolve by a triangular
204                                // kernel with unit integral; instead for SIFT the triangular
205                                // kernel should have unit height. This is compensated for by
206                                // multiplying by the bin size:
207                                wy *= binHeight;
208
209                                for (int binx = 0; binx < numBinsX; ++binx) {
210                                        float wx = computeWindowMean(binWidth, numBinsX, binx, gaussianWindowSize);
211                                        wx *= binWidth;
212                                        final float w = wx * wy;
213
214                                        final int descriptorOffset = bint + binx * numOriBins + biny * (numBinsX * numOriBins);
215                                        int descriptorIndex = 0;
216
217                                        for (int framey = data.boundMinY; framey <= data.boundMaxY - frameSizeY + 1; framey += stepY) {
218                                                for (int framex = data.boundMinX; framex <= data.boundMaxX - frameSizeX + 1; framex += stepX) {
219                                                        descriptors[descriptorIndex][descriptorOffset] = w
220                                                                        * src[framey + biny * binHeight][framex + binx * binWidth];
221                                                        descriptorIndex++;
222                                                }
223                                        }
224                                }
225                        }
226                }
227        }
228
229        @Override
230        public ApproximateDenseSIFT clone() {
231                return (ApproximateDenseSIFT) super.clone();
232        }
233}