/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   *  Redistributions of source code must retain the above copyright notice,
 *      this list of conditions and the following disclaimer.
 *
 *   *  Redistributions in binary form must reproduce the above copyright notice,
 *      this list of conditions and the following disclaimer in the documentation
 *      and/or other materials provided with the distribution.
 *
 *   *  Neither the name of the University of Southampton nor the names of its
 *      contributors may be used to endorse or promote products derived from this
 *      software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.image.feature.local.aggregate;

import java.util.List;

import org.openimaj.citation.annotation.Reference;
import org.openimaj.citation.annotation.ReferenceType;
import org.openimaj.citation.annotation.References;
import org.openimaj.feature.ArrayFeatureVector;
import org.openimaj.feature.FloatFV;
import org.openimaj.feature.local.LocalFeature;
import org.openimaj.math.statistics.distribution.MixtureOfGaussians;
import org.openimaj.math.statistics.distribution.MultivariateGaussian;
import org.openimaj.ml.gmm.GaussianMixtureModelEM;
import org.openimaj.ml.gmm.GaussianMixtureModelEM.CovarianceType;

/**
 * Implementation of the Fisher Vector (FV) encoding scheme. FV provides a way
 * of encoding a set of vectors (e.g. local features) as a single vector that
 * encapsulates the first- and second-order residuals of the vectors from a
 * Gaussian mixture model.
 * <p>
 * The dimensionality of the output vector is 2*K*D, where K is the number of
 * Gaussians in the mixture and D is the descriptor dimensionality. Note that
 * only the diagonal values of the Gaussian covariance matrices are used, and
 * thus you probably want to learn a {@link CovarianceType#Diagonal} or
 * {@link CovarianceType#Spherical} type Gaussian with the
 * {@link GaussianMixtureModelEM} class.
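 * <p>
 * A minimal usage sketch (the exact {@link GaussianMixtureModelEM} training
 * API is an assumption here, and <code>sampledDescriptors</code> and
 * <code>localFeatures</code> are hypothetical placeholders):
 *
 * <pre>
 * // learn a 512-component GMM with diagonal covariances from sampled descriptors
 * final GaussianMixtureModelEM learner = new GaussianMixtureModelEM(512, CovarianceType.Diagonal);
 * final MixtureOfGaussians gmm = learner.estimate(sampledDescriptors);
 *
 * // build the improved (Hellinger + l2 normalised) encoder and aggregate features
 * final FisherVector&lt;float[]&gt; fisher = new FisherVector&lt;float[]&gt;(gmm, true);
 * final FloatFV fv = fisher.aggregate(localFeatures);
 * </pre>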
 *
 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
 *
 * @param <T>
 *            Primitive array type of the {@link ArrayFeatureVector}s used by
 *            the {@link LocalFeature}s that will be processed.
 */
@References(
                references = { @Reference(
                                type = ReferenceType.Inproceedings,
                                author = { "Perronnin, F.", "Dance, C." },
                                title = "Fisher Kernels on Visual Vocabularies for Image Categorization",
                                year = "2007",
                                booktitle = "Computer Vision and Pattern Recognition, 2007. CVPR '07. IEEE Conference on",
                                pages = { "1", "8" },
                                customData = {
                                                "keywords", "Gaussian processes;gradient methods;image classification;Fisher kernels;Gaussian mixture model;generative probability model;gradient vector;image categorization;pattern classification;visual vocabularies;Character generation;Feeds;Image databases;Kernel;Pattern classification;Power generation;Signal generators;Spatial databases;Visual databases;Vocabulary",
                                                "doi", "10.1109/CVPR.2007.383266",
                                                "ISSN", "1063-6919"
                                }
                                ),
                                @Reference(
                                                type = ReferenceType.Inproceedings,
                                                author = { "Perronnin, Florent", "S\'{a}nchez, Jorge", "Mensink, Thomas" },
                                                title = "Improving the Fisher Kernel for Large-scale Image Classification",
                                                year = "2010",
                                                booktitle = "Proceedings of the 11th European Conference on Computer Vision: Part IV",
                                                pages = { "143", "156" },
                                                url = "http://dl.acm.org/citation.cfm?id=1888089.1888101",
                                                publisher = "Springer-Verlag",
                                                series = "ECCV'10",
                                                customData = {
                                                                "isbn", "3-642-15560-X, 978-3-642-15560-4",
                                                                "location", "Heraklion, Crete, Greece",
                                                                "numpages", "14",
                                                                "acmid", "1888101",
                                                                "address", "Berlin, Heidelberg"
                                                }
                                )
                })
public class FisherVector<T> implements VectorAggregator<ArrayFeatureVector<T>, FloatFV> {
        private MixtureOfGaussians gmm;
        private boolean hellinger;
        private boolean l2normalise;

        /**
         * Construct with the given mixture of Gaussians and optional improvement
         * steps. The covariance matrices of the Gaussians are all assumed to be
         * diagonal, and will be treated as such; any non-zero off-diagonal values
         * will be completely ignored.
         *
         * @param gmm
         *            the mixture of Gaussians
         * @param hellinger
         *            if true then use Hellinger's kernel rather than the linear one,
         *            by signed square-rooting the values in the final vector
         * @param l2normalise
         *            if true then apply l2 normalisation to the final vector. This
         *            occurs after the Hellinger step if it is used.
         */
        public FisherVector(MixtureOfGaussians gmm, boolean hellinger, boolean l2normalise) {
                this.gmm = gmm;
                this.hellinger = hellinger;
                this.l2normalise = l2normalise;
        }

        /**
         * Construct the standard Fisher Vector encoder with the given mixture of
         * Gaussians. The covariance matrices of the Gaussians are all assumed to be
         * diagonal, and will be treated as such; any non-zero off-diagonal values
         * will be completely ignored.
         *
         * @param gmm
         *            the mixture of Gaussians
         */
        public FisherVector(MixtureOfGaussians gmm) {
                this(gmm, false);
        }

        /**
         * Construct the Fisher Vector encoder with the given mixture of Gaussians
         * and the optional improvement steps (in the sense of the VLFeat
         * documentation). The covariance matrices of the Gaussians are all assumed
         * to be diagonal, and will be treated as such; any non-zero off-diagonal
         * values will be completely ignored. For the improved version, the final
         * vector is projected into Hellinger's kernel and then l2 normalised.
         *
         * @param gmm
         *            the mixture of Gaussians
         * @param improved
         *            if true then Hellinger's kernel is used, and the vector is l2
         *            normalised.
         */
        public FisherVector(MixtureOfGaussians gmm, boolean improved) {
                this(gmm, improved, improved);
        }

        @Override
        public FloatFV aggregate(List<? extends LocalFeature<?, ? extends ArrayFeatureVector<T>>> features) {
                if (features == null || features.isEmpty())
                        return null;

                final int K = this.gmm.gaussians.length;
                final int D = features.get(0).getFeatureVector().length();

                final float[] vector = new float[2 * K * D];

                // cache all the features in an array
                final double[][] X = new double[features.size()][];
                for (int i = 0; i < X.length; i++) {
                        final LocalFeature<?, ? extends ArrayFeatureVector<T>> f = features.get(i);
                        X[i] = f.getFeatureVector().asDoubleVector();
                }

                // compute the posterior probabilities of all the features at once
                // (more efficient than scoring each feature individually)
                final double[][] posteriors = gmm.scoreSamples(X).secondObject();

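                // accumulate the posterior-weighted first-order (mean) and
                // second-order (variance) residuals of every sample against each
                // Gaussian; for the k-th Gaussian, elements [k*2D, k*2D+D) hold the
                // first-order terms and [k*2D+D, (k+1)*2D) the second-order terms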
                for (int p = 0; p < X.length; p++) {
                        final double[] xp = X[p];

                        for (int k = 0; k < K; k++) {
                                final double apk = posteriors[p][k];

                                if (apk < 1e-6)
                                        continue; // speed-up: skip terms with negligible posterior probability

                                final MultivariateGaussian gauss = gmm.gaussians[k];
                                final double[] mean = gauss.getMean().getArray()[0];

                                for (int j = 0; j < D; j++) {
                                        final double var = gauss.getCovariance(j, j);
                                        final double diff = (xp[j] - mean[j]) / Math.sqrt(var);

                                        vector[k * 2 * D + j] += apk * diff;
                                        vector[k * 2 * D + j + D] += apk * ((diff * diff) - 1);
                                }
                        }
                }

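                // scale the accumulated residuals by the closed-form approximation of
                // the Fisher information matrix: 1 / (N * sqrt(w_k)) for the
                // first-order terms and 1 / (N * sqrt(2 * w_k)) for the second-order
                // terms, where N is the number of samples and w_k is the k-th mixing
                // weight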
                for (int k = 0; k < K; k++) {
                        final double wt1 = 1.0 / (features.size() * Math.sqrt(gmm.weights[k]));
                        final double wt2 = 1.0 / (features.size() * Math.sqrt(2 * gmm.weights[k]));

                        for (int j = 0; j < D; j++) {
                                vector[k * 2 * D + j] *= wt1;
                                vector[k * 2 * D + j + D] *= wt2;
                        }
                }

                final FloatFV out = new FloatFV(vector);

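                // optionally map the vector into Hellinger's kernel by signed
                // square-rooting each element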
                if (hellinger) {
                        for (int i = 0; i < out.values.length; i++) {
                                out.values[i] = (float) (out.values[i] > 0 ? Math.sqrt(out.values[i])
                                                : -Math.sqrt(-out.values[i]));
                        }
                }

                if (l2normalise) {
                        // scale the vector by the reciprocal of its l2 (Euclidean) norm
                        double sumsq = 0;
                        for (int i = 0; i < out.values.length; i++) {
                                sumsq += (out.values[i] * out.values[i]);
                        }
                        final float norm = (float) (1.0 / Math.sqrt(sumsq));
                        for (int i = 0; i < out.values.length; i++) {
                                out.values[i] *= norm;
                        }
                }

                return out;
        }
}