/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *
 *   * Neither the name of the University of Southampton nor the names of its
 *     contributors may be used to endorse or promote products derived from this
 *     software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.image.feature.local.aggregate;

import java.util.List;

import org.openimaj.citation.annotation.Reference;
import org.openimaj.citation.annotation.ReferenceType;
import org.openimaj.citation.annotation.References;
import org.openimaj.feature.ArrayFeatureVector;
import org.openimaj.feature.FloatFV;
import org.openimaj.feature.local.LocalFeature;
import org.openimaj.math.statistics.distribution.MixtureOfGaussians;
import org.openimaj.math.statistics.distribution.MultivariateGaussian;
import org.openimaj.ml.gmm.GaussianMixtureModelEM;
import org.openimaj.ml.gmm.GaussianMixtureModelEM.CovarianceType;

/**
 * Implementation of the Fisher Vector (FV) encoding scheme. FV provides a way
 * of encoding a set of vectors (e.g. local features) as a single vector that
 * encapsulates the first and second order residuals of the vectors from a
 * Gaussian mixture model.
 * <p>
 * The dimensionality of the output vector is 2*K*D, where K is the number of
 * Gaussians in the mixture and D is the descriptor dimensionality. Note that
 * only the diagonal values of the Gaussian covariance matrices are used, and
 * thus you probably want to learn a {@link CovarianceType#Diagonal} or
 * {@link CovarianceType#Spherical} type Gaussian with the
 * {@link GaussianMixtureModelEM} class.
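 * <p>
 * Typical usage is to learn a mixture model from a large sample of
 * descriptors, and then encode the features of each image against it. The
 * following is a minimal sketch; it assumes that
 * {@link GaussianMixtureModelEM#estimate(double[][])} is used to learn the
 * mixture from a {@code double[][]} of sampled descriptors, and that
 * {@code features} holds the local features of a single image:
 *
 * <pre>
 * {@code
 * // learn a 64-component diagonal-covariance mixture from sampled descriptors
 * GaussianMixtureModelEM em = new GaussianMixtureModelEM(64, CovarianceType.Diagonal);
 * MixtureOfGaussians gmm = em.estimate(sampledDescriptors);
 *
 * // encode an image's features with the "improved" (Hellinger + l2) variant
 * FisherVector<float[]> fisher = new FisherVector<float[]>(gmm, true);
 * FloatFV encoding = fisher.aggregate(features);
 * }
 * </pre>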
 *
 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
 *
 * @param <T>
 *            Primitive array type of the {@link ArrayFeatureVector}s used by
 *            the {@link LocalFeature}s that will be processed.
 */
@References(
        references = {
                @Reference(
                        type = ReferenceType.Inproceedings,
                        author = { "Perronnin, F.", "Dance, C." },
                        title = "Fisher Kernels on Visual Vocabularies for Image Categorization",
                        year = "2007",
                        booktitle = "Computer Vision and Pattern Recognition, 2007. CVPR '07. IEEE Conference on",
                        pages = { "1", "8" },
                        customData = {
                                "keywords", "Gaussian processes;gradient methods;image classification;Fisher kernels;Gaussian mixture model;generative probability model;gradient vector;image categorization;pattern classification;visual vocabularies;Character generation;Feeds;Image databases;Kernel;Pattern classification;Power generation;Signal generators;Spatial databases;Visual databases;Vocabulary",
                                "doi", "10.1109/CVPR.2007.383266",
                                "ISSN", "1063-6919"
                        }),
                @Reference(
                        type = ReferenceType.Inproceedings,
                        author = { "Perronnin, Florent", "S\'{a}nchez, Jorge", "Mensink, Thomas" },
                        title = "Improving the Fisher Kernel for Large-scale Image Classification",
                        year = "2010",
                        booktitle = "Proceedings of the 11th European Conference on Computer Vision: Part IV",
                        pages = { "143", "156" },
                        url = "http://dl.acm.org/citation.cfm?id=1888089.1888101",
                        publisher = "Springer-Verlag",
                        series = "ECCV'10",
                        customData = {
                                "isbn", "3-642-15560-X, 978-3-642-15560-4",
                                "location", "Heraklion, Crete, Greece",
                                "numpages", "14",
                                "acmid", "1888101",
                                "address", "Berlin, Heidelberg"
                        })
        })
public class FisherVector<T> implements VectorAggregator<ArrayFeatureVector<T>, FloatFV> {
    private MixtureOfGaussians gmm;
    private boolean hellinger;
    private boolean l2normalise;

    /**
     * Construct with the given mixture of Gaussians and optional improvement
     * steps. The covariance matrices of the Gaussians are all assumed to be
     * diagonal and will be treated as such; any non-zero off-diagonal values
     * will be completely ignored.
     *
     * @param gmm
     *            the mixture of Gaussians
     * @param hellinger
     *            if true then use the Hellinger kernel rather than the linear
     *            one, by signed square-rooting the values in the final vector
     * @param l2normalise
     *            if true then apply l2 normalisation to the final vector. This
     *            occurs after the Hellinger step if that is used.
     */
    public FisherVector(MixtureOfGaussians gmm, boolean hellinger, boolean l2normalise) {
        this.gmm = gmm;
        this.hellinger = hellinger;
        this.l2normalise = l2normalise;
    }

    /**
     * Construct the standard Fisher Vector encoder with the given mixture of
     * Gaussians. The covariance matrices of the Gaussians are all assumed to
     * be diagonal and will be treated as such; any non-zero off-diagonal
     * values will be completely ignored.
     *
     * @param gmm
     *            the mixture of Gaussians
     */
    public FisherVector(MixtureOfGaussians gmm) {
        this(gmm, false);
    }

    /**
     * Construct the Fisher Vector encoder with the given mixture of Gaussians
     * and the optional improvement steps (in the sense of the VLFeat
     * documentation). The covariance matrices of the Gaussians are all
     * assumed to be diagonal and will be treated as such; any non-zero
     * off-diagonal values will be completely ignored. For the improved
     * version, the final vector is projected into the Hellinger kernel space
     * and then l2 normalised.
     *
     * @param gmm
     *            the mixture of Gaussians
     * @param improved
     *            if true then the Hellinger kernel is used, and the vector is
     *            l2 normalised.
     */
    public FisherVector(MixtureOfGaussians gmm, boolean improved) {
        this(gmm, improved, improved);
    }
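    /**
     * Compute the Fisher Vector of the given list of local features. For each
     * Gaussian k, with mixing weight w_k, mean mu_k and per-dimension
     * standard deviation sigma_k, the implementation below accumulates the
     * posterior-weighted first and second order residuals of the N input
     * vectors x_p, where gamma_p(k) is the posterior of x_p under Gaussian k:
     *
     * <pre>
     * G_mu(k)    = 1 / (N * sqrt(w_k))     * sum_p gamma_p(k) * ((x_p - mu_k) / sigma_k)
     * G_sigma(k) = 1 / (N * sqrt(2 * w_k)) * sum_p gamma_p(k) * (((x_p - mu_k) / sigma_k)^2 - 1)
     * </pre>
     *
     * The 2*K*D-dimensional result is the concatenation [G_mu(1), G_sigma(1),
     * ..., G_mu(K), G_sigma(K)], optionally followed by the Hellinger and l2
     * normalisation steps selected at construction time.
     */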
    @Override
    public FloatFV aggregate(List<? extends LocalFeature<?, ? extends ArrayFeatureVector<T>>> features) {
        if (features == null || features.isEmpty())
            return null;

        final int K = this.gmm.gaussians.length;
        final int D = features.get(0).getFeatureVector().length();

        final float[] vector = new float[2 * K * D];

        // cache all the features in an array
        final double[][] X = new double[features.size()][];
        for (int i = 0; i < X.length; i++) {
            final LocalFeature<?, ? extends ArrayFeatureVector<T>> f = features.get(i);
            X[i] = f.getFeatureVector().asDoubleVector();
        }

        // compute the posterior probabilities of all the features at once
        // (more efficient than doing them one at a time)
        final double[][] posteriors = gmm.scoreSamples(X).secondObject();

        // accumulate the posterior-weighted first and second order residuals
        for (int p = 0; p < X.length; p++) {
            final double[] xp = X[p];

            for (int k = 0; k < K; k++) {
                final double apk = posteriors[p][k];

                if (apk < 1e-6)
                    continue; // speed-up: ignore really small terms...

                final MultivariateGaussian gauss = gmm.gaussians[k];
                final double[] mean = gauss.getMean().getArray()[0];

                for (int j = 0; j < D; j++) {
                    final double var = gauss.getCovariance(j, j);
                    final double diff = (xp[j] - mean[j]) / Math.sqrt(var);

                    vector[k * 2 * D + j] += apk * diff;
                    vector[k * 2 * D + j + D] += apk * ((diff * diff) - 1);
                }
            }
        }

        // apply the per-Gaussian normalisation terms
        for (int k = 0; k < K; k++) {
            final double wt1 = 1.0 / (features.size() * Math.sqrt(gmm.weights[k]));
            final double wt2 = 1.0 / (features.size() * Math.sqrt(2 * gmm.weights[k]));

            for (int j = 0; j < D; j++) {
                vector[k * 2 * D + j] *= wt1;
                vector[k * 2 * D + j + D] *= wt2;
            }
        }

        final FloatFV out = new FloatFV(vector);

        if (hellinger) {
            // map to the Hellinger kernel by signed square-rooting
            for (int i = 0; i < out.values.length; i++) {
                out.values[i] = (float) (out.values[i] > 0 ? Math.sqrt(out.values[i])
                        : -1 * Math.sqrt(-1 * out.values[i]));
            }
        }

        if (l2normalise) {
            // scale the vector to unit l2 norm
            double sumsq = 0;
            for (int i = 0; i < out.values.length; i++) {
                sumsq += (out.values[i] * out.values[i]);
            }

            final float norm = (float) (1.0 / Math.sqrt(sumsq));
            for (int i = 0; i < out.values.length; i++) {
                out.values[i] *= norm;
            }
        }

        return out;
    }
}