/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *
 *   * Neither the name of the University of Southampton nor the names of its
 *     contributors may be used to endorse or promote products derived from this
 *     software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.video.processing.motion;

import org.apache.commons.math.complex.Complex;
import org.apache.commons.math.linear.Array2DRowFieldMatrix;
import org.apache.commons.math.linear.FieldLUDecompositionImpl;
import org.openimaj.image.FImage;
import org.openimaj.image.analysis.algorithm.TemplateMatcher;
import org.openimaj.image.analysis.algorithm.TemplateMatcher.Mode;
import org.openimaj.image.pixel.FValuePixel;
import org.openimaj.image.processing.algorithm.FourierTransform;
import org.openimaj.math.geometry.point.Point2d;
import org.openimaj.math.geometry.point.Point2dImpl;
import org.openimaj.math.geometry.shape.Rectangle;
import org.openimaj.video.VideoFrame;
import org.openimaj.video.VideoSubFrame;

import edu.emory.mathcs.jtransforms.fft.FloatFFT_2D;

/**
 * A set of algorithms for the motion estimator.
 *
 * @author David Dupplaw (dpd@ecs.soton.ac.uk)
 * @created 1 Mar 2012
 */
public abstract class MotionEstimatorAlgorithm
{
	/**
	 * Within a search window around each sub-image, detect the most likely
	 * match and thus the motion.
	 *
	 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
	 */
	public static class TEMPLATE_MATCH extends MotionEstimatorAlgorithm {
		private float searchProp;
		private Mode mode;

		/**
		 * Defaults to allowing a maximum movement of templateSize/2, matching
		 * with {@link Mode#NORM_SUM_SQUARED_DIFFERENCE}.
		 */
		public TEMPLATE_MATCH() {
			this.searchProp = .5f;
			this.mode = TemplateMatcher.Mode.NORM_SUM_SQUARED_DIFFERENCE;
		}

		/**
		 * Given the template's size, search around a border of size
		 * template * searchWindowBorderProp.
		 *
		 * @param searchWindowBorderProp
		 *            the proportion of the template size to search around
		 * @param mode
		 *            the matching mode
		 */
		public TEMPLATE_MATCH(float searchWindowBorderProp, Mode mode) {
			this.searchProp = searchWindowBorderProp;
			this.mode = mode;
		}

		@Override
		Point2d estimateMotion(VideoSubFrame<FImage> img1sub, VideoSubFrame<FImage>... imagesSub) {
			final VideoFrame<FImage> current = img1sub.extract();
			final VideoFrame<FImage> prev = imagesSub[0];
			final Rectangle prevSearchRect = imagesSub[0].roi;

			// Centre a search window of the configured proportion within
			// the previous frame's region of interest
			final int sw = (int) (prevSearchRect.width * this.searchProp);
			final int sh = (int) (prevSearchRect.height * this.searchProp);
			final int sx = (int) (prevSearchRect.x + ((prevSearchRect.width - sw) / 2.f));
			final int sy = (int) (prevSearchRect.y + ((prevSearchRect.height - sh) / 2.f));

			final Rectangle searchRect = new Rectangle(sx, sy, sw, sh);

			// Match the current sub-image as a template against the previous
			// frame, restricted to the search window
			final TemplateMatcher matcher = new TemplateMatcher(current.frame, mode, searchRect);
			matcher.analyseImage(prev.frame);
			final FValuePixel[] responses = matcher.getBestResponses(1);
			final FValuePixel firstBest = responses[0];

			final Point2d centerOfGrid = img1sub.roi.calculateCentroid();

			if (firstBest == null)
				return new Point2dImpl(0, 0);

			// The motion is the offset of the best response from the centre
			// of the template's region
			return centerOfGrid.minus(firstBest);
		}
	}
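
	/*
	 * A rough usage sketch for TEMPLATE_MATCH (illustrative only). Assuming
	 * two VideoSubFrame<FImage> objects, current and previous, covering the
	 * same region of interest in consecutive frames:
	 *
	 *   MotionEstimatorAlgorithm alg =
	 *       new MotionEstimatorAlgorithm.TEMPLATE_MATCH(0.5f,
	 *           TemplateMatcher.Mode.NORM_SUM_SQUARED_DIFFERENCE);
	 *   Point2d motion = alg.estimateMotion(current, previous);
	 *
	 * In practice these algorithms are usually not called directly, but are
	 * handed to a motion estimator that performs the sub-frame bookkeeping.
	 */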
	/**
	 * Basic phase correlation algorithm that finds peaks in the cross-power
	 * spectrum between two images. This is the basic implementation without
	 * sub-pixel accuracy.
	 */
	public static class PHASE_CORRELATION extends MotionEstimatorAlgorithm
	{
		/**
		 * Calculate the estimated motion vector between
		 * <code>imagesSub[0]</code>, which is first in the sequence, and
		 * <code>img2sub</code>, which is second in the sequence. This method
		 * uses phase correlation - the fact that translations in space can be
		 * seen as shifts in phase in the frequency domain. The returned vector
		 * will have a maximum horizontal displacement of
		 * <code>img2sub.frame.getWidth()/2</code>, a minimum displacement of
		 * <code>-img2sub.frame.getWidth()/2</code>, and similarly for the
		 * vertical displacement and height.
		 *
		 * @param img2sub
		 *            The second image in the sequence
		 * @param imagesSub
		 *            The previous frames in the sequence (only the first is
		 *            used)
		 * @return the estimated motion vector as a {@link Point2d} in absolute
		 *         x and y coordinates.
		 */
		@Override
		public Point2d estimateMotion(VideoSubFrame<FImage> img2sub,
				VideoSubFrame<FImage>... imagesSub)
		{
			// The previous image will be the first in the images array
			final FImage img1 = imagesSub[0].extract().frame;
			final VideoFrame<FImage> img2 = img2sub.extract();

			// No previous frame?
			if (img1 == null)
				return new Point2dImpl(0, 0);

			// The images must have the same shape and must be square
			if (img1.getRows() != img2.frame.getRows() ||
					img1.getCols() != img2.frame.getCols() ||
					img1.getCols() != img2.frame.getRows())
				return new Point2dImpl(0, 0);

			// Prepare and perform an FFT for each of the incoming images.
			final int h = img1.getRows();
			final int w = img1.getCols();

			try
			{
				final FloatFFT_2D fft1 = new FloatFFT_2D(h, w);
				final FloatFFT_2D fft2 = new FloatFFT_2D(h, w);
				final float[][] data1 = FourierTransform.prepareData(img1, h, w, false);
				final float[][] data2 = FourierTransform.prepareData(img2.frame, h, w, false);
				fft1.complexForward(data1);
				fft2.complexForward(data2);

				// Multiply (element-wise) the fft with the conjugate of the
				// other fft to give the cross-power spectrum
				Complex[][] cfft = new Complex[h][w];
				for (int y = 0; y < h; y++)
				{
					for (int x = 0; x < w; x++)
					{
						final float re1 = data1[y][x * 2];
						final float im1 = data1[y][1 + x * 2];
						final float re2 = data2[y][x * 2];
						final float im2 = data2[y][1 + x * 2];

						final Complex c1 = new Complex(re1, im1);
						final Complex c2 = new Complex(re2, -im2);
						cfft[y][x] = c1.multiply(c2);
					}
				}

				// ----------------------------------------
				// Normalise by the determinant
				// ----------------------------------------
				// First calculate the determinant
				final Array2DRowFieldMatrix<Complex> cmat =
						new Array2DRowFieldMatrix<Complex>(cfft);
				final FieldLUDecompositionImpl<Complex> luDecomp =
						new FieldLUDecompositionImpl<Complex>(cmat);
				final Complex det = luDecomp.getDeterminant();

				// scalarMultiply() returns a new matrix rather than scaling
				// in place, so its result must be captured. Convert back to
				// an array for doing the inverse FFTing.
				cfft = cmat.scalarMultiply(new Complex(1d / det.abs(), 0)).getData();
				for (int y = 0; y < h; y++)
				{
					for (int x = 0; x < w; x++)
					{
						data1[y][x * 2] = (float) cfft[y][x].getReal();
						data1[y][1 + x * 2] = (float) cfft[y][x].getImaginary();
					}
				}

				// Perform the inverse FFT
				fft1.complexInverse(data1, false);

				// Get the data back out
				FourierTransform.unprepareData(data1, img1, false);

				// Get the estimated motion vector from the peak in the space;
				// peaks beyond the half-way point wrap around to negative
				// displacements
				final FValuePixel p = img1.maxPixel();
				return new Point2dImpl(
						-(p.x > w / 2 ? p.x - w : p.x),
						-(p.y > h / 2 ? p.y - h : p.y));
			} catch (final Exception e)
			{
				return new Point2dImpl(0, 0);
			}
		}
	};
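
	/*
	 * The relation PHASE_CORRELATION exploits: if the second image is the
	 * first translated by (dx, dy), the Fourier shift theorem gives
	 *
	 *   F2(u, v) = F1(u, v) * exp(-i * 2 * pi * (u * dx / W + v * dy / H))
	 *
	 * so the conjugate product F1(u, v) * conj(F2(u, v)) carries the
	 * displacement purely in its phase, and its inverse FFT peaks at
	 * (dx, dy). The classical formulation normalises this product
	 * element-wise by its magnitude; the implementation above instead scales
	 * the whole matrix uniformly by the magnitude of its determinant, which
	 * leaves the peak location unchanged with respect to the plain
	 * cross-correlation.
	 */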
	/**
	 * Estimate the motion to the given sub-image, <code>img1sub</code>, from
	 * the previous frames. The previous frames will be given in reverse order,
	 * so that imagesSub[0] will be the previous frame, imagesSub[1] the frame
	 * before that, etc. The number of frames given will be at most that given
	 * by {@link #requiredNumberOfFrames()}. It could be less at the beginning
	 * of the video. If you require more frames, return an empty motion
	 * vector - that is, (0,0).
	 *
	 * @param img1sub
	 *            The image to which we want to estimate the motion.
	 * @param imagesSub
	 *            The previous frames in reverse order
	 * @return The estimated motion vector.
	 */
	abstract Point2d estimateMotion(VideoSubFrame<FImage> img1sub,
			VideoSubFrame<FImage>... imagesSub);

	/**
	 * The number of previous frames required for the given motion estimation
	 * algorithm to work. The default is 1, which means the algorithm will only
	 * receive the previous frame. If more are required, override this method
	 * and return the required number.
	 *
	 * @return The required number of frames to pass to the algorithm.
	 */
	protected int requiredNumberOfFrames()
	{
		return 1;
	}
}
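
/*
 * A minimal sketch of how a custom algorithm plugs into the contract above.
 * This class is purely illustrative ("TWO_FRAME_AVERAGE" is a hypothetical
 * name, and the averaging scheme is an assumption, not part of OpenIMAJ): it
 * requests two previous frames via requiredNumberOfFrames() and averages the
 * template-match motion estimated against each of them.
 */
class TWO_FRAME_AVERAGE extends MotionEstimatorAlgorithm
{
	private final TEMPLATE_MATCH matcher = new MotionEstimatorAlgorithm.TEMPLATE_MATCH();

	@Override
	protected int requiredNumberOfFrames()
	{
		// Ask the caller for the two most recent previous frames
		return 2;
	}

	@Override
	Point2d estimateMotion(VideoSubFrame<FImage> img1sub,
			VideoSubFrame<FImage>... imagesSub)
	{
		// Fewer frames than requested (e.g. at the start of the video):
		// return an empty motion vector, as the contract suggests
		if (imagesSub.length < 2)
			return new Point2dImpl(0, 0);

		// Motion against the previous frame, and against the frame before
		// that (which should show roughly twice the displacement)
		final Point2d m1 = matcher.estimateMotion(img1sub, imagesSub[0]);
		final Point2d m2 = matcher.estimateMotion(img1sub, imagesSub[1]);

		return new Point2dImpl(
				(m1.getX() + m2.getX() / 2) / 2,
				(m1.getY() + m2.getY() / 2) / 2);
	}
}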