001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.image.processing.edges; 031 032import java.util.ArrayList; 033import java.util.Collections; 034import java.util.Comparator; 035import java.util.Iterator; 036import java.util.List; 037 038import org.openimaj.citation.annotation.Reference; 039import org.openimaj.citation.annotation.ReferenceType; 040import org.openimaj.image.FImage; 041import org.openimaj.image.pixel.Pixel; 042import org.openimaj.image.pixel.util.LineIterators; 043import org.openimaj.image.processing.convolution.FSobel; 044import org.openimaj.image.processor.SinglebandImageProcessor; 045import org.openimaj.math.geometry.line.Line2d; 046import org.openimaj.math.util.FloatArrayStatsUtils; 047 048/** 049 * Implementation of the Stroke Width Transform. 050 * <p> 051 * The Stroke Width Transform detects strokes and their respective widths from 052 * an image. This implementation contains a number of enhancements to improve 053 * the quality of the detected strokes, based on ideas from <a 054 * href="http://libccv.org/lib/ccv-swt/">LibCCV</a> implementation: 055 * <ul> 056 * <li>We search around the stroke in a small window for endpoints.</li> 057 * <li>We search around the endpoint in a small window for matching gradients.</li> 058 * <li>In addition to the stroke along the gradient, we also stroke at +/-45 059 * degrees from this.</li> 060 * </ul> 061 * 062 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 063 * 064 */ 065@Reference( 066 type = ReferenceType.Inproceedings, 067 author = { "Epshtein, B.", "Ofek, E.", "Wexler, Y." }, 068 title = "Detecting text in natural scenes with stroke width transform", 069 year = "2010", 070 booktitle = "Computer Vision and Pattern Recognition (CVPR), 2010 IEEE Conference on", 071 pages = { "2963", "2970" }, 072 customData = { 073 "keywords", 074 "image processing;text analysis;image operator;image pixel;natural images;natural scenes;stroke width transform;text detection;Colored noise;Computer vision;Engines;Filter bank;Geometry;Image segmentation;Layout;Optical character recognition software;Pixel;Robustness", 075 "doi", "10.1109/CVPR.2010.5540041", 076 "ISSN", "1063-6919" 077 }) 078public class StrokeWidthTransform implements SinglebandImageProcessor<Float, FImage> { 079 private final static int[][] edgeSearchRegion = { { 0, 0 }, { -1, 0 }, { 1, 0 }, { 0, -1 }, { 0, 1 } }; 080 private final static int[][] gradSearchRegion = { 081 { 0, 0 }, { -1, 0 }, { 1, 0 }, { 0, -1 }, { 0, 1 }, { -1, -1 }, { 1, -1 }, { -1, 1 }, { 1, 1 } }; 082 083 private final CannyEdgeDetector canny; 084 private boolean direction; 085 private int maxStrokeWidth = 70; 086 087 /** 088 * Construct the SWT with the given sigma for smoothing in the Canny phase 089 * The Canny thresholds are chosen automatically. 090 * 091 * @param direction 092 * direction of the SWT; true for dark on light, false for light 093 * on dark. 094 * @param sigma 095 * the amount of initial blurring 096 */ 097 public StrokeWidthTransform(boolean direction, float sigma) { 098 this.direction = direction; 099 this.canny = new CannyEdgeDetector(sigma); 100 } 101 102 /** 103 * Construct with all Canny parameters set manually. 104 * 105 * @param direction 106 * direction of the SWT; true for dark on light, false for light 107 * on dark. 108 * 109 * @param lowThresh 110 * lower hysteresis threshold. 111 * @param highThresh 112 * upper hysteresis threshold. 113 * @param sigma 114 * the amount of initial blurring. 115 */ 116 public StrokeWidthTransform(boolean direction, float lowThresh, float highThresh, float sigma) { 117 this.direction = direction; 118 this.canny = new CannyEdgeDetector(lowThresh, highThresh, sigma); 119 } 120 121 @Override 122 public void processImage(FImage image) { 123 final FSobel grads = new FSobel(canny.sigma); 124 125 final FImage edges = image.clone(); 126 canny.processImage(edges, grads); 127 128 image.fill(Float.POSITIVE_INFINITY); 129 130 final List<List<Pixel>> rays = generateRays(edges, grads.dx, grads.dy, direction, image); 131 medianFilter(image, rays); 132 } 133 134 private List<List<Pixel>> generateRays(FImage edges, FImage dx, FImage dy, boolean detectDark, FImage output) { 135 final List<List<Pixel>> rays = new ArrayList<List<Pixel>>(); 136 137 final float gradDirection = detectDark ? -1 : 1; 138 139 for (int y = 0; y < output.height; y++) { 140 for (int x = 0; x < output.width; x++) { 141 if (edges.pixels[y][x] > 0) { 142 traceRay(edges, dx, dy, detectDark, output, gradDirection, x, y, rays, 1, 0, 0, 1); 143 traceRay(edges, dx, dy, detectDark, output, gradDirection, x, y, rays, 1, 1, -1, 1); 144 traceRay(edges, dx, dy, detectDark, output, gradDirection, x, y, rays, 1, -1, 1, 1); 145 } 146 } 147 } 148 return rays; 149 } 150 151 private void traceRay(FImage edges, FImage dx, FImage dy, boolean 152 detectDark, FImage output, float gradDirection, 153 int x, int y, List<List<Pixel>> rays, int xx, int xy, int yx, int yy) 154 { 155 final float gradX = (xx * dx.pixels[y][x] + xy * dy.pixels[y][x]) * gradDirection; 156 final float gradY = (yy * dy.pixels[y][x] + yx * dx.pixels[y][x]) * gradDirection; 157 158 final Iterator<Pixel> iterator = LineIterators.bresenham(x, y, gradX, gradY); 159 final Pixel start = iterator.next().clone(); // start of ray 160 161 for (int j = 0; j < maxStrokeWidth; j++) { 162 final Pixel current = iterator.next(); 163 164 // check bounds 165 if (current.x < 1 || current.x >= output.width - 1 || current.y < 1 || current.y >= output.height - 1) { 166 break; 167 } 168 169 if (Math.abs(current.x - start.x) < 2 && Math.abs(current.y - start.y) < 2) 170 continue; 171 172 Pixel end = null; 173 174 // search over the around the current pixel region for an edge 175 for (int i = 0; i < edgeSearchRegion.length; i++) { 176 final int currentX = current.x + edgeSearchRegion[i][0]; 177 final int currentY = current.y + edgeSearchRegion[i][1]; 178 179 if (edges.pixels[currentY][currentX] > 0) { 180 end = new Pixel(currentX, currentY); 181 break; 182 } 183 } 184 185 if (end != null) { 186 // edge found; now search for matching gradient in surrounding 187 // area 188 boolean found = false; 189 190 final float startGradX = dx.pixels[start.y][start.x]; 191 final float startGradY = dy.pixels[start.y][start.x]; 192 193 for (int i = 0; i < gradSearchRegion.length; i++) { 194 final int currentX = end.x + gradSearchRegion[i][0]; 195 final int currentY = end.y + gradSearchRegion[i][1]; 196 197 final float currentGradX = dx.pixels[currentY][currentX]; 198 final float currentGradY = dy.pixels[currentY][currentX]; 199 // final float currentMag = (float) Math.sqrt((currentGradX 200 // * currentGradX) 201 // + (currentGradY * currentGradY)) 202 // * gradDirection; 203 // 204 // currentGradX = currentGradX / currentMag; 205 // currentGradY = currentGradY / currentMag; 206 // 207 // if (Math.acos(gradX * -currentGradX + gradY * 208 // -currentGradY) < Math.PI / 6.0) { 209 // found = true; 210 // break; 211 // } 212 213 final float tn = startGradY * currentGradX - startGradX * currentGradY; 214 final float td = startGradX * currentGradX + startGradY * currentGradY; 215 if (tn * 7 < -td * 4 && tn * 7 > td * 4) 216 { 217 found = true; 218 break; 219 } 220 } 221 222 // if we found an opposite grad, then we fill in the ray using 223 // the supercover to select all pixels that the ray passes 224 // through 225 if (found) { 226 final float length = (float) Line2d.distance(start, end); 227 final List<Pixel> ray = LineIterators.supercoverAsList(start, end); 228 for (final Pixel p : ray) { 229 output.pixels[p.y][p.x] = Math.min(length, output.pixels[p.y][p.x]); 230 } 231 232 rays.add(ray); 233 } 234 break; 235 } 236 } 237 } 238 239 private void medianFilter(FImage output, List<List<Pixel>> rays) { 240 if (rays.size() == 0) 241 return; 242 243 Collections.sort(rays, new Comparator<List<Pixel>>() { 244 @Override 245 public int compare(List<Pixel> o1, List<Pixel> o2) { 246 return o1.size() - o2.size(); 247 } 248 }); 249 250 final float[] working = new float[rays.get(rays.size() - 1).size()]; 251 252 for (final List<Pixel> ray : rays) { 253 final int length = ray.size(); 254 for (int i = 0; i < length; i++) { 255 final Pixel pixel = ray.get(i); 256 working[i] = output.pixels[pixel.y][pixel.x]; 257 } 258 259 final float median = FloatArrayStatsUtils.median(working, 0, length); 260 for (int i = 0; i < length; i++) { 261 final Pixel pixel = ray.get(i); 262 263 if (output.pixels[pixel.y][pixel.x] > median) 264 output.pixels[pixel.y][pixel.x] = median; 265 } 266 } 267 } 268 269 /** 270 * Normalise the output image of the {@link StrokeWidthTransform} for 271 * display. This replaces all {@link Float#POSITIVE_INFINITY} pixels with a 272 * value of 1, and min-max normalises all the valid stroke pixels to be 273 * between 0 and 1. 274 * 275 * @param input 276 * the image to normalise 277 * @return the normalised image 278 */ 279 public static FImage normaliseImage(FImage input) { 280 final FImage output = input.clone(); 281 282 float maxVal = 0; 283 float minVal = Float.MAX_VALUE; 284 for (int row = 0; row < input.height; row++) { 285 for (int col = 0; col < input.width; col++) { 286 final float val = input.pixels[row][col]; 287 if (val != Float.POSITIVE_INFINITY) { 288 maxVal = Math.max(val, maxVal); 289 minVal = Math.min(val, minVal); 290 } 291 } 292 } 293 294 final float difference = maxVal - minVal; 295 for (int row = 0; row < input.height; row++) { 296 for (int col = 0; col < input.width; col++) { 297 final float val = input.pixels[row][col]; 298 if (val == Float.POSITIVE_INFINITY) { 299 output.pixels[row][col] = 1; 300 } else { 301 output.pixels[row][col] = (val - minVal) / difference; 302 } 303 } 304 } 305 return output; 306 } 307 308 /** 309 * Get the direction of the SWT; true for dark on light, false for light 310 * 311 * @return the direction 312 */ 313 public boolean getDirection() { 314 return direction; 315 } 316 317 /** 318 * Set the direction of the SWT; true for dark on light, false for light 319 * 320 * @param direction 321 * the direction to set 322 */ 323 public void setDirection(boolean direction) { 324 this.direction = direction; 325 } 326 327}