001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.image.processing.edges;
031
032import java.util.ArrayList;
033import java.util.Collections;
034import java.util.Comparator;
035import java.util.Iterator;
036import java.util.List;
037
038import org.openimaj.citation.annotation.Reference;
039import org.openimaj.citation.annotation.ReferenceType;
040import org.openimaj.image.FImage;
041import org.openimaj.image.pixel.Pixel;
042import org.openimaj.image.pixel.util.LineIterators;
043import org.openimaj.image.processing.convolution.FSobel;
044import org.openimaj.image.processor.SinglebandImageProcessor;
045import org.openimaj.math.geometry.line.Line2d;
046import org.openimaj.math.util.FloatArrayStatsUtils;
047
048/**
049 * Implementation of the Stroke Width Transform.
050 * <p>
051 * The Stroke Width Transform detects strokes and their respective widths from
052 * an image. This implementation contains a number of enhancements to improve
053 * the quality of the detected strokes, based on ideas from <a
054 * href="http://libccv.org/lib/ccv-swt/">LibCCV</a> implementation:
055 * <ul>
056 * <li>We search around the stroke in a small window for endpoints.</li>
057 * <li>We search around the endpoint in a small window for matching gradients.</li>
058 * <li>In addition to the stroke along the gradient, we also stroke at +/-45
059 * degrees from this.</li>
060 * </ul>
061 * 
062 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
063 * 
064 */
065@Reference(
066                type = ReferenceType.Inproceedings,
067                author = { "Epshtein, B.", "Ofek, E.", "Wexler, Y." },
068                title = "Detecting text in natural scenes with stroke width transform",
069                year = "2010",
070                booktitle = "Computer Vision and Pattern Recognition (CVPR), 2010 IEEE Conference on",
071                pages = { "2963", "2970" },
072                customData = {
073                                "keywords",
074                                "image processing;text analysis;image operator;image pixel;natural images;natural scenes;stroke width transform;text detection;Colored noise;Computer vision;Engines;Filter bank;Geometry;Image segmentation;Layout;Optical character recognition software;Pixel;Robustness",
075                                "doi", "10.1109/CVPR.2010.5540041",
076                                "ISSN", "1063-6919"
077                })
078public class StrokeWidthTransform implements SinglebandImageProcessor<Float, FImage> {
079        private final static int[][] edgeSearchRegion = { { 0, 0 }, { -1, 0 }, { 1, 0 }, { 0, -1 }, { 0, 1 } };
080        private final static int[][] gradSearchRegion = {
081                        { 0, 0 }, { -1, 0 }, { 1, 0 }, { 0, -1 }, { 0, 1 }, { -1, -1 }, { 1, -1 }, { -1, 1 }, { 1, 1 } };
082
083        private final CannyEdgeDetector canny;
084        private boolean direction;
085        private int maxStrokeWidth = 70;
086
087        /**
088         * Construct the SWT with the given sigma for smoothing in the Canny phase
089         * The Canny thresholds are chosen automatically.
090         * 
091         * @param direction
092         *            direction of the SWT; true for dark on light, false for light
093         *            on dark.
094         * @param sigma
095         *            the amount of initial blurring
096         */
097        public StrokeWidthTransform(boolean direction, float sigma) {
098                this.direction = direction;
099                this.canny = new CannyEdgeDetector(sigma);
100        }
101
102        /**
103         * Construct with all Canny parameters set manually.
104         * 
105         * @param direction
106         *            direction of the SWT; true for dark on light, false for light
107         *            on dark.
108         * 
109         * @param lowThresh
110         *            lower hysteresis threshold.
111         * @param highThresh
112         *            upper hysteresis threshold.
113         * @param sigma
114         *            the amount of initial blurring.
115         */
116        public StrokeWidthTransform(boolean direction, float lowThresh, float highThresh, float sigma) {
117                this.direction = direction;
118                this.canny = new CannyEdgeDetector(lowThresh, highThresh, sigma);
119        }
120
121        @Override
122        public void processImage(FImage image) {
123                final FSobel grads = new FSobel(canny.sigma);
124
125                final FImage edges = image.clone();
126                canny.processImage(edges, grads);
127
128                image.fill(Float.POSITIVE_INFINITY);
129
130                final List<List<Pixel>> rays = generateRays(edges, grads.dx, grads.dy, direction, image);
131                medianFilter(image, rays);
132        }
133
134        private List<List<Pixel>> generateRays(FImage edges, FImage dx, FImage dy, boolean detectDark, FImage output) {
135                final List<List<Pixel>> rays = new ArrayList<List<Pixel>>();
136
137                final float gradDirection = detectDark ? -1 : 1;
138
139                for (int y = 0; y < output.height; y++) {
140                        for (int x = 0; x < output.width; x++) {
141                                if (edges.pixels[y][x] > 0) {
142                                        traceRay(edges, dx, dy, detectDark, output, gradDirection, x, y, rays, 1, 0, 0, 1);
143                                        traceRay(edges, dx, dy, detectDark, output, gradDirection, x, y, rays, 1, 1, -1, 1);
144                                        traceRay(edges, dx, dy, detectDark, output, gradDirection, x, y, rays, 1, -1, 1, 1);
145                                }
146                        }
147                }
148                return rays;
149        }
150
151        private void traceRay(FImage edges, FImage dx, FImage dy, boolean
152                        detectDark, FImage output, float gradDirection,
153                        int x, int y, List<List<Pixel>> rays, int xx, int xy, int yx, int yy)
154        {
155                final float gradX = (xx * dx.pixels[y][x] + xy * dy.pixels[y][x]) * gradDirection;
156                final float gradY = (yy * dy.pixels[y][x] + yx * dx.pixels[y][x]) * gradDirection;
157
158                final Iterator<Pixel> iterator = LineIterators.bresenham(x, y, gradX, gradY);
159                final Pixel start = iterator.next().clone(); // start of ray
160
161                for (int j = 0; j < maxStrokeWidth; j++) {
162                        final Pixel current = iterator.next();
163
164                        // check bounds
165                        if (current.x < 1 || current.x >= output.width - 1 || current.y < 1 || current.y >= output.height - 1) {
166                                break;
167                        }
168
169                        if (Math.abs(current.x - start.x) < 2 && Math.abs(current.y - start.y) < 2)
170                                continue;
171
172                        Pixel end = null;
173
174                        // search over the around the current pixel region for an edge
175                        for (int i = 0; i < edgeSearchRegion.length; i++) {
176                                final int currentX = current.x + edgeSearchRegion[i][0];
177                                final int currentY = current.y + edgeSearchRegion[i][1];
178
179                                if (edges.pixels[currentY][currentX] > 0) {
180                                        end = new Pixel(currentX, currentY);
181                                        break;
182                                }
183                        }
184
185                        if (end != null) {
186                                // edge found; now search for matching gradient in surrounding
187                                // area
188                                boolean found = false;
189
190                                final float startGradX = dx.pixels[start.y][start.x];
191                                final float startGradY = dy.pixels[start.y][start.x];
192
193                                for (int i = 0; i < gradSearchRegion.length; i++) {
194                                        final int currentX = end.x + gradSearchRegion[i][0];
195                                        final int currentY = end.y + gradSearchRegion[i][1];
196
197                                        final float currentGradX = dx.pixels[currentY][currentX];
198                                        final float currentGradY = dy.pixels[currentY][currentX];
199                                        // final float currentMag = (float) Math.sqrt((currentGradX
200                                        // * currentGradX)
201                                        // + (currentGradY * currentGradY))
202                                        // * gradDirection;
203                                        //
204                                        // currentGradX = currentGradX / currentMag;
205                                        // currentGradY = currentGradY / currentMag;
206                                        //
207                                        // if (Math.acos(gradX * -currentGradX + gradY *
208                                        // -currentGradY) < Math.PI / 6.0) {
209                                        // found = true;
210                                        // break;
211                                        // }
212
213                                        final float tn = startGradY * currentGradX - startGradX * currentGradY;
214                                        final float td = startGradX * currentGradX + startGradY * currentGradY;
215                                        if (tn * 7 < -td * 4 && tn * 7 > td * 4)
216                                        {
217                                                found = true;
218                                                break;
219                                        }
220                                }
221
222                                // if we found an opposite grad, then we fill in the ray using
223                                // the supercover to select all pixels that the ray passes
224                                // through
225                                if (found) {
226                                        final float length = (float) Line2d.distance(start, end);
227                                        final List<Pixel> ray = LineIterators.supercoverAsList(start, end);
228                                        for (final Pixel p : ray) {
229                                                output.pixels[p.y][p.x] = Math.min(length, output.pixels[p.y][p.x]);
230                                        }
231
232                                        rays.add(ray);
233                                }
234                                break;
235                        }
236                }
237        }
238
239        private void medianFilter(FImage output, List<List<Pixel>> rays) {
240                if (rays.size() == 0)
241                        return;
242
243                Collections.sort(rays, new Comparator<List<Pixel>>() {
244                        @Override
245                        public int compare(List<Pixel> o1, List<Pixel> o2) {
246                                return o1.size() - o2.size();
247                        }
248                });
249
250                final float[] working = new float[rays.get(rays.size() - 1).size()];
251
252                for (final List<Pixel> ray : rays) {
253                        final int length = ray.size();
254                        for (int i = 0; i < length; i++) {
255                                final Pixel pixel = ray.get(i);
256                                working[i] = output.pixels[pixel.y][pixel.x];
257                        }
258
259                        final float median = FloatArrayStatsUtils.median(working, 0, length);
260                        for (int i = 0; i < length; i++) {
261                                final Pixel pixel = ray.get(i);
262
263                                if (output.pixels[pixel.y][pixel.x] > median)
264                                        output.pixels[pixel.y][pixel.x] = median;
265                        }
266                }
267        }
268
269        /**
270         * Normalise the output image of the {@link StrokeWidthTransform} for
271         * display. This replaces all {@link Float#POSITIVE_INFINITY} pixels with a
272         * value of 1, and min-max normalises all the valid stroke pixels to be
273         * between 0 and 1.
274         * 
275         * @param input
276         *            the image to normalise
277         * @return the normalised image
278         */
279        public static FImage normaliseImage(FImage input) {
280                final FImage output = input.clone();
281
282                float maxVal = 0;
283                float minVal = Float.MAX_VALUE;
284                for (int row = 0; row < input.height; row++) {
285                        for (int col = 0; col < input.width; col++) {
286                                final float val = input.pixels[row][col];
287                                if (val != Float.POSITIVE_INFINITY) {
288                                        maxVal = Math.max(val, maxVal);
289                                        minVal = Math.min(val, minVal);
290                                }
291                        }
292                }
293
294                final float difference = maxVal - minVal;
295                for (int row = 0; row < input.height; row++) {
296                        for (int col = 0; col < input.width; col++) {
297                                final float val = input.pixels[row][col];
298                                if (val == Float.POSITIVE_INFINITY) {
299                                        output.pixels[row][col] = 1;
300                                } else {
301                                        output.pixels[row][col] = (val - minVal) / difference;
302                                }
303                        }
304                }
305                return output;
306        }
307
308        /**
309         * Get the direction of the SWT; true for dark on light, false for light
310         * 
311         * @return the direction
312         */
313        public boolean getDirection() {
314                return direction;
315        }
316
317        /**
318         * Set the direction of the SWT; true for dark on light, false for light
319         * 
320         * @param direction
321         *            the direction to set
322         */
323        public void setDirection(boolean direction) {
324                this.direction = direction;
325        }
326
327}