001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.image.dataset;
031
032import java.io.IOException;
033import java.io.InputStream;
034import java.net.URL;
035import java.util.ArrayList;
036import java.util.List;
037
038import net.billylieurance.azuresearch.AbstractAzureSearchQuery.AZURESEARCH_FORMAT;
039import net.billylieurance.azuresearch.AzureSearchImageQuery;
040import net.billylieurance.azuresearch.AzureSearchImageResult;
041
042import org.openimaj.data.dataset.ReadableListDataset;
043import org.openimaj.data.identity.Identifiable;
044import org.openimaj.image.Image;
045import org.openimaj.io.HttpUtils;
046import org.openimaj.io.InputStreamObjectReader;
047import org.openimaj.util.api.auth.common.BingAPIToken;
048
049/**
050 * Image datasets dynamically created from the Bing search API.
051 * 
052 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
053 * 
054 * @param <IMAGE>
055 *            The type of {@link Image} instance held by the dataset.
056 */
057public class BingImageDataset<IMAGE extends Image<?, IMAGE>> extends ReadableListDataset<IMAGE, InputStream>
058                implements
059                        Identifiable
060{
061        List<AzureSearchImageResult> images;
062        AzureSearchImageQuery query;
063
064        protected BingImageDataset(InputStreamObjectReader<IMAGE> reader, List<AzureSearchImageResult> results,
065                        AzureSearchImageQuery query)
066        {
067                super(reader);
068                this.images = results;
069                this.query = query;
070        }
071
072        @Override
073        public IMAGE getInstance(int index) {
074                return read(getImage(index));
075        }
076
077        private IMAGE read(AzureSearchImageResult next) {
078                if (next == null)
079                        return null;
080
081                InputStream stream = null;
082                try {
083                        final String imageURL = next.getMediaUrl();
084                        stream = HttpUtils.readURL(new URL(imageURL));
085
086                        return reader.read(stream);
087                } catch (final IOException e) {
088                        throw new RuntimeException(e);
089                } finally {
090                        try {
091                                if (stream != null)
092                                        stream.close();
093                        } catch (final IOException e) {
094                                // ignore
095                        }
096                }
097        }
098
099        @Override
100        public int numInstances() {
101                return images.size();
102        }
103
104        /**
105         * Get the underlying {@link AzureSearchImageResult} objects that back the
106         * dataset.
107         * 
108         * @return the underlying {@link AzureSearchImageResult} objects
109         */
110        public List<AzureSearchImageResult> getImages() {
111                return images;
112        }
113
114        /**
115         * Get the specific underlying {@link AzureSearchImageResult} for the given
116         * index.
117         * 
118         * @param index
119         *            the index
120         * @return the specific {@link AzureSearchImageResult} for the given index.
121         */
122        public AzureSearchImageResult getImage(int index) {
123                return images.get(index);
124        }
125
126        private static List<AzureSearchImageResult> performSinglePageQuery(AzureSearchImageQuery query) {
127                query.setFormat(AZURESEARCH_FORMAT.XML);
128                query.doQuery();
129
130                return query.getQueryResult().getASRs();
131        }
132
133        private static List<AzureSearchImageResult> performQuery(AzureSearchImageQuery query, int number) {
134                if (number <= 0)
135                        number = 1000;
136
137                query.setPage(0);
138                query.setPerPage(50);
139                query.setFormat(AZURESEARCH_FORMAT.XML);
140
141                final List<AzureSearchImageResult> images = new ArrayList<AzureSearchImageResult>();
142                for (int i = 0; i < 20; i++) {
143                        final List<AzureSearchImageResult> res = performSinglePageQuery(query);
144
145                        if (res == null || res.size() == 0)
146                                break;
147
148                        images.addAll(res);
149
150                        if (images.size() >= number)
151                                break;
152                }
153
154                if (images.size() <= number)
155                        return images;
156                return images.subList(0, number);
157        }
158
159        /**
160         * Perform a search with the given query. The appid must have been set
161         * externally.
162         * 
163         * @see AzureSearchImageQuery#setAppid(String)
164         * 
165         * @param reader
166         *            the reader with which to load the images
167         * @param query
168         *            the query
169         * @param number
170         *            the target number of results; the resultant dataset may
171         *            contain fewer images than specified.
172         * @return a new {@link BingImageDataset} created from the query.
173         */
174        public static <IMAGE extends Image<?, IMAGE>> BingImageDataset<IMAGE> create(InputStreamObjectReader<IMAGE> reader,
175                        AzureSearchImageQuery query, int number)
176        {
177                return new BingImageDataset<IMAGE>(reader, performQuery(query, number), query);
178        }
179
180        /**
181         * Perform a search with the given query. The given api token will be used
182         * to set the appid in the query object.
183         * 
184         * @param reader
185         *            the reader with which to load the images
186         * @param token
187         *            the api authentication token
188         * @param query
189         *            the query
190         * @param number
191         *            the target number of results; the resultant dataset may
192         *            contain fewer images than specified.
193         * @return a new {@link BingImageDataset} created from the query.
194         */
195        public static <IMAGE extends Image<?, IMAGE>> BingImageDataset<IMAGE> create(InputStreamObjectReader<IMAGE> reader,
196                        BingAPIToken token, AzureSearchImageQuery query, int number)
197        {
198                query.setAppid(token.accountKey);
199                return new BingImageDataset<IMAGE>(reader, performQuery(query, number), query);
200        }
201
202        /**
203         * Perform a search with the given query string and filters.
204         * 
205         * @param reader
206         *            the reader with which to load the images
207         * @param token
208         *            the api authentication token
209         * @param query
210         *            the query
211         * @param imageFilters
212         *            the image filters
213         * @param number
214         *            the target number of results; the resultant dataset may
215         *            contain fewer images than specified.
216         * @return a new {@link BingImageDataset} created from the query.
217         */
218        public static <IMAGE extends Image<?, IMAGE>> BingImageDataset<IMAGE> create(InputStreamObjectReader<IMAGE> reader,
219                        BingAPIToken token, String query, String imageFilters, int number)
220        {
221                final AzureSearchImageQuery aq = new AzureSearchImageQuery();
222                aq.setAppid(token.accountKey);
223                aq.setQuery(query);
224                if (imageFilters != null)
225                        aq.setImageFilters(imageFilters);
226
227                return new BingImageDataset<IMAGE>(reader, performQuery(aq, number), aq);
228        }
229
230        /**
231         * Perform a search with the given query string.
232         * 
233         * @param reader
234         *            the reader with which to load the images
235         * @param token
236         *            the api authentication token
237         * @param query
238         *            the query
239         * @param number
240         *            the target number of results; the resultant dataset may
241         *            contain fewer images than specified.
242         * @return a new {@link BingImageDataset} created from the query.
243         */
244        public static <IMAGE extends Image<?, IMAGE>> BingImageDataset<IMAGE> create(InputStreamObjectReader<IMAGE> reader,
245                        BingAPIToken token, String query, int number)
246        {
247                final AzureSearchImageQuery aq = new AzureSearchImageQuery();
248                aq.setAppid(token.accountKey);
249                aq.setQuery(query);
250
251                return new BingImageDataset<IMAGE>(reader, performQuery(aq, number), aq);
252        }
253
254        @Override
255        public String getID() {
256                return query.getQuery();
257        }
258}