001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.image.dataset; 031 032import java.io.IOException; 033import java.io.InputStream; 034import java.net.URL; 035import java.util.ArrayList; 036import java.util.List; 037 038import net.billylieurance.azuresearch.AbstractAzureSearchQuery.AZURESEARCH_FORMAT; 039import net.billylieurance.azuresearch.AzureSearchImageQuery; 040import net.billylieurance.azuresearch.AzureSearchImageResult; 041 042import org.openimaj.data.dataset.ReadableListDataset; 043import org.openimaj.data.identity.Identifiable; 044import org.openimaj.image.Image; 045import org.openimaj.io.HttpUtils; 046import org.openimaj.io.InputStreamObjectReader; 047import org.openimaj.util.api.auth.common.BingAPIToken; 048 049/** 050 * Image datasets dynamically created from the Bing search API. 051 * 052 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 053 * 054 * @param <IMAGE> 055 * The type of {@link Image} instance held by the dataset. 056 */ 057public class BingImageDataset<IMAGE extends Image<?, IMAGE>> extends ReadableListDataset<IMAGE, InputStream> 058 implements 059 Identifiable 060{ 061 List<AzureSearchImageResult> images; 062 AzureSearchImageQuery query; 063 064 protected BingImageDataset(InputStreamObjectReader<IMAGE> reader, List<AzureSearchImageResult> results, 065 AzureSearchImageQuery query) 066 { 067 super(reader); 068 this.images = results; 069 this.query = query; 070 } 071 072 @Override 073 public IMAGE getInstance(int index) { 074 return read(getImage(index)); 075 } 076 077 private IMAGE read(AzureSearchImageResult next) { 078 if (next == null) 079 return null; 080 081 InputStream stream = null; 082 try { 083 final String imageURL = next.getMediaUrl(); 084 stream = HttpUtils.readURL(new URL(imageURL)); 085 086 return reader.read(stream); 087 } catch (final IOException e) { 088 throw new RuntimeException(e); 089 } finally { 090 try { 091 if (stream != null) 092 stream.close(); 093 } catch (final IOException e) { 094 // ignore 095 } 096 } 097 } 098 099 @Override 100 public int numInstances() { 101 return images.size(); 102 } 103 104 /** 105 * Get the underlying {@link AzureSearchImageResult} objects that back the 106 * dataset. 107 * 108 * @return the underlying {@link AzureSearchImageResult} objects 109 */ 110 public List<AzureSearchImageResult> getImages() { 111 return images; 112 } 113 114 /** 115 * Get the specific underlying {@link AzureSearchImageResult} for the given 116 * index. 117 * 118 * @param index 119 * the index 120 * @return the specific {@link AzureSearchImageResult} for the given index. 121 */ 122 public AzureSearchImageResult getImage(int index) { 123 return images.get(index); 124 } 125 126 private static List<AzureSearchImageResult> performSinglePageQuery(AzureSearchImageQuery query) { 127 query.setFormat(AZURESEARCH_FORMAT.XML); 128 query.doQuery(); 129 130 return query.getQueryResult().getASRs(); 131 } 132 133 private static List<AzureSearchImageResult> performQuery(AzureSearchImageQuery query, int number) { 134 if (number <= 0) 135 number = 1000; 136 137 query.setPage(0); 138 query.setPerPage(50); 139 query.setFormat(AZURESEARCH_FORMAT.XML); 140 141 final List<AzureSearchImageResult> images = new ArrayList<AzureSearchImageResult>(); 142 for (int i = 0; i < 20; i++) { 143 final List<AzureSearchImageResult> res = performSinglePageQuery(query); 144 145 if (res == null || res.size() == 0) 146 break; 147 148 images.addAll(res); 149 150 if (images.size() >= number) 151 break; 152 } 153 154 if (images.size() <= number) 155 return images; 156 return images.subList(0, number); 157 } 158 159 /** 160 * Perform a search with the given query. The appid must have been set 161 * externally. 162 * 163 * @see AzureSearchImageQuery#setAppid(String) 164 * 165 * @param reader 166 * the reader with which to load the images 167 * @param query 168 * the query 169 * @param number 170 * the target number of results; the resultant dataset may 171 * contain fewer images than specified. 172 * @return a new {@link BingImageDataset} created from the query. 173 */ 174 public static <IMAGE extends Image<?, IMAGE>> BingImageDataset<IMAGE> create(InputStreamObjectReader<IMAGE> reader, 175 AzureSearchImageQuery query, int number) 176 { 177 return new BingImageDataset<IMAGE>(reader, performQuery(query, number), query); 178 } 179 180 /** 181 * Perform a search with the given query. The given api token will be used 182 * to set the appid in the query object. 183 * 184 * @param reader 185 * the reader with which to load the images 186 * @param token 187 * the api authentication token 188 * @param query 189 * the query 190 * @param number 191 * the target number of results; the resultant dataset may 192 * contain fewer images than specified. 193 * @return a new {@link BingImageDataset} created from the query. 194 */ 195 public static <IMAGE extends Image<?, IMAGE>> BingImageDataset<IMAGE> create(InputStreamObjectReader<IMAGE> reader, 196 BingAPIToken token, AzureSearchImageQuery query, int number) 197 { 198 query.setAppid(token.accountKey); 199 return new BingImageDataset<IMAGE>(reader, performQuery(query, number), query); 200 } 201 202 /** 203 * Perform a search with the given query string and filters. 204 * 205 * @param reader 206 * the reader with which to load the images 207 * @param token 208 * the api authentication token 209 * @param query 210 * the query 211 * @param imageFilters 212 * the image filters 213 * @param number 214 * the target number of results; the resultant dataset may 215 * contain fewer images than specified. 216 * @return a new {@link BingImageDataset} created from the query. 217 */ 218 public static <IMAGE extends Image<?, IMAGE>> BingImageDataset<IMAGE> create(InputStreamObjectReader<IMAGE> reader, 219 BingAPIToken token, String query, String imageFilters, int number) 220 { 221 final AzureSearchImageQuery aq = new AzureSearchImageQuery(); 222 aq.setAppid(token.accountKey); 223 aq.setQuery(query); 224 if (imageFilters != null) 225 aq.setImageFilters(imageFilters); 226 227 return new BingImageDataset<IMAGE>(reader, performQuery(aq, number), aq); 228 } 229 230 /** 231 * Perform a search with the given query string. 232 * 233 * @param reader 234 * the reader with which to load the images 235 * @param token 236 * the api authentication token 237 * @param query 238 * the query 239 * @param number 240 * the target number of results; the resultant dataset may 241 * contain fewer images than specified. 242 * @return a new {@link BingImageDataset} created from the query. 243 */ 244 public static <IMAGE extends Image<?, IMAGE>> BingImageDataset<IMAGE> create(InputStreamObjectReader<IMAGE> reader, 245 BingAPIToken token, String query, int number) 246 { 247 final AzureSearchImageQuery aq = new AzureSearchImageQuery(); 248 aq.setAppid(token.accountKey); 249 aq.setQuery(query); 250 251 return new BingImageDataset<IMAGE>(reader, performQuery(aq, number), aq); 252 } 253 254 @Override 255 public String getID() { 256 return query.getQuery(); 257 } 258}