001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.image.dataset; 031 032import java.io.IOException; 033import java.io.InputStream; 034import java.net.URL; 035import java.util.ArrayList; 036import java.util.Iterator; 037import java.util.List; 038import java.util.regex.Matcher; 039import java.util.regex.Pattern; 040 041import javax.xml.parsers.ParserConfigurationException; 042 043import org.openimaj.data.dataset.ReadableListDataset; 044import org.openimaj.image.Image; 045import org.openimaj.io.HttpUtils; 046import org.openimaj.io.InputStreamObjectReader; 047import org.openimaj.util.api.auth.common.FlickrAPIToken; 048 049import com.aetrion.flickr.Flickr; 050import com.aetrion.flickr.REST; 051import com.aetrion.flickr.collections.CollectionsInterface; 052import com.aetrion.flickr.collections.CollectionsSearchParameters; 053import com.aetrion.flickr.photos.Extras; 054import com.aetrion.flickr.photos.Photo; 055import com.aetrion.flickr.photos.PhotoList; 056import com.aetrion.flickr.photosets.PhotosetsInterface; 057 058/** 059 * Class to dynamically create image datasets from flickr through various api 060 * calls. 061 * 062 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 063 * 064 * @param <IMAGE> 065 * The type of {@link Image} instance held by the dataset. 066 */ 067public class FlickrImageDataset<IMAGE extends Image<?, IMAGE>> extends ReadableListDataset<IMAGE, InputStream> { 068 /** 069 * Possible sizes of image from flickr. 070 * 071 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 072 */ 073 public enum Size { 074 /** 075 * The original uploaded size 076 */ 077 Original { 078 @Override 079 protected URL getURL(Photo photo) { 080 try { 081 return new URL(photo.getOriginalUrl()); 082 } catch (final Exception e) { 083 throw new RuntimeException(e); 084 } 085 } 086 }, 087 /** 088 * Large size 089 */ 090 Large { 091 @Override 092 protected URL getURL(Photo photo) { 093 try { 094 return new URL(photo.getLargeUrl()); 095 } catch (final Exception e) { 096 throw new RuntimeException(e); 097 } 098 } 099 }, 100 /** 101 * Medium size 102 */ 103 Medium { 104 @Override 105 protected URL getURL(Photo photo) { 106 try { 107 return new URL(photo.getMediumUrl()); 108 } catch (final Exception e) { 109 throw new RuntimeException(e); 110 } 111 } 112 }, 113 /** 114 * Small size 115 */ 116 Small { 117 @Override 118 protected URL getURL(Photo photo) { 119 try { 120 return new URL(photo.getSmallUrl()); 121 } catch (final Exception e) { 122 throw new RuntimeException(e); 123 } 124 } 125 }, 126 /** 127 * Thumbnail size 128 */ 129 Thumbnail { 130 @Override 131 protected URL getURL(Photo photo) { 132 try { 133 return new URL(photo.getThumbnailUrl()); 134 } catch (final Exception e) { 135 throw new RuntimeException(e); 136 } 137 } 138 }, 139 /** 140 * Square thumbnail size 141 */ 142 Square { 143 @Override 144 protected URL getURL(Photo photo) { 145 try { 146 return new URL(photo.getSmallSquareUrl()); 147 } catch (final Exception e) { 148 throw new RuntimeException(e); 149 } 150 } 151 }; 152 153 protected abstract URL getURL(Photo photo); 154 } 155 156 private final static Pattern GALLERY_URL_PATTERN = Pattern.compile(".*/photos/.*/galleries/[0-9]*(/|$)"); 157 private final static Pattern PHOTOSET_URL_PATTERN = Pattern.compile(".*/photos/.*/sets/([0-9]*)(/|$)"); 158 private final static Pattern COLLECTION_URL_PATTERN = Pattern.compile(".*/photos/(.*)/collections/([0-9]*)(/|$)"); 159 160 protected List<Photo> photos; 161 protected Size targetSize = Size.Medium; 162 163 protected FlickrImageDataset(InputStreamObjectReader<IMAGE> reader, List<Photo> photos) { 164 super(reader); 165 166 this.photos = photos; 167 } 168 169 /** 170 * Set the size of the images that this dataset produces. 171 * 172 * @param size 173 * the size 174 */ 175 public void setImageSize(Size size) { 176 this.targetSize = size; 177 } 178 179 /** 180 * Get the size of the images that this dataset produces. 181 * 182 * @return the size of the returned images 183 */ 184 public Size getImageSize() { 185 return targetSize; 186 } 187 188 /** 189 * Get the underlying flickr {@link Photo} objects. 190 * 191 * @return the underlying list of {@link Photo}s. 192 */ 193 public List<Photo> getPhotos() { 194 return photos; 195 } 196 197 /** 198 * Get the a specific underlying flickr {@link Photo} object corresponding 199 * to a particular image instance. 200 * 201 * @param index 202 * the index of the instance 203 * 204 * @return the underlying {@link Photo} corresponding to the given instance 205 * index. 206 */ 207 public Photo getPhoto(int index) { 208 return photos.get(index); 209 } 210 211 @Override 212 public IMAGE getInstance(int index) { 213 return read(photos.get(index)); 214 } 215 216 @Override 217 public int numInstances() { 218 return photos.size(); 219 } 220 221 @Override 222 public String getID(int index) { 223 return targetSize.getURL(photos.get(index)).toString(); 224 } 225 226 private IMAGE read(Photo next) { 227 if (next == null) 228 return null; 229 230 InputStream stream = null; 231 try { 232 stream = HttpUtils.readURL(targetSize.getURL(next)); 233 234 return reader.read(stream); 235 } catch (final IOException e) { 236 throw new RuntimeException(e); 237 } finally { 238 try { 239 if (stream != null) 240 stream.close(); 241 } catch (final IOException e) { 242 // ignore 243 } 244 } 245 } 246 247 @Override 248 public Iterator<IMAGE> iterator() { 249 return new Iterator<IMAGE>() { 250 Iterator<Photo> internal = photos.iterator(); 251 252 @Override 253 public boolean hasNext() { 254 return internal.hasNext(); 255 } 256 257 @Override 258 public IMAGE next() { 259 return read(internal.next()); 260 } 261 262 @Override 263 public void remove() { 264 internal.remove(); 265 } 266 }; 267 } 268 269 @Override 270 public String toString() { 271 return String.format("%s(%d images)", this.getClass().getName(), this.photos.size()); 272 } 273 274 /** 275 * Create an image dataset from the flickr gallery, photoset or collection 276 * at the given url. 277 * 278 * @param reader 279 * the reader with which to load the images 280 * @param token 281 * the flickr api authentication token 282 * @param url 283 * the url of the collection/gallery/photo-set 284 * @return a {@link FlickrImageDataset} created from the given url 285 * @throws Exception 286 * if an error occurs 287 */ 288 public static <IMAGE extends Image<?, IMAGE>> FlickrImageDataset<IMAGE> create(InputStreamObjectReader<IMAGE> reader, 289 FlickrAPIToken token, 290 URL url) throws Exception 291 { 292 return create(reader, token, url, 0); 293 } 294 295 /** 296 * Create an image dataset by searching flickr with the given search terms. 297 * 298 * @param reader 299 * the reader with which to load the images 300 * @param token 301 * the flickr api authentication token 302 * @param searchTerms 303 * the search terms; space separated. Prepending a term with a 304 * "-" means that the term should not appear. 305 * @return a {@link FlickrImageDataset} created from the given url 306 * @throws Exception 307 * if an error occurs 308 */ 309 public static <IMAGE extends Image<?, IMAGE>> FlickrImageDataset<IMAGE> create(InputStreamObjectReader<IMAGE> reader, 310 FlickrAPIToken token, String searchTerms) throws Exception 311 { 312 return create(reader, token, searchTerms, 0); 313 } 314 315 /** 316 * Create an image dataset by searching flickr with the given search terms. 317 * The number of images can be limited to a subset. 318 * 319 * @param reader 320 * the reader with which to load the images 321 * @param token 322 * the flickr api authentication token 323 * @param searchTerms 324 * the search terms; space separated. Prepending a term with a 325 * "-" means that the term should not appear. 326 * @param number 327 * the maximum number of images to add to the dataset. Setting to 328 * 0 or less will attempt to use all the images. 329 * @return a {@link FlickrImageDataset} created from the given url 330 * @throws Exception 331 * if an error occurs 332 */ 333 public static <IMAGE extends Image<?, IMAGE>> FlickrImageDataset<IMAGE> create(InputStreamObjectReader<IMAGE> reader, 334 FlickrAPIToken token, 335 String searchTerms, int number) throws Exception 336 { 337 final com.aetrion.flickr.photos.SearchParameters params = new com.aetrion.flickr.photos.SearchParameters(); 338 params.setText(searchTerms); 339 340 return createFromSearch(reader, token, params, number); 341 } 342 343 /** 344 * Create an image dataset from the flickr gallery, photoset or collection 345 * at the given url. The number of images can be limited to a subset. 346 * 347 * @param reader 348 * the reader with which to load the images 349 * @param token 350 * the flickr api authentication token 351 * @param url 352 * the url of the collection/gallery/photo-set 353 * @param number 354 * the maximum number of images to add to the dataset. Setting to 355 * 0 or less will attempt to use all the images. 356 * @return a {@link FlickrImageDataset} created from the given url 357 * @throws Exception 358 * if an error occurs 359 */ 360 public static <IMAGE extends Image<?, IMAGE>> FlickrImageDataset<IMAGE> create(InputStreamObjectReader<IMAGE> reader, 361 FlickrAPIToken token, 362 URL url, int number) throws Exception 363 { 364 final String urlString = url.toString(); 365 366 if (GALLERY_URL_PATTERN.matcher(urlString).matches()) { 367 return fromGallery(reader, token, urlString, number); 368 } else if (PHOTOSET_URL_PATTERN.matcher(urlString).matches()) { 369 return fromPhotoset(reader, token, urlString, number); 370 } else if (COLLECTION_URL_PATTERN.matcher(urlString).matches()) { 371 return fromCollection(reader, token, urlString, number); 372 } 373 374 throw new IllegalArgumentException("Unknown URL type " + urlString); 375 } 376 377 private static <IMAGE extends Image<?, IMAGE>> FlickrImageDataset<IMAGE> fromGallery( 378 InputStreamObjectReader<IMAGE> reader, 379 FlickrAPIToken token, 380 String urlString, int number) throws Exception 381 { 382 final Flickr flickr = makeFlickr(token); 383 384 final String galleryId = flickr.getUrlsInterface().lookupGallery(urlString); 385 386 final com.aetrion.flickr.galleries.SearchParameters params = new com.aetrion.flickr.galleries.SearchParameters(); 387 params.setGalleryId(galleryId); 388 389 return createFromGallery(reader, token, params, number); 390 } 391 392 private static <IMAGE extends Image<?, IMAGE>> FlickrImageDataset<IMAGE> fromPhotoset( 393 InputStreamObjectReader<IMAGE> reader, 394 FlickrAPIToken token, 395 String urlString, int number) throws Exception 396 { 397 final Matcher matcher = PHOTOSET_URL_PATTERN.matcher(urlString); 398 matcher.find(); 399 final String setId = matcher.group(1); 400 401 return createFromPhotoset(reader, token, setId, number); 402 } 403 404 private static <IMAGE extends Image<?, IMAGE>> FlickrImageDataset<IMAGE> fromCollection( 405 InputStreamObjectReader<IMAGE> reader, 406 FlickrAPIToken token, 407 String urlString, int number) throws Exception 408 { 409 final Flickr flickr = makeFlickr(token); 410 411 final Matcher matcher = COLLECTION_URL_PATTERN.matcher(urlString); 412 matcher.find(); 413 final String userName = matcher.group(1); 414 final String collectionsId = matcher.group(2); 415 416 final CollectionsSearchParameters params = new CollectionsSearchParameters(); 417 params.setCollectionId(collectionsId); 418 params.setUserId(flickr.getPeopleInterface().findByUsername(userName).getId()); 419 420 return createFromCollection(reader, token, params, number); 421 } 422 423 /** 424 * Create an image dataset from a flickr gallery with the specified 425 * parameters. 426 * 427 * @param reader 428 * the reader with which to load the images 429 * @param token 430 * the flickr api authentication token 431 * @param params 432 * the parameters describing the gallery and any additional 433 * constraints. 434 * @return a {@link FlickrImageDataset} created from the gallery described 435 * by the given parameters 436 * @throws Exception 437 * if an error occurs 438 */ 439 public static <IMAGE extends Image<?, IMAGE>> FlickrImageDataset<IMAGE> createFromGallery( 440 InputStreamObjectReader<IMAGE> reader, 441 FlickrAPIToken token, 442 com.aetrion.flickr.galleries.SearchParameters params) throws Exception 443 { 444 return createFromGallery(reader, token, params, 0); 445 } 446 447 /** 448 * Create an image dataset from a flickr gallery with the specified 449 * parameters. The number of images can be limited to a subset. 450 * 451 * @param reader 452 * the reader with which to load the images 453 * @param token 454 * the flickr api authentication token 455 * @param params 456 * the parameters describing the gallery and any additional 457 * constraints. 458 * @param number 459 * the maximum number of images to add to the dataset. Setting to 460 * 0 or less will attempt to use all the images. 461 * @return a {@link FlickrImageDataset} created from the gallery described 462 * by the given parameters 463 * @throws Exception 464 * if an error occurs 465 */ 466 @SuppressWarnings("unchecked") 467 public static <IMAGE extends Image<?, IMAGE>> FlickrImageDataset<IMAGE> createFromGallery( 468 InputStreamObjectReader<IMAGE> reader, 469 FlickrAPIToken token, 470 com.aetrion.flickr.galleries.SearchParameters params, int number) throws Exception 471 { 472 final Flickr flickr = makeFlickr(token); 473 474 params.setExtras(Extras.ALL_EXTRAS); 475 476 List<Photo> photos = new ArrayList<Photo>(); 477 final PhotoList first = flickr.getGalleriesInterface().getPhotos(params, 250, 0); 478 photos.addAll(first); 479 480 if (number > 0) 481 number = Math.min(number, first.getTotal()); 482 483 for (int page = 1, n = photos.size(); n < number; page++) { 484 final PhotoList result = flickr.getGalleriesInterface().getPhotos(params, 250, page); 485 photos.addAll(result); 486 n += result.size(); 487 } 488 489 if (number > 0 && number < photos.size()) 490 photos = photos.subList(0, number); 491 492 return new FlickrImageDataset<IMAGE>(reader, photos); 493 } 494 495 /** 496 * Create an image dataset from a flickr photoset. 497 * 498 * @param reader 499 * the reader with which to load the images 500 * @param token 501 * the flickr api authentication token 502 * @param setId 503 * the photoset identifier 504 * @return a {@link FlickrImageDataset} created from the gallery described 505 * by the given parameters 506 * @throws Exception 507 * if an error occurs 508 */ 509 public static <IMAGE extends Image<?, IMAGE>> FlickrImageDataset<IMAGE> createFromPhotoset( 510 InputStreamObjectReader<IMAGE> reader, FlickrAPIToken token, String setId) throws Exception 511 { 512 return createFromPhotoset(reader, token, setId, 0); 513 } 514 515 /** 516 * Create an image dataset from a flickr photoset. The number of images can 517 * be limited to a subset. 518 * 519 * @param reader 520 * the reader with which to load the images 521 * @param token 522 * the flickr api authentication token 523 * @param setId 524 * the photoset identifier 525 * @param number 526 * the maximum number of images to add to the dataset. Setting to 527 * 0 or less will attempt to use all the images. 528 * @return a {@link FlickrImageDataset} created from the gallery described 529 * by the given parameters 530 * @throws Exception 531 * if an error occurs 532 */ 533 @SuppressWarnings("unchecked") 534 public static <IMAGE extends Image<?, IMAGE>> FlickrImageDataset<IMAGE> createFromPhotoset( 535 InputStreamObjectReader<IMAGE> reader, 536 FlickrAPIToken token, 537 String setId, int number) throws Exception 538 { 539 final Flickr flickr = makeFlickr(token); 540 541 final PhotosetsInterface setsInterface = flickr.getPhotosetsInterface(); 542 543 List<Photo> photos = new ArrayList<Photo>(); 544 final PhotoList first = setsInterface.getPhotos(setId, Extras.ALL_EXTRAS, 0, 250, 0); 545 photos.addAll(first); 546 547 if (number > 0) 548 number = Math.min(number, first.getTotal()); 549 550 for (int page = 1, n = photos.size(); n < number; page++) { 551 final PhotoList result = setsInterface.getPhotos(setId, Extras.ALL_EXTRAS, 0, 250, page); 552 photos.addAll(result); 553 n += result.size(); 554 } 555 556 if (number > 0 && number < photos.size()) 557 photos = photos.subList(0, number); 558 559 return new FlickrImageDataset<IMAGE>(reader, photos); 560 } 561 562 /** 563 * Create an image dataset from a flickr collection with the specified 564 * parameters. 565 * 566 * @param reader 567 * the reader with which to load the images 568 * @param token 569 * the flickr api authentication token 570 * @param params 571 * the parameters describing the gallery and any additional 572 * constraints. 573 * @return a {@link FlickrImageDataset} created from the gallery described 574 * by the given parameters 575 * @throws Exception 576 * if an error occurs 577 */ 578 public static <IMAGE extends Image<?, IMAGE>> FlickrImageDataset<IMAGE> createFromCollection( 579 InputStreamObjectReader<IMAGE> reader, 580 FlickrAPIToken token, 581 com.aetrion.flickr.collections.CollectionsSearchParameters params) throws Exception 582 { 583 return createFromCollection(reader, token, params, 0); 584 } 585 586 /** 587 * Create an image dataset from a flickr collection with the specified 588 * parameters. The number of images can be limited to a subset. 589 * 590 * @param reader 591 * the reader with which to load the images 592 * @param token 593 * the flickr api authentication token 594 * @param params 595 * the parameters describing the gallery and any additional 596 * constraints. 597 * @param number 598 * the maximum number of images to add to the dataset. Setting to 599 * 0 or less will attempt to use all the images. 600 * @return a {@link FlickrImageDataset} created from the gallery described 601 * by the given parameters 602 * @throws Exception 603 * if an error occurs 604 */ 605 @SuppressWarnings("unchecked") 606 public static <IMAGE extends Image<?, IMAGE>> FlickrImageDataset<IMAGE> createFromCollection( 607 InputStreamObjectReader<IMAGE> reader, 608 FlickrAPIToken token, 609 com.aetrion.flickr.collections.CollectionsSearchParameters params, int number) throws Exception 610 { 611 final Flickr flickr = makeFlickr(token); 612 613 params.setExtras(Extras.ALL_EXTRAS); 614 615 List<Photo> photos = new ArrayList<Photo>(); 616 final CollectionsInterface collectionsInterface = flickr.getCollectionsInterface(); 617 final PhotoList photoList = collectionsInterface.getTree(params).getPhotoUrls(flickr.getPhotosetsInterface()); 618 photos.addAll(photoList); 619 620 if (number > 0 && number < photos.size()) 621 photos = photos.subList(0, number); 622 623 return new FlickrImageDataset<IMAGE>(reader, photos); 624 } 625 626 /** 627 * Create an image dataset from a flickr search with the specified 628 * parameters. 629 * 630 * @param reader 631 * the reader with which to load the images 632 * @param token 633 * the flickr api authentication token 634 * @param params 635 * the parameters describing the gallery and any additional 636 * constraints. 637 * @return a {@link FlickrImageDataset} created from the gallery described 638 * by the given parameters 639 * @throws Exception 640 * if an error occurs 641 */ 642 public static <IMAGE extends Image<?, IMAGE>> FlickrImageDataset<IMAGE> createFromSearch( 643 InputStreamObjectReader<IMAGE> reader, 644 FlickrAPIToken token, 645 com.aetrion.flickr.photos.SearchParameters params) throws Exception 646 { 647 return createFromSearch(reader, token, params, 0); 648 } 649 650 /** 651 * Create an image dataset from a flickr search with the specified 652 * parameters. The number of images can be limited to a subset. 653 * 654 * @param reader 655 * the reader with which to load the images 656 * @param token 657 * the flickr api authentication token 658 * @param params 659 * the parameters describing the gallery and any additional 660 * constraints. 661 * @param number 662 * the maximum number of images to add to the dataset. Setting to 663 * 0 or less will attempt to use all the images. 664 * @return a {@link FlickrImageDataset} created from the gallery described 665 * by the given parameters 666 * @throws Exception 667 * if an error occurs 668 */ 669 @SuppressWarnings("unchecked") 670 public static <IMAGE extends Image<?, IMAGE>> FlickrImageDataset<IMAGE> createFromSearch( 671 InputStreamObjectReader<IMAGE> reader, 672 FlickrAPIToken token, 673 com.aetrion.flickr.photos.SearchParameters params, int number) throws Exception 674 { 675 final Flickr flickr = makeFlickr(token); 676 677 params.setExtras(Extras.ALL_EXTRAS); 678 679 List<Photo> photos = new ArrayList<Photo>(); 680 final PhotoList first = flickr.getPhotosInterface().search(params, 250, 0); 681 photos.addAll(first); 682 683 if (number > 0) 684 number = Math.min(number, first.getTotal()); 685 686 for (int page = 1, n = photos.size(); n < number; page++) { 687 final PhotoList result = flickr.getPhotosInterface().search(params, 250, page); 688 photos.addAll(result); 689 n += result.size(); 690 } 691 692 if (number > 0 && number < photos.size()) 693 photos = photos.subList(0, number); 694 695 return new FlickrImageDataset<IMAGE>(reader, photos); 696 } 697 698 private static Flickr makeFlickr(FlickrAPIToken token) throws ParserConfigurationException { 699 if (token.secret == null) 700 return new Flickr(token.apikey, new REST(Flickr.DEFAULT_HOST)); 701 return new Flickr(token.apikey, token.secret, new REST(Flickr.DEFAULT_HOST)); 702 } 703}