001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.image.dataset;
031
032import java.io.IOException;
033import java.io.InputStream;
034import java.net.URL;
035import java.util.ArrayList;
036import java.util.Iterator;
037import java.util.List;
038import java.util.regex.Matcher;
039import java.util.regex.Pattern;
040
041import javax.xml.parsers.ParserConfigurationException;
042
043import org.openimaj.data.dataset.ReadableListDataset;
044import org.openimaj.image.Image;
045import org.openimaj.io.HttpUtils;
046import org.openimaj.io.InputStreamObjectReader;
047import org.openimaj.util.api.auth.common.FlickrAPIToken;
048
049import com.aetrion.flickr.Flickr;
050import com.aetrion.flickr.REST;
051import com.aetrion.flickr.collections.CollectionsInterface;
052import com.aetrion.flickr.collections.CollectionsSearchParameters;
053import com.aetrion.flickr.photos.Extras;
054import com.aetrion.flickr.photos.Photo;
055import com.aetrion.flickr.photos.PhotoList;
056import com.aetrion.flickr.photosets.PhotosetsInterface;
057
058/**
059 * Class to dynamically create image datasets from flickr through various api
060 * calls.
061 * 
062 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
063 * 
064 * @param <IMAGE>
065 *            The type of {@link Image} instance held by the dataset.
066 */
067public class FlickrImageDataset<IMAGE extends Image<?, IMAGE>> extends ReadableListDataset<IMAGE, InputStream> {
068        /**
069         * Possible sizes of image from flickr.
070         * 
071         * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
072         */
073        public enum Size {
074                /**
075                 * The original uploaded size
076                 */
077                Original {
078                        @Override
079                        protected URL getURL(Photo photo) {
080                                try {
081                                        return new URL(photo.getOriginalUrl());
082                                } catch (final Exception e) {
083                                        throw new RuntimeException(e);
084                                }
085                        }
086                },
087                /**
088                 * Large size
089                 */
090                Large {
091                        @Override
092                        protected URL getURL(Photo photo) {
093                                try {
094                                        return new URL(photo.getLargeUrl());
095                                } catch (final Exception e) {
096                                        throw new RuntimeException(e);
097                                }
098                        }
099                },
100                /**
101                 * Medium size
102                 */
103                Medium {
104                        @Override
105                        protected URL getURL(Photo photo) {
106                                try {
107                                        return new URL(photo.getMediumUrl());
108                                } catch (final Exception e) {
109                                        throw new RuntimeException(e);
110                                }
111                        }
112                },
113                /**
114                 * Small size
115                 */
116                Small {
117                        @Override
118                        protected URL getURL(Photo photo) {
119                                try {
120                                        return new URL(photo.getSmallUrl());
121                                } catch (final Exception e) {
122                                        throw new RuntimeException(e);
123                                }
124                        }
125                },
126                /**
127                 * Thumbnail size
128                 */
129                Thumbnail {
130                        @Override
131                        protected URL getURL(Photo photo) {
132                                try {
133                                        return new URL(photo.getThumbnailUrl());
134                                } catch (final Exception e) {
135                                        throw new RuntimeException(e);
136                                }
137                        }
138                },
139                /**
140                 * Square thumbnail size
141                 */
142                Square {
143                        @Override
144                        protected URL getURL(Photo photo) {
145                                try {
146                                        return new URL(photo.getSmallSquareUrl());
147                                } catch (final Exception e) {
148                                        throw new RuntimeException(e);
149                                }
150                        }
151                };
152
153                protected abstract URL getURL(Photo photo);
154        }
155
156        private final static Pattern GALLERY_URL_PATTERN = Pattern.compile(".*/photos/.*/galleries/[0-9]*(/|$)");
157        private final static Pattern PHOTOSET_URL_PATTERN = Pattern.compile(".*/photos/.*/sets/([0-9]*)(/|$)");
158        private final static Pattern COLLECTION_URL_PATTERN = Pattern.compile(".*/photos/(.*)/collections/([0-9]*)(/|$)");
159
160        protected List<Photo> photos;
161        protected Size targetSize = Size.Medium;
162
163        protected FlickrImageDataset(InputStreamObjectReader<IMAGE> reader, List<Photo> photos) {
164                super(reader);
165
166                this.photos = photos;
167        }
168
169        /**
170         * Set the size of the images that this dataset produces.
171         * 
172         * @param size
173         *            the size
174         */
175        public void setImageSize(Size size) {
176                this.targetSize = size;
177        }
178
179        /**
180         * Get the size of the images that this dataset produces.
181         * 
182         * @return the size of the returned images
183         */
184        public Size getImageSize() {
185                return targetSize;
186        }
187
188        /**
189         * Get the underlying flickr {@link Photo} objects.
190         * 
191         * @return the underlying list of {@link Photo}s.
192         */
193        public List<Photo> getPhotos() {
194                return photos;
195        }
196
197        /**
198         * Get the a specific underlying flickr {@link Photo} object corresponding
199         * to a particular image instance.
200         * 
201         * @param index
202         *            the index of the instance
203         * 
204         * @return the underlying {@link Photo} corresponding to the given instance
205         *         index.
206         */
207        public Photo getPhoto(int index) {
208                return photos.get(index);
209        }
210
211        @Override
212        public IMAGE getInstance(int index) {
213                return read(photos.get(index));
214        }
215
216        @Override
217        public int numInstances() {
218                return photos.size();
219        }
220
221        @Override
222        public String getID(int index) {
223                return targetSize.getURL(photos.get(index)).toString();
224        }
225
226        private IMAGE read(Photo next) {
227                if (next == null)
228                        return null;
229
230                InputStream stream = null;
231                try {
232                        stream = HttpUtils.readURL(targetSize.getURL(next));
233
234                        return reader.read(stream);
235                } catch (final IOException e) {
236                        throw new RuntimeException(e);
237                } finally {
238                        try {
239                                if (stream != null)
240                                        stream.close();
241                        } catch (final IOException e) {
242                                // ignore
243                        }
244                }
245        }
246
247        @Override
248        public Iterator<IMAGE> iterator() {
249                return new Iterator<IMAGE>() {
250                        Iterator<Photo> internal = photos.iterator();
251
252                        @Override
253                        public boolean hasNext() {
254                                return internal.hasNext();
255                        }
256
257                        @Override
258                        public IMAGE next() {
259                                return read(internal.next());
260                        }
261
262                        @Override
263                        public void remove() {
264                                internal.remove();
265                        }
266                };
267        }
268
269        @Override
270        public String toString() {
271                return String.format("%s(%d images)", this.getClass().getName(), this.photos.size());
272        }
273
274        /**
275         * Create an image dataset from the flickr gallery, photoset or collection
276         * at the given url.
277         * 
278         * @param reader
279         *            the reader with which to load the images
280         * @param token
281         *            the flickr api authentication token
282         * @param url
283         *            the url of the collection/gallery/photo-set
284         * @return a {@link FlickrImageDataset} created from the given url
285         * @throws Exception
286         *             if an error occurs
287         */
288        public static <IMAGE extends Image<?, IMAGE>> FlickrImageDataset<IMAGE> create(InputStreamObjectReader<IMAGE> reader,
289                        FlickrAPIToken token,
290                        URL url) throws Exception
291        {
292                return create(reader, token, url, 0);
293        }
294
295        /**
296         * Create an image dataset by searching flickr with the given search terms.
297         * 
298         * @param reader
299         *            the reader with which to load the images
300         * @param token
301         *            the flickr api authentication token
302         * @param searchTerms
303         *            the search terms; space separated. Prepending a term with a
304         *            "-" means that the term should not appear.
305         * @return a {@link FlickrImageDataset} created from the given url
306         * @throws Exception
307         *             if an error occurs
308         */
309        public static <IMAGE extends Image<?, IMAGE>> FlickrImageDataset<IMAGE> create(InputStreamObjectReader<IMAGE> reader,
310                        FlickrAPIToken token, String searchTerms) throws Exception
311        {
312                return create(reader, token, searchTerms, 0);
313        }
314
315        /**
316         * Create an image dataset by searching flickr with the given search terms.
317         * The number of images can be limited to a subset.
318         * 
319         * @param reader
320         *            the reader with which to load the images
321         * @param token
322         *            the flickr api authentication token
323         * @param searchTerms
324         *            the search terms; space separated. Prepending a term with a
325         *            "-" means that the term should not appear.
326         * @param number
327         *            the maximum number of images to add to the dataset. Setting to
328         *            0 or less will attempt to use all the images.
329         * @return a {@link FlickrImageDataset} created from the given url
330         * @throws Exception
331         *             if an error occurs
332         */
333        public static <IMAGE extends Image<?, IMAGE>> FlickrImageDataset<IMAGE> create(InputStreamObjectReader<IMAGE> reader,
334                        FlickrAPIToken token,
335                        String searchTerms, int number) throws Exception
336        {
337                final com.aetrion.flickr.photos.SearchParameters params = new com.aetrion.flickr.photos.SearchParameters();
338                params.setText(searchTerms);
339
340                return createFromSearch(reader, token, params, number);
341        }
342
343        /**
344         * Create an image dataset from the flickr gallery, photoset or collection
345         * at the given url. The number of images can be limited to a subset.
346         * 
347         * @param reader
348         *            the reader with which to load the images
349         * @param token
350         *            the flickr api authentication token
351         * @param url
352         *            the url of the collection/gallery/photo-set
353         * @param number
354         *            the maximum number of images to add to the dataset. Setting to
355         *            0 or less will attempt to use all the images.
356         * @return a {@link FlickrImageDataset} created from the given url
357         * @throws Exception
358         *             if an error occurs
359         */
360        public static <IMAGE extends Image<?, IMAGE>> FlickrImageDataset<IMAGE> create(InputStreamObjectReader<IMAGE> reader,
361                        FlickrAPIToken token,
362                        URL url, int number) throws Exception
363        {
364                final String urlString = url.toString();
365
366                if (GALLERY_URL_PATTERN.matcher(urlString).matches()) {
367                        return fromGallery(reader, token, urlString, number);
368                } else if (PHOTOSET_URL_PATTERN.matcher(urlString).matches()) {
369                        return fromPhotoset(reader, token, urlString, number);
370                } else if (COLLECTION_URL_PATTERN.matcher(urlString).matches()) {
371                        return fromCollection(reader, token, urlString, number);
372                }
373
374                throw new IllegalArgumentException("Unknown URL type " + urlString);
375        }
376
377        private static <IMAGE extends Image<?, IMAGE>> FlickrImageDataset<IMAGE> fromGallery(
378                        InputStreamObjectReader<IMAGE> reader,
379                        FlickrAPIToken token,
380                        String urlString, int number) throws Exception
381        {
382                final Flickr flickr = makeFlickr(token);
383
384                final String galleryId = flickr.getUrlsInterface().lookupGallery(urlString);
385
386                final com.aetrion.flickr.galleries.SearchParameters params = new com.aetrion.flickr.galleries.SearchParameters();
387                params.setGalleryId(galleryId);
388
389                return createFromGallery(reader, token, params, number);
390        }
391
392        private static <IMAGE extends Image<?, IMAGE>> FlickrImageDataset<IMAGE> fromPhotoset(
393                        InputStreamObjectReader<IMAGE> reader,
394                        FlickrAPIToken token,
395                        String urlString, int number) throws Exception
396        {
397                final Matcher matcher = PHOTOSET_URL_PATTERN.matcher(urlString);
398                matcher.find();
399                final String setId = matcher.group(1);
400
401                return createFromPhotoset(reader, token, setId, number);
402        }
403
404        private static <IMAGE extends Image<?, IMAGE>> FlickrImageDataset<IMAGE> fromCollection(
405                        InputStreamObjectReader<IMAGE> reader,
406                        FlickrAPIToken token,
407                        String urlString, int number) throws Exception
408        {
409                final Flickr flickr = makeFlickr(token);
410
411                final Matcher matcher = COLLECTION_URL_PATTERN.matcher(urlString);
412                matcher.find();
413                final String userName = matcher.group(1);
414                final String collectionsId = matcher.group(2);
415
416                final CollectionsSearchParameters params = new CollectionsSearchParameters();
417                params.setCollectionId(collectionsId);
418                params.setUserId(flickr.getPeopleInterface().findByUsername(userName).getId());
419
420                return createFromCollection(reader, token, params, number);
421        }
422
423        /**
424         * Create an image dataset from a flickr gallery with the specified
425         * parameters.
426         * 
427         * @param reader
428         *            the reader with which to load the images
429         * @param token
430         *            the flickr api authentication token
431         * @param params
432         *            the parameters describing the gallery and any additional
433         *            constraints.
434         * @return a {@link FlickrImageDataset} created from the gallery described
435         *         by the given parameters
436         * @throws Exception
437         *             if an error occurs
438         */
439        public static <IMAGE extends Image<?, IMAGE>> FlickrImageDataset<IMAGE> createFromGallery(
440                        InputStreamObjectReader<IMAGE> reader,
441                        FlickrAPIToken token,
442                        com.aetrion.flickr.galleries.SearchParameters params) throws Exception
443        {
444                return createFromGallery(reader, token, params, 0);
445        }
446
447        /**
448         * Create an image dataset from a flickr gallery with the specified
449         * parameters. The number of images can be limited to a subset.
450         * 
451         * @param reader
452         *            the reader with which to load the images
453         * @param token
454         *            the flickr api authentication token
455         * @param params
456         *            the parameters describing the gallery and any additional
457         *            constraints.
458         * @param number
459         *            the maximum number of images to add to the dataset. Setting to
460         *            0 or less will attempt to use all the images.
461         * @return a {@link FlickrImageDataset} created from the gallery described
462         *         by the given parameters
463         * @throws Exception
464         *             if an error occurs
465         */
466        @SuppressWarnings("unchecked")
467        public static <IMAGE extends Image<?, IMAGE>> FlickrImageDataset<IMAGE> createFromGallery(
468                        InputStreamObjectReader<IMAGE> reader,
469                        FlickrAPIToken token,
470                        com.aetrion.flickr.galleries.SearchParameters params, int number) throws Exception
471        {
472                final Flickr flickr = makeFlickr(token);
473
474                params.setExtras(Extras.ALL_EXTRAS);
475
476                List<Photo> photos = new ArrayList<Photo>();
477                final PhotoList first = flickr.getGalleriesInterface().getPhotos(params, 250, 0);
478                photos.addAll(first);
479
480                if (number > 0)
481                        number = Math.min(number, first.getTotal());
482
483                for (int page = 1, n = photos.size(); n < number; page++) {
484                        final PhotoList result = flickr.getGalleriesInterface().getPhotos(params, 250, page);
485                        photos.addAll(result);
486                        n += result.size();
487                }
488
489                if (number > 0 && number < photos.size())
490                        photos = photos.subList(0, number);
491
492                return new FlickrImageDataset<IMAGE>(reader, photos);
493        }
494
495        /**
496         * Create an image dataset from a flickr photoset.
497         * 
498         * @param reader
499         *            the reader with which to load the images
500         * @param token
501         *            the flickr api authentication token
502         * @param setId
503         *            the photoset identifier
504         * @return a {@link FlickrImageDataset} created from the gallery described
505         *         by the given parameters
506         * @throws Exception
507         *             if an error occurs
508         */
509        public static <IMAGE extends Image<?, IMAGE>> FlickrImageDataset<IMAGE> createFromPhotoset(
510                        InputStreamObjectReader<IMAGE> reader, FlickrAPIToken token, String setId) throws Exception
511        {
512                return createFromPhotoset(reader, token, setId, 0);
513        }
514
515        /**
516         * Create an image dataset from a flickr photoset. The number of images can
517         * be limited to a subset.
518         * 
519         * @param reader
520         *            the reader with which to load the images
521         * @param token
522         *            the flickr api authentication token
523         * @param setId
524         *            the photoset identifier
525         * @param number
526         *            the maximum number of images to add to the dataset. Setting to
527         *            0 or less will attempt to use all the images.
528         * @return a {@link FlickrImageDataset} created from the gallery described
529         *         by the given parameters
530         * @throws Exception
531         *             if an error occurs
532         */
533        @SuppressWarnings("unchecked")
534        public static <IMAGE extends Image<?, IMAGE>> FlickrImageDataset<IMAGE> createFromPhotoset(
535                        InputStreamObjectReader<IMAGE> reader,
536                        FlickrAPIToken token,
537                        String setId, int number) throws Exception
538        {
539                final Flickr flickr = makeFlickr(token);
540
541                final PhotosetsInterface setsInterface = flickr.getPhotosetsInterface();
542
543                List<Photo> photos = new ArrayList<Photo>();
544                final PhotoList first = setsInterface.getPhotos(setId, Extras.ALL_EXTRAS, 0, 250, 0);
545                photos.addAll(first);
546
547                if (number > 0)
548                        number = Math.min(number, first.getTotal());
549
550                for (int page = 1, n = photos.size(); n < number; page++) {
551                        final PhotoList result = setsInterface.getPhotos(setId, Extras.ALL_EXTRAS, 0, 250, page);
552                        photos.addAll(result);
553                        n += result.size();
554                }
555
556                if (number > 0 && number < photos.size())
557                        photos = photos.subList(0, number);
558
559                return new FlickrImageDataset<IMAGE>(reader, photos);
560        }
561
562        /**
563         * Create an image dataset from a flickr collection with the specified
564         * parameters.
565         * 
566         * @param reader
567         *            the reader with which to load the images
568         * @param token
569         *            the flickr api authentication token
570         * @param params
571         *            the parameters describing the gallery and any additional
572         *            constraints.
573         * @return a {@link FlickrImageDataset} created from the gallery described
574         *         by the given parameters
575         * @throws Exception
576         *             if an error occurs
577         */
578        public static <IMAGE extends Image<?, IMAGE>> FlickrImageDataset<IMAGE> createFromCollection(
579                        InputStreamObjectReader<IMAGE> reader,
580                        FlickrAPIToken token,
581                        com.aetrion.flickr.collections.CollectionsSearchParameters params) throws Exception
582        {
583                return createFromCollection(reader, token, params, 0);
584        }
585
586        /**
587         * Create an image dataset from a flickr collection with the specified
588         * parameters. The number of images can be limited to a subset.
589         * 
590         * @param reader
591         *            the reader with which to load the images
592         * @param token
593         *            the flickr api authentication token
594         * @param params
595         *            the parameters describing the gallery and any additional
596         *            constraints.
597         * @param number
598         *            the maximum number of images to add to the dataset. Setting to
599         *            0 or less will attempt to use all the images.
600         * @return a {@link FlickrImageDataset} created from the gallery described
601         *         by the given parameters
602         * @throws Exception
603         *             if an error occurs
604         */
605        @SuppressWarnings("unchecked")
606        public static <IMAGE extends Image<?, IMAGE>> FlickrImageDataset<IMAGE> createFromCollection(
607                        InputStreamObjectReader<IMAGE> reader,
608                        FlickrAPIToken token,
609                        com.aetrion.flickr.collections.CollectionsSearchParameters params, int number) throws Exception
610        {
611                final Flickr flickr = makeFlickr(token);
612
613                params.setExtras(Extras.ALL_EXTRAS);
614
615                List<Photo> photos = new ArrayList<Photo>();
616                final CollectionsInterface collectionsInterface = flickr.getCollectionsInterface();
617                final PhotoList photoList = collectionsInterface.getTree(params).getPhotoUrls(flickr.getPhotosetsInterface());
618                photos.addAll(photoList);
619
620                if (number > 0 && number < photos.size())
621                        photos = photos.subList(0, number);
622
623                return new FlickrImageDataset<IMAGE>(reader, photos);
624        }
625
626        /**
627         * Create an image dataset from a flickr search with the specified
628         * parameters.
629         * 
630         * @param reader
631         *            the reader with which to load the images
632         * @param token
633         *            the flickr api authentication token
634         * @param params
635         *            the parameters describing the gallery and any additional
636         *            constraints.
637         * @return a {@link FlickrImageDataset} created from the gallery described
638         *         by the given parameters
639         * @throws Exception
640         *             if an error occurs
641         */
642        public static <IMAGE extends Image<?, IMAGE>> FlickrImageDataset<IMAGE> createFromSearch(
643                        InputStreamObjectReader<IMAGE> reader,
644                        FlickrAPIToken token,
645                        com.aetrion.flickr.photos.SearchParameters params) throws Exception
646        {
647                return createFromSearch(reader, token, params, 0);
648        }
649
650        /**
651         * Create an image dataset from a flickr search with the specified
652         * parameters. The number of images can be limited to a subset.
653         * 
654         * @param reader
655         *            the reader with which to load the images
656         * @param token
657         *            the flickr api authentication token
658         * @param params
659         *            the parameters describing the gallery and any additional
660         *            constraints.
661         * @param number
662         *            the maximum number of images to add to the dataset. Setting to
663         *            0 or less will attempt to use all the images.
664         * @return a {@link FlickrImageDataset} created from the gallery described
665         *         by the given parameters
666         * @throws Exception
667         *             if an error occurs
668         */
669        @SuppressWarnings("unchecked")
670        public static <IMAGE extends Image<?, IMAGE>> FlickrImageDataset<IMAGE> createFromSearch(
671                        InputStreamObjectReader<IMAGE> reader,
672                        FlickrAPIToken token,
673                        com.aetrion.flickr.photos.SearchParameters params, int number) throws Exception
674        {
675                final Flickr flickr = makeFlickr(token);
676
677                params.setExtras(Extras.ALL_EXTRAS);
678
679                List<Photo> photos = new ArrayList<Photo>();
680                final PhotoList first = flickr.getPhotosInterface().search(params, 250, 0);
681                photos.addAll(first);
682
683                if (number > 0)
684                        number = Math.min(number, first.getTotal());
685
686                for (int page = 1, n = photos.size(); n < number; page++) {
687                        final PhotoList result = flickr.getPhotosInterface().search(params, 250, page);
688                        photos.addAll(result);
689                        n += result.size();
690                }
691
692                if (number > 0 && number < photos.size())
693                        photos = photos.subList(0, number);
694
695                return new FlickrImageDataset<IMAGE>(reader, photos);
696        }
697
698        private static Flickr makeFlickr(FlickrAPIToken token) throws ParserConfigurationException {
699                if (token.secret == null)
700                        return new Flickr(token.apikey, new REST(Flickr.DEFAULT_HOST));
701                return new Flickr(token.apikey, token.secret, new REST(Flickr.DEFAULT_HOST));
702        }
703}