001/**
002 * Copyright 2011 The University of Southampton, Yahoo Inc., and the
003 * individual contributors. All rights reserved.
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 *    http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.openimaj.web.scraping.images;
018
019import java.net.URL;
020import java.util.Arrays;
021import java.util.List;
022
023import org.jsoup.Jsoup;
024import org.jsoup.nodes.Document;
025import org.jsoup.select.Elements;
026import org.openimaj.web.scraping.SiteSpecificConsumer;
027
028/**
029 * Download images from twitter's own image hosting service
030 * 
031 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
032 * 
033 */
034public class TwitterPhotoConsumer implements SiteSpecificConsumer {
035        @Override
036        public boolean canConsume(URL url) {
037                // http://twitter.com/HutchSelenator/status/222772697531301890/photo/1
038                return url.getHost().equals("twitter.com") && url.getPath().contains("photo");
039        }
040
041        @Override
042        public List<URL> consume(URL url) {
043                String largeURLStr = url.toString();
044                if (!largeURLStr.endsWith("large")) {
045                        largeURLStr += "/large";
046                }
047                try {
048                        final Document doc = Jsoup.connect(largeURLStr).get();
049                        final Elements largeimage = doc.select(".media-slideshow-image");
050                        final URL link = new URL(largeimage.get(0).attr("src"));
051                        return Arrays.asList(link);
052                } catch (final Exception e) {
053                        return null;
054                }
055
056        }
057
058}