001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.hadoop.tools.sequencefile;
031
032import java.awt.image.BufferedImage;
033import java.io.ByteArrayInputStream;
034import java.io.IOException;
035import java.util.ArrayList;
036import java.util.List;
037
038import javax.imageio.ImageIO;
039
040import org.apache.hadoop.fs.Path;
041import org.apache.hadoop.io.BytesWritable;
042import org.apache.hadoop.io.SequenceFile;
043import org.ontoware.rdf2go.model.node.impl.URIImpl;
044import org.openimaj.hadoop.sequencefile.RecordInformationExtractor;
045import org.semanticdesktop.aperture.mime.identifier.magic.MagicMimeTypeIdentifier;
046
047/**
048 * Options for controlling what is printed when listing the contents
049 * of a {@link SequenceFile} with the {@link SequenceFileTool}.
050 * 
051 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
052 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
053 */
054public enum ListModeOptions {
055        /**
056         * Print the record key
057         * 
058         * @author Sina Samangooei (ss@ecs.soton.ac.uk)
059         * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
060         */
061        KEY {
062                @Override
063                public RecordInformationExtractor getExtractor() {
064                        return new RecordInformationExtractor(){
065                                @Override
066                                public <K,V> String extract(K key, V value, long offset, Path seqFile) {
067                                        return key.toString();
068                                }
069                        };
070                }
071        },
072        /**
073         * Print the offset of the record in the {@link SequenceFile}
074         * 
075         * @author Sina Samangooei (ss@ecs.soton.ac.uk)
076         * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
077         */
078        OFFSET {
079                @Override
080                public RecordInformationExtractor getExtractor() {
081                        return new RecordInformationExtractor(){
082                                @Override
083                                public <K,V> String extract(K key, V value, long offset, Path seqFile) {
084                                        return ((Long)offset).toString();
085                                }
086                        };
087                }               
088        },
089        /**
090         * Print the path to the {@link SequenceFile} in question. 
091         * This is useful if you're working with a directory of
092         * {@link SequenceFile}s
093         * 
094         * @author Sina Samangooei (ss@ecs.soton.ac.uk)
095         * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
096         */
097        SEQUENCEFILE {
098                @Override
099                public RecordInformationExtractor getExtractor() {
100                        return new RecordInformationExtractor(){
101                                @Override
102                                public <K,V> String extract(K key, V value, long offset, Path seqFile) {
103                                        return seqFile.toString();
104                                }
105                        };
106                }
107        },
108        /**
109         * Print the mimetype of the value in each record
110         * 
111         * @author Sina Samangooei (ss@ecs.soton.ac.uk)
112         * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
113         */
114        MIMETYPE {
115                @Override
116                public RecordInformationExtractor getExtractor() {
117                        return new RecordInformationExtractor(){
118                                @Override
119                                public <K,V> String extract(K key, V value, long offset, Path seqFile) {
120                                        if(value instanceof BytesWritable) {
121                                                MagicMimeTypeIdentifier match;
122                                                try {
123                                                        BytesWritable bw = (BytesWritable)value;
124                                                        match = new MagicMimeTypeIdentifier ();
125                                                        String ident = match.identify(bw.getBytes(),key.toString(),new URIImpl(seqFile.toUri().toString()));
126                                                        return ident;
127                                                } catch(Exception e){
128                                                        System.err.println("Failed!");
129                                                }
130                                        }
131                                        return null;
132                                }
133                        };
134                }
135        },
136        /**
137         * Print the size of the record value in bytes
138         * 
139         * @author Sina Samangooei (ss@ecs.soton.ac.uk)
140         * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
141         */
142        SIZE {
143                @Override
144                public RecordInformationExtractor getExtractor() {
145                        return new RecordInformationExtractor(){
146                                @Override
147                                public <K,V> String extract(K key, V value, long offset, Path seqFile) {
148                                        if(value instanceof BytesWritable) {
149                                                return "" + ((BytesWritable)value).getLength();
150                                        }
151                                        return null;
152                                }
153                        };
154                }
155        },
156        /**
157         * Print the dimensions of each records value if it is a
158         * valid image.
159         * 
160         * @author Sina Samangooei (ss@ecs.soton.ac.uk)
161         * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
162         */
163        IMAGE_DIMENSIONS {
164                @Override
165                public RecordInformationExtractor getExtractor() {
166                        return new RecordInformationExtractor() {
167                                @Override
168                                public <K,V> String extract(K key, V value, long offset, Path seqFile) {
169                                        if(value instanceof BytesWritable) {
170                                                try {
171                                                        BufferedImage im = ImageIO.read(new ByteArrayInputStream(((BytesWritable) value).getBytes()));
172                                                        return String.format("%d %d", im.getWidth(), im.getHeight());
173                                                } catch (IOException e) {
174                                                        return null;
175                                                }
176                                        }
177                                        return null;
178                                }
179                        };
180                }
181        };
182        
183        /**
184         * @return a {@link RecordInformationExtractor} for extracting information from a {@link SequenceFile} record.
185         */
186        public abstract RecordInformationExtractor getExtractor();
187
188        /**
189         * Construct a list of extractors from the given options.
190         * 
191         * @param options the options
192         * @return the extractors in the same order as the given options
193         */
194        public static List<RecordInformationExtractor> listOptionsToExtractPolicy(List<ListModeOptions> options) {
195                List<RecordInformationExtractor> extractors = new ArrayList<RecordInformationExtractor>();
196                
197                for(ListModeOptions opt : options) 
198                        extractors.add(opt.getExtractor());
199                
200                return extractors;
201        }
202}