001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.hadoop.tools.sequencefile; 031 032import java.awt.image.BufferedImage; 033import java.io.ByteArrayInputStream; 034import java.io.IOException; 035import java.util.ArrayList; 036import java.util.List; 037 038import javax.imageio.ImageIO; 039 040import org.apache.hadoop.fs.Path; 041import org.apache.hadoop.io.BytesWritable; 042import org.apache.hadoop.io.SequenceFile; 043import org.ontoware.rdf2go.model.node.impl.URIImpl; 044import org.openimaj.hadoop.sequencefile.RecordInformationExtractor; 045import org.semanticdesktop.aperture.mime.identifier.magic.MagicMimeTypeIdentifier; 046 047/** 048 * Options for controlling what is printed when listing the contents 049 * of a {@link SequenceFile} with the {@link SequenceFileTool}. 050 * 051 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 052 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 053 */ 054public enum ListModeOptions { 055 /** 056 * Print the record key 057 * 058 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 059 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 060 */ 061 KEY { 062 @Override 063 public RecordInformationExtractor getExtractor() { 064 return new RecordInformationExtractor(){ 065 @Override 066 public <K,V> String extract(K key, V value, long offset, Path seqFile) { 067 return key.toString(); 068 } 069 }; 070 } 071 }, 072 /** 073 * Print the offset of the record in the {@link SequenceFile} 074 * 075 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 076 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 077 */ 078 OFFSET { 079 @Override 080 public RecordInformationExtractor getExtractor() { 081 return new RecordInformationExtractor(){ 082 @Override 083 public <K,V> String extract(K key, V value, long offset, Path seqFile) { 084 return ((Long)offset).toString(); 085 } 086 }; 087 } 088 }, 089 /** 090 * Print the path to the {@link SequenceFile} in question. 091 * This is useful if you're working with a directory of 092 * {@link SequenceFile}s 093 * 094 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 095 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 096 */ 097 SEQUENCEFILE { 098 @Override 099 public RecordInformationExtractor getExtractor() { 100 return new RecordInformationExtractor(){ 101 @Override 102 public <K,V> String extract(K key, V value, long offset, Path seqFile) { 103 return seqFile.toString(); 104 } 105 }; 106 } 107 }, 108 /** 109 * Print the mimetype of the value in each record 110 * 111 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 112 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 113 */ 114 MIMETYPE { 115 @Override 116 public RecordInformationExtractor getExtractor() { 117 return new RecordInformationExtractor(){ 118 @Override 119 public <K,V> String extract(K key, V value, long offset, Path seqFile) { 120 if(value instanceof BytesWritable) { 121 MagicMimeTypeIdentifier match; 122 try { 123 BytesWritable bw = (BytesWritable)value; 124 match = new MagicMimeTypeIdentifier (); 125 String ident = match.identify(bw.getBytes(),key.toString(),new URIImpl(seqFile.toUri().toString())); 126 return ident; 127 } catch(Exception e){ 128 System.err.println("Failed!"); 129 } 130 } 131 return null; 132 } 133 }; 134 } 135 }, 136 /** 137 * Print the size of the record value in bytes 138 * 139 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 140 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 141 */ 142 SIZE { 143 @Override 144 public RecordInformationExtractor getExtractor() { 145 return new RecordInformationExtractor(){ 146 @Override 147 public <K,V> String extract(K key, V value, long offset, Path seqFile) { 148 if(value instanceof BytesWritable) { 149 return "" + ((BytesWritable)value).getLength(); 150 } 151 return null; 152 } 153 }; 154 } 155 }, 156 /** 157 * Print the dimensions of each records value if it is a 158 * valid image. 159 * 160 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 161 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 162 */ 163 IMAGE_DIMENSIONS { 164 @Override 165 public RecordInformationExtractor getExtractor() { 166 return new RecordInformationExtractor() { 167 @Override 168 public <K,V> String extract(K key, V value, long offset, Path seqFile) { 169 if(value instanceof BytesWritable) { 170 try { 171 BufferedImage im = ImageIO.read(new ByteArrayInputStream(((BytesWritable) value).getBytes())); 172 return String.format("%d %d", im.getWidth(), im.getHeight()); 173 } catch (IOException e) { 174 return null; 175 } 176 } 177 return null; 178 } 179 }; 180 } 181 }; 182 183 /** 184 * @return a {@link RecordInformationExtractor} for extracting information from a {@link SequenceFile} record. 185 */ 186 public abstract RecordInformationExtractor getExtractor(); 187 188 /** 189 * Construct a list of extractors from the given options. 190 * 191 * @param options the options 192 * @return the extractors in the same order as the given options 193 */ 194 public static List<RecordInformationExtractor> listOptionsToExtractPolicy(List<ListModeOptions> options) { 195 List<RecordInformationExtractor> extractors = new ArrayList<RecordInformationExtractor>(); 196 197 for(ListModeOptions opt : options) 198 extractors.add(opt.getExtractor()); 199 200 return extractors; 201 } 202}