/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * 	Redistributions of source code must retain the above copyright notice,
 * 	this list of conditions and the following disclaimer.
 *
 *   *	Redistributions in binary form must reproduce the above copyright notice,
 * 	this list of conditions and the following disclaimer in the documentation
 * 	and/or other materials provided with the distribution.
 *
 *   *	Neither the name of the University of Southampton nor the names of its
 * 	contributors may be used to endorse or promote products derived from this
 * 	software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.hadoop.sequencefile;

import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.SequenceFile.Metadata;
import org.apache.hadoop.util.Tool;

/**
 * Utility functions for storing and retrieving metadata
 * to be stored in a {@link SequenceFile} by the
 * {@link MetadataSequenceFileOutputFormat}.
 * <p>
 * Standard usage would be to use the {@link #setMetadata(Map, Configuration)}
 * method to add the given metadata to the {@link Configuration} in the
 * part of the code that runs locally (i.e. in a {@link Tool#run(String[])} method).
 * The configuration would then be distributed across the cluster, and any
 * mappers or reducers that use the {@link MetadataSequenceFileOutputFormat}
 * will automatically have the metadata added to their output file(s).
 * <p>
 * Each metadata item is stored as its own configuration property (the key
 * prefixed with a package-specific namespace), and the list of known keys is
 * stored in a single additional property written with
 * {@link Configuration#setStrings(String, String...)}. Because that call
 * stores the values as one comma-delimited string, a metadata key that
 * itself contains a comma is not expected to round-trip correctly.
 *
 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
 *
 */
public class MetadataConfiguration {
	/** Namespace prepended to every metadata key when written to the configuration. */
	private static final String META_PREFIX = "org.openimaj.hadoop.sequencefile.metadata.";

	/** Name of the property holding the list of metadata keys that have been stored. */
	private static final String META_KEYS = META_PREFIX + "__metadataKeys__";

	/**
	 * Standard key for a unique identifier metadata item
	 */
	public static final String UUID_KEY = "UUID";

	/**
	 * Standard key for a comment metadata item
	 */
	public static final String COMMENT_KEY = "Comment";

	/**
	 * Standard key for storing an indicator of the mime-type of the value fields
	 */
	public static final String CONTENT_TYPE_KEY = "ContentType";

	/** Static utility class; not instantiable. */
	private MetadataConfiguration() {}

	/**
	 * Read any metadata stored in the {@link Configuration}.
	 * <p>
	 * Only keys registered through {@link #setMetadata(Map, Configuration)}
	 * are considered. A registered key whose value property is missing from
	 * the configuration is skipped.
	 *
	 * @param conf the configuration
	 * @return the metadata map; never <code>null</code>, but empty if no
	 *         metadata has been stored
	 */
	public static Metadata getMetadata(Configuration conf) {
		final Metadata metadata = new Metadata();

		final String[] keys = conf.getStrings(META_KEYS);
		if (keys == null) {
			return metadata;
		}

		for (final String key : keys) {
			final String value = conf.get(META_PREFIX + key);

			if (value != null) {
				metadata.set(new Text(key), new Text(value));
			}
		}

		return metadata;
	}

	/**
	 * Write the given metadata to the {@link Configuration}.
	 * <p>
	 * The metadata is merged with anything stored by earlier calls: values
	 * for keys that already exist are overwritten, and new keys are added.
	 * Each key is recorded only once in the list of known keys, no matter how
	 * many times it is set.
	 *
	 * @param metadata the metadata.
	 * @param conf the configuration.
	 */
	public static void setMetadata(Map<String, String> metadata, Configuration conf) {
		// A LinkedHashSet drops repeated keys while keeping first-seen order, so
		// calling this method again with an already-registered key no longer
		// makes the stored key list grow.
		final Set<String> keys = new LinkedHashSet<String>();

		final Collection<String> existing = conf.getStringCollection(META_KEYS);
		if (existing != null) {
			keys.addAll(existing);
		}

		for (final Entry<String, String> entry : metadata.entrySet()) {
			conf.set(META_PREFIX + entry.getKey(), entry.getValue());
			keys.add(entry.getKey());
		}

		conf.setStrings(META_KEYS, keys.toArray(new String[keys.size()]));
	}

}