/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *
 *   * Neither the name of the University of Southampton nor the names of its
 *     contributors may be used to endorse or promote products derived from this
 *     software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.experiment.dataset.util;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.openimaj.data.dataset.Dataset;
import org.openimaj.data.dataset.GroupedDataset;
import org.openimaj.data.dataset.ListBackedDataset;
import org.openimaj.data.dataset.ListDataset;
import org.openimaj.data.dataset.MapBackedDataset;

/**
 * Helper methods to provide different types of view on a dataset.
 *
 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
 */
public class DatasetAdaptors {
    /**
     * Create a {@link List} view of the given dataset. If the dataset is a
     * {@link ListDataset} it is returned as-is (no copy is made); otherwise
     * this method creates a new {@link List} containing all the instances in
     * the dataset. The list is populated by iterating through the dataset.
     *
     * @param <INSTANCE>
     *            The type of instances in the dataset
     * @param dataset
     *            The dataset.
     * @return a list of all instances.
     */
    public static <INSTANCE> List<INSTANCE> asList(final Dataset<INSTANCE> dataset) {
        if (dataset instanceof ListDataset) {
            return (ListDataset<INSTANCE>) dataset;
        }

        final ArrayList<INSTANCE> list = new ArrayList<INSTANCE>();

        for (final INSTANCE instance : dataset) {
            list.add(instance);
        }

        return list;
    }

    /**
     * Flattens a grouped dataset whose groups contain lists of instances
     * (i.e. a
     * {@code GroupedDataset<KEY, ListDataset<List<INSTANCE>>, List<INSTANCE>>})
     * so that all the instances from those inner lists are directly
     * associated with the group key. This type of thing might occur if your
     * dataset element reader can extract multiple media parts from a single
     * dataset item, that will all end up with the same key.
     * <p>
     * Within each group, instances appear in the order in which the inner
     * lists (and the elements of each inner list) are iterated.
     *
     * @param <ANN>
     *            The type of the group keys
     * @param <INSTANCE>
     *            The type of the individual instances held in the inner lists
     * @param dataset
     *            The dataset
     * @return The new dataset
     */
    public static <ANN, INSTANCE> GroupedDataset<ANN, ListDataset<INSTANCE>, INSTANCE>
            flattenListGroupedDataset(
                    final GroupedDataset<ANN, ? extends ListDataset<List<INSTANCE>>, ? extends List<INSTANCE>> dataset)
    {
        // Create a grouped dataset without the lists
        final MapBackedDataset<ANN, ListDataset<INSTANCE>, INSTANCE> g =
                new MapBackedDataset<ANN, ListDataset<INSTANCE>, INSTANCE>();

        // Go through each of the groups...
        for (final ANN a : dataset.getGroups()) {
            // Get the group
            final ListDataset<? extends List<INSTANCE>> l = dataset.getInstances(a);

            // Add each of the instances in that dataset to a new list dataset
            final ListBackedDataset<INSTANCE> newListDataset = new ListBackedDataset<INSTANCE>();
            for (final List<INSTANCE> le : l) {
                for (final INSTANCE ll : le) {
                    newListDataset.add(ll);
                }
            }

            // Put that list dataset straight into the new grouped dataset.
            g.add(a, newListDataset);
        }

        return g;
    }

    /**
     * Takes a grouped dataset and returns a new dataset that contains only
     * those groups specified. If the given groups do not exist in the provided
     * dataset, then they will be ignored.
     * <p>
     * The group datasets themselves are not copied: the returned dataset
     * refers to the same group dataset objects as the input.
     *
     * @param <ANN>
     *            The type of the group keys
     * @param <DATASET>
     *            The type of the dataset held for each group
     * @param <INSTANCE>
     *            The type of instances in the dataset
     * @param data
     *            The dataset to take the groups from
     * @param groups
     *            The groups to take
     * @return the new dataset containing only those groups.
     */
    public static <ANN, DATASET extends Dataset<INSTANCE>, INSTANCE> GroupedDataset<ANN, DATASET, INSTANCE>
            getGroupedDatasetSubset(final GroupedDataset<ANN, DATASET, INSTANCE> data, final ANN... groups)
    {
        // New dataset
        final MapBackedDataset<ANN, DATASET, INSTANCE> newDataset = new MapBackedDataset<ANN, DATASET, INSTANCE>();

        // Loop through each of the groups specified...
        for (final ANN group : groups) {
            // Copy the dataset into the new dataset (if it's not null)
            final DATASET ds = data.getInstances(group);
            if (ds != null) {
                newDataset.put(group, ds);
            }
        }

        return newDataset;
    }

    /**
     * Takes a grouped dataset and returns a new dataset with the groups
     * re-shuffled as specified in the regrouping criteria.
     * <p>
     * The regrouping criteria is a map from each new group name to the array
     * of old group names that feed it. Instances in the old groups will be
     * mapped to the new group names.
     * <p>
     * Where many old groups map to a single new group, the groups will be
     * merged, in the order in which the old groups appear in the array.
     * <p>
     * For example:
     *
     * <pre>
     * <code>
     * old == GroupedDataset: {G1=[1,2,3],G2=[4,5,6],G3=[7,8,9]}
     *
     * new = getRegroupedDataset( old, {A-&gt;[G1,G3],B-&gt;[G2]} )
     *
     * new == GroupedDataset: {A=[1,2,3,7,8,9],B=[4,5,6]}
     * </code>
     * </pre>
     *
     * If the given groups do not exist in the provided dataset, then they will
     * be ignored. A new group for which none of the old groups exist does not
     * appear in the result.
     * <p>
     * The instances are added to newly created {@link ListBackedDataset}s, so
     * the group datasets of the input are never written to.
     *
     * @param <ANN>
     *            The type of the group keys
     * @param <DATASET>
     *            The type of the list dataset held for each group of the input
     * @param <INSTANCE>
     *            The type of instances in the dataset
     * @param data
     *            The dataset to take the groups from
     * @param regroupCriteria
     *            The regrouping criteria
     * @return the new dataset containing the new regrouping.
     */
    public static <ANN, DATASET extends ListDataset<INSTANCE>, INSTANCE>
            GroupedDataset<ANN, ListBackedDataset<INSTANCE>, INSTANCE>
            getRegroupedDataset(final GroupedDataset<ANN, DATASET, INSTANCE> data, final Map<ANN, ANN[]> regroupCriteria)
    {
        // New dataset
        final MapBackedDataset<ANN, ListBackedDataset<INSTANCE>, INSTANCE> newDataset =
                new MapBackedDataset<ANN, ListBackedDataset<INSTANCE>, INSTANCE>();

        // Loop through each of the new groups specified; iterating the entries
        // avoids a second map lookup for every key.
        for (final Map.Entry<ANN, ANN[]> criterion : regroupCriteria.entrySet()) {
            final ANN newGroup = criterion.getKey();

            for (final ANN oldGroup : criterion.getValue()) {
                // Old groups that are missing from the source data are skipped
                final DATASET ds = data.getInstances(oldGroup);
                if (ds == null) {
                    continue;
                }

                final ListBackedDataset<INSTANCE> existing = newDataset.get(newGroup);
                if (existing != null) {
                    // We merge the groups if there's already one in our new
                    // dataset; it is one we created, so we know we can write
                    // to it.
                    existing.addAll(ds);
                } else {
                    // Create a new list backed dataset (which we know we can
                    // write to) rather than storing the source group itself.
                    final ListBackedDataset<INSTANCE> copy = new ListBackedDataset<INSTANCE>();
                    copy.addAll(ds);
                    newDataset.put(newGroup, copy);
                }
            }
        }

        return newDataset;
    }
}