Source code

001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.experiment.dataset.util;
031
032import java.util.ArrayList;
033import java.util.List;
034import java.util.Map;
035
036import org.openimaj.data.dataset.Dataset;
037import org.openimaj.data.dataset.GroupedDataset;
038import org.openimaj.data.dataset.ListBackedDataset;
039import org.openimaj.data.dataset.ListDataset;
040import org.openimaj.data.dataset.MapBackedDataset;
041
042/**
043 * Helper methods to provide different types of view on a dataset.
044 * 
045 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
046 */
047public class DatasetAdaptors {
048        /**
049         * Create a {@link List} view of the given dataset. If the dataset is a
050         * {@link ListDataset} it is returned, otherwise this method creates a new
051         * {@link List} containing all the instances in the dataset. The list is
052         * populated by iterating through the dataset.
053         * 
054         * @param <INSTANCE>
055         *            The type of instances in the dataset
056         * @param dataset
057         *            The dataset.
058         * @return a list of all instances.
059         */
060        public static <INSTANCE> List<INSTANCE> asList(final Dataset<INSTANCE> dataset) {
061                if (dataset instanceof ListDataset)
062                        return (ListDataset<INSTANCE>) dataset;
063
064                final ArrayList<INSTANCE> list = new ArrayList<INSTANCE>();
065
066                for (final INSTANCE instance : dataset)
067                        list.add(instance);
068
069                return list;
070        }
071
072        /**
073         * if you have a grouped dataset where the groups contains lists of feature
074         * objects (i.e. GroupedDataset<KEY,ListDataset<List<INSTANCE>>,INSTANCE>)
075         * then this will flatten those internal list, so that all the instances
076         * from those lists are directly associated with the key. This type of thing
077         * might occur if your dataset element reader can extract multiple media
078         * parts from a single dataset item, that will all end up with the same key.
079         * 
080         * @param dataset
081         *            The dataset
082         * @return The new dataset
083         */
084        public static <ANN, INSTANCE> GroupedDataset<ANN, ListDataset<INSTANCE>, INSTANCE>
085                        flattenListGroupedDataset(
086                                        final GroupedDataset<ANN, ? extends ListDataset<List<INSTANCE>>, ? extends List<INSTANCE>> dataset)
087        {
088                // Create a grouped dataset without the lists
089                final MapBackedDataset<ANN, ListDataset<INSTANCE>, INSTANCE> g =
090                                new MapBackedDataset<ANN, ListDataset<INSTANCE>, INSTANCE>();
091
092                // Go through each of the groups...
093                for (final ANN a : dataset.getGroups())
094                {
095                        // Get the group
096                        final ListDataset<? extends List<INSTANCE>> l = dataset.getInstances(a);
097
098                        // Add each of the instances in that dataset to a new list dataset
099                        final ListBackedDataset<INSTANCE> newListDataset = new ListBackedDataset<INSTANCE>();
100                        for (final List<INSTANCE> le : l)
101                                for (final INSTANCE ll : le)
102                                        newListDataset.add(ll);
103
104                        // Put that list dataset straight into the new grouped dataset.
105                        g.add(a, newListDataset);
106                }
107
108                return g;
109        }
110
111        /**
112         * Takes a grouped dataset and returns a new dataset that contains only
113         * those groups specified. If the given groups do not exist in the provided
114         * dataset, then they will be ignored.
115         * 
116         * @param data
117         *            The dataset to take the groups from
118         * @param groups
119         *            The groups to take
120         * @return the new dataset containing only those groups.
121         */
122        public static <ANN, DATASET extends Dataset<INSTANCE>, INSTANCE> GroupedDataset<ANN, DATASET, INSTANCE>
123                        getGroupedDatasetSubset(final GroupedDataset<ANN, DATASET, INSTANCE> data, final ANN... groups)
124        {
125                // New dataset
126                final MapBackedDataset<ANN, DATASET, INSTANCE> newDataset = new MapBackedDataset<ANN, DATASET, INSTANCE>();
127
128                // Loop through each of the groups specified...
129                for (final ANN group : groups)
130                {
131                        // Copy the dataset into the new dataset (if it's not null)
132                        final DATASET ds = data.getInstances(group);
133                        if (ds != null)
134                                newDataset.put(group, ds);
135                }
136
137                return newDataset;
138        }
139
140        /**
141         * Takes a grouped dataset and returns a new dataset with the groups
142         * re-shuffled as specified in the regrouping criteria.
143         * 
144         * The regrouping criteria is a map from new group name to old group name.
145         * Instances in the old group names will be mapped to the new group names.
146         * 
147         * Where many old groups map to a single new group, the groups will be
148         * merged.
149         * 
150         * For example:
151         * 
152         * <pre>
153         * <code>
154         *      old == GroupedDataset: {G1=[1,2,3],G2=[4,5,6],G3=[7,8,9]}
155         * 
156         *              new = getGroupedDatasetSubset( old, {A->[G1,G3],B->[G2]} )
157         * 
158         *              new == GroupedDataset: {A=[1,2,3,7,8,9],B=[4,5,6]}
159         *      </code>
160         * </pre>
161         * 
162         * If the given groups do not exist in the provided dataset, then they will
163         * be ignored.
164         * 
165         * @param data
166         *            The dataset to take the groups from
167         * @param regroupCriteria
168         *            The regrouping criteria
169         * @return the new dataset containing the new regrouping.
170         */
171        public static <ANN, DATASET extends ListDataset<INSTANCE>, INSTANCE>
172                        GroupedDataset<ANN, ListBackedDataset<INSTANCE>, INSTANCE>
173                        getRegroupedDataset(final GroupedDataset<ANN, DATASET, INSTANCE> data, final Map<ANN, ANN[]> regroupCriteria)
174        {
175                // New dataset
176                final MapBackedDataset<ANN, ListBackedDataset<INSTANCE>, INSTANCE> newDataset =
177                                new MapBackedDataset<ANN, ListBackedDataset<INSTANCE>, INSTANCE>();
178
179                // Loop through each of the new groups specified...
180                for (final ANN newGroup : regroupCriteria.keySet())
181                {
182                        for (final ANN oldGroup : regroupCriteria.get(newGroup))
183                        {
184                                // Copy the dataset into the new dataset (if it's not null)
185                                final DATASET ds = data.getInstances(oldGroup);
186                                if (ds != null)
187                                {
188                                        // Create a new list backed dataset (which we know we can
189                                        // write to)...
190                                        final ListBackedDataset<INSTANCE> lbd = new ListBackedDataset<INSTANCE>();
191                                        lbd.addAll(ds);
192
193                                        // We merge the groups if there's already one in our new
194                                        // dataset
195                                        if (newDataset.get(newGroup) != null)
196                                                newDataset.get(newGroup).addAll(lbd);
197                                        else
198                                                newDataset.put(newGroup, lbd);
199                                }
200                        }
201                }
202
203                return newDataset;
204        }
205}