001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.data.dataset;
031
032import java.util.AbstractMap;
033import java.util.Collection;
034import java.util.HashMap;
035import java.util.Iterator;
036import java.util.Map;
037import java.util.Set;
038
039import org.openimaj.data.identity.Identifiable;
040import org.openimaj.util.iterator.ConcatenatedIterable;
041
042/**
043 * A {@link MapBackedDataset} is a concrete implementation of a
044 * {@link GroupedDataset} backed by a {@link Map}.
045 * 
046 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
047 * 
048 * @param <KEY>
049 *            Type of dataset class key
050 * @param <DATASET>
051 *            Type of sub-datasets.
052 * @param <INSTANCE>
053 *            Type of objects in the dataset
054 */
055public class MapBackedDataset<KEY extends Object, DATASET extends Dataset<INSTANCE>, INSTANCE>
056                extends AbstractMap<KEY, DATASET>
057                implements GroupedDataset<KEY, DATASET, INSTANCE>
058{
059        protected Map<KEY, DATASET> map;
060
061        /**
062         * Construct an empty {@link MapBackedDataset} backed by a {@link HashMap}.
063         */
064        public MapBackedDataset() {
065                this.map = new HashMap<KEY, DATASET>();
066        }
067
068        /**
069         * Construct with the given map.
070         * 
071         * @param map
072         *            the map
073         */
074        public MapBackedDataset(Map<KEY, DATASET> map) {
075                this.map = map;
076        }
077
078        /**
079         * Get the underlying map.
080         * 
081         * @return the underlying map
082         */
083        public Map<KEY, DATASET> getMap() {
084                return map;
085        }
086
087        @Override
088        public DATASET getInstances(KEY key) {
089                return map.get(key);
090        }
091
092        @Override
093        public Set<KEY> getGroups() {
094                return map.keySet();
095        }
096
097        @Override
098        public INSTANCE getRandomInstance(KEY key) {
099                return map.get(key).getRandomInstance();
100        }
101
102        @Override
103        public INSTANCE getRandomInstance() {
104                final int index = (int) (Math.random() * numInstances());
105                int count = 0;
106
107                for (final DATASET d : map.values()) {
108                        if (index >= count + d.numInstances()) {
109                                count += d.numInstances();
110                        } else {
111                                if (d instanceof ListDataset) {
112                                        return ((ListDataset<INSTANCE>) d).get(index - count);
113                                } else {
114                                        for (final INSTANCE i : d) {
115                                                if (index == count)
116                                                        return i;
117
118                                                count++;
119                                        }
120                                }
121                        }
122                }
123                return null;
124        }
125
126        @Override
127        public int numInstances() {
128                int size = 0;
129
130                for (final DATASET d : map.values()) {
131                        size += d.numInstances();
132                }
133
134                return size;
135        }
136
137        @Override
138        public Iterator<INSTANCE> iterator() {
139                return new ConcatenatedIterable<INSTANCE>(map.values()).iterator();
140        }
141
142        @Override
143        public String toString() {
144                return map.toString();
145        }
146
147        @Override
148        public Set<Entry<KEY, DATASET>> entrySet() {
149                return map.entrySet();
150        }
151
152        /*
153         * (non-Javadoc)
154         * 
155         * @see java.util.AbstractMap#put(java.lang.Object, java.lang.Object)
156         */
157        @Override
158        public DATASET put(KEY key, DATASET value) {
159                return map.put(key, value);
160        }
161
162        /**
163         * Convenience method for populating a dataset by chaining method calls:
164         * 
165         * <pre>
166         * final MapBackedDataset&lt;String, ListDataset&lt;String&gt;, String&gt; ds = new MapBackedDataset&lt;String, ListDataset&lt;String&gt;, String&gt;()
167         *              .add(&quot;A&quot;, new ListBackedDataset&lt;String&gt;())
168         *              .add(&quot;B&quot;, new ListBackedDataset&lt;String&gt;());
169         * </pre>
170         * 
171         * @param key
172         *            the key to insert
173         * @param dataset
174         *            the value to insert
175         * @return this dataset
176         */
177        public MapBackedDataset<KEY, DATASET, INSTANCE> add(KEY key, DATASET dataset) {
178                this.put(key, dataset);
179                return this;
180        }
181
182        /**
183         * Convenience method to construct a {@link MapBackedDataset} from a number
184         * of {@link Identifiable} sub-datasets. Each sub-dataset becomes a group,
185         * and the key is the identifier returned by {@link Identifiable#getID()}.
186         * 
187         * @param datasets
188         *            the datasets representing the groups
189         * @return the newly constructed grouped dataset.
190         */
191        public static <INSTANCE, DATASET extends Dataset<INSTANCE> & Identifiable>
192                        MapBackedDataset<String, DATASET, INSTANCE> of(DATASET... datasets)
193        {
194                final MapBackedDataset<String, DATASET, INSTANCE> ds = new MapBackedDataset<String, DATASET, INSTANCE>();
195
196                for (final DATASET d : datasets) {
197                        ds.put(d.getID(), d);
198                }
199
200                return ds;
201        }
202
203        /**
204         * A builder for creating {@link MapBackedDataset} instances from
205         * {@link Identifiable} sub-datasets. Example:
206         * 
207         * <pre>
208         * final MapBackedDataset<String, VFSListDataset<String>, String> ds = new MapBackedDataset.IdentifiableBuilder<VFSListDataset<String>, String>()
209         *                                      .add(new VFSListDataset<String>(...))
210         *                                      .add(new VFSListDataset<String>(...))
211         *                                      .build();
212         * </pre>
213         * 
214         * For small {@link MapBackedDataset}s, the <tt>MapBackedDataset.of()</tt>
215         * methods are even more convenient.
216         * <p>
217         * Builder instances can be reused - it is safe to call {@link #build()}
218         * multiple times to build multiple maps in series. Each map is a superset
219         * of the maps created before it.
220         * 
221         * 
222         * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
223         * 
224         * @param <DATASET>
225         *            Type of sub-datasets.
226         * @param <INSTANCE>
227         *            Type of objects in the dataset
228         */
229        public static class IdentifiableBuilder<DATASET extends Dataset<INSTANCE> & Identifiable, INSTANCE> {
230                MapBackedDataset<String, DATASET, INSTANCE> ds = new MapBackedDataset<String, DATASET, INSTANCE>();
231
232                /**
233                 * Add the sub-dataset such that it becomes a group in the
234                 * {@link MapBackedDataset} returned by {@link #build()} where the key
235                 * is the identifier returned by {@link Identifiable#getID()}.
236                 * <p>
237                 * If duplicate keys (i.e. sub-datasets with duplicate identifiers) are
238                 * added, only the last one will appear in the resultant dataset
239                 * produced by {@link #build()}.
240                 * 
241                 * @param dataset
242                 *            the sub-dataset to add
243                 * @return the builder
244                 */
245                public IdentifiableBuilder<DATASET, INSTANCE> add(DATASET dataset) {
246                        ds.put(dataset.getID(), dataset);
247
248                        return this;
249                }
250
251                /**
252                 * Returns a newly-created {@link MapBackedDataset}.
253                 * 
254                 * @return a newly-created {@link MapBackedDataset}.
255                 */
256                public MapBackedDataset<String, DATASET, INSTANCE> build() {
257                        return new MapBackedDataset<String, DATASET, INSTANCE>(ds);
258                }
259        }
260
261        /**
262         * Returns a new builder. The generated builder is equivalent to the builder
263         * created by the {@link IdentifiableBuilder#IdentifiableBuilder()}
264         * constructor.
265         * 
266         * @return a new builder.
267         */
268        public static <DATASET extends Dataset<INSTANCE> & Identifiable, INSTANCE>
269                        IdentifiableBuilder<DATASET, INSTANCE> builder()
270        {
271                return new IdentifiableBuilder<DATASET, INSTANCE>();
272        }
273
274        /**
275         * Convenience method to construct a {@link MapBackedDataset} from a number
276         * of {@link Identifiable} sub-datasets. Each sub-dataset becomes a group,
277         * and the key is the identifier returned by {@link Identifiable#getID()}.
278         * 
279         * @param d1
280         *            first dataset
281         * 
282         * @return the newly constructed grouped dataset.
283         */
284        public static <INSTANCE, DATASET extends Dataset<INSTANCE> & Identifiable>
285                        MapBackedDataset<String, DATASET, INSTANCE> of(DATASET d1)
286        {
287                final MapBackedDataset<String, DATASET, INSTANCE> ds = new MapBackedDataset<String, DATASET, INSTANCE>();
288                ds.put(d1.getID(), d1);
289                return ds;
290        }
291
292        /**
293         * Convenience method to construct a {@link MapBackedDataset} from a number
294         * of {@link Identifiable} sub-datasets. Each sub-dataset becomes a group,
295         * and the key is the identifier returned by {@link Identifiable#getID()}.
296         * 
297         * @param d1
298         *            first dataset
299         * @param d2
300         *            second dataset
301         * 
302         * @return the newly constructed grouped dataset.
303         */
304        public static <INSTANCE, DATASET extends Dataset<INSTANCE> & Identifiable>
305                        MapBackedDataset<String, DATASET, INSTANCE> of(DATASET d1, DATASET d2)
306        {
307                final MapBackedDataset<String, DATASET, INSTANCE> ds = new MapBackedDataset<String, DATASET, INSTANCE>();
308                ds.put(d1.getID(), d1);
309                ds.put(d2.getID(), d2);
310                return ds;
311        }
312
313        /**
314         * Convenience method to construct a {@link MapBackedDataset} from a number
315         * of {@link Identifiable} sub-datasets. Each sub-dataset becomes a group,
316         * and the key is the identifier returned by {@link Identifiable#getID()}.
317         * 
318         * @param d1
319         *            first dataset
320         * @param d2
321         *            second dataset
322         * @param d3
323         *            third dataset
324         * 
325         * @return the newly constructed grouped dataset.
326         */
327        public static <INSTANCE, DATASET extends Dataset<INSTANCE> & Identifiable>
328                        MapBackedDataset<String, DATASET, INSTANCE> of(DATASET d1, DATASET d2, DATASET d3)
329        {
330                final MapBackedDataset<String, DATASET, INSTANCE> ds = new MapBackedDataset<String, DATASET, INSTANCE>();
331                ds.put(d1.getID(), d1);
332                ds.put(d2.getID(), d2);
333                ds.put(d3.getID(), d3);
334                return ds;
335        }
336
337        /**
338         * Convenience method to construct a {@link MapBackedDataset} from a number
339         * of {@link Identifiable} sub-datasets. Each sub-dataset becomes a group,
340         * and the key is the identifier returned by {@link Identifiable#getID()}.
341         * 
342         * @param d1
343         *            first dataset
344         * @param d2
345         *            second dataset
346         * @param d3
347         *            third dataset
348         * @param d4
349         *            forth dataset
350         * @return the newly constructed grouped dataset.
351         */
352        public static <INSTANCE, DATASET extends Dataset<INSTANCE> & Identifiable>
353                        MapBackedDataset<String, DATASET, INSTANCE> of(DATASET d1, DATASET d2, DATASET d3, DATASET d4)
354        {
355                final MapBackedDataset<String, DATASET, INSTANCE> ds = new MapBackedDataset<String, DATASET, INSTANCE>();
356                ds.put(d1.getID(), d1);
357                ds.put(d2.getID(), d2);
358                ds.put(d3.getID(), d3);
359                ds.put(d4.getID(), d4);
360                return ds;
361        }
362
363        /**
364         * Convenience method to construct a {@link MapBackedDataset} from a number
365         * of {@link Identifiable} sub-datasets. Each sub-dataset becomes a group,
366         * and the key is the identifier returned by {@link Identifiable#getID()}.
367         * 
368         * @param d1
369         *            first dataset
370         * @param d2
371         *            second dataset
372         * @param d3
373         *            third dataset
374         * @param d4
375         *            forth dataset
376         * @param d5
377         *            fifth dataset
378         * @return the newly constructed grouped dataset.
379         */
380        public static <INSTANCE, DATASET extends Dataset<INSTANCE> & Identifiable>
381                        MapBackedDataset<String, DATASET, INSTANCE> of(DATASET d1, DATASET d2, DATASET d3, DATASET d4, DATASET d5)
382        {
383                final MapBackedDataset<String, DATASET, INSTANCE> ds = new MapBackedDataset<String, DATASET, INSTANCE>();
384                ds.put(d1.getID(), d1);
385                ds.put(d2.getID(), d2);
386                ds.put(d3.getID(), d3);
387                ds.put(d4.getID(), d4);
388                ds.put(d5.getID(), d5);
389                return ds;
390        }
391
392        /**
393         * Convenience method to construct a {@link MapBackedDataset} from a number
394         * of {@link Identifiable} sub-datasets. Each sub-dataset becomes a group,
395         * and the key is the identifier returned by {@link Identifiable#getID()}.
396         * 
397         * @param datasets
398         *            the datasets representing the groups
399         * @return the newly constructed grouped dataset.
400         */
401        public static <INSTANCE, DATASET extends Dataset<INSTANCE> & Identifiable>
402                        MapBackedDataset<String, DATASET, INSTANCE> of(Collection<DATASET> datasets)
403        {
404                final MapBackedDataset<String, DATASET, INSTANCE> ds = new MapBackedDataset<String, DATASET, INSTANCE>();
405
406                for (final DATASET d : datasets) {
407                        ds.put(d.getID(), d);
408                }
409
410                return ds;
411        }
412}