001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.data.dataset; 031 032import java.util.AbstractMap; 033import java.util.Collection; 034import java.util.HashMap; 035import java.util.Iterator; 036import java.util.Map; 037import java.util.Set; 038 039import org.openimaj.data.identity.Identifiable; 040import org.openimaj.util.iterator.ConcatenatedIterable; 041 042/** 043 * A {@link MapBackedDataset} is a concrete implementation of a 044 * {@link GroupedDataset} backed by a {@link Map}. 045 * 046 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 047 * 048 * @param <KEY> 049 * Type of dataset class key 050 * @param <DATASET> 051 * Type of sub-datasets. 052 * @param <INSTANCE> 053 * Type of objects in the dataset 054 */ 055public class MapBackedDataset<KEY extends Object, DATASET extends Dataset<INSTANCE>, INSTANCE> 056 extends AbstractMap<KEY, DATASET> 057 implements GroupedDataset<KEY, DATASET, INSTANCE> 058{ 059 protected Map<KEY, DATASET> map; 060 061 /** 062 * Construct an empty {@link MapBackedDataset} backed by a {@link HashMap}. 063 */ 064 public MapBackedDataset() { 065 this.map = new HashMap<KEY, DATASET>(); 066 } 067 068 /** 069 * Construct with the given map. 070 * 071 * @param map 072 * the map 073 */ 074 public MapBackedDataset(Map<KEY, DATASET> map) { 075 this.map = map; 076 } 077 078 /** 079 * Get the underlying map. 080 * 081 * @return the underlying map 082 */ 083 public Map<KEY, DATASET> getMap() { 084 return map; 085 } 086 087 @Override 088 public DATASET getInstances(KEY key) { 089 return map.get(key); 090 } 091 092 @Override 093 public Set<KEY> getGroups() { 094 return map.keySet(); 095 } 096 097 @Override 098 public INSTANCE getRandomInstance(KEY key) { 099 return map.get(key).getRandomInstance(); 100 } 101 102 @Override 103 public INSTANCE getRandomInstance() { 104 final int index = (int) (Math.random() * numInstances()); 105 int count = 0; 106 107 for (final DATASET d : map.values()) { 108 if (index >= count + d.numInstances()) { 109 count += d.numInstances(); 110 } else { 111 if (d instanceof ListDataset) { 112 return ((ListDataset<INSTANCE>) d).get(index - count); 113 } else { 114 for (final INSTANCE i : d) { 115 if (index == count) 116 return i; 117 118 count++; 119 } 120 } 121 } 122 } 123 return null; 124 } 125 126 @Override 127 public int numInstances() { 128 int size = 0; 129 130 for (final DATASET d : map.values()) { 131 size += d.numInstances(); 132 } 133 134 return size; 135 } 136 137 @Override 138 public Iterator<INSTANCE> iterator() { 139 return new ConcatenatedIterable<INSTANCE>(map.values()).iterator(); 140 } 141 142 @Override 143 public String toString() { 144 return map.toString(); 145 } 146 147 @Override 148 public Set<Entry<KEY, DATASET>> entrySet() { 149 return map.entrySet(); 150 } 151 152 /* 153 * (non-Javadoc) 154 * 155 * @see java.util.AbstractMap#put(java.lang.Object, java.lang.Object) 156 */ 157 @Override 158 public DATASET put(KEY key, DATASET value) { 159 return map.put(key, value); 160 } 161 162 /** 163 * Convenience method for populating a dataset by chaining method calls: 164 * 165 * <pre> 166 * final MapBackedDataset<String, ListDataset<String>, String> ds = new MapBackedDataset<String, ListDataset<String>, String>() 167 * .add("A", new ListBackedDataset<String>()) 168 * .add("B", new ListBackedDataset<String>()); 169 * </pre> 170 * 171 * @param key 172 * the key to insert 173 * @param dataset 174 * the value to insert 175 * @return this dataset 176 */ 177 public MapBackedDataset<KEY, DATASET, INSTANCE> add(KEY key, DATASET dataset) { 178 this.put(key, dataset); 179 return this; 180 } 181 182 /** 183 * Convenience method to construct a {@link MapBackedDataset} from a number 184 * of {@link Identifiable} sub-datasets. Each sub-dataset becomes a group, 185 * and the key is the identifier returned by {@link Identifiable#getID()}. 186 * 187 * @param datasets 188 * the datasets representing the groups 189 * @return the newly constructed grouped dataset. 190 */ 191 public static <INSTANCE, DATASET extends Dataset<INSTANCE> & Identifiable> 192 MapBackedDataset<String, DATASET, INSTANCE> of(DATASET... datasets) 193 { 194 final MapBackedDataset<String, DATASET, INSTANCE> ds = new MapBackedDataset<String, DATASET, INSTANCE>(); 195 196 for (final DATASET d : datasets) { 197 ds.put(d.getID(), d); 198 } 199 200 return ds; 201 } 202 203 /** 204 * A builder for creating {@link MapBackedDataset} instances from 205 * {@link Identifiable} sub-datasets. Example: 206 * 207 * <pre> 208 * final MapBackedDataset<String, VFSListDataset<String>, String> ds = new MapBackedDataset.IdentifiableBuilder<VFSListDataset<String>, String>() 209 * .add(new VFSListDataset<String>(...)) 210 * .add(new VFSListDataset<String>(...)) 211 * .build(); 212 * </pre> 213 * 214 * For small {@link MapBackedDataset}s, the <tt>MapBackedDataset.of()</tt> 215 * methods are even more convenient. 216 * <p> 217 * Builder instances can be reused - it is safe to call {@link #build()} 218 * multiple times to build multiple maps in series. Each map is a superset 219 * of the maps created before it. 220 * 221 * 222 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 223 * 224 * @param <DATASET> 225 * Type of sub-datasets. 226 * @param <INSTANCE> 227 * Type of objects in the dataset 228 */ 229 public static class IdentifiableBuilder<DATASET extends Dataset<INSTANCE> & Identifiable, INSTANCE> { 230 MapBackedDataset<String, DATASET, INSTANCE> ds = new MapBackedDataset<String, DATASET, INSTANCE>(); 231 232 /** 233 * Add the sub-dataset such that it becomes a group in the 234 * {@link MapBackedDataset} returned by {@link #build()} where the key 235 * is the identifier returned by {@link Identifiable#getID()}. 236 * <p> 237 * If duplicate keys (i.e. sub-datasets with duplicate identifiers) are 238 * added, only the last one will appear in the resultant dataset 239 * produced by {@link #build()}. 240 * 241 * @param dataset 242 * the sub-dataset to add 243 * @return the builder 244 */ 245 public IdentifiableBuilder<DATASET, INSTANCE> add(DATASET dataset) { 246 ds.put(dataset.getID(), dataset); 247 248 return this; 249 } 250 251 /** 252 * Returns a newly-created {@link MapBackedDataset}. 253 * 254 * @return a newly-created {@link MapBackedDataset}. 255 */ 256 public MapBackedDataset<String, DATASET, INSTANCE> build() { 257 return new MapBackedDataset<String, DATASET, INSTANCE>(ds); 258 } 259 } 260 261 /** 262 * Returns a new builder. The generated builder is equivalent to the builder 263 * created by the {@link IdentifiableBuilder#IdentifiableBuilder()} 264 * constructor. 265 * 266 * @return a new builder. 267 */ 268 public static <DATASET extends Dataset<INSTANCE> & Identifiable, INSTANCE> 269 IdentifiableBuilder<DATASET, INSTANCE> builder() 270 { 271 return new IdentifiableBuilder<DATASET, INSTANCE>(); 272 } 273 274 /** 275 * Convenience method to construct a {@link MapBackedDataset} from a number 276 * of {@link Identifiable} sub-datasets. Each sub-dataset becomes a group, 277 * and the key is the identifier returned by {@link Identifiable#getID()}. 278 * 279 * @param d1 280 * first dataset 281 * 282 * @return the newly constructed grouped dataset. 283 */ 284 public static <INSTANCE, DATASET extends Dataset<INSTANCE> & Identifiable> 285 MapBackedDataset<String, DATASET, INSTANCE> of(DATASET d1) 286 { 287 final MapBackedDataset<String, DATASET, INSTANCE> ds = new MapBackedDataset<String, DATASET, INSTANCE>(); 288 ds.put(d1.getID(), d1); 289 return ds; 290 } 291 292 /** 293 * Convenience method to construct a {@link MapBackedDataset} from a number 294 * of {@link Identifiable} sub-datasets. Each sub-dataset becomes a group, 295 * and the key is the identifier returned by {@link Identifiable#getID()}. 296 * 297 * @param d1 298 * first dataset 299 * @param d2 300 * second dataset 301 * 302 * @return the newly constructed grouped dataset. 303 */ 304 public static <INSTANCE, DATASET extends Dataset<INSTANCE> & Identifiable> 305 MapBackedDataset<String, DATASET, INSTANCE> of(DATASET d1, DATASET d2) 306 { 307 final MapBackedDataset<String, DATASET, INSTANCE> ds = new MapBackedDataset<String, DATASET, INSTANCE>(); 308 ds.put(d1.getID(), d1); 309 ds.put(d2.getID(), d2); 310 return ds; 311 } 312 313 /** 314 * Convenience method to construct a {@link MapBackedDataset} from a number 315 * of {@link Identifiable} sub-datasets. Each sub-dataset becomes a group, 316 * and the key is the identifier returned by {@link Identifiable#getID()}. 317 * 318 * @param d1 319 * first dataset 320 * @param d2 321 * second dataset 322 * @param d3 323 * third dataset 324 * 325 * @return the newly constructed grouped dataset. 326 */ 327 public static <INSTANCE, DATASET extends Dataset<INSTANCE> & Identifiable> 328 MapBackedDataset<String, DATASET, INSTANCE> of(DATASET d1, DATASET d2, DATASET d3) 329 { 330 final MapBackedDataset<String, DATASET, INSTANCE> ds = new MapBackedDataset<String, DATASET, INSTANCE>(); 331 ds.put(d1.getID(), d1); 332 ds.put(d2.getID(), d2); 333 ds.put(d3.getID(), d3); 334 return ds; 335 } 336 337 /** 338 * Convenience method to construct a {@link MapBackedDataset} from a number 339 * of {@link Identifiable} sub-datasets. Each sub-dataset becomes a group, 340 * and the key is the identifier returned by {@link Identifiable#getID()}. 341 * 342 * @param d1 343 * first dataset 344 * @param d2 345 * second dataset 346 * @param d3 347 * third dataset 348 * @param d4 349 * forth dataset 350 * @return the newly constructed grouped dataset. 351 */ 352 public static <INSTANCE, DATASET extends Dataset<INSTANCE> & Identifiable> 353 MapBackedDataset<String, DATASET, INSTANCE> of(DATASET d1, DATASET d2, DATASET d3, DATASET d4) 354 { 355 final MapBackedDataset<String, DATASET, INSTANCE> ds = new MapBackedDataset<String, DATASET, INSTANCE>(); 356 ds.put(d1.getID(), d1); 357 ds.put(d2.getID(), d2); 358 ds.put(d3.getID(), d3); 359 ds.put(d4.getID(), d4); 360 return ds; 361 } 362 363 /** 364 * Convenience method to construct a {@link MapBackedDataset} from a number 365 * of {@link Identifiable} sub-datasets. Each sub-dataset becomes a group, 366 * and the key is the identifier returned by {@link Identifiable#getID()}. 367 * 368 * @param d1 369 * first dataset 370 * @param d2 371 * second dataset 372 * @param d3 373 * third dataset 374 * @param d4 375 * forth dataset 376 * @param d5 377 * fifth dataset 378 * @return the newly constructed grouped dataset. 379 */ 380 public static <INSTANCE, DATASET extends Dataset<INSTANCE> & Identifiable> 381 MapBackedDataset<String, DATASET, INSTANCE> of(DATASET d1, DATASET d2, DATASET d3, DATASET d4, DATASET d5) 382 { 383 final MapBackedDataset<String, DATASET, INSTANCE> ds = new MapBackedDataset<String, DATASET, INSTANCE>(); 384 ds.put(d1.getID(), d1); 385 ds.put(d2.getID(), d2); 386 ds.put(d3.getID(), d3); 387 ds.put(d4.getID(), d4); 388 ds.put(d5.getID(), d5); 389 return ds; 390 } 391 392 /** 393 * Convenience method to construct a {@link MapBackedDataset} from a number 394 * of {@link Identifiable} sub-datasets. Each sub-dataset becomes a group, 395 * and the key is the identifier returned by {@link Identifiable#getID()}. 396 * 397 * @param datasets 398 * the datasets representing the groups 399 * @return the newly constructed grouped dataset. 400 */ 401 public static <INSTANCE, DATASET extends Dataset<INSTANCE> & Identifiable> 402 MapBackedDataset<String, DATASET, INSTANCE> of(Collection<DATASET> datasets) 403 { 404 final MapBackedDataset<String, DATASET, INSTANCE> ds = new MapBackedDataset<String, DATASET, INSTANCE>(); 405 406 for (final DATASET d : datasets) { 407 ds.put(d.getID(), d); 408 } 409 410 return ds; 411 } 412}