001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.ml.clustering; 031 032import gnu.trove.list.array.TIntArrayList; 033 034import java.io.DataInput; 035import java.io.DataOutput; 036import java.io.IOException; 037import java.io.PrintWriter; 038import java.io.StringWriter; 039import java.util.Arrays; 040import java.util.HashMap; 041import java.util.List; 042import java.util.Map; 043import java.util.Map.Entry; 044import java.util.Scanner; 045 046import org.openimaj.io.IOUtils; 047 048/** 049 * Interface to describe objects that are the result of the clustering where the 050 * training data is implicitly clustered 051 * 052 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 053 */ 054public class IndexClusters implements Clusters { 055 protected int[][] clusters; 056 protected int nEntries; 057 058 /** 059 * Used only to initailise for {@link IOUtils} 060 */ 061 public IndexClusters() { 062 } 063 064 /** 065 * @param clusters 066 * the clusters 067 * @param nEntries 068 * the number of entries 069 */ 070 public IndexClusters(int[][] clusters, int nEntries) { 071 this.nEntries = nEntries; 072 this.clusters = clusters; 073 } 074 075 /** 076 * @param clusters 077 * the clusters 078 */ 079 public IndexClusters(int[][] clusters) { 080 this.nEntries = 0; 081 this.clusters = clusters; 082 for (int i = 0; i < clusters.length; i++) { 083 this.nEntries += clusters[i].length; 084 } 085 } 086 087 /** 088 * @param assignments 089 * convert a list of cluster assignments to a 2D array to cluster 090 * to assignments 091 */ 092 public IndexClusters(int[] assignments) { 093 this.nEntries = assignments.length; 094 final Map<Integer, TIntArrayList> clusters = new HashMap<Integer, TIntArrayList>(); 095 for (int i = 0; i < assignments.length; i++) { 096 final int ass = assignments[i]; 097 TIntArrayList current = clusters.get(ass); 098 if (current == null) { 099 clusters.put(ass, current = new TIntArrayList()); 100 } 101 current.add(i); 102 } 103 int clustersSeen = 0; 104 this.clusters = new int[clusters.size()][]; 105 for (final Entry<Integer, TIntArrayList> i : clusters.entrySet()) { 106 this.clusters[clustersSeen] = i.getValue().toArray(); 107 clustersSeen++; 108 } 109 } 110 111 public IndexClusters(List<int[]> completedClusters) { 112 this.nEntries = 0; 113 this.clusters = new int[completedClusters.size()][]; 114 for (int i = 0; i < clusters.length; i++) { 115 clusters[i] = completedClusters.get(i); 116 this.nEntries += clusters[i].length; 117 } 118 } 119 120 /** 121 * Get the number of clusters. 122 * 123 * @return number of clusters. 124 */ 125 public int[][] clusters() { 126 return clusters; 127 } 128 129 /** 130 * Get the number of data entries 131 * 132 * @return the number of data entries. 133 */ 134 public int numEntries() { 135 return nEntries; 136 } 137 138 /** 139 * Get the number of clusters. 140 * 141 * @return number of clusters. 142 */ 143 public int numClusters() { 144 return this.clusters.length; 145 } 146 147 @Override 148 public void readASCII(Scanner in) throws IOException { 149 this.clusters = new int[in.nextInt()][]; 150 this.nEntries = in.nextInt(); 151 for (int i = 0; i < this.nEntries;) { 152 final int cluster = in.nextInt(); 153 final int count = in.nextInt(); 154 i += count; 155 this.clusters[cluster] = new int[count]; 156 for (int j = 0; j < count; j++) { 157 this.clusters[cluster][j] = in.nextInt(); 158 } 159 } 160 } 161 162 @Override 163 public String asciiHeader() { 164 return "IDX" + CLUSTER_HEADER; 165 } 166 167 @Override 168 public void readBinary(DataInput in) throws IOException { 169 this.clusters = new int[in.readInt()][]; 170 this.nEntries = in.readInt(); 171 for (int i = 0; i < this.nEntries;) { 172 final int cluster = in.readInt(); 173 final int count = in.readInt(); 174 i += count; 175 this.clusters[cluster] = new int[count]; 176 for (int j = 0; j < count; j++) { 177 this.clusters[cluster][j] = in.readInt(); 178 } 179 } 180 } 181 182 @Override 183 public byte[] binaryHeader() { 184 return asciiHeader().getBytes(); 185 } 186 187 @Override 188 public void writeASCII(PrintWriter out) throws IOException { 189 out.println(this.numClusters()); 190 out.println(this.nEntries); 191 for (int i = 0; i < this.clusters.length; i++) { 192 final int[] cluster = this.clusters[i]; 193 out.println(i); 194 out.println(cluster.length); 195 for (int j = 0; j < cluster.length; j++) { 196 out.println(cluster[j]); 197 } 198 } 199 } 200 201 @Override 202 public void writeBinary(DataOutput out) throws IOException { 203 out.writeInt(this.numClusters()); 204 out.writeInt(nEntries); 205 for (int i = 0; i < this.clusters.length; i++) { 206 final int[] cluster = this.clusters[i]; 207 out.writeInt(i); 208 out.writeInt(cluster.length); 209 for (int j = 0; j < cluster.length; j++) { 210 out.writeInt(cluster[j]); 211 } 212 } 213 } 214 215 @Override 216 public String toString() { 217 final int[][] clusters = this.clusters(); 218 int i = 0; 219 final StringWriter sw = new StringWriter(); 220 final PrintWriter out = new PrintWriter(sw); 221 out.println("N-Clusters: " + this.numClusters()); 222 out.println("Entities: " + this.numEntries()); 223 String str = sw.toString(); 224 for (final int[] member : clusters) { 225 str += String.format("%d %s\n", i++, Arrays.toString(member)); 226 } 227 return str; 228 } 229}