001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.ml.clustering;
031
032import gnu.trove.list.array.TIntArrayList;
033
034import java.io.DataInput;
035import java.io.DataOutput;
036import java.io.IOException;
037import java.io.PrintWriter;
038import java.io.StringWriter;
039import java.util.Arrays;
040import java.util.HashMap;
041import java.util.List;
042import java.util.Map;
043import java.util.Map.Entry;
044import java.util.Scanner;
045
046import org.openimaj.io.IOUtils;
047
048/**
049 * Interface to describe objects that are the result of the clustering where the
050 * training data is implicitly clustered
051 * 
052 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
053 */
054public class IndexClusters implements Clusters {
055        protected int[][] clusters;
056        protected int nEntries;
057
058        /**
059         * Used only to initailise for {@link IOUtils}
060         */
061        public IndexClusters() {
062        }
063
064        /**
065         * @param clusters
066         *            the clusters
067         * @param nEntries
068         *            the number of entries
069         */
070        public IndexClusters(int[][] clusters, int nEntries) {
071                this.nEntries = nEntries;
072                this.clusters = clusters;
073        }
074
075        /**
076         * @param clusters
077         *            the clusters
078         */
079        public IndexClusters(int[][] clusters) {
080                this.nEntries = 0;
081                this.clusters = clusters;
082                for (int i = 0; i < clusters.length; i++) {
083                        this.nEntries += clusters[i].length;
084                }
085        }
086
087        /**
088         * @param assignments
089         *            convert a list of cluster assignments to a 2D array to cluster
090         *            to assignments
091         */
092        public IndexClusters(int[] assignments) {
093                this.nEntries = assignments.length;
094                final Map<Integer, TIntArrayList> clusters = new HashMap<Integer, TIntArrayList>();
095                for (int i = 0; i < assignments.length; i++) {
096                        final int ass = assignments[i];
097                        TIntArrayList current = clusters.get(ass);
098                        if (current == null) {
099                                clusters.put(ass, current = new TIntArrayList());
100                        }
101                        current.add(i);
102                }
103                int clustersSeen = 0;
104                this.clusters = new int[clusters.size()][];
105                for (final Entry<Integer, TIntArrayList> i : clusters.entrySet()) {
106                        this.clusters[clustersSeen] = i.getValue().toArray();
107                        clustersSeen++;
108                }
109        }
110
111        public IndexClusters(List<int[]> completedClusters) {
112                this.nEntries = 0;
113                this.clusters = new int[completedClusters.size()][];
114                for (int i = 0; i < clusters.length; i++) {
115                        clusters[i] = completedClusters.get(i);
116                        this.nEntries += clusters[i].length;
117                }
118        }
119
120        /**
121         * Get the number of clusters.
122         * 
123         * @return number of clusters.
124         */
125        public int[][] clusters() {
126                return clusters;
127        }
128
129        /**
130         * Get the number of data entries
131         * 
132         * @return the number of data entries.
133         */
134        public int numEntries() {
135                return nEntries;
136        }
137
138        /**
139         * Get the number of clusters.
140         * 
141         * @return number of clusters.
142         */
143        public int numClusters() {
144                return this.clusters.length;
145        }
146
147        @Override
148        public void readASCII(Scanner in) throws IOException {
149                this.clusters = new int[in.nextInt()][];
150                this.nEntries = in.nextInt();
151                for (int i = 0; i < this.nEntries;) {
152                        final int cluster = in.nextInt();
153                        final int count = in.nextInt();
154                        i += count;
155                        this.clusters[cluster] = new int[count];
156                        for (int j = 0; j < count; j++) {
157                                this.clusters[cluster][j] = in.nextInt();
158                        }
159                }
160        }
161
162        @Override
163        public String asciiHeader() {
164                return "IDX" + CLUSTER_HEADER;
165        }
166
167        @Override
168        public void readBinary(DataInput in) throws IOException {
169                this.clusters = new int[in.readInt()][];
170                this.nEntries = in.readInt();
171                for (int i = 0; i < this.nEntries;) {
172                        final int cluster = in.readInt();
173                        final int count = in.readInt();
174                        i += count;
175                        this.clusters[cluster] = new int[count];
176                        for (int j = 0; j < count; j++) {
177                                this.clusters[cluster][j] = in.readInt();
178                        }
179                }
180        }
181
182        @Override
183        public byte[] binaryHeader() {
184                return asciiHeader().getBytes();
185        }
186
187        @Override
188        public void writeASCII(PrintWriter out) throws IOException {
189                out.println(this.numClusters());
190                out.println(this.nEntries);
191                for (int i = 0; i < this.clusters.length; i++) {
192                        final int[] cluster = this.clusters[i];
193                        out.println(i);
194                        out.println(cluster.length);
195                        for (int j = 0; j < cluster.length; j++) {
196                                out.println(cluster[j]);
197                        }
198                }
199        }
200
201        @Override
202        public void writeBinary(DataOutput out) throws IOException {
203                out.writeInt(this.numClusters());
204                out.writeInt(nEntries);
205                for (int i = 0; i < this.clusters.length; i++) {
206                        final int[] cluster = this.clusters[i];
207                        out.writeInt(i);
208                        out.writeInt(cluster.length);
209                        for (int j = 0; j < cluster.length; j++) {
210                                out.writeInt(cluster[j]);
211                        }
212                }
213        }
214
215        @Override
216        public String toString() {
217                final int[][] clusters = this.clusters();
218                int i = 0;
219                final StringWriter sw = new StringWriter();
220                final PrintWriter out = new PrintWriter(sw);
221                out.println("N-Clusters: " + this.numClusters());
222                out.println("Entities: " + this.numEntries());
223                String str = sw.toString();
224                for (final int[] member : clusters) {
225                        str += String.format("%d %s\n", i++, Arrays.toString(member));
226                }
227                return str;
228        }
229}