001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.ml.clustering.spectral; 031 032import java.util.Iterator; 033 034import org.apache.log4j.Logger; 035import org.openimaj.ml.clustering.DataClusterer; 036import org.openimaj.ml.clustering.IndexClusters; 037import org.openimaj.ml.clustering.SpatialClusterer; 038import org.openimaj.ml.clustering.SpatialClusters; 039import org.openimaj.util.pair.DoubleObjectPair; 040import org.openimaj.util.pair.IndependentPair; 041 042import ch.akuhn.matrix.Vector; 043import ch.akuhn.matrix.Vector.Entry; 044import ch.akuhn.matrix.eigenvalues.Eigenvalues; 045 046/** 047 * For a given set of {@link Eigenvalues} perform the stages of spectral clustering 048 * which involve the selection of the best eigen values and the calling of an internal clustering 049 * algorithm 050 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 051 */ 052public class PreparedSpectralClustering implements DataClusterer<Eigenvalues, SpectralIndexedClusters>{ 053 final static Logger logger = Logger.getLogger(PreparedSpectralClustering.class); 054 private SpectralClusteringConf<double[]> conf; 055 056 /** 057 * @param conf 058 */ 059 public PreparedSpectralClustering(SpectralClusteringConf<double[]> conf) { 060 this.conf = conf; 061 } 062 063 @Override 064 public int[][] performClustering(Eigenvalues data) { 065 return cluster(data).clusters(); 066 } 067 068 @Override 069 public SpectralIndexedClusters cluster(Eigenvalues eig) { 070 // Also normalise each row 071 IndependentPair<double[], double[][]> lowestCols = bestCols(eig); 072 // Using the eigenspace, cluster 073 return eigenspaceCluster(lowestCols); 074 } 075 076 protected SpectralIndexedClusters eigenspaceCluster(IndependentPair<double[], double[][]> lowestCols) { 077 SpatialClusterer<? extends SpatialClusters<double[]>, double[]> clusterer = conf.internal.apply(lowestCols); 078 // Cluster the rows with the internal spatial clusterer 079 SpatialClusters<double[]> cluster = clusterer.cluster(lowestCols.getSecondObject()); 080 // if the clusters contain the cluster indexes of the training examples use those 081 if(cluster instanceof IndexClusters){ 082 IndexClusters clusters = new IndexClusters(((IndexClusters)cluster).clusters()); 083// logger.debug(clusters); 084 return new SpectralIndexedClusters(clusters, lowestCols); 085 } 086 // Otherwise attempt to assign values to clusters 087 int[] clustered = cluster.defaultHardAssigner().assign(lowestCols.getSecondObject()); 088 // done! 089 return new SpectralIndexedClusters(new IndexClusters(clustered),lowestCols); 090 } 091 092 protected IndependentPair<double[], double[][]> bestCols(final Eigenvalues eig) { 093 094 095 int eigenVectorSelect = conf.eigenChooser.nEigenVectors(this.conf.laplacian.eigenIterator(eig), eig.getN()); 096 int eigenVectorSkip = this.conf.skipEigenVectors; 097 logger.debug("Selected dimensions: " + eigenVectorSelect); 098 logger.debug("Skipping dimesions: " + eigenVectorSkip); 099 eigenVectorSelect -= eigenVectorSkip; 100 101 102 int nrows = eig.vector[0].size(); 103 double[][] ret = new double[nrows][eigenVectorSelect]; 104 double[] retSum = new double[nrows]; 105 double[] eigvals = new double[eigenVectorSelect]; 106 Iterator<DoubleObjectPair<Vector>> iterator = this.conf.laplacian.eigenIterator(eig); 107 // Skip a few at the beggining 108 for (int i = 0; i < eigenVectorSkip; i++) iterator.next(); 109 int col = 0; 110 // Calculate U matrix (containing n smallests eigen valued columns) 111 for (; iterator.hasNext();) { 112 DoubleObjectPair<Vector> v = iterator.next(); 113 eigvals[col] = v.first; 114 115 for (Entry d : v.second.entries()) { 116 double elColI = d.value; 117 if(conf.eigenValueScale){ 118 elColI *= Math.sqrt(v.first); 119 } 120 ret[d.index][col] = elColI; 121 retSum[d.index] += elColI * elColI; 122 } 123 col++; 124 if(col == eigenVectorSelect) break; 125 } 126 127 if(!conf.eigenValueScale){ 128 // normalise rows 129 for (int i = 0; i < ret.length; i++) { 130 double[] row = ret[i]; 131 for (int j = 0; j < row.length; j++) { 132 row[j] /= Math.sqrt(retSum[i]); 133 } 134 } 135 } 136 137 return IndependentPair.pair(eigvals, ret); 138 } 139 140}