# Origin: "Assignment 4/packages/clusterVal.py", added in commit
# d88d00232cfdbfd508834911af6ad89a217b84e1 ("Start assignment 4",
# Camil Staps, 2015-11-27).
def clusterVal(y, clusterid):
    '''
    CLUSTERVAL Estimate cluster validity using Entropy, Purity, Rand Statistic,
    and Jaccard coefficient.

    Usage:
        Entropy, Purity, Rand, Jaccard = clusterVal(y, clusterid)

    Input:
        y           N-by-1 vector of class labels
        clusterid   N-by-1 vector of cluster indices

        Labels may be any values accepted by numpy.unique (they do not need
        to be the integers 0..C-1 / 0..K-1); both inputs are flattened.

    Output:
        Entropy     sum_i m_i * (1 - sum_j p_ij*log2(p_ij + EPS)) / (N*K),
                    where p_ij is the fraction of cluster i belonging to
                    class j and m_i is the size of cluster i.
        Purity      mean over clusters of max_j p_ij (not size-weighted).
        Rand        (f00 + f11) / (number of object pairs).
        Jaccard     f11 / (f01 + f10 + f11).

        Rand and Jaccard are returned as NaN when their denominator is zero
        (a single object, or no pair sharing a class or a cluster).

        NOTE(review): Entropy and Purity follow the formulas this function
        has always used, which appear to differ from the usual size-weighted
        textbook definitions; they are kept unchanged for compatibility.

    Raises:
        ValueError  if y and clusterid differ in length or are empty.
    '''

    import numpy as np

    y = np.asarray(y).ravel()
    clusterid = np.asarray(clusterid).ravel()
    N = y.shape[0]
    if clusterid.shape[0] != N:
        raise ValueError('y and clusterid must have the same number of elements')
    if N == 0:
        raise ValueError('clusterVal requires at least one observation')

    EPS = 2.22e-16  # keeps log2 finite where p_ij == 0
    nan = float('nan')

    # Map arbitrary labels onto 0..C-1 / 0..K-1 (sorted order), so identifiers
    # that are not zero-based or contiguous cannot create empty clusters.
    class_idx = np.unique(y, return_inverse=True)[1]
    cluster_idx = np.unique(clusterid, return_inverse=True)[1]
    C = int(class_idx.max()) + 1
    K = int(cluster_idx.max()) + 1

    # counts[i, j]: number of objects of j'th class in i'th cluster
    counts = np.zeros((K, C), dtype=np.int64)
    np.add.at(counts, (cluster_idx, class_idx), 1)

    m_i = counts.sum(axis=1)  # total number of objects in i'th cluster (always > 0)
    # probability that member of i'th cluster belongs to j'th class
    p_ij = counts.astype(float) / m_i[:, None]

    entropy = ((1 - (p_ij * np.log2(p_ij + EPS)).sum(axis=1)) * m_i).sum() / (N * K)
    purity = p_ij.max(axis=1).sum() / K

    def _pairs(n):
        # number of unordered pairs that can be drawn from n objects
        return n * (n - 1) // 2

    # Pair counts derived from the contingency table instead of visiting all
    # N*(N-1)/2 pairs; the integers are identical to the pairwise loop.
    total_pairs = _pairs(N)
    f11 = int(_pairs(counts).sum())                    # same class, same cluster
    f10 = int(_pairs(counts.sum(axis=0)).sum()) - f11  # same class, different cluster
    f01 = int(_pairs(m_i).sum()) - f11                 # different class, same cluster
    f00 = total_pairs - f11 - f10 - f01                # different class, different cluster

    rand = float(f00 + f11) / total_pairs if total_pairs else nan
    jaccard_den = f01 + f10 + f11
    jaccard = float(f11) / jaccard_den if jaccard_den else nan

    return entropy, purity, rand, jaccard