1 files changed, 47 insertions, 0 deletions
diff --git a/Assignment 4/packages/clusterVal.py b/Assignment 4/packages/clusterVal.py
new file mode 100644
index 0000000..df97334
--- /dev/null
+++ b/Assignment 4/packages/clusterVal.py
@@ -0,0 +1,47 @@
def clusterVal(y, clusterid):
    '''
    CLUSTERVAL Estimate cluster validity using Entropy, Purity, Rand Statistic,
    and Jaccard coefficient.

    Usage:
      Entropy, Purity, Rand, Jaccard = clusterVal(y, clusterid)

    Input:
       y         N-by-1 vector of class labels
       clusterid N-by-1 vector of cluster indices

       Labels may be arbitrary values (0-based, 1-based, non-contiguous,
       strings, ...); they are only compared for equality.

    Output:
      Entropy    Entropy measure, computed as
                 sum_i m_i * (1 - sum_j p_ij*log2(p_ij + EPS)) / (N*K)
                 where m_i is the size of cluster i, p_ij the fraction of
                 cluster i that belongs to class j and K the number of
                 clusters.
      Purity     Purity measure: mean over clusters of max_j p_ij.
      Rand       Rand index, (f00 + f11) / (number of object pairs).
                 NaN when there are fewer than two objects.
      Jaccard    Jaccard coefficient, f11 / (f01 + f10 + f11).
                 NaN when no pair shares a class or a cluster.

    Raises:
      ValueError if the inputs are empty or differ in length.
    '''

    import numpy as np

    y = np.asarray(y).ravel()
    clusterid = np.asarray(clusterid).ravel()
    N = y.shape[0]
    if clusterid.shape[0] != N:
        raise ValueError('y and clusterid must have the same number of elements')
    if N == 0:
        raise ValueError('y and clusterid must not be empty')
    EPS = 2.22e-16  # keeps log2 finite for empty (cluster, class) cells

    # Re-index labels to 0..C-1 / 0..K-1 so that any label values work, not
    # only labels that already happen to be consecutive integers from zero.
    classes, class_idx = np.unique(y, return_inverse=True)
    clusters, cluster_idx = np.unique(clusterid, return_inverse=True)
    C = classes.size
    K = clusters.size

    # counts[i, j] = number of objects of j'th class in i'th cluster
    counts = np.zeros((K, C), dtype=np.int64)
    np.add.at(counts, (cluster_idx, class_idx), 1)

    m_i = counts.sum(axis=1)    # objects per cluster; never zero after re-indexing
    n_j = counts.sum(axis=0)    # objects per class
    p_ij = counts / m_i[:, np.newaxis].astype(float)

    # Formulas kept exactly as in the original implementation.
    entropy = float(((1 - (p_ij * np.log2(p_ij + EPS)).sum(axis=1)) * m_i).sum() / (N * K))
    purity = float(p_ij.max(axis=1).sum() / K)

    def pairs(n):
        # Number of unordered pairs that can be drawn from n objects.
        return n * (n - 1) // 2

    # Pair counts derived from the contingency table instead of an O(N^2)
    # loop over all object pairs; the resulting integers are identical.
    n_pairs = int(pairs(N))
    f11 = int(pairs(counts).sum())      # same class, same cluster
    f01 = int(pairs(m_i).sum()) - f11   # different class, same cluster
    f10 = int(pairs(n_j).sum()) - f11   # same class, different cluster
    f00 = n_pairs - f11 - f01 - f10     # different class, different cluster

    nan = float('nan')
    rand = float(f00 + f11) / n_pairs if n_pairs else nan
    co_members = f01 + f10 + f11
    jaccard = float(f11) / co_members if co_members else nan

    return entropy, purity, rand, jaccard