1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
|
def clusterVal(y, clusterid):
    """Estimate cluster validity against reference class labels.

    Computes an entropy score, a purity score, the Rand statistic and the
    Jaccard coefficient for a clustering, given the true class of each object.

    Usage:
        Entropy, Purity, Rand, Jaccard = clusterVal(y, clusterid)

    Parameters
    ----------
    y : array_like
        Class label of each of the N objects (flattened to 1-D). Labels may
        be any values ``np.unique`` can sort (ints, strings, ...); they do
        not have to be ``0..C-1``.
    clusterid : array_like
        Cluster index of each of the N objects (flattened to 1-D). Labels do
        not have to be ``0..K-1``.

    Returns
    -------
    entropy : float
        ``sum_k m_k * (1 - sum_c p_kc * log2(p_kc + EPS)) / (N * K)`` where
        ``m_k`` is the size of cluster k and ``p_kc`` the fraction of cluster
        k belonging to class c.
    purity : float
        Mean over clusters of ``max_c p_kc``.
    rand : float
        ``(f00 + f11) / (f00 + f01 + f10 + f11)``; NaN when there are no
        object pairs (N < 2).
    jaccard : float
        ``f11 / (f01 + f10 + f11)``; NaN when that denominator is zero.

    Raises
    ------
    ValueError
        If the inputs are empty or have different numbers of elements.

    Notes
    -----
    NOTE(review): the entropy and purity formulas are reproduced unchanged
    from the previous implementation so that returned values stay the same.
    They are not the size-weighted textbook forms (entropy here equals
    ``(1 + weighted_entropy) / K`` and purity is an unweighted mean over
    clusters) -- confirm this is intended before relying on absolute values.
    """
    import numpy as np

    y = np.asarray(y).ravel()
    clusterid = np.asarray(clusterid).ravel()
    N = y.shape[0]
    if clusterid.shape[0] != N:
        raise ValueError("y and clusterid must have the same number of elements")
    if N == 0:
        raise ValueError("y and clusterid must not be empty")

    # Map arbitrary labels onto dense indices 0..C-1 / 0..K-1 so the tables
    # below are indexed correctly whatever label values the caller uses.
    classes, class_idx = np.unique(y, return_inverse=True)
    clusters, cluster_idx = np.unique(clusterid, return_inverse=True)
    class_idx = np.asarray(class_idx).ravel()
    cluster_idx = np.asarray(cluster_idx).ravel()
    C = classes.size
    K = clusters.size
    EPS = 2.22e-16  # keeps log2 finite where p_ij == 0

    # counts[k, c]: number of objects of class c in cluster k.
    counts = np.zeros((K, C), dtype=np.int64)
    np.add.at(counts, (cluster_idx, class_idx), 1)
    m_i = counts.sum(axis=1)  # objects per cluster (always >= 1)
    n_j = counts.sum(axis=0)  # objects per class
    # p_ij[k, c]: probability that a member of cluster k belongs to class c.
    p_ij = counts / m_i[:, np.newaxis].astype(float)

    row_plogp = (p_ij * np.log2(p_ij + EPS)).sum(axis=1)
    entropy = ((1 - row_plogp) * m_i).sum() / (N * K)
    purity = p_ij.max(axis=1).sum() / K

    # Pair counts from the contingency table instead of an O(N^2) loop:
    # "n choose 2" pairs share a cell / cluster / class respectively.
    same_both = int((counts * (counts - 1) // 2).sum())
    same_cluster = int((m_i * (m_i - 1) // 2).sum())
    same_class = int((n_j * (n_j - 1) // 2).sum())
    total_pairs = N * (N - 1) // 2

    f11 = same_both                     # same class, same cluster
    f01 = same_cluster - same_both      # different class, same cluster
    f10 = same_class - same_both        # same class, different cluster
    f00 = total_pairs - f11 - f01 - f10  # different class, different cluster

    nan = float("nan")
    rand = float(f00 + f11) / total_pairs if total_pairs else nan
    jaccard_den = f01 + f10 + f11
    jaccard = float(f11) / jaccard_den if jaccard_den else nan
    return entropy, purity, rand, jaccard
|