From d88d00232cfdbfd508834911af6ad89a217b84e1 Mon Sep 17 00:00:00 2001 From: Camil Staps Date: Fri, 27 Nov 2015 00:18:32 +0100 Subject: Start assignment 4 --- Assignment 4/data/digits.mat | Bin 0 -> 3863457 bytes Assignment 4/data/synth1.mat | Bin 0 -> 7121 bytes Assignment 4/data/synth2.mat | Bin 0 -> 7091 bytes Assignment 4/data/synth3.mat | Bin 0 -> 7217 bytes Assignment 4/data/synth4.mat | Bin 0 -> 50386 bytes Assignment 4/data/wildfaces.mat | Bin 0 -> 36145247 bytes Assignment 4/ex41.py | 92 +++++++++++++++++++++++++++++++++++ Assignment 4/packages/clusterPlot.py | 75 ++++++++++++++++++++++++++++ Assignment 4/packages/clusterVal.py | 47 ++++++++++++++++++ 9 files changed, 214 insertions(+) create mode 100644 Assignment 4/data/digits.mat create mode 100644 Assignment 4/data/synth1.mat create mode 100644 Assignment 4/data/synth2.mat create mode 100644 Assignment 4/data/synth3.mat create mode 100644 Assignment 4/data/synth4.mat create mode 100644 Assignment 4/data/wildfaces.mat create mode 100644 Assignment 4/ex41.py create mode 100644 Assignment 4/packages/clusterPlot.py create mode 100644 Assignment 4/packages/clusterVal.py (limited to 'Assignment 4') diff --git a/Assignment 4/data/digits.mat b/Assignment 4/data/digits.mat new file mode 100644 index 0000000..434cf47 Binary files /dev/null and b/Assignment 4/data/digits.mat differ diff --git a/Assignment 4/data/synth1.mat b/Assignment 4/data/synth1.mat new file mode 100644 index 0000000..4eb623f Binary files /dev/null and b/Assignment 4/data/synth1.mat differ diff --git a/Assignment 4/data/synth2.mat b/Assignment 4/data/synth2.mat new file mode 100644 index 0000000..99838d2 Binary files /dev/null and b/Assignment 4/data/synth2.mat differ diff --git a/Assignment 4/data/synth3.mat b/Assignment 4/data/synth3.mat new file mode 100644 index 0000000..adefbcf Binary files /dev/null and b/Assignment 4/data/synth3.mat differ diff --git a/Assignment 4/data/synth4.mat b/Assignment 4/data/synth4.mat new file mode 100644 
index 0000000..8a445f9
Binary files /dev/null and b/Assignment 4/data/synth4.mat differ
diff --git a/Assignment 4/data/wildfaces.mat b/Assignment 4/data/wildfaces.mat
new file mode 100644
index 0000000..1f5894a
Binary files /dev/null and b/Assignment 4/data/wildfaces.mat differ
diff --git a/Assignment 4/ex41.py b/Assignment 4/ex41.py
new file mode 100644
index 0000000..5ae66db
--- /dev/null
+++ b/Assignment 4/ex41.py
@@ -0,0 +1,92 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Oct 23 14:45:21 2015
+
+@author: Camil Staps, s4498062
+
+This is Python 2 code.
+"""
+
+import sys
+sys.path.insert(0, './packages')
+
+import numpy as np
+from scipy import io as sciio
+from sklearn import cluster
+from clusterPlot import clusterPlot
+from clusterVal import clusterVal
+import matplotlib.pyplot as plt
+
+# 4.1.1
+n = 1
+synth = sciio.loadmat('./data/synth' + str(n) + '.mat')
+X = synth['X']
+y = synth['y']
+centroid, label, inertia = cluster.k_means(X, 4)
+clusterPlot(X, label, centroid, y)
+
+# 4.1.2
+entropies, purities, rands, jaccards = [], [], [], []
+for i in range(1, 11):
+    _, label, _ = cluster.k_means(X, i)
+    entropy, purity, rand, jaccard = clusterVal(y, label)
+    entropies.append(entropy)
+    purities.append(purity)
+    rands.append(rand)
+    jaccards.append(jaccard)
+
+print(entropies, purities, rands, jaccards)
+
+x = np.arange(1,11)
+plt.figure(figsize=(8,8))
+plt.subplot(2,2,1)
+plt.plot(x, entropies, label='Entropy')
+plt.legend()
+plt.subplot(2,2,2)
+plt.plot(x, purities, label='Purity')
+plt.legend(loc=4)
+plt.subplot(2,2,3)
+plt.plot(x, rands, label='Rand')
+plt.legend(loc=4)
+plt.subplot(2,2,4)
+plt.plot(x, jaccards, label='Jaccard')
+plt.legend(loc=4)
+plt.show()
+
+# 4.1.3
+faces = sciio.loadmat('./data/wildfaces.mat')
+X = faces['X']
+k = 0
+centroid, label, inertia = cluster.k_means(X, 10)
+
+n = 10
+plt.figure(figsize=(n*2,4))
+for k in range(0,n):
+    plt.subplot(2, n, k + 1)
+    plt.imshow(np.reshape(X[k,:], (3,40,40)).T)
+    plt.axis('off')
+    plt.subplot(2, n, k + 1 + n)
+    plt.imshow(np.reshape(centroid[label[k],:], (3,40,40)).T)
+    plt.axis('off')
+plt.show()
+
+# 4.1.4
+digits = sciio.loadmat('./data/digits.mat')
+X = digits['X']
+k = 20  # number of clusters for the digits data
+
+plt.figure(figsize=(6,4))
+for i in range(0,24):  # own index: reusing k here would overwrite the cluster count
+    plt.subplot(4, 6, i + 1)
+    plt.imshow(np.reshape(X[i], (16,16)), cmap=plt.cm.binary)
+    plt.axis('off')
+plt.show()
+
+centroid, label, inertia = cluster.k_means(X, k)  # k is still 20 here
+
+plt.figure(figsize=(6,4))
+for i in range(0,24):  # show the centroid assigned to each of the first 24 digits
+    plt.subplot(4, 6, i + 1)
+    plt.imshow(np.reshape(centroid[label[i]], (16,16)), cmap=plt.cm.binary)
+    plt.axis('off')
+plt.show()
diff --git a/Assignment 4/packages/clusterPlot.py b/Assignment 4/packages/clusterPlot.py
new file mode 100644
index 0000000..2f37a3d
--- /dev/null
+++ b/Assignment 4/packages/clusterPlot.py
@@ -0,0 +1,75 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Apr 14 09:01:18 2014
+
+"""
+
+def clusterPlot(X, clusterid, centroids='None', y='None', covars='None', figsize=(16,10)):
+    '''
+    CLUSTERPLOT Plots a clustering of a data set as well as the true class
+    labels. If data is more than 2-dimensional it should be first projected
+    onto the first two principal components. Data objects are plotted as a dot
+    with a circle around. The color of the dot indicates the true class,
+    and the circle indicates the cluster index. Optionally, the centroids are
+    plotted as filled-star markers, and ellipsoids corresponding to covariance
+    matrices (e.g. for gaussian mixture models).
+ + Usage: + clusterplot(X, clusterid) + clusterplot(X, clusterid, centroids=c_matrix, y=y_matrix) + clusterplot(X, clusterid, centroids=c_matrix, y=y_matrix, covars=c_tensor) + + Input: + X N-by-M data matrix (N data objects with M attributes) + clusterid N-by-1 vector of cluster indices + centroids K-by-M matrix of cluster centroids (optional) + y N-by-1 vector of true class labels (optional) + covars M-by-M-by-K tensor of covariance matrices (optional) + ''' + import numpy as np + from matplotlib.pyplot import figure, cm, plot, hold, legend, xlim, show + + + X = np.asarray(X) + cls = np.asarray(clusterid) + if y=='None': + y = np.zeros((X.shape[0],1)) + else: + y = np.asarray(y) + if centroids!='None': + centroids = np.asarray(centroids) + K = np.size(np.unique(cls)) + C = np.size(np.unique(y)) + ncolors = np.max([C,K]) + + # plot data points color-coded by class, cluster markers and centroids + figure(figsize=figsize) + hold(True) + colors = [0]*ncolors + for color in range(ncolors): + colors[color] = cm.jet.__call__(color*1.0/(1.0*ncolors-1))[:3] + for i,cs in enumerate(np.unique(y)): + plot(X[(y==cs).ravel(),0], X[(y==cs).ravel(),1], 'o', markeredgecolor='k', markerfacecolor=colors[i],markersize=6, zorder=2) + for i,cr in enumerate(np.unique(cls)): + plot(X[(cls==cr).ravel(),0], X[(cls==cr).ravel(),1], 'o', markersize=12, markeredgecolor=colors[i], markerfacecolor='None', markeredgewidth=3, zorder=1) + if centroids!='None': + for cd in range(centroids.shape[0]): + plot(centroids[cd,0], centroids[cd,1], '*', markersize=22, markeredgecolor='k', markerfacecolor=colors[cd], markeredgewidth=2, zorder=3) + # plot cluster shapes: + if covars!='None': + for cd in range(centroids.shape[0]): + x1, x2 = gauss_2d(centroids[cd],covars[cd,:,:]) + plot(x1,x2,'-', color=colors[cd], linewidth=3, zorder=5) + hold(False) + + # create legend + legend_items = np.unique(y).tolist()+np.unique(cls).tolist()+np.unique(cls).tolist() + for i in range(len(legend_items)): + if i