Assignment 1 continuing, only 1.2.2c-e to be done

author: Camil Staps 2015-09-21 15:17:40 +0200
committer: Camil Staps 2015-09-21 15:17:40 +0200
commit: dc77e1c88e7973ef2b7ec3afa2d22ec830a82dbe (patch)
tree: 356b2db45f40c843488e6d8706f4c02455b87ea1
parent: Assignment 1, 1.1 & 1.2 start (diff)
5 files changed, 172 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
index 612ebeb..a1e4aad 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 *.spyderproject
 *.spyderworkspace
+*.pyc
 
diff --git a/Assignment 1/Data/wildfaces_grayscale.mat b/Assignment 1/Data/wildfaces_grayscale.mat
new file mode 100644
index 0000000..bcab41e
--- /dev/null
+++ b/Assignment 1/Data/wildfaces_grayscale.mat
diff --git a/Assignment 1/ex12.py b/Assignment 1/ex12.py
index 3db49e1..5c467dc 100644
--- a/Assignment 1/ex12.py
+++ b/Assignment 1/ex12.py
@@ -47,3 +47,14 @@ handles = [pltpatches.Patch(label=k, color=v) for k, v in colors.iteritems()] +
 ax.legend(handles=handles, numpoints=1, loc=2)
 
 plt.show()
+
+# 1.2.2 a
+# PCA is a method that can be used to reduce dimensionality of a dataset. It 
+# can be used when some variables are correlated; we then basically rewrite one
+# of them as a function of the other. Of course, in general that implies data
+# loss.
+
+# 1.2.2 b
+# EVD is a way to rewrite a diagonalizable matrix into a canonical form (a
+# summation of products of eigenvalues and corresponding eigenvectors). SVD is 
+# a generalisation which can be applied to any matrix.
diff --git a/Assignment 1/ex13.py b/Assignment 1/ex13.py
new file mode 100644
index 0000000..b91d885
--- /dev/null
+++ b/Assignment 1/ex13.py
@@ -0,0 +1,88 @@
+# exercise 3.2.1
+
+from __future__ import print_function
+from pylab import *
+from scipy.io import loadmat
+from similarity import similarity
+
+# Image to use as query
+i = 635
+
+# Similarity: 'SMC', 'Jaccard', 'ExtendedJaccard', 'Cosine', 'Correlation' 
+similarity_measure = 'jaccard'
+
+# Load the face database (wildfaces_grayscale.mat)
+# Load Matlab data file to python dict structure
+X = loadmat('./Data/wildfaces_grayscale.mat')['X']
+N, M = shape(X)
+
+
+# Search the face database for similar faces
+# Indices of all images other than i
+noti = range(0,i) + range(i+1,N) 
+# Compute similarity between image i and all others
+sim = similarity(X[i,:], X[noti,:], similarity_measure)
+sim = sim.tolist()[0]
+# (similarity, index) tuples, sorted by ascending similarity
+sim_to_index = sorted(zip(sim,noti))
+
+
+# Visualize query image and 5 most/least similar images
+figure(figsize=(12,8))
+subplot(3,1,1)
+imshow(np.reshape(X[i],(40,40)).T, cmap=cm.gray)
+xticks([]); yticks([])
+title('Query image')
+ylabel('image #{0}'.format(i))
+
+
+for ms in range(5):
+
+    # 5 most similar images found
+    subplot(3,5,6+ms)
+    im_id = sim_to_index[-ms-1][1]
+    im_sim = sim_to_index[-ms-1][0]
+    imshow(np.reshape(X[im_id],(40,40)).T, cmap=cm.gray)
+    xlabel('sim={0:.3f}'.format(im_sim))
+    ylabel('image #{0}'.format(im_id))
+    xticks([]); yticks([])
+    if ms==2: title('Most similar images')
+
+    # 5 least similar images found
+    subplot(3,5,11+ms)
+    im_id = sim_to_index[ms][1]
+    im_sim = sim_to_index[ms][0]
+    imshow(np.reshape(X[im_id],(40,40)).T, cmap=cm.gray)
+    xlabel('sim={0:.3f}'.format(im_sim))
+    ylabel('image #{0}'.format(im_id))
+    xticks([]); yticks([])
+    if ms==2: title('Least similar images')
+    
+show()
+
+# 1.3.1
+# For any two similarity measures, the five least similar are quite different.
+# Based on the five most similar images, SMC and Jaccard produce similar 
+# results. Correlation and Cosine produce some similar results.
+# Using image 2 it is clear that SMC and ExtendedJaccard are sensitive to
+# lighting conditions, and thus may not be a good choice for comparing faces.
+# Correlation also seems slightly sensitive to this. Lastly, Cosine seems to
+# recognise faces somewhat better than Jaccard (take e.g. #635).
+
+# 1.3.2
+measures = {'Cosine': 'cos', 'ExtJac': 'ext', 'Correl': 'cor'}
+scalar = 0.5        # Note: pick from (0,1), values > 1 aren't handled nicely
+translation = 0.1   # Note: pick a small value, for similar reasons
+
+# Round the data to suppress numerical noise in the comparisons (hint #2)
+X = np.around(X, decimals = 4)
+
+for name, measure in measures.iteritems():
+    sim1 = similarity(scalar * X[i,:], X[noti,:], measure)
+    sim2 = similarity(X[i,:], X[noti,:], measure)
+    print("Scalar,", name, ":", (sim1 == sim2).all())
+
+for name, measure in measures.iteritems():    
+    sim1 = similarity(translation + X[i,:], X[noti,:], measure)
+    sim2 = similarity(X[i,:], X[noti,:], measure)
+    print("Translation,", name, ":", (sim1 == sim2).any())
diff --git a/Assignment 1/similarity.py b/Assignment 1/similarity.py
new file mode 100644
index 0000000..4a49317
--- /dev/null
+++ b/Assignment 1/similarity.py
@@ -0,0 +1,72 @@
+import numpy as np
+from scipy.stats import zscore
+
+
+def similarity(X, Y, method):
+    '''
+    SIMILARITY Computes similarity matrices
+
+    Usage:
+        sim = similarity(X, Y, method)
+
+    Input:
+    X   N1 x M matrix
+    Y   N2 x M matrix 
+    method   string defining one of the following similarity measures
+           'SMC', 'smc'             : Simple Matching Coefficient
+           'Jaccard', 'jac'         : Jaccard coefficient 
+           'ExtendedJaccard', 'ext' : The Extended Jaccard coefficient
+           'Cosine', 'cos'          : Cosine Similarity
+           'Correlation', 'cor'     : Correlation coefficient
+
+    Output:
+    sim Estimated similarity matrix between X and Y
+        If input is not binary, SMC and Jaccard will make each
+        attribute binary according to x>median(x)
+
+    Copyright, Morten Morup and Mikkel N. Schmidt
+    Technical University of Denmark '''
+
+    X = np.mat(X)
+    Y = np.mat(Y)
+    N1, M = np.shape(X)
+    N2, M = np.shape(Y)
+    
+    method = method[:3].lower()
+    if method=='smc': # SMC
+        X,Y = binarize(X,Y);
+        sim = ((X*Y.T)+((1-X)*(1-Y).T))/M
+    elif method=='jac': # Jaccard
+        X,Y = binarize(X,Y);
+        sim = (X*Y.T)/(M-(1-X)*(1-Y).T)        
+    elif method=='ext': # Extended Jaccard
+        XYt = X*Y.T
+        sim = XYt / (np.log( np.exp(sum(np.power(X.T,2))).T * np.exp(sum(np.power(Y.T,2))) ) - XYt)
+    elif method=='cos': # Cosine
+        sim = (X*Y.T)/(np.sqrt(sum(np.power(X.T,2))).T * np.sqrt(sum(np.power(Y.T,2))))
+    elif method=='cor': # Correlation
+        X_ = zscore(X,axis=1,ddof=1)
+        Y_ = zscore(Y,axis=1,ddof=1)
+        sim = (X_*Y_.T)/(M-1)
+    return sim
+        
+def binarize(X,Y=None):
+    ''' Force binary representation of the matrix, according to X>median(X) '''
+    if Y==None:
+        X = np.matrix(X)
+        Xmedians = np.ones((np.shape(X)[0],1)) * np.median(X,0)
+        Xflags = X>Xmedians
+        X[Xflags] = 1; X[~Xflags] = 0
+        return X
+    else:
+        X = np.matrix(X); Y = np.matrix(Y);
+        XYmedian= np.median(np.bmat('X; Y'),0)
+        Xmedians = np.ones((np.shape(X)[0],1)) * XYmedian
+        Xflags = X>Xmedians
+        X[Xflags] = 1; X[~Xflags] = 0
+        Ymedians = np.ones((np.shape(Y)[0],1)) * XYmedian
+        Yflags = Y>Ymedians
+        Y[Yflags] = 1; Y[~Yflags] = 0
+        return [X,Y]
+        
+
author	Camil Staps	2015-09-21 15:17:40 +0200
committer	Camil Staps	2015-09-21 15:17:40 +0200
commit	dc77e1c88e7973ef2b7ec3afa2d22ec830a82dbe (patch)
tree	356b2db45f40c843488e6d8706f4c02455b87ea1
parent	Assignment 1, 1.1 & 1.2 start (diff)