From dc77e1c88e7973ef2b7ec3afa2d22ec830a82dbe Mon Sep 17 00:00:00 2001 From: Camil Staps Date: Mon, 21 Sep 2015 15:17:40 +0200 Subject: Assignment 1 continuing, only 1.2.2c-e to be done --- .gitignore | 1 + Assignment 1/Data/wildfaces_grayscale.mat | Bin 0 -> 12058603 bytes Assignment 1/ex12.py | 11 ++++ Assignment 1/ex13.py | 88 ++++++++++++++++++++++++++++++ Assignment 1/similarity.py | 72 ++++++++++++++++++++++++ 5 files changed, 172 insertions(+) create mode 100644 Assignment 1/Data/wildfaces_grayscale.mat create mode 100644 Assignment 1/ex13.py create mode 100644 Assignment 1/similarity.py diff --git a/.gitignore b/.gitignore index 612ebeb..a1e4aad 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ *.spyderproject *.spyderworkspace +*.pyc diff --git a/Assignment 1/Data/wildfaces_grayscale.mat b/Assignment 1/Data/wildfaces_grayscale.mat new file mode 100644 index 0000000..bcab41e Binary files /dev/null and b/Assignment 1/Data/wildfaces_grayscale.mat differ diff --git a/Assignment 1/ex12.py b/Assignment 1/ex12.py index 3db49e1..5c467dc 100644 --- a/Assignment 1/ex12.py +++ b/Assignment 1/ex12.py @@ -47,3 +47,14 @@ handles = [pltpatches.Patch(label=k, color=v) for k, v in colors.iteritems()] + ax.legend(handles=handles, numpoints=1, loc=2) plt.show() + +# 1.2.2 a +# PCA is a method that can be used to reduce dimensionality of a dataset. It +# can be used when some variables are correlated; we then basically rewrite one +# of them as a function of the other. Of course, in general that implies data +# loss. + +# 1.2.2 b +# EVD is a way to rewrite a diagonalizable matrix into a canonical form (a +# summation of products of eigenvalues and corresponding eigenvectors). SVD is +# a generalisation which can be applied to any matrix. 
diff --git a/Assignment 1/ex13.py b/Assignment 1/ex13.py
new file mode 100644
index 0000000..b91d885
--- /dev/null
+++ b/Assignment 1/ex13.py
@@ -0,0 +1,88 @@
+# exercise 3.2.1
+
+from __future__ import print_function
+from pylab import *
+from scipy.io import loadmat
+from similarity import similarity
+
+# Image to use as query
+i = 635
+
+# Similarity: 'SMC', 'Jaccard', 'ExtendedJaccard', 'Cosine', 'Correlation'
+similarity_measure = 'jaccard'
+
+# Load the face database (one image per row of X)
+# Load Matlab data file to python dict structure
+X = loadmat('./Data/wildfaces_grayscale.mat')['X']
+N, M = shape(X)
+
+
+# Search the face database for similar faces
+# Index of all other images than i (list() keeps this working on Python 3)
+noti = list(range(0,i)) + list(range(i+1,N))
+# Compute similarity between image i and all others
+sim = similarity(X[i,:], X[noti,:], similarity_measure)
+sim = sim.tolist()[0]  # similarity() returns a 1 x (N-1) matrix; take its only row
+# Tuples of sorted similarities and their indices
+sim_to_index = sorted(zip(sim,noti))
+
+
+# Visualize query image and 5 most/least similar images
+figure(figsize=(12,8))
+subplot(3,1,1)
+imshow(np.reshape(X[i],(40,40)).T, cmap=cm.gray)
+xticks([]); yticks([])
+title('Query image')
+ylabel('image #{0}'.format(i))
+
+
+for ms in range(5):
+
+    # 5 most similar images found
+    subplot(3,5,6+ms)
+    im_id = sim_to_index[-ms-1][1]
+    im_sim = sim_to_index[-ms-1][0]
+    imshow(np.reshape(X[im_id],(40,40)).T, cmap=cm.gray)
+    xlabel('sim={0:.3f}'.format(im_sim))
+    ylabel('image #{0}'.format(im_id))
+    xticks([]); yticks([])
+    if ms==2: title('Most similar images')
+
+    # 5 least similar images found
+    subplot(3,5,11+ms)
+    im_id = sim_to_index[ms][1]
+    im_sim = sim_to_index[ms][0]
+    imshow(np.reshape(X[im_id],(40,40)).T, cmap=cm.gray)
+    xlabel('sim={0:.3f}'.format(im_sim))
+    ylabel('image #{0}'.format(im_id))
+    xticks([]); yticks([])
+    if ms==2: title('Least similar images')
+
+show()
+
+# 1.3.1
+# For any two similarity measures, the five least similar are quite different.
+# Based on the five most similar images, SMC and Jaccard produce similar
+# results. Correlation and Cosine produce some similar results.
+# Using image 2 it is clear that SMC and ExtendedJaccard are sensitive to
+# lighting conditions, and thus maybe not a very good choice to compare faces.
+# Also Correlation seems a little sensitive to this. Lastly Cosine seems to
+# recognise faces somewhat better than Jaccard (take e.g. #635).
+
+# 1.3.2: which measures are invariant to scaling / translating the query image?
+measures = {'Cosine': 'cos', 'ExtJac': 'ext', 'Correl': 'cor'}
+scalar = 0.5  # positive factor the query image is multiplied by
+translation = 0.1  # constant added to every attribute of the query image
+
+# Round list to resist numerical variances (hint #2)
+X = np.around(X, decimals = 4)
+
+for name, measure in measures.items():  # invariant iff every entry agrees (up to rounding)
+    sim1 = similarity(scalar * X[i,:], X[noti,:], measure)
+    sim2 = similarity(X[i,:], X[noti,:], measure)
+    print("Scalar,", name, ":", np.allclose(sim1, sim2))
+
+for name, measure in measures.items():  # same check for the translated query
+    sim1 = similarity(translation + X[i,:], X[noti,:], measure)
+    sim2 = similarity(X[i,:], X[noti,:], measure)
+    print("Translation,", name, ":", np.allclose(sim1, sim2))
diff --git a/Assignment 1/similarity.py b/Assignment 1/similarity.py
new file mode 100644
index 0000000..4a49317
--- /dev/null
+++ b/Assignment 1/similarity.py
@@ -0,0 +1,72 @@
+import numpy as np
+from scipy.stats import zscore
+
+
+def similarity(X, Y, method):
+    '''
+    SIMILARITY Computes similarity matrices
+
+    Usage:
+        sim = similarity(X, Y, method)
+
+    Input:
+    X        N1 x M matrix
+    Y        N2 x M matrix
+    method   string defining one of the following similarity measure
+             (first three letters, any case; anything else raises ValueError)
+       'SMC', 'smc'             : Simple Matching Coefficient
+       'Jaccard', 'jac'         : Jaccard coefficient
+       'ExtendedJaccard', 'ext' : The Extended Jaccard coefficient
+       'Cosine', 'cos'          : Cosine Similarity
+       'Correlation', 'cor'     : Correlation coefficient
+
+    Output:
+    sim      N1 x N2 similarity matrix between the rows of X and Y.
+             If input is not binary, SMC and Jaccard will make each
+             attribute binary according to x>median(x)
+
+    Copyright, Morten Morup and Mikkel N. Schmidt
+    Technical University of Denmark '''
+
+    # Work in floating point so integer input cannot overflow or floor-divide
+    X = np.asmatrix(X, dtype=float)
+    Y = np.asmatrix(Y, dtype=float)
+    M = np.shape(Y)[1]
+
+    key = method[:3].lower()
+    if key == 'smc':  # SMC
+        X, Y = binarize(X, Y)
+        sim = ((X*Y.T) + ((1-X)*(1-Y).T)) / M
+    elif key == 'jac':  # Jaccard
+        X, Y = binarize(X, Y)
+        sim = (X*Y.T) / (M - (1-X)*(1-Y).T)
+    elif key == 'ext':  # Extended Jaccard
+        XYt = X*Y.T
+        # |x|^2 + |y|^2 by broadcasting; log(exp(.)*exp(.)) overflowed to inf
+        sim = XYt / (np.sum(np.power(X, 2), 1) + np.sum(np.power(Y, 2), 1).T - XYt)
+    elif key == 'cos':  # Cosine
+        sim = (X*Y.T) / (np.sqrt(np.sum(np.power(X, 2), 1)) * np.sqrt(np.sum(np.power(Y, 2), 1)).T)
+    elif key == 'cor':  # Correlation
+        # Hand zscore plain ndarrays rather than np.matrix, then wrap the result again
+        X_ = np.asmatrix(zscore(np.asarray(X), axis=1, ddof=1))
+        Y_ = np.asmatrix(zscore(np.asarray(Y), axis=1, ddof=1))
+        sim = (X_*Y_.T) / (M-1)
+    else:
+        raise ValueError('Unknown similarity method: %r' % (method,))
+    return sim
+
+
+def binarize(X, Y=None):
+    ''' Force binary representation of the matrix, according to X>median(X) '''
+    X = np.matrix(X)  # np.matrix() copies, so the caller's data is not modified
+    if Y is None:
+        Xflags = X > np.median(np.asarray(X), axis=0)
+        X[Xflags] = 1; X[~Xflags] = 0
+        return X
+    # With two inputs, both are thresholded at the column medians of their stacked rows
+    Y = np.matrix(Y)
+    XYmedian = np.median(np.vstack((np.asarray(X), np.asarray(Y))), axis=0)
+    Xflags = X > XYmedian; Yflags = Y > XYmedian
+    X[Xflags] = 1; X[~Xflags] = 0
+    Y[Yflags] = 1; Y[~Yflags] = 0
+    return [X, Y]
-- 
cgit v1.2.3