aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Camil Staps 2015-09-21 15:17:40 +0200
committer Camil Staps 2015-09-21 15:17:40 +0200
commit dc77e1c88e7973ef2b7ec3afa2d22ec830a82dbe (patch)
tree 356b2db45f40c843488e6d8706f4c02455b87ea1
parent Assignment 1, 1.1 & 1.2 start (diff)
Assignment 1 continuing, only 1.2.2c-e to be done
-rw-r--r-- .gitignore 1
-rw-r--r-- Assignment 1/Data/wildfaces_grayscale.mat bin 0 -> 12058603 bytes
-rw-r--r-- Assignment 1/ex12.py 11
-rw-r--r-- Assignment 1/ex13.py 88
-rw-r--r-- Assignment 1/similarity.py 72
5 files changed, 172 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
index 612ebeb..a1e4aad 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
*.spyderproject
*.spyderworkspace
+*.pyc
diff --git a/Assignment 1/Data/wildfaces_grayscale.mat b/Assignment 1/Data/wildfaces_grayscale.mat
new file mode 100644
index 0000000..bcab41e
--- /dev/null
+++ b/Assignment 1/Data/wildfaces_grayscale.mat
Binary files differ
diff --git a/Assignment 1/ex12.py b/Assignment 1/ex12.py
index 3db49e1..5c467dc 100644
--- a/Assignment 1/ex12.py
+++ b/Assignment 1/ex12.py
@@ -47,3 +47,14 @@ handles = [pltpatches.Patch(label=k, color=v) for k, v in colors.iteritems()] +
ax.legend(handles=handles, numpoints=1, loc=2)
plt.show()
+
+# 1.2.2 a
+# PCA is a method that can be used to reduce the dimensionality of a dataset.
+# It is useful when some variables are correlated: such variables are then,
+# roughly speaking, rewritten as a function of one another and replaced by a
+# smaller number of new variables (the principal components). Of course, in
+# general that implies some loss of information.
+
+# 1.2.2 b
+# EVD is a way to rewrite a diagonalizable matrix into a canonical form (a
+# summation of products of eigenvalues and corresponding eigenvectors). SVD is
+# a generalisation which can be applied to any matrix.
diff --git a/Assignment 1/ex13.py b/Assignment 1/ex13.py
new file mode 100644
index 0000000..b91d885
--- /dev/null
+++ b/Assignment 1/ex13.py
@@ -0,0 +1,88 @@
+# exercise 3.2.1
+
+from __future__ import print_function
+from pylab import *
+from scipy.io import loadmat
+from similarity import similarity
+
+# Image to use as query
+i = 635
+
+# Similarity measure for the search. similarity() matches on the first three
+# letters, case-insensitively: 'SMC', 'Jaccard', 'ExtendedJaccard', 'Cosine',
+# 'Correlation'
+similarity_measure = 'jaccard'
+
+# Load the face database
+# Load Matlab data file to python dict structure; each row of X is one image
+X = loadmat('./Data/wildfaces_grayscale.mat')['X']
+N, M = shape(X)
+
+
+# Search the face database for similar faces
+# Indices of all images other than i. Each range is wrapped in list() so the
+# concatenation also works on Python 3, where range() does not return a list.
+noti = list(range(0, i)) + list(range(i + 1, N))
+# Compute similarity between image i and all others
+sim = similarity(X[i,:], X[noti,:], similarity_measure)
+# The result is a 1 x len(noti) matrix; take its single row as a plain list
+sim = sim.tolist()[0]
+# (similarity, index) tuples in ascending order of similarity
+sim_to_index = sorted(zip(sim, noti))
+
+
+# Visualize query image and 5 most/least similar images
+figure(figsize=(12,8))
+subplot(3,1,1)
+imshow(np.reshape(X[i],(40,40)).T, cmap=cm.gray)
+xticks([]); yticks([])
+title('Query image')
+ylabel('image #{0}'.format(i))
+
+
+for ms in range(5):
+
+    # 5 most similar images found (taken from the end of the sorted list)
+    subplot(3,5,6+ms)
+    im_sim, im_id = sim_to_index[-ms-1]
+    imshow(np.reshape(X[im_id],(40,40)).T, cmap=cm.gray)
+    xlabel('sim={0:.3f}'.format(im_sim))
+    ylabel('image #{0}'.format(im_id))
+    xticks([]); yticks([])
+    # Put the row heading above the middle column only
+    if ms==2: title('Most similar images')
+
+    # 5 least similar images found (taken from the start of the sorted list)
+    subplot(3,5,11+ms)
+    im_sim, im_id = sim_to_index[ms]
+    imshow(np.reshape(X[im_id],(40,40)).T, cmap=cm.gray)
+    xlabel('sim={0:.3f}'.format(im_sim))
+    ylabel('image #{0}'.format(im_id))
+    xticks([]); yticks([])
+    if ms==2: title('Least similar images')
+
+show()
+
+# 1.3.1
+# For any two similarity measures, the five least similar are quite different.
+# Based on the five most similar images, SMC and Jaccard produce similar
+# results. Correlation and Cosine produce some similar results.
+# Using image 2 it is clear that SMC and ExtendedJaccard are sensitive to
+# lighting conditions, and thus maybe not a very good choice to compare faces.
+# Also Correlation seems a little sensitive to this. Lastly Cosine seems to
+# recognise faces somewhat better than Jaccard (take e.g. #635).
+
+# 1.3.2
+# Check, per measure, whether the similarities between the query image and all
+# other images stay the same when the query is scaled or translated.
+measures = {'Cosine': 'cos', 'ExtJac': 'ext', 'Correl': 'cor'}
+scalar = 0.5 # Note: pick from (0,1), values > 1 aren't handled nicely
+translation = 0.1 # Note: pick a small value, for similar reasons
+
+# Round list to resist numerical variances (hint #2)
+X = np.around(X, decimals = 4)
+
+# A measure counts as invariant only if *every* similarity is unchanged. The
+# comparison uses np.allclose rather than exact equality, because results that
+# are mathematically equal still differ by floating-point rounding.
+for name, measure in measures.items():
+    sim1 = similarity(scalar * X[i,:], X[noti,:], measure)
+    sim2 = similarity(X[i,:], X[noti,:], measure)
+    print("Scalar,", name, ":", np.allclose(sim1, sim2))
+
+for name, measure in measures.items():
+    sim1 = similarity(translation + X[i,:], X[noti,:], measure)
+    sim2 = similarity(X[i,:], X[noti,:], measure)
+    print("Translation,", name, ":", np.allclose(sim1, sim2))
diff --git a/Assignment 1/similarity.py b/Assignment 1/similarity.py
new file mode 100644
index 0000000..4a49317
--- /dev/null
+++ b/Assignment 1/similarity.py
@@ -0,0 +1,72 @@
+import numpy as np
+from scipy.stats import zscore
+
+
+def similarity(X, Y, method):
+    '''
+    SIMILARITY Computes similarity matrices
+
+    Usage:
+        sim = similarity(X, Y, method)
+
+    Input:
+    X       N1 x M matrix (a 1-D sequence is treated as a single row)
+    Y       N2 x M matrix
+    method  string defining one of the following similarity measures; only
+            the first three letters are used, case-insensitively
+           'SMC', 'smc'             : Simple Matching Coefficient
+           'Jaccard', 'jac'         : Jaccard coefficient
+           'ExtendedJaccard', 'ext' : The Extended Jaccard coefficient
+           'Cosine', 'cos'          : Cosine Similarity
+           'Correlation', 'cor'     : Correlation coefficient
+
+    Output:
+    sim     N1 x N2 similarity matrix (np.matrix) between the rows of X and
+            the rows of Y.
+            If input is not binary, SMC and Jaccard will make each
+            attribute binary according to x>median(x)
+
+    Raises:
+    ValueError  if X and Y have a different number of columns, or if method
+                is not one of the measures listed above
+
+    Copyright, Morten Morup and Mikkel N. Schmidt
+    Technical University of Denmark '''
+
+    # Work in floating point so that integer input (e.g. uint8 pixel data)
+    # can neither overflow in the matrix products nor be truncated by Python 2
+    # integer division. np.asmatrix replaces np.mat, which NumPy 2.0 removed.
+    X = np.asmatrix(X, dtype=float)
+    Y = np.asmatrix(Y, dtype=float)
+    M = np.shape(X)[1]
+    if np.shape(Y)[1] != M:
+        raise ValueError('X and Y must have the same number of columns '
+                         '({0} != {1})'.format(M, np.shape(Y)[1]))
+
+    method = method[:3].lower()
+    if method == 'smc': # SMC
+        X, Y = binarize(X, Y)
+        # (matching ones + matching zeros) / number of attributes
+        sim = ((X*Y.T) + ((1-X)*(1-Y).T)) / M
+    elif method == 'jac': # Jaccard
+        X, Y = binarize(X, Y)
+        # matching ones / attributes that are not zero in both rows
+        sim = (X*Y.T) / (M - (1-X)*(1-Y).T)
+    elif method == 'ext': # Extended Jaccard
+        XYt = X*Y.T
+        # Squared row norms as an N1 x 1 column and a 1 x N2 row; adding them
+        # broadcasts to the N1 x N2 table of |x|^2 + |y|^2. Summing directly
+        # avoids a detour through exp()/log(), which overflows for norms
+        # above roughly 709.
+        x_sq = np.sum(np.power(X, 2), axis=1)
+        y_sq = np.sum(np.power(Y, 2), axis=1).T
+        sim = XYt / (x_sq + y_sq - XYt)
+    elif method == 'cos': # Cosine
+        x_norm = np.sqrt(np.sum(np.power(X, 2), axis=1))    # N1 x 1
+        y_norm = np.sqrt(np.sum(np.power(Y, 2), axis=1)).T  # 1 x N2
+        # x_norm * y_norm is a matrix product, i.e. the outer product of norms
+        sim = (X*Y.T) / (x_norm * y_norm)
+    elif method == 'cor': # Correlation
+        # Standardise each row, then re-wrap as a matrix so that '*' below is
+        # a matrix product whatever array type zscore returns.
+        X_ = np.asmatrix(zscore(np.asarray(X), axis=1, ddof=1))
+        Y_ = np.asmatrix(zscore(np.asarray(Y), axis=1, ddof=1))
+        sim = (X_*Y_.T) / (M-1)
+    else:
+        raise ValueError('Unknown similarity measure: {0!r}'.format(method))
+    return sim
+
+def binarize(X,Y=None):
+    ''' Force binary representation of the matrix, according to X>median(X)
+
+    Each attribute (column) is compared against its own median: entries
+    strictly greater than the median become 1, all others 0. The inputs are
+    not modified; new matrices with the dtype of the input are returned.
+
+    Input:
+    X   N1 x M matrix
+    Y   optional N2 x M matrix. If given, the column medians are computed
+        over the rows of X and Y together and both matrices are binarized
+        against these shared medians.
+
+    Output:
+    The binarized X if Y is None, otherwise the list [X, Y] of both
+    binarized matrices.
+    '''
+    X = np.asmatrix(X)
+    # 'is None' rather than '== None': comparing a matrix with '==' is an
+    # element-wise operation and does not yield a single truth value.
+    if Y is None:
+        medians = np.median(np.asarray(X), axis=0)
+        # The length-M median vector broadcasts against every row of X
+        return (X > medians).astype(X.dtype)
+
+    Y = np.asmatrix(Y)
+    # Pool the rows of X and Y so both are thresholded against the same medians
+    pooled = np.vstack((np.asarray(X), np.asarray(Y)))
+    medians = np.median(pooled, axis=0)
+    return [(X > medians).astype(X.dtype), (Y > medians).astype(Y.dtype)]
+
+