# Assignment 1/similarity.py
# Recovered from commit dc77e1c88e7973ef2b7ec3afa2d22ec830a82dbe
# (Camil Staps, 2015-09-21, "Assignment 1 continuing, only 1.2.2c-e to be done").

import numpy as np
from scipy.stats import zscore


def _as_2d(A, dtype=None):
    ''' Return A as a plain 2-D ndarray (a 1-D input becomes a single row). '''
    return np.atleast_2d(np.asarray(A, dtype=dtype))


def _is_binary(A):
    ''' True when every entry of A is exactly 0 or 1. '''
    return bool(np.all((A == 0) | (A == 1)))


def _threshold(A, medians):
    ''' Return a 0/1 matrix (same dtype as A) flagging entries of A > medians. '''
    return np.asmatrix(np.where(A > medians, 1, 0).astype(A.dtype))


def similarity(X, Y, method):
    '''
    SIMILARITY Computes similarity matrices

    Usage:
        sim = similarity(X, Y, method)

    Input:
        X       N1 x M matrix
        Y       N2 x M matrix
        method  string defining one of the following similarity measures
                (only the first three characters are significant,
                case-insensitive):
           'SMC', 'smc'             : Simple Matching Coefficient
           'Jaccard', 'jac'         : Jaccard coefficient
           'ExtendedJaccard', 'ext' : The Extended Jaccard coefficient
           'Cosine', 'cos'          : Cosine Similarity
           'Correlation', 'cor'     : Correlation coefficient

    Output:
        sim     N1 x N2 numpy.matrix; sim[i, j] is the similarity between
                row i of X and row j of Y.
                If input is not binary, SMC and Jaccard will make each
                attribute binary according to x>median(x), where the median
                is taken per column over the rows of X and Y together.

    Raises:
        ValueError if X and Y have a different number of columns or if
        method is not one of the measures listed above.

    Notes:
        Degenerate rows (e.g. an all-zero row for Jaccard/Cosine, or a
        constant row for Correlation) lead to a 0/0 division and yield NaN.

    Copyright, Morten Morup and Mikkel N. Schmidt
    Technical University of Denmark
    '''
    # Work on float ndarrays internally; this avoids integer division and the
    # legacy np.mat constructor (removed in NumPy 2.0).
    X = _as_2d(X, dtype=float)
    Y = _as_2d(Y, dtype=float)
    M = X.shape[1]
    if Y.shape[1] != M:
        raise ValueError(
            'X and Y must have the same number of columns (got %d and %d)'
            % (M, Y.shape[1]))

    key = method[:3].lower()
    if key in ('smc', 'jac'):
        # Only threshold data that is not already 0/1: thresholding a binary
        # column whose median is 1 would wrongly turn the whole column to 0.
        if not (_is_binary(X) and _is_binary(Y)):
            X, Y = (np.asarray(B, dtype=float) for B in binarize(X, Y))
        both_one = np.dot(X, Y.T)             # f11: attributes set in both
        both_zero = np.dot(1 - X, (1 - Y).T)  # f00: attributes clear in both
        if key == 'smc':
            sim = (both_one + both_zero) / M
        else:
            sim = both_one / (M - both_zero)
    elif key in ('ext', 'cos'):
        XYt = np.dot(X, Y.T)
        x_sq = np.sum(np.power(X, 2), axis=1)[:, np.newaxis]  # N1 x 1
        y_sq = np.sum(np.power(Y, 2), axis=1)[np.newaxis, :]  # 1 x N2
        if key == 'ext':
            # Broadcasting gives |x_i|^2 + |y_j|^2 directly; the former
            # log(exp(.) * exp(.)) trick overflowed for large norms.
            sim = XYt / (x_sq + y_sq - XYt)
        else:
            sim = XYt / (np.sqrt(x_sq) * np.sqrt(y_sq))
    elif key == 'cor':
        X_ = zscore(X, axis=1, ddof=1)
        Y_ = zscore(Y, axis=1, ddof=1)
        sim = np.dot(X_, Y_.T) / (M - 1)
    else:
        raise ValueError('Unknown similarity method: %r' % (method,))
    # Callers historically received a numpy.matrix; keep that return type.
    return np.asmatrix(sim)


def binarize(X, Y=None):
    '''
    Force binary representation of the matrix, according to X>median(X)

    Input:
        X   N1 x M matrix
        Y   optional N2 x M matrix

    Output:
        With X only: a 0/1 numpy.matrix where each entry tells whether the
        value exceeds the median of its column in X.
        With X and Y: the list [X, Y] of 0/1 matrices, both thresholded
        against the column medians of X and Y stacked together.

    The inputs are not modified; the results keep the dtype of the inputs.
    '''
    X = _as_2d(X)
    # Identity test: `Y == None` compares elementwise once Y is an array.
    if Y is None:
        return _threshold(X, np.median(X, 0))
    Y = _as_2d(Y)
    medians = np.median(np.vstack((X, Y)), 0)
    return [_threshold(X, medians), _threshold(Y, medians)]