diff options
author | Camil Staps | 2016-01-08 23:15:14 +0100 |
---|---|---|
committer | Camil Staps | 2016-01-08 23:15:14 +0100 |
commit | 087f0526345ed45593295fdafcaeed496a621c68 (patch) | |
tree | 7804cac6319b4cc9e2037717ec6773bb3e464791 /Assignment 6/ex61.py | |
parent | Fix assignment 3.2 (diff) |
Assignment 6
Diffstat (limited to 'Assignment 6/ex61.py')
-rw-r--r-- | Assignment 6/ex61.py | 119 |
1 file changed, 119 insertions, 0 deletions
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 8 13:15:01 2016

@author: camil
"""

from random import shuffle

import numpy as np

import scipy.io as sciio
from scipy.spatial.distance import cosine, correlation

import matplotlib.pyplot as plt

from sklearn.neighbors import KNeighborsClassifier
# NOTE(review): sklearn.cross_validation was deprecated in 0.18 and later
# removed in favour of sklearn.model_selection; kept as-is because the
# replacement API (KFold(n_splits=...).split(X)) is not call-compatible.
from sklearn import cross_validation
from sklearn import datasets


def ex611(sets=range(1, 5)):
    """Compare KNN accuracy/error on the synth1..synth4 data sets.

    For each data set, plot the raw points plus, for four distance
    measures (euclidean, manhattan, cosine, correlation), the 10-fold
    cross-validated accuracy and error rate as a function of the number
    of neighbours (1..40).

    :param sets: iterable of data-set indices n, loaded from
        ./data/synth<n>.mat (keys 'X' and 'y').
    """
    fs = 15  # title font size
    for n in sets:
        synth = sciio.loadmat('./data/synth' + str(n) + '.mat')
        X = synth['X']
        y = synth['y']

        # First subplot: scatter of the data itself.
        plt.figure(figsize=(18, 3))
        plt.subplot(1, 5, 1)
        plt.scatter(X[:, 1], X[:, 0], c=y, s=50, alpha=0.5, marker='s')
        plt.title('synth' + str(n), fontsize=fs)

        measures = ['euclidean', 'manhattan', cosine, correlation]
        measure_names = ['euclidean', 'manhattan', 'cosine', 'correlation']
        ns = np.arange(1, 41, 1)

        # Shuffle X and y in lockstep so the KFold splits are random.
        # BUG FIX: zip() returns an iterator on Python 3, but
        # random.shuffle needs an indexable sequence — materialize it.
        combined = list(zip(X, y))
        shuffle(combined)
        X[:], y[:] = zip(*combined)

        cv = cross_validation.KFold(len(X), n_folds=10)

        for measure, measure_name, d in zip(measures, measure_names,
                                            range(len(measures))):
            accuracies = []
            for i in ns:
                acc = []
                for train, test in cv:
                    X_train, X_test, y_train, y_test = \
                        X[train], X[test], y[train], y[test]

                    clf = KNeighborsClassifier(n_neighbors=i, metric=measure)
                    clf.fit(X_train, y_train.ravel())
                    acc.append(clf.score(X_test, y_test.ravel()))
                accuracies.append(np.mean(acc))
            # Error rate is the complement of the accuracy.
            errors = np.repeat(1, len(accuracies)) - accuracies

            # One subplot per distance measure (columns 2..5).
            plt.subplot(1, 5, 2 + d)
            plt.plot(ns, accuracies, c='blue')
            plt.plot(ns, errors, c='red')
            plt.xlabel('N neighbors')
            plt.ylabel('Accuracy / Error rate')
            plt.ylim([-0.05, 1.05])
            plt.title(measure_name, fontsize=fs)

        plt.tight_layout()
        plt.show()


def ex612():
    """Plot the leave-one-out error rate of KNN on the Iris data set.

    Evaluates KNeighborsClassifier for 1..40 neighbours using
    leave-one-out cross-validation and plots the mean error rate.
    """
    iris = datasets.load_iris()
    X, y = iris.data, iris.target

    ns, scores = range(1, 41), []
    for n in ns:
        cv = cross_validation.LeaveOneOut(len(X))
        scores_ = []
        for train, test in cv:
            X_train, X_test, y_train, y_test = \
                X[train], X[test], y[train], y[test]

            clf = KNeighborsClassifier(n_neighbors=n)
            clf.fit(X_train, y_train)
            # score() is the accuracy on the single held-out sample,
            # so 1 - score is that sample's error.
            scores_.append(1 - clf.score(X_test, y_test))
        scores.append(np.mean(scores_))

    plt.plot(ns, scores, color='red')
    plt.ylim([min(scores) - (max(scores) - min(scores)) * 0.1,
              max(scores) + (max(scores) - min(scores)) * 0.1])
    plt.title('Classifying the Iris data set with KNN', fontsize=18)
    plt.xlabel('N neighbors')
    plt.ylabel('Error rate')
    plt.show()


def ex613():
    """Predict the wine alcohol percentage with a hand-rolled KNN regressor.

    The last column of ./data/wine.mat's 'X' matrix is treated as the
    target. For each sample the prediction is the mean target of its n
    nearest neighbours (the sample itself excluded); the plotted score
    is the mean squared error over all samples, for n = 1..40.
    """
    wine = sciio.loadmat('./data/wine.mat')
    #classNames = [str(n[0][0]) for n in wine['classNames']]
    X = wine['X'][:, :-1]
    y = np.array(wine['X'][:, -1], dtype='f')

    ns, scores = range(1, 41, 1), []
    for n in ns:
        clf = KNeighborsClassifier(n_neighbors=n)
        # Dummy all-zero labels: the classifier is only used for its
        # kneighbors() lookup, never for classification.
        clf.fit(X, np.repeat(0, len(X)))
        scores_ = []
        for r, p, i in zip(X, y, range(len(X))):
            # Ask for n+1 neighbours so the query point itself can be
            # filtered out of its own neighbourhood.
            ind = clf.kneighbors([r], n_neighbors=n + 1,
                                 return_distance=False)
            prediction = np.mean(y[[j for j in ind[0] if j != i]])
            scores_.append((prediction - p) ** 2)
        scores.append(np.mean(scores_))

    plt.plot(ns, scores, color='red')
    plt.ylim([min(scores) - (max(scores) - min(scores)) * 0.1,
              max(scores) + (max(scores) - min(scores)) * 0.1])
    plt.title('Predicting alcohol percentage with KNN', fontsize=18)
    plt.xlabel('N neighbors')
    plt.ylabel('Average error')
    plt.show()


if __name__ == '__main__':
    #ex611()
    #ex612()
    ex613()