about summary refs log tree commit diff
path: root/Assignment 6/ex61.py
diff options
context:
space:
mode:
author: Camil Staps, 2016-01-08 23:15:14 +0100
committer: Camil Staps, 2016-01-08 23:15:14 +0100
commit: 087f0526345ed45593295fdafcaeed496a621c68 (patch)
tree: 7804cac6319b4cc9e2037717ec6773bb3e464791 /Assignment 6/ex61.py
parent: Fix assignment 3.2 (diff)
Assignment 6
Diffstat (limited to 'Assignment 6/ex61.py')
-rw-r--r--  Assignment 6/ex61.py | 119
1 file changed, 119 insertions, 0 deletions
diff --git a/Assignment 6/ex61.py b/Assignment 6/ex61.py
new file mode 100644
index 0000000..89a152c
--- /dev/null
+++ b/Assignment 6/ex61.py
@@ -0,0 +1,119 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Jan 8 13:15:01 2016
+
+@author: camil
+"""
+
+from random import shuffle
+
+import numpy as np
+
+import scipy.io as sciio
+from scipy.spatial.distance import cosine, correlation
+
+import matplotlib.pyplot as plt
+
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn import cross_validation
+from sklearn import datasets
+
def ex611(sets=range(1, 5)):
    """Compare KNN distance measures on the synthetic data sets.

    For every data set ``./data/synth<n>.mat`` this draws one figure:
    a scatter plot of the points, followed by accuracy/error-rate
    curves of a KNeighborsClassifier for 1..40 neighbours under four
    distance measures (euclidean, manhattan, cosine, correlation),
    estimated with 10-fold cross-validation.

    Parameters
    ----------
    sets : iterable of int
        Indices of the synth data sets to process (default: 1..4).
    """
    fs = 15  # title font size
    for n in sets:
        synth = sciio.loadmat('./data/synth' + str(n) + '.mat')
        X = synth['X']
        y = synth['y']

        plt.figure(figsize=(18, 3))
        plt.subplot(1, 5, 1)
        plt.scatter(X[:, 1], X[:, 0], c=y, s=50, alpha=0.5, marker='s')
        plt.title('synth' + str(n), fontsize=fs)

        measures = ['euclidean', 'manhattan', cosine, correlation]
        measure_names = ['euclidean', 'manhattan', 'cosine', 'correlation']
        ns = np.arange(1, 41, 1)

        # Shuffle X and y in unison so the CV folds are randomised.
        # zip() must be materialised into a list: on Python 3 it returns
        # an iterator, which random.shuffle() cannot handle.
        combined = list(zip(X, y))
        shuffle(combined)
        X[:], y[:] = zip(*combined)

        cv = cross_validation.KFold(len(X), n_folds=10)

        for d, (measure, measure_name) in enumerate(zip(measures, measure_names)):
            accuracies = []
            for i in ns:
                acc = []
                for train, test in cv:
                    X_train, X_test = X[train], X[test]
                    y_train, y_test = y[train], y[test]

                    clf = KNeighborsClassifier(n_neighbors=i, metric=measure)
                    clf.fit(X_train, y_train.ravel())
                    acc.append(clf.score(X_test, y_test.ravel()))
                accuracies.append(np.mean(acc))
            # Error rate is the complement of the accuracy.
            errors = np.repeat(1, len(accuracies)) - accuracies

            plt.subplot(1, 5, 2 + d)
            plt.plot(ns, accuracies, c='blue')
            plt.plot(ns, errors, c='red')
            plt.xlabel('N neighbors')
            plt.ylabel('Accuracy / Error rate')
            plt.ylim([-0.05, 1.05])
            plt.title(measure_name, fontsize=fs)

        plt.tight_layout()
        plt.show()
+
def ex612():
    """Plot the leave-one-out error rate of KNN on Iris for k = 1..40.

    Loads the Iris data set, evaluates a KNeighborsClassifier with
    leave-one-out cross-validation for every neighbour count in 1..40,
    and plots the mean error rate against the neighbour count.
    """
    iris = datasets.load_iris()
    X, y = iris.data, iris.target

    # The leave-one-out splitter depends only on the data set size, so
    # build it once instead of rebuilding it for every neighbour count.
    cv = cross_validation.LeaveOneOut(len(X))

    ns, scores = range(1, 41), []
    for n in ns:
        scores_ = []
        for train, test in cv:
            X_train, X_test = X[train], X[test]
            y_train, y_test = y[train], y[test]

            clf = KNeighborsClassifier(n_neighbors=n)
            clf.fit(X_train, y_train)
            # score() is the accuracy on the single held-out sample,
            # so 1 - score is the per-sample error (0 or 1).
            scores_.append(1 - clf.score(X_test, y_test))
        scores.append(np.mean(scores_))

    plt.plot(ns, scores, color='red')
    # Pad the y-range by 10% of the score spread on each side.
    plt.ylim([min(scores) - (max(scores) - min(scores)) * 0.1,
              max(scores) + (max(scores) - min(scores)) * 0.1])
    plt.title('Classifying the Iris data set with KNN', fontsize=18)
    plt.xlabel('N neighbors')
    plt.ylabel('Error rate')
    plt.show()
+
def ex613():
    """Predict alcohol percentage on the wine data with a KNN regressor.

    For every neighbour count n in 1..40, predicts each sample's target
    (last column of wine['X'], presumably the alcohol percentage — the
    plot title suggests so) as the mean target of its n nearest
    neighbours, excluding the sample itself, and plots the mean squared
    prediction error against n.
    """
    wine = sciio.loadmat('./data/wine.mat')
    #classNames = [str(n[0][0]) for n in wine['classNames']]
    X = wine['X'][:, :-1]                       # features: all but the last column
    y = np.array(wine['X'][:, -1], dtype='f')   # regression target: last column

    ns, scores = range(1, 41, 1), []
    for n in ns:
        clf = KNeighborsClassifier(n_neighbors=n)
        # Dummy labels: only the fitted neighbour index is used, never
        # the classifier's predictions.
        clf.fit(X, np.repeat(0, len(X)))
        # One batched neighbour query for all rows at once instead of a
        # separate kneighbors() call per row; request n+1 neighbours so
        # the point itself can be filtered out below.
        ind = clf.kneighbors(X, n_neighbors=n + 1, return_distance=False)
        scores_ = []
        for i, p in enumerate(y):
            prediction = np.mean(y[[j for j in ind[i] if j != i]])
            scores_.append((prediction - p) ** 2)  # squared error
        scores.append(np.mean(scores_))

    plt.plot(ns, scores, color='red')
    # Pad the y-range by 10% of the score spread on each side.
    plt.ylim([min(scores) - (max(scores) - min(scores)) * 0.1,
              max(scores) + (max(scores) - min(scores)) * 0.1])
    plt.title('Predicting alcohol percentage with KNN', fontsize=18)
    plt.xlabel('N neighbors')
    plt.ylabel('Average error')
    plt.show()
+
if __name__ == '__main__':
    # Uncomment the exercise(s) to run; only exercise 6.1.3 is active.
    #ex611()
    #ex612()
    ex613()