author	Camil Staps	2015-09-27 15:26:00 +0200
committer	Camil Staps	2015-09-27 15:26:00 +0200
commit	e1ab964a3ddc6b2960a078594c4d40344b25c1c9 (patch)
tree	bc7e6e24c2f212d01576e0f37069c5c9dbe09fc6
parent	Assignment 1 continuing, only 1.2.2c-e to be done (diff)
Finish assignment 1
-rw-r--r--	Assignment 1/ex11.py	4
-rw-r--r--	Assignment 1/ex12.py	55
-rw-r--r--	Assignment 1/ex13.py	9
3 files changed, 65 insertions, 3 deletions
diff --git a/Assignment 1/ex11.py b/Assignment 1/ex11.py
index b3c3683..238a3f9 100644
--- a/Assignment 1/ex11.py
+++ b/Assignment 1/ex11.py
@@ -2,7 +2,9 @@
"""
Created on Wed Sep 9 16:03:05 2015
-@author: camilstaps
+@author: Camil Staps, s4498062
+
+Use Python 2.*
"""
from __future__ import print_function
diff --git a/Assignment 1/ex12.py b/Assignment 1/ex12.py
index 5c467dc..3c38371 100644
--- a/Assignment 1/ex12.py
+++ b/Assignment 1/ex12.py
@@ -2,12 +2,15 @@
"""
Created on Fri Sep 11 13:12:03 2015
-@author: camilstaps
+@author: Camil Staps, s4498062
+
+Use Python 2.*
"""
from __future__ import print_function
import xlrd
import numpy as np
+import numpy.linalg as la
import matplotlib.lines as pltlines
import matplotlib.patches as pltpatches
import matplotlib.pyplot as plt
@@ -43,7 +46,8 @@ ax.scatter(xs, data.tolist()[1], s=60, c=graph_colors, alpha=0.4, marker='o')
line_a = pltlines.Line2D([], [], ls=' ', marker='s', label='A', c='w')
line_b = pltlines.Line2D([], [], ls=' ', marker='o', label='B', c='w')
-handles = [pltpatches.Patch(label=k, color=v) for k, v in colors.iteritems()] + [line_a, line_b]
+handles = [pltpatches.Patch(label=k, color=v) for k, v in
+ colors.iteritems()] + [line_a, line_b]
ax.legend(handles=handles, numpoints=1, loc=2)
plt.show()
@@ -58,3 +62,50 @@ plt.show()
# EVD is a way to rewrite a diagonalizable matrix into a canonical form (a
# summation of products of eigenvalues and corresponding eigenvectors). SVD is
# a generalisation which can be applied to any matrix.
+#
+# In SVD, we write A = U*S*V^T. The columns of U are eigenvectors of AA^T
+# (which can be found using EVD); the columns of V are eigenvectors of A^TA.
+
+# 1.2.2 c
+# Subtract mean
+means = [np.mean(np.array(data.T)[i]) for i in range(0,8)]
+means_matrix = np.transpose([means for _ in range(0, len(np.array(data)[0]))])
+normalised_data = np.transpose(data - means_matrix)
+# Perform SVD
+U, S, V = la.svd(normalised_data)
+# Compute component variances
+squared_sum = sum([S[m_] ** 2 for m_ in range(len(S))])
+variance = [100 * S[m] ** 2 / squared_sum for m in range(len(S))]
+# Plot
+fig = plt.figure(figsize=(2,4))
+ax = plt.gca()
+ax.bar(range(0, len(variance)), variance, 1)
+plt.show()
+
+print("The first 3 components account for", sum(variance[:3]), "% variance.")
+
+# 1.2.2 d
+V = np.transpose(V)
+projections = [normalised_data * V[:,i] for i in range(0,8)]
+for i in range(0,8): # Isn't this possible in an easier way?
+ projections[i] = [np.array(e)[0][0] for s in projections[i] for e in s]
+# Plot
+fig = plt.figure(figsize=(12,6))
+ax = plt.gca()
+ax.scatter(xs, projections[0],
+ c='#ff0000', marker='o', label='Projection onto component 1')
+ax.scatter(xs, projections[1],
+ c='#00ff00', marker='o', label='Projection onto component 2')
+handles, labels = ax.get_legend_handles_labels()
+ax.legend(handles, labels, loc=2)
+plt.show()
+
+print("In the graph above we see that rougly 70% of the variance is accounted "
+ "for by the first two components. If we would plot only the first two "
+ "dimensions of the data, we would have on average only 25% accounted for.")
+
+# 1.2.2 e
+print(V[:,1])
+# As you can see, it mainly takes into account the last two attributes (G and H).
+# Adherence to attribute A, B or C would give a large negative projection.
+# Adherence to attribute G or H would give a large positive projection.
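
The comment in ex12.py above states the EVD/SVD relationship (A = U*S*V^T, with the columns of U eigenvectors of AA^T and the columns of V eigenvectors of A^TA, both with eigenvalues S[i]**2). A minimal numpy sketch of that relationship, using a small random matrix purely for illustration (not the assignment data):

import numpy as np

# Illustrative only: a small random matrix stands in for the data matrix.
A = np.random.rand(6, 4)

# SVD: A = U * diag(S) * V^T
U, S, Vt = np.linalg.svd(A, full_matrices=False)
V = Vt.T

for i in range(len(S)):
    # Each column of U is an eigenvector of A*A^T with eigenvalue S[i]**2 ...
    assert np.allclose(A.dot(A.T).dot(U[:, i]), S[i] ** 2 * U[:, i])
    # ... and each column of V is an eigenvector of A^T*A with the same eigenvalue.
    assert np.allclose(A.T.dot(A).dot(V[:, i]), S[i] ** 2 * V[:, i])

print("SVD/EVD relationship holds for a matrix of shape", A.shape)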
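
The printed remark about 70% versus 25% can also be made concrete: two raw coordinates of an 8-dimensional data set account for 2/8 = 25% of the total variance on average, whereas the first two principal components are chosen to capture as much variance as possible. A small sketch on synthetic, strongly correlated data (not the assignment's spreadsheet):

import numpy as np

# Illustrative only: synthetic rank-2 data with a little noise.
rng = np.random.RandomState(0)
X = rng.randn(200, 2).dot(rng.randn(2, 8)) + 0.1 * rng.randn(200, 8)
X = X - X.mean(axis=0)

# Share of variance captured by the first two principal components (via SVD) ...
_, S, _ = np.linalg.svd(X, full_matrices=False)
pc_share = (S[:2] ** 2).sum() / (S ** 2).sum()

# ... versus two raw coordinates, averaged over all coordinates (= 2/8).
raw_share = 2 * X.var(axis=0).mean() / X.var(axis=0).sum()

print("First two PCs capture %.0f%% of the variance," % (100 * pc_share))
print("two raw coordinates capture %.0f%% on average." % (100 * raw_share))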
diff --git a/Assignment 1/ex13.py b/Assignment 1/ex13.py
index b91d885..a6600ee 100644
--- a/Assignment 1/ex13.py
+++ b/Assignment 1/ex13.py
@@ -1,3 +1,12 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Sep 11 13:12:03 2015
+
+@author: Camil Staps, s4498062
+
+Use Python 2.*
+"""
+
# exercise 3.2.1
from __future__ import print_function