aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Camil Staps 2015-10-10 23:00:52 +0200
committer Camil Staps 2015-10-10 23:00:52 +0200
commit fde51005d9a06d40b094435d9354cab7e2f2b89d (patch)
tree 1b5d1fe61a280fa8b3138dc4bb9d1f8c3d3811bc
parent Added solutions LaTeX file (diff)
Start assignment 2
-rw-r--r-- Assignment2/Data/wine.mat bin 0 -> 126090 bytes
-rw-r--r-- Assignment2/ex21.py 62
2 files changed, 62 insertions, 0 deletions
diff --git a/Assignment2/Data/wine.mat b/Assignment2/Data/wine.mat
new file mode 100644
index 0000000..da15efd
--- /dev/null
+++ b/Assignment2/Data/wine.mat
Binary files differ
diff --git a/Assignment2/ex21.py b/Assignment2/ex21.py
new file mode 100644
index 0000000..87d68cd
--- /dev/null
+++ b/Assignment2/ex21.py
@@ -0,0 +1,62 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Sat Oct 10 21:28:45 2015
+
+@author: camilstaps
+"""
+
+import matplotlib.pyplot as plt
+from scipy import io as sciio, stats
+import numpy as np
+
+# 2.1.1 - read the wine data set from the MATLAB file
+wine = sciio.loadmat('./Data/wine.mat')
+data = wine['X']
+# Take element [0] of every entry in the first row of 'attributeNames' as a str
+atts = [str(name[0]) for name in wine['attributeNames'][0]]
+
+# Initial boxplots & histograms (before any rows are filtered out)
+# Boxplot of the z-scored data; an empty label is prepended so that tick 0
+# stays blank and the attribute names start at tick 1.
+plt.figure(figsize=(20, 10))
+plt.boxplot(stats.zscore(data))
+tick_labels = [''] + atts
+plt.xticks(range(len(tick_labels)), tick_labels, rotation=45, ha='right')
+plt.show()
+
+# One histogram per column, laid out on a 3x4 grid of subplots
+plt.figure(figsize=(20, 10))
+for col in range(data.shape[1]):
+    plt.subplot(3, 4, col + 1)
+    plt.hist(data[:, col])
+    plt.xlabel(atts[col])
+plt.show()
+
+# Removing known outliers: keep only the rows that pass all three checks
+data = np.array([row for row in data
+                 if row[1] < 20               # Volatile acidity
+                 and 0.01 < row[7] < 10       # Density
+                 and 0.5 < row[10] < 200])    # Alcohol
+
+# Clean boxplots & histograms (same two figures, drawn from the filtered rows)
+# Boxplot of the z-scored data; the leading '' keeps tick 0 unlabelled.
+plt.figure(figsize=(20, 10))
+plt.boxplot(stats.zscore(data))
+tick_labels = [''] + atts
+plt.xticks(range(len(tick_labels)), tick_labels, rotation=45, ha='right')
+plt.show()
+
+# One histogram per column, laid out on a 3x4 grid of subplots
+plt.figure(figsize=(20, 10))
+for col in range(data.shape[1]):
+    plt.subplot(3, 4, col + 1)
+    plt.hist(data[:, col])
+    plt.xlabel(atts[col])
+plt.show()
+
+# 2.1.2 - scatter every row of the transposed data except the last against row 11
+data = data.T  # after this, data[k] holds all values of attribute atts[k]
+plt.figure(figsize=(20, 10))
+for row in range(len(data) - 1):
+    plt.subplot(3, 4, row + 1)
+    plt.scatter(data[row], data[11], marker='.', alpha=0.2)
+    plt.xlabel(atts[row])
+plt.show()
+
+# Bar chart of the Pearson correlation coefficient between every row of
+# `data` except the last and row 11 (presumably the quality score - confirm).
+fig, ax = plt.subplots(figsize=(10,5))
+it = np.arange(len(data) - 1)
+# align='center' is passed explicitly so each bar is centred on its tick,
+# whatever the default alignment of the installed matplotlib is.
+ax.bar(it, [stats.pearsonr(data[i], data[11])[0] for i in it], align='center')
+ax.set_xticks(it)
+# Only the first len(it) names have a bar; passing more labels than ticks
+# raises a ValueError in recent matplotlib releases.
+ax.set_xticklabels(atts[:len(it)], rotation=90, ha='center')
+plt.show()