aboutsummaryrefslogtreecommitdiff
path: root/Assignment 2/ex21.py
diff options
context:
space:
mode:
Diffstat (limited to 'Assignment 2/ex21.py')
-rw-r--r--Assignment 2/ex21.py64
1 files changed, 64 insertions, 0 deletions
diff --git a/Assignment 2/ex21.py b/Assignment 2/ex21.py
new file mode 100644
index 0000000..2594c61
--- /dev/null
+++ b/Assignment 2/ex21.py
@@ -0,0 +1,64 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Sat Oct 10 21:28:45 2015
+
+@author: Camil Staps (s4498062)
+
+Run with Python 2.7
+"""
+
+import matplotlib.pyplot as plt
+from scipy import io as sciio, stats
+import numpy as np
+
+# 2.1.1
+wine = sciio.loadmat('./Data/wine.mat')
+data = wine['X']
+atts = [str(s[0]) for s in wine['attributeNames'][0]]
+
+# Initial boxplots & histograms
+plt.figure(figsize=(20,10))
+plt.boxplot(stats.zscore(data))
+plt.xticks(range(len(atts) + 1), [''] + atts, rotation=45, ha='right')
+plt.show()
+
+plt.figure(figsize=(20,10))
+for i in range(len(data[0])):
+ plt.subplot(3, 4, i + 1)
+ plt.hist(data[:,i])
+ plt.xlabel(atts[i])
+plt.show()
+
+# Removing known outliers
+data = np.array([d for d in data if d[1] < 20 and # Volatide acidity
+ 0.01 < d[7] and d[7] < 10 and # Density
+ 0.5 < d[10] and d[10] < 200]) # Alcohol
+
+# Clean boxplots & histograms
+plt.figure(figsize=(20,10))
+plt.boxplot(stats.zscore(data))
+plt.xticks(range(len(atts) + 1), [''] + atts, rotation=45, ha='right')
+plt.show()
+
+plt.figure(figsize=(20,10))
+for i in range(len(data[0])):
+ plt.subplot(3, 4, i + 1)
+ plt.hist(data[:,i])
+ plt.xlabel(atts[i])
+plt.show()
+
+# 2.1.2
+data = np.transpose(data)
+plt.figure(figsize=(20,10))
+for i in range(len(data) - 1):
+ plt.subplot(3, 4, i + 1)
+ plt.scatter(data[i], data[11], marker='.', alpha=0.2)
+ plt.xlabel(atts[i])
+plt.show()
+
+fig, ax = plt.subplots(figsize=(10,5))
+it = np.arange(len(data) - 1)
+ax.bar(it, [stats.pearsonr(data[i], data[11])[0] for i in it])
+ax.set_xticks(it + 0.5)
+ax.set_xticklabels(atts, rotation=90, ha='center')
+plt.show()