# -*- coding: utf-8 -*-
"""
Created on Sat Oct 10 21:28:45 2015

@author: Camil Staps (s4498062)

Run with Python 2.7

Exploratory analysis of the wine data set (exercises 2.1.1 and 2.1.2):
box plots and histograms of every attribute before and after removing
known outliers, followed by scatter plots and Pearson correlations of each
attribute against the last one.

Provenance: file "Assignment 2/ex21.py", added in commit
1beb50ef75a7db236a5ab3fdf88faf4c55f7c19d ("Assignment 2 finished",
Camil Staps, Sun, 11 Oct 2015 21:14:28 +0200).
"""

import matplotlib.pyplot as plt
from scipy import io as sciio, stats
import numpy as np

# Figure size and subplot grid shared by all per-attribute overview figures.
# The 3 x 4 grid has room for at most 12 attributes.
FIGSIZE = (20, 10)
GRID_ROWS, GRID_COLS = 3, 4

# Column indices of the attributes used to detect known outliers.
VOLATILE_ACIDITY = 1
DENSITY = 7
ALCOHOL = 10


def _show_boxplots(data, atts):
    """Show a box plot of every column of `data` after z-scoring.

    data -- 2-D array with one row per observation, one column per attribute
    atts -- list with the attribute name of every column
    """
    plt.figure(figsize=FIGSIZE)
    plt.boxplot(stats.zscore(data))
    # boxplot() draws the boxes at x = 1..N by default, so the labels are
    # placed there directly instead of padding the list with a dummy label
    # for x = 0.
    plt.xticks(range(1, len(atts) + 1), atts, rotation=45, ha='right')
    plt.show()


def _show_histograms(data, atts):
    """Show one histogram per column of `data` in a grid of subplots.

    data -- 2-D array with one row per observation, one column per attribute
    atts -- list with the attribute name of every column
    """
    plt.figure(figsize=FIGSIZE)
    for i, column in enumerate(np.transpose(data)):
        plt.subplot(GRID_ROWS, GRID_COLS, i + 1)
        plt.hist(column)
        plt.xlabel(atts[i])
    plt.show()


def _remove_known_outliers(data):
    """Return the rows of `data` whose values lie inside the accepted ranges.

    A row is kept when volatile acidity < 20, 0.01 < density < 10 and
    0.5 < alcohol < 200. The result is always 2-D, even if no row is kept.
    """
    keep = ((data[:, VOLATILE_ACIDITY] < 20) &
            (0.01 < data[:, DENSITY]) & (data[:, DENSITY] < 10) &
            (0.5 < data[:, ALCOHOL]) & (data[:, ALCOHOL] < 200))
    return data[keep]


# 2.1.1
wine = sciio.loadmat('./Data/wine.mat')
data = wine['X']
atts = [str(s[0]) for s in wine['attributeNames'][0]]

# Initial boxplots & histograms
_show_boxplots(data, atts)
_show_histograms(data, atts)

# Removing known outliers
data = _remove_known_outliers(data)

# Clean boxplots & histograms
_show_boxplots(data, atts)
_show_histograms(data, atts)

# 2.1.2
# From here on `data` holds one row per attribute. Every attribute except
# the last is compared against the last one (row 11 of the 12 rows).
# NOTE(review): the last attribute is presumably the quality score - confirm
# against wine['attributeNames'].
data = np.transpose(data)
plt.figure(figsize=FIGSIZE)
for i in range(len(data) - 1):
    plt.subplot(GRID_ROWS, GRID_COLS, i + 1)
    plt.scatter(data[i], data[-1], marker='.', alpha=0.2)
    plt.xlabel(atts[i])
plt.show()

# Pearson correlation coefficient of every attribute with the last one.
fig, ax = plt.subplots(figsize=(10, 5))
it = np.arange(len(data) - 1)
# align='center' is passed explicitly so that the bars are centred on `it`
# regardless of the matplotlib version's default bar alignment.
ax.bar(it, [stats.pearsonr(data[i], data[-1])[0] for i in it],
       align='center')
ax.set_xticks(it)
# One label per bar: the last attribute has no bar, and recent matplotlib
# releases reject a label list whose length differs from the tick count.
ax.set_xticklabels(atts[:len(it)], rotation=90, ha='center')
plt.show()