# -*- coding: utf-8 -*-
"""
Exploratory plots for the wine data set (exercises 2.1.1 and 2.1.2).

Loads ``./Data/wine.mat``, shows box plots and histograms of every attribute
before and after removing known outliers, then plots each attribute against
the last one and shows their Pearson correlation coefficients.

Created on Sat Oct 10 21:28:45 2015

@author: camilstaps
"""
import matplotlib.pyplot as plt
import numpy as np
from scipy import io as sciio
from scipy import stats

FIGSIZE = (20, 10)
GRID_ROWS, GRID_COLS = 3, 4  # subplot grid; room for 12 panels

# Column indices of the attributes used by the outlier filter.
VOLATILE_ACIDITY = 1
DENSITY = 7
ALCOHOL = 10


def plot_boxplots(data, names):
    """Show one box plot per column of *data*, after z-scoring each column.

    Parameters
    ----------
    data : 2-D array, one row per observation and one column per attribute.
    names : sequence of str, one label per column of *data*.
    """
    plt.figure(figsize=FIGSIZE)
    plt.boxplot(stats.zscore(data))
    # boxplot places its boxes at x = 1..N by default, so label exactly those
    # positions instead of adding an empty-labelled tick at x = 0.
    plt.xticks(range(1, len(names) + 1), names, rotation=45, ha='right')
    plt.show()


def plot_histograms(data, names):
    """Show a grid of histograms, one per column of *data*.

    Parameters
    ----------
    data : 2-D array, one row per observation and one column per attribute.
    names : sequence of str; ``names[i]`` labels the x-axis of column ``i``.
    """
    plt.figure(figsize=FIGSIZE)
    for i, column in enumerate(data.T):
        plt.subplot(GRID_ROWS, GRID_COLS, i + 1)
        plt.hist(column)
        plt.xlabel(names[i])
    plt.show()


def remove_outliers(data):
    """Return the rows of *data* whose values lie inside the known-good ranges.

    A row is kept when volatile acidity < 20, 0.01 < density < 10 and
    0.5 < alcohol < 200. The result stays two-dimensional even when no row
    passes the filter.
    """
    volatile_acidity = data[:, VOLATILE_ACIDITY]
    density = data[:, DENSITY]
    alcohol = data[:, ALCOHOL]
    keep = (
        (volatile_acidity < 20)
        & (0.01 < density) & (density < 10)
        & (0.5 < alcohol) & (alcohol < 200)
    )
    return data[keep]


# 2.1.1
wine = sciio.loadmat('./Data/wine.mat')
data = wine['X']
atts = [str(s[0]) for s in wine['attributeNames'][0]]

# Initial boxplots & histograms
plot_boxplots(data, atts)
plot_histograms(data, atts)

# Removing known outliers
data = remove_outliers(data)

# Clean boxplots & histograms
plot_boxplots(data, atts)
plot_histograms(data, atts)

# 2.1.2
# One row per attribute from here on. Every attribute except the last is
# compared against the last one (index 11 for the 12-attribute data set this
# script's 3x4 grid is laid out for).
# NOTE(review): the last attribute is presumably the quality score - confirm
# against wine['attributeNames'].
data = np.transpose(data)
features, target = data[:-1], data[-1]

plt.figure(figsize=FIGSIZE)
for i, feature in enumerate(features):
    plt.subplot(GRID_ROWS, GRID_COLS, i + 1)
    plt.scatter(feature, target, marker='.', alpha=0.2)
    plt.xlabel(atts[i])
plt.show()

fig, ax = plt.subplots(figsize=(10, 5))
it = np.arange(len(features))
# pearsonr returns (coefficient, p-value); only the coefficient is plotted.
correlations = [stats.pearsonr(feature, target)[0] for feature in features]
# Centre the bars explicitly so the ticks below line up on every matplotlib
# version (the default alignment changed from 'edge' to 'center' in 2.0).
ax.bar(it, correlations, align='center')
ax.set_xticks(it)
# The last attribute has no bar of its own, so its label is left out; passing
# more labels than ticks raises ValueError on current matplotlib.
ax.set_xticklabels(atts[:len(it)], rotation=90, ha='center')
plt.show()