1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
|
# -*- coding: utf-8 -*-
"""
Created on Sat Oct 10 21:28:45 2015
@author: Camil Staps (s4498062)
Run with Python 2.7
"""
import matplotlib.pyplot as plt
from scipy import io as sciio, stats
import numpy as np
# 2.1.1
# Read the wine data set: 'X' is the numeric table, 'attributeNames' its
# column labels.
wine = sciio.loadmat('./Data/wine.mat')
data = wine['X']
# Each label entry is itself indexable (presumably a one-element array --
# confirm against the .mat file); take its first item as a plain str.
names_row = wine['attributeNames'][0]
atts = [str(cell[0]) for cell in names_row]
# Initial boxplots & histograms (raw data, before any outlier removal)

# One box per column, on z-scored values so all columns share a scale.
plt.figure(figsize=(20, 10))
standardized = stats.zscore(data)
plt.boxplot(standardized)
# Tick 0 gets a blank label so that atts[k] ends up at x = k + 1.
tick_positions = range(len(atts) + 1)
tick_labels = [''] + atts
plt.xticks(tick_positions, tick_labels, rotation=45, ha='right')
plt.show()

# One histogram per column, laid out on a fixed 3 x 4 grid.
plt.figure(figsize=(20, 10))
column_count = len(data[0])
for col in range(column_count):
    panel = plt.subplot(3, 4, col + 1)
    panel.hist(data[:, col])
    panel.set_xlabel(atts[col])
plt.show()
# Removing known outliers: keep only the rows whose values lie strictly inside
# the accepted range for each of the three checked columns.
# A boolean mask does the filtering inside NumPy instead of looping over rows
# in Python, and the result stays two-dimensional even if no row survives.
volatile_acidity = data[:, 1]
density = data[:, 7]
alcohol = data[:, 10]
keep = ((volatile_acidity < 20) &
        (0.01 < density) & (density < 10) &
        (0.5 < alcohol) & (alcohol < 200))
data = data[keep]
# Clean boxplots & histograms (same two figures, drawn from the current data)

# One box per column, on z-scored values so all columns share a scale.
plt.figure(figsize=(20, 10))
standardized = stats.zscore(data)
plt.boxplot(standardized)
# Tick 0 gets a blank label so that atts[k] ends up at x = k + 1.
tick_positions = range(len(atts) + 1)
tick_labels = [''] + atts
plt.xticks(tick_positions, tick_labels, rotation=45, ha='right')
plt.show()

# One histogram per column, laid out on a fixed 3 x 4 grid.
plt.figure(figsize=(20, 10))
column_count = len(data[0])
for col in range(column_count):
    panel = plt.subplot(3, 4, col + 1)
    panel.hist(data[:, col])
    panel.set_xlabel(atts[col])
plt.show()
# 2.1.2
# Switch to a row-per-attribute layout; everything below indexes data by
# attribute (data[k]) rather than by sample.
data = data.T

# Scatter every row except the last against row 11, one panel per attribute
# on a fixed 3 x 4 grid.
plt.figure(figsize=(20, 10))
for idx, values in enumerate(data[:-1]):
    panel = plt.subplot(3, 4, idx + 1)
    panel.scatter(values, data[11], marker='.', alpha=0.2)
    panel.set_xlabel(atts[idx])
plt.show()
# Bar chart of the Pearson correlation coefficient between each attribute row
# and row 11 (data is indexed row-per-attribute at this point).
fig, ax = plt.subplots(figsize=(10, 5))
it = np.arange(len(data) - 1)
correlations = [stats.pearsonr(data[i], data[11])[0] for i in it]
# Centre the bars explicitly and put the ticks on the same x positions. The
# default bar alignment is not the same across matplotlib versions, so a
# hard-coded +0.5 tick offset does not reliably land on the bars.
ax.bar(it, correlations, align='center')
ax.set_xticks(it)
# atts has an entry for every row, including row 11 itself; pass exactly one
# label per tick (a mismatched count is rejected by recent matplotlib).
ax.set_xticklabels(atts[:len(it)], rotation=90, ha='center')
plt.show()
|