aboutsummaryrefslogtreecommitdiff
path: root/Assignment 2/ex21.py
blob: 2594c6167d26e5bb9f359f6c39dc71f8c85bb1d2 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# -*- coding: utf-8 -*-
"""
Created on Sat Oct 10 21:28:45 2015

@author: Camil Staps (s4498062)

Run with Python 2.7
"""

import matplotlib.pyplot as plt
from scipy import io as sciio, stats
import numpy as np

# 2.1.1
# Read the MATLAB file once and pull out the two entries used further down.
wine = sciio.loadmat('./Data/wine.mat')
data, raw_names = wine['X'], wine['attributeNames'][0]
# Each entry of the name array is itself indexable; its first element is
# converted to a plain str so it can serve as an axis label.
atts = [str(name[0]) for name in raw_names]

# Initial boxplots & histograms
# Box plot of the z-scored columns, all in a single figure.
plt.figure(figsize=(20,10))
standardised = stats.zscore(data)
plt.boxplot(standardised)
# Tick 0 gets an empty label, so atts[k] ends up labelling tick k + 1.
tick_positions = range(len(atts) + 1)
tick_labels = [''] + atts
plt.xticks(tick_positions, tick_labels, rotation=45, ha='right')
plt.show()

# One histogram per column of `data`, laid out on a 3x4 grid of subplots.
plt.figure(figsize=(20,10))
for idx, column in enumerate(data.T):
    plt.subplot(3, 4, idx + 1)
    plt.hist(column)
    plt.xlabel(atts[idx])
plt.show()

# Removing known outliers
# Keep only the rows whose values lie inside the accepted range for each of
# the three checked columns.  A boolean mask replaces the Python-level loop
# over rows; unlike rebuilding the array from a list of rows, it also keeps
# the result two-dimensional when no row passes the filter.
keep = ((data[:, 1] < 20) &                               # Volatile acidity
        (0.01 < data[:, 7]) & (data[:, 7] < 10) &         # Density
        (0.5 < data[:, 10]) & (data[:, 10] < 200))        # Alcohol
data = data[keep]

# Clean boxplots & histograms
# Box plot of the z-scored columns, all in a single figure.
plt.figure(figsize=(20,10))
standardised = stats.zscore(data)
plt.boxplot(standardised)
# Tick 0 gets an empty label, so atts[k] ends up labelling tick k + 1.
tick_positions = range(len(atts) + 1)
tick_labels = [''] + atts
plt.xticks(tick_positions, tick_labels, rotation=45, ha='right')
plt.show()

# One histogram per column of `data`, laid out on a 3x4 grid of subplots.
plt.figure(figsize=(20,10))
for idx, column in enumerate(data.T):
    plt.subplot(3, 4, idx + 1)
    plt.hist(column)
    plt.xlabel(atts[idx])
plt.show()

# 2.1.2
# From here on every row of `data` holds one attribute.
data = data.T
plt.figure(figsize=(20,10))
# Scatter every row except the last one against row 11, one subplot each.
for idx, feature in enumerate(data[:-1]):
    plt.subplot(3, 4, idx + 1)
    plt.scatter(feature, data[11], marker='.', alpha=0.2)
    plt.xlabel(atts[idx])
plt.show()

# Bar chart of the Pearson correlation coefficient between every row of
# `data` except the last one and row 11.
fig, ax = plt.subplots(figsize=(10,5))
it = np.arange(len(data) - 1)
# pearsonr returns a pair; element 0 is the correlation coefficient.
correlations = [stats.pearsonr(data[i], data[11])[0] for i in it]
# Centre the bars on the integer positions explicitly and put the ticks on
# those same positions.  The default bar alignment differs between
# matplotlib releases, so a fixed +0.5 tick offset only lined up with the
# bars under the old left-aligned default.
ax.bar(it, correlations, align='center')
ax.set_xticks(it)
# Pass exactly one label per bar: `atts` also holds the name of the row
# that is being correlated against, and recent matplotlib rejects a label
# list whose length differs from the number of ticks.
ax.set_xticklabels(atts[:len(it)], rotation=90, ha='center')
plt.show()