From 1beb50ef75a7db236a5ab3fdf88faf4c55f7c19d Mon Sep 17 00:00:00 2001
From: Camil Staps
Date: Sun, 11 Oct 2015 21:14:28 +0200
Subject: Assignment 2 finished

---
 Assignment 2/Data/wine.mat                         | Bin 0 -> 126090 bytes
 Assignment 2/Data/zipdata.mat                      | Bin 0 -> 3862827 bytes
 Assignment 2/ex21.py                               |  64 ++++++++++
 Assignment 2/ex22.py                               |  88 +++++++++++++
 Assignment 2/ex23.py                               |  68 ++++++++++
 Assignment 2/report/assignment2.tex                | 140 +++++++++++++++++++++
 Assignment 2/report/ex211-boxplots-1.png           | Bin 0 -> 82809 bytes
 Assignment 2/report/ex211-boxplots-2.png           | Bin 0 -> 83829 bytes
 Assignment 2/report/ex211-hists-1.png              | Bin 0 -> 134486 bytes
 Assignment 2/report/ex211-hists-2.png              | Bin 0 -> 137724 bytes
 Assignment 2/report/ex212-correlation-bars.png     | Bin 0 -> 48225 bytes
 Assignment 2/report/ex212-scatters.png             | Bin 0 -> 177256 bytes
 .../report/ex221-reconstructed-visualisation.png   | Bin 0 -> 45269 bytes
 Assignment 2/report/ex221-scatter-3d.png           | Bin 0 -> 141551 bytes
 Assignment 2/report/ex221-scatters.png             | Bin 0 -> 226468 bytes
 Assignment 2/report/ex221-visualisation.png        | Bin 0 -> 29592 bytes
 Assignment 2/report/ex231-hists.png                | Bin 0 -> 39326 bytes
 Assignment2/Data/wine.mat                          | Bin 126090 -> 0 bytes
 Assignment2/ex21.py                                |  62 ---------
 19 files changed, 360 insertions(+), 62 deletions(-)
 create mode 100644 Assignment 2/Data/wine.mat
 create mode 100644 Assignment 2/Data/zipdata.mat
 create mode 100644 Assignment 2/ex21.py
 create mode 100644 Assignment 2/ex22.py
 create mode 100644 Assignment 2/ex23.py
 create mode 100644 Assignment 2/report/assignment2.tex
 create mode 100644 Assignment 2/report/ex211-boxplots-1.png
 create mode 100644 Assignment 2/report/ex211-boxplots-2.png
 create mode 100644 Assignment 2/report/ex211-hists-1.png
 create mode 100644 Assignment 2/report/ex211-hists-2.png
 create mode 100644 Assignment 2/report/ex212-correlation-bars.png
 create mode 100644 Assignment 2/report/ex212-scatters.png
 create mode 100644 Assignment 2/report/ex221-reconstructed-visualisation.png
 create mode 100644 Assignment 2/report/ex221-scatter-3d.png
 create mode 100644 Assignment 2/report/ex221-scatters.png
 create mode 100644 Assignment 2/report/ex221-visualisation.png
 create mode 100644 Assignment 2/report/ex231-hists.png
 delete mode 100644 Assignment2/Data/wine.mat
 delete mode 100644 Assignment2/ex21.py

diff --git a/Assignment 2/Data/wine.mat b/Assignment 2/Data/wine.mat
new file mode 100644
index 0000000..da15efd
Binary files /dev/null and b/Assignment 2/Data/wine.mat differ
diff --git a/Assignment 2/Data/zipdata.mat b/Assignment 2/Data/zipdata.mat
new file mode 100644
index 0000000..a98e796
Binary files /dev/null and b/Assignment 2/Data/zipdata.mat differ
diff --git a/Assignment 2/ex21.py b/Assignment 2/ex21.py
new file mode 100644
index 0000000..2594c61
--- /dev/null
+++ b/Assignment 2/ex21.py	
@@ -0,0 +1,64 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Sat Oct 10 21:28:45 2015
+
+@author: Camil Staps (s4498062)
+
+Run with Python 2.7
+"""
+
+import matplotlib.pyplot as plt
+from scipy import io as sciio, stats
+import numpy as np
+
+# 2.1.1
+wine = sciio.loadmat('./Data/wine.mat')
+data = wine['X']
+atts = [str(s[0]) for s in wine['attributeNames'][0]]
+
+# Initial boxplots & histograms
+plt.figure(figsize=(20,10))
+plt.boxplot(stats.zscore(data))
+plt.xticks(range(len(atts) + 1), [''] + atts, rotation=45, ha='right')
+plt.show()
+
+plt.figure(figsize=(20,10))
+for i in range(len(data[0])):
+    plt.subplot(3, 4, i + 1)
+    plt.hist(data[:,i])
+    plt.xlabel(atts[i])
+plt.show()
+
+# Removing known outliers
+data = np.array([d for d in data if d[1] < 20 and           # Volatile acidity
+                           0.01 < d[7] and d[7] < 10 and    # Density
+                           0.5 < d[10] and d[10] < 200])    # Alcohol
+
+# Clean boxplots & histograms
+plt.figure(figsize=(20,10))
+plt.boxplot(stats.zscore(data))
+plt.xticks(range(len(atts) + 1), [''] + atts, rotation=45, ha='right')
+plt.show()
+
+plt.figure(figsize=(20,10))
+for i in range(len(data[0])):
+    plt.subplot(3, 4, i + 1)
+    plt.hist(data[:,i])
+    plt.xlabel(atts[i])
+plt.show()
+
+# 2.1.2
+data = np.transpose(data)
+plt.figure(figsize=(20,10))
+for i in range(len(data) - 1):
+    plt.subplot(3, 4, i + 1)
+    plt.scatter(data[i], data[11], marker='.', alpha=0.2)
+    plt.xlabel(atts[i])
+plt.show()
+
+fig, ax = plt.subplots(figsize=(10,5))
+it = np.arange(len(data) - 1)
+ax.bar(it, [stats.pearsonr(data[i], data[11])[0] for i in it], align='center')
+ax.set_xticks(it)  # bars are centred on `it`, whatever matplotlib's default alignment is
+ax.set_xticklabels(atts[:len(it)], rotation=90, ha='center')  # one label per bar
+plt.show()
diff --git a/Assignment 2/ex22.py b/Assignment 2/ex22.py
new file mode 100644
index 0000000..3487752
--- /dev/null
+++ b/Assignment 2/ex22.py	
@@ -0,0 +1,88 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Sun Oct 11 09:28:15 2015
+
+@author: Camil Staps (s4498062)
+
+Run with Python 2.7
+"""
+
+import itertools
+import matplotlib.pyplot as plt
+import matplotlib.pylab as plab
+from mpl_toolkits.mplot3d import Axes3D
+import numpy as np
+import scipy.io
+
+# 2.2.1
+zipd = scipy.io.loadmat('./Data/zipdata.mat')
+traindata = zipd['traindata']
+testdata = zipd['testdata']
+
+data = traindata[:,1:]
+classes = traindata[:,0]
+temp = [(d, c) for d, c in zip(data, classes) if c < 2]
+[data, classes] = [np.array(t) for t in zip(*temp)]
+
+mean = data.mean(0)
+
+# First visualisation: the first ten digits on a 2x5 grid, in data order
+for i in range(10):
+    plt.subplot(2, 5, i + 1)  # subplot indices are 1-based
+    image = plab.reshape(data[i,:], (16, 16))
+    plt.imshow(image, extent=(0, 16, 0, 16), cmap=plab.cm.gray_r)
+    plt.axis('off')
+plt.show()
+
+# PCA
+Y = data - np.ones((len(data), 1)) * mean
+U, S, Vt = np.linalg.svd(Y, full_matrices=False)
+V = Vt.T
+Z = np.dot(Y, V[:,0:4])
+
+W = np.dot(Z[:10], V[:,0:4].T) + mean  # first ten digits rebuilt from four components
+for i in range(10):
+    plt.subplot(2, 5, i + 1)  # subplot indices are 1-based
+    image = plab.reshape(W[i,:], (16, 16))
+    plt.imshow(image, extent=(0, 16, 0, 16), cmap=plab.cm.gray_r)
+    plt.axis('off')
+plt.show()
+
+Y0 = [d for c, d in zip(classes, Y) if c == 0]
+Y1 = [d for c, d in zip(classes, Y) if c == 1]
+plt.figure(figsize=(16,16))
+for i, j in itertools.product(range(4), range(4)):
+    plt.subplot(4, 4, 4 * i + j + 1)
+    # Row i goes on the y-axis and column j on the x-axis, matching the labels
+    Zy = np.dot(Y0, V[:,i:i + 1])
+    Zx = np.dot(Y0, V[:,j:j + 1])
+    plt.scatter(Zx, Zy, color='r', marker='.', s=1, label='0')
+    Zy = np.dot(Y1, V[:,i:i + 1])
+    Zx = np.dot(Y1, V[:,j:j + 1])
+    plt.scatter(Zx, Zy, color='b', marker='.', s=1, label='1')
+    # Label both axes with the (0-based) component index and hide the ticks
+    plt.ylabel('PC' + str(i))
+    plt.xlabel('PC' + str(j))
+    plt.gca().axes.get_xaxis().set_ticks([])
+    plt.gca().axes.get_yaxis().set_ticks([])
+plt.legend(bbox_to_anchor=(1.05, 1), loc=2)
+plt.show()
+
+fig = plt.figure(figsize=(8,8))
+ax = fig.add_subplot(111, projection='3d')
+Z1 = np.dot(Y0, V[:,0:1])
+Z2 = np.dot(Y0, V[:,1:2])
+Z3 = np.dot(Y0, V[:,2:3])
+ax.scatter(Z1, Z2, Z3, color='r', marker='.', s=10, label='0')
+Z1 = np.dot(Y1, V[:,0:1])
+Z2 = np.dot(Y1, V[:,1:2])
+Z3 = np.dot(Y1, V[:,2:3])
+ax.scatter(Z1, Z2, Z3, color='b', marker='.', s=10, label='1')
+ax.set_xlabel('PC1')
+ax.set_ylabel('PC2')
+ax.set_zlabel('PC3')
+ax.set_xticks([])
+ax.set_yticks([])
+ax.set_zticks([])
+plt.legend(bbox_to_anchor=(1.05, 1), loc=2)
+plt.show()
diff --git a/Assignment 2/ex23.py b/Assignment 2/ex23.py
new file mode 100644
index 0000000..7c763ee
--- /dev/null
+++ b/Assignment 2/ex23.py	
@@ -0,0 +1,68 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Sun Oct 11 18:47:35 2015
+
+@author: Camil Staps (s4498062)
+
+Run with Python 2.7
+"""
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+def all_samples(data, n):
+    """All samples without replacement or ordering with n elements from data.
+
+    Returns a list of lists in the order the elements occur in data: [[]] for
+    n == 0 and [] when n is negative or exceeds len(data).
+    """
+    from itertools import combinations  # local: this file does not import it
+    # combinations() rejects negative sizes; the recursive version returned []
+    return [list(s) for s in combinations(data, n)] if n >= 0 else []
+        
+def nearly_equal(m, n, sig_fig = 5):
+    """Determine whether two numbers differ by less than 10 ** -sig_fig"""
+    # Absolute tolerance: truncating both sides splits values straddling a digit
+    return m == n or abs(m - n) < 10.0 ** -sig_fig
+
+data = np.array([2,3,6,8,11,18])
+
+# i
+print("Mean: %f\nStandard deviation: %f" % (data.mean(), data.std()))
+    
+samples_2 = all_samples(data, 2)
+samples_4 = all_samples(data, 4)
+
+# ii
+print([(s, np.mean(s)) for s in samples_2])
+print([(s, np.mean(s)) for s in samples_4])
+
+# iii
+samples_2_means = [np.mean(s) for s in samples_2]
+samples_4_means = [np.mean(s) for s in samples_4]
+
+print("Mean of 2-sample means: %f" % np.mean(samples_2_means))
+print("Standard deviation of 2-sample means: %f" % np.std(samples_2_means))
+print("Mean of 4-sample means: %f" % np.mean(samples_4_means))
+print("Standard deviation of 4-sample means: %f" % np.std(samples_4_means))
+
+# iv
+print("Means are equal (2): %r" % (np.mean(samples_2_means) == data.mean()))
+print("Means are equal (4): %r" % (np.mean(samples_4_means) == data.mean()))
+print("σ2 ≈ σ/√2×√(4/5): %r" % nearly_equal(
+        np.std(samples_2_means), data.std() / np.sqrt(2.) * np.sqrt(4./5.)))
+print("σ4 ≈ σ/√4×√(2/5): %r" % nearly_equal(
+        np.std(samples_4_means), data.std() / np.sqrt(4.) * np.sqrt(2./5.)))
+        
+# v
+plt.figure(figsize=(10,4))
+plt.subplot(1, 3, 1)
+plt.hist(data)
+plt.title('Population distribution')
+plt.subplot(1, 3, 2)
+plt.hist(samples_2_means)
+plt.title('2-Sample mean distribution')
+plt.subplot(1, 3, 3)
+plt.hist(samples_4_means)
+plt.title('4-Sample mean distribution')
+plt.show()
diff --git a/Assignment 2/report/assignment2.tex b/Assignment 2/report/assignment2.tex
new file mode 100644
index 0000000..6ae0b55
--- /dev/null
+++ b/Assignment 2/report/assignment2.tex	
@@ -0,0 +1,140 @@
+\documentclass[10pt,a4paper]{article}
+
+\usepackage[margin=2cm]{geometry}
+\usepackage{graphicx}
+
+\let\assignment2
+
+\usepackage{enumitem}
+\setenumerate[1]{label=\assignment.\arabic*.}
+\setenumerate[2]{label=\arabic*.}
+\setenumerate[3]{label=\roman*.}
+
+% textcomp package is not available everywhere, and we only need the Copyright symbol
+% taken from http://tex.stackexchange.com/a/1677/23992
+\DeclareTextCommandDefault{\textregistered}{\textcircled{\check@mathfonts\fontsize\sf@size\z@\math@fontsfalse\selectfont R}}
+
+\usepackage{fancyhdr}
+\renewcommand{\headrulewidth}{0pt}
+\renewcommand{\footrulewidth}{0pt}
+\fancyhead{}
+%\fancyfoot[C]{Copyright {\textcopyright} 2015 Camil Staps}
+\pagestyle{fancy}
+
+\usepackage{caption}
+\usepackage{subcaption}
+
+\parindent0pt
+
+\title{Data Mining - assignment \assignment}
+\author{Camil Staps\\\small{s4498062}}
+
+\begin{document}
+
+\maketitle
+\thispagestyle{fancy}
+
+\begin{enumerate}
+    \item \begin{enumerate}
+            \item See figures \ref{fig:211-boxplots-1} through \ref{fig:211-hists-2}. It is clear that after eliminating the outliers we get a much better idea of the distributions.
+
+                \begin{figure}[p]
+                    \centering
+                    \includegraphics[width=\linewidth]{ex211-boxplots-1}
+                    \caption{Boxplots before eliminating outliers}
+                    \label{fig:211-boxplots-1}
+                \end{figure}
+                \begin{figure}[p]
+                    \centering
+                    \includegraphics[width=\linewidth]{ex211-hists-1}
+                    \caption{Histograms before eliminating outliers}
+                    \label{fig:211-hists-1}
+                \end{figure}
+                \begin{figure}[p]
+                    \centering
+                    \includegraphics[width=\linewidth]{ex211-boxplots-2}
+                    \caption{Boxplots after eliminating outliers}
+                    \label{fig:211-boxplots-2}
+                \end{figure}
+                \begin{figure}[p]
+                    \centering
+                    \includegraphics[width=\linewidth]{ex211-hists-2}
+                    \caption{Histograms after eliminating outliers}
+                    \label{fig:211-hists-2}
+                \end{figure}
+
+            \item See figures \ref{fig:212-scatters} and \ref{fig:212-correlation-bars}. As can be seen in the latter, alcohol percentage has a large positive correlation with quality, whereas density, volatile acidity and chlorides each have a large negative correlation with quality.
+
+                From the first plots we also see that high quality wine has a `citric acid' level of around $0.4$.
+
+                \begin{figure}[p]
+                    \centering
+                    \includegraphics[width=\linewidth]{ex212-scatters}
+                    \caption{Scatter plots between attributes and wine quality}
+                    \label{fig:212-scatters}
+                \end{figure}
+                \begin{figure}[p]
+                    \centering
+                    \includegraphics[width=\linewidth]{ex212-correlation-bars}
+                    \caption{Correlation coefficients between attributes and wine quality}
+                    \label{fig:212-correlation-bars}
+                \end{figure}
+        \end{enumerate}
+
+    \item \begin{enumerate}
+            \item See figure \ref{fig:221-visualisation}. PCA seems to work quite well here. However, it is also clear (not from this picture though) that if we want to reconstruct all ten digits, we need more principal components. But in this case the digits are easily recognisable. This is even more clear in the scatter plot of PC0 against PC0 in figure \ref{fig:221-scatters}. It is clear that almost all zeroes and ones can be recognised by checking whether the first principal component is below some threshold.
+
+                \begin{figure}[p]
+                    \centering
+                    \begin{subfigure}{.45\linewidth}
+                        \includegraphics[width=\linewidth]{ex221-visualisation}
+                        \caption{Initially}
+                    \end{subfigure}
+                    \begin{subfigure}{.45\linewidth}
+                        \includegraphics[width=\linewidth]{ex221-reconstructed-visualisation}
+                        \caption{Reconstructed with four principal components}
+                    \end{subfigure}
+                    \caption{Visualisations of the first ten zeroes and ones}
+                    \label{fig:221-visualisation}
+                \end{figure}
+
+                See figures \ref{fig:221-scatters} and \ref{fig:221-scatter-3d} for scatter plots.
+
+                \begin{figure}[p]
+                    \centering
+                    \includegraphics[width=\linewidth]{ex221-scatters}
+                    \caption{Scatter plots of handwritten digits, projected on two principal components (NB: the principal components are numbered starting from $0$)}
+                    \label{fig:221-scatters}
+                \end{figure}
+                \begin{figure}[p]
+                    \centering
+                    \includegraphics[width=\linewidth]{ex221-scatter-3d}
+                    \caption{Scatter plot of handwritten digits, projected on three principal components}
+                    \label{fig:221-scatter-3d}
+                \end{figure}
+        \end{enumerate}
+
+    \item \begin{enumerate}
+            \item \begin{enumerate}
+                    \item \texttt{Mean: 8.000000\\Standard deviation: 5.385165}
+                    \item These are all combinations along with their means:
+                        
+                        \texttt{[([2, 3], 2.5), ([2, 6], 4.0), ([2, 8], 5.0), ([2, 11], 6.5), ([2, 18], 10.0), ([3, 6], 4.5), ([3, 8], 5.5), ([3, 11], 7.0), ([3, 18], 10.5), ([6, 8], 7.0), ([6, 11], 8.5), ([6, 18], 12.0), ([8, 11], 9.5), ([8, 18], 13.0), ([11, 18], 14.5)]{\\}[([2, 3, 6, 8], 4.75), ([2, 3, 6, 11], 5.5), ([2, 3, 6, 18], 7.25), ([2, 3, 8, 11], 6.0), ([2, 3, 8, 18], 7.75), ([2, 3, 11, 18], 8.5), ([2, 6, 8, 11], 6.75), ([2, 6, 8, 18], 8.5), ([2, 6, 11, 18], 9.25), ([2, 8, 11, 18], 9.75), ([3, 6, 8, 11], 7.0), ([3, 6, 8, 18], 8.75), ([3, 6, 11, 18], 9.5), ([3, 8, 11, 18], 10.0), ([6, 8, 11, 18], 10.75)]}
+                    \item \texttt{Mean of 2-sample means: 8.000000\\Standard deviation of 2-sample means: 3.405877\\Mean of 4-sample means: 8.000000\\Standard deviation of 4-sample means: 1.702939}
+                    \item \texttt{Means are equal (2): True\\Means are equal (4): True\\$\sigma_2 \approx \sigma/\sqrt2\times\sqrt{4/5}$: True\\$\sigma_4 \approx \sigma/\sqrt4\times\sqrt{2/5}$: True}
+
+                        Therefore, the Central Limit Theorem seems to be correct judging from this dataset.
+                    \item See figure \ref{fig:231-hists}. The top of the shape shifts from left to right as $N$ increases.
+
+                        \begin{figure}[p]
+                            \centering
+                            \includegraphics[width=\linewidth]{ex231-hists}
+                            \caption{Histograms of the population distribution and the sample means distributions}
+                            \label{fig:231-hists}
+                        \end{figure}
+                \end{enumerate}
+        \end{enumerate}
+\end{enumerate}
+
+\end{document}
+
diff --git a/Assignment 2/report/ex211-boxplots-1.png b/Assignment 2/report/ex211-boxplots-1.png
new file mode 100644
index 0000000..67ec78b
Binary files /dev/null and b/Assignment 2/report/ex211-boxplots-1.png differ
diff --git a/Assignment 2/report/ex211-boxplots-2.png b/Assignment 2/report/ex211-boxplots-2.png
new file mode 100644
index 0000000..cacb3cb
Binary files /dev/null and b/Assignment 2/report/ex211-boxplots-2.png differ
diff --git a/Assignment 2/report/ex211-hists-1.png b/Assignment 2/report/ex211-hists-1.png
new file mode 100644
index 0000000..bde8f94
Binary files /dev/null and b/Assignment 2/report/ex211-hists-1.png differ
diff --git a/Assignment 2/report/ex211-hists-2.png b/Assignment 2/report/ex211-hists-2.png
new file mode 100644
index 0000000..88d9386
Binary files /dev/null and b/Assignment 2/report/ex211-hists-2.png differ
diff --git a/Assignment 2/report/ex212-correlation-bars.png b/Assignment 2/report/ex212-correlation-bars.png
new file mode 100644
index 0000000..3918fb1
Binary files /dev/null and b/Assignment 2/report/ex212-correlation-bars.png differ
diff --git a/Assignment 2/report/ex212-scatters.png b/Assignment 2/report/ex212-scatters.png
new file mode 100644
index 0000000..0ab049c
Binary files /dev/null and b/Assignment 2/report/ex212-scatters.png differ
diff --git a/Assignment 2/report/ex221-reconstructed-visualisation.png b/Assignment 2/report/ex221-reconstructed-visualisation.png
new file mode 100644
index 0000000..ea070db
Binary files /dev/null and b/Assignment 2/report/ex221-reconstructed-visualisation.png differ
diff --git a/Assignment 2/report/ex221-scatter-3d.png b/Assignment 2/report/ex221-scatter-3d.png
new file mode 100644
index 0000000..f5b45e1
Binary files /dev/null and b/Assignment 2/report/ex221-scatter-3d.png differ
diff --git a/Assignment 2/report/ex221-scatters.png b/Assignment 2/report/ex221-scatters.png
new file mode 100644
index 0000000..ca6a932
Binary files /dev/null and b/Assignment 2/report/ex221-scatters.png differ
diff --git a/Assignment 2/report/ex221-visualisation.png b/Assignment 2/report/ex221-visualisation.png
new file mode 100644
index 0000000..2a0de19
Binary files /dev/null and b/Assignment 2/report/ex221-visualisation.png differ
diff --git a/Assignment 2/report/ex231-hists.png b/Assignment 2/report/ex231-hists.png
new file mode 100644
index 0000000..3e24297
Binary files /dev/null and b/Assignment 2/report/ex231-hists.png differ
diff --git a/Assignment2/Data/wine.mat b/Assignment2/Data/wine.mat
deleted file mode 100644
index da15efd..0000000
Binary files a/Assignment2/Data/wine.mat and /dev/null differ
diff --git a/Assignment2/ex21.py b/Assignment2/ex21.py
deleted file mode 100644
index 87d68cd..0000000
--- a/Assignment2/ex21.py
+++ /dev/null
@@ -1,62 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Sat Oct 10 21:28:45 2015
-
-@author: camilstaps
-"""
-
-import matplotlib.pyplot as plt
-from scipy import io as sciio, stats
-import numpy as np
-
-# 2.1.1
-wine = sciio.loadmat('./Data/wine.mat')
-data = wine['X']
-atts = [str(s[0]) for s in wine['attributeNames'][0]]
-
-# Initial boxplots & histograms
-plt.figure(figsize=(20,10))
-plt.boxplot(stats.zscore(data))
-plt.xticks(range(len(atts) + 1), [''] + atts, rotation=45, ha='right')
-plt.show()
-
-plt.figure(figsize=(20,10))
-for i in range(len(data[0])):
-    plt.subplot(3, 4, i + 1)
-    plt.hist(data[:,i])
-    plt.xlabel(atts[i])
-plt.show()
-
-# Removing known outliers
-data = np.array([d for d in data if d[1] < 20 and           # Volatide acidity
-                           0.01 < d[7] and d[7] < 10 and    # Density
-                           0.5 < d[10] and d[10] < 200])    # Alcohol
-
-# Clean boxplots & histograms
-plt.figure(figsize=(20,10))
-plt.boxplot(stats.zscore(data))
-plt.xticks(range(len(atts) + 1), [''] + atts, rotation=45, ha='right')
-plt.show()
-
-plt.figure(figsize=(20,10))
-for i in range(len(data[0])):
-    plt.subplot(3, 4, i + 1)
-    plt.hist(data[:,i])
-    plt.xlabel(atts[i])
-plt.show()
-
-# 2.1.2
-data = np.transpose(data)
-plt.figure(figsize=(20,10))
-for i in range(len(data) - 1):
-    plt.subplot(3, 4, i + 1)
-    plt.scatter(data[i], data[11], marker='.', alpha=0.2)
-    plt.xlabel(atts[i])
-plt.show()
-
-fig, ax = plt.subplots(figsize=(10,5))
-it = np.arange(len(data) - 1)
-ax.bar(it, [stats.pearsonr(data[i], data[11])[0] for i in it])
-ax.set_xticks(it + 0.5)
-ax.set_xticklabels(atts, rotation=90, ha='center')
-plt.show()
-- 
cgit v1.2.3