# -*- coding: utf-8 -*- """ Created on Fri Sep 11 13:12:03 2015 @author: camilstaps """ from __future__ import print_function import xlrd import numpy as np import matplotlib.lines as pltlines import matplotlib.patches as pltpatches import matplotlib.pyplot as plt # 1.2.1 a xls = xlrd.open_workbook(filename='Data/nanonose.xls') xls = xls.sheet_by_index(0) fst_col, fst_row = 3, 2 data = np.asmatrix([xls.col_values(i)[fst_row:] for i in range(fst_col,fst_col + 8)]) # 1.2.1 b colors = {'Water': '#61d4fa', 'Ethanol': '#ff3333', 'Acetone': '#549900', 'Heptane': '#d9910d', 'Pentanol': '#990096'} graph_colors = [colors[r] for r in xls.col_values(0)[fst_row:]] xs = xls.col_values(1)[fst_row:] fig = plt.figure(figsize=(12,6)) ax = plt.gca() ax.set_xscale('log') ax.set_yscale('symlog') plt.xlim([80,10 ** 5]) plt.ylim([-1, 500]) ax.scatter(xs, data.tolist()[0], s=60, c=graph_colors, alpha=0.4, marker='s') ax.scatter(xs, data.tolist()[1], s=60, c=graph_colors, alpha=0.4, marker='o') line_a = pltlines.Line2D([], [], ls=' ', marker='s', label='A', c='w') line_b = pltlines.Line2D([], [], ls=' ', marker='o', label='B', c='w') handles = [pltpatches.Patch(label=k, color=v) for k, v in colors.iteritems()] + [line_a, line_b] ax.legend(handles=handles, numpoints=1, loc=2) plt.show() # 1.2.2 a # PCA is a method that can be used to reduce dimensionality of a dataset. It # can be used when some variables are correlated; we then basically rewrite one # of them as a function of the other. Of course, in general that implies data # loss. # 1.2.2 b # EVD is a way to rewrite a diagonalizable matrix into a canonical form (a # summation of products of eigenvalues and corresponding eigenvectors). SVD is # a generalisation which can be applied to any matrix.