# -*- coding: utf-8 -*-
"""
Created on Fri Oct 23 15:52:59 2015

@author: Camil Staps, s4498062

This is Python 2 code.
"""

import sys
sys.path.insert(0, './packages')

import matplotlib.pyplot as plt
import xlrd
from sklearn import metrics

# 3.3.1
# Open the first sheet of the workbook and read its first three columns.
# Further down, `clss` is passed to scikit-learn as the true labels and
# `m1` / `m2` as the scores of the two models.
xls = xlrd.open_workbook(filename='./Data/classprobs.xls').sheet_by_index(0)

clss = xls.col_values(0)
m1 = xls.col_values(1)
m2 = xls.col_values(2)

# 3.3.2
# Plot the ROC curve of both models together with the chance diagonal.
#
# metrics.roc_curve returns (fpr, tpr, thresholds). A ROC curve is the true
# positive rate plotted against the false positive rate, so the returned FPR
# must be used as the x-axis. The number of returned points depends on the
# data (tied scores, dropped intermediate thresholds), so a hand-made, evenly
# spaced x-axis of fixed length is neither correct nor guaranteed to have a
# matching size.
fpr1, roc1, _ = metrics.roc_curve(clss, m1)
fpr2, roc2, _ = metrics.roc_curve(clss, m2)

plt.plot(fpr1, roc1, label='ROC M1')
plt.plot(fpr2, roc2, label='ROC M2')
# Chance-level reference: the diagonal from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], label='Null hypothesis')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.legend(loc=4)  # location code 4 is 'lower right'
plt.grid()
plt.show()

# 3.3.3
# Area under the ROC curve for each model, computed from the true labels
# and that model's scores.
auc1 = metrics.roc_auc_score(clss, m1)
auc2 = metrics.roc_auc_score(clss, m2)
# Under Python 2 (see the file header) this prints the pair as a tuple.
print(auc1, auc2)

# 3.3.4
# Turn each model's scores into boolean predictions (True when the score is
# strictly greater than 0.5) and report the accuracy of each model as a
# percentage.
pred1 = [score > 0.5 for score in m1]
pred2 = [score > 0.5 for score in m2]

acc1 = 100 * metrics.accuracy_score(clss, pred1)
acc2 = 100 * metrics.accuracy_score(clss, pred2)
print(acc1, acc2)