# Source: root/Assignment 2/ex23.py (blob 7c763ee818083d525f87c8981f3fc2702fc4af59)
# -*- coding: utf-8 -*-
"""
Created on Sun Oct 11 18:47:35 2015

@author: Camil Staps (s4498062)

Run with Python 2.7
"""

import matplotlib.pyplot as plt
import numpy as np

def all_samples(data, n):
    """Enumerate every unordered sample of n elements drawn from data.

    Elements are drawn without replacement, and each sample lists its
    elements in the order in which they appear in data.

    data -- a sequence that supports iteration and slicing
    n    -- the number of elements per sample

    Returns a list of lists. For n == 0 this is [[]] (the single empty
    sample); when data holds fewer than n elements it is [].
    """
    if n == 0:
        return [[]]

    combos = []
    for start, head in enumerate(data):
        # Only elements after `head` may follow it, so every combination is
        # generated exactly once regardless of ordering.
        tails = all_samples(data[start + 1:], n - 1)
        combos.extend([head] + tail for tail in tails)
    return combos
        
def nearly_equal(m, n, sig_fig = 5):
    """Determine whether two numbers are nearly equal.

    m, n    -- the numbers to compare
    sig_fig -- number of decimal places that must agree; despite its name
               it acts as an absolute tolerance of 10 ** -sig_fig

    Returns True when m and n are exactly equal or differ by less than
    10 ** -sig_fig, and False otherwise (always a plain bool).
    """
    tolerance = 10.0 ** -sig_fig
    # Compare the difference against a tolerance instead of truncating both
    # values with int(): truncation reports values that straddle a boundary
    # (e.g. 0.999999 and 1.000001) as different, and fails on inf / nan.
    # The exact-equality test comes first so that equal infinities match.
    return bool(m == n or abs(m - n) < tolerance)

# The complete population under study.
data = np.array([2,3,6,8,11,18])
population_mean = data.mean()
population_std = data.std()

# i: population parameters
print("Mean: %f\nStandard deviation: %f" % (population_mean, population_std))

# Every unordered sample without replacement of size 2 and of size 4.
samples_2 = all_samples(data, 2)
samples_4 = all_samples(data, 4)

# ii: each sample together with its mean
print([(s, np.mean(s)) for s in samples_2])
print([(s, np.mean(s)) for s in samples_4])

# iii: mean and standard deviation of the sample means
samples_2_means = [np.mean(s) for s in samples_2]
samples_4_means = [np.mean(s) for s in samples_4]

# Computed once here and reused by the checks in part iv.
mean_of_means_2 = np.mean(samples_2_means)
std_of_means_2 = np.std(samples_2_means)
mean_of_means_4 = np.mean(samples_4_means)
std_of_means_4 = np.std(samples_4_means)

print("Mean of 2-sample means: %f" % mean_of_means_2)
print("Standard deviation of 2-sample means: %f" % std_of_means_2)
print("Mean of 4-sample means: %f" % mean_of_means_4)
print("Standard deviation of 4-sample means: %f" % std_of_means_4)

# iv: compare against the population parameters.
# The means are floating-point results, so they are compared with a
# tolerance (nearly_equal) rather than with ==.
print("Means are equal (2): %r" % nearly_equal(mean_of_means_2, population_mean))
print("Means are equal (4): %r" % nearly_equal(mean_of_means_4, population_mean))
# Expected spread of the sample means: sigma / sqrt(n), multiplied by the
# factors sqrt(4/5) and sqrt(2/5) named in the labels below.
print("σ2 ≈ σ/√2×√(4/5): %r" % nearly_equal(
        std_of_means_2, population_std / np.sqrt(2.) * np.sqrt(4./5.)))
print("σ4 ≈ σ/√4×√(2/5): %r" % nearly_equal(
        std_of_means_4, population_std / np.sqrt(4.) * np.sqrt(2./5.)))

# v: histograms of the population and of both sample-mean distributions,
# drawn side by side in one figure.
panels = [
    (data, 'Population distribution'),
    (samples_2_means, '2-Sample mean distribution'),
    (samples_4_means, '4-Sample mean distribution'),
]
plt.figure(figsize=(10,4))
for position, (values, title) in enumerate(panels, 1):
    plt.subplot(1, len(panels), position)
    plt.hist(values)
    plt.title(title)
plt.show()