1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
|
import numpy as np
from subprocess import call
import re
import os
'''
run_apriory.py
version: 0.2
last change: 26/11/2014, by Wout Megchelenbrink
fixed lots of issues
'''
'''
Adapted by Camil Staps 2015/12/12
* Make it a function
* Allow for calling it from another directory
'''
# Patterns for the annotations that run_apriori asks apriori to append to each
# output line via its -v option ("[Sup. x]" and "[Conf. x,Sup. y]").
_SUPPORT_PATTERN = re.compile(r'\[Sup\.\s*([0-9.eE+-]+)\]')
_CONFIDENCE_PATTERN = re.compile(r'\[Conf\.\s*([0-9.eE+-]+),')


def _apriori_executable(platform_name):
    """Return the path of the apriori binary located next to this file."""
    if platform_name.startswith('linux'):
        binary = 'apriori'
    elif platform_name == 'darwin':
        binary = 'aprioriMAC'
    elif platform_name == 'win32':
        binary = 'apriori.exe'
    else:
        raise RuntimeError(
            'No apriori executable available for platform {0!r}'.format(platform_name))
    return os.path.join(os.path.dirname(os.path.realpath(__file__)), binary)


def _call_apriori(arguments):
    """Run apriori with an argument list; return its exit status (-1 if it cannot start)."""
    try:
        # An argument list (no shell) keeps paths containing spaces intact.
        return call(arguments)
    except OSError as err:
        print('Could not start apriori: {0}'.format(err))
        return -1


def _remove_if_present(path):
    """Delete *path* if it exists."""
    if os.path.exists(path):
        os.remove(path)


def _read_scored_lines(path, pattern):
    """Read *path*, delete it, and return (lines, scores) with scores taken from *pattern*."""
    entries = []
    scores = []
    try:
        with open(path, 'r') as handle:
            for raw in handle:
                entry = raw.rstrip('\r\n')
                if not entry.strip():
                    continue
                found = pattern.findall(entry)
                if not found:
                    raise ValueError(
                        'Could not parse apriori output line: {0!r}'.format(entry))
                # The annotation is appended after the items, so the last
                # match is the real one even if an item name looks similar.
                entries.append(entry)
                scores.append(float(found[-1]))
    finally:
        # Never leave the temporary file behind, even when parsing fails.
        _remove_if_present(path)
    return entries, scores


def _sorted_by_score_descending(entries, scores):
    """Return *entries* ordered from highest to lowest score."""
    ascending = np.argsort(np.asarray(scores, dtype=float))
    return [entries[k] for k in ascending[::-1]]


def run_apriori(filename, minSup, minConf, maxRule):
    """Mine frequent itemsets and association rules with the apriori binary.

    The executable is looked up in the directory of this file (``apriori`` on
    Linux, ``aprioriMAC`` on macOS, ``apriori.exe`` on Windows). Results are
    written to ``apriori_temp1.txt`` / ``apriori_temp2.txt`` in the current
    working directory, parsed, and the temporary files are removed.

    Parameters:
        filename: path of the transaction file handed to apriori.
        minSup: value passed to apriori's ``-s`` option (minimum support).
        minConf: value passed to apriori's ``-c`` option (minimum confidence).
            Association rules are mined only when this is greater than 0.
        maxRule: value passed to apriori's ``-n`` option
            (presumably the maximum number of items per rule; confirm
            against the apriori documentation).

    Returns:
        ``(FrequentItemsetsSorted, AssocRulesSorted)``: two lists of output
        lines, the first sorted by descending support and the second by
        descending confidence. The rule list is empty when ``minConf <= 0``.

    Raises:
        SystemExit: if apriori cannot be started or exits with a non-zero status.
        RuntimeError: if the current platform has no matching executable.
        ValueError: if an output line lacks the expected annotation.
    """
    import sys

    itemset_file = 'apriori_temp1.txt'
    rule_file = 'apriori_temp2.txt'
    executable = _apriori_executable(sys.platform)
    filename = str(filename)

    # Run Apriori Algorithm
    print('Mining for frequent itemsets by the Apriori algorithm')
    status1 = _call_apriori([
        executable,
        '-s{0}'.format(minSup),
        '-v[Sup. %3S]',
        filename,
        itemset_file,
    ])
    if status1 != 0:
        print('An error occurred while calling apriori, a likely cause is that '
              'minSup was set too high such that no frequent itemsets were '
              'generated, or the apriori executable or input file could not be found.')
        _remove_if_present(itemset_file)
        sys.exit()

    mine_rules = minConf > 0
    if mine_rules:
        print('Mining for associations by the Apriori algorithm')
        status2 = _call_apriori([
            executable,
            '-tr',
            '-f,',
            '-o',
            '-n{0}'.format(maxRule),
            '-c{0}'.format(minConf),
            '-s{0}'.format(minSup),
            '-v[Conf. %3C,Sup. %3S]',
            filename,
            rule_file,
        ])
        if status2 != 0:
            print('An error occurred while calling apriori')
            _remove_if_present(itemset_file)
            _remove_if_present(rule_file)
            sys.exit()
    print('Apriori analysis done, extracting results')

    # Extract frequent itemsets together with their support values.
    FrequentItemsets, sup = _read_scored_lines(itemset_file, _SUPPORT_PATTERN)

    # Extract association rules with their confidence values; no rule file
    # exists when rule mining was skipped.
    if mine_rules:
        AssocRules, conf = _read_scored_lines(rule_file, _CONFIDENCE_PATTERN)
    else:
        AssocRules, conf = [], []

    # Sort (FrequentItemsets by support value, AssocRules by confidence value)
    FrequentItemsetsSorted = _sorted_by_score_descending(FrequentItemsets, sup)
    AssocRulesSorted = _sorted_by_score_descending(AssocRules, conf)
    return FrequentItemsetsSorted, AssocRulesSorted
def apriori_print(FrequentItemsetsSorted, AssocRulesSorted, delay=.5):
    """Print the frequent itemsets and association rules to standard output.

    Parameters:
        FrequentItemsetsSorted: iterable of itemset descriptions; each is
            printed on its own line prefixed with ``Item: ``.
        AssocRulesSorted: iterable of rule descriptions; each is printed on
            its own line prefixed with ``Rule: ``.
        delay: seconds to sleep before printing (default 0.5, the previously
            hard-coded value). Presumably this lets output of a preceding
            apriori run appear first; TODO confirm. Pass 0 to skip the pause.
    """
    import time

    if delay and delay > 0:
        time.sleep(delay)

    # Print the results
    print('\n')
    print('RESULTS:\n')
    print('Frequent itemsets:')
    for itemset in FrequentItemsetsSorted:
        print('Item: {0}'.format(itemset))
    print('\n')
    print('Association rules:')
    for rule in AssocRulesSorted:
        print('Rule: {0}'.format(rule))
|