aboutsummaryrefslogtreecommitdiff
path: root/Assignment 5/packages
diff options
context:
space:
mode:
authorCamil Staps2015-12-12 16:47:44 +0000
committerCamil Staps2015-12-12 16:47:44 +0000
commitc6f86bdb722aac53bb39b0d78d2b538b6f07a692 (patch)
treea13212987d15f369b0a448df87b49bcca4cc7f51 /Assignment 5/packages
parentFinish assignment 4 (diff)
Assignment 5
Diffstat (limited to 'Assignment 5/packages')
-rwxr-xr-xAssignment 5/packages/aprioribin0 -> 348990 bytes
-rw-r--r--Assignment 5/packages/run_apriori.py102
2 files changed, 102 insertions, 0 deletions
diff --git a/Assignment 5/packages/apriori b/Assignment 5/packages/apriori
new file mode 100755
index 0000000..97c006e
--- /dev/null
+++ b/Assignment 5/packages/apriori
Binary files differ
diff --git a/Assignment 5/packages/run_apriori.py b/Assignment 5/packages/run_apriori.py
new file mode 100644
index 0000000..d2affac
--- /dev/null
+++ b/Assignment 5/packages/run_apriori.py
@@ -0,0 +1,102 @@
+import numpy as np
+from subprocess import call
+import re
+import os
+
+'''
+ run_apriori.py
+ version: 0.2
+ last change: 26/11/2014, by Wout Megchelenbrink
+ fixed lots of issues
+'''
+
+'''
+ Adapted by Camil Staps 2015/12/12
+
+ * Make it a function
+ * Allow for calling it from another directory
+'''
+
def _parse_scored_lines(lines, label):
    """Split apriori output lines into their text and the score tagged `label`.

    Every non-blank line is expected to contain ``<label>. <number>``, e.g.
    ``[Sup. 40]`` or ``[Conf. 66.7,Sup. 40]`` -- the annotation format that
    `run_apriori` requests from the tool through its ``-v`` argument.

    Returns a ``(texts, scores)`` pair: the non-blank lines without their
    trailing line break, and a 1-D float array with the matching scores.

    Raises ValueError when a non-blank line carries no such annotation.
    """
    number = r'[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?'
    pattern = re.compile(re.escape(label) + r'\.\s*(' + number + r')')

    texts = []
    scores = []
    for raw in lines:
        # Strip only the line break; slicing off the last character would eat
        # real data when the final line has no trailing newline.
        text = raw.rstrip('\r\n')
        if not text.strip():
            continue
        found = pattern.findall(text)
        if not found:
            raise ValueError(
                'No "{0}." value found in apriori output line: {1!r}'.format(label, text))
        # The annotation is appended after the item names, so the last match
        # is the one written by the tool rather than part of an item name.
        texts.append(text)
        scores.append(float(found[-1]))
    return texts, np.array(scores, dtype=float)


def run_apriori(filename, minSup, minConf, maxRule):
    """Run the bundled apriori binary on `filename` and collect its output.

    The binary is looked up next to this module, so the function can be called
    from any working directory. Intermediate output is written to a private
    temporary directory that is always removed again.

    Parameters:
        filename: path of the transaction file handed to the binary.
        minSup:   value passed to the binary's ``-s`` option.
        minConf:  value passed to the binary's ``-c`` option; association
                  rules are only mined when this is greater than 0.
        maxRule:  value passed to the binary's ``-n`` option for the rule run.

    Returns:
        ``(FrequentItemsetsSorted, AssocRulesSorted)``: the itemset lines
        ordered by descending support and the rule lines ordered by descending
        confidence. The rule list is empty when ``minConf <= 0``.

    Raises:
        SystemExit: (non-zero code, after printing a message) when the
            platform has no bundled binary, or the binary cannot be started
            or reports a failure.
        ValueError: when an output line lacks the expected annotation.
    """
    import shutil
    import sys
    import tempfile

    # Run Apriori Algorithm
    print('Mining for frequent itemsets by the Apriori algorithm')

    binary_names = {
        'linux': 'apriori',
        'linux2': 'apriori',
        'darwin': 'aprioriMAC',
        'win32': 'apriori.exe',
    }
    binary = binary_names.get(sys.platform)
    if binary is None:
        print('No bundled apriori binary for platform {0!r}'.format(sys.platform))
        sys.exit(1)
    tool = os.path.join(os.path.dirname(os.path.realpath(__file__)), binary)

    def succeeded(argv):
        # An argument list (no shell) keeps paths with spaces or shell
        # metacharacters intact; a binary that cannot be started counts as a
        # failed run instead of surfacing as an unrelated OSError.
        try:
            return call(argv) == 0
        except OSError:
            return False

    workdir = tempfile.mkdtemp(prefix='apriori_')
    try:
        itemsets_path = os.path.join(workdir, 'apriori_temp1.txt')
        itemsets_argv = [
            tool,
            '-s{0}'.format(minSup),
            '-v[Sup. %3S]',
            filename,
            itemsets_path,
        ]
        if not succeeded(itemsets_argv):
            print('An error occured while calling apriori, a likely cause is that minSup was set to high such that no frequent itemsets were generated or spaces are included in the path to the apriori files.')
            sys.exit(1)

        rules_path = None
        if minConf > 0:
            print('Mining for associations by the Apriori algorithm')
            rules_path = os.path.join(workdir, 'apriori_temp2.txt')
            rules_argv = [
                tool,
                '-tr',
                '-f,',
                '-o',
                '-n{0}'.format(maxRule),
                '-c{0}'.format(minConf),
                '-s{0}'.format(minSup),
                '-v[Conf. %3C,Sup. %3S]',
                filename,
                rules_path,
            ]
            if not succeeded(rules_argv):
                print('An error occured while calling apriori')
                sys.exit(1)
        print('Apriori analysis done, extracting results')

        # Extract information from the stored output files
        with open(itemsets_path, 'r') as handle:
            itemset_lines = handle.readlines()
        rule_lines = []
        if rules_path is not None:
            # Without a rule run there is no rule file to read.
            with open(rules_path, 'r') as handle:
                rule_lines = handle.readlines()
    finally:
        # Also runs when sys.exit() is raised above, so nothing is left behind.
        shutil.rmtree(workdir, ignore_errors=True)

    FrequentItemsets, sup = _parse_scored_lines(itemset_lines, 'Sup')
    AssocRules, conf = _parse_scored_lines(rule_lines, 'Conf')

    # sort (FrequentItemsets by support value, AssocRules by confidence value),
    # highest value first
    AssocRulesSorted = [AssocRules[i] for i in np.argsort(conf)[::-1]]
    FrequentItemsetsSorted = [FrequentItemsets[i] for i in np.argsort(sup)[::-1]]

    return FrequentItemsetsSorted, AssocRulesSorted
+
def apriori_print(FrequentItemsetsSorted, AssocRulesSorted, delay=.5):
    """Print the frequent itemsets and association rules, one per line.

    Parameters:
        FrequentItemsetsSorted: iterable of itemset descriptions, printed in
            the given order, each prefixed with ``Item: ``.
        AssocRulesSorted: iterable of rule descriptions, printed in the given
            order, each prefixed with ``Rule: ``.
        delay: seconds to wait before printing (default 0.5, the pause the
            function has always used); pass 0 to print immediately.
    """
    import time

    # NOTE(review): the pause presumably lets output of the external apriori
    # process appear before the report -- confirm with the original author.
    if delay > 0:
        time.sleep(delay)

    # Print the results
    print('\n')
    print('RESULTS:\n')
    print('Frequent itemsets:')
    for item in FrequentItemsetsSorted:
        print('Item: {0}'.format(item))
    print('\n')
    print('Association rules:')
    for item in AssocRulesSorted:
        print('Rule: {0}'.format(item))