# Recovered from the patch "Assignment 5" (commit c6f86bd, Camil Staps,
# 2015-12-12), which adds "Assignment 5/packages/run_apriori.py" next to the
# bundled "apriori" binary.
"""Run the bundled Apriori binary and collect its sorted results.

run_apriori.py
version: 0.2
last change: 26/11/2014, by Wout Megchelenbrink
fixed lots of issues

Adapted by Camil Staps 2015/12/12

* Make it a function
* Allow for calling it from another directory
"""
import os
import re
import sys
import time
from subprocess import call

import numpy as np

# A non-negative decimal number, optionally with an exponent.
_NUMBER = r'((?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)'
# The binary is asked (via -v) to append "[Sup. <value>]" to every itemset and
# "[Conf. <value>,Sup. <value>]" to every rule; these patterns read that tag.
_SUPPORT_RE = re.compile(r'\[Sup\.\s*' + _NUMBER + r'\]')
_CONFIDENCE_RE = re.compile(r'\[Conf\.\s*' + _NUMBER + r',')

# Scratch files the binary writes into the current working directory.
_ITEMSETS_FILE = 'apriori_temp1.txt'
_RULES_FILE = 'apriori_temp2.txt'

# sys.platform prefix -> name of the executable shipped next to this module.
_BINARIES = (
    ('linux', 'apriori'),
    ('darwin', 'aprioriMAC'),
    ('win32', 'apriori.exe'),
)


def _apriori_binary():
    """Return the absolute path of the executable for the current platform.

    Raises:
        RuntimeError: if no executable is known for ``sys.platform``.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    for prefix, name in _BINARIES:
        if sys.platform.startswith(prefix):
            return os.path.join(here, name)
    raise RuntimeError(
        'No bundled apriori binary for platform {0!r}'.format(sys.platform))


def _run_binary(args):
    """Run the binary with ``args``; return True only if it exited with 0."""
    try:
        # An argument list (no shell) keeps paths containing spaces intact.
        return call(args) == 0
    except OSError:
        # Executable missing or not runnable: report it like any other failure.
        return False


def _read_lines(path):
    """Return all lines of the text file at ``path``."""
    with open(path, 'r') as handle:
        return handle.readlines()


def _remove_quietly(path):
    """Delete ``path`` if it exists; cleanup is deliberately best-effort."""
    try:
        os.remove(path)
    except OSError:
        pass


def _tagged_value(line, pattern, label):
    """Return the number captured by the last match of ``pattern`` in ``line``.

    Raises:
        ValueError: if ``line`` carries no such tag.
    """
    found = pattern.findall(line)
    if not found:
        raise ValueError(
            'No {0} value found in apriori output line: {1!r}'.format(
                label, line))
    # The tag is appended at the end of the line, so the last match wins even
    # if an item name happens to look like a tag.
    return float(found[-1])


def _rank_by_value(lines, pattern, label):
    """Return the non-blank ``lines`` ordered by their tagged value, descending."""
    entries = []
    values = []
    for raw in lines:
        # Strip only the line terminator; slicing off the last character
        # would eat real text when the final line has no newline.
        entry = raw.rstrip('\r\n')
        if not entry.strip():
            continue
        entries.append(entry)
        values.append(_tagged_value(entry, pattern, label))
    order = np.argsort(np.asarray(values, dtype=float))[::-1]
    return [entries[index] for index in order]


def sort_itemsets_by_support(lines):
    """Return itemset output lines sorted by their ``[Sup. x]`` value, highest first.

    Args:
        lines: iterable of raw output lines, with or without trailing newline.

    Returns:
        List of lines without line terminators; blank lines are dropped.

    Raises:
        ValueError: if a non-blank line has no ``[Sup. x]`` tag.
    """
    return _rank_by_value(lines, _SUPPORT_RE, 'support')


def sort_rules_by_confidence(lines):
    """Return rule output lines sorted by their ``[Conf. x,`` value, highest first.

    Args:
        lines: iterable of raw output lines, with or without trailing newline.

    Returns:
        List of lines without line terminators; blank lines are dropped.

    Raises:
        ValueError: if a non-blank line has no ``[Conf. x,`` tag.
    """
    return _rank_by_value(lines, _CONFIDENCE_RE, 'confidence')


def run_apriori(filename, minSup, minConf, maxRule):
    """Mine frequent itemsets and association rules with the bundled binary.

    Args:
        filename: path of the transaction file handed to the binary.
        minSup: value passed to the binary's ``-s`` option.
        minConf: value passed to the binary's ``-c`` option; rules are only
            mined when it is greater than 0.
        maxRule: value passed to the binary's ``-n`` option for rule mining.

    Returns:
        Tuple ``(FrequentItemsetsSorted, AssocRulesSorted)``: the itemset
        lines sorted by descending support and the rule lines sorted by
        descending confidence. The rule list is empty when ``minConf <= 0``.

    Raises:
        SystemExit: with status 1 after printing a message when the binary
            cannot be run or exits with a non-zero status.
        RuntimeError: if there is no bundled binary for this platform.
        ValueError: if an output line lacks the expected value tag.
    """
    # Run Apriori Algorithm
    print('Mining for frequent itemsets by the Apriori algorithm')
    binary = _apriori_binary()
    data_file = '{0}'.format(filename)
    mine_rules = minConf > 0

    try:
        itemset_args = [
            binary,
            '-s{0}'.format(minSup),
            '-v[Sup. %3S]',
            data_file,
            _ITEMSETS_FILE,
        ]
        if not _run_binary(itemset_args):
            print('An error occured while calling apriori, a likely cause is that minSup was set to high such that no frequent itemsets were generated or spaces are included in the path to the apriori files.')
            sys.exit(1)

        if mine_rules:
            print('Mining for associations by the Apriori algorithm')
            rule_args = [
                binary,
                '-tr',
                '-f,',
                '-o',
                '-n{0}'.format(maxRule),
                '-c{0}'.format(minConf),
                '-s{0}'.format(minSup),
                '-v[Conf. %3C,Sup. %3S]',
                data_file,
                _RULES_FILE,
            ]
            if not _run_binary(rule_args):
                print('An error occured while calling apriori')
                sys.exit(1)
        print('Apriori analysis done, extracting results')

        # Extract information from the stored scratch files.
        itemsets = sort_itemsets_by_support(_read_lines(_ITEMSETS_FILE))
        # Without rule mining the rules file was never written.
        if mine_rules:
            rules = sort_rules_by_confidence(_read_lines(_RULES_FILE))
        else:
            rules = []
    finally:
        # Remove the scratch files on success and on every failure path.
        _remove_quietly(_ITEMSETS_FILE)
        _remove_quietly(_RULES_FILE)

    return itemsets, rules


def apriori_print(FrequentItemsetsSorted, AssocRulesSorted):
    """Print the frequent itemsets and association rules, one per line.

    Args:
        FrequentItemsetsSorted: iterable of itemset strings, in print order.
        AssocRulesSorted: iterable of rule strings, in print order.
    """
    # Print the results
    # NOTE(review): the half-second pause is kept from the original;
    # presumably it lets the binary's own console output settle first.
    time.sleep(.5)
    print('\n')
    print('RESULTS:\n')
    print('Frequent itemsets:')
    for item in FrequentItemsetsSorted:
        print('Item: {0}'.format(item))
    print('\n')
    print('Association rules:')
    for item in AssocRulesSorted:
        print('Rule: {0}'.format(item))