'''
Run the external Apriori binary and collect its results.

run_apriory.py
version: 0.2
last change: 26/11/2014, by Wout Megchelenbrink
fixed lots of issues

Adapted by Camil Staps 2015/12/12
 * Make it a function
 * Allow for calling it from another directory
'''
import os
import re
import sys
import time
from subprocess import call

import numpy as np

# Scratch files the apriori binary writes into the current working directory.
_ITEMSET_FILE = 'apriori_temp1.txt'
_RULE_FILE = 'apriori_temp2.txt'

# The "-v" output formats used below append "[Sup. <value>]" to every itemset
# line and "[Conf. <value>,Sup. <value>]" to every rule line.
_SUPPORT_RE = re.compile(r'\[Sup\. ([^\],]+)\]')
_CONFIDENCE_RE = re.compile(r'\[Conf\. ([^\],]+),')


def _apriori_binary():
    '''Return the path of the platform-specific binary next to this file.'''
    if sys.platform.startswith('linux'):
        name = 'apriori'
    elif sys.platform == 'darwin':
        name = 'aprioriMAC'
    elif sys.platform == 'win32':
        name = 'apriori.exe'
    else:
        raise RuntimeError(
            'No apriori binary available for platform {0!r}'.format(
                sys.platform))
    return os.path.join(os.path.dirname(os.path.realpath(__file__)), name)


def _run_binary(args):
    '''Run the binary without a shell and return its exit status.

    A binary that cannot be started is reported and treated like a failed
    run (non-zero status) so that the caller has a single error path.
    '''
    try:
        return call(args)
    except OSError as err:
        print('Could not start {0}: {1}'.format(args[0], err))
        return 1


def _remove_if_present(path):
    '''Delete a scratch file if it exists.'''
    if os.path.exists(path):
        os.remove(path)


def _extract_value(line, pattern):
    '''Return the number captured by *pattern* in an output line.'''
    matches = pattern.findall(line)
    if not matches:
        raise ValueError(
            'Unexpected line in apriori output: {0!r}'.format(line))
    # The statistics are appended after the items, so the last match is the
    # one written by the "-v" format even if an item name looks similar.
    return float(matches[-1])


def _read_sorted(path, pattern):
    '''Read result lines from *path*, delete it, and sort the lines.

    Lines are ordered by the value *pattern* captures, largest first.
    '''
    try:
        with open(path, 'r') as handle:
            # Strip only the line ending; the final line may lack one.
            entries = [line.rstrip('\r\n') for line in handle]
    finally:
        _remove_if_present(path)
    entries = [entry for entry in entries if entry.strip()]
    scores = np.array(
        [_extract_value(entry, pattern) for entry in entries], dtype=float)
    # Ascending argsort read backwards, i.e. the same ordering as sorting
    # ascending and then reversing the resulting list.
    return [entries[index] for index in np.argsort(scores)[::-1]]


def run_apriori(filename, minSup, minConf, maxRule):
    '''Mine frequent itemsets and association rules with the apriori binary.

    The binary is looked up in the directory of this file, so the function
    can be called from any working directory. Scratch files are written to
    (and removed from) the current working directory.

    Parameters:
        filename: path of the input file handed to the binary. It is passed
            as a single argument, so it may contain spaces.
        minSup: value forwarded as the binary's "-s" option.
        minConf: value forwarded as the binary's "-c" option. Rules are only
            mined when this is greater than 0.
        maxRule: value forwarded as the binary's "-n" option.

    Returns:
        A tuple (itemsets, rules). Both are lists of output lines without
        line endings; itemsets are sorted by descending support and rules by
        descending confidence. rules is empty when minConf <= 0.

    Raises:
        SystemExit: with status 1 when the binary fails or cannot be started.
        RuntimeError: when no binary exists for the current platform.
        ValueError: when an output line lacks the expected statistics.
    '''
    binary = _apriori_binary()

    print('Mining for frequent itemsets by the Apriori algorithm')
    status = _run_binary([
        binary,
        '-s{0}'.format(minSup),
        '-v[Sup. %3S]',
        filename,
        _ITEMSET_FILE,
    ])
    if status != 0:
        print('An error occured while calling apriori, a likely cause is '
              'that minSup was set to high such that no frequent itemsets '
              'were generated.')
        _remove_if_present(_ITEMSET_FILE)
        sys.exit(1)

    mine_rules = minConf > 0
    if mine_rules:
        print('Mining for associations by the Apriori algorithm')
        status = _run_binary([
            binary,
            '-tr',
            '-f,',
            '-o',
            '-n{0}'.format(maxRule),
            '-c{0}'.format(minConf),
            '-s{0}'.format(minSup),
            '-v[Conf. %3C,Sup. %3S]',
            filename,
            _RULE_FILE,
        ])
        if status != 0:
            print('An error occured while calling apriori')
            _remove_if_present(_ITEMSET_FILE)
            _remove_if_present(_RULE_FILE)
            sys.exit(1)
    print('Apriori analysis done, extracting results')

    FrequentItemsetsSorted = _read_sorted(_ITEMSET_FILE, _SUPPORT_RE)
    # Without a rule-mining run there is no rule file to read.
    if mine_rules:
        AssocRulesSorted = _read_sorted(_RULE_FILE, _CONFIDENCE_RE)
    else:
        AssocRulesSorted = []
    return FrequentItemsetsSorted, AssocRulesSorted


def apriori_print(FrequentItemsetsSorted, AssocRulesSorted):
    '''Print the itemsets and rules returned by run_apriori.

    Parameters:
        FrequentItemsetsSorted: iterable of itemset lines to print.
        AssocRulesSorted: iterable of rule lines to print.
    '''
    # NOTE(review): the pause presumably lets output of the external binary
    # appear before the results - confirm before removing.
    time.sleep(.5)
    print('\n')
    print('RESULTS:\n')
    print('Frequent itemsets:')
    for item in FrequentItemsetsSorted:
        print('Item: {0}'.format(item))
    print('\n')
    print('Association rules:')
    for item in AssocRulesSorted:
        print('Rule: {0}'.format(item))