diff options
Diffstat (limited to 'run.py')
-rwxr-xr-x | run.py | 52 |
1 files changed, 52 insertions, 0 deletions
#!/usr/bin/env python3
"""Count query-term matches per field of locally stored DBpedia JSON documents.

Reads tab-separated run/qrels lines (from the files named on the command
line, or stdin) and, for every line whose entity has a JSON file in DATADIR,
prints one tab-separated row per field of that entity.
"""

import fileinput
import json
import os
from urllib.parse import quote_plus

DATADIR = '/home/camil/temp/information-retrieval-data'
ERRORFILE = 'errors'


def get(dbpediaid):
    """Return the parsed JSON stored for *dbpediaid*, or None if there is none.

    The file is looked up in DATADIR under the URL-quoted id plus '.json'.
    """
    outfile = os.path.join(DATADIR, quote_plus(dbpediaid) + '.json')
    if not os.path.isfile(outfile):
        return None
    with open(outfile, encoding='utf-8') as f:
        return json.load(f)


def match(value, terms):
    """Return True if any whitespace-separated token of *value* is in *terms*."""
    return any(token in terms for token in value.split())


def run(queries, line):
    """Process one tab-separated input line and print per-field match counts.

    *line* has either six fields (query, _, dbpediaid, _, relevance, method)
    or four fields (query, _, dbpediaid, relevance; the qrels.txt layout).
    *queries* maps a query id to its whitespace-separated term string.

    For each field of the entity's JSON document one row is printed:
    query, dbpediaid, relevance as float, field name, number of values,
    number of values containing at least one query term.

    Entities without a JSON file are skipped. Any exception raised while
    loading or scoring an entity is printed and appended to ERRORFILE.

    Raises:
        ValueError: if *line* has neither four nor six fields.
        KeyError: if the query id is not present in *queries*.
    """
    fields = line.rstrip('\n').split('\t')
    if len(fields) == 6:
        query, _, dbpediaid, _, relevance, _method = fields
    elif len(fields) == 4:  # For qrels.txt
        query, _, dbpediaid, relevance = fields
    else:
        raise ValueError(
            'expected 4 or 6 tab-separated fields, got {}'.format(len(fields)))

    # A set makes the per-token membership test in match() O(1).
    terms = set(queries[query].split())
    try:
        result = get(dbpediaid)
        if result is None:
            return
        for field, values in result.items():
            matches = sum(1 for value in values if match(value, terms))
            print('{}\t{}\t{}\t{}\t{}\t{}'.format(
                query, dbpediaid, float(relevance), field, len(values),
                matches))
    except Exception as e:
        # Best effort: report the failing entity and carry on with the input.
        print(dbpediaid)
        print(e)
        with open(ERRORFILE, 'a', encoding='utf-8') as f:
            # str(e) is required: concatenating the exception object itself
            # raises TypeError and would abort the run from inside the handler.
            f.write('{}\t{}\n'.format(dbpediaid, str(e)))


def main():
    """Load the query terms and process every input line."""
    with open('queries_stopped.json', encoding='utf-8') as f:
        queries = json.load(f)

    for line in fileinput.input():
        run(queries, line)


if __name__ == '__main__':
    main()