#!/usr/bin/env python3
"""Count, per entity field, how many values share a term with a query.

Input lines are read via ``fileinput`` (files named on the command line,
or stdin). Each line must hold exactly six tab-separated columns; the
first is a query id and the third is an entity id. Query terms come from
``queries_stopped.json`` and entity data from ``DATADIR/<quoted id>.json``.
One result row per (query, entity, field) is written to ``run.txt``.
"""
import fileinput
import json
import os
from urllib.parse import quote_plus

DATADIR = 'data'
ERRORFILE = 'errors'


def get(dbpediaid):
    """Load the stored JSON document for *dbpediaid*.

    The file name is the URL-quoted id plus ``.json`` inside ``DATADIR``.

    Returns:
        The decoded JSON content, or ``None`` when no such file exists.
    """
    path = os.path.join(DATADIR, quote_plus(dbpediaid) + '.json')
    if not os.path.isfile(path):
        return None
    with open(path) as f:
        return json.load(f)


def match(value, terms):
    """Return True if any whitespace-separated token of *value* is in *terms*.

    Args:
        value: String to tokenize with ``str.split()``.
        terms: Container supporting ``in`` (list, set, ...).
    """
    return any(token in terms for token in value.split())


def run(queries, line, outfile):
    """Process one input line and write per-field match counts to *outfile*.

    Args:
        queries: Mapping from query id to a whitespace-separated term string.
        line: Exactly six tab-separated columns; column 1 is the query id and
            column 3 the entity id. The other columns are ignored.
        outfile: Writable text file object. For each field of the entity a
            row ``query, entity id, field, number of values, number of
            matching values`` (tab-separated) is written.

    Raises:
        ValueError: If *line* does not have exactly six columns.
        KeyError: If the query id is not in *queries*.

    Errors while loading or scanning the entity are not raised: they are
    printed and appended to ``ERRORFILE`` so a long run keeps going.
    """
    # Unpacking into six targets deliberately enforces the column count.
    query, _, dbpediaid, _, _, _ = line.split('\t')
    # A set gives O(1) membership tests; match() only needs ``in``.
    terms = set(queries[query].split())
    try:
        result = get(dbpediaid)
        if result is None:
            return
        for field, values in result.items():
            matches = sum(1 for value in values if match(value, terms))
            outfile.write('{}\t{}\t{}\t{}\t{}\n'.format(
                query, dbpediaid, field, len(values), matches))
    except Exception as e:
        print(dbpediaid)
        print(e)
        with open(ERRORFILE, 'a') as f:
            # str(e) is required: concatenating the exception object itself
            # raises TypeError and would crash inside the error handler.
            f.write(dbpediaid + '\t' + str(e) + '\n')


def main():
    """Load the query terms and process every input line into ``run.txt``."""
    with open('queries_stopped.json') as f:
        queries = json.load(f)
    with open('run.txt', 'w') as out:
        for line in fileinput.input():
            run(queries, line, out)


if __name__ == '__main__':
    main()