diff options
-rw-r--r-- | .gitignore | 1 | ||||
-rwxr-xr-x | run.py | 55 |
2 files changed, 56 insertions, 0 deletions
# .gitignore hunk (@@ -1,2 +1,3 @@): keeps `errors` and `data/`, adds `run.txt`.
# run.py hunk (@@ -0,0 +1,55 @@): new file, shown below as plain source.
#!/usr/bin/env python3
"""Count, per entity field, how many values share a term with a query.

Reads tab-separated lines from the files named on the command line (or
stdin), looks up each referenced entity through the Nordlys lookup API
(responses are cached as JSON files under ``DATADIR``), and writes one
row per (query, entity, field) to ``run.txt``.
"""

import fileinput
import json
import os
from urllib.parse import quote_plus
from urllib.request import urlopen, Request

DATADIR = 'data'
ERRORFILE = 'errors'


def _lookup_url(dbpediaid):
    """Return the lookup URL for *dbpediaid*, leaving '/' unescaped."""
    return 'http://api.nordlys.cc/ec/lookup_id/{}'.format(
        quote_plus(dbpediaid, safe='/'))


def get(dbpediaid):
    """Return the parsed JSON record for *dbpediaid*.

    The raw response is cached in ``DATADIR`` under the URL-quoted id, so
    the service is contacted at most once per id.

    Raises:
        OSError: on network or file-system failures (``URLError`` is a
            subclass of ``OSError``).
        ValueError: if the cached text is not valid JSON.
    """
    outfile = os.path.join(DATADIR, quote_plus(dbpediaid) + '.json')
    if not os.path.isfile(outfile):
        url = _lookup_url(dbpediaid)
        print(url)
        request = Request(url, headers={'User-Agent': 'Radboud University'})
        # Close the HTTP response deterministically instead of leaking it.
        with urlopen(request) as response:
            payload = response.read().decode(encoding='UTF-8')
        # The cache directory is git-ignored, so it may not exist yet.
        os.makedirs(DATADIR, exist_ok=True)
        # Write and read the cache as UTF-8 regardless of the locale, to
        # match the encoding the response was decoded with.
        with open(outfile, 'w', encoding='utf-8') as f:
            f.write(payload)
    with open(outfile, encoding='utf-8') as f:
        return json.load(f)


def match(value, terms):
    """Return True if any whitespace-separated token of *value* is in *terms*."""
    return any(token in terms for token in value.split())


def run(queries, line, outfile):
    """Process one input line and append its per-field match counts.

    Args:
        queries: mapping from query id to the query text.
        line: six tab-separated fields; the first is the query id and the
            third is the entity id (the remaining fields are not used).
        outfile: writable text stream receiving rows of the form
            ``query, entity id, field, number of values, number of values
            sharing a term with the query`` (tab-separated).

    Failures while fetching or scoring an entity are printed and appended
    to ``ERRORFILE``; they do not stop processing of later lines.
    """
    query, _, dbpediaid, _, _relevance, _method = line.split('\t')
    # A set gives O(1) membership tests; results are identical to a list.
    terms = set(queries[query].split())
    try:
        result = get(dbpediaid)
        for field, values in result.items():
            matches = sum(1 for value in values if match(value, terms))
            outfile.write('{}\t{}\t{}\t{}\t{}\n'.format(
                query, dbpediaid, field, len(values), matches))
    except Exception as e:
        # Deliberate best-effort boundary: record the failure and move on.
        print(dbpediaid)
        print(e)
        with open(ERRORFILE, 'a', encoding='utf-8') as f:
            # The exception must be rendered as text; concatenating the
            # exception object itself to a str raises TypeError.
            f.write('{}\t{}\n'.format(dbpediaid, e))


def main():
    """Load the queries and process every input line into ``run.txt``."""
    with open('queries_stopped.json', encoding='utf-8') as f:
        queries = json.load(f)

    with open('run.txt', 'w', encoding='utf-8') as out:
        for line in fileinput.input():
            run(queries, line, out)


if __name__ == '__main__':
    main()