#!/usr/bin/env python3
"""Count query-term matches in the cached JSON fields of DBpedia entities.

Reads tab-separated lines (from the files named on the command line, or
from stdin) that each name a query and a DBpedia id.  For every field of
the entity's cached JSON document, one tab-separated row is printed with
the number of values in that field containing at least one query term.
"""
import fileinput
import json
import os
from urllib.parse import quote_plus

DATADIR = '/home/camil/temp/information-retrieval-data'
ERRORFILE = 'errors'


def get(dbpediaid, *, datadir=None):
    """Load the cached JSON document for *dbpediaid*.

    The file name is the URL-quoted id plus ``.json`` inside *datadir*
    (``DATADIR`` when *datadir* is None).

    Returns the decoded JSON value, or None when no such file exists.
    """
    if datadir is None:
        datadir = DATADIR
    outfile = os.path.join(datadir, quote_plus(dbpediaid) + '.json')
    if not os.path.isfile(outfile):
        return None
    with open(outfile, encoding='utf-8') as f:
        return json.load(f)


def match(value, terms):
    """Return True if any whitespace-separated token of *value* is in *terms*.

    *terms* may be any container supporting ``in``; a set gives the
    cheapest lookups.
    """
    return any(v in terms for v in value.split())


def run(queries, line, *, datadir=None, errorfile=None):
    """Process one tab-separated input line and print per-field match counts.

    Two line layouts are accepted:

    * six columns: query, (ignored), dbpediaid, (ignored), relevance, method
    * four columns (qrels.txt): query, (ignored), dbpediaid, relevance

    For each field of the entity's JSON document a row is printed with the
    columns: query, dbpediaid, relevance as float, field name, number of
    values in the field, number of values matching a query term.

    Blank lines are skipped, and entities without a cached file produce no
    output.  Any exception raised while processing the entity is printed
    and appended to *errorfile* (``ERRORFILE`` when None) instead of being
    propagated, so one bad entity does not stop the whole run.

    Raises:
        ValueError: if the line has neither four nor six columns.
        KeyError: if the query id is not present in *queries*.
    """
    if not line.strip():
        return

    # Strip the line terminator so it does not stick to the last column.
    fields = line.rstrip('\r\n').split('\t')
    if len(fields) == 6:
        query, _, dbpediaid, _, relevance, _ = fields
    elif len(fields) == 4:
        # For qrels.txt
        query, _, dbpediaid, relevance = fields
    else:
        raise ValueError(
            'expected 4 or 6 tab-separated columns, got {}: {!r}'.format(
                len(fields), line))

    # Build the set once per line; match() probes it for every token.
    terms = set(queries[query].split())

    try:
        result = get(dbpediaid, datadir=datadir)
        if result is None:
            return
        for field, values in result.items():
            matches = sum(1 for value in values if match(value, terms))
            print('{}\t{}\t{}\t{}\t{}\t{}'.format(
                query, dbpediaid, float(relevance), field, len(values),
                matches))
    except Exception as e:
        # Best-effort: report the failing entity and carry on.
        print(dbpediaid)
        print(e)
        if errorfile is None:
            errorfile = ERRORFILE
        with open(errorfile, 'a', encoding='utf-8') as f:
            # The exception must be rendered as text; concatenating it
            # directly to a str raises TypeError.
            f.write('{}\t{}\n'.format(dbpediaid, e))


def main():
    """Load the query terms and process every input line."""
    with open('queries_stopped.json', encoding='utf-8') as f:
        queries = json.load(f)
    for line in fileinput.input():
        run(queries, line)


if __name__ == '__main__':
    main()