aboutsummaryrefslogtreecommitdiff
path: root/run.py
diff options
context:
space:
mode:
Diffstat (limited to 'run.py')
-rwxr-xr-x  run.py  52
1 files changed, 52 insertions, 0 deletions
diff --git a/run.py b/run.py
new file mode 100755
index 0000000..1551717
--- /dev/null
+++ b/run.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python3
+
+import fileinput
+import json
+import os
+from urllib.parse import quote_plus
+
# Directory holding one JSON file per entity, named after the URL-quoted
# DBpedia id (see get()).
DATADIR = '/home/camil/temp/information-retrieval-data'
# File, relative to the working directory, that run() appends failures to.
ERRORFILE = 'errors'
+
def get(dbpediaid):
    """Load the stored JSON document for *dbpediaid*.

    The document is looked up in DATADIR under the URL-quoted id with a
    ``.json`` suffix.

    Returns the decoded JSON value, or None when no such regular file
    exists.
    """
    filename = quote_plus(dbpediaid) + '.json'
    path = os.path.join(DATADIR, filename)
    if os.path.isfile(path):
        with open(path) as handle:
            return json.load(handle)
    return None
+
def match(value, terms):
    """Return True if any whitespace-separated token of *value* is in *terms*.

    Tokens are compared for exact equality; substrings do not count.
    """
    return any(token in terms for token in value.split())
+
def run(queries, line):
    """Print per-field query-term match counts for one input line.

    *line* is tab-separated and has either six columns
    (query, ignored, dbpediaid, ignored, relevance, method) or four
    columns (query, ignored, dbpediaid, relevance; the qrels.txt layout).
    *queries* maps the query column to the query text, which is split on
    whitespace into terms.

    For every field of the entity document returned by get(), one
    tab-separated row is printed: query, dbpediaid, relevance as a float,
    field name, number of values in the field, and number of those values
    containing at least one query term. Nothing is printed when the
    entity has no stored document.

    Raises:
        ValueError: if *line* has neither six nor four columns.
        KeyError: if the query column is not a key of *queries*.

    Any exception raised while loading or scanning the document is
    printed and appended to ERRORFILE instead of being propagated, so
    that one bad entity does not abort the whole run.
    """
    # Strip the line terminator so it does not stick to the last column.
    columns = line.rstrip('\r\n').split('\t')
    try:
        query, _, dbpediaid, _, relevance, _ = columns
    except ValueError:  # For qrels.txt
        query, _, dbpediaid, relevance = columns
    # A set gives O(1) membership tests for the repeated match() calls.
    terms = set(queries[query].split())
    try:
        result = get(dbpediaid)
        if result is None:
            return
        for field, values in result.items():
            matches = sum(1 for value in values if match(value, terms))
            print('{}\t{}\t{}\t{}\t{}\t{}'.format(
                query, dbpediaid, float(relevance), field, len(values),
                matches))
    except Exception as e:
        print(dbpediaid)
        print(e)
        with open(ERRORFILE, 'a') as f:
            # Format the exception as text; concatenating it directly to
            # a str raises TypeError from inside this handler.
            f.write('{}\t{}\n'.format(dbpediaid, e))
+
if __name__ == '__main__':
    # Mapping from query id to query text; presumably already
    # stopword-filtered, judging by the file name.
    with open('queries_stopped.json') as query_file:
        queries = json.load(query_file)

    # fileinput yields lines from the files named on the command line,
    # or from standard input when none are given.
    for input_line in fileinput.input():
        run(queries, input_line)