aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- .gitignore 1
-rwxr-xr-x run.py 55
2 files changed, 56 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
index eaf0161..fe1d8a4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
errors
data/
+run.txt
diff --git a/run.py b/run.py
new file mode 100755
index 0000000..7f0fea9
--- /dev/null
+++ b/run.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+
+import fileinput
+import json
+import os
+from urllib.parse import quote_plus
+from urllib.request import urlopen, Request
+
+# Directory in which get() caches each looked-up entity as a JSON file.
+DATADIR = 'data'
+# File that run() opens in append mode to record entities whose processing failed.
+ERRORFILE = 'errors'
+
+def get(dbpediaid):
+    """Return the parsed JSON record for *dbpediaid*, using an on-disk cache.
+
+    The record is looked up at the ``ec/lookup_id`` endpoint of
+    api.nordlys.cc the first time an id is requested and stored as
+    ``DATADIR/<quoted id>.json``; later calls read that file instead of
+    hitting the network.
+
+    Args:
+        dbpediaid: Entity identifier to look up (a string; it may contain
+            '/' characters, which are kept as path separators in the URL).
+
+    Returns:
+        Whatever ``json`` decodes the response body to.
+
+    Raises:
+        urllib.error.URLError: If the HTTP request fails.
+        ValueError: If the response (or the cached file) is not valid JSON.
+    """
+    outfile = os.path.join(DATADIR, quote_plus(dbpediaid) + '.json')
+    if not os.path.isfile(outfile):
+        # Percent-encode the id but leave '/' intact so it stays a path
+        # separator in the request URL.
+        url = 'http://api.nordlys.cc/ec/lookup_id/{}'.format(
+            quote_plus(dbpediaid, safe='/'))
+        print(url)
+        request = Request(url, headers={'User-Agent': 'Radboud University'})
+        # Close the HTTP response deterministically instead of leaking it.
+        with urlopen(request) as response:
+            text = response.read().decode(encoding='UTF-8')
+        # Parse BEFORE caching: a malformed response must not be stored,
+        # otherwise every later call would fail on the poisoned cache file.
+        result = json.loads(text)
+        # The cache directory is not guaranteed to exist on a fresh checkout.
+        os.makedirs(DATADIR, exist_ok=True)
+        # The payload was decoded as UTF-8; write it back the same way
+        # rather than relying on the locale's default encoding.
+        with open(outfile, 'w', encoding='UTF-8') as f:
+            f.write(text)
+        return result
+    with open(outfile, encoding='UTF-8') as f:
+        return json.load(f)
+
+def match(value, terms):
+    """Tell whether any whitespace-separated token of *value* is in *terms*.
+
+    Args:
+        value: String that is split on whitespace into tokens.
+        terms: Container the tokens are tested against with ``in``.
+
+    Returns:
+        True as soon as one token is found in *terms*, otherwise False
+        (including when *value* has no tokens at all).
+    """
+    return any(token in terms for token in value.split())
+
+def run(queries, line, outfile):
+    """Write per-field term-match counts for one tab-separated input line.
+
+    *line* must consist of exactly six tab-separated fields; the first is
+    the query id and the third the entity id, the remaining ones are
+    ignored. For every field of the entity record returned by get(), one
+    row ``query, dbpediaid, field, number of values, number of matching
+    values`` is written to *outfile*, where a value matches when match()
+    finds one of the query's terms in it.
+
+    Any exception raised while fetching or scoring the entity is printed
+    and appended to ERRORFILE instead of aborting the run; rows already
+    written for that entity are kept.
+
+    Args:
+        queries: Mapping from query id to a whitespace-separated term string.
+        line: One input record, as described above.
+        outfile: Writable text file object receiving the result rows.
+
+    Raises:
+        ValueError: If *line* does not have exactly six fields.
+        KeyError: If the query id is not present in *queries*.
+    """
+    # Unpacking still enforces the six-column layout; unused columns are
+    # discarded explicitly.
+    query, _, dbpediaid, _, _, _ = line.split('\t')
+    # A set gives constant-time membership tests inside match().
+    terms = set(queries[query].split())
+    try:
+        # NOTE(review): assumes the record maps field names to lists of
+        # strings - anything else ends up in the error log below.
+        result = get(dbpediaid)
+        for field, values in result.items():
+            matches = sum(1 for value in values if match(value, terms))
+            outfile.write('{}\t{}\t{}\t{}\t{}\n'.format(
+                query, dbpediaid, field, len(values), matches))
+    except Exception as e:
+        print(dbpediaid)
+        print(e)
+        with open(ERRORFILE, 'a') as f:
+            # str() is required: concatenating the exception object itself
+            # raises TypeError inside this handler and kills the whole run.
+            f.write(dbpediaid + '\t' + str(e) + '\n')
+
+def main():
+    """Load the query terms, then process every input line into run.txt."""
+    with open('queries_stopped.json') as query_file:
+        queries = json.load(query_file)
+
+    # fileinput yields the lines of the files named on the command line,
+    # or of standard input when none are given.
+    with open('run.txt', 'w') as report:
+        for record in fileinput.input():
+            run(queries, record, report)
+
+
+if __name__ == '__main__':
+    main()