diff options
author | Camil Staps | 2017-12-11 13:39:08 +0100 |
---|---|---|
committer | Camil Staps | 2017-12-11 13:39:08 +0100 |
commit | abfc3c3bf3fa914af330a5be1a6982af65bd7e97 (patch) | |
tree | 40a93ba00800e021659d70871d7f013dd46ef1ec | |
parent | run.py: don't download missing files (diff) |
Add check.py
-rw-r--r-- | .gitignore | 1 |
-rwxr-xr-x | check.py | 14 |
-rwxr-xr-x | run.py | 13 |
3 files changed, 21 insertions, 7 deletions
```diff
diff --git a/.gitignore b/.gitignore
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 errors
 data/
 run.txt
+scores.txt
diff --git a/check.py b/check.py
new file mode 100755
index 0000000..69c247c
--- /dev/null
+++ b/check.py
@@ -0,0 +1,14 @@
+#!/usr/bin/env python3
+
+import fileinput
+
+if __name__ == '__main__':
+    scores = dict()
+    for line in fileinput.input():
+        query, dbpediaid, relevance, field, nvalues, nmatches = line.split('\t')
+        if field not in scores:
+            scores[field] = 0
+        scores[field] += float(relevance) * int(nmatches) / int(nvalues)
+
+    for field, score in scores.items():
+        print('{}\t{}'.format(field, score))
diff --git a/run.py b/run.py
--- a/run.py
+++ b/run.py
@@ -5,7 +5,7 @@ import json
 import os
 from urllib.parse import quote_plus
 
-DATADIR = 'data'
+DATADIR = '/home/camil/temp/information-retrieval-data'
 ERRORFILE = 'errors'
 
 def get(dbpediaid):
@@ -21,7 +21,7 @@ def match(value, terms):
             return True
     return False
 
-def run(queries, line, outfile):
+def run(queries, line):
     query, _, dbpediaid, _, relevance, method = line.split('\t')
     terms = queries[query].split()
     try:
@@ -33,8 +33,8 @@ def run(queries, line, outfile):
             for value in values:
                 if match(value, terms):
                     matches += 1
-            outfile.write('{}\t{}\t{}\t{}\t{}\n'.format(
-                query, dbpediaid, field, len(values), matches))
+            print('{}\t{}\t{}\t{}\t{}\t{}\n'.format(
+                query, dbpediaid, relevance, field, len(values), matches))
     except Exception as e:
         print(dbpediaid)
         print(e)
@@ -45,6 +45,5 @@ if __name__ == '__main__':
     with open('queries_stopped.json') as f:
         queries = json.load(f)
 
-    with open('run.txt', 'w') as out:
-        for line in fileinput.input():
-            run(queries, line, out)
+    for line in fileinput.input():
+        run(queries, line)
```