about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Camil Staps, 2017-12-11 13:39:08 +0100
committer: Camil Staps, 2017-12-11 13:39:08 +0100
commit: abfc3c3bf3fa914af330a5be1a6982af65bd7e97 (patch)
tree: 40a93ba00800e021659d70871d7f013dd46ef1ec
parent: run.py: don't download missing files (diff)
Add check.py
-rw-r--r--  .gitignore   1
-rwxr-xr-x  check.py    14
-rwxr-xr-x  run.py      13
3 files changed, 21 insertions, 7 deletions
diff --git a/.gitignore b/.gitignore
index fe1d8a4..5baa1a3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
errors
data/
run.txt
+scores.txt
diff --git a/check.py b/check.py
new file mode 100755
index 0000000..69c247c
--- /dev/null
+++ b/check.py
@@ -0,0 +1,14 @@
+#!/usr/bin/env python3
+
+import fileinput
+
+if __name__ == '__main__':
+ scores = dict()
+ for line in fileinput.input():
+ query, dbpediaid, relevance, field, nvalues, nmatches = line.split('\t')
+ if field not in scores:
+ scores[field] = 0
+ scores[field] += float(relevance) * int(nmatches) / int(nvalues)
+
+ for field, score in scores.items():
+ print('{}\t{}'.format(field, score))
diff --git a/run.py b/run.py
index 96fa10e..7b42ea8 100755
--- a/run.py
+++ b/run.py
@@ -5,7 +5,7 @@ import json
import os
from urllib.parse import quote_plus
-DATADIR = 'data'
+DATADIR = '/home/camil/temp/information-retrieval-data'
ERRORFILE = 'errors'
def get(dbpediaid):
@@ -21,7 +21,7 @@ def match(value, terms):
return True
return False
-def run(queries, line, outfile):
+def run(queries, line):
query, _, dbpediaid, _, relevance, method = line.split('\t')
terms = queries[query].split()
try:
@@ -33,8 +33,8 @@ def run(queries, line, outfile):
for value in values:
if match(value, terms):
matches += 1
- outfile.write('{}\t{}\t{}\t{}\t{}\n'.format(
- query, dbpediaid, field, len(values), matches))
+ print('{}\t{}\t{}\t{}\t{}\t{}\n'.format(
+ query, dbpediaid, relevance, field, len(values), matches))
except Exception as e:
print(dbpediaid)
print(e)
@@ -45,6 +45,5 @@ if __name__ == '__main__':
with open('queries_stopped.json') as f:
queries = json.load(f)
- with open('run.txt', 'w') as out:
- for line in fileinput.input():
- run(queries, line, out)
+ for line in fileinput.input():
+ run(queries, line)