Add check.py

author: Camil Staps 2017-12-11 13:39:08 +0100
committer: Camil Staps 2017-12-11 13:39:08 +0100
commit: abfc3c3bf3fa914af330a5be1a6982af65bd7e97 (patch)
tree: 40a93ba00800e021659d70871d7f013dd46ef1ec
parent: run.py: don't download missing files (diff)
3 files changed, 21 insertions, 7 deletions
diff --git a/.gitignore b/.gitignore
index fe1d8a4..5baa1a3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 errors
 data/
 run.txt
+scores.txt
diff --git a/check.py b/check.py
new file mode 100755
index 0000000..69c247c
--- /dev/null
+++ b/check.py
@@ -0,0 +1,14 @@
+#!/usr/bin/env python3
+"""Sum relevance * nmatches / nvalues per field over tab-separated input."""
+import fileinput
+
+if __name__ == '__main__':
+    scores = dict()
+    for line in fileinput.input():
+        if not line.strip():
+            continue  # tolerate blank lines instead of failing to unpack
+        query, dbpediaid, relevance, field, nvalues, nmatches = line.split('\t')
+        scores.setdefault(field, 0)
+        scores[field] += float(relevance) * int(nmatches) / int(nvalues)
+    for field, score in scores.items():
+        print('{}\t{}'.format(field, score))
diff --git a/run.py b/run.py
index 96fa10e..7b42ea8 100755
--- a/run.py
+++ b/run.py
@@ -5,7 +5,7 @@ import json
 import os
 from urllib.parse import quote_plus
 
-DATADIR = 'data'
+DATADIR = '/home/camil/temp/information-retrieval-data'
 ERRORFILE = 'errors'
 
 def get(dbpediaid):
@@ -21,7 +21,7 @@ def match(value, terms):
             return True
     return False
 
-def run(queries, line, outfile):
+def run(queries, line):
     query, _, dbpediaid, _, relevance, method = line.split('\t')
     terms = queries[query].split()
     try:
@@ -33,8 +33,8 @@ def run(queries, line, outfile):
             for value in values:
                 if match(value, terms):
                     matches += 1
-            outfile.write('{}\t{}\t{}\t{}\t{}\n'.format(
-                query, dbpediaid, field, len(values), matches))
+            print('{}\t{}\t{}\t{}\t{}\t{}'.format(
+                query, dbpediaid, relevance, field, len(values), matches))
     except Exception as e:
         print(dbpediaid)
         print(e)
@@ -45,6 +45,5 @@ if __name__ == '__main__':
     with open('queries_stopped.json') as f:
         queries = json.load(f)
 
-        with open('run.txt', 'w') as out:
-            for line in fileinput.input():
-                run(queries, line, out)
+        for line in fileinput.input():
+            run(queries, line)
author	Camil Staps	2017-12-11 13:39:08 +0100
committer	Camil Staps	2017-12-11 13:39:08 +0100
commit	abfc3c3bf3fa914af330a5be1a6982af65bd7e97 (patch)
tree	40a93ba00800e021659d70871d7f013dd46ef1ec
parent	run.py: don't download missing files (diff)