1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
|
#!/usr/bin/env python3
import fileinput
import json
import os
from urllib.parse import quote_plus
DATADIR = '/home/camil/temp/information-retrieval-data'
ERRORFILE = 'errors'
def get(dbpediaid):
    """Load the stored JSON document for *dbpediaid*.

    The document is looked up in ``DATADIR`` under the file name
    ``quote_plus(dbpediaid) + '.json'``.

    Returns the parsed JSON content, or ``None`` when no regular file
    exists at that path.
    """
    filename = quote_plus(dbpediaid) + '.json'
    path = os.path.join(DATADIR, filename)
    if os.path.isfile(path):
        with open(path) as handle:
            return json.load(handle)
    return None
def match(value, terms):
    """Return True if any whitespace-separated token of *value* is in *terms*.

    Tokens are compared exactly as they appear (no case folding or
    normalisation); an empty *value* yields False.
    """
    return any(token in terms for token in value.split())
def run(queries, line):
    """Print per-field term-match statistics for one tab-separated input line.

    *line* is split on tabs and must have either six fields
    (query, <ignored>, dbpediaid, <ignored>, relevance, <ignored>) or four
    fields (query, <ignored>, dbpediaid, relevance; the qrels.txt layout).

    *queries* maps a query id to its query text; the text is split on
    whitespace to obtain the terms to look for.

    For every ``field -> values`` entry of the document returned by
    ``get(dbpediaid)`` one tab-separated row is printed:
    query, dbpediaid, float(relevance), field, len(values), and the number
    of values for which ``match(value, terms)`` is true.  Nothing is
    printed when ``get`` returns None.

    Any exception raised while loading or scoring the document is reported
    (id and message printed, and appended to ``ERRORFILE``) instead of
    being propagated, so that one bad document does not stop the run.

    Raises:
        ValueError: if *line* has neither six nor four tab-separated fields.
        KeyError: if the query id is not present in *queries*.
    """
    try:
        query, _, dbpediaid, _, relevance, _ = line.split('\t')
    except ValueError:  # For qrels.txt
        query, _, dbpediaid, relevance = line.split('\t')
    terms = queries[query].split()
    try:
        result = get(dbpediaid)
        if result is None:
            return
        for field, values in result.items():
            matches = sum(1 for value in values if match(value, terms))
            print('{}\t{}\t{}\t{}\t{}\t{}'.format(
                query, dbpediaid, float(relevance), field, len(values),
                matches))
    except Exception as e:
        # Best-effort: report the failing document and carry on.
        print(dbpediaid)
        print(e)
        with open(ERRORFILE, 'a') as f:
            # Format the exception into the string; concatenating a str with
            # an Exception object raises TypeError and would abort the
            # handler before anything is logged.
            f.write('{}\t{}\n'.format(dbpediaid, e))
if __name__ == '__main__':
    # Load the query mapping once; run() looks each line's query id up in it.
    with open('queries_stopped.json') as query_file:
        queries = json.load(query_file)

    # fileinput reads the files named on the command line, or stdin if none.
    for record in fileinput.input():
        run(queries, record)
|