run.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52

#!/usr/bin/env python3

import fileinput
import json
import os
from urllib.parse import quote_plus

# Directory that get() reads `<url-quoted dbpedia id>.json` files from.
DATADIR = '/home/camil/temp/information-retrieval-data'
# File that run() appends a `<dbpedia id>\t<error>` line to when processing fails.
ERRORFILE = 'errors'

def get(dbpediaid):
    """Load the stored JSON document for *dbpediaid*.

    The document is looked up in ``DATADIR`` under the URL-quoted id with a
    ``.json`` suffix.

    Returns the parsed JSON content, or ``None`` when no regular file
    exists at that path.
    """
    filename = quote_plus(dbpediaid) + '.json'
    path = os.path.join(DATADIR, filename)
    if os.path.isfile(path):
        with open(path) as handle:
            return json.load(handle)
    return None

def match(value, terms):
    """Return True if any whitespace-separated token of *value* is in *terms*.

    The comparison is an exact, case-sensitive membership test of each
    token against the *terms* container.
    """
    return any(token in terms for token in value.split())

def run(queries, line):
    """Print per-field term-match statistics for one tab-separated line.

    *line* must have either six tab-separated fields (the 1st, 3rd and 5th
    are used as query id, dbpedia id and relevance) or four fields (the
    1st, 3rd and 4th are used; this is the qrels.txt layout).

    For every field of the entity's stored document, one tab-separated row
    is printed to stdout:
    query, dbpedia id, relevance as float, field name, number of values,
    number of values containing at least one query term.

    Nothing is printed when get() finds no document for the entity.

    Args:
        queries: mapping from query id to the query text; the text is split
            on whitespace to obtain the terms.
        line: one input record, as described above.

    Raises:
        ValueError: if *line* has neither six nor four tab-separated fields.
        KeyError: if the query id is not present in *queries*.

    Any other failure while loading or scoring the document is reported on
    stdout and appended to ERRORFILE instead of being raised.
    """
    try:
        query, _, dbpediaid, _, relevance, _ = line.split('\t')
    except ValueError:  # For qrels.txt
        query, _, dbpediaid, relevance = line.split('\t')
    terms = queries[query].split()
    try:
        result = get(dbpediaid)
        if result is None:
            return
        for field, values in result.items():
            matches = sum(1 for value in values if match(value, terms))
            print('{}\t{}\t{}\t{}\t{}\t{}'.format(
                query, dbpediaid, float(relevance), field, len(values), matches))
    except Exception as e:
        print(dbpediaid)
        print(e)
        with open(ERRORFILE, 'a') as f:
            # The exception must be converted explicitly: concatenating a
            # str with an Exception object raises TypeError, which would
            # crash the handler before anything is logged.
            f.write(dbpediaid + '\t' + str(e) + '\n')

if __name__ == '__main__':
    # Load the query-id -> query-text mapping once, then feed every input
    # line (from the files named on the command line, or stdin) to run().
    with open('queries_stopped.json') as query_file:
        queries = json.load(query_file)

        for record in fileinput.input():
            run(queries, record)