aboutsummaryrefslogblamecommitdiff
path: root/run.py
blob: 96fa10e6318d6d077aa6c0ed48eef12cdc67d909 (plain) (tree)
1
2
3
4
5
6




                                   





                                                                    
                   












                                                                
                          


















                                                               
#!/usr/bin/env python3

import fileinput
import json
import os
from urllib.parse import quote_plus

# Directory that get() reads from: one '<url-quoted id>.json' file per entity.
DATADIR = 'data'
# File that run() appends to (one line per failure) when an entity errors out.
ERRORFILE = 'errors'

def get(dbpediaid):
    """Return the parsed JSON stored for dbpediaid, or None if no file exists.

    The file is looked up in DATADIR under the URL-quoted id plus '.json'.
    """
    filename = quote_plus(dbpediaid) + '.json'
    path = os.path.join(DATADIR, filename)
    if os.path.isfile(path):
        with open(path) as handle:
            return json.load(handle)
    return None

def match(value, terms):
    """Return True if any whitespace-separated token of value is in terms."""
    return any(token in terms for token in value.split())

def run(queries, line, outfile):
    """Write per-field term-match counts for one tab-separated input line.

    Args:
        queries: Mapping from query id to a string of whitespace-separated
            query terms.
        line: Record with exactly six tab-separated columns. Only the first
            (query id) and third (entity id) are used.
        outfile: Writable text stream. For every field of the entity one row
            is written: query id, entity id, field name, number of values,
            and number of values sharing at least one token with the query.

    Raises:
        ValueError: If line does not split into exactly six columns.
        KeyError: If the query id is not present in queries.

    Errors raised while loading or scoring the entity are not propagated:
    they are printed and appended to ERRORFILE so that a single bad entity
    does not abort the whole run.
    """
    query, _, dbpediaid, _, _relevance, _method = line.split('\t')
    # A set gives constant-time membership tests inside match().
    terms = set(queries[query].split())
    try:
        result = get(dbpediaid)
        if result is None:
            # No data file for this entity; nothing to report.
            return
        for field, values in result.items():
            matches = sum(1 for value in values if match(value, terms))
            outfile.write('{}\t{}\t{}\t{}\t{}\n'.format(
                query, dbpediaid, field, len(values), matches))
    except Exception as e:
        print(dbpediaid)
        print(e)
        with open(ERRORFILE, 'a') as f:
            # Format the exception as text: concatenating str + Exception
            # raises TypeError, which would crash the error handler itself.
            f.write('{}\t{}\n'.format(dbpediaid, e))

if __name__ == '__main__':
    # Load the query-id -> query-terms mapping that run() looks each line up in.
    with open('queries_stopped.json') as query_file:
        queries = json.load(query_file)

        # run.txt is truncated and rewritten on every invocation. Input lines
        # come from the files named on the command line, or stdin if none.
        with open('run.txt', 'w') as results:
            for record in fileinput.input():
                run(queries, record, results)