#!/usr/bin/env python3
"""Download entity lookup results for the DBpedia ids listed in TSV input.

Input lines are read with ``fileinput`` (files named on the command line, or
stdin).  Each line must hold four tab-separated fields: index, query,
DBpedia id and relevance; only the DBpedia id is used.  The response for
each id is stored as ``<quoted id>.json`` under ``OUTDIR``.  Failures while
fetching are appended to ``ERRORFILE`` and processing continues.
"""
import fileinput
import os
from urllib.parse import quote_plus
from urllib.request import urlopen, Request

OUTDIR = 'data'
ERRORFILE = 'errors'


def get(dbpediaid):
    """Fetch the lookup result for *dbpediaid* and store it under OUTDIR.

    Does nothing if the output file for this id already exists, so an
    interrupted run can be resumed without re-downloading.

    Args:
        dbpediaid: The entity id to look up, as it appears in the input.

    Raises:
        Any exception from URL quoting, the HTTP request, UTF-8 decoding of
        the response, or writing the output file.
    """
    outfile = os.path.join(OUTDIR, quote_plus(dbpediaid) + '.json')
    if os.path.isfile(outfile):
        return

    # Percent-encode the id for the URL path but keep '/' literal.
    url = 'http://api.nordlys.cc/ec/lookup_id/{}'.format(
        quote_plus(dbpediaid, safe='/'))
    print(url)

    request = Request(url, headers={'User-Agent': 'Radboud University'})
    # Close the connection deterministically instead of relying on GC.
    with urlopen(request) as response:
        body = response.read()

    # Decode *before* creating the file: a decode failure must not leave an
    # empty output file behind, because the isfile() check above would then
    # treat the id as already downloaded on every later run.
    text = body.decode(encoding='UTF-8')

    os.makedirs(OUTDIR, exist_ok=True)
    # Write as UTF-8 explicitly so the result does not depend on the locale.
    with open(outfile, 'w', encoding='utf-8') as f:
        f.write(text)


def scrape(line):
    """Process one input line: download its DBpedia id, logging any failure.

    Args:
        line: A line with exactly four tab-separated fields
            (index, query, DBpedia id, relevance).

    Raises:
        ValueError: If *line* does not split into exactly four fields.
    """
    _index, _query, dbpediaid, _relevance = line.split('\t')
    try:
        get(dbpediaid)
    except Exception as e:
        # Best-effort scraping: record the failure and carry on with the
        # next line.  The exception must be converted with str() -- adding
        # an exception object to a string raises TypeError.
        # 'backslashreplace' ensures that logging an id containing
        # unencodable characters can never itself fail.
        with open(ERRORFILE, 'a', encoding='utf-8',
                  errors='backslashreplace') as f:
            f.write(dbpediaid + '\t' + str(e) + '\n')


if __name__ == '__main__':
    for line in fileinput.input():
        scrape(line)