#!/usr/bin/env python3
"""Count, per entity field, how often each query term occurs in the data.

Reads the queries from ``queries_stopped.json``, collects the set of
whitespace-separated query terms, then scans every JSON file in
``information-retrieval-data/`` and prints one tab-separated line
``field<TAB>term<TAB>count`` for each (field, term) pair that occurs.
"""
import json
import os
from collections import Counter

QUERIES_PATH = 'queries_stopped.json'
DATA_DIR = 'information-retrieval-data/'


def query_terms(queries):
    """Return the set of lowercased terms used by any query.

    ``queries`` is a mapping whose values are query strings; each string is
    split on whitespace.  Terms are lowercased so that matching against
    document tokens is case-insensitive.
    """
    return {term.lower() for text in queries.values() for term in text.split()}


def count_field_terms(entities, terms):
    """Count occurrences of ``terms`` in each field of ``entities``.

    ``entities`` is an iterable of mappings ``field -> values``.
    NOTE(review): each ``values`` is assumed to be an iterable of strings
    (e.g. a JSON list of strings) -- confirm against the data files.

    Tokens are lowercased *before* the membership test; the original code
    tested the raw token and lowercased afterwards, so capitalised tokens
    never matched lowercase query terms.

    Returns a dict ``field -> Counter(term -> count)``, with fields in
    first-seen order.  A field is present even if none of its tokens match.
    """
    counts = {}
    for entity in entities:
        for field, values in entity.items():
            field_counts = counts.setdefault(field, Counter())
            for value in values:
                # Counting incrementally avoids keeping every matching
                # token of the whole corpus in memory.
                field_counts.update(
                    token
                    for token in (raw.lower() for raw in value.split())
                    if token in terms
                )
    return counts


def iter_entities(data_dir=DATA_DIR):
    """Yield the parsed JSON content of every file in ``data_dir``.

    File names are sorted so that the output order is reproducible;
    ``os.listdir`` returns entries in arbitrary order.
    """
    for filename in sorted(os.listdir(data_dir)):
        with open(os.path.join(data_dir, filename), encoding='utf-8') as f:
            yield json.load(f)


def main():
    """Load the queries, count term occurrences and print the table."""
    with open(QUERIES_PATH, encoding='utf-8') as f:
        terms = query_terms(json.load(f))

    counts = count_field_terms(iter_entities(), terms)
    for field, field_counts in counts.items():
        for term, count in field_counts.items():
            print('{}\t{}\t{}'.format(field, term, count))


if __name__ == '__main__':
    main()