blob: 0137cd84e3f4d4e1bb499e427672d984051d5ff8 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
|
#!/usr/bin/env python3
import os
import json
from collections import Counter
def _load_query_terms(path):
    """Return the set of lowercased tokens used by the queries stored at *path*.

    The file must contain a JSON object whose values are query strings; each
    query is split on whitespace.  Tokens are lowercased so that matching
    against document tokens is case-insensitive.
    """
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(path, encoding='utf-8') as f:
        queries = json.load(f)
    return {term.lower() for query in queries.values() for term in query.split()}


def _count_field_terms(data_dir, terms):
    """Count, per field, how often each query term occurs in the documents.

    Every regular file in *data_dir* is parsed as a JSON object mapping a
    field name to an iterable of strings (a bare string is treated as a
    single value).  Values are split on whitespace, tokens are lowercased and
    only tokens contained in *terms* are counted.

    Returns a dict mapping field name -> ``Counter`` of term occurrences.
    Fields that never contain a query term map to an empty ``Counter``.
    """
    store = {}
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # order of the emitted rows reproducible across runs and filesystems.
    for filename in sorted(os.listdir(data_dir)):
        path = os.path.join(data_dir, filename)
        if not os.path.isfile(path):
            # Sub-directories cannot be opened as documents.
            continue
        with open(path, encoding='utf-8') as f:
            entity = json.load(f)
        for field, values in entity.items():
            if isinstance(values, str):
                # Iterating a bare string would yield single characters.
                values = [values]
            # Count incrementally instead of keeping every token in memory.
            counts = store.setdefault(field, Counter())
            counts.update(
                token
                for value in values
                for token in map(str.lower, value.split())
                # Compare the lowercased token, i.e. the form that is emitted.
                if token in terms
            )
    return store


def main(queries_path='queries_stopped.json',
         data_dir='information-retrieval-data/'):
    """Print ``field<TAB>term<TAB>count`` for every query term found per field.

    *queries_path* is the JSON file holding the queries and *data_dir* the
    directory holding one JSON document per file.
    """
    terms = _load_query_terms(queries_path)
    for field, counts in _count_field_terms(data_dir, terms).items():
        for term, count in counts.items():
            print('{}\t{}\t{}'.format(field, term, count))


if __name__ == '__main__':
    main()
|