blob: fc57f8b999d180d36fdeb7ef140d03caf09a1f55 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
|
#!/usr/bin/env python3
import os
import json
from collections import Counter
def _count_terms(data_dir):
    """Tally lower-cased, space-separated terms per field across JSON files.

    Args:
        data_dir: Directory whose every entry is opened and parsed as a JSON
            document. Assumes each document is an object mapping a field
            name to an iterable of strings -- TODO confirm against the data.

    Returns:
        A dict mapping each field name to a ``Counter`` of its terms. Fields
        and terms keep the order in which they were first encountered.
    """
    counts = {}
    for filename in os.listdir(data_dir):
        # Decode explicitly as UTF-8 instead of relying on the platform's
        # locale encoding, which may mis-decode non-ASCII JSON text.
        with open(os.path.join(data_dir, filename), encoding='utf-8') as f:
            entity = json.load(f)
        for field, values in entity.items():
            for value in values:
                # Count tokens directly rather than collecting every token
                # in a list first. Splitting on a single space is kept
                # as-is, so consecutive spaces yield an empty-string term.
                counts.setdefault(field, Counter()).update(
                    token.lower() for token in value.split(" "))
    return counts


def main(data_dir='information-retrieval-data/'):
    """Print one ``field<TAB>term<TAB>count`` line per distinct term.

    Args:
        data_dir: Directory of JSON documents to scan; defaults to the
            location the script has always read from.
    """
    for field, counter in _count_terms(data_dir).items():
        for term, count in counter.items():
            print(field, term, count, sep='\t')


if __name__ == '__main__':
    main()
|