aboutsummaryrefslogtreecommitdiff
path: root/tf.py
blob: fc57f8b999d180d36fdeb7ef140d03caf09a1f55 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
#!/usr/bin/env python3

import os
import json
from collections import Counter

# Directory scanned when the script is run without further configuration.
DATA_DIR = 'information-retrieval-data/'


def count_terms(data_dir=DATA_DIR):
    """Count term occurrences per field across the JSON files in *data_dir*.

    Every regular file in *data_dir* is parsed as JSON.  Each parsed document
    is iterated as ``field -> values``; every value is split on single spaces
    and each non-empty piece is lower-cased and counted under its field.

    NOTE(review): this assumes each file holds a JSON object whose values are
    lists of strings -- the schema is not validated here; confirm against the
    data set.

    Args:
        data_dir: Path of the directory holding the JSON documents.

    Returns:
        A dict mapping each field name to a ``Counter`` of its lower-cased
        terms.  Fields and terms appear in first-seen order, with files
        visited in sorted filename order.

    Raises:
        OSError: If *data_dir* cannot be listed or a file cannot be read.
        json.JSONDecodeError: If a file does not contain valid JSON.
    """
    counts = {}
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # first-seen ordering of fields and terms reproducible between runs.
    for filename in sorted(os.listdir(data_dir)):
        path = os.path.join(data_dir, filename)
        if not os.path.isfile(path):
            # Sub-directories (and other non-files) cannot be opened as JSON.
            continue
        # Be explicit about UTF-8 instead of relying on the platform's
        # locale-dependent default encoding.
        with open(path, encoding='utf-8') as f:
            entity = json.load(f)
        for field, values in entity.items():
            for value in values:
                # Leading, trailing or repeated spaces make split(' ') yield
                # empty strings; those are not terms and are skipped.
                tokens = (
                    token.lower() for token in value.split(' ') if token
                )
                # Count directly instead of first collecting every token of
                # the whole corpus in a list.
                counts.setdefault(field, Counter()).update(tokens)
    return counts


def main():
    """Print one ``field<TAB>term<TAB>count`` line per distinct term."""
    for field, terms in count_terms().items():
        for term, count in terms.items():
            print('{}\t{}\t{}'.format(field, term, count))


if __name__ == '__main__':
    main()