diff options
-rwxr-xr-x | tf.py | 20 |
1 files changed, 20 insertions, 0 deletions
#!/usr/bin/env python3
"""Print per-field term frequencies for a directory of JSON entity files.

Output is one tab-separated line per (field, term) pair:
``field<TAB>term<TAB>count``.
"""

import os
import json
from collections import Counter

# Directory scanned when the script is run without further configuration.
DATA_DIR = 'information-retrieval-data/'


def count_terms(data_dir=DATA_DIR):
    """Count lower-cased terms per field across all JSON files in *data_dir*.

    Every entry of *data_dir* is opened and parsed as JSON. Each parsed
    document is iterated with ``.items()``, and every element of each value
    is split on single spaces.
    NOTE(review): this assumes each file holds an object mapping field names
    to lists of strings -- confirm against the data set.

    Args:
        data_dir: Path of the directory holding the JSON files.

    Returns:
        A dict mapping field name to a ``Counter`` of term -> occurrences.
        Fields and terms appear in first-seen order.

    Raises:
        OSError: If the directory or one of its entries cannot be read.
        json.JSONDecodeError: If a file does not contain valid JSON.
    """
    counts = {}
    # os.listdir() order is arbitrary; sorting makes the first-seen order of
    # fields and terms (and therefore the printed output) reproducible.
    for filename in sorted(os.listdir(data_dir)):
        path = os.path.join(data_dir, filename)
        # JSON is UTF-8 by specification; do not rely on the locale default.
        with open(path, encoding='utf-8') as f:
            entity = json.load(f)
        for field, values in entity.items():
            field_counts = counts.setdefault(field, Counter())
            for value in values:
                # Splitting on a single space is kept deliberately: runs of
                # spaces yield empty-string terms, which are counted too.
                field_counts.update(v.lower() for v in value.split(" "))
    return counts


def main(data_dir=DATA_DIR):
    """Print ``field<TAB>term<TAB>count`` for every term found in *data_dir*."""
    for field, field_counts in count_terms(data_dir).items():
        for term, freq in field_counts.items():
            print('{}\t{}\t{}'.format(field, term, freq))


if __name__ == '__main__':
    main()