diff options
author | Erin van der Veen | 2017-12-15 12:42:21 +0100 |
---|---|---|
committer | Erin van der Veen | 2017-12-15 12:42:21 +0100 |
commit | 75e913ed3fb1172b27e2d7eaae7ad8bd7b57538e (patch) | |
tree | 26472c05da39f7303bd7390d8e51ed939370a3f7 | |
parent | Use logarithm for weight measure; add code explanation (diff) |
Calculate term frequency
-rwxr-xr-x | tf.py | 20 |
1 files changed, 20 insertions, 0 deletions
#!/usr/bin/env python3
"""Print per-field term frequencies for a directory of JSON entities.

Every file in ``DATA_DIR`` is parsed as JSON.  Each document is expected to
be an object mapping a field name to a list of strings.  For every field the
script prints one tab-separated row per distinct term:

    <field>\t<term>\t<number of occurrences>
"""

import os
import json
from collections import Counter

DATA_DIR = 'information-retrieval-data/'


def load_entities(directory):
    """Yield the parsed JSON document of every file in *directory*.

    Files are visited in sorted name order so that the output of the script
    is reproducible (``os.listdir`` itself returns entries in arbitrary
    order).
    """
    for filename in sorted(os.listdir(directory)):
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with open(os.path.join(directory, filename), encoding='utf-8') as f:
            yield json.load(f)


def count_terms(entities):
    """Count lower-cased terms per field across all *entities*.

    Args:
        entities: iterable of dicts mapping a field name to an iterable of
            strings.

    Returns:
        A dict mapping each field name to a ``Counter`` of its terms.  Fields
        and terms appear in the order in which they were first seen.  A field
        whose value list is empty in every entity is not included.
    """
    store = {}
    for entity in entities:
        for field, values in entity.items():
            for value in values:
                # Count incrementally instead of first collecting every token
                # in a list; memory then scales with distinct terms only.
                counts = store.setdefault(field, Counter())
                # Splitting on a single space is kept on purpose: runs of
                # spaces yield empty-string terms, exactly as before.
                counts.update(v.lower() for v in value.split(" "))
    return store


def main():
    """Print the term frequencies of all entities found in ``DATA_DIR``."""
    for field, counts in count_terms(load_entities(DATA_DIR)).items():
        for term, frequency in counts.items():
            print('{}\t{}\t{}'.format(field, term, frequency))


if __name__ == '__main__':
    main()