aboutsummaryrefslogtreecommitdiff
path: root/stats.py
diff options
context:
space:
mode:
author Camil Staps 2016-07-18 19:50:48 +0200
committer Camil Staps 2016-07-18 19:55:25 +0200
commit 4d919723513c0723847762890e9e6226e0cf32a2 (patch)
tree 2ac1821b7316b1135e0cea08aaf78eac16a6bb6d /stats.py
Initial commit
Diffstat (limited to 'stats.py')
-rwxr-xr-x stats.py 117
1 files changed, 117 insertions, 0 deletions
diff --git a/stats.py b/stats.py
new file mode 100755
index 0000000..c77eb6b
--- /dev/null
+++ b/stats.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+import argparse
+import logging
+import re
+import sys
+from email.utils import getaddresses, mktime_tz, parsedate_tz, parseaddr
+from mailbox import Maildir, MaildirMessage
+
+from orator import Model
+from orator.exceptions.orm import ModelNotFound
+
+import config
+from models.address import Address
+from models.folder import Folder
+from models.message import Message
+
+def start_logger(conn):
+ conn.enable_query_log()
+ logger = logging.getLogger('orator.connection.queries')
+ logger.setLevel(logging.DEBUG)
+ formatter = logging.Formatter('%(query)s')
+ handler = logging.StreamHandler()
+ handler.setFormatter(formatter)
+ logger.addHandler(handler)
+
+def log_tick(char='.', flush=True):
+ print(char, end='')
+ if flush:
+ sys.stdout.flush()
+
+def log_line(string):
+ print()
+ print(string)
+
+def flush_database(conn):
+ conn.statement('TRUNCATE TABLE `addresses_messages`')
+ conn.statement('TRUNCATE TABLE `messages`')
+ conn.statement('TRUNCATE TABLE `addresses`')
+ conn.statement('TRUNCATE TABLE `folders`')
+
+def parse_date(date):
+ if date == None:
+ return None
+ else:
+ return mktime_tz(parsedate_tz(date))
+
+def insert_addresses(mail):
+ tos = [('to', addr) for addr in mail.get_all('to', [])]
+ ccs = [('cc', addr) for addr in mail.get_all('cc', [])]
+ froms = [('from', addr) for addr in mail.get_all('from', [])]
+ for type_, addr in tos + ccs + froms:
+ _, addr = parseaddr(str(addr))
+
+ if addr == '':
+ continue
+
+ addrs = Address.where('email', addr)
+ if addrs.count() == 0:
+ yield (type_, Address.create(email=addr, domain=''))
+ log_tick('a')
+ else:
+ yield (type_, addrs.first())
+
+def main():
+ pars = argparse.ArgumentParser('Import Maildir messages to a database.')
+
+ pars.add_argument(
+ 'location', metavar='LOCATION', type=str,
+ help='Location of the Maildir')
+ pars.add_argument(
+ '-v', dest='verbose', action='store_true',
+ help='Verbose output')
+ pars.add_argument(
+ '-xf', '--exclude-folders', dest='exclude_folders', type=str,
+ help='Exclude folders matching a regular expression')
+
+ args = pars.parse_args()
+
+ if args.verbose:
+ start_logger(config.db)
+
+ Model.set_connection_resolver(config.db)
+
+ flush_database(config.db)
+
+ md = Maildir(args.location)
+ for folder in md.list_folders():
+ if 'exclude_folders' in args:
+ if re.match(args.exclude_folders, folder):
+ log_line('Skipping %s' % folder)
+ continue
+
+ # Insert folder
+ Folder.create(name=folder)
+ log_line(folder)
+
+ for key, mail in md.get_folder(folder).iteritems():
+ # Insert message
+ msg = Message.create(
+ filename=key,
+ folder=folder,
+ subject=str(mail.get('Subject')).encode('utf8')[:1022],
+ date=parse_date(mail.get('Date')),
+ content_type=mail.get_content_type()
+ )
+
+ # Insert addresses
+ addrs = list(insert_addresses(mail))
+ for type_, addr in addrs:
+ msg.addresses().save(addr, {'type': type_})
+
+ log_tick('.')
+
+ print()
+
+
+# Run the import only when this file is executed as a script.
+if __name__ == '__main__':
+ main()