diff options
author | Camil Staps | 2016-07-18 19:50:48 +0200 |
---|---|---|
committer | Camil Staps | 2016-07-18 19:55:25 +0200 |
commit | 4d919723513c0723847762890e9e6226e0cf32a2 (patch) | |
tree | 2ac1821b7316b1135e0cea08aaf78eac16a6bb6d /stats.py |
Initial commit
Diffstat (limited to 'stats.py')
-rwxr-xr-x | stats.py | 117 |
1 files changed, 117 insertions, 0 deletions
#!/usr/bin/env python3
# Recovered from a diff view: diff --git a/stats.py b/stats.py
# (new file mode 100755, index 0000000..c77eb6b).
"""Import the messages of a Maildir into a database through Orator models."""
import argparse
from email.utils import mktime_tz, parsedate_tz, parseaddr
import logging
from mailbox import Maildir, MaildirMessage
import re
import sys

from orator import Model
from orator.exceptions.orm import ModelNotFound

import config
from models.folder import Folder
from models.address import Address
from models.message import Message


def start_logger(conn):
    """Enable the query log on *conn* and print every logged query.

    A stream handler that formats records as ``%(query)s`` is attached to
    the ``orator.connection.queries`` logger at DEBUG level.
    """
    conn.enable_query_log()
    logger = logging.getLogger('orator.connection.queries')
    logger.setLevel(logging.DEBUG)
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter('%(query)s'))
    logger.addHandler(handler)


def log_tick(char='.', flush=True):
    """Print *char* without a newline, flushing stdout when *flush* is true."""
    print(char, end='')
    if flush:
        sys.stdout.flush()


def log_line(string):
    """Print *string* on a fresh line (ends any pending run of ticks)."""
    print()
    print(string)


def flush_database(conn):
    """Truncate the `addresses_messages`, `messages`, `addresses` and
    `folders` tables through *conn*.
    """
    for table in ('addresses_messages', 'messages', 'addresses', 'folders'):
        conn.statement('TRUNCATE TABLE `%s`' % table)


def parse_date(date):
    """Convert an RFC 2822 date header into a UTC timestamp.

    Returns ``None`` when *date* is ``None`` or cannot be parsed, instead
    of raising ``TypeError`` from ``mktime_tz(None)``.
    """
    if date is None:
        return None
    # str() because Message.get() may hand back a Header object rather
    # than a plain string; parsedate_tz() needs a str.
    parsed = parsedate_tz(str(date))
    if parsed is None:
        return None
    return mktime_tz(parsed)


def insert_addresses(mail):
    """Yield ``(type, Address)`` for every To, Cc and From address of *mail*.

    ``type`` is one of ``'to'``, ``'cc'`` or ``'from'``.  An ``Address`` row
    is created (with an empty ``domain``) when no row with that email
    exists yet; an ``'a'`` tick is logged for each row created.  Headers
    from which no address can be parsed are skipped.
    """
    for type_ in ('to', 'cc', 'from'):
        for raw in mail.get_all(type_, []):
            _, email = parseaddr(str(raw))
            if not email:
                continue

            # One query instead of count() followed by first().
            address = Address.where('email', email).first()
            if address is None:
                address = Address.create(email=email, domain='')
                log_tick('a')
            yield (type_, address)


def main():
    """Parse the command line and import the Maildir into the database.

    All four tables are truncated before the import starts.
    """
    pars = argparse.ArgumentParser(
        description='Import Maildir messages to a database.')

    pars.add_argument(
        'location', metavar='LOCATION', type=str,
        help='Location of the Maildir')
    pars.add_argument(
        '-v', dest='verbose', action='store_true',
        help='Verbose output')
    pars.add_argument(
        '-xf', '--exclude-folders', dest='exclude_folders', type=str,
        help='Exclude folders matching a regular expression')

    args = pars.parse_args()

    if args.verbose:
        start_logger(config.db)

    Model.set_connection_resolver(config.db)

    flush_database(config.db)

    # argparse always sets the attribute (to None when the option is
    # absent), so test the value rather than attribute membership.
    exclude = None
    if args.exclude_folders is not None:
        exclude = re.compile(args.exclude_folders)

    md = Maildir(args.location)
    for folder in md.list_folders():
        if exclude is not None and exclude.match(folder):
            log_line('Skipping %s' % folder)
            continue

        # Insert folder
        Folder.create(name=folder)
        log_line(folder)

        for key, mail in md.get_folder(folder).iteritems():
            # Insert message
            # NOTE(review): a missing Subject is stored as the text 'None',
            # and the 1022-byte cut may split a multi-byte character.
            # Left as-is; confirm the intended behaviour.
            msg = Message.create(
                filename=key,
                folder=folder,
                subject=str(mail.get('Subject')).encode('utf8')[:1022],
                date=parse_date(mail.get('Date')),
                content_type=mail.get_content_type()
            )

            # Insert addresses
            for type_, addr in insert_addresses(mail):
                msg.addresses().save(addr, {'type': type_})

            log_tick('.')

    print()


if __name__ == '__main__':
    main()