aboutsummaryrefslogtreecommitdiff
path: root/stats.py
blob: c77eb6b2ec62509186d4c32921cd482737840313 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/env python3
import argparse
from email.utils import mktime_tz, parsedate_tz, parseaddr
from mailbox import Maildir, MaildirMessage
import re
import sys

from orator import Model
from orator.exceptions.orm import ModelNotFound

import config
from models.folder import Folder
from models.address import Address
from models.message import Message

def start_logger(conn):
    """Echo every query logged by ``conn`` through a stream handler.

    Enables the connection's query log and attaches a
    ``logging.StreamHandler`` (stderr by default) to the
    ``orator.connection.queries`` logger at DEBUG level.

    :param conn: object exposing ``enable_query_log()``; ``main`` passes
        ``config.db``.
    """
    # The module never imports ``logging`` at file level, so import it here;
    # without this the verbose path fails with a NameError.
    import logging

    conn.enable_query_log()

    handler = logging.StreamHandler()
    # Only the ``query`` attribute of each log record is printed.
    handler.setFormatter(logging.Formatter('%(query)s'))

    logger = logging.getLogger('orator.connection.queries')
    logger.setLevel(logging.DEBUG)
    logger.addHandler(handler)

def log_tick(char='.', flush=True):
    """Write a progress marker to stdout without a trailing newline.

    :param char: marker to print (``'.'`` by default).
    :param flush: when true, flush stdout right after writing so the
        marker shows up immediately.
    """
    print(char, end='')
    if not flush:
        return
    sys.stdout.flush()

def log_line(string):
    """Print ``string`` on its own line, preceded by a line break.

    The leading line break terminates any run of progress ticks that was
    written without a newline.
    """
    print('', string, sep='\n')

def flush_database(conn):
    """Empty every table the importer writes to.

    Issues one ``TRUNCATE TABLE`` statement per table through
    ``conn.statement``, in a fixed order: ``addresses_messages``,
    ``messages``, ``addresses``, ``folders``.

    :param conn: object exposing ``statement(sql)``.
    """
    truncate_statements = (
        'TRUNCATE TABLE `addresses_messages`',
        'TRUNCATE TABLE `messages`',
        'TRUNCATE TABLE `addresses`',
        'TRUNCATE TABLE `folders`',
    )
    for sql in truncate_statements:
        conn.statement(sql)

def parse_date(date):
    """Convert an RFC 2822 date header into a POSIX timestamp.

    :param date: raw ``Date`` header value, or ``None`` when the header is
        absent.
    :return: seconds since the epoch, or ``None`` when the header is
        missing, cannot be parsed, or is out of the representable range.
    """
    if date is None:
        return None

    # str(): the value may not be a plain string (the stdlib returns a
    # ``Header`` object for headers containing raw non-ASCII bytes), and
    # ``parsedate_tz`` only handles strings.
    parsed = parsedate_tz(str(date))
    if parsed is None:
        # Malformed header: ``mktime_tz(None)`` would raise TypeError and
        # abort the whole import, so report "no date" instead.
        return None

    try:
        return mktime_tz(parsed)
    except (ValueError, OverflowError):
        # Syntactically valid but absurd dates (e.g. a year far out of
        # range) are treated like unparseable ones.
        return None

def insert_addresses(mail):
    """Yield ``(type, Address)`` pairs for every address on a message.

    Reads the ``to``, ``cc`` and ``from`` headers of ``mail`` (in that
    order). A single header may list several mailboxes; each of them is
    reported. Addresses that are not stored yet are created with an empty
    ``domain`` and announced with an ``'a'`` progress tick; known ones are
    looked up and reused.

    :param mail: message object providing ``get_all(name, default)``.
    :return: generator of ``(type_, address)`` tuples, ``type_`` being one
        of ``'to'``, ``'cc'`` or ``'from'``.
    """
    # Function-scope import: the file-level import only brings in
    # ``parseaddr``, which returns just the FIRST mailbox of a header and
    # therefore silently dropped every further recipient.
    from email.utils import getaddresses

    for type_ in ('to', 'cc', 'from'):
        # str(): header values are not guaranteed to be plain strings.
        header_values = [str(value) for value in mail.get_all(type_, [])]

        for _, email_addr in getaddresses(header_values):
            # Unparseable or empty entries come back as ''.
            if email_addr == '':
                continue

            # One query instead of count() followed by first().
            address = Address.where('email', email_addr).first()
            if address is None:
                address = Address.create(email=email_addr, domain='')
                log_tick('a')

            yield (type_, address)

def main():
    """Import the folders of a Maildir into the database.

    Parses the command line, optionally enables query logging, empties the
    target tables and then, for every folder returned by
    ``Maildir.list_folders()`` that is not excluded, stores the folder, each
    of its messages and the addresses found on them. Progress is reported
    with one ``'.'`` per message.

    Command line:
        LOCATION               path of the Maildir to read
        -v                     log the executed queries
        -xf, --exclude-folders regular expression; folders whose name
                               matches (``re.match``) are skipped
    """
    # The text must be passed as ``description``; the first positional
    # parameter of ArgumentParser is ``prog`` and would end up in the
    # usage line instead.
    pars = argparse.ArgumentParser(
        description='Import Maildir messages to a database.')

    pars.add_argument(
        'location', metavar='LOCATION', type=str,
        help='Location of the Maildir')
    pars.add_argument(
        '-v', dest='verbose', action='store_true',
        help='Verbose output')
    pars.add_argument(
        '-xf', '--exclude-folders', dest='exclude_folders', type=str,
        help='Exclude folders matching a regular expression')

    args = pars.parse_args()

    # argparse always sets ``exclude_folders`` (to None when the option is
    # omitted), so test its value rather than its presence. Compiling up
    # front also rejects an invalid pattern before any data is touched.
    exclude = None
    if args.exclude_folders:
        exclude = re.compile(args.exclude_folders)

    if args.verbose:
        start_logger(config.db)

    Model.set_connection_resolver(config.db)

    # Open the mailbox BEFORE truncating the tables and without creating
    # it: a mistyped LOCATION now fails here instead of wiping the database
    # and leaving a freshly created, empty Maildir behind.
    md = Maildir(args.location, create=False)

    flush_database(config.db)

    for folder in md.list_folders():
        if exclude is not None and exclude.match(folder):
            log_line('Skipping %s' % folder)
            continue

        # Insert folder
        Folder.create(name=folder)
        log_line(folder)

        for key, mail in md.get_folder(folder).iteritems():
            # Insert message
            # NOTE(review): a missing Subject is stored as the text 'None',
            # and the 1022-byte cut may split a multi-byte character --
            # kept as-is, confirm against the column definition.
            msg = Message.create(
                filename=key,
                folder=folder,
                subject=str(mail.get('Subject')).encode('utf8')[:1022],
                date=parse_date(mail.get('Date')),
                content_type=mail.get_content_type()
            )

            # Insert addresses, recording each one's role (to/cc/from)
            # under the 'type' key.
            addrs = list(insert_addresses(mail))
            for type_, addr in addrs:
                msg.addresses().save(addr, {'type': type_})

            log_tick('.')

    print()


# Run the importer only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()