From 4d919723513c0723847762890e9e6226e0cf32a2 Mon Sep 17 00:00:00 2001 From: Camil Staps Date: Mon, 18 Jul 2016 19:50:48 +0200 Subject: Initial commit --- .gitignore | 93 ++++++++++++++++ README.md | 41 ++++++++ .../2016_07_18_144136_create_folders_table.py | 17 +++ .../2016_07_18_144145_create_addresses_table.py | 19 ++++ .../2016_07_18_144155_create_messages_table.py | 22 ++++ ...07_18_144435_create_addresses_messages_table.py | 21 ++++ migrations/__init__.py | 0 models/__init__.py | 0 models/address.py | 7 ++ models/folder.py | 8 ++ models/message.py | 20 ++++ stats.py | 117 +++++++++++++++++++++ 12 files changed, 365 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 migrations/2016_07_18_144136_create_folders_table.py create mode 100644 migrations/2016_07_18_144145_create_addresses_table.py create mode 100644 migrations/2016_07_18_144155_create_messages_table.py create mode 100644 migrations/2016_07_18_144435_create_addresses_messages_table.py create mode 100644 migrations/__init__.py create mode 100644 models/__init__.py create mode 100644 models/address.py create mode 100644 models/folder.py create mode 100644 models/message.py create mode 100755 stats.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6c0bd22 --- /dev/null +++ b/.gitignore @@ -0,0 +1,93 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# IPython Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# dotenv +.env + +# virtualenv +venv/ +ENV/ + +# Spyder project settings +.spyderproject + +# Rope project settings +.ropeproject + +############################################################################### + +config.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..fc090de --- /dev/null +++ b/README.md @@ -0,0 +1,41 @@ +# maildir-stats + +Maildir statistics scripts + +## Installation + +Create a `config.py` with a basic configuration as explained in the [Orator +documentation](https://orator-orm.com/docs/0.8/basic_usage.html#configuration). + + # pip3 install orator PyMySQL + $ orator migrate + +## Usage + + $ ./stats.py /home/mail/my-maildir + +## Author, copyright, license + +``` +MIT License + +Copyright (c) 2016 Camil Staps + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
class CreateFoldersTable(Migration):
    """
    Schema migration for the ``folders`` table.
    """

    def up(self):
        """
        Create ``folders`` with one string column, ``name``, marked primary.
        """
        with self.schema.create('folders') as folders:
            name_column = folders.string('name')
            name_column.primary()

    def down(self):
        """
        Drop the ``folders`` table again.
        """
        self.schema.drop('folders')
class CreateMessagesTable(Migration):
    """
    Schema migration for the ``messages`` table.
    """

    def up(self):
        """
        Create ``messages``: an auto-incrementing ``id``, the message
        ``filename``, the ``folder`` name, a nullable ``subject`` of up to
        1023 characters, a nullable ``date`` and the ``content_type``.
        """
        with self.schema.create('messages') as messages:
            messages.increments('id')
            messages.string('filename')

            # NOTE(review): ``references``/``on`` are chained on the column
            # definition here. Orator's documentation declares foreign keys
            # via ``table.foreign(...)``; confirm that this chain actually
            # produces a constraint on ``folders.name``.
            folder_column = messages.string('folder')
            folder_column.references('name').on('folders')

            subject_column = messages.string('subject', 1023)
            subject_column.nullable()

            date_column = messages.datetime('date')
            date_column.nullable()

            messages.string('content_type')

    def down(self):
        """
        Drop the ``messages`` table again.
        """
        self.schema.drop('messages')
def start_logger(conn):
    """Echo the queries executed on ``conn`` through a stream handler.

    Enables the connection's query log and attaches a default
    ``logging.StreamHandler`` to the ``orator.connection.queries`` logger
    at DEBUG level. Only the ``query`` field of each record is printed.

    :param conn: object exposing ``enable_query_log()`` (``main`` passes
        ``config.db``).
    """
    # Imported locally: the file-level imports do not provide ``logging``,
    # so the original body raised NameError as soon as -v was used.
    import logging

    conn.enable_query_log()
    logger = logging.getLogger('orator.connection.queries')
    logger.setLevel(logging.DEBUG)
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter('%(query)s'))
    logger.addHandler(handler)


def log_tick(char='.', flush=True):
    """Print a progress marker on the current line (no newline).

    :param char: text to print.
    :param flush: flush standard output right away so the marker shows up
        while the import is still running.
    """
    print(char, end='', flush=flush)


def log_line(string):
    """Terminate the current line of progress markers, then print ``string``."""
    print()
    print(string)


def flush_database(conn):
    """Empty every table this script writes to.

    The tables are truncated in the order pivot, messages, addresses,
    folders.

    :param conn: object exposing ``statement(sql)``.
    """
    for table in ('addresses_messages', 'messages', 'addresses', 'folders'):
        conn.statement('TRUNCATE TABLE `%s`' % table)


def parse_date(date):
    """Convert an RFC 2822 date header into a UTC timestamp.

    :param date: value of a ``Date`` header (string or header object), or
        ``None`` when the header is absent.
    :return: seconds since the epoch as computed by ``mktime_tz``, or
        ``None`` when the header is missing, cannot be parsed, or lies
        outside the range the platform can represent.
    """
    if date is None:
        return None

    # str(): a header value is not guaranteed to be a plain string.
    parsed = parsedate_tz(str(date))
    if parsed is None:
        # parsedate_tz() signals an unparseable date with None; feeding that
        # to mktime_tz() raised TypeError and aborted the whole import.
        return None

    try:
        return mktime_tz(parsed)
    except (ValueError, OverflowError):
        # Syntactically valid but nonsensical dates (e.g. absurd years).
        return None


def insert_addresses(mail):
    """Yield ``(type, Address)`` for every address in To, Cc and From.

    Addresses that are not in the database yet are created (and an ``a``
    progress marker is printed); known ones are looked up by e-mail.
    Each ``(type, address)`` combination is yielded at most once per
    message, because the pivot table's primary key is
    ``(message_id, address_id, type)``.

    :param mail: message object exposing ``get_all(name, default)``.
    :return: generator of ``(type, Address)`` tuples, where ``type`` is one
        of ``'to'``, ``'cc'`` or ``'from'``.
    """
    # Local import: the file-level import only provides parseaddr, which
    # returns just the first address of a header listing several.
    from email.utils import getaddresses

    seen = set()
    for type_ in ('to', 'cc', 'from'):
        values = [str(value) for value in mail.get_all(type_, [])]
        for _, addr in getaddresses(values):
            if addr == '':
                continue

            address = Address.where('email', addr).first()
            if address is None:
                # Everything after the last '@'; empty when there is none.
                domain = addr.rpartition('@')[2] if '@' in addr else ''
                address = Address.create(email=addr, domain=domain)
                log_tick('a')

            pair = (type_, address.id)
            if pair in seen:
                continue
            seen.add(pair)

            yield (type_, address)


def main():
    """Import all folders of a Maildir into the configured database.

    Parses the command line, optionally enables query logging, empties the
    tables and then stores every folder, every message in it and the
    addresses found in the message's To/Cc/From headers.
    """
    # The text is a description; passed positionally it became ``prog``.
    pars = argparse.ArgumentParser(
        description='Import Maildir messages to a database.')

    pars.add_argument(
        'location', metavar='LOCATION', type=str,
        help='Location of the Maildir')
    pars.add_argument(
        '-v', dest='verbose', action='store_true',
        help='Verbose output')
    pars.add_argument(
        '-xf', '--exclude-folders', dest='exclude_folders', type=str,
        help='Exclude folders matching a regular expression')

    args = pars.parse_args()

    if args.verbose:
        start_logger(config.db)

    Model.set_connection_resolver(config.db)

    flush_database(config.db)

    # The attribute always exists on the namespace (default None), so test
    # its value; compile once instead of once per folder.
    exclude = None
    if args.exclude_folders is not None:
        exclude = re.compile(args.exclude_folders)

    # create=False: a mistyped path should fail, not create a new mailbox.
    md = Maildir(args.location, create=False)
    for folder in md.list_folders():
        if exclude is not None and exclude.match(folder):
            log_line('Skipping %s' % folder)
            continue

        # Insert folder
        Folder.create(name=folder)
        log_line(folder)

        for key, mail in md.get_folder(folder).iteritems():
            # A missing Subject stays NULL (the column is nullable) instead
            # of becoming the text 'None'; truncate on characters so that a
            # multi-byte character is never cut in half.
            subject = mail.get('Subject')
            if subject is not None:
                subject = str(subject)[:1023]

            # Insert message
            msg = Message.create(
                filename=key,
                folder=folder,
                subject=subject,
                date=parse_date(mail.get('Date')),
                content_type=mail.get_content_type(),
            )

            # Insert addresses
            for type_, addr in insert_addresses(mail):
                msg.addresses().save(addr, {'type': type_})

            log_tick('.')

    print()


if __name__ == '__main__':
    main()