about summary refs log tree commit diff
diff options
context:
space:
mode:
author Camil Staps 2016-07-18 19:50:48 +0200
committer Camil Staps 2016-07-18 19:55:25 +0200
commit 4d919723513c0723847762890e9e6226e0cf32a2 (patch)
tree 2ac1821b7316b1135e0cea08aaf78eac16a6bb6d
Initial commit
-rw-r--r-- .gitignore 93
-rw-r--r-- README.md 41
-rw-r--r-- migrations/2016_07_18_144136_create_folders_table.py 17
-rw-r--r-- migrations/2016_07_18_144145_create_addresses_table.py 19
-rw-r--r-- migrations/2016_07_18_144155_create_messages_table.py 22
-rw-r--r-- migrations/2016_07_18_144435_create_addresses_messages_table.py 21
-rw-r--r-- migrations/__init__.py 0
-rw-r--r-- models/__init__.py 0
-rw-r--r-- models/address.py 7
-rw-r--r-- models/folder.py 8
-rw-r--r-- models/message.py 20
-rwxr-xr-x stats.py 117
12 files changed, 365 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6c0bd22
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,93 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# IPython Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# dotenv
+.env
+
+# virtualenv
+venv/
+ENV/
+
+# Spyder project settings
+.spyderproject
+
+# Rope project settings
+.ropeproject
+
+###############################################################################
+
+config.py
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..fc090de
--- /dev/null
+++ b/README.md
@@ -0,0 +1,41 @@
+# maildir-stats
+
+Maildir statistics scripts
+
+## Installation
+
+Create a `config.py` with a basic configuration as explained in the [Orator
+documentation](https://orator-orm.com/docs/0.8/basic_usage.html#configuration).
+`stats.py` imports this module and uses the database manager it exposes as
+`db`.
+
+    # pip3 install orator PyMySQL
+    $ orator migrate
+
+## Usage
+
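+    $ ./stats.py /home/mail/my-maildir
+
+Every run first empties the `folders`, `addresses`, `messages` and
+`addresses_messages` tables and then imports the Maildir from scratch.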
+
+## Author, copyright, license
+
+```
+MIT License
+
+Copyright (c) 2016 Camil Staps
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+```
diff --git a/migrations/2016_07_18_144136_create_folders_table.py b/migrations/2016_07_18_144136_create_folders_table.py
new file mode 100644
index 0000000..8ac9bf7
--- /dev/null
+++ b/migrations/2016_07_18_144136_create_folders_table.py
@@ -0,0 +1,17 @@
+from orator.migrations import Migration
+
+
+class CreateFoldersTable(Migration):
+    """Schema migration for the ``folders`` table."""
+
+    def up(self):
+        """
+        Create the ``folders`` table.
+
+        The folder name is the only column and doubles as the primary key.
+        """
+        with self.schema.create('folders') as folders:
+            folders.string('name').primary()
+
+    def down(self):
+        """
+        Drop the ``folders`` table again.
+        """
+        self.schema.drop('folders')
diff --git a/migrations/2016_07_18_144145_create_addresses_table.py b/migrations/2016_07_18_144145_create_addresses_table.py
new file mode 100644
index 0000000..3b69a70
--- /dev/null
+++ b/migrations/2016_07_18_144145_create_addresses_table.py
@@ -0,0 +1,19 @@
+from orator.migrations import Migration
+
+
+class CreateAddressesTable(Migration):
+    """Schema migration for the ``addresses`` table."""
+
+    def up(self):
+        """
+        Create the ``addresses`` table.
+
+        Columns: an auto-increment ``id`` plus the ``email`` and ``domain``
+        strings.
+        """
+        with self.schema.create('addresses') as addresses:
+            addresses.increments('id')
+            addresses.string('email')
+            addresses.string('domain')
+
+    def down(self):
+        """
+        Drop the ``addresses`` table again.
+        """
+        self.schema.drop('addresses')
diff --git a/migrations/2016_07_18_144155_create_messages_table.py b/migrations/2016_07_18_144155_create_messages_table.py
new file mode 100644
index 0000000..369bbc6
--- /dev/null
+++ b/migrations/2016_07_18_144155_create_messages_table.py
@@ -0,0 +1,22 @@
+from orator.migrations import Migration
+
+
+class CreateMessagesTable(Migration):
+    """Schema migration for the ``messages`` table."""
+
+    def up(self):
+        """
+        Create the ``messages`` table.
+
+        ``subject`` and ``date`` are nullable; all other columns are required.
+        """
+        with self.schema.create('messages') as messages:
+            messages.increments('id')
+            messages.string('filename')
+            # NOTE(review): references()/on() are chained onto the column
+            # definition here. The Orator docs declare foreign keys with
+            # table.foreign(...).references(...).on(...) -- confirm whether
+            # this line actually creates a constraint on folders.name.
+            messages.string('folder').references('name').on('folders')
+            messages.string('subject', 1023).nullable()
+            messages.datetime('date').nullable()
+            messages.string('content_type')
+
+    def down(self):
+        """
+        Drop the ``messages`` table again.
+        """
+        self.schema.drop('messages')
diff --git a/migrations/2016_07_18_144435_create_addresses_messages_table.py b/migrations/2016_07_18_144435_create_addresses_messages_table.py
new file mode 100644
index 0000000..e23c8da
--- /dev/null
+++ b/migrations/2016_07_18_144435_create_addresses_messages_table.py
@@ -0,0 +1,21 @@
+from orator.migrations import Migration
+
+
+class CreateAddressesMessagesTable(Migration):
+    """Schema migration for the ``addresses_messages`` pivot table."""
+
+    def up(self):
+        """
+        Create the ``addresses_messages`` table.
+
+        A row links one message to one address in one role (``to``, ``from``
+        or ``cc``); the three columns together form the primary key, so the
+        same link cannot be stored twice.
+        """
+        with self.schema.create('addresses_messages') as pivot:
+            # NOTE(review): references()/on() are chained onto the column
+            # definitions. The Orator docs declare foreign keys with
+            # table.foreign(...).references(...).on(...) -- confirm whether
+            # these lines actually create constraints.
+            pivot.integer('message_id').unsigned().references('id').on('messages')
+            pivot.integer('address_id').unsigned().references('id').on('addresses')
+            pivot.enum('type', ['to', 'from', 'cc'])
+
+            pivot.primary(['message_id', 'address_id', 'type'])
+
+    def down(self):
+        """
+        Drop the ``addresses_messages`` table again.
+        """
+        self.schema.drop('addresses_messages')
diff --git a/migrations/__init__.py b/migrations/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/migrations/__init__.py
diff --git a/models/__init__.py b/models/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/models/__init__.py
diff --git a/models/address.py b/models/address.py
new file mode 100644
index 0000000..3db15de
--- /dev/null
+++ b/models/address.py
@@ -0,0 +1,7 @@
+from orator import Model
+
+
+class Address(Model):
+    """An e-mail address, stored as the ``email`` and ``domain`` columns."""
+
+    # Columns that may be mass-assigned, e.g. through ``Address.create()``.
+    __fillable__ = ['email', 'domain']
+    # Disable Orator's automatic timestamp columns for this model.
+    __timestamps__ = False
diff --git a/models/folder.py b/models/folder.py
new file mode 100644
index 0000000..3bda925
--- /dev/null
+++ b/models/folder.py
@@ -0,0 +1,8 @@
+from orator import Model
+
+
+class Folder(Model):
+    """A mail folder, identified by its name."""
+
+    # Disable Orator's automatic timestamp columns for this model.
+    __timestamps__ = False
+    # Columns that may be mass-assigned, e.g. through ``Folder.create()``.
+    __fillable__ = ['name']
+    __primary_key__ = 'name'
+    # The key is a caller-supplied string, not an auto-increment integer.
+    # Without this flag Orator treats the key as incrementing and, after an
+    # insert, overwrites ``name`` on the in-memory model with the insert id.
+    __incrementing__ = False
diff --git a/models/message.py b/models/message.py
new file mode 100644
index 0000000..9963b71
--- /dev/null
+++ b/models/message.py
@@ -0,0 +1,20 @@
+from orator import Model
+from orator.orm import has_one, belongs_to_many
+
+from .address import Address
+from .folder import Folder
+
+
+class Message(Model):
+    """A single mail message together with its folder and addresses."""
+
+    # Columns that may be mass-assigned, e.g. through ``Message.create()``.
+    __fillable__ = ['filename', 'folder', 'subject', 'date', 'content_type']
+    # ``date`` is handled as a date/time value by Orator.
+    __dates__ = ['date']
+    # Disable Orator's automatic timestamp columns for this model.
+    __timestamps__ = False
+
+    # NOTE(review): this relation shares its name with the ``folder`` column,
+    # and ``has_one`` presumably expects the foreign key on the *folders*
+    # side. A message looks like it belongs to a folder -- confirm whether
+    # ``belongs_to`` was intended before relying on this relation.
+    @has_one
+    def folder(self):
+        """Relation to the :class:`Folder` model."""
+        return Folder
+
+    @belongs_to_many
+    def addresses(self):
+        """Many-to-many relation to the :class:`Address` model."""
+        return Address
diff --git a/stats.py b/stats.py
new file mode 100755
index 0000000..c77eb6b
--- /dev/null
+++ b/stats.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+import argparse
+from email.utils import mktime_tz, parsedate_tz, parseaddr
+from mailbox import Maildir, MaildirMessage
+import re
+import sys
+
+from orator import Model
+from orator.exceptions.orm import ModelNotFound
+
+import config
+from models.folder import Folder
+from models.address import Address
+from models.message import Message
+
+def start_logger(conn):
+    """
+    Print every query that Orator runs on ``conn``.
+
+    Enables the query log on the connection and attaches a stream handler to
+    the ``orator.connection.queries`` logger that emits only the query text.
+
+    :param conn: object providing ``enable_query_log()``
+    """
+    # Imported locally: the module-level imports of this script do not
+    # include ``logging``, so the function used to fail with a NameError.
+    import logging
+
+    conn.enable_query_log()
+
+    handler = logging.StreamHandler()
+    handler.setFormatter(logging.Formatter('%(query)s'))
+
+    logger = logging.getLogger('orator.connection.queries')
+    logger.setLevel(logging.DEBUG)
+    logger.addHandler(handler)
+
+def log_tick(char='.', flush=True):
+    """
+    Write a progress marker to standard output without a trailing newline.
+
+    :param char: the marker to print
+    :param flush: when true, flush standard output right after writing
+    """
+    print(char, end='', flush=flush)
+
+def log_line(string):
+    """
+    Print ``string`` on a line of its own.
+
+    A newline is written first so the text does not end up behind progress
+    markers that were printed without a line break.
+    """
+    print('', string, sep='\n')
+
+def flush_database(conn):
+    """
+    Empty all four tables used by the import.
+
+    The pivot table is truncated first, the ``folders`` table last.
+
+    :param conn: object providing ``statement(sql)``
+    """
+    statements = (
+        'TRUNCATE TABLE `addresses_messages`',
+        'TRUNCATE TABLE `messages`',
+        'TRUNCATE TABLE `addresses`',
+        'TRUNCATE TABLE `folders`',
+    )
+    for sql in statements:
+        conn.statement(sql)
+
+def parse_date(date):
+    """
+    Convert the value of a ``Date`` header to a UTC timestamp.
+
+    :param date: the header value, or ``None`` when the header is missing
+    :return: seconds since the epoch, or ``None`` when the header is missing
+        or cannot be parsed
+    """
+    if date is None:
+        return None
+
+    # parsedate_tz() returns None for text it cannot parse; handing that to
+    # mktime_tz() would raise a TypeError and abort the whole import.
+    parsed = parsedate_tz(str(date))
+    if parsed is None:
+        return None
+
+    return mktime_tz(parsed)
+
+def insert_addresses(mail):
+    """
+    Yield ``(type, Address)`` pairs for the To, Cc and From headers of a mail.
+
+    Addresses that are not in the database yet are created (and an ``a`` is
+    logged for each). Every combination of type and address is yielded at
+    most once per mail.
+
+    :param mail: message object providing ``get_all(name, default)``
+    :return: generator of ``(type, Address)`` tuples, with type one of
+        ``'to'``, ``'cc'`` and ``'from'``
+    """
+    # Imported locally: only parseaddr is imported at module level.
+    from email.utils import getaddresses
+
+    seen = set()
+    for type_ in ('to', 'cc', 'from'):
+        # One header value may list several comma-separated addresses.
+        # getaddresses() splits them; parseaddr() handles a single address
+        # only, so the other recipients used to be lost.
+        values = [str(value) for value in mail.get_all(type_, [])]
+        for _, email in getaddresses(values):
+            if email == '':
+                continue
+
+            # first() returns None when there is no match, so one query is
+            # enough to both test for and fetch an existing address.
+            address = Address.where('email', email).first()
+            if address is None:
+                address = Address.create(email=email, domain='')
+                log_tick('a')
+
+            # An address repeated within one header must not be yielded
+            # twice: the caller links each pair to the message, and a
+            # repeated (message, address, type) link would be a duplicate.
+            link = (type_, address.id)
+            if link in seen:
+                continue
+            seen.add(link)
+
+            yield (type_, address)
+
+def _encode_subject(subject):
+    """Return the subject as at most 1022 bytes of valid UTF-8, or None."""
+    if subject is None:
+        # Keep a missing header as NULL instead of storing the text 'None'.
+        return None
+    raw = str(subject).encode('utf8')[:1022]
+    # Cutting at a byte offset may split a multi-byte character; drop such a
+    # partial trailing character so the value stays valid UTF-8.
+    return raw.decode('utf8', 'ignore').encode('utf8')
+
+
+def main():
+    """
+    Import all messages of a Maildir into the database.
+
+    Parses the command line, empties the tables, and then stores every
+    folder, every message in it and the addresses in the To, Cc and From
+    headers of each message. Folders whose name matches the
+    ``--exclude-folders`` regular expression are skipped.
+    """
+    # The text is the description; the first positional parameter of
+    # ArgumentParser is the program name.
+    pars = argparse.ArgumentParser(
+        description='Import Maildir messages to a database.')
+
+    pars.add_argument(
+        'location', metavar='LOCATION', type=str,
+        help='Location of the Maildir')
+    pars.add_argument(
+        '-v', dest='verbose', action='store_true',
+        help='Verbose output')
+    pars.add_argument(
+        '-xf', '--exclude-folders', dest='exclude_folders', type=str,
+        help='Exclude folders matching a regular expression')
+
+    args = pars.parse_args()
+
+    # The attribute always exists (None when the option is absent), so test
+    # its value. Compiling up front also rejects an invalid pattern before
+    # any table is emptied.
+    exclude = None
+    if args.exclude_folders is not None:
+        exclude = re.compile(args.exclude_folders)
+
+    # create=False: a mistyped location raises an error instead of silently
+    # creating an empty Maildir. Opened before the tables are emptied.
+    md = Maildir(args.location, create=False)
+
+    if args.verbose:
+        start_logger(config.db)
+
+    Model.set_connection_resolver(config.db)
+
+    flush_database(config.db)
+
+    for folder in md.list_folders():
+        if exclude is not None and exclude.match(folder):
+            log_line('Skipping %s' % folder)
+            continue
+
+        # Insert folder
+        Folder.create(name=folder)
+        log_line(folder)
+
+        for key, mail in md.get_folder(folder).iteritems():
+            # Insert message
+            msg = Message.create(
+                filename=key,
+                folder=folder,
+                subject=_encode_subject(mail.get('Subject')),
+                date=parse_date(mail.get('Date')),
+                content_type=mail.get_content_type()
+            )
+
+            # Insert addresses
+            for type_, addr in insert_addresses(mail):
+                msg.addresses().save(addr, {'type': type_})
+
+            log_tick('.')
+
+    print()
+
+
+if __name__ == '__main__':
+ main()