Orator, Click

author: Camil Staps 2015-10-04 14:31:38 +0200
committer: Camil Staps 2015-10-04 14:54:10 +0200
commit: 5ea8db32ef9d5f788b854e82248cf62316ed0120 (patch)
tree: 7f9e9992ec40f1017019cac02cf67c6ee27fc0c0
parent: Script to read Bible Hub database; Readme (diff)
4 files changed, 212 insertions, 110 deletions
diff --git a/README.md b/README.md
index 7038cb3..ac2295f 100644
--- a/README.md
+++ b/README.md
@@ -5,28 +5,21 @@ This is a Python toolbox to analyse the Bible.
 
 Copyright &copy; 2015 Camil Staps. See license details below.
 
-## Overview
+## Setting up
 
-The Bible is stored in an SQLite database with the structure:
+Install the necessary packages:
 
-Table        | Column        | Data type | Specials
--------------|---------------|-----------|---------------------------
-book         | name          | TEXT      | PK
-reference    | book          | TEXT      | PK, FK book.name
-reference    | chapter       | INTEGER   | PK
-reference    | verse         | INTEGER   | PK
-translation  | name          | TEXT      | PK
-text         | translation   | TEXT      | PK, FK translation.name
-text         | book          | TEXT      | PK, FK reference.book
-text         | chapter       | INTEGER   | PK, FK reference.chapter
-text         | verse         | INTEGER   | PK, FK reference.verse
-text         | text          | TEXT      |
+    # pip3 install orator click
 
-<sup>(PK: primary key; FK: foreign key)</sup>
+First we need to set up an SQLite database:
 
-## Setting up
+    $ ./cli.py init pyble.db
+
+You're going to need one or several translations. One possibility is to [get them from Bible Hub][biblehubdb] (take the "Text file (zipped, 5.5MB)"). Download and unzip the zipped text file, then run:
 
-You're going to need one or several translations. One possibility is to [get them from Bible Hub][biblehubdb]. Download and unzip the zipped text file, then run `read-biblehub-db.py`.
+    $ unzip bibles.zip
+    $ ./cli.py load --type=biblehub bibles.txt pyble.db
+    $ rm bibles.zip bibles.txt
 
 ## To do
 
@@ -34,10 +27,6 @@ Add a table for cross references, and a script to read in the database from [Ope
 
 Make a web frontend for anyone to search, add cross references, and vote for cross references.
 
-## Troubleshooting
-
-In case anything goes wrong, first check you're using **Python 3**.
-
 ## License
 
 This project is licensed under the GPLv2 license. 
diff --git a/cli.py b/cli.py
new file mode 100755
index 0000000..bcb4c94
--- /dev/null
+++ b/cli.py
@@ -0,0 +1,114 @@
+#!/usr/bin/env python3
+import click
+
+from orator import DatabaseManager, Model
+import orator
+
+from pyble import *
+
+import re
+import time
+
+def setup_database(filename):
+    """Setup a connection with an SQLite database in filename"""
+    config = {
+            'default': 'sqlite',
+            'sqlite': {
+                'driver': 'sqlite',
+                'database': filename,
+            }
+        }
+    db = DatabaseManager(config)
+    Model.set_connection_resolver(db)
+    return db
+
+@click.group()
+def cli():
+    """Wrapper for CLI commands"""
+    pass
+
+@cli.command()
+@click.argument('filename', type=click.Path())
+def init(filename):
+    """Initialise an SQLite database in filename, and create tables"""
+    click.echo('Initialising pyble database...')
+    db = setup_database(filename)
+    setup_tables(db)
+    click.echo('Done.')
+
+def load_biblehub_line(columns, line):
+    """Split a line from a Bible Hub dump into a usable dictionary
+    
+    Arguments:
+    columns -- the columns from the first line of the dump
+    line -- the line to read
+    """
+    pieces = line.split('\t')
+    ref = pieces[0]
+    refm = re.search('([\w ]+?)\s*(\d+):(\d+)', ref)
+    try:
+        parsed = { 'book' : refm.group(1),
+                'chapter' : int(refm.group(2)),
+                'verse' : int(refm.group(3)),
+                'translations' : {} }
+        for i, col in enumerate(columns[1:]):
+            parsed['translations'][col] = pieces[i + 1].strip()
+        return parsed
+    except AttributeError:
+        print('Something wrong with your regex.\n', str(pieces))
+
+def load_biblehub(db, f):
+    """Load a Bible Hub dump into the database"""
+    content = f.read().splitlines()
+    columns = content[0]
+    columns, verses = columns.split('\t'), content[1:]
+
+    for translation in columns[1:]:
+        Translation.first_or_create(name=translation)
+
+    db.begin_transaction()
+    conn = db.connection().get_connection()
+
+    cur_book = None
+    for v in verses:
+        v = load_biblehub_line(columns, v)
+
+        if cur_book != v['book']:
+            if cur_book != None:
+                click.echo(str(round(time.time() - timer, 3)) + 's')
+            timer = time.time()
+            
+            click.echo(v['book'] + '...', nl=False)
+            cur_book = v['book']
+
+            book = Book.first_or_create(canonical_name=cur_book)
+
+        # We don't use Orator here to make things ~4x faster
+        conn.execute('INSERT OR IGNORE INTO verses (book, chapter, nr) '
+                'VALUES (?, ?, ?)', (v['book'], v['chapter'], v['verse']))
+        
+        for translation, text in v['translations'].items():
+            conn.execute('INSERT OR IGNORE INTO translated_verses '
+                    '(translation, book, chapter, nr, text) '
+                    'VALUES (?, ?, ?, ?, ?)',
+                    (translation, cur_book, v['chapter'], v['verse'], text))
+
+    click.echo(str(round(time.time() - timer, 3)) + 's')
+    db.commit()
+
+@cli.command()
+@click.option('--type', required=True)
+@click.argument('filename', type=click.Path())
+@click.argument('dbname', type=click.Path())
+def load(type, filename, dbname):
+    """Load external data into the database"""
+    db = setup_database(dbname)
+    if type == 'biblehub':
+        with open(filename) as f:
+            load_biblehub(db, f)
+    else:
+        click.secho('I don\'t know a type ' + type, fg='red')
+
+if __name__ == '__main__':
+    cli()
+
diff --git a/pyble.py b/pyble.py
new file mode 100644
index 0000000..b399a29
--- /dev/null
+++ b/pyble.py
@@ -0,0 +1,88 @@
+from orator import DatabaseManager, Model, Schema
+
+class Part(Model):
+    __timestamps__ = False
+    __fillable__ = ['name']
+
+class Book(Model):
+    __timestamps__ = False
+    __fillable__ = ['canonical_name']
+
+class Verse(Model):
+    __timestamps__ = False
+    __fillable__ = ['book', 'chapter', 'nr']
+
+class Translation(Model):
+    __timestamps__ = False
+    __fillable__ = ['name']
+
+class TranslatedVerse(Model):
+    __timestamps__ = False
+    __table__ = 'translated_verses'
+    __fillable__ = ['translation', 'book', 'chapter', 'nr', 'text']
+
+class Passage(Model):
+    __timestamps__ = False
+    __fillable__ = ['fst', 'snd']
+
+class CrossReference(Model):
+    __timestamps__ = False
+    __fillable__ = ['passage_id1', 'passage_id2', 'relevance']
+
+def setup_tables(db):
+    schema = Schema(db)
+
+    with schema.create('parts') as table:
+        table.string('name')
+        table.primary('name')
+
+    with schema.create('books') as table:
+        table.string('canonical_name').primary()
+
+    with schema.create('verses') as table:
+        table.string('book')
+        table.small_integer('chapter')
+        table.small_integer('nr')
+        table.foreign('book').references('canonical_name').on('books')
+        table.primary(['book', 'chapter', 'nr'])
+
+    with schema.create('translations') as table:
+        table.string('name').primary()
+    
+    with schema.create('passages') as table:
+        table.increments('id')
+        table.string('fst_book')
+        table.small_integer('fst_chapter')
+        table.small_integer('fst_nr')
+        table.string('snd_book')
+        table.small_integer('snd_chapter')
+        table.small_integer('snd_nr')
+        table.foreign(['fst_book', 'fst_chapter', 'fst_nr'])\
+                .references(['book', 'chapter', 'nr']).on('verses')\
+                .on_delete('restrict').on_update('cascade')
+        table.foreign(['snd_book', 'snd_chapter', 'snd_nr'])\
+                .references(['book', 'chapter', 'nr']).on('verses')\
+                .on_delete('restrict').on_update('cascade')
+        table.unique(['fst_book', 'fst_chapter', 'fst_nr',
+            'snd_book', 'snd_chapter', 'snd_nr'])
+
+    with schema.create('crossreferences') as table:
+        table.integer('passage_id1')
+        table.integer('passage_id2')
+        table.small_integer('relevance').default(0)
+        table.foreign('passage_id1').references('id').on('passages')
+        table.foreign('passage_id2').references('id').on('passages')
+        table.primary(['passage_id1', 'passage_id2'])
+
+    with schema.create('translated_verses') as table:
+        table.string('translation')
+        table.string('book')
+        table.small_integer('chapter')
+        table.small_integer('nr')
+        table.string('text')
+        table.foreign('translation').references('name').on('translations')
+        table.foreign(['book', 'chapter', 'nr'])\
+                .references(['book', 'chapter', 'nr']).on('verses')\
+                .on_delete('restrict').on_update('cascade')
+        table.primary(['translation', 'book', 'chapter', 'nr'])
+
diff --git a/read-biblehub-db.py b/read-biblehub-db.py
deleted file mode 100644
index 451852d..0000000
--- a/read-biblehub-db.py
+++ /dev/null
@@ -1,89 +0,0 @@
-import re
-import sqlite3
-
-filename = 'bibles.txt'
-dbname = 'bibles.db'
-
-def connect():
-    return sqlite3.connect(dbname)
-
-def create_tables(conn):
-    conn.execute('CREATE TABLE IF NOT EXISTS book '
-                    '(name TEXT PRIMARY KEY)')
-    conn.execute('CREATE TABLE IF NOT EXISTS reference '
-                    '(book TEXT REFERENCES book (name) '
-                        'ON DELETE RESTRICT ON UPDATE CASCADE, '
-                    'chapter INTEGER, '
-                    'verse INTEGER, '
-                    'PRIMARY KEY (book, chapter, verse))')
-    conn.execute('CREATE TABLE IF NOT EXISTS translation '
-                    '(name TEXT PRIMARY KEY)')
-    conn.execute('CREATE TABLE IF NOT EXISTS text '
-                    '(translation TEXT REFERENCES translation (name) '
-                        'ON DELETE RESTRICT ON UPDATE CASCADE, '
-                    'book TEXT, '
-                    'chapter INTEGER, '
-                    'verse INTEGER, '
-                    'text TEXT, '
-                    'PRIMARY KEY (translation, book, chapter, verse), '
-                    'FOREIGN KEY (book, chapter, verse) '
-                        'REFERENCES reference (book, chapter, verse) '
-                        'ON DELETE RESTRICT ON UPDATE RESTRICT)')
-    conn.commit()
-
-def read_file():
-    content = []
-    with open(filename) as f:
-        content = f.read().splitlines()
-    columns = content[0]
-    return (columns.split('\t'), content[1:])
-
-def read_verse(columns, line):
-    pieces = line.split('\t')
-    ref = pieces[0]
-    refm = re.search('([\w ]+?)\s*(\d+):(\d+)', ref)
-    try:
-        parsed = { 'book' : refm.group(1),
-                'chapter' : int(refm.group(2)),
-                'verse' : int(refm.group(3)),
-                'translations' : {} }
-        for i, col in enumerate(columns[1:]):
-            parsed['translations'][col] = pieces[i + 1].strip()
-        return parsed
-    except AttributeError:
-        print('Something wrong with your regex.\n', str(pieces))
-
-def main():
-    conn = connect()
-    create_tables(conn)
-
-    columns, verses = read_file()
-
-    for translation in columns[1:]:
-        conn.execute('INSERT OR IGNORE INTO translation (name) VALUES (?)', 
-                (translation,))
-
-    cur_book, cur_chapter = '', 0
-    for v in verses:
-        v = read_verse(columns, v)
-                
-        if cur_book != v['book'] or cur_chapter != v['chapter']:
-            print(v['book'], v['chapter'])
-            cur_book, cur_chapter = v['book'], v['chapter']
-        
-        conn.execute('INSERT OR IGNORE INTO book (name) VALUES (?)', 
-                (v['book'],))
-        conn.execute('INSERT OR IGNORE INTO reference (book, chapter, verse) '
-                    'VALUES (?, ?, ?)', (v['book'], v['chapter'], v['verse']))
-
-        for translation, text in v['translations'].items():
-            conn.execute('INSERT OR IGNORE INTO text '
-                    '(translation, book, chapter, verse, text) '
-                    'VALUES (?, ?, ?, ?, ?)',
-                    (translation, v['book'], v['chapter'], v['verse'], text))
-    
-    conn.commit()
-    conn.close()
-
-if __name__ == '__main__':
-    main()
author	Camil Staps	2015-10-04 14:31:38 +0200
committer	Camil Staps	2015-10-04 14:54:10 +0200
commit	5ea8db32ef9d5f788b854e82248cf62316ed0120 (patch)
tree	7f9e9992ec40f1017019cac02cf67c6ee27fc0c0
parent	Script to read Bible Hub database; Readme (diff)