diff options
-rw-r--r-- | README.md | 31 | ||||
-rwxr-xr-x | cli.py | 114 | ||||
-rw-r--r-- | pyble.py | 88 | ||||
-rw-r--r-- | read-biblehub-db.py | 89 |
4 files changed, 212 insertions, 110 deletions
@@ -5,28 +5,21 @@ This is a Python toolbox to analyse the Bible.
 
 Copyright © 2015 Camil Staps. See license details below.
 
-## Overview
+## Setting up
 
-The Bible is stored in an SQLite database with the structure:
+Install the necessary packages:
 
-Table | Column | Data type | Specials
--------------|---------------|-----------|---------------------------
-book | name | TEXT | PK
-reference | book | TEXT | PK, FK book.name
-reference | chapter | INTEGER | PK
-reference | verse | INTEGER | PK
-translation | name | TEXT | PK
-text | translation | TEXT | PK, FK translation.name
-text | book | TEXT | PK, FK reference.book
-text | chapter | INTEGER | PK, FK reference.chapter
-text | verse | INTEGER | PK, FK reference.verse
-text | text | TEXT |
+    # pip3 install orator click
 
-<sup>(PK: primary key; FK: foreign key)</sup>
+First we need to setup an SQLite database:
 
-## Setting up
+    $ ./cli.py init pyble.db
+
+You're going to need one or several translations. One possibility is to [get them from Bible Hub][biblehubdb] (take the "Text file (zipped, 5.5MB)"). Download and unzip the zipped text file, then run:
 
-You're going to need one or several translations. One possibility is to [get them from Bible Hub][biblehubdb]. Download and unzip the zipped text file, then run `read-biblehub-db.py`.
+    $ unzip bibles.zip
+    $ ./cli.py load --type=biblehub bibles.txt pyble.db
+    $ rm bibles.zip bibles.txt
 
 ## To do
 
@@ -34,10 +27,6 @@ Add a table for cross references, and a script to read in the database from [Ope
 
 Make a web frontend for anyone to search, add cross references, and vote for cross references.
 
-## Troubleshooting
-
-In case anything goes wrong, first check you're using **Python 3**.
-
 ## License
 
 This project is licensed under the GPLv2 license.
@@ -0,0 +1,114 @@
+#!/usr/bin/env python3
+import click
+
+from orator import DatabaseManager, Model
+import orator
+
+from pyble import *
+
+import re
+import time
+
+def setup_database(filename):
+    """Setup a connection with an SQLite database in filename"""
+    config = {
+        'default': 'sqlite',
+        'sqlite': {
+            'driver': 'sqlite',
+            'database': filename,
+        }
+    }
+    db = DatabaseManager(config)
+    Model.set_connection_resolver(db)
+    return db
+
+@click.group()
+def cli():
+    """Wrapper for CLI commands"""
+    pass
+
+@cli.command()
+@click.argument('filename', type=click.Path())
+def init(filename):
+    """Initialise an SQLite database in filename, and create tables"""
+    click.echo('Initialising pyble database...')
+    db = setup_database(filename)
+    setup_tables(db)
+    click.echo('Done.')
+
+def load_biblehub_line(columns, line):
+    """Split a line from a Bible Hub dump into a usable dictionary
+
+    Arguments:
+    columns -- the columns from the first line of the dump
+    line -- the line to read
+    """
+    pieces = line.split('\t')
+    ref = pieces[0]
+    refm = re.search('([\w ]+?)\s*(\d+):(\d+)', ref)
+    try:
+        parsed = { 'book' : refm.group(1),
+                   'chapter' : int(refm.group(2)),
+                   'verse' : int(refm.group(3)),
+                   'translations' : {} }
+        for i, col in enumerate(columns[1:]):
+            parsed['translations'][col] = pieces[i + 1].strip()
+        return parsed
+    except AttributeError:
+        print('Something wrong with your regex.\n', str(pieces))
+
+def load_biblehub(db, f):
+    """Load a Bible Hub dump into the database"""
+    content = f.read().splitlines()
+    columns = content[0]
+    columns, verses = columns.split('\t'), content[1:]
+
+    for translation in columns[1:]:
+        Translation.first_or_create(name=translation)
+
+    db.begin_transaction()
+    conn = db.connection().get_connection()
+
+    cur_book = None
+    for v in verses:
+        v = load_biblehub_line(columns, v)
+
+        if cur_book != v['book']:
+            if cur_book != None:
+                click.echo(str(round(time.time() - timer, 3)) + 's')
+            timer = time.time()
+
+            click.echo(v['book'] + '...', nl=False)
+            cur_book = v['book']
+
+            book = Book.first_or_create(canonical_name=cur_book)
+
+        # We don't use Orator here to make things ~4x faster
+        conn.execute('INSERT OR IGNORE INTO verses (book, chapter, nr) '
+                'VALUES (?, ?, ?)', (v['book'], v['chapter'], v['verse']))
+
+        for translation, text in v['translations'].items():
+            conn.execute('INSERT OR IGNORE INTO translated_verses '
+                    '(translation, book, chapter, nr, text) '
+                    'VALUES (?, ?, ?, ?, ?)',
+                    (translation, cur_book, v['chapter'], v['verse'], text))
+
+    click.echo(str(round(time.time() - timer, 3)) + 's')
+    db.commit()
+
+@cli.command()
+@click.option('--type', required=True)
+@click.argument('filename', type=click.Path())
+@click.argument('dbname', type=click.Path())
+def load(type, filename, dbname):
+    """Load external data into the database"""
+    db = setup_database(dbname)
+    if type == 'biblehub':
+        with open(filename) as f:
+            load_biblehub(db, f)
+    else:
+        click.secho('I don\'t know a type ' + type, fg='red')
+
+if __name__ == '__main__':
+    cli()
+
diff --git a/pyble.py b/pyble.py
new file mode 100644
index 0000000..b399a29
--- /dev/null
+++ b/pyble.py
@@ -0,0 +1,88 @@
+from orator import DatabaseManager, Model, Schema
+
+class Part(Model):
+    __timestamps__ = False
+    __fillable__ = ['name']
+
+class Book(Model):
+    __timestamps__ = False
+    __fillable__ = ['canonical_name']
+
+class Verse(Model):
+    __timestamps__ = False
+    __fillable__ = ['book', 'chapter', 'nr']
+
+class Translation(Model):
+    __timestamps__ = False
+    __fillable__ = ['name']
+
+class TranslatedVerse(Model):
+    __timestamps__ = False
+    __table__ = 'translated_verses'
+    __fillable__ = ['translation', 'book', 'chapter', 'nr', 'text']
+
+class Passage(Model):
+    __timestamps__ = False
+    __fillable__ = ['fst', 'snd']
+
+class CrossReference(Model):
+    __timestamps__ = False
+    __fillable__ = ['passage_id1', 'passage_id2', 'relevance']
+
+def setup_tables(db):
+    schema = Schema(db)
+
+    with schema.create('parts') as table:
+        table.string('name')
+        table.primary('name')
+
+    with schema.create('books') as table:
+        table.string('canonical_name').primary()
+
+    with schema.create('verses') as table:
+        table.string('book')
+        table.small_integer('chapter')
+        table.small_integer('nr')
+        table.foreign('book').references('canonical_name').on('books')
+        table.primary(['book', 'chapter', 'nr'])
+
+    with schema.create('translations') as table:
+        table.string('name').primary()
+
+    with schema.create('passages') as table:
+        table.increments('id')
+        table.string('fst_book')
+        table.small_integer('fst_chapter')
+        table.small_integer('fst_nr')
+        table.string('snd_book')
+        table.small_integer('snd_chapter')
+        table.small_integer('snd_nr')
+        table.foreign(['fst_book', 'fst_chapter', 'fst_nr'])\
+                .references(['book', 'chapter', 'nr']).on('verses')\
+                .on_delete('restrict').on_update('cascade')
+        table.foreign(['snd_book', 'snd_chapter', 'snd_nr'])\
+                .references(['book', 'chapter', 'nr']).on('verses')\
+                .on_delete('restrict').on_update('cascade')
+        table.unique(['fst_book', 'fst_chapter', 'fst_nr',
+                'snd_book', 'snd_chapter', 'snd_nr'])
+
+    with schema.create('crossreferences') as table:
+        table.integer('passage_id1')
+        table.integer('passage_id2')
+        table.small_integer('relevance').default(0)
+        table.foreign('passage_id1').references('id').on('passages')
+        table.foreign('passage_id2').references('id').on('passages')
+        table.primary(['passage_id1', 'passage_id2'])
+
+    with schema.create('translated_verses') as table:
+        table.string('translation')
+        table.string('book')
+        table.small_integer('chapter')
+        table.small_integer('nr')
+        table.string('text')
+        table.foreign('translation').references('name').on('translations')
+        table.foreign(['book', 'chapter', 'nr'])\
+                .references(['book', 'chapter', 'nr']).on('verses')\
+                .on_delete('restrict').on_update('cascade')
+        table.primary(['translation', 'book', 'chapter', 'nr'])
+
diff --git a/read-biblehub-db.py b/read-biblehub-db.py
deleted file mode 100644
index 451852d..0000000
--- a/read-biblehub-db.py
+++ /dev/null
@@ -1,89 +0,0 @@
-import re
-import sqlite3
-
-filename = 'bibles.txt'
-dbname = 'bibles.db'
-
-def connect():
-    return sqlite3.connect(dbname)
-
-def create_tables(conn):
-    conn.execute('CREATE TABLE IF NOT EXISTS book '
-            '(name TEXT PRIMARY KEY)')
-    conn.execute('CREATE TABLE IF NOT EXISTS reference '
-            '(book TEXT REFERENCES book (name) '
-            'ON DELETE RESTRICT ON UPDATE CASCADE, '
-            'chapter INTEGER, '
-            'verse INTEGER, '
-            'PRIMARY KEY (book, chapter, verse))')
-    conn.execute('CREATE TABLE IF NOT EXISTS translation '
-            '(name TEXT PRIMARY KEY)')
-    conn.execute('CREATE TABLE IF NOT EXISTS text '
-            '(translation TEXT REFERENCES translation (name) '
-            'ON DELETE RESTRICT ON UPDATE CASCADE, '
-            'book TEXT, '
-            'chapter INTEGER, '
-            'verse INTEGER, '
-            'text TEXT, '
-            'PRIMARY KEY (translation, book, chapter, verse), '
-            'FOREIGN KEY (book, chapter, verse) '
-            'REFERENCES reference (book, chapter, verse) '
-            'ON DELETE RESTRICT ON UPDATE RESTRICT)')
-    conn.commit()
-
-def read_file():
-    content = []
-    with open(filename) as f:
-        content = f.read().splitlines()
-    columns = content[0]
-    return (columns.split('\t'), content[1:])
-
-def read_verse(columns, line):
-    pieces = line.split('\t')
-    ref = pieces[0]
-    refm = re.search('([\w ]+?)\s*(\d+):(\d+)', ref)
-    try:
-        parsed = { 'book' : refm.group(1),
-                   'chapter' : int(refm.group(2)),
-                   'verse' : int(refm.group(3)),
-                   'translations' : {} }
-        for i, col in enumerate(columns[1:]):
-            parsed['translations'][col] = pieces[i + 1].strip()
-        return parsed
-    except AttributeError:
-        print('Something wrong with your regex.\n', str(pieces))
-
-def main():
-    conn = connect()
-    create_tables(conn)
-
-    columns, verses = read_file()
-
-    for translation in columns[1:]:
-        conn.execute('INSERT OR IGNORE INTO translation (name) VALUES (?)',
-                (translation,))
-
-    cur_book, cur_chapter = '', 0
-    for v in verses:
-        v = read_verse(columns, v)
-
-        if cur_book != v['book'] or cur_chapter != v['chapter']:
-            print(v['book'], v['chapter'])
-            cur_book, cur_chapter = v['book'], v['chapter']
-
-        conn.execute('INSERT OR IGNORE INTO book (name) VALUES (?)',
-                (v['book'],))
-        conn.execute('INSERT OR IGNORE INTO reference (book, chapter, verse) '
-                'VALUES (?, ?, ?)', (v['book'], v['chapter'], v['verse']))
-
-        for translation, text in v['translations'].items():
-            conn.execute('INSERT OR IGNORE INTO text '
-                    '(translation, book, chapter, verse, text) '
-                    'VALUES (?, ?, ?, ?, ?)',
-                    (translation, v['book'], v['chapter'], v['verse'], text))
-
-    conn.commit()
-    conn.close()
-
-if __name__ == '__main__':
-    main()