aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Camil Staps, 2015-10-04 14:31:38 +0200
committer: Camil Staps, 2015-10-04 14:54:10 +0200
commit: 5ea8db32ef9d5f788b854e82248cf62316ed0120 (patch)
tree: 7f9e9992ec40f1017019cac02cf67c6ee27fc0c0
parent: Script to read Bible Hub database; Readme (diff)
Orator, Click
-rw-r--r-- README.md 31
-rwxr-xr-x cli.py 114
-rw-r--r-- pyble.py 88
-rw-r--r-- read-biblehub-db.py 89
4 files changed, 212 insertions, 110 deletions
diff --git a/README.md b/README.md
index 7038cb3..ac2295f 100644
--- a/README.md
+++ b/README.md
@@ -5,28 +5,21 @@ This is a Python toolbox to analyse the Bible.
Copyright © 2015 Camil Staps. See license details below.
-## Overview
+## Setting up
-The Bible is stored in an SQLite database with the structure:
+Install the necessary packages:
-Table | Column | Data type | Specials
--------------|---------------|-----------|---------------------------
-book | name | TEXT | PK
-reference | book | TEXT | PK, FK book.name
-reference | chapter | INTEGER | PK
-reference | verse | INTEGER | PK
-translation | name | TEXT | PK
-text | translation | TEXT | PK, FK translation.name
-text | book | TEXT | PK, FK reference.book
-text | chapter | INTEGER | PK, FK reference.chapter
-text | verse | INTEGER | PK, FK reference.verse
-text | text | TEXT |
+ # pip3 install orator click
-<sup>(PK: primary key; FK: foreign key)</sup>
+First we need to set up an SQLite database:
-## Setting up
+ $ ./cli.py init pyble.db
+
+You're going to need one or several translations. One possibility is to [get them from Bible Hub][biblehubdb] (take the "Text file (zipped, 5.5MB)"). Download the zipped text file, then run:
-You're going to need one or several translations. One possibility is to [get them from Bible Hub][biblehubdb]. Download and unzip the zipped text file, then run `read-biblehub-db.py`.
+ $ unzip bibles.zip
+ $ ./cli.py load --type=biblehub bibles.txt pyble.db
+ $ rm bibles.zip bibles.txt
## To do
@@ -34,10 +27,6 @@ Add a table for cross references, and a script to read in the database from [Ope
Make a web frontend for anyone to search, add cross references, and vote for cross references.
-## Troubleshooting
-
-In case anything goes wrong, first check you're using **Python 3**.
-
## License
This project is licensed under the GPLv2 license.
diff --git a/cli.py b/cli.py
new file mode 100755
index 0000000..bcb4c94
--- /dev/null
+++ b/cli.py
@@ -0,0 +1,114 @@
+#!/usr/bin/env python3
+import click
+
+from orator import DatabaseManager, Model
+import orator
+
+from pyble import *
+
+import re
+import time
+
+def setup_database(filename):
+ """Setup a connection with an SQLite database in filename"""
+ config = {
+ 'default': 'sqlite',
+ 'sqlite': {
+ 'driver': 'sqlite',
+ 'database': filename,
+ }
+ }
+ db = DatabaseManager(config)
+ Model.set_connection_resolver(db)
+ return db
+
+@click.group()
+def cli():
+ """Wrapper for CLI commands"""
+ pass
+
+@cli.command()
+@click.argument('filename', type=click.Path())
+def init(filename):
+ """Initialise an SQLite database in filename, and create tables"""
+ click.echo('Initialising pyble database...')
+ db = setup_database(filename)
+ setup_tables(db)
+ click.echo('Done.')
+
+def load_biblehub_line(columns, line):
+ """Split a line from a Bible Hub dump into a usable dictionary
+
+ Arguments:
+ columns -- the columns from the first line of the dump
+ line -- the line to read
+ """
+ pieces = line.split('\t')
+ ref = pieces[0]
+ refm = re.search('([\w ]+?)\s*(\d+):(\d+)', ref)
+ try:
+ parsed = { 'book' : refm.group(1),
+ 'chapter' : int(refm.group(2)),
+ 'verse' : int(refm.group(3)),
+ 'translations' : {} }
+ for i, col in enumerate(columns[1:]):
+ parsed['translations'][col] = pieces[i + 1].strip()
+ return parsed
+ except AttributeError:
+ print('Something wrong with your regex.\n', str(pieces))
+
+def load_biblehub(db, f):
+ """Load a Bible Hub dump into the database"""
+ content = f.read().splitlines()
+ columns = content[0]
+ columns, verses = columns.split('\t'), content[1:]
+
+ for translation in columns[1:]:
+ Translation.first_or_create(name=translation)
+
+ db.begin_transaction()
+ conn = db.connection().get_connection()
+
+ cur_book = None
+ for v in verses:
+ v = load_biblehub_line(columns, v)
+
+ if cur_book != v['book']:
+ if cur_book != None:
+ click.echo(str(round(time.time() - timer, 3)) + 's')
+ timer = time.time()
+
+ click.echo(v['book'] + '...', nl=False)
+ cur_book = v['book']
+
+ book = Book.first_or_create(canonical_name=cur_book)
+
+ # We don't use Orator here to make things ~4x faster
+ conn.execute('INSERT OR IGNORE INTO verses (book, chapter, nr) '
+ 'VALUES (?, ?, ?)', (v['book'], v['chapter'], v['verse']))
+
+ for translation, text in v['translations'].items():
+ conn.execute('INSERT OR IGNORE INTO translated_verses '
+ '(translation, book, chapter, nr, text) '
+ 'VALUES (?, ?, ?, ?, ?)',
+ (translation, cur_book, v['chapter'], v['verse'], text))
+
+ click.echo(str(round(time.time() - timer, 3)) + 's')
+ db.commit()
+
+@cli.command()
+@click.option('--type', required=True)
+@click.argument('filename', type=click.Path())
+@click.argument('dbname', type=click.Path())
+def load(type, filename, dbname):
+ """Load external data into the database"""
+ db = setup_database(dbname)
+ if type == 'biblehub':
+ with open(filename) as f:
+ load_biblehub(db, f)
+ else:
+ click.secho('I don\'t know a type ' + type, fg='red')
+
+# Run the click command group when this file is executed as a script.
+if __name__ == '__main__':
+ cli()
+
diff --git a/pyble.py b/pyble.py
new file mode 100644
index 0000000..b399a29
--- /dev/null
+++ b/pyble.py
@@ -0,0 +1,88 @@
+from orator import DatabaseManager, Model, Schema
+
+class Part(Model):
+ __timestamps__ = False
+ __fillable__ = ['name']
+
+class Book(Model):
+ __timestamps__ = False
+ __fillable__ = ['canonical_name']
+
+class Verse(Model):
+ __timestamps__ = False
+ __fillable__ = ['book', 'chapter', 'nr']
+
+class Translation(Model):
+ __timestamps__ = False
+ __fillable__ = ['name']
+
+class TranslatedVerse(Model):
+ __timestamps__ = False
+ __table__ = 'translated_verses'
+ __fillable__ = ['translation', 'book', 'chapter', 'nr', 'text']
+
+class Passage(Model):
+ __timestamps__ = False
+ __fillable__ = ['fst', 'snd']
+
+class CrossReference(Model):
+ __timestamps__ = False
+ __fillable__ = ['passage_id1', 'passage_id2', 'relevance']
+
+def setup_tables(db):
+ schema = Schema(db)
+
+ with schema.create('parts') as table:
+ table.string('name')
+ table.primary('name')
+
+ with schema.create('books') as table:
+ table.string('canonical_name').primary()
+
+ with schema.create('verses') as table:
+ table.string('book')
+ table.small_integer('chapter')
+ table.small_integer('nr')
+ table.foreign('book').references('canonical_name').on('books')
+ table.primary(['book', 'chapter', 'nr'])
+
+ with schema.create('translations') as table:
+ table.string('name').primary()
+
+ with schema.create('passages') as table:
+ table.increments('id')
+ table.string('fst_book')
+ table.small_integer('fst_chapter')
+ table.small_integer('fst_nr')
+ table.string('snd_book')
+ table.small_integer('snd_chapter')
+ table.small_integer('snd_nr')
+ table.foreign(['fst_book', 'fst_chapter', 'fst_nr'])\
+ .references(['book', 'chapter', 'nr']).on('verses')\
+ .on_delete('restrict').on_update('cascade')
+ table.foreign(['snd_book', 'snd_chapter', 'snd_nr'])\
+ .references(['book', 'chapter', 'nr']).on('verses')\
+ .on_delete('restrict').on_update('cascade')
+ table.unique(['fst_book', 'fst_chapter', 'fst_nr',
+ 'snd_book', 'snd_chapter', 'snd_nr'])
+
+ with schema.create('crossreferences') as table:
+ table.integer('passage_id1')
+ table.integer('passage_id2')
+ table.small_integer('relevance').default(0)
+ table.foreign('passage_id1').references('id').on('passages')
+ table.foreign('passage_id2').references('id').on('passages')
+ table.primary(['passage_id1', 'passage_id2'])
+
+ with schema.create('translated_verses') as table:
+ table.string('translation')
+ table.string('book')
+ table.small_integer('chapter')
+ table.small_integer('nr')
+ table.string('text')
+ table.foreign('translation').references('name').on('translations')
+ table.foreign(['book', 'chapter', 'nr'])\
+ .references(['book', 'chapter', 'nr']).on('verses')\
+ .on_delete('restrict').on_update('cascade')
+ table.primary(['translation', 'book', 'chapter', 'nr'])
+
diff --git a/read-biblehub-db.py b/read-biblehub-db.py
deleted file mode 100644
index 451852d..0000000
--- a/read-biblehub-db.py
+++ /dev/null
@@ -1,89 +0,0 @@
-import re
-import sqlite3
-
-filename = 'bibles.txt'
-dbname = 'bibles.db'
-
-def connect():
- return sqlite3.connect(dbname)
-
-def create_tables(conn):
- conn.execute('CREATE TABLE IF NOT EXISTS book '
- '(name TEXT PRIMARY KEY)')
- conn.execute('CREATE TABLE IF NOT EXISTS reference '
- '(book TEXT REFERENCES book (name) '
- 'ON DELETE RESTRICT ON UPDATE CASCADE, '
- 'chapter INTEGER, '
- 'verse INTEGER, '
- 'PRIMARY KEY (book, chapter, verse))')
- conn.execute('CREATE TABLE IF NOT EXISTS translation '
- '(name TEXT PRIMARY KEY)')
- conn.execute('CREATE TABLE IF NOT EXISTS text '
- '(translation TEXT REFERENCES translation (name) '
- 'ON DELETE RESTRICT ON UPDATE CASCADE, '
- 'book TEXT, '
- 'chapter INTEGER, '
- 'verse INTEGER, '
- 'text TEXT, '
- 'PRIMARY KEY (translation, book, chapter, verse), '
- 'FOREIGN KEY (book, chapter, verse) '
- 'REFERENCES reference (book, chapter, verse) '
- 'ON DELETE RESTRICT ON UPDATE RESTRICT)')
- conn.commit()
-
-def read_file():
- content = []
- with open(filename) as f:
- content = f.read().splitlines()
- columns = content[0]
- return (columns.split('\t'), content[1:])
-
-def read_verse(columns, line):
- pieces = line.split('\t')
- ref = pieces[0]
- refm = re.search('([\w ]+?)\s*(\d+):(\d+)', ref)
- try:
- parsed = { 'book' : refm.group(1),
- 'chapter' : int(refm.group(2)),
- 'verse' : int(refm.group(3)),
- 'translations' : {} }
- for i, col in enumerate(columns[1:]):
- parsed['translations'][col] = pieces[i + 1].strip()
- return parsed
- except AttributeError:
- print('Something wrong with your regex.\n', str(pieces))
-
-def main():
- conn = connect()
- create_tables(conn)
-
- columns, verses = read_file()
-
- for translation in columns[1:]:
- conn.execute('INSERT OR IGNORE INTO translation (name) VALUES (?)',
- (translation,))
-
- cur_book, cur_chapter = '', 0
- for v in verses:
- v = read_verse(columns, v)
-
- if cur_book != v['book'] or cur_chapter != v['chapter']:
- print(v['book'], v['chapter'])
- cur_book, cur_chapter = v['book'], v['chapter']
-
- conn.execute('INSERT OR IGNORE INTO book (name) VALUES (?)',
- (v['book'],))
- conn.execute('INSERT OR IGNORE INTO reference (book, chapter, verse) '
- 'VALUES (?, ?, ?)', (v['book'], v['chapter'], v['verse']))
-
- for translation, text in v['translations'].items():
- conn.execute('INSERT OR IGNORE INTO text '
- '(translation, book, chapter, verse, text) '
- 'VALUES (?, ?, ?, ?, ?)',
- (translation, v['book'], v['chapter'], v['verse'], text))
-
- conn.commit()
- conn.close()
-
-if __name__ == '__main__':
- main()