aboutsummaryrefslogtreecommitdiff
path: root/cli.py
blob: 921ecf577a7422d69ec864241c4cf5acd81dcee7 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/env python3
import click

from orator import DatabaseManager, Model
import orator

from pyble import *

import re
import time

def setup_database(filename):
    """Connect the ORM to the SQLite database stored in filename

    Builds a DatabaseManager whose default (and only) connection is the
    SQLite file, registers it as the connection resolver for Model, and
    returns the manager.
    """
    sqlite_settings = {
        'driver': 'sqlite',
        'database': filename,
    }
    manager = DatabaseManager({
        'default': 'sqlite',
        'sqlite': sqlite_settings,
    })
    Model.set_connection_resolver(manager)
    return manager

@click.group()
def cli():
    """Wrapper for CLI commands"""
    # Intentionally empty: this group only exists so that the commands
    # below can register themselves on it via @cli.command().

@cli.command()
@click.argument('filename', type=click.Path())
def init(filename):
    """Initialise an SQLite database in filename, and create tables"""
    # Step 1: connect and create the schema.
    click.echo('Initialising pyble database...')
    database = setup_database(filename)
    setup_tables(database)

    # Step 2: seed the alternative book names.
    click.echo('Inserting common book names...')
    setup_fill_alternative_book_names(database)

    click.echo('Done.')

# Matches a verse reference such as "1 John 3:16": book name, chapter, verse.
# Written as a raw string so \w, \s and \d reach the regex engine untouched
# instead of being treated as (invalid) string escapes.
_REFERENCE_PATTERN = re.compile(r'([\w ]+?)\s*(\d+):(\d+)')

def load_biblehub_line(columns, line):
    """Split a line from a Bible Hub dump into a usable dictionary

    Arguments:
    columns -- the columns from the first line of the dump
    line -- the line to read

    Returns a dictionary with the keys 'book', 'chapter', 'verse' and
    'translations'; the latter maps every column name after the first to
    the stripped text found in that column. Columns missing from a short
    line are reported as ''. When the first field holds no recognisable
    reference, a diagnostic is printed and None is returned.
    """
    pieces = line.split('\t')
    match = _REFERENCE_PATTERN.search(pieces[0])
    if match is None:
        print('Something wrong with your regex.\n', str(pieces))
        return None

    names = columns[1:]
    texts = pieces[1:]
    # Pad short lines so that a missing trailing column reads as empty text
    # instead of raising IndexError. Surplus fields are ignored by zip().
    texts += [''] * (len(names) - len(texts))

    return {
        'book': match.group(1),
        'chapter': int(match.group(2)),
        'verse': int(match.group(3)),
        'translations': {
            name: text.strip() for name, text in zip(names, texts)
        },
    }

def load_biblehub(db, f):
    """Load a Bible Hub dump into the database

    Arguments:
    db -- the database manager, as returned by setup_database
    f -- an open text file holding the tab-separated dump; its first line
         names the columns, every following line is one verse

    Every column after the first is registered as a translation. The verse
    inserts run inside a single transaction, which is rolled back (and the
    error re-raised) if loading fails part-way. Lines for which
    load_biblehub_line returns None are skipped. An empty file loads
    nothing.
    """
    content = f.read().splitlines()
    if not content:
        # Not even a header line: there is nothing to load.
        return
    columns, verses = content[0].split('\t'), content[1:]

    for translation in columns[1:]:
        Translation.first_or_create(name=translation)

    db.begin_transaction()
    try:
        conn = db.connection().get_connection()

        cur_book = None
        timer = None
        for line in verses:
            v = load_biblehub_line(columns, line)
            if v is None:
                # The parser has already reported the unreadable line.
                continue

            if cur_book != v['book']:
                # A new book starts: finish the timing line of the previous
                # one (if any) and open a new one.
                if cur_book is not None:
                    click.echo(str(round(time.time() - timer, 3)) + 's')
                timer = time.time()

                click.echo(v['book'] + '...', nl=False)
                cur_book = v['book']

                # Only the side effect matters; the returned model is unused.
                Book.first_or_create(canonical_name=cur_book)

            # We don't use Orator here to make things ~4x faster
            conn.execute('INSERT OR IGNORE INTO verses (book, chapter, nr) '
                    'VALUES (?, ?, ?)', (cur_book, v['chapter'], v['verse']))

            for translation, text in v['translations'].items():
                text = text.strip()
                if text:
                    conn.execute('INSERT OR IGNORE INTO translated_verses '
                            '(translation, book, chapter, nr, text) '
                            'VALUES (?, ?, ?, ?, ?)',
                            (translation, cur_book, v['chapter'], v['verse'],
                                text))

        # Close the timing line of the last book; when no verse was loaded
        # there is no open line (and no timer) to close.
        if cur_book is not None:
            click.echo(str(round(time.time() - timer, 3)) + 's')
    except Exception:
        # Do not leave a half-loaded dump behind in an open transaction.
        db.rollback()
        raise
    else:
        db.commit()

@cli.command()
@click.option('--type', required=True)
@click.argument('filename', type=click.Path())
@click.argument('dbname', type=click.Path())
def load(type, filename, dbname):
    """Load external data into the database"""
    # NOTE: `type` shadows the builtin, but the parameter name has to match
    # the --type option declared above.
    database = setup_database(dbname)

    # Guard clause: 'biblehub' is the only dump format handled here.
    if type != 'biblehub':
        click.secho("I don't know a type " + type, fg='red')
        return

    with open(filename) as dump:
        load_biblehub(database, dump)

# Run the click command group when this file is executed as a script.
if __name__ == '__main__':
    cli()