diff options
Diffstat (limited to 'read-biblehub-db.py')
-rw-r--r-- | read-biblehub-db.py | 89 |
1 files changed, 89 insertions, 0 deletions
diff --git a/read-biblehub-db.py b/read-biblehub-db.py new file mode 100644 index 0000000..451852d --- /dev/null +++ b/read-biblehub-db.py @@ -0,0 +1,89 @@ +import re +import sqlite3 + +filename = 'bibles.txt' +dbname = 'bibles.db' + +def connect(): + return sqlite3.connect(dbname) + +def create_tables(conn): + conn.execute('CREATE TABLE IF NOT EXISTS book ' + '(name TEXT PRIMARY KEY)') + conn.execute('CREATE TABLE IF NOT EXISTS reference ' + '(book TEXT REFERENCES book (name) ' + 'ON DELETE RESTRICT ON UPDATE CASCADE, ' + 'chapter INTEGER, ' + 'verse INTEGER, ' + 'PRIMARY KEY (book, chapter, verse))') + conn.execute('CREATE TABLE IF NOT EXISTS translation ' + '(name TEXT PRIMARY KEY)') + conn.execute('CREATE TABLE IF NOT EXISTS text ' + '(translation TEXT REFERENCES translation (name) ' + 'ON DELETE RESTRICT ON UPDATE CASCADE, ' + 'book TEXT, ' + 'chapter INTEGER, ' + 'verse INTEGER, ' + 'text TEXT, ' + 'PRIMARY KEY (translation, book, chapter, verse), ' + 'FOREIGN KEY (book, chapter, verse) ' + 'REFERENCES reference (book, chapter, verse) ' + 'ON DELETE RESTRICT ON UPDATE RESTRICT)') + conn.commit() + +def read_file(): + content = [] + with open(filename) as f: + content = f.read().splitlines() + columns = content[0] + return (columns.split('\t'), content[1:]) + +def read_verse(columns, line): + pieces = line.split('\t') + ref = pieces[0] + refm = re.search('([\w ]+?)\s*(\d+):(\d+)', ref) + try: + parsed = { 'book' : refm.group(1), + 'chapter' : int(refm.group(2)), + 'verse' : int(refm.group(3)), + 'translations' : {} } + for i, col in enumerate(columns[1:]): + parsed['translations'][col] = pieces[i + 1].strip() + return parsed + except AttributeError: + print('Something wrong with your regex.\n', str(pieces)) + +def main(): + conn = connect() + create_tables(conn) + + columns, verses = read_file() + + for translation in columns[1:]: + conn.execute('INSERT OR IGNORE INTO translation (name) VALUES (?)', + (translation,)) + + cur_book, cur_chapter = '', 0 + for v in verses: + v = read_verse(columns, v) + + if cur_book != v['book'] or cur_chapter != v['chapter']: + print(v['book'], v['chapter']) + cur_book, cur_chapter = v['book'], v['chapter'] + + conn.execute('INSERT OR IGNORE INTO book (name) VALUES (?)', + (v['book'],)) + conn.execute('INSERT OR IGNORE INTO reference (book, chapter, verse) ' + 'VALUES (?, ?, ?)', (v['book'], v['chapter'], v['verse'])) + + for translation, text in v['translations'].items(): + conn.execute('INSERT OR IGNORE INTO text ' + '(translation, book, chapter, verse, text) ' + 'VALUES (?, ?, ?, ?, ?)', + (translation, v['book'], v['chapter'], v['verse'], text)) + + conn.commit() + conn.close() + +if __name__ == '__main__': + main() |