import re import sqlite3 filename = 'bibles.txt' dbname = 'bibles.db' def connect(): return sqlite3.connect(dbname) def create_tables(conn): conn.execute('CREATE TABLE IF NOT EXISTS book ' '(name TEXT PRIMARY KEY)') conn.execute('CREATE TABLE IF NOT EXISTS reference ' '(book TEXT REFERENCES book (name) ' 'ON DELETE RESTRICT ON UPDATE CASCADE, ' 'chapter INTEGER, ' 'verse INTEGER, ' 'PRIMARY KEY (book, chapter, verse))') conn.execute('CREATE TABLE IF NOT EXISTS translation ' '(name TEXT PRIMARY KEY)') conn.execute('CREATE TABLE IF NOT EXISTS text ' '(translation TEXT REFERENCES translation (name) ' 'ON DELETE RESTRICT ON UPDATE CASCADE, ' 'book TEXT, ' 'chapter INTEGER, ' 'verse INTEGER, ' 'text TEXT, ' 'PRIMARY KEY (translation, book, chapter, verse), ' 'FOREIGN KEY (book, chapter, verse) ' 'REFERENCES reference (book, chapter, verse) ' 'ON DELETE RESTRICT ON UPDATE RESTRICT)') conn.commit() def read_file(): content = [] with open(filename) as f: content = f.read().splitlines() columns = content[0] return (columns.split('\t'), content[1:]) def read_verse(columns, line): pieces = line.split('\t') ref = pieces[0] refm = re.search('([\w ]+?)\s*(\d+):(\d+)', ref) try: parsed = { 'book' : refm.group(1), 'chapter' : int(refm.group(2)), 'verse' : int(refm.group(3)), 'translations' : {} } for i, col in enumerate(columns[1:]): parsed['translations'][col] = pieces[i + 1].strip() return parsed except AttributeError: print('Something wrong with your regex.\n', str(pieces)) def main(): conn = connect() create_tables(conn) columns, verses = read_file() for translation in columns[1:]: conn.execute('INSERT OR IGNORE INTO translation (name) VALUES (?)', (translation,)) cur_book, cur_chapter = '', 0 for v in verses: v = read_verse(columns, v) if cur_book != v['book'] or cur_chapter != v['chapter']: print(v['book'], v['chapter']) cur_book, cur_chapter = v['book'], v['chapter'] conn.execute('INSERT OR IGNORE INTO book (name) VALUES (?)', (v['book'],)) conn.execute('INSERT OR IGNORE INTO reference (book, chapter, verse) ' 'VALUES (?, ?, ?)', (v['book'], v['chapter'], v['verse'])) for translation, text in v['translations'].items(): conn.execute('INSERT OR IGNORE INTO text ' '(translation, book, chapter, verse, text) ' 'VALUES (?, ?, ?, ?, ?)', (translation, v['book'], v['chapter'], v['verse'], text)) conn.commit() conn.close() if __name__ == '__main__': main()