import argparse
import sqlite3
-import sys
import requests
from bs4 import BeautifulSoup
def parse_args(argv=None):
    """Parse the command-line options of the phrase database generator.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the process arguments, so a plain
            ``parse_args()`` call behaves exactly as before.

    Returns:
        The ``argparse.Namespace`` holding ``output``, the path of the
        SQLite file to write (``"phrases.db"`` unless ``-o``/``--output``
        is given).
    """
    parser = argparse.ArgumentParser(
        description="Generate SQLite db of Latin famous phrases from Wikipedia.")
    parser.add_argument("-o", "--output",
                        default="phrases.db",
                        help="set custom output file location")
    # Passing None makes argparse fall back to sys.argv[1:].
    return parser.parse_args(argv)
def get_html(url, timeout=30):
    """Download *url* and return its body parsed with BeautifulSoup.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        A ``BeautifulSoup`` tree built with the ``html.parser`` backend.

    Raises:
        requests.RequestException: If the connection fails, the request
            times out, or the server answers with an HTTP error status.
    """
    print("downloading webpage")
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()
    return BeautifulSoup(response.content, "html.parser")
def prep_database(c):
    """Create an empty ``phrases`` table, replacing any existing one.

    The column list must match the INSERT statement that fills the table
    (id, latin, english, notes, length).

    Args:
        c: Cursor on the target SQLite database.
    """
    print("prepping database")
    # Start from a clean slate so repeated runs do not accumulate rows.
    c.execute("DROP TABLE IF EXISTS phrases")
    c.execute("""CREATE TABLE phrases(
                 id INTEGER,
                 latin TEXT,
                 english TEXT,
                 notes TEXT,
                 length INTEGER)""")
def fill_database(list_table, c, conn):
    """Insert the phrases found in *list_table* into the ``phrases`` table.

    Each direct ``<tr>`` row that has at least three direct ``<td>`` cells
    is stored as one phrase: the first three cells become the latin,
    english and notes columns, ``length`` is the length of the latin text,
    and ``id`` is a counter starting at 0.

    Args:
        list_table: Iterable of parsed table elements to scan.
        c: Cursor used to run the INSERT statements.
        conn: Connection that owns *c*; committed once after all rows
            have been inserted.
    """
    i = 0  # phrase id, advanced once per stored phrase
    print("iterating through tables")
    for table in list_table:
        # Use the table itself when it has no <tbody> child, instead of
        # failing on None.
        body = table.tbody if table.tbody is not None else table
        for row in body.find_all("tr", recursive=False):
            cell = row.find_all("td", recursive=False)
            if len(cell) <= 2:
                # Not a phrase row (fewer than three data cells).
                continue
            print(i, end="\r")  # progress counter, overwritten in place

            latin, english, notes = (
                td.get_text(" ", strip=True).rstrip() for td in cell[:3]
            )
            c.execute("""INSERT INTO phrases
                         (id, latin, english, notes, length)
                         VALUES(?, ?, ?, ?, ?)""",
                      (i, latin, english, notes, len(latin)))
            i += 1
    # One commit for the whole batch rather than one transaction per row.
    conn.commit()
def get_tables():
    """Fetch the pinned Wikipedia revision and return its phrase tables.

    Returns:
        The result of ``find_all`` for every ``<table>`` element carrying
        the ``wikitable`` class on the downloaded page.
    """
    # Adjacent literals keep the line short without embedding a newline
    # (or indentation) in the URL, as a triple-quoted string would.
    url = ("https://en.wikipedia.org/w/index.php"
           "?title=List_of_Latin_phrases_(full)&oldid=986793908")
    return get_html(url).find_all("table", attrs={"class": "wikitable"})
def main(args):
    """Download the phrase tables and write them to the SQLite database.

    Args:
        args: Parsed command-line options; ``args.output`` is the path of
            the database file to create or overwrite.
    """
    # Download before touching the database so that a failed request does
    # not leave an existing file with its phrases table already dropped.
    tables = get_tables()

    conn = sqlite3.connect(args.output)
    try:
        c = conn.cursor()
        try:
            prep_database(c)
            fill_database(tables, c, conn)
        finally:
            c.close()
    finally:
        # Always release the database file, even if filling it failed.
        print("closing database")
        conn.close()
if __name__ == "__main__":
    # Script entry point: read the CLI options, then build the database.
    cli_args = parse_args()
    main(cli_args)