X-Git-Url: https://git.armaanb.net/?p=lightcards.git;a=blobdiff_plain;f=lightcards%2Fparse.py;h=6fc2a759cf2ac152f636b9a376efa9ddf2d9db29;hp=6f3c25f6baf0d40b75fbdcfcf6ecbe4e850fae98;hb=468d491893758e3b9ac532757ea98791303f4a6d;hpb=da5f204dfb0e33a99bba1fa00842b253bf9947e0 diff --git a/lightcards/parse.py b/lightcards/parse.py old mode 100755 new mode 100644 index 6f3c25f..6fc2a75 --- a/lightcards/parse.py +++ b/lightcards/parse.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python # Parse markdown table into tuple of lists # Armaan Bhojwani 2021 @@ -10,28 +9,34 @@ from .deck import Card def md2html(file): - with open(file, "r", encoding="utf-8") as input_file: - return markdown.markdown(input_file.read(), extensions=['tables']) + """Use the markdown module to convert input to HTML""" + try: + return markdown.markdown(open(file, "r").read(), extensions=["tables"]) + except FileNotFoundError: + sys.exit(f'lightcards: "{file}": No such file or directory') def parse_html(html): + """Use BeautifulSoup to parse the HTML""" + def clean_text(inp): return inp.get_text().rstrip() - def clean_list(inp): - for z in inp: - if not len(z) == 2: - inp.remove(z) - return inp - - soup = BeautifulSoup(html, 'html.parser') + soup = BeautifulSoup(html, "html.parser").find("table") outp = [] - for x in soup.find_all("tr"): - outp.append(Card([clean_text(y) for y in x.find_all("td")])) + try: + for x in soup.find_all("tr"): + outp.append(Card(tuple([clean_text(y) for y in x.find_all("td")]))) + except AttributeError: + sys.exit("lightcards: No table found") + + ths = soup.find_all("th") + if len(ths) != 2: + sys.exit("lightcards: Headings malformed") - return ([clean_text(x) for x in soup.find_all("th")], - clean_list(outp)) + # Return a tuple of nested lists + return ([clean_text(x) for x in ths], outp[1:]) def main(file):