X-Git-Url: https://git.armaanb.net/?a=blobdiff_plain;f=lightcards%2Fparse.py;h=082b7dce1dedf0fca6879bf91818915f39fb086f;hb=b43e50be2267001cd085bc5cd1be01a59a5eddea;hp=6fc2a759cf2ac152f636b9a376efa9ddf2d9db29;hpb=da0b2a5411a041e29a0574d3aeb3f0da311ff5c1;p=lightcards.git diff --git a/lightcards/parse.py b/lightcards/parse.py index 6fc2a75..082b7dc 100644 --- a/lightcards/parse.py +++ b/lightcards/parse.py @@ -1,7 +1,6 @@ # Parse markdown table into tuple of lists # Armaan Bhojwani 2021 -import sys from bs4 import BeautifulSoup import markdown @@ -10,38 +9,49 @@ from .deck import Card def md2html(file): """Use the markdown module to convert input to HTML""" - try: - return markdown.markdown(open(file, "r").read(), extensions=["tables"]) - except FileNotFoundError: - sys.exit(f'lightcards: "{file}": No such file or directory') + outp = "" + for i in file: + try: + outp += markdown.markdown( + open(i, "r").read(), extensions=["tables"] + ) + except FileNotFoundError: + raise Exception( + f'lightcards: "{i}": No such file or directory' + ) from None + return outp -def parse_html(html): + +def parse_html(html, args, conf): """Use BeautifulSoup to parse the HTML""" def clean_text(inp): return inp.get_text().rstrip() - soup = BeautifulSoup(html, "html.parser").find("table") - outp = [] - - try: - for x in soup.find_all("tr"): - outp.append(Card(tuple([clean_text(y) for y in x.find_all("td")]))) - except AttributeError: - sys.exit("lightcards: No table found") - - ths = soup.find_all("th") - if len(ths) != 2: - sys.exit("lightcards: Headings malformed") + soup = BeautifulSoup(html, "html.parser") + outp, ths = [], [] + + if args.table: + table_num = args.table + elif conf["table"]: + table_num = conf["table"] + else: + table_num = False + + for i, table in enumerate(soup.find_all("table"), start=1): + ths = table.find_all("th") + if len(ths) != 2: + if conf["lenient"] or not args.lenient: + raise Exception("lightcards: Headings malformed") + elif (table_num and i == table_num) or not table_num: + try: + for x in table.find_all("tr"): + y = x.find_all("td") + if y: + outp.append(Card(tuple([clean_text(z) for z in y]))) + except AttributeError: + raise Exception("lightcards: No table found") from None # Return a tuple of nested lists - return ([clean_text(x) for x in ths], outp[1:]) - - -def main(file): - return parse_html(md2html(file)) - - -if __name__ == "__main__": - print(main(sys.argv[1])) + return ([clean_text(x) for x in ths], outp)