X-Git-Url: https://git.armaanb.net/?p=lightcards.git;a=blobdiff_plain;f=lightcards%2Fparse.py;fp=lightcards%2Fparse.py;h=95319f4628c5aecc1f683267be547f3f0fbbaf72;hp=8e0d320f6202f5e0bde347b540bcca36d02cc93e;hb=9c901da1569d3c2ec5f4a59f57e5ad067112b4ba;hpb=3ec382bac0913a7268e8059eaf337a54cf1b0f5c diff --git a/lightcards/parse.py b/lightcards/parse.py index 8e0d320..95319f4 100644 --- a/lightcards/parse.py +++ b/lightcards/parse.py @@ -24,35 +24,28 @@ def md2html(file): return outp -def parse_html(html): +def parse_html(html, lenient): """Use BeautifulSoup to parse the HTML""" def clean_text(inp): return inp.get_text().rstrip() - soup = BeautifulSoup(html, "html.parser").find_all("table") - outp = [] - - for table in soup: - try: - for x in table.find_all("tr"): - y = x.find_all("td") - if y: - outp.append(Card(tuple([clean_text(z) for z in y]))) - except AttributeError: - raise Exception("lightcards: No table found") from None + soup = BeautifulSoup(html, "html.parser") + outp, ths = [], [] + for table in soup.find_all("table"): ths = table.find_all("th") if len(ths) != 2: - raise Exception("lightcards: Headings malformed") + if not lenient: + raise Exception("lightcards: Headings malformed") + else: + try: + for x in table.find_all("tr"): + y = x.find_all("td") + if y: + outp.append(Card(tuple([clean_text(z) for z in y]))) + except AttributeError: + raise Exception("lightcards: No table found") from None # Return a tuple of nested lists return ([clean_text(x) for x in ths], outp) - - -def main(file): - return parse_html(md2html(file)) - - -if __name__ == "__main__": - print(main(sys.argv[1]))