return outp
def parse_html(html, lenient=False):
    """Use BeautifulSoup to parse the HTML into headings and cards.

    Every ``<table>`` in *html* is inspected. A table is accepted when it
    has exactly two ``<th>`` cells; each of its ``<tr>`` rows that contains
    at least one ``<td>`` becomes one ``Card`` built from the tuple of the
    cleaned cell texts.

    Args:
        html: HTML markup to parse.
        lenient: When true, a table that does not have exactly two headings
            is skipped instead of aborting the parse. Defaults to ``False``
            (strict), so one-argument calls keep the strict behaviour.

    Returns:
        A ``(headings, cards)`` tuple: the cleaned heading texts of the
        last accepted table, and the list of cards collected from all
        accepted tables. Both are empty when no table was accepted.

    Raises:
        Exception: ``"lightcards: Headings malformed"`` when a table does
            not have exactly two headings and *lenient* is false;
            ``"lightcards: No table found"`` when an ``AttributeError``
            occurs while reading a table's rows.
    """

    def clean_text(inp):
        # Text content of the element with trailing whitespace removed.
        return inp.get_text().rstrip()

    soup = BeautifulSoup(html, "html.parser")
    headings, cards = [], []

    for table in soup.find_all("table"):
        ths = table.find_all("th")
        if len(ths) != 2:
            if not lenient:
                raise Exception("lightcards: Headings malformed")
            # Lenient mode: skip the malformed table entirely, so that its
            # headings cannot replace those of a previously accepted table.
            continue

        headings = ths
        try:
            for row in table.find_all("tr"):
                cells = row.find_all("td")
                # Rows without <td> cells (e.g. the heading row) are not cards.
                if cells:
                    cards.append(Card(tuple(clean_text(c) for c in cells)))
        except AttributeError:
            raise Exception("lightcards: No table found") from None

    # Return a tuple of nested lists
    return ([clean_text(x) for x in headings], cards)
-
-
-def main(file):
- return parse_html(md2html(file))
-
-
-if __name__ == "__main__":
- print(main(sys.argv[1]))