X-Git-Url: https://git.armaanb.net/?a=blobdiff_plain;f=lightcards%2Fparse.py;h=6fc2a759cf2ac152f636b9a376efa9ddf2d9db29;hb=6226519694970400eb98632369d042b0499ecabe;hp=f9ca3ee15afc6648ced65bd8d9436e911c19d4cf;hpb=ddc9c9451cc55b4aa8869963439a48a8f1977a3c;p=lightcards.git diff --git a/lightcards/parse.py b/lightcards/parse.py index f9ca3ee..6fc2a75 100644 --- a/lightcards/parse.py +++ b/lightcards/parse.py @@ -22,14 +22,21 @@ def parse_html(html): def clean_text(inp): return inp.get_text().rstrip() - soup = BeautifulSoup(html, "html.parser") + soup = BeautifulSoup(html, "html.parser").find("table") outp = [] - for x in soup.find_all("tr"): - outp.append(Card([clean_text(y) for y in x.find_all("td")[:2]])) + try: + for x in soup.find_all("tr"): + outp.append(Card(tuple([clean_text(y) for y in x.find_all("td")]))) + except AttributeError: + sys.exit("lightcards: No table found") + + ths = soup.find_all("th") + if len(ths) != 2: + sys.exit("lightcards: Headings malformed") # Return a tuple of nested lists - return ([clean_text(x) for x in soup.find_all("th")][:2], outp[1:]) + return ([clean_text(x) for x in ths], outp[1:]) def main(file):