From: Armaan Bhojwani Date: Sun, 31 Jan 2021 18:53:53 +0000 (-0500) Subject: Clean input better, only parse first two columns X-Git-Tag: v0.3.0~2 X-Git-Url: https://git.armaanb.net/?p=lightcards.git;a=commitdiff_plain;h=6ed831357a8c023b9f871c7de54316c232b07737 Clean input better, only parse first two columns --- diff --git a/contrib/3col.md b/contrib/3col.md new file mode 100644 index 0000000..f86e072 --- /dev/null +++ b/contrib/3col.md @@ -0,0 +1,10 @@ +| Side 1 | Side 2 | Side 3 | +|--------------|-------------|--------------| +| Card 1 Front | Card 1 Back | Card 1 Third | +| Card 2 Front | Card 2 Back | Card 2 Third | +| Card 3 Front | Card 3 Back | Card 3 Third | +| Card 4 Front | Card 4 Back | Card 4 Third | +| Card 5 Front | Card 5 Back | Card 5 Third | +| Card 6 Front | Card 6 Back | Card 6 Third | +| Card 7 Front | Card 7 Back | Card 7 Third | +| Card 8 Front | Card 8 Back | Card 8 Third | diff --git a/lightcards/parse.py b/lightcards/parse.py index 65cad62..fb5ac68 100755 --- a/lightcards/parse.py +++ b/lightcards/parse.py @@ -20,21 +20,14 @@ def parse_html(html): def clean_text(inp): return inp.get_text().rstrip() - def clean_list(inp): - for z in inp: - if not len(z) == 2: - inp.remove(z) - return inp - soup = BeautifulSoup(html, 'html.parser') outp = [] for x in soup.find_all("tr"): - outp.append(Card([clean_text(y) for y in x.find_all("td")])) + outp.append(Card([clean_text(y) for y in x.find_all("td")[:2]])) # Return a tuple of nested lists - return ([clean_text(x) for x in soup.find_all("th")], - clean_list(outp)) + return ([clean_text(x) for x in soup.find_all("th")][:2], outp[1:]) def main(file):