X-Git-Url: https://git.armaanb.net/?a=blobdiff_plain;f=lightcards%2Fparse.py;h=9b1820af5214edc3c9911721d8bb12eb04dcd5c9;hb=94cf6952335a3965f64ea3a818ef0acad16f6574;hp=457da136f4b30b503f3350dfdcf3f9dd7d58150c;hpb=52521ebab49dc4d0e2fb2a2b970873d7b832da01;p=lightcards.git diff --git a/lightcards/parse.py b/lightcards/parse.py old mode 100755 new mode 100644 index 457da13..9b1820a --- a/lightcards/parse.py +++ b/lightcards/parse.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python # Parse markdown table into tuple of lists # Armaan Bhojwani 2021 @@ -6,30 +5,32 @@ import sys from bs4 import BeautifulSoup import markdown +from .deck import Card + def md2html(file): - with open(file, "r", encoding="utf-8") as input_file: - return markdown.markdown(input_file.read(), extensions=['tables']) + """Use the markdown module to convert input to HTML""" + try: + with open(file, "r", encoding="utf-8") as input_file: + return markdown.markdown(input_file.read(), extensions=['tables']) + except FileNotFoundError: + print(f"lightcards: \"{file}\": No such file or directory") + exit(1) def parse_html(html): + """Use BeautifulSoup to parse the HTML""" def clean_text(inp): return inp.get_text().rstrip() - def clean_list(inp): - for z in inp: - if not len(z) == 2: - inp.remove(z) - return inp - soup = BeautifulSoup(html, 'html.parser') outp = [] for x in soup.find_all("tr"): - outp.append([clean_text(y) for y in x.find_all("td")]) + outp.append(Card([clean_text(y) for y in x.find_all("td")[:2]])) - return ([clean_text(x) for x in soup.find_all("th")], - clean_list(outp)) + # Return a tuple of nested lists + return ([clean_text(x) for x in soup.find_all("th")][:2], outp[1:]) def main(file):