]> git.armaanb.net Git - lightcards.git/blob - lightcards/parse.py
Clean up lightcards.py
[lightcards.git] / lightcards / parse.py
1 # Parse markdown table into tuple of lists
2 # Armaan Bhojwani 2021
3
4 import sys
5 from bs4 import BeautifulSoup
6 import markdown
7
8 from .deck import Card
9
10
def md2html(file):
    """Use the markdown module to convert input to HTML

    Reads ``file`` as UTF-8 text and renders it with the ``tables``
    extension enabled, returning the resulting HTML string.

    If ``file`` does not exist, an error message is written to stderr and
    the process exits with status 1 (raises ``SystemExit``).
    """
    # Keep the try body limited to the file access that can actually raise
    # FileNotFoundError; rendering happens afterwards.
    try:
        with open(file, "r", encoding="utf-8") as input_file:
            source = input_file.read()
    except FileNotFoundError:
        # Diagnostics go to stderr so they do not mix with regular output.
        print(
            f"lightcards: \"{file}\": No such file or directory",
            file=sys.stderr,
        )
        # sys.exit() rather than the bare exit() helper: the latter is
        # injected by the site module and is not guaranteed to exist.
        sys.exit(1)

    return markdown.markdown(source, extensions=['tables'])
19
20
def parse_html(html):
    """Extract table headers and cards from an HTML string.

    Returns a tuple ``(headers, cards)``: ``headers`` holds the text of the
    first two ``<th>`` elements found, and ``cards`` holds one ``Card`` per
    ``<tr>`` element, excluding the first ``<tr>`` in the document.
    """
    def stripped(tag):
        # Text content of the tag with trailing whitespace removed
        return tag.get_text().rstrip()

    document = BeautifulSoup(html, 'html.parser')

    # Every row becomes a Card built from at most its first two <td> cells
    cards = [
        Card([stripped(cell) for cell in row.find_all("td")[:2]])
        for row in document.find_all("tr")
    ]

    headers = [stripped(heading) for heading in document.find_all("th")]

    # The first row is dropped; presumably it is the header row of the
    # table (it would contain <th> rather than <td> cells) -- verify
    # against the expected input format.
    return (headers[:2], cards[1:])
34
35
def main(file):
    """Convert the markdown file at ``file`` to HTML and parse it.

    Returns whatever ``parse_html`` returns for the rendered HTML.
    """
    rendered = md2html(file)
    return parse_html(rendered)
38
39
if __name__ == "__main__":
    # When run as a script, parse the file named by the first command-line
    # argument and print the result.
    input_path = sys.argv[1]
    parsed = main(input_path)
    print(parsed)