git.armaanb.net Git - lightcards.git/blob - lightcards/parse.py
Make file not found error print more standard
[lightcards.git] / lightcards / parse.py
1 #!/usr/bin/env python
2 # Parse markdown table into tuple of lists
3 # Armaan Bhojwani 2021
4
5 import sys
6 from bs4 import BeautifulSoup
7 import markdown
8
9 from .deck import Card
10
11
def md2html(file):
    """Use the markdown module to convert input to HTML

    Reads ``file`` as UTF-8 text and renders it with the ``tables``
    extension enabled, returning the resulting HTML string.  If the file
    does not exist, an error message is written to stderr and the process
    exits with status 1.
    """
    try:
        with open(file, "r", encoding="utf-8") as input_file:
            return markdown.markdown(input_file.read(), extensions=['tables'])
    except FileNotFoundError:
        # Diagnostics belong on stderr so they never mix with normal output
        print(f"lightcards: \"{file}\": No such file or directory",
              file=sys.stderr)
        # sys.exit rather than the bare exit() builtin: the latter is added
        # by the site module and is absent when Python runs without it
        sys.exit(1)
20
21
def parse_html(html):
    """Extract the table headers and cards from HTML using BeautifulSoup

    Returns a tuple ``(headers, cards)``: ``headers`` holds the text of the
    first two ``th`` cells, and ``cards`` holds one ``Card`` per ``tr``
    after the first, each built from the text of that row's first two
    ``td`` cells.
    """
    def cell_text(tag):
        # Only trailing whitespace is stripped from a cell's text
        return tag.get_text().rstrip()

    document = BeautifulSoup(html, 'html.parser')

    # A Card is built for every row, including the first; the first one is
    # dropped when the result is assembled below
    cards = [
        Card([cell_text(cell) for cell in row.find_all("td")[:2]])
        for row in document.find_all("tr")
    ]
    headers = [cell_text(cell) for cell in document.find_all("th")]

    return (headers[:2], cards[1:])
35
36
def main(file):
    """Convert the markdown file at ``file`` to HTML and parse the result"""
    rendered = md2html(file)
    return parse_html(rendered)
39
40
if __name__ == "__main__":
    # Script entry point: parse the file named by the first CLI argument
    deck_path = sys.argv[1]
    print(main(deck_path))