]> git.armaanb.net Git - lightcards.git/blob - lightcards/parse.py
8e0d320f6202f5e0bde347b540bcca36d02cc93e
[lightcards.git] / lightcards / parse.py
1 # Parse markdown table into tuple of lists
2 # Armaan Bhojwani 2021
3
4 import sys
5 from bs4 import BeautifulSoup
6 import markdown
7
8 from .deck import Card
9
10
def md2html(file):
    """Convert one or more markdown files into a single HTML string.

    Args:
        file: Iterable of paths to markdown files. Each file is rendered
            with the markdown "tables" extension and the results are
            concatenated in iteration order.

    Returns:
        The concatenated HTML of every input file ("" when *file* is
        empty).

    Raises:
        Exception: If one of the paths does not exist.
    """
    rendered = []
    for path in file:
        try:
            # The context manager closes the handle as soon as the text
            # is read instead of leaving it to the garbage collector.
            with open(path, "r") as handle:
                text = handle.read()
        except FileNotFoundError:
            raise Exception(
                f'lightcards: "{path}": No such file or directory'
            ) from None
        rendered.append(markdown.markdown(text, extensions=["tables"]))

    return "".join(rendered)
25
26
def parse_html(html):
    """Extract the table headings and the cards from rendered HTML.

    Args:
        html: HTML text expected to contain one or more <table> elements.

    Returns:
        A tuple ``(headings, cards)``. ``headings`` is a list with the
        text of the <th> cells of the last table; ``cards`` is a list
        holding one Card per row that contains <td> cells, collected
        across every table in document order.

    Raises:
        Exception: If the HTML contains no table, or if any table does
            not have exactly two heading cells.
    """

    def clean_text(inp):
        return inp.get_text().rstrip()

    tables = BeautifulSoup(html, "html.parser").find_all("table")

    # find_all yields an empty result (not None) when nothing matches, so
    # the missing-table case has to be detected explicitly; otherwise the
    # loop below never runs and the headings are never assigned.
    if not tables:
        raise Exception("lightcards: No table found")

    cards = []
    headings = []

    for table in tables:
        for row in table.find_all("tr"):
            cells = row.find_all("td")
            # Heading rows hold only <th> cells and therefore yield no card.
            if cells:
                cards.append(Card(tuple(clean_text(cell) for cell in cells)))

        headings = table.find_all("th")
        if len(headings) != 2:
            raise Exception("lightcards: Headings malformed")

    # Headings come from the last table that was processed.
    return ([clean_text(heading) for heading in headings], cards)
51
52
def main(file):
    """Render the given markdown input to HTML and parse it.

    Passes *file* to md2html and returns whatever parse_html produces
    for the resulting HTML.
    """
    html = md2html(file)
    return parse_html(html)
55
56
if __name__ == "__main__":
    # md2html iterates over its argument as a collection of paths, so hand
    # it the list of command-line arguments; a bare string would be walked
    # one character at a time.
    print(main(sys.argv[1:]))