]> git.armaanb.net Git - lightcards.git/blob - lightcards/parse.py
95319f4628c5aecc1f683267be547f3f0fbbaf72
[lightcards.git] / lightcards / parse.py
1 # Parse markdown table into tuple of lists
2 # Armaan Bhojwani 2021
3
4 import sys
5 from bs4 import BeautifulSoup
6 import markdown
7
8 from .deck import Card
9
10
def md2html(file):
    """Use the markdown module to convert input to HTML

    Args:
        file: Iterable of paths to markdown files. Each one is read and
            rendered with the "tables" extension enabled.

    Returns:
        The rendered HTML of every input file, concatenated in input order
        (an empty string when no paths are given).

    Raises:
        Exception: If one of the paths does not exist.
    """
    rendered = []
    for path in file:
        try:
            # Read inside a context manager so the handle is closed even if
            # reading fails part-way through.
            with open(path, "r") as handle:
                text = handle.read()
        except FileNotFoundError:
            raise Exception(
                f'lightcards: "{path}": No such file or directory'
            ) from None

        rendered.append(markdown.markdown(text, extensions=["tables"]))

    return "".join(rendered)
25
26
def parse_html(html, lenient):
    """Use BeautifulSoup to parse the HTML

    Every ``<table>`` with exactly two ``<th>`` headings contributes one
    ``Card`` per row that contains ``<td>`` cells.

    Args:
        html: HTML text to scan for tables.
        lenient: When true, tables without exactly two headings are
            skipped instead of raising.

    Returns:
        A tuple ``(headers, cards)``. ``headers`` is the list of heading
        texts of the last accepted table (empty if no table was accepted);
        ``cards`` is the list of ``Card`` objects from all accepted tables.

    Raises:
        Exception: If a table does not have exactly two headings and
            ``lenient`` is false, or if an AttributeError occurs while
            building the cards.
    """

    def clean_text(inp):
        # Tag text with trailing whitespace removed
        return inp.get_text().rstrip()

    soup = BeautifulSoup(html, "html.parser")
    cards, headers = [], []

    for table in soup.find_all("table"):
        ths = table.find_all("th")
        if len(ths) != 2:
            if not lenient:
                raise Exception("lightcards: Headings malformed")
            # Skipped tables must not contribute headings: otherwise a
            # malformed table after a valid one would replace the headings
            # that belong to the parsed cards.
            continue

        headers = ths
        try:
            for row in table.find_all("tr"):
                cells = row.find_all("td")
                # Rows without <td> cells (e.g. the heading row) are ignored
                if cells:
                    cards.append(Card(tuple(clean_text(c) for c in cells)))
        except AttributeError:
            # NOTE(review): unclear which call raises AttributeError here;
            # kept so the existing error contract is preserved.
            raise Exception("lightcards: No table found") from None

    # Return a tuple of (heading texts, cards)
    return ([clean_text(x) for x in headers], cards)