git.armaanb.net Git - lightcards.git/blobdiff - lightcards/parse.py
Allow for multiple input files to be given
[lightcards.git] / lightcards / parse.py
index 9b1820af5214edc3c9911721d8bb12eb04dcd5c9..8e0d320f6202f5e0bde347b540bcca36d02cc93e 100644 (file)
@@ -10,27 +10,44 @@ from .deck import Card
 
 def md2html(file):
     """Use the markdown module to convert input to HTML"""
-    try:
-        with open(file, "r", encoding="utf-8") as input_file:
-            return markdown.markdown(input_file.read(), extensions=['tables'])
-    except FileNotFoundError:
-        print(f"lightcards: \"{file}\": No such file or directory")
-        exit(1)
+    outp = ""
+    for i in file:
+        try:
+            outp += markdown.markdown(
+                open(i, "r").read(), extensions=["tables"]
+            )
+        except FileNotFoundError:
+            raise Exception(
+                f'lightcards: "{i}": No such file or directory'
+            ) from None
+
+    return outp
 
 
 def parse_html(html):
     """Use BeautifulSoup to parse the HTML"""
+
     def clean_text(inp):
         return inp.get_text().rstrip()
 
-    soup = BeautifulSoup(html, 'html.parser')
+    soup = BeautifulSoup(html, "html.parser").find_all("table")
     outp = []
 
-    for x in soup.find_all("tr"):
-        outp.append(Card([clean_text(y) for y in x.find_all("td")[:2]]))
+    for table in soup:
+        try:
+            for x in table.find_all("tr"):
+                y = x.find_all("td")
+                if y:
+                    outp.append(Card(tuple([clean_text(z) for z in y])))
+        except AttributeError:
+            raise Exception("lightcards: No table found") from None
+
+        ths = table.find_all("th")
+        if len(ths) != 2:
+            raise Exception("lightcards: Headings malformed")
 
     # Return a tuple of nested lists
-    return ([clean_text(x) for x in soup.find_all("th")][:2], outp[1:])
+    return ([clean_text(x) for x in ths], outp)
 
 
 def main(file):