Add lenient option

[lightcards.git] / lightcards / parse.py
diff --git a/lightcards/parse.py b/lightcards/parse.py

old mode 100755 (executable)

new mode 100644 (file)

index fb5ac68..95319f4
--- a/lightcards/parse.py
+++ b/lightcards/parse.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
  # Parse markdown table into tuple of lists
  # Armaan Bhojwani 2021
  
@@ -11,28 +10,58 @@ from .deck import Card
  
  def md2html(file):
-    """Use the markdown module to convert input to HTML"""
-    with open(file, "r", encoding="utf-8") as input_file:
-        return markdown.markdown(input_file.read(), extensions=['tables'])
+    """Use the markdown module to convert input to HTML
+
+    "file" is iterated as a sequence of paths. Each one is read as UTF-8
+    and rendered on its own, and the HTML fragments are joined in order.
+    A missing path raises an Exception naming that path.
+    """
+    outp = []
+    for i in file:
+        try:
+            # Close each handle promptly and decode as UTF-8, as the
+            # single-file version did, rather than the locale default.
+            with open(i, "r", encoding="utf-8") as input_file:
+                text = input_file.read()
+        except FileNotFoundError:
+            raise Exception(
+                f'lightcards: "{i}": No such file or directory'
+            ) from None
+        outp.append(markdown.markdown(text, extensions=["tables"]))
  
+    return "".join(outp)
  
-def parse_html(html):
-    """Use BeautifulSoup to parse the HTML"""
+
+def parse_html(html, lenient):
+    """Use BeautifulSoup to parse the HTML
+
+    Every table with exactly two <th> headings contributes one Card per
+    row that has <td> cells. Any other table raises "Headings malformed",
+    or is skipped if lenient is true; HTML with no table always raises.
+    Returns (heading texts of the last accepted table, list of Cards).
+    """
+
      def clean_text(inp):
          return inp.get_text().rstrip()
  
-    soup = BeautifulSoup(html, 'html.parser')
-    outp = []
-
-    for x in soup.find_all("tr"):
-        outp.append(Card([clean_text(y) for y in x.find_all("td")[:2]]))
+    tables = BeautifulSoup(html, "html.parser").find_all("table")
+    if not tables:
+        raise Exception("lightcards: No table found")
+
+    outp, headers = [], []
+
+    for table in tables:
+        ths = table.find_all("th")
+        if len(ths) != 2:
+            if not lenient:
+                raise Exception("lightcards: Headings malformed")
+            # Lenient: skip this table and keep earlier headings intact
+            continue
+
+        headers = ths
+        for x in table.find_all("tr"):
+            y = x.find_all("td")
+            if y:
+                outp.append(Card(tuple(clean_text(z) for z in y)))
  
      # Return a tuple of nested lists
-    return ([clean_text(x) for x in soup.find_all("th")][:2], outp[1:])
-
-
-def main(file):
-    return parse_html(md2html(file))
-
-
-if __name__ == "__main__":
-    print(main(sys.argv[1]))
+    return ([clean_text(x) for x in headers], outp)