git.armaanb.net Git - lightcards.git/blobdiff - lightcards/parse.py
Add lenient option
[lightcards.git] / lightcards / parse.py
index 8e0d320f6202f5e0bde347b540bcca36d02cc93e..95319f4628c5aecc1f683267be547f3f0fbbaf72 100644 (file)
@@ -24,35 +24,28 @@ def md2html(file):
     return outp
 
 
-def parse_html(html):
+def parse_html(html, lenient):
     """Use BeautifulSoup to parse the HTML"""
 
     def clean_text(inp):
         return inp.get_text().rstrip()
 
-    soup = BeautifulSoup(html, "html.parser").find_all("table")
-    outp = []
-
-    for table in soup:
-        try:
-            for x in table.find_all("tr"):
-                y = x.find_all("td")
-                if y:
-                    outp.append(Card(tuple([clean_text(z) for z in y])))
-        except AttributeError:
-            raise Exception("lightcards: No table found") from None
+    soup = BeautifulSoup(html, "html.parser")
+    outp, ths = [], []
 
+    for table in soup.find_all("table"):
         ths = table.find_all("th")
         if len(ths) != 2:
-            raise Exception("lightcards: Headings malformed")
+            if not lenient:
+                raise Exception("lightcards: Headings malformed")
+        else:
+            try:
+                for x in table.find_all("tr"):
+                    y = x.find_all("td")
+                    if y:
+                        outp.append(Card(tuple([clean_text(z) for z in y])))
+            except AttributeError:
+                raise Exception("lightcards: No table found") from None
 
     # Return a tuple of nested lists
     return ([clean_text(x) for x in ths], outp)
-
-
-def main(file):
-    return parse_html(md2html(file))
-
-
-if __name__ == "__main__":
-    print(main(sys.argv[1]))