]> git.armaanb.net Git - lightcards.git/blobdiff - lightcards/parse.py
Add setup.py, and make into actual Python module
[lightcards.git] / lightcards / parse.py
diff --git a/lightcards/parse.py b/lightcards/parse.py
new file mode 100755 (executable)
index 0000000..12c1073
--- /dev/null
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+# Parse markdown table into tuple of lists
+# Armaan Bhojwani 2021
+
+import sys
+from bs4 import BeautifulSoup
+import markdown
+
+
+def md2html(file):
+    """Read the file at path `file` as UTF-8 text and render it to HTML.
+
+    The text is passed to markdown.markdown with the 'tables' extension
+    enabled, and the resulting value is returned.
+    """
+    with open(file, "r", encoding="utf-8") as handle:
+        source = handle.read()
+    return markdown.markdown(source, extensions=['tables'])
+
+
+def parse_html(html):
+    """Extract table headers and two-cell rows from HTML markup.
+
+    Args:
+        html: Markup to parse; it is handed to BeautifulSoup using the
+            'html.parser' backend.
+
+    Returns:
+        A tuple ``(headers, rows)``. ``headers`` is a list holding the text
+        of every ``<th>`` element found. ``rows`` holds one list of cell
+        texts per ``<tr>`` element, keeping only rows with exactly two
+        ``<td>`` cells; rows with any other number of ``<td>`` cells are
+        dropped.
+    """
+    def clean_text(inp):
+        # Only trailing whitespace is stripped; leading whitespace is kept.
+        return inp.get_text().rstrip()
+
+    def clean_list(inp):
+        # Filter into a new list instead of calling remove() on the list
+        # being iterated: an in-place removal shifts the remaining items
+        # left, so the row right after a removed one would never be
+        # checked and malformed rows could slip through.
+        return [z for z in inp if len(z) == 2]
+
+    soup = BeautifulSoup(html, 'html.parser')
+    outp = [[clean_text(y) for y in x.find_all("td")]
+            for x in soup.find_all("tr")]
+
+    return ([clean_text(x) for x in soup.find_all("th")],
+            clean_list(outp))
+
+def main(file):
+    """Render the markdown file at `file` to HTML and parse the result.
+
+    Returns whatever parse_html returns for the HTML produced by md2html.
+    """
+    rendered = md2html(file)
+    return parse_html(rendered)
+
+if __name__ == "__main__":
+    # Script usage: the first command-line argument is the input file path.
+    parsed = main(sys.argv[1])
+    print(parsed)