git.armaanb.net Git - lightcards.git/commitdiff
Add kvtml2html script
author Armaan Bhojwani <me@armaanb.net>
Sat, 30 Jan 2021 23:28:43 +0000 (18:28 -0500)
committer Armaan Bhojwani <me@armaanb.net>
Sat, 30 Jan 2021 23:28:43 +0000 (18:28 -0500)
Add script to convert KWordQuiz files into HTML

contrib/kvtml2html.py [new file with mode: 0755]

diff --git a/contrib/kvtml2html.py b/contrib/kvtml2html.py
new file mode 100755 (executable)
index 0000000..313a44b
--- /dev/null
+++ b/contrib/kvtml2html.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+# Converts .kvtml KWordQuiz files to HTML
+
+import argparse
+from bs4 import BeautifulSoup
+
+
+def parse_args():
+    """Parse the command line.
+
+    Returns an ``argparse.Namespace`` whose ``inp`` and ``outp`` attributes
+    are one-element lists (because of ``nargs=1``) holding the input and
+    output file paths respectively.
+    """
+    parser = argparse.ArgumentParser(
+        description="Convert KWordQuiz file into HTML for Lightcards")
+    parser.add_argument("inp", metavar="input file", type=str, nargs=1)
+    parser.add_argument("outp", metavar="output file", type=str, nargs=1)
+    return parser.parse_args()
+
+
+def main():
+    """Convert the KWordQuiz file named on the command line into an HTML table.
+
+    Reads ``args.inp[0]``, uses the first two ``<identifier>`` elements as
+    the column headings and, for every ``<entry>``, the ``<translation>``
+    elements with ids "0" and "1" as one table row. The resulting HTML is
+    written to ``args.outp[0]``. Entries lacking either translation are
+    skipped.
+    """
+    # Local import: cell text has to be escaped so that "<" or "&" inside a
+    # card cannot break the table markup.
+    from html import escape
+
+    args = parse_args()
+    with open(args.inp[0], "r", encoding="utf-8") as input_file:
+        soup = BeautifulSoup(input_file, "lxml")
+
+    # Each heading is the second line of the identifier's text content.
+    headers = [x.get_text().split("\n")[1] for x in soup.find_all("identifier")]
+    body = soup.find_all("entry")
+
+    parts = [
+        "<html><table><tr>"
+        f"<th>{escape(headers[0], quote=False)}</th>"
+        f"<th>{escape(headers[1], quote=False)}</th></tr>"
+    ]
+    for entry in body:
+        front = entry.find("translation", {"id": "0"})
+        back = entry.find("translation", {"id": "1"})
+        # find() yields None when a side is missing; skip the whole entry
+        # instead of emitting half a row.
+        if front is None or back is None:
+            continue
+        parts.append(
+            f"<tr><td>{escape(front.get_text().rstrip(), quote=False)}</td>"
+            f"<td>{escape(back.get_text().rstrip(), quote=False)}</td></tr>"
+        )
+    # Close the elements opened in the header row.
+    parts.append("</table></html>")
+
+    with open(args.outp[0], "w", encoding="utf-8") as output_file:
+        output_file.write("".join(parts))
+
+# Run the conversion only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()