From f03d1f1de5ed1c36fe3e27505653e669f01a59a3 Mon Sep 17 00:00:00 2001 From: Armaan Bhojwani <3fb650a9-b47e-4604-a282-1dd91953b2ee@anonaddy.me> Date: Wed, 2 Dec 2020 18:16:22 -0500 Subject: [PATCH] add argument parsing, fix program logic * Add argument parsing to extract.py to be able to configure a custom output file * Update README formatting * Fix program logic in phrases.py, where the program would reference the wrong list * Lots of general code cleanup and refactoring --- README.md | 10 +++--- extract.py | 32 ++++++++++------- phrases.py | 102 +++++++++++++++++++++++++++++++++++------------------ 3 files changed, 92 insertions(+), 52 deletions(-) diff --git a/README.md b/README.md index c8e3270..f13e0a1 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,18 @@ # phrases -Get Latin famous phrases in the terminal - -## Source of phrases -There are currently 2239 phrases in the database, sourced from the Wikipedia page [List of Latin phrases (full)](https://en.wikipedia.org/w/index.php?title=List_of_Latin_phrases_(full)&oldid=986793908) +Latin famous phrases in the terminal +## Source of famous phrases Wikipedia contributors, "List of Latin phrases (full)," Wikipedia, The Free Encyclopedia, https://en.wikipedia.org/w/index.php?title=List_of_Latin_phrases_(full)&oldid=986793908. +There are currently 2239 famous phrases in the database + ## Installation `sudo make` to install. `sudo make uninstall` to uninstall completely. ### Notes - * If you want to regenerate the phrases.csv file with the `extract.py` script, then you need the BeautifulSoup Python module. 
* Tested and written on Python 3.9.0 ## License diff --git a/extract.py b/extract.py index a76fbfb..4e6c1f6 100755 --- a/extract.py +++ b/extract.py @@ -2,27 +2,35 @@ # Extract Latin famous phrases from wikipedia # Armaan Bhojwani 2020 -from bs4 import BeautifulSoup -import requests +import argparse +import sys import csv +import requests +from bs4 import BeautifulSoup -def main(): - url = 'https://en.wikipedia.org/wiki/List_of_Latin_phrases_(full)' - response = requests.get(url) - html = response.content - - soup = BeautifulSoup(html, "html.parser") - list_table = soup.find_all("table", attrs={"class":"wikitable"}) - with open('phrases.csv', 'w') as f: +def main(args=sys.argv[1:]): + # Argument parsing + parser = argparse.ArgumentParser( + description="Generate CSV file of Latin famous phrases from Wikipedia.") + parser.add_argument("-o", "--output", + default="phrases.csv", + help="set custom output file location") + args = parser.parse_args() + + url = ('https://en.wikipedia.org/w/index.php?title=List_of_Latin_phrases_(' + 'full)&oldid=986793908') + soup = BeautifulSoup(requests.get(url).content, "html.parser") + i = 0 # For the phrase id + + with open(args.output, 'w') as f: writer = csv.writer(f, lineterminator="\n") - i = 0 # For the phrase id - # write header headers = ['id', 'Latin', 'English', 'Notes', 'Length'] writer.writerow(headers) # iterate through the tables in the page + list_table = soup.find_all("table", attrs={"class":"wikitable"}) for table in list_table: for row in table.tbody.find_all("tr", recursive=False): cell = row.find_all("td", recursive=False) diff --git a/phrases.py b/phrases.py index d1d934f..63199d1 100755 --- a/phrases.py +++ b/phrases.py @@ -3,52 +3,84 @@ # Armaan Bhojwani 2020 import argparse +import csv import random import sys -import csv def main(args=sys.argv[1:]): # Argument parsing - parser = argparse.ArgumentParser(description="Latin famous phrases in the terminal.") - parser.add_argument("-e", "--english", 
action='store_true', help="Print the English translation.") - parser.add_argument("-i", "--id", action='store_true', help="Print the id of the phrase.") - parser.add_argument("-l", "--latin", action='store_true', help="Print the Latin phrase (default)") - parser.add_argument("-m", "--min", default=0, type=int, help="Set the minimum length of the Latin phrase") - parser.add_argument("-M", "--max", default=10000000, type=int, help="Set the maximum length of Latin phrase") - parser.add_argument("-n", "--notes", action='store_true', help="Print any notes on phrase") + parser = argparse.ArgumentParser( + description="Latin famous phrases in the terminal.") + parser.add_argument("-i", "--id", + action='store_true', + help="print the id of the phrase.") + parser.add_argument("-l", "--latin", + action='store_true', + help="print the Latin phrase (default)") + parser.add_argument("-e", "--english", + action='store_true', + help="print the English translation.") + parser.add_argument("-n", "--notes", + action='store_true', + help="print any notes on phrase") + parser.add_argument("-m", "--min", + default=0, + type=int, + help="set the minimum length of the Latin phrase") + parser.add_argument("-M", "--max", + default=10000000, + type=int, + help="set the maximum length of Latin phrase") + parser.add_argument("-p", "--num", + action='store_true', + help="print number of possibilities within constraints") + parser.add_argument("-f", "--file", + default="/usr/share/phrases/phrases.csv", + help="set the location of the phrase file") args = parser.parse_args() right_length = [] - # Find phrases of the right size - with open('/usr/share/phrases/phrases.csv') as f: + # convert csv file into list + with open(args.file) as f: reader = csv.reader(f) - all_lines = list(reader) next(reader, None) # skip header - for row in all_lines: - try: - if args.max >= int(row[4]) >= args.min: # generate a shortlist of phrases of the right length - right_length.append(row[0]) - except: - pass # 
skip malformed rows - - try: - chosen = int(right_length[random.randint(0, len(right_length) - 1)]) # choose a random id from the shortlist - except: - sys.exit("No phrase within the given parameters!") - - # Output as specified in flags - if not (args.english or args.latin or args.notes): - print(all_lines[chosen][1]) - else: - if args.id: - print(all_lines[chosen][1]) - if args.latin: - print(all_lines[chosen][1]) - if args.english: - print(all_lines[chosen][2]) - if args.notes: - print(all_lines[chosen][3]) + all_lines = list(reader) + f.close() + + # iterate through all the phrases + for row in all_lines: + try: # generate a shortlist of phrases of the right length + if args.max >= int(row[4]) >= args.min: + right_length.append(row[0]) + except: # skip malformed rows without exiting + pass + + try: # choose a random id from the shortlist + chosen = int(right_length[random.randint(0, len(right_length) - 1)]) + except: + sys.exit("No phrase within the given parameters!") + + # Output as specified in flags + for row in all_lines: + if int(row[0]) == chosen: + if not (args.id + or args.latin + or args.english + or args.notes + or args.num): + print(row[1]) + else: + if args.id: + print(row[1]) + if args.latin: + print(row[1]) + if args.english: + print(row[2]) + if args.notes: + print(row[3]) + if args.num: + print(len(right_length)) if __name__ == "__main__": main() -- 2.39.2