2 # Extract famous Latin phrases from Wikipedia
9 from bs4 import BeautifulSoup
# main: command-line entry point. Parses an optional output path, downloads
# one Wikipedia page ("List of Latin phrases (full)"), walks every table with
# class "wikitable" and writes CSV output whose header row is
# id, Latin, English, Notes, Length.
# NOTE(review): several lines of this function are not visible in this
# excerpt (the initialisation of `rowc`, the loop that binds `content`, the
# call that writes each data row, and the increment of `i`); comments below
# describe only what the visible lines show.
# NOTE(review): the `args` parameter (default: sys.argv[1:], evaluated once at
# definition time) is never used -- parse_args() below is called without it
# and its result rebinds `args`. Passing `args` to parse_args() would make the
# parameter effective; confirm intent.
11 def main(args=sys.argv[1:]):
13 parser = argparse.ArgumentParser(
14 description="Generate CSV file of Latin famous phrases from Wikipedia.")
# Single option: -o/--output, the CSV destination (defaults to "phrases.csv").
15 parser.add_argument("-o", "--output",
16 default="phrases.csv",
17 help="set custom output file location")
# Called with no argument list, so argparse reads the process command line.
18 args = parser.parse_args()
# The URL carries an `oldid` query parameter, i.e. it requests one specific
# revision of the page rather than whatever the current version is.
20 url = ('https://en.wikipedia.org/w/index.php?title=List_of_Latin_phrases_('
21 'full)&oldid=986793908')
# NOTE(review): the request has no timeout and the HTTP status is not checked;
# whatever body comes back is handed to the parser.
22 soup = BeautifulSoup(requests.get(url).content, "html.parser")
23 i = 0 # For the phrase id
# NOTE(review): the file is opened without newline='' (which the csv module
# documentation recommends) and without an explicit encoding, so line endings
# and character encoding depend on the platform defaults.
25 with open(args.output, 'w') as f:
# Rows are terminated with "\n" instead of the csv module default "\r\n".
26 writer = csv.writer(f, lineterminator="\n")
# Header row written once, before any table is processed.
29 headers = ['id', 'Latin', 'English', 'Notes', 'Length']
30 writer.writerow(headers)
32 # iterate through the tables in the page
33 list_table = soup.find_all("table", attrs={"class":"wikitable"})
34 for table in list_table:
# recursive=False restricts the search to direct <tr> children of <tbody>,
# and likewise to direct <td> children of each row.
35 for row in table.tbody.find_all("tr", recursive=False):
36 cell = row.find_all("td", recursive=False)
# NOTE(review): `rowc` is created on a line not visible here; presumably a
# fresh list per table row -- confirm.
39 rowc.append(i) # append phrase id
# get_text(" ", strip=True) joins the element's text fragments with a single
# space after stripping each one; `content` is bound on a line not visible
# here (presumably one of the <td> elements in `cell` -- confirm).
43 text = (content.get_text(" ", strip=True)).rstrip()
# Appends the length of rowc[1] as the last value; presumably rowc[1] is the
# Latin phrase and this fills the 'Length' column -- confirm.
47 rowc.append(len(rowc[1]))
52 if __name__ == "__main__":