From 8465285f3aec18a8c80f2a1a08048e0a1e09692c Mon Sep 17 00:00:00 2001 From: ThiaudioTT Date: Mon, 1 Jul 2024 09:38:57 -0300 Subject: [PATCH] fix(getCountries): fix scraping for images --- tools/getVanillaCountries.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tools/getVanillaCountries.py b/tools/getVanillaCountries.py index a37f469..383dd8a 100644 --- a/tools/getVanillaCountries.py +++ b/tools/getVanillaCountries.py @@ -29,14 +29,25 @@ def getCountries(table: list[str], filename: str): # the flag is in another webpage image_webpage = HOI_SOURCE + row.find_all("td")[0].find("a")["href"][1:] - image_response = requests.get(image_webpage) - if image_response.status_code != 200: + image_webpage_response = requests.get(image_webpage) + if image_webpage_response.status_code != 200: print(f"Failed to get flag WEBPAGE for {country_name}") continue + image_webpage_soup = BeautifulSoup(image_webpage_response.text, 'html.parser') + + # image = image_webpage_soup.select_one("div.eu4box:nth-child(2) > a:nth-child(2) > img") + image_location = HOI_SOURCE + image_webpage_soup.select_one("div.mw-parser-output:nth-child(4) > div:nth-child(2) > a")["href"][1:] + + image_response = requests.get(image_location) + if image_response.status_code != 200: + print(f"Failed to get flag IMAGE for {country_name}") + continue + image_soup = BeautifulSoup(image_response.text, 'html.parser') - image = image_soup.select_one("div.eu4box:nth-child(2) > a:nth-child(2) > img") + image = image_soup.select_one("#file > a > img") + if not image: print(f"Failed to get flag IMAGE for {country_name}") continue