Skip to content

Commit

Permalink
fix(getCountries): fix scraping for images
Browse files: browse the repository at this point in the history
  • Loading branch information
ThiaudioTT committed Jul 1, 2024
1 parent 5e90bcf commit 8465285
Showing 1 changed file with 14 additions and 3 deletions.
17 changes: 14 additions & 3 deletions tools/getVanillaCountries.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -29,14 +29,25 @@ def getCountries(table: list[str], filename: str):
# the flag is in another webpage
image_webpage = HOI_SOURCE + row.find_all("td")[0].find("a")["href"][1:]

image_response = requests.get(image_webpage)
if image_response.status_code != 200:
image_webpage_response = requests.get(image_webpage)
if image_webpage_response.status_code != 200:
print(f"Failed to get flag WEBPAGE for {country_name}")
continue

image_webpage_soup = BeautifulSoup(image_webpage_response.text, 'html.parser')

# image = image_webpage_soup.select_one("div.eu4box:nth-child(2) > a:nth-child(2) > img")
image_location = HOI_SOURCE + image_webpage_soup.select_one("div.mw-parser-output:nth-child(4) > div:nth-child(2) > a")["href"][1:]

image_response = requests.get(image_location)
if image_response.status_code != 200:
print(f"Failed to get flag IMAGE for {country_name}")
continue

image_soup = BeautifulSoup(image_response.text, 'html.parser')

image = image_soup.select_one("div.eu4box:nth-child(2) > a:nth-child(2) > img")
image = image_soup.select_one("#file > a > img")

if not image:
print(f"Failed to get flag IMAGE for {country_name}")
continue
Expand Down

0 comments on commit 8465285

Please sign in to comment.