-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
download MIZ: script to download all MIZ shapefiles
- Loading branch information
1 parent
046dad6
commit 65231d3
Showing
1 changed file
with
96 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
""" | ||
Downloads all MIZ shapefiles from this link: https://usicecenter.gov/Products/ArchiveSearchMulti?table=DailyArcticShapefiles | ||
To run: | ||
pixi run python scripts/download_all_miz.py | ||
""" | ||
import os | ||
from datetime import datetime | ||
|
||
import requests | ||
from bs4 import BeautifulSoup | ||
|
||
|
||
def format_date(date_obj: datetime) -> str: | ||
"""Function to format date in MM/DD/YYYY format""" | ||
return date_obj.strftime("%m/%d/%Y") | ||
|
||
|
||
def get_date(string: str) -> str: | ||
"""From file link, extract date for MIZ product""" | ||
date_str = string.split("=")[1][1:] | ||
if len(date_str) != 8: | ||
raise ValueError( | ||
"The date string must be 8 characters long in the form 'MMDDYYYY'." | ||
) | ||
|
||
# Extract parts of the date | ||
mm = date_str[:2] | ||
dd = date_str[2:4] | ||
yyyy = date_str[4:] | ||
|
||
# Construct the new date string in 'YYYYMMDD' format | ||
new_date_str = yyyy + mm + dd | ||
|
||
return new_date_str | ||
|
||
|
||
def download_miz_files( | ||
start_date: datetime, end_date: datetime, directory: str | ||
) -> None: | ||
"""Function to download MIZ files from the website""" | ||
search_url = "https://usicecenter.gov/Products/DisplaySearchResults" | ||
base_url = "https://usicecenter.gov" | ||
|
||
# The link requires filling out a form for the date range. | ||
# Adjust these parameters based on the form data required for MIZ files. | ||
form_data = { | ||
"searchText": "DailyArcticShapefiles", | ||
"searchProduct": "Arctic MIZ Shapefile", | ||
"startDate": format_date(start_date), | ||
"endDate": format_date(end_date), | ||
} | ||
|
||
headers = {"Content-Type": "application/x-www-form-urlencoded"} | ||
|
||
with requests.Session() as session: | ||
response = session.post(search_url, data=form_data, headers=headers) | ||
response.raise_for_status() | ||
|
||
soup = BeautifulSoup(response.text, "html.parser") | ||
|
||
file_links = [ | ||
a["href"] | ||
for a in soup.find_all("a", href=True) | ||
if "DownloadArchive" in a["href"] | ||
] | ||
|
||
# Ensure the directory exists. | ||
if not os.path.exists(directory): | ||
os.makedirs(directory) | ||
|
||
# Download each file. | ||
for file_link in file_links: | ||
full_url = base_url + file_link | ||
file_response = session.get(full_url) | ||
# Here we assume the file name can be parsed similarly; adjust as necessary. | ||
file_name = file_link.split("/")[ | ||
-1 | ||
] # Adapt this based on the actual link structure. | ||
datestring = get_date(file_name) | ||
file_name = f"nic_miz{datestring}nc_pl_a.zip" | ||
file_path = os.path.join(directory, file_name) | ||
|
||
if not os.path.exists(file_path): | ||
with open(file_path, "wb") as file: | ||
file.write(file_response.content) | ||
print(f"Downloaded {file_name}") | ||
else: | ||
print(f"{file_name} already on disk, skipping download...") | ||
|
||
|
||
if __name__ == "__main__": | ||
start_date = datetime(2014, 12, 14) # Adjust start date as needed | ||
end_date = datetime.now() | ||
directory = "data/MIZ_Files" | ||
download_miz_files(start_date, end_date, directory) |