From de15147955040649621f6a848f6c3c7d156865e4 Mon Sep 17 00:00:00 2001
From: Eric Simley
Date: Mon, 6 May 2024 13:04:45 -0600
Subject: [PATCH 1/5] adding merra2 hourly downloader

---
 openoa/utils/downloader.py | 163 +++++++++++++++++++++++++++++++++++++
 1 file changed, 163 insertions(+)

diff --git a/openoa/utils/downloader.py b/openoa/utils/downloader.py
index 1817b9df..2f35c7f8 100644
--- a/openoa/utils/downloader.py
+++ b/openoa/utils/downloader.py
@@ -482,3 +482,166 @@ def get_merra2_monthly(
     df.to_csv(save_pathname / f"{save_filename}.csv", index=True)
 
     return df
+
+
+def get_merra2_hourly(
+    lat: float,
+    lon: float,
+    save_pathname: str | Path,
+    save_filename: str,
+    start_date: str = "2000-01",
+    end_date: str = None,
+) -> pd.DataFrame:
+    """
+    Get MERRA2 data directly from the NASA GES DISC service, which requires registration on the
+    GES DISC service. See: https://disc.gsfc.nasa.gov/data-access#python-requests.
+
+    This function returns hourly MERRA2 data from the "M2T1NXSLV" dataset. See further details
+    regarding the dataset at: https://disc.gsfc.nasa.gov/datasets/M2T1NXSLV_5.12.4/summary.
+    U and V components of wind speed at 50 m, temperature at 2 m, and surface pressure are
+    downloaded here.
+
+    As well as returning the data as a dataframe, the data is also saved as daily NetCDF files
+    and a csv file with the concatenated data. These are located in the "save_pathname" directory,
+    with "save_filename" prefix. This allows future loading without download from the GES DISC service.
+
+    Args:
+        lat(:obj:`float`): Latitude in WGS 84 spatial reference system (decimal degrees).
+        lon(:obj:`float`): Longitude in WGS 84 spatial reference system (decimal degrees).
+        save_pathname(:obj:`str` | :obj:`Path`): The path where the downloaded reanalysis data will
+            be saved.
+        save_filename(:obj:`str`): The file name used to save the downloaded reanalysis data.
+        start_date(:obj:`str`): The starting year and month that data is downloaded for. This
+            should be provided as a string in the format "YYYY-MM". Defaults to "2000-01".
+        end_date(:obj:`str`): The final year and month that data is downloaded for. This should be
+            provided as a string in the format "YYYY-MM". Defaults to current year and most recent
+            month.
+
+    Returns:
+        df(:obj:`dataframe`): A dataframe containing time series of the requested reanalysis
+            variables:
+            1. windspeed_ms: the surface wind speed in m/s.
+            2. temperature_K: surface air temperature in Kelvin.
+            3. surf_pres_Pa: surface pressure in Pascals.
+
+    Raises:
+        ValueError: If the start_year is greater than the end_year.
+ """ + + logger.info("Please note access to MERRA2 data requires registration") + logger.info("Please see: https://disc.gsfc.nasa.gov/data-access#python-requests") + + # base url containing the monthly data set M2T1NXSLV + base_url = r"https://goldsmr4.gesdisc.eosdis.nasa.gov/opendap/MERRA2/M2T1NXSLV.5.12.4/" + + # create save_pathname if it does not exist + save_pathname = Path(save_pathname).resolve() + if not save_pathname.exists(): + save_pathname.mkdir() + + # get the current date + now = datetime.datetime.now() + + # assign end_year to current year if not provided by the user + if end_date is None: + end_date = f"{now.year}-{now.month:02}" + + # convert dates to datetime objects + start_date = datetime.datetime.strptime(start_date, "%Y-%m") + end_date = datetime.datetime.strptime(end_date, "%Y-%m") + + # check that the start and end dates are the right way around + if start_date > end_date: + logger.error("The start_date should be less than or equal to the end_date") + logger.error(f"start_date = {start_date.date()}, end_date = {end_date.date()}") + raise ValueError("The start_date should be less than or equal to the end_date") + + # list all years that will be downloaded + years = list(range(start_date.year, end_date.year + 1, 1)) + + # download the data + for year in years: + # Limit to up to 2 months ago to ensure data are available + # TODO: Can we update to check for most recent data if available? + if year == now.year: + months = list(range(1, now.month - 1, 1)) + else: + months = list(range(1, 12 + 1, 1)) + + for month in months: + # get the file names from the GES DISC site for the year + result = requests.get(base_url + str(year) + "/%02d" % month) + files = re.findall(r"(>MERRA2_\S+.nc4)", result.text) + files = list(dict.fromkeys(files)) + files = [x[1:] for x in files] + + # coordinate indexes + lat_i = "" + lon_i = "" + + # download each of the files and save them + for f in files: + outfile = save_pathname / f"{save_filename}_{f.split('.')[-2]}.nc" + + if not outfile.is_file(): + # download one file for determining coordinate indicies + if lat_i == "": + url = ( + f"{base_url}{year}/{month:02d}//{f}" + + r".nc4?PS,U50M,V50M,T2M,time,lat,lon" + ) + download_file(url, outfile) + ds_nc = xr.open_dataset(outfile) + ds_nc_idx = ds_nc.assign_coords( + lon_idx=("lon", range(ds_nc.dims["lon"])), + lat_idx=("lat", range(ds_nc.dims["lat"])), + ) + sel = ds_nc_idx.sel(lat=lat, lon=lon, method="nearest") + lon_i = f"[{sel.lon_idx.values-1}:{sel.lon_idx.values+1}]" + lat_i = f"[{sel.lat_idx.values-1}:{sel.lat_idx.values+1}]" + ds_nc.close() + outfile.unlink() + + # download file with specified coordinates + url = ( + f"{base_url}{year}/{month:02d}//{f}" + r".nc4?PS[0:23]" + f"{lat_i}{lon_i}" + f",U50M[0:23]{lat_i}{lon_i}" + f",V50M[0:23]{lat_i}{lon_i}" + f",T2M[0:23]{lat_i}{lon_i}" + f",time,lat{lat_i},lon{lon_i}" + ) + + download_file(url, outfile) + + # get the saved data + ds_nc = xr.open_mfdataset(f"{save_pathname / f'{save_filename}*.nc'}") + + # rename variables to conform with OpenOA + ds_nc = ds_nc.rename_vars( + {"U50M": "u_ms", "V50M": "v_ms", "T2M": "temperature_K", "PS": "surf_pres_Pa"} + ) + + # select the central node only for now + sel = ds_nc.sel(lat=lat, lon=lon, method="nearest") + + # convert to a pandas dataframe + df = sel.to_dataframe() + + # select required columns + df = df[["u_ms", "v_ms", "temperature_K", "surf_pres_Pa"]] + + # rename the index to match other datasets + df.index.name = "datetime" + + # drop any empty rows + df = df.dropna() + + # crop time 
series to only the selected time period
+    df = df.loc[start_date:end_date]
+
+    # save to csv for easy loading as required
+    df.to_csv(save_pathname / f"{save_filename}.csv", index=True)
+
+    return df

From 9e248a3294a77be56b766f8d4cbba2ab91e9e3dd Mon Sep 17 00:00:00 2001
From: Eric Simley
Date: Tue, 7 May 2024 06:28:14 -0600
Subject: [PATCH 2/5] adding era5 hourly downloader

---
 openoa/utils/downloader.py | 257 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 254 insertions(+), 3 deletions(-)

diff --git a/openoa/utils/downloader.py b/openoa/utils/downloader.py
index 2f35c7f8..f0ec0614 100644
--- a/openoa/utils/downloader.py
+++ b/openoa/utils/downloader.py
@@ -37,12 +37,14 @@
 from pathlib import Path
 from zipfile import ZipFile
 
+import numpy as np
 import cdsapi
 import pandas as pd
 import xarray as xr
 import requests
 from tqdm import tqdm
 
+from openoa.utils import met_data_processing as met
 from openoa.logging import logging
 
 
@@ -333,6 +335,244 @@ def get_era5_monthly(
     return df
 
 
+def get_era5_hourly(
+    lat: float,
+    lon: float,
+    save_pathname: str | Path,
+    save_filename: str,
+    start_date: str = "2000-01",
+    end_date: str = None,
+    calc_derived_vars: bool = False,
+) -> pd.DataFrame:
+    """
+    Get ERA5 data directly from the CDS service. This requires registration on the CDS service.
+    See registration details at: https://cds.climate.copernicus.eu/api-how-to
+
+    This function returns hourly ERA5 data from the "ERA5 hourly data on single levels" dataset
+    (reanalysis-era5-single-levels). See further details regarding the dataset at:
+    https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-single-levels.
+    U and V components of wind speed at 100 m, temperature at 2 m, and surface pressure are
+    downloaded here.
+
+    As well as returning the data as a dataframe, the data is also saved as annual NetCDF files and
+    a csv file with the concatenated data. These are located in the "save_pathname" directory, with
+    "save_filename" prefix. This allows future loading without download from the CDS service.
+
+    Args:
+        lat(:obj:`float`): Latitude in WGS 84 spatial reference system (decimal degrees).
+        lon(:obj:`float`): Longitude in WGS 84 spatial reference system (decimal degrees).
+        save_pathname(:obj:`str` | :obj:`Path`): The path where the downloaded reanalysis data will
+            be saved.
+        save_filename(:obj:`str`): The file name used to save the downloaded reanalysis data.
+        start_date(:obj:`str`): The starting year and month that data is downloaded for. This
+            should be provided as a string in the format "YYYY-MM". Defaults to "2000-01".
+        end_date(:obj:`str`): The final year and month that data is downloaded for. This should be
+            provided as a string in the format "YYYY-MM". Defaults to current year and most recent
+            month with full data, accounting for the fact that the ERA5 monthly dataset is released
+            around the the 6th of the month.
+        calc_derived_vars (:obj:`bool`, optional): Boolean that specifies whether wind speed, wind
+            direction, and air density are computed from the downloaded reanalysis variables and
+            saved. Defaults to False.
+
+
+    Returns:
+        df(:obj:`dataframe`): A dataframe containing time series of the requested reanalysis
+            variables:
+            1. u_ms: the U component of wind speed at a height of 100 m in m/s.
+            2. v_ms: the V component of wind speed at a height of 100 m in m/s.
+            3. temperature_K: air temperature at a height of 2 m in Kelvin.
+            4. surf_pres_Pa: surface pressure in Pascals.
+ + Raises: + ValueError: If the start_date is greater than the end_date. + Exception: If unable to connect to the cdsapi client. + """ + + logger.info("Please note access to ERA5 data requires registration") + logger.info("Please see: https://cds.climate.copernicus.eu/api-how-to") + + # set up cds-api client + try: + c = cdsapi.Client() + except Exception as e: + logger.error("Failed to make connection to cds") + logger.error("Please see https://cds.climate.copernicus.eu/api-how-to for help") + logger.error(e) + raise + + # create save_pathname if it does not exist + save_pathname = Path(save_pathname).resolve() + if not save_pathname.exists(): + save_pathname.mkdir() + + # get the current date + now = datetime.datetime.now() + + # assign end_year to current year if not provided by the user + if end_date is None: + end_date = f"{now.year}-{now.month:02}" + + # convert dates to datetime objects + start_date = datetime.datetime.strptime(start_date, "%Y-%m") + end_date = datetime.datetime.strptime(end_date, "%Y-%m") + + # check that the start and end dates are the right way around + if start_date > end_date: + logger.error("The start_date should be less than or equal to the end_date") + logger.error(f"start_date = {start_date.date()}, end_date = {end_date.date()}") + raise ValueError("The start_date should be less than or equal to the end_date") + + # list all years that will be downloaded + years = list(range(start_date.year, end_date.year + 1, 1)) + + # get the data for the closest 9 nodes to the coordinates + node_spacing = 0.250500001 * 1 + + # See: https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-single-levels?tab=form + # for formulating other requests from cds + cds_dataset = "reanalysis-era5-single-levels" + cds_request = { + "product_type": "reanalysis", + "format": "netcdf", + "variable": [ + "100m_u_component_of_wind", + "100m_v_component_of_wind", + "2m_temperature", + "surface_pressure", + ], + "year": None, + "month": None, + "day": [ + "01", + "02", + "03", + "04", + "05", + "06", + "07", + "08", + "09", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + ], + "time": [ + "00:00", + "01:00", + "02:00", + "03:00", + "04:00", + "05:00", + "06:00", + "07:00", + "08:00", + "09:00", + "10:00", + "11:00", + "12:00", + "13:00", + "14:00", + "15:00", + "16:00", + "17:00", + "18:00", + "19:00", + "20:00", + "21:00", + "22:00", + "23:00", + ], + "product_type": "reanalysis", + "area": [ + lat + node_spacing, + lon - node_spacing, + lat - node_spacing, + lon + node_spacing, + ], + } + + # download the data + for year in years: + outfile = save_pathname / f"{save_filename}_{year}.nc" + + if year == now.year: + # Limit to up to 2 months ago to ensure data are available + # TODO: Is this needed? 
+ months = list(range(1, now.month - 1, 1)) + else: + months = list(range(1, 12 + 1, 1)) + + if not outfile.is_file(): + logger.info(f"Downloading ERA5: {outfile}") + + try: + cds_request.update({"year": year, "month": months}) + c.retrieve(cds_dataset, cds_request, outfile) + + except Exception as e: + logger.error(f"Failed to download ERA5: {outfile}") + logger.error(e) + + # get the saved data + ds_nc = xr.open_mfdataset(f"{save_pathname / f'{save_filename}*.nc'}") + + # rename variables to conform with OpenOA + ds_nc = ds_nc.rename_vars( + {"u100": "u_ms", "v100": "v_ms", "t2m": "temperature_K", "sp": "surf_pres_Pa"} + ) + + # select the central node only for now + if "expver" in ds_nc.dims: + sel = ds_nc.sel(expver=1, latitude=lat, longitude=lon, method="nearest") + else: + sel = ds_nc.sel(latitude=lat, longitude=lon, method="nearest") + + # convert to a pandas dataframe + df = sel.to_dataframe() + + # select required columns + df = df[["u_ms", "v_ms", "temperature_K", "surf_pres_Pa"]] + + # compute derived variables if requested + if calc_derived_vars: + df["windspeed_ms"] = np.sqrt(df["u_ms"] ** 2 + df["v_ms"] ** 2) + df["winddirection_deg"] = met.compute_wind_direction(df["u_ms"], df["v_ms"]).values + df["rho_kgm-3"] = met.compute_air_density(df["temperature_K"], df["surf_pres_Pa"]) + + # rename the index to match other datasets + df.index.name = "datetime" + + # drop any empty rows + df = df.dropna() + + # crop time series to only the selected time period + df = df.loc[start_date:end_date] + + # save to csv for easy loading as required + df.to_csv(save_pathname / f"{save_filename}.csv", index=True) + + return df + + def get_merra2_monthly( lat: float, lon: float, @@ -491,6 +731,7 @@ def get_merra2_hourly( save_filename: str, start_date: str = "2000-01", end_date: str = None, + calc_derived_vars: bool = False, ) -> pd.DataFrame: """ Get MERRA2 data directly from the NASA GES DISC service, which requires registration on the @@ -516,13 +757,17 @@ def get_merra2_hourly( end_date(:obj:`str`): The final year and month that data is downloaded for. This should be provided as a string in the format "YYYY-MM". Defaults to current year and most recent month. + calc_derived_vars (:obj:`bool`, optional): Boolean that specifies whether wind speed, wind + direction, and air density are computed from the downloaded reanalysis variables and + saved. Defaults to False. Returns: df(:obj:`dataframe`): A dataframe containing time series of the requested reanalysis variables: - 1. windspeed_ms: the surface wind speed in m/s. - 2. temperature_K: surface air temperature in Kelvin. - 3. surf_pres_Pa: surface pressure in Pascals. + 1. u_ms: the U component of wind speed at a height of 50 m in m/s. + 2. v_ms: the V component of wind speed at a height of 50 m in m/s. + 3. temperature_K: air temperature at a height of 2 m in Kelvin. + 4. surf_pres_Pa: surface pressure in Pascals. Raises: ValueError: If the start_year is greater than the end_year. 
@@ -632,6 +877,12 @@ def get_merra2_hourly( # select required columns df = df[["u_ms", "v_ms", "temperature_K", "surf_pres_Pa"]] + # compute derived variables if requested + if calc_derived_vars: + df["windspeed_ms"] = np.sqrt(df["u_ms"] ** 2 + df["v_ms"] ** 2) + df["winddirection_deg"] = met.compute_wind_direction(df["u_ms"], df["v_ms"]).values + df["rho_kgm-3"] = met.compute_air_density(df["temperature_K"], df["surf_pres_Pa"]) + # rename the index to match other datasets df.index.name = "datetime" From 4fa9adb719350ff3808d7e8c6aebf1608c2a4c0f Mon Sep 17 00:00:00 2001 From: Eric Simley Date: Tue, 4 Jun 2024 07:10:27 -0600 Subject: [PATCH 3/5] removing 2 month lag in requested reanalysis data to download --- openoa/utils/downloader.py | 61 ++++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 26 deletions(-) diff --git a/openoa/utils/downloader.py b/openoa/utils/downloader.py index f0ec0614..2709eb62 100644 --- a/openoa/utils/downloader.py +++ b/openoa/utils/downloader.py @@ -364,12 +364,11 @@ def get_era5_hourly( save_pathname(:obj:`str` | :obj:`Path`): The path where the downloaded reanalysis data will be saved. save_filename(:obj:`str`): The file name used to save the downloaded reanalysis data. - start_date(:obj:`str`): The starting year and month that data is downloaded for. This - should be provided as a string in the format "YYYY-MM". Defaults to "2000-01". - end_date(:obj:`str`): The final year and month that data is downloaded for. This should be - provided as a string in the format "YYYY-MM". Defaults to current year and most recent - month with full data, accounting for the fact that the ERA5 monthly dataset is released - around the the 6th of the month. + start_date(:obj:`str`): The starting year, month, and day that data is downloaded for. This + should be provided as a string in the format "YYYY-MM-DD". Defaults to "2000-01-01". + end_date(:obj:`str`): The final year, month, and day that data is downloaded for. This should be + provided as a string in the format "YYYY-MM-DD". Defaults to current date. Note that data + may not be available yet for the most recent couple months. calc_derived_vars (:obj:`bool`, optional): Boolean that specifies whether wind speed, wind direction, and air density are computed from the downloaded reanalysis variables and saved. Defaults to False. @@ -410,11 +409,12 @@ def get_era5_hourly( # assign end_year to current year if not provided by the user if end_date is None: - end_date = f"{now.year}-{now.month:02}" + end_date = f"{now.year}-{now.month:02}-{now.day:02}" # convert dates to datetime objects - start_date = datetime.datetime.strptime(start_date, "%Y-%m") - end_date = datetime.datetime.strptime(end_date, "%Y-%m") + start_date = datetime.datetime.strptime(start_date, "%Y-%m-%d") + end_date = datetime.datetime.strptime(end_date, "%Y-%m-%d") + end_date += datetime.timedelta(hours=23, minutes=59) # include all times in last day # check that the start and end dates are the right way around if start_date > end_date: @@ -514,10 +514,14 @@ def get_era5_hourly( for year in years: outfile = save_pathname / f"{save_filename}_{year}.nc" - if year == now.year: - # Limit to up to 2 months ago to ensure data are available - # TODO: Is this needed? 
- months = list(range(1, now.month - 1, 1)) + # limit to months of interest + if year == start_date.year: + if year == end_date.year: + months = list(range(start_date.month, end_date.month + 1, 1)) + else: + months = list(range(start_date.month, 12 + 1, 1)) + elif year == end_date.year: + months = list(range(1, end_date.month + 1, 1)) else: months = list(range(1, 12 + 1, 1)) @@ -752,11 +756,11 @@ def get_merra2_hourly( save_pathname(:obj:`str` | :obj:`Path`): The path where the downloaded reanalysis data will be saved. save_filename(:obj:`str`): The file name used to save the downloaded reanalysis data. - start_date(:obj:`str`): The starting year and month that data is downloaded for. This - should be provided as a string in the format "YYYY-MM". Defaults to "2000-01". - end_date(:obj:`str`): The final year and month that data is downloaded for. This should be - provided as a string in the format "YYYY-MM". Defaults to current year and most recent - month. + start_date(:obj:`str`): The starting year, month, and day that data is downloaded for. This + should be provided as a string in the format "YYYY-MM-DD". Defaults to "2000-01-01". + end_date(:obj:`str`): The final year, month, and day that data is downloaded for. This should be + provided as a string in the format "YYYY-MM-DD". Defaults to current date. Note that data + may not be available yet for the most recent couple months. calc_derived_vars (:obj:`bool`, optional): Boolean that specifies whether wind speed, wind direction, and air density are computed from the downloaded reanalysis variables and saved. Defaults to False. @@ -789,11 +793,12 @@ def get_merra2_hourly( # assign end_year to current year if not provided by the user if end_date is None: - end_date = f"{now.year}-{now.month:02}" + end_date = f"{now.year}-{now.month:02}-{now.day:02}" # convert dates to datetime objects - start_date = datetime.datetime.strptime(start_date, "%Y-%m") - end_date = datetime.datetime.strptime(end_date, "%Y-%m") + start_date = datetime.datetime.strptime(start_date, "%Y-%m-%d") + end_date = datetime.datetime.strptime(end_date, "%Y-%m-%d") + end_date += datetime.timedelta(hours=23, minutes=59) # include all times in last day # check that the start and end dates are the right way around if start_date > end_date: @@ -806,10 +811,14 @@ def get_merra2_hourly( # download the data for year in years: - # Limit to up to 2 months ago to ensure data are available - # TODO: Can we update to check for most recent data if available? 
- if year == now.year: - months = list(range(1, now.month - 1, 1)) + # limit to months of interest + if year == start_date.year: + if year == end_date.year: + months = list(range(start_date.month, end_date.month + 1, 1)) + else: + months = list(range(start_date.month, 12 + 1, 1)) + elif year == end_date.year: + months = list(range(1, end_date.month + 1, 1)) else: months = list(range(1, 12 + 1, 1)) @@ -829,7 +838,7 @@ def get_merra2_hourly( outfile = save_pathname / f"{save_filename}_{f.split('.')[-2]}.nc" if not outfile.is_file(): - # download one file for determining coordinate indicies + # download one file for determining coordinate indices if lat_i == "": url = ( f"{base_url}{year}/{month:02d}//{f}" From 21d5521de23a2af7225c72bbc46ba47781c7959e Mon Sep 17 00:00:00 2001 From: Eric Simley Date: Tue, 4 Jun 2024 07:19:13 -0600 Subject: [PATCH 4/5] updating downloader documenentation --- openoa/utils/downloader.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/openoa/utils/downloader.py b/openoa/utils/downloader.py index 2709eb62..659fdca8 100644 --- a/openoa/utils/downloader.py +++ b/openoa/utils/downloader.py @@ -3,14 +3,14 @@ This module provides functions for downloading data, including long-term historical atmospheric data from the MERRA2 and ERA5 reanalysis products and returning as pandas DataFrames and saving -data in csv files. Currently by default the module downloads monthly reanalysis data for a time -period of interest using NASA Goddard Earth Sciences Data and Information Services Center -(GES DISC) for MERRA2 and the Copernicus Climate Data Store (CDS) API for ERA5, but this can be -modified to get hourly data, and indeed other data sources available on GES DISC and CDS. - -To use this module to download data users must first create user accounts. Instructions can be -found at https://disc.gsfc.nasa.gov/data-access#python-requests and -https://cds.climate.copernicus.eu/api-how-to +data in csv files. The module contains functions for downloading either monthly or hourly +reanalysis data for a time period of interest using NASA Goddard Earth Sciences Data and Information +Services Center (GES DISC) for MERRA2 and the Copernicus Climate Data Store (CDS) API for ERA5. +These functions could be modified to get other data sources available on GES DISC and CDS if desired. + +To use this module to download data users must first create user accounts and save user credential +files locally. Instructions can be found at https://disc.gsfc.nasa.gov/data-access#python-requests +and https://cds.climate.copernicus.eu/api-how-to In addition you can download data directly from these source: From 6939e3bb5fb9bac6681e6fef92298b243932e128 Mon Sep 17 00:00:00 2001 From: Eric Simley Date: Tue, 4 Jun 2024 09:49:40 -0600 Subject: [PATCH 5/5] adding downloader to sphinx documentation --- openoa/utils/downloader.py | 23 ++++++++++++----------- sphinx/api/utils.rst | 10 ++++++++++ 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/openoa/utils/downloader.py b/openoa/utils/downloader.py index 659fdca8..ef4448bb 100644 --- a/openoa/utils/downloader.py +++ b/openoa/utils/downloader.py @@ -1,18 +1,19 @@ """ -This module provides functions for downloading files, including reanalysis data +This module provides functions for downloading files, including arbitrary files, files from Zenodo, +and reanalysis data. 
-This module provides functions for downloading data, including long-term historical atmospheric
-data from the MERRA2 and ERA5 reanalysis products and returning as pandas DataFrames and saving
-data in csv files. The module contains functions for downloading either monthly or hourly
-reanalysis data for a time period of interest using NASA Goddard Earth Sciences Data and Information
-Services Center (GES DISC) for MERRA2 and the Copernicus Climate Data Store (CDS) API for ERA5.
-These functions could be modified to get other data sources available on GES DISC and CDS if desired.
+It contains functions for downloading long-term historical atmospheric data from the MERRA2 and
+ERA5 reanalysis products and returning as pandas DataFrames and saving data in csv files. The
+module contains functions for downloading either monthly or hourly reanalysis data for a time
+period of interest using NASA Goddard Earth Sciences Data and Information Services Center (GES
+DISC) for MERRA2 and the Copernicus Climate Data Store (CDS) API for ERA5. These functions could be
+modified to get other data sources available on GES DISC and CDS if desired.
 
-To use this module to download data users must first create user accounts and save user credential
-files locally. Instructions can be found at https://disc.gsfc.nasa.gov/data-access#python-requests
-and https://cds.climate.copernicus.eu/api-how-to
+To use this module to download reanalysis data, users must first create user accounts and save user
+credential files locally. Instructions can be found at
+https://disc.gsfc.nasa.gov/data-access#python-requests and https://cds.climate.copernicus.eu/api-how-to
 
-In addition you can download data directly from these source:
+In addition, you can download reanalysis data directly from these sources:
 
 * Hourly MERRA2 data can be downloaded directly from NASA GES DISC by selecting the
   "Subset / Get Data" link on the following webpage:
diff --git a/sphinx/api/utils.rst b/sphinx/api/utils.rst
index 1fb60813..07f1c003 100644
--- a/sphinx/api/utils.rst
+++ b/sphinx/api/utils.rst
@@ -7,6 +7,15 @@ The utils subpackage provides module-level methods that operate on Pandas `DataF
 `Series`. These modules and their methods are used throughout the OpenOA codebase, and can be
 imported and used individually into your own scripts.
 
+Downloader
+**********
+
+.. automodule:: openoa.utils.downloader
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+
 Quality Assurance
 *****************
 
@@ -15,6 +24,7 @@ Quality Assurance
     :undoc-members:
     :show-inheritance:
 
+
 Filters
 *******
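
For reference, a minimal usage sketch of the two new hourly downloaders follows. It is illustrative
only: the coordinates, save directories, file name prefixes, and date range below are arbitrary
placeholder values, and the calls assume that CDS and GES DISC accounts and credential files have
already been set up as described in the module docstring. Dates are passed explicitly in the
"YYYY-MM-DD" form used after PATCH 3/5.

from pathlib import Path

from openoa.utils import downloader

# Placeholder site coordinates (decimal degrees, WGS 84) and output locations.
lat, lon = 48.45, 5.59
era5_dir = Path("era5_hourly_data")
merra2_dir = Path("merra2_hourly_data")

# ERA5 hourly data via the CDS API; assumes a configured CDS account (e.g. ~/.cdsapirc).
era5_df = downloader.get_era5_hourly(
    lat=lat,
    lon=lon,
    save_pathname=era5_dir,
    save_filename="era5_hourly_example",
    start_date="2020-01-01",
    end_date="2020-12-31",
    calc_derived_vars=True,  # also adds windspeed_ms, winddirection_deg, and rho_kgm-3 columns
)

# MERRA2 hourly data via NASA GES DISC; assumes Earthdata credentials set up per the linked instructions.
merra2_df = downloader.get_merra2_hourly(
    lat=lat,
    lon=lon,
    save_pathname=merra2_dir,
    save_filename="merra2_hourly_example",
    start_date="2020-01-01",
    end_date="2020-12-31",
    calc_derived_vars=True,
)

# Both functions return an hourly time series indexed by "datetime" and also write
# "<save_filename>.csv" plus the intermediate NetCDF files into save_pathname.
print(era5_df.head())
print(merra2_df.head())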