diff --git a/atlite/__init__.py b/atlite/__init__.py
index 20099d30..2305fedc 100644
--- a/atlite/__init__.py
+++ b/atlite/__init__.py
@@ -35,7 +35,7 @@
 match = re.match(r"(\d+\.\d+(\.\d+)?)", __version__)
 assert match, f"Could not determine release_version of pypsa: {__version__}"
 release_version = match.group(0)
-assert not __version__.startswith("0.0"), "Could not determine version of atlite."
+#assert not __version__.startswith("0.0"), "Could not determine version of atlite."  # NOTE(review): disabled for this fork's versioning — restore once packaging is fixed
 
 __all__ = [
     Cutout,
diff --git a/atlite/data.py b/atlite/data.py
index b027509e..aa84a2ba 100644
--- a/atlite/data.py
+++ b/atlite/data.py
@@ -225,6 +225,7 @@ def cutout_prepare(
         ds[v].encoding.update(compression)
 
     ds = cutout.data.merge(ds[missing_vars.values]).assign_attrs(**attrs)
+    ds = ds.unify_chunks()  # unify_chunks returns a new Dataset (not in-place); should speed things up.
 
     # write data to tmp file, copy it to original data, this is much safer
     # than appending variables
diff --git a/atlite/datasets/era5.py b/atlite/datasets/era5.py
index 04e88015..07d24d84 100644
--- a/atlite/datasets/era5.py
+++ b/atlite/datasets/era5.py
@@ -21,6 +21,8 @@
 from dask import compute, delayed
 from dask.array import arctan2, sqrt
 from numpy import atleast_1d
+from dask.diagnostics import ProgressBar
+
 from atlite.gis import maybe_swap_spatial_dims
 from atlite.pv.solar_position import SolarPosition
 
@@ -71,11 +73,11 @@ def _add_height(ds):
     """
 
     g0 = 9.80665
-    z = ds["z"]
+    z = ds["geopotential"]
     if "time" in z.coords:
         z = z.isel(time=0, drop=True)
     ds["height"] = z / g0
-    ds = ds.drop_vars("z")
+    ds = ds.drop_vars("geopotential")
 
     return ds
 
@@ -118,19 +120,19 @@ def get_data_wind(retrieval_params):
     ds = _rename_and_clean_coords(ds)
 
     for h in [10, 100]:
-        ds[f"wnd{h}m"] = sqrt(ds[f"u{h}"] ** 2 + ds[f"v{h}"] ** 2).assign_attrs(
-            units=ds[f"u{h}"].attrs["units"], long_name=f"{h} metre wind speed"
+        ds[f"wnd{h}m"] = sqrt(ds[f"{h}m_u_component_of_wind"] ** 2 + ds[f"{h}m_v_component_of_wind"] ** 2).assign_attrs(
+            units=ds[f"{h}m_u_component_of_wind"].attrs["units"], long_name=f"{h} metre wind speed"
         )
 
     ds["wnd_shear_exp"] = (
         np.log(ds["wnd10m"] / ds["wnd100m"]) / np.log(10 / 100)
     ).assign_attrs(units="", long_name="wind shear exponent")
 
     # span the whole circle: 0 is north, π/2 is east, -π is south, 3π/2 is west
-    azimuth = arctan2(ds["u100"], ds["v100"])
+    azimuth = arctan2(ds["100m_u_component_of_wind"], ds["100m_v_component_of_wind"])
     ds["wnd_azimuth"] = azimuth.where(azimuth >= 0, azimuth + 2 * np.pi)
 
-    ds = ds.drop_vars(["u100", "v100", "u10", "v10", "wnd10m"])
-    ds = ds.rename({"fsr": "roughness"})
+    ds = ds.drop_vars(["10m_u_component_of_wind", "10m_v_component_of_wind", "100m_u_component_of_wind", "100m_v_component_of_wind", "wnd10m"])
+    ds = ds.rename({"forecast_surface_roughness": "roughness"})
     return ds
 
@@ -159,7 +161,10 @@ def get_data_influx(retrieval_params):
     ds = _rename_and_clean_coords(ds)
 
-    ds = ds.rename({"fdir": "influx_direct", "tisr": "influx_toa"})
+    ds = ds.rename({"total_sky_direct_solar_radiation_at_surface": "influx_direct"
+                    , "toa_incident_solar_radiation": "influx_toa"
+                    , "surface_solar_radiation_downwards": "ssrd"
+                    , "surface_net_solar_radiation": "ssr"})
     ds["albedo"] = (
         ((ds["ssrd"] - ds["ssr"]) / ds["ssrd"].where(ds["ssrd"] != 0))
         .fillna(0.0)
         .rename("albedo")
@@ -215,9 +220,9 @@ def get_data_temperature(retrieval_params):
 
     ds = _rename_and_clean_coords(ds)
     ds = ds.rename(
         {
-            "t2m": "temperature",
-            "stl4": "soil temperature",
-            "d2m": "dewpoint temperature",
+            "2m_temperature": "temperature",
+            "soil_temperature_level_4": "soil temperature",
+            "2m_dewpoint_temperature": "dewpoint temperature",
         }
     )
@@ -231,7 +236,7 @@ def get_data_runoff(retrieval_params):
     ds = retrieve_data(variable=["runoff"], **retrieval_params)
 
     ds = _rename_and_clean_coords(ds)
-    ds = ds.rename({"ro": "runoff"})
+    #ds = ds.rename({"ro": "runoff"})  # NOTE(review): ARCO store presumably already uses the long name — confirm
 
     return ds
 
@@ -338,36 +343,84 @@ def retrieve_data(product, chunks=None, tmpdir=None, lock=None, **updates):
     If you want to track the state of your request go to
     https://cds-beta.climate.copernicus.eu/requests?tab=all
     """
+
+    # started creating a var map to simplify the changes.
+    variable_map = {  # NOTE(review): unused WIP mapping — wire it up or remove it
+        "geopotential": "z",
+        "10m_u_component_of_wind": "",
+        "100m_u_component_of_wind": "",
+        "10m_v_component_of_wind": "",
+        "100m_v_component_of_wind": "",
+        "forecast_surface_roughness": "",
+        "surface_net_solar_radiation": "",
+        "surface_solar_radiation_downwards": "",
+        "toa_incident_solar_radiation": "",
+        "total_sky_direct_solar_radiation_at_surface": "",
+        "runoff": "",
+        "2m_temperature": "",
+        "soil_temperature_level_4": "",
+        "2m_dewpoint_temperature": "",
+    }
+
     request = {"product_type": "reanalysis", "format": "netcdf"}
     request.update(updates)
+    logger.info(f"request: {request}")
+
     assert {"year", "month", "variable"}.issubset(
         request
     ), "Need to specify at least 'variable', 'year' and 'month'"
 
-    client = cdsapi.Client(
-        info_callback=logger.debug, debug=logging.DEBUG >= logging.root.level
+    data_arrays = {}  # NOTE(review): unused — left over from WIP, remove?
+    bucket = 'gs://gcp-public-data-arco-era5/ar/full_37-1h-0p25deg-chunk-1.zarr-v3/'
+
+    ProgressBar().register()
+
+    # Open the Zarr store as a dataset
+    ds = xr.open_zarr(
+        bucket,
+        chunks=chunks,  # the chunks are not aligned, we deal with this later
+        consolidated=True,
+        storage_options=dict(token="anon"),
+    )
+
+    # Select specific variables of interest
+    variables = atleast_1d(request['variable'])  # e.g., ['100m_u_component_of_wind', '100m_v_component_of_wind']
+    ds_selected_vars = ds[variables]
+
+    ds_selected_vars = ds_selected_vars.unify_chunks()  # unify_chunks returns a new Dataset; the bare call was a no-op
+
+    # Define the time range and spatial bounding box
+    # TODO multiple years if needed?
+    year = request['year']
+    month_start = str(request['month'][0]).zfill(2)  # Convert month to zero-padded format
+    month_end = str(request['month'][-1]).zfill(2)
+    day_start = str(request['day'][0]).zfill(2)  # NOTE(review): 'day' is not covered by the assert above — confirm callers always pass it
+    day_end = str(request['day'][-1]).zfill(2)
+
+    # Create start and end time strings
+    time_start = f"{year}-{month_start}-{day_start}T00:00"
+    time_end = f"{year}-{month_end}-{day_end}T23:00"
+
+    # Define the time range slice
+    time_range = slice(time_start, time_end)
+    logger.info("Time Range:")
+    logger.info(time_start)
+    logger.info(time_end)
+
+    bbox = request['area']
+    lat_lon_bbox = {
+        'latitude': slice(bbox[0], bbox[2]),  # NOTE(review): assumes 'area' = [N, W, S, E] and descending latitude coords — confirm
+        'longitude': slice(bbox[1], bbox[3])
+    }
+    logger.info(f"lat_lon_bbox: {lat_lon_bbox}")
+
+    # Apply selections
+    ds = ds_selected_vars.sel(
+        time=time_range,
+        latitude=lat_lon_bbox['latitude'],
+        longitude=lat_lon_bbox['longitude']
     )
-    result = client.retrieve(product, request)
-
-    if lock is None:
-        lock = nullcontext()
-
-    with lock:
-        fd, target = mkstemp(suffix=".nc", dir=tmpdir)
-        os.close(fd)
-
-        # Inform user about data being downloaded as "* variable (year-month)"
-        timestr = f"{request['year']}-{request['month']}"
-        variables = atleast_1d(request["variable"])
-        varstr = "\n\t".join([f"{v} ({timestr})" for v in variables])
-        logger.info(f"CDS: Downloading variables\n\t{varstr}\n")
-        result.download(target)
-
-    ds = xr.open_dataset(target, chunks=chunks or {})
-    if tmpdir is None:
-        logger.debug(f"Adding finalizer for {target}")
-        weakref.finalize(ds._file_obj._manager, noisy_unlink, target)
 
     return ds
diff --git a/vm_setup.sh b/vm_setup.sh
new file mode 100644
index 00000000..9e4a92a8
--- /dev/null
+++ b/vm_setup.sh
@@ -0,0 +1,67 @@
+ sudo apt update
+ 2 sudo apt install -y python3-pip git
+ 3 git clone https://github.com/carbondirect/cd_atlite
+ 4 cd cd_atlite/
+ 5 ls
+ 6 pip3 install -r requirements.txt
+ 7 python3 -m venv venv
+ 8 apt install python3.11-venv
+ 9 sudo apt install python3.11-venv
+ 10 apt install python3.11-venv
+ 11 sudo apt install python3.11-venv
+ 12 python3 -m venv venv
+ 13 source venv/bin/activate
+ 14 pip3 install -r requirements.txt
+ 15 vi requirements.txt
+ 16 pip3 install -r requirements.txt
+ 17 rm requirements.txt
+ 18 ls
+ 19 curl -sSL https://install.python-poetry.org | python3 -
+ 20 poetry install
+ 21 poetry shell
+ 22 exit
+ 23 history
+ 24 poetry install
+ 25 cd cd_atlite/
+ 26 poetry install
+ 27 ls
+ 28 pip install pip-tools
+ 29 source venv/bin/activate
+ 30 pip install pip-tools
+ 31 pip-compile pyproject.toml
+ 32 pip install -r requirements.txt
+ 33 python wind_cf.py
+ 34 vi wind_cf.py
+ 35 pip install -e .
+ 36 vi wind_cf.py
+ 37 python wind_cf.py
+ 38 git status
+ 39 PYTHONPATH=$(pwd) python wind_df
+ 40 PYTHONPATH=$(pwd) python wind_cf.py
+ 41 pwd
+ 42 pip install -e .
+ 43 history
+ 44 PYTHONPATH=$(pwd) python wind_cf.py
+ 45 exit
+ 46 cd cd_atlite/
+ 47 ls
+ 48 source venv/bin/activate
+ 49 python wind_cf.py
+ 50 ls
+ 51 vi atlite/__init__.py
+ 52 python wind_cf.py
+ 53 pip install gcsfs
+ 54 python wind_cf.py
+ 55 lsblk
+ 56 sudo mkfs.ext4 -m 0 -F -E lazy_itable_init=0,lazy_journal_init=0,discard /dev/sda
+ 57 sudo mkdir -p /mnt/disks/era5-disk
+ 58 sudo mount -o discard,defaults /dev/sda /mnt/disks/era5-disk
+ 59 sudo nano /etc/fstab
+ 60 ls -al /mnt/disks/era5-disk/
+ 61 ls
+ 62 export DASK_TEMPORARY_DIRECTORY="/mnt/disks/era5-disk/tmp"
+ 63 mkdir -p /mnt/disks/era5-disk/tmp
+ 64 sudo mkdir -p /mnt/disks/era5-disk/tmp
+ 65 chmod a+w /mnt/disks/era5-disk/tmp
+ 66 sudo chmod a+w /mnt/disks/era5-disk/tmp
+ 67 history
diff --git a/wind_cf.py b/wind_cf.py
new file mode 100644
index 00000000..e6a34cca
--- /dev/null
+++ b/wind_cf.py
@@ -0,0 +1,77 @@
+import atlite
+from math import cos, radians
+import logging
+import xarray as xr
+from datetime import datetime
+
+logging.basicConfig(level=logging.INFO)
+
+limon_co_lat = 39.26719230055635
+limon_co_lon = -103.69300728924804
+
+# much windier spot...
+ne_wind_lat = 42.0
+ne_wind_lon = -102.7
+
+center_lon = limon_co_lon
+center_lat = limon_co_lat
+
+center_lat = ne_wind_lat
+center_lon = ne_wind_lon
+
+# Define a roughly 100x100 km box around the selected coordinates
+# 1 degree of latitude is approximately 111 km
+# 1 degree of longitude varies, but at this latitude it's about 95 km
+lat_offset = 100 / 111 / 2  # Half of 100 km in degrees latitude
+lon_offset = (
+    100 / (95 * cos(radians(center_lat))) / 2
+)  # Half of 100 km in degrees longitude
+
+min_lat = center_lat - lat_offset
+max_lat = center_lat + lat_offset
+min_lon = center_lon - lon_offset
+max_lon = center_lon + lon_offset
+
+print("Bounding box coordinates:")
+print(f"Min Latitude: {min_lat:.6f}")
+print(f"Max Latitude: {max_lat:.6f}")
+print(f"Min Longitude: {min_lon:.6f}")
+print(f"Max Longitude: {max_lon:.6f}")
+
+cutout = atlite.Cutout(
+    # path="limon_co_wind.nc",
+    path="ne_wind.nc",
+    module="era5",
+    x=slice(min_lon + 180, max_lon + 180),  # convert to 360 here, avoid differences between fetching & existing .nc paths
+    y=slice(min_lat, max_lat),
+    time="2023",
+    dt="h",
+)
+cutout.prepare(show_progress=True)
+
+# Print out the current time
+current_time = datetime.now()
+print(f"Post prepare time: {current_time}")
+
+cap_factors = cutout.wind(
+    turbine="NREL_ReferenceTurbine_2020ATB_5.5MW", capacity_factor_timeseries=True
+)
+
+print(cap_factors)
+
+# Convert the DataArray to a DataFrame
+df = cap_factors.to_dataframe().reset_index()
+
+# Preview the DataFrame
+print(df.head())
+
+# get just a single time series from the grid area
+df_filtered = df[(df['lon'] == 76.75) & (df['lat'] == 41.75)]
+print(df_filtered.head())
+
+# Save the DataFrame to a CSV file
+df_filtered.to_csv("capacity_factors.csv", index=False)
+
+# Print out the current time
+current_time = datetime.now()
+print(f"Post model time: {current_time}")