diff --git a/app/api/netcdfs.py b/app/api/netcdfs.py
index 99a6895..ff8fed6 100644
--- a/app/api/netcdfs.py
+++ b/app/api/netcdfs.py
@@ -1,29 +1,61 @@
 from fastapi import Header, APIRouter
 from fastapi.responses import FileResponse
 import uuid
-from app.processing.netcdfs_processing import get_netcdf_from_point as get_netcdf
+from app.processing.netcdfs_processing import get_netcdf_from_point as get_netcdf_point
+from app.processing.netcdfs_processing import get_netcdf_from_area as get_netcdf_area
+from app.processing.netcdfs_processing import get_netcdf_from_mask as get_netcdf_mask
 import json
+
 
 router = APIRouter()
 
 
-@router.get("/")
+@router.get("/get_netcdf_from_point")
 def get_netcdf_from_point(
-    longitude: float, latitude: float, filepath: str, start_date: str, end_date: str
+    indicator_id : int, longitude: float, latitude: float, start_date = None, end_date = None
 ):
     """_summary_
 
     Args:
+        indicator_id (int): Indicator of the netcdf file
         longitude (float): Longitude of the point
         latitude (float): Latitude of the point
-        filepath (str): Filepath of the netcdf file
         start_date (str): Start date of the data
         end_date (str): End date of the data
 
     Returns:
         _type_: Netcdf file
     """
-    ds = get_netcdf(longitude, latitude, filepath, start_date, end_date)
+    ds = get_netcdf_point(longitude, latitude, indicator_id, start_date, end_date)
+    unique_filename = str(uuid.uuid4())
+    ds.to_netcdf("/tmp/{0}.nc".format(unique_filename))
+    return FileResponse(
+        "/tmp/{0}.nc".format(unique_filename),
+        media_type="application/x-netcdf",
+        filename="data.nc",
+    )
+
+
+
+@router.get("/get_netcdf_from_area")
+def get_netcdf_from_area(
+    longitude_min: float, longitude_max : float, latitude_min: float, latitude_max : float, indicator_id : int, start_date = None, end_date = None
+):
+    """_summary_
+
+    Args:
+        longitude_min (float): Minimum longitude of the area
+        longitude_max (float): Maximum longitude of the area
+        latitude_min (float): Minimum latitude of the area
+        latitude_max (float): Maximum latitude of the area
+        indicator_id (int): Indicator of the netcdf file
+        start_date (str): Start date of the data
+        end_date (str): End date of the data
+
+    Returns:
+        _type_: Netcdf file
+    """
+    ds = get_netcdf_area(longitude_min, longitude_max, latitude_min, latitude_max, indicator_id, start_date, end_date)
     unique_filename = str(uuid.uuid4())
     ds.to_netcdf("/tmp/{0}.nc".format(unique_filename))
     return FileResponse(
@@ -31,3 +63,29 @@ def get_netcdf_from_point(
         media_type="application/x-netcdf",
         filename="data.nc",
     )
+
+
+
+@router.get("/get_netcdf_from_mask")
+def get_netcdf_from_mask(
+    filepath_mask : str, indicator_id : int, row_ID = None, start_date = None, end_date = None
+):
+    """_summary_
+
+    Args:
+        filepath_mask (str): Filepath or URL of the mask file
+        indicator_id (int): Indicator of the netcdf file
+        start_date (str): Start date of the data
+        end_date (str): End date of the data
+        row_ID (int): ID of the desired mask
+    Returns:
+        _type_: xarray dataset
+    """
+    ds = get_netcdf_mask(filepath_mask, indicator_id, row_ID, start_date, end_date)
+    unique_filename = str(uuid.uuid4())
+    ds.to_netcdf("/tmp/{0}.nc".format(unique_filename))
+    return FileResponse(
+        "/tmp/{0}.nc".format(unique_filename),
+        media_type="application/x-netcdf",
+        filename="data.nc",
+    )
\ No newline at end of file
diff --git a/app/processing/netcdfs_processing.py b/app/processing/netcdfs_processing.py
index dfe0bb9..f150ea5 100644
--- a/app/processing/netcdfs_processing.py
+++ b/app/processing/netcdfs_processing.py
@@ -1,24 +1,143 @@
 import os
 import xarray as xr
+import geopandas
+import rioxarray
+from shapely.geometry import mapping
+import fiona
+import requests
+import pandas as pd
+import numpy as np
 
-def get_netcdf_from_point(longitude: float, latitude: float, filepath: str, start_date: str, end_date: str):
+
+def get_filepath_from_indicator_id(indicator_id) :
+    request_id = requests.get("https://datahubdes.ihcantabria.com/v1/public/Products").json()
+    for i in range(0,468) :
+        if request_id[i]["id"] == indicator_id :
+            path = request_id[i]["urlBase"] + request_id[i]["urlXmlLatest"]
+            data = pd.read_xml(path)
+            ncdf_URL = data["urlPath"][1]
+            ncdf_URL = "https://ihthreddsdev.ihcantabria.com" + "/thredds/dodsC/" + ncdf_URL
+            print(ncdf_URL)
+            return ncdf_URL
+
+def get_netcdf_from_point(longitude: float, latitude: float, indicator_id : int, start_date: str, end_date: str):
     """_summary_
 
     Args:
         longitude (float): Longitude of the point
         latitude (float): Latitude of the point
-        filepath (str): Filepath of the netcdf file
+        indicator_id (int): Indicator of the netcdf file
         start_date (str): Start date of the data
         end_date (str): End date of the data
 
     Returns:
         _type_: xarray dataset
     """
-    files = os.listdir(filepath)
-    if len(files) >1:
-        ds = xr.open_mfdataset("{0}/*.nc".format(filepath), concat_dim="time", combine='nested', engine="netcdf4")
-    else:
-        ds = xr.open_dataset("{0}/{1}".format(filepath, files[0]), engine="netcdf4")
-    ds = ds.sel(time=slice(start_date, end_date))
+
+    try :
+        files = os.listdir(indicator_id)
+        if len(files) >1:
+            ds = xr.open_mfdataset("{0}*.nc".format(indicator_id), concat_dim="time", combine='nested', engine="netcdf4")
+        else:
+            ds = xr.open_dataset("{0}{1}".format(indicator_id, files[0]), engine="netcdf4")
+    except :
+        filepath = get_filepath_from_indicator_id(indicator_id)
+        ds = xr.open_dataset(filepath)
+
+    try :
+        ds = ds.rename({"lat" : "latitude", "lon" : "longitude"})
+    except:
+        pass
+    try :
+        ds = ds.sel(time=slice(start_date, end_date))
+    except :
+        pass
     ds = ds.sel(longitude=longitude, latitude=latitude, method="nearest")
-    return ds
\ No newline at end of file
+    return ds
+
+
+
+def get_netcdf_from_area(longitude_min: float, longitude_max : float, latitude_min: float, latitude_max : float, indicator_id: int, start_date: str, end_date: str):
+    """_summary_
+
+    Args:
+        longitude_min (float): Minimum longitude of the area
+        longitude_max (float): Maximum longitude of the area
+        latitude_min (float): Minimum latitude of the area
+        latitude_max (float): Maximum latitude of the area
+        indicator_id (int): Indicator of the netcdf file
+        start_date (str): Start date of the data
+        end_date (str): End date of the data
+
+    Returns:
+        _type_: xarray dataset
+    """
+
+    try :
+        files = os.listdir(indicator_id)
+        if len(files) >1:
+            ds = xr.open_mfdataset("{0}*.nc".format(indicator_id), concat_dim="time", combine='nested', engine="netcdf4")
+        else:
+            ds = xr.open_dataset("{0}{1}".format(indicator_id, files[0]), engine="netcdf4")
+    except :
+        filepath = get_filepath_from_indicator_id(indicator_id)
+        ds = xr.open_dataset(filepath)
+    try :
+        ds = ds.rename({"lat" : "latitude", "lon" : "longitude"})
+    except:
+        pass
+    try:
+        ds = ds.sel(time=slice(start_date, end_date))
+    except:
+        pass
+    mask_lon = (ds.longitude >= longitude_min) & (ds.longitude <= longitude_max)
+    mask_lat = (ds.latitude >= latitude_min) & (ds.latitude <= latitude_max)
+    ds = ds.where(mask_lon & mask_lat, drop=True)
+    return ds
+
+
+
+
+def get_netcdf_from_mask(filepath_mask : str, indicator_id : int, row_ID = None, start_date = None, end_date = None) :
+    """_summary_
+
+    Args:
+        filepath_mask (str): Filepath of the mask file
+        indicator_id (int): Indicator of the netcdf file
+        start_date (str): Start date of the data
+        end_date (str): End date of the data
+        row_ID (int): ID of the desired mask
+    Returns:
+        _type_: xarray dataset
+    """
+
+    try :
+        files = os.listdir(indicator_id)
+        if len(files) >1:
+            ds = xr.open_mfdataset("{0}*.nc".format(indicator_id), concat_dim="time", combine='nested', engine="netcdf4", drop_variables=["DATA_spatially_aggregated", "latitude_bounds", "longitude_bounds", "spatial_aggregation_region_id", "climatology_bounds"])
+        else:
+            ds = xr.open_dataset("{0}{1}".format(indicator_id, files[0]), engine="netcdf4", drop_variables="DATA_spatially_aggregated")
+    except :
+        filepath_netcdf = get_filepath_from_indicator_id(indicator_id)
+        ds = xr.open_dataset(filepath_netcdf)
+    if row_ID != None :
+        mask = fiona.open(filepath_mask)
+        mask = geopandas.GeoDataFrame.from_features([mask[int(row_ID)]])
+    else :
+        mask = geopandas.read_file(filepath_mask, crs="epsg:4326")
+    try :
+        ds = ds.rename({"lat" : "latitude", "lon" : "longitude"})
+    except :
+        pass
+    print(mask)
+    ds.rio.set_spatial_dims(x_dim="longitude", y_dim="latitude", inplace=True)
+    ds.rio.write_crs("epsg:4326", inplace=True)
+    ds = ds.rio.clip(mask.geometry.apply(mapping), mask.crs, drop=False)
+    mask_lon = (ds.longitude >= float(mask["MIN_X"].iloc[0])) & (ds.longitude <= float(mask["MAX_X"].iloc[0]))
+    mask_lat = (ds.latitude > float(mask["MIN_Y"].iloc[0])) & (ds.latitude < float(mask["MAX_Y"].iloc[0]))
+    ds = ds.where(mask_lon & mask_lat, drop=True)
+    try:
+        ds = ds.sel(time=slice(start_date, end_date))
+    except:
+        pass
+    return ds
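
Note (not part of the patch): a minimal sketch of how the reworked processing functions might be called directly. It illustrates the dual role of indicator_id in the code above: a local directory is read with os.listdir(), and anything else falls through to the THREDDS lookup in get_filepath_from_indicator_id(). The local path is the test data shipped in tests/; the numeric catalogue id is purely hypothetical.

    # Sketch only; the numeric id below is hypothetical, the local path comes from tests/.
    from app.processing.netcdfs_processing import get_netcdf_from_point, get_netcdf_from_area

    # indicator_id given as a local directory: os.listdir() succeeds and the
    # NetCDF files inside it are opened directly.
    ds_point = get_netcdf_from_point(
        longitude=-8.33,
        latitude=43.46,
        indicator_id="tests/data/get_data_from_point_single_file/",
        start_date="2023-01-08",
        end_date="2023-01-11",
    )

    # indicator_id given as a catalogue id (hypothetical value 42): os.listdir()
    # raises, so the dataset URL is resolved through the datahub Products endpoint.
    ds_area = get_netcdf_from_area(
        longitude_min=-8.27, longitude_max=-8.26,
        latitude_min=43.45, latitude_max=43.47,
        indicator_id=42,
        start_date=None, end_date=None,
    )
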
diff --git a/requirements.txt b/requirements.txt
index df03353..7b0313c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,38 +1,124 @@
+affine==2.4.0
+annotated-types==0.6.0
 anyio==3.6.2
+asciitree==0.3.3
 attrs==22.2.0
+beautifulsoup4==4.12.3
 black==22.12.0
+blinker==1.4
+certifi==2024.2.2
 cftime==1.6.2
+charset-normalizer==3.3.2
 click==8.1.3
+click-plugins==1.1.1
+cligj==0.7.2
 cloudpickle==2.2.0
+command-not-found==0.3
+contourpy==1.2.1
 coverage==7.0.5
+cryptography==3.4.8
+cycler==0.12.1
 dask==2022.12.1
+dbus-python==1.2.18
+distro==1.7.0
+distro-info==1.1+ubuntu0.2
+dnspython==2.6.1
+email_validator==2.1.1
+et-xmlfile==1.1.0
 exceptiongroup==1.1.0
 fastapi==0.89.1
+fastapi-cli==0.0.3
+fasteners==0.19
+fiona==1.9.6
+fonttools==4.51.0
 fsspec==2022.11.0
+geopandas==0.14.3
 h11==0.14.0
+httpcore==1.0.5
+httplib2==0.20.2
+httptools==0.6.1
+httpx==0.27.0
 idna==3.4
+importlib-metadata==4.6.4
 iniconfig==2.0.0
+jeepney==0.7.1
+Jinja2==3.1.4
+keyring==23.5.0
+kiwisolver==1.4.5
+launchpadlib==1.10.16
+lazr.restfulclient==0.14.4
+lazr.uri==1.0.6
 locket==1.0.0
+lxml==5.2.2
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.8.4
+mdurl==0.1.2
+MetPy==1.6.2
+more-itertools==8.10.0
 mypy-extensions==0.4.3
-netCDF4==1.6.2
+netCDF4==1.6.5
+netifaces==0.11.0
+numcodecs==0.12.1
 numpy==1.24.1
-packaging==23.0
-pandas==1.5.2
+oauthlib==3.2.0
+openpyxl==3.1.2
+orjson==3.10.3
+packaging==24.0
+pandas==2.2.2
 partd==1.3.0
 pathspec==0.10.3
+pillow==10.3.0
+Pint==0.23
 platformdirs==2.6.2
 pluggy==1.0.0
+pooch==1.8.1
 pydantic==1.10.4
+pydantic_core==2.18.2
+Pygments==2.18.0
+PyGObject==3.42.1
+PyJWT==2.3.0
+pyparsing==2.4.7
+pyproj==3.6.1
+pyshp==2.3.1
 pytest==7.2.1
 pytest-cov==4.0.0
+python-apt==2.4.0+ubuntu3
 python-dateutil==2.8.2
+python-dotenv==1.0.1
+python-multipart==0.0.9
 pytz==2022.7
 PyYAML==6.0
+rasterio==1.3.9
+requests==2.31.0
+rich==13.7.1
+rioxarray==0.15.3
+scipy==1.13.0
+SecretStorage==3.3.1
+shapely==2.0.3
+shellingham==1.5.4
 six==1.16.0
 sniffio==1.3.0
+snuggs==1.4.7
+soupsieve==2.5
 starlette==0.22.0
+systemd-python==234
 tomli==2.0.1
 toolz==0.12.0
+traitlets==5.14.3
+typer==0.12.3
 typing_extensions==4.4.0
+tzdata==2024.1
+ubuntu-pro-client==8001
+ufw==0.36.1
+ujson==5.9.0
+unattended-upgrades==0.1
+urllib3==2.2.1
 uvicorn==0.20.0
-xarray==2022.12.0
+uvloop==0.19.0
+wadllib==1.3.6
+watchfiles==0.21.0
+websockets==12.0
+xarray==2024.5.0
+zarr==2.17.2
+zipp==1.0.0
diff --git a/tests/test_get_api.py b/tests/test_get_api.py
index 639fead..33faa4f 100644
--- a/tests/test_get_api.py
+++ b/tests/test_get_api.py
@@ -2,13 +2,13 @@
 
 longitude = -8.33
 latitude = 43.46
-filepath = "tests/data/get_data_from_point_single_file"
+indicator_id = "tests/data/get_data_from_point_single_file/"
 start_date = "2023-01-08"
 end_date = "2023-01-11"
 
 
 def test_get_netcdf_from_point():
     response = app.api.netcdfs.get_netcdf_from_point(
-        longitude, latitude, filepath, start_date, end_date
+        indicator_id, longitude, latitude, start_date, end_date
     )
     assert response.status_code == 200
     assert response.headers["Content-Type"] == "application/x-netcdf"
diff --git a/tests/test_get_netcdf_from_point.py b/tests/test_get_netcdf_from_point.py
index 6356a20..b28b265 100644
--- a/tests/test_get_netcdf_from_point.py
+++ b/tests/test_get_netcdf_from_point.py
@@ -1,6 +1,7 @@
 # create test for get_data_from_point
-from app.processing.netcdfs_processing import get_netcdf_from_point
+from app.processing.netcdfs_processing import get_netcdf_from_point, get_netcdf_from_area, get_netcdf_from_mask
 import pytest
+import numpy as np
 
 longitude = -8.33
 latitude = 43.46
@@ -8,9 +9,9 @@
 end_date = "2023-01-11"
 
 
 def test_get_netcdf_from_point_single_file():
-    filepath = "tests/data/get_data_from_point_single_file"
+    indicator_id = "tests/data/get_data_from_point_single_file/"
     ds = get_netcdf_from_point(
-        longitude, latitude, filepath, start_date, end_date
+        longitude, latitude, indicator_id, start_date, end_date
     )
     assert ds.dims["time"] == 95
     assert ds["sea_surface_height_above_sea_level"].values[0] == pytest.approx(-327 * 0.001)
@@ -20,15 +21,43 @@ def test_get_netcdf_from_point_single_file():
     assert ds["sea_surface_height_above_sea_level"].values[94] == pytest.approx(-1355 * 0.001)
     assert ds["sea_water_salinity"].values[94] == pytest.approx(10521 * 0.001 + 20)
 
-def test_get_netcdf_from_point_multiple_files():
-    filepath = "tests/data/get_data_from_point_multiple_files"
-    ds = get_netcdf_from_point(
-        longitude, latitude, filepath, start_date, end_date
+
+
+def test_get_netcdf_from_area():
+    indicator_id = "tests/data/get_data_from_point_single_file/"
+    longitude_min = -8.267496
+    longitude_max = -8.258834
+    latitude_min = 43.44947052
+    latitude_max = 43.46964645
+    ds = get_netcdf_from_area(
+        longitude_min, longitude_max, latitude_min, latitude_max, indicator_id, start_date, end_date
     )
     assert ds.dims["time"] == 95
-    assert ds["sea_surface_height_above_sea_level"].values[0] == pytest.approx(-327 * 0.001)
-    assert ds["sea_water_salinity"].values[0] == pytest.approx(10701 * 0.001 + 20)
-    assert ds["sea_surface_height_above_sea_level"].values[8] == pytest.approx(-842 * 0.001)
-    assert ds["sea_water_salinity"].values[8] == pytest.approx(10653 * 0.001 + 20)
-    assert ds["sea_surface_height_above_sea_level"].values[94] == pytest.approx(-1355 * 0.001)
-    assert ds["sea_water_salinity"].values[94] == pytest.approx(10521 * 0.001 + 20)
ds.dims["longitude"] == 11 + assert ds.dims["latitude"] == 33 + assert np.nanmean(ds.variables["sea_surface_height_above_sea_level"]) == pytest.approx(-0.29108825) + assert np.nanmean(ds.variables["eastward_sea_barotropic_velocity"]) == pytest.approx(0.0016770974) + assert np.nanmean(ds.variables["eastward_sea_water_velocity"]) == pytest.approx(0.024458151) + assert np.nanmean(ds.variables["northward_sea_barotropic_velocity"]) == pytest.approx(-0.008823973) + assert np.nanmean(ds.variables["northward_sea_water_velocity"]) == pytest.approx(-0.0089076795) + assert np.nanmean(ds.variables["sea_water_potential_temperature"]) == pytest.approx(11.782998) + assert np.nanmean(ds.variables["sea_water_salinity"]) == pytest.approx(30.22909) + + + +def test_get_netcdf_from_mask(): + indicator_id = "/home/pablo807/workspace/NetcdfClimatico/PercentilesPorPaises/Data/Ncdf_test_mask/" + filepath_mask = "/home/pablo807/workspace/NetcdfClimatico/PercentilesPorPaises/Data/MasksCountries/WB_GAD_ADM0.shp" + row_ID = 61 + start_date = "1950/01/01" + end_date = "2050/01/01" + ds = get_netcdf_from_mask( + filepath_mask, indicator_id, row_ID, start_date, end_date + ) + assert ds.dims["longitude"] == 23 + assert ds.dims["latitude"] == 16 + assert ds.dims["time"] == 1 + assert np.nanmean(ds.variables["climatology_cdsrperiodmean_period"]) == pytest.approx(1.4977748) + + +