diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 463736ff..4f2005ac 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -12,7 +12,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     defaults:
       run:
-        shell: bash -l {0}
+        shell: bash -leo pipefail {0}
     strategy:
       fail-fast: false
       matrix:
@@ -23,9 +23,13 @@ jobs:
       - uses: actions/checkout@v4

       - name: Setup Micromamba
-        uses: mamba-org/setup-micromamba@v1
+        uses: mamba-org/setup-micromamba@v1.9.0
         with:
+          # https://github.com/mamba-org/setup-micromamba/issues/227
+          micromamba-version: 1.5.10-0
           environment-file: ${{ matrix.env }}
+          cache-environment: true
+          cache-downloads: true

       - name: Test
         env:
diff --git a/doc/source/conf.py b/doc/source/conf.py
index 37b8703d..c56da87a 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -106,7 +106,7 @@
 commit_id = subprocess.check_output(["git", "rev-parse", "--short", "HEAD"]).strip().decode("ascii")
 nbsphinx_prolog = f"""
 .. image:: https://colab.research.google.com/assets/colab-badge.svg
-   :target: {{{{ "https://githubtocolab.com/wildlife-dynamics/ecoscope/blob/{commit_id}/doc/source/" + env.docname|urlencode + ".ipynb" }}}}
+   :target: {{{{ "https://colab.research.google.com/github/wildlife-dynamics/ecoscope/blob/{commit_id}/doc/source/" + env.docname|urlencode + ".ipynb" }}}}

 ----
 """  # noqa
diff --git a/doc/source/notebooks/01. IO/EarthRanger_IO.ipynb b/doc/source/notebooks/01. IO/EarthRanger_IO.ipynb
index f5e2dd6e..92f6f311 100644
--- a/doc/source/notebooks/01. IO/EarthRanger_IO.ipynb
+++ b/doc/source/notebooks/01. IO/EarthRanger_IO.ipynb
@@ -29,7 +29,7 @@
    },
    "outputs": [],
    "source": [
-    "%pip install 'ecoscope[analysis,mapping,plotting] @ git+https://github.com/wildlife-dynamics/ecoscope@v1.8.3' &> /dev/null"
+    "%pip install 'ecoscope[analysis,mapping,plotting] @ git+https://github.com/wildlife-dynamics/ecoscope@v1.8.5' &> /dev/null"
    ]
   },
   {
@@ -614,7 +614,11 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "patrol_df = er_io.get_patrols()\n",
+    "patrol_df = er_io.get_patrols(\n",
+    "    since=pd.Timestamp(\"2017-01-01\").isoformat(),\n",
+    "    until=pd.Timestamp(\"2017-04-01\").isoformat(),\n",
+    ")\n",
+    "\n",
     "\n",
     "relocs = er_io.get_patrol_observations(\n",
     "    patrol_df,\n",
@@ -709,8 +713,8 @@
     ")\n",
     "\n",
     "if not elephants.empty:\n",
-    "    for i, value in elephants.iterrows():\n",
-    "        er_io.delete_observation(observation_id=elephants.loc[i, \"extra__id\"])"
+    "    for observation_id in elephants[\"extra__id\"].unique():\n",
+    "        er_io.delete_observation(observation_id)"
    ]
   },
   {
@@ -863,11 +867,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "relocs.drop(\n",
-    "    columns=relocs.columns[relocs.applymap(lambda x: isinstance(x, list)).any()],\n",
-    "    errors=\"ignore\",\n",
-    "    inplace=True,\n",
-    ")\n",
+    "relocs = relocs.select_dtypes(exclude=[list])\n",
     "\n",
     "relocs.to_file(os.path.join(output_dir, \"observations.gpkg\"), layer=\"observations\")"
    ]
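For readers following along, the patrol-download flow the notebook now demonstrates looks like this. This is a minimal sketch, not part of the patch; the server URL and credentials are placeholders, and it assumes an authenticated `EarthRangerIO` client against the v1.8.5 API shown above:

```python
import pandas as pd
import ecoscope

# Hypothetical connection details; substitute a real server and credentials.
er_io = ecoscope.io.EarthRangerIO(
    server="https://sandbox.pamdas.org",
    username="user",
    password="pass",
)

patrol_df = er_io.get_patrols(
    since=pd.Timestamp("2017-01-01").isoformat(),  # ISO 8601 strings
    until=pd.Timestamp("2017-04-01").isoformat(),
)
relocs = er_io.get_patrol_observations(patrol_df)
```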
diff --git a/doc/source/notebooks/01. IO/GEE_IO.ipynb b/doc/source/notebooks/01. IO/GEE_IO.ipynb
index 205e28b9..73d01f72 100644
--- a/doc/source/notebooks/01. IO/GEE_IO.ipynb
+++ b/doc/source/notebooks/01. IO/GEE_IO.ipynb
@@ -27,7 +27,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!pip install 'ecoscope[analysis,mapping,plotting] @ git+https://github.com/wildlife-dynamics/ecoscope@v1.8.3' &> /dev/null"
+    "!pip install 'ecoscope[analysis,mapping,plotting] @ git+https://github.com/wildlife-dynamics/ecoscope@v1.8.5' &> /dev/null"
    ]
   },
   {
@@ -38,7 +38,6 @@
   "source": [
    "import os\n",
    "import sys\n",
-    "import zipfile\n",
    "\n",
    "import shapely\n",
    "\n",
@@ -208,26 +207,9 @@
    "ecoscope.io.utils.download_file(\n",
    "    url=img.getDownloadUrl(download_config),\n",
    "    path=img_zip_file,\n",
+    "    unzip=True,\n",
    ")"
   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Unzip"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "with zipfile.ZipFile(img_zip_file) as z:\n",
-    "    for name in z.namelist():\n",
-    "        z.extract(name, output_dir)"
-   ]
  }
 ],
 "metadata": {
diff --git a/doc/source/notebooks/01. IO/Landscape Dynamics Data.ipynb b/doc/source/notebooks/01. IO/Landscape Dynamics Data.ipynb
index 6a271033..468ede93 100644
--- a/doc/source/notebooks/01. IO/Landscape Dynamics Data.ipynb
+++ b/doc/source/notebooks/01. IO/Landscape Dynamics Data.ipynb
@@ -29,7 +29,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!pip install 'ecoscope[analysis,mapping,plotting] @ git+https://github.com/wildlife-dynamics/ecoscope@v1.8.3' &> /dev/null"
+    "!pip install 'ecoscope[analysis,mapping,plotting] @ git+https://github.com/wildlife-dynamics/ecoscope@v1.8.5' &> /dev/null"
    ]
   },
   {
@@ -40,7 +40,6 @@
   "source": [
    "import os\n",
    "import sys\n",
-    "import zipfile\n",
    "\n",
    "import geopandas as gpd\n",
    "\n",
@@ -90,25 +89,10 @@
    "    url=\"https://maraelephant.maps.arcgis.com/sharing/rest/content/items/162e299f0c7d472b8e36211e946bb273/data\",\n",
    "    path=output_dir,\n",
    "    overwrite_existing=False,\n",
+    "    unzip=True,\n",
    ")"
   ]
  },
- {
-  "cell_type": "markdown",
-  "metadata": {},
-  "source": [
-   "## Extract ZIP"
-  ]
- },
- {
-  "cell_type": "code",
-  "execution_count": null,
-  "metadata": {},
-  "outputs": [],
-  "source": [
-   "zipfile.ZipFile(os.path.join(output_dir, \"active_public_uncategorized_shpfiles.zip\")).extractall(path=output_dir)"
-  ]
- },
 {
  "cell_type": "markdown",
  "metadata": {},
diff --git a/doc/source/notebooks/02. Relocations & Trajectories/Relocations_and_Trajectories.ipynb b/doc/source/notebooks/02. Relocations & Trajectories/Relocations_and_Trajectories.ipynb
index 6a343243..67285736 100644
--- a/doc/source/notebooks/02. Relocations & Trajectories/Relocations_and_Trajectories.ipynb
+++ b/doc/source/notebooks/02. Relocations & Trajectories/Relocations_and_Trajectories.ipynb
@@ -36,7 +36,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "%pip install 'ecoscope[analysis,mapping,plotting] @ git+https://github.com/wildlife-dynamics/ecoscope@v1.8.3' &> /dev/null"
+    "%pip install 'ecoscope[analysis,mapping,plotting] @ git+https://github.com/wildlife-dynamics/ecoscope@v1.8.5' &> /dev/null"
    ]
   },
   {
@@ -252,7 +252,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "relocs[[\"groupby_col\", \"fixtime\", \"geometry\"]].explore()"
+    "relocs[[\"groupby_col\", \"geometry\"]].explore()"
    ]
   },
   {
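Both notebooks above now lean on the built-in extraction in `download_file` instead of a separate `zipfile` cell. A minimal sketch of the consolidated call (placeholder URL and path; note that after this patch `unzip` defaults to False, so extraction must be requested explicitly):

```python
import ecoscope

ecoscope.io.utils.download_file(
    url="https://example.com/archive.zip",  # placeholder
    path="./output",
    overwrite_existing=False,
    unzip=True,  # extracts in place of the removed zipfile cells
)
```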
diff --git a/doc/source/notebooks/03. Home Range & Movescape/EcoGraph.ipynb b/doc/source/notebooks/03. Home Range & Movescape/EcoGraph.ipynb
index d21c2a30..281f9de5 100644
--- a/doc/source/notebooks/03. Home Range & Movescape/EcoGraph.ipynb
+++ b/doc/source/notebooks/03. Home Range & Movescape/EcoGraph.ipynb
@@ -29,7 +29,7 @@
   "source": [
    "ECOSCOPE_RAW = \"https://raw.githubusercontent.com/wildlife-dynamics/ecoscope/master\"\n",
    "\n",
-    "!pip install 'ecoscope[analysis,mapping,plotting] @ git+https://github.com/wildlife-dynamics/ecoscope@v1.8.3' &> /dev/null"
+    "!pip install 'ecoscope[analysis,mapping,plotting] @ git+https://github.com/wildlife-dynamics/ecoscope@v1.8.5' &> /dev/null"
   ]
  },
 {
@@ -152,7 +152,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "traj.explore()"
+    "traj[\"geometry\"].explore()"
    ]
   },
   {
@@ -546,7 +546,7 @@
    "        individual=\"1d22ff96-44d4-45c4-adc3-db1513acbe7d\",\n",
    "        interpolation=\"dofjojfs\",\n",
    "    )\n",
-    "except NotImplemented as e:\n",
+    "except NotImplementedError as e:\n",
    "    print(e)"
   ]
  },
diff --git a/doc/source/notebooks/03. Home Range & Movescape/Elliptical Time Density (ETD).ipynb b/doc/source/notebooks/03. Home Range & Movescape/Elliptical Time Density (ETD).ipynb
index 2064bbb2..0b54a721 100644
--- a/doc/source/notebooks/03. Home Range & Movescape/Elliptical Time Density (ETD).ipynb
+++ b/doc/source/notebooks/03. Home Range & Movescape/Elliptical Time Density (ETD).ipynb
@@ -29,7 +29,7 @@
   "source": [
    "ECOSCOPE_RAW = \"https://raw.githubusercontent.com/wildlife-dynamics/ecoscope/master\"\n",
    "\n",
-    "!pip install 'ecoscope[analysis,mapping,plotting] @ git+https://github.com/wildlife-dynamics/ecoscope@v1.8.3' &> /dev/null"
+    "!pip install 'ecoscope[analysis,mapping,plotting] @ git+https://github.com/wildlife-dynamics/ecoscope@v1.8.5' &> /dev/null"
   ]
  },
 {
diff --git a/doc/source/notebooks/03. Home Range & Movescape/Reduce Regions.ipynb b/doc/source/notebooks/03. Home Range & Movescape/Reduce Regions.ipynb
index cce9c27d..aba9972c 100644
--- a/doc/source/notebooks/03. Home Range & Movescape/Reduce Regions.ipynb
+++ b/doc/source/notebooks/03. Home Range & Movescape/Reduce Regions.ipynb
@@ -29,7 +29,7 @@
   "source": [
    "ECOSCOPE_RAW = \"https://raw.githubusercontent.com/wildlife-dynamics/ecoscope/master\"\n",
    "\n",
-    "!pip install 'ecoscope[analysis,mapping,plotting] @ git+https://github.com/wildlife-dynamics/ecoscope@v1.8.3' &> /dev/null"
+    "!pip install 'ecoscope[analysis,mapping,plotting] @ git+https://github.com/wildlife-dynamics/ecoscope@v1.8.5' &> /dev/null"
   ]
  },
 {
diff --git a/doc/source/notebooks/04. EcoMap & EcoPlot/EcoMap.ipynb b/doc/source/notebooks/04. EcoMap & EcoPlot/EcoMap.ipynb
index 94b56c7c..68d6d675 100644
--- a/doc/source/notebooks/04. EcoMap & EcoPlot/EcoMap.ipynb
+++ b/doc/source/notebooks/04. EcoMap & EcoPlot/EcoMap.ipynb
@@ -36,7 +36,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "%pip install 'ecoscope[analysis,mapping,plotting] @ git+https://github.com/wildlife-dynamics/ecoscope@v1.8.3' &> /dev/null"
+    "%pip install 'ecoscope[analysis,mapping,plotting] @ git+https://github.com/wildlife-dynamics/ecoscope@v1.8.5' &> /dev/null"
    ]
   },
   {
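The exception-handling fix in EcoGraph.ipynb is worth spelling out: `NotImplemented` is the sentinel constant returned by binary dunder methods, not an exception class, so `except NotImplemented` itself raises a `TypeError` at catch time. The corrected pattern, in isolation:

```python
try:
    raise NotImplementedError("dofjojfs is not a valid interpolation method")
except NotImplementedError as e:  # `except NotImplemented` would fail with TypeError
    print(e)
```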
diff --git a/doc/source/notebooks/04. EcoMap & EcoPlot/EcoPlot.ipynb b/doc/source/notebooks/04. EcoMap & EcoPlot/EcoPlot.ipynb
index 494e0b6e..6c151407 100644
--- a/doc/source/notebooks/04. EcoMap & EcoPlot/EcoPlot.ipynb
+++ b/doc/source/notebooks/04. EcoMap & EcoPlot/EcoPlot.ipynb
@@ -36,7 +36,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "%pip install 'ecoscope[analysis,mapping,plotting] @ git+https://github.com/wildlife-dynamics/ecoscope@v1.8.3' &> /dev/null"
+    "%pip install 'ecoscope[analysis,mapping,plotting] @ git+https://github.com/wildlife-dynamics/ecoscope@v1.8.5' &> /dev/null"
    ]
   },
   {
@@ -98,7 +98,6 @@
    "ecoscope.io.download_file(\n",
    "    f\"{ECOSCOPE_RAW}/tests/sample_data/vector/er_relocs.csv.zip\",\n",
    "    os.path.join(output_dir, \"er_relocs.csv.zip\"),\n",
-    "    unzip=False,\n",
    ")\n",
    "\n",
    "data = pd.read_csv(os.path.join(output_dir, \"er_relocs.csv.zip\"), header=0, index_col=0)\n",
diff --git a/doc/source/notebooks/05. Environmental Analyses/Landscape Grid.ipynb b/doc/source/notebooks/05. Environmental Analyses/Landscape Grid.ipynb
index edb43be9..d500532a 100644
--- a/doc/source/notebooks/05. Environmental Analyses/Landscape Grid.ipynb
+++ b/doc/source/notebooks/05. Environmental Analyses/Landscape Grid.ipynb
@@ -29,7 +29,7 @@
   "source": [
    "ECOSCOPE_RAW = \"https://raw.githubusercontent.com/wildlife-dynamics/ecoscope/master\"\n",
    "\n",
-    "!pip install 'ecoscope[analysis,mapping,plotting] @ git+https://github.com/wildlife-dynamics/ecoscope@v1.8.3' &> /dev/null"
+    "!pip install 'ecoscope[analysis,mapping,plotting] @ git+https://github.com/wildlife-dynamics/ecoscope@v1.8.5' &> /dev/null"
   ]
  },
 {
diff --git a/doc/source/notebooks/05. Environmental Analyses/Remote Sensing Time Series Anomaly.ipynb b/doc/source/notebooks/05. Environmental Analyses/Remote Sensing Time Series Anomaly.ipynb
index f0055862..3d3755ab 100644
--- a/doc/source/notebooks/05. Environmental Analyses/Remote Sensing Time Series Anomaly.ipynb
+++ b/doc/source/notebooks/05. Environmental Analyses/Remote Sensing Time Series Anomaly.ipynb
@@ -29,7 +29,7 @@
   "source": [
    "ECOSCOPE_RAW = \"https://raw.githubusercontent.com/wildlife-dynamics/ecoscope/master\"\n",
    "\n",
-    "!pip install 'ecoscope[analysis,mapping,plotting] @ git+https://github.com/wildlife-dynamics/ecoscope@v1.8.3' &> /dev/null"
+    "!pip install 'ecoscope[analysis,mapping,plotting] @ git+https://github.com/wildlife-dynamics/ecoscope@v1.8.5' &> /dev/null"
   ]
  },
 {
diff --git a/doc/source/notebooks/05. Environmental Analyses/Seasonal Calculation.ipynb b/doc/source/notebooks/05. Environmental Analyses/Seasonal Calculation.ipynb
index 75ad5d47..12422f60 100644
--- a/doc/source/notebooks/05. Environmental Analyses/Seasonal Calculation.ipynb
+++ b/doc/source/notebooks/05. Environmental Analyses/Seasonal Calculation.ipynb
@@ -29,7 +29,7 @@
   "source": [
    "ECOSCOPE_RAW = \"https://raw.githubusercontent.com/wildlife-dynamics/ecoscope/master\"\n",
    "\n",
-    "!pip install 'ecoscope[analysis,mapping,plotting] @ git+https://github.com/wildlife-dynamics/ecoscope@v1.8.3' &> /dev/null"
+    "!pip install 'ecoscope[analysis,mapping,plotting] @ git+https://github.com/wildlife-dynamics/ecoscope@v1.8.5' &> /dev/null"
   ]
  },
 {
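Dropping `unzip=False` in EcoPlot.ipynb is safe on two counts: the new `download_file` default no longer extracts, and pandas reads a zipped CSV directly. A sketch, assuming the file downloaded in the cell above exists locally:

```python
import pandas as pd

# compression is inferred from the .zip extension
data = pd.read_csv("er_relocs.csv.zip", header=0, index_col=0)
```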
diff --git a/doc/source/notebooks/06. Data Management/Tracking Data Gantt Chart.ipynb b/doc/source/notebooks/06. Data Management/Tracking Data Gantt Chart.ipynb
index a88657eb..2d51fbf8 100644
--- a/doc/source/notebooks/06. Data Management/Tracking Data Gantt Chart.ipynb
+++ b/doc/source/notebooks/06. Data Management/Tracking Data Gantt Chart.ipynb
@@ -29,7 +29,7 @@
   "source": [
    "ECOSCOPE_RAW = \"https://raw.githubusercontent.com/wildlife-dynamics/ecoscope/master\"\n",
    "\n",
-    "!pip install 'ecoscope[analysis,mapping,plotting] @ git+https://github.com/wildlife-dynamics/ecoscope@v1.8.3' &> /dev/null"
+    "!pip install 'ecoscope[analysis,mapping,plotting] @ git+https://github.com/wildlife-dynamics/ecoscope@v1.8.5' &> /dev/null"
   ]
  },
 {
diff --git a/ecoscope/analysis/astronomy.py b/ecoscope/analysis/astronomy.py
index f9e0992f..825fb23d 100644
--- a/ecoscope/analysis/astronomy.py
+++ b/ecoscope/analysis/astronomy.py
@@ -1,7 +1,8 @@
 import warnings
+
 import numpy as np
-import pyproj
 import pandas as pd
+import pyproj

 try:
     import astroplan
@@ -109,4 +110,4 @@ def get_daynight_ratio(traj, n_grid_points=150) -> pd.Series:
         day_distance += (day_night_df.dist_meters * i).sum()
         night_distance += ((1 - i) * day_night_df.dist_meters).sum()

-    return day_distance / night_distance
+    return night_distance / day_distance
diff --git a/ecoscope/analysis/classifier.py b/ecoscope/analysis/classifier.py
index c2ff9c0f..acc18973 100644
--- a/ecoscope/analysis/classifier.py
+++ b/ecoscope/analysis/classifier.py
@@ -135,11 +135,22 @@ def apply_color_map(dataframe, input_column_name, cmap, output_column_name=None)

         cmap = mpl.colormaps[cmap]
         cmap = cmap.resampled(dataframe[input_column_name].nunique())
-        cmap_colors = cmap(range(dataframe[input_column_name].nunique()))
+        if pd.api.types.is_numeric_dtype(dataframe[input_column_name].dtype):
+            cmap_colors = []
+            val_min = dataframe[input_column_name].min()
+            val_max = dataframe[input_column_name].max()
+            for val in dataframe[input_column_name].unique():
+                cmap_colors.append(cmap((val - val_min) / (val_max - val_min)))
+        else:
+            cmap_colors = cmap(range(len(dataframe[input_column_name].unique())))
+
+        color_list = []
+        for color in cmap_colors:
+            color_list.append(tuple([round(val * 255) for val in color]))

         # convert to hex first to put values in range(0,255), then to an RGBA tuple
         cmap = pd.Series(
-            [hex_to_rgba(mpl.colors.to_hex(color)) for color in cmap_colors],
+            color_list,
             index=dataframe[input_column_name].unique(),
         )
diff --git a/ecoscope/analysis/ecograph.py b/ecoscope/analysis/ecograph.py
index ac94f10f..6eb3de0f 100644
--- a/ecoscope/analysis/ecograph.py
+++ b/ecoscope/analysis/ecograph.py
@@ -85,7 +85,7 @@ def compute(df):
             G = self._get_ecograph(df, subject_name, radius, cutoff, tortuosity_length)
             self.graphs[subject_name] = G

-        self.trajectory.groupby("groupby_col")[self.trajectory.columns].progress_apply(compute)
+        self.trajectory.groupby("groupby_col")[self.trajectory.columns].apply(compute)

     def to_csv(self, output_path):
         """
diff --git a/ecoscope/analysis/feature_density.py b/ecoscope/analysis/feature_density.py
index 2cc48347..f1f5fdf6 100644
--- a/ecoscope/analysis/feature_density.py
+++ b/ecoscope/analysis/feature_density.py
@@ -1,3 +1,6 @@
+import numpy as np
+
+
 def calculate_feature_density(selection, grid, geometry_type="point"):
     def clip_density(cell):
         if geometry_type == "point":
@@ -12,5 +15,5 @@ def clip_density(cell):
             raise ValueError("Unsupported geometry type")

     grid["density"] = grid.geometry.apply(clip_density)
-    # grid["density"] = grid["density"].replace(0, np.nan)  # Set 0's to nan so they don't draw on map
+    grid["density"] = grid["density"].replace(0, np.nan)  # Set 0's to nan so they don't draw on map
     return grid
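The classifier.py change normalizes numeric inputs before sampling the colormap, rather than indexing it by category position. The same logic in isolation (a standalone sketch using only matplotlib and pandas, not the library code itself):

```python
import matplotlib as mpl
import pandas as pd

values = pd.Series([1.0, 2.0, 4.0, 8.0])
cmap = mpl.colormaps["viridis"]

# min-max normalize each value into [0, 1], then scale RGBA floats to 0-255
val_min, val_max = values.min(), values.max()
colors = [
    tuple(round(c * 255) for c in cmap((v - val_min) / (val_max - val_min)))
    for v in values.unique()
]
```

A side effect the new tests rely on: calling a matplotlib colormap with NaN returns its "bad" color, which defaults to `(0, 0, 0, 0)`, so NaN values come out fully transparent.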
--- a/ecoscope/io/earthranger.py
+++ b/ecoscope/io/earthranger.py
@@ -13,8 +13,14 @@
 from tqdm.auto import tqdm

 import ecoscope
+from ecoscope.io.earthranger_utils import (
+    clean_kwargs,
+    clean_time_cols,
+    dataframe_to_dict,
+    format_iso_time,
+    to_gdf,
+)
 from ecoscope.io.utils import pack_columns, to_hex
-from ecoscope.io.earthranger_utils import clean_kwargs, dataframe_to_dict, to_gdf, clean_time_cols


 class EarthRangerIO(ERClient):
@@ -79,7 +85,10 @@ def get_sources(
         )
         df = pd.DataFrame(
             self.get_objects_multithreaded(
-                object="sources/", threads=self.tcp_limit, page_size=self.sub_page_size, **params
+                object="sources/",
+                threads=self.tcp_limit,
+                page_size=self.sub_page_size,
+                **params,
             )
         )
         return df
@@ -154,7 +163,10 @@ def partial_subjects(subjects):
             params["id"] = ",".join(subjects)
             return pd.DataFrame(
                 self.get_objects_multithreaded(
-                    object="subjects/", threads=self.tcp_limit, page_size=self.sub_page_size, **params
+                    object="subjects/",
+                    threads=self.tcp_limit,
+                    page_size=self.sub_page_size,
+                    **params,
                 )
             )

@@ -169,7 +181,10 @@ def partial_subjects(subjects):
         else:
             df = pd.DataFrame(
                 self.get_objects_multithreaded(
-                    object="subjects/", threads=self.tcp_limit, page_size=self.sub_page_size, **params
+                    object="subjects/",
+                    threads=self.tcp_limit,
+                    page_size=self.sub_page_size,
+                    **params,
                 )
             )

@@ -192,7 +207,10 @@ def get_subjectsources(self, subjects=None, sources=None, **addl_kwargs):
         params = clean_kwargs(addl_kwargs, sources=sources, subjects=subjects)
         df = pd.DataFrame(
             self.get_objects_multithreaded(
-                object="subjectsources/", threads=self.tcp_limit, page_size=self.sub_page_size, **params
+                object="subjectsources/",
+                threads=self.tcp_limit,
+                page_size=self.sub_page_size,
+                **params,
             )
         )
         df = clean_time_cols(df)
@@ -262,7 +280,10 @@ def _get_observations(
                 pbar.set_description(f"Downloading Observations for {id_name}={_id}")
                 dataframe = pd.DataFrame(
                     self.get_objects_multithreaded(
-                        object="observations/", threads=self.tcp_limit, page_size=self.sub_page_size, **params
+                        object="observations/",
+                        threads=self.tcp_limit,
+                        page_size=self.sub_page_size,
+                        **params,
                     )
                 )
                 dataframe[id_name] = _id
@@ -452,7 +473,11 @@ def get_subjectsource_observations(
         return observations

     def get_subjectgroup_observations(
-        self, subject_group_id=None, subject_group_name=None, include_inactive=True, **kwargs
+        self,
+        subject_group_id=None,
+        subject_group_name=None,
+        include_inactive=True,
+        **kwargs,
     ):
         """
         Parameters
         ----------
@@ -585,7 +610,10 @@ def get_events(

         df = pd.DataFrame(
             self.get_objects_multithreaded(
-                object="activity/events/", threads=self.tcp_limit, page_size=self.sub_page_size, **params
+                object="activity/events/",
+                threads=self.tcp_limit,
+                page_size=self.sub_page_size,
+                **params,
             )
         )
         gdf = gpd.GeoDataFrame(df)
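A usage sketch for the reflowed `get_subjectgroup_observations` signature (not part of this patch; assumes an authenticated client `er_io` and a subject group that exists on the server):

```python
relocs = er_io.get_subjectgroup_observations(
    subject_group_name="Elephants",  # hypothetical group; or pass subject_group_id
    include_inactive=True,
)
```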
@@ -607,44 +635,61 @@
     def get_patrol_types(self):
         df = pd.DataFrame(self._get("activity/patrols/types"))
         return df.set_index("id")

-    def get_patrols(self, since=None, until=None, patrol_type=None, status=None, **addl_kwargs):
+    def get_patrols(self, since=None, until=None, patrol_type=None, patrol_type_value=None, status=None, **addl_kwargs):
         """
         Parameters
         ----------
         since:
-            lower date range
+            Lower time range
         until:
-            upper date range
+            Upper time range
         patrol_type:
-            Comma-separated list of type of patrol UUID
+            A patrol type UUID or a list of UUIDs
+        patrol_type_value:
+            A patrol type value or a list of patrol type values
         status
-            Comma-separated list of 'scheduled'/'active'/'overdue'/'done'/'cancelled'
+            'scheduled'/'active'/'overdue'/'done'/'cancelled'
+            Accepts a status string or a list of statuses

         Returns
         -------
         patrols : pd.DataFrame
             DataFrame of queried patrols
         """
+        patrol_type_value_list = [patrol_type_value] if isinstance(patrol_type_value, str) else patrol_type_value
+
         params = clean_kwargs(
             addl_kwargs,
             status=status,
             patrol_type=[patrol_type] if isinstance(patrol_type, str) else patrol_type,
+            patrol_type_value=patrol_type_value_list,
             return_data=True,
         )

         filter = {"date_range": {}, "patrol_type": []}
         if since is not None:
-            filter["date_range"]["lower"] = since
+            filter["date_range"]["lower"] = format_iso_time(since)
         if until is not None:
-            filter["date_range"]["upper"] = until
+            filter["date_range"]["upper"] = format_iso_time(until)
         if patrol_type is not None:
             filter["patrol_type"] = params["patrol_type"]
+
+        if patrol_type_value_list is not None:
+            patrol_types = self.get_patrol_types()
+            matching_rows = patrol_types[patrol_types["value"].isin(patrol_type_value_list)]
+            missing_values = set(patrol_type_value_list) - set(matching_rows["value"])
+            if missing_values:
+                raise ValueError(f"Failed to find IDs for values: {missing_values}")
+
+            filter["patrol_type"] = matching_rows.index.tolist()
+
         params["filter"] = json.dumps(filter)

         df = pd.DataFrame(
             self.get_objects_multithreaded(
-                object="activity/patrols", threads=self.tcp_limit, page_size=self.sub_page_size, **params
+                object="activity/patrols",
+                threads=self.tcp_limit,
+                page_size=self.sub_page_size,
+                **params,
             )
         )
         if "serial_number" in df.columns:
@@ -652,24 +697,36 @@ def get_patrols(
         df = clean_time_cols(df)
         return df

-    def get_patrol_events(self, since=None, until=None, patrol_type=None, status=None, **addl_kwargs):
+    def get_patrol_events(
+        self, since=None, until=None, patrol_type=None, patrol_type_value=None, status=None, **addl_kwargs
+    ):
         """
         Parameters
         ----------
         since:
-            lower date range
+            Lower time range
         until:
-            upper date range
+            Upper time range
         patrol_type:
-            Comma-separated list of type of patrol UUID
+            A patrol type UUID or a list of UUIDs
+        patrol_type_value:
+            A patrol type value or a list of patrol type values
         status
-            Comma-separated list of 'scheduled'/'active'/'overdue'/'done'/'cancelled'
+            'scheduled'/'active'/'overdue'/'done'/'cancelled'
+            Accepts a status string or a list of statuses

         Returns
         -------
         events : pd.DataFrame
             DataFrame of queried patrols
         """
-        patrol_df = self.get_patrols(since=since, until=until, patrol_type=patrol_type, status=status, **addl_kwargs)
+        patrol_df = self.get_patrols(
+            since=since,
+            until=until,
+            patrol_type=patrol_type,
+            patrol_type_value=patrol_type_value,
+            status=status,
+            **addl_kwargs,
+        )

         events = []
         for _, row in patrol_df.iterrows():
@@ -683,6 +740,10 @@ def get_patrol_events(
         events_df = clean_time_cols(events_df)
         events_df["geometry"] = events_df["geojson"].apply(lambda x: shape(x.get("geometry")))

+        events_df["time"] = events_df["geojson"].apply(
+            lambda x: datetime.datetime.strptime(x.get("properties").get("datetime"), "%Y-%m-%dT%H:%M:%S%z")
+        )
+
         return gpd.GeoDataFrame(events_df, geometry="geometry", crs=4326)

     def get_patrol_segments_from_patrol_id(self, patrol_id, **addl_kwargs):
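Taken together, the new filters let callers avoid hand-resolving patrol-type UUIDs. A hedged usage sketch (the client and the patrol type value are assumed to exist on the target server):

```python
patrols = er_io.get_patrols(
    since="2017-01-01",                   # plain date strings; normalized by format_iso_time
    until="2017-04-01",
    patrol_type_value="ecoscope_patrol",  # or a list; unknown values raise ValueError
)
```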
@@ -718,7 +779,14 @@ def get_patrol_segments(self):
     )

     def get_patrol_observations_with_patrol_filter(
-        self, since=None, until=None, patrol_type=None, status=None, include_patrol_details=False, **kwargs
+        self,
+        since=None,
+        until=None,
+        patrol_type=None,
+        patrol_type_value=None,
+        status=None,
+        include_patrol_details=False,
+        **kwargs,
     ):
         """
         Download observations for patrols with provided filters.
@@ -726,13 +794,16 @@ def get_patrol_observations_with_patrol_filter(
         Parameters
         ----------
         since:
-            lower date range
+            Lower time range
         until:
-            upper date range
+            Upper time range
         patrol_type:
-            Comma-separated list of type of patrol UUID
+            A patrol type UUID or a list of UUIDs
+        patrol_type_value:
+            A patrol type value or a list of patrol type values
         status
-            Comma-separated list of 'scheduled'/'active'/'overdue'/'done'/'cancelled'
+            'scheduled'/'active'/'overdue'/'done'/'cancelled'
+            Accepts a status string or a list of statuses
         include_patrol_details : bool, optional
             Whether to merge patrol details into dataframe
         kwargs
             Additional parameters to pass to get_subject_observations.
@@ -743,7 +814,14 @@ def get_patrol_observations_with_patrol_filter(
         relocations : ecoscope.base.Relocations
         """

-        patrols_df = self.get_patrols(since=since, until=until, patrol_type=patrol_type, status=status, **kwargs)
+        patrols_df = self.get_patrols(
+            since=since,
+            until=until,
+            patrol_type=patrol_type,
+            patrol_type_value=patrol_type_value,
+            status=status,
+            **kwargs,
+        )
         return self.get_patrol_observations(patrols_df, include_patrol_details=include_patrol_details, **kwargs)

     def get_patrol_observations(self, patrols_df, include_patrol_details=False, **kwargs):
@@ -779,7 +857,10 @@ def get_patrol_observations(self, patrols_df, include_patrol_details=False, **kw
             try:
                 observation = self.get_subject_observations(
-                    subject_ids=[subject_id], since=patrol_start_time, until=patrol_end_time, **kwargs
+                    subject_ids=[subject_id],
+                    since=patrol_start_time,
+                    until=patrol_end_time,
+                    **kwargs,
                 )
                 if include_patrol_details:
                     observation["patrol_id"] = patrol["id"]
@@ -839,7 +920,10 @@ def get_patrol_segment_events(
         object = f"activity/patrols/segments/{patrol_segment_id}/events/"
         df = pd.DataFrame(
             self.get_objects_multithreaded(
-                object=object, threads=self.tcp_limit, page_size=self.sub_page_size, **params
+                object=object,
+                threads=self.tcp_limit,
+                page_size=self.sub_page_size,
+                **params,
             )
         )
         df = clean_time_cols(df)
@@ -1041,7 +1125,7 @@ def upload(obs):
             else:
                 return pd.DataFrame(results)

-        return observations.groupby(source_id_col, group_keys=False).progress_apply(upload)
+        return observations.groupby(source_id_col, group_keys=False).apply(upload)

     def post_event(
         self,
@@ -1123,17 +1207,38 @@ def post_patrol_segment(
         }

         if tracked_subject_id is not None:
-            payload.update({"leader": {"content_type": "observations.subject", "id": tracked_subject_id}})
+            payload.update(
+                {
+                    "leader": {
+                        "content_type": "observations.subject",
+                        "id": tracked_subject_id,
+                    }
+                }
+            )
         else:
             payload.update({"leader": None})

         if start_location is not None:
-            payload.update({"start_location": {"latitude": start_location[0], "longitude": start_location[1]}})
+            payload.update(
+                {
+                    "start_location": {
+                        "latitude": start_location[0],
+                        "longitude": start_location[1],
+                    }
+                }
+            )
         else:
             payload.update({"start_location": None})

         if end_location is not None:
-            payload.update({"end_location": {"latitude": end_location[0], "longitude": end_location[1]}})
+            payload.update(
+                {
+                    "end_location": {
+                        "latitude": end_location[0],
+                        "longitude": end_location[1],
+                    }
+                }
+            )
         else:
             payload.update({"end_location": None})
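Context for the `.progress_apply` to `.apply` swaps here and in ecograph.py: `progress_apply` only exists after tqdm registers its pandas hooks, so the plain `.apply` drops that hidden requirement. A minimal demonstration of what the old code depended on:

```python
import pandas as pd
from tqdm.auto import tqdm

tqdm.pandas()  # monkey-patches progress_apply onto pandas apply/groupby objects
df = pd.DataFrame({"g": ["a", "a", "b"], "x": [1, 2, 3]})
df.groupby("g", group_keys=False).progress_apply(lambda grp: grp)  # valid only after tqdm.pandas()
```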
diff --git a/ecoscope/io/earthranger_utils.py b/ecoscope/io/earthranger_utils.py
index 5ab77c0d..f2b2ffc0 100644
--- a/ecoscope/io/earthranger_utils.py
+++ b/ecoscope/io/earthranger_utils.py
@@ -43,3 +43,10 @@ def clean_time_cols(df):
             # convert x is not None to pd.isna(x) is False
             df[col] = df[col].apply(lambda x: pd.to_datetime(parser.parse(x)) if not pd.isna(x) else None)
     return df
+
+
+def format_iso_time(date_string: str) -> str:
+    try:
+        return pd.to_datetime(date_string).isoformat()
+    except ValueError:
+        raise ValueError(f"Failed to parse timestamp '{date_string}'")
diff --git a/ecoscope/io/utils.py b/ecoscope/io/utils.py
index e60c530f..8c5609bc 100644
--- a/ecoscope/io/utils.py
+++ b/ecoscope/io/utils.py
@@ -6,7 +6,9 @@

 import pandas as pd
 import requests
+from requests.adapters import HTTPAdapter
 from tqdm.auto import tqdm
+from urllib3.util import Retry


 def to_hex(val, default="#ff0000"):
@@ -27,22 +29,26 @@ def pack_columns(dataframe: pd.DataFrame, columns: typing.List):
     return dataframe


-def download_file(url, path, overwrite_existing=False, chunk_size=1024, unzip=True, **request_kwargs):
+def download_file(url, path, retries=2, overwrite_existing=False, chunk_size=1024, unzip=False, **request_kwargs):
     """
     Download a file from a URL to a local path. If the path is a directory, the filename will be inferred from
     the response header
     """
+    s = requests.Session()
+    retries = Retry(total=retries, backoff_factor=0.1, status_forcelist=[500, 502, 503, 504])
+    s.mount("https://", HTTPAdapter(max_retries=retries))
+
     if __is_gdrive_url(url):
         url = __transform_gdrive_url(url)
     elif __is_dropbox_url(url):
         url = __transform_dropbox_url(url)

-    r = requests.get(url, stream=True, **request_kwargs)
+    r = s.get(url, stream=True, **request_kwargs)

     if os.path.isdir(path):
         m = email.message.Message()
-        m["content-type"] = r.headers["content-disposition"]
+        m["content-type"] = r.headers.get("content-disposition")
         filename = m.get_param("filename")
         if filename is None:
             raise ValueError("URL has no RFC 6266 filename.")
@@ -53,7 +59,8 @@ def download_file(url, path, retries=2, overwrite_existing=False, chunk_size=102
         return

     with open(path, "wb") as f:
-        with tqdm.wrapattr(f, "write", total=int(r.headers["Content-Length"])) as fout:
+        content_length = r.headers.get("content-length")
+        with tqdm.wrapattr(f, "write", total=int(content_length)) if content_length else f as fout:
             for chunk in r.iter_content(chunk_size=chunk_size):
                 fout.write(chunk)
diff --git a/nb-tests/test_notebooks.py b/nb-tests/test_notebooks.py
index df47e535..a26af66d 100644
--- a/nb-tests/test_notebooks.py
+++ b/nb-tests/test_notebooks.py
@@ -16,18 +16,7 @@

 NB_DIR = pathlib.Path(__file__).parent.parent / "doc" / "source" / "notebooks"

-KNOWN_ERRORS_REGEXES = {  # This is basically a GitHub ticket queue
-    "EarthRanger_IO.ipynb": "Series found",
-    "Relocations_and_Trajectories.ipynb": "No module named 'branca'",
-    "EcoGraph.ipynb": "not a zip file",
-    "EcoPlot.ipynb": "not a zip file",
-    "Landscape Grid.ipynb": "No module named 'branca'",
-    "Seasonal Calculation.ipynb": "No module named 'branca'",
-    "Tracking Data Gantt Chart.ipynb": "Bad CRC-32 for file 'er_relocs.csv.zip'",
-    "Remote Sensing Time Series Anomaly.ipynb": "No module named 'branca'",
-    "Reduce Regions.ipynb": "No module named 'branca'",
-    "Landscape Dynamics Data.ipynb": "No module named 'branca'",
-}
+KNOWN_ERRORS_REGEXES = {}  # This is basically a GitHub ticket queue


 @dataclass
diff --git a/requirements-notebooks-test.txt b/requirements-notebooks-test.txt
index eb03be4e..b3f8c43e 100644
--- a/requirements-notebooks-test.txt
+++ b/requirements-notebooks-test.txt
@@ -3,4 +3,5 @@ pytest
 papermill
 .[all]
 ipykernel
-pytest-xdist
\ No newline at end of file
+pytest-xdist
+folium
\ No newline at end of file
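The retry wiring added to `download_file` above, shown in isolation (a sketch with a placeholder URL; these are standard requests/urllib3 APIs):

```python
import requests
from requests.adapters import HTTPAdapter
from urllib3.util import Retry

s = requests.Session()
retry = Retry(total=2, backoff_factor=0.1, status_forcelist=[500, 502, 503, 504])
s.mount("https://", HTTPAdapter(max_retries=retry))

# retried up to twice, with backoff, on the listed status codes
r = s.get("https://example.com/file.zip", stream=True)
```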
diff --git a/tests/test_astronomy.py b/tests/test_astronomy.py
index 9d483317..50f53e75 100644
--- a/tests/test_astronomy.py
+++ b/tests/test_astronomy.py
@@ -1,7 +1,8 @@
-import pyproj
 import pandas as pd
-from ecoscope.base import Trajectory
+import pyproj
+
 from ecoscope.analysis import astronomy
+from ecoscope.base import Trajectory


 def test_to_EarthLocation(movebank_relocations):
@@ -36,8 +37,8 @@ def test_daynight_ratio(movebank_relocations):
     trajectory = Trajectory.from_relocations(movebank_relocations)
     expected = pd.Series(
         [
-            2.212816,
-            0.656435,
+            0.451912,
+            1.523379,
         ],
         index=pd.Index(["Habiba", "Salif Keita"], name="groupby_col"),
     )
diff --git a/tests/test_asyncearthranger_io.py b/tests/test_asyncearthranger_io.py
index 4384846f..8eafb57d 100644
--- a/tests/test_asyncearthranger_io.py
+++ b/tests/test_asyncearthranger_io.py
@@ -218,7 +218,10 @@ async def test_get_patrols(er_io_async, get_patrols_fields):

 @pytest.mark.asyncio
 async def test_get_patrol_observations(er_io_async, get_patrol_observations_fields):
-    observations = await er_io_async.get_patrol_observations_with_patrol_filter()
+    observations = await er_io_async.get_patrol_observations_with_patrol_filter(
+        since=pd.Timestamp("2017-01-01").isoformat(),
+        until=pd.Timestamp("2017-04-01").isoformat(),
+    )
     assert not observations.empty
     assert set(observations.columns) == set(get_patrol_observations_fields)
     assert type(observations["fixtime"] == pd.Timestamp)
@@ -228,7 +231,11 @@
 async def test_get_patrol_observations_with_patrol_details(
     er_io_async, get_patrol_observations_fields, get_patrol_details_fields
 ):
-    observations = await er_io_async.get_patrol_observations_with_patrol_filter(include_patrol_details=True)
+    observations = await er_io_async.get_patrol_observations_with_patrol_filter(
+        since=pd.Timestamp("2017-01-01").isoformat(),
+        until=pd.Timestamp("2017-04-01").isoformat(),
+        include_patrol_details=True,
+    )
     assert not observations.empty
     assert set(observations.columns) == set(get_patrol_observations_fields).union(get_patrol_details_fields)
     assert type(observations["fixtime"] == pd.Timestamp)
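The new expected ratios are, up to fixture rounding, the reciprocals of the old ones, which is exactly what the `night_distance / day_distance` flip in astronomy.py predicts:

```python
import pytest

assert 1 / 2.212816 == pytest.approx(0.451912, abs=1e-5)
assert 1 / 0.656435 == pytest.approx(1.523379, abs=1e-5)
```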
len(sample_df["colormap"].unique()) == len(sample_df["value_classified"].unique()) def test_apply_colormap_user_defined(movebank_relocations): @@ -92,7 +104,7 @@ def test_apply_colormap_user_defined(movebank_relocations): ] apply_color_map(trajectory, "speed_bins", cmap) - assert len(set(trajectory["speed_bins_colormap"].unique())) == len(trajectory["speed_bins"].unique()) + assert len(trajectory["speed_bins_colormap"].unique()) == len(trajectory["speed_bins"].unique()) def test_apply_colormap_cmap_user_defined_bad(movebank_relocations): diff --git a/tests/test_earthranger_io.py b/tests/test_earthranger_io.py index 8336fd3b..add3d6c0 100644 --- a/tests/test_earthranger_io.py +++ b/tests/test_earthranger_io.py @@ -4,6 +4,7 @@ import geopandas as gpd import pandas as pd import pytest +import pytz from shapely.geometry import Point import ecoscope @@ -81,18 +82,75 @@ def test_das_client_method(er_io): er_io.get_me() -def test_get_patrols(er_io): - patrols = er_io.get_patrols() +def test_get_patrols_datestr(er_io): + since_str = "2017-01-01" + since_time = pd.to_datetime(since_str).replace(tzinfo=pytz.UTC) + until_str = "2017-04-01" + until_time = pd.to_datetime(until_str).replace(tzinfo=pytz.UTC) + patrols = er_io.get_patrols(since=since_str, until=until_str) + assert len(patrols) > 0 + time_ranges = [ + segment["time_range"] + for segments in patrols["patrol_segments"] + for segment in segments + if "time_range" in segment + ] + + for time_range in time_ranges: + start = pd.to_datetime(time_range["start_time"]) + end = pd.to_datetime(time_range["end_time"]) + + assert start <= until_time and end >= since_time + + +def test_get_patrols_datestr_invalid_format(er_io): + with pytest.raises(ValueError): + er_io.get_patrols(since="not a date") + + +def test_get_patrols_with_type_value(er_io): + patrols = er_io.get_patrols(since="2017-01-01", until="2017-04-01", patrol_type_value="ecoscope_patrol") + + patrol_types = [ + segment["patrol_type"] + for segments in patrols["patrol_segments"] + for segment in segments + if "patrol_type" in segment + ] + assert all(value == "ecoscope_patrol" for value in patrol_types) + + +def test_get_patrols_with_type_value_list(er_io): + patrol_type_value_list = ["ecoscope_patrol", "MEP_Distance_Survey_Patrol"] + patrols = er_io.get_patrols(since="2024-01-01", until="2024-04-01", patrol_type_value=patrol_type_value_list) + + patrol_types = [ + segment["patrol_type"] + for segments in patrols["patrol_segments"] + for segment in segments + if "patrol_type" in segment + ] + assert all(value in patrol_type_value_list for value in patrol_types) + + +def test_get_patrols_with_invalid_type_value(er_io): + with pytest.raises(ValueError): + er_io.get_patrols(since="2017-01-01", until="2017-04-01", patrol_type_value="invalid") + def test_get_patrol_events(er_io): - events = er_io.get_patrol_events() + events = er_io.get_patrol_events( + since=pd.Timestamp("2017-01-01").isoformat(), + until=pd.Timestamp("2017-04-01").isoformat(), + ) assert "id" in events assert "event_type" in events assert "geometry" in events assert "patrol_id" in events assert "patrol_segment_id" in events + assert "time" in events def test_post_observations(er_io): @@ -196,7 +254,11 @@ def test_patch_event(er_io): def test_get_patrol_observations(er_io): - patrols = er_io.get_patrols() + patrols = er_io.get_patrols( + since=pd.Timestamp("2017-01-01").isoformat(), + until=pd.Timestamp("2017-04-01").isoformat(), + ) + observations = er_io.get_patrol_observations( patrols, include_source_details=False, diff 
diff --git a/tests/test_io_utils.py b/tests/test_io_utils.py
index 2ca0123b..8cd739b8 100644
--- a/tests/test_io_utils.py
+++ b/tests/test_io_utils.py
@@ -1,8 +1,12 @@
 import json
 import os
+import pytest

 import fsspec
 import pandas as pd
+from unittest.mock import Mock, patch
+from http.client import HTTPMessage
+from requests.exceptions import RetryError

 import ecoscope

@@ -80,3 +84,24 @@ def test_download_file_dropbox_share_link():
     data = pd.read_csv(os.path.join(output_dir, "download_data.csv"))

     assert len(data) > 0
+
+
+@patch("urllib3.connectionpool.HTTPConnectionPool._get_conn")
+def test_download_file_retry_on_error(mock):
+    mock.return_value.getresponse.side_effect = [
+        Mock(status=500, msg=HTTPMessage(), headers={}),
+        Mock(status=504, msg=HTTPMessage(), headers={}),
+        Mock(status=503, msg=HTTPMessage(), headers={}),
+    ]
+
+    url = "https://totallyreal.com"
+    output_dir = "tests/test_output"
+
+    with pytest.raises(RetryError):
+        ecoscope.io.download_file(
+            url,
+            output_dir,
+            overwrite_existing=True,
+        )
+
+    assert mock.call_count == 3
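A note on the final assertion: `Retry(total=2)` permits the initial attempt plus two retries, so the mocked connection serves exactly three responses (500, 504, 503) before urllib3 gives up and requests raises `RetryError`, hence `mock.call_count == 3`.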