diff --git a/doc/source/notebooks/01. IO/EarthRanger_IO.ipynb b/doc/source/notebooks/01. IO/EarthRanger_IO.ipynb index 8b13905e..3ad15f6e 100644 --- a/doc/source/notebooks/01. IO/EarthRanger_IO.ipynb +++ b/doc/source/notebooks/01. IO/EarthRanger_IO.ipynb @@ -614,7 +614,11 @@ "metadata": {}, "outputs": [], "source": [ - "patrol_df = er_io.get_patrols()\n", + "patrol_df = er_io.get_patrols(\n", + " since=pd.Timestamp(\"2017-01-01\").isoformat(),\n", + " until=pd.Timestamp(\"2017-04-01\").isoformat(),\n", + ")\n", + "\n", "\n", "relocs = er_io.get_patrol_observations(\n", " patrol_df,\n", @@ -709,8 +713,8 @@ ")\n", "\n", "if not elephants.empty:\n", - " for i, value in elephants.iterrows():\n", - " er_io.delete_observation(observation_id=elephants.loc[i, \"extra__id\"])" + " for observation_id in elephants[\"extra__id\"].unique():\n", + " er_io.delete_observation(observation_id)" ] }, { @@ -863,11 +867,7 @@ "metadata": {}, "outputs": [], "source": [ - "relocs.drop(\n", - " columns=relocs.columns[relocs.applymap(lambda x: isinstance(x, list)).any()],\n", - " errors=\"ignore\",\n", - " inplace=True,\n", - ")\n", + "relocs = relocs.select_dtypes(exclude=[list])\n", "\n", "relocs.to_file(os.path.join(output_dir, \"observations.gpkg\"), layer=\"observations\")" ] diff --git a/doc/source/notebooks/01. IO/GEE_IO.ipynb b/doc/source/notebooks/01. IO/GEE_IO.ipynb index 34f591d5..236c44cb 100644 --- a/doc/source/notebooks/01. IO/GEE_IO.ipynb +++ b/doc/source/notebooks/01. IO/GEE_IO.ipynb @@ -38,7 +38,6 @@ "source": [ "import os\n", "import sys\n", - "import zipfile\n", "\n", "import shapely\n", "\n", @@ -208,26 +207,9 @@ "ecoscope.io.utils.download_file(\n", " url=img.getDownloadUrl(download_config),\n", " path=img_zip_file,\n", + " unzip=True,\n", ")" ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Unzip" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "with zipfile.ZipFile(img_zip_file) as z:\n", - " for name in z.namelist():\n", - " z.extract(name, output_dir)" - ] } ], "metadata": { diff --git a/doc/source/notebooks/01. IO/Landscape Dynamics Data.ipynb b/doc/source/notebooks/01. IO/Landscape Dynamics Data.ipynb index c099ae63..a8e72883 100644 --- a/doc/source/notebooks/01. IO/Landscape Dynamics Data.ipynb +++ b/doc/source/notebooks/01. IO/Landscape Dynamics Data.ipynb @@ -40,7 +40,6 @@ "source": [ "import os\n", "import sys\n", - "import zipfile\n", "\n", "import geopandas as gpd\n", "\n", @@ -90,25 +89,10 @@ " url=\"https://maraelephant.maps.arcgis.com/sharing/rest/content/items/162e299f0c7d472b8e36211e946bb273/data\",\n", " path=output_dir,\n", " overwrite_existing=False,\n", + " unzip=True,\n", ")" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Extract ZIP" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "zipfile.ZipFile(os.path.join(output_dir, \"active_public_uncategorized_shpfiles.zip\")).extractall(path=output_dir)" - ] - }, { "cell_type": "markdown", "metadata": {}, diff --git a/doc/source/notebooks/02. Relocations & Trajectories/Relocations_and_Trajectories.ipynb b/doc/source/notebooks/02. Relocations & Trajectories/Relocations_and_Trajectories.ipynb index 3504e798..0d28c290 100644 --- a/doc/source/notebooks/02. Relocations & Trajectories/Relocations_and_Trajectories.ipynb +++ b/doc/source/notebooks/02. Relocations & Trajectories/Relocations_and_Trajectories.ipynb @@ -252,7 +252,7 @@ "metadata": {}, "outputs": [], "source": [ - "relocs[[\"groupby_col\", \"fixtime\", \"geometry\"]].explore()" + "relocs[[\"groupby_col\", \"geometry\"]].explore()" ] }, { diff --git a/doc/source/notebooks/03. Home Range & Movescape/EcoGraph.ipynb b/doc/source/notebooks/03. Home Range & Movescape/EcoGraph.ipynb index d9fe061f..d3d45642 100644 --- a/doc/source/notebooks/03. Home Range & Movescape/EcoGraph.ipynb +++ b/doc/source/notebooks/03. Home Range & Movescape/EcoGraph.ipynb @@ -152,7 +152,7 @@ "metadata": {}, "outputs": [], "source": [ - "traj.explore()" + "traj[\"geometry\"].explore()" ] }, { @@ -546,7 +546,7 @@ " individual=\"1d22ff96-44d4-45c4-adc3-db1513acbe7d\",\n", " interpolation=\"dofjojfs\",\n", " )\n", - "except NotImplemented as e:\n", + "except NotImplementedError as e:\n", " print(e)" ] }, diff --git a/doc/source/notebooks/04. EcoMap & EcoPlot/EcoPlot.ipynb b/doc/source/notebooks/04. EcoMap & EcoPlot/EcoPlot.ipynb index bf930a7b..81772c63 100644 --- a/doc/source/notebooks/04. EcoMap & EcoPlot/EcoPlot.ipynb +++ b/doc/source/notebooks/04. EcoMap & EcoPlot/EcoPlot.ipynb @@ -98,7 +98,6 @@ "ecoscope.io.download_file(\n", " f\"{ECOSCOPE_RAW}/tests/sample_data/vector/er_relocs.csv.zip\",\n", " os.path.join(output_dir, \"er_relocs.csv.zip\"),\n", - " unzip=False,\n", ")\n", "\n", "data = pd.read_csv(os.path.join(output_dir, \"er_relocs.csv.zip\"), header=0, index_col=0)\n", diff --git a/ecoscope/analysis/ecograph.py b/ecoscope/analysis/ecograph.py index ac94f10f..6eb3de0f 100644 --- a/ecoscope/analysis/ecograph.py +++ b/ecoscope/analysis/ecograph.py @@ -85,7 +85,7 @@ def compute(df): G = self._get_ecograph(df, subject_name, radius, cutoff, tortuosity_length) self.graphs[subject_name] = G - self.trajectory.groupby("groupby_col")[self.trajectory.columns].progress_apply(compute) + self.trajectory.groupby("groupby_col")[self.trajectory.columns].apply(compute) def to_csv(self, output_path): """ diff --git a/ecoscope/io/earthranger.py b/ecoscope/io/earthranger.py index d2bf58b9..f2cdec46 100644 --- a/ecoscope/io/earthranger.py +++ b/ecoscope/io/earthranger.py @@ -1088,7 +1088,7 @@ def upload(obs): else: return pd.DataFrame(results) - return observations.groupby(source_id_col, group_keys=False).progress_apply(upload) + return observations.groupby(source_id_col, group_keys=False).apply(upload) def post_event( self, diff --git a/ecoscope/io/utils.py b/ecoscope/io/utils.py index e60c530f..8c5609bc 100644 --- a/ecoscope/io/utils.py +++ b/ecoscope/io/utils.py @@ -6,7 +6,9 @@ import pandas as pd import requests +from requests.adapters import HTTPAdapter from tqdm.auto import tqdm +from urllib3.util import Retry def to_hex(val, default="#ff0000"): @@ -27,22 +29,26 @@ def pack_columns(dataframe: pd.DataFrame, columns: typing.List): return dataframe -def download_file(url, path, overwrite_existing=False, chunk_size=1024, unzip=True, **request_kwargs): +def download_file(url, path, retries=2, overwrite_existing=False, chunk_size=1024, unzip=False, **request_kwargs): """ Download a file from a URL to a local path. If the path is a directory, the filename will be inferred from the response header """ + s = requests.Session() + retries = Retry(total=retries, backoff_factor=0.1, status_forcelist=[500, 502, 503, 504]) + s.mount("https://", HTTPAdapter(max_retries=retries)) + if __is_gdrive_url(url): url = __transform_gdrive_url(url) elif __is_dropbox_url(url): url = __transform_dropbox_url(url) - r = requests.get(url, stream=True, **request_kwargs) + r = s.get(url, stream=True, **request_kwargs) if os.path.isdir(path): m = email.message.Message() - m["content-type"] = r.headers["content-disposition"] + m["content-type"] = r.headers.get("content-disposition") filename = m.get_param("filename") if filename is None: raise ValueError("URL has no RFC 6266 filename.") @@ -53,7 +59,8 @@ def download_file(url, path, overwrite_existing=False, chunk_size=1024, unzip=Tr return with open(path, "wb") as f: - with tqdm.wrapattr(f, "write", total=int(r.headers["Content-Length"])) as fout: + content_length = r.headers.get("content-length") + with tqdm.wrapattr(f, "write", total=int(content_length)) if content_length else f as fout: for chunk in r.iter_content(chunk_size=chunk_size): fout.write(chunk) diff --git a/nb-tests/test_notebooks.py b/nb-tests/test_notebooks.py index df47e535..a26af66d 100644 --- a/nb-tests/test_notebooks.py +++ b/nb-tests/test_notebooks.py @@ -16,18 +16,7 @@ NB_DIR = pathlib.Path(__file__).parent.parent / "doc" / "source" / "notebooks" -KNOWN_ERRORS_REGEXES = { # This is basically a GitHub ticket queue - "EarthRanger_IO.ipynb": "Series found", - "Relocations_and_Trajectories.ipynb": "No module named 'branca'", - "EcoGraph.ipynb": "not a zip file", - "EcoPlot.ipynb": "not a zip file", - "Landscape Grid.ipynb": "No module named 'branca'", - "Seasonal Calculation.ipynb": "No module named 'branca'", - "Tracking Data Gantt Chart.ipynb": "Bad CRC-32 for file 'er_relocs.csv.zip'", - "Remote Sensing Time Series Anomaly.ipynb": "No module named 'branca'", - "Reduce Regions.ipynb": "No module named 'branca'", - "Landscape Dynamics Data.ipynb": "No module named 'branca'", -} +KNOWN_ERRORS_REGEXES = {} # This is basically a GitHub ticket queue @dataclass diff --git a/requirements-notebooks-test.txt b/requirements-notebooks-test.txt index eb03be4e..b3f8c43e 100644 --- a/requirements-notebooks-test.txt +++ b/requirements-notebooks-test.txt @@ -3,4 +3,5 @@ pytest papermill .[all] ipykernel -pytest-xdist \ No newline at end of file +pytest-xdist +folium \ No newline at end of file diff --git a/tests/test_asyncearthranger_io.py b/tests/test_asyncearthranger_io.py index 4384846f..8eafb57d 100644 --- a/tests/test_asyncearthranger_io.py +++ b/tests/test_asyncearthranger_io.py @@ -218,7 +218,10 @@ async def test_get_patrols(er_io_async, get_patrols_fields): @pytest.mark.asyncio async def test_get_patrol_observations(er_io_async, get_patrol_observations_fields): - observations = await er_io_async.get_patrol_observations_with_patrol_filter() + observations = await er_io_async.get_patrol_observations_with_patrol_filter( + since=pd.Timestamp("2017-01-01").isoformat(), + until=pd.Timestamp("2017-04-01").isoformat(), + ) assert not observations.empty assert set(observations.columns) == set(get_patrol_observations_fields) assert type(observations["fixtime"] == pd.Timestamp) @@ -228,7 +231,11 @@ async def test_get_patrol_observations(er_io_async, get_patrol_observations_fiel async def test_get_patrol_observations_with_patrol_details( er_io_async, get_patrol_observations_fields, get_patrol_details_fields ): - observations = await er_io_async.get_patrol_observations_with_patrol_filter(include_patrol_details=True) + observations = await er_io_async.get_patrol_observations_with_patrol_filter( + since=pd.Timestamp("2017-01-01").isoformat(), + until=pd.Timestamp("2017-04-01").isoformat(), + include_patrol_details=True, + ) assert not observations.empty assert set(observations.columns) == set(get_patrol_observations_fields).union(get_patrol_details_fields) assert type(observations["fixtime"] == pd.Timestamp) diff --git a/tests/test_earthranger_io.py b/tests/test_earthranger_io.py index 8336fd3b..5a374c78 100644 --- a/tests/test_earthranger_io.py +++ b/tests/test_earthranger_io.py @@ -87,7 +87,10 @@ def test_get_patrols(er_io): def test_get_patrol_events(er_io): - events = er_io.get_patrol_events() + events = er_io.get_patrol_events( + since=pd.Timestamp("2017-01-01").isoformat(), + until=pd.Timestamp("2017-04-01").isoformat(), + ) assert "id" in events assert "event_type" in events assert "geometry" in events @@ -196,7 +199,11 @@ def test_patch_event(er_io): def test_get_patrol_observations(er_io): - patrols = er_io.get_patrols() + patrols = er_io.get_patrols( + since=pd.Timestamp("2017-01-01").isoformat(), + until=pd.Timestamp("2017-04-01").isoformat(), + ) + observations = er_io.get_patrol_observations( patrols, include_source_details=False, diff --git a/tests/test_io_utils.py b/tests/test_io_utils.py index 2ca0123b..8cd739b8 100644 --- a/tests/test_io_utils.py +++ b/tests/test_io_utils.py @@ -1,8 +1,12 @@ import json import os +import pytest import fsspec import pandas as pd +from unittest.mock import Mock, patch +from http.client import HTTPMessage +from requests.exceptions import RetryError import ecoscope @@ -80,3 +84,24 @@ def test_download_file_dropbox_share_link(): data = pd.read_csv(os.path.join(output_dir, "download_data.csv")) assert len(data) > 0 + + +@patch("urllib3.connectionpool.HTTPConnectionPool._get_conn") +def test_download_file_retry_on_error(mock): + mock.return_value.getresponse.side_effect = [ + Mock(status=500, msg=HTTPMessage(), headers={}), + Mock(status=504, msg=HTTPMessage(), headers={}), + Mock(status=503, msg=HTTPMessage(), headers={}), + ] + + url = "https://totallyreal.com" + output_dir = "tests/test_output" + + with pytest.raises(RetryError): + ecoscope.io.download_file( + url, + output_dir, + overwrite_existing=True, + ) + + assert mock.call_count == 3