Skip to content

Commit

Permalink
io.utils.download_file update (#270)
Browse files Browse the repository at this point in the history
  • Loading branch information
atmorling authored Sep 27, 2024
1 parent cce62e3 commit 8517a1e
Show file tree
Hide file tree
Showing 14 changed files with 72 additions and 71 deletions.
16 changes: 8 additions & 8 deletions doc/source/notebooks/01. IO/EarthRanger_IO.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -614,7 +614,11 @@
"metadata": {},
"outputs": [],
"source": [
"patrol_df = er_io.get_patrols()\n",
"patrol_df = er_io.get_patrols(\n",
" since=pd.Timestamp(\"2017-01-01\").isoformat(),\n",
" until=pd.Timestamp(\"2017-04-01\").isoformat(),\n",
")\n",
"\n",
"\n",
"relocs = er_io.get_patrol_observations(\n",
" patrol_df,\n",
Expand Down Expand Up @@ -709,8 +713,8 @@
")\n",
"\n",
"if not elephants.empty:\n",
" for i, value in elephants.iterrows():\n",
" er_io.delete_observation(observation_id=elephants.loc[i, \"extra__id\"])"
" for observation_id in elephants[\"extra__id\"].unique():\n",
" er_io.delete_observation(observation_id)"
]
},
{
Expand Down Expand Up @@ -863,11 +867,7 @@
"metadata": {},
"outputs": [],
"source": [
"relocs.drop(\n",
" columns=relocs.columns[relocs.applymap(lambda x: isinstance(x, list)).any()],\n",
" errors=\"ignore\",\n",
" inplace=True,\n",
")\n",
"relocs = relocs.select_dtypes(exclude=[list])\n",
"\n",
"relocs.to_file(os.path.join(output_dir, \"observations.gpkg\"), layer=\"observations\")"
]
Expand Down
20 changes: 1 addition & 19 deletions doc/source/notebooks/01. IO/GEE_IO.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
"source": [
"import os\n",
"import sys\n",
"import zipfile\n",
"\n",
"import shapely\n",
"\n",
Expand Down Expand Up @@ -208,26 +207,9 @@
"ecoscope.io.utils.download_file(\n",
" url=img.getDownloadUrl(download_config),\n",
" path=img_zip_file,\n",
" unzip=True,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Unzip"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with zipfile.ZipFile(img_zip_file) as z:\n",
" for name in z.namelist():\n",
" z.extract(name, output_dir)"
]
}
],
"metadata": {
Expand Down
18 changes: 1 addition & 17 deletions doc/source/notebooks/01. IO/Landscape Dynamics Data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
"source": [
"import os\n",
"import sys\n",
"import zipfile\n",
"\n",
"import geopandas as gpd\n",
"\n",
Expand Down Expand Up @@ -90,25 +89,10 @@
" url=\"https://maraelephant.maps.arcgis.com/sharing/rest/content/items/162e299f0c7d472b8e36211e946bb273/data\",\n",
" path=output_dir,\n",
" overwrite_existing=False,\n",
" unzip=True,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Extract ZIP"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"zipfile.ZipFile(os.path.join(output_dir, \"active_public_uncategorized_shpfiles.zip\")).extractall(path=output_dir)"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@
"metadata": {},
"outputs": [],
"source": [
"relocs[[\"groupby_col\", \"fixtime\", \"geometry\"]].explore()"
"relocs[[\"groupby_col\", \"geometry\"]].explore()"
]
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@
"metadata": {},
"outputs": [],
"source": [
"traj.explore()"
"traj[\"geometry\"].explore()"
]
},
{
Expand Down Expand Up @@ -546,7 +546,7 @@
" individual=\"1d22ff96-44d4-45c4-adc3-db1513acbe7d\",\n",
" interpolation=\"dofjojfs\",\n",
" )\n",
"except NotImplemented as e:\n",
"except NotImplementedError as e:\n",
" print(e)"
]
},
Expand Down
1 change: 0 additions & 1 deletion doc/source/notebooks/04. EcoMap & EcoPlot/EcoPlot.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,6 @@
"ecoscope.io.download_file(\n",
" f\"{ECOSCOPE_RAW}/tests/sample_data/vector/er_relocs.csv.zip\",\n",
" os.path.join(output_dir, \"er_relocs.csv.zip\"),\n",
" unzip=False,\n",
")\n",
"\n",
"data = pd.read_csv(os.path.join(output_dir, \"er_relocs.csv.zip\"), header=0, index_col=0)\n",
Expand Down
2 changes: 1 addition & 1 deletion ecoscope/analysis/ecograph.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def compute(df):
G = self._get_ecograph(df, subject_name, radius, cutoff, tortuosity_length)
self.graphs[subject_name] = G

self.trajectory.groupby("groupby_col")[self.trajectory.columns].progress_apply(compute)
self.trajectory.groupby("groupby_col")[self.trajectory.columns].apply(compute)

def to_csv(self, output_path):
"""
Expand Down
2 changes: 1 addition & 1 deletion ecoscope/io/earthranger.py
Original file line number Diff line number Diff line change
Expand Up @@ -1088,7 +1088,7 @@ def upload(obs):
else:
return pd.DataFrame(results)

return observations.groupby(source_id_col, group_keys=False).progress_apply(upload)
return observations.groupby(source_id_col, group_keys=False).apply(upload)

def post_event(
self,
Expand Down
15 changes: 11 additions & 4 deletions ecoscope/io/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@

import pandas as pd
import requests
from requests.adapters import HTTPAdapter
from tqdm.auto import tqdm
from urllib3.util import Retry


def to_hex(val, default="#ff0000"):
Expand All @@ -27,22 +29,26 @@ def pack_columns(dataframe: pd.DataFrame, columns: typing.List):
return dataframe


def download_file(url, path, overwrite_existing=False, chunk_size=1024, unzip=True, **request_kwargs):
def download_file(url, path, retries=2, overwrite_existing=False, chunk_size=1024, unzip=False, **request_kwargs):
"""
Download a file from a URL to a local path. If the path is a directory, the filename will be inferred from
the response header
"""

s = requests.Session()
retries = Retry(total=retries, backoff_factor=0.1, status_forcelist=[500, 502, 503, 504])
s.mount("https://", HTTPAdapter(max_retries=retries))

if __is_gdrive_url(url):
url = __transform_gdrive_url(url)
elif __is_dropbox_url(url):
url = __transform_dropbox_url(url)

r = requests.get(url, stream=True, **request_kwargs)
r = s.get(url, stream=True, **request_kwargs)

if os.path.isdir(path):
m = email.message.Message()
m["content-type"] = r.headers["content-disposition"]
m["content-type"] = r.headers.get("content-disposition")
filename = m.get_param("filename")
if filename is None:
raise ValueError("URL has no RFC 6266 filename.")
Expand All @@ -53,7 +59,8 @@ def download_file(url, path, overwrite_existing=False, chunk_size=1024, unzip=Tr
return

with open(path, "wb") as f:
with tqdm.wrapattr(f, "write", total=int(r.headers["Content-Length"])) as fout:
content_length = r.headers.get("content-length")
with tqdm.wrapattr(f, "write", total=int(content_length)) if content_length else f as fout:
for chunk in r.iter_content(chunk_size=chunk_size):
fout.write(chunk)

Expand Down
13 changes: 1 addition & 12 deletions nb-tests/test_notebooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,7 @@

NB_DIR = pathlib.Path(__file__).parent.parent / "doc" / "source" / "notebooks"

KNOWN_ERRORS_REGEXES = { # This is basically a GitHub ticket queue
"EarthRanger_IO.ipynb": "Series found",
"Relocations_and_Trajectories.ipynb": "No module named 'branca'",
"EcoGraph.ipynb": "not a zip file",
"EcoPlot.ipynb": "not a zip file",
"Landscape Grid.ipynb": "No module named 'branca'",
"Seasonal Calculation.ipynb": "No module named 'branca'",
"Tracking Data Gantt Chart.ipynb": "Bad CRC-32 for file 'er_relocs.csv.zip'",
"Remote Sensing Time Series Anomaly.ipynb": "No module named 'branca'",
"Reduce Regions.ipynb": "No module named 'branca'",
"Landscape Dynamics Data.ipynb": "No module named 'branca'",
}
KNOWN_ERRORS_REGEXES = {} # This is basically a GitHub ticket queue


@dataclass
Expand Down
3 changes: 2 additions & 1 deletion requirements-notebooks-test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ pytest
papermill
.[all]
ipykernel
pytest-xdist
pytest-xdist
folium
11 changes: 9 additions & 2 deletions tests/test_asyncearthranger_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,10 @@ async def test_get_patrols(er_io_async, get_patrols_fields):

@pytest.mark.asyncio
async def test_get_patrol_observations(er_io_async, get_patrol_observations_fields):
observations = await er_io_async.get_patrol_observations_with_patrol_filter()
observations = await er_io_async.get_patrol_observations_with_patrol_filter(
since=pd.Timestamp("2017-01-01").isoformat(),
until=pd.Timestamp("2017-04-01").isoformat(),
)
assert not observations.empty
assert set(observations.columns) == set(get_patrol_observations_fields)
assert type(observations["fixtime"] == pd.Timestamp)
Expand All @@ -228,7 +231,11 @@ async def test_get_patrol_observations(er_io_async, get_patrol_observations_fiel
async def test_get_patrol_observations_with_patrol_details(
er_io_async, get_patrol_observations_fields, get_patrol_details_fields
):
observations = await er_io_async.get_patrol_observations_with_patrol_filter(include_patrol_details=True)
observations = await er_io_async.get_patrol_observations_with_patrol_filter(
since=pd.Timestamp("2017-01-01").isoformat(),
until=pd.Timestamp("2017-04-01").isoformat(),
include_patrol_details=True,
)
assert not observations.empty
assert set(observations.columns) == set(get_patrol_observations_fields).union(get_patrol_details_fields)
assert type(observations["fixtime"] == pd.Timestamp)
Expand Down
11 changes: 9 additions & 2 deletions tests/test_earthranger_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,10 @@ def test_get_patrols(er_io):


def test_get_patrol_events(er_io):
events = er_io.get_patrol_events()
events = er_io.get_patrol_events(
since=pd.Timestamp("2017-01-01").isoformat(),
until=pd.Timestamp("2017-04-01").isoformat(),
)
assert "id" in events
assert "event_type" in events
assert "geometry" in events
Expand Down Expand Up @@ -196,7 +199,11 @@ def test_patch_event(er_io):


def test_get_patrol_observations(er_io):
patrols = er_io.get_patrols()
patrols = er_io.get_patrols(
since=pd.Timestamp("2017-01-01").isoformat(),
until=pd.Timestamp("2017-04-01").isoformat(),
)

observations = er_io.get_patrol_observations(
patrols,
include_source_details=False,
Expand Down
25 changes: 25 additions & 0 deletions tests/test_io_utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import json
import os
import pytest

import fsspec
import pandas as pd
from unittest.mock import Mock, patch
from http.client import HTTPMessage
from requests.exceptions import RetryError

import ecoscope

Expand Down Expand Up @@ -80,3 +84,24 @@ def test_download_file_dropbox_share_link():

data = pd.read_csv(os.path.join(output_dir, "download_data.csv"))
assert len(data) > 0


@patch("urllib3.connectionpool.HTTPConnectionPool._get_conn")
def test_download_file_retry_on_error(mock):
    """download_file retries on 5xx responses and raises RetryError once retries are exhausted."""
    # Every connection attempt yields a retryable server error status
    # (all three codes are in download_file's status_forcelist).
    server_errors = [
        Mock(status=500, msg=HTTPMessage(), headers={}),
        Mock(status=504, msg=HTTPMessage(), headers={}),
        Mock(status=503, msg=HTTPMessage(), headers={}),
    ]
    mock.return_value.getresponse.side_effect = server_errors

    with pytest.raises(RetryError):
        ecoscope.io.download_file(
            "https://totallyreal.com",
            "tests/test_output",
            overwrite_existing=True,
        )

    # One initial attempt plus two retries (download_file's default retries=2).
    assert mock.call_count == 3

0 comments on commit 8517a1e

Please sign in to comment.