Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert ecoscope.base classes to dataframe accessors #258

Closed
wants to merge 30 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
64018d9
add maximize option to to_html()
atmorling Aug 20, 2024
3b51d2f
bump lonboard version
atmorling Aug 20, 2024
68a31e6
pin numpy version
atmorling Aug 20, 2024
74e544a
update env.yaml
atmorling Aug 26, 2024
8c28315
initial relocations replacement
atmorling Aug 23, 2024
724dffe
add straighttrack props as top level props
atmorling Aug 23, 2024
332f9e1
use built-in cached property
atmorling Aug 23, 2024
fef0e50
adding trajectories, settling on structure, feeling good about this
atmorling Aug 23, 2024
8997f0e
well here goes nothing
atmorling Aug 23, 2024
7629156
remove speed for now
atmorling Aug 23, 2024
c2ba588
remove EDF tests
atmorling Aug 23, 2024
3a1de12
fixing some tests
atmorling Aug 23, 2024
46a8513
fix some base returns
atmorling Aug 26, 2024
349a19a
update map/graph test fixtures
atmorling Aug 26, 2024
3d56885
update earthranger
atmorling Aug 26, 2024
a9049a3
all tests pass, time for panik
atmorling Aug 26, 2024
183ebe5
absorb proximity into trajectories
atmorling Aug 26, 2024
467c7dc
yay no upper bounds
atmorling Aug 26, 2024
88a52d6
ee.Geometry.toGeoJSON doesn't produce valid geojson
atmorling Aug 26, 2024
92457fa
regenerate feather
atmorling Aug 26, 2024
f564e37
update environment.yml
atmorling Aug 26, 2024
4628f09
remove ecoscope from environment.yml
atmorling Aug 27, 2024
30a25d3
add async_earthranger to [all]
atmorling Aug 27, 2024
7b2ee03
an attempt at trimming environment.yml down
atmorling Aug 27, 2024
11cbd33
update unit test ci
atmorling Aug 27, 2024
d6b17b9
fix typo
atmorling Aug 27, 2024
616eeec
and again
atmorling Aug 27, 2024
9719b0b
first pass notebooks
atmorling Aug 27, 2024
b87fddf
another pass
atmorling Aug 27, 2024
73d60fe
update docstrings
atmorling Aug 27, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,14 @@ jobs:
uses: mamba-org/setup-micromamba@v1
with:
environment-file: ${{ matrix.env }}
cache-environment: true
cache-downloads: true
init-shell: bash

- name: Install pip dependencies and our package
shell: bash -leo pipefail {0}
run: |
python -m pip install ".[all]"
Comment on lines +29 to +36
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice, that's a much more intelligible separation between the base environment and the package under test.


- name: Test
env:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,7 @@
"metadata": {},
"outputs": [],
"source": [
"relocs = ecoscope.base.Relocations.from_gdf(\n",
" gdf,\n",
"relocs = gdf.relocations.from_gdf(\n",
" groupby_col=\"individual-local-identifier\",\n",
" time_col=\"timestamp\",\n",
" uuid_col=\"event-id\",\n",
Expand Down Expand Up @@ -275,7 +274,7 @@
" max_y=18,\n",
" filter_point_coords=[[180, 90], [0, 0]],\n",
")\n",
"relocs.apply_reloc_filter(coord_filter, inplace=True)"
"relocs = relocs.relocations.apply_reloc_filter(coord_filter)"
]
},
{
Expand Down Expand Up @@ -308,7 +307,7 @@
"outputs": [],
"source": [
"speed_filter = ecoscope.base.RelocsSpeedFilter(max_speed_kmhr=4.0)\n",
"relocs.apply_reloc_filter(speed_filter, inplace=True)"
"relocs = relocs.relocations.apply_reloc_filter(speed_filter)"
]
},
{
Expand All @@ -333,7 +332,7 @@
"metadata": {},
"outputs": [],
"source": [
"relocs.remove_filtered(inplace=True)"
"relocs = relocs.relocations.remove_filtered()"
]
},
{
Expand Down Expand Up @@ -500,7 +499,7 @@
"metadata": {},
"outputs": [],
"source": [
"relocs = ecoscope.base.Relocations.from_gdf(gdf)"
"relocs = gdf.relocations.from_gdf()"
]
},
{
Expand All @@ -516,7 +515,7 @@
"metadata": {},
"outputs": [],
"source": [
"traj = ecoscope.base.Trajectory.from_relocations(relocs)"
"traj = relocs.trajectories.from_relocations()"
]
},
{
Expand Down Expand Up @@ -576,7 +575,7 @@
" min_speed_kmhr=0.0,\n",
" max_speed_kmhr=8.0,\n",
")\n",
"traj.apply_traj_filter(traj_seg_filter, inplace=True)"
"traj = traj.trajectories.apply_traj_filter(traj_seg_filter)"
]
},
{
Expand All @@ -594,7 +593,7 @@
"metadata": {},
"outputs": [],
"source": [
"traj.remove_filtered(inplace=True)"
"traj = traj.trajectories.remove_filtered()"
]
},
{
Expand Down Expand Up @@ -685,7 +684,7 @@
"metadata": {},
"outputs": [],
"source": [
"new_relocs = traj.to_relocations()"
"new_relocs = traj.trajectories.to_relocations()"
]
},
{
Expand All @@ -701,7 +700,7 @@
"metadata": {},
"outputs": [],
"source": [
"upsampled_relocs = traj.upsample(\"180S\")"
"upsampled_relocs = traj.trajectories.upsample(\"180S\")"
]
},
{
Expand All @@ -717,7 +716,7 @@
"metadata": {},
"outputs": [],
"source": [
"downsampled_relocs_int = traj.downsample(\"10800S\", interpolation=True)"
"downsampled_relocs_int = traj.trajectories.downsample(\"10800S\", interpolation=True)"
]
},
{
Expand All @@ -733,7 +732,7 @@
"metadata": {},
"outputs": [],
"source": [
"downsampled_relocs_noint = traj.downsample(\"10800S\", tolerance=\"900S\")"
"downsampled_relocs_noint = traj.trajectories.downsample(\"10800S\", tolerance=\"900S\")"
]
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@
"outputs": [],
"source": [
"gdf = gpd.GeoDataFrame(data, geometry=data[\"geometry\"].apply(lambda x: shapely.wkt.loads(x)), crs=4326)\n",
"relocs = ecoscope.base.Relocations.from_gdf(gdf)"
"relocs = gdf.relocations.from_gdf()"
]
},
{
Expand All @@ -119,7 +119,7 @@
"metadata": {},
"outputs": [],
"source": [
"traj = ecoscope.base.Trajectory.from_relocations(relocs)\n",
"traj = relocs.trajectories.from_relocations()\n",
"traj"
]
},
Expand Down Expand Up @@ -173,8 +173,8 @@
" max_length_meters=50000,\n",
")\n",
"\n",
"traj.apply_traj_filter(traj_seg_filter, inplace=True)\n",
"traj.remove_filtered(inplace=True)\n",
"traj = traj.trajectories.apply_traj_filter(traj_seg_filter)\n",
"traj = traj.trajectories.remove_filtered()\n",
"traj[\"geometry\"].explore()"
]
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@
"df = pd.read_csv(os.path.join(output_dir, \"movebank_data.csv\"), index_col=0)\n",
"gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df[\"location-long\"], df[\"location-lat\"]), crs=4326)\n",
"\n",
"relocs = ecoscope.base.Relocations.from_gdf(gdf, groupby_col=\"individual-local-identifier\", time_col=\"timestamp\")\n",
"relocs = gdf.relocations.from_gdf(groupby_col=\"individual-local-identifier\", time_col=\"timestamp\")\n",
"relocs"
]
},
Expand All @@ -114,7 +114,7 @@
"metadata": {},
"outputs": [],
"source": [
"traj = ecoscope.base.Trajectory.from_relocations(relocs)\n",
"traj = relocs.trajectories.from_relocations()\n",
"traj"
]
},
Expand Down Expand Up @@ -155,7 +155,7 @@
" min_speed_kmhr=0.0,\n",
" max_speed_kmhr=8.0,\n",
")\n",
"traj.apply_traj_filter(traj_seg_filter, inplace=True)"
"traj = traj.trajectories.apply_traj_filter(traj_seg_filter)"
]
},
{
Expand All @@ -164,7 +164,7 @@
"metadata": {},
"outputs": [],
"source": [
"traj.remove_filtered(inplace=True)"
"traj = traj.trajectories.remove_filtered()"
]
},
{
Expand Down
17 changes: 8 additions & 9 deletions doc/source/notebooks/04. EcoMap & EcoPlot/EcoMap.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,9 @@
"\n",
"vehicle_gdf = pd.read_csv(os.path.join(output_dir, \"KDB025Z.csv\"), index_col=\"id\")\n",
"vehicle_gdf[\"geometry\"] = vehicle_gdf[\"geometry\"].apply(lambda x: shapely.wkt.loads(x))\n",
"vehicle_gdf = ecoscope.base.Relocations.from_gdf(gpd.GeoDataFrame(vehicle_gdf, crs=4326))\n",
"vehicle_gdf = ecoscope.base.Trajectory.from_relocations(vehicle_gdf)"
"vehicle_gdf = gpd.GeoDataFrame(vehicle_gdf, crs=4326)\n",
"vehicle_gdf = vehicle_gdf.relocations.from_gdf()\n",
"vehicle_gdf = vehicle_gdf.trajectories.from_relocations()"
]
},
{
Expand Down Expand Up @@ -212,9 +213,7 @@
"\n",
"df = pd.read_csv(os.path.join(output_dir, \"movebank_data.csv\"), index_col=0)\n",
"gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(x=df[\"location-long\"], y=df[\"location-lat\"]), crs=4326)\n",
"movebank_relocations_gdf = ecoscope.base.Relocations.from_gdf(\n",
" gdf, groupby_col=\"individual-local-identifier\", time_col=\"timestamp\"\n",
")\n",
"movebank_relocations_gdf = gdf.relocations.from_gdf(groupby_col=\"individual-local-identifier\", time_col=\"timestamp\")\n",
"\n",
"pnts_filter = ecoscope.base.RelocsCoordinateFilter(\n",
" min_x=-5,\n",
Expand All @@ -223,8 +222,8 @@
" max_y=18,\n",
" filter_point_coords=[[180, 90], [0, 0]],\n",
")\n",
"movebank_relocations_gdf.apply_reloc_filter(pnts_filter, inplace=True)\n",
"movebank_relocations_gdf.remove_filtered(inplace=True)"
"movebank_relocations_gdf = movebank_relocations_gdf.relocations.apply_reloc_filter(pnts_filter)\n",
"movebank_relocations_gdf = movebank_relocations_gdf.relocations.remove_filtered()"
]
},
{
Expand Down Expand Up @@ -389,7 +388,7 @@
"metadata": {},
"outputs": [],
"source": [
"movebank_trajectory_gdf = ecoscope.base.Trajectory.from_relocations(movebank_relocations_gdf)\n",
"movebank_trajectory_gdf = movebank_relocations_gdf.trajectories.from_relocations()\n",
"movebank_traj_seg_filter = ecoscope.base.TrajSegFilter(\n",
" min_length_meters=0.0,\n",
" max_length_meters=float(\"inf\"),\n",
Expand All @@ -398,7 +397,7 @@
" min_speed_kmhr=0.0,\n",
" max_speed_kmhr=10.0,\n",
")\n",
"movebank_trajectory_gdf.apply_traj_filter(movebank_traj_seg_filter, inplace=True)"
"movebank_trajectory_gdf = movebank_trajectory_gdf.trajectories.apply_traj_filter(movebank_traj_seg_filter)"
]
},
{
Expand Down
6 changes: 2 additions & 4 deletions doc/source/notebooks/04. EcoMap & EcoPlot/EcoPlot.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -107,12 +107,10 @@
"gdf = gpd.GeoDataFrame(data, geometry=gpd.GeoSeries.from_wkt(data[\"geometry\"], crs=4326))\n",
"\n",
"# Create Relocations from the GeoPandas DataFrame\n",
"relocs = ecoscope.base.Relocations.from_gdf(\n",
" gdf,\n",
")\n",
"relocs = gdf.relocations.from_gdf()\n",
"\n",
"# Create a Trajectory from the Relocations\n",
"traj = ecoscope.base.Trajectory.from_relocations(relocs)"
"traj = relocs.trajectories.from_relocations()"
]
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,10 +100,10 @@
"\n",
"\n",
"# Create Relocations from the GeoPandas DataFrame\n",
"relocs = ecoscope.base.Relocations.from_gdf(gdf)\n",
"relocs = gdf.relocations.from_gdf()\n",
"\n",
"# Create Trajectory from Relocations\n",
"traj = ecoscope.base.Trajectory.from_relocations(relocs)"
"traj = relocs.trajectories.from_relocations()"
]
},
{
Expand Down
4 changes: 2 additions & 2 deletions ecoscope/analysis/UD/etd_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from dataclasses import dataclass

import numpy as np
from ecoscope.base import Trajectory
import geopandas as gpd
from ecoscope.io import raster

try:
Expand Down Expand Up @@ -83,7 +83,7 @@ class Weibull3Parameter(WeibullPDF):


def calculate_etd_range(
trajectory_gdf: Trajectory,
trajectory_gdf: gpd.GeoDataFrame,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Love that this is now possible.

Do we want to be more opinionated about typing here, perhaps via pandera schemas?

Certainly the majority of gpd.GeoDataFrames will not work if passed here.

Not necessarily a question to be solved by this PR, but something to consider as a follow-on perhaps.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another way of putting this: if this can only operate on Trajectories, then it should be a method (i.e. accessor) on a Trajectory. (Which, in the accessors paradigm, is a GeoDataFrame that adheres to a validated schema.)

Copy link
Contributor

@cisaacstern cisaacstern Aug 28, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

traj = ...  # but, how do we get this? 🤷 

traj.ecoscope.calculate_etd()

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

plain_gdf = ...

traj = plain_gdf.ecoscope.EcoTrajectory()
turn_angle = traj.ecotrajectory.get_turn_angle()
etd = traj.ecotrajectory.calculate_etd()

🤔

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Typing for alex...)

plain_gdf.EcoscopeBase.init()
inited_gdf.relocations.thing

Copy link
Contributor

@cisaacstern cisaacstern Aug 28, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

from typing import Generic, TypeVar

import pandera as pa
import pandas as pd

S = TypeVar("S")


class RelocationsSchema(pa.Schema): ...

class TrajectorySchema(pa.Schema): ...

class GDFWithSchema(Generic[S]):  ...


@pd.api.extensions.register_dataframe_accessor("ecoscope")
class ecoscope():

    def __init__(self, pandas_obj):
        assert isinstance(pandas_obj, gpd.GeoDataFrame)
        self._gdf = pandas_obj

    def Relocations(..., parser=...) -> GDFWithSchema[RelocationsSchema]: 
        # possibly coerce to schema here

        assert pa.validate(self._gdf, RelocationsSchema)
        return self._gdf

    def is_trajectory():
        return pa.validate(self._gdf, TrajectorySchema)

    def Trajectory(..., parser=...) -> GDFWithSchema[TrajectorySchema]:
        relocs = self.Relocations(parser=parser)
        ...
        
@pd.api.extensions.register_dataframe_accessor("trajectory")
class trajectory():

    def __init__(self, pandas_obj):
        self._gdf = pandas_obj
        pa.validate(self._gdf, TrajectorySchema)

    def get_turn_angle(...) ...:

    def calculate_etd(...) ...:

@pd.api.extensions.register_dataframe_accessor("is_trajectory")
class is_trajectory():
    def __call__(self) -> bool:
        return pa.validate(self._gdf, TrajectorySchema)

Copy link
Contributor

@cisaacstern cisaacstern Aug 28, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

plain_gdf = ...

reloc = plain_gdf.ecoscope.Relocations()
traj = reloc.ecoscope.Trajectory()

# ~ OR ~

traj = plain_gdf.ecoscope.Trajectory(parser=...)


traj.trajectory.get_turn_angle()
traj.trajectory.calculate_etd()

...

if not plain_gdf.is_trajectory(): ... # can accessors implement __call__()?

# ~ OR ~

if not plain_gdf.ecoscope.is_trajectory(): ...

plain_gdf.trajectory.get_turn_angle()  # -> validation error

output_path: typing.Union[str, bytes, os.PathLike],
max_speed_kmhr: float = 0.0,
max_speed_percentage: float = 0.9999,
Expand Down
2 changes: 0 additions & 2 deletions ecoscope/analysis/__init__.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
__all__ = [
"ecograph",
"speed",
"UD",
"astronomy",
"classifier",
"geofence",
"geospatial",
"immobility",
"percentile",
"proximity",
"seasons",
"apply_classification",
"calculate_feature_density",
Expand Down
2 changes: 1 addition & 1 deletion ecoscope/analysis/ecograph.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class Ecograph:

Parameters
----------
trajectory : ecoscope.base.Trajectory
trajectory : gpd.GeoDataFrame
Trajectory dataframe
resolution : float
Pixel size, in meters
Expand Down
10 changes: 4 additions & 6 deletions ecoscope/analysis/geofence.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
import pandas as pd
import shapely

import ecoscope


class Region(collections.UserDict):
def __init__(self, geometry: typing.Any, unique_id: str = None, region_name: str = None):
Expand Down Expand Up @@ -55,7 +53,7 @@ class GeoFenceCrossing:
def analyse(
cls,
geocrossing_profile: GeoCrossingProfile,
trajectory: ecoscope.base.Trajectory,
trajectory: gpd.GeoDataFrame,
):
"""
Analyze the trajectory of each subject in relation to set of virtual fences and regions to determine where/when
Expand All @@ -66,12 +64,12 @@ def analyse(
----------
geocrossing_profile: GeoCrossingProfile
Object that contains the geofences and regions
trajectory: ecoscope.base.Trajectory
trajectory: gpd.GeoDataFrame
GeoDataFrame that stores geometry, speed_kmhr, heading, etc. for each subject.

Returns
-------
ecoscope.base.EcoDataFrame
gpd.GeoDataFrame

"""
trajectory = trajectory.copy()
Expand Down Expand Up @@ -113,4 +111,4 @@ def apply_func(fence):
fences = geocrossing_profile.geofence_df
df = pd.concat([apply_func(fence) for _, fence in fences.iterrows()])
df.drop(["start_point", "end_point"], axis=1, inplace=True)
return ecoscope.base.EcoDataFrame(df, geometry="geometry")
return gpd.GeoDataFrame(df, geometry="geometry")
Loading
Loading