Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Campaign observations - Abstract Point and Image data metadata #156

Merged
merged 40 commits into from
Oct 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
a7b8017
Tables - Extract SingleLocation class to dedicated file
jomey Oct 3, 2024
c9c3e45
Tables - Remove outdated header docstring
jomey Oct 3, 2024
f36f1ec
Tables - Add CampaignObservation class and tables
jomey Oct 5, 2024
3822128
Tables - Add CampaignObservation STI tables.
jomey Oct 5, 2024
f67a66b
Tables - Add HasObserver relationship class
jomey Oct 5, 2024
34fc38f
Tables - Use CampaingObservation for Point and Image data
jomey Oct 5, 2024
380524e
Tables - CampaignObservation; use STI subclasses
jomey Oct 5, 2024
1f011c7
DB connection - break up method to initiate
jomey Oct 7, 2024
137cc00
Tests - Introduce Factory-Boy to create test data.
jomey Oct 7, 2024
a930a56
Tests - Setup fixtures for db session
jomey Oct 7, 2024
edb881a
Tests - Tables - Start dedicated model/table tests
jomey Oct 7, 2024
9864609
Tables - Replace a few more instances of measurement_type
jomey Oct 9, 2024
02aae1c
Tests - Tables - Shorten test method names for point_observation
jomey Oct 9, 2024
cb4df4e
Tables - CampaignObservation - Improve polymorphic mapping
jomey Oct 10, 2024
6a90a42
Tables - Point|ImageObservation - indicate relationship return type
jomey Oct 10, 2024
98ff75f
Tables - CampaignObservation - Add date column
jomey Oct 10, 2024
9276ab6
Tables - ImageData - remove units column
jomey Oct 10, 2024
6ae9660
Factories - PointData - Add values for geom and elevation
jomey Oct 10, 2024
d39d655
Factories - Base - Use 'commit' strategy
jomey Oct 10, 2024
574f5e9
Tables - PointData - Use 'hybrid' attribute for dates
jomey Oct 10, 2024
437ed3f
Tests - Add ability to debug statement via print in a test run
jomey Oct 10, 2024
74c7cae
Tests - Add factory to 'patch' db sessions/connections
jomey Oct 10, 2024
aaed85d
API - Point Data - Update to new PointData table structure.
jomey Oct 10, 2024
dad11b2
Tests - Add deletion of Layer test data on class teardown.
jomey Oct 11, 2024
80819a1
Tables - HasObserver - Update definition to indicate return type.
jomey Oct 11, 2024
37cd9cb
Tables - Update table documentation.
jomey Oct 11, 2024
8509f5e
Tests - Configure the DB session once for the whole suite
jomey Oct 11, 2024
24134a6
API - Fix PointMeasurement all_instrument context block call
jomey Oct 11, 2024
4f149fc
Tests - DB - patch connection string for a few tests
jomey Oct 11, 2024
fe34a94
Tests - Set a connect timeout for test runs
jomey Oct 11, 2024
37deab2
Tests - DB - One more case where connection info needs to be patched
jomey Oct 11, 2024
b2d065e
Tests - DB - Add case for host and db name check in connection string
jomey Oct 11, 2024
c1e2cd3
Tests - DBSetup - Remove flush and rollback for session
jomey Oct 12, 2024
1954881
Tests - DB - Break up test case class
jomey Oct 12, 2024
d23454e
Tests - Tables - Add site table tests
jomey Oct 13, 2024
9ce40a7
Tests - Tables - Add layer data tests
jomey Oct 13, 2024
8563134
Tests - Site table - Add case for many observers
jomey Oct 13, 2024
467e575
Tests - DB - Remove hardcoded strings for user and password
jomey Oct 18, 2024
5d211b1
Tests - DB connection - Ensure connection is in UTC time
jomey Oct 18, 2024
e87c564
SingleLocation tables - Use a datetime throughout
jomey Oct 18, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ dependencies = [

[project.optional-dependencies]
dev = [
"factory_boy<4",
"pytest-factoryboy<3",
"pytest",
"pytest-cov",
"sphinx-autobuild<=2024.5",
Expand Down
293 changes: 193 additions & 100 deletions snowexsql/api.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import os
from contextlib import contextmanager

import geoalchemy2.functions as gfunc
Expand All @@ -10,9 +11,8 @@

from snowexsql.conversions import query_to_geopandas, raster_to_rasterio
from snowexsql.db import get_db
from snowexsql.tables import ImageData, LayerData, PointData, Instrument, \
Observer, Site, Campaign, MeasurementType, DOI

from snowexsql.tables import Campaign, DOI, ImageData, Instrument, LayerData, \
MeasurementType, Observer, PointData, PointObservation, Site

LOG = logging.getLogger(__name__)
DB_NAME = 'snow:[email protected]/snowex'
Expand Down Expand Up @@ -102,6 +102,22 @@ def _filter_observers(cls, qry, v):
).filter(Observer.name == v)
return qry

@classmethod
def _filter_instrument(cls, qry, value):
return qry.filter(
cls.MODEL.instrument.has(name=value)
)

@classmethod
def _filter_measurement_type(cls, qry, value):
return qry.join(
cls.MODEL.measurement_type
).filter(MeasurementType.name == value)

@classmethod
def _filter_doi(cls, qry, value):
return qry.join(cls.MODEL.doi).filter(DOI.doi == value)

@classmethod
def extend_qry(cls, qry, check_size=True, **kwargs):
if cls.MODEL is None:
Expand All @@ -111,12 +127,15 @@ def extend_qry(cls, qry, check_size=True, **kwargs):
for k, v in kwargs.items():
# Handle special operations
if k in cls.ALLOWED_QRY_KWARGS:

qry_model = cls.MODEL
# Logic for filtering on date with LayerData
if "date" in k and cls.MODEL == LayerData:
qry = qry.join(LayerData.site)
qry_model = Site
else:
qry_model = cls.MODEL
elif cls.MODEL == PointData:
qry = qry.join(PointData.observation)

# standard filtering using qry.filter
if isinstance(v, list):
filter_col = getattr(qry_model, k)
Expand All @@ -137,17 +156,17 @@ def extend_qry(cls, qry, check_size=True, **kwargs):
# Filter boundary
if "_greater_equal" in k:
key = k.split("_greater_equal")[0]
filter_col = getattr(qry_model, key)
qry = qry.filter(filter_col >= v)
qry = qry.filter(
getattr(qry_model, key) >= v
)
elif "_less_equal" in k:
key = k.split("_less_equal")[0]
filter_col = getattr(qry_model, key)
qry = qry.filter(filter_col <= v)
# Filter linked columns
elif k == "instrument":
qry = qry.filter(
qry_model.instrument.has(name=v)
getattr(qry_model, key) <= v
)
# Filter linked columns
elif k == "instrument":
qry = cls._filter_instrument(qry, v)
elif k == "campaign":
qry = cls._filter_campaign(qry, v)
elif k == "site_id":
Expand All @@ -157,17 +176,14 @@ def extend_qry(cls, qry, check_size=True, **kwargs):
elif k == "observer":
qry = cls._filter_observers(qry, v)
elif k == "doi":
qry = qry.join(
qry_model.doi
).filter(DOI.doi == v)
qry = cls._filter_doi(qry, v)
elif k == "type":
qry = qry.join(
qry_model.measurement
).filter(MeasurementType.name == v)
qry = cls._filter_measurement_type(qry, v)
# Filter to exact value
else:
filter_col = getattr(qry_model, k)
qry = qry.filter(filter_col == v)
qry = qry.filter(
getattr(qry_model, k) == v
)
LOG.debug(
f"Filtering {k} to list {v}"
)
Expand Down Expand Up @@ -207,6 +223,103 @@ def from_unique_entries(cls, columns_to_search, **kwargs):

return results

@classmethod
def from_filter(cls, **kwargs):
jomey marked this conversation as resolved.
Show resolved Hide resolved
"""
Get data for the class by filtering by allowed arguments. The allowed
filters are cls.ALLOWED_QRY_KWARGS.
"""
with db_session(cls.DB_NAME) as (session, engine):
try:
qry = session.query(cls.MODEL)
qry = cls.extend_qry(qry, **kwargs)

# For debugging in the test suite and not recommended
# in production
# https://docs.sqlalchemy.org/en/20/faq/sqlexpressions.html#rendering-postcompile-parameters-as-bound-parameters ## noqa
if 'DEBUG_QUERY' in os.environ:
full_sql_query = qry.statement.compile(
compile_kwargs={"literal_binds": True}
)
print("\n ** SQL query **")
print(full_sql_query)
jomey marked this conversation as resolved.
Show resolved Hide resolved

df = query_to_geopandas(qry, engine)
except Exception as e:
session.close()
LOG.error("Failed query for PointData")
raise e

return df

@classmethod
def from_area(cls, shp=None, pt=None, buffer=None, crs=26912, **kwargs):
"""
Get data for the class within a specific shapefile or
within a point and a known buffer
Args:
shp: shapely geometry in which to filter
pt: shapely point that will have a buffer applied in order
to find search area
buffer: in same units as point
crs: integer crs to use
kwargs: for more filtering or limiting (cls.ALLOWED_QRY_KWARGS)
Returns: Geopandas dataframe of results

"""
if shp is None and pt is None:
raise ValueError(
"Inputs must be a shape description or a point and buffer"
)
if (pt is not None and buffer is None) or \
(buffer is not None and pt is None):
raise ValueError("pt and buffer must be given together")
with db_session(cls.DB_NAME) as (session, engine):
try:
if shp is not None:
qry = session.query(cls.MODEL)
# Filter geometry based on Site for LayerData
if cls.MODEL == LayerData:
qry = qry.join(cls.MODEL.site).filter(
func.ST_Within(
Site.geom, from_shape(shp, srid=crs)
)
)
else:
qry = qry.filter(
func.ST_Within(
cls.MODEL.geom, from_shape(shp, srid=crs)
)
)
qry = cls.extend_qry(qry, check_size=True, **kwargs)
df = query_to_geopandas(qry, engine)
else:
qry_pt = from_shape(pt)
qry = session.query(
gfunc.ST_SetSRID(
func.ST_Buffer(qry_pt, buffer), crs
)
)

buffered_pt = qry.all()[0][0]
qry = session.query(cls.MODEL)
# Filter geometry based on Site for LayerData
if cls.MODEL == LayerData:
qry = qry.join(cls.MODEL.site).filter(
func.ST_Within(Site.geom, buffered_pt)
)
else:
qry = qry.filter(
func.ST_Within(cls.MODEL.geom, buffered_pt)
)
qry = cls.extend_qry(qry, check_size=True, **kwargs)
df = query_to_geopandas(qry, engine)
except Exception as e:
session.close()
raise e

return df

@property
def all_site_names(self):
"""
Expand Down Expand Up @@ -287,98 +400,78 @@ class PointMeasurements(BaseDataset):
MODEL = PointData

@classmethod
def from_filter(cls, **kwargs):
"""
Get data for the class by filtering by allowed arguments. The allowed
filters are cls.ALLOWED_QRY_KWARGS.
"""
with db_session(cls.DB_NAME) as (session, engine):
try:
qry = session.query(cls.MODEL)
qry = cls.extend_qry(qry, **kwargs)
df = query_to_geopandas(qry, engine)
except Exception as e:
session.close()
LOG.error("Failed query for PointData")
raise e
def _filter_campaign(cls, qry, value):
jomey marked this conversation as resolved.
Show resolved Hide resolved
return qry.join(
cls.MODEL.observation
).join(
PointObservation.campaign
).filter(
Campaign.name == value
)

return df
@classmethod
def _filter_measurement_type(cls, qry, value):
return qry.join(
cls.MODEL.observation
).join(
PointObservation.measurement_type
).filter(
MeasurementType.name == value
)

@classmethod
def from_area(cls, shp=None, pt=None, buffer=None, crs=26912, **kwargs):
"""
Get data for the class within a specific shapefile or
within a point and a known buffer
Args:
shp: shapely geometry in which to filter
pt: shapely point that will have a buffer applied in order
to find search area
buffer: in same units as point
crs: integer crs to use
kwargs: for more filtering or limiting (cls.ALLOWED_QRY_KWARGS)
Returns: Geopandas dataframe of results
def _filter_instrument(cls, qry, value):
return qry.join(
cls.MODEL.observation
).join(
PointObservation.instrument
).filter(
Instrument.name == value
)

"""
if shp is None and pt is None:
raise ValueError(
"Inputs must be a shape description or a point and buffer"
)
if (pt is not None and buffer is None) or \
(buffer is not None and pt is None):
raise ValueError("pt and buffer must be given together")
with db_session(cls.DB_NAME) as (session, engine):
try:
if shp is not None:
qry = session.query(cls.MODEL)
# Filter geometry based on Site for LayerData
if cls.MODEL == LayerData:
qry = qry.join(cls.MODEL.site).filter(
func.ST_Within(
Site.geom, from_shape(shp, srid=crs)
)
)
else:
qry = qry.filter(
func.ST_Within(
cls.MODEL.geom, from_shape(shp, srid=crs)
)
)
qry = cls.extend_qry(qry, check_size=True, **kwargs)
df = query_to_geopandas(qry, engine)
else:
qry_pt = from_shape(pt)
qry = session.query(
gfunc.ST_SetSRID(
func.ST_Buffer(qry_pt, buffer), crs
)
)
@classmethod
def _filter_doi(cls, qry, value):
return qry.join(
cls.MODEL.observation
).join(
PointObservation.doi
).filter(
DOI.doi == value
)

buffered_pt = qry.all()[0][0]
qry = session.query(cls.MODEL)
# Filter geometry based on Site for LayerData
if cls.MODEL == LayerData:
qry = qry.join(cls.MODEL.site).filter(
func.ST_Within(Site.geom, buffered_pt)
)
else:
qry = qry.filter(
func.ST_Within(cls.MODEL.geom, buffered_pt)
)
qry = cls.extend_qry(qry, check_size=True, **kwargs)
df = query_to_geopandas(qry, engine)
except Exception as e:
session.close()
raise e
@classmethod
def _filter_observers(cls, qry, value):
return qry.join(
cls.MODEL.observation
).join(
PointObservation.observer
).filter(
Observer.name == value
)

return df
@property
def all_instruments(self):
"""
Return all distinct instruments in the data
"""
with db_session(self.DB_NAME) as (session, engine):
result = session.query(Instrument.name).filter(
Instrument.id.in_(
session.query(PointObservation.instrument_id).distinct()
)
).all()
return self.retrieve_single_value_result(result)


class TooManyRastersException(Exception):
""" Exceptiont to report to users that their query will produce too many rasters"""
"""
Exception to report to users that their query will produce too many
rasters
"""
pass


class LayerMeasurements(PointMeasurements):
class LayerMeasurements(BaseDataset):
"""
API class for access to LayerData
"""
Expand Down
7 changes: 4 additions & 3 deletions snowexsql/conversions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
filetypes, datatypes, etc. Many tools here will be useful for most end users
of the database.
"""

import geopandas as gpd
import pandas as pd
from geoalchemy2.shape import to_shape
Expand Down Expand Up @@ -54,8 +53,10 @@ def query_to_geopandas(query, engine, **kwargs):
# Fill out the variables in the query
sql = query.statement.compile(dialect=postgresql.dialect())

# Get dataframe from geopandas using the query and engine
df = gpd.GeoDataFrame.from_postgis(sql, engine, **kwargs)
# Get dataframe from geopandas using the query and the DB connection.
# By passing in the actual connection, we maintain ownership of it and
# keep it alive until we close it.
df = gpd.read_postgis(sql, engine.connect(), **kwargs)
jomey marked this conversation as resolved.
Show resolved Hide resolved

return df

Expand Down
Loading