diff --git a/snowexsql/api.py b/snowexsql/api.py index 616e51a..acc2e6d 100644 --- a/snowexsql/api.py +++ b/snowexsql/api.py @@ -10,14 +10,15 @@ from snowexsql.conversions import query_to_geopandas, raster_to_rasterio from snowexsql.db import get_db -from snowexsql.tables import ImageData, LayerData, PointData +from snowexsql.tables import ImageData, LayerData, PointData, Instrument, \ + Observer, Site, Campaign + LOG = logging.getLogger(__name__) DB_NAME = 'snow:hackweek@db.snowexdata.org/snowex' # TODO: # * Possible enums -# * filtering based on dates # * implement 'like' or 'contains' method @@ -47,8 +48,10 @@ class BaseDataset: # Use this database name DB_NAME = DB_NAME - ALLOWED_QRY_KWARGS = ["site_name", "site_id", "date", "instrument", "observers", "type", - "utm_zone", "date_greater_equal", "date_less_equal", "value_greater_equal", 'value_less_equal', + ALLOWED_QRY_KWARGS = [ + "campaign", "site_id", "date", "instrument", "type", + "utm_zone", "date_greater_equal", "date_less_equal", + "value_greater_equal", 'value_less_equal', ] SPECIAL_KWARGS = ["limit"] # Default max record count @@ -119,6 +122,25 @@ def extend_qry(cls, qry, check_size=True, **kwargs): key = k.split("_less_equal")[0] filter_col = getattr(cls.MODEL, key) qry = qry.filter(filter_col <= v) + # Filter linked columns + elif k == "instrument": + qry = qry.filter( + cls.MODEL.instrument.has(name=v) + ) + elif k == "campaign": + qry = qry.join( + cls.MODEL.site + ).filter( + Site.campaign.has(Campaign.name == v) + ) + elif k == "site_id": + qry = qry.filter( + cls.MODEL.site.has(name=v) + ) + elif k == "observer": + qry = qry.join( + LayerData.observers + ).filter(Observer.name == v) # Filter to exact value else: filter_col = getattr(cls.MODEL, k) @@ -165,10 +187,20 @@ def from_unique_entries(cls, columns_to_search, **kwargs): @property def all_site_names(self): """ - Return all types of the data + Return all campaign names + """ + with db_session(self.DB_NAME) as (session, engine): + qry = session.query(Campaign.name).distinct() + result = qry.all() + return self.retrieve_single_value_result(result) + + @property + def all_site_ids(self): + """ + Return all specific site names """ with db_session(self.DB_NAME) as (session, engine): - qry = session.query(self.MODEL.site_name).distinct() + qry = session.query(Site.name).distinct() result = qry.all() return self.retrieve_single_value_result(result) @@ -198,7 +230,7 @@ def all_observers(self): Return all distinct observers in the data """ with db_session(self.DB_NAME) as (session, engine): - qry = session.query(self.MODEL.observers).distinct() + qry = session.query(Observer.name).distinct() result = qry.all() return self.retrieve_single_value_result(result) @@ -218,7 +250,9 @@ def all_instruments(self): Return all distinct instruments in the data """ with db_session(self.DB_NAME) as (session, engine): - qry = session.query(self.MODEL.instrument).distinct() + qry = session.query(Instrument.name).join( + self.MODEL, Instrument.id == self.MODEL.instrument_id + ).distinct() result = qry.all() return self.retrieve_single_value_result(result) @@ -299,30 +333,22 @@ def from_area(cls, shp=None, pt=None, buffer=None, crs=26912, **kwargs): return df + class TooManyRastersException(Exception): """ Exceptiont to report to users that their query will produce too many rasters""" pass + class LayerMeasurements(PointMeasurements): """ API class for access to LayerData """ MODEL = LayerData ALLOWED_QRY_KWARGS = [ - "site_name", "site_id", "date", "instrument", "observers", "type", + "campaign", "site_id", "date", "instrument", "observer", "type", "utm_zone", "pit_id", "date_greater_equal", "date_less_equal" ] - # TODO: layer analysis methods? - @property - def all_site_ids(self): - """ - Return all types of the data - """ - with db_session(self.DB_NAME) as (session, engine): - qry = session.query(self.MODEL.site_id).distinct() - result = qry.all() - return self.retrieve_single_value_result(result) class RasterMeasurements(BaseDataset): MODEL = ImageData diff --git a/snowexsql/tables/__init__.py b/snowexsql/tables/__init__.py index 3a43124..a6d3309 100644 --- a/snowexsql/tables/__init__.py +++ b/snowexsql/tables/__init__.py @@ -1,11 +1,18 @@ from .image_data import ImageData from .layer_data import LayerData from .point_data import PointData -from .site_data import SiteData +from .site_data import SiteCondition +from .observers import Observer +from .instrument import Instrument +from .campaign import Campaign +from .site import Site __all__ = [ - 'ImageData', - 'LayerData', - 'PointData', - 'SnowData', + "Campaign", + "ImageData", + "Instrument", + "LayerData", + "Observer", + "PointData", + "Site", ] diff --git a/snowexsql/tables/base.py b/snowexsql/tables/base.py index eb6a915..b946803 100644 --- a/snowexsql/tables/base.py +++ b/snowexsql/tables/base.py @@ -26,7 +26,6 @@ class Base(DeclarativeBase): time_updated = Column(DateTime(timezone=True), onupdate=func.now()) date_accessed = Column(Date) - site_name = Column(String(250)) date = Column(Date) doi = Column(String(50)) @@ -38,14 +37,11 @@ class SingleLocationData: elevation = Column(Float) geom = Column(Geometry("POINT")) time = Column(Time(timezone=True)) - site_id = Column(String(50)) class Measurement(object): """ Base Class providing attributes required for a measurement of any kind """ - instrument = Column(String(50)) type = Column(String(50)) units = Column(String(50)) - observers = Column(String(100)) diff --git a/snowexsql/tables/campaign.py b/snowexsql/tables/campaign.py new file mode 100644 index 0000000..fed8608 --- /dev/null +++ b/snowexsql/tables/campaign.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- +""" +Created on Thu Aug 22 11:56:34 2024 + +@author: jtmaz +""" + +from sqlalchemy import Column, String, Integer + +from .base import Base + + +class Campaign(Base): + """ + Table stores Campaign data. Does not store data values, + it only stores the campaign metadata. + """ + __tablename__ = 'campaigns' + + # TODO: could we add a campaign shapefile? + + id = Column(Integer, primary_key=True) + name = Column(String()) + description = Column(String()) diff --git a/snowexsql/tables/instrument.py b/snowexsql/tables/instrument.py new file mode 100644 index 0000000..82a888c --- /dev/null +++ b/snowexsql/tables/instrument.py @@ -0,0 +1,12 @@ +from sqlalchemy import Column, Integer, String +from .base import Base + + +class Instrument(Base): + __tablename__ = 'instruments' + # auto created id + id = Column(Integer, primary_key=True) + # Name of the instrument + name = Column(String(), index=True) + model = Column(String()) + specifications = Column(String()) diff --git a/snowexsql/tables/layer_data.py b/snowexsql/tables/layer_data.py index 603da42..e7fb93f 100644 --- a/snowexsql/tables/layer_data.py +++ b/snowexsql/tables/layer_data.py @@ -1,6 +1,24 @@ -from sqlalchemy import Column, Float, String +from sqlalchemy import Column, Float, Integer, String, ForeignKey +from sqlalchemy.orm import Mapped +from sqlalchemy.orm import mapped_column +from typing import List +from sqlalchemy.orm import relationship from .base import Base, Measurement, SingleLocationData +from .observers import Observer +from .instrument import Instrument +from .site import Site + + +class LayerObservers(Base): + """ + Link table + """ + __tablename__ = 'layer_observers' + __table_args__ = {'schema': 'public'} + + layer_id = Column(Integer, ForeignKey('public.layers.id')) + observer_id = Column(Integer, ForeignKey("public.observers.id")) class LayerData(SingleLocationData, Measurement, Base): @@ -13,7 +31,6 @@ class LayerData(SingleLocationData, Measurement, Base): __tablename__ = 'layers' depth = Column(Float) - site_id = Column(String(50)) pit_id = Column(String(50)) bottom_depth = Column(Float) comments = Column(String(1000)) @@ -22,3 +39,23 @@ class LayerData(SingleLocationData, Measurement, Base): sample_c = Column(String(20)) value = Column(String(50)) flags = Column(String(20)) + + # Link the instrument id with a foreign key + instrument_id = Column( + Integer, ForeignKey('public.instruments.id'), index=True + ) + # Link the Instrument class + instrument = relationship('Instrument') + + # Link the site id with a foreign key + site_id = Column( + Integer, ForeignKey('public.sites.id'), index=True + ) + # Link the Site class + site = relationship('Site') + + # id is a mapped column for many-to-many with observers + id: Mapped[int] = mapped_column(primary_key=True) + observers: Mapped[List[Observer]] = relationship( + secondary=LayerObservers.__table__ + ) diff --git a/snowexsql/tables/observers.py b/snowexsql/tables/observers.py new file mode 100644 index 0000000..07f1e0e --- /dev/null +++ b/snowexsql/tables/observers.py @@ -0,0 +1,12 @@ +from sqlalchemy.orm import mapped_column, Mapped +from sqlalchemy import Column, String + +from .base import Base + + +class Observer(Base): + __tablename__ = 'observers' + # id is mapped column for many-to-many + id: Mapped[int] = mapped_column(primary_key=True) + # Name of the observer + name = Column(String()) diff --git a/snowexsql/tables/point_data.py b/snowexsql/tables/point_data.py index c6189ee..ceb691c 100644 --- a/snowexsql/tables/point_data.py +++ b/snowexsql/tables/point_data.py @@ -1,6 +1,23 @@ -from sqlalchemy import Column, Float, Integer, String +from sqlalchemy import Column, Float, Integer, String, ForeignKey +from sqlalchemy.orm import Mapped +from sqlalchemy.orm import mapped_column +from typing import List +from sqlalchemy.orm import relationship from .base import Base, Measurement, SingleLocationData +from .observers import Observer +from .instrument import Instrument +from .site import Site + + +class PointObservers(Base): + """ + Link table + """ + __tablename__ = 'point_observers' + + point_id = Column(Integer, ForeignKey('public.points.id')) + observer_id = Column(Integer, ForeignKey("public.observers.id")) class PointData(SingleLocationData, Measurement, Base): @@ -12,5 +29,29 @@ class PointData(SingleLocationData, Measurement, Base): __tablename__ = 'points' version_number = Column(Integer) - equipment = Column(String(50)) + equipment = Column(String()) value = Column(Float) + + # bring these in instead of Measurement + type = Column(String()) + units = Column(String()) + + # Link the instrument id with a foreign key + instrument_id = Column( + Integer, ForeignKey('public.instruments.id'), index=True + ) + # Link the Instrument class + instrument = relationship('Instrument') + + # Link the site id with a foreign key + site_id = Column( + Integer, ForeignKey('public.sites.id'), index=True + ) + # Link the Site class + site = relationship('Site') + + # id is a mapped column for many-to-many with observers + id: Mapped[int] = mapped_column(primary_key=True) + observers: Mapped[List[Observer]] = relationship( + secondary=PointObservers.__table__ + ) diff --git a/snowexsql/tables/site.py b/snowexsql/tables/site.py new file mode 100644 index 0000000..e7db07a --- /dev/null +++ b/snowexsql/tables/site.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- +""" +Created on Thu Aug 22 11:56:34 2024 + +@author: jtmaz +""" + +from sqlalchemy import Column, String, Integer, ForeignKey, Float +from sqlalchemy.orm import relationship + +from .base import Base +from .campaign import Campaign + + +class Site(Base): + """ + Table stores Site data. Does not store data values, + it only stores the site metadata. + """ + # TODO: add geometry here and remove from site_condtions + __tablename__ = 'sites' + + id = Column(Integer, primary_key=True) + name = Column(String()) + description = Column(String()) + + # Link the campaign id with a foreign key + campaign_id = Column( + Integer, ForeignKey('public.campaigns.id'), index=True + ) + # Link the Campaign class + campaign = relationship('Campaign') diff --git a/snowexsql/tables/site_data.py b/snowexsql/tables/site_data.py index a88441b..1ea240e 100644 --- a/snowexsql/tables/site_data.py +++ b/snowexsql/tables/site_data.py @@ -3,13 +3,13 @@ from .base import Base, SingleLocationData -class SiteData(SingleLocationData, Base): +class SiteCondition(SingleLocationData, Base): """ Table for storing pit site meta data, This table doesn't represent any main data record but only support data for each site """ - __tablename__ = 'sites' - __table_args__ = {"schema": "public"} + # TODO: leaving this for later - we should link this to Sites table + __tablename__ = 'site_condition' pit_id = Column(String(50)) slope_angle = Column(Float) diff --git a/tests/sql_test_base.py b/tests/sql_test_base.py index 2ef6934..303e058 100644 --- a/tests/sql_test_base.py +++ b/tests/sql_test_base.py @@ -42,9 +42,8 @@ def setup_class(self): @classmethod def teardown_class(self): """ - Remove the databse + Close the session """ - self.metadata.drop_all(bind=self.engine) self.session.close() # optional, depends on use case def teardown(self): diff --git a/tests/test_api.py b/tests/test_api.py index 46fa128..d2317c8 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -2,12 +2,14 @@ import geopandas as gpd import numpy as np import pytest -from datetime import date - +from datetime import date, time +from geoalchemy2.elements import WKTElement from snowexsql.api import ( - PointMeasurements, LargeQueryCheckException, LayerMeasurements + PointMeasurements, LargeQueryCheckException, LayerMeasurements, db_session ) from snowexsql.db import get_db, initialize +from snowexsql.tables import Instrument, Observer, PointData, LayerData, Site +from snowexsql.tables.campaign import Campaign @pytest.fixture(scope="session") @@ -35,7 +37,8 @@ class DBConnection: @pytest.fixture(scope="class") def db(self, creds, db_url): engine, session, metadata = get_db( - db_url, credentials=creds, return_metadata=True) + db_url, credentials=creds, return_metadata=True + ) initialize(engine) yield engine @@ -45,8 +48,100 @@ def db(self, creds, db_url): metadata.drop_all(bind=engine) session.close() + @staticmethod + def _add_entry( + url, data_cls, instrument_name, + observer_names, campaign_name, site_name, + **kwargs + ): + url_long = f"{url.username}:{url.password}@{url.host}/{url.database}" + with db_session(url_long) as (session, engine): + # Check if the instrument already exists + instrument = session.query(Instrument).filter_by( + name=instrument_name).first() + + if not instrument: + # If the instrument does not exist, create it + instrument = Instrument(name=instrument_name) + session.add(instrument) + session.commit() # Commit to ensure instrument is saved and has an ID + + campaign = session.query(Campaign).filter_by( + name=campaign_name).first() + + if not campaign: + # If the campaign does not exist, create it + campaign = Campaign(name=campaign_name) + session.add(campaign) + session.commit() # Commit to ensure instrument is saved and has an ID + + site = session.query(Site).filter_by( + name=site_name).first() + if not site: + # Add the site with specific campaign + site = Site(name=site_name, campaign=campaign) + session.add(site) + session.commit() + + observer_list = [] + for obs_name in observer_names: + observer = session.query(Observer).filter_by( + name=obs_name).first() + if not observer: + # If the instrument does not exist, create it + observer = Observer(name=obs_name) + session.add(observer) + session.commit() # Commit to ensure instrument is saved and has an ID + observer_list.append(observer) + + # Now that the instrument exists, create the entry, notice we only need the instrument object + new_entry = data_cls( + instrument=instrument, observers=observer_list, + site=site, **kwargs + ) + session.add(new_entry) + session.commit() + + @pytest.fixture(scope="class") + def populated_points(self, db): + # Add made up data at the initialization of the class + row = { + 'date': date(2020, 1, 28), + 'time': time(18, 48), + 'elevation': 3148.2, + 'equipment': 'CRREL_B', + 'version_number': 1, + 'geom': WKTElement("POINT(747987.6190615438 4324061.7062127385)", + srid=26912), + 'date_accessed': date(2024, 7, 10), + 'value': 94, 'type': 'depth', 'units': 'cm' + } + self._add_entry( + db.url, PointData, 'magnaprobe', ["TEST"], + 'Grand Mesa', 'the_middle', **row + ) + @pytest.fixture(scope="class") - def clz(self, db, db_url): + def populated_layer(self, db): + # Fake data to implement + row = { + 'date': date(2020, 1, 28), + 'time': time(18, 48), + 'elevation': 3148.2, + 'geom': WKTElement("POINT(747987.6190615438 4324061.7062127385)", + srid=26912), + 'date_accessed': date(2024, 7, 10), + 'value': '42.5', 'type': 'density', 'units': 'kgm3', + 'pit_id': 'Fakepit1', + 'sample_a': '42.5' + } + self._add_entry( + db.url, LayerData, 'fakeinstrument', ["TEST"], + 'Grand Mesa', 'the_side', **row + ) + + @pytest.fixture(scope="class") + def clz(self, db, db_url, populated_points, populated_layer): """ Extend the class and overwrite the database name """ @@ -57,14 +152,6 @@ class Extended(self.CLZ): yield Extended -def unsorted_list_tuple_compare(l1, l2): - # turn lists into sets, but get rid of any Nones - l1 = set([l[0] for l in l1 if l[0] is not None]) - l2 = set([l[0] for l in l2 if l[0] is not None]) - # compare the sets - return l1 == l2 - - class TestPointMeasurements(DBConnection): """ Test the Point Measurement class @@ -73,32 +160,23 @@ class TestPointMeasurements(DBConnection): def test_all_types(self, clz): result = clz().all_types - assert unsorted_list_tuple_compare( - result, - [] - ) + assert result == ['depth'] def test_all_site_names(self, clz): result = clz().all_site_names - assert unsorted_list_tuple_compare( - result, [] - ) + assert result ==['Grand Mesa'] def test_all_dates(self, clz): result = clz().all_dates - assert len(result) == 0 + assert len(result) == 1 def test_all_observers(self, clz): result = clz().all_observers - assert unsorted_list_tuple_compare( - result, [] - ) + assert result == ['TEST'] def test_all_instruments(self, clz): result = clz().all_instruments - assert unsorted_list_tuple_compare( - result, [] - ) + assert result == ["magnaprobe"] @pytest.mark.parametrize( "kwargs, expected_length, mean_value", [ @@ -106,7 +184,7 @@ def test_all_instruments(self, clz): "date": date(2020, 5, 28), "instrument": 'camera' }, 0, np.nan), - ({"instrument": "magnaprobe", "limit": 10}, 0, np.nan), # limit works + ({"instrument": "magnaprobe", "limit": 10}, 1, 94.0), # limit works ({ "date": date(2020, 5, 28), "instrument": 'pit ruler' @@ -167,23 +245,27 @@ class TestLayerMeasurements(DBConnection): def test_all_types(self, clz): result = clz().all_types - assert result == [] + assert result == ["density"] def test_all_site_names(self, clz): result = clz().all_site_names - assert result == [] + assert result == ['Grand Mesa'] + + def test_all_site_ids(self, clz): + result = clz().all_site_ids + assert result == ['the_middle', 'the_side'] def test_all_dates(self, clz): result = clz().all_dates - assert len(result) == 0 + assert result == [date(2020, 1, 28)] def test_all_observers(self, clz): result = clz().all_observers - assert unsorted_list_tuple_compare(result, []) + assert result == ['TEST'] def test_all_instruments(self, clz): result = clz().all_instruments - assert unsorted_list_tuple_compare(result, []) + assert result == ['fakeinstrument'] @pytest.mark.parametrize( "kwargs, expected_length, mean_value", [ @@ -204,6 +286,14 @@ def test_all_instruments(self, clz): "date_greater_equal": date(2020, 5, 13), "type": 'density' }, 0, np.nan), + ({ + "type": 'density', + "campaign": 'Grand Mesa' + }, 1, 42.5), + ({ + "observer": 'TEST', + "campaign": 'Grand Mesa' + }, 1, 42.5), ] ) def test_from_filter(self, clz, kwargs, expected_length, mean_value): diff --git a/tests/test_db.py b/tests/test_db.py index 18d0df4..f632453 100644 --- a/tests/test_db.py +++ b/tests/test_db.py @@ -4,17 +4,17 @@ from sqlalchemy import Table from snowexsql.db import get_db, get_table_attributes -from snowexsql.tables import ImageData, LayerData, PointData, SiteData +from snowexsql.tables import ImageData, LayerData, PointData, SiteCondition from .sql_test_base import DBSetup class TestDB(DBSetup): - base_atts = ['site_name', 'date', 'site_id'] + base_atts = ['date', 'site_id'] single_loc_atts = ['elevation', 'geom', 'time'] - meas_atts = ['instrument', 'type', 'units', 'observers'] + meas_atts = ['type', 'units'] - site_atts = base_atts + single_loc_atts + \ + site_atts = single_loc_atts + \ ['slope_angle', 'aspect', 'air_temp', 'total_depth', 'weather_description', 'precip', 'sky_cover', 'wind', 'ground_condition', 'ground_roughness', @@ -22,7 +22,7 @@ class TestDB(DBSetup): 'tree_canopy', 'site_notes'] point_atts = single_loc_atts + meas_atts + \ - ['version_number', 'equipment', 'value'] + ['version_number', 'equipment', 'value', 'instrument_id'] layer_atts = single_loc_atts + meas_atts + \ ['depth', 'value', 'bottom_depth', 'comments', 'sample_a', @@ -59,7 +59,7 @@ def test_layer_structure(self): assert c in columns @pytest.mark.parametrize("DataCls,attributes", [ - (SiteData, site_atts), + (SiteCondition, site_atts), (PointData, point_atts), (LayerData, layer_atts), (ImageData, raster_atts)])