Skip to content

Commit

Permalink
Table endpoint (#59)
Browse files Browse the repository at this point in the history
* WIP table endpoint with filters

* set to use polars instead of duckdb. it is faster

* query builder and model for table filters json.

* add a couple of tests for the query builder

* add missing __init__ 🤦

* tidy up models

* fix tests to new model

* add more test cases for all the ops and change order by to use '-' as desc indicator in column name

* tidy up names and tests

* remove duckdb from requirements

* add test

* set tests settings env vars in conftest
  • Loading branch information
BielStela authored Jul 15, 2024
1 parent 332dc96 commit 0702788
Show file tree
Hide file tree
Showing 16 changed files with 608 additions and 270 deletions.
4 changes: 2 additions & 2 deletions api/app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from app.auth.auth import verify_token
from app.config.config import get_settings
from app.routers.h3 import h3_grid_router
from app.routers.grid import grid_router
from app.routers.zonal_stats import ZonalTilerFactory


Expand All @@ -29,7 +29,7 @@ def path_params(raster_filename: Annotated[str, Query(description="Raster filena
tiler_routes = ZonalTilerFactory(path_dependency=path_params)

app.include_router(tiler_routes.router, tags=["Raster"], dependencies=[Depends(verify_token)])
app.include_router(h3_grid_router, prefix="/grid", tags=["Grid"], dependencies=[Depends(verify_token)])
app.include_router(grid_router, prefix="/grid", tags=["Grid"], dependencies=[Depends(verify_token)])

add_exception_handlers(app, DEFAULT_STATUS_CODES)

Expand Down
74 changes: 72 additions & 2 deletions api/app/models/grid.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
# ruff: noqa: D101

from enum import Enum
from typing import Literal
from typing import Annotated, Literal

from fastapi import Query
from pydantic import BaseModel, ConfigDict, Field
from pydantic.color import Color
from pydantic_extra_types.color import Color
from sqlalchemy.sql import column, desc, nullslast, select, table


class LegendTypes(str, Enum):
Expand Down Expand Up @@ -60,3 +62,71 @@ class H3GridInfo(BaseModel):
class MultiDatasetMeta(BaseModel):
datasets: list[DatasetMeta] = Field(description="Variables represented in this dataset")
h3_grid_info: list[H3GridInfo] = Field(description="H3 related information")


# ===============================================
# TABLE FILTERING
# ===============================================


class NumericalOperators(str, Enum):
    # Comparison operators accepted by numerical filters.
    # Each member's value is the literal string expected in the "operation" field.
    eq = "eq"  # equal
    gt = "gt"  # greater than
    lt = "lt"  # less than
    gte = "gte"  # greater than or equal
    lte = "lte"  # less than or equal
    not_eq = "not_eq"  # not equal


class CategoricalOperators(str, Enum):
    # Membership operators accepted by categorical filters.
    # `in_` carries a trailing underscore because `in` is a Python keyword;
    # the value sent in the request is still the plain string "in".
    in_ = "in"
    not_in = "not_in"


class CategoricalFilter(BaseModel):
    # Filter that tests a column for membership ("in" / "not_in") in a list of integer values.
    # Documented with comments (not a docstring) in line with the file-level `noqa: D101`.
    filter_type: Literal["categorical"]  # tag identifying this filter kind in the request payload
    column_name: str = Field(description="Name of the column to which the filter will apply")
    # Description added so both filter models document `operation` the same way.
    operation: CategoricalOperators = Field(description="Operation to use in compare")
    # The field is a list, so the description is plural.
    value: list[int] = Field(description="Values to compare with")


class NumericalFilter(BaseModel):
    # Filter that compares a column against a single numeric value.
    filter_type: Literal["numerical"]  # tag identifying this filter kind in the request payload
    column_name: str = Field(
        description="Name of the column to which the filter will apply",
    )
    operation: NumericalOperators = Field(
        description="Operation to use in compare",
    )
    value: float = Field(
        description="Value to compare with",
    )


class TableFilters(BaseModel):
    # Request model for a table query: the row filters, the maximum number of rows
    # and the ordering. `to_sql_query` turns it into a SQL string.
    filters: list[Annotated[CategoricalFilter | NumericalFilter, Field(discriminator="filter_type")]]
    # `ge=0` rejects negative limits at validation time instead of letting them
    # reach the SQL engine as an invalid LIMIT clause.
    limit: int = Field(10, ge=0, lt=1000, description="Number of records")
    order_by: Annotated[list[str], Field(Query(..., description="Prepend '-' to column name to make it descending"))]

    def to_sql_query(self, table_name: str) -> str:
        """Compile model to sql query

        :param table_name: Name of the table the statement selects from.
        :return: ``SELECT *`` statement with the filters as ``WHERE`` conditions, the
            ordering and the limit applied. Values are rendered inline (``literal_binds``).
        """
        # Operator name from the request -> name of the column method that builds the
        # corresponding SQL expression. Every operator of both enums is listed explicitly.
        op_to_column_method = {
            "eq": "__eq__",
            "gt": "__gt__",
            "lt": "__lt__",
            "gte": "__ge__",
            "lte": "__le__",
            "not_eq": "__ne__",
            "in": "in_",
            "not_in": "not_in",
        }
        conditions = []
        for _filter in self.filters:
            # The operator enums subclass `str`, so a member looks up by its string value.
            method_name = op_to_column_method[_filter.operation]
            conditions.append(getattr(column(_filter.column_name), method_name)(_filter.value))

        # A leading '-' means descending (with NULLs last); anything else is used as given.
        ordering = [
            nullslast(desc(column(name[1:]))) if name.startswith("-") else column(name) for name in self.order_by
        ]
        query = (
            select("*")
            .select_from(table(table_name))
            .where(*conditions)
            .limit(self.limit)
            .order_by(*ordering)
        )
        return str(query.compile(compile_kwargs={"literal_binds": True}))
File renamed without changes.
84 changes: 84 additions & 0 deletions api/app/routers/grid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import logging
import os
from pathlib import Path
from typing import Annotated

import h3
import polars as pl
from fastapi import APIRouter, Depends, HTTPException, Query
from fastapi.responses import FileResponse, ORJSONResponse
from h3 import H3CellError
from pydantic import ValidationError

from app.config.config import get_settings
from app.models.grid import MultiDatasetMeta, TableFilters

log = logging.getLogger("uvicorn.error")

grid_router = APIRouter()


@grid_router.get(
    "/tile/{tile_index}",
    responses={200: {"description": "Get a grid tile"}, 404: {"description": "Not found"}},
    response_model=None,
)
async def grid_tile(tile_index: str) -> FileResponse:
    """Request a tile of h3 cells
    :raises HTTPException 404: Item not found
    :raises HTTPException 422: H3 index is not valid
    """
    # The resolution of the requested cell selects the sub-directory holding the tile.
    try:
        z = h3.api.basic_str.h3_get_resolution(tile_index)
    except H3CellError:
        raise HTTPException(status_code=422, detail="Tile index is not a valid H3 cell") from None

    tile_file = os.path.join(get_settings().grid_tiles_path, f"{z}/{tile_index}.arrow")
    if not os.path.exists(tile_file):
        # Report the requested index rather than `tile_file`, which would reveal
        # the server's internal directory layout to the client.
        raise HTTPException(status_code=404, detail=f"Tile {tile_index} not found")
    return FileResponse(tile_file, media_type="application/octet-stream")


@grid_router.get(
    "/meta",
)
async def grid_dataset_metadata() -> MultiDatasetMeta:
    """Dataset metadata"""
    # meta.json lives next to the tiles, under the configured grid tiles path.
    file = os.path.join(get_settings().grid_tiles_path, "meta.json")
    # Read as UTF-8 explicitly so decoding does not depend on the platform's locale encoding.
    with open(file, encoding="utf-8") as f:
        raw = f.read()
    try:
        meta = MultiDatasetMeta.model_validate_json(raw)
    except ValidationError as e:
        # validation error is our fault because meta file is internal. We don't want to show internal error details
        # so raise controlled 500
        log.exception(e)
        raise HTTPException(status_code=500, detail="Metadata file is malformed. Please contact developer.") from None
    return meta


@grid_router.post("/table")
def read_table(
    level: Annotated[int, Query(..., description="Tile level at which the query will be computed")],
    filters: TableFilters = Depends(),
) -> ORJSONResponse:
    """Query tile dataset and return table data"""
    # Each level has its own directory of .arrow files under the grid tiles path.
    files_path = Path(get_settings().grid_tiles_path) / str(level)
    if not files_path.exists():
        # No `from None` here: there is no active exception whose context would need suppressing.
        raise HTTPException(404, detail=f"Level {level} does not exist")
    # Pass a concrete, sorted list of paths instead of the raw glob generator so the
    # input matches the documented types and the file order is deterministic.
    lf = pl.scan_ipc(sorted(files_path.glob("*.arrow")))
    # The lazy frame is registered below under the name "frame", so compile against that name.
    query = filters.to_sql_query("frame")
    log.debug(query)
    try:
        res = pl.SQLContext(frame=lf).execute(query).collect()
    except pl.exceptions.ColumnNotFoundError as e:
        # bad column in order by clause
        log.exception(e)
        raise HTTPException(status_code=404, detail=f"Column '{e}' not found in dataset") from None

    except pl.exceptions.ComputeError as e:
        # possibly raise if wrong type in compare. I'm not aware of other sources of ComputeError
        log.exception(e)
        raise HTTPException(status_code=422, detail=str(e)) from None
    return ORJSONResponse(res.to_dict(as_series=False))
55 changes: 0 additions & 55 deletions api/app/routers/h3.py

This file was deleted.

7 changes: 2 additions & 5 deletions api/app/routers/zonal_stats.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""Minimal COG tiler."""
import os

from typing import Annotated, List, Union

import rasterio
Expand All @@ -8,8 +8,7 @@
from geojson_pydantic import Feature, FeatureCollection
from titiler.core.factory import TilerFactory

from app.config.config import get_settings
from app.models.exact_extract import StatsFeatures, StatsOps
from app.models.zonal_stats import StatsFeatures, StatsOps


class ZonalTilerFactory(TilerFactory):
Expand Down Expand Up @@ -65,8 +64,6 @@ def exact_zonal_stats(
features = [geojson.model_dump()]

with rasterio.Env(**env):
tiff_path = get_settings().tiff_path
src_path = os.path.join(tiff_path, src_path)
with rasterio.open(src_path, **reader_params) as src_dst:
statistics = [op.value for op in statistics] # extract the values from the Enum
stats = exact_extract(src_dst, features, ops=statistics)
Expand Down
5 changes: 4 additions & 1 deletion api/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[tool.ruff]
line-length = 120
extend-include = ["*.ipynb"]
src = ["app"]
src = ["."]

[tool.ruff.lint]
select = [
Expand All @@ -24,3 +24,6 @@ extend-immutable-calls = ["fastapi.Depends", "fastapi.params.Depends", "fastapi.

[tool.ruff.lint.per-file-ignores]
"**/{tests}/*" = ["D103"] # Missing docstring in public function

[tool.mypy]
disable_error_code = ["import-untyped", "attr-defined"]
3 changes: 3 additions & 0 deletions api/requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,6 @@ pydantic_settings
titiler.core
uvicorn
h3
pydantic-extra-types
polars
sqlalchemy
7 changes: 7 additions & 0 deletions api/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ fastapi==0.110.1
# via titiler-core
geojson-pydantic==1.0.2
# via titiler-core
greenlet==3.0.3
# via sqlalchemy
h11==0.14.0
# via
# httpcore
Expand Down Expand Up @@ -70,16 +72,19 @@ numpy==1.26.4
# snuggs
# titiler-core
orjson==3.10.0
polars==1.1.0
pydantic==2.6.4
# via
# fastapi
# geojson-pydantic
# morecantile
# pydantic-extra-types
# pydantic-settings
# rio-tiler
# titiler-core
pydantic-core==2.16.3
# via pydantic
pydantic-extra-types==2.9.0
pydantic-settings==2.2.1
pyparsing==3.1.2
# via snuggs
Expand Down Expand Up @@ -109,6 +114,7 @@ sniffio==1.3.1
# httpx
snuggs==1.4.7
# via rasterio
sqlalchemy==2.0.31
starlette==0.37.2
# via fastapi
titiler-core==0.18.0
Expand All @@ -117,5 +123,6 @@ typing-extensions==4.11.0
# fastapi
# pydantic
# pydantic-core
# sqlalchemy
# titiler-core
uvicorn==0.29.0
28 changes: 28 additions & 0 deletions api/tests/benchmark_post.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
-- example HTTP POST script which demonstrates setting the
-- HTTP method, body, and adding a header
-- command:
-- wrk -c 100 -t 10 -d 10s -s benchmark_post.lua 'http://localhost:8000/grid/table?level=4&limit=10&order_by=-population'


-- The request body is a JSON array holding two filters: a categorical "in"
-- filter on the "fire" column and a numerical "gt" filter on "population".
wrk.method = "POST"
wrk.body = [[
[
{
"filter_type": "categorical",
"column_name": "fire",
"operation": "in",
"value": [
1,2,3
]
},
{
"filter_type": "numerical",
"column_name": "population",
"operation": "gt",
"value": 10000
}
]
]]
-- Headers: JSON content negotiation plus a bearer token in Authorization.
wrk.headers["Content-Type"] = "application/json"
wrk.headers["accept"] = "application/json"
wrk.headers["Authorization"] = "Bearer 1234"
Loading

0 comments on commit 0702788

Please sign in to comment.