Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use osm-rawdata for generating data extracts #1183

Merged
merged 24 commits into from
Feb 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
b820b30
refactor: remove old unused DefineMapArea component
spwoodcock Feb 7, 2024
cde3b80
refactor: rename /generate endpoint --> /generate_project_data
spwoodcock Feb 7, 2024
296d142
fix(frontend): correctly send data_extract_type to project create
spwoodcock Feb 7, 2024
612255d
fix(frontend): add data_extract_url param, refactor DataExtract for c…
spwoodcock Feb 7, 2024
f410941
build: add migration for projects.data_extract_url field
spwoodcock Feb 7, 2024
7fcf6f1
fix: add slash to osm-data-extract url for correct method
spwoodcock Feb 7, 2024
15e2855
fix(frontend): pass through data extract url & type during proj creation
spwoodcock Feb 8, 2024
823af9b
fix: set data extract url and type on generate endpoint
spwoodcock Feb 8, 2024
9596efd
fix(backend): use osm-rawdata for data extract generation + filtering
spwoodcock Feb 8, 2024
eff2bdd
fix(frontend): add form_category for osm data extract post params
spwoodcock Feb 8, 2024
16a8120
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 8, 2024
f33b931
build: update osm-rawdata --> v0.2.0
spwoodcock Feb 9, 2024
02345ea
build: update osm-rawdata --> v0.2.1
spwoodcock Feb 9, 2024
a6faaa7
feat: correctly set data extract url if custom upload or osm
spwoodcock Feb 9, 2024
5b21653
Merge branch 'feat/consistent-data-extracts' of https://github.com/ho…
spwoodcock Feb 9, 2024
c353552
fix: load fgb data extract and get geojson geoms
spwoodcock Feb 11, 2024
f9dbd12
fix: accept xls forms in xls and xlsx format
spwoodcock Feb 11, 2024
1b1f00a
fix: optional data extracts for tasking splitting algo
spwoodcock Feb 11, 2024
be8cc93
build: update fmtm-splitter --> v1.1.1
spwoodcock Feb 11, 2024
3b2a0c4
refactor: simplify project file generation during creation
spwoodcock Feb 11, 2024
b690899
fix(frontend): passing params for tasking splitting algo
spwoodcock Feb 11, 2024
9ab0db4
refactor: remove data extract type if osm generated
spwoodcock Feb 11, 2024
02bab7f
build: update minio browser to use env var
spwoodcock Feb 11, 2024
39dcc68
refactor: do not include minio port bind by default
spwoodcock Feb 12, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -188,13 +188,13 @@ services:
MINIO_ROOT_USER: ${S3_ACCESS_KEY:-fmtm}
MINIO_ROOT_PASSWORD: ${S3_SECRET_KEY:-somelongpassword}
MINIO_VOLUMES: "/mnt/data"
MINIO_BROWSER: "off"
# MINIO_CONSOLE_ADDRESS: ":9090"
MINIO_BROWSER: ${MINIO_BROWSER:-off}
MINIO_CONSOLE_ADDRESS: ":9090"
volumes:
- fmtm_data:/mnt/data
# ports:
# - 9000:9000
# - 9090:9090
# - 9000:9000
# - 9090:9090
networks:
- fmtm-net
command: minio server
Expand Down
1 change: 1 addition & 0 deletions src/backend/app/db/db_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -601,6 +601,7 @@ def tasks_bad(self):
data_extract_type = cast(
str, Column(String)
) # Type of data extract (Polygon or Centroid)
data_extract_url = cast(str, Column(String))
task_split_type = cast(
TaskSplitType, Column(Enum(TaskSplitType), nullable=True)
) # Options: divide on square, manual upload, task splitting algo
Expand Down
207 changes: 197 additions & 10 deletions src/backend/app/db/postgis_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,23 @@
"""PostGIS and geometry handling helper funcs."""

import datetime
import logging
from json import dumps as json_dumps
from typing import Optional, Union

from fastapi import HTTPException
from geoalchemy2 import WKBElement
from geoalchemy2.shape import to_shape
from geojson import FeatureCollection
from geojson import loads as geojson_loads
from geojson_pydantic import Feature
from shapely.geometry import mapping
from sqlalchemy import text
from sqlalchemy.exc import ProgrammingError
from sqlalchemy.orm import Session

log = logging.getLogger(__name__)


def timestamp():
"""Get the current time.
Expand Down Expand Up @@ -83,38 +90,218 @@ async def geojson_to_flatgeobuf(
Returns:
flatgeobuf (bytes): a Python bytes representation of a flatgeobuf file.
"""
sql = f"""
# FIXME make this with with properties / tags
# FIXME this is important
# sql = """
# DROP TABLE IF EXISTS public.temp_features CASCADE;

# CREATE TABLE IF NOT EXISTS public.temp_features(
# geom geometry,
# osm_id integer,
# changeset integer,
# timestamp timestamp
# );

# WITH data AS (SELECT CAST(:geojson AS json) AS fc)
# INSERT INTO public.temp_features (geom, osm_id, changeset, timestamp)
# SELECT
# ST_SetSRID(ST_GeomFromGeoJSON(feat->>'geometry'), 4326) AS geom,
# COALESCE((feat->'properties'->>'osm_id')::integer, -1) AS osm_id,
# COALESCE((feat->'properties'->>'changeset')::integer, -1) AS changeset,
# CASE
# WHEN feat->'properties'->>'timestamp' IS NOT NULL
# THEN (feat->'properties'->>'timestamp')::timestamp
# ELSE NULL
# END AS timestamp
# FROM (
# SELECT json_array_elements(fc->'features') AS feat
# FROM data
# ) AS f;

# -- Second param = generate with spatial index
# SELECT ST_AsFlatGeobuf(geoms, true)
# FROM (SELECT * FROM public.temp_features) AS geoms;
# """
sql = """
DROP TABLE IF EXISTS public.temp_features CASCADE;

CREATE TABLE IF NOT EXISTS public.temp_features(
id serial PRIMARY KEY,
geom geometry
);

WITH data AS (SELECT '{geojson}'::json AS fc)
WITH data AS (SELECT CAST(:geojson AS json) AS fc)
INSERT INTO public.temp_features (geom)
SELECT
ST_AsText(ST_GeomFromGeoJSON(feat->>'geometry')) AS geom
ST_SetSRID(ST_GeomFromGeoJSON(feat->>'geometry'), 4326) AS geom
FROM (
SELECT json_array_elements(fc->'features') AS feat
FROM data
) AS f;

WITH thegeom AS
(SELECT * FROM public.temp_features)
SELECT ST_AsFlatGeobuf(thegeom.*)
FROM thegeom;
SELECT ST_AsFlatGeobuf(fgb_data)
FROM (SELECT * FROM public.temp_features as geoms) AS fgb_data;
"""
# Run the SQL
result = db.execute(text(sql))
result = db.execute(text(sql), {"geojson": json_dumps(geojson)})
# Get a memoryview object, then extract to Bytes
flatgeobuf = result.first()

# Cleanup table
db.execute(text("DROP TABLE IF EXISTS public.temp_features CASCADE;"))
# db.execute(text("DROP TABLE IF EXISTS public.temp_features CASCADE;"))

if flatgeobuf:
return flatgeobuf[0].tobytes()

# Nothing returned (either no features passed, or failed)
return None


async def flatgeobuf_to_geojson(
    db: Session, flatgeobuf: bytes
) -> Optional[FeatureCollection]:
    """Convert FlatGeobuf bytes into a GeoJSON FeatureCollection.

    The FlatGeobuf is first materialised into a temporary table via
    ST_FromFlatGeobufToTable, then each row is wrapped into a GeoJSON
    Feature and aggregated into a single FeatureCollection in SQL.

    Args:
        db (Session): SQLAlchemy db session.
        flatgeobuf (bytes): FlatGeobuf data in bytes format.

    Returns:
        FeatureCollection: A GeoJSON FeatureCollection object, or None if
            nothing was returned or the conversion failed.
    """
    query = text(
        """
        DROP TABLE IF EXISTS public.temp_fgb CASCADE;

        SELECT ST_FromFlatGeobufToTable('public', 'temp_fgb', :fgb_bytes);

        SELECT jsonb_build_object(
            'type', 'FeatureCollection',
            'features', jsonb_agg(feature)
        ) AS feature_collection
        FROM (
            SELECT jsonb_build_object(
                'type', 'Feature',
                'geometry', ST_AsGeoJSON(fgb_data.geom)::jsonb,
                'properties', fgb_data.properties::jsonb
            ) AS feature
            FROM (
                SELECT *,
                    NULL as properties
                FROM ST_FromFlatGeobuf(null::temp_fgb, :fgb_bytes)
            ) AS fgb_data
        ) AS features;
    """
    )

    try:
        row = db.execute(query, {"fgb_bytes": flatgeobuf}).first()
    except ProgrammingError as e:
        # ST_FromFlatGeobufToTable can raise on duplicate column names in
        # the fgb schema; treat this as "no result" rather than crashing.
        log.error(e)
        log.error(
            "Attempted flatgeobuf --> geojson conversion, but duplicate column found"
        )
        return None

    if not row:
        return None

    # Round-trip the jsonb dict through a string to get geojson types back.
    return geojson_loads(json_dumps(row[0]))


async def parse_and_filter_geojson(geojson_str: str) -> Optional[FeatureCollection]:
    """Parse a geojson string and filter out incompatible geometries.

    The input is normalised into a FeatureCollection, its CRS is validated,
    then any feature whose geometry type differs from the collection's
    predominant geometry type is dropped.

    Args:
        geojson_str (str): Raw GeoJSON document as a string.

    Returns:
        Optional[FeatureCollection]: Features of the predominant geometry
            type only.

    Raises:
        HTTPException: Propagated from check_crs if the CRS is not WGS84.
    """
    log.debug("Parsing geojson file")
    geojson_parsed = geojson_loads(geojson_str)
    if isinstance(geojson_parsed, FeatureCollection):
        log.debug("Already in FeatureCollection format, skipping reparse")
        featcol = geojson_parsed
    elif geojson_parsed.get("type") == "Feature":
        # NOTE: geojson_loads returns geojson.Feature objects, not the
        # geojson_pydantic Feature model, so an isinstance check against
        # the pydantic class never matched; match on the "type" key instead.
        # The constructor also expects a *list* of features.
        log.debug("Converting Feature to FeatureCollection")
        featcol = FeatureCollection([geojson_parsed])
    else:
        # Bare geometry: wrap it in a Feature, then a FeatureCollection.
        # (Previously `FeatureCollection[...]` subscripted the class,
        # raising TypeError at runtime.)
        log.debug("Converting geometry to FeatureCollection")
        featcol = FeatureCollection([Feature(geometry=geojson_parsed)])

    # Validating Coordinate Reference System.
    # check_crs is async: without `await` the coroutine was never executed,
    # so invalid CRS input silently passed through.
    await check_crs(featcol)

    geom_type = await get_featcol_main_geom_type(featcol)

    # Filter out geoms not matching main type
    features_filtered = [
        feature
        for feature in featcol.get("features", [])
        if feature.get("geometry", {}).get("type", "") == geom_type
    ]

    return FeatureCollection(features_filtered)


async def get_featcol_main_geom_type(featcol: "FeatureCollection") -> str:
    """Get the predominant geometry type in a FeatureCollection.

    Args:
        featcol (FeatureCollection): Collection to inspect.

    Returns:
        str: "Polygon", "Point", or "LineString", whichever occurs most
            often (ties broken in that order; an empty collection yields
            "Polygon").
    """
    # NOTE: "Polyline" is not a GeoJSON geometry type (RFC 7946 defines
    # "LineString"), so line features were previously never counted.
    geometry_counts = {"Polygon": 0, "Point": 0, "LineString": 0}

    for feature in featcol.get("features", []):
        geometry_type = feature.get("geometry", {}).get("type", "")
        if geometry_type in geometry_counts:
            geometry_counts[geometry_type] += 1

    return max(geometry_counts, key=geometry_counts.get)


async def check_crs(input_geojson: Union[dict, FeatureCollection]):
    """Validate that a geojson uses a WGS84 (EPSG:4326) CRS.

    Checks an explicit top-level ``crs`` member against a whitelist of
    WGS84 identifiers; otherwise samples one coordinate pair and verifies
    it lies within lon/lat degree bounds (a heuristic that catches
    projected, non-degree coordinates).

    Args:
        input_geojson (Union[dict, FeatureCollection]): A GeoJSON
            FeatureCollection, Feature, or bare geometry dict.

    Raises:
        HTTPException: 400 if the CRS is unsupported or the sampled
            coordinate is outside valid lon/lat ranges.
    """
    log.debug("validating coordinate reference system")

    def is_valid_crs(crs_name):
        valid_crs_list = [
            "urn:ogc:def:crs:OGC:1.3:CRS84",
            "urn:ogc:def:crs:EPSG::4326",
            "WGS 84",
        ]
        return crs_name in valid_crs_list

    def is_valid_coordinate(coord):
        if coord is None:
            return False
        return -180 <= coord[0] <= 180 and -90 <= coord[1] <= 90

    error_message = (
        "ERROR: Unsupported coordinate system, it is recommended to use a "
        "GeoJSON file in WGS84(EPSG 4326) standard."
    )
    if "crs" in input_geojson:
        crs = input_geojson.get("crs", {}).get("properties", {}).get("name")
        if not is_valid_crs(crs):
            log.error(error_message)
            raise HTTPException(status_code=400, detail=error_message)
        return

    # Determine the geometry type and a coordinate array to sample.
    # FIX: the original used `x := input_geojson.get("type") == "..."`,
    # which bound the *boolean* comparison result, so the FeatureCollection
    # branch below never matched; capture the type string itself instead.
    input_geojson_type = input_geojson.get("type")
    if input_geojson_type == "FeatureCollection":
        features = input_geojson.get("features", [])
        coordinates = (
            features[-1].get("geometry", {}).get("coordinates", []) if features else []
        )
        geometry_type = features[0].get("geometry", {}).get("type") if features else ""
    elif input_geojson_type == "Feature":
        coordinates = input_geojson.get("geometry", {}).get("coordinates", [])
        geometry_type = input_geojson.get("geometry", {}).get("type", "")
    else:
        # Bare geometry object: previously unhandled (NameError on
        # `coordinates`); its own "type" member is the geometry type.
        coordinates = input_geojson.get("coordinates", [])
        geometry_type = input_geojson_type or ""

    # Drill down to one representative [lon, lat] pair per geometry type.
    if geometry_type == "MultiPolygon":
        # MultiPolygon nests polygon -> ring -> point, so three indexes are
        # needed (the original's coordinates[0][0] yielded a ring, not a
        # point, making the range check raise TypeError).
        first_coordinate = (
            coordinates[0][0][0]
            if coordinates and coordinates[0] and coordinates[0][0]
            else None
        )
    elif geometry_type == "Point":
        first_coordinate = coordinates if coordinates else None
    elif geometry_type == "LineString":
        first_coordinate = coordinates[0] if coordinates else None
    else:
        # Polygon (and similar ring-based types): ring -> point.
        first_coordinate = coordinates[0][0] if coordinates else None

    if not is_valid_coordinate(first_coordinate):
        log.error(error_message)
        raise HTTPException(status_code=400, detail=error_message)
Loading
Loading