Skip to content

Commit

Permalink
Merge pull request #104 from SkyTruth/feature-optimize-database-inserts
Browse files Browse the repository at this point in the history
Feature optimize database inserts
  • Loading branch information
jonaraphael authored Nov 21, 2023
2 parents 4b10d42 + b585996 commit 287f813
Show file tree
Hide file tree
Showing 10 changed files with 246 additions and 127 deletions.
72 changes: 35 additions & 37 deletions alembic/versions/3c4693517ef6_add_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"""
import sqlalchemy as sa
from geoalchemy2 import Geography
from geoalchemy2 import Geography, Geometry
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.types import ARRAY

Expand Down Expand Up @@ -146,39 +146,12 @@ def upgrade() -> None:
sa.Column("machine_confidence", sa.Float),
sa.Column("precursor_slicks", ARRAY(sa.BigInteger)),
sa.Column("notes", sa.Text),
sa.Column(
"length",
sa.Float,
sa.Computed(
"""
GREATEST(
ST_Distance(
ST_PointN(ST_ExteriorRing(ST_OrientedEnvelope(geometry::geometry)), 1)::geography,
ST_PointN(ST_ExteriorRing(ST_OrientedEnvelope(geometry::geometry)), 2)::geography
),
ST_Distance(
ST_PointN(ST_ExteriorRing(ST_OrientedEnvelope(geometry::geometry)), 2)::geography,
ST_PointN(ST_ExteriorRing(ST_OrientedEnvelope(geometry::geometry)), 3)::geography
)
)
"""
),
),
sa.Column("area", sa.Float, sa.Computed("ST_Area(geometry)")),
sa.Column("perimeter", sa.Float, sa.Computed("ST_Perimeter(geometry)")),
sa.Column("centroid", Geography("POINT"), sa.Computed("ST_Centroid(geometry)")),
sa.Column(
"polsby_popper",
sa.Float,
sa.Computed("4 * pi() * ST_Area(geometry) / ST_Perimeter(geometry)^2"),
),
sa.Column(
"fill_factor",
sa.Float,
sa.Computed(
"ST_Area(geometry) / ST_Area(ST_OrientedEnvelope(geometry::geometry)::geography)"
),
),
sa.Column("length", sa.Float),
sa.Column("area", sa.Float),
sa.Column("perimeter", sa.Float),
sa.Column("centroid", Geography("POINT")),
sa.Column("polsby_popper", sa.Float),
sa.Column("fill_factor", sa.Float),
)

op.create_table(
Expand Down Expand Up @@ -276,11 +249,36 @@ def upgrade() -> None:
sa.Column("create_time", sa.DateTime, server_default=sa.func.now()),
)

op.create_table(
"aoi_chunks",
sa.Column(
"id",
sa.BigInteger,
sa.ForeignKey(
"aoi.id", ondelete="CASCADE", deferrable=True, initially="DEFERRED"
),
),
sa.Column("geometry", Geometry("POLYGON", srid=4326), nullable=False),
)

op.create_table(
"slick_to_aoi",
sa.Column("id", sa.BigInteger, primary_key=True),
sa.Column("slick", sa.BigInteger, sa.ForeignKey("slick.id"), nullable=False),
sa.Column("aoi", sa.BigInteger, sa.ForeignKey("aoi.id"), nullable=False),
sa.Column(
"slick",
sa.BigInteger,
sa.ForeignKey(
"slick.id", ondelete="CASCADE", deferrable=True, initially="DEFERRED"
),
primary_key=True,
),
sa.Column(
"aoi",
sa.BigInteger,
sa.ForeignKey(
"aoi.id", ondelete="CASCADE", deferrable=True, initially="DEFERRED"
),
primary_key=True,
),
)

op.create_table(
Expand Down
1 change: 0 additions & 1 deletion alembic/versions/54c42e9e879f_add_postgis.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

def upgrade() -> None:
"""create postgis extension"""
op.execute("CREATE EXTENSION postgis;")
op.execute("CREATE EXTENSION IF NOT EXISTS postgis WITH SCHEMA public;")
op.execute(
"COMMENT ON EXTENSION postgis IS 'PostGIS geometry, geography, and raster spatial types and functions';"
Expand Down
11 changes: 8 additions & 3 deletions alembic/versions/5e03ce584f3c_add_eez.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
Create Date: 2022-07-08 11:24:31.802462
"""
import json

import geojson
import httpx
from shapely import from_geojson, to_wkt
from geoalchemy2.shape import from_shape
from shapely.geometry import MultiPolygon, shape
from sqlalchemy import orm

import cerulean_cloud.database_schema as database_schema
Expand Down Expand Up @@ -53,11 +53,16 @@ def upgrade() -> None:
for k in sovereign_keys
if feat["properties"][k] is not None
]

geometry = shape(feat["geometry"]).buffer(0)
if not isinstance(geometry, MultiPolygon):
geometry = MultiPolygon([geometry])

with session.begin():
aoi_eez = database_schema.AoiEez(
type=1,
name=feat["properties"]["GEONAME"],
geometry=to_wkt(from_geojson(json.dumps(feat["geometry"]))),
geometry=from_shape(geometry),
mrgid=feat["properties"]["MRGID"],
sovereigns=sovereigns,
)
Expand Down
11 changes: 8 additions & 3 deletions alembic/versions/c0bd1215a3ca_add_iho.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
Create Date: 2023-07-15 00:26:04.493750
"""
import json

import geojson
import httpx
from shapely import from_geojson, to_wkt
from geoalchemy2.shape import from_shape
from shapely.geometry import MultiPolygon, shape
from sqlalchemy import orm

import cerulean_cloud.database_schema as database_schema
Expand All @@ -36,12 +36,17 @@ def upgrade() -> None:
session = orm.Session(bind=bind)

iho = get_iho_from_url()

for feat in iho.get("features"):
geometry = shape(feat["geometry"]).buffer(0)
if not isinstance(geometry, MultiPolygon):
geometry = MultiPolygon([geometry])

with session.begin():
aoi_iho = database_schema.AoiIho(
type=2,
name=feat["properties"]["NAME"],
geometry=to_wkt(from_geojson(json.dumps(feat["geometry"]))),
geometry=from_shape(geometry),
mrgid=feat["properties"]["MRGID"],
)
session.add(aoi_iho)
Expand Down
61 changes: 61 additions & 0 deletions alembic/versions/c7c033c1cdb5_populate_aoi_chunks_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
"""Populate aoi_chunks table
Revision ID: c7c033c1cdb5
Revises: 3736e85bc273
Create Date: 2023-11-17 11:44:59.370910
"""
import sqlalchemy as sa
from sqlalchemy import orm

from alembic import op

# revision identifiers, used by Alembic.
revision = "c7c033c1cdb5"
down_revision = "3736e85bc273"
branch_labels = None
depends_on = None


def upgrade() -> None:
"""
Upgrade the database schema.
This function subdivides the geometries in the 'aoi' table into smaller chunks
if they have more than 255 points, and inserts these chunks into the 'aoi_chunks' table.
This is done to manage large geometries more efficiently.
"""
bind = op.get_bind()
session = orm.Session(bind=bind)
with session.begin():
session.execute(
sa.text(
"""
WITH dumped AS (
SELECT aoi.id, (st_dump(st_makevalid(st_buffer(geometry::geometry,0)))).geom as dgeom
FROM aoi
), split as(
SELECT id, st_subdivide(dgeom) as dgeom FROM dumped WHERE st_npoints(dgeom)>255
UNION ALL
SELECT id, dgeom FROM dumped WHERE st_npoints(dgeom)<=255
)
INSERT INTO aoi_chunks (id, geometry)
SELECT id, dgeom
FROM split;
"""
)
)


def downgrade() -> None:
"""
Downgrade the database schema.
This function clears the 'aoi_chunks' table by truncating it, effectively
undoing the changes made by the upgrade function.
"""
bind = op.get_bind()
session = orm.Session(bind=bind)
with session.begin():
session.execute(sa.text("TRUNCATE TABLE aoi_chunks;"))
pass
105 changes: 52 additions & 53 deletions alembic/versions/cb7ceecc3f87_add_funcs_on_insert.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
"""

from sqlalchemy import text

from alembic import op

# revision identifiers, used by Alembic.
Expand All @@ -18,37 +20,41 @@
def upgrade() -> None:
"""Add funcs on insert"""

# Create a trigger function that maps slick to aoi upon insert
op.execute(
"""
CREATE OR REPLACE FUNCTION map_slick_to_aoi()
RETURNS TRIGGER AS $$
BEGIN
INSERT INTO slick_to_aoi (slick, aoi)
SELECT DISTINCT NEW.id, aoi.id FROM aoi
WHERE ST_Intersects(aoi.geometry, NEW.geometry);
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
"""
)

# Create a trigger that calls the trigger function after insert
op.execute(
"""
CREATE TRIGGER trigger_map_slick_to_aoi
AFTER INSERT ON slick
FOR EACH ROW
EXECUTE FUNCTION map_slick_to_aoi();
"""
)

op.execute(
"""
CREATE OR REPLACE FUNCTION populate_cls()
RETURNS TRIGGER AS
$$
text(
"""
CREATE OR REPLACE FUNCTION slick_before_trigger_func()
RETURNS trigger
AS $$
DECLARE
timer timestamptz := clock_timestamp();
_geog geography := NEW.geometry;
_geom geometry;
oriented_envelope geometry;
oe_ring geometry;
rec record;
BEGIN
RAISE NOTICE '---------------------------------------------------------';
RAISE NOTICE 'In slick_before_trigger_func. %', (clock_timestamp() - timer)::interval;
_geom := _geog::geometry;
oriented_envelope := st_orientedenvelope(_geom);
oe_ring := st_exteriorring(oriented_envelope);
NEW.area := st_area(_geog);
NEW.centroid := st_centroid(_geog);
NEW.perimeter = st_perimeter(_geog);
NEW.polsby_popper := 4.0 * pi() * NEW.area / (NEW.perimeter ^ 2.0);
NEW.fill_factor := NEW.area / st_area(oriented_envelope::geography);
NEW.length := GREATEST(
st_distance(
st_pointn(oe_ring,1)::geography,
st_pointn(oe_ring,2)::geography
),
st_distance(
st_pointn(oe_ring,2)::geography,
st_pointn(oe_ring,3)::geography
)
);
RAISE NOTICE 'Calculated all generated fields. %', (clock_timestamp() - timer)::interval;
NEW.cls := (
SELECT cls.id
FROM cls
Expand All @@ -59,43 +65,36 @@ def upgrade() -> None:
WHERE cls.short_name = value
LIMIT 1
);
RAISE NOTICE 'Calculated NEW.cls. %', (clock_timestamp() - timer)::interval;
INSERT INTO slick_to_aoi(slick, aoi)
SELECT DISTINCT NEW.id, aoi_chunks.id
FROM aoi_chunks
WHERE st_intersects(_geom, aoi_chunks.geometry);
RAISE NOTICE 'Insert done to slick_to_aoi. %', (clock_timestamp() - timer)::interval;
RETURN NEW;
END;
$$
LANGUAGE plpgsql;
"""
)
$$ LANGUAGE PLPGSQL;
op.execute(
"""
CREATE TRIGGER trigger_populate_cls
BEFORE INSERT ON slick
FOR EACH ROW
EXECUTE FUNCTION populate_cls();
"""
CREATE TRIGGER slick_before_trigger BEFORE INSERT ON slick FOR EACH ROW EXECUTE FUNCTION slick_before_trigger_func();
SET CLIENT_MIN_MESSAGES TO NOTICE;
"""
)
)


def downgrade() -> None:
"""Add funcs on insert"""

op.execute(
"""
DROP TRIGGER IF EXISTS trigger_map_slick_to_aoi ON slick;
"""
)
op.execute(
"""
DROP FUNCTION IF EXISTS map_slick_to_aoi();
"""
)
op.execute(
"""
DROP TRIGGER IF EXISTS trigger_populate_cls ON slick;
DROP TRIGGER IF EXISTS slick_before_trigger ON slick;
"""
)
op.execute(
"""
DROP FUNCTION IF EXISTS populate_cls();
DROP FUNCTION IF EXISTS slick_before_trigger_func();
"""
)
Loading

0 comments on commit 287f813

Please sign in to comment.