
Commit b7d627f
Fix FIXMEs in list pre-processing lambda
dmannarino committed Aug 5, 2024
1 parent 17395a4 commit b7d627f
Showing 3 changed files with 10 additions and 12 deletions.
18 changes: 7 additions & 11 deletions lambdas/preprocessing/src/lambda_function.py
@@ -10,14 +10,10 @@

 from raster_analysis.boto import s3_client
 from raster_analysis.exceptions import QueryParseException
-from raster_analysis.globals import LOGGER
+from raster_analysis.globals import LOGGER, S3_PIPELINE_BUCKET
 
 patch(["boto3"])
 
-# FIXME: Get these from env
-BUCKET = "gfw-pipelines-test"
-REGION = "us-east-1"
-
 
 @xray_recorder.capture("Preprocessing")
 def handler(event, context):
@@ -41,26 +37,26 @@ def handler(event, context):
         geom_wkb = wkb_dumps(getattr(record, "geometry"), hex=True)
         rows.append([getattr(record, id_field), geom_wkb])
 
-    # FIXME: Hash those args for cacheability!
+    # Consider replacing UUID with hash of args for cacheability
     request_hash: UUID = uuid4()
-    geom_prefix = f"analysis/jobs/input/{str(request_hash)}/geometries.csv"
-    output_prefix = f"analysis/jobs/output/{str(request_hash)}/output"
+    geom_prefix = f"analysis/jobs/{str(request_hash)}/geometries.csv"
+    output_prefix = f"analysis/jobs/{str(request_hash)}/output"
 
     with tempfile.TemporaryDirectory() as tmp_dir:
        some_path = os.path.join(tmp_dir, "geometries.csv")
        df = pd.DataFrame(rows, columns=[id_field, 'geometry'])
        df.to_csv(some_path, index=False)
 
-        upload_to_s3(some_path, BUCKET, geom_prefix)
+        upload_to_s3(some_path, S3_PIPELINE_BUCKET, geom_prefix)
 
     return {
        "status": "success",
        "geometries": {
-            "bucket": BUCKET,
+            "bucket": S3_PIPELINE_BUCKET,
            "key": geom_prefix
        },
        "output": {
-            "bucket": BUCKET,
+            "bucket": S3_PIPELINE_BUCKET,
            "prefix": output_prefix
        }
    }
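
The replacement comment above leaves the cacheability idea as a follow-up: derive the prefix from a deterministic hash of the request instead of a random UUID, so identical requests map to the same S3 location and previously written output can be reused. A minimal sketch of that idea, not part of this commit; the helper name request_cache_key and the reuse of the rows list built in the handler are assumptions:

import hashlib
import json


def request_cache_key(rows, id_field):
    """Build a deterministic key from the request contents.

    Identical (id, WKB geometry) rows always produce the same digest,
    so a repeated request can reuse the same S3 prefix instead of
    writing a fresh one under a new UUID.
    """
    payload = json.dumps({"id_field": id_field, "rows": rows}, sort_keys=True)
    return hashlib.sha256(payload.encode("utf-8")).hexdigest()


# Hypothetical usage inside handler(), replacing uuid4():
# request_hash = request_cache_key(rows, id_field)
# geom_prefix = f"analysis/jobs/{request_hash}/geometries.csv"
# output_prefix = f"analysis/jobs/{request_hash}/output"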
2 changes: 2 additions & 0 deletions raster_analysis/globals.py
@@ -43,3 +43,5 @@

 DYNAMODB_REQUEST_ITEMS_LIMIT = 100
 DYNAMODB_WRITE_ITEMS_LIMIT = 25
+
+S3_PIPELINE_BUCKET = os.environ.get("S3_PIPELINE_BUCKET", "")
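
With the bucket now read from the environment, a missing S3_PIPELINE_BUCKET silently becomes an empty string and only surfaces later when the upload fails. If fail-fast behaviour were preferred, the setting could be validated at import time; this is a sketch of that alternative, not what the commit does:

import os

# Read the pipeline bucket once at import time and fail loudly if it is missing.
S3_PIPELINE_BUCKET = os.environ.get("S3_PIPELINE_BUCKET", "")
if not S3_PIPELINE_BUCKET:
    raise RuntimeError("S3_PIPELINE_BUCKET environment variable is not set")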
2 changes: 1 addition & 1 deletion terraform/lambdas.tf
@@ -125,7 +125,7 @@ resource "aws_lambda_function" "preprocessing" {
   environment {
     variables = {
       ENV = var.environment
-      S3_BUCKET_DATA_LAKE = data.terraform_remote_state.core.outputs.data-lake_bucket
+      S3_PIPELINE_BUCKET = data.terraform_remote_state.core.outputs.pipelines_bucket
       SETUPTOOLS_USE_DISTUTILS = "stdlib"
     }
   }
