From 69581ec62be534fd8b76d25ccb19286c5d93917b Mon Sep 17 00:00:00 2001 From: Joel Balcaen Date: Fri, 5 Apr 2024 13:43:41 -0300 Subject: [PATCH] increase timeout --- lambdas/rich_pdf_ingestion/lambda.tf | 2 +- lambdas/rich_pdf_ingestion/src/index.py | 18 +++++++----------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/lambdas/rich_pdf_ingestion/lambda.tf b/lambdas/rich_pdf_ingestion/lambda.tf index 1cd1d06..1ade5c5 100644 --- a/lambdas/rich_pdf_ingestion/lambda.tf +++ b/lambdas/rich_pdf_ingestion/lambda.tf @@ -1,7 +1,7 @@ locals { lambda_function_name = "rich_pdf_ingestion" ses_arn = "arn:aws:ses:${var.aws_region}:${data.aws_caller_identity.current.account_id}" - timeout = 30 + timeout = 60 } data "aws_caller_identity" "current" {} diff --git a/lambdas/rich_pdf_ingestion/src/index.py b/lambdas/rich_pdf_ingestion/src/index.py index a35b8bd..1dcc67d 100644 --- a/lambdas/rich_pdf_ingestion/src/index.py +++ b/lambdas/rich_pdf_ingestion/src/index.py @@ -41,30 +41,26 @@ def fetch_file(bucket, key): except ClientError as e: print(e) raise e + return local_filename -def lambda_handler(event, context): +def lambda_handler(event, context): print(event) records = event["Records"] for record in records: eventName = record["eventName"] print(f"eventName: {eventName}") + try: bucket, key = get_bucket_and_key(record) print(f"source_bucket: {bucket}, source_key: {key}") - if eventName.startswith(OBJECT_CREATED): + if eventName.startswith(OBJECT_CREATED) and os.path.splitext(key)[1][1:] == "pdf": local_filename = fetch_file(bucket, key) - - - # collection_name = bucket + "-" - # collection_name += os.path.dirname(key).replace("/", "-") - - if os.path.splitext(key)[1][1:] == "pdf": - print("Extracting text from pdf") - document_text = generate_text_form_pdf(local_filename) - print(f"Extracted: {document_text}") + print("Extracting text from pdf") + document_text = generate_text_form_pdf(local_filename) + print(f"Extracted: {document_text}") except Exception as e: print(e)