Skip to content

Commit

Permalink
increase timeout
Browse files: browse the repository at this point in the history
  • Loading branch information
Joel Balcaen committed Apr 5, 2024
1 parent c629015 commit 69581ec
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 12 deletions.
2 changes: 1 addition & 1 deletion lambdas/rich_pdf_ingestion/lambda.tf
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
locals {
lambda_function_name = "rich_pdf_ingestion"
ses_arn = "arn:aws:ses:${var.aws_region}:${data.aws_caller_identity.current.account_id}"
timeout = 30
timeout = 60
}

data "aws_caller_identity" "current" {}
Expand Down
18 changes: 7 additions & 11 deletions lambdas/rich_pdf_ingestion/src/index.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -41,30 +41,26 @@ def fetch_file(bucket, key):
except ClientError as e:
print(e)
raise e

return local_filename


def lambda_handler(event, context):
def lambda_handler(event, context):
print(event)
records = event["Records"]
for record in records:
eventName = record["eventName"]
print(f"eventName: {eventName}")

try:
bucket, key = get_bucket_and_key(record)
print(f"source_bucket: {bucket}, source_key: {key}")

if eventName.startswith(OBJECT_CREATED):
if eventName.startswith(OBJECT_CREATED) and os.path.splitext(key)[1][1:] == "pdf":
local_filename = fetch_file(bucket, key)


# collection_name = bucket + "-"
# collection_name += os.path.dirname(key).replace("/", "-")

if os.path.splitext(key)[1][1:] == "pdf":
print("Extracting text from pdf")
document_text = generate_text_form_pdf(local_filename)
print(f"Extracted: {document_text}")
print("Extracting text from pdf")
document_text = generate_text_form_pdf(local_filename)
print(f"Extracted: {document_text}")

except Exception as e:
print(e)
Expand Down

0 comments on commit 69581ec

Please sign in to comment.