Skip to content

Commit

Permalink
fix lambda
Browse files — browse the repository at this point in the history
  • Loading branch information
joelbalcaen committed Apr 25, 2024
1 parent f2c7310 commit 53a80c1
Showing 1 changed file with 4 additions and 19 deletions.
23 changes: 4 additions & 19 deletions lambdas/rich_pdf_ingestion/src/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,8 @@ def lambda_handler(event, context):
attachment_s3_arn = event['path']

try:
attachment_s3_info = parse_s3_arn(attachment_s3_arn)
print("Attachment s3 arn parsed info: ", attachment_s3_info)
bucket = attachment_s3_info["bucket"]
key = attachment_s3_info["key"]
bucket, key = parse_s3_arn(attachment_s3_arn)
print(f"Attachment located at bucket: {bucket} and key: {key}")

if os.path.splitext(key)[1][1:] != "pdf":
return {
Expand Down Expand Up @@ -72,24 +70,11 @@ def extract_text_from_pdf(pdf_file_path):


def parse_s3_arn(s3_arn):
    """Split an S3 ARN into its bucket name and object key.

    Args:
        s3_arn: String of the form ``arn:aws:s3:::bucket/path/to/object``.
            A string without the ``arn:aws:s3:::`` prefix is accepted and
            treated as ``bucket/path/to/object``.

    Returns:
        A ``(bucket, key)`` tuple of strings. ``key`` is everything after
        the first ``/`` (inner slashes are preserved) and is an empty
        string when the input names only a bucket.
    """
    arn_prefix = "arn:aws:s3:::"

    # Strip the prefix only when it leads the string; a blanket replace()
    # would also mangle an object key that happens to contain this text.
    if s3_arn.startswith(arn_prefix):
        s3_path = s3_arn[len(arn_prefix):]
    else:
        s3_path = s3_arn

    # The bucket is the first path component; the key is the remainder.
    bucket, _, key = s3_path.partition("/")
    return bucket, key

def fetch_file(bucket, key):
local_filename = f"{PATH_TO_WRITE_FILES}/{key.split('/')[-1]}"
Expand Down

0 comments on commit 53a80c1

Please sign in to comment.