From 16186f425c58df5eda30865e1662581b02a1c4ba Mon Sep 17 00:00:00 2001 From: Joel Balcaen Date: Wed, 24 Apr 2024 11:50:51 -0300 Subject: [PATCH] refactor --- lambdas/bedrock_invoker/src/index.py | 3 +++ lambdas/rich_pdf_ingestion/src/index.py | 17 +++++++---------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/lambdas/bedrock_invoker/src/index.py b/lambdas/bedrock_invoker/src/index.py index 3f35876..bd800af 100644 --- a/lambdas/bedrock_invoker/src/index.py +++ b/lambdas/bedrock_invoker/src/index.py @@ -6,6 +6,9 @@ bedrock = boto3.client('bedrock') def lambda_handler(event, context): + """ + Invokes a bedrock model with the given parameters and s3 text object + """ s3_arn = event['s3_arn'] bedrock_params = event['bedrock_params'] prompt = event['prompt'] diff --git a/lambdas/rich_pdf_ingestion/src/index.py b/lambdas/rich_pdf_ingestion/src/index.py index d5ff31b..79d6a48 100644 --- a/lambdas/rich_pdf_ingestion/src/index.py +++ b/lambdas/rich_pdf_ingestion/src/index.py @@ -3,6 +3,7 @@ import boto3 from pypdf import PdfReader import uuid +from pathlib import Path OBJECT_CREATED = "ObjectCreated" EXTRACTED_TEXT_S3_OBJECT_KEY_PREFIX = 'pdf_extraction_result' @@ -21,28 +22,24 @@ def lambda_handler(event, context): attachment_s3_info = parse_s3_arn(attachment_s3_arn) print("Attachment s3 arn parsed info: ", attachment_s3_info) bucket = attachment_s3_info["bucket"] - folder = attachment_s3_info['folder'] key = attachment_s3_info["key"] - filename_without_extension = attachment_s3_info['filename_without_extension'] extracted_files_s3_arns = [] if os.path.splitext(key)[1][1:] == "pdf": local_filename = fetch_file(bucket, key) print("Extracting text from pdf") extracted_text = extract_text_from_pdf(local_filename) - extracted_text_local_file = store_extracted_text_in_local_file( - extracted_text) + extracted_text_local_file = store_extracted_text_in_local_file(extracted_text) print("Finished extracting text from pdf") - extracted_text_s3_key = "/".join( - [folder, filename_without_extension+"_extracted_pdf_content", str(uuid.uuid4())+".txt"]) - print("Uploading file to ", extracted_text_s3_key) + base_name = Path(key).stem + new_key = f"{base_name}_extracted_pdf_content.txt" + print("Uploading file to ", new_key) upload_file( file_to_upload=extracted_text_local_file, bucket=bucket, - key=extracted_text_s3_key + key=new_key ) - extracted_files_s3_arns.append( - f"arn:aws:s3:::{bucket}/{extracted_text_s3_key}") + extracted_files_s3_arns.append(f"arn:aws:s3:::{bucket}/{new_key}") return { 'statusCode': 200,