Skip to content

Commit

Permalink
refactor
Browse files: browse the repository at this point in the history
  • Loading branch information
joelbalcaen committed Apr 24, 2024
1 parent 753cb3d commit 16186f4
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 10 deletions.
3 changes: 3 additions & 0 deletions lambdas/bedrock_invoker/src/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
bedrock = boto3.client('bedrock')

def lambda_handler(event, context):
"""
Invokes a bedrock model with the given parameters and s3 text object
"""
s3_arn = event['s3_arn']
bedrock_params = event['bedrock_params']
prompt = event['prompt']
Expand Down
17 changes: 7 additions & 10 deletions lambdas/rich_pdf_ingestion/src/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import boto3
from pypdf import PdfReader
import uuid
from pathlib import Path

OBJECT_CREATED = "ObjectCreated"
EXTRACTED_TEXT_S3_OBJECT_KEY_PREFIX = 'pdf_extraction_result'
Expand All @@ -21,28 +22,24 @@ def lambda_handler(event, context):
attachment_s3_info = parse_s3_arn(attachment_s3_arn)
print("Attachment s3 arn parsed info: ", attachment_s3_info)
bucket = attachment_s3_info["bucket"]
folder = attachment_s3_info['folder']
key = attachment_s3_info["key"]
filename_without_extension = attachment_s3_info['filename_without_extension']
extracted_files_s3_arns = []

if os.path.splitext(key)[1][1:] == "pdf":
local_filename = fetch_file(bucket, key)
print("Extracting text from pdf")
extracted_text = extract_text_from_pdf(local_filename)
extracted_text_local_file = store_extracted_text_in_local_file(
extracted_text)
extracted_text_local_file = store_extracted_text_in_local_file(extracted_text)
print("Finished extracting text from pdf")
extracted_text_s3_key = "/".join(
[folder, filename_without_extension+"_extracted_pdf_content", str(uuid.uuid4())+".txt"])
print("Uploading file to ", extracted_text_s3_key)
base_name = Path(key).stem
new_key = f"{base_name}_extracted_pdf_content.txt"
print("Uploading file to ", new_key)
upload_file(
file_to_upload=extracted_text_local_file,
bucket=bucket,
key=extracted_text_s3_key
key=new_key
)
extracted_files_s3_arns.append(
f"arn:aws:s3:::{bucket}/{extracted_text_s3_key}")
extracted_files_s3_arns.append(f"arn:aws:s3:::{bucket}/{new_key}")

return {
'statusCode': 200,
Expand Down

0 comments on commit 16186f4

Please sign in to comment.