Skip to content

Commit

Permalink
new lambda to invoke bedrock
Browse files Browse the repository at this point in the history
  • Loading branch information
joelbalcaen committed Apr 24, 2024
1 parent 04a2d55 commit b86e642
Show file tree
Hide file tree
Showing 6 changed files with 201 additions and 45 deletions.
74 changes: 74 additions & 0 deletions lambdas/bedrock_invoker/lambda.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Static configuration for the bedrock-invoker Lambda.
locals {
  # AWS-published Lambda Powertools layer (AWS-owned account 017000801446),
  # pinned to version 67 of the Python V2 layer.
  powertools_layer_arn = "arn:aws:lambda:${var.aws_region}:017000801446:layer:AWSLambdaPowertoolsPythonV2:67"

  lambda_function_name = "levio-esta-bedrock-invoker"
  runtime              = "python3.11"
  timeout              = 30
}

# Current account id, used to scope account-qualified IAM resource ARNs below.
data "aws_caller_identity" "current" {}


# Packages ./src and deploys it as the bedrock-invoker Lambda, with an IAM
# role allowing logging, S3 read/write on the caller-supplied resources, and
# Bedrock model invocation.
module "lambda_function_container_image" {
  source        = "terraform-aws-modules/lambda/aws"
  function_name = local.lambda_function_name
  handler       = "index.lambda_handler"
  publish       = true
  runtime       = local.runtime
  timeout       = local.timeout
  layers        = [local.powertools_layer_arn]
  source_path   = "${path.module}/src"
  s3_bucket     = var.lambda_storage_bucket
  memory_size   = 256
  role_name     = "${local.lambda_function_name}-role"

  attach_policy_statements = true
  policy_statements = {
    # Allow the function to create its log group on first invocation.
    log_group = {
      effect    = "Allow"
      actions   = ["logs:CreateLogGroup"]
      resources = ["arn:aws:logs:*:*:*"]
    }

    # Bedrock foundation models are AWS-owned: their ARNs use an EMPTY
    # account field and the "foundation-model" resource type
    # (arn:aws:bedrock:<region>::foundation-model/<model-id>). The previous
    # pattern ("arn:aws:bedrock:*:<account>:model/*") matches no Bedrock
    # resource, so InvokeModel was always denied.
    bedrock_invoke = {
      effect    = "Allow"
      actions   = ["bedrock:InvokeModel"]
      resources = ["arn:aws:bedrock:*::foundation-model/*"]
    }

    # Read (and PutObject) access restricted to the buckets/prefixes the
    # caller passes in via var.allowed_s3_resources.
    s3 = {
      effect = "Allow"
      actions = [
        "s3:Get*",
        "s3:List*",
        "s3:Describe*",
        "s3:PutObject",
        "s3-object-lambda:Get*",
        "s3-object-lambda:List*",
        "s3-object-lambda:WriteGetObjectResponse",
      ]
      resources = var.allowed_s3_resources
    }

    # Stream/event writes are scoped to this function's own log group.
    log_write = {
      effect = "Allow"
      actions = [
        "logs:CreateLogStream",
        "logs:PutLogEvents",
      ]
      resources = [
        "arn:aws:logs:*:*:log-group:/aws/${local.lambda_function_name}/*:*"
      ]
    }
  }
}
7 changes: 7 additions & 0 deletions lambdas/bedrock_invoker/output.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Expose the deployed function's ARN from the wrapped lambda module.
output "lambda_function_arn" {
value = module.lambda_function_container_image.lambda_function_arn
}

# Expose the deployed function's name from the wrapped lambda module.
output "lambda_function_name" {
value = module.lambda_function_container_image.lambda_function_name
}
48 changes: 48 additions & 0 deletions lambdas/bedrock_invoker/src/index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import boto3
import json
from botocore.exceptions import BotoCoreError, ClientError

# Module-level clients so they are reused across warm Lambda invocations.
s3 = boto3.client('s3')
# InvokeModel is part of the Bedrock *runtime* API; the plain 'bedrock'
# client is the control plane and has no invoke_model operation.
bedrock = boto3.client('bedrock-runtime')

def lambda_handler(event, context):
    """Download a text object from S3 and pass it to a Bedrock model.

    Expected event shape:
        s3_arn         -- ARN of the S3 object holding the input text
        bedrock_params -- dict with 'model_name' (Bedrock model id) and 'master'
        prompt         -- prompt string forwarded to the model

    Returns a dict with statusCode 200 and the decoded model response on
    success, or statusCode 400 with the error message on failure.
    """
    s3_arn = event['s3_arn']
    bedrock_params = event['bedrock_params']
    prompt = event['prompt']

    # ARN format: arn:aws:s3:::bucket/key. Use maxsplit=1 so object keys
    # that themselves contain '/' do not break the 2-tuple unpacking.
    bucket, key = s3_arn.split(':::')[1].split('/', 1)

    # Download the file from S3
    try:
        s3_object = s3.get_object(Bucket=bucket, Key=key)
    except ClientError as e:
        return {
            'statusCode': 400,
            'body': str(e)
        }

    # Extract text from the S3 object
    extracted_text = s3_object['Body'].read().decode('utf-8')

    # Invoke the Bedrock model. The API takes modelId/body — the original
    # ModelName/Payload keyword arguments do not exist on invoke_model.
    # ClientError is caught too: AWS service rejections (bad model id,
    # access denied) raise ClientError, not BotoCoreError.
    try:
        response = bedrock.invoke_model(
            modelId=bedrock_params['model_name'],
            body=json.dumps({
                'master': bedrock_params['master'],
                'prompt': prompt,
                'message': extracted_text
            })
        )
    except (BotoCoreError, ClientError) as e:
        return {
            'statusCode': 400,
            'body': str(e)
        }

    # The raw invoke_model response holds a StreamingBody, which is not
    # JSON serializable; decode it so Lambda can serialize the return value.
    return {
        'statusCode': 200,
        'body': 'Successfully processed the S3 ARN',
        'bedrockResponse': json.loads(response['body'].read())
    }
15 changes: 15 additions & 0 deletions lambdas/bedrock_invoker/variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Bucket the lambda module uses to store the packaged deployment artifact.
variable "lambda_storage_bucket" {
type = string
nullable = false
}

# Region used to build the region-scoped Powertools layer ARN.
variable "aws_region" {
type = string
nullable = false
}

# S3 / S3 Object Lambda resource ARNs the function's role may read and write.
variable "allowed_s3_resources" {
type = list(string)
nullable = false
description = "values for the s3 resources that the lambda function can access"
}
93 changes: 48 additions & 45 deletions lambdas/rich_pdf_ingestion/src/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,54 @@
s3 = boto3.client("s3")


def lambda_handler(event, context):
    """Extract text/tables from a PDF in S3 and store the result as a .txt object.

    event['path'] holds the S3 ARN of the attachment. Non-PDF objects are
    skipped (an empty attachment_arns list is returned). On success returns
    statusCode 200 plus the ARNs of the uploaded extraction files; on any
    failure returns statusCode 400 with the error message.
    """
    print(event)
    attachment_s3_arn = event['path']

    try:
        attachment_s3_info = parse_s3_arn(attachment_s3_arn)
        print("Attachment s3 arn parsed info: ", attachment_s3_info)
        bucket = attachment_s3_info["bucket"]
        folder = attachment_s3_info['folder']
        key = attachment_s3_info["key"]
        filename_without_extension = attachment_s3_info['filename_without_extension']
        extracted_files_s3_arns = []

        # Only PDFs are processed; anything else falls through to the
        # success return with no extracted files.
        if os.path.splitext(key)[1][1:] == "pdf":
            local_filename = fetch_file(bucket, key)
            print("Extracting text from pdf")
            extracted_text = extract_text_from_pdf(local_filename)
            extracted_text_local_file = store_extracted_text_in_local_file(
                extracted_text)
            print("Finished extracting text from pdf")
            # Random UUID filename avoids collisions between repeated
            # extractions of the same source PDF.
            extracted_text_s3_key = "/".join(
                [folder, filename_without_extension+"_extracted_pdf_content", str(uuid.uuid4())+".txt"])
            print("Uploading file to ", extracted_text_s3_key)
            upload_file(
                file_to_upload=extracted_text_local_file,
                bucket=bucket,
                key=extracted_text_s3_key
            )
            extracted_files_s3_arns.append(
                f"arn:aws:s3:::{bucket}/{extracted_text_s3_key}")

        return {
            'statusCode': 200,
            'body': 'PDF text content extracted and saved',
            'attachment_arns': extracted_files_s3_arns
        }

    except Exception as e:
        print(e)
        # str(e): exception objects are not JSON serializable, so returning
        # the bare exception would itself crash Lambda's response encoding.
        return {
            'statusCode': 400,
            'body': str(e)
        }


def extract_text_from_pdf(pdf_file_path):
text = ""

Expand Down Expand Up @@ -67,48 +115,3 @@ def store_extracted_text_in_local_file(extracted_text):
f.write(extracted_text)

return local_file_path


def lambda_handler(event, context):
    """Extract text from a PDF referenced by an S3 ARN and upload it as .txt.

    event['path'] holds the S3 ARN of the attachment; non-PDF keys are
    skipped and an empty attachment_arns list is returned.
    """
    print(event)
    attachment_s3_arn = event['path']

    try:
        attachment_s3_info = parse_s3_arn(attachment_s3_arn)
        print("Attachment s3 arn parsed info: ", attachment_s3_info)
        bucket = attachment_s3_info["bucket"]
        folder = attachment_s3_info['folder']
        key = attachment_s3_info["key"]
        filename_without_extension = attachment_s3_info['filename_without_extension']
        extracted_files_s3_arns = []

        if os.path.splitext(key)[1][1:] == "pdf":
            local_filename = fetch_file(bucket, key)
            print("Extracting text from pdf")
            extracted_text = extract_text_from_pdf(local_filename)
            extracted_text_local_file = store_extracted_text_in_local_file(
                extracted_text)
            print("Finished extracting text from pdf")
            extracted_text_s3_key = "/".join(
                [folder, filename_without_extension+"_extracted_pdf_content", str(uuid.uuid4())+".txt"])
            print("Uploading file to ", extracted_text_s3_key)
            upload_file(
                file_to_upload=extracted_text_local_file,
                bucket=bucket,
                key=extracted_text_s3_key
            )
            extracted_files_s3_arns.append(
                f"arn:aws:s3:::{bucket}/{extracted_text_s3_key}")

        return {
            'statusCode': 200,
            'body': 'PDF text content extracted and saved',
            'attachment_arns': extracted_files_s3_arns
        }

    except Exception as e:
        print(e)
        # Convert to str: an exception object is not JSON serializable and
        # would break Lambda's serialization of this error response.
        return {
            'statusCode': 400,
            'body': str(e)
        }
9 changes: 9 additions & 0 deletions terraform/modules.tf
Original file line number Diff line number Diff line change
Expand Up @@ -267,3 +267,12 @@ module "email_attachment_saver" {
allowed_s3_resources = [module.s3_bucket.s3_bucket_arn, "${module.s3_bucket.s3_bucket_arn}/*"]
}


# Wires the bedrock-invoker Lambda module into the root stack.
module "bedrock_invoker" {
  # Path fixed: the directory added by this change is "bedrock_invoker";
  # the previous "berock_invoker" typo makes `terraform init` fail.
  source                = "../lambdas/bedrock_invoker"
  lambda_storage_bucket = aws_s3_bucket.lambda_storage.id
  aws_region            = var.aws_region
  allowed_s3_resources  = [module.s3_bucket.s3_bucket_arn, "${module.s3_bucket.s3_bucket_arn}/*"]
}


0 comments on commit b86e642

Please sign in to comment.