Skip to content

Commit

Permalink
rename
Browse files: browse the repository at this point in the history
  • Loading branch information
joelbalcaen committed May 3, 2024
1 parent c43e7c5 commit e2688a4
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions lambdas/excel_ingestion/src/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@

def lambda_handler(event, context):
"""
Downloads the given docx file from S3, extracts the text content and saves it as a txt file in the same bucket, adjacent to the original docx file.
Downloads the given xlsx file from S3, extracts the text content and saves it as a csv file in the same bucket, adjacent to the original xlsx file.
"""
try:
print(event)

bucket, key = event['docx_s3_uri'].replace("s3://", "").split("/", 1)
bucket, key = event['xlsx_s3_uri'].replace("s3://", "").split("/", 1)
print(f"File located at bucket: {bucket} and key: {key}")

if os.path.splitext(key)[1][1:] != "xlsx":
Expand All @@ -29,15 +29,15 @@ def lambda_handler(event, context):
df.to_csv(csv_file_path, index=False, sep='\t')
base_path = Path(key).parent
base_name = Path(key).stem
new_key = f"{base_path}/{base_name}_extracted_docx_content.csv"
new_key = f"{base_path}/{base_name}_extracted_xlsx_content.csv"

with open(csv_file_path, "rb") as f:
s3.upload_fileobj(f, bucket, new_key)

return {
'statusCode': 200,
'body': 'xlsx text content extracted and saved',
'attachment_uri': f"s3://{bucket}/{new_key}"
'extracted_csv_uri': f"s3://{bucket}/{new_key}"
}

except Exception as e:
Expand Down

0 comments on commit e2688a4

Please sign in to comment.