diff --git a/lambdas/excel_ingestion/src/index.py b/lambdas/excel_ingestion/src/index.py index db76f6d..63f350b 100644 --- a/lambdas/excel_ingestion/src/index.py +++ b/lambdas/excel_ingestion/src/index.py @@ -9,12 +9,12 @@ def lambda_handler(event, context): """ - Downloads the given docx file from S3, extracts the text content and saves it as a txt file in the same bucket, adjacent to the original docx file. + Downloads the given xlsx file from S3, extracts the text content and saves it as a csv file in the same bucket, adjacent to the original xlsx file. """ try: print(event) - bucket, key = event['docx_s3_uri'].replace("s3://", "").split("/", 1) + bucket, key = event['xlsx_s3_uri'].replace("s3://", "").split("/", 1) print(f"File located at bucket: {bucket} and key: {key}") if os.path.splitext(key)[1][1:] != "xlsx": @@ -29,7 +29,7 @@ def lambda_handler(event, context): df.to_csv(csv_file_path, index=False, sep='\t') base_path = Path(key).parent base_name = Path(key).stem - new_key = f"{base_path}/{base_name}_extracted_docx_content.csv" + new_key = f"{base_path}/{base_name}_extracted_xlsx_content.csv" with open(csv_file_path, "rb") as f: s3.upload_fileobj(f, bucket, new_key) @@ -37,7 +37,7 @@ def lambda_handler(event, context): return { 'statusCode': 200, 'body': 'xlsx text content extracted and saved', - 'attachment_uri': f"s3://{bucket}/{new_key}" + 'extracted_csv_uri': f"s3://{bucket}/{new_key}" } except Exception as e: