diff --git a/lambdas/FormProcessor/FormRequestProcessorFunction/pom.xml b/lambdas/FormProcessor/FormRequestProcessorFunction/pom.xml
index 1f26b86..f936855 100644
--- a/lambdas/FormProcessor/FormRequestProcessorFunction/pom.xml
+++ b/lambdas/FormProcessor/FormRequestProcessorFunction/pom.xml
@@ -88,6 +88,12 @@
             <artifactId>jackson-datatype-joda</artifactId>
             <version>2.15.3</version>
         </dependency>
+        <dependency>
+            <groupId>org.apache.pdfbox</groupId>
+            <artifactId>pdfbox</artifactId>
+            <version>3.0.2</version>
+        </dependency>
+
     </dependencies>
 
     <build>
diff --git a/lambdas/FormProcessor/FormRequestProcessorFunction/src/main/java/com/levio/awsdemo/formrequestprocessor/App.java b/lambdas/FormProcessor/FormRequestProcessorFunction/src/main/java/com/levio/awsdemo/formrequestprocessor/App.java
index 11bfccd..1eb3819 100644
--- a/lambdas/FormProcessor/FormRequestProcessorFunction/src/main/java/com/levio/awsdemo/formrequestprocessor/App.java
+++ b/lambdas/FormProcessor/FormRequestProcessorFunction/src/main/java/com/levio/awsdemo/formrequestprocessor/App.java
@@ -7,13 +7,15 @@
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.datatype.joda.JodaModule;
 import com.levio.awsdemo.formrequestprocessor.service.*;
+import com.levio.awsdemo.formrequestprocessor.utils.PDF;
 import jakarta.mail.MessagingException;
 import jakarta.mail.internet.InternetAddress;
 import jakarta.mail.internet.MimeMessage;
 
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
+import java.io.UncheckedIOException;
 import java.nio.charset.StandardCharsets;
 import java.util.HashMap;
 import java.util.Map;
@@ -69,14 +71,14 @@ public Void handleRequest(final SQSEvent input, final Context context) {
                 var formKey = formFillRequest.getFormKey();
                 var questionsMapper = retrieveDocumentMapper(formKey);
 
-                String email = s3Service.getFile(formKey + "/email/" + formFillRequest.getEmailId());
+                String email = s3Service.getObjectAsString(formKey + "/email/" + formFillRequest.getEmailId());
                 try {
                     MimeMessage message = mailService.getMimeMessage(new ByteArrayInputStream(email.getBytes(StandardCharsets.UTF_8)));
                     String emailBody = "Formulaire response";
                     String sender = ((InternetAddress) message.getFrom()[0]).getAddress();
                     String subject = message.getSubject();
 
-                    String content = s3Service.getFile(attachmentKey);
+                    String content = getS3ObjectContent(attachmentKey);
 
                     questionsMapper.entrySet().parallelStream()
                             .forEach(positionQuestionAnswerMapper -> {
@@ -118,6 +120,28 @@ private static Map getMessageAttributes(Strin
         return messageAttributes;
     }
 
+    /**
+     * Returns the text content of the S3 object stored under {@code key}.
+     * <p>
+     * Keys ending in {@code .pdf} (any letter case) are downloaded to a local
+     * file and converted to text with {@link PDF#generateTextFromPDF}; all
+     * other keys are read as plain strings. If the PDF cannot be written
+     * locally or parsed, the error is reported on stderr and the raw object
+     * is returned as a string instead.
+     */
+    private String getS3ObjectContent(String key) {
+        // regionMatches(ignoreCase=true, ...) is false when the key is shorter than ".pdf".
+        final var isPdf = key.regionMatches(true, key.length() - 4, ".pdf", 0, 4);
+        if (isPdf) {
+            try {
+                return PDF.generateTextFromPDF(s3Service.getObjectAsFile(key));
+            } catch (IOException | UncheckedIOException e) {
+                System.err.println("Could not extract text from PDF " + key + ": " + e);
+            }
+        }
+        return s3Service.getObjectAsString(key);
+    }
+
     private HashMap> retrieveDocumentMapper(String formKey) {
         try {
             return documentService.retrieveQuestionsMapper(formKey);
diff --git a/lambdas/FormProcessor/FormRequestProcessorFunction/src/main/java/com/levio/awsdemo/formrequestprocessor/service/S3Service.java b/lambdas/FormProcessor/FormRequestProcessorFunction/src/main/java/com/levio/awsdemo/formrequestprocessor/service/S3Service.java
index b3e4a3a..d25b8b3 100644
--- a/lambdas/FormProcessor/FormRequestProcessorFunction/src/main/java/com/levio/awsdemo/formrequestprocessor/service/S3Service.java
+++ b/lambdas/FormProcessor/FormRequestProcessorFunction/src/main/java/com/levio/awsdemo/formrequestprocessor/service/S3Service.java
@@ -9,7 +9,11 @@
 import software.amazon.awssdk.services.s3.model.PutObjectRequest;
 import software.amazon.awssdk.services.s3.model.PutObjectResponse;
 
+import java.io.File;
+import java.io.IOException;
 import java.io.InputStream;
+import java.io.UncheckedIOException;
+import java.nio.file.Files;
 
 public class S3Service {
 
@@ -18,11 +22,36 @@ public class S3Service {
     private final S3Client s3 = S3Client.builder()
             .region(Region.US_EAST_1)
             .build();
-    public String getFile(String key) {
+
+    public String getObjectAsString(String key) {
         ResponseBytes objectBytes = getObjectResponseBytes(key);
         return new String(objectBytes.asByteArray());
     }
 
+    /**
+     * Downloads the object stored under {@code key} to {@code /tmp/<key>}.
+     * <p>
+     * Parent directories are created as needed, so the key may contain
+     * {@code /} separators.
+     *
+     * @param key S3 object key; also used as the path below {@code /tmp}
+     * @return the local file holding the object's bytes
+     * @throws UncheckedIOException if the local file cannot be written
+     */
+    public File getObjectAsFile(String key) {
+        final var content = getObjectResponseBytes(key).asByteArray();
+        final var file = new File("/tmp/" + key);
+        try {
+            // "/tmp/<key>" always has a parent, at minimum the root directory.
+            Files.createDirectories(file.toPath().getParent());
+            // Files.write opens, writes and closes the file in one call.
+            Files.write(file.toPath(), content);
+            return file;
+        } catch (IOException e) {
+            throw new UncheckedIOException("Could not write S3 object " + key + " to " + file, e);
+        }
+    }
+
     public InputStream getInputFileStream(String key) {
         ResponseBytes objectBytes = getObjectResponseBytes(key);
         return objectBytes.asInputStream();
diff --git a/lambdas/FormProcessor/FormRequestProcessorFunction/src/main/java/com/levio/awsdemo/formrequestprocessor/utils/PDF.java b/lambdas/FormProcessor/FormRequestProcessorFunction/src/main/java/com/levio/awsdemo/formrequestprocessor/utils/PDF.java
new file mode 100644
index 0000000..7b2b460
--- /dev/null
+++ b/lambdas/FormProcessor/FormRequestProcessorFunction/src/main/java/com/levio/awsdemo/formrequestprocessor/utils/PDF.java
@@ -0,0 +1,33 @@
+package com.levio.awsdemo.formrequestprocessor.utils;
+
+import org.apache.pdfbox.Loader;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.text.PDFTextStripper;
+
+import java.io.File;
+import java.io.IOException;
+
+/**
+ * Text-extraction helpers for PDF documents, built on Apache PDFBox.
+ */
+public final class PDF {
+
+    private PDF() {
+        // Static utility class; not meant to be instantiated.
+    }
+
+    /**
+     * Extracts the text content of a PDF file.
+     *
+     * @param file the PDF to read
+     * @return the document text as produced by {@link PDFTextStripper}
+     * @throws IOException if the file cannot be read or parsed as a PDF
+     */
+    public static String generateTextFromPDF(File file) throws IOException {
+        // Loader.loadPDF is the PDFBox 3.x entry point; try-with-resources
+        // closes the document and the file handle behind it.
+        try (PDDocument document = Loader.loadPDF(file)) {
+            return new PDFTextStripper().getText(document);
+        }
+    }
+}