diff --git a/lambdas/FormProcessor/FormRequestProcessorFunction/pom.xml b/lambdas/FormProcessor/FormRequestProcessorFunction/pom.xml
index 1f26b86..f936855 100644
--- a/lambdas/FormProcessor/FormRequestProcessorFunction/pom.xml
+++ b/lambdas/FormProcessor/FormRequestProcessorFunction/pom.xml
@@ -88,6 +88,12 @@
jackson-datatype-joda
2.15.3
+        <dependency>
+            <groupId>org.apache.pdfbox</groupId>
+            <artifactId>pdfbox</artifactId>
+            <version>3.0.2</version>
+        </dependency>
+
diff --git a/lambdas/FormProcessor/FormRequestProcessorFunction/src/main/java/com/levio/awsdemo/formrequestprocessor/App.java b/lambdas/FormProcessor/FormRequestProcessorFunction/src/main/java/com/levio/awsdemo/formrequestprocessor/App.java
index 11bfccd..1eb3819 100644
--- a/lambdas/FormProcessor/FormRequestProcessorFunction/src/main/java/com/levio/awsdemo/formrequestprocessor/App.java
+++ b/lambdas/FormProcessor/FormRequestProcessorFunction/src/main/java/com/levio/awsdemo/formrequestprocessor/App.java
@@ -7,13 +7,17 @@
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.datatype.joda.JodaModule;
import com.levio.awsdemo.formrequestprocessor.service.*;
+import com.levio.awsdemo.formrequestprocessor.utils.PDF;
import jakarta.mail.MessagingException;
import jakarta.mail.internet.InternetAddress;
import jakarta.mail.internet.MimeMessage;
+import org.apache.pdfbox.cos.COSDocument;
+import org.apache.pdfbox.io.RandomAccessRead;
+import org.apache.pdfbox.pdfparser.PDFParser;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.text.PDFTextStripper;
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
+import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
@@ -69,14 +73,14 @@ public Void handleRequest(final SQSEvent input, final Context context) {
var formKey = formFillRequest.getFormKey();
var questionsMapper = retrieveDocumentMapper(formKey);
- String email = s3Service.getFile(formKey + "/email/" + formFillRequest.getEmailId());
+ String email = s3Service.getObjectAsString(formKey + "/email/" + formFillRequest.getEmailId());
try {
MimeMessage message = mailService.getMimeMessage(new ByteArrayInputStream(email.getBytes(StandardCharsets.UTF_8)));
String emailBody = "Formulaire response";
String sender = ((InternetAddress) message.getFrom()[0]).getAddress();
String subject = message.getSubject();
- String content = s3Service.getFile(attachmentKey);
+ String content = getS3ObjectContent(attachmentKey);
questionsMapper.entrySet().parallelStream()
.forEach(positionQuestionAnswerMapper -> {
@@ -118,6 +122,24 @@ private static Map getMessageAttributes(Strin
return messageAttributes;
}
+
+    /**
+     * Loads the S3 object stored under {@code key} as text.
+     *
+     * <p>Keys ending in {@code .pdf} (compared case-insensitively) are downloaded to a local
+     * file and passed to {@link PDF#generateTextFromPDF(File)}. Every other key, and any PDF
+     * whose download or text extraction fails, is read with
+     * {@code s3Service.getObjectAsString(key)} instead.
+     *
+     * @param key S3 object key of the attachment; must not be {@code null}
+     * @return the extracted PDF text, or the object's content read as a string
+     */
+    private String getS3ObjectContent(String key) {
+        // regionMatches with ignoreCase=true accepts ".PDF"/".Pdf" too; a negative offset
+        // (key shorter than the suffix) simply yields false.
+        final var isPDF = key.regionMatches(true, key.length() - 4, ".pdf", 0, 4);
+
+        if (isPDF) {
+            try {
+                final var file = s3Service.getObjectAsFile(key);
+                if (file != null) {
+                    try {
+                        return PDF.generateTextFromPDF(file);
+                    } finally {
+                        // The local copy is only needed for extraction; remove it (best effort)
+                        // so repeated invocations do not accumulate files on local disk.
+                        file.delete();
+                    }
+                }
+                System.err.println("Could not download PDF " + key + "; falling back to raw content");
+            } catch (IOException | UncheckedIOException e) {
+                System.err.println("Could not extract text from PDF " + key + ": " + e);
+            }
+        }
+
+        return s3Service.getObjectAsString(key);
+    }
+
private HashMap> retrieveDocumentMapper(String formKey) {
try {
return documentService.retrieveQuestionsMapper(formKey);
diff --git a/lambdas/FormProcessor/FormRequestProcessorFunction/src/main/java/com/levio/awsdemo/formrequestprocessor/service/S3Service.java b/lambdas/FormProcessor/FormRequestProcessorFunction/src/main/java/com/levio/awsdemo/formrequestprocessor/service/S3Service.java
index b3e4a3a..d25b8b3 100644
--- a/lambdas/FormProcessor/FormRequestProcessorFunction/src/main/java/com/levio/awsdemo/formrequestprocessor/service/S3Service.java
+++ b/lambdas/FormProcessor/FormRequestProcessorFunction/src/main/java/com/levio/awsdemo/formrequestprocessor/service/S3Service.java
@@ -9,7 +9,7 @@
import software.amazon.awssdk.services.s3.model.PutObjectRequest;
import software.amazon.awssdk.services.s3.model.PutObjectResponse;
-import java.io.InputStream;
+import java.io.*;
public class S3Service {
@@ -18,11 +18,27 @@ public class S3Service {
private final S3Client s3 = S3Client.builder()
.region(Region.US_EAST_1)
.build();
- public String getFile(String key) {
+
+    /**
+     * Reads the S3 object stored under {@code key} and decodes its bytes as UTF-8 text.
+     *
+     * @param key S3 object key
+     * @return the object's content as a string
+     */
+    public String getObjectAsString(String key) {
ResponseBytes objectBytes = getObjectResponseBytes(key);
-        return new String(objectBytes.asByteArray());
+        // Decode explicitly as UTF-8 rather than with the platform default charset.
+        return objectBytes.asUtf8String();
}
+    /**
+     * Downloads the S3 object stored under {@code key} into a local file below {@code /tmp}.
+     *
+     * @param key S3 object key; it is reused as the relative path of the local copy
+     * @return the local file holding the object's bytes
+     * @throws IllegalArgumentException if {@code key} resolves to a path outside {@code /tmp}
+     * @throws UncheckedIOException if the parent directories or the file cannot be written
+     */
+    public File getObjectAsFile(String key) {
+        final var tmpDir = new File("/tmp");
+        final var file = new File(tmpDir, key);
+        try {
+            final var objectBytes = getObjectResponseBytes(key);
+
+            // Reject keys whose ".." segments would place the copy outside the temp directory.
+            if (!file.getCanonicalPath().startsWith(tmpDir.getCanonicalPath() + File.separator)) {
+                throw new IllegalArgumentException("Key resolves outside /tmp: " + key);
+            }
+
+            // Keys contain '/' separators, so the nested directories must exist before writing.
+            // Re-check isDirectory() because mkdirs() also returns false when it already exists.
+            final var parent = file.getParentFile();
+            if (parent != null && !parent.mkdirs() && !parent.isDirectory()) {
+                throw new IOException("Could not create directory " + parent);
+            }
+
+            // try-with-resources closes the stream even when write() fails.
+            try (OutputStream os = new FileOutputStream(file)) {
+                os.write(objectBytes.asByteArray());
+            }
+            return file;
+        } catch (IOException e) {
+            // Surface the failure with its cause instead of handing callers a null file.
+            throw new UncheckedIOException("Could not write S3 object " + key + " to " + file, e);
+        }
+    }
+
public InputStream getInputFileStream(String key) {
ResponseBytes objectBytes = getObjectResponseBytes(key);
return objectBytes.asInputStream();
@@ -36,8 +52,10 @@ private ResponseBytes getObjectResponseBytes(String key) {
.build();
return s3.getObjectAsBytes(objectRequest);
+
}
+
public String saveFile(String fileKey, byte[] fileContent) {
PutObjectResponse objectResponse = s3.putObject(
PutObjectRequest.builder()
diff --git a/lambdas/FormProcessor/FormRequestProcessorFunction/src/main/java/com/levio/awsdemo/formrequestprocessor/utils/PDF.java b/lambdas/FormProcessor/FormRequestProcessorFunction/src/main/java/com/levio/awsdemo/formrequestprocessor/utils/PDF.java
new file mode 100644
index 0000000..7b2b460
--- /dev/null
+++ b/lambdas/FormProcessor/FormRequestProcessorFunction/src/main/java/com/levio/awsdemo/formrequestprocessor/utils/PDF.java
@@ -0,0 +1,27 @@
+package com.levio.awsdemo.formrequestprocessor.utils;
+
+import org.apache.pdfbox.Loader;
+import org.apache.pdfbox.cos.COSDocument;
+import org.apache.pdfbox.io.RandomAccessRead;
+import org.apache.pdfbox.pdfparser.PDFParser;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.text.PDFTextStripper;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+
+/** Helper for extracting plain text from PDF files with Apache PDFBox. */
+public class PDF {
+
+    /**
+     * Extracts the text content of a PDF file.
+     *
+     * @param file the PDF file to read
+     * @return the text produced by {@link PDFTextStripper} for the document
+     * @throws IOException if the file cannot be read or parsed as a PDF
+     */
+    public static String generateTextFromPDF(File file) throws IOException {
+        // Loader.loadPDF is the PDFBox 3.x way to open a document. A java.io.RandomAccessFile
+        // is not a PDFBox RandomAccessRead, so casting one for PDFParser fails at runtime.
+        // try-with-resources releases the document and its underlying file handle.
+        try (PDDocument pdDoc = Loader.loadPDF(file)) {
+            return new PDFTextStripper().getText(pdDoc);
+        }
+    }
+}