Skip to content

Commit

Permalink
add a method to get s3 object as file, add utils to parse pdf file to…
Browse files Browse the repository at this point in the history
… text.
  • Loading branch information
Joel Balcaen committed Apr 3, 2024
1 parent dfc6d45 commit 9226a22
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 7 deletions.
6 changes: 6 additions & 0 deletions lambdas/FormProcessor/FormRequestProcessorFunction/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,12 @@
<artifactId>jackson-datatype-joda</artifactId>
<version>2.15.3</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>3.0.2</version>
</dependency>

</dependencies>

<build>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,17 @@
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.datatype.joda.JodaModule;
import com.levio.awsdemo.formrequestprocessor.service.*;
import com.levio.awsdemo.formrequestprocessor.utils.PDF;
import jakarta.mail.MessagingException;
import jakarta.mail.internet.InternetAddress;
import jakarta.mail.internet.MimeMessage;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.io.RandomAccessRead;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
Expand Down Expand Up @@ -69,14 +73,14 @@ public Void handleRequest(final SQSEvent input, final Context context) {
var formKey = formFillRequest.getFormKey();
var questionsMapper = retrieveDocumentMapper(formKey);

String email = s3Service.getFile(formKey + "/email/" + formFillRequest.getEmailId());
String email = s3Service.getObjectAsString(formKey + "/email/" + formFillRequest.getEmailId());
try {
MimeMessage message = mailService.getMimeMessage(new ByteArrayInputStream(email.getBytes(StandardCharsets.UTF_8)));
String emailBody = "Formulaire response";
String sender = ((InternetAddress) message.getFrom()[0]).getAddress();
String subject = message.getSubject();

String content = s3Service.getFile(attachmentKey);
String content = getS3ObjectContent(attachmentKey);

questionsMapper.entrySet().parallelStream()
.forEach(positionQuestionAnswerMapper -> {
Expand Down Expand Up @@ -118,6 +122,24 @@ private static Map<String, SQSEvent.MessageAttribute> getMessageAttributes(Strin
return messageAttributes;
}


/**
 * Loads the S3 object stored under {@code key} as text.
 *
 * <p>Keys ending in {@code .pdf} (any letter case) are downloaded to a local file and run
 * through {@link PDF#generateTextFromPDF(File)}. Every other key, and any PDF that could not
 * be downloaded or parsed, is returned as the object's raw content decoded to a string.
 *
 * @param key S3 object key of the attachment
 * @return the extracted PDF text, or the raw object content as a string
 */
private String getS3ObjectContent(String key) {
    // Case-insensitive suffix match so keys ending in ".PDF" are parsed as PDFs too.
    // regionMatches returns false (rather than throwing) when the key is shorter than the suffix.
    final var pdfSuffix = ".pdf";
    final var isPDF = key.regionMatches(
            true, key.length() - pdfSuffix.length(), pdfSuffix, 0, pdfSuffix.length());

    if (isPDF) {
        // getObjectAsFile reports failure by returning null; guard so it triggers the
        // fallback below instead of a NullPointerException inside the PDF parser.
        final var file = s3Service.getObjectAsFile(key);
        if (file == null) {
            System.err.println("Could not download " + key + " to a local file; using raw content");
        } else {
            try {
                return PDF.generateTextFromPDF(file);
            } catch (IOException e) {
                System.err.println("Could not extract text from " + key + "; using raw content");
                e.printStackTrace();
            } finally {
                // Best-effort cleanup: the local copy is only needed for parsing.
                file.delete();
            }
        }
    }

    return s3Service.getObjectAsString(key);
}

private HashMap<Integer, Map<String, String>> retrieveDocumentMapper(String formKey) {
try {
return documentService.retrieveQuestionsMapper(formKey);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import software.amazon.awssdk.services.s3.model.PutObjectRequest;
import software.amazon.awssdk.services.s3.model.PutObjectResponse;

import java.io.InputStream;
import java.io.*;

public class S3Service {

Expand All @@ -18,11 +18,27 @@ public class S3Service {
// S3 client used for this service's S3 calls; the region is hard-coded to US_EAST_1.
private final S3Client s3 = S3Client.builder()
.region(Region.US_EAST_1)
.build();
public String getFile(String key) {

/**
 * Downloads the S3 object stored under {@code key} and decodes its bytes as UTF-8 text.
 *
 * @param key S3 object key
 * @return the object's content as a string
 */
public String getObjectAsString(String key) {
    ResponseBytes<GetObjectResponse> objectBytes = getObjectResponseBytes(key);
    // Decode explicitly as UTF-8 instead of the platform default charset, so the result
    // does not depend on the runtime's file.encoding setting.
    return objectBytes.asUtf8String();
}

/**
 * Downloads the S3 object stored under {@code key} into a local file at {@code /tmp/<key>}.
 *
 * <p>Keys may contain {@code /} separators, so any missing parent directories of the target
 * file are created before writing.
 *
 * @param key S3 object key; also used as the path of the local copy relative to {@code /tmp/}
 * @return the written file, or {@code null} if the local copy could not be created or written
 */
public File getObjectAsFile(String key) {
    try {
        ResponseBytes<GetObjectResponse> objectBytes = getObjectResponseBytes(key);
        final var file = new File("/tmp/" + key);

        // mkdirs() returns false when the directory already exists (or was created
        // concurrently), so only treat it as a failure if the directory is still missing.
        final var parent = file.getParentFile();
        if (parent != null && !parent.mkdirs() && !parent.isDirectory()) {
            throw new IOException("Unable to create directory " + parent);
        }

        // try-with-resources closes the stream even when the write fails.
        try (OutputStream os = new FileOutputStream(file)) {
            os.write(objectBytes.asByteArray());
        }
        return file;
    } catch (IOException e) {
        e.printStackTrace();
    }

    // Failure is signalled with null; callers must check before using the result.
    return null;
}

public InputStream getInputFileStream(String key) {
ResponseBytes<GetObjectResponse> objectBytes = getObjectResponseBytes(key);
return objectBytes.asInputStream();
Expand All @@ -36,8 +52,10 @@ private ResponseBytes<GetObjectResponse> getObjectResponseBytes(String key) {
.build();

return s3.getObjectAsBytes(objectRequest);

}


public String saveFile(String fileKey, byte[] fileContent) {
PutObjectResponse objectResponse = s3.putObject(
PutObjectRequest.builder()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package com.levio.awsdemo.formrequestprocessor.utils;

import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.io.RandomAccessRead;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;

import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;

public class PDF {

public static String generateTextFromPDF(File file) throws IOException {
String parsedText;
PDFParser parser = new PDFParser((RandomAccessRead) new RandomAccessFile(file, "r"));
parser.parse();

COSDocument cosDoc = parser.parse().getDocument();
PDFTextStripper pdfStripper = new PDFTextStripper();
PDDocument pdDoc = new PDDocument(cosDoc);
parsedText = pdfStripper.getText(pdDoc);

return parsedText;
}
}

0 comments on commit 9226a22

Please sign in to comment.