From aca320888eeb1eee51723624f803de7880fee311 Mon Sep 17 00:00:00 2001 From: Luke Sikina Date: Thu, 16 Nov 2023 10:27:13 -0500 Subject: [PATCH] [ALS-5065] Write phenotypic and genomic data to file - We need to be able to upload hpds query results to aws - Most of this work will be done by another service, but we need hpds to write the results to a file prior to upload --- .../avillach/hpds/service/PicSureService.java | 42 ++++++++- .../filesharing/FileSharingService.java | 54 +++++++++++ .../service/filesharing/FileSystemConfig.java | 30 +++++++ .../filesharing/FileSystemService.java | 57 ++++++++++++ .../filesharing/FileSharingServiceTest.java | 90 +++++++++++++++++++ .../filesharing/FileSystemServiceTest.java | 41 +++++++++ 6 files changed, 312 insertions(+), 2 deletions(-) create mode 100644 service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/filesharing/FileSharingService.java create mode 100644 service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/filesharing/FileSystemConfig.java create mode 100644 service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/filesharing/FileSystemService.java create mode 100644 service/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/service/filesharing/FileSharingServiceTest.java create mode 100644 service/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/service/filesharing/FileSystemServiceTest.java diff --git a/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/PicSureService.java b/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/PicSureService.java index 12287521..1b29fad5 100644 --- a/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/PicSureService.java +++ b/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/PicSureService.java @@ -11,6 +11,8 @@ import javax.ws.rs.core.Response.ResponseBuilder; import javax.ws.rs.core.Response.Status; +import edu.harvard.hms.dbmi.avillach.hpds.service.filesharing.FileSharingService; +import edu.harvard.hms.dbmi.avillach.hpds.service.filesharing.FileSystemService; import edu.harvard.hms.dbmi.avillach.hpds.service.util.Paginator; import org.apache.http.entity.ContentType; import org.slf4j.Logger; @@ -34,6 +36,7 @@ import edu.harvard.hms.dbmi.avillach.hpds.data.query.Query; import edu.harvard.hms.dbmi.avillach.hpds.processing.*; import org.springframework.stereotype.Component; +import org.springframework.web.bind.annotation.RequestBody; @Path("PIC-SURE") @Produces("application/json") @@ -42,13 +45,16 @@ public class PicSureService implements IResourceRS { @Autowired public PicSureService(QueryService queryService, TimelineProcessor timelineProcessor, CountProcessor countProcessor, - VariantListProcessor variantListProcessor, AbstractProcessor abstractProcessor, Paginator paginator) { + VariantListProcessor variantListProcessor, AbstractProcessor abstractProcessor, + Paginator paginator, FileSharingService fileSystemService + ) { this.queryService = queryService; this.timelineProcessor = timelineProcessor; this.countProcessor = countProcessor; this.variantListProcessor = variantListProcessor; this.abstractProcessor = abstractProcessor; this.paginator = paginator; + this.fileSystemService = fileSystemService; Crypto.loadDefaultKey(); } @@ -68,6 +74,8 @@ public PicSureService(QueryService queryService, TimelineProcessor timelineProce private final Paginator paginator; + private final FileSharingService fileSystemService; + private static final String QUERY_METADATA_FIELD = "queryMetadata"; private static final int RESPONSE_CACHE_SIZE = 50; @@ -250,6 +258,36 @@ public Response queryResult(@PathParam("resourceQueryId") UUID queryId, QueryReq } } + @POST + @Path("/write/{dataType}") + public Response writeQueryResult( + @RequestBody() Query query, @PathParam("dataType") String datatype + ) { + AsyncResult result = queryService.getResultFor(query.getId()); + // the queryResult has this DIY retry logic that blocks a system thread. + // I'm not going to do that here. If the service can't find it, you get a 404. + // Retry it client side. + if (result == null) { + return Response.status(404).build(); + } + if (result.status == AsyncResult.Status.ERROR) { + return Response.status(500).build(); + } + if (result.status != AsyncResult.Status.SUCCESS) { + return Response.status(503).build(); // 503 = unavailable + } + + // at least for now, this is going to block until we finish writing + // Not very restful, but it will make this API very easy to consume + boolean success = false; + if ("phenotypic".equals(datatype)) { + success = fileSystemService.createPhenotypicData(query); + } else if ("genomic".equals(datatype)) { + success = fileSystemService.createGenomicData(query); + } + return success ? Response.ok().build() : Response.serverError().build(); + } + @POST @Path("/query/{resourceQueryId}/status") @Override @@ -266,7 +304,7 @@ public Response queryFormat(QueryRequest resultRequest) { return Response.ok().entity(convertIncomingQuery(resultRequest).toString()).build(); } catch (IOException e) { return Response.ok() - .entity("An error occurred formatting the query for display: " + e.getLocalizedMessage()).build(); + .entity("An error occurred formatting the query for display: " + e.getLocalizedMessage()).build(); } } diff --git a/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/filesharing/FileSharingService.java b/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/filesharing/FileSharingService.java new file mode 100644 index 00000000..6e3ba149 --- /dev/null +++ b/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/filesharing/FileSharingService.java @@ -0,0 +1,54 @@ +package edu.harvard.hms.dbmi.avillach.hpds.service.filesharing; + +import edu.harvard.hms.dbmi.avillach.hpds.data.query.Query; +import edu.harvard.hms.dbmi.avillach.hpds.data.query.ResultType; +import edu.harvard.hms.dbmi.avillach.hpds.processing.AsyncResult; +import edu.harvard.hms.dbmi.avillach.hpds.processing.VariantListProcessor; +import edu.harvard.hms.dbmi.avillach.hpds.service.QueryService; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; + +import java.io.IOException; +import java.time.Duration; +import java.time.temporal.ChronoUnit; + +/** + * Used for sharing data. Given a query, this service will write + * phenotypic and genomic data into a directory + */ +@Service +public class FileSharingService { + + private static final Logger LOG = LoggerFactory.getLogger(FileSharingService.class); + + @Autowired + private QueryService queryService; + + @Autowired + private FileSystemService fileWriter; + + @Autowired + private VariantListProcessor variantListProcessor; + + public boolean createPhenotypicData(Query phenotypicQuery) { + AsyncResult result = queryService.getResultFor(phenotypicQuery.getId()); + if (result.status != AsyncResult.Status.SUCCESS) { + return false; + } + return fileWriter.writeResultToFile("phenotypic_data.tsv", phenotypicQuery.getId(), result); + } + + public boolean createGenomicData(Query phenotypicQuery) { + String vcf = ""; + try { + vcf = variantListProcessor.runVcfExcerptQuery(phenotypicQuery, true); + } catch (IOException e) { + LOG.error("Error running genomic query", e); + return false; + } + + return fileWriter.writeResultToFile("genomic_data.tsv", phenotypicQuery.getId(), vcf, phenotypicQuery.getId()); + } +} diff --git a/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/filesharing/FileSystemConfig.java b/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/filesharing/FileSystemConfig.java new file mode 100644 index 00000000..1448c8f2 --- /dev/null +++ b/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/filesharing/FileSystemConfig.java @@ -0,0 +1,30 @@ +package edu.harvard.hms.dbmi.avillach.hpds.service.filesharing; + +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +import java.nio.file.Path; + +@Configuration +public class FileSystemConfig { + @Value("${file_sharing_root:/gic_query_results/}") + private String fileSharingRootDir; + + @Value("${enable_file_sharing:false}") + private boolean enableFileSharing; + + @Bean() + Path sharingRoot() { + if (!enableFileSharing) { + return Path.of("/dev/null"); + } + + Path path = Path.of(fileSharingRootDir); + if (!path.toFile().exists()) { + throw new RuntimeException(fileSharingRootDir + " DNE."); + } + + return path; + } +} diff --git a/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/filesharing/FileSystemService.java b/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/filesharing/FileSystemService.java new file mode 100644 index 00000000..09c67106 --- /dev/null +++ b/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/filesharing/FileSystemService.java @@ -0,0 +1,57 @@ +package edu.harvard.hms.dbmi.avillach.hpds.service.filesharing; + +import edu.harvard.hms.dbmi.avillach.hpds.processing.AsyncResult; +import org.apache.commons.io.IOUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Service; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; + +@Service +public class FileSystemService { + + private static final Logger LOG = LoggerFactory.getLogger(FileSystemService.class); + + @Autowired + private Path sharingRoot; + + @Value("${enable_file_sharing:false}") + private boolean enableFileSharing; + + public boolean writeResultToFile(String fileName, String directory, AsyncResult result) { + result.stream.open(); + return writeStreamToFile(fileName, result.stream, result.id); + } + + public boolean writeResultToFile(String fileName, String directory, String result, String id) { + return writeStreamToFile(fileName, new ByteArrayInputStream(result.getBytes()), id); + } + + private boolean writeStreamToFile(String fileName, InputStream content, String queryId) { + if (!enableFileSharing) { + LOG.warn("Attempted to write query result to file while file sharing is disabled. No-op."); + return false; + } + + Path dirPath = Path.of(sharingRoot.toString(), queryId); + Path filePath = Path.of(sharingRoot.toString(), queryId, fileName); + + try { + LOG.info("Writing query {} to file: {}", queryId, filePath); + Files.createDirectory(dirPath); + return Files.copy(content, filePath) > 0; + } catch (IOException e) { + LOG.error("Error writing result.", e); + return false; + } finally { + IOUtils.closeQuietly(content); + } + } +} diff --git a/service/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/service/filesharing/FileSharingServiceTest.java b/service/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/service/filesharing/FileSharingServiceTest.java new file mode 100644 index 00000000..3021ddcf --- /dev/null +++ b/service/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/service/filesharing/FileSharingServiceTest.java @@ -0,0 +1,90 @@ +package edu.harvard.hms.dbmi.avillach.hpds.service.filesharing; + +import edu.harvard.hms.dbmi.avillach.hpds.data.query.Query; +import edu.harvard.hms.dbmi.avillach.hpds.processing.AsyncResult; +import edu.harvard.hms.dbmi.avillach.hpds.processing.VariantListProcessor; +import edu.harvard.hms.dbmi.avillach.hpds.service.QueryService; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.junit.MockitoJUnitRunner; + +import java.io.IOException; + +@RunWith(MockitoJUnitRunner.class) +public class FileSharingServiceTest { + + @Mock + QueryService queryService; + + @Mock + FileSystemService fileWriter; + + @Mock + VariantListProcessor variantListProcessor; + + @InjectMocks + FileSharingService subject; + + @Test + public void shouldCreatePhenotypicData() { + Query query = new Query(); + query.setId("my-id"); + AsyncResult result = new AsyncResult(query, new String[]{}); + result.status = AsyncResult.Status.SUCCESS; + + Mockito.when(queryService.getResultFor("my-id")) + .thenReturn(result); + Mockito.when(fileWriter.writeResultToFile("phenotypic_data.tsv", "my-id", result)) + .thenReturn(true); + + boolean actual = subject.createPhenotypicData(query); + + Assert.assertTrue(actual); + } + + @Test + public void shouldNotCreatePhenotypicData() { + Query query = new Query(); + query.setId("my-id"); + AsyncResult result = new AsyncResult(query, new String[]{}); + result.status = AsyncResult.Status.ERROR; + + Mockito.when(queryService.getResultFor("my-id")) + .thenReturn(result); + + boolean actual = subject.createPhenotypicData(query); + + Assert.assertFalse(actual); + } + + @Test + public void shouldCreateGenomicData() throws IOException { + Query query = new Query(); + query.setId("my-id"); + String vcf = "lol lets put the whole vcf in a string"; + Mockito.when(variantListProcessor.runVcfExcerptQuery(query, true)) + .thenReturn(vcf); + Mockito.when(fileWriter.writeResultToFile("genomic_data.tsv", "my-id", vcf, "my-id")) + .thenReturn(true); + + boolean actual = subject.createGenomicData(query); + + Assert.assertTrue(actual); + } + + @Test + public void shouldNotCreateGenomicData() throws IOException { + Query query = new Query(); + query.setId("my-id"); + Mockito.when(variantListProcessor.runVcfExcerptQuery(query, true)) + .thenThrow(new IOException("oh no!")); + + boolean actual = subject.createGenomicData(query); + + Assert.assertFalse(actual); + } +} \ No newline at end of file diff --git a/service/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/service/filesharing/FileSystemServiceTest.java b/service/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/service/filesharing/FileSystemServiceTest.java new file mode 100644 index 00000000..fc80f566 --- /dev/null +++ b/service/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/service/filesharing/FileSystemServiceTest.java @@ -0,0 +1,41 @@ +package edu.harvard.hms.dbmi.avillach.hpds.service.filesharing; + +import org.junit.Assert; +import org.junit.Test; +import org.springframework.test.util.ReflectionTestUtils; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +public class FileSystemServiceTest { + + @Test + public void shouldWriteToFile() throws IOException { + Path dir = Files.createTempDirectory("my-upload-dir"); + FileSystemService subject = new FileSystemService(); + ReflectionTestUtils.setField(subject, "sharingRoot", dir); + ReflectionTestUtils.setField(subject, "enableFileSharing", true); + String fileContent = "I just got an ad that tried to sell a baguette with moz, dressing, " + + "and tomatoes as a healthy lunch, and that's just so far from the truth that it's bugging me. " + + "Like, come on. It's bread and cheese and oil. I don't care how fresh the tomatoes are."; + + boolean actual = subject.writeResultToFile("out.tsv", dir.toAbsolutePath().toString(), fileContent, "my-id"); + String actualContent = Files.readString(dir.resolve("my-id/out.tsv")); + + Assert.assertTrue(actual); + Assert.assertEquals(fileContent, actualContent); + } + + @Test + public void shouldNotWriteToFile() throws IOException { + Path dir = Files.createTempDirectory("my-upload-dir"); + FileSystemService subject = new FileSystemService(); + ReflectionTestUtils.setField(subject, "sharingRoot", dir); + ReflectionTestUtils.setField(subject, "enableFileSharing", false); + + boolean actual = subject.writeResultToFile("out.tsv", dir.toAbsolutePath().toString(), "", "my-id"); + + Assert.assertFalse(actual); + } +} \ No newline at end of file