Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ALS-7014: Implement signed URL functionality for data exports #119

Merged
merged 9 commits into from
Sep 6, 2024
Merged
11 changes: 11 additions & 0 deletions data/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,17 @@
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
</dependency>
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>s3</artifactId>
<exclusions>
<!--Spring boot will complain about this dep on startup. It's not needed (we use SLF4J + Logback)-->
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>

</project>
14 changes: 13 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<dockerfile-maven-version>1.4.10</dockerfile-maven-version>
<aws.version>2.20.153</aws.version>
</properties>
<repositories>
<repository>
Expand Down Expand Up @@ -320,7 +321,18 @@
<artifactId>snappy-java</artifactId>
<version>1.1.10.5</version>
</dependency>

<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>s3</artifactId>
<version>${aws.version}</version>
<exclusions>
<!--Spring boot will complain about this dep on startup. It's not needed (we use SLF4J + Logback)-->
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
</exclusions>
</dependency>

</dependencies>
</dependencyManagement>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ public MediaType getResponseType() {
return responseType;
}

public File getFile() {
return stream.getFile();
}

public static enum Status{
SUCCESS {
@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,4 +117,8 @@ public long estimatedSize() {
public void closeWriter() {
writer.close();
}

public File getFile() {
return writer.getFile();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
package edu.harvard.hms.dbmi.avillach.hpds.processing.upload;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import software.amazon.awssdk.core.sync.RequestBody;
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.GetObjectRequest;
import software.amazon.awssdk.services.s3.model.PutObjectRequest;
import software.amazon.awssdk.services.s3.presigner.S3Presigner;
import software.amazon.awssdk.services.s3.presigner.model.GetObjectPresignRequest;
import software.amazon.awssdk.services.s3.presigner.model.PresignedGetObjectRequest;

import java.io.File;
import java.time.Duration;
import java.util.HashMap;
import java.util.Map;

@Component
public class SignUrlService {

private final String bucketName;
private final int signedUrlExpiryMinutes;
private final Region region;

private static Logger log = LoggerFactory.getLogger(SignUrlService.class);

@Autowired
public SignUrlService(
@Value("${data-export.s3.bucket-name:}") String bucketName,
@Value("${data-export.s3.region:us-east-1}") String region,
@Value("${data-export.s3.signedUrl-expiry-minutes:60}") int signedUrlExpiryMinutes
) {
this.bucketName = bucketName;
this.signedUrlExpiryMinutes = signedUrlExpiryMinutes;
this.region = Region.of(region);
}

public void uploadFile(File file, String objectKey) {
S3Client s3 = S3Client.builder()
.region(region)
.build();

putS3Object(s3, bucketName, objectKey, file);
s3.close();
}

// This example uses RequestBody.fromFile to avoid loading the whole file into
// memory.
public void putS3Object(S3Client s3, String bucketName, String objectKey, File file) {
Map<String, String> metadata = new HashMap<>();
PutObjectRequest putOb = PutObjectRequest.builder()
.bucket(bucketName)
.key(objectKey)
.metadata(metadata)
.build();

s3.putObject(putOb, RequestBody.fromFile(file));
log.info("Successfully placed " + objectKey + " into bucket " + bucketName);
}

public String createPresignedGetUrl(String keyName) {
PresignedGetObjectRequest presignedRequest;
try (S3Presigner presigner = S3Presigner.builder().region(region).build()) {
GetObjectRequest objectRequest = GetObjectRequest.builder()
.bucket(bucketName)
.key(keyName)
.build();

GetObjectPresignRequest presignRequest = GetObjectPresignRequest.builder()
.signatureDuration(Duration.ofMinutes(signedUrlExpiryMinutes)) // The URL will expire in 10 minutes.
.getObjectRequest(objectRequest)
.build();

presignedRequest = presigner.presignGetObject(presignRequest);
}
log.info("Presigned URL: [{}]", presignedRequest.url().toString());

return presignedRequest.url().toExternalForm();
}
}
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
package edu.harvard.hms.dbmi.avillach.hpds.service;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.Map.Entry;
import java.util.stream.Collectors;

import edu.harvard.hms.dbmi.avillach.hpds.data.genotype.InfoColumnMeta;
import edu.harvard.hms.dbmi.avillach.hpds.processing.upload.SignUrlService;
import edu.harvard.hms.dbmi.avillach.hpds.service.util.Paginator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -28,7 +28,6 @@
import edu.harvard.dbmi.avillach.domain.*;
import edu.harvard.dbmi.avillach.util.UUIDv5;
import edu.harvard.hms.dbmi.avillach.hpds.crypto.Crypto;
import edu.harvard.hms.dbmi.avillach.hpds.data.genotype.FileBackedByteIndexedInfoStore;
import edu.harvard.hms.dbmi.avillach.hpds.data.phenotype.ColumnMeta;
import edu.harvard.hms.dbmi.avillach.hpds.data.query.Query;
import edu.harvard.hms.dbmi.avillach.hpds.processing.*;
Expand All @@ -41,13 +40,15 @@ public class PicSureService {

@Autowired
public PicSureService(QueryService queryService, TimelineProcessor timelineProcessor, CountProcessor countProcessor,
VariantListProcessor variantListProcessor, AbstractProcessor abstractProcessor, Paginator paginator) {
VariantListProcessor variantListProcessor, AbstractProcessor abstractProcessor, Paginator paginator,
SignUrlService signUrlService) {
this.queryService = queryService;
this.timelineProcessor = timelineProcessor;
this.countProcessor = countProcessor;
this.variantListProcessor = variantListProcessor;
this.abstractProcessor = abstractProcessor;
this.paginator = paginator;
this.signUrlService = signUrlService;
Crypto.loadDefaultKey();
}

Expand All @@ -67,6 +68,8 @@ public PicSureService(QueryService queryService, TimelineProcessor timelineProce

private final Paginator paginator;

private final SignUrlService signUrlService;

private static final String QUERY_METADATA_FIELD = "queryMetadata";
private static final int RESPONSE_CACHE_SIZE = 50;

Expand Down Expand Up @@ -237,6 +240,37 @@ public ResponseEntity queryResult(@PathVariable("resourceQueryId") UUID queryId,
}
}

@PostMapping(value = "/query/{resourceQueryId}/signed-url")
public ResponseEntity querySignedURL(@PathVariable("resourceQueryId") UUID queryId, @RequestBody QueryRequest resultRequest) throws IOException {
AsyncResult result = queryService.getResultFor(queryId.toString());
if (result == null) {
// This happens sometimes when users immediately request the status for a query
// before it can be initialized. We wait a bit and try again before throwing an
// error.
try {
Thread.sleep(100);
} catch (InterruptedException e) {
return ResponseEntity.status(500).build();
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does waiting do? Are you debouncing a button on the backend?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is boilerplate code copied from the /result endpoint. But true, it should not be needed, no one should call this endpoint until the status returns SUCCESS which would mean the query exists


result = queryService.getResultFor(queryId.toString());
if (result == null) {
return ResponseEntity.status(404).build();
}
}
if (result.getStatus() == AsyncResult.Status.SUCCESS) {
File file = result.getFile();
signUrlService.uploadFile(file, file.getName());
String presignedGetUrl = signUrlService.createPresignedGetUrl(file.getName());
log.info("Presigned url: " + presignedGetUrl);
return ResponseEntity.ok()
.contentType(MediaType.APPLICATION_JSON)
.body(new SignedUrlResponse(presignedGetUrl));
} else {
return ResponseEntity.status(400).body("Status : " + result.getStatus().name());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is 400 a fair response code for pending statuses? Maybe 202?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I think it is appropriate. Or at least consistent with the current workflow of the /result endpoint. The front end polls /status until it is SUCCESS, then calls this endpoint for a signed url, or /result for the document itself

}
}

@PostMapping("/query/{resourceQueryId}/status")
public QueryStatus queryStatus(@PathVariable("resourceQueryId") UUID queryId, @RequestBody QueryRequest request) {
return convertToQueryStatus(queryService.getStatusFor(queryId.toString()));
Expand Down
6 changes: 5 additions & 1 deletion service/src/main/resources/application-bdc-auth.properties
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,8 @@ SMALL_TASK_THREADS = 1
LARGE_TASK_THREADS = 1

hpds.genomicProcessor.impl=localPatientDistributed
HPDS_GENOMIC_DATA_DIRECTORY=/opt/local/hpds/all/
HPDS_GENOMIC_DATA_DIRECTORY=/opt/local/hpds/all/

data-export.s3.bucket-name=pic-sure-auth-dev-data-export
data-export.s3.region=us-east-1
data-export.s3.signedUrl-expiry-minutes=30
Loading