From 7625327aaafb662472ee0c16f1c430f70b7078ff Mon Sep 17 00:00:00 2001 From: Kathy Tran Date: Wed, 24 Jan 2024 10:30:47 -0500 Subject: [PATCH] Split workflow executions in half if request body size is too big (#481) https://ucsc-cgl.atlassian.net/browse/SEAB-6183 --- THIRD-PARTY-LICENSES.txt | 12 ++-- metricsaggregator/pom.xml | 5 ++ .../client/cli/TerraMetricsSubmitter.java | 59 ++++++++++++++++--- pom.xml | 2 +- 4 files changed, 63 insertions(+), 15 deletions(-) diff --git a/THIRD-PARTY-LICENSES.txt b/THIRD-PARTY-LICENSES.txt index 8090106c..134abfad 100644 --- a/THIRD-PARTY-LICENSES.txt +++ b/THIRD-PARTY-LICENSES.txt @@ -131,10 +131,10 @@ Lists of 417 third-party dependencies. (The Apache Software License, Version 2.0) docker-java-core (com.github.docker-java:docker-java-core:3.3.0 - https://github.com/docker-java/docker-java) (The Apache Software License, Version 2.0) docker-java-transport (com.github.docker-java:docker-java-transport:3.3.0 - https://github.com/docker-java/docker-java) (The Apache Software License, Version 2.0) docker-java-transport-httpclient5 (com.github.docker-java:docker-java-transport-httpclient5:3.3.0 - https://github.com/docker-java/docker-java) - (Apache Software License, Version 2.0) dockstore-common (io.dockstore:dockstore-common:1.15.0-rc.0 - no url defined) - (Apache Software License, Version 2.0) dockstore-integration-testing (io.dockstore:dockstore-integration-testing:1.15.0-rc.0 - no url defined) - (Apache Software License, Version 2.0) dockstore-language-plugin-parent (io.dockstore:dockstore-language-plugin-parent:1.15.0-rc.0 - no url defined) - (Apache Software License, Version 2.0) dockstore-webservice (io.dockstore:dockstore-webservice:1.15.0-rc.0 - no url defined) + (Apache Software License, Version 2.0) dockstore-common (io.dockstore:dockstore-common:1.15.0-rc.1 - no url defined) + (Apache Software License, Version 2.0) dockstore-integration-testing (io.dockstore:dockstore-integration-testing:1.15.0-rc.1 - no url defined) + 
(Apache Software License, Version 2.0) dockstore-language-plugin-parent (io.dockstore:dockstore-language-plugin-parent:1.15.0-rc.1 - no url defined) + (Apache Software License, Version 2.0) dockstore-webservice (io.dockstore:dockstore-webservice:1.15.0-rc.1 - no url defined) (Apache License 2.0) Dropwizard (io.dropwizard:dropwizard-core:4.0.2 - http://www.dropwizard.io/4.0.2/dropwizard-bom/dropwizard-dependencies/dropwizard-parent/dropwizard-core) (Apache License 2.0) Dropwizard Asset Bundle (io.dropwizard:dropwizard-assets:4.0.2 - http://www.dropwizard.io/4.0.2/dropwizard-bom/dropwizard-dependencies/dropwizard-parent/dropwizard-assets) (Apache License 2.0) Dropwizard Authentication (io.dropwizard:dropwizard-auth:4.0.2 - http://www.dropwizard.io/4.0.2/dropwizard-bom/dropwizard-dependencies/dropwizard-parent/dropwizard-auth) @@ -354,7 +354,7 @@ Lists of 417 third-party dependencies. (Apache License, Version 2.0) Objenesis (org.objenesis:objenesis:3.2 - http://objenesis.org/objenesis) (The Apache Software License, Version 2.0) okhttp (com.squareup.okhttp3:okhttp:4.10.0 - https://square.github.io/okhttp/) (The Apache Software License, Version 2.0) okio (com.squareup.okio:okio-jvm:3.0.0 - https://github.com/square/okio/) - (Apache Software License, Version 2.0) openapi-java-client (io.dockstore:openapi-java-client:1.15.0-rc.0 - no url defined) + (Apache Software License, Version 2.0) openapi-java-client (io.dockstore:openapi-java-client:1.15.0-rc.1 - no url defined) (The Apache License, Version 2.0) OpenCensus (io.opencensus:opencensus-api:0.31.0 - https://github.com/census-instrumentation/opencensus-java) (Apache 2) opencsv (com.opencsv:opencsv:5.7.1 - http://opencsv.sf.net) (Apache 2.0) optics (io.circe:circe-optics_2.13:0.14.1 - https://github.com/circe/circe-optics) @@ -395,7 +395,7 @@ Lists of 417 third-party dependencies. 
(Apache License 2.0) swagger-core-jakarta (io.swagger.core.v3:swagger-core-jakarta:2.2.15 - https://github.com/swagger-api/swagger-core/modules/swagger-core-jakarta) (Apache License 2.0) swagger-integration-jakarta (io.swagger.core.v3:swagger-integration-jakarta:2.2.15 - https://github.com/swagger-api/swagger-core/modules/swagger-integration-jakarta) (Apache Software License, Version 2.0) swagger-java-bitbucket-client (io.dockstore:swagger-java-bitbucket-client:2.0.3 - no url defined) - (Apache Software License, Version 2.0) swagger-java-client (io.dockstore:swagger-java-client:1.15.0-rc.0 - no url defined) + (Apache Software License, Version 2.0) swagger-java-client (io.dockstore:swagger-java-client:1.15.0-rc.1 - no url defined) (Apache Software License, Version 2.0) swagger-java-discourse-client (io.dockstore:swagger-java-discourse-client:2.0.1 - no url defined) (Apache Software License, Version 2.0) swagger-java-quay-client (io.dockstore:swagger-java-quay-client:2.0.2 - no url defined) (Apache Software License, Version 2.0) swagger-java-sam-client (io.dockstore:swagger-java-sam-client:2.0.2 - no url defined) diff --git a/metricsaggregator/pom.xml b/metricsaggregator/pom.xml index 362f0e83..09695036 100644 --- a/metricsaggregator/pom.xml +++ b/metricsaggregator/pom.xml @@ -156,6 +156,10 @@ org.apache.commons commons-csv + + com.google.guava + guava + org.junit.jupiter junit-jupiter-api @@ -292,6 +296,7 @@ org.javamoney.moneta:moneta-core ch.qos.logback:logback-classic ch.qos.logback:logback-core + com.google.guava:guava diff --git a/metricsaggregator/src/main/java/io/dockstore/metricsaggregator/client/cli/TerraMetricsSubmitter.java b/metricsaggregator/src/main/java/io/dockstore/metricsaggregator/client/cli/TerraMetricsSubmitter.java index 70dc2df4..24cabbc3 100644 --- a/metricsaggregator/src/main/java/io/dockstore/metricsaggregator/client/cli/TerraMetricsSubmitter.java +++ 
b/metricsaggregator/src/main/java/io/dockstore/metricsaggregator/client/cli/TerraMetricsSubmitter.java @@ -5,6 +5,8 @@ import static io.dockstore.utils.ExceptionHandler.exceptionMessage; import static java.util.stream.Collectors.groupingBy; +import com.google.common.collect.Lists; +import com.google.common.math.IntMath; import io.dockstore.common.Partner; import io.dockstore.metricsaggregator.MetricsAggregatorConfig; import io.dockstore.metricsaggregator.client.cli.CommandLineArgs.SubmitTerraMetrics; @@ -22,6 +24,7 @@ import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; +import java.math.RoundingMode; import java.nio.charset.StandardCharsets; import java.time.Instant; import java.time.LocalDateTime; @@ -44,6 +47,7 @@ import org.apache.commons.csv.CSVPrinter; import org.apache.commons.csv.CSVRecord; import org.apache.commons.lang3.StringUtils; +import org.apache.http.HttpStatus; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -167,21 +171,60 @@ private void submitWorkflowExecutions(String sourceUrl, List workflow } final SourceUrlTrsInfo sourceUrlTrsInfo = sourceUrlToSourceUrlTrsInfo.get(sourceUrl); - final List workflowExecutionsToSubmit = workflowMetricRecords.stream() + List workflowExecutionsToSubmit = workflowMetricRecords.stream() .map(workflowExecution -> getTerraWorkflowExecutionFromCsvRecord(workflowExecution, sourceUrlTrsInfo.sourceUrl(), skippedExecutionsCsvPrinter)) .filter(Optional::isPresent) .map(Optional::get) .toList(); - final ExecutionsRequestBody executionsRequestBody = new ExecutionsRequestBody().runExecutions(workflowExecutionsToSubmit); + String description = "Submitted using the metricsaggregator's submit-terra-metrics command"; + if (StringUtils.isNotBlank(submitTerraMetricsCommand.getDescription())) { + description += ". 
" + submitTerraMetricsCommand.getDescription(); + } + + executionMetricsPost(workflowExecutionsToSubmit, sourceUrlTrsInfo, description, extendedGa4GhApi, workflowMetricRecords, skippedExecutionsCsvPrinter); + } + + /** + * Submit Terra workflow executions to Dockstore. + * If the request fails with a 413 Request Entity Too Large and there is more than one execution to submit, the function partitions the list of workflow executions using a partition size of half the list size (rounded up) and recursively re-attempts submission of each partition + * until it's successful or a non-413 error occurs. A single execution that still triggers a 413, or any non-413 error, is reported through logSkippedExecutions and not retried. + * NOTE(review): on success the submitted counter is incremented by workflowMetricRecords.size(), and on failure all of workflowMetricRecords are passed to logSkippedExecutions, even though recursive calls submit only a partition while passing the same full record list; confirm that counts and skipped-record logs are not duplicated after a split. + * @param workflowExecutionsToSubmit the workflow executions to send in this request + * @param sourceUrlTrsInfo supplies the TRS ID, version and source URL used for the request and for logging + * @param description description sent along with the submission + * @param extendedGa4GhApi API client used to post the executions + * @param workflowMetricRecords the records from which the executions were derived; their count is added to the submitted total on success and they are passed to logSkippedExecutions on failure + * @param skippedExecutionsCsvPrinter printer passed to logSkippedExecutions when executions are skipped + */ + private void executionMetricsPost(List workflowExecutionsToSubmit, SourceUrlTrsInfo sourceUrlTrsInfo, String description, ExtendedGa4GhApi extendedGa4GhApi, List workflowMetricRecords, CSVPrinter skippedExecutionsCsvPrinter) { try { - String description = "Submitted using the metricsaggregator's submit-terra-metrics command"; - if (StringUtils.isNotBlank(submitTerraMetricsCommand.getDescription())) { - description += ". 
" + submitTerraMetricsCommand.getDescription(); - } - extendedGa4GhApi.executionMetricsPost(executionsRequestBody, Partner.TERRA.toString(), sourceUrlTrsInfo.trsId(), sourceUrlTrsInfo.version(), description); + extendedGa4GhApi.executionMetricsPost(new ExecutionsRequestBody().runExecutions(workflowExecutionsToSubmit), Partner.TERRA.toString(), sourceUrlTrsInfo.trsId(), + sourceUrlTrsInfo.version(), description); numberOfExecutionsSubmitted.addAndGet(workflowMetricRecords.size()); } catch (ApiException e) { - logSkippedExecutions(sourceUrlTrsInfo.sourceUrl(), workflowMetricRecords, String.format("Could not submit execution metrics to Dockstore for workflow %s: %s", sourceUrlTrsInfo, e.getMessage()), skippedExecutionsCsvPrinter, false); + if (e.getCode() == HttpStatus.SC_REQUEST_TOO_LONG) { + // One execution is too large, not much that can be done, so log and skip it + if (workflowExecutionsToSubmit.size() == 1) { + logSkippedExecutions(sourceUrlTrsInfo.sourceUrl(), workflowMetricRecords, + String.format("Could not submit execution metric to Dockstore for workflow %s. 
Single execution is too large: %s", sourceUrlTrsInfo, + e.getMessage()), skippedExecutionsCsvPrinter, false); + } else { + int partitionSize = IntMath.divide(workflowExecutionsToSubmit.size(), 2, RoundingMode.UP); + List> workflowExecutionsToSubmitPartitions = Lists.partition(workflowExecutionsToSubmit, + partitionSize); + LOG.info( + "Request body too large, dividing list of {} workflow executions in half with partition size {} and re-attempting", + workflowExecutionsToSubmit.size(), partitionSize); + for (List partition : workflowExecutionsToSubmitPartitions) { + LOG.info("Re-attempting with {} workflow executions", partition.size()); + executionMetricsPost(partition, sourceUrlTrsInfo, description, extendedGa4GhApi, workflowMetricRecords, + skippedExecutionsCsvPrinter); + } + } + } else { + logSkippedExecutions(sourceUrlTrsInfo.sourceUrl(), workflowMetricRecords, + String.format("Could not submit execution metrics to Dockstore for workflow %s: %s", sourceUrlTrsInfo, + e.getMessage()), skippedExecutionsCsvPrinter, false); + } } } diff --git a/pom.xml b/pom.xml index 79e132cd..9240d322 100644 --- a/pom.xml +++ b/pom.xml @@ -38,7 +38,7 @@ scm:git:git@github.com:dockstore/dockstore-support.git UTF-8 - 1.15.0-rc.0 + 1.15.0-rc.1 3.0.0-M5 2.22.2 false