From c406d890c14338f30248b532f67ab675b31e15b0 Mon Sep 17 00:00:00 2001 From: jsotobroad Date: Fri, 22 Nov 2024 13:18:38 -0500 Subject: [PATCH] TSPS-174 renaming non cosmetic things from tsps to teaspoons (#93) Co-authored-by: Jose Soto --- .github/actions/create-bee/action.yml | 2 +- .github/workflows/build-and-test.yml | 2 +- .github/workflows/run-e2e-tests.yaml | 6 +- .github/workflows/tag-publish.yml | 2 +- .github/workflows/workflow-tester.yml | 2 +- README.md | 14 +- scripts/write-config.sh | 2 +- service/build.gradle | 2 +- .../dependencies/stairway/JobService.java | 6 +- .../service/PipelineRunsService.java | 2 +- service/src/main/resources/application.yml | 4 +- service/src/main/resources/db/changelog.xml | 35 +- .../resources/db/changesets/20221025.yaml | 73 ---- .../resources/db/changesets/20221128.yaml | 52 --- .../resources/db/changesets/20230711.yaml | 25 -- .../resources/db/changesets/20240104.yaml | 23 -- .../resources/db/changesets/20240202.yaml | 73 ---- ...12_add_pipeline_input_defs_and_beagle.yaml | 99 ----- ...0240417_add_workspace_id_to_pipelines.yaml | 13 - .../db/changesets/20240429_add_vcf_type.yaml | 14 - .../20240502_add_imputation_inputs.yaml | 111 ------ ...40520_add_outputbasename_input_beagle.yaml | 27 -- ...9_convert_jobs_table_to_pipeline_runs.yaml | 84 ---- ...10_change_beagle_reference_path_input.yaml | 17 - ...control_workspace_id_to_pipeline_runs.yaml | 12 - ...dl_variable_name_to_input_definitions.yaml | 71 ---- ...orkspace_storage_url_to_pipeline_runs.yaml | 12 - ...40711_add_pipeline_output_definitions.yaml | 112 ------ ...x_to_input_defs_and_update_FILE_types.yaml | 31 -- ..._workspace_project_and_workspace_name.yaml | 26 -- ...0_add_workspace_storage_container_url.yaml | 12 - ...d_wdl_version_update_pipeline_version.yaml | 23 -- ...roject_rename_storage_container_field.yaml | 26 -- .../20240903_rename_workspace_project.yaml | 14 - ...40906_add_wdl_version_to_pipeline_run.yaml | 12 - 
.../changesets/20240911_remove_isSuccess.yaml | 11 - ...240923_update_input_defs_bucket_paths.yaml | 30 -- ...0240925_update_ref_panel_prefix_input.yaml | 14 - .../20241030_add_pipeline_quotas.yaml | 38 -- .../changesets/20241103_add_user_quotas.yaml | 51 --- .../db/changesets/20241106-add-indexes.yaml | 34 ++ .../20241106-base-data-insertion.yaml | 239 ++++++++++++ .../db/changesets/20241106-base-tables.yaml | 366 ++++++++++++++++++ .../{20240412-testdata.yaml => testdata.yaml} | 10 +- .../terra/pipelines/testutils/TestUtils.java | 4 +- 45 files changed, 672 insertions(+), 1166 deletions(-) delete mode 100644 service/src/main/resources/db/changesets/20221025.yaml delete mode 100644 service/src/main/resources/db/changesets/20221128.yaml delete mode 100644 service/src/main/resources/db/changesets/20230711.yaml delete mode 100644 service/src/main/resources/db/changesets/20240104.yaml delete mode 100644 service/src/main/resources/db/changesets/20240202.yaml delete mode 100644 service/src/main/resources/db/changesets/20240412_add_pipeline_input_defs_and_beagle.yaml delete mode 100644 service/src/main/resources/db/changesets/20240417_add_workspace_id_to_pipelines.yaml delete mode 100644 service/src/main/resources/db/changesets/20240429_add_vcf_type.yaml delete mode 100644 service/src/main/resources/db/changesets/20240502_add_imputation_inputs.yaml delete mode 100644 service/src/main/resources/db/changesets/20240520_add_outputbasename_input_beagle.yaml delete mode 100644 service/src/main/resources/db/changesets/20240529_convert_jobs_table_to_pipeline_runs.yaml delete mode 100644 service/src/main/resources/db/changesets/20240610_change_beagle_reference_path_input.yaml delete mode 100644 service/src/main/resources/db/changesets/20240611_add_control_workspace_id_to_pipeline_runs.yaml delete mode 100644 service/src/main/resources/db/changesets/20240621_add_wdl_variable_name_to_input_definitions.yaml delete mode 100644 
service/src/main/resources/db/changesets/20240625_add_workspace_storage_url_to_pipeline_runs.yaml delete mode 100644 service/src/main/resources/db/changesets/20240711_add_pipeline_output_definitions.yaml delete mode 100644 service/src/main/resources/db/changesets/20240716_add_file_suffix_to_input_defs_and_update_FILE_types.yaml delete mode 100644 service/src/main/resources/db/changesets/20240806_add_workspace_project_and_workspace_name.yaml delete mode 100644 service/src/main/resources/db/changesets/20240820_add_workspace_storage_container_url.yaml delete mode 100644 service/src/main/resources/db/changesets/20240824_add_wdl_version_update_pipeline_version.yaml delete mode 100644 service/src/main/resources/db/changesets/20240830_add_workspace_google_project_rename_storage_container_field.yaml delete mode 100644 service/src/main/resources/db/changesets/20240903_rename_workspace_project.yaml delete mode 100644 service/src/main/resources/db/changesets/20240906_add_wdl_version_to_pipeline_run.yaml delete mode 100644 service/src/main/resources/db/changesets/20240911_remove_isSuccess.yaml delete mode 100644 service/src/main/resources/db/changesets/20240923_update_input_defs_bucket_paths.yaml delete mode 100644 service/src/main/resources/db/changesets/20240925_update_ref_panel_prefix_input.yaml delete mode 100644 service/src/main/resources/db/changesets/20241030_add_pipeline_quotas.yaml delete mode 100644 service/src/main/resources/db/changesets/20241103_add_user_quotas.yaml create mode 100644 service/src/main/resources/db/changesets/20241106-add-indexes.yaml create mode 100644 service/src/main/resources/db/changesets/20241106-base-data-insertion.yaml create mode 100644 service/src/main/resources/db/changesets/20241106-base-tables.yaml rename service/src/main/resources/db/changesets/{20240412-testdata.yaml => testdata.yaml} (75%) diff --git a/.github/actions/create-bee/action.yml b/.github/actions/create-bee/action.yml index d79abb6a..5843c54a 100644 --- 
a/.github/actions/create-bee/action.yml +++ b/.github/actions/create-bee/action.yml @@ -14,7 +14,7 @@ inputs: required: false type: string custom_version_json: - description: 'json containing custom versions to push. e.g. {"tsps":{"appVersion":"0.0.81-66ceced"}}' + description: 'json containing custom versions to push. e.g. {"teaspoons":{"appVersion":"0.0.81-66ceced"}}' required: false type: string diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index fd62cd72..7aeee8e1 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -136,7 +136,7 @@ jobs: SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} with: status: failure - channel: "#terra-tsps-alerts" + channel: "#terra-teaspoons-alerts" username: "Teaspoons push to main branch" author_name: "build-and-test" icon_emoji: ":triangular_ruler:" diff --git a/.github/workflows/run-e2e-tests.yaml b/.github/workflows/run-e2e-tests.yaml index 50e0cbaa..525530ca 100644 --- a/.github/workflows/run-e2e-tests.yaml +++ b/.github/workflows/run-e2e-tests.yaml @@ -42,7 +42,7 @@ jobs: if [ -z ${{ inputs.custom-app-version }} ]; then echo "custom-app-version-formatted={}" >> $GITHUB_OUTPUT else - echo "custom-app-version-formatted={\\\"tsps\\\": {\\\"appVersion\\\":\\\"${{ inputs.custom-app-version }}\\\"} }" >> $GITHUB_OUTPUT + echo "custom-app-version-formatted={\\\"teaspoons\\\": {\\\"appVersion\\\":\\\"${{ inputs.custom-app-version }}\\\"} }" >> $GITHUB_OUTPUT fi project_name=$(echo "tmp-billing-project-$(uuidgen)" | cut -c -30) echo "project_name=${project_name}" >> $GITHUB_OUTPUT @@ -89,7 +89,7 @@ jobs: permissions: contents: read id-token: write - uses: broadinstitute/dsp-reusable-workflows/.github/workflows/run_tsps_e2e_tests.yaml@main + uses: broadinstitute/dsp-reusable-workflows/.github/workflows/run_teaspoons_e2e_tests.yaml@main with: billing-project-name: '${{ needs.init-github-context-and-params-gen.outputs.project-name }}' bee-name: '${{ 
needs.init-github-context-and-params-gen.outputs.bee-name }}' @@ -116,6 +116,6 @@ jobs: uses: broadinstitute/sherlock/.github/workflows/client-report-workflow.yaml@main if: github.ref == 'refs/heads/main' with: - notify-slack-channels-upon-workflow-completion: "#terra-tsps-alerts" + notify-slack-channels-upon-workflow-completion: "#terra-teaspoons-alerts" permissions: id-token: write diff --git a/.github/workflows/tag-publish.yml b/.github/workflows/tag-publish.yml index 870d2354..e52f2ac1 100644 --- a/.github/workflows/tag-publish.yml +++ b/.github/workflows/tag-publish.yml @@ -216,7 +216,7 @@ jobs: env: SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} with: - channel: '#terra-tsps-alerts' + channel: '#terra-teaspoons-alerts' status: failure author_name: Publish image fields: job diff --git a/.github/workflows/workflow-tester.yml b/.github/workflows/workflow-tester.yml index 7a947fd9..f6b42ff5 100644 --- a/.github/workflows/workflow-tester.yml +++ b/.github/workflows/workflow-tester.yml @@ -100,7 +100,7 @@ jobs: SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} with: status: ${{ job.status }} - channel: "#terra-tsps-alerts" + channel: "#terra-teaspoons-alerts" username: "Teaspoons push to main branch" author_name: "build-and-test" icon_emoji: ":triangular_ruler:" diff --git a/README.md b/README.md index 13eadf58..f6d98f2a 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ ## Overview -Terra Scientific Pipelines Service, or teaspoons, facilitates running a number of defined scientific pipelines +Terra Scientific Pipelines Service, or Teaspoons, facilitates running a number of defined scientific pipelines on behalf of users that users can't run themselves in Terra. The most common reason for this is that the pipeline accesses proprietary data that users are not allowed to access directly, but that may be used as e.g. a reference panel for imputation. 
@@ -61,7 +61,7 @@ To run locally: If using Intellij (only IDE we use on the team), you can run the server with a debugger. Follow the steps above but instead of running `./gradlew bootRun` to spin up the server, you can run (debug) the App.java class through intellij and set breakpoints in the code. Be sure to set the -GOOGLE_APPLICATION_CREDENTIALS=config/tsps-sa.json in the Run/Debug configuration Environment Variables. +GOOGLE_APPLICATION_CREDENTIALS=config/teaspoons-sa.json in the Run/Debug configuration Environment Variables. ### Testing the CLI locally If you make changes to [openapi.yml](common/openapi.yml), you should test the CLI locally. @@ -98,7 +98,7 @@ SonarQube and want to debug the problem locally, you need to get the sonar token before running the gradle task. ```shell -export SONAR_TOKEN=$(vault read -field=sonar_token secret/secops/ci/sonarcloud/tsps) +export SONAR_TOKEN=$(vault read -field=sonar_token secret/secops/ci/sonarcloud/teaspoons) ./gradlew sonarqube ``` @@ -116,7 +116,7 @@ does all the setup for you. Clone that repo and make sure you're either on Broad to the VPN. Then run the following command: ```shell -./db/psql-connect.sh dev tsps +./db/psql-connect.sh dev teaspoons ``` ### Deploying to dev @@ -125,8 +125,8 @@ Upon merging to main, the dev environment will be automatically deployed via the (that workflow is defined [here](https://github.com/DataBiosphere/terra-scientific-pipelines-service/blob/main/.github/workflows/tag-publish.yml)). The two tasks `report-to-sherlock` and `set-version-in-dev` will prompt Sherlock to deploy the new version to dev. -You can check the status of the deployment in [Beehive](https://beehive.dsp-devops.broadinstitute.org/apps/tsps) and in -[ArgoCD](https://ap-argocd.dsp-devops.broadinstitute.org/applications/ap-argocd/tsps-dev). 
+You can check the status of the deployment in [Beehive](https://beehive.dsp-devops.broadinstitute.org/apps/teaspoons) and in +[ArgoCD](https://ap-argocd.dsp-devops.broadinstitute.org/applications/ap-argocd/teaspoons-dev). For more information about deployment to dev, check out DevOps' [excellent documentation](https://docs.google.com/document/d/1lkUkN2KOpHKWufaqw_RIE7EN3vN4G2xMnYBU83gi8VA/). @@ -139,7 +139,7 @@ See [this DSP blog post](https://broadworkbench.atlassian.net/wiki/x/AoGlrg) for ### Running the end-to-end tests The end-to-end test is specified in `.github/workflows/run-e2e-tests.yaml`. It calls [the test script defined -in the dsp-reusable-workflows repo](https://github.com/broadinstitute/dsp-reusable-workflows/blob/main/e2e-test/tsps_e2e_test.py). +in the dsp-reusable-workflows repo](https://github.com/broadinstitute/dsp-reusable-workflows/blob/main/e2e-test/teaspoons_gcp_e2e_test.py). The end-to-end test is automatically run nightly on the dev environment. diff --git a/scripts/write-config.sh b/scripts/write-config.sh index 31b08b36..1e62ec24 100755 --- a/scripts/write-config.sh +++ b/scripts/write-config.sh @@ -181,7 +181,7 @@ function vaultgetdb { } # grab teaspoons service account json from vault -vaultget "secret/dsde/firecloud/${fcenv}/tsps/tsps-account.json" "${outputdir}/tsps-sa.json" +vaultget "secret/dsde/firecloud/${fcenv}/teaspoons/teaspoons-account.json" "${outputdir}/teaspoons-sa.json" # We made it to the end, so record the target and avoid redos echo "$target" > "${outputdir}/target.txt" diff --git a/service/build.gradle b/service/build.gradle index 05369a65..47ad118c 100644 --- a/service/build.gradle +++ b/service/build.gradle @@ -119,7 +119,7 @@ dependencies { // set GOOGLE_APPLICATION_CREDENTIALS if this file exists - should only exist when // write-config.sh is run. 
// GOOGLE_APPLICATION_CREDENTIALS is set for us when running in a deployed environment -def googleCredentialsFile = "${rootDir}/config/tsps-sa.json" +def googleCredentialsFile = "${rootDir}/config/teaspoons-sa.json" bootRun { if(project.file(googleCredentialsFile).exists()) { environment.put("GOOGLE_APPLICATION_CREDENTIALS", "${googleCredentialsFile}") diff --git a/service/src/main/java/bio/terra/pipelines/dependencies/stairway/JobService.java b/service/src/main/java/bio/terra/pipelines/dependencies/stairway/JobService.java index f2639dcb..76ca289a 100644 --- a/service/src/main/java/bio/terra/pipelines/dependencies/stairway/JobService.java +++ b/service/src/main/java/bio/terra/pipelines/dependencies/stairway/JobService.java @@ -156,8 +156,7 @@ public JobResultOrException retrieveJobResult( switch (flightState.getFlightStatus()) { case FATAL: - logAlert( - "Teaspoons Stairway flight {} encountered dismal failure", flightState.getFlightId()); + logAlert("Stairway flight {} encountered dismal failure", flightState.getFlightId()); return handleFailedFlight(flightState); case ERROR: return handleFailedFlight(flightState); @@ -264,8 +263,7 @@ private JobResultOrException handleFailedFlight(FlightState flightState) .exception(new InternalServerErrorException("wrap non-runtime exception", exception)); } } - logAlert( - "Teaspoons Stairway flight {} failed with no exception given", flightState.getFlightId()); + logAlert("Stairway flight {} failed with no exception given", flightState.getFlightId()); throw new InvalidResultStateException("Failed operation with no exception reported."); } diff --git a/service/src/main/java/bio/terra/pipelines/service/PipelineRunsService.java b/service/src/main/java/bio/terra/pipelines/service/PipelineRunsService.java index a10f8081..d7345371 100644 --- a/service/src/main/java/bio/terra/pipelines/service/PipelineRunsService.java +++ b/service/src/main/java/bio/terra/pipelines/service/PipelineRunsService.java @@ -231,7 +231,7 @@ protected 
PipelineRun writePipelineRunToDbThrowsDuplicateException(PipelineRun p logger.info("pipelineRun saved for jobId: {}", pipelineRun.getJobId()); } catch (DataIntegrityViolationException e) { if (e.getCause() instanceof ConstraintViolationException c - && c.getConstraintName().contains("jobId_unique")) { + && c.getConstraintName().contains("pipeline_runs_jobId_unique")) { throw new DuplicateObjectException( String.format("Duplicate jobId %s found", pipelineRun.getJobId())); } diff --git a/service/src/main/resources/application.yml b/service/src/main/resources/application.yml index 970c593f..f4f33ae4 100644 --- a/service/src/main/resources/application.yml +++ b/service/src/main/resources/application.yml @@ -25,7 +25,7 @@ env: authorityEndpoint: ${OIDC_AUTHORITY_ENDPOINT:https://terradevb2c.b2clogin.com/terradevb2c.onmicrosoft.com/oauth2/v2.0/authorize?p=b2c_1a_signup_signin_dev} tokenEndpoint: ${OIDC_TOKEN_ENDPOINT:https://terradevb2c.b2clogin.com/terradevb2c.onmicrosoft.com/oauth2/v2.0/token?p=b2c_1a_signup_signin_dev} ingress: - domainName: ${TSPS_INGRESS_DOMAIN_NAME:localhost:8080} + domainName: ${TEASPOONS_INGRESS_DOMAIN_NAME:localhost:8080} kubernetes: in-kubernetes: ${TERRA_COMMON_KUBERNETES_IN_KUBERNETES:false} pod-name: ${TERRA_COMMON_KUBERNETES_POD_NAME:} @@ -162,7 +162,7 @@ terra.common: # these values are used by TCL stairway: - cluster-name-suffix: tsps-stairway + cluster-name-suffix: teaspoons-stairway force-clean-start: false # ${env.db.init} max-parallel-flights: 50 migrate-upgrade: true diff --git a/service/src/main/resources/db/changelog.xml b/service/src/main/resources/db/changelog.xml index 2a074231..6ee000ee 100644 --- a/service/src/main/resources/db/changelog.xml +++ b/service/src/main/resources/db/changelog.xml @@ -3,37 +3,10 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + - + diff --git a/service/src/main/resources/db/changesets/20221025.yaml b/service/src/main/resources/db/changesets/20221025.yaml deleted file mode 100644 
index 225b4caf..00000000 --- a/service/src/main/resources/db/changesets/20221025.yaml +++ /dev/null @@ -1,73 +0,0 @@ -# Draft database setup to start -databaseChangeLog: - - changeSet: - id: draft initial schema - author: ma - changes: - - createTable: - tableName: pipelines - remarks: | - There is one row in the pipelines table for type of pipeline the service runs. - columns: - - column: - name: id - type: serial - constraints: - primaryKey: true - nullable: false - - column: - name: pipeline_id - type: text - constraints: - nullable: false - - column: - name: version - type: text - constraints: - nullable: false - - column: - name: display_name - type: text - remarks: | - A user-friendly name for the pipeline. - constraints: - nullable: false - - column: - name: description - type: text - remarks: | - Free-form text describing the pipeline. Can be null. - - addUniqueConstraint: - tableName: pipelines - constraintName: pipelines_pid_version_uk - columnNames: pipeline_id, version - - insert: - columns: - - column: - name: pipeline_id - value: calculate_file_size - - column: - name: version - value: 1.0.0 - - column: - name: display_name - value: Calculate File Size - - column: - name: description - value: Toy pipeline for development. Calculates and returns the size of an input file. - tableName: pipelines - - insert: - columns: - - column: - name: pipeline_id - value: imputation - - column: - name: version - value: 1.0.0 - - column: - name: display_name - value: Imputation - - column: - name: description - value: Run Imputation on data. 
- tableName: pipelines diff --git a/service/src/main/resources/db/changesets/20221128.yaml b/service/src/main/resources/db/changesets/20221128.yaml deleted file mode 100644 index b89f5016..00000000 --- a/service/src/main/resources/db/changesets/20221128.yaml +++ /dev/null @@ -1,52 +0,0 @@ -# Add jobs table -databaseChangeLog: - - changeSet: - id: add jobs table - author: ma - changes: - - createTable: - tableName: jobs - remarks: | - There is one row in the jobs table per job submitted to the service. - columns: - - column: - name: id - type: serial - constraints: - primaryKeY: true - nullable: false - - column: - name: job_id - type: uuid - constraints: - unique: true - uniqueConstraintName: jobs_jobId_unique - nullable: false - - column: - name: user_id - type: text - constraints: - nullable: false - - column: - name: pipeline_id - type: text - constraints: - nullable: false - - column: - name: pipeline_version - type: text - constraints: - nullable: false - - column: - name: time_submitted - type: datetime - constraints: - nullable: false - - column: - name: time_completed - type: datetime - - column: - name: status - type: text - constraints: - nullable: false diff --git a/service/src/main/resources/db/changesets/20230711.yaml b/service/src/main/resources/db/changesets/20230711.yaml deleted file mode 100644 index f9a08ab6..00000000 --- a/service/src/main/resources/db/changesets/20230711.yaml +++ /dev/null @@ -1,25 +0,0 @@ -# Add pipeline_inputs table -databaseChangeLog: - - changeSet: - id: add pipeline_inputs table - author: js - changes: - - createTable: - tableName: pipeline_inputs - remarks: | - There is one row in the pipeline_inputs table per job submitted to the service. 
- columns: - - column: - name: job_id - type: int - constraints: - nullable: false - primaryKey: true - foreignKeyName: pipeline_inputs_job_id_FK - referencedTableName: jobs - referencedColumnNames: id - - column: - name: inputs - type: text - constraints: - nullable: false diff --git a/service/src/main/resources/db/changesets/20240104.yaml b/service/src/main/resources/db/changesets/20240104.yaml deleted file mode 100644 index 263a1365..00000000 --- a/service/src/main/resources/db/changesets/20240104.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Change Jobs table to ImputationJobs table -databaseChangeLog: - - changeSet: - id: change jobs to imputation_jobs, remove fields that Stairway tracks, and remove toy pipeline from Pipelines table - author: mma - changes: - - renameTable: - oldTableName: jobs - newTableName: imputation_jobs - - dropColumn: - tableName: imputation_jobs - columns: - - column: - name: pipeline_id - - column: - name: time_submitted - - column: - name: time_completed - - column: - name: status - - delete: - tableName: pipelines - where: pipeline_id='calculate_file_size' diff --git a/service/src/main/resources/db/changesets/20240202.yaml b/service/src/main/resources/db/changesets/20240202.yaml deleted file mode 100644 index 79f9df07..00000000 --- a/service/src/main/resources/db/changesets/20240202.yaml +++ /dev/null @@ -1,73 +0,0 @@ -# change pipeline_id column to pipeline_name in pipelines table -# add pipeline_type, wdl_url, wdl_method_name columns to pipeline table and update pipeline_anme value for imputation pipeline -databaseChangeLog: - - changeSet: - id: update pipelines table with new columns - author: js - changes: - # rename pipeline_id column to name - - renameColumn: - columnDataType: text - newColumnName: name - oldColumnName: pipeline_id - tableName: pipelines - # add columns to pipelines table, set values for the only pipeline that currently exists, then add not null constraint - - addColumn: - tableName: pipelines - columns: - - column: 
- name: pipeline_type - type: text - - column: - name: wdl_url - type: text - - column: - name: wdl_method_name - type: text - - update: - columns: - - column: - name: name - value: imputation_minimac4 - - column: - name: pipeline_type - value: imputation - - column: - name: wdl_url - value: https://github.com/broadinstitute/warp/blob/js_try_imputation_azure/pipelines/broad/arrays/imputation/hello_world_no_file_input.wdl - - column: - name: wdl_method_name - value: imputation_minimac4 - tableName: pipelines - where: name='imputation' - - addNotNullConstraint: - columnDataType: text - columnName: pipeline_type - constraintName: not_null_pipelines_pipeline_type - tableName: pipelines - validate: true - # delete rows from imputation_jobs table so that we can add foreign key constraint to pipeline_name column - # since pipeline_inputs has a FK on imputation_jobs, we have to delete those rows as well - # ONLY DOING THIS CUZ ITS BEFORE WE'VE DEPLOYED PAST DEV - - delete: - tableName: pipeline_inputs - - delete: - tableName: imputation_jobs - # add pipeline_id column to imputation_jobs table with a FK to the pipelines table - - addColumn: - tableName: imputation_jobs - columns: - - column: - name: pipeline_id - type: int - constraints: - nullable: false - foreignKeyName: imputation_jobs_pipelines_id_FK - referencedTableName: pipelines - referencedColumnNames: id - # drop unnecessary columns in imputation_jobs table that already exist in the pipelines table - - dropColumn: - tableName: imputation_jobs - columns: - - column: - name: pipeline_version diff --git a/service/src/main/resources/db/changesets/20240412_add_pipeline_input_defs_and_beagle.yaml b/service/src/main/resources/db/changesets/20240412_add_pipeline_input_defs_and_beagle.yaml deleted file mode 100644 index 517575a6..00000000 --- a/service/src/main/resources/db/changesets/20240412_add_pipeline_input_defs_and_beagle.yaml +++ /dev/null @@ -1,99 +0,0 @@ -# Add pipeline_inputs_definitions table, populate for 
beagle, remove minimac -databaseChangeLog: - - changeSet: - id: add pipeline_inputs_definitions table, add beagle data, remove minimac - author: mma - changes: - # remove minimac inputs from pipeline_inputs - - delete: - tableName: pipeline_inputs - where: job_id IN (SELECT imputation_jobs.id FROM imputation_jobs JOIN pipelines ON imputation_jobs.pipeline_id=pipelines.id WHERE pipelines.name='imputation_minimac4') - # remove minimac jobs from imputation_jobs - - delete: - tableName: imputation_jobs - where: pipeline_id=(SELECT id FROM pipelines WHERE name='imputation_minimac4') - # rename imputation_jobs to jobs - - renameTable: - oldTableName: imputation_jobs - newTableName: jobs - # remove minimac from pipelines table and add beagle - - delete: - tableName: pipelines - where: name='imputation_minimac4' - - insert: - tableName: pipelines - columns: - - column: - name: name - value: imputation_beagle - - column: - name: version - value: "1" - - column: - name: display_name - value: Beagle Imputation - - column: - name: pipeline_type - value: imputation - - column: - name: description - value: Impute missing genotypes using Beagle - - column: - name: wdl_url - value: https://github.com/broadinstitute/warp/blob/TSPS-183_mma_beagle_imputation_hg38/pipelines/broad/arrays/imputation_beagle/ImputationBeagle.wdl - - column: - name: wdl_method_name - value: ImputationBeagle - # add pipeline_inputs_definitions table - - createTable: - tableName: pipeline_input_definitions - remarks: | - There is one row in the pipeline_input_definitions table per input for the designated pipeline. 
- columns: - - column: - name: id - type: serial - constraints: - primaryKey: true - nullable: false - - column: - name: pipeline_id - type: int - constraints: - nullable: false - primaryKey: true - foreignKeyName: pipeline_input_defs_pipeline_id_FK - referencedTableName: pipelines - referencedColumnNames: id - - column: - name: name - type: text - constraints: - nullable: false - - column: - name: type - type: text - constraints: - nullable: false - - column: - name: is_required - type: boolean - constraints: - nullable: false - # add beagle inputs to pipeline_input_definitions table - - insert: - tableName: pipeline_input_definitions - columns: - - column: - name: pipeline_id - valueComputed: (SELECT id FROM pipelines WHERE name='imputation_beagle') - - column: - name: name - value: multi_sample_vcf - # for now this is the path for one file - - column: - name: type - value: String - - column: - name: is_required - value: true diff --git a/service/src/main/resources/db/changesets/20240417_add_workspace_id_to_pipelines.yaml b/service/src/main/resources/db/changesets/20240417_add_workspace_id_to_pipelines.yaml deleted file mode 100644 index e837e009..00000000 --- a/service/src/main/resources/db/changesets/20240417_add_workspace_id_to_pipelines.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# add workspace_id column to pipelines table to power the new admin endpoint -databaseChangeLog: - - changeSet: - id: add workspace_id column to pipelines table - author: js - changes: - # add workspace_id column to pipelines table - - addColumn: - tableName: pipelines - columns: - - column: - name: workspace_id - type: uuid diff --git a/service/src/main/resources/db/changesets/20240429_add_vcf_type.yaml b/service/src/main/resources/db/changesets/20240429_add_vcf_type.yaml deleted file mode 100644 index cc095fca..00000000 --- a/service/src/main/resources/db/changesets/20240429_add_vcf_type.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# Change multi_sample_vcf input type from string to vcf 
-databaseChangeLog: - - changeSet: - id: change multi_sample_vcf input type from string to vcf - author: mma - changes: - # update multi_sample_vcf input type to vcf - - update: - tableName: pipeline_input_definitions - where: name='multi_sample_vcf' AND pipeline_id=(SELECT id FROM pipelines WHERE name='imputation_beagle') - columns: - - column: - name: type - value: VCF diff --git a/service/src/main/resources/db/changesets/20240502_add_imputation_inputs.yaml b/service/src/main/resources/db/changesets/20240502_add_imputation_inputs.yaml deleted file mode 100644 index 4427678f..00000000 --- a/service/src/main/resources/db/changesets/20240502_add_imputation_inputs.yaml +++ /dev/null @@ -1,111 +0,0 @@ -# add non-user-provided inputs to the imputation pipeline -databaseChangeLog: - - changeSet: - id: add non-user-provided inputs to the imputation pipeline - author: mma - changes: - # add user_provided and default_value columns to the pipeline_input_definitions table - - addColumn: - tableName: pipeline_input_definitions - columns: - - column: - name: user_provided - type: boolean - defaultValueBoolean: true - constraints: - nullable: false - - column: - name: default_value - type: text - constraints: - nullable: true - # ensure we can't add two inputs with the same name to the same pipeline - - addUniqueConstraint: - - columnNames: pipeline_id, name - - constraintName: unique_pipeline_input_name - - tableName: pipeline_input_definitions - # add non-user-provided input definitions for beagle imputation - - insert: - tableName: pipeline_input_definitions - columns: - - column: - name: pipeline_id - valueComputed: (SELECT id FROM pipelines WHERE name='imputation_beagle') - - column: - name: name - value: contigs - - column: - name: type - value: STRING_ARRAY - - column: - name: is_required - value: true - - column: - name: user_provided - value: false - - column: - name: default_value - value: 
'["chr1","chr2","chr3","chr4","chr5","chr6","chr7","chr8","chr9","chr10","chr11","chr12","chr13","chr14","chr15","chr16","chr17","chr18","chr19","chr20","chr21","chr22"]' - - insert: - tableName: pipeline_input_definitions - columns: - - column: - name: pipeline_id - valueComputed: (SELECT id FROM pipelines WHERE name='imputation_beagle') - - column: - name: name - value: genetic_maps_path - - column: - name: type - value: STRING - - column: - name: is_required - value: true - - column: - name: user_provided - value: false - - column: - name: default_value - value: "/plink-genetic-maps/GRCh38_fixed/" - - insert: - tableName: pipeline_input_definitions - columns: - - column: - name: pipeline_id - valueComputed: (SELECT id FROM pipelines WHERE name='imputation_beagle') - - column: - name: name - value: ref_dict - - column: - name: type - value: STRING - - column: - name: is_required - value: true - - column: - name: user_provided - value: false - - column: - name: default_value - value: "/hg38/Homo_sapiens_assembly38.dict" - - insert: - tableName: pipeline_input_definitions - columns: - - column: - name: pipeline_id - valueComputed: (SELECT id FROM pipelines WHERE name='imputation_beagle') - - column: - name: name - value: reference_panel_path - - column: - name: type - value: STRING - - column: - name: is_required - value: true - - column: - name: user_provided - value: false - - column: - name: default_value - value: "/hg38/1000G_HGDP_no_singletons_ref_panel/" diff --git a/service/src/main/resources/db/changesets/20240520_add_outputbasename_input_beagle.yaml b/service/src/main/resources/db/changesets/20240520_add_outputbasename_input_beagle.yaml deleted file mode 100644 index 81a27363..00000000 --- a/service/src/main/resources/db/changesets/20240520_add_outputbasename_input_beagle.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# add output_basename as an input to the beagle imputation pipeline -databaseChangeLog: - - changeSet: - id: add output_basename as an input to the 
beagle imputation pipeline - author: mma - changes: - - insert: - tableName: pipeline_input_definitions - columns: - - column: - name: pipeline_id - valueComputed: (SELECT id FROM pipelines WHERE name='imputation_beagle') - - column: - name: name - value: output_basename - - column: - name: type - value: STRING - - column: - name: is_required - value: true - - column: - name: user_provided - value: true - - column: - name: default_value - value: null diff --git a/service/src/main/resources/db/changesets/20240529_convert_jobs_table_to_pipeline_runs.yaml b/service/src/main/resources/db/changesets/20240529_convert_jobs_table_to_pipeline_runs.yaml deleted file mode 100644 index e57eea1d..00000000 --- a/service/src/main/resources/db/changesets/20240529_convert_jobs_table_to_pipeline_runs.yaml +++ /dev/null @@ -1,84 +0,0 @@ -# Convert the jobs table to pipeline_runs table and add fields -databaseChangeLog: - - changeSet: - id: Convert the jobs table to pipeline_runs table and add fields - author: mma - changes: - - renameTable: - oldTableName: jobs - newTableName: pipeline_runs - # add created, updated, completed, status, description, result_url, and is_success columns to pipeline_runs table - - addColumn: - tableName: pipeline_runs - columns: - - column: - name: created - type: timestamp with time zone - defaultValueComputed: NOW() - constraints: - nullable: false - - addColumn: - tableName: pipeline_runs - columns: - - column: - name: updated - type: timestamp with time zone - defaultValueComputed: NOW() - constraints: - nullable: false - - addColumn: - tableName: pipeline_runs - columns: - - column: - name: status - type: text - - addColumn: - tableName: pipeline_runs - columns: - - column: - name: description - type: text - - addColumn: - tableName: pipeline_runs - columns: - - column: - name: result_url - type: text - - addColumn: - tableName: pipeline_runs - columns: - - column: - name: is_success - type: boolean - - addColumn: - tableName: pipeline_runs - columns: 
- - column: - name: output - type: text - # create the "set updated timestamp to now()" function to be used in a trigger - # note that the liquibase `createFunction` change type requires a pro license, - # so we use the `sql` changeset here - - sql: - dbms: 'postgresql' - splitStatements: false - sql: > - create function public.pipeline_run_updated_to_now() - returns trigger as $$ - BEGIN - NEW.updated = now(); - return NEW; - END; - $$ language 'plpgsql'; - rollback: drop function if exists public.pipeline_run_updated_to_now; - # create a trigger on the `job` table to set `updated` col to now() on updates - # note that the liquibase `createTrigger` change type requires a pro license, - # so we use the `sql` changeset here - - sql: - dbms: 'postgresql' - sql: > - create trigger pipeline_run_updated - before update on public.pipeline_runs - for each row - execute procedure public.pipeline_run_updated_to_now(); - rollback: drop trigger if exists pipeline_run_updated on public.pipeline_runs; diff --git a/service/src/main/resources/db/changesets/20240610_change_beagle_reference_path_input.yaml b/service/src/main/resources/db/changesets/20240610_change_beagle_reference_path_input.yaml deleted file mode 100644 index bf5cef9c..00000000 --- a/service/src/main/resources/db/changesets/20240610_change_beagle_reference_path_input.yaml +++ /dev/null @@ -1,17 +0,0 @@ -# Change reference panel path input key name and value -databaseChangeLog: - - changeSet: - id: change reference panel path input key name and value - author: js - changes: - # update reference_panel_path value to the new prefixed path and change the name to reference_panel_path_prefix - - update: - tableName: pipeline_input_definitions - where: name='reference_panel_path' AND pipeline_id=(SELECT id FROM pipelines WHERE name='imputation_beagle') - columns: - - column: - name: name - value: reference_panel_path_prefix - - column: - name: default_value - value: 
"/hg38/1000G_HGDP_no_singletons_ref_panel/hgdp.tgp.gwaspy.merged.merged.AN_added.bcf.ac2" diff --git a/service/src/main/resources/db/changesets/20240611_add_control_workspace_id_to_pipeline_runs.yaml b/service/src/main/resources/db/changesets/20240611_add_control_workspace_id_to_pipeline_runs.yaml deleted file mode 100644 index 95bd7c09..00000000 --- a/service/src/main/resources/db/changesets/20240611_add_control_workspace_id_to_pipeline_runs.yaml +++ /dev/null @@ -1,12 +0,0 @@ -# Add a workspace_id column to the pipeline_runs table and change output type to jsonb -databaseChangeLog: - - changeSet: - id: Add a workspace_id column to the pipeline_runs table - author: mma - changes: - - addColumn: - tableName: pipeline_runs - columns: - - column: - name: workspace_id - type: uuid diff --git a/service/src/main/resources/db/changesets/20240621_add_wdl_variable_name_to_input_definitions.yaml b/service/src/main/resources/db/changesets/20240621_add_wdl_variable_name_to_input_definitions.yaml deleted file mode 100644 index 94a3a31f..00000000 --- a/service/src/main/resources/db/changesets/20240621_add_wdl_variable_name_to_input_definitions.yaml +++ /dev/null @@ -1,71 +0,0 @@ -# Make input definition names camelCase; add wdl_variable_name column containing snake_case name used in WDL -databaseChangeLog: - - changeSet: - id: Make input definition names camelCase; add wdl_variable_name column containing snake_case name used in WDL - author: mma - changes: - - addColumn: - tableName: pipeline_input_definitions - columns: - - column: - name: wdl_variable_name - type: varchar(255) - constraints: - nullable: true - - update: - tableName: pipeline_input_definitions - where: name='multi_sample_vcf' AND pipeline_id=(SELECT id FROM pipelines WHERE name='imputation_beagle') - columns: - - column: - name: name - value: "multiSampleVcf" - - column: - name: wdl_variable_name - value: "multi_sample_vcf" - - update: - tableName: pipeline_input_definitions - where: name='output_basename' AND 
pipeline_id=(SELECT id FROM pipelines WHERE name='imputation_beagle') - columns: - - column: - name: name - value: "outputBasename" - - column: - name: wdl_variable_name - value: "output_basename" - - update: - tableName: pipeline_input_definitions - where: name='reference_panel_path_prefix' AND pipeline_id=(SELECT id FROM pipelines WHERE name='imputation_beagle') - columns: - - column: - name: name - value: "referencePanelPathPrefix" - - column: - name: wdl_variable_name - value: "reference_panel_path_prefix" - - update: - tableName: pipeline_input_definitions - where: name='genetic_maps_path' AND pipeline_id=(SELECT id FROM pipelines WHERE name='imputation_beagle') - columns: - - column: - name: name - value: "geneticMapsPath" - - column: - name: wdl_variable_name - value: "genetic_maps_path" - - update: - tableName: pipeline_input_definitions - where: name='ref_dict' AND pipeline_id=(SELECT id FROM pipelines WHERE name='imputation_beagle') - columns: - - column: - name: name - value: "refDict" - - column: - name: wdl_variable_name - value: "ref_dict" - - update: - tableName: pipeline_input_definitions - where: name='contigs' AND pipeline_id=(SELECT id FROM pipelines WHERE name='imputation_beagle') - columns: - - column: - name: wdl_variable_name - value: "contigs" diff --git a/service/src/main/resources/db/changesets/20240625_add_workspace_storage_url_to_pipeline_runs.yaml b/service/src/main/resources/db/changesets/20240625_add_workspace_storage_url_to_pipeline_runs.yaml deleted file mode 100644 index dcf0b4cb..00000000 --- a/service/src/main/resources/db/changesets/20240625_add_workspace_storage_url_to_pipeline_runs.yaml +++ /dev/null @@ -1,12 +0,0 @@ -# Add a workspace_storage_container_url column to the pipeline_runs table -databaseChangeLog: - - changeSet: - id: Add a workspace_storage_container_url column to the pipeline_runs table - author: mma - changes: - - addColumn: - tableName: pipeline_runs - columns: - - column: - name: 
workspace_storage_container_url - type: text diff --git a/service/src/main/resources/db/changesets/20240711_add_pipeline_output_definitions.yaml b/service/src/main/resources/db/changesets/20240711_add_pipeline_output_definitions.yaml deleted file mode 100644 index b9f0721a..00000000 --- a/service/src/main/resources/db/changesets/20240711_add_pipeline_output_definitions.yaml +++ /dev/null @@ -1,112 +0,0 @@ -# Add pipeline_output_definitions table and populate, add pipeline_outputs table -databaseChangeLog: - - changeSet: - id: add pipeline_output_definitions and populate, add pipeline_outputs table - author: mma - changes: - # create new pipeline_output_definitions table - - createTable: - tableName: pipeline_output_definitions - remarks: | - There is one row in the pipeline_output_definitions table per output for the designated pipeline. - columns: - - column: - name: id - type: serial - constraints: - primaryKey: true - nullable: false - - column: - name: pipeline_id - type: int - constraints: - nullable: false - primaryKey: true - foreignKeyName: pipeline_output_defs_pipeline_id_FK - referencedTableName: pipelines - referencedColumnNames: id - - column: - name: name - type: text - constraints: - nullable: false - - column: - name: type - type: text - constraints: - nullable: false - - column: - name: wdl_variable_name - type: varchar(255) - constraints: - nullable: true - # add output definitions - - insert: - tableName: pipeline_output_definitions - columns: - - column: - name: pipeline_id - valueComputed: (SELECT id FROM pipelines WHERE name='imputation_beagle') - - column: - name: name - value: imputedMultiSampleVcf - - column: - name: type - value: FILE - - column: - name: wdl_variable_name - value: imputed_multi_sample_vcf - - insert: - tableName: pipeline_output_definitions - columns: - - column: - name: pipeline_id - valueComputed: (SELECT id FROM pipelines WHERE name='imputation_beagle') - - column: - name: name - value: imputedMultiSampleVcfIndex - - 
column: - name: type - value: FILE - - column: - name: wdl_variable_name - value: imputed_multi_sample_vcf_index - - insert: - tableName: pipeline_output_definitions - columns: - - column: - name: pipeline_id - valueComputed: (SELECT id FROM pipelines WHERE name='imputation_beagle') - - column: - name: name - value: chunksInfo - - column: - name: type - value: FILE - - column: - name: wdl_variable_name - value: chunks_info - # add pipeline_outputs table - - createTable: - tableName: pipeline_outputs - remarks: | - There is one row in the pipeline_outputs table per (completed) pipeline run. - columns: - - column: - name: job_id - type: int - constraints: - nullable: false - primaryKey: true - foreignKeyName: pipeline_outputs_job_id_FK - referencedTableName: pipeline_runs - referencedColumnNames: id - - column: - name: outputs - type: text - constraints: - nullable: false - # remove output column from pipeline_runs table - - dropColumn: - tableName: pipeline_runs - columnName: output diff --git a/service/src/main/resources/db/changesets/20240716_add_file_suffix_to_input_defs_and_update_FILE_types.yaml b/service/src/main/resources/db/changesets/20240716_add_file_suffix_to_input_defs_and_update_FILE_types.yaml deleted file mode 100644 index f0c5c9f2..00000000 --- a/service/src/main/resources/db/changesets/20240716_add_file_suffix_to_input_defs_and_update_FILE_types.yaml +++ /dev/null @@ -1,31 +0,0 @@ -# Add file_suffix field to pipeline_input_definitions and update all VCF types to FILE -databaseChangeLog: - - changeSet: - id: Add file_suffix field to pipeline_input_definitions and update all VCF types to FILE - author: mma - changes: - # add file_suffix column to pipeline_input_definitions - - addColumn: - tableName: pipeline_input_definitions - columns: - - column: - name: file_suffix - type: text - constraints: - nullable: true - # set all VCF types' file_suffix to ".vcf.gz" - - update: - tableName: pipeline_input_definitions - where: type='VCF' - columns: - - 
column: - name: file_suffix - value: ".vcf.gz" - # update all VCF types to FILE - - update: - tableName: pipeline_input_definitions - where: type='VCF' - columns: - - column: - name: type - value: FILE diff --git a/service/src/main/resources/db/changesets/20240806_add_workspace_project_and_workspace_name.yaml b/service/src/main/resources/db/changesets/20240806_add_workspace_project_and_workspace_name.yaml deleted file mode 100644 index a27a6e66..00000000 --- a/service/src/main/resources/db/changesets/20240806_add_workspace_project_and_workspace_name.yaml +++ /dev/null @@ -1,26 +0,0 @@ -# Add workspace_project and workspace_name to pipelines and pipeline_runs tables -databaseChangeLog: - - changeSet: - id: Add workspace_project and workspace_name to pipelines and pipeline_runs tables - author: mma - changes: - # add workspace_project and workspace_name to pipelines table - - addColumn: - tableName: pipelines - columns: - - column: - name: workspace_project - type: text - - column: - name: workspace_name - type: text - # add workspace_project and workspace_name to pipeline_runs table - - addColumn: - tableName: pipeline_runs - columns: - - column: - name: workspace_project - type: text - - column: - name: workspace_name - type: text diff --git a/service/src/main/resources/db/changesets/20240820_add_workspace_storage_container_url.yaml b/service/src/main/resources/db/changesets/20240820_add_workspace_storage_container_url.yaml deleted file mode 100644 index 57c225b8..00000000 --- a/service/src/main/resources/db/changesets/20240820_add_workspace_storage_container_url.yaml +++ /dev/null @@ -1,12 +0,0 @@ -# Add workspace_storage_container_url to pipelines table -databaseChangeLog: - - changeSet: - id: Add workspace_storage_container_url to pipelines table - author: mma - changes: - - addColumn: - tableName: pipelines - columns: - - column: - name: workspace_storage_container_url - type: text diff --git 
a/service/src/main/resources/db/changesets/20240824_add_wdl_version_update_pipeline_version.yaml b/service/src/main/resources/db/changesets/20240824_add_wdl_version_update_pipeline_version.yaml deleted file mode 100644 index 41f82779..00000000 --- a/service/src/main/resources/db/changesets/20240824_add_wdl_version_update_pipeline_version.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Add wdl_method_version to pipelines table and change type of version column to int and set it to 0 -databaseChangeLog: - - changeSet: - id: Add wdl_method_version to pipelines table and change type of version column to int and set it to 0 - author: js - changes: - - addColumn: - tableName: pipelines - columns: - - column: - name: wdl_method_version - type: text - - modifyDataType: - columnName: version - newDataType: int - tableName: pipelines - - update: - tableName: pipelines - where: name='imputation_beagle' and version=1 - columns: - - column: - name: version - value: 0 diff --git a/service/src/main/resources/db/changesets/20240830_add_workspace_google_project_rename_storage_container_field.yaml b/service/src/main/resources/db/changesets/20240830_add_workspace_google_project_rename_storage_container_field.yaml deleted file mode 100644 index 5af027fa..00000000 --- a/service/src/main/resources/db/changesets/20240830_add_workspace_google_project_rename_storage_container_field.yaml +++ /dev/null @@ -1,26 +0,0 @@ -# Add workspace_google_project, rename workspace_storage_container_url in pipelines and pipeline_runs tables -databaseChangeLog: - - changeSet: - id: Add workspace_google_project, rename workspace_storage_container_url in pipelines and pipeline_runs tables - author: mma - changes: - - addColumn: - tableName: pipelines - columns: - - column: - name: workspace_google_project - type: text - - addColumn: - tableName: pipeline_runs - columns: - - column: - name: workspace_google_project - type: text - - renameColumn: - tableName: pipelines - oldColumnName: workspace_storage_container_url - 
newColumnName: workspace_storage_container_name - - renameColumn: - tableName: pipeline_runs - oldColumnName: workspace_storage_container_url - newColumnName: workspace_storage_container_name diff --git a/service/src/main/resources/db/changesets/20240903_rename_workspace_project.yaml b/service/src/main/resources/db/changesets/20240903_rename_workspace_project.yaml deleted file mode 100644 index 0041010b..00000000 --- a/service/src/main/resources/db/changesets/20240903_rename_workspace_project.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# Add workspace_google_project, rename workspace_storage_container_url in pipelines and pipeline_runs tables -databaseChangeLog: - - changeSet: - id: Rename workspace_project to workspace_billing_project in pipelines and pipeline_runs tables - author: mma - changes: - - renameColumn: - tableName: pipelines - oldColumnName: workspace_project - newColumnName: workspace_billing_project - - renameColumn: - tableName: pipeline_runs - oldColumnName: workspace_project - newColumnName: workspace_billing_project diff --git a/service/src/main/resources/db/changesets/20240906_add_wdl_version_to_pipeline_run.yaml b/service/src/main/resources/db/changesets/20240906_add_wdl_version_to_pipeline_run.yaml deleted file mode 100644 index 7fb6e9b3..00000000 --- a/service/src/main/resources/db/changesets/20240906_add_wdl_version_to_pipeline_run.yaml +++ /dev/null @@ -1,12 +0,0 @@ -# Add wdl_method_version to pipeline_runs table -databaseChangeLog: - - changeSet: - id: Add wdl_method_version to pipeline_runs table - author: mma - changes: - - addColumn: - tableName: pipeline_runs - columns: - - column: - name: wdl_method_version - type: text diff --git a/service/src/main/resources/db/changesets/20240911_remove_isSuccess.yaml b/service/src/main/resources/db/changesets/20240911_remove_isSuccess.yaml deleted file mode 100644 index 08387242..00000000 --- a/service/src/main/resources/db/changesets/20240911_remove_isSuccess.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# 
Remove is_success from pipeline_runs table -databaseChangeLog: - - changeSet: - id: Remove is_success from pipeline_runs table - author: mma - changes: - - dropColumn: - tableName: pipeline_runs - columns: - - column: - name: is_success diff --git a/service/src/main/resources/db/changesets/20240923_update_input_defs_bucket_paths.yaml b/service/src/main/resources/db/changesets/20240923_update_input_defs_bucket_paths.yaml deleted file mode 100644 index c06739d8..00000000 --- a/service/src/main/resources/db/changesets/20240923_update_input_defs_bucket_paths.yaml +++ /dev/null @@ -1,30 +0,0 @@ -# Update input definitions bucket paths for service provided inputs -databaseChangeLog: - - changeSet: - id: Update input definitions bucket paths for service provided inputs - author: mma - changes: - # update geneticMapsPath default_value from /plink-genetic-maps/GRCh38_fixed/ - - update: - tableName: pipeline_input_definitions - where: name='geneticMapsPath' AND pipeline_id=(SELECT id FROM pipelines WHERE name='imputation_beagle') - columns: - - column: - name: default_value - value: "/hg38/plink-genetic-maps/" - # update refDict default_value from /hg38/Homo_sapiens_assembly38.dict - - update: - tableName: pipeline_input_definitions - where: name='refDict' AND pipeline_id=(SELECT id FROM pipelines WHERE name='imputation_beagle') - columns: - - column: - name: default_value - value: "/hg38/ref_dict/Homo_sapiens_assembly38.dict" - # update referencePanelPathPrefix default_value from /hg38/1000G_HGDP_no_singletons_ref_panel/hgdp.tgp.gwaspy.merged.merged.AN_added.bcf.ac2 - - update: - tableName: pipeline_input_definitions - where: name='referencePanelPathPrefix' AND pipeline_id=(SELECT id FROM pipelines WHERE name='imputation_beagle') - columns: - - column: - name: default_value - value: "/hg38/ref_panels/1000G_HGDP_no_singletons/hgdp.tgp.gwaspy.merged.merged.AN_added.bcf.ac2" diff --git a/service/src/main/resources/db/changesets/20240925_update_ref_panel_prefix_input.yaml 
b/service/src/main/resources/db/changesets/20240925_update_ref_panel_prefix_input.yaml deleted file mode 100644 index 8abc933c..00000000 --- a/service/src/main/resources/db/changesets/20240925_update_ref_panel_prefix_input.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# Update ref panel input definition default_value -databaseChangeLog: - - changeSet: - id: Update ref panel input definition default_value - author: mma - changes: - # update referencePanelPathPrefix default_value from /hg38/ref_panels/1000G_HGDP_no_singletons/hgdp.tgp.gwaspy.merged.merged.AN_added.bcf.ac2 - - update: - tableName: pipeline_input_definitions - where: name='referencePanelPathPrefix' AND pipeline_id=(SELECT id FROM pipelines WHERE name='imputation_beagle') - columns: - - column: - name: default_value - value: "/hg38/ref_panels/1000G_HGDP_no_singletons/hgdp.tgp.gwaspy.AN_added.bcf.ac2" diff --git a/service/src/main/resources/db/changesets/20241030_add_pipeline_quotas.yaml b/service/src/main/resources/db/changesets/20241030_add_pipeline_quotas.yaml deleted file mode 100644 index 6779b860..00000000 --- a/service/src/main/resources/db/changesets/20241030_add_pipeline_quotas.yaml +++ /dev/null @@ -1,38 +0,0 @@ -databaseChangeLog: - - changeSet: - id: add pipeline quotas table - author: ks - changes: - # add pipeline_quotas table - - createTable: - tableName: pipeline_quotas - remarks: | - this table describes the quota allowed per pipeline - columns: - - column: - name: id - type: serial - constraints: - primaryKey: true - nullable: false - - column: - name: pipeline_name - type: text - constraints: - nullable: false - unique: true - - column: - name: default_quota - type: int - constraints: - nullable: false - # add default quota for imputation_beagle to pipeline_quotas table - - insert: - tableName: pipeline_quotas - columns: - - column: - name: pipeline_name - value: imputation_beagle - - column: - name: default_quota - value: 10000 diff --git 
a/service/src/main/resources/db/changesets/20241103_add_user_quotas.yaml b/service/src/main/resources/db/changesets/20241103_add_user_quotas.yaml deleted file mode 100644 index 76863c9d..00000000 --- a/service/src/main/resources/db/changesets/20241103_add_user_quotas.yaml +++ /dev/null @@ -1,51 +0,0 @@ -databaseChangeLog: - - changeSet: - id: add user quotas table - author: js - changes: - # add user_quotas table - - createTable: - tableName: user_quotas - remarks: | - this table describes the quota a user has consumed for a given pipeline - columns: - - column: - name: id - type: serial - constraints: - primaryKey: true - nullable: false - - column: - name: pipeline_name - type: text - constraints: - nullable: false - unique: true - - column: - name: user_id - type: text - constraints: - nullable: false - - column: - name: quota - type: int - constraints: - nullable: false - - column: - name: quota_consumed - type: int - constraints: - nullable: false - defaultValue: 0 - - addUniqueConstraint: - tableName: user_quotas - constraintName: user_quotas_userid_pname_uk - columnNames: user_id, pipeline_name - - createIndex: - tableName: user_quotas - clustered: false - columns: - - column: - name: user_id - indexName: user_quotas_userid_idx - unique: false diff --git a/service/src/main/resources/db/changesets/20241106-add-indexes.yaml b/service/src/main/resources/db/changesets/20241106-add-indexes.yaml new file mode 100644 index 00000000..16b03d70 --- /dev/null +++ b/service/src/main/resources/db/changesets/20241106-add-indexes.yaml @@ -0,0 +1,34 @@ +databaseChangeLog: + - changeSet: + id: add-indexes + author: js + changes: + # Pipeline runs indexes + - createIndex: + tableName: pipeline_runs + indexName: idx_pipeline_runs_user_id + columns: + - column: + name: user_id + # Supports findAllByUserId queries + + - createIndex: + tableName: pipeline_runs + indexName: idx_pipeline_runs_job_id_user_id + columns: + - column: + name: job_id + - column: + name: user_id + # 
Supports findByJobIdAndUserId queries + + - createIndex: + tableName: user_quotas + indexName: idx_user_quotas_user_pipeline + columns: + - column: + name: user_id + - column: + name: pipeline_name + # Supports findByUserIdAndPipelineName queries + diff --git a/service/src/main/resources/db/changesets/20241106-base-data-insertion.yaml b/service/src/main/resources/db/changesets/20241106-base-data-insertion.yaml new file mode 100644 index 00000000..609bebf1 --- /dev/null +++ b/service/src/main/resources/db/changesets/20241106-base-data-insertion.yaml @@ -0,0 +1,239 @@ +# this is a consolidated yaml that contains all the data initially inserted in the database +databaseChangeLog: + - changeSet: + id: consolidated-schema + author: js + changes: + # Insert initial array_imputation pipeline data + - insert: + tableName: pipelines + columns: + - column: + name: name + value: array_imputation + - column: + name: version + value: 0 + - column: + name: display_name + value: Array Imputation + - column: + name: description + value: Phase and impute genotypes using Beagle 5.4 with the AoU/AnVIL reference panel of 515,579 samples. 
+ - column: + name: pipeline_type + value: imputation + - column: + name: wdl_url + value: https://github.com/broadinstitute/warp/blob/TSPS-183_mma_beagle_imputation_hg38/pipelines/broad/arrays/imputation_beagle/ImputationBeagle.wdl + - column: + name: wdl_method_name + value: ImputationBeagle + + # Insert quota for array_imputation + - insert: + tableName: pipeline_quotas + columns: + - column: + name: pipeline_name + value: array_imputation + - column: + name: default_quota + value: 10000 + + # Insert pipeline input definitions for array_imputation + - insert: + tableName: pipeline_input_definitions + columns: + - column: + name: pipeline_id + valueComputed: (SELECT id FROM pipelines WHERE name='array_imputation') + - column: + name: name + value: multiSampleVcf + - column: + name: type + value: FILE + - column: + name: is_required + value: true + - column: + name: file_suffix + value: .vcf.gz + - column: + name: user_provided + value: true + - column: + name: wdl_variable_name + value: multi_sample_vcf + + - insert: + tableName: pipeline_input_definitions + columns: + - column: + name: pipeline_id + valueComputed: (SELECT id FROM pipelines WHERE name='array_imputation') + - column: + name: name + value: outputBasename + - column: + name: type + value: STRING + - column: + name: is_required + value: true + - column: + name: user_provided + value: true + - column: + name: wdl_variable_name + value: output_basename + + # Insert non-user-provided input definitions for array_imputation + - insert: + tableName: pipeline_input_definitions + columns: + - column: + name: pipeline_id + valueComputed: (SELECT id FROM pipelines WHERE name='array_imputation') + - column: + name: name + value: contigs + - column: + name: type + value: STRING_ARRAY + - column: + name: is_required + value: true + - column: + name: user_provided + value: false + - column: + name: default_value + value: 
'["chr1","chr2","chr3","chr4","chr5","chr6","chr7","chr8","chr9","chr10","chr11","chr12","chr13","chr14","chr15","chr16","chr17","chr18","chr19","chr20","chr21","chr22"]' + - column: + name: wdl_variable_name + value: contigs + + - insert: + tableName: pipeline_input_definitions + columns: + - column: + name: pipeline_id + valueComputed: (SELECT id FROM pipelines WHERE name='array_imputation') + - column: + name: name + value: geneticMapsPath + - column: + name: type + value: STRING + - column: + name: is_required + value: true + - column: + name: user_provided + value: false + - column: + name: default_value + value: /hg38/plink-genetic-maps/ + - column: + name: wdl_variable_name + value: genetic_maps_path + + - insert: + tableName: pipeline_input_definitions + columns: + - column: + name: pipeline_id + valueComputed: (SELECT id FROM pipelines WHERE name='array_imputation') + - column: + name: name + value: refDict + - column: + name: type + value: STRING + - column: + name: is_required + value: true + - column: + name: user_provided + value: false + - column: + name: default_value + value: /hg38/ref_dict/Homo_sapiens_assembly38.dict + - column: + name: wdl_variable_name + value: ref_dict + + - insert: + tableName: pipeline_input_definitions + columns: + - column: + name: pipeline_id + valueComputed: (SELECT id FROM pipelines WHERE name='array_imputation') + - column: + name: name + value: referencePanelPathPrefix + - column: + name: type + value: STRING + - column: + name: is_required + value: true + - column: + name: user_provided + value: false + - column: + name: default_value + value: /hg38/ref_panels/1000G_HGDP_no_singletons/hgdp.tgp.gwaspy.AN_added.bcf.ac2 + - column: + name: wdl_variable_name + value: reference_panel_path_prefix + + # Insert pipeline output definitions for array_imputation + - insert: + tableName: pipeline_output_definitions + columns: + - column: + name: pipeline_id + valueComputed: (SELECT id FROM pipelines WHERE name='array_imputation') 
+              - column:
+                  name: name
+                  value: imputedMultiSampleVcf
+              - column:
+                  name: type
+                  value: FILE
+              - column:
+                  name: wdl_variable_name
+                  value: imputed_multi_sample_vcf
+
+        - insert:
+            tableName: pipeline_output_definitions
+            columns:
+              - column:
+                  name: pipeline_id
+                  valueComputed: (SELECT id FROM pipelines WHERE name='array_imputation')
+              - column:
+                  name: name
+                  value: imputedMultiSampleVcfIndex
+              - column:
+                  name: type
+                  value: FILE
+              - column:
+                  name: wdl_variable_name
+                  value: imputed_multi_sample_vcf_index
+        - insert:
+            tableName: pipeline_output_definitions
+            columns:
+              - column:
+                  name: pipeline_id
+                  valueComputed: (SELECT id FROM pipelines WHERE name='array_imputation')
+              - column:
+                  name: name
+                  value: chunksInfo
+              - column:
+                  name: type
+                  value: FILE
+              - column:
+                  name: wdl_variable_name
+                  value: chunks_info
diff --git a/service/src/main/resources/db/changesets/20241106-base-tables.yaml b/service/src/main/resources/db/changesets/20241106-base-tables.yaml
new file mode 100644
index 00000000..c59daec0
--- /dev/null
+++ b/service/src/main/resources/db/changesets/20241106-base-tables.yaml
@@ -0,0 +1,366 @@
+# Consolidated changelog: creates all of the tables, plus the unique constraints, index and update trigger, in one changeSet.
+databaseChangeLog:
+  - changeSet:
+      id: consolidated-schema
+      author: js
+      changes:
+        # Create pipelines table
+        - createTable:
+            tableName: pipelines
+            remarks: |
+              Core table containing pipeline definitions and metadata
+            columns:
+              - column:
+                  name: id
+                  type: serial
+                  constraints:
+                    primaryKey: true
+                    nullable: false
+              - column:
+                  name: name  # unique together with version (pipelines_name_version_uk, added later in this changeSet)
+                  type: text
+                  constraints:
+                    nullable: false
+              - column:
+                  name: version
+                  type: int
+                  constraints:
+                    nullable: false
+              - column:
+                  name: display_name
+                  type: text
+                  constraints:
+                    nullable: false
+              - column:
+                  name: description
+                  type: text
+              - column:
+                  name: pipeline_type
+                  type: text
+                  constraints:
+                    nullable: false
+              - column:
+                  name: wdl_url
+                  type: text
+              - column:
+                  name: wdl_method_name
+                  type: text
+              - column:
+                  name: wdl_method_version
+                  type: text
+              - column:
+                  name: workspace_id
+                  type: uuid
+              - column:
+                  name: workspace_billing_project
+                  type: text
+              - column:
+                  name: workspace_name
+                  type: text
+              - column:
+                  name: workspace_google_project
+                  type: text
+              - column:
+                  name: workspace_storage_container_name
+                  type: text
+
+        # Create pipeline_input_definitions table
+        - createTable:
+            tableName: pipeline_input_definitions
+            remarks: |
+              Defines inputs required for each pipeline
+            columns:
+              - column:
+                  name: id
+                  type: serial
+                  constraints:
+                    primaryKey: true
+                    nullable: false
+              - column:
+                  name: pipeline_id
+                  type: int
+                  constraints:
+                    nullable: false
+                    foreignKeyName: pipeline_input_defs_pipeline_id_FK
+                    referencedTableName: pipelines
+                    referencedColumnNames: id
+              - column:
+                  name: name  # unique per pipeline_id (unique_pipeline_input_name, added later in this changeSet)
+                  type: text
+                  constraints:
+                    nullable: false
+              - column:
+                  name: type
+                  type: text
+                  constraints:
+                    nullable: false
+              - column:
+                  name: is_required
+                  type: boolean
+                  constraints:
+                    nullable: false
+              - column:
+                  name: file_suffix
+                  type: text
+              - column:
+                  name: user_provided
+                  type: boolean
+                  defaultValueBoolean: true
+                  constraints:
+                    nullable: false
+              - column:
+                  name: default_value
+                  type: text
+              - column:
+                  name: wdl_variable_name
+                  type: varchar(255)
+
+        # Create pipeline_output_definitions table
+        - createTable:
+            tableName: pipeline_output_definitions
+            remarks: |
+              Defines expected outputs for each pipeline
+            columns:
+              - column:
+                  name: id
+                  type: serial
+                  constraints:
+                    primaryKey: true
+                    nullable: false
+              - column:
+                  name: pipeline_id
+                  type: int
+                  constraints:
+                    nullable: false
+                    foreignKeyName: pipeline_output_defs_pipeline_id_FK
+                    referencedTableName: pipelines
+                    referencedColumnNames: id
+              - column:
+                  name: name  # NOTE(review): unlike the input definitions, no (pipeline_id, name) unique constraint is added for outputs - confirm intentional
+                  type: text
+                  constraints:
+                    nullable: false
+              - column:
+                  name: type
+                  type: text
+                  constraints:
+                    nullable: false
+              - column:
+                  name: wdl_variable_name
+                  type: varchar(255)
+
+        # Create pipeline_runs table
+        - createTable:
+            tableName: pipeline_runs
+            remarks: |
+              Tracks individual pipeline execution runs
+            columns:
+              - column:
+                  name: id
+                  type: serial
+                  constraints:
+                    primaryKey: true
+                    nullable: false
+              - column:
+                  name: job_id
+                  type: uuid  # separate from the serial id primary key; kept unique by pipeline_runs_jobId_unique
+                  constraints:
+                    nullable: false
+                    uniqueConstraintName: pipeline_runs_jobId_unique
+                    unique: true
+              - column:
+                  name: user_id
+                  type: text
+                  constraints:
+                    nullable: false
+              - column:
+                  name: pipeline_id
+                  type: int
+                  constraints:
+                    nullable: false
+                    foreignKeyName: pipeline_runs_pipeline_id_FK
+                    referencedTableName: pipelines
+                    referencedColumnNames: id
+              - column:
+                  name: created
+                  type: timestamp with time zone
+                  defaultValueComputed: NOW()
+                  constraints:
+                    nullable: false
+              - column:
+                  name: updated
+                  type: timestamp with time zone
+                  defaultValueComputed: NOW()  # also reset to now() on every row UPDATE by the pipeline_run_updated trigger at the end of this changeSet
+                  constraints:
+                    nullable: false
+              - column:
+                  name: status
+                  type: text
+              - column:
+                  name: description
+                  type: text
+              - column:
+                  name: result_url
+                  type: text
+              - column:
+                  name: workspace_id
+                  type: uuid
+              - column:
+                  name: workspace_billing_project
+                  type: text
+              - column:
+                  name: workspace_name
+                  type: text
+              - column:
+                  name: workspace_google_project
+                  type: text
+              - column:
+                  name: workspace_storage_container_name
+                  type: text
+              - column:
+                  name: wdl_method_version
+                  type: text
+
+        # Create pipeline_inputs table
+        - createTable:
+            tableName: pipeline_inputs
+            remarks: |
+              Stores input parameters for pipeline runs
+            columns:
+              - column:
+                  name: job_id
+                  type: int  # despite the name, this references pipeline_runs.id (serial), not the uuid pipeline_runs.job_id
+                  constraints:
+                    nullable: false
+                    primaryKey: true  # primary key on the foreign key: at most one inputs row per pipeline run
+                    foreignKeyName: pipeline_inputs_job_id_FK
+                    referencedTableName: pipeline_runs
+                    referencedColumnNames: id
+              - column:
+                  name: inputs
+                  type: text
+                  constraints:
+                    nullable: false
+
+        # Create pipeline_outputs table
+        - createTable:
+            tableName: pipeline_outputs
+            remarks: |
+              Stores output results from pipeline runs
+            columns:
+              - column:
+                  name: job_id
+                  type: int  # despite the name, this references pipeline_runs.id (serial), not the uuid pipeline_runs.job_id
+                  constraints:
+                    nullable: false
+                    primaryKey: true  # primary key on the foreign key: at most one outputs row per pipeline run
+                    foreignKeyName: pipeline_outputs_job_id_FK
+                    referencedTableName: pipeline_runs
+                    referencedColumnNames: id
+              - column:
+                  name: outputs
+                  type: text
+                  constraints:
+                    nullable: false
+
+        # Create pipeline_quotas table
+        - createTable:
+            tableName: pipeline_quotas
+            remarks: |
+              Defines quota limits for each pipeline type
+            columns:
+              - column:
+                  name: id
+                  type: serial
+                  constraints:
+                    primaryKey: true
+                    nullable: false
+              - column:
+                  name: pipeline_name
+                  type: text
+                  constraints:
+                    nullable: false
+                    unique: true
+              - column:
+                  name: default_quota
+                  type: int
+                  constraints:
+                    nullable: false
+
+        # Create user_quotas table
+        - createTable:
+            tableName: user_quotas
+            remarks: |
+              Tracks quota usage per user per pipeline
+            columns:
+              - column:
+                  name: id
+                  type: serial
+                  constraints:
+                    primaryKey: true
+                    nullable: false
+              - column:
+                  name: pipeline_name  # NOTE(review): no foreign key to pipelines or pipeline_quotas is declared - confirm intentional
+                  type: text
+                  constraints:
+                    nullable: false
+              - column:
+                  name: user_id
+                  type: text
+                  constraints:
+                    nullable: false
+              - column:
+                  name: quota
+                  type: int
+                  constraints:
+                    nullable: false
+              - column:
+                  name: quota_consumed
+                  type: int
+                  constraints:
+                    nullable: false
+                  defaultValue: 0  # NOTE(review): column-level attribute (must not be nested under constraints); Liquibase offers defaultValueNumeric for numeric columns - confirm before changing an already-applied changeSet
+
+        # Add constraints and indexes
+        - addUniqueConstraint:
+            tableName: pipelines
+            constraintName: pipelines_name_version_uk
+            columnNames: name, version
+
+        - addUniqueConstraint:
+            tableName: pipeline_input_definitions
+            constraintName: unique_pipeline_input_name
+            columnNames: pipeline_id, name
+
+        - addUniqueConstraint:
+            tableName: user_quotas
+            constraintName: user_quotas_userid_pname_uk
+            columnNames: user_id, pipeline_name
+
+        - createIndex:
+            tableName: user_quotas
+            indexName: user_quotas_userid_idx  # NOTE(review): user_id is already the leading column of user_quotas_userid_pname_uk - confirm this extra index is needed
+            columns:
+              - column:
+                  name: user_id
+
+        # Create trigger for updated timestamp
+        - sql:
+            dbms: 'postgresql'  # restricts this change to PostgreSQL
+            splitStatements: false  # the function body contains ';', so it must be sent as a single statement
+            sql: >
+              create function public.pipeline_run_updated_to_now()
+              returns trigger as $$
+              BEGIN
+              NEW.updated = now();
+              return NEW;
+              END;
+              $$ language 'plpgsql';
+
+        - sql:
+            dbms: 'postgresql'  # restricts this change to PostgreSQL
+            sql: >
+              create trigger pipeline_run_updated
+              before update on public.pipeline_runs
+              for each row
+              execute procedure public.pipeline_run_updated_to_now();
diff --git a/service/src/main/resources/db/changesets/20240412-testdata.yaml b/service/src/main/resources/db/changesets/testdata.yaml
similarity index 75%
rename from service/src/main/resources/db/changesets/20240412-testdata.yaml
rename to service/src/main/resources/db/changesets/testdata.yaml
index 7be1e438..fdb86c3d 100644
--- a/service/src/main/resources/db/changesets/20240412-testdata.yaml
+++ b/service/src/main/resources/db/changesets/testdata.yaml
@@ -4,7 +4,10 @@ databaseChangeLog:
       author: mma
       context: test
       changes:
+        # Insert test data (only in test context)
         - insert:
+            tableName: pipeline_runs
+            context: test  # NOTE(review): the enclosing changeSet already declares context: test; confirm Liquibase honours a context key on an individual change
             columns:
               - column:
                   name: job_id
@@ -17,11 +20,10 @@ databaseChangeLog:
                   valueComputed: (SELECT id FROM pipelines WHERE name='array_imputation')
               - column:
                   name: description
-                  value: "Test Pipeline Description"
+                  value: Test Pipeline Description
               - column:
                   name: status
-                  value: "RUNNING"
+                  value: RUNNING
               - column:
                   name: result_url
-                  value: "https://some-teaspoons-domain.com/test/result/path"
-            tableName: pipeline_runs
+                  value: https://some-teaspoons-domain.com/test/result/path
diff --git a/service/src/test/java/bio/terra/pipelines/testutils/TestUtils.java b/service/src/test/java/bio/terra/pipelines/testutils/TestUtils.java
index 4f6a85c3..3313c8f5 100644
--- a/service/src/test/java/bio/terra/pipelines/testutils/TestUtils.java
+++ b/service/src/test/java/bio/terra/pipelines/testutils/TestUtils.java
@@ -21,7 +21,7 @@ public class TestUtils {
           .getValue();
   // this matches the job pre-populated in the db for tests in that it is in
   // the jobs table
-  public static final Long TEST_PIPELINE_ID_1 = 3L;
+  public static final Long TEST_PIPELINE_ID_1 = 1L;
   public static final int TEST_PIPELINE_VERSION_1 = 0;
   public static final String TEST_PIPELINE_DISPLAY_NAME_1 = "Test Pipeline Name";
   // this matches the job pre-populated in the db for tests
@@ -172,7 +172,7 @@ public static Pipeline createTestPipelineWithId() {
             TestUtils.TEST_PIPELINE_1.getWorkspaceGoogleProject(),
             TestUtils.TEST_PIPELINE_1.getPipelineInputDefinitions(),
             TestUtils.TEST_PIPELINE_1.getPipelineOutputDefinitions());
-    pipeline.setId(3L);
+    pipeline.setId(1L);
     return pipeline;
   }