diff --git a/config/icav2.yaml b/config/icav2.yaml index dde608fc..c9d0ca50 100644 --- a/config/icav2.yaml +++ b/config/icav2.yaml @@ -25,12 +25,149 @@ projects: project_id: eba5c946-1677-441d-bbce-6a11baadecbb tenant_name: umccr-prod - +# REFERENCE DATA SETS datasets: - # REFERENCE DATA SETS # Dragen Hash Tables + # chm13 v10 r4 graph + - dataset_tenant_name: umccr-prod + dataset_name: dragen_hash_table_chm13_v2_v10_r4_graph_cnv_hla_rna + dataset_description: | + This tarball was generated by downloading / uploading the illumina tar ball under a nested folder + The original tarball can be found here - + https://webdata.illumina.com/downloads/software/dragen/resource-files/chm13_v2-cnv.graph.hla.rna-10-r4.0-1.tar.gz + From https://support.illumina.com/sequencing/sequencing_software/dragen-bio-it-platform/product_files.html + cwl_username: Alexis Lucattini + dataset_region_id: 1efd315d-6309-4d7e-826b-d3824b0b5acb + dataset_region_city_name: Sydney + dataset_creation_time: '2024-11-04T02:05:03Z' + dataset_id_hash: 9511ca5eb8a31b30dd4ee8d80d83cb93 + data: + - data_id: fil.523d8dcc66b443987ade08dcf793071e + owning_project_id: 92bc8608-9393-44b4-bf16-fb0c5a12269a + owning_project_name: reference-data + data_uri: + icav2://reference-data/dragen-hash-tables/v10-r4/chm13-v2-graph-cnv-hla-rna/chm13_v2-cnv.graph.hla.rna-10-r4.0-1.tar.gz + creation_time: '2024-10-30T00:46:17Z' + modification_time: '2024-10-30T12:18:49Z' + creator_id: + creator_name: + data_type: FILE + file_size_in_bytes: 15327549226 + object_e_tag: 806c5e4389920a9356c0b8f06b39e871-1828 + # chm13 v10 r4 linear - dataset_tenant_name: umccr-prod - dataset_name: dragen_hash_table_v9_r3_alt_masked_cnv_hla_rna + dataset_name: dragen_hash_table_chm13_v2_v10_r4_linear_cnv_hla_rna_methylated_combined + dataset_description: | + This tarball was generated by downloading / uploading the illumina tar ball under a nested folder + The original tarball can be found here - + 
https://webdata.illumina.com/downloads/software/dragen/resource-files/misc/chm13_v2-cnv.hla.methylated_combined.rna-10-r4.0-1.tar.gz + From https://support.illumina.com/sequencing/sequencing_software/dragen-bio-it-platform/product_files.html + cwl_username: Alexis Lucattini + dataset_region_id: 1efd315d-6309-4d7e-826b-d3824b0b5acb + dataset_region_city_name: Sydney + dataset_creation_time: '2024-11-05T23:16:55Z' + dataset_id_hash: f62a6eb3655012f0f01bb3372d0e7029 + data: + - data_id: fil.046e468bff4240486a6a08dcfd053b95 + owning_project_id: 92bc8608-9393-44b4-bf16-fb0c5a12269a + owning_project_name: reference-data + data_uri: + icav2://reference-data/dragen-hash-tables/v10-r4/chm13-v2-linear-cnv-hla-rna-methylated/chm13_v2-cnv.hla.methylated_combined.rna-10-r4.0-1.tar.gz + creation_time: '2024-11-04T20:53:02Z' + modification_time: '2024-11-05T15:14:02Z' + creator_id: + creator_name: + data_type: FILE + file_size_in_bytes: 14041386948 + object_e_tag: a05953db455f60f0f9460379c598ee8b-1674 + + # hg38 v10 r4 graph + - dataset_tenant_name: umccr-prod + dataset_name: dragen_hash_table_hg38_alt_masked_v10_r4_graph_cnv_hla_rna + dataset_description: | + This tarball was generated by downloading / uploading the illumina tar ball under a nested folder + The original tarball can be found here - + https://webdata.illumina.com/downloads/software/dragen/resource-files/hg38-alt_masked.cnv.graph.hla.rna-10-r4.0-1.tar.gz + From https://support.illumina.com/sequencing/sequencing_software/dragen-bio-it-platform/product_files.html + cwl_username: Alexis Lucattini + dataset_region_id: 1efd315d-6309-4d7e-826b-d3824b0b5acb + dataset_region_city_name: Sydney + dataset_creation_time: '2024-11-04T01:55:11Z' + dataset_id_hash: d6ac5cf3214df27d63a634cbe725ebea + data: + - data_id: fil.066636f433c441317b4708dcf793071e + owning_project_id: 92bc8608-9393-44b4-bf16-fb0c5a12269a + owning_project_name: reference-data + data_uri: + 
icav2://reference-data/dragen-hash-tables/v10-r4/hg38-alt_masked-cnv-hla-rna/hg38-alt_masked.cnv.graph.hla.rna-10-r4.0-1.tar.gz + creation_time: '2024-10-30T00:50:30Z' + modification_time: '2024-10-30T12:18:49Z' + creator_id: + creator_name: + data_type: FILE + file_size_in_bytes: 15752669461 + object_e_tag: 39a6610732f4140fcb47bb40fba4cc5f-1878 + + # hg38 v10 r4 linear + - dataset_tenant_name: umccr-prod + dataset_name: dragen_hash_table_hg38_alt_masked_v10_r4_linear_cnv_hla_rna_methylated_combined + dataset_description: | + This tarball was generated by downloading / uploading the illumina tar ball under a nested folder + The original tarball can be found here - + https://webdata.illumina.com/downloads/software/dragen/resource-files/misc/hg38-alt_masked.cnv.hla.methylated_combined.rna-10-r4.0-1.tar + From https://support.illumina.com/sequencing/sequencing_software/dragen-bio-it-platform/product_files.html + + Note that despite the .tar suffix, this url was actually gzipped, so the url source above may change. 
+ This has been recorded under Ilmn Tech Support ticket SFC: 04034243 + cwl_username: Alexis Lucattini + dataset_region_id: 1efd315d-6309-4d7e-826b-d3824b0b5acb + dataset_region_city_name: Sydney + dataset_creation_time: '2024-11-05T23:19:50Z' + dataset_id_hash: e3d80dec2012c2e65627b68ee9a07293 + data: + - data_id: fil.030e1b16b3aa4a2d6a2a08dcfd053b95 + owning_project_id: 92bc8608-9393-44b4-bf16-fb0c5a12269a + owning_project_name: reference-data + data_uri: + icav2://reference-data/dragen-hash-tables/v10-r4/hg38-alt_masked-linear-cnv-hla-rna-methylated/hg38-alt_masked.cnv.hla.methylated_combined.rna-10-r4.0-1.tar.gz + creation_time: '2024-11-04T20:45:57Z' + modification_time: '2024-11-05T15:14:04Z' + creator_id: + creator_name: + data_type: FILE + file_size_in_bytes: 13086566012 + object_e_tag: 8d5d1eb8d7159ecfa6bc88b0eb1bd30c-1561 + + # chm13 v9 r3 graph + - dataset_tenant_name: umccr-prod + dataset_name: dragen_hash_table_chm13_v2_v9_r3_graph_cnv_hla_rna + dataset_description: | + This tarball was generated by downloading / uploading the illumina tar ball under a nested folder + The original tarball can be found here - + https://webdata.illumina.com/downloads/software/dragen/resource-files/misc/chm13_v2-cnv.graph.hla.rna-9-r3.0-1.tar.gz + From https://support.illumina.com/sequencing/sequencing_software/dragen-bio-it-platform/product_files.html + cwl_username: Alexis Lucattini + dataset_region_id: 1efd315d-6309-4d7e-826b-d3824b0b5acb + dataset_region_city_name: Sydney + dataset_creation_time: '2024-11-04T02:07:30Z' + dataset_id_hash: 2157dcfee23036650bbe873e8a149fbc + data: + - data_id: fil.91180398f19d4485754a08dcf54e56f6 + owning_project_id: 92bc8608-9393-44b4-bf16-fb0c5a12269a + owning_project_name: reference-data + data_uri: + icav2://reference-data/dragen-hash-tables/v9-r3/chm13-v2-graph-cnv-hla-rna/chm13_v2-cnv.graph.hla.rna-9-r3.0-1.tar.gz + creation_time: '2024-10-28T01:29:32Z' + modification_time: '2024-10-28T08:10:30Z' + creator_id: + creator_name: + 
data_type: FILE + file_size_in_bytes: 9334745252 + object_e_tag: 48354546011d29254a42d3b4fe4ff60b-1113 + + # hg38 v9 r3 linear + - dataset_tenant_name: umccr-prod + dataset_name: dragen_hash_table_hg38_alt_masked_v9_r3_linear_cnv_hla_rna dataset_description: | A tarred version of the v9 r3 hash table for dragen 4.2.4 This reference is a custom reference by UMCCR. It was built using the following parameters with the dragen version 4.2.4 @@ -1205,7 +1342,6 @@ datasets: object_e_tag: cd29935e722171bee7b568b08852cabd-286 # List of bunches / bunch versions that are precursors to bundles - bunches: # BCLConvert Interop QC - bunch_name: bclconvert_interop_qc_prod__1_3_1__1_21 @@ -1230,7 +1366,7 @@ bunches: datasets: [] # DRAGEN WGTS Alignment QC - # Validation Only + # Validation Only 4.2.4 - bunch_name: dragen_alignment_pipeline_with_validation_data__4_2_4 bunch_description: | Contains the UMCCR dragen-alignment-pipeline, validation data and the appropriate reference data for dragen version 4.2.4 @@ -1259,7 +1395,7 @@ bunches: Version for v9-r3 with alt-masked, cnv, hla, rna enabled features version_creation_date: 2024-06-23T23:21:07+00:00Z datasets: - - dataset_name: dragen_hash_table_v9_r3_alt_masked_cnv_hla_rna + - dataset_name: dragen_hash_table_hg38_alt_masked_v9_r3_linear_cnv_hla_rna dataset_creation_time: '2024-06-21T09:21:42Z' dataset_id_hash: a5fe1d2e2fe35737bdbc0871d46878e1 - dataset_name: hg38_v39_gencode_annotation @@ -1310,7 +1446,7 @@ bunches: - dataset_name: wts_validation_fastq__SBJ00238 dataset_creation_time: '2024-06-21T09:50:29Z' dataset_id_hash: ba04ff08acbfbd1c17f1aadc0e8b3cf5 - # Production + # Production 4.2.4 - bunch_name: dragen_alignment_pipeline_prod__4_2_4 bunch_description: | Contains the UMCCR dragen-alignment-pipeline, and the appropriate reference data for dragen version 4.2.4 @@ -1338,15 +1474,142 @@ bunches: Version for v9-r3 with alt-masked, cnv, hla, rna enabled features version_creation_date: 2024-06-23T23:22:01+00:00Z datasets: - - 
dataset_name: dragen_hash_table_v9_r3_alt_masked_cnv_hla_rna + - dataset_name: dragen_hash_table_hg38_alt_masked_v9_r3_linear_cnv_hla_rna dataset_creation_time: '2024-06-21T09:21:42Z' dataset_id_hash: a5fe1d2e2fe35737bdbc0871d46878e1 - dataset_name: hg38_v39_gencode_annotation dataset_creation_time: '2024-07-08T01:23:37Z' dataset_id_hash: 7e88d10c15bfc88ee2e9527962c63984 + # Validation 4.3.6 + - bunch_name: dragen_alignment_pipeline_with_validation_data__4_3_6 + bunch_description: | + Contains the UMCCR dragen-alignment-pipeline, validation data and the appropriate reference data for dragen version 4.3.6 + tenant_name: umccr-prod + pipeline_path: workflows/dragen-alignment-pipeline/4.3.6/dragen-alignment-pipeline__4.3.6.cwl + pipeline_project_name: pipelines + bunch_region_id: 1efd315d-6309-4d7e-826b-d3824b0b5acb + bunch_region_city_name: Sydney + projects: + - development + - staging + categories: + - wgs + - wts + - alignment + - qc + - validation + - primary-analysis + bunch_versions: + - version: v10_r4 + version_description: | + This version of the bunch contains the dragen hash table for hg38 + chm13 references + Version for v10-r4 graph and linear alt-masked, with cnv, hla, rna, and methylated enabled features. 
+ Also contains all the fastq validation data + version_creation_date: 2024-11-05T23:57:08+00:00Z + datasets: + - dataset_name: dragen_hash_table_chm13_v2_v10_r4_graph_cnv_hla_rna + dataset_creation_time: '2024-11-04T02:05:03Z' + dataset_id_hash: 9511ca5eb8a31b30dd4ee8d80d83cb93 + - dataset_name: dragen_hash_table_chm13_v2_v10_r4_linear_cnv_hla_rna_methylated_combined + dataset_creation_time: '2024-11-05T23:16:55Z' + dataset_id_hash: f62a6eb3655012f0f01bb3372d0e7029 + - dataset_name: dragen_hash_table_hg38_alt_masked_v10_r4_graph_cnv_hla_rna + dataset_creation_time: '2024-11-04T01:55:11Z' + dataset_id_hash: d6ac5cf3214df27d63a634cbe725ebea + - dataset_name: dragen_hash_table_hg38_alt_masked_v10_r4_linear_cnv_hla_rna_methylated_combined + dataset_creation_time: '2024-11-05T23:19:50Z' + dataset_id_hash: e3d80dec2012c2e65627b68ee9a07293 + - dataset_name: hg38_v39_gencode_annotation + dataset_creation_time: '2024-07-08T01:23:37Z' + dataset_id_hash: 7e88d10c15bfc88ee2e9527962c63984 + - dataset_name: wgs_validation_fastq__cups_pair_8 + dataset_creation_time: '2024-06-21T09:35:36Z' + dataset_id_hash: fca2fb140731ea7900d2c5c96061a280 + - dataset_name: wgs_validation_fastq__2016_249_17_MH_P033 + dataset_creation_time: '2024-06-21T09:37:03Z' + dataset_id_hash: ab2e2265f84e64ff9687b6e8a3f5c5d4 + - dataset_name: wgs_validation_fastq__2016_249_18_WH_P025 + dataset_creation_time: '2024-06-21T09:37:36Z' + dataset_id_hash: 48b0a8616df58b94ec6f358037850f1e + - dataset_name: wgs_validation_fastq__B_ALL_Case_10 + dataset_creation_time: '2024-06-21T09:38:04Z' + dataset_id_hash: 3938ab2e04aae0156897f3fee3676ac2 + - dataset_name: wgs_validation_fastq_Diploid_Never_Responder + dataset_creation_time: '2024-06-21T09:38:33Z' + dataset_id_hash: 45c8ce2f0995873364f760ca797c5c6a + - dataset_name: wgs_validation_fastq_SBJ00303 + dataset_creation_time: '2024-06-21T09:39:42Z' + dataset_id_hash: ebfe847356803cb27a8009deedf30944 + - dataset_name: wgs_validation_fastq_SEQC50 + dataset_creation_time: 
'2024-06-21T09:40:14Z' + dataset_id_hash: 2e5fbe564ac04069f3fc2cd254465dd8 + - dataset_name: wgs_validation_fastq_SFRC01073 + dataset_creation_time: '2024-06-21T09:40:51Z' + dataset_id_hash: deac29472b118a9b51889a661e804cd8 + - dataset_name: wts_validation_fastq__SBJ00480 + dataset_creation_time: '2024-06-21T09:41:15Z' + dataset_id_hash: c8b8bdc7e384aaa470b12992ce196693 + - dataset_name: wts_validation_fastq__SBJ00028 + dataset_creation_time: '2024-06-21T09:42:31Z' + dataset_id_hash: fa76e6ca3a0fb0be2bf41840a7aeeb01 + - dataset_name: wts_validation_fastq__SBJ00061 + dataset_creation_time: '2024-06-21T09:42:54Z' + dataset_id_hash: 63ee438354c347b05218b9bddc53d936 + - dataset_name: wts_validation_fastq__SBJ00188 + dataset_creation_time: '2024-06-21T09:43:11Z' + dataset_id_hash: 401d12d90e00db19918e5c9ce13a3da0 + - dataset_name: wts_validation_fastq__SBJ00199 + dataset_creation_time: '2024-06-21T09:49:27Z' + dataset_id_hash: b95ac04c7e3c03a8b1868a4571d23d5f + - dataset_name: wts_validation_fastq__SBJ00236 + dataset_creation_time: '2024-06-21T09:49:54Z' + dataset_id_hash: f084081c52ddda14328294d7bb7f13f4 + - dataset_name: wts_validation_fastq__SBJ00238 + dataset_creation_time: '2024-06-21T09:50:29Z' + dataset_id_hash: ba04ff08acbfbd1c17f1aadc0e8b3cf5 + # Production 4.3.6 + - bunch_name: dragen_alignment_pipeline_prod__4_3_6 + bunch_description: | + Contains the UMCCR dragen-alignment-pipeline, and the appropriate reference data for dragen version 4.3.6 + tenant_name: umccr-prod + pipeline_path: workflows/dragen-alignment-pipeline/4.3.6/dragen-alignment-pipeline__4.3.6.cwl + pipeline_project_name: pipelines + bunch_region_id: 1efd315d-6309-4d7e-826b-d3824b0b5acb + bunch_region_city_name: Sydney + projects: + - production + categories: + - wgs + - wts + - alignment + - qc + - production + - primary-analysis + bunch_versions: + - version: v10_r4 + version_description: | + This version of the bunch contains the dragen hash table for hg38 + chm13 references + Version for 
v10-r4 graph and linear alt-masked, with cnv, hla, rna, and methylated enabled features. + version_creation_date: 2024-11-05T23:58:33+00:00Z + datasets: + - dataset_name: dragen_hash_table_chm13_v2_v10_r4_graph_cnv_hla_rna + dataset_creation_time: '2024-11-04T02:05:03Z' + dataset_id_hash: 9511ca5eb8a31b30dd4ee8d80d83cb93 + - dataset_name: dragen_hash_table_chm13_v2_v10_r4_linear_cnv_hla_rna_methylated_combined + dataset_creation_time: '2024-11-05T23:16:55Z' + dataset_id_hash: f62a6eb3655012f0f01bb3372d0e7029 + - dataset_name: dragen_hash_table_hg38_alt_masked_v10_r4_graph_cnv_hla_rna + dataset_creation_time: '2024-11-04T01:55:11Z' + dataset_id_hash: d6ac5cf3214df27d63a634cbe725ebea + - dataset_name: dragen_hash_table_hg38_alt_masked_v10_r4_linear_cnv_hla_rna_methylated_combined + dataset_creation_time: '2024-11-05T23:19:50Z' + dataset_id_hash: e3d80dec2012c2e65627b68ee9a07293 + - dataset_name: hg38_v39_gencode_annotation + dataset_creation_time: '2024-07-08T01:23:37Z' + dataset_id_hash: 7e88d10c15bfc88ee2e9527962c63984 # DRAGEN WTS Pipeline - # Validation + # Validation 4.2.4 - bunch_name: dragen_transcriptome_pipeline_with_validation_data__4_2_4 bunch_description: | Contains the UMCCR dragen-transcriptome-pipeline, validation data and the appropriate reference data for dragen version 4.2.4 @@ -1374,7 +1637,7 @@ bunches: Version for v9-r3 with alt-masked, cnv, hla, rna enabled features version_creation_date: 2024-06-23T23:24:23+00:00Z datasets: - - dataset_name: dragen_hash_table_v9_r3_alt_masked_cnv_hla_rna + - dataset_name: dragen_hash_table_hg38_alt_masked_v9_r3_linear_cnv_hla_rna dataset_creation_time: '2024-06-21T09:21:42Z' dataset_id_hash: a5fe1d2e2fe35737bdbc0871d46878e1 - dataset_name: hg38_fasta @@ -1422,7 +1685,7 @@ bunches: - dataset_name: wts_validation_fastq__SBJ00238 dataset_creation_time: '2024-06-21T09:50:29Z' dataset_id_hash: ba04ff08acbfbd1c17f1aadc0e8b3cf5 - # Production + # Production 4.2.4 - bunch_name: dragen_transcriptome_pipeline_prod__4_2_4 
bunch_description: | Contains the UMCCR dragen-transcriptome-pipeline, and the appropriate reference data for dragen version 4.2.4 @@ -1449,7 +1712,7 @@ bunches: Version for v9-r3 with alt-masked, cnv, hla, rna enabled features version_creation_date: 2024-06-23T23:24:58+00:00Z datasets: - - dataset_name: dragen_hash_table_v9_r3_alt_masked_cnv_hla_rna + - dataset_name: dragen_hash_table_hg38_alt_masked_v9_r3_linear_cnv_hla_rna dataset_creation_time: '2024-06-21T09:21:42Z' dataset_id_hash: a5fe1d2e2fe35737bdbc0871d46878e1 - dataset_name: hg38_v39_gencode_annotation @@ -1476,9 +1739,220 @@ bunches: - dataset_name: wts_multiqc__2023_07_21__4_2_4__Ref_5_Bad__SBJ01673 dataset_creation_time: '2024-06-21T11:35:25Z' dataset_id_hash: 42a9396b3f5d7e919e30a0f59f2401b7 + # Validation 4.3.6 + - bunch_name: dragen_transcriptome_pipeline_with_validation_data__4_3_6 + bunch_description: | + Contains the UMCCR dragen-transcriptome-pipeline, validation data and the appropriate reference data for dragen version 4.3.6 + tenant_name: umccr-prod + pipeline_path: + workflows/dragen-transcriptome-pipeline/4.3.6/dragen-transcriptome-pipeline__4.3.6.cwl + pipeline_project_name: pipelines + bunch_region_id: 1efd315d-6309-4d7e-826b-d3824b0b5acb + bunch_region_city_name: Sydney + projects: + - development + - staging + categories: + - wts + - somatic + - validation + - secondary-analysis + bunch_versions: + - version: v10_r4 + version_description: | + This version of the bunch contains the dragen hash table for hg38 + chm13 reference genomes. + Version for v10-r4 graph and linear alt-masked, with cnv, hla, rna, and methylated enabled features. 
+ Also contains all the fastq validation data + version_creation_date: 2024-11-05T23:58:58+00:00Z + datasets: + - dataset_name: dragen_hash_table_chm13_v2_v10_r4_graph_cnv_hla_rna + dataset_creation_time: '2024-11-04T02:05:03Z' + dataset_id_hash: 9511ca5eb8a31b30dd4ee8d80d83cb93 + - dataset_name: dragen_hash_table_chm13_v2_v10_r4_linear_cnv_hla_rna_methylated_combined + dataset_creation_time: '2024-11-05T23:16:55Z' + dataset_id_hash: f62a6eb3655012f0f01bb3372d0e7029 + - dataset_name: dragen_hash_table_hg38_alt_masked_v10_r4_graph_cnv_hla_rna + dataset_creation_time: '2024-11-04T01:55:11Z' + dataset_id_hash: d6ac5cf3214df27d63a634cbe725ebea + - dataset_name: dragen_hash_table_hg38_alt_masked_v10_r4_linear_cnv_hla_rna_methylated_combined + dataset_creation_time: '2024-11-05T23:19:50Z' + dataset_id_hash: e3d80dec2012c2e65627b68ee9a07293 + - dataset_name: hg38_fasta + dataset_creation_time: '2024-06-21T11:31:50Z' + dataset_id_hash: 5f129b14aca5b4a7fda902590b15abc2 + - dataset_name: hg38_v39_gencode_annotation + dataset_creation_time: '2024-07-08T01:23:37Z' + dataset_id_hash: 7e88d10c15bfc88ee2e9527962c63984 + - dataset_name: arriba_2_4_0 + dataset_creation_time: '2024-06-21T11:31:26Z' + dataset_id_hash: 8695cf0a98c5cda5726f67d4922da67f + - dataset_name: wts_validation_fastq__SBJ00480 + dataset_creation_time: '2024-06-21T09:41:15Z' + dataset_id_hash: c8b8bdc7e384aaa470b12992ce196693 + - dataset_name: wts_validation_fastq__SBJ00028 + dataset_creation_time: '2024-06-21T09:42:31Z' + dataset_id_hash: fa76e6ca3a0fb0be2bf41840a7aeeb01 + - dataset_name: wts_validation_fastq__SBJ00061 + dataset_creation_time: '2024-06-21T09:42:54Z' + dataset_id_hash: 63ee438354c347b05218b9bddc53d936 + - dataset_name: wts_validation_fastq__SBJ00188 + dataset_creation_time: '2024-06-21T09:43:11Z' + dataset_id_hash: 401d12d90e00db19918e5c9ce13a3da0 + - dataset_name: wts_validation_fastq__SBJ00199 + dataset_creation_time: '2024-06-21T09:49:27Z' + dataset_id_hash: b95ac04c7e3c03a8b1868a4571d23d5f + 
- dataset_name: wts_validation_fastq__SBJ00236 + dataset_creation_time: '2024-06-21T09:49:54Z' + dataset_id_hash: f084081c52ddda14328294d7bb7f13f4 + - dataset_name: wts_validation_fastq__SBJ00238 + dataset_creation_time: '2024-06-21T09:50:29Z' + dataset_id_hash: ba04ff08acbfbd1c17f1aadc0e8b3cf5 + # Production 4.3.6 + - bunch_name: dragen_transcriptome_pipeline_prod__4_3_6 + bunch_description: | + Contains the UMCCR dragen-transcriptome-pipeline, and appropriate reference data for dragen version 4.3.6 + tenant_name: umccr-prod + pipeline_path: + workflows/dragen-transcriptome-pipeline/4.3.6/dragen-transcriptome-pipeline__4.3.6.cwl + pipeline_project_name: pipelines + bunch_region_id: 1efd315d-6309-4d7e-826b-d3824b0b5acb + bunch_region_city_name: Sydney + projects: + - production + categories: + - wts + - somatic + - production + - secondary-analysis + bunch_versions: + - version: v10_r4 + version_description: | + This version of the bunch contains the dragen hash table for hg38 + chm13 reference genomes. + Version for v10-r4 graph and linear alt-masked, with cnv, hla, rna, and methylated enabled features. 
+ version_creation_date: 2024-11-05T23:59:15+00:00Z + datasets: + - dataset_name: dragen_hash_table_chm13_v2_v10_r4_graph_cnv_hla_rna + dataset_creation_time: '2024-11-04T02:05:03Z' + dataset_id_hash: 9511ca5eb8a31b30dd4ee8d80d83cb93 + - dataset_name: dragen_hash_table_chm13_v2_v10_r4_linear_cnv_hla_rna_methylated_combined + dataset_creation_time: '2024-11-05T23:16:55Z' + dataset_id_hash: f62a6eb3655012f0f01bb3372d0e7029 + - dataset_name: dragen_hash_table_hg38_alt_masked_v10_r4_graph_cnv_hla_rna + dataset_creation_time: '2024-11-04T01:55:11Z' + dataset_id_hash: d6ac5cf3214df27d63a634cbe725ebea + - dataset_name: dragen_hash_table_hg38_alt_masked_v10_r4_linear_cnv_hla_rna_methylated_combined + dataset_creation_time: '2024-11-05T23:19:50Z' + dataset_id_hash: e3d80dec2012c2e65627b68ee9a07293 + - dataset_name: hg38_fasta + dataset_creation_time: '2024-06-21T11:31:50Z' + dataset_id_hash: 5f129b14aca5b4a7fda902590b15abc2 + - dataset_name: hg38_v39_gencode_annotation + dataset_creation_time: '2024-07-08T01:23:37Z' + dataset_id_hash: 7e88d10c15bfc88ee2e9527962c63984 + - dataset_name: arriba_2_4_0 + dataset_creation_time: '2024-06-21T11:31:26Z' + dataset_id_hash: 8695cf0a98c5cda5726f67d4922da67f + + # Dragen germline + # Validation 4.3.6 + - bunch_name: dragen_germline_pipeline_with_validation_data__4_3_6 + bunch_description: | + Contains the UMCCR dragen-germline-pipeline, validation data and the appropriate reference data for dragen version 4.3.6 + tenant_name: umccr-prod + pipeline_path: workflows/dragen-germline-pipeline/4.3.6/dragen-germline-pipeline__4.3.6.cwl + pipeline_project_name: pipelines + bunch_region_id: 1efd315d-6309-4d7e-826b-d3824b0b5acb + bunch_region_city_name: Sydney + projects: + - development + - staging + categories: + - wgs + - germline + - validation + - secondary-analysis + bunch_versions: + - version: v10_r4 + version_description: | + This version of the bunch contains the dragen hash table for hg38 + chm13 references + Version for v10-r4 graph 
and linear alt-masked, with cnv, hla, rna, and methylated enabled features. + Also contains all the fastq validation data + version_creation_date: 2024-11-05T23:59:35+00:00Z + datasets: + - dataset_name: dragen_hash_table_chm13_v2_v10_r4_graph_cnv_hla_rna + dataset_creation_time: '2024-11-04T02:05:03Z' + dataset_id_hash: 9511ca5eb8a31b30dd4ee8d80d83cb93 + - dataset_name: dragen_hash_table_chm13_v2_v10_r4_linear_cnv_hla_rna_methylated_combined + dataset_creation_time: '2024-11-05T23:16:55Z' + dataset_id_hash: f62a6eb3655012f0f01bb3372d0e7029 + - dataset_name: dragen_hash_table_hg38_alt_masked_v10_r4_graph_cnv_hla_rna + dataset_creation_time: '2024-11-04T01:55:11Z' + dataset_id_hash: d6ac5cf3214df27d63a634cbe725ebea + - dataset_name: dragen_hash_table_hg38_alt_masked_v10_r4_linear_cnv_hla_rna_methylated_combined + dataset_creation_time: '2024-11-05T23:19:50Z' + dataset_id_hash: e3d80dec2012c2e65627b68ee9a07293 + - dataset_name: wgs_validation_fastq__cups_pair_8 + dataset_creation_time: '2024-06-21T09:35:36Z' + dataset_id_hash: fca2fb140731ea7900d2c5c96061a280 + - dataset_name: wgs_validation_fastq__2016_249_17_MH_P033 + dataset_creation_time: '2024-06-21T09:37:03Z' + dataset_id_hash: ab2e2265f84e64ff9687b6e8a3f5c5d4 + - dataset_name: wgs_validation_fastq__2016_249_18_WH_P025 + dataset_creation_time: '2024-06-21T09:37:36Z' + dataset_id_hash: 48b0a8616df58b94ec6f358037850f1e + - dataset_name: wgs_validation_fastq__B_ALL_Case_10 + dataset_creation_time: '2024-06-21T09:38:04Z' + dataset_id_hash: 3938ab2e04aae0156897f3fee3676ac2 + - dataset_name: wgs_validation_fastq_Diploid_Never_Responder + dataset_creation_time: '2024-06-21T09:38:33Z' + dataset_id_hash: 45c8ce2f0995873364f760ca797c5c6a + - dataset_name: wgs_validation_fastq_SBJ00303 + dataset_creation_time: '2024-06-21T09:39:42Z' + dataset_id_hash: ebfe847356803cb27a8009deedf30944 + - dataset_name: wgs_validation_fastq_SEQC50 + dataset_creation_time: '2024-06-21T09:40:14Z' + dataset_id_hash: 
2e5fbe564ac04069f3fc2cd254465dd8 + - dataset_name: wgs_validation_fastq_SFRC01073 + dataset_creation_time: '2024-06-21T09:40:51Z' + dataset_id_hash: deac29472b118a9b51889a661e804cd8 + # Production 4.3.6 + - bunch_name: dragen_germline_pipeline_prod__4_3_6 + bunch_description: | + Contains the UMCCR dragen-germline-pipeline, and the appropriate reference data for dragen version 4.3.6 + tenant_name: umccr-prod + pipeline_path: workflows/dragen-germline-pipeline/4.3.6/dragen-germline-pipeline__4.3.6.cwl + pipeline_project_name: pipelines + bunch_region_id: 1efd315d-6309-4d7e-826b-d3824b0b5acb + bunch_region_city_name: Sydney + projects: + - production + categories: + - wgs + - germline + - production + - secondary-analysis + bunch_versions: + - version: v10_r4 + version_description: | + This version of the bunch contains the dragen hash table for hg38 + chm13 references + Version for v10-r4 graph and linear alt-masked, with cnv, hla, rna, and methylated enabled features. + version_creation_date: 2024-11-05T23:59:51+00:00Z + datasets: + - dataset_name: dragen_hash_table_chm13_v2_v10_r4_graph_cnv_hla_rna + dataset_creation_time: '2024-11-04T02:05:03Z' + dataset_id_hash: 9511ca5eb8a31b30dd4ee8d80d83cb93 + - dataset_name: dragen_hash_table_chm13_v2_v10_r4_linear_cnv_hla_rna_methylated_combined + dataset_creation_time: '2024-11-05T23:16:55Z' + dataset_id_hash: f62a6eb3655012f0f01bb3372d0e7029 + - dataset_name: dragen_hash_table_hg38_alt_masked_v10_r4_graph_cnv_hla_rna + dataset_creation_time: '2024-11-04T01:55:11Z' + dataset_id_hash: d6ac5cf3214df27d63a634cbe725ebea + - dataset_name: dragen_hash_table_hg38_alt_masked_v10_r4_linear_cnv_hla_rna_methylated_combined + dataset_creation_time: '2024-11-05T23:19:50Z' + dataset_id_hash: e3d80dec2012c2e65627b68ee9a07293 # Dragen Somatic with Germline Pipeline - # Validation + # Validation 4.2.4 - bunch_name: dragen_somatic_with_germline_pipeline_with_validation_data__4_2_4 bunch_description: | Contains the UMCCR 
dragen-somatic-with-germline pipeline, validation data and the appropriate reference data for dragen version 4.2.4 @@ -1505,7 +1979,7 @@ bunches: Version for v9-r3 with alt-masked, cnv, hla, rna enabled features version_creation_date: 2024-06-23T23:26:34+00:00Z datasets: - - dataset_name: dragen_hash_table_v9_r3_alt_masked_cnv_hla_rna + - dataset_name: dragen_hash_table_hg38_alt_masked_v9_r3_linear_cnv_hla_rna dataset_creation_time: '2024-06-21T09:21:42Z' dataset_id_hash: a5fe1d2e2fe35737bdbc0871d46878e1 - dataset_name: wgs_validation_fastq__cups_pair_8 @@ -1532,7 +2006,7 @@ bunches: - dataset_name: wgs_validation_fastq_SFRC01073 dataset_creation_time: '2024-06-21T09:40:51Z' dataset_id_hash: deac29472b118a9b51889a661e804cd8 - # Production + # Production 4.2.4 - bunch_name: dragen_somatic_with_germline_pipeline_prod__4_2_4 bunch_description: | Contains the UMCCR dragen-somatic-with-germline pipeline, and the appropriate reference data for dragen version 4.2.4 @@ -1559,9 +2033,109 @@ bunches: Version for v9-r3 with alt-masked, cnv, hla, rna enabled features version_creation_date: 2024-06-23T23:25:57+00:00Z datasets: - - dataset_name: dragen_hash_table_v9_r3_alt_masked_cnv_hla_rna + - dataset_name: dragen_hash_table_hg38_alt_masked_v9_r3_linear_cnv_hla_rna dataset_creation_time: '2024-06-21T09:21:42Z' dataset_id_hash: a5fe1d2e2fe35737bdbc0871d46878e1 + + # Validation 4.3.6 + - bunch_name: dragen_somatic_with_germline_pipeline_with_validation_data__4_3_6 + bunch_description: | + Contains the UMCCR dragen-somatic_with_germline-pipeline, validation data and the appropriate reference data for dragen version 4.3.6 + tenant_name: umccr-prod + pipeline_path: + workflows/dragen-somatic-with-germline-pipeline/4.3.6/dragen-somatic-with-germline-pipeline__4.3.6.cwl + pipeline_project_name: pipelines + bunch_region_id: 1efd315d-6309-4d7e-826b-d3824b0b5acb + bunch_region_city_name: Sydney + projects: + - development + - staging + categories: + - wgs + - somatic + - validation + - 
secondary-analysis + bunch_versions: + - version: v10_r4 + version_description: | + This version of the bunch contains the dragen hash table for hg38 + chm13 references + Version for v10-r4 graph and linear alt-masked, with cnv, hla, rna, and methylated enabled features. + Also contains all the fastq validation data + version_creation_date: 2024-11-06T00:00:09+00:00Z + datasets: + - dataset_name: dragen_hash_table_chm13_v2_v10_r4_graph_cnv_hla_rna + dataset_creation_time: '2024-11-04T02:05:03Z' + dataset_id_hash: 9511ca5eb8a31b30dd4ee8d80d83cb93 + - dataset_name: dragen_hash_table_chm13_v2_v10_r4_linear_cnv_hla_rna_methylated_combined + dataset_creation_time: '2024-11-05T23:16:55Z' + dataset_id_hash: f62a6eb3655012f0f01bb3372d0e7029 + - dataset_name: dragen_hash_table_hg38_alt_masked_v10_r4_graph_cnv_hla_rna + dataset_creation_time: '2024-11-04T01:55:11Z' + dataset_id_hash: d6ac5cf3214df27d63a634cbe725ebea + - dataset_name: dragen_hash_table_hg38_alt_masked_v10_r4_linear_cnv_hla_rna_methylated_combined + dataset_creation_time: '2024-11-05T23:19:50Z' + dataset_id_hash: e3d80dec2012c2e65627b68ee9a07293 + - dataset_name: wgs_validation_fastq__cups_pair_8 + dataset_creation_time: '2024-06-21T09:35:36Z' + dataset_id_hash: fca2fb140731ea7900d2c5c96061a280 + - dataset_name: wgs_validation_fastq__2016_249_17_MH_P033 + dataset_creation_time: '2024-06-21T09:37:03Z' + dataset_id_hash: ab2e2265f84e64ff9687b6e8a3f5c5d4 + - dataset_name: wgs_validation_fastq__2016_249_18_WH_P025 + dataset_creation_time: '2024-06-21T09:37:36Z' + dataset_id_hash: 48b0a8616df58b94ec6f358037850f1e + - dataset_name: wgs_validation_fastq__B_ALL_Case_10 + dataset_creation_time: '2024-06-21T09:38:04Z' + dataset_id_hash: 3938ab2e04aae0156897f3fee3676ac2 + - dataset_name: wgs_validation_fastq_Diploid_Never_Responder + dataset_creation_time: '2024-06-21T09:38:33Z' + dataset_id_hash: 45c8ce2f0995873364f760ca797c5c6a + - dataset_name: wgs_validation_fastq_SBJ00303 + dataset_creation_time: 
'2024-06-21T09:39:42Z' + dataset_id_hash: ebfe847356803cb27a8009deedf30944 + - dataset_name: wgs_validation_fastq_SEQC50 + dataset_creation_time: '2024-06-21T09:40:14Z' + dataset_id_hash: 2e5fbe564ac04069f3fc2cd254465dd8 + - dataset_name: wgs_validation_fastq_SFRC01073 + dataset_creation_time: '2024-06-21T09:40:51Z' + dataset_id_hash: deac29472b118a9b51889a661e804cd8 + # Production 4.3.6 + - bunch_name: dragen_somatic_with_germline_pipeline_prod__4_3_6 + bunch_description: | + Contains the UMCCR dragen-somatic_with_germline-pipeline, and the appropriate reference data for dragen version 4.3.6 + tenant_name: umccr-prod + pipeline_path: + workflows/dragen-somatic-with-germline-pipeline/4.3.6/dragen-somatic-with-germline-pipeline__4.3.6.cwl + pipeline_project_name: pipelines + bunch_region_id: 1efd315d-6309-4d7e-826b-d3824b0b5acb + bunch_region_city_name: Sydney + projects: + - production + categories: + - wgs + - somatic + - production + - secondary-analysis + bunch_versions: + - version: v10_r4 + version_description: | + This version of the bunch contains the dragen hash table for hg38 + chm13 references + Version for v10-r4 graph and linear alt-masked, with cnv, hla, rna, and methylated enabled features. 
+ Also contains all the fastq validation data + version_creation_date: 2024-11-06T00:00:20+00:00Z + datasets: + - dataset_name: dragen_hash_table_chm13_v2_v10_r4_graph_cnv_hla_rna + dataset_creation_time: '2024-11-04T02:05:03Z' + dataset_id_hash: 9511ca5eb8a31b30dd4ee8d80d83cb93 + - dataset_name: dragen_hash_table_chm13_v2_v10_r4_linear_cnv_hla_rna_methylated_combined + dataset_creation_time: '2024-11-05T23:16:55Z' + dataset_id_hash: f62a6eb3655012f0f01bb3372d0e7029 + - dataset_name: dragen_hash_table_hg38_alt_masked_v10_r4_graph_cnv_hla_rna + dataset_creation_time: '2024-11-04T01:55:11Z' + dataset_id_hash: d6ac5cf3214df27d63a634cbe725ebea + - dataset_name: dragen_hash_table_hg38_alt_masked_v10_r4_linear_cnv_hla_rna_methylated_combined + dataset_creation_time: '2024-11-05T23:19:50Z' + dataset_id_hash: e3d80dec2012c2e65627b68ee9a07293 # RNASum - bunch_name: rnasum_prod__1_0_0 @@ -1668,7 +2242,6 @@ bunches: dataset_creation_time: '2024-10-30T04:01:43Z' dataset_id_hash: 82a4434e4f8e5363842ded1f0a3bcd42 - # List of generated bundles bundles: - bundle_name: bclconvert_interop_qc_prod__1_3_1__1_21__20240627051309 @@ -1729,7 +2302,7 @@ bundles: bunch_name: dragen_alignment_pipeline_with_validation_data__4_2_4 bunch_version: v9_r3 bunch_datasets: - - dataset_name: dragen_hash_table_v9_r3_alt_masked_cnv_hla_rna + - dataset_name: dragen_hash_table_hg38_alt_masked_v9_r3_linear_cnv_hla_rna dataset_creation_time: '2024-06-21T09:21:42Z' dataset_id_hash: a5fe1d2e2fe35737bdbc0871d46878e1 - dataset_name: wgs_validation_fastq__cups_pair_8 @@ -1862,7 +2435,7 @@ bundles: bunch_name: dragen_alignment_pipeline_prod__4_2_4 bunch_version: v9_r3 bunch_datasets: - - dataset_name: dragen_hash_table_v9_r3_alt_masked_cnv_hla_rna + - dataset_name: dragen_hash_table_hg38_alt_masked_v9_r3_linear_cnv_hla_rna dataset_creation_time: '2024-06-21T09:21:42Z' dataset_id_hash: a5fe1d2e2fe35737bdbc0871d46878e1 bundle_id: fdc59e54-d067-440a-97c8-0d28ff19cef3 @@ -1904,7 +2477,7 @@ bundles: bunch_name: 
dragen_transcriptome_pipeline_with_validation_data__4_2_4 bunch_version: v9_r3 bunch_datasets: - - dataset_name: dragen_hash_table_v9_r3_alt_masked_cnv_hla_rna + - dataset_name: dragen_hash_table_hg38_alt_masked_v9_r3_linear_cnv_hla_rna dataset_creation_time: '2024-06-21T09:21:42Z' dataset_id_hash: a5fe1d2e2fe35737bdbc0871d46878e1 - dataset_name: hg38_fasta @@ -2011,7 +2584,7 @@ bundles: bunch_name: dragen_transcriptome_pipeline_prod__4_2_4 bunch_version: v9_r3 bunch_datasets: - - dataset_name: dragen_hash_table_v9_r3_alt_masked_cnv_hla_rna + - dataset_name: dragen_hash_table_hg38_alt_masked_v9_r3_linear_cnv_hla_rna dataset_creation_time: '2024-06-21T09:21:42Z' dataset_id_hash: a5fe1d2e2fe35737bdbc0871d46878e1 - dataset_name: hg38_fasta @@ -2083,7 +2656,7 @@ bundles: bunch_name: dragen_somatic_with_germline_pipeline_with_validation_data__4_2_4 bunch_version: v9_r3 bunch_datasets: - - dataset_name: dragen_hash_table_v9_r3_alt_masked_cnv_hla_rna + - dataset_name: dragen_hash_table_hg38_alt_masked_v9_r3_linear_cnv_hla_rna dataset_creation_time: '2024-06-21T09:21:42Z' dataset_id_hash: a5fe1d2e2fe35737bdbc0871d46878e1 - dataset_name: wgs_validation_fastq__cups_pair_8 @@ -2179,7 +2752,7 @@ bundles: bunch_name: dragen_somatic_with_germline_pipeline_prod__4_2_4 bunch_version: v9_r3 bunch_datasets: - - dataset_name: dragen_hash_table_v9_r3_alt_masked_cnv_hla_rna + - dataset_name: dragen_hash_table_hg38_alt_masked_v9_r3_linear_cnv_hla_rna dataset_creation_time: '2024-06-21T09:21:42Z' dataset_id_hash: a5fe1d2e2fe35737bdbc0871d46878e1 bundle_id: 95bc27fa-eb8c-46f7-acad-092e7e010d81 @@ -2299,7 +2872,7 @@ bundles: bunch_name: dragen_alignment_pipeline_with_validation_data__4_2_4 bunch_version: v9_r3 bunch_datasets: - - dataset_name: dragen_hash_table_v9_r3_alt_masked_cnv_hla_rna + - dataset_name: dragen_hash_table_hg38_alt_masked_v9_r3_linear_cnv_hla_rna dataset_creation_time: '2024-06-21T09:21:42Z' dataset_id_hash: a5fe1d2e2fe35737bdbc0871d46878e1 - dataset_name: 
hg38_v39_gencode_annotation @@ -2436,7 +3009,7 @@ bundles: bunch_name: dragen_alignment_pipeline_prod__4_2_4 bunch_version: v9_r3 bunch_datasets: - - dataset_name: dragen_hash_table_v9_r3_alt_masked_cnv_hla_rna + - dataset_name: dragen_hash_table_hg38_alt_masked_v9_r3_linear_cnv_hla_rna dataset_creation_time: '2024-06-21T09:21:42Z' dataset_id_hash: a5fe1d2e2fe35737bdbc0871d46878e1 bundle_id: 218726be-7f1a-4847-96ae-48a1f2caf882 @@ -2478,7 +3051,7 @@ bundles: bunch_name: dragen_transcriptome_pipeline_with_validation_data__4_2_4 bunch_version: v9_r3 bunch_datasets: - - dataset_name: dragen_hash_table_v9_r3_alt_masked_cnv_hla_rna + - dataset_name: dragen_hash_table_hg38_alt_masked_v9_r3_linear_cnv_hla_rna dataset_creation_time: '2024-06-21T09:21:42Z' dataset_id_hash: a5fe1d2e2fe35737bdbc0871d46878e1 - dataset_name: hg38_fasta @@ -2589,7 +3162,7 @@ bundles: bunch_name: dragen_transcriptome_pipeline_prod__4_2_4 bunch_version: v9_r3 bunch_datasets: - - dataset_name: dragen_hash_table_v9_r3_alt_masked_cnv_hla_rna + - dataset_name: dragen_hash_table_hg38_alt_masked_v9_r3_linear_cnv_hla_rna dataset_creation_time: '2024-06-21T09:21:42Z' dataset_id_hash: a5fe1d2e2fe35737bdbc0871d46878e1 - dataset_name: hg38_fasta @@ -2659,7 +3232,7 @@ bundles: bunch_name: dragen_alignment_pipeline_with_validation_data__4_2_4 bunch_version: v9_r3 bunch_datasets: - - dataset_name: dragen_hash_table_v9_r3_alt_masked_cnv_hla_rna + - dataset_name: dragen_hash_table_hg38_alt_masked_v9_r3_linear_cnv_hla_rna dataset_creation_time: '2024-06-21T09:21:42Z' dataset_id_hash: a5fe1d2e2fe35737bdbc0871d46878e1 - dataset_name: hg38_v39_gencode_annotation @@ -2796,7 +3369,7 @@ bundles: bunch_name: dragen_alignment_pipeline_prod__4_2_4 bunch_version: v9_r3 bunch_datasets: - - dataset_name: dragen_hash_table_v9_r3_alt_masked_cnv_hla_rna + - dataset_name: dragen_hash_table_hg38_alt_masked_v9_r3_linear_cnv_hla_rna dataset_creation_time: '2024-06-21T09:21:42Z' dataset_id_hash: a5fe1d2e2fe35737bdbc0871d46878e1 - 
dataset_name: hg38_v39_gencode_annotation @@ -2843,7 +3416,7 @@ bundles: bunch_name: dragen_somatic_with_germline_pipeline_with_validation_data__4_2_4 bunch_version: v9_r3 bunch_datasets: - - dataset_name: dragen_hash_table_v9_r3_alt_masked_cnv_hla_rna + - dataset_name: dragen_hash_table_hg38_alt_masked_v9_r3_linear_cnv_hla_rna dataset_creation_time: '2024-06-21T09:21:42Z' dataset_id_hash: a5fe1d2e2fe35737bdbc0871d46878e1 - dataset_name: wgs_validation_fastq__cups_pair_8 @@ -2939,7 +3512,7 @@ bundles: bunch_name: dragen_somatic_with_germline_pipeline_prod__4_2_4 bunch_version: v9_r3 bunch_datasets: - - dataset_name: dragen_hash_table_v9_r3_alt_masked_cnv_hla_rna + - dataset_name: dragen_hash_table_hg38_alt_masked_v9_r3_linear_cnv_hla_rna dataset_creation_time: '2024-06-21T09:21:42Z' dataset_id_hash: a5fe1d2e2fe35737bdbc0871d46878e1 bundle_id: 6ad036ef-f859-483f-b098-07cb7df7df83 @@ -2979,7 +3552,7 @@ bundles: bunch_name: dragen_transcriptome_pipeline_with_validation_data__4_2_4 bunch_version: v9_r3 bunch_datasets: - - dataset_name: dragen_hash_table_v9_r3_alt_masked_cnv_hla_rna + - dataset_name: dragen_hash_table_hg38_alt_masked_v9_r3_linear_cnv_hla_rna dataset_creation_time: '2024-06-21T09:21:42Z' dataset_id_hash: a5fe1d2e2fe35737bdbc0871d46878e1 - dataset_name: hg38_fasta @@ -3090,7 +3663,7 @@ bundles: bunch_name: dragen_transcriptome_pipeline_prod__4_2_4 bunch_version: v9_r3 bunch_datasets: - - dataset_name: dragen_hash_table_v9_r3_alt_masked_cnv_hla_rna + - dataset_name: dragen_hash_table_hg38_alt_masked_v9_r3_linear_cnv_hla_rna dataset_creation_time: '2024-06-21T09:21:42Z' dataset_id_hash: a5fe1d2e2fe35737bdbc0871d46878e1 - dataset_name: hg38_fasta @@ -3164,7 +3737,7 @@ bundles: bunch_name: dragen_alignment_pipeline_with_validation_data__4_2_4 bunch_version: v9_r3 bunch_datasets: - - dataset_name: dragen_hash_table_v9_r3_alt_masked_cnv_hla_rna + - dataset_name: dragen_hash_table_hg38_alt_masked_v9_r3_linear_cnv_hla_rna dataset_creation_time: 
'2024-06-21T09:21:42Z' dataset_id_hash: a5fe1d2e2fe35737bdbc0871d46878e1 - dataset_name: hg38_v39_gencode_annotation @@ -3301,7 +3874,7 @@ bundles: bunch_name: dragen_alignment_pipeline_prod__4_2_4 bunch_version: v9_r3 bunch_datasets: - - dataset_name: dragen_hash_table_v9_r3_alt_masked_cnv_hla_rna + - dataset_name: dragen_hash_table_hg38_alt_masked_v9_r3_linear_cnv_hla_rna dataset_creation_time: '2024-06-21T09:21:42Z' dataset_id_hash: a5fe1d2e2fe35737bdbc0871d46878e1 - dataset_name: hg38_v39_gencode_annotation @@ -3347,7 +3920,7 @@ bundles: bunch_name: dragen_transcriptome_pipeline_with_validation_data__4_2_4 bunch_version: v9_r3 bunch_datasets: - - dataset_name: dragen_hash_table_v9_r3_alt_masked_cnv_hla_rna + - dataset_name: dragen_hash_table_hg38_alt_masked_v9_r3_linear_cnv_hla_rna dataset_creation_time: '2024-06-21T09:21:42Z' dataset_id_hash: a5fe1d2e2fe35737bdbc0871d46878e1 - dataset_name: hg38_fasta @@ -3458,7 +4031,7 @@ bundles: bunch_name: dragen_transcriptome_pipeline_prod__4_2_4 bunch_version: v9_r3 bunch_datasets: - - dataset_name: dragen_hash_table_v9_r3_alt_masked_cnv_hla_rna + - dataset_name: dragen_hash_table_hg38_alt_masked_v9_r3_linear_cnv_hla_rna dataset_creation_time: '2024-06-21T09:21:42Z' dataset_id_hash: a5fe1d2e2fe35737bdbc0871d46878e1 - dataset_name: hg38_fasta @@ -3534,7 +4107,7 @@ bundles: bunch_name: dragen_somatic_with_germline_pipeline_with_validation_data__4_2_4 bunch_version: v9_r3 bunch_datasets: - - dataset_name: dragen_hash_table_v9_r3_alt_masked_cnv_hla_rna + - dataset_name: dragen_hash_table_hg38_alt_masked_v9_r3_linear_cnv_hla_rna dataset_creation_time: '2024-06-21T09:21:42Z' dataset_id_hash: a5fe1d2e2fe35737bdbc0871d46878e1 - dataset_name: wgs_validation_fastq__cups_pair_8 @@ -3630,7 +4203,7 @@ bundles: bunch_name: dragen_somatic_with_germline_pipeline_prod__4_2_4 bunch_version: v9_r3 bunch_datasets: - - dataset_name: dragen_hash_table_v9_r3_alt_masked_cnv_hla_rna + - dataset_name: 
dragen_hash_table_hg38_alt_masked_v9_r3_linear_cnv_hla_rna dataset_creation_time: '2024-06-21T09:21:42Z' dataset_id_hash: a5fe1d2e2fe35737bdbc0871d46878e1 bundle_id: 1a9287d8-9806-4474-a12a-5c08336cbd73 @@ -3751,7 +4324,7 @@ bundles: bunch_name: dragen_transcriptome_pipeline_with_validation_data__4_2_4 bunch_version: v9_r3 bunch_datasets: - - dataset_name: dragen_hash_table_v9_r3_alt_masked_cnv_hla_rna + - dataset_name: dragen_hash_table_hg38_alt_masked_v9_r3_linear_cnv_hla_rna dataset_creation_time: '2024-06-21T09:21:42Z' dataset_id_hash: a5fe1d2e2fe35737bdbc0871d46878e1 - dataset_name: hg38_fasta @@ -3862,7 +4435,7 @@ bundles: bunch_name: dragen_transcriptome_pipeline_prod__4_2_4 bunch_version: v9_r3 bunch_datasets: - - dataset_name: dragen_hash_table_v9_r3_alt_masked_cnv_hla_rna + - dataset_name: dragen_hash_table_hg38_alt_masked_v9_r3_linear_cnv_hla_rna dataset_creation_time: '2024-06-21T09:21:42Z' dataset_id_hash: a5fe1d2e2fe35737bdbc0871d46878e1 - dataset_name: hg38_fasta @@ -3936,7 +4509,7 @@ bundles: bunch_name: dragen_alignment_pipeline_with_validation_data__4_2_4 bunch_version: v9_r3 bunch_datasets: - - dataset_name: dragen_hash_table_v9_r3_alt_masked_cnv_hla_rna + - dataset_name: dragen_hash_table_hg38_alt_masked_v9_r3_linear_cnv_hla_rna dataset_creation_time: '2024-06-21T09:21:42Z' dataset_id_hash: a5fe1d2e2fe35737bdbc0871d46878e1 - dataset_name: hg38_v39_gencode_annotation @@ -4073,7 +4646,7 @@ bundles: bunch_name: dragen_alignment_pipeline_prod__4_2_4 bunch_version: v9_r3 bunch_datasets: - - dataset_name: dragen_hash_table_v9_r3_alt_masked_cnv_hla_rna + - dataset_name: dragen_hash_table_hg38_alt_masked_v9_r3_linear_cnv_hla_rna dataset_creation_time: '2024-06-21T09:21:42Z' dataset_id_hash: a5fe1d2e2fe35737bdbc0871d46878e1 - dataset_name: hg38_v39_gencode_annotation @@ -4120,7 +4693,7 @@ bundles: bunch_name: dragen_somatic_with_germline_pipeline_with_validation_data__4_2_4 bunch_version: v9_r3 bunch_datasets: - - dataset_name: 
dragen_hash_table_v9_r3_alt_masked_cnv_hla_rna + - dataset_name: dragen_hash_table_hg38_alt_masked_v9_r3_linear_cnv_hla_rna dataset_creation_time: '2024-06-21T09:21:42Z' dataset_id_hash: a5fe1d2e2fe35737bdbc0871d46878e1 - dataset_name: wgs_validation_fastq__cups_pair_8 @@ -4216,7 +4789,7 @@ bundles: bunch_name: dragen_somatic_with_germline_pipeline_prod__4_2_4 bunch_version: v9_r3 bunch_datasets: - - dataset_name: dragen_hash_table_v9_r3_alt_masked_cnv_hla_rna + - dataset_name: dragen_hash_table_hg38_alt_masked_v9_r3_linear_cnv_hla_rna dataset_creation_time: '2024-06-21T09:21:42Z' dataset_id_hash: a5fe1d2e2fe35737bdbc0871d46878e1 bundle_id: 71297be1-5180-4f64-ac0e-2846a0a51e56 diff --git a/config/workflow.yaml b/config/workflow.yaml index d3be067e..516fa913 100644 --- a/config/workflow.yaml +++ b/config/workflow.yaml @@ -29,6 +29,9 @@ workflows: - name: 4.2.4 path: 4.2.4/dragen-germline-pipeline__4.2.4.cwl md5sum: 6fc0cce26d0adffb48084d4e182d349c + - name: 4.3.6 + path: 4.3.6/dragen-germline-pipeline__4.3.6.cwl + md5sum: 65ab25120dd83bc3f77ae9fa7e028433 categories: - dragen - name: dragen-qc-hla-pipeline @@ -92,6 +95,9 @@ workflows: - name: 4.2.4 path: 4.2.4/dragen-transcriptome-pipeline__4.2.4.cwl md5sum: 297f7efb1d149c1a162566e2727e618b + - name: 4.3.6 + path: 4.3.6/dragen-transcriptome-pipeline__4.3.6.cwl + md5sum: c9e823dbdf406f61e221a4eee400a8f4 categories: [] - name: dragen-alignment-pipeline path: dragen-alignment-pipeline @@ -108,6 +114,9 @@ workflows: - name: 4.2.4 path: 4.2.4/dragen-alignment-pipeline__4.2.4.cwl md5sum: 54e2dd90864bb85f334af92d4e255da1 + - name: 4.3.6 + path: 4.3.6/dragen-alignment-pipeline__4.3.6.cwl + md5sum: cc4d8f38e17006edcd6acb18cf7dea5a categories: - alignment - dragen @@ -265,6 +274,9 @@ workflows: - name: 4.2.4 path: 4.2.4/dragen-somatic-with-germline-pipeline__4.2.4.cwl md5sum: 1d482653dfffb10f803bfe766f19d8ee + - name: 4.3.6 + path: 4.3.6/dragen-somatic-with-germline-pipeline__4.3.6.cwl + md5sum: 
9585b66b4fb95e312c4a76e47d4697b1 categories: [] - name: illumina-interop-qc path: illumina-interop-qc diff --git a/tools/dragen-alignment/4.3.6/dragen-alignment__4.3.6.cwl b/tools/dragen-alignment/4.3.6/dragen-alignment__4.3.6.cwl new file mode 100644 index 00000000..d9483e90 --- /dev/null +++ b/tools/dragen-alignment/4.3.6/dragen-alignment__4.3.6.cwl @@ -0,0 +1,704 @@ +cwlVersion: v1.1 +class: CommandLineTool + +# Extensions +$namespaces: + s: https://schema.org/ + ilmn-tes: https://platform.illumina.com/rdf/ica/ +$schemas: + - https://schema.org/version/latest/schemaorg-current-http.rdf + +# Metadata +s:author: + class: s:Person + s:name: Alexis Lucattini + s:email: Alexis.Lucattini@umccr.org + s:identifier: https://orcid.org/0000-0001-9754-647X + +# ID/Docs +id: dragen-alignment--4.3.6 +label: dragen-alignment v(4.3.6) +doc: | + Documentation for dragen-alignment v4.3.6 + +# ILMN V1 Resources Guide: https://illumina.gitbook.io/ica-v1/analysis/a-taskexecution#type-and-size +# ILMN V2 Resources Guide: https://help.ica.illumina.com/project/p-flow/f-pipelines#compute-types +hints: + ResourceRequirement: + ilmn-tes:resources/tier: standard + ilmn-tes:resources/type: fpga + ilmn-tes:resources/size: medium + coresMin: 16 + ramMin: 240000 + DockerRequirement: + dockerPull: 079623148045.dkr.ecr.ap-southeast-2.amazonaws.com/cp-prod/c3add40b-1be2-431d-a322-29529f7d2866:latest + +requirements: + ResourceRequirement: + tmpdirMin: | + ${ + /* 1 Tb */ + return Math.pow(2, 20); + } + SchemaDefRequirement: + types: + - $import: ../../../schemas/fastq-list-row/1.0.0/fastq-list-row__1.0.0.yaml + InlineJavascriptRequirement: + expressionLib: + - $include: ../../../typescript-expressions/dragen-tools/4.0.3/dragen-tools__4.0.3.cwljs + - $include: ../../../typescript-expressions/utils/1.0.0/utils__1.0.0.cwljs + InitialWorkDirRequirement: + listing: + - entryname: $(get_script_path()) + entry: | + #!/usr/bin/env bash + + # Fail on non-zero exit of subshell + set -euo pipefail + + # 
Reset dragen + /opt/edico/bin/dragen_reset + + # Create directories + mkdir --parents \\ + "$(get_ref_mount())" \\ + "$(get_intermediate_results_dir())" \\ + "$(inputs.output_directory)" + + # untar ref data into scratch space + tar \\ + --directory "$(get_ref_mount())" \\ + --extract \\ + --file "$(inputs.reference_tar.path)" + + # Confirm not both fastq_list and fastq_list_rows are defined + if [[ "$(is_not_null(inputs.fastq_list))" == "true" && "$(is_not_null(inputs.fastq_list_rows))" == "true" ]]; then + echo "Cannot set both CWL inputs fastq_list AND fastq_list_rows" 1>&2 + exit 1 + fi + + # Run dragen command and import options from cli + "$(get_dragen_bin_path())" "\${@}" + - | + ${ + return generate_germline_mount_points(inputs); + } + +# Base command and args +baseCommand: [ "bash" ] + +arguments: + # Script path + - valueFrom: "$(get_script_path())" + position: -1 + # Set fastq list + - prefix: "--fastq-list=" + separate: False + valueFrom: "$(get_fastq_list_csv_path())" + # Preset parameters + - prefix: "--intermediate-results-dir=" + separate: False + valueFrom: "$(get_intermediate_results_dir())" + - prefix: "--output-format=" + separate: False + valueFrom: "BAM" + +# Inputs +inputs: + # File inputs + # Option 1: + fastq_list: + label: fastq list + doc: | + CSV file that contains a list of FASTQ files + to process. + Read1File and Read2File must be presigned urls in order to use the fastq_list option. + Otherwise use the fastq_list_rows option + type: File? + # Option 2: + fastq_list_rows: + label: fastq list rows + doc: | + Alternative to providing a file, one can instead provide a list of 'fastq-list-row' objects + type: ../../../schemas/fastq-list-row/1.0.0/fastq-list-row__1.0.0.yaml#fastq-list-row[]? 
+ reference_tar: + label: reference tar + doc: | + Path to ref data tarball + type: File + inputBinding: + prefix: "--ref-dir=" + separate: False + valueFrom: "$(get_ref_path(self))" + # RNA options + enable_rna: + label: enable rna + doc: | + Enable rna specific settings + type: boolean? + inputBinding: + prefix: "--enable-rna=" + separate: False + valueFrom: "$(self.toString())" + enable_rrna_filter: + label: enable rrna filtering + doc: | + Use the DRAGEN RNA pipeline to filter rRNA reads during alignment. The default value is false. + type: boolean? + inputBinding: + prefix: "--rrna-filter-enable=" + separate: False + valueFrom: "$(self.toString())" + enable_rna_quantification: + label: enable rna quantification + doc: | + If set to true, enables RNA quantification. Requires --enable-rna to be set to true. + type: boolean? + inputBinding: + prefix: "--enable-rna-quantification=" + separate: False + valueFrom: "$(self.toString())" + annotation_file: + label: annotation file + doc: | + Use to supply a gene annotation file. Required for quantification and gene-fusion. + type: File? + inputBinding: + prefix: "--annotation-file=" + separate: False + # Output naming options + output_file_prefix: + label: output file prefix + doc: | + The prefix given to all output files + type: string + inputBinding: + prefix: "--output-file-prefix=" + separate: False + output_directory: + label: output directory + doc: | + The directory where all output files are placed + type: string + inputBinding: + prefix: "--output-directory=" + separate: False + + ### Start mapper options ### + ann_sj_max_indel: + label: ann sj max indel + doc: | + Maximum indel length to expect near an annotated splice junction. + Range: 0 - 63 + type: int? + inputBinding: + prefix: "--Mapper.ann-sj-max-indel=" + separate: False + edit_chain_limit: + label: edit chain limit + doc: | + For edit-mode 1 or 2: Maximum seed chain length in a read to qualify for seed editing. + Range: > 0 + type: int? 
+ inputBinding: + prefix: "--Mapper.edit-chain-limit=" + separate: False + edit_mode: + label: edit mode + doc: | + 0 = No edits, 1 = Chain len test, 2 = Paired chain len test, 3 = Edit all std seeds. + type: + - "null" + - type: enum + symbols: + - "0" + - "1" + - "2" + - "3" + inputBinding: + prefix: "--Mapper.edit-mode=" + separate: False + edit_read_len: + label: edit read len + doc: | + For edit-mode 1 or 2: Read length in which to try edit-seed-num edited seeds. + Range: > 0 + type: int? + inputBinding: + prefix: "--Mapper.edit-read-len=" + separate: False + edit_seed_num: + label: edit seed num + doc: | + For edit-mode 1 or 2: Requested number of seeds per read to allow editing on. + Range: > 0 + type: int? + inputBinding: + prefix: "--Mapper.edit-seed-num=" + separate: False + enable_map_align: + label: enable map align + doc: | + Enable step of mapper/aligner. + type: boolean? + inputBinding: + prefix: "--enable-map-align=" + separate: False + valueFrom: "$(self.toString())" + enable_map_align_output: + label: enable map align output + doc: | + Enables saving the output from the map/align stage. + If only running map/align, the default value is true. + If running the variant caller, the default value is false. + Therefore in the case of the dragen alignment pipeline, this will always be true. + For sanity purposes, we have it as an option since its default state is not intuitive + type: boolean? + inputBinding: + prefix: "--enable-map-align-output=" + separate: False + valueFrom: "$(self.toString())" + max_intron_bases: + label: max intron bases + doc: | + Maximum intron length reported. + type: int? + inputBinding: + prefix: "--Mapper.max-intron-bases=" + separate: False + min_intron_bases: + label: min intron bases + doc: | + Minimum reference deletion length reported as an intron. + type: int? 
+ inputBinding: + prefix: "--Mapper.min-intron-bases=" + separate: False + seed_density: + label: seed density + doc: | + Requested density of seeds from reads queried in the hash table + Range: 0 - 1 + type: float? + inputBinding: + prefix: "--Mapper.seed-density=" + separate: False + ### End mapper options + ### Start Alignment options ### + aln_min_score: + label: aln min score + doc: | + (signed) Minimum alignment score to report; baseline for MAPQ. + + When using local alignments (global = 0), aln-min-score is computed by the host software as "22 * match-score". + + When using global alignments (global = 1), aln-min-score is set to -1000000. + + Host software computation may be overridden by setting aln-min-score in configuration file. + + Range: −2,147,483,648 to 2,147,483,647 + type: int? + inputBinding: + prefix: "--Aligner.aln-min-score=" + separate: False + dedup_min_qual: + label: dedup min qual + doc: | + Minimum base quality for calculating read quality metric for deduplication. + Range: 0-63 + type: int? + inputBinding: + prefix: "--Aligner.dedup-min-qual=" + separate: False + en_alt_hap_aln: + label: en alt hap aln + doc: | + Allows alternate haplotype alignments to be output, as supplementary. + type: boolean? + inputBinding: + prefix: "--Aligner.en-alt-hap-aln=" + separate: False + valueFrom: "$(Number(self))" + en_chimeric_aln: + label: en chimeric aln + doc: | + Allows chimeric alignments to be output, as supplementary. + type: boolean? + inputBinding: + prefix: "--Aligner.en-chimeric-aln=" + separate: False + valueFrom: "$(Number(self))" + gap_ext_pen: + label: gap ext pen + doc: | + Score penalty for gap extension. + type: int? + inputBinding: + prefix: "--Aligner.gap-ext-pen=" + separate: False + gap_open_pen: + label: gap open pen + doc: | + Score penalty for opening a gap (insertion or deletion). + type: int? 
+ inputBinding: + prefix: "--gap-open-pen=" + separate: False + global: + label: global + doc: | + If alignment is global (Needleman-Wunsch) rather than local (Smith-Waterman). + type: boolean? + inputBinding: + prefix: "--Aligner.global=" + separate: False + valueFrom: "$(Number(self))" + hard_clips: + label: hard clips + doc: | + Flags for hard clipping: [0] primary, [1] supplementary, [2] secondary. + The hard-clips option is used as a field of 3 bits, with values ranging from 0 to 7. + The bits specify alignments, as follows: + * Bit 0—primary alignments + * Bit 1—supplementary alignments + * Bit 2—secondary alignments + Each bit determines whether local alignments of that type are reported with hard clipping (1) + or soft clipping (0). + The default is 6, meaning primary alignments use soft clipping and supplementary and + secondary alignments use hard clipping. + type: int? + inputBinding: + prefix: "--Aligner.hard-clips=" + separate: False + valueFrom: | + ${ + return (self >> 0).toString(2); + } + map_orientations: + label: map orientations + doc: | + Constrain orientations to accept forward-only, reverse-complement only, or any alignments. + type: + - "null" + - type: enum + symbols: + - "0" # (any) + - "1" # (forward only) + - "2" # (reverse only) + inputBinding: + prefix: "--Aligner.map-orientations=" + separate: False + mapq_max: + label: mapq max + doc: | + Ceiling on reported MAPQ. Max 255 + type: int? + inputBinding: + prefix: "--Aligner.mapq-max=" + separate: False + mapq_strict_js: + label: mapq strict js + doc: | + Specific to RNA. When set to 0, a higher MAPQ value is returned, expressing confidence that the alignment is at least partially correct. When set to 1, a lower MAPQ value is returned, expressing the splice junction ambiguity. + type: boolean? 
+ inputBinding: + prefix: "--mapq-strict-js=" + separate: False + valueFrom: "$(Number(self))" + match_n_score: + label: match n score + doc: | + (signed) Score increment for matching a reference 'N' nucleotide IUB code. + Range: -16 to 15 + type: int? + inputBinding: + prefix: "--Aligner.match-n-score=" + separate: False + match_score: + label: match score + doc: | + Score increment for matching reference nucleotide. + When global = 0, match-score > 0; When global = 1, match-score >= 0 + type: float? + inputBinding: + prefix: "--Aligner.match-score=" + separate: False + max_rescues: + label: max rescues + doc: | + Maximum rescue alignments per read pair. Default is 10 + type: int? + inputBinding: + prefix: "--max-rescues=" + separate: False + min_score_coeff: + label: min score coeff + doc: | + Adjustment to aln-min-score per read base. + Range: -64 to 63.999 + type: float? + inputBinding: + prefix: "--Aligner.min-score-coeff=" + separate: False + mismatch_pen: + label: mismatch pen + doc: | + Score penalty for a mismatch. + type: int? + inputBinding: + prefix: "--Aligner.mismatch-pen=" + separate: False + no_unclip_score: + label: no unclip score + doc: | + When no-unclip-score is set to 1, any unclipped bonus (unclip-score) contributing to an alignment is removed from the alignment score before further processing. + type: boolean? + inputBinding: + prefix: "--Aligner.no-unclip-score=" + separate: False + valueFrom: "$(Number(self))" + no_unpaired: + label: no unpaired + doc: | + If only properly paired alignments should be reported for paired reads. + type: boolean? + inputBinding: + prefix: "--Aligner.no-unpaired=" + separate: False + valueFrom: "$(Number(self))" + pe_max_penalty: + label: pe max penalty + doc: | + Maximum pairing score penalty, for unpaired or distant ends. + Range: 0-255 + type: int? 
+ inputBinding: + prefix: "--Aligner.pe-max-penalty=" + separate: False + pe_orientation: + label: pe orientation + doc: | + Expected paired-end orientation: 0=FR, 1=RF, 2=FF. + type: + - "null" + - type: enum + symbols: + - "0" # FR + - "1" # RF + - "2" # FF + inputBinding: + prefix: "--Aligner.pe-orientation=" + separate: False + rescue_sigmas: + label: rescue sigmas + doc: | + Deviations from the mean read length used for rescue scan radius. Default is 2.5. + type: float? + inputBinding: + prefix: "--Aligner.rescue-sigmas=" + separate: False + sec_aligns: + label: sec aligns + doc: | + Maximum secondary (suboptimal) alignments to report per read. + Range: 0 - 30 + type: int? + inputBinding: + prefix: "--Aligner.sec-aligns=" + separate: False + sec_aligns_hard: + label: sec aligns hard + doc: | + Set to force unmapped when not all secondary alignments can be output. + type: boolean? + inputBinding: + prefix: "--Aligner.sec-aligns-hard=" + separate: False + valueFrom: "$(Number(self))" + sec_phred_delta: + label: sec phred delta + doc: | + Only secondary alignments with likelihood within this Phred of the primary are reported. + Range: 0 - 255 + type: int? + inputBinding: + prefix: "--Aligner.sec-phred-delta=" + separate: False + sec_score_delta: + label: sec score delta + doc: | + Secondary aligns allowed with pair score no more than this far below primary. + type: float? + inputBinding: + prefix: "--Aligner.sec-score-delta=" + separate: False + supp_aligns: + label: supp aligns + doc: | + Maximum supplementary (chimeric) alignments to report per read. + type: int? + inputBinding: + prefix: "--Aligner.supp-aligns=" + separate: False + supp_as_sec: + label: supp as sec + doc: | + If supplementary alignments should be reported with secondary flag. + type: boolean? 
+ inputBinding: + prefix: "--Aligner.supp-as-sec=" + separate: False + valueFrom: "$(Number(self))" + supp_min_score_adj: + label: supp min score adj + doc: | + Amount to increase minimum alignment score for supplementary alignments. + This score is computed by host software as "8 * match-score" for DNA, and is default 0 for RNA. + type: float? + inputBinding: + prefix: "--Aligner.supp-min-score-adj=" + separate: False + unclip_score: + label: unclip score + doc: | + Score bonus for reaching each edge of the read. + Range: 0 - 127 + type: int? + inputBinding: + prefix: "--Aligner.unclip-score=" + separate: False + unpaired_pen: + label: unpaired pen + doc: | + Penalty for unpaired alignments in Phred scale. + Range: 0 - 255 + type: int? + inputBinding: + prefix: "--Aligner.unpaired-pen=" + separate: False + ### End Alignment options ### + ### Start General software options + # Alt aware mapping + alt_aware: + label: alt aware + doc: | + Enables special processing for alt contigs, if alt liftover was used in hash table. + Enabled by default if reference was built with liftover. + type: boolean? + inputBinding: + prefix: "--alt-aware=" + separate: False + valueFrom: "$(self.toString())" + # Duplicate marking + enable_duplicate_marking: + label: enable duplicate marking + doc: | + Enable the flagging of duplicate output alignment records. + type: boolean? + inputBinding: + prefix: "--enable-duplicate-marking=" + separate: False + valueFrom: "$(self.toString())" + remove_duplicates: + label: remove duplicates + doc: | + If true, remove duplicate alignment records instead of just flagging them. + type: boolean? + inputBinding: + prefix: "--remove-duplicates=" + separate: False + valueFrom: "$(self.toString())" + # Tag generation + generate_md_tags: + label: generate md tags + doc: | + Whether to generate MD tags with alignment output records. Default is false. + type: boolean? 
+ inputBinding: + prefix: "--generate-md-tags=" + separate: False + valueFrom: "$(self.toString())" + generate_sa_tags: + label: generate sa tags + doc: | + Whether to generate SA:Z tags for records that have chimeric/supplemental alignments. + type: boolean? + inputBinding: + prefix: "--generate-sa-tags=" + separate: False + valueFrom: "$(self.toString())" + generate_zs_tags: + label: generate zs tags + doc: | + Whether to generate ZS tags for alignment output records. Default is false. + type: boolean? + inputBinding: + prefix: "--generate-zs-tags=" + separate: False + valueFrom: "$(self.toString())" + # Sorting logic + enable_sort: + label: enable sort + doc: | + Enable sorting after mapping/alignment. + type: boolean? + inputBinding: + prefix: "--enable-sort=" + separate: False + valueFrom: "$(self.toString())" + preserve_map_align_order: + label: preserve map align order + doc: | + Produce output file that preserves original order of reads in the input file. + type: boolean? + inputBinding: + prefix: "--preserve-map-align-order=" + separate: False + valueFrom: "$(self.toString())" + # Add in the lic-instance-id-location + lic_instance_id_location: + label: license instance id location + doc: | + You may wish to supply your own. + Optional value, default set to /opt/instance-identity + which is a path inside the dragen container + type: + - File? + - string? + default: "/opt/instance-identity" + inputBinding: + prefix: "--lic-instance-id-location=" + separate: False + # Verbosity + verbose: + label: verbose + doc: | + Enable verbose output from DRAGEN. + type: boolean? 
+ inputBinding: + prefix: "-v" + +# Outputs +outputs: + # Will also include mounted-files.txt + dragen_alignment_output_directory: + label: dragen alignment output directory + doc: | + The output directory containing all alignment output files and qc metrics + type: Directory + outputBinding: + glob: "$(inputs.output_directory)" + # Whilst these files reside inside the output directory, specifying them here as outputs + # provides easier access and reference + dragen_bam_out: + label: dragen bam out + doc: | + The output alignment file + type: File + outputBinding: + glob: "$(inputs.output_directory)/$(inputs.output_file_prefix).bam" + secondaryFiles: + - ".bai" + +successCodes: + - 0 diff --git a/tools/dragen-germline/4.3.6/dragen-germline__4.3.6.cwl b/tools/dragen-germline/4.3.6/dragen-germline__4.3.6.cwl new file mode 100644 index 00000000..4d930037 --- /dev/null +++ b/tools/dragen-germline/4.3.6/dragen-germline__4.3.6.cwl @@ -0,0 +1,878 @@ +cwlVersion: v1.1 +class: CommandLineTool + +# Extensions +$namespaces: + s: https://schema.org/ + ilmn-tes: https://platform.illumina.com/rdf/ica/ +$schemas: + - https://schema.org/version/latest/schemaorg-current-http.rdf + +# Metadata +s:author: + class: s:Person + s:name: Alexis Lucattini + s:email: Alexis.Lucattini@umccr.org + s:identifier: https://orcid.org/0000-0001-9754-647X + +# ID/Docs +id: dragen-germline--4.3.6 +label: dragen-germline v(4.3.6) +doc: | + Documentation for dragen-germline v4.3.6 + +# ILMN V1 Resources Guide: https://illumina.gitbook.io/ica-v1/analysis/a-taskexecution#type-and-size +# ILMN V2 Resources Guide: https://help.ica.illumina.com/project/p-flow/f-pipelines#compute-types +hints: + ResourceRequirement: + ilmn-tes:resources/tier: standard + ilmn-tes:resources/type: fpga + ilmn-tes:resources/size: medium + coresMin: 16 + ramMin: 240000 + DockerRequirement: + dockerPull: 699120554104.dkr.ecr.us-east-1.amazonaws.com/public/dragen:4.3.6 + +requirements: + ResourceRequirement: + tmpdirMin: | + ${
/* 1 Tb */ + return Math.pow(2, 20); + } + SchemaDefRequirement: + types: + - $import: ../../../schemas/fastq-list-row/1.0.0/fastq-list-row__1.0.0.yaml + InlineJavascriptRequirement: + expressionLib: + - $include: ../../../typescript-expressions/dragen-tools/4.0.3/dragen-tools__4.0.3.cwljs + - $include: ../../../typescript-expressions/utils/1.0.0/utils__1.0.0.cwljs + InitialWorkDirRequirement: + listing: + - entryname: $(get_script_path()) + entry: | + #!/usr/bin/env bash + + # Fail on non-zero exit of subshell + set -euo pipefail + + # Run partial reconfig + /opt/edico/bin/dragen \\ + --partial-reconfig HMM \\ + --ignore-version-check true + + # Create directories + mkdir --parents \\ + "$(get_ref_mount())" \\ + "$(get_intermediate_results_dir())" \\ + "$(inputs.output_directory)" + + # untar ref data into scratch space + tar \\ + --directory "$(get_ref_mount())" \\ + --extract \\ + --file "$(inputs.reference_tar.path)" + + # Confirm either of fastq_list, fastq_list_rows, bam_input or cram_input is defined + if [[ "$(boolean_to_int(is_not_null(inputs.fastq_list)) + boolean_to_int(is_not_null(inputs.fastq_list_rows)) + boolean_to_int(is_not_null(inputs.bam_input)) + boolean_to_int(is_not_null(inputs.cram_input)))" -ne "1" ]]; then + echo "Please set one and only one of fastq_list, fastq_list_rows, bam_input or cram_input for normal sample" 1>&2 + exit 1 + fi + + # Run dragen command and import options from cli + "$(get_dragen_bin_path())" "\${@}" + - | + ${ + return generate_germline_mount_points(inputs); + } + +baseCommand: [ "bash" ] + +arguments: + # Script path + - valueFrom: "$(get_script_path())" + position: -1 + # Parameters that are always true + - prefix: "--enable-variant-caller=" + separate: False + valueFrom: "true" + - prefix: "--intermediate-results-dir=" + separate: False + valueFrom: "$(get_intermediate_results_dir())" + + +inputs: + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/OptionReference.htm + # Inputs fastq list csv or actual
fastq list file with presigned urls for Read1File and Read2File columns + # File inputs + # Option 1: + fastq_list: + label: fastq list + doc: | + CSV file that contains a list of FASTQ files + to process. + Read1File and Read2File may be presigned urls or use this in conjunction with + the fastq_list_mount_paths inputs. + type: File? + inputBinding: + loadContents: true + prefix: "--fastq-list=" + separate: False + valueFrom: "$(get_fastq_list_csv_path())" + # Option 2: + fastq_list_rows: + label: fastq list rows + doc: | + Alternative to providing a file, one can instead provide a list of 'fastq-list-row' objects + type: ../../../schemas/fastq-list-row/1.0.0/fastq-list-row__1.0.0.yaml#fastq-list-row[]? + inputBinding: + prefix: "--fastq-list=" + separate: False + valueFrom: "$(get_fastq_list_csv_path())" + # Option 3 + bam_input: + label: bam input + doc: | + Input a normal BAM file for the variant calling stage + type: File? + inputBinding: + prefix: "--bam-input=" + separate: False + secondaryFiles: + - pattern: ".bai" + required: true + # Option 4 + cram_input: + label: cram input + doc: | + Input a normal CRAM file for the variant calling stage + type: File? + inputBinding: + prefix: "--cram-input=" + separate: False + secondaryFiles: + - pattern: ".crai" + required: true + cram_reference: + label: cram reference + doc: | + Path to the reference fasta file for the CRAM input. + Required only if the input is a cram file AND not the reference in the tarball + type: File? 
+ inputBinding: + prefix: "--cram-reference=" + separate: False + secondaryFiles: + - pattern: ".fai" + required: true + reference_tar: + label: reference tar + doc: | + Path to ref data tarball + type: File + inputBinding: + prefix: "--ref-dir=" + separate: False + valueFrom: "$(get_ref_path(self))" + # Output naming options + output_file_prefix: + label: output file prefix + doc: | + The prefix given to all output files + type: string + inputBinding: + prefix: "--output-file-prefix=" + separate: False + output_directory: + label: output directory + doc: | + The directory where all output files are placed + type: string + inputBinding: + prefix: "--output-directory=" + separate: False + output_format : + label: output format + doc: | + For mapping and aligning, the output is sorted and compressed into BAM format by default before saving to disk. + You can control the output format from the map/align stage with the --output-format option. + type: + - "null" + - type: enum + symbols: + - SAM + - BAM + - CRAM + inputBinding: + prefix: "--output-format=" + separate: False + + # Optional operation modes + # Given we're running from fastqs + # --enable-variant-caller option must be set to true (set in arguments), --enable-map-align is then activated by default + # --enable-map-align-output to keep bams + # --enable-duplicate-marking to mark duplicate reads at the same time + # --enable-sv to enable the structural variant calling step. + enable_sort: + label: enable sort + doc: | + True by default, only set this to false if using --bam-input parameter + type: boolean? + inputBinding: + prefix: "--enable-sort=" + separate: False + valueFrom: "$(self.toString())" + enable_map_align: + label: enable map align + doc: | + Enabled by default since --enable-variant-caller option is set to true. + Set this value to false if using bam_input + type: boolean? 
+ inputBinding: + prefix: "--enable-map-align=" + separate: False + valueFrom: "$(self.toString())" + enable_map_align_output: + label: enable map align output + doc: | + Do you wish to have the output bam files present + type: boolean? + inputBinding: + prefix: "--enable-map-align-output=" + separate: False + valueFrom: "$(self.toString())" + enable_duplicate_marking: + label: enable duplicate marking + doc: | + Mark identical alignments as duplicates + type: boolean? + inputBinding: + prefix: "--enable-duplicate-marking=" + separate: False + valueFrom: "$(self.toString())" + dedup_min_qual: + label: deduplicate minimum quality + doc: | + Specifies the Phred quality score below which a base should be excluded from the quality score + calculation used for choosing among duplicate reads. + type: int? + inputBinding: + prefix: "--dedup-min-qual=" + separate: False + valueFrom: "$(self.toString())" + enable_pgx: + label: enable pgx + doc: | + Enable star allele caller. This also turns on other PGx callers such as CYP2D6, CYP2B6 + type: boolean? + inputBinding: + prefix: "--enable-pgx=" + separate: False + valueFrom: "$(self.toString())" + enable_targeted: + label: enable targeted + doc: | + Enable targeted variant calling for repetitive regions + type: boolean? + inputBinding: + prefix: "--enable-targeted=" + separate: False + valueFrom: "$(self.toString())" + + # Structural Variant Caller Options + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/StructuralVariantCalling.htm + enable_sv: + label: enable sv + doc: | + Enable/disable structural variant + caller. Default is false. + type: boolean? + inputBinding: + prefix: "--enable-sv=" + separate: False + valueFrom: "$(self.toString())" + # Structural Variant Caller Options + sv_call_regions_bed: + label: sv call regions bed + doc: | + Specifies a BED file containing the set of regions to call. + type: File? 
+ inputBinding: + prefix: "--sv-call-regions-bed=" + separate: False + sv_region: + label: sv region + doc: | + Limit the analysis to a specified region of the genome for debugging purposes. + This option can be specified multiple times to build a list of regions. + The value must be in the format "chr:startPos-endPos".. + type: string? + inputBinding: + prefix: "--sv-region=" + separate: False + valueFrom: "$(self.toString())" + sv_exome: + label: sv exome + doc: | + Set to true to configure the variant caller for targeted sequencing inputs, + which includes disabling high depth filters. + In integrated mode, the default is to autodetect targeted sequencing input, + and in standalone mode the default is false. + type: boolean? + inputBinding: + prefix: "--sv-exome=" + separate: False + valueFrom: "$(self.toString())" + sv_output_contigs: + label: sv output contigs + doc: | + Set to true to have assembled contig sequences output in a VCF file. The default is false. + type: boolean? + inputBinding: + prefix: "--sv-output-contigs=" + separate: False + valueFrom: "$(self.toString())" + sv_forcegt_vcf: + label: sv forcegt vcf + doc: | + Specify a VCF of structural variants for forced genotyping. The variants are scored and emitted + in the output VCF even if not found in the sample data. + The variants are merged with any additional variants discovered directly from the sample data. + type: File? + inputBinding: + prefix: "--sv-forcegt-vcf=" + separate: False + sv_discovery: + label: sv discovery + doc: | + Enable SV discovery. This flag can be set to false only when --sv-forcegt-vcf is used. + When set to false, SV discovery is disabled and only the forced genotyping input variants + are processed. The default is true. + type: boolean? + inputBinding: + prefix: "--sv-discovery=" + separate: False + valueFrom: "$(self.toString())" + sv_se_overlap_pair_evidence: + label: sv use overlap pair evidence + doc: | + Allow overlapping read pairs to be considered as evidence. 
+ By default, DRAGEN uses autodetect on the fraction of overlapping read pairs if <20%. + type: boolean? + inputBinding: + prefix: "--sv-use-overlap-pair-evidence=" + separate: False + valueFrom: "$(self.toString())" + sv_enable_liquid_tumor_mode: + label: sv enable liquid tumor mode + doc: | + Enable liquid tumor mode. + type: boolean? + inputBinding: + prefix: "--sv-enable-liquid-tumor-mode=" + separate: False + valueFrom: "$(self.toString())" + sv_tin_contam_tolerance: + label: sv tin contam tolerance + doc: | + Set the Tumor-in-Normal (TiN) contamination tolerance level. + You can enter any value between 0-1. The default maximum TiN contamination tolerance is 0.15. + type: float? + inputBinding: + prefix: "--sv-tin-contam-tolerance=" + separate: False + + # Variant calling options + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/SmallVariantCaller.htm + vc_target_bed: + label: vc target bed + doc: | + This is an optional command line input that restricts processing of the small variant caller, + target bed related coverage, and callability metrics to regions specified in a BED file. + type: File? + inputBinding: + prefix: "--vc-target-bed=" + separate: False + vc_target_bed_padding: + label: vc target bed padding + doc: | + This is an optional command line input that can be used to pad all of the target + BED regions with the specified value. + For example, if a BED region is 1:1000-2000 and a padding value of 100 is used, + it is equivalent to using a BED region of 1:900-2100 and a padding value of 0. + + Any padding added to --vc-target-bed-padding is used by the small variant caller + and by the target bed coverage/callability reports. The default padding is 0. + type: int? + inputBinding: + prefix: "--vc-target-bed-padding=" + separate: False + vc_target_coverage: + label: vc target coverage + doc: | + The --vc-target-coverage option specifies the target coverage for down-sampling. 
+ The default value is 500 for germline mode and 50 for somatic mode. + type: int? + inputBinding: + prefix: "--vc-target-coverage=" + separate: False + vc_enable_gatk_acceleration: + label: vc enable gatk acceleration + doc: | + If is set to true, the variant caller runs in GATK mode + (concordant with GATK 3.7 in germline mode and GATK 4.0 in somatic mode). + type: boolean? + inputBinding: + prefix: "--vc-enable-gatk-acceleration=" + separate: False + valueFrom: "$(self.toString())" + vc_remove_all_soft_clips: + label: vc remove all soft clips + doc: | + If is set to true, the variant caller does not use soft clips of reads to determine variants. + type: boolean? + inputBinding: + prefix: "--vc-remove-all-soft-clips=" + separate: False + valueFrom: "$(self.toString())" + vc_decoy_contigs: + label: vc decoy contigs + doc: | + The --vc-decoy-contigs option specifies a comma-separated list of contigs to skip during variant calling. + This option can be set in the configuration file. + type: string? + inputBinding: + prefix: "--vc-decoy-contigs=" + separate: False + vc_enable_decoy_contigs: + label: vc enable decoy contigs + doc: | + If --vc-enable-decoy-contigs is set to true, variant calls on the decoy contigs are enabled. + The default value is false. + type: boolean? + inputBinding: + prefix: "--vc-enable-decoy-contigs=" + separate: False + valueFrom: "$(self.toString())" + vc_enable_phasing: + label: vc enable phasing + doc: | + The -vc-enable-phasing option enables variants to be phased when possible. The default value is true. + type: boolean? + inputBinding: + prefix: "--vc-enable-phasing=" + separate: False + valueFrom: "$(self.toString())" + vc_enable_vcf_output: + label: vc enable vcf output + doc: | + The -vc-enable-vcf-output option enables VCF file output during a gVCF run. The default value is false. + type: boolean? 
+ inputBinding: + prefix: "--vc-enable-vcf-output=" + separate: False + valueFrom: "$(self.toString())" + vc_emit_ref_confidence: + label: vc emit ref confidence + doc: | + A genomic VCF (gVCF) file contains information on variants and positions determined to be homozygous to the reference genome. + For homozygous regions, the gVCF file includes statistics that indicate how well reads support the absence of variants or + alternative alleles. To enable gVCF output, set to GVCF. By default, contiguous runs of homozygous reference calls with similar + scores are collapsed into blocks (hom-ref blocks). Hom-ref blocks save disk space and processing time of downstream analysis tools. + DRAGEN recommends using the default mode. To produce unbanded output, set --vc-emit-ref-confidence to BP_RESOLUTION. + type: string? + inputBinding: + prefix: "--vc-emit-ref-confidence=" + separate: False + vc_ml_enable_recalibration: + label: vc ml enable recalibration + doc: | + DRAGEN employs machine learning-based variant recalibration (DRAGEN-ML) for germline SNV VC. + Variant calling accuracy is improved using powerful and efficient machine learning techniques that augment the variant caller, + by exploiting more of the available read and context information that does not easily integrate into the Bayesian processing + used by the haplotype variant caller. + type: boolean? + inputBinding: + prefix: "--vc-ml-enable-recalibration=" + separate: False + valueFrom: "$(self.toString())" + + # Sex chromosome mosaic variants options + vc_enable_sex_chr_diploid: + label: vc enable sex chr diploid + doc: | + For male samples in germline calling mode, DRAGEN calls potential mosaic variants in non-PAR regions of sex chromosomes. + A variant is called as mosaic when the allele frequency (FORMAT/AF) is below 85% or if multiple alt alleles are called, + suggesting incompatibility with the haploid assumption. 
The GT field for bi-allelic mosaic variants is "0/1", + denoting a mixture of reference and alt alleles, as opposed to the regular GT of "1" for haploid variants. + The GT field for multi-allelic mosaic variants is "1/2" in VCF. + You can disable the calling of mosaic variants by setting --vc-enable-sex-chr-diploid to false. + type: boolean? + inputBinding: + prefix: "--vc-enable-sex-chr-diploid=" + separate: False + valueFrom: "$(self.toString())" + vc_haploid_call_af_threshold: + label: vc haploid call af threshold + doc: | + Option --vc-haploid-call-af-threshold= to control threshold. + * Diploid model is applied to haploid (chrX/Y, non-PAR) regions in male samples. + * Variants with only one alt allele and with AF>=85% are rewritten to haploid calls. + * The potential mosaic calls with AF<85% will have GT of "0/1" and an INFO tag of + "MOSAIC" will be added. + type: float? + inputBinding: + prefix: "--vc-haploid-call-af-threshold=" + separate: False + + # Downsampling options + vc_max_reads_per_active_region: + label: vc max reads per active region + doc: | + specifies the maximum number of reads covering a given active region. + Default is 10000 for the germline workflow + type: int? + inputBinding: + prefix: "--vc-max-reads-per-active-region=" + separate: False + vc_max_reads_per_raw_region: + label: vc max reads per raw region + doc: | + specifies the maximum number of reads covering a given raw region. + Default is 30000 for the germline workflow + type: int? + inputBinding: + prefix: "--vc-max-reads-per-raw-region=" + separate: False + + # Ploidy support + sample_sex: + label: sample sex + doc: | + Specifies the sex of a sample + type: + - "null" + - type: enum + symbols: + - none + - auto + - male + - female + inputBinding: + prefix: "--sample-sex=" + separate: False + # ROH options + vc_enable_roh: + label: vc enable roh + doc: | + Enable or disable the ROH caller by setting this option to true or false. Enabled by default for human autosomes only.
+ type: boolean? + inputBinding: + prefix: "--vc-enable-roh=" + separate: False + valueFrom: "$(self.toString())" + vc_roh_blacklist_bed: + label: vc roh blacklist bed + doc: | + If provided, the ROH caller ignores variants that are contained in any region in the blacklist BED file. + DRAGEN distributes blacklist files for all popular human genomes and automatically selects a blacklist to + match the genome in use, unless this option is used explicitly select a file. + type: File? + inputBinding: + prefix: "--vc-roh-blacklist-bed=" + separate: False + + # BAF options + vc_enable_baf: + label: vc enable baf + doc: | + Enable or disable B-allele frequency output. Enabled by default. + type: boolean? + inputBinding: + prefix: "--vc-enable-baf=" + separate: False + valueFrom: "$(self.toString())" + + # Germline variant small hard filtering options + vc_hard_filter: + label: vc hard filter + doc: | + DRAGEN provides post-VCF variant filtering based on annotations present in the VCF records. + However, due to the nature of DRAGEN's algorithms, which incorporate the hypothesis of correlated errors + from within the core of variant caller, the pipeline has improved capabilities in distinguishing + the true variants from noise, and therefore the dependency on post-VCF filtering is substantially reduced. + For this reason, the default post-VCF filtering in DRAGEN is very simple + type: string? + inputBinding: + prefix: "--vc-hard-filter=" + separate: False + # dbSNP annotation + dbsnp_annotation: + label: dbsnp annotation + doc: | + In Germline, Tumor-Normal somatic, or Tumor-Only somatic modes, + DRAGEN can look up variant calls in a dbSNP database and add annotations for any matches that it finds there. + To enable the dbSNP database search, set the --dbsnp option to the full path to the dbSNP database + VCF or .vcf.gz file, which must be sorted in reference order. + type: File? 
+ secondaryFiles: + - pattern: ".tbi" + required: true + inputBinding: + prefix: "--dbsnp=" + separate: False + + # Repeat expansion calling + repeat_genotype_enable: + label: repeat genotype enable + doc: | + Enable DRAGEN repeat expansion detection + type: boolean? + inputBinding: + prefix: "--repeat-genotype-enable=" + separate: False + valueFrom: "$(self.toString())" + repeat_genotype_use_catalog: + label: repeat genotype use catalog + doc: | + The repeat-specification (also called variant catalog) JSON file defines the repeat regions for ExpansionHunter to analyze. + Default repeat-specification for some pathogenic and polymorphic repeats are in the /opt/edico/repeat-specs/ directory, + based on the reference genome used with DRAGEN. Users can choose between any of the three default repeat-specification files + packaged with DRAGEN using + type: + - "null" + - type: enum + symbols: + - default + - default_plus_smn + - expanded + inputBinding: + prefix: "--repeat-genotype-use-catalog=" + separate: False + repeat_genotype_specs: + label: repeat genotype specs + doc: | + Specifies the full path to the JSON file that contains the repeat variant catalog (specification) describing the loci to call. + --repeat-genotype-specs is required for ExpansionHunter. + If the option is not provided, + DRAGEN attempts to autodetect the applicable catalog file from /opt/edico/repeat-specs/ based on the reference provided. + type: + - "null" + - File + - string + inputBinding: + prefix: "--repeat-genotype-specs=" + separate: False + # Force genotyping + vc_forcegt_vcf: + label: vc forcegt vcf + doc: | + DRAGEN supports force genotyping (ForceGT) for Germline SNV variant calling. + To use ForceGT, use the --vc-forcegt-vcf option with a list of small variants to force genotype. + The input list of small variants can be a .vcf or .vcf.gz file. + + The current limitations of ForceGT are as follows: + * ForceGT is supported for Germline SNV variant calling in the V3 mode.
+ The V1, V2, and V2+ modes are not supported. + * ForceGT is not supported for Somatic SNV variant calling. + * ForceGT variants do not propagate through Joint Genotyping. + type: File? + secondaryFiles: + - pattern: ".tbi" + required: true + inputBinding: + prefix: "--vc-forcegt-vcf=" + separate: False + + # cnv pipeline - with this we must also specify one of --cnv-normal-b-allele-vcf, + # More info at https://support-docs.illumina.com/SW/DRAGEN_v39/Content/SW/DRAGEN/CNVExamples_fDG_dtREF.htm?Highlight=cnv-normal-b-allele-vcf + enable_cnv: + label: enable cnv calling + doc: | + Enable CNV processing in the DRAGEN Host Software. + type: boolean? + inputBinding: + prefix: "--enable-cnv=" + separate: False + valueFrom: "$(self.toString())" + cnv_enable_self_normalization: + label: cnv enable self normalization + doc: | + Enable CNV self normalization. + Self Normalization requires that the DRAGEN hash table be generated with the enable-cnv=true option. + type: boolean? + inputBinding: + prefix: "--cnv-enable-self-normalization=" + separate: False + valueFrom: "$(self.toString())" + + # QC options + qc_coverage_region_1: + label: qc coverage region 1 + doc: | + Generates coverage region report using bed file 1. + type: File? + inputBinding: + prefix: "--qc-coverage-region-1=" + separate: False + qc_coverage_region_2: + label: qc coverage region 2 + doc: | + Generates coverage region report using bed file 2. + type: File? + inputBinding: + prefix: "--qc-coverage-region-2=" + separate: False + qc_coverage_region_3: + label: qc coverage region 3 + doc: | + Generates coverage region report using bed file 3. + type: File? + inputBinding: + prefix: "--qc-coverage-region-3=" + separate: False + qc_coverage_ignore_overlaps: + label: qc coverage ignore overlaps + doc: | + Set to true to resolve all of the alignments for each fragment and avoid double-counting any + overlapping bases. This might result in marginally longer run times. 
+ This option also requires setting --enable-map-align=true. + type: boolean? + inputBinding: + prefix: "--qc-coverage-ignore-overlaps=" + separate: False + valueFrom: "$(self.toString())" + + # HLA calling + enable_hla: + label: enable hla + doc: | + Enable HLA typing by setting --enable-hla flag to true + type: boolean? + inputBinding: + prefix: "--enable-hla=" + separate: False + valueFrom: "$(self.toString())" + hla_enable_class_2: + label: hla enable class 2 + doc: | + Enable class II HLA typing by setting --hla-enable-class-2 flag to true + type: boolean? + inputBinding: + prefix: "--hla-enable-class-2=" + separate: False + valueFrom: "$(self.toString())" + hla_bed_file: + label: hla bed file + doc: | + Use the HLA region BED input file to specify the region to extract HLA reads from. + DRAGEN HLA Caller parses the input file for regions within the BED file, and then + extracts reads accordingly to align with the HLA allele reference. + type: File? + inputBinding: + prefix: "--hla-bed-file=" + separate: False + hla_reference_file: + label: hla reference file + doc: | + Use the HLA allele reference file to specify the reference alleles to align against. + The input HLA reference file must be in FASTA format and contain the protein sequence separated into exons. + If --hla-reference-file is not specified, DRAGEN uses hla_classI_ref_freq.fasta from /opt/edico/config/. + The reference HLA sequences are obtained from the IMGT/HLA database. + type: File? + inputBinding: + prefix: "--hla-reference-file=" + separate: False + hla_allele_frequency_file: + label: hla allele frequency file + doc: | + Use the population-level HLA allele frequency file to break ties if one or more HLA allele produces the same or similar results. + The input HLA allele frequency file must be in CSV format and contain the HLA alleles and the occurrence frequency in population. 
+ If --hla-allele-frequency-file is not specified, DRAGEN automatically uses hla_classI_allele_frequency.csv from /opt/edico/config/. + Population-level allele frequencies can be obtained from the Allele Frequency Net database. + type: File? + inputBinding: + prefix: "--hla-allele-frequency-file=" + separate: False + hla_tiebreaker_threshold: + label: hla tiebreaker threshold + doc: | + If more than one allele has a similar number of reads aligned and there is not a clear indicator for the best allele, + the alleles are considered as ties. The HLA Caller places the tied alleles into a candidate set for tie breaking based + on the population allele frequency. If an allele has more than the specified fraction of reads aligned (normalized to + the top hit), then the allele is included into the candidate set for tie breaking. The default value is 0.97. + type: float? + inputBinding: + prefix: "--hla-tiebreaker-threshold=" + separate: False + hla_zygosity_threshold: + label: hla zygosity threshold + doc: | + If the minor allele at a given locus has fewer reads mapped than a fraction of the read count of the major allele, + then the HLA Caller infers homozygosity for the given HLA-I gene. You can use this option to specify the fraction value. + The default value is 0.15. + type: float? + inputBinding: + prefix: "--hla-zygosity-threshold=" + separate: False + hla_min_reads: + label: hla min reads + doc: | + Set the minimum number of reads to align to HLA alleles to ensure sufficient coverage and perform HLA typing. + The default value is 1000 and suggested for WES samples. If using samples with less coverage, you can use a + lower threshold value. + type: int? + inputBinding: + prefix: "--hla-min-reads=" + separate: False + # Miscellaneous options + lic_instance_id_location: + label: license instance id location + doc: | + You may wish to place your own in.
+ Optional value, default set to /opt/instance-identity + which is a path inside the dragen container + type: + - File? + - string? + default: "/opt/instance-identity" + inputBinding: + prefix: "--lic-instance-id-location=" + separate: False + +outputs: + # Will also include mounted-files.txt + dragen_germline_output_directory: + label: dragen germline output directory + doc: | + The output directory containing all germline output files + type: Directory + outputBinding: + glob: "$(inputs.output_directory)" + # Optional files to be used in downstream workflows. + # Whilst these files reside inside the germline directory, specifying them here as outputs + # provides easier access and reference + # Only exists if --enable-map-align-output is set to true# + dragen_bam_out: + label: dragen bam out + doc: | + The output bam file, exists only if --enable-map-align-output is set to true + type: File? + outputBinding: + glob: "$(inputs.output_directory)/$(inputs.output_file_prefix).bam" + secondaryFiles: + - ".bai" + # Should always be available as an output + dragen_vcf_out: + label: dragen vcf out + doc: | + The output germline vcf file + type: File? 
+ outputBinding: + glob: "$(inputs.output_directory)/$(inputs.output_file_prefix).vcf.gz" + secondaryFiles: + - ".tbi" + + +successCodes: + - 0 diff --git a/tools/dragen-somatic/4.3.6/dragen-somatic__4.3.6.cwl b/tools/dragen-somatic/4.3.6/dragen-somatic__4.3.6.cwl new file mode 100644 index 00000000..73e4aaeb --- /dev/null +++ b/tools/dragen-somatic/4.3.6/dragen-somatic__4.3.6.cwl @@ -0,0 +1,1542 @@ +cwlVersion: v1.1 +class: CommandLineTool + +# Extensions +$namespaces: + s: https://schema.org/ + ilmn-tes: https://platform.illumina.com/rdf/ica/ +$schemas: + - https://schema.org/version/latest/schemaorg-current-http.rdf + +# Metadata +s:author: + class: s:Person + s:name: Alexis Lucattini + s:email: Alexis.Lucattini@umccr.org + s:identifier: https://orcid.org/0000-0001-9754-647X + +# ID/Docs +id: dragen-somatic--4.3.6 +label: dragen-somatic v(4.3.6) +doc: | + Run tumor-normal dragen somatic pipeline v 4.3.6. + Workflow takes in two separate lists of object stor version of the fastq_list.csv equivalent + See the fastq_list_row schema definitions for more information. + More information on the documentation can be found [here](https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/SomaticMode.htm). 
+ + +# ILMN V1 Resources Guide: https://illumina.gitbook.io/ica-v1/analysis/a-taskexecution#type-and-size +# ILMN V2 Resources Guide: https://help.ica.illumina.com/project/p-flow/f-pipelines#compute-types +hints: + ResourceRequirement: + ilmn-tes:resources/tier: standard + ilmn-tes:resources/type: fpga + ilmn-tes:resources/size: medium + coresMin: 16 + ramMin: 240000 + DockerRequirement: + dockerPull: 079623148045.dkr.ecr.ap-southeast-2.amazonaws.com/cp-prod/c3add40b-1be2-431d-a322-29529f7d2866:latest + +requirements: + ResourceRequirement: + tmpdirMin: | + ${ + /* 2 Tb */ + return Math.pow(2, 21); + } + SchemaDefRequirement: + types: + - $import: ../../../schemas/fastq-list-row/1.0.0/fastq-list-row__1.0.0.yaml + InlineJavascriptRequirement: + expressionLib: + - $include: ../../../typescript-expressions/dragen-tools/4.0.3/dragen-tools__4.0.3.cwljs + - $include: ../../../typescript-expressions/utils/1.0.0/utils__1.0.0.cwljs + InitialWorkDirRequirement: + listing: + - entryname: $(get_script_path()) + entry: | + #!/usr/bin/env bash + + # Fail on non-zero exit of subshell + set -euo pipefail + + # Confirm not more than one of fastq_list, fastq_list_rows, bam_input and cram_input are defined + if [[ "$(boolean_to_int(is_not_null(inputs.fastq_list)) + boolean_to_int(is_not_null(inputs.fastq_list_rows)) + boolean_to_int(is_not_null(inputs.bam_input)) + boolean_to_int(is_not_null(inputs.cram_input)))" -gt "1" ]]; then + echo "Please set no more than one of fastq_list, fastq_list_rows, bam_input or cram_input for normal sample" 1>&2 + exit 1 + fi + + # Ensure that at least one (and only one) of tumor_fastq_list, tumor_fastq_list_rows, tumor_bam_input and tumor_cram_input are defined but not both defined (XOR) + if [[ "$(boolean_to_int(is_not_null(inputs.tumor_fastq_list)) + boolean_to_int(is_not_null(inputs.tumor_fastq_list_rows)) + boolean_to_int(is_not_null(inputs.tumor_bam_input)) + boolean_to_int(is_not_null(inputs.tumor_cram_input)))" -ne "1" ]]; then + echo "One and 
only one of inputs tumor_fastq_list, tumor_fastq_list_rows, tumor_bam_input, tumor_cram_input must be defined" 1>&2 + exit 1 + fi + + # Reset dragen + /opt/edico/bin/dragen \\ + --partial-reconfig HMM \\ + --ignore-version-check true + + # Create directories + mkdir --parents \\ + "$(get_ref_mount())" \\ + "$(get_intermediate_results_dir())" \\ + "$(inputs.output_directory)" + + # untar ref data into scratch space + tar \\ + --directory "$(get_ref_mount())" \\ + --extract \\ + --file "$(inputs.reference_tar.path)" + + # Check if both bam inputs are set + if [[ "$(is_not_null(inputs.bam_input))" == "true" && "$(is_not_null(inputs.tumor_bam_input))" == "true" && ( "$(get_bool_value_as_str(inputs.enable_map_align))" == "true" || "$(get_bool_value_as_str(inputs.enable_map_align_output))" == "true" ) ]]; then + echo "More than one bam input is set, need to run enable map align first beforehand then run variant calling in a separate step" 1>&2 + + # Collect options relating to map alignment (these options will be popped from the args list and not used in the variant calling step) + enable_sort_parameter="" + enable_duplicate_marking_parameter="" + dedup_min_qual_parameter="" + + # Pop arguments + # Get args from command line + # But capture them again since we need them when we actually run dragen + existing_args_array=() + while [ $# -gt 0 ]; do + case "$1" in + --enable-sort=*) + enable_sort_parameter="$1" + ;; + --enable-duplicate-marking=*) + enable_duplicate_marking_parameter="\${1}" + ;; + --enable-map-align=*) + : # Just popping from array, we set this by default in these steps but dont want it in final dragen call + ;; + --enable-map-align-output=*) + : # Just popping from array, we set this by default in these steps but dont want it in final dragen call + ;; + --dedup-min-qual=*) + dedup_min_qual_parameter="\${1}" + ;; + --bam-input=*) + : # Just popping from array as we set the new location elsewhere + ;; + --tumor-bam-input=*) + : # Just popping from array as 
we set the new location elsewhere + ;; + *) + existing_args_array+=("\${1}") + esac + shift 1 + done + + # Then run dragen map-align and place the files in the output directories + # Tumor Then Normal + echo "Aligning tumor" 1>&2 + # Eval prefix required here as some parameters are empty + eval /opt/edico/bin/dragen \\ + --enable-map-align=true \\ + --enable-map-align-output=true \\ + "\${enable_sort_parameter}" \\ + "\${enable_duplicate_marking_parameter}" \\ + "\${dedup_min_qual_parameter}" \\ + "--ref-dir=$(get_ref_path(inputs.reference_tar))" \\ + "--output-directory=$(inputs.output_directory)" \\ + "--output-file-prefix=$(inputs.output_file_prefix)" \\ + "--intermediate-results-dir=$(get_intermediate_results_dir())" \\ + "--lic-instance-id-location=$(get_optional_attribute_from_multi_type_input_object(inputs.lic_instance_id_location, "path"))" \\ + "--tumor-bam-input=$(get_attribute_from_optional_input(inputs.tumor_bam_input, "path"))" + + echo "Aligning normal" 1>&2 + # Eval prefix required here as some parameters are empty + eval /opt/edico/bin/dragen \\ + --enable-map-align=true \\ + --enable-map-align-output=true \\ + "\${enable_sort_parameter}" \\ + "\${enable_duplicate_marking_parameter}" \\ + "\${dedup_min_qual_parameter}" \\ + "--ref-dir=$(get_ref_path(inputs.reference_tar))" \\ + "--output-directory=$(inputs.output_directory)" \\ + "--output-file-prefix=$(inputs.output_file_prefix)" \\ + "--intermediate-results-dir=$(get_intermediate_results_dir())" \\ + "--lic-instance-id-location=$(get_optional_attribute_from_multi_type_input_object(inputs.lic_instance_id_location, "path"))" \\ + "--bam-input=$(get_attribute_from_optional_input(inputs.bam_input, "path"))" + + # Pop back in existing arguments into \${@} + for existing_arg in "\${existing_args_array[@]}"; do + set -- "\${@}" "\${existing_arg}" + done + + # Update bam input and tumor bam input parameters + set -- "\${@}" "--bam-input=$(inputs.output_directory)/$(inputs.output_file_prefix).bam" + set -- 
"\${@}" "--tumor-bam-input=$(inputs.output_directory)/$(inputs.output_file_prefix)_tumor.bam" + + # Explicity set enable map align to false + # Setting --enable-map-align to false, sets --enable-map-align-output to false as well + set -- "\${@}" "--enable-map-align=false" + fi + + # Check if cram inputs are set and enable map align output is set + # Like bam inputs, we need to run map align first before running variant calling + if [[ "$(is_not_null(inputs.cram_input))" == "true" && "$(is_not_null(inputs.tumor_cram_input))" == "true" && ( "$(get_bool_value_as_str(inputs.enable_map_align))" == "true" || "$(get_bool_value_as_str(inputs.enable_map_align_output))" == "true" ) ]]; then + echo "More than one cram input is set, need to run enable map align first beforehand then run variant calling in a separate step" 1>&2 + + # Collect options relating to map alignment (these options will be popped from the args list and not used in the variant calling step) + enable_sort_parameter="" + enable_duplicate_marking_parameter="" + dedup_min_qual_parameter="" + cram_reference_parameter="" + + # Pop arguments + # Get args from command line + # But capture them again since we need them when we actually run dragen + existing_args_array=() + while [ $# -gt 0 ]; do + case "$1" in + --enable-sort=*) + enable_sort_parameter="$1" + ;; + --enable-duplicate-marking=*) + enable_duplicate_marking_parameter="\${1}" + ;; + --enable-map-align=*) + : # Just popping from array, we set this by default in these steps but dont want it in final dragen call + ;; + --enable-map-align-output=*) + : # Just popping from array, we set this by default in these steps but dont want it in final dragen call + ;; + --dedup-min-qual=*) + dedup_min_qual_parameter="\${1}" + ;; + --cram-input=*) + : # Just popping from array as we set the new location elsewhere + ;; + --tumor-cram-input=*) + : # Just popping from array as we set the new location elsewhere + ;; + --cram-reference=*) + 
cram_reference_parameter="\${1}" + ;; + *) + existing_args_array+=("\${1}") + esac + shift 1 + done + + # Then run dragen map-align and place the files in the output directories + # Tumor Then Normal + echo "Aligning tumor" 1>&2 + # Eval prefix required here as some parameters are empty + eval /opt/edico/bin/dragen \\ + --enable-map-align=true \\ + --enable-map-align-output=true \\ + "\${enable_sort_parameter}" \\ + "\${enable_duplicate_marking_parameter}" \\ + "\${dedup_min_qual_parameter}" \\ + "\${cram_reference_parameter}" \\ + "--ref-dir=$(get_ref_path(inputs.reference_tar))" \\ + "--output-directory=$(inputs.output_directory)" \\ + "--output-file-prefix=$(inputs.output_file_prefix)" \\ + "--intermediate-results-dir=$(get_intermediate_results_dir())" \\ + "--lic-instance-id-location=$(get_optional_attribute_from_multi_type_input_object(inputs.lic_instance_id_location, "path"))" \\ + "--tumor-cram-input=$(get_attribute_from_optional_input(inputs.tumor_cram_input, "path"))" + + echo "Aligning normal" 1>&2 + # Eval prefix required here as some parameters are empty + eval /opt/edico/bin/dragen \\ + --enable-map-align=true \\ + --enable-map-align-output=true \\ + "\${enable_sort_parameter}" \\ + "\${enable_duplicate_marking_parameter}" \\ + "\${dedup_min_qual_parameter}" \\ + "\${cram_reference_parameter}" \\ + "--ref-dir=$(get_ref_path(inputs.reference_tar))" \\ + "--output-directory=$(inputs.output_directory)" \\ + "--output-file-prefix=$(inputs.output_file_prefix)" \\ + "--intermediate-results-dir=$(get_intermediate_results_dir())" \\ + "--lic-instance-id-location=$(get_optional_attribute_from_multi_type_input_object(inputs.lic_instance_id_location, "path"))" \\ + "--cram-input=$(get_attribute_from_optional_input(inputs.cram_input, "path"))" + + # Pop back in existing arguments into \${@} + for existing_arg in "\${existing_args_array[@]}"; do + set -- "\${@}" "\${existing_arg}" + done + + # Update bam input and tumor bam input parameters + # Note that we output 
bams by default + set -- "\${@}" "--bam-input=$(inputs.output_directory)/$(inputs.output_file_prefix).bam" + set -- "\${@}" "--tumor-bam-input=$(inputs.output_directory)/$(inputs.output_file_prefix)_tumor.bam" + + # Explicitly set enable map align to false since we have already done the alignment + # Setting --enable-map-align to false, sets --enable-map-align-output to false as well + set -- "\${@}" "--enable-map-align=false" + fi + + # Run dragen command and import options from cli + echo "Running dragen variant calling" 1>&2 + "$(get_dragen_bin_path())" "\${@}" + + # Check if a normal input is set + if [[ "$(is_not_null(inputs.fastq_list))" == "true" || "$(is_not_null(inputs.fastq_list_rows))" == "true" || "$(is_not_null(inputs.bam_input))" == "true" || "$(is_not_null(inputs.cram_input))" == "true" ]]; then + # --enable-map-align-output is set to false + if [[ "$(get_bool_value_as_str(inputs.enable_map_align_output))" == "false" ]]; then + # No bams output if --enable-map-align-output is false and --enable-map-align is also false + # if --enable-map-align-output is false and --enable-map-align is false, no bam output + if [[ "$(get_bool_value_as_str(inputs.enable_map_align))" == "false" ]]; then + echo "--enable-map-align-output and --enable-map-align set to false, no bam output" 1>&2 + + # Bams output if --enable-map-align-output is false but --enable-map-align is true + # And one of tumor_bam_input or tumor_cram_input is set + # And one of bam_input or cram_input is set + # Then bam is generated even when --enable-map-align-output is explicitly set to false + # Since we needed to align the tumor and normal prior to running the variant calling step + # So when --enable-map-align-output is false under these conditions, we should delete the normal bam file rather than move it + elif [[ "$(get_bool_value_as_str(inputs.enable_map_align))" == "true" && ( "$(is_not_null(inputs.tumor_bam_input))" == "true" || "$(is_not_null(inputs.tumor_cram_input))" == "true" ) && ( 
"$(is_not_null(inputs.bam_input))" == "true" || "$(is_not_null(inputs.cram_input))" == "true" ) ]]; then + echo "--enable-map-align is set to true but --enable-map-align-output is set to false, but we ignored --enable-map-align-output=false because one both tumor and normal inputs were set to true and thus needed to be aligned separately, deleting tumor and normal bam files" 1>&2 + rm -f "$(inputs.output_directory)/$(inputs.output_file_prefix).bam" "$(inputs.output_directory)/$(inputs.output_file_prefix).bam.bai" "$(inputs.output_directory)/$(inputs.output_file_prefix).bam.md5sum" + rm -f "$(inputs.output_directory)/$(inputs.output_file_prefix)_tumor.bam" "$(inputs.output_directory)/$(inputs.output_file_prefix)_tumor.bam.bai" "$(inputs.output_directory)/$(inputs.output_file_prefix)_tumor.bam.md5sum" + fi + + # No action required otherwise --enable-map-align-output is false + + # --enable-map-align-output is true + # Move normal bam file to new normal bam file name prefix + else + # Ensure that we have a normal RGSM value, otherwise exit. + if [[ "$(is_not_null(get_normal_output_prefix(inputs)))" == "false" ]]; then + echo "Could not get the normal bam file prefix" 1>&2 + echo "Exiting" 1>&2 + # A bare exit would reuse the status of the echo above (0) and report success, so fail explicitly + exit 1 + fi + + # Get new normal file name prefix from the fastq_list.csv + new_normal_file_name_prefix="$(get_normal_output_prefix(inputs))" + + # Ensure output normal bam file exists and the destination normal bam file also does not exist yet + if [[ -f "$(inputs.output_directory)/$(inputs.output_file_prefix).bam" && ! -f "$(inputs.output_directory)/\${new_normal_file_name_prefix}.bam" ]]; then + # Move normal bam, normal bam index and normal bam md5sum + ( + cd "$(inputs.output_directory)" + mv "$(inputs.output_file_prefix).bam" "\${new_normal_file_name_prefix}.bam" + mv "$(inputs.output_file_prefix).bam.bai" "\${new_normal_file_name_prefix}.bam.bai" + mv "$(inputs.output_file_prefix).bam.md5sum" "\${new_normal_file_name_prefix}.bam.md5sum" + ) + else + echo "Error! 
Expected to move file from $(inputs.output_file_prefix).bam to \${new_normal_file_name_prefix}.bam but either $(inputs.output_file_prefix).bam does not exist or \${new_normal_file_name_prefix}.bam already exists" 1>&2 + fi + fi + fi + + # If --enable-sv has been selected, we need to remove the empty genomeDepth directory + # https://github.com/umccr-illumina/ica_v2/issues/131 + if [[ "$(is_not_null(inputs.enable_sv))" == "true" && "$(get_bool_value_as_str(inputs.enable_sv))" == "true" && -d "$(inputs.output_directory)/sv/" ]]; then + find "$(inputs.output_directory)/sv/" -type d -empty -delete + fi + - | + ${ + return generate_somatic_mount_points(inputs); + } + +baseCommand: [ "bash" ] + +arguments: + - position: -1 + valueFrom: "$(get_script_path())" + - position: 1 + prefix: "--enable-variant-caller=" + separate: False + valueFrom: "true" + - prefix: "--intermediate-results-dir=" + separate: False + valueFrom: "$(get_intermediate_results_dir())" + +inputs: + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/OptionReference.htm + # Inputs fastq list csv or actual fastq list file with presigned urls for Read1File and Read2File columns + # File inputs + # Input Option 1 + fastq_list: + label: fastq list + doc: | + CSV file that contains a list of FASTQ files for normal sample + to process. read_1 and read_2 components in the CSV file must be presigned urls. + type: File? + inputBinding: + loadContents: true + prefix: "--fastq-list=" + separate: False + valueFrom: "$(get_fastq_list_csv_path())" + tumor_fastq_list: + label: tumor fastq list + doc: | + CSV file that contains a list of FASTQ files + to process. read_1 and read_2 components in the CSV file must be presigned urls. + type: File? 
+ inputBinding: + prefix: "--tumor-fastq-list=" + separate: False + valueFrom: "$(get_tumor_fastq_list_csv_path())" + # Input Option 2 + fastq_list_rows: + label: fastq list rows + doc: | + Alternative to providing a file, one can instead provide a list of 'fastq-list-row' objects for normal sample + type: ../../../schemas/fastq-list-row/1.0.0/fastq-list-row__1.0.0.yaml#fastq-list-row[]? + inputBinding: + prefix: "--fastq-list=" + separate: False + valueFrom: "$(get_fastq_list_csv_path())" + tumor_fastq_list_rows: + label: tumor fastq list rows + doc: | + Alternative to providing a file, one can instead provide a list of 'fastq-list-row' objects for tumor sample + type: ../../../schemas/fastq-list-row/1.0.0/fastq-list-row__1.0.0.yaml#fastq-list-row[]? + inputBinding: + prefix: "--tumor-fastq-list=" + separate: False + valueFrom: "$(get_tumor_fastq_list_csv_path())" + # Input Option 3 + bam_input: + label: bam input + doc: | + Input a normal BAM file for the variant calling stage + type: File? + inputBinding: + prefix: "--bam-input=" + separate: False + secondaryFiles: + - pattern: ".bai" + required: true + tumor_bam_input: + label: tumor bam input + doc: | + Input a tumor BAM file for the variant calling stage + type: File? + inputBinding: + prefix: "--tumor-bam-input=" + separate: False + secondaryFiles: + - pattern: ".bai" + required: true + # Input Option 4 + cram_input: + label: cram input + doc: | + Input a normal CRAM file for the variant calling stage + type: File? + inputBinding: + prefix: "--cram-input=" + separate: False + secondaryFiles: + - pattern: ".crai" + required: true + tumor_cram_input: + label: tumor cram input + doc: | + Input a tumor CRAM file for the variant calling stage + type: File? + inputBinding: + prefix: "--tumor-cram-input=" + separate: False + secondaryFiles: + - pattern: ".crai" + required: true + cram_reference: + label: cram reference + doc: | + Path to the reference fasta file for the CRAM input. 
+ Required only if the input is a cram file AND not the reference in the tarball + type: File? + inputBinding: + prefix: "--cram-reference=" + separate: False + secondaryFiles: + - pattern: ".fai" + required: true + + # Dragen reference tar ball + reference_tar: + label: reference tar + doc: | + Path to ref data tarball + type: File + inputBinding: + prefix: "--ref-dir=" + separate: False + valueFrom: "$(get_ref_path(self))" + + # Mandatory parameters + output_directory: + label: output directory + doc: | + Required - The output directory. + type: string + inputBinding: + prefix: "--output-directory=" + separate: False + output_file_prefix: + label: output file prefix + doc: | + Required - the output file prefix + type: string + inputBinding: + prefix: "--output-file-prefix=" + separate: False + + # Optional operation modes + # Optional operation modes + # Given we're running from fastqs + # --enable-variant-caller option must be set to true (set in arguments), --enable-map-align is then activated by default + # --enable-map-align-output to keep bams + # --enable-duplicate-marking to mark duplicate reads at the same time + # --enable-sv to enable the structural variant calling step. + enable_sort: + label: enable sort + doc: | + True by default, only set this to false if using --bam-input and --tumor-bam-input parameters + type: boolean? + inputBinding: + prefix: "--enable-sort=" + separate: False + valueFrom: "$(self.toString())" + enable_map_align: + label: enable map align + doc: | + Enabled by default since --enable-variant-caller option is set to true. + Set this value to false if using bam_input AND tumor_bam_input + type: boolean? + inputBinding: + prefix: "--enable-map-align=" + separate: False + valueFrom: "$(self.toString())" + enable_map_align_output: + label: enable map align output + doc: | + Enables saving the output from the + map/align stage. Default is true when only + running map/align. Default is false if + running the variant caller. 
+ type: boolean? + inputBinding: + prefix: "--enable-map-align-output=" + separate: False + valueFrom: "$(self.toString())" + enable_duplicate_marking: + label: enable duplicate marking + doc: | + Enable the flagging of duplicate output + alignment records. + type: boolean? + inputBinding: + prefix: "--enable-duplicate-marking=" + separate: False + valueFrom: "$(self.toString())" + enable_sv: + label: enable sv + doc: | + Enable/disable structural variant + caller. Default is false. + type: boolean? + inputBinding: + prefix: "--enable-sv=" + separate: False + valueFrom: "$(self.toString())" + + # Deduplication options + dedup_min_qual: + label: deduplicate minimum quality + doc: | + Specifies the Phred quality score below which a base should be excluded from the quality score + calculation used for choosing among duplicate reads. + type: int? + inputBinding: + prefix: "--dedup-min-qual=" + separate: False + + + # Structural Variant Caller Options + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/StructuralVariantCalling.htm + sv_call_regions_bed: + label: sv call regions bed + doc: | + Specifies a BED file containing the set of regions to call. + type: File? + inputBinding: + prefix: "--sv-call-regions-bed=" + separate: False + sv_region: + label: sv region + doc: | + Limit the analysis to a specified region of the genome for debugging purposes. + This option can be specified multiple times to build a list of regions. + The value must be in the format "chr:startPos-endPos".. + type: string? + inputBinding: + prefix: "--sv-region=" + separate: False + valueFrom: "$(self.toString())" + sv_exome: + label: sv exome + doc: | + Set to true to configure the variant caller for targeted sequencing inputs, + which includes disabling high depth filters. + In integrated mode, the default is to autodetect targeted sequencing input, + and in standalone mode the default is false. + type: boolean? 
+ inputBinding: + prefix: "--sv-exome=" + separate: False + valueFrom: "$(self.toString())" + sv_output_contigs: + label: sv output contigs + doc: | + Set to true to have assembled contig sequences output in a VCF file. The default is false. + type: boolean? + inputBinding: + prefix: "--sv-output-contigs=" + separate: False + valueFrom: "$(self.toString())" + sv_forcegt_vcf: + label: sv forcegt vcf + doc: | + Specify a VCF of structural variants for forced genotyping. The variants are scored and emitted + in the output VCF even if not found in the sample data. + The variants are merged with any additional variants discovered directly from the sample data. + type: File? + inputBinding: + prefix: "--sv-forcegt-vcf=" + separate: False + sv_discovery: + label: sv discovery + doc: | + Enable SV discovery. This flag can be set to false only when --sv-forcegt-vcf is used. + When set to false, SV discovery is disabled and only the forced genotyping input variants + are processed. The default is true. + type: boolean? + inputBinding: + prefix: "--sv-discovery=" + separate: False + valueFrom: "$(self.toString())" + sv_se_overlap_pair_evidence: + label: sv use overlap pair evidence + doc: | + Allow overlapping read pairs to be considered as evidence. + By default, DRAGEN uses autodetect on the fraction of overlapping read pairs if <20%. + type: boolean? + inputBinding: + prefix: "--sv-use-overlap-pair-evidence=" + separate: False + valueFrom: "$(self.toString())" + sv_somatic_ins_tandup_hotspot_regions_bed: + label: sv somatic ins tandup hotspot regions bed + doc: | + Specify a BED of ITD hotspot regions to increase sensitivity for calling ITDs in somatic variant analysis. + By default, DRAGEN SV automatically selects areference-specific hotspots BED file from + /opt/edico/config/sv_somatic_ins_tandup_hotspot_*.bed. + type: File? 
+ inputBinding: + prefix: "--sv-somatic-ins-tandup-hotspot-regions-bed=" + separate: False + sv_enable_somatic_ins_tandup_hotspot_regions: + label: sv enable somatic ins tandup hotspot regions + doc: | + Enable or disable the ITD hotspot region input. The default is true in somatic variant analysis. + type: boolean? + inputBinding: + prefix: "--sv-enable-somatic-ins-tandup-hotspot-regions=" + separate: False + valueFrom: "$(self.toString())" + sv_enable_liquid_tumor_mode: + label: sv enable liquid tumor mode + doc: | + Enable liquid tumor mode. + type: boolean? + inputBinding: + prefix: "--sv-enable-liquid-tumor-mode=" + separate: False + valueFrom: "$(self.toString())" + sv_tin_contam_tolerance: + label: sv tin contam tolerance + doc: | + Set the Tumor-in-Normal (TiN) contamination tolerance level. + You can enter any value between 0-1. The default maximum TiN contamination tolerance is 0.15. + type: float? + inputBinding: + prefix: "--sv-tin-contam-tolerance=" + separate: False + + # Variant calling options + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/SmallVariantCaller.htm + vc_target_bed: + label: vc target bed + doc: | + This is an optional command line input that restricts processing of the small variant caller, + target bed related coverage, and callability metrics to regions specified in a BED file. + type: File? + inputBinding: + prefix: "--vc-target-bed=" + separate: False + vc_target_bed_padding: + label: vc target bed padding + doc: | + This is an optional command line input that can be used to pad all of the target + BED regions with the specified value. + For example, if a BED region is 1:1000-2000 and a padding value of 100 is used, + it is equivalent to using a BED region of 1:900-2100 and a padding value of 0. + + Any padding added to --vc-target-bed-padding is used by the small variant caller + and by the target bed coverage/callability reports. The default padding is 0. + type: int? 
+ inputBinding: + prefix: "--vc-target-bed-padding=" + separate: False + vc_target_coverage: + label: vc target coverage + doc: | + The --vc-target-coverage option specifies the target coverage for down-sampling. + The default value is 500 for germline mode and 50 for somatic mode. + type: int? + inputBinding: + prefix: "--vc-target-coverage=" + separate: False + vc_target_vaf: + label: vc target vaf + doc: | + The vc-target-vaf is used to select the variant allele frequencies of interest. + The variant caller will aim to detect variants with allele frequencies larger than this setting. + We recommend adding a small safety factor, e.g. to ensure variants in the ballpark of 1% are detected, + the minimum vc-target-vaf can be specified as 0.009 (0.9%). This setting will not apply a hard threshold, + and it is possible to detect variants with allele frequencies lower than the selected threshold. + On high coverage and clean datasets, a lower target-vaf may help increase sensitivity. + On noisy samples (like FFPE) a higher target-vaf (like 0.03) maybe help reduce false positives. + Using a low target-vaf may also increase runtime. Set the vc-target-vaf to 0 to disable this feature. + When this feature is disabled the variant caller will require at least 2 supporting reads to discover a candidate variant. + Default=0.01. + type: float? + inputBinding: + prefix: "--vc-target-vaf=" + separate: False + vc_enable_gatk_acceleration: + label: vc enable gatk acceleration + doc: | + If is set to true, the variant caller runs in GATK mode + (concordant with GATK 3.7 in germline mode and GATK 4.0 in somatic mode). + type: boolean? + inputBinding: + prefix: "--vc-enable-gatk-acceleration=" + separate: False + valueFrom: "$(self.toString())" + vc_remove_all_soft_clips: + label: vc remove all soft clips + doc: | + If is set to true, the variant caller does not use soft clips of reads to determine variants. + type: boolean? 
+ inputBinding: + prefix: "--vc-remove-all-soft-clips=" + separate: False + valueFrom: "$(self.toString())" + vc_decoy_contigs: + label: vc decoy contigs + doc: | + The --vc-decoy-contigs option specifies a comma-separated list of contigs to skip during variant calling. + This option can be set in the configuration file. + type: string? + inputBinding: + prefix: "--vc-decoy-contigs=" + separate: False + vc_enable_decoy_contigs: + label: vc enable decoy contigs + doc: | + If --vc-enable-decoy-contigs is set to true, variant calls on the decoy contigs are enabled. + The default value is false. + type: boolean? + inputBinding: + prefix: "--vc-enable-decoy-contigs=" + separate: False + valueFrom: "$(self.toString())" + vc_enable_phasing: + label: vc enable phasing + doc: | + The -vc-enable-phasing option enables variants to be phased when possible. The default value is true. + type: boolean? + inputBinding: + prefix: "--vc-enable-phasing=" + separate: False + valueFrom: "$(self.toString())" + vc_enable_vcf_output: + label: vc enable vcf output + doc: | + The -vc-enable-vcf-output option enables VCF file output during a gVCF run. The default value is false. + type: boolean? + inputBinding: + prefix: "--vc-enable-vcf-output=" + separate: False + valueFrom: "$(self.toString())" + # Downsampling options + vc_max_reads_per_active_region: + label: vc max reads per active region + doc: | + specifies the maximum number of reads covering a given active region. + Default is 10000 for the somatic workflow + type: int? + inputBinding: + prefix: "--vc-max-reads-per-active-region=" + separate: False + vc_max_reads_per_raw_region: + label: vc max reads per raw region + doc: | + specifies the maximum number of reads covering a given raw region. + Default is 30000 for the somatic workflow + type: int? 
+ inputBinding: + # DRAGEN spells this option --vc-max-reads-per-raw-region (plural "reads") + prefix: "--vc-max-reads-per-raw-region=" + separate: False + # Ploidy support + sample_sex: + label: sample sex + doc: | + Specifies the sex of a sample + type: + - "null" + - type: enum + symbols: + - none + - auto + - male + - female + inputBinding: + prefix: "--sample-sex=" + separate: False + # ROH options + vc_enable_roh: + label: vc enable roh + doc: | + Enable or disable the ROH caller by setting this option to true or false. Enabled by default for human autosomes only. + type: boolean? + inputBinding: + prefix: "--vc-enable-roh=" + separate: False + valueFrom: "$(self.toString())" + vc_roh_blacklist_bed: + label: vc roh blacklist bed + doc: | + If provided, the ROH caller ignores variants that are contained in any region in the blacklist BED file. + DRAGEN distributes blacklist files for all popular human genomes and automatically selects a blacklist to + match the genome in use, unless this option is used explicitly select a file. + type: File? + inputBinding: + prefix: "--vc-roh-blacklist-bed=" + separate: False + # BAF options + vc_enable_baf: + label: vc enable baf + doc: | + Enable or disable B-allele frequency output. Enabled by default. + type: boolean? + inputBinding: + prefix: "--vc-enable-baf=" + separate: False + valueFrom: "$(self.toString())" + # Somatic calling options + vc_base_qual_threshold: + label: vc base qual threshold + doc: | + (Replaces --vc-min-base-qual) + Specifies the minimum base quality to be considered in the active region detection of the small variant caller. + The default value is 10. + type: int? + inputBinding: + prefix: "--vc-base-qual-threshold=" + separate: False + vc_min_tumor_read_qual: + label: vc min tumor read qual + type: int? + doc: | + The --vc-min-tumor-read-qual option specifies the minimum read quality (MAPQ) to be considered for + variant calling. The default value is 3 for tumor-normal analysis or 20 for tumor-only analysis. 
+ inputBinding: + prefix: "--vc-min-tumor-read-qual=" + separate: False + valueFrom: "$(self.toString())" + vc_callability_tumor_thresh: + label: vc callability tumor thresh + type: int? + doc: | + The --vc-callability-tumor-thresh option specifies the callability threshold for tumor samples. The + somatic callable regions report includes all regions with tumor coverage above the tumor threshold. + inputBinding: + prefix: "--vc-callability-tumor-thresh=" + separate: False + vc_callability_normal_thresh: + label: vc callability normal thresh + type: int? + doc: | + The --vc-callability-normal-thresh option specifies the callability threshold for normal samples. + The somatic callable regions report includes all regions with normal coverage above the normal threshold. + inputBinding: + prefix: "--vc-callability-normal-thresh=" + separate: False + vc_somatic_hotspots: + label: vc somatic hotspots + type: File? + doc: | + The somatic hotspots option allows an input VCF to specify the positions where the risk for somatic + mutations are assumed to be significantly elevated. DRAGEN genotyping priors are boosted for all + postions specified in the VCF, so it is possible to call a variant at one of these sites with fewer supporting + reads. The cosmic database in VCF format can be used as one source of prior information to boost + sensitivity for known somatic mutations. + inputBinding: + prefix: "--vc-somatic-hotspots=" + separate: False + vc_hotspot_log10_prior_boost: + label: vc hotspot log10 prior boost + type: int? + doc: | + The size of the hotspot adjustment can be controlled via vc-hotspotlog10-prior-boost, + which has a default value of 4 (log10 scale) corresponding to an increase of 40 phred. + inputBinding: + prefix: "--vc-hotspot-log10-prior-boost=" + separate: False + vc_enable_liquid_tumor_mode: + label: vc enable liquid tumor mode + type: boolean? 
+ doc: | + In a tumor-normal analysis, DRAGEN accounts for tumor-in-normal (TiN) contamination by running liquid + tumor mode. Liquid tumor mode is disabled by default. When liquid tumor mode is enabled, DRAGEN is + able to call variants in the presence of TiN contamination up to a specified maximum tolerance level. + vc-enable-liquid-tumor-mode enables liquid tumor mode with a default maximum contamination + TiN tolerance of 0.15. If using the default maximum contamination TiN tolerance, somatic variants are + expected to be observed in the normal sample with allele frequencies up to 15% of the corresponding + allele in the tumor sample. + inputBinding: + prefix: "--vc-enable-liquid-tumor-mode=" + separate: False + valueFrom: "$(self.toString())" + vc_tin_contam_tolerance: + label: vc tin contam tolerance + type: float? + doc: | + vc-tin-contam-tolerance enables liquid tumor mode and allows you to + set the maximum contamination TiN tolerance. The maximum contamination TiN tolerance must be + greater than zero. For example, vc-tin-contam-tolerance=0.1. + inputBinding: + prefix: "--vc-tin-contam-tolerance=" + separate: False + vc_enable_orientation_bias_filter: + label: vc enable orientation bias filter + type: boolean? + doc: | + Enables the orientation bias filter. The default value is false, which means the option is disabled. + inputBinding: + prefix: "--vc-enable-orientation-bias-filter=" + separate: False + valueFrom: "$(self.toString())" + vc_enable_orientation_bias_filter_artifacts: + label: vc enable orientation bias filter artifacts + type: string? + doc: | + The artifact type to be filtered can be specified with the --vc-orientation-bias-filter-artifacts option. + The default is C/T,G/T, which correspond to OxoG and FFPE artifacts. Valid values include C/T, or G/T, or C/T,G/T,C/A. + An artifact (or an artifact and its reverse complement) cannot be listed twice. + For example, C/T,G/A is not valid, because C->G and T->A are reverse complements. 
+ inputBinding: + prefix: "--vc-enable-orientation-bias-filter-artifacts=" + separate: False + valueFrom: "$(self.toString())" + # Post somatic calling filtering options + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/PostSomaticFilters.htm + vc_hard_filter: + label: vc hard filter + doc: | + DRAGEN provides post-VCF variant filtering based on annotations present in the VCF records. + However, due to the nature of DRAGEN's algorithms, which incorporate the hypothesis of correlated errors + from within the core of variant caller, the pipeline has improved capabilities in distinguishing + the true variants from noise, and therefore the dependency on post-VCF filtering is substantially reduced. + For this reason, the default post-VCF filtering in DRAGEN is very simple + type: string? + inputBinding: + prefix: "--vc-hard-filter=" + separate: False + valueFrom: "$(self.toString())" + vc_sq_call_threshold: + label: vc sq call threshold + type: float? + doc: | + Emits calls in the VCF. The default is 3. + If the value for vc-sq-filter-threshold is lower than vc-sq-callthreshold, + the filter threshold value is used instead of the call threshold value + inputBinding: + prefix: "--vc-sq-call-threshold=" + separate: False + vc_sq_filter_threshold: + label: vc sq filter threshold + type: float? + doc: | + Marks emitted VCF calls as filtered. + The default is 17.5 for tumor-normal and 6.5 for tumor-only. + inputBinding: + prefix: "--vc-sq-filter-threshold=" + separate: False + vc_enable_triallelic_filter: + label: vc enable triallelic filter + type: boolean? + doc: | + Enables the multiallelic filter. The default is true. + inputBinding: + prefix: "--vc-enable-triallelic-filter=" + separate: False + valueFrom: "$(self.toString())" + vc_enable_af_filter: + label: vc enable af filter + type: boolean? + doc: | + Enables the allele frequency filter. The default value is false. 
When set to true, the VCF excludes variants + with allele frequencies below the AF call threshold or variants with an allele frequency below the AF filter + threshold and tagged with low AF filter tag. The default AF call threshold is 1% and the default AF filter + threshold is 5%. + To change the threshold values, use the following command line options: + --vc-af-call-threshold and --vc-af-filter-threshold. + inputBinding: + prefix: "--vc-enable-af-filter=" + separate: False + valueFrom: "$(self.toString())" + vc_af_call_threshold: + label: vc af call threshold + type: float? + doc: | + Set the allele frequency call threshold to emit a call in the VCF if the AF filter is enabled. + The default is 0.01. + inputBinding: + prefix: "--vc-af-call-threshold=" + separate: False + vc_af_filter_threshold: + label: vc af filter threshold + type: float? + doc: | + Set the allele frequency filter threshold to mark emitted VCF calls as filtered if the AF filter is + enabled. + The default is 0.05. + inputBinding: + prefix: "--vc-af-filter-threshold=" + separate: False + vc_enable_non_homref_normal_filter: + label: vc enable non homoref normal filter + doc: | + Enables the non-homref normal filter. The default value is true. When set to true, the VCF filters out + variants if the normal sample genotype is not a homozygous reference. + type: boolean? + inputBinding: + prefix: "--vc-enable-non-homref-normal-filter=" + separate: False + valueFrom: "$(self.toString())" + + # Mitochondrial allele frequency filters + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/MitochondrialCalling.htm + vc_af_call_threshold_mito: + label: vc af call threshold mito + doc: | + If the AF filter is enabled using --vc-enable-af-filter-mito=true, + the option sets the allele frequency call threshold to emit a call in the VCF for mitochondrial variant calling. + The default value is 0.01. + type: float? # fractional AF threshold (default 0.01), not an on/off flag
+ inputBinding: + prefix: "--vc-af-call-threshold-mito=" + separate: False + valueFrom: "$(self.toString())" + vc_af_filter_threshold_mito: + label: vc af filter threshold mito + doc: | + If the AF filter is enabled using --vc-enable-af-filter-mito=true, + the option sets the allele frequency filter threshold to mark emitted VCF calls + as filtered for mitochondrial variant calling. The default value is 0.02. + type: float? + inputBinding: + prefix: "--vc-af-filter-threshold-mito=" + separate: False + valueFrom: "$(self.toString())" + + # Enable non primary allelic filter + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/PostSomaticFilters.htm + vc_enable_non_primary_allelic_filter: + label: vc enable non primary allelic filter + doc: | + Similar to vc-enable-triallelic-filter, but less aggressive. + Keep the allele per position with highest alt AD, and only filter the rest. + The default is false. Not compatible with vc-enable-triallelic-filter. + type: boolean? + inputBinding: + prefix: "--vc-enable-non-primary-allelic-filter=" + separate: False + valueFrom: "$(self.toString())" + + # Turn off ntd error bias estimation + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/SNVErrorEstimation.htm + vc_enable_unequal_ntd: + label: vc enable unequal ntd + doc: | + Nucleotide (NTD) Error Bias Estimation is on by default and recommended as a replacement for the orientation bias filter. + Both methods take account of strand-specific biases (systematic differences between F1R2 and F2R1 reads). + In addition, NTD error estimation accounts for non-strand-specific biases such as sample-wide elevation of a certain SNV type, + eg C->T or any other transition or transversion. + NTD error estimation can also capture the biases in a trinucleotide context. 
+ type: + - "null" + - boolean + - type: enum + symbols: + - "true" + - "false" + - "auto" + inputBinding: + prefix: "--vc-enable-unequal-ntd=" + separate: False + valueFrom: "$(self.toString())" + + + # Phased / MNV Calling options + vc_combine_phased_variants_distance: + label: vc combine phased variants distance + doc: | + When the specified value is greater than 0, combines all phased variants in the phasing set that have a distance + less than or equal to the provided value. The max allowed phasing distance is 15. + The default value is 0, which disables the option. + type: int? + inputBinding: + prefix: "--vc-combine-phased-variants-distance=" + separate: False + vc_combine_phased_variants_max_vaf_delta: + label: vc combine phased variants max vaf delta + doc: | + Component SNVs/INDELs of MNV calls are output only if the VAF of the component + call is greater than that of the MNV by more than 0.1. The VAF difference + threshold for outputting component calls along with MNV calls can be controlled by + the --vc-combine-phased-variants-max-vaf-delta option. + This option is mutually exclusive with --vc-mnv-emit-component-calls + type: float? + inputBinding: + prefix: "--vc-combine-phased-variants-max-vaf-delta=" + separate: False + vc_mnv_emit_component_calls: + label: vc mnv emit component calls + doc: | + To output all component SNVs/INDELs of MNVs, regardless of VAF difference, + when enabled, use the option --vc-mnv-emit-component-calls. + This option is mutually exclusive with --vc-combine-phased-variants-max-vaf-delta + type: boolean? + inputBinding: + prefix: "--vc-mnv-emit-component-calls=" + separate: False + valueFrom: "$(self.toString())" + + # dbSNP annotation + dbsnp_annotation: + label: dbsnp annotation + doc: | + In Germline, Tumor-Normal somatic, or Tumor-Only somatic modes, + DRAGEN can look up variant calls in a dbSNP database and add annotations for any matches that it finds there. 
+ To enable the dbSNP database search, set the --dbsnp option to the full path to the dbSNP database + VCF or .vcf.gz file, which must be sorted in reference order. + type: File? + secondaryFiles: + - pattern: ".tbi" + required: true + inputBinding: + prefix: "--dbsnp=" + separate: False + + # cnv pipeline - with this we must also specify one of --cnv-normal-b-allele-vcf, + # --cnv-population-b-allele-vcf, or cnv-use-somatic-vc-baf. + # If known, specify the sex of the sample. + # If the sample sex is not specified, the caller attempts to estimate the sample sex from tumor alignments. + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/CopyNumVariantCalling.htm + enable_cnv: + label: enable cnv calling + doc: | + Enable CNV processing in the DRAGEN Host Software. + type: boolean? + inputBinding: + prefix: "--enable-cnv=" + separate: False + valueFrom: "$(self.toString())" + cnv_normal_b_allele_vcf: + label: cnv normal b allele vcf + doc: | + Specify a matched normal SNV VCF. + type: File? + inputBinding: + prefix: "--cnv-normal-b-allele-vcf=" + separate: False + cnv_population_b_allele_vcf: + label: cnv population b allele vcf + doc: | + Specify a population SNP catalog. + type: File? + inputBinding: + prefix: "--cnv-population-b-allele-vcf=" + separate: False + cnv_use_somatic_vc_baf: + label: cnv use somatic vc baf + doc: | + If running in tumor-normal mode with the SNV caller enabled, use this option + to specify the germline heterozygous sites. + type: boolean? + inputBinding: + prefix: "--cnv-use-somatic-vc-baf=" + separate: False + valueFrom: "$(self.toString())" + # For more info on following options - see + # https://support-docs.illumina.com/SW/DRAGEN_v39/Content/SW/DRAGEN/SomaticWGSModes.htm#Germline + cnv_normal_cnv_vcf: + label: cnv normal cnv vcf + doc: | + Specify germline CNVs from the matched normal sample. + type: [ "null", boolean, File ] # File is the matched-normal CNV VCF; boolean is kept only so existing callers still validate
+ inputBinding: + prefix: "--cnv-normal-cnv-vcf=" + separate: False + valueFrom: "$(self.class === 'File' ? self.path : self.toString())" + cnv_use_somatic_vc_vaf: + label: cnv use somatic vc vaf + doc: | + Use the variant allele frequencies (VAFs) from the somatic SNVs to help select + the tumor model for the sample. + type: boolean? + inputBinding: + prefix: "--cnv-use-somatic-vc-vaf=" + separate: False + valueFrom: "$(self.toString())" + cnv_somatic_enable_het_calling: + label: cnv somatic enable het calling + doc: | + Enable HET-calling mode for heterogeneous segments. + type: boolean? + inputBinding: + prefix: "--cnv-somatic-enable-het-calling=" + separate: False + valueFrom: "$(self.toString())" + cnv_enable_self_normalization: + label: cnv enable self normalization + doc: | + Enable CNV self normalization. + Self Normalization requires that the DRAGEN hash table be generated with the enable-cnv=true option. + type: boolean? + inputBinding: + prefix: "--cnv-enable-self-normalization=" + separate: False + valueFrom: "$(self.toString())" + cnv_somatic_enable_lower_ploidy_limit: + label: cnv somatic enable lower ploidy limit + doc: | + To improve accuracy on the tumor ploidy model estimation, the somatic WGS CNV caller estimates whether the chosen model calls + homozygous deletions on regions that are likely to reduce the overall fitness of cells, + which are therefore deemed to be "essential" and under negative selection. + In the current literature, recent efforts tried to map such cell-essential genes (eg, in 2015 - https://www.science.org/doi/10.1126/science.aac7041). + The check on essential regions is controlled with --cnv-somatic-enable-lower-ploidy-limit (default true). + type: boolean?
+ inputBinding: + prefix: "--cnv-somatic-enable-lower-ploidy-limit=" + separate: False + valueFrom: "$(self.toString())" + cnv_somatic_essential_genes_bed: + label: cnv somatic essential genes bed + doc: | + Default bedfiles describing the essential regions are provided for hg19, GRCh37, hs37d5, GRCh38, + but a custom bedfile can also be provided in input through the + --cnv-somatic-essential-genes-bed= parameter. + In such case, the feature is automatically enabled. + A custom essential regions bedfile needs to have the following format: 4-column, tab-separated, + where the first 3 columns identify the coordinates of the essential region (chromosome, 0-based start, excluded end). + The fourth column is the region id (string type). For the purpose of the algorithm, currently only the first 3 columns are used. + However, the fourth might be helpful to investigate manually which regions drove the decisions on model plausibility made by the caller. + type: + - "null" + - string + - File + inputBinding: + prefix: "--cnv-somatic-essential-genes-bed=" + separate: False + + # HRD + enable_hrd: + label: enable hrd + doc: | + Set to true to enable HRD scoring to quantify genomic instability. + Requires somatic CNV calls. + type: boolean? + inputBinding: + prefix: "--enable-hrd=" + separate: False + valueFrom: "$(self.toString())" + + # QC options + qc_coverage_region_1: + label: qc coverage region 1 + doc: | + Generates coverage region report using bed file 1. + type: File? + inputBinding: + prefix: "--qc-coverage-region-1=" + separate: False + qc_coverage_region_2: + label: qc coverage region 2 + doc: | + Generates coverage region report using bed file 2. + type: File? + inputBinding: + prefix: "--qc-coverage-region-2=" + separate: False + qc_coverage_region_3: + label: qc coverage region 3 + doc: | + Generates coverage region report using bed file 3. + type: File? 
+ inputBinding: + prefix: "--qc-coverage-region-3=" + separate: False + qc_coverage_ignore_overlaps: + label: qc coverage ignore overlaps + doc: | + Set to true to resolve all of the alignments for each fragment and avoid double-counting any + overlapping bases. This might result in marginally longer run times. + This option also requires setting --enable-map-align=true. + type: boolean? + inputBinding: + prefix: "--qc-coverage-ignore-overlaps=" + separate: False + valueFrom: "$(self.toString())" + + # TMB options + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/Biomarkers_TMB.htm + enable_tmb: + label: enable tmb + doc: | + Enables TMB. If set, the small variant caller, Illumina Annotation Engine, + and the related callability report are enabled. + type: boolean? + inputBinding: + prefix: "--enable-tmb=" + separate: False + valueFrom: "$(self.toString())" + tmb_vaf_threshold: + label: tmb vaf threshold + doc: | + Specify the minimum VAF threshold for a variant. Variants that do not meet the threshold are filtered out. + The default value is 0.05. + type: float? + inputBinding: + prefix: "--tmb-vaf-threshold=" # VAF flag; --tmb-db-threshold belongs to tmb_db_threshold + separate: False + tmb_db_threshold: + label: tmb db threshold + doc: | + Specify the minimum allele count (total number of observations) for an allele in gnomAD or 1000 Genome + to be considered a germline variant. Variant calls that have the same positions and allele are ignored + from the TMB calculation. The default value is 10. + type: int? + inputBinding: + prefix: "--tmb-db-threshold=" + separate: False + + # HLA calling + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/HLACaller.htm + enable_hla: + label: enable hla + doc: | + Enable HLA typing by setting --enable-hla flag to true + type: boolean?
+ inputBinding: + prefix: "--enable-hla=" + separate: False + valueFrom: "$(self.toString())" + hla_bed_file: + label: hla bed file + doc: | + Use the HLA region BED input file to specify the region to extract HLA reads from. + DRAGEN HLA Caller parses the input file for regions within the BED file, and then + extracts reads accordingly to align with the HLA allele reference. + type: File? + inputBinding: + prefix: "--hla-bed-file=" + separate: False + hla_reference_file: + label: hla reference file + doc: | + Use the HLA allele reference file to specify the reference alleles to align against. + The input HLA reference file must be in FASTA format and contain the protein sequence separated into exons. + If --hla-reference-file is not specified, DRAGEN uses hla_classI_ref_freq.fasta from /opt/edico/config/. + The reference HLA sequences are obtained from the IMGT/HLA database. + type: File? + inputBinding: + prefix: "--hla-reference-file=" + separate: False + hla_allele_frequency_file: + label: hla allele frequency file + doc: | + Use the population-level HLA allele frequency file to break ties if one or more HLA allele produces the same or similar results. + The input HLA allele frequency file must be in CSV format and contain the HLA alleles and the occurrence frequency in population. + If --hla-allele-frequency-file is not specified, DRAGEN automatically uses hla_classI_allele_frequency.csv from /opt/edico/config/. + Population-level allele frequencies can be obtained from the Allele Frequency Net database. + type: File? + inputBinding: + prefix: "--hla-allele-frequency-file=" + separate: False + hla_tiebreaker_threshold: + label: hla tiebreaker threshold + doc: | + If more than one allele has a similar number of reads aligned and there is not a clear indicator for the best allele, + the alleles are considered as ties. The HLA Caller places the tied alleles into a candidate set for tie breaking based + on the population allele frequency. 
If an allele has more than the specified fraction of reads aligned (normalized to + the top hit), then the allele is included into the candidate set for tie breaking. The default value is 0.97. + type: float? + inputBinding: + prefix: "--hla-tiebreaker-threshold=" + separate: False + hla_zygosity_threshold: + label: hla zygosity threshold + doc: | + If the minor allele at a given locus has fewer reads mapped than a fraction of the read count of the major allele, + then the HLA Caller infers homozygosity for the given HLA-I gene. You can use this option to specify the fraction value. + The default value is 0.15. + type: float? + inputBinding: + prefix: "--hla-zygosity-threshold=" # hyphenated like the other --hla-* flags + separate: False + hla_min_reads: + label: hla min reads + doc: | + Set the minimum number of reads to align to HLA alleles to ensure sufficient coverage and perform HLA typing. + The default value is 1000 and suggested for WES samples. If using samples with less coverage, you can use a + lower threshold value. + type: int? + inputBinding: + prefix: "--hla-min-reads=" + separate: False + + # RNA + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/TPipelineIntro_fDG.htm + enable_rna: + label: enable rna + doc: | + Set this option for running RNA samples through T/N workflow + type: boolean? + inputBinding: + prefix: "--enable-rna=" + separate: False + valueFrom: "$(self.toString())" + + # Repeat Expansion + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/RepeatGenotyping.htm + repeat_genotype_enable: + label: repeat genotype enable + doc: | + Enables repeat expansion detection. + type: boolean? + inputBinding: + prefix: "--repeat-genotype-enable=" + separate: False + valueFrom: "$(self.toString())" + repeat_genotype_specs: + label: repeat genotype specs + doc: | + Specifies the full path to the JSON file that contains the + repeat variant catalog (specification) describing the loci to call.
+ If the option is not provided, DRAGEN attempts to autodetect the applicable catalog file + from /opt/edico/repeat-specs/ based on the reference provided. + type: File? + inputBinding: + prefix: "--repeat-genotype-specs=" # same spelling as the other --repeat-genotype-* flags + separate: False + repeat_genotype_use_catalog: + label: repeat genotype use catalog + doc: | + Repeat variant catalog type to use (default - ~60 repeats, default_plus_smn - + same as default with SMN repeat, expanded - ~50K repeats) + type: + - "null" + - type: enum + symbols: + - default + - default_plus_smn + - expanded + inputBinding: + prefix: "--repeat-genotype-use-catalog=" + separate: False + + # Miscell + lic_instance_id_location: + label: license instance id location + doc: | + You may wish to place your own in. + Optional value, default set to /opt/instance-identity + which is a path inside the dragen container + type: + - File? + - string? + default: "/opt/instance-identity" + inputBinding: + prefix: "--lic-instance-id-location=" + separate: False + +outputs: + # Will also include mounted-files.txt + dragen_somatic_output_directory: + label: dragen somatic output directory + doc: | + Output directory containing all outputs of the somatic dragen run + type: Directory + outputBinding: + glob: "$(inputs.output_directory)" + # Optional output files (inside the output directory) that we'll continue to append to as we need them + tumor_bam_out: + label: output tumor bam + doc: | + Bam file of the tumor sample. + Exists only if --enable-map-align-output set to true + type: File? + outputBinding: + glob: "$(inputs.output_directory)/$(inputs.output_file_prefix)_tumor.bam" + secondaryFiles: + - ".bai" + normal_bam_out: + label: output normal bam + doc: | + Bam file of the normal sample + Exists only if --enable-map-align-output set to true + type: File?
+ outputBinding: + glob: "$(inputs.output_directory)/$(get_normal_output_prefix(inputs)).bam" + secondaryFiles: + - ".bai" + somatic_snv_vcf_out: + label: somatic snv vcf + doc: | + Output of the snv vcf tumor calls + type: File? + outputBinding: + glob: "$(inputs.output_directory)/$(inputs.output_file_prefix).vcf.gz" + secondaryFiles: + - ".tbi" + somatic_snv_vcf_hard_filtered_out: + label: somatic snv vcf filtered + doc: | + Output of the snv vcf filtered tumor calls + type: File? + outputBinding: + glob: "$(inputs.output_directory)/$(inputs.output_file_prefix).hard-filtered.vcf.gz" + secondaryFiles: + - ".tbi" + somatic_structural_vcf_out: + label: somatic sv vcf filtered + doc: | + Output of the sv vcf filtered tumor calls. + Exists only if --enable-sv is set to true. + type: File? + outputBinding: + glob: "$(inputs.output_directory)/$(inputs.output_file_prefix).sv.vcf.gz" + secondaryFiles: + - ".tbi" + +successCodes: + - 0 + diff --git a/tools/dragen-transcriptome/4.3.6/dragen-transcriptome__4.3.6.cwl b/tools/dragen-transcriptome/4.3.6/dragen-transcriptome__4.3.6.cwl new file mode 100644 index 00000000..c6d51b57 --- /dev/null +++ b/tools/dragen-transcriptome/4.3.6/dragen-transcriptome__4.3.6.cwl @@ -0,0 +1,379 @@ +cwlVersion: v1.1 +class: CommandLineTool + +# Extensions +$namespaces: + s: https://schema.org/ + ilmn-tes: https://platform.illumina.com/rdf/ica/ +$schemas: + - https://schema.org/version/latest/schemaorg-current-http.rdf + +# Metadata +s:author: + class: s:Person + s:name: Sehrish Kanwal + s:email: sehrish.kanwal@umccr.org + +# ID/Docs +id: dragen-transcriptome--4.3.6 # version must match the 4.3.6 path of this file +label: dragen-transcriptome v(4.3.6) +doc: | + Documentation for dragen-transcriptome v4.3.6 + +# ILMN V1 Resources Guide: https://illumina.gitbook.io/ica-v1/analysis/a-taskexecution#type-and-size +# ILMN V2 Resources Guide: https://help.ica.illumina.com/project/p-flow/f-pipelines#compute-types +hints: + ResourceRequirement: + ilmn-tes:resources/tier: standard +
ilmn-tes:resources/type: fpga + ilmn-tes:resources/size: medium + coresMin: 16 + ramMin: 240000 + DockerRequirement: + dockerPull: "079623148045.dkr.ecr.ap-southeast-2.amazonaws.com/cp-prod/c3add40b-1be2-431d-a322-29529f7d2866:latest" + +requirements: + ResourceRequirement: + tmpdirMin: | + ${ + /* 1 Tb */ + return Math.pow(2, 20); + } + SchemaDefRequirement: + types: + - $import: ../../../schemas/fastq-list-row/1.0.0/fastq-list-row__1.0.0.yaml + InlineJavascriptRequirement: + expressionLib: + - $include: ../../../typescript-expressions/dragen-tools/4.0.3/dragen-tools__4.0.3.cwljs + - $include: ../../../typescript-expressions/utils/1.0.0/utils__1.0.0.cwljs + + InitialWorkDirRequirement: + listing: + - entryname: $(get_script_path()) + entry: | + #!/usr/bin/env bash + + # Fail on non-zero exit of subshell + set -euo pipefail + + # Initialise dragen + /opt/edico/bin/dragen \\ + --partial-reconfig DNA-MAPPER \\ + --ignore-version-check true + + # Create directories + mkdir --parents \\ + "$(get_ref_mount())" \\ + "$(get_intermediate_results_dir())" \\ + "$(inputs.output_directory)" + + # untar ref data into scratch space + tar \\ + --directory "$(get_ref_mount())" \\ + --extract \\ + --file "$(inputs.reference_tar.path)" + + # Run dragen command and import options from cli + "$(get_dragen_bin_path())" "\${@}" + - | + ${ + return generate_transcriptome_mount_points(inputs); + } + +baseCommand: [ "bash" ] + +arguments: + # Script path + - valueFrom: "$(get_script_path())" + position: -1 + # Set intermediate directory + - prefix: "--intermediate-results-dir=" + separate: False + valueFrom: "$(get_intermediate_results_dir())" + # Parameters that are always true + - prefix: "--enable-rna=" + separate: False + valueFrom: "true" + +inputs: + # File inputs + # Option 1: + fastq_list: + label: fastq list + doc: | + CSV file that contains a list of FASTQ files + to process. read_1 and read_2 components in the CSV file must be presigned urls. + type: File? 
+ inputBinding: + loadContents: true + prefix: "--fastq-list=" + separate: False + valueFrom: "$(get_fastq_list_csv_path())" + # Option 2: + fastq_list_rows: + label: fastq list rows + doc: | + Alternative to providing a file, one can instead provide a list of 'fastq-list-row' objects + type: ../../../schemas/fastq-list-row/1.0.0/fastq-list-row__1.0.0.yaml#fastq-list-row[]? + inputBinding: + prefix: "--fastq-list=" + separate: False + valueFrom: "$(get_fastq_list_csv_path())" + # Option 3 + bam_input: + label: bam input + doc: | + Input a BAM file for the Dragen RNA options + type: File? + inputBinding: + prefix: "--bam-input=" + separate: False + secondaryFiles: + - pattern: ".bai" + required: true + reference_tar: + label: reference tar + doc: | + Path to ref data tarball. + type: File + inputBinding: + prefix: "--ref-dir=" + separate: False + valueFrom: "$(get_ref_path(self))" + # Output naming options + output_file_prefix: + label: output file prefix + doc: | + The prefix given to all output files. + type: string + inputBinding: + prefix: "--output-file-prefix=" + separate: False + output_directory: + label: output directory + doc: | + The directory where all output files are placed. + type: string + inputBinding: + prefix: "--output-directory=" + separate: False + # Alignment options + enable_map_align: + label: enable map align + doc: | + Enabled by default. + Set this value to false if using bam_input + type: boolean? + inputBinding: + prefix: "--enable-map-align=" + separate: False + valueFrom: "$(self.toString())" + enable_map_align_output: + label: enable map align output + doc: | + Do you wish to have the output bam files present + type: boolean + inputBinding: + prefix: "--enable-map-align-output=" + separate: False + valueFrom: "$(self.toString())" + enable_sort: + label: enable sort + doc: | + True by default, only set this to false if using --bam-input parameters + type: boolean? 
+ inputBinding: + prefix: "--enable-sort=" + separate: False + valueFrom: "$(self.toString())" + enable_duplicate_marking: + label: enable duplicate marking + doc: | + Mark identical alignments as duplicates + type: boolean + inputBinding: + prefix: "--enable-duplicate-marking=" + separate: False + valueFrom: "$(self.toString())" + # Transcript annotation file + annotation_file: + label: annotation file + doc: | + Path to annotation transcript file. + type: File + inputBinding: + prefix: "--annotation-file=" + separate: False + # Optional operation modes + enable_rna_quantification: + label: enable rna quantification + type: boolean? + default: true + doc: | + Enable the quantification module. The default value is true. + inputBinding: + prefix: "--enable-rna-quantification=" + separate: False + valueFrom: "$(self.toString())" + enable_rna_gene_fusion: + label: enable rna gene fusion + type: boolean? + default: true + doc: | + Enable the DRAGEN Gene Fusion module. The default value is true. + inputBinding: + prefix: "--enable-rna-gene-fusion=" + separate: False + valueFrom: "$(self.toString())" + enable_rrna_filter: + label: enable rrna filtering + type: boolean? + default: true + doc: | + Use the DRAGEN RNA pipeline to filter rRNA reads during alignment. The default value is false. + inputBinding: + prefix: "--rrna-filter-enable=" + separate: False + valueFrom: "$(self.toString())" + rrna_filter_contig: + label: name of the rRNA sequences to use for filtering + type: string? + #default: chrUn_GL000220v1 + doc: | + Specify the name of the rRNA sequences to use for filtering. + inputBinding: + prefix: "--rrna-filter-contig=" + separate: False + read_trimmers: + label: read trimming + type: string? + doc: | + To enable trimming filters in hard-trimming mode, set to a comma-separated list of the trimmer tools + you would like to use. To disable trimming, set to none. During mapping, artifacts are removed from all reads. + Read trimming is disabled by default. 
+ inputBinding: + prefix: "--read-trimmers=" + separate: False + soft_read_trimmers: + label: soft read trimming + type: string? + doc: | + To enable trimming filters in soft-trimming mode, set to a comma-separated list of the trimmer tools + you would like to use. To disable soft trimming, set to none. During mapping, reads are aligned as if trimmed, + and bases are not removed from the reads. Soft-trimming is enabled for the polyg filter by default. + inputBinding: + prefix: "--soft-read-trimmers=" + separate: False + trim_adapter_read1: + label: trim adapter read1 + type: File? + doc: | + Specify the FASTA file that contains adapter sequences to trim from the 3' end of Read 1. + inputBinding: + prefix: "--trim-adapter-read1=" + separate: False + trim_adapter_read2: + label: trim adapter read2 + type: File? + doc: | + Specify the FASTA file that contains adapter sequences to trim from the 3' end of Read 2. + inputBinding: + prefix: "--trim-adapter-read2=" # hyphenated, mirroring --trim-adapter-read1 + separate: False + trim_adapter_r1_5prime: + label: trim adapter r1 5prime + type: File? + doc: | + Specify the FASTA file that contains adapter sequences to trim from the 5' end of Read 1. + NB: the sequences should be in reverse order (with respect to their appearance in the FASTQ) but not complemented. + inputBinding: + prefix: "--trim-adapter-r1-5prime=" + separate: False + trim_adapter_r2_5prime: + label: trim adapter r2 5prime + type: File? + doc: | + Specify the FASTA file that contains adapter sequences to trim from the 5' end of Read 2. + NB: the sequences should be in reverse order (with respect to their appearance in the FASTQ) but not complemented. + inputBinding: + prefix: "--trim-adapter-r2-5prime=" + separate: False + trim_adapter_stringency: + label: trim adapter stringency + type: int? + doc: | + Specify the minimum number of adapter bases required for trimming + inputBinding: + prefix: "--trim-adapter-stringency=" + separate: False + trim_r1_5prime: + label: trim r1 5prime + type: int?
+ doc: | + Specify the minimum number of bases to trim from the 5' end of Read 1 (default: 0). + inputBinding: + prefix: "--trim-min-r1-5prime=" + separate: False + trim_r1_3prime: + label: trim r1 3prime + type: int? + doc: | + Specify the minimum number of bases to trim from the 3' end of Read 1 (default: 0). + inputBinding: + prefix: "--trim-min-r1-3prime=" + separate: False + trim_r2_5prime: + label: trim r2 5prime + type: int? + doc: | + Specify the minimum number of bases to trim from the 5' end of Read 2 (default: 0). + inputBinding: + prefix: "--trim-min-r2-5prime=" + separate: False + trim_r2_3prime: + label: trim r2 3prime + type: int? + doc: | + Specify the minimum number of bases to trim from the 3' end of Read 2 (default: 0). + inputBinding: + prefix: "--trim-min-r2-3prime=" + separate: False + lic_instance_id_location: + label: license instance id location + doc: | + You may wish to place your own in. + Optional value, default set to /opt/instance-identity + which is a path inside the dragen container + type: + - File? + - string? + default: "/opt/instance-identity" + inputBinding: + prefix: "--lic-instance-id-location=" + separate: False + +outputs: + # Will also include mounted-files.txt + dragen_transcriptome_directory: + label: dragen transcriptome output directory + doc: | + The output directory containing all wts analysis output files + type: Directory + outputBinding: + glob: "$(inputs.output_directory)" + # Optional files to be used in downstream workflows. + # Whilst these files reside inside the germline directory, specifying them here as outputs + # provides easier access and reference + # Only exists if --enable-map-align-output is set to true# + dragen_bam_out: + label: dragen bam out + doc: | + The output bam file, exists only if --enable-map-align-output is set to true + type: File? 
+ outputBinding: + glob: "$(inputs.output_directory)/$(inputs.output_file_prefix).bam" + secondaryFiles: + - ".bai" + +successCodes: + - 0 \ No newline at end of file diff --git a/tools/multiqc/1.25.1/multiqc__1.25.1.cwl b/tools/multiqc/1.25.1/multiqc__1.25.1.cwl new file mode 100644 index 00000000..3c1d17a4 --- /dev/null +++ b/tools/multiqc/1.25.1/multiqc__1.25.1.cwl @@ -0,0 +1,115 @@ +cwlVersion: v1.1 +class: CommandLineTool + +# Extensions +$namespaces: + s: https://schema.org/ + ilmn-tes: https://platform.illumina.com/rdf/ica/ +$schemas: + - https://schema.org/version/latest/schemaorg-current-http.rdf + +# Metadata +s:author: + class: s:Person + s:name: Alexis Lucattini + s:email: Alexis.Lucattini@umccr.org + s:identifier: https://orcid.org/0000-0001-9754-647X + +# ID/Docs +id: multiqc--1.25.1 +label: multiqc v(1.25.1) +doc: | + Documentation for multiqc v1.25.1 + Use patch that includes https://github.com/ewels/MultiQC/pull/1969 + +# ILMN V1 Resources Guide: https://illumina.gitbook.io/ica-v1/analysis/a-taskexecution#type-and-size +# ILMN V2 Resources Guide: https://help.ica.illumina.com/project/p-flow/f-pipelines#compute-types +hints: + ResourceRequirement: + ilmn-tes:resources/tier: standard + ilmn-tes:resources/type: standard + ilmn-tes:resources/size: large + coresMin: 2 + ramMin: 4000 + DockerRequirement: + dockerPull: ghcr.io/multiqc/multiqc:v1.25.1 + +requirements: + InlineJavascriptRequirement: {} + +baseCommand: ["multiqc"] + +inputs: + # Required inputs + input_directories: + label: input directories + doc: | + The list of directories to place in the analysis + type: Directory[] + inputBinding: + position: 100 # Last items on the command line + output_directory_name: + label: output directory + doc: | + The output directory + type: string + inputBinding: + prefix: "--outdir" + valueFrom: "$(runtime.outdir)/$(self)" + output_filename: + label: output filename + doc: | + Report filename in html format.
+ Defaults to 'multiqc-report.html' + type: string + inputBinding: + prefix: "--filename" + title: + label: title + doc: | + Report title. + Printed as page header, used for filename if not otherwise specified. + type: string + inputBinding: + prefix: "--title" + comment: + label: comment + doc: | + Custom comment, will be printed at the top of the report. + type: string? + inputBinding: + prefix: "--comment" + config: + label: config + doc: | + Configuration file for multiqc + type: File? + streamable: true + inputBinding: + prefix: "--config" + cl_config: + label: cl config + doc: | + Override config from the cli + type: string? + inputBinding: + prefix: "--cl-config" + +outputs: + output_directory: + label: output directory + doc: | + Directory that contains all multiqc analysis data + type: Directory + outputBinding: + glob: "$(inputs.output_directory_name)" + output_file: + label: output file + doc: | + Output html file + type: File + outputBinding: + glob: "$(inputs.output_directory_name)/$(inputs.output_filename)" + +successCodes: + - 0 diff --git a/workflows/dragen-alignment-pipeline/4.3.6/dragen-alignment-pipeline__4.3.6.cwl b/workflows/dragen-alignment-pipeline/4.3.6/dragen-alignment-pipeline__4.3.6.cwl new file mode 100644 index 00000000..e7643e6d --- /dev/null +++ b/workflows/dragen-alignment-pipeline/4.3.6/dragen-alignment-pipeline__4.3.6.cwl @@ -0,0 +1,579 @@ +cwlVersion: v1.1 +class: Workflow + +# Extensions +$namespaces: + s: https://schema.org/ +$schemas: + - https://schema.org/version/latest/schemaorg-current-http.rdf + +# Metadata +s:author: + class: s:Person + s:name: Alexis Lucattini + s:email: Alexis.Lucattini@umccr.org + s:identifier: https://orcid.org/0000-0001-9754-647X + +# ID/Docs +id: dragen-alignment-pipeline--4.3.6 +label: dragen-alignment-pipeline v(4.3.6) +doc: | + Documentation for dragen-alignment-pipeline v4.3.6 + +requirements: + InlineJavascriptRequirement: {} + ScatterFeatureRequirement: {} + MultipleInputFeatureRequirement:
{} + StepInputExpressionRequirement: {} + SchemaDefRequirement: + types: + - $import: ../../../schemas/fastq-list-row/1.0.0/fastq-list-row__1.0.0.yaml + +inputs: + # File input logic + # Option 1 + fastq_list_rows: + label: Row of fastq lists + doc: | + The row of fastq lists. + Each row has the following attributes: + * RGID + * RGLB + * RGSM + * Lane + * Read1File + * Read2File (optional) + type: ../../../schemas/fastq-list-row/1.0.0/fastq-list-row__1.0.0.yaml#fastq-list-row[]? + # Option 2 + fastq_list: + label: fastq list + doc: | + CSV file that contains a list of FASTQ files for normal sample + to process (read_1 and read_2 attributes must be presigned urls for each column) + type: File? + reference_tar: + label: reference tar + doc: | + Path to ref data tarball + type: File + # RNA options + enable_rna: + label: enable rna + doc: | + Enable rna specific settings + type: boolean? + enable_rrna_filter: + label: enable rrna filtering + doc: | + Use the DRAGEN RNA pipeline to filter rRNA reads during alignment. The default value is false. + type: boolean? + enable_rna_quantification: + label: enable rna quantification + doc: | + If set to true, enables RNA quantification. Requires --enable-rna to be set to true. + type: boolean? + annotation_file: + label: annotation file + doc: | + Use to supply a gene annotation file. Required for quantification and gene-fusion. + type: File? + # Output naming options + output_prefix: + label: output prefix + doc: | + The prefix given to all output files + type: string + ### Start mapper options ### + ann_sj_max_indel: + label: ann sj max indel + doc: | + Maximum indel length to expect near an annotated splice junction. + Range: 0 - 63 + type: int? + edit_chain_limit: + label: edit chain limit + doc: | + For edit-mode 1 or 2: Maximum seed chain length in a read to qualify for seed editing. + Range: > 0 + type: int? 
+ edit_mode: + label: edit mode + doc: | + 0 = No edits, 1 = Chain len test, 2 = Paired chain len test, 3 = Edit all std seeds. + type: + - "null" + - type: enum + symbols: + - "0" + - "1" + - "2" + - "3" + edit_read_len: + label: edit read len + doc: | + For edit-mode 1 or 2: Read length in which to try edit-seed-num edited seeds. + Range: > 0 + type: int? + edit_seed_num: + label: edit seed num + doc: | + For edit-mode 1 or 2: Requested number of seeds per read to allow editing on. + Range: > 0 + type: int? + enable_map_align: + label: enable map align + doc: | + Enable use of BAM input files for mapper/aligner. + type: boolean? + enable_map_align_output: + label: enable map align output + doc: | + Enables saving the output from the map/align stage. + If only running map/align, the default value is true. + If running the variant caller, the default value is false. + Therefore in the case of the dragen alignment pipeline, this will always be true. + For sanity purposes, we have it as an option since its default state is not intuitive + type: boolean? + max_intron_bases: + label: max intron bases + doc: | + Maximum intron length reported. + type: int? + min_intron_bases: + label: min intron bases + doc: | + Minimum reference deletion length reported as an intron. + type: int? + seed_density: + label: seed density + doc: | + Requested density of seeds from reads queried in the hash table + Range: 0 - 1 + type: float? + ### End mapper options + ### Start Alignment options ### + aln_min_score: + label: aln min score + doc: | + (signed) Minimum alignment score to report; baseline for MAPQ. + + When using local alignments (global = 0), aln-min-score is computed by the host software as "22 * match-score". + + When using global alignments (global = 1), aln-min-score is set to -1000000. + + Host software computation may be overridden by setting aln-min-score in configuration file. + + Range: −2,147,483,648 to 2,147,483,647 + type: int?
+ dedup_min_qual: + label: dedup min qual + doc: | + Minimum base quality for calculating read quality metric for deduplication. + Range: 0-63 + type: int? + en_alt_hap_aln: + label: en alt hap aln + doc: | + Allows haplotype alignments to be output, as supplementary. + type: boolean? + en_chimeric_aln: + label: en chimeric aln + doc: | + Allows chimeric alignments to be output, as supplementary. + type: boolean? + gap_ext_pen: + label: gap ext pen + doc: | + Score penalty for gap extension. + type: int? + gap_open_pen: + label: gap open pen + doc: | + Score penalty for opening a gap (insertion or deletion). + type: int? + global: + label: global + doc: | + If alignment is global (Needleman-Wunsch) rather than local (Smith-Waterman). + type: boolean? + hard_clips: + label: hard clips + doc: | + Flags for hard clipping: [0] primary, [1] supplementary, [2] secondary. + The hard-clips option is used as a field of 3 bits, with values ranging from 0 to 7. + The bits specify alignments, as follows: + * Bit 0—primary alignments + * Bit 1—supplementary alignments + * Bit 2—secondary alignments + Each bit determines whether local alignments of that type are reported with hard clipping (1) + or soft clipping (0). + The default is 6, meaning primary alignments use soft clipping and supplementary and + secondary alignments use hard clipping. + type: int? + map_orientations: + label: map orientations + doc: | + Constrain orientations to accept forward-only, reverse-complement only, or any alignments. + type: + - "null" + - type: enum + symbols: + - "0" # (any) + - "1" # (forward only) + - "2" # (reverse only) + mapq_max: + label: mapq max + doc: | + Ceiling on reported MAPQ. Max 255 + type: int? + mapq_strict_js: + label: mapq strict js + doc: | + Specific to RNA. When set to 0, a higher MAPQ value is returned, expressing confidence that the alignment is at least partially correct. When set to 1, a lower MAPQ value is returned, expressing the splice junction ambiguity.
+ type: boolean? + match_n_score: + label: match n score + doc: | + (signed) Score increment for matching a reference 'N' nucleotide IUB code. + Range: -16 to 15 + type: int? + match_score: + label: match score + doc: | + Score increment for matching reference nucleotide. + When global = 0, match-score > 0; When global = 1, match-score >= 0 + type: float? + max_rescues: + label: max rescues + doc: | + Maximum rescue alignments per read pair. Default is 10 + type: int? + min_score_coeff: + label: min score coeff + doc: | + Adjustment to aln-min-score per read base. + Range: -64 to 63.999 + type: float? + mismatch_pen: + label: mismatch pen + doc: | + Score penalty for a mismatch. + type: int? + no_unclip_score: + label: no unclip score + doc: | + When no-unclip-score is set to 1, any unclipped bonus (unclip-score) contributing to an alignment is removed from the alignment score before further processing. + type: boolean? + no_unpaired: + label: no unpaired + doc: | + If only properly paired alignments should be reported for paired reads. + type: boolean? + pe_max_penalty: + label: pe max penalty + doc: | + Maximum pairing score penalty, for unpaired or distant ends. + Range: 0-255 + type: int? + pe_orientation: + label: pe orientation + doc: | + Expected paired-end orientation: 0=FR, 1=RF, 2=FF. + type: + - "null" + - type: enum + symbols: + - "0" # FR + - "1" # RF + - "2" # FF + rescue_sigmas: + label: rescue sigmas + doc: | + Deviations from the mean read length used for rescue scan radius. Default is 2.5. + type: float? + sec_aligns: + label: sec aligns + doc: | + Maximum secondary (suboptimal) alignments to report per read. + Range: 0 - 30 + type: int? + sec_aligns_hard: + label: sec aligns hard + doc: | + Set to force unmapped when not all secondary alignments can be output. + type: boolean? + sec_phred_delta: + label: sec phred delta + doc: | + Only secondary alignments with likelihood within this Phred of the primary are reported. 
+ Range: 0 - 255 + type: int? + sec_score_delta: + label: sec score delta + doc: | + Secondary aligns allowed with pair score no more than this far below primary. + type: float? + supp_aligns: + label: supp aligns + doc: | + Maximum supplementary (chimeric) alignments to report per read. + type: int? + supp_as_sec: + label: supp as sec + doc: | + If supplementary alignments should be reported with secondary flag. + type: boolean? + supp_min_score_adj: + label: supp min score adj + doc: | + Amount to increase minimum alignment score for supplementary alignments. + This score is computed by host software as "8 * match-score" for DNA, and is default 0 for RNA. + type: float? + unclip_score: + label: unclip score + doc: | + Score bonus for reaching each edge of the read. + Range: 0 - 127 + type: int? + unpaired_pen: + label: unpaired pen + doc: | + Penalty for unpaired alignments in Phred scale. + Range: 0 - 255 + type: int? + ### End Alignment options ### + ### Start General software options + # Alt aware mapping + alt_aware: + label: alt aware + doc: | + Enables special processing for alt contigs, if alt liftover was used in hash table. + Enabled by default if reference was built with liftover. + type: boolean? + # Duplicate marking + enable_duplicate_marking: + label: enable duplicate marking + doc: | + Enable the flagging of duplicate output alignment records. + type: boolean? + remove_duplicates: + label: remove duplicates + doc: | + If true, remove duplicate alignment records instead of just flagging them. + type: boolean? + # Tag generation + generate_md_tags: + label: generate md tags + doc: | + Whether to generate MD tags with alignment output records. Default is false. + type: boolean? + generate_sa_tags: + label: generate sa tags + doc: | + Whether to generate SA:Z tags for records that have chimeric/supplemental alignments. + type: boolean? + generate_zs_tags: + label: generate zs tags + doc: | + Whether to generate ZS tags for alignment output records. 
Default is false. + type: boolean? + # Sorting logic + enable_sort: + label: enable sort + doc: | + Enable sorting after mapping/alignment. + type: boolean? + preserve_map_align_order: + label: preserve map align order + doc: | + Produce output file that preserves original order of reads in the input file. + type: boolean? + # Verbosity + verbose: + label: verbose + doc: | + Enable verbose output from DRAGEN. + type: boolean? + + +steps: + # Run Dragen + run_dragen_alignment_step: + label: run dragen alignment step + doc: | + Runs the alignment step on a dragen fpga + Takes in a fastq list and corresponding mount paths from the predefined mount paths + All other options available at the top of the workflow + in: + fastq_list: + source: fastq_list + fastq_list_rows: + source: fastq_list_rows + reference_tar: + source: reference_tar + enable_rna: + source: enable_rna + enable_rrna_filter: + source: enable_rrna_filter + enable_rna_quantification: + source: enable_rna_quantification + annotation_file: + source: annotation_file + output_file_prefix: + source: output_prefix + output_directory: + source: output_prefix + valueFrom: "$(self)_dragen_alignment" + ann_sj_max_indel: + source: ann_sj_max_indel + edit_chain_limit: + source: edit_chain_limit + edit_mode: + source: edit_mode + edit_read_len: + source: edit_read_len + edit_seed_num: + source: edit_seed_num + enable_map_align: + source: enable_map_align + enable_map_align_output: + source: enable_map_align_output + max_intron_bases: + source: max_intron_bases + min_intron_bases: + source: min_intron_bases + seed_density: + source: seed_density + aln_min_score: + source: aln_min_score + dedup_min_qual: + source: dedup_min_qual + en_alt_hap_aln: + source: en_alt_hap_aln + en_chimeric_aln: + source: en_chimeric_aln + gap_ext_pen: + source: gap_ext_pen + gap_open_pen: + source: gap_open_pen + global: + source: global + hard_clips: + source: hard_clips + map_orientations: + source: map_orientations + mapq_max: + source: 
mapq_max + mapq_strict_js: + source: mapq_strict_js + match_n_score: + source: match_n_score + match_score: + source: match_score + max_rescues: + source: max_rescues + min_score_coeff: + source: min_score_coeff + mismatch_pen: + source: mismatch_pen + no_unclip_score: + source: no_unclip_score + no_unpaired: + source: no_unpaired + pe_max_penalty: + source: pe_max_penalty + pe_orientation: + source: pe_orientation + rescue_sigmas: + source: rescue_sigmas + sec_aligns: + source: sec_aligns + sec_aligns_hard: + source: sec_aligns_hard + sec_phred_delta: + source: sec_phred_delta + sec_score_delta: + source: sec_score_delta + supp_aligns: + source: supp_aligns + supp_as_sec: + source: supp_as_sec + supp_min_score_adj: + source: supp_min_score_adj + unclip_score: + source: unclip_score + unpaired_pen: + source: unpaired_pen + alt_aware: + source: alt_aware + enable_duplicate_marking: + source: enable_duplicate_marking + remove_duplicates: + source: remove_duplicates + generate_md_tags: + source: generate_md_tags + generate_sa_tags: + source: generate_sa_tags + generate_zs_tags: + source: generate_zs_tags + enable_sort: + source: enable_sort + preserve_map_align_order: + source: preserve_map_align_order + verbose: + source: verbose + out: + - id: dragen_alignment_output_directory + - id: dragen_bam_out + run: ../../../tools/dragen-alignment/4.3.6/dragen-alignment__4.3.6.cwl + + # Create a Dragen specific QC report + dragen_qc_step: + label: dragen qc step + doc: | + The dragen qc step - this takes in an array of dirs + in: + input_directories: + source: run_dragen_alignment_step/dragen_alignment_output_directory + valueFrom: | + ${ + return [self]; + } + output_directory_name: + source: output_prefix + valueFrom: "$(self)_dragen_alignment_multiqc" + output_filename: + source: output_prefix + valueFrom: "$(self)_dragen_alignment_multiqc.html" + title: + source: output_prefix + valueFrom: "UMCCR MultiQC Dragen Alignment Report for $(self)" + out: + - id: output_directory 
+ run: ../../../tools/multiqc/1.25.1/multiqc__1.25.1.cwl + +outputs: + # All output files will be under the output directory + dragen_alignment_output_directory: + label: dragen alignment output directory + doc: | + The output directory containing all alignment output files and qc metrics + type: Directory + outputSource: run_dragen_alignment_step/dragen_alignment_output_directory + # Whilst these files reside inside the output directory, specifying them here as outputs + # provides easier access and reference + dragen_bam_out: + label: dragen bam out + doc: | + The output alignment file + type: File + outputSource: run_dragen_alignment_step/dragen_bam_out + secondaryFiles: + - ".bai" + #multiQC output + multiqc_output_directory: + label: dragen QC report out + doc: | + The dragen multiQC output + type: Directory + outputSource: dragen_qc_step/output_directory diff --git a/workflows/dragen-germline-pipeline/4.3.6/dragen-germline-pipeline__4.3.6.cwl b/workflows/dragen-germline-pipeline/4.3.6/dragen-germline-pipeline__4.3.6.cwl new file mode 100644 index 00000000..b88d6d46 --- /dev/null +++ b/workflows/dragen-germline-pipeline/4.3.6/dragen-germline-pipeline__4.3.6.cwl @@ -0,0 +1,723 @@ +cwlVersion: v1.1 +class: Workflow + +# Extensions +$namespaces: + s: https://schema.org/ +$schemas: + - https://schema.org/version/latest/schemaorg-current-http.rdf + +# Metadata +s:author: + class: s:Person + s:name: Alexis Lucattini + s:email: Alexis.Lucattini@umccr.org + +# ID/Docs +id: dragen-germline-pipeline--4.3.6 +label: dragen-germline-pipeline v(4.3.6) +doc: | + Documentation for dragen-germline-pipeline v4.3.6 + +requirements: + InlineJavascriptRequirement: {} + ScatterFeatureRequirement: {} + MultipleInputFeatureRequirement: {} + StepInputExpressionRequirement: {} + SchemaDefRequirement: + types: + - $import: ../../../schemas/fastq-list-row/1.0.0/fastq-list-row__1.0.0.yaml + +inputs: + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/OptionReference.htm 
+ # Inputs fastq list csv or actual fastq list file with presigned urls for Read1File and Read2File columns + # File inputs + # Option 1: + fastq_list: + label: fastq list + doc: | + CSV file that contains a list of FASTQ files + to process. + Read1File and Read2File may be presigned urls or use this in conjunction with + the fastq_list_mount_paths inputs. + type: File? + # Option 2: + fastq_list_rows: + label: fastq list rows + doc: | + Alternative to providing a file, one can instead provide a list of 'fastq-list-row' objects + type: ../../../schemas/fastq-list-row/1.0.0/fastq-list-row__1.0.0.yaml#fastq-list-row[]? + # Option 3 + bam_input: + label: bam input + doc: | + Input a normal BAM file for the variant calling stage + type: File? + secondaryFiles: + - pattern: ".bai" + required: true + # Option 4 + cram_input: + label: cram input + doc: | + Input a normal CRAM file for the variant calling stage + type: File? + cram_reference: + label: cram reference + doc: | + Path to the reference fasta file for the CRAM input. + Required only if the input is a cram file AND not the reference in the tarball + type: File? + reference_tar: + label: reference tar + doc: | + Path to ref data tarball + type: File + + # Output naming options + output_prefix: + label: output prefix + doc: | + The prefix given to all output files + type: string + output_format : + label: output format + doc: | + For mapping and aligning, the output is sorted and compressed into BAM format by default before saving to disk. + You can control the output format from the map/align stage with the --output-format option. 
+ type: + - "null" + - type: enum + symbols: + - SAM + - BAM + - CRAM + + # Optional operation modes + # Given we're running from fastqs + # --enable-variant-caller option must be set to true (set in arguments), --enable-map-align is then activated by default + # --enable-map-align-output to keep bams + # --enable-duplicate-marking to mark duplicate reads at the same time + # --enable-sv to enable the structural variant calling step. + enable_map_align: + label: enable map align + doc: | + Enabled by default since --enable-variant-caller option is set to true. + Set this value to false if using bam_input + type: boolean? + enable_map_align_output: + label: enable map align output + doc: | + Do you wish to have the output bam files present + type: boolean? + enable_duplicate_marking: + label: enable duplicate marking + doc: | + Mark identical alignments as duplicates + type: boolean? + dedup_min_qual: + label: deduplicate minimum quality + doc: | + Specifies the Phred quality score below which a base should be excluded from the quality score + calculation used for choosing among duplicate reads. + type: int? + enable_pgx: + label: enable pgx + doc: | + Enable star allele caller. This also turns on other PGx callers such as CYP2D6, CYP2B6 + type: boolean? + enable_targeted: + label: enable targeted + doc: | + Enable targeted variant calling for repetitive regions + type: boolean? + + # Structural Variant Caller Options + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/StructuralVariantCalling.htm + enable_sv: + label: enable sv + doc: | + Enable/disable structural variant + caller. Default is false. + type: boolean? + # Structural Variant Caller Options + sv_call_regions_bed: + label: sv call regions bed + doc: | + Specifies a BED file containing the set of regions to call. + type: File? + sv_region: + label: sv region + doc: | + Limit the analysis to a specified region of the genome for debugging purposes. 
+ This option can be specified multiple times to build a list of regions. + The value must be in the format "chr:startPos-endPos".. + type: string? + sv_exome: + label: sv exome + doc: | + Set to true to configure the variant caller for targeted sequencing inputs, + which includes disabling high depth filters. + In integrated mode, the default is to autodetect targeted sequencing input, + and in standalone mode the default is false. + type: boolean? + sv_output_contigs: + label: sv output contigs + doc: | + Set to true to have assembled contig sequences output in a VCF file. The default is false. + type: boolean? + sv_forcegt_vcf: + label: sv forcegt vcf + doc: | + Specify a VCF of structural variants for forced genotyping. The variants are scored and emitted + in the output VCF even if not found in the sample data. + The variants are merged with any additional variants discovered directly from the sample data. + type: File? + sv_discovery: + label: sv discovery + doc: | + Enable SV discovery. This flag can be set to false only when --sv-forcegt-vcf is used. + When set to false, SV discovery is disabled and only the forced genotyping input variants + are processed. The default is true. + type: boolean? + sv_se_overlap_pair_evidence: + label: sv use overlap pair evidence + doc: | + Allow overlapping read pairs to be considered as evidence. + By default, DRAGEN uses autodetect on the fraction of overlapping read pairs if <20%. + type: boolean? + sv_enable_liquid_tumor_mode: + label: sv enable liquid tumor mode + doc: | + Enable liquid tumor mode. + type: boolean? + sv_tin_contam_tolerance: + label: sv tin contam tolerance + doc: | + Set the Tumor-in-Normal (TiN) contamination tolerance level. + You can enter any value between 0-1. The default maximum TiN contamination tolerance is 0.15. + type: float? 
+ + # Variant calling options + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/SmallVariantCaller.htm + vc_target_bed: + label: vc target bed + doc: | + This is an optional command line input that restricts processing of the small variant caller, + target bed related coverage, and callability metrics to regions specified in a BED file. + type: File? + vc_target_bed_padding: + label: vc target bed padding + doc: | + This is an optional command line input that can be used to pad all of the target + BED regions with the specified value. + For example, if a BED region is 1:1000-2000 and a padding value of 100 is used, + it is equivalent to using a BED region of 1:900-2100 and a padding value of 0. + + Any padding added to --vc-target-bed-padding is used by the small variant caller + and by the target bed coverage/callability reports. The default padding is 0. + type: int? + vc_target_coverage: + label: vc target coverage + doc: | + The --vc-target-coverage option specifies the target coverage for down-sampling. + The default value is 500 for germline mode and 50 for somatic mode. + type: int? + vc_enable_gatk_acceleration: + label: vc enable gatk acceleration + doc: | + If is set to true, the variant caller runs in GATK mode + (concordant with GATK 3.7 in germline mode and GATK 4.0 in somatic mode). + type: boolean? + vc_remove_all_soft_clips: + label: vc remove all soft clips + doc: | + If is set to true, the variant caller does not use soft clips of reads to determine variants. + type: boolean? + vc_decoy_contigs: + label: vc decoy contigs + doc: | + The --vc-decoy-contigs option specifies a comma-separated list of contigs to skip during variant calling. + This option can be set in the configuration file. + type: string? + vc_enable_decoy_contigs: + label: vc enable decoy contigs + doc: | + If --vc-enable-decoy-contigs is set to true, variant calls on the decoy contigs are enabled. + The default value is false. + type: boolean? 
+ vc_enable_phasing: + label: vc enable phasing + doc: | + The -vc-enable-phasing option enables variants to be phased when possible. The default value is true. + type: boolean? + vc_enable_vcf_output: + label: vc enable vcf output + doc: | + The -vc-enable-vcf-output option enables VCF file output during a gVCF run. The default value is false. + type: boolean? + vc_emit_ref_confidence: + label: vc emit ref confidence + doc: | + A genomic VCF (gVCF) file contains information on variants and positions determined to be homozygous to the reference genome. + For homozygous regions, the gVCF file includes statistics that indicate how well reads support the absence of variants or + alternative alleles. To enable gVCF output, set to GVCF. By default, contiguous runs of homozygous reference calls with similar + scores are collapsed into blocks (hom-ref blocks). Hom-ref blocks save disk space and processing time of downstream analysis tools. + DRAGEN recommends using the default mode. To produce unbanded output, set --vc-emit-ref-confidence to BP_RESOLUTION. + type: string? + vc_ml_enable_recalibration: + label: vc ml enable recalibration + doc: | + DRAGEN employs machine learning-based variant recalibration (DRAGEN-ML) for germline SNV VC. + Variant calling accuracy is improved using powerful and efficient machine learning techniques that augment the variant caller, + by exploiting more of the available read and context information that does not easily integrate into the Bayesian processing + used by the haplotype variant caller. + type: boolean? + + # Sex chromosome mosaic variants options + vc_enable_sex_chr_diploid: + label: vc enable sex chr diploid + doc: | + For male samples in germline calling mode, DRAGEN calls potential mosaic variants in non-PAR regions of sex chromosomes. + A variant is called as mosaic when the allele frequency (FORMAT/AF) is below 85% or if multiple alt alleles are called, + suggesting incompatibility with the haploid assumption. 
The GT field for bi-allelic mosaic variants is "0/1", + denoting a mixture of reference and alt alleles, as opposed to the regular GT of "1" for haploid variants. + The GT field for multi-allelic mosaic variants is "1/2" in VCF. + You can disable the calling of mosaic variants by setting --vc-enable-sex-chr-diploid to false. + type: boolean? + + vc_haploid_call_af_threshold: + label: vc haploid call af threshold + doc: | + Option --vc-haploid-call-af-threshold= to control threshold. + * Diploid model is applied to haploid (chrX/Y, non-PAR) regions in male samples. + * Variants with only one alt allele and with AF>=85% are rewritten to haploid calls. + * The potential mosaic calls with AF<85% will have GT of "0/1" and an INFO tag of + "MOSAIC" will be added. + type: float? + + # Downsampling options + vc_max_reads_per_active_region: + label: vc max reads per active region + doc: | + specifies the maximum number of reads covering a given active region. + Default is 10000 for the germline workflow + type: int? + vc_max_reads_per_raw_region: + label: vc max reads per raw region + doc: | + specifies the maximum number of reads covering a given raw region. + Default is 30000 for the germline workflow + type: int? + + # Ploidy support + sample_sex: + label: sample sex + doc: | + Specifies the sex of a sample + type: + - "null" + - type: enum + symbols: + - male + - female + # ROH options + vc_enable_roh: + label: vc enable roh + doc: | + Enable or disable the ROH caller by setting this option to true or false. Enabled by default for human autosomes only. + type: boolean? + vc_roh_blacklist_bed: + label: vc roh blacklist bed + doc: | + If provided, the ROH caller ignores variants that are contained in any region in the blacklist BED file. + DRAGEN distributes blacklist files for all popular human genomes and automatically selects a blacklist to + match the genome in use, unless this option is used explicitly select a file. + type: File? 
+ + # BAF options + vc_enable_baf: + label: vc enable baf + doc: | + Enable or disable B-allele frequency output. Enabled by default. + type: boolean? + + # Germline variant small hard filtering options + vc_hard_filter: + label: vc hard filter + doc: | + DRAGEN provides post-VCF variant filtering based on annotations present in the VCF records. + However, due to the nature of DRAGEN's algorithms, which incorporate the hypothesis of correlated errors + from within the core of variant caller, the pipeline has improved capabilities in distinguishing + the true variants from noise, and therefore the dependency on post-VCF filtering is substantially reduced. + For this reason, the default post-VCF filtering in DRAGEN is very simple + type: string? + + # dbSNP annotation + dbsnp_annotation: + label: dbsnp annotation + doc: | + In Germline, Tumor-Normal somatic, or Tumor-Only somatic modes, + DRAGEN can look up variant calls in a dbSNP database and add annotations for any matches that it finds there. + To enable the dbSNP database search, set the --dbsnp option to the full path to the dbSNP database + VCF or .vcf.gz file, which must be sorted in reference order. + type: File? + secondaryFiles: + - pattern: ".tbi" + required: true + + # Repeat expansion calling + repeat_genotype_enable: + label: repeat genotype enable + doc: | + Enable DRAGEN repeat expansion detection + type: boolean? + repeat_genotype_use_catalog: + label: repeat genotype use catalog + doc: | + The repeat-specification (also called variant catalog) JSON file defines the repeat regions for ExpansionHunter to analyze. + Default repeat-specification for some pathogenic and polymorphic repeats are in the /opt/edico/repeat-specs/ directory, + based on the reference genome used with DRAGEN.
Users can choose between any of the three default repeat-specification files + packaged with DRAGEN using + type: + - "null" + - type: enum + symbols: + - default + - default_plus_smn + - expanded + repeat_genotype_specs: + label: repeat genotype specs + doc: | + Specifies the full path to the JSON file that contains the repeat variant catalog (specification) describing the loci to call. + --repeat-genotype-specs is required for ExpansionHunter. + If the option is not provided, + DRAGEN attempts to autodetect the applicable catalog file from /opt/edico/repeat-specs/ based on the reference provided. + type: + - "null" + - File + - string + + # Force genotyping + vc_forcegt_vcf: + label: vc forcegt vcf + doc: | + AGENsupports force genotyping (ForceGT) for Germline SNV variant calling. + To use ForceGT, use the --vc-forcegt-vcf option with a list of small variants to force genotype. + The input list of small variants can be a .vcf or .vcf.gz file. + + The current limitations of ForceGT are as follows: + * ForceGT is supported for Germline SNV variant calling in the V3 mode. + The V1, V2, and V2+ modes are not supported. + * ForceGT is not supported for Somatic SNV variant calling. + * ForceGT variants do not propagate through Joint Genotyping. + type: File? + secondaryFiles: + - pattern: ".tbi" + required: true + + # cnv pipeline - with this we must also specify one of --cnv-normal-b-allele-vcf, + # More info at https://support-docs.illumina.com/SW/DRAGEN_v39/Content/SW/DRAGEN/CNVExamples_fDG_dtREF.htm?Highlight=cnv-normal-b-allele-vcf + enable_cnv: + label: enable cnv calling + doc: | + Enable CNV processing in the DRAGEN Host Software. + type: boolean? + cnv_enable_self_normalization: + label: cnv enable self normalization + doc: | + Enable CNV self normalization. + Self Normalization requires that the DRAGEN hash table be generated with the enable-cnv=true option. + type: boolean? 
+ + # QC options + qc_coverage_region_1: + label: qc coverage region 1 + doc: | + Generates coverage region report using bed file 1. + type: File? + qc_coverage_region_2: + label: qc coverage region 2 + doc: | + Generates coverage region report using bed file 2. + type: File? + qc_coverage_region_3: + label: qc coverage region 3 + doc: | + Generates coverage region report using bed file 3. + type: File? + qc_coverage_ignore_overlaps: + label: qc coverage ignore overlaps + doc: | + Set to true to resolve all of the alignments for each fragment and avoid double-counting any + overlapping bases. This might result in marginally longer run times. + This option also requires setting --enable-map-align=true. + type: boolean? + + # HLA calling + enable_hla: + label: enable hla + doc: | + Enable HLA typing by setting --enable-hla flag to true + type: boolean? + hla_bed_file: + label: hla bed file + doc: | + Use the HLA region BED input file to specify the region to extract HLA reads from. + DRAGEN HLA Caller parses the input file for regions within the BED file, and then + extracts reads accordingly to align with the HLA allele reference. + type: File? + hla_reference_file: + label: hla reference file + doc: | + Use the HLA allele reference file to specify the reference alleles to align against. + The input HLA reference file must be in FASTA format and contain the protein sequence separated into exons. + If --hla-reference-file is not specified, DRAGEN uses hla_classI_ref_freq.fasta from /opt/edico/config/. + The reference HLA sequences are obtained from the IMGT/HLA database. + type: File? + hla_allele_frequency_file: + label: hla allele frequency file + doc: | + Use the population-level HLA allele frequency file to break ties if one or more HLA allele produces the same or similar results. + The input HLA allele frequency file must be in CSV format and contain the HLA alleles and the occurrence frequency in population. 
+ If --hla-allele-frequency-file is not specified, DRAGEN automatically uses hla_classI_allele_frequency.csv from /opt/edico/config/. + Population-level allele frequencies can be obtained from the Allele Frequency Net database. + type: File? + hla_tiebreaker_threshold: + label: hla tiebreaker threshold + doc: | + If more than one allele has a similar number of reads aligned and there is not a clear indicator for the best allele, + the alleles are considered as ties. The HLA Caller places the tied alleles into a candidate set for tie breaking based + on the population allele frequency. If an allele has more than the specified fraction of reads aligned (normalized to + the top hit), then the allele is included into the candidate set for tie breaking. The default value is 0.97. + type: float? + hla_zygosity_threshold: + label: hla zygosity threshold + doc: | + If the minor allele at a given locus has fewer reads mapped than a fraction of the read count of the major allele, + then the HLA Caller infers homozygosity for the given HLA-I gene. You can use this option to specify the fraction value. + The default value is 0.15. + type: float? + hla_min_reads: + label: hla min reads + doc: | + Set the minimum number of reads to align to HLA alleles to ensure sufficient coverage and perform HLA typing. + The default value is 1000 and suggested for WES samples. If using samples with less coverage, you can use a + lower threshold value. + type: int? + + # Miscellaneous options + lic_instance_id_location: + label: license instance id location + doc: | + You may wish to place your own in. + Optional value, default set to /opt/instance-identity + which is a path inside the dragen container + type: + - File? + - string? + default: "/opt/instance-identity" + +steps: + # Run dragen germline workflow + run_dragen_germline_step: + label: run dragen germline step + doc: | + Runs the dragen germline workflow on the FPGA. 
+ Takes in either a fastq list as a file or a fastq_list_rows schema object + in: + fastq_list_rows: + source: fastq_list_rows + fastq_list: + source: fastq_list + bam_input: + source: bam_input + cram_input: + source: cram_input + cram_reference: + source: cram_reference + reference_tar: + source: reference_tar + output_file_prefix: + source: output_prefix + output_directory: + source: output_prefix + valueFrom: "$(self)_dragen_germline" + output_format: + source: output_format + enable_map_align_output: + source: enable_map_align_output + enable_duplicate_marking: + source: enable_duplicate_marking + dedup_min_qual: + source: dedup_min_qual + enable_pgx: + source: enable_pgx + enable_targeted: + source: enable_targeted + vc_target_bed: + source: vc_target_bed + vc_target_bed_padding: + source: vc_target_bed_padding + vc_target_coverage: + source: vc_target_coverage + vc_enable_gatk_acceleration: + source: vc_enable_gatk_acceleration + vc_remove_all_soft_clips: + source: vc_remove_all_soft_clips + vc_decoy_contigs: + source: vc_decoy_contigs + vc_enable_decoy_contigs: + source: vc_enable_decoy_contigs + vc_enable_phasing: + source: vc_enable_phasing + vc_enable_vcf_output: + source: vc_enable_vcf_output + vc_emit_ref_confidence: + source: vc_emit_ref_confidence + vc_ml_enable_recalibration: + source: vc_ml_enable_recalibration + vc_enable_sex_chr_diploid: + source: vc_enable_sex_chr_diploid + vc_haploid_call_af_threshold: + source: vc_haploid_call_af_threshold + vc_max_reads_per_active_region: + source: vc_max_reads_per_active_region + vc_max_reads_per_raw_region: + source: vc_max_reads_per_raw_region + sample_sex: + source: sample_sex + vc_enable_roh: + source: vc_enable_roh + vc_roh_blacklist_bed: + source: vc_roh_blacklist_bed + vc_enable_baf: + source: vc_enable_baf + vc_hard_filter: + source: vc_hard_filter + + # Structural Variant Caller Options + enable_sv: + source: enable_sv + sv_call_regions_bed: + source: sv_call_regions_bed + sv_region: + source: 
sv_region + sv_exome: + source: sv_exome + sv_output_contigs: + source: sv_output_contigs + sv_forcegt_vcf: + source: sv_forcegt_vcf + sv_discovery: + source: sv_discovery + sv_se_overlap_pair_evidence: + source: sv_se_overlap_pair_evidence + sv_enable_liquid_tumor_mode: + source: sv_enable_liquid_tumor_mode + sv_tin_contam_tolerance: + source: sv_tin_contam_tolerance + dbsnp_annotation: + source: dbsnp_annotation + # repeat genotype options + repeat_genotype_enable: + source: repeat_genotype_enable + repeat_genotype_use_catalog: + source: repeat_genotype_use_catalog + repeat_genotype_specs: + source: repeat_genotype_specs + #cnv options + enable_cnv: + source: enable_cnv + cnv_enable_self_normalization: + source: cnv_enable_self_normalization + #qc options + qc_coverage_region_1: + source: qc_coverage_region_1 + qc_coverage_region_2: + source: qc_coverage_region_2 + qc_coverage_region_3: + source: qc_coverage_region_3 + qc_coverage_ignore_overlaps: + source: qc_coverage_ignore_overlaps + #hla + enable_hla: + source: enable_hla + hla_bed_file: + source: hla_bed_file + hla_reference_file: + source: hla_reference_file + hla_allele_frequency_file: + source: hla_allele_frequency_file + hla_tiebreaker_threshold: + source: hla_tiebreaker_threshold + hla_zygosity_threshold: + source: hla_zygosity_threshold + hla_min_reads: + source: hla_min_reads + lic_instance_id_location: + source: lic_instance_id_location + out: + - id: dragen_germline_output_directory + - id: dragen_bam_out + - id: dragen_vcf_out + run: ../../../tools/dragen-germline/4.3.6/dragen-germline__4.3.6.cwl + + # Run the qc step + dragen_qc_step: + label: dragen qc step + doc: | + The dragen qc step - this takes in an array of dirs + in: + input_directories: + source: run_dragen_germline_step/dragen_germline_output_directory + valueFrom: | + ${ + return [self]; + } + output_directory_name: + source: output_prefix + valueFrom: "$(self)_dragen_germline_multiqc" + output_filename: + source: output_prefix + 
valueFrom: "$(self)_dragen_germline_multiqc.html" + title: + source: output_prefix + valueFrom: "UMCCR MultiQC Dragen Germline Report for $(self)" + out: + - id: output_directory + run: ../../../tools/multiqc/1.25.1/multiqc__1.25.1.cwl + +outputs: + dragen_germline_output_directory: + label: dragen germline output directory + doc: | + The output directory containing all germline output files + type: Directory + outputSource: run_dragen_germline_step/dragen_germline_output_directory + # provides easier access and reference + # Only exists if --enable-map-align-output is set to true# + dragen_bam_out: + label: dragen bam out + doc: | + The output bam file, exists only if --enable-map-align-output is set to true + type: File? + secondaryFiles: + - ".bai" + outputSource: run_dragen_germline_step/dragen_bam_out + # Should always be available as an output + dragen_vcf_out: + label: dragen vcf out + doc: | + The output germline vcf file + type: File? + secondaryFiles: + - ".tbi" + outputSource: run_dragen_germline_step/dragen_vcf_out + # The multiqc output directory + multiqc_output_directory: + label: multiqc output directory + doc: | + The output directory for multiqc + type: Directory + outputSource: dragen_qc_step/output_directory diff --git a/workflows/dragen-somatic-with-germline-pipeline/4.3.6/dragen-somatic-with-germline-pipeline__4.3.6.cwl b/workflows/dragen-somatic-with-germline-pipeline/4.3.6/dragen-somatic-with-germline-pipeline__4.3.6.cwl new file mode 100644 index 00000000..fce34645 --- /dev/null +++ b/workflows/dragen-somatic-with-germline-pipeline/4.3.6/dragen-somatic-with-germline-pipeline__4.3.6.cwl @@ -0,0 +1,1483 @@ +cwlVersion: v1.1 +class: Workflow + +# Extensions +$namespaces: + s: https://schema.org/ +$schemas: + - https://schema.org/version/latest/schemaorg-current-http.rdf + +# Metadata +s:author: + class: s:Person + s:name: Alexis Lucattini + s:email: Alexis.Lucattini@umccr.org + s:identifier: https://orcid.org/0000-0001-9754-647X + +# ID/Docs 
+id: dragen-somatic-with-germline-pipeline--4.3.6 +label: dragen-somatic-with-germline-pipeline v(4.3.6) +doc: | + Documentation for dragen-somatic-with-germline-pipeline + v4.3.6 + +requirements: + InlineJavascriptRequirement: + expressionLib: + - $include: ../../../typescript-expressions/utils/1.0.0/utils__1.0.0.cwljs + ScatterFeatureRequirement: {} + MultipleInputFeatureRequirement: {} + StepInputExpressionRequirement: {} + SchemaDefRequirement: + types: + - $import: ../../../schemas/fastq-list-row/1.0.0/fastq-list-row__1.0.0.yaml + +inputs: + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/OptionReference.htm + # Inputs fastq list csv or actual fastq list file with presigned urls for Read1File and Read2File columns + # File inputs + # Option 1 + fastq_list: + label: fastq list + doc: | + CSV file that contains a list of FASTQ files for normal sample + to process. + type: File? + tumor_fastq_list: + label: tumor fastq list + doc: | + CSV file that contains a list of FASTQ files + to process. + type: File? + # Option 2 + fastq_list_rows: + label: Row of fastq lists + doc: | + The row of fastq lists. + Each row has the following attributes: + * RGID + * RGLB + * RGSM + * Lane + * Read1File + * Read2File (optional) + type: ../../../schemas/fastq-list-row/1.0.0/fastq-list-row__1.0.0.yaml#fastq-list-row[]? + tumor_fastq_list_rows: + label: Row of fastq lists + doc: | + The row of fastq lists. + Each row has the following attributes: + * RGID + * RGLB + * RGSM + * Lane + * Read1File + * Read2File (optional) + type: ../../../schemas/fastq-list-row/1.0.0/fastq-list-row__1.0.0.yaml#fastq-list-row[]? + # Option 3 + bam_input: + label: bam input + doc: | + Input a normal BAM file for the variant calling stage + type: File? + secondaryFiles: + - pattern: ".bai" + required: true + tumor_bam_input: + label: tumor bam input + doc: | + Input a tumor BAM file for the variant calling stage + type: File? 
+ secondaryFiles: + - pattern: ".bai" + required: true + # Option 4 + cram_input: + label: cram input + doc: | + Input a normal CRAM file for the variant calling stage + type: File? + tumor_cram_input: + label: tumor cram input + doc: | + Input a tumor CRAM file for the variant calling stage + type: File? + cram_reference: + label: cram reference + doc: | + Path to the reference fasta file for the CRAM input. + Required only if the input is a cram file AND not the reference in the tarball + type: File? + # Add reference tar + reference_tar: + label: reference tar + doc: | + Path to ref data tarball + type: File + + # Output naming options + # Germline + output_prefix_germline: + label: output prefix germline + doc: | + The prefix given to all outputs for the dragen germline pipeline + type: string + # Somatic + output_prefix_somatic: + label: output prefix somatic + doc: | + The prefix given to all outputs for the dragen somatic pipeline + type: string + + # Optional operation modes + # Given we're running from fastqs + # --enable-variant-caller option must be set to true (set in arguments), --enable-map-align is then activated by default + # --enable-map-align-output to keep bams + # --enable-duplicate-marking to mark duplicate reads at the same time + # --enable-sv to enable the structural variant calling step. + # For the following inputs we also allow splitting options between somatic and germline outputs + # --enable-sort + # --enable-map-align + # --enable-map-align-output + # --enable-duplicate-marking + # --dedup-min-qual + enable_sort: + label: enable sort + doc: | + True by default, only set this to false if using --bam-input parameter + type: boolean? + enable_sort_germline: + label: enable sort germline + doc: | + True by default, only set this to false if using --bam-input parameter + type: boolean? + enable_sort_somatic: + label: enable sort somatic + doc: | + True by default, only set this to false if using --bam-input parameter + type: boolean? 
+ enable_map_align: + label: enable map align + doc: | + Enabled by default since --enable-variant-caller option is set to true. + Set this value to false if using bam_input + type: boolean? + enable_map_align_germline: + label: enable map align germline + doc: | + Enabled by default since --enable-variant-caller option is set to true. + Set this value to false if using bam_input + type: boolean? + enable_map_align_somatic: + label: enable map align somatic + doc: | + Enabled by default since --enable-variant-caller option is set to true. + Set this value to false if using bam_input + type: boolean? + enable_map_align_output: + label: enable map align output + doc: | + Enables saving the output from the + map/align stage. Default is true when only + running map/align. Default is false if + running the variant caller. + type: boolean? + enable_map_align_output_germline: + label: enable map align output germline + doc: | + Enables saving the output from the + map/align stage. Default is true when only + running map/align. Default is false if + running the variant caller. + type: boolean? + enable_map_align_output_somatic: + label: enable map align output somatic + doc: | + Enables saving the output from the + map/align stage. Default is true when only + running map/align. Default is false if + running the variant caller. + type: boolean? + enable_duplicate_marking: + label: enable duplicate marking + doc: | + Enable the flagging of duplicate output + alignment records. + type: boolean? + enable_duplicate_marking_germline: + label: enable duplicate marking germline + doc: | + Enable the flagging of duplicate output + alignment records. + type: boolean? + enable_duplicate_marking_somatic: + label: enable duplicate marking somatic + doc: | + Enable the flagging of duplicate output + alignment records. + type: boolean? + enable_sv: + label: enable sv + doc: | + Enable/disable structural variant + caller. Default is false. + type: boolean? 
+ enable_sv_germline: + label: enable sv germline + doc: | + Enable/disable structural variant + caller. Default is false. + type: boolean? + enable_sv_somatic: + label: enable sv somatic + doc: | + Enable/disable structural variant + caller. Default is false. + type: boolean? + + # Phased / MNV Calling options + vc_combine_phased_variants_distance_somatic: + label: vc combine phased variants distance somatic + doc: | + When the specified value is greater than 0, combines all phased variants in the phasing set that have a distance + less than or equal to the provided value. The max allowed phasing distance is 15. + The default value is 0, which disables the option. + type: int? + vc_combine_phased_variants_max_vaf_delta_somatic: + label: vc combine phased variants max vaf delta somatic + doc: | + Component SNVs/INDELs of MNV calls are output only if the VAF of the component + call is greater than that of the MNV by more than 0.1. The VAF difference + threshold for outputting component calls along with MNV calls can be controlled by + the --vc-combine-phased-variants-max-vaf-delta option. + This option is mutually exclusive with --vc-mnv-emit-component-calls + type: float? + vc_mnv_emit_component_calls_somatic: + label: vc mnv emit component calls somatic + doc: | + To output all component SNVs/INDELs of MNVs, regardless of VAF difference, + when enabled, use the option --vc-mnv-emit-component-calls. + This option is mutually exclusive with --vc-combine-phased-variants-max-vaf-delta + type: boolean? + + # Deduplication options + dedup_min_qual: + label: deduplicate minimum quality + doc: | + Specifies the Phred quality score below which a base should be excluded from the quality score + calculation used for choosing among duplicate reads. + type: int? 
+ dedup_min_qual_germline: + label: deduplicate minimum quality germline + doc: | + Specifies the Phred quality score below which a base should be excluded from the quality score + calculation used for choosing among duplicate reads. + type: int? + dedup_min_qual_somatic: + label: deduplicate minimum quality somatic + doc: | + Specifies the Phred quality score below which a base should be excluded from the quality score + calculation used for choosing among duplicate reads. + type: int? + + # Structural Variant Caller Options + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/StructuralVariantCalling.htm + sv_call_regions_bed: + label: sv call regions bed + doc: | + Specifies a BED file containing the set of regions to call. + type: File? + sv_region: + label: sv region + doc: | + Limit the analysis to a specified region of the genome for debugging purposes. + This option can be specified multiple times to build a list of regions. + The value must be in the format "chr:startPos-endPos".. + type: string? + sv_exome: + label: sv exome + doc: | + Set to true to configure the variant caller for targeted sequencing inputs, + which includes disabling high depth filters. + In integrated mode, the default is to autodetect targeted sequencing input, + and in standalone mode the default is false. + type: boolean? + sv_output_contigs: + label: sv output contigs + doc: | + Set to true to have assembled contig sequences output in a VCF file. The default is false. + type: boolean? + sv_forcegt_vcf: + label: sv forcegt vcf + doc: | + Specify a VCF of structural variants for forced genotyping. The variants are scored and emitted + in the output VCF even if not found in the sample data. + The variants are merged with any additional variants discovered directly from the sample data. + type: File? + sv_discovery: + label: sv discovery + doc: | + Enable SV discovery. This flag can be set to false only when --sv-forcegt-vcf is used. 
+ When set to false, SV discovery is disabled and only the forced genotyping input variants + are processed. The default is true. + type: boolean? + sv_se_overlap_pair_evidence: + label: sv use overlap pair evidence + doc: | + Allow overlapping read pairs to be considered as evidence. + By default, DRAGEN uses autodetect on the fraction of overlapping read pairs if <20%. + type: boolean? + sv_somatic_ins_tandup_hotspot_regions_bed: + label: sv somatic ins tandup hotspot regions bed + doc: | + Specify a BED of ITD hotspot regions to increase sensitivity for calling ITDs in somatic variant analysis. + By default, DRAGEN SV automatically selects a reference-specific hotspots BED file from + /opt/edico/config/sv_somatic_ins_tandup_hotspot_*.bed. + type: File? + sv_enable_somatic_ins_tandup_hotspot_regions: + label: sv enable somatic ins tandup hotspot regions + doc: | + Enable or disable the ITD hotspot region input. The default is true in somatic variant analysis. + type: boolean? + sv_enable_liquid_tumor_mode: + label: sv enable liquid tumor mode + doc: | + Enable liquid tumor mode. + type: boolean? + sv_tin_contam_tolerance: + label: sv tin contam tolerance + doc: | + Set the Tumor-in-Normal (TiN) contamination tolerance level. + You can enter any value between 0-1. The default maximum TiN contamination tolerance is 0.15. + type: float? + + # Variant calling options + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/SmallVariantCaller.htm + vc_base_qual_threshold: + label: vc base qual threshold + doc: | + (Replaces --vc-min-base-qual) + Specifies the minimum base quality to be considered in the active region detection of the small variant caller. + The default value is 10. + type: int? + vc_base_qual_threshold_somatic: + label: vc base qual threshold somatic + doc: | + (Replaces --vc-min-base-qual) + Specifies the minimum base quality to be considered in the active region detection of the small variant caller. + The default value is 10.
+ type: int? + vc_base_qual_threshold_germline: + label: vc base qual threshold germline + doc: | + (Replaces --vc-min-base-qual) + Specifies the minimum base quality to be considered in the active region detection of the small variant caller. + The default value is 10. + type: int? + vc_target_bed: + label: vc target bed + doc: | + This is an optional command line input that restricts processing of the small variant caller, + target bed related coverage, and callability metrics to regions specified in a BED file. + type: File? + vc_target_bed_padding: + label: vc target bed padding + doc: | + This is an optional command line input that can be used to pad all of the target + BED regions with the specified value. + For example, if a BED region is 1:1000-2000 and a padding value of 100 is used, + it is equivalent to using a BED region of 1:900-2100 and a padding value of 0. + + Any padding added to --vc-target-bed-padding is used by the small variant caller + and by the target bed coverage/callability reports. The default padding is 0. + type: int? + vc_target_coverage: + label: vc target coverage + doc: | + The --vc-target-coverage option specifies the target coverage for down-sampling. + The default value is 500 for germline mode and 50 for somatic mode. + type: int? + vc_target_vaf_somatic: + label: vc target vaf somatic + doc: | + The vc-target-vaf is used to select the variant allele frequencies of interest. + The variant caller will aim to detect variants with allele frequencies larger than this setting. + We recommend adding a small safety factor, e.g. to ensure variants in the ballpark of 1% are detected, + the minimum vc-target-vaf can be specified as 0.009 (0.9%). This setting will not apply a hard threshold, + and it is possible to detect variants with allele frequencies lower than the selected threshold. + On high coverage and clean datasets, a lower target-vaf may help increase sensitivity. 
+ On noisy samples (like FFPE) a higher target-vaf (like 0.03) may help reduce false positives. + Using a low target-vaf may also increase runtime. Set the vc-target-vaf to 0 to disable this feature. + When this feature is disabled the variant caller will require at least 2 supporting reads to discover a candidate variant. + Default=0.01. + type: float? + + vc_enable_gatk_acceleration: + label: vc enable gatk acceleration + doc: | + If set to true, the variant caller runs in GATK mode + (concordant with GATK 3.7 in germline mode and GATK 4.0 in somatic mode). + type: boolean? + vc_remove_all_soft_clips: + label: vc remove all soft clips + doc: | + If set to true, the variant caller does not use soft clips of reads to determine variants. + type: boolean? + vc_decoy_contigs: + label: vc decoy contigs + doc: | + The --vc-decoy-contigs option specifies a comma-separated list of contigs to skip during variant calling. + This option can be set in the configuration file. + type: string? + vc_enable_decoy_contigs: + label: vc enable decoy contigs + doc: | + If --vc-enable-decoy-contigs is set to true, variant calls on the decoy contigs are enabled. + The default value is false. + type: boolean? + vc_enable_phasing: + label: vc enable phasing + doc: | + The --vc-enable-phasing option enables variants to be phased when possible. The default value is true. + type: boolean? + vc_enable_vcf_output: + label: vc enable vcf output + doc: | + The --vc-enable-vcf-output option enables VCF file output during a gVCF run. The default value is false. + type: boolean? + # Downsampling options + vc_max_reads_per_active_region: + label: vc max reads per active region + doc: | + specifies the maximum number of reads covering a given active region. + Default is 10000 for the somatic workflow + type: int? + vc_max_reads_per_raw_region: + label: vc max reads per raw region + doc: | + specifies the maximum number of reads covering a given raw region.
+ Default is 30000 for the somatic workflow + type: int? + # Ploidy support + sample_sex: + label: sample sex + doc: | + Specifies the sex of a sample + type: + - "null" + - type: enum + symbols: + - male + - female + # ROH options + vc_enable_roh: + label: vc enable roh + doc: | + Enable or disable the ROH caller by setting this option to true or false. Enabled by default for human autosomes only. + type: boolean? + vc_roh_blacklist_bed: + label: vc roh blacklist bed + doc: | + If provided, the ROH caller ignores variants that are contained in any region in the blacklist BED file. + DRAGEN distributes blacklist files for all popular human genomes and automatically selects a blacklist to + match the genome in use, unless this option is used to explicitly select a file. + type: File? + # BAF options + vc_enable_baf: + label: vc enable baf + doc: | + Enable or disable B-allele frequency output. Enabled by default. + type: boolean? + # Somatic calling options + vc_min_tumor_read_qual: + label: vc min tumor read qual + type: int? + doc: | + The --vc-min-tumor-read-qual option specifies the minimum read quality (MAPQ) to be considered for + variant calling. The default value is 3 for tumor-normal analysis or 20 for tumor-only analysis. + vc_callability_tumor_thresh: + label: vc callability tumor thresh + type: int? + doc: | + The --vc-callability-tumor-thresh option specifies the callability threshold for tumor samples. The + somatic callable regions report includes all regions with tumor coverage above the tumor threshold. + vc_callability_normal_thresh: + label: vc callability normal thresh + type: int? + doc: | + The --vc-callability-normal-thresh option specifies the callability threshold for normal samples. + The somatic callable regions report includes all regions with normal coverage above the normal threshold. + vc_somatic_hotspots: + label: vc somatic hotspots + type: File?
+ doc: | + The somatic hotspots option allows an input VCF to specify the positions where the risk for somatic + mutations are assumed to be significantly elevated. DRAGEN genotyping priors are boosted for all + positions specified in the VCF, so it is possible to call a variant at one of these sites with fewer supporting + reads. The cosmic database in VCF format can be used as one source of prior information to boost + sensitivity for known somatic mutations. + vc_hotspot_log10_prior_boost: + label: vc hotspot log10 prior boost + type: int? + doc: | + The size of the hotspot adjustment can be controlled via vc-hotspot-log10-prior-boost, + which has a default value of 4 (log10 scale) corresponding to an increase of 40 phred. + vc_enable_liquid_tumor_mode: + label: vc enable liquid tumor mode + type: boolean? + doc: | + In a tumor-normal analysis, DRAGEN accounts for tumor-in-normal (TiN) contamination by running liquid + tumor mode. Liquid tumor mode is disabled by default. When liquid tumor mode is enabled, DRAGEN is + able to call variants in the presence of TiN contamination up to a specified maximum tolerance level. + vc-enable-liquid-tumor-mode enables liquid tumor mode with a default maximum contamination + TiN tolerance of 0.15. If using the default maximum contamination TiN tolerance, somatic variants are + expected to be observed in the normal sample with allele frequencies up to 15% of the corresponding + allele in the tumor sample. + vc_tin_contam_tolerance: + label: vc tin contam tolerance + type: float? + doc: | + vc-tin-contam-tolerance enables liquid tumor mode and allows you to + set the maximum contamination TiN tolerance. The maximum contamination TiN tolerance must be + greater than zero. For example, vc-tin-contam-tolerance=0.1. + vc_enable_orientation_bias_filter: + label: vc enable orientation bias filter + type: boolean? + doc: | + Enables the orientation bias filter. The default value is false, which means the option is disabled.
+ vc_enable_orientation_bias_filter_artifacts: + label: vc enable orientation bias filter artifacts + type: string? + doc: | + The artifact type to be filtered can be specified with the --vc-orientation-bias-filter-artifacts option. + The default is C/T,G/T, which correspond to OxoG and FFPE artifacts. Valid values include C/T, or G/T, or C/T,G/T,C/A. + An artifact (or an artifact and its reverse compliment) cannot be listed twice. + For example, C/T,G/A is not valid, because C->G and T->A are reverse compliments. + # Post somatic calling filtering options + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/PostSomaticFilters.htm + vc_hard_filter: + label: vc hard filter + doc: | + DRAGEN provides post-VCF variant filtering based on annotations present in the VCF records. + However, due to the nature of DRAGEN's algorithms, which incorporate the hypothesis of correlated errors + from within the core of variant caller, the pipeline has improved capabilities in distinguishing + the true variants from noise, and therefore the dependency on post-VCF filtering is substantially reduced. + For this reason, the default post-VCF filtering in DRAGEN is very simple + type: string? + vc_sq_call_threshold: + label: vc sq call threshold + type: float? + doc: | + Emits calls in the VCF. The default is 3. + If the value for vc-sq-filter-threshold is lower than vc-sq-callthreshold, + the filter threshold value is used instead of the call threshold value + vc_sq_filter_threshold: + label: vc sq filter threshold + type: float? + doc: | + Marks emitted VCF calls as filtered. + The default is 17.5 for tumor-normal and 6.5 for tumor-only. + vc_enable_triallelic_filter: + label: vc enable triallelic filter + type: boolean? + doc: | + Enables the multiallelic filter. The default is true. + vc_enable_af_filter: + label: vc enable af filter + type: boolean? + doc: | + Enables the allele frequency filter. The default value is false. 
When set to true, the VCF excludes variants + with allele frequencies below the AF call threshold or variants with an allele frequency below the AF filter + threshold and tagged with low AF filter tag. The default AF call threshold is 1% and the default AF filter + threshold is 5%. + To change the threshold values, use the following command line options: + --vc-af-call-threshold and --vc-af-filter-threshold. + vc_af_call_threshold: + label: vc af call threshold + type: float? + doc: | + Set the allele frequency call threshold to emit a call in the VCF if the AF filter is enabled. + The default is 0.01. + vc_af_filter_threshold: + label: vc af filter threshold + type: float? + doc: | + Set the allele frequency filter threshold to mark emitted VCF calls as filtered if the AF filter is + enabled. + The default is 0.05. + vc_enable_non_homref_normal_filter: + label: vc enable non homoref normal filter + doc: | + Enables the non-homref normal filter. The default value is true. When set to true, the VCF filters out + variants if the normal sample genotype is not a homozygous reference. + type: boolean? + + # Mitochondrial allele frequency filters + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/MitochondrialCalling.htm + vc_af_call_threshold_mito: + label: vc af call threshold mito + doc: | + If the AF filter is enabled using --vc-enable-af-filter-mito=true, + the option sets the allele frequency call threshold to emit a call in the VCF for mitochondrial variant calling. + The default value is 0.01. + type: float? + vc_af_filter_threshold_mito: + label: vc af filter threshold mito + doc: | + If the AF filter is enabled using --vc-enable-af-filter-mito=true, + the option sets the allele frequency filter threshold to mark emitted VCF calls + as filtered for mitochondrial variant calling. The default value is 0.02. + type: float?
+ + # Enable non primary allelic filter + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/PostSomaticFilters.htm + vc_enable_non_primary_allelic_filter: + label: vc enable non primary allelic filter + doc: | + Similar to vc-enable-triallelic-filter, but less aggressive. + Keep the allele per position with highest alt AD, and only filter the rest. + The default is false. Not compatible with vc-enable-triallelic-filter. + type: boolean? + + # Turn off ntd error bias estimation + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/SNVErrorEstimation.htm + vc_enable_unequal_ntd: + label: vc enable unequal ntd + doc: | + Nucleotide (NTD) Error Bias Estimation is on by default and recommended as a replacement for the orientation bias filter. + Both methods take account of strand-specific biases (systematic differences between F1R2 and F2R1 reads). + In addition, NTD error estimation accounts for non-strand-specific biases such as sample-wide elevation of a certain SNV type, + eg C->T or any other transition or transversion. + NTD error estimation can also capture the biases in a trinucleotide context. + type: + - "null" + - boolean + - type: enum + symbols: + - "true" + - "false" + - "auto" + + # dbSNP annotation + dbsnp_annotation: + label: dbsnp annotation + doc: | + In Germline, Tumor-Normal somatic, or Tumor-Only somatic modes, + DRAGEN can look up variant calls in a dbSNP database and add annotations for any matches that it finds there. + To enable the dbSNP database search, set the --dbsnp option to the full path to the dbSNP database + VCF or .vcf.gz file, which must be sorted in reference order. + type: File? + secondaryFiles: + - pattern: ".tbi" + required: true + + # cnv pipeline - with this we must also specify one of --cnv-normal-b-allele-vcf, + # --cnv-population-b-allele-vcf, or cnv-use-somatic-vc-baf. + # If known, specify the sex of the sample. 
+ # If the sample sex is not specified, the caller attempts to estimate the sample sex from tumor alignments. + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/CopyNumVariantCalling.htm + enable_cnv: + label: enable cnv calling + doc: | + Enable CNV processing in the DRAGEN Host Software. + type: boolean? + cnv_normal_b_allele_vcf: + label: cnv normal b allele vcf + doc: | + Specify a matched normal SNV VCF. + type: File? + cnv_population_b_allele_vcf: + label: cnv population b allele vcf + doc: | + Specify a population SNP catalog. + type: File? + cnv_use_somatic_vc_baf: + label: cnv use somatic vc baf + doc: | + If running in tumor-normal mode with the SNV caller enabled, use this option + to specify the germline heterozygous sites. + type: boolean? + # For more info on following options - see + # https://support-docs.illumina.com/SW/DRAGEN_v39/Content/SW/DRAGEN/SomaticWGSModes.htm#Germline + cnv_normal_cnv_vcf: + label: cnv normal cnv vcf + doc: | + Specify germline CNVs from the matched normal sample. + type: boolean? + cnv_use_somatic_vc_vaf: + label: cnv use somatic vc vaf + doc: | + Use the variant allele frequencies (VAFs) from the somatic SNVs to help select + the tumor model for the sample. + type: boolean? + cnv_somatic_enable_het_calling: + label: cnv somatic enable het calling + doc: | + Enable HET-calling mode for heterogeneous segments. + type: boolean? + cnv_enable_self_normalization: + label: cnv enable self normalization + doc: | + Enable CNV self normalization. + Self Normalization requires that the DRAGEN hash table be generated with the enable-cnv=true option. + type: boolean? 
+ cnv_somatic_enable_lower_ploidy_limit: + label: cnv somatic enable lower ploidy limit + doc: | + To improve accuracy on the tumor ploidy model estimation, the somatic WGS CNV caller estimates whether the chosen model calls + homozygous deletions on regions that are likely to reduce the overall fitness of cells, + which are therefore deemed to be "essential" and under negative selection. + In the current literature, recent efforts tried to map such cell-essential genes (eg, in 2015 - https://www.science.org/doi/10.1126/science.aac7041). + The check on essential regions is controlled with --cnv-somatic-enable-lower-ploidy-limit (default true). + type: boolean? + cnv_somatic_essential_genes_bed: + label: cnv somatic essential genes bed + doc: | + Default bedfiles describing the essential regions are provided for hg19, GRCh37, hs37d5, GRCh38, + but a custom bedfile can also be provided in input through the + --cnv-somatic-essential-genes-bed= parameter. + In such case, the feature is automatically enabled. + A custom essential regions bedfile needs to have the following format: 4-column, tab-separated, + where the first 3 columns identify the coordinates of the essential region (chromosome, 0-based start, excluded end). + The fourth column is the region id (string type). For the purpose of the algorithm, currently only the first 3 columns are used. + However, the fourth might be helpful to investigate manually which regions drove the decisions on model plausibility made by the caller. + type: + - "null" + - string + - File + + # HRD + enable_hrd: + label: enable hrd + doc: | + Set to true to enable HRD scoring to quantify genomic instability. + Requires somatic CNV calls. + type: boolean? + + # QC options + qc_coverage_region_1: + label: qc coverage region 1 + doc: | + Generates coverage region report using bed file 1. + type: File? + qc_coverage_region_2: + label: qc coverage region 2 + doc: | + Generates coverage region report using bed file 2. + type: File? 
+ qc_coverage_region_3: + label: qc coverage region 3 + doc: | + Generates coverage region report using bed file 3. + type: File? + qc_coverage_ignore_overlaps: + label: qc coverage ignore overlaps + doc: | + Set to true to resolve all of the alignments for each fragment and avoid double-counting any + overlapping bases. This might result in marginally longer run times. + This option also requires setting --enable-map-align=true. + type: boolean? + + # TMB options + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/Biomarkers_TMB.htm + enable_tmb: + label: enable tmb + doc: | + Enables TMB. If set, the small variant caller, Illumina Annotation Engine, + and the related callability report are enabled. + type: boolean? + tmb_vaf_threshold: + label: tmb vaf threshold + doc: | + Specify the minimum VAF threshold for a variant. Variants that do not meet the threshold are filtered out. + The default value is 0.05. + type: float? + tmb_db_threshold: + label: tmb db threshold + doc: | + Specify the minimum allele count (total number of observations) for an allele in gnomAD or 1000 Genome + to be considered a germline variant. Variant calls that have the same positions and allele are ignored + from the TMB calculation. The default value is 10. + type: int? + + # HLA calling + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/HLACaller.htm + enable_hla: + label: enable hla + doc: | + Enable HLA typing by setting --enable-hla flag to true + type: boolean? + hla_bed_file: + label: hla bed file + doc: | + Use the HLA region BED input file to specify the region to extract HLA reads from. + DRAGEN HLA Caller parses the input file for regions within the BED file, and then + extracts reads accordingly to align with the HLA allele reference. + type: File? + hla_reference_file: + label: hla reference file + doc: | + Use the HLA allele reference file to specify the reference alleles to align against. 
+ The input HLA reference file must be in FASTA format and contain the protein sequence separated into exons. + If --hla-reference-file is not specified, DRAGEN uses hla_classI_ref_freq.fasta from /opt/edico/config/. + The reference HLA sequences are obtained from the IMGT/HLA database. + type: File? + hla_allele_frequency_file: + label: hla allele frequency file + doc: | + Use the population-level HLA allele frequency file to break ties if one or more HLA allele produces the same or similar results. + The input HLA allele frequency file must be in CSV format and contain the HLA alleles and the occurrence frequency in population. + If --hla-allele-frequency-file is not specified, DRAGEN automatically uses hla_classI_allele_frequency.csv from /opt/edico/config/. + Population-level allele frequencies can be obtained from the Allele Frequency Net database. + type: File? + hla_tiebreaker_threshold: + label: hla tiebreaker threshold + doc: | + If more than one allele has a similar number of reads aligned and there is not a clear indicator for the best allele, + the alleles are considered as ties. The HLA Caller places the tied alleles into a candidate set for tie breaking based + on the population allele frequency. If an allele has more than the specified fraction of reads aligned (normalized to + the top hit), then the allele is included into the candidate set for tie breaking. The default value is 0.97. + type: float? + hla_zygosity_threshold: + label: hla zygosity threshold + doc: | + If the minor allele at a given locus has fewer reads mapped than a fraction of the read count of the major allele, + then the HLA Caller infers homozygosity for the given HLA-I gene. You can use this option to specify the fraction value. + The default value is 0.15. + type: float? + hla_min_reads: + label: hla min reads + doc: | + Set the minimum number of reads to align to HLA alleles to ensure sufficient coverage and perform HLA typing. 
+ The default value is 1000 and suggested for WES samples. If using samples with less coverage, you can use a + lower threshold value. + type: int? + + # RNA + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/TPipelineIntro_fDG.htm + enable_rna: + label: enable rna + doc: | + Set this option for running RNA samples through T/N workflow + type: boolean? + + # Repeat Expansion + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/RepeatGenotyping.htm + repeat_genotype_enable: + label: repeat genotype enable + doc: | + Enables repeat expansion detection. + type: boolean? + repeat_genotype_specs: + label: repeat genotype specs + doc: | + Specifies the full path to the JSON file that contains the + repeat variant catalog (specification) describing the loci to call. + If the option is not provided, DRAGEN attempts to autodetect the applicable catalog file + from /opt/edico/repeat-specs/ based on the reference provided. + type: File? + repeat_genotype_use_catalog: + label: repeat genotype use catalog + doc: | + Repeat variant catalog type to use (default - ~60 repeats, default_plus_smn - + same as default with SMN repeat, expanded - ~50K repeats) + type: + - "null" + - type: enum + symbols: + - default + - default_plus_smn + - expanded + + # Germline specific parameters + # Force genotyping for gf + vc_forcegt_vcf: + label: vc forcegt vcf + doc: | + DRAGEN supports force genotyping (ForceGT) for Germline SNV variant calling. + To use ForceGT, use the --vc-forcegt-vcf option with a list of small variants to force genotype. + The input list of small variants can be a .vcf or .vcf.gz file. + + The current limitations of ForceGT are as follows: + * ForceGT is supported for Germline SNV variant calling in the V3 mode. + The V1, V2, and V2+ modes are not supported. + * ForceGT is not supported for Somatic SNV variant calling. + * ForceGT variants do not propagate through Joint Genotyping. + type: File?
+ secondaryFiles: + - pattern: ".tbi" + required: true + + # Miscell + lic_instance_id_location: + label: license instance id location + doc: | + You may wish to place your own in. + Optional value, default set to /opt/instance-identity + which is a path inside the dragen container + type: + - File? + - string? + default: "/opt/instance-identity" + + +steps: + # We run the germline and somatic tools in parallel + # Run dragen germline workflow + run_dragen_germline_step: + label: run dragen germline step + doc: | + Runs the dragen germline workflow on the FPGA. + Takes in either a fastq list as a file or a fastq_list_rows schema object + in: + # Option 1 + fastq_list_rows: + source: fastq_list_rows + # Option 2 + fastq_list: + source: fastq_list + # Option 3 + bam_input: + source: bam_input + # Option 4 + cram_input: + source: cram_input + cram_reference: + source: cram_reference + reference_tar: + source: reference_tar + output_file_prefix: + source: output_prefix_germline + output_directory: + source: output_prefix_germline + valueFrom: "$(self)_dragen_germline" + enable_sort: + source: [ enable_sort_germline, enable_sort] + valueFrom: | + ${ + return get_first_non_null_input(self); + } + enable_map_align: + source: [ enable_map_align_germline, enable_map_align ] + valueFrom: | + ${ + return get_first_non_null_input(self); + } + enable_map_align_output: + source: [ enable_map_align_output_germline, enable_map_align_output ] + valueFrom: | + ${ + return get_first_non_null_input(self); + } + enable_duplicate_marking: + source: [ enable_duplicate_marking_germline, enable_duplicate_marking ] + valueFrom: | + ${ + return get_first_non_null_input(self); + } + dedup_min_qual: + source: [ dedup_min_qual_germline, dedup_min_qual ] + valueFrom: | + ${ + return get_first_non_null_input(self); + } + enable_sv: + source: [ enable_sv_germline, enable_sv ] + valueFrom: | + ${ + return get_first_non_null_input(self); + } + # Variant calling options + vc_base_qual_threshold: + 
source: [ vc_base_qual_threshold_germline, vc_base_qual_threshold ] + valueFrom: | + ${ + return get_first_non_null_input(self); + } + vc_target_bed: + source: vc_target_bed + vc_target_bed_padding: + source: vc_target_bed_padding + vc_target_coverage: + source: vc_target_coverage + vc_enable_gatk_acceleration: + source: vc_enable_gatk_acceleration + vc_remove_all_soft_clips: + source: vc_remove_all_soft_clips + vc_decoy_contigs: + source: vc_decoy_contigs + vc_enable_decoy_contigs: + source: vc_enable_decoy_contigs + vc_enable_phasing: + source: vc_enable_phasing + vc_enable_vcf_output: + source: vc_enable_vcf_output + vc_max_reads_per_active_region: + source: vc_max_reads_per_active_region + vc_max_reads_per_raw_region: + source: vc_max_reads_per_raw_region + sample_sex: + source: sample_sex + vc_enable_roh: + source: vc_enable_roh + vc_roh_blacklist_bed: + source: vc_roh_blacklist_bed + vc_enable_baf: + source: vc_enable_baf + vc_hard_filter: + source: vc_hard_filter + vc_forcegt_vcf: + source: vc_forcegt_vcf + # Structural Variant Caller Options + sv_call_regions_bed: + source: sv_call_regions_bed + sv_region: + source: sv_region + sv_exome: + source: sv_exome + sv_output_contigs: + source: sv_output_contigs + sv_forcegt_vcf: + source: sv_forcegt_vcf + sv_discovery: + source: sv_discovery + sv_se_overlap_pair_evidence: + source: sv_se_overlap_pair_evidence + sv_enable_liquid_tumor_mode: + source: sv_enable_liquid_tumor_mode + sv_tin_contam_tolerance: + source: sv_tin_contam_tolerance + dbsnp_annotation: + source: dbsnp_annotation + #cnv options + enable_cnv: + source: enable_cnv + cnv_enable_self_normalization: + source: cnv_enable_self_normalization + #qc options + qc_coverage_region_1: + source: qc_coverage_region_1 + qc_coverage_region_2: + source: qc_coverage_region_2 + qc_coverage_region_3: + source: qc_coverage_region_3 + qc_coverage_ignore_overlaps: + source: qc_coverage_ignore_overlaps + #hla + enable_hla: + source: enable_hla + hla_bed_file: + source: 
hla_bed_file + hla_reference_file: + source: hla_reference_file + hla_allele_frequency_file: + source: hla_allele_frequency_file + hla_tiebreaker_threshold: + source: hla_tiebreaker_threshold + hla_zygosity_threshold: + source: hla_zygosity_threshold + hla_min_reads: + source: hla_min_reads + lic_instance_id_location: + source: lic_instance_id_location + out: + - id: dragen_germline_output_directory + - id: dragen_bam_out + - id: dragen_vcf_out + run: ../../../tools/dragen-germline/4.3.6/dragen-germline__4.3.6.cwl + run_dragen_somatic_step: + label: run dragen somatic step + doc: | + Run dragen somatic v4.3.6 + in: + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/OptionReference.htm + # Inputs fastq list csv or actual fastq list file with presigned urls for Read1File and Read2File columns + # File inputs + # Option 1 + fastq_list: + source: fastq_list + tumor_fastq_list: + source: tumor_fastq_list + # Option 2 + fastq_list_rows: + source: fastq_list_rows + tumor_fastq_list_rows: + source: tumor_fastq_list_rows + # Option 3 + bam_input: + source: bam_input + tumor_bam_input: + source: tumor_bam_input + # Option 4 + cram_input: + source: cram_input + tumor_cram_input: + source: tumor_cram_input + cram_reference: + source: cram_reference + reference_tar: + source: reference_tar + # Mandatory parameters + output_file_prefix: + source: output_prefix_somatic + output_directory: + source: output_prefix_somatic + valueFrom: "$(self)_dragen_somatic" + # Optional operation modes + # Optional operation modes + # Given we're running from fastqs + # --enable-variant-caller option must be set to true (set in arguments), --enable-map-align is then activated by default + # --enable-map-align-output to keep bams + # --enable-duplicate-marking to mark duplicate reads at the same time + # --enable-sv to enable the structural variant calling step. 
+ enable_sort: + source: [ enable_sort_somatic, enable_sort ] + valueFrom: | + ${ + return get_first_non_null_input(self); + } + enable_map_align: + source: [ enable_map_align_somatic, enable_map_align ] + valueFrom: | + ${ + return get_first_non_null_input(self); + } + enable_map_align_output: + source: [ enable_map_align_output_somatic, enable_map_align_output ] + valueFrom: | + ${ + return get_first_non_null_input(self); + } + enable_duplicate_marking: + source: [ enable_duplicate_marking_somatic, enable_duplicate_marking ] + valueFrom: | + ${ + return get_first_non_null_input(self); + } + dedup_min_qual: + source: [ dedup_min_qual_somatic, dedup_min_qual ] + valueFrom: | + ${ + return get_first_non_null_input(self); + } + enable_sv: + source: [ enable_sv_somatic, enable_sv ] + valueFrom: | + ${ + return get_first_non_null_input(self); + } + # Phased / MNV Calling Options + # Phased / MNV Calling options + vc_combine_phased_variants_distance: + source: vc_combine_phased_variants_distance_somatic + vc_combine_phased_variants_max_vaf_delta: + source: vc_combine_phased_variants_max_vaf_delta_somatic + vc_mnv_emit_component_calls: + source: vc_mnv_emit_component_calls_somatic + # Structural Variant Caller Options + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/StructuralVariantCalling.htm + sv_call_regions_bed: + source: sv_call_regions_bed + sv_region: + source: sv_region + sv_exome: + source: sv_exome + sv_output_contigs: + source: sv_output_contigs + sv_forcegt_vcf: + source: sv_forcegt_vcf + sv_discovery: + source: sv_discovery + sv_se_overlap_pair_evidence: + source: sv_se_overlap_pair_evidence + sv_somatic_ins_tandup_hotspot_regions_bed: + source: sv_somatic_ins_tandup_hotspot_regions_bed + sv_enable_somatic_ins_tandup_hotspot_regions: + source: sv_enable_somatic_ins_tandup_hotspot_regions + sv_enable_liquid_tumor_mode: + source: sv_enable_liquid_tumor_mode + sv_tin_contam_tolerance: + source: sv_tin_contam_tolerance + # Variant calling options 
+ # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/SmallVariantCaller.htm + vc_base_qual_threshold: + source: [ vc_base_qual_threshold_somatic, vc_base_qual_threshold ] + valueFrom: | + ${ + return get_first_non_null_input(self); + } + vc_target_bed: + source: vc_target_bed + vc_target_bed_padding: + source: vc_target_bed_padding + vc_target_coverage: + source: vc_target_coverage + vc_target_vaf: + source: vc_target_vaf_somatic + vc_enable_gatk_acceleration: + source: vc_enable_gatk_acceleration + vc_remove_all_soft_clips: + source: vc_remove_all_soft_clips + vc_decoy_contigs: + source: vc_decoy_contigs + vc_enable_decoy_contigs: + source: vc_enable_decoy_contigs + vc_enable_phasing: + source: vc_enable_phasing + vc_enable_vcf_output: + source: vc_enable_vcf_output + # Downsampling options + vc_max_reads_per_active_region: + source: vc_max_reads_per_active_region + vc_max_reads_per_raw_region: + source: vc_max_reads_per_raw_region + # Ploidy support + sample_sex: + source: sample_sex + # ROH options + vc_enable_roh: + source: vc_enable_roh + vc_roh_blacklist_bed: + source: vc_roh_blacklist_bed + # BAF options + vc_enable_baf: + source: vc_enable_baf + # Somatic calling options + vc_hard_filter: + source: vc_hard_filter + vc_min_tumor_read_qual: + source: vc_min_tumor_read_qual + vc_callability_tumor_thresh: + source: vc_callability_tumor_thresh + vc_callability_normal_thresh: + source: vc_callability_normal_thresh + vc_somatic_hotspots: + source: vc_somatic_hotspots + vc_hotspot_log10_prior_boost: + source: vc_hotspot_log10_prior_boost + vc_enable_liquid_tumor_mode: + source: vc_enable_liquid_tumor_mode + vc_tin_contam_tolerance: + source: vc_tin_contam_tolerance + vc_enable_orientation_bias_filter: + source: vc_enable_orientation_bias_filter + vc_enable_orientation_bias_filter_artifacts: + source: vc_enable_orientation_bias_filter_artifacts + # Post somatic calling filtering options + # 
https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/PostSomaticFilters.htm + vc_sq_call_threshold: + source: vc_sq_call_threshold + vc_sq_filter_threshold: + source: vc_sq_filter_threshold + vc_enable_triallelic_filter: + source: vc_enable_triallelic_filter + vc_enable_af_filter: + source: vc_enable_af_filter + vc_af_call_threshold: + source: vc_af_call_threshold + vc_af_filter_threshold: + source: vc_af_filter_threshold + vc_enable_non_homref_normal_filter: + source: vc_enable_non_homref_normal_filter + # Mitochondrial allele frequency filters + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/MitochondrialCalling.htm + vc_af_call_threshold_mito: + source: vc_af_call_threshold_mito + vc_af_filter_threshold_mito: + source: vc_af_filter_threshold_mito + # Enable non primary allelic filter + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/PostSomaticFilters.htm + vc_enable_non_primary_allelic_filter: + source: vc_enable_non_primary_allelic_filter + # Turn off ntd error bias estimation + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/SNVErrorEstimation.htm + vc_enable_unequal_ntd: + source: vc_enable_unequal_ntd + # dbSNP annotation + dbsnp_annotation: + source: dbsnp_annotation + # cnv pipeline - with this we must also specify one of --cnv-normal-b-allele-vcf, + # --cnv-population-b-allele-vcf, or cnv-use-somatic-vc-baf. + # If known, specify the sex of the sample. + # If the sample sex is not specified, the caller attempts to estimate the sample sex from tumor alignments. 
+ # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/CopyNumVariantCalling.htm + enable_cnv: + source: enable_cnv + cnv_enable_self_normalization: + source: cnv_enable_self_normalization + cnv_normal_b_allele_vcf: + source: cnv_normal_b_allele_vcf + cnv_population_b_allele_vcf: + source: cnv_population_b_allele_vcf + cnv_use_somatic_vc_baf: + source: cnv_use_somatic_vc_baf + # For more info on following options - see + # https://support-docs.illumina.com/SW/DRAGEN_v39/Content/SW/DRAGEN/SomaticWGSModes.htm#Germline + cnv_normal_cnv_vcf: + source: cnv_normal_cnv_vcf + cnv_use_somatic_vc_vaf: + source: cnv_use_somatic_vc_vaf + cnv_somatic_enable_het_calling: + source: cnv_somatic_enable_het_calling + # Somatic specific CNV calling options + cnv_somatic_enable_lower_ploidy_limit: + source: cnv_somatic_enable_lower_ploidy_limit + cnv_somatic_essential_genes_bed: + source: cnv_somatic_essential_genes_bed + # HRD + enable_hrd: + source: enable_hrd + # QC options + qc_coverage_region_1: + source: qc_coverage_region_1 + qc_coverage_region_2: + source: qc_coverage_region_2 + qc_coverage_region_3: + source: qc_coverage_region_3 + qc_coverage_ignore_overlaps: + source: qc_coverage_ignore_overlaps + # TMB options + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/Biomarkers_TMB.htm + enable_tmb: + source: enable_tmb + tmb_vaf_threshold: + source: tmb_vaf_threshold + tmb_db_threshold: + source: tmb_db_threshold + # HLA calling + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/HLACaller.htm + enable_hla: + source: enable_hla + hla_bed_file: + source: hla_bed_file + hla_reference_file: + source: hla_reference_file + hla_allele_frequency_file: + source: hla_allele_frequency_file + hla_tiebreaker_threshold: + source: hla_tiebreaker_threshold + hla_zygosity_threshold: + source: hla_zygosity_threshold + hla_min_reads: + source: hla_min_reads + # RNA + # 
https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/TPipelineIntro_fDG.htm + enable_rna: + source: enable_rna + # Repeat Expansion + # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/RepeatGenotyping.htm + repeat_genotype_enable: + source: repeat_genotype_enable + repeat_genotype_specs: + source: repeat_genotype_specs + repeat_genotype_use_catalog: + source: repeat_genotype_use_catalog + # Miscell + lic_instance_id_location: + source: lic_instance_id_location + out: + # Will also include mounted-files.txt + - id: dragen_somatic_output_directory + # Optional output files (inside the output directory) that we'll continue to append to as we need them + - id: tumor_bam_out + - id: normal_bam_out + - id: somatic_snv_vcf_out + - id: somatic_snv_vcf_hard_filtered_out + - id: somatic_structural_vcf_out + run: ../../../tools/dragen-somatic/4.3.6/dragen-somatic__4.3.6.cwl + + # Run the multiqc step + run_dragen_qc_step: + label: dragen qc step + doc: | + The dragen qc step - this takes in an array of dirs + in: + input_directories: + source: + - run_dragen_germline_step/dragen_germline_output_directory + - run_dragen_somatic_step/dragen_somatic_output_directory + output_directory_name: + source: [ output_prefix_somatic, output_prefix_germline] + valueFrom: "$(self[0])__$(self[1])_dragen_somatic_and_germline_multiqc" + output_filename: + source: [ output_prefix_somatic, output_prefix_germline] + valueFrom: "$(self[0])__$(self[1])_dragen_somatic_and_germline_multiqc.html" + title: + source: [ output_prefix_somatic, output_prefix_germline] + valueFrom: "UMCCR MultiQC Dragen Somatic And Germline Report for $(self[0])__$(self[1])" + out: + - id: output_directory + run: ../../../tools/multiqc/1.25.1/multiqc__1.25.1.cwl + get_normal_bam_out: + label: get normal bam out + doc: | + Get the normal bam value from one of the two available options + From the germline step (preferred) + From the somatic step (backup option) + in: + input_bams: + source: + - 
run_dragen_germline_step/dragen_bam_out + - run_dragen_somatic_step/normal_bam_out + out: + - id: output_bam_file + run: ../../../expressions/get-first-non-null-bam-file/1.0.0/get-first-non-null-bam-file__1.0.0.cwl + +outputs: + # Will also include mounted-files.txt + dragen_somatic_output_directory: + label: dragen somatic output directory + doc: | + Output directory containing all outputs of the somatic dragen run + type: Directory + outputSource: run_dragen_somatic_step/dragen_somatic_output_directory + dragen_germline_output_directory: + label: dragen germline output directory + doc: | + The output directory containing all germline output files + type: Directory + outputSource: run_dragen_germline_step/dragen_germline_output_directory + germline_snv_vcf_out: + label: germline snv vcf out + doc: | + The output vcf file of germline step + type: File? + outputSource: run_dragen_germline_step/dragen_vcf_out + # Optional output files (inside the output directory) that we'll continue to append to as we need them + tumor_bam_out: + label: output tumor bam + doc: | + Bam file of the tumor sample + type: File? + outputSource: run_dragen_somatic_step/tumor_bam_out + normal_bam_out: + label: output normal bam + doc: | + Bam file of the normal sample + type: File? + outputSource: get_normal_bam_out/output_bam_file + somatic_snv_vcf_out: + label: somatic snv vcf + doc: | + Output of the snv vcf tumor calls + type: File? + outputSource: run_dragen_somatic_step/somatic_snv_vcf_out + somatic_snv_vcf_hard_filtered_out: + label: somatic snv vcf filtered + doc: | + Output of the snv vcf filtered tumor calls + type: File? + outputSource: run_dragen_somatic_step/somatic_snv_vcf_hard_filtered_out + somatic_structural_vcf_out: + label: somatic sv vcf filtered + doc: | + Output of the sv vcf filtered tumor calls. + Exists only if --enable-sv is set to true. + type: File?
+    outputSource: run_dragen_somatic_step/somatic_structural_vcf_out
+  multiqc_output_directory:
+    label: multiqc output directory
+    doc: |
+      The output directory for multiqc
+    type: Directory
+    outputSource: run_dragen_qc_step/output_directory
diff --git a/workflows/dragen-transcriptome-pipeline/4.3.6/dragen-transcriptome-pipeline__4.3.6.cwl b/workflows/dragen-transcriptome-pipeline/4.3.6/dragen-transcriptome-pipeline__4.3.6.cwl
new file mode 100644
index 00000000..143c72e9
--- /dev/null
+++ b/workflows/dragen-transcriptome-pipeline/4.3.6/dragen-transcriptome-pipeline__4.3.6.cwl
@@ -0,0 +1,448 @@
+cwlVersion: v1.1
+class: Workflow
+
+# Extensions
+$namespaces:
+  s: https://schema.org/
+$schemas:
+  - https://schema.org/version/latest/schemaorg-current-http.rdf
+
+# Metadata
+s:author:
+  class: s:Person
+  s:name: Sehrish Kanwal
+  s:email: sehrish.kanwal@umccr.org
+
+s:maintainer:
+  class: s:Person
+  s:name: Alexis Lucattini
+  s:email: Alexis.Lucattini@umccr.org
+  s:identifier: https://orcid.org/0000-0001-9754-647X
+
+# ID/Docs
+id: dragen-transcriptome-pipeline--4.3.6
+label: dragen-transcriptome-pipeline v(4.3.6)
+doc: |
+  Documentation for dragen-transcriptome-pipeline v4.3.6
+
+requirements:
+  InlineJavascriptRequirement:
+    expressionLib:
+      - $include: ../../../typescript-expressions/multiqc-tools/1.0.0/multiqc-tools__1.0.0.cwljs
+  ScatterFeatureRequirement: {}
+  MultipleInputFeatureRequirement: {}
+  StepInputExpressionRequirement: {}
+  SchemaDefRequirement:
+    types:
+      - $import: ../../../schemas/fastq-list-row/1.0.0/fastq-list-row__1.0.0.yaml
+
+inputs:
+  # Option 1
+  fastq_list:
+    label: fastq list
+    doc: |
+      CSV file that contains a list of FASTQ files
+      to process. read_1 and read_2 components in the CSV file must be presigned urls.
+    type: File?
+  # Option 2
+  fastq_list_rows:
+    label: Row of fastq lists
+    doc: |
+      The row of fastq lists.
+      Each row has the following attributes:
+        * RGID
+        * RGLB
+        * RGSM
+        * Lane
+        * Read1File
+        * Read2File (optional)
+    type: ../../../schemas/fastq-list-row/1.0.0/fastq-list-row__1.0.0.yaml#fastq-list-row[]?
+  # Option 3
+  bam_input:
+    label: bam input
+    doc: |
+      Input a BAM file for WTS analysis
+    type: File?
+    secondaryFiles:
+      - pattern: ".bai"
+        required: true
+  reference_tar:
+    label: reference tar
+    doc: |
+      Path to ref data tarball
+    type: File
+  # Transcript annotation file
+  annotation_file:
+    label: annotation file
+    doc: |
+      Path to annotation transcript file.
+    type: File
+  # Output naming options
+  output_prefix:
+    label: output file prefix
+    doc: |
+      The prefix given to all output files
+    type: string
+  # Alignment options
+  enable_map_align:
+    label: enable map align
+    doc: |
+      Enabled by default.
+      Set this value to false if using bam_input
+    type: boolean?
+  enable_map_align_output:
+    label: enable map align output
+    doc: |
+      Do you wish to have the output bam files present
+    type: boolean
+  enable_duplicate_marking:
+    label: enable duplicate marking
+    doc: |
+      Mark identical alignments as duplicates
+    type: boolean
+  enable_sort:
+    label: enable sort
+    doc: |
+      True by default, only set this to false if using --bam-input as input parameter
+    type: boolean?
+  # Quantification options
+  enable_rna_quantification:
+    label: enable rna quantification
+    type: boolean?
+    doc: |
+      Optional - Enable the quantification module - defaults to true
+  # Read trimming options
+  read_trimmers:
+    label: read trimming
+    type: string?
+    doc: |
+      To enable trimming filters in hard-trimming mode, set to a comma-separated list of the trimmer tools
+      you would like to use. To disable trimming, set to none. During mapping, artifacts are removed from all reads.
+      Read trimming is disabled by default.
+  soft_read_trimmers:
+    label: soft read trimming
+    type: string?
+    doc: |
+      To enable trimming filters in soft-trimming mode, set to a comma-separated list of the trimmer tools
+      you would like to use. To disable soft trimming, set to none. During mapping, reads are aligned as if trimmed,
+      and bases are not removed from the reads. Soft-trimming is enabled for the polyg filter by default.
+  trim_adapter_read1:
+    label: trim adapter read1
+    type: File?
+    doc: |
+      Specify the FASTA file that contains adapter sequences to trim from the 3' end of Read 1.
+  trim_adapter_read2:
+    label: trim adapter read2
+    type: File?
+    doc: |
+      Specify the FASTA file that contains adapter sequences to trim from the 3' end of Read 2.
+  trim_adapter_r1_5prime:
+    label: trim adapter r1 5prime
+    type: File?
+    doc: |
+      Specify the FASTA file that contains adapter sequences to trim from the 5' end of Read 1.
+      NB: the sequences should be in reverse order (with respect to their appearance in the FASTQ) but not complemented.
+  trim_adapter_r2_5prime:
+    label: trim adapter r2 5prime
+    type: File?
+    doc: |
+      Specify the FASTA file that contains adapter sequences to trim from the 5' end of Read 2.
+      NB: the sequences should be in reverse order (with respect to their appearance in the FASTQ) but not complemented.
+  trim_adapter_stringency:
+    label: trim adapter stringency
+    type: int?
+    doc: |
+      Specify the minimum number of adapter bases required for trimming
+  trim_r1_5prime:
+    label: trim r1 5prime
+    type: int?
+    doc: |
+      Specify the minimum number of bases to trim from the 5' end of Read 1 (default: 0).
+  trim_r1_3prime:
+    label: trim r1 3prime
+    type: int?
+    doc: |
+      Specify the minimum number of bases to trim from the 3' end of Read 1 (default: 0).
+  trim_r2_5prime:
+    label: trim r2 5prime
+    type: int?
+    doc: |
+      Specify the minimum number of bases to trim from the 5' end of Read 2 (default: 0).
+  trim_r2_3prime:
+    label: trim r2 3prime
+    type: int?
+    doc: |
+      Specify the minimum number of bases to trim from the 3' end of Read 2 (default: 0).
+  # Fusion calling options
+  enable_rna_gene_fusion:
+    label: enable rna gene fusion
+    type: boolean?
+    doc: |
+      Optional - Enable the DRAGEN Gene Fusion module - defaults to true
+  # Arriba fusion calling options
+  contigs:
+    label: contigs
+    type: string?
+    doc: |
+      Optional - List of interesting contigs
+      If not specified, defaults to 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y
+  blacklist:
+    label: blacklist
+    type: File
+    doc: |
+      File with blacklist range
+  reference_fasta:
+    label: reference Fasta
+    type: File
+    doc: |
+      FastA file with genome sequence
+    secondaryFiles:
+      - pattern: ".fai"
+        required: true
+  # Arriba drawing options
+  cytobands:
+    label: cytobands
+    type: File
+    doc: |
+      Coordinates of the Giemsa staining bands.
+  protein_domains:
+    label: protein domains
+    type: File
+    doc: |
+      GFF3 file containing the genomic coordinates of protein domains.
+  # qualimap inputs
+  java_mem:
+    label: java mem
+    type: string
+    doc: |
+      Set desired Java heap memory size
+    default: "20G"
+  algorithm:
+    label: algorithm
+    type: string?
+    doc: |
+      Counting algorithm:
+      uniquely-mapped-reads (the qualimap tool default) or proportional (the default set by this workflow).
+    default: "proportional"
+  # multiQC input
+  cl_config:
+    label: cl config
+    doc: |
+      command line config to supply additional config values on the command line.
+    type: string?
+  # Location of license
+  lic_instance_id_location:
+    label: license instance id location
+    doc: |
+      You may wish to place your own in.
+      Optional value, default set to /opt/instance-identity
+      which is a path inside the dragen container
+    type:
+      - File?
+      - string?
+
+steps:
+  # Step-1: Run Dragen transcriptome workflow
+  run_dragen_transcriptome_step:
+    label: run dragen transcriptome step
+    doc: |
+      Runs the dragen transcriptome workflow on the FPGA.
+      Takes in one of a fastq list, fastq list rows or a bam input, along with the reference tarball.
+      All other options available at the top of the workflow
+    in:
+      # Input fastq files to dragen
+      # Option 1
+      fastq_list:
+        source: fastq_list
+      # Option 2
+      fastq_list_rows:
+        source: fastq_list_rows
+      # Option 3
+      bam_input:
+        source: bam_input
+      reference_tar:
+        source: reference_tar
+      output_file_prefix:
+        source: output_prefix
+      output_directory:
+        source: output_prefix
+        valueFrom: "$(self)_dragen_transcriptome"
+      enable_map_align:
+        source: enable_map_align
+      enable_map_align_output:
+        source: enable_map_align_output
+      enable_duplicate_marking:
+        source: enable_duplicate_marking
+      enable_sort:
+        source: enable_sort
+      annotation_file:
+        source: annotation_file
+      enable_rna_quantification:
+        source: enable_rna_quantification
+      enable_rna_gene_fusion:
+        source: enable_rna_gene_fusion
+      read_trimmers:
+        source: read_trimmers
+      soft_read_trimmers:
+        source: soft_read_trimmers
+      trim_adapter_read1:
+        source: trim_adapter_read1
+      trim_adapter_read2:
+        source: trim_adapter_read2
+      trim_adapter_r1_5prime:
+        source: trim_adapter_r1_5prime
+      trim_adapter_r2_5prime:
+        source: trim_adapter_r2_5prime
+      trim_r1_5prime:
+        source: trim_r1_5prime
+      trim_r1_3prime:
+        source: trim_r1_3prime
+      trim_r2_5prime:
+        source: trim_r2_5prime
+      trim_r2_3prime:
+        source: trim_r2_3prime
+      trim_adapter_stringency:
+        source: trim_adapter_stringency
+      lic_instance_id_location:
+        source: lic_instance_id_location
+    out:
+      - id: dragen_transcriptome_directory
+      - id: dragen_bam_out
+    run: ../../../tools/dragen-transcriptome/4.3.6/dragen-transcriptome__4.3.6.cwl
+
+  # Step-2: Call Arriba fusion calling step
+  arriba_fusion_step:
+    label: arriba fusion step
+    doc: |
+      Runs Arriba fusion calling on the bam file produced by Dragen.
+    in:
+      bam_file:
+        source: run_dragen_transcriptome_step/dragen_bam_out
+      annotation:
+        source: annotation_file
+      reference:
+        source: reference_fasta
+      contigs:
+        source: contigs
+      blacklist:
+        source: blacklist
+    out:
+      - id: fusions
+      - id: discarded_fusions
+    run: ../../../tools/arriba-fusion-calling/2.4.0/arriba-fusion-calling__2.4.0.cwl
+
+  # Step-3: Call Arriba drawing script
+  arriba_drawing_step:
+    label: arriba drawing step
+    doc: |
+      Run Arriba drawing script for fusions predicted by previous step.
+    in:
+      annotation:
+        source: annotation_file
+      fusions:
+        source: arriba_fusion_step/fusions
+      bam_file:
+        source: run_dragen_transcriptome_step/dragen_bam_out
+      cytobands:
+        source: cytobands
+      protein_domains:
+        source: protein_domains
+    out:
+      - id: output_pdf
+    run: ../../../tools/arriba-drawing/2.4.0/arriba-drawing__2.4.0.cwl
+
+  # Step-4: Create Arriba output directory
+  create_arriba_output_directory:
+    label: create arriba output directory
+    doc: |
+      Create an output directory to contain the arriba files
+    in:
+      input_files:
+        source:
+          - arriba_fusion_step/fusions
+          - arriba_fusion_step/discarded_fusions
+          - arriba_drawing_step/output_pdf
+      output_directory_name:
+        source: output_prefix
+        valueFrom: "$(self)_arriba"
+    out:
+      - output_directory
+    run: ../../../tools/custom-create-directory/1.0.0/custom-create-directory__1.0.0.cwl
+
+  # Step-5: Run qualimap
+  run_qualimap_step:
+    label: run qualimap step
+    doc: |
+      Run qualimap step to generate additional QC metrics
+    in:
+      java_mem:
+        source: java_mem
+      algorithm:
+        source: algorithm
+      out_dir:
+        source: output_prefix
+        valueFrom: "$(self)_qualimap"
+      gtf:
+        source: annotation_file
+      input_bam:
+        source: run_dragen_transcriptome_step/dragen_bam_out
+    out:
+      - id: qualimap_qc
+    run: ../../../tools/qualimap/2.2.2/qualimap__2.2.2.cwl
+
+  # Step-6: Create dummy file for the qc step - NOTE(review): no such step is defined in this workflow, comment looks stale
+
+  # Step-7: Create multiQC report
+  dragen_qc_step:
+    label: dragen qc step
+    doc: |
+      The dragen qc step - this takes in an array of dirs
+    in:
+      input_directories:
+        source:
+          - run_dragen_transcriptome_step/dragen_transcriptome_directory
+          - run_qualimap_step/qualimap_qc
+        linkMerge: merge_flattened
+      output_directory_name:
+        source: output_prefix
+        valueFrom: "$(self)_dragen_transcriptome_multiqc"
+      output_filename:
+        source: output_prefix
+        valueFrom: "$(self)_dragen_transcriptome_multiqc.html"
+      title:
+        source: output_prefix
+        valueFrom: "UMCCR MultiQC Dragen Transcriptome Report for $(self)"
+    out:
+      - id: output_directory
+      - id: output_file
+    run: ../../../tools/multiqc/1.25.1/multiqc__1.25.1.cwl
+
+outputs:
+  # The dragen output directory
+  dragen_transcriptome_output_directory:
+    label: dragen transcriptome output directory
+    doc: |
+      The output directory containing all transcriptome output files
+    type: Directory
+    outputSource: run_dragen_transcriptome_step/dragen_transcriptome_directory
+  # The arriba output directory
+  arriba_output_directory:
+    label: arriba output directory
+    doc: |
+      The directory containing output files from arriba
+    type: Directory
+    outputSource: create_arriba_output_directory/output_directory
+  # The multiqc output directory
+  multiqc_output_directory:
+    label: multiqc output directory
+    doc: |
+      The output directory for multiqc
+    type: Directory
+    outputSource: dragen_qc_step/output_directory
+  # The qualimap output directory
+  qualimap_output_directory:
+    label: qualimap output directory
+    doc: |
+      The output directory containing the additional QC metrics generated by the qualimap step
+    type: Directory
+    outputSource: run_qualimap_step/qualimap_qc