From aecf38662964282567c8615fbca65239fd179397 Mon Sep 17 00:00:00 2001 From: tcezard Date: Mon, 9 Sep 2024 12:59:36 +0100 Subject: [PATCH] Address comments from review --- eva_sub_cli/jinja_templates/html_report.html | 6 ++--- .../jinja_templates/shallow_validation.html | 8 +++--- eva_sub_cli/validators/docker_validator.py | 27 +++++++------------ eva_sub_cli/validators/validator.py | 1 + .../expected_report_metadata_json.html | 2 +- .../expected_report_metadata_xlsx.html | 2 +- ...expected_shallow_metadata_xlsx_report.html | 2 +- 7 files changed, 20 insertions(+), 28 deletions(-) diff --git a/eva_sub_cli/jinja_templates/html_report.html b/eva_sub_cli/jinja_templates/html_report.html index cc50889..5398d00 100644 --- a/eva_sub_cli/jinja_templates/html_report.html +++ b/eva_sub_cli/jinja_templates/html_report.html @@ -4,7 +4,7 @@ {% from 'sample_name_check.html' import sample_name_check_report %} {% from 'fasta_check.html' import fasta_check_report %} {% from 'metadata_validation.html' import metadata_validation_report %} -{% from 'shallow_validation.html' import shallow_validation_report %} +{% from 'shallow_validation.html' import optional_shallow_validation_report %} @@ -47,9 +47,7 @@
eva-sub-cli v{{cli_version}}
-
- {{ shallow_validation_report(validation_results) }} -
+{{ optional_shallow_validation_report(validation_results) }}

Project Summary

diff --git a/eva_sub_cli/jinja_templates/shallow_validation.html b/eva_sub_cli/jinja_templates/shallow_validation.html index cf20851..e8dfe50 100644 --- a/eva_sub_cli/jinja_templates/shallow_validation.html +++ b/eva_sub_cli/jinja_templates/shallow_validation.html @@ -1,8 +1,9 @@ -{% macro shallow_validation_report(validation_results) -%} +{% macro optional_shallow_validation_report(validation_results) -%} {% set results = validation_results.get('shallow_validation', {}) %} {% if results.get('required') %} +
You requested to run the shallow validation, please run full validation before submitting the data
@@ -10,8 +11,8 @@ - - + + {% for vcf_file in results.get('metrics') %} @@ -22,6 +23,7 @@ {% endfor %}
VCF FileRecords validated in VCFRecords validated in FastaVariant lines validated in VCFEntries used in Fasta
+
{% endif %} {%- endmacro %} \ No newline at end of file diff --git a/eva_sub_cli/validators/docker_validator.py b/eva_sub_cli/validators/docker_validator.py index 2c862f3..6b01e49 100644 --- a/eva_sub_cli/validators/docker_validator.py +++ b/eva_sub_cli/validators/docker_validator.py @@ -37,24 +37,15 @@ def _validation_file_path_for(file_path): return f'{container_validation_dir}/{file_path}' def get_docker_validation_cmd(self): - if self.metadata_xlsx and not self.metadata_json: - docker_cmd = ''.join([ - f"{self.docker_path} exec {self.container_name} nextflow run eva_sub_cli/nextflow/validation.nf ", - f"--base_dir {container_validation_dir} ", - f"--vcf_files_mapping {self.mapping_file} ", - f"--metadata_xlsx {self.metadata_xlsx} ", - f"--shallow_validation true " if self.shallow_validation else "", - f"--output_dir {container_validation_output_dir}" - ]) - else: - docker_cmd = ''.join([ - f"{self.docker_path} exec {self.container_name} nextflow run eva_sub_cli/nextflow/validation.nf ", - f"--base_dir {container_validation_dir} ", - f"--vcf_files_mapping {self.mapping_file} ", - f"--metadata_json {self.metadata_json} ", - f"--shallow_validation true " if self.shallow_validation else "", - f"--output_dir {container_validation_output_dir}" - ]) + docker_cmd = ''.join([ + f"{self.docker_path} exec {self.container_name} nextflow run eva_sub_cli/nextflow/validation.nf ", + f"--base_dir {container_validation_dir} ", + f"--vcf_files_mapping {self.mapping_file} ", + f"--metadata_xlsx {self.metadata_xlsx} " if self.metadata_xlsx and not self.metadata_json + else f"--metadata_json {self.metadata_json} ", + f"--shallow_validation true " if self.shallow_validation else "", + f"--output_dir {container_validation_output_dir}" + ]) return docker_cmd def run_docker_validator(self): diff --git a/eva_sub_cli/validators/validator.py b/eva_sub_cli/validators/validator.py index 4a1984a..f3e591d 100755 --- a/eva_sub_cli/validators/validator.py +++ b/eva_sub_cli/validators/validator.py @@ -167,6 +167,7 @@ def verify_ready_for_submission_to_eva(self): )), self.results.get('sample_check', {}).get('overall_differences', True) is False, len(self.results.get('metadata_check', {}).get('spreadsheet_errors', [])) == 0, + len(self.results.get('metadata_check', {}).get('json_errors', [])) == 0, any(( self.results['shallow_validation']['requested'] is False, self.results['shallow_validation'].get('required', True) is False diff --git a/tests/resources/validation_reports/expected_report_metadata_json.html b/tests/resources/validation_reports/expected_report_metadata_json.html index 01a2c35..9972434 100644 --- a/tests/resources/validation_reports/expected_report_metadata_json.html +++ b/tests/resources/validation_reports/expected_report_metadata_json.html @@ -19,4 +19,4 @@ .fail { background-color: #FFB6C1; } .pass { background-color: #90EE90; } .info { background-color: #dadada; } - .error-list, .no-show { display: none; }

Validation Report

eva-sub-cli vcligeneratedversion

Project Summary

General details about the project

Project Title: My cool project

Validation Date: 2023-08-31 12:34:56

Submission Directory: /test/submission/dir

Files mapping
VCF FileFasta FileAnalysis
input_fail.vcfinput_fail.faA
input_pass.vcfinput_pass.faB
input_test.vcfinput_test.facould not be linked

Metadata validation results

Ensures that required fields are present and values are formatted correctly. For requirements, please refer to the EVA website.
❌ Metadata validation check
Full report: /path/to/json/metadata/report
JSON PropertyError Description
.filesshould have required property 'files'
/project.titleshould have required property 'title'
/project.descriptionshould have required property 'description'
/project.taxIdshould have required property 'taxId'
/project.centreshould have required property 'centre'
/analysis/0.analysisTitleshould have required property 'analysisTitle'
/analysis/0.descriptionshould have required property 'description'
/analysis/0.experimentTypeshould have required property 'experimentType'
/analysis/0.referenceGenomeshould have required property 'referenceGenome'
/sample/0.bioSampleAccessionshould have required property 'bioSampleAccession'
/sample/0.bioSampleObjectshould have required property 'bioSampleObject'
/sample/0should match exactly one schema in oneOf

VCF validation results

Checks whether each file is compliant with the VCF specification. Also checks whether the variants' reference alleles match against the reference assembly.

input_fail.vcf

❌ Assembly check: 26/36 (72.22%)
First 10 errors per category are below. Full report: /path/to/assembly_failed/report
CategoryError
Parsing ErrorThe assembly checking could not be completed: Contig 'chr23' not found in assembly report
mismatch errorChromosome 1, position 35549, reference allele 'G' does not match the reference sequence, expected 'c'
mismatch errorChromosome 1, position 35595, reference allele 'G' does not match the reference sequence, expected 'a'
mismatch errorChromosome 1, position 35618, reference allele 'G' does not match the reference sequence, expected 'c'
mismatch errorChromosome 1, position 35626, reference allele 'A' does not match the reference sequence, expected 'g'
mismatch errorChromosome 1, position 35639, reference allele 'T' does not match the reference sequence, expected 'c'
mismatch errorChromosome 1, position 35643, reference allele 'T' does not match the reference sequence, expected 'g'
mismatch errorChromosome 1, position 35717, reference allele 'T' does not match the reference sequence, expected 'g'
mismatch errorChromosome 1, position 35819, reference allele 'T' does not match the reference sequence, expected 'a'
mismatch errorChromosome 1, position 35822, reference allele 'T' does not match the reference sequence, expected 'c'
❌ VCF check: 1 critical errors, 1 non-critical errors
First 10 errors per category are below. Full report: /path/to/vcf_failed/report
CategoryError
critical errorLine 4: Error in meta-data section.
non-critical errorSample #11, field AD does not match the meta specification Number=R (expected 2 value(s)). AD=..

input_passed.vcf

✔ Assembly check: 247/247 (100.0%)
✔ VCF check: 0 critical errors, 0 non-critical errors

Sample name concordance check

Checks whether information in the metadata is concordant with that contained in the VCF files, in particular sample names.
Analysis A: Sample names in metadata do not match with those in VCF files
CategoryFirst 5 Errors For CategoryLink To View All Errors
Samples described in the metadata but not in the VCF filesSampleA1, SampleA2 , SampleA3, SampleA4, SampleA5Show All Errors For Category
Samples in the VCF files but not described in the metadataA1Sample , A2Sample, A3Sample, A4Sample, A5SampleShow All Errors For Category
All Errors For Category - Samples described in the metadata but not in the VCF files:
  1. •SampleA1
  2. SampleA2•
  3. SampleA3
  4. SampleA4
  5. SampleA5
  6. SampleA6
  7. SampleA7
  8. SampleA8
  9. SampleA9
  10. SampleA10
Hide
All Errors For Category - Samples in the VCF files but not described in the metadata:
  1. A1Sample•
  2. •A2Sample
  3. A3Sample
  4. A4Sample
  5. A5Sample
  6. A6Sample
  7. A7Sample
  8. A8Sample
  9. A9Sample
  10. A10Sample
Hide
Analysis B: Sample names in metadata match with those in VCF files
Analysis C: Sample names in metadata do not match with those in VCF files
CategoryFirst 5 Errors For CategoryLink To View All Errors
Samples described in the metadata but not in the VCF filesSampleC1 , SampleC2, SampleC3, SampleC4Show All Errors For Category
Samples in the VCF files but not described in the metadataC1Sample , C2Sample, C3Sample, C4SampleShow All Errors For Category
All Errors For Category - Samples described in the metadata but not in the VCF files:
  1. SampleC1•
  2. •SampleC2
  3. SampleC3
  4. SampleC4
Hide
All Errors For Category - Samples in the VCF files but not described in the metadata:
  1. C1Sample•
  2. •C2Sample
  3. C3Sample
  4. C4Sample
Hide

Reference genome INSDC check

Checks that the reference sequences in the FASTA file used to call the variants are accessioned in INSDC. Also checks if the reference assembly accession in the metadata matches the one determined from the FASTA file.

metadata_asm_match.fa

✔ All sequences are INSDC accessioned
✔ Analysis A: Assembly accession in metadata is compatible

metadata_asm_not_found.fa

✔ All sequences are INSDC accessioned
❌ No assembly accession found in metadata
Full report: /path/to/metadata_asm_not_found.yml
CategoryAccessions
Assembly accession found in metadataNot found
Assembly accession(s) compatible with FASTAGCA_1

metadata_asm_not_match.fa

✔ All sequences are INSDC accessioned
❌ Analysis B: Assembly accession in metadata is not compatible
Full report: /path/to/metadata_asm_not_match.yml
CategoryAccessions
Assembly accession found in metadataGCA_2
Assembly accession(s) compatible with FASTAGCA_1

metadata_error.fa

Warning: The following results may be incomplete due to problems with external services. Please try again later for complete results.
Error message: 500 Server Error: Internal Server Error for url: https://www.ebi.ac.uk/eva/webservices/contig-alias/v1/chromosomes/md5checksum/hjfdoijsfc47hfg0gh9qwjrve
✔ All sequences are INSDC accessioned
✔ Analysis C: Assembly accession in metadata is compatible

not_all_insdc.fa

❌ Some sequences are not INSDC accessioned
First 10 sequences not in INSDC. Full report: /path/to/not_all_insdc_check.yml
Sequence nameRefget md5
2hjfdoijsfc47hfg0gh9qwjrve
✔ Analysis A: Assembly accession in metadata is compatible
\ No newline at end of file + .error-list, .no-show { display: none; }

Validation Report

eva-sub-cli v0.4.dev82+g45e6d68.d20240909

Project Summary

General details about the project

Project Title: My cool project

Validation Date: 2023-08-31 12:34:56

Submission Directory: /test/submission/dir

Files mapping
VCF FileFasta FileAnalysis
input_fail.vcfinput_fail.faA
input_pass.vcfinput_pass.faB
input_test.vcfinput_test.facould not be linked

Metadata validation results

Ensures that required fields are present and values are formatted correctly. For requirements, please refer to the EVA website.
❌ Metadata validation check
Full report: /path/to/json/metadata/report
JSON PropertyError Description
.filesshould have required property 'files'
/project.titleshould have required property 'title'
/project.descriptionshould have required property 'description'
/project.taxIdshould have required property 'taxId'
/project.centreshould have required property 'centre'
/analysis/0.analysisTitleshould have required property 'analysisTitle'
/analysis/0.descriptionshould have required property 'description'
/analysis/0.experimentTypeshould have required property 'experimentType'
/analysis/0.referenceGenomeshould have required property 'referenceGenome'
/sample/0.bioSampleAccessionshould have required property 'bioSampleAccession'
/sample/0.bioSampleObjectshould have required property 'bioSampleObject'
/sample/0should match exactly one schema in oneOf

VCF validation results

Checks whether each file is compliant with the VCF specification. Also checks whether the variants' reference alleles match against the reference assembly.

input_fail.vcf

❌ Assembly check: 26/36 (72.22%)
First 10 errors per category are below. Full report: /path/to/assembly_failed/report
CategoryError
Parsing ErrorThe assembly checking could not be completed: Contig 'chr23' not found in assembly report
mismatch errorChromosome 1, position 35549, reference allele 'G' does not match the reference sequence, expected 'c'
mismatch errorChromosome 1, position 35595, reference allele 'G' does not match the reference sequence, expected 'a'
mismatch errorChromosome 1, position 35618, reference allele 'G' does not match the reference sequence, expected 'c'
mismatch errorChromosome 1, position 35626, reference allele 'A' does not match the reference sequence, expected 'g'
mismatch errorChromosome 1, position 35639, reference allele 'T' does not match the reference sequence, expected 'c'
mismatch errorChromosome 1, position 35643, reference allele 'T' does not match the reference sequence, expected 'g'
mismatch errorChromosome 1, position 35717, reference allele 'T' does not match the reference sequence, expected 'g'
mismatch errorChromosome 1, position 35819, reference allele 'T' does not match the reference sequence, expected 'a'
mismatch errorChromosome 1, position 35822, reference allele 'T' does not match the reference sequence, expected 'c'
❌ VCF check: 1 critical errors, 1 non-critical errors
First 10 errors per category are below. Full report: /path/to/vcf_failed/report
CategoryError
critical errorLine 4: Error in meta-data section.
non-critical errorSample #11, field AD does not match the meta specification Number=R (expected 2 value(s)). AD=..

input_passed.vcf

✔ Assembly check: 247/247 (100.0%)
✔ VCF check: 0 critical errors, 0 non-critical errors

Sample name concordance check

Checks whether information in the metadata is concordant with that contained in the VCF files, in particular sample names.
Analysis A: Sample names in metadata do not match with those in VCF files
CategoryFirst 5 Errors For CategoryLink To View All Errors
Samples described in the metadata but not in the VCF filesSampleA1, SampleA2 , SampleA3, SampleA4, SampleA5Show All Errors For Category
Samples in the VCF files but not described in the metadataA1Sample , A2Sample, A3Sample, A4Sample, A5SampleShow All Errors For Category
All Errors For Category - Samples described in the metadata but not in the VCF files:
  1. •SampleA1
  2. SampleA2•
  3. SampleA3
  4. SampleA4
  5. SampleA5
  6. SampleA6
  7. SampleA7
  8. SampleA8
  9. SampleA9
  10. SampleA10
Hide
All Errors For Category - Samples in the VCF files but not described in the metadata:
  1. A1Sample•
  2. •A2Sample
  3. A3Sample
  4. A4Sample
  5. A5Sample
  6. A6Sample
  7. A7Sample
  8. A8Sample
  9. A9Sample
  10. A10Sample
Hide
Analysis B: Sample names in metadata match with those in VCF files
Analysis C: Sample names in metadata do not match with those in VCF files
CategoryFirst 5 Errors For CategoryLink To View All Errors
Samples described in the metadata but not in the VCF filesSampleC1 , SampleC2, SampleC3, SampleC4Show All Errors For Category
Samples in the VCF files but not described in the metadataC1Sample , C2Sample, C3Sample, C4SampleShow All Errors For Category
All Errors For Category - Samples described in the metadata but not in the VCF files:
  1. SampleC1•
  2. •SampleC2
  3. SampleC3
  4. SampleC4
Hide
All Errors For Category - Samples in the VCF files but not described in the metadata:
  1. C1Sample•
  2. •C2Sample
  3. C3Sample
  4. C4Sample
Hide

Reference genome INSDC check

Checks that the reference sequences in the FASTA file used to call the variants are accessioned in INSDC. Also checks if the reference assembly accession in the metadata matches the one determined from the FASTA file.

metadata_asm_match.fa

✔ All sequences are INSDC accessioned
✔ Analysis A: Assembly accession in metadata is compatible

metadata_asm_not_found.fa

✔ All sequences are INSDC accessioned
❌ No assembly accession found in metadata
Full report: /path/to/metadata_asm_not_found.yml
CategoryAccessions
Assembly accession found in metadataNot found
Assembly accession(s) compatible with FASTAGCA_1

metadata_asm_not_match.fa

✔ All sequences are INSDC accessioned
❌ Analysis B: Assembly accession in metadata is not compatible
Full report: /path/to/metadata_asm_not_match.yml
CategoryAccessions
Assembly accession found in metadataGCA_2
Assembly accession(s) compatible with FASTAGCA_1

metadata_error.fa

Warning: The following results may be incomplete due to problems with external services. Please try again later for complete results.
Error message: 500 Server Error: Internal Server Error for url: https://www.ebi.ac.uk/eva/webservices/contig-alias/v1/chromosomes/md5checksum/hjfdoijsfc47hfg0gh9qwjrve
✔ All sequences are INSDC accessioned
✔ Analysis C: Assembly accession in metadata is compatible

not_all_insdc.fa

❌ Some sequences are not INSDC accessioned
First 10 sequences not in INSDC. Full report: /path/to/not_all_insdc_check.yml
Sequence nameRefget md5
2hjfdoijsfc47hfg0gh9qwjrve
✔ Analysis A: Assembly accession in metadata is compatible
\ No newline at end of file diff --git a/tests/resources/validation_reports/expected_report_metadata_xlsx.html b/tests/resources/validation_reports/expected_report_metadata_xlsx.html index a1576d8..fc7dbb1 100644 --- a/tests/resources/validation_reports/expected_report_metadata_xlsx.html +++ b/tests/resources/validation_reports/expected_report_metadata_xlsx.html @@ -19,4 +19,4 @@ .fail { background-color: #FFB6C1; } .pass { background-color: #90EE90; } .info { background-color: #dadada; } - .error-list, .no-show { display: none; }

Validation Report

eva-sub-cli vcligeneratedversion

Project Summary

General details about the project

Project Title: My cool project

Validation Date: 2023-08-31 12:34:56

Submission Directory: /test/submission/dir

Files mapping
VCF FileFasta FileAnalysis
input_fail.vcfinput_fail.faA
input_pass.vcfinput_pass.faB
input_test.vcfinput_test.facould not be linked

Metadata validation results

Ensures that required fields are present and values are formatted correctly. For requirements, please refer to the EVA website.
❌ Metadata validation check
Full report: /path/to/metadata/metadata_spreadsheet_validation.txt
SheetRowColumnDescription
FilesSheet "Files" is missing
Project2Project TitleColumn "Project Title" is not populated
Project2DescriptionColumn "Description" is not populated
Project2Tax IDColumn "Tax ID" is not populated
Project2CenterColumn "Center" is not populated
Analysis2Analysis TitleColumn "Analysis Title" is not populated
Analysis2DescriptionColumn "Description" is not populated
Analysis2Experiment TypeColumn "Experiment Type" is not populated
Analysis2ReferenceColumn "Reference" is not populated
Sample3Sample AccessionColumn "Sample Accession" is not populated

VCF validation results

Checks whether each file is compliant with the VCF specification. Also checks whether the variants' reference alleles match against the reference assembly.

input_fail.vcf

❌ Assembly check: 26/36 (72.22%)
First 10 errors per category are below. Full report: /path/to/assembly_failed/report
CategoryError
Parsing ErrorThe assembly checking could not be completed: Contig 'chr23' not found in assembly report
mismatch errorChromosome 1, position 35549, reference allele 'G' does not match the reference sequence, expected 'c'
mismatch errorChromosome 1, position 35595, reference allele 'G' does not match the reference sequence, expected 'a'
mismatch errorChromosome 1, position 35618, reference allele 'G' does not match the reference sequence, expected 'c'
mismatch errorChromosome 1, position 35626, reference allele 'A' does not match the reference sequence, expected 'g'
mismatch errorChromosome 1, position 35639, reference allele 'T' does not match the reference sequence, expected 'c'
mismatch errorChromosome 1, position 35643, reference allele 'T' does not match the reference sequence, expected 'g'
mismatch errorChromosome 1, position 35717, reference allele 'T' does not match the reference sequence, expected 'g'
mismatch errorChromosome 1, position 35819, reference allele 'T' does not match the reference sequence, expected 'a'
mismatch errorChromosome 1, position 35822, reference allele 'T' does not match the reference sequence, expected 'c'
❌ VCF check: 1 critical errors, 1 non-critical errors
First 10 errors per category are below. Full report: /path/to/vcf_failed/report
CategoryError
critical errorLine 4: Error in meta-data section.
non-critical errorSample #11, field AD does not match the meta specification Number=R (expected 2 value(s)). AD=..

input_passed.vcf

✔ Assembly check: 247/247 (100.0%)
✔ VCF check: 0 critical errors, 0 non-critical errors

Sample name concordance check

Checks whether information in the metadata is concordant with that contained in the VCF files, in particular sample names.
Analysis A: Sample names in metadata do not match with those in VCF files
CategoryFirst 5 Errors For CategoryLink To View All Errors
Samples described in the metadata but not in the VCF filesSampleA1, SampleA2 , SampleA3, SampleA4, SampleA5Show All Errors For Category
Samples in the VCF files but not described in the metadataA1Sample , A2Sample, A3Sample, A4Sample, A5SampleShow All Errors For Category
All Errors For Category - Samples described in the metadata but not in the VCF files:
  1. •SampleA1
  2. SampleA2•
  3. SampleA3
  4. SampleA4
  5. SampleA5
  6. SampleA6
  7. SampleA7
  8. SampleA8
  9. SampleA9
  10. SampleA10
Hide
All Errors For Category - Samples in the VCF files but not described in the metadata:
  1. A1Sample•
  2. •A2Sample
  3. A3Sample
  4. A4Sample
  5. A5Sample
  6. A6Sample
  7. A7Sample
  8. A8Sample
  9. A9Sample
  10. A10Sample
Hide
Analysis B: Sample names in metadata match with those in VCF files
Analysis C: Sample names in metadata do not match with those in VCF files
CategoryFirst 5 Errors For CategoryLink To View All Errors
Samples described in the metadata but not in the VCF filesSampleC1 , SampleC2, SampleC3, SampleC4Show All Errors For Category
Samples in the VCF files but not described in the metadataC1Sample , C2Sample, C3Sample, C4SampleShow All Errors For Category
All Errors For Category - Samples described in the metadata but not in the VCF files:
  1. SampleC1•
  2. •SampleC2
  3. SampleC3
  4. SampleC4
Hide
All Errors For Category - Samples in the VCF files but not described in the metadata:
  1. C1Sample•
  2. •C2Sample
  3. C3Sample
  4. C4Sample
Hide

Reference genome INSDC check

Checks that the reference sequences in the FASTA file used to call the variants are accessioned in INSDC. Also checks if the reference assembly accession in the metadata matches the one determined from the FASTA file.

metadata_asm_match.fa

✔ All sequences are INSDC accessioned
✔ Analysis A: Assembly accession in metadata is compatible

metadata_asm_not_found.fa

✔ All sequences are INSDC accessioned
❌ No assembly accession found in metadata
Full report: /path/to/metadata_asm_not_found.yml
CategoryAccessions
Assembly accession found in metadataNot found
Assembly accession(s) compatible with FASTAGCA_1

metadata_asm_not_match.fa

✔ All sequences are INSDC accessioned
❌ Analysis B: Assembly accession in metadata is not compatible
Full report: /path/to/metadata_asm_not_match.yml
CategoryAccessions
Assembly accession found in metadataGCA_2
Assembly accession(s) compatible with FASTAGCA_1

metadata_error.fa

Warning: The following results may be incomplete due to problems with external services. Please try again later for complete results.
Error message: 500 Server Error: Internal Server Error for url: https://www.ebi.ac.uk/eva/webservices/contig-alias/v1/chromosomes/md5checksum/hjfdoijsfc47hfg0gh9qwjrve
✔ All sequences are INSDC accessioned
✔ Analysis C: Assembly accession in metadata is compatible

not_all_insdc.fa

❌ Some sequences are not INSDC accessioned
First 10 sequences not in INSDC. Full report: /path/to/not_all_insdc_check.yml
Sequence nameRefget md5
2hjfdoijsfc47hfg0gh9qwjrve
✔ Analysis A: Assembly accession in metadata is compatible
\ No newline at end of file + .error-list, .no-show { display: none; }

Validation Report

eva-sub-cli v0.4.dev82+g45e6d68.d20240909

Project Summary

General details about the project

Project Title: My cool project

Validation Date: 2023-08-31 12:34:56

Submission Directory: /test/submission/dir

Files mapping
VCF FileFasta FileAnalysis
input_fail.vcfinput_fail.faA
input_pass.vcfinput_pass.faB
input_test.vcfinput_test.facould not be linked

Metadata validation results

Ensures that required fields are present and values are formatted correctly. For requirements, please refer to the EVA website.
❌ Metadata validation check
Full report: /path/to/metadata/metadata_spreadsheet_validation.txt
SheetRowColumnDescription
FilesSheet "Files" is missing
Project2Project TitleColumn "Project Title" is not populated
Project2DescriptionColumn "Description" is not populated
Project2Tax IDColumn "Tax ID" is not populated
Project2CenterColumn "Center" is not populated
Analysis2Analysis TitleColumn "Analysis Title" is not populated
Analysis2DescriptionColumn "Description" is not populated
Analysis2Experiment TypeColumn "Experiment Type" is not populated
Analysis2ReferenceColumn "Reference" is not populated
Sample3Sample AccessionColumn "Sample Accession" is not populated

VCF validation results

Checks whether each file is compliant with the VCF specification. Also checks whether the variants' reference alleles match against the reference assembly.

input_fail.vcf

❌ Assembly check: 26/36 (72.22%)
First 10 errors per category are below. Full report: /path/to/assembly_failed/report
CategoryError
Parsing ErrorThe assembly checking could not be completed: Contig 'chr23' not found in assembly report
mismatch errorChromosome 1, position 35549, reference allele 'G' does not match the reference sequence, expected 'c'
mismatch errorChromosome 1, position 35595, reference allele 'G' does not match the reference sequence, expected 'a'
mismatch errorChromosome 1, position 35618, reference allele 'G' does not match the reference sequence, expected 'c'
mismatch errorChromosome 1, position 35626, reference allele 'A' does not match the reference sequence, expected 'g'
mismatch errorChromosome 1, position 35639, reference allele 'T' does not match the reference sequence, expected 'c'
mismatch errorChromosome 1, position 35643, reference allele 'T' does not match the reference sequence, expected 'g'
mismatch errorChromosome 1, position 35717, reference allele 'T' does not match the reference sequence, expected 'g'
mismatch errorChromosome 1, position 35819, reference allele 'T' does not match the reference sequence, expected 'a'
mismatch errorChromosome 1, position 35822, reference allele 'T' does not match the reference sequence, expected 'c'
❌ VCF check: 1 critical errors, 1 non-critical errors
First 10 errors per category are below. Full report: /path/to/vcf_failed/report
CategoryError
critical errorLine 4: Error in meta-data section.
non-critical errorSample #11, field AD does not match the meta specification Number=R (expected 2 value(s)). AD=..

input_passed.vcf

✔ Assembly check: 247/247 (100.0%)
✔ VCF check: 0 critical errors, 0 non-critical errors

Sample name concordance check

Checks whether information in the metadata is concordant with that contained in the VCF files, in particular sample names.
Analysis A: Sample names in metadata do not match with those in VCF files
CategoryFirst 5 Errors For CategoryLink To View All Errors
Samples described in the metadata but not in the VCF filesSampleA1, SampleA2 , SampleA3, SampleA4, SampleA5Show All Errors For Category
Samples in the VCF files but not described in the metadataA1Sample , A2Sample, A3Sample, A4Sample, A5SampleShow All Errors For Category
All Errors For Category - Samples described in the metadata but not in the VCF files:
  1. •SampleA1
  2. SampleA2•
  3. SampleA3
  4. SampleA4
  5. SampleA5
  6. SampleA6
  7. SampleA7
  8. SampleA8
  9. SampleA9
  10. SampleA10
Hide
All Errors For Category - Samples in the VCF files but not described in the metadata:
  1. A1Sample•
  2. •A2Sample
  3. A3Sample
  4. A4Sample
  5. A5Sample
  6. A6Sample
  7. A7Sample
  8. A8Sample
  9. A9Sample
  10. A10Sample
Hide
Analysis B: Sample names in metadata match with those in VCF files
Analysis C: Sample names in metadata do not match with those in VCF files
CategoryFirst 5 Errors For CategoryLink To View All Errors
Samples described in the metadata but not in the VCF filesSampleC1 , SampleC2, SampleC3, SampleC4Show All Errors For Category
Samples in the VCF files but not described in the metadataC1Sample , C2Sample, C3Sample, C4SampleShow All Errors For Category
All Errors For Category - Samples described in the metadata but not in the VCF files:
  1. SampleC1•
  2. •SampleC2
  3. SampleC3
  4. SampleC4
Hide
All Errors For Category - Samples in the VCF files but not described in the metadata:
  1. C1Sample•
  2. •C2Sample
  3. C3Sample
  4. C4Sample
Hide

Reference genome INSDC check

Checks that the reference sequences in the FASTA file used to call the variants are accessioned in INSDC. Also checks if the reference assembly accession in the metadata matches the one determined from the FASTA file.

metadata_asm_match.fa

✔ All sequences are INSDC accessioned
✔ Analysis A: Assembly accession in metadata is compatible

metadata_asm_not_found.fa

✔ All sequences are INSDC accessioned
❌ No assembly accession found in metadata
Full report: /path/to/metadata_asm_not_found.yml
CategoryAccessions
Assembly accession found in metadataNot found
Assembly accession(s) compatible with FASTAGCA_1

metadata_asm_not_match.fa

✔ All sequences are INSDC accessioned
❌ Analysis B: Assembly accession in metadata is not compatible
Full report: /path/to/metadata_asm_not_match.yml
CategoryAccessions
Assembly accession found in metadataGCA_2
Assembly accession(s) compatible with FASTAGCA_1

metadata_error.fa

Warning: The following results may be incomplete due to problems with external services. Please try again later for complete results.
Error message: 500 Server Error: Internal Server Error for url: https://www.ebi.ac.uk/eva/webservices/contig-alias/v1/chromosomes/md5checksum/hjfdoijsfc47hfg0gh9qwjrve
✔ All sequences are INSDC accessioned
✔ Analysis C: Assembly accession in metadata is compatible

not_all_insdc.fa

❌ Some sequences are not INSDC accessioned
First 10 sequences not in INSDC. Full report: /path/to/not_all_insdc_check.yml
Sequence nameRefget md5
2hjfdoijsfc47hfg0gh9qwjrve
✔ Analysis A: Assembly accession in metadata is compatible
\ No newline at end of file diff --git a/tests/resources/validation_reports/expected_shallow_metadata_xlsx_report.html b/tests/resources/validation_reports/expected_shallow_metadata_xlsx_report.html index 8f26e24..cf1cf7f 100644 --- a/tests/resources/validation_reports/expected_shallow_metadata_xlsx_report.html +++ b/tests/resources/validation_reports/expected_shallow_metadata_xlsx_report.html @@ -19,4 +19,4 @@ .fail { background-color: #FFB6C1; } .pass { background-color: #90EE90; } .info { background-color: #dadada; } - .error-list, .no-show { display: none; }

Validation Report

eva-sub-cli vcligeneratedversion
You requested to run the shallow validation, please run full validation before submitting the data
VCF FileRecords validated in VCFRecords validated in Fasta
input_fail.vcf1000024
input_passed.vcf1000024

Project Summary

General details about the project

Project Title: My cool project

Validation Date: 2023-08-31 12:34:56

Submission Directory: /test/submission/dir

Files mapping
VCF FileFasta FileAnalysis
input_fail.vcfinput_fail.faA
input_pass.vcfinput_pass.faB
input_test.vcfinput_test.facould not be linked

Metadata validation results

Ensures that required fields are present and values are formatted correctly. For requirements, please refer to the EVA website.
❌ Metadata validation check
Full report: /path/to/metadata/metadata_spreadsheet_validation.txt
SheetRowColumnDescription
FilesSheet "Files" is missing
Project2Project TitleColumn "Project Title" is not populated
Project2DescriptionColumn "Description" is not populated
Project2Tax IDColumn "Tax ID" is not populated
Project2CenterColumn "Center" is not populated
Analysis2Analysis TitleColumn "Analysis Title" is not populated
Analysis2DescriptionColumn "Description" is not populated
Analysis2Experiment TypeColumn "Experiment Type" is not populated
Analysis2ReferenceColumn "Reference" is not populated
Sample3Sample AccessionColumn "Sample Accession" is not populated

VCF validation results

Checks whether each file is compliant with the VCF specification. Also checks whether the variants' reference alleles match against the reference assembly.

input_fail.vcf

❌ Assembly check: 26/36 (72.22%)
First 10 errors per category are below. Full report: /path/to/assembly_failed/report
CategoryError
Parsing ErrorThe assembly checking could not be completed: Contig 'chr23' not found in assembly report
mismatch errorChromosome 1, position 35549, reference allele 'G' does not match the reference sequence, expected 'c'
mismatch errorChromosome 1, position 35595, reference allele 'G' does not match the reference sequence, expected 'a'
mismatch errorChromosome 1, position 35618, reference allele 'G' does not match the reference sequence, expected 'c'
mismatch errorChromosome 1, position 35626, reference allele 'A' does not match the reference sequence, expected 'g'
mismatch errorChromosome 1, position 35639, reference allele 'T' does not match the reference sequence, expected 'c'
mismatch errorChromosome 1, position 35643, reference allele 'T' does not match the reference sequence, expected 'g'
mismatch errorChromosome 1, position 35717, reference allele 'T' does not match the reference sequence, expected 'g'
mismatch errorChromosome 1, position 35819, reference allele 'T' does not match the reference sequence, expected 'a'
mismatch errorChromosome 1, position 35822, reference allele 'T' does not match the reference sequence, expected 'c'
❌ VCF check: 1 critical errors, 1 non-critical errors
First 10 errors per category are below. Full report: /path/to/vcf_failed/report
CategoryError
critical errorLine 4: Error in meta-data section.
non-critical errorSample #11, field AD does not match the meta specification Number=R (expected 2 value(s)). AD=..

input_passed.vcf

✔ Assembly check: 247/247 (100.0%)
✔ VCF check: 0 critical errors, 0 non-critical errors

Sample name concordance check

Checks whether information in the metadata is concordant with that contained in the VCF files, in particular sample names.
Analysis A: Sample names in metadata do not match with those in VCF files
CategoryFirst 5 Errors For CategoryLink To View All Errors
Samples described in the metadata but not in the VCF filesSampleA1, SampleA2 , SampleA3, SampleA4, SampleA5Show All Errors For Category
Samples in the VCF files but not described in the metadataA1Sample , A2Sample, A3Sample, A4Sample, A5SampleShow All Errors For Category
All Errors For Category - Samples described in the metadata but not in the VCF files:
  1. •SampleA1
  2. SampleA2•
  3. SampleA3
  4. SampleA4
  5. SampleA5
  6. SampleA6
  7. SampleA7
  8. SampleA8
  9. SampleA9
  10. SampleA10
Hide
All Errors For Category - Samples in the VCF files but not described in the metadata:
  1. A1Sample•
  2. •A2Sample
  3. A3Sample
  4. A4Sample
  5. A5Sample
  6. A6Sample
  7. A7Sample
  8. A8Sample
  9. A9Sample
  10. A10Sample
Hide
Analysis B: Sample names in metadata match with those in VCF files
Analysis C: Sample names in metadata do not match with those in VCF files
CategoryFirst 5 Errors For CategoryLink To View All Errors
Samples described in the metadata but not in the VCF filesSampleC1 , SampleC2, SampleC3, SampleC4Show All Errors For Category
Samples in the VCF files but not described in the metadataC1Sample , C2Sample, C3Sample, C4SampleShow All Errors For Category
All Errors For Category - Samples described in the metadata but not in the VCF files:
  1. SampleC1•
  2. •SampleC2
  3. SampleC3
  4. SampleC4
Hide
All Errors For Category - Samples in the VCF files but not described in the metadata:
  1. C1Sample•
  2. •C2Sample
  3. C3Sample
  4. C4Sample
Hide

Reference genome INSDC check

Checks that the reference sequences in the FASTA file used to call the variants are accessioned in INSDC. Also checks if the reference assembly accession in the metadata matches the one determined from the FASTA file.

metadata_asm_match.fa

✔ All sequences are INSDC accessioned
✔ Analysis A: Assembly accession in metadata is compatible

metadata_asm_not_found.fa

✔ All sequences are INSDC accessioned
❌ No assembly accession found in metadata
Full report: /path/to/metadata_asm_not_found.yml
CategoryAccessions
Assembly accession found in metadataNot found
Assembly accession(s) compatible with FASTAGCA_1

metadata_asm_not_match.fa

✔ All sequences are INSDC accessioned
❌ Analysis B: Assembly accession in metadata is not compatible
Full report: /path/to/metadata_asm_not_match.yml
CategoryAccessions
Assembly accession found in metadataGCA_2
Assembly accession(s) compatible with FASTAGCA_1

metadata_error.fa

Warning: The following results may be incomplete due to problems with external services. Please try again later for complete results.
Error message: 500 Server Error: Internal Server Error for url: https://www.ebi.ac.uk/eva/webservices/contig-alias/v1/chromosomes/md5checksum/hjfdoijsfc47hfg0gh9qwjrve
✔ All sequences are INSDC accessioned
✔ Analysis C: Assembly accession in metadata is compatible

not_all_insdc.fa

❌ Some sequences are not INSDC accessioned
First 10 sequences not in INSDC. Full report: /path/to/not_all_insdc_check.yml
Sequence nameRefget md5
2hjfdoijsfc47hfg0gh9qwjrve
✔ Analysis A: Assembly accession in metadata is compatible
\ No newline at end of file + .error-list, .no-show { display: none; }

Validation Report

eva-sub-cli v0.4.dev82+g45e6d68.d20240909
You requested to run the shallow validation, please run full validation before submitting the data
VCF FileVariant lines validated in VCFEntries used in Fasta
input_fail.vcf1000024
input_passed.vcf1000024

Project Summary

General details about the project

Project Title: My cool project

Validation Date: 2023-08-31 12:34:56

Submission Directory: /test/submission/dir

Files mapping
VCF FileFasta FileAnalysis
input_fail.vcfinput_fail.faA
input_pass.vcfinput_pass.faB
input_test.vcfinput_test.facould not be linked

Metadata validation results

Ensures that required fields are present and values are formatted correctly. For requirements, please refer to the EVA website.
❌ Metadata validation check
Full report: /path/to/metadata/metadata_spreadsheet_validation.txt
SheetRowColumnDescription
FilesSheet "Files" is missing
Project2Project TitleColumn "Project Title" is not populated
Project2DescriptionColumn "Description" is not populated
Project2Tax IDColumn "Tax ID" is not populated
Project2CenterColumn "Center" is not populated
Analysis2Analysis TitleColumn "Analysis Title" is not populated
Analysis2DescriptionColumn "Description" is not populated
Analysis2Experiment TypeColumn "Experiment Type" is not populated
Analysis2ReferenceColumn "Reference" is not populated
Sample3Sample AccessionColumn "Sample Accession" is not populated

VCF validation results

Checks whether each file is compliant with the VCF specification. Also checks whether the variants' reference alleles match against the reference assembly.

input_fail.vcf

❌ Assembly check: 26/36 (72.22%)
First 10 errors per category are below. Full report: /path/to/assembly_failed/report
CategoryError
Parsing ErrorThe assembly checking could not be completed: Contig 'chr23' not found in assembly report
mismatch errorChromosome 1, position 35549, reference allele 'G' does not match the reference sequence, expected 'c'
mismatch errorChromosome 1, position 35595, reference allele 'G' does not match the reference sequence, expected 'a'
mismatch errorChromosome 1, position 35618, reference allele 'G' does not match the reference sequence, expected 'c'
mismatch errorChromosome 1, position 35626, reference allele 'A' does not match the reference sequence, expected 'g'
mismatch errorChromosome 1, position 35639, reference allele 'T' does not match the reference sequence, expected 'c'
mismatch errorChromosome 1, position 35643, reference allele 'T' does not match the reference sequence, expected 'g'
mismatch errorChromosome 1, position 35717, reference allele 'T' does not match the reference sequence, expected 'g'
mismatch errorChromosome 1, position 35819, reference allele 'T' does not match the reference sequence, expected 'a'
mismatch errorChromosome 1, position 35822, reference allele 'T' does not match the reference sequence, expected 'c'
❌ VCF check: 1 critical errors, 1 non-critical errors
First 10 errors per category are below. Full report: /path/to/vcf_failed/report
CategoryError
critical errorLine 4: Error in meta-data section.
non-critical errorSample #11, field AD does not match the meta specification Number=R (expected 2 value(s)). AD=..

input_passed.vcf

✔ Assembly check: 247/247 (100.0%)
✔ VCF check: 0 critical errors, 0 non-critical errors

Sample name concordance check

Checks whether information in the metadata is concordant with that contained in the VCF files, in particular sample names.
Analysis A: Sample names in metadata do not match with those in VCF files
CategoryFirst 5 Errors For CategoryLink To View All Errors
Samples described in the metadata but not in the VCF filesSampleA1, SampleA2 , SampleA3, SampleA4, SampleA5Show All Errors For Category
Samples in the VCF files but not described in the metadataA1Sample , A2Sample, A3Sample, A4Sample, A5SampleShow All Errors For Category
All Errors For Category - Samples described in the metadata but not in the VCF files:
  1. •SampleA1
  2. SampleA2•
  3. SampleA3
  4. SampleA4
  5. SampleA5
  6. SampleA6
  7. SampleA7
  8. SampleA8
  9. SampleA9
  10. SampleA10
Hide
All Errors For Category - Samples in the VCF files but not described in the metadata:
  1. A1Sample•
  2. •A2Sample
  3. A3Sample
  4. A4Sample
  5. A5Sample
  6. A6Sample
  7. A7Sample
  8. A8Sample
  9. A9Sample
  10. A10Sample
Hide
Analysis B: Sample names in metadata match with those in VCF files
Analysis C: Sample names in metadata do not match with those in VCF files
CategoryFirst 5 Errors For CategoryLink To View All Errors
Samples described in the metadata but not in the VCF filesSampleC1 , SampleC2, SampleC3, SampleC4Show All Errors For Category
Samples in the VCF files but not described in the metadataC1Sample , C2Sample, C3Sample, C4SampleShow All Errors For Category
All Errors For Category - Samples described in the metadata but not in the VCF files:
  1. SampleC1•
  2. •SampleC2
  3. SampleC3
  4. SampleC4
Hide
All Errors For Category - Samples in the VCF files but not described in the metadata:
  1. C1Sample•
  2. •C2Sample
  3. C3Sample
  4. C4Sample
Hide

Reference genome INSDC check

Checks that the reference sequences in the FASTA file used to call the variants are accessioned in INSDC. Also checks if the reference assembly accession in the metadata matches the one determined from the FASTA file.

metadata_asm_match.fa

✔ All sequences are INSDC accessioned
✔ Analysis A: Assembly accession in metadata is compatible

metadata_asm_not_found.fa

✔ All sequences are INSDC accessioned
❌ No assembly accession found in metadata
Full report: /path/to/metadata_asm_not_found.yml
CategoryAccessions
Assembly accession found in metadataNot found
Assembly accession(s) compatible with FASTAGCA_1

metadata_asm_not_match.fa

✔ All sequences are INSDC accessioned
❌ Analysis B: Assembly accession in metadata is not compatible
Full report: /path/to/metadata_asm_not_match.yml
CategoryAccessions
Assembly accession found in metadataGCA_2
Assembly accession(s) compatible with FASTAGCA_1

metadata_error.fa

Warning: The following results may be incomplete due to problems with external services. Please try again later for complete results.
Error message: 500 Server Error: Internal Server Error for url: https://www.ebi.ac.uk/eva/webservices/contig-alias/v1/chromosomes/md5checksum/hjfdoijsfc47hfg0gh9qwjrve
✔ All sequences are INSDC accessioned
✔ Analysis C: Assembly accession in metadata is compatible

not_all_insdc.fa

❌ Some sequences are not INSDC accessioned
First 10 sequences not in INSDC. Full report: /path/to/not_all_insdc_check.yml
Sequence nameRefget md5
2hjfdoijsfc47hfg0gh9qwjrve
✔ Analysis A: Assembly accession in metadata is compatible
\ No newline at end of file