diff --git a/eva_sub_cli/jinja_templates/html_report.html b/eva_sub_cli/jinja_templates/html_report.html
index cc50889..5398d00 100644
--- a/eva_sub_cli/jinja_templates/html_report.html
+++ b/eva_sub_cli/jinja_templates/html_report.html
@@ -4,7 +4,7 @@
{% from 'sample_name_check.html' import sample_name_check_report %}
{% from 'fasta_check.html' import fasta_check_report %}
{% from 'metadata_validation.html' import metadata_validation_report %}
-{% from 'shallow_validation.html' import shallow_validation_report %}
+{% from 'shallow_validation.html' import optional_shallow_validation_report %}
@@ -47,9 +47,7 @@
Project Summary
diff --git a/eva_sub_cli/jinja_templates/shallow_validation.html b/eva_sub_cli/jinja_templates/shallow_validation.html
index cf20851..e8dfe50 100644
--- a/eva_sub_cli/jinja_templates/shallow_validation.html
+++ b/eva_sub_cli/jinja_templates/shallow_validation.html
@@ -1,8 +1,9 @@
-{% macro shallow_validation_report(validation_results) -%}
+{% macro optional_shallow_validation_report(validation_results) -%}
{% set results = validation_results.get('shallow_validation', {}) %}
{% if results.get('required') %}
+
▶
❌ You requested to run the shallow validation, please run full validation before submitting the data
@@ -10,8 +11,8 @@
VCF File |
- Records validated in VCF |
- Records validated in Fasta |
+ Variant lines validated in VCF |
+ Entries used in Fasta |
{% for vcf_file in results.get('metrics') %}
@@ -22,6 +23,7 @@
{% endfor %}
+
{% endif %}
{%- endmacro %}
\ No newline at end of file
diff --git a/eva_sub_cli/validators/docker_validator.py b/eva_sub_cli/validators/docker_validator.py
index 2c862f3..6b01e49 100644
--- a/eva_sub_cli/validators/docker_validator.py
+++ b/eva_sub_cli/validators/docker_validator.py
@@ -37,24 +37,15 @@ def _validation_file_path_for(file_path):
return f'{container_validation_dir}/{file_path}'
def get_docker_validation_cmd(self):
- if self.metadata_xlsx and not self.metadata_json:
- docker_cmd = ''.join([
- f"{self.docker_path} exec {self.container_name} nextflow run eva_sub_cli/nextflow/validation.nf ",
- f"--base_dir {container_validation_dir} ",
- f"--vcf_files_mapping {self.mapping_file} ",
- f"--metadata_xlsx {self.metadata_xlsx} ",
- f"--shallow_validation true " if self.shallow_validation else "",
- f"--output_dir {container_validation_output_dir}"
- ])
- else:
- docker_cmd = ''.join([
- f"{self.docker_path} exec {self.container_name} nextflow run eva_sub_cli/nextflow/validation.nf ",
- f"--base_dir {container_validation_dir} ",
- f"--vcf_files_mapping {self.mapping_file} ",
- f"--metadata_json {self.metadata_json} ",
- f"--shallow_validation true " if self.shallow_validation else "",
- f"--output_dir {container_validation_output_dir}"
- ])
+ docker_cmd = ''.join([
+ f"{self.docker_path} exec {self.container_name} nextflow run eva_sub_cli/nextflow/validation.nf ",
+ f"--base_dir {container_validation_dir} ",
+ f"--vcf_files_mapping {self.mapping_file} ",
+ f"--metadata_xlsx {self.metadata_xlsx} " if self.metadata_xlsx and not self.metadata_json
+ else f"--metadata_json {self.metadata_json} ",
+ f"--shallow_validation true " if self.shallow_validation else "",
+ f"--output_dir {container_validation_output_dir}"
+ ])
return docker_cmd
def run_docker_validator(self):
diff --git a/eva_sub_cli/validators/validator.py b/eva_sub_cli/validators/validator.py
index 4a1984a..f3e591d 100755
--- a/eva_sub_cli/validators/validator.py
+++ b/eva_sub_cli/validators/validator.py
@@ -167,6 +167,7 @@ def verify_ready_for_submission_to_eva(self):
)),
self.results.get('sample_check', {}).get('overall_differences', True) is False,
len(self.results.get('metadata_check', {}).get('spreadsheet_errors', [])) == 0,
+ len(self.results.get('metadata_check', {}).get('json_errors', [])) == 0,
any((
self.results['shallow_validation']['requested'] is False,
self.results['shallow_validation'].get('required', True) is False
diff --git a/tests/resources/validation_reports/expected_report_metadata_json.html b/tests/resources/validation_reports/expected_report_metadata_json.html
index 01a2c35..9972434 100644
--- a/tests/resources/validation_reports/expected_report_metadata_json.html
+++ b/tests/resources/validation_reports/expected_report_metadata_json.html
@@ -19,4 +19,4 @@
.fail { background-color: #FFB6C1; }
.pass { background-color: #90EE90; }
.info { background-color: #dadada; }
- .error-list, .no-show { display: none; }Project Summary
General details about the project
Project Title: My cool project
Validation Date: 2023-08-31 12:34:56
Submission Directory: /test/submission/dir
▶ Files mapping
VCF File | Fasta File | Analysis |
---|
input_fail.vcf | input_fail.fa | A |
input_pass.vcf | input_pass.fa | B |
input_test.vcf | input_test.fa | could not be linked |
Metadata validation results
Ensures that required fields are present and values are formatted correctly. For requirements, please refer to the
EVA website.
▶ ❌ Metadata validation check
Full report: /path/to/json/metadata/report
JSON Property | Error Description |
---|
.files | should have required property 'files' |
/project.title | should have required property 'title' |
/project.description | should have required property 'description' |
/project.taxId | should have required property 'taxId' |
/project.centre | should have required property 'centre' |
/analysis/0.analysisTitle | should have required property 'analysisTitle' |
/analysis/0.description | should have required property 'description' |
/analysis/0.experimentType | should have required property 'experimentType' |
/analysis/0.referenceGenome | should have required property 'referenceGenome' |
/sample/0.bioSampleAccession | should have required property 'bioSampleAccession' |
/sample/0.bioSampleObject | should have required property 'bioSampleObject' |
/sample/0 | should match exactly one schema in oneOf |
VCF validation results
Checks whether each file is compliant with the
VCF specification. Also checks whether the variants' reference alleles match against the reference assembly.
input_fail.vcf
▶ ❌ Assembly check: 26/36 (72.22%)
First 10 errors per category are below. Full report: /path/to/assembly_failed/report
Category | Error |
---|
Parsing Error | The assembly checking could not be completed: Contig 'chr23' not found in assembly report |
mismatch error | Chromosome 1, position 35549, reference allele 'G' does not match the reference sequence, expected 'c' |
mismatch error | Chromosome 1, position 35595, reference allele 'G' does not match the reference sequence, expected 'a' |
mismatch error | Chromosome 1, position 35618, reference allele 'G' does not match the reference sequence, expected 'c' |
mismatch error | Chromosome 1, position 35626, reference allele 'A' does not match the reference sequence, expected 'g' |
mismatch error | Chromosome 1, position 35639, reference allele 'T' does not match the reference sequence, expected 'c' |
mismatch error | Chromosome 1, position 35643, reference allele 'T' does not match the reference sequence, expected 'g' |
mismatch error | Chromosome 1, position 35717, reference allele 'T' does not match the reference sequence, expected 'g' |
mismatch error | Chromosome 1, position 35819, reference allele 'T' does not match the reference sequence, expected 'a' |
mismatch error | Chromosome 1, position 35822, reference allele 'T' does not match the reference sequence, expected 'c' |
▶ ❌ VCF check: 1 critical errors, 1 non-critical errors
First 10 errors per category are below. Full report: /path/to/vcf_failed/report
Category | Error |
---|
critical error | Line 4: Error in meta-data section. |
non-critical error | Sample #11, field AD does not match the meta specification Number=R (expected 2 value(s)). AD=.. |
input_passed.vcf
✔ Assembly check: 247/247 (100.0%)
✔ VCF check: 0 critical errors, 0 non-critical errors
Sample name concordance check
Checks whether information in the metadata is concordant with that contained in the VCF files, in particular sample names.
▶ ❌ Analysis A: Sample names in metadata do not match with those in VCF files
Category | First 5 Errors For Category | Link To View All Errors |
---|
Samples described in the metadata but not in the VCF files | SampleA1, SampleA2 , SampleA3, SampleA4, SampleA5 | Show All Errors For Category |
Samples in the VCF files but not described in the metadata | A1Sample , A2Sample, A3Sample, A4Sample, A5Sample | Show All Errors For Category |
All Errors For Category - Samples in the VCF files but not described in the metadata:
- A1Sample•
- •A2Sample
- A3Sample
- A4Sample
- A5Sample
- A6Sample
- A7Sample
- A8Sample
- A9Sample
- A10Sample
Hide ✔ Analysis B: Sample names in metadata match with those in VCF files
▶ ❌ Analysis C: Sample names in metadata do not match with those in VCF files
Category | First 5 Errors For Category | Link To View All Errors |
---|
Samples described in the metadata but not in the VCF files | SampleC1 , SampleC2, SampleC3, SampleC4 | Show All Errors For Category |
Samples in the VCF files but not described in the metadata | C1Sample , C2Sample, C3Sample, C4Sample | Show All Errors For Category |
All Errors For Category - Samples in the VCF files but not described in the metadata:
- C1Sample•
- •C2Sample
- C3Sample
- C4Sample
HideReference genome INSDC check
Checks that the reference sequences in the FASTA file used to call the variants are accessioned in INSDC. Also checks if the reference assembly accession in the metadata matches the one determined from the FASTA file.
metadata_asm_match.fa
✔ All sequences are INSDC accessioned
✔ Analysis A: Assembly accession in metadata is compatible
metadata_asm_not_found.fa
✔ All sequences are INSDC accessioned
▶ ❌ No assembly accession found in metadata
Full report: /path/to/metadata_asm_not_found.yml
Category | Accessions |
---|
Assembly accession found in metadata | Not found |
Assembly accession(s) compatible with FASTA | GCA_1 |
metadata_asm_not_match.fa
✔ All sequences are INSDC accessioned
▶ ❌ Analysis B: Assembly accession in metadata is not compatible
Full report: /path/to/metadata_asm_not_match.yml
Category | Accessions |
---|
Assembly accession found in metadata | GCA_2 |
Assembly accession(s) compatible with FASTA | GCA_1 |
metadata_error.fa
Warning: The following results may be incomplete due to problems with external services. Please try again later for complete results.
Error message: 500 Server Error: Internal Server Error for url: https://www.ebi.ac.uk/eva/webservices/contig-alias/v1/chromosomes/md5checksum/hjfdoijsfc47hfg0gh9qwjrve
✔ All sequences are INSDC accessioned
✔ Analysis C: Assembly accession in metadata is compatible
not_all_insdc.fa
▶ ❌ Some sequences are not INSDC accessioned
First 10 sequences not in INSDC. Full report: /path/to/not_all_insdc_check.yml
Sequence name | Refget md5 |
---|
2 | hjfdoijsfc47hfg0gh9qwjrve |
✔ Analysis A: Assembly accession in metadata is compatible
\ No newline at end of file
+ .error-list, .no-show { display: none; }Project Summary
General details about the project
Project Title: My cool project
Validation Date: 2023-08-31 12:34:56
Submission Directory: /test/submission/dir
▶ Files mapping
VCF File | Fasta File | Analysis |
---|
input_fail.vcf | input_fail.fa | A |
input_pass.vcf | input_pass.fa | B |
input_test.vcf | input_test.fa | could not be linked |
Metadata validation results
Ensures that required fields are present and values are formatted correctly. For requirements, please refer to the
EVA website.
▶ ❌ Metadata validation check
Full report: /path/to/json/metadata/report
JSON Property | Error Description |
---|
.files | should have required property 'files' |
/project.title | should have required property 'title' |
/project.description | should have required property 'description' |
/project.taxId | should have required property 'taxId' |
/project.centre | should have required property 'centre' |
/analysis/0.analysisTitle | should have required property 'analysisTitle' |
/analysis/0.description | should have required property 'description' |
/analysis/0.experimentType | should have required property 'experimentType' |
/analysis/0.referenceGenome | should have required property 'referenceGenome' |
/sample/0.bioSampleAccession | should have required property 'bioSampleAccession' |
/sample/0.bioSampleObject | should have required property 'bioSampleObject' |
/sample/0 | should match exactly one schema in oneOf |
VCF validation results
Checks whether each file is compliant with the
VCF specification. Also checks whether the variants' reference alleles match against the reference assembly.
input_fail.vcf
▶ ❌ Assembly check: 26/36 (72.22%)
First 10 errors per category are below. Full report: /path/to/assembly_failed/report
Category | Error |
---|
Parsing Error | The assembly checking could not be completed: Contig 'chr23' not found in assembly report |
mismatch error | Chromosome 1, position 35549, reference allele 'G' does not match the reference sequence, expected 'c' |
mismatch error | Chromosome 1, position 35595, reference allele 'G' does not match the reference sequence, expected 'a' |
mismatch error | Chromosome 1, position 35618, reference allele 'G' does not match the reference sequence, expected 'c' |
mismatch error | Chromosome 1, position 35626, reference allele 'A' does not match the reference sequence, expected 'g' |
mismatch error | Chromosome 1, position 35639, reference allele 'T' does not match the reference sequence, expected 'c' |
mismatch error | Chromosome 1, position 35643, reference allele 'T' does not match the reference sequence, expected 'g' |
mismatch error | Chromosome 1, position 35717, reference allele 'T' does not match the reference sequence, expected 'g' |
mismatch error | Chromosome 1, position 35819, reference allele 'T' does not match the reference sequence, expected 'a' |
mismatch error | Chromosome 1, position 35822, reference allele 'T' does not match the reference sequence, expected 'c' |
▶ ❌ VCF check: 1 critical errors, 1 non-critical errors
First 10 errors per category are below. Full report: /path/to/vcf_failed/report
Category | Error |
---|
critical error | Line 4: Error in meta-data section. |
non-critical error | Sample #11, field AD does not match the meta specification Number=R (expected 2 value(s)). AD=.. |
input_passed.vcf
✔ Assembly check: 247/247 (100.0%)
✔ VCF check: 0 critical errors, 0 non-critical errors
Sample name concordance check
Checks whether information in the metadata is concordant with that contained in the VCF files, in particular sample names.
▶ ❌ Analysis A: Sample names in metadata do not match with those in VCF files
Category | First 5 Errors For Category | Link To View All Errors |
---|
Samples described in the metadata but not in the VCF files | SampleA1, SampleA2 , SampleA3, SampleA4, SampleA5 | Show All Errors For Category |
Samples in the VCF files but not described in the metadata | A1Sample , A2Sample, A3Sample, A4Sample, A5Sample | Show All Errors For Category |
All Errors For Category - Samples in the VCF files but not described in the metadata:
- A1Sample•
- •A2Sample
- A3Sample
- A4Sample
- A5Sample
- A6Sample
- A7Sample
- A8Sample
- A9Sample
- A10Sample
Hide ✔ Analysis B: Sample names in metadata match with those in VCF files
▶ ❌ Analysis C: Sample names in metadata do not match with those in VCF files
Category | First 5 Errors For Category | Link To View All Errors |
---|
Samples described in the metadata but not in the VCF files | SampleC1 , SampleC2, SampleC3, SampleC4 | Show All Errors For Category |
Samples in the VCF files but not described in the metadata | C1Sample , C2Sample, C3Sample, C4Sample | Show All Errors For Category |
All Errors For Category - Samples in the VCF files but not described in the metadata:
- C1Sample•
- •C2Sample
- C3Sample
- C4Sample
HideReference genome INSDC check
Checks that the reference sequences in the FASTA file used to call the variants are accessioned in INSDC. Also checks if the reference assembly accession in the metadata matches the one determined from the FASTA file.
metadata_asm_match.fa
✔ All sequences are INSDC accessioned
✔ Analysis A: Assembly accession in metadata is compatible
metadata_asm_not_found.fa
✔ All sequences are INSDC accessioned
▶ ❌ No assembly accession found in metadata
Full report: /path/to/metadata_asm_not_found.yml
Category | Accessions |
---|
Assembly accession found in metadata | Not found |
Assembly accession(s) compatible with FASTA | GCA_1 |
metadata_asm_not_match.fa
✔ All sequences are INSDC accessioned
▶ ❌ Analysis B: Assembly accession in metadata is not compatible
Full report: /path/to/metadata_asm_not_match.yml
Category | Accessions |
---|
Assembly accession found in metadata | GCA_2 |
Assembly accession(s) compatible with FASTA | GCA_1 |
metadata_error.fa
Warning: The following results may be incomplete due to problems with external services. Please try again later for complete results.
Error message: 500 Server Error: Internal Server Error for url: https://www.ebi.ac.uk/eva/webservices/contig-alias/v1/chromosomes/md5checksum/hjfdoijsfc47hfg0gh9qwjrve
✔ All sequences are INSDC accessioned
✔ Analysis C: Assembly accession in metadata is compatible
not_all_insdc.fa
▶ ❌ Some sequences are not INSDC accessioned
First 10 sequences not in INSDC. Full report: /path/to/not_all_insdc_check.yml
Sequence name | Refget md5 |
---|
2 | hjfdoijsfc47hfg0gh9qwjrve |
✔ Analysis A: Assembly accession in metadata is compatible
\ No newline at end of file
diff --git a/tests/resources/validation_reports/expected_report_metadata_xlsx.html b/tests/resources/validation_reports/expected_report_metadata_xlsx.html
index a1576d8..fc7dbb1 100644
--- a/tests/resources/validation_reports/expected_report_metadata_xlsx.html
+++ b/tests/resources/validation_reports/expected_report_metadata_xlsx.html
@@ -19,4 +19,4 @@
.fail { background-color: #FFB6C1; }
.pass { background-color: #90EE90; }
.info { background-color: #dadada; }
- .error-list, .no-show { display: none; }Project Summary
General details about the project
Project Title: My cool project
Validation Date: 2023-08-31 12:34:56
Submission Directory: /test/submission/dir
▶ Files mapping
VCF File | Fasta File | Analysis |
---|
input_fail.vcf | input_fail.fa | A |
input_pass.vcf | input_pass.fa | B |
input_test.vcf | input_test.fa | could not be linked |
Metadata validation results
Ensures that required fields are present and values are formatted correctly. For requirements, please refer to the
EVA website.
▶ ❌ Metadata validation check
Full report: /path/to/metadata/metadata_spreadsheet_validation.txt
Sheet | Row | Column | Description |
---|
Files | | | Sheet "Files" is missing |
Project | 2 | Project Title | Column "Project Title" is not populated |
Project | 2 | Description | Column "Description" is not populated |
Project | 2 | Tax ID | Column "Tax ID" is not populated |
Project | 2 | Center | Column "Center" is not populated |
Analysis | 2 | Analysis Title | Column "Analysis Title" is not populated |
Analysis | 2 | Description | Column "Description" is not populated |
Analysis | 2 | Experiment Type | Column "Experiment Type" is not populated |
Analysis | 2 | Reference | Column "Reference" is not populated |
Sample | 3 | Sample Accession | Column "Sample Accession" is not populated |
VCF validation results
Checks whether each file is compliant with the
VCF specification. Also checks whether the variants' reference alleles match against the reference assembly.
input_fail.vcf
▶ ❌ Assembly check: 26/36 (72.22%)
First 10 errors per category are below. Full report: /path/to/assembly_failed/report
Category | Error |
---|
Parsing Error | The assembly checking could not be completed: Contig 'chr23' not found in assembly report |
mismatch error | Chromosome 1, position 35549, reference allele 'G' does not match the reference sequence, expected 'c' |
mismatch error | Chromosome 1, position 35595, reference allele 'G' does not match the reference sequence, expected 'a' |
mismatch error | Chromosome 1, position 35618, reference allele 'G' does not match the reference sequence, expected 'c' |
mismatch error | Chromosome 1, position 35626, reference allele 'A' does not match the reference sequence, expected 'g' |
mismatch error | Chromosome 1, position 35639, reference allele 'T' does not match the reference sequence, expected 'c' |
mismatch error | Chromosome 1, position 35643, reference allele 'T' does not match the reference sequence, expected 'g' |
mismatch error | Chromosome 1, position 35717, reference allele 'T' does not match the reference sequence, expected 'g' |
mismatch error | Chromosome 1, position 35819, reference allele 'T' does not match the reference sequence, expected 'a' |
mismatch error | Chromosome 1, position 35822, reference allele 'T' does not match the reference sequence, expected 'c' |
▶ ❌ VCF check: 1 critical errors, 1 non-critical errors
First 10 errors per category are below. Full report: /path/to/vcf_failed/report
Category | Error |
---|
critical error | Line 4: Error in meta-data section. |
non-critical error | Sample #11, field AD does not match the meta specification Number=R (expected 2 value(s)). AD=.. |
input_passed.vcf
✔ Assembly check: 247/247 (100.0%)
✔ VCF check: 0 critical errors, 0 non-critical errors
Sample name concordance check
Checks whether information in the metadata is concordant with that contained in the VCF files, in particular sample names.
▶ ❌ Analysis A: Sample names in metadata do not match with those in VCF files
Category | First 5 Errors For Category | Link To View All Errors |
---|
Samples described in the metadata but not in the VCF files | SampleA1, SampleA2 , SampleA3, SampleA4, SampleA5 | Show All Errors For Category |
Samples in the VCF files but not described in the metadata | A1Sample , A2Sample, A3Sample, A4Sample, A5Sample | Show All Errors For Category |
All Errors For Category - Samples in the VCF files but not described in the metadata:
- A1Sample•
- •A2Sample
- A3Sample
- A4Sample
- A5Sample
- A6Sample
- A7Sample
- A8Sample
- A9Sample
- A10Sample
Hide ✔ Analysis B: Sample names in metadata match with those in VCF files
▶ ❌ Analysis C: Sample names in metadata do not match with those in VCF files
Category | First 5 Errors For Category | Link To View All Errors |
---|
Samples described in the metadata but not in the VCF files | SampleC1 , SampleC2, SampleC3, SampleC4 | Show All Errors For Category |
Samples in the VCF files but not described in the metadata | C1Sample , C2Sample, C3Sample, C4Sample | Show All Errors For Category |
All Errors For Category - Samples in the VCF files but not described in the metadata:
- C1Sample•
- •C2Sample
- C3Sample
- C4Sample
HideReference genome INSDC check
Checks that the reference sequences in the FASTA file used to call the variants are accessioned in INSDC. Also checks if the reference assembly accession in the metadata matches the one determined from the FASTA file.
metadata_asm_match.fa
✔ All sequences are INSDC accessioned
✔ Analysis A: Assembly accession in metadata is compatible
metadata_asm_not_found.fa
✔ All sequences are INSDC accessioned
▶ ❌ No assembly accession found in metadata
Full report: /path/to/metadata_asm_not_found.yml
Category | Accessions |
---|
Assembly accession found in metadata | Not found |
Assembly accession(s) compatible with FASTA | GCA_1 |
metadata_asm_not_match.fa
✔ All sequences are INSDC accessioned
▶ ❌ Analysis B: Assembly accession in metadata is not compatible
Full report: /path/to/metadata_asm_not_match.yml
Category | Accessions |
---|
Assembly accession found in metadata | GCA_2 |
Assembly accession(s) compatible with FASTA | GCA_1 |
metadata_error.fa
Warning: The following results may be incomplete due to problems with external services. Please try again later for complete results.
Error message: 500 Server Error: Internal Server Error for url: https://www.ebi.ac.uk/eva/webservices/contig-alias/v1/chromosomes/md5checksum/hjfdoijsfc47hfg0gh9qwjrve
✔ All sequences are INSDC accessioned
✔ Analysis C: Assembly accession in metadata is compatible
not_all_insdc.fa
▶ ❌ Some sequences are not INSDC accessioned
First 10 sequences not in INSDC. Full report: /path/to/not_all_insdc_check.yml
Sequence name | Refget md5 |
---|
2 | hjfdoijsfc47hfg0gh9qwjrve |
✔ Analysis A: Assembly accession in metadata is compatible
\ No newline at end of file
+ .error-list, .no-show { display: none; }Project Summary
General details about the project
Project Title: My cool project
Validation Date: 2023-08-31 12:34:56
Submission Directory: /test/submission/dir
▶ Files mapping
VCF File | Fasta File | Analysis |
---|
input_fail.vcf | input_fail.fa | A |
input_pass.vcf | input_pass.fa | B |
input_test.vcf | input_test.fa | could not be linked |
Metadata validation results
Ensures that required fields are present and values are formatted correctly. For requirements, please refer to the
EVA website.
▶ ❌ Metadata validation check
Full report: /path/to/metadata/metadata_spreadsheet_validation.txt
Sheet | Row | Column | Description |
---|
Files | | | Sheet "Files" is missing |
Project | 2 | Project Title | Column "Project Title" is not populated |
Project | 2 | Description | Column "Description" is not populated |
Project | 2 | Tax ID | Column "Tax ID" is not populated |
Project | 2 | Center | Column "Center" is not populated |
Analysis | 2 | Analysis Title | Column "Analysis Title" is not populated |
Analysis | 2 | Description | Column "Description" is not populated |
Analysis | 2 | Experiment Type | Column "Experiment Type" is not populated |
Analysis | 2 | Reference | Column "Reference" is not populated |
Sample | 3 | Sample Accession | Column "Sample Accession" is not populated |
VCF validation results
Checks whether each file is compliant with the
VCF specification. Also checks whether the variants' reference alleles match against the reference assembly.
input_fail.vcf
▶ ❌ Assembly check: 26/36 (72.22%)
First 10 errors per category are below. Full report: /path/to/assembly_failed/report
Category | Error |
---|
Parsing Error | The assembly checking could not be completed: Contig 'chr23' not found in assembly report |
mismatch error | Chromosome 1, position 35549, reference allele 'G' does not match the reference sequence, expected 'c' |
mismatch error | Chromosome 1, position 35595, reference allele 'G' does not match the reference sequence, expected 'a' |
mismatch error | Chromosome 1, position 35618, reference allele 'G' does not match the reference sequence, expected 'c' |
mismatch error | Chromosome 1, position 35626, reference allele 'A' does not match the reference sequence, expected 'g' |
mismatch error | Chromosome 1, position 35639, reference allele 'T' does not match the reference sequence, expected 'c' |
mismatch error | Chromosome 1, position 35643, reference allele 'T' does not match the reference sequence, expected 'g' |
mismatch error | Chromosome 1, position 35717, reference allele 'T' does not match the reference sequence, expected 'g' |
mismatch error | Chromosome 1, position 35819, reference allele 'T' does not match the reference sequence, expected 'a' |
mismatch error | Chromosome 1, position 35822, reference allele 'T' does not match the reference sequence, expected 'c' |
▶ ❌ VCF check: 1 critical errors, 1 non-critical errors
First 10 errors per category are below. Full report: /path/to/vcf_failed/report
Category | Error |
---|
critical error | Line 4: Error in meta-data section. |
non-critical error | Sample #11, field AD does not match the meta specification Number=R (expected 2 value(s)). AD=.. |
input_passed.vcf
✔ Assembly check: 247/247 (100.0%)
✔ VCF check: 0 critical errors, 0 non-critical errors
Sample name concordance check
Checks whether information in the metadata is concordant with that contained in the VCF files, in particular sample names.
▶ ❌ Analysis A: Sample names in metadata do not match with those in VCF files
Category | First 5 Errors For Category | Link To View All Errors |
---|
Samples described in the metadata but not in the VCF files | SampleA1, SampleA2 , SampleA3, SampleA4, SampleA5 | Show All Errors For Category |
Samples in the VCF files but not described in the metadata | A1Sample , A2Sample, A3Sample, A4Sample, A5Sample | Show All Errors For Category |
All Errors For Category - Samples in the VCF files but not described in the metadata:
- A1Sample•
- •A2Sample
- A3Sample
- A4Sample
- A5Sample
- A6Sample
- A7Sample
- A8Sample
- A9Sample
- A10Sample
Hide ✔ Analysis B: Sample names in metadata match with those in VCF files
▶ ❌ Analysis C: Sample names in metadata do not match with those in VCF files
Category | First 5 Errors For Category | Link To View All Errors |
---|
Samples described in the metadata but not in the VCF files | SampleC1 , SampleC2, SampleC3, SampleC4 | Show All Errors For Category |
Samples in the VCF files but not described in the metadata | C1Sample , C2Sample, C3Sample, C4Sample | Show All Errors For Category |
All Errors For Category - Samples in the VCF files but not described in the metadata:
- C1Sample•
- •C2Sample
- C3Sample
- C4Sample
HideReference genome INSDC check
Checks that the reference sequences in the FASTA file used to call the variants are accessioned in INSDC. Also checks if the reference assembly accession in the metadata matches the one determined from the FASTA file.
metadata_asm_match.fa
✔ All sequences are INSDC accessioned
✔ Analysis A: Assembly accession in metadata is compatible
metadata_asm_not_found.fa
✔ All sequences are INSDC accessioned
▶ ❌ No assembly accession found in metadata
Full report: /path/to/metadata_asm_not_found.yml
Category | Accessions |
---|
Assembly accession found in metadata | Not found |
Assembly accession(s) compatible with FASTA | GCA_1 |
metadata_asm_not_match.fa
✔ All sequences are INSDC accessioned
▶ ❌ Analysis B: Assembly accession in metadata is not compatible
Full report: /path/to/metadata_asm_not_match.yml
Category | Accessions |
---|
Assembly accession found in metadata | GCA_2 |
Assembly accession(s) compatible with FASTA | GCA_1 |
metadata_error.fa
Warning: The following results may be incomplete due to problems with external services. Please try again later for complete results.
Error message: 500 Server Error: Internal Server Error for url: https://www.ebi.ac.uk/eva/webservices/contig-alias/v1/chromosomes/md5checksum/hjfdoijsfc47hfg0gh9qwjrve
✔ All sequences are INSDC accessioned
✔ Analysis C: Assembly accession in metadata is compatible
not_all_insdc.fa
▶ ❌ Some sequences are not INSDC accessioned
First 10 sequences not in INSDC. Full report: /path/to/not_all_insdc_check.yml
Sequence name | Refget md5 |
---|
2 | hjfdoijsfc47hfg0gh9qwjrve |
✔ Analysis A: Assembly accession in metadata is compatible
\ No newline at end of file
diff --git a/tests/resources/validation_reports/expected_shallow_metadata_xlsx_report.html b/tests/resources/validation_reports/expected_shallow_metadata_xlsx_report.html
index 8f26e24..cf1cf7f 100644
--- a/tests/resources/validation_reports/expected_shallow_metadata_xlsx_report.html
+++ b/tests/resources/validation_reports/expected_shallow_metadata_xlsx_report.html
@@ -19,4 +19,4 @@
.fail { background-color: #FFB6C1; }
.pass { background-color: #90EE90; }
.info { background-color: #dadada; }
- .error-list, .no-show { display: none; }▶ ❌ You requested to run the shallow validation, please run full validation before submitting the data
VCF File | Records validated in VCF | Records validated in Fasta |
---|
input_fail.vcf | 10000 | 24 |
input_passed.vcf | 10000 | 24 |
Project Summary
General details about the project
Project Title: My cool project
Validation Date: 2023-08-31 12:34:56
Submission Directory: /test/submission/dir
▶ Files mapping
VCF File | Fasta File | Analysis |
---|
input_fail.vcf | input_fail.fa | A |
input_pass.vcf | input_pass.fa | B |
input_test.vcf | input_test.fa | could not be linked |
Metadata validation results
Ensures that required fields are present and values are formatted correctly. For requirements, please refer to the
EVA website.
▶ ❌ Metadata validation check
Full report: /path/to/metadata/metadata_spreadsheet_validation.txt
Sheet | Row | Column | Description |
---|
Files | | | Sheet "Files" is missing |
Project | 2 | Project Title | Column "Project Title" is not populated |
Project | 2 | Description | Column "Description" is not populated |
Project | 2 | Tax ID | Column "Tax ID" is not populated |
Project | 2 | Center | Column "Center" is not populated |
Analysis | 2 | Analysis Title | Column "Analysis Title" is not populated |
Analysis | 2 | Description | Column "Description" is not populated |
Analysis | 2 | Experiment Type | Column "Experiment Type" is not populated |
Analysis | 2 | Reference | Column "Reference" is not populated |
Sample | 3 | Sample Accession | Column "Sample Accession" is not populated |
VCF validation results
Checks whether each file is compliant with the
VCF specification. Also checks whether the variants' reference alleles match against the reference assembly.
input_fail.vcf
▶ ❌ Assembly check: 26/36 (72.22%)
First 10 errors per category are below. Full report: /path/to/assembly_failed/report
Category | Error |
---|
Parsing Error | The assembly checking could not be completed: Contig 'chr23' not found in assembly report |
mismatch error | Chromosome 1, position 35549, reference allele 'G' does not match the reference sequence, expected 'c' |
mismatch error | Chromosome 1, position 35595, reference allele 'G' does not match the reference sequence, expected 'a' |
mismatch error | Chromosome 1, position 35618, reference allele 'G' does not match the reference sequence, expected 'c' |
mismatch error | Chromosome 1, position 35626, reference allele 'A' does not match the reference sequence, expected 'g' |
mismatch error | Chromosome 1, position 35639, reference allele 'T' does not match the reference sequence, expected 'c' |
mismatch error | Chromosome 1, position 35643, reference allele 'T' does not match the reference sequence, expected 'g' |
mismatch error | Chromosome 1, position 35717, reference allele 'T' does not match the reference sequence, expected 'g' |
mismatch error | Chromosome 1, position 35819, reference allele 'T' does not match the reference sequence, expected 'a' |
mismatch error | Chromosome 1, position 35822, reference allele 'T' does not match the reference sequence, expected 'c' |
▶ ❌ VCF check: 1 critical errors, 1 non-critical errors
First 10 errors per category are below. Full report: /path/to/vcf_failed/report
Category | Error |
---|
critical error | Line 4: Error in meta-data section. |
non-critical error | Sample #11, field AD does not match the meta specification Number=R (expected 2 value(s)). AD=.. |
input_passed.vcf
✔ Assembly check: 247/247 (100.0%)
✔ VCF check: 0 critical errors, 0 non-critical errors
Sample name concordance check
Checks whether information in the metadata is concordant with that contained in the VCF files, in particular sample names.
▶ ❌ Analysis A: Sample names in metadata do not match with those in VCF files
Category | First 5 Errors For Category | Link To View All Errors |
---|
Samples described in the metadata but not in the VCF files | SampleA1, SampleA2 , SampleA3, SampleA4, SampleA5 | Show All Errors For Category |
Samples in the VCF files but not described in the metadata | A1Sample , A2Sample, A3Sample, A4Sample, A5Sample | Show All Errors For Category |
All Errors For Category - Samples in the VCF files but not described in the metadata:
- A1Sample•
- •A2Sample
- A3Sample
- A4Sample
- A5Sample
- A6Sample
- A7Sample
- A8Sample
- A9Sample
- A10Sample
Hide ✔ Analysis B: Sample names in metadata match with those in VCF files
▶ ❌ Analysis C: Sample names in metadata do not match with those in VCF files
Category | First 5 Errors For Category | Link To View All Errors |
---|
Samples described in the metadata but not in the VCF files | SampleC1 , SampleC2, SampleC3, SampleC4 | Show All Errors For Category |
Samples in the VCF files but not described in the metadata | C1Sample , C2Sample, C3Sample, C4Sample | Show All Errors For Category |
All Errors For Category - Samples in the VCF files but not described in the metadata:
- C1Sample•
- •C2Sample
- C3Sample
- C4Sample
HideReference genome INSDC check
Checks that the reference sequences in the FASTA file used to call the variants are accessioned in INSDC. Also checks if the reference assembly accession in the metadata matches the one determined from the FASTA file.
metadata_asm_match.fa
✔ All sequences are INSDC accessioned
✔ Analysis A: Assembly accession in metadata is compatible
metadata_asm_not_found.fa
✔ All sequences are INSDC accessioned
▶ ❌ No assembly accession found in metadata
Full report: /path/to/metadata_asm_not_found.yml
Category | Accessions |
---|
Assembly accession found in metadata | Not found |
Assembly accession(s) compatible with FASTA | GCA_1 |
metadata_asm_not_match.fa
✔ All sequences are INSDC accessioned
▶ ❌ Analysis B: Assembly accession in metadata is not compatible
Full report: /path/to/metadata_asm_not_match.yml
Category | Accessions |
---|
Assembly accession found in metadata | GCA_2 |
Assembly accession(s) compatible with FASTA | GCA_1 |
metadata_error.fa
Warning: The following results may be incomplete due to problems with external services. Please try again later for complete results.
Error message: 500 Server Error: Internal Server Error for url: https://www.ebi.ac.uk/eva/webservices/contig-alias/v1/chromosomes/md5checksum/hjfdoijsfc47hfg0gh9qwjrve
✔ All sequences are INSDC accessioned
✔ Analysis C: Assembly accession in metadata is compatible
not_all_insdc.fa
▶ ❌ Some sequences are not INSDC accessioned
First 10 sequences not in INSDC. Full report: /path/to/not_all_insdc_check.yml
Sequence name | Refget md5 |
---|
2 | hjfdoijsfc47hfg0gh9qwjrve |
✔ Analysis A: Assembly accession in metadata is compatible
\ No newline at end of file
+ .error-list, .no-show { display: none; }▶ ❌ You requested to run the shallow validation, please run full validation before submitting the data
VCF File | Variant lines validated in VCF | Entries used in Fasta |
---|
input_fail.vcf | 10000 | 24 |
input_passed.vcf | 10000 | 24 |
Project Summary
General details about the project
Project Title: My cool project
Validation Date: 2023-08-31 12:34:56
Submission Directory: /test/submission/dir
▶ Files mapping
VCF File | Fasta File | Analysis |
---|
input_fail.vcf | input_fail.fa | A |
input_pass.vcf | input_pass.fa | B |
input_test.vcf | input_test.fa | could not be linked |
Metadata validation results
Ensures that required fields are present and values are formatted correctly. For requirements, please refer to the
EVA website.
▶ ❌ Metadata validation check
Full report: /path/to/metadata/metadata_spreadsheet_validation.txt
Sheet | Row | Column | Description |
---|
Files | | | Sheet "Files" is missing |
Project | 2 | Project Title | Column "Project Title" is not populated |
Project | 2 | Description | Column "Description" is not populated |
Project | 2 | Tax ID | Column "Tax ID" is not populated |
Project | 2 | Center | Column "Center" is not populated |
Analysis | 2 | Analysis Title | Column "Analysis Title" is not populated |
Analysis | 2 | Description | Column "Description" is not populated |
Analysis | 2 | Experiment Type | Column "Experiment Type" is not populated |
Analysis | 2 | Reference | Column "Reference" is not populated |
Sample | 3 | Sample Accession | Column "Sample Accession" is not populated |
VCF validation results
Checks whether each file is compliant with the
VCF specification. Also checks whether the variants' reference alleles match against the reference assembly.
input_fail.vcf
▶ ❌ Assembly check: 26/36 (72.22%)
First 10 errors per category are below. Full report: /path/to/assembly_failed/report
Category | Error |
---|
Parsing Error | The assembly checking could not be completed: Contig 'chr23' not found in assembly report |
mismatch error | Chromosome 1, position 35549, reference allele 'G' does not match the reference sequence, expected 'c' |
mismatch error | Chromosome 1, position 35595, reference allele 'G' does not match the reference sequence, expected 'a' |
mismatch error | Chromosome 1, position 35618, reference allele 'G' does not match the reference sequence, expected 'c' |
mismatch error | Chromosome 1, position 35626, reference allele 'A' does not match the reference sequence, expected 'g' |
mismatch error | Chromosome 1, position 35639, reference allele 'T' does not match the reference sequence, expected 'c' |
mismatch error | Chromosome 1, position 35643, reference allele 'T' does not match the reference sequence, expected 'g' |
mismatch error | Chromosome 1, position 35717, reference allele 'T' does not match the reference sequence, expected 'g' |
mismatch error | Chromosome 1, position 35819, reference allele 'T' does not match the reference sequence, expected 'a' |
mismatch error | Chromosome 1, position 35822, reference allele 'T' does not match the reference sequence, expected 'c' |
▶ ❌ VCF check: 1 critical errors, 1 non-critical errors
First 10 errors per category are below. Full report: /path/to/vcf_failed/report
Category | Error |
---|
critical error | Line 4: Error in meta-data section. |
non-critical error | Sample #11, field AD does not match the meta specification Number=R (expected 2 value(s)). AD=.. |
input_passed.vcf
✔ Assembly check: 247/247 (100.0%)
✔ VCF check: 0 critical errors, 0 non-critical errors
Sample name concordance check
Checks whether information in the metadata is concordant with that contained in the VCF files, in particular sample names.
▶ ❌ Analysis A: Sample names in metadata do not match with those in VCF files
Category | First 5 Errors For Category | Link To View All Errors |
---|
Samples described in the metadata but not in the VCF files | SampleA1, SampleA2 , SampleA3, SampleA4, SampleA5 | Show All Errors For Category |
Samples in the VCF files but not described in the metadata | A1Sample , A2Sample, A3Sample, A4Sample, A5Sample | Show All Errors For Category |
All Errors For Category - Samples in the VCF files but not described in the metadata:
- A1Sample•
- •A2Sample
- A3Sample
- A4Sample
- A5Sample
- A6Sample
- A7Sample
- A8Sample
- A9Sample
- A10Sample
Hide ✔ Analysis B: Sample names in metadata match with those in VCF files
▶ ❌ Analysis C: Sample names in metadata do not match with those in VCF files
Category | First 5 Errors For Category | Link To View All Errors |
---|
Samples described in the metadata but not in the VCF files | SampleC1 , SampleC2, SampleC3, SampleC4 | Show All Errors For Category |
Samples in the VCF files but not described in the metadata | C1Sample , C2Sample, C3Sample, C4Sample | Show All Errors For Category |
All Errors For Category - Samples in the VCF files but not described in the metadata:
- C1Sample•
- •C2Sample
- C3Sample
- C4Sample
HideReference genome INSDC check
Checks that the reference sequences in the FASTA file used to call the variants are accessioned in INSDC. Also checks if the reference assembly accession in the metadata matches the one determined from the FASTA file.
metadata_asm_match.fa
✔ All sequences are INSDC accessioned
✔ Analysis A: Assembly accession in metadata is compatible
metadata_asm_not_found.fa
✔ All sequences are INSDC accessioned
▶ ❌ No assembly accession found in metadata
Full report: /path/to/metadata_asm_not_found.yml
Category | Accessions |
---|
Assembly accession found in metadata | Not found |
Assembly accession(s) compatible with FASTA | GCA_1 |
metadata_asm_not_match.fa
✔ All sequences are INSDC accessioned
▶ ❌ Analysis B: Assembly accession in metadata is not compatible
Full report: /path/to/metadata_asm_not_match.yml
Category | Accessions |
---|
Assembly accession found in metadata | GCA_2 |
Assembly accession(s) compatible with FASTA | GCA_1 |
metadata_error.fa
Warning: The following results may be incomplete due to problems with external services. Please try again later for complete results.
Error message: 500 Server Error: Internal Server Error for url: https://www.ebi.ac.uk/eva/webservices/contig-alias/v1/chromosomes/md5checksum/hjfdoijsfc47hfg0gh9qwjrve
✔ All sequences are INSDC accessioned
✔ Analysis C: Assembly accession in metadata is compatible
not_all_insdc.fa
▶ ❌ Some sequences are not INSDC accessioned
First 10 sequences not in INSDC. Full report: /path/to/not_all_insdc_check.yml
Sequence name | Refget md5 |
---|
2 | hjfdoijsfc47hfg0gh9qwjrve |
✔ Analysis A: Assembly accession in metadata is compatible
\ No newline at end of file