Skip to content

Commit

Permalink
Merge pull request galaxyproject#6788 from bernt-matthias/fix-ncbi_datasets
Browse files Browse the repository at this point in the history

Update and fix ncbi datasets
  • Loading branch information
bgruening authored Feb 21, 2025
2 parents 599c4dd + 3033d1d commit 1c7abf3
Show file tree
Hide file tree
Showing 3 changed files with 127 additions and 38 deletions.
38 changes: 23 additions & 15 deletions tools/ncbi_datasets/datasets_gene.xml
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ dataformat
## unzip and rehydrate if any data is to be downloaded (include is not None)
#if $file_choices.kingdom_cond.include
## unzip
&& 7z x -y ncbi_dataset.zip > 7z.log
&& unzip ncbi_dataset.zip
#end if
]]></command>
<inputs>
Expand Down Expand Up @@ -114,7 +114,7 @@ dataformat
</valid>
</sanitizer>
</param>
<param argument="--include-flanks-bp" type="integer" optional="true" min="0" label="Length of flanking nucleotides" help="WP accessions only"/>
<param argument="--include-flanks-bp" type="integer" optional="true" min="0" value="" label="Length of flanking nucleotides" help="WP accessions only"/>
</when>
<when value="taxon">
<expand macro="taxon_positional"/>
Expand Down Expand Up @@ -196,6 +196,9 @@ dataformat
<!-- Gene FASTA output, picked up from ncbi_dataset/data/gene.fna in the working directory.
     Only kept when an include selection exists and it contains "gene". -->
<data name="gene_fasta" label="NCBI Gene Datasets: Gene fasta" format="fasta" from_work_dir="ncbi_dataset/data/gene.fna">
<filter>file_choices['kingdom_cond']['include'] and "gene" in file_choices['kingdom_cond']['include']</filter>
</data>
<!-- Flanking-sequence FASTA output, picked up from ncbi_dataset/data/gene_flank.fna in the
     working directory. Only kept when downloading by accession and include_flanks_bp is
     not the empty string (i.e. a flank length was supplied). -->
<data name="gene_flanks" label="NCBI Gene Datasets: Flanking fasta" format="fasta" from_work_dir="ncbi_dataset/data/gene_flank.fna">
<filter>query['subcommand']['download_by'] == "accession" and query['subcommand']['include_flanks_bp'] != ""</filter>
</data>
<!-- RNA FASTA output, picked up from ncbi_dataset/data/rna.fna in the working directory.
     Only kept when an include selection exists and it contains "rna". -->
<data name="rna_fasta" label="NCBI Gene Datasets: RNA fasta" format="fasta" from_work_dir="ncbi_dataset/data/rna.fna">
<filter>file_choices['kingdom_cond']['include'] and "rna" in file_choices['kingdom_cond']['include']</filter>
</data>
Expand Down Expand Up @@ -353,7 +356,7 @@ dataformat
<has_text text="house mouse"/>
<has_text text="XR_004936704.1"/>
<has_n_lines min="130"/>
<has_n_columns n="38"/>
<has_n_columns n="39"/>
</assert_contents>
</output>
<output name="threep_utr_fasta">
Expand Down Expand Up @@ -437,15 +440,13 @@ dataformat
</output>
</test>

<!-- 9: datasets download gene accession WP_004675351.1 + include_flanks_bp
test broken at the moment https://github.com/ncbi/datasets/issues/328
-->
<test expect_failure="true"> <!-- expect_num_outputs="3" -->
<!-- 9: datasets download gene accession WP_003249567.1 + include_flanks_bp -->
<test expect_num_outputs="4">
<conditional name="query|subcommand">
<param name="download_by" value="accession"/>
<conditional name="text_or_file">
<param name="text_or_file" value="text"/>
<param name="accession" value="WP_004675351.1"/>
<param name="accession" value="WP_003249567.1"/>
</conditional>
<param name="include_flanks_bp" value="100"/>
</conditional>
Expand All @@ -455,9 +456,10 @@ dataformat
<param name="include" value="gene,protein"/>
</conditional>
</section>
<!-- <output name="gene_data_report">
<output name="gene_data_report">
<assert_contents>
<has_text text="glcE"/>
<has_text text="WP_003249567.1"/>
<has_text text="menG"/>
<has_n_lines n="2"/>
<has_n_columns n="7"/>
</assert_contents>
Expand All @@ -467,18 +469,23 @@ dataformat
<has_text text=">"/>
</assert_contents>
</output>
<output name="gene_flanks">
<assert_contents>
<has_text text=">"/>
</assert_contents>
</output>
<output name="protein_fasta">
<assert_contents>
<has_text text=">"/>
</assert_contents>
</output> -->
</output>
<assert_command>
<has_text text="include-flanks-bp 100"/>
</assert_command>
</test>

<!-- 10: datasets download gene taxon human -->
<test expect_num_outputs="1">
<!-- <test expect_num_outputs="1">
<conditional name="query|subcommand">
<param name="download_by" value="taxon"/>
<param name="taxon_positional" value="human"/>
Expand All @@ -495,9 +502,9 @@ dataformat
<has_n_columns n="8"/>
</assert_contents>
</output>
</test>
</test> -->
<!-- 11: datasets download gene taxon human + \-\-fasta-filter -->
<test expect_num_outputs="2">
<!-- <test expect_num_outputs="2">
<conditional name="query|subcommand">
<param name="download_by" value="taxon"/>
<param name="taxon_positional" value="human"/>
Expand All @@ -524,7 +531,8 @@ dataformat
<assert_contents>
<has_text text=">" n="1" />
</assert_contents>
</output></test>
</output>
</test> -->
</tests>
<help>
<![CDATA[
Expand Down
6 changes: 3 additions & 3 deletions tools/ncbi_datasets/datasets_genome.xml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ $filters.exclude_atypical
## unzip and rehydrate if any data is to be downloaded (include is not None)
#if $file_choices.include
## unzip
&& 7z x -y ncbi_dataset.zip > 7z.log
&& unzip ncbi_dataset.zip
## rehydrate
&& datasets rehydrate
Expand Down Expand Up @@ -463,8 +463,8 @@ $filters.exclude_atypical
</section>
<output_collection name="sequence_report" type="list" count="2"/>
<output_collection name="genome_fasta" type="list:list" count="2">
<expand macro="genome_fasta_assert" el1="GCF_000002945.1" el2="GCF_000002945.1_ASM294v2" expression=">NC_[0-9]+\.[0-9]+ Schizosaccharomyces pombe (mitochondrion|chromosome .*), complete (sequence|genome)" expression_n="4"/>
<expand macro="genome_fasta_assert" el1="GCF_000146045.2" el2="GCF_000146045.2_R64" expression=">NC_[0-9]+\.[0-9]+ Saccharomyces cerevisiae S288[Cc] (mitochondrion|chromosome .*), complete (sequence|genome)" expression_n="17"/>
<expand macro="genome_fasta_assert" el1="GCF_000002945.2" el2="GCF_000002945.2_ASM294v3" expression=">NC_[0-9]+\.[0-9]+ Schizosaccharomyces pombe.*" expression_n="4"/>
<expand macro="genome_fasta_assert" el1="GCF_000146045.2" el2="GCF_000146045.2_R64" expression=">NC_[0-9]+\.[0-9]+ Saccharomyces cerevisiae S288[Cc].*" expression_n="17"/>
</output_collection>
</test>
<!-- tax_exact_match should filter out strains
Expand Down
Loading

0 comments on commit 1c7abf3

Please sign in to comment.