Skip to content

Commit

Permalink
Merge pull request #69 from pirovc/dev
Browse files Browse the repository at this point in the history
genome_updater v0.5.1
  • Loading branch information
pirovc authored May 26, 2022
2 parents 1e046dd + d1c2fc1 commit a512fae
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 11 deletions.
4 changes: 0 additions & 4 deletions .simplecov

This file was deleted.

1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ dist: focal
before_install:
- gem install bashcov codecov
- sudo apt-get install parallel
- echo -e "require 'codecov'\nrequire 'simplecov'\nSimpleCov.formatter = Codecov::SimpleCov::Formatter" > .simplecov

script:
- bashcov tests/libs/bats/bin/bats tests/integration_offline.bats
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ or
2) representative genome
3) na
B) Assembly level:
1) Complete genome
1) Complete Genome
2) Chromosome
3) Scaffold
4) Contig
Expand All @@ -200,7 +200,7 @@ or
┌─┐┌─┐┌┐┌┌─┐┌┬┐┌─┐ ┬ ┬┌─┐┌┬┐┌─┐┌┬┐┌─┐┬─┐
│ ┬├┤ ││││ ││││├┤ │ │├─┘ ││├─┤ │ ├┤ ├┬┘
└─┘└─┘┘└┘└─┘┴ ┴└─┘────└─┘┴ ─┴┘┴ ┴ ┴ └─┘┴└─
v0.5.0
v0.5.1

Database options:
-d Database (comma-separated entries)
Expand Down
8 changes: 4 additions & 4 deletions genome_updater.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ IFS=$' '
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

version="0.5.0"
version="0.5.1"

# Define base_url or use local files (for testing)
local_dir=${local_dir:-}
Expand Down Expand Up @@ -114,7 +114,7 @@ check_assembly_summary() # parameter: ${1} assembly_summary file - return 0 true
if [ ! -z $(tail -c -1 "${1}") ]; then return 1; fi

# if any line starts with the header character (#)
grep -m 1 "^#" "${1}" #> /dev/null
grep -m 1 "^#" "${1}" > /dev/null 2>&1
if [ $? -eq 0 ]; then return 1; fi

# if contains parts of the header anywhere
Expand Down Expand Up @@ -474,7 +474,7 @@ filter_top_assemblies() # parameter: ${1} assembly_summary file, ${2} modified a
col5["reference genome"]=1;
col5["representative genome"]=2;
col5["na"]=3;
col12["Complete genome"]=1;
col12["Complete Genome"]=1;
col12["Chromosome"]=2;
col12["Scaffold"]=3;
col12["Contig"]=4;
Expand Down Expand Up @@ -1048,7 +1048,7 @@ elif [[ "${tax_mode}" == "gtdb" ]]; then
fi

# top assemblies by rank
if [[ ! "${top_assemblies}" =~ ^[0-9]+$ && ! "${top_assemblies}" =~ ^(superkingdom|phylum|class|order|family|genus|species)\:[1-9]+$ ]]; then
if [[ ! "${top_assemblies}" =~ ^[0-9]+$ && ! "${top_assemblies}" =~ ^(superkingdom|phylum|class|order|family|genus|species)\:[1-9][0-9]*$ ]]; then
echo "${top_assemblies}: invalid top assemblies - should be a number > 0 or [superkingdom|phylum|class|order|family|genus|species]:number"; exit 1;
else
top_assemblies_rank=""
Expand Down
68 changes: 67 additions & 1 deletion tests/integration_offline.bats
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ setup_file() {
@test "Top 1 superkingdom ncbi" {
outdir=${outprefix}top-superkingdom-ncbi/
label="test"
# Keep only top 1 for selected species
# Keep only top 1 for superkingdom
run ./genome_updater.sh -d refseq -g archaea,fungi -A superkingdom:1 -b ${label} -o ${outdir}
sanity_check ${outdir} ${label}

Expand Down Expand Up @@ -303,6 +303,72 @@ setup_file() {
assert [ $(count_files ${outdir} ${label_phylum}) -gt 0 ]
}

@test "Top assemblies order" {
    # Checks which assembly is kept when -A superkingdom:1 is requested, first
    # while narrowing the refseq categories passed with -c, then while
    # narrowing the assembly levels passed with -l.
    # Every run is followed by sanity_check and by a comparison of one column
    # of ${outdir}assembly_summary.txt against the expected value.
    # NOTE(review): get_values_as and sanity_check are defined outside this
    # test; get_values_as presumably prints the value found in the given
    # column of the file - confirm against the helper.
    # All expansions are quoted so that an output prefix containing spaces or
    # glob characters is not word-split.

    # ---- Part 1: refseq category (column 5) ----
    outdir="${outprefix}top-assemblies-order-refseq-category/"

    # Selection order
    # col5["reference genome"]=1;
    # col5["representative genome"]=2;
    # col5["na"]=3;
    # should always pick the best-ranked refseq category among the requested
    # ones for the top superkingdom entry (just one)

    label="3"
    rscat="reference genome,representative genome,na"
    run ./genome_updater.sh -d refseq -g archaea -c "${rscat}" -A superkingdom:1 -b "${label}" -o "${outdir}"
    sanity_check "${outdir}" "${label}"
    # --- no reference genome in example files ---
    # so the next category in the selection order is expected instead
    assert_equal "representative genome" "$(get_values_as "${outdir}assembly_summary.txt" 5)"

    label="2"
    rscat="representative genome,na"
    run ./genome_updater.sh -d refseq -g archaea -c "${rscat}" -A superkingdom:1 -b "${label}" -o "${outdir}"
    sanity_check "${outdir}" "${label}"
    assert_equal "representative genome" "$(get_values_as "${outdir}assembly_summary.txt" 5)"

    label="1"
    rscat="na"
    run ./genome_updater.sh -d refseq -g archaea -c "${rscat}" -A superkingdom:1 -b "${label}" -o "${outdir}"
    sanity_check "${outdir}" "${label}"
    assert_equal "na" "$(get_values_as "${outdir}assembly_summary.txt" 5)"

    # ---- Part 2: assembly level (column 12) ----
    outdir="${outprefix}top-assemblies-order-assembly-level/"

    # Selection order
    # col12["Complete Genome"]=1;
    # col12["Chromosome"]=2;
    # col12["Scaffold"]=3;
    # col12["Contig"]=4;
    # should always pick the best-ranked assembly level among the requested
    # ones for the top superkingdom entry (just one)

    label="4"
    aslvl="complete genome,chromosome,scaffold,contig"
    run ./genome_updater.sh -d refseq -g archaea -l "${aslvl}" -A superkingdom:1 -b "${label}" -o "${outdir}"
    sanity_check "${outdir}" "${label}"
    assert_equal "Complete Genome" "$(get_values_as "${outdir}assembly_summary.txt" 12)"

    label="3"
    aslvl="chromosome,scaffold,contig"
    run ./genome_updater.sh -d refseq -g archaea -l "${aslvl}" -A superkingdom:1 -b "${label}" -o "${outdir}"
    sanity_check "${outdir}" "${label}"
    assert_equal "Chromosome" "$(get_values_as "${outdir}assembly_summary.txt" 12)"

    label="2"
    aslvl="scaffold,contig"
    run ./genome_updater.sh -d refseq -g archaea -l "${aslvl}" -A superkingdom:1 -b "${label}" -o "${outdir}"
    sanity_check "${outdir}" "${label}"
    assert_equal "Scaffold" "$(get_values_as "${outdir}assembly_summary.txt" 12)"

    label="1"
    aslvl="contig"
    run ./genome_updater.sh -d refseq -g archaea -l "${aslvl}" -A superkingdom:1 -b "${label}" -o "${outdir}"
    sanity_check "${outdir}" "${label}"
    assert_equal "Contig" "$(get_values_as "${outdir}assembly_summary.txt" 12)"
}

@test "Date start filter" {
outdir=${outprefix}date-start-filter/
Expand Down

0 comments on commit a512fae

Please sign in to comment.