From 520a807d177cd2f2532c3b3bb49539448cf49025 Mon Sep 17 00:00:00 2001 From: Migun Shakya Date: Thu, 4 Oct 2018 15:13:26 -0600 Subject: [PATCH] now outputs different tables --- docs/cases.rst | 8 ++++---- docs/docker.rst | 8 ++++---- src/buildSNPDB.pl | 22 ++++++++++++++-------- src/phame | 18 +++++++++++++++--- test/ctl_files/t3_ebola_contigs.ctl | 4 ++-- 5 files changed, 39 insertions(+), 21 deletions(-) diff --git a/docs/cases.rst b/docs/cases.rst index f6a1fba..ae4235c 100644 --- a/docs/cases.rst +++ b/docs/cases.rst @@ -54,7 +54,7 @@ Save the above file in the same directory where *ref* is, for example as *comple .. code-block:: console - $ runPhaME complete_phame.ctl + $ phame complete_phame.ctl With complete genomes and contigs @@ -113,7 +113,7 @@ Save the above control file in the same directory where *ref* is, for example as .. code-block:: console - $ runPhaME contig_and_complete_phame.ctl + $ phame contig_and_complete_phame.ctl With raw reads, complete genomes, and contigs @@ -173,7 +173,7 @@ Save the above control file in the same directory where *ref* is, for example as .. code-block:: console - $ runPhaME read_contig_and_complete_phame.ctl + $ phame read_contig_and_complete_phame.ctl @@ -214,4 +214,4 @@ PhaME has a unique feature that allows subsetting genomes from a PhaME analysis. .. code-block:: console - $ runPhaME zoom_in.ctl \ No newline at end of file + $ phame zoom_in.ctl \ No newline at end of file diff --git a/docs/docker.rst b/docs/docker.rst index 5e28622..1891fb3 100644 --- a/docs/docker.rst +++ b/docs/docker.rst @@ -17,8 +17,8 @@ To bypass the installation steps, we have provided a docker [image](https://stac .. code-block:: console - $ docker run --rm quay.io/biocontainers/phame:1.0.1--1 runPhaME -h - $ docker run --rm quay.io/biocontainers/phame:1.0.1--1 runPhaME -vcheck + $ docker run --rm quay.io/biocontainers/phame:1.0.1--1 phame -h + $ docker run --rm quay.io/biocontainers/phame:1.0.1--1 phame -vcheck 4. Run your own data using docker. A step by step guide - Create a folder to mount onto your docker @@ -53,8 +53,8 @@ To bypass the installation steps, we have provided a docker [image](https://stac .. code-block:: console - $ docker run --rm -v $(pwd)/phame_analysis_folder:/data migun/phame src/runPhaME /data/ecoli.ctl + $ docker run --rm -v $(pwd)/phame_analysis_folder:/data migun/phame src/phame /data/ecoli.ctl $ git clone https://github.com/mshakya/phame_examples.git - $ docker run --rm -v $(pwd)/phame_examples:/data migun/phame-1 perl src/runPhaME.pl /data/ecoli/ecoli.ctl + $ docker run --rm -v $(pwd)/phame_examples:/data migun/phame-1 perl src/phame /data/ecoli/ecoli.ctl diff --git a/src/buildSNPDB.pl b/src/buildSNPDB.pl index f639099..7beb16a 100755 --- a/src/buildSNPDB.pl +++ b/src/buildSNPDB.pl @@ -68,6 +68,7 @@ my $query; my %positions; my $coreSNPstat; +my $coverageFile; # When 60% linear reference lenght are gap (query compared to) # The query will not use to build snp alignment @@ -108,6 +109,7 @@ $cdsMatrixfile = "$indir\/$project\_snp_CDSMatrix.txt"; $intMatrixfile = "$indir\/$project\_snp_intergenicMatrix.txt"; $coreSNPstat = "$indir\/$project\_core_stats.txt"; +$coverageFile = "$indir\/$project\_coverage.txt"; open( ALL_OUT, ">$all_outfile" ) || die "$!"; open( OUT, ">$allSNPoutfile" ) || die "$!"; @@ -119,6 +121,8 @@ open( BASE, ">>$basefile" ) || die "$!"; open( PMAT, ">$pairMatrixfile" ) || die "$!"; open( CMAT, ">$coreMatrixfile" ) || die "$!"; +open( COV, ">>$coverageFile" ) || die "$!"; +print COV "Genome\t\Gaps\tLinear Coverage\n"; if ( $coding == 1 ) { open( IMAT, ">$intMatrixfile" ) || die "$!"; open( CDSMAT, ">$cdsMatrixfile" ) || die "$!"; @@ -174,6 +178,7 @@ close COMP; close GAPF; close BASE; +close COV; close PMAT; close CMAT; close CDSMAT; @@ -402,7 +407,7 @@ sub create_ALLsnp_array { print OUT "\n"; } #header list $SNPcount = scalar( keys %SNPcount ); - print BASE "Total SNPs:\t$SNPcount\n"; + print BASE "Total SNPs\t$SNPcount\n"; } ################################################################################ @@ -452,7 +457,7 @@ sub create_CDSsnp_array { print CDSOUT "\n"; } #header list $CDScount = scalar( keys %CDSSNPcount ); - if ( $coding == 1 ) { print BASE "CDS SNPs:\t$CDScount\n"; } + if ( $coding == 1 ) { print BASE "CDS SNPs\t$CDScount\n"; } } ################################################################################ @@ -797,11 +802,12 @@ sub print_summary { my $ref_len = length $ref_sequence; foreach my $query ( sort keys %query_gaps ) { - print BASE "$query\tGap_length\t", $query_gaps{$query}, "\n"; - print BASE "$query\tLinear_coverage\t", sprintf("%.3f", ($ref_len - $query_gaps{$query})/$ref_len), "\n"; + # print BASE "$query\tGap_length\t", $query_gaps{$query}, "\n"; + # print BASE "$query\tLinear_coverage\t", sprintf("%.3f", ($ref_len - $query_gaps{$query})/$ref_len), "\n"; + print COV "$query\t", $query_gaps{$query}, "\t", sprintf("%.3f", ($ref_len - $query_gaps{$query})/$ref_len), "\n"; } - print BASE "Reference used:\t$name\n"; + print BASE "Reference used\t$name\n"; my ( $start, $end ); foreach my $keys ( sort { $a <=> $b } keys %gap_location ) { @@ -822,9 +828,9 @@ sub print_summary { } my $base_total = $ref_len - $gap_total; - print BASE "Reference sequence length:\t", $ref_len, - "\nTotal gap length:\t$gap_total -Core genome length:\t$base_total\n"; + print BASE "Reference genome length\t", $ref_len, + "\nTotal gap length\t$gap_total +Core genome length\t$base_total\n"; close GAPF; } diff --git a/src/phame b/src/phame index 1863aff..71012c7 100755 --- a/src/phame +++ b/src/phame @@ -11,6 +11,16 @@ use File::Basename; use Time::Piece; use PhaME; use Cwd; +# use Statistics::Distribution; +# use Parallel::ForkManagers; +use Bio::SeqIO; +use Bio::Tools::CodonTable; +use Cwd 'abs_path'; +use Data::Dumper; +use FileHandle; +use Getopt::Std; +use diagnostics; +use fastq_utility; $| = 1; @@ -368,12 +378,13 @@ if ( $data == 7 ) { $check = 0; } my $snpdir = $outdir . '/snps'; my $gapdir = $outdir . '/gaps'; my $statdir = $outdir . '/stats'; -my $summary = $outdir . '/' . "$project\_summaryStatistics.txt"; +my $summary = $outdir . '/' . "$project\_genome_lengths.txt"; `mkdir -p $snpdir $gapdir $statdir`; if ( $time == 1 && $check == 0 && $data != 7 ) { open( ALL, ">$workdir/working_list.txt" ) || die "$!"; open( STAT, ">$summary" ) || die "$!"; + print STAT "Genome\tLength(bp)\n"; } if ( $time == 1 && $check > 0 ) { @@ -382,6 +393,7 @@ if ( $time == 1 && $check > 0 ) { elsif ( $time == 2 || $data == 7 ) { open( ALL, ">>$workdir/working_list.txt" ) || die "$!"; open( STAT, ">>$summary" ) || die "$!"; + print STAT "Genome\tLength(bp)\n"; } if ( $check == 0 ) { @@ -644,7 +656,7 @@ if ( $nucmer == 1 ) { foreach my $names ( sort keys %fasta_list ) { print FAS "$names\n"; print ALL "$names\n"; - print STAT "$names\tTotal_length\t", $fasta_list{$names}, "\n" + print STAT "$names\t", $fasta_list{$names}, "\n" if ( $time ne "2" ); # print "$names\t",$fasta_list{$names},"\n"; @@ -662,7 +674,7 @@ if ( $contig_nucmer == 1 ) { foreach my $names ( sort keys %contig_list ) { print CON "$names\n"; print ALL "$names\n"; - print STAT "$names\tTotal_length\t", $contig_list{$names}, "\n"; + print STAT "$names\t", $contig_list{$names}, "\n"; } if ( $buildSNPdb == 1 ) { diff --git a/test/ctl_files/t3_ebola_contigs.ctl b/test/ctl_files/t3_ebola_contigs.ctl index 8cd8681..b637395 100644 --- a/test/ctl_files/t3_ebola_contigs.ctl +++ b/test/ctl_files/t3_ebola_contigs.ctl @@ -8,7 +8,7 @@ cdsSNPS = 0 # 0:no cds SNPS; 1:cds SNPs - buildSNPdb = 0 # 0: only align to reference 1: build SNP database of all complete genome + buildSNPdb = 1 # 0: only align to reference 1: build SNP database of all complete genome FirstTime = 1 # 1:yes; 2:update existing SNP alignment @@ -25,7 +25,7 @@ code = 1 # 0:Bacteria; 1:Virus - clean = 0 # 0:no clean; 1:clean + clean = 1 # 0:no clean; 1:clean threads = 2 # Number of threads to use