diff --git a/lib/PhaME.pm b/lib/PhaME.pm index a53d89b..d6dd5eb 100644 --- a/lib/PhaME.pm +++ b/lib/PhaME.pm @@ -131,7 +131,6 @@ print OUT ">$name\n"; open (IN,$file)||die "$!"; while (){ chomp; - $_ =~ s/\r\n|\n|\r/\n/g; if (!/^>/){$sequence.=$_;}#print OUT $_;} } print OUT "$sequence\n";; @@ -164,7 +163,6 @@ if ($fh->open("<$file")){ $/=">"; while (<$fh>){ $_=~ s/\>//g; - $_ =~ s/\r\n|\n|\r/\n/g; unless($_){next;}; ($header,@seq)=split /\n/,$_; $sequence= join "",@seq; @@ -433,6 +431,7 @@ my $list=shift; my $thread=shift; my $name=shift; my $error=shift; +my $aligner=shift; my $log=shift; my $outdir=$indir."/results"; my $reference= $outdir.'/temp/'.$name.'.fna'; @@ -440,7 +439,7 @@ my $type; if(!-e $reference || -z $reference){ $reference = $indir.'/files/'.$name.'.fna';} print "\n"; -my $map="runReadsMapping.pl -r $reference -q $indir -d $outdir -t $thread -l $list -a bowtie 2>>$error >> $log\n\n"; +my $map="runReadsMapping.pl -r $reference -q $indir -d $outdir -t $thread -l $list -a $aligner 2>>$error >> $log\n\n"; print $map; if (system ($map)){die "Error running $map.\n";} diff --git a/phame.ctl b/phame.ctl index fe93b27..d7017af 100644 --- a/phame.ctl +++ b/phame.ctl @@ -15,6 +15,8 @@ # 6:combination F+C+R; 7:realignment *See below reads = 2 # 1: single reads; 2: paired reads; 3: both types present; + aligner = bowtie # support bowtie/bwa/minimap2 + tree = 0 # 0:no tree; 1:use FastTree; 2:use RAxML; 3:use both; modelTest = 0 # 0:no; 1:yes; # Only used when building a tree using RAxML bootstrap = 0 # 0:no; 1:yes; # Run bootstrapping *See below diff --git a/src/runPhaME.pl b/src/runPhaME.pl index 7e6a362..4bbf67b 100755 --- a/src/runPhaME.pl +++ b/src/runPhaME.pl @@ -47,6 +47,7 @@ my $project; my $rsignal=0; my $reference; +my $aligner="bowtie"; my $name; my $path; my $suffix; @@ -97,6 +98,7 @@ if (! -e $outdir){`mkdir -p $outdir`;} } if (/project\s*=\s*(\S+)\s*#{0,1}.*$/){$project=$1;} + if (/aligner\s*=\s*(\S+)\s*#{0,1}.*$/){$aligner=$1;} if (/reference\s*=\s*(0|1)\s*#{0,1}.*$/){$rsignal=$1;} if ($rsignal==1 && /reffile\s*=\s*(\S+)\s*#{0,1}.*$/){$reference="$refdir/$1";} elsif($rsignal==0){ @@ -315,6 +317,7 @@ print "Complete\n"; } # check=0 +$name =~ s/\W/_/g; $reference = "$workdir/files/$name.fna"; if ($nucmer==1){ @@ -358,14 +361,14 @@ $mappingGaps=PhaME::identifyGaps($outdir,"$workdir/fasta_list.txt",$name,"map",$project); PhaME::removeGaps($bindir,$reference,$mappingGaps); &print_timeInterval($runtime,"Mapping reads to reference\n"); - my $end=PhaME::readsMapping($workdir,$bindir,"$workdir/reads_list.txt",$threads,$name,$error,$logfile); + my $end=PhaME::readsMapping($workdir,$bindir,"$workdir/reads_list.txt",$threads,$name,$error,$aligner,$logfile); &print_timeInterval($runtime,"$end\n"); } elsif ($time==1){ my $tempdir=$outdir.'/temp/'; `mkdir -p $tempdir; cp $reference $tempdir`; &print_timeInterval($runtime,"Mapping reads to reference\n"); - my $end=PhaME::readsMapping($workdir,$bindir,"$workdir/reads_list.txt",$threads,$name,$error,$logfile); + my $end=PhaME::readsMapping($workdir,$bindir,"$workdir/reads_list.txt",$threads,$name,$error,$aligner,$logfile); &print_timeInterval($runtime,"$end\n"); } } diff --git a/src/runReadsMapping.pl b/src/runReadsMapping.pl index 47f0b85..db81368 100755 --- a/src/runReadsMapping.pl +++ b/src/runReadsMapping.pl @@ -179,6 +179,7 @@ sub create_bowtie_commands elsif ($temp=~/sread/i){ $prefix.="\_$name"; my $bowtie_command= "runReadsToGenome.pl -u $read -ref $reference -pre $prefix -d $outdir -aligner $aligner" ; + $bowtie_command= "runReadsToGenome.pl -long $read -ref $reference -pre $prefix -d $outdir -aligner $aligner" if ($aligner =~ /minimap/); # print "READ1: $read1\n$bowtie_command\n\n\n"; push (@command,$bowtie_command); }elsif ($temp=~/_read/i){ diff --git a/src/runReadsToGenome.pl b/src/runReadsToGenome.pl index 3acee0a..b7fb828 100755 --- a/src/runReadsToGenome.pl +++ b/src/runReadsToGenome.pl @@ -1,9 +1,9 @@ #! /usr/bin/perl # required: 1. R -# 2. samtools 0.1.18 mt +# 2. samtools > 1.1 # 3. bwa 0.6 sampe patched # 4. bowtie2 -# 5. bcftools (from samtools package) +# 5. bcftools > 1.1 # 6. vcfutils.pl (from samtools package) # 7. snap # input: paired reads files: forward.fasta/q and reverse.fasta/q @@ -38,6 +38,7 @@ my $bwa_options="-t 4 "; my $bowtie_options="-p 8 -a"; my $snap_options="-t 4 -M "; +my $minimap2_options="-t 4 "; my $aligner="bwa"; my ($window_size, $step_size)=(1000,200); my $pacbio_bwa_option="-b5 -q2 -r1 -z10 "; @@ -55,12 +56,15 @@ 'd=s' => \$outDir, 'bwa_options=s' => \$bwa_options, 'bowtie_options=s' => \$bowtie_options, + 'minimap2_options=s' => \$minimap2_options, 'snap_options=s' => \$snap_options, 'pacbio' => \$pacbio, 'debug' => \$debug, 'help|?', sub {Usage()} ); +my $tmp = "$outDir"; + ## input check ## unless ( -e $ref_file && $outDir) { &Usage;} unless ( $paired_files or -e $file_long or -e $singleton) { &Usage; } @@ -85,7 +89,8 @@ my ($bwa_threads)= $bwa_options =~ /-t (\d+)/; my ($bowtie_threads)= $bowtie_options =~ /-p (\d+)/; my ($snap_threads)= $snap_options =~ /-t (\d+)/; -my $samtools_threads = $bwa_threads || $bowtie_threads || $snap_threads ||"1"; +my ($minimap2_threads)= $minimap2_options =~ /-t (\d+)/; +my $samtools_threads = $bwa_threads || $bowtie_threads || $snap_threads || $minimap2_threads || "1"; my ($ref_file_name, $ref_file_path, $ref_file_suffix)=fileparse("$ref_file", qr/\.[^.]*/); # index reference @@ -104,6 +109,14 @@ $ref_file=&fold($ref_file); `snap index $ref_file $ref_file.snap `; } +elsif ($aligner =~ /minimap2/i ){ + # fold sequence in 100 bp per line (samtools cannot accept > 65535 bp one line sequence) + $ref_file=&fold($ref_file); + `minimap2 -d $tmp/$ref_file_name.mmi $ref_file `; +} + +## index reference sequence +`samtools faidx $ref_file`; if ($file_long){ print "Mapping long reads\n"; @@ -115,7 +128,10 @@ #`echo -e "Mapped_reads_number:\t$mapped_Long_reads" >>$outDir/LongReads_aln_stats.txt`; } elsif ($aligner =~ /snap/i){`snap single $ref_file.snap $file_long -o $outDir/LongReads$$.sam $snap_options`;} - `samtools view -@ $samtools_threads -uhS $outDir/LongReads$$.sam | samtools sort -@ $samtools_threads - $outDir/LongReads$$`; + elsif ($aligner =~ /minimap2/i){ + `minimap2 -La $minimap2_options $tmp/$ref_file_name.mmi $file_long > $outDir/LongReads$$.sam`; + } + `samtools view -t $ref_file.fai -@ $samtools_threads -uhS $outDir/LongReads$$.sam | samtools sort -@ $samtools_threads -O BAM -T $outDir -o $outDir/LongReads$$.bam - `; } if ($paired_files){ print "Mapping paired end reads\n"; @@ -128,7 +144,10 @@ `bwa sampe -t $bwa_threads -a 100000 $ref_file /tmp/reads_1_$$.sai /tmp/reads_2_$$.sai $file1 $file2 > $outDir/paired$$.sam`; } elsif ($aligner =~ /snap/i){`snap paired $ref_file.snap $file1 $file2 -o $outDir/paired$$.sam $snap_options`;} - `samtools view -@ $samtools_threads -uhS $outDir/paired$$.sam | samtools sort -@ $samtools_threads - $outDir/paired$$`; + elsif ($aligner =~ /minimap2/i){ + `minimap2 $minimap2_options -ax sr $tmp/$ref_file_name.mmi $file1 $file2 > $outDir/paired$$.sam`; + } + `samtools view -t $ref_file.fai -@ $samtools_threads -uhS $outDir/paired$$.sam | samtools sort -@ $samtools_threads -O BAM -T $outDir -o $outDir/paired$$.bam -`; } if ($singleton){ @@ -141,7 +160,10 @@ `bwa samse -n 50 -t $bwa_threads $ref_file /tmp/singleton$$.sai $singleton > $outDir/singleton$$.sam`; } elsif($aligner =~ /snap/i){`snap single $ref_file.snap $file_long -o $outDir/singleton$$.sam $snap_options`;} - `samtools view -@ $samtools_threads -uhS $outDir/singleton$$.sam | samtools sort -@ $samtools_threads - $outDir/singleton$$`; + elsif ($aligner =~ /minimap2/i){ + `minimap2 $minimap2_options -ax sr $tmp/$ref_file_name.mmi $singleton> $outDir/singleton$$.sam`; + } + `samtools view -t $ref_file.fai -@ $samtools_threads -uhS $outDir/singleton$$.sam | samtools sort -@ $samtools_threads -O BAM -T $outDir -o $outDir/singleton$$.bam -`; } # merge bam files if there are different file type, paired, single end, long.. @@ -167,8 +189,6 @@ `mv $outDir/LongReads$$.bam $bam_output`; } -## index reference sequence -`samtools faidx $ref_file`; ## index BAM file `samtools index $bam_output $bam_index_output`; @@ -179,8 +199,13 @@ ## SNP call print "SNPs/Indels call...\n"; -`samtools mpileup -ugf $ref_file $bam_output | bcftools view -bcg - > $bcf_output `; -`bcftools view $bcf_output | bcftools view -v -S - | vcfutils.pl varFilter -a 3 -d1 -D1000 > $vcf_output`; +my $max_depth=100000; +my $min_indel_candidate_depth=3; +my $min_alt_bases=3; +my $min_depth=1; + +`bcftools mpileup -d $max_depth -L $max_depth -m $min_indel_candidate_depth -Ov -f $ref_file $bam_output | bcftools call -cO b - > $bcf_output 2>/dev/null`; +`bcftools view -v snps,indels,mnps,ref,bnd,other -Ov $bcf_output | vcfutils.pl varFilter -a$min_alt_bases -d$min_depth -D$max_depth > $vcf_output`; ## derived chimera info if ($aligner=~ /bwa/i and $paired_files){ @@ -193,7 +218,7 @@ ## generate genome coverage plots and histograms print "Generate genome coverage plots and histograms...\n"; -`samtools mpileup -BQ0 -d10000000 -f $ref_file $bam_output >$pileup_output`; +`samtools mpileup -ABQ0 -d10000000 -f $ref_file $bam_output >$pileup_output`; if ($paired_files){ ## generate proper-paired reads coverage diff --git a/test/phame.ctl b/test/phame.ctl index f77615f..6bebe51 100644 --- a/test/phame.ctl +++ b/test/phame.ctl @@ -15,12 +15,14 @@ # 6:combination F+C+R; 7:realignment *See below reads = 2 # 1: single reads; 2: paired reads; 3: both types present; + aligner = bowtie # support bowtie/bwa/minimap2 + tree = 1 # 0:no tree; 1:use FastTree; 2:use RAxML; 3:use both; modelTest = 0 # 0:no; 1:yes; # Only used when building a tree using RAxML bootstrap = 1 # 0:no; 1:yes; # Run bootstrapping *See below N = 100 # Number of bootstraps to run *See below - PosSelect = 2 # 0:No; 1:use PAML; 2:use HyPhy; 3:use both + PosSelect = 0 # 0:No; 1:use PAML; 2:use HyPhy; 3:use both code = 1 # 0:Bacteria; 1:Virus