Skip to content

Commit

Permalink
add alinger option for PhaME. support bowtie/bwa/minimap2
Browse files Browse the repository at this point in the history
  • Loading branch information
chienchi committed Jun 19, 2018
1 parent cd4408b commit 81505f5
Show file tree
Hide file tree
Showing 6 changed files with 49 additions and 17 deletions.
5 changes: 2 additions & 3 deletions lib/PhaME.pm
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,6 @@ print OUT ">$name\n";
open (IN,$file)||die "$!";
while (<IN>){
chomp;
$_ =~ s/\r\n|\n|\r/\n/g;
if (!/^>/){$sequence.=$_;}#print OUT $_;}
}
print OUT "$sequence\n";;
Expand Down Expand Up @@ -164,7 +163,6 @@ if ($fh->open("<$file")){
$/=">";
while (<$fh>){
$_=~ s/\>//g;
$_ =~ s/\r\n|\n|\r/\n/g;
unless($_){next;};
($header,@seq)=split /\n/,$_;
$sequence= join "",@seq;
Expand Down Expand Up @@ -433,14 +431,15 @@ my $list=shift;
my $thread=shift;
my $name=shift;
my $error=shift;
my $aligner=shift;
my $log=shift;
my $outdir=$indir."/results";
my $reference= $outdir.'/temp/'.$name.'.fna';
my $type;

if(!-e $reference || -z $reference){ $reference = $indir.'/files/'.$name.'.fna';}
print "\n";
my $map="runReadsMapping.pl -r $reference -q $indir -d $outdir -t $thread -l $list -a bowtie 2>>$error >> $log\n\n";
my $map="runReadsMapping.pl -r $reference -q $indir -d $outdir -t $thread -l $list -a $aligner 2>>$error >> $log\n\n";
print $map;
if (system ($map)){die "Error running $map.\n";}

Expand Down
2 changes: 2 additions & 0 deletions phame.ctl
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
# 6:combination F+C+R; 7:realignment *See below
reads = 2 # 1: single reads; 2: paired reads; 3: both types present;

aligner = bowtie # support bowtie/bwa/minimap2

tree = 0 # 0:no tree; 1:use FastTree; 2:use RAxML; 3:use both;
modelTest = 0 # 0:no; 1:yes; # Only used when building a tree using RAxML
bootstrap = 0 # 0:no; 1:yes; # Run bootstrapping *See below
Expand Down
7 changes: 5 additions & 2 deletions src/runPhaME.pl
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
my $project;
my $rsignal=0;
my $reference;
my $aligner="bowtie";
my $name;
my $path;
my $suffix;
Expand Down Expand Up @@ -97,6 +98,7 @@
if (! -e $outdir){`mkdir -p $outdir`;}
}
if (/project\s*=\s*(\S+)\s*#{0,1}.*$/){$project=$1;}
if (/aligner\s*=\s*(\S+)\s*#{0,1}.*$/){$aligner=$1;}
if (/reference\s*=\s*(0|1)\s*#{0,1}.*$/){$rsignal=$1;}
if ($rsignal==1 && /reffile\s*=\s*(\S+)\s*#{0,1}.*$/){$reference="$refdir/$1";}
elsif($rsignal==0){
Expand Down Expand Up @@ -315,6 +317,7 @@
print "Complete\n";
} # check=0

$name =~ s/\W/_/g;
$reference = "$workdir/files/$name.fna";

if ($nucmer==1){
Expand Down Expand Up @@ -358,14 +361,14 @@
$mappingGaps=PhaME::identifyGaps($outdir,"$workdir/fasta_list.txt",$name,"map",$project);
PhaME::removeGaps($bindir,$reference,$mappingGaps);
&print_timeInterval($runtime,"Mapping reads to reference\n");
my $end=PhaME::readsMapping($workdir,$bindir,"$workdir/reads_list.txt",$threads,$name,$error,$logfile);
my $end=PhaME::readsMapping($workdir,$bindir,"$workdir/reads_list.txt",$threads,$name,$error,$aligner,$logfile);
&print_timeInterval($runtime,"$end\n");
}
elsif ($time==1){
my $tempdir=$outdir.'/temp/';
`mkdir -p $tempdir; cp $reference $tempdir`;
&print_timeInterval($runtime,"Mapping reads to reference\n");
my $end=PhaME::readsMapping($workdir,$bindir,"$workdir/reads_list.txt",$threads,$name,$error,$logfile);
my $end=PhaME::readsMapping($workdir,$bindir,"$workdir/reads_list.txt",$threads,$name,$error,$aligner,$logfile);
&print_timeInterval($runtime,"$end\n");
}
}
Expand Down
1 change: 1 addition & 0 deletions src/runReadsMapping.pl
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ sub create_bowtie_commands
elsif ($temp=~/sread/i){
$prefix.="\_$name";
my $bowtie_command= "runReadsToGenome.pl -u $read -ref $reference -pre $prefix -d $outdir -aligner $aligner" ;
$bowtie_command= "runReadsToGenome.pl -long $read -ref $reference -pre $prefix -d $outdir -aligner $aligner" if ($aligner =~ /minimap/);
# print "READ1: $read1\n$bowtie_command\n\n\n";
push (@command,$bowtie_command);
}elsif ($temp=~/_read/i){
Expand Down
47 changes: 36 additions & 11 deletions src/runReadsToGenome.pl
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#! /usr/bin/perl
# required: 1. R
# 2. samtools 0.1.18 mt
# 2. samtools > 1.1
# 3. bwa 0.6 sampe patched
# 4. bowtie2
# 5. bcftools (from samtools package)
# 5. bcftools > 1.1
# 6. vcfutils.pl (from samtools package)
# 7. snap
# input: paired reads files: forward.fasta/q and reverse.fasta/q
Expand Down Expand Up @@ -38,6 +38,7 @@
my $bwa_options="-t 4 ";
my $bowtie_options="-p 8 -a";
my $snap_options="-t 4 -M ";
my $minimap2_options="-t 4 ";
my $aligner="bwa";
my ($window_size, $step_size)=(1000,200);
my $pacbio_bwa_option="-b5 -q2 -r1 -z10 ";
Expand All @@ -55,12 +56,15 @@
'd=s' => \$outDir,
'bwa_options=s' => \$bwa_options,
'bowtie_options=s' => \$bowtie_options,
'minimap2_options=s' => \$minimap2_options,
'snap_options=s' => \$snap_options,
'pacbio' => \$pacbio,
'debug' => \$debug,
'help|?', sub {Usage()}
);

my $tmp = "$outDir";

## input check ##
unless ( -e $ref_file && $outDir) { &Usage;}
unless ( $paired_files or -e $file_long or -e $singleton) { &Usage; }
Expand All @@ -85,7 +89,8 @@
my ($bwa_threads)= $bwa_options =~ /-t (\d+)/;
my ($bowtie_threads)= $bowtie_options =~ /-p (\d+)/;
my ($snap_threads)= $snap_options =~ /-t (\d+)/;
my $samtools_threads = $bwa_threads || $bowtie_threads || $snap_threads ||"1";
my ($minimap2_threads)= $minimap2_options =~ /-t (\d+)/;
my $samtools_threads = $bwa_threads || $bowtie_threads || $snap_threads || $minimap2_threads || "1";
my ($ref_file_name, $ref_file_path, $ref_file_suffix)=fileparse("$ref_file", qr/\.[^.]*/);

# index reference
Expand All @@ -104,6 +109,14 @@
$ref_file=&fold($ref_file);
`snap index $ref_file $ref_file.snap `;
}
elsif ($aligner =~ /minimap2/i ){
# fold sequence in 100 bp per line (samtools cannot accept > 65535 bp one line sequence)
$ref_file=&fold($ref_file);
`minimap2 -d $tmp/$ref_file_name.mmi $ref_file `;
}

## index reference sequence
`samtools faidx $ref_file`;

if ($file_long){
print "Mapping long reads\n";
Expand All @@ -115,7 +128,10 @@
#`echo -e "Mapped_reads_number:\t$mapped_Long_reads" >>$outDir/LongReads_aln_stats.txt`;
}
elsif ($aligner =~ /snap/i){`snap single $ref_file.snap $file_long -o $outDir/LongReads$$.sam $snap_options`;}
`samtools view -@ $samtools_threads -uhS $outDir/LongReads$$.sam | samtools sort -@ $samtools_threads - $outDir/LongReads$$`;
elsif ($aligner =~ /minimap2/i){
`minimap2 -La $minimap2_options $tmp/$ref_file_name.mmi $file_long > $outDir/LongReads$$.sam`;
}
`samtools view -t $ref_file.fai -@ $samtools_threads -uhS $outDir/LongReads$$.sam | samtools sort -@ $samtools_threads -O BAM -T $outDir -o $outDir/LongReads$$.bam - `;
}
if ($paired_files){
print "Mapping paired end reads\n";
Expand All @@ -128,7 +144,10 @@
`bwa sampe -t $bwa_threads -a 100000 $ref_file /tmp/reads_1_$$.sai /tmp/reads_2_$$.sai $file1 $file2 > $outDir/paired$$.sam`;
}
elsif ($aligner =~ /snap/i){`snap paired $ref_file.snap $file1 $file2 -o $outDir/paired$$.sam $snap_options`;}
`samtools view -@ $samtools_threads -uhS $outDir/paired$$.sam | samtools sort -@ $samtools_threads - $outDir/paired$$`;
elsif ($aligner =~ /minimap2/i){
`minimap2 $minimap2_options -ax sr $tmp/$ref_file_name.mmi $file1 $file2 > $outDir/paired$$.sam`;
}
`samtools view -t $ref_file.fai -@ $samtools_threads -uhS $outDir/paired$$.sam | samtools sort -@ $samtools_threads -O BAM -T $outDir -o $outDir/paired$$.bam -`;
}

if ($singleton){
Expand All @@ -141,7 +160,10 @@
`bwa samse -n 50 -t $bwa_threads $ref_file /tmp/singleton$$.sai $singleton > $outDir/singleton$$.sam`;
}
elsif($aligner =~ /snap/i){`snap single $ref_file.snap $file_long -o $outDir/singleton$$.sam $snap_options`;}
`samtools view -@ $samtools_threads -uhS $outDir/singleton$$.sam | samtools sort -@ $samtools_threads - $outDir/singleton$$`;
elsif ($aligner =~ /minimap2/i){
`minimap2 $minimap2_options -ax sr $tmp/$ref_file_name.mmi $singleton> $outDir/singleton$$.sam`;
}
`samtools view -t $ref_file.fai -@ $samtools_threads -uhS $outDir/singleton$$.sam | samtools sort -@ $samtools_threads -O BAM -T $outDir -o $outDir/singleton$$.bam -`;
}

# merge bam files if there are different file type, paired, single end, long..
Expand All @@ -167,8 +189,6 @@
`mv $outDir/LongReads$$.bam $bam_output`;
}

## index reference sequence
`samtools faidx $ref_file`;

## index BAM file
`samtools index $bam_output $bam_index_output`;
Expand All @@ -179,8 +199,13 @@

## SNP call
print "SNPs/Indels call...\n";
`samtools mpileup -ugf $ref_file $bam_output | bcftools view -bcg - > $bcf_output `;
`bcftools view $bcf_output | bcftools view -v -S - | vcfutils.pl varFilter -a 3 -d1 -D1000 > $vcf_output`;
my $max_depth=100000;
my $min_indel_candidate_depth=3;
my $min_alt_bases=3;
my $min_depth=1;

`bcftools mpileup -d $max_depth -L $max_depth -m $min_indel_candidate_depth -Ov -f $ref_file $bam_output | bcftools call -cO b - > $bcf_output 2>/dev/null`;
`bcftools view -v snps,indels,mnps,ref,bnd,other -Ov $bcf_output | vcfutils.pl varFilter -a$min_alt_bases -d$min_depth -D$max_depth > $vcf_output`;

## derived chimera info
if ($aligner=~ /bwa/i and $paired_files){
Expand All @@ -193,7 +218,7 @@

## generate genome coverage plots and histograms
print "Generate genome coverage plots and histograms...\n";
`samtools mpileup -BQ0 -d10000000 -f $ref_file $bam_output >$pileup_output`;
`samtools mpileup -ABQ0 -d10000000 -f $ref_file $bam_output >$pileup_output`;

if ($paired_files){
## generate proper-paired reads coverage
Expand Down
4 changes: 3 additions & 1 deletion test/phame.ctl
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,14 @@
# 6:combination F+C+R; 7:realignment *See below
reads = 2 # 1: single reads; 2: paired reads; 3: both types present;

aligner = bowtie # support bowtie/bwa/minimap2

tree = 1 # 0:no tree; 1:use FastTree; 2:use RAxML; 3:use both;
modelTest = 0 # 0:no; 1:yes; # Only used when building a tree using RAxML
bootstrap = 1 # 0:no; 1:yes; # Run bootstrapping *See below
N = 100 # Number of bootstraps to run *See below

PosSelect = 2 # 0:No; 1:use PAML; 2:use HyPhy; 3:use both
PosSelect = 0 # 0:No; 1:use PAML; 2:use HyPhy; 3:use both

code = 1 # 0:Bacteria; 1:Virus

Expand Down

0 comments on commit 81505f5

Please sign in to comment.