From aba694ec9160cffcf3f738399e86920a70f093ad Mon Sep 17 00:00:00 2001
From: Jun Yin <jy344@compute-7-6.local>
Date: Fri, 31 Aug 2012 16:10:29 -0400
Subject: [PATCH 1/8] Replace SimpleAlign.pm

---
 Bio/SimpleAlign.pm | 3014 +++++++++++++++++++++++++-------------------
 1 file changed, 1736 insertions(+), 1278 deletions(-)
diff --git a/Bio/SimpleAlign.pm b/Bio/SimpleAlign.pm
index 6114a82630..8dc666e02d 100644
--- a/Bio/SimpleAlign.pm
+++ b/Bio/SimpleAlign.pm
@@ -13,6 +13,7 @@
 #  History:
 #	11/3/00 Added threshold feature to consensus and consensus_aa  - PS
 #	May 2001 major rewrite - Heikki Lehvaslaiho
+#	August 2010 refactoring - Jun Yin
 
 =head1 NAME
 
@@ -37,7 +38,7 @@ Bio::SimpleAlign - Multiple alignments held as a set of sequences
   $pos = $aln->column_from_residue_number('1433_LYCES', 14); # = 6;
 
   # Extract sequences and check values for the alignment column $pos
-  foreach $seq ($aln->each_seq) {
+  foreach $seq ($aln->next_Seq) {
       $res = $seq->subseq($pos, $pos);
       $count{$res}++;
   }
@@ -46,11 +47,13 @@ Bio::SimpleAlign - Multiple alignments held as a set of sequences
   }
 
   # Manipulate
-  $aln->remove_seq($seq);
-  $mini_aln = $aln->slice(20,30);  # get a block of columns
-  $mini_aln = $aln->select_noncont(1,3,5,7,11); # select certain sequences
-  $new_aln = $aln->remove_columns([20,30]); # remove by position
-  $new_aln = $aln->remove_columns(['mismatch']); # remove by property
+  $aln->remove_LocatableSeq($seq);
+  $mini_aln = $aln->select_columns([20..30,45..48])  # get a block of columns
+  #more parameters
+  $mini_aln = $aln->select_columns(-selection=>[20..30],-toggle=>0,-keepgaponly=>1)
+  $mini_aln = $aln->select_Seqs([1,5..10,15]) # select certain sequences
+  $aln->remove_columns([20..30]); # remove by position
+  $aln->remove_columns(['mismatch']); # remove by property
 
   # Analyze
   $str = $aln->consensus_string($threshold_percent);
@@ -83,6 +86,8 @@ C<displayname>, and generally is what is used to print out the
 alignment. They default to name/start-end.
 
 The SimpleAlign Module is derived from the Align module by Ewan Birney.
+This package is refactored by Jun Yin as part of the Google Summer of 
+Code project in 2010.
 
 =head1 FEEDBACK
 
@@ -133,6 +138,7 @@ Weigang Qiu, Weigang at GENECTR-HUNTER-CUNY-EDU
 Hongyu Zhang, forward at hongyu.org
 Jay Hannah, jay at jays.net
 Alexandr Bezginov, albezg at gmail.com
+Jun Yin, jun.yin at ucd.ie
 
 =head1 SEE ALSO
 
@@ -200,10 +206,13 @@ use base qw(Bio::Root::Root Bio::Align::AlignI Bio::AnnotatableI
  Args      : -source     => string representing the source program
                             where this alignment came from
              -annotation => Bio::AnnotationCollectionI
-             -seq_annotation => Bio::AnnotationCollectionI for sequences (requires -annotation also be set)
-             -seqs       => array ref containing Bio::LocatableSeq or Bio::Seq::Meta
+             -seq_annotation => Bio::AnnotationCollectionI for sequences
+                                 (requires -annotation also be set)
+             -seqs       => array ref containing Bio::LocatableSeq 
+                                                 or Bio::Seq::Meta
              -consensus  => consensus string
-             -consensus_meta  => Bio::Seq::Meta object containing consensus met information (kludge)
+             -consensus_meta  => Bio::Seq::Meta object containing 
+                                 consensus met information (kludge)
 
 =cut
 
@@ -257,29 +266,29 @@ sub new {
   # assumes these are in correct alignment order
   if ($seqs && ref($seqs) eq 'ARRAY') {
     for my $seq (@$seqs) {
-        $self->add_seq($seq);
+        $self->add_Seq($seq);
     }
   }
 
   return $self; # success - we hope!
 }
 
-=head1 Modifier methods
+=head1 Alignment modifier methods
 
-These methods modify the MSA by adding, removing or shuffling complete
-sequences.
+These methods modify the original MSA by adding, removing or shuffling 
+complete sequences.
 
-=head2 add_seq
+=head2 add_Seq
 
- Title     : add_seq
- Usage     : $myalign->add_seq($newseq);
-             $myalign->add_seq(-SEQ=>$newseq, -ORDER=>5);
+ Title     : add_Seq
+ Usage     : $aln->add_Seq($newseq);
+             $aln->add_Seq(-SEQ=>$newseq, -ORDER=>5);
  Function  : Adds another sequence to the alignment. *Does not* align
              it - just adds it to the hashes.
              If -ORDER is specified, the sequence is inserted at the
              the position spec'd by -ORDER, and existing sequences
              are pushed down the storage array.
- Returns   : nothing
+ Returns   : 1
  Args      : A Bio::LocatableSeq object
              Positive integer for the sequence position (optional)
 
@@ -289,11 +298,17 @@ See L<Bio::LocatableSeq> for more information
 
 sub addSeq {
     my $self = shift;
-    $self->deprecated("addSeq - deprecated method. Use add_seq() instead.");
-    $self->add_seq(@_);
+    $self->deprecated(-warn_version => 1.0080, -throw_version => 1.0090, -message =>"addSeq - deprecated method. Use add_Seq() instead.");
+    $self->add_Seq(@_);
 }
 
 sub add_seq {
+    my $self = shift;
+    $self->deprecated(-warn_version => 1.0080, -throw_version => 1.0090, -message =>"add_seq - deprecated method. Use add_Seq() instead.");
+    $self->add_Seq(@_);
+}
+
+sub add_Seq {
     my $self = shift;
     my @args = @_;
     my ($seq, $order) = $self->_rearrange([qw(SEQ ORDER)], @args);
@@ -350,27 +365,36 @@ sub add_seq {
     }
 
     $self->{'_seq'}->{$name} = $seq;
+    
+    1;
 
 }
 
 
-=head2 remove_seq
+=head2 remove_LocatableSeq
 
- Title     : remove_seq
- Usage     : $aln->remove_seq($seq);
+ Title     : remove_LocatableSeq
+ Usage     : $aln->remove_LocatableSeq($seq);
  Function  : Removes a single sequence from an alignment
- Returns   :
+ Returns   : 1
  Argument  : a Bio::LocatableSeq object
 
 =cut
 
 sub removeSeq {
     my $self = shift;
-    $self->deprecated("removeSeq - deprecated method. Use remove_seq() instead.");
-    $self->remove_seq(@_);
+    $self->deprecated(-warn_version => 1.0080, -throw_version => 1.0090, -message =>"removeSeq - deprecated method. Use remove_seq() instead.");
+    $self->remove_LocatableSeq(@_);
 }
 
 sub remove_seq {
+    my $self = shift;
+    $self->deprecated(-warn_version => 1.0080, -throw_version => 1.0090, -message =>"remove_seq - deprecated method. Use remove_LocatableSeq() instead.");
+    $self->remove_LocatableSeq(@_);	
+}
+
+
+sub remove_LocatableSeq {
     my $self = shift;
     my $seq = shift;
     my ($name,$id);
@@ -379,7 +403,14 @@ sub remove_seq {
 	unless ref $seq && $seq->isa( 'Bio::LocatableSeq');
 
     $id = $seq->id();
+<<<<<<< HEAD
+    $start = $seq->start();
+    $end  = $seq->end();
+    $name = sprintf("%s/%d-%d",$id,$start,$end);
+    
+=======
     $name = $seq->get_nse;
+>>>>>>> upstream/master
 
     if( !exists $self->{'_seq'}->{$name} ) {
 	$self->throw("Sequence $name does not exist in the alignment to remove!");
@@ -420,11 +451,50 @@ sub remove_seq {
     return 1;
 }
 
+=head2 remove_Seqs
+
+ Title     : remove_Seqs
+ Usage     : $aln->remove_Seqs([1,3,5..7]);
+             $aln->remove_Seqs(-selection=>[1],-toggle=>1);
+ Function  : Removes specified sequences from the alignment
+ Returns   : 1 
+ Argument  : An reference list of positive integers for the selected 
+             sequences. An optional parameter can be defined to toggle 
+             the coordinate selection.
+             See also select_Seqs
+
+=cut
+
+sub remove_Seqs {
+	my $self=shift;
+	my ($sel, $toggle) = $self->_rearrange([qw(SELECTION TOGGLE)], @_);
+	@{$sel}=sort {$a<=>$b} @{$sel};
+
+   $self->throw("Select start has to be a positive integer, not [".$sel->[0]."]")
+	unless $sel->[0] =~ /^\d+$/ and $sel->[0] > 0;
+    $self->throw("Select end has to be a positive integer, not [".$sel->[$#$sel]."]")
+	unless $sel->[$#$sel] =~ /^\d+$/ and $sel->[$#$sel] > 0;
+	
+	#use select_Seqs to select removed Seq objects, then use remove_LocatableSeq to remove the selected Seqs
+	my $removed_aln;
+	if($toggle) {
+		$removed_aln=$self->select_Seqs(_toggle_selection($sel,$self->num_sequences));
+	}
+	else {
+		$removed_aln=$self->select_Seqs($sel);
+	}
+	
+	foreach my $seq ($removed_aln->next_Seq) {
+		$self->remove_LocatableSeq($seq);
+	}
+
+	return 1;
+}
 
-=head2 purge
+=head2 remove_redundant_Seqs
 
- Title   : purge
- Usage   : $aln->purge(0.7);
+ Title   : remove_redundant_Seqs
+ Usage   : $aln->remove_redundant_Seqs(0.7);
  Function: Removes sequences above given sequence similarity
            This function will grind on large alignments. Beware!
  Example :
@@ -434,10 +504,16 @@ sub remove_seq {
 =cut
 
 sub purge {
+    my ($self,@args) = @_;
+    $self->deprecated(-warn_version => 1.0080, -throw_version => 1.0090, -message =>"purge - deprecated method. Use remove_redundant_Seqs() instead.");
+    $self->remove_redundant_Seq(@args);
+}
+
+sub remove_redundant_Seqs {
 	my ($self,$perc) = @_;
 	my (%duplicate, @dups);
 
-	my @seqs = $self->each_seq();
+	my @seqs = $self->next_Seq();
 
 	for (my $i=0;$i< @seqs - 1;$i++ ) { #for each seq in alignment
 		my $seq = $seqs[$i];
@@ -483,138 +559,19 @@ sub purge {
 		}
 	}
 	foreach my $seq (@dups) {
-		$self->remove_seq($seq);
+		$self->remove_LocatableSeq($seq);
 	}
 	return @dups;
 }
 
-=head2 sort_alphabetically
-
- Title     : sort_alphabetically
- Usage     : $ali->sort_alphabetically
- Function  : Changes the order of the alignment to alphabetical on name
-             followed by numerical by number.
- Returns   :
- Argument  :
-
-=cut
-
-sub sort_alphabetically {
-    my $self = shift;
-    my ($seq,$nse,@arr,%hash,$count);
-
-    foreach $seq ( $self->each_seq() ) {
-	$nse = $seq->get_nse;
-	$hash{$nse} = $seq;
-    }
-
-    $count = 0;
-
-    %{$self->{'_order'}} = (); # reset the hash;
-
-    foreach $nse ( sort _alpha_startend keys %hash) {
-	$self->{'_order'}->{$count} = $nse;
-
-	$count++;
-    }
-    1;
-}
-
-=head2 sort_by_list
-
- Title     : sort_by_list
- Usage     : $aln_ordered=$aln->sort_by_list($list_file)
- Function  : Arbitrarily order sequences in an alignment
- Returns   : A new Bio::SimpleAlign object
- Argument  : a file listing sequence names in intended order (one name per line)
-
-=cut
-
-sub sort_by_list {
-    my ($self, $list) = @_;
-    my (@seq, @ids, %order);
-
-    foreach my $seq ( $self->each_seq() ) {
-        push @seq, $seq;
-        push @ids, $seq->display_id;
-    }
-
-    my $ct=1;
-    open(my $listfh, '<', $list) || $self->throw("can't open file for reading: $list");
-    while (<$listfh>) {
-      chomp;
-      my $name=$_;
-      $self->throw("Not found in alignment: $name") unless &_in_aln($name, \@ids);
-      $order{$name}=$ct++;
-    }
-    close($listfh);
-    
-    # use the map-sort-map idiom:
-    my @sorted= map { $_->[1] } sort { $a->[0] <=> $b->[0] } map { [$order{$_->id()}, $_] } @seq;
-    my $aln = $self->new;
-    foreach (@sorted) { $aln->add_seq($_) }
-    return $aln;
-}
-
-=head2 set_new_reference
-
- Title     : set_new_reference
- Usage     : $aln->set_new_reference(3 or 'B31'):  Select the 3rd sequence, or
-             the sequence whoes name is "B31" (full, exact, and case-sensitive),
-             as the reference (1st) sequence
- Function  : Change/Set a new reference (i.e., the first) sequence
- Returns   : a new Bio::SimpleAlign object.
-             Throws an exception if designated sequence not found
- Argument  : a positive integer of sequence order, or a sequence name
-             in the original alignment
-
-=cut
-
-sub set_new_reference {
-    my ($self, $seqid) = @_;
-    my $aln = $self->new;
-    my (@seq, @ids, @new_seq);
-    my $is_num=0;
-    foreach my $seq ( $self->each_seq() ) {
-	push @seq, $seq;
-	push @ids, $seq->display_id;
-    }
-
-    if ($seqid =~ /^\d+$/) { # argument is seq position
-	$is_num=1;
-	$self->throw("The new reference sequence number has to be a positive integer >1 and <= num_sequences ") if ($seqid <= 1 || $seqid > $self->num_sequences);
-    } else { # argument is a seq name
-	$self->throw("The new reference sequence not in alignment ") unless &_in_aln($seqid, \@ids);
-    }
-
-    for (my $i=0; $i<=$#seq; $i++) {
-	my $pos=$i+1;
-        if ( ($is_num && $pos == $seqid) || ($seqid eq $seq[$i]->display_id) ) {
-	    unshift @new_seq, $seq[$i];
-	} else {
-	    push @new_seq, $seq[$i];
-	}
-    }
-    foreach (@new_seq) { $aln->add_seq($_);  }
-    return $aln;
-}
-
-sub _in_aln {  # check if input name exists in the alignment
-    my ($str, $ref) = @_;
-    foreach (@$ref) {
-	return 1 if $str eq $_;
-    }
-    return 0;
-}
-
+=head2 uniq_Seq
 
-=head2 uniq_seq
-
- Title     : uniq_seq
- Usage     : $aln->uniq_seq():  Remove identical sequences in
+ Title     : uniq_Seq
+ Usage     : $aln->uniq_Seq():  Remove identical sequences in
              in the alignment.  Ambiguous base ("N", "n") and
              leading and ending gaps ("-") are NOT counted as
              differences.
+             $aln->uniq_Seq is a variation of $aln->remove_redundant_Seqs(1)
  Function  : Make a new alignment of unique sequence types (STs)
  Returns   : 1a. if called in a scalar context, 
                 a new Bio::SimpleAlign object (all sequences renamed as "ST")
@@ -629,13 +586,19 @@ sub _in_aln {  # check if input name exists in the alignment
 =cut
 
 sub uniq_seq {
+	my ($self,@args) = @_;
+	$self->deprecated(-warn_version => 1.0080, -throw_version => 1.0090, -message =>"uniq_seq - deprecated method. Use uniq_Seq() instead.");
+	$self->uniq_Seq(@args);	
+}
+
+sub uniq_Seq {
     my ($self, $seqid) = @_;
     my $aln = $self->new;
     my (%member, %order, @seq, @uniq_str, $st);
     my $order=0;
     my $len = $self->length();
     $st = {};
-    foreach my $seq ( $self->each_seq() ) {
+    foreach my $seq ( $self->next_Seq() ) {
 	my $str = $seq->seq();
 
 # it's necessary to ignore "n", "N", leading gaps and ending gaps in
@@ -687,7 +650,7 @@ sub uniq_seq {
 					 -start=>1,
 					 -end  =>$end
 					 );
-	$aln->add_seq($new);
+	$aln->add_Seq($new);
 	foreach (@{$member{$str}}) {
 	    push @{$$st{$order{$str}}}, $_->id(); # per Tristan's patch/Bug #2805
         $self->debug($_->id(), "\t", "ST", $order{$str}, "\n");
@@ -734,118 +697,366 @@ sub _convert_leading_ending_gaps {
     return $s_new;
 }
 
-=head1 Sequence selection methods
 
-Methods returning one or more sequences objects.
 
-=head2 each_seq
+=head2 sort_alphabetically
 
- Title     : each_seq
- Usage     : foreach $seq ( $align->each_seq() )
- Function  : Gets a Seq object from the alignment
- Returns   : Seq object
- Argument  :
+ Title     : sort_alphabetically
+ Usage     : $ali->sort_alphabetically
+ Function  : Changes the order of the alignment to alphabetical on name
+             followed by numerical by number.
+ Returns   : 1
+ Argument  : None
 
 =cut
 
-sub eachSeq {
+sub sort_alphabetically {
     my $self = shift;
-    $self->deprecated("eachSeq - deprecated method. Use each_seq() instead.");
-    $self->each_seq();
-}
-
-sub each_seq {
-	my $self = shift;
-	my (@arr,$order);
+    my ($seq,$nse,@arr,%hash,$count);
 
-	foreach $order ( sort { $a <=> $b } keys %{$self->{'_order'}} ) {
-		if( exists $self->{'_seq'}->{$self->{'_order'}->{$order}} ) {
-			push(@arr,$self->{'_seq'}->{$self->{'_order'}->{$order}});
-		}
-	}
-	return @arr;
-}
+    foreach $seq ( $self->next_Seq() ) {
+	$nse = $seq->get_nse;
+	$hash{$nse} = $seq;
+    }
 
+    $count = 0;
 
-=head2 each_alphabetically
+    %{$self->{'_order'}} = (); # reset the hash;
 
- Title     : each_alphabetically
- Usage     : foreach $seq ( $ali->each_alphabetically() )
- Function  : Returns a sequence object, but the objects are returned
-             in alphabetically sorted order.
-             Does not change the order of the alignment.
- Returns   : Seq object
- Argument  :
+    foreach $nse ( sort _alpha_startend keys %hash) {
+	$self->{'_order'}->{$count} = $nse;
 
-=cut
+	$count++;
+    }
+    1;
+}
 
-sub each_alphabetically {
-	my $self = shift;
-	my ($seq,$nse,@arr,%hash,$count);
+=head2 sort_by_list
 
-	foreach $seq ( $self->each_seq() ) {
-		$nse = $seq->get_nse;
-		$hash{$nse} = $seq;
-	}
+ Title     : sort_by_list
+ Usage     : $aln_ordered=$aln->sort_by_list($list_file)
+ Function  : Arbitrarily order sequences in an alignment
+ Returns   : A new Bio::SimpleAlign object
+ Argument  : a file listing sequence names in intended order 
+             (one name per line)
 
-	foreach $nse ( sort _alpha_startend keys %hash) {
-		push(@arr,$hash{$nse});
-	}
-	return @arr;
-}
+=cut
 
-sub _alpha_startend {
-    my ($aname,$astart,$bname,$bstart);
-    ($aname,$astart) = split (/-/,$a);
-    ($bname,$bstart) = split (/-/,$b);
+sub sort_by_list {
+    my ($self, $list) = @_;
+    my (@seq, @ids, %order);
 
-    if( $aname eq $bname ) {
-	return $astart <=> $bstart;
+    foreach my $seq ( $self->next_Seq() ) {
+        push @seq, $seq;
+        push @ids, $seq->display_id;
     }
-    else {
-	return $aname cmp $bname;
+
+    my $ct=1;
+    open(my $listfh, '<', $list) || $self->throw("can't open file for reading: $list");
+    while (<$listfh>) {
+      chomp;
+      my $name=$_;
+      $self->throw("Not found in alignment: $name") unless &_in_aln($name, \@ids);
+      $order{$name}=$ct++;
     }
+    close($listfh);
+    
+    # use the map-sort-map idiom:
+    my @sorted= map { $_->[1] } sort { $a->[0] <=> $b->[0] } map { [$order{$_->id()}, $_] } @seq;
+    my $aln = $self->new;
+    foreach (@sorted) { $aln->add_Seq($_) }
+    return $aln;
 }
 
-=head2 each_seq_with_id
 
- Title     : each_seq_with_id
- Usage     : foreach $seq ( $align->each_seq_with_id() )
- Function  : Gets a Seq objects from the alignment, the contents
-             being those sequences with the given name (there may be
-             more than one)
- Returns   : Seq object
- Argument  : a seq name
+=head2 sort_by_pairwise_identity
 
-=cut
+ Title     : sort_by_pairwise_identity()
+ Usage     : $ali->sort_by_pairwise_identity(2)
+ Function  : Changes the order of the alignment by the pairwise percentage 
+             identity of the reference sequence
+ Returns   : 1
+ Argument  : Optional, the position or id of reference sequences to be compared 
+             with. Default is the first sequence
+             See also set_new_reference
 
-sub eachSeqWithId {
-    my $self = shift;
-    $self->deprecated("eachSeqWithId - deprecated method. Use each_seq_with_id() instead.");
-    $self->each_seq_with_id(@_);
-}
+=cut
 
-sub each_seq_with_id {
-    my $self = shift;
-    my $id = shift;
+sub sort_by_pairwise_identity {
+	my ($self,$seqid) = @_;
+	my ($seq,$nse,@arr,%hash,$count);
+	$seqid=defined($seqid)?$seqid:1;
+	my @pairwise_iden=$self->pairwise_percentage_identity($seqid);
+	
+	my $seqcount = 0;
+	foreach $seq ( $self->next_Seq() ) {
+		$nse = $seq->get_nse;
+		$hash{$nse} = $pairwise_iden[$seqcount++];
+	}
 
-    $self->throw("Method each_seq_with_id needs a sequence name argument")
-	unless defined $id;
+	$count=0;
 
-    my (@arr, $seq);
+	%{$self->{'_order'}} = (); # reset the hash;
 
-    if (exists($self->{'_start_end_lists'}->{$id})) {
-	@arr = @{$self->{'_start_end_lists'}->{$id}};
-    }
-    return @arr;
+	foreach $nse ( sort {$hash{$b}<=>$hash{$a}} keys %hash) {
+		$self->{'_order'}->{$count} = $nse;
+		$count++;
+	}
+	1;	
 }
 
-=head2 get_seq_by_pos
+=head2 sort_by_length
 
- Title     : get_seq_by_pos
- Usage     : $seq = $aln->get_seq_by_pos(3) # third sequence from the alignment
- Function  : Gets a sequence based on its position in the alignment.
-             Numbering starts from 1.  Sequence positions larger than
+ Title     : sort_by_length()
+ Usage     : $ali->sort_by_length()
+ Function  : Changes the order of the alignment by the ungapped length of 
+              the sequences
+ Returns   : 1
+ Argument  : None
+
+=cut
+
+sub sort_by_length {
+	my $self = shift;
+	my %seq2length;
+	
+	foreach my $order (keys %{$self->{'_order'}}) {
+		my $nse=$self->{'_order'}->{$order};
+		my $seq=$self->{'_seq'}->{$nse};
+		$seq2length{$seq->get_nse}=$seq->_ungapped_len;
+	}
+	
+	%{$self->{'_order'}} = (); # reset the hash;
+	
+	my $count=0;
+	foreach my $nse (sort {$seq2length{$a}<=>$seq2length{$b}} keys %seq2length) {
+		$self->{'_order'}->{$count} = $nse;
+		$count++;
+	}
+	
+	1;
+}
+
+=head2 sort_by_start
+
+ Title     : sort_by_start
+ Usage     : $ali->sort_by_start
+ Function  : Changes the order of the alignment to the start position of 
+             each subalignment    
+ Returns   : 1
+ Argument  : None
+
+=cut
+
+sub sort_by_start {
+    my $self = shift;
+    my ($seq,$nse,@arr,%hash,$count);
+    foreach $seq ( $self->next_Seq() ) {
+        $nse = $seq->get_nse;
+        $hash{$nse} = $seq;
+    }
+    $count = 0;
+    %{$self->{'_order'}} = (); # reset the hash;
+    foreach $nse ( sort _startend keys %hash) {
+        $self->{'_order'}->{$count} = $nse;
+        $count++;
+    }
+    1;
+}
+
+sub _startend
+{
+    my ($aname,$arange) = split (/[\/]/,$a);
+    my ($bname,$brange) = split (/[\/]/,$b);
+    my ($astart,$aend) = split(/\-/,$arange);
+    my ($bstart,$bend) = split(/\-/,$brange);
+    return $astart <=> $bstart;
+}
+=head2 set_new_reference
+
+ Title     : set_new_reference
+ Usage     : $aln->set_new_reference(3 or 'B31'):  Select the 3rd sequence, 
+             or the sequence whoes name is "B31" (full, exact, and 
+             case-sensitive), as the reference (1st) sequence
+ Function  : Change/Set a new reference (i.e., the first) sequence
+ Returns   : a new Bio::SimpleAlign object.
+             Throws an exception if designated sequence not found
+ Argument  : a positive integer of sequence order, or a sequence name
+             in the original alignment
+
+=cut
+
+sub set_new_reference {
+    my ($self, $seqid) = @_;
+    my $aln = $self->new;
+    my (@seq, @ids, @new_seq);
+    my $is_num=0;
+    foreach my $seq ( $self->next_Seq() ) {
+	push @seq, $seq;
+	push @ids, $seq->display_id;
+    }
+
+    if ($seqid =~ /^\d+$/) { # argument is seq position
+	$is_num=1;
+	$self->throw("The new reference sequence number has to be a positive integer >1 and <= num_sequences ") if ($seqid <= 1 || $seqid > $self->num_sequences);
+    } else { # argument is a seq name
+	$self->throw("The new reference sequence not in alignment ") unless &_in_aln($seqid, \@ids);
+    }
+
+    for (my $i=0; $i<=$#seq; $i++) {
+	my $pos=$i+1;
+        if ( ($is_num && $pos == $seqid) || ($seqid eq $seq[$i]->display_id) ) {
+	    unshift @new_seq, $seq[$i];
+	} else {
+	    push @new_seq, $seq[$i];
+	}
+    }
+    foreach (@new_seq) { $aln->add_Seq($_);  }
+    return $aln;
+}
+
+sub _in_aln {  # check if input name exists in the alignment
+    my ($str, $ref) = @_;
+    foreach (@$ref) {
+	return 1 if $str eq $_;
+    }
+    return 0;
+}
+
+
+
+=head1 Alignment selection methods
+
+These methods are used to select the sequences or horizontal/vertical 
+subsets of the current MSA.
+
+=head2 next_Seq
+
+ Title     : next_Seq
+ Usage     : foreach $seq ( $aln->next_Seq() )
+ Function  : Gets a Seq object from the alignment
+ Returns   : Seq object
+ Argument  :
+
+=cut
+
+sub eachSeq {
+    my $self = shift;
+    $self->deprecated(-warn_version => 1.0080, -throw_version => 1.0090, -message =>"eachSeq - deprecated method. Use next_Seq() instead.");
+    $self->next_Seq();
+}
+
+sub each_seq {
+    my $self = shift;
+    $self->deprecated(-warn_version => 1.0080, -throw_version => 1.0090, -message =>"each_seq - deprecated method. Use next_Seq() instead.");
+    $self->next_Seq();
+}
+
+sub next_Seq {
+	my $self = shift;
+	my (@arr,$order);
+
+	foreach $order ( sort { $a <=> $b } keys %{$self->{'_order'}} ) {
+		if( exists $self->{'_seq'}->{$self->{'_order'}->{$order}} ) {
+			push(@arr,$self->{'_seq'}->{$self->{'_order'}->{$order}});
+		}
+	}
+	return @arr;
+}
+
+
+=head2 next_alphabetically
+
+ Title     : next_alphabetically
+ Usage     : foreach $seq ( $aln->next_alphabetically() )
+ Function  : Returns a sequence object, but the objects are returned
+             in alphabetically sorted order.
+             Does not change the order of the alignment.
+ Returns   : Seq object
+ Argument  : None
+
+=cut
+
+sub each_alphabetically {
+    my $self = shift;
+    $self->deprecated(-warn_version => 1.0080, -throw_version => 1.0090, -message =>"each_alphabetically - deprecated method. Use next_alphabetically() instead.");
+    $self->next_Seq();	
+}
+
+
+sub next_alphabetically {
+	my $self = shift;
+	my ($seq,$nse,@arr,%hash,$count);
+
+	foreach $seq ( $self->next_Seq() ) {
+		$nse = $seq->get_nse;
+		$hash{$nse} = $seq;
+	}
+
+	foreach $nse ( sort _alpha_startend keys %hash) {
+		push(@arr,$hash{$nse});
+	}
+	return @arr;
+}
+
+sub _alpha_startend {
+    my ($aname,$astart,$bname,$bstart);
+    ($aname,$astart) = split (/-/,$a);
+    ($bname,$bstart) = split (/-/,$b);
+
+    if( $aname eq $bname ) {
+	return $astart <=> $bstart;
+    }
+    else {
+	return $aname cmp $bname;
+    }
+}
+
+=head2 next_Seq_with_id
+
+ Title     : next_Seq_with_id
+ Usage     : foreach $seq ( $aln->next_Seq_with_id() )
+ Function  : Gets a Seq objects from the alignment, the contents
+             being those sequences with the given name (there may be
+             more than one)
+ Returns   : Seq object
+ Argument  : a seq name
+
+=cut
+
+sub eachSeqWithId {
+    my $self = shift;
+    $self->deprecated(-warn_version => 1.0080, -throw_version => 1.0090, -message =>"eachSeqWithId - deprecated method. Use next_Seq_with_id() instead.");
+    $self->next_Seq_with_id(@_);
+}
+
+sub each_seq_with_id {
+    my $self = shift;
+    $self->deprecated(-warn_version => 1.0080, -throw_version => 1.0090, -message =>"each_seq_with_id - deprecated method. Use next_Seq_with_id() instead.");
+    $self->next_Seq_with_id(@_);
+}
+
+sub next_Seq_with_id {
+    my $self = shift;
+    my $id = shift;
+
+    $self->throw("Method next_Seq_with_id needs a sequence name argument")
+	unless defined $id;
+
+    my (@arr, $seq);
+
+    if (exists($self->{'_start_end_lists'}->{$id})) {
+	@arr = @{$self->{'_start_end_lists'}->{$id}};
+    }
+    return @arr;
+}
+
+=head2 get_Seq_by_pos
+
+ Title     : get_Seq_by_pos
+ Usage     : $seq = $aln->get_Seq_by_pos(3) # third sequence from the alignment
+ Function  : Gets a sequence based on its position in the alignment.
+             Numbering starts from 1.  Sequence positions larger than
              num_sequences() will throw an error.
  Returns   : a Bio::LocatableSeq object
  Args      : positive integer for the sequence position
@@ -853,6 +1064,12 @@ sub each_seq_with_id {
 =cut
 
 sub get_seq_by_pos {
+	my $self = shift;
+	$self->deprecated(-warn_version => 1.0080, -throw_version => 1.0090, -message =>"get_seq_by_pos - deprecated method. Use get_Seq_by_pos() instead.");
+	$self->get_Seq_by_pos(@_);	
+}
+
+sub get_Seq_by_pos {
 
     my $self = shift;
     my ($pos) = @_;
@@ -866,10 +1083,10 @@ sub get_seq_by_pos {
     return $self->{'_seq'}->{$nse};
 }
 
-=head2 get_seq_by_id
+=head2 get_Seq_by_id
 
- Title     : get_seq_by_id
- Usage     : $seq = $aln->get_seq_by_id($name) # seq named $name
+ Title     : get_Seq_by_id
+ Usage     : $seq = $aln->get_Seq_by_id($name) # seq named $name
  Function  : Gets a sequence based on its name.
              Sequences that do not exist will warn and return undef
  Returns   : a Bio::LocatableSeq object
@@ -878,6 +1095,13 @@ sub get_seq_by_pos {
 =cut
 
 sub get_seq_by_id {
+	my $self = shift;
+	$self->deprecated(-warn_version => 1.0080, -throw_version => 1.0090, -message =>"get_seq_by_id - deprecated method. Use get_Seq_by_id() instead.");
+	$self->get_Seq_by_id(@_);	
+}
+
+
+sub get_Seq_by_id {
     my ($self,$name) = @_;
     unless( defined $name ) {
       $self->warn("Must provide a sequence name");
@@ -891,84 +1115,95 @@ sub get_seq_by_id {
     return;
 }
 
-=head2 seq_with_features
 
- Title   : seq_with_features
- Usage   : $seq = $aln->seq_with_features(-pos => 1,
-                                          -consensus => 60
-                                          -mask =>
-           sub { my $consensus = shift;
+=head2 select_Seqs
 
-                 for my $i (1..5){
-                    my $n = 'N' x $i;
-                    my $q = '\?' x $i;
-                    while($consensus =~ /[^?]$q[^?]/){
-                       $consensus =~ s/([^?])$q([^?])/$1$n$2/;
-                    }
-                  }
-                 return $consensus;
-               }
-                                         );
- Function: produces a Bio::Seq object by first splicing gaps from -pos
-           (by means of a splice_by_seq_pos() call), then creating
-           features using non-? chars (by means of a consensus_string()
-           call with stringency -consensus).
- Returns : a Bio::Seq object
- Args    : -pos : required. sequence from which to build the Bio::Seq
-             object
-           -consensus : optional, defaults to consensus_string()'s
-             default cutoff value
-           -mask : optional, a coderef to apply to consensus_string()'s
-             output before building features.  this may be useful for
-             closing gaps of 1 bp by masking over them with N, for
-             instance
+ Title     : select_Seqs
+ Usage     : $aln2 = $aln->select_Seqs([1,5..10,15]) # three first sequences
+             $aln2 = $aln->select_Seqs(-selection=>[2..4,11..14],-toggle=>1) # toggle selection
+ Function  : Creates a new alignment from a subset of
+             sequences.  Numbering starts from 1.  Sequence positions
+             larger than num_sequences() will thow an error.
+ Returns   : a Bio::SimpleAlign object
+ Args      : An reference list of positive integers for the selected 
+             sequences. An optional parameter can be defined to toggle the 
+             coordinate selection.
+             
 
 =cut
 
-sub seq_with_features{
-   my ($self,%arg) = @_;
-
-   #first do the preparatory splice
-   $self->throw("must provide a -pos argument") unless $arg{-pos};
-   $self->splice_by_seq_pos($arg{-pos});
+sub select_Seqs {
+	my $self=shift;
+	my ($sel, $toggle) = $self->_rearrange([qw(SELECTION TOGGLE)], @_);
+	@{$sel}=sort {$a<=>$b} @{$sel};
 
-   my $consensus_string = $self->consensus_string($arg{-consensus});
-   $consensus_string = $arg{-mask}->($consensus_string)
-	 if defined($arg{-mask});
+   $self->throw("Select start has to be a positive integer, not [".$sel->[0]."]")
+	unless $sel->[0] =~ /^\d+$/ and $sel->[0] > 0;
+    $self->throw("Select end has to be a positive integer, not [".$sel->[$#$sel]."]")
+	unless $sel->[$#$sel] =~ /^\d+$/ and $sel->[$#$sel] > 0;
+	
+	my @positions;
+	if($toggle) {
+		@positions=@{_toggle_selection($sel,$self->num_sequences)};
+	}
+	else {
+		@positions=@{$sel};
+	}
+	
+	my $aln = $self->new;
+	foreach my $pos (@positions) {
+		$aln->add_Seq($self->get_Seq_by_pos($pos));
+	}
+	$aln->id($self->id);
+	# fix for meta, sf, ann    
+	return $aln;	
+}
 
-   my(@bs,@es);
 
-   push @bs, 1 if $consensus_string =~ /^[^?]/;
+sub select {
+    my $self = shift;
+    $self->deprecated(-warn_version => 1.0080, -throw_version => 1.0090, -message =>"select - deprecated method. Use select_Seqs() instead.");
+    $self->select_Seqs([$_[0]..$_[1]]);
+}
 
-   while($consensus_string =~ /\?[^?]/g){
-	 push @bs, pos($consensus_string);
-   }
-   while($consensus_string =~ /[^?]\?/g){
-	 push @es, pos($consensus_string);
+sub select_noncont {
+	my $self = shift;
+	$self->deprecated(-warn_version => 1.0080, -throw_version => 1.0090, -message =>"select_noncont - deprecated method. Use select_Seqs() instead.");
+	if($_[0] eq 'nosort') {
+    	shift;
    }
+   $self->select_Seqs([@_]);
+}
 
-   push @es, CORE::length($consensus_string) if $consensus_string =~ /[^?]$/;
+=head2 select_columns
 
-   my $seq = Bio::Seq->new();
+ Title     : select_columns
+ Usage     : $newaln = $aln->select_columns([20..30,45..48]);
+             $newaln = $aln->select_columns(['mismatch']);
+             $newaln = $aln->select_columns(-selection=>['mismatch'],-toggle=>1);
+ Function  : Creates a slice from the alignment from the selected columns. 
+             The first column in the alignment is denoted 1.
+             Sequences with no residues in the slice are excluded from the
+             new alignment and a warning is printed. Slice beyond the length
+             of the sequence does not do padding.
+ Returns   : A Bio::SimpleAlign object
+ Args      : Positive integers for the selected colums, or specified type
+             ('match'|'weak'|'strong'|'mismatch')
+             First optional boolean can be defined to toggle the coordinate 
+             selection. Second optional boolean which if true will keep gap-only 
+             columns in the newly created slice. Example:
 
-#   my $rootfeature = Bio::SeqFeature::Generic->new(
-#                -source_tag => 'location',
-#                -start      => $self->get_seq_by_pos($arg{-pos})->start,
-#                -end        => $self->get_seq_by_pos($arg{-pos})->end,
-#                                                  );
-#   $seq->add_SeqFeature($rootfeature);
+             $aln2 = $aln->select_columns([20..30],0,1)
+             or $aln2 = $aln->select_columns(-selection=>[20..30],-toggle=>0,-keepgaponly=>1)
 
-   while(my $b = shift @bs){
-	 my $e = shift @es;
-	 $seq->add_SeqFeature(
-       Bio::SeqFeature::Generic->new(
-         -start => $b - 1 + $self->get_seq_by_pos($arg{-pos})->start,
-         -end   => $e - 1 + $self->get_seq_by_pos($arg{-pos})->start,
-         -source_tag => $self->source || 'MSA',
-       )
-     );
-   }
+=cut
 
+<<<<<<< HEAD
+sub slice {
+	my $self = shift;
+	$self->deprecated(-warn_version => 1.0080, -throw_version => 1.0090, -message =>"slice - deprecated method. Use select_columns() instead.");
+	$self->select_columns([$_[0]..$_[1]],0,defined($_[2])?$_[2]:0);	
+=======
    return $seq;
 }
 
@@ -1009,52 +1244,21 @@ sub select {
     $aln->id($self->id);
     # fix for meta, sf, ann    
     return $aln;
+>>>>>>> upstream/master
 }
 
-=head2 select_noncont
-
- Title     : select_noncont
- Usage     : # 1st and 3rd sequences, sorted
-             $aln2 = $aln->select_noncont(1, 3)
-
-             # 1st and 3rd sequences, sorted (same as first)
-             $aln2 = $aln->select_noncont(3, 1)
-
-             # 1st and 3rd sequences, unsorted
-             $aln2 = $aln->select_noncont('nosort',3, 1)
-
- Function  : Creates a new alignment from a subset of sequences.  Numbering
-             starts from 1.  Sequence positions larger than num_sequences() will
-             throw an error.  Sorts the order added to new alignment by default,
-             to prevent sorting pass 'nosort' as the first argument in the list.
- Returns   : a Bio::SimpleAlign object
- Args      : array of integers for the sequences.  If the string 'nosort' is
-             passed as the first argument, the sequences will not be sorted
-             in the new alignment but will appear in the order listed.
-
-=cut
 
-sub select_noncont {
+sub select_columns {
 	my $self = shift;
-    my $nosort = 0;
-	my (@pos) = @_;
-    if ($pos[0] !~ m{^\d+$}) {
-        my $sortcmd = shift @pos;
-        if ($sortcmd eq 'nosort') {
-            $nosort = 1;
-        } else {
-            $self->throw("Command not recognized: $sortcmd.  Only 'nosort' implemented at this time.");
-        }
-    }
-	
-    my $end = $self->num_sequences;
-    foreach ( @pos ) {
-		$self->throw("position must be a positive integer, > 0 and <= $end not [$_]")
-		  unless( /^\d+$/ && $_ > 0 && $_ <= $end );
+	#my ($start, $end, $keep_gap_only) = @_;
+	my ($sel, $toggle,$keep_gap_only) = $self->_rearrange([qw(SELECTION TOGGLE KEEPGAPONLY)], @_);
+	if($sel->[0]=~/^[a-z]/i) {
+		$sel=$self->_select_columns_by_type($sel);
 	}
-    
-	@pos = sort {$a <=> $b} @pos unless $nosort;
 	
+<<<<<<< HEAD
+	@{$sel}=sort {$a<=$b} @{$sel};
+=======
 	my $aln = $self->new;
 	foreach my $p (@pos) {
 		$aln->add_seq($self->get_seq_by_pos($p));
@@ -1100,27 +1304,29 @@ sub select_noncont_by_name {
  Args      : Positive integer for start column, positive integer for end column,
              optional boolean which if true will keep gap-only columns in the newly
              created slice. Example:
+>>>>>>> upstream/master
 
-             $aln2 = $aln->slice(20,30,1)
+	#warnings
+	$self->throw("select_columns start has to be a positive integer, not [".$sel->[0]."]")
+	  unless $sel->[0] =~ /^\d+$/ and $sel->[0] > 0;
+	$self->throw("select_columns end has to be a positive integer, not [".$sel->[$#$sel]."]")
+	  unless $sel->[$#$sel] =~ /^\d+$/ and $sel->[$#$sel] > 0;
+	$self->throw("This alignment has only ". $self->length . " residues. select_columns start " .
+					 "[".$sel->[0]."] is too big.") if $sel->[0] > $self->length;
 
-=cut
+	my $newcoords;
+	
+	if($toggle) {
+		$sel=_toggle_selection($sel,$self->length);
+	}
+	$newcoords=_cont_coords($sel);
 
-sub slice {
-	my $self = shift;
-	my ($start, $end, $keep_gap_only) = @_;
-
-	$self->throw("Slice start has to be a positive integer, not [$start]")
-	  unless $start =~ /^\d+$/ and $start > 0;
-	$self->throw("Slice end has to be a positive integer, not [$end]")
-	  unless $end =~ /^\d+$/ and $end > 0;
-	$self->throw("Slice start [$start] has to be smaller than or equal to end [$end]")
-	  unless $start <= $end;
-	$self->throw("This alignment has only ". $self->length . " residues. Slice start " .
-					 "[$start] is too big.") if $start > $self->length;
     my $cons_meta = $self->consensus_meta;
 	my $aln = $self->new;
 	$aln->id($self->id);
-	foreach my $seq ( $self->each_seq() ) {
+	my ($start,$end)=($newcoords->[0],$newcoords->[$#$newcoords]);
+	
+	foreach my $seq ( $self->next_Seq() ) {
 	    my $new_seq = $seq->isa('Bio::Seq::MetaI') ?
             Bio::Seq::Meta->new
         (-id      => $seq->id,
@@ -1134,43 +1340,103 @@ sub slice {
 		 -verbose => $self->verbose);
         
 	    # seq
+	    
 	    my $seq_end = $end;
 	    $seq_end = $seq->length if( $end > $seq->length );
+	    # the actual selected seq
 
-	    my $slice_seq = $seq->subseq($start, $seq_end);
+	    my $slice_seq;
+      for(my $num=0;$num<$#$newcoords;) {
+	    	$slice_seq.= $seq->subseq($newcoords->[$num], $newcoords->[$num+1]);
+	    	$num+=2;
+	   }
+	    
 	    $new_seq->seq( $slice_seq );
 
-	    $slice_seq =~ s/\W//g;
+		#all the non residue chars
+		my $nonres = join("",$self->gap_char, $self->match_char,$self->missing_char);
+	   
+	   #need to convert to non-gapped start and end
+	   my ($ng_start,$ng_end); #ungapped start and ungapped end
+	   
+	   if($slice_seq=~/^[\Q$nonres\E]+$/) {
+	   	($ng_start,$ng_end)=($start,$end);
+	   }
+	   else {
+		   if($slice_seq=~/^[\Q$nonres\E]+/) {
+		   	$ng_start=$sel->[length($&)];
+		   }
+		   else {
+		   	$ng_start=$start;
+		   }
+		   
+		   if($slice_seq=~/[\Q$nonres\E]+$/) {
+		   	$ng_end=$sel->[$#$sel-length($&)-1];
+		   }
+		   else {
+		   	$ng_end=$end;
+		   }
+		} 
+	   
+	   my $slice_length=($slice_seq =~s/([^\Q$nonres\E])/$1/g);
+	   my ($newstart, $newend, $fakestart, $oldend);
+	   my ($pre_start_seq,$pre_start_length,$after_end_seq,$after_end_length);
+	   
+	   
+	   if($ng_end>=CORE::length($seq->seq())) {
+	   	$after_end_length=0;
+	   }
+	   else {
+	   	$after_end_seq = $seq->subseq($ng_end+1,CORE::length($seq->seq()));	
+	   	$after_end_length=($after_end_seq=~s/([^\Q$nonres\E])/$1/g);
+	   }
+		
+		#calculate the new start and new end
+		#new_start=old_start+pre_start-1
+		#new_end=old_end-after_end+1
+		#in order to pass the $seq->end test, we should assign a fake start first		
+	   if($ng_start>1) {
+	   	
+		   my $pre_start_seq = $seq->subseq(1, $ng_start - 1);
+			my $pre_start_length=($pre_start_seq=~s/([^\Q$nonres\E])/$1/g); #need to check
+			if (defined($seq->strand) && $seq->strand < 0){
+				$newend=$seq->end-$pre_start_length;
+				$fakestart=$newend-$slice_length+1;
+				$newstart=$seq->start+$after_end_length;				
+			}
+			else {
+				$newend=$seq->end-$after_end_length;
+				$fakestart=$newend-$slice_length+1;
+				$newstart=$seq->start+$pre_start_length;
+			}
+		}
+		else {
+			if (defined($seq->strand) && $seq->strand < 0){
+				$newend=$seq->end;
+				$fakestart=$newend-$slice_length+1;
+				$newstart=$seq->start+$after_end_length;
+			}
+			else {
+				$newend=$seq->end-$after_end_length;
+				$fakestart=$newend-$slice_length+1;
+				$newstart=$seq->start;
+			}			
+		}
+		$new_seq->start($fakestart);
+		$new_seq->end($newend);
+		$new_seq->start($newstart);
 
-	    if ($start > 1) {
-            my $pre_start_seq = $seq->subseq(1, $start - 1);
-            $pre_start_seq =~ s/\W//g;
-            if (!defined($seq->strand)) {
-                $new_seq->start( $seq->start + CORE::length($pre_start_seq) );
-            } elsif ($seq->strand < 0){
-                $new_seq->start( $seq->end - CORE::length($pre_start_seq) - CORE::length($slice_seq) + 1);
-            } else {
-                $new_seq->start( $seq->start + CORE::length($pre_start_seq)  );
-            }
-	    } else {
-            if ((defined $seq->strand)&&($seq->strand < 0)){
-                $new_seq->start( $seq->end - CORE::length($slice_seq) + 1);
-            } else {
-               $new_seq->start( $seq->start);
-            }
-	    }
         if ($new_seq->isa('Bio::Seq::MetaI')) {
             for my $meta_name ($seq->meta_names) {
-                $new_seq->named_meta($meta_name, $seq->named_submeta($meta_name, $start, $end));
+                $new_seq->named_meta($meta_name, $seq->named_submeta($meta_name, $ng_start, $end));
             }
         }
-	    $new_seq->end( $new_seq->start + CORE::length($slice_seq) - 1 );
 
 	    if ($new_seq->start and $new_seq->end >= $new_seq->start) {
-            $aln->add_seq($new_seq);
+            $aln->add_Seq($new_seq);
 	    } else {
             if( $keep_gap_only ) {
-                $aln->add_seq($new_seq);
+                $aln->add_Seq($new_seq);
             } else {
                 my $nse = $seq->get_nse();
                 $self->warn("Slice [$start-$end] of sequence [$nse] contains no residues.".
@@ -1190,69 +1456,161 @@ sub slice {
 	return $aln;
 }
 
+
+sub _select_columns_by_type {
+	#code copied from _remove_columns_by_type to enable selecting columns by type
+	my ($self,$type) = @_;
+	my @selects;
+
+	my $gap = $self->gap_char if (grep { $_ eq 'gaps'} @{$type});
+	my $all_gaps_columns = $self->gap_char if (grep /all_gaps_columns/,@{$type});
+	my %matchchars = ( 'match'           => '\*',
+                       'weak'             => '\.',
+                       'strong'           => ':',
+                       'mismatch'         => ' ',
+                     );
+	# get the characters to delete against
+	my $del_char;
+	foreach my $type (@{$type}){
+		$del_char.= $matchchars{$type};
+	}
+
+	my $length = 0;
+	my $match_line = $self->match_line;
+	# do the matching to get the segments to remove
+	if($del_char){
+		while($match_line =~ m/[$del_char]/g ){
+			my $start = pos($match_line);
+			$match_line=~/\G[$del_char]+/gc;
+			my $end = pos($match_line);
+
+			#have to offset the start and end for subsequent removes
+			push @selects, $start..$end;
+		}
+	}
+
+	# remove the segments
+	return \@selects;
+}
 =head2 remove_columns
 
  Title     : remove_columns
- Usage     : $aln2 = $aln->remove_columns(['mismatch','weak']) or
-             $aln2 = $aln->remove_columns([0,0],[6,8])
- Function  : Creates an aligment with columns removed corresponding to
+ Usage     : my $newaln=$aln->remove_columns(['mismatch','weak']) or
+             my $newaln=$aln->remove_columns([3,6..8]) or
+             my $newaln=$aln->remove_columns(-selection=>[3,6..8],-toggle=>1,-keepgaponly=>0)
+ Function  : Modify the aligment with columns removed corresponding to
              the specified type or by specifying the columns by number.
- Returns   : Bio::SimpleAlign object
- Args      : Array ref of types ('match'|'weak'|'strong'|'mismatch'|'gaps'|
-             'all_gaps_columns') or array ref where the referenced array
+             remove_columns is a variance of select_columns(-toogle=>1)
+ Returns   : 1
+ Args      : Array ref of types ('match'|'weak'|'strong'|'mismatch') 
+             or array ref where the referenced array
              contains a pair of integers that specify a range.
-             The first column is 0
+             use remove_gaps to remove columns containing gaps
+             The first column is 1
+             See also select_columns
 
 =cut
 
 sub remove_columns {
-    my ($self,@args) = @_;
-    @args || $self->throw("Must supply column ranges or column types");
-    my $aln;
+	my ($self,@args) = @_;
+   @args || $self->throw("Must supply column ranges or column types");
+	my ($sel, $toggle,$gap) = $self->_rearrange([qw(SELECTION TOGGLE KEEPGAPONLY)], @args);
+	my $newaln=$self->select_columns(-selection=>$sel,-toggle=>$toggle?0:1,-keepgaponly=>$gap);
+	return $newaln;
+}
 
-    if ($args[0][0] =~ /^[a-z_]+$/i) {
-        $aln = $self->_remove_columns_by_type($args[0]);
-    } elsif ($args[0][0] =~ /^\d+$/) {
-        $aln = $self->_remove_columns_by_num(\@args);
-    } else {
-        $self->throw("You must pass array references to remove_columns(), not @args");
-    }
-    # fix for meta, sf, ann
-    $aln;
+sub _cont_coords {
+	#This function is used to merge the coordinates from select and remove functions in order to reduce the number of calculations in select and #remove. For exmaple, if the input of remove_columns is remove_columns([2,5,7..10]), this function will transform ([2,5,7..10]) to 
+ 	# ([2,2,5,5,7,10]).
+	
+	my ($old_coords)=@_;
+	@{$old_coords}=sort {$a<=>$b} @{$old_coords};
+	
+	my $cont_coords;
+	
+	push @{$cont_coords},$old_coords->[0];
+	for(my $num=0;$num<@{$old_coords};) {
+		if($num==@{$old_coords}-1) {
+			#for single selection
+			push @{$cont_coords},$old_coords->[$num];
+			last;
+		}
+		if($old_coords->[$num+1]-$old_coords->[$num]>1) {
+			if($num+2==@{$old_coords}) {
+				push @{$cont_coords},$old_coords->[$num],$old_coords->[$num+1],$old_coords->[$num+1];
+				last;
+			}
+			else {
+				push @{$cont_coords},$old_coords->[$num],$old_coords->[$num+1];
+			}
+		}
+		else {
+			if ($num+2==@{$old_coords}) {
+				push @{$cont_coords},$old_coords->[$num+1];
+				last;
+			}
+		}
+		$num++;
+	}
+	return $cont_coords;
+}
+
+
+sub _toggle_selection {
+	#This function is used to toggle the selection of sequences or columns
+	my ($old_coords,$length)=@_;
+	my %hash=map {$_=>1} @{$old_coords};
+	my $new_coords;
+	for(my $num=1;$num<=$length;$num++) {
+		unless(defined($hash{$num})) {
+			push @{$new_coords},$num;
+		}
+	}
+	return $new_coords;
 }
 
 
 =head2 remove_gaps
 
  Title     : remove_gaps
- Usage     : $aln2 = $aln->remove_gaps
+ Usage     : $aln2 = $aln->remove_gaps(-reference=>5)
  Function  : Creates an aligment with gaps removed
  Returns   : a Bio::SimpleAlign object
- Args      : a gap character(optional) if none specified taken
+ Args      : -GAPCHAR a gap character(optional) if none specified taken
                 from $self->gap_char,
-             [optional] $all_gaps_columns flag (1 or 0, default is 0)
-                        indicates that only all-gaps columns should be deleted
-
-Used from method L<remove_columns> in most cases. Set gap character
-using L<gap_char()|gap_char>.
+             -ALLGAPCOL $all_gaps_columns flag (1 or 0, default is 0)
+                 indicates that only all-gaps columns should be deleted
+             -REFERENCE splices all aligned sequences where the specified 
+                 sequence has gaps.
 
 =cut
 
 sub remove_gaps {
-    my ($self,$gapchar,$all_gaps_columns) = @_;
-    my $gap_line;
-    if ($all_gaps_columns) {
-        $gap_line = $self->all_gap_line($gapchar);
-    } else {
-        $gap_line = $self->gap_line($gapchar);
-    }
+	my $self=shift @_;
+	my ($gapchar,$all_gaps_columns,$reference) = $self->_rearrange([qw(GAPCHAR ALLGAPCOL REFERENCE)], @_);
+	my $gap_line;
+   if($reference) {
+   	my $refseq=$self->get_Seq_by_pos($reference);
+    	$gap_line=$refseq->seq();
+   }
+   else {
+		if ($all_gaps_columns) {
+		  $gap_line = $self->all_gap_line($gapchar);
+		} else {
+		  $gap_line = $self->gap_line($gapchar);
+		}
+	}
+    
     my $aln = $self->new;
 
-    my @remove;
     my $length = 0;
     my $del_char = $gapchar || $self->gap_char;
     # Do the matching to get the segments to remove
+    my $removed_cols;
     while ($gap_line =~ m/[$del_char]/g) {
+<<<<<<< HEAD
+        push @{$removed_cols}, pos($gap_line);
+=======
         my $start = pos($gap_line)-1;
         $gap_line =~ m/\G[$del_char]+/gc;
         my $end = pos($gap_line)-1;
@@ -1262,251 +1620,790 @@ sub remove_gaps {
         $end  -=$length;
         $length += ($end-$start+1);
         push @remove, [$start,$end];
+>>>>>>> upstream/master
     }
-
     #remove the segments
-    $aln = $#remove >= 0 ? $self->_remove_col($aln,\@remove) : $self;
+    $aln = $#$removed_cols >= 0 ? $self->select_columns($removed_cols,1) : $self;
     # fix for meta, sf, ann        
     return $aln;
 }
 
-
-sub _remove_col {
-    my ($self,$aln,$remove) = @_;
-    my @new;
-    
-    my $gap = $self->gap_char;
-    
-    # splice out the segments and create new seq
-    foreach my $seq($self->each_seq){
-        my $new_seq = Bio::LocatableSeq->new(
-					     -id      => $seq->id,
-					     -alphabet=> $seq->alphabet,
-					     -strand  => $seq->strand,
-					     -verbose => $self->verbose);
-        my $sequence = $seq->seq;
-        foreach my $pair(@{$remove}){
-            my $start = $pair->[0];
-            my $end   = $pair->[1];
-            $sequence = $seq->seq unless $sequence;
-            my $orig = $sequence;
-            my $head =  $start > 0 ? substr($sequence, 0, $start) : '';
-            my $tail = ($end + 1) >= CORE::length($sequence) ? '' : substr($sequence, $end + 1);
-            $sequence = $head.$tail;
-            # start
-            unless (defined $new_seq->start) {
-                if ($start == 0) {
-                    my $start_adjust = () = substr($orig, 0, $end + 1) =~ /$gap/g;
-                    $new_seq->start($seq->start + $end + 1 - $start_adjust);
-                }
-                else {
-                    my $start_adjust = $orig =~ /^$gap+/;
-                    if ($start_adjust) {
-                        $start_adjust = $+[0] == $start;
-                    }
-                    $new_seq->start($seq->start + $start_adjust);
-                }
-            }
-            # end
-            if (($end + 1) >= CORE::length($orig)) {
-                my $end_adjust = () = substr($orig, $start) =~ /$gap/g;
-                $new_seq->end($seq->end - (CORE::length($orig) - $start) + $end_adjust);
-            }
-            else {
-                $new_seq->end($seq->end);
-            }
-        }
-        
-        if ($new_seq->end < $new_seq->start) {
-            # we removed all columns except for gaps: set to 0 to indicate no
-            # sequence
-            $new_seq->start(0);
-            $new_seq->end(0);
-        }
-        
-        $new_seq->seq($sequence) if $sequence;
-		push @new, $new_seq;
-    }
-    # add the new seqs to the alignment
-    foreach my $new(@new){
-        $aln->add_seq($new);
-    }
-    # fix for meta, sf, ann    
-    return $aln;
+sub splice_by_seq_pos{
+    my $self = shift;
+    $self->deprecated(-warn_version => 1.0080, -throw_version => 1.0090, -message =>"splice_by_seq_pos - deprecated method. Use remove_gaps() instead.");
+    $self->remove_gaps(-reference=>$_[0]);
 }
 
-sub _remove_columns_by_type {
-	my ($self,$type) = @_;
-	my $aln = $self->new;
-	my @remove;
+=head2 mask_columns
 
-	my $gap = $self->gap_char if (grep { $_ eq 'gaps'} @{$type});
-	my $all_gaps_columns = $self->gap_char if (grep /all_gaps_columns/,@{$type});
-	my %matchchars = ( 'match'           => '\*',
-                       'weak'             => '\.',
-                       'strong'           => ':',
-                       'mismatch'         => ' ',
-                       'gaps'             => '',
-                       'all_gaps_columns' => ''
-                     );
-	# get the characters to delete against
-	my $del_char;
-	foreach my $type (@{$type}){
-		$del_char.= $matchchars{$type};
-	}
+ Title     : mask_columns
+ Usage     : $aln2 = $aln->mask_columns([3..8,11..22])
+             $aln2 = $aln->mask_columns(-selection=>[2,5,7..10],-toggle=>1)
+ Function  : Masks slices of the alignment inclusive of the defined
+             columns, and the first column in the alignment is denoted 1.
+             Mask beyond the length of the sequence does not do padding.
+ Returns   : A Bio::SimpleAlign object
+ Args      : Positive integers should be used to defined the column numbers
+             The mask character should be defined by $aln->mask_char() or "?" as 
+             default. An optional parameter can be defined to toggle the 
+             coordinate selection.
+ Note      : 
 
-	my $length = 0;
-	my $match_line = $self->match_line;
-	# do the matching to get the segments to remove
-	if($del_char){
-		while($match_line =~ m/[$del_char]/g ){
-			my $start = pos($match_line)-1;
-			$match_line=~/\G[$del_char]+/gc;
-			my $end = pos($match_line)-1;
+=cut
 
-			#have to offset the start and end for subsequent removes
-			$start-=$length;
-			$end  -=$length;
-			$length += ($end-$start+1);
-			push @remove, [$start,$end];
-		}
+sub mask_columns {
+    #based on slice(), but did not include the Bio::Seq::Meta sections as I was not sure what it is doing
+	my $self=shift;
+	my ($sel, $toggle) = $self->_rearrange([qw(SELECTION TOGGLE)], @_);
+	@{$sel}=sort {$a<=>$b} @{$sel};
+	
+	my $nonres = join("",$self->gap_char, $self->match_char,$self->missing_char);
+	my $mask_char=$self->mask_char;
+   
+	#Exceptions
+	$self->throw("Mask start has to be a positive integer and less than ".
+                 "alignment length, not [".$sel->[0]."]")
+	unless $sel->[0] =~ /^\d+$/ && $sel->[0] > 0 && $sel->[0] <= $self->length;	
+
+   $self->throw("Mask end has to be a positive integer and less than ".
+                 "alignment length, not [".$sel->[$#$sel]."]")
+	unless $sel->[$#$sel] =~ /^\d+$/ && $sel->[$#$sel] > 0 && $sel->[$#$sel] <= $self->length;                 
+	
+	#calculate the coords, and make it in a continous way
+	my @newcoords;
+	if($toggle) {
+		@newcoords=@{_cont_coords(_toggle_selection($sel,$self->length))};
 	}
-
-	# remove the segments
-	$aln = $#remove >= 0 ? $self->_remove_col($aln,\@remove) : $self;
-	$aln = $aln->remove_gaps() if $gap;
-	$aln = $aln->remove_gaps('', 1) if $all_gaps_columns;
-    # fix for meta, sf, ann    
-	$aln;
-}
-
-
-sub _remove_columns_by_num {
-	my ($self,$positions) = @_;
+	else {
+		@newcoords=@{_cont_coords($sel)};
+	}
+	
 	my $aln = $self->new;
+	$aln->id($self->id);
+	
 
-	# sort the positions
-	@$positions = sort { $a->[0] <=> $b->[0] } @$positions;
-    
-    my @remove;
-    my $length = 0;
-    foreach my $pos (@{$positions}) {
-        my ($start, $end) = @{$pos};
-        
-        #have to offset the start and end for subsequent removes
-        $start-=$length;
-        $end  -=$length;
-        $length += ($end-$start+1);
-        push @remove, [$start,$end];
-    }
-
-    #remove the segments
-    $aln = $#remove >= 0 ? $self->_remove_col($aln,\@remove) : $self;
-    # fix for meta, sf, ann    
-	$aln;
+	foreach my $seq ( $self->next_Seq() ) {
+		my $new_seq = Bio::LocatableSeq->new(-id => $seq->id,
+				-alphabet => $seq->alphabet,
+				-strand  => $seq->strand,
+				-verbose => $self->verbose);
+		my $dna_string=$seq->seq();
+		my $masked_string;
+		for(my $num=0;$num<@newcoords;) {
+			my ($start,$end)=@newcoords[$num,$num+1];
+			$masked_string = substr($dna_string, $start - 1, $end - $start + 1);
+         $masked_string =~ s{[^$nonres]}{$mask_char}g;
+         $dna_string = substr($dna_string,0,$start-1) . $masked_string . substr($dna_string,$end);    
+			$num+=2;
+		}
+		$new_seq->seq($dna_string);
+		$aln->add_Seq($new_seq);
+	}
+	return $aln;
 }
+=head2 seq_with_features
 
+ Title   : seq_with_features
+ Usage   : $seq = $aln->seq_with_features(-pos => 1,
+                                          -consensus => 60
+                                          -mask =>
+           sub { my $consensus = shift;
 
-=head1 Change sequences within the MSA
+                 for my $i (1..5){
+                    my $n = 'N' x $i;
+                    my $q = '\?' x $i;
+                    while($consensus =~ /[^?]$q[^?]/){
+                       $consensus =~ s/([^?])$q([^?])/$1$n$2/;
+                    }
+                  }
+                 return $consensus;
+               }
+                                         );
+ Function: produces a Bio::Seq object by first splicing gaps from -pos
+           (by means of a remove_gaps(-reference=>1) call), then creating
+           features using non-? chars (by means of a consensus_string()
+           call with stringency -consensus).
+ Returns : a Bio::Seq object
+ Args    : -pos : required. sequence from which to build the Bio::Seq
+             object
+           -consensus : optional, defaults to consensus_string()'s
+             default cutoff value
+           -mask : optional, a coderef to apply to consensus_string()'s
+             output before building features.  this may be useful for
+             closing gaps of 1 bp by masking over them with N, for
+             instance
 
-These methods affect characters in all sequences without changing the
-alignment.
+=cut
 
-=head2 splice_by_seq_pos
+sub seq_with_features{
+   my ($self,%arg) = @_;
 
- Title   : splice_by_seq_pos
- Usage   : $status = splice_by_seq_pos(1);
- Function: splices all aligned sequences where the specified sequence
-           has gaps.
- Example :
- Returns : 1 on success
- Args    : position of sequence to splice by
+   #first do the preparatory splice
+   $self->throw("must provide a -pos argument") unless $arg{-pos};
+   $self->remove_gaps(-reference=>$arg{-pos});
 
+   my $consensus_string = $self->consensus_string($arg{-consensus});
+   $consensus_string = $arg{-mask}->($consensus_string)
+	 if defined($arg{-mask});
 
-=cut
+   my(@bs,@es);
 
-sub splice_by_seq_pos{
-  my ($self,$pos) = @_;
+   push @bs, 1 if $consensus_string =~ /^[^?]/;
 
-  my $guide = $self->get_seq_by_pos($pos);
-  my $guide_seq = $guide->seq;
+   while($consensus_string =~ /\?[^?]/g){
+	 push @bs, pos($consensus_string);
+   }
+   while($consensus_string =~ /[^?]\?/g){
+	 push @es, pos($consensus_string);
+   }
 
-  $guide_seq =~ s/\./\-/g;
+   push @es, CORE::length($consensus_string) if $consensus_string =~ /[^?]$/;
 
-  my @gaps = ();
-  $pos = -1;
-  while(($pos = index($guide_seq, '-', $pos)) > -1 ){
-    unshift @gaps, $pos;
-    $pos++;
-  }
+   my $seq = Bio::Seq->new();
+
+#   my $rootfeature = Bio::SeqFeature::Generic->new(
+#                -source_tag => 'location',
+#                -start      => $self->get_seq_by_pos($arg{-pos})->start,
+#                -end        => $self->get_seq_by_pos($arg{-pos})->end,
+#                                                  );
+#   $seq->add_SeqFeature($rootfeature);
 
-  foreach my $seq ($self->each_seq){
-    my @bases = split '', $seq->seq;
+   while(my $b = shift @bs){
+	 my $e = shift @es;
+	 $seq->add_SeqFeature(
+       Bio::SeqFeature::Generic->new(
+         -start => $b - 1 + $self->get_Seq_by_pos($arg{-pos})->start,
+         -end   => $e - 1 + $self->get_Seq_by_pos($arg{-pos})->start,
+         -source_tag => $self->source || 'MSA',
+       )
+     );
+   }
+
+   return $seq;
+}
+
+
+=head1 Change sequences within the MSA
+
+These methods affect characters in all sequences without changing the
+alignment.
+
+
+=head2 map_chars
+
+ Title     : map_chars
+ Usage     : $ali->map_chars('\.','-')
+ Function  : Does a s/$arg1/$arg2/ on the sequences. Useful for gap
+             characters.
+
+             Note that the first argument is interpreted as a regexp
+             so be careful and escape any wild card characters (e.g.
+             do $ali->map_chars('\.','-') to replace periods with dashes.
+ Returns   :
+ Argument  : A regexp and a string
+
+=cut
+
+sub map_chars {
+    my $self = shift;
+    my $from = shift;
+    my $to   = shift;
+    my ( $seq, $temp );
+
+    $self->throw("Need two arguments: a regexp and a string")
+      unless defined $from and defined $to;
+
+<<<<<<< HEAD
+    foreach $seq ( $self->next_Seq() ) {
+	$temp = $seq->seq();
+	$temp =~ s/$from/$to/g;
+	$seq->seq($temp);
+=======
+    foreach $seq ( $self->each_seq() ) {
+        $temp = $seq->seq();
+        $temp =~ s/$from/$to/g;
+        $seq->seq($temp);
+>>>>>>> upstream/master
+    }
+    return 1;
+}
+
+
+=head2 uppercase
+
+ Title     : uppercase()
+ Usage     : $ali->uppercase()
+ Function  : Sets all the sequences to uppercase
+ Returns   : 1
+ Argument  : None
+
+=cut
+
+sub uppercase {
+    my $self = shift;
+    my $seq;
+    my $temp;
+
+    foreach $seq ( $self->next_Seq() ) {
+      $temp = $seq->seq();
+      $temp =~ tr/[a-z]/[A-Z]/;
+
+      $seq->seq($temp);
+    }
+    return 1;
+}
+
+=head2 lowercase
+
+ Title     : lowercase()
+ Usage     : $ali->lowercase()
+ Function  : Sets all the sequences to lowercase
+ Returns   : 1
+ Argument  : None
+
+=cut
+
+sub lowercase {
+    my $self = shift;
+    my $seq;
+    my $temp;
+
+    foreach $seq ( $self->next_Seq() ) {
+      $temp = $seq->seq();
+      $temp =~ tr/[A-Z]/[a-z]/;
+
+      $seq->seq($temp);
+    }
+    return 1;
+}
+
+
+=head2 togglecase
+
+ Title     : togglecase()
+ Usage     : $ali->togglecase()
+ Function  : Sets all the sequences to opposite case
+ Returns   : 1
+ Argument  : None
+
+=cut
+
+<<<<<<< HEAD
+sub togglecase {
+    my $self = shift;
+    my $seq;
+    my $temp;
+=======
+sub gap_line {
+    my ($self,$gapchar) = @_;
+    $gapchar = $gapchar || $self->gap_char;
+    my %gap_hsh; # column gaps vector
+    foreach my $seq ( $self->each_seq ) {
+		my $i = 0;
+    	map {$gap_hsh{$_->[0]} = undef} grep {$_->[1] =~ m/[$gapchar]/}
+		  map {[$i++, $_]} split(//, uc ($seq->seq));
+    }
+    my $gap_line;
+    foreach my $pos ( 0..$self->length-1 ) {
+	  $gap_line .= (exists $gap_hsh{$pos}) ? $self->gap_char:'.';
+    }
+    return $gap_line;
+}
+
+=head2 all_gap_line
+
+ Title    : all_gap_line()
+ Usage    : $line = $align->all_gap_line()
+ Function : Generates a gap line - much like consensus string
+            except that a line where '-' represents all-gap column
+ Args     : (optional) gap line characters ('-' by default)
+ Returns  : string
+>>>>>>> upstream/master
+
+    foreach $seq ( $self->next_Seq() ) {
+      $temp = $seq->seq();
+      $temp =~ tr/[A-Za-z]/[a-zA-Z]/;
+
+<<<<<<< HEAD
+      $seq->seq($temp);
+=======
+sub all_gap_line {
+    my ($self,$gapchar) = @_;
+    $gapchar = $gapchar || $self->gap_char;
+    my %gap_hsh;		# column gaps counter hash
+    my @seqs = $self->each_seq;
+    foreach my $seq ( @seqs ) {
+	my $i = 0;
+    	map {$gap_hsh{$_->[0]}++} grep {$_->[1] =~ m/[$gapchar]/}
+	map {[$i++, $_]} split(//, uc ($seq->seq));
+    }
+    my $gap_line;
+    foreach my $pos ( 0..$self->length-1 ) {
+	if (exists $gap_hsh{$pos} && $gap_hsh{$pos} == scalar @seqs) {
+            # gaps column
+	    $gap_line .= $self->gap_char;
+	} else {
+	    $gap_line .= '.';
+	}
+>>>>>>> upstream/master
+    }
+    return 1;
+}
+
+<<<<<<< HEAD
+=======
+=head2 gap_col_matrix
+
+ Title    : gap_col_matrix()
+ Usage    : my $cols = $align->gap_col_matrix()
+ Function : Generates an array where each element in the array is a 
+            hash reference with a key of the sequence name and a
+            value of 1 if the sequence has a gap at that column
+ Returns  : Reference to an array
+ Args     : Optional: gap line character ($aln->gap_char or '-' by default)
+
+=cut
+
+sub gap_col_matrix {
+    my ( $self, $gapchar ) = @_;
+    $gapchar = $gapchar || $self->gap_char;
+    my %gap_hsh;    # column gaps vector
+    my @cols;
+    foreach my $seq ( $self->each_seq ) {
+        my $i   = 0;
+        my $str = $seq->seq;
+        my $len = $seq->length;
+        my $ch;
+        my $id = $seq->display_id;
+        while ( $i < $len ) {
+            $ch = substr( $str, $i, 1 );
+            $cols[ $i++ ]->{$id} = ( $ch =~ m/[$gapchar]/ );
+        }
+    }
+    return \@cols;
+}
+>>>>>>> upstream/master
+
+=head2 match
+
+ Title     : match()
+ Usage     : $aln->match()
+ Function  : Goes through all columns and changes residues that are
+             identical to residue in first sequence to match '.'
+             character. Sets match_char.
+
+             USE WITH CARE: Most MSA formats do not support match
+             characters in sequences, so this is mostly for output
+             only. NEXUS format (Bio::AlignIO::nexus) can handle
+             it.
+ Returns   : 1
+ Argument  : a match character, optional, defaults to '.'
+             If the character is defined, it will reset $aln->match_char 
+
+=cut
+
+sub match {
+    my ($self, $match) = @_;
+
+   if(defined $match) {
+		$self->match_char($match);
+	}
+	else {
+		$match=$self->match_char();
+	}
+    
+    my ($matching_char) = $match;
+    $matching_char = "\\$match" if $match =~ /[\^.$|()\[\]]/ ;  #';
+    $self->map_chars($matching_char, '-');
+
+    my @seqs = $self->next_Seq();
+    return 1 unless scalar @seqs > 1;
+
+    my $refseq = shift @seqs ;
+    my @refseq = split //, $refseq->seq;
+    my $gapchar = $self->gap_char;
+
+    foreach my $seq ( @seqs ) {
+	my @varseq = split //, $seq->seq();
+	for ( my $i=0; $i < scalar @varseq; $i++) {
+	    $varseq[$i] = $match if defined $refseq[$i] &&
+		( $refseq[$i] =~ /[A-Za-z\*]/ ||
+		  $refseq[$i] =~ /$gapchar/ )
+		      && $refseq[$i] eq $varseq[$i];
+	}
+	$seq->seq(join '', @varseq);
+    }
+    
+    return 1;
+}
+
+
+=head2 unmatch
+
+ Title     : unmatch()
+ Usage     : $ali->unmatch()
+ Function  : Undoes the effect of method match. Unsets match_char.
+ Returns   : 1
+ Argument  : a match character, optional, defaults to '.'
+
+See L<match> and L<match_char>
+
+=cut
+
+sub unmatch {
+    my ($self, $match) = @_;
+
+   if(defined $match) {
+		$self->match_char($match);
+	}
+	else {
+		$match=$self->match_char();
+	}
+
+    my @seqs = $self->next_Seq();
+    return 1 unless scalar @seqs > 1;
+
+    my $refseq = shift @seqs ;
+    my @refseq = split //, $refseq->seq;
+    my $gapchar = $self->gap_char;
+    foreach my $seq ( @seqs ) {
+	my @varseq = split //, $seq->seq();
+	for ( my $i=0; $i < scalar @varseq; $i++) {
+	    $varseq[$i] = $refseq[$i] if defined $refseq[$i] &&
+		( $refseq[$i] =~ /[A-Za-z\*]/ ||
+		  $refseq[$i] =~ /$gapchar/ ) &&
+		      $varseq[$i] eq $match;
+	}
+	$seq->seq(join '', @varseq);
+    }
+    $self->match_char('');
+    return 1;
+}
+
+
+=head1 Consensus sequences
+
+Methods to calculate consensus sequences for the MSA
+
+=head2 consensus_string
+
+ Title     : consensus_string
+ Usage     : $str = $ali->consensus_string($threshold_percent)
+ Function  : Makes a strict consensus
+ Returns   : Consensus string
+ Argument  : Optional treshold ranging from 0 to 100.
+             The consensus residue has to appear at least threshold %
+             of the sequences at a given location, otherwise a '?'
+             character will be placed at that location.
+             (Default value = 0%)
+
+=cut
+
+sub consensus_string {
+    my $self = shift;
+    my $threshold = shift;
+
+    my $out = "";
+    my $len = $self->length - 1;
+
+    foreach ( 0 .. $len ) {
+	$out .= $self->_consensus_aa($_,$threshold);
+    }
+    return $out;
+}
+
+sub _consensus_aa {
+    my $self = shift;
+    my $point = shift;
+    my $threshold_percent = shift || -1 ;
+    my ($seq,%hash,$count,$letter,$key);
+    my $gapchar = $self->gap_char;
+    foreach $seq ( $self->next_Seq() ) {
+	$letter = substr($seq->seq,$point,1);
+	$self->throw("--$point-----------") if $letter eq '';
+	($letter eq $gapchar || $letter =~ /\./) && next;
+	# print "Looking at $letter\n";
+	$hash{$letter}++;
+    }
+    my $number_of_sequences = $self->num_sequences();
+    my $threshold = $number_of_sequences * $threshold_percent / 100. ;
+    $count = -1;
+    $letter = '?';
+
+    foreach $key ( sort keys %hash ) {
+	# print "Now at $key $hash{$key}\n";
+	if( $hash{$key} > $count && $hash{$key} >= $threshold) {
+	    $letter = $key;
+	    $count = $hash{$key};
+	}
+    }
+    return $letter;
+}
+
+=head2 consensus_conservation
+
+ Title     : consensus_conservation
+ Usage     : @conservation = $ali->consensus_conservation();
+ Function  : Conservation (as a percent) of each position of alignment
+ Returns   : Array of percentages [0-100]. Gap columns are 0% conserved.
+ Argument  : 
+ 
+=cut
+
+sub consensus_conservation {
+    my $self = shift;
+    my @cons;
+    my $num_sequences = $self->num_sequences;
+    foreach my $point (0..$self->length-1) {
+        my %hash = $self->_consensus_counts($point);
+        # max frequency of a non-gap letter
+        my $max = (sort {$b<=>$a} values %hash )[0];
+        push @cons, 100 * $max / $num_sequences;
+    }
+    return @cons; 
+}
+
+# Frequency of each letter in one column
+sub _consensus_counts {
+    my $self = shift;
+    my $point = shift;
+    my %hash;
+    my $gapchar = $self->gap_char;
+    foreach my $seq ( $self->next_Seq() ) {
+        my $letter = substr($seq->seq,$point,1);
+        $self->throw("--$point-----------") if $letter eq '';
+        ($letter eq $gapchar || $letter =~ /\./) && next;
+        $hash{$letter}++;
+    }
+    return %hash;
+}
+
+=head2 consensus_iupac
+
+ Title     : consensus_iupac
+ Usage     : $str = $ali->consensus_iupac()
+ Function  : Makes a consensus using IUPAC ambiguity codes from DNA
+             and RNA. The output is in upper case except when gaps in
+             a column force output to be in lower case.
+
+             Note that if your alignment sequences contain a lot of
+             IUPAC ambiquity codes you often have to manually set
+             alphabet.  Bio::PrimarySeq::_guess_type thinks they
+             indicate a protein sequence.
+ Returns   : consensus string
+ Argument  : none
+ Throws    : on protein sequences
+
+=cut
+
+sub consensus_iupac {
+    my $self = shift;
+    my $out = "";
+    my $len = $self->length-1;
+
+    # only DNA and RNA sequences are valid
+    foreach my $seq ( $self->next_Seq() ) {
+	$self->throw("Seq [". $seq->get_nse. "] is a protein")
+	    if $seq->alphabet eq 'protein';
+    }
+    # loop over the alignment columns
+    foreach my $count ( 0 .. $len ) {
+	$out .= $self->_consensus_iupac($count);
+    }
+    return $out;
+}
+
+sub _consensus_iupac {
+    my ($self, $column) = @_;
+    my ($string, $char, $rna);
+
+    #determine all residues in a column
+    foreach my $seq ( $self->next_Seq() ) {
+	$string .= substr($seq->seq, $column, 1);
+    }
+    $string = uc $string;
+
+    # quick exit if there's an N in the string
+    if ($string =~ /N/) {
+	$string =~ /\W/ ? return 'n' : return 'N';
+    }
+    # ... or if there are only gap characters
+    return '-' if $string =~ /^\W+$/;
+
+    # treat RNA as DNA in regexps
+    if ($string =~ /U/) {
+	$string =~ s/U/T/;
+	$rna = 1;
+    }
+
+    # the following s///'s only need to be done to the _first_ ambiguity code
+    # as we only need to see the _range_ of characters in $string
+
+    if ($string =~ /[VDHB]/) {
+	$string =~ s/V/AGC/;
+	$string =~ s/D/AGT/;
+	$string =~ s/H/ACT/;
+	$string =~ s/B/CTG/;
+    }
+
+    if ($string =~ /[SKYRWM]/) {
+	$string =~ s/S/GC/;
+	$string =~ s/K/GT/;
+	$string =~ s/Y/CT/;
+	$string =~ s/R/AG/;
+	$string =~ s/W/AT/;
+	$string =~ s/M/AC/;
+    }
+
+    # and now the guts of the thing
+
+    if ($string =~ /A/) {
+        $char = 'A';                     # A                      A
+        if ($string =~ /G/) {
+            $char = 'R';                 # A and G (purines)      R
+            if ($string =~ /C/) {
+                $char = 'V';             # A and G and C          V
+                if ($string =~ /T/) {
+                    $char = 'N';         # A and G and C and T    N
+                }
+            } elsif ($string =~ /T/) {
+                $char = 'D';             # A and G and T          D
+            }
+        } elsif ($string =~ /C/) {
+            $char = 'M';                 # A and C                M
+            if ($string =~ /T/) {
+                $char = 'H';             # A and C and T          H
+            }
+        } elsif ($string =~ /T/) {
+            $char = 'W';                 # A and T                W
+        }
+    } elsif ($string =~ /C/) {
+        $char = 'C';                     # C                      C
+        if ($string =~ /T/) {
+            $char = 'Y';                 # C and T (pyrimidines)  Y
+            if ($string =~ /G/) {
+                $char = 'B';             # C and T and G          B
+            }
+        } elsif ($string =~ /G/) {
+            $char = 'S';                 # C and G                S
+        }
+    } elsif ($string =~ /G/) {
+        $char = 'G';                     # G                      G
+        if ($string =~ /C/) {
+            $char = 'S';                 # G and C                S
+        } elsif ($string =~ /T/) {
+            $char = 'K';                 # G and T                K
+        }
+    } elsif ($string =~ /T/) {
+        $char = 'T';                     # T                      T
+    }
 
-    splice(@bases, $_, 1) foreach @gaps;
-    $seq->seq(join('', @bases));
-  }
+    $char = 'U' if $rna and $char eq 'T';
+    $char = lc $char if $string =~ /\W/;
 
-  1;
+    return $char;
 }
 
-=head2 map_chars
 
- Title     : map_chars
- Usage     : $ali->map_chars('\.','-')
- Function  : Does a s/$arg1/$arg2/ on the sequences. Useful for gap
-             characters.
+=head2 consensus_meta
 
-             Note that the first argument is interpreted as a regexp
-             so be careful and escape any wild card characters (e.g.
-             do $ali->map_chars('\.','-') to replace periods with dashes.
- Returns   :
- Argument  : A regexp and a string
+ Title     : consensus_meta
+ Usage     : $seqmeta = $ali->consensus_meta()
+ Function  : Returns a Bio::Seq::Meta object containing the consensus
+             strings derived from meta data analysis.
+ Returns   : Bio::Seq::Meta 
+ Argument  : Bio::Seq::Meta 
+ Throws    : non-MetaI object
 
 =cut
 
-sub map_chars {
-    my $self = shift;
-    my $from = shift;
-    my $to   = shift;
-    my ( $seq, $temp );
-
-    $self->throw("Need two arguments: a regexp and a string")
-      unless defined $from and defined $to;
-
-    foreach $seq ( $self->each_seq() ) {
-        $temp = $seq->seq();
-        $temp =~ s/$from/$to/g;
-        $seq->seq($temp);
+sub consensus_meta {
+    my ($self, $meta) = @_;
+    if ($meta && (!ref $meta || !$meta->isa('Bio::Seq::MetaI'))) {
+        $self->throw('Not a Bio::Seq::MetaI object');
     }
-    return 1;
+    return $self->{'_aln_meta'} = $meta if $meta;
+    return $self->{'_aln_meta'} 
 }
 
 
-=head2 uppercase
+=head2 bracket_string
 
- Title     : uppercase()
- Usage     : $ali->uppercase()
- Function  : Sets all the sequences to uppercase
- Returns   :
- Argument  :
+ Title     : bracket_string
+ Usage     : my @params = (-refseq     => 'testseq',
+                           -allele1    => 'allele1',
+                           -allele2    => 'allele2',
+                           -delimiters => '{}',
+                           -separator  => '/');
+             $str = $aln->bracket_string(@params)
 
-=cut
+ Function :  When supplied with a list of parameters (see below), returns a
+             string in BIC format. This is used for allelic comparisons.
+             Briefly, if either allele contains a base change when compared to
+             the refseq, the base or gap for each allele is represented in
+             brackets in the order present in the 'alleles' parameter.
 
-sub uppercase {
-    my $self = shift;
-    my $seq;
-    my $temp;
+             For the following data:
 
-    foreach $seq ( $self->each_seq() ) {
-      $temp = $seq->seq();
-      $temp =~ tr/[a-z]/[A-Z]/;
+             >testseq
+             GGATCCATTGCTACT
+             >allele1
+             GGATCCATTCCTACT
+             >allele2
+             GGAT--ATTCCTCCT
 
-      $seq->seq($temp);
+             the returned string with parameters 'refseq => testseq' and
+             'alleles => [qw(allele1 allele2)]' would be:
+
+             GGAT[C/-][C/-]ATT[C/C]CT[A/C]CT
+ Returns   : BIC-formatted string
+ Argument  : Required args
+                refseq    : string (ID) of the reference sequence used
+                            as basis for comparison
+                allele1   : string (ID) of the first allele
+                allele2   : string (ID) of the second allele
+             Optional args
+                delimiters: two symbol string of left and right delimiters.
+                            Only the first two symbols are used
+                            default = '[]'
+                separator : string used as a separator.  Only the first
+                            symbol is used
+                            default = '/'
+ Throws    : On no refseq/alleles, or invalid refseq/alleles.
+
+=cut
+
+sub bracket_string {
+    my ($self, @args) = @_;
+    my ($ref, $a1, $a2, $delim, $sep) =
+        $self->_rearrange([qw(refseq allele1 allele2 delimiters separator)], @args);
+    $self->throw('Missing refseq/allele1/allele2') if (!$a1 || !$a2 || !$ref);
+    my ($ld, $rd);
+    ($ld, $rd) = split('', $delim, 2) if $delim;
+    $ld ||= '[';
+    $rd ||= ']';
+    $sep ||= '/';
+    my ($refseq, $allele1, $allele2) =
+        map {( $self->next_Seq_with_id($_) )} ($ref, $a1, $a2);
+    if (!$refseq || !$allele1 || !$allele2) {
+        $self->throw("One of your refseq/allele IDs is invalid!");
     }
-    return 1;
+    my $len = $self->length-1;
+    my $bic = '';
+    # loop over the alignment columns
+    for my $column ( 0 .. $len ) {
+        my $string;
+        my ($compres, $res1, $res2) =
+            map{substr($_->seq, $column, 1)} ($refseq, $allele1, $allele2);
+        # are any of the allele symbols different from the refseq?
+        $string = ($compres eq $res1 && $compres eq $res2) ? $compres :
+                $ld.$res1.$sep.$res2.$rd;
+        $bic .= $string;
+    }
+    return $bic;
 }
 
+
 =head2 cigar_line
 
  Title    : cigar_line()
@@ -1532,7 +2429,7 @@ sub cigar_line {
 
 	# create a precursor, something like (1,4,5,6,7,33,45),
 	# where each number corresponds to a conserved position
-	foreach my $seq ( $self->each_seq ) {
+	foreach my $seq ( $self->next_Seq ) {
 		my @seq = split "", uc ($seq->seq);
 		my $pos = 1;
 		for (my $x = 0 ; $x < $len ; $x++ ) {
@@ -1587,7 +2484,7 @@ sub cigar_line {
 =head2 match_line
 
  Title    : match_line()
- Usage    : $line = $align->match_line()
+ Usage    : $line = $aln->match_line()
  Function : Generates a match line - much like consensus string
             except that a line indicating the '*' for a match.
  Args     : (optional) Match line characters ('*' by default)
@@ -1607,7 +2504,7 @@ sub match_line {
 
 	my @seqchars;
 	my $alphabet;
-	foreach my $seq ( $self->each_seq ) {
+	foreach my $seq ( $self->next_Seq ) {
 		push @seqchars, [ split(//, uc ($seq->seq)) ];
 		$alphabet = $seq->alphabet unless defined $alphabet;
 	}
@@ -1676,7 +2573,7 @@ sub match_line {
 =head2 gap_line
 
  Title    : gap_line()
- Usage    : $line = $align->gap_line()
+ Usage    : $line = $aln->gap_line()
  Function : Generates a gap line - much like consensus string
             except that a line where '-' represents gap
  Args     : (optional) gap line characters ('-' by default)
@@ -1688,14 +2585,14 @@ sub gap_line {
     my ($self,$gapchar) = @_;
     $gapchar = $gapchar || $self->gap_char;
     my %gap_hsh; # column gaps vector
-    foreach my $seq ( $self->each_seq ) {
+    foreach my $seq ( $self->next_Seq ) {
 		my $i = 0;
-    	map {$gap_hsh{$_->[0]} = undef} grep {$_->[1] =~ m/[$gapchar]/}
+    	map {$gap_hsh{$_->[0]} = undef} grep {$_->[1] eq $gapchar}
 		  map {[$i++, $_]} split(//, uc ($seq->seq));
     }
     my $gap_line;
     foreach my $pos ( 0..$self->length-1 ) {
-	  $gap_line .= (exists $gap_hsh{$pos}) ? $self->gap_char:'.';
+	  $gap_line .= (exists $gap_hsh{$pos}) ? $gapchar:'.';
     }
     return $gap_line;
 }
@@ -1703,7 +2600,7 @@ sub gap_line {
 =head2 all_gap_line
 
  Title    : all_gap_line()
- Usage    : $line = $align->all_gap_line()
+ Usage    : $line = $aln->all_gap_line()
  Function : Generates a gap line - much like consensus string
             except that a line where '-' represents all-gap column
  Args     : (optional) gap line characters ('-' by default)
@@ -1715,17 +2612,17 @@ sub all_gap_line {
     my ($self,$gapchar) = @_;
     $gapchar = $gapchar || $self->gap_char;
     my %gap_hsh;		# column gaps counter hash
-    my @seqs = $self->each_seq;
+    my @seqs = $self->next_Seq;
     foreach my $seq ( @seqs ) {
 	my $i = 0;
-    	map {$gap_hsh{$_->[0]}++} grep {$_->[1] =~ m/[$gapchar]/}
+    	map {$gap_hsh{$_->[0]}++} grep {$_->[1] eq $gapchar}
 	map {[$i++, $_]} split(//, uc ($seq->seq));
     }
     my $gap_line;
     foreach my $pos ( 0..$self->length-1 ) {
 	if (exists $gap_hsh{$pos} && $gap_hsh{$pos} == scalar @seqs) {
             # gaps column
-	    $gap_line .= $self->gap_char;
+	    $gap_line .= $gapchar;
 	} else {
 	    $gap_line .= '.';
 	}
@@ -1736,118 +2633,35 @@ sub all_gap_line {
 =head2 gap_col_matrix
 
  Title    : gap_col_matrix()
- Usage    : my $cols = $align->gap_col_matrix()
- Function : Generates an array where each element in the array is a 
-            hash reference with a key of the sequence name and a
-            value of 1 if the sequence has a gap at that column
- Returns  : Reference to an array
- Args     : Optional: gap line character ($aln->gap_char or '-' by default)
+ Usage    : my $cols = $aln->gap_col_matrix()
+ Function : Generates an array of hashes where
+            each entry in the array is a hash reference
+            with keys of all the sequence names and
+            and value of 1 or 0 if the sequence has a gap at that column
+ Args     : (optional) gap line characters ($aln->gap_char or '-' by default)
 
 =cut
 
 sub gap_col_matrix {
-    my ( $self, $gapchar ) = @_;
+    my ($self,$gapchar) = @_;
     $gapchar = $gapchar || $self->gap_char;
-    my %gap_hsh;    # column gaps vector
+    my %gap_hsh; # column gaps vector
     my @cols;
-    foreach my $seq ( $self->each_seq ) {
-        my $i   = 0;
-        my $str = $seq->seq;
-        my $len = $seq->length;
-        my $ch;
-        my $id = $seq->display_id;
-        while ( $i < $len ) {
-            $ch = substr( $str, $i, 1 );
-            $cols[ $i++ ]->{$id} = ( $ch =~ m/[$gapchar]/ );
-        }
-    }
-    return \@cols;
-}
-
-=head2 match
-
- Title     : match()
- Usage     : $ali->match()
- Function  : Goes through all columns and changes residues that are
-             identical to residue in first sequence to match '.'
-             character. Sets match_char.
-
-             USE WITH CARE: Most MSA formats do not support match
-             characters in sequences, so this is mostly for output
-             only. NEXUS format (Bio::AlignIO::nexus) can handle
-             it.
- Returns   : 1
- Argument  : a match character, optional, defaults to '.'
-
-=cut
-
-sub match {
-    my ($self, $match) = @_;
-
-    $match ||= '.';
-    my ($matching_char) = $match;
-    $matching_char = "\\$match" if $match =~ /[\^.$|()\[\]]/ ;  #';
-    $self->map_chars($matching_char, '-');
-
-    my @seqs = $self->each_seq();
-    return 1 unless scalar @seqs > 1;
-
-    my $refseq = shift @seqs ;
-    my @refseq = split //, $refseq->seq;
-    my $gapchar = $self->gap_char;
-
-    foreach my $seq ( @seqs ) {
-	my @varseq = split //, $seq->seq();
-	for ( my $i=0; $i < scalar @varseq; $i++) {
-	    $varseq[$i] = $match if defined $refseq[$i] &&
-		( $refseq[$i] =~ /[A-Za-z\*]/ ||
-		  $refseq[$i] =~ /$gapchar/ )
-		      && $refseq[$i] eq $varseq[$i];
+    foreach my $seq ( $self->next_Seq ) {
+	my $i = 0;
+	my $str = $seq->seq;
+	my $len = $seq->length;
+	my $ch;
+	my $id = $seq->display_id;
+	while( $i < $len ) {
+	    $ch = substr($str, $i, 1);
+	    $cols[$i++]->{$id} = ($ch eq $gapchar);
 	}
-	$seq->seq(join '', @varseq);
     }
-    $self->match_char($match);
-    return 1;
+    return \@cols;
 }
 
 
-=head2 unmatch
-
- Title     : unmatch()
- Usage     : $ali->unmatch()
- Function  : Undoes the effect of method match. Unsets match_char.
- Returns   : 1
- Argument  : a match character, optional, defaults to '.'
-
-See L<match> and L<match_char>
-
-=cut
-
-sub unmatch {
-    my ($self, $match) = @_;
-
-    $match ||= '.';
-
-    my @seqs = $self->each_seq();
-    return 1 unless scalar @seqs > 1;
-
-    my $refseq = shift @seqs ;
-    my @refseq = split //, $refseq->seq;
-    my $gapchar = $self->gap_char;
-    foreach my $seq ( @seqs ) {
-	my @varseq = split //, $seq->seq();
-	for ( my $i=0; $i < scalar @varseq; $i++) {
-	    $varseq[$i] = $refseq[$i] if defined $refseq[$i] &&
-		( $refseq[$i] =~ /[A-Za-z\*]/ ||
-		  $refseq[$i] =~ /$gapchar/ ) &&
-		      $varseq[$i] eq $match;
-	}
-	$seq->seq(join '', @varseq);
-    }
-    $self->match_char('');
-    return 1;
-}
-
 =head1 MSA attributes
 
 Methods for setting and reading the MSA attributes.
@@ -1904,37 +2718,58 @@ sub accession {
  Returns   : An description string
  Argument  : An description string (optional)
 
-=cut
+=cut
+
+sub description {
+    my ($self, $name) = @_;
+
+    if (defined( $name )) {
+	$self->{'_description'} = $name;
+    }
+
+    return $self->{'_description'};
+}
+
+=head2 source
+
+ Title   : source
+ Usage   : $aln->source($newval)
+ Function: sets the Alignment source program
+ Example :
+ Returns : value of source
+ Args    : newvalue (optional)
+
 
-sub description {
-    my ($self, $name) = @_;
+=cut
 
-    if (defined( $name )) {
-	$self->{'_description'} = $name;
+sub source{
+   my ($self,$value) = @_;
+   if( defined $value) {
+      $self->{'_source'} = $value;
     }
-
-    return $self->{'_description'};
+    return $self->{'_source'};
 }
 
 =head2 missing_char
 
  Title     : missing_char
- Usage     : $myalign->missing_char("?")
+ Usage     : $myalign->missing_char("&")
  Function  : Gets/sets the missing_char attribute of the alignment
              It is generally recommended to set it to 'n' or 'N'
              for nucleotides and to 'X' for protein.
  Returns   : An missing_char string,
- Argument  : An missing_char string (optional)
+ Argument  : An missing_char string (optional), default as '&'
 
 =cut
 
 sub missing_char {
     my ($self, $char) = @_;
-
-    if (defined $char ) {
-	$self->throw("Single missing character, not [$char]!") if CORE::length($char) > 1;
-	$self->{'_missing_char'} = $char;
-    }
+	
+	if (defined $char || ! defined $self->{'_missing_char'} ) {
+		$char= '&' unless defined $char;
+		$self->throw("Single gap character, not [$char]!") if CORE::length($char) > 1;
+		$self->{'_missing_char'} = $char;
+	}
 
     return $self->{'_missing_char'};
 }
@@ -1945,17 +2780,18 @@ sub missing_char {
  Usage     : $myalign->match_char('.')
  Function  : Gets/sets the match_char attribute of the alignment
  Returns   : An match_char string,
- Argument  : An match_char string (optional)
+ Argument  : An match_char string (optional), default as '.'
 
 =cut
 
 sub match_char {
     my ($self, $char) = @_;
 
-    if (defined $char ) {
-	$self->throw("Single match character, not [$char]!") if CORE::length($char) > 1;
-	$self->{'_match_char'} = $char;
-    }
+	if (defined $char || ! defined $self->{'_match_char'} ) {
+		$char= '.' unless defined $char;
+		$self->throw("Single gap character, not [$char]!") if CORE::length($char) > 1;
+		$self->{'_match_char'} = $char;
+	}
 
     return $self->{'_match_char'};
 }
@@ -1966,23 +2802,48 @@ sub match_char {
  Usage     : $myalign->gap_char('-')
  Function  : Gets/sets the gap_char attribute of the alignment
  Returns   : An gap_char string, defaults to '-'
- Argument  : An gap_char string (optional)
+ Argument  : An gap_char string (optional), default as '-'
 
 =cut
 
 sub gap_char {
     my ($self, $char) = @_;
 
-    if (defined $char || ! defined $self->{'_gap_char'} ) {
-	$char= '-' unless defined $char;
-	$self->throw("Single gap character, not [$char]!") if CORE::length($char) > 1;
-	$self->{'_gap_char'} = $char;
-    }
-    return $self->{'_gap_char'};
+	if (defined $char || ! defined $self->{'_gap_char'} ) {
+		$char= '-' unless defined $char;
+		$self->throw("Single gap character, not [$char]!") if CORE::length($char) > 1;
+		$self->{'_gap_char'} = $char;
+	}
+	return $self->{'_gap_char'};
 }
 
-=head2 symbol_chars
+=head2 mask_char
+
+ Title     : mask_char
+ Usage     : $aln->mask_char('?')
+ Function  : Gets/sets the mask_char attribute of the alignment
+ Returns   : An mask_char string,
+ Argument  : An mask_char string (optional), default as '?'
+
+=cut
+
+sub mask_char {
+	my ($self, $char) = @_;
+	
+	#may need to check whether $char is the same with _gap_char, _match_char and _missing_char
+	if (defined $char || ! defined $self->{'_mask_char'} ) {
+		$char= '?' unless defined $char;
+		$self->throw("Single mask character, not [$char]!") if CORE::length($char) > 1;
+		$self->{'_mask_char'} = $char;
+	}
+	return $self->{'_mask_char'};
+}		
+		
+		
 
+
+
+=head2 symbol_chars
  Title   : symbol_chars
  Usage   : my @symbolchars = $aln->symbol_chars;
  Function: Returns all the seen symbols (other than gaps)
@@ -1995,277 +2856,45 @@ sub symbol_chars{
    my ($self,$includeextra) = @_;
 
    unless ($self->{'_symbols'}) {
-       foreach my $seq ($self->each_seq) {
+       foreach my $seq ($self->next_Seq) {
            map { $self->{'_symbols'}->{$_} = 1; } split(//,$seq->seq);
        }
    }
    my %copy = %{$self->{'_symbols'}};
    if( ! $includeextra ) {
        foreach my $char ( $self->gap_char, $self->match_char,
-			  $self->missing_char) {
+			  $self->missing_char, $self->mask_char) {
 	   delete $copy{$char} if( defined $char );
        }
    }
    return keys %copy;
 }
 
-=head1 Alignment descriptors
-
-These read only methods describe the MSA in various ways.
-
-
-=head2 score
-
- Title     : score
- Usage     : $str = $ali->score()
- Function  : get/set a score of the alignment
- Returns   : a score for the alignment
- Argument  : an optional score to set
-
-=cut
-
-sub score {
-  my $self = shift;
-  $self->{score} = shift if @_;
-  return $self->{score};
-}
-
-=head2 consensus_string
-
- Title     : consensus_string
- Usage     : $str = $ali->consensus_string($threshold_percent)
- Function  : Makes a strict consensus
- Returns   : Consensus string
- Argument  : Optional threshold ranging from 0 to 100.
-             The consensus residue has to appear at least threshold %
-             of the sequences at a given location, otherwise a '?'
-             character will be placed at that location.
-             (Default value = 0%)
-
-=cut
-
-sub consensus_string {
-    my $self = shift;
-    my $threshold = shift;
-
-    my $out = "";
-    my $len = $self->length - 1;
-
-    foreach ( 0 .. $len ) {
-	$out .= $self->_consensus_aa($_,$threshold);
-    }
-    return $out;
-}
-
-=head2 consensus_conservation
-
- Title     : consensus_conservation
- Usage     : @conservation = $ali->consensus_conservation();
- Function  : Conservation (as a percent) of each position of alignment
- Returns   : Array of percentages [0-100]. Gap columns are 0% conserved.
- Argument  : 
- 
-=cut
-
-sub consensus_conservation {
-    my $self = shift;
-    my @cons;
-    my $num_sequences = $self->num_sequences;
-    foreach my $point (0..$self->length-1) {
-        my %hash = $self->_consensus_counts($point);
-        # max frequency of a non-gap letter
-        my $max = (sort {$b<=>$a} values %hash )[0];
-        push @cons, 100 * $max / $num_sequences;
-    }
-    return @cons; 
-}
-
-sub _consensus_aa {
-    my $self = shift;
-    my $point = shift;
-    my $threshold_percent = shift || -1 ;
-    my ($seq,%hash,$count,$letter,$key);
-    my $gapchar = $self->gap_char;
-    %hash = $self->_consensus_counts($point);
-    my $number_of_sequences = $self->num_sequences();
-    my $threshold = $number_of_sequences * $threshold_percent / 100. ;
-    $count = -1;
-    $letter = '?';
-
-    foreach $key ( sort keys %hash ) {
-	# print "Now at $key $hash{$key}\n";
-	if( $hash{$key} > $count && $hash{$key} >= $threshold) {
-	    $letter = $key;
-	    $count = $hash{$key};
-	}
-    }
-    return $letter;
-}
-
-# Frequency of each letter in one column
-sub _consensus_counts {
-    my $self = shift;
-    my $point = shift;
-    my %hash;
-    my $gapchar = $self->gap_char;
-    foreach my $seq ( $self->each_seq() ) {
-        my $letter = substr($seq->seq,$point,1);
-        $self->throw("--$point-----------") if $letter eq '';
-        ($letter eq $gapchar || $letter =~ /\./) && next;
-        $hash{$letter}++;
-    }
-    return %hash;
-}
-
-
-=head2 consensus_iupac
-
- Title     : consensus_iupac
- Usage     : $str = $ali->consensus_iupac()
- Function  : Makes a consensus using IUPAC ambiguity codes from DNA
-             and RNA. The output is in upper case except when gaps in
-             a column force output to be in lower case.
-
-             Note that if your alignment sequences contain a lot of
-             IUPAC ambiquity codes you often have to manually set
-             alphabet.  Bio::PrimarySeq::_guess_type thinks they
-             indicate a protein sequence.
- Returns   : consensus string
- Argument  : none
- Throws    : on protein sequences
-
-=cut
-
-sub consensus_iupac {
-    my $self = shift;
-    my $out = "";
-    my $len = $self->length-1;
-
-    # only DNA and RNA sequences are valid
-    foreach my $seq ( $self->each_seq() ) {
-	$self->throw("Seq [". $seq->get_nse. "] is a protein")
-	    if $seq->alphabet eq 'protein';
-    }
-    # loop over the alignment columns
-    foreach my $count ( 0 .. $len ) {
-	$out .= $self->_consensus_iupac($count);
-    }
-    return $out;
-}
-
-sub _consensus_iupac {
-    my ($self, $column) = @_;
-    my ($string, $char, $rna);
-
-    #determine all residues in a column
-    foreach my $seq ( $self->each_seq() ) {
-	$string .= substr($seq->seq, $column, 1);
-    }
-    $string = uc $string;
-
-    # quick exit if there's an N in the string
-    if ($string =~ /N/) {
-	$string =~ /\W/ ? return 'n' : return 'N';
-    }
-    # ... or if there are only gap characters
-    return '-' if $string =~ /^\W+$/;
-
-    # treat RNA as DNA in regexps
-    if ($string =~ /U/) {
-	$string =~ s/U/T/;
-	$rna = 1;
-    }
-
-    # the following s///'s only need to be done to the _first_ ambiguity code
-    # as we only need to see the _range_ of characters in $string
-
-    if ($string =~ /[VDHB]/) {
-	$string =~ s/V/AGC/;
-	$string =~ s/D/AGT/;
-	$string =~ s/H/ACT/;
-	$string =~ s/B/CTG/;
-    }
-
-    if ($string =~ /[SKYRWM]/) {
-	$string =~ s/S/GC/;
-	$string =~ s/K/GT/;
-	$string =~ s/Y/CT/;
-	$string =~ s/R/AG/;
-	$string =~ s/W/AT/;
-	$string =~ s/M/AC/;
-    }
-
-    # and now the guts of the thing
 
-    if ($string =~ /A/) {
-        $char = 'A';                     # A                      A
-        if ($string =~ /G/) {
-            $char = 'R';                 # A and G (purines)      R
-            if ($string =~ /C/) {
-                $char = 'V';             # A and G and C          V
-                if ($string =~ /T/) {
-                    $char = 'N';         # A and G and C and T    N
-                }
-            } elsif ($string =~ /T/) {
-                $char = 'D';             # A and G and T          D
-            }
-        } elsif ($string =~ /C/) {
-            $char = 'M';                 # A and C                M
-            if ($string =~ /T/) {
-                $char = 'H';             # A and C and T          H
-            }
-        } elsif ($string =~ /T/) {
-            $char = 'W';                 # A and T                W
-        }
-    } elsif ($string =~ /C/) {
-        $char = 'C';                     # C                      C
-        if ($string =~ /T/) {
-            $char = 'Y';                 # C and T (pyrimidines)  Y
-            if ($string =~ /G/) {
-                $char = 'B';             # C and T and G          B
-            }
-        } elsif ($string =~ /G/) {
-            $char = 'S';                 # C and G                S
-        }
-    } elsif ($string =~ /G/) {
-        $char = 'G';                     # G                      G
-        if ($string =~ /C/) {
-            $char = 'S';                 # G and C                S
-        } elsif ($string =~ /T/) {
-            $char = 'K';                 # G and T                K
-        }
-    } elsif ($string =~ /T/) {
-        $char = 'T';                     # T                      T
-    }
 
-    $char = 'U' if $rna and $char eq 'T';
-    $char = lc $char if $string =~ /\W/;
+=head1 Alignment descriptors
 
-    return $char;
-}
+These read only methods describe the MSA in various ways.
 
 
-=head2 consensus_meta
+=head2 score
 
- Title     : consensus_meta
- Usage     : $seqmeta = $ali->consensus_meta()
- Function  : Returns a Bio::Seq::Meta object containing the consensus
-             strings derived from meta data analysis.
- Returns   : Bio::Seq::Meta 
- Argument  : Bio::Seq::Meta 
- Throws    : non-MetaI object
+ Title     : score
+ Usage     : $str = $ali->score()
+ Function  : get/set a score of the alignment
+ Returns   : a score for the alignment
+ Argument  : an optional score to set
 
 =cut
 
-sub consensus_meta {
-    my ($self, $meta) = @_;
-    if ($meta && (!ref $meta || !$meta->isa('Bio::Seq::MetaI'))) {
-        $self->throw('Not a Bio::Seq::MetaI object');
-    }
-    return $self->{'_aln_meta'} = $meta if $meta;
-    return $self->{'_aln_meta'} 
+sub score {
+  my $self = shift;
+  $self->{score} = shift if @_;
+  return $self->{score};
 }
 
+
+
 =head2 is_flush
 
  Title     : is_flush
@@ -2273,7 +2902,7 @@ sub consensus_meta {
  Function  : Tells you whether the alignment
            : is flush, i.e. all of the same length
  Returns   : 1 or 0
- Argument  :
+ Argument  : None
 
 =cut
 
@@ -2283,7 +2912,7 @@ sub is_flush {
     my $length = (-1);
     my $temp;
 
-    foreach $seq ( $self->each_seq() ) {
+    foreach $seq ( $self->next_Seq() ) {
 	if( $length == (-1) ) {
 	    $length = CORE::length($seq->seq());
 	    next;
@@ -2311,13 +2940,13 @@ sub is_flush {
  Function  : Returns the maximum length of the alignment.
              To be sure the alignment is a block, use is_flush
  Returns   : Integer
- Argument  :
+ Argument  : None
 
 =cut
 
 sub length_aln {
     my $self = shift;
-    $self->deprecated("length_aln - deprecated method. Use length() instead.");
+    $self->deprecated(-warn_version => 1.0080, -throw_version => 1.0090, -message =>"length_aln - deprecated method. Use length() instead.");
     $self->length(@_);
 }
 
@@ -2327,7 +2956,7 @@ sub length {
     my $length = -1;
     my $temp;
     
-    foreach $seq ( $self->each_seq() ) {
+    foreach $seq ( $self->next_Seq() ) {
         $temp = $seq->length();
         if( $temp > $length ) {
             $length = $temp;
@@ -2345,20 +2974,20 @@ sub length {
  Function  : Gets the maximum length of the displayname in the
              alignment. Used in writing out various MSA formats.
  Returns   : integer
- Argument  :
+ Argument  : None
 
 =cut
 
 sub maxname_length {
     my $self = shift;
-    $self->deprecated("maxname_length - deprecated method.".
+    $self->deprecated(-warn_version => 1.0080, -throw_version => 1.0090, -message =>"maxname_length - deprecated method.".
 		      " Use maxdisplayname_length() instead.");
     $self->maxdisplayname_length();
 }
 
 sub maxnse_length {
     my $self = shift;
-    $self->deprecated("maxnse_length - deprecated method.".
+    $self->deprecated(-warn_version => 1.0080, -throw_version => 1.0090, -message =>"maxnse_length - deprecated method.".
 		      " Use maxnse_length() instead.");
     $self->maxdisplayname_length();
 }
@@ -2368,7 +2997,7 @@ sub maxdisplayname_length {
     my $maxname = (-1);
     my ($seq,$len);
 
-    foreach $seq ( $self->each_seq() ) {
+    foreach $seq ( $self->next_Seq() ) {
 	$len = CORE::length $self->displayname($seq->get_nse());
 
 	if( $len > $maxname ) {
@@ -2397,7 +3026,7 @@ sub max_metaname_length {
     my ($seq,$len);
     
     # check seq meta first
-    for $seq ( $self->each_seq() ) {
+    for $seq ( $self->next_Seq() ) {
         next if !$seq->isa('Bio::Seq::MetaI' || !$seq->meta_names);
         for my $mtag ($seq->meta_names) {
             $len = CORE::length $mtag;
@@ -2427,7 +3056,7 @@ sub max_metaname_length {
  Usage     : $no = $ali->num_residues
  Function  : number of residues in total in the alignment
  Returns   : integer
- Argument  :
+ Argument  : None
  Note      : replaces no_residues() 
 
 =cut
@@ -2436,7 +3065,7 @@ sub num_residues {
     my $self = shift;
     my $count = 0;
 
-    foreach my $seq ($self->each_seq) {
+    foreach my $seq ($self->next_Seq) {
 	my $str = $seq->seq();
 
 	$count += ($str =~ s/[A-Za-z]//g);
@@ -2458,7 +3087,7 @@ sub num_residues {
 
 sub num_sequences {
     my $self = shift;
-    return scalar($self->each_seq);
+    return scalar($self->next_Seq);
 }
 
 
@@ -2470,41 +3099,35 @@ sub num_sequences {
            percentage identity of the alignment
  Returns : The average percentage identity of the alignment
  Args    : None
- Notes   : This method implemented by Kevin Howe calculates a figure that is
-           designed to be similar to the average pairwise identity of the
-           alignment (identical in the absence of gaps), without having to
-           explicitly calculate pairwise identities proposed by Richard Durbin.
-           Validated by Ewan Birney ad Alex Bateman.
+ Notes   : This method implemented by Kevin Howe calculates a figure that
+           is designed to be similar to the average pairwise identity of 
+           the alignment (identical in the absence of gaps), without having
+           to explicitly calculate pairwise identities proposed by Richard 
+           Durbin. Validated by Ewan Birney ad Alex Bateman.
 
 =cut
 
 sub average_percentage_identity{
    my ($self,@args) = @_;
 
-   my @alphabet = ('A','B','C','D','E','F','G','H','I','J','K','L','M',
-                   'N','O','P','Q','R','S','T','U','V','W','X','Y','Z');
-
+	my %alphabet = map {$_ => undef} qw (A C G T U B D E F H I J K L M N O P Q R S V W X Y Z);
+   
    my ($len, $total, $subtotal, $divisor, $subdivisor, @seqs, @countHashes);
 
    if (! $self->is_flush()) {
        $self->throw("All sequences in the alignment must be the same length");
    }
 
-   @seqs = $self->each_seq();
+   @seqs = $self->next_Seq();
    $len = $self->length();
 
    # load the each hash with correct keys for existence checks
 
-   for( my $index=0; $index < $len; $index++) {
-       foreach my $letter (@alphabet) {
-	   $countHashes[$index]->{$letter} = 0;
-       }
-   }
    foreach my $seq (@seqs)  {
        my @seqChars = split //, $seq->seq();
        for( my $column=0; $column < @seqChars; $column++ ) {
 	   my $char = uc($seqChars[$column]);
-	   if (exists $countHashes[$column]->{$char}) {
+	   if (exists $alphabet{$char}) {
 	       $countHashes[$column]->{$char}++;
 	   }
        }
@@ -2575,7 +3198,7 @@ sub overall_percentage_identity{
    my $len;
    my $total = 0; # number of positions with identical residues
    my @countHashes;
-   my @seqs = $self->each_seq;
+   my @seqs = $self->next_Seq;
    my $nof_seqs = scalar @seqs;
    my $aln_len = $self->length();
    for my $seq (@seqs)  {
@@ -2624,19 +3247,82 @@ sub overall_percentage_identity{
    return ($total / $len ) * 100.0;
 }
 
+=head2 pairwise_percentage_identity
+
+ Title     : pairwise_percentage_identity()
+ Usage     : @pairwiseiden=$ali->pairwise_percentage_identity(3)
+ Function  : Returns pairwise percentage identity of each sequence to the 
+             reference sequence(first sequence as default), or selected sequence
+ 				 See set_new_reference for information on reference sequence
+ Returns   : A list of percentage identity to the reference sequence
+ Argument  : A number for the position of the reference sequence or the 
+             sequence name of the reference sequence
+
+=cut
+
+sub pairwise_percentage_identity {
+	my ($self,$seqid)=@_;
 
+	my %alphabet = map {$_ => undef} qw (A C G T U B D E F H I J K L M N O P Q R S V W X Y Z);
 
-=head1 Alignment positions
+   my ($refseq, @seqs,@ids,@pairwise_iden);
+
+   if (! $self->is_flush()) {
+       $self->throw("All sequences in the alignment must be the same length");
+   }
+
+	foreach my $seq ( $self->next_Seq() ) {
+		push @seqs, $seq;
+		push @ids, $seq->display_id;
+	}
+
+   # load the each hash with correct keys for existence checks
+   if(defined($seqid)) {
+   	my $is_num=0;
+	   if ($seqid =~ /^\d+$/) { # argument is seq position
+			$is_num=1;
+			$self->throw("The new reference sequence number has to be a positive integer >=1 and <= num_sequences ") if ($seqid < 1 || $seqid > $self->num_sequences);
+		}
+		else { # argument is a seq name
+			$self->throw("The new reference sequence not in alignment ") unless &_in_aln($seqid, \@ids);
+	   }
+		#return the requested seq as the reference seq in the pairwise comparison		
+		for (my $i=0; $i<=$#seqs; $i++) {
+			my $pos=$i+1;
+			if ( ($is_num && $pos == $seqid) || ($seqid eq $seqs[$i]->display_id) ) {
+				$refseq= $seqs[$i];
+				last;
+			}
+		}
+	}
+	else {
+		$refseq=$seqs[0];
+	}
+	
+	#calculate the length of the reference sequence
+   my @refseqchars=split //, uc($refseq->seq());
+   my $reflength=$refseq->seq()=~tr/A-Za-z//;;
 
-Methods to map a sequence position into an alignment column and back.
-column_from_residue_number() does the former. The latter is really a
-property of the sequence object and can done using
-L<Bio::LocatableSeq::location_from_column>:
+   if($reflength==0) {
+   	$self->throw("The reference sequence should be non-zero length ");
+   }
+   
+   #calculate the pairwise identity
+   foreach my $seq (@seqs)  {
+   	my $idcount=0;
+		my @seqChars = split //, $seq->seq();
+		for( my $column=0; $column < @seqChars; $column++ ) {
+			my $char = uc($seqChars[$column]);
+			if(exists $alphabet{$char} && exists $alphabet{$refseqchars[$column]} && $char eq $refseqchars[$column]) {
+				$idcount++;
+			}
+		}
+		push @pairwise_iden,$idcount/$reflength;
+	}
+	
+	return @pairwise_iden;
+}
 
-    # select somehow a sequence from the alignment, e.g.
-    my $seq = $aln->get_seq_by_pos(1);
-    #$loc is undef or Bio::LocationI object
-    my $loc = $seq->location_from_column(5);
 
 =head2 column_from_residue_number
 
@@ -2677,7 +3363,7 @@ sub column_from_residue_number {
     $self->throw("No sequence with name [$name]") unless $self->{'_start_end_lists'}->{$name};
     $self->throw("Second argument residue number missing") unless $resnumber;
 
-    foreach my $seq ($self->each_seq_with_id($name)) {
+    foreach my $seq ($self->next_Seq_with_id($name)) {
 	my $col;
 	eval {
 	    $col = $seq->column_from_residue_number($resnumber);
@@ -2691,6 +3377,7 @@ sub column_from_residue_number {
 
 }
 
+
 =head1 Sequence names
 
 Methods to manipulate the display name. The default name based on the
@@ -2710,13 +3397,13 @@ ways.
 
 sub get_displayname {
     my $self = shift;
-    $self->deprecated("get_displayname - deprecated method. Use displayname() instead.");
+    $self->deprecated(-warn_version => 1.0080, -throw_version => 1.0090, -message =>"get_displayname - deprecated method. Use displayname() instead.");
     $self->displayname(@_);
 }
 
 sub set_displayname {
     my $self = shift;
-    $self->deprecated("set_displayname - deprecated method. Use displayname() instead.");
+    $self->deprecated(-warn_version => 1.0080, -throw_version => 1.0090, -message =>"set_displayname - deprecated method. Use displayname() instead.");
     $self->displayname(@_);
 }
 
@@ -2744,7 +3431,7 @@ sub displayname {
  Function  : Sets the names to be name_# where # is the number of
              times this name has been used.
  Returns   : 1, on success
- Argument  :
+ Argument  : None
 
 =cut
 
@@ -2780,7 +3467,7 @@ sub set_displayname_count {
  Function  : Makes all the sequences be displayed as just their name,
              not name/start-end
  Returns   : 1
- Argument  :
+ Argument  : None
 
 =cut
 
@@ -2788,7 +3475,7 @@ sub set_displayname_flat {
     my $self = shift;
     my ($nse,$seq);
 
-    foreach $seq ( $self->each_seq() ) {
+    foreach $seq ( $self->next_Seq() ) {
 	$nse = $seq->get_nse();
 	$self->displayname($nse,$seq->id());
     }
@@ -2801,7 +3488,7 @@ sub set_displayname_flat {
  Usage     : $ali->set_displayname_normal()
  Function  : Makes all the sequences be displayed as name/start-end
  Returns   : 1, on success
- Argument  :
+ Argument  : None
 
 =cut
 
@@ -2809,33 +3496,13 @@ sub set_displayname_normal {
     my $self = shift;
     my ($nse,$seq);
 
-    foreach $seq ( $self->each_seq() ) {
+    foreach $seq ( $self->next_Seq() ) {
 	$nse = $seq->get_nse();
 	$self->displayname($nse,$nse);
     }
     return 1;
 }
 
-=head2 source
-
- Title   : source
- Usage   : $obj->source($newval)
- Function: sets the Alignment source program
- Example :
- Returns : value of source
- Args    : newvalue (optional)
-
-
-=cut
-
-sub source{
-   my ($self,$value) = @_;
-   if( defined $value) {
-      $self->{'_source'} = $value;
-    }
-    return $self->{'_source'};
-}
-
 =head2 set_displayname_safe
 
  Title     : set_displayname_safe
@@ -2856,7 +3523,7 @@ sub set_displayname_safe {
     my ($seq, %phylip_name);
     my $ct=0;
     my $new=Bio::SimpleAlign->new();
-    foreach $seq ( $self->each_seq() ) {
+    foreach $seq ( $self->next_Seq() ) {
 	$ct++;
 	my $pname="S". sprintf "%0" . ($idlength-1) . "s", $ct;
 	$phylip_name{$pname}=$seq->id();
@@ -2866,7 +3533,7 @@ sub set_displayname_safe {
 					    -start    => $seq->{_start},
 					    -end      => $seq->{_end}
 					    );
-	$new->add_seq($new_seq);
+	$new->add_Seq($new_seq);
     }
 
     $self->debug("$ct seq names changed. Restore names by using restore_displayname.");
@@ -2889,7 +3556,7 @@ sub restore_displayname {
     my $ref=shift;
     my %name=%$ref;
     my $new=Bio::SimpleAlign->new();
-    foreach my $seq ( $self->each_seq() ) {
+    foreach my $seq ( $self->next_Seq() ) {
       $self->throw("No sequence with name") unless defined $name{$seq->id()};
       my $new_seq= Bio::LocatableSeq->new(-id       => $name{$seq->id()},
 					  -seq      => $seq->seq(),
@@ -2897,123 +3564,11 @@ sub restore_displayname {
 					  -start    => $seq->{_start},
 					  -end      => $seq->{_end}
 					  );
-      $new->add_seq($new_seq);
+      $new->add_Seq($new_seq);
     }
     return $new;
 }
 
-=head2 sort_by_start
-
- Title     : sort_by_start
- Usage     : $ali->sort_by_start
- Function  : Changes the order of the alignment to the start position of each
-             subalignment    
- Returns   :
- Argument  :
-
-=cut
-
-sub sort_by_start {
-    my $self = shift;
-    my ($seq,$nse,@arr,%hash,$count);
-    foreach $seq ( $self->each_seq() ) {
-        $nse = $seq->get_nse;
-        $hash{$nse} = $seq;
-    }
-    $count = 0;
-    %{$self->{'_order'}} = (); # reset the hash;
-    foreach $nse ( sort _startend keys %hash) {
-        $self->{'_order'}->{$count} = $nse;
-        $count++;
-    }
-    1;
-}
-
-sub _startend
-{
-    my ($aname,$arange) = split (/[\/]/,$a);
-    my ($bname,$brange) = split (/[\/]/,$b);
-    my ($astart,$aend) = split(/\-/,$arange);
-    my ($bstart,$bend) = split(/\-/,$brange);
-    return $astart <=> $bstart;
-}
-
-=head2 bracket_string
-
- Title     : bracket_string
- Usage     : my @params = (-refseq     => 'testseq',
-                           -allele1    => 'allele1',
-                           -allele2    => 'allele2',
-                           -delimiters => '{}',
-                           -separator  => '/');
-             $str = $aln->bracket_string(@params)
-
- Function :  When supplied with a list of parameters (see below), returns a
-             string in BIC format. This is used for allelic comparisons.
-             Briefly, if either allele contains a base change when compared to
-             the refseq, the base or gap for each allele is represented in
-             brackets in the order present in the 'alleles' parameter.
-
-             For the following data:
-
-             >testseq
-             GGATCCATTGCTACT
-             >allele1
-             GGATCCATTCCTACT
-             >allele2
-             GGAT--ATTCCTCCT
-
-             the returned string with parameters 'refseq => testseq' and
-             'alleles => [qw(allele1 allele2)]' would be:
-
-             GGAT[C/-][C/-]ATT[C/C]CT[A/C]CT
- Returns   : BIC-formatted string
- Argument  : Required args
-                refseq    : string (ID) of the reference sequence used
-                            as basis for comparison
-                allele1   : string (ID) of the first allele
-                allele2   : string (ID) of the second allele
-             Optional args
-                delimiters: two symbol string of left and right delimiters.
-                            Only the first two symbols are used
-                            default = '[]'
-                separator : string used as a separator.  Only the first
-                            symbol is used
-                            default = '/'
- Throws    : On no refseq/alleles, or invalid refseq/alleles.
-
-=cut
-
-sub bracket_string {
-    my ($self, @args) = @_;
-    my ($ref, $a1, $a2, $delim, $sep) =
-        $self->_rearrange([qw(refseq allele1 allele2 delimiters separator)], @args);
-    $self->throw('Missing refseq/allele1/allele2') if (!$a1 || !$a2 || !$ref);
-    my ($ld, $rd);
-    ($ld, $rd) = split('', $delim, 2) if $delim;
-    $ld ||= '[';
-    $rd ||= ']';
-    $sep ||= '/';
-    my ($refseq, $allele1, $allele2) =
-        map {( $self->each_seq_with_id($_) )} ($ref, $a1, $a2);
-    if (!$refseq || !$allele1 || !$allele2) {
-        $self->throw("One of your refseq/allele IDs is invalid!");
-    }
-    my $len = $self->length-1;
-    my $bic = '';
-    # loop over the alignment columns
-    for my $column ( 0 .. $len ) {
-        my $string;
-        my ($compres, $res1, $res2) =
-            map{substr($_->seq, $column, 1)} ($refseq, $allele1, $allele2);
-        # are any of the allele symbols different from the refseq?
-        $string = ($compres eq $res1 && $compres eq $res2) ? $compres :
-                $ld.$res1.$sep.$res2.$rd;
-        $bic .= $string;
-    }
-    return $bic;
-}
-
 
 =head2 methods implementing Bio::FeatureHolderI
 
@@ -3127,18 +3682,6 @@ sub feature_count {
     }
 }
 
-=head2 get_all_SeqFeatures
-
- Title   : get_all_SeqFeatures
- Usage   : 
- Function: Get all SeqFeatures.
- Example :
- Returns : an array of Bio::SeqFeatureI implementing objects
- Args    : none
- Note    : Falls through to Bio::FeatureHolderI implementation.
-
-=cut
-
 =head2 methods for Bio::AnnotatableI
 
 AnnotatableI implementation to support sequence alignments which
@@ -3171,20 +3714,6 @@ sub annotation {
     return $obj->{'_annotation'};
 }
 
-=head1 Deprecated methods
-
-=cut
-
-=head2 no_residues
-
- Title     : no_residues
- Usage     : $no = $ali->no_residues
- Function  : number of residues in total in the alignment
- Returns   : integer
- Argument  :
- Note      : deprecated in favor of num_residues() 
-
-=cut
 
 sub no_residues {
 	my $self = shift;
@@ -3194,17 +3723,6 @@ sub no_residues {
     $self->num_residues(@_);
 }
 
-=head2 no_sequences
-
- Title     : no_sequences
- Usage     : $depth = $ali->no_sequences
- Function  : number of sequence in the sequence alignment
- Returns   : integer
- Argument  :
- Note      : deprecated in favor of num_sequences()
-
-=cut
-
 sub no_sequences {
 	my $self = shift;
 	$self->deprecated(-warn_version => 1.0069,
@@ -3213,66 +3731,6 @@ sub no_sequences {
     $self->num_sequences(@_);
 }
 
-=head2 mask_columns
-
- Title     : mask_columns
- Usage     : $aln2 = $aln->mask_columns(20,30)
- Function  : Masks a slice of the alignment inclusive of start and
-             end columns, and the first column in the alignment is denoted 1.
-             Mask beyond the length of the sequence does not do padding.
- Returns   : A Bio::SimpleAlign object
- Args      : Positive integer for start column, positive integer for end column,
-             optional string value use for the mask. Example:
-
-             $aln2 = $aln->mask_columns(20,30,'?')
- Note      : Masking must use a character that is not used for gaps or
-             frameshifts.  These can be adjusted using the relevant global
-             variables, but be aware these may be (uncontrollably) modified
-             elsewhere within BioPerl (see bug 2715)
-
-=cut
-
-sub mask_columns {
-    #based on slice(), but did not include the Bio::Seq::Meta sections as I was not sure what it is doing
-    my $self = shift;
-
-    my $nonres = $Bio::LocatableSeq::GAP_SYMBOLS.
-             $Bio::LocatableSeq::FRAMESHIFT_SYMBOLS;
-    
-    # coordinates are alignment-based, not sequence-based
-    my ($start, $end, $mask_char) = @_;
-    unless (defined $mask_char) { $mask_char = 'N' }
-
-    $self->throw("Mask start has to be a positive integer and less than ".
-                 "alignment length, not [$start]")
-      unless $start =~ /^\d+$/ && $start > 0 && $start <= $self->length;
-    $self->throw("Mask end has to be a positive integer and less than ".
-                 "alignment length, not [$end]")
-      unless $end =~ /^\d+$/ && $end > 0 && $end <= $self->length;
-    $self->throw("Mask start [$start] has to be smaller than or equal to ".
-                 "end [$end]") unless $start <= $end;
-    $self->throw("Mask character $mask_char has to be a single character ".
-                 "and not a gap or frameshift symbol")
-      unless CORE::length($mask_char) == 1 && $mask_char !~ m{$nonres};
-    
-    my $aln = $self->new;
-    $aln->id($self->id);
-    foreach my $seq ( $self->each_seq() ) {
-        my $new_seq = Bio::LocatableSeq->new(-id => $seq->id,
-         -alphabet => $seq->alphabet,
-         -strand  => $seq->strand,
-         -verbose => $self->verbose);
-        
-        # convert from 1-based alignment coords!
-        my $masked_string = substr($seq->seq, $start - 1, $end - $start + 1);
-        $masked_string =~ s{[^$nonres]}{$mask_char}g;
-        my $new_dna_string = substr($seq->seq,0,$start-1) . $masked_string . substr($seq->seq,$end);
-        $new_seq->seq($new_dna_string);
-        $aln->add_seq($new_seq);
-    }
-    return $aln;
-}
-
 
 
 1;

From 09545cc6f7fda052ccbd5bba60ce5add6ff1b642 Mon Sep 17 00:00:00 2001
From: Jun Yin <yinjun111@gmail.com>
Date: Tue, 4 Sep 2012 11:12:43 -0400
Subject: [PATCH 2/8] get_nse

---
 Bio/SimpleAlign.pm | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/Bio/SimpleAlign.pm b/Bio/SimpleAlign.pm
index 8dc666e02d..9572c92f5f 100644
--- a/Bio/SimpleAlign.pm
+++ b/Bio/SimpleAlign.pm
@@ -403,14 +403,7 @@ sub remove_LocatableSeq {
 	unless ref $seq && $seq->isa( 'Bio::LocatableSeq');
 
     $id = $seq->id();
-<<<<<<< HEAD
-    $start = $seq->start();
-    $end  = $seq->end();
-    $name = sprintf("%s/%d-%d",$id,$start,$end);
-    
-=======
     $name = $seq->get_nse;
->>>>>>> upstream/master
 
     if( !exists $self->{'_seq'}->{$name} ) {
 	$self->throw("Sequence $name does not exist in the alignment to remove!");

From 8738981518882e3260171b24f23f5300ba4a4618 Mon Sep 17 00:00:00 2001
From: Jun Yin <junyin@cwlmcd1.med.yale.edu>
Date: Wed, 12 Sep 2012 16:51:25 -0400
Subject: [PATCH 3/8] All contradicts fixed

---
 Bio/SimpleAlign.pm | 311 ++++++++++++++-------------------------------
 1 file changed, 94 insertions(+), 217 deletions(-)

diff --git a/Bio/SimpleAlign.pm b/Bio/SimpleAlign.pm
index 9572c92f5f..0cb49c3f43 100644
--- a/Bio/SimpleAlign.pm
+++ b/Bio/SimpleAlign.pm
@@ -499,7 +499,7 @@ sub remove_Seqs {
 sub purge {
     my ($self,@args) = @_;
     $self->deprecated(-warn_version => 1.0080, -throw_version => 1.0090, -message =>"purge - deprecated method. Use remove_redundant_Seqs() instead.");
-    $self->remove_redundant_Seq(@args);
+    $self->remove_redundant_Seqs(@args);
 }
 
 sub remove_redundant_Seqs {
@@ -1114,21 +1114,26 @@ sub get_Seq_by_id {
  Title     : select_Seqs
  Usage     : $aln2 = $aln->select_Seqs([1,5..10,15]) # three first sequences
              $aln2 = $aln->select_Seqs(-selection=>[2..4,11..14],-toggle=>1) # toggle selection
+             $aln2 = $aln->select_Seqs(-selection=>[11..14,2..4],-nosort=>1) #no sort
  Function  : Creates a new alignment from a subset of
              sequences.  Numbering starts from 1.  Sequence positions
              larger than num_sequences() will thow an error.
  Returns   : a Bio::SimpleAlign object
  Args      : An reference list of positive integers for the selected 
-             sequences. An optional parameter can be defined to toggle the 
-             coordinate selection.
+             sequences. 
+             An optional parameter can be defined to toggle the coordinate selection. 
+             If nosort is defined, sequences in the alignment will be returned following 
+             the given order.
              
 
 =cut
 
 sub select_Seqs {
 	my $self=shift;
-	my ($sel, $toggle) = $self->_rearrange([qw(SELECTION TOGGLE)], @_);
-	@{$sel}=sort {$a<=>$b} @{$sel};
+	my ($sel, $toggle,$nosort) = $self->_rearrange([qw(SELECTION TOGGLE NOSORT)], @_);
+	unless($nosort) {
+		@{$sel}=sort {$a<=>$b} @{$sel};
+	}
 
    $self->throw("Select start has to be a positive integer, not [".$sel->[0]."]")
 	unless $sel->[0] =~ /^\d+$/ and $sel->[0] > 0;
@@ -1163,9 +1168,36 @@ sub select_noncont {
 	my $self = shift;
 	$self->deprecated(-warn_version => 1.0080, -throw_version => 1.0090, -message =>"select_noncont - deprecated method. Use select_Seqs() instead.");
 	if($_[0] eq 'nosort') {
-    	shift;
+		shift @_;
+    	$self->select_Seqs(-selection=>[@_],-nosort=>1);
    }
-   $self->select_Seqs([@_]);
+   else {
+	   $self->select_Seqs([@_]);
+	}
+}
+
+
+=head2 select_noncont_by_name
+
+ Title     : select_noncont_by_name
+ Usage     : my $aln2 = $aln->select_noncont_by_name('A123', 'B456');
+ Function  : Creates a new alignment from a subset of sequences which are
+             selected by name (sequence ID).
+ Returns   : a Bio::SimpleAlign object
+ Args      : array of names (i.e., identifiers) for the sequences.
+
+=cut
+
+sub select_noncont_by_name {
+    my ($self, @names) = @_;
+    
+    my $aln = $self->new;
+    foreach my $name (@names) {
+        $aln->add_seq($self->get_seq_by_id($name));
+    }
+    $aln->id($self->id);
+
+    return $aln;
 }
 
 =head2 select_columns
@@ -1191,53 +1223,11 @@ sub select_noncont {
 
 =cut
 
-<<<<<<< HEAD
+
 sub slice {
 	my $self = shift;
 	$self->deprecated(-warn_version => 1.0080, -throw_version => 1.0090, -message =>"slice - deprecated method. Use select_columns() instead.");
 	$self->select_columns([$_[0]..$_[1]],0,defined($_[2])?$_[2]:0);	
-=======
-   return $seq;
-}
-
-
-=head1 Create new alignments
-
-The result of these methods are horizontal or vertical subsets of the
-current MSA.
-
-=head2 select
-
- Title     : select
- Usage     : $aln2 = $aln->select(1, 3) # three first sequences
- Function  : Creates a new alignment from a continuous subset of
-             sequences.  Numbering starts from 1.  Sequence positions
-             larger than num_sequences() will throw an error.
- Returns   : a Bio::SimpleAlign object
- Args      : positive integer for the first sequence
-             positive integer for the last sequence to include (optional)
-
-=cut
-
-sub select {
-    my $self = shift;
-    my ($start, $end) = @_;
-
-    $self->throw("Select start has to be a positive integer, not [$start]")
-	unless $start =~ /^\d+$/ and $start > 0;
-    $self->throw("Select end has to be a positive integer, not [$end]")
-	unless $end  =~ /^\d+$/ and $end > 0;
-    $self->throw("Select $start [$start] has to be smaller than or equal to end [$end]")
-	unless $start <= $end;
-
-    my $aln = $self->new;
-    foreach my $pos ($start .. $end) {
-	$aln->add_seq($self->get_seq_by_pos($pos));
-    }
-    $aln->id($self->id);
-    # fix for meta, sf, ann    
-    return $aln;
->>>>>>> upstream/master
 }
 
 
@@ -1249,66 +1239,37 @@ sub select_columns {
 		$sel=$self->_select_columns_by_type($sel);
 	}
 	
-<<<<<<< HEAD
 	@{$sel}=sort {$a<=$b} @{$sel};
-=======
-	my $aln = $self->new;
-	foreach my $p (@pos) {
-		$aln->add_seq($self->get_seq_by_pos($p));
-	}
-	$aln->id($self->id);
-    # fix for meta, sf, ann    
-	return $aln;
-}
-
-=head2 select_noncont_by_name
-
- Title     : select_noncont_by_name
- Usage     : my $aln2 = $aln->select_noncont_by_name('A123', 'B456');
- Function  : Creates a new alignment from a subset of sequences which are
-             selected by name (sequence ID).
- Returns   : a Bio::SimpleAlign object
- Args      : array of names (i.e., identifiers) for the sequences.
-
-=cut
-
-sub select_noncont_by_name {
-    my ($self, @names) = @_;
-    
-    my $aln = $self->new;
-    foreach my $name (@names) {
-        $aln->add_seq($self->get_seq_by_id($name));
-    }
-    $aln->id($self->id);
-
-    return $aln;
-}
-
-=head2 slice
-
- Title     : slice
- Usage     : $aln2 = $aln->slice(20,30)
- Function  : Creates a slice from the alignment inclusive of start and
-             end columns, and the first column in the alignment is denoted 1.
-             Sequences with no residues in the slice are excluded from the
-             new alignment and a warning is printed. Slice beyond the length of
-             the sequence does not do padding.
- Returns   : A Bio::SimpleAlign object
- Args      : Positive integer for start column, positive integer for end column,
-             optional boolean which if true will keep gap-only columns in the newly
-             created slice. Example:
->>>>>>> upstream/master
 
 	#warnings
-	$self->throw("select_columns start has to be a positive integer, not [".$sel->[0]."]")
-	  unless $sel->[0] =~ /^\d+$/ and $sel->[0] > 0;
-	$self->throw("select_columns end has to be a positive integer, not [".$sel->[$#$sel]."]")
-	  unless $sel->[$#$sel] =~ /^\d+$/ and $sel->[$#$sel] > 0;
+	#the old remove_columns used a 0-based coordinates, so if the first coordinate is 0, the first two coordinates will be added by 1.
+	unless($sel->[0] =~ /^\d+$/ and $sel->[0] > 0) {
+		if($sel->[0] == 0) {
+			$self->deprecated(-warn_version => 1.0060, -throw_version => 1.0080, -message =>"select_columns start is 1 based. It has to be a positive integer, not [".$sel->[0]."]");
+			$sel->[0]++;
+			$sel->[1]++;
+		}
+		else {
+			$self->throw("select_columns start has to be a positive integer, not [".$sel->[0]."]");
+		}
+	}
+
+	unless ($sel->[$#$sel] =~ /^\d+$/ and $sel->[$#$sel] > 0) {
+		if($sel->[$#$sel] == 0) {
+			$self->deprecated(-warn_version => 1.0060, -throw_version => 1.0080, -message =>"select_columns end is 1 based. It has to be a positive integer, not [".$sel->[$#$sel]."]");
+			$sel->[0]++;
+			$sel->[1]++;
+		}
+		else {
+			$self->throw("select_columns end has to be a positive integer, not [".$sel->[$#$sel]."]");
+		}
+	}
 	$self->throw("This alignment has only ". $self->length . " residues. select_columns start " .
 					 "[".$sel->[0]."] is too big.") if $sel->[0] > $self->length;
 
 	my $newcoords;
 	
+	#toggle the coordinate selection
 	if($toggle) {
 		$sel=_toggle_selection($sel,$self->length);
 	}
@@ -1601,19 +1562,7 @@ sub remove_gaps {
     # Do the matching to get the segments to remove
     my $removed_cols;
     while ($gap_line =~ m/[$del_char]/g) {
-<<<<<<< HEAD
         push @{$removed_cols}, pos($gap_line);
-=======
-        my $start = pos($gap_line)-1;
-        $gap_line =~ m/\G[$del_char]+/gc;
-        my $end = pos($gap_line)-1;
-
-        #have to offset the start and end for subsequent removes
-        $start-=$length;
-        $end  -=$length;
-        $length += ($end-$start+1);
-        push @remove, [$start,$end];
->>>>>>> upstream/master
     }
     #remove the segments
     $aln = $#$removed_cols >= 0 ? $self->select_columns($removed_cols,1) : $self;
@@ -1647,21 +1596,39 @@ sub splice_by_seq_pos{
 sub mask_columns {
     #based on slice(), but did not include the Bio::Seq::Meta sections as I was not sure what it is doing
 	my $self=shift;
-	my ($sel, $toggle) = $self->_rearrange([qw(SELECTION TOGGLE)], @_);
-	@{$sel}=sort {$a<=>$b} @{$sel};
+	my @args=@_;
+	my ($sel, $toggle);
+
+	#Exceptions
+	unless(ref($args[0])) {
+		$self->deprecated(-warn_version => 1.0080, -throw_version => 1.0090, -message =>"mask_columns - column range must be in refrence.");
+		my ($start, $end, $mask_char) = @args;
+		$sel=[($start, $end)];
+    	if(defined $mask_char) { 
+    		$self->mask_char($mask_char);
+    	}
+    }
+	else {
+		($sel, $toggle) = $self->_rearrange([qw(SELECTION TOGGLE)], @_);
+	}
 	
-	my $nonres = join("",$self->gap_char, $self->match_char,$self->missing_char);
-	my $mask_char=$self->mask_char;
-   
+	@{$sel}=sort {$a<=>$b} @{$sel};
+
 	#Exceptions
+	
 	$self->throw("Mask start has to be a positive integer and less than ".
                  "alignment length, not [".$sel->[0]."]")
 	unless $sel->[0] =~ /^\d+$/ && $sel->[0] > 0 && $sel->[0] <= $self->length;	
 
    $self->throw("Mask end has to be a positive integer and less than ".
                  "alignment length, not [".$sel->[$#$sel]."]")
-	unless $sel->[$#$sel] =~ /^\d+$/ && $sel->[$#$sel] > 0 && $sel->[$#$sel] <= $self->length;                 
+	unless $sel->[$#$sel] =~ /^\d+$/ && $sel->[$#$sel] > 0 && $sel->[$#$sel] <= $self->length;
+
 	
+	my $nonres = join("",$self->gap_char, $self->match_char,$self->missing_char);
+	my $mask_char=$self->mask_char;
+   
+
 	#calculate the coords, and make it in a continous way
 	my @newcoords;
 	if($toggle) {
@@ -1806,17 +1773,10 @@ sub map_chars {
     $self->throw("Need two arguments: a regexp and a string")
       unless defined $from and defined $to;
 
-<<<<<<< HEAD
     foreach $seq ( $self->next_Seq() ) {
-	$temp = $seq->seq();
-	$temp =~ s/$from/$to/g;
-	$seq->seq($temp);
-=======
-    foreach $seq ( $self->each_seq() ) {
-        $temp = $seq->seq();
-        $temp =~ s/$from/$to/g;
-        $seq->seq($temp);
->>>>>>> upstream/master
+		$temp = $seq->seq();
+		$temp =~ s/$from/$to/g;
+		$seq->seq($temp);
     }
     return 1;
 }
@@ -1881,102 +1841,19 @@ sub lowercase {
 
 =cut
 
-<<<<<<< HEAD
 sub togglecase {
     my $self = shift;
     my $seq;
     my $temp;
-=======
-sub gap_line {
-    my ($self,$gapchar) = @_;
-    $gapchar = $gapchar || $self->gap_char;
-    my %gap_hsh; # column gaps vector
-    foreach my $seq ( $self->each_seq ) {
-		my $i = 0;
-    	map {$gap_hsh{$_->[0]} = undef} grep {$_->[1] =~ m/[$gapchar]/}
-		  map {[$i++, $_]} split(//, uc ($seq->seq));
-    }
-    my $gap_line;
-    foreach my $pos ( 0..$self->length-1 ) {
-	  $gap_line .= (exists $gap_hsh{$pos}) ? $self->gap_char:'.';
-    }
-    return $gap_line;
-}
-
-=head2 all_gap_line
-
- Title    : all_gap_line()
- Usage    : $line = $align->all_gap_line()
- Function : Generates a gap line - much like consensus string
-            except that a line where '-' represents all-gap column
- Args     : (optional) gap line characters ('-' by default)
- Returns  : string
->>>>>>> upstream/master
-
+    
     foreach $seq ( $self->next_Seq() ) {
-      $temp = $seq->seq();
-      $temp =~ tr/[A-Za-z]/[a-zA-Z]/;
-
-<<<<<<< HEAD
-      $seq->seq($temp);
-=======
-sub all_gap_line {
-    my ($self,$gapchar) = @_;
-    $gapchar = $gapchar || $self->gap_char;
-    my %gap_hsh;		# column gaps counter hash
-    my @seqs = $self->each_seq;
-    foreach my $seq ( @seqs ) {
-	my $i = 0;
-    	map {$gap_hsh{$_->[0]}++} grep {$_->[1] =~ m/[$gapchar]/}
-	map {[$i++, $_]} split(//, uc ($seq->seq));
-    }
-    my $gap_line;
-    foreach my $pos ( 0..$self->length-1 ) {
-	if (exists $gap_hsh{$pos} && $gap_hsh{$pos} == scalar @seqs) {
-            # gaps column
-	    $gap_line .= $self->gap_char;
-	} else {
-	    $gap_line .= '.';
-	}
->>>>>>> upstream/master
+		$temp = $seq->seq();
+		$temp =~ tr/[A-Za-z]/[a-zA-Z]/;    
+		$seq->seq($temp);
     }
     return 1;
 }
 
-<<<<<<< HEAD
-=======
-=head2 gap_col_matrix
-
- Title    : gap_col_matrix()
- Usage    : my $cols = $align->gap_col_matrix()
- Function : Generates an array where each element in the array is a 
-            hash reference with a key of the sequence name and a
-            value of 1 if the sequence has a gap at that column
- Returns  : Reference to an array
- Args     : Optional: gap line character ($aln->gap_char or '-' by default)
-
-=cut
-
-sub gap_col_matrix {
-    my ( $self, $gapchar ) = @_;
-    $gapchar = $gapchar || $self->gap_char;
-    my %gap_hsh;    # column gaps vector
-    my @cols;
-    foreach my $seq ( $self->each_seq ) {
-        my $i   = 0;
-        my $str = $seq->seq;
-        my $len = $seq->length;
-        my $ch;
-        my $id = $seq->display_id;
-        while ( $i < $len ) {
-            $ch = substr( $str, $i, 1 );
-            $cols[ $i++ ]->{$id} = ( $ch =~ m/[$gapchar]/ );
-        }
-    }
-    return \@cols;
-}
->>>>>>> upstream/master
-
 =head2 match
 
  Title     : match()

From 9a29065cc28c6e97690cfc7363d61bad7c956f81 Mon Sep 17 00:00:00 2001
From: Jun Yin <junyin@cwlmcd1.med.yale.edu>
Date: Wed, 12 Sep 2012 17:18:45 -0400
Subject: [PATCH 4/8] Reformat SimpleAlign2.t

---
 t/Align/SimpleAlign2.t | 879 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 879 insertions(+)
 create mode 100644 t/Align/SimpleAlign2.t

diff --git a/t/Align/SimpleAlign2.t b/t/Align/SimpleAlign2.t
new file mode 100644
index 0000000000..e1ca10b0f0
--- /dev/null
+++ b/t/Align/SimpleAlign2.t
@@ -0,0 +1,879 @@
+# -*-Perl-*- Test Harness script for Bioperl
+# $Id$
+
+use strict;
+
+BEGIN {
+    use lib '.';
+    use Bio::Root::Test;
+
+    test_begin( -tests => 214 );
+
+    use_ok('Bio::SimpleAlign');
+    use_ok('Bio::AlignIO');
+    use_ok('Bio::SeqFeature::Generic');
+    use_ok('Bio::Location::Simple');
+    use_ok('Bio::Location::Split');
+}
+
+my $DEBUG = test_debug();
+
+my ( $str, $aln, @seqs, $seq );
+
+$str = Bio::AlignIO->new( -file => test_input_file('testaln.pfam') );
+isa_ok( $str, 'Bio::AlignIO' );
+$aln = $str->next_aln();
+is $aln->get_Seq_by_pos(1)->get_nse, '1433_LYCES/9-246', "pfam input test";
+
+my $aln1 = $aln->remove_columns(['mismatch']);
+is(
+    $aln1->match_line,
+    '::*::::*:**:*:*:***:**.***::*.*::**::**:***..**:'
+      . '*:*.::::*:.:*.*.**:***.**:*.:.**::**.*:***********:::*:.:*:**.*::*:'
+      . '.*.:*:**:****************::',
+    'match_line'
+);
+
+my $aln2 = $aln->select_Seqs([1]);
+isa_ok( $aln2, 'Bio::Align::AlignI' );
+is( $aln2->num_sequences, 1, 'select_Seqs single selection' );
+
+# test select sequences
+$aln2 = $aln->select_Seqs([1..6,8,9,10]);
+is( $aln2->num_sequences, 9, 'select_Seqs multiple selection' );
+
+# test select sequences with parameter
+$aln2 = $aln->select_Seqs(-selection=>[1,2,3,4,5,6,7,8,9,10],-toggle=>0);
+is( $aln2->num_sequences, 10, 'select_Seqs parameter check' );
+
+# test remove_Seqs reverse selection
+$aln2->remove_Seqs([2..$aln2->num_sequences],1);
+is( $aln2->num_sequences, 9, 'remove_Seqs' );
+
+
+# test select non continuous-nosort option
+$aln2->remove_Seqs(-selection=>[1,3,5,7..9],-toggle=>0);
+is( $aln2->num_sequences, 3, 'remove_Seqs' );
+
+
+@seqs = $aln->next_Seq();
+is scalar @seqs, 16, 'each_seq';
+is $seqs[0]->get_nse, '1433_LYCES/9-246', 'get_nse';
+is $seqs[0]->id,      '1433_LYCES',       'id';
+is $seqs[0]->num_gaps, 3,                  'num_gaps';
+@seqs = $aln->next_alphabetically();
+is scalar @seqs, 16, 'each_alphabetically';
+
+is $aln->column_from_residue_number( '1433_LYCES', 10 ), 2,
+  'column_from_residue_number';
+is $aln->displayname( '1433_LYCES/9-246', 'my_seq' ), 'my_seq',
+  'display_name get/set';
+is $aln->displayname('1433_LYCES/9-246'), 'my_seq', 'display_name get';
+is substr( $aln->consensus_string(50), 0, 60 ),
+  "RE??VY?AKLAEQAERYEEMV??MK?VAE??????ELSVEERNLLSVAYKNVIGARRASW",
+  'consensus_string';
+is substr( $aln->consensus_string(100), 0, 60 ),
+  "?????????L????E????M???M????????????L??E?RNL?SV?YKN??G??R??W",
+  'consensus_string';
+is substr( $aln->consensus_string(0), 0, 60 ),
+  "REDLVYLAKLAEQAERYEEMVEFMKKVAELGAPAEELSVEERNLLSVAYKNVIGARRASW",
+  'consensus_string';
+
+ok( @seqs = $aln->next_Seq_with_id('143T_HUMAN') );
+is scalar @seqs, 1, 'each_seq_with_id';
+
+is $aln->is_flush, 1, 'is_flush';
+ok( $aln->id('x') && $aln->id eq 'x', 'id get/set' );
+
+is $aln->length,        242,  'length';
+is $aln->num_residues,  3769, 'num_residues';
+is $aln->num_sequences, 16,   'num_sequences';
+is( sprintf( "%.2f", $aln->overall_percentage_identity() ),
+    33.06, 'overall_percentage_identity' );
+is( sprintf( "%.2f", $aln->overall_percentage_identity('align') ),
+    33.06, 'overall_percentage_identity (align)' );
+is( sprintf( "%.2f", $aln->overall_percentage_identity('short') ),
+    35.24, 'overall_percentage_identity (short)' );
+is( sprintf( "%.2f", $aln->overall_percentage_identity('long') ),
+    33.47, 'overall_percentage_identity (long)' );
+is( sprintf( "%.2f", $aln->average_percentage_identity() ),
+    66.91, 'average_percentage_identity' );
+
+ok $aln->set_displayname_count;
+is $aln->displayname('1433_LYCES/9-246'), '1433_LYCES_1',
+  'set_displayname_count';
+ok $aln->set_displayname_flat;
+is $aln->displayname('1433_LYCES/9-246'), '1433_LYCES', 'set_displayname_flat';
+ok $aln->set_displayname_normal;
+is $aln->displayname('1433_LYCES/9-246'), '1433_LYCES/9-246',
+  'set_displayname_normal';
+ok $aln->uppercase;
+ok $aln->map_chars( '\.', '-' );
+@seqs = $aln->next_Seq_with_id('143T_HUMAN');
+is substr( $seqs[0]->seq, 0, 60 ),
+  'KTELIQKAKLAEQAERYDDMATCMKAVTEQGA---ELSNEERNLLSVAYKNVVGGRRSAW',
+  'uppercase, map_chars';
+
+is(
+    $aln->match_line,
+    '       ::*::::*  : *   *:           *: *:***:**.***::*.'
+      . ' *::**::**:***      .  .      **  :* :*   .  :: ::   *:  .     :* .*. **:'
+      . '***.** :*.            :  .*  *   :   : **.*:***********:::* : .: *  :** .'
+      . '*::*: .*. : *: **:****************::     ',
+    'match_line'
+);
+ok $aln->remove_LocatableSeq( $seqs[0] ), 'remove_seqs';
+is $aln->num_sequences, 15, 'remove_seqs';
+ok $aln->add_Seq( $seqs[0] ), 'add_seq';
+is $aln->num_sequences, 16, 'add_seq';
+ok $seq = $aln->get_Seq_by_pos(1), 'get_seq_by_pos';
+is( $seq->id, '1433_LYCES', 'get_seq_by_pos' );
+ok( ( $aln->missing_char() eq '&' ) and ( $aln->missing_char('X') eq 'X' ) );
+ok( ( $aln->match_char() eq   '.' ) and ( $aln->match_char('-') eq   '-' ) );
+ok( ( $aln->gap_char() eq    '-' ) and ( $aln->gap_char('.') eq     '.' ) );
+ok( ( $aln->mask_char() eq    '?' ) and ( $aln->gap_char('N') eq     'N' ) );
+
+is $aln->remove_redundant_Seqs(0.7), 12, 'purge';
+is $aln->num_sequences, 4, 'purge';
+
+SKIP: {
+    test_skip( -tests => 24, -requires_module => 'IO::String' );
+
+    my $string;
+    my $out = IO::String->new($string);
+
+    my $s1 = Bio::LocatableSeq->new(
+        -id       => 'AAA',
+        -seq      => 'aawtat-tn-',
+        -start    => 1,
+        -end      => 8,
+        -alphabet => 'dna'
+    );
+    my $s2 = Bio::LocatableSeq->new(
+        -id       => 'BBB',
+        -seq      => '-aaaat-tt-',
+        -start    => 1,
+        -end      => 7,
+        -alphabet => 'dna'
+    );
+    $a = Bio::SimpleAlign->new();
+    $a->add_Seq($s1);
+    $a->add_Seq($s2);
+    
+    my $a_removed=$a->remove_gaps(-reference=>2);
+    is($a_removed->length, 7, "remove_gaps reference check");
+    
+
+    is( $a->consensus_iupac, "aAWWAT-TN-", 'IO::String consensus_iupac' );
+    $s1->seq('aaaaattttt');
+    $s1->alphabet('dna');
+    $s1->end(10);
+    $s2->seq('-aaaatttt-');
+    $s2->end(8);
+    $a = Bio::SimpleAlign->new();
+    $a->add_Seq($s1);
+    $a->add_Seq($s2);
+
+    my $strout = Bio::AlignIO->new( -fh => $out, -format => 'pfam' );
+    $strout->write_aln($a);
+    is(
+        $string,
+        "AAA/1-10    aaaaattttt\n" . "BBB/1-8     -aaaatttt-\n",
+        'IO::String write_aln normal'
+    );
+
+    $out->setpos(0);
+    $string = '';
+    my $b = $a->select_columns([2..9]);
+    $strout->write_aln($b);
+    is $string,
+      "AAA/2-9    aaaatttt\n" . "BBB/1-8    aaaatttt\n",
+      'IO::String write_aln slice';
+
+    $out->setpos(0);
+    $string = '';
+    $b = $a->select_columns([9..10]);
+    $strout->write_aln($b);
+    is $string,
+      "AAA/9-10    tt\n" . "BBB/8-8     t-\n",
+      'IO::String write_aln slice';
+
+    $a->verbose(-1);
+    $out->setpos(0);
+    $string = '';
+    $b = $a->select_columns([1..2]);
+    $strout->write_aln($b);
+    is $string,
+      "AAA/1-2    aa\n" . "BBB/1-1    -a\n",
+      'IO::String write_aln slice';
+
+    $a->verbose(-1);
+    $out->setpos(0);
+    $string = '';
+    my $c = $a->select_columns(-selection=>[3..$a->length],-toggle=>1);
+    $strout->write_aln($c);
+    is $string,
+      "AAA/1-2    aa\n" . "BBB/1-1    -a\n",
+      'IO::String write_aln slice';
+
+    # not sure what coordinates this should return...
+    $a->verbose(-1);
+    $out->setpos(0);
+    $string = '';
+    $b = $a->select_columns(-selection=>[1..1],-toggle=>0,-keepgaponly=>1);
+    $strout->write_aln($b);
+    is $string,
+      "AAA/1-1    a\n" . "BBB/1-0    -\n",
+      'IO::String write_aln slice';
+
+
+    $a->verbose(-1);
+    $out->setpos(0);
+    $string = '';
+    $b = $a->select_columns(-selection=>[2]);
+    $strout->write_aln($b);
+    is $string,
+      "AAA/2-2    a\n" . "BBB/1-1    a\n",
+      'IO::String write_aln slice';
+
+    eval { $b = $a->select_columns([11..13]); };
+
+    like( $@, qr/EX/ );
+
+    # remove_columns by position
+    $out->setpos(0);
+    $string = '';
+    $str    = Bio::AlignIO->new( -file => test_input_file('mini-align.aln') );
+    $aln1   = $str->next_aln;
+    $aln2   = $aln1->remove_columns([1]);
+    $strout->write_aln($aln2);
+    is $string,
+        "P84139/2-33              NEGEHQIKLDELFEKLLRARLIFKNKDVLRRC\n"
+      . "P814153/2-33             NEGMHQIKLDVLFEKLLRARLIFKNKDVLRRC\n"
+      . "BAB68554/1-14            ------------------AMLIFKDKQLLQQC\n"
+      . "gb|443893|124775/1-32    MRFRFQIKVPPAVEGARPALLIFKSRPELGGC\n",
+      'remove_columns by position';
+
+
+
+    # and when arguments are entered in "wrong order"?
+    $out->setpos(0);
+    $string = '';
+    my $aln3 = $aln1->remove_columns(-selection=>[2,6,7,31]);
+    $strout->write_aln($aln3);
+    is $string,
+        "P84139/1-33              MEGEIKLDELFEKLLRARLIFKNKDVLRC\n"
+      . "P814153/1-33             MEGMIKLDVLFEKLLRARLIFKNKDVLRC\n"
+      . "BAB68554/1-14            ----------------AMLIFKDKQLLQC\n"
+      . "gb|443893|124775/2-32    -RFRIKVPPAVEGARPALLIFKSRPELGC\n",
+      'remove_columns by position (wrong order)';
+
+    my %cigars = $aln1->cigar_line;
+    is $cigars{'gb|443893|124775/1-32'}, '19,19:21,24:29,29:32,32',
+      'cigar_line';
+    is $cigars{'P814153/1-33'},  '20,20:22,25:30,30:33,33', 'cigar_line';
+    is $cigars{'BAB68554/1-14'}, '1,1:3,6:11,11:14,14',     'cigar_line';
+    is $cigars{'P84139/1-33'},   '20,20:22,25:30,30:33,33', 'cigar_line';
+
+	my @idens=$aln1->pairwise_percentage_identity();
+	is join(";",map {sprintf("%0.4f",$_)} @idens),"1.0000;0.9394;0.2424;0.3333","pairwise_percentage_identity";
+
+	$aln1->sort_by_pairwise_identity;
+	is join(";",map {$_->display_id} $aln1->next_Seq),"P84139;P814153;gb|443893|124775;BAB68554","sort_by_pairwise_identity";
+
+    # sort_alphabetically
+    my $s3 = Bio::LocatableSeq->new(
+        -id       => 'ABB',
+        -seq      => '-attat-tt-',
+        -start    => 1,
+        -end      => 7,
+        -alphabet => 'dna'
+    );
+    $a->add_Seq($s3);
+
+    is $a->get_Seq_by_pos(2)->id, "BBB", 'sort_alphabetically - before';
+    ok $a->sort_alphabetically;
+    is $a->get_Seq_by_pos(2)->id, "ABB", 'sort_alphabetically - after';
+
+    $b = $a->remove_gaps();
+    is $b->consensus_string, "aaaattt", 'remove_gaps';
+
+    $s1->seq('aaaaattt--');
+
+    $b = $a->remove_gaps( undef, 'all_gaps_only' );
+    is $b->consensus_string, "aaaaatttt", 'remove_gaps all_gaps_only';
+
+    # test set_new_reference:
+    $str = Bio::AlignIO->new( -file => test_input_file('testaln.aln') );
+    $aln = $str->next_aln();
+    my $new_aln = $aln->set_new_reference(3);
+    $a       = $new_aln->get_Seq_by_pos(1)->display_id;
+    $new_aln = $aln->set_new_reference('P851414');
+    $b       = $new_aln->get_Seq_by_pos(1)->display_id;
+    is $a, 'P851414', 'set_new_reference';
+    is $b, 'P851414', 'set_new_reference';
+
+    # test uniq_seq:
+    $str = Bio::AlignIO->new(
+        -verbose => $DEBUG,
+        -file    => test_input_file('testaln2.fasta')
+    );
+    $aln     = $str->next_aln();
+    $new_aln = $aln->uniq_Seq();
+    $a       = $new_aln->num_sequences;
+    is $a, 11, 'uniq_seq';
+
+    # check if slice works well with a LocateableSeq in its negative strand
+    my $seq1 = Bio::LocatableSeq->new(
+        -SEQ    => "ATGCTG-ATG",
+        -START  => 1,
+        -END    => 9,
+        -ID     => "test1",
+        -STRAND => -1
+    );
+
+    my $seq2 = Bio::LocatableSeq->new(
+        -SEQ    => "A-GCTGCATG",
+        -START  => 1,
+        -END    => 9,
+        -ID     => "test2",
+        -STRAND => 1
+    );
+
+    $string = '';
+    my $aln_negative = Bio::SimpleAlign->new();
+    $aln_negative->add_Seq($seq1);
+    $aln_negative->add_Seq($seq2);
+    my $start_column =
+      $aln_negative->column_from_residue_number(
+        $aln_negative->get_Seq_by_pos(1)->display_id, 2 );
+    my $end_column =
+      $aln_negative->column_from_residue_number(
+        $aln_negative->get_Seq_by_pos(1)->display_id, 5 );
+    $aln_negative = $aln_negative->select_columns([$end_column..$start_column]);
+    my $seq_negative = $aln_negative->get_Seq_by_pos(1);
+    is( $seq_negative->start, 2, "bug 2099" );
+    is( $seq_negative->end,   5, "bug 2099" );
+
+    # bug 2793
+    my $s11 = Bio::LocatableSeq->new( -id => 'testseq1', -seq => 'AAA' );
+    my $s21 = Bio::LocatableSeq->new( -id => 'testseq2', -seq => 'CCC' );
+    $a = Bio::SimpleAlign->new();
+    ok( $a->add_Seq( $s11, 1 ), "bug 2793" );
+    is( $a->get_Seq_by_pos(1)->seq, 'AAA', "bug 2793" );
+    ok( $a->add_Seq( $s21, 2 ), "bug 2793" );
+    is( $a->get_Seq_by_pos(2)->seq, 'CCC', "bug 2793" );
+    throws_ok { $a->add_Seq( $s21, 0 ) } qr/must be >= 1/, 'Bad sequence, bad!';
+}
+
+# test for Bio::SimpleAlign annotation method and
+# Bio::FeatureHolder stuff
+
+$aln = Bio::SimpleAlign->new;
+for my $seqset ( [qw(one AGAGGAT)], [qw(two AGACGAT)], [qw(three AGAGGTT)] ) {
+    $aln->add_Seq(
+        Bio::LocatableSeq->new(
+            -id  => $seqset->[0],
+            -seq => $seqset->[1]
+        )
+    );
+}
+
+is $aln->num_sequences, 3, 'added 3 seqs';
+
+$aln->add_SeqFeature(
+    Bio::SeqFeature::Generic->new(
+        -start       => 1,
+        -end         => 1,
+        -primary_tag => 'charLabel',
+    )
+);
+$aln->add_SeqFeature(
+    Bio::SeqFeature::Generic->new(
+        -start       => 3,
+        -end         => 3,
+        -primary_tag => 'charLabel',
+
+    )
+);
+is( $aln->feature_count, 2, 'first 2 features added' );
+
+my $splitloc = Bio::Location::Split->new;
+$splitloc->add_sub_Location(
+    Bio::Location::Simple->new(
+        -start => 2,
+        -end   => 3
+    )
+);
+
+$splitloc->add_sub_Location(
+    Bio::Location::Simple->new(
+        -start => 5,
+        -end   => 6
+    )
+);
+
+$aln->add_SeqFeature(
+    Bio::SeqFeature::Generic->new(
+        -location    => $splitloc,
+        -primary_tag => 'charLabel',
+    )
+);
+
+is( $aln->feature_count, 3, '3rd feature added' );
+
+#do slices and masks as defined by the feature
+my $i          = 0;
+my @slice_lens = qw(1 1 2 2);
+for my $feature ( $aln->get_SeqFeatures ) {
+    for my $loc ( $feature->location->each_Location ) {
+        my $masked = $aln->mask_columns([$loc->start..$loc->end]);
+        #"This should pass but dies; see bug 2842";
+        lives_ok {my $fslice = $masked->select_columns([$loc->start..$loc->end])};
+        $masked->verbose(-1);
+        my $fslice = $masked->select_columns([$loc->start..$loc->end]);
+        is( $fslice->length, $slice_lens[ $i++ ], "slice $i len" );
+        for my $s ( $fslice->next_Seq ) {
+            like( $s->seq, qr/^\?+$/, 'correct masked seq' );
+        }
+    }
+}
+
+# test set_displayname_safe & restore_displayname:
+$str = Bio::AlignIO->new( -file => test_input_file('pep-266.aln') );
+$aln = $str->next_aln();
+is $aln->get_Seq_by_pos(3)->display_id, 'Smik_Contig1103.1',
+  'initial display id ok';
+my ( $new_aln, $ref ) = $aln->set_displayname_safe();
+is $new_aln->get_Seq_by_pos(3)->display_id, 'S000000003', 'safe display id ok';
+my $restored_aln = $new_aln->restore_displayname($ref);
+is $restored_aln->get_Seq_by_pos(3)->display_id, 'Smik_Contig1103.1',
+  'restored display id ok';
+
+# test sort_by_list:
+$str = Bio::AlignIO->new( -file => test_input_file('testaln.aln') );
+my $list_file = test_input_file('testaln.list');
+$aln     = $str->next_aln();
+$new_aln = $aln->sort_by_list($list_file);
+$a       = $new_aln->get_Seq_by_pos(1)->display_id;
+is $a, 'BAB68554', 'sort by list ok';
+
+# test for Binary/Morphological/Mixed data
+
+# sort_by_start
+
+# test sort_by_list:
+
+my $s1 = Bio::LocatableSeq->new(
+    -id       => 'AAA',
+    -seq      => 'aawtat-tn-',
+    -start    => 12,
+    -end      => 19,
+    -alphabet => 'dna'
+);
+my $s2 = Bio::LocatableSeq->new(
+    -id       => 'BBB',
+    -seq      => '-aaaat-tt-',
+    -start    => 1,
+    -end      => 7,
+    -alphabet => 'dna'
+);
+my $s3 = Bio::LocatableSeq->new(
+    -id       => 'BBB',
+    -seq      => '-aaaat-t--',
+    -start    => 31,
+    -end      => 36,
+    -alphabet => 'dna'
+);
+
+
+$a = Bio::SimpleAlign->new();
+$a->add_Seq($s1);
+$a->add_Seq($s2);
+$a->add_Seq($s3);
+
+$a->uppercase();
+is ($s1->seq,'AAWTAT-TN-', "uppercase");
+$a->lowercase();
+is ($s1->seq,'aawtat-tn-', "lowercase");
+$a->togglecase();
+is ($s1->seq,'AAWTAT-TN-', "togglecase");
+$a->lowercase();
+
+@seqs = $a->next_Seq;
+is( $seqs[0]->start, 12 );
+is( $seqs[1]->start, 1 );
+is( $seqs[2]->start, 31 );
+
+$a->sort_by_start;
+@seqs = $a->next_Seq;
+
+is( $seqs[0]->start, 1 , "sort_by_start");
+is( $seqs[1]->start, 12 );
+is( $seqs[2]->start, 31 );
+
+
+$a->sort_by_length;
+@seqs = $a->next_Seq;
+
+is( $seqs[0]->start, 31 , "sort_by_length");
+is( $seqs[1]->start, 1 );
+is( $seqs[2]->start, 12 );
+
+$a->sort_by_pairwise_identity(3);
+@seqs = $a->next_Seq;
+
+is( $seqs[0]->start, 12 , "sort_by_pairwise_identity");
+is( $seqs[1]->start, 1 );
+is( $seqs[2]->start, 31 );
+
+my %testdata = (
+    'allele1' => 'GGATCCATT[C/C]CTACT',
+    'allele2' => 'GGAT[C/-][C/-]ATT[C/C]CT[A/C]CT',
+    'allele3' => 'G[G/C]ATCCATT[C/G]CTACT',
+    'allele4' => 'GGATCCATT[C/G]CTACT',
+    'allele5' => 'GGATCCATT[C/G]CTAC[T/A]',
+    'allele6' => 'GGATCCATT[C/G]CTA[C/G][T/A]',
+    'testseq' => 'GGATCCATT[C/G]CTACT'
+);
+
+my $alnin = Bio::AlignIO->new(
+    -format => 'fasta',
+    -file   => test_input_file('alleles.fas')
+);
+
+$aln = $alnin->next_aln;
+
+my $ct = 0;
+
+# compare all to test seq
+
+for my $ls ( sort keys %testdata ) {
+    $ct++;
+    my $str = $aln->bracket_string(
+        -refseq  => 'testseq',
+        -allele1 => 'allele1',
+        -allele2 => $ls,
+    );
+    is( $str, $testdata{$ls}, "BIC:$str" );
+}
+
+%testdata = (
+    'allele1' => 'GGATCCATT{C.C}CTACT',
+    'allele2' => 'GGAT{C.-}{C.-}ATT{C.C}CT{A.C}CT',
+    'allele3' => 'G{G.C}ATCCATT{C.G}CTACT',
+    'allele4' => 'GGATCCATT{C.G}CTACT',
+    'allele5' => 'GGATCCATT{C.G}CTAC{T.A}',
+    'allele6' => 'GGATCCATT{C.G}CTA{C.G}{T.A}',
+    'testseq' => 'GGATCCATT{C.G}CTACT'
+);
+
+for my $ls ( sort keys %testdata ) {
+    $ct++;
+    my $str = $aln->bracket_string(
+        -refseq     => 'testseq',
+        -allele1    => 'allele1',
+        -allele2    => $ls,
+        -delimiters => '{}',
+        -separator  => '.'
+    );
+    is( $str, $testdata{$ls}, "BIC:$str" );
+}
+
+# is _remove_col really working correctly?
+my $a = Bio::LocatableSeq->new(
+    -id    => 'a',
+    -strand => 1,
+    -seq   => 'atcgatcgatcgatcg',
+    -start => 5,
+    -end   => 20
+);
+my $b = Bio::LocatableSeq->new(
+    -id    => 'b',
+    -strand => 1,
+    -seq   => '-tcgatc-atcgatcg',
+    -start => 30,
+    -end   => 43
+);
+my $c = Bio::LocatableSeq->new(
+    -id    => 'c',
+    -strand => -1,
+    -seq   => 'atcgatcgatc-atc-',
+    -start => 50,
+    -end   => 63
+);
+my $d = Bio::LocatableSeq->new(
+    -id    => 'd',
+    -strand => -1,
+    -seq   => '--cgatcgatcgat--',
+    -start => 80,
+    -end   => 91
+);
+my $e = Bio::LocatableSeq->new(
+    -id    => 'e',
+    -strand => 1,    
+    -seq   => '-t-gatcgatcga-c-',
+    -start => 100,
+    -end   => 111
+);
+$aln = Bio::SimpleAlign->new();
+$aln->add_Seq($a);
+$aln->add_Seq($b);
+$aln->add_Seq($c);
+
+
+my $gapless = $aln->remove_gaps();
+foreach my $seq ( $gapless->next_Seq ) {
+    if ( $seq->id eq 'a' ) {
+        is $seq->start, 6;
+        is $seq->end,   19;
+        is $seq->seq,   'tcgatcatcatc','remove_gaps ok';
+    }
+    elsif ( $seq->id eq 'b' ) {
+        is $seq->start, 30;
+        is $seq->end,   42;
+        is $seq->seq,   'tcgatcatcatc';
+    }
+    elsif ( $seq->id eq 'c' ) {
+        is $seq->start, 50;
+        is $seq->end,   62;
+        is $seq->seq,   'tcgatcatcatc';
+    }
+}
+
+$aln->add_Seq($d);
+$aln->add_Seq($e);
+
+
+$gapless = $aln->remove_gaps();
+foreach my $seq ( $gapless->next_Seq ) {
+    if ( $seq->id eq 'a' ) {
+        is $seq->start, 8;
+        is $seq->end,   17;
+        is $seq->seq,   'gatcatca';
+    }
+    elsif ( $seq->id eq 'b' ) {
+        is $seq->start, 32;
+        is $seq->end,   40;
+        is $seq->seq,   'gatcatca';
+    }
+    elsif ( $seq->id eq 'c' ) {
+        is $seq->start, 52;
+        is $seq->end,   60;
+        is $seq->seq,   'gatcatca';
+    }
+    elsif ( $seq->id eq 'd' ) {
+        is $seq->start, 81;
+        is $seq->end,   90;
+        is $seq->seq,   'gatcatca';
+    }
+    elsif ( $seq->id eq 'e' ) {
+        is $seq->start, 101;
+        is $seq->end,   110;
+        is $seq->seq,   'gatcatca';
+    }
+}
+
+# bug 3077
+
+my $slice = $aln->select_columns([1..4]);
+
+for my $seq ($slice->next_Seq) {
+    if ( $seq->id eq 'a' ) {
+        is $seq->start, 5;
+        is $seq->end,   8;
+        is $seq->strand, 1;
+        is $seq->seq,   'atcg';
+    }
+    elsif ( $seq->id eq 'b' ) {
+        is $seq->start, 30;
+        is $seq->end,   32;
+        is $seq->strand, 1;
+        is $seq->seq,   '-tcg';
+    }
+    elsif ( $seq->id eq 'c' ) {
+        is $seq->start, 60;
+        is $seq->end,   63;
+        is $seq->strand, -1;
+        is $seq->seq,   'atcg';
+    }
+    elsif ( $seq->id eq 'd' ) {
+        is $seq->start, 90;
+        is $seq->end,   91;
+        is $seq->strand, -1;
+        is $seq->seq,   '--cg';
+    }
+    elsif ( $seq->id eq 'e' ) {
+        is $seq->start, 100;
+        is $seq->end,   101;
+        is $seq->strand, 1;
+        is $seq->seq,   '-t-g';
+    }    
+}
+
+my $f = Bio::LocatableSeq->new(
+    -id    => 'f',
+    -seq   => 'a-cgatcgatcgat-g',
+    -start => 30,
+    -end   => 43
+);
+$aln = Bio::SimpleAlign->new();
+$aln->add_Seq($a);
+$aln->add_Seq($f);
+
+$gapless = $aln->remove_gaps();
+foreach my $seq ( $gapless->next_Seq ) {
+    if ( $seq->id eq 'a' ) {
+        is $seq->start, 5;
+        is $seq->end,   20;
+        is $seq->seq,   'acgatcgatcgatg';
+    }
+    elsif ( $seq->id eq 'f' ) {
+        is $seq->start, 30;
+        is $seq->end,   43;
+        is $seq->seq,   'acgatcgatcgatg';
+    }
+}
+
+my $g =
+  Bio::LocatableSeq->new( -id => 'g', -seq => 'atgc', -start => 5, -end => 8 );
+my $h = Bio::LocatableSeq->new(
+    -id    => 'h',
+    -seq   => 't-cg',
+    -start => 30,
+    -end   => 32
+);
+$aln = Bio::SimpleAlign->new();
+$aln->add_Seq($g);
+$aln->add_Seq($h);
+
+# test for new method in API get_seq_by_id
+my $retrieved = $aln->get_Seq_by_id('g');
+is( defined $retrieved, 1 );
+my $removed = $aln->remove_columns(-selection=>[1],-toggle=>1);
+foreach my $seq ( $removed->next_Seq ) {
+    if ( $seq->id eq 'g' ) {
+        is $seq->start, 5, "remove_columns toggle selection";
+        is $seq->end,   5;
+        is $seq->seq,   'a';
+    }
+    elsif ( $seq->id eq 'h' ) {
+        is $seq->start, 30;
+        is $seq->end,   30;
+        is $seq->seq,   't';
+    }
+}
+
+# work out mask_columns(), see bug 2842
+SKIP: {
+    test_skip(-tests => 6, -requires_module => 'IO::String');
+    my $io = Bio::AlignIO->new( -file => test_input_file("testaln.aln") );
+    my $aln = $io->next_aln();
+    isa_ok( $aln, 'Bio::SimpleAlign' );
+    my $consensus = <<EOU;
+MNEGEHQIKLDELFEKLLRARKIFKNKDVLRHSWEPKDLPHRHEQIEALAQILV
+PVLRGETMKIIFCGHHACELGEDRGTKGFVIDELKDVDEDRNGKVDVIEINCEH
+MDTHYRVLPNIAKLFDDCTGIGVPMHGGPTDEVTAKLKQVIDMKERFVIIVLDE
+IDKLVKKSGDEVLYSLTRINTELKRAKVSVIGISNDLKFKEYLDPRVLSSLSEE
+EVVFPPYDANQLRDILTQRAEEAFYPGVLDEGVIPLCAALAAREHGDARKALDL
+LRVAGEIAEREGASKVTEKHVWKAQEKIEQDMMEEVIKTLPLQSKVLLYAIVLL
+DENGDLPANTGDVYAVYRELCEYIDLEPLTQRRISDLINELDMLGIINAKVVSK
+GRYGRTKEIRLMVTSYKIRNVLRYDYSIQPLLTISLKSEQRRLI
+EOU
+
+
+    $consensus =~ s/\n//g;
+
+    my @cons_got = $aln->consensus_conservation;
+    # 422 positions, mostly two of six sequences conserved, set as default
+    my @cons_expect = (100 * 2/6) x 422;
+    # Exceptionally columns as a mask, manually determined (1-based columns)
+    $cons_expect[$_-1] = 100 * 1/6 for (5,12,41,70,82,310,390);
+    $cons_expect[$_-1] = 100 * 3/6 for (27,30,32,36,47,49,61,66,69,71,77,79,
+        81,91,96,97,105,114,115,117,118,121,122,129,140,146,156,159,160,162,
+        183,197,217,221,229,242,247,248,261,266,282,287,295,316,323,329,335,337,344,);
+    $cons_expect[$_-1] = 100 * 4/6 for (84,93,99,100,102,107,108,112,113,119,150,);
+    $cons_expect[$_-1] = 100 * 5/6 for (81,110);
+    # Format for string comparison
+    @cons_expect = map { sprintf "%4.1f", $_ } @cons_expect;   
+    @cons_got = map { sprintf "%4.1f", $_ } @cons_got; 
+    is(length($aln->consensus_string), scalar(@cons_got),"conservation length");
+    is_deeply(\@cons_got, \@cons_expect, "conservation scores");
+    
+    is( $aln->consensus_string, $consensus, 'consensus string looks ok' );
+
+    is( aln2str( $aln => 'pfam' ), <<EOA, 'looks like correct unmasked alignment (from clustalw)' );
+P84139/1-420              MNEGEHQIKLDELFEKLLRARKIFKNKDVLRHSYTPKDLPLRHEQIETLAQILVPVLRGETPSNIFVYG-KTGTGKTVTVK-FVTEELKRISEKYNIPVDVIYINCEIVDTHYRVLANIVNYFKDETGIGVPMVGWPTDEVYAKLKQVIDMKERFVIIVLDEIDKLVKKSGDEVLYSLTRINTELKRAKVSVIGISNDLKFKEYLDPRVLSSLSEEEVVFPPYDANQLRDILTQRAEEAFYPGVLDEGVIPLCAALAAREHGDARKALDLLRVAGEIAEREGASKVTEKHVWKAQEKIEQDMMEEVIKTRPLQSKVLLYAIVLLDENGDLPANTGDVYAVYRELCEYIDLEPLTQRRISDLINELDMLGIINAKVVSKGRYGRTKEIRLNVTSYKIRNVLRYDYSIQPLLTISLKSEQRRLI
+P814153/1-420             MNEGMHQIKLDVLFEKLLRARKIFKNKDVLRHSYTPKDLPHRHEQIETLAQILVPVLRGETPSNIFVYG-KTGTGKTVTVK-FVTEELKRISEKYNIPVDVIYINCEIVDTHYRVLANIVNYFKDETGIEVPMVGWPTDEVYAKLKQVIDMKERFVIIVLDEIDKLVKKSGDEVLYSLTRINTELKRAKVSVIGISNDLKFKEYLDPRVLSSLSEEEVVFPPYDANQLRDILTQRAEEAFYPGVLDEGVIPLCAALAAREHGDARKALDLLRVAGEIAEREGASKVTEKHVWKAQEKIEQDMMEEVIKTLPLQSKVLLYAIVLLDENGDLPANTGDVYAVYRELCEYIDLEPLTQRRISDLINELDMLGIINAKVVSKGRYGRTKEIRLMVTSYKIRNVLRYDYSIQPLLTISLKSEQRRLI
+P851414/1-60              -------------------------------------------------------------MKIVWCGH-ACFLVEDRGTK-ILIDPYPDVDEDRIGKVDYILQTHEHMD-HYGKTPLIAKLSD----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+P841414/1-60              -------------------------------------------------------------MKIVWCGH-ACFLVEDRGTK-ILIDPYPDVDEDRIGKVDYILVTHEHMD-HYGKTPLIAKLSD----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+BAB68554/1-141            --------------------MLTEDDKQLIQHVWEKVLEHQEDFGAEALERMFIVYPSTKTYFPHFDLHHDSEQIRHHGKK-VVGALGDAVKHIDNLSATLSELSNLHCY-NLRVDPVNFKLLSHCFQVVLGAHLG--REYTPQVQVAYDKFLAAVSAVLAEKYR-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+gb|443893|124775/1-331    -MRFRFGVVVPPAVAGARPELLVVGSRPELG-RWEPRGAVRLRPAGTAAGDGALALQEPGLWLGEVELA-AEEAAQDGAEPGRVDTFWYKFLKREPGGELSWEGNGPHHDRCCTYNENNLVDGVYCLPIG---HWGEATGHTNEMKHTTDFYFNIAGHQAMHYSRILPNIWLGSCPRQVEHVTIKLKHELGITAVMN-FQTEWDIVQNSSGCNRYPEPMTPDTMIKLYREEGLAYIWMP-TPDMSTEGRVQMLPQAVCLLHALLEKGHIVY-----VHCNAGVGRSTAAVCGWLQYVMGWNLRKVQYFLMAKRPAVYIDEEALARAQEDFFQKFGKVRSSVCSL------------------------------------------------------------------------------
+EOA
+
+    my $newaln = $aln->mask_columns([12..20]);
+    is( aln2str( $newaln, 'pfam' ), <<EOA, 'looks like correct masked alignment (from clustalw)' );
+P84139/1-420              MNEGEHQIKLD?????????RKIFKNKDVLRHSYTPKDLPLRHEQIETLAQILVPVLRGETPSNIFVYG-KTGTGKTVTVK-FVTEELKRISEKYNIPVDVIYINCEIVDTHYRVLANIVNYFKDETGIGVPMVGWPTDEVYAKLKQVIDMKERFVIIVLDEIDKLVKKSGDEVLYSLTRINTELKRAKVSVIGISNDLKFKEYLDPRVLSSLSEEEVVFPPYDANQLRDILTQRAEEAFYPGVLDEGVIPLCAALAAREHGDARKALDLLRVAGEIAEREGASKVTEKHVWKAQEKIEQDMMEEVIKTRPLQSKVLLYAIVLLDENGDLPANTGDVYAVYRELCEYIDLEPLTQRRISDLINELDMLGIINAKVVSKGRYGRTKEIRLNVTSYKIRNVLRYDYSIQPLLTISLKSEQRRLI
+P814153/1-420             MNEGMHQIKLD?????????RKIFKNKDVLRHSYTPKDLPHRHEQIETLAQILVPVLRGETPSNIFVYG-KTGTGKTVTVK-FVTEELKRISEKYNIPVDVIYINCEIVDTHYRVLANIVNYFKDETGIEVPMVGWPTDEVYAKLKQVIDMKERFVIIVLDEIDKLVKKSGDEVLYSLTRINTELKRAKVSVIGISNDLKFKEYLDPRVLSSLSEEEVVFPPYDANQLRDILTQRAEEAFYPGVLDEGVIPLCAALAAREHGDARKALDLLRVAGEIAEREGASKVTEKHVWKAQEKIEQDMMEEVIKTLPLQSKVLLYAIVLLDENGDLPANTGDVYAVYRELCEYIDLEPLTQRRISDLINELDMLGIINAKVVSKGRYGRTKEIRLMVTSYKIRNVLRYDYSIQPLLTISLKSEQRRLI
+P851414/1-60              -------------------------------------------------------------MKIVWCGH-ACFLVEDRGTK-ILIDPYPDVDEDRIGKVDYILQTHEHMD-HYGKTPLIAKLSD----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+P841414/1-60              -------------------------------------------------------------MKIVWCGH-ACFLVEDRGTK-ILIDPYPDVDEDRIGKVDYILVTHEHMD-HYGKTPLIAKLSD----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+BAB68554/1-141            --------------------MLTEDDKQLIQHVWEKVLEHQEDFGAEALERMFIVYPSTKTYFPHFDLHHDSEQIRHHGKK-VVGALGDAVKHIDNLSATLSELSNLHCY-NLRVDPVNFKLLSHCFQVVLGAHLG--REYTPQVQVAYDKFLAAVSAVLAEKYR-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+gb|443893|124775/1-331    -MRFRFGVVVP?????????LLVVGSRPELG-RWEPRGAVRLRPAGTAAGDGALALQEPGLWLGEVELA-AEEAAQDGAEPGRVDTFWYKFLKREPGGELSWEGNGPHHDRCCTYNENNLVDGVYCLPIG---HWGEATGHTNEMKHTTDFYFNIAGHQAMHYSRILPNIWLGSCPRQVEHVTIKLKHELGITAVMN-FQTEWDIVQNSSGCNRYPEPMTPDTMIKLYREEGLAYIWMP-TPDMSTEGRVQMLPQAVCLLHALLEKGHIVY-----VHCNAGVGRSTAAVCGWLQYVMGWNLRKVQYFLMAKRPAVYIDEEALARAQEDFFQKFGKVRSSVCSL------------------------------------------------------------------------------
+EOA
+
+$aln->mask_char('N');
+
+    my $newaln2 = $aln->mask_columns([12..20]);
+    is( aln2str( $newaln2, 'pfam' ), <<EOA, 'looks like correct masked alignment (from clustalw)' );
+P84139/1-420              MNEGEHQIKLDNNNNNNNNNRKIFKNKDVLRHSYTPKDLPLRHEQIETLAQILVPVLRGETPSNIFVYG-KTGTGKTVTVK-FVTEELKRISEKYNIPVDVIYINCEIVDTHYRVLANIVNYFKDETGIGVPMVGWPTDEVYAKLKQVIDMKERFVIIVLDEIDKLVKKSGDEVLYSLTRINTELKRAKVSVIGISNDLKFKEYLDPRVLSSLSEEEVVFPPYDANQLRDILTQRAEEAFYPGVLDEGVIPLCAALAAREHGDARKALDLLRVAGEIAEREGASKVTEKHVWKAQEKIEQDMMEEVIKTRPLQSKVLLYAIVLLDENGDLPANTGDVYAVYRELCEYIDLEPLTQRRISDLINELDMLGIINAKVVSKGRYGRTKEIRLNVTSYKIRNVLRYDYSIQPLLTISLKSEQRRLI
+P814153/1-420             MNEGMHQIKLDNNNNNNNNNRKIFKNKDVLRHSYTPKDLPHRHEQIETLAQILVPVLRGETPSNIFVYG-KTGTGKTVTVK-FVTEELKRISEKYNIPVDVIYINCEIVDTHYRVLANIVNYFKDETGIEVPMVGWPTDEVYAKLKQVIDMKERFVIIVLDEIDKLVKKSGDEVLYSLTRINTELKRAKVSVIGISNDLKFKEYLDPRVLSSLSEEEVVFPPYDANQLRDILTQRAEEAFYPGVLDEGVIPLCAALAAREHGDARKALDLLRVAGEIAEREGASKVTEKHVWKAQEKIEQDMMEEVIKTLPLQSKVLLYAIVLLDENGDLPANTGDVYAVYRELCEYIDLEPLTQRRISDLINELDMLGIINAKVVSKGRYGRTKEIRLMVTSYKIRNVLRYDYSIQPLLTISLKSEQRRLI
+P851414/1-60              -------------------------------------------------------------MKIVWCGH-ACFLVEDRGTK-ILIDPYPDVDEDRIGKVDYILQTHEHMD-HYGKTPLIAKLSD----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+P841414/1-60              -------------------------------------------------------------MKIVWCGH-ACFLVEDRGTK-ILIDPYPDVDEDRIGKVDYILVTHEHMD-HYGKTPLIAKLSD----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+BAB68554/1-141            --------------------MLTEDDKQLIQHVWEKVLEHQEDFGAEALERMFIVYPSTKTYFPHFDLHHDSEQIRHHGKK-VVGALGDAVKHIDNLSATLSELSNLHCY-NLRVDPVNFKLLSHCFQVVLGAHLG--REYTPQVQVAYDKFLAAVSAVLAEKYR-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+gb|443893|124775/1-331    -MRFRFGVVVPNNNNNNNNNLLVVGSRPELG-RWEPRGAVRLRPAGTAAGDGALALQEPGLWLGEVELA-AEEAAQDGAEPGRVDTFWYKFLKREPGGELSWEGNGPHHDRCCTYNENNLVDGVYCLPIG---HWGEATGHTNEMKHTTDFYFNIAGHQAMHYSRILPNIWLGSCPRQVEHVTIKLKHELGITAVMN-FQTEWDIVQNSSGCNRYPEPMTPDTMIKLYREEGLAYIWMP-TPDMSTEGRVQMLPQAVCLLHALLEKGHIVY-----VHCNAGVGRSTAAVCGWLQYVMGWNLRKVQYFLMAKRPAVYIDEEALARAQEDFFQKFGKVRSSVCSL------------------------------------------------------------------------------
+EOA
+
+$aln->mask_char('?');
+
+
+###### test with phylip
+
+    my $phy_fh = IO::String->new( <<EOF );
+ 3   37
+seq1        AAAATGGGGG TGGT------ GGTACCT--- -------
+seq2        -----GGCGG TGGTGNNNNG GGTTCCCTNN NNNNNNN
+new         AAAATGGNGG TGGTN----N GGTNCCNTNN NNNNNNN
+EOF
+
+    my $in = Bio::AlignIO->new( -fh => $phy_fh, -format => 'phylip' );
+
+    $aln = $in->next_aln();
+    is( aln2str( $aln, 'phylip' ), <<EOU );
+ 3 37
+seq1         AAAATGGGGG TGGT------ GGTACCT--- ------- 
+seq2         -----GGCGG TGGTGNNNNG GGTTCCCTNN NNNNNNN 
+new          AAAATGGNGG TGGTN----N GGTNCCNTNN NNNNNNN 
+
+EOU
+
+    $newaln = $aln->mask_columns([15..20]);
+    is( aln2str( $newaln,'phylip' ), <<EOU, 'align after looks ok' );
+ 3 37
+seq1         AAAATGGGGG TGGT------ GGTACCT--- ------- 
+seq2         -----GGCGG TGGT?????? GGTTCCCTNN NNNNNNN 
+new          AAAATGGNGG TGGT?----? GGTNCCNTNN NNNNNNN 
+
+EOU
+}
+
+######## SUBROUTINES
+
+sub aln2str {
+    my ( $aln, $fmt ) = @_;
+    my $out;
+    my $out_fh = IO::String->new( $out );
+    my $alignio_out = Bio::AlignIO->new(-fh => $out_fh, -format => $fmt);
+    $alignio_out->write_aln( $aln );
+    return $out;
+}
+

From 00075252edc8a0afa85f7cc9cb066ba3b3fddb2d Mon Sep 17 00:00:00 2001
From: Jun Yin <junyin@cwlmcd1.med.yale.edu>
Date: Tue, 2 Oct 2012 14:37:59 -0400
Subject: [PATCH 5/8] up-to-date Oct 2,2012

---
 Bio/SimpleAlign.pm     |  2 ++
 t/Align/SimpleAlign2.t | 12 ++++++------
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/Bio/SimpleAlign.pm b/Bio/SimpleAlign.pm
index 0cb49c3f43..959644ef7a 100644
--- a/Bio/SimpleAlign.pm
+++ b/Bio/SimpleAlign.pm
@@ -2714,12 +2714,14 @@ sub mask_char {
 
 
 =head2 symbol_chars
+
  Title   : symbol_chars
  Usage   : my @symbolchars = $aln->symbol_chars;
  Function: Returns all the seen symbols (other than gaps)
  Returns : array of characters that are the seen symbols
  Args    : boolean to include the gap/missing/match characters
 
+
 =cut
 
 sub symbol_chars{
diff --git a/t/Align/SimpleAlign2.t b/t/Align/SimpleAlign2.t
index e1ca10b0f0..35cf00f688 100644
--- a/t/Align/SimpleAlign2.t
+++ b/t/Align/SimpleAlign2.t
@@ -850,18 +850,18 @@ EOF
     $aln = $in->next_aln();
     is( aln2str( $aln, 'phylip' ), <<EOU );
  3 37
-seq1         AAAATGGGGG TGGT------ GGTACCT--- ------- 
-seq2         -----GGCGG TGGTGNNNNG GGTTCCCTNN NNNNNNN 
-new          AAAATGGNGG TGGTN----N GGTNCCNTNN NNNNNNN 
+seq1         AAAATGGGGG TGGT------ GGTACCT--- -------
+seq2         -----GGCGG TGGTGNNNNG GGTTCCCTNN NNNNNNN
+new          AAAATGGNGG TGGTN----N GGTNCCNTNN NNNNNNN
 
 EOU
 
     $newaln = $aln->mask_columns([15..20]);
     is( aln2str( $newaln,'phylip' ), <<EOU, 'align after looks ok' );
  3 37
-seq1         AAAATGGGGG TGGT------ GGTACCT--- ------- 
-seq2         -----GGCGG TGGT?????? GGTTCCCTNN NNNNNNN 
-new          AAAATGGNGG TGGT?----? GGTNCCNTNN NNNNNNN 
+seq1         AAAATGGGGG TGGT------ GGTACCT--- -------
+seq2         -----GGCGG TGGT?????? GGTTCCCTNN NNNNNNN
+new          AAAATGGNGG TGGT?----? GGTNCCNTNN NNNNNNN
 
 EOU
 }

From 9e9add13b43adc117c9d00a169efca940f57d8cd Mon Sep 17 00:00:00 2001
From: Jun Yin <junyin@cwlmcd1.med.yale.edu>
Date: Tue, 2 Oct 2012 16:02:46 -0400
Subject: [PATCH 6/8] Backward support for mask_columsn

---
 Bio/SimpleAlign.pm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Bio/SimpleAlign.pm b/Bio/SimpleAlign.pm
index 959644ef7a..da6a41f65c 100644
--- a/Bio/SimpleAlign.pm
+++ b/Bio/SimpleAlign.pm
@@ -1603,7 +1603,7 @@ sub mask_columns {
 	unless(ref($args[0])) {
 		$self->deprecated(-warn_version => 1.0080, -throw_version => 1.0090, -message =>"mask_columns - column range must be in refrence.");
 		my ($start, $end, $mask_char) = @args;
-		$sel=[($start, $end)];
+		$sel=[$start..$end];
     	if(defined $mask_char) { 
     		$self->mask_char($mask_char);
     	}

From 72b46db98fab5e9a8c92dbb1cb4c8edf4f68e283 Mon Sep 17 00:00:00 2001
From: Jun Yin <junyin@cwlmcd1.med.yale.edu>
Date: Tue, 2 Oct 2012 17:26:13 -0400
Subject: [PATCH 7/8] Improved backward support for remove_columns

---
 Bio/SimpleAlign.pm | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/Bio/SimpleAlign.pm b/Bio/SimpleAlign.pm
index da6a41f65c..1845b14215 100644
--- a/Bio/SimpleAlign.pm
+++ b/Bio/SimpleAlign.pm
@@ -83,7 +83,8 @@ However, in some cases people do not want the name/start-end to be displayed:
 either multiple names in an alignment or names specific to the alignment
 (ROA1_HUMAN_1, ROA1_HUMAN_2 etc). These names are called
 C<displayname>, and generally is what is used to print out the
-alignment. They default to name/start-end.
+alignment. They default to name/start-end. Please be cautious that, the 
+sequence coordinates are 1 based for all the methods in Bio::SimpleAlign.
 
 The SimpleAlign Module is derived from the Align module by Ewan Birney.
 This package is refactored by Jun Yin as part of the Google Summer of 
@@ -1468,7 +1469,32 @@ sub _select_columns_by_type {
 sub remove_columns {
 	my ($self,@args) = @_;
    @args || $self->throw("Must supply column ranges or column types");
-	my ($sel, $toggle,$gap) = $self->_rearrange([qw(SELECTION TOGGLE KEEPGAPONLY)], @args);
+   
+   #provide backward support
+   #backward support for remove columns using multiple anonymous array reference
+   if(defined $args[1] && ref($args[0]) && ref($args[1])) { 
+		$self->deprecated(-warn_version => 1.0060, -throw_version => 1.0080, -message =>"select_columns start is 1 based and in a single array. Please check the method description.");
+		#the old method use multiple arrays and the coordiantes are 0 based
+		my @newargs;
+		foreach my $arg (@args) {
+			push @{$newargs[0]},($arg->[0]+1)..($arg->[1]+1);
+		}
+		@args=@newargs;
+	}
+	   
+   my ($sel, $toggle,$gap) = $self->_rearrange([qw(SELECTION TOGGLE KEEPGAPONLY)], @args);
+   
+   #backward support for remove columns using 0-based coordinate
+   if($sel->[0] =~ /^\d+$/ && $sel->[0]==0) {
+		$self->deprecated(-warn_version => 1.0060, -throw_version => 1.0080, -message =>"select_columns start is 1 based and in a single array. Please check the method description.");
+		#the old method use multiple arrays and the coordiantes are 0 based
+		my @newargs;
+		foreach my $arg (@{$sel}) {
+			push @{$newargs[0]},$arg+1;
+		}
+		@{$sel}=@newargs;
+	}
+   	
 	my $newaln=$self->select_columns(-selection=>$sel,-toggle=>$toggle?0:1,-keepgaponly=>$gap);
 	return $newaln;
 }

From 3dd4aeeaba3ab2f8837cecb1797d27d3db540f79 Mon Sep 17 00:00:00 2001
From: Jun Yin <junyin@cwlmcd1.med.yale.edu>
Date: Tue, 2 Oct 2012 17:55:44 -0400
Subject: [PATCH 8/8] remove_columns updated

---
 Bio/SimpleAlign.pm | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Bio/SimpleAlign.pm b/Bio/SimpleAlign.pm
index 1845b14215..38a8bbcb02 100644
--- a/Bio/SimpleAlign.pm
+++ b/Bio/SimpleAlign.pm
@@ -1484,15 +1484,15 @@ sub remove_columns {
 	   
    my ($sel, $toggle,$gap) = $self->_rearrange([qw(SELECTION TOGGLE KEEPGAPONLY)], @args);
    
-   #backward support for remove columns using 0-based coordinate
+   #if there is a 0 based coordinate, all coordinates will be added by 1
    if($sel->[0] =~ /^\d+$/ && $sel->[0]==0) {
 		$self->deprecated(-warn_version => 1.0060, -throw_version => 1.0080, -message =>"select_columns start is 1 based and in a single array. Please check the method description.");
 		#the old method use multiple arrays and the coordiantes are 0 based
-		my @newargs;
+		my @newsel;
 		foreach my $arg (@{$sel}) {
-			push @{$newargs[0]},$arg+1;
+			push @newsel,$arg+1;
 		}
-		@{$sel}=@newargs;
+		$sel=[@newsel];
 	}
    	
 	my $newaln=$self->select_columns(-selection=>$sel,-toggle=>$toggle?0:1,-keepgaponly=>$gap);