diff --git a/src/branch_sequences.c b/src/branch_sequences.c index cf9b6275..74864e7a 100644 --- a/src/branch_sequences.c +++ b/src/branch_sequences.c @@ -173,21 +173,23 @@ void fill_in_recombinations_with_gaps(newick_node ** nodeArray, int node_index, // Set number of branch bases in recombination by iterating through // the first part of merged blocks (i.e. only blocks on the branch to this node) set_number_of_branch_bases_in_recombinations(sequence_index, - calculate_number_of_bases_in_recombinations_excluding_gaps(merged_block_coordinates, - node->number_of_blocks, - node_sequence, - snp_locations, - number_of_snps) + calculate_number_of_bases_in_recombinations(merged_block_coordinates, + node->number_of_blocks, + node_sequence, + snp_locations, + number_of_snps, + 0) ); // Set number of total bases in recombination by iterating through // all merged blocks leading to this node set_number_of_bases_in_recombinations(sequence_index, - calculate_number_of_bases_in_recombinations_excluding_gaps(merged_block_coordinates, - (num_blocks[parent_node_index] + node->number_of_blocks), - node_sequence, - snp_locations, - number_of_snps) + calculate_number_of_bases_in_recombinations(merged_block_coordinates, + (num_blocks[parent_node_index] + node->number_of_blocks), + node_sequence, + snp_locations, + number_of_snps, + 0) ); free(node_sequence); @@ -226,7 +228,7 @@ void fill_in_recombinations_with_gaps(newick_node ** nodeArray, int node_index, } -int calculate_number_of_bases_in_recombinations_excluding_gaps(int ** block_coordinates, int num_blocks,char * child_sequence, int * snp_locations,int current_total_snps) +int calculate_number_of_bases_in_recombinations(int ** block_coordinates, int num_blocks,char * child_sequence, int * snp_locations, int current_total_snps, int count_gaps) { int total_bases = 0; int current_block = 1; @@ -277,11 +279,15 @@ int calculate_number_of_bases_in_recombinations_excluding_gaps(int ** block_coor { if(block_coordinates[0][start_block] != -1 && block_coordinates[1][start_block] != -1) { - total_bases += calculate_block_size_without_gaps(child_sequence, - snp_locations, - block_coordinates[0][start_block], - block_coordinates[1][start_block], - current_total_snps); + if (count_gaps > 0) { + total_bases += (1 + block_coordinates[1][start_block] - block_coordinates[0][start_block]); + } else { + total_bases += calculate_block_size_without_gaps(child_sequence, + snp_locations, + block_coordinates[0][start_block], + block_coordinates[1][start_block], + current_total_snps); + } } } diff --git a/src/branch_sequences.h b/src/branch_sequences.h index 9b70fac5..a93e27af 100644 --- a/src/branch_sequences.h +++ b/src/branch_sequences.h @@ -38,7 +38,7 @@ int calculate_cutoff(int branch_genome_size, int window_size, int num_branch_snp int get_smallest_log_likelihood(double * candidate_blocks, int number_of_candidate_blocks); int exclude_snp_sites_in_block(int window_start_coordinate, int window_end_coordinate, int * snp_site_coords, int number_of_branch_snps); int flag_smallest_log_likelihood_recombinations(int ** candidate_blocks, int number_of_candidate_blocks, int number_of_branch_snps, int * snp_site_coords, int * recombinations, int number_of_recombinations,newick_node * current_node, FILE * block_file_pointer, newick_node *root,int * snp_locations, int total_num_snps, FILE * gff_file_pointer, double * block_likelihooods); -int calculate_number_of_bases_in_recombinations_excluding_gaps(int ** block_coordinates, int num_blocks,char * child_sequence, int * snp_locations,int current_total_snps); +int calculate_number_of_bases_in_recombinations(int ** block_coordinates, int num_blocks,char * child_sequence, int * snp_locations,int current_total_snps, int count_gaps); void carry_unambiguous_gaps_up_tree(newick_node *root); void move_blocks_inwards_while_likelihood_improves(int number_of_blocks,int ** block_coordinates, int min_snps, int * snp_site_coords, int number_of_branch_snps,char * branch_snp_sequence, int * snp_locations, int branch_genome_size,char * child_sequence, int length_of_sequence, double * block_likelihoods, int cutoff_value, float trimming_ratio); int get_blocks(int ** block_coordinates, int branch_genome_size,int * snp_site_coords,int number_of_branch_snps, int window_size, int cutoff, char * original_sequence, int * snp_locations, int number_of_snps);