Skip to content

Commit

Permalink
Calculate recombination length including gaps
Browse files Browse the repository at this point in the history
  • Loading branch information
nickjcroucher committed Jun 17, 2024
1 parent d2ed906 commit b2135ab
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 36 deletions.
42 changes: 25 additions & 17 deletions src/branch_sequences.c
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,10 @@ void fill_in_recombinations_with_gaps(newick_node ** nodeArray, int node_index,
// Set root-specific values
if (parent_node_index == -1)
{
set_number_of_bases_in_recombinations(sequence_index,0);
set_number_of_branch_bases_in_recombinations(sequence_index,0);
for (int include_gaps = 0; include_gaps <= 1; ++include_gaps) {
set_number_of_bases_in_recombinations(sequence_index,0,include_gaps);
set_number_of_branch_bases_in_recombinations(sequence_index,0,include_gaps);
}
set_internal_node(1,sequence_index);
set_genome_length_excluding_blocks_and_gaps_for_sample(sequence_index,
length_of_original_genome);
Expand Down Expand Up @@ -172,25 +174,31 @@ void fill_in_recombinations_with_gaps(newick_node ** nodeArray, int node_index,

// Set number of branch bases in recombination by iterating through
// the first part of merged blocks (i.e. only blocks on the branch to this node)
set_number_of_branch_bases_in_recombinations(sequence_index,
calculate_number_of_bases_in_recombinations(merged_block_coordinates,
node->number_of_blocks,
node_sequence,
snp_locations,
number_of_snps,
0)
);
for (int include_gaps = 0; include_gaps <= 1; ++include_gaps) {
set_number_of_branch_bases_in_recombinations(sequence_index,
calculate_number_of_bases_in_recombinations(merged_block_coordinates,
node->number_of_blocks,
node_sequence,
snp_locations,
number_of_snps,
include_gaps),
include_gaps
);
}

// Set number of total bases in recombination by iterating through
// all merged blocks leading to this node
set_number_of_bases_in_recombinations(sequence_index,
calculate_number_of_bases_in_recombinations(merged_block_coordinates,
(num_blocks[parent_node_index] + node->number_of_blocks),
node_sequence,
snp_locations,
number_of_snps,
0)
for (int include_gaps = 0; include_gaps <= 1; ++include_gaps) {
set_number_of_bases_in_recombinations(sequence_index,
calculate_number_of_bases_in_recombinations(merged_block_coordinates,
(num_blocks[parent_node_index] + node->number_of_blocks),
node_sequence,
snp_locations,
number_of_snps,
include_gaps),
include_gaps
);
}
free(node_sequence);

for(i = 0; i < num_recombinations[node_index]; i++)
Expand Down
25 changes: 18 additions & 7 deletions src/parse_phylip.c
Original file line number Diff line number Diff line change
Expand Up @@ -295,23 +295,34 @@ void set_genome_length_excluding_blocks_and_gaps_for_sample(int sample_index, in
((sample_statistics *) statistics_for_samples[sample_index])->genome_length_excluding_blocks_and_gaps = genome_length_excluding_blocks_and_gaps;
}

void set_number_of_branch_bases_in_recombinations(int sample_index, int bases_in_recombinations)
void set_number_of_branch_bases_in_recombinations(int sample_index, int bases_in_recombinations, int include_gaps)
{

if( sample_index == -1)
{
return;
}
((sample_statistics *) statistics_for_samples[sample_index])->branch_bases_in_recombinations = bases_in_recombinations;

if (include_gaps > 0) {
((sample_statistics *) statistics_for_samples[sample_index])->branch_bases_in_recombinations_including_gaps = bases_in_recombinations;
} else {
((sample_statistics *) statistics_for_samples[sample_index])->branch_bases_in_recombinations = bases_in_recombinations;
}
}

void set_number_of_bases_in_recombinations(int sample_index, int bases_in_recombinations)
void set_number_of_bases_in_recombinations(int sample_index, int bases_in_recombinations, int include_gaps)
{
if( sample_index == -1)
{
return;
}
if( sample_index == -1)
{
return;
}

if (include_gaps > 0) {
((sample_statistics *) statistics_for_samples[sample_index])->bases_in_recombinations_including_gaps = bases_in_recombinations;
} else {
((sample_statistics *) statistics_for_samples[sample_index])->bases_in_recombinations = bases_in_recombinations;
}

}


Expand Down
6 changes: 4 additions & 2 deletions src/parse_phylip.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
int number_of_snps;
int genome_length_without_gaps;
int number_of_blocks;
int bases_in_recombinations_including_gaps;
int branch_bases_in_recombinations_including_gaps;
int bases_in_recombinations;
int branch_bases_in_recombinations;
int genome_length_excluding_blocks_and_gaps;
Expand Down Expand Up @@ -54,8 +56,8 @@ int get_internal_node(int sequence_index);
void fill_in_unambiguous_bases_in_parent_from_children_where_parent_has_a_gap(int parent_sequence_index, int * child_sequence_indices, int num_children);
void fill_in_unambiguous_gaps_in_parent_from_children(int parent_sequence_index, int * child_sequence_indices, int num_children);
void freeup_memory(void);
void set_number_of_branch_bases_in_recombinations(int sample_index, int bases_in_recombinations);
void set_number_of_bases_in_recombinations(int sample_index, int bases_in_recombinations);
void set_number_of_branch_bases_in_recombinations(int sample_index, int bases_in_recombinations, int include_gaps);
void set_number_of_bases_in_recombinations(int sample_index, int bases_in_recombinations, int include_gaps);
void filter_sequence_bases_and_rotate(char * reference_bases, char ** filtered_bases_for_snps, int number_of_filtered_snps);
void set_genome_length_excluding_blocks_and_gaps_for_sample(int sample_index, int genome_length_excluding_blocks_and_gaps);

Expand Down
22 changes: 12 additions & 10 deletions src/tree_statistics.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,25 +35,27 @@ void create_tree_statistics_file(char filename[], sample_statistics ** statistic
char extension[7] = {".stats"};
concat_strings_created_with_malloc(base_filename,extension);
file_pointer = fopen(base_filename, "w");
fprintf( file_pointer, "Node\tTotal SNPs\tNumber of SNPs Inside Recombinations\tNumber of SNPs Outside Recombinations\tNumber of Recombination Blocks\tBases in Recombinations\tCumulative Bases in Recombinations\tr/m\trho/theta\tGenome Length\tBases in Clonal Frame\n");
fprintf( file_pointer, "Node\tTotal SNPs\tNumber of SNPs Inside Recombinations\tNumber of SNPs Outside Recombinations\tNumber of Recombination Blocks\tBases in Recombinations Including Gaps\tCumulative Bases in Recombinations Including Gaps\tBases in Recombinations Excluding Gaps\tCumulative Bases in Recombinations Excluding Gaps\tr/m\trho/theta\tGenome Length\tBases in Clonal Frame\n");

for(sample_counter=0; sample_counter< number_of_samples; sample_counter++)
{
sample_statistics * sample_details = ((sample_statistics *) statistics_for_samples[sample_counter]);
fprintf( file_pointer, "%s\t", sample_details->sample_name);
fprintf( file_pointer, "%i\t", (sample_details->number_of_recombinations + sample_details->number_of_snps));
fprintf( file_pointer, "%i\t", sample_details->number_of_recombinations);
fprintf( file_pointer, "%i\t", (sample_details->number_of_snps));
fprintf( file_pointer, "%i\t", sample_details->number_of_blocks);
fprintf( file_pointer, "%i\t", sample_details->branch_bases_in_recombinations);
fprintf( file_pointer, "%i\t", sample_details->bases_in_recombinations);
fprintf( file_pointer, "%f\t", recombination_to_mutation_ratio(sample_details->number_of_recombinations, (sample_details->number_of_snps)));
fprintf( file_pointer, "%i\t", (sample_details->number_of_recombinations + sample_details->number_of_snps));
fprintf( file_pointer, "%i\t", sample_details->number_of_recombinations);
fprintf( file_pointer, "%i\t", (sample_details->number_of_snps));
fprintf( file_pointer, "%i\t", sample_details->number_of_blocks);
fprintf( file_pointer, "%i\t", sample_details->branch_bases_in_recombinations_including_gaps);
fprintf( file_pointer, "%i\t", sample_details->bases_in_recombinations_including_gaps);
fprintf( file_pointer, "%i\t", sample_details->branch_bases_in_recombinations);
fprintf( file_pointer, "%i\t", sample_details->bases_in_recombinations);
fprintf( file_pointer, "%f\t", recombination_to_mutation_ratio(sample_details->number_of_recombinations, (sample_details->number_of_snps)));
fprintf( file_pointer, "%f\t", rho_theta(sample_details->number_of_blocks,sample_details->number_of_snps));
fprintf( file_pointer, "%i\t", sample_details->genome_length_without_gaps);
fprintf( file_pointer, "%i\t", sample_details->genome_length_without_gaps);
fprintf( file_pointer, "%i", sample_details->genome_length_excluding_blocks_and_gaps);
fprintf( file_pointer, "\n");

free(sample_details->sample_name);
free(sample_details->sample_name);
free(sample_details);
}

Expand Down

0 comments on commit b2135ab

Please sign in to comment.