Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a test for diploid blockgroups #84

Merged
merged 2 commits into from
Nov 5, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
225 changes: 213 additions & 12 deletions src/models/block_group.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ use crate::models::path::{Path, PathBlock, PathData};
use crate::models::path_edge::PathEdge;
use crate::models::strand::Strand;
use crate::models::traits::*;
use crate::test_helpers::save_graph;

#[derive(Debug, Deserialize, Serialize)]
pub struct BlockGroup {
Expand Down Expand Up @@ -827,9 +826,7 @@ impl BlockGroup {
mod tests {
use super::*;
use crate::models::{collection::Collection, node::Node, sample::Sample, sequence::Sequence};
use crate::test_helpers::{
get_connection, interval_tree_verify, save_graph, setup_block_group,
};
use crate::test_helpers::{get_connection, interval_tree_verify, setup_block_group};

#[test]
fn test_blockgroup_create() {
Expand Down Expand Up @@ -1946,10 +1943,6 @@ mod tests {
fn test_changes_against_derivative_blockgroups() {
let conn = &get_connection(None);
let (block_group_id, path) = setup_block_group(conn);
save_graph(
&BlockGroup::get_graph(conn, block_group_id),
&format!("parent_{block_group_id}.dot"),
);
let new_sample = Sample::create(conn, "child");
let new_bg_id =
BlockGroup::get_or_create_sample_block_group(conn, "test", "child", "chr1", None)
Expand Down Expand Up @@ -1992,10 +1985,6 @@ mod tests {
all_sequences,
HashSet::from_iter(vec!["AAAAAAANNNNTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),])
);
save_graph(
&BlockGroup::get_graph(conn, new_bg_id),
&format!("child_{new_bg_id}.dot"),
);

// Now, we make a change against another descendant
let new_sample = Sample::create(conn, "grandchild");
Expand Down Expand Up @@ -2042,4 +2031,216 @@ mod tests {
HashSet::from_iter(vec!["AAAAAAANNNNTCCCCCCCCCCGGGGGGGGGG".to_string(),])
)
}

#[test]
fn test_changes_against_derivative_diploid_blockgroups() {
// This test ensures that if we have heterozygous changes that do not introduce frameshifts,
// we can modify regions downstream of them.
let conn = &get_connection(None);
let (block_group_id, path) = setup_block_group(conn);
let new_sample = Sample::create(conn, "child");
let new_bg_id =
BlockGroup::get_or_create_sample_block_group(conn, "test", "child", "chr1", None)
.unwrap();
let new_path = Path::query(
conn,
"select * from paths where block_group_id = ?1",
vec![SQLValue::from(new_bg_id)],
);
let insert_sequence = Sequence::new()
.sequence_type("DNA")
.sequence("NNNN")
.save(conn);
let insert_node_id = Node::create(conn, insert_sequence.hash.as_str(), None);
let insert = PathBlock {
id: 0,
node_id: insert_node_id,
block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
sequence_start: 0,
sequence_end: 4,
path_start: 7,
path_end: 11,
strand: Strand::Forward,
};
let change = PathChange {
block_group_id: new_bg_id,
path: new_path[0].clone(),
path_accession: None,
start: 7,
end: 11,
block: insert,
chromosome_index: 1,
phased: 0,
};
// note we are making our change against the new blockgroup, and not the parent blockgroup
let tree = BlockGroup::intervaltree_for(conn, new_bg_id, true);
BlockGroup::insert_change(conn, &change, &tree);
let all_sequences = BlockGroup::get_all_sequences(conn, new_bg_id, true);
assert_eq!(
all_sequences,
HashSet::from_iter(vec![
"AAAAAAANNNNTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
"AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
])
);

// Now, we make a change against another descendant
let new_sample = Sample::create(conn, "grandchild");
let gc_bg_id = BlockGroup::get_or_create_sample_block_group(
conn,
"test",
"grandchild",
"chr1",
Some("child"),
)
.unwrap();
let new_path = Path::query(
conn,
"select * from paths where block_group_id = ?1",
vec![SQLValue::from(gc_bg_id)],
);

let insert_sequence = Sequence::new()
.sequence_type("DNA")
.sequence("NNNN")
.save(conn);
let insert_node_id =
Node::create(conn, insert_sequence.hash.as_str(), "new-hash".to_string());

let insert = PathBlock {
id: 0,
node_id: insert_node_id,
block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
sequence_start: 0,
sequence_end: 4,
path_start: 20,
path_end: 24,
strand: Strand::Forward,
};
let change = PathChange {
block_group_id: gc_bg_id,
path: new_path[0].clone(),
path_accession: None,
start: 20,
end: 24,
block: insert,
chromosome_index: 1,
phased: 0,
};
// take out an entire block.
let tree = BlockGroup::intervaltree_for(conn, gc_bg_id, true);
BlockGroup::insert_change(conn, &change, &tree);
let all_sequences = BlockGroup::get_all_sequences(conn, gc_bg_id, true);
assert_eq!(
all_sequences,
HashSet::from_iter(vec![
"AAAAAAANNNNTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
"AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
"AAAAAAAAAATTTTTTTTTTNNNNCCCCCCGGGGGGGGGG".to_string(),
"AAAAAAANNNNTTTTTTTTTNNNNCCCCCCGGGGGGGGGG".to_string()
])
)
}

#[test]
#[should_panic]
fn test_prohibits_out_of_frame_changes_against_derivative_diploid_blockgroups() {
// This test ensures that we do not allow ambiguous changes by coordinates
let conn = &get_connection(None);
let (block_group_id, path) = setup_block_group(conn);
let new_sample = Sample::create(conn, "child");
let new_bg_id =
BlockGroup::get_or_create_sample_block_group(conn, "test", "child", "chr1", None)
.unwrap();
let new_path = Path::query(
conn,
"select * from paths where block_group_id = ?1",
vec![SQLValue::from(new_bg_id)],
);
// This is a heterozygous replacement of 5 bases with 4 bases, so positions
// downstream of this are not addressable.
let insert_sequence = Sequence::new()
.sequence_type("DNA")
.sequence("NNNN")
.save(conn);
let insert_node_id = Node::create(conn, insert_sequence.hash.as_str(), None);
let insert = PathBlock {
id: 0,
node_id: insert_node_id,
block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
sequence_start: 0,
sequence_end: 4,
path_start: 7,
path_end: 12,
strand: Strand::Forward,
};
let change = PathChange {
block_group_id: new_bg_id,
path: new_path[0].clone(),
path_accession: None,
start: 7,
end: 12,
block: insert,
chromosome_index: 1,
phased: 0,
};
// note we are making our change against the new blockgroup, and not the parent blockgroup
let tree = BlockGroup::intervaltree_for(conn, new_bg_id, true);
BlockGroup::insert_change(conn, &change, &tree);
let all_sequences = BlockGroup::get_all_sequences(conn, new_bg_id, true);
assert_eq!(
all_sequences,
HashSet::from_iter(vec![
"AAAAAAANNNNTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
"AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
])
);

// Now, we make a change against another descendant and get an error
let new_sample = Sample::create(conn, "grandchild");
let gc_bg_id = BlockGroup::get_or_create_sample_block_group(
conn,
"test",
"grandchild",
"chr1",
Some("child"),
)
.unwrap();
let new_path = Path::query(
conn,
"select * from paths where block_group_id = ?1",
vec![SQLValue::from(gc_bg_id)],
);

let insert_sequence = Sequence::new()
.sequence_type("DNA")
.sequence("NNNN")
.save(conn);
let insert_node_id =
Node::create(conn, insert_sequence.hash.as_str(), "new-hash".to_string());

let insert = PathBlock {
id: 0,
node_id: insert_node_id,
block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
sequence_start: 0,
sequence_end: 4,
path_start: 20,
path_end: 24,
strand: Strand::Forward,
};
let change = PathChange {
block_group_id: gc_bg_id,
path: new_path[0].clone(),
path_accession: None,
start: 20,
end: 24,
block: insert,
chromosome_index: 1,
phased: 0,
};
// take out an entire block.
let tree = BlockGroup::intervaltree_for(conn, gc_bg_id, true);
BlockGroup::insert_change(conn, &change, &tree);
}
}