From b3e74027ff7d5d666de653a3a73eae023f8d2c80 Mon Sep 17 00:00:00 2001
From: chris Mitchell
Date: Tue, 5 Nov 2024 09:15:54 -0500
Subject: [PATCH 1/2] Add a test for diploid blockgroups

---
 src/models/block_group.rs | 123 ++++++++++++++++++++++++++++++++++----
 1 file changed, 111 insertions(+), 12 deletions(-)

diff --git a/src/models/block_group.rs b/src/models/block_group.rs
index 004229b2..d5961768 100644
--- a/src/models/block_group.rs
+++ b/src/models/block_group.rs
@@ -20,7 +20,6 @@ use crate::models::path::{Path, PathBlock, PathData};
 use crate::models::path_edge::PathEdge;
 use crate::models::strand::Strand;
 use crate::models::traits::*;
-use crate::test_helpers::save_graph;
 
 #[derive(Debug, Deserialize, Serialize)]
 pub struct BlockGroup {
@@ -827,9 +826,7 @@ impl BlockGroup {
 mod tests {
     use super::*;
     use crate::models::{collection::Collection, node::Node, sample::Sample, sequence::Sequence};
-    use crate::test_helpers::{
-        get_connection, interval_tree_verify, save_graph, setup_block_group,
-    };
+    use crate::test_helpers::{get_connection, interval_tree_verify, setup_block_group};
 
     #[test]
     fn test_blockgroup_create() {
@@ -1946,10 +1943,6 @@ mod tests {
     fn test_changes_against_derivative_blockgroups() {
         let conn = &get_connection(None);
         let (block_group_id, path) = setup_block_group(conn);
-        save_graph(
-            &BlockGroup::get_graph(conn, block_group_id),
-            &format!("parent_{block_group_id}.dot"),
-        );
         let new_sample = Sample::create(conn, "child");
         let new_bg_id =
             BlockGroup::get_or_create_sample_block_group(conn, "test", "child", "chr1", None)
@@ -1992,10 +1985,6 @@ mod tests {
             all_sequences,
             HashSet::from_iter(vec!["AAAAAAANNNNTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),])
         );
-        save_graph(
-            &BlockGroup::get_graph(conn, new_bg_id),
-            &format!("child_{new_bg_id}.dot"),
-        );
 
         // Now, we make a change against another descendant
         let new_sample = Sample::create(conn, "grandchild");
@@ -2042,4 +2031,114 @@ mod tests {
             HashSet::from_iter(vec!["AAAAAAANNNNTCCCCCCCCCCGGGGGGGGGG".to_string(),])
         )
     }
+
+    #[test]
+    fn test_changes_against_derivative_diploid_blockgroups() {
+        // This test ensures that if we have heterozygous changes that do not introduce frameshifts,
+        // we can modify regions downstream of them.
+        let conn = &get_connection(None);
+        let (_block_group_id, _path) = setup_block_group(conn);
+        let _child_sample = Sample::create(conn, "child");
+        let new_bg_id =
+            BlockGroup::get_or_create_sample_block_group(conn, "test", "child", "chr1", None)
+                .unwrap();
+        let new_path = Path::query(
+            conn,
+            "select * from paths where block_group_id = ?1",
+            vec![SQLValue::from(new_bg_id)],
+        );
+        let insert_sequence = Sequence::new()
+            .sequence_type("DNA")
+            .sequence("NNNN")
+            .save(conn);
+        let insert_node_id = Node::create(conn, insert_sequence.hash.as_str(), None);
+        let insert = PathBlock {
+            id: 0,
+            node_id: insert_node_id,
+            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
+            sequence_start: 0,
+            sequence_end: 4,
+            path_start: 7,
+            path_end: 11,
+            strand: Strand::Forward,
+        };
+        let change = PathChange {
+            block_group_id: new_bg_id,
+            path: new_path[0].clone(),
+            path_accession: None,
+            start: 7,
+            end: 11,
+            block: insert,
+            chromosome_index: 1,
+            phased: 0,
+        };
+        // note we are making our change against the new blockgroup, and not the parent blockgroup
+        let tree = BlockGroup::intervaltree_for(conn, new_bg_id, true);
+        BlockGroup::insert_change(conn, &change, &tree);
+        let all_sequences = BlockGroup::get_all_sequences(conn, new_bg_id, true);
+        assert_eq!(
+            all_sequences,
+            HashSet::from_iter(vec![
+                "AAAAAAANNNNTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
+                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
+            ])
+        );
+
+        // Now, we make a change against another descendant
+        let _grandchild_sample = Sample::create(conn, "grandchild");
+        let gc_bg_id = BlockGroup::get_or_create_sample_block_group(
+            conn,
+            "test",
+            "grandchild",
+            "chr1",
+            Some("child"),
+        )
+        .unwrap();
+        let new_path = Path::query(
+            conn,
+            "select * from paths where block_group_id = ?1",
+            vec![SQLValue::from(gc_bg_id)],
+        );
+
+        let insert_sequence = Sequence::new()
+            .sequence_type("DNA")
+            .sequence("NNNN")
+            .save(conn);
+        let insert_node_id =
+            Node::create(conn, insert_sequence.hash.as_str(), "new-hash".to_string());
+
+        let insert = PathBlock {
+            id: 0,
+            node_id: insert_node_id,
+            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
+            sequence_start: 0,
+            sequence_end: 4,
+            path_start: 20,
+            path_end: 24,
+            strand: Strand::Forward,
+        };
+        let change = PathChange {
+            block_group_id: gc_bg_id,
+            path: new_path[0].clone(),
+            path_accession: None,
+            start: 20,
+            end: 24,
+            block: insert,
+            chromosome_index: 1,
+            phased: 0,
+        };
+        // The upstream change kept the frame, so 20..24 is addressable on both haplotypes.
+        let tree = BlockGroup::intervaltree_for(conn, gc_bg_id, true);
+        BlockGroup::insert_change(conn, &change, &tree);
+        let all_sequences = BlockGroup::get_all_sequences(conn, gc_bg_id, true);
+        assert_eq!(
+            all_sequences,
+            HashSet::from_iter(vec![
+                "AAAAAAANNNNTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
+                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
+                "AAAAAAAAAATTTTTTTTTTNNNNCCCCCCGGGGGGGGGG".to_string(),
+                "AAAAAAANNNNTTTTTTTTTNNNNCCCCCCGGGGGGGGGG".to_string()
+            ])
+        )
+    }
 }

From 8f9803f85b1192ce96a3ca894af38e0f163fd878 Mon Sep 17 00:00:00 2001
From: chris Mitchell
Date: Tue, 5 Nov 2024 09:19:22 -0500
Subject: [PATCH 2/2] Test error case

---
 src/models/block_group.rs | 107 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 107 insertions(+)

diff --git a/src/models/block_group.rs b/src/models/block_group.rs
index d5961768..7d16657b 100644
--- a/src/models/block_group.rs
+++ b/src/models/block_group.rs
@@ -2141,4 +2141,111 @@ mod tests {
             ])
         )
     }
+
+    #[test]
+    fn test_prohibits_out_of_frame_changes_against_derivative_diploid_blockgroups() {
+        // This test ensures that we do not allow ambiguous changes by coordinates
+        let conn = &get_connection(None);
+        let (_block_group_id, _path) = setup_block_group(conn);
+        let _child_sample = Sample::create(conn, "child");
+        let new_bg_id =
+            BlockGroup::get_or_create_sample_block_group(conn, "test", "child", "chr1", None)
+                .unwrap();
+        let new_path = Path::query(
+            conn,
+            "select * from paths where block_group_id = ?1",
+            vec![SQLValue::from(new_bg_id)],
+        );
+        // This is a heterozygous replacement of 5 bases with 4 bases, so positions
+        // downstream of this are not addressable.
+        let insert_sequence = Sequence::new()
+            .sequence_type("DNA")
+            .sequence("NNNN")
+            .save(conn);
+        let insert_node_id = Node::create(conn, insert_sequence.hash.as_str(), None);
+        let insert = PathBlock {
+            id: 0,
+            node_id: insert_node_id,
+            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
+            sequence_start: 0,
+            sequence_end: 4,
+            path_start: 7,
+            path_end: 12,
+            strand: Strand::Forward,
+        };
+        let change = PathChange {
+            block_group_id: new_bg_id,
+            path: new_path[0].clone(),
+            path_accession: None,
+            start: 7,
+            end: 12,
+            block: insert,
+            chromosome_index: 1,
+            phased: 0,
+        };
+        // note we are making our change against the new blockgroup, and not the parent blockgroup
+        let tree = BlockGroup::intervaltree_for(conn, new_bg_id, true);
+        BlockGroup::insert_change(conn, &change, &tree);
+        let all_sequences = BlockGroup::get_all_sequences(conn, new_bg_id, true);
+        assert_eq!(
+            all_sequences,
+            HashSet::from_iter(vec![
+                "AAAAAAANNNNTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
+                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
+            ])
+        );
+
+        // Now, we make a change against another descendant and get an error
+        let _grandchild_sample = Sample::create(conn, "grandchild");
+        let gc_bg_id = BlockGroup::get_or_create_sample_block_group(
+            conn,
+            "test",
+            "grandchild",
+            "chr1",
+            Some("child"),
+        )
+        .unwrap();
+        let new_path = Path::query(
+            conn,
+            "select * from paths where block_group_id = ?1",
+            vec![SQLValue::from(gc_bg_id)],
+        );
+
+        let insert_sequence = Sequence::new()
+            .sequence_type("DNA")
+            .sequence("NNNN")
+            .save(conn);
+        let insert_node_id =
+            Node::create(conn, insert_sequence.hash.as_str(), "new-hash".to_string());
+
+        let insert = PathBlock {
+            id: 0,
+            node_id: insert_node_id,
+            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
+            sequence_start: 0,
+            sequence_end: 4,
+            path_start: 20,
+            path_end: 24,
+            strand: Strand::Forward,
+        };
+        let change = PathChange {
+            block_group_id: gc_bg_id,
+            path: new_path[0].clone(),
+            path_accession: None,
+            start: 20,
+            end: 24,
+            block: insert,
+            chromosome_index: 1,
+            phased: 0,
+        };
+        // Only applying the downstream change may panic; setup failures above must fail the test.
+        let outcome = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
+            let tree = BlockGroup::intervaltree_for(conn, gc_bg_id, true);
+            BlockGroup::insert_change(conn, &change, &tree);
+        }));
+        assert!(
+            outcome.is_err(),
+            "a change downstream of an out-of-frame heterozygous edit must be rejected"
+        );
+    }
 }