From 039197573943099d3048b40206c9b4b3de85365a Mon Sep 17 00:00:00 2001 From: jessicasyu <15913767+jessicasyu@users.noreply.github.com> Date: Tue, 3 Sep 2024 18:17:41 -0400 Subject: [PATCH] Add docstrings and unit tests for generate setup file task --- .../input/generate_setup_file.py | 37 ++++- .../input/test_generate_setup_file.py | 147 ++++++++++++++++++ 2 files changed, 181 insertions(+), 3 deletions(-) create mode 100644 tests/arcade_collection/input/test_generate_setup_file.py diff --git a/src/arcade_collection/input/generate_setup_file.py b/src/arcade_collection/input/generate_setup_file.py index 09153a0..9f54810 100644 --- a/src/arcade_collection/input/generate_setup_file.py +++ b/src/arcade_collection/input/generate_setup_file.py @@ -4,14 +4,42 @@ import numpy as np import pandas as pd +DEFAULT_POPULATION_ID = "X" +"""Default population ID used in setup file.""" + def generate_setup_file( - samples: pd.DataFrame, margins: tuple[int, int, int], potts_terms: list[str] + samples: pd.DataFrame, margins: tuple[int, int, int], terms: list[str] ) -> str: + """ + Create ARCADE setup file from samples, margins, and CPM Hamiltonian terms. + + Initial number of cells is determined by number of unique ids in samples. + Regions are included if samples contains valid regions. + + Parameters + ---------- + samples + Sample cell ids and coordinates. + margins + Margin size in x, y, and z directions. + terms + List of Potts Hamiltonian terms for setup file. + + Returns + ------- + : + Contents of ARCADE setup file. + """ + init = len(samples["id"].unique()) bounds = calculate_sample_bounds(samples, margins) - regions = samples["regions"].unique() if "regions" in samples else None - setup = make_setup_file(init, bounds, potts_terms, regions) + regions = ( + samples["region"].unique() + if "region" in samples.columns and not samples["region"].isnull().all() + else None + ) + setup = make_setup_file(init, bounds, terms, regions) return setup @@ -33,6 +61,7 @@ def calculate_sample_bounds( : Bounds in x, y, and z directions. """ + mins = (min(samples.x), min(samples.y), min(samples.z)) maxs = (max(samples.x), max(samples.y), max(samples.z)) @@ -64,8 +93,10 @@ def make_setup_file( Returns ------- + : Contents of ARCADE setup file. """ + root = ET.fromstring("") series = ET.SubElement( root, diff --git a/tests/arcade_collection/input/test_generate_setup_file.py b/tests/arcade_collection/input/test_generate_setup_file.py new file mode 100644 index 0000000..951488b --- /dev/null +++ b/tests/arcade_collection/input/test_generate_setup_file.py @@ -0,0 +1,147 @@ +import unittest + +import pandas as pd + +from arcade_collection.input.generate_setup_file import ( + DEFAULT_POPULATION_ID, + calculate_sample_bounds, + generate_setup_file, + make_setup_file, +) + + +class TestGenerateSetupFile(unittest.TestCase): + def setUp(self): + self.terms = ["term_a", "term_b", "term_c"] + self.margins = [10, 20, 30] + + self.setup_template_no_region = ( + "\n" + ' \n' + " \n" + ' \n' + ' \n' + ' \n' + " \n" + " \n" + " \n" + f' \n' + " \n" + " \n" + " \n" + "" + ) + + self.setup_template_with_region = ( + "\n" + ' \n' + " \n" + ' \n' + ' \n' + ' \n' + " \n" + " \n" + " \n" + f' \n' + f' \n' + f' \n' + " \n" + " \n" + " \n" + " \n" + "" + ) + + def test_generate_setup_file_no_regions(self): + samples = pd.DataFrame( + { + "id": [1, 2], + "x": [0, 2], + "y": [3, 7], + "z": [6, 7], + } + ) + + expected_setup = self.setup_template_no_region % (25, 47, 64, 2) + + setup = generate_setup_file(samples, self.margins, self.terms) + + self.assertEqual(expected_setup, setup) + + def test_generate_setup_file_invalid_regions(self): + samples = pd.DataFrame( + { + "id": [1, 2], + "x": [0, 2], + "y": [3, 7], + "z": [6, 7], + "region": [None, None], + } + ) + + expected_setup = self.setup_template_no_region % (25, 47, 64, 2) + + setup = generate_setup_file(samples, self.margins, self.terms) + + self.assertEqual(expected_setup, setup) + + def test_generate_setup_file_with_regions(self): + samples = pd.DataFrame( + { + "id": [1, 2], + "x": [0, 2], + "y": [3, 7], + "z": [6, 7], + "region": ["A", "B"], + } + ) + + expected_setup = self.setup_template_with_region % (25, 47, 64, 2, "A", "B") + + setup = generate_setup_file(samples, self.margins, self.terms) + + self.assertEqual(expected_setup, setup) + + def test_calculate_sample_bounds(self): + samples = pd.DataFrame( + { + "x": [0, 2], + "y": [3, 7], + "z": [6, 7], + } + ) + margins = [10, 20, 30] + + expected_bounds = (25, 47, 64) + + bounds = calculate_sample_bounds(samples, margins) + + self.assertTupleEqual(expected_bounds, bounds) + + def test_make_setup_file_no_regions(self): + init = 100 + bounds = (10, 20, 30) + regions = None + + expected_setup = self.setup_template_no_region % (*bounds, init) + + setup = make_setup_file(init, bounds, self.terms, regions) + + self.assertEqual(expected_setup, setup) + + def test_make_setup_file_with_regions(self): + init = 100 + bounds = (10, 20, 30) + regions = ["REGION_A", "REGION_B"] + + expected_setup = self.setup_template_with_region % (*bounds, init, regions[0], regions[1]) + + setup = make_setup_file(init, bounds, self.terms, regions) + + self.assertEqual(expected_setup, setup) + + +if __name__ == "__main__": + unittest.main()