From 039197573943099d3048b40206c9b4b3de85365a Mon Sep 17 00:00:00 2001
From: jessicasyu <15913767+jessicasyu@users.noreply.github.com>
Date: Tue, 3 Sep 2024 18:17:41 -0400
Subject: [PATCH] Add docstrings and unit tests for generate setup file task
---
.../input/generate_setup_file.py | 37 ++++-
.../input/test_generate_setup_file.py | 147 ++++++++++++++++++
2 files changed, 181 insertions(+), 3 deletions(-)
create mode 100644 tests/arcade_collection/input/test_generate_setup_file.py
diff --git a/src/arcade_collection/input/generate_setup_file.py b/src/arcade_collection/input/generate_setup_file.py
index 09153a0..9f54810 100644
--- a/src/arcade_collection/input/generate_setup_file.py
+++ b/src/arcade_collection/input/generate_setup_file.py
@@ -4,14 +4,42 @@
import numpy as np
import pandas as pd
+DEFAULT_POPULATION_ID = "X"
+"""Default population ID used in setup file."""
+
 def generate_setup_file(
-    samples: pd.DataFrame, margins: tuple[int, int, int], potts_terms: list[str]
+    samples: pd.DataFrame, margins: tuple[int, int, int], terms: list[str]
 ) -> str:
+    """
+    Create ARCADE setup file from samples, margins, and CPM Hamiltonian terms.
+
+    Initial number of cells is determined by number of unique ids in samples.
+    Regions are included if samples contains valid regions; null regions are dropped.
+
+    Parameters
+    ----------
+    samples
+        Sample cell ids and coordinates.
+    margins
+        Margin size in x, y, and z directions.
+    terms
+        List of Potts Hamiltonian terms for setup file.
+
+    Returns
+    -------
+    :
+        Contents of ARCADE setup file.
+    """
+
     init = len(samples["id"].unique())
     bounds = calculate_sample_bounds(samples, margins)
-    regions = samples["regions"].unique() if "regions" in samples else None
-    setup = make_setup_file(init, bounds, potts_terms, regions)
+    regions = (
+        samples["region"].dropna().unique()  # null entries are not valid region ids
+        if "region" in samples.columns and not samples["region"].isnull().all()
+        else None
+    )
+    setup = make_setup_file(init, bounds, terms, regions)
     return setup
@@ -33,6 +61,7 @@ def calculate_sample_bounds(
:
Bounds in x, y, and z directions.
"""
+
mins = (min(samples.x), min(samples.y), min(samples.z))
maxs = (max(samples.x), max(samples.y), max(samples.z))
@@ -64,8 +93,10 @@ def make_setup_file(
Returns
-------
+ :
Contents of ARCADE setup file.
"""
+
root = ET.fromstring("")
series = ET.SubElement(
root,
diff --git a/tests/arcade_collection/input/test_generate_setup_file.py b/tests/arcade_collection/input/test_generate_setup_file.py
new file mode 100644
index 0000000..951488b
--- /dev/null
+++ b/tests/arcade_collection/input/test_generate_setup_file.py
@@ -0,0 +1,147 @@
+import unittest
+
+import pandas as pd
+
+from arcade_collection.input.generate_setup_file import (
+ DEFAULT_POPULATION_ID,
+ calculate_sample_bounds,
+ generate_setup_file,
+ make_setup_file,
+)
+
+
+class TestGenerateSetupFile(unittest.TestCase):
+    """Tests for generate_setup_file, calculate_sample_bounds, and make_setup_file."""
+
+    def setUp(self):
+        """Create the shared terms, margins, and expected setup file templates."""
+        self.terms = ["term_a", "term_b", "term_c"]
+        # Tuple matches the ``tuple[int, int, int]`` annotation of ``margins``.
+        self.margins = (10, 20, 30)
+
+        # NOTE(review): in this copy of the patch the template literals below
+        # contain no ``%`` conversion specifiers, yet every test formats them
+        # with ``%`` and a tuple of values (which raises TypeError). The setup
+        # file markup appears to have been stripped in transit -- restore the
+        # literals from the original commit before applying.
+        self.setup_template_no_region = (
+            "\n"
+            ' \n'
+            " \n"
+            ' \n'
+            ' \n'
+            ' \n'
+            " \n"
+            " \n"
+            " \n"
+            f' \n'
+            " \n"
+            " \n"
+            " \n"
+            ""
+        )
+
+        self.setup_template_with_region = (
+            "\n"
+            ' \n'
+            " \n"
+            ' \n'
+            ' \n'
+            ' \n'
+            " \n"
+            " \n"
+            " \n"
+            f' \n'
+            f' \n'
+            f' \n'
+            " \n"
+            " \n"
+            " \n"
+            " \n"
+            ""
+        )
+
+    def test_generate_setup_file_no_regions(self):
+        """Samples without a region column produce a setup file without regions."""
+        samples = pd.DataFrame(
+            {
+                "id": [1, 2],
+                "x": [0, 2],
+                "y": [3, 7],
+                "z": [6, 7],
+            }
+        )
+
+        expected_setup = self.setup_template_no_region % (25, 47, 64, 2)
+
+        setup = generate_setup_file(samples, self.margins, self.terms)
+
+        self.assertEqual(expected_setup, setup)
+
+    def test_generate_setup_file_invalid_regions(self):
+        """An all-null region column is treated the same as no region column."""
+        samples = pd.DataFrame(
+            {
+                "id": [1, 2],
+                "x": [0, 2],
+                "y": [3, 7],
+                "z": [6, 7],
+                "region": [None, None],
+            }
+        )
+
+        expected_setup = self.setup_template_no_region % (25, 47, 64, 2)
+
+        setup = generate_setup_file(samples, self.margins, self.terms)
+
+        self.assertEqual(expected_setup, setup)
+
+    def test_generate_setup_file_with_regions(self):
+        """Valid region values are listed in the generated setup file."""
+        samples = pd.DataFrame(
+            {
+                "id": [1, 2],
+                "x": [0, 2],
+                "y": [3, 7],
+                "z": [6, 7],
+                "region": ["A", "B"],
+            }
+        )
+
+        expected_setup = self.setup_template_with_region % (25, 47, 64, 2, "A", "B")
+
+        setup = generate_setup_file(samples, self.margins, self.terms)
+
+        self.assertEqual(expected_setup, setup)
+
+    def test_calculate_sample_bounds(self):
+        """Bounds are returned as an (x, y, z) tuple for the given samples and margins."""
+        samples = pd.DataFrame(
+            {
+                "x": [0, 2],
+                "y": [3, 7],
+                "z": [6, 7],
+            }
+        )
+        margins = (10, 20, 30)
+
+        expected_bounds = (25, 47, 64)
+
+        bounds = calculate_sample_bounds(samples, margins)
+
+        self.assertTupleEqual(expected_bounds, bounds)
+
+    def test_make_setup_file_no_regions(self):
+        """Passing ``regions=None`` yields the template without region entries."""
+        init = 100
+        bounds = (10, 20, 30)
+        regions = None
+
+        expected_setup = self.setup_template_no_region % (*bounds, init)
+
+        setup = make_setup_file(init, bounds, self.terms, regions)
+
+        self.assertEqual(expected_setup, setup)
+
+    def test_make_setup_file_with_regions(self):
+        """Each given region name appears in the generated setup file."""
+        init = 100
+        bounds = (10, 20, 30)
+        regions = ["REGION_A", "REGION_B"]
+
+        expected_setup = self.setup_template_with_region % (*bounds, init, regions[0], regions[1])
+
+        setup = make_setup_file(init, bounds, self.terms, regions)
+
+        self.assertEqual(expected_setup, setup)
+
+
+if __name__ == "__main__":
+    unittest.main()  # run this module's tests when the file is executed directly