From 039197573943099d3048b40206c9b4b3de85365a Mon Sep 17 00:00:00 2001
From: jessicasyu <15913767+jessicasyu@users.noreply.github.com>
Date: Tue, 3 Sep 2024 18:17:41 -0400
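Subject: [PATCH] Add docstrings and unit tests for generate setup file task

Rename the `potts_terms` parameter of generate_setup_file to `terms`, read
regions from the `region` column (previously `regions`), and omit regions
when every value in that column is null.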
---
 .../input/generate_setup_file.py              |  37 ++++-
 .../input/test_generate_setup_file.py         | 147 ++++++++++++++++++
 2 files changed, 181 insertions(+), 3 deletions(-)
 create mode 100644 tests/arcade_collection/input/test_generate_setup_file.py
diff --git a/src/arcade_collection/input/generate_setup_file.py b/src/arcade_collection/input/generate_setup_file.py
index 09153a0..9f54810 100644
--- a/src/arcade_collection/input/generate_setup_file.py
+++ b/src/arcade_collection/input/generate_setup_file.py
@@ -4,14 +4,42 @@
 import numpy as np
 import pandas as pd
 
+DEFAULT_POPULATION_ID = "X"
+"""Default population ID used in setup file."""
+
 
 def generate_setup_file(
-    samples: pd.DataFrame, margins: tuple[int, int, int], potts_terms: list[str]
+    samples: pd.DataFrame, margins: tuple[int, int, int], terms: list[str]
 ) -> str:
+    """
+    Create ARCADE setup file from samples, margins, and CPM Hamiltonian terms.
+
+    Initial number of cells is determined by number of unique ids in samples.
+    Regions are the unique non-null region values; omitted if there are none.
+
+    Parameters
+    ----------
+    samples
+        Sample cell ids and coordinates, with an optional region column.
+    margins
+        Margin size in x, y, and z directions.
+    terms
+        List of Potts Hamiltonian terms for setup file.
+
+    Returns
+    -------
+    :
+        Contents of ARCADE setup file.
+    """
+
     init = len(samples["id"].unique())
     bounds = calculate_sample_bounds(samples, margins)
-    regions = samples["regions"].unique() if "regions" in samples else None
-    setup = make_setup_file(init, bounds, potts_terms, regions)
+    regions = (
+        samples["region"].dropna().unique()
+        if "region" in samples.columns and not samples["region"].isnull().all()
+        else None
+    )
+    setup = make_setup_file(init, bounds, terms, regions)
     return setup
 
 
@@ -33,6 +61,7 @@ def calculate_sample_bounds(
     :
         Bounds in x, y, and z directions.
     """
+
     mins = (min(samples.x), min(samples.y), min(samples.z))
     maxs = (max(samples.x), max(samples.y), max(samples.z))
 
@@ -64,8 +93,10 @@ def make_setup_file(
 
     Returns
     -------
+    :
         Contents of ARCADE setup file.
     """
+
     root = ET.fromstring("<set></set>")
     series = ET.SubElement(
         root,
diff --git a/tests/arcade_collection/input/test_generate_setup_file.py b/tests/arcade_collection/input/test_generate_setup_file.py
new file mode 100644
index 0000000..951488b
--- /dev/null
+++ b/tests/arcade_collection/input/test_generate_setup_file.py
@@ -0,0 +1,147 @@
+import unittest
+
+import pandas as pd
+
+from arcade_collection.input.generate_setup_file import (
+    DEFAULT_POPULATION_ID,
+    calculate_sample_bounds,
+    generate_setup_file,
+    make_setup_file,
+)
+
+
+class TestGenerateSetupFile(unittest.TestCase):
+    def setUp(self):
+        self.terms = ["term_a", "term_b", "term_c"]
+        self.margins = (10, 20, 30)
+
+        # %-style templates: length, width, height, init, then any region ids.
+        self.setup_template_no_region = (
+            "<set>\n"
+            '    <series name="ARCADE" interval="1" start="0" end="0" dt="1" ds="1" ticks="1"'
+            ' length="%d" width="%d" height="%d">\n'
+            "        <potts>\n"
+            '            <potts.term id="term_a" />\n'
+            '            <potts.term id="term_b" />\n'
+            '            <potts.term id="term_c" />\n'
+            "        </potts>\n"
+            "        <agents>\n"
+            "            <populations>\n"
+            f'                <population id="{DEFAULT_POPULATION_ID}" init="%d" />\n'
+            "            </populations>\n"
+            "        </agents>\n"
+            "    </series>\n"
+            "</set>"
+        )
+
+        self.setup_template_with_region = (
+            "<set>\n"
+            '    <series name="ARCADE" interval="1" start="0" end="0" dt="1" ds="1" ticks="1"'
+            ' length="%d" width="%d" height="%d">\n'
+            "        <potts>\n"
+            '            <potts.term id="term_a" />\n'
+            '            <potts.term id="term_b" />\n'
+            '            <potts.term id="term_c" />\n'
+            "        </potts>\n"
+            "        <agents>\n"
+            "            <populations>\n"
+            f'                <population id="{DEFAULT_POPULATION_ID}" init="%d">\n'
+            '                    <population.region id="%s" />\n'
+            '                    <population.region id="%s" />\n'
+            "                </population>\n"
+            "            </populations>\n"
+            "        </agents>\n"
+            "    </series>\n"
+            "</set>"
+        )
+
+    def test_generate_setup_file_no_regions(self):
+        samples = pd.DataFrame(
+            {
+                "id": [1, 2],
+                "x": [0, 2],
+                "y": [3, 7],
+                "z": [6, 7],
+            }
+        )
+
+        expected_setup = self.setup_template_no_region % (25, 47, 64, 2)
+
+        setup = generate_setup_file(samples, self.margins, self.terms)
+
+        self.assertEqual(expected_setup, setup)
+
+    def test_generate_setup_file_invalid_regions(self):
+        samples = pd.DataFrame(
+            {
+                "id": [1, 2],
+                "x": [0, 2],
+                "y": [3, 7],
+                "z": [6, 7],
+                "region": [None, None],
+            }
+        )
+
+        expected_setup = self.setup_template_no_region % (25, 47, 64, 2)
+
+        setup = generate_setup_file(samples, self.margins, self.terms)
+
+        self.assertEqual(expected_setup, setup)
+
+    def test_generate_setup_file_with_regions(self):
+        samples = pd.DataFrame(
+            {
+                "id": [1, 2],
+                "x": [0, 2],
+                "y": [3, 7],
+                "z": [6, 7],
+                "region": ["A", "B"],
+            }
+        )
+
+        expected_setup = self.setup_template_with_region % (25, 47, 64, 2, "A", "B")
+
+        setup = generate_setup_file(samples, self.margins, self.terms)
+
+        self.assertEqual(expected_setup, setup)
+
+    def test_calculate_sample_bounds(self):
+        samples = pd.DataFrame(
+            {
+                "x": [0, 2],
+                "y": [3, 7],
+                "z": [6, 7],
+            }
+        )
+
+        expected_bounds = (25, 47, 64)
+
+        bounds = calculate_sample_bounds(samples, self.margins)
+
+        self.assertTupleEqual(expected_bounds, bounds)
+
+    def test_make_setup_file_no_regions(self):
+        init = 100
+        bounds = (10, 20, 30)
+        regions = None
+
+        expected_setup = self.setup_template_no_region % (*bounds, init)
+
+        setup = make_setup_file(init, bounds, self.terms, regions)
+
+        self.assertEqual(expected_setup, setup)
+
+    def test_make_setup_file_with_regions(self):
+        init = 100
+        bounds = (10, 20, 30)
+        regions = ["REGION_A", "REGION_B"]
+
+        expected_setup = self.setup_template_with_region % (*bounds, init, regions[0], regions[1])
+
+        setup = make_setup_file(init, bounds, self.terms, regions)
+
+        self.assertEqual(expected_setup, setup)
+
+
+if __name__ == "__main__":
+    unittest.main()  # Run this module's tests when the file is executed directly.