Add unit tests for arcade collection input tasks (#78)

* Remove hello world
* Add docstrings and unit tests for group template conditions task
* Add docstrings and unit tests for convert to cells file task
* Add docstrings and unit tests for convert to locations file task
* Add docstrings and unit tests for merge region samples task
* Add docstrings and unit tests for generate setup file task
bagherilab · Sep 3, 2024 · 479bccc · 479bccc
1 parent ee9ad58
commit 479bccc
Show file tree

Hide file tree

Showing 11 changed files with 1,406 additions and 30 deletions.
diff --git a/src/arcade_collection/__main__.py b/src/arcade_collection/__main__.py
@@ -1,2 +0,0 @@
-if __name__ == "__main__":
-    print("hello world")

diff --git a/src/arcade_collection/input/convert_to_cells_file.py b/src/arcade_collection/input/convert_to_cells_file.py
@@ -10,6 +10,44 @@ def convert_to_cells_file(
     critical_height_distributions: dict[str, tuple[float, float]],
     state_thresholds: dict[str, float],
 ) -> list[dict]:
+    """
+    Convert all samples to cell objects.
+
+    For each cell id in samples, current volume and height are rescaled to
+    critical volume and critical height based on distribution means and standard
+    deviations. If reference volume and/or height exist for the cell id, those
+    values are used as the current values to be rescaled. Otherwise, current
+    volume is calculated from the number of voxel samples and current height is
+    calculated from the range of voxel coordinates along the z axis.
+
+    Initial cell state and cell state phase for each cell are estimated based on
+    state thresholds, the current cell volume, and the critical cell volume.
+
+    Cell object ids are reindexed starting with cell id 1.
+
+    Parameters
+    ----------
+    samples
+        Sample cell ids and coordinates.
+    reference
+        Reference values for volumes and heights.
+    volume_distributions
+        Map of volume means and standard deviations.
+    height_distributions
+        Map of height means and standard deviations.
+    critical_volume_distributions
+        Map of critical volume means and standard deviations.
+    critical_height_distributions
+        Map of critical height means and standard deviations.
+    state_thresholds
+        Critical volume fractions defining threshold between states.
+
+    Returns
+    -------
+    :
+        List of cell objects formatted for ARCADE.
+    """
+
     cells: list[dict] = []
     samples_by_id = samples.groupby("id")
 
@@ -42,30 +80,43 @@ def convert_to_cell(
     state_thresholds: dict[str, float],
 ) -> dict:
     """
-    Convert samples to ARCADE .CELLS json format.
+    Convert samples to cell object.
+
+    Current volume and height are rescaled to critical volume and critical
+    height based on distribution means and standard deviations. If reference
+    volume and/or height are provided (under the "DEFAULT" key), those values
+    are used as the current values to be rescaled. Otherwise, current volume is
+    calculated from the number of voxel samples and current height is calculated
+    from the range of voxel coordinates along the z axis.
+
+    Initial cell state and cell state phase are estimated based on state
+    thresholds, the current cell volume, and the critical cell volume.
 
     Parameters
     ----------
     cell_id
         Unique cell id.
     samples
-        Sample cell ids and coordinates.
+        Sample coordinates for a single object.
     reference
-        Reference data for conversion.
-    volume_distribution
-        Average and standard deviation of volume distributions.
-    height_distribution
-        Average and standard deviation of height distributions.
-    critical_volume_distribution
-        Average and standard deviation of critical volume distributions.
-    critical_height_distribution
-        Average and standard deviation of critical height distributions.
+        Reference data for cell.
+    volume_distributions
+        Map of volume means and standard deviations.
+    height_distributions
+        Map of height means and standard deviations.
+    critical_volume_distributions
+        Map of critical volume means and standard deviations.
+    critical_height_distributions
+        Map of critical height means and standard deviations.
+    state_thresholds
+        Critical volume fractions defining threshold between states.
 
     Returns
     -------
     :
-        Dictionary in ARCADE .CELLS json format.
+        Cell object formatted for ARCADE.
     """
+
     volume = len(samples)
     height = samples.z.max() - samples.z.min()
 
@@ -95,7 +146,7 @@ def convert_to_cell(
         "criticals": [critical_volume, critical_height],
     }
 
-    if "region" in samples.columns:
+    if "region" in samples.columns and not samples["region"].isnull().all():
         regions = [
             convert_to_cell_region(
                 region,
@@ -122,6 +173,39 @@ def convert_to_cell_region(
     critical_volume_distributions: dict[str, tuple[float, float]],
     critical_height_distributions: dict[str, tuple[float, float]],
 ) -> dict:
+    """
+    Convert region samples to cell region object.
+
+    Current region volume and height are rescaled to critical volume and
+    critical height based on distribution means and standard deviations. If
+    reference region volume and/or height are provided, those values are used as
+    the current values to be rescaled. Otherwise, current region volume is
+    calculated from the number of voxel samples and current region height is
+    calculated from the range of voxel coordinates along the z axis.
+
+    Parameters
+    ----------
+    region
+        Region name.
+    region_samples
+        Sample coordinates for region of a single object.
+    reference
+        Reference data for cell region.
+    volume_distributions
+        Map of volume means and standard deviations.
+    height_distributions
+        Map of height means and standard deviations.
+    critical_volume_distributions
+        Map of critical volume means and standard deviations.
+    critical_height_distributions
+        Map of critical height means and standard deviations.
+
+    Returns
+    -------
+    :
+        Cell region object formatted for ARCADE.
+    """
+
     region_volume = len(region_samples)
     region_height = region_samples.z.max() - region_samples.z.min()
 
@@ -152,12 +236,14 @@ def get_cell_state(
     """
     Estimates cell state based on cell volume.
 
-    The threshold fractions dictionary defines the monotonic thresholds
-    between different cell states.
-    For a given volume v, critical volume V, and states X1, X2, ..., XN with
-    corresponding, monotonic threshold fractions f1, f2, ..., fN, a cell is
-    assigned state Xi such that [f(i - 1) * V] <= v < [fi * V].
+    The threshold fractions dictionary defines the monotonic thresholds between
+    different cell states. For a given volume v, critical volume V, and states
+    X1, X2, ..., XN with corresponding, monotonic threshold fractions f1, f2,
+    ..., fN, a cell is assigned state Xi such that [f(i - 1) * V] <= v < [fi *
+    V].
+
     Cells with v < f1 * V are assigned state X1.
+
     Cells with v > fN * V are assigned state XN.
 
     Parameters
@@ -174,6 +260,7 @@ def get_cell_state(
     :
         Cell state.
     """
+
     thresholds = [fraction * critical_volume for fraction in threshold_fractions.values()]
     states = list(threshold_fractions.keys())
 
@@ -203,6 +290,7 @@ def convert_value_distribution(
     :
         Estimated critical value.
     """
+
     source_avg, source_std = source_distribution
     target_avg, target_std = target_distribution
     z_scored_value = (value - source_avg) / source_std
@@ -226,6 +314,7 @@ def filter_cell_reference(cell_id: int, reference: pd.DataFrame) -> dict:
     :
         Reference data for given cell id.
     """
+
     cell_reference = reference[reference["ID"] == cell_id].squeeze()
     cell_reference = cell_reference.to_dict() if not cell_reference.empty else {}
     return cell_reference
diff --git a/src/arcade_collection/input/convert_to_locations_file.py b/src/arcade_collection/input/convert_to_locations_file.py
@@ -4,6 +4,20 @@
 
 
 def convert_to_locations_file(samples: pd.DataFrame) -> list[dict]:
+    """
+    Convert all samples to location objects.
+
+    Parameters
+    ----------
+    samples
+        Sample cell ids and coordinates.
+
+    Returns
+    -------
+    :
+        List of location objects formatted for ARCADE.
+    """
+
     locations: list[dict] = []
     samples_by_id = samples.groupby("id")
 
@@ -15,23 +29,24 @@ def convert_to_locations_file(samples: pd.DataFrame) -> list[dict]:
 
 def convert_to_location(cell_id: int, samples: pd.DataFrame) -> dict:
     """
-    Convert samples to ARCADE .LOCATIONS json format.
+    Convert samples to location object.
 
     Parameters
     ----------
     cell_id
         Unique cell id.
     samples
-        Sample cell ids and coordinates.
+        Sample coordinates for a single object.
 
     Returns
     -------
     :
-        Dictionary in ARCADE .LOCATIONS json format.
+        Location object formatted for ARCADE.
     """
+
     center = get_center_voxel(samples)
 
-    if "region" in samples.columns:
+    if "region" in samples.columns and not samples["region"].isnull().all():
         voxels = [
             {"region": region, "voxels": get_location_voxels(samples, region)}
             for region in samples["region"].unique()
@@ -44,6 +59,7 @@ def convert_to_location(cell_id: int, samples: pd.DataFrame) -> dict:
         "center": center,
         "location": voxels,
     }
+
     return location
 
 
@@ -61,6 +77,7 @@ def get_center_voxel(samples: pd.DataFrame) -> tuple[int, int, int]:
     :
         Center voxel.
     """
+
     center_x = int(samples["x"].mean())
     center_y = int(samples["y"].mean())
     center_z = int(samples["z"].mean())
@@ -86,6 +103,7 @@ def get_location_voxels(
     :
         List of voxel coordinates.
     """
+
     if region is not None:
         region_samples = samples[samples["region"] == region]
         voxels_x = region_samples["x"]

diff --git a/src/arcade_collection/input/generate_setup_file.py b/src/arcade_collection/input/generate_setup_file.py
@@ -4,14 +4,42 @@
 import numpy as np
 import pandas as pd
 
+DEFAULT_POPULATION_ID = "X"
+"""Default population ID used in setup file."""
+
 
 def generate_setup_file(
-    samples: pd.DataFrame, margins: tuple[int, int, int], potts_terms: list[str]
+    samples: pd.DataFrame, margins: tuple[int, int, int], terms: list[str]
 ) -> str:
+    """
+    Create ARCADE setup file from samples, margins, and CPM Hamiltonian terms.
+
+    Initial number of cells is determined by number of unique ids in samples.
+    Regions are included if samples contains valid regions.
+
+    Parameters
+    ----------
+    samples
+        Sample cell ids and coordinates.
+    margins
+        Margin size in x, y, and z directions.
+    terms
+        List of Potts Hamiltonian terms for setup file.
+
+    Returns
+    -------
+    :
+        Contents of ARCADE setup file.
+    """
+
     init = len(samples["id"].unique())
     bounds = calculate_sample_bounds(samples, margins)
-    regions = samples["regions"].unique() if "regions" in samples else None
-    setup = make_setup_file(init, bounds, potts_terms, regions)
+    regions = (
+        samples["region"].unique()
+        if "region" in samples.columns and not samples["region"].isnull().all()
+        else None
+    )
+    setup = make_setup_file(init, bounds, terms, regions)
     return setup
 
 
@@ -33,6 +61,7 @@ def calculate_sample_bounds(
     :
         Bounds in x, y, and z directions.
     """
+
     mins = (min(samples.x), min(samples.y), min(samples.z))
     maxs = (max(samples.x), max(samples.y), max(samples.z))
 
@@ -64,8 +93,10 @@ def make_setup_file(
 
     Returns
     -------
+    :
         Contents of ARCADE setup file.
     """
+
     root = ET.fromstring("<set></set>")
     series = ET.SubElement(
         root,
Original file line number	Diff line number	Diff line change
		@@ -1,2 +0,0 @@
		-if __name__ == "__main__":
		-    print("hello world")