Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add unit tests for arcade collection input tasks #78

Merged
merged 6 commits into from
Sep 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions src/arcade_collection/__main__.py
Original file line number Diff line number Diff line change
@@ -1,2 +0,0 @@
if __name__ == "__main__":
print("hello world")
125 changes: 107 additions & 18 deletions src/arcade_collection/input/convert_to_cells_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,44 @@ def convert_to_cells_file(
critical_height_distributions: dict[str, tuple[float, float]],
state_thresholds: dict[str, float],
) -> list[dict]:
"""
Convert all samples to cell objects.

For each cell id in samples, current volume and height are rescaled to
critical volume and critical height based on distribution means and standard
deviations. If reference volume and/or height exist for the cell id, those
values are used as the current values to be rescaled. Otherwise, current
volume is calculated from the number of voxel samples and current height is
calculated from the range of voxel coordinates along the z axis.

Initial cell state and cell state phase for each cell are estimated based on
state thresholds, the current cell volume, and the critical cell volume.

Cell object ids are reindexed starting with cell id 1.

Parameters
----------
samples
Sample cell ids and coordinates.
reference
Reference values for volumes and heights.
volume_distributions
Map of volume means and standard deviations.
height_distributions
Map of height means and standard deviations.
critical_volume_distributions
Map of critical volume means and standard deviations.
critical_height_distributions
Map of critical height means and standard deviations.
state_thresholds
Critical volume fractions defining threshold between states.

Returns
-------
:
List of cell objects formatted for ARCADE.
"""

cells: list[dict] = []
samples_by_id = samples.groupby("id")

Expand Down Expand Up @@ -42,30 +80,43 @@ def convert_to_cell(
state_thresholds: dict[str, float],
) -> dict:
"""
Convert samples to ARCADE .CELLS json format.
Convert samples to cell object.

Current volume and height are rescaled to critical volume and critical
height based on distribution means and standard deviations. If reference
volume and/or height are provided (under the "DEFAULT" key), those values
are used as the current values to be rescaled. Otherwise, current volume is
calculated from the number of voxel samples and current height is calculated
from the range of voxel coordinates along the z axis.

Initial cell state and cell state phase are estimated based on state
thresholds, the current cell volume, and the critical cell volume.

Parameters
----------
cell_id
Unique cell id.
samples
Sample cell ids and coordinates.
Sample coordinates for a single object.
reference
Reference data for conversion.
volume_distribution
Average and standard deviation of volume distributions.
height_distribution
Average and standard deviation of height distributions.
critical_volume_distribution
Average and standard deviation of critical volume distributions.
critical_height_distribution
Average and standard deviation of critical height distributions.
Reference data for cell.
volume_distributions
Map of volume means and standard deviations.
height_distributions
Map of height means and standard deviations.
critical_volume_distributions
Map of critical volume means and standard deviations.
critical_height_distributions
Map of critical height means and standard deviations.
state_thresholds
Critical volume fractions defining threshold between states.

Returns
-------
:
Dictionary in ARCADE .CELLS json format.
Cell object formatted for ARCADE.
"""

volume = len(samples)
height = samples.z.max() - samples.z.min()

Expand Down Expand Up @@ -95,7 +146,7 @@ def convert_to_cell(
"criticals": [critical_volume, critical_height],
}

if "region" in samples.columns:
if "region" in samples.columns and not samples["region"].isnull().all():
regions = [
convert_to_cell_region(
region,
Expand All @@ -122,6 +173,39 @@ def convert_to_cell_region(
critical_volume_distributions: dict[str, tuple[float, float]],
critical_height_distributions: dict[str, tuple[float, float]],
) -> dict:
"""
Convert region samples to cell region object.

Current region volume and height are rescaled to critical volume and
critical height based on distribution means and standard deviations. If
reference region volume and/or height are provided, those values are used as
the current values to be rescaled. Otherwise, current region volume is
calculated from the number of voxel samples and current region height is
calculated from the range of voxel coordinates along the z axis.

Parameters
----------
region
Region name.
region_samples
Sample coordinates for region of a single object.
reference
Reference data for cell region.
volume_distributions
Map of volume means and standard deviations.
height_distributions
Map of height means and standard deviations.
critical_volume_distributions
Map of critical volume means and standard deviations.
critical_height_distributions
Map of critical height means and standard deviations.

Returns
-------
:
Cell region object formatted for ARCADE.
"""

region_volume = len(region_samples)
region_height = region_samples.z.max() - region_samples.z.min()

Expand Down Expand Up @@ -152,12 +236,14 @@ def get_cell_state(
"""
Estimate cell state based on cell volume.

The threshold fractions dictionary defines the monotonic thresholds
between different cell states.
For a given volume v, critical volume V, and states X1, X2, ..., XN with
corresponding, monotonic threshold fractions f1, f2, ..., fN, a cell is
assigned state Xi such that [f(i - 1) * V] <= v < [fi * V].
The threshold fractions dictionary defines the monotonic thresholds between
different cell states. For a given volume v, critical volume V, and states
X1, X2, ..., XN with corresponding monotonic threshold fractions f1, f2,
..., fN, a cell is assigned state Xi such that
[f(i - 1) * V] <= v < [fi * V].

Cells with v < f1 * V are assigned state X1.

Cells with v >= fN * V are assigned state XN.

Parameters
Expand All @@ -174,6 +260,7 @@ def get_cell_state(
:
Cell state.
"""

thresholds = [fraction * critical_volume for fraction in threshold_fractions.values()]
states = list(threshold_fractions.keys())

Expand Down Expand Up @@ -203,6 +290,7 @@ def convert_value_distribution(
:
Estimated critical value.
"""

source_avg, source_std = source_distribution
target_avg, target_std = target_distribution
z_scored_value = (value - source_avg) / source_std
Expand All @@ -226,6 +314,7 @@ def filter_cell_reference(cell_id: int, reference: pd.DataFrame) -> dict:
:
Reference data for given cell id.
"""

cell_reference = reference[reference["ID"] == cell_id].squeeze()
cell_reference = cell_reference.to_dict() if not cell_reference.empty else {}
return cell_reference
26 changes: 22 additions & 4 deletions src/arcade_collection/input/convert_to_locations_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,20 @@


def convert_to_locations_file(samples: pd.DataFrame) -> list[dict]:
"""
Convert all samples to location objects.

Parameters
----------
samples
Sample cell ids and coordinates.

Returns
-------
:
List of location objects formatted for ARCADE.
"""

locations: list[dict] = []
samples_by_id = samples.groupby("id")

Expand All @@ -15,23 +29,24 @@ def convert_to_locations_file(samples: pd.DataFrame) -> list[dict]:

def convert_to_location(cell_id: int, samples: pd.DataFrame) -> dict:
"""
Convert samples to ARCADE .LOCATIONS json format.
Convert samples to location object.

Parameters
----------
cell_id
Unique cell id.
samples
Sample cell ids and coordinates.
Sample coordinates for a single object.

Returns
-------
:
Dictionary in ARCADE .LOCATIONS json format.
Location object formatted for ARCADE.
"""

center = get_center_voxel(samples)

if "region" in samples.columns:
if "region" in samples.columns and not samples["region"].isnull().all():
voxels = [
{"region": region, "voxels": get_location_voxels(samples, region)}
for region in samples["region"].unique()
Expand All @@ -44,6 +59,7 @@ def convert_to_location(cell_id: int, samples: pd.DataFrame) -> dict:
"center": center,
"location": voxels,
}

return location


Expand All @@ -61,6 +77,7 @@ def get_center_voxel(samples: pd.DataFrame) -> tuple[int, int, int]:
:
Center voxel.
"""

center_x = int(samples["x"].mean())
center_y = int(samples["y"].mean())
center_z = int(samples["z"].mean())
Expand All @@ -86,6 +103,7 @@ def get_location_voxels(
:
List of voxel coordinates.
"""

if region is not None:
region_samples = samples[samples["region"] == region]
voxels_x = region_samples["x"]
Expand Down
37 changes: 34 additions & 3 deletions src/arcade_collection/input/generate_setup_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,42 @@
import numpy as np
import pandas as pd

DEFAULT_POPULATION_ID = "X"
"""Default population ID used in setup file."""


def generate_setup_file(
samples: pd.DataFrame, margins: tuple[int, int, int], potts_terms: list[str]
samples: pd.DataFrame, margins: tuple[int, int, int], terms: list[str]
) -> str:
"""
Create ARCADE setup file from samples, margins, and CPM Hamiltonian terms.

Initial number of cells is determined by number of unique ids in samples.
Regions are included if samples contains valid regions.

Parameters
----------
samples
Sample cell ids and coordinates.
margins
Margin size in x, y, and z directions.
terms
List of Potts Hamiltonian terms for setup file.

Returns
-------
:
Contents of ARCADE setup file.
"""

init = len(samples["id"].unique())
bounds = calculate_sample_bounds(samples, margins)
regions = samples["regions"].unique() if "regions" in samples else None
setup = make_setup_file(init, bounds, potts_terms, regions)
regions = (
samples["region"].unique()
if "region" in samples.columns and not samples["region"].isnull().all()
else None
)
setup = make_setup_file(init, bounds, terms, regions)
return setup


Expand All @@ -33,6 +61,7 @@ def calculate_sample_bounds(
:
Bounds in x, y, and z directions.
"""

mins = (min(samples.x), min(samples.y), min(samples.z))
maxs = (max(samples.x), max(samples.y), max(samples.z))

Expand Down Expand Up @@ -64,8 +93,10 @@ def make_setup_file(

Returns
-------
:
Contents of ARCADE setup file.
"""

root = ET.fromstring("<set></set>")
series = ET.SubElement(
root,
Expand Down
Loading