Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add parse growth file task #14

Merged
merged 23 commits into from
Nov 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/arcade_collection/output/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@
from .get_location_voxels import get_location_voxels
from .merge_parsed_results import merge_parsed_results
from .parse_cells_file import parse_cells_file
from .parse_growth_file import parse_growth_file
from .parse_locations_file import parse_locations_file

# Re-bind each imported function to the result of wrapping it with `task`, so
# the names exported by this package refer to the wrapped callables.
# NOTE(review): `task` is imported outside this hunk -- presumably a workflow
# task decorator; confirm against the top of the module.
convert_model_units = task(convert_model_units)
extract_tick_json = task(extract_tick_json)
get_location_voxels = task(get_location_voxels)
merge_parsed_results = task(merge_parsed_results)
parse_cells_file = task(parse_cells_file)
parse_growth_file = task(parse_growth_file)
parse_locations_file = task(parse_locations_file)
151 changes: 151 additions & 0 deletions src/arcade_collection/output/parse_growth_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
import json
import tarfile

import numpy as np
import pandas as pd

# Column names of the DataFrame returned by parse_growth_file. The order must
# match the order of values in each per-cell row built by
# parse_growth_timepoint.
GROWTH_COLUMNS = [
    "TICK",
    "SEED",
    "U",
    "V",
    "W",
    "Z",
    "POSITION",
    "POPULATION",
    "STATE",
    "VOLUME",
    "CYCLE",
]

# Cell state names, looked up by the integer state code read from the
# simulation output (the position in this list is the state code).
CELL_STATES = [
    "NEUTRAL",
    "APOPTOTIC",
    "QUIESCENT",
    "MIGRATORY",
    "PROLIFERATIVE",
    "SENESCENT",
    "NECROTIC",
]


def parse_growth_file(tar: tarfile.TarFile) -> pd.DataFrame:
    """
    Parses a tumor growth simulation tar file.

    Each member of the archive that has file contents is read as a UTF-8
    encoded JSON document containing a ``seed`` entry and a list of
    ``timepoints``. Members without file contents (for example, directory
    entries) are skipped.

    Parameters
    ----------
    tar :
        Tar file of simulations for different seeds.

    Returns
    -------
    :
        Parsed simulation data for all seeds and timepoints, with one row per
        cell and columns given by ``GROWTH_COLUMNS``.
    """

    all_timepoints = []

    for member in tar.getmembers():
        extracted_member = tar.extractfile(member)

        # extractfile returns None for members that are not regular files or
        # links (e.g. directories). Skip them explicitly instead of relying on
        # an assert, which is stripped when Python runs with -O.
        if extracted_member is None:
            continue

        extracted_json = json.loads(extracted_member.read().decode("utf-8"))
        seed = extracted_json["seed"]

        for timepoint in extracted_json["timepoints"]:
            all_timepoints.extend(parse_growth_timepoint(timepoint, seed))

    return pd.DataFrame(all_timepoints, columns=GROWTH_COLUMNS)


def parse_growth_timepoint(timepoint: dict, seed: int) -> list:
    """
    Flattens a single simulation timepoint into one feature row per cell.

    The input timepoint groups cells by location:

    .. code-block:: json

        {
            "time": time,
            "cells": [
                [
                    [u, v, w, z],
                    [
                        [
                            type,
                            population,
                            state,
                            position,
                            volume,
                            [cell, cycle, lengths, ...]
                        ],
                        ...
                    ]
                ],
                ...
            ]
        }

    The output contains one flat row for every cell:

    .. code-block:: json

        [
            [time, seed, u, v, w, z, position, population, state, volume, cycle],
            [time, seed, u, v, w, z, position, population, state, volume, cycle],
            ...
        ]

    The ``state`` code of each cell is replaced by its name from
    ``CELL_STATES``. The ``cycle`` value is `None` when the cell has no
    recorded cell cycle lengths (it has not yet divided); otherwise it is the
    mean of all recorded cell cycle lengths.

    Parameters
    ----------
    timepoint :
        Data for a timepoint.
    seed :
        Seed of the simulation the timepoint belongs to; copied into every row.

    Returns
    -------
    :
        Parsed data of the timepoint.
    """

    tick = timepoint["time"]
    rows = []

    for coordinates, occupants in timepoint["cells"]:
        u, v, w, z = coordinates

        # The leading entry of each cell record (its type) is not carried over.
        for _, population, state, position, volume, cycles in occupants:
            mean_cycle = np.mean(cycles) if len(cycles) > 0 else None

            rows.append(
                [
                    tick,
                    seed,
                    u,
                    v,
                    w,
                    z,
                    position,
                    population,
                    CELL_STATES[state],
                    volume,
                    mean_cycle,
                ]
            )

    return rows
125 changes: 125 additions & 0 deletions tests/arcade_collection/output/test_parse_growth_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import tarfile
import unittest
from unittest import mock

import numpy as np
import pandas as pd

from arcade_collection.output.parse_growth_file import parse_growth_file


class TestParseGrowthFile(unittest.TestCase):
    """Tests for parse_growth_file using a mocked tar archive."""

    def test_parse_growth_timepoint(self):
        """
        Checks that two mocked JSON members parse into the expected DataFrame.

        NOTE(review): the method name mentions parse_growth_timepoint, but the
        call under test is parse_growth_file.
        """
        # Mocked tar archive holding two JSON members, one per seed.
        tar_object = mock.Mock(spec=tarfile.TarFile)
        tar_object.name = "tar_object_name.tar.xz"

        first_tar_member = mock.Mock(spec=tarfile.TarInfo)
        first_tar_member.name = "first_member.json"

        second_tar_member = mock.Mock(spec=tarfile.TarInfo)
        second_tar_member.name = "second_member.json"

        tar_object.getmembers.return_value = [first_tar_member, second_tar_member]

        # Contents for seed 0: three timepoints with 2, 2, and 4 cells.
        first_json = mock.MagicMock()
        first_json.read.return_value = '{"seed": 0, "timepoints": [{"time": 0.0,"cells": [[[-33,0,33,0],[[0,1,2,0,2322.26,[]]]],[[0,0,10,0],[[1,0,2,0,2300.50,[]]]]]},{"time": 0.5,"cells": [[[-33,0,31,0],[[0,1,2,0,2522.26,[]]]],[[0,0,5,0],[[1,0,3,0,4391.91,[]]]]]},{"time": 1.0,"cells": [[[-19,0,30,0],[[0,1,1,0,2582.22,[]]]],[[0,0,7,0],[[1,0,4,0,5047.58,[800.0,512.3]]]],[[3,3,-6,0],[[0,1,2,0,2453.83,[640.0]],[1,0,3,1,2517.54,[]]]]]}]}'.encode(
            "utf-8"
        )

        # Contents for seed 1: three timepoints with 2, 2, and 4 cells.
        second_json = mock.MagicMock()
        second_json.read.return_value = '{"seed": 1, "timepoints": [{"time": 10.0,"cells": [[[-13,0,33,0],[[0,1,2,0,2372.26,[]]]],[[0,0,10,0],[[1,0,2,0,2390.50,[]]]]]},{"time": 10.5,"cells": [[[-33,0,1,0],[[0,1,2,0,2022.26,[]]]],[[0,0,8,0],[[1,0,3,0,4390.91,[]]]]]},{"time": 11.0,"cells": [[[-19,0,3,0],[[0,1,1,0,2582.22,[]]]],[[1,0,1,0],[[1,0,4,0,5040.58,[800.0,512.3]]]],[[3,0,-6,0],[[0,2,2,0,2053.83,[640.0]],[1,0,6,1,2517.54,[]]]]]}]}'.encode(
            "utf-8"
        )

        # extractfile hands back the mocked file object registered for the
        # member it is called with.
        mock_contents = {
            first_tar_member: first_json,
            second_tar_member: second_json,
        }
        tar_object.extractfile.side_effect = lambda fname, *args, **kwargs: mock_contents[fname]

        returned_df = parse_growth_file(tar_object)

        # Expected rows: the 8 cells of seed 0 followed by the 8 cells of
        # seed 1, in the order they appear in the JSON contents.
        expected_dict = {
            "TICK": [
                0.0,
                0.0,
                0.5,
                0.5,
                1.0,
                1.0,
                1.0,
                1.0,
                10.0,
                10.0,
                10.5,
                10.5,
                11.0,
                11.0,
                11.0,
                11.0,
            ],
            "SEED": [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1],
            "U": [-33, 0, -33, 0, -19, 0, 3, 3, -13, 0, -33, 0, -19, 1, 3, 3],
            "V": [0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0],
            "W": [33, 10, 31, 5, 30, 7, -6, -6, 33, 10, 1, 8, 3, 1, -6, -6],
            "Z": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            "POSITION": [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1],
            "POPULATION": [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 2, 0],
            "STATE": [
                "QUIESCENT",
                "QUIESCENT",
                "QUIESCENT",
                "MIGRATORY",
                "APOPTOTIC",
                "PROLIFERATIVE",
                "QUIESCENT",
                "MIGRATORY",
                "QUIESCENT",
                "QUIESCENT",
                "QUIESCENT",
                "MIGRATORY",
                "APOPTOTIC",
                "PROLIFERATIVE",
                "QUIESCENT",
                "NECROTIC",
            ],
            "VOLUME": [
                2322.26,
                2300.5,
                2522.26,
                4391.91,
                2582.22,
                5047.58,
                2453.83,
                2517.54,
                2372.26,
                2390.50,
                2022.26,
                4390.91,
                2582.22,
                5040.58,
                2053.83,
                2517.54,
            ],
            # None where the cell has an empty list of cycle lengths.
            "CYCLE": [
                None,
                None,
                None,
                None,
                None,
                np.mean([800.0, 512.3]),
                640.0,
                None,
                None,
                None,
                None,
                None,
                None,
                np.mean([800.0, 512.3]),
                640.0,
                None,
            ],
        }

        expected_df = pd.DataFrame(expected_dict)
        self.assertTrue(expected_df.equals(returned_df))