From 7c32a56191a39b0ac13f7a0bb2d3dd7fd22a40ca Mon Sep 17 00:00:00 2001 From: Isabelle Chen Date: Fri, 23 Dec 2022 00:26:02 -0600 Subject: [PATCH 01/20] Initial edits on parse growth file --- .../output/parse_growth_file.py | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 src/arcade_collection/output/parse_growth_file.py diff --git a/src/arcade_collection/output/parse_growth_file.py b/src/arcade_collection/output/parse_growth_file.py new file mode 100644 index 0000000..8dfc2ba --- /dev/null +++ b/src/arcade_collection/output/parse_growth_file.py @@ -0,0 +1,62 @@ +from typing import List, Union +import json +import tarfile + +import pandas as pd +from prefect import task + + +@task +def parse_growth_file(tar: tarfile.TarFile) -> pd.DataFrame: + all_timepoints = [] + for member in tar.getmembers(): + seed = 0 + + +def parse_growth_timepoint(): + time_index = self.timepoints.index(timepoint) + + parsed_data = [] + sim_timepoint = loaded_simulation["timepoints"][time_index]["cells"] + param_timepoint = loaded_param_simulation["timepoints"][time_index]["cells"] + + for (location, cells), (_, param_cells) in zip(sim_timepoint, param_timepoint): + u = int(location[0]) + v = int(location[1]) + w = int(location[2]) + z = int(location[3]) + szudzik_coordinate = self.get_szudzik_pair(u, v) + + for cell, param_cell in zip(cells, param_cells): + population = cell[1] + state = cell[2] + position = cell[3] + volume = np.round(cell[4]) + cycle = np.round(np.mean(cell[5])) + max_height = param_cell[4][3] + meta_pref = param_cell[4][8] + migra_threshold = param_cell[4][9] + + data_list = [ + self.key, + self.seed, + timepoint, + szudzik_coordinate, + u, + v, + w, + z, + position, + str(population), + str(state), + volume, + cycle, + max_height, + meta_pref, + migra_threshold, + ] + + parsed_data.append(data_list) + + columns = [feature.name for feature in self.get_feature_list()] + return pd.DataFrame(parsed_data, columns=columns) From 70599ba17821365f056838039aea842f42a43fd9 Mon Sep 17 00:00:00 2001 From: Isabelle Chen Date: Tue, 3 Jan 2023 10:24:05 -0600 Subject: [PATCH 02/20] Add preliminary code for parse growth file --- tests/arcade_collection/unit/test_parse_growth_file.py | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 tests/arcade_collection/unit/test_parse_growth_file.py diff --git a/tests/arcade_collection/unit/test_parse_growth_file.py b/tests/arcade_collection/unit/test_parse_growth_file.py new file mode 100644 index 0000000..6937b28 --- /dev/null +++ b/tests/arcade_collection/unit/test_parse_growth_file.py @@ -0,0 +1,10 @@ +import unittest +from unittest import mock + +from arcade_collection.output.parse_growth_file import parse_growth_file + + +class TestParseGrowthFile(unittest.TestCase): + @mock.patch("arcade_collection.output.parse_growth_file.tarfile") + def test_parse_growth_timepoint(self, tar_mock): + tar_object = mock.Mock(spec=tar_mock.TarFile) From d5852068409a1eaa0cdaca7149819e339fc943ab Mon Sep 17 00:00:00 2001 From: Isabelle Chen Date: Fri, 6 Jan 2023 18:19:29 -0600 Subject: [PATCH 03/20] Update the python testing version --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 4e1f860..53bbacf 100644 --- a/tox.ini +++ b/tox.ini @@ -1,6 +1,6 @@ [tox] isolated_build = True -envlist = py{39}, linter, mypy +envlist = py{310}, linter, mypy skipsdist=True [testenv] From c860a7f8f357d3a777c69ed03946d81447ba70ec Mon Sep 17 00:00:00 2001 From: Isabelle Chen Date: Fri, 6 Jan 2023 18:19:54 -0600 Subject: [PATCH 04/20] Fix module not found error in tests --- src/arcade_collection/output/__init__.py | 2 + .../output/parse_growth_file.py | 90 +++++++++++++----- .../output/parse_params_file.py | 26 +++++ tests/arcade_collection/__init__.py | 0 .../test_parse_growth_file.py | 0 .../._VIVO_HET_GRAPH_040_040_XABC_00.json | Bin 0 -> 276 bytes 6 files changed, 93 insertions(+), 25 deletions(-) create mode 100644 src/arcade_collection/output/parse_params_file.py delete mode 100644 tests/arcade_collection/__init__.py rename tests/arcade_collection/{unit => output}/test_parse_growth_file.py (100%) create mode 100755 untitled/._VIVO_HET_GRAPH_040_040_XABC_00.json diff --git a/src/arcade_collection/output/__init__.py b/src/arcade_collection/output/__init__.py index cac6837..4d14b7b 100644 --- a/src/arcade_collection/output/__init__.py +++ b/src/arcade_collection/output/__init__.py @@ -3,3 +3,5 @@ from .merge_parsed_results import merge_parsed_results from .parse_cells_file import parse_cells_file from .parse_locations_file import parse_locations_file +from .parse_growth_file import parse_growth_file +from .parse_params_file import parse_params_file diff --git a/src/arcade_collection/output/parse_growth_file.py b/src/arcade_collection/output/parse_growth_file.py index 8dfc2ba..498420f 100644 --- a/src/arcade_collection/output/parse_growth_file.py +++ b/src/arcade_collection/output/parse_growth_file.py @@ -1,62 +1,102 @@ from typing import List, Union import json import tarfile +from prefect import task +import numpy as np import pandas as pd -from prefect import task +import ntpath +from os import path + + +GROWTH_COLUMNS = [ + "TICK", + "SEED", + "U", + "V", + "W", + "Z", + "POSITION", + "POPULATION", + "STATE", + "VOLUME", + "CYCLE", +] @task def parse_growth_file(tar: tarfile.TarFile) -> pd.DataFrame: all_timepoints = [] for member in tar.getmembers(): - seed = 0 + extracted_member = tar.extractfile(member) + if extracted_member is not None: + base = ntpath.basename(member.name) + base_file = path.splitext(base) + file_name = base_file[0] + extension = base_file[1] + if file_name[0] != "." and extension == ".json": + extracted_json = json.loads(extracted_member.read().decode("utf-8")) + seed = extracted_json["seed"] + for timepoint in extracted_json["timepoints"]: + one_timepoint = parse_growth_timepoint(timepoint, seed) + for data in one_timepoint: + all_timepoints.append(data) -def parse_growth_timepoint(): - time_index = self.timepoints.index(timepoint) + timepoints_df = pd.DataFrame(all_timepoints, columns=GROWTH_COLUMNS) + return timepoints_df + +def convert_state_to_string(state_index: int) -> str: + if state_index == 0: + return "NEU" + elif state_index == 1: + return "APO" + elif state_index == 2: + return "QUI" + elif state_index == 3: + return "MIG" + elif state_index == 4: + return "PRO" + elif state_index == 5: + return "SEN" + elif state_index == 6: + return "NEC" + + +def parse_growth_timepoint(timepoint: dict, seed: int) -> List[list]: parsed_data = [] - sim_timepoint = loaded_simulation["timepoints"][time_index]["cells"] - param_timepoint = loaded_param_simulation["timepoints"][time_index]["cells"] - for (location, cells), (_, param_cells) in zip(sim_timepoint, param_timepoint): + for (location, cells) in timepoint["cells"]: u = int(location[0]) v = int(location[1]) w = int(location[2]) z = int(location[3]) - szudzik_coordinate = self.get_szudzik_pair(u, v) - for cell, param_cell in zip(cells, param_cells): + for cell in cells: population = cell[1] state = cell[2] position = cell[3] volume = np.round(cell[4]) - cycle = np.round(np.mean(cell[5])) - max_height = param_cell[4][3] - meta_pref = param_cell[4][8] - migra_threshold = param_cell[4][9] - + if len(cell[5]) == 0: + cycle = -1 + else: + cycle = np.round(np.mean(cell[5])) + time = timepoint["time"] data_list = [ - self.key, - self.seed, - timepoint, - szudzik_coordinate, + time, + seed, u, v, w, z, position, - str(population), - str(state), + population, + convert_state_to_string(state), volume, cycle, - max_height, - meta_pref, - migra_threshold, ] parsed_data.append(data_list) - columns = [feature.name for feature in self.get_feature_list()] - return pd.DataFrame(parsed_data, columns=columns) + return parsed_data diff --git a/src/arcade_collection/output/parse_params_file.py b/src/arcade_collection/output/parse_params_file.py new file mode 100644 index 0000000..e3f9186 --- /dev/null +++ b/src/arcade_collection/output/parse_params_file.py @@ -0,0 +1,26 @@ +from prefect import task +import json +import tarfile +import pandas as pd + + +@task +def parse_params_file(tar: tarfile.TarFile) -> pd.DataFrame: + all_timepoints = [] + + for member in tar.getmembers(): + seed = 0 + extracted_member = tar.extractfile(member) + assert extracted_member is not None + extracted_json = json.loads(extracted_member.read().decode("utf-8")) + + timepoints = [parse_timepoint(timepoint) for timepoint in extracted_json] + all_timepoints = all_timepoints + timepoints + + timepoints_df = pd.DataFrame(all_timepoints, columns=COLUMN_NAMES) + + return timepoints_df + + +def parse_timepoint(timepoint): + return 0 diff --git a/tests/arcade_collection/__init__.py b/tests/arcade_collection/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/arcade_collection/unit/test_parse_growth_file.py b/tests/arcade_collection/output/test_parse_growth_file.py similarity index 100% rename from tests/arcade_collection/unit/test_parse_growth_file.py rename to tests/arcade_collection/output/test_parse_growth_file.py diff --git a/untitled/._VIVO_HET_GRAPH_040_040_XABC_00.json b/untitled/._VIVO_HET_GRAPH_040_040_XABC_00.json new file mode 100755 index 0000000000000000000000000000000000000000..0fa68ed45b5e181584283c4d02f7b729915da075 GIT binary patch literal 276 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}@gpDx@)<>dG{b)=0I{(H$B>X9hzwZA z9w0jcO(n>5khlN?gH&>Uu3ln6K~Ab(PGWIMX>n>wN@7W>azHRt&lw=?!N9+i$`Bt;{oayDes6D5@=fl7Gv|V(*I1zwIAc Ju7ed4AOM+fGeiIY literal 0 HcmV?d00001 From 57f662ad0535d612f8c500e790809912e4c1f58d Mon Sep 17 00:00:00 2001 From: Isabelle Chen Date: Fri, 6 Jan 2023 18:22:16 -0600 Subject: [PATCH 05/20] Remove cache folder --- untitled/._VIVO_HET_GRAPH_040_040_XABC_00.json | Bin 276 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100755 untitled/._VIVO_HET_GRAPH_040_040_XABC_00.json diff --git a/untitled/._VIVO_HET_GRAPH_040_040_XABC_00.json b/untitled/._VIVO_HET_GRAPH_040_040_XABC_00.json deleted file mode 100755 index 0fa68ed45b5e181584283c4d02f7b729915da075..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 276 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}@gpDx@)<>dG{b)=0I{(H$B>X9hzwZA z9w0jcO(n>5khlN?gH&>Uu3ln6K~Ab(PGWIMX>n>wN@7W>azHRt&lw=?!N9+i$`Bt;{oayDes6D5@=fl7Gv|V(*I1zwIAc Ju7ed4AOM+fGeiIY From ae965c183dd6461cad33f3365ac3f2343e284f1a Mon Sep 17 00:00:00 2001 From: Isabelle Chen Date: Fri, 6 Jan 2023 18:25:34 -0600 Subject: [PATCH 06/20] Adjust import orders --- src/arcade_collection/output/parse_growth_file.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/arcade_collection/output/parse_growth_file.py b/src/arcade_collection/output/parse_growth_file.py index 498420f..e6ee997 100644 --- a/src/arcade_collection/output/parse_growth_file.py +++ b/src/arcade_collection/output/parse_growth_file.py @@ -1,12 +1,12 @@ -from typing import List, Union import json +from os import path import tarfile -from prefect import task +from typing import List, Union +import ntpath import numpy as np import pandas as pd -import ntpath -from os import path +from prefect import task GROWTH_COLUMNS = [ From e7d169c28b18d49346cd7d595a03ee0f8a539eb8 Mon Sep 17 00:00:00 2001 From: Isabelle Chen Date: Fri, 6 Jan 2023 20:18:00 -0600 Subject: [PATCH 07/20] Add extract file mocking with single member --- .../output/test_parse_growth_file.py | 31 +++++++++++++++++-- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/tests/arcade_collection/output/test_parse_growth_file.py b/tests/arcade_collection/output/test_parse_growth_file.py index 6937b28..48fc70f 100644 --- a/tests/arcade_collection/output/test_parse_growth_file.py +++ b/tests/arcade_collection/output/test_parse_growth_file.py @@ -1,10 +1,35 @@ +import json +import tarfile + import unittest from unittest import mock +from unittest.mock import mock_open from arcade_collection.output.parse_growth_file import parse_growth_file class TestParseGrowthFile(unittest.TestCase): - @mock.patch("arcade_collection.output.parse_growth_file.tarfile") - def test_parse_growth_timepoint(self, tar_mock): - tar_object = mock.Mock(spec=tar_mock.TarFile) + # @mock.patch("builtins.open", new_callable=mock_open) + # @mock.patch("arcade_collection.output.parse_growth_file.json") + def test_parse_growth_timepoint(self): + tar_object = mock.Mock(spec=tarfile.TarFile) + tar_object.name = "tar_object_name.tar" + assert tar_object.name == "tar_object_name.tar" + + first_tar_member = mock.Mock(spec=tarfile.TarInfo) + first_tar_member.name = "first_member.json" + assert first_tar_member.name == "first_member.json" + + first_json = mock.MagicMock() + first_json.read.return_value = '{"seed": 0, "timepoints": [{"time": 0.0,"cells": [[[-33,0,33,0],[[0,1,2,0,2522.26,[]]]],[[0,0,0,0],[[1,0,4,0,2300.50,[]]]]]},{"time": 0.5,"cells": [[[-33,0,33,0],[[0,1,2,0,2522.26,[]]]],[[0,0,0,0],[[1,0,4,0,4391.91,[]]]]]},{"time": 1.0,"cells": [[[-33,0,33,0],[[0,1,1,0,2522.26,[]]]],[[0,0,0,0],[[1,0,4,0,5047.58,[800.0,512.3]]]],[[3,3,-6,0],[[0,1,2,0,2453.83,[640.0]],[1,0,3,1,2517.54,[]]]]]}]}'.encode( + "utf-8" + ) + + second_tar_member = mock.Mock(spec=tarfile.TarInfo) + second_tar_member.name = "second_member.txt" + assert second_tar_member.name == "second_member.txt" + + tar_object.getmembers.return_value = [first_tar_member, second_tar_member] + tar_object.extractfile.return_value = first_json + expected_dataframe = parse_growth_file.fn(tar_object) + print(expected_dataframe) From d2763157eea2be93bc2a05be5dd03e86dc1b0a0f Mon Sep 17 00:00:00 2001 From: Isabelle Chen Date: Fri, 6 Jan 2023 20:20:42 -0600 Subject: [PATCH 08/20] Add extract file mocking with single member --- .../output/test_parse_growth_file.py | 31 +++++++++++++++++-- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/tests/arcade_collection/output/test_parse_growth_file.py b/tests/arcade_collection/output/test_parse_growth_file.py index 6937b28..48fc70f 100644 --- a/tests/arcade_collection/output/test_parse_growth_file.py +++ b/tests/arcade_collection/output/test_parse_growth_file.py @@ -1,10 +1,35 @@ +import json +import tarfile + import unittest from unittest import mock +from unittest.mock import mock_open from arcade_collection.output.parse_growth_file import parse_growth_file class TestParseGrowthFile(unittest.TestCase): - @mock.patch("arcade_collection.output.parse_growth_file.tarfile") - def test_parse_growth_timepoint(self, tar_mock): - tar_object = mock.Mock(spec=tar_mock.TarFile) + # @mock.patch("builtins.open", new_callable=mock_open) + # @mock.patch("arcade_collection.output.parse_growth_file.json") + def test_parse_growth_timepoint(self): + tar_object = mock.Mock(spec=tarfile.TarFile) + tar_object.name = "tar_object_name.tar" + assert tar_object.name == "tar_object_name.tar" + + first_tar_member = mock.Mock(spec=tarfile.TarInfo) + first_tar_member.name = "first_member.json" + assert first_tar_member.name == "first_member.json" + + first_json = mock.MagicMock() + first_json.read.return_value = '{"seed": 0, "timepoints": [{"time": 0.0,"cells": [[[-33,0,33,0],[[0,1,2,0,2522.26,[]]]],[[0,0,0,0],[[1,0,4,0,2300.50,[]]]]]},{"time": 0.5,"cells": [[[-33,0,33,0],[[0,1,2,0,2522.26,[]]]],[[0,0,0,0],[[1,0,4,0,4391.91,[]]]]]},{"time": 1.0,"cells": [[[-33,0,33,0],[[0,1,1,0,2522.26,[]]]],[[0,0,0,0],[[1,0,4,0,5047.58,[800.0,512.3]]]],[[3,3,-6,0],[[0,1,2,0,2453.83,[640.0]],[1,0,3,1,2517.54,[]]]]]}]}'.encode( + "utf-8" + ) + + second_tar_member = mock.Mock(spec=tarfile.TarInfo) + second_tar_member.name = "second_member.txt" + assert second_tar_member.name == "second_member.txt" + + tar_object.getmembers.return_value = [first_tar_member, second_tar_member] + tar_object.extractfile.return_value = first_json + expected_dataframe = parse_growth_file.fn(tar_object) + print(expected_dataframe) From feeaf99ac6f4b5e04a1a98fe9700d4494228b130 Mon Sep 17 00:00:00 2001 From: Isabelle Chen Date: Mon, 9 Jan 2023 15:19:57 -0600 Subject: [PATCH 09/20] Edit the test to include multiple extracted files and add docstrings --- .../output/parse_growth_file.py | 41 ++++++++++++++++++- .../output/test_parse_growth_file.py | 32 ++++++++++----- 2 files changed, 62 insertions(+), 11 deletions(-) diff --git a/src/arcade_collection/output/parse_growth_file.py b/src/arcade_collection/output/parse_growth_file.py index e6ee997..9790080 100644 --- a/src/arcade_collection/output/parse_growth_file.py +++ b/src/arcade_collection/output/parse_growth_file.py @@ -1,7 +1,7 @@ import json from os import path import tarfile -from typing import List, Union +from typing import List import ntpath import numpy as np @@ -26,6 +26,19 @@ @task def parse_growth_file(tar: tarfile.TarFile) -> pd.DataFrame: + """ + Parse the tumor growth tar file. + + Parameters + --------- + tar : + Tar file of simulations. + + Returns + ------- + : + Data of all timepoints of all simulations in tar file. + """ all_timepoints = [] for member in tar.getmembers(): extracted_member = tar.extractfile(member) @@ -48,6 +61,19 @@ def parse_growth_file(tar: tarfile.TarFile) -> pd.DataFrame: def convert_state_to_string(state_index: int) -> str: + """ + Convert the numbers that represent cell state into an annotation. + + Parameters + --------- + state_index : + The index of cell states. + + Returns + ------- + : + The cell state annotation. + """ if state_index == 0: return "NEU" elif state_index == 1: @@ -65,6 +91,19 @@ def convert_state_to_string(state_index: int) -> str: def parse_growth_timepoint(timepoint: dict, seed: int) -> List[list]: + """ + Parse one timepoint of the simulation + + Parameters + --------- + timepoint : + The data of one timepoint. + + Returns + ------- + : + Parsed data of the timepoint. + """ parsed_data = [] for (location, cells) in timepoint["cells"]: diff --git a/tests/arcade_collection/output/test_parse_growth_file.py b/tests/arcade_collection/output/test_parse_growth_file.py index 48fc70f..f98f077 100644 --- a/tests/arcade_collection/output/test_parse_growth_file.py +++ b/tests/arcade_collection/output/test_parse_growth_file.py @@ -1,16 +1,12 @@ -import json import tarfile import unittest from unittest import mock -from unittest.mock import mock_open from arcade_collection.output.parse_growth_file import parse_growth_file class TestParseGrowthFile(unittest.TestCase): - # @mock.patch("builtins.open", new_callable=mock_open) - # @mock.patch("arcade_collection.output.parse_growth_file.json") def test_parse_growth_timepoint(self): tar_object = mock.Mock(spec=tarfile.TarFile) tar_object.name = "tar_object_name.tar" @@ -20,16 +16,32 @@ def test_parse_growth_timepoint(self): first_tar_member.name = "first_member.json" assert first_tar_member.name == "first_member.json" + second_tar_member = mock.Mock(spec=tarfile.TarInfo) + second_tar_member.name = "second_member.txt" + assert second_tar_member.name == "second_member.txt" + + third_tar_member = mock.Mock(spec=tarfile.TarInfo) + third_tar_member.name = "third_member.json" + assert third_tar_member.name == "third_member.json" + first_json = mock.MagicMock() - first_json.read.return_value = '{"seed": 0, "timepoints": [{"time": 0.0,"cells": [[[-33,0,33,0],[[0,1,2,0,2522.26,[]]]],[[0,0,0,0],[[1,0,4,0,2300.50,[]]]]]},{"time": 0.5,"cells": [[[-33,0,33,0],[[0,1,2,0,2522.26,[]]]],[[0,0,0,0],[[1,0,4,0,4391.91,[]]]]]},{"time": 1.0,"cells": [[[-33,0,33,0],[[0,1,1,0,2522.26,[]]]],[[0,0,0,0],[[1,0,4,0,5047.58,[800.0,512.3]]]],[[3,3,-6,0],[[0,1,2,0,2453.83,[640.0]],[1,0,3,1,2517.54,[]]]]]}]}'.encode( + first_json.read.return_value = '{"seed": 0, "timepoints": [{"time": 0.0,"cells": [[[-33,0,33,0],[[0,1,2,0,2322.26,[]]]],[[0,0,10,0],[[1,0,2,0,2300.50,[]]]]]},{"time": 0.5,"cells": [[[-33,0,31,0],[[0,1,2,0,2522.26,[]]]],[[0,0,5,0],[[1,0,3,0,4391.91,[]]]]]},{"time": 1.0,"cells": [[[-19,0,30,0],[[0,1,1,0,2582.22,[]]]],[[0,0,7,0],[[1,0,4,0,5047.58,[800.0,512.3]]]],[[3,3,-6,0],[[0,1,2,0,2453.83,[640.0]],[1,0,3,1,2517.54,[]]]]]}]}'.encode( "utf-8" ) - second_tar_member = mock.Mock(spec=tarfile.TarInfo) - second_tar_member.name = "second_member.txt" - assert second_tar_member.name == "second_member.txt" + second_json = mock.MagicMock() + second_json.read.return_value = '{"seed": 1, "timepoints": [{"time": 10.0,"cells": [[[-13,0,33,0],[[0,1,2,0,2372.26,[]]]],[[0,0,10,0],[[1,0,2,0,2390.50,[]]]]]},{"time": 10.5,"cells": [[[-33,0,1,0],[[0,1,2,0,2022.26,[]]]],[[0,0,8,0],[[1,0,3,0,4390.91,[]]]]]},{"time": 11.0,"cells": [[[-19,0,3,0],[[0,1,1,0,2582.22,[]]]],[[1,0,1,0],[[1,0,4,0,5040.58,[800.0,512.3]]]],[[3,0,-6,0],[[0,1,2,0,2053.83,[640.0]],[1,0,3,1,2517.54,[]]]]]}]}'.encode( + "utf-8" + ) + + tar_object.getmembers.return_value = [first_tar_member, second_tar_member, third_tar_member] + + mock_contents = { + first_tar_member: first_json, + second_tar_member: "", + third_tar_member: second_json, + } + tar_object.extractfile.side_effect = lambda fname, *args, **kwargs: mock_contents[fname] - tar_object.getmembers.return_value = [first_tar_member, second_tar_member] - tar_object.extractfile.return_value = first_json expected_dataframe = parse_growth_file.fn(tar_object) print(expected_dataframe) From eb33be46c0e27668adf15bef00e90460ae5edde1 Mon Sep 17 00:00:00 2001 From: Isabelle Chen Date: Mon, 9 Jan 2023 18:19:41 -0600 Subject: [PATCH 10/20] Fix mypy error in state conversion function by returning None --- src/arcade_collection/output/parse_growth_file.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/arcade_collection/output/parse_growth_file.py b/src/arcade_collection/output/parse_growth_file.py index 9790080..69c54e2 100644 --- a/src/arcade_collection/output/parse_growth_file.py +++ b/src/arcade_collection/output/parse_growth_file.py @@ -1,7 +1,7 @@ import json from os import path import tarfile -from typing import List +from typing import List, Union import ntpath import numpy as np @@ -30,7 +30,7 @@ def parse_growth_file(tar: tarfile.TarFile) -> pd.DataFrame: Parse the tumor growth tar file. Parameters - --------- + ---------- tar : Tar file of simulations. @@ -60,12 +60,12 @@ def parse_growth_file(tar: tarfile.TarFile) -> pd.DataFrame: return timepoints_df -def convert_state_to_string(state_index: int) -> str: +def convert_state_to_string(state_index: int) -> Union[str, None]: """ Convert the numbers that represent cell state into an annotation. Parameters - --------- + ---------- state_index : The index of cell states. @@ -89,13 +89,15 @@ def convert_state_to_string(state_index: int) -> str: elif state_index == 6: return "NEC" + return None + def parse_growth_timepoint(timepoint: dict, seed: int) -> List[list]: """ Parse one timepoint of the simulation Parameters - --------- + ---------- timepoint : The data of one timepoint. From 6019e961409efe45afb9f2d5a29a09c2b66952cf Mon Sep 17 00:00:00 2001 From: Isabelle Chen Date: Mon, 9 Jan 2023 18:20:11 -0600 Subject: [PATCH 11/20] Complete test for parse growth file --- .../output/test_parse_growth_file.py | 99 ++++++++++++++++++- 1 file changed, 94 insertions(+), 5 deletions(-) diff --git a/tests/arcade_collection/output/test_parse_growth_file.py b/tests/arcade_collection/output/test_parse_growth_file.py index d23dd99..f5abc13 100644 --- a/tests/arcade_collection/output/test_parse_growth_file.py +++ b/tests/arcade_collection/output/test_parse_growth_file.py @@ -1,5 +1,7 @@ import tarfile +import numpy as np +import pandas as pd import unittest from unittest import mock from unittest.mock import mock_open @@ -25,18 +27,18 @@ def test_parse_growth_timepoint(self): third_tar_member.name = "third_member.json" assert third_tar_member.name == "third_member.json" + tar_object.getmembers.return_value = [first_tar_member, second_tar_member, third_tar_member] + first_json = mock.MagicMock() first_json.read.return_value = '{"seed": 0, "timepoints": [{"time": 0.0,"cells": [[[-33,0,33,0],[[0,1,2,0,2322.26,[]]]],[[0,0,10,0],[[1,0,2,0,2300.50,[]]]]]},{"time": 0.5,"cells": [[[-33,0,31,0],[[0,1,2,0,2522.26,[]]]],[[0,0,5,0],[[1,0,3,0,4391.91,[]]]]]},{"time": 1.0,"cells": [[[-19,0,30,0],[[0,1,1,0,2582.22,[]]]],[[0,0,7,0],[[1,0,4,0,5047.58,[800.0,512.3]]]],[[3,3,-6,0],[[0,1,2,0,2453.83,[640.0]],[1,0,3,1,2517.54,[]]]]]}]}'.encode( "utf-8" ) second_json = mock.MagicMock() - second_json.read.return_value = '{"seed": 1, "timepoints": [{"time": 10.0,"cells": [[[-13,0,33,0],[[0,1,2,0,2372.26,[]]]],[[0,0,10,0],[[1,0,2,0,2390.50,[]]]]]},{"time": 10.5,"cells": [[[-33,0,1,0],[[0,1,2,0,2022.26,[]]]],[[0,0,8,0],[[1,0,3,0,4390.91,[]]]]]},{"time": 11.0,"cells": [[[-19,0,3,0],[[0,1,1,0,2582.22,[]]]],[[1,0,1,0],[[1,0,4,0,5040.58,[800.0,512.3]]]],[[3,0,-6,0],[[0,1,2,0,2053.83,[640.0]],[1,0,3,1,2517.54,[]]]]]}]}'.encode( + second_json.read.return_value = '{"seed": 1, "timepoints": [{"time": 10.0,"cells": [[[-13,0,33,0],[[0,1,2,0,2372.26,[]]]],[[0,0,10,0],[[1,0,2,0,2390.50,[]]]]]},{"time": 10.5,"cells": [[[-33,0,1,0],[[0,1,2,0,2022.26,[]]]],[[0,0,8,0],[[1,0,3,0,4390.91,[]]]]]},{"time": 11.0,"cells": [[[-19,0,3,0],[[0,1,1,0,2582.22,[]]]],[[1,0,1,0],[[1,0,4,0,5040.58,[800.0,512.3]]]],[[3,0,-6,0],[[0,2,2,0,2053.83,[640.0]],[1,0,6,1,2517.54,[]]]]]}]}'.encode( "utf-8" ) - tar_object.getmembers.return_value = [first_tar_member, second_tar_member, third_tar_member] - mock_contents = { first_tar_member: first_json, second_tar_member: "", @@ -44,5 +46,92 @@ def test_parse_growth_timepoint(self): } tar_object.extractfile.side_effect = lambda fname, *args, **kwargs: mock_contents[fname] - expected_dataframe = parse_growth_file.fn(tar_object) - print(expected_dataframe) + returned_df = parse_growth_file.fn(tar_object) + + expected_dict = { + "TICK": [ + 0.0, + 0.0, + 0.5, + 0.5, + 1.0, + 1.0, + 1.0, + 1.0, + 10.0, + 10.0, + 10.5, + 10.5, + 11.0, + 11.0, + 11.0, + 11.0, + ], + "SEED": [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], + "U": [-33, 0, -33, 0, -19, 0, 3, 3, -13, 0, -33, 0, -19, 1, 3, 3], + "V": [0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0], + "W": [33, 10, 31, 5, 30, 7, -6, -6, 33, 10, 1, 8, 3, 1, -6, -6], + "Z": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + "POSITION": [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1], + "POPULATION": [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 2, 0], + "STATE": [ + "QUI", + "QUI", + "QUI", + "MIG", + "APO", + "PRO", + "QUI", + "MIG", + "QUI", + "QUI", + "QUI", + "MIG", + "APO", + "PRO", + "QUI", + "NEC", + ], + "VOLUME": [ + np.round(i) + for i in [ + 2322.26, + 2300.5, + 2522.26, + 4391.91, + 2582.22, + 5047.58, + 2453.83, + 2517.54, + 2372.26, + 2390.50, + 2022.26, + 4390.91, + 2582.22, + 5040.58, + 2053.83, + 2517.54, + ] + ], + "CYCLE": [ + -1, + -1, + -1, + -1, + -1, + np.round(np.mean([800.0, 512.3])), + 640.0, + -1, + -1, + -1, + -1, + -1, + -1, + np.round(np.mean([800.0, 512.3])), + 640.0, + -1, + ], + } + + expected_df = pd.DataFrame(expected_dict) + self.assertTrue(expected_df.equals(returned_df)) From b26f75713e2387fe954647cc7d240fd04bf8028b Mon Sep 17 00:00:00 2001 From: Isabelle-C Date: Mon, 12 Jun 2023 07:38:08 -0500 Subject: [PATCH 12/20] Update parse growth file and tests after initial review --- .../output/parse_growth_file.py | 41 ++++----- .../output/test_parse_growth_file.py | 84 +++++++++---------- 2 files changed, 60 insertions(+), 65 deletions(-) diff --git a/src/arcade_collection/output/parse_growth_file.py b/src/arcade_collection/output/parse_growth_file.py index 69c54e2..4e4ab13 100644 --- a/src/arcade_collection/output/parse_growth_file.py +++ b/src/arcade_collection/output/parse_growth_file.py @@ -3,7 +3,6 @@ import tarfile from typing import List, Union -import ntpath import numpy as np import pandas as pd from prefect import task @@ -42,19 +41,16 @@ def parse_growth_file(tar: tarfile.TarFile) -> pd.DataFrame: all_timepoints = [] for member in tar.getmembers(): extracted_member = tar.extractfile(member) - if extracted_member is not None: - base = ntpath.basename(member.name) - base_file = path.splitext(base) - file_name = base_file[0] - extension = base_file[1] - if file_name[0] != "." and extension == ".json": - extracted_json = json.loads(extracted_member.read().decode("utf-8")) - seed = extracted_json["seed"] - - for timepoint in extracted_json["timepoints"]: - one_timepoint = parse_growth_timepoint(timepoint, seed) - for data in one_timepoint: - all_timepoints.append(data) + extracted_json = json.loads(extracted_member.read().decode("utf-8")) + seed = extracted_json["seed"] + + all_timepoints.extend( + [ + data + for timepoint in extracted_json["timepoints"] + for data in parse_growth_timepoint(timepoint, seed) + ] + ) timepoints_df = pd.DataFrame(all_timepoints, columns=GROWTH_COLUMNS) return timepoints_df @@ -92,9 +88,14 @@ def convert_state_to_string(state_index: int) -> Union[str, None]: return None -def parse_growth_timepoint(timepoint: dict, seed: int) -> List[list]: +def parse_growth_timepoint(timepoint: dict, seed: int) -> list: """ - Parse one timepoint of the simulation + Parse one timepoint of the simulation. + + The original data contains data of every timepoint at a seed in a + dictionary. The current data contains data of one cell per row, with tick, + seed, coordinates (u, v, w, z), position, population, state, volume, and + averaged cycle. Parameters ---------- @@ -107,6 +108,7 @@ def parse_growth_timepoint(timepoint: dict, seed: int) -> List[list]: Parsed data of the timepoint. """ parsed_data = [] + time = timepoint["time"] for (location, cells) in timepoint["cells"]: u = int(location[0]) @@ -118,12 +120,11 @@ def parse_growth_timepoint(timepoint: dict, seed: int) -> List[list]: population = cell[1] state = cell[2] position = cell[3] - volume = np.round(cell[4]) + volume = cell[4] if len(cell[5]) == 0: - cycle = -1 + cycle = None else: - cycle = np.round(np.mean(cell[5])) - time = timepoint["time"] + cycle = np.mean(cell[5]) data_list = [ time, seed, diff --git a/tests/arcade_collection/output/test_parse_growth_file.py b/tests/arcade_collection/output/test_parse_growth_file.py index f5abc13..64543e4 100644 --- a/tests/arcade_collection/output/test_parse_growth_file.py +++ b/tests/arcade_collection/output/test_parse_growth_file.py @@ -12,22 +12,15 @@ class TestParseGrowthFile(unittest.TestCase): def test_parse_growth_timepoint(self): tar_object = mock.Mock(spec=tarfile.TarFile) - tar_object.name = "tar_object_name.tar" - assert tar_object.name == "tar_object_name.tar" + tar_object.name = "tar_object_name.tar.xz" first_tar_member = mock.Mock(spec=tarfile.TarInfo) first_tar_member.name = "first_member.json" - assert first_tar_member.name == "first_member.json" second_tar_member = mock.Mock(spec=tarfile.TarInfo) - second_tar_member.name = "second_member.txt" - assert second_tar_member.name == "second_member.txt" + second_tar_member.name = "second_member.json" - third_tar_member = mock.Mock(spec=tarfile.TarInfo) - third_tar_member.name = "third_member.json" - assert third_tar_member.name == "third_member.json" - - tar_object.getmembers.return_value = [first_tar_member, second_tar_member, third_tar_member] + tar_object.getmembers.return_value = [first_tar_member, second_tar_member] first_json = mock.MagicMock() first_json.read.return_value = '{"seed": 0, "timepoints": [{"time": 0.0,"cells": [[[-33,0,33,0],[[0,1,2,0,2322.26,[]]]],[[0,0,10,0],[[1,0,2,0,2300.50,[]]]]]},{"time": 0.5,"cells": [[[-33,0,31,0],[[0,1,2,0,2522.26,[]]]],[[0,0,5,0],[[1,0,3,0,4391.91,[]]]]]},{"time": 1.0,"cells": [[[-19,0,30,0],[[0,1,1,0,2582.22,[]]]],[[0,0,7,0],[[1,0,4,0,5047.58,[800.0,512.3]]]],[[3,3,-6,0],[[0,1,2,0,2453.83,[640.0]],[1,0,3,1,2517.54,[]]]]]}]}'.encode( @@ -41,8 +34,7 @@ def test_parse_growth_timepoint(self): mock_contents = { first_tar_member: first_json, - second_tar_member: "", - third_tar_member: second_json, + second_tar_member: second_json, } tar_object.extractfile.side_effect = lambda fname, *args, **kwargs: mock_contents[fname] @@ -93,45 +85,47 @@ def test_parse_growth_timepoint(self): "NEC", ], "VOLUME": [ - np.round(i) - for i in [ - 2322.26, - 2300.5, - 2522.26, - 4391.91, - 2582.22, - 5047.58, - 2453.83, - 2517.54, - 2372.26, - 2390.50, - 2022.26, - 4390.91, - 2582.22, - 5040.58, - 2053.83, - 2517.54, - ] + 2322.26, + 2300.5, + 2522.26, + 4391.91, + 2582.22, + 5047.58, + 2453.83, + 2517.54, + 2372.26, + 2390.50, + 2022.26, + 4390.91, + 2582.22, + 5040.58, + 2053.83, + 2517.54, ], "CYCLE": [ - -1, - -1, - -1, - -1, - -1, - np.round(np.mean([800.0, 512.3])), + None, + None, + None, + None, + None, + np.mean([800.0, 512.3]), 640.0, - -1, - -1, - -1, - -1, - -1, - -1, - np.round(np.mean([800.0, 512.3])), + None, + None, + None, + None, + None, + None, + np.mean([800.0, 512.3]), 640.0, - -1, + None, ], } expected_df = pd.DataFrame(expected_dict) + print(returned_df) + + print("-----") + + print(expected_df) self.assertTrue(expected_df.equals(returned_df)) From 5968462a20dbe0f467efc0dd11f37560c03df508 Mon Sep 17 00:00:00 2001 From: Isabelle-C Date: Mon, 12 Jun 2023 07:48:53 -0500 Subject: [PATCH 13/20] convert state to list format --- .../output/parse_growth_file.py | 24 +++++-------------- 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/src/arcade_collection/output/parse_growth_file.py b/src/arcade_collection/output/parse_growth_file.py index 4e4ab13..05dbd50 100644 --- a/src/arcade_collection/output/parse_growth_file.py +++ b/src/arcade_collection/output/parse_growth_file.py @@ -56,7 +56,7 @@ def parse_growth_file(tar: tarfile.TarFile) -> pd.DataFrame: return timepoints_df -def convert_state_to_string(state_index: int) -> Union[str, None]: +def convert_state_to_string(state_index: int, state_list: List[str]) -> Union[str, None]: """ Convert the numbers that represent cell state into an annotation. @@ -64,28 +64,16 @@ def convert_state_to_string(state_index: int) -> Union[str, None]: ---------- state_index : The index of cell states. + state_list : + The list of cell states. Returns ------- : The cell state annotation. """ - if state_index == 0: - return "NEU" - elif state_index == 1: - return "APO" - elif state_index == 2: - return "QUI" - elif state_index == 3: - return "MIG" - elif state_index == 4: - return "PRO" - elif state_index == 5: - return "SEN" - elif state_index == 6: - return "NEC" - - return None + + return state_list[state_index] def parse_growth_timepoint(timepoint: dict, seed: int) -> list: @@ -134,7 +122,7 @@ def parse_growth_timepoint(timepoint: dict, seed: int) -> list: z, position, population, - convert_state_to_string(state), + convert_state_to_string(state, ["NEU", "APO", "QUI", "MIG", "PRO", "SEN", "NEC"]), volume, cycle, ] From c786f18a14e9b9396dd48d748f68824e37305c61 Mon Sep 17 00:00:00 2001 From: Isabelle-C Date: Sat, 7 Oct 2023 00:14:39 -0500 Subject: [PATCH 14/20] Update according to feedback comments --- src/arcade_collection/output/__init__.py | 2 + .../output/parse_growth_file.py | 50 +++++++------------ .../output/parse_params_file.py | 26 ---------- .../output/test_parse_growth_file.py | 37 ++++++-------- 4 files changed, 35 insertions(+), 80 deletions(-) delete mode 100644 src/arcade_collection/output/parse_params_file.py diff --git a/src/arcade_collection/output/__init__.py b/src/arcade_collection/output/__init__.py index 438a252..c45b14e 100644 --- a/src/arcade_collection/output/__init__.py +++ b/src/arcade_collection/output/__init__.py @@ -7,8 +7,10 @@ from .get_location_voxels import get_location_voxels from .merge_parsed_results import merge_parsed_results from .parse_cells_file import parse_cells_file +from .parse_growth_file import parse_growth_file from .parse_locations_file import parse_locations_file + convert_model_units = task(convert_model_units) extract_tick_json = task(extract_tick_json) get_location_voxels = task(get_location_voxels) diff --git a/src/arcade_collection/output/parse_growth_file.py b/src/arcade_collection/output/parse_growth_file.py index 05dbd50..f5adf63 100644 --- a/src/arcade_collection/output/parse_growth_file.py +++ b/src/arcade_collection/output/parse_growth_file.py @@ -1,7 +1,5 @@ import json -from os import path import tarfile -from typing import List, Union import numpy as np import pandas as pd @@ -22,6 +20,16 @@ "CYCLE", ] +CELL_STATES = [ + "NEUTRAL", + "APOPTOTIC", + "QUIESCENT", + "MIGRATORY", + "PROLIFERATIVE", + "SENESCENT", + "NECROTIC", +] + @task def parse_growth_file(tar: tarfile.TarFile) -> pd.DataFrame: @@ -56,26 +64,6 @@ def parse_growth_file(tar: tarfile.TarFile) -> pd.DataFrame: return timepoints_df -def convert_state_to_string(state_index: int, state_list: List[str]) -> Union[str, None]: - """ - Convert the numbers that represent cell state into an annotation. - - Parameters - ---------- - state_index : - The index of cell states. - state_list : - The list of cell states. - - Returns - ------- - : - The cell state annotation. - """ - - return state_list[state_index] - - def parse_growth_timepoint(timepoint: dict, seed: int) -> list: """ Parse one timepoint of the simulation. @@ -99,20 +87,16 @@ def parse_growth_timepoint(timepoint: dict, seed: int) -> list: time = timepoint["time"] for (location, cells) in timepoint["cells"]: - u = int(location[0]) - v = int(location[1]) - w = int(location[2]) - z = int(location[3]) + u, v, w, z = location for cell in cells: - population = cell[1] - state = cell[2] - position = cell[3] - volume = cell[4] - if len(cell[5]) == 0: + _, population, state, position, volume, cycles = cell + + if len(cycles) == 0: cycle = None else: - cycle = np.mean(cell[5]) + cycle = np.mean(cycles) + data_list = [ time, seed, @@ -122,7 +106,7 @@ def parse_growth_timepoint(timepoint: dict, seed: int) -> list: z, position, population, - convert_state_to_string(state, ["NEU", "APO", "QUI", "MIG", "PRO", "SEN", "NEC"]), + CELL_STATES[state], volume, cycle, ] diff --git a/src/arcade_collection/output/parse_params_file.py b/src/arcade_collection/output/parse_params_file.py deleted file mode 100644 index e3f9186..0000000 --- a/src/arcade_collection/output/parse_params_file.py +++ /dev/null @@ -1,26 +0,0 @@ -from prefect import task -import json -import tarfile -import pandas as pd - - -@task -def parse_params_file(tar: tarfile.TarFile) -> pd.DataFrame: - all_timepoints = [] - - for member in tar.getmembers(): - seed = 0 - extracted_member = tar.extractfile(member) - assert extracted_member is not None - extracted_json = json.loads(extracted_member.read().decode("utf-8")) - - timepoints = [parse_timepoint(timepoint) for timepoint in extracted_json] - all_timepoints = all_timepoints + timepoints - - timepoints_df = pd.DataFrame(all_timepoints, columns=COLUMN_NAMES) - - return timepoints_df - - -def parse_timepoint(timepoint): - return 0 diff --git a/tests/arcade_collection/output/test_parse_growth_file.py b/tests/arcade_collection/output/test_parse_growth_file.py index 64543e4..01e07a1 100644 --- a/tests/arcade_collection/output/test_parse_growth_file.py +++ b/tests/arcade_collection/output/test_parse_growth_file.py @@ -67,22 +67,22 @@ def test_parse_growth_timepoint(self): "POSITION": [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1], "POPULATION": [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 2, 0], "STATE": [ - "QUI", - "QUI", - "QUI", - "MIG", - "APO", - "PRO", - "QUI", - "MIG", - "QUI", - "QUI", - "QUI", - "MIG", - "APO", - "PRO", - "QUI", - "NEC", + "QUIESCENT", + "QUIESCENT", + "QUIESCENT", + "MIGRATORY", + "APOPTOTIC", + "PROLIFERATIVE", + "QUIESCENT", + "MIGRATORY", + "QUIESCENT", + "QUIESCENT", + "QUIESCENT", + "MIGRATORY", + "APOPTOTIC", + "PROLIFERATIVE", + "QUIESCENT", + "NECROTIC", ], "VOLUME": [ 2322.26, @@ -123,9 +123,4 @@ def test_parse_growth_timepoint(self): } expected_df = pd.DataFrame(expected_dict) - print(returned_df) - - print("-----") - - print(expected_df) self.assertTrue(expected_df.equals(returned_df)) From 13bdd483744c57d9b245f3444fa1c827557b7861 Mon Sep 17 00:00:00 2001 From: Isabelle-C Date: Sat, 7 Oct 2023 00:34:10 -0500 Subject: [PATCH 15/20] Add isort changes --- src/arcade_collection/output/__init__.py | 1 - src/arcade_collection/output/parse_growth_file.py | 1 - tests/arcade_collection/output/test_parse_growth_file.py | 6 +++--- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/arcade_collection/output/__init__.py b/src/arcade_collection/output/__init__.py index c45b14e..fc0aff0 100644 --- a/src/arcade_collection/output/__init__.py +++ b/src/arcade_collection/output/__init__.py @@ -10,7 +10,6 @@ from .parse_growth_file import parse_growth_file from .parse_locations_file import parse_locations_file - convert_model_units = task(convert_model_units) extract_tick_json = task(extract_tick_json) get_location_voxels = task(get_location_voxels) diff --git a/src/arcade_collection/output/parse_growth_file.py b/src/arcade_collection/output/parse_growth_file.py index f5adf63..7f674a4 100644 --- a/src/arcade_collection/output/parse_growth_file.py +++ b/src/arcade_collection/output/parse_growth_file.py @@ -5,7 +5,6 @@ import pandas as pd from prefect import task - GROWTH_COLUMNS = [ "TICK", "SEED", diff --git a/tests/arcade_collection/output/test_parse_growth_file.py b/tests/arcade_collection/output/test_parse_growth_file.py index 01e07a1..43e5f23 100644 --- a/tests/arcade_collection/output/test_parse_growth_file.py +++ b/tests/arcade_collection/output/test_parse_growth_file.py @@ -1,11 +1,11 @@ import tarfile - -import numpy as np -import pandas as pd import unittest from unittest import mock from unittest.mock import mock_open +import numpy as np +import pandas as pd + from arcade_collection.output.parse_growth_file import parse_growth_file From 40150eaa90b54174544a85c17b6adb585e9177fd Mon Sep 17 00:00:00 2001 From: Isabelle-C Date: Sat, 7 Oct 2023 00:40:46 -0500 Subject: [PATCH 16/20] Attempt to fix mypy error --- .../output/parse_growth_file.py | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/arcade_collection/output/parse_growth_file.py b/src/arcade_collection/output/parse_growth_file.py index 7f674a4..581459e 100644 --- a/src/arcade_collection/output/parse_growth_file.py +++ b/src/arcade_collection/output/parse_growth_file.py @@ -48,16 +48,17 @@ def parse_growth_file(tar: tarfile.TarFile) -> pd.DataFrame: all_timepoints = [] for member in tar.getmembers(): extracted_member = tar.extractfile(member) - extracted_json = json.loads(extracted_member.read().decode("utf-8")) - seed = extracted_json["seed"] - - all_timepoints.extend( - [ - data - for timepoint in extracted_json["timepoints"] - for data in parse_growth_timepoint(timepoint, seed) - ] - ) + if extracted_member is not None: + extracted_json = json.loads(extracted_member.read().decode("utf-8")) + seed = extracted_json["seed"] + + all_timepoints.extend( + [ + data + for timepoint in extracted_json["timepoints"] + for data in parse_growth_timepoint(timepoint, seed) + ] + ) timepoints_df = pd.DataFrame(all_timepoints, columns=GROWTH_COLUMNS) return timepoints_df From fee76ce5b03a7a47d7add6752ce018b70d61e30e Mon Sep 17 00:00:00 2001 From: jessicasyu <15913767+jessicasyu@users.noreply.github.com> Date: Wed, 1 Nov 2023 17:26:40 -0400 Subject: [PATCH 17/20] Remove tox.ini file --- tox.ini | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 tox.ini diff --git a/tox.ini b/tox.ini deleted file mode 100644 index e5ad038..0000000 --- a/tox.ini +++ /dev/null @@ -1,19 +0,0 @@ -[tox] -isolated_build = True -envlist = py{310}, linter, mypy -skipsdist=True - -[testenv] -allowlist_externals = poetry -commands = - poetry run pytest --cov-report html --cov=src/ tests/ - -[testenv:linter] -commands = - poetry run black -l 100 src/ tests/ - poetry run pylint --ignore-patterns=test.*?py src/ tests/ - -[testenv:mypy] -commands = - poetry run mypy --config-file mypy.ini --disallow-untyped-calls --disallow-untyped-defs --disallow-incomplete-defs src - From 5040cc309e16ec2c81ee9dd2700e1cffd1b80238 Mon Sep 17 00:00:00 2001 From: jessicasyu <15913767+jessicasyu@users.noreply.github.com> Date: Wed, 1 Nov 2023 19:38:06 -0400 Subject: [PATCH 18/20] Move task decorator to init --- src/arcade_collection/output/__init__.py | 1 + src/arcade_collection/output/parse_growth_file.py | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/arcade_collection/output/__init__.py b/src/arcade_collection/output/__init__.py index fc0aff0..be6a602 100644 --- a/src/arcade_collection/output/__init__.py +++ b/src/arcade_collection/output/__init__.py @@ -15,4 +15,5 @@ get_location_voxels = task(get_location_voxels) merge_parsed_results = task(merge_parsed_results) parse_cells_file = task(parse_cells_file) +parse_growth_file = task(parse_growth_file) parse_locations_file = task(parse_locations_file) diff --git a/src/arcade_collection/output/parse_growth_file.py b/src/arcade_collection/output/parse_growth_file.py index 581459e..01f2f81 100644 --- a/src/arcade_collection/output/parse_growth_file.py +++ b/src/arcade_collection/output/parse_growth_file.py @@ -3,7 +3,6 @@ import numpy as np import pandas as pd -from prefect import task GROWTH_COLUMNS = [ "TICK", @@ -30,7 +29,6 @@ ] -@task def parse_growth_file(tar: tarfile.TarFile) -> pd.DataFrame: """ Parse the tumor growth tar file. From eec9be042b2eb2720c1d5641714e72bcbbc94c8c Mon Sep 17 00:00:00 2001 From: jessicasyu <15913767+jessicasyu@users.noreply.github.com> Date: Wed, 1 Nov 2023 19:38:19 -0400 Subject: [PATCH 19/20] Update docstrings --- .../output/parse_growth_file.py | 77 ++++++++++++++----- 1 file changed, 57 insertions(+), 20 deletions(-) diff --git a/src/arcade_collection/output/parse_growth_file.py b/src/arcade_collection/output/parse_growth_file.py index 01f2f81..cf372e4 100644 --- a/src/arcade_collection/output/parse_growth_file.py +++ b/src/arcade_collection/output/parse_growth_file.py @@ -31,56 +31,93 @@ def parse_growth_file(tar: tarfile.TarFile) -> pd.DataFrame: """ - Parse the tumor growth tar file. + Parses a tumor growth simulation tar file. Parameters ---------- tar : - Tar file of simulations. + Tar file of simulations for different seeds. Returns ------- : - Data of all timepoints of all simulations in tar file. + Parsed simulation data for all seeds and timepoints. """ + all_timepoints = [] + for member in tar.getmembers(): extracted_member = tar.extractfile(member) - if extracted_member is not None: - extracted_json = json.loads(extracted_member.read().decode("utf-8")) - seed = extracted_json["seed"] - - all_timepoints.extend( - [ - data - for timepoint in extracted_json["timepoints"] - for data in parse_growth_timepoint(timepoint, seed) - ] - ) + assert extracted_member is not None + extracted_json = json.loads(extracted_member.read().decode("utf-8")) + + seed = extracted_json["seed"] + all_timepoints.extend( + [ + data + for timepoint in extracted_json["timepoints"] + for data in parse_growth_timepoint(timepoint, seed) + ] + ) timepoints_df = pd.DataFrame(all_timepoints, columns=GROWTH_COLUMNS) + return timepoints_df def parse_growth_timepoint(timepoint: dict, seed: int) -> list: """ - Parse one timepoint of the simulation. + Parses a simulation timepoint into a list of features per cell. - The original data contains data of every timepoint at a seed in a - dictionary. The current data contains data of one cell per row, with tick, - seed, coordinates (u, v, w, z), position, population, state, volume, and - averaged cycle. + The original data contains cell features in the form: + + .. code-block:: json + + { + "time": time, + "cells": [ + [ + [u, v, w, z], + [ + [ + type, + population, + state, + position, + volume, + [cell, cycle, lengths, ...] + ], + ... + ] + ], + ... + ] + } + + Parsed data is formatted into: + + .. code-block:: json + + [ + [time, seed, u, v, w, z, position, population, state, volume, cycle], + [time, seed, u, v, w, z, position, population, state, volume, cycle], + ... + ] + + Cell cycle length is `None` if the cell has not yet divided. Otherwise, cell + cycle is the average of all cell cycle lengths. Parameters ---------- timepoint : - The data of one timepoint. + Data for a timepoint. Returns ------- : Parsed data of the timepoint. """ + parsed_data = [] time = timepoint["time"] From 2ef56bce14c88967047cb220fcfb39d87481ff71 Mon Sep 17 00:00:00 2001 From: jessicasyu <15913767+jessicasyu@users.noreply.github.com> Date: Wed, 1 Nov 2023 19:38:37 -0400 Subject: [PATCH 20/20] Remove fn call in test --- tests/arcade_collection/output/test_parse_growth_file.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/arcade_collection/output/test_parse_growth_file.py b/tests/arcade_collection/output/test_parse_growth_file.py index 43e5f23..6706e76 100644 --- a/tests/arcade_collection/output/test_parse_growth_file.py +++ b/tests/arcade_collection/output/test_parse_growth_file.py @@ -1,7 +1,6 @@ import tarfile import unittest from unittest import mock -from unittest.mock import mock_open import numpy as np import pandas as pd @@ -38,7 +37,7 @@ def test_parse_growth_timepoint(self): } tar_object.extractfile.side_effect = lambda fname, *args, **kwargs: mock_contents[fname] - returned_df = parse_growth_file.fn(tar_object) + returned_df = parse_growth_file(tar_object) expected_dict = { "TICK": [