From 7c32a56191a39b0ac13f7a0bb2d3dd7fd22a40ca Mon Sep 17 00:00:00 2001
From: Isabelle Chen <isabellechen@Isabelles-MacBook-Pro-2.local>
Date: Fri, 23 Dec 2022 00:26:02 -0600
Subject: [PATCH 01/20] Initial edits on parse growth file

---
 .../output/parse_growth_file.py               | 62 +++++++++++++++++++
 1 file changed, 62 insertions(+)
 create mode 100644 src/arcade_collection/output/parse_growth_file.py

diff --git a/src/arcade_collection/output/parse_growth_file.py b/src/arcade_collection/output/parse_growth_file.py
new file mode 100644
index 0000000..8dfc2ba
--- /dev/null
+++ b/src/arcade_collection/output/parse_growth_file.py
@@ -0,0 +1,62 @@
+from typing import List, Union
+import json
+import tarfile
+
+import pandas as pd
+from prefect import task
+
+
+@task
+def parse_growth_file(tar: tarfile.TarFile) -> pd.DataFrame:
+    all_timepoints = []
+    for member in tar.getmembers():
+        seed = 0
+
+
+def parse_growth_timepoint():
+    time_index = self.timepoints.index(timepoint)
+
+    parsed_data = []
+    sim_timepoint = loaded_simulation["timepoints"][time_index]["cells"]
+    param_timepoint = loaded_param_simulation["timepoints"][time_index]["cells"]
+
+    for (location, cells), (_, param_cells) in zip(sim_timepoint, param_timepoint):
+        u = int(location[0])
+        v = int(location[1])
+        w = int(location[2])
+        z = int(location[3])
+        szudzik_coordinate = self.get_szudzik_pair(u, v)
+
+        for cell, param_cell in zip(cells, param_cells):
+            population = cell[1]
+            state = cell[2]
+            position = cell[3]
+            volume = np.round(cell[4])
+            cycle = np.round(np.mean(cell[5]))
+            max_height = param_cell[4][3]
+            meta_pref = param_cell[4][8]
+            migra_threshold = param_cell[4][9]
+
+            data_list = [
+                self.key,
+                self.seed,
+                timepoint,
+                szudzik_coordinate,
+                u,
+                v,
+                w,
+                z,
+                position,
+                str(population),
+                str(state),
+                volume,
+                cycle,
+                max_height,
+                meta_pref,
+                migra_threshold,
+            ]
+
+            parsed_data.append(data_list)
+
+    columns = [feature.name for feature in self.get_feature_list()]
+    return pd.DataFrame(parsed_data, columns=columns)

From 70599ba17821365f056838039aea842f42a43fd9 Mon Sep 17 00:00:00 2001
From: Isabelle Chen <isabellechen@Isabelles-MacBook-Pro-2.local>
Date: Tue, 3 Jan 2023 10:24:05 -0600
Subject: [PATCH 02/20] Add preliminary test for parse growth file

---
 tests/arcade_collection/unit/test_parse_growth_file.py | 10 ++++++++++
 1 file changed, 10 insertions(+)
 create mode 100644 tests/arcade_collection/unit/test_parse_growth_file.py

diff --git a/tests/arcade_collection/unit/test_parse_growth_file.py b/tests/arcade_collection/unit/test_parse_growth_file.py
new file mode 100644
index 0000000..6937b28
--- /dev/null
+++ b/tests/arcade_collection/unit/test_parse_growth_file.py
@@ -0,0 +1,10 @@
+import unittest
+from unittest import mock
+
+from arcade_collection.output.parse_growth_file import parse_growth_file
+
+
+class TestParseGrowthFile(unittest.TestCase):
+    @mock.patch("arcade_collection.output.parse_growth_file.tarfile")
+    def test_parse_growth_timepoint(self, tar_mock):
+        tar_object = mock.Mock(spec=tar_mock.TarFile)

From d5852068409a1eaa0cdaca7149819e339fc943ab Mon Sep 17 00:00:00 2001
From: Isabelle Chen <isabellechen@Isabelles-MacBook-Pro-2.local>
Date: Fri, 6 Jan 2023 18:19:29 -0600
Subject: [PATCH 03/20] Update the python testing version

---
 tox.ini | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tox.ini b/tox.ini
index 4e1f860..53bbacf 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,6 +1,6 @@
 [tox]
 isolated_build = True
-envlist = py{39}, linter, mypy
+envlist = py{310}, linter, mypy
 skipsdist=True
 
 [testenv]

From c860a7f8f357d3a777c69ed03946d81447ba70ec Mon Sep 17 00:00:00 2001
From: Isabelle Chen <isabellechen@Isabelles-MacBook-Pro-2.local>
Date: Fri, 6 Jan 2023 18:19:54 -0600
Subject: [PATCH 04/20] Fix module not found error in tests

---
 src/arcade_collection/output/__init__.py      |   2 +
 .../output/parse_growth_file.py               |  90 +++++++++++++-----
 .../output/parse_params_file.py               |  26 +++++
 tests/arcade_collection/__init__.py           |   0
 .../test_parse_growth_file.py                 |   0
 .../._VIVO_HET_GRAPH_040_040_XABC_00.json     | Bin 0 -> 276 bytes
 6 files changed, 93 insertions(+), 25 deletions(-)
 create mode 100644 src/arcade_collection/output/parse_params_file.py
 delete mode 100644 tests/arcade_collection/__init__.py
 rename tests/arcade_collection/{unit => output}/test_parse_growth_file.py (100%)
 create mode 100755 untitled/._VIVO_HET_GRAPH_040_040_XABC_00.json

diff --git a/src/arcade_collection/output/__init__.py b/src/arcade_collection/output/__init__.py
index cac6837..4d14b7b 100644
--- a/src/arcade_collection/output/__init__.py
+++ b/src/arcade_collection/output/__init__.py
@@ -3,3 +3,5 @@
 from .merge_parsed_results import merge_parsed_results
 from .parse_cells_file import parse_cells_file
 from .parse_locations_file import parse_locations_file
+from .parse_growth_file import parse_growth_file
+from .parse_params_file import parse_params_file
diff --git a/src/arcade_collection/output/parse_growth_file.py b/src/arcade_collection/output/parse_growth_file.py
index 8dfc2ba..498420f 100644
--- a/src/arcade_collection/output/parse_growth_file.py
+++ b/src/arcade_collection/output/parse_growth_file.py
@@ -1,62 +1,102 @@
 from typing import List, Union
 import json
 import tarfile
+from prefect import task
 
+import numpy as np
 import pandas as pd
-from prefect import task
+import ntpath
+from os import path
+
+
+GROWTH_COLUMNS = [
+    "TICK",
+    "SEED",
+    "U",
+    "V",
+    "W",
+    "Z",
+    "POSITION",
+    "POPULATION",
+    "STATE",
+    "VOLUME",
+    "CYCLE",
+]
 
 
 @task
 def parse_growth_file(tar: tarfile.TarFile) -> pd.DataFrame:
     all_timepoints = []
     for member in tar.getmembers():
-        seed = 0
+        extracted_member = tar.extractfile(member)
+        if extracted_member is not None:
+            base = ntpath.basename(member.name)
+            base_file = path.splitext(base)
+            file_name = base_file[0]
+            extension = base_file[1]
+            if file_name[0] != "." and extension == ".json":
+                extracted_json = json.loads(extracted_member.read().decode("utf-8"))
+                seed = extracted_json["seed"]
 
+                for timepoint in extracted_json["timepoints"]:
+                    one_timepoint = parse_growth_timepoint(timepoint, seed)
+                    for data in one_timepoint:
+                        all_timepoints.append(data)
 
-def parse_growth_timepoint():
-    time_index = self.timepoints.index(timepoint)
+    timepoints_df = pd.DataFrame(all_timepoints, columns=GROWTH_COLUMNS)
+    return timepoints_df
 
+
+def convert_state_to_string(state_index: int) -> str:
+    if state_index == 0:
+        return "NEU"
+    elif state_index == 1:
+        return "APO"
+    elif state_index == 2:
+        return "QUI"
+    elif state_index == 3:
+        return "MIG"
+    elif state_index == 4:
+        return "PRO"
+    elif state_index == 5:
+        return "SEN"
+    elif state_index == 6:
+        return "NEC"
+
+
+def parse_growth_timepoint(timepoint: dict, seed: int) -> List[list]:
     parsed_data = []
-    sim_timepoint = loaded_simulation["timepoints"][time_index]["cells"]
-    param_timepoint = loaded_param_simulation["timepoints"][time_index]["cells"]
 
-    for (location, cells), (_, param_cells) in zip(sim_timepoint, param_timepoint):
+    for (location, cells) in timepoint["cells"]:
         u = int(location[0])
         v = int(location[1])
         w = int(location[2])
         z = int(location[3])
-        szudzik_coordinate = self.get_szudzik_pair(u, v)
 
-        for cell, param_cell in zip(cells, param_cells):
+        for cell in cells:
             population = cell[1]
             state = cell[2]
             position = cell[3]
             volume = np.round(cell[4])
-            cycle = np.round(np.mean(cell[5]))
-            max_height = param_cell[4][3]
-            meta_pref = param_cell[4][8]
-            migra_threshold = param_cell[4][9]
-
+            if len(cell[5]) == 0:
+                cycle = -1
+            else:
+                cycle = np.round(np.mean(cell[5]))
+            time = timepoint["time"]
             data_list = [
-                self.key,
-                self.seed,
-                timepoint,
-                szudzik_coordinate,
+                time,
+                seed,
                 u,
                 v,
                 w,
                 z,
                 position,
-                str(population),
-                str(state),
+                population,
+                convert_state_to_string(state),
                 volume,
                 cycle,
-                max_height,
-                meta_pref,
-                migra_threshold,
             ]
 
             parsed_data.append(data_list)
 
-    columns = [feature.name for feature in self.get_feature_list()]
-    return pd.DataFrame(parsed_data, columns=columns)
+    return parsed_data
diff --git a/src/arcade_collection/output/parse_params_file.py b/src/arcade_collection/output/parse_params_file.py
new file mode 100644
index 0000000..e3f9186
--- /dev/null
+++ b/src/arcade_collection/output/parse_params_file.py
@@ -0,0 +1,26 @@
+from prefect import task
+import json
+import tarfile
+import pandas as pd
+
+
+@task
+def parse_params_file(tar: tarfile.TarFile) -> pd.DataFrame:
+    all_timepoints = []
+
+    for member in tar.getmembers():
+        seed = 0
+        extracted_member = tar.extractfile(member)
+        assert extracted_member is not None
+        extracted_json = json.loads(extracted_member.read().decode("utf-8"))
+
+        timepoints = [parse_timepoint(timepoint) for timepoint in extracted_json]
+        all_timepoints = all_timepoints + timepoints
+
+    timepoints_df = pd.DataFrame(all_timepoints, columns=COLUMN_NAMES)
+
+    return timepoints_df
+
+
+def parse_timepoint(timepoint):
+    return 0
diff --git a/tests/arcade_collection/__init__.py b/tests/arcade_collection/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/arcade_collection/unit/test_parse_growth_file.py b/tests/arcade_collection/output/test_parse_growth_file.py
similarity index 100%
rename from tests/arcade_collection/unit/test_parse_growth_file.py
rename to tests/arcade_collection/output/test_parse_growth_file.py
diff --git a/untitled/._VIVO_HET_GRAPH_040_040_XABC_00.json b/untitled/._VIVO_HET_GRAPH_040_040_XABC_00.json
new file mode 100755
index 0000000000000000000000000000000000000000..0fa68ed45b5e181584283c4d02f7b729915da075
GIT binary patch
literal 276
zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}@gpDx@)<>dG{b)=0I{(H$B>X9hzwZA
z9w0jcO(n>5khlN?gH&>Uu3ln6K~Ab(PGWIMX>n>wN@7W>azHRt&lw=?!N9<eP?wvS
zoC6eOyPK8_QWHAM97Hj&Fo<kj&cBz>+i$`Bt;{oayDes6D5@=fl7Gv|V(*I1zwIAc
Ju7ed4AOM+fGeiIY

literal 0
HcmV?d00001


From 57f662ad0535d612f8c500e790809912e4c1f58d Mon Sep 17 00:00:00 2001
From: Isabelle Chen <isabellechen@Isabelles-MacBook-Pro-2.local>
Date: Fri, 6 Jan 2023 18:22:16 -0600
Subject: [PATCH 05/20] Remove cache folder

---
 untitled/._VIVO_HET_GRAPH_040_040_XABC_00.json | Bin 276 -> 0 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100755 untitled/._VIVO_HET_GRAPH_040_040_XABC_00.json

diff --git a/untitled/._VIVO_HET_GRAPH_040_040_XABC_00.json b/untitled/._VIVO_HET_GRAPH_040_040_XABC_00.json
deleted file mode 100755
index 0fa68ed45b5e181584283c4d02f7b729915da075..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 276
zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}@gpDx@)<>dG{b)=0I{(H$B>X9hzwZA
z9w0jcO(n>5khlN?gH&>Uu3ln6K~Ab(PGWIMX>n>wN@7W>azHRt&lw=?!N9<eP?wvS
zoC6eOyPK8_QWHAM97Hj&Fo<kj&cBz>+i$`Bt;{oayDes6D5@=fl7Gv|V(*I1zwIAc
Ju7ed4AOM+fGeiIY


From ae965c183dd6461cad33f3365ac3f2343e284f1a Mon Sep 17 00:00:00 2001
From: Isabelle Chen <isabellechen@Isabelles-MacBook-Pro-2.local>
Date: Fri, 6 Jan 2023 18:25:34 -0600
Subject: [PATCH 06/20] Adjust import orders

---
 src/arcade_collection/output/parse_growth_file.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/arcade_collection/output/parse_growth_file.py b/src/arcade_collection/output/parse_growth_file.py
index 498420f..e6ee997 100644
--- a/src/arcade_collection/output/parse_growth_file.py
+++ b/src/arcade_collection/output/parse_growth_file.py
@@ -1,12 +1,12 @@
-from typing import List, Union
 import json
+from os import path
 import tarfile
-from prefect import task
+from typing import List, Union
 
+import ntpath
 import numpy as np
 import pandas as pd
-import ntpath
-from os import path
+from prefect import task
 
 
 GROWTH_COLUMNS = [

From e7d169c28b18d49346cd7d595a03ee0f8a539eb8 Mon Sep 17 00:00:00 2001
From: Isabelle Chen <isabellechen@Isabelles-MacBook-Pro-2.local>
Date: Fri, 6 Jan 2023 20:18:00 -0600
Subject: [PATCH 07/20] Add extract file mocking with single member

---
 .../output/test_parse_growth_file.py          | 31 +++++++++++++++++--
 1 file changed, 28 insertions(+), 3 deletions(-)

diff --git a/tests/arcade_collection/output/test_parse_growth_file.py b/tests/arcade_collection/output/test_parse_growth_file.py
index 6937b28..48fc70f 100644
--- a/tests/arcade_collection/output/test_parse_growth_file.py
+++ b/tests/arcade_collection/output/test_parse_growth_file.py
@@ -1,10 +1,35 @@
+import json
+import tarfile
+
 import unittest
 from unittest import mock
+from unittest.mock import mock_open
 
 from arcade_collection.output.parse_growth_file import parse_growth_file
 
 
 class TestParseGrowthFile(unittest.TestCase):
-    @mock.patch("arcade_collection.output.parse_growth_file.tarfile")
-    def test_parse_growth_timepoint(self, tar_mock):
-        tar_object = mock.Mock(spec=tar_mock.TarFile)
+    # @mock.patch("builtins.open", new_callable=mock_open)
+    # @mock.patch("arcade_collection.output.parse_growth_file.json")
+    def test_parse_growth_timepoint(self):
+        tar_object = mock.Mock(spec=tarfile.TarFile)
+        tar_object.name = "tar_object_name.tar"
+        assert tar_object.name == "tar_object_name.tar"
+
+        first_tar_member = mock.Mock(spec=tarfile.TarInfo)
+        first_tar_member.name = "first_member.json"
+        assert first_tar_member.name == "first_member.json"
+
+        first_json = mock.MagicMock()
+        first_json.read.return_value = '{"seed": 0, "timepoints": [{"time": 0.0,"cells": [[[-33,0,33,0],[[0,1,2,0,2522.26,[]]]],[[0,0,0,0],[[1,0,4,0,2300.50,[]]]]]},{"time": 0.5,"cells": [[[-33,0,33,0],[[0,1,2,0,2522.26,[]]]],[[0,0,0,0],[[1,0,4,0,4391.91,[]]]]]},{"time": 1.0,"cells": [[[-33,0,33,0],[[0,1,1,0,2522.26,[]]]],[[0,0,0,0],[[1,0,4,0,5047.58,[800.0,512.3]]]],[[3,3,-6,0],[[0,1,2,0,2453.83,[640.0]],[1,0,3,1,2517.54,[]]]]]}]}'.encode(
+            "utf-8"
+        )
+
+        second_tar_member = mock.Mock(spec=tarfile.TarInfo)
+        second_tar_member.name = "second_member.txt"
+        assert second_tar_member.name == "second_member.txt"
+
+        tar_object.getmembers.return_value = [first_tar_member, second_tar_member]
+        tar_object.extractfile.return_value = first_json
+        expected_dataframe = parse_growth_file.fn(tar_object)
+        print(expected_dataframe)

From d2763157eea2be93bc2a05be5dd03e86dc1b0a0f Mon Sep 17 00:00:00 2001
From: Isabelle Chen <isabellechen@Isabelles-MacBook-Pro-2.local>
Date: Fri, 6 Jan 2023 20:20:42 -0600
Subject: [PATCH 08/20] Add extract file mocking with single member

---
 .../output/test_parse_growth_file.py          | 31 +++++++++++++++++--
 1 file changed, 28 insertions(+), 3 deletions(-)

diff --git a/tests/arcade_collection/output/test_parse_growth_file.py b/tests/arcade_collection/output/test_parse_growth_file.py
index 6937b28..48fc70f 100644
--- a/tests/arcade_collection/output/test_parse_growth_file.py
+++ b/tests/arcade_collection/output/test_parse_growth_file.py
@@ -1,10 +1,35 @@
+import json
+import tarfile
+
 import unittest
 from unittest import mock
+from unittest.mock import mock_open
 
 from arcade_collection.output.parse_growth_file import parse_growth_file
 
 
 class TestParseGrowthFile(unittest.TestCase):
-    @mock.patch("arcade_collection.output.parse_growth_file.tarfile")
-    def test_parse_growth_timepoint(self, tar_mock):
-        tar_object = mock.Mock(spec=tar_mock.TarFile)
+    # @mock.patch("builtins.open", new_callable=mock_open)
+    # @mock.patch("arcade_collection.output.parse_growth_file.json")
+    def test_parse_growth_timepoint(self):
+        tar_object = mock.Mock(spec=tarfile.TarFile)
+        tar_object.name = "tar_object_name.tar"
+        assert tar_object.name == "tar_object_name.tar"
+
+        first_tar_member = mock.Mock(spec=tarfile.TarInfo)
+        first_tar_member.name = "first_member.json"
+        assert first_tar_member.name == "first_member.json"
+
+        first_json = mock.MagicMock()
+        first_json.read.return_value = '{"seed": 0, "timepoints": [{"time": 0.0,"cells": [[[-33,0,33,0],[[0,1,2,0,2522.26,[]]]],[[0,0,0,0],[[1,0,4,0,2300.50,[]]]]]},{"time": 0.5,"cells": [[[-33,0,33,0],[[0,1,2,0,2522.26,[]]]],[[0,0,0,0],[[1,0,4,0,4391.91,[]]]]]},{"time": 1.0,"cells": [[[-33,0,33,0],[[0,1,1,0,2522.26,[]]]],[[0,0,0,0],[[1,0,4,0,5047.58,[800.0,512.3]]]],[[3,3,-6,0],[[0,1,2,0,2453.83,[640.0]],[1,0,3,1,2517.54,[]]]]]}]}'.encode(
+            "utf-8"
+        )
+
+        second_tar_member = mock.Mock(spec=tarfile.TarInfo)
+        second_tar_member.name = "second_member.txt"
+        assert second_tar_member.name == "second_member.txt"
+
+        tar_object.getmembers.return_value = [first_tar_member, second_tar_member]
+        tar_object.extractfile.return_value = first_json
+        expected_dataframe = parse_growth_file.fn(tar_object)
+        print(expected_dataframe)

From feeaf99ac6f4b5e04a1a98fe9700d4494228b130 Mon Sep 17 00:00:00 2001
From: Isabelle Chen
 <isabellechen@dhcp-10-105-197-7.wireless.northwestern.private>
Date: Mon, 9 Jan 2023 15:19:57 -0600
Subject: [PATCH 09/20] Edit the test to include multiple extracted files and
 add docstrings

---
 .../output/parse_growth_file.py               | 41 ++++++++++++++++++-
 .../output/test_parse_growth_file.py          | 32 ++++++++++-----
 2 files changed, 62 insertions(+), 11 deletions(-)

diff --git a/src/arcade_collection/output/parse_growth_file.py b/src/arcade_collection/output/parse_growth_file.py
index e6ee997..9790080 100644
--- a/src/arcade_collection/output/parse_growth_file.py
+++ b/src/arcade_collection/output/parse_growth_file.py
@@ -1,7 +1,7 @@
 import json
 from os import path
 import tarfile
-from typing import List, Union
+from typing import List
 
 import ntpath
 import numpy as np
@@ -26,6 +26,19 @@
 
 @task
 def parse_growth_file(tar: tarfile.TarFile) -> pd.DataFrame:
+    """
+    Parse the tumor growth tar file.
+
+    Parameters
+    ---------
+    tar :
+        Tar file of simulations.
+
+    Returns
+    -------
+    :
+        Data of all timepoints of all simulations in tar file.
+    """
     all_timepoints = []
     for member in tar.getmembers():
         extracted_member = tar.extractfile(member)
@@ -48,6 +61,19 @@ def parse_growth_file(tar: tarfile.TarFile) -> pd.DataFrame:
 
 
 def convert_state_to_string(state_index: int) -> str:
+    """
+    Convert the numbers that represent cell state into an annotation.
+
+    Parameters
+    ---------
+    state_index :
+        The index of cell states.
+
+    Returns
+    -------
+    :
+        The cell state annotation.
+    """
     if state_index == 0:
         return "NEU"
     elif state_index == 1:
@@ -65,6 +91,19 @@ def convert_state_to_string(state_index: int) -> str:
 
 
 def parse_growth_timepoint(timepoint: dict, seed: int) -> List[list]:
+    """
+    Parse one timepoint of the simulation
+
+    Parameters
+    ---------
+    timepoint :
+        The data of one timepoint.
+
+    Returns
+    -------
+    :
+        Parsed data of the timepoint.
+    """
     parsed_data = []
 
     for (location, cells) in timepoint["cells"]:
diff --git a/tests/arcade_collection/output/test_parse_growth_file.py b/tests/arcade_collection/output/test_parse_growth_file.py
index 48fc70f..f98f077 100644
--- a/tests/arcade_collection/output/test_parse_growth_file.py
+++ b/tests/arcade_collection/output/test_parse_growth_file.py
@@ -1,16 +1,12 @@
-import json
 import tarfile
 
 import unittest
 from unittest import mock
-from unittest.mock import mock_open
 
 from arcade_collection.output.parse_growth_file import parse_growth_file
 
 
 class TestParseGrowthFile(unittest.TestCase):
-    # @mock.patch("builtins.open", new_callable=mock_open)
-    # @mock.patch("arcade_collection.output.parse_growth_file.json")
     def test_parse_growth_timepoint(self):
         tar_object = mock.Mock(spec=tarfile.TarFile)
         tar_object.name = "tar_object_name.tar"
@@ -20,16 +16,32 @@ def test_parse_growth_timepoint(self):
         first_tar_member.name = "first_member.json"
         assert first_tar_member.name == "first_member.json"
 
+        second_tar_member = mock.Mock(spec=tarfile.TarInfo)
+        second_tar_member.name = "second_member.txt"
+        assert second_tar_member.name == "second_member.txt"
+
+        third_tar_member = mock.Mock(spec=tarfile.TarInfo)
+        third_tar_member.name = "third_member.json"
+        assert third_tar_member.name == "third_member.json"
+
         first_json = mock.MagicMock()
-        first_json.read.return_value = '{"seed": 0, "timepoints": [{"time": 0.0,"cells": [[[-33,0,33,0],[[0,1,2,0,2522.26,[]]]],[[0,0,0,0],[[1,0,4,0,2300.50,[]]]]]},{"time": 0.5,"cells": [[[-33,0,33,0],[[0,1,2,0,2522.26,[]]]],[[0,0,0,0],[[1,0,4,0,4391.91,[]]]]]},{"time": 1.0,"cells": [[[-33,0,33,0],[[0,1,1,0,2522.26,[]]]],[[0,0,0,0],[[1,0,4,0,5047.58,[800.0,512.3]]]],[[3,3,-6,0],[[0,1,2,0,2453.83,[640.0]],[1,0,3,1,2517.54,[]]]]]}]}'.encode(
+        first_json.read.return_value = '{"seed": 0, "timepoints": [{"time": 0.0,"cells": [[[-33,0,33,0],[[0,1,2,0,2322.26,[]]]],[[0,0,10,0],[[1,0,2,0,2300.50,[]]]]]},{"time": 0.5,"cells": [[[-33,0,31,0],[[0,1,2,0,2522.26,[]]]],[[0,0,5,0],[[1,0,3,0,4391.91,[]]]]]},{"time": 1.0,"cells": [[[-19,0,30,0],[[0,1,1,0,2582.22,[]]]],[[0,0,7,0],[[1,0,4,0,5047.58,[800.0,512.3]]]],[[3,3,-6,0],[[0,1,2,0,2453.83,[640.0]],[1,0,3,1,2517.54,[]]]]]}]}'.encode(
             "utf-8"
         )
 
-        second_tar_member = mock.Mock(spec=tarfile.TarInfo)
-        second_tar_member.name = "second_member.txt"
-        assert second_tar_member.name == "second_member.txt"
+        second_json = mock.MagicMock()
+        second_json.read.return_value = '{"seed": 1, "timepoints": [{"time": 10.0,"cells": [[[-13,0,33,0],[[0,1,2,0,2372.26,[]]]],[[0,0,10,0],[[1,0,2,0,2390.50,[]]]]]},{"time": 10.5,"cells": [[[-33,0,1,0],[[0,1,2,0,2022.26,[]]]],[[0,0,8,0],[[1,0,3,0,4390.91,[]]]]]},{"time": 11.0,"cells": [[[-19,0,3,0],[[0,1,1,0,2582.22,[]]]],[[1,0,1,0],[[1,0,4,0,5040.58,[800.0,512.3]]]],[[3,0,-6,0],[[0,1,2,0,2053.83,[640.0]],[1,0,3,1,2517.54,[]]]]]}]}'.encode(
+            "utf-8"
+        )
+
+        tar_object.getmembers.return_value = [first_tar_member, second_tar_member, third_tar_member]
+
+        mock_contents = {
+            first_tar_member: first_json,
+            second_tar_member: "",
+            third_tar_member: second_json,
+        }
+        tar_object.extractfile.side_effect = lambda fname, *args, **kwargs: mock_contents[fname]
 
-        tar_object.getmembers.return_value = [first_tar_member, second_tar_member]
-        tar_object.extractfile.return_value = first_json
         expected_dataframe = parse_growth_file.fn(tar_object)
         print(expected_dataframe)

From eb33be46c0e27668adf15bef00e90460ae5edde1 Mon Sep 17 00:00:00 2001
From: Isabelle Chen <isabellechen@Isabelles-MacBook-Pro-2.local>
Date: Mon, 9 Jan 2023 18:19:41 -0600
Subject: [PATCH 10/20] Fix mypy error in state conversion function by
 returning None

---
 src/arcade_collection/output/parse_growth_file.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/arcade_collection/output/parse_growth_file.py b/src/arcade_collection/output/parse_growth_file.py
index 9790080..69c54e2 100644
--- a/src/arcade_collection/output/parse_growth_file.py
+++ b/src/arcade_collection/output/parse_growth_file.py
@@ -1,7 +1,7 @@
 import json
 from os import path
 import tarfile
-from typing import List
+from typing import List, Union
 
 import ntpath
 import numpy as np
@@ -30,7 +30,7 @@ def parse_growth_file(tar: tarfile.TarFile) -> pd.DataFrame:
     Parse the tumor growth tar file.
 
     Parameters
-    ---------
+    ----------
     tar :
         Tar file of simulations.
 
@@ -60,12 +60,12 @@ def parse_growth_file(tar: tarfile.TarFile) -> pd.DataFrame:
     return timepoints_df
 
 
-def convert_state_to_string(state_index: int) -> str:
+def convert_state_to_string(state_index: int) -> Union[str, None]:
     """
     Convert the numbers that represent cell state into an annotation.
 
     Parameters
-    ---------
+    ----------
     state_index :
         The index of cell states.
 
@@ -89,13 +89,15 @@ def convert_state_to_string(state_index: int) -> str:
     elif state_index == 6:
         return "NEC"
 
+    return None
+
 
 def parse_growth_timepoint(timepoint: dict, seed: int) -> List[list]:
     """
     Parse one timepoint of the simulation
 
     Parameters
-    ---------
+    ----------
     timepoint :
         The data of one timepoint.
 

From 6019e961409efe45afb9f2d5a29a09c2b66952cf Mon Sep 17 00:00:00 2001
From: Isabelle Chen <isabellechen@Isabelles-MacBook-Pro-2.local>
Date: Mon, 9 Jan 2023 18:20:11 -0600
Subject: [PATCH 11/20] Complete test for parse growth file

---
 .../output/test_parse_growth_file.py          | 99 ++++++++++++++++++-
 1 file changed, 94 insertions(+), 5 deletions(-)

diff --git a/tests/arcade_collection/output/test_parse_growth_file.py b/tests/arcade_collection/output/test_parse_growth_file.py
index d23dd99..f5abc13 100644
--- a/tests/arcade_collection/output/test_parse_growth_file.py
+++ b/tests/arcade_collection/output/test_parse_growth_file.py
@@ -1,5 +1,7 @@
 import tarfile
 
+import numpy as np
+import pandas as pd
 import unittest
 from unittest import mock
 from unittest.mock import mock_open
@@ -25,18 +27,18 @@ def test_parse_growth_timepoint(self):
         third_tar_member.name = "third_member.json"
         assert third_tar_member.name == "third_member.json"
 
+        tar_object.getmembers.return_value = [first_tar_member, second_tar_member, third_tar_member]
+
         first_json = mock.MagicMock()
         first_json.read.return_value = '{"seed": 0, "timepoints": [{"time": 0.0,"cells": [[[-33,0,33,0],[[0,1,2,0,2322.26,[]]]],[[0,0,10,0],[[1,0,2,0,2300.50,[]]]]]},{"time": 0.5,"cells": [[[-33,0,31,0],[[0,1,2,0,2522.26,[]]]],[[0,0,5,0],[[1,0,3,0,4391.91,[]]]]]},{"time": 1.0,"cells": [[[-19,0,30,0],[[0,1,1,0,2582.22,[]]]],[[0,0,7,0],[[1,0,4,0,5047.58,[800.0,512.3]]]],[[3,3,-6,0],[[0,1,2,0,2453.83,[640.0]],[1,0,3,1,2517.54,[]]]]]}]}'.encode(
             "utf-8"
         )
 
         second_json = mock.MagicMock()
-        second_json.read.return_value = '{"seed": 1, "timepoints": [{"time": 10.0,"cells": [[[-13,0,33,0],[[0,1,2,0,2372.26,[]]]],[[0,0,10,0],[[1,0,2,0,2390.50,[]]]]]},{"time": 10.5,"cells": [[[-33,0,1,0],[[0,1,2,0,2022.26,[]]]],[[0,0,8,0],[[1,0,3,0,4390.91,[]]]]]},{"time": 11.0,"cells": [[[-19,0,3,0],[[0,1,1,0,2582.22,[]]]],[[1,0,1,0],[[1,0,4,0,5040.58,[800.0,512.3]]]],[[3,0,-6,0],[[0,1,2,0,2053.83,[640.0]],[1,0,3,1,2517.54,[]]]]]}]}'.encode(
+        second_json.read.return_value = '{"seed": 1, "timepoints": [{"time": 10.0,"cells": [[[-13,0,33,0],[[0,1,2,0,2372.26,[]]]],[[0,0,10,0],[[1,0,2,0,2390.50,[]]]]]},{"time": 10.5,"cells": [[[-33,0,1,0],[[0,1,2,0,2022.26,[]]]],[[0,0,8,0],[[1,0,3,0,4390.91,[]]]]]},{"time": 11.0,"cells": [[[-19,0,3,0],[[0,1,1,0,2582.22,[]]]],[[1,0,1,0],[[1,0,4,0,5040.58,[800.0,512.3]]]],[[3,0,-6,0],[[0,2,2,0,2053.83,[640.0]],[1,0,6,1,2517.54,[]]]]]}]}'.encode(
             "utf-8"
         )
 
-        tar_object.getmembers.return_value = [first_tar_member, second_tar_member, third_tar_member]
-
         mock_contents = {
             first_tar_member: first_json,
             second_tar_member: "",
@@ -44,5 +46,92 @@ def test_parse_growth_timepoint(self):
         }
         tar_object.extractfile.side_effect = lambda fname, *args, **kwargs: mock_contents[fname]
 
-        expected_dataframe = parse_growth_file.fn(tar_object)
-        print(expected_dataframe)
+        returned_df = parse_growth_file.fn(tar_object)
+
+        expected_dict = {
+            "TICK": [
+                0.0,
+                0.0,
+                0.5,
+                0.5,
+                1.0,
+                1.0,
+                1.0,
+                1.0,
+                10.0,
+                10.0,
+                10.5,
+                10.5,
+                11.0,
+                11.0,
+                11.0,
+                11.0,
+            ],
+            "SEED": [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1],
+            "U": [-33, 0, -33, 0, -19, 0, 3, 3, -13, 0, -33, 0, -19, 1, 3, 3],
+            "V": [0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0],
+            "W": [33, 10, 31, 5, 30, 7, -6, -6, 33, 10, 1, 8, 3, 1, -6, -6],
+            "Z": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+            "POSITION": [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1],
+            "POPULATION": [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 2, 0],
+            "STATE": [
+                "QUI",
+                "QUI",
+                "QUI",
+                "MIG",
+                "APO",
+                "PRO",
+                "QUI",
+                "MIG",
+                "QUI",
+                "QUI",
+                "QUI",
+                "MIG",
+                "APO",
+                "PRO",
+                "QUI",
+                "NEC",
+            ],
+            "VOLUME": [
+                np.round(i)
+                for i in [
+                    2322.26,
+                    2300.5,
+                    2522.26,
+                    4391.91,
+                    2582.22,
+                    5047.58,
+                    2453.83,
+                    2517.54,
+                    2372.26,
+                    2390.50,
+                    2022.26,
+                    4390.91,
+                    2582.22,
+                    5040.58,
+                    2053.83,
+                    2517.54,
+                ]
+            ],
+            "CYCLE": [
+                -1,
+                -1,
+                -1,
+                -1,
+                -1,
+                np.round(np.mean([800.0, 512.3])),
+                640.0,
+                -1,
+                -1,
+                -1,
+                -1,
+                -1,
+                -1,
+                np.round(np.mean([800.0, 512.3])),
+                640.0,
+                -1,
+            ],
+        }
+
+        expected_df = pd.DataFrame(expected_dict)
+        self.assertTrue(expected_df.equals(returned_df))

From b26f75713e2387fe954647cc7d240fd04bf8028b Mon Sep 17 00:00:00 2001
From: Isabelle-C <isabellechen2023@u.northwestern.edu>
Date: Mon, 12 Jun 2023 07:38:08 -0500
Subject: [PATCH 12/20] Update parse growth file and tests after initial review

---
 .../output/parse_growth_file.py               | 41 ++++-----
 .../output/test_parse_growth_file.py          | 84 +++++++++----------
 2 files changed, 60 insertions(+), 65 deletions(-)

diff --git a/src/arcade_collection/output/parse_growth_file.py b/src/arcade_collection/output/parse_growth_file.py
index 69c54e2..4e4ab13 100644
--- a/src/arcade_collection/output/parse_growth_file.py
+++ b/src/arcade_collection/output/parse_growth_file.py
@@ -3,7 +3,6 @@
 import tarfile
 from typing import List, Union
 
-import ntpath
 import numpy as np
 import pandas as pd
 from prefect import task
@@ -42,19 +41,16 @@ def parse_growth_file(tar: tarfile.TarFile) -> pd.DataFrame:
     all_timepoints = []
     for member in tar.getmembers():
         extracted_member = tar.extractfile(member)
-        if extracted_member is not None:
-            base = ntpath.basename(member.name)
-            base_file = path.splitext(base)
-            file_name = base_file[0]
-            extension = base_file[1]
-            if file_name[0] != "." and extension == ".json":
-                extracted_json = json.loads(extracted_member.read().decode("utf-8"))
-                seed = extracted_json["seed"]
-
-                for timepoint in extracted_json["timepoints"]:
-                    one_timepoint = parse_growth_timepoint(timepoint, seed)
-                    for data in one_timepoint:
-                        all_timepoints.append(data)
+        extracted_json = json.loads(extracted_member.read().decode("utf-8"))
+        seed = extracted_json["seed"]
+
+        all_timepoints.extend(
+            [
+                data
+                for timepoint in extracted_json["timepoints"]
+                for data in parse_growth_timepoint(timepoint, seed)
+            ]
+        )
 
     timepoints_df = pd.DataFrame(all_timepoints, columns=GROWTH_COLUMNS)
     return timepoints_df
@@ -92,9 +88,14 @@ def convert_state_to_string(state_index: int) -> Union[str, None]:
     return None
 
 
-def parse_growth_timepoint(timepoint: dict, seed: int) -> List[list]:
+def parse_growth_timepoint(timepoint: dict, seed: int) -> list:
     """
-    Parse one timepoint of the simulation
+    Parse one timepoint of the simulation.
+
+    The original data contains data of every timepoint at a seed in a
+    dictionary. The current data contains data of one cell per row, with tick,
+    seed, coordinates (u, v, w, z), position, population, state, volume, and
+    averaged cycle.
 
     Parameters
     ----------
@@ -107,6 +108,7 @@ def parse_growth_timepoint(timepoint: dict, seed: int) -> List[list]:
         Parsed data of the timepoint.
     """
     parsed_data = []
+    time = timepoint["time"]
 
     for (location, cells) in timepoint["cells"]:
         u = int(location[0])
@@ -118,12 +120,11 @@ def parse_growth_timepoint(timepoint: dict, seed: int) -> List[list]:
             population = cell[1]
             state = cell[2]
             position = cell[3]
-            volume = np.round(cell[4])
+            volume = cell[4]
             if len(cell[5]) == 0:
-                cycle = -1
+                cycle = None
             else:
-                cycle = np.round(np.mean(cell[5]))
-            time = timepoint["time"]
+                cycle = np.mean(cell[5])
             data_list = [
                 time,
                 seed,
diff --git a/tests/arcade_collection/output/test_parse_growth_file.py b/tests/arcade_collection/output/test_parse_growth_file.py
index f5abc13..64543e4 100644
--- a/tests/arcade_collection/output/test_parse_growth_file.py
+++ b/tests/arcade_collection/output/test_parse_growth_file.py
@@ -12,22 +12,15 @@
 class TestParseGrowthFile(unittest.TestCase):
     def test_parse_growth_timepoint(self):
         tar_object = mock.Mock(spec=tarfile.TarFile)
-        tar_object.name = "tar_object_name.tar"
-        assert tar_object.name == "tar_object_name.tar"
+        tar_object.name = "tar_object_name.tar.xz"
 
         first_tar_member = mock.Mock(spec=tarfile.TarInfo)
         first_tar_member.name = "first_member.json"
-        assert first_tar_member.name == "first_member.json"
 
         second_tar_member = mock.Mock(spec=tarfile.TarInfo)
-        second_tar_member.name = "second_member.txt"
-        assert second_tar_member.name == "second_member.txt"
+        second_tar_member.name = "second_member.json"
 
-        third_tar_member = mock.Mock(spec=tarfile.TarInfo)
-        third_tar_member.name = "third_member.json"
-        assert third_tar_member.name == "third_member.json"
-
-        tar_object.getmembers.return_value = [first_tar_member, second_tar_member, third_tar_member]
+        tar_object.getmembers.return_value = [first_tar_member, second_tar_member]
 
         first_json = mock.MagicMock()
         first_json.read.return_value = '{"seed": 0, "timepoints": [{"time": 0.0,"cells": [[[-33,0,33,0],[[0,1,2,0,2322.26,[]]]],[[0,0,10,0],[[1,0,2,0,2300.50,[]]]]]},{"time": 0.5,"cells": [[[-33,0,31,0],[[0,1,2,0,2522.26,[]]]],[[0,0,5,0],[[1,0,3,0,4391.91,[]]]]]},{"time": 1.0,"cells": [[[-19,0,30,0],[[0,1,1,0,2582.22,[]]]],[[0,0,7,0],[[1,0,4,0,5047.58,[800.0,512.3]]]],[[3,3,-6,0],[[0,1,2,0,2453.83,[640.0]],[1,0,3,1,2517.54,[]]]]]}]}'.encode(
@@ -41,8 +34,7 @@ def test_parse_growth_timepoint(self):
 
         mock_contents = {
             first_tar_member: first_json,
-            second_tar_member: "",
-            third_tar_member: second_json,
+            second_tar_member: second_json,
         }
         tar_object.extractfile.side_effect = lambda fname, *args, **kwargs: mock_contents[fname]
 
@@ -93,45 +85,47 @@ def test_parse_growth_timepoint(self):
                 "NEC",
             ],
             "VOLUME": [
-                np.round(i)
-                for i in [
-                    2322.26,
-                    2300.5,
-                    2522.26,
-                    4391.91,
-                    2582.22,
-                    5047.58,
-                    2453.83,
-                    2517.54,
-                    2372.26,
-                    2390.50,
-                    2022.26,
-                    4390.91,
-                    2582.22,
-                    5040.58,
-                    2053.83,
-                    2517.54,
-                ]
+                2322.26,
+                2300.5,
+                2522.26,
+                4391.91,
+                2582.22,
+                5047.58,
+                2453.83,
+                2517.54,
+                2372.26,
+                2390.50,
+                2022.26,
+                4390.91,
+                2582.22,
+                5040.58,
+                2053.83,
+                2517.54,
             ],
             "CYCLE": [
-                -1,
-                -1,
-                -1,
-                -1,
-                -1,
-                np.round(np.mean([800.0, 512.3])),
+                None,
+                None,
+                None,
+                None,
+                None,
+                np.mean([800.0, 512.3]),
                 640.0,
-                -1,
-                -1,
-                -1,
-                -1,
-                -1,
-                -1,
-                np.round(np.mean([800.0, 512.3])),
+                None,
+                None,
+                None,
+                None,
+                None,
+                None,
+                np.mean([800.0, 512.3]),
                 640.0,
-                -1,
+                None,
             ],
         }
 
         expected_df = pd.DataFrame(expected_dict)
+        print(returned_df)
+
+        print("-----")
+
+        print(expected_df)
         self.assertTrue(expected_df.equals(returned_df))

From 5968462a20dbe0f467efc0dd11f37560c03df508 Mon Sep 17 00:00:00 2001
From: Isabelle-C <isabellechen2023@u.northwestern.edu>
Date: Mon, 12 Jun 2023 07:48:53 -0500
Subject: [PATCH 13/20] Convert state lookup to list indexing

---
 .../output/parse_growth_file.py               | 24 +++++--------------
 1 file changed, 6 insertions(+), 18 deletions(-)

diff --git a/src/arcade_collection/output/parse_growth_file.py b/src/arcade_collection/output/parse_growth_file.py
index 4e4ab13..05dbd50 100644
--- a/src/arcade_collection/output/parse_growth_file.py
+++ b/src/arcade_collection/output/parse_growth_file.py
@@ -56,7 +56,7 @@ def parse_growth_file(tar: tarfile.TarFile) -> pd.DataFrame:
     return timepoints_df
 
 
-def convert_state_to_string(state_index: int) -> Union[str, None]:
+def convert_state_to_string(state_index: int, state_list: List[str]) -> Union[str, None]:
     """
     Convert the numbers that represent cell state into an annotation.
 
@@ -64,28 +64,16 @@ def convert_state_to_string(state_index: int) -> Union[str, None]:
     ----------
     state_index :
         The index of cell states.
+    state_list :
+        The list of cell states.
 
     Returns
     -------
     :
         The cell state annotation.
     """
-    if state_index == 0:
-        return "NEU"
-    elif state_index == 1:
-        return "APO"
-    elif state_index == 2:
-        return "QUI"
-    elif state_index == 3:
-        return "MIG"
-    elif state_index == 4:
-        return "PRO"
-    elif state_index == 5:
-        return "SEN"
-    elif state_index == 6:
-        return "NEC"
-
-    return None
+
+    return state_list[state_index]
 
 
 def parse_growth_timepoint(timepoint: dict, seed: int) -> list:
@@ -134,7 +122,7 @@ def parse_growth_timepoint(timepoint: dict, seed: int) -> list:
                 z,
                 position,
                 population,
-                convert_state_to_string(state),
+                convert_state_to_string(state, ["NEU", "APO", "QUI", "MIG", "PRO", "SEN", "NEC"]),
                 volume,
                 cycle,
             ]

From c786f18a14e9b9396dd48d748f68824e37305c61 Mon Sep 17 00:00:00 2001
From: Isabelle-C <isabellechen2023@u.northwestern.edu>
Date: Sat, 7 Oct 2023 00:14:39 -0500
Subject: [PATCH 14/20] Update according to feedback comments

---
 src/arcade_collection/output/__init__.py      |  2 +
 .../output/parse_growth_file.py               | 50 +++++++------------
 .../output/parse_params_file.py               | 26 ----------
 .../output/test_parse_growth_file.py          | 37 ++++++--------
 4 files changed, 35 insertions(+), 80 deletions(-)
 delete mode 100644 src/arcade_collection/output/parse_params_file.py

diff --git a/src/arcade_collection/output/__init__.py b/src/arcade_collection/output/__init__.py
index 438a252..c45b14e 100644
--- a/src/arcade_collection/output/__init__.py
+++ b/src/arcade_collection/output/__init__.py
@@ -7,8 +7,10 @@
 from .get_location_voxels import get_location_voxels
 from .merge_parsed_results import merge_parsed_results
 from .parse_cells_file import parse_cells_file
+from .parse_growth_file import parse_growth_file
 from .parse_locations_file import parse_locations_file
 
+
 convert_model_units = task(convert_model_units)
 extract_tick_json = task(extract_tick_json)
 get_location_voxels = task(get_location_voxels)
diff --git a/src/arcade_collection/output/parse_growth_file.py b/src/arcade_collection/output/parse_growth_file.py
index 05dbd50..f5adf63 100644
--- a/src/arcade_collection/output/parse_growth_file.py
+++ b/src/arcade_collection/output/parse_growth_file.py
@@ -1,7 +1,5 @@
 import json
-from os import path
 import tarfile
-from typing import List, Union
 
 import numpy as np
 import pandas as pd
@@ -22,6 +20,16 @@
     "CYCLE",
 ]
 
+CELL_STATES = [
+    "NEUTRAL",
+    "APOPTOTIC",
+    "QUIESCENT",
+    "MIGRATORY",
+    "PROLIFERATIVE",
+    "SENESCENT",
+    "NECROTIC",
+]
+
 
 @task
 def parse_growth_file(tar: tarfile.TarFile) -> pd.DataFrame:
@@ -56,26 +64,6 @@ def parse_growth_file(tar: tarfile.TarFile) -> pd.DataFrame:
     return timepoints_df
 
 
-def convert_state_to_string(state_index: int, state_list: List[str]) -> Union[str, None]:
-    """
-    Convert the numbers that represent cell state into an annotation.
-
-    Parameters
-    ----------
-    state_index :
-        The index of cell states.
-    state_list :
-        The list of cell states.
-
-    Returns
-    -------
-    :
-        The cell state annotation.
-    """
-
-    return state_list[state_index]
-
-
 def parse_growth_timepoint(timepoint: dict, seed: int) -> list:
     """
     Parse one timepoint of the simulation.
@@ -99,20 +87,16 @@ def parse_growth_timepoint(timepoint: dict, seed: int) -> list:
     time = timepoint["time"]
 
     for (location, cells) in timepoint["cells"]:
-        u = int(location[0])
-        v = int(location[1])
-        w = int(location[2])
-        z = int(location[3])
+        u, v, w, z = location
 
         for cell in cells:
-            population = cell[1]
-            state = cell[2]
-            position = cell[3]
-            volume = cell[4]
-            if len(cell[5]) == 0:
+            _, population, state, position, volume, cycles = cell
+
+            if len(cycles) == 0:
                 cycle = None
             else:
-                cycle = np.mean(cell[5])
+                cycle = np.mean(cycles)
+
             data_list = [
                 time,
                 seed,
@@ -122,7 +106,7 @@ def parse_growth_timepoint(timepoint: dict, seed: int) -> list:
                 z,
                 position,
                 population,
-                convert_state_to_string(state, ["NEU", "APO", "QUI", "MIG", "PRO", "SEN", "NEC"]),
+                CELL_STATES[state],
                 volume,
                 cycle,
             ]
diff --git a/src/arcade_collection/output/parse_params_file.py b/src/arcade_collection/output/parse_params_file.py
deleted file mode 100644
index e3f9186..0000000
--- a/src/arcade_collection/output/parse_params_file.py
+++ /dev/null
@@ -1,26 +0,0 @@
-from prefect import task
-import json
-import tarfile
-import pandas as pd
-
-
-@task
-def parse_params_file(tar: tarfile.TarFile) -> pd.DataFrame:
-    all_timepoints = []
-
-    for member in tar.getmembers():
-        seed = 0
-        extracted_member = tar.extractfile(member)
-        assert extracted_member is not None
-        extracted_json = json.loads(extracted_member.read().decode("utf-8"))
-
-        timepoints = [parse_timepoint(timepoint) for timepoint in extracted_json]
-        all_timepoints = all_timepoints + timepoints
-
-    timepoints_df = pd.DataFrame(all_timepoints, columns=COLUMN_NAMES)
-
-    return timepoints_df
-
-
-def parse_timepoint(timepoint):
-    return 0
diff --git a/tests/arcade_collection/output/test_parse_growth_file.py b/tests/arcade_collection/output/test_parse_growth_file.py
index 64543e4..01e07a1 100644
--- a/tests/arcade_collection/output/test_parse_growth_file.py
+++ b/tests/arcade_collection/output/test_parse_growth_file.py
@@ -67,22 +67,22 @@ def test_parse_growth_timepoint(self):
             "POSITION": [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1],
             "POPULATION": [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 2, 0],
             "STATE": [
-                "QUI",
-                "QUI",
-                "QUI",
-                "MIG",
-                "APO",
-                "PRO",
-                "QUI",
-                "MIG",
-                "QUI",
-                "QUI",
-                "QUI",
-                "MIG",
-                "APO",
-                "PRO",
-                "QUI",
-                "NEC",
+                "QUIESCENT",
+                "QUIESCENT",
+                "QUIESCENT",
+                "MIGRATORY",
+                "APOPTOTIC",
+                "PROLIFERATIVE",
+                "QUIESCENT",
+                "MIGRATORY",
+                "QUIESCENT",
+                "QUIESCENT",
+                "QUIESCENT",
+                "MIGRATORY",
+                "APOPTOTIC",
+                "PROLIFERATIVE",
+                "QUIESCENT",
+                "NECROTIC",
             ],
             "VOLUME": [
                 2322.26,
@@ -123,9 +123,4 @@ def test_parse_growth_timepoint(self):
         }
 
         expected_df = pd.DataFrame(expected_dict)
-        print(returned_df)
-
-        print("-----")
-
-        print(expected_df)
         self.assertTrue(expected_df.equals(returned_df))

From 13bdd483744c57d9b245f3444fa1c827557b7861 Mon Sep 17 00:00:00 2001
From: Isabelle-C <isabellechen2023@u.northwestern.edu>
Date: Sat, 7 Oct 2023 00:34:10 -0500
Subject: [PATCH 15/20] Add isort changes

---
 src/arcade_collection/output/__init__.py                 | 1 -
 src/arcade_collection/output/parse_growth_file.py        | 1 -
 tests/arcade_collection/output/test_parse_growth_file.py | 6 +++---
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/arcade_collection/output/__init__.py b/src/arcade_collection/output/__init__.py
index c45b14e..fc0aff0 100644
--- a/src/arcade_collection/output/__init__.py
+++ b/src/arcade_collection/output/__init__.py
@@ -10,7 +10,6 @@
 from .parse_growth_file import parse_growth_file
 from .parse_locations_file import parse_locations_file
 
-
 convert_model_units = task(convert_model_units)
 extract_tick_json = task(extract_tick_json)
 get_location_voxels = task(get_location_voxels)
diff --git a/src/arcade_collection/output/parse_growth_file.py b/src/arcade_collection/output/parse_growth_file.py
index f5adf63..7f674a4 100644
--- a/src/arcade_collection/output/parse_growth_file.py
+++ b/src/arcade_collection/output/parse_growth_file.py
@@ -5,7 +5,6 @@
 import pandas as pd
 from prefect import task
 
-
 GROWTH_COLUMNS = [
     "TICK",
     "SEED",
diff --git a/tests/arcade_collection/output/test_parse_growth_file.py b/tests/arcade_collection/output/test_parse_growth_file.py
index 01e07a1..43e5f23 100644
--- a/tests/arcade_collection/output/test_parse_growth_file.py
+++ b/tests/arcade_collection/output/test_parse_growth_file.py
@@ -1,11 +1,11 @@
 import tarfile
-
-import numpy as np
-import pandas as pd
 import unittest
 from unittest import mock
 from unittest.mock import mock_open
 
+import numpy as np
+import pandas as pd
+
 from arcade_collection.output.parse_growth_file import parse_growth_file
 
 
From 40150eaa90b54174544a85c17b6adb585e9177fd Mon Sep 17 00:00:00 2001
From: Isabelle-C <isabellechen2023@u.northwestern.edu>
Date: Sat, 7 Oct 2023 00:40:46 -0500
Subject: [PATCH 16/20] Attempt to fix mypy error

---
 .../output/parse_growth_file.py               | 21 ++++++++++---------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/src/arcade_collection/output/parse_growth_file.py b/src/arcade_collection/output/parse_growth_file.py
index 7f674a4..581459e 100644
--- a/src/arcade_collection/output/parse_growth_file.py
+++ b/src/arcade_collection/output/parse_growth_file.py
@@ -48,16 +48,17 @@ def parse_growth_file(tar: tarfile.TarFile) -> pd.DataFrame:
     all_timepoints = []
     for member in tar.getmembers():
         extracted_member = tar.extractfile(member)
-        extracted_json = json.loads(extracted_member.read().decode("utf-8"))
-        seed = extracted_json["seed"]
-
-        all_timepoints.extend(
-            [
-                data
-                for timepoint in extracted_json["timepoints"]
-                for data in parse_growth_timepoint(timepoint, seed)
-            ]
-        )
+        if extracted_member is not None:
+            extracted_json = json.loads(extracted_member.read().decode("utf-8"))
+            seed = extracted_json["seed"]
+
+            all_timepoints.extend(
+                [
+                    data
+                    for timepoint in extracted_json["timepoints"]
+                    for data in parse_growth_timepoint(timepoint, seed)
+                ]
+            )
 
     timepoints_df = pd.DataFrame(all_timepoints, columns=GROWTH_COLUMNS)
     return timepoints_df

From fee76ce5b03a7a47d7add6752ce018b70d61e30e Mon Sep 17 00:00:00 2001
From: jessicasyu <15913767+jessicasyu@users.noreply.github.com>
Date: Wed, 1 Nov 2023 17:26:40 -0400
Subject: [PATCH 17/20] Remove tox.ini file

---
 tox.ini | 19 -------------------
 1 file changed, 19 deletions(-)
 delete mode 100644 tox.ini

diff --git a/tox.ini b/tox.ini
deleted file mode 100644
index e5ad038..0000000
--- a/tox.ini
+++ /dev/null
@@ -1,19 +0,0 @@
-[tox]
-isolated_build = True
-envlist = py{310}, linter, mypy
-skipsdist=True
-
-[testenv]
-allowlist_externals = poetry
-commands =
-    poetry run pytest --cov-report html --cov=src/ tests/
-
-[testenv:linter]
-commands =
-    poetry run black -l 100 src/ tests/
-    poetry run pylint --ignore-patterns=test.*?py src/ tests/
-
-[testenv:mypy]
-commands =
-    poetry run mypy --config-file mypy.ini --disallow-untyped-calls --disallow-untyped-defs --disallow-incomplete-defs src
-

From 5040cc309e16ec2c81ee9dd2700e1cffd1b80238 Mon Sep 17 00:00:00 2001
From: jessicasyu <15913767+jessicasyu@users.noreply.github.com>
Date: Wed, 1 Nov 2023 19:38:06 -0400
Subject: [PATCH 18/20] Move task decorator to init

---
 src/arcade_collection/output/__init__.py          | 1 +
 src/arcade_collection/output/parse_growth_file.py | 2 --
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/arcade_collection/output/__init__.py b/src/arcade_collection/output/__init__.py
index fc0aff0..be6a602 100644
--- a/src/arcade_collection/output/__init__.py
+++ b/src/arcade_collection/output/__init__.py
@@ -15,4 +15,5 @@
 get_location_voxels = task(get_location_voxels)
 merge_parsed_results = task(merge_parsed_results)
 parse_cells_file = task(parse_cells_file)
+parse_growth_file = task(parse_growth_file)
 parse_locations_file = task(parse_locations_file)
diff --git a/src/arcade_collection/output/parse_growth_file.py b/src/arcade_collection/output/parse_growth_file.py
index 581459e..01f2f81 100644
--- a/src/arcade_collection/output/parse_growth_file.py
+++ b/src/arcade_collection/output/parse_growth_file.py
@@ -3,7 +3,6 @@
 
 import numpy as np
 import pandas as pd
-from prefect import task
 
 GROWTH_COLUMNS = [
     "TICK",
@@ -30,7 +29,6 @@
 ]
 
 
-@task
 def parse_growth_file(tar: tarfile.TarFile) -> pd.DataFrame:
     """
     Parse the tumor growth tar file.

From eec9be042b2eb2720c1d5641714e72bcbbc94c8c Mon Sep 17 00:00:00 2001
From: jessicasyu <15913767+jessicasyu@users.noreply.github.com>
Date: Wed, 1 Nov 2023 19:38:19 -0400
Subject: [PATCH 19/20] Update docstrings

---
 .../output/parse_growth_file.py               | 77 ++++++++++++++-----
 1 file changed, 57 insertions(+), 20 deletions(-)

diff --git a/src/arcade_collection/output/parse_growth_file.py b/src/arcade_collection/output/parse_growth_file.py
index 01f2f81..cf372e4 100644
--- a/src/arcade_collection/output/parse_growth_file.py
+++ b/src/arcade_collection/output/parse_growth_file.py
@@ -31,56 +31,93 @@
 
 def parse_growth_file(tar: tarfile.TarFile) -> pd.DataFrame:
     """
-    Parse the tumor growth tar file.
+    Parses a tumor growth simulation tar file.
 
     Parameters
     ----------
     tar :
-        Tar file of simulations.
+        Tar file of simulations for different seeds.
 
     Returns
     -------
     :
-        Data of all timepoints of all simulations in tar file.
+        Parsed simulation data for all seeds and timepoints.
     """
+
     all_timepoints = []
+
     for member in tar.getmembers():
         extracted_member = tar.extractfile(member)
-        if extracted_member is not None:
-            extracted_json = json.loads(extracted_member.read().decode("utf-8"))
-            seed = extracted_json["seed"]
-
-            all_timepoints.extend(
-                [
-                    data
-                    for timepoint in extracted_json["timepoints"]
-                    for data in parse_growth_timepoint(timepoint, seed)
-                ]
-            )
+        assert extracted_member is not None
+        extracted_json = json.loads(extracted_member.read().decode("utf-8"))
+
+        seed = extracted_json["seed"]
+        all_timepoints.extend(
+            [
+                data
+                for timepoint in extracted_json["timepoints"]
+                for data in parse_growth_timepoint(timepoint, seed)
+            ]
+        )
 
     timepoints_df = pd.DataFrame(all_timepoints, columns=GROWTH_COLUMNS)
+
     return timepoints_df
 
 
 def parse_growth_timepoint(timepoint: dict, seed: int) -> list:
     """
-    Parse one timepoint of the simulation.
+    Parses a simulation timepoint into a list of features per cell.
 
-    The original data contains data of every timepoint at a seed in a
-    dictionary. The current data contains data of one cell per row, with tick,
-    seed, coordinates (u, v, w, z), position, population, state, volume, and
-    averaged cycle.
+    The original data contains cell features in the form:
+
+    .. code-block:: python
+
+        {
+            "time": time,
+            "cells": [
+                [
+                    [u, v, w, z],
+                    [
+                        [
+                            type,
+                            population,
+                            state,
+                            position,
+                            volume,
+                            [cell, cycle, lengths, ...]
+                        ],
+                        ...
+                    ]
+                ],
+                ...
+            ]
+        }
+
+    Parsed data is formatted into:
+
+    .. code-block:: python
+
+        [
+            [time, seed, u, v, w, z, position, population, state, volume, cycle],
+            [time, seed, u, v, w, z, position, population, state, volume, cycle],
+            ...
+        ]
+
+    Cell cycle length is `None` if the cell has no recorded cycle lengths (it
+    has not yet divided). Otherwise, it is the mean of all recorded lengths.
 
     Parameters
     ----------
     timepoint :
-        The data of one timepoint.
+        Data for a timepoint.
 
     Returns
     -------
     :
         Parsed data of the timepoint.
     """
+
     parsed_data = []
     time = timepoint["time"]
 

From 2ef56bce14c88967047cb220fcfb39d87481ff71 Mon Sep 17 00:00:00 2001
From: jessicasyu <15913767+jessicasyu@users.noreply.github.com>
Date: Wed, 1 Nov 2023 19:38:37 -0400
Subject: [PATCH 20/20] Remove fn call in test

---
 tests/arcade_collection/output/test_parse_growth_file.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/arcade_collection/output/test_parse_growth_file.py b/tests/arcade_collection/output/test_parse_growth_file.py
index 43e5f23..6706e76 100644
--- a/tests/arcade_collection/output/test_parse_growth_file.py
+++ b/tests/arcade_collection/output/test_parse_growth_file.py
@@ -1,7 +1,6 @@
 import tarfile
 import unittest
 from unittest import mock
-from unittest.mock import mock_open
 
 import numpy as np
 import pandas as pd
@@ -38,7 +37,7 @@ def test_parse_growth_timepoint(self):
         }
         tar_object.extractfile.side_effect = lambda fname, *args, **kwargs: mock_contents[fname]
 
-        returned_df = parse_growth_file.fn(tar_object)
+        returned_df = parse_growth_file(tar_object)
 
         expected_dict = {
             "TICK": [