From 7b7bc7ef338cc4781f829ff8ab16f79471f07401 Mon Sep 17 00:00:00 2001 From: Marco Varrone Date: Mon, 28 Oct 2024 08:41:04 +0100 Subject: [PATCH 01/20] Save reader elements incrementally --- src/spatialdata_io/readers/xenium.py | 135 +++++++++++++++++++++++---- 1 file changed, 116 insertions(+), 19 deletions(-) diff --git a/src/spatialdata_io/readers/xenium.py b/src/spatialdata_io/readers/xenium.py index 74034f37..6ecd8851 100644 --- a/src/spatialdata_io/readers/xenium.py +++ b/src/spatialdata_io/readers/xenium.py @@ -25,6 +25,7 @@ from datatree.datatree import DataTree from geopandas import GeoDataFrame from joblib import Parallel, delayed +from ome_zarr.io import parse_url from pyarrow import Table from shapely import Polygon from spatialdata import SpatialData @@ -48,7 +49,6 @@ __all__ = ["xenium", "xenium_aligned_image", "xenium_explorer_selection"] - @deprecation_alias(cells_as_shapes="cells_as_circles", cell_boundaries="cells_boundaries", cell_labels="cells_labels") @inject_docs(xx=XeniumKeys) def xenium( @@ -68,6 +68,7 @@ def xenium( imread_kwargs: Mapping[str, Any] = MappingProxyType({}), image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), labels_models_kwargs: Mapping[str, Any] = MappingProxyType({}), + output_path: Path | None = None, ) -> SpatialData: """ Read a *10X Genomics Xenium* dataset into a SpatialData object. @@ -124,10 +125,13 @@ def xenium( Keyword arguments to pass to the image models. labels_models_kwargs Keyword arguments to pass to the labels models. + output_path + Path to directly write the output to a zarr file. This can decrease the memory requirement. If not provided, the + function will return a :class:`spatialdata.SpatialData` object. Returns ------- - :class:`spatialdata.SpatialData` + If `output_path` is provided, the function will return `None`. Otherwise, it will return a :class:`spatialdata.SpatialData` object. Notes ----- @@ -160,6 +164,8 @@ def xenium( image_models_kwargs, labels_models_kwargs ) path = Path(path) + output_path = Path(output_path) if output_path is not None else None + with open(path / XeniumKeys.XENIUM_SPECS) as f: specs = json.load(f) # to trigger the warning if the version cannot be parsed @@ -204,11 +210,14 @@ def xenium( table.obs[XeniumKeys.Z_LEVEL] = cell_summary_table[XeniumKeys.Z_LEVEL] table.obs[XeniumKeys.NUCLEUS_COUNT] = cell_summary_table[XeniumKeys.NUCLEUS_COUNT] - polygons = {} - labels = {} - tables = {} - points = {} - images = {} + sdata = SpatialData() + + if output_path is not None: + sdata.path = output_path + sdata._validate_can_safely_write_to_path(output_path, overwrite=False) + store = parse_url(output_path, mode="w").store + _ = zarr.group(store=store, overwrite=False) + store.close() # From the public release notes here: # https://www.10xgenomics.com/support/software/xenium-onboard-analysis/latest/release-notes/release-notes-for-xoa @@ -217,7 +226,7 @@ def xenium( # nuclei to cells. Therefore for the moment we only link the table to the cell labels, and not to the nucleus # labels. if nucleus_labels: - labels["nucleus_labels"], _ = _get_labels_and_indices_mapping( + sdata.labels["nucleus_labels"], _ = _get_labels_and_indices_mapping( path, XeniumKeys.CELLS_ZARR, specs, @@ -225,8 +234,17 @@ def xenium( labels_name="nucleus_labels", labels_models_kwargs=labels_models_kwargs, ) + if output_path is not None: + sdata._write_element( + element=sdata.labels["nucleus_labels"], + zarr_container_path=output_path, + element_type="labels", + element_name="nucleus_labels", + overwrite=False, + ) + del sdata.labels["nucleus_labels"] if cells_labels: - labels["cell_labels"], cell_labels_indices_mapping = _get_labels_and_indices_mapping( + sdata.labels["cell_labels"], cell_labels_indices_mapping = _get_labels_and_indices_mapping( path, XeniumKeys.CELLS_ZARR, specs, @@ -234,6 +252,15 @@ def xenium( labels_name="cell_labels", labels_models_kwargs=labels_models_kwargs, ) + if output_path is not None: + sdata._write_element( + element=sdata.labels["cell_labels"], + zarr_container_path=output_path, + element_type="labels", + element_name="cell_labels", + overwrite=False, + ) + del sdata.labels["cell_labels"] if cell_labels_indices_mapping is not None and table is not None: if not pd.DataFrame.equals(cell_labels_indices_mapping["cell_id"], table.obs[str(XeniumKeys.CELL_ID)]): warnings.warn( @@ -249,41 +276,86 @@ def xenium( table.uns[TableModel.ATTRS_KEY][TableModel.INSTANCE_KEY] = "cell_labels" if nucleus_boundaries: - polygons["nucleus_boundaries"] = _get_polygons( + sdata.shapes["nucleus_boundaries"] = _get_polygons( path, XeniumKeys.NUCLEUS_BOUNDARIES_FILE, specs, n_jobs, idx=table.obs[str(XeniumKeys.CELL_ID)].copy(), ) + if output_path is not None: + sdata._write_element( + element=sdata.shapes["nucleus_boundaries"], + zarr_container_path=output_path, + element_type="shapes", + element_name="nucleus_boundaries", + overwrite=False, + ) + del sdata.shapes["nucleus_boundaries"] if cells_boundaries: - polygons["cell_boundaries"] = _get_polygons( + sdata.shapes["cell_boundaries"] = _get_polygons( path, XeniumKeys.CELL_BOUNDARIES_FILE, specs, n_jobs, idx=table.obs[str(XeniumKeys.CELL_ID)].copy(), ) + if output_path is not None: + sdata._write_element( + element=sdata.shapes["cell_boundaries"], + zarr_container_path=output_path, + element_type="shapes", + element_name="cell_boundaries", + overwrite=False, + ) + del sdata.shapes["cell_boundaries"] if transcripts: - points["transcripts"] = _get_points(path, specs) + sdata.points["transcripts"] = _get_points(path, specs) + if output_path is not None: + sdata._write_element( + element=sdata.points["transcripts"], + zarr_container_path=output_path, + element_type="points", + element_name="transcripts", + overwrite=False, + ) + del sdata.points["transcripts"] if version is None or version < packaging.version.parse("2.0.0"): if morphology_mip: - images["morphology_mip"] = _get_images( + sdata.images["morphology_mip"] = _get_images( path, XeniumKeys.MORPHOLOGY_MIP_FILE, imread_kwargs, image_models_kwargs, ) + if output_path is not None: + sdata._write_element( + element=sdata.images["morphology_mip"], + zarr_container_path=output_path, + element_type="images", + element_name="morphology_mip", + overwrite=False, + ) + del sdata.images["morphology_mip"] if morphology_focus: - images["morphology_focus"] = _get_images( + sdata.images["morphology_focus"] = _get_images( path, XeniumKeys.MORPHOLOGY_FOCUS_FILE, imread_kwargs, image_models_kwargs, ) + if output_path is not None: + sdata._write_element( + element=sdata.images["morphology_focus"], + zarr_container_path=output_path, + element_type="images", + element_name="morphology_focus", + overwrite=False, + ) + del sdata.images["morphology_focus"] else: if morphology_focus: morphology_focus_dir = path / XeniumKeys.MORPHOLOGY_FOCUS_DIR @@ -331,28 +403,53 @@ def filter(self, record: logging.LogRecord) -> bool: "c_coords" not in image_models_kwargs ), "The channel names for the morphology focus images are handled internally" image_models_kwargs["c_coords"] = list(channel_names.values()) - images["morphology_focus"] = _get_images( + sdata.images["morphology_focus"] = _get_images( morphology_focus_dir, XeniumKeys.MORPHOLOGY_FOCUS_CHANNEL_IMAGE.format(0), imread_kwargs, image_models_kwargs, ) del image_models_kwargs["c_coords"] + if output_path is not None: + sdata._write_element( + element=sdata.images["morphology_focus"], + zarr_container_path=output_path, + element_type="images", + element_name="morphology_focus", + overwrite=False, + ) + del sdata.images["morphology_focus"] logger.removeFilter(IgnoreSpecificMessage()) if table is not None: - tables["table"] = table + sdata.tables["table"] = table + if output_path is not None: + sdata._write_element( + element=sdata.tables["table"], + zarr_container_path=output_path, + element_type="tables", + element_name="table", + overwrite=False, + ) + del sdata.tables["table"] - elements_dict = {"images": images, "labels": labels, "points": points, "tables": tables, "shapes": polygons} if cells_as_circles: - elements_dict["shapes"][specs["region"]] = circles - sdata = SpatialData(**elements_dict) + sdata.shapes[specs["region"]] = circles # find and add additional aligned images if aligned_images: extra_images = _add_aligned_images(path, imread_kwargs, image_models_kwargs) for key, value in extra_images.items(): sdata.images[key] = value + if output_path is not None: + sdata._write_element( + element=sdata.images[key], + zarr_container_path=output_path, + element_type="images", + element_name=key, + overwrite=False, + ) + del sdata.images[key] return sdata From d0fed4dc9c66b28230026f98187c7ce75c5dc465 Mon Sep 17 00:00:00 2001 From: Marco Varrone Date: Mon, 28 Oct 2024 09:26:29 +0100 Subject: [PATCH 02/20] Consolidated metadata --- src/spatialdata_io/readers/xenium.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/spatialdata_io/readers/xenium.py b/src/spatialdata_io/readers/xenium.py index 6ecd8851..6a0b564e 100644 --- a/src/spatialdata_io/readers/xenium.py +++ b/src/spatialdata_io/readers/xenium.py @@ -451,6 +451,8 @@ def filter(self, record: logging.LogRecord) -> bool: ) del sdata.images[key] + sdata.write_consolidated_metadata() + return sdata From d13f581a6bbb935fe3ae76f579303607cfd852f9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 28 Oct 2024 08:34:40 +0000 Subject: [PATCH 03/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/spatialdata_io/readers/xenium.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/spatialdata_io/readers/xenium.py b/src/spatialdata_io/readers/xenium.py index 6a0b564e..96d992d5 100644 --- a/src/spatialdata_io/readers/xenium.py +++ b/src/spatialdata_io/readers/xenium.py @@ -49,6 +49,7 @@ __all__ = ["xenium", "xenium_aligned_image", "xenium_explorer_selection"] + @deprecation_alias(cells_as_shapes="cells_as_circles", cell_boundaries="cells_boundaries", cell_labels="cells_labels") @inject_docs(xx=XeniumKeys) def xenium( @@ -211,13 +212,13 @@ def xenium( table.obs[XeniumKeys.NUCLEUS_COUNT] = cell_summary_table[XeniumKeys.NUCLEUS_COUNT] sdata = SpatialData() - + if output_path is not None: sdata.path = output_path sdata._validate_can_safely_write_to_path(output_path, overwrite=False) store = parse_url(output_path, mode="w").store _ = zarr.group(store=store, overwrite=False) - store.close() + store.close() # From the public release notes here: # https://www.10xgenomics.com/support/software/xenium-onboard-analysis/latest/release-notes/release-notes-for-xoa From 43f0d04b8da16b5794e7d7c4aa847e45c0cbc776 Mon Sep 17 00:00:00 2001 From: Marco Varrone Date: Mon, 28 Oct 2024 09:53:59 +0100 Subject: [PATCH 04/20] Write consolidated data only when output_path is set --- src/spatialdata_io/readers/xenium.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/spatialdata_io/readers/xenium.py b/src/spatialdata_io/readers/xenium.py index 96d992d5..6677a307 100644 --- a/src/spatialdata_io/readers/xenium.py +++ b/src/spatialdata_io/readers/xenium.py @@ -452,7 +452,8 @@ def filter(self, record: logging.LogRecord) -> bool: ) del sdata.images[key] - sdata.write_consolidated_metadata() + if output_path is not None: + sdata.write_consolidated_metadata() return sdata From 1d4c73427ef63fb44c6c92b861466ec00d0bc12a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 15 Mar 2025 17:22:32 +0000 Subject: [PATCH 05/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 02213221..f598404d 100644 --- a/README.md +++ b/README.md @@ -111,8 +111,7 @@ Marconato, L., Palla, G., Yamauchi, K.A. et al. SpatialData: an open and univers [link-docs]: https://spatialdata.scverse.org/projects/io/en/latest/ [link-api]: https://spatialdata.scverse.org/projects/io/en/latest/api.html [link-cli]: https://spatialdata.scverse.org/projects/io/en/latest/cli.html - -[//]: # (numfocus-fiscal-sponsor-attribution) +[//]: # "numfocus-fiscal-sponsor-attribution" spatialdata-io is part of the scverse® project ([website](https://scverse.org), [governance](https://scverse.org/about/roles)) and is fiscally sponsored by [NumFOCUS](https://numfocus.org/). If you like scverse® and want to support our mission, please consider making a tax-deductible [donation](https://numfocus.org/donate-to-scverse) to help the project pay for developer time, professional services, travel, workshops, and a variety of other needs. From bbfa4f73b4aff965f0c2aa3ac3907965909651fd Mon Sep 17 00:00:00 2001 From: Marco Varrone Date: Mon, 17 Mar 2025 11:09:36 +0100 Subject: [PATCH 06/20] Use public write API --- src/spatialdata_io/readers/xenium.py | 101 +++++---------------------- 1 file changed, 17 insertions(+), 84 deletions(-) diff --git a/src/spatialdata_io/readers/xenium.py b/src/spatialdata_io/readers/xenium.py index 95c8f012..7e1fd028 100644 --- a/src/spatialdata_io/readers/xenium.py +++ b/src/spatialdata_io/readers/xenium.py @@ -27,7 +27,7 @@ from ome_zarr.io import parse_url from pyarrow import Table from shapely import Polygon -from spatialdata import SpatialData +from spatialdata import SpatialData, read_zarr from spatialdata._core.query.relational_query import get_element_instances from spatialdata._types import ArrayLike from spatialdata.models import ( @@ -126,12 +126,11 @@ def xenium( labels_models_kwargs Keyword arguments to pass to the labels models. output_path - Path to directly write the output to a zarr file. This can decrease the memory requirement. If not provided, the - function will return a :class:`spatialdata.SpatialData` object. + Path to directly write every element to a zarr file as soon as it is read. This can decrease the memory requirement. Returns ------- - If `output_path` is provided, the function will return `None`. Otherwise, it will return a :class:`spatialdata.SpatialData` object. + :class:`spatialdata.SpatialData` Notes ----- @@ -190,6 +189,7 @@ def xenium( if cells_table: return_values = _get_tables_and_circles(path, cells_as_circles, specs) + print(return_values) if cells_as_circles: table, circles = return_values else: @@ -213,11 +213,7 @@ def xenium( sdata = SpatialData() if output_path is not None: - sdata.path = output_path - sdata._validate_can_safely_write_to_path(output_path, overwrite=False) - store = parse_url(output_path, mode="w").store - _ = zarr.group(store=store, overwrite=False) - store.close() + sdata.write(output_path) # From the public release notes here: # https://www.10xgenomics.com/support/software/xenium-onboard-analysis/latest/release-notes/release-notes-for-xoa @@ -235,13 +231,7 @@ def xenium( labels_models_kwargs=labels_models_kwargs, ) if output_path is not None: - sdata._write_element( - element=sdata.labels["nucleus_labels"], - zarr_container_path=output_path, - element_type="labels", - element_name="nucleus_labels", - overwrite=False, - ) + sdata.write_element(element_name="nucleus_labels") del sdata.labels["nucleus_labels"] if cells_labels: sdata.labels["cell_labels"], cell_labels_indices_mapping = _get_labels_and_indices_mapping( @@ -253,13 +243,7 @@ def xenium( labels_models_kwargs=labels_models_kwargs, ) if output_path is not None: - sdata._write_element( - element=sdata.labels["cell_labels"], - zarr_container_path=output_path, - element_type="labels", - element_name="cell_labels", - overwrite=False, - ) + sdata.write_element(element_name="cell_labels") del sdata.labels["cell_labels"] if cell_labels_indices_mapping is not None and table is not None: if not pd.DataFrame.equals(cell_labels_indices_mapping["cell_id"], table.obs[str(XeniumKeys.CELL_ID)]): @@ -284,15 +268,8 @@ def xenium( idx=table.obs[str(XeniumKeys.CELL_ID)].copy(), ) if output_path is not None: - sdata._write_element( - element=sdata.shapes["nucleus_boundaries"], - zarr_container_path=output_path, - element_type="shapes", - element_name="nucleus_boundaries", - overwrite=False, - ) + sdata.write_element(element_name="nucleus_boundaries") del sdata.shapes["nucleus_boundaries"] - if cells_boundaries: sdata.shapes["cell_boundaries"] = _get_polygons( path, @@ -302,27 +279,13 @@ def xenium( idx=table.obs[str(XeniumKeys.CELL_ID)].copy(), ) if output_path is not None: - sdata._write_element( - element=sdata.shapes["cell_boundaries"], - zarr_container_path=output_path, - element_type="shapes", - element_name="cell_boundaries", - overwrite=False, - ) + sdata.write_element(element_name="cell_boundaries") del sdata.shapes["cell_boundaries"] - if transcripts: sdata.points["transcripts"] = _get_points(path, specs) if output_path is not None: - sdata._write_element( - element=sdata.points["transcripts"], - zarr_container_path=output_path, - element_type="points", - element_name="transcripts", - overwrite=False, - ) + sdata.write_element(element_name="transcripts") del sdata.points["transcripts"] - if version is None or version < packaging.version.parse("2.0.0"): if morphology_mip: sdata.images["morphology_mip"] = _get_images( @@ -332,13 +295,7 @@ def xenium( image_models_kwargs, ) if output_path is not None: - sdata._write_element( - element=sdata.images["morphology_mip"], - zarr_container_path=output_path, - element_type="images", - element_name="morphology_mip", - overwrite=False, - ) + sdata.write_element(element_name="morphology_mip") del sdata.images["morphology_mip"] if morphology_focus: sdata.images["morphology_focus"] = _get_images( @@ -348,13 +305,7 @@ def xenium( image_models_kwargs, ) if output_path is not None: - sdata._write_element( - element=sdata.images["morphology_focus"], - zarr_container_path=output_path, - element_type="images", - element_name="morphology_focus", - overwrite=False, - ) + sdata.write_element(element_name="morphology_focus") del sdata.images["morphology_focus"] else: if morphology_focus: @@ -409,26 +360,14 @@ def filter(self, record: logging.LogRecord) -> bool: ) del image_models_kwargs["c_coords"] if output_path is not None: - sdata._write_element( - element=sdata.images["morphology_focus"], - zarr_container_path=output_path, - element_type="images", - element_name="morphology_focus", - overwrite=False, - ) + sdata.write_element(element_name="morphology_focus") del sdata.images["morphology_focus"] logger.removeFilter(IgnoreSpecificMessage()) if table is not None: sdata.tables["table"] = table if output_path is not None: - sdata._write_element( - element=sdata.tables["table"], - zarr_container_path=output_path, - element_type="tables", - element_name="table", - overwrite=False, - ) + sdata.write_element(element_name="table") del sdata.tables["table"] if cells_as_circles: @@ -440,18 +379,12 @@ def filter(self, record: logging.LogRecord) -> bool: for key, value in extra_images.items(): sdata.images[key] = value if output_path is not None: - sdata._write_element( - element=sdata.images[key], - zarr_container_path=output_path, - element_type="images", - element_name=key, - overwrite=False, - ) + sdata.write_element(element_name=key) del sdata.images[key] if output_path is not None: - sdata.write_consolidated_metadata() - + sdata = read_zarr(output_path) + return sdata From 209a0c2e0136484519066af75775195b96b8e8f6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 17 Mar 2025 10:10:08 +0000 Subject: [PATCH 07/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/spatialdata_io/readers/xenium.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/spatialdata_io/readers/xenium.py b/src/spatialdata_io/readers/xenium.py index 7e1fd028..366fc459 100644 --- a/src/spatialdata_io/readers/xenium.py +++ b/src/spatialdata_io/readers/xenium.py @@ -24,7 +24,6 @@ from dask_image.imread import imread from geopandas import GeoDataFrame from joblib import Parallel, delayed -from ome_zarr.io import parse_url from pyarrow import Table from shapely import Polygon from spatialdata import SpatialData, read_zarr @@ -384,7 +383,7 @@ def filter(self, record: logging.LogRecord) -> bool: if output_path is not None: sdata = read_zarr(output_path) - + return sdata From 0d62dd90a1739f9871fa351e6cd8c213ec5f0774 Mon Sep 17 00:00:00 2001 From: Marco Varrone Date: Mon, 17 Mar 2025 11:19:22 +0100 Subject: [PATCH 08/20] Save cell_circles --- src/spatialdata_io/readers/xenium.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/spatialdata_io/readers/xenium.py b/src/spatialdata_io/readers/xenium.py index 7e1fd028..bb3d9e0c 100644 --- a/src/spatialdata_io/readers/xenium.py +++ b/src/spatialdata_io/readers/xenium.py @@ -372,6 +372,8 @@ def filter(self, record: logging.LogRecord) -> bool: if cells_as_circles: sdata.shapes[specs["region"]] = circles + sdata.write_element(element_name=specs["region"]) + del sdata.shapes[specs["region"]] # find and add additional aligned images if aligned_images: From 612e2f797dfa64a976681ee4eaf7a68b2efbfecc Mon Sep 17 00:00:00 2001 From: Marco Varrone Date: Mon, 17 Mar 2025 11:51:31 +0100 Subject: [PATCH 09/20] Save region only if output_path is defined --- src/spatialdata_io/readers/xenium.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/spatialdata_io/readers/xenium.py b/src/spatialdata_io/readers/xenium.py index 44c16416..e7bbc272 100644 --- a/src/spatialdata_io/readers/xenium.py +++ b/src/spatialdata_io/readers/xenium.py @@ -371,8 +371,9 @@ def filter(self, record: logging.LogRecord) -> bool: if cells_as_circles: sdata.shapes[specs["region"]] = circles - sdata.write_element(element_name=specs["region"]) - del sdata.shapes[specs["region"]] + if output_path is not None: + sdata.write_element(element_name=specs["region"]) + del sdata.shapes[specs["region"]] # find and add additional aligned images if aligned_images: From 132629959e3554318dcd7792a8434579b360e9b5 Mon Sep 17 00:00:00 2001 From: laudmt Date: Mon, 17 Mar 2025 11:59:32 +0100 Subject: [PATCH 10/20] improve cosmx loading --- src/spatialdata_io/readers/cosmx.py | 56 ++++++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 5 deletions(-) diff --git a/src/spatialdata_io/readers/cosmx.py b/src/spatialdata_io/readers/cosmx.py index 561fe914..3f6c9287 100644 --- a/src/spatialdata_io/readers/cosmx.py +++ b/src/spatialdata_io/readers/cosmx.py @@ -16,7 +16,7 @@ from dask_image.imread import imread from scipy.sparse import csr_matrix from skimage.transform import estimate_transform -from spatialdata import SpatialData +from spatialdata import SpatialData, read_zarr from spatialdata._logging import logger from spatialdata.models import Image2DModel, Labels2DModel, PointsModel, TableModel from spatialdata.transformations.transformations import Affine, Identity @@ -34,6 +34,7 @@ def cosmx( transcripts: bool = True, imread_kwargs: Mapping[str, Any] = MappingProxyType({}), image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), + output_path: str | Path | None = None, ) -> SpatialData: """ Read *Cosmx Nanostring* data. @@ -62,12 +63,20 @@ def cosmx( Keyword arguments passed to :func:`dask_image.imread.imread`. image_models_kwargs Keyword arguments passed to :class:`spatialdata.models.Image2DModel`. + output_path + Path where the output will be saved. If ``None``, the output will not be saved. Returns ------- :class:`spatialdata.SpatialData` """ path = Path(path) + output_path = Path(output_path) if output_path is not None else None + sdata = SpatialData() + + # If output path is provided, save the empty SpatialData object to create directories and hierarchy + if output_path is not None: + sdata.write(output_path) # tries to infer dataset_id from the name of the counts file if dataset_id is None: @@ -151,6 +160,14 @@ def cosmx( inplace=True, ) + # Add table to SpatialData object, write it and delete temporary objects to save memory + sdata.tables["table"] = table + if output_path is not None: + sdata.write_element(element_name="table") + del adata + del table + del sdata.tables + # prepare to read images and labels file_extensions = (".jpg", ".png", ".jpeg", ".tif", ".tiff") pat = re.compile(r".*_F(\d+)") @@ -195,7 +212,14 @@ def cosmx( rgb=None, **image_models_kwargs, ) - images[f"{fov}_image"] = parsed_im + image_name = f"{fov}_image" + images[image_name] = parsed_im + if output_path is not None: + sdata.images[image_name] = parsed_im + sdata.write_element(element_name=image_name) + del parsed_im + del images[image_name] + del sdata.images[image_name] else: logger.warning(f"FOV {fov} not found in counts file. Skipping image {fname}.") @@ -218,7 +242,14 @@ def cosmx( dims=("y", "x"), **image_models_kwargs, ) - labels[f"{fov}_labels"] = parsed_la + label_name = f"{fov}_labels" + labels[label_name] = parsed_la + if output_path is not None: + sdata.labels[label_name] = parsed_la + sdata.write_element(element_name=label_name) + del parsed_la + del labels[label_name] + del sdata.labels[label_name] else: logger.warning(f"FOV {fov} not found in counts file. Skipping labels {fname}.") @@ -265,7 +296,8 @@ def cosmx( # we rename z because we want to treat the data as 2d sub_table.rename(columns={"z": "z_raw"}, inplace=True) if len(sub_table) > 0: - points[f"{fov}_points"] = PointsModel.parse( + point_name = f"{fov}_points" + points[point_name] = PointsModel.parse( sub_table, coordinates={"x": CosmxKeys.X_LOCAL_TRANSCRIPT, "y": CosmxKeys.Y_LOCAL_TRANSCRIPT}, feature_key=CosmxKeys.TARGET_OF_TRANSCRIPT, @@ -276,6 +308,11 @@ def cosmx( "global_only_labels": aff, }, ) + if output_path is not None: + sdata.points[point_name] = points[point_name] + sdata.write_element(element_name=point_name) + del points[point_name] + del sdata.points[point_name] # TODO: what to do with fov file? # if fov_file is not None: @@ -286,5 +323,14 @@ def cosmx( # except KeyError: # logg.warning(f"FOV `{str(fov)}` does not exist, skipping it.") # continue - + if output_path is not None: + return read_zarr(output_path) return SpatialData(images=images, labels=labels, points=points, table=table) + +if __name__ == "__main__": + cosmx( + path="/Users/ldumont/git/cosmx_data", + dataset_id="1", + transcripts=True, + output_path="/Users/ldumont/cosmx_data_output", + ) From a9fb827702a3112db77e75c494c1a63aaf743ce6 Mon Sep 17 00:00:00 2001 From: Marco Varrone Date: Mon, 17 Mar 2025 12:24:43 +0100 Subject: [PATCH 11/20] Load labels using dask instead of numpy --- src/spatialdata_io/readers/xenium.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spatialdata_io/readers/xenium.py b/src/spatialdata_io/readers/xenium.py index e7bbc272..4cb10b72 100644 --- a/src/spatialdata_io/readers/xenium.py +++ b/src/spatialdata_io/readers/xenium.py @@ -451,7 +451,7 @@ def _get_labels_and_indices_mapping( with zarr.open(str(tmpdir), mode="r") as z: # get the labels - masks = z["masks"][f"{mask_index}"][...] + masks = da.from_array(z["masks"][f"{mask_index}"]) labels = Labels2DModel.parse( masks, dims=("y", "x"), transformations={"global": Identity()}, **labels_models_kwargs ) From 0109caeb0b5ce12a78649449151a1eb01c9f9824 Mon Sep 17 00:00:00 2001 From: laudmt Date: Mon, 17 Mar 2025 14:19:39 +0100 Subject: [PATCH 12/20] fix --- src/spatialdata_io/readers/cosmx.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/spatialdata_io/readers/cosmx.py b/src/spatialdata_io/readers/cosmx.py index 3f6c9287..150d4795 100644 --- a/src/spatialdata_io/readers/cosmx.py +++ b/src/spatialdata_io/readers/cosmx.py @@ -76,7 +76,7 @@ def cosmx( # If output path is provided, save the empty SpatialData object to create directories and hierarchy if output_path is not None: - sdata.write(output_path) + sdata.write(output_path, overwrite=True) # tries to infer dataset_id from the name of the counts file if dataset_id is None: @@ -166,7 +166,7 @@ def cosmx( sdata.write_element(element_name="table") del adata del table - del sdata.tables + del sdata.tables['table'] # prepare to read images and labels file_extensions = (".jpg", ".png", ".jpeg", ".tif", ".tiff") @@ -326,11 +326,3 @@ def cosmx( if output_path is not None: return read_zarr(output_path) return SpatialData(images=images, labels=labels, points=points, table=table) - -if __name__ == "__main__": - cosmx( - path="/Users/ldumont/git/cosmx_data", - dataset_id="1", - transcripts=True, - output_path="/Users/ldumont/cosmx_data_output", - ) From 8841a730cca443721fcabcaa8667fcc7d12329ff Mon Sep 17 00:00:00 2001 From: Marco Varrone Date: Tue, 18 Mar 2025 07:41:41 +0100 Subject: [PATCH 13/20] Remove print --- src/spatialdata_io/readers/xenium.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/spatialdata_io/readers/xenium.py b/src/spatialdata_io/readers/xenium.py index 4cb10b72..a9dc7645 100644 --- a/src/spatialdata_io/readers/xenium.py +++ b/src/spatialdata_io/readers/xenium.py @@ -188,7 +188,6 @@ def xenium( if cells_table: return_values = _get_tables_and_circles(path, cells_as_circles, specs) - print(return_values) if cells_as_circles: table, circles = return_values else: From 12180a39cea86f459c8b0a2ec5acb4bfe281435b Mon Sep 17 00:00:00 2001 From: Marco Varrone Date: Tue, 18 Mar 2025 10:43:33 +0100 Subject: [PATCH 14/20] Write metadata --- src/spatialdata_io/readers/xenium.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/spatialdata_io/readers/xenium.py b/src/spatialdata_io/readers/xenium.py index a9dc7645..7356efb4 100644 --- a/src/spatialdata_io/readers/xenium.py +++ b/src/spatialdata_io/readers/xenium.py @@ -384,6 +384,7 @@ def filter(self, record: logging.LogRecord) -> bool: del sdata.images[key] if output_path is not None: + sdata.write_consolidated_metadata() sdata = read_zarr(output_path) return sdata From 9873def1b1191b66a863872ef2eb0c8975c4d0f7 Mon Sep 17 00:00:00 2001 From: laudmt Date: Tue, 18 Mar 2025 10:46:09 +0100 Subject: [PATCH 15/20] improve ram --- src/spatialdata_io/readers/cosmx.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/spatialdata_io/readers/cosmx.py b/src/spatialdata_io/readers/cosmx.py index 150d4795..82d3b5f6 100644 --- a/src/spatialdata_io/readers/cosmx.py +++ b/src/spatialdata_io/readers/cosmx.py @@ -167,6 +167,8 @@ def cosmx( del adata del table del sdata.tables['table'] + del counts + del obs # prepare to read images and labels file_extensions = (".jpg", ".png", ".jpeg", ".tif", ".tiff") @@ -287,6 +289,8 @@ def cosmx( transcripts_data = pd.read_csv(path / transcripts_file, header=0) transcripts_data.to_parquet(Path(tmpdir) / "transcripts.parquet") print("done") + if output_path is not None: + del transcripts_data ptable = pq.read_table(Path(tmpdir) / "transcripts.parquet") for fov in fovs_counts: @@ -324,5 +328,6 @@ def cosmx( # logg.warning(f"FOV `{str(fov)}` does not exist, skipping it.") # continue if output_path is not None: + sdata.write_consolidated_metadata() return read_zarr(output_path) return SpatialData(images=images, labels=labels, points=points, table=table) From b4ff59bfc66be090305ab5c0f4c12492246cebaf Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 18 Mar 2025 15:23:47 +0000 Subject: [PATCH 16/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/spatialdata_io/readers/cosmx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spatialdata_io/readers/cosmx.py b/src/spatialdata_io/readers/cosmx.py index 82d3b5f6..1e7099f8 100644 --- a/src/spatialdata_io/readers/cosmx.py +++ b/src/spatialdata_io/readers/cosmx.py @@ -166,7 +166,7 @@ def cosmx( sdata.write_element(element_name="table") del adata del table - del sdata.tables['table'] + del sdata.tables["table"] del counts del obs From 20c3fb3c1d05f0ad091a925f64167efc457ad85a Mon Sep 17 00:00:00 2001 From: Marco Varrone Date: Wed, 19 Mar 2025 09:30:31 +0100 Subject: [PATCH 17/20] Remove unwanted README change --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index f598404d..38fcb525 100644 --- a/README.md +++ b/README.md @@ -111,6 +111,7 @@ Marconato, L., Palla, G., Yamauchi, K.A. et al. SpatialData: an open and univers [link-docs]: https://spatialdata.scverse.org/projects/io/en/latest/ [link-api]: https://spatialdata.scverse.org/projects/io/en/latest/api.html [link-cli]: https://spatialdata.scverse.org/projects/io/en/latest/cli.html + [//]: # "numfocus-fiscal-sponsor-attribution" spatialdata-io is part of the scverse® project ([website](https://scverse.org), [governance](https://scverse.org/about/roles)) and is fiscally sponsored by [NumFOCUS](https://numfocus.org/). From 4af365fed835d03644fe58d9c231ee2f277e4960 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 19 Mar 2025 08:30:58 +0000 Subject: [PATCH 18/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 38fcb525..f598404d 100644 --- a/README.md +++ b/README.md @@ -111,7 +111,6 @@ Marconato, L., Palla, G., Yamauchi, K.A. et al. SpatialData: an open and univers [link-docs]: https://spatialdata.scverse.org/projects/io/en/latest/ [link-api]: https://spatialdata.scverse.org/projects/io/en/latest/api.html [link-cli]: https://spatialdata.scverse.org/projects/io/en/latest/cli.html - [//]: # "numfocus-fiscal-sponsor-attribution" spatialdata-io is part of the scverse® project ([website](https://scverse.org), [governance](https://scverse.org/about/roles)) and is fiscally sponsored by [NumFOCUS](https://numfocus.org/). From 9d5feb1863a094b41f9085e1a2853b6b8d1b4c94 Mon Sep 17 00:00:00 2001 From: Marco Varrone Date: Wed, 19 Mar 2025 09:38:08 +0100 Subject: [PATCH 19/20] Remove unwanted README change pt.2 --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index f598404d..02213221 100644 --- a/README.md +++ b/README.md @@ -111,7 +111,8 @@ Marconato, L., Palla, G., Yamauchi, K.A. et al. SpatialData: an open and univers [link-docs]: https://spatialdata.scverse.org/projects/io/en/latest/ [link-api]: https://spatialdata.scverse.org/projects/io/en/latest/api.html [link-cli]: https://spatialdata.scverse.org/projects/io/en/latest/cli.html -[//]: # "numfocus-fiscal-sponsor-attribution" + +[//]: # (numfocus-fiscal-sponsor-attribution) spatialdata-io is part of the scverse® project ([website](https://scverse.org), [governance](https://scverse.org/about/roles)) and is fiscally sponsored by [NumFOCUS](https://numfocus.org/). If you like scverse® and want to support our mission, please consider making a tax-deductible [donation](https://numfocus.org/donate-to-scverse) to help the project pay for developer time, professional services, travel, workshops, and a variety of other needs. From f97f1b3081dc6cb57c9138fb1903caf79269d3d4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 19 Mar 2025 08:38:34 +0000 Subject: [PATCH 20/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 02213221..f598404d 100644 --- a/README.md +++ b/README.md @@ -111,8 +111,7 @@ Marconato, L., Palla, G., Yamauchi, K.A. et al. SpatialData: an open and univers [link-docs]: https://spatialdata.scverse.org/projects/io/en/latest/ [link-api]: https://spatialdata.scverse.org/projects/io/en/latest/api.html [link-cli]: https://spatialdata.scverse.org/projects/io/en/latest/cli.html - -[//]: # (numfocus-fiscal-sponsor-attribution) +[//]: # "numfocus-fiscal-sponsor-attribution" spatialdata-io is part of the scverse® project ([website](https://scverse.org), [governance](https://scverse.org/about/roles)) and is fiscally sponsored by [NumFOCUS](https://numfocus.org/). If you like scverse® and want to support our mission, please consider making a tax-deductible [donation](https://numfocus.org/donate-to-scverse) to help the project pay for developer time, professional services, travel, workshops, and a variety of other needs.