Patch/segmentation mask #16

Merged 4 commits on Aug 15, 2024
9 changes: 9 additions & 0 deletions README.md
@@ -1,2 +1,11 @@
# cryoet-data-portal-neuroglancer

CryoET Data Portal Neuroglancer configuration helper

## Installation

```bash
git clone https://github.com/chanzuckerberg/cryoet-data-portal-neuroglancer.git
cd cryoet-data-portal-neuroglancer
poetry install
```
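
As a quick post-install sanity check, a minimal sketch (assuming the import path follows the repository layout shown in the diff below) could be:

```python
# Minimal post-install sanity check; the import path is assumed from the
# module layout shown in this PR.
from cryoet_data_portal_neuroglancer.precompute.segmentation_mask import encode_segmentation

print(encode_segmentation.__doc__)
```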
38 changes: 33 additions & 5 deletions cryoet_data_portal_neuroglancer/precompute/segmentation_mask.py
@@ -213,9 +213,9 @@ def create_segmentation_chunk(
# data = np.moveaxis(data, (0, 1, 2), (2, 1, 0))
for z, y, x in np.ndindex((gz, gy, gx)):
block = data[z * bz : (z + 1) * bz, y * by : (y + 1) * by, x * bx : (x + 1) * bx]
unique_values, encoded_values = np.unique(block, return_inverse=True)
if block.shape != block_size:
block = pad_block(block, block_size)
unique_values, encoded_values = np.unique(block, return_inverse=True)

lookup_table_offset, encoded_bits = _create_lookup_table(buffer, stored_lookup_tables, unique_values)
encoded_values_offset = _create_encoded_values(buffer, encoded_values, encoded_bits)
@@ -246,7 +246,7 @@ def _create_metadata(
"num_channels": 1,
"scales": [
{
"chunk_sizes": [chunk_size],
"chunk_sizes": [chunk_size[::-1]], # reverse the chunk size to pass from Z-Y-X to X-Y-Z
"encoding": "compressed_segmentation",
"compressed_segmentation_block_size": block_size,
"resolution": resolution,
@@ -318,17 +318,45 @@ def write_metadata(metadata: dict[str, Any], output_directory: Path) -> None:

def encode_segmentation(
filename: str,
output_path: Path,
output_path: Path | str,
resolution: tuple[float, float, float],
block_size: tuple[int, int, int] = (64, 64, 64),
data_directory: str = "data",
delete_existing: bool = False,
convert_non_zero_to: Optional[int] = 0,
convert_non_zero_to: int | None = 0,
include_mesh: bool = False,
mesh_directory: str = "mesh",
) -> None:
"""Convert the given OME-Zarr file to neuroglancer segmentation format with the given block size"""
"""Convert the given OME-Zarr file to neuroglancer segmentation format with the given block size.

Parameters
----------
filename : str
The path to the OME-Zarr file
output_path : Path | str
The path to the output directory
resolution : tuple[float, float, float]
The resolution of the data in nm
block_size : tuple[int, int, int], optional
The size of the blocks to use, by default (64, 64, 64).
This determines the size of the chunks in the precomputed format output.
Order is Z, Y, X.
data_directory : str, optional
The name of the data directory, by default "data"
This is the directory that will contain the segmentation data
delete_existing : bool, optional
Whether to delete the existing output directory, by default False
If False and the output directory exists, the function will
return without doing anything
convert_non_zero_to : int | None, optional
The value to convert non-zero values to. The default of 0 leaves
non-zero values as they are, as does None. Leaving the values
unchanged is useful for representing multiple objects in the same
segmentation.
"""
print(f"Converting {filename} to neuroglancer compressed segmentation format")
output_path = Path(output_path)
dask_data = load_omezarr_data(filename)
if delete_existing and output_path.exists():
contents = list(output_path.iterdir())
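
Given the expanded docstring above, a minimal usage sketch of `encode_segmentation` might look like the following; the paths and resolution are placeholders, not values from the portal:

```python
from cryoet_data_portal_neuroglancer.precompute.segmentation_mask import encode_segmentation

# Hypothetical input/output paths and resolution, purely for illustration.
encode_segmentation(
    filename="segmentation.zarr",            # path to the OME-Zarr file
    output_path="precomputed_segmentation",  # output directory
    resolution=(7.84, 7.84, 7.84),           # resolution in nm
    block_size=(64, 64, 64),                 # chunk size, Z-Y-X order
    delete_existing=True,                    # remove any previous output first
    convert_non_zero_to=0,                   # 0 (or None) leaves labels unchanged
)
```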
33 changes: 31 additions & 2 deletions tests/test_segmentation_encoding.py
@@ -199,7 +199,36 @@ def test__create_segmentation_chunk():
[1, 1, 1, 1],
],
]
chunk: Chunk = create_segmentation_chunk(np.array(array), dimensions=((0, 0, 0), (8, 8, 4)), block_size=(8, 8, 4))
real_data = np.array(array)
chunk: Chunk = create_segmentation_chunk(real_data, dimensions=((0, 0, 0), (8, 8, 4)), block_size=(8, 8, 4))

assert chunk.dimensions == ((0, 0, 0), (8, 8, 4))
# TODO expand me!
byte_array = chunk.buffer
data_start_offset = 20 # header of 8 bytes + 12 bytes of info
data = np.frombuffer(byte_array, dtype=np.uint32, offset=data_start_offset)
# The data is symmetric, so each 32-bit integer should be the same
assert len(data) == 8 # 8 * 8 * 4 / 32
assert np.all(np.diff(data) == 0)

# If we chunk in larger blocks, it should still work with padding
chunk: Chunk = create_segmentation_chunk(real_data, dimensions=((0, 0, 0), (8, 8, 4)), block_size=(8, 8, 8))

assert chunk.dimensions == ((0, 0, 0), (8, 8, 4))
byte_array = chunk.buffer
data_start_offset = 20
data = np.frombuffer(byte_array, dtype=np.uint32, offset=data_start_offset)
# The data is symmetric, so each 32-bit integer should be the same
assert len(data) == 16 # 8 * 8 * 8 / 32
assert np.all(np.diff(data) == 0)

# With smaller blocks, there should be more of them
chunk: Chunk = create_segmentation_chunk(real_data, dimensions=((0, 0, 0), (8, 8, 4)), block_size=(4, 4, 4))

assert chunk.dimensions == ((0, 0, 0), (8, 8, 4))
byte_array = chunk.buffer
data_start_offset = 20
data = np.frombuffer(byte_array, dtype=np.uint32)

# In this case, there should be four block headers (32 bits), followed by the 3 32-bit info values,
# and then the data, which is all the same, in one block of 8 32-bit integers
assert len(data) == 8 + 3 + 8
assert np.all(np.diff(data[11:]) == 0)
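
The expected lengths in these assertions follow from the bit arithmetic hinted at in the comments; a sketch of that arithmetic, assuming only two unique labels so each voxel is encoded with a single bit:

```python
# Rough check of the expected encoded-data lengths (assumes 1 bit per voxel,
# i.e. two unique labels in the test data).
def encoded_uint32s(shape: tuple[int, int, int]) -> int:
    z, y, x = shape
    return (z * y * x) // 32  # 32 one-bit voxels fit in each 32-bit word

assert encoded_uint32s((8, 8, 4)) == 8   # block_size (8, 8, 4) case
assert encoded_uint32s((8, 8, 8)) == 16  # padded block_size (8, 8, 8) case
```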