diff --git a/docs/distributed.rst b/docs/distributed.rst new file mode 100644 index 00000000..452c67eb --- /dev/null +++ b/docs/distributed.rst @@ -0,0 +1,165 @@ +Big Data +------------------------------------------------ + +Distributed Cellpose for larger-than-memory data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The ``cellpose.contrib.distributed_cellpose`` module is intended to help run cellpose on 3D datasets +that are too large to fit in system memory. The dataset is divided into overlapping blocks and +each block is segmented separately. Results are stitched back together into a seamless segmentation +of the whole dataset. + +Built to run on workstations or clusters. Blocks can be run in parallel, in series, or both. +Compute resources (GPUs, CPUs, and RAM) can be arbitrarily partitioned for parallel computing. +Currently workstations and LSF clusters are supported. SLURM clusters are +an easy addition - if you need this to run on a SLURM cluster `please post a feature request issue +to the github repository `_ and tag @GFleishman. + +The input data format must be a zarr array. Some functions are provided in the module to help +convert your data to a zarr array, but not all formats or situations are covered. These are +good opportunities to submit pull requests. Currently, the module must be run via the Python API, +but making it available in the GUI is another good PR or feature request. + +All user facing functions in the module have verbose docstrings that explain inputs and outputs. +You can access these docstrings like this: + +.. code-block:: python + + from cellpose.contrib.distributed_segmentation import distributed_eval + distributed_eval? + +Examples +~~~~~~~~ + +Run distributed Cellpose on half the resources of a workstation that has 16 cpus, 1 gpu, +and 128GB system memory: + +.. code-block:: python + + from cellpose.contrib.distributed_segmentation import distributed_eval + + # parameterize cellpose however you like + model_kwargs = {'gpu':True, 'model_type':'cyto3'} # can also use 'pretrained_model' + eval_kwargs = {'diameter':30, + 'z_axis':0, + 'channels':[0,0], + 'do_3D':True, + } + + # define compute resources for local workstation + cluster_kwargs = { + 'n_workers':1, # if you only have 1 gpu, then 1 worker is the right choice + 'ncpus':8, + 'memory_limit':'64GB', + 'threads_per_worker':1, + } + + # run segmentation + # outputs: + # segments: zarr array containing labels + # boxes: list of bounding boxes around all labels (very useful for navigating big data) + segments, boxes = distributed_eval( + input_zarr=large_zarr_array, + blocksize=(256, 256, 256), + write_path='/where/zarr/array/containing/results/will/be/written.zarr', + model_kwargs=model_kwargs, + eval_kwargs=eval_kwargs, + cluster_kwargs=cluster_kwargs, + ) + + +Test run a single block before distributing the whole dataset (always a good idea): + +.. code-block:: python + + from cellpose.contrib.distributed_segmentation import process_block + + # parameterize cellpose however you like + model_kwargs = {'gpu':True, 'model_type':'cyto3'} + eval_kwargs = {'diameter':30, + 'z_axis':0, + 'channels':[0,0], + 'do_3D':True, + } + + # define a crop as the distributed function would + starts = (128, 128, 128) + blocksize = (256, 256, 256) + overlap = 60 + crop = tuple(slice(s-overlap, s+b+overlap) for s, b in zip(starts, blocksize)) + + # call the segmentation + segments, boxes, box_ids = process_block( + block_index=(0, 0, 0), # when test_mode=True this is just a dummy value + crop=crop, + input_zarr=my_zarr_array, + model_kwargs=model_kwargs, + eval_kwargs=eval_kwargs, + blocksize=blocksize, + overlap=overlap, + output_zarr=None, + test_mode=True, + ) + + +Convert a single large (but still smaller than system memory) tiff image to a zarr array: + +.. code-block:: python + + # Note full image will be loaded in system memory + import tifffile + from cellpose.contrib.distributed_segmentation import numpy_array_to_zarr + + data_numpy = tifffile.imread('/path/to/image.tiff') + data_zarr = numpy_array_to_zarr('/path/to/output.zarr', data_numpy, chunks=(256, 256, 256)) + del data_numpy # assumption is data is large, don't keep in memory copy around + + +Wrap a folder of tiff images/tiles into a single zarr array without duplicating any data: + +.. code-block:: python + + # Note tiff filenames must indicate the position of each file in the overall tile grid + from cellpose.contrib.distributed_segmentation import wrap_folder_of_tiffs + reconstructed_virtual_zarr_array = wrap_folder_of_tiffs( + filname_pattern='/path/to/folder/of/*.tiff', + block_index_pattern=r'_(Z)(\d+)(Y)(\d+)(X)(\d+)', + ) + + +Run distributed Cellpose on an LSF cluster with 128 GPUs (e.g. Janelia cluster): + +.. code-block:: python + + from cellpose.contrib.distributed_segmentation import distributed_eval + + # parameterize cellpose however you like + model_kwargs = {'gpu':True, 'model_type':'cyto3'} + eval_kwargs = {'diameter':30, + 'z_axis':0, + 'channels':[0,0], + 'do_3D':True, + } + + # define LSFCluster parameters + cluster_kwargs = { + 'ncpus':2, # cpus per worker + 'min_workers':8, # cluster adapts number of workers based on number of blocks + 'max_workers':128, + 'queue':'gpu_l4', # flags required to specify a gpu job may differ between clusters + 'job_extra_directives':['-gpu "num=1"'], + } + + # run segmentation + # outputs: + # segments: zarr array containing labels + # boxes: list of bounding boxes around all labels (very useful for navigating big data) + segments, boxes = distributed_eval( + input_zarr=large_zarr_array, + blocksize=(256, 256, 256), + write_path='/where/zarr/array/containing/results/will/be/written.zarr', + model_kwargs=model_kwargs, + eval_kwargs=eval_kwargs, + cluster_kwargs=cluster_kwargs, + ) + diff --git a/docs/index.rst b/docs/index.rst index 9df60a80..9e6af4d6 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -55,6 +55,7 @@ Cellpose: a generalist algorithm for cellular segmentation restore train benchmark + distributed openvino faq