diff --git a/tutorials/mmist.ipynb b/tutorials/mmist.ipynb index 468a3e7..6fc1c1c 100644 --- a/tutorials/mmist.ipynb +++ b/tutorials/mmist.ipynb @@ -26,7 +26,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "id": "73ae1fbb-d3b7-4f68-b883-1f52e5dab55b", "metadata": {}, "outputs": [], @@ -69,7 +69,9 @@ "source": [ "## Data Loading\n", "\n", - "In this section, we initialize and prepare our dataset for analysis. The data consists of images and segmentation models for various ligand treatments. Data for this tutorial can be downloaded from the Zenodo repository ????. Metadata, image, and segmentation data is stored first onto h5 files (models), which will also be used to store cell featurization data. See [celltraj.imageprep.create_h5](https://jcopperm.github.io/celltraj/api.html#celltraj.imageprep.create_h5) and [celltraj.imageprep.save_frame_h5](https://jcopperm.github.io/celltraj/api.html#celltraj.imageprep.save_frame_h5) for documentation. Each treatment is represented by an HDF5 file that contains relevant metadata, imaging data, and segmentation results.\n", + "In this section, we initialize and prepare our dataset for analysis. The data consists of images and segmentation models for various ligand treatments. Data for this tutorial can be downloaded from the [Zenodo repository](https://zenodo.org/records/11455834). Metadata, image, and segmentation data is stored first onto h5 files (models), which will also be used to store cell featurization data. See [celltraj.imageprep.create_h5](https://jcopperm.github.io/celltraj/api.html#celltraj.imageprep.create_h5) and [celltraj.imageprep.save_frame_h5](https://jcopperm.github.io/celltraj/api.html#celltraj.imageprep.save_frame_h5) for documentation. Each treatment is represented by an HDF5 file that contains relevant metadata, imaging data, and segmentation results. 
Note that the files provided in the Zenodo repository have been archived and compressed. After downloading, unpack all of the files with:\n", + "`find /path/to/data_repository -name \"*.tar.gz\" | xargs -I {} tar -xzvf {} -C /path/to/extract`.\n", + "To run this notebook with the datapath as written, place all extracted .h5 and .csv files into a folder named `data` located in the same folder as the `mmist.ipynb` Jupyter notebook.\n", "\n", "- `tmSet`: This list defines the different ligand treatments that were applied in the experiments.\n", "- `modelName`: This string identifies the dataset and is used to name the results.\n", @@ -82,7 +84,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, "id": "22a72644-5036-49f5-a2a3-d65ee039516d", "metadata": {}, "outputs": [ @@ -90,7 +92,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "['/home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_EGF.h5', '/home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_EGFTGFB.h5', '/home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSM.h5', '/home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSMEGF.h5', '/home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSMEGFTGFB.h5', '/home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSMTGFB.h5', '/home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_PBS.h5', '/home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_TGFB.h5']\n" + "['data/mcf10a_LI204601_2019-04-09_example2024_EGF.h5', 
'data/mcf10a_LI204601_2019-04-09_example2024_EGFTGFB.h5', 'data/mcf10a_LI204601_2019-04-09_example2024_OSM.h5', 'data/mcf10a_LI204601_2019-04-09_example2024_OSMEGF.h5', 'data/mcf10a_LI204601_2019-04-09_example2024_OSMEGFTGFB.h5', 'data/mcf10a_LI204601_2019-04-09_example2024_OSMTGFB.h5', 'data/mcf10a_LI204601_2019-04-09_example2024_PBS.h5', 'data/mcf10a_LI204601_2019-04-09_example2024_TGFB.h5']\n" ] } ], @@ -100,7 +102,7 @@ "inds_tmSet=[0,1,2,3,4,5,6,7] #indexing for conditions\n", "colorSet=['blue','green','red','purple','brown','orange','dimgray','goldenrod']\n", "modelName = f'mcf10a_LI204601_2019-04-09_example2024' #Data specifier string, also results will be saved/named using this string\n", - "datapath='/home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs' #path to where the data can be accessed\n", + "datapath='data' #path to where the data can be accessed\n", "modelList=[]\n", "inds_tmSet_models=[]\n", "for i_tm in range(len(tmSet)):\n", @@ -294,7 +296,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 3, "id": "218f8877-f2e0-40f1-9ebf-403012fe3b94", "metadata": {}, "outputs": [ @@ -302,11 +304,11 @@ "name": "stdout", "output_type": "stream", "text": [ - "loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSM.h5\n", + "loading data/mcf10a_LI204601_2019-04-09_example2024_OSM.h5\n", "interpreting image as xyc\n", - "loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSM.h5:cell_data_m0\n", - "loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSM.h5:cell_data_m1\n", - "getting foreground mask from /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSM.h5 fmask channel 0\n" + "loading 
data/mcf10a_LI204601_2019-04-09_example2024_OSM.h5:cell_data_m0\n", + "loading data/mcf10a_LI204601_2019-04-09_example2024_OSM.h5:cell_data_m1\n", + "getting foreground mask from data/mcf10a_LI204601_2019-04-09_example2024_OSM.h5 fmask channel 0\n" ] }, { @@ -315,7 +317,7 @@ "array([b'nuc', b'cyto'], dtype='|S32')" ] }, - "execution_count": 7, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" }, @@ -376,7 +378,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 4, "id": "c683a4b9-f5e7-4ffa-8389-c3946e039333", "metadata": {}, "outputs": [ @@ -384,67 +386,66 @@ "name": "stdout", "output_type": "stream", "text": [ - "loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_EGF.h5\n", + "loading data/mcf10a_LI204601_2019-04-09_example2024_EGF.h5\n", "interpreting image as xyc\n", - "loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_EGF.h5:cell_data_m0\n", - "loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_EGF.h5:cell_data_m1\n", - "loaded /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_EGF.h5 with [b'nuc' b'cyto'] mask channels, [b'BF' b'nuc' b'rep'] image channels, cell index on 1\n", + "loading data/mcf10a_LI204601_2019-04-09_example2024_EGF.h5:cell_data_m0\n", + "loading data/mcf10a_LI204601_2019-04-09_example2024_EGF.h5:cell_data_m1\n", + "loaded data/mcf10a_LI204601_2019-04-09_example2024_EGF.h5 with [b'nuc' b'cyto'] mask channels, [b'BF' b'nuc' b'rep'] image channels, cell index on 1\n", "has morphology features\n", "has motility features\n", - "loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_EGFTGFB.h5\n", + "loading 
data/mcf10a_LI204601_2019-04-09_example2024_EGFTGFB.h5\n", "interpreting image as xyc\n", - "loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_EGFTGFB.h5:cell_data_m0\n", - "loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_EGFTGFB.h5:cell_data_m1\n", - "loaded /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_EGFTGFB.h5 with [b'nuc' b'cyto'] mask channels, [b'BF' b'nuc' b'rep'] image channels, cell index on 1\n", + "loading data/mcf10a_LI204601_2019-04-09_example2024_EGFTGFB.h5:cell_data_m0\n", + "loading data/mcf10a_LI204601_2019-04-09_example2024_EGFTGFB.h5:cell_data_m1\n", + "loaded data/mcf10a_LI204601_2019-04-09_example2024_EGFTGFB.h5 with [b'nuc' b'cyto'] mask channels, [b'BF' b'nuc' b'rep'] image channels, cell index on 1\n", "has morphology features\n", "has motility features\n", - "loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSM.h5\n", + "loading data/mcf10a_LI204601_2019-04-09_example2024_OSM.h5\n", "interpreting image as xyc\n", - "loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSM.h5:cell_data_m0\n", - "loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSM.h5:cell_data_m1\n", - "loaded /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSM.h5 with [b'nuc' b'cyto'] mask channels, [b'BF' b'nuc' b'rep'] image channels, cell index on 1\n", + "loading data/mcf10a_LI204601_2019-04-09_example2024_OSM.h5:cell_data_m0\n", + "loading data/mcf10a_LI204601_2019-04-09_example2024_OSM.h5:cell_data_m1\n", + "loaded 
data/mcf10a_LI204601_2019-04-09_example2024_OSM.h5 with [b'nuc' b'cyto'] mask channels, [b'BF' b'nuc' b'rep'] image channels, cell index on 1\n", "has morphology features\n", "has motility features\n", - "loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSMEGF.h5\n", + "loading data/mcf10a_LI204601_2019-04-09_example2024_OSMEGF.h5\n", "interpreting image as xyc\n", - "loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSMEGF.h5:cell_data_m0\n", - "loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSMEGF.h5:cell_data_m1\n", - "loaded /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSMEGF.h5 with [b'nuc' b'cyto'] mask channels, [b'BF' b'nuc' b'rep'] image channels, cell index on 1\n", + "loading data/mcf10a_LI204601_2019-04-09_example2024_OSMEGF.h5:cell_data_m0\n", + "loading data/mcf10a_LI204601_2019-04-09_example2024_OSMEGF.h5:cell_data_m1\n", + "loaded data/mcf10a_LI204601_2019-04-09_example2024_OSMEGF.h5 with [b'nuc' b'cyto'] mask channels, [b'BF' b'nuc' b'rep'] image channels, cell index on 1\n", "has morphology features\n", "has motility features\n", - "loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSMEGFTGFB.h5\n", + "loading data/mcf10a_LI204601_2019-04-09_example2024_OSMEGFTGFB.h5\n", "interpreting image as xyc\n", - "loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSMEGFTGFB.h5:cell_data_m0\n", - "loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSMEGFTGFB.h5:cell_data_m1\n", - "loaded 
/home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSMEGFTGFB.h5 with [b'nuc' b'cyto'] mask channels, [b'BF' b'nuc' b'rep'] image channels, cell index on 1\n", + "loading data/mcf10a_LI204601_2019-04-09_example2024_OSMEGFTGFB.h5:cell_data_m0\n", + "loading data/mcf10a_LI204601_2019-04-09_example2024_OSMEGFTGFB.h5:cell_data_m1\n", + "loaded data/mcf10a_LI204601_2019-04-09_example2024_OSMEGFTGFB.h5 with [b'nuc' b'cyto'] mask channels, [b'BF' b'nuc' b'rep'] image channels, cell index on 1\n", "has morphology features\n", "has motility features\n", - "loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSMTGFB.h5\n", + "loading data/mcf10a_LI204601_2019-04-09_example2024_OSMTGFB.h5\n", "interpreting image as xyc\n", - "loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSMTGFB.h5:cell_data_m0\n", - "loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSMTGFB.h5:cell_data_m1\n", - "loaded /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSMTGFB.h5 with [b'nuc' b'cyto'] mask channels, [b'BF' b'nuc' b'rep'] image channels, cell index on 1\n", + "loading data/mcf10a_LI204601_2019-04-09_example2024_OSMTGFB.h5:cell_data_m0\n", + "loading data/mcf10a_LI204601_2019-04-09_example2024_OSMTGFB.h5:cell_data_m1\n", + "loaded data/mcf10a_LI204601_2019-04-09_example2024_OSMTGFB.h5 with [b'nuc' b'cyto'] mask channels, [b'BF' b'nuc' b'rep'] image channels, cell index on 1\n", "has morphology features\n", "has motility features\n", - "loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_PBS.h5\n", + "loading data/mcf10a_LI204601_2019-04-09_example2024_PBS.h5\n", 
"interpreting image as xyc\n", - "loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_PBS.h5:cell_data_m0\n", - "loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_PBS.h5:cell_data_m1\n", - "loaded /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_PBS.h5 with [b'nuc' b'cyto'] mask channels, [b'BF' b'nuc' b'rep'] image channels, cell index on 1\n", + "loading data/mcf10a_LI204601_2019-04-09_example2024_PBS.h5:cell_data_m0\n", + "loading data/mcf10a_LI204601_2019-04-09_example2024_PBS.h5:cell_data_m1\n", + "loaded data/mcf10a_LI204601_2019-04-09_example2024_PBS.h5 with [b'nuc' b'cyto'] mask channels, [b'BF' b'nuc' b'rep'] image channels, cell index on 1\n", "has morphology features\n", "has motility features\n", - "loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_TGFB.h5\n", + "loading data/mcf10a_LI204601_2019-04-09_example2024_TGFB.h5\n", "interpreting image as xyc\n", - "loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_TGFB.h5:cell_data_m0\n", - "loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_TGFB.h5:cell_data_m1\n", - "loaded /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_TGFB.h5 with [b'nuc' b'cyto'] mask channels, [b'BF' b'nuc' b'rep'] image channels, cell index on 1\n", + "loading data/mcf10a_LI204601_2019-04-09_example2024_TGFB.h5:cell_data_m0\n", + "loading data/mcf10a_LI204601_2019-04-09_example2024_TGFB.h5:cell_data_m1\n", + "loaded data/mcf10a_LI204601_2019-04-09_example2024_TGFB.h5 with [b'nuc' b'cyto'] mask channels, [b'BF' b'nuc' b'rep'] image 
channels, cell index on 1\n", "has morphology features\n", "has motility features\n" ] } ], "source": [ - "date = '10may24'\n", "nmodels=len(modelList)\n", "modelSet=[None]*nmodels\n", "indgood_models=np.array([]).astype(int)\n", @@ -494,7 +495,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 5, "id": "db80cc2f-34bc-4cf1-a2b9-1c9085fb47cd", "metadata": {}, "outputs": [ @@ -2661,7 +2662,7 @@ }, { "cell_type": "code", - "execution_count": 82, + "execution_count": 6, "id": "59c30395-1563-40ed-a9c3-69f2baf9489a", "metadata": {}, "outputs": [ @@ -2690,9 +2691,7 @@ ], "source": [ "seqFile='MDD_ligandCombination_RNAseq_log2TPM_proteinCoding.csv'\n", - "seqData=pandas.read_csv(f'{datapath}/sequencing/{seqFile}')\n", - "\n", - "datapath='/home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs' #path to where the data can be accessed\n", + "seqData=pandas.read_csv(f'{datapath}/{seqFile}')\n", "\n", "#create a filter for log2(TPM)>0.5 in 2 conditions\n", "ind_minexpr=np.where(np.sum(seqData.iloc[:,3:]>0.5,axis=1)>=2)[0]\n", @@ -2709,7 +2708,7 @@ "deseq=[None]*nf\n", "for i_tm in range(nf):\n", " seqfile=f'deseq2_DE_lfcshrink_ligands_{tmSet[i_tm]}_vs_CTRL.csv'\n", - " deseq[i_tm]=pandas.read_csv(f'{datapath}/sequencing/{seqfile}')\n", + " deseq[i_tm]=pandas.read_csv(f'{datapath}/{seqfile}')\n", "\n", "#now put together differential expression data matrix protein coding nG0\n", "nG=ind_expressed.size\n",