Skip to content

Commit

Permalink
mmist tutorial update zenodo
Browse files Browse the repository at this point in the history
  • Loading branch information
jcopperm committed Jun 3, 2024
1 parent 77421f9 commit 9e5cf44
Showing 1 changed file with 50 additions and 51 deletions.
101 changes: 50 additions & 51 deletions tutorials/mmist.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 1,
"id": "73ae1fbb-d3b7-4f68-b883-1f52e5dab55b",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -69,7 +69,9 @@
"source": [
"## Data Loading\n",
"\n",
"In this section, we initialize and prepare our dataset for analysis. The data consists of images and segmentation models for various ligand treatments. Data for this tutorial can be downloaded from the Zenodo repository ????. Metadata, image, and segmentation data is stored first onto h5 files (models), which will also be used to store cell featurization data. See [celltraj.imageprep.create_h5](https://jcopperm.github.io/celltraj/api.html#celltraj.imageprep.create_h5) and [celltraj.imageprep.save_frame_h5](https://jcopperm.github.io/celltraj/api.html#celltraj.imageprep.save_frame_h5) for documentation. Each treatment is represented by an HDF5 file that contains relevant metadata, imaging data, and segmentation results.\n",
"In this section, we initialize and prepare our dataset for analysis. The data consists of images and segmentation models for various ligand treatments. Data for this tutorial can be downloaded from the [Zenodo repository](https://zenodo.org/records/11455834). Metadata, image, and segmentation data are first stored in h5 files (models), which are also used to store cell featurization data. See [celltraj.imageprep.create_h5](https://jcopperm.github.io/celltraj/api.html#celltraj.imageprep.create_h5) and [celltraj.imageprep.save_frame_h5](https://jcopperm.github.io/celltraj/api.html#celltraj.imageprep.save_frame_h5) for documentation. Each treatment is represented by an HDF5 file that contains relevant metadata, imaging data, and segmentation results. Note that the files provided in the Zenodo repository have been archived and compressed; after downloading, unpack all of the files with:\n",
"`find /path/to/data_repository -name \"*.tar.gz\" | xargs -I {} tar -xzvf {} -C /path/to/extract`.\n",
"To run this notebook with the datapath as written, place all extracted .h5 and .csv files in a folder named `data` located in the same directory as the `mmist.ipynb` Jupyter notebook.\n",
"\n",
"- `tmSet`: This list defines the different ligand treatments that were applied in the experiments.\n",
"- `modelName`: This string identifies the dataset and is used to name the results.\n",
Expand All @@ -82,15 +84,15 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 2,
"id": "22a72644-5036-49f5-a2a3-d65ee039516d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['/home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_EGF.h5', '/home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_EGFTGFB.h5', '/home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSM.h5', '/home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSMEGF.h5', '/home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSMEGFTGFB.h5', '/home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSMTGFB.h5', '/home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_PBS.h5', '/home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_TGFB.h5']\n"
"['data/mcf10a_LI204601_2019-04-09_example2024_EGF.h5', 'data/mcf10a_LI204601_2019-04-09_example2024_EGFTGFB.h5', 'data/mcf10a_LI204601_2019-04-09_example2024_OSM.h5', 'data/mcf10a_LI204601_2019-04-09_example2024_OSMEGF.h5', 'data/mcf10a_LI204601_2019-04-09_example2024_OSMEGFTGFB.h5', 'data/mcf10a_LI204601_2019-04-09_example2024_OSMTGFB.h5', 'data/mcf10a_LI204601_2019-04-09_example2024_PBS.h5', 'data/mcf10a_LI204601_2019-04-09_example2024_TGFB.h5']\n"
]
}
],
Expand All @@ -100,7 +102,7 @@
"inds_tmSet=[0,1,2,3,4,5,6,7] #indexing for conditions\n",
"colorSet=['blue','green','red','purple','brown','orange','dimgray','goldenrod']\n",
"modelName = f'mcf10a_LI204601_2019-04-09_example2024' #Data specifier string, also results will be saved/named using this string\n",
"datapath='/home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs' #path to where the data can be accessed\n",
"datapath='data' #path to where the data can be accessed\n",
"modelList=[]\n",
"inds_tmSet_models=[]\n",
"for i_tm in range(len(tmSet)):\n",
Expand Down Expand Up @@ -294,19 +296,19 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 3,
"id": "218f8877-f2e0-40f1-9ebf-403012fe3b94",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSM.h5\n",
"loading data/mcf10a_LI204601_2019-04-09_example2024_OSM.h5\n",
"interpreting image as xyc\n",
"loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSM.h5:cell_data_m0\n",
"loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSM.h5:cell_data_m1\n",
"getting foreground mask from /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSM.h5 fmask channel 0\n"
"loading data/mcf10a_LI204601_2019-04-09_example2024_OSM.h5:cell_data_m0\n",
"loading data/mcf10a_LI204601_2019-04-09_example2024_OSM.h5:cell_data_m1\n",
"getting foreground mask from data/mcf10a_LI204601_2019-04-09_example2024_OSM.h5 fmask channel 0\n"
]
},
{
Expand All @@ -315,7 +317,7 @@
"array([b'nuc', b'cyto'], dtype='|S32')"
]
},
"execution_count": 7,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
},
Expand Down Expand Up @@ -376,75 +378,74 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 4,
"id": "c683a4b9-f5e7-4ffa-8389-c3946e039333",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_EGF.h5\n",
"loading data/mcf10a_LI204601_2019-04-09_example2024_EGF.h5\n",
"interpreting image as xyc\n",
"loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_EGF.h5:cell_data_m0\n",
"loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_EGF.h5:cell_data_m1\n",
"loaded /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_EGF.h5 with [b'nuc' b'cyto'] mask channels, [b'BF' b'nuc' b'rep'] image channels, cell index on 1\n",
"loading data/mcf10a_LI204601_2019-04-09_example2024_EGF.h5:cell_data_m0\n",
"loading data/mcf10a_LI204601_2019-04-09_example2024_EGF.h5:cell_data_m1\n",
"loaded data/mcf10a_LI204601_2019-04-09_example2024_EGF.h5 with [b'nuc' b'cyto'] mask channels, [b'BF' b'nuc' b'rep'] image channels, cell index on 1\n",
"has morphology features\n",
"has motility features\n",
"loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_EGFTGFB.h5\n",
"loading data/mcf10a_LI204601_2019-04-09_example2024_EGFTGFB.h5\n",
"interpreting image as xyc\n",
"loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_EGFTGFB.h5:cell_data_m0\n",
"loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_EGFTGFB.h5:cell_data_m1\n",
"loaded /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_EGFTGFB.h5 with [b'nuc' b'cyto'] mask channels, [b'BF' b'nuc' b'rep'] image channels, cell index on 1\n",
"loading data/mcf10a_LI204601_2019-04-09_example2024_EGFTGFB.h5:cell_data_m0\n",
"loading data/mcf10a_LI204601_2019-04-09_example2024_EGFTGFB.h5:cell_data_m1\n",
"loaded data/mcf10a_LI204601_2019-04-09_example2024_EGFTGFB.h5 with [b'nuc' b'cyto'] mask channels, [b'BF' b'nuc' b'rep'] image channels, cell index on 1\n",
"has morphology features\n",
"has motility features\n",
"loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSM.h5\n",
"loading data/mcf10a_LI204601_2019-04-09_example2024_OSM.h5\n",
"interpreting image as xyc\n",
"loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSM.h5:cell_data_m0\n",
"loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSM.h5:cell_data_m1\n",
"loaded /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSM.h5 with [b'nuc' b'cyto'] mask channels, [b'BF' b'nuc' b'rep'] image channels, cell index on 1\n",
"loading data/mcf10a_LI204601_2019-04-09_example2024_OSM.h5:cell_data_m0\n",
"loading data/mcf10a_LI204601_2019-04-09_example2024_OSM.h5:cell_data_m1\n",
"loaded data/mcf10a_LI204601_2019-04-09_example2024_OSM.h5 with [b'nuc' b'cyto'] mask channels, [b'BF' b'nuc' b'rep'] image channels, cell index on 1\n",
"has morphology features\n",
"has motility features\n",
"loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSMEGF.h5\n",
"loading data/mcf10a_LI204601_2019-04-09_example2024_OSMEGF.h5\n",
"interpreting image as xyc\n",
"loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSMEGF.h5:cell_data_m0\n",
"loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSMEGF.h5:cell_data_m1\n",
"loaded /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSMEGF.h5 with [b'nuc' b'cyto'] mask channels, [b'BF' b'nuc' b'rep'] image channels, cell index on 1\n",
"loading data/mcf10a_LI204601_2019-04-09_example2024_OSMEGF.h5:cell_data_m0\n",
"loading data/mcf10a_LI204601_2019-04-09_example2024_OSMEGF.h5:cell_data_m1\n",
"loaded data/mcf10a_LI204601_2019-04-09_example2024_OSMEGF.h5 with [b'nuc' b'cyto'] mask channels, [b'BF' b'nuc' b'rep'] image channels, cell index on 1\n",
"has morphology features\n",
"has motility features\n",
"loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSMEGFTGFB.h5\n",
"loading data/mcf10a_LI204601_2019-04-09_example2024_OSMEGFTGFB.h5\n",
"interpreting image as xyc\n",
"loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSMEGFTGFB.h5:cell_data_m0\n",
"loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSMEGFTGFB.h5:cell_data_m1\n",
"loaded /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSMEGFTGFB.h5 with [b'nuc' b'cyto'] mask channels, [b'BF' b'nuc' b'rep'] image channels, cell index on 1\n",
"loading data/mcf10a_LI204601_2019-04-09_example2024_OSMEGFTGFB.h5:cell_data_m0\n",
"loading data/mcf10a_LI204601_2019-04-09_example2024_OSMEGFTGFB.h5:cell_data_m1\n",
"loaded data/mcf10a_LI204601_2019-04-09_example2024_OSMEGFTGFB.h5 with [b'nuc' b'cyto'] mask channels, [b'BF' b'nuc' b'rep'] image channels, cell index on 1\n",
"has morphology features\n",
"has motility features\n",
"loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSMTGFB.h5\n",
"loading data/mcf10a_LI204601_2019-04-09_example2024_OSMTGFB.h5\n",
"interpreting image as xyc\n",
"loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSMTGFB.h5:cell_data_m0\n",
"loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSMTGFB.h5:cell_data_m1\n",
"loaded /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_OSMTGFB.h5 with [b'nuc' b'cyto'] mask channels, [b'BF' b'nuc' b'rep'] image channels, cell index on 1\n",
"loading data/mcf10a_LI204601_2019-04-09_example2024_OSMTGFB.h5:cell_data_m0\n",
"loading data/mcf10a_LI204601_2019-04-09_example2024_OSMTGFB.h5:cell_data_m1\n",
"loaded data/mcf10a_LI204601_2019-04-09_example2024_OSMTGFB.h5 with [b'nuc' b'cyto'] mask channels, [b'BF' b'nuc' b'rep'] image channels, cell index on 1\n",
"has morphology features\n",
"has motility features\n",
"loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_PBS.h5\n",
"loading data/mcf10a_LI204601_2019-04-09_example2024_PBS.h5\n",
"interpreting image as xyc\n",
"loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_PBS.h5:cell_data_m0\n",
"loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_PBS.h5:cell_data_m1\n",
"loaded /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_PBS.h5 with [b'nuc' b'cyto'] mask channels, [b'BF' b'nuc' b'rep'] image channels, cell index on 1\n",
"loading data/mcf10a_LI204601_2019-04-09_example2024_PBS.h5:cell_data_m0\n",
"loading data/mcf10a_LI204601_2019-04-09_example2024_PBS.h5:cell_data_m1\n",
"loaded data/mcf10a_LI204601_2019-04-09_example2024_PBS.h5 with [b'nuc' b'cyto'] mask channels, [b'BF' b'nuc' b'rep'] image channels, cell index on 1\n",
"has morphology features\n",
"has motility features\n",
"loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_TGFB.h5\n",
"loading data/mcf10a_LI204601_2019-04-09_example2024_TGFB.h5\n",
"interpreting image as xyc\n",
"loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_TGFB.h5:cell_data_m0\n",
"loading /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_TGFB.h5:cell_data_m1\n",
"loaded /home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs/mcf10a_LI204601_2019-04-09_example2024_TGFB.h5 with [b'nuc' b'cyto'] mask channels, [b'BF' b'nuc' b'rep'] image channels, cell index on 1\n",
"loading data/mcf10a_LI204601_2019-04-09_example2024_TGFB.h5:cell_data_m0\n",
"loading data/mcf10a_LI204601_2019-04-09_example2024_TGFB.h5:cell_data_m1\n",
"loaded data/mcf10a_LI204601_2019-04-09_example2024_TGFB.h5 with [b'nuc' b'cyto'] mask channels, [b'BF' b'nuc' b'rep'] image channels, cell index on 1\n",
"has morphology features\n",
"has motility features\n"
]
}
],
"source": [
"date = '10may24'\n",
"nmodels=len(modelList)\n",
"modelSet=[None]*nmodels\n",
"indgood_models=np.array([]).astype(int)\n",
Expand Down Expand Up @@ -494,7 +495,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 5,
"id": "db80cc2f-34bc-4cf1-a2b9-1c9085fb47cd",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -2661,7 +2662,7 @@
},
{
"cell_type": "code",
"execution_count": 82,
"execution_count": 6,
"id": "59c30395-1563-40ed-a9c3-69f2baf9489a",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -2690,9 +2691,7 @@
],
"source": [
"seqFile='MDD_ligandCombination_RNAseq_log2TPM_proteinCoding.csv'\n",
"seqData=pandas.read_csv(f'{datapath}/sequencing/{seqFile}')\n",
"\n",
"datapath='/home/groups/CEDAR/copperma/dyn-cell-sys/mmist/example_8may24/images_and_segs' #path to where the data can be accessed\n",
"seqData=pandas.read_csv(f'{datapath}/{seqFile}')\n",
"\n",
"#create a filter for log2(TPM)>0.5 in 2 conditions\n",
"ind_minexpr=np.where(np.sum(seqData.iloc[:,3:]>0.5,axis=1)>=2)[0]\n",
Expand All @@ -2709,7 +2708,7 @@
"deseq=[None]*nf\n",
"for i_tm in range(nf):\n",
" seqfile=f'deseq2_DE_lfcshrink_ligands_{tmSet[i_tm]}_vs_CTRL.csv'\n",
" deseq[i_tm]=pandas.read_csv(f'{datapath}/sequencing/{seqfile}')\n",
" deseq[i_tm]=pandas.read_csv(f'{datapath}/{seqfile}')\n",
"\n",
"#now put together differential expression data matrix protein coding nG0\n",
"nG=ind_expressed.size\n",
Expand Down

0 comments on commit 9e5cf44

Please sign in to comment.