-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
few minor changes to reproduce curating_datasets notebooks (#37)
- Loading branch information
1 parent
cc7a36b
commit 42b42b8
Showing
5 changed files
with
376 additions
and
449 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
Empty file.
376 changes: 376 additions & 0 deletions
376
examples/curating_datasets/curating_african_fetal_us_datasets.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,376 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Curating african US fetal datasets\n", | ||
"\n", | ||
"**Author(s):** Miguel Xochicale [@mxochicale](https://github.com/mxochicale) \n", | ||
"**Contributor(s):** Michelle Iskandar [@michellepi](https://github.com/michellepi) \n", | ||
"\n", | ||
"**Contributor(s):** Harvey Mannering [@harveymannering](https://github.com/harveymannering) and Miguel Xochicale [@mxochicale](https://github.com/mxochicale)\n", | ||
"\n", | ||
"\n", | ||
"\n", | ||
"\n", | ||
"**Google-colabs:** \n", | ||
"\n", | ||
"\n", | ||
"## Introduction\n", | ||
"This notebook presents curation of African US datasets\n", | ||
"\n", | ||
"### Running notebook\n", | ||
"\n", | ||
"Go to repository path: `cd $HOME/repositories/budai4medtech/xfetus/` \n", | ||
"Open repo in pycharm and in the terminal type:\n", | ||
"```\n", | ||
"git checkout master # or the branch\n", | ||
"git pull # to bring a local branch up-to-date with its remote version\n", | ||
"```\n", | ||
"Launch Notebook server:\n", | ||
"```\n", | ||
"export PYTHONPATH=\"${PYTHONPATH}:$HOME/repositories/budai4medtech/xfetus/\"\n", | ||
"mamba activate *VE\n", | ||
"jupyter notebook --browser=firefox\n", | ||
"```\n", | ||
"which will open your web-browser.\n", | ||
"\n", | ||
"\n", | ||
"### Logbook\n", | ||
"* 15-Jul-2023: adds notebook \n", | ||
"* Sat 15 Jul 04:21:22 BST 2023: Added `AfricanFetalPlaneDataset` and tested in notebook\n", | ||
"\n", | ||
"### References\n", | ||
"* Maternal fetal ultrasound planes from low-resource imaging settings in five African countries (456 items, totalling 55.7 MB) > https://zenodo.org/record/7540448\n", | ||
"\n", | ||
"\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Jupyter Notebook" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Installing package dependencies (for google-colabs)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# !pip install xfetus" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Setting imports and datasets paths" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"ExecuteTime": { | ||
"end_time": "2022-08-26T13:10:31.969839Z", | ||
"start_time": "2022-08-26T13:10:31.214611Z" | ||
} | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"%load_ext autoreload\n", | ||
"%autoreload 2\n", | ||
"\n", | ||
"from xfetus import AfricanFetalPlaneDataset\n", | ||
"\n", | ||
"import os\n", | ||
"import random\n", | ||
"import numpy as np\n", | ||
"import pandas as pd\n", | ||
"import matplotlib.pyplot as plt\n", | ||
"\n", | ||
"import torch\n", | ||
"import torchvision.utils as vutils\n", | ||
"import torchvision.transforms as transforms\n", | ||
"from torch.utils.data import Dataset, DataLoader\n", | ||
"\n", | ||
"HOME_PATH = os.path.expanduser(f'~')\n", | ||
"USERNAME = os.path.split(HOME_PATH)[1]\n", | ||
"\n", | ||
"REPOSITORY_PATH='repositories/datasets/african-fetal-us-dataset/'\n", | ||
"FULL_REPO_DATA_PATH = HOME_PATH+'/'+REPOSITORY_PATH\n", | ||
"\n", | ||
"# Root directory for dataset\n", | ||
"dataroot = FULL_REPO_DATA_PATH + \"Images/\"\n", | ||
"ref = FULL_REPO_DATA_PATH + \"African_planes_database.csv\"\n", | ||
"\n", | ||
"device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", | ||
"\n", | ||
"## Printing Versions and paths\n", | ||
"print(f'FULL_REPO_DATA_PATH: {FULL_REPO_DATA_PATH}' )\n", | ||
"print(f'Device: {device}')\n", | ||
"\n", | ||
"# Set random seed for reproducibility\n", | ||
"manualSeed = 999\n", | ||
"#manualSeed = random.randint(1, 10000) # use if you want new results\n", | ||
"print(\"Random Seed: \", manualSeed)\n", | ||
"random.seed(manualSeed)\n", | ||
"torch.manual_seed(manualSeed)\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Setting variables for dataloader" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"### Configuraing dataloader" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"## Select contry for any plan\n", | ||
"# country='Algeria'; plane = None #lenght 100\n", | ||
"# country='Egypt'; plane = None #lenght 100\n", | ||
"# country='Malawi'; plane = None #lenght 100\n", | ||
"# country='Uganda'; plane = None #lenght 75\n", | ||
"# country='Ghana'; plane = None #lenght 75\n", | ||
"\n", | ||
"##Select country for fetal brain plains \n", | ||
"country='Algeria'; plane = 'Fetal brain' #lenght 25\n", | ||
"# country='Egypt'; plane = 'Fetal brain' #lenght 25\n", | ||
"# country='Malawi'; plane = 'Fetal brain' #lenght 25\n", | ||
"# country='Uganda'; plane = 'Fetal brain' #lenght 25\n", | ||
"# country='Ghana'; plane = 'Fetal brain' #lenght 25\n", | ||
"\n", | ||
"# plane = 'Fetal abdomen'; country='Algeria' #lenght 25\n", | ||
"# plane = 'Fetal femur'; country='Algeria' #lenght 25\n", | ||
"# plane = 'Fetal brain'; country='Egypt' #lenght 25\n", | ||
"# plane = 'Fetal brain'; country='Malawi' #lenght 25\n", | ||
"# plane = 'Fetal brain'; country='Uganda' #lenght 25\n", | ||
"# plane = 'Fetal brain'; country='Ghana' #lenght 25\n", | ||
"\n", | ||
"## Select all \n", | ||
"# plane = None; country=None #lenght based on train size\n", | ||
"\n", | ||
"##Select Fetal Plane\n", | ||
"# plane = 'Fetal brain'\n", | ||
"# plane = 'Fetal abdomen'\n", | ||
"# plane = 'Fetal femur'\n", | ||
"# plane = 'Fetal thorax'\n", | ||
"# plane = None\n", | ||
"\n", | ||
"##Select African country\n", | ||
"# country='Algeria'\n", | ||
"# country='Egypt'\n", | ||
"# country='Malawi'\n", | ||
"# country='Uganda'\n", | ||
"# country='Ghana'\n", | ||
"# country=None\n", | ||
"\n", | ||
"## Patient number\n", | ||
"# patient Algeria = 1, 4, 7, 12, 14, 16, 18, 23, 24, 27, 30, 31, 33, 34, 35, 36, 38, 41, 44, 45, 48, 51, 52, 53\n", | ||
"# Train=0\n", | ||
"# Train=1\n", | ||
"\n", | ||
"\n", | ||
"## Image size\n", | ||
"# image_size = 28\n", | ||
"# image_size = 64\n", | ||
"# image_size = 100\n", | ||
"# image_size = 250\n", | ||
"image_size = 400\n", | ||
"# image_size = 512\n", | ||
"\n", | ||
"## Number of workers for dataloader\n", | ||
"workers = 8\n", | ||
"\n", | ||
"## Batch size during training\n", | ||
"# batch_size = 2\n", | ||
"# batch_size = 3\n", | ||
"# batch_size = 4\n", | ||
"# batch_size = 8\n", | ||
"# batch_size = 16\n", | ||
"batch_size = 32\n", | ||
"# batch_size = 128\n", | ||
"\n", | ||
"image_channels = 1\n", | ||
"SHUFFLE_T = True\n", | ||
"SHUFFLE_F = False\n", | ||
"downsampling_factor = 4\n", | ||
"number_of_train_sample_size = 400\n", | ||
"split_type_val ='manual'\n", | ||
"split_val = 'train'\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
" \n", | ||
"transform_operations=transforms.Compose([\n", | ||
" transforms.ToTensor(),\n", | ||
" transforms.Grayscale(num_output_channels=image_channels),\n", | ||
" #mt.RandRotate(range_x=0.1, prob=0.5),\n", | ||
" #mt.RandZoom(prob=0.5, min_zoom=1, max_zoom=1.1),\n", | ||
" #mt.Resize([image_size, image_size]),\n", | ||
" transforms.Resize([image_size, image_size], antialias=True),\n", | ||
" transforms.Normalize((0.5), (0.5)), #mean=0.5, std=0.5\n", | ||
" ])\n", | ||
"\n", | ||
"dataset = AfricanFetalPlaneDataset(\n", | ||
" root_dir=dataroot,\n", | ||
" csv_file=ref,\n", | ||
" plane=plane,\n", | ||
" country=country,\n", | ||
" transform=transform_operations,\n", | ||
" split_type=split_type_val,\n", | ||
" split=split_val,\n", | ||
" train_size=number_of_train_sample_size,\n", | ||
" downsampling_factor=downsampling_factor\n", | ||
" )\n", | ||
"\n", | ||
"print(type(dataset))#<class 'medisynth.medisynth.AfricanFetalPlaneDataset'>\n", | ||
"\n", | ||
"number_of_images = dataset.__len__()\n", | ||
"print(f'lenght {number_of_images}')\n", | ||
"\n", | ||
"dataloader = DataLoader(dataset, \n", | ||
" batch_size=batch_size,\n", | ||
" shuffle=SHUFFLE_T, \n", | ||
" num_workers=workers)\n", | ||
"\n", | ||
"## Plot some training images\n", | ||
"real_batch = next(iter(dataloader))\n", | ||
"print(type(real_batch)) #<class 'list'>\n", | ||
"print(len(real_batch)) #<class 'list'>\n", | ||
"image = real_batch[0]\n", | ||
"ds_image = real_batch[1]\n", | ||
"\n", | ||
"print(ds_image.shape)\n", | ||
"print(image.shape)\n", | ||
"\n", | ||
"\n", | ||
"grid_images=vutils.make_grid(image.to(device), nrow=5, padding=2, normalize=True)\n", | ||
"print(grid_images.shape)\n", | ||
"plt.figure(figsize=(15,5))\n", | ||
"plt.title(f'Images from {plane} Original shape')\n", | ||
"plt.imshow(grid_images.permute(1, 2, 0).cpu().numpy() ) \n", | ||
"plt.show()\n", | ||
"\n", | ||
"\n", | ||
"grid_ds_images=vutils.make_grid(ds_image.to(device), nrow=5, padding=2, normalize=True)\n", | ||
"plt.figure(figsize=(15,5))\n", | ||
"plt.title(f'Images from {plane} Downsampled/{downsampling_factor}')\n", | ||
"plt.imshow(grid_ds_images.permute(1, 2, 0).cpu().numpy() )\n", | ||
"plt.show()\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"## Displaying dataloader content\n", | ||
"len_dataloader = len(dataloader)\n", | ||
"\n", | ||
"for batch_idx, real_batch in enumerate(dataloader): #iterate through each batch \n", | ||
" if batch_idx % 20 == 0:\n", | ||
" print(f'batch_idx: {batch_idx}; \\\n", | ||
" sampled images in the batch: { batch_idx*batch_size } of {number_of_images}')\n", | ||
" \n", | ||
" image = real_batch[0]\n", | ||
" ds_image = real_batch[1]\n", | ||
" \n", | ||
" \n", | ||
" \n", | ||
" print(f' image.shape: {image.shape}') #torch.Size([BATCH_SIZE, 1, ORIGINAL, ORIGINAL])\n", | ||
" print(f' image.dtype: {image.dtype}') #ds_image.dtype: torch.float32\n", | ||
" \n", | ||
" \n", | ||
" print(f' ds_image.shape: {ds_image.shape}') #torch.Size([BATCH_SIZE, 1, ORIGINAL/2, ORIGINAL/2])\n", | ||
" print(f' ds_image.dtype: {ds_image.dtype}') #ds_image.dtype: torch.float32\n", | ||
"\n", | ||
" \n", | ||
" grid_img = vutils.make_grid(image, nrow=4, padding=0, normalize=True)\n", | ||
" print(f' grid_img.shape: {grid_img.shape}')# torch.Size([3, 512, 2048])\n", | ||
" \n", | ||
" \n", | ||
" ds_grid_img = vutils.make_grid(ds_image, nrow=4, padding=0, normalize=True)\n", | ||
" print(f' ds_grid_img.shape: {ds_grid_img.shape}')# torch.Size([3, 512, 2048])\n", | ||
" \n", | ||
" \n", | ||
" \n", | ||
" plt.figure(figsize=(10,10))\n", | ||
" plt.title(f'Images from {plane} Original shape')\n", | ||
" #plt.axis(\"off\")\n", | ||
" plt.imshow(grid_img.permute(1, 2, 0).cpu().numpy() ) \n", | ||
" #plt.imshow(grid_img.permute(1, 2, 0).cpu().numpy().astype(\"float32\")) \n", | ||
" plt.show()\n", | ||
" \n", | ||
" plt.figure(figsize=(10,10))\n", | ||
" plt.title(f'Images from {plane} Downsampled/{downsampling_factor}')\n", | ||
" plt.imshow(ds_grid_img.permute(1, 2, 0).cpu().numpy() ) \n", | ||
" plt.show()\n", | ||
" " | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.11.4" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 4 | ||
} |
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters