Skip to content

Commit

Permalink
Add option for using other folder in dataset creation script (#291)
Browse files Browse the repository at this point in the history
Co-authored-by: xavier-owkin <[email protected]>
  • Loading branch information
xavier-owkin and xavier-owkin authored Dec 20, 2023
1 parent 7afb239 commit 695ec0e
Showing 1 changed file with 19 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,9 @@ def __len__(self):

def __getitem__(self, idx):
pil_image = self.slide.read_region(
self.coords[idx].astype("int_"), self.level, (self.tile_size, self.tile_size)
self.coords[idx].astype("int_"),
self.level,
(self.tile_size, self.tile_size),
).convert("RGB")
if self.transform is not None:
pil_image = self.transform(pil_image)
Expand All @@ -68,7 +70,7 @@ def save_dict_to_csv(dict_arg, file_name):
df.to_csv(file_name, index=False)


def main(batch_size, num_workers_torch, tile_from_scratch, remove_big_tiff):
def main(batch_size, num_workers_torch, tile_from_scratch, remove_big_tiff, output_path):
"""Function tiling slides that have been downloaded using download.py.
Parameters
Expand All @@ -83,9 +85,12 @@ def main(batch_size, num_workers_torch, tile_from_scratch, remove_big_tiff):
If this option is activated we disregard the csv files with precomputed
coordinates.
remove_big_tiff : bool
remove_big_tiff: bool
Whether or not to get rid of all original slides after tiling.
output_path: str
An optional path to store the dataset after tiling
Raises
------
ValueError
Expand Down Expand Up @@ -125,6 +130,10 @@ def main(batch_size, num_workers_torch, tile_from_scratch, remove_big_tiff):
sys.exit()

slides_dir = dict["dataset_path"]
if output_path is not None:
output_slides_dir = output_path
else:
output_slides_dir = slides_dir
slides_paths = glob(os.path.join(slides_dir, "*.tif"))
grid_tiles_extractor = GridTiler(
tile_size=(224, 224),
Expand Down Expand Up @@ -167,7 +176,7 @@ def main(batch_size, num_workers_torch, tile_from_scratch, remove_big_tiff):

for sp in tqdm(slides_paths):
slide_name = os.path.basename(sp)
path_to_features = os.path.join(slides_dir, slide_name + ".npy")
path_to_features = os.path.join(output_slides_dir, slide_name + ".npy")
if os.path.exists(path_to_features):
continue
print(f"Tiling slide {sp}")
Expand Down Expand Up @@ -247,6 +256,8 @@ def main(batch_size, num_workers_torch, tile_from_scratch, remove_big_tiff):
np.save(path_to_features, features)

write_value_in_config(config_file, "preprocessing_complete", True)
if output_path is not None:
write_value_in_config(config_file, "dataset_path", output_path)

if args.remove_big_tiff:
print("Removing all slides")
Expand Down Expand Up @@ -279,11 +290,15 @@ def main(batch_size, num_workers_torch, tile_from_scratch, remove_big_tiff):
help="Whether or not to remove the original slides images that take \
up to 800G, after computing the features using them.",
)
parser.add_argument(
"--output-path", type=str, help="The path where to store the tiles"
)

args = parser.parse_args()
main(
args.batch_size,
args.num_workers_torch,
args.tile_from_scratch,
args.remove_big_tiff,
args.output_path,
)

0 comments on commit 695ec0e

Please sign in to comment.