From 256cecfe1f0feeb0823484a0afede5a87fe07264 Mon Sep 17 00:00:00 2001
From: Huihuo Zheng
Date: Fri, 1 Dec 2023 20:24:56 +0000
Subject: [PATCH] Partially merge the following PR: https://github.com/argonne-lcf/dlio_benchmark/pull/81

---
 dlio_benchmark/common/enumerations.py         |  1 +
 .../data_loader/data_loader_factory.py        |  3 +
 .../data_loader/native_dali_data_loader.py    | 60 ++++++++++++++
 dlio_benchmark/reader/dali_image_reader.py    | 69 ++++++++++++++++
 dlio_benchmark/reader/dali_npz_reader.py      | 68 ++++++++++++++++
 dlio_benchmark/reader/dali_tfrecord_reader.py | 78 +++++++++++++++++++
 .../reader/{png_reader.py => image_reader.py} |  4 +-
 dlio_benchmark/reader/jpeg_reader.py          | 62 ---------------
 dlio_benchmark/reader/npz_reader.py           | 60 --------------
 dlio_benchmark/reader/reader_factory.py       | 29 ++++++---
 10 files changed, 300 insertions(+), 134 deletions(-)
 create mode 100644 dlio_benchmark/data_loader/native_dali_data_loader.py
 create mode 100644 dlio_benchmark/reader/dali_image_reader.py
 create mode 100644 dlio_benchmark/reader/dali_npz_reader.py
 create mode 100644 dlio_benchmark/reader/dali_tfrecord_reader.py
 rename dlio_benchmark/reader/{png_reader.py => image_reader.py} (96%)
 delete mode 100644 dlio_benchmark/reader/jpeg_reader.py
 delete mode 100644 dlio_benchmark/reader/npz_reader.py

diff --git a/dlio_benchmark/common/enumerations.py b/dlio_benchmark/common/enumerations.py
index 64227772..0081195e 100644
--- a/dlio_benchmark/common/enumerations.py
+++ b/dlio_benchmark/common/enumerations.py
@@ -124,6 +124,7 @@ class DataLoaderType(Enum):
     TENSORFLOW='tensorflow'
     PYTORCH='pytorch'
     DALI='dali'
+    NATIVE_DALI = 'native_dali'
     CUSTOM='custom'
     NONE='none'
 
diff --git a/dlio_benchmark/data_loader/data_loader_factory.py b/dlio_benchmark/data_loader/data_loader_factory.py
index e8457450..13bf16b0 100644
--- a/dlio_benchmark/data_loader/data_loader_factory.py
+++ b/dlio_benchmark/data_loader/data_loader_factory.py
@@ -45,6 +45,9 @@ def get_loader(type, format_type, dataset_type, epoch):
         elif type == DataLoaderType.DALI:
             from dlio_benchmark.data_loader.dali_data_loader import DaliDataLoader
             return DaliDataLoader(format_type, dataset_type, epoch)
+        elif type == DataLoaderType.NATIVE_DALI:
+            from dlio_benchmark.data_loader.native_dali_data_loader import NativeDaliDataLoader
+            return NativeDaliDataLoader(format_type, dataset_type, epoch)
         else:
             print("Data Loader %s not supported or plugins not found" % type)
             raise Exception(str(ErrorCodes.EC1004))
diff --git a/dlio_benchmark/data_loader/native_dali_data_loader.py b/dlio_benchmark/data_loader/native_dali_data_loader.py
new file mode 100644
index 00000000..2df04a5e
--- /dev/null
+++ b/dlio_benchmark/data_loader/native_dali_data_loader.py
@@ -0,0 +1,60 @@
+from time import time
+import logging
+import math
+import numpy as np
+from nvidia.dali.pipeline import Pipeline
+import nvidia.dali.fn as fn
+import nvidia.dali.types as types
+import nvidia.dali as dali
+from nvidia.dali.plugin.pytorch import DALIGenericIterator
+
+from dlio_benchmark.common.constants import MODULE_DATA_LOADER
+from dlio_benchmark.common.enumerations import Shuffle, DataLoaderType, DatasetType
+from dlio_benchmark.data_loader.base_data_loader import BaseDataLoader
+from dlio_benchmark.reader.reader_factory import ReaderFactory
+from dlio_benchmark.utils.utility import utcnow, get_rank, timeit, Profile
+
+dlp = Profile(MODULE_DATA_LOADER)
+
+
+class NativeDaliDataLoader(BaseDataLoader):
+    @dlp.log_init
+    def __init__(self, format_type, dataset_type, epoch):
+        super().__init__(format_type, dataset_type, epoch, DataLoaderType.NATIVE_DALI)
+        self.pipelines = []
+
+    @dlp.log
+    def read(self):
+        num_samples = self._args.total_samples_train if self.dataset_type is DatasetType.TRAIN else self._args.total_samples_eval
+        batch_size = self._args.batch_size if self.dataset_type is DatasetType.TRAIN else self._args.batch_size_eval
+        parallel = True if self._args.read_threads > 0 else False
+        self.pipelines = []
+        num_threads = 1
+        if self._args.read_threads > 0:
+            num_threads = self._args.read_threads
+        # None executes pipeline on CPU and the reader does the batching
+        pipeline = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=None, py_num_workers=num_threads,
+                            exec_async=False, exec_pipelined=False)
+        with pipeline:
+            images = ReaderFactory.get_reader(type=self.format_type,
+                                              dataset_type=self.dataset_type,
+                                              thread_index=-1,
+                                              epoch_number=self.epoch_number).read()
+            pipeline.set_outputs(images)
+        self.pipelines.append(pipeline)
+        logging.info(f"{utcnow()} Creating {num_threads} pipelines by {self._args.my_rank} rank ")
+
+    @dlp.log
+    def next(self):
+        super().next()
+        num_samples = self._args.total_samples_train if self.dataset_type is DatasetType.TRAIN else self._args.total_samples_eval
+        batch_size = self._args.batch_size if self.dataset_type is DatasetType.TRAIN else self._args.batch_size_eval
+        for step in range(num_samples // batch_size):
+            _dataset = DALIGenericIterator(self.pipelines, ['data'])
+            for batch in _dataset:
+                logging.info(f"{utcnow()} Creating {len(batch)} batches by {self._args.my_rank} rank ")
+                yield batch
+
+    @dlp.log
+    def finalize(self):
+        pass
diff --git a/dlio_benchmark/reader/dali_image_reader.py b/dlio_benchmark/reader/dali_image_reader.py
new file mode 100644
index 00000000..adbc2c55
--- /dev/null
+++ b/dlio_benchmark/reader/dali_image_reader.py
@@ -0,0 +1,69 @@
+"""
+   Copyright (c) 2022, UChicago Argonne, LLC
+   All Rights Reserved
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+""" +import math +import logging +from time import time + +import nvidia.dali.fn as fn +from dlio_benchmark.common.constants import MODULE_DATA_READER +from dlio_benchmark.reader.dali_base_reader import DaliBaseReader +from dlio_benchmark.reader.tf_base_reader import TFBaseReader +from dlio_benchmark.utils.utility import utcnow, PerfTrace, Profile +from dlio_benchmark.common.enumerations import DatasetType, Shuffle +import nvidia.dali.tfrecord as tfrec + +dlp = Profile(MODULE_DATA_READER) + + +class DaliImageReader(DaliBaseReader): + @dlp.log_init + def __init__(self, dataset_type): + super().__init__(dataset_type) + + @dlp.log + def _load(self): + logging.debug( + f"{utcnow()} Reading {len(self.file_list)} files rank {self._args.my_rank}") + random_shuffle = False + seed = -1 + seed_change_epoch = False + if self._args.sample_shuffle is not Shuffle.OFF: + if self._args.sample_shuffle is not Shuffle.SEED: + seed = self._args.seed + random_shuffle = True + seed_change_epoch = True + initial_fill = 1024 + if self._args.shuffle_size > 0: + initial_fill = self._args.shuffle_size + prefetch_size = 1 + if self._args.prefetch_size > 0: + prefetch_size = self._args.prefetch_size + + stick_to_shard = True + if seed_change_epoch: + stick_to_shard = False + images, labels = fn.readers.file(files=files, num_shards=self._args.comm_size, + prefetch_queue_depth=prefetch_size, + initial_fill=initial_fill, random_shuffle=random_shuffle, + shuffle_after_epoch=seed_change_epoch, + stick_to_shard=stick_to_shard, pad_last_batch=True) + dataset = fn.decoders.image(jpegs, device='cpu') + return dataset + + @dlp.log + def finalize(self): + pass \ No newline at end of file diff --git a/dlio_benchmark/reader/dali_npz_reader.py b/dlio_benchmark/reader/dali_npz_reader.py new file mode 100644 index 00000000..f2887ef5 --- /dev/null +++ b/dlio_benchmark/reader/dali_npz_reader.py @@ -0,0 +1,68 @@ +""" + Copyright (c) 2022, UChicago Argonne, LLC + All Rights Reserved + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+""" +import math +import logging +from time import time + +import nvidia.dali.fn as fn +from dlio_benchmark.common.constants import MODULE_DATA_READER +from dlio_benchmark.reader.dali_base_reader import DaliBaseReader +from dlio_benchmark.reader.tf_base_reader import TFBaseReader +from dlio_benchmark.utils.utility import utcnow, PerfTrace, Profile +from dlio_benchmark.common.enumerations import DatasetType, Shuffle +import nvidia.dali.tfrecord as tfrec + +dlp = Profile(MODULE_DATA_READER) + + +class DaliNPZReader(DaliBaseReader): + @dlp.log_init + def __init__(self, dataset_type): + super().__init__(dataset_type) + + @dlp.log + def _load(self): + logging.debug( + f"{utcnow()} Reading {len(self.file_list)} files rank {self._args.my_rank}") + random_shuffle = False + seed = -1 + seed_change_epoch = False + if self._args.sample_shuffle is not Shuffle.OFF: + if self._args.sample_shuffle is not Shuffle.SEED: + seed = self._args.seed + random_shuffle = True + seed_change_epoch = True + initial_fill = 1024 + if self._args.shuffle_size > 0: + initial_fill = self._args.shuffle_size + prefetch_size = 1 + if self._args.prefetch_size > 0: + prefetch_size = self._args.prefetch_size + + stick_to_shard = True + if seed_change_epoch: + stick_to_shard = False + + dataset = fn.readers.numpy(device='cpu', files=self.file_list, num_shards=self._args.comm_size, + prefetch_queue_depth=prefetch_size, initial_fill=initial_fill, + random_shuffle=random_shuffle, seed=seed, shuffle_after_epoch=seed_change_epoch, + stick_to_shard=stick_to_shard, pad_last_batch=True) + return dataset + + @dlp.log + def finalize(self): + pass diff --git a/dlio_benchmark/reader/dali_tfrecord_reader.py b/dlio_benchmark/reader/dali_tfrecord_reader.py new file mode 100644 index 00000000..4b8147af --- /dev/null +++ b/dlio_benchmark/reader/dali_tfrecord_reader.py @@ -0,0 +1,78 @@ +""" + Copyright (c) 2022, UChicago Argonne, LLC + All Rights Reserved + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+""" +import os.path + +import math +import logging +from time import time + +import nvidia +import nvidia.dali.fn as fn +from dlio_benchmark.common.constants import MODULE_DATA_READER +from dlio_benchmark.reader.dali_base_reader import DaliBaseReader +from dlio_benchmark.reader.tf_base_reader import TFBaseReader +from dlio_benchmark.utils.utility import utcnow, PerfTrace, Profile +from dlio_benchmark.common.enumerations import DatasetType, Shuffle +import nvidia.dali.tfrecord as tfrec + +dlp = Profile(MODULE_DATA_READER) + + +class DaliTFRecordReader(DaliBaseReader): + @dlp.log_init + def __init__(self, dataset_type): + super().__init__(dataset_type) + + @dlp.log + def _load(self): + folder = "valid" + if self.dataset_type == DatasetType.TRAIN: + folder = "train" + index_folder = f"{self._args.data_folder}/index/{folder}" + index_files = [] + for file in self.file_list: + filename = os.path.basename(file) + index_files.append(f"{index_folder}/{filename}.idx") + logging.info( + f"{utcnow()} Reading {len(self.file_list)} files rank {self._args.my_rank}") + random_shuffle = False + seed = -1 + if self._args.sample_shuffle is not Shuffle.OFF: + if self._args.sample_shuffle is not Shuffle.SEED: + seed = self._args.seed + random_shuffle = True + initial_fill = 1024 + if self._args.shuffle_size > 0: + initial_fill = self._args.shuffle_size + prefetch_size = 1 + if self._args.prefetch_size > 0: + prefetch_size = self._args.prefetch_size + dataset = fn.readers.tfrecord(path=self.file_list, + index_path=index_files, + features={ + 'image': tfrec.FixedLenFeature((), tfrec.string, ""), + 'size': tfrec.FixedLenFeature([1], tfrec.int64, 0) + }, num_shards=self._args.comm_size, + prefetch_queue_depth=prefetch_size, + initial_fill=initial_fill, + random_shuffle=random_shuffle, seed=seed, + stick_to_shard=True, pad_last_batch=True) + return dataset["image"] + + @dlp.log + def finalize(self): + pass diff --git a/dlio_benchmark/reader/png_reader.py b/dlio_benchmark/reader/image_reader.py similarity index 96% rename from dlio_benchmark/reader/png_reader.py rename to dlio_benchmark/reader/image_reader.py index 64183dd3..1fe63a05 100644 --- a/dlio_benchmark/reader/png_reader.py +++ b/dlio_benchmark/reader/image_reader.py @@ -26,9 +26,9 @@ dlp = Profile(MODULE_DATA_READER) -class PNGReader(FormatReader): +class ImageReader(FormatReader): """ - Reader for PNG files + Reader for PNG / JPEG files """ @dlp.log_init diff --git a/dlio_benchmark/reader/jpeg_reader.py b/dlio_benchmark/reader/jpeg_reader.py deleted file mode 100644 index 664cde04..00000000 --- a/dlio_benchmark/reader/jpeg_reader.py +++ /dev/null @@ -1,62 +0,0 @@ -""" - Copyright (c) 2022, UChicago Argonne, LLC - All Rights Reserved - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-""" - -import numpy as np -from PIL import Image - -from dlio_benchmark.common.constants import MODULE_DATA_READER -from dlio_benchmark.reader.reader_handler import FormatReader -from dlio_profiler.logger import fn_interceptor as Profile - -dlp = Profile(MODULE_DATA_READER) - - -class JPEGReader(FormatReader): - """ - Reader for JPEG files - """ - - @dlp.log_init - def __init__(self, dataset_type, thread_index, epoch): - super().__init__(dataset_type, thread_index) - - @dlp.log - def open(self, filename): - super().open(filename) - return np.asarray(Image.open(filename)) - - @dlp.log - def close(self, filename): - super().close(filename) - - @dlp.log - def get_sample(self, filename, sample_index): - super().get_sample(filename, sample_index) - image = self.open_file_map[filename] - dlp.update(image_size=image.nbytes) - - def next(self): - for batch in super().next(): - yield batch - - @dlp.log - def read_index(self, image_idx, step): - return super().read_index(image_idx, step) - - @dlp.log - def finalize(self): - return super().finalize() diff --git a/dlio_benchmark/reader/npz_reader.py b/dlio_benchmark/reader/npz_reader.py deleted file mode 100644 index f0144f74..00000000 --- a/dlio_benchmark/reader/npz_reader.py +++ /dev/null @@ -1,60 +0,0 @@ -""" - Copyright (c) 2022, UChicago Argonne, LLC - All Rights Reserved - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-""" -import numpy as np - -from dlio_benchmark.common.constants import MODULE_DATA_READER -from dlio_benchmark.reader.reader_handler import FormatReader -from dlio_profiler.logger import fn_interceptor as Profile - -dlp = Profile(MODULE_DATA_READER) - - -class NPZReader(FormatReader): - """ - Reader for NPZ files - """ - - @dlp.log_init - def __init__(self, dataset_type, thread_index, epoch): - super().__init__(dataset_type, thread_index) - - @dlp.log - def open(self, filename): - super().open(filename) - return np.load(filename, allow_pickle=True)['x'] - - @dlp.log - def close(self, filename): - super().close(filename) - - @dlp.log - def get_sample(self, filename, sample_index): - super().get_sample(filename, sample_index) - image = self.open_file_map[filename][..., sample_index] - dlp.update(image_size=image.nbytes) - - def next(self): - for batch in super().next(): - yield batch - - @dlp.log - def read_index(self, image_idx, step): - return super().read_index(image_idx, step) - - @dlp.log - def finalize(self): - return super().finalize() \ No newline at end of file diff --git a/dlio_benchmark/reader/reader_factory.py b/dlio_benchmark/reader/reader_factory.py index 74fc353e..e6055dc4 100644 --- a/dlio_benchmark/reader/reader_factory.py +++ b/dlio_benchmark/reader/reader_factory.py @@ -43,18 +43,24 @@ def get_reader(type, dataset_type, thread_index, epoch_number): elif type == FormatType.CSV: from dlio_benchmark.reader.csv_reader import CSVReader return CSVReader(dataset_type, thread_index, epoch_number) - elif type == FormatType.JPEG: - from dlio_benchmark.reader.jpeg_reader import JPEGReader - return JPEGReader(dataset_type, thread_index, epoch_number) - elif type == FormatType.PNG: - from dlio_benchmark.reader.png_reader import PNGReader - return PNGReader(dataset_type, thread_index, epoch_number) + elif type == FormatType.JPEG or FormatType.PNG: + if _args.data_loader == DataLoaderType.NATIVE_DALI + from dlio_benchmark.reader.image_reader import ImageReader + return DaliImageReader(dataset_type, thread_index, epoch_number) elif type == FormatType.NPZ: - from dlio_benchmark.reader.npz_reader import NPZReader - return NPZReader(dataset_type, thread_index, epoch_number) + if _args.data_loader == DataLoaderType.NATIVE_DALI + from dlio_benchmark.reader.dali_npz_reader import DaliNPZReader + return DaliNPZReader(dataset_type, thread_index, epoch_number) + else: + from dlio_benchmark.reader.npz_reader import NPZReader + return NPZReader(dataset_type, thread_index, epoch_number) elif type == FormatType.TFRECORD: - from dlio_benchmark.reader.tf_reader import TFReader - return TFReader(dataset_type, thread_index, epoch_number) + if _args.data_loader == DataLoaderType.NATIVE_DALI: + from dlio_benchmark.reader.dali_tf_reader import DaliTFReader + return TFReader(dataset_type, thread_index, epoch_number) + else: + from dlio_benchmark.reader.tf_reader import TFReader + return TFReader(dataset_type, thread_index, epoch_number) else: print("Loading data of %s format is not supported without framework data loader" %type) raise Exception(type)