diff --git a/optimum_benchmark/backends/base.py b/optimum_benchmark/backends/base.py
index 6726f91f..1c039163 100644
--- a/optimum_benchmark/backends/base.py
+++ b/optimum_benchmark/backends/base.py
@@ -70,14 +70,11 @@ def __init__(self, config: BackendConfigT):
 
         elif self.config.library == "llama_cpp":
             self.logger.info("\t+ Benchmarking a LlamaCpp model")
-            # TOD: need a custom method to extract shapes from gguf
-            self.model_shapes = extract_transformers_shapes_from_artifacts(
-                self.pretrained_config, self.pretrained_processor
-            )
             self.pretrained_processor = None
-            self.generation_config = None
             self.pretrained_config = None
+            self.generation_config = None
             self.automodel_loader = None
+            self.model_shapes = {}
 
         else:
             self.logger.info("\t+ Benchmarking a Transformers model")
diff --git a/optimum_benchmark/backends/llama_cpp/backend.py b/optimum_benchmark/backends/llama_cpp/backend.py
index 06215cbf..c9d6bbf8 100644
--- a/optimum_benchmark/backends/llama_cpp/backend.py
+++ b/optimum_benchmark/backends/llama_cpp/backend.py
@@ -41,15 +41,10 @@ def llama_cpp_kwargs(self) -> Dict[str, Any]:
             "echo": False,
         }
 
-    def prepare_input_shapes(self, input_shapes: Dict[str, Any]) -> Dict[str, Any]:
-        if self.config.task == "text-generation":
-            if input_shapes["batch_size"] != 1:
-                raise ValueError("Batch size must be 1 for LlamaCpp text generation")
-
-        return input_shapes
-
     def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
         if self.config.task == "text-generation":
+            if inputs["input_ids"].shape[0] != 1:
+                raise ValueError("Batch size must be 1 for LlamaCpp text generation")
             return {"tokens": inputs["input_ids"].squeeze(0).tolist()}
 
         elif self.config.task == "feature-extraction":
diff --git a/optimum_benchmark/launchers/torchrun/launcher.py b/optimum_benchmark/launchers/torchrun/launcher.py
index 98eb4a37..768ea1c7 100644
--- a/optimum_benchmark/launchers/torchrun/launcher.py
+++ b/optimum_benchmark/launchers/torchrun/launcher.py
@@ -24,10 +24,6 @@ class TorchrunLauncher(Launcher[TorchrunConfig]):
     def __init__(self, config: TorchrunConfig):
         super().__init__(config)
 
-        if sys.platform == "win32":
-            self.logger.info("\t+ Disabline libuv on Windows")
-            os.environ["USE_LIBUV"] = "0"
-
        if get_start_method(allow_none=True) != self.config.start_method:
             self.logger.info(f"\t+ Setting multiprocessing start method to {self.config.start_method}")
             set_start_method(self.config.start_method, force=True)
@@ -164,8 +160,14 @@ def entrypoint(worker: Callable[..., BenchmarkReport], worker_args: List[Any], l
         device = torch.device("cuda", rank)
         torch.cuda.set_device(device)
 
+    if sys.platform == "win32":
+        logger.info("\t+ Disabling libuv for Windows")
+        init_method = "env://?use_libuv=0"
+    else:
+        init_method = "env://"
+
     logger.info("\t+ Initializing torch.distributed process group")
-    torch.distributed.init_process_group()
+    torch.distributed.init_process_group(init_method=init_method)
 
     try:
         report = worker(*worker_args)
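
Note on the LlamaCpp change: moving the batch-size guard from `prepare_input_shapes` into `prepare_inputs` means the check now validates the actual batch dimension of the `input_ids` tensor rather than a declared `batch_size` entry. A minimal sketch of the resulting behavior, using a hypothetical stand-in class (the real `LlamaCppBackend` requires a GGUF model to instantiate):

```python
from typing import Any, Dict

import torch


class PrepareInputsSketch:
    """Hypothetical stand-in reproducing only the patched prepare_inputs logic."""

    def __init__(self, task: str):
        self.task = task

    def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        if self.task == "text-generation":
            # The guard now inspects the real batch dimension of the tensor.
            if inputs["input_ids"].shape[0] != 1:
                raise ValueError("Batch size must be 1 for LlamaCpp text generation")
            return {"tokens": inputs["input_ids"].squeeze(0).tolist()}
        return inputs


sketch = PrepareInputsSketch("text-generation")
print(sketch.prepare_inputs({"input_ids": torch.tensor([[1, 2, 3]])}))
# {'tokens': [1, 2, 3]}

try:
    sketch.prepare_inputs({"input_ids": torch.zeros((2, 4), dtype=torch.long)})
except ValueError as e:
    print(e)  # Batch size must be 1 for LlamaCpp text generation
```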
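Note on the torchrun change: the patch replaces the `USE_LIBUV` environment variable set in the launcher's `__init__` with a per-worker `init_method` URL passed directly to `torch.distributed.init_process_group`, using the `use_libuv=0` query parameter that recent PyTorch releases accept in the rendezvous URL. A standalone sketch of the switch, runnable as a single process; the `MASTER_*`/`RANK`/`WORLD_SIZE` values are assumptions made only so the `env://` rendezvous can resolve without torchrun:

```python
import os
import sys

import torch.distributed as dist

# Assumed single-process rendezvous values so env:// resolves without torchrun.
os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
os.environ.setdefault("MASTER_PORT", "29500")
os.environ.setdefault("RANK", "0")
os.environ.setdefault("WORLD_SIZE", "1")

if sys.platform == "win32":
    # use_libuv=0 in the rendezvous URL disables the libuv TCPStore backend,
    # which newer PyTorch enables by default but did not support on Windows
    # at the time of this patch.
    init_method = "env://?use_libuv=0"
else:
    init_method = "env://"

dist.init_process_group(backend="gloo", init_method=init_method)
print(f"initialized rank {dist.get_rank()} / world size {dist.get_world_size()}")
dist.destroy_process_group()
```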