Protect hf token #309

Merged on Dec 17, 2024 (30 commits)
9 changes: 6 additions & 3 deletions .github/workflows/test_cli_cpu_py_txi.yaml
@@ -43,9 +43,12 @@ jobs:

- name: Install requirements
run: |
pip install --upgrade pip
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
pip install -e .[testing,py-txi] git+https://github.com/IlyasMoutawwakil/py-txi.git
pip install uv
uv pip install --upgrade pip
uv pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
uv pip install -e .[testing,py-txi] git+https://github.com/IlyasMoutawwakil/py-txi.git
env:
UV_SYSTEM_PYTHON: 1

- name: Run tests
run: pytest tests/test_cli.py -s -k "cli and cpu and py_txi"
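
The change above switches the install step to `uv` as the package installer; `UV_SYSTEM_PYTHON: 1` tells `uv` to install into the runner's system interpreter rather than requiring a virtual environment. As a reference, the updated step reads roughly as follows (a sketch; indentation assumes a standard GitHub Actions job layout):

```yaml
- name: Install requirements
  run: |
    pip install uv
    uv pip install --upgrade pip
    uv pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
    uv pip install -e .[testing,py-txi] git+https://github.com/IlyasMoutawwakil/py-txi.git
  env:
    UV_SYSTEM_PYTHON: 1

- name: Run tests
  run: pytest tests/test_cli.py -s -k "cli and cpu and py_txi"
```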
14 changes: 10 additions & 4 deletions .github/workflows/test_cli_cuda_py_txi.yaml
@@ -44,16 +44,22 @@ jobs:

- name: Install requirements
run: |
pip install --upgrade pip
pip install -e .[testing,py-txi] git+https://github.com/IlyasMoutawwakil/py-txi.git
pip install uv
uv pip install --upgrade pip
uv pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
uv pip install -e .[testing,py-txi] git+https://github.com/IlyasMoutawwakil/py-txi.git
env:
UV_SYSTEM_PYTHON: 1

- name: Run tests
run: pytest tests/test_cli.py -x -s -k "cli and cuda and py_txi"
run: |
FORCE_SEQUENTIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and py_txi"

- if: ${{
(github.event_name == 'push') ||
(github.event_name == 'workflow_dispatch') ||
contains( github.event.pull_request.labels.*.name, 'examples')
}}
name: Run examples
run: pytest tests/test_examples.py -x -s -k "cli and cuda and (tgi or tei)"
run: |
FORCE_SEQUENTIAL=1 pytest tests/test_examples.py -x -s -k "cli and cuda and (tgi or tei)"
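
Both test invocations in this workflow are now prefixed with `FORCE_SEQUENTIAL=1`. Assuming this environment variable instructs the test suite to run benchmarks one at a time rather than in parallel (its exact behavior is defined by the test suite and not shown in this diff), the CI steps can be reproduced locally along these lines:

```bash
# Hedged sketch of a local reproduction of the CUDA CI steps.
# Requires a CUDA machine with the testing and py-txi extras installed;
# FORCE_SEQUENTIAL is assumed to serialize benchmark execution.
FORCE_SEQUENTIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and py_txi"
FORCE_SEQUENTIAL=1 pytest tests/test_examples.py -x -s -k "cli and cuda and (tgi or tei)"
```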
18 changes: 6 additions & 12 deletions README.md
@@ -50,7 +50,6 @@ Optimum-Benchmark is continuously and intensively tested on a variety of devices

[![CLI_CPU_IPEX](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cpu_ipex.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cpu_ipex.yaml)
[![CLI_CPU_LLAMA_CPP](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cpu_llama_cpp.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cpu_llama_cpp.yaml)
[![CLI_CPU_NEURAL_COMPRESSOR](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cpu_neural_compressor.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cpu_neural_compressor.yaml)
[![CLI_CPU_ONNXRUNTIME](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cpu_onnxruntime.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cpu_onnxruntime.yaml)
[![CLI_CPU_OPENVINO](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cpu_openvino.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cpu_openvino.yaml)
[![CLI_CPU_PYTORCH](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cpu_pytorch.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cpu_pytorch.yaml)
@@ -61,7 +60,6 @@ Optimum-Benchmark is continuously and intensively tested on a variety of devices
[![CLI_CUDA_TENSORRT_LLM](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cuda_tensorrt_llm.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cuda_tensorrt_llm.yaml)
[![CLI_CUDA_TORCH_ORT](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cuda_torch_ort.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cuda_torch_ort.yaml)
[![CLI_CUDA_VLLM](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cuda_vllm.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cuda_vllm.yaml)
[![CLI_ENERGY_STAR](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_energy_star.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_energy_star.yaml)
[![CLI_MISC](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_misc.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_misc.yaml)
[![CLI_ROCM_PYTORCH](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_rocm_pytorch.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_rocm_pytorch.yaml)

@@ -100,10 +98,9 @@ Depending on the backends you want to use, you can install `optimum-benchmark` w
- OnnxRuntime: `pip install optimum-benchmark[onnxruntime]`
- TensorRT-LLM: `pip install optimum-benchmark[tensorrt-llm]`
- OnnxRuntime-GPU: `pip install optimum-benchmark[onnxruntime-gpu]`
- Neural Compressor: `pip install optimum-benchmark[neural-compressor]`
- Py-TXI: `pip install optimum-benchmark[py-txi]`
- IPEX: `pip install optimum-benchmark[ipex]`
- Py-TXI (TGI & TEI): `pip install optimum-benchmark[py-txi]`
- vLLM: `pip install optimum-benchmark[vllm]`
- IPEX: `pip install optimum-benchmark[ipex]`

We also support the following extra dependencies:

@@ -144,9 +141,6 @@ if __name__ == "__main__":
)
benchmark_report = Benchmark.launch(benchmark_config)

# log the benchmark in terminal
benchmark_report.log() # or print(benchmark_report)

# convert artifacts to a dictionary or dataframe
benchmark_config.to_dict() # or benchmark_config.to_dataframe()
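
The snippet above is a fragment of the README's Python quickstart. A self-contained sketch of the full flow might look like the following; the class names mirror the backend and config classes referenced elsewhere in the README, but the exact keyword arguments here are illustrative rather than authoritative:

```python
# Hedged sketch of the Python API quickstart; argument values are placeholders.
from optimum_benchmark import (
    Benchmark,
    BenchmarkConfig,
    InferenceConfig,
    ProcessConfig,
    PyTorchConfig,
)

if __name__ == "__main__":
    benchmark_config = BenchmarkConfig(
        name="cuda_pytorch_bert",
        launcher=ProcessConfig(device_isolation=True),
        scenario=InferenceConfig(latency=True, memory=True),
        backend=PyTorchConfig(model="bert-base-uncased", device="cuda", device_ids="0"),
    )
    benchmark_report = Benchmark.launch(benchmark_config)

    # convert artifacts to a dictionary or dataframe
    config_dict = benchmark_config.to_dict()
    report_df = benchmark_report.to_dataframe()
```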

@@ -175,15 +169,17 @@ If you're on VSCode, you can hover over the configuration classes to see the ava
You can also run a benchmark using the command line by specifying the configuration directory and the configuration name. Both arguments are mandatory for [`hydra`](https://hydra.cc/). `--config-dir` is the directory where the configuration files are stored and `--config-name` is the name of the configuration file without its `.yaml` extension.

```bash
optimum-benchmark --config-dir examples/ --config-name pytorch_bert
optimum-benchmark --config-dir examples/ --config-name cuda_pytorch_bert
```

This will run the benchmark using the configuration in [`examples/pytorch_bert.yaml`](examples/pytorch_bert.yaml) and store the results in `runs/pytorch_bert`.
This will run the benchmark using the configuration in [`examples/cuda_pytorch_bert.yaml`](examples/cuda_pytorch_bert.yaml) and store the results in `runs/cuda_pytorch_bert`.

The resulting files are:

- `benchmark_config.json` which contains the configuration used for the benchmark, including the backend, launcher, scenario and the environment in which the benchmark was run.
- `benchmark_report.json` which contains a full report of the benchmark's results, like latency measurements, memory usage, energy consumption, etc.
- `benchmark_report.txt` which contains a detailed report of the benchmark's results, in the same format in which they were logged.
- `benchmark_report.md` which contains a detailed report of the benchmark's results, in markdown format.
- `benchmark.json` contains both the report and the configuration in a single file.
- `benchmark.log` contains the logs of the benchmark run.
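
As a quick way to inspect these artifacts programmatically, here is a minimal sketch that assumes the default output directory from the CLI example above (`runs/cuda_pytorch_bert`); the exact keys inside the JSON files depend on the benchmark that was run:

```python
import json

# Paths assume the default run directory created by the CLI example above.
run_dir = "runs/cuda_pytorch_bert"

with open(f"{run_dir}/benchmark_config.json") as f:
    config = json.load(f)

with open(f"{run_dir}/benchmark_report.json") as f:
    report = json.load(f)

print("config sections:", list(config.keys()))
print("report sections:", list(report.keys()))
```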

@@ -309,9 +305,7 @@ For more information on the features of each backend, you can check their respec
- [PyTorchConfig](optimum_benchmark/backends/pytorch/config.py)
- [ORTConfig](optimum_benchmark/backends/onnxruntime/config.py)
- [TorchORTConfig](optimum_benchmark/backends/torch_ort/config.py)
- [LLMSwarmConfig](optimum_benchmark/backends/llm_swarm/config.py)
- [TRTLLMConfig](optimum_benchmark/backends/tensorrt_llm/config.py)
- [INCConfig](optimum_benchmark/backends/neural_compressor/config.py)

</details>

2 changes: 1 addition & 1 deletion examples/cuda_pytorch_bert.yaml
@@ -6,7 +6,7 @@ defaults:
- _base_
- _self_

name: pytorch_bert
name: cuda_pytorch_bert

launcher:
device_isolation: true
33 changes: 21 additions & 12 deletions optimum_benchmark/backends/config.py
@@ -22,13 +22,13 @@ class BackendConfig(ABC):
version: str
_target_: str

model: Optional[str] = None
processor: Optional[str] = None

task: Optional[str] = None
library: Optional[str] = None
model_type: Optional[str] = None

model: Optional[str] = None
processor: Optional[str] = None

device: Optional[str] = None
# we use a string here instead of a list
# because it's easier to pass in a yaml or from cli
@@ -48,30 +48,44 @@ def __post_init__(self):
if self.model is None:
raise ValueError("`model` must be specified.")

if self.model_kwargs.get("token", None) is not None:
LOGGER.info(
"You have passed an argument `token` to `model_kwargs`. This is dangerous as the config cannot do encryption to protect it. "
"We will proceed to registering `token` in the environment as `HF_TOKEN` to avoid saving it or pushing it to the hub by mistake."
)
os.environ["HF_TOKEN"] = self.model_kwargs.pop("token")

if self.processor is None:
self.processor = self.model

# TODO: add cache_dir, token, etc. to these methods
if not self.processor_kwargs:
self.processor_kwargs = self.model_kwargs

if self.library is None:
self.library = infer_library_from_model_name_or_path(
model_name_or_path=self.model,
token=self.model_kwargs.get("token", None),
revision=self.model_kwargs.get("revision", None),
cache_dir=self.model_kwargs.get("cache_dir", None),
)

if self.library not in ["transformers", "diffusers", "timm", "llama_cpp"]:
raise ValueError(
f"`library` must be either `transformers`, `diffusers`, `timm` or `llama_cpp`, but got {self.library}"
)

if self.task is None:
self.task = infer_task_from_model_name_or_path(
model_name_or_path=self.model,
token=self.model_kwargs.get("token", None),
revision=self.model_kwargs.get("revision", None),
cache_dir=self.model_kwargs.get("cache_dir", None),
library_name=self.library,
)

if self.model_type is None:
self.model_type = infer_model_type_from_model_name_or_path(
model_name_or_path=self.model,
token=self.model_kwargs.get("token", None),
revision=self.model_kwargs.get("revision", None),
cache_dir=self.model_kwargs.get("cache_dir", None),
library_name=self.library,
)

@@ -103,11 +117,6 @@ def __post_init__(self):
else:
raise RuntimeError("CUDA device is only supported on systems with NVIDIA or ROCm drivers.")

if self.library not in ["transformers", "diffusers", "timm", "llama_cpp"]:
raise ValueError(
f"`library` must be either `transformers`, `diffusers`, `timm` or `llama_cpp`, but got {self.library}"
)

if self.inter_op_num_threads is not None:
if self.inter_op_num_threads == -1:
self.inter_op_num_threads = cpu_count()
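
The behavioral core of this PR is the new `token` handling in `__post_init__` above: a token passed through `model_kwargs` is popped from the config and exported as `HF_TOKEN`, so it never ends up in serialized artifacts such as `benchmark_config.json` or anything pushed to the Hub. A hedged sketch of what this means from the user's side (the model name is a placeholder for a gated repository, and `MY_HF_TOKEN` is an environment variable you would set yourself):

```python
import os

from optimum_benchmark import PyTorchConfig

# Placeholder gated model; supply your own token via an environment variable.
backend_config = PyTorchConfig(
    model="some-org/some-gated-model",
    device="cpu",
    model_kwargs={"token": os.environ["MY_HF_TOKEN"]},
)

# BackendConfig.__post_init__ pops the token out of model_kwargs and registers
# it as HF_TOKEN, so the serialized config never contains the secret.
assert "token" not in backend_config.model_kwargs
assert os.environ.get("HF_TOKEN") is not None
```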