huggingface · IlyasMoutawwakil · Feb 20, 2024 · Feb 20, 2024
diff --git a/Makefile b/Makefile
@@ -1,5 +1,5 @@
 # List of targets that are not associated with files
-.PHONY:	quality style install build_docker_cpu build_docker_cuda build_docker_rocm test_cli_cpu_neural_compressor test_cli_cpu_onnxruntime test_cli_cpu_openvino test_cli_cpu_pytorch test_cli_rocm_pytorch test_cli_cuda_pytorch test_api_cpu test_api_cuda test_api_rocm test_api_misc
+.PHONY:	quality style install build_docker_cpu build_docker_cuda build_docker_rocm test_cli_cpu_neural_compressor test_cli_cpu_onnxruntime test_cli_cpu_openvino test_cli_cpu_pytorch test_cli_rocm_pytorch test_cli_cuda_pytorch test_cli_tensorrt_llm test_api_cpu test_api_cuda test_api_rocm test_api_misc
 
 quality:
 	ruff check .
@@ -26,7 +26,7 @@ test_cli_cpu_neural_compressor:
 	--rm \
 	--pid=host \
 	--entrypoint /bin/bash \
-	--volume $(PWD):/workspace \
+	--volume $(shell pwd):/workspace \
 	--workdir /workspace \
 	opt-bench-cpu:latest -c "pip install -e .[testing,neural-compressor,diffusers,timm] && pytest tests/ -k 'cli and cpu and neural_compressor' -x"
 
@@ -35,7 +35,7 @@ test_cli_cpu_onnxruntime:
 	--rm \
 	--pid=host \
 	--entrypoint /bin/bash \
-	--volume $(PWD):/workspace \
+	--volume $(shell pwd):/workspace \
 	--workdir /workspace \
 	opt-bench-cpu:latest -c "pip install -e .[testing,onnxruntime,diffusers,timm] && pytest tests/ -k 'cli and cpu and onnxruntime' -x"
 
@@ -44,7 +44,7 @@ test_cli_cpu_openvino:
 	--rm \
 	--pid=host \
 	--entrypoint /bin/bash \
-	--volume $(PWD):/workspace \
+	--volume $(shell pwd):/workspace \
 	--workdir /workspace \
 	opt-bench-cpu:latest -c "pip install -e .[testing,openvino,diffusers,timm] && pytest tests/ -k 'cli and cpu and openvino' -x"
 
@@ -53,7 +53,7 @@ test_cli_cpu_pytorch:
 	--rm \
 	--pid=host \
 	--entrypoint /bin/bash \
-	--volume $(PWD):/workspace \
+	--volume $(shell pwd):/workspace \
 	--workdir /workspace \
 	opt-bench-cpu:latest -c "pip install -e .[testing,diffusers,timm] && pytest tests/ -k 'cli and cpu and pytorch' -x"
 
@@ -66,7 +66,7 @@ test_cli_rocm_pytorch:
 	--device /dev/dri/renderD129 \
 	--group-add video \
 	--entrypoint /bin/bash \
-	--volume $(PWD):/workspace \
+	--volume $(shell pwd):/workspace \
 	--workdir /workspace \
 	opt-bench-rocm:latest -c "pip install -e .[testing,diffusers,timm,deepspeed,peft] && pytest tests/ -k 'cli and cuda and pytorch' -x"
 
@@ -76,16 +76,26 @@ test_cli_cuda_pytorch:
 	--pid=host \
 	--gpus '"device=0,1"' \
 	--entrypoint /bin/bash \
-	--volume $(PWD):/workspace \
+	--volume $(shell pwd):/workspace \
 	--workdir /workspace \
 	opt-bench-cuda:latest -c "pip install -e .[testing,diffusers,timm,deepspeed,peft] && pytest tests/ -k 'cli and cuda and pytorch' -x"
 
+test_cli_tensorrt_llm:
+	docker run \
+	--rm \
+	--pid=host \
+	--gpus '"device=0,1"' \
+	--entrypoint /bin/bash \
+	--volume "$(CURDIR)":/workspace \
+	--workdir /workspace \
+	opt-bench-tensorrt-llm:latest -c "pip install -e .[testing] && pip uninstall -y nvidia-ml-py && pytest tests/ -k 'cli and tensorrt_llm' -x"
+
 test_api_cpu:
 	docker run \
 	--rm \
 	--pid=host \
 	--entrypoint /bin/bash \
-	--volume $(PWD):/workspace \
+	--volume "$(CURDIR)":/workspace \
 	--workdir /workspace \
 	opt-bench-cpu:latest -c "pip install -e .[testing,timm,diffusers] && pytest tests/ -k 'api and cpu' -x"
 
@@ -95,7 +105,7 @@ test_api_cuda:
 	--pid=host \
 	--gpus '"device=0,1"' \
 	--entrypoint /bin/bash \
-	--volume $(PWD):/workspace \
+	--volume $(shell pwd):/workspace \
 	--workdir /workspace \
 	opt-bench-cuda:latest -c "pip install -e .[testing,timm,diffusers] && pytest tests/ -k 'api and cuda' -x"
 
@@ -108,7 +118,7 @@ test_api_rocm:
 	--device /dev/dri/renderD129 \
 	--group-add video \
 	--entrypoint /bin/bash \
-	--volume $(PWD):/workspace \
+	--volume $(shell pwd):/workspace \
 	--workdir /workspace \
 	opt-bench-rocm:latest -c "pip install -e .[testing,timm,diffusers] && pytest tests/ -k 'api and cuda' -x"
 
@@ -117,6 +127,6 @@ test_api_misc:
 	--rm \
 	--pid=host \
 	--entrypoint /bin/bash \
-	--volume $(PWD):/workspace \
+	--volume $(shell pwd):/workspace \
 	--workdir /workspace \
 	opt-bench-cpu:latest -c "pip install -e .[testing,timm,diffusers] && pytest tests/ -k 'api and not (cpu or cuda or rocm or tensorrt)' -x"
diff --git a/optimum_benchmark/backends/tensorrt_llm/backend.py b/optimum_benchmark/backends/tensorrt_llm/backend.py
@@ -47,13 +47,15 @@ def load_trtmodel_from_pretrained(self) -> None:
 
     def forward(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> ModelOutput:
         return self.pretrained_model.generate(
-            input_ids=inputs.get("input_ids", None), attention_mask=inputs.get("attention_mask", None), max_new_tokens=1
+            input_ids=inputs.get("input_ids"),  # None when the key is absent
+            attention_mask=inputs.get("attention_mask"),  # None when the key is absent
+            max_new_tokens=1,  # forward is run as a generate() call limited to one new token
         )
 
     def generate(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> ModelOutput:
         return self.pretrained_model.generate(
-            input_ids=inputs.get("inputs", None),  # diff names
-            attention_mask=inputs.get("attention_mask", None),
+            input_ids=inputs.get("input_ids"),
+            attention_mask=inputs.get("attention_mask"),
             # important for benchmarking
             max_new_tokens=kwargs.get("max_new_tokens", -1),
             min_length=kwargs.get("min_new_tokens", -1),  # why different ?