Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support per token measurements through logits processor #130

Merged
merged 1 commit into from
Feb 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 21 additions & 11 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# List of targets that are not associated with files
.PHONY: quality style install build_docker_cpu build_docker_cuda build_docker_rocm test_cli_cpu_neural_compressor test_cli_cpu_onnxruntime test_cli_cpu_openvino test_cli_cpu_pytorch test_cli_rocm_pytorch test_cli_cuda_pytorch test_api_cpu test_api_cuda test_api_rocm test_api_misc
.PHONY: quality style install

quality:
ruff check .
Expand All @@ -26,7 +26,7 @@ test_cli_cpu_neural_compressor:
--rm \
--pid=host \
--entrypoint /bin/bash \
--volume $(PWD):/workspace \
--volume $(shell pwd):/workspace \
--workdir /workspace \
opt-bench-cpu:latest -c "pip install -e .[testing,neural-compressor,diffusers,timm] && pytest tests/ -k 'cli and cpu and neural_compressor' -x"

Expand All @@ -35,7 +35,7 @@ test_cli_cpu_onnxruntime:
--rm \
--pid=host \
--entrypoint /bin/bash \
--volume $(PWD):/workspace \
--volume $(shell pwd):/workspace \
--workdir /workspace \
opt-bench-cpu:latest -c "pip install -e .[testing,onnxruntime,diffusers,timm] && pytest tests/ -k 'cli and cpu and onnxruntime' -x"

Expand All @@ -44,7 +44,7 @@ test_cli_cpu_openvino:
--rm \
--pid=host \
--entrypoint /bin/bash \
--volume $(PWD):/workspace \
--volume $(shell pwd):/workspace \
--workdir /workspace \
opt-bench-cpu:latest -c "pip install -e .[testing,openvino,diffusers,timm] && pytest tests/ -k 'cli and cpu and openvino' -x"

Expand All @@ -53,7 +53,7 @@ test_cli_cpu_pytorch:
--rm \
--pid=host \
--entrypoint /bin/bash \
--volume $(PWD):/workspace \
--volume $(shell pwd):/workspace \
--workdir /workspace \
opt-bench-cpu:latest -c "pip install -e .[testing,diffusers,timm] && pytest tests/ -k 'cli and cpu and pytorch' -x"

Expand All @@ -66,7 +66,7 @@ test_cli_rocm_pytorch:
--device /dev/dri/renderD129 \
--group-add video \
--entrypoint /bin/bash \
--volume $(PWD):/workspace \
--volume $(shell pwd):/workspace \
--workdir /workspace \
opt-bench-rocm:latest -c "pip install -e .[testing,diffusers,timm,deepspeed,peft] && pytest tests/ -k 'cli and cuda and pytorch' -x"

Expand All @@ -76,16 +76,26 @@ test_cli_cuda_pytorch:
--pid=host \
--gpus '"device=0,1"' \
--entrypoint /bin/bash \
--volume $(PWD):/workspace \
--volume $(shell pwd):/workspace \
--workdir /workspace \
opt-bench-cuda:latest -c "pip install -e .[testing,diffusers,timm,deepspeed,peft] && pytest tests/ -k 'cli and cuda and pytorch' -x"

# Run the TensorRT-LLM CLI tests inside the opt-bench-tensorrt-llm image.
# The current working directory is bind-mounted at /workspace; inside the
# container the package is installed in editable mode with the `testing`
# extra, nvidia-ml-py is uninstalled, and pytest runs the tests selected by
# 'cli and tensorrt_llm', stopping at the first failure (-x).
# Declared phony because the target names a command, not a file: without
# this, a file called `test_cli_tensorrt_llm` would make the target a no-op.
.PHONY: test_cli_tensorrt_llm
test_cli_tensorrt_llm:
	docker run \
	--rm \
	--pid=host \
	--gpus '"device=0,1"' \
	--entrypoint /bin/bash \
	--volume "$(shell pwd)":/workspace \
	--workdir /workspace \
	opt-bench-tensorrt-llm:latest -c "pip install -e .[testing] && pip uninstall -y nvidia-ml-py && pytest tests/ -k 'cli and tensorrt_llm' -x"

test_api_cpu:
docker run \
--rm \
--pid=host \
--entrypoint /bin/bash \
--volume $(PWD):/workspace \
--volume $(shell pwd):/workspace \
--workdir /workspace \
opt-bench-cpu:latest -c "pip install -e .[testing,timm,diffusers] && pytest tests/ -k 'api and cpu' -x"

Expand All @@ -95,7 +105,7 @@ test_api_cuda:
--pid=host \
--gpus '"device=0,1"' \
--entrypoint /bin/bash \
--volume $(PWD):/workspace \
--volume $(shell pwd):/workspace \
--workdir /workspace \
opt-bench-cuda:latest -c "pip install -e .[testing,timm,diffusers] && pytest tests/ -k 'api and cuda' -x"

Expand All @@ -108,7 +118,7 @@ test_api_rocm:
--device /dev/dri/renderD129 \
--group-add video \
--entrypoint /bin/bash \
--volume $(PWD):/workspace \
--volume $(shell pwd):/workspace \
--workdir /workspace \
opt-bench-rocm:latest -c "pip install -e .[testing,timm,diffusers] && pytest tests/ -k 'api and cuda' -x"

Expand All @@ -117,6 +127,6 @@ test_api_misc:
--rm \
--pid=host \
--entrypoint /bin/bash \
--volume $(PWD):/workspace \
--volume $(shell pwd):/workspace \
--workdir /workspace \
opt-bench-cpu:latest -c "pip install -e .[testing,timm,diffusers] && pytest tests/ -k 'api and not (cpu or cuda or rocm or tensorrt)' -x"
8 changes: 5 additions & 3 deletions optimum_benchmark/backends/tensorrt_llm/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,15 @@ def load_trtmodel_from_pretrained(self) -> None:

def forward(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> ModelOutput:
return self.pretrained_model.generate(
input_ids=inputs.get("input_ids", None), attention_mask=inputs.get("attention_mask", None), max_new_tokens=1
input_ids=inputs.get("input_ids"),
attention_mask=inputs.get("attention_mask"),
max_new_tokens=1,
)

def generate(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> ModelOutput:
return self.pretrained_model.generate(
input_ids=inputs.get("inputs", None), # diff names
attention_mask=inputs.get("attention_mask", None),
input_ids=inputs.get("input_ids"),
attention_mask=inputs.get("attention_mask"),
# important for benchmarking
max_new_tokens=kwargs.get("max_new_tokens", -1),
min_length=kwargs.get("min_new_tokens", -1), # why different ?
Expand Down
Loading
Loading