Commit 212e87e: format.sh

kzawora-intel committed Aug 13, 2024
1 parent f328349, commit 212e87e

Showing 4 changed files with 11 additions and 5 deletions.
8 changes: 7 additions & 1 deletion benchmarks/benchmark_throughput.py
@@ -124,7 +124,13 @@ def run_vllm(
             ))
 
     start = time.perf_counter()
-    llm.generate(prompts, sampling_params, use_tqdm=True)
+    outputs = llm.generate(prompts, sampling_params, use_tqdm=True)
+    for output in outputs:
+        print('=================================================')
+        print(f'request id = {output.request_id}')
+        print(f'prompt = {output.prompt}')
+        print(f'response = {output.outputs[0].text}')
+        print('=================================================\n\n')
     end = time.perf_counter()
     return end - start

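For reference, the loop added above iterates the RequestOutput objects that llm.generate returns. A minimal standalone sketch of the same pattern, assuming a vLLM install (the model name and prompt are illustrative, not from this commit):

from vllm import LLM, SamplingParams

# Illustrative model and prompt; any vLLM-supported model works.
llm = LLM(model="facebook/opt-125m")
sampling_params = SamplingParams(temperature=0.8, max_tokens=64)

outputs = llm.generate(["Hello, my name is"], sampling_params, use_tqdm=True)
for output in outputs:
    # Each RequestOutput carries the request id, the original prompt,
    # and one completion per requested sample (index 0 here).
    print(f'request id = {output.request_id}')
    print(f'prompt = {output.prompt}')
    print(f'response = {output.outputs[0].text}')

Note that in the diff the print loop sits between the start and end timestamps, so the console output is included in the measured throughput time.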
2 changes: 1 addition & 1 deletion vllm/config.py
@@ -15,7 +15,7 @@
 from vllm.transformers_utils.config import get_config, get_hf_text_config
 from vllm.utils import (STR_NOT_IMPL_ENC_DEC_CUDAGRAPH, GiB_bytes,
                         cuda_device_count_stateless, get_cpu_memory, is_cpu,
-                        is_hip, is_neuron, is_openvino, is_xpu, is_hpu,
+                        is_hip, is_hpu, is_neuron, is_openvino, is_xpu,
                         print_warning_once)
 
 if TYPE_CHECKING:
2 changes: 1 addition & 1 deletion vllm/model_executor/layers/rotary_embedding.py
@@ -28,8 +28,8 @@
 import torch.nn as nn
 
 from vllm.model_executor.custom_op import CustomOp
-
 from vllm.platforms import current_platform
+
 if current_platform.is_hpu():
     from vllm.hpu.rotary_embed import HpuRotaryEmbedding

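The blank-line move above is import grouping from the formatter, but it touches a platform-gated import worth spelling out: vllm.hpu only exists in HPU-enabled builds, so the HPU class must be imported conditionally at module import time. A minimal sketch of that pattern, with a hypothetical fallback alias for illustration:

from vllm.platforms import current_platform

if current_platform.is_hpu():
    # vllm.hpu is only present in HPU (Intel Gaudi) builds of vLLM.
    from vllm.hpu.rotary_embed import HpuRotaryEmbedding as RotaryImpl
else:
    # Hypothetical fallback for this sketch: the stock implementation
    # defined in vllm/model_executor/layers/rotary_embedding.py.
    from vllm.model_executor.layers.rotary_embedding import (
        RotaryEmbedding as RotaryImpl)

# Downstream code can then construct RotaryImpl without re-checking the platform.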
4 changes: 2 additions & 2 deletions vllm/model_executor/sampling_metadata.py
@@ -8,9 +8,9 @@
 from vllm.sampling_params import SamplingParams, SamplingType
 from vllm.sequence import SequenceData, SequenceGroupMetadata
 from vllm.triton_utils.sample import get_num_triton_sampler_splits
-from vllm.utils import (PyObjectCache, async_tensor_h2d,
+from vllm.utils import (PyObjectCache, async_tensor_h2d, is_hpu,
                         is_pin_memory_available, make_tensor_with_pad,
-                        maybe_expand_dim, is_hpu)
+                        maybe_expand_dim)
 
 _SAMPLING_EPS = 1e-5
 _SEED_0_REPLACEMENT = 3403598558
