Commit 9c9d748
fix destructors flow and remove finish_measurements
nirda7 committed Nov 25, 2024
1 parent dbde4b8 commit 9c9d748
Showing 6 changed files with 10 additions and 24 deletions.
3 changes: 0 additions & 3 deletions vllm/engine/llm_engine.py
@@ -1325,9 +1325,6 @@ def _advance_to_next_step(
             else:
                 seq.append_token_id(sample.output_token, sample.logprobs)
 
-    def finish_measurements(self):
-        self.model_executor.finish_measurements()
-
     def step(self) -> List[Union[RequestOutput, EmbeddingRequestOutput]]:
         """Performs one decoding iteration and returns newly generated results.
4 changes: 0 additions & 4 deletions vllm/entrypoints/llm.py
@@ -236,10 +236,6 @@ def set_tokenizer(self, tokenizer: AnyTokenizer) -> None:
         else:
             tokenizer_group.tokenizer = get_cached_tokenizer(tokenizer)
 
-    def finish_measurements(self):
-        assert not envs.VLLM_USE_V1, "INC does not support vLLM V1"
-        self.llm_engine.finish_measurements()  # type: ignore[attr-defined]
-
     @overload  # LEGACY: single (prompt + optional token ids)
     def generate(
         self,
10 changes: 5 additions & 5 deletions vllm/executor/hpu_executor.py
@@ -26,6 +26,7 @@ class HPUExecutor(ExecutorBase):
     def _init_executor(self) -> None:
         """Initialize the worker and load the model."""
         self._init_worker()
+        self.shutdown_inc = True
 
     def _get_worker_kwargs(
         self,
@@ -94,9 +95,6 @@ def initialize_cache(self, num_gpu_blocks: int, num_cpu_blocks) -> None:
         msg = f"init_cache_engine took {cache_init_m.get_summary_string()}"
         logger.info(msg)
 
-    def finish_measurements(self):
-        self.driver_worker.finish_measurements()
-
     def execute_model(
         self,
         execute_model_req: ExecuteModelRequest) -> List[SamplerOutput]:
@@ -201,8 +199,10 @@ def stop_profile(self) -> None:
         self.driver_worker.stop_profile()
 
     def shutdown(self) -> None:
-        if hasattr(self.driver_worker, 'shutdown_inc'):
-            self.driver_worker.shutdown_inc()
+        if getattr(self, 'shutdown_inc', False):
+            if hasattr(self.driver_worker, 'shutdown_inc'):
+                self.driver_worker.shutdown_inc()
+            self.shutdown_inc = False
 
 
 class HPUExecutorAsync(HPUExecutor, ExecutorAsyncBase):
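For context, a minimal standalone sketch of the guard introduced above, using simplified stand-in classes rather than the real vLLM types: the executor arms a shutdown_inc flag in _init_executor and disarms it on the first shutdown() call, so the driver worker's INC finalization runs at most once even if shutdown() is invoked repeatedly.

class _FakeWorker:
    """Stand-in for the HPU driver worker; only shutdown_inc matters here."""

    def __init__(self) -> None:
        self.inc_shutdowns = 0

    def shutdown_inc(self) -> None:
        self.inc_shutdowns += 1


class _FakeExecutor:
    """Sketch of the one-shot shutdown guard used by HPUExecutor above."""

    def __init__(self) -> None:
        self.driver_worker = _FakeWorker()
        self.shutdown_inc = True          # armed at init, as in _init_executor

    def shutdown(self) -> None:
        if getattr(self, 'shutdown_inc', False):
            if hasattr(self.driver_worker, 'shutdown_inc'):
                self.driver_worker.shutdown_inc()
            self.shutdown_inc = False     # disarm so repeated calls are no-ops


ex = _FakeExecutor()
ex.shutdown()
ex.shutdown()
assert ex.driver_worker.inc_shutdowns == 1  # finalized exactly once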
8 changes: 5 additions & 3 deletions vllm/executor/ray_hpu_executor.py
@@ -70,7 +70,12 @@ def _init_executor(self) -> None:
         self.output_decoder = msgspec.msgpack.Decoder(
             Optional[List[SamplerOutput]])
 
+        self.shutdown_inc = True
+
     def shutdown(self) -> None:
+        if getattr(self, 'shutdown_inc', False):
+            self._run_workers("shutdown_inc")
+            self.shutdown_inc = False
         for worker in self.workers:
             worker.__ray_terminate__.remote()
         if hasattr(self, "forward_dag") and self.forward_dag is not None:
@@ -80,9 +85,6 @@ def shutdown(self) -> None:
                 ray.kill(worker)
             self.forward_dag = None
 
-    def finish_measurements(self):
-        self._run_workers("finish_measurements")
-
     def _get_worker_module_and_class(
         self
     ) -> Tuple[str, str, Optional[Callable[[],
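The Ray executor applies the same one-shot guard, with one ordering point worth noting: shutdown_inc is fanned out to the workers before the Ray actors are terminated, since a killed actor can no longer finalize its calibration. A simplified sketch with plain objects standing in for Ray actors and for _run_workers (not the real Ray API):

class _StubWorker:
    """Stand-in for a remote HPU worker actor."""

    def __init__(self) -> None:
        self.finalized = False
        self.alive = True

    def shutdown_inc(self) -> None:
        if self.alive:
            self.finalized = True

    def terminate(self) -> None:  # stands in for __ray_terminate__ / ray.kill
        self.alive = False


class _StubRayExecutor:
    def __init__(self, num_workers: int = 2) -> None:
        self.workers = [_StubWorker() for _ in range(num_workers)]
        self.shutdown_inc = True

    def _run_workers(self, method: str) -> None:
        for w in self.workers:
            getattr(w, method)()

    def shutdown(self) -> None:
        if getattr(self, 'shutdown_inc', False):
            self._run_workers("shutdown_inc")  # finalize INC first...
            self.shutdown_inc = False
        for w in self.workers:
            w.terminate()                      # ...then tear the actors down


ex = _StubRayExecutor()
ex.shutdown()
assert all(w.finalized for w in ex.workers)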
6 changes: 0 additions & 6 deletions vllm/worker/hpu_model_runner.py
@@ -1777,10 +1777,6 @@ def prepare_model_input(
             is_prompt=is_prompt,
             virtual_engine=virtual_engine)
 
-    def finish_measurements(self):
-        from neural_compressor.torch.quantization import finalize_calibration
-        finalize_calibration(self.model.model)
-
     def _check_config(self, batch_size, seq_len, is_prompt, warmup_mode):
         cfg = (batch_size, seq_len, is_prompt)
         seen = cfg in self.seen_configs
@@ -2161,5 +2157,3 @@ def shutdown_inc(self):
             finalize_calibration(self.model.model)
             self._is_inc_finalized = True
 
-    def __del__(self):
-        self.shutdown_inc()
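Dropping __del__ means the model runner no longer depends on garbage collection to trigger INC finalization; teardown is now driven explicitly by the executors' shutdown() path. A minimal sketch of the idempotent-finalizer idea, assuming a hypothetical _finalize_calibration stand-in for neural_compressor's finalize_calibration (the real shutdown_inc also checks whether INC quantization is actually enabled):

class _StubModelRunner:
    """Sketch of an explicitly finalized runner, mirroring shutdown_inc above."""

    def __init__(self) -> None:
        self._is_inc_finalized = False
        self.calibration_dumps = 0

    def _finalize_calibration(self) -> None:
        # Hypothetical stand-in for neural_compressor's finalize_calibration().
        self.calibration_dumps += 1

    def shutdown_inc(self) -> None:
        if not self._is_inc_finalized:
            self._finalize_calibration()
            self._is_inc_finalized = True

    # Note: no __del__; teardown is driven by the executor's shutdown() path,
    # not by interpreter-exit garbage collection, whose ordering is undefined.


runner = _StubModelRunner()
runner.shutdown_inc()
runner.shutdown_inc()
assert runner.calibration_dumps == 1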
3 changes: 0 additions & 3 deletions vllm/worker/hpu_worker.py
@@ -251,9 +251,6 @@ def _warm_up_model(self) -> None:
         # the model initialization and profiling.
         set_random_seed(self.model_config.seed)
 
-    def finish_measurements(self):
-        self.model_runner.finish_measurements()
-
     @property
     def do_metadata_broadcast(self) -> bool:
         return self.parallel_config.tensor_parallel_size > 1
