From f6395c6693bfcc04ffc3277e39d591b06d9c4c85 Mon Sep 17 00:00:00 2001 From: jvstme <36324149+jvstme@users.noreply.github.com> Date: Thu, 27 Jun 2024 06:27:32 +0000 Subject: [PATCH] Wait longer for OCI Bare Metal provisioning (#1362) Sometimes Bare Metal instances take longer than 10 minutes to provision. --- .../server/background/tasks/process_running_jobs.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/dstack/_internal/server/background/tasks/process_running_jobs.py b/src/dstack/_internal/server/background/tasks/process_running_jobs.py index c724f9a16..f43c3e303 100644 --- a/src/dstack/_internal/server/background/tasks/process_running_jobs.py +++ b/src/dstack/_internal/server/background/tasks/process_running_jobs.py @@ -194,7 +194,7 @@ async def _process_job(job_id: UUID): if not success: # check timeout if job_submission.age > _get_runner_timeout_interval( - job_provisioning_data.backend + job_provisioning_data.backend, job_provisioning_data.instance_type.name ): logger.warning( "%s: failed because runner has not become available in time, age=%s", @@ -574,9 +574,11 @@ def _submit_job_to_runner( # do not log here, because the runner will send a new status -def _get_runner_timeout_interval(backend_type: BackendType) -> timedelta: +def _get_runner_timeout_interval(backend_type: BackendType, instance_type_name: str) -> timedelta: if backend_type == BackendType.LAMBDA: return timedelta(seconds=1200) if backend_type == BackendType.KUBERNETES: return timedelta(seconds=1200) + if backend_type == BackendType.OCI and instance_type_name.startswith("BM."): + return timedelta(seconds=1200) return timedelta(seconds=600)