forked from huggingface/optimum-benchmark
-
Notifications
You must be signed in to change notification settings - Fork 0
/
config.py
31 lines (24 loc) · 1001 Bytes
/
config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
from dataclasses import dataclass, field
from typing import Optional

from ...import_utils import llm_swarm_version
from ..config import BackendConfig
@dataclass
class LLMSwarmConfig(BackendConfig):
    """Configuration for the llm-swarm benchmark backend.

    Extends the shared ``BackendConfig`` with the knobs needed to deploy
    inference instances through llm-swarm on a SLURM cluster (instance
    count, GPUs, templates, shared cache volume).
    """

    name: str = "llm-swarm"
    # Resolve the installed llm-swarm version lazily, at instantiation time,
    # instead of at import time: a plain `llm_swarm_version()` default would
    # run the probe as soon as this module is imported.
    version: Optional[str] = field(default_factory=llm_swarm_version)
    # Hydra-style target of the backend class this config instantiates.
    _target_: str = "optimum_benchmark.backends.llm_swarm.backend.LLMSwarmBackend"

    # optimum benchmark specific
    # presumably: benchmark with randomly-initialized weights instead of
    # downloading real ones — TODO confirm against the backend implementation
    no_weights: bool = False

    # llm-swarm specific
    gpus: int = 8  # GPUs requested per instance — verify against the SLURM template
    instances: int = 1  # number of inference instances to spin up
    inference_engine: str = "tgi"
    # NOTE(review): the defaults below are user/cluster-specific paths
    # (/fsx/ilyas/...) — consider making them required or env-derived.
    volume: str = "/fsx/ilyas/.cache"
    per_instance_max_parallel_requests: int = 500
    slurm_template_path: str = "/fsx/ilyas/swarm-templates/tgi_h100.template.slurm"
    load_balancer_template_path: str = "/fsx/ilyas/swarm-templates/nginx.template.conf"
    # If set, skip deployment and benchmark against this endpoint directly —
    # presumably; confirm against the backend.
    debug_endpoint: Optional[str] = None

    def __post_init__(self):
        super().__post_init__()
        # so that downloaded artifacts are stored in the same place
        # (hub_kwargs is expected to be initialized by BackendConfig — TODO confirm)
        self.hub_kwargs["cache_dir"] = self.volume