diff --git a/ChatQnA/benchmark_chatqna.yaml b/ChatQnA/benchmark_chatqna.yaml index 5d4ab6758..c608b8afb 100644 --- a/ChatQnA/benchmark_chatqna.yaml +++ b/ChatQnA/benchmark_chatqna.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + deploy: device: gaudi version: 1.1.0 @@ -68,9 +71,9 @@ benchmark: seed: 1024 # workload, all of the test cases will run for benchmark - test_cases: - - chatqnafixed - - chatqna_qlist_pubmed: + test_cases: + - chatqnafixed + - chatqna_qlist_pubmed: dataset: pub_med10 # pub_med10, pub_med100, pub_med1000 user_queries: [1, 2, 4] query_token_size: 128 # if specified, means fixed query token size will be sent out diff --git a/benchmark.py b/benchmark.py index 0a8360db6..fb20367c0 100644 --- a/benchmark.py +++ b/benchmark.py @@ -3,12 +3,11 @@ import os import sys -import yaml from datetime import datetime -from kubernetes import client, config +import yaml from evals.benchmark.stresscli.commands.load_test import locust_runtests - +from kubernetes import client, config # only support chatqna for now service_endpoints = { @@ -111,6 +110,7 @@ def _create_yaml_content(service, base_url, bench_target, test_phase, num_querie concurrency = test_params["concurrent_level"] import importlib.util + package_name = "opea-eval" spec = importlib.util.find_spec(package_name) print(spec) @@ -118,11 +118,12 @@ def _create_yaml_content(service, base_url, bench_target, test_phase, num_querie # get folder path of opea-eval eval_path = None import pkg_resources + for dist in pkg_resources.working_set: - if 'opea-eval' in dist.project_name: + if "opea-eval" in dist.project_name: eval_path = dist.location if not eval_path: - print(f"Fail to load opea-eval package. Please install it first.") + print("Fail to load opea-eval package. Please install it first.") exit(1) yaml_content = { @@ -157,9 +158,7 @@ def _create_yaml_content(service, base_url, bench_target, test_phase, num_querie return yaml_content -def _create_stresscli_confs( - case_params, test_params, test_phase, num_queries, base_url, ts -) -> str: +def _create_stresscli_confs(case_params, test_params, test_phase, num_queries, base_url, ts) -> str: """Create a stresscli configuration file and persist it on disk.""" stresscli_confs = [] # Get the workload @@ -173,9 +172,8 @@ def _create_stresscli_confs( bench_target = list(test_case.keys())[0] dataset_conf = test_case[bench_target] if bench_target == "chatqna_qlist_pubmed": - max_lines = dataset_conf['dataset'].split("pub_med")[-1] - stresscli_conf['envs'] = {'DATASET': f"pubmed_{max_lines}.txt", - 'MAX_LINES': max_lines} + max_lines = dataset_conf["dataset"].split("pub_med")[-1] + stresscli_conf["envs"] = {"DATASET": f"pubmed_{max_lines}.txt", "MAX_LINES": max_lines} # Generate the content of stresscli configuration file stresscli_yaml = _create_yaml_content(case_params, base_url, bench_target, test_phase, num_queries, test_params) @@ -186,7 +184,7 @@ def _create_stresscli_confs( ) with open(run_yaml_path, "w") as yaml_file: yaml.dump(stresscli_yaml, yaml_file) - stresscli_conf['run_yaml_path'] = run_yaml_path + stresscli_conf["run_yaml_path"] = run_yaml_path stresscli_confs.append(stresscli_conf) return stresscli_confs @@ -200,28 +198,18 @@ def create_stresscli_confs(service, base_url, test_suite_config, index): # Add YAML configuration of stresscli for warm-ups warm_ups = test_suite_config["warm_ups"] if warm_ups is not None and warm_ups > 0: - stresscli_confs.extend( - _create_stresscli_confs( - service, test_suite_config, "warmup", warm_ups, base_url, index - ) - ) + stresscli_confs.extend(_create_stresscli_confs(service, test_suite_config, "warmup", warm_ups, base_url, index)) # Add YAML configuration of stresscli for benchmark user_queries_lst = test_suite_config["user_queries"] if user_queries_lst is None or len(user_queries_lst) == 0: # Test stop is controlled by run time - stresscli_confs.extend( - _create_stresscli_confs( - service, test_suite_config, "benchmark", -1, base_url, index - ) - ) + stresscli_confs.extend(_create_stresscli_confs(service, test_suite_config, "benchmark", -1, base_url, index)) else: # Test stop is controlled by request count for user_queries in user_queries_lst: stresscli_confs.extend( - _create_stresscli_confs( - service, test_suite_config, "benchmark", user_queries, base_url, index - ) + _create_stresscli_confs(service, test_suite_config, "benchmark", user_queries, base_url, index) ) return stresscli_confs @@ -243,7 +231,7 @@ def _run_service_test(example, service, test_suite_config): deployment_type, test_suite_config.get("service_ip"), test_suite_config.get("service_port"), - test_suite_config.get("namespace") + test_suite_config.get("namespace"), ) base_url = f"http://{svc_ip}:{port}" @@ -255,18 +243,16 @@ def _run_service_test(example, service, test_suite_config): timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") # Create the run.yaml for the service - stresscli_confs = create_stresscli_confs( - service, base_url, test_suite_config, timestamp - ) + stresscli_confs = create_stresscli_confs(service, base_url, test_suite_config, timestamp) # Do benchmark in for-loop for different user queries output_folders = [] for index, stresscli_conf in enumerate(stresscli_confs, start=1): - run_yaml_path = stresscli_conf['run_yaml_path'] + run_yaml_path = stresscli_conf["run_yaml_path"] print(f"[OPEA BENCHMARK] 🚀 The {index} time test is running, run yaml: {run_yaml_path}...") - os.environ['MAX_TOKENS'] = str(service.get("max_output")) - if stresscli_conf.get('envs') is not None: - for key, value in stresscli_conf.get('envs').items(): + os.environ["MAX_TOKENS"] = str(service.get("max_output")) + if stresscli_conf.get("envs") is not None: + for key, value in stresscli_conf.get("envs").items(): os.environ[key] = value output_folders.append(locust_runtests(None, run_yaml_path)) @@ -283,7 +269,7 @@ def run_benchmark(benchmark_config, chart_name, namespace, llm_model=None, repor # Extract data parsed_data = construct_benchmark_config(benchmark_config) test_suite_config = { - "user_queries": parsed_data['user_queries'], # num of user queries + "user_queries": parsed_data["user_queries"], # num of user queries "random_prompt": False, # whether to use random prompt, set to False by default "run_time": "60m", # The max total run time for the test suite, set to 60m by default "collect_service_metric": False, # whether to collect service metrics, set to False by default @@ -292,23 +278,25 @@ def run_benchmark(benchmark_config, chart_name, namespace, llm_model=None, repor "service_ip": None, # Leave as None for k8s, specify for Docker "service_port": None, # Leave as None for k8s, specify for Docker "test_output_dir": os.getcwd() + "/benchmark_output", # The directory to store the test output - "load_shape": {"name": "constant", - "params": {"constant": {"concurrent_level": 4}, "poisson": {"arrival_rate": 1.0}}}, + "load_shape": { + "name": "constant", + "params": {"constant": {"concurrent_level": 4}, "poisson": {"arrival_rate": 1.0}}, + }, "concurrent_level": 4, "arrival_rate": 1.0, "query_timeout": 120, - "warm_ups": parsed_data['warmup_iterations'], - "seed": parsed_data['seed'], + "warm_ups": parsed_data["warmup_iterations"], + "seed": parsed_data["seed"], "namespace": namespace, "test_cases": parsed_data["test_cases"], - "llm_max_token_size": parsed_data['llm_max_token_size'] + "llm_max_token_size": parsed_data["llm_max_token_size"], } dataset = None query_data = None # Do benchmark in for-loop for different llm_max_token_size - for llm_max_token in parsed_data['llm_max_token_size']: + for llm_max_token in parsed_data["llm_max_token_size"]: print(f"[OPEA BENCHMARK] 🚀 Run benchmark on {dataset} with llm max-output-token {llm_max_token}.") case_data = {} # Support chatqna only for now @@ -325,13 +313,13 @@ def run_benchmark(benchmark_config, chart_name, namespace, llm_model=None, repor "chatqna-retriever-usvc", "chatqna-tei", "chatqna-teirerank", - "chatqna-tgi" + "chatqna-tgi", ], "test_cases": parsed_data["test_cases"], # Activate if random_prompt=true: leave blank = default dataset(WebQuestions) or sharegpt "prompts": query_data, "max_output": llm_max_token, # max number of output tokens - "k": 1 # number of retrieved documents + "k": 1, # number of retrieved documents } output_folder = _run_service_test(chart_name, case_data, test_suite_config) @@ -352,8 +340,4 @@ def run_benchmark(benchmark_config, chart_name, namespace, llm_model=None, repor if __name__ == "__main__": benchmark_config = load_yaml("./benchmark.yaml") - run_benchmark( - benchmark_config=benchmark_config, - chart_name='chatqna', - namespace='deploy-benchmark' - ) + run_benchmark(benchmark_config=benchmark_config, chart_name="chatqna", namespace="deploy-benchmark") diff --git a/deploy.py b/deploy.py index 488474149..966ac4a6c 100644 --- a/deploy.py +++ b/deploy.py @@ -8,17 +8,17 @@ import subprocess import sys import time +from enum import Enum, auto import yaml -from enum import Enum, auto - ################################################################################ # # # HELM VALUES GENERATION SECTION # # # ################################################################################ + def configure_node_selectors(values, node_selector, deploy_config): """Configure node selectors for all services.""" for service_name, config in deploy_config["services"].items(): @@ -31,6 +31,7 @@ def configure_node_selectors(values, node_selector, deploy_config): values[service_name] = {"nodeSelector": {key: value for key, value in node_selector.items()}} return values + def configure_replica(values, deploy_config): """Get replica configuration based on example type and node count.""" for service_name, config in deploy_config["services"].items(): @@ -47,6 +48,7 @@ def configure_replica(values, deploy_config): return values + def get_output_filename(num_nodes, with_rerank, example_type, device, action_type): """Generate output filename based on configuration.""" rerank_suffix = "with-rerank-" if with_rerank else "" @@ -54,6 +56,7 @@ def get_output_filename(num_nodes, with_rerank, example_type, device, action_typ return f"{example_type}-{num_nodes}-{device}-{action_suffix}{rerank_suffix}values.yaml" + def configure_resources(values, deploy_config): """Configure resources when tuning is enabled.""" resource_configs = [] @@ -87,15 +90,19 @@ def configure_resources(values, deploy_config): if resources: if service_name == "llm": engine = config.get("engine", "tgi") - resource_configs.append({ - "name": engine, - "resources": resources, - }) + resource_configs.append( + { + "name": engine, + "resources": resources, + } + ) else: - resource_configs.append({ - "name": service_name, - "resources": resources, - }) + resource_configs.append( + { + "name": service_name, + "resources": resources, + } + ) for config in [r for r in resource_configs if r]: service_name = config["name"] @@ -106,13 +113,19 @@ def configure_resources(values, deploy_config): return values + def configure_extra_cmd_args(values, deploy_config): """Configure extra command line arguments for services.""" for service_name, config in deploy_config["services"].items(): extra_cmd_args = [] - for param in ["max_batch_size", "max_input_length", "max_total_tokens", - "max_batch_total_tokens", "max_batch_prefill_tokens"]: + for param in [ + "max_batch_size", + "max_input_length", + "max_total_tokens", + "max_batch_total_tokens", + "max_batch_prefill_tokens", + ]: if config.get(param): extra_cmd_args.extend([f"--{param.replace('_', '-')}", str(config[param])]) @@ -129,6 +142,7 @@ def configure_extra_cmd_args(values, deploy_config): return values + def configure_models(values, deploy_config): """Configure model settings for services.""" for service_name, config in deploy_config["services"].items(): @@ -147,8 +161,9 @@ def configure_models(values, deploy_config): return values + def configure_rerank(values, with_rerank, deploy_config, example_type, node_selector): - """Configure rerank service""" + """Configure rerank service.""" if with_rerank: if "teirerank" not in values: values["teirerank"] = {"nodeSelector": {key: value for key, value in node_selector.items()}} @@ -163,6 +178,7 @@ def configure_rerank(values, with_rerank, deploy_config, example_type, node_sele values["teirerank"]["enabled"] = False return values + def generate_helm_values(example_type, deploy_config, chart_dir, action_type, node_selector=None): """Create a values.yaml file based on the provided configuration.""" if deploy_config is None: @@ -170,10 +186,7 @@ def generate_helm_values(example_type, deploy_config, chart_dir, action_type, no # Ensure the chart_dir exists if not os.path.exists(chart_dir): - return { - "status": "false", - "message": f"Chart directory {chart_dir} does not exist" - } + return {"status": "false", "message": f"Chart directory {chart_dir} does not exist"} num_nodes = deploy_config.get("node", 1) with_rerank = deploy_config["services"].get("teirerank", {}).get("enabled", False) @@ -214,12 +227,14 @@ def generate_helm_values(example_type, deploy_config, chart_dir, action_type, no print(f"YAML file {filepath} has been generated.") return {"status": "success", "filepath": filepath} + ################################################################################ # # # DEPLOYMENT SECTION # # # ################################################################################ + def run_kubectl_command(command): """Run a kubectl command and return the output.""" try: @@ -388,14 +403,14 @@ def uninstall_helm_release(release_name, namespace=None): def update_service(release_name, chart_name, namespace, hw_values_file, deploy_values_file, update_values_file): """Update the deployment using helm upgrade with new values. - + Args: release_name: The helm release name namespace: The kubernetes namespace deploy_config: The deployment configuration chart_name: The chart name for the deployment """ - + # Construct helm upgrade command command = [ "helm", @@ -409,17 +424,18 @@ def update_service(release_name, chart_name, namespace, hw_values_file, deploy_v "-f", deploy_values_file, "-f", - update_values_file + update_values_file, ] # Execute helm upgrade print(f"Running command: {' '.join(command)}") run_kubectl_command(command) - print(f"Deployment updated successfully") - + print("Deployment updated successfully") + # Cleanup temporary values file - if 'values_file' in locals() and os.path.exists(values_file): + if "values_file" in locals() and os.path.exists(values_file): os.remove(values_file) + def read_deploy_config(config_path): """Read and parse the deploy config file. @@ -430,7 +446,7 @@ def read_deploy_config(config_path): The parsed deploy config dictionary or None if failed """ try: - with open(config_path, 'r') as f: + with open(config_path, "r") as f: return yaml.safe_load(f) except Exception as e: print(f"Failed to load deploy config: {str(e)}") @@ -467,19 +483,16 @@ def check_deployment_ready(release_name, namespace, timeout=300, interval=5, log # Loop through each deployment to check its readiness for deployment_name in deployments: - if '-' not in deployment_name or 'ui' in deployment_name or 'nginx' in deployment_name: + if "-" not in deployment_name or "ui" in deployment_name or "nginx" in deployment_name: continue - instance_name = deployment_name.split('-', 1)[0] - app_name = deployment_name.split('-', 1)[1] + instance_name = deployment_name.split("-", 1)[0] + app_name = deployment_name.split("-", 1)[1] if instance_name != release_name: continue - cmd = [ - "kubectl", "-n", namespace, "get", "deployment", deployment_name, - "-o", "jsonpath={.spec.replicas}" - ] + cmd = ["kubectl", "-n", namespace, "get", "deployment", deployment_name, "-o", "jsonpath={.spec.replicas}"] desired_replicas = int(subprocess.check_output(cmd, text=True).strip()) with open(logfile, "a") as log: @@ -487,24 +500,37 @@ def check_deployment_ready(release_name, namespace, timeout=300, interval=5, log while True: cmd = [ - "kubectl", "-n", namespace, "get", "pods", - "-l", f"app.kubernetes.io/instance={instance_name}", - "-l", f"app.kubernetes.io/name={app_name}", - "--field-selector=status.phase=Running", "-o", "json" + "kubectl", + "-n", + namespace, + "get", + "pods", + "-l", + f"app.kubernetes.io/instance={instance_name}", + "-l", + f"app.kubernetes.io/name={app_name}", + "--field-selector=status.phase=Running", + "-o", + "json", ] pods_output = subprocess.check_output(cmd, text=True) pods = json.loads(pods_output) ready_pods = sum( - 1 for pod in pods["items"] if - all(container.get('ready') for container in pod.get('status', {}).get('containerStatuses', [])) + 1 + for pod in pods["items"] + if all(container.get("ready") for container in pod.get("status", {}).get("containerStatuses", [])) ) - terminating_pods = sum(1 for pod in pods["items"] if pod.get("metadata", {}).get("deletionTimestamp") is not None) + terminating_pods = sum( + 1 for pod in pods["items"] if pod.get("metadata", {}).get("deletionTimestamp") is not None + ) with open(logfile, "a") as log: - log.write(f"Ready pods: {ready_pods}, Desired replicas: {desired_replicas}, Terminating pods: {terminating_pods}\n") + log.write( + f"Ready pods: {ready_pods}, Desired replicas: {desired_replicas}, Terminating pods: {terminating_pods}\n" + ) if ready_pods == desired_replicas and terminating_pods == 0: with open(logfile, "a") as log: @@ -513,7 +539,9 @@ def check_deployment_ready(release_name, namespace, timeout=300, interval=5, log if timer >= timeout: with open(logfile, "a") as log: - log.write(f"Timeout reached for deployment '{deployment_name}'. Not all pods are running and ready.\n") + log.write( + f"Timeout reached for deployment '{deployment_name}'. Not all pods are running and ready.\n" + ) return 1 # Failure time.sleep(interval) @@ -609,7 +637,7 @@ def main(): example_type=args.chart_name, deploy_config=deploy_config, chart_dir=args.chart_dir, - action_type=action_type, # 0 - deploy, 1 - update + action_type=action_type, # 0 - deploy, 1 - update node_selector=node_selector, ) @@ -633,12 +661,7 @@ def main(): try: update_service( - args.chart_name, - args.chart_name, - args.namespace, - hw_values_file, - args.user_values, - values_file_path + args.chart_name, args.chart_name, args.namespace, hw_values_file, args.user_values, values_file_path ) return except Exception as e: @@ -647,13 +670,7 @@ def main(): # Deploy unless --create-values-only is specified if not args.create_values_only: - install_helm_release( - args.chart_name, - args.chart_name, - args.namespace, - hw_values_file, - values_file_path - ) + install_helm_release(args.chart_name, args.chart_name, args.namespace, hw_values_file, values_file_path) print(f"values_file_path: {values_file_path}") diff --git a/deploy_and_benchmark.py b/deploy_and_benchmark.py index a26ed0b68..1dc4c4308 100644 --- a/deploy_and_benchmark.py +++ b/deploy_and_benchmark.py @@ -1,69 +1,75 @@ -import yaml -import subprocess -import sys -import os -import copy +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + import argparse -import shutil +import copy +import os import re +import shutil +import subprocess +import sys + +import yaml from benchmark import run_benchmark + def read_yaml(file_path): try: - with open(file_path, 'r') as file: + with open(file_path, "r") as file: return yaml.safe_load(file) except Exception as e: print(f"Error reading YAML file: {e}") return None + def construct_deploy_config(deploy_config, target_node, max_batch_size=None): - """ - Construct a new deploy config based on the target node number and optional max_batch_size. - + """Construct a new deploy config based on the target node number and optional max_batch_size. + Args: deploy_config: Original deploy config dictionary target_node: Target node number to match in the node array max_batch_size: Optional specific max_batch_size value to use - + Returns: A new deploy config with single values for node and instance_num """ # Deep copy the original config to avoid modifying it new_config = copy.deepcopy(deploy_config) - + # Get the node array and validate - nodes = deploy_config.get('node') + nodes = deploy_config.get("node") if not isinstance(nodes, list): raise ValueError("deploy_config['node'] must be an array") - + # Find the index of the target node try: node_index = nodes.index(target_node) except ValueError: raise ValueError(f"Target node {target_node} not found in node array {nodes}") - + # Set the single node value - new_config['node'] = target_node - + new_config["node"] = target_node + # Update instance_num for each service based on the same index - for service_name, service_config in new_config.get('services', {}).items(): - if 'replicaCount' in service_config: - instance_nums = service_config['replicaCount'] + for service_name, service_config in new_config.get("services", {}).items(): + if "replicaCount" in service_config: + instance_nums = service_config["replicaCount"] if isinstance(instance_nums, list): if len(instance_nums) != len(nodes): raise ValueError( f"instance_num array length ({len(instance_nums)}) for service {service_name} " f"doesn't match node array length ({len(nodes)})" ) - service_config['replicaCount'] = instance_nums[node_index] - + service_config["replicaCount"] = instance_nums[node_index] + # Update max_batch_size if specified - if max_batch_size is not None and 'llm' in new_config['services']: - new_config['services']['llm']['max_batch_size'] = max_batch_size - + if max_batch_size is not None and "llm" in new_config["services"]: + new_config["services"]["llm"]["max_batch_size"] = max_batch_size + return new_config + def pull_helm_chart(chart_pull_url, version, chart_name): # Pull and untar the chart subprocess.run(["helm", "pull", chart_pull_url, "--version", version, "--untar"], check=True) @@ -77,9 +83,9 @@ def pull_helm_chart(chart_pull_url, version, chart_name): return untar_dir + def main(yaml_file, target_node=None): - """ - Main function to process deployment configuration. + """Main function to process deployment configuration. Args: yaml_file: Path to the YAML configuration file @@ -90,23 +96,23 @@ def main(yaml_file, target_node=None): print("Failed to read YAML file.") return None - deploy_config = config['deploy'] - benchmark_config = config['benchmark'] + deploy_config = config["deploy"] + benchmark_config = config["benchmark"] # Extract chart name from the YAML file name - chart_name = os.path.splitext(os.path.basename(yaml_file))[0].split('_')[-1] + chart_name = os.path.splitext(os.path.basename(yaml_file))[0].split("_")[-1] print(f"chart_name: {chart_name}") python_cmd = sys.executable # Process nodes - nodes = deploy_config.get('node', []) + nodes = deploy_config.get("node", []) if not isinstance(nodes, list): print("Error: deploy_config['node'] must be an array") return None nodes_to_process = [target_node] if target_node is not None else nodes - node_names = deploy_config.get('node_name', []) - namespace = deploy_config.get('namespace', "default") + node_names = deploy_config.get("node_name", []) + namespace = deploy_config.get("namespace", "default") # Pull the Helm chart chart_pull_url = f"oci://ghcr.io/opea-project/charts/{chart_name}" @@ -124,17 +130,9 @@ def main(yaml_file, target_node=None): # Add labels for current node configuration print(f"Adding labels for {node} nodes...") - cmd = [ - python_cmd, - 'deploy.py', - '--chart-name', - chart_name, - '--num-nodes', - str(node), - '--add-label' - ] + cmd = [python_cmd, "deploy.py", "--chart-name", chart_name, "--num-nodes", str(node), "--add-label"] if current_node_names: - cmd.extend(['--node-names'] + current_node_names) + cmd.extend(["--node-names"] + current_node_names) result = subprocess.run(cmd, check=True) if result.returncode != 0: @@ -143,11 +141,11 @@ def main(yaml_file, target_node=None): try: # Process max_batch_sizes - max_batch_sizes = deploy_config.get('services', {}).get('llm', {}).get('max_batch_size', []) + max_batch_sizes = deploy_config.get("services", {}).get("llm", {}).get("max_batch_size", []) if not isinstance(max_batch_sizes, list): max_batch_sizes = [max_batch_sizes] - values_file_path= None + values_file_path = None for i, max_batch_size in enumerate(max_batch_sizes): print(f"\nProcessing max_batch_size: {max_batch_size}") @@ -157,22 +155,22 @@ def main(yaml_file, target_node=None): # Write the new deploy config to a temporary file temp_config_file = f"temp_deploy_config_{node}_{max_batch_size}.yaml" try: - with open(temp_config_file, 'w') as f: + with open(temp_config_file, "w") as f: yaml.dump(new_deploy_config, f) if i == 0: # First iteration: full deployment cmd = [ python_cmd, - 'deploy.py', - '--deploy-config', + "deploy.py", + "--deploy-config", temp_config_file, - '--chart-name', + "--chart-name", chart_name, - '--namespace', + "--namespace", namespace, - '--chart-dir', - chart_dir + "--chart-dir", + chart_dir, ] result = subprocess.run(cmd, check=True, capture_output=True, text=True) @@ -187,52 +185,55 @@ def main(yaml_file, target_node=None): # Subsequent iterations: update services with config change cmd = [ python_cmd, - 'deploy.py', - '--deploy-config', + "deploy.py", + "--deploy-config", temp_config_file, - '--chart-name', + "--chart-name", chart_name, - '--namespace', + "--namespace", namespace, - '--chart-dir', + "--chart-dir", chart_dir, - '--user-values', + "--user-values", values_file_path, - '--update-service' + "--update-service", ] result = subprocess.run(cmd, check=True) if result.returncode != 0: - print(f"Update failed for {node} nodes configuration with max_batch_size {max_batch_size}") + print( + f"Update failed for {node} nodes configuration with max_batch_size {max_batch_size}" + ) break # Skip remaining max_batch_sizes for this node # Wait for deployment to be ready print("\nWaiting for deployment to be ready...") cmd = [ python_cmd, - 'deploy.py', - '--chart-name', + "deploy.py", + "--chart-name", chart_name, - '--namespace', + "--namespace", namespace, - '--check-ready' + "--check-ready", ] try: result = subprocess.run(cmd, check=True) - print(f"Deployments are ready!") + print("Deployments are ready!") except subprocess.CalledProcessError as e: - print(f"Depoyments status failed with returncode: {e.returncode}") + print(f"Deployments status failed with returncode: {e.returncode}") # Run benchmark run_benchmark( benchmark_config=benchmark_config, chart_name=chart_name, namespace=namespace, - llm_model=deploy_config.get('services', {}).get('llm', {}).get('model_id', "") + llm_model=deploy_config.get("services", {}).get("llm", {}).get("model_id", ""), ) - except Exception as e: - print(f"Error during {'deployment' if i == 0 else 'update'} for {node} nodes with max_batch_size {max_batch_size}: {str(e)}") + print( + f"Error during {'deployment' if i == 0 else 'update'} for {node} nodes with max_batch_size {max_batch_size}: {str(e)}" + ) break # Skip remaining max_batch_sizes for this node finally: # Clean up the temporary file @@ -244,12 +245,12 @@ def main(yaml_file, target_node=None): print(f"\nUninstalling deployment for {node} nodes...") cmd = [ python_cmd, - 'deploy.py', - '--chart-name', + "deploy.py", + "--chart-name", chart_name, - '--namespace', + "--namespace", namespace, - '--uninstall', + "--uninstall", ] try: result = subprocess.run(cmd, check=True) @@ -260,17 +261,9 @@ def main(yaml_file, target_node=None): # Delete labels for current node configuration print(f"Deleting labels for {node} nodes...") - cmd = [ - python_cmd, - 'deploy.py', - '--chart-name', - chart_name, - '--num-nodes', - str(node), - '--delete-label' - ] + cmd = [python_cmd, "deploy.py", "--chart-name", chart_name, "--num-nodes", str(node), "--delete-label"] if current_node_names: - cmd.extend(['--node-names'] + current_node_names) + cmd.extend(["--node-names"] + current_node_names) try: result = subprocess.run(cmd, check=True) @@ -289,6 +282,7 @@ def main(yaml_file, target_node=None): shutil.rmtree(chart_dir) print("Temporary directory removed successfully.") + if __name__ == "__main__": parser = argparse.ArgumentParser(description="Deploy and benchmark with specific node configuration.") parser.add_argument("yaml_file", help="Path to the YAML configuration file")