Skip to content

Commit

Permalink
Merge branch 'main' into pvijayakrish-main
Browse files Browse the repository at this point in the history
  • Loading branch information
pvijayakrish authored Nov 1, 2024
2 parents 1af618a + b9738bb commit df6e581
Show file tree
Hide file tree
Showing 23 changed files with 848 additions and 444 deletions.
75 changes: 72 additions & 3 deletions build.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
"2024.4.0", # Standalone OpenVINO
"3.2.6", # DCGM version
"0.5.5", # vLLM version
"3.12.3", # RHEL Python version
)
}

Expand Down Expand Up @@ -950,7 +951,6 @@ def create_dockerfile_buildbase_rhel(ddir, dockerfile_name, argmap):
libb64-devel \\
gperftools-devel \\
patchelf \\
python3.11-devel \\
python3-pip \\
python3-setuptools \\
rapidjson-devel \\
Expand All @@ -963,6 +963,10 @@ def create_dockerfile_buildbase_rhel(ddir, dockerfile_name, argmap):
libxml2-devel \\
numactl-devel \\
wget
"""
# Requires openssl-devel to be installed first for pyenv build to be successful
df += change_default_python_version_rhel(TRITON_VERSION_MAP[FLAGS.version][7])
df += """
RUN pip3 install --upgrade pip \\
&& pip3 install --upgrade \\
Expand Down Expand Up @@ -1389,7 +1393,29 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach

# Add dependencies needed for python backend
if "python" in backends:
df += """
if target_platform() == "rhel":
df += """
# python3, python3-pip and some pip installs required for the python backend
RUN yum install -y \\
libarchive-devel \\
python3-pip \\
openssl-devel \\
readline-devel
"""
# Requires openssl-devel to be installed first for pyenv build to be successful
df += change_default_python_version_rhel(
TRITON_VERSION_MAP[FLAGS.version][7]
)
df += """
RUN pip3 install --upgrade pip \\
&& pip3 install --upgrade \\
wheel \\
setuptools \\
\"numpy<2\" \\
virtualenv
"""
else:
df += """
# python3, python3-pip and some pip installs required for the python backend
RUN apt-get update \\
&& apt-get install -y --no-install-recommends \\
Expand Down Expand Up @@ -1514,6 +1540,34 @@ def add_cpu_libs_to_linux_dockerfile(backends, target_machine):
return df


def change_default_python_version_rhel(version):
    """Return a Dockerfile snippet that makes pyenv-built python the default on RHEL.

    The interpreter available via 'yum install python3.X-devel' does not match
    the interpreter shipped in the RHEL base image, so pip-installed packages
    would be invisible to the python backend stub's pybind bindings. Building
    the requested version with pyenv and symlinking it as the default avoids
    that mismatch.

    Args:
        version: Full python version string, e.g. "3.12.3".

    Returns:
        str: Dockerfile text to append to a Dockerfile under construction.
    """
    # NOTE(review): `CONFIGURE_OPTS=... && pyenv install` sets a shell variable
    # without exporting it to the pyenv child process — confirm the openssl
    # configure flag actually takes effect. Likewise `RUN eval "$(pyenv init -)"`
    # only affects that single RUN's shell. Preserved as-is here.
    pyenv_install = f"""
# The python library version available for install via 'yum install python3.X-devel' does not
# match the version of python inside the RHEL base container. This means that python packages
# installed within the container will not be picked up by the python backend stub process pybind
# bindings. It must instead must be installed via pyenv.
ENV PYENV_ROOT=/opt/pyenv_build
RUN curl https://pyenv.run | bash
ENV PATH="${{PYENV_ROOT}}/bin:$PATH"
RUN eval "$(pyenv init -)"
RUN CONFIGURE_OPTS=\"--with-openssl=/usr/lib64\" && pyenv install {version} \\
&& cp ${{PYENV_ROOT}}/versions/{version}/lib/libpython3* /usr/lib64/"""
    make_default = f"""
# RHEL image has several python versions. It's important
# to set the correct version, otherwise, packages that are
# pip installed will not be found during testing.
ENV PYVER={version} PYTHONPATH=/opt/python/v
RUN ln -sf ${{PYENV_ROOT}}/versions/${{PYVER}}* ${{PYTHONPATH}}
ENV PYBIN=${{PYTHONPATH}}/bin
ENV PYTHON_BIN_PATH=${{PYBIN}}/python${{PYVER}} PATH=${{PYBIN}}:${{PATH}}
"""
    return pyenv_install + make_default


def create_dockerfile_windows(
ddir, dockerfile_name, argmap, backends, repoagents, caches
):
Expand Down Expand Up @@ -1679,7 +1733,9 @@ def create_docker_build_script(script_name, container_install_dir, container_ci_
if FLAGS.container_memory:
baseargs += ["--memory", FLAGS.container_memory]

baseargs += ["--cache-from={}".format(k) for k in cachefrommap]
if target_platform() != "windows":
baseargs += ["--cache-from={}".format(k) for k in cachefrommap]

baseargs += ["."]

docker_script.cwd(THIS_SCRIPT_DIR)
Expand Down Expand Up @@ -1956,6 +2012,19 @@ def backend_build(
cmake_script.mkdir(os.path.join(install_dir, "backends"))
cmake_script.rmdir(os.path.join(install_dir, "backends", be))

# The python library version available for install via 'yum install python3.X-devel' does not
# match the version of python inside the RHEL base container. This means that python packages
# installed within the container will not be picked up by the python backend stub process pybind
# bindings. It must instead be installed via pyenv. We package it here for better usability.
if target_platform() == "rhel" and be == "python":
major_minor_version = ".".join(
(TRITON_VERSION_MAP[FLAGS.version][7]).split(".")[:2]
)
version_matched_files = "/usr/lib64/libpython" + major_minor_version + "*"
cmake_script.cp(
version_matched_files, os.path.join(repo_install_dir, "backends", be)
)

cmake_script.cpdir(
os.path.join(repo_install_dir, "backends", be),
os.path.join(install_dir, "backends"),
Expand Down
8 changes: 6 additions & 2 deletions docs/customization_guide/tritonfrontend.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,18 @@ Note: `model_path` may need to be edited depending on your setup.

2. Now, to start up the respective services with `tritonfrontend`
```python
from tritonfrontend import KServeHttp, KServeGrpc
from tritonfrontend import KServeHttp, KServeGrpc, Metrics
http_options = KServeHttp.Options(thread_count=5)
http_service = KServeHttp(server, http_options)
http_service.start()

# Default options (if none provided)
grpc_service = KServeGrpc(server)
grpc_service.start()

# Can start metrics service as well
metrics_service = Metrics(server)
metrics_service.start()
```

3. Finally, with running services, we can use `tritonclient` or simple `curl` commands to send requests and receive responses from the frontends.
Expand Down Expand Up @@ -97,6 +101,7 @@ print("[INFERENCE RESULTS]")
print("Output data:", output_data)

# Stop respective services and server.
metrics_service.stop()
http_service.stop()
grpc_service.stop()
server.stop()
Expand Down Expand Up @@ -139,7 +144,6 @@ With this workflow, you can avoid having to stop each service after client reque
- The following features are not currently supported when launching the Triton frontend services through the python bindings:
- [Tracing](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/trace.md)
- [Shared Memory](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_shared_memory.md)
- [Metrics](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/metrics.md)
- [Restricted Protocols](https://github.com/triton-inference-server/server/blob/main/docs/customization_guide/inference_protocols.md#limit-endpoint-access-beta)
- VertexAI
- Sagemaker
Expand Down
90 changes: 88 additions & 2 deletions qa/L0_python_api/test_kserve.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
import tritonclient.http as httpclient
import tritonserver
from tritonclient.utils import InferenceServerException
from tritonfrontend import KServeGrpc, KServeHttp
from tritonfrontend import KServeGrpc, KServeHttp, Metrics


class TestHttpOptions:
Expand All @@ -48,7 +48,7 @@ def test_wrong_http_parameters(self):
with pytest.raises(Exception):
KServeHttp.Options(port=-15)
with pytest.raises(Exception):
KServeHttp.Options(thread_count=-5)
KServeHttp.Options(thread_count=0)

# Wrong data type
with pytest.raises(Exception):
Expand All @@ -70,6 +70,20 @@ def test_wrong_grpc_parameters(self):
KServeGrpc.Options(port=-5)
with pytest.raises(Exception):
KServeGrpc.Options(keepalive_timeout_ms=-20_000)
with pytest.raises(Exception):
KServeGrpc.Options(keepalive_time_ms=-1)
with pytest.raises(Exception):
KServeGrpc.Options(keepalive_timeout_ms=-1)
with pytest.raises(Exception):
KServeGrpc.Options(http2_max_pings_without_data=-1)
with pytest.raises(Exception):
KServeGrpc.Options(http2_min_recv_ping_interval_without_data_ms=-1)
with pytest.raises(Exception):
KServeGrpc.Options(http2_max_ping_strikes=-1)
with pytest.raises(Exception):
KServeGrpc.Options(max_connection_age_ms=-1)
with pytest.raises(Exception):
KServeGrpc.Options(max_connection_age_grace_ms=-1)

# Wrong data type
with pytest.raises(Exception):
Expand All @@ -78,8 +92,25 @@ def test_wrong_grpc_parameters(self):
KServeGrpc.Options(server_key=10)


class TestMetricsOptions:
    """Validation tests for `Metrics.Options` argument checking."""

    def test_correct_http_parameters(self):
        # In-range values must construct without raising.
        Metrics.Options(address="0.0.0.1", port=8080, thread_count=16)

    def test_wrong_http_parameters(self):
        # Out-of-range values must be rejected.
        for out_of_range in ({"port": -15}, {"thread_count": 0}):
            with pytest.raises(Exception):
                Metrics.Options(**out_of_range)

        # A value of the wrong data type must be rejected.
        with pytest.raises(Exception):
            Metrics.Options(thread_count="ten")


HTTP_ARGS = (KServeHttp, httpclient, "localhost:8000") # Default HTTP args
GRPC_ARGS = (KServeGrpc, grpcclient, "localhost:8001") # Default GRPC args
METRICS_ARGS = (Metrics, "localhost:8002") # Default Metrics args


class TestKServe:
Expand Down Expand Up @@ -271,6 +302,61 @@ def callback(user_data, result, error):
utils.teardown_client(grpc_client)
utils.teardown_server(server)

@pytest.mark.parametrize("frontend, url", [METRICS_ARGS])
def test_metrics_default_port(self, frontend, url):
server = utils.setup_server()
service = utils.setup_service(server, frontend)

metrics_url = f"http://{url}/metrics"
status_code, _ = utils.get_metrics(metrics_url)

assert status_code == 200

utils.teardown_service(service)
utils.teardown_server(server)

@pytest.mark.parametrize("frontend", [Metrics])
def test_metrics_custom_port(self, frontend, port=8005):
server = utils.setup_server()
service = utils.setup_service(server, frontend, Metrics.Options(port=port))

metrics_url = f"http://localhost:{port}/metrics"
status_code, _ = utils.get_metrics(metrics_url)

assert status_code == 200

utils.teardown_service(service)
utils.teardown_server(server)

@pytest.mark.parametrize("frontend, url", [METRICS_ARGS])
def test_metrics_update(self, frontend, url):
# Setup Server, KServeGrpc, Metrics
server = utils.setup_server()
grpc_service = utils.setup_service(
server, KServeGrpc
) # Needed to send inference request
metrics_service = utils.setup_service(server, frontend)

# Get Metrics and verify inference count == 0 before inference
before_status_code, before_inference_count = utils.get_metrics(
f"http://{url}/metrics"
)
assert before_status_code == 200 and before_inference_count == 0

# Send 1 Inference Request with send_and_test_inference()
assert utils.send_and_test_inference_identity(GRPC_ARGS[1], GRPC_ARGS[2])

# Get Metrics and verify inference count == 1 after inference
after_status_code, after_inference_count = utils.get_metrics(
f"http://{url}/metrics"
)
assert after_status_code == 200 and after_inference_count == 1

# Teardown Metrics, GrpcService, Server
utils.teardown_service(grpc_service)
utils.teardown_service(metrics_service)
utils.teardown_server(server)

# KNOWN ISSUE: CAUSES SEGFAULT
# Created [DLIS-7231] to address at future date
# Once the server has been stopped, the underlying TRITONSERVER_Server instance
Expand Down
8 changes: 7 additions & 1 deletion qa/L0_python_api/test_model_repository/identity/config.pbtxt
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,10 @@ output [
data_type: TYPE_STRING
dims: [ 1 ]
}
]
]
instance_group [
{
count: 1
kind : KIND_CPU
}
]
Loading

0 comments on commit df6e581

Please sign in to comment.