Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

APPENG-2521: Implement the NIM service deployment page and connect it with the APIs #3

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 36 additions & 2 deletions frontend/src/api/k8s/servingRuntimes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@ import {
ServingRuntimeAnnotations,
ServingRuntimeKind,
} from '~/k8sTypes';
import { CreatingServingRuntimeObject } from '~/pages/modelServing/screens/types';
import {
CreatingServingRuntimeObject,
SupportedModelFormatsInfo,
} from '~/pages/modelServing/screens/types';
import { ContainerResources } from '~/types';
import { getModelServingRuntimeName } from '~/pages/modelServing/utils';
import { getDisplayNameFromK8sResource, translateDisplayNameForK8s } from '~/concepts/k8s/utils';
Expand All @@ -31,7 +34,15 @@ export const assembleServingRuntime = (
acceleratorProfileState?: AcceleratorProfileState,
isModelMesh?: boolean,
): ServingRuntimeKind => {
const { name: displayName, numReplicas, modelSize, externalRoute, tokenAuth } = data;
const {
name: displayName,
numReplicas,
modelSize,
externalRoute,
tokenAuth,
imageName,
supportedModelFormatsInfo,
} = data;
const createName = isCustomServingRuntimesEnabled
? translateDisplayNameForK8s(displayName)
: getModelServingRuntimeName(namespace);
Expand Down Expand Up @@ -121,6 +132,11 @@ export const assembleServingRuntime = (
volumeMounts.push(getshmVolumeMount());
}

// Override the container image only when the caller supplied one; a falsy
// imageName leaves whatever image the container already has.
if (imageName) {
  container.image = imageName;
}

const containerWithoutResources = _.omit(container, 'resources');

return {
Expand All @@ -132,6 +148,24 @@ export const assembleServingRuntime = (
},
);

// When the caller supplies model-format info, install it as the first entry of
// the runtime's supportedModelFormats, flagged autoSelect with priority 1.
if (supportedModelFormatsInfo) {
  const supportedModelFormatsObj: SupportedModelFormatsInfo = {
    name: supportedModelFormatsInfo.name,
    version: supportedModelFormatsInfo.version,
    autoSelect: true,
    priority: 1,
  };

  // The list may be absent on the spec. Optional-chaining `?.push` would
  // short-circuit in that case and silently drop the format, so create the
  // array first and then operate on a definite reference.
  if (!updatedServingRuntime.spec.supportedModelFormats) {
    updatedServingRuntime.spec.supportedModelFormats = [];
  }
  const modelFormats = updatedServingRuntime.spec.supportedModelFormats;

  if (modelFormats.length >= 1) {
    // Replace only the first entry; any further entries are left untouched.
    modelFormats[0] = supportedModelFormatsObj;
  } else {
    modelFormats.push(supportedModelFormatsObj);
  }
}

if (isModelMesh) {
updatedServingRuntime.spec.tolerations = tolerations;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ const EmptyNIMModelServingCard: React.FC = () => {
border: '1px solid var(--pf-v5-global--BorderColor--100)',
borderRadius: 16,
}}
data-testid="single-serving-platform-card"
data-testid="nvidia-nim-model-serving-platform-card"
>
<CardTitle>
<TextContent>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import * as React from 'react';
import {
Alert,
AlertActionCloseButton,
Form,
FormSection,
Modal,
Stack,
StackItem,
Expand All @@ -25,25 +26,16 @@ import useCustomServingRuntimesEnabled from '~/pages/modelServing/customServingR
import { getServingRuntimeFromName } from '~/pages/modelServing/customServingRuntimes/utils';
import useServingAcceleratorProfile from '~/pages/modelServing/screens/projects/useServingAcceleratorProfile';
import DashboardModalFooter from '~/concepts/dashboard/DashboardModalFooter';
import {
InferenceServiceStorageType,
ServingRuntimeEditInfo,
} from '~/pages/modelServing/screens/types';
import { ServingRuntimeEditInfo } from '~/pages/modelServing/screens/types';
import ServingRuntimeSizeSection from '~/pages/modelServing/screens/projects/ServingRuntimeModal/ServingRuntimeSizeSection';
import NIMModelListSection from '~/pages/modelServing/screens/projects/NIMServiceModal/NIMModelListSection';
import NIMModelDeploymentNameSection from '~/pages/modelServing/screens/projects/NIMServiceModal/NIMModelDeploymentNameSection';
import ProjectSection from '~/pages/modelServing/screens/projects/InferenceServiceModal/ProjectSection';
import { DataConnection, NamespaceApplicationCase } from '~/pages/projects/types';
import { AwsKeys } from '~/pages/projects/dataConnections/const';
import { isAWSValid } from '~/pages/projects/screens/spawner/spawnerUtils';
import DataConnectionSection from '~/pages/modelServing/screens/projects/InferenceServiceModal/DataConnectionSection';
import { getProjectDisplayName } from '~/concepts/projects/utils';
import { translateDisplayNameForK8s } from '~/concepts/k8s/utils';
import { containsOnlySlashes, isS3PathValid } from '~/utilities/string';
import AuthServingRuntimeSection from '~/pages/modelServing/screens/projects/ServingRuntimeModal/AuthServingRuntimeSection';
import { getDisplayNameFromK8sResource, translateDisplayNameForK8s } from '~/concepts/k8s/utils';
import { useAccessReview } from '~/api';
import { SupportedArea, useIsAreaAvailable } from '~/concepts/areas';
import KServeAutoscalerReplicaSection from './KServeAutoscalerReplicaSection';
import KServeAutoscalerReplicaSection from '~/pages/modelServing/screens/projects/kServeModal/KServeAutoscalerReplicaSection';

const accessReviewResource: AccessReviewResourceAttributes = {
group: 'rbac.authorization.k8s.io',
Expand Down Expand Up @@ -115,27 +107,25 @@ const DeployNIMServiceModal: React.FC<DeployNIMServiceModalProps> = ({
}, [currentProjectName, setCreateDataInferenceService, isOpen]);

// Serving Runtime Validation
const isDisabledServingRuntime = namespace === '' || actionInProgress;

const baseInputValueValid =
createDataServingRuntime.numReplicas >= 0 &&
resourcesArePositive(createDataServingRuntime.modelSize.resources) &&
requestsUnderLimits(createDataServingRuntime.modelSize.resources);

const isDisabledServingRuntime = namespace === '' || actionInProgress || !baseInputValueValid;

const isDisabledInferenceService =
actionInProgress ||
createDataInferenceService.name.trim() === '' ||
createDataInferenceService.project === '' ||
!isInferenceServiceNameWithinLimit
!isInferenceServiceNameWithinLimit ||
!baseInputValueValid;

const servingRuntimeSelected = React.useMemo(
() =>
editInfo?.servingRuntimeEditInfo?.servingRuntime ||
getServingRuntimeFromName(
createDataServingRuntime.servingRuntimeTemplateName,
servingRuntimeTemplates,
),
[editInfo, servingRuntimeTemplates, createDataServingRuntime.servingRuntimeTemplateName],
getServingRuntimeFromName('nvidia-runtime-gpu', servingRuntimeTemplates),
[editInfo, servingRuntimeTemplates],
);

const onBeforeClose = (submitted: boolean) => {
Expand All @@ -161,48 +151,49 @@ const DeployNIMServiceModal: React.FC<DeployNIMServiceModalProps> = ({
const submit = () => {
setError(undefined);
setActionInProgress(true);
onSuccess();

// const servingRuntimeName = 'nim';
const servingRuntimeName =
editInfo?.inferenceServiceEditInfo?.spec.predictor.model?.runtime ||
translateDisplayNameForK8s(createDataInferenceService.name);

// const submitServingRuntimeResources = getSubmitServingRuntimeResourcesFn(
// servingRuntimeSelected,
// createDataServingRuntime,
// customServingRuntimesEnabled,
// namespace,
// editInfo?.servingRuntimeEditInfo,
// false,
// acceleratorProfileState,
// NamespaceApplicationCase.KSERVE_PROMOTION,
// projectContext?.currentProject,
// servingRuntimeName,
// false,
// );
const submitServingRuntimeResources = getSubmitServingRuntimeResourcesFn(
servingRuntimeSelected,
createDataServingRuntime,
customServingRuntimesEnabled,
namespace,
editInfo?.servingRuntimeEditInfo,
false,
acceleratorProfileState,
NamespaceApplicationCase.KSERVE_PROMOTION,
projectContext?.currentProject,
servingRuntimeName,
true,
);

// const submitInferenceServiceResource = getSubmitInferenceServiceResourceFn(
// createDataInferenceService,
// editInfo?.inferenceServiceEditInfo,
// servingRuntimeName,
// false,
// acceleratorProfileState,
// allowCreate,
// editInfo?.secrets,
// );
const submitInferenceServiceResource = getSubmitInferenceServiceResourceFn(
createDataInferenceService,
editInfo?.inferenceServiceEditInfo,
servingRuntimeName,
false,
acceleratorProfileState,
allowCreate,
editInfo?.secrets,
);

// Promise.all([
// submitServingRuntimeResources({ dryRun: true }),
// submitInferenceServiceResource({ dryRun: true }),
// ])
// .then(() =>
// Promise.all([
// submitServingRuntimeResources({ dryRun: false }),
// submitInferenceServiceResource({ dryRun: false }),
// ]),
// )
// .then(() => onSuccess())
// .catch((e) => {
// setErrorModal(e);
// });
Promise.all([
submitServingRuntimeResources({ dryRun: true }),
submitInferenceServiceResource({ dryRun: true }),
])
.then(() =>
Promise.all([
submitServingRuntimeResources({ dryRun: false }),
submitInferenceServiceResource({ dryRun: false }),
]),
)
.then(() => onSuccess())
.catch((e) => {
setErrorModal(e);
});
};

return (
Expand Down Expand Up @@ -231,11 +222,32 @@ const DeployNIMServiceModal: React.FC<DeployNIMServiceModalProps> = ({
}}
>
<Stack hasGutter>
{!isAuthorinoEnabled && alertVisible && (
<StackItem>
<Alert
id="no-authorino-installed-alert"
data-testid="no-authorino-installed-alert"
isExpandable
isInline
variant="warning"
title="Token authentication service not installed"
actionClose={<AlertActionCloseButton onClose={() => setAlertVisible(false)} />}
>
<p>
The NVIDIA NIM model serving platform used by this project allows deployed models
to be accessible via external routes. It is recommended that token authentication
be enabled to protect these routes. The serving platform requires the Authorino
operator be installed on the cluster for token authentication. Contact a cluster
administrator to install the operator.
</p>
</Alert>
</StackItem>
)}
<StackItem>
<ProjectSection
projectName={
(projectContext?.currentProject &&
getProjectDisplayName(projectContext.currentProject)) ||
getDisplayNameFromK8sResource(projectContext.currentProject)) ||
editInfo?.inferenceServiceEditInfo?.metadata.namespace ||
''
}
Expand All @@ -250,8 +262,9 @@ const DeployNIMServiceModal: React.FC<DeployNIMServiceModalProps> = ({
<StackItem>
<StackItem>
<NIMModelListSection
data={createDataInferenceService}
setData={setCreateDataInferenceService}
inferenceServiceData={createDataInferenceService}
setInferenceServiceData={setCreateDataInferenceService}
setServingRuntimeData={setCreateDataServingRuntime}
isEditing={!!editInfo}
/>
</StackItem>
Expand All @@ -266,8 +279,8 @@ const DeployNIMServiceModal: React.FC<DeployNIMServiceModalProps> = ({
</StackItem>
<StackItem>
<ServingRuntimeSizeSection
data={createDataServingRuntime}
setData={setCreateDataServingRuntime}
data={createDataInferenceService}
setData={setCreateDataInferenceService}
sizes={sizes}
servingRuntimeSelected={servingRuntimeSelected}
acceleratorProfileState={acceleratorProfileState}
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@ type NIMModelDeploymentNameSectionProps = {
setData: UpdateObjectAtPropAndValue<CreatingInferenceServiceObject>;
};

const NIMModelDeploymentNameSection: React.FC<NIMModelDeploymentNameSectionProps> = ({ data, setData }) => (
const NIMModelDeploymentNameSection: React.FC<NIMModelDeploymentNameSectionProps> = ({
data,
setData,
}) => (
<FormGroup label="Model deployment name" fieldId="model-deployment-name-section" isRequired>
<TextInput
isRequired
Expand Down
Loading
Loading