Skip to content

Commit

Permalink
Merge pull request #3 from RHEcosystemAppEng/APPENG-2521-Implement-th…
Browse files Browse the repository at this point in the history
…e-NIM-service-deployment-page-and-connect-it-with-the-APIs

APPENG-2521: Implement the NIM service deployment page and connect it with the APIs
  • Loading branch information
yzhao583 authored Jul 29, 2024
2 parents ec1a54f + 8383986 commit 0660dde
Show file tree
Hide file tree
Showing 12 changed files with 218 additions and 216 deletions.
38 changes: 36 additions & 2 deletions frontend/src/api/k8s/servingRuntimes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@ import {
ServingRuntimeAnnotations,
ServingRuntimeKind,
} from '~/k8sTypes';
import { CreatingServingRuntimeObject } from '~/pages/modelServing/screens/types';
import {
CreatingServingRuntimeObject,
SupportedModelFormatsInfo,
} from '~/pages/modelServing/screens/types';
import { ContainerResources } from '~/types';
import { getModelServingRuntimeName } from '~/pages/modelServing/utils';
import { getDisplayNameFromK8sResource, translateDisplayNameForK8s } from '~/concepts/k8s/utils';
Expand All @@ -31,7 +34,15 @@ export const assembleServingRuntime = (
acceleratorProfileState?: AcceleratorProfileState,
isModelMesh?: boolean,
): ServingRuntimeKind => {
const { name: displayName, numReplicas, modelSize, externalRoute, tokenAuth } = data;
const {
name: displayName,
numReplicas,
modelSize,
externalRoute,
tokenAuth,
imageName,
supportedModelFormatsInfo,
} = data;
const createName = isCustomServingRuntimesEnabled
? translateDisplayNameForK8s(displayName)
: getModelServingRuntimeName(namespace);
Expand Down Expand Up @@ -121,6 +132,11 @@ export const assembleServingRuntime = (
volumeMounts.push(getshmVolumeMount());
}

if (imageName) {
const containerObj = container;
containerObj.image = imageName;
}

const containerWithoutResources = _.omit(container, 'resources');

return {
Expand All @@ -132,6 +148,24 @@ export const assembleServingRuntime = (
},
);

if (supportedModelFormatsInfo) {
const supportedModelFormatsObj: SupportedModelFormatsInfo = {
name: supportedModelFormatsInfo.name,
version: supportedModelFormatsInfo.version,
autoSelect: true,
priority: 1,
};

if (
updatedServingRuntime.spec.supportedModelFormats &&
updatedServingRuntime.spec.supportedModelFormats.length >= 1
) {
updatedServingRuntime.spec.supportedModelFormats[0] = supportedModelFormatsObj;
} else {
updatedServingRuntime.spec.supportedModelFormats?.push(supportedModelFormatsObj);
}
}

if (isModelMesh) {
updatedServingRuntime.spec.tolerations = tolerations;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ const EmptyNIMModelServingCard: React.FC = () => {
border: '1px solid var(--pf-v5-global--BorderColor--100)',
borderRadius: 16,
}}
data-testid="single-serving-platform-card"
data-testid="nvidia-nim-model-serving-platform-card"
>
<CardTitle>
<TextContent>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import * as React from 'react';
import {
Alert,
AlertActionCloseButton,
Form,
FormSection,
Modal,
Stack,
StackItem,
Expand All @@ -25,25 +26,16 @@ import useCustomServingRuntimesEnabled from '~/pages/modelServing/customServingR
import { getServingRuntimeFromName } from '~/pages/modelServing/customServingRuntimes/utils';
import useServingAcceleratorProfile from '~/pages/modelServing/screens/projects/useServingAcceleratorProfile';
import DashboardModalFooter from '~/concepts/dashboard/DashboardModalFooter';
import {
InferenceServiceStorageType,
ServingRuntimeEditInfo,
} from '~/pages/modelServing/screens/types';
import { ServingRuntimeEditInfo } from '~/pages/modelServing/screens/types';
import ServingRuntimeSizeSection from '~/pages/modelServing/screens/projects/ServingRuntimeModal/ServingRuntimeSizeSection';
import NIMModelListSection from '~/pages/modelServing/screens/projects/NIMServiceModal/NIMModelListSection';
import NIMModelDeploymentNameSection from '~/pages/modelServing/screens/projects/NIMServiceModal/NIMModelDeploymentNameSection';
import ProjectSection from '~/pages/modelServing/screens/projects/InferenceServiceModal/ProjectSection';
import { DataConnection, NamespaceApplicationCase } from '~/pages/projects/types';
import { AwsKeys } from '~/pages/projects/dataConnections/const';
import { isAWSValid } from '~/pages/projects/screens/spawner/spawnerUtils';
import DataConnectionSection from '~/pages/modelServing/screens/projects/InferenceServiceModal/DataConnectionSection';
import { getProjectDisplayName } from '~/concepts/projects/utils';
import { translateDisplayNameForK8s } from '~/concepts/k8s/utils';
import { containsOnlySlashes, isS3PathValid } from '~/utilities/string';
import AuthServingRuntimeSection from '~/pages/modelServing/screens/projects/ServingRuntimeModal/AuthServingRuntimeSection';
import { getDisplayNameFromK8sResource, translateDisplayNameForK8s } from '~/concepts/k8s/utils';
import { useAccessReview } from '~/api';
import { SupportedArea, useIsAreaAvailable } from '~/concepts/areas';
import KServeAutoscalerReplicaSection from './KServeAutoscalerReplicaSection';
import KServeAutoscalerReplicaSection from '~/pages/modelServing/screens/projects/kServeModal/KServeAutoscalerReplicaSection';

const accessReviewResource: AccessReviewResourceAttributes = {
group: 'rbac.authorization.k8s.io',
Expand Down Expand Up @@ -115,27 +107,25 @@ const DeployNIMServiceModal: React.FC<DeployNIMServiceModalProps> = ({
}, [currentProjectName, setCreateDataInferenceService, isOpen]);

// Serving Runtime Validation
const isDisabledServingRuntime = namespace === '' || actionInProgress;

const baseInputValueValid =
createDataServingRuntime.numReplicas >= 0 &&
resourcesArePositive(createDataServingRuntime.modelSize.resources) &&
requestsUnderLimits(createDataServingRuntime.modelSize.resources);

const isDisabledServingRuntime = namespace === '' || actionInProgress || !baseInputValueValid;

const isDisabledInferenceService =
actionInProgress ||
createDataInferenceService.name.trim() === '' ||
createDataInferenceService.project === '' ||
!isInferenceServiceNameWithinLimit
!isInferenceServiceNameWithinLimit ||
!baseInputValueValid;

const servingRuntimeSelected = React.useMemo(
() =>
editInfo?.servingRuntimeEditInfo?.servingRuntime ||
getServingRuntimeFromName(
createDataServingRuntime.servingRuntimeTemplateName,
servingRuntimeTemplates,
),
[editInfo, servingRuntimeTemplates, createDataServingRuntime.servingRuntimeTemplateName],
getServingRuntimeFromName('nvidia-runtime-gpu', servingRuntimeTemplates),
[editInfo, servingRuntimeTemplates],
);

const onBeforeClose = (submitted: boolean) => {
Expand All @@ -161,48 +151,49 @@ const DeployNIMServiceModal: React.FC<DeployNIMServiceModalProps> = ({
const submit = () => {
  // Reset any prior error and flip the in-progress flag so both the
  // serving-runtime and inference-service controls disable while we submit.
  setError(undefined);
  setActionInProgress(true);

  // When editing, reuse the runtime name already attached to the inference
  // service; otherwise derive a k8s-safe name from the deployment name the
  // user typed. NOTE(review): assumes the two resources share this name by
  // convention — confirm against the serving-runtime creation path.
  const servingRuntimeName =
    editInfo?.inferenceServiceEditInfo?.spec.predictor.model?.runtime ||
    translateDisplayNameForK8s(createDataInferenceService.name);

  // Builds the create/update call for the ServingRuntime resource. The final
  // `true` flag requests the NIM-specific assembly path (image + supported
  // model formats injected into the runtime spec).
  const submitServingRuntimeResources = getSubmitServingRuntimeResourcesFn(
    servingRuntimeSelected,
    createDataServingRuntime,
    customServingRuntimesEnabled,
    namespace,
    editInfo?.servingRuntimeEditInfo,
    false,
    acceleratorProfileState,
    NamespaceApplicationCase.KSERVE_PROMOTION,
    projectContext?.currentProject,
    servingRuntimeName,
    true,
  );

  // Builds the create/update call for the InferenceService resource that
  // points at the runtime above.
  const submitInferenceServiceResource = getSubmitInferenceServiceResourceFn(
    createDataInferenceService,
    editInfo?.inferenceServiceEditInfo,
    servingRuntimeName,
    false,
    acceleratorProfileState,
    allowCreate,
    editInfo?.secrets,
  );

  // Two-phase submit: dry-run both resources first so validation failures
  // surface before anything is persisted, then apply both for real. Only
  // after the real submissions resolve do we signal success; any rejection
  // (dry-run or real) is routed to the modal's error state.
  Promise.all([
    submitServingRuntimeResources({ dryRun: true }),
    submitInferenceServiceResource({ dryRun: true }),
  ])
    .then(() =>
      Promise.all([
        submitServingRuntimeResources({ dryRun: false }),
        submitInferenceServiceResource({ dryRun: false }),
      ]),
    )
    .then(() => onSuccess())
    .catch((e) => {
      setErrorModal(e);
    });
};

return (
Expand Down Expand Up @@ -231,11 +222,32 @@ const DeployNIMServiceModal: React.FC<DeployNIMServiceModalProps> = ({
}}
>
<Stack hasGutter>
{!isAuthorinoEnabled && alertVisible && (
<StackItem>
<Alert
id="no-authorino-installed-alert"
data-testid="no-authorino-installed-alert"
isExpandable
isInline
variant="warning"
title="Token authentication service not installed"
actionClose={<AlertActionCloseButton onClose={() => setAlertVisible(false)} />}
>
<p>
The NVIDIA NIM model serving platform used by this project allows deployed models
to be accessible via external routes. It is recommended that token authentication
be enabled to protect these routes. The serving platform requires the Authorino
operator be installed on the cluster for token authentication. Contact a cluster
administrator to install the operator.
</p>
</Alert>
</StackItem>
)}
<StackItem>
<ProjectSection
projectName={
(projectContext?.currentProject &&
getProjectDisplayName(projectContext.currentProject)) ||
getDisplayNameFromK8sResource(projectContext.currentProject)) ||
editInfo?.inferenceServiceEditInfo?.metadata.namespace ||
''
}
Expand All @@ -250,8 +262,9 @@ const DeployNIMServiceModal: React.FC<DeployNIMServiceModalProps> = ({
<StackItem>
<StackItem>
<NIMModelListSection
data={createDataInferenceService}
setData={setCreateDataInferenceService}
inferenceServiceData={createDataInferenceService}
setInferenceServiceData={setCreateDataInferenceService}
setServingRuntimeData={setCreateDataServingRuntime}
isEditing={!!editInfo}
/>
</StackItem>
Expand All @@ -266,8 +279,8 @@ const DeployNIMServiceModal: React.FC<DeployNIMServiceModalProps> = ({
</StackItem>
<StackItem>
<ServingRuntimeSizeSection
data={createDataServingRuntime}
setData={setCreateDataServingRuntime}
data={createDataInferenceService}
setData={setCreateDataInferenceService}
sizes={sizes}
servingRuntimeSelected={servingRuntimeSelected}
acceleratorProfileState={acceleratorProfileState}
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@ type NIMModelDeploymentNameSectionProps = {
setData: UpdateObjectAtPropAndValue<CreatingInferenceServiceObject>;
};

const NIMModelDeploymentNameSection: React.FC<NIMModelDeploymentNameSectionProps> = ({ data, setData }) => (
const NIMModelDeploymentNameSection: React.FC<NIMModelDeploymentNameSectionProps> = ({
data,
setData,
}) => (
<FormGroup label="Model deployment name" fieldId="model-deployment-name-section" isRequired>
<TextInput
isRequired
Expand Down
Loading

0 comments on commit 0660dde

Please sign in to comment.