diff --git a/frontend/src/api/k8s/servingRuntimes.ts b/frontend/src/api/k8s/servingRuntimes.ts index e3b933dcb9..7392a2751e 100644 --- a/frontend/src/api/k8s/servingRuntimes.ts +++ b/frontend/src/api/k8s/servingRuntimes.ts @@ -13,7 +13,10 @@ import { ServingRuntimeAnnotations, ServingRuntimeKind, } from '~/k8sTypes'; -import { CreatingServingRuntimeObject } from '~/pages/modelServing/screens/types'; +import { + CreatingServingRuntimeObject, + SupportedModelFormatsInfo, +} from '~/pages/modelServing/screens/types'; import { ContainerResources } from '~/types'; import { getModelServingRuntimeName } from '~/pages/modelServing/utils'; import { getDisplayNameFromK8sResource, translateDisplayNameForK8s } from '~/concepts/k8s/utils'; @@ -31,7 +34,15 @@ export const assembleServingRuntime = ( acceleratorProfileState?: AcceleratorProfileState, isModelMesh?: boolean, ): ServingRuntimeKind => { - const { name: displayName, numReplicas, modelSize, externalRoute, tokenAuth } = data; + const { + name: displayName, + numReplicas, + modelSize, + externalRoute, + tokenAuth, + imageName, + supportedModelFormatsInfo, + } = data; const createName = isCustomServingRuntimesEnabled ? 
translateDisplayNameForK8s(displayName) : getModelServingRuntimeName(namespace); @@ -121,6 +132,11 @@ export const assembleServingRuntime = ( volumeMounts.push(getshmVolumeMount()); } + if (imageName) { + const containerObj = container; + containerObj.image = imageName; + } + const containerWithoutResources = _.omit(container, 'resources'); return { @@ -132,6 +148,24 @@ export const assembleServingRuntime = ( }, ); + if (supportedModelFormatsInfo) { + const supportedModelFormatsObj: SupportedModelFormatsInfo = { + name: supportedModelFormatsInfo.name, + version: supportedModelFormatsInfo.version, + autoSelect: true, + priority: 1, + }; + + if ( + updatedServingRuntime.spec.supportedModelFormats && + updatedServingRuntime.spec.supportedModelFormats.length >= 1 + ) { + updatedServingRuntime.spec.supportedModelFormats[0] = supportedModelFormatsObj; + } else { + updatedServingRuntime.spec.supportedModelFormats = [supportedModelFormatsObj]; + } + } + if (isModelMesh) { updatedServingRuntime.spec.tolerations = tolerations; } diff --git a/frontend/src/pages/modelServing/screens/projects/EmptyNIMModelServingCard.tsx b/frontend/src/pages/modelServing/screens/projects/EmptyNIMModelServingCard.tsx index 5dab6c0ef0..34eb718335 100644 --- a/frontend/src/pages/modelServing/screens/projects/EmptyNIMModelServingCard.tsx +++ b/frontend/src/pages/modelServing/screens/projects/EmptyNIMModelServingCard.tsx @@ -57,7 +57,7 @@ const EmptyNIMModelServingCard: React.FC = () => { border: '1px solid var(--pf-v5-global--BorderColor--100)', borderRadius: 16, }} - data-testid="single-serving-platform-card" + data-testid="nvidia-nim-model-serving-platform-card" > diff --git a/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/DeployNIMServiceModal.tsx b/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/DeployNIMServiceModal.tsx index cddda667b0..d8ed0ad886 100644 --- a/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/DeployNIMServiceModal.tsx +++ 
b/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/DeployNIMServiceModal.tsx @@ -1,7 +1,8 @@ import * as React from 'react'; import { + Alert, + AlertActionCloseButton, Form, - FormSection, Modal, Stack, StackItem, @@ -25,25 +26,16 @@ import useCustomServingRuntimesEnabled from '~/pages/modelServing/customServingR import { getServingRuntimeFromName } from '~/pages/modelServing/customServingRuntimes/utils'; import useServingAcceleratorProfile from '~/pages/modelServing/screens/projects/useServingAcceleratorProfile'; import DashboardModalFooter from '~/concepts/dashboard/DashboardModalFooter'; -import { - InferenceServiceStorageType, - ServingRuntimeEditInfo, -} from '~/pages/modelServing/screens/types'; +import { ServingRuntimeEditInfo } from '~/pages/modelServing/screens/types'; import ServingRuntimeSizeSection from '~/pages/modelServing/screens/projects/ServingRuntimeModal/ServingRuntimeSizeSection'; import NIMModelListSection from '~/pages/modelServing/screens/projects/NIMServiceModal/NIMModelListSection'; import NIMModelDeploymentNameSection from '~/pages/modelServing/screens/projects/NIMServiceModal/NIMModelDeploymentNameSection'; import ProjectSection from '~/pages/modelServing/screens/projects/InferenceServiceModal/ProjectSection'; import { DataConnection, NamespaceApplicationCase } from '~/pages/projects/types'; -import { AwsKeys } from '~/pages/projects/dataConnections/const'; -import { isAWSValid } from '~/pages/projects/screens/spawner/spawnerUtils'; -import DataConnectionSection from '~/pages/modelServing/screens/projects/InferenceServiceModal/DataConnectionSection'; -import { getProjectDisplayName } from '~/concepts/projects/utils'; -import { translateDisplayNameForK8s } from '~/concepts/k8s/utils'; -import { containsOnlySlashes, isS3PathValid } from '~/utilities/string'; -import AuthServingRuntimeSection from '~/pages/modelServing/screens/projects/ServingRuntimeModal/AuthServingRuntimeSection'; +import { getDisplayNameFromK8sResource, 
translateDisplayNameForK8s } from '~/concepts/k8s/utils'; import { useAccessReview } from '~/api'; import { SupportedArea, useIsAreaAvailable } from '~/concepts/areas'; -import KServeAutoscalerReplicaSection from './KServeAutoscalerReplicaSection'; +import KServeAutoscalerReplicaSection from '~/pages/modelServing/screens/projects/kServeModal/KServeAutoscalerReplicaSection'; const accessReviewResource: AccessReviewResourceAttributes = { group: 'rbac.authorization.k8s.io', @@ -115,27 +107,25 @@ const DeployNIMServiceModal: React.FC = ({ }, [currentProjectName, setCreateDataInferenceService, isOpen]); // Serving Runtime Validation + const isDisabledServingRuntime = namespace === '' || actionInProgress; + const baseInputValueValid = createDataServingRuntime.numReplicas >= 0 && resourcesArePositive(createDataServingRuntime.modelSize.resources) && requestsUnderLimits(createDataServingRuntime.modelSize.resources); - const isDisabledServingRuntime = namespace === '' || actionInProgress || !baseInputValueValid; - const isDisabledInferenceService = actionInProgress || createDataInferenceService.name.trim() === '' || createDataInferenceService.project === '' || - !isInferenceServiceNameWithinLimit + !isInferenceServiceNameWithinLimit || + !baseInputValueValid; const servingRuntimeSelected = React.useMemo( () => editInfo?.servingRuntimeEditInfo?.servingRuntime || - getServingRuntimeFromName( - createDataServingRuntime.servingRuntimeTemplateName, - servingRuntimeTemplates, - ), - [editInfo, servingRuntimeTemplates, createDataServingRuntime.servingRuntimeTemplateName], + getServingRuntimeFromName('nvidia-runtime-gpu', servingRuntimeTemplates), + [editInfo, servingRuntimeTemplates], ); const onBeforeClose = (submitted: boolean) => { @@ -161,48 +151,49 @@ const DeployNIMServiceModal: React.FC = ({ const submit = () => { setError(undefined); setActionInProgress(true); - onSuccess(); - // const servingRuntimeName = 'nim'; + const servingRuntimeName = + 
editInfo?.inferenceServiceEditInfo?.spec.predictor.model?.runtime || + translateDisplayNameForK8s(createDataInferenceService.name); - // const submitServingRuntimeResources = getSubmitServingRuntimeResourcesFn( - // servingRuntimeSelected, - // createDataServingRuntime, - // customServingRuntimesEnabled, - // namespace, - // editInfo?.servingRuntimeEditInfo, - // false, - // acceleratorProfileState, - // NamespaceApplicationCase.KSERVE_PROMOTION, - // projectContext?.currentProject, - // servingRuntimeName, - // false, - // ); + const submitServingRuntimeResources = getSubmitServingRuntimeResourcesFn( + servingRuntimeSelected, + createDataServingRuntime, + customServingRuntimesEnabled, + namespace, + editInfo?.servingRuntimeEditInfo, + false, + acceleratorProfileState, + NamespaceApplicationCase.KSERVE_PROMOTION, + projectContext?.currentProject, + servingRuntimeName, + true, + ); - // const submitInferenceServiceResource = getSubmitInferenceServiceResourceFn( - // createDataInferenceService, - // editInfo?.inferenceServiceEditInfo, - // servingRuntimeName, - // false, - // acceleratorProfileState, - // allowCreate, - // editInfo?.secrets, - // ); + const submitInferenceServiceResource = getSubmitInferenceServiceResourceFn( + createDataInferenceService, + editInfo?.inferenceServiceEditInfo, + servingRuntimeName, + false, + acceleratorProfileState, + allowCreate, + editInfo?.secrets, + ); - // Promise.all([ - // submitServingRuntimeResources({ dryRun: true }), - // submitInferenceServiceResource({ dryRun: true }), - // ]) - // .then(() => - // Promise.all([ - // submitServingRuntimeResources({ dryRun: false }), - // submitInferenceServiceResource({ dryRun: false }), - // ]), - // ) - // .then(() => onSuccess()) - // .catch((e) => { - // setErrorModal(e); - // }); + Promise.all([ + submitServingRuntimeResources({ dryRun: true }), + submitInferenceServiceResource({ dryRun: true }), + ]) + .then(() => + Promise.all([ + submitServingRuntimeResources({ dryRun: false }), 
+ submitInferenceServiceResource({ dryRun: false }), + ]), + ) + .then(() => onSuccess()) + .catch((e) => { + setErrorModal(e); + }); }; return ( @@ -231,11 +222,32 @@ const DeployNIMServiceModal: React.FC = ({ }} > + {!isAuthorinoEnabled && alertVisible && ( + + setAlertVisible(false)} />} + > +

+ The NVIDIA NIM model serving platform used by this project allows deployed models + to be accessible via external routes. It is recommended that token authentication + be enabled to protect these routes. The serving platform requires the Authorino + operator be installed on the cluster for token authentication. Contact a cluster + administrator to install the operator. +

+
+
+ )} = ({ @@ -266,8 +279,8 @@ const DeployNIMServiceModal: React.FC = ({ ; - infoContent?: string; -}; - -const KServeAutoscalerReplicaSection: React.FC = ({ - data, - setData, - infoContent, -}) => { - const MIN_SIZE = 0; - - const onStep = (step: number) => { - setData('minReplicas', normalizeBetween(data.minReplicas + step, MIN_SIZE)); - setData('maxReplicas', normalizeBetween(data.maxReplicas + step, MIN_SIZE)); - }; - - return ( - - {infoContent}}> - - - - - ) : undefined - } - > - onStep(1)} - onMinus={() => onStep(-1)} - onChange={(event) => { - if (isHTMLInputElement(event.target)) { - const newSize = Number(event.target.value); - setData( - 'minReplicas', - Number.isNaN(newSize) ? MIN_SIZE : normalizeBetween(newSize, MIN_SIZE), - ); - setData( - 'maxReplicas', - Number.isNaN(newSize) ? MIN_SIZE : normalizeBetween(newSize, MIN_SIZE), - ); - } - }} - /> - - - ); -}; - -export default KServeAutoscalerReplicaSection; diff --git a/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMModelDeploymentNameSection.tsx b/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMModelDeploymentNameSection.tsx index 38161ca610..20f894ec1e 100644 --- a/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMModelDeploymentNameSection.tsx +++ b/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMModelDeploymentNameSection.tsx @@ -8,7 +8,10 @@ type NIMModelDeploymentNameSectionProps = { setData: UpdateObjectAtPropAndValue; }; -const NIMModelDeploymentNameSection: React.FC = ({ data, setData }) => ( +const NIMModelDeploymentNameSection: React.FC = ({ + data, + setData, +}) => ( ; + inferenceServiceData: CreatingInferenceServiceObject; + setInferenceServiceData: UpdateObjectAtPropAndValue; + setServingRuntimeData: UpdateObjectAtPropAndValue; isEditing?: boolean; }; -const NIMModelListSection: React.FC = ({ data, setData, isEditing }) => { +const NIMModelListSection: React.FC = ({ + inferenceServiceData, + 
setInferenceServiceData, + setServingRuntimeData, + isEditing, +}) => { const [options, setOptions] = useState<{ key: string; label: string }[]>([]); useEffect(() => { @@ -29,6 +38,25 @@ const NIMModelListSection: React.FC = ({ data, setData getModelNames(); }, []); + const getSupportedModelFormatsInfo = (name: string) => { + const modelInfo = options.find((option) => option.key === name); + if (modelInfo) { + return { + name: modelInfo.key, + version: modelInfo.label.split(' - ')[1], + }; + } + return { name: '', version: '' }; + }; + + const getNIMImageName = (name: string) => { + const imageInfo = options.find((option) => option.key === name); + if (imageInfo) { + return `nvcr.io/nim/meta/${name}:${imageInfo.label.split(' - ')[1]}`; + } + return ''; + }; + return ( = ({ data, setData dataTestId="nim-model-list-selection" aria-label="Select NVIDIA model" options={options} - placeholder={isEditing ? data.name : 'Select NVIDIA model'} - value={data.format.name} + placeholder={isEditing ? 
inferenceServiceData.name : 'Select NVIDIA model'} + value={inferenceServiceData.format.name} onChange={(name) => { - setData('format', { name }); + setServingRuntimeData('supportedModelFormatsInfo', getSupportedModelFormatsInfo(name)); + setServingRuntimeData('imageName', getNIMImageName(name)); + setInferenceServiceData('format', { name }); }} /> diff --git a/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMServingRuntimeSection.tsx b/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMServingRuntimeSection.tsx deleted file mode 100644 index fbc77026c4..0000000000 --- a/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMServingRuntimeSection.tsx +++ /dev/null @@ -1,52 +0,0 @@ -import * as React from 'react'; -import { - FormGroup, -} from '@patternfly/react-core'; -import { UpdateObjectAtPropAndValue } from '~/pages/projects/types'; -import SimpleDropdownSelect from '~/components/SimpleDropdownSelect'; -import { CreatingServingRuntimeObject } from '~/pages/modelServing/screens/types'; - - -type NIMServingRuntimeSectionProps = { - data: CreatingServingRuntimeObject; - setData: UpdateObjectAtPropAndValue; - isEditing?: boolean; -}; - -const NIMServingRuntimeSection: React.FC = ({ - data, - setData, - isEditing, -}) => { - - const options = [ - { - key: 'nvidia-nim-serving-runtime', - label: 'NVIDIA NIM serving runtime', - }, - ]; - - return ( - - { - setData('servingRuntimeTemplateName', name); - }} - /> - - ); -}; - -export default NIMServingRuntimeSection; diff --git a/frontend/src/pages/modelServing/screens/projects/utils.ts b/frontend/src/pages/modelServing/screens/projects/utils.ts index 4b0646e3f5..59600b8369 100644 --- a/frontend/src/pages/modelServing/screens/projects/utils.ts +++ b/frontend/src/pages/modelServing/screens/projects/utils.ts @@ -7,7 +7,11 @@ import { SecretKind, ServingRuntimeKind, } from '~/k8sTypes'; -import { DataConnection, NamespaceApplicationCase, UpdateObjectAtPropAndValue } from 
'~/pages/projects/types'; +import { + DataConnection, + NamespaceApplicationCase, + UpdateObjectAtPropAndValue, +} from '~/pages/projects/types'; import useGenericObjectState from '~/utilities/useGenericObjectState'; import { CreatingInferenceServiceObject, diff --git a/frontend/src/pages/modelServing/screens/types.ts b/frontend/src/pages/modelServing/screens/types.ts index a8bf14fdb2..c6aaa9f22f 100644 --- a/frontend/src/pages/modelServing/screens/types.ts +++ b/frontend/src/pages/modelServing/screens/types.ts @@ -32,6 +32,13 @@ export type ModelStatus = { failedToSchedule: boolean; }; +export type SupportedModelFormatsInfo = { + name: string; + version: string; + autoSelect?: boolean; + priority?: number; +}; + export type CreatingServingRuntimeObject = { name: string; servingRuntimeTemplateName: string; @@ -40,6 +47,8 @@ export type CreatingServingRuntimeObject = { externalRoute: boolean; tokenAuth: boolean; tokens: ServingRuntimeToken[]; + imageName?: string; + supportedModelFormatsInfo?: SupportedModelFormatsInfo; }; export type ServingRuntimeToken = { diff --git a/frontend/src/pages/notebookController/screens/server/AcceleratorProfileSelectField.tsx b/frontend/src/pages/notebookController/screens/server/AcceleratorProfileSelectField.tsx index 5286276709..70ba984aa3 100644 --- a/frontend/src/pages/notebookController/screens/server/AcceleratorProfileSelectField.tsx +++ b/frontend/src/pages/notebookController/screens/server/AcceleratorProfileSelectField.tsx @@ -206,6 +206,35 @@ const AcceleratorProfileSelectField: React.FC )} + {acceleratorProfile && ( + + + + onStep(1)} + onMinus={() => onStep(-1)} + onChange={(event) => { + if (isHTMLInputElement(event.target)) { + const newSize = Number(event.target.value); + setAcceleratorProfileState('count', Math.max(newSize, 1)); + } + }} + /> + + + + )} + {acceleratorCountWarning && ( + + + + )}
); }; diff --git a/frontend/src/pages/projects/screens/detail/overview/serverModels/AddModelFooter.tsx b/frontend/src/pages/projects/screens/detail/overview/serverModels/AddModelFooter.tsx index ef73047627..7b9e76a805 100644 --- a/frontend/src/pages/projects/screens/detail/overview/serverModels/AddModelFooter.tsx +++ b/frontend/src/pages/projects/screens/detail/overview/serverModels/AddModelFooter.tsx @@ -89,7 +89,7 @@ const AddModelFooter: React.FC = ({ selectedPlatform, isNIM onClose={onSubmit} /> ) : null} - {modalShown && isNIM ? ( + {modalShown && isNIM ? ( { - - return ( - - - - - Models are deployed using NVIDIA NIM microservices. Choose this option when you want to deploy your model within a NIM container. Please provide the API key to authenticate with the NIM service. - - - - - - ); -} +const SelectNIMCard: React.FC = () => ( + + + + + Models are deployed using NVIDIA NIM microservices. Choose this option when you want to + deploy your model within a NIM container. Please provide the API key to authenticate with + the NIM service. + + + + + +); export default SelectNIMCard;