From 3a68c021b7a7647bea11a0320bed9dfe8b34d0f9 Mon Sep 17 00:00:00 2001 From: Nimbus318 <136771156+Nimbus318@users.noreply.github.com> Date: Tue, 14 Jan 2025 20:12:37 +0800 Subject: [PATCH] fix: temporarily adapt to Ascend310P computational tasks Signed-off-by: Nimbus318 <136771156+Nimbus318@users.noreply.github.com> --- server/internal/provider/ascend/device.go | 5 +-- server/internal/provider/util/util.go | 42 +++++++++++++++++------ 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/server/internal/provider/ascend/device.go b/server/internal/provider/ascend/device.go index 5573c54..bdd2082 100644 --- a/server/internal/provider/ascend/device.go +++ b/server/internal/provider/ascend/device.go @@ -4,6 +4,7 @@ import "vgpu/internal/provider/util" const ( AscendDevice = "Ascend" + Ascend310PDevice = "Ascend310P" AscendDeviceSelection = "huawei.com/predicate-ascend-idx-" // IluvatarUseUUID is user can use specify Iluvatar device for set Iluvatar UUID. AscendDeviceUseUUID = "huawei.com/use-ascenduuid" @@ -24,6 +25,6 @@ func init() { AscendNodeRegisterAnnos = []string{Ascend910BNodeRegisterAnno, Ascend310PNodeRegisterAnno} util.InRequestDevices[AscendDevice] = "hami.io/Ascend910B-devices-to-allocate" util.SupportDevices[AscendDevice] = "hami.io/Ascend910B-devices-allocated" - util.InRequestDevices["Ascend310P"] = "hami.io/Ascend310P-devices-to-allocate" - util.SupportDevices["Ascend310P"] = "hami.io/Ascend310P-devices-allocated" + util.InRequestDevices[Ascend310PDevice] = "hami.io/Ascend310P-devices-to-allocate" + util.SupportDevices[Ascend310PDevice] = "hami.io/Ascend310P-devices-allocated" } diff --git a/server/internal/provider/util/util.go b/server/internal/provider/util/util.go index f1265b0..4d91892 100644 --- a/server/internal/provider/util/util.go +++ b/server/internal/provider/util/util.go @@ -17,24 +17,42 @@ const ( // OnePodMultiContainerSplitSymbol this is when one pod having multi container and more than one container use device, use ; symbol to join device info. OnePodMultiContainerSplitSymbol = ";" - NvidiaGPUDevice = "NVIDIA" - AscendGPUDevice = "Ascend" - HygonGPUDevice = "DCU" - CambriconGPUDevice = "MLU" + NvidiaGPUDevice = "NVIDIA" + AscendGPUDevice = "Ascend" + Ascend310PGPUDevice = "Ascend310P" + HygonGPUDevice = "DCU" + CambriconGPUDevice = "MLU" DsmluProfileAndInstance = "CAMBRICON_DSMLU_PROFILE_INSTANCE" NVIDIAPriority = "nvidia.com/priority" ) +type ascendDeviceConfig struct { + Usedmem int32 + Usedcores int32 +} + var ( - InRequestDevices map[string]string - SupportDevices map[string]string + InRequestDevices map[string]string + SupportDevices map[string]string + ascendDeviceConfigs map[string]map[int32]ascendDeviceConfig ) func init() { InRequestDevices = make(map[string]string) SupportDevices = make(map[string]string) + ascendDeviceConfigs = map[string]map[int32]ascendDeviceConfig{ + "Ascend910B": { + 16384: {Usedmem: 16384, Usedcores: 25}, + 32768: {Usedmem: 32768, Usedcores: 50}, + }, + "Ascend310P": { + 3072: {Usedmem: 3072, Usedcores: 13}, + 6144: {Usedmem: 6144, Usedcores: 25}, + 12288: {Usedmem: 12288, Usedcores: 50}, + }, + } initMLUDevice() } @@ -186,11 +204,13 @@ func DecodeNpuContainerDevices(str string) (ContainerDevices, error) { devmem, _ := strconv.ParseInt(tmpstr[2], 10, 32) tmpdev.Usedmem = int32(devmem) tmpdev.Usedcores = 100 - if tmpdev.Usedmem == 16384 { - tmpdev.Usedcores = 25 - } else if tmpdev.Usedmem == 32768 { - tmpdev.Usedcores = 50 + + if configs, exists := ascendDeviceConfigs[tmpdev.Type]; exists { + if config, ok := configs[tmpdev.Usedmem]; ok { + tmpdev.Usedcores = config.Usedcores + } } + contdev = append(contdev, tmpdev) } } @@ -262,7 +282,7 @@ func DecodePodDevices(pod *corev1.Pod, log *log.Helper) (PodDevices, error) { } pd[devType] = make(PodSingleDevice, 0) switch devType { - case AscendGPUDevice: + case AscendGPUDevice, Ascend310PGPUDevice: for _, s := range strings.Split(str, OnePodMultiContainerSplitSymbol) { cd, err := DecodeNpuContainerDevices(s) if err != nil {