From 75636642cdade8bb0725ccf392e68ba5c628b7c3 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Fri, 22 Sep 2023 09:20:06 +0100 Subject: [PATCH] CP-44320 scaffolding for NVidia Virtual Compute Service (VCS) NVidia is supporting vGPUs that can run as compute vGPUs. These are marked in their vgpuConfig.xml with a new value "Compute" for the existing "class" attribute of a vgpuType element (other values are "Quadro" and "NVS"). We don't parse or use this attribute so far, but we need to in order to support VCS. This commit extends the parser to recognise the class attribute and to pass it on under the internal name "vclass" to xenopsd. Xenopsd's job is to start device emulators accordingly. Selecting the correct emulators is future work. So far, xenopsd logs the vclass it receives. Unit tests have been updated. This commit should have no impact on current functionality, be safe to merge, and lay groundwork for extending xenopsd to support VCS. Signed-off-by: Christian Lindig --- ocaml/tests/common/test_vgpu_common.ml | 5 +++++ ocaml/tests/test_vgpu_type.ml | 4 ++++ ocaml/tests/test_xenopsd_metadata.ml | 7 +++++++ ocaml/xapi-idl/xen/xenops_types.ml | 1 + ocaml/xapi/xapi_globs.ml | 3 +++ ocaml/xapi/xapi_vgpu_type.ml | 20 ++++++++++++++++++-- ocaml/xapi/xapi_xenops.ml | 10 ++++++---- ocaml/xenopsd/xc/device.ml | 4 +++- 8 files changed, 47 insertions(+), 7 deletions(-) diff --git a/ocaml/tests/common/test_vgpu_common.ml b/ocaml/tests/common/test_vgpu_common.ml index 966f2a065ea..15c0da96c91 100644 --- a/ocaml/tests/common/test_vgpu_common.ml +++ b/ocaml/tests/common/test_vgpu_common.ml @@ -33,6 +33,7 @@ let k100 = ; vdev_id= 0x0fe7 ; vsubdev_id= 0x101e ; sriov= false + ; vclass= "NVS" } ) ; experimental= false @@ -59,6 +60,7 @@ let k140q = ; vdev_id= 0x0ff7 ; vsubdev_id= 0x1037 ; sriov= false + ; vclass= "NVS" } ) ; experimental= false @@ -85,6 +87,7 @@ let k200 = ; vdev_id= 0x118d ; vsubdev_id= 0x101d ; sriov= false + ; vclass= "NVS" } ) ; experimental= false @@ -111,6 +114,7 @@ let k240q = ; vdev_id= 0x11b0 ; vsubdev_id= 0x101a 
; sriov= false + ; vclass= "NVS" } ) ; experimental= false @@ -137,6 +141,7 @@ let k260q = ; vdev_id= 0x11b0 ; vsubdev_id= 0x101b ; sriov= false + ; vclass= "NVS" } ) ; experimental= false diff --git a/ocaml/tests/test_vgpu_type.ml b/ocaml/tests/test_vgpu_type.ml index cbc8d1459f0..b042ddcd2a7 100644 --- a/ocaml/tests/test_vgpu_type.ml +++ b/ocaml/tests/test_vgpu_type.ml @@ -67,6 +67,7 @@ module NvidiaTest = struct ; vdev_id= 0x1111 ; vsubdev_id= 0x2222 ; sriov= false + ; vclass= "NVS" } ; framebufferlength= 0x10000000L ; num_heads= 2L @@ -92,6 +93,7 @@ module NvidiaTest = struct ; vdev_id= 0x1111 ; vsubdev_id= 0x2222 ; sriov= false + ; vclass= "NVS" } ; framebufferlength= 0x10000000L ; num_heads= 2L @@ -117,6 +119,7 @@ module NvidiaTest = struct ; vdev_id= 0x1112 ; vsubdev_id= 0x2223 ; sriov= false + ; vclass= "NVS" } ; framebufferlength= 0x20000000L ; num_heads= 4L @@ -138,6 +141,7 @@ module NvidiaTest = struct ; vdev_id= 0x1111 ; vsubdev_id= 0x2222 ; sriov= false + ; vclass= "NVS" } ; framebufferlength= 0x10000000L ; num_heads= 2L diff --git a/ocaml/tests/test_xenopsd_metadata.ml b/ocaml/tests/test_xenopsd_metadata.ml index b04804f9011..c052de228fa 100644 --- a/ocaml/tests/test_xenopsd_metadata.ml +++ b/ocaml/tests/test_xenopsd_metadata.ml @@ -280,6 +280,7 @@ module GenerateVGPUMetadata = Generic.MakeStateful (struct ; type_id= Some "type_id_1" ; uuid= Some (uuid_with_index 0) ; extra_args= "" + ; vclass= None } ) ] @@ -364,6 +365,7 @@ module GenerateMultiVGPUMetadata = Generic.MakeStateful (struct ; type_id= Some "type_id_1" ; uuid= Some (uuid_with_index 0) ; extra_args= "" + ; vclass= None } ) ; Xenops_interface.Vgpu.( @@ -375,6 +377,7 @@ module GenerateMultiVGPUMetadata = Generic.MakeStateful (struct ; type_id= Some "type_id_1" ; uuid= Some (uuid_with_index 1) ; extra_args= "" + ; vclass= None } ) ] @@ -398,6 +401,7 @@ module GenerateMultiVGPUMetadata = Generic.MakeStateful (struct ; type_id= Some "type_id_1" ; uuid= Some (uuid_with_index 0) ; extra_args= "" 
+ ; vclass= None } ) ; Xenops_interface.Vgpu.( @@ -409,6 +413,7 @@ module GenerateMultiVGPUMetadata = Generic.MakeStateful (struct ; type_id= Some "type_id_1" ; uuid= Some (uuid_with_index 1) ; extra_args= "" + ; vclass= None } ) ; Xenops_interface.Vgpu.( @@ -420,6 +425,7 @@ module GenerateMultiVGPUMetadata = Generic.MakeStateful (struct ; type_id= Some "type_id_1" ; uuid= Some (uuid_with_index 2) ; extra_args= "" + ; vclass= None } ) ; Xenops_interface.Vgpu.( @@ -431,6 +437,7 @@ module GenerateMultiVGPUMetadata = Generic.MakeStateful (struct ; type_id= Some "type_id_1" ; uuid= Some (uuid_with_index 3) ; extra_args= "" + ; vclass= None } ) ] diff --git a/ocaml/xapi-idl/xen/xenops_types.ml b/ocaml/xapi-idl/xen/xenops_types.ml index c28cffc2f64..2ed03daf79d 100644 --- a/ocaml/xapi-idl/xen/xenops_types.ml +++ b/ocaml/xapi-idl/xen/xenops_types.ml @@ -32,6 +32,7 @@ module Vgpu = struct [@default {domain= 0000; bus= 0; dev= 11; fn= 0}] ; type_id: string option ; uuid: string option + ; vclass: string option (** from vgpu: Compute, NVS, Quadro *) ; extra_args: string [@default ""] (** string is passed on as is and no structure is assumed *) } diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index ebe812b35ab..1e45670ca75 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -536,6 +536,9 @@ let pass_through_pif_carrier = ref false let vgpu_type_id = "type_id" +(** key for NVidia vgpu "class" attribute in vgpuConfig.xml *) +let vgpu_type_vclass = "vclass" + let igd_passthru_key = "igd_passthrough" let vgt_low_gm_sz = "vgt_low_gm_sz" diff --git a/ocaml/xapi/xapi_vgpu_type.ml b/ocaml/xapi/xapi_vgpu_type.ml index 530507c714d..24a7ae29762 100644 --- a/ocaml/xapi/xapi_vgpu_type.ml +++ b/ocaml/xapi/xapi_vgpu_type.ml @@ -39,6 +39,7 @@ module Identifier = struct ; vdev_id: int (** vgpuTYpe/deviceId in XML *) ; vsubdev_id: int (** vgpuTYpe/subsystemId in XML *) ; sriov: bool (** true if SRIOV mode to be used *) + ; vclass: string (** vgpuType/class 
in XML: Quadro, NVS, Compute *) } type gvt_g_id = { @@ -399,6 +400,7 @@ let read_whitelist_line_by_line ~whitelist ~device_id ~parse_line [] module Vendor_nvidia = struct + (* represents a vgpuType declaration found in vgpuConfig.xml *) type vgpu_conf = { identifier: Identifier.nvidia_id ; framebufferlength: int64 @@ -652,6 +654,7 @@ module Vendor_nvidia = struct ) in let devid = find_one_by_name "devId" vgpu_type in + let vclass = get_attr "class" vgpu_type in let identifier = Identifier. { @@ -669,6 +672,7 @@ module Vendor_nvidia = struct false ) (* don't use SRIOV *) + ; vclass } in @@ -760,7 +764,11 @@ module Vendor_nvidia = struct ; max_resolution_x= conf.max_x ; max_resolution_y= conf.max_y ; size= Int64.div Constants.pgpu_default_size conf.max_instance - ; internal_config= [(Xapi_globs.vgpu_type_id, conf.type_id)] + ; internal_config= + [ + (Xapi_globs.vgpu_type_id, conf.type_id) + ; (Xapi_globs.vgpu_type_vclass, conf.identifier.vclass) + ] ; identifier= Nvidia conf.identifier ; experimental= false ; compatible_model_names_in_vm= conf.compatible_model_names_in_vm @@ -999,7 +1007,15 @@ module Nvidia_compat = struct Scanf.sscanf (List.assoc "plugin0.vdev_id" args) {|"0x%x:0x%x"|} (fun vdev_id vsubdev_id -> Identifier.( - Nvidia {pdev_id; psubdev_id; vdev_id; vsubdev_id; sriov= false} + Nvidia + { + pdev_id + ; psubdev_id + ; vdev_id + ; vsubdev_id + ; sriov= false + ; vclass= "" + } ) ) with e -> raise (Parse_error e) diff --git a/ocaml/xapi/xapi_xenops.ml b/ocaml/xapi/xapi_xenops.ml index fe12a3a0810..6aa703eb933 100644 --- a/ocaml/xapi/xapi_xenops.ml +++ b/ocaml/xapi/xapi_xenops.ml @@ -946,10 +946,11 @@ module MD = struct let physical_pci_address = get_target_pci_address ~__context vgpu in let virtual_pci_address = get_virtual_pci_address ~__context vgpu in let vgpu_type = vgpu.Db_actions.vGPU_type in - let type_id, config_file = - Db.VGPU_type.get_internal_config ~__context ~self:vgpu_type |> fun x -> - ( List.assoc_opt Xapi_globs.vgpu_type_id x - , 
List.assoc_opt Xapi_globs.nvidia_compat_config_file_key x + let type_id, config_file, vclass = + Db.VGPU_type.get_internal_config ~__context ~self:vgpu_type |> fun kv -> + ( List.assoc_opt Xapi_globs.vgpu_type_id kv + , List.assoc_opt Xapi_globs.nvidia_compat_config_file_key kv + , List.assoc_opt Xapi_globs.vgpu_type_vclass kv ) in let uuid = vgpu.Db_actions.vGPU_uuid in @@ -964,6 +965,7 @@ module MD = struct ; type_id ; uuid= Some uuid ; extra_args + ; vclass (* from vgpuType class attribute in vgpuConfig.xml *) } in { diff --git a/ocaml/xenopsd/xc/device.ml b/ocaml/xenopsd/xc/device.ml index 6727f81502c..7ab2417890c 100644 --- a/ocaml/xenopsd/xc/device.ml +++ b/ocaml/xenopsd/xc/device.ml @@ -3635,7 +3635,9 @@ module Dm = struct let start_vgpu ~xc:_ ~xs task ?(restore = false) domid vgpus vcpus profile = let open Xenops_interface.Vgpu in match vgpus with - | {implementation= Nvidia _; _} :: _ -> + | {implementation= Nvidia {vclass; _}; _} :: _ -> + let vclass = Option.value ~default:"unknown" vclass in + info "NVidia vgpu vclass=%s" vclass ; (* Start DEMU and wait until it has reached the desired state *) if not (Service.Vgpu.is_running ~xs domid) then ( let pcis = List.map (fun x -> x.physical_pci_address) vgpus in