diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 259b89b7413..0c163aa7e32 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -91,6 +91,10 @@ jobs:
           opam exec -- make install DESTDIR=$(mktemp -d)
           opam exec -- make install DESTDIR=$(mktemp -d) BUILD_PY2=NO
 
+      - name: Sanity test SDK
+        run: |
+          opam exec -- make sdksanity
+
       - name: Uninstall unversioned packages and remove pins
         # This should purge them from the cache, unversioned package have
         # 'master' as its version
diff --git a/Makefile b/Makefile
index 819bd9a432b..4d155619a57 100644
--- a/Makefile
+++ b/Makefile
@@ -66,6 +66,7 @@ sdk:
 		@ocaml/sdk-gen/csharp/generate \
 		@ocaml/sdk-gen/java/generate \
 		@ocaml/sdk-gen/powershell/generate
+	rm -rf $(XAPISDK)
 	mkdir -p $(XAPISDK)/c
 	mkdir -p $(XAPISDK)/csharp
 	mkdir -p $(XAPISDK)/java
@@ -79,6 +80,12 @@ sdk:
 	sh ocaml/sdk-gen/windows-line-endings.sh $(XAPISDK)/csharp
 	sh ocaml/sdk-gen/windows-line-endings.sh $(XAPISDK)/powershell
 
+# workaround for no .resx generation, just for compilation testing
+sdksanity: sdk
+	sed -i 's/FriendlyErrorNames.ResourceManager/null/g' ./_build/install/default/xapi/sdk/csharp/src/Failure.cs
+	cd _build/install/default/xapi/sdk/csharp/src && dotnet add package Newtonsoft.Json && dotnet build -f netstandard2.0
+
+
 python:
 	$(MAKE) -C scripts/examples/python build
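The `sdksanity` target above exists only because the build does not generate the `.resx` resources that the generated C# `Failure.cs` expects, so the target stubs out `FriendlyErrorNames.ResourceManager` before compiling. As an aside, a rough OCaml equivalent of the `sed` step is sketched below; it is not part of the patch, and the use of the `str` library is my choice for illustration:

```ocaml
(* Sketch only: an OCaml rendering of the sed call in the sdksanity
   target, replacing the ResourceManager reference with `null` so the
   generated C# compiles without the missing .resx resources.
   Build with: ocamlfind ocamlopt -package str -linkpkg patch_failure.ml *)
let () =
  let path = "_build/install/default/xapi/sdk/csharp/src/Failure.cs" in
  let ic = open_in_bin path in
  let contents = really_input_string ic (in_channel_length ic) in
  close_in ic ;
  let patched =
    Str.global_replace
      (Str.regexp_string "FriendlyErrorNames.ResourceManager")
      "null" contents
  in
  let oc = open_out_bin path in
  output_string oc patched ;
  close_out oc
```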
diff --git a/doc/content/toolstack/features/NUMA/index.md b/doc/content/toolstack/features/NUMA/index.md
index 128dbb759f0..ee7f52c98fe 100644
--- a/doc/content/toolstack/features/NUMA/index.md
+++ b/doc/content/toolstack/features/NUMA/index.md
@@ -85,7 +85,7 @@ An optimal allocation would require knowing what VMs you would start/create in t
 Overall we want to balance the VMs across NUMA nodes, such that we use all NUMA nodes to take advantage of the maximum memory bandwidth available on the system.
 For now this proposed balancing will be done only by balancing memory usage: always heuristically allocating VMs on the NUMA node that has the most available memory.
 Note that this allocation has a race condition for now when multiple VMs are booted in parallel, because we don't wait until Xen has constructed the domain for each one (that'd serialize domain construction, which is currently parallel).
-This may be improved in the future by having an API to query Xen where it has allocated the memory, and to explicitly ask it to place memory on a given NUMA node (instead of best-effort).
+This may be improved in the future by having an API to query Xen where it has allocated the memory, and to explicitly ask it to place memory on a given NUMA node (instead of best_effort).
 
 If a VM doesn't fit into a single node then it is not so clear what the best approach is.
 One criteria to consider is minimizing the NUMA distance between the nodes chosen for the VM.
@@ -128,23 +128,23 @@ See page 13 in [^AMD_numa] for a diagram of an AMD Opteron 6272 system.
 ## XAPI datamodel design
 
 * New API field: `Host.numa_affinity_policy`.
-* Choices: `default`, `any`, `best-effort`.
-* On upgrade the field is set to `default`
+* Choices: `default_policy`, `any`, `best_effort`.
+* On upgrade the field is set to `default_policy`
 * Changes in the field only affect newly (re)booted VMs, for changes to take effect on existing VMs a host evacuation or reboot is needed
 
 There may be more choices in the future (e.g. `strict`, which requires both Xen and toolstack changes).
 
 Meaning of the policy:
 * `any`: the Xen default where it allocated memory by striping across NUMA nodes
-* `best-effort`: the algorithm described in this document, where soft pinning is used to achieve better balancing and lower latency
-* `default`: when the admin hasn't expressed a preference
+* `best_effort`: the algorithm described in this document, where soft pinning is used to achieve better balancing and lower latency
+* `default_policy`: when the admin hasn't expressed a preference
 
-* Currently `default` is treated as `any`, but the admin can change it, and then the system will remember that change across upgrades.
-  If we didn't have a `default` then changing the "default" policy on an upgrade would be tricky: we either risk overriding an explicit choice of the admin, or existing installs cannot take advantage of the improved performance from `best-effort`
-* Future XAPI versions may change `default` to mean `best-effort`.
+* Currently `default_policy` is treated as `any`, but the admin can change it, and then the system will remember that change across upgrades.
+  If we didn't have a `default_policy` then changing the "default" policy on an upgrade would be tricky: we either risk overriding an explicit choice of the admin, or existing installs cannot take advantage of the improved performance from `best_effort`
+* Future XAPI versions may change `default_policy` to mean `best_effort`.
   Admins can still override it to `any` if they wish on a host by host basis.
 
-It is not expected that users would have to change `best-effort`, unless they run very specific workloads, so a pool level control is not provided at this moment.
+It is not expected that users would have to change `best_effort`, unless they run very specific workloads, so a pool level control is not provided at this moment.
 
 There is also no separate feature flag: this host flag acts as a feature flag that can be set through the API without restarting the toolstack. Although obviously only new VMs will benefit.
 
@@ -173,7 +173,7 @@ Tests are in [test_topology.ml] which checks balancing properties and whether th
 
 ## Future work
 
-* enable 'best-effort' mode by default once more testing has been done
+* enable 'best_effort' mode by default once more testing has been done
 * an API to query Xen where it has actually allocated the VM's memory.
   Currently only an `xl debug-keys` interface exists which is not supported in production as it can result in killing the host via the watchdog, and is not a proper API, but a textual debug output with no stability guarantees.
 * more host policies (e.g. `strict`).
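The doc changes above pin down the semantics of the three values: `default_policy` records "the admin expressed no preference", currently behaves as `any`, and may later be re-pointed to `best_effort` without clobbering an explicit admin choice. A standalone OCaml sketch of that resolution logic (the type and function names here are illustrative, not xapi's):

```ocaml
(* Illustrative only: the policy semantics described in the NUMA doc.
   [Default_policy] is kept distinct from [Any] so a future release can
   change what the default means without overriding explicit choices. *)
type policy = Any | Best_effort | Default_policy

let to_wire = function
  | Any -> "any"
  | Best_effort -> "best_effort"
  | Default_policy -> "default_policy"

(* Today the default resolves to Any; a future XAPI may return
   Best_effort here without any further datamodel change. *)
let resolve = function Default_policy -> Any | explicit -> explicit

let () =
  assert (resolve Default_policy = Any) ;
  assert (resolve Best_effort = Best_effort) ;
  Printf.printf "default resolves to %s\n" (to_wire (resolve Default_policy))
```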
diff --git a/ocaml/idl/datamodel_common.ml b/ocaml/idl/datamodel_common.ml
index ec7f759d632..d843a26810e 100644
--- a/ocaml/idl/datamodel_common.ml
+++ b/ocaml/idl/datamodel_common.ml
@@ -10,7 +10,7 @@ open Datamodel_roles
    to leave a gap for potential hotfixes needing to increment the schema
    version.*)
 let schema_major_vsn = 5
-let schema_minor_vsn = 770
+let schema_minor_vsn = 771
 
 (* Historical schema versions just in case this is useful later *)
 let rio_schema_major_vsn = 5
diff --git a/ocaml/idl/datamodel_host.ml b/ocaml/idl/datamodel_host.ml
index 3d1f11677a1..062112b5fed 100644
--- a/ocaml/idl/datamodel_host.ml
+++ b/ocaml/idl/datamodel_host.ml
@@ -1673,12 +1673,12 @@ let host_numa_affinity_policy =
     ( "host_numa_affinity_policy"
     , [
         ("any", "VMs are spread across all available NUMA nodes")
-      ; ( "best-effort"
+      ; ( "best_effort"
         , "VMs are placed on the smallest number of NUMA nodes that they fit \
            using soft-pinning, but the policy doesn't guarantee a balanced \
            placement, falling back to the 'any' policy."
         )
-      ; ( "default"
+      ; ( "default_policy"
         , "Use the NUMA affinity policy that is the default for the current \
            version"
         )
@@ -2196,7 +2196,8 @@ let t =
             "Default as 'unknown', 'yes' if the host is up to date with \
              updates synced from remote CDN, otherwise 'no'"
         ; field ~qualifier:DynamicRO ~lifecycle:[] ~ty:host_numa_affinity_policy
-            "numa_affinity_policy" ~default_value:(Some (VEnum "default"))
+            "numa_affinity_policy"
+            ~default_value:(Some (VEnum "default_policy"))
             "NUMA-aware VM memory and vCPU placement policy"
       ]
     )
diff --git a/ocaml/idl/schematest.ml b/ocaml/idl/schematest.ml
index aa95fc01eaa..cdbe739e238 100644
--- a/ocaml/idl/schematest.ml
+++ b/ocaml/idl/schematest.ml
@@ -3,7 +3,7 @@ let hash x = Digest.string x |> Digest.to_hex
 
 (* BEWARE: if this changes, check that schema has been bumped accordingly in
    ocaml/idl/datamodel_common.ml, usually schema_minor_vsn *)
-let last_known_schema_hash = "2ec2aeef2405d6bd73ab6beb44185532"
+let last_known_schema_hash = "29a820cd4c81617f07b43c9b4f934317"
 
 let current_schema_hash : string =
   let open Datamodel_types in
diff --git a/ocaml/tests/common/test_common.ml b/ocaml/tests/common/test_common.ml
index 510db04352f..a33c55b2246 100644
--- a/ocaml/tests/common/test_common.ml
+++ b/ocaml/tests/common/test_common.ml
@@ -208,7 +208,7 @@ let make_host2 ~__context ?(ref = Ref.make ()) ?(uuid = make_uuid ())
     ~display:`enabled ~virtual_hardware_platform_versions:[]
     ~control_domain:Ref.null ~updates_requiring_reboot:[] ~iscsi_iqn:""
     ~multipathing:false ~uefi_certificates:"" ~editions:[] ~pending_guidances:[]
-    ~tls_verification_enabled ~numa_affinity_policy:`default
+    ~tls_verification_enabled ~numa_affinity_policy:`default_policy
     ~last_software_update:(Xapi_host.get_servertime ~__context ~host:ref)
     ~recommended_guidances:[] ~latest_synced_updates_applied:`unknown ;
   ref
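The `schematest.ml` hunk above updates the pinned hash because renaming enum values changes the serialised datamodel; the guard is essentially a digest comparison that forces the version bump seen in `datamodel_common.ml`. A sketch of that mechanism, where `serialise_datamodel` is a hypothetical stand-in for the real serialisation:

```ocaml
(* Sketch of the guard that ocaml/idl/schematest.ml enforces: any
   datamodel change (such as best-effort -> best_effort) changes this
   digest, which forces an explicit schema_minor_vsn bump alongside the
   new pinned hash. *)
let last_known_schema_hash = "29a820cd4c81617f07b43c9b4f934317"

(* Hypothetical: stands in for serialising the real datamodel. *)
let serialise_datamodel () = "...datamodel rendered to a string..."

let () =
  let hash = Digest.to_hex (Digest.string (serialise_datamodel ())) in
  if hash <> last_known_schema_hash then (
    prerr_endline
      "Schema changed: bump schema_minor_vsn in datamodel_common.ml and \
       update last_known_schema_hash" ;
    exit 1
  )
```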
diff --git a/ocaml/xapi-cli-server/record_util.ml b/ocaml/xapi-cli-server/record_util.ml
index 8c2bad7e338..e3f2483ae76 100644
--- a/ocaml/xapi-cli-server/record_util.ml
+++ b/ocaml/xapi-cli-server/record_util.ml
@@ -709,21 +709,23 @@ let host_sched_gran_to_string = function
 
 let host_numa_affinity_policy_to_string = function
   | `any ->
       "any"
-  | `besteffort ->
-      "best-effort"
-  | `default ->
-      "default"
+  | `best_effort ->
+      "best_effort"
+  | `default_policy ->
+      "default_policy"
 
 let host_numa_affinity_policy_of_string = function
   | "any" ->
       `any
-  | "best-effort" ->
-      `besteffort
-  | "default" ->
-      `default
+  | "best_effort" ->
+      `best_effort
+  | "default_policy" ->
+      `default_policy
   | s ->
       raise
-        (Record_failure ("Expected 'any', 'best-effort' or 'default', got " ^ s))
+        (Record_failure
+           ("Expected 'any', 'best_effort' or 'default_policy', got " ^ s)
+        )
 
 let pgpu_dom0_access_to_string x = host_display_to_string x
diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml
index caeed6208e5..2006e4e2710 100644
--- a/ocaml/xapi/xapi_host.ml
+++ b/ocaml/xapi/xapi_host.ml
@@ -1039,7 +1039,7 @@ let create ~__context ~uuid ~name_label ~name_description:_ ~hostname ~address
       ~cpu_configuration:[] (* !!! FIXME hard coding *)
       ~cpu_info:[] ~chipset_info ~memory_overhead:0L
       ~sched_policy:"credit" (* !!! FIXME hard coding *)
-      ~numa_affinity_policy:`default
+      ~numa_affinity_policy:`default_policy
       ~supported_bootloaders:(List.map fst Xapi_globs.supported_bootloaders)
       ~suspend_image_sr:Ref.null ~crash_dump_sr:Ref.null ~logging:[] ~hostname
       ~address ~metrics ~license_params ~boot_free_mem:0L ~ha_statefiles:[]
diff --git a/ocaml/xapi/xapi_xenops.ml b/ocaml/xapi/xapi_xenops.ml
index 0c44113087f..bbb2389cc59 100644
--- a/ocaml/xapi/xapi_xenops.ml
+++ b/ocaml/xapi/xapi_xenops.ml
@@ -3125,9 +3125,9 @@ let set_numa_affinity_policy ~__context ~value =
     match value with
     | `any ->
         Any
-    | `besteffort ->
+    | `best_effort ->
         Best_effort
-    | `default ->
+    | `default_policy ->
         Any
   in
   Client.HOST.set_numa_affinity_policy dbg value
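Taken together, the rename must keep `host_numa_affinity_policy_to_string` and `host_numa_affinity_policy_of_string` mutually inverse, and the old hyphenated spelling must now be rejected. A self-contained round-trip check in the same style (it mirrors the patched `record_util.ml` code above; it is not the xapi test suite):

```ocaml
(* Standalone round-trip check mirroring the record_util.ml conversions
   above: every policy survives to_string/of_string, and the pre-rename
   spelling "best-effort" is rejected. *)
exception Record_failure of string

let to_string = function
  | `any -> "any"
  | `best_effort -> "best_effort"
  | `default_policy -> "default_policy"

let of_string = function
  | "any" -> `any
  | "best_effort" -> `best_effort
  | "default_policy" -> `default_policy
  | s ->
      raise
        (Record_failure
           ("Expected 'any', 'best_effort' or 'default_policy', got " ^ s)
        )

let () =
  List.iter
    (fun p -> assert (of_string (to_string p) = p))
    [`any; `best_effort; `default_policy] ;
  assert (
    try ignore (of_string "best-effort") ; false
    with Record_failure _ -> true
  ) ;
  print_endline "round-trip OK"
```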