-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
qemu-static: add patches for VFIO support on SNP
Upstream QEMU can't launch SNP VMs with VFIO devices. Apply the patches from this patch series: https://patchwork.kernel.org/project/kvm/cover/[email protected]/ We also need another patch to support large devices (such as GPUs).
- Loading branch information
Showing
8 changed files
with
834 additions
and
0 deletions.
There are no files selected for viewing
417 changes: 417 additions & 0 deletions
417
packages/by-name/qemu-static/0003-guest_memfd-Introduce-an-object-to-manage-the-guest-.patch
Large diffs are not rendered by default.
Oops, something went wrong.
199 changes: 199 additions & 0 deletions
199
packages/by-name/qemu-static/0004-guest_memfd-Introduce-a-helper-to-notify-the-shared-.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,199 @@ | ||
From cfc2bc7492cc3cc1f713dcd7d73c55bc2caac65a Mon Sep 17 00:00:00 2001 | ||
From: Chenyi Qiang <[email protected]> | ||
Date: Thu, 25 Jul 2024 03:21:11 -0400 | ||
Subject: [PATCH 2/6] guest_memfd: Introduce a helper to notify the | ||
shared/private state change | ||
|
||
Introduce a helper function within RamDiscardManager to efficiently | ||
notify all registered RamDiscardListeners, including VFIO listeners, | ||
about the memory conversion events between shared and private in | ||
guest_memfd. The existing VFIO listener can dynamically DMA map/unmap | ||
the shared pages based on the conversion type: | ||
- For conversions from shared to private, the VFIO system ensures the | ||
discarding of shared mapping from the IOMMU. | ||
- For conversions from private to shared, it triggers the population of | ||
the shared mapping into the IOMMU. | ||
|
||
Additionally, there could be some special conversion requests: | ||
- When a conversion request is made for a page already in the desired | ||
state (either private or shared), the helper simply returns success. | ||
- For requests involving a range partially in the desired state, only | ||
the necessary segments are converted, ensuring the entire range | ||
complies with the request efficiently. | ||
- In scenarios where a conversion request is declined by other systems, | ||
such as a failure from VFIO during notify_populate(), the helper will | ||
roll back the request, maintaining consistency. | ||
|
||
Signed-off-by: Chenyi Qiang <[email protected]> | ||
--- | ||
include/sysemu/guest-memfd-manager.h | 3 + | ||
system/guest-memfd-manager.c | 141 +++++++++++++++++++++++++++ | ||
2 files changed, 144 insertions(+) | ||
|
||
diff --git a/include/sysemu/guest-memfd-manager.h b/include/sysemu/guest-memfd-manager.h | ||
index ab8c2ba362..1cce4cde43 100644 | ||
--- a/include/sysemu/guest-memfd-manager.h | ||
+++ b/include/sysemu/guest-memfd-manager.h | ||
@@ -43,4 +43,7 @@ struct GuestMemfdManagerClass { | ||
void (*realize)(Object *gmm, MemoryRegion *mr, uint64_t region_size); | ||
}; | ||
|
||
+int guest_memfd_state_change(GuestMemfdManager *gmm, uint64_t offset, uint64_t size, | ||
+ bool shared_to_private); | ||
+ | ||
#endif | ||
diff --git a/system/guest-memfd-manager.c b/system/guest-memfd-manager.c | ||
index 7b90f26859..deb43db90b 100644 | ||
--- a/system/guest-memfd-manager.c | ||
+++ b/system/guest-memfd-manager.c | ||
@@ -243,6 +243,147 @@ static void guest_memfd_rdm_replay_discarded(const RamDiscardManager *rdm, | ||
guest_memfd_rdm_replay_discarded_cb); | ||
} | ||
|
||
+/* Accept only non-empty, block-aligned ranges that lie inside gmm->mr. */ | ||
+static bool guest_memfd_is_valid_range(GuestMemfdManager *gmm, | ||
+                                       uint64_t offset, uint64_t size) | ||
+{ | ||
+    uint64_t region_size; | ||
+ | ||
+    g_assert(gmm->mr); | ||
+    region_size = memory_region_size(gmm->mr); | ||
+ | ||
+    /* The caller computes size / block_size; a partial block would be lost. */ | ||
+    if (!QEMU_IS_ALIGNED(offset, gmm->block_size) || | ||
+        !QEMU_IS_ALIGNED(size, gmm->block_size)) { | ||
+        return false; | ||
+    } | ||
+    if (!size || offset + size < offset) { | ||
+        return false; | ||
+    } | ||
+    return offset + size <= region_size; | ||
+} | ||
+ | ||
+/* Run the discard callback on populated ranges of each intersecting listener. */ | ||
+static void guest_memfd_notify_discard(GuestMemfdManager *gmm, | ||
+                                       uint64_t offset, uint64_t size) | ||
+{ | ||
+    RamDiscardListener *listener; | ||
+ | ||
+    QLIST_FOREACH(listener, &gmm->rdl_list, next) { | ||
+        /* Local copy: the helper gets a pointer to it, not to the original. */ | ||
+        MemoryRegionSection clipped = *listener->section; | ||
+ | ||
+        if (guest_memfd_rdm_intersect_memory_section(&clipped, offset, size)) { | ||
+            guest_memfd_for_each_populated_range(gmm, &clipped, listener, | ||
+                                                 guest_memfd_notify_discard_cb); | ||
+        } | ||
+    } | ||
+} | ||
+ | ||
+ | ||
+/* | ||
+ * Run guest_memfd_notify_populate_cb on the discarded ranges of each listener | ||
+ * in [offset, offset + size); on failure undo earlier listeners and return it. | ||
+ */ | ||
+static int guest_memfd_notify_populate(GuestMemfdManager *gmm, | ||
+                                       uint64_t offset, uint64_t size) | ||
+{ | ||
+    RamDiscardListener *failed, *undo; | ||
+    int ret = 0; | ||
+ | ||
+    QLIST_FOREACH(failed, &gmm->rdl_list, next) { | ||
+        MemoryRegionSection clipped = *failed->section; | ||
+        if (!guest_memfd_rdm_intersect_memory_section(&clipped, offset, size)) { | ||
+            continue; | ||
+        } | ||
+        ret = guest_memfd_for_each_discarded_range(gmm, &clipped, failed, | ||
+                                                   guest_memfd_notify_populate_cb); | ||
+        if (ret) { | ||
+            break; | ||
+        } | ||
+    } | ||
+    if (!ret) { | ||
+        return 0; | ||
+    } | ||
+ | ||
+    /* Roll back, stopping at the listener whose callback failed. */ | ||
+    QLIST_FOREACH(undo, &gmm->rdl_list, next) { | ||
+        MemoryRegionSection clipped = *undo->section; | ||
+        if (undo == failed) { | ||
+            break; | ||
+        } | ||
+        if (guest_memfd_rdm_intersect_memory_section(&clipped, offset, size)) { | ||
+            guest_memfd_for_each_discarded_range(gmm, &clipped, undo, | ||
+                                                 guest_memfd_notify_discard_cb); | ||
+        } | ||
+    } | ||
+    return ret; | ||
+} | ||
+ | ||
+/* True when no bit of discard_bitmap is set across the blocks of the range. */ | ||
+static bool guest_memfd_is_range_populated(GuestMemfdManager *gmm, | ||
+                                           uint64_t offset, uint64_t size) | ||
+{ | ||
+    const unsigned long lo = offset / gmm->block_size; | ||
+    const unsigned long end = lo + (size / gmm->block_size); | ||
+    /* Using end as the bitmap length keeps the scan inside the range. */ | ||
+    const unsigned long hit = find_next_bit(gmm->discard_bitmap, end, lo); | ||
+ | ||
+    return hit > end - 1; | ||
+} | ||
+ | ||
+/* True when no bit of discard_bitmap is clear across the blocks of the range. */ | ||
+static bool guest_memfd_is_range_discarded(GuestMemfdManager *gmm, | ||
+                                           uint64_t offset, uint64_t size) | ||
+{ | ||
+    const unsigned long lo = offset / gmm->block_size; | ||
+    const unsigned long end = lo + (size / gmm->block_size); | ||
+    /* Using end as the bitmap length keeps the scan inside the range. */ | ||
+    const unsigned long hit = find_next_zero_bit(gmm->discard_bitmap, end, lo); | ||
+ | ||
+    return hit > end - 1; | ||
+} | ||
+ | ||
+/* | ||
+ * Move [offset, offset + size) to private (shared_to_private) or shared state: | ||
+ * notify the listeners, then update discard_bitmap. Returns 0 on success or if | ||
+ * the range is already in that state, -1 if invalid, else the populate error. | ||
+ */ | ||
+int guest_memfd_state_change(GuestMemfdManager *gmm, uint64_t offset, uint64_t size, | ||
+                             bool shared_to_private) | ||
+{ | ||
+    unsigned long first_bit, nbits; | ||
+    int ret = 0; | ||
+ | ||
+    if (!guest_memfd_is_valid_range(gmm, offset, size)) { | ||
+        error_report("%s, invalid range: offset 0x%" PRIx64 ", size 0x%" PRIx64, | ||
+                     __func__, offset, size); | ||
+        return -1; | ||
+    } | ||
+    if ((shared_to_private && guest_memfd_is_range_discarded(gmm, offset, size)) || | ||
+        (!shared_to_private && guest_memfd_is_range_populated(gmm, offset, size))) { | ||
+        return 0; | ||
+    } | ||
+ | ||
+    if (shared_to_private) { | ||
+        guest_memfd_notify_discard(gmm, offset, size); | ||
+    } else { | ||
+        ret = guest_memfd_notify_populate(gmm, offset, size); | ||
+    } | ||
+    if (ret) { | ||
+        return ret; /* Bitmap stays unchanged when a listener refused. */ | ||
+    } | ||
+    first_bit = offset / gmm->block_size; | ||
+    nbits = size / gmm->block_size; | ||
+    g_assert((first_bit + nbits) <= gmm->discard_bitmap_size); | ||
+    if (shared_to_private) { | ||
+        bitmap_set(gmm->discard_bitmap, first_bit, nbits); | ||
+    } else { | ||
+        bitmap_clear(gmm->discard_bitmap, first_bit, nbits); | ||
+    } | ||
+    return 0; | ||
+} | ||
+ | ||
static void guest_memfd_manager_realize(Object *obj, MemoryRegion *mr, | ||
uint64_t region_size) | ||
{ | ||
-- | ||
2.34.1 | ||
|
51 changes: 51 additions & 0 deletions
51
packages/by-name/qemu-static/0005-KVM-Notify-the-state-change-via-RamDiscardManager-he.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
From 35a523caf0cdab45782fe1330e991733b8d85635 Mon Sep 17 00:00:00 2001 | ||
From: Chenyi Qiang <[email protected]> | ||
Date: Thu, 25 Jul 2024 03:21:12 -0400 | ||
Subject: [PATCH 3/6] KVM: Notify the state change via RamDiscardManager helper | ||
during shared/private conversion | ||
|
||
Once KVM exits to userspace to convert a page from private to shared or | ||
vice versa at runtime, notify the state change via the | ||
guest_memfd_state_change() helper so that other registered subsystems | ||
like VFIO can be notified. | ||
|
||
Signed-off-by: Chenyi Qiang <[email protected]> | ||
--- | ||
accel/kvm/kvm-all.c | 7 +++++++ | ||
1 file changed, 7 insertions(+) | ||
|
||
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c | ||
index acc23092e7..42e05e82c7 100644 | ||
--- a/accel/kvm/kvm-all.c | ||
+++ b/accel/kvm/kvm-all.c | ||
@@ -48,6 +48,7 @@ | ||
#include "kvm-cpus.h" | ||
#include "sysemu/dirtylimit.h" | ||
#include "qemu/range.h" | ||
+#include "sysemu/guest-memfd-manager.h" | ||
|
||
#include "hw/boards.h" | ||
#include "sysemu/stats.h" | ||
@@ -2896,6 +2897,7 @@ int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private) | ||
RAMBlock *rb; | ||
void *addr; | ||
int ret = -1; | ||
+ GuestMemfdManager *gmm; | ||
|
||
trace_kvm_convert_memory(start, size, to_private ? "shared_to_private" : "private_to_shared"); | ||
|
||
@@ -2958,6 +2960,11 @@ int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private) | ||
addr = memory_region_get_ram_ptr(mr) + section.offset_within_region; | ||
rb = qemu_ram_block_from_host(addr, false, &offset); | ||
|
||
+ gmm = GUEST_MEMFD_MANAGER(mr->rdm); | ||
+ if (gmm) { | ||
+ guest_memfd_state_change(gmm, offset, size, to_private); | ||
+ } | ||
+ | ||
if (to_private) { | ||
if (rb->page_size != qemu_real_host_page_size()) { | ||
/* | ||
-- | ||
2.34.1 | ||
|
53 changes: 53 additions & 0 deletions
53
packages/by-name/qemu-static/0006-memory-Register-the-RamDiscardManager-instance-upon-.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
From 1edf8d61a13344f820bc7f2d489386061fb560c5 Mon Sep 17 00:00:00 2001 | ||
From: Chenyi Qiang <[email protected]> | ||
Date: Thu, 25 Jul 2024 03:21:13 -0400 | ||
Subject: [PATCH 4/6] memory: Register the RamDiscardManager instance upon | ||
guest_memfd creation | ||
|
||
Instantiate a new guest_memfd_manager object and register it in the | ||
target MemoryRegion. From this point, other subsystems such as VFIO can | ||
register their listeners in guest_memfd_manager and receive conversion | ||
events through RamDiscardManager. | ||
|
||
Signed-off-by: Chenyi Qiang <[email protected]> | ||
--- | ||
system/physmem.c | 9 +++++++++ | ||
1 file changed, 9 insertions(+) | ||
|
||
diff --git a/system/physmem.c b/system/physmem.c | ||
index 94600a33ec..a10f769cb5 100644 | ||
--- a/system/physmem.c | ||
+++ b/system/physmem.c | ||
@@ -53,6 +53,7 @@ | ||
#include "sysemu/hostmem.h" | ||
#include "sysemu/hw_accel.h" | ||
#include "sysemu/xen-mapcache.h" | ||
+#include "sysemu/guest-memfd-manager.h" | ||
#include "trace.h" | ||
|
||
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE | ||
@@ -1899,6 +1900,12 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) | ||
qemu_mutex_unlock_ramlist(); | ||
goto out_free; | ||
} | ||
+ | ||
+ GuestMemfdManager *gmm = GUEST_MEMFD_MANAGER(object_new(TYPE_GUEST_MEMFD_MANAGER)); | ||
+ GuestMemfdManagerClass *gmmc = GUEST_MEMFD_MANAGER_GET_CLASS(gmm); | ||
+ g_assert(new_block->mr); | ||
+ gmmc->realize(OBJECT(gmm), new_block->mr, new_block->mr->size); | ||
+ memory_region_set_ram_discard_manager(gmm->mr, RAM_DISCARD_MANAGER(gmm)); | ||
} | ||
|
||
new_ram_size = MAX(old_ram_size, | ||
@@ -2156,6 +2163,8 @@ static void reclaim_ramblock(RAMBlock *block) | ||
|
||
if (block->guest_memfd >= 0) { | ||
close(block->guest_memfd); | ||
+ g_assert(block->mr); | ||
+ object_unref(OBJECT(block->mr->rdm)); | ||
ram_block_discard_require(false); | ||
} | ||
|
||
-- | ||
2.34.1 | ||
|
47 changes: 47 additions & 0 deletions
47
packages/by-name/qemu-static/0007-guest-memfd-Default-to-discarded-private-in-guest_me.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
From 6b692d82049689c95fa3feb6522d847b6738aa3f Mon Sep 17 00:00:00 2001 | ||
From: Chenyi Qiang <[email protected]> | ||
Date: Thu, 25 Jul 2024 03:21:14 -0400 | ||
Subject: [PATCH 5/6] guest-memfd: Default to discarded (private) in | ||
guest_memfd_manager | ||
|
||
guest_memfd was initially set to shared until the commit bd3bcf6962 | ||
("kvm/memory: Make memory type private by default if it has guest memfd | ||
backend"). To align with this change, the default state in | ||
guest_memfd_manager is set to discarded. | ||
|
||
One concern raised by this commit is the handling of the virtual BIOS. | ||
The virtual BIOS loads its image into the shared memory of guest_memfd. | ||
However, during the region_commit() stage, the memory attribute is | ||
set to private while its shared memory remains valid. This mismatch | ||
persists until the shared content is copied to the private region. | ||
Fortunately, this interval only exists during the setup stage and currently, | ||
only the guest_memfd_manager is concerned with the state of the | ||
guest_memfd at that stage. For simplicity, the default bitmap in | ||
guest_memfd_manager is set to discarded (private). This is feasible | ||
because the shared content of the virtual BIOS will eventually be | ||
discarded and there are no requests to DMA access to this shared part | ||
during this period. | ||
|
||
Additionally, setting the default to private can also reduce the | ||
overhead of mapping shared pages into IOMMU by VFIO at the bootup stage. | ||
|
||
Signed-off-by: Chenyi Qiang <[email protected]> | ||
--- | ||
system/guest-memfd-manager.c | 1 + | ||
1 file changed, 1 insertion(+) | ||
|
||
diff --git a/system/guest-memfd-manager.c b/system/guest-memfd-manager.c | ||
index deb43db90b..ad1a46bac4 100644 | ||
--- a/system/guest-memfd-manager.c | ||
+++ b/system/guest-memfd-manager.c | ||
@@ -393,6 +393,7 @@ static void guest_memfd_manager_realize(Object *obj, MemoryRegion *mr, | ||
gmm->mr = mr; | ||
gmm->discard_bitmap_size = bitmap_size; | ||
gmm->discard_bitmap = bitmap_new(bitmap_size); | ||
+ bitmap_fill(gmm->discard_bitmap, bitmap_size); | ||
} | ||
|
||
static void guest_memfd_manager_init(Object *obj) | ||
-- | ||
2.34.1 | ||
|
29 changes: 29 additions & 0 deletions
29
packages/by-name/qemu-static/0008-RAMBlock-make-guest_memfd-require-coordinate-discard.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
From e9ba216a062efcfff2831edb214815cce88c80dd Mon Sep 17 00:00:00 2001 | ||
From: Chenyi Qiang <[email protected]> | ||
Date: Thu, 25 Jul 2024 03:21:15 -0400 | ||
Subject: [PATCH 6/6] RAMBlock: make guest_memfd require coordinate discard | ||
|
||
As guest_memfd is now managed by guest_memfd_manager with | ||
RamDiscardManager, only block uncoordinated discard. | ||
|
||
Signed-off-by: Chenyi Qiang <[email protected]> | ||
--- | ||
system/physmem.c | 2 +- | ||
1 file changed, 1 insertion(+), 1 deletion(-) | ||
|
||
diff --git a/system/physmem.c b/system/physmem.c | ||
index a10f769cb5..6aae81812e 100644 | ||
--- a/system/physmem.c | ||
+++ b/system/physmem.c | ||
@@ -1886,7 +1886,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) | ||
assert(kvm_enabled()); | ||
assert(new_block->guest_memfd < 0); | ||
|
||
- ret = ram_block_discard_require(true); | ||
+ ret = ram_block_coordinated_discard_require(true); | ||
if (ret < 0) { | ||
error_setg_errno(errp, -ret, | ||
"cannot set up private guest memory: discard currently blocked"); | ||
-- | ||
2.34.1 | ||
|
Oops, something went wrong.