Currently, we try to remap all rings whenever we add a single new memory
region. That doesn't quite make sense, because we already map rings when
setting the ring address, and panic if that goes wrong. Likely, that
handling was simply copied from the set_mem_table code, where we actually
have to remap all rings.

Remapping all rings might require us to walk quite a lot of memory
regions to perform the address translations. Ideally, we'd simply remove
that remapping.

However, let's be a bit careful. There might be some weird corner cases
where we temporarily remove a single memory region (e.g., to resize it)
that would have kept working so far. Further, a ring might be located on
hotplugged memory, and as the VM reboots, we might unplug that memory and
hotplug memory before resetting the ring addresses.

So let's unmap affected rings as we remove a memory region, and try
dynamically mapping the ring again when required.
Signed-off-by: David Hildenbrand <david@redhat.com>
---
subprojects/libvhost-user/libvhost-user.c | 107 ++++++++++++++++------
1 file changed, 78 insertions(+), 29 deletions(-)
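
Note for reviewers less familiar with the flow: below is a minimal,
self-contained toy model of the lifecycle this patch introduces. The types
and helpers (ring_t, translate(), ring_unmap(), ring_usable()) are
simplified stand-ins for illustration only, not the real VuDev/VuVirtq or
qva_to_va(); the point is just that cached host mappings get dropped when
the backing region goes away and are re-translated on demand from the
stored ring addresses.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* One fake memory region: a guest VA range backed by a host buffer. */
static uint64_t region_gpa_start, region_size;
static uint8_t region_host[4096];
static bool region_present;

/* Stand-in for qva_to_va(): NULL when no region covers the address. */
static void *translate(uint64_t guest_va)
{
    if (region_present && guest_va >= region_gpa_start &&
        guest_va < region_gpa_start + region_size) {
        return region_host + (guest_va - region_gpa_start);
    }
    return NULL;
}

typedef struct {
    uint64_t desc_ua, used_ua, avail_ua;   /* addresses from SET_VRING_ADDR */
    void *desc, *used, *avail;             /* cached host mappings */
} ring_t;

/* Called when the backing region is removed: drop the cached mappings. */
static void ring_unmap(ring_t *r)
{
    r->desc = r->used = r->avail = NULL;
}

/* Fast path if already mapped; otherwise try to remap on demand. */
static bool ring_usable(ring_t *r)
{
    if (r->avail) {
        return true;
    }
    if (!r->desc_ua || !r->used_ua || !r->avail_ua) {
        return false;                      /* ring addresses never set */
    }
    r->desc = translate(r->desc_ua);
    r->used = translate(r->used_ua);
    r->avail = translate(r->avail_ua);
    return r->desc && r->used && r->avail;
}

int main(void)
{
    ring_t vq = { .desc_ua = 0x1000, .used_ua = 0x1100, .avail_ua = 0x1200 };

    region_present = true;
    region_gpa_start = 0x1000;
    region_size = sizeof(region_host);
    printf("usable: %d\n", ring_usable(&vq));   /* 1: mapped on first use */

    region_present = false;                     /* region temporarily removed */
    ring_unmap(&vq);
    printf("usable: %d\n", ring_usable(&vq));   /* 0: cannot translate */

    region_present = true;                      /* region re-added */
    printf("usable: %d\n", ring_usable(&vq));   /* 1: remapped on demand */
    return 0;
}
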
diff --git a/subprojects/libvhost-user/libvhost-user.c b/subprojects/libvhost-user/libvhost-user.c
index febeb2eb89..738e84ab63 100644
--- a/subprojects/libvhost-user/libvhost-user.c
+++ b/subprojects/libvhost-user/libvhost-user.c
@@ -283,10 +283,75 @@ vu_remove_all_mem_regs(VuDev *dev)
dev->nregions = 0;
}
+static bool
+map_ring(VuDev *dev, VuVirtq *vq)
+{
+ vq->vring.desc = qva_to_va(dev, vq->vra.desc_user_addr);
+ vq->vring.used = qva_to_va(dev, vq->vra.used_user_addr);
+ vq->vring.avail = qva_to_va(dev, vq->vra.avail_user_addr);
+
+ DPRINT("Setting virtq addresses:\n");
+ DPRINT(" vring_desc at %p\n", vq->vring.desc);
+ DPRINT(" vring_used at %p\n", vq->vring.used);
+ DPRINT(" vring_avail at %p\n", vq->vring.avail);
+
+ return !(vq->vring.desc && vq->vring.used && vq->vring.avail);
+}
+
static bool
vu_is_vq_usable(VuDev *dev, VuVirtq *vq)
{
- return likely(!dev->broken) && likely(vq->vring.avail);
+ if (unlikely(dev->broken)) {
+ return false;
+ }
+
+ if (likely(vq->vring.avail)) {
+ return true;
+ }
+
+ /*
+ * In corner cases, we might temporarily remove a memory region that
+ * mapped a ring. When removing a memory region we make sure to
+ * unmap any rings that would be impacted. Let's try to remap if we
+ * already succeeded mapping this ring once.
+ */
+ if (!vq->vra.desc_user_addr || !vq->vra.used_user_addr ||
+ !vq->vra.avail_user_addr) {
+ return false;
+ }
+ if (map_ring(dev, vq)) {
+ vu_panic(dev, "remapping queue on access");
+ return false;
+ }
+ return true;
+}
+
+static void
+unmap_rings(VuDev *dev, VuDevRegion *r)
+{
+ int i;
+
+ for (i = 0; i < dev->max_queues; i++) {
+ VuVirtq *vq = &dev->vq[i];
+ const uintptr_t desc = (uintptr_t)vq->vring.desc;
+ const uintptr_t used = (uintptr_t)vq->vring.used;
+ const uintptr_t avail = (uintptr_t)vq->vring.avail;
+
+ if (desc < r->mmap_addr || desc >= r->mmap_addr + r->size) {
+ continue;
+ }
+ if (used < r->mmap_addr || used >= r->mmap_addr + r->size) {
+ continue;
+ }
+ if (avail < r->mmap_addr || avail >= r->mmap_addr + r->size) {
+ continue;
+ }
+
+ DPRINT("Unmapping rings of queue %d\n", i);
+ vq->vring.desc = NULL;
+ vq->vring.used = NULL;
+ vq->vring.avail = NULL;
+ }
}
static size_t
@@ -784,21 +849,6 @@ vu_reset_device_exec(VuDev *dev, VhostUserMsg *vmsg)
return false;
}
-static bool
-map_ring(VuDev *dev, VuVirtq *vq)
-{
- vq->vring.desc = qva_to_va(dev, vq->vra.desc_user_addr);
- vq->vring.used = qva_to_va(dev, vq->vra.used_user_addr);
- vq->vring.avail = qva_to_va(dev, vq->vra.avail_user_addr);
-
- DPRINT("Setting virtq addresses:\n");
- DPRINT(" vring_desc at %p\n", vq->vring.desc);
- DPRINT(" vring_used at %p\n", vq->vring.used);
- DPRINT(" vring_avail at %p\n", vq->vring.avail);
-
- return !(vq->vring.desc && vq->vring.used && vq->vring.avail);
-}
-
static bool
generate_faults(VuDev *dev) {
unsigned int i;
@@ -882,7 +932,6 @@ generate_faults(VuDev *dev) {
static bool
vu_add_mem_reg(VuDev *dev, VhostUserMsg *vmsg) {
- int i;
VhostUserMemoryRegion m = vmsg->payload.memreg.region, *msg_region = &m;
if (vmsg->fd_num != 1) {
@@ -928,19 +977,9 @@ vu_add_mem_reg(VuDev *dev, VhostUserMsg *vmsg) {
vmsg->fd_num = 0;
DPRINT("Successfully added new region in postcopy\n");
return true;
- } else {
- for (i = 0; i < dev->max_queues; i++) {
- if (dev->vq[i].vring.desc) {
- if (map_ring(dev, &dev->vq[i])) {
- vu_panic(dev, "remapping queue %d for new memory region",
- i);
- }
- }
- }
-
- DPRINT("Successfully added new region\n");
- return false;
}
+ DPRINT("Successfully added new region\n");
+ return false;
}
static inline bool reg_equal(VuDevRegion *vudev_reg,
@@ -993,6 +1032,16 @@ vu_rem_mem_reg(VuDev *dev, VhostUserMsg *vmsg) {
return false;
}
+ /*
+ * There might be valid cases where we temporarily remove memory regions
+ * to readd them again, or remove memory regions and don't use the rings
+ * anymore before we set the ring addresses and restart the device.
+ *
+ * Unmap all affected rings, remapping them on demand later. This should
+ * be a corner case.
+ */
+ unmap_rings(dev, r);
+
munmap((void *)(uintptr_t)r->mmap_addr, r->size + r->mmap_offset);
idx = r - dev->regions;
--
2.43.0
Someone else with more knowledge of the VQ mapping code should also review.

On Fri, Feb 2, 2024 at 4:55 PM David Hildenbrand <david@redhat.com> wrote:
>
> Currently, we try to remap all rings whenever we add a single new memory
> region. That doesn't quite make sense, because we already map rings when
> setting the ring address, and panic if that goes wrong.
[...]
> So let's unmap affected rings as we remove a memory region, and try
> dynamically mapping the ring again when required.
>
> Signed-off-by: David Hildenbrand <david@redhat.com>

Acked-by: Raphael Norwitz <raphael@enfabrica.net>

> ---
>  subprojects/libvhost-user/libvhost-user.c | 107 ++++++++++++++++------
>  1 file changed, 78 insertions(+), 29 deletions(-)
[...]
> +static bool
> +map_ring(VuDev *dev, VuVirtq *vq)
> +{
[...]
> +}
> +
>  static bool

Consider changing the function name to indicate that it may actually map a vq?

Maybe vu_maybe_map_vq()?

>  vu_is_vq_usable(VuDev *dev, VuVirtq *vq)
>  {
> -    return likely(!dev->broken) && likely(vq->vring.avail);
> +    if (unlikely(dev->broken)) {
> +        return false;
> +    }
[...]
On 04.02.24 03:15, Raphael Norwitz wrote:
> Someone else with more knowledge of the VQ mapping code should also review.
>
[...]
>> Signed-off-by: David Hildenbrand <david@redhat.com>
>
> Acked-by: Raphael Norwitz <raphael@enfabrica.net>

Thanks!

[...]
>>  static bool
>
> Consider changing the function name to indicate that it may actually map a vq?
>
> Maybe vu_maybe_map_vq()?

I don't think that would be really better. It's an implementation detail
that we try to recover in these corner cases by remapping the rings.

In the majority of all cases this function will simply check whether the
device is broken and the vring was set up properly (which usually implies
mapping the rings).

So I think in the caller:

if (!vu_is_vq_usable()) {
    return;
}

is easier to get than:

if (!vu_maybe_map_vq()) {
    return;
}

Thanks!

--
Cheers,

David / dhildenb
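
[Editorial note: to illustrate the caller pattern discussed above, here is a
hypothetical internal helper as it might look inside libvhost-user.c next to
vu_is_vq_usable(). process_queue() is made up for illustration and is not part
of the patch or the library; only vu_is_vq_usable() comes from the code above.]

/* Hypothetical caller inside libvhost-user.c, for illustration only. */
static void process_queue(VuDev *dev, VuVirtq *vq)
{
    if (!vu_is_vq_usable(dev, vq)) {
        /* Broken device, or the ring cannot be (re)mapped right now. */
        return;
    }
    /* Safe to dereference vq->vring.desc/avail/used from here on. */
}
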