Hi all,
Using QXL graphics on Windows 10 hvm domains causes the guest to become
extremely slow. The behaviour will happen as soon as Windows loads the
driver, so the VM will initially work normally while the OS is loading.
This was reproduced on the current master but to my knowledge it's
always been an issue for Windows 10 guests on Xen (issue is not present
on KVM).
The normal VGA display adapter uses a single vram memory region which is
registered as the Xen framebuffer with xen_register_framebuffer().
This will cause it to be mapped to the Xen physmap in xen_add_to_physmap().
In the case of one or multiple QXL devices, only the first memory region
of the first (primary) device will be registered with Xen as framebuffer
and added to physmap (since it reuses the vga code), while I think all
other memory regions will be accessed via the IOREQ server, which
probably has too much overhead and seems to be the cause of the
unresponsive guest.
We made an attempt at fixing the problem by forcing those memory regions
to be added to the Xen physmap in xen_add_to_physmap().
This solves the performance issue and seems to be enough to make
QXL work. Multi-screen, additional resolutions, etc., all seems fine.
However, there is a lot I don't understand so I am not sure the change
is sensible. Hoping to get some expert eyes on this.
I see these issues with the current change:
* Broken migration. When trying to restore the domain, this assertion
will fail for the qxl memory regions I added to the physmap (the ones
named "qxl.vram" or "qxl.vgavram"), because the address returned by
xen_replace_cache_entry() is different from what we get from
memory_region_get_ram_ptr():
qemu-system-i386: ../hw/i386/xen/xen-hvm.c:317:
xen_add_to_physmap: Assertion `p && p ==
memory_region_get_ram_ptr(mr)' failed.
If I understand correctly, we try to recreate the physmap entry for
the region by calling xen_replace_cache_entry(), which retrieves
the guest memory address using xenforeignmemory_map2(), since the VRAM
should belong to the guest and not QEMU (however this might not be the
case for the QXL memory regions?). The address we obtain should
also match the one obtained through memory_region_get_ram_ptr(),
which (I think) will come from the restored VM state.
In qxl.c, I'm only calling memory_region_get_ram_ptr(&qxl->vram_bar)
to ensure the region is mapped on the host (not doing so will cause
issues later), but I'm not using the returned pointer anywhere.
Maybe it's supposed to be saved with the VM state?
* Now that I'm using a list of regions registered as Xen framebuffer, I
don't know what to do in xen_sync_dirty_bitmap(). At the moment I
forced it to only call memory_region_set_dirty() on the "original"
framebuffer in a quick and dirty way, since it seems like we get there
only from the vga code but not from the QXL code, which seems to use
memory_region_set_dirty() instead (from qxl_set_dirty()).
* I used memory_region_set_log(<qxl_mem_region>, true, DIRTY_MEMORY_VGA)
or the regions won't be actually added in arch_xen_set_memory(). I don't
know if that's the right call, I just copied what we do for std VGA.
Hoping there isn't a fundamental issue with the approach, it would be
great to have this working!
The issue should be easy to reproduce but please let me know if I failed
to provide some important information.
Thank you,
Alessandro
---
diff --git a/hw/display/qxl.c b/hw/display/qxl.c
index 7178dec85d..80dc0b2131 100644
--- a/hw/display/qxl.c
+++ b/hw/display/qxl.c
@@ -29,6 +29,7 @@
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "hw/qdev-properties.h"
+#include "hw/xen/xen.h"
#include "sysemu/runstate.h"
#include "migration/vmstate.h"
#include "trace.h"
@@ -2139,11 +2140,14 @@ static void qxl_realize_common(PCIQXLDevice
*qxl, Error **errp)
memory_region_init_alias(&qxl->vram32_bar, OBJECT(qxl), "qxl.vram32",
&qxl->vram_bar, 0, qxl->vram32_size);
+ memory_region_get_ram_ptr(&qxl->vram_bar);
memory_region_init_io(&qxl->io_bar, OBJECT(qxl), &qxl_io_ops, qxl,
"qxl-ioports", io_size);
if (qxl->have_vga) {
vga_dirty_log_start(&qxl->vga);
}
+ xen_register_framebuffer(&qxl->vram_bar);
+ memory_region_set_log(&qxl->vram_bar, true, DIRTY_MEMORY_VGA);
memory_region_set_flush_coalesced(&qxl->io_bar);
@@ -2268,6 +2272,9 @@ static void qxl_realize_secondary(PCIDevice
*dev, Error **errp)
qxl->id = qemu_console_get_index(qxl->vga.con); /* == channel_id */
qxl_realize_common(qxl, errp);
+
+ xen_register_framebuffer(&qxl->vga.vram);
+ memory_region_set_log(&qxl->vga.vram, true, DIRTY_MEMORY_VGA);
}
static int qxl_pre_save(void *opaque)
diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c
index 4f6446600c..33c7c14804 100644
--- a/hw/i386/xen/xen-hvm.c
+++ b/hw/i386/xen/xen-hvm.c
@@ -14,6 +14,8 @@
#include "qapi/qapi-commands-migration.h"
#include "trace.h"
+#include "exec/ramblock.h"
+
#include "hw/i386/pc.h"
#include "hw/irq.h"
#include "hw/i386/apic-msidef.h"
@@ -26,7 +28,7 @@
#include "exec/target_page.h"
static MemoryRegion ram_640k, ram_lo, ram_hi;
-static MemoryRegion *framebuffer;
+static QLIST_HEAD(, XenFramebuffer) xen_framebuffer;
static bool xen_in_migration;
/* Compatibility with older version */
@@ -175,6 +177,17 @@ static void xen_ram_init(PCMachineState *pcms,
}
}
+static MemoryRegion *get_framebuffer_by_name(const char *name) {
+ XenFramebuffer *fb = NULL;
+
+ QLIST_FOREACH(fb, &xen_framebuffer, list) {
+ if (g_strcmp0(memory_region_name(fb->mr), name) == 0) {
+ return fb->mr;
+ }
+ }
+ return NULL;
+}
+
static XenPhysmap *get_physmapping(hwaddr start_addr, ram_addr_t size,
int page_mask)
{
@@ -254,6 +267,7 @@ static int xen_add_to_physmap(XenIOState *state,
unsigned long nr_pages;
int rc = 0;
XenPhysmap *physmap = NULL;
+ XenFramebuffer *fb = NULL;
hwaddr pfn, start_gpfn;
hwaddr phys_offset = memory_region_get_ram_addr(mr);
const char *mr_name;
@@ -269,9 +283,14 @@ static int xen_add_to_physmap(XenIOState *state,
* the linear framebuffer to be that region.
* Avoid tracking any regions that is not videoram and avoid tracking
* the legacy vga region. */
- if (mr == framebuffer && start_addr > 0xbffff) {
- goto go_physmap;
+ if (start_addr > 0xbffff) {
+ QLIST_FOREACH(fb, &xen_framebuffer, list) {
+ if (mr == fb->mr) {
+ goto go_physmap;
+ }
+ }
}
+
return -1;
go_physmap:
@@ -293,6 +312,7 @@ go_physmap:
/* Now when we have a physmap entry we can replace a dummy mapping with
* a real one of guest foreign memory. */
uint8_t *p = xen_replace_cache_entry(phys_offset, start_addr, size);
+ // For qxl.vram this assert will fail
assert(p && p == memory_region_get_ram_ptr(mr));
return 0;
@@ -406,7 +426,8 @@ static void xen_sync_dirty_bitmap(XenIOState *state,
#define ENODATA ENOENT
#endif
if (errno == ENODATA) {
- memory_region_set_dirty(framebuffer, 0, size);
+ MemoryRegion *fb = get_framebuffer_by_name("vga.vram");
+ memory_region_set_dirty(fb, 0, size);
DPRINTF("xen: track_dirty_vram failed (0x" HWADDR_FMT_plx
", 0x" HWADDR_FMT_plx "): %s\n",
start_addr, start_addr + size, strerror(errno));
@@ -419,8 +440,10 @@ static void xen_sync_dirty_bitmap(XenIOState *state,
while (map != 0) {
j = ctzl(map);
map &= ~(1ul << j);
- memory_region_set_dirty(framebuffer,
- (i * width + j) * page_size, page_size);
+ MemoryRegion *fb = get_framebuffer_by_name("vga.vram");
+ memory_region_set_dirty(fb,
+ (i * width + j) * page_size, page_size);
+
};
}
}
@@ -618,6 +641,8 @@ void xen_hvm_init_pc(PCMachineState *pcms,
MemoryRegion **ram_memory)
xen_is_stubdomain = xen_check_stubdomain(state->xenstore);
+ QLIST_INIT(&xen_framebuffer);
+
QLIST_INIT(&xen_physmap);
xen_read_physmap(state);
@@ -658,7 +683,12 @@ err:
void xen_register_framebuffer(MemoryRegion *mr)
{
- framebuffer = mr;
+ XenFramebuffer *fb = NULL;
+
+ fb= g_new(XenFramebuffer, 1);
+ fb->mr = mr;
+
+ QLIST_INSERT_HEAD(&xen_framebuffer, fb, list);
}
void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length)
diff --git a/include/hw/xen/xen-hvm-common.h b/include/hw/xen/xen-hvm-common.h
index 3d796235dc..8eba992c55 100644
--- a/include/hw/xen/xen-hvm-common.h
+++ b/include/hw/xen/xen-hvm-common.h
@@ -43,6 +43,13 @@ static inline ioreq_t
*xen_vcpu_ioreq(shared_iopage_t *shared_page, int vcpu)
#define BUFFER_IO_MAX_DELAY 100
+typedef struct XenFramebuffer {
+ MemoryRegion *mr;
+
+ QLIST_ENTRY(XenFramebuffer) list;
+} XenFramebuffer;
+
+
typedef struct XenPhysmap {
hwaddr start_addr;
ram_addr_t size;
+Xenia, Ray On Thu, 6 Mar 2025, Alessandro Muggianu wrote: > Hi all, > > Using QXL graphics on Windows 10 hvm domains causes the guest to become > extremely slow. The behaviour will happen as soon as Windows loads the > driver, so the VM will initially work normally while the OS is loading. > > This was reproduced on the current master but to my knowledge it's > always been an issue for Windows 10 guests on Xen (issue is not present > on KVM). > > The normal VGA display adapter uses a single vram memory region which is > registered as the Xen framebuffer with xen_register_framebuffer(). > This will cause it to be mapped to the Xen physmap in xen_add_to_physmap(). > > In the case of one or multiple QXL devices, only the first memory region > of the first (primary) device will be registered with Xen as framebuffer > and added to physmap (since it reuses the vga code), while I think all > other memory regions will be accessed via the IOREQ server, which > probably has too much overhead and seems to be the cause of the > unresponsive guest. > > We made an attempt at fixing the problem by forcing those memory regions > to be added to the Xen physmap in xen_add_to_physmap(). > > This solves the performance issue and seems to be enough to make > QXL work. Multi-screen, additional resolutions, etc., all seems fine. > > However, there is a lot I don't understand so I am not sure the change > is sensible. Hoping to get some expert eyes on this. > > I see these issues with the current change: > > * Broken migration. When trying to restore the domain, this assertion > will fail for the qxl memory regions I added to the physmap (the ones > named "qxl.vram" or "qxl.vgavram"), because the address returned by > xen_replace_cache_entry() is different from what we get from > memory_region_get_ram_ptr(): > > qemu-system-i386: ../hw/i386/xen/xen-hvm.c:317: > xen_add_to_physmap: Assertion `p && p == > memory_region_get_ram_ptr(mr)' failed. > > If I understand correctly, we try to recreate the physmap entry for > the region by calling xen_replace_cache_entry(), which retrieves > the guest memory address using xenforeignmemory_map2(), since the VRAM > should belong to the guest and not QEMU (however this might not be the > case for the QXL memory regions?). The address we obtain should > also match the one obtained through memory_region_get_ram_ptr(), > which (I think) will come from the restored VM state. > > In qxl.c, I'm only calling memory_region_get_ram_ptr(&qxl->vram_bar) > to ensure the region is mapped on the host (not doing so will cause > issues later), but I'm not using the returned pointer anywhere. > Maybe it's supposed to be saved with the VM state? > > * Now that I'm using a list of regions registered as Xen framebuffer, I > don't know what to do in xen_sync_dirty_bitmap(). At the moment I > forced it to only call memory_region_set_dirty() on the "original" > framebuffer in a quick and dirty way, since it seems like we get there > only from the vga code but not from the QXL code, which seems to use > memory_region_set_dirty() instead (from qxl_set_dirty()). > > * I used memory_region_set_log(<qxl_mem_region>, true, DIRTY_MEMORY_VGA) > or the regions won't be actually added in arch_xen_set_memory(). I don't > know if that's the right call, I just copied what we do for std VGA. > > Hoping there isn't a fundamental issue with the approach, it would be > great to have this working! > > The issue should be easy to reproduce but please let me know if I failed > to provide some important information. > > Thank you, > > Alessandro > > --- > > diff --git a/hw/display/qxl.c b/hw/display/qxl.c > index 7178dec85d..80dc0b2131 100644 > --- a/hw/display/qxl.c > +++ b/hw/display/qxl.c > @@ -29,6 +29,7 @@ > #include "qemu/main-loop.h" > #include "qemu/module.h" > #include "hw/qdev-properties.h" > +#include "hw/xen/xen.h" > #include "sysemu/runstate.h" > #include "migration/vmstate.h" > #include "trace.h" > @@ -2139,11 +2140,14 @@ static void qxl_realize_common(PCIQXLDevice > *qxl, Error **errp) > memory_region_init_alias(&qxl->vram32_bar, OBJECT(qxl), "qxl.vram32", > &qxl->vram_bar, 0, qxl->vram32_size); > > + memory_region_get_ram_ptr(&qxl->vram_bar); > memory_region_init_io(&qxl->io_bar, OBJECT(qxl), &qxl_io_ops, qxl, > "qxl-ioports", io_size); > if (qxl->have_vga) { > vga_dirty_log_start(&qxl->vga); > } > + xen_register_framebuffer(&qxl->vram_bar); > + memory_region_set_log(&qxl->vram_bar, true, DIRTY_MEMORY_VGA); > memory_region_set_flush_coalesced(&qxl->io_bar); > > > @@ -2268,6 +2272,9 @@ static void qxl_realize_secondary(PCIDevice > *dev, Error **errp) > qxl->id = qemu_console_get_index(qxl->vga.con); /* == channel_id */ > > qxl_realize_common(qxl, errp); > + > + xen_register_framebuffer(&qxl->vga.vram); > + memory_region_set_log(&qxl->vga.vram, true, DIRTY_MEMORY_VGA); > } > > static int qxl_pre_save(void *opaque) > diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c > index 4f6446600c..33c7c14804 100644 > --- a/hw/i386/xen/xen-hvm.c > +++ b/hw/i386/xen/xen-hvm.c > @@ -14,6 +14,8 @@ > #include "qapi/qapi-commands-migration.h" > #include "trace.h" > > +#include "exec/ramblock.h" > + > #include "hw/i386/pc.h" > #include "hw/irq.h" > #include "hw/i386/apic-msidef.h" > @@ -26,7 +28,7 @@ > #include "exec/target_page.h" > > static MemoryRegion ram_640k, ram_lo, ram_hi; > -static MemoryRegion *framebuffer; > +static QLIST_HEAD(, XenFramebuffer) xen_framebuffer; > static bool xen_in_migration; > > /* Compatibility with older version */ > @@ -175,6 +177,17 @@ static void xen_ram_init(PCMachineState *pcms, > } > } > > +static MemoryRegion *get_framebuffer_by_name(const char *name) { > + XenFramebuffer *fb = NULL; > + > + QLIST_FOREACH(fb, &xen_framebuffer, list) { > + if (g_strcmp0(memory_region_name(fb->mr), name) == 0) { > + return fb->mr; > + } > + } > + return NULL; > +} > + > static XenPhysmap *get_physmapping(hwaddr start_addr, ram_addr_t size, > int page_mask) > { > @@ -254,6 +267,7 @@ static int xen_add_to_physmap(XenIOState *state, > unsigned long nr_pages; > int rc = 0; > XenPhysmap *physmap = NULL; > + XenFramebuffer *fb = NULL; > hwaddr pfn, start_gpfn; > hwaddr phys_offset = memory_region_get_ram_addr(mr); > const char *mr_name; > @@ -269,9 +283,14 @@ static int xen_add_to_physmap(XenIOState *state, > * the linear framebuffer to be that region. > * Avoid tracking any regions that is not videoram and avoid tracking > * the legacy vga region. */ > - if (mr == framebuffer && start_addr > 0xbffff) { > - goto go_physmap; > + if (start_addr > 0xbffff) { > + QLIST_FOREACH(fb, &xen_framebuffer, list) { > + if (mr == fb->mr) { > + goto go_physmap; > + } > + } > } > + > return -1; > > go_physmap: > @@ -293,6 +312,7 @@ go_physmap: > /* Now when we have a physmap entry we can replace a dummy mapping with > * a real one of guest foreign memory. */ > uint8_t *p = xen_replace_cache_entry(phys_offset, start_addr, size); > + // For qxl.vram this assert will fail > assert(p && p == memory_region_get_ram_ptr(mr)); > > return 0; > @@ -406,7 +426,8 @@ static void xen_sync_dirty_bitmap(XenIOState *state, > #define ENODATA ENOENT > #endif > if (errno == ENODATA) { > - memory_region_set_dirty(framebuffer, 0, size); > + MemoryRegion *fb = get_framebuffer_by_name("vga.vram"); > + memory_region_set_dirty(fb, 0, size); > DPRINTF("xen: track_dirty_vram failed (0x" HWADDR_FMT_plx > ", 0x" HWADDR_FMT_plx "): %s\n", > start_addr, start_addr + size, strerror(errno)); > @@ -419,8 +440,10 @@ static void xen_sync_dirty_bitmap(XenIOState *state, > while (map != 0) { > j = ctzl(map); > map &= ~(1ul << j); > - memory_region_set_dirty(framebuffer, > - (i * width + j) * page_size, page_size); > + MemoryRegion *fb = get_framebuffer_by_name("vga.vram"); > + memory_region_set_dirty(fb, > + (i * width + j) * page_size, page_size); > + > }; > } > } > @@ -618,6 +641,8 @@ void xen_hvm_init_pc(PCMachineState *pcms, > MemoryRegion **ram_memory) > > xen_is_stubdomain = xen_check_stubdomain(state->xenstore); > > + QLIST_INIT(&xen_framebuffer); > + > QLIST_INIT(&xen_physmap); > xen_read_physmap(state); > > @@ -658,7 +683,12 @@ err: > > void xen_register_framebuffer(MemoryRegion *mr) > { > - framebuffer = mr; > + XenFramebuffer *fb = NULL; > + > + fb= g_new(XenFramebuffer, 1); > + fb->mr = mr; > + > + QLIST_INSERT_HEAD(&xen_framebuffer, fb, list); > } > > void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length) > diff --git a/include/hw/xen/xen-hvm-common.h b/include/hw/xen/xen-hvm-common.h > index 3d796235dc..8eba992c55 100644 > --- a/include/hw/xen/xen-hvm-common.h > +++ b/include/hw/xen/xen-hvm-common.h > @@ -43,6 +43,13 @@ static inline ioreq_t > *xen_vcpu_ioreq(shared_iopage_t *shared_page, int vcpu) > > #define BUFFER_IO_MAX_DELAY 100 > > +typedef struct XenFramebuffer { > + MemoryRegion *mr; > + > + QLIST_ENTRY(XenFramebuffer) list; > +} XenFramebuffer; > + > + > typedef struct XenPhysmap { > hwaddr start_addr; > ram_addr_t size; >
© 2016 - 2025 Red Hat, Inc.