[XEN PATCH 2/3] libxl/dm: Manage pci slot assignment for Intel IGD passthrough

Chuck Zmudzinski posted 3 patches 1 year, 10 months ago
There is a newer version of this series
[XEN PATCH 2/3] libxl/dm: Manage pci slot assignment for Intel IGD passthrough
Posted by Chuck Zmudzinski 1 year, 10 months ago
By default, except for the ich9-usb-uhci device which libxl assigns to
slot 29 (0xid), libxl defers to qemu upstream's automatic slot assignment
process, which is simply to assign each emulated device to the next
available slot on the pci bus. With this default behavior, libxl and
qemu will not configure the Intel IGD correctly. Specifically, the Intel
IGD must be assigned to slot 2, but the current default behavior is to
assign one of the emulated devices to slot 2.

With this patch libxl uses qemu command line options to specify the slot
address of each pci device in the guest when gfx_passthru is enabled
for the Intel IGD. It uses the same simple algorithm of assigning the
next available slot, except that it starts with slot 3 instead of slot 2
for the emulated devices. This process of slot assignment aims to
simulate the behavior of existing machines as much as possible.

The default behavior (when igd gfx_passthru is disabled) of letting qemu
manage the slot addresses of emulated pci devices is preserved. The
patch also preserves the special treatment for the ich9 usb2 controller
(ich9-usb-ehci1) that libxl currently assigns to slot.function 29.7 and
the associated ich9-usb-uhciN devices to slot 29 (0x1d).

For future maintainance of this code, it is important that pci devices
that are managed by the libxl__build_device_model_args_new function
follow the logic of this patch and use the new local counter next_slot
to assign the slot address instead of letting upstream qemu assign the
slot if the guest is configured for Intel IGD passthrough, and preserve
the current behavior of letting qemu assign the slot address otherwise.

Signed-off-by: Chuck Zmudzinski <brchuckz@aol.com>
---
The diff of this patch is easier to review if it is generated using
the -b (aka --ignore-space-change) option of diff/git-diff to filter
out some of the changes that are only due to white space.

This patch is difficult to verify for correctness without testing all
the devices that could be added by the libxl__build_device_model_args_new
function. There are 12 places where the addr=%x option needed to be
added to the arguments of the "-device" qemu option that adds an
emulated pci device which corresponds to at least 12 different devices
that could be affected by this patch if the patch contains mistakes
that the compiler did not notice. One mistake the compiler would not
notice is a missing next_slot++; statement that would result in qemu
trying to assign a device to a slot that is already assigned, which
is an error in qemu. I did enough tests to find some mistakes in the
patch which of course I fixed before submitting the patch. So I cannot
guarantee that there are not any other mistakes in the patch because
I don't have the resources to do the necessary testing of the many
possible configurations that could be affected by this patch.

 tools/libs/light/libxl_dm.c | 168 ++++++++++++++++++++++++++++++------
 1 file changed, 141 insertions(+), 27 deletions(-)

diff --git a/tools/libs/light/libxl_dm.c b/tools/libs/light/libxl_dm.c
index 2048815611..2720b5d4d0 100644
--- a/tools/libs/light/libxl_dm.c
+++ b/tools/libs/light/libxl_dm.c
@@ -1205,6 +1205,20 @@ static int libxl__build_device_model_args_new(libxl__gc *gc,
     const char *path, *chardev;
     bool is_stubdom = libxl_defbool_val(b_info->device_model_stubdomain);
     int rc;
+    int next_slot;
+    bool configure_pci_for_igd = false;
+    /*
+     * next_slot is only used when we need to configure the pci
+     * slots for the Intel IGD. Slot 2 will be for the Intel IGD.
+     */
+    next_slot = 3;
+    if (libxl_defbool_val(b_info->u.hvm.gfx_passthru)) {
+        enum libxl_gfx_passthru_kind gfx_passthru_kind =
+                        libxl__detect_gfx_passthru_kind(gc, guest_config);
+        if (gfx_passthru_kind == LIBXL_GFX_PASSTHRU_KIND_IGD) {
+            configure_pci_for_igd = true;
+        }
+    }
 
     dm_args = flexarray_make(gc, 16, 1);
     dm_envs = flexarray_make(gc, 16, 1);
@@ -1372,6 +1386,20 @@ static int libxl__build_device_model_args_new(libxl__gc *gc,
 
         if (b_info->cmdline)
             flexarray_vappend(dm_args, "-append", b_info->cmdline, NULL);
+        /*
+         * When the Intel IGD is configured for primary graphics
+         * passthrough, we need to manually add the xen platform
+         * device because the QEMU machine type is "pc". Add it first to
+         * simulate the behavior of the "xenfv" QEMU machine type which
+         * always adds the xen platform device first. But in this case it
+         * will be at slot 3 because we are reserving slot 2 for the IGD.
+         */
+        if (configure_pci_for_igd &&
+            libxl_defbool_val(b_info->u.hvm.xen_platform_pci)) {
+            flexarray_append_pair(dm_args, "-device",
+                        GCSPRINTF("xen-platform,addr=%x", next_slot));
+            next_slot++;
+        }
 
         /* Find out early if one of the disk is on the scsi bus and add a scsi
          * controller. This is done ahead to keep the same behavior as previous
@@ -1381,7 +1409,14 @@ static int libxl__build_device_model_args_new(libxl__gc *gc,
                 continue;
             }
             if (strncmp(disks[i].vdev, "sd", 2) == 0) {
-                flexarray_vappend(dm_args, "-device", "lsi53c895a", NULL);
+                if (configure_pci_for_igd) {
+                    flexarray_vappend(dm_args, "-device",
+                        GCSPRINTF("lsi53c895a,addr=%x", next_slot), NULL);
+                    next_slot++;
+                } else {
+                    flexarray_vappend(dm_args, "-device", "lsi53c895a",
+                                      NULL);
+                }
                 break;
             }
         }
@@ -1436,31 +1471,67 @@ static int libxl__build_device_model_args_new(libxl__gc *gc,
             flexarray_append(dm_args, "-spice");
             flexarray_append(dm_args, spiceoptions);
             if (libxl_defbool_val(b_info->u.hvm.spice.vdagent)) {
-                flexarray_vappend(dm_args, "-device", "virtio-serial",
-                    "-chardev", "spicevmc,id=vdagent,name=vdagent", "-device",
-                    "virtserialport,chardev=vdagent,name=com.redhat.spice.0",
-                    NULL);
+                if (configure_pci_for_igd) {
+                    flexarray_vappend(dm_args, "-device",
+                        GCSPRINTF("virtio-serial,addr=%x", next_slot),
+                        "-chardev", "spicevmc,id=vdagent,name=vdagent",
+                        "-device",
+                        "virtserialport,chardev=vdagent,name=com.redhat.spice.0",
+                        NULL);
+                next_slot++;
+                } else {
+                    flexarray_vappend(dm_args, "-device", "virtio-serial",
+                        "-chardev", "spicevmc,id=vdagent,name=vdagent",
+                        "-device",
+                        "virtserialport,chardev=vdagent,name=com.redhat.spice.0",
+                        NULL);
+                }
             }
         }
 
         switch (b_info->u.hvm.vga.kind) {
         case LIBXL_VGA_INTERFACE_TYPE_STD:
-            flexarray_append_pair(dm_args, "-device",
-                GCSPRINTF("VGA,vgamem_mb=%d",
-                libxl__sizekb_to_mb(b_info->video_memkb)));
+            if (configure_pci_for_igd) {
+                flexarray_append_pair(dm_args, "-device",
+                    GCSPRINTF("VGA,addr=%x,vgamem_mb=%d", next_slot,
+                    libxl__sizekb_to_mb(b_info->video_memkb)));
+                next_slot++;
+            } else {
+                flexarray_append_pair(dm_args, "-device",
+                    GCSPRINTF("VGA,vgamem_mb=%d",
+                    libxl__sizekb_to_mb(b_info->video_memkb)));
+            }
             break;
         case LIBXL_VGA_INTERFACE_TYPE_CIRRUS:
-            flexarray_append_pair(dm_args, "-device",
-                GCSPRINTF("cirrus-vga,vgamem_mb=%d",
-                libxl__sizekb_to_mb(b_info->video_memkb)));
+            if (configure_pci_for_igd) {
+                flexarray_append_pair(dm_args, "-device",
+                    GCSPRINTF("cirrus-vga,addr=%x,vgamem_mb=%d", next_slot,
+                    libxl__sizekb_to_mb(b_info->video_memkb)));
+                next_slot++;
+            } else {
+                flexarray_append_pair(dm_args, "-device",
+                    GCSPRINTF("cirrus-vga,vgamem_mb=%d",
+                    libxl__sizekb_to_mb(b_info->video_memkb)));
+            }
             break;
         case LIBXL_VGA_INTERFACE_TYPE_NONE:
             break;
         case LIBXL_VGA_INTERFACE_TYPE_QXL:
             /* QXL have 2 ram regions, ram and vram */
-            flexarray_append_pair(dm_args, "-device",
-                GCSPRINTF("qxl-vga,vram_size_mb=%"PRIu64",ram_size_mb=%"PRIu64,
-                (b_info->video_memkb/2/1024), (b_info->video_memkb/2/1024) ) );
+            if (configure_pci_for_igd) {
+                flexarray_append_pair(dm_args, "-device",
+                    GCSPRINTF("qxl-vga,addr=%x,vram_size_mb=%"PRIu64
+                    ",ram_size_mb=%"PRIu64, next_slot,
+                    (b_info->video_memkb/2/1024),
+                    (b_info->video_memkb/2/1024) ) );
+                next_slot++;
+            } else {
+                flexarray_append_pair(dm_args, "-device",
+                    GCSPRINTF("qxl-vga,vram_size_mb=%"PRIu64
+                    ",ram_size_mb=%"PRIu64,
+                    (b_info->video_memkb/2/1024),
+                    (b_info->video_memkb/2/1024) ) );
+            }
             break;
         default:
             LOGD(ERROR, guest_domid, "Invalid emulated video card specified");
@@ -1496,8 +1567,15 @@ static int libxl__build_device_model_args_new(libxl__gc *gc,
         } else if (b_info->u.hvm.usbversion) {
             switch (b_info->u.hvm.usbversion) {
             case 1:
-                flexarray_vappend(dm_args,
-                    "-device", "piix3-usb-uhci,id=usb", NULL);
+                if (configure_pci_for_igd) {
+                    flexarray_vappend(dm_args, "-device",
+                        GCSPRINTF("piix3-usb-uhci,addr=%x,id=usb",
+                                  next_slot), NULL);
+                    next_slot++;
+                } else {
+                    flexarray_vappend(dm_args,
+                        "-device", "piix3-usb-uhci,id=usb", NULL);
+                }
                 break;
             case 2:
                 flexarray_append_pair(dm_args, "-device",
@@ -1509,8 +1587,15 @@ static int libxl__build_device_model_args_new(libxl__gc *gc,
                         i, 2*(i-1), i-1));
                 break;
             case 3:
-                flexarray_vappend(dm_args,
-                    "-device", "nec-usb-xhci,id=usb", NULL);
+                if (configure_pci_for_igd) {
+                    flexarray_vappend(dm_args, "-device",
+                        GCSPRINTF("nec-usb-xhci,addr=%x,id=usb",
+                                  next_slot), NULL);
+                    next_slot++;
+                } else {
+                    flexarray_vappend(dm_args,
+                        "-device", "nec-usb-xhci,id=usb", NULL);
+                }
                 break;
             default:
                 LOGD(ERROR, guest_domid, "usbversion parameter is invalid, "
@@ -1542,8 +1627,15 @@ static int libxl__build_device_model_args_new(libxl__gc *gc,
 
             switch (soundhw) {
             case LIBXL__QEMU_SOUNDHW_HDA:
-                flexarray_vappend(dm_args, "-device", "intel-hda",
-                                  "-device", "hda-duplex", NULL);
+                if (configure_pci_for_igd) {
+                    flexarray_vappend(dm_args, "-device",
+                        GCSPRINTF("intel-hda,addr=%x", next_slot),
+                                   "-device", "hda-duplex", NULL);
+                    next_slot++;
+                } else {
+                    flexarray_vappend(dm_args, "-device", "intel-hda",
+                                      "-device", "hda-duplex", NULL);
+                }
                 break;
             default:
                 flexarray_append_pair(dm_args, "-device",
@@ -1573,10 +1665,18 @@ static int libxl__build_device_model_args_new(libxl__gc *gc,
                                                 guest_domid, nics[i].devid,
                                                 LIBXL_NIC_TYPE_VIF_IOEMU);
                 flexarray_append(dm_args, "-device");
-                flexarray_append(dm_args,
-                   GCSPRINTF("%s,id=nic%d,netdev=net%d,mac=%s",
-                             nics[i].model, nics[i].devid,
-                             nics[i].devid, smac));
+                if (configure_pci_for_igd) {
+                    flexarray_append(dm_args,
+                        GCSPRINTF("%s,addr=%x,id=nic%d,netdev=net%d,mac=%s",
+                                  nics[i].model, next_slot, nics[i].devid,
+                                  nics[i].devid, smac));
+                    next_slot++;
+                } else {
+                    flexarray_append(dm_args,
+                        GCSPRINTF("%s,id=nic%d,netdev=net%d,mac=%s",
+                                  nics[i].model, nics[i].devid,
+                                  nics[i].devid, smac));
+                }
                 flexarray_append(dm_args, "-netdev");
                 flexarray_append(dm_args,
                                  GCSPRINTF("type=tap,id=net%d,ifname=%s,"
@@ -1865,8 +1965,15 @@ static int libxl__build_device_model_args_new(libxl__gc *gc,
     if (b_info->type == LIBXL_DOMAIN_TYPE_HVM) {
         flexarray_append_pair(dm_envs, "XEN_DOMAIN_ID", GCSPRINTF("%d", guest_domid));
 
-        if (b_info->u.hvm.hdtype == LIBXL_HDTYPE_AHCI)
-            flexarray_append_pair(dm_args, "-device", "ahci,id=ahci0");
+        if (b_info->u.hvm.hdtype == LIBXL_HDTYPE_AHCI) {
+            if (configure_pci_for_igd) {
+                flexarray_append_pair(dm_args, "-device",
+                            GCSPRINTF("ahci,addr=%x,id=ahci0", next_slot));
+                next_slot++;
+            } else {
+                flexarray_append_pair(dm_args, "-device", "ahci,id=ahci0");
+            }
+        }
         for (i = 0; i < num_disks; i++) {
             int disk, part;
             int dev_number =
@@ -2043,7 +2150,14 @@ static int libxl__build_device_model_args_new(libxl__gc *gc,
         switch (b_info->u.hvm.vendor_device) {
         case LIBXL_VENDOR_DEVICE_XENSERVER:
             flexarray_append(dm_args, "-device");
-            flexarray_append(dm_args, "xen-pvdevice,device-id=0xc000");
+            if (configure_pci_for_igd) {
+                flexarray_append(dm_args,
+                GCSPRINTF("xen-pvdevice,addr=%x,device-id=0xc000",
+                           next_slot));
+                next_slot++;
+            } else {
+                flexarray_append(dm_args, "xen-pvdevice,device-id=0xc000");
+            }
             break;
         default:
             break;
-- 
2.39.0