The BDSM register is mirrored into MMIO space at least for gen 11 and
later devices. Unfortunately, the Windows driver reads the register
value from MMIO space instead of PCI config space for those devices [1].
Therefore, we either have to keep a 1:1 mapping for the host and guest
address or we have to emulate the MMIO register too. Using the igd in
legacy mode is already hard due to it's many constraints. Keeping a 1:1
mapping may not work in all cases and makes it even harder to use. An
MMIO emulation has to trap the whole MMIO page. This makes accesses to
this page slower compared to using second level address translation.
Nevertheless, it doesn't have any constraints and I haven't noticed any
performance degradation yet making it a better solution.
[1] https://github.com/projectacrn/acrn-hypervisor/blob/5c351bee0f6ae46250eefc07f44b4a31e770f3cf/devicemodel/hw/pci/passthrough.c#L650-L653
Signed-off-by: Corvin Köhne <c.koehne@beckhoff.com>
---
hw/vfio/igd.c | 97 ++++++++++++++++++++++++++++++++++++++++++++
hw/vfio/pci-quirks.c | 1 +
hw/vfio/pci.h | 1 +
3 files changed, 99 insertions(+)
diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
index 0b6533bbf7..863b58565e 100644
--- a/hw/vfio/igd.c
+++ b/hw/vfio/igd.c
@@ -374,6 +374,103 @@ static const MemoryRegionOps vfio_igd_index_quirk = {
.endianness = DEVICE_LITTLE_ENDIAN,
};
+#define IGD_BDSM_MMIO_OFFSET 0x1080C0
+
+static uint64_t vfio_igd_quirk_bdsm_read(void *opaque,
+ hwaddr addr, unsigned size)
+{
+ VFIOPCIDevice *vdev = opaque;
+ uint64_t offset;
+
+ offset = IGD_BDSM_GEN11 + addr;
+
+ switch (size) {
+ case 1:
+ return pci_get_byte(vdev->pdev.config + offset);
+ case 2:
+ return le16_to_cpu(pci_get_word(vdev->pdev.config + offset));
+ case 4:
+ return le32_to_cpu(pci_get_long(vdev->pdev.config + offset));
+ case 8:
+ return le64_to_cpu(pci_get_quad(vdev->pdev.config + offset));
+ default:
+ hw_error("igd: unsupported read size, %u bytes", size);
+ break;
+ }
+
+ return 0;
+}
+
+static void vfio_igd_quirk_bdsm_write(void *opaque, hwaddr addr,
+ uint64_t data, unsigned size)
+{
+ VFIOPCIDevice *vdev = opaque;
+ uint64_t offset;
+
+ offset = IGD_BDSM_GEN11 + addr;
+
+ switch (size) {
+ case 1:
+ pci_set_byte(vdev->pdev.config + offset, data);
+ break;
+ case 2:
+ pci_set_word(vdev->pdev.config + offset, data);
+ break;
+ case 4:
+ pci_set_long(vdev->pdev.config + offset, data);
+ break;
+ case 8:
+ pci_set_quad(vdev->pdev.config + offset, data);
+ break;
+ default:
+ hw_error("igd: unsupported read size, %u bytes", size);
+ break;
+ }
+}
+
+static const MemoryRegionOps vfio_igd_bdsm_quirk = {
+ .read = vfio_igd_quirk_bdsm_read,
+ .write = vfio_igd_quirk_bdsm_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr)
+{
+ VFIOQuirk *quirk;
+ int gen;
+
+ /*
+ * This must be an Intel VGA device at address 00:02.0 for us to even
+ * consider enabling legacy mode. Some driver have dependencies on the PCI
+ * bus address.
+ */
+ if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) ||
+ !vfio_is_vga(vdev) || nr != 0 ||
+ &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev),
+ 0, PCI_DEVFN(0x2, 0))) {
+ return;
+ }
+
+ /*
+ * Only on IGD devices of gen 11 and above, the BDSM register is mirrored
+ * into MMIO space and read from MMIO space by the Windows driver.
+ */
+ gen = igd_gen(vdev);
+ if (gen < 11) {
+ return;
+ }
+
+ quirk = vfio_quirk_alloc(1);
+ quirk->data = vdev;
+
+ memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_igd_bdsm_quirk,
+ vdev, "vfio-igd-bdsm-quirk", 8);
+ memory_region_add_subregion_overlap(vdev->bars[0].region.mem, 0x1080C0,
+ &quirk->mem[0], 1);
+
+ QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
+}
+
void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
{
g_autofree struct vfio_region_info *rom = NULL;
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index 39dae72497..d37f722cce 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -1259,6 +1259,7 @@ void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
vfio_probe_nvidia_bar0_quirk(vdev, nr);
vfio_probe_rtl8168_bar2_quirk(vdev, nr);
#ifdef CONFIG_VFIO_IGD
+ vfio_probe_igd_bar0_quirk(vdev, nr);
vfio_probe_igd_bar4_quirk(vdev, nr);
#endif
}
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index bf67df2fbc..5ad090a229 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -215,6 +215,7 @@ void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev);
bool vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp);
void vfio_quirk_reset(VFIOPCIDevice *vdev);
VFIOQuirk *vfio_quirk_alloc(int nr_mem);
+void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr);
void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr);
extern const PropertyInfo qdev_prop_nv_gpudirect_clique;
--
2.46.0
This email contains confidential information. If you have received it in error, you must not read, use, copy or pass on this e-mail or its attachments. If you have received the e-mail in error, please inform me immediately by reply e-mail and then delete this e-mail from your system. Thank you
Diese E-Mail enthält vertrauliche Informationen. Sollten Sie sie irrtümlich erhalten haben, dürfen Sie diese E-Mail oder ihre Anhänge nicht lesen, verwenden, kopieren oder weitergeben. Sollten Sie die Mail versehentlich erhalten haben, teilen Sie mir dies bitte umgehend per Antwort-E-Mail mit und löschen Sie diese E-Mail dann aus Ihrem System. Vielen Dank
Beckhoff Automation GmbH & Co. KG | Managing Director: Dipl. Phys. Hans Beckhoff
Registered office: Verl, Germany | Register court: Guetersloh HRA 7075
On Thu, 22 Aug 2024 13:08:29 +0200 Corvin Köhne <c.koehne@beckhoff.com> wrote: > The BDSM register is mirrored into MMIO space at least for gen 11 and > later devices. Unfortunately, the Windows driver reads the register > value from MMIO space instead of PCI config space for those devices [1]. > Therefore, we either have to keep a 1:1 mapping for the host and guest > address or we have to emulate the MMIO register too. Using the igd in > legacy mode is already hard due to it's many constraints. Keeping a 1:1 > mapping may not work in all cases and makes it even harder to use. An > MMIO emulation has to trap the whole MMIO page. This makes accesses to > this page slower compared to using second level address translation. > Nevertheless, it doesn't have any constraints and I haven't noticed any > performance degradation yet making it a better solution. > > [1] https://github.com/projectacrn/acrn-hypervisor/blob/5c351bee0f6ae46250eefc07f44b4a31e770f3cf/devicemodel/hw/pci/passthrough.c#L650-L653 > > Signed-off-by: Corvin Köhne <c.koehne@beckhoff.com> > --- > hw/vfio/igd.c | 97 ++++++++++++++++++++++++++++++++++++++++++++ > hw/vfio/pci-quirks.c | 1 + > hw/vfio/pci.h | 1 + > 3 files changed, 99 insertions(+) > > diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c > index 0b6533bbf7..863b58565e 100644 > --- a/hw/vfio/igd.c > +++ b/hw/vfio/igd.c > @@ -374,6 +374,103 @@ static const MemoryRegionOps vfio_igd_index_quirk = { > .endianness = DEVICE_LITTLE_ENDIAN, > }; > > +#define IGD_BDSM_MMIO_OFFSET 0x1080C0 > + > +static uint64_t vfio_igd_quirk_bdsm_read(void *opaque, > + hwaddr addr, unsigned size) > +{ > + VFIOPCIDevice *vdev = opaque; > + uint64_t offset; > + > + offset = IGD_BDSM_GEN11 + addr; > + > + switch (size) { > + case 1: > + return pci_get_byte(vdev->pdev.config + offset); > + case 2: > + return le16_to_cpu(pci_get_word(vdev->pdev.config + offset)); > + case 4: > + return le32_to_cpu(pci_get_long(vdev->pdev.config + offset)); > + case 8: > + return le64_to_cpu(pci_get_quad(vdev->pdev.config + offset)); > + default: > + hw_error("igd: unsupported read size, %u bytes", size); > + break; > + } > + > + return 0; > +} > + > +static void vfio_igd_quirk_bdsm_write(void *opaque, hwaddr addr, > + uint64_t data, unsigned size) > +{ > + VFIOPCIDevice *vdev = opaque; > + uint64_t offset; > + > + offset = IGD_BDSM_GEN11 + addr; > + > + switch (size) { > + case 1: > + pci_set_byte(vdev->pdev.config + offset, data); > + break; > + case 2: > + pci_set_word(vdev->pdev.config + offset, data); > + break; > + case 4: > + pci_set_long(vdev->pdev.config + offset, data); > + break; > + case 8: > + pci_set_quad(vdev->pdev.config + offset, data); > + break; > + default: > + hw_error("igd: unsupported read size, %u bytes", size); > + break; > + } > +} If we have the leXX_to_cpu() in the read path, don't we need cpu_to_leXX() in the write path? Maybe we should in fact just get rid of all of them since we're quirking a device that's specific to a little endian architecture and we're defining the memory region as little endian, but minimally we should be consistent. > + > +static const MemoryRegionOps vfio_igd_bdsm_quirk = { > + .read = vfio_igd_quirk_bdsm_read, > + .write = vfio_igd_quirk_bdsm_write, > + .endianness = DEVICE_LITTLE_ENDIAN, > +}; > + > +void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) > +{ > + VFIOQuirk *quirk; > + int gen; > + > + /* > + * This must be an Intel VGA device at address 00:02.0 for us to even > + * consider enabling legacy mode. Some driver have dependencies on the PCI > + * bus address. > + */ > + if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || > + !vfio_is_vga(vdev) || nr != 0 || > + &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev), > + 0, PCI_DEVFN(0x2, 0))) { > + return; > + } > + > + /* > + * Only on IGD devices of gen 11 and above, the BDSM register is mirrored > + * into MMIO space and read from MMIO space by the Windows driver. > + */ > + gen = igd_gen(vdev); > + if (gen < 11) { > + return; > + } > + > + quirk = vfio_quirk_alloc(1); > + quirk->data = vdev; > + > + memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_igd_bdsm_quirk, > + vdev, "vfio-igd-bdsm-quirk", 8); > + memory_region_add_subregion_overlap(vdev->bars[0].region.mem, 0x1080C0, Use your macro here, IGD_BDSM_MMIO_OFFSET. Thanks, Alex PS - please drop the confidential email warning signature when posting to public lists. > + &quirk->mem[0], 1); > + > + QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); > +} > + > void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) > { > g_autofree struct vfio_region_info *rom = NULL; > diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c > index 39dae72497..d37f722cce 100644 > --- a/hw/vfio/pci-quirks.c > +++ b/hw/vfio/pci-quirks.c > @@ -1259,6 +1259,7 @@ void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr) > vfio_probe_nvidia_bar0_quirk(vdev, nr); > vfio_probe_rtl8168_bar2_quirk(vdev, nr); > #ifdef CONFIG_VFIO_IGD > + vfio_probe_igd_bar0_quirk(vdev, nr); > vfio_probe_igd_bar4_quirk(vdev, nr); > #endif > } > diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h > index bf67df2fbc..5ad090a229 100644 > --- a/hw/vfio/pci.h > +++ b/hw/vfio/pci.h > @@ -215,6 +215,7 @@ void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev); > bool vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp); > void vfio_quirk_reset(VFIOPCIDevice *vdev); > VFIOQuirk *vfio_quirk_alloc(int nr_mem); > +void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr); > void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr); > > extern const PropertyInfo qdev_prop_nv_gpudirect_clique;
On Mon, 2024-08-26 at 10:35 -0600, Alex Williamson wrote: > CAUTION: External Email!! > On Thu, 22 Aug 2024 13:08:29 +0200 > Corvin Köhne <c.koehne@beckhoff.com> wrote: > > > The BDSM register is mirrored into MMIO space at least for gen 11 > > and > > later devices. Unfortunately, the Windows driver reads the register > > value from MMIO space instead of PCI config space for those devices > > [1]. > > Therefore, we either have to keep a 1:1 mapping for the host and > > guest > > address or we have to emulate the MMIO register too. Using the igd > > in > > legacy mode is already hard due to it's many constraints. Keeping a > > 1:1 > > mapping may not work in all cases and makes it even harder to use. > > An > > MMIO emulation has to trap the whole MMIO page. This makes accesses > > to > > this page slower compared to using second level address > > translation. > > Nevertheless, it doesn't have any constraints and I haven't noticed > > any > > performance degradation yet making it a better solution. > > > > [1] > > https://github.com/projectacrn/acrn-hypervisor/blob/5c351bee0f6ae46250eefc07f44b4a31e770f3cf/devicemodel/hw/pci/passthrough.c#L650-L653 > > > > Signed-off-by: Corvin Köhne <c.koehne@beckhoff.com> > > --- > > hw/vfio/igd.c | 97 > > ++++++++++++++++++++++++++++++++++++++++++++ > > hw/vfio/pci-quirks.c | 1 + > > hw/vfio/pci.h | 1 + > > 3 files changed, 99 insertions(+) > > > > diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c > > index 0b6533bbf7..863b58565e 100644 > > --- a/hw/vfio/igd.c > > +++ b/hw/vfio/igd.c > > @@ -374,6 +374,103 @@ static const MemoryRegionOps > > vfio_igd_index_quirk = { > > .endianness = DEVICE_LITTLE_ENDIAN, > > }; > > > > +#define IGD_BDSM_MMIO_OFFSET 0x1080C0 > > + > > +static uint64_t vfio_igd_quirk_bdsm_read(void *opaque, > > + hwaddr addr, unsigned > > size) > > +{ > > + VFIOPCIDevice *vdev = opaque; > > + uint64_t offset; > > + > > + offset = IGD_BDSM_GEN11 + addr; > > + > > + switch (size) { > > + case 1: > > + return pci_get_byte(vdev->pdev.config + offset); > > + case 2: > > + return le16_to_cpu(pci_get_word(vdev->pdev.config + > > offset)); > > + case 4: > > + return le32_to_cpu(pci_get_long(vdev->pdev.config + > > offset)); > > + case 8: > > + return le64_to_cpu(pci_get_quad(vdev->pdev.config + > > offset)); > > + default: > > + hw_error("igd: unsupported read size, %u bytes", size); > > + break; > > + } > > + > > + return 0; > > +} > > + > > +static void vfio_igd_quirk_bdsm_write(void *opaque, hwaddr addr, > > + uint64_t data, unsigned > > size) > > +{ > > + VFIOPCIDevice *vdev = opaque; > > + uint64_t offset; > > + > > + offset = IGD_BDSM_GEN11 + addr; > > + > > + switch (size) { > > + case 1: > > + pci_set_byte(vdev->pdev.config + offset, data); > > + break; > > + case 2: > > + pci_set_word(vdev->pdev.config + offset, data); > > + break; > > + case 4: > > + pci_set_long(vdev->pdev.config + offset, data); > > + break; > > + case 8: > > + pci_set_quad(vdev->pdev.config + offset, data); > > + break; > > + default: > > + hw_error("igd: unsupported read size, %u bytes", size); > > + break; > > + } > > +} > > If we have the leXX_to_cpu() in the read path, don't we need > cpu_to_leXX() in the write path? Maybe we should in fact just get > rid > of all of them since we're quirking a device that's specific to a > little endian architecture and we're defining the memory region as > little endian, but minimally we should be consistent. > Will drop leXX_to_cpu in the read path. > > + > > +static const MemoryRegionOps vfio_igd_bdsm_quirk = { > > + .read = vfio_igd_quirk_bdsm_read, > > + .write = vfio_igd_quirk_bdsm_write, > > + .endianness = DEVICE_LITTLE_ENDIAN, > > +}; > > + > > +void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) > > +{ > > + VFIOQuirk *quirk; > > + int gen; > > + > > + /* > > + * This must be an Intel VGA device at address 00:02.0 for us > > to even > > + * consider enabling legacy mode. Some driver have > > dependencies on the PCI > > + * bus address. > > + */ > > + if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || > > + !vfio_is_vga(vdev) || nr != 0 || > > + &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev- > > >pdev), > > + 0, PCI_DEVFN(0x2, 0))) { > > + return; > > + } > > + > > + /* > > + * Only on IGD devices of gen 11 and above, the BDSM register > > is mirrored > > + * into MMIO space and read from MMIO space by the Windows > > driver. > > + */ > > + gen = igd_gen(vdev); > > + if (gen < 11) { > > + return; > > + } > > + > > + quirk = vfio_quirk_alloc(1); > > + quirk->data = vdev; > > + > > + memory_region_init_io(&quirk->mem[0], OBJECT(vdev), > > &vfio_igd_bdsm_quirk, > > + vdev, "vfio-igd-bdsm-quirk", 8); > > + memory_region_add_subregion_overlap(vdev->bars[0].region.mem, > > 0x1080C0, > > Use your macro here, IGD_BDSM_MMIO_OFFSET. Thanks, > Thanks for catching. > Alex > > PS - please drop the confidential email warning signature when > posting > to public lists. > Sry for the noise. I can't drop it, so I'm going to use another mail address to post my patches. > > + &quirk->mem[0], 1); > > + > > + QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); > > +} > > + > > void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) > > { > > g_autofree struct vfio_region_info *rom = NULL; > > diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c > > index 39dae72497..d37f722cce 100644 > > --- a/hw/vfio/pci-quirks.c > > +++ b/hw/vfio/pci-quirks.c > > @@ -1259,6 +1259,7 @@ void vfio_bar_quirk_setup(VFIOPCIDevice > > *vdev, int nr) > > vfio_probe_nvidia_bar0_quirk(vdev, nr); > > vfio_probe_rtl8168_bar2_quirk(vdev, nr); > > #ifdef CONFIG_VFIO_IGD > > + vfio_probe_igd_bar0_quirk(vdev, nr); > > vfio_probe_igd_bar4_quirk(vdev, nr); > > #endif > > } > > diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h > > index bf67df2fbc..5ad090a229 100644 > > --- a/hw/vfio/pci.h > > +++ b/hw/vfio/pci.h > > @@ -215,6 +215,7 @@ void vfio_setup_resetfn_quirk(VFIOPCIDevice > > *vdev); > > bool vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp); > > void vfio_quirk_reset(VFIOPCIDevice *vdev); > > VFIOQuirk *vfio_quirk_alloc(int nr_mem); > > +void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr); > > void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr); > > > > extern const PropertyInfo qdev_prop_nv_gpudirect_clique; > > -- Thanks, Corvin
On Wed, 2024-08-28 at 12:40 +0200, Corvin Köhne wrote: > On Mon, 2024-08-26 at 10:35 -0600, Alex Williamson wrote: > > > > PS - please drop the confidential email warning signature when > > posting > > to public lists. > > > > Sry for the noise. I can't drop it, so I'm going to use another mail > address to post my patches. > > > Argh, forgot updating my send-email config when resending the patch series. Should I resend it again? -- Kind regards, Corvin
On 8/28/24 14:50, Corvin Köhne wrote: > On Wed, 2024-08-28 at 12:40 +0200, Corvin Köhne wrote: >> On Mon, 2024-08-26 at 10:35 -0600, Alex Williamson wrote: >>> >>> PS - please drop the confidential email warning signature when >>> posting >>> to public lists. >>> >> >> Sry for the noise. I can't drop it, so I'm going to use another mail >> address to post my patches. >> >>> > > Argh, forgot updating my send-email config when resending the patch > series. Should I resend it again? Please do because the result is not compatible with the tools we use to extract patches (b4). Thanks, C.
© 2016 - 2024 Red Hat, Inc.