[PATCH] vfio/igd: Fix incorrect error propagation in vfio_pci_igd_opregion_detect()

Tomita Moeko posted 1 patch 5 months, 4 weeks ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20250521154036.28219-1-tomitamoeko@gmail.com
Maintainers: Alex Williamson <alex.williamson@redhat.com>, "Cédric Le Goater" <clg@redhat.com>, Tomita Moeko <tomitamoeko@gmail.com>
There is a newer version of this series
hw/vfio/igd.c | 22 ++++++++++------------
1 file changed, 10 insertions(+), 12 deletions(-)
[PATCH] vfio/igd: Fix incorrect error propagation in vfio_pci_igd_opregion_detect()
Posted by Tomita Moeko 5 months, 4 weeks ago
In vfio_pci_igd_opregion_detect(), errp will be set when device does
not have OpRegion or is hotplugged. This errp will be propergated to
pci_qdev_realize(), which interprets it as failure, causing unexpected
termination on devices without OpRegion like SR-IOV VFs or discrete
GPUs. Fix it by not setting errp in vfio_pci_igd_opregion_detect().

This patch also checks if the device has OpRegion before hotplug status
to prvent unwanted warning messages on non-IGD devices.

Fixes: c0273e77f2d7 ("vfio/igd: Detect IGD device by OpRegion")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2968
Reported-by: Edmund Raile <edmund.raile@protonmail.com>
Link: https://lore.kernel.org/qemu-devel/30044d14-17ec-46e3-b9c3-63d27a5bde27@gmail.com
Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com>
---
 hw/vfio/igd.c | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
index e7952d15a0..e7a9d1ffc1 100644
--- a/hw/vfio/igd.c
+++ b/hw/vfio/igd.c
@@ -187,23 +187,21 @@ static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
 }
 
 static bool vfio_pci_igd_opregion_detect(VFIOPCIDevice *vdev,
-                                         struct vfio_region_info **opregion,
-                                         Error **errp)
+                                         struct vfio_region_info **opregion)
 {
     int ret;
 
-    /* Hotplugging is not supported for opregion access */
-    if (vdev->pdev.qdev.hotplugged) {
-        error_setg(errp, "IGD OpRegion is not supported on hotplugged device");
-        return false;
-    }
-
     ret = vfio_device_get_region_info_type(&vdev->vbasedev,
                     VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
                     VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, opregion);
     if (ret) {
-        error_setg_errno(errp, -ret,
-                         "Device does not supports IGD OpRegion feature");
+        return false;
+    }
+
+    /* Hotplugging is not supported for opregion access */
+    if (vdev->pdev.qdev.hotplugged) {
+        warn_report("IGD device detected, but OpRegion is not supported "
+                    "on hotplugged device.");
         return false;
     }
 
@@ -524,7 +522,7 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
     }
 
     /* IGD device always comes with OpRegion */
-    if (!vfio_pci_igd_opregion_detect(vdev, &opregion, errp)) {
+    if (!vfio_pci_igd_opregion_detect(vdev, &opregion)) {
         return true;
     }
     info_report("OpRegion detected on Intel display %x.", vdev->device_id);
@@ -695,7 +693,7 @@ static bool vfio_pci_kvmgt_config_quirk(VFIOPCIDevice *vdev, Error **errp)
         return true;
     }
 
-    if (!vfio_pci_igd_opregion_detect(vdev, &opregion, errp)) {
+    if (!vfio_pci_igd_opregion_detect(vdev, &opregion)) {
         /* Should never reach here, KVMGT always emulates OpRegion */
         return false;
     }
-- 
2.47.2
Re: [PATCH] vfio/igd: Fix incorrect error propagation in vfio_pci_igd_opregion_detect()
Posted by edmund.raile via 5 months, 3 weeks ago
Hi Moeko,

>I did some further debugging, and found it was caused by a mistake in
>error handling. In vfio_pci_igd_opregion_detect(), `errp` is set when
>OpRegion is not found. However, in pci_qdev_realize(), the caller of
>vfio_realize(), it checks if the errp is set for failure, rather than
>the return value.

Am I correct in assuming that errp can just be removed because the
return status already carries the information?

>I will submit a patch later to fix error handling.

I've had to manually apply the patch by hand.
For me, your patch became:

diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
index e7952d15a0..e7a9d1ffc1 100644
--- a/hw/vfio/igd.c
+++ b/hw/vfio/igd.c
@@ -187,23 +187,21 @@ static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
 }
 
 static bool vfio_pci_igd_opregion_detect(VFIOPCIDevice *vdev,
-                                         struct vfio_region_info **opregion,
-                                         Error **errp)
+                                         struct vfio_region_info **opregion)
 {
     int ret;
 
-    /* Hotplugging is not supported for opregion access */
-    if (vdev->pdev.qdev.hotplugged) {
-        error_setg(errp, "IGD OpRegion is not supported on hotplugged device");
-        return false;
-    }
-
     ret = vfio_device_get_region_info_type(&vdev->vbasedev,
                     VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
                     VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, opregion);
     if (ret) {
-        error_setg_errno(errp, -ret,
-                         "Device does not supports IGD OpRegion feature");
+        return false;
+    }
+
+    /* Hotplugging is not supported for opregion access */
+    if (vdev->pdev.qdev.hotplugged) {
+        warn_report("IGD device detected, but OpRegion is not supported "
+                    "on hotplugged device.");
         return false;
     }
 
@@ -524,7 +522,7 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
     }
 
     /* IGD device always comes with OpRegion */
-    if (!vfio_pci_igd_opregion_detect(vdev, &opregion, errp)) {
+    if (!vfio_pci_igd_opregion_detect(vdev, &opregion)) {
         return true;
     }
     info_report("OpRegion detected on Intel display %x.", vdev->device_id);
@@ -695,7 +693,7 @@ static bool vfio_pci_kvmgt_config_quirk(VFIOPCIDevice *vdev, Error **errp)
         return true;
     }
 
-    if (!vfio_pci_igd_opregion_detect(vdev, &opregion, errp)) {
+    if (!vfio_pci_igd_opregion_detect(vdev, &opregion)) {
         /* Should never reach here, KVMGT always emulates OpRegion */
         return false;
     }

Testing it, it works (for me)!
Though I can only confirm the negative (for a SR-IOV iGPU
virtual function).
I can't unbind my primary GPU (the root iGPU device) since
I'm using it.
The VM boots fine regardless whether
`-device vfio-pci,...,x-igd-opregion=` is set to off or on now.
Tested-by: Edmund Raile <edmund.raile@protonmail.com>

Your patch provides successful automatic OpRegion detection.

But for the case that this automatic detection should some day break
or become ineffective due to new hardware: should I still send in my
[PATCH v3]?

[PATCH v3] vfio/igd: Respect x-igd-opregion=off in IGD quirk handling

diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
index e7a9d1ffc1..ac9f504e8f 100644
--- a/hw/vfio/igd.c
+++ b/hw/vfio/igd.c
@@ -521,6 +521,11 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
         return true;
     }
 
+    /* Respect x-igd-opregion=off by skipping OpRegion handling */
+    if (!(vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION)) {
+        return true;
+    }
+
     /* IGD device always comes with OpRegion */
     if (!vfio_pci_igd_opregion_detect(vdev, &opregion)) {
         return true;

That way, we'd have a certain way to force it off if need be
A redundancy, in case.
Can't always expect everything to always work perfectly.

Thanks,
Edmund
Re: [PATCH] vfio/igd: Fix incorrect error propagation in vfio_pci_igd_opregion_detect()
Posted by Corvin Köhne 5 months, 3 weeks ago
On Wed, 2025-05-21 at 23:40 +0800, Tomita Moeko wrote:
> CAUTION: External Email!!
> In vfio_pci_igd_opregion_detect(), errp will be set when device does
> not have OpRegion or is hotplugged. This errp will be propergated to
> pci_qdev_realize(), which interprets it as failure, causing unexpected
> termination on devices without OpRegion like SR-IOV VFs or discrete
> GPUs. Fix it by not setting errp in vfio_pci_igd_opregion_detect().
> 
> This patch also checks if the device has OpRegion before hotplug status
> to prvent unwanted warning messages on non-IGD devices.
> 

prevent

> Fixes: c0273e77f2d7 ("vfio/igd: Detect IGD device by OpRegion")
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2968
> Reported-by: Edmund Raile <edmund.raile@protonmail.com>
> Link:
> https://nospamproxywebp.beckhoff.com/enQsig/link?id=BAgAAADuIFWfr8z-RqgAAABiHH1XDaojIZQJ51lW0MfdmbQ31Dc6Q9h8spLn3SSpPU_3qX9yZXScfxI-jMpRRWHhwwz3WzjHhTZuw3K8bmq6pYob7gKjPbBhTbeNLyeDio9w7y6aczQOHjPiGEAK1Zd5bXPhuYMJhd0r02BDXxk2NzfU10_-lfkisL6dUaMagg0Urr9aoCFD5may09obVXsyPg-RtsX8nfXOGmiQX-6W6i3Z7jYH4Ys1
>  
> Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com>
> ---
> 

-- 
Kind regards,
Corvin
Re: [PATCH] vfio/igd: Fix incorrect error propagation in vfio_pci_igd_opregion_detect()
Posted by Alex Williamson 5 months, 3 weeks ago
On Wed, 21 May 2025 23:40:36 +0800
Tomita Moeko <tomitamoeko@gmail.com> wrote:

> In vfio_pci_igd_opregion_detect(), errp will be set when device does
> not have OpRegion or is hotplugged. This errp will be propergated to

propagated

> pci_qdev_realize(), which interprets it as failure, causing unexpected
> termination on devices without OpRegion like SR-IOV VFs or discrete
> GPUs. Fix it by not setting errp in vfio_pci_igd_opregion_detect().
> 
> This patch also checks if the device has OpRegion before hotplug status
> to prvent unwanted warning messages on non-IGD devices.
> 
> Fixes: c0273e77f2d7 ("vfio/igd: Detect IGD device by OpRegion")
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2968
> Reported-by: Edmund Raile <edmund.raile@protonmail.com>
> Link: https://lore.kernel.org/qemu-devel/30044d14-17ec-46e3-b9c3-63d27a5bde27@gmail.com
> Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com>
> ---
>  hw/vfio/igd.c | 22 ++++++++++------------
>  1 file changed, 10 insertions(+), 12 deletions(-)
> 
> diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
> index e7952d15a0..e7a9d1ffc1 100644
> --- a/hw/vfio/igd.c
> +++ b/hw/vfio/igd.c
> @@ -187,23 +187,21 @@ static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
>  }
>  
>  static bool vfio_pci_igd_opregion_detect(VFIOPCIDevice *vdev,
> -                                         struct vfio_region_info **opregion,
> -                                         Error **errp)
> +                                         struct vfio_region_info **opregion)
>  {
>      int ret;
>  
> -    /* Hotplugging is not supported for opregion access */
> -    if (vdev->pdev.qdev.hotplugged) {
> -        error_setg(errp, "IGD OpRegion is not supported on hotplugged device");
> -        return false;
> -    }
> -
>      ret = vfio_device_get_region_info_type(&vdev->vbasedev,
>                      VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
>                      VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, opregion);
>      if (ret) {
> -        error_setg_errno(errp, -ret,
> -                         "Device does not supports IGD OpRegion feature");
> +        return false;
> +    }
> +
> +    /* Hotplugging is not supported for opregion access */
> +    if (vdev->pdev.qdev.hotplugged) {
> +        warn_report("IGD device detected, but OpRegion is not supported "
> +                    "on hotplugged device.");
>          return false;
>      }
>  
> @@ -524,7 +522,7 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
>      }
>  
>      /* IGD device always comes with OpRegion */
> -    if (!vfio_pci_igd_opregion_detect(vdev, &opregion, errp)) {
> +    if (!vfio_pci_igd_opregion_detect(vdev, &opregion)) {
>          return true;
>      }
>      info_report("OpRegion detected on Intel display %x.", vdev->device_id);
> @@ -695,7 +693,7 @@ static bool vfio_pci_kvmgt_config_quirk(VFIOPCIDevice *vdev, Error **errp)
>          return true;
>      }
>  
> -    if (!vfio_pci_igd_opregion_detect(vdev, &opregion, errp)) {
> +    if (!vfio_pci_igd_opregion_detect(vdev, &opregion)) {
>          /* Should never reach here, KVMGT always emulates OpRegion */
>          return false;
>      }
Re: [PATCH] vfio/igd: Fix incorrect error propagation in vfio_pci_igd_opregion_detect()
Posted by Cédric Le Goater 5 months, 4 weeks ago
Moeko,

On 5/21/25 17:40, Tomita Moeko wrote:
> In vfio_pci_igd_opregion_detect(), errp will be set when device does
> not have OpRegion or is hotplugged. This errp will be propergated to
> pci_qdev_realize(), which interprets it as failure, causing unexpected
> termination on devices without OpRegion like SR-IOV VFs or discrete
> GPUs. Fix it by not setting errp in vfio_pci_igd_opregion_detect().
> 
> This patch also checks if the device has OpRegion before hotplug status
> to prvent unwanted warning messages on non-IGD devices.
> 
> Fixes: c0273e77f2d7 ("vfio/igd: Detect IGD device by OpRegion")
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2968
> Reported-by: Edmund Raile <edmund.raile@protonmail.com>
> Link: https://lore.kernel.org/qemu-devel/30044d14-17ec-46e3-b9c3-63d27a5bde27@gmail.com
> Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com>

Could you please resend a patch that applies on vfio-next ?

   https://github.com/legoater/qemu/commits/vfio-next


Thanks,

C.


> ---
>   hw/vfio/igd.c | 22 ++++++++++------------
>   1 file changed, 10 insertions(+), 12 deletions(-)
> 
> diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
> index e7952d15a0..e7a9d1ffc1 100644
> --- a/hw/vfio/igd.c
> +++ b/hw/vfio/igd.c
> @@ -187,23 +187,21 @@ static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
>   }
>   
>   static bool vfio_pci_igd_opregion_detect(VFIOPCIDevice *vdev,
> -                                         struct vfio_region_info **opregion,
> -                                         Error **errp)
> +                                         struct vfio_region_info **opregion)
>   {
>       int ret;
>   
> -    /* Hotplugging is not supported for opregion access */
> -    if (vdev->pdev.qdev.hotplugged) {
> -        error_setg(errp, "IGD OpRegion is not supported on hotplugged device");
> -        return false;
> -    }
> -
>       ret = vfio_device_get_region_info_type(&vdev->vbasedev,
>                       VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
>                       VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, opregion);
>       if (ret) {
> -        error_setg_errno(errp, -ret,
> -                         "Device does not supports IGD OpRegion feature");
> +        return false;
> +    }
> +
> +    /* Hotplugging is not supported for opregion access */
> +    if (vdev->pdev.qdev.hotplugged) {
> +        warn_report("IGD device detected, but OpRegion is not supported "
> +                    "on hotplugged device.");
>           return false;
>       }
>   
> @@ -524,7 +522,7 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
>       }
>   
>       /* IGD device always comes with OpRegion */
> -    if (!vfio_pci_igd_opregion_detect(vdev, &opregion, errp)) {
> +    if (!vfio_pci_igd_opregion_detect(vdev, &opregion)) {
>           return true;
>       }
>       info_report("OpRegion detected on Intel display %x.", vdev->device_id);
> @@ -695,7 +693,7 @@ static bool vfio_pci_kvmgt_config_quirk(VFIOPCIDevice *vdev, Error **errp)
>           return true;
>       }
>   
> -    if (!vfio_pci_igd_opregion_detect(vdev, &opregion, errp)) {
> +    if (!vfio_pci_igd_opregion_detect(vdev, &opregion)) {
>           /* Should never reach here, KVMGT always emulates OpRegion */
>           return false;
>       }