[PATCH v2 06/29] vfio: enable per-IRQ MSI-X masking

John Levon posted 29 patches 5 months, 4 weeks ago
Maintainers: John Levon <john.levon@nutanix.com>, Thanos Makatos <thanos.makatos@nutanix.com>, Alex Williamson <alex.williamson@redhat.com>, "Cédric Le Goater" <clg@redhat.com>, "Michael S. Tsirkin" <mst@redhat.com>, Stefano Garzarella <sgarzare@redhat.com>, Paolo Bonzini <pbonzini@redhat.com>, Peter Xu <peterx@redhat.com>, David Hildenbrand <david@redhat.com>, "Philippe Mathieu-Daudé" <philmd@linaro.org>, "Marc-André Lureau" <marcandre.lureau@redhat.com>, "Daniel P. Berrangé" <berrange@redhat.com>
There is a newer version of this series
[PATCH v2 06/29] vfio: enable per-IRQ MSI-X masking
Posted by John Levon 5 months, 4 weeks ago
If VFIO_IRQ_INFO_MASKABLE is set for VFIO_PCI_MSIX_IRQ_INDEX, record
this in ->can_mask_msix, and use it to individually mask MSI-X
interrupts as needed.

Originally-by: John Johnson <john.g.johnson@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
Signed-off-by: John Levon <john.levon@nutanix.com>
---
 hw/vfio/pci.h                 |  1 +
 include/hw/vfio/vfio-device.h |  2 ++
 hw/vfio/device.c              | 26 +++++++++++++++++++
 hw/vfio/pci.c                 | 47 ++++++++++++++++++++++++++++++-----
 4 files changed, 70 insertions(+), 6 deletions(-)

diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 61fa385ddb..43c2d72b84 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -191,6 +191,7 @@ struct VFIOPCIDevice {
     bool defer_kvm_irq_routing;
     bool clear_parent_atomics_on_exit;
     bool skip_vsc_check;
+    bool can_mask_msix;
     VFIODisplay *dpy;
     Notifier irqchip_change_notifier;
 };
diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
index 8bcb3c19f6..923f9cd116 100644
--- a/include/hw/vfio/vfio-device.h
+++ b/include/hw/vfio/vfio-device.h
@@ -133,7 +133,9 @@ struct VFIODeviceOps {
     (ret < 0 ? strerror(-ret) : "short write")
 
 void vfio_device_irq_disable(VFIODevice *vbasedev, int index);
+void vfio_device_irq_unmask_single(VFIODevice *vbasedev, int index, int irq);
 void vfio_device_irq_unmask(VFIODevice *vbasedev, int index);
+void vfio_device_irq_mask_single(VFIODevice *vbasedev, int index, int irq);
 void vfio_device_irq_mask(VFIODevice *vbasedev, int index);
 bool vfio_device_irq_set_signaling(VFIODevice *vbasedev, int index, int subindex,
                                    int action, int fd, Error **errp);
diff --git a/hw/vfio/device.c b/hw/vfio/device.c
index 9fba2c7272..d0068086ae 100644
--- a/hw/vfio/device.c
+++ b/hw/vfio/device.c
@@ -85,6 +85,19 @@ void vfio_device_irq_disable(VFIODevice *vbasedev, int index)
     vbasedev->io_ops->set_irqs(vbasedev, &irq_set);
 }
 
+void vfio_device_irq_unmask_single(VFIODevice *vbasedev, int index, int irq)
+{
+    struct vfio_irq_set irq_set = {
+        .argsz = sizeof(irq_set),
+        .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK,
+        .index = index,
+        .start = irq,
+        .count = 1,
+    };
+
+    vbasedev->io_ops->set_irqs(vbasedev, &irq_set);
+}
+
 void vfio_device_irq_unmask(VFIODevice *vbasedev, int index)
 {
     struct vfio_irq_set irq_set = {
@@ -98,6 +111,19 @@ void vfio_device_irq_unmask(VFIODevice *vbasedev, int index)
     vbasedev->io_ops->set_irqs(vbasedev, &irq_set);
 }
 
+void vfio_device_irq_mask_single(VFIODevice *vbasedev, int index, int irq)
+{
+    struct vfio_irq_set irq_set = {
+        .argsz = sizeof(irq_set),
+        .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK,
+        .index = index,
+        .start = irq,
+        .count = 1,
+    };
+
+    vbasedev->io_ops->set_irqs(vbasedev, &irq_set);
+}
+
 void vfio_device_irq_mask(VFIODevice *vbasedev, int index)
 {
     struct vfio_irq_set irq_set = {
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 5159198bdb..ef38b4692a 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -535,6 +535,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
 {
     VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
     VFIOMSIVector *vector;
+    bool new_vec = false;
     int ret;
     bool resizing = !!(vdev->nr_vectors < nr + 1);
 
@@ -549,6 +550,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
             error_report("vfio: Error: event_notifier_init failed");
         }
         vector->use = true;
+        new_vec = true;
         msix_vector_use(pdev, nr);
     }
 
@@ -575,6 +577,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
                 kvm_irqchip_commit_route_changes(&vfio_route_change);
                 vfio_connect_kvm_msi_virq(vector);
             }
+            new_vec = true;
         }
     }
 
@@ -584,6 +587,9 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
      * in use, so we shutdown and incrementally increase them as needed.
      * nr_vectors represents the total number of vectors allocated.
      *
+     * Otherwise, unmask the vector if the vector is already setup (and we can
+     * do so) or send the fd if not.
+     *
      * When dynamic allocation is supported, let the host only allocate
      * and enable a vector when it is in use in guest. nr_vectors represents
      * the upper bound of vectors being enabled (but not all of the ranges
@@ -594,13 +600,20 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
     }
 
     if (!vdev->defer_kvm_irq_routing) {
-        if (vdev->msix->noresize && resizing) {
-            vfio_device_irq_disable(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX);
-            ret = vfio_enable_vectors(vdev, true);
-            if (ret) {
-                error_report("vfio: failed to enable vectors, %s",
-                             strerror(-ret));
+        if (resizing) {
+            if (vdev->msix->noresize) {
+                vfio_device_irq_disable(&vdev->vbasedev,
+                                        VFIO_PCI_MSIX_IRQ_INDEX);
+                ret = vfio_enable_vectors(vdev, true);
+                if (ret) {
+                    error_report("vfio: failed to enable vectors, %d", ret);
+                }
+            } else {
+                set_irq_signalling(&vdev->vbasedev, vector, nr);
             }
+        } else if (vdev->can_mask_msix && !new_vec) {
+            vfio_device_irq_unmask_single(&vdev->vbasedev,
+                                          VFIO_PCI_MSIX_IRQ_INDEX, nr);
         } else {
             set_irq_signalling(&vdev->vbasedev, vector, nr);
         }
@@ -630,6 +643,13 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr)
 
     trace_vfio_msix_vector_release(vdev->vbasedev.name, nr);
 
+    /* just mask vector if peer supports it */
+    if (vdev->can_mask_msix) {
+        vfio_device_irq_mask_single(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX,
+                                    nr);
+        return;
+    }
+
     /*
      * There are still old guests that mask and unmask vectors on every
      * interrupt.  If we're using QEMU bypass with a KVM irqfd, leave all of
@@ -702,6 +722,13 @@ static void vfio_msix_enable(VFIOPCIDevice *vdev)
             error_report("vfio: failed to enable vectors, %s",
                          strerror(-ret));
         }
+    } else if (vdev->can_mask_msix) {
+        /*
+         * If we can use single irq masking, send an invalid fd on vector 0
+         * to enable MSI-X without any vectors enabled.
+         */
+        vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX,
+                                      0, VFIO_IRQ_SET_ACTION_TRIGGER, -1, NULL);
     } else {
         /*
          * Some communication channels between VF & PF or PF & fw rely on the
@@ -2842,6 +2869,14 @@ bool vfio_pci_populate_device(VFIOPCIDevice *vdev, Error **errp)
         }
     }
 
+    ret = vfio_device_get_irq_info(vbasedev, VFIO_PCI_MSIX_IRQ_INDEX,
+                                   &irq_info);
+    if (ret == 0 && (irq_info.flags & VFIO_IRQ_INFO_MASKABLE)) {
+        vdev->can_mask_msix = true;
+    } else {
+        vdev->can_mask_msix = false;
+    }
+
     ret = vfio_device_get_irq_info(vbasedev, VFIO_PCI_ERR_IRQ_INDEX, &irq_info);
     if (ret) {
         /* This can fail for an old kernel or legacy PCI dev */
-- 
2.43.0
Re: [PATCH v2 06/29] vfio: enable per-IRQ MSI-X masking
Posted by Cédric Le Goater 5 months, 4 weeks ago
+Steven

On 5/20/25 17:03, John Levon wrote:
> If VFIO_IRQ_INFO_MASKABLE is set for VFIO_PCI_MSIX_IRQ_INDEX, record
> this in ->can_mask_msix, and use it to individually mask MSI-X
> interrupts as needed.
> 
> Originally-by: John Johnson <john.g.johnson@oracle.com>
> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> Signed-off-by: John Levon <john.levon@nutanix.com>


This one conflicts with the "live update" series. We will address it later.


Thanks,

C.



> ---
>   hw/vfio/pci.h                 |  1 +
>   include/hw/vfio/vfio-device.h |  2 ++
>   hw/vfio/device.c              | 26 +++++++++++++++++++
>   hw/vfio/pci.c                 | 47 ++++++++++++++++++++++++++++++-----
>   4 files changed, 70 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index 61fa385ddb..43c2d72b84 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -191,6 +191,7 @@ struct VFIOPCIDevice {
>       bool defer_kvm_irq_routing;
>       bool clear_parent_atomics_on_exit;
>       bool skip_vsc_check;
> +    bool can_mask_msix;
>       VFIODisplay *dpy;
>       Notifier irqchip_change_notifier;
>   };
> diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
> index 8bcb3c19f6..923f9cd116 100644
> --- a/include/hw/vfio/vfio-device.h
> +++ b/include/hw/vfio/vfio-device.h
> @@ -133,7 +133,9 @@ struct VFIODeviceOps {
>       (ret < 0 ? strerror(-ret) : "short write")
>   
>   void vfio_device_irq_disable(VFIODevice *vbasedev, int index);
> +void vfio_device_irq_unmask_single(VFIODevice *vbasedev, int index, int irq);
>   void vfio_device_irq_unmask(VFIODevice *vbasedev, int index);
> +void vfio_device_irq_mask_single(VFIODevice *vbasedev, int index, int irq);
>   void vfio_device_irq_mask(VFIODevice *vbasedev, int index);
>   bool vfio_device_irq_set_signaling(VFIODevice *vbasedev, int index, int subindex,
>                                      int action, int fd, Error **errp);
> diff --git a/hw/vfio/device.c b/hw/vfio/device.c
> index 9fba2c7272..d0068086ae 100644
> --- a/hw/vfio/device.c
> +++ b/hw/vfio/device.c
> @@ -85,6 +85,19 @@ void vfio_device_irq_disable(VFIODevice *vbasedev, int index)
>       vbasedev->io_ops->set_irqs(vbasedev, &irq_set);
>   }
>   
> +void vfio_device_irq_unmask_single(VFIODevice *vbasedev, int index, int irq)
> +{
> +    struct vfio_irq_set irq_set = {
> +        .argsz = sizeof(irq_set),
> +        .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK,
> +        .index = index,
> +        .start = irq,
> +        .count = 1,
> +    };
> +
> +    vbasedev->io_ops->set_irqs(vbasedev, &irq_set);
> +}
> +
>   void vfio_device_irq_unmask(VFIODevice *vbasedev, int index)
>   {
>       struct vfio_irq_set irq_set = {
> @@ -98,6 +111,19 @@ void vfio_device_irq_unmask(VFIODevice *vbasedev, int index)
>       vbasedev->io_ops->set_irqs(vbasedev, &irq_set);
>   }
>   
> +void vfio_device_irq_mask_single(VFIODevice *vbasedev, int index, int irq)
> +{
> +    struct vfio_irq_set irq_set = {
> +        .argsz = sizeof(irq_set),
> +        .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK,
> +        .index = index,
> +        .start = irq,
> +        .count = 1,
> +    };
> +
> +    vbasedev->io_ops->set_irqs(vbasedev, &irq_set);
> +}
> +
>   void vfio_device_irq_mask(VFIODevice *vbasedev, int index)
>   {
>       struct vfio_irq_set irq_set = {
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 5159198bdb..ef38b4692a 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -535,6 +535,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
>   {
>       VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
>       VFIOMSIVector *vector;
> +    bool new_vec = false;
>       int ret;
>       bool resizing = !!(vdev->nr_vectors < nr + 1);
>   
> @@ -549,6 +550,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
>               error_report("vfio: Error: event_notifier_init failed");
>           }
>           vector->use = true;
> +        new_vec = true;
>           msix_vector_use(pdev, nr);
>       }
>   
> @@ -575,6 +577,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
>                   kvm_irqchip_commit_route_changes(&vfio_route_change);
>                   vfio_connect_kvm_msi_virq(vector);
>               }
> +            new_vec = true;
>           }
>       }
>   
> @@ -584,6 +587,9 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
>        * in use, so we shutdown and incrementally increase them as needed.
>        * nr_vectors represents the total number of vectors allocated.
>        *
> +     * Otherwise, unmask the vector if the vector is already setup (and we can
> +     * do so) or send the fd if not.
> +     *
>        * When dynamic allocation is supported, let the host only allocate
>        * and enable a vector when it is in use in guest. nr_vectors represents
>        * the upper bound of vectors being enabled (but not all of the ranges
> @@ -594,13 +600,20 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
>       }
>   
>       if (!vdev->defer_kvm_irq_routing) {
> -        if (vdev->msix->noresize && resizing) {
> -            vfio_device_irq_disable(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX);
> -            ret = vfio_enable_vectors(vdev, true);
> -            if (ret) {
> -                error_report("vfio: failed to enable vectors, %s",
> -                             strerror(-ret));
> +        if (resizing) {
> +            if (vdev->msix->noresize) {
> +                vfio_device_irq_disable(&vdev->vbasedev,
> +                                        VFIO_PCI_MSIX_IRQ_INDEX);
> +                ret = vfio_enable_vectors(vdev, true);
> +                if (ret) {
> +                    error_report("vfio: failed to enable vectors, %d", ret);
> +                }
> +            } else {
> +                set_irq_signalling(&vdev->vbasedev, vector, nr);
>               }
> +        } else if (vdev->can_mask_msix && !new_vec) {
> +            vfio_device_irq_unmask_single(&vdev->vbasedev,
> +                                          VFIO_PCI_MSIX_IRQ_INDEX, nr);
>           } else {
>               set_irq_signalling(&vdev->vbasedev, vector, nr);
>           }
> @@ -630,6 +643,13 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr)
>   
>       trace_vfio_msix_vector_release(vdev->vbasedev.name, nr);
>   
> +    /* just mask vector if peer supports it */
> +    if (vdev->can_mask_msix) {
> +        vfio_device_irq_mask_single(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX,
> +                                    nr);
> +        return;
> +    }
> +
>       /*
>        * There are still old guests that mask and unmask vectors on every
>        * interrupt.  If we're using QEMU bypass with a KVM irqfd, leave all of
> @@ -702,6 +722,13 @@ static void vfio_msix_enable(VFIOPCIDevice *vdev)
>               error_report("vfio: failed to enable vectors, %s",
>                            strerror(-ret));
>           }
> +    } else if (vdev->can_mask_msix) {
> +        /*
> +         * If we can use single irq masking, send an invalid fd on vector 0
> +         * to enable MSI-X without any vectors enabled.
> +         */
> +        vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX,
> +                                      0, VFIO_IRQ_SET_ACTION_TRIGGER, -1, NULL);
>       } else {
>           /*
>            * Some communication channels between VF & PF or PF & fw rely on the
> @@ -2842,6 +2869,14 @@ bool vfio_pci_populate_device(VFIOPCIDevice *vdev, Error **errp)
>           }
>       }
>   
> +    ret = vfio_device_get_irq_info(vbasedev, VFIO_PCI_MSIX_IRQ_INDEX,
> +                                   &irq_info);
> +    if (ret == 0 && (irq_info.flags & VFIO_IRQ_INFO_MASKABLE)) {
> +        vdev->can_mask_msix = true;
> +    } else {
> +        vdev->can_mask_msix = false;
> +    }
> +
>       ret = vfio_device_get_irq_info(vbasedev, VFIO_PCI_ERR_IRQ_INDEX, &irq_info);
>       if (ret) {
>           /* This can fail for an old kernel or legacy PCI dev */


Re: [PATCH v2 06/29] vfio: enable per-IRQ MSI-X masking
Posted by Steven Sistare 5 months, 3 weeks ago
On 5/21/2025 3:29 AM, Cédric Le Goater wrote:
> +Steven
> 
> On 5/20/25 17:03, John Levon wrote:
>> If VFIO_IRQ_INFO_MASKABLE is set for VFIO_PCI_MSIX_IRQ_INDEX, record
>> this in ->can_mask_msix, and use it to individually mask MSI-X
>> interrupts as needed.
>>
>> Originally-by: John Johnson <john.g.johnson@oracle.com>
>> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
>> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
>> Signed-off-by: John Levon <john.levon@nutanix.com>
> 
> This one conflicts with the "live update" series. We will address it later.

AFAICT there is only a trivial conflict versus my patch

   vfio/pci: vfio_vector_init
   https://lore.kernel.org/qemu-devel/1747063973-124548-17-git-send-email-steven.sistare@oracle.com/

because I extract a handful of lines from vfio_msix_vector_do_use into a
subroutine, at the same place vfio-user has added a local variable new_vec,
but the new code will simply be:

vfio_msix_vector_do_use
     if (!vector->use) {
         new_vec = true;
         vfio_pci_vector_init(vdev, nr);
     }

Also, for now I am assuming that if vfio-user is being used, then I will add
a CPR blocker.  Thus I do not have to account for per-IRQ masking in the
cpr load path, for now.

- Steve

>> ---
>>   hw/vfio/pci.h                 |  1 +
>>   include/hw/vfio/vfio-device.h |  2 ++
>>   hw/vfio/device.c              | 26 +++++++++++++++++++
>>   hw/vfio/pci.c                 | 47 ++++++++++++++++++++++++++++++-----
>>   4 files changed, 70 insertions(+), 6 deletions(-)
>>
>> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
>> index 61fa385ddb..43c2d72b84 100644
>> --- a/hw/vfio/pci.h
>> +++ b/hw/vfio/pci.h
>> @@ -191,6 +191,7 @@ struct VFIOPCIDevice {
>>       bool defer_kvm_irq_routing;
>>       bool clear_parent_atomics_on_exit;
>>       bool skip_vsc_check;
>> +    bool can_mask_msix;
>>       VFIODisplay *dpy;
>>       Notifier irqchip_change_notifier;
>>   };
>> diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
>> index 8bcb3c19f6..923f9cd116 100644
>> --- a/include/hw/vfio/vfio-device.h
>> +++ b/include/hw/vfio/vfio-device.h
>> @@ -133,7 +133,9 @@ struct VFIODeviceOps {
>>       (ret < 0 ? strerror(-ret) : "short write")
>>   void vfio_device_irq_disable(VFIODevice *vbasedev, int index);
>> +void vfio_device_irq_unmask_single(VFIODevice *vbasedev, int index, int irq);
>>   void vfio_device_irq_unmask(VFIODevice *vbasedev, int index);
>> +void vfio_device_irq_mask_single(VFIODevice *vbasedev, int index, int irq);
>>   void vfio_device_irq_mask(VFIODevice *vbasedev, int index);
>>   bool vfio_device_irq_set_signaling(VFIODevice *vbasedev, int index, int subindex,
>>                                      int action, int fd, Error **errp);
>> diff --git a/hw/vfio/device.c b/hw/vfio/device.c
>> index 9fba2c7272..d0068086ae 100644
>> --- a/hw/vfio/device.c
>> +++ b/hw/vfio/device.c
>> @@ -85,6 +85,19 @@ void vfio_device_irq_disable(VFIODevice *vbasedev, int index)
>>       vbasedev->io_ops->set_irqs(vbasedev, &irq_set);
>>   }
>> +void vfio_device_irq_unmask_single(VFIODevice *vbasedev, int index, int irq)
>> +{
>> +    struct vfio_irq_set irq_set = {
>> +        .argsz = sizeof(irq_set),
>> +        .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK,
>> +        .index = index,
>> +        .start = irq,
>> +        .count = 1,
>> +    };
>> +
>> +    vbasedev->io_ops->set_irqs(vbasedev, &irq_set);
>> +}
>> +
>>   void vfio_device_irq_unmask(VFIODevice *vbasedev, int index)
>>   {
>>       struct vfio_irq_set irq_set = {
>> @@ -98,6 +111,19 @@ void vfio_device_irq_unmask(VFIODevice *vbasedev, int index)
>>       vbasedev->io_ops->set_irqs(vbasedev, &irq_set);
>>   }
>> +void vfio_device_irq_mask_single(VFIODevice *vbasedev, int index, int irq)
>> +{
>> +    struct vfio_irq_set irq_set = {
>> +        .argsz = sizeof(irq_set),
>> +        .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK,
>> +        .index = index,
>> +        .start = irq,
>> +        .count = 1,
>> +    };
>> +
>> +    vbasedev->io_ops->set_irqs(vbasedev, &irq_set);
>> +}
>> +
>>   void vfio_device_irq_mask(VFIODevice *vbasedev, int index)
>>   {
>>       struct vfio_irq_set irq_set = {
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 5159198bdb..ef38b4692a 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -535,6 +535,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
>>   {
>>       VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
>>       VFIOMSIVector *vector;
>> +    bool new_vec = false;
>>       int ret;
>>       bool resizing = !!(vdev->nr_vectors < nr + 1);
>> @@ -549,6 +550,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
>>               error_report("vfio: Error: event_notifier_init failed");
>>           }
>>           vector->use = true;
>> +        new_vec = true;
>>           msix_vector_use(pdev, nr);
>>       }
>> @@ -575,6 +577,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
>>                   kvm_irqchip_commit_route_changes(&vfio_route_change);
>>                   vfio_connect_kvm_msi_virq(vector);
>>               }
>> +            new_vec = true;
>>           }
>>       }
>> @@ -584,6 +587,9 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
>>        * in use, so we shutdown and incrementally increase them as needed.
>>        * nr_vectors represents the total number of vectors allocated.
>>        *
>> +     * Otherwise, unmask the vector if the vector is already setup (and we can
>> +     * do so) or send the fd if not.
>> +     *
>>        * When dynamic allocation is supported, let the host only allocate
>>        * and enable a vector when it is in use in guest. nr_vectors represents
>>        * the upper bound of vectors being enabled (but not all of the ranges
>> @@ -594,13 +600,20 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
>>       }
>>       if (!vdev->defer_kvm_irq_routing) {
>> -        if (vdev->msix->noresize && resizing) {
>> -            vfio_device_irq_disable(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX);
>> -            ret = vfio_enable_vectors(vdev, true);
>> -            if (ret) {
>> -                error_report("vfio: failed to enable vectors, %s",
>> -                             strerror(-ret));
>> +        if (resizing) {
>> +            if (vdev->msix->noresize) {
>> +                vfio_device_irq_disable(&vdev->vbasedev,
>> +                                        VFIO_PCI_MSIX_IRQ_INDEX);
>> +                ret = vfio_enable_vectors(vdev, true);
>> +                if (ret) {
>> +                    error_report("vfio: failed to enable vectors, %d", ret);
>> +                }
>> +            } else {
>> +                set_irq_signalling(&vdev->vbasedev, vector, nr);
>>               }
>> +        } else if (vdev->can_mask_msix && !new_vec) {
>> +            vfio_device_irq_unmask_single(&vdev->vbasedev,
>> +                                          VFIO_PCI_MSIX_IRQ_INDEX, nr);
>>           } else {
>>               set_irq_signalling(&vdev->vbasedev, vector, nr);
>>           }
>> @@ -630,6 +643,13 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr)
>>       trace_vfio_msix_vector_release(vdev->vbasedev.name, nr);
>> +    /* just mask vector if peer supports it */
>> +    if (vdev->can_mask_msix) {
>> +        vfio_device_irq_mask_single(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX,
>> +                                    nr);
>> +        return;
>> +    }
>> +
>>       /*
>>        * There are still old guests that mask and unmask vectors on every
>>        * interrupt.  If we're using QEMU bypass with a KVM irqfd, leave all of
>> @@ -702,6 +722,13 @@ static void vfio_msix_enable(VFIOPCIDevice *vdev)
>>               error_report("vfio: failed to enable vectors, %s",
>>                            strerror(-ret));
>>           }
>> +    } else if (vdev->can_mask_msix) {
>> +        /*
>> +         * If we can use single irq masking, send an invalid fd on vector 0
>> +         * to enable MSI-X without any vectors enabled.
>> +         */
>> +        vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX,
>> +                                      0, VFIO_IRQ_SET_ACTION_TRIGGER, -1, NULL);
>>       } else {
>>           /*
>>            * Some communication channels between VF & PF or PF & fw rely on the
>> @@ -2842,6 +2869,14 @@ bool vfio_pci_populate_device(VFIOPCIDevice *vdev, Error **errp)
>>           }
>>       }
>> +    ret = vfio_device_get_irq_info(vbasedev, VFIO_PCI_MSIX_IRQ_INDEX,
>> +                                   &irq_info);
>> +    if (ret == 0 && (irq_info.flags & VFIO_IRQ_INFO_MASKABLE)) {
>> +        vdev->can_mask_msix = true;
>> +    } else {
>> +        vdev->can_mask_msix = false;
>> +    }
>> +
>>       ret = vfio_device_get_irq_info(vbasedev, VFIO_PCI_ERR_IRQ_INDEX, &irq_info);
>>       if (ret) {
>>           /* This can fail for an old kernel or legacy PCI dev */
> 


Re: [PATCH v2 06/29] vfio: enable per-IRQ MSI-X masking
Posted by John Levon 5 months, 3 weeks ago
On Wed, May 28, 2025 at 01:16:32PM -0400, Steven Sistare wrote:

> > On 5/20/25 17:03, John Levon wrote:
> > > If VFIO_IRQ_INFO_MASKABLE is set for VFIO_PCI_MSIX_IRQ_INDEX, record
> > > this in ->can_mask_msix, and use it to individually mask MSI-X
> > > interrupts as needed.
> > > 
> > > Originally-by: John Johnson <john.g.johnson@oracle.com>
> > > Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> > > Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> > > Signed-off-by: John Levon <john.levon@nutanix.com>
> > 
> > This one conflicts with the "live update" series. We will address it later.
> 
> AFAICT there is only a trivial conflict versus my patch
> 
>   vfio/pci: vfio_vector_init
>   https://lore.kernel.org/qemu-devel/1747063973-124548-17-git-send-email-steven.sistare@oracle.com/
> 
> because I extract a handful of lines from vfio_msix_vector_do_use into a
> subroutine, at the same place vfio-user has added a local variable new_vec,
> but the new code will simply be:
> 
> vfio_msix_vector_do_use
>     if (!vector->use) {
>         new_vec = true;
>         vfio_pci_vector_init(vdev, nr);
>     }

I'll take a look when rebasing on top of CPR and ask you if I get stuck.

> Also, for now I am assuming that if vfio-user is being used, then I will add
> a CPR blocker.

That's fine (and I will take care of it in my series), but I *think* this
per-interrupt masking can apply outside of vfio-user.

regards
john