If VFIO_IRQ_INFO_MASKABLE is set for VFIO_PCI_MSIX_IRQ_INDEX, record
this in ->can_mask_msix, and use it to individually mask MSI-X
interrupts as needed.
Originally-by: John Johnson <john.g.johnson@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
Signed-off-by: John Levon <john.levon@nutanix.com>
---
hw/vfio/pci.h | 1 +
include/hw/vfio/vfio-device.h | 2 ++
hw/vfio/device.c | 26 +++++++++++++++++++
hw/vfio/pci.c | 47 ++++++++++++++++++++++++++++++-----
4 files changed, 70 insertions(+), 6 deletions(-)
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 61fa385ddb..43c2d72b84 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -191,6 +191,7 @@ struct VFIOPCIDevice {
bool defer_kvm_irq_routing;
bool clear_parent_atomics_on_exit;
bool skip_vsc_check;
+ bool can_mask_msix;
VFIODisplay *dpy;
Notifier irqchip_change_notifier;
};
diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
index 8bcb3c19f6..923f9cd116 100644
--- a/include/hw/vfio/vfio-device.h
+++ b/include/hw/vfio/vfio-device.h
@@ -133,7 +133,9 @@ struct VFIODeviceOps {
(ret < 0 ? strerror(-ret) : "short write")
void vfio_device_irq_disable(VFIODevice *vbasedev, int index);
+void vfio_device_irq_unmask_single(VFIODevice *vbasedev, int index, int irq);
void vfio_device_irq_unmask(VFIODevice *vbasedev, int index);
+void vfio_device_irq_mask_single(VFIODevice *vbasedev, int index, int irq);
void vfio_device_irq_mask(VFIODevice *vbasedev, int index);
bool vfio_device_irq_set_signaling(VFIODevice *vbasedev, int index, int subindex,
int action, int fd, Error **errp);
diff --git a/hw/vfio/device.c b/hw/vfio/device.c
index 9fba2c7272..d0068086ae 100644
--- a/hw/vfio/device.c
+++ b/hw/vfio/device.c
@@ -85,6 +85,19 @@ void vfio_device_irq_disable(VFIODevice *vbasedev, int index)
vbasedev->io_ops->set_irqs(vbasedev, &irq_set);
}
+void vfio_device_irq_unmask_single(VFIODevice *vbasedev, int index, int irq)
+{
+ struct vfio_irq_set irq_set = {
+ .argsz = sizeof(irq_set),
+ .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK,
+ .index = index,
+ .start = irq,
+ .count = 1,
+ };
+
+ vbasedev->io_ops->set_irqs(vbasedev, &irq_set);
+}
+
void vfio_device_irq_unmask(VFIODevice *vbasedev, int index)
{
struct vfio_irq_set irq_set = {
@@ -98,6 +111,19 @@ void vfio_device_irq_unmask(VFIODevice *vbasedev, int index)
vbasedev->io_ops->set_irqs(vbasedev, &irq_set);
}
+void vfio_device_irq_mask_single(VFIODevice *vbasedev, int index, int irq)
+{
+ struct vfio_irq_set irq_set = {
+ .argsz = sizeof(irq_set),
+ .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK,
+ .index = index,
+ .start = irq,
+ .count = 1,
+ };
+
+ vbasedev->io_ops->set_irqs(vbasedev, &irq_set);
+}
+
void vfio_device_irq_mask(VFIODevice *vbasedev, int index)
{
struct vfio_irq_set irq_set = {
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 5159198bdb..ef38b4692a 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -535,6 +535,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
{
VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
VFIOMSIVector *vector;
+ bool new_vec = false;
int ret;
bool resizing = !!(vdev->nr_vectors < nr + 1);
@@ -549,6 +550,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
error_report("vfio: Error: event_notifier_init failed");
}
vector->use = true;
+ new_vec = true;
msix_vector_use(pdev, nr);
}
@@ -575,6 +577,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
kvm_irqchip_commit_route_changes(&vfio_route_change);
vfio_connect_kvm_msi_virq(vector);
}
+ new_vec = true;
}
}
@@ -584,6 +587,9 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
* in use, so we shutdown and incrementally increase them as needed.
* nr_vectors represents the total number of vectors allocated.
*
+ * Otherwise, unmask the vector if the vector is already setup (and we can
+ * do so) or send the fd if not.
+ *
* When dynamic allocation is supported, let the host only allocate
* and enable a vector when it is in use in guest. nr_vectors represents
* the upper bound of vectors being enabled (but not all of the ranges
@@ -594,13 +600,20 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
}
if (!vdev->defer_kvm_irq_routing) {
- if (vdev->msix->noresize && resizing) {
- vfio_device_irq_disable(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX);
- ret = vfio_enable_vectors(vdev, true);
- if (ret) {
- error_report("vfio: failed to enable vectors, %s",
- strerror(-ret));
+ if (resizing) {
+ if (vdev->msix->noresize) {
+ vfio_device_irq_disable(&vdev->vbasedev,
+ VFIO_PCI_MSIX_IRQ_INDEX);
+ ret = vfio_enable_vectors(vdev, true);
+ if (ret) {
+ error_report("vfio: failed to enable vectors, %d", ret);
+ }
+ } else {
+ set_irq_signalling(&vdev->vbasedev, vector, nr);
}
+ } else if (vdev->can_mask_msix && !new_vec) {
+ vfio_device_irq_unmask_single(&vdev->vbasedev,
+ VFIO_PCI_MSIX_IRQ_INDEX, nr);
} else {
set_irq_signalling(&vdev->vbasedev, vector, nr);
}
@@ -630,6 +643,13 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr)
trace_vfio_msix_vector_release(vdev->vbasedev.name, nr);
+ /* just mask vector if peer supports it */
+ if (vdev->can_mask_msix) {
+ vfio_device_irq_mask_single(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX,
+ nr);
+ return;
+ }
+
/*
* There are still old guests that mask and unmask vectors on every
* interrupt. If we're using QEMU bypass with a KVM irqfd, leave all of
@@ -702,6 +722,13 @@ static void vfio_msix_enable(VFIOPCIDevice *vdev)
error_report("vfio: failed to enable vectors, %s",
strerror(-ret));
}
+ } else if (vdev->can_mask_msix) {
+ /*
+ * If we can use single irq masking, send an invalid fd on vector 0
+ * to enable MSI-X without any vectors enabled.
+ */
+ vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX,
+ 0, VFIO_IRQ_SET_ACTION_TRIGGER, -1, NULL);
} else {
/*
* Some communication channels between VF & PF or PF & fw rely on the
@@ -2842,6 +2869,14 @@ bool vfio_pci_populate_device(VFIOPCIDevice *vdev, Error **errp)
}
}
+ ret = vfio_device_get_irq_info(vbasedev, VFIO_PCI_MSIX_IRQ_INDEX,
+ &irq_info);
+ if (ret == 0 && (irq_info.flags & VFIO_IRQ_INFO_MASKABLE)) {
+ vdev->can_mask_msix = true;
+ } else {
+ vdev->can_mask_msix = false;
+ }
+
ret = vfio_device_get_irq_info(vbasedev, VFIO_PCI_ERR_IRQ_INDEX, &irq_info);
if (ret) {
/* This can fail for an old kernel or legacy PCI dev */
--
2.43.0
+Steven
On 5/20/25 17:03, John Levon wrote:
> If VFIO_IRQ_INFO_MASKABLE is set for VFIO_PCI_MSIX_IRQ_INDEX, record
> this in ->can_mask_msix, and use it to individually mask MSI-X
> interrupts as needed.
>
> Originally-by: John Johnson <john.g.johnson@oracle.com>
> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> Signed-off-by: John Levon <john.levon@nutanix.com>
This one conflicts with the "live update" series. We will address it later.
Thanks,
C.
> ---
> hw/vfio/pci.h | 1 +
> include/hw/vfio/vfio-device.h | 2 ++
> hw/vfio/device.c | 26 +++++++++++++++++++
> hw/vfio/pci.c | 47 ++++++++++++++++++++++++++++++-----
> 4 files changed, 70 insertions(+), 6 deletions(-)
>
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index 61fa385ddb..43c2d72b84 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -191,6 +191,7 @@ struct VFIOPCIDevice {
> bool defer_kvm_irq_routing;
> bool clear_parent_atomics_on_exit;
> bool skip_vsc_check;
> + bool can_mask_msix;
> VFIODisplay *dpy;
> Notifier irqchip_change_notifier;
> };
> diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
> index 8bcb3c19f6..923f9cd116 100644
> --- a/include/hw/vfio/vfio-device.h
> +++ b/include/hw/vfio/vfio-device.h
> @@ -133,7 +133,9 @@ struct VFIODeviceOps {
> (ret < 0 ? strerror(-ret) : "short write")
>
> void vfio_device_irq_disable(VFIODevice *vbasedev, int index);
> +void vfio_device_irq_unmask_single(VFIODevice *vbasedev, int index, int irq);
> void vfio_device_irq_unmask(VFIODevice *vbasedev, int index);
> +void vfio_device_irq_mask_single(VFIODevice *vbasedev, int index, int irq);
> void vfio_device_irq_mask(VFIODevice *vbasedev, int index);
> bool vfio_device_irq_set_signaling(VFIODevice *vbasedev, int index, int subindex,
> int action, int fd, Error **errp);
> diff --git a/hw/vfio/device.c b/hw/vfio/device.c
> index 9fba2c7272..d0068086ae 100644
> --- a/hw/vfio/device.c
> +++ b/hw/vfio/device.c
> @@ -85,6 +85,19 @@ void vfio_device_irq_disable(VFIODevice *vbasedev, int index)
> vbasedev->io_ops->set_irqs(vbasedev, &irq_set);
> }
>
> +void vfio_device_irq_unmask_single(VFIODevice *vbasedev, int index, int irq)
> +{
> + struct vfio_irq_set irq_set = {
> + .argsz = sizeof(irq_set),
> + .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK,
> + .index = index,
> + .start = irq,
> + .count = 1,
> + };
> +
> + vbasedev->io_ops->set_irqs(vbasedev, &irq_set);
> +}
> +
> void vfio_device_irq_unmask(VFIODevice *vbasedev, int index)
> {
> struct vfio_irq_set irq_set = {
> @@ -98,6 +111,19 @@ void vfio_device_irq_unmask(VFIODevice *vbasedev, int index)
> vbasedev->io_ops->set_irqs(vbasedev, &irq_set);
> }
>
> +void vfio_device_irq_mask_single(VFIODevice *vbasedev, int index, int irq)
> +{
> + struct vfio_irq_set irq_set = {
> + .argsz = sizeof(irq_set),
> + .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK,
> + .index = index,
> + .start = irq,
> + .count = 1,
> + };
> +
> + vbasedev->io_ops->set_irqs(vbasedev, &irq_set);
> +}
> +
> void vfio_device_irq_mask(VFIODevice *vbasedev, int index)
> {
> struct vfio_irq_set irq_set = {
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 5159198bdb..ef38b4692a 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -535,6 +535,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
> {
> VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
> VFIOMSIVector *vector;
> + bool new_vec = false;
> int ret;
> bool resizing = !!(vdev->nr_vectors < nr + 1);
>
> @@ -549,6 +550,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
> error_report("vfio: Error: event_notifier_init failed");
> }
> vector->use = true;
> + new_vec = true;
> msix_vector_use(pdev, nr);
> }
>
> @@ -575,6 +577,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
> kvm_irqchip_commit_route_changes(&vfio_route_change);
> vfio_connect_kvm_msi_virq(vector);
> }
> + new_vec = true;
> }
> }
>
> @@ -584,6 +587,9 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
> * in use, so we shutdown and incrementally increase them as needed.
> * nr_vectors represents the total number of vectors allocated.
> *
> + * Otherwise, unmask the vector if the vector is already setup (and we can
> + * do so) or send the fd if not.
> + *
> * When dynamic allocation is supported, let the host only allocate
> * and enable a vector when it is in use in guest. nr_vectors represents
> * the upper bound of vectors being enabled (but not all of the ranges
> @@ -594,13 +600,20 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
> }
>
> if (!vdev->defer_kvm_irq_routing) {
> - if (vdev->msix->noresize && resizing) {
> - vfio_device_irq_disable(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX);
> - ret = vfio_enable_vectors(vdev, true);
> - if (ret) {
> - error_report("vfio: failed to enable vectors, %s",
> - strerror(-ret));
> + if (resizing) {
> + if (vdev->msix->noresize) {
> + vfio_device_irq_disable(&vdev->vbasedev,
> + VFIO_PCI_MSIX_IRQ_INDEX);
> + ret = vfio_enable_vectors(vdev, true);
> + if (ret) {
> + error_report("vfio: failed to enable vectors, %d", ret);
> + }
> + } else {
> + set_irq_signalling(&vdev->vbasedev, vector, nr);
> }
> + } else if (vdev->can_mask_msix && !new_vec) {
> + vfio_device_irq_unmask_single(&vdev->vbasedev,
> + VFIO_PCI_MSIX_IRQ_INDEX, nr);
> } else {
> set_irq_signalling(&vdev->vbasedev, vector, nr);
> }
> @@ -630,6 +643,13 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr)
>
> trace_vfio_msix_vector_release(vdev->vbasedev.name, nr);
>
> + /* just mask vector if peer supports it */
> + if (vdev->can_mask_msix) {
> + vfio_device_irq_mask_single(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX,
> + nr);
> + return;
> + }
> +
> /*
> * There are still old guests that mask and unmask vectors on every
> * interrupt. If we're using QEMU bypass with a KVM irqfd, leave all of
> @@ -702,6 +722,13 @@ static void vfio_msix_enable(VFIOPCIDevice *vdev)
> error_report("vfio: failed to enable vectors, %s",
> strerror(-ret));
> }
> + } else if (vdev->can_mask_msix) {
> + /*
> + * If we can use single irq masking, send an invalid fd on vector 0
> + * to enable MSI-X without any vectors enabled.
> + */
> + vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX,
> + 0, VFIO_IRQ_SET_ACTION_TRIGGER, -1, NULL);
> } else {
> /*
> * Some communication channels between VF & PF or PF & fw rely on the
> @@ -2842,6 +2869,14 @@ bool vfio_pci_populate_device(VFIOPCIDevice *vdev, Error **errp)
> }
> }
>
> + ret = vfio_device_get_irq_info(vbasedev, VFIO_PCI_MSIX_IRQ_INDEX,
> + &irq_info);
> + if (ret == 0 && (irq_info.flags & VFIO_IRQ_INFO_MASKABLE)) {
> + vdev->can_mask_msix = true;
> + } else {
> + vdev->can_mask_msix = false;
> + }
> +
> ret = vfio_device_get_irq_info(vbasedev, VFIO_PCI_ERR_IRQ_INDEX, &irq_info);
> if (ret) {
> /* This can fail for an old kernel or legacy PCI dev */
On 5/21/2025 3:29 AM, Cédric Le Goater wrote:
> +Steven
>
> On 5/20/25 17:03, John Levon wrote:
>> If VFIO_IRQ_INFO_MASKABLE is set for VFIO_PCI_MSIX_IRQ_INDEX, record
>> this in ->can_mask_msix, and use it to individually mask MSI-X
>> interrupts as needed.
>>
>> Originally-by: John Johnson <john.g.johnson@oracle.com>
>> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
>> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
>> Signed-off-by: John Levon <john.levon@nutanix.com>
>
> This one conflicts with the "live update" series. We will address it later.
AFAICT there is only a trivial conflict versus my patch
vfio/pci: vfio_vector_init
https://lore.kernel.org/qemu-devel/1747063973-124548-17-git-send-email-steven.sistare@oracle.com/
because I extract a handful of lines from vfio_msix_vector_do_use into a
subroutine, at the same place vfio-user has added a local variable new_vec,
but the new code will simply be:
vfio_msix_vector_do_use
if (!vector->use) {
new_vec = true;
vfio_pci_vector_init(vdev, nr);
}
Also, for now I am assuming that if vfio-user is being used, then I will add
a CPR blocker. Thus I do not have to account for per-IRQ masking in the
cpr load path, for now.
- Steve
>> ---
>> hw/vfio/pci.h | 1 +
>> include/hw/vfio/vfio-device.h | 2 ++
>> hw/vfio/device.c | 26 +++++++++++++++++++
>> hw/vfio/pci.c | 47 ++++++++++++++++++++++++++++++-----
>> 4 files changed, 70 insertions(+), 6 deletions(-)
>>
>> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
>> index 61fa385ddb..43c2d72b84 100644
>> --- a/hw/vfio/pci.h
>> +++ b/hw/vfio/pci.h
>> @@ -191,6 +191,7 @@ struct VFIOPCIDevice {
>> bool defer_kvm_irq_routing;
>> bool clear_parent_atomics_on_exit;
>> bool skip_vsc_check;
>> + bool can_mask_msix;
>> VFIODisplay *dpy;
>> Notifier irqchip_change_notifier;
>> };
>> diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
>> index 8bcb3c19f6..923f9cd116 100644
>> --- a/include/hw/vfio/vfio-device.h
>> +++ b/include/hw/vfio/vfio-device.h
>> @@ -133,7 +133,9 @@ struct VFIODeviceOps {
>> (ret < 0 ? strerror(-ret) : "short write")
>> void vfio_device_irq_disable(VFIODevice *vbasedev, int index);
>> +void vfio_device_irq_unmask_single(VFIODevice *vbasedev, int index, int irq);
>> void vfio_device_irq_unmask(VFIODevice *vbasedev, int index);
>> +void vfio_device_irq_mask_single(VFIODevice *vbasedev, int index, int irq);
>> void vfio_device_irq_mask(VFIODevice *vbasedev, int index);
>> bool vfio_device_irq_set_signaling(VFIODevice *vbasedev, int index, int subindex,
>> int action, int fd, Error **errp);
>> diff --git a/hw/vfio/device.c b/hw/vfio/device.c
>> index 9fba2c7272..d0068086ae 100644
>> --- a/hw/vfio/device.c
>> +++ b/hw/vfio/device.c
>> @@ -85,6 +85,19 @@ void vfio_device_irq_disable(VFIODevice *vbasedev, int index)
>> vbasedev->io_ops->set_irqs(vbasedev, &irq_set);
>> }
>> +void vfio_device_irq_unmask_single(VFIODevice *vbasedev, int index, int irq)
>> +{
>> + struct vfio_irq_set irq_set = {
>> + .argsz = sizeof(irq_set),
>> + .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK,
>> + .index = index,
>> + .start = irq,
>> + .count = 1,
>> + };
>> +
>> + vbasedev->io_ops->set_irqs(vbasedev, &irq_set);
>> +}
>> +
>> void vfio_device_irq_unmask(VFIODevice *vbasedev, int index)
>> {
>> struct vfio_irq_set irq_set = {
>> @@ -98,6 +111,19 @@ void vfio_device_irq_unmask(VFIODevice *vbasedev, int index)
>> vbasedev->io_ops->set_irqs(vbasedev, &irq_set);
>> }
>> +void vfio_device_irq_mask_single(VFIODevice *vbasedev, int index, int irq)
>> +{
>> + struct vfio_irq_set irq_set = {
>> + .argsz = sizeof(irq_set),
>> + .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK,
>> + .index = index,
>> + .start = irq,
>> + .count = 1,
>> + };
>> +
>> + vbasedev->io_ops->set_irqs(vbasedev, &irq_set);
>> +}
>> +
>> void vfio_device_irq_mask(VFIODevice *vbasedev, int index)
>> {
>> struct vfio_irq_set irq_set = {
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 5159198bdb..ef38b4692a 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -535,6 +535,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
>> {
>> VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
>> VFIOMSIVector *vector;
>> + bool new_vec = false;
>> int ret;
>> bool resizing = !!(vdev->nr_vectors < nr + 1);
>> @@ -549,6 +550,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
>> error_report("vfio: Error: event_notifier_init failed");
>> }
>> vector->use = true;
>> + new_vec = true;
>> msix_vector_use(pdev, nr);
>> }
>> @@ -575,6 +577,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
>> kvm_irqchip_commit_route_changes(&vfio_route_change);
>> vfio_connect_kvm_msi_virq(vector);
>> }
>> + new_vec = true;
>> }
>> }
>> @@ -584,6 +587,9 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
>> * in use, so we shutdown and incrementally increase them as needed.
>> * nr_vectors represents the total number of vectors allocated.
>> *
>> + * Otherwise, unmask the vector if the vector is already setup (and we can
>> + * do so) or send the fd if not.
>> + *
>> * When dynamic allocation is supported, let the host only allocate
>> * and enable a vector when it is in use in guest. nr_vectors represents
>> * the upper bound of vectors being enabled (but not all of the ranges
>> @@ -594,13 +600,20 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
>> }
>> if (!vdev->defer_kvm_irq_routing) {
>> - if (vdev->msix->noresize && resizing) {
>> - vfio_device_irq_disable(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX);
>> - ret = vfio_enable_vectors(vdev, true);
>> - if (ret) {
>> - error_report("vfio: failed to enable vectors, %s",
>> - strerror(-ret));
>> + if (resizing) {
>> + if (vdev->msix->noresize) {
>> + vfio_device_irq_disable(&vdev->vbasedev,
>> + VFIO_PCI_MSIX_IRQ_INDEX);
>> + ret = vfio_enable_vectors(vdev, true);
>> + if (ret) {
>> + error_report("vfio: failed to enable vectors, %d", ret);
>> + }
>> + } else {
>> + set_irq_signalling(&vdev->vbasedev, vector, nr);
>> }
>> + } else if (vdev->can_mask_msix && !new_vec) {
>> + vfio_device_irq_unmask_single(&vdev->vbasedev,
>> + VFIO_PCI_MSIX_IRQ_INDEX, nr);
>> } else {
>> set_irq_signalling(&vdev->vbasedev, vector, nr);
>> }
>> @@ -630,6 +643,13 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr)
>> trace_vfio_msix_vector_release(vdev->vbasedev.name, nr);
>> + /* just mask vector if peer supports it */
>> + if (vdev->can_mask_msix) {
>> + vfio_device_irq_mask_single(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX,
>> + nr);
>> + return;
>> + }
>> +
>> /*
>> * There are still old guests that mask and unmask vectors on every
>> * interrupt. If we're using QEMU bypass with a KVM irqfd, leave all of
>> @@ -702,6 +722,13 @@ static void vfio_msix_enable(VFIOPCIDevice *vdev)
>> error_report("vfio: failed to enable vectors, %s",
>> strerror(-ret));
>> }
>> + } else if (vdev->can_mask_msix) {
>> + /*
>> + * If we can use single irq masking, send an invalid fd on vector 0
>> + * to enable MSI-X without any vectors enabled.
>> + */
>> + vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX,
>> + 0, VFIO_IRQ_SET_ACTION_TRIGGER, -1, NULL);
>> } else {
>> /*
>> * Some communication channels between VF & PF or PF & fw rely on the
>> @@ -2842,6 +2869,14 @@ bool vfio_pci_populate_device(VFIOPCIDevice *vdev, Error **errp)
>> }
>> }
>> + ret = vfio_device_get_irq_info(vbasedev, VFIO_PCI_MSIX_IRQ_INDEX,
>> + &irq_info);
>> + if (ret == 0 && (irq_info.flags & VFIO_IRQ_INFO_MASKABLE)) {
>> + vdev->can_mask_msix = true;
>> + } else {
>> + vdev->can_mask_msix = false;
>> + }
>> +
>> ret = vfio_device_get_irq_info(vbasedev, VFIO_PCI_ERR_IRQ_INDEX, &irq_info);
>> if (ret) {
>> /* This can fail for an old kernel or legacy PCI dev */
>
On Wed, May 28, 2025 at 01:16:32PM -0400, Steven Sistare wrote:
> > On 5/20/25 17:03, John Levon wrote:
> > > If VFIO_IRQ_INFO_MASKABLE is set for VFIO_PCI_MSIX_IRQ_INDEX, record
> > > this in ->can_mask_msix, and use it to individually mask MSI-X
> > > interrupts as needed.
> > >
> > > Originally-by: John Johnson <john.g.johnson@oracle.com>
> > > Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> > > Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> > > Signed-off-by: John Levon <john.levon@nutanix.com>
> >
> > This one conflicts with the "live update" series. We will address it later.
>
> AFAICT there is only a trivial conflict versus my patch
>
> vfio/pci: vfio_vector_init
> https://lore.kernel.org/qemu-devel/1747063973-124548-17-git-send-email-steven.sistare@oracle.com/
>
> because I extract a handful of lines from vfio_msix_vector_do_use into a
> subroutine, at the same place vfio-user has added a local variable new_vec,
> but the new code will simply be:
>
> vfio_msix_vector_do_use
> if (!vector->use) {
> new_vec = true;
> vfio_pci_vector_init(vdev, nr);
> }
I'll take a look when rebasing on top of CPR and ask you if I get stuck.
> Also, for now I am assuming that if vfio-user is being used, then I will add
> a CPR blocker.
That's fine (and I will take care of it in my series), but I *think* this
per-interrupt masking can apply outside of vfio-user.
regards
john
© 2016 - 2025 Red Hat, Inc.