[PATCH v3 22/33] hw/vfio: generate new file fd for pseudo device and rebind existing descriptors

Ani Sinha posted 33 patches 1 week, 6 days ago
Maintainers: Paolo Bonzini <pbonzini@redhat.com>, Eduardo Habkost <eduardo@habkost.net>, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, "Philippe Mathieu-Daudé" <philmd@linaro.org>, Yanan Wang <wangyanan55@huawei.com>, Zhao Liu <zhao1.liu@intel.com>, "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>, Richard Henderson <richard.henderson@linaro.org>, "Michael S. Tsirkin" <mst@redhat.com>, David Woodhouse <dwmw2@infradead.org>, Paul Durrant <paul@xen.org>, Bernhard Beschow <shentey@gmail.com>, Alex Williamson <alex@shazbot.org>, "Cédric Le Goater" <clg@redhat.com>, Peter Xu <peterx@redhat.com>, Peter Maydell <peter.maydell@linaro.org>, Marcelo Tosatti <mtosatti@redhat.com>, Song Gao <gaosong@loongson.cn>, Huacai Chen <chenhuacai@kernel.org>, Aurelien Jarno <aurelien@aurel32.net>, Jiaxun Yang <jiaxun.yang@flygoat.com>, Aleksandar Rikalo <arikalo@gmail.com>, Nicholas Piggin <npiggin@gmail.com>, Harsh Prateek Bora <harshpb@linux.ibm.com>, Chinmay Rath <rathc@linux.ibm.com>, Palmer Dabbelt <palmer@dabbelt.com>, Alistair Francis <alistair.francis@wdc.com>, Weiwei Li <liwei1518@gmail.com>, Daniel Henrique Barboza <dbarboza@ventanamicro.com>, Liu Zhiwei <zhiwei_liu@linux.alibaba.com>, Halil Pasic <pasic@linux.ibm.com>, Christian Borntraeger <borntraeger@linux.ibm.com>, Eric Farman <farman@linux.ibm.com>, Matthew Rosato <mjrosato@linux.ibm.com>, Ilya Leoshkevich <iii@linux.ibm.com>, David Hildenbrand <david@kernel.org>, Thomas Huth <thuth@redhat.com>, Ani Sinha <anisinha@redhat.com>
[PATCH v3 22/33] hw/vfio: generate new file fd for pseudo device and rebind existing descriptors
Posted by Ani Sinha 1 week, 6 days ago
Normally the vfio pseudo device file descriptor lives for the life of the VM.
However, when the kvm VM file descriptor changes, a new file descriptor
for the pseudo device needs to be generated against the new kvm VM descriptor.
Other existing vfio descriptors need to be reattached to the new pseudo device
descriptor. This change performs the above steps.

Signed-off-by: Ani Sinha <anisinha@redhat.com>
---
 hw/vfio/helpers.c | 86 +++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 83 insertions(+), 3 deletions(-)

diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
index f68f8165d0..d467875b4b 100644
--- a/hw/vfio/helpers.c
+++ b/hw/vfio/helpers.c
@@ -110,12 +110,71 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
 #ifdef CONFIG_KVM
 /*
  * We have a single VFIO pseudo device per KVM VM.  Once created it lives
- * for the life of the VM.  Closing the file descriptor only drops our
- * reference to it and the device's reference to kvm.  Therefore once
- * initialized, this file descriptor is only released on QEMU exit and
+ * for the life of the VM except when the vm file descriptor changes for
+ * confidential virtual machines. In that case, the old file descriptor is
+ * closed and a new file descriptor is recreated.  Closing the file descriptor
+ * only drops our reference to it and the device's reference to kvm.
+ * Therefore once initialized, this file descriptor is normally only released
+ * on QEMU exit (except for confidential VMs as stated above) and
  * we'll re-use it should another vfio device be attached before then.
  */
 int vfio_kvm_device_fd = -1;
+
+typedef struct KVMVfioFileFd {
+    int fd;
+    QLIST_ENTRY(KVMVfioFileFd) node;
+} KVMVfioFileFd;
+
+static QLIST_HEAD(, KVMVfioFileFd) kvm_vfio_file_fds =
+    QLIST_HEAD_INITIALIZER(kvm_vfio_file_fds);
+
+static int kvm_vfio_filefd_rebind(NotifierWithReturn *notifier, void *data,
+                                  Error **errp);
+static struct NotifierWithReturn kvm_vfio_vmfd_change_notifier = {
+    .notify = kvm_vfio_filefd_rebind,
+};
+
+static int kvm_vfio_filefd_rebind(NotifierWithReturn *notifier, void *data,
+                                  Error **errp)
+{
+    KVMVfioFileFd *file_fd;
+    int ret = 0;
+    struct kvm_device_attr attr = {
+        .group = KVM_DEV_VFIO_FILE,
+        .attr = KVM_DEV_VFIO_FILE_ADD,
+    };
+    struct kvm_create_device cd = {
+        .type = KVM_DEV_TYPE_VFIO,
+    };
+
+    /* we are not interested in pre vmfd change notification */
+    if (((VmfdChangeNotifier *)data)->pre) {
+        return 0;
+    }
+
+    if (kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &cd)) {
+        error_setg_errno(errp, errno, "Failed to create KVM VFIO device");
+        return -errno;
+    }
+
+    if (vfio_kvm_device_fd) {
+        close(vfio_kvm_device_fd);
+    }
+
+    vfio_kvm_device_fd = cd.fd;
+
+    QLIST_FOREACH(file_fd, &kvm_vfio_file_fds, node) {
+        attr.addr = (uint64_t)(unsigned long)&file_fd->fd;
+        if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
+            error_setg_errno(errp, errno,
+                             "Failed to add fd %d to KVM VFIO device",
+                             file_fd->fd);
+            ret = -errno;
+        }
+    }
+    return ret;
+}
+
 #endif
 
 void vfio_kvm_device_close(void)
@@ -137,6 +196,7 @@ int vfio_kvm_device_add_fd(int fd, Error **errp)
         .attr = KVM_DEV_VFIO_FILE_ADD,
         .addr = (uint64_t)(unsigned long)&fd,
     };
+    KVMVfioFileFd *file_fd;
 
     if (!kvm_enabled()) {
         return 0;
@@ -153,6 +213,11 @@ int vfio_kvm_device_add_fd(int fd, Error **errp)
         }
 
         vfio_kvm_device_fd = cd.fd;
+        /*
+         * If the vm file descriptor changes, add a notifier so that we can
+         * re-create the vfio_kvm_device_fd.
+         */
+        kvm_vmfd_add_change_notifier(&kvm_vfio_vmfd_change_notifier);
     }
 
     if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
@@ -160,6 +225,11 @@ int vfio_kvm_device_add_fd(int fd, Error **errp)
                          fd);
         return -errno;
     }
+
+    file_fd = g_malloc0(sizeof(*file_fd));
+    file_fd->fd = fd;
+    QLIST_INSERT_HEAD(&kvm_vfio_file_fds, file_fd, node);
+
 #endif
     return 0;
 }
@@ -172,6 +242,7 @@ int vfio_kvm_device_del_fd(int fd, Error **errp)
         .attr = KVM_DEV_VFIO_FILE_DEL,
         .addr = (uint64_t)(unsigned long)&fd,
     };
+    KVMVfioFileFd *file_fd;
 
     if (vfio_kvm_device_fd < 0) {
         error_setg(errp, "KVM VFIO device isn't created yet");
@@ -183,6 +254,15 @@ int vfio_kvm_device_del_fd(int fd, Error **errp)
                          "Failed to remove fd %d from KVM VFIO device", fd);
         return -errno;
     }
+
+    QLIST_FOREACH(file_fd, &kvm_vfio_file_fds, node) {
+        if (file_fd->fd == fd) {
+            QLIST_REMOVE(file_fd, node);
+            g_free(file_fd);
+            break;
+        }
+    }
+
 #endif
     return 0;
 }
-- 
2.42.0
Re: [PATCH v3 22/33] hw/vfio: generate new file fd for pseudo device and rebind existing descriptors
Posted by Cédric Le Goater 7 hours ago
On 1/27/26 06:15, Ani Sinha wrote:
> Normally the vfio pseudo device file descriptor lives for the life of the VM.
> However, when the kvm VM file descriptor changes, a new file descriptor
> for the pseudo device needs to be generated against the new kvm VM descriptor.
> Other existing vfio descriptors needs to be reattached to the new pseudo device
> descriptor. This change performs the above steps.
> 
> Signed-off-by: Ani Sinha <anisinha@redhat.com>
> ---
>   hw/vfio/helpers.c | 86 +++++++++++++++++++++++++++++++++++++++++++++--
>   1 file changed, 83 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
> index f68f8165d0..d467875b4b 100644
> --- a/hw/vfio/helpers.c
> +++ b/hw/vfio/helpers.c
> @@ -110,12 +110,71 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
>   #ifdef CONFIG_KVM
>   /*
>    * We have a single VFIO pseudo device per KVM VM.  Once created it lives
> - * for the life of the VM.  Closing the file descriptor only drops our
> - * reference to it and the device's reference to kvm.  Therefore once
> - * initialized, this file descriptor is only released on QEMU exit and
> + * for the life of the VM except when the vm file descriptor changes for
> + * confidential virtual machines. In that case, the old file descriptor is
> + * closed and a new file descriptor is recreated.  Closing the file descriptor
> + * only drops our reference to it and the device's reference to kvm.
> + * Therefore once initialized, this file descriptor is normally only released
> + * on QEMU exit (except for confidential VMs as stated above) and

Instead of changing this paragraph, I would prefer having, below
the vfio_kvm_device_fd definition, a new section starting with
a comment titled "Confidential virtual machines" (sketched below).

>    * we'll re-use it should another vfio device be attached before then.
>    */
>   int vfio_kvm_device_fd = -1;

/*
  * Confidential virtual machines

    <what is different from non confidential VMs>

  */
   
<Code>

> +
> +typedef struct KVMVfioFileFd {
> +    int fd;
> +    QLIST_ENTRY(KVMVfioFileFd) node;
> +} KVMVfioFileFd;
> +
> +static QLIST_HEAD(, KVMVfioFileFd) kvm_vfio_file_fds =
> +    QLIST_HEAD_INITIALIZER(kvm_vfio_file_fds);
> +
> +static int kvm_vfio_filefd_rebind(NotifierWithReturn *notifier, void *data,
> +                                  Error **errp);

This declaration could be avoided.

> +static struct NotifierWithReturn kvm_vfio_vmfd_change_notifier = {
> +    .notify = kvm_vfio_filefd_rebind,
> +};
> +
> +static int kvm_vfio_filefd_rebind(NotifierWithReturn *notifier, void *data,
> +                                  Error **errp)
> +{
> +    KVMVfioFileFd *file_fd;
> +    int ret = 0;
> +    struct kvm_device_attr attr = {
> +        .group = KVM_DEV_VFIO_FILE,
> +        .attr = KVM_DEV_VFIO_FILE_ADD,
> +    };
> +    struct kvm_create_device cd = {
> +        .type = KVM_DEV_TYPE_VFIO,
> +    };
> +
> +    /* we are not interested in pre vmfd change notification */
> +    if (((VmfdChangeNotifier *)data)->pre) {
> +        return 0;
> +    }
> +
> +    if (kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &cd)) {
> +        error_setg_errno(errp, errno, "Failed to create KVM VFIO device");
> +        return -errno;
> +    }
> +
> +    if (vfio_kvm_device_fd) {

Maybe "vfio_kvm_device_fd != -1"?

> +        close(vfio_kvm_device_fd);
> +    }
> +
> +    vfio_kvm_device_fd = cd.fd;
> +
> +    QLIST_FOREACH(file_fd, &kvm_vfio_file_fds, node) {
> +        attr.addr = (uint64_t)(unsigned long)&file_fd->fd;
> +        if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
> +            error_setg_errno(errp, errno,
> +                             "Failed to add fd %d to KVM VFIO device",
> +                             file_fd->fd);
> +            ret = -errno;
> +        }
> +    }
> +    return ret;
> +}
> +
>   #endif
>   
>   void vfio_kvm_device_close(void)
> @@ -137,6 +196,7 @@ int vfio_kvm_device_add_fd(int fd, Error **errp)
>           .attr = KVM_DEV_VFIO_FILE_ADD,
>           .addr = (uint64_t)(unsigned long)&fd,
>       };
> +    KVMVfioFileFd *file_fd;
>   
>       if (!kvm_enabled()) {
>           return 0;
> @@ -153,6 +213,11 @@ int vfio_kvm_device_add_fd(int fd, Error **errp)
>           }
>   
>           vfio_kvm_device_fd = cd.fd;
> +        /*
> +         * If the vm file descriptor changes, add a notifier so that we can
> +         * re-create the vfio_kvm_device_fd.
> +         */
> +        kvm_vmfd_add_change_notifier(&kvm_vfio_vmfd_change_notifier);
>       }
>   
>       if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
> @@ -160,6 +225,11 @@ int vfio_kvm_device_add_fd(int fd, Error **errp)
>                            fd);
>           return -errno;
>       }
> +
> +    file_fd = g_malloc0(sizeof(*file_fd));
> +    file_fd->fd = fd;
> +    QLIST_INSERT_HEAD(&kvm_vfio_file_fds, file_fd, node);
> +

Please introduce a helper routine.


>   #endif
>       return 0;
>   }
> @@ -172,6 +242,7 @@ int vfio_kvm_device_del_fd(int fd, Error **errp)
>           .attr = KVM_DEV_VFIO_FILE_DEL,
>           .addr = (uint64_t)(unsigned long)&fd,
>       };
> +    KVMVfioFileFd *file_fd;
>   
>       if (vfio_kvm_device_fd < 0) {
>           error_setg(errp, "KVM VFIO device isn't created yet");
> @@ -183,6 +254,15 @@ int vfio_kvm_device_del_fd(int fd, Error **errp)
>                            "Failed to remove fd %d from KVM VFIO device", fd);
>           return -errno;
>       }
> +
> +    QLIST_FOREACH(file_fd, &kvm_vfio_file_fds, node) {
> +        if (file_fd->fd == fd) {
> +            QLIST_REMOVE(file_fd, node);
> +            g_free(file_fd);
> +            break;
> +        }
> +    }

and a second helper for removal.

Thanks,

C.

>   #endif
>       return 0;
>   }
Re: [PATCH v3 22/33] hw/vfio: generate new file fd for pseudo device and rebind existing descriptors
Posted by Ani Sinha 5 hours ago

> On 9 Feb 2026, at 12:18 PM, Cédric Le Goater <clg@redhat.com> wrote:
> 
> On 1/27/26 06:15, Ani Sinha wrote:
>> Normally the vfio pseudo device file descriptor lives for the life of the VM.
>> However, when the kvm VM file descriptor changes, a new file descriptor
>> for the pseudo device needs to be generated against the new kvm VM descriptor.
>> Other existing vfio descriptors needs to be reattached to the new pseudo device
>> descriptor. This change performs the above steps.
>> Signed-off-by: Ani Sinha <anisinha@redhat.com>
>> ---
>>  hw/vfio/helpers.c | 86 +++++++++++++++++++++++++++++++++++++++++++++--
>>  1 file changed, 83 insertions(+), 3 deletions(-)
>> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
>> index f68f8165d0..d467875b4b 100644
>> --- a/hw/vfio/helpers.c
>> +++ b/hw/vfio/helpers.c
>> @@ -110,12 +110,71 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
>>  #ifdef CONFIG_KVM
>>  /*
>>   * We have a single VFIO pseudo device per KVM VM.  Once created it lives
>> - * for the life of the VM.  Closing the file descriptor only drops our
>> - * reference to it and the device's reference to kvm.  Therefore once
>> - * initialized, this file descriptor is only released on QEMU exit and
>> + * for the life of the VM except when the vm file descriptor changes for
>> + * confidential virtual machines. In that case, the old file descriptor is
>> + * closed and a new file descriptor is recreated.  Closing the file descriptor
>> + * only drops our reference to it and the device's reference to kvm.
>> + * Therefore once initialized, this file descriptor is normally only released
>> + * on QEMU exit (except for confidential VMs as stated above) and
> 
> Instead of changing this paragraph, I would prefer having, below
> the vfio_kvm_device_fd definition, a new section starting with
> comment "Confidential virtual machines"
> 
>>   * we'll re-use it should another vfio device be attached before then.
>>   */
>>  int vfio_kvm_device_fd = -1;
> 
> /*
> * Confidential virtual machines
> 
>   <what is different from non confidential VMs>
> 
> */
>  <Code>
> 
>> +
>> +typedef struct KVMVfioFileFd {
>> +    int fd;
>> +    QLIST_ENTRY(KVMVfioFileFd) node;
>> +} KVMVfioFileFd;
>> +
>> +static QLIST_HEAD(, KVMVfioFileFd) kvm_vfio_file_fds =
>> +    QLIST_HEAD_INITIALIZER(kvm_vfio_file_fds);
>> +
>> +static int kvm_vfio_filefd_rebind(NotifierWithReturn *notifier, void *data,
>> +                                  Error **errp);
> 
> This declaration could be avoided.
> 
>> +static struct NotifierWithReturn kvm_vfio_vmfd_change_notifier = {
>> +    .notify = kvm_vfio_filefd_rebind,
>> +};
>> +
>> +static int kvm_vfio_filefd_rebind(NotifierWithReturn *notifier, void *data,
>> +                                  Error **errp)
>> +{
>> +    KVMVfioFileFd *file_fd;
>> +    int ret = 0;
>> +    struct kvm_device_attr attr = {
>> +        .group = KVM_DEV_VFIO_FILE,
>> +        .attr = KVM_DEV_VFIO_FILE_ADD,
>> +    };
>> +    struct kvm_create_device cd = {
>> +        .type = KVM_DEV_TYPE_VFIO,
>> +    };
>> +
>> +    /* we are not interested in pre vmfd change notification */
>> +    if (((VmfdChangeNotifier *)data)->pre) {
>> +        return 0;
>> +    }
>> +
>> +    if (kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &cd)) {
>> +        error_setg_errno(errp, errno, "Failed to create KVM VFIO device");
>> +        return -errno;
>> +    }
>> +
>> +    if (vfio_kvm_device_fd) {
> 
> May be "vfio_kvm_device_fd != -1" ?
> 
>> +        close(vfio_kvm_device_fd);
>> +    }
>> +
>> +    vfio_kvm_device_fd = cd.fd;
>> +
>> +    QLIST_FOREACH(file_fd, &kvm_vfio_file_fds, node) {
>> +        attr.addr = (uint64_t)(unsigned long)&file_fd->fd;
>> +        if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
>> +            error_setg_errno(errp, errno,
>> +                             "Failed to add fd %d to KVM VFIO device",
>> +                             file_fd->fd);
>> +            ret = -errno;
>> +        }
>> +    }
>> +    return ret;
>> +}
>> +
>>  #endif
>>    void vfio_kvm_device_close(void)
>> @@ -137,6 +196,7 @@ int vfio_kvm_device_add_fd(int fd, Error **errp)
>>          .attr = KVM_DEV_VFIO_FILE_ADD,
>>          .addr = (uint64_t)(unsigned long)&fd,
>>      };
>> +    KVMVfioFileFd *file_fd;
>>        if (!kvm_enabled()) {
>>          return 0;
>> @@ -153,6 +213,11 @@ int vfio_kvm_device_add_fd(int fd, Error **errp)
>>          }
>>            vfio_kvm_device_fd = cd.fd;
>> +        /*
>> +         * If the vm file descriptor changes, add a notifier so that we can
>> +         * re-create the vfio_kvm_device_fd.
>> +         */
>> +        kvm_vmfd_add_change_notifier(&kvm_vfio_vmfd_change_notifier);
>>      }
>>        if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
>> @@ -160,6 +225,11 @@ int vfio_kvm_device_add_fd(int fd, Error **errp)
>>                           fd);
>>          return -errno;
>>      }
>> +
>> +    file_fd = g_malloc0(sizeof(*file_fd));
>> +    file_fd->fd = fd;
>> +    QLIST_INSERT_HEAD(&kvm_vfio_file_fds, file_fd, node);
>> +
> 
> Please introduce a helper routine.
> 
> 
>>  #endif
>>      return 0;
>>  }
>> @@ -172,6 +242,7 @@ int vfio_kvm_device_del_fd(int fd, Error **errp)
>>          .attr = KVM_DEV_VFIO_FILE_DEL,
>>          .addr = (uint64_t)(unsigned long)&fd,
>>      };
>> +    KVMVfioFileFd *file_fd;
>>        if (vfio_kvm_device_fd < 0) {
>>          error_setg(errp, "KVM VFIO device isn't created yet");
>> @@ -183,6 +254,15 @@ int vfio_kvm_device_del_fd(int fd, Error **errp)
>>                           "Failed to remove fd %d from KVM VFIO device", fd);
>>          return -errno;
>>      }
>> +
>> +    QLIST_FOREACH(file_fd, &kvm_vfio_file_fds, node) {
        ^^^^^^^^^^^^^
I believe this should be QLIST_FOREACH_SAFE. Will fix in the next version.

>> +        if (file_fd->fd == fd) {
>> +            QLIST_REMOVE(file_fd, node);
>> +            g_free(file_fd);
>> +            break;
>> +        }
>> +    }
> 
> and a second helper for removal.
> 
> Thanks,
> 
> C.
> 
>>  #endif
>>      return 0;
>>  }
Re: [PATCH v3 22/33] hw/vfio: generate new file fd for pseudo device and rebind existing descriptors
Posted by Ani Sinha 23 hours ago
On Tue, Jan 27, 2026 at 10:47 AM Ani Sinha <anisinha@redhat.com> wrote:
>
> Normally the vfio pseudo device file descriptor lives for the life of the VM.
> However, when the kvm VM file descriptor changes, a new file descriptor
> for the pseudo device needs to be generated against the new kvm VM descriptor.
> Other existing vfio descriptors needs to be reattached to the new pseudo device
> descriptor. This change performs the above steps.

I have not been able to test this change. Can someone suggest a way to
add a functional test and/or test this manually?
Otherwise, I am going to add a comment in the next respin stating that this
change is not tested.

>
> Signed-off-by: Ani Sinha <anisinha@redhat.com>
> ---
>  hw/vfio/helpers.c | 86 +++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 83 insertions(+), 3 deletions(-)
>
> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
> index f68f8165d0..d467875b4b 100644
> --- a/hw/vfio/helpers.c
> +++ b/hw/vfio/helpers.c
> @@ -110,12 +110,71 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
>  #ifdef CONFIG_KVM
>  /*
>   * We have a single VFIO pseudo device per KVM VM.  Once created it lives
> - * for the life of the VM.  Closing the file descriptor only drops our
> - * reference to it and the device's reference to kvm.  Therefore once
> - * initialized, this file descriptor is only released on QEMU exit and
> + * for the life of the VM except when the vm file descriptor changes for
> + * confidential virtual machines. In that case, the old file descriptor is
> + * closed and a new file descriptor is recreated.  Closing the file descriptor
> + * only drops our reference to it and the device's reference to kvm.
> + * Therefore once initialized, this file descriptor is normally only released
> + * on QEMU exit (except for confidential VMs as stated above) and
>   * we'll re-use it should another vfio device be attached before then.
>   */
>  int vfio_kvm_device_fd = -1;
> +
> +typedef struct KVMVfioFileFd {
> +    int fd;
> +    QLIST_ENTRY(KVMVfioFileFd) node;
> +} KVMVfioFileFd;
> +
> +static QLIST_HEAD(, KVMVfioFileFd) kvm_vfio_file_fds =
> +    QLIST_HEAD_INITIALIZER(kvm_vfio_file_fds);
> +
> +static int kvm_vfio_filefd_rebind(NotifierWithReturn *notifier, void *data,
> +                                  Error **errp);
> +static struct NotifierWithReturn kvm_vfio_vmfd_change_notifier = {
> +    .notify = kvm_vfio_filefd_rebind,
> +};
> +
> +static int kvm_vfio_filefd_rebind(NotifierWithReturn *notifier, void *data,
> +                                  Error **errp)
> +{
> +    KVMVfioFileFd *file_fd;
> +    int ret = 0;
> +    struct kvm_device_attr attr = {
> +        .group = KVM_DEV_VFIO_FILE,
> +        .attr = KVM_DEV_VFIO_FILE_ADD,
> +    };
> +    struct kvm_create_device cd = {
> +        .type = KVM_DEV_TYPE_VFIO,
> +    };
> +
> +    /* we are not interested in pre vmfd change notification */
> +    if (((VmfdChangeNotifier *)data)->pre) {
> +        return 0;
> +    }
> +
> +    if (kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &cd)) {
> +        error_setg_errno(errp, errno, "Failed to create KVM VFIO device");
> +        return -errno;
> +    }
> +
> +    if (vfio_kvm_device_fd) {
> +        close(vfio_kvm_device_fd);
> +    }
> +
> +    vfio_kvm_device_fd = cd.fd;
> +
> +    QLIST_FOREACH(file_fd, &kvm_vfio_file_fds, node) {
> +        attr.addr = (uint64_t)(unsigned long)&file_fd->fd;
> +        if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
> +            error_setg_errno(errp, errno,
> +                             "Failed to add fd %d to KVM VFIO device",
> +                             file_fd->fd);
> +            ret = -errno;
> +        }
> +    }
> +    return ret;
> +}
> +
>  #endif
>
>  void vfio_kvm_device_close(void)
> @@ -137,6 +196,7 @@ int vfio_kvm_device_add_fd(int fd, Error **errp)
>          .attr = KVM_DEV_VFIO_FILE_ADD,
>          .addr = (uint64_t)(unsigned long)&fd,
>      };
> +    KVMVfioFileFd *file_fd;
>
>      if (!kvm_enabled()) {
>          return 0;
> @@ -153,6 +213,11 @@ int vfio_kvm_device_add_fd(int fd, Error **errp)
>          }
>
>          vfio_kvm_device_fd = cd.fd;
> +        /*
> +         * If the vm file descriptor changes, add a notifier so that we can
> +         * re-create the vfio_kvm_device_fd.
> +         */
> +        kvm_vmfd_add_change_notifier(&kvm_vfio_vmfd_change_notifier);
>      }
>
>      if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
> @@ -160,6 +225,11 @@ int vfio_kvm_device_add_fd(int fd, Error **errp)
>                           fd);
>          return -errno;
>      }
> +
> +    file_fd = g_malloc0(sizeof(*file_fd));
> +    file_fd->fd = fd;
> +    QLIST_INSERT_HEAD(&kvm_vfio_file_fds, file_fd, node);
> +
>  #endif
>      return 0;
>  }
> @@ -172,6 +242,7 @@ int vfio_kvm_device_del_fd(int fd, Error **errp)
>          .attr = KVM_DEV_VFIO_FILE_DEL,
>          .addr = (uint64_t)(unsigned long)&fd,
>      };
> +    KVMVfioFileFd *file_fd;
>
>      if (vfio_kvm_device_fd < 0) {
>          error_setg(errp, "KVM VFIO device isn't created yet");
> @@ -183,6 +254,15 @@ int vfio_kvm_device_del_fd(int fd, Error **errp)
>                           "Failed to remove fd %d from KVM VFIO device", fd);
>          return -errno;
>      }
> +
> +    QLIST_FOREACH(file_fd, &kvm_vfio_file_fds, node) {
> +        if (file_fd->fd == fd) {
> +            QLIST_REMOVE(file_fd, node);
> +            g_free(file_fd);
> +            break;
> +        }
> +    }
> +
>  #endif
>      return 0;
>  }
> --
> 2.42.0
>
Re: [PATCH v3 22/33] hw/vfio: generate new file fd for pseudo device and rebind existing descriptors
Posted by Cédric Le Goater 16 hours ago
On 2/8/26 15:39, Ani Sinha wrote:
> On Tue, Jan 27, 2026 at 10:47 AM Ani Sinha <anisinha@redhat.com> wrote:
>>
>> Normally the vfio pseudo device file descriptor lives for the life of the VM.
>> However, when the kvm VM file descriptor changes, a new file descriptor
>> for the pseudo device needs to be generated against the new kvm VM descriptor.
>> Other existing vfio descriptors needs to be reattached to the new pseudo device
>> descriptor. This change performs the above steps.
> 
> I have not been able to test this change. Can someone suggest a way to
> add a functional test and/or test this manually?

Automated VFIO testing [*] is tough because it depends on a "real"
physical PCI device.

I have tested your series using an SEV-SNP guest with an assigned device
(SATA controller) and verified that the guest could reboot. The SATA
controller has no disks connected, so further testing is required.

However, a 'system_reset' command issued from the QEMU monitor leaves
the guest hanging. Maybe this is expected?


> Otherwise I am going to add a comment in the next spin up that this
> change is not tested.

I will provide some more comments on the code. Please give me a day
or two before resending.

Thanks,

C.

[*] One way to address this constraint for VFIO testing would be
     to run the test in a nested environment using emulated devices,
     like IGB, attached to an L1 which would be assigned to an L2.
     But that's unrelated to this series.
> 
>>
>> Signed-off-by: Ani Sinha <anisinha@redhat.com>
>> ---
>>   hw/vfio/helpers.c | 86 +++++++++++++++++++++++++++++++++++++++++++++--
>>   1 file changed, 83 insertions(+), 3 deletions(-)
>>
>> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
>> index f68f8165d0..d467875b4b 100644
>> --- a/hw/vfio/helpers.c
>> +++ b/hw/vfio/helpers.c
>> @@ -110,12 +110,71 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
>>   #ifdef CONFIG_KVM
>>   /*
>>    * We have a single VFIO pseudo device per KVM VM.  Once created it lives
>> - * for the life of the VM.  Closing the file descriptor only drops our
>> - * reference to it and the device's reference to kvm.  Therefore once
>> - * initialized, this file descriptor is only released on QEMU exit and
>> + * for the life of the VM except when the vm file descriptor changes for
>> + * confidential virtual machines. In that case, the old file descriptor is
>> + * closed and a new file descriptor is recreated.  Closing the file descriptor
>> + * only drops our reference to it and the device's reference to kvm.
>> + * Therefore once initialized, this file descriptor is normally only released
>> + * on QEMU exit (except for confidential VMs as stated above) and
>>    * we'll re-use it should another vfio device be attached before then.
>>    */
>>   int vfio_kvm_device_fd = -1;
>> +
>> +typedef struct KVMVfioFileFd {
>> +    int fd;
>> +    QLIST_ENTRY(KVMVfioFileFd) node;
>> +} KVMVfioFileFd;
>> +
>> +static QLIST_HEAD(, KVMVfioFileFd) kvm_vfio_file_fds =
>> +    QLIST_HEAD_INITIALIZER(kvm_vfio_file_fds);
>> +
>> +static int kvm_vfio_filefd_rebind(NotifierWithReturn *notifier, void *data,
>> +                                  Error **errp);
>> +static struct NotifierWithReturn kvm_vfio_vmfd_change_notifier = {
>> +    .notify = kvm_vfio_filefd_rebind,
>> +};
>> +
>> +static int kvm_vfio_filefd_rebind(NotifierWithReturn *notifier, void *data,
>> +                                  Error **errp)
>> +{
>> +    KVMVfioFileFd *file_fd;
>> +    int ret = 0;
>> +    struct kvm_device_attr attr = {
>> +        .group = KVM_DEV_VFIO_FILE,
>> +        .attr = KVM_DEV_VFIO_FILE_ADD,
>> +    };
>> +    struct kvm_create_device cd = {
>> +        .type = KVM_DEV_TYPE_VFIO,
>> +    };
>> +
>> +    /* we are not interested in pre vmfd change notification */
>> +    if (((VmfdChangeNotifier *)data)->pre) {
>> +        return 0;
>> +    }
>> +
>> +    if (kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &cd)) {
>> +        error_setg_errno(errp, errno, "Failed to create KVM VFIO device");
>> +        return -errno;
>> +    }
>> +
>> +    if (vfio_kvm_device_fd) {
>> +        close(vfio_kvm_device_fd);
>> +    }
>> +
>> +    vfio_kvm_device_fd = cd.fd;
>> +
>> +    QLIST_FOREACH(file_fd, &kvm_vfio_file_fds, node) {
>> +        attr.addr = (uint64_t)(unsigned long)&file_fd->fd;
>> +        if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
>> +            error_setg_errno(errp, errno,
>> +                             "Failed to add fd %d to KVM VFIO device",
>> +                             file_fd->fd);
>> +            ret = -errno;
>> +        }
>> +    }
>> +    return ret;
>> +}
>> +
>>   #endif
>>
>>   void vfio_kvm_device_close(void)
>> @@ -137,6 +196,7 @@ int vfio_kvm_device_add_fd(int fd, Error **errp)
>>           .attr = KVM_DEV_VFIO_FILE_ADD,
>>           .addr = (uint64_t)(unsigned long)&fd,
>>       };
>> +    KVMVfioFileFd *file_fd;
>>
>>       if (!kvm_enabled()) {
>>           return 0;
>> @@ -153,6 +213,11 @@ int vfio_kvm_device_add_fd(int fd, Error **errp)
>>           }
>>
>>           vfio_kvm_device_fd = cd.fd;
>> +        /*
>> +         * If the vm file descriptor changes, add a notifier so that we can
>> +         * re-create the vfio_kvm_device_fd.
>> +         */
>> +        kvm_vmfd_add_change_notifier(&kvm_vfio_vmfd_change_notifier);
>>       }
>>
>>       if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
>> @@ -160,6 +225,11 @@ int vfio_kvm_device_add_fd(int fd, Error **errp)
>>                            fd);
>>           return -errno;
>>       }
>> +
>> +    file_fd = g_malloc0(sizeof(*file_fd));
>> +    file_fd->fd = fd;
>> +    QLIST_INSERT_HEAD(&kvm_vfio_file_fds, file_fd, node);
>> +
>>   #endif
>>       return 0;
>>   }
>> @@ -172,6 +242,7 @@ int vfio_kvm_device_del_fd(int fd, Error **errp)
>>           .attr = KVM_DEV_VFIO_FILE_DEL,
>>           .addr = (uint64_t)(unsigned long)&fd,
>>       };
>> +    KVMVfioFileFd *file_fd;
>>
>>       if (vfio_kvm_device_fd < 0) {
>>           error_setg(errp, "KVM VFIO device isn't created yet");
>> @@ -183,6 +254,15 @@ int vfio_kvm_device_del_fd(int fd, Error **errp)
>>                            "Failed to remove fd %d from KVM VFIO device", fd);
>>           return -errno;
>>       }
>> +
>> +    QLIST_FOREACH(file_fd, &kvm_vfio_file_fds, node) {
>> +        if (file_fd->fd == fd) {
>> +            QLIST_REMOVE(file_fd, node);
>> +            g_free(file_fd);
>> +            break;
>> +        }
>> +    }
>> +
>>   #endif
>>       return 0;
>>   }
>> --
>> 2.42.0
>>
> 


Re: [PATCH v3 22/33] hw/vfio: generate new file fd for pseudo device and rebind existing descriptors
Posted by Ani Sinha 10 hours ago
On Mon, Feb 9, 2026 at 3:38 AM Cédric Le Goater <clg@redhat.com> wrote:
>
> On 2/8/26 15:39, Ani Sinha wrote:
> > On Tue, Jan 27, 2026 at 10:47 AM Ani Sinha <anisinha@redhat.com> wrote:
> >>
> >> Normally the vfio pseudo device file descriptor lives for the life of the VM.
> >> However, when the kvm VM file descriptor changes, a new file descriptor
> >> for the pseudo device needs to be generated against the new kvm VM descriptor.
> >> Other existing vfio descriptors needs to be reattached to the new pseudo device
> >> descriptor. This change performs the above steps.
> >
> > I have not been able to test this change. Can someone suggest a way to
> > add a functional test and/or test this manually?
>
> Automated VFIO testing [*] is tough because it depends on a "real"
> physical PCI device.
>
> I have tested your series using a sev-snp guest with an assigned device
> (SATA controller) and verified that the guest could reboot. The SATA
> controller has no disks connected, so further testing is required.
>
> However, a 'system_reset' command issued from the QEMU monitor leaves
> the guest hanging. Maybe this is expected?

I do not think so. On non-coco, I just tried it and it reboots fine
with x-change-vmfd-on-reset turned on.
I tried a RHEL 10 guest image as well as a simple UKI kernel.
Maybe there is some bug in this patch, not sure. In QEMU the code path
should be the same for both.

>
>
> > Otherwise I am going to add a comment in the next spin up that this
> > change is not tested.
>
> I will provide some more comments on the code. Please give me a day
> or two before resending.

Sure, please take all the time you need.

>
> Thanks,
>
> C.
>
> [*] One way to address this constraint for VFIO testing would be
>      to run the test in a nested environment using emulated devices,
>      like IGB, attached to an L1 which would be assigned to an L2.
>      But that's unrelated to this series.
> >
> >>
> >> Signed-off-by: Ani Sinha <anisinha@redhat.com>
> >> ---
> >>   hw/vfio/helpers.c | 86 +++++++++++++++++++++++++++++++++++++++++++++--
> >>   1 file changed, 83 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
> >> index f68f8165d0..d467875b4b 100644
> >> --- a/hw/vfio/helpers.c
> >> +++ b/hw/vfio/helpers.c
> >> @@ -110,12 +110,71 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
> >>   #ifdef CONFIG_KVM
> >>   /*
> >>    * We have a single VFIO pseudo device per KVM VM.  Once created it lives
> >> - * for the life of the VM.  Closing the file descriptor only drops our
> >> - * reference to it and the device's reference to kvm.  Therefore once
> >> - * initialized, this file descriptor is only released on QEMU exit and
> >> + * for the life of the VM except when the vm file descriptor changes for
> >> + * confidential virtual machines. In that case, the old file descriptor is
> >> + * closed and a new file descriptor is recreated.  Closing the file descriptor
> >> + * only drops our reference to it and the device's reference to kvm.
> >> + * Therefore once initialized, this file descriptor is normally only released
> >> + * on QEMU exit (except for confidential VMs as stated above) and
> >>    * we'll re-use it should another vfio device be attached before then.
> >>    */
> >>   int vfio_kvm_device_fd = -1;
> >> +
> >> +typedef struct KVMVfioFileFd {
> >> +    int fd;
> >> +    QLIST_ENTRY(KVMVfioFileFd) node;
> >> +} KVMVfioFileFd;
> >> +
> >> +static QLIST_HEAD(, KVMVfioFileFd) kvm_vfio_file_fds =
> >> +    QLIST_HEAD_INITIALIZER(kvm_vfio_file_fds);
> >> +
> >> +static int kvm_vfio_filefd_rebind(NotifierWithReturn *notifier, void *data,
> >> +                                  Error **errp);
> >> +static struct NotifierWithReturn kvm_vfio_vmfd_change_notifier = {
> >> +    .notify = kvm_vfio_filefd_rebind,
> >> +};
> >> +
> >> +static int kvm_vfio_filefd_rebind(NotifierWithReturn *notifier, void *data,
> >> +                                  Error **errp)
> >> +{
> >> +    KVMVfioFileFd *file_fd;
> >> +    int ret = 0;
> >> +    struct kvm_device_attr attr = {
> >> +        .group = KVM_DEV_VFIO_FILE,
> >> +        .attr = KVM_DEV_VFIO_FILE_ADD,
> >> +    };
> >> +    struct kvm_create_device cd = {
> >> +        .type = KVM_DEV_TYPE_VFIO,
> >> +    };
> >> +
> >> +    /* we are not interested in pre vmfd change notification */
> >> +    if (((VmfdChangeNotifier *)data)->pre) {
> >> +        return 0;
> >> +    }
> >> +
> >> +    if (kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &cd)) {
> >> +        error_setg_errno(errp, errno, "Failed to create KVM VFIO device");
> >> +        return -errno;
> >> +    }
> >> +
> >> +    if (vfio_kvm_device_fd) {
> >> +        close(vfio_kvm_device_fd);
> >> +    }
> >> +
> >> +    vfio_kvm_device_fd = cd.fd;
> >> +
> >> +    QLIST_FOREACH(file_fd, &kvm_vfio_file_fds, node) {
> >> +        attr.addr = (uint64_t)(unsigned long)&file_fd->fd;
> >> +        if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
> >> +            error_setg_errno(errp, errno,
> >> +                             "Failed to add fd %d to KVM VFIO device",
> >> +                             file_fd->fd);
> >> +            ret = -errno;
> >> +        }
> >> +    }
> >> +    return ret;
> >> +}
> >> +
> >>   #endif
> >>
> >>   void vfio_kvm_device_close(void)
> >> @@ -137,6 +196,7 @@ int vfio_kvm_device_add_fd(int fd, Error **errp)
> >>           .attr = KVM_DEV_VFIO_FILE_ADD,
> >>           .addr = (uint64_t)(unsigned long)&fd,
> >>       };
> >> +    KVMVfioFileFd *file_fd;
> >>
> >>       if (!kvm_enabled()) {
> >>           return 0;
> >> @@ -153,6 +213,11 @@ int vfio_kvm_device_add_fd(int fd, Error **errp)
> >>           }
> >>
> >>           vfio_kvm_device_fd = cd.fd;
> >> +        /*
> >> +         * If the vm file descriptor changes, add a notifier so that we can
> >> +         * re-create the vfio_kvm_device_fd.
> >> +         */
> >> +        kvm_vmfd_add_change_notifier(&kvm_vfio_vmfd_change_notifier);
> >>       }
> >>
> >>       if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
> >> @@ -160,6 +225,11 @@ int vfio_kvm_device_add_fd(int fd, Error **errp)
> >>                            fd);
> >>           return -errno;
> >>       }
> >> +
> >> +    file_fd = g_malloc0(sizeof(*file_fd));
> >> +    file_fd->fd = fd;
> >> +    QLIST_INSERT_HEAD(&kvm_vfio_file_fds, file_fd, node);
> >> +
> >>   #endif
> >>       return 0;
> >>   }
> >> @@ -172,6 +242,7 @@ int vfio_kvm_device_del_fd(int fd, Error **errp)
> >>           .attr = KVM_DEV_VFIO_FILE_DEL,
> >>           .addr = (uint64_t)(unsigned long)&fd,
> >>       };
> >> +    KVMVfioFileFd *file_fd;
> >>
> >>       if (vfio_kvm_device_fd < 0) {
> >>           error_setg(errp, "KVM VFIO device isn't created yet");
> >> @@ -183,6 +254,15 @@ int vfio_kvm_device_del_fd(int fd, Error **errp)
> >>                            "Failed to remove fd %d from KVM VFIO device", fd);
> >>           return -errno;
> >>       }
> >> +
> >> +    QLIST_FOREACH(file_fd, &kvm_vfio_file_fds, node) {
> >> +        if (file_fd->fd == fd) {
> >> +            QLIST_REMOVE(file_fd, node);
> >> +            g_free(file_fd);
> >> +            break;
> >> +        }
> >> +    }
> >> +
> >>   #endif
> >>       return 0;
> >>   }
> >> --
> >> 2.42.0
> >>
> >
>
Re: [PATCH v3 22/33] hw/vfio: generate new file fd for pseudo device and rebind existing descriptors
Posted by Ani Sinha 8 hours ago

> On 9 Feb 2026, at 9:37 AM, Ani Sinha <anisinha@redhat.com> wrote:
> 
> On Mon, Feb 9, 2026 at 3:38 AM Cédric Le Goater <clg@redhat.com> wrote:
>> 
>> On 2/8/26 15:39, Ani Sinha wrote:
>>> On Tue, Jan 27, 2026 at 10:47 AM Ani Sinha <anisinha@redhat.com> wrote:
>>>> 
>>>> Normally the vfio pseudo device file descriptor lives for the life of the VM.
>>>> However, when the kvm VM file descriptor changes, a new file descriptor
>>>> for the pseudo device needs to be generated against the new kvm VM descriptor.
>>>> Other existing vfio descriptors need to be reattached to the new pseudo device
>>>> descriptor. This change performs the above steps.
>>> 
>>> I have not been able to test this change. Can someone suggest a way to
>>> add a functional test and/or test this manually?
>> 
>> Automated VFIO testing [*] is tough because it depends on a "real"
>> physical PCI device.
>> 
>> I have tested your series using a sev-snp guest with an assigned device
>> (SATA controller) and verified that the guest could reboot. The SATA
>> controller has no disks connected, so further testing is required.
>> 
>> However, a 'system_reset' command issued from the QEMU monitor leaves
>> the guest hanging. Maybe this is expected?
> 
> I do not think so. On non-coco, I just tried it and it reboots fine
> with x-change-vmfd-on-reset turned on.

Sorry, I had some issues with my testing. Neither the non-coco reset nor the coco reset was using the vmfd change path when the QMP reset command was used.
In non-coco, it was the normal reset which obviously works.

I fixed it. See https://gitlab.com/anisinha/qemu/-/commit/c5514dbb2b52d474397220cfa3a0b8faa26cda0c

Please use this patch and test. I have tested this on SNP and it seems to work. I will make it part of the next spin-up.

Thanks for testing.

> I tried a RHEL 10 guest image as well as a simple UKI kernel.
> Maybe there is some bug in this patch, not sure. In QEMU the code path
> should be the same for both.
> 
>> 
>> 
>>> Otherwise I am going to add a comment in the next spin up that this
>>> change is not tested.
>> 
>> I will provide some more comments on the code. Please give me a day
>> or two before resending.
> 
> Sure, please take all the time you need.
> 
>> 
>> Thanks,
>> 
>> C.
>> 
>> [*] One way to address this constraint for VFIO testing would be
>>     to run the test in a nested environment using emulated devices,
>>     like IGB, attached to an L1 which would be assigned to an L2.
>>     But that's unrelated to this series.
>>> 
>>>> 
>>>> Signed-off-by: Ani Sinha <anisinha@redhat.com>
>>>> ---
>>>>  hw/vfio/helpers.c | 86 +++++++++++++++++++++++++++++++++++++++++++++--
>>>>  1 file changed, 83 insertions(+), 3 deletions(-)
>>>> 
>>>> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
>>>> index f68f8165d0..d467875b4b 100644
>>>> --- a/hw/vfio/helpers.c
>>>> +++ b/hw/vfio/helpers.c
>>>> @@ -110,12 +110,71 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
>>>>  #ifdef CONFIG_KVM
>>>>  /*
>>>>   * We have a single VFIO pseudo device per KVM VM.  Once created it lives
>>>> - * for the life of the VM.  Closing the file descriptor only drops our
>>>> - * reference to it and the device's reference to kvm.  Therefore once
>>>> - * initialized, this file descriptor is only released on QEMU exit and
>>>> + * for the life of the VM except when the vm file descriptor changes for
>>>> + * confidential virtual machines. In that case, the old file descriptor is
>>>> + * closed and a new file descriptor is recreated.  Closing the file descriptor
>>>> + * only drops our reference to it and the device's reference to kvm.
>>>> + * Therefore once initialized, this file descriptor is normally only released
>>>> + * on QEMU exit (except for confidential VMs as stated above) and
>>>>   * we'll re-use it should another vfio device be attached before then.
>>>>   */
>>>>  int vfio_kvm_device_fd = -1;
>>>> +
>>>> +typedef struct KVMVfioFileFd {
>>>> +    int fd;
>>>> +    QLIST_ENTRY(KVMVfioFileFd) node;
>>>> +} KVMVfioFileFd;
>>>> +
>>>> +static QLIST_HEAD(, KVMVfioFileFd) kvm_vfio_file_fds =
>>>> +    QLIST_HEAD_INITIALIZER(kvm_vfio_file_fds);
>>>> +
>>>> +static int kvm_vfio_filefd_rebind(NotifierWithReturn *notifier, void *data,
>>>> +                                  Error **errp);
>>>> +static struct NotifierWithReturn kvm_vfio_vmfd_change_notifier = {
>>>> +    .notify = kvm_vfio_filefd_rebind,
>>>> +};
>>>> +
>>>> +static int kvm_vfio_filefd_rebind(NotifierWithReturn *notifier, void *data,
>>>> +                                  Error **errp)
>>>> +{
>>>> +    KVMVfioFileFd *file_fd;
>>>> +    int ret = 0;
>>>> +    struct kvm_device_attr attr = {
>>>> +        .group = KVM_DEV_VFIO_FILE,
>>>> +        .attr = KVM_DEV_VFIO_FILE_ADD,
>>>> +    };
>>>> +    struct kvm_create_device cd = {
>>>> +        .type = KVM_DEV_TYPE_VFIO,
>>>> +    };
>>>> +
>>>> +    /* we are not interested in pre vmfd change notification */
>>>> +    if (((VmfdChangeNotifier *)data)->pre) {
>>>> +        return 0;
>>>> +    }
>>>> +
>>>> +    if (kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &cd)) {
>>>> +        error_setg_errno(errp, errno, "Failed to create KVM VFIO device");
>>>> +        return -errno;
>>>> +    }
>>>> +
>>>> +    if (vfio_kvm_device_fd) {
>>>> +        close(vfio_kvm_device_fd);
>>>> +    }
>>>> +
>>>> +    vfio_kvm_device_fd = cd.fd;
>>>> +
>>>> +    QLIST_FOREACH(file_fd, &kvm_vfio_file_fds, node) {
>>>> +        attr.addr = (uint64_t)(unsigned long)&file_fd->fd;
>>>> +        if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
>>>> +            error_setg_errno(errp, errno,
>>>> +                             "Failed to add fd %d to KVM VFIO device",
>>>> +                             file_fd->fd);
>>>> +            ret = -errno;
>>>> +        }
>>>> +    }
>>>> +    return ret;
>>>> +}
>>>> +
>>>>  #endif
>>>> 
>>>>  void vfio_kvm_device_close(void)
>>>> @@ -137,6 +196,7 @@ int vfio_kvm_device_add_fd(int fd, Error **errp)
>>>>          .attr = KVM_DEV_VFIO_FILE_ADD,
>>>>          .addr = (uint64_t)(unsigned long)&fd,
>>>>      };
>>>> +    KVMVfioFileFd *file_fd;
>>>> 
>>>>      if (!kvm_enabled()) {
>>>>          return 0;
>>>> @@ -153,6 +213,11 @@ int vfio_kvm_device_add_fd(int fd, Error **errp)
>>>>          }
>>>> 
>>>>          vfio_kvm_device_fd = cd.fd;
>>>> +        /*
>>>> +         * If the vm file descriptor changes, add a notifier so that we can
>>>> +         * re-create the vfio_kvm_device_fd.
>>>> +         */
>>>> +        kvm_vmfd_add_change_notifier(&kvm_vfio_vmfd_change_notifier);
>>>>      }
>>>> 
>>>>      if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
>>>> @@ -160,6 +225,11 @@ int vfio_kvm_device_add_fd(int fd, Error **errp)
>>>>                           fd);
>>>>          return -errno;
>>>>      }
>>>> +
>>>> +    file_fd = g_malloc0(sizeof(*file_fd));
>>>> +    file_fd->fd = fd;
>>>> +    QLIST_INSERT_HEAD(&kvm_vfio_file_fds, file_fd, node);
>>>> +
>>>>  #endif
>>>>      return 0;
>>>>  }
>>>> @@ -172,6 +242,7 @@ int vfio_kvm_device_del_fd(int fd, Error **errp)
>>>>          .attr = KVM_DEV_VFIO_FILE_DEL,
>>>>          .addr = (uint64_t)(unsigned long)&fd,
>>>>      };
>>>> +    KVMVfioFileFd *file_fd;
>>>> 
>>>>      if (vfio_kvm_device_fd < 0) {
>>>>          error_setg(errp, "KVM VFIO device isn't created yet");
>>>> @@ -183,6 +254,15 @@ int vfio_kvm_device_del_fd(int fd, Error **errp)
>>>>                           "Failed to remove fd %d from KVM VFIO device", fd);
>>>>          return -errno;
>>>>      }
>>>> +
>>>> +    QLIST_FOREACH(file_fd, &kvm_vfio_file_fds, node) {
>>>> +        if (file_fd->fd == fd) {
>>>> +            QLIST_REMOVE(file_fd, node);
>>>> +            g_free(file_fd);
>>>> +            break;
>>>> +        }
>>>> +    }
>>>> +
>>>>  #endif
>>>>      return 0;
>>>>  }
>>>> --
>>>> 2.42.0
>>>> 
>>> 
>> 
Re: [PATCH v3 22/33] hw/vfio: generate new file fd for pseudo device and rebind existing descriptors
Posted by Cédric Le Goater 7 hours ago
On 2/9/26 06:44, Ani Sinha wrote:
> 
> 
>> On 9 Feb 2026, at 9:37 AM, Ani Sinha <anisinha@redhat.com> wrote:
>>
>> On Mon, Feb 9, 2026 at 3:38 AM Cédric Le Goater <clg@redhat.com> wrote:
>>>
>>> On 2/8/26 15:39, Ani Sinha wrote:
>>>> On Tue, Jan 27, 2026 at 10:47 AM Ani Sinha <anisinha@redhat.com> wrote:
>>>>>
>>>>> Normally the vfio pseudo device file descriptor lives for the life of the VM.
>>>>> However, when the kvm VM file descriptor changes, a new file descriptor
>>>>> for the pseudo device needs to be generated against the new kvm VM descriptor.
>>>>> Other existing vfio descriptors need to be reattached to the new pseudo device
>>>>> descriptor. This change performs the above steps.
>>>>
>>>> I have not been able to test this change. Can someone suggest a way to
>>>> add a functional test and/or test this manually?
>>>
>>> Automated VFIO testing [*] is tough because it depends on a "real"
>>> physical PCI device.
>>>
>>> I have tested your series using a sev-snp guest with an assigned device
>>> (SATA controller) and verified that the guest could reboot. The SATA
>>> controller has no disks connected, so further testing is required.
>>>
>>> However, a 'system_reset' command issued from the QEMU monitor leaves
>>> the guest hanging. Maybe this is expected?
>>
>> I do not think so. On non-coco, I just tried it and it reboots fine
>> with x-change-vmfd-on-reset turned on.
> 
> Sorry, I had some issues with my testing. Neither the non-coco reset nor the coco reset was using the vmfd change path when the QMP reset command was used.
> In non-coco, it was the normal reset which obviously works.
> 
> I fixed it. See https://gitlab.com/anisinha/qemu/-/commit/c5514dbb2b52d474397220cfa3a0b8faa26cda0c
> 
> Please use this patch and test. I have tested this on SNP and it seems to work. I will make it part of the next spin-up.

Looks good to me.

Thanks,

C.



Re: [PATCH v3 22/33] hw/vfio: generate new file fd for pseudo device and rebind existing descriptors
Posted by Ani Sinha 9 hours ago
On Mon, Feb 9, 2026 at 9:37 AM Ani Sinha <anisinha@redhat.com> wrote:
>
> On Mon, Feb 9, 2026 at 3:38 AM Cédric Le Goater <clg@redhat.com> wrote:
> >
> > On 2/8/26 15:39, Ani Sinha wrote:
> > > On Tue, Jan 27, 2026 at 10:47 AM Ani Sinha <anisinha@redhat.com> wrote:
> > >>
> > >> Normally the vfio pseudo device file descriptor lives for the life of the VM.
> > >> However, when the kvm VM file descriptor changes, a new file descriptor
> > >> for the pseudo device needs to be generated against the new kvm VM descriptor.
> > >> Other existing vfio descriptors need to be reattached to the new pseudo device
> > >> descriptor. This change performs the above steps.
> > >
> > > I have not been able to test this change. Can someone suggest a way to
> > > add a functional test and/or test this manually?
> >
> > Automated VFIO testing [*] is tough because it depends on a "real"
> > physical PCI device.
> >
> > I have tested your series using a sev-snp guest with an assigned device
> > (SATA controller) and verified that the guest could reboot. The SATA
> > controller has no disks connected, so further testing is required.
> >
> > However, a 'system_reset' command issued from the QEMU monitor leaves
> > the guest hanging. Maybe this is expected?
>
> I do not think so. On non-coco, I just tried it and it reboots fine
> with x-change-vmfd-on-reset turned on.

Please try this way on non-coco. If it works, then we need to assume
that for the CoCo case, some extra steps are needed during reset for
vfio which I missed.

> I tried a RHEL 10 guest image as well as a simple UKI kernel.
> Maybe there is some bug in this patch, not sure. In QEMU the code path
> should be the same for both.
>
> >
> >
> > > Otherwise I am going to add a comment in the next spin up that this
> > > change is not tested.
> >
> > I will provide some more comments on the code. Please give me a day
> > or two before resending.
>
> Sure, please take all the time you need.
>
> >
> > Thanks,
> >
> > C.
> >
> > [*] One way to address this constraint for VFIO testing would be
> >      to run the test in a nested environment using emulated devices,
> >      like IGB, attached to an L1 which would be assigned to an L2.
> >      But that's unrelated to this series.
> > >
> > >>
> > >> Signed-off-by: Ani Sinha <anisinha@redhat.com>
> > >> ---
> > >>   hw/vfio/helpers.c | 86 +++++++++++++++++++++++++++++++++++++++++++++--
> > >>   1 file changed, 83 insertions(+), 3 deletions(-)
> > >>
> > >> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
> > >> index f68f8165d0..d467875b4b 100644
> > >> --- a/hw/vfio/helpers.c
> > >> +++ b/hw/vfio/helpers.c
> > >> @@ -110,12 +110,71 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
> > >>   #ifdef CONFIG_KVM
> > >>   /*
> > >>    * We have a single VFIO pseudo device per KVM VM.  Once created it lives
> > >> - * for the life of the VM.  Closing the file descriptor only drops our
> > >> - * reference to it and the device's reference to kvm.  Therefore once
> > >> - * initialized, this file descriptor is only released on QEMU exit and
> > >> + * for the life of the VM except when the vm file descriptor changes for
> > >> + * confidential virtual machines. In that case, the old file descriptor is
> > >> + * closed and a new file descriptor is recreated.  Closing the file descriptor
> > >> + * only drops our reference to it and the device's reference to kvm.
> > >> + * Therefore once initialized, this file descriptor is normally only released
> > >> + * on QEMU exit (except for confidential VMs as stated above) and
> > >>    * we'll re-use it should another vfio device be attached before then.
> > >>    */
> > >>   int vfio_kvm_device_fd = -1;
> > >> +
> > >> +typedef struct KVMVfioFileFd {
> > >> +    int fd;
> > >> +    QLIST_ENTRY(KVMVfioFileFd) node;
> > >> +} KVMVfioFileFd;
> > >> +
> > >> +static QLIST_HEAD(, KVMVfioFileFd) kvm_vfio_file_fds =
> > >> +    QLIST_HEAD_INITIALIZER(kvm_vfio_file_fds);
> > >> +
> > >> +static int kvm_vfio_filefd_rebind(NotifierWithReturn *notifier, void *data,
> > >> +                                  Error **errp);
> > >> +static struct NotifierWithReturn kvm_vfio_vmfd_change_notifier = {
> > >> +    .notify = kvm_vfio_filefd_rebind,
> > >> +};
> > >> +
> > >> +static int kvm_vfio_filefd_rebind(NotifierWithReturn *notifier, void *data,
> > >> +                                  Error **errp)
> > >> +{
> > >> +    KVMVfioFileFd *file_fd;
> > >> +    int ret = 0;
> > >> +    struct kvm_device_attr attr = {
> > >> +        .group = KVM_DEV_VFIO_FILE,
> > >> +        .attr = KVM_DEV_VFIO_FILE_ADD,
> > >> +    };
> > >> +    struct kvm_create_device cd = {
> > >> +        .type = KVM_DEV_TYPE_VFIO,
> > >> +    };
> > >> +
> > >> +    /* we are not interested in pre vmfd change notification */
> > >> +    if (((VmfdChangeNotifier *)data)->pre) {
> > >> +        return 0;
> > >> +    }
> > >> +
> > >> +    if (kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &cd)) {
> > >> +        error_setg_errno(errp, errno, "Failed to create KVM VFIO device");
> > >> +        return -errno;
> > >> +    }
> > >> +
> > >> +    if (vfio_kvm_device_fd) {
> > >> +        close(vfio_kvm_device_fd);
> > >> +    }
> > >> +
> > >> +    vfio_kvm_device_fd = cd.fd;
> > >> +
> > >> +    QLIST_FOREACH(file_fd, &kvm_vfio_file_fds, node) {
> > >> +        attr.addr = (uint64_t)(unsigned long)&file_fd->fd;
> > >> +        if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
> > >> +            error_setg_errno(errp, errno,
> > >> +                             "Failed to add fd %d to KVM VFIO device",
> > >> +                             file_fd->fd);
> > >> +            ret = -errno;
> > >> +        }
> > >> +    }
> > >> +    return ret;
> > >> +}
> > >> +
> > >>   #endif
> > >>
> > >>   void vfio_kvm_device_close(void)
> > >> @@ -137,6 +196,7 @@ int vfio_kvm_device_add_fd(int fd, Error **errp)
> > >>           .attr = KVM_DEV_VFIO_FILE_ADD,
> > >>           .addr = (uint64_t)(unsigned long)&fd,
> > >>       };
> > >> +    KVMVfioFileFd *file_fd;
> > >>
> > >>       if (!kvm_enabled()) {
> > >>           return 0;
> > >> @@ -153,6 +213,11 @@ int vfio_kvm_device_add_fd(int fd, Error **errp)
> > >>           }
> > >>
> > >>           vfio_kvm_device_fd = cd.fd;
> > >> +        /*
> > >> +         * If the vm file descriptor changes, add a notifier so that we can
> > >> +         * re-create the vfio_kvm_device_fd.
> > >> +         */
> > >> +        kvm_vmfd_add_change_notifier(&kvm_vfio_vmfd_change_notifier);
> > >>       }
> > >>
> > >>       if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
> > >> @@ -160,6 +225,11 @@ int vfio_kvm_device_add_fd(int fd, Error **errp)
> > >>                            fd);
> > >>           return -errno;
> > >>       }
> > >> +
> > >> +    file_fd = g_malloc0(sizeof(*file_fd));
> > >> +    file_fd->fd = fd;
> > >> +    QLIST_INSERT_HEAD(&kvm_vfio_file_fds, file_fd, node);
> > >> +
> > >>   #endif
> > >>       return 0;
> > >>   }
> > >> @@ -172,6 +242,7 @@ int vfio_kvm_device_del_fd(int fd, Error **errp)
> > >>           .attr = KVM_DEV_VFIO_FILE_DEL,
> > >>           .addr = (uint64_t)(unsigned long)&fd,
> > >>       };
> > >> +    KVMVfioFileFd *file_fd;
> > >>
> > >>       if (vfio_kvm_device_fd < 0) {
> > >>           error_setg(errp, "KVM VFIO device isn't created yet");
> > >> @@ -183,6 +254,15 @@ int vfio_kvm_device_del_fd(int fd, Error **errp)
> > >>                            "Failed to remove fd %d from KVM VFIO device", fd);
> > >>           return -errno;
> > >>       }
> > >> +
> > >> +    QLIST_FOREACH(file_fd, &kvm_vfio_file_fds, node) {
> > >> +        if (file_fd->fd == fd) {
> > >> +            QLIST_REMOVE(file_fd, node);
> > >> +            g_free(file_fd);
> > >> +            break;
> > >> +        }
> > >> +    }
> > >> +
> > >>   #endif
> > >>       return 0;
> > >>   }
> > >> --
> > >> 2.42.0
> > >>
> > >
> >