With the default config, the kernel VFIO type1 driver limits the dirty
bitmap to 256MB for the unmap_bitmap ioctl, so a guest memory region can
be at most 8TB for the ioctl to succeed.

Be conservative here and limit total guest memory to 8TB, or else add a
migration blocker. The IOMMUFD backend doesn't have such a limit; one can
use an IOMMUFD-backed device if there is a need to migrate such a large VM.
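For reference, the 8TB figure is just the bitmap limit multiplied out (a
rough calculation, assuming the common 4KB minimum IOMMU page size):

    256MB bitmap * 8 bits/byte = 2^31 bits, each covering one 4KB page:
    2^31 * 4KB = 2^43 bytes = 8TB
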
Suggested-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
hw/vfio/migration.c | 37 +++++++++++++++++++++++++++++++++++++
1 file changed, 37 insertions(+)
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 4c06e3db93..1106ca7857 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -16,6 +16,7 @@
#include <sys/ioctl.h>

#include "system/runstate.h"
+#include "hw/boards.h"
#include "hw/vfio/vfio-device.h"
#include "hw/vfio/vfio-migration.h"
#include "migration/misc.h"
@@ -1152,6 +1153,35 @@ static bool vfio_viommu_preset(VFIODevice *vbasedev)
return vbasedev->bcontainer->space->as != &address_space_memory;
}

+static bool vfio_dirty_tracking_exceed_limit(VFIODevice *vbasedev)
+{
+    VFIOContainer *bcontainer = vbasedev->bcontainer;
+    uint64_t max_size, page_size;
+
+    if (!object_dynamic_cast(OBJECT(bcontainer), TYPE_VFIO_IOMMU_LEGACY)) {
+        return false;
+    }
+
+    if (!bcontainer->dirty_pages_supported) {
+        return true;
+    }
+    /*
+     * The VFIO type1 driver limits the dirty bitmap size for the
+     * unmap_bitmap ioctl(); calculate the limit and compare it with the
+     * guest memory size to catch dirty tracking failures early.
+     *
+     * The limit is 8TB with the default kernel and QEMU config. We are a
+     * bit conservative here, as the VM memory layout may be
+     * non-contiguous or the VM can run with a vIOMMU enabled, so the
+     * limit could be relaxed. One can also switch to the IOMMUFD backend
+     * if there is a need to migrate a large VM.
+     */
+    page_size = 1ULL << ctz64(bcontainer->dirty_pgsizes);
+    max_size = bcontainer->max_dirty_bitmap_size * BITS_PER_BYTE * page_size;
+
+    return current_machine->ram_size > max_size;
+}
+
/*
* Return true when either migration initialized or blocker registered.
* Currently only return false when adding blocker fails which will
@@ -1208,6 +1238,13 @@ bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
goto add_blocker;
}

+    if (vfio_dirty_tracking_exceed_limit(vbasedev)) {
+        error_setg(&err, "%s: Migration is currently not supported with a "
+                   "large memory VM due to a dirty tracking limitation in "
+                   "the VFIO type1 driver", vbasedev->name);
+        goto add_blocker;
+    }
+
trace_vfio_migration_realize(vbasedev->name);
return true;
--
2.47.1
On 2025/10/17 16:22, Zhenzhong Duan wrote:
> [...]
> +    page_size = 1ULL << ctz64(bcontainer->dirty_pgsizes);
Should use qemu_real_host_page_size() here?
>-----Original Message-----
>From: Liu, Yi L <yi.l.liu@intel.com>
>Subject: Re: [PATCH v2 7/8] vfio/migration: Add migration blocker if VM
>memory is too large to cause unmap_bitmap failure
>
>On 2025/10/17 16:22, Zhenzhong Duan wrote:
>> [...]
>> +    page_size = 1ULL << ctz64(bcontainer->dirty_pgsizes);
>
>Should use qemu_real_host_page_size() here?
Hmm, I think that's the host MMU page size, which is not as accurate as
the IOMMU page sizes? Here we want the IOMMU ones.
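
For example (hypothetical values; dirty_pgsizes is a bitmask of the
IOMMU's supported dirty tracking page sizes, KiB/MiB/GiB from
qemu/units.h):

    uint64_t dirty_pgsizes = (4 * KiB) | (2 * MiB) | (1 * GiB);
    uint64_t page_size = 1ULL << ctz64(dirty_pgsizes); /* smallest: 4KiB */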
Thanks
Zhenzhong
On 2025/10/21 16:25, Duan, Zhenzhong wrote:
>> [...]
>> Should use qemu_real_host_page_size() here?
>
> Hmm, I think that's the host MMU page size, which is not as accurate as
> the IOMMU page sizes? Here we want the IOMMU ones.
I saw vfio_legacy_query_dirty_bitmap() uses qemu_real_host_page_size(),
though the kernel enforces the minimum IOMMU page size. Shall we let
vfio_legacy_query_dirty_bitmap() use the IOMMU page size instead of the
CPU page size?
>-----Original Message-----
>From: Liu, Yi L <yi.l.liu@intel.com>
>Subject: Re: [PATCH v2 7/8] vfio/migration: Add migration blocker if VM
>memory is too large to cause unmap_bitmap failure
>
>On 2025/10/21 16:25, Duan, Zhenzhong wrote:
>> [...]
>
>I saw vfio_legacy_query_dirty_bitmap() uses qemu_real_host_page_size(),
>though the kernel enforces the minimum IOMMU page size. Shall we let
>vfio_legacy_query_dirty_bitmap() use the IOMMU page size instead of the
>CPU page size?
qemu_real_host_page_size() is used in vfio_legacy_query_dirty_bitmap()
because cpu_physical_memory_set_dirty_lebitmap() only supports that size,
so we shouldn't change it.

bcontainer->dirty_pgsizes should always contain qemu_real_host_page_size(),
or else ioctl(VFIO_IOMMU_DIRTY_PAGES) would fail. So the code above has the
same effect as using qemu_real_host_page_size(), but I think it's clearer
for readers to use the IOMMU information for the calculation here.
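
As a concrete illustration (a sketch; the 4KiB minimum is an assumption
about the common x86 case, not something the code enforces):

    /* dirty_pgsizes must include the host page size for tracking to work */
    assert(bcontainer->dirty_pgsizes & qemu_real_host_page_size());
    /* when the smallest supported size is 4KiB, both computations agree */
    assert((1ULL << ctz64(bcontainer->dirty_pgsizes)) ==
           qemu_real_host_page_size());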
Thanks
Zhenzhong
On 10/17/25 10:22, Zhenzhong Duan wrote:
> With the default config, the kernel VFIO type1 driver limits the dirty
... VFIO IOMMU Type1 ...
> [...]
> +static bool vfio_dirty_tracking_exceed_limit(VFIODevice *vbasedev)
> +{
> +    VFIOContainer *bcontainer = vbasedev->bcontainer;
> +    uint64_t max_size, page_size;
> +
> +    if (!object_dynamic_cast(OBJECT(bcontainer), TYPE_VFIO_IOMMU_LEGACY)) {
> +        return false;
> +    }
Could we set 'dirty_pgsizes' and 'max_dirty_bitmap_size' in the IOMMUFD
backend to avoid the object_dynamic_cast()?
Thanks,
C.
>-----Original Message-----
>From: Cédric Le Goater <clg@redhat.com>
>Subject: Re: [PATCH v2 7/8] vfio/migration: Add migration blocker if VM
>memory is too large to cause unmap_bitmap failure
>
>On 10/17/25 10:22, Zhenzhong Duan wrote:
>> With the default config, the kernel VFIO type1 driver limits the dirty
>
>
>... VFIO IOMMU Type1 ...
OK
>
>> [...]
>> +    if (!object_dynamic_cast(OBJECT(bcontainer), TYPE_VFIO_IOMMU_LEGACY)) {
>> +        return false;
>> +    }
>
>Could we set 'dirty_pgsizes' and 'max_dirty_bitmap_size' in the IOMMUFD
>backend to avoid the object_dynamic_cast()?
Sure, will do.
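
Something like the following, perhaps (just a sketch; the "0 means no
limit" convention is made up here, exact handling TBD):

    /* in the IOMMUFD backend init: leave the limit unset */
    bcontainer->max_dirty_bitmap_size = 0;

    /* then vfio_dirty_tracking_exceed_limit() can drop the cast: */
    if (!bcontainer->max_dirty_bitmap_size) {
        return false;
    }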
Thanks
Zhenzhong