From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
There's already a max in-flight VFIO device state buffers *count* limit,
add also max queued buffers *size* limit.
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
---
docs/devel/migration/vfio.rst | 8 +++++---
hw/vfio/migration-multifd.c | 21 +++++++++++++++++++--
hw/vfio/pci.c | 9 +++++++++
include/hw/vfio/vfio-common.h | 1 +
4 files changed, 34 insertions(+), 5 deletions(-)
diff --git a/docs/devel/migration/vfio.rst b/docs/devel/migration/vfio.rst
index 7c9cb7bdbf87..127a1db35949 100644
--- a/docs/devel/migration/vfio.rst
+++ b/docs/devel/migration/vfio.rst
@@ -254,12 +254,14 @@ This means that a malicious QEMU source could theoretically cause the target
QEMU to allocate unlimited amounts of memory for such buffers-in-flight.
The "x-migration-max-queued-buffers" property allows capping the maximum count
-of these VFIO device state buffers queued at the destination.
+of these VFIO device state buffers queued at the destination while
+"x-migration-max-queued-buffers-size" property allows capping their total queued
+size.
Because a malicious QEMU source causing OOM on the target is not expected to be
a realistic threat in most of VFIO live migration use cases and the right value
-depends on the particular setup by default this queued buffers limit is
-disabled by setting it to UINT64_MAX.
+depends on the particular setup by default these queued buffers limits are
+disabled by setting them to UINT64_MAX.
Some host platforms (like ARM64) require that VFIO device config is loaded only
after all iterables were loaded.
diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
index dccd763d7c39..a9d41b9f1cb1 100644
--- a/hw/vfio/migration-multifd.c
+++ b/hw/vfio/migration-multifd.c
@@ -83,6 +83,7 @@ typedef struct VFIOMultifd {
uint32_t load_buf_idx;
uint32_t load_buf_idx_last;
uint32_t load_buf_queued_pending_buffers;
+ size_t load_buf_queued_pending_buffers_size;
} VFIOMultifd;
static void vfio_state_buffer_clear(gpointer data)
@@ -139,6 +140,7 @@ static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev,
VFIOMigration *migration = vbasedev->migration;
VFIOMultifd *multifd = migration->multifd;
VFIOStateBuffer *lb;
+ size_t data_size = packet_total_size - sizeof(*packet);
vfio_state_buffers_assert_init(&multifd->load_bufs);
if (packet->idx >= vfio_state_buffers_size_get(&multifd->load_bufs)) {
@@ -165,8 +167,19 @@ static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev,
return false;
}
- lb->data = g_memdup2(&packet->data, packet_total_size - sizeof(*packet));
- lb->len = packet_total_size - sizeof(*packet);
+ multifd->load_buf_queued_pending_buffers_size += data_size;
+ if (multifd->load_buf_queued_pending_buffers_size >
+ vbasedev->migration_max_queued_buffers_size) {
+ error_setg(errp,
+ "%s: queuing state buffer %" PRIu32
+ " would exceed the size max of %" PRIu64,
+ vbasedev->name, packet->idx,
+ vbasedev->migration_max_queued_buffers_size);
+ return false;
+ }
+
+ lb->data = g_memdup2(&packet->data, data_size);
+ lb->len = data_size;
lb->is_present = true;
return true;
@@ -346,6 +359,9 @@ static bool vfio_load_state_buffer_write(VFIODevice *vbasedev,
assert(wr_ret <= buf_len);
buf_len -= wr_ret;
buf_cur += wr_ret;
+
+ assert(multifd->load_buf_queued_pending_buffers_size >= wr_ret);
+ multifd->load_buf_queued_pending_buffers_size -= wr_ret;
}
trace_vfio_load_state_device_buffer_load_end(vbasedev->name,
@@ -519,6 +535,7 @@ static VFIOMultifd *vfio_multifd_new(void)
multifd->load_buf_idx = 0;
multifd->load_buf_idx_last = UINT32_MAX;
multifd->load_buf_queued_pending_buffers = 0;
+ multifd->load_buf_queued_pending_buffers_size = 0;
qemu_cond_init(&multifd->load_bufs_buffer_ready_cond);
multifd->load_bufs_iter_done = false;
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 02f784c1b2a3..8abf73f810ee 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3392,6 +3392,8 @@ static const Property vfio_pci_dev_properties[] = {
ON_OFF_AUTO_AUTO),
DEFINE_PROP_UINT64("x-migration-max-queued-buffers", VFIOPCIDevice,
vbasedev.migration_max_queued_buffers, UINT64_MAX),
+ DEFINE_PROP_SIZE("x-migration-max-queued-buffers-size", VFIOPCIDevice,
+ vbasedev.migration_max_queued_buffers_size, UINT64_MAX),
DEFINE_PROP_BOOL("migration-events", VFIOPCIDevice,
vbasedev.migration_events, false),
DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),
@@ -3581,6 +3583,13 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
"destination when doing live "
"migration of device state via "
"multifd channels");
+ object_class_property_set_description(klass, /* 10.0 */
+ "x-migration-max-queued-buffers-size",
+ "Maximum size of in-flight VFIO "
+ "device state buffers queued at the "
+ "destination when doing live "
+ "migration of device state via "
+ "multifd channels");
}
static const TypeInfo vfio_pci_dev_info = {
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index c8ff4252e24a..fff2f35754b2 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -158,6 +158,7 @@ typedef struct VFIODevice {
OnOffAuto migration_multifd_transfer;
OnOffAuto migration_load_config_after_iter;
uint64_t migration_max_queued_buffers;
+ uint64_t migration_max_queued_buffers_size;
bool migration_events;
VFIODeviceOps *ops;
unsigned int num_irqs;
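
The mechanism the patch adds boils down to a running total: each received packet's payload size is counted against a per-device total before the buffer is queued, queuing is refused once the total would exceed the configured cap, and the total shrinks again as buffers are written to the device. The standalone C sketch below illustrates only that accounting scheme; the type and function names are hypothetical and simplified, not the actual QEMU code.

    #include <inttypes.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical, simplified stand-ins for the QEMU-side state. */
    typedef struct {
        uint64_t max_queued_size;   /* x-migration-max-queued-buffers-size */
        uint64_t queued_size;       /* bytes currently buffered, not yet written */
    } LoadState;

    /* Called when a device state packet arrives on a multifd channel. */
    static bool queue_buffer(LoadState *s, uint64_t data_size)
    {
        if (s->queued_size + data_size > s->max_queued_size) {
            fprintf(stderr, "queuing %" PRIu64 " bytes would exceed the %" PRIu64
                    " byte cap\n", data_size, s->max_queued_size);
            return false;             /* the load fails at this point */
        }
        s->queued_size += data_size;  /* buffer is now accounted for */
        return true;
    }

    /* Called after a queued buffer has been written to the device and freed. */
    static void release_buffer(LoadState *s, uint64_t data_size)
    {
        s->queued_size -= data_size;  /* memory is no longer in flight */
    }

    int main(void)
    {
        LoadState s = { .max_queued_size = 4, .queued_size = 0 };

        queue_buffer(&s, 3);      /* accepted: 3 of 4 bytes queued */
        queue_buffer(&s, 3);      /* rejected: would exceed the cap */
        release_buffer(&s, 3);    /* queued bytes written out */
        queue_buffer(&s, 3);      /* accepted again */
        return 0;
    }

With the property left at its UINT64_MAX default the check never triggers, matching the documentation's "disabled by setting them to UINT64_MAX".
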
On 3/7/25 11:57, Maciej S. Szmigiero wrote:
> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
>
> There's already a max in-flight VFIO device state buffers *count* limit,

no. there isn't. Do we need both ?

> add also max queued buffers *size* limit.

[...]

> @@ -83,6 +83,7 @@ typedef struct VFIOMultifd {
>     uint32_t load_buf_idx;
>     uint32_t load_buf_idx_last;
>     uint32_t load_buf_queued_pending_buffers;

'load_buf_queued_pending_buffers' is not in mainline. Please rebase.


Thanks,

C.
On 7.03.2025 13:03, Cédric Le Goater wrote:
> On 3/7/25 11:57, Maciej S. Szmigiero wrote:
>> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
>>
>> There's already a max in-flight VFIO device state buffers *count* limit,
>
> no. there isn't. Do we need both ?

This is on top of the remaining patches (x-migration-load-config-after-iter
and x-migration-max-queued-buffers) - I thought we were supposed to work
on these after the main series was merged as they are relatively non-critical.

I would also give x-migration-load-config-after-iter priority over
x-migration-max-queued-buffers{,-size} as the former is a correctness fix
while the latter are just additional functionalities.

Also, if some setup is truly worried about these buffers consuming too much
memory then roughly the same thing could be achieved by (temporarily) putting
the target QEMU process in a memory-limited cgroup.

On the other hand, the network endianness patch is urgent since it affects
the bit stream.

[...]

>>     uint32_t load_buf_queued_pending_buffers;
>
> 'load_buf_queued_pending_buffers' is not in mainline. Please rebase.

Thanks,
Maciej
On 3/7/25 14:45, Maciej S. Szmigiero wrote:
> On 7.03.2025 13:03, Cédric Le Goater wrote:
>> On 3/7/25 11:57, Maciej S. Szmigiero wrote:
>>> There's already a max in-flight VFIO device state buffers *count* limit,
>>
>> no. there isn't. Do we need both ?
>
> This is on top of the remaining patches (x-migration-load-config-after-iter
> and x-migration-max-queued-buffers) - I thought we were supposed to work
> on these after the main series was merged as they are relatively non-critical.

yes. we don't need both count and size limits though, a size limit is enough.

> I would also give x-migration-load-config-after-iter priority over
> x-migration-max-queued-buffers{,-size} as the former is a correctness fix
> while the latter are just additional functionalities.

ok. I have kept both patches in my tree with the doc updates.

> Also, if some setup is truly worried about these buffers consuming too much
> memory then roughly the same thing could be achieved by (temporarily) putting
> the target QEMU process in a memory-limited cgroup.

yes.

That said,

since QEMU exchanges 1MB VFIODeviceStatePackets when using multifd and the
overall device state is in the order of 100MB :

    /*
     * This is an arbitrary size based on migration of mlx5 devices, where typically
     * total device migration size is on the order of 100s of MB. Testing with
     * larger values, e.g. 128MB and 1GB, did not show a performance improvement.
     */
    #define VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE (1 * MiB)

Could we define the limit to 1GB ?

Avihai, would that make sense ?

Thanks,

C.
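
As a back-of-the-envelope illustration of the numbers in the message above (the figures here are assumptions for the example, not measurements): with 1 MiB packets a byte cap maps directly onto a packet count, and a device state in the low hundreds of MiB sits well under a 1 GiB cap.

    #include <stdio.h>

    int main(void)
    {
        const unsigned long long MiB = 1024ULL * 1024ULL;
        const unsigned long long GiB = 1024ULL * MiB;

        /* Illustrative assumptions based on the discussion above. */
        unsigned long long packet_size  = 1 * MiB;    /* multifd device state packet */
        unsigned long long device_state = 500 * MiB;  /* "100s of MB" of mlx5 state  */
        unsigned long long cap          = 1 * GiB;    /* proposed default limit      */

        printf("packets needed for the whole state:   %llu\n",
               device_state / packet_size);
        printf("packets allowed in flight by the cap: %llu\n",
               cap / packet_size);
        printf("cap covers worst-case queued state:   %s\n",
               cap >= device_state ? "yes" : "no");
        return 0;
    }
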
On 11.03.2025 14:04, Cédric Le Goater wrote:
> On 3/7/25 14:45, Maciej S. Szmigiero wrote:
>> I would also give x-migration-load-config-after-iter priority over
>> x-migration-max-queued-buffers{,-size} as the former is a correctness fix
>> while the latter are just additional functionalities.
>
> ok. I have kept both patches in my tree with the doc updates.

I don't see the x-migration-load-config-after-iter patch in upstream QEMU
anywhere.
That's a bit concerning since it's a correctness fix - without it the
multifd VFIO migration on ARM64 can fail.

The existing patch still applies, but requires changing
"#if defined(TARGET_ARM)" to "strcmp(target_name(), "aarch64") == 0" due to
recent commit 5731baee6c3c ("hw/vfio: Compile some common objects once").

I can submit an updated patch if you like.

Thanks,
Maciej
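
As background on the change described above: once the file is compiled a single time for all targets (per the commit cited), a compile-time #if defined(TARGET_ARM) guard can no longer work, so the decision has to move to a runtime check of the target name. The standalone sketch below only illustrates that pattern; target_name() is stubbed here and the helper name is made up for the example, not taken from the actual patch.

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    /* Stub for illustration; QEMU's target_name() returns the configured
     * target architecture name, e.g. "aarch64" or "x86_64". */
    static const char *target_name(void)
    {
        return "aarch64";
    }

    /* Hypothetical helper: decide at run time, rather than compile time,
     * whether device config must be loaded only after all iterables. */
    static bool load_config_after_iter_needed(void)
    {
        /* old, per-target build:   #if defined(TARGET_ARM) ... #endif   */
        /* new, once-compiled build: */
        return strcmp(target_name(), "aarch64") == 0;
    }

    int main(void)
    {
        printf("load config after iterables: %s\n",
               load_config_after_iter_needed() ? "yes" : "no");
        return 0;
    }
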
Hello Maciej,

On 4/1/25 14:26, Maciej S. Szmigiero wrote:
> I don't see the x-migration-load-config-after-iter patch in upstream QEMU
> anywhere.
> That's a bit concerning since it's a correctness fix - without it the
> multifd VFIO migration on ARM64 can fail.
>
> The existing patch still applies, but requires changing
> "#if defined(TARGET_ARM)" to "strcmp(target_name(), "aarch64") == 0" due to
> recent commit 5731baee6c3c ("hw/vfio: Compile some common objects once").
>
> I can submit an updated patch if you like.

It is a bit early.

Let's wait for the spring cleanup to be applied first. I am waiting for
more feedback from Avihai and Joao. It should not be long.

Thanks,

C.
On 2.04.2025 11:51, Cédric Le Goater wrote:
> Hello Maciej,
>
> On 4/1/25 14:26, Maciej S. Szmigiero wrote:
>> I can submit an updated patch if you like.
>
> It is a bit early.
>
> Let's wait for the spring cleanup to be applied first. I am waiting for
> more feedback from Avihai and Joao. It should not be long.

I guess by "spring cleanup" you mean this patch set:
https://lore.kernel.org/qemu-devel/20250326075122.1299361-1-clg@redhat.com/

It is marked "for-10.1" while I think we should not have this ARM64
regression in 10.0, which is due to be released in 2-3 weeks.

(The situation is different with the buffer queuing limits patches which can
wait since they are just additional functionalities rather than correctness
fixes).

Thanks,
Maciej
On 4/2/25 14:40, Maciej S. Szmigiero wrote:
> I guess by "spring cleanup" you mean this patch set:
> https://lore.kernel.org/qemu-devel/20250326075122.1299361-1-clg@redhat.com/
>
> It is marked "for-10.1" while I think we should not have this ARM64
> regression in 10.0, which is due to be released in 2-3 weeks.

A regression would mean the feature worked before, which is not the case - it
didn't exist. As said before, I'd rather expose the initial "multifd support
for VFIO migration" feature first, without workarounds, in QEMU 10.0.

Support on ARM is broken not because we are missing support in VFIO but
because there is an issue in the ordering of device states on ARM. IMO, this
needs to be addressed with a larger crowd. Please include the migration
maintainers, the virt ARM maintainers and the GIC maintainers, and let's see
what can be done to avoid a workaround during the QEMU 10.1 cycle.

VFIO migration is a recent feature. VFIO migration support on ARM (for MLX5
VFs) is even newer (there were recent fixes in the upstream kernel for it).
If a distro needs support for it, your patch is there and ready to be
backported. So there is a plan B.

Let's not rush things please.

Thanks,

C.
On 11/03/2025 15:04, Cédric Le Goater wrote:
> On 3/7/25 14:45, Maciej S. Szmigiero wrote:
>> Also, if some setup is truly worried about these buffers consuming too much
>> memory then roughly the same thing could be achieved by (temporarily)
>> putting the target QEMU process in a memory-limited cgroup.
>
> yes.
>
> That said,
>
> since QEMU exchanges 1MB VFIODeviceStatePackets when using multifd and the
> overall device state is in the order of 100MB :
>
>     /*
>      * This is an arbitrary size based on migration of mlx5 devices, where typically
>      * total device migration size is on the order of 100s of MB. Testing with
>      * larger values, e.g. 128MB and 1GB, did not show a performance improvement.
>      */
>     #define VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE (1 * MiB)
>
> Could we define the limit to 1GB ?
>
> Avihai, would that make sense ?

There can be many use cases, each one with its own requirements and
constraints, so it's hard for me to think of a "good" default value.

IIUC this limit is mostly relevant for the extreme cases where devices have a
big state + writing the buffers to the device is slow.
So IMHO let's set it to unlimited by default and let the users decide if they
want to set such a limit and to what value. (Note also that even when
unlimited, it is really limited to 2 * device_state_size.)

Unless you have other reasons why 1GB or another value is preferable?

Thanks.
On 3/11/25 15:57, Avihai Horon wrote:
> On 11/03/2025 15:04, Cédric Le Goater wrote:
>> Could we define the limit to 1GB ?
>>
>> Avihai, would that make sense ?
>
> There can be many use cases, each one with its own requirements and
> constraints, so it's hard for me to think of a "good" default value.
>
> IIUC this limit is mostly relevant for the extreme cases where devices have a
> big state + writing the buffers to the device is slow.
> So IMHO let's set it to unlimited by default and let the users decide if they
> want to set such a limit and to what value. (Note also that even when
> unlimited, it is really limited to 2 * device_state_size.)
>
> Unless you have other reasons why 1GB or another value is preferable?

none, but UINT_MAX is not a good value either. Let's wait before introducing
a new limiting property.

I will send the last PR for QEMU 10.0 at the end of the day.

Thanks,

C.
On 11/03/2025 17:45, Cédric Le Goater wrote:
> On 3/11/25 15:57, Avihai Horon wrote:
>> Unless you have other reasons why 1GB or another value is preferable?
>
> none, but UINT_MAX is not a good value either.

You mean UINT_MAX is not a good value to represent "unlimited", or that
unlimited is not a good default value?

> Let's wait before introducing a new limiting property.
>
> I will send the last PR for QEMU 10.0 at the end of the day.
On 3/11/25 17:01, Avihai Horon wrote:
> On 11/03/2025 17:45, Cédric Le Goater wrote:
>>> Unless you have other reasons why 1GB or another value is preferable?
>>
>> none, but UINT_MAX is not a good value either.
>
> You mean UINT_MAX is not a good value to represent "unlimited", or that
> unlimited is not a good default value?

unlimited is not a good default value.

Thanks,

C.
On 11/03/2025 18:05, Cédric Le Goater wrote:
> On 3/11/25 17:01, Avihai Horon wrote:
>> You mean UINT_MAX is not a good value to represent "unlimited", or that
>> unlimited is not a good default value?
>
> unlimited is not a good default value.

Why not? It basically means "disabled".