include/libvirt/libvirt-domain.h | 11 +++++++++++ src/libvirt-domain.c | 20 ++++++++++++++++++++ src/qemu/qemu_migration.h | 1 + src/qemu/qemu_migration_params.c | 8 +++++++- src/qemu/qemu_migration_params.h | 1 + 5 files changed, 40 insertions(+), 1 deletion(-)
Add plumbing for QEMU's switchover-ack migration capability, which
helps lower the downtime during VFIO migrations. This capability is
enabled by default as long as both the source and destination support
it.
Note: switchover-ack depends on the return path capability, so this may
not be used when VIR_MIGRATE_TUNNELLED flag is set.
Extensive details about the qemu switchover-ack implementation are
available in the qemu series v6 cover letter [1] where the highlight is
the extreme reduction in guest visible downtime. In addition to the
original test results below, I saw a roughly ~20% reduction in downtime
for VFIO VGPU devices at minimum.
=== Test results ===
The below table shows the downtime of two identical migrations. In the
first migration swithcover ack is disabled and in the second it is
enabled. The migrated VM is assigned with a mlx5 VFIO device which has
300MB of device data to be migrated.
+----------------------+-----------------------+----------+
| Switchover ack | VFIO device data size | Downtime |
+----------------------+-----------------------+----------+
| Disabled | 300MB | 1900ms |
| Enabled | 300MB | 420ms |
+----------------------+-----------------------+----------+
Switchover ack gives a roughly 4.5 times improvement in downtime.
The 1480ms difference is time that is used for resource allocation for
the VFIO device in the destination. Without switchover ack, this time is
spent when the source VM is stopped and thus the downtime is much
higher. With switchover ack, the time is spent when the source VM is
still running.
[1] https://patchwork.kernel.org/project/qemu-devel/cover/20230621111201.29729-1-avihaih@nvidia.com/
Signed-off-by: Jon Kohler <jon@nutanix.com>
Cc: Alex Williamson <alex.williamson@redhat.com>
Cc: Avihai Horon <avihaih@nvidia.com>
Cc: Markus Armbruster <armbru@redhat.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: YangHang Liu <yanghliu@redhat.com>
---
include/libvirt/libvirt-domain.h | 11 +++++++++++
src/libvirt-domain.c | 20 ++++++++++++++++++++
src/qemu/qemu_migration.h | 1 +
src/qemu/qemu_migration_params.c | 8 +++++++-
src/qemu/qemu_migration_params.h | 1 +
5 files changed, 40 insertions(+), 1 deletion(-)
diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h
index 2f5b01bbfe..9543629f30 100644
--- a/include/libvirt/libvirt-domain.h
+++ b/include/libvirt/libvirt-domain.h
@@ -1100,6 +1100,17 @@ typedef enum {
* Since: 8.5.0
*/
VIR_MIGRATE_ZEROCOPY = (1 << 20),
+
+ /* Use switchover ack migration capability to reduce downtime on VFIO
+ * device migration. This prevents the source from stopping the VM and
+ * completing the migration until an ACK is received from the destination
+ * that it's OK to do so. Thus, a VFIO device can make sure that its
+ * initial bytes were sent and loaded in the destination before the
+ * source VM is stopped.
+ *
+ * Since: 10.5.0
+ */
+ VIR_MIGRATE_SWITCHOVER_ACK = (1 << 21),
} virDomainMigrateFlags;
diff --git a/src/libvirt-domain.c b/src/libvirt-domain.c
index 7c6b93963c..786fef317d 100644
--- a/src/libvirt-domain.c
+++ b/src/libvirt-domain.c
@@ -3822,6 +3822,10 @@ virDomainMigrate(virDomainPtr domain,
VIR_MIGRATE_PARALLEL,
error);
+ VIR_EXCLUSIVE_FLAGS_GOTO(VIR_MIGRATE_TUNNELLED,
+ VIR_MIGRATE_SWITCHOVER_ACK,
+ error);
+
VIR_REQUIRE_FLAG_GOTO(VIR_MIGRATE_NON_SHARED_SYNCHRONOUS_WRITES,
VIR_MIGRATE_NON_SHARED_DISK | VIR_MIGRATE_NON_SHARED_INC,
error);
@@ -4021,6 +4025,10 @@ virDomainMigrate2(virDomainPtr domain,
VIR_MIGRATE_PARALLEL,
error);
+ VIR_EXCLUSIVE_FLAGS_GOTO(VIR_MIGRATE_TUNNELLED,
+ VIR_MIGRATE_SWITCHOVER_ACK,
+ error);
+
VIR_REQUIRE_FLAG_GOTO(VIR_MIGRATE_NON_SHARED_SYNCHRONOUS_WRITES,
VIR_MIGRATE_NON_SHARED_DISK | VIR_MIGRATE_NON_SHARED_INC,
error);
@@ -4497,6 +4505,10 @@ virDomainMigrateToURI(virDomainPtr domain,
VIR_MIGRATE_PARALLEL,
error);
+ VIR_EXCLUSIVE_FLAGS_GOTO(VIR_MIGRATE_TUNNELLED,
+ VIR_MIGRATE_SWITCHOVER_ACK,
+ error);
+
if (virDomainMigrateUnmanagedCheckCompat(domain, flags) < 0)
goto error;
@@ -4577,6 +4589,10 @@ virDomainMigrateToURI2(virDomainPtr domain,
VIR_MIGRATE_PARALLEL,
error);
+ VIR_EXCLUSIVE_FLAGS_GOTO(VIR_MIGRATE_TUNNELLED,
+ VIR_MIGRATE_SWITCHOVER_ACK,
+ error);
+
if (virDomainMigrateUnmanagedCheckCompat(domain, flags) < 0)
goto error;
@@ -4656,6 +4672,10 @@ virDomainMigrateToURI3(virDomainPtr domain,
VIR_MIGRATE_PARALLEL,
error);
+ VIR_EXCLUSIVE_FLAGS_GOTO(VIR_MIGRATE_TUNNELLED,
+ VIR_MIGRATE_SWITCHOVER_ACK,
+ error);
+
if (virDomainMigrateUnmanagedCheckCompat(domain, flags) < 0)
goto error;
diff --git a/src/qemu/qemu_migration.h b/src/qemu/qemu_migration.h
index ed62fd4a91..cd89e100e1 100644
--- a/src/qemu/qemu_migration.h
+++ b/src/qemu/qemu_migration.h
@@ -62,6 +62,7 @@
VIR_MIGRATE_NON_SHARED_SYNCHRONOUS_WRITES | \
VIR_MIGRATE_POSTCOPY_RESUME | \
VIR_MIGRATE_ZEROCOPY | \
+ VIR_MIGRATE_SWITCHOVER_ACK | \
0)
/* All supported migration parameters and their types. */
diff --git a/src/qemu/qemu_migration_params.c b/src/qemu/qemu_migration_params.c
index 48f8657f71..9593b6ba65 100644
--- a/src/qemu/qemu_migration_params.c
+++ b/src/qemu/qemu_migration_params.c
@@ -105,6 +105,7 @@ VIR_ENUM_IMPL(qemuMigrationCapability,
"return-path",
"zero-copy-send",
"postcopy-preempt",
+ "switchover-ack",
);
@@ -138,7 +139,7 @@ struct _qemuMigrationParamsAlwaysOnItem {
typedef struct _qemuMigrationParamsFlagMapItem qemuMigrationParamsFlagMapItem;
struct _qemuMigrationParamsFlagMapItem {
/* Describes what to do with the capability if @flag is found.
- * When se to QEMU_MIGRATION_FLAG_REQUIRED, the capability will be
+ * When set to QEMU_MIGRATION_FLAG_REQUIRED, the capability will be
* enabled iff the specified migration flag is enabled. On the other hand
* QEMU_MIGRATION_FLAG_FORBIDDEN will enable the capability as long as
* the specified migration flag is not enabled. */
@@ -215,6 +216,11 @@ static const qemuMigrationParamsFlagMapItem qemuMigrationParamsFlagMap[] = {
.flag = VIR_MIGRATE_ZEROCOPY,
.cap = QEMU_MIGRATION_CAP_ZERO_COPY_SEND,
.party = QEMU_MIGRATION_SOURCE},
+
+ {.match = QEMU_MIGRATION_FLAG_FORBIDDEN,
+ .flag = VIR_MIGRATE_TUNNELLED,
+ .cap = QEMU_MIGRATION_CAP_SWITCHOVER_ACK,
+ .party = QEMU_MIGRATION_SOURCE | QEMU_MIGRATION_DESTINATION},
};
/* Translation from VIR_MIGRATE_PARAM_* typed parameters to
diff --git a/src/qemu/qemu_migration_params.h b/src/qemu/qemu_migration_params.h
index 91bc6792cd..df67f1fb92 100644
--- a/src/qemu/qemu_migration_params.h
+++ b/src/qemu/qemu_migration_params.h
@@ -41,6 +41,7 @@ typedef enum {
QEMU_MIGRATION_CAP_RETURN_PATH,
QEMU_MIGRATION_CAP_ZERO_COPY_SEND,
QEMU_MIGRATION_CAP_POSTCOPY_PREEMPT,
+ QEMU_MIGRATION_CAP_SWITCHOVER_ACK,
QEMU_MIGRATION_CAP_LAST
} qemuMigrationCapability;
--
2.43.0
On Tue, Jun 18, 2024 at 08:06:06AM -0700, Jon Kohler wrote: > Add plumbing for QEMU's switchover-ack migration capability, which > helps lower the downtime during VFIO migrations. This capability is > enabled by default as long as both the source and destination support > it. > > Note: switchover-ack depends on the return path capability, so this may > not be used when VIR_MIGRATE_TUNNELLED flag is set. > > Extensive details about the qemu switchover-ack implementation are > available in the qemu series v6 cover letter [1] where the highlight is > the extreme reduction in guest visible downtime. In addition to the > original test results below, I saw a roughly ~20% reduction in downtime > for VFIO VGPU devices at minimum. > > === Test results === > > The below table shows the downtime of two identical migrations. In the > first migration swithcover ack is disabled and in the second it is > enabled. The migrated VM is assigned with a mlx5 VFIO device which has > 300MB of device data to be migrated. > > +----------------------+-----------------------+----------+ > | Switchover ack | VFIO device data size | Downtime | > +----------------------+-----------------------+----------+ > | Disabled | 300MB | 1900ms | > | Enabled | 300MB | 420ms | > +----------------------+-----------------------+----------+ > > Switchover ack gives a roughly 4.5 times improvement in downtime. > The 1480ms difference is time that is used for resource allocation for > the VFIO device in the destination. Without switchover ack, this time is > spent when the source VM is stopped and thus the downtime is much > higher. With switchover ack, the time is spent when the source VM is > still running. > > [1] https://patchwork.kernel.org/project/qemu-devel/cover/20230621111201.29729-1-avihaih@nvidia.com/ > > Signed-off-by: Jon Kohler <jon@nutanix.com> > Cc: Alex Williamson <alex.williamson@redhat.com> > Cc: Avihai Horon <avihaih@nvidia.com> > Cc: Markus Armbruster <armbru@redhat.com> > Cc: Peter Xu <peterx@redhat.com> > Cc: YangHang Liu <yanghliu@redhat.com> > --- > include/libvirt/libvirt-domain.h | 11 +++++++++++ > src/libvirt-domain.c | 20 ++++++++++++++++++++ > src/qemu/qemu_migration.h | 1 + > src/qemu/qemu_migration_params.c | 8 +++++++- > src/qemu/qemu_migration_params.h | 1 + > 5 files changed, 40 insertions(+), 1 deletion(-) > > diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h > index 2f5b01bbfe..9543629f30 100644 > --- a/include/libvirt/libvirt-domain.h > +++ b/include/libvirt/libvirt-domain.h > @@ -1100,6 +1100,17 @@ typedef enum { > * Since: 8.5.0 > */ > VIR_MIGRATE_ZEROCOPY = (1 << 20), > + > + /* Use switchover ack migration capability to reduce downtime on VFIO > + * device migration. This prevents the source from stopping the VM and > + * completing the migration until an ACK is received from the destination > + * that it's OK to do so. Thus, a VFIO device can make sure that its > + * initial bytes were sent and loaded in the destination before the > + * source VM is stopped. > + * > + * Since: 10.5.0 > + */ > + VIR_MIGRATE_SWITCHOVER_ACK = (1 << 21), > } virDomainMigrateFlags; Do we really need a flag for this ? Is there a credible scenario in which this flag works, and yet shouldn't be used by libvirt ? IOW, can we just "do the right thing" and always enable this, except for TUNNELLED mode. With regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
On Tue, Jun 18, 2024 at 16:14:29 +0100, Daniel P. Berrangé wrote: > On Tue, Jun 18, 2024 at 08:06:06AM -0700, Jon Kohler wrote: > > diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h > > index 2f5b01bbfe..9543629f30 100644 > > --- a/include/libvirt/libvirt-domain.h > > +++ b/include/libvirt/libvirt-domain.h > > @@ -1100,6 +1100,17 @@ typedef enum { > > * Since: 8.5.0 > > */ > > VIR_MIGRATE_ZEROCOPY = (1 << 20), > > + > > + /* Use switchover ack migration capability to reduce downtime on VFIO > > + * device migration. This prevents the source from stopping the VM and > > + * completing the migration until an ACK is received from the destination > > + * that it's OK to do so. Thus, a VFIO device can make sure that its > > + * initial bytes were sent and loaded in the destination before the > > + * source VM is stopped. > > + * > > + * Since: 10.5.0 > > + */ > > + VIR_MIGRATE_SWITCHOVER_ACK = (1 << 21), > > } virDomainMigrateFlags; > > Do we really need a flag for this ? Is there a credible scenario > in which this flag works, and yet shouldn't be used by libvirt ? > > IOW, can we just "do the right thing" and always enable this, > except for TUNNELLED mode. I discussed this capability some time ago with Peter (I think) and if IIRC there was some downside when the capability is enabled for domains that do not use VFIO. I don't remember exactly what it was about, but perhaps introducing an extra delay in migration switchover? Peter, can you add the details, please? That said, I think we should be able to do the right thing anyway and enable this capability only for domains that use VFIO. Jirka
> On Jun 20, 2024, at 4:30 AM, Jiri Denemark <jdenemar@redhat.com> wrote: > > !-------------------------------------------------------------------| > CAUTION: External Email > > |-------------------------------------------------------------------! > > On Tue, Jun 18, 2024 at 16:14:29 +0100, Daniel P. Berrangé wrote: >> On Tue, Jun 18, 2024 at 08:06:06AM -0700, Jon Kohler wrote: >>> diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h >>> index 2f5b01bbfe..9543629f30 100644 >>> --- a/include/libvirt/libvirt-domain.h >>> +++ b/include/libvirt/libvirt-domain.h >>> @@ -1100,6 +1100,17 @@ typedef enum { >>> * Since: 8.5.0 >>> */ >>> VIR_MIGRATE_ZEROCOPY = (1 << 20), >>> + >>> + /* Use switchover ack migration capability to reduce downtime on VFIO >>> + * device migration. This prevents the source from stopping the VM and >>> + * completing the migration until an ACK is received from the destination >>> + * that it's OK to do so. Thus, a VFIO device can make sure that its >>> + * initial bytes were sent and loaded in the destination before the >>> + * source VM is stopped. >>> + * >>> + * Since: 10.5.0 >>> + */ >>> + VIR_MIGRATE_SWITCHOVER_ACK = (1 << 21), >>> } virDomainMigrateFlags; >> >> Do we really need a flag for this ? Is there a credible scenario >> in which this flag works, and yet shouldn't be used by libvirt ? >> >> IOW, can we just "do the right thing" and always enable this, >> except for TUNNELLED mode. > > I discussed this capability some time ago with Peter (I think) and if > IIRC there was some downside when the capability is enabled for domains > that do not use VFIO. I don't remember exactly what it was about, but > perhaps introducing an extra delay in migration switchover? Peter, can > you add the details, please? Thanks - @Peter, if you have additional info on that, would love to know what the non-VFIO downsides are here. > > That said, I think we should be able to do the right thing anyway and > enable this capability only for domains that use VFIO. Ok, I’ll see what I can cook up in a v2 patch with this as the direction. > > Jirka >
On Thu, Jun 20, 2024 at 07:45:42PM +0000, Jon Kohler wrote: > > > > On Jun 20, 2024, at 4:30 AM, Jiri Denemark <jdenemar@redhat.com> wrote: > > > > !-------------------------------------------------------------------| > > CAUTION: External Email > > > > |-------------------------------------------------------------------! > > > > On Tue, Jun 18, 2024 at 16:14:29 +0100, Daniel P. Berrangé wrote: > >> On Tue, Jun 18, 2024 at 08:06:06AM -0700, Jon Kohler wrote: > >>> diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h > >>> index 2f5b01bbfe..9543629f30 100644 > >>> --- a/include/libvirt/libvirt-domain.h > >>> +++ b/include/libvirt/libvirt-domain.h > >>> @@ -1100,6 +1100,17 @@ typedef enum { > >>> * Since: 8.5.0 > >>> */ > >>> VIR_MIGRATE_ZEROCOPY = (1 << 20), > >>> + > >>> + /* Use switchover ack migration capability to reduce downtime on VFIO > >>> + * device migration. This prevents the source from stopping the VM and > >>> + * completing the migration until an ACK is received from the destination > >>> + * that it's OK to do so. Thus, a VFIO device can make sure that its > >>> + * initial bytes were sent and loaded in the destination before the > >>> + * source VM is stopped. > >>> + * > >>> + * Since: 10.5.0 > >>> + */ > >>> + VIR_MIGRATE_SWITCHOVER_ACK = (1 << 21), > >>> } virDomainMigrateFlags; > >> > >> Do we really need a flag for this ? Is there a credible scenario > >> in which this flag works, and yet shouldn't be used by libvirt ? > >> > >> IOW, can we just "do the right thing" and always enable this, > >> except for TUNNELLED mode. > > > > I discussed this capability some time ago with Peter (I think) and if > > IIRC there was some downside when the capability is enabled for domains > > that do not use VFIO. I don't remember exactly what it was about, but > > perhaps introducing an extra delay in migration switchover? Peter, can > > you add the details, please? > > Thanks - @Peter, if you have additional info on that, would love to know > what the non-VFIO downsides are here. So far, VFIO is the only one who will register this "ACK needed" hook. When nobody registers with it, the ACK will be sent upfront of a migration when return path is established. That happens at the very beginning of a migration, and that ACK will be completely meaningless in that case. Said that, it may not be too bad either to have that meaningless ACK, if that will simply Libvirt. That only happens once per migration, and after sent once it should work exactly the same as when switchover-ack not enabled. Thanks, -- Peter Xu
> On Jun 20, 2024, at 4:06 PM, Peter Xu <peterx@redhat.com> wrote: > > !-------------------------------------------------------------------| > CAUTION: External Email > > |-------------------------------------------------------------------! > > On Thu, Jun 20, 2024 at 07:45:42PM +0000, Jon Kohler wrote: >> >> >>> On Jun 20, 2024, at 4:30 AM, Jiri Denemark <jdenemar@redhat.com> wrote: >>> >>> !-------------------------------------------------------------------| >>> CAUTION: External Email >>> >>> |-------------------------------------------------------------------! >>> >>> On Tue, Jun 18, 2024 at 16:14:29 +0100, Daniel P. Berrangé wrote: >>>> On Tue, Jun 18, 2024 at 08:06:06AM -0700, Jon Kohler wrote: >>>>> diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h >>>>> index 2f5b01bbfe..9543629f30 100644 >>>>> --- a/include/libvirt/libvirt-domain.h >>>>> +++ b/include/libvirt/libvirt-domain.h >>>>> @@ -1100,6 +1100,17 @@ typedef enum { >>>>> * Since: 8.5.0 >>>>> */ >>>>> VIR_MIGRATE_ZEROCOPY = (1 << 20), >>>>> + >>>>> + /* Use switchover ack migration capability to reduce downtime on VFIO >>>>> + * device migration. This prevents the source from stopping the VM and >>>>> + * completing the migration until an ACK is received from the destination >>>>> + * that it's OK to do so. Thus, a VFIO device can make sure that its >>>>> + * initial bytes were sent and loaded in the destination before the >>>>> + * source VM is stopped. >>>>> + * >>>>> + * Since: 10.5.0 >>>>> + */ >>>>> + VIR_MIGRATE_SWITCHOVER_ACK = (1 << 21), >>>>> } virDomainMigrateFlags; >>>> >>>> Do we really need a flag for this ? Is there a credible scenario >>>> in which this flag works, and yet shouldn't be used by libvirt ? >>>> >>>> IOW, can we just "do the right thing" and always enable this, >>>> except for TUNNELLED mode. >>> >>> I discussed this capability some time ago with Peter (I think) and if >>> IIRC there was some downside when the capability is enabled for domains >>> that do not use VFIO. I don't remember exactly what it was about, but >>> perhaps introducing an extra delay in migration switchover? Peter, can >>> you add the details, please? >> >> Thanks - @Peter, if you have additional info on that, would love to know >> what the non-VFIO downsides are here. > > So far, VFIO is the only one who will register this "ACK needed" hook. > When nobody registers with it, the ACK will be sent upfront of a migration > when return path is established. That happens at the very beginning of a > migration, and that ACK will be completely meaningless in that case. > > Said that, it may not be too bad either to have that meaningless ACK, if > that will simply Libvirt. That only happens once per migration, and after > sent once it should work exactly the same as when switchover-ack not enabled. RE Simplicity - thats exactly my thought here. If it is effectively a no-op, then just enabling switchover-ack full-time on all migrations would make this quite easy. Said another way, I take your comments to mean that there is no functional or performance downside to enabling switchover-ack on non-VFIO, therefore, might as well keep it simple and not special-case it in libvirt side, right? Thanks, Jon > > Thanks, > > -- > Peter Xu >
> On Jun 20, 2024, at 4:16 PM, Jon Kohler <jon@nutanix.com> wrote: > > > >> On Jun 20, 2024, at 4:06 PM, Peter Xu <peterx@redhat.com> wrote: >> >> !-------------------------------------------------------------------| >> CAUTION: External Email >> >> |-------------------------------------------------------------------! >> >> On Thu, Jun 20, 2024 at 07:45:42PM +0000, Jon Kohler wrote: >>> >>> >>>> On Jun 20, 2024, at 4:30 AM, Jiri Denemark <jdenemar@redhat.com> wrote: >>>> >>>> !-------------------------------------------------------------------| >>>> CAUTION: External Email >>>> >>>> |-------------------------------------------------------------------! >>>> >>>> On Tue, Jun 18, 2024 at 16:14:29 +0100, Daniel P. Berrangé wrote: >>>>> On Tue, Jun 18, 2024 at 08:06:06AM -0700, Jon Kohler wrote: >>>>>> diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h >>>>>> index 2f5b01bbfe..9543629f30 100644 >>>>>> --- a/include/libvirt/libvirt-domain.h >>>>>> +++ b/include/libvirt/libvirt-domain.h >>>>>> @@ -1100,6 +1100,17 @@ typedef enum { >>>>>> * Since: 8.5.0 >>>>>> */ >>>>>> VIR_MIGRATE_ZEROCOPY = (1 << 20), >>>>>> + >>>>>> + /* Use switchover ack migration capability to reduce downtime on VFIO >>>>>> + * device migration. This prevents the source from stopping the VM and >>>>>> + * completing the migration until an ACK is received from the destination >>>>>> + * that it's OK to do so. Thus, a VFIO device can make sure that its >>>>>> + * initial bytes were sent and loaded in the destination before the >>>>>> + * source VM is stopped. >>>>>> + * >>>>>> + * Since: 10.5.0 >>>>>> + */ >>>>>> + VIR_MIGRATE_SWITCHOVER_ACK = (1 << 21), >>>>>> } virDomainMigrateFlags; >>>>> >>>>> Do we really need a flag for this ? Is there a credible scenario >>>>> in which this flag works, and yet shouldn't be used by libvirt ? >>>>> >>>>> IOW, can we just "do the right thing" and always enable this, >>>>> except for TUNNELLED mode. >>>> >>>> I discussed this capability some time ago with Peter (I think) and if >>>> IIRC there was some downside when the capability is enabled for domains >>>> that do not use VFIO. I don't remember exactly what it was about, but >>>> perhaps introducing an extra delay in migration switchover? Peter, can >>>> you add the details, please? >>> >>> Thanks - @Peter, if you have additional info on that, would love to know >>> what the non-VFIO downsides are here. >> >> So far, VFIO is the only one who will register this "ACK needed" hook. >> When nobody registers with it, the ACK will be sent upfront of a migration >> when return path is established. That happens at the very beginning of a >> migration, and that ACK will be completely meaningless in that case. >> >> Said that, it may not be too bad either to have that meaningless ACK, if >> that will simply Libvirt. That only happens once per migration, and after >> sent once it should work exactly the same as when switchover-ack not enabled. > > RE Simplicity - thats exactly my thought here. If it is effectively a no-op, then just > enabling switchover-ack full-time on all migrations would make this quite easy. > > Said another way, I take your comments to mean that there is no functional or > performance downside to enabling switchover-ack on non-VFIO, therefore, might > as well keep it simple and not special-case it in libvirt side, right? > > Thanks, > Jon V2 patch posted to the list just now. Thanks for the feedback everyone > >> >> Thanks, >> >> -- >> Peter Xu
> On Jun 18, 2024, at 11:14 AM, Daniel P. Berrangé <berrange@redhat.com> wrote: > > !-------------------------------------------------------------------| > CAUTION: External Email > > |-------------------------------------------------------------------! > > On Tue, Jun 18, 2024 at 08:06:06AM -0700, Jon Kohler wrote: >> Add plumbing for QEMU's switchover-ack migration capability, which >> helps lower the downtime during VFIO migrations. This capability is >> enabled by default as long as both the source and destination support >> it. >> >> Note: switchover-ack depends on the return path capability, so this may >> not be used when VIR_MIGRATE_TUNNELLED flag is set. >> >> Extensive details about the qemu switchover-ack implementation are >> available in the qemu series v6 cover letter [1] where the highlight is >> the extreme reduction in guest visible downtime. In addition to the >> original test results below, I saw a roughly ~20% reduction in downtime >> for VFIO VGPU devices at minimum. >> >> === Test results === >> >> The below table shows the downtime of two identical migrations. In the >> first migration swithcover ack is disabled and in the second it is >> enabled. The migrated VM is assigned with a mlx5 VFIO device which has >> 300MB of device data to be migrated. >> >> +----------------------+-----------------------+----------+ >> | Switchover ack | VFIO device data size | Downtime | >> +----------------------+-----------------------+----------+ >> | Disabled | 300MB | 1900ms | >> | Enabled | 300MB | 420ms | >> +----------------------+-----------------------+----------+ >> >> Switchover ack gives a roughly 4.5 times improvement in downtime. >> The 1480ms difference is time that is used for resource allocation for >> the VFIO device in the destination. Without switchover ack, this time is >> spent when the source VM is stopped and thus the downtime is much >> higher. With switchover ack, the time is spent when the source VM is >> still running. >> >> [1] https://urldefense.proofpoint.com/v2/url?u=https-3A__patchwork.kernel.org_project_qemu-2Ddevel_cover_20230621111201.29729-2D1-2Davihaih-40nvidia.com_&d=DwIBaQ&c=s883GpUCOChKOHiocYtGcg&r=NGPRGGo37mQiSXgHKm5rCQ&m=IK96YahScD6JLFH7zWWs9JorFqX8nJEfoxk6A_fdNOKH6x1TW4o3ACyknvLMQN-K&s=LXbohsflI2O3b_WWKMx82xYBwAgUvO4yct1RrzuTOSs&e= >> >> Signed-off-by: Jon Kohler <jon@nutanix.com> >> Cc: Alex Williamson <alex.williamson@redhat.com> >> Cc: Avihai Horon <avihaih@nvidia.com> >> Cc: Markus Armbruster <armbru@redhat.com> >> Cc: Peter Xu <peterx@redhat.com> >> Cc: YangHang Liu <yanghliu@redhat.com> >> --- >> include/libvirt/libvirt-domain.h | 11 +++++++++++ >> src/libvirt-domain.c | 20 ++++++++++++++++++++ >> src/qemu/qemu_migration.h | 1 + >> src/qemu/qemu_migration_params.c | 8 +++++++- >> src/qemu/qemu_migration_params.h | 1 + >> 5 files changed, 40 insertions(+), 1 deletion(-) >> >> diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h >> index 2f5b01bbfe..9543629f30 100644 >> --- a/include/libvirt/libvirt-domain.h >> +++ b/include/libvirt/libvirt-domain.h >> @@ -1100,6 +1100,17 @@ typedef enum { >> * Since: 8.5.0 >> */ >> VIR_MIGRATE_ZEROCOPY = (1 << 20), >> + >> + /* Use switchover ack migration capability to reduce downtime on VFIO >> + * device migration. This prevents the source from stopping the VM and >> + * completing the migration until an ACK is received from the destination >> + * that it's OK to do so. Thus, a VFIO device can make sure that its >> + * initial bytes were sent and loaded in the destination before the >> + * source VM is stopped. >> + * >> + * Since: 10.5.0 >> + */ >> + VIR_MIGRATE_SWITCHOVER_ACK = (1 << 21), >> } virDomainMigrateFlags; > > Do we really need a flag for this ? Is there a credible scenario > in which this flag works, and yet shouldn't be used by libvirt ? > > IOW, can we just "do the right thing" and always enable this, > except for TUNNELLED mode. The flag makes the error handling in src/libvirt-domain.c a bit smoother, but if we dropped that, we could simply depend on the “party” logic in qemuMigrationParamsFlagMap to do the right thing, which as far as I can tell does exactly what you’re talking about. In short, if we are OK with dropping the error handling given by VIR_EXCLUSIVE_FLAGS_GOTO(), then we could simplify this. Happy to go either way with this, let me know what you think about the error handling comments above. Thanks for the quick review - Jon > > > With regards, > Daniel > -- > |: https://urldefense.proofpoint.com/v2/url?u=https-3A__berrange.com&d=DwIBaQ&c=s883GpUCOChKOHiocYtGcg&r=NGPRGGo37mQiSXgHKm5rCQ&m=IK96YahScD6JLFH7zWWs9JorFqX8nJEfoxk6A_fdNOKH6x1TW4o3ACyknvLMQN-K&s=dejjCPlhp3d29jOJm8xIAoRxolMcjVexK07G3qdQt5A&e= -o- https://urldefense.proofpoint.com/v2/url?u=https-3A__www.flickr.com_photos_dberrange&d=DwIBaQ&c=s883GpUCOChKOHiocYtGcg&r=NGPRGGo37mQiSXgHKm5rCQ&m=IK96YahScD6JLFH7zWWs9JorFqX8nJEfoxk6A_fdNOKH6x1TW4o3ACyknvLMQN-K&s=y0K6Ram4tL7Uu74R-yqo-gibJM9XxrdVy5eB8D8DGCQ&e= :| > |: https://urldefense.proofpoint.com/v2/url?u=https-3A__libvirt.org&d=DwIBaQ&c=s883GpUCOChKOHiocYtGcg&r=NGPRGGo37mQiSXgHKm5rCQ&m=IK96YahScD6JLFH7zWWs9JorFqX8nJEfoxk6A_fdNOKH6x1TW4o3ACyknvLMQN-K&s=o2SVP1vo3ALW1s_3cY_vd2DKCHcbK-zMR9Mpz6X5NzY&e= -o- https://urldefense.proofpoint.com/v2/url?u=https-3A__fstop138.berrange.com&d=DwIBaQ&c=s883GpUCOChKOHiocYtGcg&r=NGPRGGo37mQiSXgHKm5rCQ&m=IK96YahScD6JLFH7zWWs9JorFqX8nJEfoxk6A_fdNOKH6x1TW4o3ACyknvLMQN-K&s=Xn9lrZVPIT-j2dxLgZ44mqA2eOqDAZ5tIInN9zNMNT4&e= :| > |: https://urldefense.proofpoint.com/v2/url?u=https-3A__entangle-2Dphoto.org&d=DwIBaQ&c=s883GpUCOChKOHiocYtGcg&r=NGPRGGo37mQiSXgHKm5rCQ&m=IK96YahScD6JLFH7zWWs9JorFqX8nJEfoxk6A_fdNOKH6x1TW4o3ACyknvLMQN-K&s=gR-qzVxTnd6m6kXnUhxi0xNjkEphbpwc6SdNMmhBb_s&e= -o- https://urldefense.proofpoint.com/v2/url?u=https-3A__www.instagram.com_dberrange&d=DwIBaQ&c=s883GpUCOChKOHiocYtGcg&r=NGPRGGo37mQiSXgHKm5rCQ&m=IK96YahScD6JLFH7zWWs9JorFqX8nJEfoxk6A_fdNOKH6x1TW4o3ACyknvLMQN-K&s=SbmO2KPGog2k1fYbN30msRFiwCUndhS_KyizZSGB4mI&e= :|
On Tue, Jun 18, 2024 at 03:26:03PM +0000, Jon Kohler wrote: > > > > On Jun 18, 2024, at 11:14 AM, Daniel P. Berrangé <berrange@redhat.com> wrote: > > > > !-------------------------------------------------------------------| > > CAUTION: External Email > > > > |-------------------------------------------------------------------! > > > > On Tue, Jun 18, 2024 at 08:06:06AM -0700, Jon Kohler wrote: > >> Add plumbing for QEMU's switchover-ack migration capability, which > >> helps lower the downtime during VFIO migrations. This capability is > >> enabled by default as long as both the source and destination support > >> it. > >> > >> Note: switchover-ack depends on the return path capability, so this may > >> not be used when VIR_MIGRATE_TUNNELLED flag is set. > >> > >> Extensive details about the qemu switchover-ack implementation are > >> available in the qemu series v6 cover letter [1] where the highlight is > >> the extreme reduction in guest visible downtime. In addition to the > >> original test results below, I saw a roughly ~20% reduction in downtime > >> for VFIO VGPU devices at minimum. > >> > >> === Test results === > >> > >> The below table shows the downtime of two identical migrations. In the > >> first migration swithcover ack is disabled and in the second it is > >> enabled. The migrated VM is assigned with a mlx5 VFIO device which has > >> 300MB of device data to be migrated. > >> > >> +----------------------+-----------------------+----------+ > >> | Switchover ack | VFIO device data size | Downtime | > >> +----------------------+-----------------------+----------+ > >> | Disabled | 300MB | 1900ms | > >> | Enabled | 300MB | 420ms | > >> +----------------------+-----------------------+----------+ > >> > >> Switchover ack gives a roughly 4.5 times improvement in downtime. > >> The 1480ms difference is time that is used for resource allocation for > >> the VFIO device in the destination. Without switchover ack, this time is > >> spent when the source VM is stopped and thus the downtime is much > >> higher. With switchover ack, the time is spent when the source VM is > >> still running. > >> > >> [1] https://urldefense.proofpoint.com/v2/url?u=https-3A__patchwork.kernel.org_project_qemu-2Ddevel_cover_20230621111201.29729-2D1-2Davihaih-40nvidia.com_&d=DwIBaQ&c=s883GpUCOChKOHiocYtGcg&r=NGPRGGo37mQiSXgHKm5rCQ&m=IK96YahScD6JLFH7zWWs9JorFqX8nJEfoxk6A_fdNOKH6x1TW4o3ACyknvLMQN-K&s=LXbohsflI2O3b_WWKMx82xYBwAgUvO4yct1RrzuTOSs&e= > >> > >> Signed-off-by: Jon Kohler <jon@nutanix.com> > >> Cc: Alex Williamson <alex.williamson@redhat.com> > >> Cc: Avihai Horon <avihaih@nvidia.com> > >> Cc: Markus Armbruster <armbru@redhat.com> > >> Cc: Peter Xu <peterx@redhat.com> > >> Cc: YangHang Liu <yanghliu@redhat.com> > >> --- > >> include/libvirt/libvirt-domain.h | 11 +++++++++++ > >> src/libvirt-domain.c | 20 ++++++++++++++++++++ > >> src/qemu/qemu_migration.h | 1 + > >> src/qemu/qemu_migration_params.c | 8 +++++++- > >> src/qemu/qemu_migration_params.h | 1 + > >> 5 files changed, 40 insertions(+), 1 deletion(-) > >> > >> diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h > >> index 2f5b01bbfe..9543629f30 100644 > >> --- a/include/libvirt/libvirt-domain.h > >> +++ b/include/libvirt/libvirt-domain.h > >> @@ -1100,6 +1100,17 @@ typedef enum { > >> * Since: 8.5.0 > >> */ > >> VIR_MIGRATE_ZEROCOPY = (1 << 20), > >> + > >> + /* Use switchover ack migration capability to reduce downtime on VFIO > >> + * device migration. This prevents the source from stopping the VM and > >> + * completing the migration until an ACK is received from the destination > >> + * that it's OK to do so. Thus, a VFIO device can make sure that its > >> + * initial bytes were sent and loaded in the destination before the > >> + * source VM is stopped. > >> + * > >> + * Since: 10.5.0 > >> + */ > >> + VIR_MIGRATE_SWITCHOVER_ACK = (1 << 21), > >> } virDomainMigrateFlags; > > > > Do we really need a flag for this ? Is there a credible scenario > > in which this flag works, and yet shouldn't be used by libvirt ? > > > > IOW, can we just "do the right thing" and always enable this, > > except for TUNNELLED mode. > > The flag makes the error handling in src/libvirt-domain.c a bit > smoother, but if we dropped that, we could simply depend on > the “party” logic in qemuMigrationParamsFlagMap to do the > right thing, which as far as I can tell does exactly what you’re > talking about. > > In short, if we are OK with dropping the error handling given > by VIR_EXCLUSIVE_FLAGS_GOTO(), then we could > simplify this. If we don't add a VIR_MIGRATE_SWITCHOVER_ACK flag at all, then there are no errors that need to be checked using VIR_EXCLUSIVE_FLAGS_GOTO in the first place. Libvirt should automatically use switchover-ack, if-and-only-if, both src+dst QEMU support switchover-ack AND TUNNELLED flag is NOT requested. IOW, just make it "do the right thing" With regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
© 2016 - 2024 Red Hat, Inc.