[PATCH v3] vfio/migration: Send VFIO_MIGRATION event before PRE_COPY_P2P transition

Avihai Horon posted 1 patch 4 days, 9 hours ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20260202173406.13979-1-avihaih@nvidia.com
Maintainers: Alex Williamson <alex@shazbot.org>, "Cédric Le Goater" <clg@redhat.com>, Eric Blake <eblake@redhat.com>, Markus Armbruster <armbru@redhat.com>
qapi/vfio.json      | 13 +++++++++++--
hw/vfio/migration.c | 26 +++++++++++++++++++-------
2 files changed, 30 insertions(+), 9 deletions(-)
[PATCH v3] vfio/migration: Send VFIO_MIGRATION event before PRE_COPY_P2P transition
Posted by Avihai Horon 4 days, 9 hours ago
The VFIO_MIGRATION event notifies users when a VFIO device transitions
to a new state.

One use case for this event is to prevent timeouts for RDMA connections
to the migrated device. In this case, an external management application
(not libvirt) consumes the events and disables the RDMA timeout
mechanism when receiving the event for PRE_COPY_P2P state, which
indicates that the device is non-responsive.

This is essential because RDMA connections typically have very low
timeouts (tens of milliseconds), which can be far below migration
downtime.

However, under heavy resource utilization, the device transition to
PRE_COPY_P2P can take hundreds of milliseconds to complete. Since the
VFIO_MIGRATION event is currently sent only after the transition
completes, it arrives too late, after RDMA connections have already
timed out.

To address this, send an additional "prepare" event immediately before
initiating the PRE_COPY_P2P transition. This guarantees timely event
delivery regardless of how long the actual state transition takes.

Signed-off-by: Avihai Horon <avihaih@nvidia.com>
---
Changes from v2 (https://lore.kernel.org/qemu-devel/20260201122348.28478-1-avihaih@nvidia.com/):
* Renamed prepare-pre-copy-p2p to pre-copy-p2p-prepare
* Renamed prep parameter to prepare in mig_state_to_qapi_state() and
  vfio_migration_send_event()
* Added short explanatory comment before sending the prepare event in
  vfio_migration_set_state()
* Explicitly used VFIO_DEVICE_STATE_PRE_COPY_P2P as parameter for
  vfio_migration_send_event()

Changes from v1 (https://lore.kernel.org/qemu-devel/20260128105159.10282-1-avihaih@nvidia.com/):
* Removed VFIO_MIGRATION_PREPARE event and instead added a new
  PREPARE_PRE_COPY_P2P state which is sent before PRE_COPY_P2P
  transition
* Added details to commit message
---
 qapi/vfio.json      | 13 +++++++++++--
 hw/vfio/migration.c | 26 +++++++++++++++++++-------
 2 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/qapi/vfio.json b/qapi/vfio.json
index a1a9c5b673..17b6046871 100644
--- a/qapi/vfio.json
+++ b/qapi/vfio.json
@@ -11,7 +11,13 @@
 ##
 # @QapiVfioMigrationState:
 #
-# An enumeration of the VFIO device migration states.
+# An enumeration of the VFIO device migration states.  In addition to
+# the regular states, there are prepare states (with 'prepare' suffix)
+# which indicate that the device is just about to transition to the
+# corresponding state.  Note that seeing a prepare state for state X
+# doesn't guarantee that the next state will be X, as the state
+# transition can fail and the device may transition to a different
+# state instead.
 #
 # @stop: The device is stopped.
 #
@@ -32,11 +38,14 @@
 #     tracking its internal state and its internal state is available
 #     for reading.
 #
+# @pre-copy-p2p-prepare: The device is just about to move to
+#     pre-copy-p2p state.  (since 11.0)
+#
 # Since: 9.1
 ##
 { 'enum': 'QapiVfioMigrationState',
   'data': [ 'stop', 'running', 'stop-copy', 'resuming', 'running-p2p',
-            'pre-copy', 'pre-copy-p2p' ] }
+            'pre-copy', 'pre-copy-p2p', 'pre-copy-p2p-prepare' ] }
 
 ##
 # @VFIO_MIGRATION:
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index b4695030c7..4bd8e24699 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -68,7 +68,7 @@ static const char *mig_state_to_str(enum vfio_device_mig_state state)
 }
 
 static QapiVfioMigrationState
-mig_state_to_qapi_state(enum vfio_device_mig_state state)
+mig_state_to_qapi_state(enum vfio_device_mig_state state, bool prepare)
 {
     switch (state) {
     case VFIO_DEVICE_STATE_STOP:
@@ -84,15 +84,17 @@ mig_state_to_qapi_state(enum vfio_device_mig_state state)
     case VFIO_DEVICE_STATE_PRE_COPY:
         return QAPI_VFIO_MIGRATION_STATE_PRE_COPY;
     case VFIO_DEVICE_STATE_PRE_COPY_P2P:
-        return QAPI_VFIO_MIGRATION_STATE_PRE_COPY_P2P;
+        return prepare ? QAPI_VFIO_MIGRATION_STATE_PRE_COPY_P2P_PREPARE :
+                         QAPI_VFIO_MIGRATION_STATE_PRE_COPY_P2P;
     default:
         g_assert_not_reached();
     }
 }
 
-static void vfio_migration_send_event(VFIODevice *vbasedev)
+static void vfio_migration_send_event(VFIODevice *vbasedev,
+                                      enum vfio_device_mig_state state,
+                                      bool prepare)
 {
-    VFIOMigration *migration = vbasedev->migration;
     DeviceState *dev = vbasedev->dev;
     g_autofree char *qom_path = NULL;
     Object *obj;
@@ -106,8 +108,8 @@ static void vfio_migration_send_event(VFIODevice *vbasedev)
     g_assert(obj);
     qom_path = object_get_canonical_path(obj);
 
-    qapi_event_send_vfio_migration(
-        dev->id, qom_path, mig_state_to_qapi_state(migration->device_state));
+    qapi_event_send_vfio_migration(dev->id, qom_path,
+                                   mig_state_to_qapi_state(state, prepare));
 }
 
 static void vfio_migration_set_device_state(VFIODevice *vbasedev,
@@ -119,7 +121,7 @@ static void vfio_migration_set_device_state(VFIODevice *vbasedev,
                                           mig_state_to_str(state));
 
     migration->device_state = state;
-    vfio_migration_send_event(vbasedev);
+    vfio_migration_send_event(vbasedev, state, false);
 }
 
 int vfio_migration_set_state(VFIODevice *vbasedev,
@@ -146,6 +148,16 @@ int vfio_migration_set_state(VFIODevice *vbasedev,
         return 0;
     }
 
+    /*
+     * Send a prepare event before initiating the PRE_COPY_P2P transition to
+     * ensure timely event delivery regardless of how long the state transition
+     * takes.
+     */
+    if (new_state == VFIO_DEVICE_STATE_PRE_COPY_P2P) {
+        vfio_migration_send_event(vbasedev, VFIO_DEVICE_STATE_PRE_COPY_P2P,
+                                  true);
+    }
+
     feature->argsz = sizeof(buf);
     feature->flags =
         VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE;
-- 
2.40.1
Re: [PATCH v3] vfio/migration: Send VFIO_MIGRATION event before PRE_COPY_P2P transition
Posted by Markus Armbruster 2 days, 14 hours ago
Avihai Horon <avihaih@nvidia.com> writes:

> The VFIO_MIGRATION event notifies users when a VFIO device transitions
> to a new state.
>
> One use case for this event is to prevent timeouts for RDMA connections
> to the migrated device. In this case, an external management application
> (not libvirt) consumes the events and disables the RDMA timeout
> mechanism when receiving the event for PRE_COPY_P2P state, which
> indicates that the device is non-responsive.
>
> This is essential because RDMA connections typically have very low
> timeouts (tens of milliseconds), which can be far below migration
> downtime.
>
> However, under heavy resource utilization, the device transition to
> PRE_COPY_P2P can take hundreds of milliseconds to complete. Since the
> VFIO_MIGRATION event is currently sent only after the transition
> completes, it arrives too late, after RDMA connections have already
> timed out.
>
> To address this, send an additional "prepare" event immediately before
> initiating the PRE_COPY_P2P transition. This guarantees timely event
> delivery regardless of how long the actual state transition takes.
>
> Signed-off-by: Avihai Horon <avihaih@nvidia.com>

[...]

> diff --git a/qapi/vfio.json b/qapi/vfio.json
> index a1a9c5b673..17b6046871 100644
> --- a/qapi/vfio.json
> +++ b/qapi/vfio.json
> @@ -11,7 +11,13 @@
>  ##
>  # @QapiVfioMigrationState:
>  #
> -# An enumeration of the VFIO device migration states.
> +# An enumeration of the VFIO device migration states.  In addition to
> +# the regular states, there are prepare states (with 'prepare' suffix)
> +# which indicate that the device is just about to transition to the
> +# corresponding state.  Note that seeing a prepare state for state X
> +# doesn't guarantee that the next state will be X, as the state
> +# transition can fail and the device may transition to a different
> +# state instead.
>  #
>  # @stop: The device is stopped.
>  #
> @@ -32,11 +38,14 @@
>  #     tracking its internal state and its internal state is available
>  #     for reading.
>  #
> +# @pre-copy-p2p-prepare: The device is just about to move to
> +#     pre-copy-p2p state.  (since 11.0)
> +#
>  # Since: 9.1
>  ##
>  { 'enum': 'QapiVfioMigrationState',
>    'data': [ 'stop', 'running', 'stop-copy', 'resuming', 'running-p2p',
> -            'pre-copy', 'pre-copy-p2p' ] }
> +            'pre-copy', 'pre-copy-p2p', 'pre-copy-p2p-prepare' ] }
>  
>  ##
>  # @VFIO_MIGRATION:

Acked-by: Markus Armbruster <armbru@redhat.com>

[...]
Re: [PATCH v3] vfio/migration: Send VFIO_MIGRATION event before PRE_COPY_P2P transition
Posted by Markus Armbruster 2 days, 14 hours ago
Markus Armbruster <armbru@redhat.com> writes:

> Avihai Horon <avihaih@nvidia.com> writes:
>
>> The VFIO_MIGRATION event notifies users when a VFIO device transitions
>> to a new state.
>>
>> One use case for this event is to prevent timeouts for RDMA connections
>> to the migrated device. In this case, an external management application
>> (not libvirt) consumes the events and disables the RDMA timeout
>> mechanism when receiving the event for PRE_COPY_P2P state, which
>> indicates that the device is non-responsive.
>>
>> This is essential because RDMA connections typically have very low
>> timeouts (tens of milliseconds), which can be far below migration
>> downtime.
>>
>> However, under heavy resource utilization, the device transition to
>> PRE_COPY_P2P can take hundreds of milliseconds to complete. Since the
>> VFIO_MIGRATION event is currently sent only after the transition
>> completes, it arrives too late, after RDMA connections have already
>> timed out.
>>
>> To address this, send an additional "prepare" event immediately before
>> initiating the PRE_COPY_P2P transition. This guarantees timely event
>> delivery regardless of how long the actual state transition takes.
>>
>> Signed-off-by: Avihai Horon <avihaih@nvidia.com>
>
> [...]
>
>> diff --git a/qapi/vfio.json b/qapi/vfio.json
>> index a1a9c5b673..17b6046871 100644
>> --- a/qapi/vfio.json
>> +++ b/qapi/vfio.json
>> @@ -11,7 +11,13 @@
>>  ##
>>  # @QapiVfioMigrationState:
>>  #
>> -# An enumeration of the VFIO device migration states.
>> +# An enumeration of the VFIO device migration states.  In addition to
>> +# the regular states, there are prepare states (with 'prepare' suffix)
>> +# which indicate that the device is just about to transition to the
>> +# corresponding state.  Note that seeing a prepare state for state X
>> +# doesn't guarantee that the next state will be X, as the state
>> +# transition can fail and the device may transition to a different
>> +# state instead.
>>  #
>>  # @stop: The device is stopped.
>>  #
>> @@ -32,11 +38,14 @@
>>  #     tracking its internal state and its internal state is available
>>  #     for reading.
>>  #
>> +# @pre-copy-p2p-prepare: The device is just about to move to
>> +#     pre-copy-p2p state.  (since 11.0)
>> +#
>>  # Since: 9.1
>>  ##
>>  { 'enum': 'QapiVfioMigrationState',
>>    'data': [ 'stop', 'running', 'stop-copy', 'resuming', 'running-p2p',
>> -            'pre-copy', 'pre-copy-p2p' ] }
>> +            'pre-copy', 'pre-copy-p2p', 'pre-copy-p2p-prepare' ] }
>>  
>>  ##
>>  # @VFIO_MIGRATION:
>
> Acked-by: Markus Armbruster <armbru@redhat.com>

Except for the subject line: "vfio/migration: Send VFIO_MIGRATION event
before PRE_COPY_P2P transition" became misleading in v2.

>
> [...]
Re: [PATCH v3] vfio/migration: Send VFIO_MIGRATION event before PRE_COPY_P2P transition
Posted by Avihai Horon 2 days, 13 hours ago
On 2/4/2026 3:24 PM, Markus Armbruster wrote:
> External email: Use caution opening links or attachments
>
>
> Markus Armbruster <armbru@redhat.com> writes:
>
>> Avihai Horon <avihaih@nvidia.com> writes:
>>
>>> The VFIO_MIGRATION event notifies users when a VFIO device transitions
>>> to a new state.
>>>
>>> One use case for this event is to prevent timeouts for RDMA connections
>>> to the migrated device. In this case, an external management application
>>> (not libvirt) consumes the events and disables the RDMA timeout
>>> mechanism when receiving the event for PRE_COPY_P2P state, which
>>> indicates that the device is non-responsive.
>>>
>>> This is essential because RDMA connections typically have very low
>>> timeouts (tens of milliseconds), which can be far below migration
>>> downtime.
>>>
>>> However, under heavy resource utilization, the device transition to
>>> PRE_COPY_P2P can take hundreds of milliseconds to complete. Since the
>>> VFIO_MIGRATION event is currently sent only after the transition
>>> completes, it arrives too late, after RDMA connections have already
>>> timed out.
>>>
>>> To address this, send an additional "prepare" event immediately before
>>> initiating the PRE_COPY_P2P transition. This guarantees timely event
>>> delivery regardless of how long the actual state transition takes.
>>>
>>> Signed-off-by: Avihai Horon <avihaih@nvidia.com>
>> [...]
>>
>>> diff --git a/qapi/vfio.json b/qapi/vfio.json
>>> index a1a9c5b673..17b6046871 100644
>>> --- a/qapi/vfio.json
>>> +++ b/qapi/vfio.json
>>> @@ -11,7 +11,13 @@
>>>   ##
>>>   # @QapiVfioMigrationState:
>>>   #
>>> -# An enumeration of the VFIO device migration states.
>>> +# An enumeration of the VFIO device migration states.  In addition to
>>> +# the regular states, there are prepare states (with 'prepare' suffix)
>>> +# which indicate that the device is just about to transition to the
>>> +# corresponding state.  Note that seeing a prepare state for state X
>>> +# doesn't guarantee that the next state will be X, as the state
>>> +# transition can fail and the device may transition to a different
>>> +# state instead.
>>>   #
>>>   # @stop: The device is stopped.
>>>   #
>>> @@ -32,11 +38,14 @@
>>>   #     tracking its internal state and its internal state is available
>>>   #     for reading.
>>>   #
>>> +# @pre-copy-p2p-prepare: The device is just about to move to
>>> +#     pre-copy-p2p state.  (since 11.0)
>>> +#
>>>   # Since: 9.1
>>>   ##
>>>   { 'enum': 'QapiVfioMigrationState',
>>>     'data': [ 'stop', 'running', 'stop-copy', 'resuming', 'running-p2p',
>>> -            'pre-copy', 'pre-copy-p2p' ] }
>>> +            'pre-copy', 'pre-copy-p2p', 'pre-copy-p2p-prepare' ] }
>>>
>>>   ##
>>>   # @VFIO_MIGRATION:
>> Acked-by: Markus Armbruster <armbru@redhat.com>
> Except for the subject line: "vfio/migration: Send VFIO_MIGRATION event
> before PRE_COPY_P2P transition" became misleading in v2.

Can you explain why it is misleading?

Prior to this patch VFIO_MIGRATION event was sent only after 
PRE_COPY_P2P transition.
Now with this patch VFIO_MIGRATION event is sent also before 
PRE_COPY_P2P transition.

Thanks.
Re: [PATCH v3] vfio/migration: Send VFIO_MIGRATION event before PRE_COPY_P2P transition
Posted by Markus Armbruster 2 days, 13 hours ago
Avihai Horon <avihaih@nvidia.com> writes:

> On 2/4/2026 3:24 PM, Markus Armbruster wrote:

[...]

>> Except for the subject line: "vfio/migration: Send VFIO_MIGRATION event
>> before PRE_COPY_P2P transition" became misleading in v2.
>
> Can you explain why it is misleading?
>
> Prior to this patch VFIO_MIGRATION event was sent only after PRE_COPY_P2P transition.
> Now with this patch VFIO_MIGRATION event is sent also before PRE_COPY_P2P transition.

Never mind, I got confused :)
Re: [PATCH v3] vfio/migration: Send VFIO_MIGRATION event before PRE_COPY_P2P transition
Posted by Cédric Le Goater 3 days, 10 hours ago
On 2/2/26 18:34, Avihai Horon wrote:
> The VFIO_MIGRATION event notifies users when a VFIO device transitions
> to a new state.
> 
> One use case for this event is to prevent timeouts for RDMA connections
> to the migrated device. In this case, an external management application
> (not libvirt) consumes the events and disables the RDMA timeout
> mechanism when receiving the event for PRE_COPY_P2P state, which
> indicates that the device is non-responsive.
> 
> This is essential because RDMA connections typically have very low
> timeouts (tens of milliseconds), which can be far below migration
> downtime.
> 
> However, under heavy resource utilization, the device transition to
> PRE_COPY_P2P can take hundreds of milliseconds to complete. Since the
> VFIO_MIGRATION event is currently sent only after the transition
> completes, it arrives too late, after RDMA connections have already
> timed out.
> 
> To address this, send an additional "prepare" event immediately before
> initiating the PRE_COPY_P2P transition. This guarantees timely event
> delivery regardless of how long the actual state transition takes.
> 
> Signed-off-by: Avihai Horon <avihaih@nvidia.com>
> ---
> Changes from v2 (https://lore.kernel.org/qemu-devel/20260201122348.28478-1-avihaih@nvidia.com/):
> * Renamed prepare-pre-copy-p2p to pre-copy-p2p-prepare
> * Renamed prep parameter to prepare in mig_state_to_qapi_state() and
>    vfio_migration_send_event()
> * Added short explanatory comment before sending the prepare event in
>    vfio_migration_set_state()
> * Explicitly used VFIO_DEVICE_STATE_PRE_COPY_P2P as parameter for
>    vfio_migration_send_event()
> 
> Changes from v1 (https://lore.kernel.org/qemu-devel/20260128105159.10282-1-avihaih@nvidia.com/):
> * Removed VFIO_MIGRATION_PREPARE event and instead added a new
>    PREPARE_PRE_COPY_P2P state which is sent before PRE_COPY_P2P
>    transition
> * Added details to commit message
> ---
>   qapi/vfio.json      | 13 +++++++++++--
>   hw/vfio/migration.c | 26 +++++++++++++++++++-------
>   2 files changed, 30 insertions(+), 9 deletions(-)
> 
> diff --git a/qapi/vfio.json b/qapi/vfio.json
> index a1a9c5b673..17b6046871 100644
> --- a/qapi/vfio.json
> +++ b/qapi/vfio.json
> @@ -11,7 +11,13 @@
>   ##
>   # @QapiVfioMigrationState:


(I had forgotten about the vfio-pci "migration-events" property)


Peter, Fabiano,

Do you think it would be interesting to send VFIO migration events
by default?

Thanks,

C.


>   #
> -# An enumeration of the VFIO device migration states.
> +# An enumeration of the VFIO device migration states.  In addition to
> +# the regular states, there are prepare states (with 'prepare' suffix)
> +# which indicate that the device is just about to transition to the
> +# corresponding state.  Note that seeing a prepare state for state X
> +# doesn't guarantee that the next state will be X, as the state
> +# transition can fail and the device may transition to a different
> +# state instead.
>   #
>   # @stop: The device is stopped.
>   #
> @@ -32,11 +38,14 @@
>   #     tracking its internal state and its internal state is available
>   #     for reading.
>   #
> +# @pre-copy-p2p-prepare: The device is just about to move to
> +#     pre-copy-p2p state.  (since 11.0)
> +#
>   # Since: 9.1
>   ##
>   { 'enum': 'QapiVfioMigrationState',
>     'data': [ 'stop', 'running', 'stop-copy', 'resuming', 'running-p2p',
> -            'pre-copy', 'pre-copy-p2p' ] }
> +            'pre-copy', 'pre-copy-p2p', 'pre-copy-p2p-prepare' ] }
>   
>   ##
>   # @VFIO_MIGRATION:
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> index b4695030c7..4bd8e24699 100644
> --- a/hw/vfio/migration.c
> +++ b/hw/vfio/migration.c
> @@ -68,7 +68,7 @@ static const char *mig_state_to_str(enum vfio_device_mig_state state)
>   }
>   
>   static QapiVfioMigrationState
> -mig_state_to_qapi_state(enum vfio_device_mig_state state)
> +mig_state_to_qapi_state(enum vfio_device_mig_state state, bool prepare)
>   {
>       switch (state) {
>       case VFIO_DEVICE_STATE_STOP:
> @@ -84,15 +84,17 @@ mig_state_to_qapi_state(enum vfio_device_mig_state state)
>       case VFIO_DEVICE_STATE_PRE_COPY:
>           return QAPI_VFIO_MIGRATION_STATE_PRE_COPY;
>       case VFIO_DEVICE_STATE_PRE_COPY_P2P:
> -        return QAPI_VFIO_MIGRATION_STATE_PRE_COPY_P2P;
> +        return prepare ? QAPI_VFIO_MIGRATION_STATE_PRE_COPY_P2P_PREPARE :
> +                         QAPI_VFIO_MIGRATION_STATE_PRE_COPY_P2P;
>       default:
>           g_assert_not_reached();
>       }
>   }
>   
> -static void vfio_migration_send_event(VFIODevice *vbasedev)
> +static void vfio_migration_send_event(VFIODevice *vbasedev,
> +                                      enum vfio_device_mig_state state,
> +                                      bool prepare)
>   {
> -    VFIOMigration *migration = vbasedev->migration;
>       DeviceState *dev = vbasedev->dev;
>       g_autofree char *qom_path = NULL;
>       Object *obj;
> @@ -106,8 +108,8 @@ static void vfio_migration_send_event(VFIODevice *vbasedev)
>       g_assert(obj);
>       qom_path = object_get_canonical_path(obj);
>   
> -    qapi_event_send_vfio_migration(
> -        dev->id, qom_path, mig_state_to_qapi_state(migration->device_state));
> +    qapi_event_send_vfio_migration(dev->id, qom_path,
> +                                   mig_state_to_qapi_state(state, prepare));
>   }
>   
>   static void vfio_migration_set_device_state(VFIODevice *vbasedev,
> @@ -119,7 +121,7 @@ static void vfio_migration_set_device_state(VFIODevice *vbasedev,
>                                             mig_state_to_str(state));
>   
>       migration->device_state = state;
> -    vfio_migration_send_event(vbasedev);
> +    vfio_migration_send_event(vbasedev, state, false);
>   }
>   
>   int vfio_migration_set_state(VFIODevice *vbasedev,
> @@ -146,6 +148,16 @@ int vfio_migration_set_state(VFIODevice *vbasedev,
>           return 0;
>       }
>   
> +    /*
> +     * Send a prepare event before initiating the PRE_COPY_P2P transition to
> +     * ensure timely event delivery regardless of how long the state transition
> +     * takes.
> +     */
> +    if (new_state == VFIO_DEVICE_STATE_PRE_COPY_P2P) {
> +        vfio_migration_send_event(vbasedev, VFIO_DEVICE_STATE_PRE_COPY_P2P,
> +                                  true);
> +    }
> +
>       feature->argsz = sizeof(buf);
>       feature->flags =
>           VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE;
Re: [PATCH v3] vfio/migration: Send VFIO_MIGRATION event before PRE_COPY_P2P transition
Posted by Peter Xu 3 days, 10 hours ago
On Tue, Feb 03, 2026 at 05:48:11PM +0100, Cédric Le Goater wrote:
> Peter, Fabiano,
> 
> Do you think it would be interesting to send VFIO migration events
> by default?

No objection here.

IIUC, the question is why it was introduced with default off back in the
middle of 2024.  If that was for compatibility with any old mgmt which may
be surprised by these events, then we want to know whether they're ready
for them, and then whether we need a compat field for it in older machine
types (or whether we can enable it even with old machines).

Thanks,

-- 
Peter Xu


Re: [PATCH v3] vfio/migration: Send VFIO_MIGRATION event before PRE_COPY_P2P transition
Posted by Avihai Horon 3 days, 10 hours ago
On 2/3/2026 7:02 PM, Peter Xu wrote:
> External email: Use caution opening links or attachments
>
>
> On Tue, Feb 03, 2026 at 05:48:11PM +0100, Cédric Le Goater wrote:
>> Peter, Fabiano,
>>
>> Do you think it would be interesting to send VFIO migration events
>> by default?
> No objection here.
>
> IIUC, the question is why it was introduced with default off back in the
> middle of 2024.  If that was for compatibility with any old mgmt which may
> be surprised by these events, then we want to know whether they're ready
> for them, and then whether we need a compat field for it in older machine
> types (or whether we can enable it even with old machines).

I disabled it by default back then because it was only needed for the 
use case I mentioned here (specifically, it wasn't needed by libvirt).

I guess you can either leave it as is or enable it by default; either way,
it won't matter.

Thanks.


Re: [PATCH v3] vfio/migration: Send VFIO_MIGRATION event before PRE_COPY_P2P transition
Posted by Cédric Le Goater 3 days, 11 hours ago
On 2/2/26 18:34, Avihai Horon wrote:
> The VFIO_MIGRATION event notifies users when a VFIO device transitions
> to a new state.
> 
> One use case for this event is to prevent timeouts for RDMA connections
> to the migrated device. In this case, an external management application
> (not libvirt) consumes the events and disables the RDMA timeout
> mechanism when receiving the event for PRE_COPY_P2P state, which
> indicates that the device is non-responsive.
> 
> This is essential because RDMA connections typically have very low
> timeouts (tens of milliseconds), which can be far below migration
> downtime.
> 
> However, under heavy resource utilization, the device transition to
> PRE_COPY_P2P can take hundreds of milliseconds to complete. Since the
> VFIO_MIGRATION event is currently sent only after the transition
> completes, it arrives too late, after RDMA connections have already
> timed out.
> 
> To address this, send an additional "prepare" event immediately before
> initiating the PRE_COPY_P2P transition. This guarantees timely event
> delivery regardless of how long the actual state transition takes.
> 
> Signed-off-by: Avihai Horon <avihaih@nvidia.com>
> ---
> Changes from v2 (https://lore.kernel.org/qemu-devel/20260201122348.28478-1-avihaih@nvidia.com/):
> * Renamed prepare-pre-copy-p2p to pre-copy-p2p-prepare
> * Renamed prep parameter to prepare in mig_state_to_qapi_state() and
>    vfio_migration_send_event()
> * Added short explanatory comment before sending the prepare event in
>    vfio_migration_set_state()
> * Explicitly used VFIO_DEVICE_STATE_PRE_COPY_P2P as parameter for
>    vfio_migration_send_event()
> 
> Changes from v1 (https://lore.kernel.org/qemu-devel/20260128105159.10282-1-avihaih@nvidia.com/):
> * Removed VFIO_MIGRATION_PREPARE event and instead added a new
>    PREPARE_PRE_COPY_P2P state which is sent before PRE_COPY_P2P
>    transition
> * Added details to commit message
> ---
>   qapi/vfio.json      | 13 +++++++++++--
>   hw/vfio/migration.c | 26 +++++++++++++++++++-------
>   2 files changed, 30 insertions(+), 9 deletions(-)

Reviewed-by: Cédric Le Goater <clg@redhat.com>

Thanks,

C.

> diff --git a/qapi/vfio.json b/qapi/vfio.json
> index a1a9c5b673..17b6046871 100644
> --- a/qapi/vfio.json
> +++ b/qapi/vfio.json
> @@ -11,7 +11,13 @@
>   ##
>   # @QapiVfioMigrationState:
>   #
> -# An enumeration of the VFIO device migration states.
> +# An enumeration of the VFIO device migration states.  In addition to
> +# the regular states, there are prepare states (with 'prepare' suffix)
> +# which indicate that the device is just about to transition to the
> +# corresponding state.  Note that seeing a prepare state for state X
> +# doesn't guarantee that the next state will be X, as the state
> +# transition can fail and the device may transition to a different
> +# state instead.
>   #
>   # @stop: The device is stopped.
>   #
> @@ -32,11 +38,14 @@
>   #     tracking its internal state and its internal state is available
>   #     for reading.
>   #
> +# @pre-copy-p2p-prepare: The device is just about to move to
> +#     pre-copy-p2p state.  (since 11.0)
> +#
>   # Since: 9.1
>   ##
>   { 'enum': 'QapiVfioMigrationState',
>     'data': [ 'stop', 'running', 'stop-copy', 'resuming', 'running-p2p',
> -            'pre-copy', 'pre-copy-p2p' ] }
> +            'pre-copy', 'pre-copy-p2p', 'pre-copy-p2p-prepare' ] }
>   
>   ##
>   # @VFIO_MIGRATION:
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> index b4695030c7..4bd8e24699 100644
> --- a/hw/vfio/migration.c
> +++ b/hw/vfio/migration.c
> @@ -68,7 +68,7 @@ static const char *mig_state_to_str(enum vfio_device_mig_state state)
>   }
>   
>   static QapiVfioMigrationState
> -mig_state_to_qapi_state(enum vfio_device_mig_state state)
> +mig_state_to_qapi_state(enum vfio_device_mig_state state, bool prepare)
>   {
>       switch (state) {
>       case VFIO_DEVICE_STATE_STOP:
> @@ -84,15 +84,17 @@ mig_state_to_qapi_state(enum vfio_device_mig_state state)
>       case VFIO_DEVICE_STATE_PRE_COPY:
>           return QAPI_VFIO_MIGRATION_STATE_PRE_COPY;
>       case VFIO_DEVICE_STATE_PRE_COPY_P2P:
> -        return QAPI_VFIO_MIGRATION_STATE_PRE_COPY_P2P;
> +        return prepare ? QAPI_VFIO_MIGRATION_STATE_PRE_COPY_P2P_PREPARE :
> +                         QAPI_VFIO_MIGRATION_STATE_PRE_COPY_P2P;
>       default:
>           g_assert_not_reached();
>       }
>   }
>   
> -static void vfio_migration_send_event(VFIODevice *vbasedev)
> +static void vfio_migration_send_event(VFIODevice *vbasedev,
> +                                      enum vfio_device_mig_state state,
> +                                      bool prepare)
>   {
> -    VFIOMigration *migration = vbasedev->migration;
>       DeviceState *dev = vbasedev->dev;
>       g_autofree char *qom_path = NULL;
>       Object *obj;
> @@ -106,8 +108,8 @@ static void vfio_migration_send_event(VFIODevice *vbasedev)
>       g_assert(obj);
>       qom_path = object_get_canonical_path(obj);
>   
> -    qapi_event_send_vfio_migration(
> -        dev->id, qom_path, mig_state_to_qapi_state(migration->device_state));
> +    qapi_event_send_vfio_migration(dev->id, qom_path,
> +                                   mig_state_to_qapi_state(state, prepare));
>   }
>   
>   static void vfio_migration_set_device_state(VFIODevice *vbasedev,
> @@ -119,7 +121,7 @@ static void vfio_migration_set_device_state(VFIODevice *vbasedev,
>                                             mig_state_to_str(state));
>   
>       migration->device_state = state;
> -    vfio_migration_send_event(vbasedev);
> +    vfio_migration_send_event(vbasedev, state, false);
>   }
>   
>   int vfio_migration_set_state(VFIODevice *vbasedev,
> @@ -146,6 +148,16 @@ int vfio_migration_set_state(VFIODevice *vbasedev,
>           return 0;
>       }
>   
> +    /*
> +     * Send a prepare event before initiating the PRE_COPY_P2P transition to
> +     * ensure timely event delivery regardless of how long the state transition
> +     * takes.
> +     */
> +    if (new_state == VFIO_DEVICE_STATE_PRE_COPY_P2P) {
> +        vfio_migration_send_event(vbasedev, VFIO_DEVICE_STATE_PRE_COPY_P2P,
> +                                  true);
> +    }
> +
>       feature->argsz = sizeof(buf);
>       feature->flags =
>           VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE;