[PATCH v2 01/17] vfio/migration: Add save_{iterate, complete_precopy}_started trace events

Maciej S. Szmigiero posted 17 patches 2 months, 4 weeks ago
There is a newer version of this series
[PATCH v2 01/17] vfio/migration: Add save_{iterate, complete_precopy}_started trace events
Posted by Maciej S. Szmigiero 2 months, 4 weeks ago
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>

This way both the start and end points of migrating a particular VFIO
device are known.

Add also a vfio_save_iterate_empty_hit trace event so it is known when
there's no more data to send for that device.

Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
---
 hw/vfio/migration.c           | 13 +++++++++++++
 hw/vfio/trace-events          |  3 +++
 include/hw/vfio/vfio-common.h |  3 +++
 3 files changed, 19 insertions(+)

diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 262d42a46e58..24679d8c5034 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -472,6 +472,9 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp)
         return -ENOMEM;
     }
 
+    migration->save_iterate_run = false;
+    migration->save_iterate_empty_hit = false;
+
     if (vfio_precopy_supported(vbasedev)) {
         switch (migration->device_state) {
         case VFIO_DEVICE_STATE_RUNNING:
@@ -605,9 +608,17 @@ static int vfio_save_iterate(QEMUFile *f, void *opaque)
     VFIOMigration *migration = vbasedev->migration;
     ssize_t data_size;
 
+    if (!migration->save_iterate_run) {
+        trace_vfio_save_iterate_started(vbasedev->name);
+        migration->save_iterate_run = true;
+    }
+
     data_size = vfio_save_block(f, migration);
     if (data_size < 0) {
         return data_size;
+    } else if (data_size == 0 && !migration->save_iterate_empty_hit) {
+        trace_vfio_save_iterate_empty_hit(vbasedev->name);
+        migration->save_iterate_empty_hit = true;
     }
 
     vfio_update_estimated_pending_data(migration, data_size);
@@ -633,6 +644,8 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
     int ret;
     Error *local_err = NULL;
 
+    trace_vfio_save_complete_precopy_started(vbasedev->name);
+
     /* We reach here with device state STOP or STOP_COPY only */
     ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
                                    VFIO_DEVICE_STATE_STOP, &local_err);
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 98bd4dcceadc..013c602f30fa 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -159,8 +159,11 @@ vfio_migration_state_notifier(const char *name, int state) " (%s) state %d"
 vfio_save_block(const char *name, int data_size) " (%s) data_size %d"
 vfio_save_cleanup(const char *name) " (%s)"
 vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d"
+vfio_save_complete_precopy_started(const char *name) " (%s)"
 vfio_save_device_config_state(const char *name) " (%s)"
 vfio_save_iterate(const char *name, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64
+vfio_save_iterate_started(const char *name) " (%s)"
+vfio_save_iterate_empty_hit(const char *name) " (%s)"
 vfio_save_setup(const char *name, uint64_t data_buffer_size) " (%s) data buffer size 0x%"PRIx64
 vfio_state_pending_estimate(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64
 vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" stopcopy size 0x%"PRIx64" precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index fed499b199f0..32d58e3e025b 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -73,6 +73,9 @@ typedef struct VFIOMigration {
     uint64_t precopy_init_size;
     uint64_t precopy_dirty_size;
     bool initial_data_sent;
+
+    bool save_iterate_run;
+    bool save_iterate_empty_hit;
 } VFIOMigration;
 
 struct VFIOGroup;
Re: [PATCH v2 01/17] vfio/migration: Add save_{iterate,complete_precopy}_started trace events
Posted by Avihai Horon 2 months, 2 weeks ago
Hi Maciej,

On 27/08/2024 20:54, Maciej S. Szmigiero wrote:
> External email: Use caution opening links or attachments
>
>
> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
>
> This way both the start and end points of migrating a particular VFIO
> device are known.
>
> Add also a vfio_save_iterate_empty_hit trace event so it is known when
> there's no more data to send for that device.

Out of curiosity, what are these traces used for?

>
> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
> ---
>   hw/vfio/migration.c           | 13 +++++++++++++
>   hw/vfio/trace-events          |  3 +++
>   include/hw/vfio/vfio-common.h |  3 +++
>   3 files changed, 19 insertions(+)
>
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> index 262d42a46e58..24679d8c5034 100644
> --- a/hw/vfio/migration.c
> +++ b/hw/vfio/migration.c
> @@ -472,6 +472,9 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp)
>           return -ENOMEM;
>       }
>
> +    migration->save_iterate_run = false;
> +    migration->save_iterate_empty_hit = false;
> +
>       if (vfio_precopy_supported(vbasedev)) {
>           switch (migration->device_state) {
>           case VFIO_DEVICE_STATE_RUNNING:
> @@ -605,9 +608,17 @@ static int vfio_save_iterate(QEMUFile *f, void *opaque)
>       VFIOMigration *migration = vbasedev->migration;
>       ssize_t data_size;
>
> +    if (!migration->save_iterate_run) {
> +        trace_vfio_save_iterate_started(vbasedev->name);
> +        migration->save_iterate_run = true;

Maybe rename save_iterate_run to save_iterate_started so it's aligned 
with trace_vfio_save_iterate_started and 
trace_vfio_save_complete_precopy_started?

> +    }
> +
>       data_size = vfio_save_block(f, migration);
>       if (data_size < 0) {
>           return data_size;
> +    } else if (data_size == 0 && !migration->save_iterate_empty_hit) {
> +        trace_vfio_save_iterate_empty_hit(vbasedev->name);
> +        migration->save_iterate_empty_hit = true;

During precopy we could hit empty multiple times. Any reason why only 
the first time should be traced?

>       }
>
>       vfio_update_estimated_pending_data(migration, data_size);
> @@ -633,6 +644,8 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
>       int ret;
>       Error *local_err = NULL;
>
> +    trace_vfio_save_complete_precopy_started(vbasedev->name);
> +
>       /* We reach here with device state STOP or STOP_COPY only */
>       ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
>                                      VFIO_DEVICE_STATE_STOP, &local_err);
> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
> index 98bd4dcceadc..013c602f30fa 100644
> --- a/hw/vfio/trace-events
> +++ b/hw/vfio/trace-events
> @@ -159,8 +159,11 @@ vfio_migration_state_notifier(const char *name, int state) " (%s) state %d"
>   vfio_save_block(const char *name, int data_size) " (%s) data_size %d"
>   vfio_save_cleanup(const char *name) " (%s)"
>   vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d"
> +vfio_save_complete_precopy_started(const char *name) " (%s)"
>   vfio_save_device_config_state(const char *name) " (%s)"
>   vfio_save_iterate(const char *name, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64
> +vfio_save_iterate_started(const char *name) " (%s)"
> +vfio_save_iterate_empty_hit(const char *name) " (%s)"

Let's keep it sorted in alphabetical order.

Thanks.

>   vfio_save_setup(const char *name, uint64_t data_buffer_size) " (%s) data buffer size 0x%"PRIx64
>   vfio_state_pending_estimate(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64
>   vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" stopcopy size 0x%"PRIx64" precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index fed499b199f0..32d58e3e025b 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -73,6 +73,9 @@ typedef struct VFIOMigration {
>       uint64_t precopy_init_size;
>       uint64_t precopy_dirty_size;
>       bool initial_data_sent;
> +
> +    bool save_iterate_run;
> +    bool save_iterate_empty_hit;
>   } VFIOMigration;
>
>   struct VFIOGroup;
Re: [PATCH v2 01/17] vfio/migration: Add save_{iterate,complete_precopy}_started trace events
Posted by Maciej S. Szmigiero 2 months, 2 weeks ago
On 5.09.2024 15:08, Avihai Horon wrote:
> Hi Maciej,
> 
> On 27/08/2024 20:54, Maciej S. Szmigiero wrote:
>> External email: Use caution opening links or attachments
>>
>>
>> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
>>
>> This way both the start and end points of migrating a particular VFIO
>> device are known.
>>
>> Add also a vfio_save_iterate_empty_hit trace event so it is known when
>> there's no more data to send for that device.
> 
> Out of curiosity, what are these traces used for?

Just for benchmarking, collecting these data makes it easier to
reason where possible bottlenecks may be.

>>
>> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
>> ---
>>   hw/vfio/migration.c           | 13 +++++++++++++
>>   hw/vfio/trace-events          |  3 +++
>>   include/hw/vfio/vfio-common.h |  3 +++
>>   3 files changed, 19 insertions(+)
>>
>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>> index 262d42a46e58..24679d8c5034 100644
>> --- a/hw/vfio/migration.c
>> +++ b/hw/vfio/migration.c
>> @@ -472,6 +472,9 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp)
>>           return -ENOMEM;
>>       }
>>
>> +    migration->save_iterate_run = false;
>> +    migration->save_iterate_empty_hit = false;
>> +
>>       if (vfio_precopy_supported(vbasedev)) {
>>           switch (migration->device_state) {
>>           case VFIO_DEVICE_STATE_RUNNING:
>> @@ -605,9 +608,17 @@ static int vfio_save_iterate(QEMUFile *f, void *opaque)
>>       VFIOMigration *migration = vbasedev->migration;
>>       ssize_t data_size;
>>
>> +    if (!migration->save_iterate_run) {
>> +        trace_vfio_save_iterate_started(vbasedev->name);
>> +        migration->save_iterate_run = true;
> 
> Maybe rename save_iterate_run to save_iterate_started so it's aligned with trace_vfio_save_iterate_started and trace_vfio_save_complete_precopy_started?

Will do.

>> +    }
>> +
>>       data_size = vfio_save_block(f, migration);
>>       if (data_size < 0) {
>>           return data_size;
>> +    } else if (data_size == 0 && !migration->save_iterate_empty_hit) {
>> +        trace_vfio_save_iterate_empty_hit(vbasedev->name);
>> +        migration->save_iterate_empty_hit = true;
> 
> During precopy we could hit empty multiple times. Any reason why only the first time should be traced?

This trace point is supposed to indicate whether the device state
transfer during the time the VM was still running likely has
exhausted the amount of data that can be transferred during
that phase.

In other words, the stopped-time device state transfer likely
only had to transfer the data which the device does not support
transferring during the live VM phase (with just a small possible
residual accrued since that trace point was hit).

If that trace point was hit then delaying the switch over point
further likely wouldn't help the device transfer less data during
the downtime.

>>       }
>>
>>       vfio_update_estimated_pending_data(migration, data_size);
>> @@ -633,6 +644,8 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
>>       int ret;
>>       Error *local_err = NULL;
>>
>> +    trace_vfio_save_complete_precopy_started(vbasedev->name);
>> +
>>       /* We reach here with device state STOP or STOP_COPY only */
>>       ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
>>                                      VFIO_DEVICE_STATE_STOP, &local_err);
>> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
>> index 98bd4dcceadc..013c602f30fa 100644
>> --- a/hw/vfio/trace-events
>> +++ b/hw/vfio/trace-events
>> @@ -159,8 +159,11 @@ vfio_migration_state_notifier(const char *name, int state) " (%s) state %d"
>>   vfio_save_block(const char *name, int data_size) " (%s) data_size %d"
>>   vfio_save_cleanup(const char *name) " (%s)"
>>   vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d"
>> +vfio_save_complete_precopy_started(const char *name) " (%s)"
>>   vfio_save_device_config_state(const char *name) " (%s)"
>>   vfio_save_iterate(const char *name, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64
>> +vfio_save_iterate_started(const char *name) " (%s)"
>> +vfio_save_iterate_empty_hit(const char *name) " (%s)"
> 
> Let's keep it sorted in alphabetical order.

Ack.
  
> Thanks.

Thanks,
Maciej


Re: [PATCH v2 01/17] vfio/migration: Add save_{iterate,complete_precopy}_started trace events
Posted by Avihai Horon 2 months, 1 week ago
On 09/09/2024 21:04, Maciej S. Szmigiero wrote:
> External email: Use caution opening links or attachments
>
>
> On 5.09.2024 15:08, Avihai Horon wrote:
>> Hi Maciej,
>>
>> On 27/08/2024 20:54, Maciej S. Szmigiero wrote:
>>> External email: Use caution opening links or attachments
>>>
>>>
>>> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
>>>
>>> This way both the start and end points of migrating a particular VFIO
>>> device are known.
>>>
>>> Add also a vfio_save_iterate_empty_hit trace event so it is known when
>>> there's no more data to send for that device.
>>
>> Out of curiosity, what are these traces used for?
>
> Just for benchmarking, collecting these data makes it easier to
> reason where possible bottlenecks may be.
>
>>>
>>> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
>>> ---
>>>   hw/vfio/migration.c           | 13 +++++++++++++
>>>   hw/vfio/trace-events          |  3 +++
>>>   include/hw/vfio/vfio-common.h |  3 +++
>>>   3 files changed, 19 insertions(+)
>>>
>>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>>> index 262d42a46e58..24679d8c5034 100644
>>> --- a/hw/vfio/migration.c
>>> +++ b/hw/vfio/migration.c
>>> @@ -472,6 +472,9 @@ static int vfio_save_setup(QEMUFile *f, void 
>>> *opaque, Error **errp)
>>>           return -ENOMEM;
>>>       }
>>>
>>> +    migration->save_iterate_run = false;
>>> +    migration->save_iterate_empty_hit = false;
>>> +
>>>       if (vfio_precopy_supported(vbasedev)) {
>>>           switch (migration->device_state) {
>>>           case VFIO_DEVICE_STATE_RUNNING:
>>> @@ -605,9 +608,17 @@ static int vfio_save_iterate(QEMUFile *f, void 
>>> *opaque)
>>>       VFIOMigration *migration = vbasedev->migration;
>>>       ssize_t data_size;
>>>
>>> +    if (!migration->save_iterate_run) {
>>> +        trace_vfio_save_iterate_started(vbasedev->name);
>>> +        migration->save_iterate_run = true;
>>
>> Maybe rename save_iterate_run to save_iterate_started so it's aligned 
>> with trace_vfio_save_iterate_started and 
>> trace_vfio_save_complete_precopy_started?
>
> Will do.
>
>>> +    }
>>> +
>>>       data_size = vfio_save_block(f, migration);
>>>       if (data_size < 0) {
>>>           return data_size;
>>> +    } else if (data_size == 0 && !migration->save_iterate_empty_hit) {
>>> +        trace_vfio_save_iterate_empty_hit(vbasedev->name);
>>> +        migration->save_iterate_empty_hit = true;
>>
>> During precopy we could hit empty multiple times. Any reason why only 
>> the first time should be traced?
>
> This trace point is supposed to indicate whether the device state
> transfer during the time the VM was still running likely has
> exhausted the amount of data that can be transferred during
> that phase.
>
> In other words, the stopped-time device state transfer likely
> only had to transfer the data which the device does not support
> transferring during the live VM phase (with just a small possible
> residual accrued since that trace point was hit).
>
> If that trace point was hit then delaying the switch over point
> further likely wouldn't help the device transfer less data during
> the downtime.

Ah, I see.

Can we achieve the same goal by using trace_vfio_state_pending_exact() 
instead?

Thanks.