From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
This way both the start and end points of migrating a particular VFIO
device are known.
Add also a vfio_save_iterate_empty_hit trace event so it is known when
there's no more data to send for that device.
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
---
hw/vfio/migration.c | 13 +++++++++++++
hw/vfio/trace-events | 3 +++
include/hw/vfio/vfio-common.h | 3 +++
3 files changed, 19 insertions(+)
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 992dc3b10257..1b1ddf527d69 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -472,6 +472,9 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp)
return -ENOMEM;
}
+ migration->save_iterate_started = false;
+ migration->save_iterate_empty_hit = false;
+
if (vfio_precopy_supported(vbasedev)) {
switch (migration->device_state) {
case VFIO_DEVICE_STATE_RUNNING:
@@ -602,9 +605,17 @@ static int vfio_save_iterate(QEMUFile *f, void *opaque)
VFIOMigration *migration = vbasedev->migration;
ssize_t data_size;
+ if (!migration->save_iterate_started) {
+ trace_vfio_save_iterate_started(vbasedev->name);
+ migration->save_iterate_started = true;
+ }
+
data_size = vfio_save_block(f, migration);
if (data_size < 0) {
return data_size;
+ } else if (data_size == 0 && !migration->save_iterate_empty_hit) {
+ trace_vfio_save_iterate_empty_hit(vbasedev->name);
+ migration->save_iterate_empty_hit = true;
}
vfio_update_estimated_pending_data(migration, data_size);
@@ -630,6 +641,8 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
int ret;
Error *local_err = NULL;
+ trace_vfio_save_complete_precopy_started(vbasedev->name);
+
/* We reach here with device state STOP or STOP_COPY only */
ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
VFIO_DEVICE_STATE_STOP, &local_err);
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 29789e8d276d..e58deab232ed 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -159,8 +159,11 @@ vfio_migration_state_notifier(const char *name, int state) " (%s) state %d"
vfio_save_block(const char *name, int data_size) " (%s) data_size %d"
vfio_save_cleanup(const char *name) " (%s)"
vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d"
+vfio_save_complete_precopy_started(const char *name) " (%s)"
vfio_save_device_config_state(const char *name) " (%s)"
vfio_save_iterate(const char *name, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy initial size %"PRIu64" precopy dirty size %"PRIu64
+vfio_save_iterate_empty_hit(const char *name) " (%s)"
+vfio_save_iterate_started(const char *name) " (%s)"
vfio_save_setup(const char *name, uint64_t data_buffer_size) " (%s) data buffer size %"PRIu64
vfio_state_pending_estimate(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy %"PRIu64" postcopy %"PRIu64" precopy initial size %"PRIu64" precopy dirty size %"PRIu64
vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy %"PRIu64" postcopy %"PRIu64" stopcopy size %"PRIu64" precopy initial size %"PRIu64" precopy dirty size %"PRIu64
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index fed499b199f0..997ee5af2d5b 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -73,6 +73,9 @@ typedef struct VFIOMigration {
uint64_t precopy_init_size;
uint64_t precopy_dirty_size;
bool initial_data_sent;
+
+ bool save_iterate_started;
+ bool save_iterate_empty_hit;
} VFIOMigration;
struct VFIOGroup;
Hi Maciej, On 29/10/2024 16:58, Maciej S. Szmigiero wrote: > External email: Use caution opening links or attachments > > > From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> > > This way both the start and end points of migrating a particular VFIO > device are known. > > Add also a vfio_save_iterate_empty_hit trace event so it is known when > there's no more data to send for that device. > > Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> > --- > hw/vfio/migration.c | 13 +++++++++++++ > hw/vfio/trace-events | 3 +++ > include/hw/vfio/vfio-common.h | 3 +++ > 3 files changed, 19 insertions(+) > > diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c > index 992dc3b10257..1b1ddf527d69 100644 > --- a/hw/vfio/migration.c > +++ b/hw/vfio/migration.c > @@ -472,6 +472,9 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp) > return -ENOMEM; > } > > + migration->save_iterate_started = false; > + migration->save_iterate_empty_hit = false; > + > if (vfio_precopy_supported(vbasedev)) { > switch (migration->device_state) { > case VFIO_DEVICE_STATE_RUNNING: > @@ -602,9 +605,17 @@ static int vfio_save_iterate(QEMUFile *f, void *opaque) > VFIOMigration *migration = vbasedev->migration; > ssize_t data_size; > > + if (!migration->save_iterate_started) { > + trace_vfio_save_iterate_started(vbasedev->name); > + migration->save_iterate_started = true; > + } > + > data_size = vfio_save_block(f, migration); > if (data_size < 0) { > return data_size; > + } else if (data_size == 0 && !migration->save_iterate_empty_hit) { > + trace_vfio_save_iterate_empty_hit(vbasedev->name); > + migration->save_iterate_empty_hit = true; > } Can we instead use trace_vfio_save_iterate to understand if the device reached 0? In any case, I think the above could fit better in vfio_save_block(), where ENOMSG indicates that the device has no more data to send during pre-copy phase: ... if (data_size < 0) { /* * Pre-copy emptied all the device state for now. For more information, * please refer to the Linux kernel VFIO uAPI. */ if (errno == ENOMSG) { trace_vfio_save_iterate_empty_hit(vbasedev->name) <--------------- move it here return 0; } return -errno; } ... If you move the trace there, maybe renaming it to trace_vfio_precopy_empty_hit() will be more accurate? And trying to avoid adding the extra VFIOMigration->save_iterate_empty_hit flag, can we simply trace it every time? > > vfio_update_estimated_pending_data(migration, data_size); > @@ -630,6 +641,8 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) > int ret; > Error *local_err = NULL; > > + trace_vfio_save_complete_precopy_started(vbasedev->name); I assume this trace is used to measure how long it takes for vfio_save_complete_precopy() to run? If so, can we use trace_vmstate_downtime_save to achieve the same goal? Thanks. > + > /* We reach here with device state STOP or STOP_COPY only */ > ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY, > VFIO_DEVICE_STATE_STOP, &local_err); > diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events > index 29789e8d276d..e58deab232ed 100644 > --- a/hw/vfio/trace-events > +++ b/hw/vfio/trace-events > @@ -159,8 +159,11 @@ vfio_migration_state_notifier(const char *name, int state) " (%s) state %d" > vfio_save_block(const char *name, int data_size) " (%s) data_size %d" > vfio_save_cleanup(const char *name) " (%s)" > vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d" > +vfio_save_complete_precopy_started(const char *name) " (%s)" > vfio_save_device_config_state(const char *name) " (%s)" > vfio_save_iterate(const char *name, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy initial size %"PRIu64" precopy dirty size %"PRIu64 > +vfio_save_iterate_empty_hit(const char *name) " (%s)" > +vfio_save_iterate_started(const char *name) " (%s)" > vfio_save_setup(const char *name, uint64_t data_buffer_size) " (%s) data buffer size %"PRIu64 > vfio_state_pending_estimate(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy %"PRIu64" postcopy %"PRIu64" precopy initial size %"PRIu64" precopy dirty size %"PRIu64 > vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy %"PRIu64" postcopy %"PRIu64" stopcopy size %"PRIu64" precopy initial size %"PRIu64" precopy dirty size %"PRIu64 > diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h > index fed499b199f0..997ee5af2d5b 100644 > --- a/include/hw/vfio/vfio-common.h > +++ b/include/hw/vfio/vfio-common.h > @@ -73,6 +73,9 @@ typedef struct VFIOMigration { > uint64_t precopy_init_size; > uint64_t precopy_dirty_size; > bool initial_data_sent; > + > + bool save_iterate_started; > + bool save_iterate_empty_hit; > } VFIOMigration; > > struct VFIOGroup;
Hi Avihai, On 31.10.2024 15:21, Avihai Horon wrote: > Hi Maciej, > > On 29/10/2024 16:58, Maciej S. Szmigiero wrote: >> External email: Use caution opening links or attachments >> >> >> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> >> >> This way both the start and end points of migrating a particular VFIO >> device are known. >> >> Add also a vfio_save_iterate_empty_hit trace event so it is known when >> there's no more data to send for that device. >> >> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> >> --- >> hw/vfio/migration.c | 13 +++++++++++++ >> hw/vfio/trace-events | 3 +++ >> include/hw/vfio/vfio-common.h | 3 +++ >> 3 files changed, 19 insertions(+) >> >> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c >> index 992dc3b10257..1b1ddf527d69 100644 >> --- a/hw/vfio/migration.c >> +++ b/hw/vfio/migration.c >> @@ -472,6 +472,9 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp) >> return -ENOMEM; >> } >> >> + migration->save_iterate_started = false; >> + migration->save_iterate_empty_hit = false; >> + >> if (vfio_precopy_supported(vbasedev)) { >> switch (migration->device_state) { >> case VFIO_DEVICE_STATE_RUNNING: >> @@ -602,9 +605,17 @@ static int vfio_save_iterate(QEMUFile *f, void *opaque) >> VFIOMigration *migration = vbasedev->migration; >> ssize_t data_size; >> >> + if (!migration->save_iterate_started) { >> + trace_vfio_save_iterate_started(vbasedev->name); >> + migration->save_iterate_started = true; >> + } >> + >> data_size = vfio_save_block(f, migration); >> if (data_size < 0) { >> return data_size; >> + } else if (data_size == 0 && !migration->save_iterate_empty_hit) { >> + trace_vfio_save_iterate_empty_hit(vbasedev->name); >> + migration->save_iterate_empty_hit = true; >> } > > Can we instead use trace_vfio_save_iterate to understand if the device reached 0? AFAIK there's not way to filter trace events by their parameters, like only logging vfio_save_iterate trace event if both parameters are zero. It means that vfio_save_iterate has to be enabled unconditionally to serve as a replacement for vfio_save_iterate_empty_hit, which could result in it being logged/emitted many extra times (with non-zero parameters). Because of that I think having a dedicated trace event for such occasion makes sense (it is also easily grep-able). > In any case, I think the above could fit better in vfio_save_block(), where ENOMSG indicates that the device has no more data to send during pre-copy phase: > > ... > if (data_size < 0) { > /* > * Pre-copy emptied all the device state for now. For more information, > * please refer to the Linux kernel VFIO uAPI. > */ > if (errno == ENOMSG) { > trace_vfio_save_iterate_empty_hit(vbasedev->name) <--------------- move it here > return 0; > } > > return -errno; > } > ... > > If you move the trace there, maybe renaming it to trace_vfio_precopy_empty_hit() will be more accurate? This move and rename seems sensible to me. > And trying to avoid adding the extra VFIOMigration->save_iterate_empty_hit flag, can we simply trace it every time? Will have to do some tests to be sure but if there's possibility that we get ENOMSG many times then obviously we don't want to flood logs with this trace event in this case - we want to only log the "data present" -> "data not present" edge/change. >> >> vfio_update_estimated_pending_data(migration, data_size); >> @@ -630,6 +641,8 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) >> int ret; >> Error *local_err = NULL; >> >> + trace_vfio_save_complete_precopy_started(vbasedev->name); > > I assume this trace is used to measure how long it takes for vfio_save_complete_precopy() to run? If so, can we use trace_vmstate_downtime_save to achieve the same goal? With an appropriate filtering I guess it could be used to extract the same data, although explicit VFIO trace point makes it easier to just look at these traces directly (less noise from other devices). But at the same time trace_vfio_save_complete_precopy already exists and by this metric it would also be unnecessary. > Thanks. Thanks, Maciej
On 01/11/2024 0:17, Maciej S. Szmigiero wrote: > External email: Use caution opening links or attachments > > > Hi Avihai, > > On 31.10.2024 15:21, Avihai Horon wrote: >> Hi Maciej, >> >> On 29/10/2024 16:58, Maciej S. Szmigiero wrote: >>> External email: Use caution opening links or attachments >>> >>> >>> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> >>> >>> This way both the start and end points of migrating a particular VFIO >>> device are known. >>> >>> Add also a vfio_save_iterate_empty_hit trace event so it is known when >>> there's no more data to send for that device. >>> >>> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> >>> --- >>> hw/vfio/migration.c | 13 +++++++++++++ >>> hw/vfio/trace-events | 3 +++ >>> include/hw/vfio/vfio-common.h | 3 +++ >>> 3 files changed, 19 insertions(+) >>> >>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c >>> index 992dc3b10257..1b1ddf527d69 100644 >>> --- a/hw/vfio/migration.c >>> +++ b/hw/vfio/migration.c >>> @@ -472,6 +472,9 @@ static int vfio_save_setup(QEMUFile *f, void >>> *opaque, Error **errp) >>> return -ENOMEM; >>> } >>> >>> + migration->save_iterate_started = false; >>> + migration->save_iterate_empty_hit = false; >>> + >>> if (vfio_precopy_supported(vbasedev)) { >>> switch (migration->device_state) { >>> case VFIO_DEVICE_STATE_RUNNING: >>> @@ -602,9 +605,17 @@ static int vfio_save_iterate(QEMUFile *f, void >>> *opaque) >>> VFIOMigration *migration = vbasedev->migration; >>> ssize_t data_size; >>> >>> + if (!migration->save_iterate_started) { >>> + trace_vfio_save_iterate_started(vbasedev->name); >>> + migration->save_iterate_started = true; >>> + } >>> + >>> data_size = vfio_save_block(f, migration); >>> if (data_size < 0) { >>> return data_size; >>> + } else if (data_size == 0 && !migration->save_iterate_empty_hit) { >>> + trace_vfio_save_iterate_empty_hit(vbasedev->name); >>> + migration->save_iterate_empty_hit = true; >>> } >> >> Can we instead use trace_vfio_save_iterate to understand if the >> device reached 0? > > AFAIK there's not way to filter trace events by their parameters, > like only logging vfio_save_iterate trace event if both parameters > are zero. > > It means that vfio_save_iterate has to be enabled unconditionally to > serve as a replacement for vfio_save_iterate_empty_hit, which could > result in it being logged/emitted many extra times (with non-zero > parameters). > > Because of that I think having a dedicated trace event for such > occasion makes sense (it is also easily grep-able). Ahh, I understand. > >> In any case, I think the above could fit better in vfio_save_block(), >> where ENOMSG indicates that the device has no more data to send >> during pre-copy phase: >> >> ... >> if (data_size < 0) { >> /* >> * Pre-copy emptied all the device state for now. For more >> information, >> * please refer to the Linux kernel VFIO uAPI. >> */ >> if (errno == ENOMSG) { >> trace_vfio_save_iterate_empty_hit(vbasedev->name) <--------------- >> move it here >> return 0; >> } >> >> return -errno; >> } >> ... >> >> If you move the trace there, maybe renaming it to >> trace_vfio_precopy_empty_hit() will be more accurate? > > This move and rename seems sensible to me. > >> And trying to avoid adding the extra >> VFIOMigration->save_iterate_empty_hit flag, can we simply trace it >> every time? > > Will have to do some tests to be sure but if there's possibility that > we get ENOMSG many times then obviously we don't want to flood logs with > this trace event in this case - we want to only log the > "data present" -> "data not present" edge/change. OK, so I guess a flag is really needed. BTW, there is also trace_vfio_state_pending_exact, maybe it could do the job? It might get called multiple times but not as many as vfio_save_iterate. > >>> >>> vfio_update_estimated_pending_data(migration, data_size); >>> @@ -630,6 +641,8 @@ static int vfio_save_complete_precopy(QEMUFile >>> *f, void *opaque) >>> int ret; >>> Error *local_err = NULL; >>> >>> + trace_vfio_save_complete_precopy_started(vbasedev->name); >> >> I assume this trace is used to measure how long it takes for >> vfio_save_complete_precopy() to run? If so, can we use >> trace_vmstate_downtime_save to achieve the same goal? > > With an appropriate filtering I guess it could be used to > extract the same data, although explicit VFIO trace point > makes it easier to just look at these traces directly > (less noise from other devices). I see. Well, I don't have a strong opinion if it makes life easier. Thanks. > > But at the same time trace_vfio_save_complete_precopy > already exists and by this metric it would also be > unnecessary. > >> Thanks. > > Thanks, > Maciej >
On 4.11.2024 09:08, Avihai Horon wrote: > > On 01/11/2024 0:17, Maciej S. Szmigiero wrote: >> External email: Use caution opening links or attachments >> >> >> Hi Avihai, >> >> On 31.10.2024 15:21, Avihai Horon wrote: >>> Hi Maciej, >>> >>> On 29/10/2024 16:58, Maciej S. Szmigiero wrote: >>>> External email: Use caution opening links or attachments >>>> >>>> >>>> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> >>>> >>>> This way both the start and end points of migrating a particular VFIO >>>> device are known. >>>> >>>> Add also a vfio_save_iterate_empty_hit trace event so it is known when >>>> there's no more data to send for that device. >>>> >>>> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> >>>> --- >>>> hw/vfio/migration.c | 13 +++++++++++++ >>>> hw/vfio/trace-events | 3 +++ >>>> include/hw/vfio/vfio-common.h | 3 +++ >>>> 3 files changed, 19 insertions(+) >>>> >>>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c >>>> index 992dc3b10257..1b1ddf527d69 100644 >>>> --- a/hw/vfio/migration.c >>>> +++ b/hw/vfio/migration.c >>>> @@ -472,6 +472,9 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp) >>>> return -ENOMEM; >>>> } >>>> >>>> + migration->save_iterate_started = false; >>>> + migration->save_iterate_empty_hit = false; >>>> + >>>> if (vfio_precopy_supported(vbasedev)) { >>>> switch (migration->device_state) { >>>> case VFIO_DEVICE_STATE_RUNNING: >>>> @@ -602,9 +605,17 @@ static int vfio_save_iterate(QEMUFile *f, void *opaque) >>>> VFIOMigration *migration = vbasedev->migration; >>>> ssize_t data_size; >>>> >>>> + if (!migration->save_iterate_started) { >>>> + trace_vfio_save_iterate_started(vbasedev->name); >>>> + migration->save_iterate_started = true; >>>> + } >>>> + >>>> data_size = vfio_save_block(f, migration); >>>> if (data_size < 0) { >>>> return data_size; >>>> + } else if (data_size == 0 && !migration->save_iterate_empty_hit) { >>>> + trace_vfio_save_iterate_empty_hit(vbasedev->name); >>>> + migration->save_iterate_empty_hit = true; >>>> } >>> >>> Can we instead use trace_vfio_save_iterate to understand if the device reached 0? >> >> AFAIK there's not way to filter trace events by their parameters, >> like only logging vfio_save_iterate trace event if both parameters >> are zero. >> >> It means that vfio_save_iterate has to be enabled unconditionally to >> serve as a replacement for vfio_save_iterate_empty_hit, which could >> result in it being logged/emitted many extra times (with non-zero >> parameters). >> >> Because of that I think having a dedicated trace event for such >> occasion makes sense (it is also easily grep-able). > > Ahh, I understand. > >> >>> In any case, I think the above could fit better in vfio_save_block(), where ENOMSG indicates that the device has no more data to send during pre-copy phase: >>> >>> ... >>> if (data_size < 0) { >>> /* >>> * Pre-copy emptied all the device state for now. For more information, >>> * please refer to the Linux kernel VFIO uAPI. >>> */ >>> if (errno == ENOMSG) { >>> trace_vfio_save_iterate_empty_hit(vbasedev->name) <--------------- move it here >>> return 0; >>> } >>> >>> return -errno; >>> } >>> ... >>> >>> If you move the trace there, maybe renaming it to trace_vfio_precopy_empty_hit() will be more accurate? >> >> This move and rename seems sensible to me. >> >>> And trying to avoid adding the extra VFIOMigration->save_iterate_empty_hit flag, can we simply trace it every time? >> >> Will have to do some tests to be sure but if there's possibility that >> we get ENOMSG many times then obviously we don't want to flood logs with >> this trace event in this case - we want to only log the >> "data present" -> "data not present" edge/change. > > OK, so I guess a flag is really needed. > BTW, there is also trace_vfio_state_pending_exact, maybe it could do the job? It might get called multiple times but not as many as vfio_save_iterate. In a quick test run it was still called/logged 5 times for each VFIO device so quite more often than the empty_hit one (which was logged just once per dev). >> >>>> >>>> vfio_update_estimated_pending_data(migration, data_size); >>>> @@ -630,6 +641,8 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) >>>> int ret; >>>> Error *local_err = NULL; >>>> >>>> + trace_vfio_save_complete_precopy_started(vbasedev->name); >>> >>> I assume this trace is used to measure how long it takes for vfio_save_complete_precopy() to run? If so, can we use trace_vmstate_downtime_save to achieve the same goal? >> >> With an appropriate filtering I guess it could be used to >> extract the same data, although explicit VFIO trace point >> makes it easier to just look at these traces directly >> (less noise from other devices). > > I see. Well, I don't have a strong opinion if it makes life easier. > > Thanks. > >> >> But at the same time trace_vfio_save_complete_precopy >> already exists and by this metric it would also be >> unnecessary. >> >>> Thanks. >> >> Thanks, >> Maciej >> Thanks, Maciej
On 04/11/2024 16:00, Maciej S. Szmigiero wrote: > External email: Use caution opening links or attachments > > > On 4.11.2024 09:08, Avihai Horon wrote: >> >> On 01/11/2024 0:17, Maciej S. Szmigiero wrote: >>> External email: Use caution opening links or attachments >>> >>> >>> Hi Avihai, >>> >>> On 31.10.2024 15:21, Avihai Horon wrote: >>>> Hi Maciej, >>>> >>>> On 29/10/2024 16:58, Maciej S. Szmigiero wrote: >>>>> External email: Use caution opening links or attachments >>>>> >>>>> >>>>> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> >>>>> >>>>> This way both the start and end points of migrating a particular VFIO >>>>> device are known. >>>>> >>>>> Add also a vfio_save_iterate_empty_hit trace event so it is known >>>>> when >>>>> there's no more data to send for that device. >>>>> >>>>> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> >>>>> --- >>>>> hw/vfio/migration.c | 13 +++++++++++++ >>>>> hw/vfio/trace-events | 3 +++ >>>>> include/hw/vfio/vfio-common.h | 3 +++ >>>>> 3 files changed, 19 insertions(+) >>>>> >>>>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c >>>>> index 992dc3b10257..1b1ddf527d69 100644 >>>>> --- a/hw/vfio/migration.c >>>>> +++ b/hw/vfio/migration.c >>>>> @@ -472,6 +472,9 @@ static int vfio_save_setup(QEMUFile *f, void >>>>> *opaque, Error **errp) >>>>> return -ENOMEM; >>>>> } >>>>> >>>>> + migration->save_iterate_started = false; >>>>> + migration->save_iterate_empty_hit = false; >>>>> + >>>>> if (vfio_precopy_supported(vbasedev)) { >>>>> switch (migration->device_state) { >>>>> case VFIO_DEVICE_STATE_RUNNING: >>>>> @@ -602,9 +605,17 @@ static int vfio_save_iterate(QEMUFile *f, >>>>> void *opaque) >>>>> VFIOMigration *migration = vbasedev->migration; >>>>> ssize_t data_size; >>>>> >>>>> + if (!migration->save_iterate_started) { >>>>> + trace_vfio_save_iterate_started(vbasedev->name); >>>>> + migration->save_iterate_started = true; >>>>> + } >>>>> + >>>>> data_size = vfio_save_block(f, migration); >>>>> if (data_size < 0) { >>>>> return data_size; >>>>> + } else if (data_size == 0 && >>>>> !migration->save_iterate_empty_hit) { >>>>> + trace_vfio_save_iterate_empty_hit(vbasedev->name); >>>>> + migration->save_iterate_empty_hit = true; >>>>> } >>>> >>>> Can we instead use trace_vfio_save_iterate to understand if the >>>> device reached 0? >>> >>> AFAIK there's not way to filter trace events by their parameters, >>> like only logging vfio_save_iterate trace event if both parameters >>> are zero. >>> >>> It means that vfio_save_iterate has to be enabled unconditionally to >>> serve as a replacement for vfio_save_iterate_empty_hit, which could >>> result in it being logged/emitted many extra times (with non-zero >>> parameters). >>> >>> Because of that I think having a dedicated trace event for such >>> occasion makes sense (it is also easily grep-able). >> >> Ahh, I understand. >> >>> >>>> In any case, I think the above could fit better in >>>> vfio_save_block(), where ENOMSG indicates that the device has no >>>> more data to send during pre-copy phase: >>>> >>>> ... >>>> if (data_size < 0) { >>>> /* >>>> * Pre-copy emptied all the device state for now. For more >>>> information, >>>> * please refer to the Linux kernel VFIO uAPI. >>>> */ >>>> if (errno == ENOMSG) { >>>> trace_vfio_save_iterate_empty_hit(vbasedev->name) <--------------- >>>> move it here >>>> return 0; >>>> } >>>> >>>> return -errno; >>>> } >>>> ... >>>> >>>> If you move the trace there, maybe renaming it to >>>> trace_vfio_precopy_empty_hit() will be more accurate? >>> >>> This move and rename seems sensible to me. >>> >>>> And trying to avoid adding the extra >>>> VFIOMigration->save_iterate_empty_hit flag, can we simply trace it >>>> every time? >>> >>> Will have to do some tests to be sure but if there's possibility that >>> we get ENOMSG many times then obviously we don't want to flood logs >>> with >>> this trace event in this case - we want to only log the >>> "data present" -> "data not present" edge/change. >> >> OK, so I guess a flag is really needed. >> BTW, there is also trace_vfio_state_pending_exact, maybe it could do >> the job? It might get called multiple times but not as many as >> vfio_save_iterate. > > In a quick test run it was still called/logged 5 times for each VFIO > device > so quite more often than the empty_hit one (which was logged just once > per dev). Yes, that is expected. If that's too noisy for you then the empty_hit trace seems fine, IMHO. Thanks.
On 31.10.2024 23:17, Maciej S. Szmigiero wrote: > Hi Avihai, > > On 31.10.2024 15:21, Avihai Horon wrote: >> Hi Maciej, >> >> On 29/10/2024 16:58, Maciej S. Szmigiero wrote: >>> External email: Use caution opening links or attachments >>> >>> >>> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> >>> >>> This way both the start and end points of migrating a particular VFIO >>> device are known. >>> >>> Add also a vfio_save_iterate_empty_hit trace event so it is known when >>> there's no more data to send for that device. >>> >>> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> >>> --- >>> hw/vfio/migration.c | 13 +++++++++++++ >>> hw/vfio/trace-events | 3 +++ >>> include/hw/vfio/vfio-common.h | 3 +++ >>> 3 files changed, 19 insertions(+) >>> >>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c >>> index 992dc3b10257..1b1ddf527d69 100644 >>> --- a/hw/vfio/migration.c >>> +++ b/hw/vfio/migration.c (..) >> In any case, I think the above could fit better in vfio_save_block(), where ENOMSG indicates that the device has no more data to send during pre-copy phase: >> >> ... >> if (data_size < 0) { >> /* >> * Pre-copy emptied all the device state for now. For more information, >> * please refer to the Linux kernel VFIO uAPI. >> */ >> if (errno == ENOMSG) { >> trace_vfio_save_iterate_empty_hit(vbasedev->name) <--------------- move it here >> return 0; >> } >> >> return -errno; >> } >> ... >> >> If you move the trace there, maybe renaming it to trace_vfio_precopy_empty_hit() will be more accurate? > > This move and rename seems sensible to me. > >> And trying to avoid adding the extra VFIOMigration->save_iterate_empty_hit flag, can we simply trace it every time? > > Will have to do some tests to be sure but if there's possibility that > we get ENOMSG many times then obviously we don't want to flood logs with > this trace event in this case - we want to only log the > "data present" -> "data not present" edge/change. > Indeed - it needs to be triggered only on the "non-empty"/"empty" change otherwise there are 3 repeats of this trace event per VFIO device in a rapid succession. So some kind of a flag here is necessary: with the trace event being "re-armed" by a non-empty read from the VFIO device I get just one such event per device. Thanks, Maciej
© 2016 - 2024 Red Hat, Inc.