From: Daniel P. Berrangé <berrange@redhat.com>
This directly implements the get_buffer logic using QIOChannel APIs.
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
migration/qemu-file-channel.c | 29 -----------------------------
migration/qemu-file.c | 18 ++++++++++++++++--
migration/qemu-file.h | 9 ---------
3 files changed, 16 insertions(+), 40 deletions(-)
diff --git a/migration/qemu-file-channel.c b/migration/qemu-file-channel.c
index 8ff58e81f9..7b32831752 100644
--- a/migration/qemu-file-channel.c
+++ b/migration/qemu-file-channel.c
@@ -74,34 +74,6 @@ static ssize_t channel_writev_buffer(void *opaque,
}
-static ssize_t channel_get_buffer(void *opaque,
- uint8_t *buf,
- int64_t pos,
- size_t size,
- Error **errp)
-{
- QIOChannel *ioc = QIO_CHANNEL(opaque);
- ssize_t ret;
-
- do {
- ret = qio_channel_read(ioc, (char *)buf, size, errp);
- if (ret < 0) {
- if (ret == QIO_CHANNEL_ERR_BLOCK) {
- if (qemu_in_coroutine()) {
- qio_channel_yield(ioc, G_IO_IN);
- } else {
- qio_channel_wait(ioc, G_IO_IN);
- }
- } else {
- return -EIO;
- }
- }
- } while (ret == QIO_CHANNEL_ERR_BLOCK);
-
- return ret;
-}
-
-
static QEMUFile *channel_get_input_return_path(void *opaque)
{
QIOChannel *ioc = QIO_CHANNEL(opaque);
@@ -117,7 +89,6 @@ static QEMUFile *channel_get_output_return_path(void *opaque)
}
static const QEMUFileOps channel_input_ops = {
- .get_buffer = channel_get_buffer,
.get_return_path = channel_get_input_return_path,
};
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index 74f919de67..e206b05550 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -377,8 +377,22 @@ static ssize_t qemu_fill_buffer(QEMUFile *f)
return 0;
}
- len = f->ops->get_buffer(f->ioc, f->buf + pending, f->total_transferred,
- IO_BUF_SIZE - pending, &local_error);
+ do {
+ len = qio_channel_read(f->ioc,
+ (char *)f->buf + pending,
+ IO_BUF_SIZE - pending,
+ &local_error);
+ if (len == QIO_CHANNEL_ERR_BLOCK) {
+ if (qemu_in_coroutine()) {
+ qio_channel_yield(f->ioc, G_IO_IN);
+ } else {
+ qio_channel_wait(f->ioc, G_IO_IN);
+ }
+ } else if (len < 0) {
+ len = EIO;
+ }
+ } while (len == QIO_CHANNEL_ERR_BLOCK);
+
if (len > 0) {
f->buf_size += len;
f->total_transferred += len;
diff --git a/migration/qemu-file.h b/migration/qemu-file.h
index 4a3beedb5b..f7ed568894 100644
--- a/migration/qemu-file.h
+++ b/migration/qemu-file.h
@@ -29,14 +29,6 @@
#include "exec/cpu-common.h"
#include "io/channel.h"
-/* Read a chunk of data from a file at the given position. The pos argument
- * can be ignored if the file is only be used for streaming. The number of
- * bytes actually read should be returned.
- */
-typedef ssize_t (QEMUFileGetBufferFunc)(void *opaque, uint8_t *buf,
- int64_t pos, size_t size,
- Error **errp);
-
/*
* This function writes an iovec to file. The handler must write all
* of the data or return a negative errno value.
@@ -77,7 +69,6 @@ typedef size_t (QEMURamSaveFunc)(QEMUFile *f,
typedef QEMUFile *(QEMURetPathFunc)(void *opaque);
typedef struct QEMUFileOps {
- QEMUFileGetBufferFunc *get_buffer;
QEMUFileWritevBufferFunc *writev_buffer;
QEMURetPathFunc *get_return_path;
} QEMUFileOps;
--
2.36.1
On Wed, Jun 22, 2022 at 07:39:06PM +0100, Dr. David Alan Gilbert (git) wrote:
> diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> index 74f919de67..e206b05550 100644
> --- a/migration/qemu-file.c
> +++ b/migration/qemu-file.c
> @@ -377,8 +377,22 @@ static ssize_t qemu_fill_buffer(QEMUFile *f)
> return 0;
> }
>
> - len = f->ops->get_buffer(f->ioc, f->buf + pending, f->total_transferred,
> - IO_BUF_SIZE - pending, &local_error);
> + do {
> + len = qio_channel_read(f->ioc,
> + (char *)f->buf + pending,
> + IO_BUF_SIZE - pending,
> + &local_error);
> + if (len == QIO_CHANNEL_ERR_BLOCK) {
> + if (qemu_in_coroutine()) {
> + qio_channel_yield(f->ioc, G_IO_IN);
> + } else {
> + qio_channel_wait(f->ioc, G_IO_IN);
> + }
> + } else if (len < 0) {
> + len = EIO;
This should be -EIO.
> + }
> + } while (len == QIO_CHANNEL_ERR_BLOCK);
It's failing only with the new TLS test I added for postcopy somehow (at
least /x86_64/migration/postcopy/recovery/tls).. I also verified after the
change it'll work again.
--
Peter Xu
On Wed, Jun 22, 2022 at 03:34:52PM -0400, Peter Xu wrote:
> On Wed, Jun 22, 2022 at 07:39:06PM +0100, Dr. David Alan Gilbert (git) wrote:
> > diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> > index 74f919de67..e206b05550 100644
> > --- a/migration/qemu-file.c
> > +++ b/migration/qemu-file.c
> > @@ -377,8 +377,22 @@ static ssize_t qemu_fill_buffer(QEMUFile *f)
> > return 0;
> > }
> >
> > - len = f->ops->get_buffer(f->ioc, f->buf + pending, f->total_transferred,
> > - IO_BUF_SIZE - pending, &local_error);
> > + do {
> > + len = qio_channel_read(f->ioc,
> > + (char *)f->buf + pending,
> > + IO_BUF_SIZE - pending,
> > + &local_error);
> > + if (len == QIO_CHANNEL_ERR_BLOCK) {
> > + if (qemu_in_coroutine()) {
> > + qio_channel_yield(f->ioc, G_IO_IN);
> > + } else {
> > + qio_channel_wait(f->ioc, G_IO_IN);
> > + }
> > + } else if (len < 0) {
> > + len = EIO;
>
> This should be -EIO.
>
> > + }
> > + } while (len == QIO_CHANNEL_ERR_BLOCK);
>
> It's failing only with the new TLS test I added for postcopy somehow (at
> least /x86_64/migration/postcopy/recovery/tls).. I also verified after the
> change it'll work again.
Assuming you can still reproduce the pre-existing flaw, can you capture
a stack trace when it hangs. I'm wondering if it is a sign that the
migration is not converging when using TLS under certain load conditions,
because the test waits forever for converge.
Also what scenario are you running in ? Bare metal or a VM, and what
host arch ? Wondering if the machine is at all slow, or for example
missing AES hardware acceleration or some such thing.
With regards,
Daniel
--
|: https://berrange.com -o- https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o- https://fstop138.berrange.com :|
|: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
On Mon, Jun 27, 2022 at 04:03:09PM +0100, Daniel P. Berrangé wrote:
> On Wed, Jun 22, 2022 at 03:34:52PM -0400, Peter Xu wrote:
> > On Wed, Jun 22, 2022 at 07:39:06PM +0100, Dr. David Alan Gilbert (git) wrote:
> > > diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> > > index 74f919de67..e206b05550 100644
> > > --- a/migration/qemu-file.c
> > > +++ b/migration/qemu-file.c
> > > @@ -377,8 +377,22 @@ static ssize_t qemu_fill_buffer(QEMUFile *f)
> > > return 0;
> > > }
> > >
> > > - len = f->ops->get_buffer(f->ioc, f->buf + pending, f->total_transferred,
> > > - IO_BUF_SIZE - pending, &local_error);
> > > + do {
> > > + len = qio_channel_read(f->ioc,
> > > + (char *)f->buf + pending,
> > > + IO_BUF_SIZE - pending,
> > > + &local_error);
> > > + if (len == QIO_CHANNEL_ERR_BLOCK) {
> > > + if (qemu_in_coroutine()) {
> > > + qio_channel_yield(f->ioc, G_IO_IN);
> > > + } else {
> > > + qio_channel_wait(f->ioc, G_IO_IN);
> > > + }
> > > + } else if (len < 0) {
> > > + len = EIO;
> >
> > This should be -EIO.
> >
> > > + }
> > > + } while (len == QIO_CHANNEL_ERR_BLOCK);
> >
> > It's failing only with the new TLS test I added for postcopy somehow (at
> > least /x86_64/migration/postcopy/recovery/tls).. I also verified after the
> > change it'll work again.
>
> Assuming you can still reproduce the pre-existing flaw, can you capture
> a stack trace when it hangs. I'm wondering if it is a sign that the
> migration is not converging when using TLS under certain load conditions,
> because the test waits forever for converge.
Yes it is, and it reproduces here every time. It hangs at:
if (!got_stop) {
qtest_qmp_eventwait(from, "STOP");
}
>
> Also what scenario are you running in ? Bare metal or a VM, and what
> host arch ? Wondering if the machine is at all slow, or for example
> missing AES hardware acceleration or some such thing.
It's Intel(R) Xeon(R) CPU E5-2630 v4 @ 2.20GHz, 40 cores.
It'll pass after I modify the downtime:
migrate_set_parameter_int(from, "downtime-limit", 100000);
And with QTEST_LOG=1 I found that the bw is indeed low, ~700mbps.
--
Peter Xu
On Mon, Jun 27, 2022 at 04:32:00PM -0400, Peter Xu wrote:
> On Mon, Jun 27, 2022 at 04:03:09PM +0100, Daniel P. Berrangé wrote:
> > On Wed, Jun 22, 2022 at 03:34:52PM -0400, Peter Xu wrote:
> > > On Wed, Jun 22, 2022 at 07:39:06PM +0100, Dr. David Alan Gilbert (git) wrote:
> > > > diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> > > > index 74f919de67..e206b05550 100644
> > > > --- a/migration/qemu-file.c
> > > > +++ b/migration/qemu-file.c
> > > > @@ -377,8 +377,22 @@ static ssize_t qemu_fill_buffer(QEMUFile *f)
> > > > return 0;
> > > > }
> > > >
> > > > - len = f->ops->get_buffer(f->ioc, f->buf + pending, f->total_transferred,
> > > > - IO_BUF_SIZE - pending, &local_error);
> > > > + do {
> > > > + len = qio_channel_read(f->ioc,
> > > > + (char *)f->buf + pending,
> > > > + IO_BUF_SIZE - pending,
> > > > + &local_error);
> > > > + if (len == QIO_CHANNEL_ERR_BLOCK) {
> > > > + if (qemu_in_coroutine()) {
> > > > + qio_channel_yield(f->ioc, G_IO_IN);
> > > > + } else {
> > > > + qio_channel_wait(f->ioc, G_IO_IN);
> > > > + }
> > > > + } else if (len < 0) {
> > > > + len = EIO;
> > >
> > > This should be -EIO.
> > >
> > > > + }
> > > > + } while (len == QIO_CHANNEL_ERR_BLOCK);
> > >
> > > It's failing only with the new TLS test I added for postcopy somehow (at
> > > least /x86_64/migration/postcopy/recovery/tls).. I also verified after the
> > > change it'll work again.
> >
> > Assuming you can still reproduce the pre-existing flaw, can you capture
> > a stack trace when it hangs. I'm wondering if it is a sign that the
> > migration is not converging when using TLS under certain load conditions,
> > because the test waits forever for converge.
>
> Yes it is, and it reproduces here every time. It hangs at:
>
> if (!got_stop) {
> qtest_qmp_eventwait(from, "STOP");
> }
>
> >
> > Also what scenario are you running in ? Bare metal or a VM, and what
> > host arch ? Wondering if the machine is at all slow, or for example
> > missing AES hardware acceleration or some such thing.
>
> It's Intel(R) Xeon(R) CPU E5-2630 v4 @ 2.20GHz, 40 cores.
>
> It'll pass after I modify the downtime:
>
> migrate_set_parameter_int(from, "downtime-limit", 100000);
>
> And with QTEST_LOG=1 I found that the bw is indeed low, ~700mbps.
Good, this all makes sense, and I've got pending patchues I'm testing
that will fix this.
With regards,
Daniel
--
|: https://berrange.com -o- https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o- https://fstop138.berrange.com :|
|: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
On Wed, Jun 22, 2022 at 03:34:52PM -0400, Peter Xu wrote:
> On Wed, Jun 22, 2022 at 07:39:06PM +0100, Dr. David Alan Gilbert (git) wrote:
> > diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> > index 74f919de67..e206b05550 100644
> > --- a/migration/qemu-file.c
> > +++ b/migration/qemu-file.c
> > @@ -377,8 +377,22 @@ static ssize_t qemu_fill_buffer(QEMUFile *f)
> > return 0;
> > }
> >
> > - len = f->ops->get_buffer(f->ioc, f->buf + pending, f->total_transferred,
> > - IO_BUF_SIZE - pending, &local_error);
> > + do {
> > + len = qio_channel_read(f->ioc,
> > + (char *)f->buf + pending,
> > + IO_BUF_SIZE - pending,
> > + &local_error);
> > + if (len == QIO_CHANNEL_ERR_BLOCK) {
> > + if (qemu_in_coroutine()) {
> > + qio_channel_yield(f->ioc, G_IO_IN);
> > + } else {
> > + qio_channel_wait(f->ioc, G_IO_IN);
> > + }
> > + } else if (len < 0) {
> > + len = EIO;
>
> This should be -EIO.
Yes, that's correct change. /facepalm
>
> > + }
> > + } while (len == QIO_CHANNEL_ERR_BLOCK);
>
> It's failing only with the new TLS test I added for postcopy somehow (at
> least /x86_64/migration/postcopy/recovery/tls).. I also verified after the
> change it'll work again.
Yeah, I guess this is a rare failure condition that's not easily hit
in our tests. Makes sense that recovery tests could hit it though.
With regards,
Daniel
--
|: https://berrange.com -o- https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o- https://fstop138.berrange.com :|
|: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
* Daniel P. Berrangé (berrange@redhat.com) wrote:
> On Wed, Jun 22, 2022 at 03:34:52PM -0400, Peter Xu wrote:
> > On Wed, Jun 22, 2022 at 07:39:06PM +0100, Dr. David Alan Gilbert (git) wrote:
> > > diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> > > index 74f919de67..e206b05550 100644
> > > --- a/migration/qemu-file.c
> > > +++ b/migration/qemu-file.c
> > > @@ -377,8 +377,22 @@ static ssize_t qemu_fill_buffer(QEMUFile *f)
> > > return 0;
> > > }
> > >
> > > - len = f->ops->get_buffer(f->ioc, f->buf + pending, f->total_transferred,
> > > - IO_BUF_SIZE - pending, &local_error);
> > > + do {
> > > + len = qio_channel_read(f->ioc,
> > > + (char *)f->buf + pending,
> > > + IO_BUF_SIZE - pending,
> > > + &local_error);
> > > + if (len == QIO_CHANNEL_ERR_BLOCK) {
> > > + if (qemu_in_coroutine()) {
> > > + qio_channel_yield(f->ioc, G_IO_IN);
> > > + } else {
> > > + qio_channel_wait(f->ioc, G_IO_IN);
> > > + }
> > > + } else if (len < 0) {
> > > + len = EIO;
> >
> > This should be -EIO.
>
> Yes, that's correct change. /facepalm
I'll resend with that fixed.
Dave
>
> >
> > > + }
> > > + } while (len == QIO_CHANNEL_ERR_BLOCK);
> >
> > It's failing only with the new TLS test I added for postcopy somehow (at
> > least /x86_64/migration/postcopy/recovery/tls).. I also verified after the
> > change it'll work again.
>
> Yeah, I guess this is a rare failure condition that's not easily hit
> in our tests. Makes sense that recovery tests could hit it though.
>
> With regards,
> Daniel
> --
> |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org -o- https://fstop138.berrange.com :|
> |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
>
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On Wed, Jun 22, 2022 at 03:34:52PM -0400, Peter Xu wrote:
> On Wed, Jun 22, 2022 at 07:39:06PM +0100, Dr. David Alan Gilbert (git) wrote:
> > diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> > index 74f919de67..e206b05550 100644
> > --- a/migration/qemu-file.c
> > +++ b/migration/qemu-file.c
> > @@ -377,8 +377,22 @@ static ssize_t qemu_fill_buffer(QEMUFile *f)
> > return 0;
> > }
> >
> > - len = f->ops->get_buffer(f->ioc, f->buf + pending, f->total_transferred,
> > - IO_BUF_SIZE - pending, &local_error);
> > + do {
> > + len = qio_channel_read(f->ioc,
> > + (char *)f->buf + pending,
> > + IO_BUF_SIZE - pending,
> > + &local_error);
> > + if (len == QIO_CHANNEL_ERR_BLOCK) {
> > + if (qemu_in_coroutine()) {
> > + qio_channel_yield(f->ioc, G_IO_IN);
> > + } else {
> > + qio_channel_wait(f->ioc, G_IO_IN);
> > + }
> > + } else if (len < 0) {
> > + len = EIO;
>
> This should be -EIO.
>
> > + }
> > + } while (len == QIO_CHANNEL_ERR_BLOCK);
>
> It's failing only with the new TLS test I added for postcopy somehow (at
> least /x86_64/migration/postcopy/recovery/tls).. I also verified after the
> change it'll work again.
Hmm, when I wanted to run the whole bunch of the migration-test again I
found that precopy tls test hangs (/x86_64/migration/precopy/unix/tls/psk).
Though for this time it also hangs for me even with the master branch, so
maybe not anything wrong with this specific pull req but still something
needs fixing..
--
Peter Xu
On Wed, Jun 22, 2022 at 04:13:39PM -0400, Peter Xu wrote:
> On Wed, Jun 22, 2022 at 03:34:52PM -0400, Peter Xu wrote:
> > On Wed, Jun 22, 2022 at 07:39:06PM +0100, Dr. David Alan Gilbert (git) wrote:
> > > diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> > > index 74f919de67..e206b05550 100644
> > > --- a/migration/qemu-file.c
> > > +++ b/migration/qemu-file.c
> > > @@ -377,8 +377,22 @@ static ssize_t qemu_fill_buffer(QEMUFile *f)
> > > return 0;
> > > }
> > >
> > > - len = f->ops->get_buffer(f->ioc, f->buf + pending, f->total_transferred,
> > > - IO_BUF_SIZE - pending, &local_error);
> > > + do {
> > > + len = qio_channel_read(f->ioc,
> > > + (char *)f->buf + pending,
> > > + IO_BUF_SIZE - pending,
> > > + &local_error);
> > > + if (len == QIO_CHANNEL_ERR_BLOCK) {
> > > + if (qemu_in_coroutine()) {
> > > + qio_channel_yield(f->ioc, G_IO_IN);
> > > + } else {
> > > + qio_channel_wait(f->ioc, G_IO_IN);
> > > + }
> > > + } else if (len < 0) {
> > > + len = EIO;
> >
> > This should be -EIO.
> >
> > > + }
> > > + } while (len == QIO_CHANNEL_ERR_BLOCK);
> >
> > It's failing only with the new TLS test I added for postcopy somehow (at
> > least /x86_64/migration/postcopy/recovery/tls).. I also verified after the
> > change it'll work again.
>
> Hmm, when I wanted to run the whole bunch of the migration-test again I
> found that precopy tls test hangs (/x86_64/migration/precopy/unix/tls/psk).
> Though for this time it also hangs for me even with the master branch, so
> maybe not anything wrong with this specific pull req but still something
> needs fixing..
That pre-existing test has been runnnig by default in CI for a while
now, under different OS builds, so I'm surprised. Is there anything
especially unusual / different about your setup that could explain
why you see hang that we don't get anywhere else ?
With regards,
Daniel
--
|: https://berrange.com -o- https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o- https://fstop138.berrange.com :|
|: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
On Thu, Jun 23, 2022 at 09:47:51AM +0100, Daniel P. Berrangé wrote: > > Hmm, when I wanted to run the whole bunch of the migration-test again I > > found that precopy tls test hangs (/x86_64/migration/precopy/unix/tls/psk). > > Though for this time it also hangs for me even with the master branch, so > > maybe not anything wrong with this specific pull req but still something > > needs fixing.. > > That pre-existing test has been runnnig by default in CI for a while > now, under different OS builds, so I'm surprised. Is there anything > especially unusual / different about your setup that could explain > why you see hang that we don't get anywhere else ? TL;DR: I think it's not run in CI? Please see ufd_version_check(), as when uffd not detected we'll skip the whole thing. We really need to apply this patch, soon-ish.. https://lore.kernel.org/all/20210615175523.439830-2-peterx@redhat.com/ I can easily reproduce the hang on two x86_64 hosts I have, with current master commit (2b049d2c8dc01de750410f8f1a4eac498c04c723). Or am I the only one? So I think it also means we don't run migration unit tests on non-Linux OSes for sure because uffd was never there, meanwhile it also requires (mostly) root privilege even for Linux hosts so if the sysctl knob was not set properly (on sysctl.unprivileged_userfaultfd=1) the test can be skipped too. When I was changing migration code in the past few months (at least after I'm aware CI was probably not running it), I ran migration-test manually with root, but that's not ideal... -- Peter Xu
On Thu, Jun 23, 2022 at 03:13:36PM -0400, Peter Xu wrote: > On Thu, Jun 23, 2022 at 09:47:51AM +0100, Daniel P. Berrangé wrote: > > > Hmm, when I wanted to run the whole bunch of the migration-test again I > > > found that precopy tls test hangs (/x86_64/migration/precopy/unix/tls/psk). > > > Though for this time it also hangs for me even with the master branch, so > > > maybe not anything wrong with this specific pull req but still something > > > needs fixing.. > > > > That pre-existing test has been runnnig by default in CI for a while > > now, under different OS builds, so I'm surprised. Is there anything > > especially unusual / different about your setup that could explain > > why you see hang that we don't get anywhere else ? > > TL;DR: I think it's not run in CI? > > Please see ufd_version_check(), as when uffd not detected we'll skip the > whole thing. Our CI should be passing that check for the private runners eg https://gitlab.com/qemu-project/qemu/-/jobs/2641920502 shows us running 35 tests 2/178 qemu:qtest+qtest-x86_64 / qtest-x86_64/migration-test OK 65.57s 35 subtests passed but yes, the container based jobs are all skipping > > We really need to apply this patch, soon-ish.. > > https://lore.kernel.org/all/20210615175523.439830-2-peterx@redhat.com/ > > I can easily reproduce the hang on two x86_64 hosts I have, with current > master commit (2b049d2c8dc01de750410f8f1a4eac498c04c723). Or am I the only > one? > > So I think it also means we don't run migration unit tests on non-Linux > OSes for sure because uffd was never there, meanwhile it also requires > (mostly) root privilege even for Linux hosts so if the sysctl knob was not > set properly (on sysctl.unprivileged_userfaultfd=1) the test can be skipped > too. It appears Fedora at least set unprivileged_userfaultfd=1 by default, hence why I've never seen it skipped. With regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
On Mon, Jun 27, 2022 at 03:41:57PM +0100, Daniel P. Berrangé wrote: > On Thu, Jun 23, 2022 at 03:13:36PM -0400, Peter Xu wrote: > > On Thu, Jun 23, 2022 at 09:47:51AM +0100, Daniel P. Berrangé wrote: > > > > Hmm, when I wanted to run the whole bunch of the migration-test again I > > > > found that precopy tls test hangs (/x86_64/migration/precopy/unix/tls/psk). > > > > Though for this time it also hangs for me even with the master branch, so > > > > maybe not anything wrong with this specific pull req but still something > > > > needs fixing.. > > > > > > That pre-existing test has been runnnig by default in CI for a while > > > now, under different OS builds, so I'm surprised. Is there anything > > > especially unusual / different about your setup that could explain > > > why you see hang that we don't get anywhere else ? > > > > TL;DR: I think it's not run in CI? > > > > Please see ufd_version_check(), as when uffd not detected we'll skip the > > whole thing. > > Our CI should be passing that check for the private runners eg > > https://gitlab.com/qemu-project/qemu/-/jobs/2641920502 > > shows us running 35 tests > > 2/178 qemu:qtest+qtest-x86_64 / qtest-x86_64/migration-test OK 65.57s 35 subtests passed > > but yes, the container based jobs are all skipping > > > > > We really need to apply this patch, soon-ish.. > > > > https://lore.kernel.org/all/20210615175523.439830-2-peterx@redhat.com/ BTW, I'd suggest re-sending that patch to bump it up in the inbox as its a year old at this point. With regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
On Mon, Jun 27, 2022 at 03:44:21PM +0100, Daniel P. Berrangé wrote: > On Mon, Jun 27, 2022 at 03:41:57PM +0100, Daniel P. Berrangé wrote: > > On Thu, Jun 23, 2022 at 03:13:36PM -0400, Peter Xu wrote: > > > On Thu, Jun 23, 2022 at 09:47:51AM +0100, Daniel P. Berrangé wrote: > > > > > Hmm, when I wanted to run the whole bunch of the migration-test again I > > > > > found that precopy tls test hangs (/x86_64/migration/precopy/unix/tls/psk). > > > > > Though for this time it also hangs for me even with the master branch, so > > > > > maybe not anything wrong with this specific pull req but still something > > > > > needs fixing.. > > > > > > > > That pre-existing test has been runnnig by default in CI for a while > > > > now, under different OS builds, so I'm surprised. Is there anything > > > > especially unusual / different about your setup that could explain > > > > why you see hang that we don't get anywhere else ? > > > > > > TL;DR: I think it's not run in CI? > > > > > > Please see ufd_version_check(), as when uffd not detected we'll skip the > > > whole thing. > > > > Our CI should be passing that check for the private runners eg > > > > https://gitlab.com/qemu-project/qemu/-/jobs/2641920502 > > > > shows us running 35 tests > > > > 2/178 qemu:qtest+qtest-x86_64 / qtest-x86_64/migration-test OK 65.57s 35 subtests passed > > > > but yes, the container based jobs are all skipping > > > > > > > > We really need to apply this patch, soon-ish.. > > > > > > https://lore.kernel.org/all/20210615175523.439830-2-peterx@redhat.com/ > > BTW, I'd suggest re-sending that patch to bump it up in the inbox as > its a year old at this point. Indeed it already conflicts with the preempt series, I'll post one based on that. Thanks. -- Peter Xu
© 2016 - 2026 Red Hat, Inc.