drivers/char/hw_random/virtio-rng.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+)
From: Martin Wilck <mwilck@suse.com>
If a program opens /dev/hwrng with O_NONBLOCK and uses poll() and
non-blocking read() to retrieve random data, it ends up in a tight
loop with poll() always returning POLLIN and read() returning EAGAIN.
This repeats forever until some process makes a blocking read() call.
The reason is that virtio_read() always returns 0 in non-blocking mode,
even if data is available. Worse, it fetches random data from the
hypervisor after every non-blocking call, without ever using this data.
The following test program illustrates the behavior and can be used
for testing and experiments. The problem will only be seen if all
tasks use non-blocking access; otherwise the blocking reads will
"recharge" the random pool and cause other, non-blocking reads to
succeed at least sometimes.
/* Whether to use non-blocking mode in a task, problem occurs if CONDITION is 1 */
//#define CONDITION (getpid() % 2 != 0)
/* Set to 1 by the SIGALRM handler to make loop() terminate. */
static volatile sig_atomic_t stop;
/* Async-signal-safe SIGALRM handler: only sets the stop flag. */
static void handler(int sig __attribute__((unused))) { stop = 1; }
/*
 * Reader task for the test program: poll /dev/hwrng and read random data
 * until the 'stop' flag is set by the SIGALRM handler, then print a
 * per-process summary of successes, EAGAINs and other errors.
 * NOTE(review): poll()'s timeout argument is in *milliseconds*, but the
 * caller passes SECONDS and the summary prints "%d seconds" -- confirm
 * the intended units.
 */
static void loop(int fd, int sec)
{
struct pollfd pfd = { .fd = fd, .events = POLLIN, };
/* Per-process counters for the final summary line. */
unsigned long errors = 0, eagains = 0, bytes = 0, succ = 0;
int size, rc, rd;
/* Seed per process so each task picks a different buffer size. */
srandom(getpid());
/* CONDITION selects which tasks switch the shared fd to non-blocking mode. */
if (CONDITION && fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_NONBLOCK) == -1)
perror("fcntl");
/* Random per-task buffer size in [MINBUFSIZ, MAXBUFSIZ]. */
size = MINBUFSIZ + random() % (MAXBUFSIZ - MINBUFSIZ + 1);
for(;;) {
char buf[size];
if (stop)
break;
rc = poll(&pfd, 1, sec);
if (rc > 0) {
/* POLLIN was reported; with the driver bug a non-blocking read
 * still returns -1/EAGAIN, which is what this program counts. */
rd = read(fd, buf, sizeof(buf));
if (rd == -1 && errno == EAGAIN)
eagains++;
else if (rd == -1)
errors++;
else {
succ++;
bytes += rd;
/* NOTE(review): writes sizeof(buf) bytes even after a short read,
 * so trailing bytes may be uninitialized -- 'rd' would be safer. */
write(1, buf, sizeof(buf));
}
} else if (rc == -1) {
if (errno != EINTR)
perror("poll");
break;
} else
fprintf(stderr, "poll: timeout\n");
}
fprintf(stderr,
"pid %d %sblocking, bufsize %d, %d seconds, %lu bytes read, %lu success, %lu eagain, %lu errors\n",
getpid(), CONDITION ? "non-" : "", size, sec, bytes, succ, eagains, errors);
}
/*
 * Test driver: fork into four processes, each running loop() on a
 * /dev/hwrng fd for SECONDS seconds (terminated via SIGALRM -> stop).
 */
int main(void)
{
int fd;
/* Two forks -> 4 concurrent reader processes. Each process opens its
 * own fd below, so the O_NONBLOCK flag set in loop() is per process. */
fork(); fork();
fd = open("/dev/hwrng", O_RDONLY);
if (fd == -1) {
perror("open");
return 1;
};
/* Arrange for loop() to stop after SECONDS seconds. */
signal(SIGALRM, handler);
alarm(SECONDS);
loop(fd, SECONDS);
close(fd);
/* Reap one child, if this process has any. */
wait(NULL);
return 0;
}
/*
 * Second test program: a simple poll()+read() loop on a non-blocking fd.
 * Runs LOOPS iterations with a 1 ms poll timeout and reports each result,
 * demonstrating the POLLIN-then-EAGAIN tight loop described in the patch.
 */
void loop(int fd)
{
struct pollfd pfd0 = { .fd = fd, .events = POLLIN, };
int rc;
unsigned int n;
for (n = LOOPS; n > 0; n--) {
/* Fresh copy each iteration, since poll() overwrites revents. */
struct pollfd pfd = pfd0;
char buf[SIZE];
rc = poll(&pfd, 1, 1);
if (rc > 0) {
/* With the driver bug, this reports EAGAIN despite POLLIN above. */
int rd = read(fd, buf, sizeof(buf));
if (rd == -1)
perror("read");
else
printf("read %d bytes\n", rd);
} else if (rc == -1)
perror("poll");
else
fprintf(stderr, "timeout\n");
}
}
/*
 * Driver for the second test program: open /dev/hwrng in non-blocking
 * mode and run the poll()/read() loop LOOPS times.
 */
int main(void)
{
int fd;
fd = open("/dev/hwrng", O_RDONLY|O_NONBLOCK);
if (fd == -1) {
perror("open");
return 1;
};
loop(fd);
close(fd);
return 0;
}
This can be observed in the real world, e.g. with nested qemu/KVM virtual
machines, if both the "outer" and "inner" VMs have a virtio-rng device.
If the "inner" VM requests random data, qemu running in the "outer" VM
uses this device in a non-blocking manner like the test program above.
Fix it by returning available data if a previous hypervisor call has
completed in the meantime. I tested the patch with the program above,
and with rng-tools.
Signed-off-by: Martin Wilck <mwilck@suse.com>
---
drivers/char/hw_random/virtio-rng.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
index 79a6e47b5fbc..984713b35892 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -59,6 +59,20 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait)
if (vi->hwrng_removed)
return -ENODEV;
+ /*
+ * If the previous call was non-blocking, we may have got some
+ * randomness already.
+ */
+ if (vi->busy && completion_done(&vi->have_data)) {
+ unsigned int len;
+
+ vi->busy = false;
+ len = vi->data_avail > size ? size : vi->data_avail;
+ vi->data_avail -= len;
+ if (len)
+ return len;
+ }
+
if (!vi->busy) {
vi->busy = true;
reinit_completion(&vi->have_data);
--
2.26.2
On Wed, 2020-07-15 at 15:32 +0200, mwilck@suse.com wrote: > From: Martin Wilck <mwilck@suse.com> > > If a program opens /dev/hwrng with O_NONBLOCK and uses poll() and > non-blocking read() to retrieve random data, it ends up in a tight > loop with poll() always returning POLLIN and read() returning EAGAIN. > This repeats forever until some process makes a blocking read() call. > The reason is that virtio_read() always returns 0 in non-blocking > mode, > even if data is available. Worse, it fetches random data from the > hypervisor after every non-blocking call, without ever using this > data. Gentle review reminder. https://patchew.org/QEMU/20200715133255.10526-1-mwilck@suse.com/ Regards Martin
You Cc'ed qemu-devel, so Cc'ing the virtio-rng maintainers. On 7/15/20 3:32 PM, mwilck@suse.com wrote: > From: Martin Wilck <mwilck@suse.com> > > If a program opens /dev/hwrng with O_NONBLOCK and uses poll() and > non-blocking read() to retrieve random data, it ends up in a tight > loop with poll() always returning POLLIN and read() returning EAGAIN. > This repeats forever until some process makes a blocking read() call. > The reason is that virtio_read() always returns 0 in non-blocking mode, > even if data is available. Worse, it fetches random data from the > hypervisor after every non-blocking call, without ever using this data. > > The following test program illustrates the behavior and can be used > for testing and experiments. The problem will only be seen if all > tasks use non-blocking access; otherwise the blocking reads will > "recharge" the random pool and cause other, non-blocking reads to > succeed at least sometimes. > > /* Whether to use non-blocking mode in a task, problem occurs if CONDITION is 1 */ > //#define CONDITION (getpid() % 2 != 0) > > static volatile sig_atomic_t stop; > static void handler(int sig __attribute__((unused))) { stop = 1; } > > static void loop(int fd, int sec) > { > struct pollfd pfd = { .fd = fd, .events = POLLIN, }; > unsigned long errors = 0, eagains = 0, bytes = 0, succ = 0; > int size, rc, rd; > > srandom(getpid()); > if (CONDITION && fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_NONBLOCK) == -1) > perror("fcntl"); > size = MINBUFSIZ + random() % (MAXBUFSIZ - MINBUFSIZ + 1); > > for(;;) { > char buf[size]; > > if (stop) > break; > rc = poll(&pfd, 1, sec); > if (rc > 0) { > rd = read(fd, buf, sizeof(buf)); > if (rd == -1 && errno == EAGAIN) > eagains++; > else if (rd == -1) > errors++; > else { > succ++; > bytes += rd; > write(1, buf, sizeof(buf)); > } > } else if (rc == -1) { > if (errno != EINTR) > perror("poll"); > break; > } else > fprintf(stderr, "poll: timeout\n"); > } > fprintf(stderr, > "pid %d %sblocking, bufsize 
%d, %d seconds, %lu bytes read, %lu success, %lu eagain, %lu errors\n", > getpid(), CONDITION ? "non-" : "", size, sec, bytes, succ, eagains, errors); > } > > int main(void) > { > int fd; > > fork(); fork(); > fd = open("/dev/hwrng", O_RDONLY); > if (fd == -1) { > perror("open"); > return 1; > }; > signal(SIGALRM, handler); > alarm(SECONDS); > loop(fd, SECONDS); > close(fd); > wait(NULL); > return 0; > } > > void loop(int fd) > { > struct pollfd pfd0 = { .fd = fd, .events = POLLIN, }; > int rc; > unsigned int n; > > for (n = LOOPS; n > 0; n--) { > struct pollfd pfd = pfd0; > char buf[SIZE]; > > rc = poll(&pfd, 1, 1); > if (rc > 0) { > int rd = read(fd, buf, sizeof(buf)); > > if (rd == -1) > perror("read"); > else > printf("read %d bytes\n", rd); > } else if (rc == -1) > perror("poll"); > else > fprintf(stderr, "timeout\n"); > > } > } > > int main(void) > { > int fd; > > fd = open("/dev/hwrng", O_RDONLY|O_NONBLOCK); > if (fd == -1) { > perror("open"); > return 1; > }; > loop(fd); > close(fd); > return 0; > } > > This can be observed in the real word e.g. with nested qemu/KVM virtual > machines, if both the "outer" and "inner" VMs have a virtio-rng device. > If the "inner" VM requests random data, qemu running in the "outer" VM > uses this device in a non-blocking manner like the test program above. > > Fix it by returning available data if a previous hypervisor call has > completed in the meantime. I tested the patch with the program above, > and with rng-tools. 
> > Signed-off-by: Martin Wilck <mwilck@suse.com> > --- > drivers/char/hw_random/virtio-rng.c | 14 ++++++++++++++ > 1 file changed, 14 insertions(+) > > diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c > index 79a6e47b5fbc..984713b35892 100644 > --- a/drivers/char/hw_random/virtio-rng.c > +++ b/drivers/char/hw_random/virtio-rng.c > @@ -59,6 +59,20 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) > if (vi->hwrng_removed) > return -ENODEV; > > + /* > + * If the previous call was non-blocking, we may have got some > + * randomness already. > + */ > + if (vi->busy && completion_done(&vi->have_data)) { > + unsigned int len; > + > + vi->busy = false; > + len = vi->data_avail > size ? size : vi->data_avail; > + vi->data_avail -= len; > + if (len) > + return len; > + } > + > if (!vi->busy) { > vi->busy = true; > reinit_completion(&vi->have_data); >
On 11/08/2020 12:37, Philippe Mathieu-Daudé wrote: > You Cc'ed qemu-devel, so Cc'ing the virtio-rng maintainers. > > On 7/15/20 3:32 PM, mwilck@suse.com wrote: >> From: Martin Wilck <mwilck@suse.com> >> >> If a program opens /dev/hwrng with O_NONBLOCK and uses poll() and >> non-blocking read() to retrieve random data, it ends up in a tight >> loop with poll() always returning POLLIN and read() returning EAGAIN. >> This repeats forever until some process makes a blocking read() call. >> The reason is that virtio_read() always returns 0 in non-blocking mode, >> even if data is available. Worse, it fetches random data from the >> hypervisor after every non-blocking call, without ever using this data. >> >> The following test program illustrates the behavior and can be used >> for testing and experiments. The problem will only be seen if all >> tasks use non-blocking access; otherwise the blocking reads will >> "recharge" the random pool and cause other, non-blocking reads to >> succeed at least sometimes. 
>> >> /* Whether to use non-blocking mode in a task, problem occurs if CONDITION is 1 */ >> //#define CONDITION (getpid() % 2 != 0) >> >> static volatile sig_atomic_t stop; >> static void handler(int sig __attribute__((unused))) { stop = 1; } >> >> static void loop(int fd, int sec) >> { >> struct pollfd pfd = { .fd = fd, .events = POLLIN, }; >> unsigned long errors = 0, eagains = 0, bytes = 0, succ = 0; >> int size, rc, rd; >> >> srandom(getpid()); >> if (CONDITION && fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_NONBLOCK) == -1) >> perror("fcntl"); >> size = MINBUFSIZ + random() % (MAXBUFSIZ - MINBUFSIZ + 1); >> >> for(;;) { >> char buf[size]; >> >> if (stop) >> break; >> rc = poll(&pfd, 1, sec); >> if (rc > 0) { >> rd = read(fd, buf, sizeof(buf)); >> if (rd == -1 && errno == EAGAIN) >> eagains++; >> else if (rd == -1) >> errors++; >> else { >> succ++; >> bytes += rd; >> write(1, buf, sizeof(buf)); >> } >> } else if (rc == -1) { >> if (errno != EINTR) >> perror("poll"); >> break; >> } else >> fprintf(stderr, "poll: timeout\n"); >> } >> fprintf(stderr, >> "pid %d %sblocking, bufsize %d, %d seconds, %lu bytes read, %lu success, %lu eagain, %lu errors\n", >> getpid(), CONDITION ? 
"non-" : "", size, sec, bytes, succ, eagains, errors); >> } >> >> int main(void) >> { >> int fd; >> >> fork(); fork(); >> fd = open("/dev/hwrng", O_RDONLY); >> if (fd == -1) { >> perror("open"); >> return 1; >> }; >> signal(SIGALRM, handler); >> alarm(SECONDS); >> loop(fd, SECONDS); >> close(fd); >> wait(NULL); >> return 0; >> } >> >> void loop(int fd) >> { >> struct pollfd pfd0 = { .fd = fd, .events = POLLIN, }; >> int rc; >> unsigned int n; >> >> for (n = LOOPS; n > 0; n--) { >> struct pollfd pfd = pfd0; >> char buf[SIZE]; >> >> rc = poll(&pfd, 1, 1); >> if (rc > 0) { >> int rd = read(fd, buf, sizeof(buf)); >> >> if (rd == -1) >> perror("read"); >> else >> printf("read %d bytes\n", rd); >> } else if (rc == -1) >> perror("poll"); >> else >> fprintf(stderr, "timeout\n"); >> >> } >> } >> >> int main(void) >> { >> int fd; >> >> fd = open("/dev/hwrng", O_RDONLY|O_NONBLOCK); >> if (fd == -1) { >> perror("open"); >> return 1; >> }; >> loop(fd); >> close(fd); >> return 0; >> } >> >> This can be observed in the real word e.g. with nested qemu/KVM virtual >> machines, if both the "outer" and "inner" VMs have a virtio-rng device. >> If the "inner" VM requests random data, qemu running in the "outer" VM >> uses this device in a non-blocking manner like the test program above. >> >> Fix it by returning available data if a previous hypervisor call has >> completed in the meantime. I tested the patch with the program above, >> and with rng-tools. 
>> >> Signed-off-by: Martin Wilck <mwilck@suse.com> >> --- >> drivers/char/hw_random/virtio-rng.c | 14 ++++++++++++++ >> 1 file changed, 14 insertions(+) >> >> diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c >> index 79a6e47b5fbc..984713b35892 100644 >> --- a/drivers/char/hw_random/virtio-rng.c >> +++ b/drivers/char/hw_random/virtio-rng.c >> @@ -59,6 +59,20 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) >> if (vi->hwrng_removed) >> return -ENODEV; >> >> + /* >> + * If the previous call was non-blocking, we may have got some >> + * randomness already. >> + */ >> + if (vi->busy && completion_done(&vi->have_data)) { >> + unsigned int len; >> + >> + vi->busy = false; >> + len = vi->data_avail > size ? size : vi->data_avail; >> + vi->data_avail -= len; You don't need to modify data_avail. As busy is set to false, the buffer will be reused. and it is always overwritten by virtqueue_get_buf(). And moreover, if it was reused it would be always the beginning. >> + if (len) >> + return len; >> + } >> + >> if (!vi->busy) { >> vi->busy = true; >> reinit_completion(&vi->have_data); >> > Why don't you modify only the wait case? Something like: if (!wait && !completion_done(&vi->have_data)) { return 0; } then at the end you can do "return min(size, vi->data_avail);". Thanks, Laurent
On Tue, 2020-08-11 at 14:02 +0200, Laurent Vivier wrote: > > > > drivers/char/hw_random/virtio-rng.c | 14 ++++++++++++++ > > > 1 file changed, 14 insertions(+) > > > > > > diff --git a/drivers/char/hw_random/virtio-rng.c > > > b/drivers/char/hw_random/virtio-rng.c > > > index 79a6e47b5fbc..984713b35892 100644 > > > --- a/drivers/char/hw_random/virtio-rng.c > > > +++ b/drivers/char/hw_random/virtio-rng.c > > > @@ -59,6 +59,20 @@ static int virtio_read(struct hwrng *rng, void > > > *buf, size_t size, bool wait) > > > if (vi->hwrng_removed) > > > return -ENODEV; > > > > > > + /* > > > + * If the previous call was non-blocking, we may have got some > > > + * randomness already. > > > + */ > > > + if (vi->busy && completion_done(&vi->have_data)) { > > > + unsigned int len; > > > + > > > + vi->busy = false; > > > + len = vi->data_avail > size ? size : vi->data_avail; > > > + vi->data_avail -= len; > > You don't need to modify data_avail. As busy is set to false, the > buffer > will be reused. and it is always overwritten by virtqueue_get_buf(). > And moreover, if it was reused it would be always the beginning. Ok. > > > > + if (len) > > > + return len; > > > + } > > > + > > > if (!vi->busy) { > > > vi->busy = true; > > > reinit_completion(&vi->have_data); > > > > > Why don't you modify only the wait case? > > Something like: > > if (!wait && !completion_done(&vi->have_data)) { > return 0; > } > > then at the end you can do "return min(size, vi->data_avail);". Sorry, I don't understand what you mean. Where would you insert the above "if" clause? Are you saying I should call wait_for_completion_killable() also in the (!wait) case? I must call check completion_done() before calling reinit_completion(). OTOH, if completion_done() returns false, I can't simply return 0, I must at least start fetching new random data, so that a subsequent virtio_read() call has a chance to return something. Thanks, Martin
On 11/08/2020 14:22, Martin Wilck wrote: > On Tue, 2020-08-11 at 14:02 +0200, Laurent Vivier wrote: >> >>>> drivers/char/hw_random/virtio-rng.c | 14 ++++++++++++++ >>>> 1 file changed, 14 insertions(+) >>>> >>>> diff --git a/drivers/char/hw_random/virtio-rng.c >>>> b/drivers/char/hw_random/virtio-rng.c >>>> index 79a6e47b5fbc..984713b35892 100644 >>>> --- a/drivers/char/hw_random/virtio-rng.c >>>> +++ b/drivers/char/hw_random/virtio-rng.c >>>> @@ -59,6 +59,20 @@ static int virtio_read(struct hwrng *rng, void >>>> *buf, size_t size, bool wait) >>>> if (vi->hwrng_removed) >>>> return -ENODEV; >>>> >>>> + /* >>>> + * If the previous call was non-blocking, we may have got some >>>> + * randomness already. >>>> + */ >>>> + if (vi->busy && completion_done(&vi->have_data)) { >>>> + unsigned int len; >>>> + >>>> + vi->busy = false; >>>> + len = vi->data_avail > size ? size : vi->data_avail; >>>> + vi->data_avail -= len; >> >> You don't need to modify data_avail. As busy is set to false, the >> buffer >> will be reused. and it is always overwritten by virtqueue_get_buf(). >> And moreover, if it was reused it would be always the beginning. > > Ok. > >> >>>> + if (len) >>>> + return len; >>>> + } >>>> + >>>> if (!vi->busy) { >>>> vi->busy = true; >>>> reinit_completion(&vi->have_data); >>>> >> >> Why don't you modify only the wait case? >> >> Something like: >> >> if (!wait && !completion_done(&vi->have_data)) { >> return 0; >> } >> >> then at the end you can do "return min(size, vi->data_avail);". > > Sorry, I don't understand what you mean. Where would you insert the > above "if" clause? Are you saying I should call > wait_for_completion_killable() also in the (!wait) case? Yes, but only if a the completion is done, so it will not wait. > > I must call check completion_done() before calling reinit_completion(). Normally, the busy flag is here for that. If busy is true, a buffer is already registered. reinit_completion() must not be called if busy is true. 
busy becomes false when the buffer is ready to be reused. > OTOH, if completion_done() returns false, I can't simply return 0, I > must at least start fetching new random data, so that a subsequent > virtio_read() call has a chance to return something. if you modify "if (!wait)" to become "if (!wait && !completion_done(&vi->have_data))", either we have already a registered buffer from a previous call or the one we have registered if busy is false. So you can return 0 as nothing is ready but we have a registered buffer for the next time. Thanks, Laurent
On Tue, 2020-08-11 at 14:39 +0200, Laurent Vivier wrote: > On 11/08/2020 14:22, Martin Wilck wrote: > > On Tue, 2020-08-11 at 14:02 +0200, Laurent Vivier wrote: > > > > > drivers/char/hw_random/virtio-rng.c | 14 ++++++++++++++ > > > > > 1 file changed, 14 insertions(+) > > > > > > > > > > diff --git a/drivers/char/hw_random/virtio-rng.c > > > > > b/drivers/char/hw_random/virtio-rng.c > > > > > index 79a6e47b5fbc..984713b35892 100644 > > > > > --- a/drivers/char/hw_random/virtio-rng.c > > > > > +++ b/drivers/char/hw_random/virtio-rng.c > > > > > @@ -59,6 +59,20 @@ static int virtio_read(struct hwrng *rng, > > > > > void > > > > > *buf, size_t size, bool wait) > > > > > if (vi->hwrng_removed) > > > > > return -ENODEV; > > > > > > > > > > + /* > > > > > + * If the previous call was non-blocking, we may have > > > > > got some > > > > > + * randomness already. > > > > > + */ > > > > > + if (vi->busy && completion_done(&vi->have_data)) { > > > > > + unsigned int len; > > > > > + > > > > > + vi->busy = false; > > > > > + len = vi->data_avail > size ? size : vi- > > > > > >data_avail; > > > > > + vi->data_avail -= len; > > > > > > You don't need to modify data_avail. As busy is set to false, the > > > buffer > > > will be reused. and it is always overwritten by > > > virtqueue_get_buf(). > > > And moreover, if it was reused it would be always the beginning. > > > > Ok. > > > > > > > + if (len) > > > > > + return len; > > > > > + } > > > > > + > > > > > if (!vi->busy) { > > > > > vi->busy = true; > > > > > reinit_completion(&vi->have_data); > > > > > > > > > > > Why don't you modify only the wait case? > > > > > > Something like: > > > > > > if (!wait && !completion_done(&vi->have_data)) { > > > return 0; > > > } > > > > > > then at the end you can do "return min(size, vi->data_avail);". > > > > Sorry, I don't understand what you mean. Where would you insert the > > above "if" clause? Are you saying I should call > > wait_for_completion_killable() also in the (!wait) case? 
> > Yes, but only if a the completion is done, so it will not wait. > Slowly getting there, thanks for your patience. Yes, I guess this would work, too. I'll test and get back to you. > > I must call check completion_done() before calling > > reinit_completion(). > > Normally, the busy flag is here for that. If busy is true, a buffer > is > already registered. reinit_completion() must not be called if busy is > true. busy becomes false when the before is ready to be reused. My thinking was that once the completion is done, "busy" doesn't reflect the actual state any more. But your idea is leaner and keeps the semantics of "busy". I'll give it a try. Thanks, Martin
On 11/08/2020 14:53, Martin Wilck wrote: > On Tue, 2020-08-11 at 14:39 +0200, Laurent Vivier wrote: >> On 11/08/2020 14:22, Martin Wilck wrote: >>> On Tue, 2020-08-11 at 14:02 +0200, Laurent Vivier wrote: >>>>>> drivers/char/hw_random/virtio-rng.c | 14 ++++++++++++++ >>>>>> 1 file changed, 14 insertions(+) >>>>>> >>>>>> diff --git a/drivers/char/hw_random/virtio-rng.c >>>>>> b/drivers/char/hw_random/virtio-rng.c >>>>>> index 79a6e47b5fbc..984713b35892 100644 >>>>>> --- a/drivers/char/hw_random/virtio-rng.c >>>>>> +++ b/drivers/char/hw_random/virtio-rng.c >>>>>> @@ -59,6 +59,20 @@ static int virtio_read(struct hwrng *rng, >>>>>> void >>>>>> *buf, size_t size, bool wait) >>>>>> if (vi->hwrng_removed) >>>>>> return -ENODEV; >>>>>> >>>>>> + /* >>>>>> + * If the previous call was non-blocking, we may have >>>>>> got some >>>>>> + * randomness already. >>>>>> + */ >>>>>> + if (vi->busy && completion_done(&vi->have_data)) { >>>>>> + unsigned int len; >>>>>> + >>>>>> + vi->busy = false; >>>>>> + len = vi->data_avail > size ? size : vi- >>>>>>> data_avail; >>>>>> + vi->data_avail -= len; >>>> >>>> You don't need to modify data_avail. As busy is set to false, the >>>> buffer >>>> will be reused. and it is always overwritten by >>>> virtqueue_get_buf(). >>>> And moreover, if it was reused it would be always the beginning. >>> >>> Ok. >>> >>>>>> + if (len) >>>>>> + return len; >>>>>> + } >>>>>> + >>>>>> if (!vi->busy) { >>>>>> vi->busy = true; >>>>>> reinit_completion(&vi->have_data); >>>>>> >>>> >>>> Why don't you modify only the wait case? >>>> >>>> Something like: >>>> >>>> if (!wait && !completion_done(&vi->have_data)) { >>>> return 0; >>>> } >>>> >>>> then at the end you can do "return min(size, vi->data_avail);". >>> >>> Sorry, I don't understand what you mean. Where would you insert the >>> above "if" clause? Are you saying I should call >>> wait_for_completion_killable() also in the (!wait) case? >> >> Yes, but only if a the completion is done, so it will not wait. 
>> > > Slowly getting there, thanks for your patience. Yes, I guess this would > work, too. I'll test and get back to you. No problem. This code is tricky and it took me several months to really start to understand it ... Thanks, Laurent
On Tue, Aug 11, 2020 at 03:00:14PM +0200, Laurent Vivier wrote: > No problem. This code is tricky and it took me several months to really > start to understand it ... Oh great, we actually have someone who understands the code! Maybe you can help me understand: virtio_read takes the buf pointer and puts it in the vq. It can then return to caller (e.g. on a signal). Device can meanwhile write into the buffer. It looks like if another call then happens, and that other call uses a different buffer, virtio rng will happily return the data written into the original buf pointer, confusing the caller. Is that right? -- MST
On 11/08/2020 15:14, Michael S. Tsirkin wrote: > On Tue, Aug 11, 2020 at 03:00:14PM +0200, Laurent Vivier wrote: >> No problem. This code is tricky and it took me several months to really >> start to understand it ... > > Oh great, we actually have someone who understands the code! > Maybe you can help me understand: virtio_read > takes the buf pointer and puts it in the vq. > It can then return to caller (e.g. on a signal). > Device can meanwhile write into the buffer. > > It looks like if another call then happens, and that > other call uses a different buffer, virtio rng > will happily return the data written into the > original buf pointer, confusing the caller. > > Is that right? > Yes. hw_random core uses two bufers: - rng_fillbuf that is used with a blocking access and protected by the reading_mutex. I think this cannot be interrupted by a kill because it's in hwrng_fillfn() and it's kthread. - rng_buffer that is used in rng_dev_read() and can be interrupted (it is also protected by reading_mutex) But if rng_dev_read() is called with O_NONBLOCK or interrupted and then rng_fillbuf starts they can be mixed. We have also the first use of rng_buffer in add_early_randomness() that use a different size than in rng_dev_read() with the same buffer (and this size is 16 whereas the hwrng read API says it must be at least 32...). The problem here is core has been developped with synchronicity in mind, whereas virtio is asynchronous by definition. I think we should add some internal buffers in virtio-rng backend. This would improve performance (we are at 1 MB/s, I sent a patch to improve that, but this doesn't fix the problems above), and allows hw_random core to use memory that doesn't need to be compatible with virt_to_page(). Thanks, Laurent
On Tue, Aug 11, 2020 at 03:53:54PM +0200, Laurent Vivier wrote: > On 11/08/2020 15:14, Michael S. Tsirkin wrote: > > On Tue, Aug 11, 2020 at 03:00:14PM +0200, Laurent Vivier wrote: > >> No problem. This code is tricky and it took me several months to really > >> start to understand it ... > > > > Oh great, we actually have someone who understands the code! > > Maybe you can help me understand: virtio_read > > takes the buf pointer and puts it in the vq. > > It can then return to caller (e.g. on a signal). > > Device can meanwhile write into the buffer. > > > > It looks like if another call then happens, and that > > other call uses a different buffer, virtio rng > > will happily return the data written into the > > original buf pointer, confusing the caller. > > > > Is that right? > > > > Yes. > > hw_random core uses two bufers: > > - rng_fillbuf that is used with a blocking access and protected by the > reading_mutex. I think this cannot be interrupted by a kill because it's > in hwrng_fillfn() and it's kthread. > > - rng_buffer that is used in rng_dev_read() and can be interrupted (it > is also protected by reading_mutex) > > But if rng_dev_read() is called with O_NONBLOCK or interrupted and then > rng_fillbuf starts they can be mixed. > > We have also the first use of rng_buffer in add_early_randomness() that > use a different size than in rng_dev_read() with the same buffer (and > this size is 16 whereas the hwrng read API says it must be at least 32...). > > The problem here is core has been developped with synchronicity in mind, > whereas virtio is asynchronous by definition. > > I think we should add some internal buffers in virtio-rng backend. This > would improve performance (we are at 1 MB/s, I sent a patch to improve > that, but this doesn't fix the problems above), and allows hw_random > core to use memory that doesn't need to be compatible with virt_to_page(). 
> > Thanks, > Laurent OK so just add a bunch of 32 bit buffers and pass them to hardware, as they data gets consumed pass them to hardware again? -- MST
On 11/08/2020 16:49, Michael S. Tsirkin wrote: > On Tue, Aug 11, 2020 at 03:53:54PM +0200, Laurent Vivier wrote: >> On 11/08/2020 15:14, Michael S. Tsirkin wrote: >>> On Tue, Aug 11, 2020 at 03:00:14PM +0200, Laurent Vivier wrote: >>>> No problem. This code is tricky and it took me several months to really >>>> start to understand it ... >>> >>> Oh great, we actually have someone who understands the code! >>> Maybe you can help me understand: virtio_read >>> takes the buf pointer and puts it in the vq. >>> It can then return to caller (e.g. on a signal). >>> Device can meanwhile write into the buffer. >>> >>> It looks like if another call then happens, and that >>> other call uses a different buffer, virtio rng >>> will happily return the data written into the >>> original buf pointer, confusing the caller. >>> >>> Is that right? >>> >> >> Yes. >> >> hw_random core uses two bufers: >> >> - rng_fillbuf that is used with a blocking access and protected by the >> reading_mutex. I think this cannot be interrupted by a kill because it's >> in hwrng_fillfn() and it's kthread. >> >> - rng_buffer that is used in rng_dev_read() and can be interrupted (it >> is also protected by reading_mutex) >> >> But if rng_dev_read() is called with O_NONBLOCK or interrupted and then >> rng_fillbuf starts they can be mixed. >> >> We have also the first use of rng_buffer in add_early_randomness() that >> use a different size than in rng_dev_read() with the same buffer (and >> this size is 16 whereas the hwrng read API says it must be at least 32...). >> >> The problem here is core has been developped with synchronicity in mind, >> whereas virtio is asynchronous by definition. >> >> I think we should add some internal buffers in virtio-rng backend. This >> would improve performance (we are at 1 MB/s, I sent a patch to improve >> that, but this doesn't fix the problems above), and allows hw_random >> core to use memory that doesn't need to be compatible with virt_to_page(). 
>> >> Thanks, >> Laurent > > OK so just add a bunch of 32 bit buffers and pass them to hardware, > as they data gets consumed pass them to hardware again? > > For virtio-rng performance we must ask for the bigger block we can (the size given in rng_dev_read() would be great). But the problem here is not to waste entropy. We should avoid to ask for entropy we don't need. So we can't really enqueue buffer before knowing the size. And if there is no enough entroy to fill the buffer but enough for the use we can be blocked waiting for entropy we don't need. And the change must be done at virtio-rng level, not in core because it's useless for other backends Moreover, the buffer in core will be used with another hw_random backend if the user change the backend while the buffer is in use by virtio-rng. So we really need to copy between virtio-rng buffer and core buffer. I've also propose a change to the virtio entropy device specs to add a command queue and a command to flush the enqueued buffers. The purpose was to be able to remove a blocked device, but it can also be useful in this case. to remove the buffer of an interrupted read. Thanks, Laurent
On Tue, 2020-08-11 at 14:53 +0200, Martin Wilck wrote: > On Tue, 2020-08-11 at 14:39 +0200, Laurent Vivier wrote: > > On 11/08/2020 14:22, Martin Wilck wrote: > > > On Tue, 2020-08-11 at 14:02 +0200, Laurent Vivier wrote: > > > > > > drivers/char/hw_random/virtio-rng.c | 14 ++++++++++++++ > > > > > > 1 file changed, 14 insertions(+) > > > > > > > > > > > > diff --git a/drivers/char/hw_random/virtio-rng.c > > > > > > b/drivers/char/hw_random/virtio-rng.c > > > > > > index 79a6e47b5fbc..984713b35892 100644 > > > > > > --- a/drivers/char/hw_random/virtio-rng.c > > > > > > +++ b/drivers/char/hw_random/virtio-rng.c > > > > > > @@ -59,6 +59,20 @@ static int virtio_read(struct hwrng > > > > > > *rng, > > > > > > void > > > > > > *buf, size_t size, bool wait) > > > > > > if (vi->hwrng_removed) > > > > > > return -ENODEV; > > > > > > > > > > > > + /* > > > > > > + * If the previous call was non-blocking, we may have > > > > > > got some > > > > > > + * randomness already. > > > > > > + */ > > > > > > + if (vi->busy && completion_done(&vi->have_data)) { > > > > > > + unsigned int len; > > > > > > + > > > > > > + vi->busy = false; > > > > > > + len = vi->data_avail > size ? size : vi- > > > > > > > data_avail; > > > > > > + vi->data_avail -= len; > > > > > > > > You don't need to modify data_avail. As busy is set to false, > > > > the > > > > buffer > > > > will be reused. and it is always overwritten by > > > > virtqueue_get_buf(). > > > > And moreover, if it was reused it would be always the > > > > beginning. > > > > > > Ok. > > > > > > > > > + if (len) > > > > > > + return len; > > > > > > + } > > > > > > + > > > > > > if (!vi->busy) { > > > > > > vi->busy = true; > > > > > > reinit_completion(&vi->have_data); > > > > > > > > > > > > > > Why don't you modify only the wait case? 
> > > > > > > > Something like: > > > > > > > > if (!wait && !completion_done(&vi->have_data)) { > > > > return 0; > > > > } > > > > > > > > then at the end you can do "return min(size, vi->data_avail);". > > > > > > Sorry, I don't understand what you mean. Where would you insert > > > the > > > above "if" clause? Are you saying I should call > > > wait_for_completion_killable() also in the (!wait) case? > > > > Yes, but only if the completion is done, so it will not wait. > > > > Slowly getting there, thanks for your patience. Yes, I guess this > would > work, too. I'll test and get back to you. > > > > I must check completion_done() before calling > > > reinit_completion(). > > > > Normally, the busy flag is here for that. If busy is true, a buffer > > is > > already registered. reinit_completion() must not be called if busy > > is > > true. busy becomes false when the buffer is ready to be reused. > > My thinking was that once the completion is done, "busy" doesn't > reflect the actual state any more. But your idea is leaner and keeps > the semantics of "busy". I'll give it a try. Confirmed, your code solves the issue as well. I'm going to submit a v3, although in light of the discussion with Michael, I assume that you're going to go at it yourself to solve the other issues. Regards and thanks, Martin
On Wed, Jul 15, 2020 at 03:32:55PM +0200, mwilck@suse.com wrote: > From: Martin Wilck <mwilck@suse.com> > > If a program opens /dev/hwrng with O_NONBLOCK and uses poll() and > non-blocking read() to retrieve random data, it ends up in a tight > loop with poll() always returning POLLIN and read() returning EAGAIN. > This repeats forever until some process makes a blocking read() call. > The reason is that virtio_read() always returns 0 in non-blocking mode, > even if data is available. Worse, it fetches random data from the > hypervisor after every non-blocking call, without ever using this data. > > The following test program illustrates the behavior and can be used > for testing and experiments. The problem will only be seen if all > tasks use non-blocking access; otherwise the blocking reads will > "recharge" the random pool and cause other, non-blocking reads to > succeed at least sometimes. > > /* Whether to use non-blocking mode in a task, problem occurs if CONDITION is 1 */ > //#define CONDITION (getpid() % 2 != 0) > > static volatile sig_atomic_t stop; > static void handler(int sig __attribute__((unused))) { stop = 1; } > > static void loop(int fd, int sec) > { > struct pollfd pfd = { .fd = fd, .events = POLLIN, }; > unsigned long errors = 0, eagains = 0, bytes = 0, succ = 0; > int size, rc, rd; > > srandom(getpid()); > if (CONDITION && fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_NONBLOCK) == -1) > perror("fcntl"); > size = MINBUFSIZ + random() % (MAXBUFSIZ - MINBUFSIZ + 1); > > for(;;) { > char buf[size]; > > if (stop) > break; > rc = poll(&pfd, 1, sec); > if (rc > 0) { > rd = read(fd, buf, sizeof(buf)); > if (rd == -1 && errno == EAGAIN) > eagains++; > else if (rd == -1) > errors++; > else { > succ++; > bytes += rd; > write(1, buf, sizeof(buf)); > } > } else if (rc == -1) { > if (errno != EINTR) > perror("poll"); > break; > } else > fprintf(stderr, "poll: timeout\n"); > } > fprintf(stderr, > "pid %d %sblocking, bufsize %d, %d seconds, %lu bytes read, %lu 
success, %lu eagain, %lu errors\n", > getpid(), CONDITION ? "non-" : "", size, sec, bytes, succ, eagains, errors); > } > > int main(void) > { > int fd; > > fork(); fork(); > fd = open("/dev/hwrng", O_RDONLY); > if (fd == -1) { > perror("open"); > return 1; > }; > signal(SIGALRM, handler); > alarm(SECONDS); > loop(fd, SECONDS); > close(fd); > wait(NULL); > return 0; > } > > void loop(int fd) > { > struct pollfd pfd0 = { .fd = fd, .events = POLLIN, }; > int rc; > unsigned int n; > > for (n = LOOPS; n > 0; n--) { > struct pollfd pfd = pfd0; > char buf[SIZE]; > > rc = poll(&pfd, 1, 1); > if (rc > 0) { > int rd = read(fd, buf, sizeof(buf)); > > if (rd == -1) > perror("read"); > else > printf("read %d bytes\n", rd); > } else if (rc == -1) > perror("poll"); > else > fprintf(stderr, "timeout\n"); > > } > } > > int main(void) > { > int fd; > > fd = open("/dev/hwrng", O_RDONLY|O_NONBLOCK); > if (fd == -1) { > perror("open"); > return 1; > }; > loop(fd); > close(fd); > return 0; > } > > This can be observed in the real word e.g. with nested qemu/KVM virtual > machines, if both the "outer" and "inner" VMs have a virtio-rng device. > If the "inner" VM requests random data, qemu running in the "outer" VM > uses this device in a non-blocking manner like the test program above. > > Fix it by returning available data if a previous hypervisor call has > completed in the meantime. I tested the patch with the program above, > and with rng-tools. 
> > Signed-off-by: Martin Wilck <mwilck@suse.com> > --- > drivers/char/hw_random/virtio-rng.c | 14 ++++++++++++++ > 1 file changed, 14 insertions(+) > > diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c > index 79a6e47b5fbc..984713b35892 100644 > --- a/drivers/char/hw_random/virtio-rng.c > +++ b/drivers/char/hw_random/virtio-rng.c > @@ -59,6 +59,20 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) > if (vi->hwrng_removed) > return -ENODEV; > > + /* > + * If the previous call was non-blocking, we may have got some > + * randomness already. > + */ > + if (vi->busy && completion_done(&vi->have_data)) { > + unsigned int len; > + > + vi->busy = false; > + len = vi->data_avail > size ? size : vi->data_avail; > + vi->data_avail -= len; I wonder what purpose does this line serve: busy is false which basically means data_avail is invalid, right? A following non blocking call will not enter here. > + if (len) > + return len; > + } > + > if (!vi->busy) { > vi->busy = true; > reinit_completion(&vi->have_data); > -- > 2.26.2
On Tue, 2020-08-11 at 07:26 -0400, Michael S. Tsirkin wrote: > On Wed, Jul 15, 2020 at 03:32:55PM +0200, mwilck@suse.com wrote: > > drivers/char/hw_random/virtio-rng.c | 14 ++++++++++++++ > > 1 file changed, 14 insertions(+) > > > > diff --git a/drivers/char/hw_random/virtio-rng.c > > b/drivers/char/hw_random/virtio-rng.c > > index 79a6e47b5fbc..984713b35892 100644 > > --- a/drivers/char/hw_random/virtio-rng.c > > +++ b/drivers/char/hw_random/virtio-rng.c > > @@ -59,6 +59,20 @@ static int virtio_read(struct hwrng *rng, void > > *buf, size_t size, bool wait) > > if (vi->hwrng_removed) > > return -ENODEV; > > > > + /* > > + * If the previous call was non-blocking, we may have got some > > + * randomness already. > > + */ > > + if (vi->busy && completion_done(&vi->have_data)) { > > + unsigned int len; > > + > > + vi->busy = false; > > + len = vi->data_avail > size ? size : vi->data_avail; > > + vi->data_avail -= len; > > I wonder what purpose does this line serve: busy is false > which basically means data_avail is invalid, right? > A following non blocking call will not enter here. Well, I thought this is just how reading data normally works. But you're right, the remainder will not be used. I can remove the line, or reset data_avail to 0 at this point. What do you prefer? Regards, Martin
On Tue, Aug 11, 2020 at 02:07:23PM +0200, Martin Wilck wrote: > On Tue, 2020-08-11 at 07:26 -0400, Michael S. Tsirkin wrote: > > On Wed, Jul 15, 2020 at 03:32:55PM +0200, mwilck@suse.com wrote: > > > drivers/char/hw_random/virtio-rng.c | 14 ++++++++++++++ > > > 1 file changed, 14 insertions(+) > > > > > > diff --git a/drivers/char/hw_random/virtio-rng.c > > > b/drivers/char/hw_random/virtio-rng.c > > > index 79a6e47b5fbc..984713b35892 100644 > > > --- a/drivers/char/hw_random/virtio-rng.c > > > +++ b/drivers/char/hw_random/virtio-rng.c > > > @@ -59,6 +59,20 @@ static int virtio_read(struct hwrng *rng, void > > > *buf, size_t size, bool wait) > > > if (vi->hwrng_removed) > > > return -ENODEV; > > > > > > + /* > > > + * If the previous call was non-blocking, we may have got some > > > + * randomness already. > > > + */ > > > + if (vi->busy && completion_done(&vi->have_data)) { > > > + unsigned int len; > > > + > > > + vi->busy = false; > > > + len = vi->data_avail > size ? size : vi->data_avail; > > > + vi->data_avail -= len; > > > > I wonder what purpose does this line serve: busy is false > > which basically means data_avail is invalid, right? > > A following non blocking call will not enter here. > > Well, I thought this is just how reading data normally works. But > you're right, the remainder will not be used. I can remove the line, or > reset data_avail to 0 at this point. What do you prefer? > > Regards, > Martin Removing seems cleaner. But looking at it, it is using the API in a very strange way: a buffer is placed in the ring by one call, and *assumed* to still be there in the next call. which it might not be if one call is from userspace and the next one is from fill kthread. I guess this is why it's returning 0: yes it knows there's data, but it does not know where it is. -- MST
© 2016 - 2024 Red Hat, Inc.