Reallocing the ioeventfds[] array each time an element is added is very
expensive as the number of ioeventfds increases. Batch allocate instead
to amortize the cost of realloc.
This patch reduces Linux guest boot times from 362s to 140s when there
are 2 virtio-blk devices with 1 virtqueue and 99 virtio-blk devices with
32 virtqueues.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
memory.c | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)
diff --git a/memory.c b/memory.c
index aeaa8dcc9e..2d6f931f8c 100644
--- a/memory.c
+++ b/memory.c
@@ -794,10 +794,18 @@ static void address_space_update_ioeventfds(AddressSpace *as)
FlatView *view;
FlatRange *fr;
unsigned ioeventfd_nb = 0;
- MemoryRegionIoeventfd *ioeventfds = NULL;
+ unsigned ioeventfd_max;
+ MemoryRegionIoeventfd *ioeventfds;
AddrRange tmp;
unsigned i;
+ /*
+ * It is likely that the number of ioeventfds hasn't changed much, so use
+ * the previous size as the starting value.
+ */
+ ioeventfd_max = as->ioeventfd_nb;
+ ioeventfds = g_new(MemoryRegionIoeventfd, ioeventfd_max);
+
view = address_space_get_flatview(as);
FOR_EACH_FLAT_RANGE(fr, view) {
for (i = 0; i < fr->mr->ioeventfd_nb; ++i) {
@@ -806,8 +814,11 @@ static void address_space_update_ioeventfds(AddressSpace *as)
int128_make64(fr->offset_in_region)));
if (addrrange_intersects(fr->addr, tmp)) {
++ioeventfd_nb;
- ioeventfds = g_realloc(ioeventfds,
- ioeventfd_nb * sizeof(*ioeventfds));
+ if (ioeventfd_nb > ioeventfd_max) {
+ ioeventfd_max += 64;
+ ioeventfds = g_realloc(ioeventfds,
+ ioeventfd_max * sizeof(*ioeventfds));
+ }
ioeventfds[ioeventfd_nb-1] = fr->mr->ioeventfds[i];
ioeventfds[ioeventfd_nb-1].addr = tmp;
}
--
2.24.1
On Tue, Feb 18, 2020 at 06:22:26PM +0000, Stefan Hajnoczi wrote:
> Reallocing the ioeventfds[] array each time an element is added is very
> expensive as the number of ioeventfds increases. Batch allocate instead
> to amortize the cost of realloc.
>
> This patch reduces Linux guest boot times from 362s to 140s when there
> are 2 virtio-blk devices with 1 virtqueue and 99 virtio-blk devices with
> 32 virtqueues.
>
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
> memory.c | 17 ++++++++++++++---
> 1 file changed, 14 insertions(+), 3 deletions(-)
>
> diff --git a/memory.c b/memory.c
> index aeaa8dcc9e..2d6f931f8c 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -794,10 +794,18 @@ static void address_space_update_ioeventfds(AddressSpace *as)
> FlatView *view;
> FlatRange *fr;
> unsigned ioeventfd_nb = 0;
> - MemoryRegionIoeventfd *ioeventfds = NULL;
> + unsigned ioeventfd_max;
> + MemoryRegionIoeventfd *ioeventfds;
> AddrRange tmp;
> unsigned i;
>
> + /*
> + * It is likely that the number of ioeventfds hasn't changed much, so use
> + * the previous size as the starting value.
> + */
> + ioeventfd_max = as->ioeventfd_nb;
> + ioeventfds = g_new(MemoryRegionIoeventfd, ioeventfd_max);
Would the ioeventfd_max being cached and never goes down but it can
only keep or increase? I'm not sure if that's a big problem, but
considering the commit message mentioned 99 virtio-blk with 32 queues
each, I'm not sure... :)
I'm thinking maybe start with a relative big number but always under
control (e.g., 64), then...
> +
> view = address_space_get_flatview(as);
> FOR_EACH_FLAT_RANGE(fr, view) {
> for (i = 0; i < fr->mr->ioeventfd_nb; ++i) {
> @@ -806,8 +814,11 @@ static void address_space_update_ioeventfds(AddressSpace *as)
> int128_make64(fr->offset_in_region)));
> if (addrrange_intersects(fr->addr, tmp)) {
> ++ioeventfd_nb;
> - ioeventfds = g_realloc(ioeventfds,
> - ioeventfd_nb * sizeof(*ioeventfds));
> + if (ioeventfd_nb > ioeventfd_max) {
> + ioeventfd_max += 64;
... do exponential increase here (max*=2) instead so still easy to
converge?
Thanks,
> + ioeventfds = g_realloc(ioeventfds,
> + ioeventfd_max * sizeof(*ioeventfds));
> + }
> ioeventfds[ioeventfd_nb-1] = fr->mr->ioeventfds[i];
> ioeventfds[ioeventfd_nb-1].addr = tmp;
> }
> --
> 2.24.1
>
--
Peter Xu
On Tue, Feb 18, 2020 at 9:50 PM Peter Xu <peterx@redhat.com> wrote:
>
> On Tue, Feb 18, 2020 at 06:22:26PM +0000, Stefan Hajnoczi wrote:
> > Reallocing the ioeventfds[] array each time an element is added is very
> > expensive as the number of ioeventfds increases. Batch allocate instead
> > to amortize the cost of realloc.
> >
> > This patch reduces Linux guest boot times from 362s to 140s when there
> > are 2 virtio-blk devices with 1 virtqueue and 99 virtio-blk devices with
> > 32 virtqueues.
> >
> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > ---
> > memory.c | 17 ++++++++++++++---
> > 1 file changed, 14 insertions(+), 3 deletions(-)
> >
> > diff --git a/memory.c b/memory.c
> > index aeaa8dcc9e..2d6f931f8c 100644
> > --- a/memory.c
> > +++ b/memory.c
> > @@ -794,10 +794,18 @@ static void address_space_update_ioeventfds(AddressSpace *as)
> > FlatView *view;
> > FlatRange *fr;
> > unsigned ioeventfd_nb = 0;
> > - MemoryRegionIoeventfd *ioeventfds = NULL;
> > + unsigned ioeventfd_max;
> > + MemoryRegionIoeventfd *ioeventfds;
> > AddrRange tmp;
> > unsigned i;
> >
> > + /*
> > + * It is likely that the number of ioeventfds hasn't changed much, so use
> > + * the previous size as the starting value.
> > + */
> > + ioeventfd_max = as->ioeventfd_nb;
> > + ioeventfds = g_new(MemoryRegionIoeventfd, ioeventfd_max);
>
> Would the ioeventfd_max being cached and never goes down but it can
> only keep or increase?
No, it will decrease but only the next time
address_space_update_ioeventfds() is called. That's when we'll use
the next ioeventfds_nb as the starting point.
> I'm not sure if that's a big problem, but
> considering the commit message mentioned 99 virtio-blk with 32 queues
> each, I'm not sure... :)
>
> I'm thinking maybe start with a relative big number but always under
> control (e.g., 64), then...
I also considered doing a final g_realloc() at the end of the function
to get rid of the excess allocation but I'm not sure it's worth it...
> > +
> > view = address_space_get_flatview(as);
> > FOR_EACH_FLAT_RANGE(fr, view) {
> > for (i = 0; i < fr->mr->ioeventfd_nb; ++i) {
> > @@ -806,8 +814,11 @@ static void address_space_update_ioeventfds(AddressSpace *as)
> > int128_make64(fr->offset_in_region)));
> > if (addrrange_intersects(fr->addr, tmp)) {
> > ++ioeventfd_nb;
> > - ioeventfds = g_realloc(ioeventfds,
> > - ioeventfd_nb * sizeof(*ioeventfds));
> > + if (ioeventfd_nb > ioeventfd_max) {
> > + ioeventfd_max += 64;
>
> ... do exponential increase here (max*=2) instead so still easy to
> converge?
I'm happy to tweak the policy. Let's see what Paolo thinks.
Stefan
On 19/02/20 10:18, Stefan Hajnoczi wrote: >> ... do exponential increase here (max*=2) instead so still easy to >> converge? > I'm happy to tweak the policy. Let's see what Paolo thinks. I included Peter's suggestion in my own tweak. Thanks to both of you! Paolo
On 18/02/20 19:22, Stefan Hajnoczi wrote:
> + * It is likely that the number of ioeventfds hasn't changed much, so use
> + * the previous size as the starting value.
> + */
> + ioeventfd_max = as->ioeventfd_nb;
> + ioeventfds = g_new(MemoryRegionIoeventfd, ioeventfd_max);
This would be a bit space-inefficient if we are adding just one ioeventfd,
because it would waste 64 entries right below. I would like to squash this
if it's okay with you:
diff --git a/memory.c b/memory.c
index 2d6f931f8c..09be40edd2 100644
--- a/memory.c
+++ b/memory.c
@@ -801,9 +801,10 @@ static void address_space_update_ioeventfds(AddressSpace *as)
/*
* It is likely that the number of ioeventfds hasn't changed much, so use
- * the previous size as the starting value.
+ * the previous size as the starting value, with some headroom to avoid
+ * gratuitous reallocations.
*/
- ioeventfd_max = as->ioeventfd_nb;
+ ioeventfd_max = QEMU_ALIGN_UP(as->ioeventfd_nb, 4);
ioeventfds = g_new(MemoryRegionIoeventfd, ioeventfd_max);
view = address_space_get_flatview(as);
@@ -815,7 +816,7 @@ static void address_space_update_ioeventfds(AddressSpace *as)
if (addrrange_intersects(fr->addr, tmp)) {
++ioeventfd_nb;
if (ioeventfd_nb > ioeventfd_max) {
- ioeventfd_max += 64;
+ ioeventfd_max = MAX(ioeventfd_max * 2, 4);
ioeventfds = g_realloc(ioeventfds,
ioeventfd_max * sizeof(*ioeventfds));
}
Thanks,
Paolo
> view = address_space_get_flatview(as);
> FOR_EACH_FLAT_RANGE(fr, view) {
> for (i = 0; i < fr->mr->ioeventfd_nb; ++i) {
> @@ -806,8 +814,11 @@ static void address_space_update_ioeventfds(AddressSpace *as)
> int128_make64(fr->offset_in_region)));
> if (addrrange_intersects(fr->addr, tmp)) {
> ++ioeventfd_nb;
> - ioeventfds = g_realloc(ioeventfds,
> - ioeventfd_nb * sizeof(*ioeventfds));
> + if (ioeventfd_nb > ioeventfd_max) {
> + ioeventfd_max += 64;
> + ioeventfds = g_realloc(ioeventfds,
> + ioeventfd_max * sizeof(*ioeventfds));
> + }
On Wed, Feb 19, 2020 at 12:36:04PM +0100, Paolo Bonzini wrote: > On 18/02/20 19:22, Stefan Hajnoczi wrote: > > + * It is likely that the number of ioeventfds hasn't changed much, so use > > + * the previous size as the starting value. > > + */ > > + ioeventfd_max = as->ioeventfd_nb; > > + ioeventfds = g_new(MemoryRegionIoeventfd, ioeventfd_max); > > This would be a bit space-inefficient if we are adding just one ioeventfd, > because it would waste 64 entries right below. I would like to squash this > if it's okay with you: Yes, please feel free to squash it: Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
© 2016 - 2025 Red Hat, Inc.