Reply: [PATCH] virtio-balloon: optimize the virtio-balloon on the ARM platform.

Yangming via posted 1 patch 1 year, 2 months ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/6d5f8a599b6a41de8885434e6bb91c9a@huawei.com
Maintainers: David Hildenbrand <david@redhat.com>, Igor Mammedov <imammedo@redhat.com>, Xiao Guangrong <xiaoguangrong.eric@gmail.com>, "Michael S. Tsirkin" <mst@redhat.com>, Eduardo Habkost <eduardo@habkost.net>, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, "Philippe Mathieu-Daudé" <philmd@linaro.org>, Yanan Wang <wangyanan55@huawei.com>
hw/mem/pc-dimm.c           |  2 ++
hw/virtio/virtio-balloon.c | 33 +++++----------------------------
include/hw/boards.h        |  1 +
3 files changed, 8 insertions(+), 28 deletions(-)
Reply: [PATCH] virtio-balloon: optimize the virtio-balloon on the ARM platform.
Posted by Yangming via 1 year, 2 months ago


> On Fri, Feb 24, 2023 at 08:23:40AM +0000, Yangming wrote:
> >
> > Optimize the virtio-balloon feature on the ARM platform by adding a
> variable to keep track of the current hot-plugged pc-dimm size, instead of
> traversing the virtual machine's memory modules to count the current RAM
> size during the balloon inflation or deflation process. This variable can be
> updated only when plugging or unplugging the device, which will result in an
> increase of more than 60% efficiency of balloon process on the ARM platform.
> >
> > Signed-off-by: Qi Xi <xiqi2@huawei.com>
> > Signed-off-by: Ming Yang yangming73@huawei.com
> 
> What kind of performance gains are achieved by this patch?
> Pls include some measurements: before and after.
> 
> 
> > ---
> >  hw/mem/pc-dimm.c           |  2 ++
> >  hw/virtio/virtio-balloon.c | 44 +++++++++++++-------------------------
> >  include/hw/boards.h        |  1 +
> >  3 files changed, 18 insertions(+), 29 deletions(-)
> >
> > diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c index
> 50ef83215c..192fc7922c 100644
> > --- a/hw/mem/pc-dimm.c
> > +++ b/hw/mem/pc-dimm.c
> > @@ -81,6 +81,7 @@ void pc_dimm_plug(PCDIMMDevice *dimm,
> MachineState *machine)
> >
> >      memory_device_plug(MEMORY_DEVICE(dimm), machine);
> >      vmstate_register_ram(vmstate_mr, DEVICE(dimm));
> > +    machine->device_memory->dimm_size += vmstate_mr->size;
> >  }
> >
> >  void pc_dimm_unplug(PCDIMMDevice *dimm, MachineState *machine)
> @@ -90,6 +91,7 @@ void pc_dimm_unplug(PCDIMMDevice *dimm,
> MachineState *machine)
> >
> >      memory_device_unplug(MEMORY_DEVICE(dimm), machine);
> >      vmstate_unregister_ram(vmstate_mr, DEVICE(dimm));
> > +    machine->device_memory->dimm_size -= vmstate_mr->size;
> >  }
> >
> >  static int pc_dimm_slot2bitmap(Object *obj, void *opaque) diff --git
> a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index
> 746f07c4d2..40fa40109d 100644
> > --- a/hw/virtio/virtio-balloon.c
> > +++ b/hw/virtio/virtio-balloon.c
> > @@ -729,37 +729,14 @@ static void virtio_balloon_get_config(VirtIODevice
> *vdev, uint8_t *config_data)
> >      memcpy(config_data, &config, virtio_balloon_config_size(dev));  }
> >
> > -static int build_dimm_list(Object *obj, void *opaque) -{
> > -    GSList **list = opaque;
> > -
> > -    if (object_dynamic_cast(obj, TYPE_PC_DIMM)) {
> > -        DeviceState *dev = DEVICE(obj);
> > -        if (dev->realized) { /* only realized DIMMs matter */
> > -            *list = g_slist_prepend(*list, dev);
> > -        }
> > -    }
> > -
> > -    object_child_foreach(obj, build_dimm_list, opaque);
> > -    return 0;
> > -}
> > -
> >  static ram_addr_t get_current_ram_size(void)  {
> > -    GSList *list = NULL, *item;
> > -    ram_addr_t size = current_machine->ram_size;
> > -
> > -    build_dimm_list(qdev_get_machine(), &list);
> > -    for (item = list; item; item = g_slist_next(item)) {
> > -        Object *obj = OBJECT(item->data);
> > -        if (!strcmp(object_get_typename(obj), TYPE_PC_DIMM)) {
> > -            size += object_property_get_int(obj, PC_DIMM_SIZE_PROP,
> > -                                            &error_abort);
> > -        }
> > +    MachineState *machine = MACHINE(qdev_get_machine());
> > +    if (machine->device_memory != NULL) {
> > +        return machine->ram_size + machine->device_memory->dimm_size;
> > +    } else {
> > +        return machine->ram_size;
> >      }
> > -    g_slist_free(list);
> > -
> > -    return size;
> >  }
> >
> >  static bool virtio_balloon_page_poison_support(void *opaque) @@ -776,7
> +753,11 @@ static void virtio_balloon_set_config(VirtIODevice *vdev,
> >      VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
> >      struct virtio_balloon_config config;
> >      uint32_t oldactual = dev->actual;
> > -    ram_addr_t vm_ram_size = get_current_ram_size();
> > +    ram_addr_t vm_ram_size;
> > +    ram_addr_t vm_ram_size_new;
> > +
> > +retry:
> > +    vm_ram_size = get_current_ram_size();
> >
> >      memcpy(&config, config_data, virtio_balloon_config_size(dev));
> >      dev->actual = le32_to_cpu(config.actual); @@ -784,6 +765,11 @@ static
> void virtio_balloon_set_config(VirtIODevice *vdev,
> >          qapi_event_send_balloon_change(vm_ram_size -
> >                          ((ram_addr_t) dev->actual << VIRTIO_BALLOON_PFN_SHIFT));
> >      }
> > +    vm_ram_size_new = get_current_ram_size();
> > +    if (vm_ram_size_new != vm_ram_size) {
> > +        goto retry;
> > +    }
> > +
> 
> What is this doing? needs a comment.
> And please don't implement loops using goto.
> 
> 
> >      dev->poison_val = 0;
> >      if (virtio_balloon_page_poison_support(dev)) {
> >          dev->poison_val = le32_to_cpu(config.poison_val); diff --git
> a/include/hw/boards.h b/include/hw/boards.h index
> 6fbbfd56c8..551b4b419e 100644
> > --- a/include/hw/boards.h
> > +++ b/include/hw/boards.h
> > @@ -296,6 +296,7 @@ struct MachineClass {  typedef struct
> DeviceMemoryState {
> >      hwaddr base;
> >      MemoryRegion mr;
> > +    ram_addr_t dimm_size;
> >  } DeviceMemoryState;
> >
> >  /**
> > --
> > 2.33.0


Optimize the virtio-balloon feature on the ARM platform by adding
a variable that keeps track of the total size of the currently
hot-plugged pc-dimm devices, instead of traversing the virtual machine's
memory modules to count the current RAM size during the balloon
inflation or deflation process. The variable only needs to be updated
when a device is plugged or unplugged, which reduces the time taken by
the balloon process on the ARM platform by approximately 60%.

We measured the total time required for the balloon inflation process on ARM
when inflating the balloon to 64GB on a 128GB guest under stress:
Before: 102 seconds
After: 42 seconds

Signed-off-by: Qi Xi <xiqi2@huawei.com>
Signed-off-by: Ming Yang <yangming73@huawei.com>
---
 hw/mem/pc-dimm.c           |  2 ++
 hw/virtio/virtio-balloon.c | 33 +++++----------------------------
 include/hw/boards.h        |  1 +
 3 files changed, 8 insertions(+), 28 deletions(-)

diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
index 50ef83215c..192fc7922c 100644
--- a/hw/mem/pc-dimm.c
+++ b/hw/mem/pc-dimm.c
@@ -81,6 +81,7 @@ void pc_dimm_plug(PCDIMMDevice *dimm, MachineState *machine)
 
     memory_device_plug(MEMORY_DEVICE(dimm), machine);
     vmstate_register_ram(vmstate_mr, DEVICE(dimm));
+    machine->device_memory->dimm_size += vmstate_mr->size;
 }
 
 void pc_dimm_unplug(PCDIMMDevice *dimm, MachineState *machine)
@@ -90,6 +91,7 @@ void pc_dimm_unplug(PCDIMMDevice *dimm, MachineState *machine)
 
     memory_device_unplug(MEMORY_DEVICE(dimm), machine);
     vmstate_unregister_ram(vmstate_mr, DEVICE(dimm));
+    machine->device_memory->dimm_size -= vmstate_mr->size;
 }
 
 static int pc_dimm_slot2bitmap(Object *obj, void *opaque)
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index 746f07c4d2..80bbb59132 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -729,37 +729,14 @@ static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
     memcpy(config_data, &config, virtio_balloon_config_size(dev));
 }
 
-static int build_dimm_list(Object *obj, void *opaque)
-{
-    GSList **list = opaque;
-
-    if (object_dynamic_cast(obj, TYPE_PC_DIMM)) {
-        DeviceState *dev = DEVICE(obj);
-        if (dev->realized) { /* only realized DIMMs matter */
-            *list = g_slist_prepend(*list, dev);
-        }
-    }
-
-    object_child_foreach(obj, build_dimm_list, opaque);
-    return 0;
-}
-
 static ram_addr_t get_current_ram_size(void)
 {
-    GSList *list = NULL, *item;
-    ram_addr_t size = current_machine->ram_size;
-
-    build_dimm_list(qdev_get_machine(), &list);
-    for (item = list; item; item = g_slist_next(item)) {
-        Object *obj = OBJECT(item->data);
-        if (!strcmp(object_get_typename(obj), TYPE_PC_DIMM)) {
-            size += object_property_get_int(obj, PC_DIMM_SIZE_PROP,
-                                            &error_abort);
-        }
+    MachineState *machine = MACHINE(qdev_get_machine());
+    if (machine->device_memory != NULL) {
+        return machine->ram_size + machine->device_memory->dimm_size;
+    } else {
+        return machine->ram_size;
     }
-    g_slist_free(list);
-
-    return size;
 }
 
 static bool virtio_balloon_page_poison_support(void *opaque)
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 6fbbfd56c8..551b4b419e 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -296,6 +296,7 @@ struct MachineClass {
 typedef struct DeviceMemoryState {
     hwaddr base;
     MemoryRegion mr;
+    ram_addr_t dimm_size;
 } DeviceMemoryState;
 
 /**
-- 
2.33.0
Re: Reply: [PATCH] virtio-balloon: optimize the virtio-balloon on the ARM platform.
Posted by David Hildenbrand 1 year, 2 months ago
> Optimize the virtio-balloon feature on the ARM platform by adding
> a variable to keep track of the current hot-plugged pc-dimm size,
> instead of traversing the virtual machine's memory modules to count
> the current RAM size during the balloon inflation or deflation
> process. This variable can be updated only when plugging or unplugging
> the device, which will result in an increase of approximately 60%
> efficiency of balloon process on the ARM platform.
> 
> We tested the total amount of time required for the balloon inflation process on ARM:
> inflate the balloon to 64GB of a 128GB guest under stress.
> Before: 102 seconds
> After: 42 seconds
> 
> Signed-off-by: Qi Xi <xiqi2@huawei.com>
> Signed-off-by: Ming Yang yangming73@huawei.com
> ---
>   hw/mem/pc-dimm.c           |  2 ++
>   hw/virtio/virtio-balloon.c | 33 +++++----------------------------
>   include/hw/boards.h        |  1 +
>   3 files changed, 8 insertions(+), 28 deletions(-)
> 
> diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
> index 50ef83215c..192fc7922c 100644
> --- a/hw/mem/pc-dimm.c
> +++ b/hw/mem/pc-dimm.c
> @@ -81,6 +81,7 @@ void pc_dimm_plug(PCDIMMDevice *dimm, MachineState *machine)
>   
>       memory_device_plug(MEMORY_DEVICE(dimm), machine);
>       vmstate_register_ram(vmstate_mr, DEVICE(dimm));
> +    machine->device_memory->dimm_size += vmstate_mr->size;
>   }
>   
>   void pc_dimm_unplug(PCDIMMDevice *dimm, MachineState *machine)
> @@ -90,6 +91,7 @@ void pc_dimm_unplug(PCDIMMDevice *dimm, MachineState *machine)
>   
>       memory_device_unplug(MEMORY_DEVICE(dimm), machine);
>       vmstate_unregister_ram(vmstate_mr, DEVICE(dimm));
> +    machine->device_memory->dimm_size -= vmstate_mr->size;
>   }

Ahh, missed that my previous comment was not addressed: we only want to 
track "real" DIMMs, not NVDIMMs.

-- 
Thanks,

David / dhildenb
Re: Reply: [PATCH] virtio-balloon: optimize the virtio-balloon on the ARM platform.
Posted by David Hildenbrand 1 year, 2 months ago
On 27.02.23 02:25, Yangming wrote:
> 
> 
>> On Fri, Feb 24, 2023 at 08:23:40AM +0000, Yangming wrote:
>>>
>>> Optimize the virtio-balloon feature on the ARM platform by adding a
>> variable to keep track of the current hot-plugged pc-dimm size, instead of
>> traversing the virtual machine's memory modules to count the current RAM
>> size during the balloon inflation or deflation process. This variable can be
>> updated only when plugging or unplugging the device, which will result in an
>> increase of more than 60% efficiency of balloon process on the ARM platform.
>>>
>>> Signed-off-by: Qi Xi <xiqi2@huawei.com>
>>> Signed-off-by: Ming Yang yangming73@huawei.com
>>
>> What kind of performance gains are achieved by this patch?
>> Pls include some measurements: before and after.
>>
>>
>>> ---
>>>   hw/mem/pc-dimm.c           |  2 ++
>>>   hw/virtio/virtio-balloon.c | 44 +++++++++++++-------------------------
>>>   include/hw/boards.h        |  1 +
>>>   3 files changed, 18 insertions(+), 29 deletions(-)
>>>
>>> diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c index
>> 50ef83215c..192fc7922c 100644
>>> --- a/hw/mem/pc-dimm.c
>>> +++ b/hw/mem/pc-dimm.c
>>> @@ -81,6 +81,7 @@ void pc_dimm_plug(PCDIMMDevice *dimm,
>> MachineState *machine)
>>>
>>>       memory_device_plug(MEMORY_DEVICE(dimm), machine);
>>>       vmstate_register_ram(vmstate_mr, DEVICE(dimm));
>>> +    machine->device_memory->dimm_size += vmstate_mr->size;
>>>   }
>>>
>>>   void pc_dimm_unplug(PCDIMMDevice *dimm, MachineState *machine)
>> @@ -90,6 +91,7 @@ void pc_dimm_unplug(PCDIMMDevice *dimm,
>> MachineState *machine)
>>>
>>>       memory_device_unplug(MEMORY_DEVICE(dimm), machine);
>>>       vmstate_unregister_ram(vmstate_mr, DEVICE(dimm));
>>> +    machine->device_memory->dimm_size -= vmstate_mr->size;
>>>   }
>>>
>>>   static int pc_dimm_slot2bitmap(Object *obj, void *opaque) diff --git
>> a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index
>> 746f07c4d2..40fa40109d 100644
>>> --- a/hw/virtio/virtio-balloon.c
>>> +++ b/hw/virtio/virtio-balloon.c
>>> @@ -729,37 +729,14 @@ static void virtio_balloon_get_config(VirtIODevice
>> *vdev, uint8_t *config_data)
>>>       memcpy(config_data, &config, virtio_balloon_config_size(dev));  }
>>>
>>> -static int build_dimm_list(Object *obj, void *opaque) -{
>>> -    GSList **list = opaque;
>>> -
>>> -    if (object_dynamic_cast(obj, TYPE_PC_DIMM)) {
>>> -        DeviceState *dev = DEVICE(obj);
>>> -        if (dev->realized) { /* only realized DIMMs matter */
>>> -            *list = g_slist_prepend(*list, dev);
>>> -        }
>>> -    }
>>> -
>>> -    object_child_foreach(obj, build_dimm_list, opaque);
>>> -    return 0;
>>> -}
>>> -
>>>   static ram_addr_t get_current_ram_size(void)  {
>>> -    GSList *list = NULL, *item;
>>> -    ram_addr_t size = current_machine->ram_size;
>>> -
>>> -    build_dimm_list(qdev_get_machine(), &list);
>>> -    for (item = list; item; item = g_slist_next(item)) {
>>> -        Object *obj = OBJECT(item->data);
>>> -        if (!strcmp(object_get_typename(obj), TYPE_PC_DIMM)) {
>>> -            size += object_property_get_int(obj, PC_DIMM_SIZE_PROP,
>>> -                                            &error_abort);
>>> -        }
>>> +    MachineState *machine = MACHINE(qdev_get_machine());
>>> +    if (machine->device_memory != NULL) {
>>> +        return machine->ram_size + machine->device_memory->dimm_size;
>>> +    } else {
>>> +        return machine->ram_size;
>>>       }
>>> -    g_slist_free(list);
>>> -
>>> -    return size;
>>>   }
>>>
>>>   static bool virtio_balloon_page_poison_support(void *opaque) @@ -776,7
>> +753,11 @@ static void virtio_balloon_set_config(VirtIODevice *vdev,
>>>       VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
>>>       struct virtio_balloon_config config;
>>>       uint32_t oldactual = dev->actual;
>>> -    ram_addr_t vm_ram_size = get_current_ram_size();
>>> +    ram_addr_t vm_ram_size;
>>> +    ram_addr_t vm_ram_size_new;
>>> +
>>> +retry:
>>> +    vm_ram_size = get_current_ram_size();
>>>
>>>       memcpy(&config, config_data, virtio_balloon_config_size(dev));
>>>       dev->actual = le32_to_cpu(config.actual); @@ -784,6 +765,11 @@ static
>> void virtio_balloon_set_config(VirtIODevice *vdev,
>>>           qapi_event_send_balloon_change(vm_ram_size -
>>>                           ((ram_addr_t) dev->actual << VIRTIO_BALLOON_PFN_SHIFT));
>>>       }
>>> +    vm_ram_size_new = get_current_ram_size();
>>> +    if (vm_ram_size_new != vm_ram_size) {
>>> +        goto retry;
>>> +    }
>>> +
>>
>> What is this doing? needs a comment.
>> And please don't implement loops using goto.
>>
>>
>>>       dev->poison_val = 0;
>>>       if (virtio_balloon_page_poison_support(dev)) {
>>>           dev->poison_val = le32_to_cpu(config.poison_val); diff --git
>> a/include/hw/boards.h b/include/hw/boards.h index
>> 6fbbfd56c8..551b4b419e 100644
>>> --- a/include/hw/boards.h
>>> +++ b/include/hw/boards.h
>>> @@ -296,6 +296,7 @@ struct MachineClass {  typedef struct
>> DeviceMemoryState {
>>>       hwaddr base;
>>>       MemoryRegion mr;
>>> +    ram_addr_t dimm_size;
>>>   } DeviceMemoryState;
>>>
>>>   /**
>>> --
>>> 2.33.0
> 
> 
> Optimize the virtio-balloon feature on the ARM platform by adding
> a variable to keep track of the current hot-plugged pc-dimm size,
> instead of traversing the virtual machine's memory modules to count
> the current RAM size during the balloon inflation or deflation
> process. This variable can be updated only when plugging or unplugging
> the device, which will result in an increase of approximately 60%
> efficiency of balloon process on the ARM platform.
> 
> We tested the total amount of time required for the balloon inflation process on ARM:
> inflate the balloon to 64GB of a 128GB guest under stress.
> Before: 102 seconds
> After: 42 seconds
> 
> Signed-off-by: Qi Xi <xiqi2@huawei.com>
> Signed-off-by: Ming Yang yangming73@huawei.com
> ---


Acked-by: David Hildenbrand <david@redhat.com>

-- 
Thanks,

David / dhildenb