[Qemu-devel] [PULL 19/22] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT

Dr. David Alan Gilbert (git) posted 22 patches 5 years, 3 months ago
Maintainers: Richard Henderson <rth@twiddle.net>, "Michael S. Tsirkin" <mst@redhat.com>, "Dr. David Alan Gilbert" <dgilbert@redhat.com>, zhanghailiang <zhang.zhanghailiang@huawei.com>, Paolo Bonzini <pbonzini@redhat.com>, Laurent Vivier <lvivier@redhat.com>, Thomas Huth <thuth@redhat.com>, Juan Quintela <quintela@redhat.com>, Markus Armbruster <armbru@redhat.com>, Eric Blake <eblake@redhat.com>
There is a newer version of this series
[Qemu-devel] [PULL 19/22] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT
Posted by Dr. David Alan Gilbert (git) 5 years, 3 months ago
From: Wei Wang <wei.w.wang@intel.com>

The new feature enables the virtio-balloon device to receive hints of
guest free pages from the free page vq.

A notifier is registered on the migration precopy notifier chain. The
notifier calls free_page_start after the migration thread syncs the dirty
bitmap, so that the free page optimization starts clearing the bits of
free pages from the bitmap. It calls free_page_stop before the migration
thread syncs the bitmap, which marks the end of the current round of RAM
saving. free_page_stop is also called to stop the optimization when an
error occurs during RAM saving.

Note: the balloon will report pages which were free at the time of the
free_page_start call. As the reporting happens asynchronously, dirty bit
logging must be enabled before the free_page_start call is made. Guest
reporting must be disabled before the migration dirty bitmap is
synchronized.

Signed-off-by: Wei Wang <wei.w.wang@intel.com>
CC: Michael S. Tsirkin <mst@redhat.com>
CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
CC: Juan Quintela <quintela@redhat.com>
CC: Peter Xu <peterx@redhat.com>
Message-Id: <1544516693-5395-8-git-send-email-wei.w.wang@intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
  dgilbert: Dropped kernel header update, fixed up CMD_ID_* name change
---
 hw/virtio/virtio-balloon.c         | 263 +++++++++++++++++++++++++++++
 include/hw/virtio/virtio-balloon.h |  28 ++-
 2 files changed, 290 insertions(+), 1 deletion(-)

diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index d3f2913a85..e3a65940ef 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -27,6 +27,7 @@
 #include "qapi/visitor.h"
 #include "trace.h"
 #include "qemu/error-report.h"
+#include "migration/misc.h"
 
 #include "hw/virtio/virtio-bus.h"
 #include "hw/virtio/virtio-access.h"
@@ -378,6 +379,184 @@ out:
     }
 }
 
+static void virtio_balloon_handle_free_page_vq(VirtIODevice *vdev,
+                                               VirtQueue *vq)
+{
+    VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
+    qemu_bh_schedule(s->free_page_bh);
+}
+
+static bool get_free_page_hints(VirtIOBalloon *dev)
+{
+    VirtQueueElement *elem;
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VirtQueue *vq = dev->free_page_vq;
+
+    while (dev->block_iothread) {
+        qemu_cond_wait(&dev->free_page_cond, &dev->free_page_lock);
+    }
+
+    elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
+    if (!elem) {
+        return false;
+    }
+
+    if (elem->out_num) {
+        uint32_t id;
+        size_t size = iov_to_buf(elem->out_sg, elem->out_num, 0,
+                                 &id, sizeof(id));
+        virtqueue_push(vq, elem, size);
+        g_free(elem);
+
+        virtio_tswap32s(vdev, &id);
+        if (unlikely(size != sizeof(id))) {
+            virtio_error(vdev, "received an incorrect cmd id");
+            return false;
+        }
+        if (id == dev->free_page_report_cmd_id) {
+            dev->free_page_report_status = FREE_PAGE_REPORT_S_START;
+        } else {
+            /*
+             * Stop the optimization only when it has started. This
+             * avoids a stale stop sign for the previous command.
+             */
+            if (dev->free_page_report_status == FREE_PAGE_REPORT_S_START) {
+                dev->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
+            }
+        }
+    }
+
+    if (elem->in_num) {
+        if (dev->free_page_report_status == FREE_PAGE_REPORT_S_START) {
+            qemu_guest_free_page_hint(elem->in_sg[0].iov_base,
+                                      elem->in_sg[0].iov_len);
+        }
+        virtqueue_push(vq, elem, 1);
+        g_free(elem);
+    }
+
+    return true;
+}
+
+static void virtio_ballloon_get_free_page_hints(void *opaque)
+{
+    VirtIOBalloon *dev = opaque;
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VirtQueue *vq = dev->free_page_vq;
+    bool continue_to_get_hints;
+
+    do {
+        qemu_mutex_lock(&dev->free_page_lock);
+        virtio_queue_set_notification(vq, 0);
+        continue_to_get_hints = get_free_page_hints(dev);
+        qemu_mutex_unlock(&dev->free_page_lock);
+        virtio_notify(vdev, vq);
+      /*
+       * Start to poll the vq once the reporting started. Otherwise, continue
+       * only when there are entries on the vq, which need to be given back.
+       */
+    } while (continue_to_get_hints ||
+             dev->free_page_report_status == FREE_PAGE_REPORT_S_START);
+    virtio_queue_set_notification(vq, 1);
+}
+
+static bool virtio_balloon_free_page_support(void *opaque)
+{
+    VirtIOBalloon *s = opaque;
+    VirtIODevice *vdev = VIRTIO_DEVICE(s);
+
+    return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT);
+}
+
+static void virtio_balloon_free_page_start(VirtIOBalloon *s)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(s);
+
+    /* For the stop and copy phase, we don't need to start the optimization */
+    if (!vdev->vm_running) {
+        return;
+    }
+
+    if (s->free_page_report_cmd_id == UINT_MAX) {
+        s->free_page_report_cmd_id =
+                       VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN;
+    } else {
+        s->free_page_report_cmd_id++;
+    }
+
+    s->free_page_report_status = FREE_PAGE_REPORT_S_REQUESTED;
+    virtio_notify_config(vdev);
+}
+
+static void virtio_balloon_free_page_stop(VirtIOBalloon *s)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(s);
+
+    if (s->free_page_report_status != FREE_PAGE_REPORT_S_STOP) {
+        /*
+         * The lock also guarantees us that the
+         * virtio_ballloon_get_free_page_hints exits after the
+         * free_page_report_status is set to S_STOP.
+         */
+        qemu_mutex_lock(&s->free_page_lock);
+        /*
+         * The guest hasn't done the reporting, so host sends a notification
+         * to the guest to actively stop the reporting.
+         */
+        s->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
+        qemu_mutex_unlock(&s->free_page_lock);
+        virtio_notify_config(vdev);
+    }
+}
+
+static void virtio_balloon_free_page_done(VirtIOBalloon *s)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(s);
+
+    s->free_page_report_status = FREE_PAGE_REPORT_S_DONE;
+    virtio_notify_config(vdev);
+}
+
+static int
+virtio_balloon_free_page_report_notify(NotifierWithReturn *n, void *data)
+{
+    VirtIOBalloon *dev = container_of(n, VirtIOBalloon,
+                                      free_page_report_notify);
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    PrecopyNotifyData *pnd = data;
+
+    if (!virtio_balloon_free_page_support(dev)) {
+        /*
+         * This is an optimization provided to migration, so just return 0 to
+         * have the normal migration process not affected when this feature is
+         * not supported.
+         */
+        return 0;
+    }
+
+    switch (pnd->reason) {
+    case PRECOPY_NOTIFY_SETUP:
+        precopy_enable_free_page_optimization();
+        break;
+    case PRECOPY_NOTIFY_COMPLETE:
+    case PRECOPY_NOTIFY_CLEANUP:
+    case PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC:
+        virtio_balloon_free_page_stop(dev);
+        break;
+    case PRECOPY_NOTIFY_AFTER_BITMAP_SYNC:
+        if (vdev->vm_running) {
+            virtio_balloon_free_page_start(dev);
+        } else {
+            virtio_balloon_free_page_done(dev);
+        }
+        break;
+    default:
+        virtio_error(vdev, "%s: %d reason unknown", __func__, pnd->reason);
+    }
+
+    return 0;
+}
+
 static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
 {
     VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
@@ -386,6 +565,17 @@ static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
     config.num_pages = cpu_to_le32(dev->num_pages);
     config.actual = cpu_to_le32(dev->actual);
 
+    if (dev->free_page_report_status == FREE_PAGE_REPORT_S_REQUESTED) {
+        config.free_page_report_cmd_id =
+                       cpu_to_le32(dev->free_page_report_cmd_id);
+    } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_STOP) {
+        config.free_page_report_cmd_id =
+                       cpu_to_le32(VIRTIO_BALLOON_CMD_ID_STOP);
+    } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_DONE) {
+        config.free_page_report_cmd_id =
+                       cpu_to_le32(VIRTIO_BALLOON_CMD_ID_DONE);
+    }
+
     trace_virtio_balloon_get_config(config.num_pages, config.actual);
     memcpy(config_data, &config, sizeof(struct virtio_balloon_config));
 }
@@ -446,6 +636,7 @@ static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
     VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
     f |= dev->host_features;
     virtio_add_feature(&f, VIRTIO_BALLOON_F_STATS_VQ);
+
     return f;
 }
 
@@ -482,6 +673,18 @@ static int virtio_balloon_post_load_device(void *opaque, int version_id)
     return 0;
 }
 
+static const VMStateDescription vmstate_virtio_balloon_free_page_report = {
+    .name = "virtio-balloon-device/free-page-report",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = virtio_balloon_free_page_support,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(free_page_report_cmd_id, VirtIOBalloon),
+        VMSTATE_UINT32(free_page_report_status, VirtIOBalloon),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static const VMStateDescription vmstate_virtio_balloon_device = {
     .name = "virtio-balloon-device",
     .version_id = 1,
@@ -492,6 +695,10 @@ static const VMStateDescription vmstate_virtio_balloon_device = {
         VMSTATE_UINT32(actual, VirtIOBalloon),
         VMSTATE_END_OF_LIST()
     },
+    .subsections = (const VMStateDescription * []) {
+        &vmstate_virtio_balloon_free_page_report,
+        NULL
+    }
 };
 
 static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
@@ -516,6 +723,29 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
     s->dvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
     s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats);
 
+    if (virtio_has_feature(s->host_features,
+                           VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+        s->free_page_vq = virtio_add_queue(vdev, VIRTQUEUE_MAX_SIZE,
+                                           virtio_balloon_handle_free_page_vq);
+        s->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
+        s->free_page_report_cmd_id =
+                           VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN;
+        s->free_page_report_notify.notify =
+                                       virtio_balloon_free_page_report_notify;
+        precopy_add_notifier(&s->free_page_report_notify);
+        if (s->iothread) {
+            object_ref(OBJECT(s->iothread));
+            s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread),
+                                       virtio_ballloon_get_free_page_hints, s);
+            qemu_mutex_init(&s->free_page_lock);
+            qemu_cond_init(&s->free_page_cond);
+            s->block_iothread = false;
+        } else {
+            /* Simply disable this feature if the iothread wasn't created. */
+            s->host_features &= ~(1 << VIRTIO_BALLOON_F_FREE_PAGE_HINT);
+            virtio_error(vdev, "iothread is missing");
+        }
+    }
     reset_stats(s);
 }
 
@@ -524,6 +754,11 @@ static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp)
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
     VirtIOBalloon *s = VIRTIO_BALLOON(dev);
 
+    if (virtio_balloon_free_page_support(s)) {
+        qemu_bh_delete(s->free_page_bh);
+        virtio_balloon_free_page_stop(s);
+        precopy_remove_notifier(&s->free_page_report_notify);
+    }
     balloon_stats_destroy_timer(s);
     qemu_remove_balloon_handler(s);
     virtio_cleanup(vdev);
@@ -533,6 +768,10 @@ static void virtio_balloon_device_reset(VirtIODevice *vdev)
 {
     VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
 
+    if (virtio_balloon_free_page_support(s)) {
+        virtio_balloon_free_page_stop(s);
+    }
+
     if (s->stats_vq_elem != NULL) {
         virtqueue_unpop(s->svq, s->stats_vq_elem, 0);
         g_free(s->stats_vq_elem);
@@ -550,6 +789,26 @@ static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status)
          * was stopped */
         virtio_balloon_receive_stats(vdev, s->svq);
     }
+
+    if (virtio_balloon_free_page_support(s)) {
+        /*
+         * The VM is woken up and the iothread was blocked, so signal it to
+         * continue.
+         */
+        if (vdev->vm_running && s->block_iothread) {
+            qemu_mutex_lock(&s->free_page_lock);
+            s->block_iothread = false;
+            qemu_cond_signal(&s->free_page_cond);
+            qemu_mutex_unlock(&s->free_page_lock);
+        }
+
+        /* The VM is stopped, block the iothread. */
+        if (!vdev->vm_running) {
+            qemu_mutex_lock(&s->free_page_lock);
+            s->block_iothread = true;
+            qemu_mutex_unlock(&s->free_page_lock);
+        }
+    }
 }
 
 static void virtio_balloon_instance_init(Object *obj)
@@ -578,6 +837,10 @@ static const VMStateDescription vmstate_virtio_balloon = {
 static Property virtio_balloon_properties[] = {
     DEFINE_PROP_BIT("deflate-on-oom", VirtIOBalloon, host_features,
                     VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false),
+    DEFINE_PROP_BIT("free-page-hint", VirtIOBalloon, host_features,
+                    VIRTIO_BALLOON_F_FREE_PAGE_HINT, false),
+    DEFINE_PROP_LINK("iothread", VirtIOBalloon, iothread, TYPE_IOTHREAD,
+                     IOThread *),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/include/hw/virtio/virtio-balloon.h b/include/hw/virtio/virtio-balloon.h
index 99dcd6d105..1afafb12f6 100644
--- a/include/hw/virtio/virtio-balloon.h
+++ b/include/hw/virtio/virtio-balloon.h
@@ -17,11 +17,14 @@
 
 #include "standard-headers/linux/virtio_balloon.h"
 #include "hw/virtio/virtio.h"
+#include "sysemu/iothread.h"
 
 #define TYPE_VIRTIO_BALLOON "virtio-balloon-device"
 #define VIRTIO_BALLOON(obj) \
         OBJECT_CHECK(VirtIOBalloon, (obj), TYPE_VIRTIO_BALLOON)
 
+#define VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN 0x80000000
+
 typedef struct virtio_balloon_stat VirtIOBalloonStat;
 
 typedef struct virtio_balloon_stat_modern {
@@ -32,15 +35,38 @@ typedef struct virtio_balloon_stat_modern {
 
 typedef struct PartiallyBalloonedPage PartiallyBalloonedPage;
 
+enum virtio_balloon_free_page_report_status {
+    FREE_PAGE_REPORT_S_STOP = 0,
+    FREE_PAGE_REPORT_S_REQUESTED = 1,
+    FREE_PAGE_REPORT_S_START = 2,
+    FREE_PAGE_REPORT_S_DONE = 3,
+};
+
 typedef struct VirtIOBalloon {
     VirtIODevice parent_obj;
-    VirtQueue *ivq, *dvq, *svq;
+    VirtQueue *ivq, *dvq, *svq, *free_page_vq;
+    uint32_t free_page_report_status;
     uint32_t num_pages;
     uint32_t actual;
+    uint32_t free_page_report_cmd_id;
     uint64_t stats[VIRTIO_BALLOON_S_NR];
     VirtQueueElement *stats_vq_elem;
     size_t stats_vq_offset;
     QEMUTimer *stats_timer;
+    IOThread *iothread;
+    QEMUBH *free_page_bh;
+    /*
+     * Lock to synchronize threads to access the free page reporting related
+     * fields (e.g. free_page_report_status).
+     */
+    QemuMutex free_page_lock;
+    QemuCond  free_page_cond;
+    /*
+     * Set to block iothread to continue reading free page hints as the VM is
+     * stopped.
+     */
+    bool block_iothread;
+    NotifierWithReturn free_page_report_notify;
     int64_t stats_last_update;
     int64_t stats_poll_interval;
     uint32_t host_features;
-- 
2.20.1


Re: [Qemu-devel] [PULL 19/22] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT
Posted by Peter Maydell 5 years, 3 months ago
On Wed, 6 Mar 2019 at 11:55, Dr. David Alan Gilbert (git)
<dgilbert@redhat.com> wrote:
>
> From: Wei Wang <wei.w.wang@intel.com>
>
> The new feature enables the virtio-balloon device to receive hints of
> guest free pages from the free page vq.
>
> A notifier is registered to the migration precopy notifier chain. The
> notifier calls free_page_start after the migration thread syncs the dirty
> bitmap, so that the free page optimization starts to clear bits of free
> pages from the bitmap. It calls the free_page_stop before the migration
> thread syncs the bitmap, which is the end of the current round of ram
> save. The free_page_stop is also called to stop the optimization in the
> case when there is an error occurred in the process of ram saving.
>
> Note: balloon will report pages which were free at the time of this call.
> As the reporting happens asynchronously, dirty bit logging must be
> enabled before this free_page_start call is made. Guest reporting must be
> disabled before the migration dirty bitmap is synchronized.
>
> Signed-off-by: Wei Wang <wei.w.wang@intel.com>
> CC: Michael S. Tsirkin <mst@redhat.com>
> CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
> CC: Juan Quintela <quintela@redhat.com>
> CC: Peter Xu <peterx@redhat.com>
> Message-Id: <1544516693-5395-8-git-send-email-wei.w.wang@intel.com>
> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
>   dgilbert: Dropped kernel header update, fixed up CMD_ID_* name change
> ---

Hi -- Coverity points out a use-after-free here (CID 1399412):

> +static bool get_free_page_hints(VirtIOBalloon *dev)
> +{
> +    VirtQueueElement *elem;
> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> +    VirtQueue *vq = dev->free_page_vq;
> +
> +    while (dev->block_iothread) {
> +        qemu_cond_wait(&dev->free_page_cond, &dev->free_page_lock);
> +    }
> +
> +    elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
> +    if (!elem) {
> +        return false;
> +    }
> +
> +    if (elem->out_num) {
> +        uint32_t id;
> +        size_t size = iov_to_buf(elem->out_sg, elem->out_num, 0,
> +                                 &id, sizeof(id));
> +        virtqueue_push(vq, elem, size);
> +        g_free(elem);

Here we free elem...

> +
> +        virtio_tswap32s(vdev, &id);
> +        if (unlikely(size != sizeof(id))) {
> +            virtio_error(vdev, "received an incorrect cmd id");
> +            return false;
> +        }
> +        if (id == dev->free_page_report_cmd_id) {
> +            dev->free_page_report_status = FREE_PAGE_REPORT_S_START;
> +        } else {
> +            /*
> +             * Stop the optimization only when it has started. This
> +             * avoids a stale stop sign for the previous command.
> +             */
> +            if (dev->free_page_report_status == FREE_PAGE_REPORT_S_START) {
> +                dev->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
> +            }
> +        }
> +    }
> +
> +    if (elem->in_num) {

...but we can fall through here and try to dereference elem...

> +        if (dev->free_page_report_status == FREE_PAGE_REPORT_S_START) {
> +            qemu_guest_free_page_hint(elem->in_sg[0].iov_base,
> +                                      elem->in_sg[0].iov_len);
> +        }
> +        virtqueue_push(vq, elem, 1);
> +        g_free(elem);

...and then free it again.

> +    }
> +
> +    return true;
> +}

thanks
-- PMM

Re: [Qemu-devel] [PULL 19/22] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT
Posted by Dr. David Alan Gilbert 5 years, 3 months ago
* Peter Maydell (peter.maydell@linaro.org) wrote:
> On Wed, 6 Mar 2019 at 11:55, Dr. David Alan Gilbert (git)
> <dgilbert@redhat.com> wrote:
> >
> > From: Wei Wang <wei.w.wang@intel.com>


Wei: Can you look at this....

> > The new feature enables the virtio-balloon device to receive hints of
> > guest free pages from the free page vq.
> >
> > A notifier is registered to the migration precopy notifier chain. The
> > notifier calls free_page_start after the migration thread syncs the dirty
> > bitmap, so that the free page optimization starts to clear bits of free
> > pages from the bitmap. It calls the free_page_stop before the migration
> > thread syncs the bitmap, which is the end of the current round of ram
> > save. The free_page_stop is also called to stop the optimization in the
> > case when there is an error occurred in the process of ram saving.
> >
> > Note: balloon will report pages which were free at the time of this call.
> > As the reporting happens asynchronously, dirty bit logging must be
> > enabled before this free_page_start call is made. Guest reporting must be
> > disabled before the migration dirty bitmap is synchronized.
> >
> > Signed-off-by: Wei Wang <wei.w.wang@intel.com>
> > CC: Michael S. Tsirkin <mst@redhat.com>
> > CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > CC: Juan Quintela <quintela@redhat.com>
> > CC: Peter Xu <peterx@redhat.com>
> > Message-Id: <1544516693-5395-8-git-send-email-wei.w.wang@intel.com>
> > Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> > Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> >   dgilbert: Dropped kernel header update, fixed up CMD_ID_* name change
> > ---
> 
> Hi -- Coverity points out a use-after-free here (CID 1399412):
> 
> > +static bool get_free_page_hints(VirtIOBalloon *dev)
> > +{
> > +    VirtQueueElement *elem;
> > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > +    VirtQueue *vq = dev->free_page_vq;
> > +
> > +    while (dev->block_iothread) {
> > +        qemu_cond_wait(&dev->free_page_cond, &dev->free_page_lock);
> > +    }
> > +
> > +    elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
> > +    if (!elem) {
> > +        return false;
> > +    }
> > +
> > +    if (elem->out_num) {
> > +        uint32_t id;
> > +        size_t size = iov_to_buf(elem->out_sg, elem->out_num, 0,
> > +                                 &id, sizeof(id));
> > +        virtqueue_push(vq, elem, size);
> > +        g_free(elem);
> 
> Here we free elem...
> 
> > +
> > +        virtio_tswap32s(vdev, &id);
> > +        if (unlikely(size != sizeof(id))) {
> > +            virtio_error(vdev, "received an incorrect cmd id");
> > +            return false;
> > +        }
> > +        if (id == dev->free_page_report_cmd_id) {
> > +            dev->free_page_report_status = FREE_PAGE_REPORT_S_START;
> > +        } else {
> > +            /*
> > +             * Stop the optimization only when it has started. This
> > +             * avoids a stale stop sign for the previous command.
> > +             */
> > +            if (dev->free_page_report_status == FREE_PAGE_REPORT_S_START) {
> > +                dev->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
> > +            }
> > +        }
> > +    }
> > +
> > +    if (elem->in_num) {
> 
> ...but we can fall through here and try to dereference elem...
> 
> > +        if (dev->free_page_report_status == FREE_PAGE_REPORT_S_START) {
> > +            qemu_guest_free_page_hint(elem->in_sg[0].iov_base,
> > +                                      elem->in_sg[0].iov_len);
> > +        }
> > +        virtqueue_push(vq, elem, 1);
> > +        g_free(elem);
> 
> ...and then free it again.

OK, so the question here is:
  Is it allowed to have both in and out buffers in the same queue
element? If it's not, then we need to error the device.
  If it is allowed, then we need to fix up the out_num case.

Dave

> > +    }
> > +
> > +    return true;
> > +}
> 
> thanks
> -- PMM
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK

Re: [Qemu-devel] [PULL 19/22] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT
Posted by Wei Wang 5 years, 3 months ago
On 03/09/2019 02:30 AM, Dr. David Alan Gilbert wrote:
> * Peter Maydell (peter.maydell@linaro.org) wrote:
>> On Wed, 6 Mar 2019 at 11:55, Dr. David Alan Gilbert (git)
>> <dgilbert@redhat.com> wrote:
>>> From: Wei Wang <wei.w.wang@intel.com>
>
> Wei: Can you look at this....

Sure, I'll send a followup patch to fix this.


>
>>> The new feature enables the virtio-balloon device to receive hints of
>>> guest free pages from the free page vq.
>>>
>>> A notifier is registered to the migration precopy notifier chain. The
>>> notifier calls free_page_start after the migration thread syncs the dirty
>>> bitmap, so that the free page optimization starts to clear bits of free
>>> pages from the bitmap. It calls the free_page_stop before the migration
>>> thread syncs the bitmap, which is the end of the current round of ram
>>> save. The free_page_stop is also called to stop the optimization in the
>>> case when there is an error occurred in the process of ram saving.
>>>
>>> Note: balloon will report pages which were free at the time of this call.
>>> As the reporting happens asynchronously, dirty bit logging must be
>>> enabled before this free_page_start call is made. Guest reporting must be
>>> disabled before the migration dirty bitmap is synchronized.
>>>
>>> Signed-off-by: Wei Wang <wei.w.wang@intel.com>
>>> CC: Michael S. Tsirkin <mst@redhat.com>
>>> CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
>>> CC: Juan Quintela <quintela@redhat.com>
>>> CC: Peter Xu <peterx@redhat.com>
>>> Message-Id: <1544516693-5395-8-git-send-email-wei.w.wang@intel.com>
>>> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
>>> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
>>>    dgilbert: Dropped kernel header update, fixed up CMD_ID_* name change
>>> ---
>> Hi -- Coverity points out a use-after-free here (CID 1399412):
>>
>>> +static bool get_free_page_hints(VirtIOBalloon *dev)
>>> +{
>>> +    VirtQueueElement *elem;
>>> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>>> +    VirtQueue *vq = dev->free_page_vq;
>>> +
>>> +    while (dev->block_iothread) {
>>> +        qemu_cond_wait(&dev->free_page_cond, &dev->free_page_lock);
>>> +    }
>>> +
>>> +    elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
>>> +    if (!elem) {
>>> +        return false;
>>> +    }
>>> +
>>> +    if (elem->out_num) {
>>> +        uint32_t id;
>>> +        size_t size = iov_to_buf(elem->out_sg, elem->out_num, 0,
>>> +                                 &id, sizeof(id));
>>> +        virtqueue_push(vq, elem, size);
>>> +        g_free(elem);
>> Here we free elem...
>>
>>> +
>>> +        virtio_tswap32s(vdev, &id);
>>> +        if (unlikely(size != sizeof(id))) {
>>> +            virtio_error(vdev, "received an incorrect cmd id");
>>> +            return false;
>>> +        }
>>> +        if (id == dev->free_page_report_cmd_id) {
>>> +            dev->free_page_report_status = FREE_PAGE_REPORT_S_START;
>>> +        } else {
>>> +            /*
>>> +             * Stop the optimization only when it has started. This
>>> +             * avoids a stale stop sign for the previous command.
>>> +             */
>>> +            if (dev->free_page_report_status == FREE_PAGE_REPORT_S_START) {
>>> +                dev->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
>>> +            }
>>> +        }
>>> +    }
>>> +
>>> +    if (elem->in_num) {
>> ...but we can fall through here and try to dereference elem...
>>
>>> +        if (dev->free_page_report_status == FREE_PAGE_REPORT_S_START) {
>>> +            qemu_guest_free_page_hint(elem->in_sg[0].iov_base,
>>> +                                      elem->in_sg[0].iov_len);
>>> +        }
>>> +        virtqueue_push(vq, elem, 1);
>>> +        g_free(elem);
>> ...and then free it again.
> OK, so the question here is:
>    Is it allowed to have both in and out in the same queue element; if
> it's not then we need to error the device.
>    If it is allowed then we need to fix up the out_num case.
>

I think it is allowed. Judging from virtqueue_pop, an elem can consist of
several inbufs and outbufs.

We could probably just delay the free until the end of this function. I
will post the fix patch for review soon.

Best,
Wei