[RFC PATCH 5/5] virtio-net: Introduce LM early load

Yajun Wu posted 5 patches 11 months, 4 weeks ago
Maintainers: "Michael S. Tsirkin" <mst@redhat.com>, Jason Wang <jasowang@redhat.com>
[RFC PATCH 5/5] virtio-net: Introduce LM early load
Posted by Yajun Wu 11 months, 4 weeks ago
Register a new vmstate for virtio-net with an early_setup flag to send
the device state during migration setup.

This can reduce the migration downtime of a virtio-net device with a
vhost-user backend.

This feature is disabled by default and can be enabled by setting the
"x-early-migration" device property to on.

Signed-off-by: Yajun Wu <yajunw@nvidia.com>
Reviewed-by: Avihai Horon <avihaih@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
---
 hw/net/trace-events            |   1 +
 hw/net/virtio-net.c            | 100 +++++++++++++++++++++++++++++++++
 include/hw/virtio/virtio-net.h |   1 +
 3 files changed, 102 insertions(+)

diff --git a/hw/net/trace-events b/hw/net/trace-events
index 6b5ba669a2..ec89229044 100644
--- a/hw/net/trace-events
+++ b/hw/net/trace-events
@@ -399,6 +399,7 @@ virtio_net_post_load_device(void)
 virtio_net_rss_disable(void)
 virtio_net_rss_error(const char *msg, uint32_t value) "%s, value 0x%08x"
 virtio_net_rss_enable(uint32_t p1, uint16_t p2, uint8_t p3) "hashes 0x%x, table of %d, key of %d"
+virtio_net_load_early_setup(void) ""
 
 # tulip.c
 tulip_reg_write(uint64_t addr, const char *name, int size, uint64_t val) "addr 0x%02"PRIx64" (%s) size %d value 0x%08"PRIx64
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 7102ec4817..d0b0cc2ffe 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -46,6 +46,7 @@
 #include "net_rx_pkt.h"
 #include "hw/virtio/vhost.h"
 #include "sysemu/qtest.h"
+#include "sysemu/runstate.h"
 
 #define VIRTIO_NET_VM_VERSION    11
 
@@ -3568,6 +3569,95 @@ static bool failover_hide_primary_device(DeviceListener *listener,
     return qatomic_read(&n->failover_primary_hidden);
 }
 
+static int virtio_net_load_early_setup(void *opaque, int version_id)
+{
+    VirtIONet *n = opaque;
+    VirtIODevice *vdev = VIRTIO_DEVICE(n);
+    NetClientState *nc = qemu_get_queue(n->nic);
+    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
+    int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
+        n->max_ncs - n->max_queue_pairs : 0;
+    VHostNetState *net;
+    int r;
+
+    assert(nc->peer);
+    assert(nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER);
+
+    net = get_vhost_net(nc->peer);
+    assert(net);
+    assert(net->dev.vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
+
+    trace_virtio_net_load_early_setup();
+
+    /* backend should support presetup */
+    r = vhost_dev_set_presetup_state(&net->dev, true);
+    if (r < 0) {
+        error_report("Start presetup device fail: %d", r);
+        return r;
+    }
+
+    if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
+        r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
+        if (r < 0) {
+            error_report("%uBytes MTU not supported by the backend",
+                         n->net_conf.mtu);
+            goto error;
+        }
+    }
+
+    r = vhost_net_presetup(vdev, n->nic->ncs, queue_pairs, cvq);
+    if (r < 0) {
+        error_report("Presetup device fail: %d", r);
+        goto error;
+    }
+
+    r = vhost_dev_set_presetup_state(&net->dev, false);
+    if (r < 0) {
+        error_report("Finish presetup device fail: %d", r);
+        return r;
+    }
+    return 0;
+
+error:
+    vhost_dev_set_presetup_state(&net->dev, false);
+    return r;
+}
+
+static bool virtio_net_early_setup_needed(void *opaque)
+{
+    VirtIONet *n = opaque;
+    NetClientState *nc = qemu_get_queue(n->nic);
+    VHostNetState *net = get_vhost_net(nc->peer);
+
+    /*
+     * Presetup aims to reduce live migration downtime by sync device
+     * status in setup stage. So only do presetup when source VM is in
+     * running state.
+     */
+    if (runstate_is_running() &&
+        nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER &&
+        net->dev.vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER &&
+        !vhost_dev_has_iommu(&net->dev) &&
+        n->vhost_started &&
+        n->status & VIRTIO_NET_S_LINK_UP) {
+        return true;
+    }
+    return false;
+}
+
+static const VMStateDescription vmstate_virtio_net_early = {
+    .name = "virtio-net-early",
+    .minimum_version_id = VIRTIO_NET_VM_VERSION,
+    .version_id = VIRTIO_NET_VM_VERSION,
+    .fields = (VMStateField[]) {
+        VMSTATE_EARLY_VIRTIO_DEVICE,
+        VMSTATE_END_OF_LIST()
+    },
+    .early_setup = true,
+    .post_load = virtio_net_load_early_setup,
+    .needed = virtio_net_early_setup_needed,
+};
+
 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
@@ -3743,6 +3833,11 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
     if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
         virtio_net_load_ebpf(n);
     }
+
+    if (n->early_migration) {
+        vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY,
+                         &vmstate_virtio_net_early, n);
+    }
 }
 
 static void virtio_net_device_unrealize(DeviceState *dev)
@@ -3787,6 +3882,10 @@ static void virtio_net_device_unrealize(DeviceState *dev)
     g_free(n->rss_data.indirections_table);
     net_rx_pkt_uninit(n->rx_pkt);
     virtio_cleanup(vdev);
+
+    if (n->early_migration) {
+        vmstate_unregister(NULL, &vmstate_virtio_net_early, n);
+    }
 }
 
 static void virtio_net_instance_init(Object *obj)
@@ -3922,6 +4021,7 @@ static Property virtio_net_properties[] = {
     DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
     DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
     DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
+    DEFINE_PROP_BOOL("x-early-migration", VirtIONet, early_migration, false),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
index e07a723027..9e6f90b46f 100644
--- a/include/hw/virtio/virtio-net.h
+++ b/include/hw/virtio/virtio-net.h
@@ -212,6 +212,7 @@ struct VirtIONet {
     /* primary failover device is hidden*/
     bool failover_primary_hidden;
     bool failover;
+    bool early_migration;
     DeviceListener primary_listener;
     QDict *primary_opts;
     bool primary_opts_from_json;
-- 
2.27.0
Re: [RFC PATCH 5/5] virtio-net: Introduce LM early load
Posted by Eugenio Perez Martin 8 months, 3 weeks ago
On Mon, Sep 18, 2023 at 6:51 AM Yajun Wu <yajunw@nvidia.com> wrote:
>
> Register a new vmstate for virtio-net with an early_setup flag to send
> the device state during migration setup.
>
> This can reduce the migration downtime of a virtio-net device with a
> vhost-user backend.
>
> This feature is disabled by default and can be enabled by setting the
> "x-early-migration" device property to on.
>
> Signed-off-by: Yajun Wu <yajunw@nvidia.com>
> Reviewed-by: Avihai Horon <avihaih@nvidia.com>
> Reviewed-by: Jiri Pirko <jiri@nvidia.com>
> ---
>  hw/net/trace-events            |   1 +
>  hw/net/virtio-net.c            | 100 +++++++++++++++++++++++++++++++++
>  include/hw/virtio/virtio-net.h |   1 +
>  3 files changed, 102 insertions(+)
>
> diff --git a/hw/net/trace-events b/hw/net/trace-events
> index 6b5ba669a2..ec89229044 100644
> --- a/hw/net/trace-events
> +++ b/hw/net/trace-events
> @@ -399,6 +399,7 @@ virtio_net_post_load_device(void)
>  virtio_net_rss_disable(void)
>  virtio_net_rss_error(const char *msg, uint32_t value) "%s, value 0x%08x"
>  virtio_net_rss_enable(uint32_t p1, uint16_t p2, uint8_t p3) "hashes 0x%x, table of %d, key of %d"
> +virtio_net_load_early_setup(void) ""
>
>  # tulip.c
>  tulip_reg_write(uint64_t addr, const char *name, int size, uint64_t val) "addr 0x%02"PRIx64" (%s) size %d value 0x%08"PRIx64
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index 7102ec4817..d0b0cc2ffe 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -46,6 +46,7 @@
>  #include "net_rx_pkt.h"
>  #include "hw/virtio/vhost.h"
>  #include "sysemu/qtest.h"
> +#include "sysemu/runstate.h"
>
>  #define VIRTIO_NET_VM_VERSION    11
>
> @@ -3568,6 +3569,95 @@ static bool failover_hide_primary_device(DeviceListener *listener,
>      return qatomic_read(&n->failover_primary_hidden);
>  }
>
> +static int virtio_net_load_early_setup(void *opaque, int version_id)
> +{
> +    VirtIONet *n = opaque;
> +    VirtIODevice *vdev = VIRTIO_DEVICE(n);
> +    NetClientState *nc = qemu_get_queue(n->nic);
> +    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
> +    int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
> +        n->max_ncs - n->max_queue_pairs : 0;
> +    VHostNetState *net;
> +    int r;
> +
> +    assert(nc->peer);
> +    assert(nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER);
> +
> +    net = get_vhost_net(nc->peer);
> +    assert(net);
> +    assert(net->dev.vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
> +
> +    trace_virtio_net_load_early_setup();
> +
> +    /* backend should support presetup */
> +    r = vhost_dev_set_presetup_state(&net->dev, true);
> +    if (r < 0) {
> +        error_report("Start presetup device fail: %d", r);
> +        return r;
> +    }
> +
> +    if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
> +        r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
> +        if (r < 0) {
> +            error_report("%uBytes MTU not supported by the backend",
> +                         n->net_conf.mtu);
> +            goto error;
> +        }
> +    }
> +
> +    r = vhost_net_presetup(vdev, n->nic->ncs, queue_pairs, cvq);
> +    if (r < 0) {
> +        error_report("Presetup device fail: %d", r);
> +        goto error;
> +    }
> +
> +    r = vhost_dev_set_presetup_state(&net->dev, false);

I guess this is to signal the backend the end of the presetup
information, isn't it?

Can we do it in the vhost-user backend itself? You can check the queue
a function is running against with dev->vq_index and
dev->vq_index_end.

You can see an example of checking if the function is running at the
first device with at vhost_user_backend_init, that checks
dev->vq_index == 0.

You can see an example of vq_index_end at vhost_user_dev_start, that
only add the status if it runs in the last device. In this case, the
check is (dev->vq_index + dev->nvqs != dev->vq_index_end).

> +    if (r < 0) {
> +        error_report("Finish presetup device fail: %d", r);
> +        return r;
> +    }
> +    return 0;
> +
> +error:
> +    vhost_dev_set_presetup_state(&net->dev, false);
> +    return r;
> +}
> +
> +static bool virtio_net_early_setup_needed(void *opaque)
> +{
> +    VirtIONet *n = opaque;
> +    NetClientState *nc = qemu_get_queue(n->nic);
> +    VHostNetState *net = get_vhost_net(nc->peer);
> +
> +    /*
> +     * Presetup aims to reduce live migration downtime by sync device
> +     * status in setup stage. So only do presetup when source VM is in
> +     * running state.
> +     */
> +    if (runstate_is_running() &&
> +        nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER &&
> +        net->dev.vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER &&
> +        !vhost_dev_has_iommu(&net->dev) &&
> +        n->vhost_started &&
> +        n->status & VIRTIO_NET_S_LINK_UP) {
> +        return true;
> +    }
> +    return false;
> +}

I think it is better not to check for vhost-user here, as:
* All backends can potentially benefit from this.
* Source running vhost-user does not mean the destination is running
vhost-user too.

Another nitpick, you can directly "return runstate_is_running() &&
...;". But I'm fine with this version too.

> +
> +static const VMStateDescription vmstate_virtio_net_early = {
> +    .name = "virtio-net-early",
> +    .minimum_version_id = VIRTIO_NET_VM_VERSION,
> +    .version_id = VIRTIO_NET_VM_VERSION,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_EARLY_VIRTIO_DEVICE,
> +        VMSTATE_END_OF_LIST()
> +    },
> +    .early_setup = true,
> +    .post_load = virtio_net_load_early_setup,
> +    .needed = virtio_net_early_setup_needed,
> +};
> +
>  static void virtio_net_device_realize(DeviceState *dev, Error **errp)
>  {
>      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> @@ -3743,6 +3833,11 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
>      if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
>          virtio_net_load_ebpf(n);
>      }
> +
> +    if (n->early_migration) {
> +        vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY,
> +                         &vmstate_virtio_net_early, n);
> +    }
>  }
>
>  static void virtio_net_device_unrealize(DeviceState *dev)
> @@ -3787,6 +3882,10 @@ static void virtio_net_device_unrealize(DeviceState *dev)
>      g_free(n->rss_data.indirections_table);
>      net_rx_pkt_uninit(n->rx_pkt);
>      virtio_cleanup(vdev);
> +
> +    if (n->early_migration) {
> +        vmstate_unregister(NULL, &vmstate_virtio_net_early, n);
> +    }
>  }
>
>  static void virtio_net_instance_init(Object *obj)
> @@ -3922,6 +4021,7 @@ static Property virtio_net_properties[] = {
>      DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
>      DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
>      DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
> +    DEFINE_PROP_BOOL("x-early-migration", VirtIONet, early_migration, false),
>      DEFINE_PROP_END_OF_LIST(),
>  };
>
> diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
> index e07a723027..9e6f90b46f 100644
> --- a/include/hw/virtio/virtio-net.h
> +++ b/include/hw/virtio/virtio-net.h
> @@ -212,6 +212,7 @@ struct VirtIONet {
>      /* primary failover device is hidden*/
>      bool failover_primary_hidden;
>      bool failover;
> +    bool early_migration;
>      DeviceListener primary_listener;
>      QDict *primary_opts;
>      bool primary_opts_from_json;
> --
> 2.27.0
>
>