[v1] vhost-vdpa: cache Virtio states

[RFC PATCH] vhost-vdpa: cache Virtio states

Posted by Shao-Chien Chiang 1 year, 1 month ago

Repetitive ioctls makes vdpa devices initialization and startup slow.
This patch is to cache Virtio status, features, and config.
Testing with vdpa-sim-net as my vdpa device, the numbers of ioctl is 
reduced from 47 to 37.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1579

Signed-off-by: Shao-Chien Chiang <ray90514@gmail.com>
---
 hw/virtio/vhost-vdpa.c         | 44 +++++++++++++++++++++++-----------
 include/hw/virtio/vhost-vdpa.h |  3 +++
 2 files changed, 33 insertions(+), 14 deletions(-)

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index bc6bad23d5..1fccd151cf 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -343,21 +343,17 @@ static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
     int ret;
 
     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
-
     ret = ioctl(fd, request, arg);
     return ret < 0 ? -errno : ret;
 }
 
 static int vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
 {
-    uint8_t s;
+    struct vhost_vdpa *v = dev->opaque;
+    uint8_t s = v->status;
     int ret;
 
     trace_vhost_vdpa_add_status(dev, status);
-    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s);
-    if (ret < 0) {
-        return ret;
-    }
 
     s |= status;
 
@@ -374,6 +370,7 @@ static int vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
     if (!(s & status)) {
         return -EIO;
     }
+    v->status = s;
 
     return 0;
 }
@@ -436,8 +433,15 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
     dev->opaque =  opaque ;
     v->listener = vhost_vdpa_memory_listener;
     v->msg_type = VHOST_IOTLB_MSG_V2;
+    v->config = NULL;
+    v->features = dev->features;
     vhost_vdpa_init_svq(dev, v);
 
+    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &v->status);
+    if (ret) {
+        return ret;
+    }
+
     error_propagate(&dev->migration_blocker, v->migration_blocker);
     if (!vhost_vdpa_first_dev(dev)) {
         return 0;
@@ -456,6 +460,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
             return ret;
         }
         vhost_svq_valid_features(features, &dev->migration_blocker);
+        v->features = features;
     }
 
     /*
@@ -602,6 +607,7 @@ static int vhost_vdpa_cleanup(struct vhost_dev *dev)
     vhost_vdpa_svq_cleanup(dev);
 
     dev->opaque = NULL;
+    g_free(v->config);
 
     return 0;
 }
@@ -718,6 +724,7 @@ static int vhost_vdpa_reset_device(struct vhost_dev *dev)
     ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
     trace_vhost_vdpa_reset_device(dev, status);
     v->suspended = false;
+    v->status = 0;
     return ret;
 }
 
@@ -767,6 +774,7 @@ static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
                                    uint32_t offset, uint32_t size,
                                    uint32_t flags)
 {
+    struct vhost_vdpa *v = dev->opaque;
     struct vhost_vdpa_config *config;
     int ret;
     unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
@@ -776,6 +784,10 @@ static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
     config->off = offset;
     config->len = size;
     memcpy(config->buf, data, size);
+    if (v->config != NULL) {
+        assert(size + offset <= v->config->len);
+        memcpy(v->config->buf + offset, data, size);
+    }
     if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_CONFIG) &&
         trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
         vhost_vdpa_dump_config(dev, data, size);
@@ -788,17 +800,19 @@ static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
 static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
                                    uint32_t config_len, Error **errp)
 {
-    struct vhost_vdpa_config *v_config;
+    struct vhost_vdpa *v = dev->opaque;
     unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
     int ret;
 
     trace_vhost_vdpa_get_config(dev, config, config_len);
-    v_config = g_malloc(config_len + config_size);
-    v_config->len = config_len;
-    v_config->off = 0;
-    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v_config);
-    memcpy(config, v_config->buf, config_len);
-    g_free(v_config);
+    if (v->config == NULL) {
+        v->config = g_malloc(config_len + config_size);
+        v->config->len = config_len;
+        v->config->off = 0;
+        ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v->config);
+    }
+    assert(config_len <= v->config->len);
+    memcpy(config, v->config->buf, config_len);
     if (trace_event_get_state_backends(TRACE_VHOST_VDPA_GET_CONFIG) &&
         trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
         vhost_vdpa_dump_config(dev, config, config_len);
@@ -1294,8 +1308,10 @@ static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
 static int vhost_vdpa_get_features(struct vhost_dev *dev,
                                      uint64_t *features)
 {
-    int ret = vhost_vdpa_get_dev_features(dev, features);
+    struct vhost_vdpa *v = dev->opaque;
+    int ret = 0;
 
+    *features = v->features;
     if (ret == 0) {
         /* Add SVQ logging capabilities */
         *features |= BIT_ULL(VHOST_F_LOG_ALL);
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index c278a2a8de..c1505a21ec 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -39,6 +39,9 @@ typedef struct vhost_vdpa {
     MemoryListener listener;
     struct vhost_vdpa_iova_range iova_range;
     uint64_t acked_features;
+    uint64_t features;
+    uint8_t status;
+    struct vhost_vdpa_config *config;
     bool shadow_vqs_enabled;
     /* Vdpa must send shadow addresses as IOTLB key for data queues, not GPA */
     bool shadow_data;
-- 
2.25.1

[RFC PATCH v1 0/2] vhost-vdpa: cache Virtio states

Posted by Shao-Chien Chiang 1 year ago

Repetitive ioctls makes vdpa devices initialization and startup slow.
This patch series is to cache Virtio status, features, and config.
For each latency test, I use vdpa-sim-net as my vdpa device.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1579

Shao-Chien Chiang (2):
  vhost-vdpa: cache device status and features
  vhost-vdpa: cache device config

 hw/virtio/vhost-vdpa.c         | 60 ++++++++++++++++++++++++++--------
 include/hw/virtio/vhost-vdpa.h |  4 +++
 2 files changed, 50 insertions(+), 14 deletions(-)

-- 
2.25.1

Re: [RFC PATCH v1 0/2] vhost-vdpa: cache Virtio states

Posted by Shao-Chien Chiang 1 year ago

Hi,

I found a problem about cache.

If there are several devices operating the same backend device, the
cache might be inconsistent.

I think we could handle this by checking if a device is the first
device, but I'm not sure it will be feasible.

Is there any better approach to this problem?

Thank you!

Re: [RFC PATCH v1 0/2] vhost-vdpa: cache Virtio states

Posted by Eugenio Perez Martin 1 year ago

On Sat, Apr 22, 2023 at 8:39 AM Shao-Chien Chiang <ray90514@gmail.com> wrote:
>
> Hi,
>
> I found a problem about cache.
>
> If there are several devices operating the same backend device, the
> cache might be inconsistent.
>

Hi Shao-Chien,

What do you mean by "several devices operating the same backend
device". The guest only sees "one virtio device per vhost-vdpa backend
device".

Thanks!

> I think we could handle this by checking if a device is the first
> device, but I'm not sure it will be feasible.
>
> Is there any better approach to this problem?
>
> Thank you!
>

Re: [RFC PATCH v1 0/2] vhost-vdpa: cache Virtio states

Posted by Shao-Chien Chiang 1 year ago

Hi,
The "devices" means there are multiple vhost_dev.
After further research, I found the problem only happens in status.
Vhot-vdpa layer set status ACKNOWLEDGE,  DRIVER, and FEATURES_OK
only once by checking vhost_vdpa_first_dev().
However, it set status DRIVER_OK only once by checking if the device
is the last,
whose code is dev->vq_index + dev->nvqs != dev->vq_index_end in
vhost_vdpa_dev_start().
I think these are the causes of the problem.
Thank you!

On Mon, Apr 24, 2023 at 3:04 PM Eugenio Perez Martin
<eperezma@redhat.com> wrote:
>
> On Sat, Apr 22, 2023 at 8:39 AM Shao-Chien Chiang <ray90514@gmail.com> wrote:
> >
> > Hi,
> >
> > I found a problem about cache.
> >
> > If there are several devices operating the same backend device, the
> > cache might be inconsistent.
> >
>
> Hi Shao-Chien,
>
> What do you mean by "several devices operating the same backend
> device". The guest only sees "one virtio device per vhost-vdpa backend
> device".
>
> Thanks!
>
> > I think we could handle this by checking if a device is the first
> > device, but I'm not sure it will be feasible.
> >
> > Is there any better approach to this problem?
> >
> > Thank you!
> >
>

Re: [RFC PATCH v1 0/2] vhost-vdpa: cache Virtio states

Posted by Eugenio Perez Martin 1 year ago

On Tue, Apr 18, 2023 at 3:22 PM Shao-Chien Chiang <ray90514@gmail.com> wrote:
>
> Repetitive ioctls makes vdpa devices initialization and startup slow.
> This patch series is to cache Virtio status, features, and config.
> For each latency test, I use vdpa-sim-net as my vdpa device.
>

This one should be version 2.
* Please don't send it as a reply to the previous version, as it
confuses the tooling. I've been there myself :).
* Please include a changelog with the updates between the different
versions of the series, like:
v2: Add latency profiled numbers with virtio_sim_net.

CCing Jason.

Thanks!

> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1579
>
> Shao-Chien Chiang (2):
>   vhost-vdpa: cache device status and features
>   vhost-vdpa: cache device config
>
>  hw/virtio/vhost-vdpa.c         | 60 ++++++++++++++++++++++++++--------
>  include/hw/virtio/vhost-vdpa.h |  4 +++
>  2 files changed, 50 insertions(+), 14 deletions(-)
>
> --
> 2.25.1
>
>

[RFC PATCH v1 1/2] vhost-vdpa: cache device status and features

Posted by Shao-Chien Chiang 1 year ago

After caching the device status and features, the latency is reduced by 0.059 sec.

Signed-off-by: Shao-Chien Chiang <ray90514@gmail.com>
---
 hw/virtio/vhost-vdpa.c         | 16 ++++++++++------
 include/hw/virtio/vhost-vdpa.h |  2 ++
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index bc6bad23d5..ccde4c7040 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -350,14 +350,11 @@ static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
 
 static int vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
 {
-    uint8_t s;
+    struct vhost_vdpa *v = dev->opaque;
+    uint8_t s = v->status;
     int ret;
 
     trace_vhost_vdpa_add_status(dev, status);
-    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s);
-    if (ret < 0) {
-        return ret;
-    }
 
     s |= status;
 
@@ -374,6 +371,7 @@ static int vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
     if (!(s & status)) {
         return -EIO;
     }
+    v->status = s;
 
     return 0;
 }
@@ -436,6 +434,8 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
     dev->opaque =  opaque ;
     v->listener = vhost_vdpa_memory_listener;
     v->msg_type = VHOST_IOTLB_MSG_V2;
+    v->status = 0;
+    v->features = dev->features;
     vhost_vdpa_init_svq(dev, v);
 
     error_propagate(&dev->migration_blocker, v->migration_blocker);
@@ -456,6 +456,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
             return ret;
         }
         vhost_svq_valid_features(features, &dev->migration_blocker);
+        v->features = features;
     }
 
     /*
@@ -718,6 +719,7 @@ static int vhost_vdpa_reset_device(struct vhost_dev *dev)
     ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
     trace_vhost_vdpa_reset_device(dev, status);
     v->suspended = false;
+    v->status = 0;
     return ret;
 }
 
@@ -1294,8 +1296,10 @@ static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
 static int vhost_vdpa_get_features(struct vhost_dev *dev,
                                      uint64_t *features)
 {
-    int ret = vhost_vdpa_get_dev_features(dev, features);
+    struct vhost_vdpa *v = dev->opaque;
+    int ret = 0;
 
+    *features = v->features;
     if (ret == 0) {
         /* Add SVQ logging capabilities */
         *features |= BIT_ULL(VHOST_F_LOG_ALL);
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index c278a2a8de..d563630cc9 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -39,6 +39,8 @@ typedef struct vhost_vdpa {
     MemoryListener listener;
     struct vhost_vdpa_iova_range iova_range;
     uint64_t acked_features;
+    uint64_t features;
+    uint8_t status;
     bool shadow_vqs_enabled;
     /* Vdpa must send shadow addresses as IOTLB key for data queues, not GPA */
     bool shadow_data;
-- 
2.25.1

Re: [RFC PATCH v1 1/2] vhost-vdpa: cache device status and features

Posted by Eugenio Perez Martin 1 year ago

On Tue, Apr 18, 2023 at 3:22 PM Shao-Chien Chiang <ray90514@gmail.com> wrote:
>
> After caching the device status and features, the latency is reduced by 0.059 sec.

Can you add more details? Like:
* Initial and final time.
* Times that you repeated the experiment to reach for valid means etc.

And I think it is worth splitting this patch in two, one with status
and other with features. If we find a regression it may help to track
it down.

Thanks!

>
> Signed-off-by: Shao-Chien Chiang <ray90514@gmail.com>
> ---
>  hw/virtio/vhost-vdpa.c         | 16 ++++++++++------
>  include/hw/virtio/vhost-vdpa.h |  2 ++
>  2 files changed, 12 insertions(+), 6 deletions(-)
>
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index bc6bad23d5..ccde4c7040 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -350,14 +350,11 @@ static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
>
>  static int vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
>  {
> -    uint8_t s;
> +    struct vhost_vdpa *v = dev->opaque;
> +    uint8_t s = v->status;
>      int ret;
>
>      trace_vhost_vdpa_add_status(dev, status);
> -    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s);
> -    if (ret < 0) {
> -        return ret;
> -    }
>
>      s |= status;
>
> @@ -374,6 +371,7 @@ static int vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
>      if (!(s & status)) {
>          return -EIO;
>      }
> +    v->status = s;
>
>      return 0;
>  }
> @@ -436,6 +434,8 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
>      dev->opaque =  opaque ;
>      v->listener = vhost_vdpa_memory_listener;
>      v->msg_type = VHOST_IOTLB_MSG_V2;
> +    v->status = 0;
> +    v->features = dev->features;
>      vhost_vdpa_init_svq(dev, v);
>
>      error_propagate(&dev->migration_blocker, v->migration_blocker);
> @@ -456,6 +456,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
>              return ret;
>          }
>          vhost_svq_valid_features(features, &dev->migration_blocker);
> +        v->features = features;
>      }
>
>      /*
> @@ -718,6 +719,7 @@ static int vhost_vdpa_reset_device(struct vhost_dev *dev)
>      ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
>      trace_vhost_vdpa_reset_device(dev, status);
>      v->suspended = false;
> +    v->status = 0;
>      return ret;
>  }
>
> @@ -1294,8 +1296,10 @@ static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
>  static int vhost_vdpa_get_features(struct vhost_dev *dev,
>                                       uint64_t *features)
>  {
> -    int ret = vhost_vdpa_get_dev_features(dev, features);
> +    struct vhost_vdpa *v = dev->opaque;
> +    int ret = 0;
>
> +    *features = v->features;
>      if (ret == 0) {
>          /* Add SVQ logging capabilities */
>          *features |= BIT_ULL(VHOST_F_LOG_ALL);
> diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
> index c278a2a8de..d563630cc9 100644
> --- a/include/hw/virtio/vhost-vdpa.h
> +++ b/include/hw/virtio/vhost-vdpa.h
> @@ -39,6 +39,8 @@ typedef struct vhost_vdpa {
>      MemoryListener listener;
>      struct vhost_vdpa_iova_range iova_range;
>      uint64_t acked_features;
> +    uint64_t features;
> +    uint8_t status;
>      bool shadow_vqs_enabled;
>      /* Vdpa must send shadow addresses as IOTLB key for data queues, not GPA */
>      bool shadow_data;
> --
> 2.25.1
>
>

[RFC PATCH v1 2/2] vhost-vdpa: cache device config

Posted by Shao-Chien Chiang 1 year ago

The config caching is disabled when starting config interruption.
If we could know whether there is a config interruption, I think we can
invalidate the cache at that time instead of disabling the caching
mechanism. 
After caching the device config, the latency is reduced by 0.066 sec.

Signed-off-by: Shao-Chien Chiang <ray90514@gmail.com>
---
 hw/virtio/vhost-vdpa.c         | 44 +++++++++++++++++++++++++++-------
 include/hw/virtio/vhost-vdpa.h |  2 ++
 2 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index ccde4c7040..92bb09ef4d 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -436,6 +436,8 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
     v->msg_type = VHOST_IOTLB_MSG_V2;
     v->status = 0;
     v->features = dev->features;
+    v->config = NULL;
+    v->config_cache_enabled = true;
     vhost_vdpa_init_svq(dev, v);
 
     error_propagate(&dev->migration_blocker, v->migration_blocker);
@@ -748,8 +750,16 @@ static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
 static int vhost_vdpa_set_config_call(struct vhost_dev *dev,
                                        int fd)
 {
+    struct vhost_vdpa *v = dev->opaque;
+    int ret;
+
     trace_vhost_vdpa_set_config_call(dev, fd);
-    return vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG_CALL, &fd);
+    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG_CALL, &fd);
+    if (ret == 0) {
+        v->config_cache_enabled = false;
+    }
+
+    return ret;
 }
 
 static void vhost_vdpa_dump_config(struct vhost_dev *dev, const uint8_t *config,
@@ -769,6 +779,7 @@ static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
                                    uint32_t offset, uint32_t size,
                                    uint32_t flags)
 {
+    struct vhost_vdpa *v = dev->opaque;
     struct vhost_vdpa_config *config;
     int ret;
     unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
@@ -783,6 +794,11 @@ static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
         vhost_vdpa_dump_config(dev, data, size);
     }
     ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG, config);
+    if (v->config_cache_enabled && v->config != NULL) {
+        if (ret == 0) {
+            memcpy(v->config->buf + offset, data, size);
+        }
+    }
     g_free(config);
     return ret;
 }
@@ -790,17 +806,29 @@ static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
 static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
                                    uint32_t config_len, Error **errp)
 {
-    struct vhost_vdpa_config *v_config;
+    struct vhost_vdpa *v = dev->opaque;
     unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
     int ret;
 
     trace_vhost_vdpa_get_config(dev, config, config_len);
-    v_config = g_malloc(config_len + config_size);
-    v_config->len = config_len;
-    v_config->off = 0;
-    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v_config);
-    memcpy(config, v_config->buf, config_len);
-    g_free(v_config);
+    if (v->config_cache_enabled && v->config != NULL) {
+        if (config_len <= v->config->len) {
+            memcpy(config, v->config->buf, config_len);
+            ret = 0;
+        } else {
+            ret = -EINVAL;
+        }
+    } else {
+        v->config = g_malloc(config_len + config_size);
+        v->config->len = config_len;
+        v->config->off = 0;
+        ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v->config);
+        memcpy(config, v->config->buf, config_len);
+        if (!v->config_cache_enabled) {
+            g_free(v->config);
+            v->config = NULL;
+        }
+    }
     if (trace_event_get_state_backends(TRACE_VHOST_VDPA_GET_CONFIG) &&
         trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
         vhost_vdpa_dump_config(dev, config, config_len);
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index d563630cc9..60374785fd 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -41,6 +41,8 @@ typedef struct vhost_vdpa {
     uint64_t acked_features;
     uint64_t features;
     uint8_t status;
+    struct vhost_vdpa_config *config;
+    bool config_cache_enabled;
     bool shadow_vqs_enabled;
     /* Vdpa must send shadow addresses as IOTLB key for data queues, not GPA */
     bool shadow_data;
-- 
2.25.1

Re: [RFC PATCH v1 2/2] vhost-vdpa: cache device config

Posted by Eugenio Perez Martin 1 year ago

On Tue, Apr 18, 2023 at 3:22 PM Shao-Chien Chiang <ray90514@gmail.com> wrote:
>
> The config caching is disabled when starting config interruption.
> If we could know whether there is a config interruption, I think we can
> invalidate the cache at that time instead of disabling the caching
> mechanism.
> After caching the device config, the latency is reduced by 0.066 sec.
>
> Signed-off-by: Shao-Chien Chiang <ray90514@gmail.com>
> ---
>  hw/virtio/vhost-vdpa.c         | 44 +++++++++++++++++++++++++++-------
>  include/hw/virtio/vhost-vdpa.h |  2 ++
>  2 files changed, 38 insertions(+), 8 deletions(-)
>
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index ccde4c7040..92bb09ef4d 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -436,6 +436,8 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
>      v->msg_type = VHOST_IOTLB_MSG_V2;
>      v->status = 0;
>      v->features = dev->features;
> +    v->config = NULL;
> +    v->config_cache_enabled = true;
>      vhost_vdpa_init_svq(dev, v);
>
>      error_propagate(&dev->migration_blocker, v->migration_blocker);
> @@ -748,8 +750,16 @@ static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
>  static int vhost_vdpa_set_config_call(struct vhost_dev *dev,
>                                         int fd)
>  {
> +    struct vhost_vdpa *v = dev->opaque;
> +    int ret;
> +
>      trace_vhost_vdpa_set_config_call(dev, fd);
> -    return vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG_CALL, &fd);
> +    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG_CALL, &fd);
> +    if (ret == 0) {
> +        v->config_cache_enabled = false;

The lifecycle of the vhost_vdpa device is:
init -> start -> stop -> start -> .... -> cleanup.

In other words, it is initialized only once at qemu startup but it can
be started & stopped many times. You can check if the device is
stopping if the fd is -1. Other values indicate the device is starting
or that the notifier is being masked, we must disable the cache in
both cases.

You can force this cycle if you rmmod the virtio_net module in the
guest and then modprobe it again. However, maybe it only accesses the
config once in this situation. If that is the case, I think it is
worth keeping this code and putting a comment explaining it.

Thanks!

> +    }
> +
> +    return ret;
>  }
>
>  static void vhost_vdpa_dump_config(struct vhost_dev *dev, const uint8_t *config,
> @@ -769,6 +779,7 @@ static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
>                                     uint32_t offset, uint32_t size,
>                                     uint32_t flags)
>  {
> +    struct vhost_vdpa *v = dev->opaque;
>      struct vhost_vdpa_config *config;
>      int ret;
>      unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
> @@ -783,6 +794,11 @@ static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
>          vhost_vdpa_dump_config(dev, data, size);
>      }
>      ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG, config);
> +    if (v->config_cache_enabled && v->config != NULL) {
> +        if (ret == 0) {
> +            memcpy(v->config->buf + offset, data, size);
> +        }
> +    }
>      g_free(config);
>      return ret;
>  }
> @@ -790,17 +806,29 @@ static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
>  static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
>                                     uint32_t config_len, Error **errp)
>  {
> -    struct vhost_vdpa_config *v_config;
> +    struct vhost_vdpa *v = dev->opaque;
>      unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
>      int ret;
>
>      trace_vhost_vdpa_get_config(dev, config, config_len);
> -    v_config = g_malloc(config_len + config_size);
> -    v_config->len = config_len;
> -    v_config->off = 0;
> -    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v_config);
> -    memcpy(config, v_config->buf, config_len);
> -    g_free(v_config);
> +    if (v->config_cache_enabled && v->config != NULL) {
> +        if (config_len <= v->config->len) {
> +            memcpy(config, v->config->buf, config_len);
> +            ret = 0;
> +        } else {
> +            ret = -EINVAL;
> +        }
> +    } else {
> +        v->config = g_malloc(config_len + config_size);

This may not be the whole size of the config. The size is
sizeof(struct virtio_net_config), and it should be set to 0 with
g_malloc0.

> +        v->config->len = config_len;
> +        v->config->off = 0;
> +        ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v->config);
> +        memcpy(config, v->config->buf, config_len);
> +        if (!v->config_cache_enabled) {
> +            g_free(v->config);
> +            v->config = NULL;

Maybe it is worth freeing it at vhost_vdpa_set_config_call?

Thanks!

> +        }
> +    }
>      if (trace_event_get_state_backends(TRACE_VHOST_VDPA_GET_CONFIG) &&
>          trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
>          vhost_vdpa_dump_config(dev, config, config_len);
> diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
> index d563630cc9..60374785fd 100644
> --- a/include/hw/virtio/vhost-vdpa.h
> +++ b/include/hw/virtio/vhost-vdpa.h
> @@ -41,6 +41,8 @@ typedef struct vhost_vdpa {
>      uint64_t acked_features;
>      uint64_t features;
>      uint8_t status;
> +    struct vhost_vdpa_config *config;
> +    bool config_cache_enabled;
>      bool shadow_vqs_enabled;
>      /* Vdpa must send shadow addresses as IOTLB key for data queues, not GPA */
>      bool shadow_data;
> --
> 2.25.1
>
>

Re: [RFC PATCH] vhost-vdpa: cache Virtio states

Posted by Eugenio Perez Martin 1 year, 1 month ago

On Fri, Apr 14, 2023 at 9:26 AM Shao-Chien Chiang <ray90514@gmail.com> wrote:
>
> Repetitive ioctls makes vdpa devices initialization and startup slow.
> This patch is to cache Virtio status, features, and config.
> Testing with vdpa-sim-net as my vdpa device, the numbers of ioctl is
> reduced from 47 to 37.
>

Hi Shao-Chien,

To know the latency reduction would make it easier to evaluate them. I
think it would be enough with printing the timestamp from the first
access to the last one in the initialization.

Also, could you split the series in two, one caching the config
accesses and other one caching the status one? That would ease the
review process and the evaluation of their inclusion based on the
profiling.

Finally this needs to take into account the configure interrupt. You
can see qemu commit range
ee3b8dc6cc496ba7f4e27aed4493275c706a7942..1680542862edd963e6380dd4121a5e85df55581f
for more information.

I think we can go two ways:
* Config interrupt invalidates the cached config, so we set it to NULL here.
* We don't cache config as long as vdpa config interrupt is enabled.

More minor comments inline.

> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1579
>
> Signed-off-by: Shao-Chien Chiang <ray90514@gmail.com>
> ---
>  hw/virtio/vhost-vdpa.c         | 44 +++++++++++++++++++++++-----------
>  include/hw/virtio/vhost-vdpa.h |  3 +++
>  2 files changed, 33 insertions(+), 14 deletions(-)
>
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index bc6bad23d5..1fccd151cf 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -343,21 +343,17 @@ static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
>      int ret;
>
>      assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
> -

Please leave out this line deletion. If you think it is needed we can
address it in another series.

>      ret = ioctl(fd, request, arg);
>      return ret < 0 ? -errno : ret;
>  }
>
>  static int vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
>  {
> -    uint8_t s;
> +    struct vhost_vdpa *v = dev->opaque;
> +    uint8_t s = v->status;
>      int ret;
>
>      trace_vhost_vdpa_add_status(dev, status);
> -    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s);
> -    if (ret < 0) {
> -        return ret;
> -    }
>
>      s |= status;
>
> @@ -374,6 +370,7 @@ static int vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
>      if (!(s & status)) {
>          return -EIO;
>      }
> +    v->status = s;
>
>      return 0;
>  }
> @@ -436,8 +433,15 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
>      dev->opaque =  opaque ;
>      v->listener = vhost_vdpa_memory_listener;
>      v->msg_type = VHOST_IOTLB_MSG_V2;
> +    v->config = NULL;
> +    v->features = dev->features;
>      vhost_vdpa_init_svq(dev, v);
>
> +    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &v->status);
> +    if (ret) {
> +        return ret;
> +    }
> +

This first GET_STATUS is not needed, we can assume the first one is 0.

>      error_propagate(&dev->migration_blocker, v->migration_blocker);
>      if (!vhost_vdpa_first_dev(dev)) {
>          return 0;
> @@ -456,6 +460,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
>              return ret;
>          }
>          vhost_svq_valid_features(features, &dev->migration_blocker);
> +        v->features = features;
>      }
>
>      /*
> @@ -602,6 +607,7 @@ static int vhost_vdpa_cleanup(struct vhost_dev *dev)
>      vhost_vdpa_svq_cleanup(dev);
>
>      dev->opaque = NULL;
> +    g_free(v->config);
>
>      return 0;
>  }
> @@ -718,6 +724,7 @@ static int vhost_vdpa_reset_device(struct vhost_dev *dev)
>      ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
>      trace_vhost_vdpa_reset_device(dev, status);
>      v->suspended = false;
> +    v->status = 0;
>      return ret;
>  }
>
> @@ -767,6 +774,7 @@ static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
>                                     uint32_t offset, uint32_t size,
>                                     uint32_t flags)
>  {
> +    struct vhost_vdpa *v = dev->opaque;
>      struct vhost_vdpa_config *config;
>      int ret;
>      unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
> @@ -776,6 +784,10 @@ static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
>      config->off = offset;
>      config->len = size;
>      memcpy(config->buf, data, size);
> +    if (v->config != NULL) {
> +        assert(size + offset <= v->config->len);

I think the guest is able to trigger this assertion, we should replace
either by the same error returned from the kernel or simply to make
the call to the kernel and let it solve the issue.

> +        memcpy(v->config->buf + offset, data, size);
> +    }
>      if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_CONFIG) &&
>          trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
>          vhost_vdpa_dump_config(dev, data, size);
> @@ -788,17 +800,19 @@ static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
>  static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
>                                     uint32_t config_len, Error **errp)
>  {
> -    struct vhost_vdpa_config *v_config;
> +    struct vhost_vdpa *v = dev->opaque;
>      unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
>      int ret;
>
>      trace_vhost_vdpa_get_config(dev, config, config_len);
> -    v_config = g_malloc(config_len + config_size);
> -    v_config->len = config_len;
> -    v_config->off = 0;
> -    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v_config);
> -    memcpy(config, v_config->buf, config_len);
> -    g_free(v_config);
> +    if (v->config == NULL) {
> +        v->config = g_malloc(config_len + config_size);
> +        v->config->len = config_len;
> +        v->config->off = 0;
> +        ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v->config);
> +    }
> +    assert(config_len <= v->config->len);

Same here, I think the guest is able to trigger this assertion.

Thanks!

> +    memcpy(config, v->config->buf, config_len);
>      if (trace_event_get_state_backends(TRACE_VHOST_VDPA_GET_CONFIG) &&
>          trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
>          vhost_vdpa_dump_config(dev, config, config_len);
> @@ -1294,8 +1308,10 @@ static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
>  static int vhost_vdpa_get_features(struct vhost_dev *dev,
>                                       uint64_t *features)
>  {
> -    int ret = vhost_vdpa_get_dev_features(dev, features);
> +    struct vhost_vdpa *v = dev->opaque;
> +    int ret = 0;
>
> +    *features = v->features;
>      if (ret == 0) {
>          /* Add SVQ logging capabilities */
>          *features |= BIT_ULL(VHOST_F_LOG_ALL);
> diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
> index c278a2a8de..c1505a21ec 100644
> --- a/include/hw/virtio/vhost-vdpa.h
> +++ b/include/hw/virtio/vhost-vdpa.h
> @@ -39,6 +39,9 @@ typedef struct vhost_vdpa {
>      MemoryListener listener;
>      struct vhost_vdpa_iova_range iova_range;
>      uint64_t acked_features;
> +    uint64_t features;
> +    uint8_t status;
> +    struct vhost_vdpa_config *config;
>      bool shadow_vqs_enabled;
>      /* Vdpa must send shadow addresses as IOTLB key for data queues, not GPA */
>      bool shadow_data;
> --
> 2.25.1
>
>