Isolate the control virtqueue in its own group, allowing control commands to
be intercepted while the dataplane runs in full passthrough to the guest.
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
v8:
* Do not allocate iova_tree on net_init_vhost_vdpa if only CVQ is
shadowed. Move the iova_tree handling in this case to
vhost_vdpa_net_cvq_start and vhost_vdpa_net_cvq_stop.
v7:
* Never ask for number of address spaces, just react if isolation is not
possible.
* Return ASID ioctl errors instead of masking them as if the device has
no asid.
* Simplify net_init_vhost_vdpa logic
* Add "if possible" suffix
v6:
* Disable control SVQ if the device does not support it because of
features.
v5:
* Fix CVQ buffers not being added when x-svq=on is specified.
* Move vring state in vhost_vdpa_get_vring_group instead of using a
parameter.
* Rename VHOST_VDPA_NET_CVQ_PASSTHROUGH to VHOST_VDPA_NET_DATA_ASID
v4:
* Squash vhost_vdpa_cvq_group_is_independent.
* Rebased on last CVQ start series, that allocated CVQ cmd bufs at load
* Do not check for cvq index on vhost_vdpa_net_prepare, as that callback
is only registered in that NetClientInfo.
v3:
* Make ASID-related queries print a warning instead of returning an
error and stopping the start of QEMU.
---
hw/virtio/vhost-vdpa.c | 3 +-
net/vhost-vdpa.c | 106 ++++++++++++++++++++++++++++++++++++++++-
2 files changed, 107 insertions(+), 2 deletions(-)
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 8e54c5c0fc..45bb72d359 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -652,7 +652,8 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
{
uint64_t features;
uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
- 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH;
+ 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH |
+ 0x1ULL << VHOST_BACKEND_F_IOTLB_ASID;
int r;
if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index a1f1e29b7c..bce57fa724 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -102,6 +102,8 @@ static const uint64_t vdpa_svq_device_features =
BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
BIT_ULL(VIRTIO_NET_F_STANDBY);
+#define VHOST_VDPA_NET_CVQ_ASID 1
+
VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
{
VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
@@ -259,6 +261,40 @@ static VhostIOVATree *vhost_vdpa_svq_allocate_iova_tree(int vdpa_device_fd)
return vhost_iova_tree_new(iova_range.first, iova_range.last);
}
+static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index)
+{
+ struct vhost_vring_state state = {
+ .index = vq_index,
+ };
+ int r = ioctl(device_fd, VHOST_VDPA_GET_VRING_GROUP, &state);
+
+ if (unlikely(r < 0)) {
+ error_report("Cannot get VQ %u group: %s", vq_index,
+ g_strerror(errno));
+ return r;
+ }
+
+ return state.num;
+}
+
+static int vhost_vdpa_set_address_space_id(struct vhost_vdpa *v,
+ unsigned vq_group,
+ unsigned asid_num)
+{
+ struct vhost_vring_state asid = {
+ .index = vq_group,
+ .num = asid_num,
+ };
+ int r;
+
+ r = ioctl(v->device_fd, VHOST_VDPA_SET_GROUP_ASID, &asid);
+ if (unlikely(r < 0)) {
+ error_report("Can't set vq group %u asid %u, errno=%d (%s)",
+ asid.index, asid.num, errno, g_strerror(errno));
+ }
+ return r;
+}
+
static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
{
VhostIOVATree *tree = v->iova_tree;
@@ -333,11 +369,71 @@ dma_map_err:
static int vhost_vdpa_net_cvq_start(NetClientState *nc)
{
VhostVDPAState *s;
- int r;
+ struct vhost_vdpa *v;
+ uint64_t backend_features;
+ int64_t cvq_group;
+ int cvq_index, r;
assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
s = DO_UPCAST(VhostVDPAState, nc, nc);
+ v = &s->vhost_vdpa;
+
+ v->shadow_data = s->always_svq;
+ v->shadow_vqs_enabled = s->always_svq;
+ s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID;
+
+ if (s->always_svq) {
+ /* SVQ is already configured for all virtqueues */
+ goto out;
+ }
+
+ /* Backend features are not available in v->dev yet. */
+ r = ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features);
+ if (unlikely(r < 0)) {
+ error_report("Cannot get vdpa backend_features: %s(%d)",
+ g_strerror(errno), errno);
+ return -1;
+ }
+ if (!(backend_features & VHOST_BACKEND_F_IOTLB_ASID) ||
+ !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
+ return 0;
+ }
+
+ /**
+ * Check that all the other virtqueues of the virtio device are in a
+ * different vq group than the last vq. That group is kept in cvq_group.
+ */
+ cvq_index = v->dev->vq_index_end - 1;
+ cvq_group = vhost_vdpa_get_vring_group(v->device_fd, cvq_index);
+ if (unlikely(cvq_group < 0)) {
+ return cvq_group;
+ }
+ for (int i = 0; i < cvq_index; ++i) {
+ int64_t group = vhost_vdpa_get_vring_group(v->device_fd, i);
+
+ if (unlikely(group < 0)) {
+ return group;
+ }
+
+ if (unlikely(group == cvq_group)) {
+ warn_report(
+ "CVQ %"PRId64" group is the same as VQ %d one (%"PRId64")",
+ cvq_group, i, group);
+ return 0;
+ }
+ }
+
+ r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID);
+ if (unlikely(r < 0)) {
+ return r;
+ }
+
+ v->iova_tree = vhost_vdpa_svq_allocate_iova_tree(v->device_fd);
+ v->shadow_vqs_enabled = true;
+ s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID;
+
+out:
if (!s->vhost_vdpa.shadow_vqs_enabled) {
return 0;
}
@@ -366,6 +462,14 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
if (s->vhost_vdpa.shadow_vqs_enabled) {
vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status);
+ if (!s->always_svq) {
+ /*
+ * If only the CVQ is shadowed we can delete this safely.
+ * If all the VQs are shadows this will be needed by the time the
+ * device is started again to register SVQ vrings and similar.
+ */
+ g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
+ }
}
}
--
2.31.1
On Thu, Nov 24, 2022 at 11:52 PM Eugenio Pérez <eperezma@redhat.com> wrote:
>
> Isolate control virtqueue in its own group, allowing to intercept control
> commands but letting dataplane run totally passthrough to the guest.
>
> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> ---
> v8:
> * Do not allocate iova_tree on net_init_vhost_vdpa if only CVQ is
> shadowed. Move the iova_tree handling in this case to
> vhost_vdpa_net_cvq_start and vhost_vdpa_net_cvq_stop.
>
> v7:
> * Never ask for number of address spaces, just react if isolation is not
> possible.
> * Return ASID ioctl errors instead of masking them as if the device has
> no asid.
> * Simplify net_init_vhost_vdpa logic
> * Add "if possible" suffix
>
> v6:
> * Disable control SVQ if the device does not support it because of
> features.
>
> v5:
> * Fixing the not adding cvq buffers when x-svq=on is specified.
> * Move vring state in vhost_vdpa_get_vring_group instead of using a
> parameter.
> * Rename VHOST_VDPA_NET_CVQ_PASSTHROUGH to VHOST_VDPA_NET_DATA_ASID
>
> v4:
> * Squash vhost_vdpa_cvq_group_is_independent.
> * Rebased on last CVQ start series, that allocated CVQ cmd bufs at load
> * Do not check for cvq index on vhost_vdpa_net_prepare, we only have one
> that callback registered in that NetClientInfo.
>
> v3:
> * Make asid related queries print a warning instead of returning an
> error and stop the start of qemu.
> ---
> hw/virtio/vhost-vdpa.c | 3 +-
> net/vhost-vdpa.c | 106 ++++++++++++++++++++++++++++++++++++++++-
> 2 files changed, 107 insertions(+), 2 deletions(-)
>
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index 8e54c5c0fc..45bb72d359 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -652,7 +652,8 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
> {
> uint64_t features;
> uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
> - 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH;
> + 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH |
> + 0x1ULL << VHOST_BACKEND_F_IOTLB_ASID;
> int r;
>
> if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index a1f1e29b7c..bce57fa724 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -102,6 +102,8 @@ static const uint64_t vdpa_svq_device_features =
> BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
> BIT_ULL(VIRTIO_NET_F_STANDBY);
>
> +#define VHOST_VDPA_NET_CVQ_ASID 1
> +
> VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
> {
> VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> @@ -259,6 +261,40 @@ static VhostIOVATree *vhost_vdpa_svq_allocate_iova_tree(int vdpa_device_fd)
> return vhost_iova_tree_new(iova_range.first, iova_range.last);
> }
>
> +static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index)
> +{
> + struct vhost_vring_state state = {
> + .index = vq_index,
> + };
> + int r = ioctl(device_fd, VHOST_VDPA_GET_VRING_GROUP, &state);
> +
> + if (unlikely(r < 0)) {
> + error_report("Cannot get VQ %u group: %s", vq_index,
> + g_strerror(errno));
> + return r;
> + }
> +
> + return state.num;
> +}
> +
> +static int vhost_vdpa_set_address_space_id(struct vhost_vdpa *v,
> + unsigned vq_group,
> + unsigned asid_num)
> +{
> + struct vhost_vring_state asid = {
> + .index = vq_group,
> + .num = asid_num,
> + };
> + int r;
> +
> + r = ioctl(v->device_fd, VHOST_VDPA_SET_GROUP_ASID, &asid);
> + if (unlikely(r < 0)) {
> + error_report("Can't set vq group %u asid %u, errno=%d (%s)",
> + asid.index, asid.num, errno, g_strerror(errno));
> + }
> + return r;
> +}
> +
> static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
> {
> VhostIOVATree *tree = v->iova_tree;
> @@ -333,11 +369,71 @@ dma_map_err:
> static int vhost_vdpa_net_cvq_start(NetClientState *nc)
> {
> VhostVDPAState *s;
> - int r;
> + struct vhost_vdpa *v;
> + uint64_t backend_features;
> + int64_t cvq_group;
> + int cvq_index, r;
>
> assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
>
> s = DO_UPCAST(VhostVDPAState, nc, nc);
> + v = &s->vhost_vdpa;
> +
> + v->shadow_data = s->always_svq;
> + v->shadow_vqs_enabled = s->always_svq;
> + s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID;
> +
> + if (s->always_svq) {
> + /* SVQ is already configured for all virtqueues */
> + goto out;
> + }
> +
> + /* Backend features are not available in v->dev yet. */
> + r = ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features);
> + if (unlikely(r < 0)) {
> + error_report("Cannot get vdpa backend_features: %s(%d)",
> + g_strerror(errno), errno);
> + return -1;
> + }
> + if (!(backend_features & VHOST_BACKEND_F_IOTLB_ASID) ||
> + !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
As discussed in v7, it would be better to add a comment explaining how
migration is blocked in this case.
> + return 0;
> + }
> +
> + /**
> + * Check if all the virtqueues of the virtio device are in a different vq
> + * than the last vq. VQ group of last group passed in cvq_group.
> + */
> + cvq_index = v->dev->vq_index_end - 1;
> + cvq_group = vhost_vdpa_get_vring_group(v->device_fd, cvq_index);
> + if (unlikely(cvq_group < 0)) {
> + return cvq_group;
> + }
> + for (int i = 0; i < cvq_index; ++i) {
> + int64_t group = vhost_vdpa_get_vring_group(v->device_fd, i);
> +
> + if (unlikely(group < 0)) {
> + return group;
> + }
> +
> + if (unlikely(group == cvq_group)) {
> + warn_report(
> + "CVQ %"PRId64" group is the same as VQ %d one (%"PRId64")",
> + cvq_group, i, group);
Is there any reason we need a warning here? It's pretty common for a parent
to have only a single address space (AS).
Others look good.
Thanks
> + return 0;
> + }
> + }
> +
> + r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID);
> + if (unlikely(r < 0)) {
> + return r;
> + }
> +
> + v->iova_tree = vhost_vdpa_svq_allocate_iova_tree(v->device_fd);
> + v->shadow_vqs_enabled = true;
> + s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID;
> +
> +out:
> if (!s->vhost_vdpa.shadow_vqs_enabled) {
> return 0;
> }
> @@ -366,6 +462,14 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
> if (s->vhost_vdpa.shadow_vqs_enabled) {
> vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
> vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status);
> + if (!s->always_svq) {
> + /*
> + * If only the CVQ is shadowed we can delete this safely.
> + * If all the VQs are shadows this will be needed by the time the
> + * device is started again to register SVQ vrings and similar.
> + */
> + g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
> + }
> }
> }
>
> --
> 2.31.1
>
© 2016 - 2025 Red Hat, Inc.