This allows separating the different virtqueues into groups that share
the same address space. The VDUSE device is asked for the group of each
vq up front, at FEATURES_OK time, as the groups are needed for the DMA
API.

Allocate 3 vq groups, as net is the device that needs the most groups:
* Dataplane (guest passthrough)
* CVQ
* Shadowed vrings.
Future versions of the series can include dynamic allocation of the
groups array so VDUSE can declare more groups.
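
For illustration, a userspace device could declare the groups at
creation time like this (a minimal sketch, assuming VDUSE_API_VERSION
>= 1 has already been negotiated on the control fd; error handling and
the rest of the config are trimmed):

#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/vduse.h>

/* Sketch: create a net device that declares 3 vq groups. */
static int create_dev_with_groups(int control_fd, __u32 config_size)
{
	struct vduse_dev_config *cfg;
	int ret;

	cfg = calloc(1, sizeof(*cfg) + config_size);
	if (!cfg)
		return -1;

	snprintf(cfg->name, VDUSE_NAME_MAX, "vduse-net0");
	cfg->device_id = 1;	/* VIRTIO_ID_NET */
	cfg->vq_num = 3;	/* rx, tx, cvq */
	cfg->ngroups = 3;	/* dataplane, CVQ, shadowed vrings */
	cfg->config_size = config_size;
	/* ... features, vq_align and the config space itself ... */

	ret = ioctl(control_fd, VDUSE_CREATE_DEV, cfg);
	free(cfg);
	return ret;
}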
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
v1: Fix: Remove BIT_ULL(VIRTIO_S_*), as _S_ is already the bit (Maxime)
RFC v3:
* Increase VDUSE_MAX_VQ_GROUPS to 0xffff (Jason). It was set to a lower
value to reduce memory consumption, but vqs are already limited to
that value and userspace VDUSE is able to allocate that many vqs.
* Remove the descs vq group capability as it will not be used and we can
add it on top.
* Do not ask for vq groups if the number of vq groups is < 2.
* Move the valid vq groups range check to vduse_validate_config.
RFC v2:
* Cache group information in kernel, as we need to provide the vq map
tokens properly.
* Add descs vq group to optimize SVQ forwarding and support indirect
descriptors out of the box.
---
drivers/vdpa/vdpa_user/vduse_dev.c | 51 ++++++++++++++++++++++++++++--
include/uapi/linux/vduse.h | 21 +++++++++++-
2 files changed, 68 insertions(+), 4 deletions(-)
diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
index e7bced0b5542..0f4e36dd167e 100644
--- a/drivers/vdpa/vdpa_user/vduse_dev.c
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -58,6 +58,7 @@ struct vduse_virtqueue {
struct vdpa_vq_state state;
bool ready;
bool kicked;
+ u32 vq_group;
spinlock_t kick_lock;
spinlock_t irq_lock;
struct eventfd_ctx *kickfd;
@@ -114,6 +115,7 @@ struct vduse_dev {
u8 status;
u32 vq_num;
u32 vq_align;
+ u32 ngroups;
struct vduse_umem *umem;
struct mutex mem_lock;
unsigned int bounce_size;
@@ -592,6 +594,13 @@ static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
return 0;
}
+static u32 vduse_get_vq_group(struct vdpa_device *vdpa, u16 idx)
+{
+ struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+
+ return dev->vqs[idx]->vq_group;
+}
+
static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
struct vdpa_vq_state *state)
{
@@ -678,6 +687,28 @@ static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
return dev->status;
}
+static int vduse_fill_vq_groups(struct vduse_dev *dev)
+{
+ /* All vqs and descs must be in vq group 0 if ngroups < 2 */
+ if (dev->ngroups < 2)
+ return 0;
+
+ for (int i = 0; i < dev->vdev->vdpa.nvqs; ++i) {
+ struct vduse_dev_msg msg = { 0 };
+ int ret;
+
+ msg.req.type = VDUSE_GET_VQ_GROUP;
+ msg.req.vq_group.index = i;
+ ret = vduse_dev_msg_sync(dev, &msg);
+ if (ret)
+ return ret;
+
+ dev->vqs[i]->vq_group = msg.resp.vq_group.group;
+ }
+
+ return 0;
+}
+
static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
{
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
@@ -685,6 +716,11 @@ static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
if (vduse_dev_set_status(dev, status))
return;
+ if (((dev->status ^ status) & VIRTIO_CONFIG_S_FEATURES_OK) &&
+ (status & VIRTIO_CONFIG_S_FEATURES_OK))
+ if (vduse_fill_vq_groups(dev))
+ return;
+
dev->status = status;
}
@@ -789,6 +825,7 @@ static const struct vdpa_config_ops vduse_vdpa_config_ops = {
.set_vq_cb = vduse_vdpa_set_vq_cb,
.set_vq_num = vduse_vdpa_set_vq_num,
.get_vq_size = vduse_vdpa_get_vq_size,
+ .get_vq_group = vduse_get_vq_group,
.set_vq_ready = vduse_vdpa_set_vq_ready,
.get_vq_ready = vduse_vdpa_get_vq_ready,
.set_vq_state = vduse_vdpa_set_vq_state,
@@ -1737,12 +1774,19 @@ static bool features_is_valid(struct vduse_dev_config *config)
return true;
}
-static bool vduse_validate_config(struct vduse_dev_config *config)
+static bool vduse_validate_config(struct vduse_dev_config *config,
+ u64 api_version)
{
if (!is_mem_zero((const char *)config->reserved,
sizeof(config->reserved)))
return false;
+ if (api_version < VDUSE_API_VERSION_1 && config->ngroups)
+ return false;
+
+ if (api_version >= VDUSE_API_VERSION_1 && config->ngroups > 0xffff)
+ return false;
+
if (config->vq_align > PAGE_SIZE)
return false;
@@ -1858,6 +1902,7 @@ static int vduse_create_dev(struct vduse_dev_config *config,
dev->device_features = config->features;
dev->device_id = config->device_id;
dev->vendor_id = config->vendor_id;
+ dev->ngroups = (dev->api_version < 1) ? 1 : (config->ngroups ?: 1);
dev->name = kstrdup(config->name, GFP_KERNEL);
if (!dev->name)
goto err_str;
@@ -1936,7 +1981,7 @@ static long vduse_ioctl(struct file *file, unsigned int cmd,
break;
ret = -EINVAL;
- if (vduse_validate_config(&config) == false)
+ if (!vduse_validate_config(&config, control->api_version))
break;
buf = vmemdup_user(argp + size, config.config_size);
@@ -2017,7 +2062,7 @@ static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
&vduse_vdpa_config_ops, &vduse_map_ops,
- 1, 1, name, true);
+ dev->ngroups, 1, name, true);
if (IS_ERR(vdev))
return PTR_ERR(vdev);
diff --git a/include/uapi/linux/vduse.h b/include/uapi/linux/vduse.h
index 9a56d0416bfe..b1c0e47d71fb 100644
--- a/include/uapi/linux/vduse.h
+++ b/include/uapi/linux/vduse.h
@@ -31,6 +31,7 @@
* @features: virtio features
* @vq_num: the number of virtqueues
* @vq_align: the allocation alignment of virtqueue's metadata
+ * @ngroups: number of vq groups that VDUSE device declares
* @reserved: for future use, needs to be initialized to zero
* @config_size: the size of the configuration space
* @config: the buffer of the configuration space
@@ -45,7 +46,8 @@ struct vduse_dev_config {
__u64 features;
__u32 vq_num;
__u32 vq_align;
- __u32 reserved[13];
+ __u32 ngroups; /* if VDUSE_API_VERSION >= 1 */
+ __u32 reserved[12];
__u32 config_size;
__u8 config[];
};
@@ -160,6 +162,16 @@ struct vduse_vq_state_packed {
__u16 last_used_idx;
};
+/**
+ * struct vduse_vq_group - virtqueue group
+ * @index: Index of the virtqueue
+ * @group: Virtqueue group
+ */
+struct vduse_vq_group {
+ __u32 index;
+ __u32 group;
+};
+
/**
* struct vduse_vq_info - information of a virtqueue
* @index: virtqueue index
@@ -274,6 +286,7 @@ enum vduse_req_type {
VDUSE_GET_VQ_STATE,
VDUSE_SET_STATUS,
VDUSE_UPDATE_IOTLB,
+ VDUSE_GET_VQ_GROUP,
};
/**
@@ -316,6 +329,7 @@ struct vduse_iova_range {
* @vq_state: virtqueue state, only index field is available
* @s: device status
* @iova: IOVA range for updating
+ * @vq_group: virtqueue group of a virtqueue
* @padding: padding
*
* Structure used by read(2) on /dev/vduse/$NAME.
@@ -328,6 +342,8 @@ struct vduse_dev_request {
struct vduse_vq_state vq_state;
struct vduse_dev_status s;
struct vduse_iova_range iova;
+ /* Only if vduse api version >= 1 */
+ struct vduse_vq_group vq_group;
__u32 padding[32];
};
};
@@ -338,6 +354,7 @@ struct vduse_dev_request {
* @result: the result of request
* @reserved: for future use, needs to be initialized to zero
* @vq_state: virtqueue state
+ * @vq_group: virtqueue group of a virtqueue
* @padding: padding
*
* Structure used by write(2) on /dev/vduse/$NAME.
@@ -350,6 +367,8 @@ struct vduse_dev_response {
__u32 reserved[4];
union {
struct vduse_vq_state vq_state;
+ /* Only if vduse api version >= 1 */
+ struct vduse_vq_group vq_group;
__u32 padding[32];
};
};
--
2.51.0
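
For illustration, the userspace side of the new request could be served
from the standard /dev/vduse/$NAME read(2)/write(2) loop like this (a
sketch; vq_to_group[] stands in for the device's fixed topology and is
an assumption, not part of the patch):

#include <unistd.h>
#include <linux/vduse.h>

/* Sketch: answer VDUSE_GET_VQ_GROUP in the device's request loop. */
static void handle_one_request(int dev_fd, const __u32 *vq_to_group)
{
	struct vduse_dev_request req;
	struct vduse_dev_response resp = { 0 };

	if (read(dev_fd, &req, sizeof(req)) != sizeof(req))
		return;

	resp.request_id = req.request_id;
	resp.result = VDUSE_REQ_RESULT_OK;

	if (req.type == VDUSE_GET_VQ_GROUP) {
		resp.vq_group.index = req.vq_group.index;
		resp.vq_group.group = vq_to_group[req.vq_group.index];
	}

	if (write(dev_fd, &resp, sizeof(resp)) != sizeof(resp))
		return;
}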
On Tue, Aug 26, 2025 at 7:27 PM Eugenio Pérez <eperezma@redhat.com> wrote:
>
> [...]
>
> +	if (((dev->status ^ status) & VIRTIO_CONFIG_S_FEATURES_OK) &&
> +	    (status & VIRTIO_CONFIG_S_FEATURES_OK))
> +		if (vduse_fill_vq_groups(dev))
> +			return;

I may lose some context but I think we've agreed that we need to
extend the status response for this instead of having multiple
independent responses.

> [...]
>
> +	if (api_version >= VDUSE_API_VERSION_1 && config->ngroups > 0xffff)
> +		return false;

Let's use a macro instead of a magic number.

> [...]

Thanks
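
For concreteness, one possible reading of this suggestion (purely a
sketch of an alternative UAPI; nothing below appears in any posted
patch) is to return all the mappings in a single VDUSE_SET_STATUS reply
instead of one VDUSE_GET_VQ_GROUP round trip per vq:

/* Assumption: a hypothetical extended status response. */
struct vduse_dev_status_resp {
	__u8  status;
	__u8  padding[3];
	__u32 ngroups;
	__u32 vq_groups[];	/* vq_groups[i] = group of virtqueue i */
};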
On Mon, Sep 1, 2025 at 3:59 AM Jason Wang <jasowang@redhat.com> wrote:
>
> [...]
>
> I may lose some context but I think we've agreed that we need to
> extend the status response for this instead of having multiple
> independent responses.

My understanding from [1] was that it is ok to start with this version.
We can even make it asynchronous on top if we find this is a
bottleneck, and the VDUSE device would need no change. Would that work?

> [...]
>
> Let's use a macro instead of a magic number.

The rest of the limits are hardcoded, but I'm ok with changing this.
Is UINT16_MAX ok here, or do you prefer something like MAX_NGROUPS and
MAX_ASID?

[...]

[1] https://patchew.org/linux/20250807115752.1663383-1-eperezma@redhat.com/20250807115752.1663383-3-eperezma@redhat.com/#CACGkMEuVngGjgPZXnajiPC+pcbt+dr6jqKRQr8OcX7HK1W3WNQ@mail.gmail.com
On Mon, Sep 1, 2025 at 4:40 PM Eugenio Perez Martin <eperezma@redhat.com> wrote:
>
> [...]
>
> The rest of the limits are hardcoded, but I'm ok with changing this.
> Is UINT16_MAX ok here, or do you prefer something like MAX_NGROUPS and
> MAX_ASID?

MAX_NGROUPS and MAX_ASID seem to be better.

Thanks
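
A sketch of the agreed direction (exact macro names, values and
placement are assumptions; the thread only settles on the naming):

#define MAX_NGROUPS	0xffff
#define MAX_ASID	0xffff

static bool vduse_validate_config(struct vduse_dev_config *config,
				  u64 api_version)
{
	/* ... */
	if (api_version >= VDUSE_API_VERSION_1 &&
	    config->ngroups > MAX_NGROUPS)
		return false;
	/* ... */
	return true;
}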
On Mon, Sep 1, 2025 at 4:40 PM Eugenio Perez Martin <eperezma@redhat.com> wrote:
>
> [...]
>
> My understanding from [1] was that it is ok to start with this version.
> We can even make it asynchronous on top if we find this is a
> bottleneck, and the VDUSE device would need no change. Would that work?

I think I need to understand why we can not defer this to the
get_group_asid() call.

Thanks
On Wed, Sep 3, 2025 at 5:58 AM Jason Wang <jasowang@redhat.com> wrote:
>
> [...]
>
> I think I need to understand why we can not defer this to the
> get_group_asid() call.

Because we need to know the vq_groups->asid mapping in other calls
like set_group_asid or get_vq_group.

We could add a boolean on each virtqueue to track whether we know its
virtqueue group, and then only ask the VDUSE device if needed; would
that work?
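
A sketch of that boolean-based alternative (an idea floated here, not
posted code; the vq_group_valid flag would be a hypothetical new field
in struct vduse_virtqueue):

static u32 vduse_get_vq_group(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	if (!vq->vq_group_valid) {
		struct vduse_dev_msg msg = { 0 };

		msg.req.type = VDUSE_GET_VQ_GROUP;
		msg.req.vq_group.index = idx;
		if (vduse_dev_msg_sync(dev, &msg))
			return 0;	/* fall back to group 0 on error */

		vq->vq_group = msg.resp.vq_group.group;
		vq->vq_group_valid = true;
	}

	return vq->vq_group;
}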
On Wed, Sep 3, 2025 at 2:29 PM Eugenio Perez Martin <eperezma@redhat.com> wrote:
>
> [...]
>
> Because we need to know the vq_groups->asid mapping in other calls
> like set_group_asid or get_vq_group.

I think we don't need the mapping for those, or is there anything I
miss? And the vq to group mappings could be piggybacked on the device
creation request.

> We could add a boolean on each virtqueue to track whether we know its
> virtqueue group, and then only ask the VDUSE device if needed; would
> that work?

Thanks
On Wed, Sep 3, 2025 at 9:40 AM Jason Wang <jasowang@redhat.com> wrote:
>
> [...]
>
> I think we don't need the mapping for those, or is there anything I
> miss?

If the kernel module does not ask the userland device for the actual
vq group of a virtqueue, what should it return in vduse_get_vq_group?
0 for all vqs, even if the CVQ is in vq group 1?

That's also valid for vduse_get_vq_map, whose return value is assumed
not to change for the whole life of the device, as it is not protected
by a mutex.

> And the vq to group mappings could be piggybacked on the device
> creation request.

I'm not sure; I think it involves a vduse request per asid or vq group
operation, even get_vq_map. But I'm open to exploring this possibility
for sure.
On Wed, Sep 3, 2025 at 6:31 PM Eugenio Perez Martin <eperezma@redhat.com> wrote:
>
> [...]
>
> If the kernel module does not ask the userland device for the actual
> vq group of a virtqueue, what should it return in vduse_get_vq_group?
> 0 for all vqs, even if the CVQ is in vq group 1?

Since the topology is fixed, I think userspace should provide this when
creating a device.

> I'm not sure; I think it involves a vduse request per asid or vq group
> operation, even get_vq_map. But I'm open to exploring this possibility
> for sure.

Something like this?

struct vduse_vq_config {
	__u32 index;
	__u16 max_size;
	__u16 reserved[13];
};

?

Thanks
On Thu, Sep 4, 2025 at 11:08 AM Jason Wang <jasowang@redhat.com> wrote: > > On Wed, Sep 3, 2025 at 6:31 PM Eugenio Perez Martin <eperezma@redhat.com> wrote: > > > > On Wed, Sep 3, 2025 at 9:40 AM Jason Wang <jasowang@redhat.com> wrote: > > > > > > On Wed, Sep 3, 2025 at 2:29 PM Eugenio Perez Martin <eperezma@redhat.com> wrote: > > > > > > > > On Wed, Sep 3, 2025 at 5:58 AM Jason Wang <jasowang@redhat.com> wrote: > > > > > > > > > > On Mon, Sep 1, 2025 at 4:40 PM Eugenio Perez Martin <eperezma@redhat.com> wrote: > > > > > > > > > > > > On Mon, Sep 1, 2025 at 3:59 AM Jason Wang <jasowang@redhat.com> wrote: > > > > > > > > > > > > > > On Tue, Aug 26, 2025 at 7:27 PM Eugenio Pérez <eperezma@redhat.com> wrote: > > > > > > > > > > > > > > > > This allows sepparate the different virtqueues in groups that shares the > > > > > > > > same address space. Asking the VDUSE device for the groups of the vq at > > > > > > > > the beginning as they're needed for the DMA API. > > > > > > > > > > > > > > > > Allocating 3 vq groups as net is the device that need the most groups: > > > > > > > > * Dataplane (guest passthrough) > > > > > > > > * CVQ > > > > > > > > * Shadowed vrings. > > > > > > > > > > > > > > > > Future versions of the series can include dynamic allocation of the > > > > > > > > groups array so VDUSE can declare more groups. > > > > > > > > > > > > > > > > Signed-off-by: Eugenio Pérez <eperezma@redhat.com> > > > > > > > > --- > > > > > > > > v1: Fix: Remove BIT_ULL(VIRTIO_S_*), as _S_ is already the bit (Maxime) > > > > > > > > > > > > > > > > RFC v3: > > > > > > > > * Increase VDUSE_MAX_VQ_GROUPS to 0xffff (Jason). It was set to a lower > > > > > > > > value to reduce memory consumption, but vqs are already limited to > > > > > > > > that value and userspace VDUSE is able to allocate that many vqs. > > > > > > > > * Remove the descs vq group capability as it will not be used and we can > > > > > > > > add it on top. > > > > > > > > * Do not ask for vq groups in number of vq groups < 2. > > > > > > > > * Move the valid vq groups range check to vduse_validate_config. > > > > > > > > > > > > > > > > RFC v2: > > > > > > > > * Cache group information in kernel, as we need to provide the vq map > > > > > > > > tokens properly. > > > > > > > > * Add descs vq group to optimize SVQ forwarding and support indirect > > > > > > > > descriptors out of the box. 
> > > > > > > > --- > > > > > > > > drivers/vdpa/vdpa_user/vduse_dev.c | 51 ++++++++++++++++++++++++++++-- > > > > > > > > include/uapi/linux/vduse.h | 21 +++++++++++- > > > > > > > > 2 files changed, 68 insertions(+), 4 deletions(-) > > > > > > > > > > > > > > > > diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c > > > > > > > > index e7bced0b5542..0f4e36dd167e 100644 > > > > > > > > --- a/drivers/vdpa/vdpa_user/vduse_dev.c > > > > > > > > +++ b/drivers/vdpa/vdpa_user/vduse_dev.c > > > > > > > > @@ -58,6 +58,7 @@ struct vduse_virtqueue { > > > > > > > > struct vdpa_vq_state state; > > > > > > > > bool ready; > > > > > > > > bool kicked; > > > > > > > > + u32 vq_group; > > > > > > > > spinlock_t kick_lock; > > > > > > > > spinlock_t irq_lock; > > > > > > > > struct eventfd_ctx *kickfd; > > > > > > > > @@ -114,6 +115,7 @@ struct vduse_dev { > > > > > > > > u8 status; > > > > > > > > u32 vq_num; > > > > > > > > u32 vq_align; > > > > > > > > + u32 ngroups; > > > > > > > > struct vduse_umem *umem; > > > > > > > > struct mutex mem_lock; > > > > > > > > unsigned int bounce_size; > > > > > > > > @@ -592,6 +594,13 @@ static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx, > > > > > > > > return 0; > > > > > > > > } > > > > > > > > > > > > > > > > +static u32 vduse_get_vq_group(struct vdpa_device *vdpa, u16 idx) > > > > > > > > +{ > > > > > > > > + struct vduse_dev *dev = vdpa_to_vduse(vdpa); > > > > > > > > + > > > > > > > > + return dev->vqs[idx]->vq_group; > > > > > > > > +} > > > > > > > > + > > > > > > > > static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx, > > > > > > > > struct vdpa_vq_state *state) > > > > > > > > { > > > > > > > > @@ -678,6 +687,28 @@ static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa) > > > > > > > > return dev->status; > > > > > > > > } > > > > > > > > > > > > > > > > +static int vduse_fill_vq_groups(struct vduse_dev *dev) > > > > > > > > +{ > > > > > > > > + /* All vqs and descs must be in vq group 0 if ngroups < 2 */ > > > > > > > > + if (dev->ngroups < 2) > > > > > > > > + return 0; > > > > > > > > + > > > > > > > > + for (int i = 0; i < dev->vdev->vdpa.nvqs; ++i) { > > > > > > > > + struct vduse_dev_msg msg = { 0 }; > > > > > > > > + int ret; > > > > > > > > + > > > > > > > > + msg.req.type = VDUSE_GET_VQ_GROUP; > > > > > > > > + msg.req.vq_group.index = i; > > > > > > > > + ret = vduse_dev_msg_sync(dev, &msg); > > > > > > > > + if (ret) > > > > > > > > + return ret; > > > > > > > > + > > > > > > > > + dev->vqs[i]->vq_group = msg.resp.vq_group.group; > > > > > > > > + } > > > > > > > > + > > > > > > > > + return 0; > > > > > > > > +} > > > > > > > > + > > > > > > > > static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status) > > > > > > > > { > > > > > > > > struct vduse_dev *dev = vdpa_to_vduse(vdpa); > > > > > > > > @@ -685,6 +716,11 @@ static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status) > > > > > > > > if (vduse_dev_set_status(dev, status)) > > > > > > > > return; > > > > > > > > > > > > > > > > + if (((dev->status ^ status) & VIRTIO_CONFIG_S_FEATURES_OK) && > > > > > > > > + (status & VIRTIO_CONFIG_S_FEATURES_OK)) > > > > > > > > + if (vduse_fill_vq_groups(dev)) > > > > > > > > + return; > > > > > > > > > > > > > > I may lose some context but I think we've agreed that we need to > > > > > > > extend the status response for this instead of having multiple > > > > > > > indepdent response. 
> > > > > > > > > > > > > > > > > > > My understanding was it is ok to start with this version by [1]. We > > > > > > can even make it asynchronous on top if we find this is a bottleneck > > > > > > and the VDUSE device would need no change, would that work? > > > > > > > > > > I think I need to understand why we can not defer this to get_group_asid() call. > > > > > > > > > > > > > Because we need to know the vq_groups->asid mapping in other calls > > > > like set_group_asid or get_vq_group. > > > > > > I think we don't need the mapping of those, or anything I miss? > > > > > > > If the kernel module does not ask the userland device for the actual > > vq group of a virtqueue, what should it return in vduse_get_vq_group? > > 0 for all vqs, even if the CVQ is in vq group 1? > > Since the topology is fixed I think userspace should provide this when > creating a device. > > > > > That's also valid for vduse_get_vq_map, which return is assumed not to > > change in all the life of the device as it is not protected by a > > mutex. > > > > > And the vq to group mappings could be piggy backed by the device > > > creation request. > > > > > > > I'm not sure, I think it involves a vduse request per asid or vq group > > operation. Even get_vq_map. But I'm open to explore this possibility > > for sure. > > Something like this? > > struct vduse_vq_config { > __u32 index; > __u16 max_size; > __u16 reserved[13]; > }; I meant this actually: struct vduse_vq_config { __u32 index; __u16 max_size; __u16 group; __u16 reserved[12]; }; Thanks > > ? > > Thanks > > > > > > > > > > > We could add a boolean on each virtqueue to track if we know its > > > > virtqueue group and then only ask VDUSE device it if needed, would > > > > that work? > > > > > > Thanks > > > > > > > > > > > >
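Note that the corrected layout carves the new field out of reserved[], so the argument size is unchanged and VDUSE_VQ_SETUP keeps the same ioctl number (which encodes that size). A standalone sketch of that invariant; both struct names here are invented for illustration:

#include <linux/types.h>

/* Current UAPI layout of the VDUSE_VQ_SETUP argument. */
struct vq_config_v0 {
        __u32 index;
        __u16 max_size;
        __u16 reserved[13];
};

/* Proposed layout from this thread: one reserved __u16 becomes 'group'. */
struct vq_config_v1 {
        __u32 index;
        __u16 max_size;
        __u16 group;
        __u16 reserved[12];
};

/* Both are 32 bytes, so the proposal does not change the ioctl ABI. */
_Static_assert(sizeof(struct vq_config_v0) == sizeof(struct vq_config_v1),
               "the group field must not change the argument size");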
On Thu, Sep 4, 2025 at 5:20 AM Jason Wang <jasowang@redhat.com> wrote: > > On Thu, Sep 4, 2025 at 11:08 AM Jason Wang <jasowang@redhat.com> wrote: > > > > On Wed, Sep 3, 2025 at 6:31 PM Eugenio Perez Martin <eperezma@redhat.com> wrote: > > > > > > On Wed, Sep 3, 2025 at 9:40 AM Jason Wang <jasowang@redhat.com> wrote: > > > > > > > > On Wed, Sep 3, 2025 at 2:29 PM Eugenio Perez Martin <eperezma@redhat.com> wrote: > > > > > > > > > > On Wed, Sep 3, 2025 at 5:58 AM Jason Wang <jasowang@redhat.com> wrote: > > > > > > > > > > > > On Mon, Sep 1, 2025 at 4:40 PM Eugenio Perez Martin <eperezma@redhat.com> wrote: > > > > > > > > > > > > > > On Mon, Sep 1, 2025 at 3:59 AM Jason Wang <jasowang@redhat.com> wrote: > > > > > > > > > > > > > > > > On Tue, Aug 26, 2025 at 7:27 PM Eugenio Pérez <eperezma@redhat.com> wrote: > > > > > > > > > > > > > > > > > > This allows sepparate the different virtqueues in groups that shares the > > > > > > > > > same address space. Asking the VDUSE device for the groups of the vq at > > > > > > > > > the beginning as they're needed for the DMA API. > > > > > > > > > > > > > > > > > > Allocating 3 vq groups as net is the device that need the most groups: > > > > > > > > > * Dataplane (guest passthrough) > > > > > > > > > * CVQ > > > > > > > > > * Shadowed vrings. > > > > > > > > > > > > > > > > > > Future versions of the series can include dynamic allocation of the > > > > > > > > > groups array so VDUSE can declare more groups. > > > > > > > > > > > > > > > > > > Signed-off-by: Eugenio Pérez <eperezma@redhat.com> > > > > > > > > > --- > > > > > > > > > v1: Fix: Remove BIT_ULL(VIRTIO_S_*), as _S_ is already the bit (Maxime) > > > > > > > > > > > > > > > > > > RFC v3: > > > > > > > > > * Increase VDUSE_MAX_VQ_GROUPS to 0xffff (Jason). It was set to a lower > > > > > > > > > value to reduce memory consumption, but vqs are already limited to > > > > > > > > > that value and userspace VDUSE is able to allocate that many vqs. > > > > > > > > > * Remove the descs vq group capability as it will not be used and we can > > > > > > > > > add it on top. > > > > > > > > > * Do not ask for vq groups in number of vq groups < 2. > > > > > > > > > * Move the valid vq groups range check to vduse_validate_config. > > > > > > > > > > > > > > > > > > RFC v2: > > > > > > > > > * Cache group information in kernel, as we need to provide the vq map > > > > > > > > > tokens properly. > > > > > > > > > * Add descs vq group to optimize SVQ forwarding and support indirect > > > > > > > > > descriptors out of the box. 
> > > > > > > > > --- > > > > > > > > > drivers/vdpa/vdpa_user/vduse_dev.c | 51 ++++++++++++++++++++++++++++-- > > > > > > > > > include/uapi/linux/vduse.h | 21 +++++++++++- > > > > > > > > > 2 files changed, 68 insertions(+), 4 deletions(-) > > > > > > > > > > > > > > > > > > diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c > > > > > > > > > index e7bced0b5542..0f4e36dd167e 100644 > > > > > > > > > --- a/drivers/vdpa/vdpa_user/vduse_dev.c > > > > > > > > > +++ b/drivers/vdpa/vdpa_user/vduse_dev.c > > > > > > > > > @@ -58,6 +58,7 @@ struct vduse_virtqueue { > > > > > > > > > struct vdpa_vq_state state; > > > > > > > > > bool ready; > > > > > > > > > bool kicked; > > > > > > > > > + u32 vq_group; > > > > > > > > > spinlock_t kick_lock; > > > > > > > > > spinlock_t irq_lock; > > > > > > > > > struct eventfd_ctx *kickfd; > > > > > > > > > @@ -114,6 +115,7 @@ struct vduse_dev { > > > > > > > > > u8 status; > > > > > > > > > u32 vq_num; > > > > > > > > > u32 vq_align; > > > > > > > > > + u32 ngroups; > > > > > > > > > struct vduse_umem *umem; > > > > > > > > > struct mutex mem_lock; > > > > > > > > > unsigned int bounce_size; > > > > > > > > > @@ -592,6 +594,13 @@ static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx, > > > > > > > > > return 0; > > > > > > > > > } > > > > > > > > > > > > > > > > > > +static u32 vduse_get_vq_group(struct vdpa_device *vdpa, u16 idx) > > > > > > > > > +{ > > > > > > > > > + struct vduse_dev *dev = vdpa_to_vduse(vdpa); > > > > > > > > > + > > > > > > > > > + return dev->vqs[idx]->vq_group; > > > > > > > > > +} > > > > > > > > > + > > > > > > > > > static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx, > > > > > > > > > struct vdpa_vq_state *state) > > > > > > > > > { > > > > > > > > > @@ -678,6 +687,28 @@ static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa) > > > > > > > > > return dev->status; > > > > > > > > > } > > > > > > > > > > > > > > > > > > +static int vduse_fill_vq_groups(struct vduse_dev *dev) > > > > > > > > > +{ > > > > > > > > > + /* All vqs and descs must be in vq group 0 if ngroups < 2 */ > > > > > > > > > + if (dev->ngroups < 2) > > > > > > > > > + return 0; > > > > > > > > > + > > > > > > > > > + for (int i = 0; i < dev->vdev->vdpa.nvqs; ++i) { > > > > > > > > > + struct vduse_dev_msg msg = { 0 }; > > > > > > > > > + int ret; > > > > > > > > > + > > > > > > > > > + msg.req.type = VDUSE_GET_VQ_GROUP; > > > > > > > > > + msg.req.vq_group.index = i; > > > > > > > > > + ret = vduse_dev_msg_sync(dev, &msg); > > > > > > > > > + if (ret) > > > > > > > > > + return ret; > > > > > > > > > + > > > > > > > > > + dev->vqs[i]->vq_group = msg.resp.vq_group.group; > > > > > > > > > + } > > > > > > > > > + > > > > > > > > > + return 0; > > > > > > > > > +} > > > > > > > > > + > > > > > > > > > static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status) > > > > > > > > > { > > > > > > > > > struct vduse_dev *dev = vdpa_to_vduse(vdpa); > > > > > > > > > @@ -685,6 +716,11 @@ static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status) > > > > > > > > > if (vduse_dev_set_status(dev, status)) > > > > > > > > > return; > > > > > > > > > > > > > > > > > > + if (((dev->status ^ status) & VIRTIO_CONFIG_S_FEATURES_OK) && > > > > > > > > > + (status & VIRTIO_CONFIG_S_FEATURES_OK)) > > > > > > > > > + if (vduse_fill_vq_groups(dev)) > > > > > > > > > + return; > > > > > > > > > > > > > > > > I may lose some context but I think we've agreed that we need to > > > > > > > > extend the 
status response for this instead of having multiple > > > > > > > > indepdent response. > > > > > > > > > > > > > > > > > My understanding was it is ok to start with this version by [1]. We > > > > > > > can even make it asynchronous on top if we find this is a bottleneck > > > > > > > and the VDUSE device would need no change, would that work? > > > > > > > > > > > > I think I need to understand why we can not defer this to get_group_asid() call. > > > > > > > > > > > > > > > > Because we need to know the vq_groups->asid mapping in other calls > > > > > like set_group_asid or get_vq_group. > > > > > > > > I think we don't need the mapping of those, or anything I miss? > > > > > > > > > > If the kernel module does not ask the userland device for the actual > > > vq group of a virtqueue, what should it return in vduse_get_vq_group? > > > 0 for all vqs, even if the CVQ is in vq group 1? > > > > Since the topology is fixed I think userspace should provide this when > > creating a device. > > The topology is not fixed: the CVQ may be either vq #2 or the last one, depending on whether MQ has been negotiated. So it cannot be provided at device creation, but only after feature negotiation. > > > > > > That's also valid for vduse_get_vq_map, which return is assumed not to > > > change in all the life of the device as it is not protected by a > > > mutex. > > > > > > > And the vq to group mappings could be piggy backed by the device > > > > creation request. > > > > > > > > > > I'm not sure, I think it involves a vduse request per asid or vq group > > > operation. Even get_vq_map. But I'm open to explore this possibility > > > for sure. > > > > Something like this? > > > > struct vduse_vq_config { > > __u32 index; > > __u16 max_size; > > __u16 reserved[13]; > > }; > > I meant this actually: > > struct vduse_vq_config { > > __u32 index; > > __u16 max_size; > > __u16 group; > > __u16 reserved[12]; > > }; > This is very interesting: the device should call VDUSE_VQ_SETUP in the middle of processing the VDUSE_SET_STATUS message, checking for FEATURES_OK. Ok, let's move to that, thanks!
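To make the topology point concrete: for virtio-net, the control vq sits after all rx/tx queues, so its index is only known once we know whether VIRTIO_NET_F_MQ was negotiated. An illustrative helper:

#include <linux/types.h>
#include <linux/virtio_net.h>

/* virtio-net vq layout is rx0, tx0, ..., rxN-1, txN-1, cvq, where
 * N is max_virtqueue_pairs if MQ was negotiated and 1 otherwise. */
static __u16 net_cvq_index(__u64 features, __u16 max_vq_pairs)
{
        if (features & (1ULL << VIRTIO_NET_F_MQ))
                return 2 * max_vq_pairs;

        return 2;
}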
On Fri, Sep 5, 2025 at 4:47 PM Eugenio Perez Martin <eperezma@redhat.com> wrote: > > On Thu, Sep 4, 2025 at 5:20 AM Jason Wang <jasowang@redhat.com> wrote: > > > > On Thu, Sep 4, 2025 at 11:08 AM Jason Wang <jasowang@redhat.com> wrote: > > > > > > On Wed, Sep 3, 2025 at 6:31 PM Eugenio Perez Martin <eperezma@redhat.com> wrote: > > > > > > > > On Wed, Sep 3, 2025 at 9:40 AM Jason Wang <jasowang@redhat.com> wrote: > > > > > > > > > > On Wed, Sep 3, 2025 at 2:29 PM Eugenio Perez Martin <eperezma@redhat.com> wrote: > > > > > > > > > > > > On Wed, Sep 3, 2025 at 5:58 AM Jason Wang <jasowang@redhat.com> wrote: > > > > > > > > > > > > > > On Mon, Sep 1, 2025 at 4:40 PM Eugenio Perez Martin <eperezma@redhat.com> wrote: > > > > > > > > > > > > > > > > On Mon, Sep 1, 2025 at 3:59 AM Jason Wang <jasowang@redhat.com> wrote: > > > > > > > > > > > > > > > > > > On Tue, Aug 26, 2025 at 7:27 PM Eugenio Pérez <eperezma@redhat.com> wrote: > > > > > > > > > > > > > > > > > > > > This allows sepparate the different virtqueues in groups that shares the > > > > > > > > > > same address space. Asking the VDUSE device for the groups of the vq at > > > > > > > > > > the beginning as they're needed for the DMA API. > > > > > > > > > > > > > > > > > > > > Allocating 3 vq groups as net is the device that need the most groups: > > > > > > > > > > * Dataplane (guest passthrough) > > > > > > > > > > * CVQ > > > > > > > > > > * Shadowed vrings. > > > > > > > > > > > > > > > > > > > > Future versions of the series can include dynamic allocation of the > > > > > > > > > > groups array so VDUSE can declare more groups. > > > > > > > > > > > > > > > > > > > > Signed-off-by: Eugenio Pérez <eperezma@redhat.com> > > > > > > > > > > --- > > > > > > > > > > v1: Fix: Remove BIT_ULL(VIRTIO_S_*), as _S_ is already the bit (Maxime) > > > > > > > > > > > > > > > > > > > > RFC v3: > > > > > > > > > > * Increase VDUSE_MAX_VQ_GROUPS to 0xffff (Jason). It was set to a lower > > > > > > > > > > value to reduce memory consumption, but vqs are already limited to > > > > > > > > > > that value and userspace VDUSE is able to allocate that many vqs. > > > > > > > > > > * Remove the descs vq group capability as it will not be used and we can > > > > > > > > > > add it on top. > > > > > > > > > > * Do not ask for vq groups in number of vq groups < 2. > > > > > > > > > > * Move the valid vq groups range check to vduse_validate_config. > > > > > > > > > > > > > > > > > > > > RFC v2: > > > > > > > > > > * Cache group information in kernel, as we need to provide the vq map > > > > > > > > > > tokens properly. > > > > > > > > > > * Add descs vq group to optimize SVQ forwarding and support indirect > > > > > > > > > > descriptors out of the box. 
> > > > > > > > > > --- > > > > > > > > > > drivers/vdpa/vdpa_user/vduse_dev.c | 51 ++++++++++++++++++++++++++++-- > > > > > > > > > > include/uapi/linux/vduse.h | 21 +++++++++++- > > > > > > > > > > 2 files changed, 68 insertions(+), 4 deletions(-) > > > > > > > > > > > > > > > > > > > > diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c > > > > > > > > > > index e7bced0b5542..0f4e36dd167e 100644 > > > > > > > > > > --- a/drivers/vdpa/vdpa_user/vduse_dev.c > > > > > > > > > > +++ b/drivers/vdpa/vdpa_user/vduse_dev.c > > > > > > > > > > @@ -58,6 +58,7 @@ struct vduse_virtqueue { > > > > > > > > > > struct vdpa_vq_state state; > > > > > > > > > > bool ready; > > > > > > > > > > bool kicked; > > > > > > > > > > + u32 vq_group; > > > > > > > > > > spinlock_t kick_lock; > > > > > > > > > > spinlock_t irq_lock; > > > > > > > > > > struct eventfd_ctx *kickfd; > > > > > > > > > > @@ -114,6 +115,7 @@ struct vduse_dev { > > > > > > > > > > u8 status; > > > > > > > > > > u32 vq_num; > > > > > > > > > > u32 vq_align; > > > > > > > > > > + u32 ngroups; > > > > > > > > > > struct vduse_umem *umem; > > > > > > > > > > struct mutex mem_lock; > > > > > > > > > > unsigned int bounce_size; > > > > > > > > > > @@ -592,6 +594,13 @@ static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx, > > > > > > > > > > return 0; > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > +static u32 vduse_get_vq_group(struct vdpa_device *vdpa, u16 idx) > > > > > > > > > > +{ > > > > > > > > > > + struct vduse_dev *dev = vdpa_to_vduse(vdpa); > > > > > > > > > > + > > > > > > > > > > + return dev->vqs[idx]->vq_group; > > > > > > > > > > +} > > > > > > > > > > + > > > > > > > > > > static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx, > > > > > > > > > > struct vdpa_vq_state *state) > > > > > > > > > > { > > > > > > > > > > @@ -678,6 +687,28 @@ static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa) > > > > > > > > > > return dev->status; > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > +static int vduse_fill_vq_groups(struct vduse_dev *dev) > > > > > > > > > > +{ > > > > > > > > > > + /* All vqs and descs must be in vq group 0 if ngroups < 2 */ > > > > > > > > > > + if (dev->ngroups < 2) > > > > > > > > > > + return 0; > > > > > > > > > > + > > > > > > > > > > + for (int i = 0; i < dev->vdev->vdpa.nvqs; ++i) { > > > > > > > > > > + struct vduse_dev_msg msg = { 0 }; > > > > > > > > > > + int ret; > > > > > > > > > > + > > > > > > > > > > + msg.req.type = VDUSE_GET_VQ_GROUP; > > > > > > > > > > + msg.req.vq_group.index = i; > > > > > > > > > > + ret = vduse_dev_msg_sync(dev, &msg); > > > > > > > > > > + if (ret) > > > > > > > > > > + return ret; > > > > > > > > > > + > > > > > > > > > > + dev->vqs[i]->vq_group = msg.resp.vq_group.group; > > > > > > > > > > + } > > > > > > > > > > + > > > > > > > > > > + return 0; > > > > > > > > > > +} > > > > > > > > > > + > > > > > > > > > > static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status) > > > > > > > > > > { > > > > > > > > > > struct vduse_dev *dev = vdpa_to_vduse(vdpa); > > > > > > > > > > @@ -685,6 +716,11 @@ static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status) > > > > > > > > > > if (vduse_dev_set_status(dev, status)) > > > > > > > > > > return; > > > > > > > > > > > > > > > > > > > > + if (((dev->status ^ status) & VIRTIO_CONFIG_S_FEATURES_OK) && > > > > > > > > > > + (status & VIRTIO_CONFIG_S_FEATURES_OK)) > > > > > > > > > > + if (vduse_fill_vq_groups(dev)) > > 
> > > > > > > > + return; > > > > > > > > > > I may lose some context but I think we've agreed that we need to > > > > > > > > > extend the status response for this instead of having multiple > > > > > > > > > indepdent response. > > > > > > > > > > > > > > > > > > > > > > > > > My understanding was it is ok to start with this version by [1]. We > > > > > > > > can even make it asynchronous on top if we find this is a bottleneck > > > > > > > > and the VDUSE device would need no change, would that work? > > > > > > > > > > > > > > I think I need to understand why we can not defer this to get_group_asid() call. > > > > > > > > > > > > > > > > > > > Because we need to know the vq_groups->asid mapping in other calls > > > > > > like set_group_asid or get_vq_group. > > > > > > > > > > I think we don't need the mapping of those, or anything I miss? > > > > > > > > > > > > > If the kernel module does not ask the userland device for the actual > > > > vq group of a virtqueue, what should it return in vduse_get_vq_group? > > > > 0 for all vqs, even if the CVQ is in vq group 1? > > > > > > Since the topology is fixed I think userspace should provide this when > > > creating a device. > > > > > The topology is not fixed: the CVQ may be either vq #2 or the last > one, depending on whether MQ has been negotiated. > > So it cannot be provided at device creation, but only after feature negotiation. You are correct. This has been discussed before. Thanks
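Putting the agreed direction together: once the kernel forwards a VDUSE_SET_STATUS request with FEATURES_OK set, the device knows the negotiated features and can report each vq's group before acknowledging the status. A userspace sketch assuming the proposed (not merged) group field; the helper name and the CVQ-in-group-1 split are illustrative:

#include <sys/ioctl.h>
#include <linux/types.h>
#include <linux/vduse.h>
#include <linux/virtio_config.h>

/* Proposed VDUSE_VQ_SETUP argument; same size as struct vduse_vq_config. */
struct vq_config_v1 {
        __u32 index;
        __u16 max_size;
        __u16 group;
        __u16 reserved[12];
};

/* Report vq groups while handling a VDUSE_SET_STATUS request that
 * sets FEATURES_OK: the CVQ goes to group 1, the dataplane to group 0. */
static int report_vq_groups(int dev_fd, __u8 status, __u16 nvqs,
                            __u16 cvq_index, __u16 vq_size)
{
        if (!(status & VIRTIO_CONFIG_S_FEATURES_OK))
                return 0;

        for (__u32 i = 0; i < nvqs; i++) {
                struct vq_config_v1 cfg = {
                        .index = i,
                        .max_size = vq_size,
                        .group = (i == cvq_index) ? 1 : 0,
                };

                if (ioctl(dev_fd, VDUSE_VQ_SETUP, &cfg))
                        return -1;
        }

        return 0;
}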
On Mon, Sep 1, 2025 at 9:59 AM Jason Wang <jasowang@redhat.com> wrote: > > On Tue, Aug 26, 2025 at 7:27 PM Eugenio Pérez <eperezma@redhat.com> wrote: > > > > This allows sepparate the different virtqueues in groups that shares the > > same address space. Asking the VDUSE device for the groups of the vq at > > the beginning as they're needed for the DMA API. > > > > Allocating 3 vq groups as net is the device that need the most groups: > > * Dataplane (guest passthrough) > > * CVQ > > * Shadowed vrings. > > > > Future versions of the series can include dynamic allocation of the > > groups array so VDUSE can declare more groups. > > > > Signed-off-by: Eugenio Pérez <eperezma@redhat.com> > > --- > > v1: Fix: Remove BIT_ULL(VIRTIO_S_*), as _S_ is already the bit (Maxime) > > > > RFC v3: > > * Increase VDUSE_MAX_VQ_GROUPS to 0xffff (Jason). It was set to a lower > > value to reduce memory consumption, but vqs are already limited to > > that value and userspace VDUSE is able to allocate that many vqs. > > * Remove the descs vq group capability as it will not be used and we can > > add it on top. > > * Do not ask for vq groups in number of vq groups < 2. > > * Move the valid vq groups range check to vduse_validate_config. > > > > RFC v2: > > * Cache group information in kernel, as we need to provide the vq map > > tokens properly. > > * Add descs vq group to optimize SVQ forwarding and support indirect > > descriptors out of the box. > > --- > > drivers/vdpa/vdpa_user/vduse_dev.c | 51 ++++++++++++++++++++++++++++-- > > include/uapi/linux/vduse.h | 21 +++++++++++- > > 2 files changed, 68 insertions(+), 4 deletions(-) > > > > diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c > > index e7bced0b5542..0f4e36dd167e 100644 > > --- a/drivers/vdpa/vdpa_user/vduse_dev.c > > +++ b/drivers/vdpa/vdpa_user/vduse_dev.c > > @@ -58,6 +58,7 @@ struct vduse_virtqueue { > > struct vdpa_vq_state state; > > bool ready; > > bool kicked; > > + u32 vq_group; > > spinlock_t kick_lock; > > spinlock_t irq_lock; > > struct eventfd_ctx *kickfd; > > @@ -114,6 +115,7 @@ struct vduse_dev { > > u8 status; > > u32 vq_num; > > u32 vq_align; > > + u32 ngroups; > > struct vduse_umem *umem; > > struct mutex mem_lock; > > unsigned int bounce_size; > > @@ -592,6 +594,13 @@ static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx, > > return 0; > > } > > > > +static u32 vduse_get_vq_group(struct vdpa_device *vdpa, u16 idx) > > +{ > > + struct vduse_dev *dev = vdpa_to_vduse(vdpa); > > + > > + return dev->vqs[idx]->vq_group; > > +} > > + > > static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx, > > struct vdpa_vq_state *state) > > { > > @@ -678,6 +687,28 @@ static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa) > > return dev->status; > > } > > > > +static int vduse_fill_vq_groups(struct vduse_dev *dev) > > +{ > > + /* All vqs and descs must be in vq group 0 if ngroups < 2 */ > > + if (dev->ngroups < 2) > > + return 0; > > + > > + for (int i = 0; i < dev->vdev->vdpa.nvqs; ++i) { > > + struct vduse_dev_msg msg = { 0 }; > > + int ret; > > + > > + msg.req.type = VDUSE_GET_VQ_GROUP; > > + msg.req.vq_group.index = i; > > + ret = vduse_dev_msg_sync(dev, &msg); > > + if (ret) > > + return ret; > > + > > + dev->vqs[i]->vq_group = msg.resp.vq_group.group; > > + } > > + > > + return 0; > > +} > > + > > static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status) > > { > > struct vduse_dev *dev = vdpa_to_vduse(vdpa); > > @@ -685,6 +716,11 @@ static void 
vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status) > > if (vduse_dev_set_status(dev, status)) > > return; > > > > + if (((dev->status ^ status) & VIRTIO_CONFIG_S_FEATURES_OK) && > > + (status & VIRTIO_CONFIG_S_FEATURES_OK)) > > + if (vduse_fill_vq_groups(dev)) > > + return; > > I may lose some context but I think we've agreed that we need to > extend the status response for this instead of having multiple > indepdent response. Btw, I wonder why we don't get the vq group per .get_vq_group(). Thanks > > > + > > dev->status = status; > > } > > > > @@ -789,6 +825,7 @@ static const struct vdpa_config_ops vduse_vdpa_config_ops = { > > .set_vq_cb = vduse_vdpa_set_vq_cb, > > .set_vq_num = vduse_vdpa_set_vq_num, > > .get_vq_size = vduse_vdpa_get_vq_size, > > + .get_vq_group = vduse_get_vq_group, > > .set_vq_ready = vduse_vdpa_set_vq_ready, > > .get_vq_ready = vduse_vdpa_get_vq_ready, > > .set_vq_state = vduse_vdpa_set_vq_state, > > @@ -1737,12 +1774,19 @@ static bool features_is_valid(struct vduse_dev_config *config) > > return true; > > } > > > > -static bool vduse_validate_config(struct vduse_dev_config *config) > > +static bool vduse_validate_config(struct vduse_dev_config *config, > > + u64 api_version) > > { > > if (!is_mem_zero((const char *)config->reserved, > > sizeof(config->reserved))) > > return false; > > > > + if (api_version < VDUSE_API_VERSION_1 && config->ngroups) > > + return false; > > + > > + if (api_version >= VDUSE_API_VERSION_1 && config->ngroups > 0xffff) > > + return false; > > Let's use a macro instead of magic number. > > > + > > if (config->vq_align > PAGE_SIZE) > > return false; > > > > @@ -1858,6 +1902,7 @@ static int vduse_create_dev(struct vduse_dev_config *config, > > dev->device_features = config->features; > > dev->device_id = config->device_id; > > dev->vendor_id = config->vendor_id; > > + dev->ngroups = (dev->api_version < 1) ?
1 : (config->ngroups ?: 1); > > dev->name = kstrdup(config->name, GFP_KERNEL); > > if (!dev->name) > > goto err_str; > > @@ -1936,7 +1981,7 @@ static long vduse_ioctl(struct file *file, unsigned int cmd, > > break; > > > > ret = -EINVAL; > > - if (vduse_validate_config(&config) == false) > > + if (!vduse_validate_config(&config, control->api_version)) > > break; > > > > buf = vmemdup_user(argp + size, config.config_size); > > @@ -2017,7 +2062,7 @@ static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name) > > > > vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev, > > &vduse_vdpa_config_ops, &vduse_map_ops, > > - 1, 1, name, true); > > + dev->ngroups, 1, name, true); > > if (IS_ERR(vdev)) > > return PTR_ERR(vdev); > > > > diff --git a/include/uapi/linux/vduse.h b/include/uapi/linux/vduse.h > > index 9a56d0416bfe..b1c0e47d71fb 100644 > > --- a/include/uapi/linux/vduse.h > > +++ b/include/uapi/linux/vduse.h > > @@ -31,6 +31,7 @@ > > * @features: virtio features > > * @vq_num: the number of virtqueues > > * @vq_align: the allocation alignment of virtqueue's metadata > > + * @ngroups: number of vq groups that VDUSE device declares > > * @reserved: for future use, needs to be initialized to zero > > * @config_size: the size of the configuration space > > * @config: the buffer of the configuration space > > @@ -45,7 +46,8 @@ struct vduse_dev_config { > > __u64 features; > > __u32 vq_num; > > __u32 vq_align; > > - __u32 reserved[13]; > > + __u32 ngroups; /* if VDUSE_API_VERSION >= 1 */ > > + __u32 reserved[12]; > > __u32 config_size; > > __u8 config[]; > > }; > > @@ -160,6 +162,16 @@ struct vduse_vq_state_packed { > > __u16 last_used_idx; > > }; > > > > +/** > > + * struct vduse_vq_group - virtqueue group > > + * @index: Index of the virtqueue > > + * @group: Virtqueue group > > + */ > > +struct vduse_vq_group { > > + __u32 index; > > + __u32 group; > > +}; > > + > > /** > > * struct vduse_vq_info - information of a virtqueue > > * @index: virtqueue index > > @@ -274,6 +286,7 @@ enum vduse_req_type { > > VDUSE_GET_VQ_STATE, > > VDUSE_SET_STATUS, > > VDUSE_UPDATE_IOTLB, > > + VDUSE_GET_VQ_GROUP, > > }; > > > > /** > > @@ -316,6 +329,7 @@ struct vduse_iova_range { > > * @vq_state: virtqueue state, only index field is available > > * @s: device status > > * @iova: IOVA range for updating > > + * @vq_group: virtqueue group of a virtqueue > > * @padding: padding > > * > > * Structure used by read(2) on /dev/vduse/$NAME. > > @@ -328,6 +342,8 @@ struct vduse_dev_request { > > struct vduse_vq_state vq_state; > > struct vduse_dev_status s; > > struct vduse_iova_range iova; > > + /* Only if vduse api version >= 1 */; > > + struct vduse_vq_group vq_group; > > __u32 padding[32]; > > }; > > }; > > @@ -338,6 +354,7 @@ struct vduse_dev_request { > > * @result: the result of request > > * @reserved: for future use, needs to be initialized to zero > > * @vq_state: virtqueue state > > + * @vq_group: virtqueue group of a virtqueue > > * @padding: padding > > * > > * Structure used by write(2) on /dev/vduse/$NAME. > > @@ -350,6 +367,8 @@ struct vduse_dev_response { > > __u32 reserved[4]; > > union { > > struct vduse_vq_state vq_state; > > + /* Only if vduse api version >= 1 */ > > + struct vduse_vq_group vq_group; > > __u32 padding[32]; > > }; > > }; > > -- > > 2.51.0 > > > > Thanks
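On the "use a macro" comment: the range check from the patch could name its limit instead of open-coding 0xffff. A kernel-side sketch, reusing the VDUSE_MAX_VQ_GROUPS name from the changelog:

#define VDUSE_MAX_VQ_GROUPS 0xffff

/* Same checks as vduse_validate_config() above, with the limit named. */
static bool vduse_validate_ngroups(u64 api_version, u32 ngroups)
{
        if (api_version < VDUSE_API_VERSION_1 && ngroups)
                return false;

        if (api_version >= VDUSE_API_VERSION_1 && ngroups > VDUSE_MAX_VQ_GROUPS)
                return false;

        return true;
}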