[PATCH v3 4/8] vringh: support VA with iotlb

Stefano Garzarella posted 8 patches 3 years ago
Only 5 patches received!
There is a newer version of this series
[PATCH v3 4/8] vringh: support VA with iotlb
Posted by Stefano Garzarella 3 years ago
vDPA supports the possibility to use user VA in the iotlb messages.
So, let's add support for user VA in vringh to use it in the vDPA
simulators.

Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---

Notes:
    v3:
    - refactored avoiding code duplication [Eugenio]
    v2:
    - replace kmap_atomic() with kmap_local_page() [see previous patch]
    - fix cast warnings when build with W=1 C=1

 include/linux/vringh.h            |   5 +-
 drivers/vdpa/mlx5/net/mlx5_vnet.c |   2 +-
 drivers/vdpa/vdpa_sim/vdpa_sim.c  |   4 +-
 drivers/vhost/vringh.c            | 153 +++++++++++++++++++++++-------
 4 files changed, 127 insertions(+), 37 deletions(-)

diff --git a/include/linux/vringh.h b/include/linux/vringh.h
index 1991a02c6431..d39b9f2dcba0 100644
--- a/include/linux/vringh.h
+++ b/include/linux/vringh.h
@@ -32,6 +32,9 @@ struct vringh {
 	/* Can we get away with weak barriers? */
 	bool weak_barriers;
 
+	/* Use user's VA */
+	bool use_va;
+
 	/* Last available index we saw (ie. where we're up to). */
 	u16 last_avail_idx;
 
@@ -279,7 +282,7 @@ void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
 		      spinlock_t *iotlb_lock);
 
 int vringh_init_iotlb(struct vringh *vrh, u64 features,
-		      unsigned int num, bool weak_barriers,
+		      unsigned int num, bool weak_barriers, bool use_va,
 		      struct vring_desc *desc,
 		      struct vring_avail *avail,
 		      struct vring_used *used);
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 520646ae7fa0..dfd0e000217b 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -2537,7 +2537,7 @@ static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
 
 	if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
 		err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
-					MLX5_CVQ_MAX_ENT, false,
+					MLX5_CVQ_MAX_ENT, false, false,
 					(struct vring_desc *)(uintptr_t)cvq->desc_addr,
 					(struct vring_avail *)(uintptr_t)cvq->driver_addr,
 					(struct vring_used *)(uintptr_t)cvq->device_addr);
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
index eea23c630f7c..47cdf2a1f5b8 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -60,7 +60,7 @@ static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx)
 	struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
 	uint16_t last_avail_idx = vq->vring.last_avail_idx;
 
-	vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, true,
+	vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, true, false,
 			  (struct vring_desc *)(uintptr_t)vq->desc_addr,
 			  (struct vring_avail *)
 			  (uintptr_t)vq->driver_addr,
@@ -92,7 +92,7 @@ static void vdpasim_vq_reset(struct vdpasim *vdpasim,
 	vq->cb = NULL;
 	vq->private = NULL;
 	vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features,
-			  VDPASIM_QUEUE_MAX, false, NULL, NULL, NULL);
+			  VDPASIM_QUEUE_MAX, false, false, NULL, NULL, NULL);
 
 	vq->vring.notify = NULL;
 }
diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
index 0ba3ef809e48..72c88519329a 100644
--- a/drivers/vhost/vringh.c
+++ b/drivers/vhost/vringh.c
@@ -1094,10 +1094,18 @@ EXPORT_SYMBOL(vringh_need_notify_kern);
 
 #if IS_REACHABLE(CONFIG_VHOST_IOTLB)
 
+struct iotlb_vec {
+	union {
+		struct iovec *iovec;
+		struct bio_vec *bvec;
+	} iov;
+	size_t count;
+	bool is_iovec;
+};
+
 static int iotlb_translate(const struct vringh *vrh,
 			   u64 addr, u64 len, u64 *translated,
-			   struct bio_vec iov[],
-			   int iov_size, u32 perm)
+			   struct iotlb_vec *ivec, u32 perm)
 {
 	struct vhost_iotlb_map *map;
 	struct vhost_iotlb *iotlb = vrh->iotlb;
@@ -1107,9 +1115,9 @@ static int iotlb_translate(const struct vringh *vrh,
 	spin_lock(vrh->iotlb_lock);
 
 	while (len > s) {
-		u64 size, pa, pfn;
+		u64 size;
 
-		if (unlikely(ret >= iov_size)) {
+		if (unlikely(ret >= ivec->count)) {
 			ret = -ENOBUFS;
 			break;
 		}
@@ -1124,10 +1132,22 @@ static int iotlb_translate(const struct vringh *vrh,
 		}
 
 		size = map->size - addr + map->start;
-		pa = map->addr + addr - map->start;
-		pfn = pa >> PAGE_SHIFT;
-		bvec_set_page(&iov[ret], pfn_to_page(pfn), min(len - s, size),
-			      pa & (PAGE_SIZE - 1));
+		if (ivec->is_iovec) {
+			struct iovec *iovec = ivec->iov.iovec;
+
+			iovec[ret].iov_len = min(len - s, size);
+			iovec[ret].iov_base = (void __user *)(unsigned long)
+					      (map->addr + addr - map->start);
+		} else {
+			u64 pa = map->addr + addr - map->start;
+			u64 pfn = pa >> PAGE_SHIFT;
+			struct bio_vec *bvec = ivec->iov.bvec;
+
+			bvec_set_page(&bvec[ret], pfn_to_page(pfn),
+				      min(len - s, size),
+				      pa & (PAGE_SIZE - 1));
+		}
+
 		s += size;
 		addr += size;
 		++ret;
@@ -1141,26 +1161,42 @@ static int iotlb_translate(const struct vringh *vrh,
 	return ret;
 }
 
+#define IOTLB_IOV_SIZE 16
+
 static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
 				  void *src, size_t len)
 {
+	struct iotlb_vec ivec;
+	union {
+		struct iovec iovec[IOTLB_IOV_SIZE];
+		struct bio_vec bvec[IOTLB_IOV_SIZE];
+	} iov;
 	u64 total_translated = 0;
 
+	ivec.iov.iovec = iov.iovec;
+	ivec.count = IOTLB_IOV_SIZE;
+	ivec.is_iovec = vrh->use_va;
+
 	while (total_translated < len) {
-		struct bio_vec iov[16];
 		struct iov_iter iter;
 		u64 translated;
 		int ret;
 
 		ret = iotlb_translate(vrh, (u64)(uintptr_t)src,
 				      len - total_translated, &translated,
-				      iov, ARRAY_SIZE(iov), VHOST_MAP_RO);
+				      &ivec, VHOST_MAP_RO);
 		if (ret == -ENOBUFS)
-			ret = ARRAY_SIZE(iov);
+			ret = IOTLB_IOV_SIZE;
 		else if (ret < 0)
 			return ret;
 
-		iov_iter_bvec(&iter, ITER_SOURCE, iov, ret, translated);
+		if (ivec.is_iovec) {
+			iov_iter_init(&iter, ITER_SOURCE, ivec.iov.iovec, ret,
+				      translated);
+		} else {
+			iov_iter_bvec(&iter, ITER_SOURCE, ivec.iov.bvec, ret,
+				      translated);
+		}
 
 		ret = copy_from_iter(dst, translated, &iter);
 		if (ret < 0)
@@ -1177,23 +1213,37 @@ static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
 static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
 				void *src, size_t len)
 {
+	struct iotlb_vec ivec;
+	union {
+		struct iovec iovec[IOTLB_IOV_SIZE];
+		struct bio_vec bvec[IOTLB_IOV_SIZE];
+	} iov;
 	u64 total_translated = 0;
 
+	ivec.iov.iovec = iov.iovec;
+	ivec.count = IOTLB_IOV_SIZE;
+	ivec.is_iovec = vrh->use_va;
+
 	while (total_translated < len) {
-		struct bio_vec iov[16];
 		struct iov_iter iter;
 		u64 translated;
 		int ret;
 
 		ret = iotlb_translate(vrh, (u64)(uintptr_t)dst,
 				      len - total_translated, &translated,
-				      iov, ARRAY_SIZE(iov), VHOST_MAP_WO);
+				      &ivec, VHOST_MAP_WO);
 		if (ret == -ENOBUFS)
-			ret = ARRAY_SIZE(iov);
+			ret = IOTLB_IOV_SIZE;
 		else if (ret < 0)
 			return ret;
 
-		iov_iter_bvec(&iter, ITER_DEST, iov, ret, translated);
+		if (ivec.is_iovec) {
+			iov_iter_init(&iter, ITER_DEST, ivec.iov.iovec, ret,
+				      translated);
+		} else {
+			iov_iter_bvec(&iter, ITER_DEST, ivec.iov.bvec, ret,
+				      translated);
+		}
 
 		ret = copy_to_iter(src, translated, &iter);
 		if (ret < 0)
@@ -1210,20 +1260,37 @@ static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
 static inline int getu16_iotlb(const struct vringh *vrh,
 			       u16 *val, const __virtio16 *p)
 {
-	struct bio_vec iov;
-	void *kaddr, *from;
+	struct iotlb_vec ivec;
+	union {
+		struct iovec iovec[1];
+		struct bio_vec bvec[1];
+	} iov;
+	__virtio16 tmp;
 	int ret;
 
+	ivec.iov.iovec = iov.iovec;
+	ivec.count = 1;
+	ivec.is_iovec = vrh->use_va;
+
 	/* Atomic read is needed for getu16 */
-	ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
-			      &iov, 1, VHOST_MAP_RO);
+	ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
+			      NULL, &ivec, VHOST_MAP_RO);
 	if (ret < 0)
 		return ret;
 
-	kaddr = kmap_local_page(iov.bv_page);
-	from = kaddr + iov.bv_offset;
-	*val = vringh16_to_cpu(vrh, READ_ONCE(*(__virtio16 *)from));
-	kunmap_local(kaddr);
+	if (ivec.is_iovec) {
+		ret = __get_user(tmp, (__virtio16 __user *)ivec.iov.iovec[0].iov_base);
+		if (ret)
+			return ret;
+	} else {
+		void *kaddr = kmap_local_page(ivec.iov.bvec[0].bv_page);
+		void *from = kaddr + ivec.iov.bvec[0].bv_offset;
+
+		tmp = READ_ONCE(*(__virtio16 *)from);
+		kunmap_local(kaddr);
+	}
+
+	*val = vringh16_to_cpu(vrh, tmp);
 
 	return 0;
 }
@@ -1231,20 +1298,37 @@ static inline int getu16_iotlb(const struct vringh *vrh,
 static inline int putu16_iotlb(const struct vringh *vrh,
 			       __virtio16 *p, u16 val)
 {
-	struct bio_vec iov;
-	void *kaddr, *to;
+	struct iotlb_vec ivec;
+	union {
+		struct iovec iovec;
+		struct bio_vec bvec;
+	} iov;
+	__virtio16 tmp;
 	int ret;
 
+	ivec.iov.iovec = &iov.iovec;
+	ivec.count = 1;
+	ivec.is_iovec = vrh->use_va;
+
 	/* Atomic write is needed for putu16 */
-	ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
-			      &iov, 1, VHOST_MAP_WO);
+	ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
+			      NULL, &ivec, VHOST_MAP_RO);
 	if (ret < 0)
 		return ret;
 
-	kaddr = kmap_local_page(iov.bv_page);
-	to = kaddr + iov.bv_offset;
-	WRITE_ONCE(*(__virtio16 *)to, cpu_to_vringh16(vrh, val));
-	kunmap_local(kaddr);
+	tmp = cpu_to_vringh16(vrh, val);
+
+	if (ivec.is_iovec) {
+		ret = __put_user(tmp, (__virtio16 __user *)ivec.iov.iovec[0].iov_base);
+		if (ret)
+			return ret;
+	} else {
+		void *kaddr = kmap_local_page(ivec.iov.bvec[0].bv_page);
+		void *to = kaddr + ivec.iov.bvec[0].bv_offset;
+
+		WRITE_ONCE(*(__virtio16 *)to, tmp);
+		kunmap_local(kaddr);
+	}
 
 	return 0;
 }
@@ -1306,6 +1390,7 @@ static inline int putused_iotlb(const struct vringh *vrh,
  * @features: the feature bits for this ring.
  * @num: the number of elements.
  * @weak_barriers: true if we only need memory barriers, not I/O.
+ * @use_va: true if IOTLB contains user VA
  * @desc: the userpace descriptor pointer.
  * @avail: the userpace avail pointer.
  * @used: the userpace used pointer.
@@ -1313,11 +1398,13 @@ static inline int putused_iotlb(const struct vringh *vrh,
  * Returns an error if num is invalid.
  */
 int vringh_init_iotlb(struct vringh *vrh, u64 features,
-		      unsigned int num, bool weak_barriers,
+		      unsigned int num, bool weak_barriers, bool use_va,
 		      struct vring_desc *desc,
 		      struct vring_avail *avail,
 		      struct vring_used *used)
 {
+	vrh->use_va = use_va;
+
 	return vringh_init_kern(vrh, features, num, weak_barriers,
 				desc, avail, used);
 }
-- 
2.39.2
Re: [PATCH v3 4/8] vringh: support VA with iotlb
Posted by Eugenio Perez Martin 3 years ago
On Tue, Mar 21, 2023 at 4:43 PM Stefano Garzarella <sgarzare@redhat.com> wrote:
>
> vDPA supports the possibility to use user VA in the iotlb messages.
> So, let's add support for user VA in vringh to use it in the vDPA
> simulators.
>
> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> ---
>
> Notes:
>     v3:
>     - refactored avoiding code duplication [Eugenio]
>     v2:
>     - replace kmap_atomic() with kmap_local_page() [see previous patch]
>     - fix cast warnings when build with W=1 C=1
>
>  include/linux/vringh.h            |   5 +-
>  drivers/vdpa/mlx5/net/mlx5_vnet.c |   2 +-
>  drivers/vdpa/vdpa_sim/vdpa_sim.c  |   4 +-
>  drivers/vhost/vringh.c            | 153 +++++++++++++++++++++++-------
>  4 files changed, 127 insertions(+), 37 deletions(-)
>
> diff --git a/include/linux/vringh.h b/include/linux/vringh.h
> index 1991a02c6431..d39b9f2dcba0 100644
> --- a/include/linux/vringh.h
> +++ b/include/linux/vringh.h
> @@ -32,6 +32,9 @@ struct vringh {
>         /* Can we get away with weak barriers? */
>         bool weak_barriers;
>
> +       /* Use user's VA */
> +       bool use_va;
> +
>         /* Last available index we saw (ie. where we're up to). */
>         u16 last_avail_idx;
>
> @@ -279,7 +282,7 @@ void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
>                       spinlock_t *iotlb_lock);
>
>  int vringh_init_iotlb(struct vringh *vrh, u64 features,
> -                     unsigned int num, bool weak_barriers,
> +                     unsigned int num, bool weak_barriers, bool use_va,
>                       struct vring_desc *desc,
>                       struct vring_avail *avail,
>                       struct vring_used *used);
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index 520646ae7fa0..dfd0e000217b 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -2537,7 +2537,7 @@ static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
>
>         if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
>                 err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
> -                                       MLX5_CVQ_MAX_ENT, false,
> +                                       MLX5_CVQ_MAX_ENT, false, false,
>                                         (struct vring_desc *)(uintptr_t)cvq->desc_addr,
>                                         (struct vring_avail *)(uintptr_t)cvq->driver_addr,
>                                         (struct vring_used *)(uintptr_t)cvq->device_addr);
> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> index eea23c630f7c..47cdf2a1f5b8 100644
> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> @@ -60,7 +60,7 @@ static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx)
>         struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
>         uint16_t last_avail_idx = vq->vring.last_avail_idx;
>
> -       vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, true,
> +       vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, true, false,
>                           (struct vring_desc *)(uintptr_t)vq->desc_addr,
>                           (struct vring_avail *)
>                           (uintptr_t)vq->driver_addr,
> @@ -92,7 +92,7 @@ static void vdpasim_vq_reset(struct vdpasim *vdpasim,
>         vq->cb = NULL;
>         vq->private = NULL;
>         vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features,
> -                         VDPASIM_QUEUE_MAX, false, NULL, NULL, NULL);
> +                         VDPASIM_QUEUE_MAX, false, false, NULL, NULL, NULL);
>
>         vq->vring.notify = NULL;
>  }
> diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
> index 0ba3ef809e48..72c88519329a 100644
> --- a/drivers/vhost/vringh.c
> +++ b/drivers/vhost/vringh.c
> @@ -1094,10 +1094,18 @@ EXPORT_SYMBOL(vringh_need_notify_kern);
>
>  #if IS_REACHABLE(CONFIG_VHOST_IOTLB)
>
> +struct iotlb_vec {
> +       union {
> +               struct iovec *iovec;
> +               struct bio_vec *bvec;
> +       } iov;
> +       size_t count;
> +       bool is_iovec;
> +};
> +
>  static int iotlb_translate(const struct vringh *vrh,
>                            u64 addr, u64 len, u64 *translated,
> -                          struct bio_vec iov[],
> -                          int iov_size, u32 perm)
> +                          struct iotlb_vec *ivec, u32 perm)
>  {
>         struct vhost_iotlb_map *map;
>         struct vhost_iotlb *iotlb = vrh->iotlb;
> @@ -1107,9 +1115,9 @@ static int iotlb_translate(const struct vringh *vrh,
>         spin_lock(vrh->iotlb_lock);
>
>         while (len > s) {
> -               u64 size, pa, pfn;
> +               u64 size;
>
> -               if (unlikely(ret >= iov_size)) {
> +               if (unlikely(ret >= ivec->count)) {
>                         ret = -ENOBUFS;
>                         break;
>                 }
> @@ -1124,10 +1132,22 @@ static int iotlb_translate(const struct vringh *vrh,
>                 }
>
>                 size = map->size - addr + map->start;
> -               pa = map->addr + addr - map->start;
> -               pfn = pa >> PAGE_SHIFT;
> -               bvec_set_page(&iov[ret], pfn_to_page(pfn), min(len - s, size),
> -                             pa & (PAGE_SIZE - 1));
> +               if (ivec->is_iovec) {
> +                       struct iovec *iovec = ivec->iov.iovec;
> +
> +                       iovec[ret].iov_len = min(len - s, size);
> +                       iovec[ret].iov_base = (void __user *)(unsigned long)

s/unsigned long/uintptr_t ?



> +                                             (map->addr + addr - map->start);
> +               } else {
> +                       u64 pa = map->addr + addr - map->start;
> +                       u64 pfn = pa >> PAGE_SHIFT;
> +                       struct bio_vec *bvec = ivec->iov.bvec;
> +
> +                       bvec_set_page(&bvec[ret], pfn_to_page(pfn),
> +                                     min(len - s, size),
> +                                     pa & (PAGE_SIZE - 1));
> +               }
> +
>                 s += size;
>                 addr += size;
>                 ++ret;
> @@ -1141,26 +1161,42 @@ static int iotlb_translate(const struct vringh *vrh,
>         return ret;
>  }
>
> +#define IOTLB_IOV_SIZE 16

I'm fine with defining here, but maybe it is better to isolate the
change in a previous patch or reuse another well known macro?

Other looks good, and I agree with Jason's comments so even if the
macro declaration is not moved:

Acked-by: Eugenio Pérez <eperezma@redhat.com>

> +
>  static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
>                                   void *src, size_t len)
>  {
> +       struct iotlb_vec ivec;
> +       union {
> +               struct iovec iovec[IOTLB_IOV_SIZE];
> +               struct bio_vec bvec[IOTLB_IOV_SIZE];
> +       } iov;
>         u64 total_translated = 0;
>
> +       ivec.iov.iovec = iov.iovec;
> +       ivec.count = IOTLB_IOV_SIZE;
> +       ivec.is_iovec = vrh->use_va;
> +
>         while (total_translated < len) {
> -               struct bio_vec iov[16];
>                 struct iov_iter iter;
>                 u64 translated;
>                 int ret;
>
>                 ret = iotlb_translate(vrh, (u64)(uintptr_t)src,
>                                       len - total_translated, &translated,
> -                                     iov, ARRAY_SIZE(iov), VHOST_MAP_RO);
> +                                     &ivec, VHOST_MAP_RO);
>                 if (ret == -ENOBUFS)
> -                       ret = ARRAY_SIZE(iov);
> +                       ret = IOTLB_IOV_SIZE;
>                 else if (ret < 0)
>                         return ret;
>
> -               iov_iter_bvec(&iter, ITER_SOURCE, iov, ret, translated);
> +               if (ivec.is_iovec) {
> +                       iov_iter_init(&iter, ITER_SOURCE, ivec.iov.iovec, ret,
> +                                     translated);
> +               } else {
> +                       iov_iter_bvec(&iter, ITER_SOURCE, ivec.iov.bvec, ret,
> +                                     translated);
> +               }
>
>                 ret = copy_from_iter(dst, translated, &iter);
>                 if (ret < 0)
> @@ -1177,23 +1213,37 @@ static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
>  static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
>                                 void *src, size_t len)
>  {
> +       struct iotlb_vec ivec;
> +       union {
> +               struct iovec iovec[IOTLB_IOV_SIZE];
> +               struct bio_vec bvec[IOTLB_IOV_SIZE];
> +       } iov;
>         u64 total_translated = 0;
>
> +       ivec.iov.iovec = iov.iovec;
> +       ivec.count = IOTLB_IOV_SIZE;
> +       ivec.is_iovec = vrh->use_va;
> +
>         while (total_translated < len) {
> -               struct bio_vec iov[16];
>                 struct iov_iter iter;
>                 u64 translated;
>                 int ret;
>
>                 ret = iotlb_translate(vrh, (u64)(uintptr_t)dst,
>                                       len - total_translated, &translated,
> -                                     iov, ARRAY_SIZE(iov), VHOST_MAP_WO);
> +                                     &ivec, VHOST_MAP_WO);
>                 if (ret == -ENOBUFS)
> -                       ret = ARRAY_SIZE(iov);
> +                       ret = IOTLB_IOV_SIZE;
>                 else if (ret < 0)
>                         return ret;
>
> -               iov_iter_bvec(&iter, ITER_DEST, iov, ret, translated);
> +               if (ivec.is_iovec) {
> +                       iov_iter_init(&iter, ITER_DEST, ivec.iov.iovec, ret,
> +                                     translated);
> +               } else {
> +                       iov_iter_bvec(&iter, ITER_DEST, ivec.iov.bvec, ret,
> +                                     translated);
> +               }
>
>                 ret = copy_to_iter(src, translated, &iter);
>                 if (ret < 0)
> @@ -1210,20 +1260,37 @@ static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
>  static inline int getu16_iotlb(const struct vringh *vrh,
>                                u16 *val, const __virtio16 *p)
>  {
> -       struct bio_vec iov;
> -       void *kaddr, *from;
> +       struct iotlb_vec ivec;
> +       union {
> +               struct iovec iovec[1];
> +               struct bio_vec bvec[1];
> +       } iov;
> +       __virtio16 tmp;
>         int ret;
>
> +       ivec.iov.iovec = iov.iovec;
> +       ivec.count = 1;
> +       ivec.is_iovec = vrh->use_va;
> +
>         /* Atomic read is needed for getu16 */
> -       ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
> -                             &iov, 1, VHOST_MAP_RO);
> +       ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
> +                             NULL, &ivec, VHOST_MAP_RO);
>         if (ret < 0)
>                 return ret;
>
> -       kaddr = kmap_local_page(iov.bv_page);
> -       from = kaddr + iov.bv_offset;
> -       *val = vringh16_to_cpu(vrh, READ_ONCE(*(__virtio16 *)from));
> -       kunmap_local(kaddr);
> +       if (ivec.is_iovec) {
> +               ret = __get_user(tmp, (__virtio16 __user *)ivec.iov.iovec[0].iov_base);
> +               if (ret)
> +                       return ret;
> +       } else {
> +               void *kaddr = kmap_local_page(ivec.iov.bvec[0].bv_page);
> +               void *from = kaddr + ivec.iov.bvec[0].bv_offset;
> +
> +               tmp = READ_ONCE(*(__virtio16 *)from);
> +               kunmap_local(kaddr);
> +       }
> +
> +       *val = vringh16_to_cpu(vrh, tmp);
>
>         return 0;
>  }
> @@ -1231,20 +1298,37 @@ static inline int getu16_iotlb(const struct vringh *vrh,
>  static inline int putu16_iotlb(const struct vringh *vrh,
>                                __virtio16 *p, u16 val)
>  {
> -       struct bio_vec iov;
> -       void *kaddr, *to;
> +       struct iotlb_vec ivec;
> +       union {
> +               struct iovec iovec;
> +               struct bio_vec bvec;
> +       } iov;
> +       __virtio16 tmp;
>         int ret;
>
> +       ivec.iov.iovec = &iov.iovec;
> +       ivec.count = 1;
> +       ivec.is_iovec = vrh->use_va;
> +
>         /* Atomic write is needed for putu16 */
> -       ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
> -                             &iov, 1, VHOST_MAP_WO);
> +       ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
> +                             NULL, &ivec, VHOST_MAP_RO);
>         if (ret < 0)
>                 return ret;
>
> -       kaddr = kmap_local_page(iov.bv_page);
> -       to = kaddr + iov.bv_offset;
> -       WRITE_ONCE(*(__virtio16 *)to, cpu_to_vringh16(vrh, val));
> -       kunmap_local(kaddr);
> +       tmp = cpu_to_vringh16(vrh, val);
> +
> +       if (ivec.is_iovec) {
> +               ret = __put_user(tmp, (__virtio16 __user *)ivec.iov.iovec[0].iov_base);
> +               if (ret)
> +                       return ret;
> +       } else {
> +               void *kaddr = kmap_local_page(ivec.iov.bvec[0].bv_page);
> +               void *to = kaddr + ivec.iov.bvec[0].bv_offset;
> +
> +               WRITE_ONCE(*(__virtio16 *)to, tmp);
> +               kunmap_local(kaddr);
> +       }
>
>         return 0;
>  }
> @@ -1306,6 +1390,7 @@ static inline int putused_iotlb(const struct vringh *vrh,
>   * @features: the feature bits for this ring.
>   * @num: the number of elements.
>   * @weak_barriers: true if we only need memory barriers, not I/O.
> + * @use_va: true if IOTLB contains user VA
>   * @desc: the userpace descriptor pointer.
>   * @avail: the userpace avail pointer.
>   * @used: the userpace used pointer.
> @@ -1313,11 +1398,13 @@ static inline int putused_iotlb(const struct vringh *vrh,
>   * Returns an error if num is invalid.
>   */
>  int vringh_init_iotlb(struct vringh *vrh, u64 features,
> -                     unsigned int num, bool weak_barriers,
> +                     unsigned int num, bool weak_barriers, bool use_va,
>                       struct vring_desc *desc,
>                       struct vring_avail *avail,
>                       struct vring_used *used)
>  {
> +       vrh->use_va = use_va;
> +
>         return vringh_init_kern(vrh, features, num, weak_barriers,
>                                 desc, avail, used);
>  }
> --
> 2.39.2
>
Re: [PATCH v3 4/8] vringh: support VA with iotlb
Posted by Stefano Garzarella 3 years ago
On Thu, Mar 23, 2023 at 09:09:14AM +0100, Eugenio Perez Martin wrote:
>On Tue, Mar 21, 2023 at 4:43 PM Stefano Garzarella <sgarzare@redhat.com> wrote:
>>
>> vDPA supports the possibility to use user VA in the iotlb messages.
>> So, let's add support for user VA in vringh to use it in the vDPA
>> simulators.
>>
>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
>> ---
>>
>> Notes:
>>     v3:
>>     - refactored avoiding code duplication [Eugenio]
>>     v2:
>>     - replace kmap_atomic() with kmap_local_page() [see previous patch]
>>     - fix cast warnings when build with W=1 C=1
>>
>>  include/linux/vringh.h            |   5 +-
>>  drivers/vdpa/mlx5/net/mlx5_vnet.c |   2 +-
>>  drivers/vdpa/vdpa_sim/vdpa_sim.c  |   4 +-
>>  drivers/vhost/vringh.c            | 153 +++++++++++++++++++++++-------
>>  4 files changed, 127 insertions(+), 37 deletions(-)
>>
>> diff --git a/include/linux/vringh.h b/include/linux/vringh.h
>> index 1991a02c6431..d39b9f2dcba0 100644
>> --- a/include/linux/vringh.h
>> +++ b/include/linux/vringh.h
>> @@ -32,6 +32,9 @@ struct vringh {
>>         /* Can we get away with weak barriers? */
>>         bool weak_barriers;
>>
>> +       /* Use user's VA */
>> +       bool use_va;
>> +
>>         /* Last available index we saw (ie. where we're up to). */
>>         u16 last_avail_idx;
>>
>> @@ -279,7 +282,7 @@ void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
>>                       spinlock_t *iotlb_lock);
>>
>>  int vringh_init_iotlb(struct vringh *vrh, u64 features,
>> -                     unsigned int num, bool weak_barriers,
>> +                     unsigned int num, bool weak_barriers, bool use_va,
>>                       struct vring_desc *desc,
>>                       struct vring_avail *avail,
>>                       struct vring_used *used);
>> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> index 520646ae7fa0..dfd0e000217b 100644
>> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> @@ -2537,7 +2537,7 @@ static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
>>
>>         if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
>>                 err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
>> -                                       MLX5_CVQ_MAX_ENT, false,
>> +                                       MLX5_CVQ_MAX_ENT, false, false,
>>                                         (struct vring_desc *)(uintptr_t)cvq->desc_addr,
>>                                         (struct vring_avail *)(uintptr_t)cvq->driver_addr,
>>                                         (struct vring_used *)(uintptr_t)cvq->device_addr);
>> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
>> index eea23c630f7c..47cdf2a1f5b8 100644
>> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
>> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
>> @@ -60,7 +60,7 @@ static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx)
>>         struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
>>         uint16_t last_avail_idx = vq->vring.last_avail_idx;
>>
>> -       vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, true,
>> +       vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, true, false,
>>                           (struct vring_desc *)(uintptr_t)vq->desc_addr,
>>                           (struct vring_avail *)
>>                           (uintptr_t)vq->driver_addr,
>> @@ -92,7 +92,7 @@ static void vdpasim_vq_reset(struct vdpasim *vdpasim,
>>         vq->cb = NULL;
>>         vq->private = NULL;
>>         vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features,
>> -                         VDPASIM_QUEUE_MAX, false, NULL, NULL, NULL);
>> +                         VDPASIM_QUEUE_MAX, false, false, NULL, NULL, NULL);
>>
>>         vq->vring.notify = NULL;
>>  }
>> diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
>> index 0ba3ef809e48..72c88519329a 100644
>> --- a/drivers/vhost/vringh.c
>> +++ b/drivers/vhost/vringh.c
>> @@ -1094,10 +1094,18 @@ EXPORT_SYMBOL(vringh_need_notify_kern);
>>
>>  #if IS_REACHABLE(CONFIG_VHOST_IOTLB)
>>
>> +struct iotlb_vec {
>> +       union {
>> +               struct iovec *iovec;
>> +               struct bio_vec *bvec;
>> +       } iov;
>> +       size_t count;
>> +       bool is_iovec;
>> +};
>> +
>>  static int iotlb_translate(const struct vringh *vrh,
>>                            u64 addr, u64 len, u64 *translated,
>> -                          struct bio_vec iov[],
>> -                          int iov_size, u32 perm)
>> +                          struct iotlb_vec *ivec, u32 perm)
>>  {
>>         struct vhost_iotlb_map *map;
>>         struct vhost_iotlb *iotlb = vrh->iotlb;
>> @@ -1107,9 +1115,9 @@ static int iotlb_translate(const struct vringh *vrh,
>>         spin_lock(vrh->iotlb_lock);
>>
>>         while (len > s) {
>> -               u64 size, pa, pfn;
>> +               u64 size;
>>
>> -               if (unlikely(ret >= iov_size)) {
>> +               if (unlikely(ret >= ivec->count)) {
>>                         ret = -ENOBUFS;
>>                         break;
>>                 }
>> @@ -1124,10 +1132,22 @@ static int iotlb_translate(const struct vringh *vrh,
>>                 }
>>
>>                 size = map->size - addr + map->start;
>> -               pa = map->addr + addr - map->start;
>> -               pfn = pa >> PAGE_SHIFT;
>> -               bvec_set_page(&iov[ret], pfn_to_page(pfn), min(len - s, size),
>> -                             pa & (PAGE_SIZE - 1));
>> +               if (ivec->is_iovec) {
>> +                       struct iovec *iovec = ivec->iov.iovec;
>> +
>> +                       iovec[ret].iov_len = min(len - s, size);
>> +                       iovec[ret].iov_base = (void __user *)(unsigned long)
>
>s/unsigned long/uintptr_t ?
>

yep, good catch!

As I wrote to Jason, I think I'll take it out of the if and just declare 
an uintptr_t variable, since I'm using it also in the else branch.

>
>
>> +                                             (map->addr + addr - map->start);
>> +               } else {
>> +                       u64 pa = map->addr + addr - map->start;
>> +                       u64 pfn = pa >> PAGE_SHIFT;
>> +                       struct bio_vec *bvec = ivec->iov.bvec;
>> +
>> +                       bvec_set_page(&bvec[ret], pfn_to_page(pfn),
>> +                                     min(len - s, size),
>> +                                     pa & (PAGE_SIZE - 1));
>> +               }
>> +
>>                 s += size;
>>                 addr += size;
>>                 ++ret;
>> @@ -1141,26 +1161,42 @@ static int iotlb_translate(const struct vringh *vrh,
>>         return ret;
>>  }
>>
>> +#define IOTLB_IOV_SIZE 16
>
>I'm fine with defining here, but maybe it is better to isolate the
>change in a previous patch or reuse another well known macro?

Yep, good point!

Do you have any well known macro to suggest?

>
>Other looks good, and I agree with Jason's comments so even if the
>macro declaration is not moved:
>
>Acked-by: Eugenio Pérez <eperezma@redhat.com>

Thanks,
Stefano

Re: [PATCH v3 4/8] vringh: support VA with iotlb
Posted by Eugenio Perez Martin 3 years ago
On Thu, Mar 23, 2023 at 11:46 AM Stefano Garzarella <sgarzare@redhat.com> wrote:
>
> On Thu, Mar 23, 2023 at 09:09:14AM +0100, Eugenio Perez Martin wrote:
> >On Tue, Mar 21, 2023 at 4:43 PM Stefano Garzarella <sgarzare@redhat.com> wrote:
> >>
> >> vDPA supports the possibility to use user VA in the iotlb messages.
> >> So, let's add support for user VA in vringh to use it in the vDPA
> >> simulators.
> >>
> >> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> >> ---
> >>
> >> Notes:
> >>     v3:
> >>     - refactored avoiding code duplication [Eugenio]
> >>     v2:
> >>     - replace kmap_atomic() with kmap_local_page() [see previous patch]
> >>     - fix cast warnings when build with W=1 C=1
> >>
> >>  include/linux/vringh.h            |   5 +-
> >>  drivers/vdpa/mlx5/net/mlx5_vnet.c |   2 +-
> >>  drivers/vdpa/vdpa_sim/vdpa_sim.c  |   4 +-
> >>  drivers/vhost/vringh.c            | 153 +++++++++++++++++++++++-------
> >>  4 files changed, 127 insertions(+), 37 deletions(-)
> >>
> >> diff --git a/include/linux/vringh.h b/include/linux/vringh.h
> >> index 1991a02c6431..d39b9f2dcba0 100644
> >> --- a/include/linux/vringh.h
> >> +++ b/include/linux/vringh.h
> >> @@ -32,6 +32,9 @@ struct vringh {
> >>         /* Can we get away with weak barriers? */
> >>         bool weak_barriers;
> >>
> >> +       /* Use user's VA */
> >> +       bool use_va;
> >> +
> >>         /* Last available index we saw (ie. where we're up to). */
> >>         u16 last_avail_idx;
> >>
> >> @@ -279,7 +282,7 @@ void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
> >>                       spinlock_t *iotlb_lock);
> >>
> >>  int vringh_init_iotlb(struct vringh *vrh, u64 features,
> >> -                     unsigned int num, bool weak_barriers,
> >> +                     unsigned int num, bool weak_barriers, bool use_va,
> >>                       struct vring_desc *desc,
> >>                       struct vring_avail *avail,
> >>                       struct vring_used *used);
> >> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> >> index 520646ae7fa0..dfd0e000217b 100644
> >> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> >> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> >> @@ -2537,7 +2537,7 @@ static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
> >>
> >>         if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
> >>                 err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
> >> -                                       MLX5_CVQ_MAX_ENT, false,
> >> +                                       MLX5_CVQ_MAX_ENT, false, false,
> >>                                         (struct vring_desc *)(uintptr_t)cvq->desc_addr,
> >>                                         (struct vring_avail *)(uintptr_t)cvq->driver_addr,
> >>                                         (struct vring_used *)(uintptr_t)cvq->device_addr);
> >> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> >> index eea23c630f7c..47cdf2a1f5b8 100644
> >> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
> >> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> >> @@ -60,7 +60,7 @@ static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx)
> >>         struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
> >>         uint16_t last_avail_idx = vq->vring.last_avail_idx;
> >>
> >> -       vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, true,
> >> +       vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, true, false,
> >>                           (struct vring_desc *)(uintptr_t)vq->desc_addr,
> >>                           (struct vring_avail *)
> >>                           (uintptr_t)vq->driver_addr,
> >> @@ -92,7 +92,7 @@ static void vdpasim_vq_reset(struct vdpasim *vdpasim,
> >>         vq->cb = NULL;
> >>         vq->private = NULL;
> >>         vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features,
> >> -                         VDPASIM_QUEUE_MAX, false, NULL, NULL, NULL);
> >> +                         VDPASIM_QUEUE_MAX, false, false, NULL, NULL, NULL);
> >>
> >>         vq->vring.notify = NULL;
> >>  }
> >> diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
> >> index 0ba3ef809e48..72c88519329a 100644
> >> --- a/drivers/vhost/vringh.c
> >> +++ b/drivers/vhost/vringh.c
> >> @@ -1094,10 +1094,18 @@ EXPORT_SYMBOL(vringh_need_notify_kern);
> >>
> >>  #if IS_REACHABLE(CONFIG_VHOST_IOTLB)
> >>
> >> +struct iotlb_vec {
> >> +       union {
> >> +               struct iovec *iovec;
> >> +               struct bio_vec *bvec;
> >> +       } iov;
> >> +       size_t count;
> >> +       bool is_iovec;
> >> +};
> >> +
> >>  static int iotlb_translate(const struct vringh *vrh,
> >>                            u64 addr, u64 len, u64 *translated,
> >> -                          struct bio_vec iov[],
> >> -                          int iov_size, u32 perm)
> >> +                          struct iotlb_vec *ivec, u32 perm)
> >>  {
> >>         struct vhost_iotlb_map *map;
> >>         struct vhost_iotlb *iotlb = vrh->iotlb;
> >> @@ -1107,9 +1115,9 @@ static int iotlb_translate(const struct vringh *vrh,
> >>         spin_lock(vrh->iotlb_lock);
> >>
> >>         while (len > s) {
> >> -               u64 size, pa, pfn;
> >> +               u64 size;
> >>
> >> -               if (unlikely(ret >= iov_size)) {
> >> +               if (unlikely(ret >= ivec->count)) {
> >>                         ret = -ENOBUFS;
> >>                         break;
> >>                 }
> >> @@ -1124,10 +1132,22 @@ static int iotlb_translate(const struct vringh *vrh,
> >>                 }
> >>
> >>                 size = map->size - addr + map->start;
> >> -               pa = map->addr + addr - map->start;
> >> -               pfn = pa >> PAGE_SHIFT;
> >> -               bvec_set_page(&iov[ret], pfn_to_page(pfn), min(len - s, size),
> >> -                             pa & (PAGE_SIZE - 1));
> >> +               if (ivec->is_iovec) {
> >> +                       struct iovec *iovec = ivec->iov.iovec;
> >> +
> >> +                       iovec[ret].iov_len = min(len - s, size);
> >> +                       iovec[ret].iov_base = (void __user *)(unsigned long)
> >
> >s/unsigned long/uintptr_t ?
> >
>
> yep, good catch!
>
> As I wrote to Jason, I think I'll take it out of the if and just declare
> an uintptr_t variable, since I'm using it also in the else branch.
>
> >
> >
> >> +                                             (map->addr + addr - map->start);
> >> +               } else {
> >> +                       u64 pa = map->addr + addr - map->start;
> >> +                       u64 pfn = pa >> PAGE_SHIFT;
> >> +                       struct bio_vec *bvec = ivec->iov.bvec;
> >> +
> >> +                       bvec_set_page(&bvec[ret], pfn_to_page(pfn),
> >> +                                     min(len - s, size),
> >> +                                     pa & (PAGE_SIZE - 1));
> >> +               }
> >> +
> >>                 s += size;
> >>                 addr += size;
> >>                 ++ret;
> >> @@ -1141,26 +1161,42 @@ static int iotlb_translate(const struct vringh *vrh,
> >>         return ret;
> >>  }
> >>
> >> +#define IOTLB_IOV_SIZE 16
> >
> >I'm fine with defining here, but maybe it is better to isolate the
> >change in a previous patch or reuse another well known macro?
>
> Yep, good point!
>
> Do you have any well known macro to suggest?
>

Not really, 16 seems like a convenience value here actually :). Maybe
replace _SIZE with _STRIDE or similar?

I keep the Acked-by even if the final name is IOTLB_IOV_SIZE though.

Thanks!
Re: [PATCH v3 4/8] vringh: support VA with iotlb
Posted by Stefano Garzarella 3 years ago
On Thu, Mar 23, 2023 at 03:43:34PM +0100, Eugenio Perez Martin wrote:
>On Thu, Mar 23, 2023 at 11:46 AM Stefano Garzarella <sgarzare@redhat.com> wrote:
>>
>> On Thu, Mar 23, 2023 at 09:09:14AM +0100, Eugenio Perez Martin wrote:
>> >On Tue, Mar 21, 2023 at 4:43 PM Stefano Garzarella <sgarzare@redhat.com> wrote:
>> >>
>> >> vDPA supports the possibility to use user VA in the iotlb messages.
>> >> So, let's add support for user VA in vringh to use it in the vDPA
>> >> simulators.
>> >>
>> >> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
>> >> ---
>> >>
>> >> Notes:
>> >>     v3:
>> >>     - refactored avoiding code duplication [Eugenio]
>> >>     v2:
>> >>     - replace kmap_atomic() with kmap_local_page() [see previous patch]
>> >>     - fix cast warnings when build with W=1 C=1
>> >>
>> >>  include/linux/vringh.h            |   5 +-
>> >>  drivers/vdpa/mlx5/net/mlx5_vnet.c |   2 +-
>> >>  drivers/vdpa/vdpa_sim/vdpa_sim.c  |   4 +-
>> >>  drivers/vhost/vringh.c            | 153 +++++++++++++++++++++++-------
>> >>  4 files changed, 127 insertions(+), 37 deletions(-)
>> >>
>> >> diff --git a/include/linux/vringh.h b/include/linux/vringh.h
>> >> index 1991a02c6431..d39b9f2dcba0 100644
>> >> --- a/include/linux/vringh.h
>> >> +++ b/include/linux/vringh.h
>> >> @@ -32,6 +32,9 @@ struct vringh {
>> >>         /* Can we get away with weak barriers? */
>> >>         bool weak_barriers;
>> >>
>> >> +       /* Use user's VA */
>> >> +       bool use_va;
>> >> +
>> >>         /* Last available index we saw (ie. where we're up to). */
>> >>         u16 last_avail_idx;
>> >>
>> >> @@ -279,7 +282,7 @@ void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
>> >>                       spinlock_t *iotlb_lock);
>> >>
>> >>  int vringh_init_iotlb(struct vringh *vrh, u64 features,
>> >> -                     unsigned int num, bool weak_barriers,
>> >> +                     unsigned int num, bool weak_barriers, bool use_va,
>> >>                       struct vring_desc *desc,
>> >>                       struct vring_avail *avail,
>> >>                       struct vring_used *used);
>> >> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> >> index 520646ae7fa0..dfd0e000217b 100644
>> >> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> >> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> >> @@ -2537,7 +2537,7 @@ static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
>> >>
>> >>         if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
>> >>                 err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
>> >> -                                       MLX5_CVQ_MAX_ENT, false,
>> >> +                                       MLX5_CVQ_MAX_ENT, false, false,
>> >>                                         (struct vring_desc *)(uintptr_t)cvq->desc_addr,
>> >>                                         (struct vring_avail *)(uintptr_t)cvq->driver_addr,
>> >>                                         (struct vring_used *)(uintptr_t)cvq->device_addr);
>> >> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
>> >> index eea23c630f7c..47cdf2a1f5b8 100644
>> >> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
>> >> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
>> >> @@ -60,7 +60,7 @@ static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx)
>> >>         struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
>> >>         uint16_t last_avail_idx = vq->vring.last_avail_idx;
>> >>
>> >> -       vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, true,
>> >> +       vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, true, false,
>> >>                           (struct vring_desc *)(uintptr_t)vq->desc_addr,
>> >>                           (struct vring_avail *)
>> >>                           (uintptr_t)vq->driver_addr,
>> >> @@ -92,7 +92,7 @@ static void vdpasim_vq_reset(struct vdpasim *vdpasim,
>> >>         vq->cb = NULL;
>> >>         vq->private = NULL;
>> >>         vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features,
>> >> -                         VDPASIM_QUEUE_MAX, false, NULL, NULL, NULL);
>> >> +                         VDPASIM_QUEUE_MAX, false, false, NULL, NULL, NULL);
>> >>
>> >>         vq->vring.notify = NULL;
>> >>  }
>> >> diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
>> >> index 0ba3ef809e48..72c88519329a 100644
>> >> --- a/drivers/vhost/vringh.c
>> >> +++ b/drivers/vhost/vringh.c
>> >> @@ -1094,10 +1094,18 @@ EXPORT_SYMBOL(vringh_need_notify_kern);
>> >>
>> >>  #if IS_REACHABLE(CONFIG_VHOST_IOTLB)
>> >>
>> >> +struct iotlb_vec {
>> >> +       union {
>> >> +               struct iovec *iovec;
>> >> +               struct bio_vec *bvec;
>> >> +       } iov;
>> >> +       size_t count;
>> >> +       bool is_iovec;
>> >> +};
>> >> +
>> >>  static int iotlb_translate(const struct vringh *vrh,
>> >>                            u64 addr, u64 len, u64 *translated,
>> >> -                          struct bio_vec iov[],
>> >> -                          int iov_size, u32 perm)
>> >> +                          struct iotlb_vec *ivec, u32 perm)
>> >>  {
>> >>         struct vhost_iotlb_map *map;
>> >>         struct vhost_iotlb *iotlb = vrh->iotlb;
>> >> @@ -1107,9 +1115,9 @@ static int iotlb_translate(const struct vringh *vrh,
>> >>         spin_lock(vrh->iotlb_lock);
>> >>
>> >>         while (len > s) {
>> >> -               u64 size, pa, pfn;
>> >> +               u64 size;
>> >>
>> >> -               if (unlikely(ret >= iov_size)) {
>> >> +               if (unlikely(ret >= ivec->count)) {
>> >>                         ret = -ENOBUFS;
>> >>                         break;
>> >>                 }
>> >> @@ -1124,10 +1132,22 @@ static int iotlb_translate(const struct vringh *vrh,
>> >>                 }
>> >>
>> >>                 size = map->size - addr + map->start;
>> >> -               pa = map->addr + addr - map->start;
>> >> -               pfn = pa >> PAGE_SHIFT;
>> >> -               bvec_set_page(&iov[ret], pfn_to_page(pfn), min(len - s, size),
>> >> -                             pa & (PAGE_SIZE - 1));
>> >> +               if (ivec->is_iovec) {
>> >> +                       struct iovec *iovec = ivec->iov.iovec;
>> >> +
>> >> +                       iovec[ret].iov_len = min(len - s, size);
>> >> +                       iovec[ret].iov_base = (void __user *)(unsigned long)
>> >
>> >s/unsigned long/uintptr_t ?
>> >
>>
>> yep, good catch!
>>
>> As I wrote to Jason, I think I'll take it out of the if and just declare
>> an uintptr_t variable, since I'm using it also in the else branch.
>>
>> >
>> >
>> >> +                                             (map->addr + addr - map->start);
>> >> +               } else {
>> >> +                       u64 pa = map->addr + addr - map->start;
>> >> +                       u64 pfn = pa >> PAGE_SHIFT;
>> >> +                       struct bio_vec *bvec = ivec->iov.bvec;
>> >> +
>> >> +                       bvec_set_page(&bvec[ret], pfn_to_page(pfn),
>> >> +                                     min(len - s, size),
>> >> +                                     pa & (PAGE_SIZE - 1));
>> >> +               }
>> >> +
>> >>                 s += size;
>> >>                 addr += size;
>> >>                 ++ret;
>> >> @@ -1141,26 +1161,42 @@ static int iotlb_translate(const struct vringh *vrh,
>> >>         return ret;
>> >>  }
>> >>
>> >> +#define IOTLB_IOV_SIZE 16
>> >
>> >I'm fine with defining here, but maybe it is better to isolate the
>> >change in a previous patch or reuse another well known macro?
>>
>> Yep, good point!
>>
>> Do you have any well known macro to suggest?
>>
>
>Not really, 16 seems like a convenience value here actually :). Maybe
>replace _SIZE with _STRIDE or similar?

Ack, I will add IOTLB_IOV_STRIDE in a preparation patch before this
one.

>
>I keep the Acked-by even if the final name is IOTLB_IOV_SIZE though.

Thanks,
I changed a bit this patch following Jason's and your suggestions.

I'd like an explicit Acked-by on the next version if it is okay with 
you.

Thanks,
Stefano

Re: [PATCH v3 4/8] vringh: support VA with iotlb
Posted by Jason Wang 3 years ago
On Tue, Mar 21, 2023 at 11:43 PM Stefano Garzarella <sgarzare@redhat.com> wrote:
>
> vDPA supports the possibility to use user VA in the iotlb messages.
> So, let's add support for user VA in vringh to use it in the vDPA
> simulators.
>
> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> ---
>
> Notes:
>     v3:
>     - refactored avoiding code duplication [Eugenio]
>     v2:
>     - replace kmap_atomic() with kmap_local_page() [see previous patch]
>     - fix cast warnings when build with W=1 C=1
>
>  include/linux/vringh.h            |   5 +-
>  drivers/vdpa/mlx5/net/mlx5_vnet.c |   2 +-
>  drivers/vdpa/vdpa_sim/vdpa_sim.c  |   4 +-
>  drivers/vhost/vringh.c            | 153 +++++++++++++++++++++++-------
>  4 files changed, 127 insertions(+), 37 deletions(-)
>
> diff --git a/include/linux/vringh.h b/include/linux/vringh.h
> index 1991a02c6431..d39b9f2dcba0 100644
> --- a/include/linux/vringh.h
> +++ b/include/linux/vringh.h
> @@ -32,6 +32,9 @@ struct vringh {
>         /* Can we get away with weak barriers? */
>         bool weak_barriers;
>
> +       /* Use user's VA */
> +       bool use_va;
> +
>         /* Last available index we saw (ie. where we're up to). */
>         u16 last_avail_idx;
>
> @@ -279,7 +282,7 @@ void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
>                       spinlock_t *iotlb_lock);
>
>  int vringh_init_iotlb(struct vringh *vrh, u64 features,
> -                     unsigned int num, bool weak_barriers,
> +                     unsigned int num, bool weak_barriers, bool use_va,
>                       struct vring_desc *desc,
>                       struct vring_avail *avail,
>                       struct vring_used *used);
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index 520646ae7fa0..dfd0e000217b 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -2537,7 +2537,7 @@ static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
>
>         if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
>                 err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
> -                                       MLX5_CVQ_MAX_ENT, false,
> +                                       MLX5_CVQ_MAX_ENT, false, false,

To avoid those changes, would it be better to introduce
vringh_init_iotlb_va() so vringh_init_iotlb() can stick to pa.

>                                         (struct vring_desc *)(uintptr_t)cvq->desc_addr,
>                                         (struct vring_avail *)(uintptr_t)cvq->driver_addr,
>                                         (struct vring_used *)(uintptr_t)cvq->device_addr);
> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> index eea23c630f7c..47cdf2a1f5b8 100644
> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> @@ -60,7 +60,7 @@ static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx)
>         struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
>         uint16_t last_avail_idx = vq->vring.last_avail_idx;
>
> -       vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, true,
> +       vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, true, false,
>                           (struct vring_desc *)(uintptr_t)vq->desc_addr,
>                           (struct vring_avail *)
>                           (uintptr_t)vq->driver_addr,
> @@ -92,7 +92,7 @@ static void vdpasim_vq_reset(struct vdpasim *vdpasim,
>         vq->cb = NULL;
>         vq->private = NULL;
>         vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features,
> -                         VDPASIM_QUEUE_MAX, false, NULL, NULL, NULL);
> +                         VDPASIM_QUEUE_MAX, false, false, NULL, NULL, NULL);
>
>         vq->vring.notify = NULL;
>  }
> diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
> index 0ba3ef809e48..72c88519329a 100644
> --- a/drivers/vhost/vringh.c
> +++ b/drivers/vhost/vringh.c
> @@ -1094,10 +1094,18 @@ EXPORT_SYMBOL(vringh_need_notify_kern);
>
>  #if IS_REACHABLE(CONFIG_VHOST_IOTLB)
>
> +struct iotlb_vec {
> +       union {
> +               struct iovec *iovec;
> +               struct bio_vec *bvec;
> +       } iov;
> +       size_t count;
> +       bool is_iovec;

I wonder if this is needed (if vringh is passed to every iotlb_vec helper).

> +};
> +
>  static int iotlb_translate(const struct vringh *vrh,
>                            u64 addr, u64 len, u64 *translated,
> -                          struct bio_vec iov[],
> -                          int iov_size, u32 perm)
> +                          struct iotlb_vec *ivec, u32 perm)
>  {
>         struct vhost_iotlb_map *map;
>         struct vhost_iotlb *iotlb = vrh->iotlb;
> @@ -1107,9 +1115,9 @@ static int iotlb_translate(const struct vringh *vrh,
>         spin_lock(vrh->iotlb_lock);
>
>         while (len > s) {
> -               u64 size, pa, pfn;
> +               u64 size;
>
> -               if (unlikely(ret >= iov_size)) {
> +               if (unlikely(ret >= ivec->count)) {
>                         ret = -ENOBUFS;
>                         break;
>                 }
> @@ -1124,10 +1132,22 @@ static int iotlb_translate(const struct vringh *vrh,
>                 }
>
>                 size = map->size - addr + map->start;
> -               pa = map->addr + addr - map->start;
> -               pfn = pa >> PAGE_SHIFT;
> -               bvec_set_page(&iov[ret], pfn_to_page(pfn), min(len - s, size),
> -                             pa & (PAGE_SIZE - 1));
> +               if (ivec->is_iovec) {
> +                       struct iovec *iovec = ivec->iov.iovec;
> +
> +                       iovec[ret].iov_len = min(len - s, size);
> +                       iovec[ret].iov_base = (void __user *)(unsigned long)
> +                                             (map->addr + addr - map->start);

map->addr - map->start to avoid overflow?

> +               } else {
> +                       u64 pa = map->addr + addr - map->start;
> +                       u64 pfn = pa >> PAGE_SHIFT;
> +                       struct bio_vec *bvec = ivec->iov.bvec;
> +
> +                       bvec_set_page(&bvec[ret], pfn_to_page(pfn),
> +                                     min(len - s, size),
> +                                     pa & (PAGE_SIZE - 1));
> +               }
> +
>                 s += size;
>                 addr += size;
>                 ++ret;
> @@ -1141,26 +1161,42 @@ static int iotlb_translate(const struct vringh *vrh,
>         return ret;
>  }
>
> +#define IOTLB_IOV_SIZE 16
> +
>  static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
>                                   void *src, size_t len)
>  {
> +       struct iotlb_vec ivec;
> +       union {
> +               struct iovec iovec[IOTLB_IOV_SIZE];
> +               struct bio_vec bvec[IOTLB_IOV_SIZE];
> +       } iov;
>         u64 total_translated = 0;
>
> +       ivec.iov.iovec = iov.iovec;

ivc.iov = iov ?

Others look good.

Thanks

> +       ivec.count = IOTLB_IOV_SIZE;
> +       ivec.is_iovec = vrh->use_va;
> +
>         while (total_translated < len) {
> -               struct bio_vec iov[16];
>                 struct iov_iter iter;
>                 u64 translated;
>                 int ret;
>
>                 ret = iotlb_translate(vrh, (u64)(uintptr_t)src,
>                                       len - total_translated, &translated,
> -                                     iov, ARRAY_SIZE(iov), VHOST_MAP_RO);
> +                                     &ivec, VHOST_MAP_RO);
>                 if (ret == -ENOBUFS)
> -                       ret = ARRAY_SIZE(iov);
> +                       ret = IOTLB_IOV_SIZE;
>                 else if (ret < 0)
>                         return ret;
>
> -               iov_iter_bvec(&iter, ITER_SOURCE, iov, ret, translated);
> +               if (ivec.is_iovec) {
> +                       iov_iter_init(&iter, ITER_SOURCE, ivec.iov.iovec, ret,
> +                                     translated);
> +               } else {
> +                       iov_iter_bvec(&iter, ITER_SOURCE, ivec.iov.bvec, ret,
> +                                     translated);
> +               }
>
>                 ret = copy_from_iter(dst, translated, &iter);
>                 if (ret < 0)
> @@ -1177,23 +1213,37 @@ static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
>  static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
>                                 void *src, size_t len)
>  {
> +       struct iotlb_vec ivec;
> +       union {
> +               struct iovec iovec[IOTLB_IOV_SIZE];
> +               struct bio_vec bvec[IOTLB_IOV_SIZE];
> +       } iov;
>         u64 total_translated = 0;
>
> +       ivec.iov.iovec = iov.iovec;
> +       ivec.count = IOTLB_IOV_SIZE;
> +       ivec.is_iovec = vrh->use_va;
> +
>         while (total_translated < len) {
> -               struct bio_vec iov[16];
>                 struct iov_iter iter;
>                 u64 translated;
>                 int ret;
>
>                 ret = iotlb_translate(vrh, (u64)(uintptr_t)dst,
>                                       len - total_translated, &translated,
> -                                     iov, ARRAY_SIZE(iov), VHOST_MAP_WO);
> +                                     &ivec, VHOST_MAP_WO);
>                 if (ret == -ENOBUFS)
> -                       ret = ARRAY_SIZE(iov);
> +                       ret = IOTLB_IOV_SIZE;
>                 else if (ret < 0)
>                         return ret;
>
> -               iov_iter_bvec(&iter, ITER_DEST, iov, ret, translated);
> +               if (ivec.is_iovec) {
> +                       iov_iter_init(&iter, ITER_DEST, ivec.iov.iovec, ret,
> +                                     translated);
> +               } else {
> +                       iov_iter_bvec(&iter, ITER_DEST, ivec.iov.bvec, ret,
> +                                     translated);
> +               }
>
>                 ret = copy_to_iter(src, translated, &iter);
>                 if (ret < 0)
> @@ -1210,20 +1260,37 @@ static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
>  static inline int getu16_iotlb(const struct vringh *vrh,
>                                u16 *val, const __virtio16 *p)
>  {
> -       struct bio_vec iov;
> -       void *kaddr, *from;
> +       struct iotlb_vec ivec;
> +       union {
> +               struct iovec iovec[1];
> +               struct bio_vec bvec[1];
> +       } iov;
> +       __virtio16 tmp;
>         int ret;
>
> +       ivec.iov.iovec = iov.iovec;
> +       ivec.count = 1;
> +       ivec.is_iovec = vrh->use_va;
> +
>         /* Atomic read is needed for getu16 */
> -       ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
> -                             &iov, 1, VHOST_MAP_RO);
> +       ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
> +                             NULL, &ivec, VHOST_MAP_RO);
>         if (ret < 0)
>                 return ret;
>
> -       kaddr = kmap_local_page(iov.bv_page);
> -       from = kaddr + iov.bv_offset;
> -       *val = vringh16_to_cpu(vrh, READ_ONCE(*(__virtio16 *)from));
> -       kunmap_local(kaddr);
> +       if (ivec.is_iovec) {
> +               ret = __get_user(tmp, (__virtio16 __user *)ivec.iov.iovec[0].iov_base);
> +               if (ret)
> +                       return ret;
> +       } else {
> +               void *kaddr = kmap_local_page(ivec.iov.bvec[0].bv_page);
> +               void *from = kaddr + ivec.iov.bvec[0].bv_offset;
> +
> +               tmp = READ_ONCE(*(__virtio16 *)from);
> +               kunmap_local(kaddr);
> +       }
> +
> +       *val = vringh16_to_cpu(vrh, tmp);
>
>         return 0;
>  }
> @@ -1231,20 +1298,37 @@ static inline int getu16_iotlb(const struct vringh *vrh,
>  static inline int putu16_iotlb(const struct vringh *vrh,
>                                __virtio16 *p, u16 val)
>  {
> -       struct bio_vec iov;
> -       void *kaddr, *to;
> +       struct iotlb_vec ivec;
> +       union {
> +               struct iovec iovec;
> +               struct bio_vec bvec;
> +       } iov;
> +       __virtio16 tmp;
>         int ret;
>
> +       ivec.iov.iovec = &iov.iovec;
> +       ivec.count = 1;
> +       ivec.is_iovec = vrh->use_va;
> +
>         /* Atomic write is needed for putu16 */
> -       ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
> -                             &iov, 1, VHOST_MAP_WO);
> +       ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
> +                             NULL, &ivec, VHOST_MAP_RO);
>         if (ret < 0)
>                 return ret;
>
> -       kaddr = kmap_local_page(iov.bv_page);
> -       to = kaddr + iov.bv_offset;
> -       WRITE_ONCE(*(__virtio16 *)to, cpu_to_vringh16(vrh, val));
> -       kunmap_local(kaddr);
> +       tmp = cpu_to_vringh16(vrh, val);
> +
> +       if (ivec.is_iovec) {
> +               ret = __put_user(tmp, (__virtio16 __user *)ivec.iov.iovec[0].iov_base);
> +               if (ret)
> +                       return ret;
> +       } else {
> +               void *kaddr = kmap_local_page(ivec.iov.bvec[0].bv_page);
> +               void *to = kaddr + ivec.iov.bvec[0].bv_offset;
> +
> +               WRITE_ONCE(*(__virtio16 *)to, tmp);
> +               kunmap_local(kaddr);
> +       }
>
>         return 0;
>  }
> @@ -1306,6 +1390,7 @@ static inline int putused_iotlb(const struct vringh *vrh,
>   * @features: the feature bits for this ring.
>   * @num: the number of elements.
>   * @weak_barriers: true if we only need memory barriers, not I/O.
> + * @use_va: true if IOTLB contains user VA
>   * @desc: the userpace descriptor pointer.
>   * @avail: the userpace avail pointer.
>   * @used: the userpace used pointer.
> @@ -1313,11 +1398,13 @@ static inline int putused_iotlb(const struct vringh *vrh,
>   * Returns an error if num is invalid.
>   */
>  int vringh_init_iotlb(struct vringh *vrh, u64 features,
> -                     unsigned int num, bool weak_barriers,
> +                     unsigned int num, bool weak_barriers, bool use_va,
>                       struct vring_desc *desc,
>                       struct vring_avail *avail,
>                       struct vring_used *used)
>  {
> +       vrh->use_va = use_va;
> +
>         return vringh_init_kern(vrh, features, num, weak_barriers,
>                                 desc, avail, used);
>  }
> --
> 2.39.2
>
Re: [PATCH v3 4/8] vringh: support VA with iotlb
Posted by Stefano Garzarella 3 years ago
On Thu, Mar 23, 2023 at 11:36:28AM +0800, Jason Wang wrote:
>On Tue, Mar 21, 2023 at 11:43 PM Stefano Garzarella <sgarzare@redhat.com> wrote:
>>
>> vDPA supports the possibility to use user VA in the iotlb messages.
>> So, let's add support for user VA in vringh to use it in the vDPA
>> simulators.
>>
>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
>> ---
>>
>> Notes:
>>     v3:
>>     - refactored avoiding code duplication [Eugenio]
>>     v2:
>>     - replace kmap_atomic() with kmap_local_page() [see previous patch]
>>     - fix cast warnings when build with W=1 C=1
>>
>>  include/linux/vringh.h            |   5 +-
>>  drivers/vdpa/mlx5/net/mlx5_vnet.c |   2 +-
>>  drivers/vdpa/vdpa_sim/vdpa_sim.c  |   4 +-
>>  drivers/vhost/vringh.c            | 153 +++++++++++++++++++++++-------
>>  4 files changed, 127 insertions(+), 37 deletions(-)
>>
>> diff --git a/include/linux/vringh.h b/include/linux/vringh.h
>> index 1991a02c6431..d39b9f2dcba0 100644
>> --- a/include/linux/vringh.h
>> +++ b/include/linux/vringh.h
>> @@ -32,6 +32,9 @@ struct vringh {
>>         /* Can we get away with weak barriers? */
>>         bool weak_barriers;
>>
>> +       /* Use user's VA */
>> +       bool use_va;
>> +
>>         /* Last available index we saw (ie. where we're up to). */
>>         u16 last_avail_idx;
>>
>> @@ -279,7 +282,7 @@ void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
>>                       spinlock_t *iotlb_lock);
>>
>>  int vringh_init_iotlb(struct vringh *vrh, u64 features,
>> -                     unsigned int num, bool weak_barriers,
>> +                     unsigned int num, bool weak_barriers, bool use_va,
>>                       struct vring_desc *desc,
>>                       struct vring_avail *avail,
>>                       struct vring_used *used);
>> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> index 520646ae7fa0..dfd0e000217b 100644
>> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> @@ -2537,7 +2537,7 @@ static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
>>
>>         if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
>>                 err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
>> -                                       MLX5_CVQ_MAX_ENT, false,
>> +                                       MLX5_CVQ_MAX_ENT, false, false,
>
>To avoid those changes, would it be better to introduce
>vringh_init_iotlb_va() so vringh_init_iotlb() can stick to pa.

Okay, I don't have a strong opinion, I'll add vringh_init_iotlb_va().

>
>>                                         (struct vring_desc *)(uintptr_t)cvq->desc_addr,
>>                                         (struct vring_avail *)(uintptr_t)cvq->driver_addr,
>>                                         (struct vring_used *)(uintptr_t)cvq->device_addr);
>> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
>> index eea23c630f7c..47cdf2a1f5b8 100644
>> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
>> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
>> @@ -60,7 +60,7 @@ static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx)
>>         struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
>>         uint16_t last_avail_idx = vq->vring.last_avail_idx;
>>
>> -       vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, true,
>> +       vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, true, false,
>>                           (struct vring_desc *)(uintptr_t)vq->desc_addr,
>>                           (struct vring_avail *)
>>                           (uintptr_t)vq->driver_addr,
>> @@ -92,7 +92,7 @@ static void vdpasim_vq_reset(struct vdpasim *vdpasim,
>>         vq->cb = NULL;
>>         vq->private = NULL;
>>         vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features,
>> -                         VDPASIM_QUEUE_MAX, false, NULL, NULL, NULL);
>> +                         VDPASIM_QUEUE_MAX, false, false, NULL, NULL, NULL);
>>
>>         vq->vring.notify = NULL;
>>  }
>> diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
>> index 0ba3ef809e48..72c88519329a 100644
>> --- a/drivers/vhost/vringh.c
>> +++ b/drivers/vhost/vringh.c
>> @@ -1094,10 +1094,18 @@ EXPORT_SYMBOL(vringh_need_notify_kern);
>>
>>  #if IS_REACHABLE(CONFIG_VHOST_IOTLB)
>>
>> +struct iotlb_vec {
>> +       union {
>> +               struct iovec *iovec;
>> +               struct bio_vec *bvec;
>> +       } iov;
>> +       size_t count;
>> +       bool is_iovec;
>
>I wonder if this is needed (if vringh is passed to every iotlb_vec helper).

Yep, I can use vringh->use_va.

>
>> +};
>> +
>>  static int iotlb_translate(const struct vringh *vrh,
>>                            u64 addr, u64 len, u64 *translated,
>> -                          struct bio_vec iov[],
>> -                          int iov_size, u32 perm)
>> +                          struct iotlb_vec *ivec, u32 perm)
>>  {
>>         struct vhost_iotlb_map *map;
>>         struct vhost_iotlb *iotlb = vrh->iotlb;
>> @@ -1107,9 +1115,9 @@ static int iotlb_translate(const struct vringh *vrh,
>>         spin_lock(vrh->iotlb_lock);
>>
>>         while (len > s) {
>> -               u64 size, pa, pfn;
>> +               u64 size;
>>
>> -               if (unlikely(ret >= iov_size)) {
>> +               if (unlikely(ret >= ivec->count)) {
>>                         ret = -ENOBUFS;
>>                         break;
>>                 }
>> @@ -1124,10 +1132,22 @@ static int iotlb_translate(const struct vringh *vrh,
>>                 }
>>
>>                 size = map->size - addr + map->start;
>> -               pa = map->addr + addr - map->start;
>> -               pfn = pa >> PAGE_SHIFT;
>> -               bvec_set_page(&iov[ret], pfn_to_page(pfn), min(len - s, size),
>> -                             pa & (PAGE_SIZE - 1));
>> +               if (ivec->is_iovec) {
>> +                       struct iovec *iovec = ivec->iov.iovec;
>> +
>> +                       iovec[ret].iov_len = min(len - s, size);
>> +                       iovec[ret].iov_base = (void __user *)(unsigned long)
>> +                                             (map->addr + addr - map->start);
>
>map->addr - map->start to avoid overflow?

Right, it was pre-existing, but I'll fix since I'm here (also in the
else branch).
And since it's duplicate code, I'll declare it outside!

>
>> +               } else {
>> +                       u64 pa = map->addr + addr - map->start;
>> +                       u64 pfn = pa >> PAGE_SHIFT;
>> +                       struct bio_vec *bvec = ivec->iov.bvec;
>> +
>> +                       bvec_set_page(&bvec[ret], pfn_to_page(pfn),
>> +                                     min(len - s, size),
>> +                                     pa & (PAGE_SIZE - 1));
>> +               }
>> +
>>                 s += size;
>>                 addr += size;
>>                 ++ret;
>> @@ -1141,26 +1161,42 @@ static int iotlb_translate(const struct vringh *vrh,
>>         return ret;
>>  }
>>
>> +#define IOTLB_IOV_SIZE 16
>> +
>>  static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
>>                                   void *src, size_t len)
>>  {
>> +       struct iotlb_vec ivec;
>> +       union {
>> +               struct iovec iovec[IOTLB_IOV_SIZE];
>> +               struct bio_vec bvec[IOTLB_IOV_SIZE];
>> +       } iov;
>>         u64 total_translated = 0;
>>
>> +       ivec.iov.iovec = iov.iovec;
>
>ivc.iov = iov ?

I tried, but they are both anonymous unions, so I cannot assign them.

Also, this inner union I need to allocate space in the stack to hold
both arrays, while the union in iotlb_vec to carry both pointers.

I don't know whether to add a third field (e.g. `void *raw`) to both and
assign it.

>
>Others look good.

Thanks,
Stefano