[PATCH v2 4/8] vringh: support VA with iotlb

Stefano Garzarella posted 8 patches 3 years, 1 month ago
There is a newer version of this series
[PATCH v2 4/8] vringh: support VA with iotlb
Posted by Stefano Garzarella 3 years, 1 month ago
vDPA supports the possibility to use user VA in the iotlb messages.
So, let's add support for user VA in vringh to use it in the vDPA
simulators.

Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---

Notes:
    v2:
    - replace kmap_atomic() with kmap_local_page() [see previous patch]
    - fix cast warnings when build with W=1 C=1

 include/linux/vringh.h            |   5 +-
 drivers/vdpa/mlx5/net/mlx5_vnet.c |   2 +-
 drivers/vdpa/vdpa_sim/vdpa_sim.c  |   4 +-
 drivers/vhost/vringh.c            | 247 ++++++++++++++++++++++++------
 4 files changed, 205 insertions(+), 53 deletions(-)

diff --git a/include/linux/vringh.h b/include/linux/vringh.h
index 1991a02c6431..d39b9f2dcba0 100644
--- a/include/linux/vringh.h
+++ b/include/linux/vringh.h
@@ -32,6 +32,9 @@ struct vringh {
 	/* Can we get away with weak barriers? */
 	bool weak_barriers;
 
+	/* Use user's VA */
+	bool use_va;
+
 	/* Last available index we saw (ie. where we're up to). */
 	u16 last_avail_idx;
 
@@ -279,7 +282,7 @@ void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
 		      spinlock_t *iotlb_lock);
 
 int vringh_init_iotlb(struct vringh *vrh, u64 features,
-		      unsigned int num, bool weak_barriers,
+		      unsigned int num, bool weak_barriers, bool use_va,
 		      struct vring_desc *desc,
 		      struct vring_avail *avail,
 		      struct vring_used *used);
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 3a0e721aef05..babc8dd171a6 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -2537,7 +2537,7 @@ static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
 
 	if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
 		err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
-					MLX5_CVQ_MAX_ENT, false,
+					MLX5_CVQ_MAX_ENT, false, false,
 					(struct vring_desc *)(uintptr_t)cvq->desc_addr,
 					(struct vring_avail *)(uintptr_t)cvq->driver_addr,
 					(struct vring_used *)(uintptr_t)cvq->device_addr);
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
index 6a0a65814626..481eb156658b 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -60,7 +60,7 @@ static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx)
 	struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
 	uint16_t last_avail_idx = vq->vring.last_avail_idx;
 
-	vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, true,
+	vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, true, false,
 			  (struct vring_desc *)(uintptr_t)vq->desc_addr,
 			  (struct vring_avail *)
 			  (uintptr_t)vq->driver_addr,
@@ -81,7 +81,7 @@ static void vdpasim_vq_reset(struct vdpasim *vdpasim,
 	vq->cb = NULL;
 	vq->private = NULL;
 	vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features,
-			  VDPASIM_QUEUE_MAX, false, NULL, NULL, NULL);
+			  VDPASIM_QUEUE_MAX, false, false, NULL, NULL, NULL);
 
 	vq->vring.notify = NULL;
 }
diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
index 0ba3ef809e48..61c79cea44ca 100644
--- a/drivers/vhost/vringh.c
+++ b/drivers/vhost/vringh.c
@@ -1094,15 +1094,99 @@ EXPORT_SYMBOL(vringh_need_notify_kern);
 
 #if IS_REACHABLE(CONFIG_VHOST_IOTLB)
 
-static int iotlb_translate(const struct vringh *vrh,
-			   u64 addr, u64 len, u64 *translated,
-			   struct bio_vec iov[],
-			   int iov_size, u32 perm)
+static int iotlb_translate_va(const struct vringh *vrh,
+			      u64 addr, u64 len, u64 *translated,
+			      struct iovec iov[],
+			      int iov_size, u32 perm)
 {
 	struct vhost_iotlb_map *map;
 	struct vhost_iotlb *iotlb = vrh->iotlb;
+	u64 s = 0, last = addr + len - 1;
 	int ret = 0;
+
+	spin_lock(vrh->iotlb_lock);
+
+	while (len > s) {
+		u64 size;
+
+		if (unlikely(ret >= iov_size)) {
+			ret = -ENOBUFS;
+			break;
+		}
+
+		map = vhost_iotlb_itree_first(iotlb, addr, last);
+		if (!map || map->start > addr) {
+			ret = -EINVAL;
+			break;
+		} else if (!(map->perm & perm)) {
+			ret = -EPERM;
+			break;
+		}
+
+		size = map->size - addr + map->start;
+		iov[ret].iov_len = min(len - s, size);
+		iov[ret].iov_base = (void __user *)(unsigned long)
+				    (map->addr + addr - map->start);
+		s += size;
+		addr += size;
+		++ret;
+	}
+
+	spin_unlock(vrh->iotlb_lock);
+
+	if (translated)
+		*translated = min(len, s);
+
+	return ret;
+}
+
+static inline int copy_from_va(const struct vringh *vrh, void *dst, void *src,
+			       u64 len, u64 *translated)
+{
+	struct iovec iov[16];
+	struct iov_iter iter;
+	int ret;
+
+	ret = iotlb_translate_va(vrh, (u64)(uintptr_t)src, len, translated, iov,
+				 ARRAY_SIZE(iov), VHOST_MAP_RO);
+	if (ret == -ENOBUFS)
+		ret = ARRAY_SIZE(iov);
+	else if (ret < 0)
+		return ret;
+
+	iov_iter_init(&iter, ITER_SOURCE, iov, ret, *translated);
+
+	return copy_from_iter(dst, *translated, &iter);
+}
+
+static inline int copy_to_va(const struct vringh *vrh, void *dst, void *src,
+			     u64 len, u64 *translated)
+{
+	struct iovec iov[16];
+	struct iov_iter iter;
+	int ret;
+
+	ret = iotlb_translate_va(vrh, (u64)(uintptr_t)dst, len, translated, iov,
+				 ARRAY_SIZE(iov), VHOST_MAP_WO);
+	if (ret == -ENOBUFS)
+		ret = ARRAY_SIZE(iov);
+	else if (ret < 0)
+		return ret;
+
+	iov_iter_init(&iter, ITER_DEST, iov, ret, *translated);
+
+	return copy_to_iter(src, *translated, &iter);
+}
+
+static int iotlb_translate_pa(const struct vringh *vrh,
+			      u64 addr, u64 len, u64 *translated,
+			      struct bio_vec iov[],
+			      int iov_size, u32 perm)
+{
+	struct vhost_iotlb_map *map;
+	struct vhost_iotlb *iotlb = vrh->iotlb;
 	u64 s = 0, last = addr + len - 1;
+	int ret = 0;
 
 	spin_lock(vrh->iotlb_lock);
 
@@ -1141,28 +1225,61 @@ static int iotlb_translate(const struct vringh *vrh,
 	return ret;
 }
 
+static inline int copy_from_pa(const struct vringh *vrh, void *dst, void *src,
+			       u64 len, u64 *translated)
+{
+	struct bio_vec iov[16];
+	struct iov_iter iter;
+	int ret;
+
+	ret = iotlb_translate_pa(vrh, (u64)(uintptr_t)src, len, translated, iov,
+				 ARRAY_SIZE(iov), VHOST_MAP_RO);
+	if (ret == -ENOBUFS)
+		ret = ARRAY_SIZE(iov);
+	else if (ret < 0)
+		return ret;
+
+	iov_iter_bvec(&iter, ITER_SOURCE, iov, ret, *translated);
+
+	return copy_from_iter(dst, *translated, &iter);
+}
+
+static inline int copy_to_pa(const struct vringh *vrh, void *dst, void *src,
+			     u64 len, u64 *translated)
+{
+	struct bio_vec iov[16];
+	struct iov_iter iter;
+	int ret;
+
+	ret = iotlb_translate_pa(vrh, (u64)(uintptr_t)dst, len, translated, iov,
+				 ARRAY_SIZE(iov), VHOST_MAP_WO);
+	if (ret == -ENOBUFS)
+		ret = ARRAY_SIZE(iov);
+	else if (ret < 0)
+		return ret;
+
+	iov_iter_bvec(&iter, ITER_DEST, iov, ret, *translated);
+
+	return copy_to_iter(src, *translated, &iter);
+}
+
 static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
 				  void *src, size_t len)
 {
 	u64 total_translated = 0;
 
 	while (total_translated < len) {
-		struct bio_vec iov[16];
-		struct iov_iter iter;
 		u64 translated;
 		int ret;
 
-		ret = iotlb_translate(vrh, (u64)(uintptr_t)src,
-				      len - total_translated, &translated,
-				      iov, ARRAY_SIZE(iov), VHOST_MAP_RO);
-		if (ret == -ENOBUFS)
-			ret = ARRAY_SIZE(iov);
-		else if (ret < 0)
-			return ret;
-
-		iov_iter_bvec(&iter, ITER_SOURCE, iov, ret, translated);
+		if (vrh->use_va) {
+			ret = copy_from_va(vrh, dst, src,
+					   len - total_translated, &translated);
+		} else {
+			ret = copy_from_pa(vrh, dst, src,
+					   len - total_translated, &translated);
+		}
 
-		ret = copy_from_iter(dst, translated, &iter);
 		if (ret < 0)
 			return ret;
 
@@ -1180,22 +1297,17 @@ static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
 	u64 total_translated = 0;
 
 	while (total_translated < len) {
-		struct bio_vec iov[16];
-		struct iov_iter iter;
 		u64 translated;
 		int ret;
 
-		ret = iotlb_translate(vrh, (u64)(uintptr_t)dst,
-				      len - total_translated, &translated,
-				      iov, ARRAY_SIZE(iov), VHOST_MAP_WO);
-		if (ret == -ENOBUFS)
-			ret = ARRAY_SIZE(iov);
-		else if (ret < 0)
-			return ret;
-
-		iov_iter_bvec(&iter, ITER_DEST, iov, ret, translated);
+		if (vrh->use_va) {
+			ret = copy_to_va(vrh, dst, src,
+					 len - total_translated, &translated);
+		} else {
+			ret = copy_to_pa(vrh, dst, src,
+					 len - total_translated, &translated);
+		}
 
-		ret = copy_to_iter(src, translated, &iter);
 		if (ret < 0)
 			return ret;
 
@@ -1210,20 +1322,37 @@ static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
 static inline int getu16_iotlb(const struct vringh *vrh,
 			       u16 *val, const __virtio16 *p)
 {
-	struct bio_vec iov;
-	void *kaddr, *from;
 	int ret;
 
 	/* Atomic read is needed for getu16 */
-	ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
-			      &iov, 1, VHOST_MAP_RO);
-	if (ret < 0)
-		return ret;
+	if (vrh->use_va) {
+		struct iovec iov;
+		__virtio16 tmp;
+
+		ret = iotlb_translate_va(vrh, (u64)(uintptr_t)p, sizeof(*p),
+					 NULL, &iov, 1, VHOST_MAP_RO);
+		if (ret < 0)
+			return ret;
 
-	kaddr = kmap_local_page(iov.bv_page);
-	from = kaddr + iov.bv_offset;
-	*val = vringh16_to_cpu(vrh, READ_ONCE(*(__virtio16 *)from));
-	kunmap_local(kaddr);
+		ret = __get_user(tmp, (__virtio16 __user *)iov.iov_base);
+		if (ret)
+			return ret;
+
+		*val = vringh16_to_cpu(vrh, tmp);
+	} else {
+		struct bio_vec iov;
+		void *kaddr, *from;
+
+		ret = iotlb_translate_pa(vrh, (u64)(uintptr_t)p, sizeof(*p),
+					 NULL, &iov, 1, VHOST_MAP_RO);
+		if (ret < 0)
+			return ret;
+
+		kaddr = kmap_local_page(iov.bv_page);
+		from = kaddr + iov.bv_offset;
+		*val = vringh16_to_cpu(vrh, READ_ONCE(*(__virtio16 *)from));
+		kunmap_local(kaddr);
+	}
 
 	return 0;
 }
@@ -1231,20 +1360,37 @@ static inline int getu16_iotlb(const struct vringh *vrh,
 static inline int putu16_iotlb(const struct vringh *vrh,
 			       __virtio16 *p, u16 val)
 {
-	struct bio_vec iov;
-	void *kaddr, *to;
 	int ret;
 
 	/* Atomic write is needed for putu16 */
-	ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
-			      &iov, 1, VHOST_MAP_WO);
-	if (ret < 0)
-		return ret;
+	if (vrh->use_va) {
+		struct iovec iov;
+		__virtio16 tmp;
 
-	kaddr = kmap_local_page(iov.bv_page);
-	to = kaddr + iov.bv_offset;
-	WRITE_ONCE(*(__virtio16 *)to, cpu_to_vringh16(vrh, val));
-	kunmap_local(kaddr);
+		ret = iotlb_translate_va(vrh, (u64)(uintptr_t)p, sizeof(*p),
+					 NULL, &iov, 1, VHOST_MAP_RO);
+		if (ret < 0)
+			return ret;
+
+		tmp = cpu_to_vringh16(vrh, val);
+
+		ret = __put_user(tmp, (__virtio16 __user *)iov.iov_base);
+		if (ret)
+			return ret;
+	} else {
+		struct bio_vec iov;
+		void *kaddr, *to;
+
+		ret = iotlb_translate_pa(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
+					 &iov, 1, VHOST_MAP_WO);
+		if (ret < 0)
+			return ret;
+
+		kaddr = kmap_local_page(iov.bv_page);
+		to = kaddr + iov.bv_offset;
+		WRITE_ONCE(*(__virtio16 *)to, cpu_to_vringh16(vrh, val));
+		kunmap_local(kaddr);
+	}
 
 	return 0;
 }
@@ -1306,6 +1452,7 @@ static inline int putused_iotlb(const struct vringh *vrh,
  * @features: the feature bits for this ring.
  * @num: the number of elements.
  * @weak_barriers: true if we only need memory barriers, not I/O.
+ * @use_va: true if IOTLB contains user VA
  * @desc: the userpace descriptor pointer.
  * @avail: the userpace avail pointer.
  * @used: the userpace used pointer.
@@ -1313,11 +1460,13 @@ static inline int putused_iotlb(const struct vringh *vrh,
  * Returns an error if num is invalid.
  */
 int vringh_init_iotlb(struct vringh *vrh, u64 features,
-		      unsigned int num, bool weak_barriers,
+		      unsigned int num, bool weak_barriers, bool use_va,
 		      struct vring_desc *desc,
 		      struct vring_avail *avail,
 		      struct vring_used *used)
 {
+	vrh->use_va = use_va;
+
 	return vringh_init_kern(vrh, features, num, weak_barriers,
 				desc, avail, used);
 }
-- 
2.39.2
Re: [PATCH v2 4/8] vringh: support VA with iotlb
Posted by Jason Wang 3 years ago
On Thu, Mar 2, 2023 at 7:35 PM Stefano Garzarella <sgarzare@redhat.com> wrote:
>
> vDPA supports the possibility to use user VA in the iotlb messages.
> So, let's add support for user VA in vringh to use it in the vDPA
> simulators.
>
> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> ---
>
> Notes:
>     v2:
>     - replace kmap_atomic() with kmap_local_page() [see previous patch]
>     - fix cast warnings when build with W=1 C=1
>
>  include/linux/vringh.h            |   5 +-
>  drivers/vdpa/mlx5/net/mlx5_vnet.c |   2 +-
>  drivers/vdpa/vdpa_sim/vdpa_sim.c  |   4 +-
>  drivers/vhost/vringh.c            | 247 ++++++++++++++++++++++++------
>  4 files changed, 205 insertions(+), 53 deletions(-)
>
> diff --git a/include/linux/vringh.h b/include/linux/vringh.h
> index 1991a02c6431..d39b9f2dcba0 100644
> --- a/include/linux/vringh.h
> +++ b/include/linux/vringh.h
> @@ -32,6 +32,9 @@ struct vringh {
>         /* Can we get away with weak barriers? */
>         bool weak_barriers;
>
> +       /* Use user's VA */
> +       bool use_va;
> +
>         /* Last available index we saw (ie. where we're up to). */
>         u16 last_avail_idx;
>
> @@ -279,7 +282,7 @@ void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
>                       spinlock_t *iotlb_lock);
>
>  int vringh_init_iotlb(struct vringh *vrh, u64 features,
> -                     unsigned int num, bool weak_barriers,
> +                     unsigned int num, bool weak_barriers, bool use_va,
>                       struct vring_desc *desc,
>                       struct vring_avail *avail,
>                       struct vring_used *used);
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index 3a0e721aef05..babc8dd171a6 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -2537,7 +2537,7 @@ static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
>
>         if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
>                 err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
> -                                       MLX5_CVQ_MAX_ENT, false,
> +                                       MLX5_CVQ_MAX_ENT, false, false,
>                                         (struct vring_desc *)(uintptr_t)cvq->desc_addr,
>                                         (struct vring_avail *)(uintptr_t)cvq->driver_addr,
>                                         (struct vring_used *)(uintptr_t)cvq->device_addr);
> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> index 6a0a65814626..481eb156658b 100644
> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> @@ -60,7 +60,7 @@ static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx)
>         struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
>         uint16_t last_avail_idx = vq->vring.last_avail_idx;
>
> -       vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, true,
> +       vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, true, false,
>                           (struct vring_desc *)(uintptr_t)vq->desc_addr,
>                           (struct vring_avail *)
>                           (uintptr_t)vq->driver_addr,
> @@ -81,7 +81,7 @@ static void vdpasim_vq_reset(struct vdpasim *vdpasim,
>         vq->cb = NULL;
>         vq->private = NULL;
>         vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features,
> -                         VDPASIM_QUEUE_MAX, false, NULL, NULL, NULL);
> +                         VDPASIM_QUEUE_MAX, false, false, NULL, NULL, NULL);
>
>         vq->vring.notify = NULL;
>  }
> diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
> index 0ba3ef809e48..61c79cea44ca 100644
> --- a/drivers/vhost/vringh.c
> +++ b/drivers/vhost/vringh.c
> @@ -1094,15 +1094,99 @@ EXPORT_SYMBOL(vringh_need_notify_kern);
>
>  #if IS_REACHABLE(CONFIG_VHOST_IOTLB)
>
> -static int iotlb_translate(const struct vringh *vrh,
> -                          u64 addr, u64 len, u64 *translated,
> -                          struct bio_vec iov[],
> -                          int iov_size, u32 perm)
> +static int iotlb_translate_va(const struct vringh *vrh,
> +                             u64 addr, u64 len, u64 *translated,
> +                             struct iovec iov[],
> +                             int iov_size, u32 perm)
>  {
>         struct vhost_iotlb_map *map;
>         struct vhost_iotlb *iotlb = vrh->iotlb;
> +       u64 s = 0, last = addr + len - 1;
>         int ret = 0;
> +
> +       spin_lock(vrh->iotlb_lock);
> +
> +       while (len > s) {
> +               u64 size;
> +
> +               if (unlikely(ret >= iov_size)) {
> +                       ret = -ENOBUFS;
> +                       break;
> +               }
> +
> +               map = vhost_iotlb_itree_first(iotlb, addr, last);
> +               if (!map || map->start > addr) {
> +                       ret = -EINVAL;
> +                       break;
> +               } else if (!(map->perm & perm)) {
> +                       ret = -EPERM;
> +                       break;
> +               }
> +
> +               size = map->size - addr + map->start;
> +               iov[ret].iov_len = min(len - s, size);
> +               iov[ret].iov_base = (void __user *)(unsigned long)
> +                                   (map->addr + addr - map->start);
> +               s += size;
> +               addr += size;
> +               ++ret;
> +       }
> +
> +       spin_unlock(vrh->iotlb_lock);
> +
> +       if (translated)
> +               *translated = min(len, s);
> +
> +       return ret;
> +}
> +
> +static inline int copy_from_va(const struct vringh *vrh, void *dst, void *src,
> +                              u64 len, u64 *translated)
> +{
> +       struct iovec iov[16];
> +       struct iov_iter iter;
> +       int ret;
> +
> +       ret = iotlb_translate_va(vrh, (u64)(uintptr_t)src, len, translated, iov,
> +                                ARRAY_SIZE(iov), VHOST_MAP_RO);
> +       if (ret == -ENOBUFS)
> +               ret = ARRAY_SIZE(iov);
> +       else if (ret < 0)
> +               return ret;
> +
> +       iov_iter_init(&iter, ITER_SOURCE, iov, ret, *translated);
> +
> +       return copy_from_iter(dst, *translated, &iter);
> +}
> +
> +static inline int copy_to_va(const struct vringh *vrh, void *dst, void *src,
> +                            u64 len, u64 *translated)
> +{
> +       struct iovec iov[16];
> +       struct iov_iter iter;
> +       int ret;
> +
> +       ret = iotlb_translate_va(vrh, (u64)(uintptr_t)dst, len, translated, iov,
> +                                ARRAY_SIZE(iov), VHOST_MAP_WO);
> +       if (ret == -ENOBUFS)
> +               ret = ARRAY_SIZE(iov);
> +       else if (ret < 0)
> +               return ret;
> +
> +       iov_iter_init(&iter, ITER_DEST, iov, ret, *translated);
> +
> +       return copy_to_iter(src, *translated, &iter);
> +}
> +
> +static int iotlb_translate_pa(const struct vringh *vrh,
> +                             u64 addr, u64 len, u64 *translated,
> +                             struct bio_vec iov[],
> +                             int iov_size, u32 perm)
> +{
> +       struct vhost_iotlb_map *map;
> +       struct vhost_iotlb *iotlb = vrh->iotlb;
>         u64 s = 0, last = addr + len - 1;
> +       int ret = 0;
>
>         spin_lock(vrh->iotlb_lock);
>
> @@ -1141,28 +1225,61 @@ static int iotlb_translate(const struct vringh *vrh,
>         return ret;
>  }
>
> +static inline int copy_from_pa(const struct vringh *vrh, void *dst, void *src,
> +                              u64 len, u64 *translated)
> +{
> +       struct bio_vec iov[16];
> +       struct iov_iter iter;
> +       int ret;
> +
> +       ret = iotlb_translate_pa(vrh, (u64)(uintptr_t)src, len, translated, iov,
> +                                ARRAY_SIZE(iov), VHOST_MAP_RO);
> +       if (ret == -ENOBUFS)
> +               ret = ARRAY_SIZE(iov);
> +       else if (ret < 0)
> +               return ret;
> +
> +       iov_iter_bvec(&iter, ITER_SOURCE, iov, ret, *translated);
> +
> +       return copy_from_iter(dst, *translated, &iter);
> +}
> +
> +static inline int copy_to_pa(const struct vringh *vrh, void *dst, void *src,
> +                            u64 len, u64 *translated)
> +{
> +       struct bio_vec iov[16];
> +       struct iov_iter iter;
> +       int ret;
> +
> +       ret = iotlb_translate_pa(vrh, (u64)(uintptr_t)dst, len, translated, iov,
> +                                ARRAY_SIZE(iov), VHOST_MAP_WO);
> +       if (ret == -ENOBUFS)
> +               ret = ARRAY_SIZE(iov);
> +       else if (ret < 0)
> +               return ret;
> +
> +       iov_iter_bvec(&iter, ITER_DEST, iov, ret, *translated);
> +
> +       return copy_to_iter(src, *translated, &iter);
> +}
> +
>  static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
>                                   void *src, size_t len)
>  {
>         u64 total_translated = 0;
>
>         while (total_translated < len) {
> -               struct bio_vec iov[16];
> -               struct iov_iter iter;
>                 u64 translated;
>                 int ret;
>
> -               ret = iotlb_translate(vrh, (u64)(uintptr_t)src,
> -                                     len - total_translated, &translated,
> -                                     iov, ARRAY_SIZE(iov), VHOST_MAP_RO);
> -               if (ret == -ENOBUFS)
> -                       ret = ARRAY_SIZE(iov);
> -               else if (ret < 0)
> -                       return ret;
> -
> -               iov_iter_bvec(&iter, ITER_SOURCE, iov, ret, translated);
> +               if (vrh->use_va) {
> +                       ret = copy_from_va(vrh, dst, src,
> +                                          len - total_translated, &translated);
> +               } else {
> +                       ret = copy_from_pa(vrh, dst, src,
> +                                          len - total_translated, &translated);
> +               }
>
> -               ret = copy_from_iter(dst, translated, &iter);
>                 if (ret < 0)
>                         return ret;
>
> @@ -1180,22 +1297,17 @@ static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
>         u64 total_translated = 0;
>
>         while (total_translated < len) {
> -               struct bio_vec iov[16];
> -               struct iov_iter iter;
>                 u64 translated;
>                 int ret;
>
> -               ret = iotlb_translate(vrh, (u64)(uintptr_t)dst,
> -                                     len - total_translated, &translated,
> -                                     iov, ARRAY_SIZE(iov), VHOST_MAP_WO);
> -               if (ret == -ENOBUFS)
> -                       ret = ARRAY_SIZE(iov);
> -               else if (ret < 0)
> -                       return ret;
> -
> -               iov_iter_bvec(&iter, ITER_DEST, iov, ret, translated);
> +               if (vrh->use_va) {
> +                       ret = copy_to_va(vrh, dst, src,
> +                                        len - total_translated, &translated);
> +               } else {
> +                       ret = copy_to_pa(vrh, dst, src,
> +                                        len - total_translated, &translated);
> +               }
>
> -               ret = copy_to_iter(src, translated, &iter);
>                 if (ret < 0)
>                         return ret;
>
> @@ -1210,20 +1322,37 @@ static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
>  static inline int getu16_iotlb(const struct vringh *vrh,
>                                u16 *val, const __virtio16 *p)
>  {
> -       struct bio_vec iov;
> -       void *kaddr, *from;
>         int ret;
>
>         /* Atomic read is needed for getu16 */
> -       ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
> -                             &iov, 1, VHOST_MAP_RO);
> -       if (ret < 0)
> -               return ret;
> +       if (vrh->use_va) {
> +               struct iovec iov;
> +               __virtio16 tmp;
> +
> +               ret = iotlb_translate_va(vrh, (u64)(uintptr_t)p, sizeof(*p),
> +                                        NULL, &iov, 1, VHOST_MAP_RO);
> +               if (ret < 0)
> +                       return ret;

Nit: since we have copy_to_va/copy_to_pa variants, let's introduce
getu16_iotlb_va/pa variants?

>
> -       kaddr = kmap_local_page(iov.bv_page);
> -       from = kaddr + iov.bv_offset;
> -       *val = vringh16_to_cpu(vrh, READ_ONCE(*(__virtio16 *)from));
> -       kunmap_local(kaddr);
> +               ret = __get_user(tmp, (__virtio16 __user *)iov.iov_base);
> +               if (ret)
> +                       return ret;
> +
> +               *val = vringh16_to_cpu(vrh, tmp);
> +       } else {
> +               struct bio_vec iov;
> +               void *kaddr, *from;
> +
> +               ret = iotlb_translate_pa(vrh, (u64)(uintptr_t)p, sizeof(*p),
> +                                        NULL, &iov, 1, VHOST_MAP_RO);
> +               if (ret < 0)
> +                       return ret;
> +
> +               kaddr = kmap_local_page(iov.bv_page);

If we decide to have a use_va switch, is kmap_local_page() still required here?

Other looks good.

Thanks

> +               from = kaddr + iov.bv_offset;
> +               *val = vringh16_to_cpu(vrh, READ_ONCE(*(__virtio16 *)from));
> +               kunmap_local(kaddr);
> +       }
>
>         return 0;
>  }
> @@ -1231,20 +1360,37 @@ static inline int getu16_iotlb(const struct vringh *vrh,
>  static inline int putu16_iotlb(const struct vringh *vrh,
>                                __virtio16 *p, u16 val)
>  {
> -       struct bio_vec iov;
> -       void *kaddr, *to;
>         int ret;
>
>         /* Atomic write is needed for putu16 */
> -       ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
> -                             &iov, 1, VHOST_MAP_WO);
> -       if (ret < 0)
> -               return ret;
> +       if (vrh->use_va) {
> +               struct iovec iov;
> +               __virtio16 tmp;
>
> -       kaddr = kmap_local_page(iov.bv_page);
> -       to = kaddr + iov.bv_offset;
> -       WRITE_ONCE(*(__virtio16 *)to, cpu_to_vringh16(vrh, val));
> -       kunmap_local(kaddr);
> +               ret = iotlb_translate_va(vrh, (u64)(uintptr_t)p, sizeof(*p),
> +                                        NULL, &iov, 1, VHOST_MAP_RO);
> +               if (ret < 0)
> +                       return ret;
> +
> +               tmp = cpu_to_vringh16(vrh, val);
> +
> +               ret = __put_user(tmp, (__virtio16 __user *)iov.iov_base);
> +               if (ret)
> +                       return ret;
> +       } else {
> +               struct bio_vec iov;
> +               void *kaddr, *to;
> +
> +               ret = iotlb_translate_pa(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
> +                                        &iov, 1, VHOST_MAP_WO);
> +               if (ret < 0)
> +                       return ret;
> +
> +               kaddr = kmap_local_page(iov.bv_page);
> +               to = kaddr + iov.bv_offset;
> +               WRITE_ONCE(*(__virtio16 *)to, cpu_to_vringh16(vrh, val));
> +               kunmap_local(kaddr);
> +       }
>
>         return 0;
>  }
> @@ -1306,6 +1452,7 @@ static inline int putused_iotlb(const struct vringh *vrh,
>   * @features: the feature bits for this ring.
>   * @num: the number of elements.
>   * @weak_barriers: true if we only need memory barriers, not I/O.
> + * @use_va: true if IOTLB contains user VA
>   * @desc: the userpace descriptor pointer.
>   * @avail: the userpace avail pointer.
>   * @used: the userpace used pointer.
> @@ -1313,11 +1460,13 @@ static inline int putused_iotlb(const struct vringh *vrh,
>   * Returns an error if num is invalid.
>   */
>  int vringh_init_iotlb(struct vringh *vrh, u64 features,
> -                     unsigned int num, bool weak_barriers,
> +                     unsigned int num, bool weak_barriers, bool use_va,
>                       struct vring_desc *desc,
>                       struct vring_avail *avail,
>                       struct vring_used *used)
>  {
> +       vrh->use_va = use_va;
> +
>         return vringh_init_kern(vrh, features, num, weak_barriers,
>                                 desc, avail, used);
>  }
> --
> 2.39.2
>
Re: [PATCH v2 4/8] vringh: support VA with iotlb
Posted by Stefano Garzarella 3 years ago
On Tue, Mar 14, 2023 at 12:53:57PM +0800, Jason Wang wrote:
>On Thu, Mar 2, 2023 at 7:35 PM Stefano Garzarella <sgarzare@redhat.com> wrote:
>>
>> vDPA supports the possibility to use user VA in the iotlb messages.
>> So, let's add support for user VA in vringh to use it in the vDPA
>> simulators.
>>
>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
>> ---
>>
>> Notes:
>>     v2:
>>     - replace kmap_atomic() with kmap_local_page() [see previous patch]
>>     - fix cast warnings when build with W=1 C=1
>>
>>  include/linux/vringh.h            |   5 +-
>>  drivers/vdpa/mlx5/net/mlx5_vnet.c |   2 +-
>>  drivers/vdpa/vdpa_sim/vdpa_sim.c  |   4 +-
>>  drivers/vhost/vringh.c            | 247 ++++++++++++++++++++++++------
>>  4 files changed, 205 insertions(+), 53 deletions(-)
>>
>> diff --git a/include/linux/vringh.h b/include/linux/vringh.h
>> index 1991a02c6431..d39b9f2dcba0 100644
>> --- a/include/linux/vringh.h
>> +++ b/include/linux/vringh.h
>> @@ -32,6 +32,9 @@ struct vringh {
>>         /* Can we get away with weak barriers? */
>>         bool weak_barriers;
>>
>> +       /* Use user's VA */
>> +       bool use_va;
>> +
>>         /* Last available index we saw (ie. where we're up to). */
>>         u16 last_avail_idx;
>>
>> @@ -279,7 +282,7 @@ void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
>>                       spinlock_t *iotlb_lock);
>>
>>  int vringh_init_iotlb(struct vringh *vrh, u64 features,
>> -                     unsigned int num, bool weak_barriers,
>> +                     unsigned int num, bool weak_barriers, bool use_va,
>>                       struct vring_desc *desc,
>>                       struct vring_avail *avail,
>>                       struct vring_used *used);
>> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> index 3a0e721aef05..babc8dd171a6 100644
>> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> @@ -2537,7 +2537,7 @@ static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
>>
>>         if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
>>                 err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
>> -                                       MLX5_CVQ_MAX_ENT, false,
>> +                                       MLX5_CVQ_MAX_ENT, false, false,
>>                                         (struct vring_desc *)(uintptr_t)cvq->desc_addr,
>>                                         (struct vring_avail *)(uintptr_t)cvq->driver_addr,
>>                                         (struct vring_used *)(uintptr_t)cvq->device_addr);
>> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
>> index 6a0a65814626..481eb156658b 100644
>> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
>> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
>> @@ -60,7 +60,7 @@ static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx)
>>         struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
>>         uint16_t last_avail_idx = vq->vring.last_avail_idx;
>>
>> -       vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, true,
>> +       vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, true, false,
>>                           (struct vring_desc *)(uintptr_t)vq->desc_addr,
>>                           (struct vring_avail *)
>>                           (uintptr_t)vq->driver_addr,
>> @@ -81,7 +81,7 @@ static void vdpasim_vq_reset(struct vdpasim *vdpasim,
>>         vq->cb = NULL;
>>         vq->private = NULL;
>>         vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features,
>> -                         VDPASIM_QUEUE_MAX, false, NULL, NULL, NULL);
>> +                         VDPASIM_QUEUE_MAX, false, false, NULL, NULL, NULL);
>>
>>         vq->vring.notify = NULL;
>>  }
>> diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
>> index 0ba3ef809e48..61c79cea44ca 100644
>> --- a/drivers/vhost/vringh.c
>> +++ b/drivers/vhost/vringh.c
>> @@ -1094,15 +1094,99 @@ EXPORT_SYMBOL(vringh_need_notify_kern);
>>
>>  #if IS_REACHABLE(CONFIG_VHOST_IOTLB)
>>
>> -static int iotlb_translate(const struct vringh *vrh,
>> -                          u64 addr, u64 len, u64 *translated,
>> -                          struct bio_vec iov[],
>> -                          int iov_size, u32 perm)
>> +static int iotlb_translate_va(const struct vringh *vrh,
>> +                             u64 addr, u64 len, u64 *translated,
>> +                             struct iovec iov[],
>> +                             int iov_size, u32 perm)
>>  {
>>         struct vhost_iotlb_map *map;
>>         struct vhost_iotlb *iotlb = vrh->iotlb;
>> +       u64 s = 0, last = addr + len - 1;
>>         int ret = 0;
>> +
>> +       spin_lock(vrh->iotlb_lock);
>> +
>> +       while (len > s) {
>> +               u64 size;
>> +
>> +               if (unlikely(ret >= iov_size)) {
>> +                       ret = -ENOBUFS;
>> +                       break;
>> +               }
>> +
>> +               map = vhost_iotlb_itree_first(iotlb, addr, last);
>> +               if (!map || map->start > addr) {
>> +                       ret = -EINVAL;
>> +                       break;
>> +               } else if (!(map->perm & perm)) {
>> +                       ret = -EPERM;
>> +                       break;
>> +               }
>> +
>> +               size = map->size - addr + map->start;
>> +               iov[ret].iov_len = min(len - s, size);
>> +               iov[ret].iov_base = (void __user *)(unsigned long)
>> +                                   (map->addr + addr - map->start);
>> +               s += size;
>> +               addr += size;
>> +               ++ret;
>> +       }
>> +
>> +       spin_unlock(vrh->iotlb_lock);
>> +
>> +       if (translated)
>> +               *translated = min(len, s);
>> +
>> +       return ret;
>> +}
>> +
>> +static inline int copy_from_va(const struct vringh *vrh, void *dst, void *src,
>> +                              u64 len, u64 *translated)
>> +{
>> +       struct iovec iov[16];
>> +       struct iov_iter iter;
>> +       int ret;
>> +
>> +       ret = iotlb_translate_va(vrh, (u64)(uintptr_t)src, len, translated, iov,
>> +                                ARRAY_SIZE(iov), VHOST_MAP_RO);
>> +       if (ret == -ENOBUFS)
>> +               ret = ARRAY_SIZE(iov);
>> +       else if (ret < 0)
>> +               return ret;
>> +
>> +       iov_iter_init(&iter, ITER_SOURCE, iov, ret, *translated);
>> +
>> +       return copy_from_iter(dst, *translated, &iter);
>> +}
>> +
>> +static inline int copy_to_va(const struct vringh *vrh, void *dst, void *src,
>> +                            u64 len, u64 *translated)
>> +{
>> +       struct iovec iov[16];
>> +       struct iov_iter iter;
>> +       int ret;
>> +
>> +       ret = iotlb_translate_va(vrh, (u64)(uintptr_t)dst, len, translated, iov,
>> +                                ARRAY_SIZE(iov), VHOST_MAP_WO);
>> +       if (ret == -ENOBUFS)
>> +               ret = ARRAY_SIZE(iov);
>> +       else if (ret < 0)
>> +               return ret;
>> +
>> +       iov_iter_init(&iter, ITER_DEST, iov, ret, *translated);
>> +
>> +       return copy_to_iter(src, *translated, &iter);
>> +}
>> +
>> +static int iotlb_translate_pa(const struct vringh *vrh,
>> +                             u64 addr, u64 len, u64 *translated,
>> +                             struct bio_vec iov[],
>> +                             int iov_size, u32 perm)
>> +{
>> +       struct vhost_iotlb_map *map;
>> +       struct vhost_iotlb *iotlb = vrh->iotlb;
>>         u64 s = 0, last = addr + len - 1;
>> +       int ret = 0;
>>
>>         spin_lock(vrh->iotlb_lock);
>>
>> @@ -1141,28 +1225,61 @@ static int iotlb_translate(const struct vringh *vrh,
>>         return ret;
>>  }
>>
>> +static inline int copy_from_pa(const struct vringh *vrh, void *dst, void *src,
>> +                              u64 len, u64 *translated)
>> +{
>> +       struct bio_vec iov[16];
>> +       struct iov_iter iter;
>> +       int ret;
>> +
>> +       ret = iotlb_translate_pa(vrh, (u64)(uintptr_t)src, len, translated, iov,
>> +                                ARRAY_SIZE(iov), VHOST_MAP_RO);
>> +       if (ret == -ENOBUFS)
>> +               ret = ARRAY_SIZE(iov);
>> +       else if (ret < 0)
>> +               return ret;
>> +
>> +       iov_iter_bvec(&iter, ITER_SOURCE, iov, ret, *translated);
>> +
>> +       return copy_from_iter(dst, *translated, &iter);
>> +}
>> +
>> +static inline int copy_to_pa(const struct vringh *vrh, void *dst, void *src,
>> +                            u64 len, u64 *translated)
>> +{
>> +       struct bio_vec iov[16];
>> +       struct iov_iter iter;
>> +       int ret;
>> +
>> +       ret = iotlb_translate_pa(vrh, (u64)(uintptr_t)dst, len, translated, iov,
>> +                                ARRAY_SIZE(iov), VHOST_MAP_WO);
>> +       if (ret == -ENOBUFS)
>> +               ret = ARRAY_SIZE(iov);
>> +       else if (ret < 0)
>> +               return ret;
>> +
>> +       iov_iter_bvec(&iter, ITER_DEST, iov, ret, *translated);
>> +
>> +       return copy_to_iter(src, *translated, &iter);
>> +}
>> +
>>  static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
>>                                   void *src, size_t len)
>>  {
>>         u64 total_translated = 0;
>>
>>         while (total_translated < len) {
>> -               struct bio_vec iov[16];
>> -               struct iov_iter iter;
>>                 u64 translated;
>>                 int ret;
>>
>> -               ret = iotlb_translate(vrh, (u64)(uintptr_t)src,
>> -                                     len - total_translated, &translated,
>> -                                     iov, ARRAY_SIZE(iov), VHOST_MAP_RO);
>> -               if (ret == -ENOBUFS)
>> -                       ret = ARRAY_SIZE(iov);
>> -               else if (ret < 0)
>> -                       return ret;
>> -
>> -               iov_iter_bvec(&iter, ITER_SOURCE, iov, ret, translated);
>> +               if (vrh->use_va) {
>> +                       ret = copy_from_va(vrh, dst, src,
>> +                                          len - total_translated, &translated);
>> +               } else {
>> +                       ret = copy_from_pa(vrh, dst, src,
>> +                                          len - total_translated, &translated);
>> +               }
>>
>> -               ret = copy_from_iter(dst, translated, &iter);
>>                 if (ret < 0)
>>                         return ret;
>>
>> @@ -1180,22 +1297,17 @@ static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
>>         u64 total_translated = 0;
>>
>>         while (total_translated < len) {
>> -               struct bio_vec iov[16];
>> -               struct iov_iter iter;
>>                 u64 translated;
>>                 int ret;
>>
>> -               ret = iotlb_translate(vrh, (u64)(uintptr_t)dst,
>> -                                     len - total_translated, &translated,
>> -                                     iov, ARRAY_SIZE(iov), VHOST_MAP_WO);
>> -               if (ret == -ENOBUFS)
>> -                       ret = ARRAY_SIZE(iov);
>> -               else if (ret < 0)
>> -                       return ret;
>> -
>> -               iov_iter_bvec(&iter, ITER_DEST, iov, ret, translated);
>> +               if (vrh->use_va) {
>> +                       ret = copy_to_va(vrh, dst, src,
>> +                                        len - total_translated, &translated);
>> +               } else {
>> +                       ret = copy_to_pa(vrh, dst, src,
>> +                                        len - total_translated, &translated);
>> +               }
>>
>> -               ret = copy_to_iter(src, translated, &iter);
>>                 if (ret < 0)
>>                         return ret;
>>
>> @@ -1210,20 +1322,37 @@ static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
>>  static inline int getu16_iotlb(const struct vringh *vrh,
>>                                u16 *val, const __virtio16 *p)
>>  {
>> -       struct bio_vec iov;
>> -       void *kaddr, *from;
>>         int ret;
>>
>>         /* Atomic read is needed for getu16 */
>> -       ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
>> -                             &iov, 1, VHOST_MAP_RO);
>> -       if (ret < 0)
>> -               return ret;
>> +       if (vrh->use_va) {
>> +               struct iovec iov;
>> +               __virtio16 tmp;
>> +
>> +               ret = iotlb_translate_va(vrh, (u64)(uintptr_t)p, sizeof(*p),
>> +                                        NULL, &iov, 1, VHOST_MAP_RO);
>> +               if (ret < 0)
>> +                       return ret;
>
>Nit: since we have copy_to_va/copy_to_pa variants, let's introduce
>getu16_iotlb_va/pa variants?

Yep!

>
>>
>> -       kaddr = kmap_local_page(iov.bv_page);
>> -       from = kaddr + iov.bv_offset;
>> -       *val = vringh16_to_cpu(vrh, READ_ONCE(*(__virtio16 *)from));
>> -       kunmap_local(kaddr);
>> +               ret = __get_user(tmp, (__virtio16 __user *)iov.iov_base);
>> +               if (ret)
>> +                       return ret;
>> +
>> +               *val = vringh16_to_cpu(vrh, tmp);
>> +       } else {
>> +               struct bio_vec iov;
>> +               void *kaddr, *from;
>> +
>> +               ret = iotlb_translate_pa(vrh, (u64)(uintptr_t)p, sizeof(*p),
>> +                                        NULL, &iov, 1, VHOST_MAP_RO);
>> +               if (ret < 0)
>> +                       return ret;
>> +
>> +               kaddr = kmap_local_page(iov.bv_page);
>
>If we decide to have a use_va switch, is kmap_local_page() still required here?
>

I think yes. This is related to the email where Fabio clarified for us,
right?

>Other looks good.

Thanks,
Stefano

Re: [PATCH v2 4/8] vringh: support VA with iotlb
Posted by Eugenio Perez Martin 3 years, 1 month ago
On Thu, Mar 2, 2023 at 12:35 PM Stefano Garzarella <sgarzare@redhat.com> wrote:
>
> vDPA supports the possibility to use user VA in the iotlb messages.
> So, let's add support for user VA in vringh to use it in the vDPA
> simulators.
>
> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> ---
>
> Notes:
>     v2:
>     - replace kmap_atomic() with kmap_local_page() [see previous patch]
>     - fix cast warnings when build with W=1 C=1
>
>  include/linux/vringh.h            |   5 +-
>  drivers/vdpa/mlx5/net/mlx5_vnet.c |   2 +-
>  drivers/vdpa/vdpa_sim/vdpa_sim.c  |   4 +-
>  drivers/vhost/vringh.c            | 247 ++++++++++++++++++++++++------
>  4 files changed, 205 insertions(+), 53 deletions(-)
>
> diff --git a/include/linux/vringh.h b/include/linux/vringh.h
> index 1991a02c6431..d39b9f2dcba0 100644
> --- a/include/linux/vringh.h
> +++ b/include/linux/vringh.h
> @@ -32,6 +32,9 @@ struct vringh {
>         /* Can we get away with weak barriers? */
>         bool weak_barriers;
>
> +       /* Use user's VA */
> +       bool use_va;
> +
>         /* Last available index we saw (ie. where we're up to). */
>         u16 last_avail_idx;
>
> @@ -279,7 +282,7 @@ void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
>                       spinlock_t *iotlb_lock);
>
>  int vringh_init_iotlb(struct vringh *vrh, u64 features,
> -                     unsigned int num, bool weak_barriers,
> +                     unsigned int num, bool weak_barriers, bool use_va,
>                       struct vring_desc *desc,
>                       struct vring_avail *avail,
>                       struct vring_used *used);
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index 3a0e721aef05..babc8dd171a6 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -2537,7 +2537,7 @@ static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
>
>         if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
>                 err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
> -                                       MLX5_CVQ_MAX_ENT, false,
> +                                       MLX5_CVQ_MAX_ENT, false, false,
>                                         (struct vring_desc *)(uintptr_t)cvq->desc_addr,
>                                         (struct vring_avail *)(uintptr_t)cvq->driver_addr,
>                                         (struct vring_used *)(uintptr_t)cvq->device_addr);
> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> index 6a0a65814626..481eb156658b 100644
> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> @@ -60,7 +60,7 @@ static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx)
>         struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
>         uint16_t last_avail_idx = vq->vring.last_avail_idx;
>
> -       vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, true,
> +       vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, true, false,
>                           (struct vring_desc *)(uintptr_t)vq->desc_addr,
>                           (struct vring_avail *)
>                           (uintptr_t)vq->driver_addr,
> @@ -81,7 +81,7 @@ static void vdpasim_vq_reset(struct vdpasim *vdpasim,
>         vq->cb = NULL;
>         vq->private = NULL;
>         vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features,
> -                         VDPASIM_QUEUE_MAX, false, NULL, NULL, NULL);
> +                         VDPASIM_QUEUE_MAX, false, false, NULL, NULL, NULL);
>
>         vq->vring.notify = NULL;
>  }
> diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
> index 0ba3ef809e48..61c79cea44ca 100644
> --- a/drivers/vhost/vringh.c
> +++ b/drivers/vhost/vringh.c
> @@ -1094,15 +1094,99 @@ EXPORT_SYMBOL(vringh_need_notify_kern);
>
>  #if IS_REACHABLE(CONFIG_VHOST_IOTLB)
>
> -static int iotlb_translate(const struct vringh *vrh,
> -                          u64 addr, u64 len, u64 *translated,
> -                          struct bio_vec iov[],
> -                          int iov_size, u32 perm)
> +static int iotlb_translate_va(const struct vringh *vrh,
> +                             u64 addr, u64 len, u64 *translated,
> +                             struct iovec iov[],
> +                             int iov_size, u32 perm)
>  {
>         struct vhost_iotlb_map *map;
>         struct vhost_iotlb *iotlb = vrh->iotlb;
> +       u64 s = 0, last = addr + len - 1;
>         int ret = 0;
> +
> +       spin_lock(vrh->iotlb_lock);
> +
> +       while (len > s) {
> +               u64 size;
> +
> +               if (unlikely(ret >= iov_size)) {
> +                       ret = -ENOBUFS;
> +                       break;
> +               }
> +
> +               map = vhost_iotlb_itree_first(iotlb, addr, last);
> +               if (!map || map->start > addr) {
> +                       ret = -EINVAL;
> +                       break;
> +               } else if (!(map->perm & perm)) {
> +                       ret = -EPERM;
> +                       break;
> +               }
> +
> +               size = map->size - addr + map->start;
> +               iov[ret].iov_len = min(len - s, size);
> +               iov[ret].iov_base = (void __user *)(unsigned long)
> +                                   (map->addr + addr - map->start);
> +               s += size;
> +               addr += size;
> +               ++ret;
> +       }
> +
> +       spin_unlock(vrh->iotlb_lock);
> +
> +       if (translated)
> +               *translated = min(len, s);
> +
> +       return ret;
> +}

It seems to me iotlb_translate_va and iotlb_translate_pa are very
similar, their only difference is that the argument is that iov is
iovec instead of bio_vec. And how to fill it, obviously.

It would be great to merge both functions, only differing with a
conditional on vrh->use_va, or generics, or similar. Or, if following
the style of the rest of vringh code, to provide a callback to fill
iovec (although I like conditional more).

However I cannot think of an easy way to perform that without long
macros or type erasure.

> +
> +static inline int copy_from_va(const struct vringh *vrh, void *dst, void *src,
> +                              u64 len, u64 *translated)
> +{
> +       struct iovec iov[16];
> +       struct iov_iter iter;
> +       int ret;
> +
> +       ret = iotlb_translate_va(vrh, (u64)(uintptr_t)src, len, translated, iov,
> +                                ARRAY_SIZE(iov), VHOST_MAP_RO);
> +       if (ret == -ENOBUFS)
> +               ret = ARRAY_SIZE(iov);
> +       else if (ret < 0)
> +               return ret;
> +
> +       iov_iter_init(&iter, ITER_SOURCE, iov, ret, *translated);
> +
> +       return copy_from_iter(dst, *translated, &iter);

Maybe a good baby step for DRY is to return the iov_iter in
copy_from/to_va/pa here?

But I'm ok with this version too.

Acked-by: Eugenio Pérez Martin <eperezma@redhat.com>

> +}
> +
> +static inline int copy_to_va(const struct vringh *vrh, void *dst, void *src,
> +                            u64 len, u64 *translated)
> +{
> +       struct iovec iov[16];
> +       struct iov_iter iter;
> +       int ret;
> +
> +       ret = iotlb_translate_va(vrh, (u64)(uintptr_t)dst, len, translated, iov,
> +                                ARRAY_SIZE(iov), VHOST_MAP_WO);
> +       if (ret == -ENOBUFS)
> +               ret = ARRAY_SIZE(iov);
> +       else if (ret < 0)
> +               return ret;
> +
> +       iov_iter_init(&iter, ITER_DEST, iov, ret, *translated);
> +
> +       return copy_to_iter(src, *translated, &iter);
> +}
> +
> +static int iotlb_translate_pa(const struct vringh *vrh,
> +                             u64 addr, u64 len, u64 *translated,
> +                             struct bio_vec iov[],
> +                             int iov_size, u32 perm)
> +{
> +       struct vhost_iotlb_map *map;
> +       struct vhost_iotlb *iotlb = vrh->iotlb;
>         u64 s = 0, last = addr + len - 1;
> +       int ret = 0;
>
>         spin_lock(vrh->iotlb_lock);
>
> @@ -1141,28 +1225,61 @@ static int iotlb_translate(const struct vringh *vrh,
>         return ret;
>  }
>
> +static inline int copy_from_pa(const struct vringh *vrh, void *dst, void *src,
> +                              u64 len, u64 *translated)
> +{
> +       struct bio_vec iov[16];
> +       struct iov_iter iter;
> +       int ret;
> +
> +       ret = iotlb_translate_pa(vrh, (u64)(uintptr_t)src, len, translated, iov,
> +                                ARRAY_SIZE(iov), VHOST_MAP_RO);
> +       if (ret == -ENOBUFS)
> +               ret = ARRAY_SIZE(iov);
> +       else if (ret < 0)
> +               return ret;
> +
> +       iov_iter_bvec(&iter, ITER_SOURCE, iov, ret, *translated);
> +
> +       return copy_from_iter(dst, *translated, &iter);
> +}
> +
> +static inline int copy_to_pa(const struct vringh *vrh, void *dst, void *src,
> +                            u64 len, u64 *translated)
> +{
> +       struct bio_vec iov[16];
> +       struct iov_iter iter;
> +       int ret;
> +
> +       ret = iotlb_translate_pa(vrh, (u64)(uintptr_t)dst, len, translated, iov,
> +                                ARRAY_SIZE(iov), VHOST_MAP_WO);
> +       if (ret == -ENOBUFS)
> +               ret = ARRAY_SIZE(iov);
> +       else if (ret < 0)
> +               return ret;
> +
> +       iov_iter_bvec(&iter, ITER_DEST, iov, ret, *translated);
> +
> +       return copy_to_iter(src, *translated, &iter);
> +}
> +
>  static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
>                                   void *src, size_t len)
>  {
>         u64 total_translated = 0;
>
>         while (total_translated < len) {
> -               struct bio_vec iov[16];
> -               struct iov_iter iter;
>                 u64 translated;
>                 int ret;
>
> -               ret = iotlb_translate(vrh, (u64)(uintptr_t)src,
> -                                     len - total_translated, &translated,
> -                                     iov, ARRAY_SIZE(iov), VHOST_MAP_RO);
> -               if (ret == -ENOBUFS)
> -                       ret = ARRAY_SIZE(iov);
> -               else if (ret < 0)
> -                       return ret;
> -
> -               iov_iter_bvec(&iter, ITER_SOURCE, iov, ret, translated);
> +               if (vrh->use_va) {
> +                       ret = copy_from_va(vrh, dst, src,
> +                                          len - total_translated, &translated);
> +               } else {
> +                       ret = copy_from_pa(vrh, dst, src,
> +                                          len - total_translated, &translated);
> +               }
>
> -               ret = copy_from_iter(dst, translated, &iter);
>                 if (ret < 0)
>                         return ret;
>
> @@ -1180,22 +1297,17 @@ static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
>         u64 total_translated = 0;
>
>         while (total_translated < len) {
> -               struct bio_vec iov[16];
> -               struct iov_iter iter;
>                 u64 translated;
>                 int ret;
>
> -               ret = iotlb_translate(vrh, (u64)(uintptr_t)dst,
> -                                     len - total_translated, &translated,
> -                                     iov, ARRAY_SIZE(iov), VHOST_MAP_WO);
> -               if (ret == -ENOBUFS)
> -                       ret = ARRAY_SIZE(iov);
> -               else if (ret < 0)
> -                       return ret;
> -
> -               iov_iter_bvec(&iter, ITER_DEST, iov, ret, translated);
> +               if (vrh->use_va) {
> +                       ret = copy_to_va(vrh, dst, src,
> +                                        len - total_translated, &translated);
> +               } else {
> +                       ret = copy_to_pa(vrh, dst, src,
> +                                        len - total_translated, &translated);
> +               }
>
> -               ret = copy_to_iter(src, translated, &iter);
>                 if (ret < 0)
>                         return ret;
>
> @@ -1210,20 +1322,37 @@ static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
>  static inline int getu16_iotlb(const struct vringh *vrh,
>                                u16 *val, const __virtio16 *p)
>  {
> -       struct bio_vec iov;
> -       void *kaddr, *from;
>         int ret;
>
>         /* Atomic read is needed for getu16 */
> -       ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
> -                             &iov, 1, VHOST_MAP_RO);
> -       if (ret < 0)
> -               return ret;
> +       if (vrh->use_va) {
> +               struct iovec iov;
> +               __virtio16 tmp;
> +
> +               ret = iotlb_translate_va(vrh, (u64)(uintptr_t)p, sizeof(*p),
> +                                        NULL, &iov, 1, VHOST_MAP_RO);
> +               if (ret < 0)
> +                       return ret;
>
> -       kaddr = kmap_local_page(iov.bv_page);
> -       from = kaddr + iov.bv_offset;
> -       *val = vringh16_to_cpu(vrh, READ_ONCE(*(__virtio16 *)from));
> -       kunmap_local(kaddr);
> +               ret = __get_user(tmp, (__virtio16 __user *)iov.iov_base);
> +               if (ret)
> +                       return ret;
> +
> +               *val = vringh16_to_cpu(vrh, tmp);
> +       } else {
> +               struct bio_vec iov;
> +               void *kaddr, *from;
> +
> +               ret = iotlb_translate_pa(vrh, (u64)(uintptr_t)p, sizeof(*p),
> +                                        NULL, &iov, 1, VHOST_MAP_RO);
> +               if (ret < 0)
> +                       return ret;
> +
> +               kaddr = kmap_local_page(iov.bv_page);
> +               from = kaddr + iov.bv_offset;
> +               *val = vringh16_to_cpu(vrh, READ_ONCE(*(__virtio16 *)from));
> +               kunmap_local(kaddr);
> +       }
>
>         return 0;
>  }
> @@ -1231,20 +1360,37 @@ static inline int getu16_iotlb(const struct vringh *vrh,
>  static inline int putu16_iotlb(const struct vringh *vrh,
>                                __virtio16 *p, u16 val)
>  {
> -       struct bio_vec iov;
> -       void *kaddr, *to;
>         int ret;
>
>         /* Atomic write is needed for putu16 */
> -       ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
> -                             &iov, 1, VHOST_MAP_WO);
> -       if (ret < 0)
> -               return ret;
> +       if (vrh->use_va) {
> +               struct iovec iov;
> +               __virtio16 tmp;
>
> -       kaddr = kmap_local_page(iov.bv_page);
> -       to = kaddr + iov.bv_offset;
> -       WRITE_ONCE(*(__virtio16 *)to, cpu_to_vringh16(vrh, val));
> -       kunmap_local(kaddr);
> +               ret = iotlb_translate_va(vrh, (u64)(uintptr_t)p, sizeof(*p),
> +                                        NULL, &iov, 1, VHOST_MAP_RO);
> +               if (ret < 0)
> +                       return ret;
> +
> +               tmp = cpu_to_vringh16(vrh, val);
> +
> +               ret = __put_user(tmp, (__virtio16 __user *)iov.iov_base);
> +               if (ret)
> +                       return ret;
> +       } else {
> +               struct bio_vec iov;
> +               void *kaddr, *to;
> +
> +               ret = iotlb_translate_pa(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
> +                                        &iov, 1, VHOST_MAP_WO);
> +               if (ret < 0)
> +                       return ret;
> +
> +               kaddr = kmap_local_page(iov.bv_page);
> +               to = kaddr + iov.bv_offset;
> +               WRITE_ONCE(*(__virtio16 *)to, cpu_to_vringh16(vrh, val));
> +               kunmap_local(kaddr);
> +       }
>
>         return 0;
>  }
> @@ -1306,6 +1452,7 @@ static inline int putused_iotlb(const struct vringh *vrh,
>   * @features: the feature bits for this ring.
>   * @num: the number of elements.
>   * @weak_barriers: true if we only need memory barriers, not I/O.
> + * @use_va: true if IOTLB contains user VA
>   * @desc: the userpace descriptor pointer.
>   * @avail: the userpace avail pointer.
>   * @used: the userpace used pointer.
> @@ -1313,11 +1460,13 @@ static inline int putused_iotlb(const struct vringh *vrh,
>   * Returns an error if num is invalid.
>   */
>  int vringh_init_iotlb(struct vringh *vrh, u64 features,
> -                     unsigned int num, bool weak_barriers,
> +                     unsigned int num, bool weak_barriers, bool use_va,
>                       struct vring_desc *desc,
>                       struct vring_avail *avail,
>                       struct vring_used *used)
>  {
> +       vrh->use_va = use_va;
> +
>         return vringh_init_kern(vrh, features, num, weak_barriers,
>                                 desc, avail, used);
>  }
> --
> 2.39.2
>
Re: [PATCH v2 4/8] vringh: support VA with iotlb
Posted by Stefano Garzarella 3 years ago
On Fri, Mar 3, 2023 at 3:39 PM Eugenio Perez Martin <eperezma@redhat.com> wrote:
>
> On Thu, Mar 2, 2023 at 12:35 PM Stefano Garzarella <sgarzare@redhat.com> wrote:
> >
> > vDPA supports the possibility to use user VA in the iotlb messages.
> > So, let's add support for user VA in vringh to use it in the vDPA
> > simulators.
> >
> > Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> > ---
> >
> > Notes:
> >     v2:
> >     - replace kmap_atomic() with kmap_local_page() [see previous patch]
> >     - fix cast warnings when build with W=1 C=1
> >
> >  include/linux/vringh.h            |   5 +-
> >  drivers/vdpa/mlx5/net/mlx5_vnet.c |   2 +-
> >  drivers/vdpa/vdpa_sim/vdpa_sim.c  |   4 +-
> >  drivers/vhost/vringh.c            | 247 ++++++++++++++++++++++++------
> >  4 files changed, 205 insertions(+), 53 deletions(-)
> >

[...]

>
> It seems to me iotlb_translate_va and iotlb_translate_pa are very
> similar, their only difference is that the argument is that iov is
> iovec instead of bio_vec. And how to fill it, obviously.
>
> It would be great to merge both functions, only differing with a
> conditional on vrh->use_va, or generics, or similar. Or, if following
> the style of the rest of vringh code, to provide a callback to fill
> iovec (although I like conditional more).
>
> However I cannot think of an easy way to perform that without long
> macros or type erasure.

Thank you for pushing me :-)
I finally managed to avoid code duplication (partial patch attached,
but not yet fully tested).

@Jason: with this refactoring I removed copy_to_va/copy_to_pa, so I
also avoided getu16_iotlb_va/pa.

I will send the full patch in v3, but I would like to get your opinion
first ;-)



diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
index 0ba3ef809e48..71dd67700e36 100644
--- a/drivers/vhost/vringh.c
+++ b/drivers/vhost/vringh.c
@@ -1096,8 +1096,7 @@ EXPORT_SYMBOL(vringh_need_notify_kern);
 
 static int iotlb_translate(const struct vringh *vrh,
 			   u64 addr, u64 len, u64 *translated,
-			   struct bio_vec iov[],
-			   int iov_size, u32 perm)
+			   void *iov, int iov_size, bool iovec, u32 perm)
 {
 	struct vhost_iotlb_map *map;
 	struct vhost_iotlb *iotlb = vrh->iotlb;
@@ -1107,7 +1106,7 @@ static int iotlb_translate(const struct vringh *vrh,
 	spin_lock(vrh->iotlb_lock);
 
 	while (len > s) {
-		u64 size, pa, pfn;
+		u64 size;
 
 		if (unlikely(ret >= iov_size)) {
 			ret = -ENOBUFS;
@@ -1124,10 +1123,22 @@ static int iotlb_translate(const struct vringh *vrh,
 		}
 
 		size = map->size - addr + map->start;
-		pa = map->addr + addr - map->start;
-		pfn = pa >> PAGE_SHIFT;
-		bvec_set_page(&iov[ret], pfn_to_page(pfn), min(len - s, size),
-			      pa & (PAGE_SIZE - 1));
+		if (iovec) {
+			struct iovec *iovec = iov;
+
+			iovec[ret].iov_len = min(len - s, size);
+			iovec[ret].iov_base = (void __user *)(unsigned long)
+					      (map->addr + addr - map->start);
+		} else {
+			u64 pa = map->addr + addr - map->start;
+			u64 pfn = pa >> PAGE_SHIFT;
+			struct bio_vec *bvec = iov;
+
+			bvec_set_page(&bvec[ret], pfn_to_page(pfn),
+				      min(len - s, size),
+				      pa & (PAGE_SIZE - 1));
+		}
+
 		s += size;
 		addr += size;
 		++ret;
@@ -1141,26 +1152,38 @@ static int iotlb_translate(const struct vringh *vrh,
 	return ret;
 }
 
+#define IOTLB_IOV_SIZE 16
+
 static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
 				  void *src, size_t len)
 {
 	u64 total_translated = 0;
 
 	while (total_translated < len) {
-		struct bio_vec iov[16];
+		union {
+			struct iovec iovec[IOTLB_IOV_SIZE];
+			struct bio_vec bvec[IOTLB_IOV_SIZE];
+		} iov;
 		struct iov_iter iter;
 		u64 translated;
 		int ret;
 
 		ret = iotlb_translate(vrh, (u64)(uintptr_t)src,
 				      len - total_translated, &translated,
-				      iov, ARRAY_SIZE(iov), VHOST_MAP_RO);
+				      &iov, IOTLB_IOV_SIZE, vrh->use_va,
+				      VHOST_MAP_RO);
 		if (ret == -ENOBUFS)
-			ret = ARRAY_SIZE(iov);
+			ret = IOTLB_IOV_SIZE;
 		else if (ret < 0)
 			return ret;
 
-		iov_iter_bvec(&iter, ITER_SOURCE, iov, ret, translated);
+		if (vrh->use_va) {
+			iov_iter_init(&iter, ITER_SOURCE, iov.iovec, ret,
+				      translated);
+		} else {
+			iov_iter_bvec(&iter, ITER_SOURCE, iov.bvec, ret,
+				      translated);
+		}
 
 		ret = copy_from_iter(dst, translated, &iter);
 		if (ret < 0)
@@ -1180,20 +1203,30 @@ static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
 	u64 total_translated = 0;
 
 	while (total_translated < len) {
-		struct bio_vec iov[16];
+		union {
+			struct iovec iovec[IOTLB_IOV_SIZE];
+			struct bio_vec bvec[IOTLB_IOV_SIZE];
+		} iov;
 		struct iov_iter iter;
 		u64 translated;
 		int ret;
 
 		ret = iotlb_translate(vrh, (u64)(uintptr_t)dst,
 				      len - total_translated, &translated,
-				      iov, ARRAY_SIZE(iov), VHOST_MAP_WO);
+				      &iov, IOTLB_IOV_SIZE, vrh->use_va,
+				      VHOST_MAP_WO);
 		if (ret == -ENOBUFS)
-			ret = ARRAY_SIZE(iov);
+			ret = IOTLB_IOV_SIZE;
 		else if (ret < 0)
 			return ret;
 
-		iov_iter_bvec(&iter, ITER_DEST, iov, ret, translated);
+		if (vrh->use_va) {
+			iov_iter_init(&iter, ITER_DEST, iov.iovec, ret,
+				      translated);
+		} else {
+			iov_iter_bvec(&iter, ITER_DEST, iov.bvec, ret,
+				      translated);
+		}
 
 		ret = copy_to_iter(src, translated, &iter);
 		if (ret < 0)
@@ -1210,20 +1243,32 @@ static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
 static inline int getu16_iotlb(const struct vringh *vrh,
 			       u16 *val, const __virtio16 *p)
 {
-	struct bio_vec iov;
-	void *kaddr, *from;
+	union {
+		struct iovec iovec;
+		struct bio_vec bvec;
+	} iov;
+	__virtio16 tmp;
 	int ret;
 
 	/* Atomic read is needed for getu16 */
-	ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
-			      &iov, 1, VHOST_MAP_RO);
+	ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
+			      NULL, &iov, 1, vrh->use_va, VHOST_MAP_RO);
 	if (ret < 0)
 		return ret;
 
-	kaddr = kmap_local_page(iov.bv_page);
-	from = kaddr + iov.bv_offset;
-	*val = vringh16_to_cpu(vrh, READ_ONCE(*(__virtio16 *)from));
-	kunmap_local(kaddr);
+	if (vrh->use_va) {
+		ret = __get_user(tmp, (__virtio16 __user *)iov.iovec.iov_base);
+		if (ret)
+			return ret;
+	} else {
+		void *kaddr = kmap_local_page(iov.bvec.bv_page);
+		void *from = kaddr + iov.bvec.bv_offset;
+
+		tmp = READ_ONCE(*(__virtio16 *)from);
+		kunmap_local(kaddr);
+	}
+
+	*val = vringh16_to_cpu(vrh, tmp);
 
 	return 0;
 }
@@ -1231,20 +1276,32 @@ static inline int getu16_iotlb(const struct vringh *vrh,
 static inline int putu16_iotlb(const struct vringh *vrh,
 			       __virtio16 *p, u16 val)
 {
-	struct bio_vec iov;
-	void *kaddr, *to;
+	union {
+		struct iovec iovec;
+		struct bio_vec bvec;
+	} iov;
+	__virtio16 tmp;
 	int ret;
 
 	/* Atomic write is needed for putu16 */
-	ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
-			      &iov, 1, VHOST_MAP_WO);
+	ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
+			      NULL, &iov, 1, vrh->use_va, VHOST_MAP_RO);
 	if (ret < 0)
 		return ret;
 
-	kaddr = kmap_local_page(iov.bv_page);
-	to = kaddr + iov.bv_offset;
-	WRITE_ONCE(*(__virtio16 *)to, cpu_to_vringh16(vrh, val));
-	kunmap_local(kaddr);
+	tmp = cpu_to_vringh16(vrh, val);
+
+	if (vrh->use_va) {
+		ret = __put_user(tmp, (__virtio16 __user *)iov.iovec.iov_base);
+		if (ret)
+			return ret;
+	} else {
+		void *kaddr = kmap_local_page(iov.bvec.bv_page);
+		void *to = kaddr + iov.bvec.bv_offset;
+
+		WRITE_ONCE(*(__virtio16 *)to, tmp);
+		kunmap_local(kaddr);
+	}
 
 	return 0;
 }

Re: [PATCH v2 4/8] vringh: support VA with iotlb
Posted by Eugenio Perez Martin 3 years ago
On Thu, Mar 16, 2023 at 5:07 PM Stefano Garzarella <sgarzare@redhat.com> wrote:
>
> On Fri, Mar 3, 2023 at 3:39 PM Eugenio Perez Martin <eperezma@redhat.com> wrote:
> >
> > On Thu, Mar 2, 2023 at 12:35 PM Stefano Garzarella <sgarzare@redhat.com> wrote:
> > >
> > > vDPA supports the possibility to use user VA in the iotlb messages.
> > > So, let's add support for user VA in vringh to use it in the vDPA
> > > simulators.
> > >
> > > Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> > > ---
> > >
> > > Notes:
> > >     v2:
> > >     - replace kmap_atomic() with kmap_local_page() [see previous patch]
> > >     - fix cast warnings when build with W=1 C=1
> > >
> > >  include/linux/vringh.h            |   5 +-
> > >  drivers/vdpa/mlx5/net/mlx5_vnet.c |   2 +-
> > >  drivers/vdpa/vdpa_sim/vdpa_sim.c  |   4 +-
> > >  drivers/vhost/vringh.c            | 247 ++++++++++++++++++++++++------
> > >  4 files changed, 205 insertions(+), 53 deletions(-)
> > >
>
> [...]
>
> >
> > It seems to me iotlb_translate_va and iotlb_translate_pa are very
> > similar, their only difference is that the argument is that iov is
> > iovec instead of bio_vec. And how to fill it, obviously.
> >
> > It would be great to merge both functions, only differing with a
> > conditional on vrh->use_va, or generics, or similar. Or, if following
> > the style of the rest of vringh code, to provide a callback to fill
> > iovec (although I like conditional more).
> >
> > However I cannot think of an easy way to perform that without long
> > macros or type erasure.
>
> Thank you for pushing me :-)
> I finally managed to avoid code duplication (partial patch attached,
> but not yet fully tested).
>
> @Jason: with this refactoring I removed copy_to_va/copy_to_pa, so I
> also avoided getu16_iotlb_va/pa.
>
> I will send the full patch in v3, but I would like to get your opinion
> first ;-)
>
>
>
> diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
> index 0ba3ef809e48..71dd67700e36 100644
> --- a/drivers/vhost/vringh.c
> +++ b/drivers/vhost/vringh.c
> @@ -1096,8 +1096,7 @@ EXPORT_SYMBOL(vringh_need_notify_kern);
>
>  static int iotlb_translate(const struct vringh *vrh,
>                            u64 addr, u64 len, u64 *translated,
> -                          struct bio_vec iov[],
> -                          int iov_size, u32 perm)
> +                          void *iov, int iov_size, bool iovec, u32 perm)

I think this is an improvement, but we're doing type erasure here. I
don't think it is a big deal since the function is not exported, it's
pretty contained in this file, so I'd ack this version too. I'm just
throwing ideas here:

a) typedef the union {iovec, bio_vec} and use that type in the parameter.

As a drawback, that union feels out of place in this file. Is this the
only place where it is needed? I don't see other similar uses in the
kernel.

b) To convert from iov to bio_iov at return
The drawback is the extra processing if the compiler is not smart
enough to inline it. I prefer the previous one but I didn't want to
omit it, just in case.

Thanks!

>  {
>         struct vhost_iotlb_map *map;
>         struct vhost_iotlb *iotlb = vrh->iotlb;
> @@ -1107,7 +1106,7 @@ static int iotlb_translate(const struct vringh *vrh,
>         spin_lock(vrh->iotlb_lock);
>
>         while (len > s) {
> -               u64 size, pa, pfn;
> +               u64 size;
>
>                 if (unlikely(ret >= iov_size)) {
>                         ret = -ENOBUFS;
> @@ -1124,10 +1123,22 @@ static int iotlb_translate(const struct vringh *vrh,
>                 }
>
>                 size = map->size - addr + map->start;
> -               pa = map->addr + addr - map->start;
> -               pfn = pa >> PAGE_SHIFT;
> -               bvec_set_page(&iov[ret], pfn_to_page(pfn), min(len - s, size),
> -                             pa & (PAGE_SIZE - 1));
> +               if (iovec) {
> +                       struct iovec *iovec = iov;
> +
> +                       iovec[ret].iov_len = min(len - s, size);
> +                       iovec[ret].iov_base = (void __user *)(unsigned long)
> +                                             (map->addr + addr - map->start);
> +               } else {
> +                       u64 pa = map->addr + addr - map->start;
> +                       u64 pfn = pa >> PAGE_SHIFT;
> +                       struct bio_vec *bvec = iov;
> +
> +                       bvec_set_page(&bvec[ret], pfn_to_page(pfn),
> +                                     min(len - s, size),
> +                                     pa & (PAGE_SIZE - 1));
> +               }
> +
>                 s += size;
>                 addr += size;
>                 ++ret;
> @@ -1141,26 +1152,38 @@ static int iotlb_translate(const struct vringh *vrh,
>         return ret;
>  }
>
> +#define IOTLB_IOV_SIZE 16
> +
>  static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
>                                   void *src, size_t len)
>  {
>         u64 total_translated = 0;
>
>         while (total_translated < len) {
> -               struct bio_vec iov[16];
> +               union {
> +                       struct iovec iovec[IOTLB_IOV_SIZE];
> +                       struct bio_vec bvec[IOTLB_IOV_SIZE];
> +               } iov;
>                 struct iov_iter iter;
>                 u64 translated;
>                 int ret;
>
>                 ret = iotlb_translate(vrh, (u64)(uintptr_t)src,
>                                       len - total_translated, &translated,
> -                                     iov, ARRAY_SIZE(iov), VHOST_MAP_RO);
> +                                     &iov, IOTLB_IOV_SIZE, vrh->use_va,
> +                                     VHOST_MAP_RO);
>                 if (ret == -ENOBUFS)
> -                       ret = ARRAY_SIZE(iov);
> +                       ret = IOTLB_IOV_SIZE;
>                 else if (ret < 0)
>                         return ret;
>
> -               iov_iter_bvec(&iter, ITER_SOURCE, iov, ret, translated);
> +               if (vrh->use_va) {
> +                       iov_iter_init(&iter, ITER_SOURCE, iov.iovec, ret,
> +                                     translated);
> +               } else {
> +                       iov_iter_bvec(&iter, ITER_SOURCE, iov.bvec, ret,
> +                                     translated);
> +               }
>
>                 ret = copy_from_iter(dst, translated, &iter);
>                 if (ret < 0)
> @@ -1180,20 +1203,30 @@ static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
>         u64 total_translated = 0;
>
>         while (total_translated < len) {
> -               struct bio_vec iov[16];
> +               union {
> +                       struct iovec iovec[IOTLB_IOV_SIZE];
> +                       struct bio_vec bvec[IOTLB_IOV_SIZE];
> +               } iov;
>                 struct iov_iter iter;
>                 u64 translated;
>                 int ret;
>
>                 ret = iotlb_translate(vrh, (u64)(uintptr_t)dst,
>                                       len - total_translated, &translated,
> -                                     iov, ARRAY_SIZE(iov), VHOST_MAP_WO);
> +                                     &iov, IOTLB_IOV_SIZE, vrh->use_va,
> +                                     VHOST_MAP_WO);
>                 if (ret == -ENOBUFS)
> -                       ret = ARRAY_SIZE(iov);
> +                       ret = IOTLB_IOV_SIZE;
>                 else if (ret < 0)
>                         return ret;
>
> -               iov_iter_bvec(&iter, ITER_DEST, iov, ret, translated);
> +               if (vrh->use_va) {
> +                       iov_iter_init(&iter, ITER_DEST, iov.iovec, ret,
> +                                     translated);
> +               } else {
> +                       iov_iter_bvec(&iter, ITER_DEST, iov.bvec, ret,
> +                                     translated);
> +               }
>
>                 ret = copy_to_iter(src, translated, &iter);
>                 if (ret < 0)
> @@ -1210,20 +1243,32 @@ static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
>  static inline int getu16_iotlb(const struct vringh *vrh,
>                                u16 *val, const __virtio16 *p)
>  {
> -       struct bio_vec iov;
> -       void *kaddr, *from;
> +       union {
> +               struct iovec iovec;
> +               struct bio_vec bvec;
> +       } iov;
> +       __virtio16 tmp;
>         int ret;
>
>         /* Atomic read is needed for getu16 */
> -       ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
> -                             &iov, 1, VHOST_MAP_RO);
> +       ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
> +                             NULL, &iov, 1, vrh->use_va, VHOST_MAP_RO);
>         if (ret < 0)
>                 return ret;
>
> -       kaddr = kmap_local_page(iov.bv_page);
> -       from = kaddr + iov.bv_offset;
> -       *val = vringh16_to_cpu(vrh, READ_ONCE(*(__virtio16 *)from));
> -       kunmap_local(kaddr);
> +       if (vrh->use_va) {
> +               ret = __get_user(tmp, (__virtio16 __user *)iov.iovec.iov_base);
> +               if (ret)
> +                       return ret;
> +       } else {
> +               void *kaddr = kmap_local_page(iov.bvec.bv_page);
> +               void *from = kaddr + iov.bvec.bv_offset;
> +
> +               tmp = READ_ONCE(*(__virtio16 *)from);
> +               kunmap_local(kaddr);
> +       }
> +
> +       *val = vringh16_to_cpu(vrh, tmp);
>
>         return 0;
>  }
> @@ -1231,20 +1276,32 @@ static inline int getu16_iotlb(const struct vringh *vrh,
>  static inline int putu16_iotlb(const struct vringh *vrh,
>                                __virtio16 *p, u16 val)
>  {
> -       struct bio_vec iov;
> -       void *kaddr, *to;
> +       union {
> +               struct iovec iovec;
> +               struct bio_vec bvec;
> +       } iov;
> +       __virtio16 tmp;
>         int ret;
>
>         /* Atomic write is needed for putu16 */
> -       ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
> -                             &iov, 1, VHOST_MAP_WO);
> +       ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
> +                             NULL, &iov, 1, vrh->use_va, VHOST_MAP_RO);
>         if (ret < 0)
>                 return ret;
>
> -       kaddr = kmap_local_page(iov.bv_page);
> -       to = kaddr + iov.bv_offset;
> -       WRITE_ONCE(*(__virtio16 *)to, cpu_to_vringh16(vrh, val));
> -       kunmap_local(kaddr);
> +       tmp = cpu_to_vringh16(vrh, val);
> +
> +       if (vrh->use_va) {
> +               ret = __put_user(tmp, (__virtio16 __user *)iov.iovec.iov_base);
> +               if (ret)
> +                       return ret;
> +       } else {
> +               void *kaddr = kmap_local_page(iov.bvec.bv_page);
> +               void *to = kaddr + iov.bvec.bv_offset;
> +
> +               WRITE_ONCE(*(__virtio16 *)to, tmp);
> +               kunmap_local(kaddr);
> +       }
>
>         return 0;
>  }
>
Re: [PATCH v2 4/8] vringh: support VA with iotlb
Posted by Stefano Garzarella 3 years ago
On Fri, Mar 17, 2023 at 10:49:27AM +0100, Eugenio Perez Martin wrote:
>On Thu, Mar 16, 2023 at 5:07 PM Stefano Garzarella <sgarzare@redhat.com> wrote:
>>
>> On Fri, Mar 3, 2023 at 3:39 PM Eugenio Perez Martin <eperezma@redhat.com> wrote:
>> >
>> > On Thu, Mar 2, 2023 at 12:35 PM Stefano Garzarella <sgarzare@redhat.com> wrote:
>> > >
>> > > vDPA supports the possibility to use user VA in the iotlb messages.
>> > > So, let's add support for user VA in vringh to use it in the vDPA
>> > > simulators.
>> > >
>> > > Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
>> > > ---
>> > >
>> > > Notes:
>> > >     v2:
>> > >     - replace kmap_atomic() with kmap_local_page() [see previous patch]
>> > >     - fix cast warnings when build with W=1 C=1
>> > >
>> > >  include/linux/vringh.h            |   5 +-
>> > >  drivers/vdpa/mlx5/net/mlx5_vnet.c |   2 +-
>> > >  drivers/vdpa/vdpa_sim/vdpa_sim.c  |   4 +-
>> > >  drivers/vhost/vringh.c            | 247 ++++++++++++++++++++++++------
>> > >  4 files changed, 205 insertions(+), 53 deletions(-)
>> > >
>>
>> [...]
>>
>> >
>> > It seems to me iotlb_translate_va and iotlb_translate_pa are very
>> > similar, their only difference is that the argument is that iov is
>> > iovec instead of bio_vec. And how to fill it, obviously.
>> >
>> > It would be great to merge both functions, only differing with a
>> > conditional on vrh->use_va, or generics, or similar. Or, if following
>> > the style of the rest of vringh code, to provide a callback to fill
>> > iovec (although I like conditional more).
>> >
>> > However I cannot think of an easy way to perform that without long
>> > macros or type erasure.
>>
>> Thank you for pushing me :-)
>> I finally managed to avoid code duplication (partial patch attached,
>> but not yet fully tested).
>>
>> @Jason: with this refactoring I removed copy_to_va/copy_to_pa, so I
>> also avoided getu16_iotlb_va/pa.
>>
>> I will send the full patch in v3, but I would like to get your opinion
>> first ;-)
>>
>>
>>
>> diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
>> index 0ba3ef809e48..71dd67700e36 100644
>> --- a/drivers/vhost/vringh.c
>> +++ b/drivers/vhost/vringh.c
>> @@ -1096,8 +1096,7 @@ EXPORT_SYMBOL(vringh_need_notify_kern);
>>
>>  static int iotlb_translate(const struct vringh *vrh,
>>                            u64 addr, u64 len, u64 *translated,
>> -                          struct bio_vec iov[],
>> -                          int iov_size, u32 perm)
>> +                          void *iov, int iov_size, bool iovec, u32 perm)
>
>I think this is an improvement, but we're doing type erasure here. I
>don't think it is a big deal since the function is not exported, it's
>pretty contained in this file, so I'd ack this version too. I'm just
>throwing ideas here:
>
>a) typedef the union {iovec, bio_vec} and use that type in the parameter.
>
>As a drawback, that union feels out of place in this file. Is this the
>only place where it is needed? I don't see other similar uses in the
>kernel.

iov_iter has something similar, but they are const pointers, so IIUC
it is not supposed to be used to set the bvec contents, just iterate it.

Anyway I thought something similar and should be doable, but since
it was internal API I went to type erasure.

>
>b) To convert from iov to bio_iov at return
>The drawback is the extra processing if the compiler is not smart
>enough to inline it. I prefer the previous one but I didn't want to
>omit it, just in case.

Yep, I prefer too the previous one, so let's go in that direction for
v3 ;-)

Thanks,
Stefano

Re: [PATCH v2 4/8] vringh: support VA with iotlb
Posted by Jason Wang 3 years ago
On Fri, Mar 17, 2023 at 12:07 AM Stefano Garzarella <sgarzare@redhat.com> wrote:
>
> On Fri, Mar 3, 2023 at 3:39 PM Eugenio Perez Martin <eperezma@redhat.com> wrote:
> >
> > On Thu, Mar 2, 2023 at 12:35 PM Stefano Garzarella <sgarzare@redhat.com> wrote:
> > >
> > > vDPA supports the possibility to use user VA in the iotlb messages.
> > > So, let's add support for user VA in vringh to use it in the vDPA
> > > simulators.
> > >
> > > Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> > > ---
> > >
> > > Notes:
> > >     v2:
> > >     - replace kmap_atomic() with kmap_local_page() [see previous patch]
> > >     - fix cast warnings when build with W=1 C=1
> > >
> > >  include/linux/vringh.h            |   5 +-
> > >  drivers/vdpa/mlx5/net/mlx5_vnet.c |   2 +-
> > >  drivers/vdpa/vdpa_sim/vdpa_sim.c  |   4 +-
> > >  drivers/vhost/vringh.c            | 247 ++++++++++++++++++++++++------
> > >  4 files changed, 205 insertions(+), 53 deletions(-)
> > >
>
> [...]
>
> >
> > It seems to me iotlb_translate_va and iotlb_translate_pa are very
> > similar, their only difference is that the argument is that iov is
> > iovec instead of bio_vec. And how to fill it, obviously.
> >
> > It would be great to merge both functions, only differing with a
> > conditional on vrh->use_va, or generics, or similar. Or, if following
> > the style of the rest of vringh code, to provide a callback to fill
> > iovec (although I like conditional more).
> >
> > However I cannot think of an easy way to perform that without long
> > macros or type erasure.
>
> Thank you for pushing me :-)
> I finally managed to avoid code duplication (partial patch attached,
> but not yet fully tested).
>
> @Jason: with this refactoring I removed copy_to_va/copy_to_pa, so I
> also avoided getu16_iotlb_va/pa.
>
> I will send the full patch in v3, but I would like to get your opinion
> first ;-)

Fine with me.

Thanks

>
>
>
> diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
> index 0ba3ef809e48..71dd67700e36 100644
> --- a/drivers/vhost/vringh.c
> +++ b/drivers/vhost/vringh.c
> @@ -1096,8 +1096,7 @@ EXPORT_SYMBOL(vringh_need_notify_kern);
>
>  static int iotlb_translate(const struct vringh *vrh,
>                            u64 addr, u64 len, u64 *translated,
> -                          struct bio_vec iov[],
> -                          int iov_size, u32 perm)
> +                          void *iov, int iov_size, bool iovec, u32 perm)
>  {
>         struct vhost_iotlb_map *map;
>         struct vhost_iotlb *iotlb = vrh->iotlb;
> @@ -1107,7 +1106,7 @@ static int iotlb_translate(const struct vringh *vrh,
>         spin_lock(vrh->iotlb_lock);
>
>         while (len > s) {
> -               u64 size, pa, pfn;
> +               u64 size;
>
>                 if (unlikely(ret >= iov_size)) {
>                         ret = -ENOBUFS;
> @@ -1124,10 +1123,22 @@ static int iotlb_translate(const struct vringh *vrh,
>                 }
>
>                 size = map->size - addr + map->start;
> -               pa = map->addr + addr - map->start;
> -               pfn = pa >> PAGE_SHIFT;
> -               bvec_set_page(&iov[ret], pfn_to_page(pfn), min(len - s, size),
> -                             pa & (PAGE_SIZE - 1));
> +               if (iovec) {
> +                       struct iovec *iovec = iov;
> +
> +                       iovec[ret].iov_len = min(len - s, size);
> +                       iovec[ret].iov_base = (void __user *)(unsigned long)
> +                                             (map->addr + addr - map->start);
> +               } else {
> +                       u64 pa = map->addr + addr - map->start;
> +                       u64 pfn = pa >> PAGE_SHIFT;
> +                       struct bio_vec *bvec = iov;
> +
> +                       bvec_set_page(&bvec[ret], pfn_to_page(pfn),
> +                                     min(len - s, size),
> +                                     pa & (PAGE_SIZE - 1));
> +               }
> +
>                 s += size;
>                 addr += size;
>                 ++ret;
> @@ -1141,26 +1152,38 @@ static int iotlb_translate(const struct vringh *vrh,
>         return ret;
>  }
>
> +#define IOTLB_IOV_SIZE 16
> +
>  static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
>                                   void *src, size_t len)
>  {
>         u64 total_translated = 0;
>
>         while (total_translated < len) {
> -               struct bio_vec iov[16];
> +               union {
> +                       struct iovec iovec[IOTLB_IOV_SIZE];
> +                       struct bio_vec bvec[IOTLB_IOV_SIZE];
> +               } iov;
>                 struct iov_iter iter;
>                 u64 translated;
>                 int ret;
>
>                 ret = iotlb_translate(vrh, (u64)(uintptr_t)src,
>                                       len - total_translated, &translated,
> -                                     iov, ARRAY_SIZE(iov), VHOST_MAP_RO);
> +                                     &iov, IOTLB_IOV_SIZE, vrh->use_va,
> +                                     VHOST_MAP_RO);
>                 if (ret == -ENOBUFS)
> -                       ret = ARRAY_SIZE(iov);
> +                       ret = IOTLB_IOV_SIZE;
>                 else if (ret < 0)
>                         return ret;
>
> -               iov_iter_bvec(&iter, ITER_SOURCE, iov, ret, translated);
> +               if (vrh->use_va) {
> +                       iov_iter_init(&iter, ITER_SOURCE, iov.iovec, ret,
> +                                     translated);
> +               } else {
> +                       iov_iter_bvec(&iter, ITER_SOURCE, iov.bvec, ret,
> +                                     translated);
> +               }
>
>                 ret = copy_from_iter(dst, translated, &iter);
>                 if (ret < 0)
> @@ -1180,20 +1203,30 @@ static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
>         u64 total_translated = 0;
>
>         while (total_translated < len) {
> -               struct bio_vec iov[16];
> +               union {
> +                       struct iovec iovec[IOTLB_IOV_SIZE];
> +                       struct bio_vec bvec[IOTLB_IOV_SIZE];
> +               } iov;
>                 struct iov_iter iter;
>                 u64 translated;
>                 int ret;
>
>                 ret = iotlb_translate(vrh, (u64)(uintptr_t)dst,
>                                       len - total_translated, &translated,
> -                                     iov, ARRAY_SIZE(iov), VHOST_MAP_WO);
> +                                     &iov, IOTLB_IOV_SIZE, vrh->use_va,
> +                                     VHOST_MAP_WO);
>                 if (ret == -ENOBUFS)
> -                       ret = ARRAY_SIZE(iov);
> +                       ret = IOTLB_IOV_SIZE;
>                 else if (ret < 0)
>                         return ret;
>
> -               iov_iter_bvec(&iter, ITER_DEST, iov, ret, translated);
> +               if (vrh->use_va) {
> +                       iov_iter_init(&iter, ITER_DEST, iov.iovec, ret,
> +                                     translated);
> +               } else {
> +                       iov_iter_bvec(&iter, ITER_DEST, iov.bvec, ret,
> +                                     translated);
> +               }
>
>                 ret = copy_to_iter(src, translated, &iter);
>                 if (ret < 0)
> @@ -1210,20 +1243,32 @@ static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
>  static inline int getu16_iotlb(const struct vringh *vrh,
>                                u16 *val, const __virtio16 *p)
>  {
> -       struct bio_vec iov;
> -       void *kaddr, *from;
> +       union {
> +               struct iovec iovec;
> +               struct bio_vec bvec;
> +       } iov;
> +       __virtio16 tmp;
>         int ret;
>
>         /* Atomic read is needed for getu16 */
> -       ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
> -                             &iov, 1, VHOST_MAP_RO);
> +       ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
> +                             NULL, &iov, 1, vrh->use_va, VHOST_MAP_RO);
>         if (ret < 0)
>                 return ret;
>
> -       kaddr = kmap_local_page(iov.bv_page);
> -       from = kaddr + iov.bv_offset;
> -       *val = vringh16_to_cpu(vrh, READ_ONCE(*(__virtio16 *)from));
> -       kunmap_local(kaddr);
> +       if (vrh->use_va) {
> +               ret = __get_user(tmp, (__virtio16 __user *)iov.iovec.iov_base);
> +               if (ret)
> +                       return ret;
> +       } else {
> +               void *kaddr = kmap_local_page(iov.bvec.bv_page);
> +               void *from = kaddr + iov.bvec.bv_offset;
> +
> +               tmp = READ_ONCE(*(__virtio16 *)from);
> +               kunmap_local(kaddr);
> +       }
> +
> +       *val = vringh16_to_cpu(vrh, tmp);
>
>         return 0;
>  }
> @@ -1231,20 +1276,32 @@ static inline int getu16_iotlb(const struct vringh *vrh,
>  static inline int putu16_iotlb(const struct vringh *vrh,
>                                __virtio16 *p, u16 val)
>  {
> -       struct bio_vec iov;
> -       void *kaddr, *to;
> +       union {
> +               struct iovec iovec;
> +               struct bio_vec bvec;
> +       } iov;
> +       __virtio16 tmp;
>         int ret;
>
>         /* Atomic write is needed for putu16 */
> -       ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
> -                             &iov, 1, VHOST_MAP_WO);
> +       ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
> +                             NULL, &iov, 1, vrh->use_va, VHOST_MAP_RO);
>         if (ret < 0)
>                 return ret;
>
> -       kaddr = kmap_local_page(iov.bv_page);
> -       to = kaddr + iov.bv_offset;
> -       WRITE_ONCE(*(__virtio16 *)to, cpu_to_vringh16(vrh, val));
> -       kunmap_local(kaddr);
> +       tmp = cpu_to_vringh16(vrh, val);
> +
> +       if (vrh->use_va) {
> +               ret = __put_user(tmp, (__virtio16 __user *)iov.iovec.iov_base);
> +               if (ret)
> +                       return ret;
> +       } else {
> +               void *kaddr = kmap_local_page(iov.bvec.bv_page);
> +               void *to = kaddr + iov.bvec.bv_offset;
> +
> +               WRITE_ONCE(*(__virtio16 *)to, tmp);
> +               kunmap_local(kaddr);
> +       }
>
>         return 0;
>  }
>
Re: [PATCH v2 4/8] vringh: support VA with iotlb
Posted by Stefano Garzarella 3 years, 1 month ago
On Fri, Mar 03, 2023 at 03:38:57PM +0100, Eugenio Perez Martin wrote:
>On Thu, Mar 2, 2023 at 12:35 PM Stefano Garzarella <sgarzare@redhat.com> wrote:
>>
>> vDPA supports the possibility to use user VA in the iotlb messages.
>> So, let's add support for user VA in vringh to use it in the vDPA
>> simulators.
>>
>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
>> ---
>>
>> Notes:
>>     v2:
>>     - replace kmap_atomic() with kmap_local_page() [see previous patch]
>>     - fix cast warnings when build with W=1 C=1
>>
>>  include/linux/vringh.h            |   5 +-
>>  drivers/vdpa/mlx5/net/mlx5_vnet.c |   2 +-
>>  drivers/vdpa/vdpa_sim/vdpa_sim.c  |   4 +-
>>  drivers/vhost/vringh.c            | 247 ++++++++++++++++++++++++------
>>  4 files changed, 205 insertions(+), 53 deletions(-)
>>
>> diff --git a/include/linux/vringh.h b/include/linux/vringh.h
>> index 1991a02c6431..d39b9f2dcba0 100644
>> --- a/include/linux/vringh.h
>> +++ b/include/linux/vringh.h
>> @@ -32,6 +32,9 @@ struct vringh {
>>         /* Can we get away with weak barriers? */
>>         bool weak_barriers;
>>
>> +       /* Use user's VA */
>> +       bool use_va;
>> +
>>         /* Last available index we saw (ie. where we're up to). */
>>         u16 last_avail_idx;
>>
>> @@ -279,7 +282,7 @@ void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
>>                       spinlock_t *iotlb_lock);
>>
>>  int vringh_init_iotlb(struct vringh *vrh, u64 features,
>> -                     unsigned int num, bool weak_barriers,
>> +                     unsigned int num, bool weak_barriers, bool use_va,
>>                       struct vring_desc *desc,
>>                       struct vring_avail *avail,
>>                       struct vring_used *used);
>> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> index 3a0e721aef05..babc8dd171a6 100644
>> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> @@ -2537,7 +2537,7 @@ static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
>>
>>         if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
>>                 err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
>> -                                       MLX5_CVQ_MAX_ENT, false,
>> +                                       MLX5_CVQ_MAX_ENT, false, false,
>>                                         (struct vring_desc *)(uintptr_t)cvq->desc_addr,
>>                                         (struct vring_avail *)(uintptr_t)cvq->driver_addr,
>>                                         (struct vring_used *)(uintptr_t)cvq->device_addr);
>> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
>> index 6a0a65814626..481eb156658b 100644
>> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
>> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
>> @@ -60,7 +60,7 @@ static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx)
>>         struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
>>         uint16_t last_avail_idx = vq->vring.last_avail_idx;
>>
>> -       vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, true,
>> +       vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, true, false,
>>                           (struct vring_desc *)(uintptr_t)vq->desc_addr,
>>                           (struct vring_avail *)
>>                           (uintptr_t)vq->driver_addr,
>> @@ -81,7 +81,7 @@ static void vdpasim_vq_reset(struct vdpasim *vdpasim,
>>         vq->cb = NULL;
>>         vq->private = NULL;
>>         vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features,
>> -                         VDPASIM_QUEUE_MAX, false, NULL, NULL, NULL);
>> +                         VDPASIM_QUEUE_MAX, false, false, NULL, NULL, NULL);
>>
>>         vq->vring.notify = NULL;
>>  }
>> diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
>> index 0ba3ef809e48..61c79cea44ca 100644
>> --- a/drivers/vhost/vringh.c
>> +++ b/drivers/vhost/vringh.c
>> @@ -1094,15 +1094,99 @@ EXPORT_SYMBOL(vringh_need_notify_kern);
>>
>>  #if IS_REACHABLE(CONFIG_VHOST_IOTLB)
>>
>> -static int iotlb_translate(const struct vringh *vrh,
>> -                          u64 addr, u64 len, u64 *translated,
>> -                          struct bio_vec iov[],
>> -                          int iov_size, u32 perm)
>> +static int iotlb_translate_va(const struct vringh *vrh,
>> +                             u64 addr, u64 len, u64 *translated,
>> +                             struct iovec iov[],
>> +                             int iov_size, u32 perm)
>>  {
>>         struct vhost_iotlb_map *map;
>>         struct vhost_iotlb *iotlb = vrh->iotlb;
>> +       u64 s = 0, last = addr + len - 1;
>>         int ret = 0;
>> +
>> +       spin_lock(vrh->iotlb_lock);
>> +
>> +       while (len > s) {
>> +               u64 size;
>> +
>> +               if (unlikely(ret >= iov_size)) {
>> +                       ret = -ENOBUFS;
>> +                       break;
>> +               }
>> +
>> +               map = vhost_iotlb_itree_first(iotlb, addr, last);
>> +               if (!map || map->start > addr) {
>> +                       ret = -EINVAL;
>> +                       break;
>> +               } else if (!(map->perm & perm)) {
>> +                       ret = -EPERM;
>> +                       break;
>> +               }
>> +
>> +               size = map->size - addr + map->start;
>> +               iov[ret].iov_len = min(len - s, size);
>> +               iov[ret].iov_base = (void __user *)(unsigned long)
>> +                                   (map->addr + addr - map->start);
>> +               s += size;
>> +               addr += size;
>> +               ++ret;
>> +       }
>> +
>> +       spin_unlock(vrh->iotlb_lock);
>> +
>> +       if (translated)
>> +               *translated = min(len, s);
>> +
>> +       return ret;
>> +}
>
>It seems to me iotlb_translate_va and iotlb_translate_pa are very
>similar, their only difference is that the argument is that iov is
>iovec instead of bio_vec. And how to fill it, obviously.
>
>It would be great to merge both functions, only differing with a
>conditional on vrh->use_va, or generics, or similar. Or, if following
>the style of the rest of vringh code, to provide a callback to fill
>iovec (although I like conditional more).
>
>However I cannot think of an easy way to perform that without long
>macros or type erasure.

I agree and I tried, but then I got messed up and let it go.

But maybe with the callback it shouldn't be too messy, I can try it and
see what comes out :-)

>
>> +
>> +static inline int copy_from_va(const struct vringh *vrh, void *dst, void *src,
>> +                              u64 len, u64 *translated)
>> +{
>> +       struct iovec iov[16];
>> +       struct iov_iter iter;
>> +       int ret;
>> +
>> +       ret = iotlb_translate_va(vrh, (u64)(uintptr_t)src, len, translated, iov,
>> +                                ARRAY_SIZE(iov), VHOST_MAP_RO);
>> +       if (ret == -ENOBUFS)
>> +               ret = ARRAY_SIZE(iov);
>> +       else if (ret < 0)
>> +               return ret;
>> +
>> +       iov_iter_init(&iter, ITER_SOURCE, iov, ret, *translated);
>> +
>> +       return copy_from_iter(dst, *translated, &iter);
>
>Maybe a good baby step for DRY is to return the iov_iter in
>copy_from/to_va/pa here?

Good point! I'll try it.

>
>But I'm ok with this version too.
>
>Acked-by: Eugenio Pérez Martin <eperezma@redhat.com>

Thanks for the review!
Stefano