[RFC PATCH net-next v2 1/2] virtio-net: support zerocopy multi buffer XDP in mergeable

Bui Quang Minh posted 2 patches 6 months, 3 weeks ago
[RFC PATCH net-next v2 1/2] virtio-net: support zerocopy multi buffer XDP in mergeable
Posted by Bui Quang Minh 6 months, 3 weeks ago
Currently, in zerocopy mode with mergeable receive buffer, virtio-net
does not support multi buffer but a single buffer only. This commit adds
support for multi mergeable receive buffer in the zerocopy XDP path by
utilizing XDP buffer with frags.

Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
---
 drivers/net/virtio_net.c | 123 +++++++++++++++++++++------------------
 1 file changed, 66 insertions(+), 57 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index e53ba600605a..a9558650f205 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -45,6 +45,8 @@ module_param(napi_tx, bool, 0644);
 #define VIRTIO_XDP_TX		BIT(0)
 #define VIRTIO_XDP_REDIR	BIT(1)
 
+#define VIRTNET_MAX_ZC_SEGS	8
+
 /* RX packet size EWMA. The average packet size is used to determine the packet
  * buffer size when refilling RX rings. As the entire RX ring may be refilled
  * at once, the weight is chosen so that the EWMA will be insensitive to short-
@@ -1232,65 +1234,53 @@ static void xsk_drop_follow_bufs(struct net_device *dev,
 	}
 }
 
-static int xsk_append_merge_buffer(struct virtnet_info *vi,
-				   struct receive_queue *rq,
-				   struct sk_buff *head_skb,
-				   u32 num_buf,
-				   struct virtio_net_hdr_mrg_rxbuf *hdr,
-				   struct virtnet_rq_stats *stats)
+static int virtnet_build_xsk_buff_mrg(struct virtnet_info *vi,
+				      struct receive_queue *rq,
+				      u32 num_buf,
+				      struct xdp_buff *xdp,
+				      struct virtnet_rq_stats *stats)
 {
-	struct sk_buff *curr_skb;
-	struct xdp_buff *xdp;
-	u32 len, truesize;
-	struct page *page;
+	unsigned int len;
 	void *buf;
 
-	curr_skb = head_skb;
+	if (num_buf < 2)
+		return 0;
+
+	while (num_buf > 1) {
+		struct xdp_buff *new_xdp;
 
-	while (--num_buf) {
 		buf = virtqueue_get_buf(rq->vq, &len);
-		if (unlikely(!buf)) {
-			pr_debug("%s: rx error: %d buffers out of %d missing\n",
-				 vi->dev->name, num_buf,
-				 virtio16_to_cpu(vi->vdev,
-						 hdr->num_buffers));
+		if (!unlikely(buf)) {
+			pr_debug("%s: rx error: %d buffers missing\n",
+				 vi->dev->name, num_buf);
 			DEV_STATS_INC(vi->dev, rx_length_errors);
-			return -EINVAL;
-		}
-
-		u64_stats_add(&stats->bytes, len);
-
-		xdp = buf_to_xdp(vi, rq, buf, len);
-		if (!xdp)
-			goto err;
-
-		buf = napi_alloc_frag(len);
-		if (!buf) {
-			xsk_buff_free(xdp);
-			goto err;
+			return -1;
 		}
 
-		memcpy(buf, xdp->data - vi->hdr_len, len);
-
-		xsk_buff_free(xdp);
+		new_xdp = buf_to_xdp(vi, rq, buf, len);
+		if (!new_xdp)
+			goto drop_bufs;
 
-		page = virt_to_page(buf);
+		/* In virtnet_add_recvbuf_xsk(), we ask the host to fill from
+		 * xdp->data - vi->hdr_len with both virtio_net_hdr and data.
+		 * However, only the first packet has the virtio_net_hdr, the
+		 * following ones do not. So we need to adjust the following
+		 * packets' data pointer to the correct place.
+		 */
+		new_xdp->data -= vi->hdr_len;
+		new_xdp->data_end = new_xdp->data + len;
 
-		truesize = len;
+		if (!xsk_buff_add_frag(xdp, new_xdp))
+			goto drop_bufs;
 
-		curr_skb  = virtnet_skb_append_frag(head_skb, curr_skb, page,
-						    buf, len, truesize);
-		if (!curr_skb) {
-			put_page(page);
-			goto err;
-		}
+		num_buf--;
 	}
 
 	return 0;
 
-err:
+drop_bufs:
 	xsk_drop_follow_bufs(vi->dev, rq, num_buf, stats);
-	return -EINVAL;
+	return -1;
 }
 
 static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct virtnet_info *vi,
@@ -1307,23 +1297,42 @@ static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct
 	num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
 
 	ret = XDP_PASS;
+	if (virtnet_build_xsk_buff_mrg(vi, rq, num_buf, xdp, stats))
+		goto drop;
+
 	rcu_read_lock();
 	prog = rcu_dereference(rq->xdp_prog);
-	/* TODO: support multi buffer. */
-	if (prog && num_buf == 1)
-		ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats);
+	if (prog) {
+		/* We are in zerocopy mode so we cannot copy the multi-buffer
+		 * xdp buff to a single linear xdp buff. If we do so, in case
+		 * the BPF program decides to redirect to a XDP socket (XSK),
+		 * it will trigger the zerocopy receive logic in XDP socket.
+		 * The receive logic thinks it receives zerocopy buffer while
+		 * in fact, it is the copy one and everything is messed up.
+		 * So just drop the packet here if we have a multi-buffer xdp
+		 * buff and the BPF program does not support it.
+		 */
+		if (xdp_buff_has_frags(xdp) && !prog->aux->xdp_has_frags)
+			ret = XDP_DROP;
+		else
+			ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit,
+						  stats);
+	}
 	rcu_read_unlock();
 
 	switch (ret) {
 	case XDP_PASS:
-		skb = xsk_construct_skb(rq, xdp);
+		skb = xdp_build_skb_from_zc(xdp);
 		if (!skb)
-			goto drop_bufs;
+			break;
 
-		if (xsk_append_merge_buffer(vi, rq, skb, num_buf, hdr, stats)) {
-			dev_kfree_skb(skb);
-			goto drop;
-		}
+		/* Later, in virtnet_receive_done(), eth_type_trans()
+		 * is called. However, in xdp_build_skb_from_zc(), it is called
+		 * already. As a result, we need to reset the data to before
+		 * the mac header so that the later call in
+		 * virtnet_receive_done() works correctly.
+		 */
+		skb_push(skb, ETH_HLEN);
 
 		return skb;
 
@@ -1332,14 +1341,11 @@ static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct
 		return NULL;
 
 	default:
-		/* drop packet */
-		xsk_buff_free(xdp);
+		break;
 	}
 
-drop_bufs:
-	xsk_drop_follow_bufs(dev, rq, num_buf, stats);
-
 drop:
+	xsk_buff_free(xdp);
 	u64_stats_inc(&stats->drops);
 	return NULL;
 }
@@ -1396,6 +1402,8 @@ static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue
 		return -ENOMEM;
 
 	len = xsk_pool_get_rx_frame_size(pool) + vi->hdr_len;
+	/* Reserve some space for skb_shared_info */
+	len -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
 	for (i = 0; i < num; ++i) {
 		/* Use the part of XDP_PACKET_HEADROOM as the virtnet hdr space.
@@ -6734,6 +6742,7 @@ static int virtnet_probe(struct virtio_device *vdev)
 	dev->netdev_ops = &virtnet_netdev;
 	dev->stat_ops = &virtnet_stat_ops;
 	dev->features = NETIF_F_HIGHDMA;
+	dev->xdp_zc_max_segs = VIRTNET_MAX_ZC_SEGS;
 
 	dev->ethtool_ops = &virtnet_ethtool_ops;
 	SET_NETDEV_DEV(dev, &vdev->dev);
-- 
2.43.0
Re: [RFC PATCH net-next v2 1/2] virtio-net: support zerocopy multi buffer XDP in mergeable
Posted by Jason Wang 6 months, 3 weeks ago
On Wed, May 28, 2025 at 12:19 AM Bui Quang Minh
<minhquangbui99@gmail.com> wrote:
>
> Currently, in zerocopy mode with mergeable receive buffer, virtio-net
> does not support multi buffer but a single buffer only. This commit adds
> support for multi mergeable receive buffer in the zerocopy XDP path by
> utilizing XDP buffer with frags.
>
> Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
> ---
>  drivers/net/virtio_net.c | 123 +++++++++++++++++++++------------------
>  1 file changed, 66 insertions(+), 57 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index e53ba600605a..a9558650f205 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -45,6 +45,8 @@ module_param(napi_tx, bool, 0644);
>  #define VIRTIO_XDP_TX          BIT(0)
>  #define VIRTIO_XDP_REDIR       BIT(1)
>
> +#define VIRTNET_MAX_ZC_SEGS    8
> +
>  /* RX packet size EWMA. The average packet size is used to determine the packet
>   * buffer size when refilling RX rings. As the entire RX ring may be refilled
>   * at once, the weight is chosen so that the EWMA will be insensitive to short-
> @@ -1232,65 +1234,53 @@ static void xsk_drop_follow_bufs(struct net_device *dev,
>         }
>  }
>
> -static int xsk_append_merge_buffer(struct virtnet_info *vi,
> -                                  struct receive_queue *rq,
> -                                  struct sk_buff *head_skb,
> -                                  u32 num_buf,
> -                                  struct virtio_net_hdr_mrg_rxbuf *hdr,
> -                                  struct virtnet_rq_stats *stats)
> +static int virtnet_build_xsk_buff_mrg(struct virtnet_info *vi,
> +                                     struct receive_queue *rq,
> +                                     u32 num_buf,
> +                                     struct xdp_buff *xdp,
> +                                     struct virtnet_rq_stats *stats)
>  {
> -       struct sk_buff *curr_skb;
> -       struct xdp_buff *xdp;
> -       u32 len, truesize;
> -       struct page *page;
> +       unsigned int len;
>         void *buf;
>
> -       curr_skb = head_skb;
> +       if (num_buf < 2)
> +               return 0;
> +
> +       while (num_buf > 1) {
> +               struct xdp_buff *new_xdp;
>
> -       while (--num_buf) {
>                 buf = virtqueue_get_buf(rq->vq, &len);
> -               if (unlikely(!buf)) {
> -                       pr_debug("%s: rx error: %d buffers out of %d missing\n",
> -                                vi->dev->name, num_buf,
> -                                virtio16_to_cpu(vi->vdev,
> -                                                hdr->num_buffers));
> +               if (!unlikely(buf)) {
> +                       pr_debug("%s: rx error: %d buffers missing\n",
> +                                vi->dev->name, num_buf);
>                         DEV_STATS_INC(vi->dev, rx_length_errors);
> -                       return -EINVAL;
> -               }
> -
> -               u64_stats_add(&stats->bytes, len);
> -
> -               xdp = buf_to_xdp(vi, rq, buf, len);
> -               if (!xdp)
> -                       goto err;
> -
> -               buf = napi_alloc_frag(len);
> -               if (!buf) {
> -                       xsk_buff_free(xdp);
> -                       goto err;
> +                       return -1;
>                 }
>
> -               memcpy(buf, xdp->data - vi->hdr_len, len);
> -
> -               xsk_buff_free(xdp);
> +               new_xdp = buf_to_xdp(vi, rq, buf, len);
> +               if (!new_xdp)
> +                       goto drop_bufs;
>
> -               page = virt_to_page(buf);
> +               /* In virtnet_add_recvbuf_xsk(), we ask the host to fill from
> +                * xdp->data - vi->hdr_len with both virtio_net_hdr and data.
> +                * However, only the first packet has the virtio_net_hdr, the
> +                * following ones do not. So we need to adjust the following

Typo here.

> +                * packets' data pointer to the correct place.
> +                */

I wonder what happens if we don't use this trick? I meant we don't
reuse the header room for the virtio-net header. This seems to be fine
for a mergeable buffer and can help to reduce the trick.

> +               new_xdp->data -= vi->hdr_len;
> +               new_xdp->data_end = new_xdp->data + len;
>
> -               truesize = len;
> +               if (!xsk_buff_add_frag(xdp, new_xdp))
> +                       goto drop_bufs;
>
> -               curr_skb  = virtnet_skb_append_frag(head_skb, curr_skb, page,
> -                                                   buf, len, truesize);
> -               if (!curr_skb) {
> -                       put_page(page);
> -                       goto err;
> -               }
> +               num_buf--;
>         }
>
>         return 0;
>
> -err:
> +drop_bufs:
>         xsk_drop_follow_bufs(vi->dev, rq, num_buf, stats);
> -       return -EINVAL;
> +       return -1;
>  }
>
>  static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct virtnet_info *vi,
> @@ -1307,23 +1297,42 @@ static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct
>         num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
>
>         ret = XDP_PASS;
> +       if (virtnet_build_xsk_buff_mrg(vi, rq, num_buf, xdp, stats))
> +               goto drop;
> +
>         rcu_read_lock();
>         prog = rcu_dereference(rq->xdp_prog);
> -       /* TODO: support multi buffer. */
> -       if (prog && num_buf == 1)
> -               ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats);

Without this patch it looks like we had a bug:

        ret = XDP_PASS;
        rcu_read_lock();
        prog = rcu_dereference(rq->xdp_prog);
        /* TODO: support multi buffer. */
        if (prog && num_buf == 1)
                ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats);
        rcu_read_unlock();

This implies if num_buf is greater than 1, we will assume XDP_PASS?

> +       if (prog) {
> +               /* We are in zerocopy mode so we cannot copy the multi-buffer
> +                * xdp buff to a single linear xdp buff. If we do so, in case
> +                * the BPF program decides to redirect to a XDP socket (XSK),
> +                * it will trigger the zerocopy receive logic in XDP socket.
> +                * The receive logic thinks it receives zerocopy buffer while
> +                * in fact, it is the copy one and everything is messed up.
> +                * So just drop the packet here if we have a multi-buffer xdp
> +                * buff and the BPF program does not support it.
> +                */
> +               if (xdp_buff_has_frags(xdp) && !prog->aux->xdp_has_frags)
> +                       ret = XDP_DROP;

Could we move the check before trying to build a multi-buffer XDP buff?

> +               else
> +                       ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit,
> +                                                 stats);
> +       }
>         rcu_read_unlock();
>
>         switch (ret) {
>         case XDP_PASS:
> -               skb = xsk_construct_skb(rq, xdp);
> +               skb = xdp_build_skb_from_zc(xdp);

Is this better to make this change a separate patch?

>                 if (!skb)
> -                       goto drop_bufs;
> +                       break;
>
> -               if (xsk_append_merge_buffer(vi, rq, skb, num_buf, hdr, stats)) {
> -                       dev_kfree_skb(skb);
> -                       goto drop;
> -               }
> +               /* Later, in virtnet_receive_done(), eth_type_trans()
> +                * is called. However, in xdp_build_skb_from_zc(), it is called
> +                * already. As a result, we need to reset the data to before
> +                * the mac header so that the later call in
> +                * virtnet_receive_done() works correctly.
> +                */
> +               skb_push(skb, ETH_HLEN);
>
>                 return skb;
>
> @@ -1332,14 +1341,11 @@ static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct
>                 return NULL;
>
>         default:
> -               /* drop packet */
> -               xsk_buff_free(xdp);
> +               break;
>         }
>
> -drop_bufs:
> -       xsk_drop_follow_bufs(dev, rq, num_buf, stats);
> -
>  drop:
> +       xsk_buff_free(xdp);
>         u64_stats_inc(&stats->drops);
>         return NULL;
>  }
> @@ -1396,6 +1402,8 @@ static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue
>                 return -ENOMEM;
>
>         len = xsk_pool_get_rx_frame_size(pool) + vi->hdr_len;
> +       /* Reserve some space for skb_shared_info */
> +       len -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
>
>         for (i = 0; i < num; ++i) {
>                 /* Use the part of XDP_PACKET_HEADROOM as the virtnet hdr space.
> @@ -6734,6 +6742,7 @@ static int virtnet_probe(struct virtio_device *vdev)
>         dev->netdev_ops = &virtnet_netdev;
>         dev->stat_ops = &virtnet_stat_ops;
>         dev->features = NETIF_F_HIGHDMA;
> +       dev->xdp_zc_max_segs = VIRTNET_MAX_ZC_SEGS;
>
>         dev->ethtool_ops = &virtnet_ethtool_ops;
>         SET_NETDEV_DEV(dev, &vdev->dev);
> --
> 2.43.0
>

Thanks
Re: [RFC PATCH net-next v2 1/2] virtio-net: support zerocopy multi buffer XDP in mergeable
Posted by Bui Quang Minh 6 months, 3 weeks ago
On 5/29/25 12:59, Jason Wang wrote:
> On Wed, May 28, 2025 at 12:19 AM Bui Quang Minh
> <minhquangbui99@gmail.com> wrote:
>> Currently, in zerocopy mode with mergeable receive buffer, virtio-net
>> does not support multi buffer but a single buffer only. This commit adds
>> support for multi mergeable receive buffer in the zerocopy XDP path by
>> utilizing XDP buffer with frags.
>>
>> Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
>> ---
>>   drivers/net/virtio_net.c | 123 +++++++++++++++++++++------------------
>>   1 file changed, 66 insertions(+), 57 deletions(-)
>>
>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>> index e53ba600605a..a9558650f205 100644
>> --- a/drivers/net/virtio_net.c
>> +++ b/drivers/net/virtio_net.c
>> @@ -45,6 +45,8 @@ module_param(napi_tx, bool, 0644);
>>   #define VIRTIO_XDP_TX          BIT(0)
>>   #define VIRTIO_XDP_REDIR       BIT(1)
>>
>> +#define VIRTNET_MAX_ZC_SEGS    8
>> +
>>   /* RX packet size EWMA. The average packet size is used to determine the packet
>>    * buffer size when refilling RX rings. As the entire RX ring may be refilled
>>    * at once, the weight is chosen so that the EWMA will be insensitive to short-
>> @@ -1232,65 +1234,53 @@ static void xsk_drop_follow_bufs(struct net_device *dev,
>>          }
>>   }
>>
>> -static int xsk_append_merge_buffer(struct virtnet_info *vi,
>> -                                  struct receive_queue *rq,
>> -                                  struct sk_buff *head_skb,
>> -                                  u32 num_buf,
>> -                                  struct virtio_net_hdr_mrg_rxbuf *hdr,
>> -                                  struct virtnet_rq_stats *stats)
>> +static int virtnet_build_xsk_buff_mrg(struct virtnet_info *vi,
>> +                                     struct receive_queue *rq,
>> +                                     u32 num_buf,
>> +                                     struct xdp_buff *xdp,
>> +                                     struct virtnet_rq_stats *stats)
>>   {
>> -       struct sk_buff *curr_skb;
>> -       struct xdp_buff *xdp;
>> -       u32 len, truesize;
>> -       struct page *page;
>> +       unsigned int len;
>>          void *buf;
>>
>> -       curr_skb = head_skb;
>> +       if (num_buf < 2)
>> +               return 0;
>> +
>> +       while (num_buf > 1) {
>> +               struct xdp_buff *new_xdp;
>>
>> -       while (--num_buf) {
>>                  buf = virtqueue_get_buf(rq->vq, &len);
>> -               if (unlikely(!buf)) {
>> -                       pr_debug("%s: rx error: %d buffers out of %d missing\n",
>> -                                vi->dev->name, num_buf,
>> -                                virtio16_to_cpu(vi->vdev,
>> -                                                hdr->num_buffers));
>> +               if (!unlikely(buf)) {
>> +                       pr_debug("%s: rx error: %d buffers missing\n",
>> +                                vi->dev->name, num_buf);
>>                          DEV_STATS_INC(vi->dev, rx_length_errors);
>> -                       return -EINVAL;
>> -               }
>> -
>> -               u64_stats_add(&stats->bytes, len);
>> -
>> -               xdp = buf_to_xdp(vi, rq, buf, len);
>> -               if (!xdp)
>> -                       goto err;
>> -
>> -               buf = napi_alloc_frag(len);
>> -               if (!buf) {
>> -                       xsk_buff_free(xdp);
>> -                       goto err;
>> +                       return -1;
>>                  }
>>
>> -               memcpy(buf, xdp->data - vi->hdr_len, len);
>> -
>> -               xsk_buff_free(xdp);
>> +               new_xdp = buf_to_xdp(vi, rq, buf, len);
>> +               if (!new_xdp)
>> +                       goto drop_bufs;
>>
>> -               page = virt_to_page(buf);
>> +               /* In virtnet_add_recvbuf_xsk(), we ask the host to fill from
>> +                * xdp->data - vi->hdr_len with both virtio_net_hdr and data.
>> +                * However, only the first packet has the virtio_net_hdr, the
>> +                * following ones do not. So we need to adjust the following
> Typo here.

I'm sorry, could you clarify which word contains the typo?

>
>> +                * packets' data pointer to the correct place.
>> +                */
> I wonder what happens if we don't use this trick? I meant we don't
> reuse the header room for the virtio-net header. This seems to be fine
> for a mergeable buffer and can help to reduce the trick.

I don't think using the header room for virtio-net header creates this 
case handling. In my opinion, it comes from the slightly difference in 
the recvbuf between single buffer and multi-buffer. When we have n 
single-buffer packets, each buffer will have its own virtio-net header. 
But when we have 1 multi-buffer packet (which spans across n buffers), 
only the first buffer has virtio-net header, the following buffers do not.

There 2 important pointers here. The pointer we announce to the vhost 
side to fill the data, let's call it announced_addr, and xdp_buff->data 
which is expected to point the the start of Ethernet frame. Currently,

     announced_addr = xdp_buff->data - hdr_len

The host side will write the virtio-net header to announced_addr then 
the Ethernet frame's data in the first buffer. In case of multi-buffer 
packet, in the following buffers, host side writes the Ethernet frame's 
data to the announced_addr no virtio-net header. So in the virtio-net, 
we need to subtract xdp_buff->data, otherwise, we lose some Ethernet 
frame's data.

I think a slightly better solution is that we set announced_addr = 
xdp_buff->data then we only need to xdp_buff->data += hdr_len for the 
first buffer and do need to adjust xdp_buff->data of the following buffers.

>
>> +               new_xdp->data -= vi->hdr_len;
>> +               new_xdp->data_end = new_xdp->data + len;
>>
>> -               truesize = len;
>> +               if (!xsk_buff_add_frag(xdp, new_xdp))
>> +                       goto drop_bufs;
>>
>> -               curr_skb  = virtnet_skb_append_frag(head_skb, curr_skb, page,
>> -                                                   buf, len, truesize);
>> -               if (!curr_skb) {
>> -                       put_page(page);
>> -                       goto err;
>> -               }
>> +               num_buf--;
>>          }
>>
>>          return 0;
>>
>> -err:
>> +drop_bufs:
>>          xsk_drop_follow_bufs(vi->dev, rq, num_buf, stats);
>> -       return -EINVAL;
>> +       return -1;
>>   }
>>
>>   static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct virtnet_info *vi,
>> @@ -1307,23 +1297,42 @@ static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct
>>          num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
>>
>>          ret = XDP_PASS;
>> +       if (virtnet_build_xsk_buff_mrg(vi, rq, num_buf, xdp, stats))
>> +               goto drop;
>> +
>>          rcu_read_lock();
>>          prog = rcu_dereference(rq->xdp_prog);
>> -       /* TODO: support multi buffer. */
>> -       if (prog && num_buf == 1)
>> -               ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats);
> Without this patch it looks like we had a bug:
>
>          ret = XDP_PASS;
>          rcu_read_lock();
>          prog = rcu_dereference(rq->xdp_prog);
>          /* TODO: support multi buffer. */
>          if (prog && num_buf == 1)
>                  ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats);
>          rcu_read_unlock();
>
> This implies if num_buf is greater than 1, we will assume XDP_PASS?

Yes, I think XDP_DROP should be returned in that case.

>
>> +       if (prog) {
>> +               /* We are in zerocopy mode so we cannot copy the multi-buffer
>> +                * xdp buff to a single linear xdp buff. If we do so, in case
>> +                * the BPF program decides to redirect to a XDP socket (XSK),
>> +                * it will trigger the zerocopy receive logic in XDP socket.
>> +                * The receive logic thinks it receives zerocopy buffer while
>> +                * in fact, it is the copy one and everything is messed up.
>> +                * So just drop the packet here if we have a multi-buffer xdp
>> +                * buff and the BPF program does not support it.
>> +                */
>> +               if (xdp_buff_has_frags(xdp) && !prog->aux->xdp_has_frags)
>> +                       ret = XDP_DROP;
> Could we move the check before trying to build a multi-buffer XDP buff?

Yes, I'll fix this in next version.

>
>> +               else
>> +                       ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit,
>> +                                                 stats);
>> +       }
>>          rcu_read_unlock();
>>
>>          switch (ret) {
>>          case XDP_PASS:
>> -               skb = xsk_construct_skb(rq, xdp);
>> +               skb = xdp_build_skb_from_zc(xdp);
> Is this better to make this change a separate patch?

Okay, I'll create a separate patch to convert the current XDP_PASS 
handler to use xdp_build_skb_from_zc helper.

>
>>                  if (!skb)
>> -                       goto drop_bufs;
>> +                       break;
>>
>> -               if (xsk_append_merge_buffer(vi, rq, skb, num_buf, hdr, stats)) {
>> -                       dev_kfree_skb(skb);
>> -                       goto drop;
>> -               }
>> +               /* Later, in virtnet_receive_done(), eth_type_trans()
>> +                * is called. However, in xdp_build_skb_from_zc(), it is called
>> +                * already. As a result, we need to reset the data to before
>> +                * the mac header so that the later call in
>> +                * virtnet_receive_done() works correctly.
>> +                */
>> +               skb_push(skb, ETH_HLEN);
>>
>>                  return skb;
>>
>> @@ -1332,14 +1341,11 @@ static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct
>>                  return NULL;
>>
>>          default:
>> -               /* drop packet */
>> -               xsk_buff_free(xdp);
>> +               break;
>>          }
>>
>> -drop_bufs:
>> -       xsk_drop_follow_bufs(dev, rq, num_buf, stats);
>> -
>>   drop:
>> +       xsk_buff_free(xdp);
>>          u64_stats_inc(&stats->drops);
>>          return NULL;
>>   }
>> @@ -1396,6 +1402,8 @@ static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue
>>                  return -ENOMEM;
>>
>>          len = xsk_pool_get_rx_frame_size(pool) + vi->hdr_len;
>> +       /* Reserve some space for skb_shared_info */
>> +       len -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
>>
>>          for (i = 0; i < num; ++i) {
>>                  /* Use the part of XDP_PACKET_HEADROOM as the virtnet hdr space.
>> @@ -6734,6 +6742,7 @@ static int virtnet_probe(struct virtio_device *vdev)
>>          dev->netdev_ops = &virtnet_netdev;
>>          dev->stat_ops = &virtnet_stat_ops;
>>          dev->features = NETIF_F_HIGHDMA;
>> +       dev->xdp_zc_max_segs = VIRTNET_MAX_ZC_SEGS;
>>
>>          dev->ethtool_ops = &virtnet_ethtool_ops;
>>          SET_NETDEV_DEV(dev, &vdev->dev);
>> --
>> 2.43.0
>>
> Thanks
>

Re: [RFC PATCH net-next v2 1/2] virtio-net: support zerocopy multi buffer XDP in mergeable
Posted by Jason Wang 6 months, 2 weeks ago
On Thu, May 29, 2025 at 8:28 PM Bui Quang Minh <minhquangbui99@gmail.com> wrote:
>
> On 5/29/25 12:59, Jason Wang wrote:
> > On Wed, May 28, 2025 at 12:19 AM Bui Quang Minh
> > <minhquangbui99@gmail.com> wrote:
> >> Currently, in zerocopy mode with mergeable receive buffer, virtio-net
> >> does not support multi buffer but a single buffer only. This commit adds
> >> support for multi mergeable receive buffer in the zerocopy XDP path by
> >> utilizing XDP buffer with frags.
> >>
> >> Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
> >> ---
> >>   drivers/net/virtio_net.c | 123 +++++++++++++++++++++------------------
> >>   1 file changed, 66 insertions(+), 57 deletions(-)
> >>
> >> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> >> index e53ba600605a..a9558650f205 100644
> >> --- a/drivers/net/virtio_net.c
> >> +++ b/drivers/net/virtio_net.c
> >> @@ -45,6 +45,8 @@ module_param(napi_tx, bool, 0644);
> >>   #define VIRTIO_XDP_TX          BIT(0)
> >>   #define VIRTIO_XDP_REDIR       BIT(1)
> >>
> >> +#define VIRTNET_MAX_ZC_SEGS    8
> >> +
> >>   /* RX packet size EWMA. The average packet size is used to determine the packet
> >>    * buffer size when refilling RX rings. As the entire RX ring may be refilled
> >>    * at once, the weight is chosen so that the EWMA will be insensitive to short-
> >> @@ -1232,65 +1234,53 @@ static void xsk_drop_follow_bufs(struct net_device *dev,
> >>          }
> >>   }
> >>
> >> -static int xsk_append_merge_buffer(struct virtnet_info *vi,
> >> -                                  struct receive_queue *rq,
> >> -                                  struct sk_buff *head_skb,
> >> -                                  u32 num_buf,
> >> -                                  struct virtio_net_hdr_mrg_rxbuf *hdr,
> >> -                                  struct virtnet_rq_stats *stats)
> >> +static int virtnet_build_xsk_buff_mrg(struct virtnet_info *vi,
> >> +                                     struct receive_queue *rq,
> >> +                                     u32 num_buf,
> >> +                                     struct xdp_buff *xdp,
> >> +                                     struct virtnet_rq_stats *stats)
> >>   {
> >> -       struct sk_buff *curr_skb;
> >> -       struct xdp_buff *xdp;
> >> -       u32 len, truesize;
> >> -       struct page *page;
> >> +       unsigned int len;
> >>          void *buf;
> >>
> >> -       curr_skb = head_skb;
> >> +       if (num_buf < 2)
> >> +               return 0;
> >> +
> >> +       while (num_buf > 1) {
> >> +               struct xdp_buff *new_xdp;
> >>
> >> -       while (--num_buf) {
> >>                  buf = virtqueue_get_buf(rq->vq, &len);
> >> -               if (unlikely(!buf)) {
> >> -                       pr_debug("%s: rx error: %d buffers out of %d missing\n",
> >> -                                vi->dev->name, num_buf,
> >> -                                virtio16_to_cpu(vi->vdev,
> >> -                                                hdr->num_buffers));
> >> +               if (!unlikely(buf)) {
> >> +                       pr_debug("%s: rx error: %d buffers missing\n",
> >> +                                vi->dev->name, num_buf);
> >>                          DEV_STATS_INC(vi->dev, rx_length_errors);
> >> -                       return -EINVAL;
> >> -               }
> >> -
> >> -               u64_stats_add(&stats->bytes, len);
> >> -
> >> -               xdp = buf_to_xdp(vi, rq, buf, len);
> >> -               if (!xdp)
> >> -                       goto err;
> >> -
> >> -               buf = napi_alloc_frag(len);
> >> -               if (!buf) {
> >> -                       xsk_buff_free(xdp);
> >> -                       goto err;
> >> +                       return -1;
> >>                  }
> >>
> >> -               memcpy(buf, xdp->data - vi->hdr_len, len);
> >> -
> >> -               xsk_buff_free(xdp);
> >> +               new_xdp = buf_to_xdp(vi, rq, buf, len);
> >> +               if (!new_xdp)
> >> +                       goto drop_bufs;
> >>
> >> -               page = virt_to_page(buf);
> >> +               /* In virtnet_add_recvbuf_xsk(), we ask the host to fill from
> >> +                * xdp->data - vi->hdr_len with both virtio_net_hdr and data.
> >> +                * However, only the first packet has the virtio_net_hdr, the
> >> +                * following ones do not. So we need to adjust the following
> > Typo here.
>
> I'm sorry, could you clarify which word contains the typo?
>
> >
> >> +                * packets' data pointer to the correct place.
> >> +                */
> > I wonder what happens if we don't use this trick? I meant we don't
> > reuse the header room for the virtio-net header. This seems to be fine
> > for a mergeable buffer and can help to reduce the trick.
>
> I don't think using the header room for virtio-net header creates this
> case handling. In my opinion, it comes from the slightly difference in
> the recvbuf between single buffer and multi-buffer. When we have n
> single-buffer packets, each buffer will have its own virtio-net header.
> But when we have 1 multi-buffer packet (which spans across n buffers),
> only the first buffer has virtio-net header, the following buffers do not.
>
> There 2 important pointers here. The pointer we announce to the vhost
> side to fill the data, let's call it announced_addr, and xdp_buff->data
> which is expected to point the the start of Ethernet frame. Currently,
>
>      announced_addr = xdp_buff->data - hdr_len
>
> The host side will write the virtio-net header to announced_addr then
> the Ethernet frame's data in the first buffer. In case of multi-buffer
> packet, in the following buffers, host side writes the Ethernet frame's
> data to the announced_addr no virtio-net header. So in the virtio-net,
> we need to subtract xdp_buff->data, otherwise, we lose some Ethernet
> frame's data.
>
> I think a slightly better solution is that we set announced_addr =
> xdp_buff->data then we only need to xdp_buff->data += hdr_len for the
> first buffer and do need to adjust xdp_buff->data of the following buffers.

Exactly my point.

>
> >
> >> +               new_xdp->data -= vi->hdr_len;
> >> +               new_xdp->data_end = new_xdp->data + len;
> >>
> >> -               truesize = len;
> >> +               if (!xsk_buff_add_frag(xdp, new_xdp))
> >> +                       goto drop_bufs;
> >>
> >> -               curr_skb  = virtnet_skb_append_frag(head_skb, curr_skb, page,
> >> -                                                   buf, len, truesize);
> >> -               if (!curr_skb) {
> >> -                       put_page(page);
> >> -                       goto err;
> >> -               }
> >> +               num_buf--;
> >>          }
> >>
> >>          return 0;
> >>
> >> -err:
> >> +drop_bufs:
> >>          xsk_drop_follow_bufs(vi->dev, rq, num_buf, stats);
> >> -       return -EINVAL;
> >> +       return -1;
> >>   }
> >>
> >>   static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct virtnet_info *vi,
> >> @@ -1307,23 +1297,42 @@ static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct
> >>          num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
> >>
> >>          ret = XDP_PASS;
> >> +       if (virtnet_build_xsk_buff_mrg(vi, rq, num_buf, xdp, stats))
> >> +               goto drop;
> >> +
> >>          rcu_read_lock();
> >>          prog = rcu_dereference(rq->xdp_prog);
> >> -       /* TODO: support multi buffer. */
> >> -       if (prog && num_buf == 1)
> >> -               ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats);
> > Without this patch it looks like we had a bug:
> >
> >          ret = XDP_PASS;
> >          rcu_read_lock();
> >          prog = rcu_dereference(rq->xdp_prog);
> >          /* TODO: support multi buffer. */
> >          if (prog && num_buf == 1)
> >                  ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats);
> >          rcu_read_unlock();
> >
> > This implies if num_buf is greater than 1, we will assume XDP_PASS?
>
> Yes, I think XDP_DROP should be returned in that case.

Care to post a patch and cc stable?

>
> >
> >> +       if (prog) {
> >> +               /* We are in zerocopy mode so we cannot copy the multi-buffer
> >> +                * xdp buff to a single linear xdp buff. If we do so, in case
> >> +                * the BPF program decides to redirect to a XDP socket (XSK),
> >> +                * it will trigger the zerocopy receive logic in XDP socket.
> >> +                * The receive logic thinks it receives zerocopy buffer while
> >> +                * in fact, it is the copy one and everything is messed up.
> >> +                * So just drop the packet here if we have a multi-buffer xdp
> >> +                * buff and the BPF program does not support it.
> >> +                */
> >> +               if (xdp_buff_has_frags(xdp) && !prog->aux->xdp_has_frags)
> >> +                       ret = XDP_DROP;
> > Could we move the check before trying to build a multi-buffer XDP buff?
>
> Yes, I'll fix this in next version.
>
> >
> >> +               else
> >> +                       ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit,
> >> +                                                 stats);
> >> +       }
> >>          rcu_read_unlock();
> >>
> >>          switch (ret) {
> >>          case XDP_PASS:
> >> -               skb = xsk_construct_skb(rq, xdp);
> >> +               skb = xdp_build_skb_from_zc(xdp);
> > Is this better to make this change a separate patch?
>
> Okay, I'll create a separate patch to convert the current XDP_PASS
> handler to use xdp_build_skb_from_zc helper.

That would be better.

>
> >
> >>                  if (!skb)
> >> -                       goto drop_bufs;
> >> +                       break;
> >>
> >> -               if (xsk_append_merge_buffer(vi, rq, skb, num_buf, hdr, stats)) {
> >> -                       dev_kfree_skb(skb);
> >> -                       goto drop;
> >> -               }
> >> +               /* Later, in virtnet_receive_done(), eth_type_trans()
> >> +                * is called. However, in xdp_build_skb_from_zc(), it is called
> >> +                * already. As a result, we need to reset the data to before
> >> +                * the mac header so that the later call in
> >> +                * virtnet_receive_done() works correctly.
> >> +                */
> >> +               skb_push(skb, ETH_HLEN);
> >>
> >>                  return skb;
> >>
> >> @@ -1332,14 +1341,11 @@ static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct
> >>                  return NULL;
> >>
> >>          default:
> >> -               /* drop packet */
> >> -               xsk_buff_free(xdp);
> >> +               break;
> >>          }
> >>
> >> -drop_bufs:
> >> -       xsk_drop_follow_bufs(dev, rq, num_buf, stats);
> >> -
> >>   drop:
> >> +       xsk_buff_free(xdp);
> >>          u64_stats_inc(&stats->drops);
> >>          return NULL;
> >>   }
> >> @@ -1396,6 +1402,8 @@ static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue
> >>                  return -ENOMEM;
> >>
> >>          len = xsk_pool_get_rx_frame_size(pool) + vi->hdr_len;
> >> +       /* Reserve some space for skb_shared_info */
> >> +       len -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
> >>
> >>          for (i = 0; i < num; ++i) {
> >>                  /* Use the part of XDP_PACKET_HEADROOM as the virtnet hdr space.
> >> @@ -6734,6 +6742,7 @@ static int virtnet_probe(struct virtio_device *vdev)
> >>          dev->netdev_ops = &virtnet_netdev;
> >>          dev->stat_ops = &virtnet_stat_ops;
> >>          dev->features = NETIF_F_HIGHDMA;
> >> +       dev->xdp_zc_max_segs = VIRTNET_MAX_ZC_SEGS;
> >>
> >>          dev->ethtool_ops = &virtnet_ethtool_ops;
> >>          SET_NETDEV_DEV(dev, &vdev->dev);
> >> --
> >> 2.43.0
> >>
> > Thanks
> >
>

Thanks
Re: [RFC PATCH net-next v2 1/2] virtio-net: support zerocopy multi buffer XDP in mergeable
Posted by Bui Quang Minh 6 months, 2 weeks ago
On 6/3/25 09:56, Jason Wang wrote:
> On Thu, May 29, 2025 at 8:28 PM Bui Quang Minh <minhquangbui99@gmail.com> wrote:
>> On 5/29/25 12:59, Jason Wang wrote:
>>> On Wed, May 28, 2025 at 12:19 AM Bui Quang Minh
>>> <minhquangbui99@gmail.com> wrote:
>>>> Currently, in zerocopy mode with mergeable receive buffer, virtio-net
>>>> does not support multi buffer but a single buffer only. This commit adds
>>>> support for multi mergeable receive buffer in the zerocopy XDP path by
>>>> utilizing XDP buffer with frags.
>>>>
>>>> Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
>>>> ---
>>>>    drivers/net/virtio_net.c | 123 +++++++++++++++++++++------------------
>>>>    1 file changed, 66 insertions(+), 57 deletions(-)
>>>>
>>>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>>>> index e53ba600605a..a9558650f205 100644
>>>> --- a/drivers/net/virtio_net.c
>>>> +++ b/drivers/net/virtio_net.c
>>>> @@ -45,6 +45,8 @@ module_param(napi_tx, bool, 0644);
>>>>    #define VIRTIO_XDP_TX          BIT(0)
>>>>    #define VIRTIO_XDP_REDIR       BIT(1)
>>>>
>>>> +#define VIRTNET_MAX_ZC_SEGS    8
>>>> +
>>>>    /* RX packet size EWMA. The average packet size is used to determine the packet
>>>>     * buffer size when refilling RX rings. As the entire RX ring may be refilled
>>>>     * at once, the weight is chosen so that the EWMA will be insensitive to short-
>>>> @@ -1232,65 +1234,53 @@ static void xsk_drop_follow_bufs(struct net_device *dev,
>>>>           }
>>>>    }
>>>>
>>>> -static int xsk_append_merge_buffer(struct virtnet_info *vi,
>>>> -                                  struct receive_queue *rq,
>>>> -                                  struct sk_buff *head_skb,
>>>> -                                  u32 num_buf,
>>>> -                                  struct virtio_net_hdr_mrg_rxbuf *hdr,
>>>> -                                  struct virtnet_rq_stats *stats)
>>>> +static int virtnet_build_xsk_buff_mrg(struct virtnet_info *vi,
>>>> +                                     struct receive_queue *rq,
>>>> +                                     u32 num_buf,
>>>> +                                     struct xdp_buff *xdp,
>>>> +                                     struct virtnet_rq_stats *stats)
>>>>    {
>>>> -       struct sk_buff *curr_skb;
>>>> -       struct xdp_buff *xdp;
>>>> -       u32 len, truesize;
>>>> -       struct page *page;
>>>> +       unsigned int len;
>>>>           void *buf;
>>>>
>>>> -       curr_skb = head_skb;
>>>> +       if (num_buf < 2)
>>>> +               return 0;
>>>> +
>>>> +       while (num_buf > 1) {
>>>> +               struct xdp_buff *new_xdp;
>>>>
>>>> -       while (--num_buf) {
>>>>                   buf = virtqueue_get_buf(rq->vq, &len);
>>>> -               if (unlikely(!buf)) {
>>>> -                       pr_debug("%s: rx error: %d buffers out of %d missing\n",
>>>> -                                vi->dev->name, num_buf,
>>>> -                                virtio16_to_cpu(vi->vdev,
>>>> -                                                hdr->num_buffers));
>>>> +               if (!unlikely(buf)) {
>>>> +                       pr_debug("%s: rx error: %d buffers missing\n",
>>>> +                                vi->dev->name, num_buf);
>>>>                           DEV_STATS_INC(vi->dev, rx_length_errors);
>>>> -                       return -EINVAL;
>>>> -               }
>>>> -
>>>> -               u64_stats_add(&stats->bytes, len);
>>>> -
>>>> -               xdp = buf_to_xdp(vi, rq, buf, len);
>>>> -               if (!xdp)
>>>> -                       goto err;
>>>> -
>>>> -               buf = napi_alloc_frag(len);
>>>> -               if (!buf) {
>>>> -                       xsk_buff_free(xdp);
>>>> -                       goto err;
>>>> +                       return -1;
>>>>                   }
>>>>
>>>> -               memcpy(buf, xdp->data - vi->hdr_len, len);
>>>> -
>>>> -               xsk_buff_free(xdp);
>>>> +               new_xdp = buf_to_xdp(vi, rq, buf, len);
>>>> +               if (!new_xdp)
>>>> +                       goto drop_bufs;
>>>>
>>>> -               page = virt_to_page(buf);
>>>> +               /* In virtnet_add_recvbuf_xsk(), we ask the host to fill from
>>>> +                * xdp->data - vi->hdr_len with both virtio_net_hdr and data.
>>>> +                * However, only the first packet has the virtio_net_hdr, the
>>>> +                * following ones do not. So we need to adjust the following
>>> Typo here.
>> I'm sorry, could you clarify which word contains the typo?
>>
>>>> +                * packets' data pointer to the correct place.
>>>> +                */
>>> I wonder what happens if we don't use this trick? I meant we don't
>>> reuse the header room for the virtio-net header. This seems to be fine
>>> for a mergeable buffer and can help to reduce the trick.
>> I don't think using the header room for virtio-net header creates this
>> case handling. In my opinion, it comes from the slightly difference in
>> the recvbuf between single buffer and multi-buffer. When we have n
>> single-buffer packets, each buffer will have its own virtio-net header.
>> But when we have 1 multi-buffer packet (which spans across n buffers),
>> only the first buffer has virtio-net header, the following buffers do not.
>>
>> There 2 important pointers here. The pointer we announce to the vhost
>> side to fill the data, let's call it announced_addr, and xdp_buff->data
>> which is expected to point the the start of Ethernet frame. Currently,
>>
>>       announced_addr = xdp_buff->data - hdr_len
>>
>> The host side will write the virtio-net header to announced_addr then
>> the Ethernet frame's data in the first buffer. In case of multi-buffer
>> packet, in the following buffers, host side writes the Ethernet frame's
>> data to the announced_addr no virtio-net header. So in the virtio-net,
>> we need to subtract xdp_buff->data, otherwise, we lose some Ethernet
>> frame's data.
>>
>> I think a slightly better solution is that we set announced_addr =
>> xdp_buff->data then we only need to xdp_buff->data += hdr_len for the
>> first buffer and do need to adjust xdp_buff->data of the following buffers.
> Exactly my point.
>
>>>> +               new_xdp->data -= vi->hdr_len;
>>>> +               new_xdp->data_end = new_xdp->data + len;
>>>>
>>>> -               truesize = len;
>>>> +               if (!xsk_buff_add_frag(xdp, new_xdp))
>>>> +                       goto drop_bufs;
>>>>
>>>> -               curr_skb  = virtnet_skb_append_frag(head_skb, curr_skb, page,
>>>> -                                                   buf, len, truesize);
>>>> -               if (!curr_skb) {
>>>> -                       put_page(page);
>>>> -                       goto err;
>>>> -               }
>>>> +               num_buf--;
>>>>           }
>>>>
>>>>           return 0;
>>>>
>>>> -err:
>>>> +drop_bufs:
>>>>           xsk_drop_follow_bufs(vi->dev, rq, num_buf, stats);
>>>> -       return -EINVAL;
>>>> +       return -1;
>>>>    }
>>>>
>>>>    static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct virtnet_info *vi,
>>>> @@ -1307,23 +1297,42 @@ static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct
>>>>           num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
>>>>
>>>>           ret = XDP_PASS;
>>>> +       if (virtnet_build_xsk_buff_mrg(vi, rq, num_buf, xdp, stats))
>>>> +               goto drop;
>>>> +
>>>>           rcu_read_lock();
>>>>           prog = rcu_dereference(rq->xdp_prog);
>>>> -       /* TODO: support multi buffer. */
>>>> -       if (prog && num_buf == 1)
>>>> -               ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats);
>>> Without this patch it looks like we had a bug:
>>>
>>>           ret = XDP_PASS;
>>>           rcu_read_lock();
>>>           prog = rcu_dereference(rq->xdp_prog);
>>>           /* TODO: support multi buffer. */
>>>           if (prog && num_buf == 1)
>>>                   ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats);
>>>           rcu_read_unlock();
>>>
>>> This implies if num_buf is greater than 1, we will assume XDP_PASS?
>> Yes, I think XDP_DROP should be returned in that case.
> Care to post a patch and cc stable?

Okay, I'll submit a patch shortly.

Thanks,
Quang Minh.

>
>>>> +       if (prog) {
>>>> +               /* We are in zerocopy mode so we cannot copy the multi-buffer
>>>> +                * xdp buff to a single linear xdp buff. If we do so, in case
>>>> +                * the BPF program decides to redirect to a XDP socket (XSK),
>>>> +                * it will trigger the zerocopy receive logic in XDP socket.
>>>> +                * The receive logic thinks it receives zerocopy buffer while
>>>> +                * in fact, it is the copy one and everything is messed up.
>>>> +                * So just drop the packet here if we have a multi-buffer xdp
>>>> +                * buff and the BPF program does not support it.
>>>> +                */
>>>> +               if (xdp_buff_has_frags(xdp) && !prog->aux->xdp_has_frags)
>>>> +                       ret = XDP_DROP;
>>> Could we move the check before trying to build a multi-buffer XDP buff?
>> Yes, I'll fix this in next version.
>>
>>>> +               else
>>>> +                       ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit,
>>>> +                                                 stats);
>>>> +       }
>>>>           rcu_read_unlock();
>>>>
>>>>           switch (ret) {
>>>>           case XDP_PASS:
>>>> -               skb = xsk_construct_skb(rq, xdp);
>>>> +               skb = xdp_build_skb_from_zc(xdp);
>>> Is this better to make this change a separate patch?
>> Okay, I'll create a separate patch to convert the current XDP_PASS
>> handler to use xdp_build_skb_from_zc helper.
> That would be better.
>
>>>>                   if (!skb)
>>>> -                       goto drop_bufs;
>>>> +                       break;
>>>>
>>>> -               if (xsk_append_merge_buffer(vi, rq, skb, num_buf, hdr, stats)) {
>>>> -                       dev_kfree_skb(skb);
>>>> -                       goto drop;
>>>> -               }
>>>> +               /* Later, in virtnet_receive_done(), eth_type_trans()
>>>> +                * is called. However, in xdp_build_skb_from_zc(), it is called
>>>> +                * already. As a result, we need to reset the data to before
>>>> +                * the mac header so that the later call in
>>>> +                * virtnet_receive_done() works correctly.
>>>> +                */
>>>> +               skb_push(skb, ETH_HLEN);
>>>>
>>>>                   return skb;
>>>>
>>>> @@ -1332,14 +1341,11 @@ static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct
>>>>                   return NULL;
>>>>
>>>>           default:
>>>> -               /* drop packet */
>>>> -               xsk_buff_free(xdp);
>>>> +               break;
>>>>           }
>>>>
>>>> -drop_bufs:
>>>> -       xsk_drop_follow_bufs(dev, rq, num_buf, stats);
>>>> -
>>>>    drop:
>>>> +       xsk_buff_free(xdp);
>>>>           u64_stats_inc(&stats->drops);
>>>>           return NULL;
>>>>    }
>>>> @@ -1396,6 +1402,8 @@ static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue
>>>>                   return -ENOMEM;
>>>>
>>>>           len = xsk_pool_get_rx_frame_size(pool) + vi->hdr_len;
>>>> +       /* Reserve some space for skb_shared_info */
>>>> +       len -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
>>>>
>>>>           for (i = 0; i < num; ++i) {
>>>>                   /* Use the part of XDP_PACKET_HEADROOM as the virtnet hdr space.
>>>> @@ -6734,6 +6742,7 @@ static int virtnet_probe(struct virtio_device *vdev)
>>>>           dev->netdev_ops = &virtnet_netdev;
>>>>           dev->stat_ops = &virtnet_stat_ops;
>>>>           dev->features = NETIF_F_HIGHDMA;
>>>> +       dev->xdp_zc_max_segs = VIRTNET_MAX_ZC_SEGS;
>>>>
>>>>           dev->ethtool_ops = &virtnet_ethtool_ops;
>>>>           SET_NETDEV_DEV(dev, &vdev->dev);
>>>> --
>>>> 2.43.0
>>>>
>>> Thanks
>>>
> Thanks
>

Re: [RFC PATCH net-next v2 1/2] virtio-net: support zerocopy multi buffer XDP in mergeable
Posted by Lei Yang 6 months, 2 weeks ago
Tested this patch with virtio-net regression tests, everything works fine.

Tested-by: Lei Yang <leiyang@redhat.com>

On Tue, Jun 3, 2025 at 10:57 AM Jason Wang <jasowang@redhat.com> wrote:
>
> On Thu, May 29, 2025 at 8:28 PM Bui Quang Minh <minhquangbui99@gmail.com> wrote:
> >
> > On 5/29/25 12:59, Jason Wang wrote:
> > > On Wed, May 28, 2025 at 12:19 AM Bui Quang Minh
> > > <minhquangbui99@gmail.com> wrote:
> > >> Currently, in zerocopy mode with mergeable receive buffer, virtio-net
> > >> does not support multi buffer but a single buffer only. This commit adds
> > >> support for multi mergeable receive buffer in the zerocopy XDP path by
> > >> utilizing XDP buffer with frags.
> > >>
> > >> Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
> > >> ---
> > >>   drivers/net/virtio_net.c | 123 +++++++++++++++++++++------------------
> > >>   1 file changed, 66 insertions(+), 57 deletions(-)
> > >>
> > >> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > >> index e53ba600605a..a9558650f205 100644
> > >> --- a/drivers/net/virtio_net.c
> > >> +++ b/drivers/net/virtio_net.c
> > >> @@ -45,6 +45,8 @@ module_param(napi_tx, bool, 0644);
> > >>   #define VIRTIO_XDP_TX          BIT(0)
> > >>   #define VIRTIO_XDP_REDIR       BIT(1)
> > >>
> > >> +#define VIRTNET_MAX_ZC_SEGS    8
> > >> +
> > >>   /* RX packet size EWMA. The average packet size is used to determine the packet
> > >>    * buffer size when refilling RX rings. As the entire RX ring may be refilled
> > >>    * at once, the weight is chosen so that the EWMA will be insensitive to short-
> > >> @@ -1232,65 +1234,53 @@ static void xsk_drop_follow_bufs(struct net_device *dev,
> > >>          }
> > >>   }
> > >>
> > >> -static int xsk_append_merge_buffer(struct virtnet_info *vi,
> > >> -                                  struct receive_queue *rq,
> > >> -                                  struct sk_buff *head_skb,
> > >> -                                  u32 num_buf,
> > >> -                                  struct virtio_net_hdr_mrg_rxbuf *hdr,
> > >> -                                  struct virtnet_rq_stats *stats)
> > >> +static int virtnet_build_xsk_buff_mrg(struct virtnet_info *vi,
> > >> +                                     struct receive_queue *rq,
> > >> +                                     u32 num_buf,
> > >> +                                     struct xdp_buff *xdp,
> > >> +                                     struct virtnet_rq_stats *stats)
> > >>   {
> > >> -       struct sk_buff *curr_skb;
> > >> -       struct xdp_buff *xdp;
> > >> -       u32 len, truesize;
> > >> -       struct page *page;
> > >> +       unsigned int len;
> > >>          void *buf;
> > >>
> > >> -       curr_skb = head_skb;
> > >> +       if (num_buf < 2)
> > >> +               return 0;
> > >> +
> > >> +       while (num_buf > 1) {
> > >> +               struct xdp_buff *new_xdp;
> > >>
> > >> -       while (--num_buf) {
> > >>                  buf = virtqueue_get_buf(rq->vq, &len);
> > >> -               if (unlikely(!buf)) {
> > >> -                       pr_debug("%s: rx error: %d buffers out of %d missing\n",
> > >> -                                vi->dev->name, num_buf,
> > >> -                                virtio16_to_cpu(vi->vdev,
> > >> -                                                hdr->num_buffers));
> > >> +               if (!unlikely(buf)) {
> > >> +                       pr_debug("%s: rx error: %d buffers missing\n",
> > >> +                                vi->dev->name, num_buf);
> > >>                          DEV_STATS_INC(vi->dev, rx_length_errors);
> > >> -                       return -EINVAL;
> > >> -               }
> > >> -
> > >> -               u64_stats_add(&stats->bytes, len);
> > >> -
> > >> -               xdp = buf_to_xdp(vi, rq, buf, len);
> > >> -               if (!xdp)
> > >> -                       goto err;
> > >> -
> > >> -               buf = napi_alloc_frag(len);
> > >> -               if (!buf) {
> > >> -                       xsk_buff_free(xdp);
> > >> -                       goto err;
> > >> +                       return -1;
> > >>                  }
> > >>
> > >> -               memcpy(buf, xdp->data - vi->hdr_len, len);
> > >> -
> > >> -               xsk_buff_free(xdp);
> > >> +               new_xdp = buf_to_xdp(vi, rq, buf, len);
> > >> +               if (!new_xdp)
> > >> +                       goto drop_bufs;
> > >>
> > >> -               page = virt_to_page(buf);
> > >> +               /* In virtnet_add_recvbuf_xsk(), we ask the host to fill from
> > >> +                * xdp->data - vi->hdr_len with both virtio_net_hdr and data.
> > >> +                * However, only the first packet has the virtio_net_hdr, the
> > >> +                * following ones do not. So we need to adjust the following
> > > Typo here.
> >
> > I'm sorry, could you clarify which word contains the typo?
> >
> > >
> > >> +                * packets' data pointer to the correct place.
> > >> +                */
> > > I wonder what happens if we don't use this trick? I meant we don't
> > > reuse the header room for the virtio-net header. This seems to be fine
> > > for a mergeable buffer and can help to reduce the trick.
> >
> > I don't think using the header room for virtio-net header creates this
> > case handling. In my opinion, it comes from the slightly difference in
> > the recvbuf between single buffer and multi-buffer. When we have n
> > single-buffer packets, each buffer will have its own virtio-net header.
> > But when we have 1 multi-buffer packet (which spans across n buffers),
> > only the first buffer has virtio-net header, the following buffers do not.
> >
> > There 2 important pointers here. The pointer we announce to the vhost
> > side to fill the data, let's call it announced_addr, and xdp_buff->data
> > which is expected to point the the start of Ethernet frame. Currently,
> >
> >      announced_addr = xdp_buff->data - hdr_len
> >
> > The host side will write the virtio-net header to announced_addr then
> > the Ethernet frame's data in the first buffer. In case of multi-buffer
> > packet, in the following buffers, host side writes the Ethernet frame's
> > data to the announced_addr no virtio-net header. So in the virtio-net,
> > we need to subtract xdp_buff->data, otherwise, we lose some Ethernet
> > frame's data.
> >
> > I think a slightly better solution is that we set announced_addr =
> > xdp_buff->data then we only need to xdp_buff->data += hdr_len for the
> > first buffer and do need to adjust xdp_buff->data of the following buffers.
>
> Exactly my point.
>
> >
> > >
> > >> +               new_xdp->data -= vi->hdr_len;
> > >> +               new_xdp->data_end = new_xdp->data + len;
> > >>
> > >> -               truesize = len;
> > >> +               if (!xsk_buff_add_frag(xdp, new_xdp))
> > >> +                       goto drop_bufs;
> > >>
> > >> -               curr_skb  = virtnet_skb_append_frag(head_skb, curr_skb, page,
> > >> -                                                   buf, len, truesize);
> > >> -               if (!curr_skb) {
> > >> -                       put_page(page);
> > >> -                       goto err;
> > >> -               }
> > >> +               num_buf--;
> > >>          }
> > >>
> > >>          return 0;
> > >>
> > >> -err:
> > >> +drop_bufs:
> > >>          xsk_drop_follow_bufs(vi->dev, rq, num_buf, stats);
> > >> -       return -EINVAL;
> > >> +       return -1;
> > >>   }
> > >>
> > >>   static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct virtnet_info *vi,
> > >> @@ -1307,23 +1297,42 @@ static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct
> > >>          num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
> > >>
> > >>          ret = XDP_PASS;
> > >> +       if (virtnet_build_xsk_buff_mrg(vi, rq, num_buf, xdp, stats))
> > >> +               goto drop;
> > >> +
> > >>          rcu_read_lock();
> > >>          prog = rcu_dereference(rq->xdp_prog);
> > >> -       /* TODO: support multi buffer. */
> > >> -       if (prog && num_buf == 1)
> > >> -               ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats);
> > > Without this patch it looks like we had a bug:
> > >
> > >          ret = XDP_PASS;
> > >          rcu_read_lock();
> > >          prog = rcu_dereference(rq->xdp_prog);
> > >          /* TODO: support multi buffer. */
> > >          if (prog && num_buf == 1)
> > >                  ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats);
> > >          rcu_read_unlock();
> > >
> > > This implies if num_buf is greater than 1, we will assume XDP_PASS?
> >
> > Yes, I think XDP_DROP should be returned in that case.
>
> Care to post a patch and cc stable?
>
> >
> > >
> > >> +       if (prog) {
> > >> +               /* We are in zerocopy mode so we cannot copy the multi-buffer
> > >> +                * xdp buff to a single linear xdp buff. If we do so, in case
> > >> +                * the BPF program decides to redirect to a XDP socket (XSK),
> > >> +                * it will trigger the zerocopy receive logic in XDP socket.
> > >> +                * The receive logic thinks it receives zerocopy buffer while
> > >> +                * in fact, it is the copy one and everything is messed up.
> > >> +                * So just drop the packet here if we have a multi-buffer xdp
> > >> +                * buff and the BPF program does not support it.
> > >> +                */
> > >> +               if (xdp_buff_has_frags(xdp) && !prog->aux->xdp_has_frags)
> > >> +                       ret = XDP_DROP;
> > > Could we move the check before trying to build a multi-buffer XDP buff?
> >
> > Yes, I'll fix this in next version.
> >
> > >
> > >> +               else
> > >> +                       ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit,
> > >> +                                                 stats);
> > >> +       }
> > >>          rcu_read_unlock();
> > >>
> > >>          switch (ret) {
> > >>          case XDP_PASS:
> > >> -               skb = xsk_construct_skb(rq, xdp);
> > >> +               skb = xdp_build_skb_from_zc(xdp);
> > > Is this better to make this change a separate patch?
> >
> > Okay, I'll create a separate patch to convert the current XDP_PASS
> > handler to use xdp_build_skb_from_zc helper.
>
> That would be better.
>
> >
> > >
> > >>                  if (!skb)
> > >> -                       goto drop_bufs;
> > >> +                       break;
> > >>
> > >> -               if (xsk_append_merge_buffer(vi, rq, skb, num_buf, hdr, stats)) {
> > >> -                       dev_kfree_skb(skb);
> > >> -                       goto drop;
> > >> -               }
> > >> +               /* Later, in virtnet_receive_done(), eth_type_trans()
> > >> +                * is called. However, in xdp_build_skb_from_zc(), it is called
> > >> +                * already. As a result, we need to reset the data to before
> > >> +                * the mac header so that the later call in
> > >> +                * virtnet_receive_done() works correctly.
> > >> +                */
> > >> +               skb_push(skb, ETH_HLEN);
> > >>
> > >>                  return skb;
> > >>
> > >> @@ -1332,14 +1341,11 @@ static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct
> > >>                  return NULL;
> > >>
> > >>          default:
> > >> -               /* drop packet */
> > >> -               xsk_buff_free(xdp);
> > >> +               break;
> > >>          }
> > >>
> > >> -drop_bufs:
> > >> -       xsk_drop_follow_bufs(dev, rq, num_buf, stats);
> > >> -
> > >>   drop:
> > >> +       xsk_buff_free(xdp);
> > >>          u64_stats_inc(&stats->drops);
> > >>          return NULL;
> > >>   }
> > >> @@ -1396,6 +1402,8 @@ static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue
> > >>                  return -ENOMEM;
> > >>
> > >>          len = xsk_pool_get_rx_frame_size(pool) + vi->hdr_len;
> > >> +       /* Reserve some space for skb_shared_info */
> > >> +       len -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
> > >>
> > >>          for (i = 0; i < num; ++i) {
> > >>                  /* Use the part of XDP_PACKET_HEADROOM as the virtnet hdr space.
> > >> @@ -6734,6 +6742,7 @@ static int virtnet_probe(struct virtio_device *vdev)
> > >>          dev->netdev_ops = &virtnet_netdev;
> > >>          dev->stat_ops = &virtnet_stat_ops;
> > >>          dev->features = NETIF_F_HIGHDMA;
> > >> +       dev->xdp_zc_max_segs = VIRTNET_MAX_ZC_SEGS;
> > >>
> > >>          dev->ethtool_ops = &virtnet_ethtool_ops;
> > >>          SET_NETDEV_DEV(dev, &vdev->dev);
> > >> --
> > >> 2.43.0
> > >>
> > > Thanks
> > >
> >
>
> Thanks
>
>
Re: [RFC PATCH net-next v2 1/2] virtio-net: support zerocopy multi buffer XDP in mergeable
Posted by ALOK TIWARI 6 months, 3 weeks ago

On 27-05-2025 21:49, Bui Quang Minh wrote:
> Currently, in zerocopy mode with mergeable receive buffer, virtio-net
> does not support multi buffer but a single buffer only. This commit adds
> support for multi mergeable receive buffer in the zerocopy XDP path by
> utilizing XDP buffer with frags.
> 
> Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
> ---
>   drivers/net/virtio_net.c | 123 +++++++++++++++++++++------------------
>   1 file changed, 66 insertions(+), 57 deletions(-)
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index e53ba600605a..a9558650f205 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -45,6 +45,8 @@ module_param(napi_tx, bool, 0644);
>   #define VIRTIO_XDP_TX		BIT(0)
>   #define VIRTIO_XDP_REDIR	BIT(1)
>   
> +#define VIRTNET_MAX_ZC_SEGS	8
> +
>   /* RX packet size EWMA. The average packet size is used to determine the packet
>    * buffer size when refilling RX rings. As the entire RX ring may be refilled
>    * at once, the weight is chosen so that the EWMA will be insensitive to short-
> @@ -1232,65 +1234,53 @@ static void xsk_drop_follow_bufs(struct net_device *dev,
>   	}
>   }
>   
> -static int xsk_append_merge_buffer(struct virtnet_info *vi,
> -				   struct receive_queue *rq,
> -				   struct sk_buff *head_skb,
> -				   u32 num_buf,
> -				   struct virtio_net_hdr_mrg_rxbuf *hdr,
> -				   struct virtnet_rq_stats *stats)
> +static int virtnet_build_xsk_buff_mrg(struct virtnet_info *vi,
> +				      struct receive_queue *rq,
> +				      u32 num_buf,
> +				      struct xdp_buff *xdp,
> +				      struct virtnet_rq_stats *stats)
>   {
> -	struct sk_buff *curr_skb;
> -	struct xdp_buff *xdp;
> -	u32 len, truesize;
> -	struct page *page;
> +	unsigned int len;
>   	void *buf;
>   
> -	curr_skb = head_skb;
> +	if (num_buf < 2)
> +		return 0;
> +
> +	while (num_buf > 1) {
> +		struct xdp_buff *new_xdp;
>   
> -	while (--num_buf) {
>   		buf = virtqueue_get_buf(rq->vq, &len);
> -		if (unlikely(!buf)) {
> -			pr_debug("%s: rx error: %d buffers out of %d missing\n",
> -				 vi->dev->name, num_buf,
> -				 virtio16_to_cpu(vi->vdev,
> -						 hdr->num_buffers));
> +		if (!unlikely(buf)) {

if (unlikely(!buf)) { ?

> +			pr_debug("%s: rx error: %d buffers missing\n",
> +				 vi->dev->name, num_buf);
>   			DEV_STATS_INC(vi->dev, rx_length_errors);

Thanks,
Alok
Re: [RFC PATCH net-next v2 1/2] virtio-net: support zerocopy multi buffer XDP in mergeable
Posted by Bui Quang Minh 6 months, 3 weeks ago
On 5/28/25 23:44, ALOK TIWARI wrote:
>
>
> On 27-05-2025 21:49, Bui Quang Minh wrote:
>> Currently, in zerocopy mode with mergeable receive buffer, virtio-net
>> does not support multi buffer but a single buffer only. This commit adds
>> support for multi mergeable receive buffer in the zerocopy XDP path by
>> utilizing XDP buffer with frags.
>>
>> Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
>> ---
>>   drivers/net/virtio_net.c | 123 +++++++++++++++++++++------------------
>>   1 file changed, 66 insertions(+), 57 deletions(-)
>>
>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>> index e53ba600605a..a9558650f205 100644
>> --- a/drivers/net/virtio_net.c
>> +++ b/drivers/net/virtio_net.c
>> @@ -45,6 +45,8 @@ module_param(napi_tx, bool, 0644);
>>   #define VIRTIO_XDP_TX        BIT(0)
>>   #define VIRTIO_XDP_REDIR    BIT(1)
>>   +#define VIRTNET_MAX_ZC_SEGS    8
>> +
>>   /* RX packet size EWMA. The average packet size is used to 
>> determine the packet
>>    * buffer size when refilling RX rings. As the entire RX ring may 
>> be refilled
>>    * at once, the weight is chosen so that the EWMA will be 
>> insensitive to short-
>> @@ -1232,65 +1234,53 @@ static void xsk_drop_follow_bufs(struct 
>> net_device *dev,
>>       }
>>   }
>>   -static int xsk_append_merge_buffer(struct virtnet_info *vi,
>> -                   struct receive_queue *rq,
>> -                   struct sk_buff *head_skb,
>> -                   u32 num_buf,
>> -                   struct virtio_net_hdr_mrg_rxbuf *hdr,
>> -                   struct virtnet_rq_stats *stats)
>> +static int virtnet_build_xsk_buff_mrg(struct virtnet_info *vi,
>> +                      struct receive_queue *rq,
>> +                      u32 num_buf,
>> +                      struct xdp_buff *xdp,
>> +                      struct virtnet_rq_stats *stats)
>>   {
>> -    struct sk_buff *curr_skb;
>> -    struct xdp_buff *xdp;
>> -    u32 len, truesize;
>> -    struct page *page;
>> +    unsigned int len;
>>       void *buf;
>>   -    curr_skb = head_skb;
>> +    if (num_buf < 2)
>> +        return 0;
>> +
>> +    while (num_buf > 1) {
>> +        struct xdp_buff *new_xdp;
>>   -    while (--num_buf) {
>>           buf = virtqueue_get_buf(rq->vq, &len);
>> -        if (unlikely(!buf)) {
>> -            pr_debug("%s: rx error: %d buffers out of %d missing\n",
>> -                 vi->dev->name, num_buf,
>> -                 virtio16_to_cpu(vi->vdev,
>> -                         hdr->num_buffers));
>> +        if (!unlikely(buf)) {
>
> if (unlikely(!buf)) { ?

Thanks, I'll fix this in the next version.

>
>> +            pr_debug("%s: rx error: %d buffers missing\n",
>> +                 vi->dev->name, num_buf);
>>               DEV_STATS_INC(vi->dev, rx_length_errors);
>
> Thanks,
> Alok