From nobody Thu Oct 9 10:42:43 2025 Received: from m16.mail.163.com (m16.mail.163.com [117.135.210.5]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 8441313E02D; Wed, 18 Jun 2025 03:06:09 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=117.135.210.5 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1750215973; cv=none; b=Mx78RvX1LguoORUwTBI5l3Ro/wIAQVl/miPpJBJN8tu79rRI2ofxDPU0PGc1h8xxTuc9DBChiQRrmKcFbqqJnYCmYZ+OGvFLwbzGnAlYULozJt8tKMdpAzSYQqf5k27+Mxl13+ATysiGYOWWNQUEq/lscVpInERFZyuDKKnjBI4= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1750215973; c=relaxed/simple; bh=QL/4KRWb+Eb99LdlmW0FWVyAOQcTQchrPVSpQl3cLHI=; h=From:To:Cc:Subject:Date:Message-Id:MIME-Version; b=aj8Swl4cGLJCpVAq+GJXwZymPLsL+6dkgB9Rs//0rFOz8R4r02VME94f3j3bfk0C0550BnjjQ/3rxYM3qVczIafAUzG381ZDGPcVIafT7nejlh7rQgQ/vTzl05xLGTCp9mxhjVwtag18khnrwLa5bL1BQdOL3isn+0rjv9Zd/GI= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=163.com; spf=pass smtp.mailfrom=163.com; dkim=pass (1024-bit key) header.d=163.com header.i=@163.com header.b=k+2Jkpvm; arc=none smtp.client-ip=117.135.210.5 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=163.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=163.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=163.com header.i=@163.com header.b="k+2Jkpvm" DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=163.com; s=s110527; h=From:To:Subject:Date:Message-Id:MIME-Version; bh=9s xZpWGMDtvzQWDkps87/gtbTx/0wlDsgpbiAR5Zn1Q=; b=k+2Jkpvmbm7FNi4kyG rBZMceiZa1E7R1X9z2uyNcf7zjbnZtNn+r4R5CO7/Xo59EGmvKEMFVG0cLkY0pBj 93pvRlA5mXknfLqOJcsbtR/BYvvJD2fFQNTcMKVgO6cnb7V9pOQBx802/CJRXwfr lBzpf4qK6dWQs+U4xXh/yiR38= Received: from localhost.localdomain (unknown []) by gzsmtp1 (Coremail) with SMTP id PCgvCgDX_84DLVJo98pqAA--.16005S2; Wed, 18 Jun 
2025 11:05:39 +0800 (CST) From: Feng Yang To: davem@davemloft.net, edumazet@google.com, kuba@kernel.org, pabeni@redhat.com, horms@kernel.org, willemb@google.com, almasrymina@google.com, kerneljasonxing@gmail.com, ebiggers@google.com, asml.silence@gmail.com, aleksander.lobakin@intel.com Cc: yangfeng@kylinos.cn, netdev@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [RFC PATCH net-next] skbuff: Improve the sending efficiency of __skb_send_sock Date: Wed, 18 Jun 2025 11:05:37 +0800 Message-Id: <20250618030537.28394-1-yangfeng59949@163.com> X-Mailer: git-send-email 2.25.1 Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-CM-TRANSID: PCgvCgDX_84DLVJo98pqAA--.16005S2 X-Coremail-Antispam: 1Uf129KBjvJXoWxAry8Cw4fZFWrAFW8XF4xJFb_yoWrtr4kpa 15W398Zr47Jr1q9r4kJrZ3Cr4ft3yvk3y5tF4fA395Ar90qryFgFWUGr1jkFWrKrZ7uFyU trs0vr1rGrn0va7anT9S1TB71UUUUU7qnTZGkaVYY2UrUUUUjbIjqfuFe4nvWSU5nxnvy2 9KBjDUYxBIdaVFxhVjvjDU0xZFpf9x07jbzV8UUUUU= X-CM-SenderInfo: p1dqww5hqjkmqzuzqiywtou0bp/1tbiThlweGhSKSe2kwAAsO Content-Type: text/plain; charset="utf-8" From: Feng Yang By aggregating skb data into a bvec array for transmission, when using sockmap to forward large packets, what previously required multiple transmissions now only needs a single transmission, which significantly enhances performance. For small packets, the performance remains comparable to the original level. 
Signed-off-by: Feng Yang --- net/core/skbuff.c | 110 ++++++++++++++++++++++------------------------ 1 file changed, 52 insertions(+), 58 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 85fc82f72d26..19d78285a1c9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3235,82 +3235,75 @@ typedef int (*sendmsg_func)(struct sock *sk, struct= msghdr *msg); static int __skb_send_sock(struct sock *sk, struct sk_buff *skb, int offse= t, int len, sendmsg_func sendmsg, int flags) { - unsigned int orig_len =3D len; struct sk_buff *head =3D skb; unsigned short fragidx; - int slen, ret; + struct msghdr msg; + struct bio_vec *bvec; + int max_vecs, ret; + int bvec_count =3D 0; + unsigned int copied =3D 0; + + max_vecs =3D skb_shinfo(skb)->nr_frags + 1; // +1 for linear data + if (skb_has_frag_list(skb)) { + struct sk_buff *frag_skb =3D skb_shinfo(skb)->frag_list; + + while (frag_skb) { + max_vecs +=3D skb_shinfo(frag_skb)->nr_frags + 1; // +1 for linear data + frag_skb =3D frag_skb->next; + } + } + + bvec =3D kcalloc(max_vecs, sizeof(struct bio_vec), GFP_KERNEL); + if (!bvec) + return -ENOMEM; + + memset(&msg, 0, sizeof(msg)); + msg.msg_flags =3D MSG_SPLICE_PAGES | MSG_DONTWAIT | flags; =20 do_frag_list: =20 /* Deal with head data */ - while (offset < skb_headlen(skb) && len) { - struct kvec kv; - struct msghdr msg; - - slen =3D min_t(int, len, skb_headlen(skb) - offset); - kv.iov_base =3D skb->data + offset; - kv.iov_len =3D slen; - memset(&msg, 0, sizeof(msg)); - msg.msg_flags =3D MSG_DONTWAIT | flags; + if (offset < skb_headlen(skb)) { + unsigned int copy_len =3D min(skb_headlen(skb) - offset, len - copied); + struct page *page =3D virt_to_page(skb->data + offset); + unsigned int page_offset =3D offset_in_page(skb->data + offset); =20 - iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &kv, 1, slen); - ret =3D INDIRECT_CALL_2(sendmsg, sendmsg_locked, - sendmsg_unlocked, sk, &msg); - if (ret <=3D 0) - goto error; + if (!sendpage_ok(page)) + msg.msg_flags &=3D 
~MSG_SPLICE_PAGES; =20 - offset +=3D ret; - len -=3D ret; + bvec_set_page(&bvec[bvec_count++], page, copy_len, page_offset); + copied +=3D copy_len; + offset +=3D copy_len; } =20 - /* All the data was skb head? */ - if (!len) - goto out; - /* Make offset relative to start of frags */ offset -=3D skb_headlen(skb); =20 - /* Find where we are in frag list */ - for (fragidx =3D 0; fragidx < skb_shinfo(skb)->nr_frags; fragidx++) { - skb_frag_t *frag =3D &skb_shinfo(skb)->frags[fragidx]; + if (copied < len) { + for (fragidx =3D 0; fragidx < skb_shinfo(skb)->nr_frags; fragidx++) { + skb_frag_t *frag =3D &skb_shinfo(skb)->frags[fragidx]; + unsigned int frag_size =3D skb_frag_size(frag); =20 - if (offset < skb_frag_size(frag)) - break; - - offset -=3D skb_frag_size(frag); - } - - for (; len && fragidx < skb_shinfo(skb)->nr_frags; fragidx++) { - skb_frag_t *frag =3D &skb_shinfo(skb)->frags[fragidx]; - - slen =3D min_t(size_t, len, skb_frag_size(frag) - offset); + /* Find where we are in frag list */ + if (offset >=3D frag_size) { + offset -=3D frag_size; + continue; + } =20 - while (slen) { - struct bio_vec bvec; - struct msghdr msg =3D { - .msg_flags =3D MSG_SPLICE_PAGES | MSG_DONTWAIT | - flags, - }; + unsigned int copy_len =3D min(frag_size - offset, len - copied); =20 - bvec_set_page(&bvec, skb_frag_page(frag), slen, + bvec_set_page(&bvec[bvec_count++], skb_frag_page(frag), copy_len, skb_frag_off(frag) + offset); - iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, - slen); =20 - ret =3D INDIRECT_CALL_2(sendmsg, sendmsg_locked, - sendmsg_unlocked, sk, &msg); - if (ret <=3D 0) - goto error; + copied +=3D copy_len; + offset =3D 0; =20 - len -=3D ret; - offset +=3D ret; - slen -=3D ret; + if (copied >=3D len) + break; } - - offset =3D 0; } =20 - if (len) { + if (copied < len) { /* Process any frag lists */ =20 if (skb =3D=3D head) { @@ -3324,11 +3317,12 @@ static int __skb_send_sock(struct sock *sk, struct = sk_buff *skb, int offset, } } =20 -out: - return orig_len - len; 
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, bvec, bvec_count, len); + ret =3D INDIRECT_CALL_2(sendmsg, sendmsg_locked, sendmsg_unlocked, sk, &m= sg); + + kfree(bvec); =20 -error: - return orig_len =3D=3D len ? ret : orig_len - len; + return ret; } =20 /* Send skb data on a socket. Socket must be locked. */ --=20 2.43.0