Refactor some of the skb frag ref/unref helpers for improved clarity.
Implement napi_pp_get_page() to be the mirror counterpart of
napi_pp_put_page().
Implement skb_page_ref() to be the mirror of skb_page_unref().
Improve __skb_frag_ref() to become a mirror counterpart of
__skb_frag_unref(). Previously unref could handle pp & non-pp pages,
while the ref could only handle non-pp pages. Now both the ref & unref
helpers can correctly handle both pp & non-pp pages.
Now that __skb_frag_ref() can handle both pp & non-pp pages, remove
skb_pp_frag_ref(), and use __skb_frag_ref() instead. This lets us
remove pp specific handling from skb_try_coalesce.
Signed-off-by: Mina Almasry <almasrymina@google.com>
Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
---
.../chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 2 +-
drivers/net/ethernet/sun/cassini.c | 4 +-
include/linux/skbuff.h | 22 ++++++--
net/core/skbuff.c | 54 ++++++-------------
4 files changed, 38 insertions(+), 44 deletions(-)
diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
index 6482728794dd..f9b0a9533985 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
+++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
@@ -1658,7 +1658,7 @@ static void chcr_ktls_copy_record_in_skb(struct sk_buff *nskb,
for (i = 0; i < record->num_frags; i++) {
skb_shinfo(nskb)->frags[i] = record->frags[i];
/* increase the frag ref count */
- __skb_frag_ref(&skb_shinfo(nskb)->frags[i]);
+ __skb_frag_ref(&skb_shinfo(nskb)->frags[i], false);
}
skb_shinfo(nskb)->nr_frags = record->num_frags;
diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c
index bfb903506367..fabba729e1b8 100644
--- a/drivers/net/ethernet/sun/cassini.c
+++ b/drivers/net/ethernet/sun/cassini.c
@@ -1999,7 +1999,7 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc,
skb->len += hlen - swivel;
skb_frag_fill_page_desc(frag, page->buffer, off, hlen - swivel);
- __skb_frag_ref(frag);
+ __skb_frag_ref(frag, false);
/* any more data? */
if ((words[0] & RX_COMP1_SPLIT_PKT) && ((dlen -= hlen) > 0)) {
@@ -2023,7 +2023,7 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc,
frag++;
skb_frag_fill_page_desc(frag, page->buffer, 0, hlen);
- __skb_frag_ref(frag);
+ __skb_frag_ref(frag, false);
RX_USED_ADD(page, hlen + cp->crc_size);
}
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index bafa5c9ff59a..058d72a2a250 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3494,15 +3494,29 @@ static inline struct page *skb_frag_page(const skb_frag_t *frag)
return netmem_to_page(frag->netmem);
}
+bool napi_pp_get_page(struct page *page);
+
+static inline void skb_page_ref(struct page *page, bool recycle)
+{
+#ifdef CONFIG_PAGE_POOL
+ if (recycle && napi_pp_get_page(page))
+ return;
+#endif
+ get_page(page);
+}
+
/**
* __skb_frag_ref - take an addition reference on a paged fragment.
* @frag: the paged fragment
+ * @recycle: skb->pp_recycle param of the parent skb. False if no parent skb.
*
- * Takes an additional reference on the paged fragment @frag.
+ * Takes an additional reference on the paged fragment @frag. Obtains the
+ * correct reference count depending on whether skb->pp_recycle is set and
+ * whether the frag is a page pool frag.
*/
-static inline void __skb_frag_ref(skb_frag_t *frag)
+static inline void __skb_frag_ref(skb_frag_t *frag, bool recycle)
{
- get_page(skb_frag_page(frag));
+ skb_page_ref(skb_frag_page(frag), recycle);
}
/**
@@ -3514,7 +3528,7 @@ static inline void __skb_frag_ref(skb_frag_t *frag)
*/
static inline void skb_frag_ref(struct sk_buff *skb, int f)
{
- __skb_frag_ref(&skb_shinfo(skb)->frags[f]);
+ __skb_frag_ref(&skb_shinfo(skb)->frags[f], skb->pp_recycle);
}
int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 17617c29be2d..5c86ecaceb6c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1005,6 +1005,19 @@ int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb,
EXPORT_SYMBOL(skb_cow_data_for_xdp);
#if IS_ENABLED(CONFIG_PAGE_POOL)
+bool napi_pp_get_page(struct page *page)
+{
+
+ page = compound_head(page);
+
+ if (!is_pp_page(page))
+ return false;
+
+ page_pool_ref_page(head_page);
+ return true;
+}
+EXPORT_SYMBOL(napi_pp_get_page);
+
bool napi_pp_put_page(struct page *page, bool napi_safe)
{
bool allow_direct = false;
@@ -1057,37 +1070,6 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data, bool napi_safe)
return napi_pp_put_page(virt_to_page(data), napi_safe);
}
-/**
- * skb_pp_frag_ref() - Increase fragment references of a page pool aware skb
- * @skb: page pool aware skb
- *
- * Increase the fragment reference count (pp_ref_count) of a skb. This is
- * intended to gain fragment references only for page pool aware skbs,
- * i.e. when skb->pp_recycle is true, and not for fragments in a
- * non-pp-recycling skb. It has a fallback to increase references on normal
- * pages, as page pool aware skbs may also have normal page fragments.
- */
-static int skb_pp_frag_ref(struct sk_buff *skb)
-{
- struct skb_shared_info *shinfo;
- struct page *head_page;
- int i;
-
- if (!skb->pp_recycle)
- return -EINVAL;
-
- shinfo = skb_shinfo(skb);
-
- for (i = 0; i < shinfo->nr_frags; i++) {
- head_page = compound_head(skb_frag_page(&shinfo->frags[i]));
- if (likely(is_pp_page(head_page)))
- page_pool_ref_page(head_page);
- else
- page_ref_inc(head_page);
- }
- return 0;
-}
-
static void skb_kfree_head(void *head, unsigned int end_offset)
{
if (end_offset == SKB_SMALL_HEAD_HEADROOM)
@@ -4196,7 +4178,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
to++;
} else {
- __skb_frag_ref(fragfrom);
+ __skb_frag_ref(fragfrom, skb->pp_recycle);
skb_frag_page_copy(fragto, fragfrom);
skb_frag_off_copy(fragto, fragfrom);
skb_frag_size_set(fragto, todo);
@@ -4846,7 +4828,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
}
*nskb_frag = (i < 0) ? skb_head_frag_to_page_desc(frag_skb) : *frag;
- __skb_frag_ref(nskb_frag);
+ __skb_frag_ref(nskb_frag, nskb->pp_recycle);
size = skb_frag_size(nskb_frag);
if (pos < offset) {
@@ -5977,10 +5959,8 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
/* if the skb is not cloned this does nothing
* since we set nr_frags to 0.
*/
- if (skb_pp_frag_ref(from)) {
- for (i = 0; i < from_shinfo->nr_frags; i++)
- __skb_frag_ref(&from_shinfo->frags[i]);
- }
+ for (i = 0; i < from_shinfo->nr_frags; i++)
+ __skb_frag_ref(&from_shinfo->frags[i], from->pp_recycle);
to->truesize += delta;
to->len += len;
--
2.44.0.396.g6e790dbe36-goog
Hi Mina,
kernel test robot noticed the following build errors:
[auto build test ERROR on net-next/main]
url: https://github.com/intel-lab-lkp/linux/commits/Mina-Almasry/net-make-napi_frag_unref-reuse-skb_page_unref/20240328-054816
base: net-next/main
patch link: https://lore.kernel.org/r/20240327214523.2182174-3-almasrymina%40google.com
patch subject: [PATCH net-next v2 2/3] net: mirror skb frag ref/unref helpers
config: riscv-defconfig (https://download.01.org/0day-ci/archive/20240329/202403290103.BHENUIYW-lkp@intel.com/config)
compiler: clang version 19.0.0git (https://github.com/llvm/llvm-project 23de3862dce582ce91c1aa914467d982cb1a73b4)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240329/202403290103.BHENUIYW-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202403290103.BHENUIYW-lkp@intel.com/
All errors (new ones prefixed by >>):
In file included from net/core/skbuff.c:40:
In file included from include/linux/mm.h:2208:
include/linux/vmstat.h:522:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
522 | return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
| ~~~~~~~~~~~ ^ ~~~
>> net/core/skbuff.c:1016:21: error: use of undeclared identifier 'head_page'
1016 | page_pool_ref_page(head_page);
| ^
1 warning and 1 error generated.
vim +/head_page +1016 net/core/skbuff.c
1010
1011 page = compound_head(page);
1012
1013 if (!is_pp_page(page))
1014 return false;
1015
> 1016 page_pool_ref_page(head_page);
1017 return true;
1018 }
1019 EXPORT_SYMBOL(napi_pp_get_page);
1020
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Hi Mina,
kernel test robot noticed the following build errors:
[auto build test ERROR on net-next/main]
url: https://github.com/intel-lab-lkp/linux/commits/Mina-Almasry/net-make-napi_frag_unref-reuse-skb_page_unref/20240328-054816
base: net-next/main
patch link: https://lore.kernel.org/r/20240327214523.2182174-3-almasrymina%40google.com
patch subject: [PATCH net-next v2 2/3] net: mirror skb frag ref/unref helpers
config: x86_64-rhel-8.3-rust (https://download.01.org/0day-ci/archive/20240329/202403290006.WfusvToB-lkp@intel.com/config)
compiler: clang version 17.0.6 (https://github.com/llvm/llvm-project 6009708b4367171ccdbf4b5905cb6a803753fe18)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240329/202403290006.WfusvToB-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202403290006.WfusvToB-lkp@intel.com/
All errors (new ones prefixed by >>):
>> net/tls/tls_device_fallback.c:280:22: error: too few arguments to function call, expected 2, have 1
280 | __skb_frag_ref(frag);
| ~~~~~~~~~~~~~~ ^
include/linux/skbuff.h:3517:20: note: '__skb_frag_ref' declared here
3517 | static inline void __skb_frag_ref(skb_frag_t *frag, bool recycle)
| ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1 error generated.
vim +280 net/tls/tls_device_fallback.c
e8f69799810c32 Ilya Lesokhin 2018-04-30 228
e8f69799810c32 Ilya Lesokhin 2018-04-30 229 /* This function may be called after the user socket is already
e8f69799810c32 Ilya Lesokhin 2018-04-30 230 * closed so make sure we don't use anything freed during
e8f69799810c32 Ilya Lesokhin 2018-04-30 231 * tls_sk_proto_close here
e8f69799810c32 Ilya Lesokhin 2018-04-30 232 */
e8f69799810c32 Ilya Lesokhin 2018-04-30 233
e8f69799810c32 Ilya Lesokhin 2018-04-30 234 static int fill_sg_in(struct scatterlist *sg_in,
e8f69799810c32 Ilya Lesokhin 2018-04-30 235 struct sk_buff *skb,
d80a1b9d186057 Boris Pismenny 2018-07-13 236 struct tls_offload_context_tx *ctx,
e8f69799810c32 Ilya Lesokhin 2018-04-30 237 u64 *rcd_sn,
e8f69799810c32 Ilya Lesokhin 2018-04-30 238 s32 *sync_size,
e8f69799810c32 Ilya Lesokhin 2018-04-30 239 int *resync_sgs)
e8f69799810c32 Ilya Lesokhin 2018-04-30 240 {
504148fedb8542 Eric Dumazet 2022-06-30 241 int tcp_payload_offset = skb_tcp_all_headers(skb);
e8f69799810c32 Ilya Lesokhin 2018-04-30 242 int payload_len = skb->len - tcp_payload_offset;
e8f69799810c32 Ilya Lesokhin 2018-04-30 243 u32 tcp_seq = ntohl(tcp_hdr(skb)->seq);
e8f69799810c32 Ilya Lesokhin 2018-04-30 244 struct tls_record_info *record;
e8f69799810c32 Ilya Lesokhin 2018-04-30 245 unsigned long flags;
e8f69799810c32 Ilya Lesokhin 2018-04-30 246 int remaining;
e8f69799810c32 Ilya Lesokhin 2018-04-30 247 int i;
e8f69799810c32 Ilya Lesokhin 2018-04-30 248
e8f69799810c32 Ilya Lesokhin 2018-04-30 249 spin_lock_irqsave(&ctx->lock, flags);
e8f69799810c32 Ilya Lesokhin 2018-04-30 250 record = tls_get_record(ctx, tcp_seq, rcd_sn);
e8f69799810c32 Ilya Lesokhin 2018-04-30 251 if (!record) {
e8f69799810c32 Ilya Lesokhin 2018-04-30 252 spin_unlock_irqrestore(&ctx->lock, flags);
e8f69799810c32 Ilya Lesokhin 2018-04-30 253 return -EINVAL;
e8f69799810c32 Ilya Lesokhin 2018-04-30 254 }
e8f69799810c32 Ilya Lesokhin 2018-04-30 255
e8f69799810c32 Ilya Lesokhin 2018-04-30 256 *sync_size = tcp_seq - tls_record_start_seq(record);
e8f69799810c32 Ilya Lesokhin 2018-04-30 257 if (*sync_size < 0) {
e8f69799810c32 Ilya Lesokhin 2018-04-30 258 int is_start_marker = tls_record_is_start_marker(record);
e8f69799810c32 Ilya Lesokhin 2018-04-30 259
e8f69799810c32 Ilya Lesokhin 2018-04-30 260 spin_unlock_irqrestore(&ctx->lock, flags);
e8f69799810c32 Ilya Lesokhin 2018-04-30 261 /* This should only occur if the relevant record was
e8f69799810c32 Ilya Lesokhin 2018-04-30 262 * already acked. In that case it should be ok
e8f69799810c32 Ilya Lesokhin 2018-04-30 263 * to drop the packet and avoid retransmission.
e8f69799810c32 Ilya Lesokhin 2018-04-30 264 *
e8f69799810c32 Ilya Lesokhin 2018-04-30 265 * There is a corner case where the packet contains
e8f69799810c32 Ilya Lesokhin 2018-04-30 266 * both an acked and a non-acked record.
e8f69799810c32 Ilya Lesokhin 2018-04-30 267 * We currently don't handle that case and rely
a0e128ef88e4a0 Yueh-Shun Li 2023-06-22 268 * on TCP to retransmit a packet that doesn't contain
e8f69799810c32 Ilya Lesokhin 2018-04-30 269 * already acked payload.
e8f69799810c32 Ilya Lesokhin 2018-04-30 270 */
e8f69799810c32 Ilya Lesokhin 2018-04-30 271 if (!is_start_marker)
e8f69799810c32 Ilya Lesokhin 2018-04-30 272 *sync_size = 0;
e8f69799810c32 Ilya Lesokhin 2018-04-30 273 return -EINVAL;
e8f69799810c32 Ilya Lesokhin 2018-04-30 274 }
e8f69799810c32 Ilya Lesokhin 2018-04-30 275
e8f69799810c32 Ilya Lesokhin 2018-04-30 276 remaining = *sync_size;
e8f69799810c32 Ilya Lesokhin 2018-04-30 277 for (i = 0; remaining > 0; i++) {
e8f69799810c32 Ilya Lesokhin 2018-04-30 278 skb_frag_t *frag = &record->frags[i];
e8f69799810c32 Ilya Lesokhin 2018-04-30 279
e8f69799810c32 Ilya Lesokhin 2018-04-30 @280 __skb_frag_ref(frag);
e8f69799810c32 Ilya Lesokhin 2018-04-30 281 sg_set_page(sg_in + i, skb_frag_page(frag),
b54c9d5bd6e38e Jonathan Lemon 2019-07-30 282 skb_frag_size(frag), skb_frag_off(frag));
e8f69799810c32 Ilya Lesokhin 2018-04-30 283
e8f69799810c32 Ilya Lesokhin 2018-04-30 284 remaining -= skb_frag_size(frag);
e8f69799810c32 Ilya Lesokhin 2018-04-30 285
e8f69799810c32 Ilya Lesokhin 2018-04-30 286 if (remaining < 0)
e8f69799810c32 Ilya Lesokhin 2018-04-30 287 sg_in[i].length += remaining;
e8f69799810c32 Ilya Lesokhin 2018-04-30 288 }
e8f69799810c32 Ilya Lesokhin 2018-04-30 289 *resync_sgs = i;
e8f69799810c32 Ilya Lesokhin 2018-04-30 290
e8f69799810c32 Ilya Lesokhin 2018-04-30 291 spin_unlock_irqrestore(&ctx->lock, flags);
e8f69799810c32 Ilya Lesokhin 2018-04-30 292 if (skb_to_sgvec(skb, &sg_in[i], tcp_payload_offset, payload_len) < 0)
e8f69799810c32 Ilya Lesokhin 2018-04-30 293 return -EINVAL;
e8f69799810c32 Ilya Lesokhin 2018-04-30 294
e8f69799810c32 Ilya Lesokhin 2018-04-30 295 return 0;
e8f69799810c32 Ilya Lesokhin 2018-04-30 296 }
e8f69799810c32 Ilya Lesokhin 2018-04-30 297
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
© 2016 - 2026 Red Hat, Inc.