The code piece which would attach a frag to &xdp_buff is almost
identical across the drivers supporting XDP multi-buffer on Rx.
Make it a generic elegant one-liner.
Also, I see lots of drivers calculating frags_truesize as
`xdp->frame_sz * nr_frags`. I can't say this is fully correct, since
frags might be backed by chunks of different sizes, especially with
stuff like the header split. Even page_pool_alloc() can give you two
different truesizes on two subsequent requests to allocate the same
buffer size. Add a field to &skb_shared_info (unionized as there's no
free slot currently on x86_64) to track the "true" truesize. It can be
used later when updating an skb.
Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
---
include/linux/skbuff.h | 16 ++++++--
include/net/xdp.h | 90 +++++++++++++++++++++++++++++++++++++++++-
2 files changed, 101 insertions(+), 5 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c867df5b1051..6ec78c1598fe 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -607,11 +607,19 @@ struct skb_shared_info {
* Warning : all fields before dataref are cleared in __alloc_skb()
*/
atomic_t dataref;
- unsigned int xdp_frags_size;
- /* Intermediate layers must ensure that destructor_arg
- * remains valid until skb destructor */
- void * destructor_arg;
+ union {
+ struct {
+ u32 xdp_frags_size;
+ u32 xdp_frags_truesize;
+ };
+
+ /*
+ * Intermediate layers must ensure that destructor_arg
+ * remains valid until skb destructor.
+ */
+ void *destructor_arg;
+ };
/* must be last field, see pskb_expand_head() */
skb_frag_t frags[MAX_SKB_FRAGS];
diff --git a/include/net/xdp.h b/include/net/xdp.h
index c4b408d22669..19d2b283b845 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -167,6 +167,88 @@ xdp_get_buff_len(const struct xdp_buff *xdp)
return len;
}
+/**
+ * __xdp_buff_add_frag - attach a frag to an &xdp_buff
+ * @xdp: XDP buffer to attach the frag to
+ * @page: page containing the frag
+ * @offset: page offset at which the frag starts
+ * @size: size of the frag
+ * @truesize: truesize (page / page frag size) of the frag
+ * @try_coalesce: whether to try coalescing the frags
+ *
+ * Attach a frag to an XDP buffer. If it currently has no frags attached,
+ * initialize the related fields, otherwise check that the frag number
+ * didn't reach the limit of ``MAX_SKB_FRAGS``. If possible, try coalescing
+ * the frag with the previous one.
+ * The function doesn't check/update the pfmemalloc bit. Please use the
+ * non-underscored wrapper in drivers.
+ *
+ * Return: true on success, false if there's no space for the frag in
+ * the shared info struct.
+ */
+static inline bool __xdp_buff_add_frag(struct xdp_buff *xdp, struct page *page,
+ u32 offset, u32 size, u32 truesize,
+ bool try_coalesce)
+{
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+ skb_frag_t *prev;
+ u32 nr_frags;
+
+ if (!xdp_buff_has_frags(xdp)) {
+ xdp_buff_set_frags_flag(xdp);
+
+ nr_frags = 0;
+ sinfo->xdp_frags_size = 0;
+ sinfo->xdp_frags_truesize = 0;
+
+ goto fill;
+ }
+
+ nr_frags = sinfo->nr_frags;
+ if (unlikely(nr_frags == MAX_SKB_FRAGS))
+ return false;
+
+ prev = &sinfo->frags[nr_frags - 1];
+ if (try_coalesce && page == skb_frag_page(prev) &&
+ offset == skb_frag_off(prev) + skb_frag_size(prev))
+ skb_frag_size_add(prev, size);
+ else
+fill:
+ __skb_fill_page_desc_noacc(sinfo, nr_frags++, page,
+ offset, size);
+
+ sinfo->nr_frags = nr_frags;
+ sinfo->xdp_frags_size += size;
+ sinfo->xdp_frags_truesize += truesize;
+
+ return true;
+}
+
+/**
+ * xdp_buff_add_frag - attach a frag to an &xdp_buff
+ * @xdp: XDP buffer to attach the frag to
+ * @page: page containing the frag
+ * @offset: page offset at which the frag starts
+ * @size: size of the frag
+ * @truesize: truesize (page / page frag size) of the frag
+ *
+ * Version of __xdp_buff_add_frag() which takes care of the pfmemalloc bit.
+ *
+ * Return: true on success, false if there's no space for the frag in
+ * the shared info struct.
+ */
+static inline bool xdp_buff_add_frag(struct xdp_buff *xdp, struct page *page,
+ u32 offset, u32 size, u32 truesize)
+{
+ if (!__xdp_buff_add_frag(xdp, page, offset, size, truesize, true))
+ return false;
+
+ if (unlikely(page_is_pfmemalloc(page)))
+ xdp_buff_set_frag_pfmemalloc(xdp);
+
+ return true;
+}
+
struct xdp_frame {
void *data;
u32 len;
@@ -230,7 +312,13 @@ xdp_update_skb_shared_info(struct sk_buff *skb, u8 nr_frags,
unsigned int size, unsigned int truesize,
bool pfmemalloc)
{
- skb_shinfo(skb)->nr_frags = nr_frags;
+ struct skb_shared_info *sinfo = skb_shinfo(skb);
+
+ sinfo->nr_frags = nr_frags;
+ /* ``destructor_arg`` is unionized with ``xdp_frags_{,true}size``,
+ * reset it now that these fields aren't used anymore.
+ */
+ sinfo->destructor_arg = NULL;
skb->len += size;
skb->data_len += size;
--
2.46.2
On Tue, Oct 15, 2024 at 04:53:43PM +0200, Alexander Lobakin wrote: > The code piece which would attach a frag to &xdp_buff is almost > identical across the drivers supporting XDP multi-buffer on Rx. > Make it a generic elegant onelner. oneliner > Also, I see lots of drivers calculating frags_truesize as > `xdp->frame_sz * nr_frags`. I can't say this is fully correct, since > frags might be backed by chunks of different sizes, especially with > stuff like the header split. Even page_pool_alloc() can give you two > different truesizes on two subsequent requests to allocate the same > buffer size. Add a field to &skb_shared_info (unionized as there's no > free slot currently on x6_64) to track the "true" truesize. It can be x86_64 > used later when updating an skb. I also agree that xdp->frame_sz * nr_frags for truesize might be an over-assumption. Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com> two small nits/questions below that might be ignored. > > Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com> > --- > include/linux/skbuff.h | 16 ++++++-- > include/net/xdp.h | 90 +++++++++++++++++++++++++++++++++++++++++- > 2 files changed, 101 insertions(+), 5 deletions(-) > > diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h > index c867df5b1051..6ec78c1598fe 100644 > --- a/include/linux/skbuff.h > +++ b/include/linux/skbuff.h > @@ -607,11 +607,19 @@ struct skb_shared_info { > * Warning : all fields before dataref are cleared in __alloc_skb() > */ > atomic_t dataref; > - unsigned int xdp_frags_size; > > - /* Intermediate layers must ensure that destructor_arg > - * remains valid until skb destructor */ > - void * destructor_arg; > + union { > + struct { > + u32 xdp_frags_size; > + u32 xdp_frags_truesize; > + }; > + > + /* > + * Intermediate layers must ensure that destructor_arg > + * remains valid until skb destructor. 
> + */ > + void *destructor_arg; > + }; > > /* must be last field, see pskb_expand_head() */ > skb_frag_t frags[MAX_SKB_FRAGS]; > diff --git a/include/net/xdp.h b/include/net/xdp.h > index c4b408d22669..19d2b283b845 100644 > --- a/include/net/xdp.h > +++ b/include/net/xdp.h > @@ -167,6 +167,88 @@ xdp_get_buff_len(const struct xdp_buff *xdp) > return len; > } > > +/** > + * __xdp_buff_add_frag - attach a frag to an &xdp_buff > + * @xdp: XDP buffer to attach the frag to > + * @page: page containing the frag > + * @offset: page offset at which the frag starts > + * @size: size of the frag > + * @truesize: truesize (page / page frag size) of the frag > + * @try_coalesce: whether to try coalescing the frags > + * > + * Attach a frag to an XDP buffer. If it currently has no frags attached, > + * initialize the related fields, otherwise check that the frag number > + * didn't reach the limit of ``MAX_SKB_FRAGS``. If possible, try coalescing > + * the frag with the previous one. > + * The function doesn't check/update the pfmemalloc bit. Please use the > + * non-underscored wrapper in drivers. > + * > + * Return: true on success, false if there's no space for the frag in > + * the shared info struct. 
> + */ > +static inline bool __xdp_buff_add_frag(struct xdp_buff *xdp, struct page *page, > + u32 offset, u32 size, u32 truesize, > + bool try_coalesce) > +{ > + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); > + skb_frag_t *prev; > + u32 nr_frags; > + > + if (!xdp_buff_has_frags(xdp)) { > + xdp_buff_set_frags_flag(xdp); > + > + nr_frags = 0; > + sinfo->xdp_frags_size = 0; > + sinfo->xdp_frags_truesize = 0; > + > + goto fill; > + } > + > + nr_frags = sinfo->nr_frags; > + if (unlikely(nr_frags == MAX_SKB_FRAGS)) > + return false; > + > + prev = &sinfo->frags[nr_frags - 1]; > + if (try_coalesce && page == skb_frag_page(prev) && > + offset == skb_frag_off(prev) + skb_frag_size(prev)) > + skb_frag_size_add(prev, size); > + else > +fill: > + __skb_fill_page_desc_noacc(sinfo, nr_frags++, page, > + offset, size); > + > + sinfo->nr_frags = nr_frags; is it really necessary to work on local nr_frags instead of directly update it from sinfo? > + sinfo->xdp_frags_size += size; > + sinfo->xdp_frags_truesize += truesize; > + > + return true; > +} > + > +/** > + * xdp_buff_add_frag - attach a frag to an &xdp_buff > + * @xdp: XDP buffer to attach the frag to > + * @page: page containing the frag > + * @offset: page offset at which the frag starts > + * @size: size of the frag > + * @truesize: truesize (page / page frag size) of the frag > + * > + * Version of __xdp_buff_add_frag() which takes care of the pfmemalloc bit. > + * > + * Return: true on success, false if there's no space for the frag in > + * the shared info struct. 
> + */ > +static inline bool xdp_buff_add_frag(struct xdp_buff *xdp, struct page *page, > + u32 offset, u32 size, u32 truesize) > +{ > + if (!__xdp_buff_add_frag(xdp, page, offset, size, truesize, true)) > + return false; > + > + if (unlikely(page_is_pfmemalloc(page))) > + xdp_buff_set_frag_pfmemalloc(xdp); > + > + return true; > +} > + > struct xdp_frame { > void *data; > u32 len; > @@ -230,7 +312,13 @@ xdp_update_skb_shared_info(struct sk_buff *skb, u8 nr_frags, > unsigned int size, unsigned int truesize, > bool pfmemalloc) > { > - skb_shinfo(skb)->nr_frags = nr_frags; > + struct skb_shared_info *sinfo = skb_shinfo(skb); > + > + sinfo->nr_frags = nr_frags; > + /* ``destructor_arg`` is unionized with ``xdp_frags_{,true}size``, > + * reset it after that these fields aren't used anymore. > + */ > + sinfo->destructor_arg = NULL; wouldn't clearing size and truesize from union be more obvious? OTOH it's one write vs two :) > > skb->len += size; > skb->data_len += size; > -- > 2.46.2 >
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com> Date: Thu, 17 Oct 2024 14:26:48 +0200 > On Tue, Oct 15, 2024 at 04:53:43PM +0200, Alexander Lobakin wrote: >> The code piece which would attach a frag to &xdp_buff is almost >> identical across the drivers supporting XDP multi-buffer on Rx. >> Make it a generic elegant onelner. > > oneliner > >> Also, I see lots of drivers calculating frags_truesize as >> `xdp->frame_sz * nr_frags`. I can't say this is fully correct, since >> frags might be backed by chunks of different sizes, especially with >> stuff like the header split. Even page_pool_alloc() can give you two >> different truesizes on two subsequent requests to allocate the same >> buffer size. Add a field to &skb_shared_info (unionized as there's no >> free slot currently on x6_64) to track the "true" truesize. It can be > > x86_64 What a shame from these two typos >_< > >> used later when updating an skb. [...] >> + >> + prev = &sinfo->frags[nr_frags - 1]; >> + if (try_coalesce && page == skb_frag_page(prev) && >> + offset == skb_frag_off(prev) + skb_frag_size(prev)) >> + skb_frag_size_add(prev, size); >> + else >> +fill: >> + __skb_fill_page_desc_noacc(sinfo, nr_frags++, page, >> + offset, size); >> + >> + sinfo->nr_frags = nr_frags; > > is it really necessary to work on local nr_frags instead of directly > update it from sinfo? I think you remember the difference when you started to work on ntu and ntc locally instead of accessing the ring struct all the time? :> > >> + sinfo->xdp_frags_size += size; >> + sinfo->xdp_frags_truesize += truesize; >> + >> + return true; >> +} [...] 
>> @@ -230,7 +312,13 @@ xdp_update_skb_shared_info(struct sk_buff *skb, u8 nr_frags, >> unsigned int size, unsigned int truesize, >> bool pfmemalloc) >> { >> - skb_shinfo(skb)->nr_frags = nr_frags; >> + struct skb_shared_info *sinfo = skb_shinfo(skb); >> + >> + sinfo->nr_frags = nr_frags; >> + /* ``destructor_arg`` is unionized with ``xdp_frags_{,true}size``, >> + * reset it after that these fields aren't used anymore. >> + */ >> + sinfo->destructor_arg = NULL; > > wouldn't clearing size and truesize from union be more obvious? But here we actually need to reset the destructor arg pointer. size/truesize are not needed at this point anymore, but the arg can be used/tested later, so I thought clearing it here is more clear to the readers? > OTOH it's one write vs two :) Sometimes the compiler can optimize two subsequent writes (e.g. to addr and addr + 4) into one bigger, but I wouldn't rely on it (that's why in patch #18 I intensively use casts to u64). > >> >> skb->len += size; >> skb->data_len += size; >> -- >> 2.46.2 Thanks, Olek
On Mon, Oct 21, 2024 at 04:10:30PM +0200, Alexander Lobakin wrote: > From: Maciej Fijalkowski <maciej.fijalkowski@intel.com> > Date: Thu, 17 Oct 2024 14:26:48 +0200 > > > On Tue, Oct 15, 2024 at 04:53:43PM +0200, Alexander Lobakin wrote: > >> The code piece which would attach a frag to &xdp_buff is almost > >> identical across the drivers supporting XDP multi-buffer on Rx. > >> Make it a generic elegant onelner. > > > > oneliner > > > >> Also, I see lots of drivers calculating frags_truesize as > >> `xdp->frame_sz * nr_frags`. I can't say this is fully correct, since > >> frags might be backed by chunks of different sizes, especially with > >> stuff like the header split. Even page_pool_alloc() can give you two > >> different truesizes on two subsequent requests to allocate the same > >> buffer size. Add a field to &skb_shared_info (unionized as there's no > >> free slot currently on x6_64) to track the "true" truesize. It can be > > > > x86_64 > > What a shame from these two typos >_< > > > > >> used later when updating an skb. > > [...] > > >> + > >> + prev = &sinfo->frags[nr_frags - 1]; > >> + if (try_coalesce && page == skb_frag_page(prev) && > >> + offset == skb_frag_off(prev) + skb_frag_size(prev)) > >> + skb_frag_size_add(prev, size); > >> + else > >> +fill: > >> + __skb_fill_page_desc_noacc(sinfo, nr_frags++, page, > >> + offset, size); > >> + > >> + sinfo->nr_frags = nr_frags; > > > > is it really necessary to work on local nr_frags instead of directly > > update it from sinfo? > > I think you remember the difference when you started to work on ntu and > ntc locally instead of accessing the ring struct all the time? :> Right, although impact there was a bit bigger. Typos are minor, so: Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com> unless others have some opinion on union being introduced here. > > > > >> + sinfo->xdp_frags_size += size; > >> + sinfo->xdp_frags_truesize += truesize; > >> + > >> + return true; > >> +} > > [...] 
> > >> @@ -230,7 +312,13 @@ xdp_update_skb_shared_info(struct sk_buff *skb, u8 nr_frags, > >> unsigned int size, unsigned int truesize, > >> bool pfmemalloc) > >> { > >> - skb_shinfo(skb)->nr_frags = nr_frags; > >> + struct skb_shared_info *sinfo = skb_shinfo(skb); > >> + > >> + sinfo->nr_frags = nr_frags; > >> + /* ``destructor_arg`` is unionized with ``xdp_frags_{,true}size``, > >> + * reset it after that these fields aren't used anymore. > >> + */ > >> + sinfo->destructor_arg = NULL; > > > > wouldn't clearing size and truesize from union be more obvious? > > But here we actually need to reset the destructor arg pointer. > size/truesize are not needed at this point anymore, but the arg can be > used/tested later, so I thought clearing it here is more clear to the > readers? > > > OTOH it's one write vs two :) > > Sometimes the compiler can optimize two subsequent writes (e.g. to addr > and addr + 4) into one bigger, but I wouldn't rely on it (that's why in > patch #18 I intensively use casts to u64). > > > > >> > >> skb->len += size; > >> skb->data_len += size; > >> -- > >> 2.46.2 > > Thanks, > Olek
© 2016 - 2024 Red Hat, Inc.