[v1] net: pass receive socket to drop tracepoint

[RFC net-next 1/6] net: add kfree_skb_for_sk function

Posted by Yan Zhai 1 year, 8 months ago

Implement a new kfree_skb_for_sk to replace kfree_skb_reason on a few
local receive path. The function accepts an extra receiving socket
argument, which will be set in skb->cb for kfree_skb/consume_skb
tracepoint consumption. With this extra bit of information, it will be
easier to attribute dropped packets to netns/containers and
sockets/services for performance and error monitoring purpose.

Signed-off-by: Yan Zhai <yan@cloudflare.com>
---
 include/linux/skbuff.h | 48 ++++++++++++++++++++++++++++++++++++++++--
 net/core/dev.c         | 21 +++++++-----------
 net/core/skbuff.c      | 29 +++++++++++++------------
 3 files changed, 70 insertions(+), 28 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index fe7d8dbef77e..66f5b06798f2 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1251,8 +1251,52 @@ static inline bool skb_data_unref(const struct sk_buff *skb,
 	return true;
 }
 
-void __fix_address
-kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason);
+/*
+ * Some protocol will clear or reuse skb->dev field for other purposes.  For
+ * example, TCP stack would reuse the pointer for out of order packet handling.
+ * This caused some problem for drop monitoring on kfree_skb tracepoint, since
+ * no other fields of an skb provides netns information.  In addition, it is
+ * also complicated to recover receive socket information for dropped packets,
+ * because the socket lookup can be an sk-lookup BPF program.
+ *
+ * This can be addressed by just passing the rx socket to the tracepoint,
+ * because it also has valid netns binding.
+ */
+struct kfree_skb_cb {
+	enum skb_drop_reason reason; /* used only by dev_kfree_skb_irq */
+	struct sock *rx_sk;
+};
+
+#define KFREE_SKB_CB(skb) ((struct kfree_skb_cb *)(skb)->cb)
+
+/* Save cb->rx_sk before calling kfree_skb/consume_skb tracepoint, and restore
+ * after the tracepoint. This is necessary because some skb destructor might
+ * rely on values in skb->cb, e.g. unix_destruct_scm.
+ */
+#define _call_trace_kfree_skb(action, skb, sk, ...)	do {	\
+	if (trace_##action##_skb_enabled()) {			\
+		struct kfree_skb_cb saved;			\
+		saved.rx_sk = KFREE_SKB_CB(skb)->rx_sk;		\
+		KFREE_SKB_CB(skb)->rx_sk = sk;			\
+		trace_##action##_skb((skb), ## __VA_ARGS__);	\
+		KFREE_SKB_CB(skb)->rx_sk = saved.rx_sk;		\
+	}							\
+} while (0)
+
+#define call_trace_kfree_skb(skb, sk, ...) \
+	_call_trace_kfree_skb(kfree, skb, sk, ## __VA_ARGS__)
+
+#define call_trace_consume_skb(skb, sk, ...) \
+	_call_trace_kfree_skb(consume, skb, sk, ## __VA_ARGS__)
+
+void __fix_address kfree_skb_for_sk(struct sk_buff *skb, struct sock *rx_sk,
+				    enum skb_drop_reason reason);
+
+static inline void
+kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason)
+{
+	kfree_skb_for_sk(skb, NULL, reason);
+}
 
 /**
  *	kfree_skb - free an sk_buff with 'NOT_SPECIFIED' reason
diff --git a/net/core/dev.c b/net/core/dev.c
index 85fe8138f3e4..17516f26be92 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3135,15 +3135,6 @@ void __netif_schedule(struct Qdisc *q)
 }
 EXPORT_SYMBOL(__netif_schedule);
 
-struct dev_kfree_skb_cb {
-	enum skb_drop_reason reason;
-};
-
-static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb)
-{
-	return (struct dev_kfree_skb_cb *)skb->cb;
-}
-
 void netif_schedule_queue(struct netdev_queue *txq)
 {
 	rcu_read_lock();
@@ -3182,7 +3173,11 @@ void dev_kfree_skb_irq_reason(struct sk_buff *skb, enum skb_drop_reason reason)
 	} else if (likely(!refcount_dec_and_test(&skb->users))) {
 		return;
 	}
-	get_kfree_skb_cb(skb)->reason = reason;
+
+	/* There is no need to save the old cb since we are the only user. */
+	KFREE_SKB_CB(skb)->reason = reason;
+	KFREE_SKB_CB(skb)->rx_sk = NULL;
+
 	local_irq_save(flags);
 	skb->next = __this_cpu_read(softnet_data.completion_queue);
 	__this_cpu_write(softnet_data.completion_queue, skb);
@@ -5229,17 +5224,17 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)
 			clist = clist->next;
 
 			WARN_ON(refcount_read(&skb->users));
-			if (likely(get_kfree_skb_cb(skb)->reason == SKB_CONSUMED))
+			if (likely(KFREE_SKB_CB(skb)->reason == SKB_CONSUMED))
 				trace_consume_skb(skb, net_tx_action);
 			else
 				trace_kfree_skb(skb, net_tx_action,
-						get_kfree_skb_cb(skb)->reason);
+						KFREE_SKB_CB(skb)->reason);
 
 			if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
 				__kfree_skb(skb);
 			else
 				__napi_kfree_skb(skb,
-						 get_kfree_skb_cb(skb)->reason);
+						 KFREE_SKB_CB(skb)->reason);
 		}
 	}
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 466999a7515e..5ce6996512a1 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1190,7 +1190,8 @@ void __kfree_skb(struct sk_buff *skb)
 EXPORT_SYMBOL(__kfree_skb);
 
 static __always_inline
-bool __kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason)
+bool __kfree_skb_reason(struct sk_buff *skb, struct sock *rx_sk,
+			enum skb_drop_reason reason)
 {
 	if (unlikely(!skb_unref(skb)))
 		return false;
@@ -1201,28 +1202,30 @@ bool __kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason)
 				SKB_DROP_REASON_SUBSYS_NUM);
 
 	if (reason == SKB_CONSUMED)
-		trace_consume_skb(skb, __builtin_return_address(0));
+		call_trace_consume_skb(skb, rx_sk, __builtin_return_address(0));
 	else
-		trace_kfree_skb(skb, __builtin_return_address(0), reason);
+		call_trace_kfree_skb(skb, rx_sk, __builtin_return_address(0), reason);
+
 	return true;
 }
 
 /**
- *	kfree_skb_reason - free an sk_buff with special reason
+ *	kfree_skb_for_sk - free an sk_buff with special reason and receiving socket
  *	@skb: buffer to free
+ *	@rx_sk: the socket to receive the buffer, or NULL if not applicable
  *	@reason: reason why this skb is dropped
  *
  *	Drop a reference to the buffer and free it if the usage count has
- *	hit zero. Meanwhile, pass the drop reason to 'kfree_skb'
+ *	hit zero. Meanwhile, pass the drop reason and rx socket to 'kfree_skb'
  *	tracepoint.
  */
-void __fix_address
-kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason)
+void __fix_address kfree_skb_for_sk(struct sk_buff *skb, struct sock *rx_sk,
+				    enum skb_drop_reason reason)
 {
-	if (__kfree_skb_reason(skb, reason))
+	if (__kfree_skb_reason(skb, rx_sk, reason))
 		__kfree_skb(skb);
 }
-EXPORT_SYMBOL(kfree_skb_reason);
+EXPORT_SYMBOL(kfree_skb_for_sk);
 
 #define KFREE_SKB_BULK_SIZE	16
 
@@ -1261,7 +1264,7 @@ kfree_skb_list_reason(struct sk_buff *segs, enum skb_drop_reason reason)
 	while (segs) {
 		struct sk_buff *next = segs->next;
 
-		if (__kfree_skb_reason(segs, reason)) {
+		if (__kfree_skb_reason(segs, NULL, reason)) {
 			skb_poison_list(segs);
 			kfree_skb_add_bulk(segs, &sa, reason);
 		}
@@ -1405,7 +1408,7 @@ void consume_skb(struct sk_buff *skb)
 	if (!skb_unref(skb))
 		return;
 
-	trace_consume_skb(skb, __builtin_return_address(0));
+	call_trace_consume_skb(skb, NULL, __builtin_return_address(0));
 	__kfree_skb(skb);
 }
 EXPORT_SYMBOL(consume_skb);
@@ -1420,7 +1423,7 @@ EXPORT_SYMBOL(consume_skb);
  */
 void __consume_stateless_skb(struct sk_buff *skb)
 {
-	trace_consume_skb(skb, __builtin_return_address(0));
+	call_trace_consume_skb(skb, NULL, __builtin_return_address(0));
 	skb_release_data(skb, SKB_CONSUMED);
 	kfree_skbmem(skb);
 }
@@ -1478,7 +1481,7 @@ void napi_consume_skb(struct sk_buff *skb, int budget)
 		return;
 
 	/* if reaching here SKB is ready to free */
-	trace_consume_skb(skb, __builtin_return_address(0));
+	call_trace_consume_skb(skb, NULL, __builtin_return_address(0));
 
 	/* if SKB is a clone, don't handle this case */
 	if (skb->fclone != SKB_FCLONE_UNAVAILABLE) {
-- 
2.30.2

Re: [RFC net-next 1/6] net: add kfree_skb_for_sk function

Posted by Eric Dumazet 1 year, 8 months ago

On Thu, May 30, 2024 at 11:46 PM Yan Zhai <yan@cloudflare.com> wrote:
>
> Implement a new kfree_skb_for_sk to replace kfree_skb_reason on a few
> local receive path. The function accepts an extra receiving socket
> argument, which will be set in skb->cb for kfree_skb/consume_skb
> tracepoint consumption. With this extra bit of information, it will be
> easier to attribute dropped packets to netns/containers and
> sockets/services for performance and error monitoring purpose.

This is a lot of code churn...

I have to ask : Why not simply adding an sk parameter to an existing
trace point ?

If this not possible, I would rather add new tracepoints, adding new classes,
because it will ease your debugging :

When looking for TCP drops, simply use a tcp_event_sk_skb_reason instance,
and voila, no distractions caused by RAW/ICMP/ICMPv6/af_packet drops.

DECLARE_EVENT_CLASS(tcp_event_sk_skb_reason,

     TP_PROTO(const struct sock *sk, const struct sk_buff *skb, enum
skb_drop_reason reason),
...
);

Also, the name ( kfree_skb_for_sk) and order of parameters is confusing.

I always prefer this kind of ordering/names :

void sk_skb_reason_drop( [struct net *net ] // not relevant here, but
to expand the rationale
              struct sock *sk, struct sk_buff *skb, enum skb_drop_reason reason)

Looking at the name, we immediately see the parameter order.

The consume one (no @reason there) would be called

void sk_skb_consume(struct sock *sk, struct sk_buff *skb);

Re: [RFC net-next 1/6] net: add kfree_skb_for_sk function

Posted by Yan Zhai 1 year, 8 months ago

Hi Eric,

 Thanks for the feedback.

On Fri, May 31, 2024 at 1:51 AM Eric Dumazet <edumazet@google.com> wrote:
>
> On Thu, May 30, 2024 at 11:46 PM Yan Zhai <yan@cloudflare.com> wrote:
> >
> > Implement a new kfree_skb_for_sk to replace kfree_skb_reason on a few
> > local receive path. The function accepts an extra receiving socket
> > argument, which will be set in skb->cb for kfree_skb/consume_skb
> > tracepoint consumption. With this extra bit of information, it will be
> > easier to attribute dropped packets to netns/containers and
> > sockets/services for performance and error monitoring purposes.
>
> This is a lot of code churn...
>
> I have to ask : Why not simply adding an sk parameter to an existing
> trace point ?
>
Modifying a signature of the current tracepoint seems like a breaking
change, that's why I was saving the context inside skb->cb, hoping to
not impact any existing programs watching this tracepoint. But
thinking it twice, it might not cause a problem if the signature
becomes:

 trace_kfree_skb(const struct sk_buff *skb, void *location, enum
skb_drop_reason reason, const struct sock *sk)

As return values are usually not a thing for tracepoints, it is
probably still compatible. The cons is that the last "sk" still breaks
the integrity of naming. How about making a "kfree_skb_context"
internal struct and putting it as the last argument to "hide" the
naming confusion?

> If this not possible, I would rather add new tracepoints, adding new classes,
> because it will ease your debugging :
>
> When looking for TCP drops, simply use a tcp_event_sk_skb_reason instance,
> and voila, no distractions caused by RAW/ICMP/ICMPv6/af_packet drops.
>
> DECLARE_EVENT_CLASS(tcp_event_sk_skb_reason,
>
>      TP_PROTO(const struct sock *sk, const struct sk_buff *skb, enum
> skb_drop_reason reason),
> ...
> );

The alternative of adding another tracepoint could indeed work, we had
a few cases like that in the past, e.g.

https://lore.kernel.org/lkml/20230711043453.64095-1-ivan@cloudflare.com/
https://lore.kernel.org/netdev/20230707043923.35578-1-ivan@cloudflare.com/

But it does feel like a whack-a-mole thing. The problems are solvable
if we extend the kfree_skb tracepoint, so I would prefer to not add a
new tracepoint.

>
> Also, the name ( kfree_skb_for_sk) and order of parameters is confusing.
>
> I always prefer this kind of ordering/names :
>
> void sk_skb_reason_drop( [struct net *net ] // not relevant here, but
> to expand the rationale
>               struct sock *sk, struct sk_buff *skb, enum skb_drop_reason reason)
>
> Looking at the name, we immediately see the parameter order.
>
> The consume one (no @reason there) would be called
>
> void sk_skb_consume(struct sock *sk, struct sk_buff *skb);

I was intending to keep the "kfree_skb" prefix initially since it
would appear less surprising to kernel developers who used kfree_skb
and kfree_skb_reason. But your points do make good sense. How about
"kfree_sk_skb_reason" and "consume_sk_skb" here?

thanks
Yan

Re: [RFC net-next 1/6] net: add kfree_skb_for_sk function

Posted by Eric Dumazet 1 year, 8 months ago

On Fri, May 31, 2024 at 6:58 PM Yan Zhai <yan@cloudflare.com> wrote:
>
> Hi Eric,
>
>  Thanks for the feedback.
>
> On Fri, May 31, 2024 at 1:51 AM Eric Dumazet <edumazet@google.com> wrote:
> >
> > On Thu, May 30, 2024 at 11:46 PM Yan Zhai <yan@cloudflare.com> wrote:
> > >
> > > Implement a new kfree_skb_for_sk to replace kfree_skb_reason on a few
> > > local receive path. The function accepts an extra receiving socket
> > > argument, which will be set in skb->cb for kfree_skb/consume_skb
> > > tracepoint consumption. With this extra bit of information, it will be
> > > easier to attribute dropped packets to netns/containers and
> > > sockets/services for performance and error monitoring purposes.
> >
> > This is a lot of code churn...
> >
> > I have to ask : Why not simply adding an sk parameter to an existing
> > trace point ?
> >
> Modifying a signature of the current tracepoint seems like a breaking
> change, that's why I was saving the context inside skb->cb, hoping to
> not impact any existing programs watching this tracepoint. But
> thinking it twice, it might not cause a problem if the signature
> becomes:
>
>  trace_kfree_skb(const struct sk_buff *skb, void *location, enum
> skb_drop_reason reason, const struct sock *sk)
>
> As return values are usually not a thing for tracepoints, it is
> probably still compatible. The cons is that the last "sk" still breaks
> the integrity of naming. How about making a "kfree_skb_context"
> internal struct and putting it as the last argument to "hide" the
> naming confusion?
>
> > If this not possible, I would rather add new tracepoints, adding new classes,
> > because it will ease your debugging :
> >
> > When looking for TCP drops, simply use a tcp_event_sk_skb_reason instance,
> > and voila, no distractions caused by RAW/ICMP/ICMPv6/af_packet drops.
> >
> > DECLARE_EVENT_CLASS(tcp_event_sk_skb_reason,
> >
> >      TP_PROTO(const struct sock *sk, const struct sk_buff *skb, enum
> > skb_drop_reason reason),
> > ...
> > );
>
> The alternative of adding another tracepoint could indeed work, we had
> a few cases like that in the past, e.g.
>
> https://lore.kernel.org/lkml/20230711043453.64095-1-ivan@cloudflare.com/
> https://lore.kernel.org/netdev/20230707043923.35578-1-ivan@cloudflare.com/
>
> But it does feel like a whack-a-mole thing. The problems are solvable
> if we extend the kfree_skb tracepoint, so I would prefer to not add a
> new tracepoint.

Solvable with many future merge conflicts for stable teams.


>
> >
> > Also, the name ( kfree_skb_for_sk) and order of parameters is confusing.
> >
> > I always prefer this kind of ordering/names :
> >
> > void sk_skb_reason_drop( [struct net *net ] // not relevant here, but
> > to expand the rationale
> >               struct sock *sk, struct sk_buff *skb, enum skb_drop_reason reason)
> >
> > Looking at the name, we immediately see the parameter order.
> >
> > The consume one (no @reason there) would be called
> >
> > void sk_skb_consume(struct sock *sk, struct sk_buff *skb);
>
> I was intending to keep the "kfree_skb" prefix initially since it
> would appear less surprising to kernel developers who used kfree_skb
> and kfree_skb_reason. But your points do make good sense. How about
> "kfree_sk_skb_reason" and "consume_sk_skb" here?
>

IMO kfree_skb() and consume_skb() were a wrong choice. We have to live
with them.

It should have been skb_free(), skb_consume(), skb_alloc(),
to be consistent.

Following (partial) list was much better:

skb_add_rx_frag_netmem, skb_coalesce_rx_frag, skb_pp_cow_data,
skb_cow_data_for_xdp,
skb_dump, skb_tx_error, skb_morph, skb_zerocopy_iter_stream, skb_copy_ubufs,
skb_clone, skb_headers_offset_update, skb_copy_header, skb_copy,
skb_realloc_headroom, skb_expand_head, skb_copy_expand, skb_put,
skb_push, skb_pull, skb_pull_data, skb_trim, skb_copy_bits,
skb_splice_bits, skb_send_sock_locked, skb_store_bits,
skb_checksum, skb_copy_and_csum_bits, skb_zerocopy_headlen,
skb_zerocopy, skb_copy_and_csum_dev, skb_dequeue,
skb_dequeue_tail, skb_queue_purge_reason, skb_errqueue_purge,
skb_queue_head, skb_queue_tail, skb_unlink, skb_append,
skb_split, skb_prepare_seq_read, skb_seq_read, skb_abort_seq_read,
skb_find_text, skb_append_pagefrags, skb_pull_rcsum, skb_segment_list,
skb_segment, skb_to_sgvec, skb_to_sgvec_nomark, skb_cow_data, skb_clone_sk,
skb_complete_tx_timestamp, skb_tstamp_tx, skb_complete_wifi_ack,
skb_partial_csum_set, skb_checksum_setup, skb_checksum_trimmed,
skb_try_coalesce, skb_scrub_packet, skb_vlan_untag, skb_ensure_writable,
skb_ensure_writable_head_tail, skb_vlan_pop, skb_vlan_push, skb_eth_pop,
skb_eth_push, skb_mpls_push, skb_mpls_pop, skb_mpls_update_lse,
skb_mpls_dec_ttl, skb_condense, skb_ext_add, skb_splice_from_iter

(just to make my point very very clear)

Instead we have a myriad of functions with illogical parameter
ordering vs their names.

I see no reason to add more confusion for new helpers.

Re: [RFC net-next 1/6] net: add kfree_skb_for_sk function

Posted by Yan Zhai 1 year, 8 months ago

On Fri, May 31, 2024 at 12:32 PM Eric Dumazet <edumazet@google.com> wrote:
>
> On Fri, May 31, 2024 at 6:58 PM Yan Zhai <yan@cloudflare.com> wrote:
> >
> > Hi Eric,
> >
> >  Thanks for the feedback.
> >
> > On Fri, May 31, 2024 at 1:51 AM Eric Dumazet <edumazet@google.com> wrote:
> > >
> > > On Thu, May 30, 2024 at 11:46 PM Yan Zhai <yan@cloudflare.com> wrote:
> > > >
> > > > Implement a new kfree_skb_for_sk to replace kfree_skb_reason on a few
> > > > local receive path. The function accepts an extra receiving socket
> > > > argument, which will be set in skb->cb for kfree_skb/consume_skb
> > > > tracepoint consumption. With this extra bit of information, it will be
> > > > easier to attribute dropped packets to netns/containers and
> > > > sockets/services for performance and error monitoring purposes.
> > >
> > > This is a lot of code churn...
> > >
> > > I have to ask : Why not simply adding an sk parameter to an existing
> > > trace point ?
> > >
> > Modifying a signature of the current tracepoint seems like a breaking
> > change, that's why I was saving the context inside skb->cb, hoping to
> > not impact any existing programs watching this tracepoint. But
> > thinking it twice, it might not cause a problem if the signature
> > becomes:
> >
> >  trace_kfree_skb(const struct sk_buff *skb, void *location, enum
> > skb_drop_reason reason, const struct sock *sk)
> >
> > As return values are usually not a thing for tracepoints, it is
> > probably still compatible. The cons is that the last "sk" still breaks
> > the integrity of naming. How about making a "kfree_skb_context"
> > internal struct and putting it as the last argument to "hide" the
> > naming confusion?
> >
> > > If this not possible, I would rather add new tracepoints, adding new classes,
> > > because it will ease your debugging :
> > >
> > > When looking for TCP drops, simply use a tcp_event_sk_skb_reason instance,
> > > and voila, no distractions caused by RAW/ICMP/ICMPv6/af_packet drops.
> > >
> > > DECLARE_EVENT_CLASS(tcp_event_sk_skb_reason,
> > >
> > >      TP_PROTO(const struct sock *sk, const struct sk_buff *skb, enum
> > > skb_drop_reason reason),
> > > ...
> > > );
> >
> > The alternative of adding another tracepoint could indeed work, we had
> > a few cases like that in the past, e.g.
> >
> > https://lore.kernel.org/lkml/20230711043453.64095-1-ivan@cloudflare.com/
> > https://lore.kernel.org/netdev/20230707043923.35578-1-ivan@cloudflare.com/
> >
> > But it does feel like a whack-a-mole thing. The problems are solvable
> > if we extend the kfree_skb tracepoint, so I would prefer to not add a
> > new tracepoint.
>
> Solvable with many future merge conflicts for stable teams.
>
I don't quite follow it. I think this specific commit using skb->cb is
unnecessary so I am going to re-work it. As you initially mentioned,
maybe I should just extend kfree_skb tracepoint. I saw a similar
change dd1b527831a3("net: add location to trace_consume_skb()"), is it
something I might follow, or do you specifically mean changes like
this can annoy stable teams?

>
> >
> > >
> > > Also, the name ( kfree_skb_for_sk) and order of parameters is confusing.
> > >
> > > I always prefer this kind of ordering/names :
> > >
> > > void sk_skb_reason_drop( [struct net *net ] // not relevant here, but
> > > to expand the rationale
> > >               struct sock *sk, struct sk_buff *skb, enum skb_drop_reason reason)
> > >
> > > Looking at the name, we immediately see the parameter order.
> > >
> > > The consume one (no @reason there) would be called
> > >
> > > void sk_skb_consume(struct sock *sk, struct sk_buff *skb);
> >
> > I was intending to keep the "kfree_skb" prefix initially since it
> > would appear less surprising to kernel developers who used kfree_skb
> > and kfree_skb_reason. But your points do make good sense. How about
> > "kfree_sk_skb_reason" and "consume_sk_skb" here?
> >
>
> IMO kfree_skb() and consume_skb() were a wrong choice. We have to live
> with them.
>
> It should have been skb_free(), skb_consume(), skb_alloc(),
> to be consistent.
>
> Following (partial) list was much better:
>
> skb_add_rx_frag_netmem, skb_coalesce_rx_frag, skb_pp_cow_data,
> skb_cow_data_for_xdp,
> skb_dump, skb_tx_error, skb_morph, skb_zerocopy_iter_stream, skb_copy_ubufs,
> skb_clone, skb_headers_offset_update, skb_copy_header, skb_copy,
> skb_realloc_headroom, skb_expand_head, skb_copy_expand, skb_put,
> skb_push, skb_pull, skb_pull_data, skb_trim, skb_copy_bits,
> skb_splice_bits, skb_send_sock_locked, skb_store_bits,
> skb_checksum, skb_copy_and_csum_bits, skb_zerocopy_headlen,
> skb_zerocopy, skb_copy_and_csum_dev, skb_dequeue,
> skb_dequeue_tail, skb_queue_purge_reason, skb_errqueue_purge,
> skb_queue_head, skb_queue_tail, skb_unlink, skb_append,
> skb_split, skb_prepare_seq_read, skb_seq_read, skb_abort_seq_read,
> skb_find_text, skb_append_pagefrags, skb_pull_rcsum, skb_segment_list,
> skb_segment, skb_to_sgvec, skb_to_sgvec_nomark, skb_cow_data, skb_clone_sk,
> skb_complete_tx_timestamp, skb_tstamp_tx, skb_complete_wifi_ack,
> skb_partial_csum_set, skb_checksum_setup, skb_checksum_trimmed,
> skb_try_coalesce, skb_scrub_packet, skb_vlan_untag, skb_ensure_writable,
> skb_ensure_writable_head_tail, skb_vlan_pop, skb_vlan_push, skb_eth_pop,
> skb_eth_push, skb_mpls_push, skb_mpls_pop, skb_mpls_update_lse,
> skb_mpls_dec_ttl, skb_condense, skb_ext_add, skb_splice_from_iter
>
> (just to make my point very very clear)
>
> Instead we have a myriad of functions with illogical parameter
> ordering vs their names.
>
> I see no reason to add more confusion for new helpers.

ACK. Thanks for clarifying.

Yan

Re: [RFC net-next 1/6] net: add kfree_skb_for_sk function

Posted by Eric Dumazet 1 year, 8 months ago

On Fri, May 31, 2024 at 9:05 PM Yan Zhai <yan@cloudflare.com> wrote:
>

> I don't quite follow it. I think this specific commit using skb->cb is
> unnecessary so I am going to re-work it. As you initially mentioned,
> maybe I should just extend kfree_skb tracepoint. I saw a similar
> change dd1b527831a3("net: add location to trace_consume_skb()"), is it
> something I might follow, or do you specifically mean changes like
> this can annoy stable teams?
>

I do not think trace points arguments are put in stone.

If they were, I would nack the addition of new tracepoints, to prevent
ossification.

[RFC net-next 1/6] net: add kfree_skb_for_sk function
[RFC net-next 2/6] ping: pass rx socket on rcv drops
[RFC net-next 3/6] net: raw: pass rx socket on rcv drops
[RFC net-next 4/6] tcp: pass rx socket on rcv drops
[RFC net-next 5/6] udp: pass rx socket on rcv drops
[RFC net-next 6/6] af_packet: pass rx socket on rcv drops