From: Dmitry Safonov <dima@arista.com>
Now that the destruction of info/keys is delayed until the socket
destructor, it's safe to use kfree() without an RCU callback.
As either socket was yet in TCP_CLOSE state or the socket refcounter is
zero and no one can discover it anymore, it's safe to release memory
straight away.
Similar thing was possible for twsk already.
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
net/ipv4/tcp.c | 17 +++--------------
net/ipv4/tcp_ao.c | 5 ++---
net/ipv4/tcp_ipv4.c | 4 ++--
net/ipv4/tcp_minisocks.c | 19 +++++--------------
4 files changed, 12 insertions(+), 33 deletions(-)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e2ec4ee0ff4a640e9e5501a0d93fc0ed312d488d..254ca95d0c3c5c44029be0e84120c5e9fb9d4514 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -413,27 +413,16 @@ static u64 tcp_compute_delivery_rate(const struct tcp_sock *tp)
}
#ifdef CONFIG_TCP_MD5SIG
-static void tcp_md5sig_info_free_rcu(struct rcu_head *head)
-{
- struct tcp_md5sig_info *md5sig;
-
- md5sig = container_of(head, struct tcp_md5sig_info, rcu);
- kfree(md5sig);
- static_branch_slow_dec_deferred(&tcp_md5_needed);
- tcp_md5_release_sigpool();
-}
-
void tcp_md5_destruct_sock(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
if (tp->md5sig_info) {
- struct tcp_md5sig_info *md5sig;
- md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
tcp_clear_md5_list(sk);
- rcu_assign_pointer(tp->md5sig_info, NULL);
- call_rcu(&md5sig->rcu, tcp_md5sig_info_free_rcu);
+ kfree(rcu_replace_pointer(tp->md5sig_info, NULL, 1));
+ static_branch_slow_dec_deferred(&tcp_md5_needed);
+ tcp_md5_release_sigpool();
}
}
EXPORT_SYMBOL_GPL(tcp_md5_destruct_sock);
diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c
index bbb8d5f0eae7d3d8887da3fa4d68e248af9060ad..31302be78bc4450b56fa23a390b6d03b2262741d 100644
--- a/net/ipv4/tcp_ao.c
+++ b/net/ipv4/tcp_ao.c
@@ -268,9 +268,8 @@ static void tcp_ao_key_free_rcu(struct rcu_head *head)
kfree_sensitive(key);
}
-static void tcp_ao_info_free_rcu(struct rcu_head *head)
+static void tcp_ao_info_free(struct tcp_ao_info *ao)
{
- struct tcp_ao_info *ao = container_of(head, struct tcp_ao_info, rcu);
struct tcp_ao_key *key;
struct hlist_node *n;
@@ -310,7 +309,7 @@ void tcp_ao_destroy_sock(struct sock *sk, bool twsk)
if (!twsk)
tcp_ao_sk_omem_free(sk, ao);
- call_rcu(&ao->rcu, tcp_ao_info_free_rcu);
+ tcp_ao_info_free(ao);
}
void tcp_ao_time_wait(struct tcp_timewait_sock *tcptw, struct tcp_sock *tp)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 68bb75bd419cdbfce17048252919996d764ddc1a..f914bda25d8f5170395157b707d3bd2ef04267a1 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1503,9 +1503,9 @@ void tcp_clear_md5_list(struct sock *sk)
md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
- hlist_del_rcu(&key->node);
+ hlist_del(&key->node);
atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
- kfree_rcu(key, rcu);
+ kfree(key);
}
}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index d1c9e40886463ca308f9f3682c4039f491e7555f..7c2ae07d8d5d2a18d6ce3210cc09ee5d9850ea29 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -377,26 +377,17 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
}
EXPORT_SYMBOL(tcp_time_wait);
-#ifdef CONFIG_TCP_MD5SIG
-static void tcp_md5_twsk_free_rcu(struct rcu_head *head)
-{
- struct tcp_md5sig_key *key;
-
- key = container_of(head, struct tcp_md5sig_key, rcu);
- kfree(key);
- static_branch_slow_dec_deferred(&tcp_md5_needed);
- tcp_md5_release_sigpool();
-}
-#endif
-
void tcp_twsk_destructor(struct sock *sk)
{
#ifdef CONFIG_TCP_MD5SIG
if (static_branch_unlikely(&tcp_md5_needed.key)) {
struct tcp_timewait_sock *twsk = tcp_twsk(sk);
- if (twsk->tw_md5_key)
- call_rcu(&twsk->tw_md5_key->rcu, tcp_md5_twsk_free_rcu);
+ if (twsk->tw_md5_key) {
+ kfree(twsk->tw_md5_key);
+ static_branch_slow_dec_deferred(&tcp_md5_needed);
+ tcp_md5_release_sigpool();
+ }
}
#endif
tcp_ao_destroy_sock(sk, true);
--
2.42.2
On Sat, 30 Aug 2025 05:31:47 +0100 Dmitry Safonov via B4 Relay wrote: > Now that the destruction of info/keys is delayed until the socket > destructor, it's safe to use kfree() without an RCU callback. > As either socket was yet in TCP_CLOSE state or the socket refcounter is > zero and no one can discover it anymore, it's safe to release memory > straight away. > Similar thing was possible for twsk already. After this patch the rcu members of struct tcp_ao* seem to no longer be used? -- pw-bot: cr
On Wed, Sep 3, 2025 at 12:09 AM Jakub Kicinski <kuba@kernel.org> wrote: > > On Sat, 30 Aug 2025 05:31:47 +0100 Dmitry Safonov via B4 Relay wrote: > > Now that the destruction of info/keys is delayed until the socket > > destructor, it's safe to use kfree() without an RCU callback. > > As either socket was yet in TCP_CLOSE state or the socket refcounter is > > zero and no one can discover it anymore, it's safe to release memory > > straight away. > > Similar thing was possible for twsk already. > > After this patch the rcu members of struct tcp_ao* seem to no longer > be used? Right. I'll remove tcp_ao_info::rcu in v4. For tcp_ao_key it's needed for the regular key rotation, as well as for tcp_md5sig_key. Thanks, Dmitry
On Wed, 3 Sep 2025 18:41:39 +0100 Dmitry Safonov wrote: > > On Sat, 30 Aug 2025 05:31:47 +0100 Dmitry Safonov via B4 Relay wrote: > > > Now that the destruction of info/keys is delayed until the socket > > > destructor, it's safe to use kfree() without an RCU callback. > > > As either socket was yet in TCP_CLOSE state or the socket refcounter is > > > zero and no one can discover it anymore, it's safe to release memory > > > straight away. > > > Similar thing was possible for twsk already. > > > > After this patch the rcu members of struct tcp_ao* seem to no longer > > be used? > > Right. I'll remove tcp_ao_info::rcu in v4. > For tcp_ao_key it's needed for the regular key rotation, as well as > for tcp_md5sig_key. Hm, maybe I missed something. I did a test allmodconfig build yesterday and while the md5sig_key rcu was still needed, removing the ao_key didn't cause issues. But it was just a quick test I didn't even config kconfig is sane.
On Wed, Sep 3, 2025 at 11:23 PM Jakub Kicinski <kuba@kernel.org> wrote: > On Wed, 3 Sep 2025 18:41:39 +0100 Dmitry Safonov wrote: > > > On Sat, 30 Aug 2025 05:31:47 +0100 Dmitry Safonov via B4 Relay wrote: > > > > Now that the destruction of info/keys is delayed until the socket > > > > destructor, it's safe to use kfree() without an RCU callback. > > > > As either socket was yet in TCP_CLOSE state or the socket refcounter is > > > > zero and no one can discover it anymore, it's safe to release memory > > > > straight away. > > > > Similar thing was possible for twsk already. > > > > > > After this patch the rcu members of struct tcp_ao* seem to no longer > > > be used? > > > > Right. I'll remove tcp_ao_info::rcu in v4. > > For tcp_ao_key it's needed for the regular key rotation, as well as > > for tcp_md5sig_key. > > Hm, maybe I missed something. I did a test allmodconfig build yesterday > and while the md5sig_key rcu was still needed, removing the ao_key > didn't cause issues. But it was just a quick test I didn't even config > kconfig is sane. Hmm, probably CONFIG_TCP_AO was off? tcp_ao_delete_key() does call_rcu(&key->rcu, tcp_ao_key_free_rcu). Looking at the code now, I guess what I could have done even more is migrating tcp_sock::ao_info (and tcp_timewait_sock::ao_info) from rcu_*() helpers to acquire/release ones. Somewhat feeling uneasy about going that far just yet. Should I do it with another cleanup on the top, what do you think? Thanks, Dmitry
On Thu, 4 Sep 2025 00:17:34 +0100 Dmitry Safonov wrote: > > > Right. I'll remove tcp_ao_info::rcu in v4. > > > For tcp_ao_key it's needed for the regular key rotation, as well as > > > for tcp_md5sig_key. > > > > Hm, maybe I missed something. I did a test allmodconfig build yesterday > > and while the md5sig_key rcu was still needed, removing the ao_key > > didn't cause issues. But it was just a quick test I didn't even config > > kconfig is sane. > > Hmm, probably CONFIG_TCP_AO was off? > tcp_ao_delete_key() does call_rcu(&key->rcu, tcp_ao_key_free_rcu). > > Looking at the code now, I guess what I could have done even more is > migrating tcp_sock::ao_info (and tcp_timewait_sock::ao_info) from > rcu_*() helpers to acquire/release ones. Somewhat feeling uneasy about > going that far just yet. Should I do it with another cleanup on the > top, what do you think? No preference :)
On Thu, Sep 4, 2025 at 12:27 AM Jakub Kicinski <kuba@kernel.org> wrote: > On Thu, 4 Sep 2025 00:17:34 +0100 Dmitry Safonov wrote: [..] > > Looking at the code now, I guess what I could have done even more is > > migrating tcp_sock::ao_info (and tcp_timewait_sock::ao_info) from > > rcu_*() helpers to acquire/release ones. Somewhat feeling uneasy about > > going that far just yet. Should I do it with another cleanup on the > > top, what do you think? > > No preference :) Jakub, I've sent v5, addressing minor v4 review comments. I have a patch that migrates ao_info from rcu_*() to smp_{store,release}*(), it seems to work. I'm going to send it as a follow-up, once this gets into net-next as these two seem to be mostly reviewed/ready. While on it, I noticed that potentially I could trim (struct tcp_timewait_sock) by 8 bytes if tw_md5_key and ao_info would union in another helper structure. As on time-wait socket only one of MD5 or AO could be set. Planning to send these two patches in a separate thread/patches set. Thanks, Dmitry
© 2016 - 2025 Red Hat, Inc.