net/ipv6/ip6mr.c | 103 +++++++++++++++++++++++++++++------------------ 1 file changed, 64 insertions(+), 39 deletions(-)
ip6mr_vif_seq_start() must lock RCU even in a case of error, because
stop callback is called unconditionally.
When IPV6_MROUTE_MULTIPLE_TABLES is enabled, calls to ip6mr_get_table()
should be done under RCU or RTNL lock. Lock RCU before the call unless
it's done already or RTNL lock is held.
Signed-off-by: Petr Malat <oss@malat.biz>
Signed-off-by: Stefan Wiehler <stefan.wiehler@nokia.com>
Fixes: d1db275dd3f6 ("ipv6: ip6mr: support multiple tables")
---
v2:
- rebase on top of net tree
- add Fixes tag
- refactor out paths
v1: https://patchwork.kernel.org/project/netdevbpf/patch/20240605195355.363936-1-oss@malat.biz/
---
net/ipv6/ip6mr.c | 103 +++++++++++++++++++++++++++++------------------
1 file changed, 64 insertions(+), 39 deletions(-)
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 2ce4ae0d8dc3..9a72928861ac 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -411,13 +411,13 @@ static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
struct net *net = seq_file_net(seq);
struct mr_table *mrt;
+ rcu_read_lock();
mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
if (!mrt)
return ERR_PTR(-ENOENT);
iter->mrt = mrt;
- rcu_read_lock();
return mr_vif_seq_start(seq, pos);
}
@@ -1884,18 +1884,23 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void *arg)
struct mfc6_cache *c;
struct net *net = sock_net(sk);
struct mr_table *mrt;
+ int err;
+ rcu_read_lock();
mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
- if (!mrt)
- return -ENOENT;
+ if (!mrt) {
+ err = -ENOENT;
+ goto out;
+ }
switch (cmd) {
case SIOCGETMIFCNT_IN6:
vr = (struct sioc_mif_req6 *)arg;
- if (vr->mifi >= mrt->maxvif)
- return -EINVAL;
+ if (vr->mifi >= mrt->maxvif) {
+ err = -EINVAL;
+ goto out;
+ }
vr->mifi = array_index_nospec(vr->mifi, mrt->maxvif);
- rcu_read_lock();
vif = &mrt->vif_table[vr->mifi];
if (VIF_EXISTS(mrt, vr->mifi)) {
vr->icount = READ_ONCE(vif->pkt_in);
@@ -1905,12 +1910,11 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void *arg)
rcu_read_unlock();
return 0;
}
- rcu_read_unlock();
- return -EADDRNOTAVAIL;
+ err = -EADDRNOTAVAIL;
+ goto out;
case SIOCGETSGCNT_IN6:
sr = (struct sioc_sg_req6 *)arg;
- rcu_read_lock();
c = ip6mr_cache_find(mrt, &sr->src.sin6_addr,
&sr->grp.sin6_addr);
if (c) {
@@ -1920,11 +1924,16 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void *arg)
rcu_read_unlock();
return 0;
}
- rcu_read_unlock();
- return -EADDRNOTAVAIL;
+ err = -EADDRNOTAVAIL;
+ goto out;
default:
- return -ENOIOCTLCMD;
+ err = -ENOIOCTLCMD;
+ goto out;
}
+
+out:
+ rcu_read_unlock();
+ return err;
}
#ifdef CONFIG_COMPAT
@@ -1952,19 +1961,35 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
struct mfc6_cache *c;
struct net *net = sock_net(sk);
struct mr_table *mrt;
-
- mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
- if (!mrt)
- return -ENOENT;
+ int err;
switch (cmd) {
case SIOCGETMIFCNT_IN6:
if (copy_from_user(&vr, arg, sizeof(vr)))
return -EFAULT;
- if (vr.mifi >= mrt->maxvif)
- return -EINVAL;
+ break;
+ case SIOCGETSGCNT_IN6:
+ if (copy_from_user(&sr, arg, sizeof(sr)))
+ return -EFAULT;
+ break;
+ default:
+ return -ENOIOCTLCMD;
+ }
+
+ rcu_read_lock();
+ mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
+ if (!mrt) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ switch (cmd) {
+ case SIOCGETMIFCNT_IN6:
+ if (vr.mifi >= mrt->maxvif) {
+ err = -EINVAL;
+ goto out;
+ }
vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
- rcu_read_lock();
vif = &mrt->vif_table[vr.mifi];
if (VIF_EXISTS(mrt, vr.mifi)) {
vr.icount = READ_ONCE(vif->pkt_in);
@@ -1977,13 +2002,9 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
return -EFAULT;
return 0;
}
- rcu_read_unlock();
- return -EADDRNOTAVAIL;
+ err = -EADDRNOTAVAIL;
+ goto out;
case SIOCGETSGCNT_IN6:
- if (copy_from_user(&sr, arg, sizeof(sr)))
- return -EFAULT;
-
- rcu_read_lock();
c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
if (c) {
sr.pktcnt = c->_c.mfc_un.res.pkt;
@@ -1995,11 +2016,13 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
return -EFAULT;
return 0;
}
- rcu_read_unlock();
- return -EADDRNOTAVAIL;
- default:
- return -ENOIOCTLCMD;
+ err = -EADDRNOTAVAIL;
+ goto out;
}
+
+out:
+ rcu_read_unlock();
+ return err;
}
#endif
@@ -2275,11 +2298,13 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
struct mfc6_cache *cache;
struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
+ rcu_read_lock();
mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
- if (!mrt)
- return -ENOENT;
+ if (!mrt) {
+ err = -ENOENT;
+ goto out;
+ }
- rcu_read_lock();
cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
if (!cache && skb->dev) {
int vif = ip6mr_find_vif(mrt, skb->dev);
@@ -2297,15 +2322,15 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
dev = skb->dev;
if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
- rcu_read_unlock();
- return -ENODEV;
+ err = -ENODEV;
+ goto out;
}
/* really correct? */
skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
if (!skb2) {
- rcu_read_unlock();
- return -ENOMEM;
+ err = -ENOMEM;
+ goto out;
}
NETLINK_CB(skb2).portid = portid;
@@ -2327,12 +2352,12 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
iph->daddr = rt->rt6i_dst.addr;
err = ip6mr_cache_unresolved(mrt, vif, skb2, dev);
- rcu_read_unlock();
-
- return err;
+ goto out;
}
err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
+
+out:
rcu_read_unlock();
return err;
}
--
2.42.0
On Tue, Oct 1, 2024 at 12:05 PM Stefan Wiehler <stefan.wiehler@nokia.com> wrote: > > ip6mr_vif_seq_start() must lock RCU even in a case of error, because > stop callback is called unconditionally. > OK, but RT6_TABLE_DFLT always exists, ip6mr_get_table(net, RT6_TABLE_DFLT) can never fail. This is ensured at netns creation, from ip6mr_rules_init() This complex patch adds some code churn, with no clear bug fix ?
> OK, but RT6_TABLE_DFLT always exists, ip6mr_get_table(net, RT6_TABLE_DFLT) > can never fail. > > This is ensured at netns creation, from ip6mr_rules_init() OK, but nevertheless we need to enter a RCU read-side critical section before ip6mr_get_table() is called. > This complex patch adds some code churn, with no clear bug fix ? We should have probably mentioned the Lockdep-RCU splat that brought this topic up: [ 48.834645] WARNING: suspicious RCU usage [ 48.834647] 6.1.103-584209f6d5-nokia_sm_x86 #1 Tainted: G S O [ 48.834649] ----------------------------- [ 48.834651] /net/ipv6/ip6mr.c:132 RCU-list traversed in non-reader section!! [ 48.834654] other info that might help us debug this: [ 48.834656] rcu_scheduler_active = 2, debug_locks = 1 [ 48.834658] no locks held by radvd/5777. [ 48.834660] stack backtrace: [ 48.834663] CPU: 0 PID: 5777 Comm: radvd Tainted: G S O 6.1.103-584209f6d5-nokia_sm_x86 #1 [ 48.834666] Hardware name: Nokia Asil/Default string, BIOS 0ACNA113 06/07/2024 [ 48.834673] Call Trace: [ 48.834674] <TASK> [ 48.834677] dump_stack_lvl+0xb7/0xe9 [ 48.834687] lockdep_rcu_suspicious.cold+0x2d/0x64 [ 48.834697] ip6mr_get_table+0x9f/0xb0 [ 48.834704] ip6mr_ioctl+0x50/0x360 [ 48.834713] ? sk_ioctl+0x5f/0x1c0 [ 48.834719] sk_ioctl+0x5f/0x1c0 [ 48.834723] ? find_held_lock+0x2b/0x80 [ 48.834731] sock_do_ioctl+0x7b/0x140 [ 48.834737] ? proc_nr_files+0x30/0x30 [ 48.834744] sock_ioctl+0x1f5/0x360 [ 48.834754] __x64_sys_ioctl+0x8d/0xd0 [ 48.834760] do_syscall_64+0x3c/0x90 [ 48.834765] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 [ 48.834769] RIP: 0033:0x7fda5f56e2ac [ 48.834773] Code: 1e fa 48 8d 44 24 08 48 89 54 24 e0 48 89 44 24 c0 48 8d 44 24 d0 48 89 44 24 c8 b8 1 0 00 00 00 c7 44 24 b8 10 00 00 00 0f 05 <3d> 00 f0 ff ff 89 c2 77 0b 89 d0 c3 0f 1f 84 00 00 00 00 00 48 8b [ 48.834776] RSP: 002b:00007fff52d4bda8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 48.834782] RAX: ffffffffffffffda RBX: 000000000171cd80 RCX: 00007fda5f56e2ac [ 48.834784] RDX: 00007fff52d4bdb0 RSI: 0000000000008913 RDI: 0000000000000003 [ 48.834787] RBP: 000000000171cd30 R08: 0000000000000007 R09: 000000000000003c [ 48.834789] R10: 0000000000000004 R11: 0000000000000246 R12: 0000000000000003 [ 48.834791] R13: 0000000000000000 R14: 0000000000000004 R15: 000000000040d43c [ 48.834802] </TASK> Kind regards, Stefan
On Tue, Oct 1, 2024 at 2:50 PM Stefan Wiehler <stefan.wiehler@nokia.com> wrote: > > > OK, but RT6_TABLE_DFLT always exists, ip6mr_get_table(net, RT6_TABLE_DFLT) > > can never fail. > > > > This is ensured at netns creation, from ip6mr_rules_init() > > OK, but nevertheless we need to enter a RCU read-side critical section before > ip6mr_get_table() is called. This could be a lockdep annotation error then, at least for RT6_TABLE_DFLT, oh well. Note that net/ipv4/ipmr.c would have a similar issue. Please split your patch in small units, their Fixes: tags are likely different, and if some code breaks something, fixing the issue will be easier. The changelog seemed to only address the first ip6mr_vif_seq_start() part.
> This could be a lockdep annotation error then, at least for > RT6_TABLE_DFLT, oh well. As you have already explained, we can ignore the ip6mr_vif_seq_start() error path, so the issue boils down to ip6mr_get_table() being called without entering a RCU read-side critical section from these 4 functions: ipmr_vif_seq_start(), ip6mr_ioctl(), ip6mr_compat_ioctl() and ip6mr_get_route(). It is my understanding that in none of these four cases the RTNL lock is held either; at least according to the RCU-lockdep splat we clearly see that this is not the case in ip6mr_ioctl() – but please correct me if I'm wrong. > Note that net/ipv4/ipmr.c would have a similar issue. Yes, looks indeed like that :-/ > Please split your patch in small units, their Fixes: tags are likely > different, and if some code breaks something, > fixing the issue will be easier. > > The changelog seemed to only address the first ip6mr_vif_seq_start() part. If you prefer that I can split the change into 4 commits addressing each of the 4 functions mentioned before. Kind regards, Stefan
Le mar. 1 oct. 2024 à 16:36, Stefan Wiehler <stefan.wiehler@nokia.com> a écrit : > > > This could be a lockdep annotation error then, at least for > > RT6_TABLE_DFLT, oh well. > > As you have already explained, we can ignore the ip6mr_vif_seq_start() error > path, so the issue boils down to ip6mr_get_table() being called without > entering a RCU read-side critical section from these 4 functions: > ipmr_vif_seq_start(), ip6mr_ioctl(), ip6mr_compat_ioctl() and > ip6mr_get_route(). It is my understanding that in none of these four cases the > RTNL lock is held either; at least according to the RCU-lockdep splat we > clearly see that this is not the case in ip6mr_ioctl() – but please correct me > if I'm wrong. > > > Note that net/ipv4/ipmr.c would have a similar issue. > > Yes, looks indeed like that :-/ > > > Please split your patch in small units, their Fixes: tags are likely > > different, and if some code breaks something, > > fixing the issue will be easier. > > > > The changelog seemed to only address the first ip6mr_vif_seq_start() part. > > If you prefer that I can split the change into 4 commits addressing each of the > 4 functions mentioned before. Yes please. Extending rcu_read_lock() sections needs inspection, because we can not sleep there.
© 2016 - 2024 Red Hat, Inc.