[PATCH 5.15 1/3] ipv6: annotate some data-races around sk->sk_prot

Kazunori Kobayashi posted 3 patches 2 years, 8 months ago
There is a newer version of this series
[PATCH 5.15 1/3] ipv6: annotate some data-races around sk->sk_prot
Posted by Kazunori Kobayashi 2 years, 8 months ago
From: Eric Dumazet <edumazet@google.com>

commit 086d49058cd8471046ae9927524708820f5fd1c7 upstream.

IPv6 has this hack changing sk->sk_prot when an IPv6 socket
is 'converted' to an IPv4 one with IPV6_ADDRFORM option.

This operation is only performed for TCP and UDP, knowing that
the 'struct proto' instances for the two network families are populated
in the same way, and cannot disappear while a reader
might use and dereference sk->sk_prot.

If we think about it, all reads of sk->sk_prot done while
either the socket lock or RTNL is not held should be using READ_ONCE().

Also note that other layers like MPTCP, XFRM, CHELSIO_TLS also
write over sk->sk_prot.

BUG: KCSAN: data-race in inet6_recvmsg / ipv6_setsockopt

write to 0xffff8881386f7aa8 of 8 bytes by task 26932 on cpu 0:
 do_ipv6_setsockopt net/ipv6/ipv6_sockglue.c:492 [inline]
 ipv6_setsockopt+0x3758/0x3910 net/ipv6/ipv6_sockglue.c:1019
 udpv6_setsockopt+0x85/0x90 net/ipv6/udp.c:1649
 sock_common_setsockopt+0x5d/0x70 net/core/sock.c:3489
 __sys_setsockopt+0x209/0x2a0 net/socket.c:2180
 __do_sys_setsockopt net/socket.c:2191 [inline]
 __se_sys_setsockopt net/socket.c:2188 [inline]
 __x64_sys_setsockopt+0x62/0x70 net/socket.c:2188
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x44/0xae

read to 0xffff8881386f7aa8 of 8 bytes by task 26911 on cpu 1:
 inet6_recvmsg+0x7a/0x210 net/ipv6/af_inet6.c:659
 ____sys_recvmsg+0x16c/0x320
 ___sys_recvmsg net/socket.c:2674 [inline]
 do_recvmmsg+0x3f5/0xae0 net/socket.c:2768
 __sys_recvmmsg net/socket.c:2847 [inline]
 __do_sys_recvmmsg net/socket.c:2870 [inline]
 __se_sys_recvmmsg net/socket.c:2863 [inline]
 __x64_sys_recvmmsg+0xde/0x160 net/socket.c:2863
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x44/0xae

value changed: 0xffffffff85e0e980 -> 0xffffffff85e01580

Reported by Kernel Concurrency Sanitizer on:
CPU: 1 PID: 26911 Comm: syz-executor.3 Not tainted 5.17.0-rc2-syzkaller-00316-g0457e5153e0e-dirty #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011

Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Kazunori Kobayashi <kazunori.kobayashi@miraclelinux.com>
---
 net/ipv6/af_inet6.c      | 24 ++++++++++++++++++------
 net/ipv6/ipv6_sockglue.c |  6 ++++--
 2 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 8e0c33b683010..bbfa5d3a16f01 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -452,11 +452,14 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
 	struct sock *sk = sock->sk;
 	u32 flags = BIND_WITH_LOCK;
+	const struct proto *prot;
 	int err = 0;
 
+	/* IPV6_ADDRFORM can change sk->sk_prot under us. */
+	prot = READ_ONCE(sk->sk_prot);
 	/* If the socket has its own bind function then use it. */
-	if (sk->sk_prot->bind)
-		return sk->sk_prot->bind(sk, uaddr, addr_len);
+	if (prot->bind)
+		return prot->bind(sk, uaddr, addr_len);
 
 	if (addr_len < SIN6_LEN_RFC2133)
 		return -EINVAL;
@@ -572,6 +575,7 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	void __user *argp = (void __user *)arg;
 	struct sock *sk = sock->sk;
 	struct net *net = sock_net(sk);
+	const struct proto *prot;
 
 	switch (cmd) {
 	case SIOCADDRT:
@@ -589,9 +593,11 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	case SIOCSIFDSTADDR:
 		return addrconf_set_dstaddr(net, argp);
 	default:
-		if (!sk->sk_prot->ioctl)
+		/* IPV6_ADDRFORM can change sk->sk_prot under us. */
+		prot = READ_ONCE(sk->sk_prot);
+		if (!prot->ioctl)
 			return -ENOIOCTLCMD;
-		return sk->sk_prot->ioctl(sk, cmd, arg);
+		return prot->ioctl(sk, cmd, arg);
 	}
 	/*NOTREACHED*/
 	return 0;
@@ -653,11 +659,14 @@ INDIRECT_CALLABLE_DECLARE(int udpv6_sendmsg(struct sock *, struct msghdr *,
 int inet6_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 {
 	struct sock *sk = sock->sk;
+	const struct proto *prot;
 
 	if (unlikely(inet_send_prepare(sk)))
 		return -EAGAIN;
 
-	return INDIRECT_CALL_2(sk->sk_prot->sendmsg, tcp_sendmsg, udpv6_sendmsg,
+	/* IPV6_ADDRFORM can change sk->sk_prot under us. */
+	prot = READ_ONCE(sk->sk_prot);
+	return INDIRECT_CALL_2(prot->sendmsg, tcp_sendmsg, udpv6_sendmsg,
 			       sk, msg, size);
 }
 
@@ -667,13 +676,16 @@ int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 		  int flags)
 {
 	struct sock *sk = sock->sk;
+	const struct proto *prot;
 	int addr_len = 0;
 	int err;
 
 	if (likely(!(flags & MSG_ERRQUEUE)))
 		sock_rps_record_flow(sk);
 
-	err = INDIRECT_CALL_2(sk->sk_prot->recvmsg, tcp_recvmsg, udpv6_recvmsg,
+	/* IPV6_ADDRFORM can change sk->sk_prot under us. */
+	prot = READ_ONCE(sk->sk_prot);
+	err = INDIRECT_CALL_2(prot->recvmsg, tcp_recvmsg, udpv6_recvmsg,
 			      sk, msg, size, flags & MSG_DONTWAIT,
 			      flags & ~MSG_DONTWAIT, &addr_len);
 	if (err >= 0)
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 197e12d5607f1..485d381aa4475 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -475,7 +475,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 				sock_prot_inuse_add(net, sk->sk_prot, -1);
 				sock_prot_inuse_add(net, &tcp_prot, 1);
 				local_bh_enable();
-				sk->sk_prot = &tcp_prot;
+				/* Paired with READ_ONCE(sk->sk_prot) in net/ipv6/af_inet6.c */
+				WRITE_ONCE(sk->sk_prot, &tcp_prot);
 				icsk->icsk_af_ops = &ipv4_specific;
 				sk->sk_socket->ops = &inet_stream_ops;
 				sk->sk_family = PF_INET;
@@ -489,7 +490,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 				sock_prot_inuse_add(net, sk->sk_prot, -1);
 				sock_prot_inuse_add(net, prot, 1);
 				local_bh_enable();
-				sk->sk_prot = prot;
+				/* Paired with READ_ONCE(sk->sk_prot) in net/ipv6/af_inet6.c */
+				WRITE_ONCE(sk->sk_prot, prot);
 				sk->sk_socket->ops = &inet_dgram_ops;
 				sk->sk_family = PF_INET;
 			}
-- 
2.39.2
Re: [PATCH 5.15 1/3] ipv6: annotate some data-races around sk->sk_prot
Posted by Greg KH 1 year, 5 months ago
On Mon, Apr 17, 2023 at 04:53:46PM +0000, Kazunori Kobayashi wrote:
> From: Eric Dumazet <edumazet@google.com>
> 
> commit 086d49058cd8471046ae9927524708820f5fd1c7 upstream.
> 
> IPv6 has this hack changing sk->sk_prot when an IPv6 socket
> is 'converted' to an IPv4 one with IPV6_ADDRFORM option.
> 
> This operation is only performed for TCP and UDP, knowing
> their 'struct proto' for the two network families are populated
> in the same way, and can not disappear while a reader
> might use and dereference sk->sk_prot.
> 
> If we think about it all reads of sk->sk_prot while
> either socket lock or RTNL is not acquired should be using READ_ONCE().
> 
> Also note that other layers like MPTCP, XFRM, CHELSIO_TLS also
> write over sk->sk_prot.
> 
> BUG: KCSAN: data-race in inet6_recvmsg / ipv6_setsockopt
> 
> write to 0xffff8881386f7aa8 of 8 bytes by task 26932 on cpu 0:
>  do_ipv6_setsockopt net/ipv6/ipv6_sockglue.c:492 [inline]
>  ipv6_setsockopt+0x3758/0x3910 net/ipv6/ipv6_sockglue.c:1019
>  udpv6_setsockopt+0x85/0x90 net/ipv6/udp.c:1649
>  sock_common_setsockopt+0x5d/0x70 net/core/sock.c:3489
>  __sys_setsockopt+0x209/0x2a0 net/socket.c:2180
>  __do_sys_setsockopt net/socket.c:2191 [inline]
>  __se_sys_setsockopt net/socket.c:2188 [inline]
>  __x64_sys_setsockopt+0x62/0x70 net/socket.c:2188
>  do_syscall_x64 arch/x86/entry/common.c:50 [inline]
>  do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80
>  entry_SYSCALL_64_after_hwframe+0x44/0xae
> 
> read to 0xffff8881386f7aa8 of 8 bytes by task 26911 on cpu 1:
>  inet6_recvmsg+0x7a/0x210 net/ipv6/af_inet6.c:659
>  ____sys_recvmsg+0x16c/0x320
>  ___sys_recvmsg net/socket.c:2674 [inline]
>  do_recvmmsg+0x3f5/0xae0 net/socket.c:2768
>  __sys_recvmmsg net/socket.c:2847 [inline]
>  __do_sys_recvmmsg net/socket.c:2870 [inline]
>  __se_sys_recvmmsg net/socket.c:2863 [inline]
>  __x64_sys_recvmmsg+0xde/0x160 net/socket.c:2863
>  do_syscall_x64 arch/x86/entry/common.c:50 [inline]
>  do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80
>  entry_SYSCALL_64_after_hwframe+0x44/0xae
> 
> value changed: 0xffffffff85e0e980 -> 0xffffffff85e01580
> 
> Reported by Kernel Concurrency Sanitizer on:
> CPU: 1 PID: 26911 Comm: syz-executor.3 Not tainted 5.17.0-rc2-syzkaller-00316-g0457e5153e0e-dirty #0
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
> 
> Reported-by: syzbot <syzkaller@googlegroups.com>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Signed-off-by: David S. Miller <davem@davemloft.net>
> Signed-off-by: Kazunori Kobayashi <kazunori.kobayashi@miraclelinux.com>

This backport didn't apply at all. Are you sure you made it against the
proper tree?

The original commit does seem to apply properly, so I'll go apply that
one instead...

greg k-h
Re: [PATCH 5.15 1/3] ipv6: annotate some data-races around sk->sk_prot
Posted by Kazunori Kobayashi 1 year, 5 months ago
On 2024/07/02 18:42, Greg KH wrote:
> On Mon, Apr 17, 2023 at 04:53:46PM +0000, Kazunori Kobayashi wrote:
>> From: Eric Dumazet <edumazet@google.com>
>>
>> commit 086d49058cd8471046ae9927524708820f5fd1c7 upstream.
>>
>> IPv6 has this hack changing sk->sk_prot when an IPv6 socket
>> is 'converted' to an IPv4 one with IPV6_ADDRFORM option.
>>
>> This operation is only performed for TCP and UDP, knowing
>> their 'struct proto' for the two network families are populated
>> in the same way, and can not disappear while a reader
>> might use and dereference sk->sk_prot.
>>
>> If we think about it all reads of sk->sk_prot while
>> either socket lock or RTNL is not acquired should be using READ_ONCE().
>>
>> Also note that other layers like MPTCP, XFRM, CHELSIO_TLS also
>> write over sk->sk_prot.
>>
>> BUG: KCSAN: data-race in inet6_recvmsg / ipv6_setsockopt
>>
>> write to 0xffff8881386f7aa8 of 8 bytes by task 26932 on cpu 0:
>>   do_ipv6_setsockopt net/ipv6/ipv6_sockglue.c:492 [inline]
>>   ipv6_setsockopt+0x3758/0x3910 net/ipv6/ipv6_sockglue.c:1019
>>   udpv6_setsockopt+0x85/0x90 net/ipv6/udp.c:1649
>>   sock_common_setsockopt+0x5d/0x70 net/core/sock.c:3489
>>   __sys_setsockopt+0x209/0x2a0 net/socket.c:2180
>>   __do_sys_setsockopt net/socket.c:2191 [inline]
>>   __se_sys_setsockopt net/socket.c:2188 [inline]
>>   __x64_sys_setsockopt+0x62/0x70 net/socket.c:2188
>>   do_syscall_x64 arch/x86/entry/common.c:50 [inline]
>>   do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80
>>   entry_SYSCALL_64_after_hwframe+0x44/0xae
>>
>> read to 0xffff8881386f7aa8 of 8 bytes by task 26911 on cpu 1:
>>   inet6_recvmsg+0x7a/0x210 net/ipv6/af_inet6.c:659
>>   ____sys_recvmsg+0x16c/0x320
>>   ___sys_recvmsg net/socket.c:2674 [inline]
>>   do_recvmmsg+0x3f5/0xae0 net/socket.c:2768
>>   __sys_recvmmsg net/socket.c:2847 [inline]
>>   __do_sys_recvmmsg net/socket.c:2870 [inline]
>>   __se_sys_recvmmsg net/socket.c:2863 [inline]
>>   __x64_sys_recvmmsg+0xde/0x160 net/socket.c:2863
>>   do_syscall_x64 arch/x86/entry/common.c:50 [inline]
>>   do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80
>>   entry_SYSCALL_64_after_hwframe+0x44/0xae
>>
>> value changed: 0xffffffff85e0e980 -> 0xffffffff85e01580
>>
>> Reported by Kernel Concurrency Sanitizer on:
>> CPU: 1 PID: 26911 Comm: syz-executor.3 Not tainted 5.17.0-rc2-syzkaller-00316-g0457e5153e0e-dirty #0
>> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
>>
>> Reported-by: syzbot <syzkaller@googlegroups.com>
>> Signed-off-by: Eric Dumazet <edumazet@google.com>
>> Signed-off-by: David S. Miller <davem@davemloft.net>
>> Signed-off-by: Kazunori Kobayashi <kazunori.kobayashi@miraclelinux.com>
> This backport didn't apply at all, are you sure you made it against the
> proper tree?
>
> The original commit does seem to apply properly, so I'll go apply that
> one instead...
>
> greg k-h

I assumed the following commit was the latest version of 5.15 stable and
based the patch on it.
Does this differ from what you expected?

commit 4878aadf2d1519f3731ae300ce1fef78fc63ee30 (tag: v5.15.161, origin/linux-5.15.y, linux-5.15.y)
Author: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date:   Sun Jun 16 13:40:01 2024 +0200

     Linux 5.15.161


Regards,

Kazunori

-- 
Kazunori Kobayashi
Cybertrust Japan Co., Ltd.
https://www.cybertrust.co.jp/