[PATCH v2] bluetooth/l2cap: sync sock recv cb and release

Edward Adam Davis posted 1 patch 1 year, 6 months ago
net/bluetooth/l2cap_sock.c | 25 ++++++++++++++++++++++---
1 file changed, 22 insertions(+), 3 deletions(-)
[PATCH v2] bluetooth/l2cap: sync sock recv cb and release
Posted by Edward Adam Davis 1 year, 6 months ago
The problem occurs between the system call to close the sock and hci_rx_work,
where the former releases the sock and the latter accesses it without lock protection.

           CPU0                       CPU1
           ----                       ----
           sock_close                 hci_rx_work
	   l2cap_sock_release         hci_acldata_packet
	   l2cap_sock_kill            l2cap_recv_frame
	   sk_free                    l2cap_conless_channel
	                              l2cap_sock_recv_cb

If hci_rx_work processes the data that needs to be received before the sock is
closed, then everything is normal; Otherwise, the work thread may access the
released sock when receiving data.

Add a chan mutex in the rx callback of the sock to achieve synchronization between
the sock release and recv cb.

Sock is dead, so set chan data to NULL, avoid others use invalid sock pointer.

Reported-and-tested-by: syzbot+b7f6f8c9303466e16c8a@syzkaller.appspotmail.com
Signed-off-by: Edward Adam Davis <eadavis@qq.com>
---
 net/bluetooth/l2cap_sock.c | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 6db60946c627..f45cdf9bc985 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1239,6 +1239,10 @@ static void l2cap_sock_kill(struct sock *sk)
 
 	BT_DBG("sk %p state %s", sk, state_to_string(sk->sk_state));
 
+	/* Sock is dead, so set chan data to NULL, avoid other task use invalid
+	 * sock pointer.
+	 */
+	l2cap_pi(sk)->chan->data = NULL;
 	/* Kill poor orphan */
 
 	l2cap_chan_put(l2cap_pi(sk)->chan);
@@ -1481,12 +1485,25 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan)
 
 static int l2cap_sock_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
 {
-	struct sock *sk = chan->data;
-	struct l2cap_pinfo *pi = l2cap_pi(sk);
+	struct sock *sk;
+	struct l2cap_pinfo *pi;
 	int err;
 
-	lock_sock(sk);
+	/* To avoid race with sock_release, a chan lock needs to be added here
+	 * to synchronize the sock.
+	 */
+	l2cap_chan_hold(chan);
+	l2cap_chan_lock(chan);
+	sk = chan->data;
 
+	if (!sk) {
+		l2cap_chan_unlock(chan);
+		l2cap_chan_put(chan);
+		return -ENXIO;
+	}
+
+	pi = l2cap_pi(sk);
+	lock_sock(sk);
 	if (chan->mode == L2CAP_MODE_ERTM && !list_empty(&pi->rx_busy)) {
 		err = -ENOMEM;
 		goto done;
@@ -1535,6 +1552,8 @@ static int l2cap_sock_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
 
 done:
 	release_sock(sk);
+	l2cap_chan_unlock(chan);
+	l2cap_chan_put(chan);
 
 	return err;
 }
-- 
2.43.0
Re: [PATCH v2] bluetooth/l2cap: sync sock recv cb and release
Posted by Luiz Augusto von Dentz 1 year, 5 months ago
Hi Edward,

On Fri, Jun 14, 2024 at 9:46 PM Edward Adam Davis <eadavis@qq.com> wrote:
>
> The problem occurs between the system call to close the sock and hci_rx_work,
> where the former releases the sock and the latter accesses it without lock protection.
>
>            CPU0                       CPU1
>            ----                       ----
>            sock_close                 hci_rx_work
>            l2cap_sock_release         hci_acldata_packet
>            l2cap_sock_kill            l2cap_recv_frame
>            sk_free                    l2cap_conless_channel
>                                       l2cap_sock_recv_cb
>
> If hci_rx_work processes the data that needs to be received before the sock is
> closed, then everything is normal; Otherwise, the work thread may access the
> released sock when receiving data.
>
> Add a chan mutex in the rx callback of the sock to achieve synchronization between
> the sock release and recv cb.
>
> Sock is dead, so set chan data to NULL, avoid others use invalid sock pointer.
>
> Reported-and-tested-by: syzbot+b7f6f8c9303466e16c8a@syzkaller.appspotmail.com
> Signed-off-by: Edward Adam Davis <eadavis@qq.com>
> ---
>  net/bluetooth/l2cap_sock.c | 25 ++++++++++++++++++++++---
>  1 file changed, 22 insertions(+), 3 deletions(-)
>
> diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
> index 6db60946c627..f45cdf9bc985 100644
> --- a/net/bluetooth/l2cap_sock.c
> +++ b/net/bluetooth/l2cap_sock.c
> @@ -1239,6 +1239,10 @@ static void l2cap_sock_kill(struct sock *sk)
>
>         BT_DBG("sk %p state %s", sk, state_to_string(sk->sk_state));
>
> +       /* Sock is dead, so set chan data to NULL, avoid other task use invalid
> +        * sock pointer.
> +        */
> +       l2cap_pi(sk)->chan->data = NULL;
>         /* Kill poor orphan */
>
>         l2cap_chan_put(l2cap_pi(sk)->chan);
> @@ -1481,12 +1485,25 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan)
>
>  static int l2cap_sock_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
>  {
> -       struct sock *sk = chan->data;
> -       struct l2cap_pinfo *pi = l2cap_pi(sk);
> +       struct sock *sk;
> +       struct l2cap_pinfo *pi;
>         int err;
>
> -       lock_sock(sk);
> +       /* To avoid race with sock_release, a chan lock needs to be added here
> +        * to synchronize the sock.
> +        */
> +       l2cap_chan_hold(chan);
> +       l2cap_chan_lock(chan);
> +       sk = chan->data;
>
> +       if (!sk) {
> +               l2cap_chan_unlock(chan);
> +               l2cap_chan_put(chan);
> +               return -ENXIO;
> +       }
> +
> +       pi = l2cap_pi(sk);
> +       lock_sock(sk);
>         if (chan->mode == L2CAP_MODE_ERTM && !list_empty(&pi->rx_busy)) {
>                 err = -ENOMEM;
>                 goto done;
> @@ -1535,6 +1552,8 @@ static int l2cap_sock_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
>
>  done:
>         release_sock(sk);
> +       l2cap_chan_unlock(chan);
> +       l2cap_chan_put(chan);
>
>         return err;
>  }
> --
> 2.43.0

Looks like this was never really tested properly:

============================================
WARNING: possible recursive locking detected
6.10.0-rc3-g4029dba6b6f1 #6823 Not tainted
--------------------------------------------
kworker/u5:0/35 is trying to acquire lock:
ffff888002ec2510 (&chan->lock#2/1){+.+.}-{3:3}, at:
l2cap_sock_recv_cb+0x44/0x1e0

but task is already holding lock:
ffff888002ec2510 (&chan->lock#2/1){+.+.}-{3:3}, at:
l2cap_get_chan_by_scid+0xaf/0xd0

other info that might help us debug this:
 Possible unsafe locking scenario:

       CPU0
       ----
  lock(&chan->lock#2/1);
  lock(&chan->lock#2/1);

 *** DEADLOCK ***

 May be due to missing lock nesting notation

3 locks held by kworker/u5:0/35:
 #0: ffff888002b8a940 ((wq_completion)hci0#2){+.+.}-{0:0}, at:
process_one_work+0x750/0x930
 #1: ffff888002c67dd0 ((work_completion)(&hdev->rx_work)){+.+.}-{0:0},
at: process_one_work+0x44e/0x930
 #2: ffff888002ec2510 (&chan->lock#2/1){+.+.}-{3:3}, at:
l2cap_get_chan_by_scid+0xaf/0xd0

l2cap_sock_recv_cb is assumed to be called with the chan_lock held so
perhaps we can just do:

       sk = chan->data;
       if (!sk)
               return -ENXIO;

-- 
Luiz Augusto von Dentz
Re: [PATCH] bluetooth/l2cap: sync sock recv cb and release
Posted by Edward Adam Davis 1 year, 5 months ago
Hi Luiz Augusto von Dentz,

On Thu, 20 Jun 2024 12:53:19 -0400, Luiz Augusto von Dentz wrote:
> >         release_sock(sk);
> > +       l2cap_chan_unlock(chan);
> > +       l2cap_chan_put(chan);
> >
> >         return err;
> >  }
> > --
> > 2.43.0
> 
> Looks like this was never really tested properly:
> 
> ============================================
> WARNING: possible recursive locking detected
> 6.10.0-rc3-g4029dba6b6f1 #6823 Not tainted
> --------------------------------------------
> kworker/u5:0/35 is trying to acquire lock:
> ffff888002ec2510 (&chan->lock#2/1){+.+.}-{3:3}, at:
> l2cap_sock_recv_cb+0x44/0x1e0
> 
> but task is already holding lock:
> ffff888002ec2510 (&chan->lock#2/1){+.+.}-{3:3}, at:
> l2cap_get_chan_by_scid+0xaf/0xd0
> 
> other info that might help us debug this:
>  Possible unsafe locking scenario:
> 
>        CPU0
>        ----
>   lock(&chan->lock#2/1);
>   lock(&chan->lock#2/1);
> 
>  *** DEADLOCK ***
> 
>  May be due to missing lock nesting notation
> 
> 3 locks held by kworker/u5:0/35:
>  #0: ffff888002b8a940 ((wq_completion)hci0#2){+.+.}-{0:0}, at:
> process_one_work+0x750/0x930
>  #1: ffff888002c67dd0 ((work_completion)(&hdev->rx_work)){+.+.}-{0:0},
> at: process_one_work+0x44e/0x930
>  #2: ffff888002ec2510 (&chan->lock#2/1){+.+.}-{3:3}, at:
> l2cap_get_chan_by_scid+0xaf/0xd0
> 
> l2cap_sock_recv_cb is assumed to be called with the chan_lock held so
> perhaps we can just do:
> 
>        sk = chan->data;
>        if (!sk)
>                return -ENXIO;

If the release occurs after this judgment, the same problem will still occur. 
Recv and release must be synchronized using locks, which can be solved by
adding new lock.

Please use the new patch https://syzkaller.appspot.com/text?tag=Patch&x=15d2c48e980000, I have tested in 
https://syzkaller.appspot.com/bug?extid=b7f6f8c9303466e16c8a

--
Edward
Re: [PATCH] bluetooth/l2cap: sync sock recv cb and release
Posted by Luiz Augusto von Dentz 1 year, 5 months ago
Hi Edward,

On Fri, Jun 21, 2024 at 11:46 PM Edward Adam Davis <eadavis@qq.com> wrote:
>
> Hi Luiz Augusto von Dentz,
>
> On Thu, 20 Jun 2024 12:53:19 -0400, Luiz Augusto von Dentz wrote:
> > >         release_sock(sk);
> > > +       l2cap_chan_unlock(chan);
> > > +       l2cap_chan_put(chan);
> > >
> > >         return err;
> > >  }
> > > --
> > > 2.43.0
> >
> > Looks like this was never really tested properly:
> >
> > ============================================
> > WARNING: possible recursive locking detected
> > 6.10.0-rc3-g4029dba6b6f1 #6823 Not tainted
> > --------------------------------------------
> > kworker/u5:0/35 is trying to acquire lock:
> > ffff888002ec2510 (&chan->lock#2/1){+.+.}-{3:3}, at:
> > l2cap_sock_recv_cb+0x44/0x1e0
> >
> > but task is already holding lock:
> > ffff888002ec2510 (&chan->lock#2/1){+.+.}-{3:3}, at:
> > l2cap_get_chan_by_scid+0xaf/0xd0
> >
> > other info that might help us debug this:
> >  Possible unsafe locking scenario:
> >
> >        CPU0
> >        ----
> >   lock(&chan->lock#2/1);
> >   lock(&chan->lock#2/1);
> >
> >  *** DEADLOCK ***
> >
> >  May be due to missing lock nesting notation
> >
> > 3 locks held by kworker/u5:0/35:
> >  #0: ffff888002b8a940 ((wq_completion)hci0#2){+.+.}-{0:0}, at:
> > process_one_work+0x750/0x930
> >  #1: ffff888002c67dd0 ((work_completion)(&hdev->rx_work)){+.+.}-{0:0},
> > at: process_one_work+0x44e/0x930
> >  #2: ffff888002ec2510 (&chan->lock#2/1){+.+.}-{3:3}, at:
> > l2cap_get_chan_by_scid+0xaf/0xd0
> >
> > l2cap_sock_recv_cb is assumed to be called with the chan_lock held so
> > perhaps we can just do:
> >
> >        sk = chan->data;
> >        if (!sk)
> >                return -ENXIO;
>
> If the release occurs after this judgment, the same problem will still occur.
> Recv and release must be synchronized using locks, which can be solved by
> adding new lock.
>
> Please use the new patch https://syzkaller.appspot.com/text?tag=Patch&x=15d2c48e980000, I have tested in
> https://syzkaller.appspot.com/bug?extid=b7f6f8c9303466e16c8a

Hmm, why don't we just fix l2cap_conless_channel? Btw,
l2cap_conless_channel is normally not used by any profiles thus why
there isn't any CI covering it, on the other hand l2cap_data_channel
is used by 99% of the profiles.

> --
> Edward
>


-- 
Luiz Augusto von Dentz
Re: [PATCH] bluetooth/l2cap: sync sock recv cb and release
Posted by Edward Adam Davis 1 year, 5 months ago
Hi Luiz Augusto von Dentz,

On Mon, 24 Jun 2024 09:36:14 -0400, Luiz Augusto von Dentz wrote:
> > > Looks like this was never really tested properly:
> > >
> > > ============================================
> > > WARNING: possible recursive locking detected
> > > 6.10.0-rc3-g4029dba6b6f1 #6823 Not tainted
> > > --------------------------------------------
> > > kworker/u5:0/35 is trying to acquire lock:
> > > ffff888002ec2510 (&chan->lock#2/1){+.+.}-{3:3}, at:
> > > l2cap_sock_recv_cb+0x44/0x1e0
> > >
> > > but task is already holding lock:
> > > ffff888002ec2510 (&chan->lock#2/1){+.+.}-{3:3}, at:
> > > l2cap_get_chan_by_scid+0xaf/0xd0
> > >
> > > other info that might help us debug this:
> > >  Possible unsafe locking scenario:
> > >
> > >        CPU0
> > >        ----
> > >   lock(&chan->lock#2/1);
> > >   lock(&chan->lock#2/1);
> > >
> > >  *** DEADLOCK ***
> > >
> > >  May be due to missing lock nesting notation
> > >
> > > 3 locks held by kworker/u5:0/35:
> > >  #0: ffff888002b8a940 ((wq_completion)hci0#2){+.+.}-{0:0}, at:
> > > process_one_work+0x750/0x930
> > >  #1: ffff888002c67dd0 ((work_completion)(&hdev->rx_work)){+.+.}-{0:0},
> > > at: process_one_work+0x44e/0x930
> > >  #2: ffff888002ec2510 (&chan->lock#2/1){+.+.}-{3:3}, at:
> > > l2cap_get_chan_by_scid+0xaf/0xd0
> > >
> > > l2cap_sock_recv_cb is assumed to be called with the chan_lock held so
> > > perhaps we can just do:
> > >
> > >        sk = chan->data;
> > >        if (!sk)
> > >                return -ENXIO;
> >
> > If the release occurs after this judgment, the same problem will still occur.
> > Recv and release must be synchronized using locks, which can be solved by
> > adding new lock.
> >
> > Please use the new patch https://syzkaller.appspot.com/text?tag=Patch&x=15d2c48e980000, I have tested in
> > https://syzkaller.appspot.com/bug?extid=b7f6f8c9303466e16c8a
> 
> Hmm, why don't we just fix l2cap_conless_channel? Btw,
> l2cap_conless_channel is normally not used by any profiles thus why
> there isn't any CI covering it, on the other hand l2cap_data_channel
> is used by 99% of the profiles.
Yes, we can fix l2cap_conless_channel, but key point is that "chan->lock"
cannot be used to synchronize l2cap_conless_channel and l2cap_sock_release,
otherwise it will form an AA lock with l2cap_data_channel.

Why not fix it in l2cap_conless_channel but in l2cap_sock_recv_cb, because
l2cap_sock_recv_cb is on the same layer as l2cap_sock_kill, using a new 
lock for synchronization is more appropriate.
--
Edward
Re: [PATCH] bluetooth/l2cap: sync sock recv cb and release
Posted by Luiz Augusto von Dentz 1 year, 5 months ago
Hi Edward,

On Mon, Jun 24, 2024 at 8:53 PM Edward Adam Davis <eadavis@qq.com> wrote:
>
> Hi Luiz Augusto von Dentz,
>
> On Mon, 24 Jun 2024 09:36:14 -0400, Luiz Augusto von Dentz wrote:
> > > > Looks like this was never really tested properly:
> > > >
> > > > ============================================
> > > > WARNING: possible recursive locking detected
> > > > 6.10.0-rc3-g4029dba6b6f1 #6823 Not tainted
> > > > --------------------------------------------
> > > > kworker/u5:0/35 is trying to acquire lock:
> > > > ffff888002ec2510 (&chan->lock#2/1){+.+.}-{3:3}, at:
> > > > l2cap_sock_recv_cb+0x44/0x1e0
> > > >
> > > > but task is already holding lock:
> > > > ffff888002ec2510 (&chan->lock#2/1){+.+.}-{3:3}, at:
> > > > l2cap_get_chan_by_scid+0xaf/0xd0
> > > >
> > > > other info that might help us debug this:
> > > >  Possible unsafe locking scenario:
> > > >
> > > >        CPU0
> > > >        ----
> > > >   lock(&chan->lock#2/1);
> > > >   lock(&chan->lock#2/1);
> > > >
> > > >  *** DEADLOCK ***
> > > >
> > > >  May be due to missing lock nesting notation
> > > >
> > > > 3 locks held by kworker/u5:0/35:
> > > >  #0: ffff888002b8a940 ((wq_completion)hci0#2){+.+.}-{0:0}, at:
> > > > process_one_work+0x750/0x930
> > > >  #1: ffff888002c67dd0 ((work_completion)(&hdev->rx_work)){+.+.}-{0:0},
> > > > at: process_one_work+0x44e/0x930
> > > >  #2: ffff888002ec2510 (&chan->lock#2/1){+.+.}-{3:3}, at:
> > > > l2cap_get_chan_by_scid+0xaf/0xd0
> > > >
> > > > l2cap_sock_recv_cb is assumed to be called with the chan_lock held so
> > > > perhaps we can just do:
> > > >
> > > >        sk = chan->data;
> > > >        if (!sk)
> > > >                return -ENXIO;
> > >
> > > If the release occurs after this judgment, the same problem will still occur.
> > > Recv and release must be synchronized using locks, which can be solved by
> > > adding new lock.
> > >
> > > Please use the new patch https://syzkaller.appspot.com/text?tag=Patch&x=15d2c48e980000, I have tested in
> > > https://syzkaller.appspot.com/bug?extid=b7f6f8c9303466e16c8a
> >
> > Hmm, why don't we just fix l2cap_conless_channel? Btw,
> > l2cap_conless_channel is normally not used by any profiles thus why
> > there isn't any CI covering it, on the other hand l2cap_data_channel
> > is used by 99% of the profiles.
> Yes, we can fix l2cap_conless_channel, but key point is that "chan->lock"
> cannot be used to synchronize l2cap_conless_channel and l2cap_sock_release,
> otherwise it will form an AA lock with l2cap_data_channel.

Don't follow, what l2cap_conless_channel has to do with
l2cap_data_channel? You can't have the same channel calling both, it
is either connectionless or it is not.

> Why not fix it in l2cap_conless_channel but in l2cap_sock_recv_cb, because
> l2cap_sock_recv_cb is on the same layer as l2cap_sock_kill, using a new
> lock for synchronization is more appropriate.

There are dedicated locks for both layers and now that we are doing
chan->lock for before chan->recv the locking sequence should be the
same in all instances.

> --
> Edward
>


-- 
Luiz Augusto von Dentz
[PATCH V3] Bluetooth/l2cap: sync sock recv cb and release
Posted by Edward Adam Davis 1 year, 5 months ago
The problem occurs between the system call to close the sock and hci_rx_work,
where the former releases the sock and the latter accesses it without lock protection.

           CPU0                       CPU1
           ----                       ----
           sock_close                 hci_rx_work
	   l2cap_sock_release         hci_acldata_packet
	   l2cap_sock_kill            l2cap_recv_frame
	   sk_free                    l2cap_conless_channel
	                              l2cap_sock_recv_cb

If hci_rx_work processes the data that needs to be received before the sock is
closed, then everything is normal; Otherwise, the work thread may access the
released sock when receiving data.

Add a chan lock in the l2cap_conless_channel of the sock to achieve sync
between the sock release and recv cb.

Sock is dead, so set chan data to NULL, avoid others use invalid sock pointer.

Reported-by: syzbot+b7f6f8c9303466e16c8a@syzkaller.appspotmail.com
Signed-off-by: Edward Adam Davis <eadavis@qq.com>
---
 net/bluetooth/l2cap_core.c |  3 +++
 net/bluetooth/l2cap_sock.c | 13 +++++++++++--
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index aed025734d04..35a9534eb62d 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -6771,10 +6771,13 @@ static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm,
 	bacpy(&bt_cb(skb)->l2cap.bdaddr, &hcon->dst);
 	bt_cb(skb)->l2cap.psm = psm;
 
+	l2cap_chan_lock(chan);
 	if (!chan->ops->recv(chan, skb)) {
+		l2cap_chan_unlock(chan);
 		l2cap_chan_put(chan);
 		return;
 	}
+	l2cap_chan_unlock(chan);
 
 drop:
 	l2cap_chan_put(chan);
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 6db60946c627..25091fb992a7 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1239,6 +1239,10 @@ static void l2cap_sock_kill(struct sock *sk)
 
 	BT_DBG("sk %p state %s", sk, state_to_string(sk->sk_state));
 
+	/* Sock is dead, so set chan data to NULL, avoid other task use invalid
+	 * sock pointer.
+	 */
+	l2cap_pi(sk)->chan->data = NULL;
 	/* Kill poor orphan */
 
 	l2cap_chan_put(l2cap_pi(sk)->chan);
@@ -1481,11 +1485,16 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan)
 
 static int l2cap_sock_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
 {
-	struct sock *sk = chan->data;
-	struct l2cap_pinfo *pi = l2cap_pi(sk);
+	struct sock *sk;
+	struct l2cap_pinfo *pi;
 	int err;
 
+	if (!chan->data)
+		return -ENXIO;
+
+	sk = chan->data;
 	lock_sock(sk);
+	pi = l2cap_pi(sk);
 
 	if (chan->mode == L2CAP_MODE_ERTM && !list_empty(&pi->rx_busy)) {
 		err = -ENOMEM;
-- 
2.43.0
Re: [PATCH] bluetooth/l2cap: sync sock recv cb and release
Posted by Edward Adam Davis 1 year, 5 months ago
Hi Luiz Augusto von Dentz,

On Thu, 20 Jun 2024 12:53:19 -0400, Luiz Augusto von Dentz wrote:
> >         release_sock(sk);
> > +       l2cap_chan_unlock(chan);
> > +       l2cap_chan_put(chan);
> >
> >         return err;
> >  }
> > --
> > 2.43.0
> 
> Looks like this was never really tested properly:
> 
> ============================================
> WARNING: possible recursive locking detected
> 6.10.0-rc3-g4029dba6b6f1 #6823 Not tainted
> --------------------------------------------
> kworker/u5:0/35 is trying to acquire lock:
> ffff888002ec2510 (&chan->lock#2/1){+.+.}-{3:3}, at:
> l2cap_sock_recv_cb+0x44/0x1e0
> 
> but task is already holding lock:
> ffff888002ec2510 (&chan->lock#2/1){+.+.}-{3:3}, at:
> l2cap_get_chan_by_scid+0xaf/0xd0
> 
> other info that might help us debug this:
>  Possible unsafe locking scenario:
> 
>        CPU0
>        ----
>   lock(&chan->lock#2/1);
>   lock(&chan->lock#2/1);
> 
>  *** DEADLOCK ***
> 
>  May be due to missing lock nesting notation
> 
> 3 locks held by kworker/u5:0/35:
>  #0: ffff888002b8a940 ((wq_completion)hci0#2){+.+.}-{0:0}, at:
> process_one_work+0x750/0x930
>  #1: ffff888002c67dd0 ((work_completion)(&hdev->rx_work)){+.+.}-{0:0},
> at: process_one_work+0x44e/0x930
>  #2: ffff888002ec2510 (&chan->lock#2/1){+.+.}-{3:3}, at:
> l2cap_get_chan_by_scid+0xaf/0xd0
> 
> l2cap_sock_recv_cb is assumed to be called with the chan_lock held so
> perhaps we can just do:
> 
>        sk = chan->data;
>        if (!sk)
>                return -ENXIO;

If the release occurs after this judgment, the same problem will still occur. 
Recv and release must be synchronized using locks, which can be solved by
adding new lock.

Can you provide a reproduction program for the AA lock mentioned above?

--
Edward