[PATCH net v2] net: netrom: fix lock order inversion in nr_add_node, nr_del_node and nr_dec_obs

Mashiro Chen posted 1 patch 2 months, 1 week ago
net/netrom/nr_route.c | 45 ++++++++++++++++++++++++-------------------
1 file changed, 25 insertions(+), 20 deletions(-)
[PATCH net v2] net: netrom: fix lock order inversion in nr_add_node, nr_del_node and nr_dec_obs
Posted by Mashiro Chen 2 months, 1 week ago
nr_del_node() and nr_dec_obs() acquire nr_node_list_lock first, then
call nr_remove_neigh() which internally acquires nr_neigh_list_lock.
nr_add_node() acquires node_lock first, then calls nr_remove_neigh()
which acquires nr_neigh_list_lock.

Both are the reverse of the lock order used in nr_rt_device_down() and
nr_rt_free(), which acquire nr_neigh_list_lock before nr_node_list_lock
and node_lock.

The resulting lock order inversions can cause an ABBA deadlock when the
following run concurrently:
  - a SIOCDELRT or SIOCNRDECOBS ioctl (requires CAP_NET_ADMIN)
  - bringing down a NET/ROM-attached network device

Fix by acquiring nr_neigh_list_lock before nr_node_list_lock and
node_lock in all three functions, following the canonical lock order,
and replacing the internal-locking nr_remove_neigh() with
nr_remove_neigh_locked() which assumes the caller already holds
nr_neigh_list_lock.

Fixes: e03e7f20ebf7 ("netrom: fix possible dead-lock in nr_rt_ioctl()")
Reported-by: syzbot+6eb7834837cf6a8db75b@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=6eb7834837cf6a8db75b
Signed-off-by: Mashiro Chen <mashiro.chen@mailbox.org>
---
Changes in v2:
  - Move the __nr_remove_neigh(), nr_remove_neigh_locked() and
    nr_remove_neigh() definitions before nr_add_node() to fix an
    implicit function declaration build error

 net/netrom/nr_route.c | 45 ++++++++++++++++++++++++-------------------
 1 file changed, 25 insertions(+), 20 deletions(-)

diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index 9cc29ae85b06f..c3cceee5a2284 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -75,7 +75,21 @@ static struct nr_neigh *nr_neigh_get_dev(ax25_address *callsign,
 	return found;
 }
 
-static void nr_remove_neigh(struct nr_neigh *);
+static inline void __nr_remove_neigh(struct nr_neigh *nr_neigh)
+{
+	hlist_del_init(&nr_neigh->neigh_node);
+	nr_neigh_put(nr_neigh);
+}
+
+#define nr_remove_neigh_locked(__neigh) \
+	__nr_remove_neigh(__neigh)
+
+static void nr_remove_neigh(struct nr_neigh *nr_neigh)
+{
+	spin_lock_bh(&nr_neigh_list_lock);
+	__nr_remove_neigh(nr_neigh);
+	spin_unlock_bh(&nr_neigh_list_lock);
+}
 
 /*      re-sort the routes in quality order.    */
 static void re_sort_routes(struct nr_node *nr_node, int x, int y)
@@ -211,6 +225,7 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic,
 		nr_neigh_put(nr_neigh);
 		return 0;
 	}
+	spin_lock_bh(&nr_neigh_list_lock);
 	nr_node_lock(nr_node);
 
 	if (quality != 0)
@@ -246,7 +261,7 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic,
 				nr_neigh_put(nr_node->routes[2].neighbour);
 
 				if (nr_node->routes[2].neighbour->count == 0 && !nr_node->routes[2].neighbour->locked)
-					nr_remove_neigh(nr_node->routes[2].neighbour);
+					nr_remove_neigh_locked(nr_node->routes[2].neighbour);
 
 				nr_node->routes[2].quality   = quality;
 				nr_node->routes[2].obs_count = obs_count;
@@ -281,6 +296,7 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic,
 
 	nr_neigh_put(nr_neigh);
 	nr_node_unlock(nr_node);
+	spin_unlock_bh(&nr_neigh_list_lock);
 	nr_node_put(nr_node);
 	return 0;
 }
@@ -293,22 +309,6 @@ static void nr_remove_node_locked(struct nr_node *nr_node)
 	nr_node_put(nr_node);
 }
 
-static inline void __nr_remove_neigh(struct nr_neigh *nr_neigh)
-{
-	hlist_del_init(&nr_neigh->neigh_node);
-	nr_neigh_put(nr_neigh);
-}
-
-#define nr_remove_neigh_locked(__neigh) \
-	__nr_remove_neigh(__neigh)
-
-static void nr_remove_neigh(struct nr_neigh *nr_neigh)
-{
-	spin_lock_bh(&nr_neigh_list_lock);
-	__nr_remove_neigh(nr_neigh);
-	spin_unlock_bh(&nr_neigh_list_lock);
-}
-
 /*
  *	"Delete" a node. Strictly speaking remove a route to a node. The node
  *	is only deleted if no routes are left to it.
@@ -331,6 +331,7 @@ static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct n
 		return -EINVAL;
 	}
 
+	spin_lock_bh(&nr_neigh_list_lock);
 	spin_lock_bh(&nr_node_list_lock);
 	nr_node_lock(nr_node);
 	for (i = 0; i < nr_node->count; i++) {
@@ -339,7 +340,7 @@ static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct n
 			nr_neigh_put(nr_neigh);
 
 			if (nr_neigh->count == 0 && !nr_neigh->locked)
-				nr_remove_neigh(nr_neigh);
+				nr_remove_neigh_locked(nr_neigh);
 			nr_neigh_put(nr_neigh);
 
 			nr_node->count--;
@@ -361,6 +362,7 @@ static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct n
 			}
 			nr_node_unlock(nr_node);
 			spin_unlock_bh(&nr_node_list_lock);
+			spin_unlock_bh(&nr_neigh_list_lock);
 
 			return 0;
 		}
@@ -368,6 +370,7 @@ static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct n
 	nr_neigh_put(nr_neigh);
 	nr_node_unlock(nr_node);
 	spin_unlock_bh(&nr_node_list_lock);
+	spin_unlock_bh(&nr_neigh_list_lock);
 	nr_node_put(nr_node);
 
 	return -EINVAL;
@@ -454,6 +457,7 @@ static int nr_dec_obs(void)
 	struct hlist_node *nodet;
 	int i;
 
+	spin_lock_bh(&nr_neigh_list_lock);
 	spin_lock_bh(&nr_node_list_lock);
 	nr_node_for_each_safe(s, nodet, &nr_node_list) {
 		nr_node_lock(s);
@@ -469,7 +473,7 @@ static int nr_dec_obs(void)
 				nr_neigh_put(nr_neigh);
 
 				if (nr_neigh->count == 0 && !nr_neigh->locked)
-					nr_remove_neigh(nr_neigh);
+					nr_remove_neigh_locked(nr_neigh);
 
 				s->count--;
 
@@ -497,6 +501,7 @@ static int nr_dec_obs(void)
 		nr_node_unlock(s);
 	}
 	spin_unlock_bh(&nr_node_list_lock);
+	spin_unlock_bh(&nr_neigh_list_lock);
 
 	return 0;
 }
-- 
2.53.0
Re: [PATCH net v2] net: netrom: fix lock order inversion in nr_add_node, nr_del_node and nr_dec_obs
Posted by Jakub Kicinski 2 months ago
On Mon,  6 Apr 2026 19:49:04 +0800 Mashiro Chen wrote:
> nr_del_node() and nr_dec_obs() acquire nr_node_list_lock first, then
> call nr_remove_neigh() which internally acquires nr_neigh_list_lock.
> nr_add_node() acquires node_lock first, then calls nr_remove_neigh()
> which acquires nr_neigh_list_lock.

Can we please merge nr_node_list_lock and nr_neigh_list_lock
into one instead?

Let's try to simplify this code as much as possible.
It's a maintenance nightmare and has fewer users than syzbot reports
(I'm not joking).
Re: [PATCH net v2] net: netrom: fix lock order inversion in nr_add_node, nr_del_node and nr_dec_obs
Posted by Mashiro Chen 2 months ago
On Mon, 6 Apr 2026 Jakub Kicinski wrote:
 > Can we please merge nr_node_list_lock and nr_neigh_list_lock
 > into one instead?

This makes more sense, thanks for the suggestion.
I'll rework the patch to merge the two locks into one and
repost next week.

On 4/10/26 10:54, Jakub Kicinski wrote:
> On Mon,  6 Apr 2026 19:49:04 +0800 Mashiro Chen wrote:
>> nr_del_node() and nr_dec_obs() acquire nr_node_list_lock first, then
>> call nr_remove_neigh() which internally acquires nr_neigh_list_lock.
>> nr_add_node() acquires node_lock first, then calls nr_remove_neigh()
>> which acquires nr_neigh_list_lock.
> Can we please merge nr_node_list_lock and nr_neigh_list_lock
> into one instead?
>
> Let's try to simplify this code as much as possible.
> It's a maintenance nightmare and has fewer users than syzbot reports
> (I'm not joking).