[PATCH] netns: optimize netns cleaning by batching unhash_nsid calls
Posted by Qiliang Yuan 1 week, 5 days ago
Currently, unhash_nsid() scans the entire net_namespace_list for each
netns in a destruction batch during cleanup_net(). This leads to
an O(M * N) complexity, where M is the batch size and N is the total
number of namespaces in the system.
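
For reference, the pre-patch flow in cleanup_net() is essentially the
following nested walk (a simplified sketch of the code removed further
below):

	llist_for_each_entry(net, net_kill_list, cleanup_list) {	/* M dying netns */
		unhash_nsid(net, last);		/* walks net_namespace_list: N netns */
		list_add_tail(&net->exit_list, &net_exit_list);
	}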

Reduce the complexity to O(N) by introducing an 'is_dying' flag to mark
the entire batch of namespaces being destroyed. This allows unhash_nsid()
to perform a single-pass traversal over the system's namespaces. In
this pass, for each survivor namespace, iterate through its netns_ids
and remove any mappings that point to a marked namespace.

Signed-off-by: Qiliang Yuan <realwujing@gmail.com>
Signed-off-by: Qiliang Yuan <yuanql9@chinatelecom.cn>
---
 include/net/net_namespace.h |  1 +
 net/core/net_namespace.c    | 50 +++++++++++++++++++++++++++----------
 2 files changed, 38 insertions(+), 13 deletions(-)

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index cb664f6e3558..bd1acc6056ac 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -69,6 +69,7 @@ struct net {
 
 	unsigned int		dev_base_seq;	/* protected by rtnl_mutex */
 	u32			ifindex;
+	bool			is_dying;
 
 	spinlock_t		nsid_lock;
 	atomic_t		fnhe_genid;
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index a6e6a964a287..d24e46c034f2 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -413,6 +413,8 @@ static __net_init int preinit_net(struct net *net, struct user_namespace *user_n
 
 	get_random_bytes(&net->hash_mix, sizeof(u32));
 	net->dev_base_seq = 1;
+	net->ifindex = 0;
+	net->is_dying = false;
 	net->user_ns = user_ns;
 
 	idr_init(&net->netns_ids);
@@ -624,9 +626,10 @@ void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid)
 }
 EXPORT_SYMBOL_GPL(net_ns_get_ownership);
 
-static void unhash_nsid(struct net *net, struct net *last)
+static void unhash_nsid(struct net *last)
 {
 	struct net *tmp;
+
 	/* This function is only called from cleanup_net() work,
 	 * and this work is the only process, that may delete
 	 * a net from net_namespace_list. So, when the below
@@ -636,20 +639,34 @@ static void unhash_nsid(struct net *net, struct net *last)
 	for_each_net(tmp) {
 		int id;
 
-		spin_lock(&tmp->nsid_lock);
-		id = __peernet2id(tmp, net);
-		if (id >= 0)
-			idr_remove(&tmp->netns_ids, id);
-		spin_unlock(&tmp->nsid_lock);
-		if (id >= 0)
-			rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL,
-					  GFP_KERNEL);
+		for (id = 0; ; id++) {
+			struct net *peer;
+			bool dying;
+
+			rcu_read_lock();
+			peer = idr_get_next(&tmp->netns_ids, &id);
+			dying = peer && peer->is_dying;
+			rcu_read_unlock();
+
+			if (!peer)
+				break;
+			if (!dying)
+				continue;
+
+			spin_lock(&tmp->nsid_lock);
+			if (idr_find(&tmp->netns_ids, id) == peer)
+				idr_remove(&tmp->netns_ids, id);
+			else
+				peer = NULL;
+			spin_unlock(&tmp->nsid_lock);
+
+			if (peer)
+				rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0,
+						  NULL, GFP_KERNEL);
+		}
 		if (tmp == last)
 			break;
 	}
-	spin_lock(&net->nsid_lock);
-	idr_destroy(&net->netns_ids);
-	spin_unlock(&net->nsid_lock);
 }
 
 static LLIST_HEAD(cleanup_list);
@@ -688,8 +705,15 @@ static void cleanup_net(struct work_struct *work)
 	last = list_last_entry(&net_namespace_list, struct net, list);
 	up_write(&net_rwsem);
 
+	llist_for_each_entry(net, net_kill_list, cleanup_list)
+		net->is_dying = true;
+
+	unhash_nsid(last);
+
 	llist_for_each_entry(net, net_kill_list, cleanup_list) {
-		unhash_nsid(net, last);
+		spin_lock(&net->nsid_lock);
+		idr_destroy(&net->netns_ids);
+		spin_unlock(&net->nsid_lock);
 		list_add_tail(&net->exit_list, &net_exit_list);
 	}
 
-- 
2.51.0
Re: [PATCH] netns: optimize netns cleaning by batching unhash_nsid calls
Posted by Eric Dumazet 1 week, 4 days ago
On Mon, Jan 26, 2026 at 10:47 AM Qiliang Yuan <realwujing@gmail.com> wrote:
>
> Currently, unhash_nsid() scans the entire net_namespace_list for each
> netns in a destruction batch during cleanup_net(). This leads to
> an O(M * N) complexity, where M is the batch size and N is the total
> number of namespaces in the system.
>
> Reduce the complexity to O(N) by introducing an 'is_dying' flag to mark
> the entire batch of namespaces being destroyed. This allows unhash_nsid()
> to perform a single-pass traversal over the system's namespaces. In
> this pass, for each survivor namespace, iterate through its netns_ids
> and remove any mappings that point to a marked namespace.
>
> Signed-off-by: Qiliang Yuan <realwujing@gmail.com>
> Signed-off-by: Qiliang Yuan <yuanql9@chinatelecom.cn>
> ---
>  include/net/net_namespace.h |  1 +
>  net/core/net_namespace.c    | 50 +++++++++++++++++++++++++++----------
>  2 files changed, 38 insertions(+), 13 deletions(-)
>
> diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
> index cb664f6e3558..bd1acc6056ac 100644
> --- a/include/net/net_namespace.h
> +++ b/include/net/net_namespace.h
> @@ -69,6 +69,7 @@ struct net {
>
>         unsigned int            dev_base_seq;   /* protected by rtnl_mutex */
>         u32                     ifindex;
> +       bool                    is_dying;
>
>         spinlock_t              nsid_lock;
>         atomic_t                fnhe_genid;
> diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
> index a6e6a964a287..d24e46c034f2 100644
> --- a/net/core/net_namespace.c
> +++ b/net/core/net_namespace.c
> @@ -413,6 +413,8 @@ static __net_init int preinit_net(struct net *net, struct user_namespace *user_n
>
>         get_random_bytes(&net->hash_mix, sizeof(u32));
>         net->dev_base_seq = 1;
> +       net->ifindex = 0;
Unrelated change (and not needed)

> +       net->is_dying = false;
No need to clear fields, whole net is cleared at alloc time (zalloc)
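
For reference, net_alloc() hands back a zeroed object, roughly
(paraphrased from net/core/net_namespace.c, not quoted in this thread):

	net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);	/* whole struct net starts zeroed */

so a fresh namespace already has is_dying == false and ifindex == 0.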

>         net->user_ns = user_ns;
>
>         idr_init(&net->netns_ids);
> @@ -624,9 +626,10 @@ void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid)
>  }
>  EXPORT_SYMBOL_GPL(net_ns_get_ownership);
>
> -static void unhash_nsid(struct net *net, struct net *last)
> +static void unhash_nsid(struct net *last)
>  {
>         struct net *tmp;
> +
>         /* This function is only called from cleanup_net() work,
>          * and this work is the only process, that may delete
>          * a net from net_namespace_list. So, when the below
> @@ -636,20 +639,34 @@ static void unhash_nsid(struct net *net, struct net *last)
>         for_each_net(tmp) {
>                 int id;
>
> -               spin_lock(&tmp->nsid_lock);
> -               id = __peernet2id(tmp, net);
> -               if (id >= 0)
> -                       idr_remove(&tmp->netns_ids, id);
> -               spin_unlock(&tmp->nsid_lock);
> -               if (id >= 0)
> -                       rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL,
> -                                         GFP_KERNEL);
> +               for (id = 0; ; id++) {
> +                       struct net *peer;
> +                       bool dying;
> +
> +                       rcu_read_lock();
> +                       peer = idr_get_next(&tmp->netns_ids, &id);
> +                       dying = peer && peer->is_dying;
> +                       rcu_read_unlock();

Hopefully the number of entries in netns_ids is small, otherwise this
could have O(N*M) complexity.
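
Put differently, the id++/idr_get_next() walk visits every allocated nsid
of each surviving namespace once, i.e. it behaves like the sketch below
(illustrative only; remove_and_notify() is a hypothetical stand-in for the
locked idr_remove() + rtnl_net_notifyid() sequence in the patch):

	struct net *peer;
	int id;

	idr_for_each_entry(&tmp->netns_ids, peer, id) {		/* M_nsids entries */
		if (peer->is_dying)
			remove_and_notify(tmp, peer, id);	/* hypothetical helper */
	}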

> +
> +                       if (!peer)
> +                               break;
> +                       if (!dying)
> +                               continue;
> +
> +                       spin_lock(&tmp->nsid_lock);
> +                       if (idr_find(&tmp->netns_ids, id) == peer)
> +                               idr_remove(&tmp->netns_ids, id);
> +                       else
> +                               peer = NULL;
> +                       spin_unlock(&tmp->nsid_lock);
> +
> +                       if (peer)
> +                               rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0,
> +                                                 NULL, GFP_KERNEL);
> +               }
>                 if (tmp == last)
>                         break;
>         }
> -       spin_lock(&net->nsid_lock);
> -       idr_destroy(&net->netns_ids);
> -       spin_unlock(&net->nsid_lock);
>  }
>
>  static LLIST_HEAD(cleanup_list);
> @@ -688,8 +705,15 @@ static void cleanup_net(struct work_struct *work)
>         last = list_last_entry(&net_namespace_list, struct net, list);
>         up_write(&net_rwsem);
>
> +       llist_for_each_entry(net, net_kill_list, cleanup_list)
> +               net->is_dying = true;
Move this to __put_net(), no need for yet another loop.
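
i.e. set the flag where the last reference is dropped, along these lines
(this is what the v2 further down ends up doing):

	void __put_net(struct net *net)
	{
		ref_tracker_dir_exit(&net->refcnt_tracker);
		net->is_dying = true;	/* mark before queueing cleanup work */
		/* Cleanup the network namespace in process context */
		if (llist_add(&net->cleanup_list, &cleanup_list))
			queue_work(netns_wq, &net_cleanup_work);
	}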

> +
> +       unhash_nsid(last);
> +
>         llist_for_each_entry(net, net_kill_list, cleanup_list) {
> -               unhash_nsid(net, last);
> +               spin_lock(&net->nsid_lock);
> +               idr_destroy(&net->netns_ids);
> +               spin_unlock(&net->nsid_lock);
>                 list_add_tail(&net->exit_list, &net_exit_list);
>         }
>
> --
> 2.51.0
>
[PATCH v2] netns: optimize netns cleaning by batching unhash_nsid calls
Posted by Qiliang Yuan 1 week, 4 days ago
Currently, unhash_nsid() scans the entire net_namespace_list for each
netns in a destruction batch during cleanup_net(). This leads to
O(M_batch * N_system * M_nsids) complexity, where M_batch is the
destruction batch size, N_system is the total number of namespaces,
and M_nsids is the number of IDs in each IDR.

Reduce the complexity to O(N_system * M_nsids) by introducing an
'is_dying' flag to mark namespaces being destroyed. This allows
unhash_nsid() to perform a single-pass traversal over the system's
namespaces. In this pass, for each survivor namespace, iterate
through its netns_ids and remove any mappings that point to a marked
namespace, effectively eliminating the M_batch multiplier.

Signed-off-by: Qiliang Yuan <realwujing@gmail.com>
Signed-off-by: Qiliang Yuan <yuanql9@chinatelecom.cn>
---
v2:
 - Remove unrelated ifindex and is_dying initialization in preinit_net.
 - Move is_dying = true to __put_net() to avoid an extra loop in cleanup_net.
v1:
 - Initial proposal using 'is_dying' flag to batch unhash_nsid calls.

 include/net/net_namespace.h |  1 +
 net/core/net_namespace.c    | 46 ++++++++++++++++++++++++++-----------
 2 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index cb664f6e3558..bd1acc6056ac 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -69,6 +69,7 @@ struct net {
 
 	unsigned int		dev_base_seq;	/* protected by rtnl_mutex */
 	u32			ifindex;
+	bool			is_dying;
 
 	spinlock_t		nsid_lock;
 	atomic_t		fnhe_genid;
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index a6e6a964a287..50fdd4f9bb3b 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -624,9 +624,10 @@ void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid)
 }
 EXPORT_SYMBOL_GPL(net_ns_get_ownership);
 
-static void unhash_nsid(struct net *net, struct net *last)
+static void unhash_nsid(struct net *last)
 {
 	struct net *tmp;
+
 	/* This function is only called from cleanup_net() work,
 	 * and this work is the only process, that may delete
 	 * a net from net_namespace_list. So, when the below
@@ -636,20 +637,34 @@ static void unhash_nsid(struct net *net, struct net *last)
 	for_each_net(tmp) {
 		int id;
 
-		spin_lock(&tmp->nsid_lock);
-		id = __peernet2id(tmp, net);
-		if (id >= 0)
-			idr_remove(&tmp->netns_ids, id);
-		spin_unlock(&tmp->nsid_lock);
-		if (id >= 0)
-			rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL,
-					  GFP_KERNEL);
+		for (id = 0; ; id++) {
+			struct net *peer;
+			bool dying;
+
+			rcu_read_lock();
+			peer = idr_get_next(&tmp->netns_ids, &id);
+			dying = peer && peer->is_dying;
+			rcu_read_unlock();
+
+			if (!peer)
+				break;
+			if (!dying)
+				continue;
+
+			spin_lock(&tmp->nsid_lock);
+			if (idr_find(&tmp->netns_ids, id) == peer)
+				idr_remove(&tmp->netns_ids, id);
+			else
+				peer = NULL;
+			spin_unlock(&tmp->nsid_lock);
+
+			if (peer)
+				rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0,
+						  NULL, GFP_KERNEL);
+		}
 		if (tmp == last)
 			break;
 	}
-	spin_lock(&net->nsid_lock);
-	idr_destroy(&net->netns_ids);
-	spin_unlock(&net->nsid_lock);
 }
 
 static LLIST_HEAD(cleanup_list);
@@ -688,8 +703,12 @@ static void cleanup_net(struct work_struct *work)
 	last = list_last_entry(&net_namespace_list, struct net, list);
 	up_write(&net_rwsem);
 
+	unhash_nsid(last);
+
 	llist_for_each_entry(net, net_kill_list, cleanup_list) {
-		unhash_nsid(net, last);
+		spin_lock(&net->nsid_lock);
+		idr_destroy(&net->netns_ids);
+		spin_unlock(&net->nsid_lock);
 		list_add_tail(&net->exit_list, &net_exit_list);
 	}
 
@@ -739,6 +758,7 @@ static DECLARE_WORK(net_cleanup_work, cleanup_net);
 void __put_net(struct net *net)
 {
 	ref_tracker_dir_exit(&net->refcnt_tracker);
+	net->is_dying = true;
 	/* Cleanup the network namespace in process context */
 	if (llist_add(&net->cleanup_list, &cleanup_list))
 		queue_work(netns_wq, &net_cleanup_work);
-- 
2.51.0
Re: [PATCH v2] netns: optimize netns cleaning by batching unhash_nsid calls
Posted by Menglong Dong 1 week, 3 days ago
On 2026/1/26 19:24, Qiliang Yuan wrote:
> Currently, unhash_nsid() scans the entire net_namespace_list for each
> netns in a destruction batch during cleanup_net(). This leads to
> O(M_batch * N_system * M_nsids) complexity, where M_batch is the
> destruction batch size, N_system is the total number of namespaces,
> and M_nsids is the number of IDs in each IDR.
> 
> Reduce the complexity to O(N_system * M_nsids) by introducing an
> 'is_dying' flag to mark namespaces being destroyed. This allows
> unhash_nsid() to perform a single-pass traversal over the system's
> namespaces. In this pass, for each survivor namespace, iterate
> through its netns_ids and remove any mappings that point to a marked
> namespace, effectively eliminating the M_batch multiplier.
> 
> Signed-off-by: Qiliang Yuan <realwujing@gmail.com>
> Signed-off-by: Qiliang Yuan <yuanql9@chinatelecom.cn>

I have said this many times: don't send a new version as a reply to your
previous version; that is not friendly to reviewers, OK?

And the target tree should be added. For this patch, it should be "net-next".

> ---
> v2:
>  - Remove unrelated ifindex and is_dying initialization in preinit_net.
>  - Move is_dying = true to __put_net() to avoid an extra loop in cleanup_net.
> v1:
>  - Initial proposal using 'is_dying' flag to batch unhash_nsid calls.
> 
>  include/net/net_namespace.h |  1 +
>  net/core/net_namespace.c    | 46 ++++++++++++++++++++++++++-----------
>  2 files changed, 34 insertions(+), 13 deletions(-)
> 
> diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
> index cb664f6e3558..bd1acc6056ac 100644
> --- a/include/net/net_namespace.h
> +++ b/include/net/net_namespace.h
> @@ -69,6 +69,7 @@ struct net {
>  
>  	unsigned int		dev_base_seq;	/* protected by rtnl_mutex */
>  	u32			ifindex;
> +	bool			is_dying;
>  
>  	spinlock_t		nsid_lock;
>  	atomic_t		fnhe_genid;
> diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
> index a6e6a964a287..50fdd4f9bb3b 100644
> --- a/net/core/net_namespace.c
> +++ b/net/core/net_namespace.c
> @@ -624,9 +624,10 @@ void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid)
>  }
>  EXPORT_SYMBOL_GPL(net_ns_get_ownership);
>  
> -static void unhash_nsid(struct net *net, struct net *last)
> +static void unhash_nsid(struct net *last)
>  {
>  	struct net *tmp;
> +
>  	/* This function is only called from cleanup_net() work,
>  	 * and this work is the only process, that may delete
>  	 * a net from net_namespace_list. So, when the below
> @@ -636,20 +637,34 @@ static void unhash_nsid(struct net *net, struct net *last)
>  	for_each_net(tmp) {
>  		int id;
>  
> -		spin_lock(&tmp->nsid_lock);
> -		id = __peernet2id(tmp, net);
> -		if (id >= 0)
> -			idr_remove(&tmp->netns_ids, id);
> -		spin_unlock(&tmp->nsid_lock);
> -		if (id >= 0)
> -			rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL,
> -					  GFP_KERNEL);
> +		for (id = 0; ; id++) {
> +			struct net *peer;
> +			bool dying;
> +
> +			rcu_read_lock();
> +			peer = idr_get_next(&tmp->netns_ids, &id);
> +			dying = peer && peer->is_dying;
> +			rcu_read_unlock();
> +
> +			if (!peer)
> +				break;
> +			if (!dying)
> +				continue;
> +
> +			spin_lock(&tmp->nsid_lock);
> +			if (idr_find(&tmp->netns_ids, id) == peer)
> +				idr_remove(&tmp->netns_ids, id);
> +			else
> +				peer = NULL;
> +			spin_unlock(&tmp->nsid_lock);
> +
> +			if (peer)
> +				rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0,
> +						  NULL, GFP_KERNEL);
> +		}
>  		if (tmp == last)
>  			break;
>  	}
> -	spin_lock(&net->nsid_lock);
> -	idr_destroy(&net->netns_ids);
> -	spin_unlock(&net->nsid_lock);
>  }
>  
>  static LLIST_HEAD(cleanup_list);
> @@ -688,8 +703,12 @@ static void cleanup_net(struct work_struct *work)
>  	last = list_last_entry(&net_namespace_list, struct net, list);
>  	up_write(&net_rwsem);
>  
> +	unhash_nsid(last);
> +
>  	llist_for_each_entry(net, net_kill_list, cleanup_list) {
> -		unhash_nsid(net, last);
> +		spin_lock(&net->nsid_lock);
> +		idr_destroy(&net->netns_ids);
> +		spin_unlock(&net->nsid_lock);
>  		list_add_tail(&net->exit_list, &net_exit_list);
>  	}
>  
> @@ -739,6 +758,7 @@ static DECLARE_WORK(net_cleanup_work, cleanup_net);
>  void __put_net(struct net *net)
>  {
>  	ref_tracker_dir_exit(&net->refcnt_tracker);
> +	net->is_dying = true;
>  	/* Cleanup the network namespace in process context */
>  	if (llist_add(&net->cleanup_list, &cleanup_list))
>  		queue_work(netns_wq, &net_cleanup_work);
>
Re: [PATCH v2] netns: optimize netns cleaning by batching unhash_nsid calls
Posted by Eric Dumazet 1 week, 4 days ago
On Mon, Jan 26, 2026 at 12:25 PM Qiliang Yuan <realwujing@gmail.com> wrote:
>
> Currently, unhash_nsid() scans the entire net_namespace_list for each
> netns in a destruction batch during cleanup_net(). This leads to
> O(M_batch * N_system * M_nsids) complexity, where M_batch is the
> destruction batch size, N_system is the total number of namespaces,
> and M_nsids is the number of IDs in each IDR.

Please wait ~24 hours before sending a new version.

Documentation/process/maintainer-netdev.rst

Resending after review
~~~~~~~~~~~~~~~~~~~~~~

Allow at least 24 hours to pass between postings. This will ensure reviewers
from all geographical locations have a chance to chime in. Do not wait
too long (weeks) between postings either as it will make it harder for reviewers
to recall all the context.

Make sure you address all the feedback in your new posting. Do not post a new
version of the code if the discussion about the previous version is still
ongoing, unless directly instructed by a reviewer.

The new version of patches should be posted as a separate thread,
not as a reply to the previous posting. Change log should include a link
to the previous posting (see :ref:`Changes requested`).