include/net/net_namespace.h | 1 + net/core/net_namespace.c | 46 ++++++++++++++++++++++++++----------- 2 files changed, 34 insertions(+), 13 deletions(-)
Currently, unhash_nsid() scans the entire net_namespace_list for each
netns in a destruction batch during cleanup_net(). This leads to
O(M_batch * N_system * M_nsids) complexity, where M_batch is the
destruction batch size, N_system is the total number of namespaces,
and M_nsids is the number of IDs in each IDR.
Reduce the complexity to O(N_system * M_nsids) by introducing an
'is_dying' flag to mark namespaces being destroyed. This allows
unhash_nsid() to perform a single-pass traversal over the system's
namespaces. In this pass, for each survivor namespace, iterate
through its netns_ids and remove any mappings that point to a marked
namespace, effectively eliminating the M_batch multiplier.
Signed-off-by: Qiliang Yuan <realwujing@gmail.com>
Signed-off-by: Qiliang Yuan <yuanql9@chinatelecom.cn>
---
v2:
- Remove unrelated ifindex and is_dying initialization in preinit_net.
- Move is_dying = true to __put_net() to avoid an extra loop in cleanup_net.
v1:
- Initial proposal using 'is_dying' flag to batch unhash_nsid calls.
include/net/net_namespace.h | 1 +
net/core/net_namespace.c | 46 ++++++++++++++++++++++++++-----------
2 files changed, 34 insertions(+), 13 deletions(-)
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index cb664f6e3558..bd1acc6056ac 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -69,6 +69,7 @@ struct net {
unsigned int dev_base_seq; /* protected by rtnl_mutex */
u32 ifindex;
+ bool is_dying;
spinlock_t nsid_lock;
atomic_t fnhe_genid;
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index a6e6a964a287..50fdd4f9bb3b 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -624,9 +624,10 @@ void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid)
}
EXPORT_SYMBOL_GPL(net_ns_get_ownership);
-static void unhash_nsid(struct net *net, struct net *last)
+static void unhash_nsid(struct net *last)
{
struct net *tmp;
+
/* This function is only called from cleanup_net() work,
* and this work is the only process, that may delete
* a net from net_namespace_list. So, when the below
@@ -636,20 +637,34 @@ static void unhash_nsid(struct net *net, struct net *last)
for_each_net(tmp) {
int id;
- spin_lock(&tmp->nsid_lock);
- id = __peernet2id(tmp, net);
- if (id >= 0)
- idr_remove(&tmp->netns_ids, id);
- spin_unlock(&tmp->nsid_lock);
- if (id >= 0)
- rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL,
- GFP_KERNEL);
+ for (id = 0; ; id++) {
+ struct net *peer;
+ bool dying;
+
+ rcu_read_lock();
+ peer = idr_get_next(&tmp->netns_ids, &id);
+ dying = peer && peer->is_dying;
+ rcu_read_unlock();
+
+ if (!peer)
+ break;
+ if (!dying)
+ continue;
+
+ spin_lock(&tmp->nsid_lock);
+ if (idr_find(&tmp->netns_ids, id) == peer)
+ idr_remove(&tmp->netns_ids, id);
+ else
+ peer = NULL;
+ spin_unlock(&tmp->nsid_lock);
+
+ if (peer)
+ rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0,
+ NULL, GFP_KERNEL);
+ }
if (tmp == last)
break;
}
- spin_lock(&net->nsid_lock);
- idr_destroy(&net->netns_ids);
- spin_unlock(&net->nsid_lock);
}
static LLIST_HEAD(cleanup_list);
@@ -688,8 +703,12 @@ static void cleanup_net(struct work_struct *work)
last = list_last_entry(&net_namespace_list, struct net, list);
up_write(&net_rwsem);
+ unhash_nsid(last);
+
llist_for_each_entry(net, net_kill_list, cleanup_list) {
- unhash_nsid(net, last);
+ spin_lock(&net->nsid_lock);
+ idr_destroy(&net->netns_ids);
+ spin_unlock(&net->nsid_lock);
list_add_tail(&net->exit_list, &net_exit_list);
}
@@ -739,6 +758,7 @@ static DECLARE_WORK(net_cleanup_work, cleanup_net);
void __put_net(struct net *net)
{
ref_tracker_dir_exit(&net->refcnt_tracker);
+ net->is_dying = true;
/* Cleanup the network namespace in process context */
if (llist_add(&net->cleanup_list, &cleanup_list))
queue_work(netns_wq, &net_cleanup_work);
--
2.51.0
On 2026/1/26 19:24, Qiliang Yuan wrote:
> Currently, unhash_nsid() scans the entire net_namespace_list for each
> netns in a destruction batch during cleanup_net(). This leads to
> O(M_batch * N_system * M_nsids) complexity, where M_batch is the
> destruction batch size, N_system is the total number of namespaces,
> and M_nsids is the number of IDs in each IDR.
>
> Reduce the complexity to O(N_system * M_nsids) by introducing an
> 'is_dying' flag to mark namespaces being destroyed. This allows
> unhash_nsid() to perform a single-pass traversal over the system's
> namespaces. In this pass, for each survivor namespace, iterate
> through its netns_ids and remove any mappings that point to a marked
> namespace, effectively eliminating the M_batch multiplier.
>
> Signed-off-by: Qiliang Yuan <realwujing@gmail.com>
> Signed-off-by: Qiliang Yuan <yuanql9@chinatelecom.cn>
I have said this many times: don't send a new version as a reply to your
previous version. That is not friendly to the reviewers, OK?
Also, the target tree should be added. For this patch, it should be "net-next".
> ---
> v2:
> - Remove unrelated ifindex and is_dying initialization in preinit_net.
> - Move is_dying = true to __put_net() to avoid an extra loop in cleanup_net.
> v1:
> - Initial proposal using 'is_dying' flag to batch unhash_nsid calls.
>
> include/net/net_namespace.h | 1 +
> net/core/net_namespace.c | 46 ++++++++++++++++++++++++++-----------
> 2 files changed, 34 insertions(+), 13 deletions(-)
>
> diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
> index cb664f6e3558..bd1acc6056ac 100644
> --- a/include/net/net_namespace.h
> +++ b/include/net/net_namespace.h
> @@ -69,6 +69,7 @@ struct net {
>
> unsigned int dev_base_seq; /* protected by rtnl_mutex */
> u32 ifindex;
> + bool is_dying;
>
> spinlock_t nsid_lock;
> atomic_t fnhe_genid;
> diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
> index a6e6a964a287..50fdd4f9bb3b 100644
> --- a/net/core/net_namespace.c
> +++ b/net/core/net_namespace.c
> @@ -624,9 +624,10 @@ void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid)
> }
> EXPORT_SYMBOL_GPL(net_ns_get_ownership);
>
> -static void unhash_nsid(struct net *net, struct net *last)
> +static void unhash_nsid(struct net *last)
> {
> struct net *tmp;
> +
> /* This function is only called from cleanup_net() work,
> * and this work is the only process, that may delete
> * a net from net_namespace_list. So, when the below
> @@ -636,20 +637,34 @@ static void unhash_nsid(struct net *net, struct net *last)
> for_each_net(tmp) {
> int id;
>
> - spin_lock(&tmp->nsid_lock);
> - id = __peernet2id(tmp, net);
> - if (id >= 0)
> - idr_remove(&tmp->netns_ids, id);
> - spin_unlock(&tmp->nsid_lock);
> - if (id >= 0)
> - rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL,
> - GFP_KERNEL);
> + for (id = 0; ; id++) {
> + struct net *peer;
> + bool dying;
> +
> + rcu_read_lock();
> + peer = idr_get_next(&tmp->netns_ids, &id);
> + dying = peer && peer->is_dying;
> + rcu_read_unlock();
> +
> + if (!peer)
> + break;
> + if (!dying)
> + continue;
> +
> + spin_lock(&tmp->nsid_lock);
> + if (idr_find(&tmp->netns_ids, id) == peer)
> + idr_remove(&tmp->netns_ids, id);
> + else
> + peer = NULL;
> + spin_unlock(&tmp->nsid_lock);
> +
> + if (peer)
> + rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0,
> + NULL, GFP_KERNEL);
> + }
> if (tmp == last)
> break;
> }
> - spin_lock(&net->nsid_lock);
> - idr_destroy(&net->netns_ids);
> - spin_unlock(&net->nsid_lock);
> }
>
> static LLIST_HEAD(cleanup_list);
> @@ -688,8 +703,12 @@ static void cleanup_net(struct work_struct *work)
> last = list_last_entry(&net_namespace_list, struct net, list);
> up_write(&net_rwsem);
>
> + unhash_nsid(last);
> +
> llist_for_each_entry(net, net_kill_list, cleanup_list) {
> - unhash_nsid(net, last);
> + spin_lock(&net->nsid_lock);
> + idr_destroy(&net->netns_ids);
> + spin_unlock(&net->nsid_lock);
> list_add_tail(&net->exit_list, &net_exit_list);
> }
>
> @@ -739,6 +758,7 @@ static DECLARE_WORK(net_cleanup_work, cleanup_net);
> void __put_net(struct net *net)
> {
> ref_tracker_dir_exit(&net->refcnt_tracker);
> + net->is_dying = true;
> /* Cleanup the network namespace in process context */
> if (llist_add(&net->cleanup_list, &cleanup_list))
> queue_work(netns_wq, &net_cleanup_work);
>
On Mon, Jan 26, 2026 at 12:25 PM Qiliang Yuan <realwujing@gmail.com> wrote:
>
> Currently, unhash_nsid() scans the entire net_namespace_list for each
> netns in a destruction batch during cleanup_net(). This leads to
> O(M_batch * N_system * M_nsids) complexity, where M_batch is the
> destruction batch size, N_system is the total number of namespaces,
> and M_nsids is the number of IDs in each IDR.

Please wait ~24 hours before sending a new version.

Documentation/process/maintainer-netdev.rst

Resending after review
~~~~~~~~~~~~~~~~~~~~~~

Allow at least 24 hours to pass between postings. This will ensure reviewers
from all geographical locations have a chance to chime in. Do not wait
too long (weeks) between postings either as it will make it harder for reviewers
to recall all the context.

Make sure you address all the feedback in your new posting. Do not post a new
version of the code if the discussion about the previous version is still
ongoing, unless directly instructed by a reviewer.

The new version of patches should be posted as a separate thread,
not as a reply to the previous posting. Change log should include a link
to the previous posting (see :ref:`Changes requested`).
© 2016 - 2026 Red Hat, Inc.