include/net/net_namespace.h |  1 +
net/core/net_namespace.c    | 50 +++++++++++++++++++++++++++----------
2 files changed, 38 insertions(+), 13 deletions(-)
Currently, unhash_nsid() scans the entire net_namespace_list for each
netns in a destruction batch during cleanup_net(). This leads to
an O(M * N) complexity, where M is the batch size and N is the total
number of namespaces in the system.
Reduce the complexity to O(N) by introducing an 'is_dying' flag to mark
the entire batch of namespaces being destroyed. This allows unhash_nsid()
to perform a single-pass traversal over the system's namespaces. In
this pass, for each survivor namespace, iterate through its netns_ids
and remove any mappings that point to a marked namespace.
Signed-off-by: Qiliang Yuan <realwujing@gmail.com>
Signed-off-by: Qiliang Yuan <yuanql9@chinatelecom.cn>
---
include/net/net_namespace.h | 1 +
net/core/net_namespace.c | 50 +++++++++++++++++++++++++++----------
2 files changed, 38 insertions(+), 13 deletions(-)
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index cb664f6e3558..bd1acc6056ac 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -69,6 +69,7 @@ struct net {
unsigned int dev_base_seq; /* protected by rtnl_mutex */
u32 ifindex;
+ bool is_dying;
spinlock_t nsid_lock;
atomic_t fnhe_genid;
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index a6e6a964a287..d24e46c034f2 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -413,6 +413,8 @@ static __net_init int preinit_net(struct net *net, struct user_namespace *user_n
get_random_bytes(&net->hash_mix, sizeof(u32));
net->dev_base_seq = 1;
+ net->ifindex = 0;
+ net->is_dying = false;
net->user_ns = user_ns;
idr_init(&net->netns_ids);
@@ -624,9 +626,10 @@ void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid)
}
EXPORT_SYMBOL_GPL(net_ns_get_ownership);
-static void unhash_nsid(struct net *net, struct net *last)
+static void unhash_nsid(struct net *last)
{
struct net *tmp;
+
/* This function is only called from cleanup_net() work,
* and this work is the only process, that may delete
* a net from net_namespace_list. So, when the below
@@ -636,20 +639,34 @@ static void unhash_nsid(struct net *net, struct net *last)
for_each_net(tmp) {
int id;
- spin_lock(&tmp->nsid_lock);
- id = __peernet2id(tmp, net);
- if (id >= 0)
- idr_remove(&tmp->netns_ids, id);
- spin_unlock(&tmp->nsid_lock);
- if (id >= 0)
- rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL,
- GFP_KERNEL);
+ for (id = 0; ; id++) {
+ struct net *peer;
+ bool dying;
+
+ rcu_read_lock();
+ peer = idr_get_next(&tmp->netns_ids, &id);
+ dying = peer && peer->is_dying;
+ rcu_read_unlock();
+
+ if (!peer)
+ break;
+ if (!dying)
+ continue;
+
+ spin_lock(&tmp->nsid_lock);
+ if (idr_find(&tmp->netns_ids, id) == peer)
+ idr_remove(&tmp->netns_ids, id);
+ else
+ peer = NULL;
+ spin_unlock(&tmp->nsid_lock);
+
+ if (peer)
+ rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0,
+ NULL, GFP_KERNEL);
+ }
if (tmp == last)
break;
}
- spin_lock(&net->nsid_lock);
- idr_destroy(&net->netns_ids);
- spin_unlock(&net->nsid_lock);
}
static LLIST_HEAD(cleanup_list);
@@ -688,8 +705,15 @@ static void cleanup_net(struct work_struct *work)
last = list_last_entry(&net_namespace_list, struct net, list);
up_write(&net_rwsem);
+ llist_for_each_entry(net, net_kill_list, cleanup_list)
+ net->is_dying = true;
+
+ unhash_nsid(last);
+
llist_for_each_entry(net, net_kill_list, cleanup_list) {
- unhash_nsid(net, last);
+ spin_lock(&net->nsid_lock);
+ idr_destroy(&net->netns_ids);
+ spin_unlock(&net->nsid_lock);
list_add_tail(&net->exit_list, &net_exit_list);
}
--
2.51.0
On Mon, Jan 26, 2026 at 10:47 AM Qiliang Yuan <realwujing@gmail.com> wrote:
>
> Currently, unhash_nsid() scans the entire net_namespace_list for each
> netns in a destruction batch during cleanup_net(). This leads to
> an O(M * N) complexity, where M is the batch size and N is the total
> number of namespaces in the system.
>
> Reduce the complexity to O(N) by introducing an 'is_dying' flag to mark
> the entire batch of namespaces being destroyed. This allows unhash_nsid()
> to perform a single-pass traversal over the system's namespaces. In
> this pass, for each survivor namespace, iterate through its netns_ids
> and remove any mappings that point to a marked namespace.
>
> Signed-off-by: Qiliang Yuan <realwujing@gmail.com>
> Signed-off-by: Qiliang Yuan <yuanql9@chinatelecom.cn>
> ---
> include/net/net_namespace.h | 1 +
> net/core/net_namespace.c | 50 +++++++++++++++++++++++++++----------
> 2 files changed, 38 insertions(+), 13 deletions(-)
>
> diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
> index cb664f6e3558..bd1acc6056ac 100644
> --- a/include/net/net_namespace.h
> +++ b/include/net/net_namespace.h
> @@ -69,6 +69,7 @@ struct net {
>
> unsigned int dev_base_seq; /* protected by rtnl_mutex */
> u32 ifindex;
> + bool is_dying;
>
> spinlock_t nsid_lock;
> atomic_t fnhe_genid;
> diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
> index a6e6a964a287..d24e46c034f2 100644
> --- a/net/core/net_namespace.c
> +++ b/net/core/net_namespace.c
> @@ -413,6 +413,8 @@ static __net_init int preinit_net(struct net *net, struct user_namespace *user_n
>
> get_random_bytes(&net->hash_mix, sizeof(u32));
> net->dev_base_seq = 1;
> + net->ifindex = 0;
Unrelated change (and not needed)
> + net->is_dying = false;
No need to clear fields, whole net is cleared at alloc time (zalloc)
> net->user_ns = user_ns;
>
> idr_init(&net->netns_ids);
> @@ -624,9 +626,10 @@ void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid)
> }
> EXPORT_SYMBOL_GPL(net_ns_get_ownership);
>
> -static void unhash_nsid(struct net *net, struct net *last)
> +static void unhash_nsid(struct net *last)
> {
> struct net *tmp;
> +
> /* This function is only called from cleanup_net() work,
> * and this work is the only process, that may delete
> * a net from net_namespace_list. So, when the below
> @@ -636,20 +639,34 @@ static void unhash_nsid(struct net *net, struct net *last)
> for_each_net(tmp) {
> int id;
>
> - spin_lock(&tmp->nsid_lock);
> - id = __peernet2id(tmp, net);
> - if (id >= 0)
> - idr_remove(&tmp->netns_ids, id);
> - spin_unlock(&tmp->nsid_lock);
> - if (id >= 0)
> - rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL,
> - GFP_KERNEL);
> + for (id = 0; ; id++) {
> + struct net *peer;
> + bool dying;
> +
> + rcu_read_lock();
> + peer = idr_get_next(&tmp->netns_ids, &id);
> + dying = peer && peer->is_dying;
> + rcu_read_unlock();
Hopefully the number of entries in netns_ids is small, otherwise this
could have O(N*M) complexity.
> +
> + if (!peer)
> + break;
> + if (!dying)
> + continue;
> +
> + spin_lock(&tmp->nsid_lock);
> + if (idr_find(&tmp->netns_ids, id) == peer)
> + idr_remove(&tmp->netns_ids, id);
> + else
> + peer = NULL;
> + spin_unlock(&tmp->nsid_lock);
> +
> + if (peer)
> + rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0,
> + NULL, GFP_KERNEL);
> + }
> if (tmp == last)
> break;
> }
> - spin_lock(&net->nsid_lock);
> - idr_destroy(&net->netns_ids);
> - spin_unlock(&net->nsid_lock);
> }
>
> static LLIST_HEAD(cleanup_list);
> @@ -688,8 +705,15 @@ static void cleanup_net(struct work_struct *work)
> last = list_last_entry(&net_namespace_list, struct net, list);
> up_write(&net_rwsem);
>
> + llist_for_each_entry(net, net_kill_list, cleanup_list)
> + net->is_dying = true;
Move this to __put_net(), no need for yet another loop.
> +
> + unhash_nsid(last);
> +
> llist_for_each_entry(net, net_kill_list, cleanup_list) {
> - unhash_nsid(net, last);
> + spin_lock(&net->nsid_lock);
> + idr_destroy(&net->netns_ids);
> + spin_unlock(&net->nsid_lock);
> list_add_tail(&net->exit_list, &net_exit_list);
> }
>
> --
> 2.51.0
>
Currently, unhash_nsid() scans the entire net_namespace_list for each
netns in a destruction batch during cleanup_net(). This leads to
O(M_batch * N_system * M_nsids) complexity, where M_batch is the
destruction batch size, N_system is the total number of namespaces,
and M_nsids is the number of IDs in each IDR.
Reduce the complexity to O(N_system * M_nsids) by introducing an
'is_dying' flag to mark namespaces being destroyed. This allows
unhash_nsid() to perform a single-pass traversal over the system's
namespaces. In this pass, for each survivor namespace, iterate
through its netns_ids and remove any mappings that point to a marked
namespace, effectively eliminating the M_batch multiplier.
Signed-off-by: Qiliang Yuan <realwujing@gmail.com>
Signed-off-by: Qiliang Yuan <yuanql9@chinatelecom.cn>
---
v2:
- Remove unrelated ifindex and is_dying initialization in preinit_net.
- Move is_dying = true to __put_net() to avoid an extra loop in cleanup_net.
v1:
- Initial proposal using 'is_dying' flag to batch unhash_nsid calls.
include/net/net_namespace.h | 1 +
net/core/net_namespace.c | 46 ++++++++++++++++++++++++++-----------
2 files changed, 34 insertions(+), 13 deletions(-)
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index cb664f6e3558..bd1acc6056ac 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -69,6 +69,7 @@ struct net {
unsigned int dev_base_seq; /* protected by rtnl_mutex */
u32 ifindex;
+ bool is_dying;
spinlock_t nsid_lock;
atomic_t fnhe_genid;
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index a6e6a964a287..50fdd4f9bb3b 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -624,9 +624,10 @@ void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid)
}
EXPORT_SYMBOL_GPL(net_ns_get_ownership);
-static void unhash_nsid(struct net *net, struct net *last)
+static void unhash_nsid(struct net *last)
{
struct net *tmp;
+
/* This function is only called from cleanup_net() work,
* and this work is the only process, that may delete
* a net from net_namespace_list. So, when the below
@@ -636,20 +637,34 @@ static void unhash_nsid(struct net *net, struct net *last)
for_each_net(tmp) {
int id;
- spin_lock(&tmp->nsid_lock);
- id = __peernet2id(tmp, net);
- if (id >= 0)
- idr_remove(&tmp->netns_ids, id);
- spin_unlock(&tmp->nsid_lock);
- if (id >= 0)
- rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL,
- GFP_KERNEL);
+ for (id = 0; ; id++) {
+ struct net *peer;
+ bool dying;
+
+ rcu_read_lock();
+ peer = idr_get_next(&tmp->netns_ids, &id);
+ dying = peer && peer->is_dying;
+ rcu_read_unlock();
+
+ if (!peer)
+ break;
+ if (!dying)
+ continue;
+
+ spin_lock(&tmp->nsid_lock);
+ if (idr_find(&tmp->netns_ids, id) == peer)
+ idr_remove(&tmp->netns_ids, id);
+ else
+ peer = NULL;
+ spin_unlock(&tmp->nsid_lock);
+
+ if (peer)
+ rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0,
+ NULL, GFP_KERNEL);
+ }
if (tmp == last)
break;
}
- spin_lock(&net->nsid_lock);
- idr_destroy(&net->netns_ids);
- spin_unlock(&net->nsid_lock);
}
static LLIST_HEAD(cleanup_list);
@@ -688,8 +703,12 @@ static void cleanup_net(struct work_struct *work)
last = list_last_entry(&net_namespace_list, struct net, list);
up_write(&net_rwsem);
+ unhash_nsid(last);
+
llist_for_each_entry(net, net_kill_list, cleanup_list) {
- unhash_nsid(net, last);
+ spin_lock(&net->nsid_lock);
+ idr_destroy(&net->netns_ids);
+ spin_unlock(&net->nsid_lock);
list_add_tail(&net->exit_list, &net_exit_list);
}
@@ -739,6 +758,7 @@ static DECLARE_WORK(net_cleanup_work, cleanup_net);
void __put_net(struct net *net)
{
ref_tracker_dir_exit(&net->refcnt_tracker);
+ net->is_dying = true;
/* Cleanup the network namespace in process context */
if (llist_add(&net->cleanup_list, &cleanup_list))
queue_work(netns_wq, &net_cleanup_work);
--
2.51.0
On 2026/1/26 19:24, Qiliang Yuan wrote:
> Currently, unhash_nsid() scans the entire net_namespace_list for each
> netns in a destruction batch during cleanup_net(). This leads to
> O(M_batch * N_system * M_nsids) complexity, where M_batch is the
> destruction batch size, N_system is the total number of namespaces,
> and M_nsids is the number of IDs in each IDR.
>
> Reduce the complexity to O(N_system * M_nsids) by introducing an
> 'is_dying' flag to mark namespaces being destroyed. This allows
> unhash_nsid() to perform a single-pass traversal over the system's
> namespaces. In this pass, for each survivor namespace, iterate
> through its netns_ids and remove any mappings that point to a marked
> namespace, effectively eliminating the M_batch multiplier.
>
> Signed-off-by: Qiliang Yuan <realwujing@gmail.com>
> Signed-off-by: Qiliang Yuan <yuanql9@chinatelecom.cn>
I have said it many times. Don't send a new version by replying to your
previous version, which is not friendly to the reviewers, OK?
And the target tree should be added. For this patch, it should be "net-next".
> ---
> v2:
> - Remove unrelated ifindex and is_dying initialization in preinit_net.
> - Move is_dying = true to __put_net() to avoid an extra loop in cleanup_net.
> v1:
> - Initial proposal using 'is_dying' flag to batch unhash_nsid calls.
>
> include/net/net_namespace.h | 1 +
> net/core/net_namespace.c | 46 ++++++++++++++++++++++++++-----------
> 2 files changed, 34 insertions(+), 13 deletions(-)
>
> diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
> index cb664f6e3558..bd1acc6056ac 100644
> --- a/include/net/net_namespace.h
> +++ b/include/net/net_namespace.h
> @@ -69,6 +69,7 @@ struct net {
>
> unsigned int dev_base_seq; /* protected by rtnl_mutex */
> u32 ifindex;
> + bool is_dying;
>
> spinlock_t nsid_lock;
> atomic_t fnhe_genid;
> diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
> index a6e6a964a287..50fdd4f9bb3b 100644
> --- a/net/core/net_namespace.c
> +++ b/net/core/net_namespace.c
> @@ -624,9 +624,10 @@ void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid)
> }
> EXPORT_SYMBOL_GPL(net_ns_get_ownership);
>
> -static void unhash_nsid(struct net *net, struct net *last)
> +static void unhash_nsid(struct net *last)
> {
> struct net *tmp;
> +
> /* This function is only called from cleanup_net() work,
> * and this work is the only process, that may delete
> * a net from net_namespace_list. So, when the below
> @@ -636,20 +637,34 @@ static void unhash_nsid(struct net *net, struct net *last)
> for_each_net(tmp) {
> int id;
>
> - spin_lock(&tmp->nsid_lock);
> - id = __peernet2id(tmp, net);
> - if (id >= 0)
> - idr_remove(&tmp->netns_ids, id);
> - spin_unlock(&tmp->nsid_lock);
> - if (id >= 0)
> - rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL,
> - GFP_KERNEL);
> + for (id = 0; ; id++) {
> + struct net *peer;
> + bool dying;
> +
> + rcu_read_lock();
> + peer = idr_get_next(&tmp->netns_ids, &id);
> + dying = peer && peer->is_dying;
> + rcu_read_unlock();
> +
> + if (!peer)
> + break;
> + if (!dying)
> + continue;
> +
> + spin_lock(&tmp->nsid_lock);
> + if (idr_find(&tmp->netns_ids, id) == peer)
> + idr_remove(&tmp->netns_ids, id);
> + else
> + peer = NULL;
> + spin_unlock(&tmp->nsid_lock);
> +
> + if (peer)
> + rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0,
> + NULL, GFP_KERNEL);
> + }
> if (tmp == last)
> break;
> }
> - spin_lock(&net->nsid_lock);
> - idr_destroy(&net->netns_ids);
> - spin_unlock(&net->nsid_lock);
> }
>
> static LLIST_HEAD(cleanup_list);
> @@ -688,8 +703,12 @@ static void cleanup_net(struct work_struct *work)
> last = list_last_entry(&net_namespace_list, struct net, list);
> up_write(&net_rwsem);
>
> + unhash_nsid(last);
> +
> llist_for_each_entry(net, net_kill_list, cleanup_list) {
> - unhash_nsid(net, last);
> + spin_lock(&net->nsid_lock);
> + idr_destroy(&net->netns_ids);
> + spin_unlock(&net->nsid_lock);
> list_add_tail(&net->exit_list, &net_exit_list);
> }
>
> @@ -739,6 +758,7 @@ static DECLARE_WORK(net_cleanup_work, cleanup_net);
> void __put_net(struct net *net)
> {
> ref_tracker_dir_exit(&net->refcnt_tracker);
> + net->is_dying = true;
> /* Cleanup the network namespace in process context */
> if (llist_add(&net->cleanup_list, &cleanup_list))
> queue_work(netns_wq, &net_cleanup_work);
>
On Mon, Jan 26, 2026 at 12:25 PM Qiliang Yuan <realwujing@gmail.com> wrote:
>
> Currently, unhash_nsid() scans the entire net_namespace_list for each
> netns in a destruction batch during cleanup_net(). This leads to
> O(M_batch * N_system * M_nsids) complexity, where M_batch is the
> destruction batch size, N_system is the total number of namespaces,
> and M_nsids is the number of IDs in each IDR.

Please wait ~24 hours before sending a new version.

Documentation/process/maintainer-netdev.rst

Resending after review
~~~~~~~~~~~~~~~~~~~~~~

Allow at least 24 hours to pass between postings. This will ensure
reviewers from all geographical locations have a chance to chime in.
Do not wait too long (weeks) between postings either as it will make
it harder for reviewers to recall all the context.

Make sure you address all the feedback in your new posting. Do not post a
new version of the code if the discussion about the previous version is
still ongoing, unless directly instructed by a reviewer.

The new version of patches should be posted as a separate thread, not as
a reply to the previous posting. Change log should include a link to the
previous posting (see :ref:`Changes requested`).
© 2016 - 2026 Red Hat, Inc.