net/netfilter/nf_tables_api.c | 2 +- net/netfilter/nft_connlimit.c | 4 ++-- net/netfilter/nft_counter.c | 4 ++-- net/netfilter/nft_last.c | 4 ++-- net/netfilter/nft_limit.c | 4 ++-- net/netfilter/nft_quota.c | 4 ++-- 6 files changed, 11 insertions(+), 11 deletions(-)
nft_*.c files whose NFT_EXPR_STATEFUL flag is set on need to
use __GFP_ACCOUNT flag for objects that are dynamically
allocated from the packet path.
Such objects are allocated inside .init() or .clone() callbacks
of struct nft_expr_ops executed in task context while processing
netlink messages.
In addition, this patch adds accounting to nft_set_elem_expr_clone()
used for the same purposes.
Signed-off-by: Vasily Averin <vvs@openvz.org>
---
net/netfilter/nf_tables_api.c | 2 +-
net/netfilter/nft_connlimit.c | 4 ++--
net/netfilter/nft_counter.c | 4 ++--
net/netfilter/nft_last.c | 4 ++--
net/netfilter/nft_limit.c | 4 ++--
net/netfilter/nft_quota.c | 4 ++--
6 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 04be94236a34..e01241151ef7 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5447,7 +5447,7 @@ int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
int err, i, k;
for (i = 0; i < set->num_exprs; i++) {
- expr = kzalloc(set->exprs[i]->ops->size, GFP_KERNEL);
+ expr = kzalloc(set->exprs[i]->ops->size, GFP_KERNEL_ACCOUNT);
if (!expr)
goto err_expr;
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
index 3362417ebfdb..9c2146aac59e 100644
--- a/net/netfilter/nft_connlimit.c
+++ b/net/netfilter/nft_connlimit.c
@@ -77,7 +77,7 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx,
invert = true;
}
- priv->list = kmalloc(sizeof(*priv->list), GFP_KERNEL);
+ priv->list = kmalloc(sizeof(*priv->list), GFP_KERNEL_ACCOUNT);
if (!priv->list)
return -ENOMEM;
@@ -214,7 +214,7 @@ static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src)
struct nft_connlimit *priv_dst = nft_expr_priv(dst);
struct nft_connlimit *priv_src = nft_expr_priv(src);
- priv_dst->list = kmalloc(sizeof(*priv_dst->list), GFP_ATOMIC);
+ priv_dst->list = kmalloc(sizeof(*priv_dst->list), GFP_ATOMIC | __GFP_ACCOUNT);
if (!priv_dst->list)
return -ENOMEM;
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index f179e8c3b0ca..040a697d96b3 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -62,7 +62,7 @@ static int nft_counter_do_init(const struct nlattr * const tb[],
struct nft_counter __percpu *cpu_stats;
struct nft_counter *this_cpu;
- cpu_stats = alloc_percpu(struct nft_counter);
+ cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_KERNEL_ACCOUNT);
if (cpu_stats == NULL)
return -ENOMEM;
@@ -235,7 +235,7 @@ static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
nft_counter_fetch(priv, &total);
- cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_ATOMIC);
+ cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_ATOMIC | __GFP_ACCOUNT);
if (cpu_stats == NULL)
return -ENOMEM;
diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c
index 4f745a409d34..4cf4f6349855 100644
--- a/net/netfilter/nft_last.c
+++ b/net/netfilter/nft_last.c
@@ -30,7 +30,7 @@ static int nft_last_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
u64 last_jiffies;
int err;
- last = kzalloc(sizeof(*last), GFP_KERNEL);
+ last = kzalloc(sizeof(*last), GFP_KERNEL_ACCOUNT);
if (!last)
return -ENOMEM;
@@ -105,7 +105,7 @@ static int nft_last_clone(struct nft_expr *dst, const struct nft_expr *src)
{
struct nft_last_priv *priv_dst = nft_expr_priv(dst);
- priv_dst->last = kzalloc(sizeof(*priv_dst->last), GFP_ATOMIC);
+ priv_dst->last = kzalloc(sizeof(*priv_dst->last), GFP_ATOMIC | __GFP_ACCOUNT);
if (!priv_dst->last)
return -ENOMEM;
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index a726b623963d..d7779d5f3cbb 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -90,7 +90,7 @@ static int nft_limit_init(struct nft_limit_priv *priv,
priv->rate);
}
- priv->limit = kmalloc(sizeof(*priv->limit), GFP_KERNEL);
+ priv->limit = kmalloc(sizeof(*priv->limit), GFP_KERNEL_ACCOUNT);
if (!priv->limit)
return -ENOMEM;
@@ -144,7 +144,7 @@ static int nft_limit_clone(struct nft_limit_priv *priv_dst,
priv_dst->burst = priv_src->burst;
priv_dst->invert = priv_src->invert;
- priv_dst->limit = kmalloc(sizeof(*priv_dst->limit), GFP_ATOMIC);
+ priv_dst->limit = kmalloc(sizeof(*priv_dst->limit), GFP_ATOMIC | __GFP_ACCOUNT);
if (!priv_dst->limit)
return -ENOMEM;
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index f394a0b562f6..2def4d92a170 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -90,7 +90,7 @@ static int nft_quota_do_init(const struct nlattr * const tb[],
return -EOPNOTSUPP;
}
- priv->consumed = kmalloc(sizeof(*priv->consumed), GFP_KERNEL);
+ priv->consumed = kmalloc(sizeof(*priv->consumed), GFP_KERNEL_ACCOUNT);
if (!priv->consumed)
return -ENOMEM;
@@ -236,7 +236,7 @@ static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src)
{
struct nft_quota *priv_dst = nft_expr_priv(dst);
- priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), GFP_ATOMIC);
+ priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), GFP_ATOMIC | __GFP_ACCOUNT);
if (!priv_dst->consumed)
return -ENOMEM;
--
2.31.1
On Thu, Mar 31, 2022 at 11:40:23AM +0300, Vasily Averin wrote:
> nft_*.c files whose NFT_EXPR_STATEFUL flag is set on need to
> use __GFP_ACCOUNT flag for objects that are dynamically
> allocated from the packet path.
>
> Such objects are allocated inside .init() or .clone() callbacks
> of struct nft_expr_ops executed in task context while processing
> netlink messages.
>
> In addition, this patch adds accounting to nft_set_elem_expr_clone()
> used for the same purposes.
The patch looks good to me. The only concern I have is a potential
performance regression. Are any of these allocations happening on really
hot paths? From a very brief look it seems like the answer is no,
but I'm not well familiar with the netfilter code. Would be nice to
have a confirmation from somebody working on the netfilter.
We are roughly talking about x2 slower memory allocations, which
is usually not a problem at all, given that allocations are still
very cheap.
Please, feel free to add my
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
Thanks!
>
> Signed-off-by: Vasily Averin <vvs@openvz.org>
> ---
> net/netfilter/nf_tables_api.c | 2 +-
> net/netfilter/nft_connlimit.c | 4 ++--
> net/netfilter/nft_counter.c | 4 ++--
> net/netfilter/nft_last.c | 4 ++--
> net/netfilter/nft_limit.c | 4 ++--
> net/netfilter/nft_quota.c | 4 ++--
> 6 files changed, 11 insertions(+), 11 deletions(-)
>
> diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
> index 04be94236a34..e01241151ef7 100644
> --- a/net/netfilter/nf_tables_api.c
> +++ b/net/netfilter/nf_tables_api.c
> @@ -5447,7 +5447,7 @@ int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
> int err, i, k;
>
> for (i = 0; i < set->num_exprs; i++) {
> - expr = kzalloc(set->exprs[i]->ops->size, GFP_KERNEL);
> + expr = kzalloc(set->exprs[i]->ops->size, GFP_KERNEL_ACCOUNT);
> if (!expr)
> goto err_expr;
>
> diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
> index 3362417ebfdb..9c2146aac59e 100644
> --- a/net/netfilter/nft_connlimit.c
> +++ b/net/netfilter/nft_connlimit.c
> @@ -77,7 +77,7 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx,
> invert = true;
> }
>
> - priv->list = kmalloc(sizeof(*priv->list), GFP_KERNEL);
> + priv->list = kmalloc(sizeof(*priv->list), GFP_KERNEL_ACCOUNT);
> if (!priv->list)
> return -ENOMEM;
>
> @@ -214,7 +214,7 @@ static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src)
> struct nft_connlimit *priv_dst = nft_expr_priv(dst);
> struct nft_connlimit *priv_src = nft_expr_priv(src);
>
> - priv_dst->list = kmalloc(sizeof(*priv_dst->list), GFP_ATOMIC);
> + priv_dst->list = kmalloc(sizeof(*priv_dst->list), GFP_ATOMIC | __GFP_ACCOUNT);
> if (!priv_dst->list)
> return -ENOMEM;
>
> diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
> index f179e8c3b0ca..040a697d96b3 100644
> --- a/net/netfilter/nft_counter.c
> +++ b/net/netfilter/nft_counter.c
> @@ -62,7 +62,7 @@ static int nft_counter_do_init(const struct nlattr * const tb[],
> struct nft_counter __percpu *cpu_stats;
> struct nft_counter *this_cpu;
>
> - cpu_stats = alloc_percpu(struct nft_counter);
> + cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_KERNEL_ACCOUNT);
> if (cpu_stats == NULL)
> return -ENOMEM;
>
> @@ -235,7 +235,7 @@ static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
>
> nft_counter_fetch(priv, &total);
>
> - cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_ATOMIC);
> + cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_ATOMIC | __GFP_ACCOUNT);
> if (cpu_stats == NULL)
> return -ENOMEM;
>
> diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c
> index 4f745a409d34..4cf4f6349855 100644
> --- a/net/netfilter/nft_last.c
> +++ b/net/netfilter/nft_last.c
> @@ -30,7 +30,7 @@ static int nft_last_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
> u64 last_jiffies;
> int err;
>
> - last = kzalloc(sizeof(*last), GFP_KERNEL);
> + last = kzalloc(sizeof(*last), GFP_KERNEL_ACCOUNT);
> if (!last)
> return -ENOMEM;
>
> @@ -105,7 +105,7 @@ static int nft_last_clone(struct nft_expr *dst, const struct nft_expr *src)
> {
> struct nft_last_priv *priv_dst = nft_expr_priv(dst);
>
> - priv_dst->last = kzalloc(sizeof(*priv_dst->last), GFP_ATOMIC);
> + priv_dst->last = kzalloc(sizeof(*priv_dst->last), GFP_ATOMIC | __GFP_ACCOUNT);
> if (!priv_dst->last)
> return -ENOMEM;
>
> diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
> index a726b623963d..d7779d5f3cbb 100644
> --- a/net/netfilter/nft_limit.c
> +++ b/net/netfilter/nft_limit.c
> @@ -90,7 +90,7 @@ static int nft_limit_init(struct nft_limit_priv *priv,
> priv->rate);
> }
>
> - priv->limit = kmalloc(sizeof(*priv->limit), GFP_KERNEL);
> + priv->limit = kmalloc(sizeof(*priv->limit), GFP_KERNEL_ACCOUNT);
> if (!priv->limit)
> return -ENOMEM;
>
> @@ -144,7 +144,7 @@ static int nft_limit_clone(struct nft_limit_priv *priv_dst,
> priv_dst->burst = priv_src->burst;
> priv_dst->invert = priv_src->invert;
>
> - priv_dst->limit = kmalloc(sizeof(*priv_dst->limit), GFP_ATOMIC);
> + priv_dst->limit = kmalloc(sizeof(*priv_dst->limit), GFP_ATOMIC | __GFP_ACCOUNT);
> if (!priv_dst->limit)
> return -ENOMEM;
>
> diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
> index f394a0b562f6..2def4d92a170 100644
> --- a/net/netfilter/nft_quota.c
> +++ b/net/netfilter/nft_quota.c
> @@ -90,7 +90,7 @@ static int nft_quota_do_init(const struct nlattr * const tb[],
> return -EOPNOTSUPP;
> }
>
> - priv->consumed = kmalloc(sizeof(*priv->consumed), GFP_KERNEL);
> + priv->consumed = kmalloc(sizeof(*priv->consumed), GFP_KERNEL_ACCOUNT);
> if (!priv->consumed)
> return -ENOMEM;
>
> @@ -236,7 +236,7 @@ static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src)
> {
> struct nft_quota *priv_dst = nft_expr_priv(dst);
>
> - priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), GFP_ATOMIC);
> + priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), GFP_ATOMIC | __GFP_ACCOUNT);
> if (!priv_dst->consumed)
> return -ENOMEM;
>
> --
> 2.31.1
>
Vasily Averin <vasily.averin@linux.dev> wrote:
> nft_*.c files whose NFT_EXPR_STATEFUL flag is set on need to
> use __GFP_ACCOUNT flag for objects that are dynamically
> allocated from the packet path.
>
> Such objects are allocated inside .init() or .clone() callbacks
> of struct nft_expr_ops executed in task context while processing
> netlink messages.
They can also be called from packet path.
> diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
> index 04be94236a34..e01241151ef7 100644
> --- a/net/netfilter/nf_tables_api.c
> +++ b/net/netfilter/nf_tables_api.c
> @@ -5447,7 +5447,7 @@ int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
> int err, i, k;
>
> for (i = 0; i < set->num_exprs; i++) {
> - expr = kzalloc(set->exprs[i]->ops->size, GFP_KERNEL);
> + expr = kzalloc(set->exprs[i]->ops->size, GFP_KERNEL_ACCOUNT);
> if (!expr)
> goto err_expr;
This is ok.
> diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
> index 3362417ebfdb..9c2146aac59e 100644
> --- a/net/netfilter/nft_connlimit.c
> +++ b/net/netfilter/nft_connlimit.c
> @@ -77,7 +77,7 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx,
> invert = true;
> }
>
> - priv->list = kmalloc(sizeof(*priv->list), GFP_KERNEL);
> + priv->list = kmalloc(sizeof(*priv->list), GFP_KERNEL_ACCOUNT);
> if (!priv->list)
> return -ENOMEM;
Same.
> @@ -214,7 +214,7 @@ static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src)
> struct nft_connlimit *priv_dst = nft_expr_priv(dst);
> struct nft_connlimit *priv_src = nft_expr_priv(src);
>
> - priv_dst->list = kmalloc(sizeof(*priv_dst->list), GFP_ATOMIC);
> + priv_dst->list = kmalloc(sizeof(*priv_dst->list), GFP_ATOMIC | __GFP_ACCOUNT);
This can be called from packet path, via nft_dynset.c.
nft_do_chain -> nft_dynset_eval -> nft_dynset_new ->
nft_dynset_expr_setup -> nft_expr_clone -> src->ops->clone()
> diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
> index f179e8c3b0ca..040a697d96b3 100644
> --- a/net/netfilter/nft_counter.c
> +++ b/net/netfilter/nft_counter.c
> @@ -62,7 +62,7 @@ static int nft_counter_do_init(const struct nlattr * const tb[],
> struct nft_counter __percpu *cpu_stats;
> struct nft_counter *this_cpu;
>
> - cpu_stats = alloc_percpu(struct nft_counter);
> + cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_KERNEL_ACCOUNT);
This is ok.
> @@ -235,7 +235,7 @@ static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
>
> nft_counter_fetch(priv, &total);
>
> - cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_ATOMIC);
> + cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_ATOMIC | __GFP_ACCOUNT);
> if (cpu_stats == NULL)
> return -ENOMEM;
Same problem as connlimit, can be called from packet path.
Basically all GFP_ATOMIC are suspicious.
Not sure how to resolve this, similar mechanics in iptables world (e.g.
connlimit or SET target) don't use memcg accounting.
Perhaps for now resend with only the GFP_KERNEL parts converted?
Those are safe.
Insertion from packet path is limited by set->size (element count) only
at this time.
On 4/1/22 15:03, Florian Westphal wrote: > Vasily Averin <vasily.averin@linux.dev> wrote: >> nft_*.c files whose NFT_EXPR_STATEFUL flag is set on need to >> use __GFP_ACCOUNT flag for objects that are dynamically >> allocated from the packet path. >> >> Such objects are allocated inside .init() or .clone() callbacks >> of struct nft_expr_ops executed in task context while processing >> netlink messages. > > They can also be called from packet path. >> @@ -214,7 +214,7 @@ static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src) >> struct nft_connlimit *priv_dst = nft_expr_priv(dst); >> struct nft_connlimit *priv_src = nft_expr_priv(src); >> >> - priv_dst->list = kmalloc(sizeof(*priv_dst->list), GFP_ATOMIC); >> + priv_dst->list = kmalloc(sizeof(*priv_dst->list), GFP_ATOMIC | __GFP_ACCOUNT); > > This can be called from packet path, via nft_dynset.c. > > nft_do_chain -> nft_dynset_eval -> nft_dynset_new -> > nft_dynset_expr_setup -> nft_expr_clone -> src->ops->clone() > Thank you, I noticed this case but did not understand that it is related to packet path. >> @@ -235,7 +235,7 @@ static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src) >> >> nft_counter_fetch(priv, &total); >> >> - cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_ATOMIC); >> + cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_ATOMIC | __GFP_ACCOUNT); >> if (cpu_stats == NULL) >> return -ENOMEM; > > Same problem as connlimit, can be called from packet path. > Basically all GFP_ATOMIC are suspicious. > > Not sure how to resolve this, similar mechanics in iptables world (e.g. > connlimit or SET target) don't use memcg accounting. > > Perhaps for now resend with only the GFP_KERNEL parts converted? > Those are safe. It is safe for packet path too, _ACCOUNT allocation will not be able to find memcg in case of "!in_task()" context. On the other hand any additional checks on such path will affect performance. Could you please estimate how often is this code used in the case of nft vs packet path? If packet path is rare case I think we can keep current code as is. If the opposite is the case, then I can add __GFP_ACCOUNT flag depending on in_task() check. Thank you, Vasily Averin
Vasily Averin <vasily.averin@linux.dev> wrote: > > Same problem as connlimit, can be called from packet path. > > Basically all GFP_ATOMIC are suspicious. > > > > Not sure how to resolve this, similar mechanics in iptables world (e.g. > > connlimit or SET target) don't use memcg accounting. > > > > Perhaps for now resend with only the GFP_KERNEL parts converted? > > Those are safe. > > It is safe for packet path too, _ACCOUNT allocation will not be able to find memcg > in case of "!in_task()" context. > On the other hand any additional checks on such path will affect performance. I'm not sure this works with ksoftirqd serving network stack? > Could you please estimate how often is this code used in the case of nft vs packet path? It depends on user configuration. Update from packet path is used for things like port knocking or other dyanamic filter lists, or somehing like Limiting connections to x-per-address/subnet and so on. > If the opposite is the case, then I can add __GFP_ACCOUNT flag depending on in_task() check. But what task/memcg is used for the accounting in that case?
On Fri, Apr 01, 2022 at 09:31:59PM +0200, Florian Westphal wrote: > Vasily Averin <vasily.averin@linux.dev> wrote: > > > Same problem as connlimit, can be called from packet path. > > > Basically all GFP_ATOMIC are suspicious. > > > > > > Not sure how to resolve this, similar mechanics in iptables world (e.g. > > > connlimit or SET target) don't use memcg accounting. > > > > > > Perhaps for now resend with only the GFP_KERNEL parts converted? > > > Those are safe. > > > > It is safe for packet path too, _ACCOUNT allocation will not be able to find memcg > > in case of "!in_task()" context. > > On the other hand any additional checks on such path will affect performance. > > I'm not sure this works with ksoftirqd serving network stack? > > > Could you please estimate how often is this code used in the case of nft vs packet path? > > It depends on user configuration. > Update from packet path is used for things like port knocking or other > dyanamic filter lists, or somehing like Limiting connections to x-per-address/subnet and so on. > > > If the opposite is the case, then I can add __GFP_ACCOUNT flag depending on in_task() check. > > But what task/memcg is used for the accounting in that case? Root memcg/no accounting, which is the same. There is a way to account for a specific memcg in such cases, it's used for bpf maps, for example. We save a pointer to the memcg which created the map and charge it for all allocations from a !in_task context. But the performance can be affected, so let's not do without regression tests and a serious need. Thanks!
Roman Gushchin <roman.gushchin@linux.dev> wrote: > On Fri, Apr 01, 2022 at 09:31:59PM +0200, Florian Westphal wrote: > > But what task/memcg is used for the accounting in that case? > > Root memcg/no accounting, which is the same. > > There is a way to account for a specific memcg in such cases, it's used for > bpf maps, for example. We save a pointer to the memcg which created the map and > charge it for all allocations from a !in_task context. Great, so we could use same scheme later on if its required for some use case. > so let's not do without regression tests and a serious need. Sounds good. Thanks.
On 4/1/22 22:31, Florian Westphal wrote: > Vasily Averin <vasily.averin@linux.dev> wrote: >>> Same problem as connlimit, can be called from packet path. >>> Basically all GFP_ATOMIC are suspicious. >>> >>> Not sure how to resolve this, similar mechanics in iptables world (e.g. >>> connlimit or SET target) don't use memcg accounting. >>> >>> Perhaps for now resend with only the GFP_KERNEL parts converted? >>> Those are safe. >> >> It is safe for packet path too, _ACCOUNT allocation will not be able to find memcg >> in case of "!in_task()" context. >> On the other hand any additional checks on such path will affect performance. > > I'm not sure this works with ksoftirqd serving network stack? Please take look at memcg_kmem_bypass() called from memcg_slab_pre_alloc_hook -> get_obj_cgroup_from_current By default memcg accounting does not work for any kernel threads. If required thread can use set_active_memcg() but at present it is not widely used. >> Could you please estimate how often is this code used in the case of nft vs packet path? > > It depends on user configuration. > Update from packet path is used for things like port knocking or other > dyanamic filter lists, or somehing like Limiting connections to x-per-address/subnet and so on. Ok, I think we can skip accounting in such cases at the moment. I doubt it can be misused and consume significant piece of host memory. So I'm going to resend the patch w/o accounting in all .clone callbacks. >> If the opposite is the case, then I can add __GFP_ACCOUNT flag depending on in_task() check. > > But what task/memcg is used for the accounting in that case? Thanks to Roman for the explanation in concurrent thread. Thank you, Vasily Averin
nft_*.c files whose NFT_EXPR_STATEFUL flag is set on need to
use __GFP_ACCOUNT flag for objects that are dynamically
allocated from the packet path.
Such objects are allocated inside nft_expr_ops->init() callbacks
executed in task context while processing netlink messages.
In addition, this patch adds accounting to nft_set_elem_expr_clone()
used for the same purposes.
Signed-off-by: Vasily Averin <vvs@openvz.org>
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
---
v2: removed accounting in nft_expr_ops->clone() callbacks
---
net/netfilter/nf_tables_api.c | 2 +-
net/netfilter/nft_connlimit.c | 2 +-
net/netfilter/nft_counter.c | 2 +-
net/netfilter/nft_last.c | 2 +-
net/netfilter/nft_limit.c | 2 +-
net/netfilter/nft_quota.c | 2 +-
6 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 04be94236a34..e01241151ef7 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5447,7 +5447,7 @@ int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
int err, i, k;
for (i = 0; i < set->num_exprs; i++) {
- expr = kzalloc(set->exprs[i]->ops->size, GFP_KERNEL);
+ expr = kzalloc(set->exprs[i]->ops->size, GFP_KERNEL_ACCOUNT);
if (!expr)
goto err_expr;
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
index 3362417ebfdb..f5df535bcbd0 100644
--- a/net/netfilter/nft_connlimit.c
+++ b/net/netfilter/nft_connlimit.c
@@ -77,7 +77,7 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx,
invert = true;
}
- priv->list = kmalloc(sizeof(*priv->list), GFP_KERNEL);
+ priv->list = kmalloc(sizeof(*priv->list), GFP_KERNEL_ACCOUNT);
if (!priv->list)
return -ENOMEM;
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index f179e8c3b0ca..7691e42f6bbe 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -62,7 +62,7 @@ static int nft_counter_do_init(const struct nlattr * const tb[],
struct nft_counter __percpu *cpu_stats;
struct nft_counter *this_cpu;
- cpu_stats = alloc_percpu(struct nft_counter);
+ cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_KERNEL_ACCOUNT);
if (cpu_stats == NULL)
return -ENOMEM;
diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c
index 4f745a409d34..97d0d09d48d3 100644
--- a/net/netfilter/nft_last.c
+++ b/net/netfilter/nft_last.c
@@ -30,7 +30,7 @@ static int nft_last_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
u64 last_jiffies;
int err;
- last = kzalloc(sizeof(*last), GFP_KERNEL);
+ last = kzalloc(sizeof(*last), GFP_KERNEL_ACCOUNT);
if (!last)
return -ENOMEM;
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index a726b623963d..c1f7e8bb33e6 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -90,7 +90,7 @@ static int nft_limit_init(struct nft_limit_priv *priv,
priv->rate);
}
- priv->limit = kmalloc(sizeof(*priv->limit), GFP_KERNEL);
+ priv->limit = kmalloc(sizeof(*priv->limit), GFP_KERNEL_ACCOUNT);
if (!priv->limit)
return -ENOMEM;
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index f394a0b562f6..0d2f55900f7b 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -90,7 +90,7 @@ static int nft_quota_do_init(const struct nlattr * const tb[],
return -EOPNOTSUPP;
}
- priv->consumed = kmalloc(sizeof(*priv->consumed), GFP_KERNEL);
+ priv->consumed = kmalloc(sizeof(*priv->consumed), GFP_KERNEL_ACCOUNT);
if (!priv->consumed)
return -ENOMEM;
--
2.31.1
On Sat, Apr 02, 2022 at 12:50:37PM +0300, Vasily Averin wrote: > nft_*.c files whose NFT_EXPR_STATEFUL flag is set on need to > use __GFP_ACCOUNT flag for objects that are dynamically > allocated from the packet path. > > Such objects are allocated inside nft_expr_ops->init() callbacks > executed in task context while processing netlink messages. > > In addition, this patch adds accounting to nft_set_elem_expr_clone() > used for the same purposes. Applied to nf, thanks
© 2016 - 2026 Red Hat, Inc.