[PATCH bpf-next v2] bpf: Remove redundant free_verifier_state()/pop_stack()

Luis Gerhorst posted 1 patch 3 months, 4 weeks ago
kernel/bpf/verifier.c | 37 +++++++++++--------------------------
1 file changed, 11 insertions(+), 26 deletions(-)
[PATCH bpf-next v2] bpf: Remove redundant free_verifier_state()/pop_stack()
Posted by Luis Gerhorst 3 months, 4 weeks ago
This patch removes duplicated code.

Eduard points out [1]:

    Same cleanup cycles are done in push_stack() and push_async_cb(),
    both functions are only reachable from do_check_common() via
    do_check() -> do_check_insn().

    Hence, I think that cur state should not be freed in push_*()
    functions and pop_stack() loop there is not needed.

This would also fix the 'symptom' for [2], but the issue also has a
simpler fix which was sent separately. This fix also makes sure the
push_*() callers always return an error for which
error_recoverable_with_nospec(err) is false. This is required because
otherwise we try to recover and access the stale `state`.

Moving free_verifier_state() and pop_stack(..., pop_log=false) to happen
after the bpf_vlog_reset() call in do_check_common() is fine because the
pop_stack() call that is moved does not call bpf_vlog_reset() with the
pop_log=false parameter.

[1] https://lore.kernel.org/all/b6931bd0dd72327c55287862f821ca6c4c3eb69a.camel@gmail.com/
[2] https://lore.kernel.org/all/68497853.050a0220.33aa0e.036a.GAE@google.com/

Reported-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/all/b6931bd0dd72327c55287862f821ca6c4c3eb69a.camel@gmail.com/
Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Signed-off-by: Luis Gerhorst <luis.gerhorst@fau.de>
---

Changes since v1:
- Move free_verifier_state() and pop_stack() into free_state() and
  remove err label in push_*() altogether (incl. comment), both
  suggested by Eduard
- Link to v1: https://lore.kernel.org/bpf/20250611211431.275731-1-luis.gerhorst@fau.de/

 kernel/bpf/verifier.c | 37 +++++++++++--------------------------
 1 file changed, 11 insertions(+), 26 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c378074516cf..5f4e0a8b20f8 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2097,7 +2097,7 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
 
 	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
 	if (!elem)
-		goto err;
+		return NULL;
 
 	elem->insn_idx = insn_idx;
 	elem->prev_insn_idx = prev_insn_idx;
@@ -2107,12 +2107,12 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
 	env->stack_size++;
 	err = copy_verifier_state(&elem->st, cur);
 	if (err)
-		goto err;
+		return NULL;
 	elem->st.speculative |= speculative;
 	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
 		verbose(env, "The sequence of %d jumps is too complex.\n",
 			env->stack_size);
-		goto err;
+		return NULL;
 	}
 	if (elem->st.parent) {
 		++elem->st.parent->branches;
@@ -2127,12 +2127,6 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
 		 */
 	}
 	return &elem->st;
-err:
-	free_verifier_state(env->cur_state, true);
-	env->cur_state = NULL;
-	/* pop all elements and return */
-	while (!pop_stack(env, NULL, NULL, false));
-	return NULL;
 }
 
 #define CALLER_SAVED_REGS 6
@@ -2864,7 +2858,7 @@ static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
 
 	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
 	if (!elem)
-		goto err;
+		return NULL;
 
 	elem->insn_idx = insn_idx;
 	elem->prev_insn_idx = prev_insn_idx;
@@ -2876,7 +2870,7 @@ static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
 		verbose(env,
 			"The sequence of %d jumps is too complex for async cb.\n",
 			env->stack_size);
-		goto err;
+		return NULL;
 	}
 	/* Unlike push_stack() do not copy_verifier_state().
 	 * The caller state doesn't matter.
@@ -2887,19 +2881,13 @@ static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
 	elem->st.in_sleepable = is_sleepable;
 	frame = kzalloc(sizeof(*frame), GFP_KERNEL);
 	if (!frame)
-		goto err;
+		return NULL;
 	init_func_state(env, frame,
 			BPF_MAIN_FUNC /* callsite */,
 			0 /* frameno within this callchain */,
 			subprog /* subprog number within this prog */);
 	elem->st.frame[0] = frame;
 	return &elem->st;
-err:
-	free_verifier_state(env->cur_state, true);
-	env->cur_state = NULL;
-	/* pop all elements and return */
-	while (!pop_stack(env, NULL, NULL, false));
-	return NULL;
 }
 
 
@@ -22934,6 +22922,11 @@ static void free_states(struct bpf_verifier_env *env)
 	struct bpf_scc_info *info;
 	int i, j;
 
+	WARN_ON_ONCE(!env->cur_state);
+	free_verifier_state(env->cur_state, true);
+	env->cur_state = NULL;
+	while (!pop_stack(env, NULL, NULL, false));
+
 	list_for_each_safe(pos, tmp, &env->free_list) {
 		sl = container_of(pos, struct bpf_verifier_state_list, node);
 		free_verifier_state(&sl->state, false);
@@ -23085,14 +23078,6 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog)
 
 	ret = do_check(env);
 out:
-	/* check for NULL is necessary, since cur_state can be freed inside
-	 * do_check() under memory pressure.
-	 */
-	if (env->cur_state) {
-		free_verifier_state(env->cur_state, true);
-		env->cur_state = NULL;
-	}
-	while (!pop_stack(env, NULL, NULL, false));
 	if (!ret && pop_log)
 		bpf_vlog_reset(&env->log, 0);
 	free_states(env);

base-commit: af91af33c16853c569ca814124781b849886f007
-- 
2.49.0
Re: [PATCH bpf-next v2] bpf: Remove redundant free_verifier_state()/pop_stack()
Posted by Eduard Zingerman 3 months, 4 weeks ago
On Fri, 2025-06-13 at 11:01 +0200, Luis Gerhorst wrote:
> This patch removes duplicated code.
> 
> Eduard points out [1]:
> 
>     Same cleanup cycles are done in push_stack() and push_async_cb(),
>     both functions are only reachable from do_check_common() via
>     do_check() -> do_check_insn().
> 
>     Hence, I think that cur state should not be freed in push_*()
>     functions and pop_stack() loop there is not needed.
> 
> This would also fix the 'symptom' for [2], but the issue also has a
> simpler fix which was sent separately. This fix also makes sure the
> push_*() callers always return an error for which
> error_recoverable_with_nospec(err) is false. This is required because
> otherwise we try to recover and access the stale `state`.
> 
> Moving free_verifier_state() and pop_stack(..., pop_log=false) to happen
> after the bpf_vlog_reset() call in do_check_common() is fine because the
> pop_stack() call that is moved does not call bpf_vlog_reset() with the
> pop_log=false parameter.
> 
> [1] https://lore.kernel.org/all/b6931bd0dd72327c55287862f821ca6c4c3eb69a.camel@gmail.com/
> [2] https://lore.kernel.org/all/68497853.050a0220.33aa0e.036a.GAE@google.com/
> 
> Reported-by: Eduard Zingerman <eddyz87@gmail.com>
> Link: https://lore.kernel.org/all/b6931bd0dd72327c55287862f821ca6c4c3eb69a.camel@gmail.com/
> Acked-by: Eduard Zingerman <eddyz87@gmail.com>
> Signed-off-by: Luis Gerhorst <luis.gerhorst@fau.de>
> ---

Tried v2, all looks good.

[...]

> @@ -22934,6 +22922,11 @@ static void free_states(struct bpf_verifier_env *env)
>  	struct bpf_scc_info *info;
>  	int i, j;
>  
> +	WARN_ON_ONCE(!env->cur_state);

Tbh I woudn't do this a warning, just an 'if (env->cur_state) ...',
but that's immaterial. Given current way do_check_common() is written
env->cur_state != NULL at this point, so the patch is safe to land.

> +	free_verifier_state(env->cur_state, true);
> +	env->cur_state = NULL;
> +	while (!pop_stack(env, NULL, NULL, false));
> +
>  	list_for_each_safe(pos, tmp, &env->free_list) {
>  		sl = container_of(pos, struct bpf_verifier_state_list, node);
>  		free_verifier_state(&sl->state, false);
Re: [PATCH bpf-next v2] bpf: Remove redundant free_verifier_state()/pop_stack()
Posted by Alexei Starovoitov 3 months, 4 weeks ago
On Fri, Jun 13, 2025 at 2:17 PM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> On Fri, 2025-06-13 at 11:01 +0200, Luis Gerhorst wrote:
> > This patch removes duplicated code.
> >
> > Eduard points out [1]:
> >
> >     Same cleanup cycles are done in push_stack() and push_async_cb(),
> >     both functions are only reachable from do_check_common() via
> >     do_check() -> do_check_insn().
> >
> >     Hence, I think that cur state should not be freed in push_*()
> >     functions and pop_stack() loop there is not needed.
> >
> > This would also fix the 'symptom' for [2], but the issue also has a
> > simpler fix which was sent separately. This fix also makes sure the
> > push_*() callers always return an error for which
> > error_recoverable_with_nospec(err) is false. This is required because
> > otherwise we try to recover and access the stale `state`.
> >
> > Moving free_verifier_state() and pop_stack(..., pop_log=false) to happen
> > after the bpf_vlog_reset() call in do_check_common() is fine because the
> > pop_stack() call that is moved does not call bpf_vlog_reset() with the
> > pop_log=false parameter.
> >
> > [1] https://lore.kernel.org/all/b6931bd0dd72327c55287862f821ca6c4c3eb69a.camel@gmail.com/
> > [2] https://lore.kernel.org/all/68497853.050a0220.33aa0e.036a.GAE@google.com/
> >
> > Reported-by: Eduard Zingerman <eddyz87@gmail.com>
> > Link: https://lore.kernel.org/all/b6931bd0dd72327c55287862f821ca6c4c3eb69a.camel@gmail.com/
> > Acked-by: Eduard Zingerman <eddyz87@gmail.com>
> > Signed-off-by: Luis Gerhorst <luis.gerhorst@fau.de>
> > ---
>
> Tried v2, all looks good.
>
> [...]
>
> > @@ -22934,6 +22922,11 @@ static void free_states(struct bpf_verifier_env *env)
> >       struct bpf_scc_info *info;
> >       int i, j;
> >
> > +     WARN_ON_ONCE(!env->cur_state);
>
> Tbh I woudn't do this a warning, just an 'if (env->cur_state) ...',
> but that's immaterial. Given current way do_check_common() is written
> env->cur_state != NULL at this point, so the patch is safe to land.

I removed it while applying, since it's useless.
If do_check_common() changes in the future and cur_state is NULL
here the warn will warn, but won't prevent the crash in the next line.

Also for tricky things we switched to verifier_bug() instead of WARN.
So no new WARNs allowed.