Introduce a path iterator, which reliably walks a struct path toward
the root. This path iterator is based on path_walk_parent(). A fixed
zeroed root is passed to path_walk_parent(). Therefore, unless the
user terminates it earlier, the iterator will terminate at the real
root.
Signed-off-by: Song Liu <song@kernel.org>
---
kernel/bpf/Makefile | 1 +
kernel/bpf/helpers.c | 3 +++
kernel/bpf/path_iter.c | 58 ++++++++++++++++++++++++++++++++++++++++++
kernel/bpf/verifier.c | 5 ++++
4 files changed, 67 insertions(+)
create mode 100644 kernel/bpf/path_iter.c
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 3a335c50e6e3..454a650d934e 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -56,6 +56,7 @@ obj-$(CONFIG_BPF_SYSCALL) += kmem_cache_iter.o
ifeq ($(CONFIG_DMA_SHARED_BUFFER),y)
obj-$(CONFIG_BPF_SYSCALL) += dmabuf_iter.o
endif
+obj-$(CONFIG_BPF_SYSCALL) += path_iter.o
CFLAGS_REMOVE_percpu_freelist.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_bpf_lru_list.o = $(CC_FLAGS_FTRACE)
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index b71e428ad936..b190c78e40f6 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -3397,6 +3397,9 @@ BTF_ID_FLAGS(func, bpf_iter_dmabuf_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPAB
BTF_ID_FLAGS(func, bpf_iter_dmabuf_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
#endif
BTF_ID_FLAGS(func, __bpf_trap)
+BTF_ID_FLAGS(func, bpf_iter_path_new, KF_ITER_NEW | KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_iter_path_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_iter_path_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
BTF_KFUNCS_END(common_btf_ids)
static const struct btf_kfunc_id_set common_kfunc_set = {
diff --git a/kernel/bpf/path_iter.c b/kernel/bpf/path_iter.c
new file mode 100644
index 000000000000..0d972ec84beb
--- /dev/null
+++ b/kernel/bpf/path_iter.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <linux/bpf.h>
+#include <linux/bpf_mem_alloc.h>
+#include <linux/namei.h>
+#include <linux/path.h>
+
+/* open-coded iterator */
+struct bpf_iter_path {
+ __u64 __opaque[3];
+} __aligned(8);
+
+struct bpf_iter_path_kern {
+ struct path path;
+ __u64 flags;
+} __aligned(8);
+
+__bpf_kfunc_start_defs();
+
+__bpf_kfunc int bpf_iter_path_new(struct bpf_iter_path *it,
+ struct path *start,
+ __u64 flags)
+{
+ struct bpf_iter_path_kern *kit = (void *)it;
+
+ BUILD_BUG_ON(sizeof(*kit) > sizeof(*it));
+ BUILD_BUG_ON(__alignof__(*kit) != __alignof__(*it));
+
+ if (flags) {
+ memset(&kit->path, 0, sizeof(struct path));
+ return -EINVAL;
+ }
+
+ kit->path = *start;
+ path_get(&kit->path);
+ kit->flags = flags;
+
+ return 0;
+}
+
+__bpf_kfunc struct path *bpf_iter_path_next(struct bpf_iter_path *it)
+{
+ struct bpf_iter_path_kern *kit = (void *)it;
+ struct path root = {};
+
+ if (!path_walk_parent(&kit->path, &root))
+ return NULL;
+ return &kit->path;
+}
+
+__bpf_kfunc void bpf_iter_path_destroy(struct bpf_iter_path *it)
+{
+ struct bpf_iter_path_kern *kit = (void *)it;
+
+ path_put(&kit->path);
+}
+
+__bpf_kfunc_end_defs();
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a7d6e0c5928b..45b45cdfb223 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -7036,6 +7036,10 @@ BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket) {
struct sock *sk;
};
+BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct path) {
+ struct dentry *dentry;
+};
+
static bool type_is_rcu(struct bpf_verifier_env *env,
struct bpf_reg_state *reg,
const char *field_name, u32 btf_id)
@@ -7076,6 +7080,7 @@ static bool type_is_trusted_or_null(struct bpf_verifier_env *env,
const char *field_name, u32 btf_id)
{
BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket));
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct path));
return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id,
"__safe_trusted_or_null");
--
2.47.1
On Mon, Jun 02, 2025 at 11:59:19PM -0700, Song Liu wrote:
> Introduce a path iterator, which reliably walk a struct path toward
> the root. This path iterator is based on path_walk_parent. A fixed
> zero'ed root is passed to path_walk_parent(). Therefore, unless the
> user terminates it earlier, the iterator will terminate at the real
> root.
>
> Signed-off-by: Song Liu <song@kernel.org>
> ---
> kernel/bpf/Makefile | 1 +
> kernel/bpf/helpers.c | 3 +++
> kernel/bpf/path_iter.c | 58 ++++++++++++++++++++++++++++++++++++++++++
> kernel/bpf/verifier.c | 5 ++++
> 4 files changed, 67 insertions(+)
> create mode 100644 kernel/bpf/path_iter.c
>
> diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
> index 3a335c50e6e3..454a650d934e 100644
> --- a/kernel/bpf/Makefile
> +++ b/kernel/bpf/Makefile
> @@ -56,6 +56,7 @@ obj-$(CONFIG_BPF_SYSCALL) += kmem_cache_iter.o
> ifeq ($(CONFIG_DMA_SHARED_BUFFER),y)
> obj-$(CONFIG_BPF_SYSCALL) += dmabuf_iter.o
> endif
> +obj-$(CONFIG_BPF_SYSCALL) += path_iter.o
>
> CFLAGS_REMOVE_percpu_freelist.o = $(CC_FLAGS_FTRACE)
> CFLAGS_REMOVE_bpf_lru_list.o = $(CC_FLAGS_FTRACE)
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index b71e428ad936..b190c78e40f6 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -3397,6 +3397,9 @@ BTF_ID_FLAGS(func, bpf_iter_dmabuf_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPAB
> BTF_ID_FLAGS(func, bpf_iter_dmabuf_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
> #endif
> BTF_ID_FLAGS(func, __bpf_trap)
> +BTF_ID_FLAGS(func, bpf_iter_path_new, KF_ITER_NEW | KF_SLEEPABLE)
Hm, I'd expect KF_TRUSTED_ARGS to be enforced on
bpf_iter_path_new(), no? Shouldn't this only be operating on a stable
struct path reference?
> +BTF_ID_FLAGS(func, bpf_iter_path_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPABLE)
> +BTF_ID_FLAGS(func, bpf_iter_path_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
At this point, the claim is that these kfuncs are only to be used from
the context of the BPF LSM. If true, I'd expect these BPF kfuncs to be
part of bpf_fs_kfunc_set_ids once moved into fs/bpf_fs_kfuncs.c.
> static const struct btf_kfunc_id_set common_kfunc_set = {
> diff --git a/kernel/bpf/path_iter.c b/kernel/bpf/path_iter.c
> new file mode 100644
> index 000000000000..0d972ec84beb
> --- /dev/null
> +++ b/kernel/bpf/path_iter.c
> @@ -0,0 +1,58 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
> +#include <linux/bpf.h>
> +#include <linux/bpf_mem_alloc.h>
> +#include <linux/namei.h>
> +#include <linux/path.h>
> +
> +/* open-coded iterator */
> +struct bpf_iter_path {
> + __u64 __opaque[3];
> +} __aligned(8);
> +
> +struct bpf_iter_path_kern {
> + struct path path;
> + __u64 flags;
> +} __aligned(8);
> +
> +__bpf_kfunc_start_defs();
> +
> +__bpf_kfunc int bpf_iter_path_new(struct bpf_iter_path *it,
> + struct path *start,
> + __u64 flags)
> +{
> + struct bpf_iter_path_kern *kit = (void *)it;
> +
> + BUILD_BUG_ON(sizeof(*kit) > sizeof(*it));
> + BUILD_BUG_ON(__alignof__(*kit) != __alignof__(*it));
> +
> + if (flags) {
> + memset(&kit->path, 0, sizeof(struct path));
This warrants a comment for sure. Also why not just zero it out
entirely?
> + return -EINVAL;
> + }
> +
> + kit->path = *start;
> + path_get(&kit->path);
> + kit->flags = flags;
> +
> + return 0;
> +}
> +
> +__bpf_kfunc struct path *bpf_iter_path_next(struct bpf_iter_path *it)
> +{
> + struct bpf_iter_path_kern *kit = (void *)it;
> + struct path root = {};
I think this also warrants a comment. Specifically, that unless the
loop is explicitly terminated, bpf_iter_path_next() will continue
looping until we've reached the global root of the VFS.
> + if (!path_walk_parent(&kit->path, &root))
> + return NULL;
> + return &kit->path;
> +}
> +
> +__bpf_kfunc void bpf_iter_path_destroy(struct bpf_iter_path *it)
> +{
> + struct bpf_iter_path_kern *kit = (void *)it;
> +
> + path_put(&kit->path);
> +}
> +
> +__bpf_kfunc_end_defs();
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index a7d6e0c5928b..45b45cdfb223 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -7036,6 +7036,10 @@ BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket) {
> struct sock *sk;
> };
>
> +BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct path) {
> + struct dentry *dentry;
> +};
The dentry is only trusted if the struct path itself is trusted, which
is why KF_TRUSTED_ARGS should be enforced.
> static bool type_is_rcu(struct bpf_verifier_env *env,
> struct bpf_reg_state *reg,
> const char *field_name, u32 btf_id)
> @@ -7076,6 +7080,7 @@ static bool type_is_trusted_or_null(struct bpf_verifier_env *env,
> const char *field_name, u32 btf_id)
> {
> BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket));
> + BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct path));
>
> return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id,
> "__safe_trusted_or_null");
> --
> 2.47.1
>
On Thu, Jun 5, 2025 at 12:27 PM Matt Bobrowski <mattbobrowski@google.com> wrote:
>
> On Mon, Jun 02, 2025 at 11:59:19PM -0700, Song Liu wrote:
> > Introduce a path iterator, which reliably walk a struct path toward
> > the root. This path iterator is based on path_walk_parent. A fixed
> > zero'ed root is passed to path_walk_parent(). Therefore, unless the
> > user terminates it earlier, the iterator will terminate at the real
> > root.
> >
> > Signed-off-by: Song Liu <song@kernel.org>
> > ---
> > kernel/bpf/Makefile | 1 +
> > kernel/bpf/helpers.c | 3 +++
> > kernel/bpf/path_iter.c | 58 ++++++++++++++++++++++++++++++++++++++++++
> > kernel/bpf/verifier.c | 5 ++++
> > 4 files changed, 67 insertions(+)
> > create mode 100644 kernel/bpf/path_iter.c
> >
> > diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
> > index 3a335c50e6e3..454a650d934e 100644
> > --- a/kernel/bpf/Makefile
> > +++ b/kernel/bpf/Makefile
> > @@ -56,6 +56,7 @@ obj-$(CONFIG_BPF_SYSCALL) += kmem_cache_iter.o
> > ifeq ($(CONFIG_DMA_SHARED_BUFFER),y)
> > obj-$(CONFIG_BPF_SYSCALL) += dmabuf_iter.o
> > endif
> > +obj-$(CONFIG_BPF_SYSCALL) += path_iter.o
> >
> > CFLAGS_REMOVE_percpu_freelist.o = $(CC_FLAGS_FTRACE)
> > CFLAGS_REMOVE_bpf_lru_list.o = $(CC_FLAGS_FTRACE)
> > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> > index b71e428ad936..b190c78e40f6 100644
> > --- a/kernel/bpf/helpers.c
> > +++ b/kernel/bpf/helpers.c
> > @@ -3397,6 +3397,9 @@ BTF_ID_FLAGS(func, bpf_iter_dmabuf_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPAB
> > BTF_ID_FLAGS(func, bpf_iter_dmabuf_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
> > #endif
> > BTF_ID_FLAGS(func, __bpf_trap)
> > +BTF_ID_FLAGS(func, bpf_iter_path_new, KF_ITER_NEW | KF_SLEEPABLE)
>
> Hm, I'd expect KF_TRUSTED_ARGS to be enforced onto
> bpf_iter_path_new(), no? Shouldn't this only be operating on a stable
> struct path reference?
Good catch! Added KF_TRUSTED_ARGS. Also added a test with an
untrusted pointer.
>
> > +BTF_ID_FLAGS(func, bpf_iter_path_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPABLE)
> > +BTF_ID_FLAGS(func, bpf_iter_path_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
>
> At this point, the claim is that such are only to be used from the
> context of the BPF LSM. If true, I'd expect these BPF kfuncs to be
> part of bpf_fs_kfunc_set_ids once moved into fs/bpf_fs_kfuncs.c.
I moved this to fs/bpf_fs_kfuncs.c in the next version.
>
> > static const struct btf_kfunc_id_set common_kfunc_set = {
> > diff --git a/kernel/bpf/path_iter.c b/kernel/bpf/path_iter.c
> > new file mode 100644
> > index 000000000000..0d972ec84beb
> > --- /dev/null
> > +++ b/kernel/bpf/path_iter.c
> > @@ -0,0 +1,58 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
> > +#include <linux/bpf.h>
> > +#include <linux/bpf_mem_alloc.h>
> > +#include <linux/namei.h>
> > +#include <linux/path.h>
> > +
> > +/* open-coded iterator */
> > +struct bpf_iter_path {
> > + __u64 __opaque[3];
> > +} __aligned(8);
> > +
> > +struct bpf_iter_path_kern {
> > + struct path path;
> > + __u64 flags;
> > +} __aligned(8);
> > +
> > +__bpf_kfunc_start_defs();
> > +
> > +__bpf_kfunc int bpf_iter_path_new(struct bpf_iter_path *it,
> > + struct path *start,
> > + __u64 flags)
> > +{
> > + struct bpf_iter_path_kern *kit = (void *)it;
> > +
> > + BUILD_BUG_ON(sizeof(*kit) > sizeof(*it));
> > + BUILD_BUG_ON(__alignof__(*kit) != __alignof__(*it));
> > +
> > + if (flags) {
> > + memset(&kit->path, 0, sizeof(struct path));
>
> This warrants a comment for sure. Also why not just zero it out
> entirely?
Added some comments in v3. Also "flags" is removed in v3.
Thanks,
Song
[...]
On Mon, Jun 2, 2025 at 11:59 PM Song Liu <song@kernel.org> wrote:
>
> Introduce a path iterator, which reliably walk a struct path toward
> the root. This path iterator is based on path_walk_parent. A fixed
> zero'ed root is passed to path_walk_parent(). Therefore, unless the
> user terminates it earlier, the iterator will terminate at the real
> root.
>
> Signed-off-by: Song Liu <song@kernel.org>
> ---
> kernel/bpf/Makefile | 1 +
> kernel/bpf/helpers.c | 3 +++
> kernel/bpf/path_iter.c | 58 ++++++++++++++++++++++++++++++++++++++++++
> kernel/bpf/verifier.c | 5 ++++
> 4 files changed, 67 insertions(+)
> create mode 100644 kernel/bpf/path_iter.c
>
> diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
> index 3a335c50e6e3..454a650d934e 100644
> --- a/kernel/bpf/Makefile
> +++ b/kernel/bpf/Makefile
> @@ -56,6 +56,7 @@ obj-$(CONFIG_BPF_SYSCALL) += kmem_cache_iter.o
> ifeq ($(CONFIG_DMA_SHARED_BUFFER),y)
> obj-$(CONFIG_BPF_SYSCALL) += dmabuf_iter.o
> endif
> +obj-$(CONFIG_BPF_SYSCALL) += path_iter.o
>
> CFLAGS_REMOVE_percpu_freelist.o = $(CC_FLAGS_FTRACE)
> CFLAGS_REMOVE_bpf_lru_list.o = $(CC_FLAGS_FTRACE)
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index b71e428ad936..b190c78e40f6 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -3397,6 +3397,9 @@ BTF_ID_FLAGS(func, bpf_iter_dmabuf_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPAB
> BTF_ID_FLAGS(func, bpf_iter_dmabuf_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
> #endif
> BTF_ID_FLAGS(func, __bpf_trap)
> +BTF_ID_FLAGS(func, bpf_iter_path_new, KF_ITER_NEW | KF_SLEEPABLE)
> +BTF_ID_FLAGS(func, bpf_iter_path_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPABLE)
> +BTF_ID_FLAGS(func, bpf_iter_path_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
> BTF_KFUNCS_END(common_btf_ids)
>
> static const struct btf_kfunc_id_set common_kfunc_set = {
> diff --git a/kernel/bpf/path_iter.c b/kernel/bpf/path_iter.c
> new file mode 100644
> index 000000000000..0d972ec84beb
> --- /dev/null
> +++ b/kernel/bpf/path_iter.c
> @@ -0,0 +1,58 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
> +#include <linux/bpf.h>
> +#include <linux/bpf_mem_alloc.h>
> +#include <linux/namei.h>
> +#include <linux/path.h>
> +
> +/* open-coded iterator */
> +struct bpf_iter_path {
> + __u64 __opaque[3];
> +} __aligned(8);
> +
> +struct bpf_iter_path_kern {
> + struct path path;
> + __u64 flags;
> +} __aligned(8);
> +
> +__bpf_kfunc_start_defs();
> +
> +__bpf_kfunc int bpf_iter_path_new(struct bpf_iter_path *it,
> + struct path *start,
> + __u64 flags)
> +{
> + struct bpf_iter_path_kern *kit = (void *)it;
> +
> + BUILD_BUG_ON(sizeof(*kit) > sizeof(*it));
> + BUILD_BUG_ON(__alignof__(*kit) != __alignof__(*it));
> +
> + if (flags) {
> + memset(&kit->path, 0, sizeof(struct path));
> + return -EINVAL;
> + }
> +
> + kit->path = *start;
> + path_get(&kit->path);
> + kit->flags = flags;
> +
> + return 0;
> +}
> +
> +__bpf_kfunc struct path *bpf_iter_path_next(struct bpf_iter_path *it)
> +{
> + struct bpf_iter_path_kern *kit = (void *)it;
> + struct path root = {};
> +
> + if (!path_walk_parent(&kit->path, &root))
> + return NULL;
> + return &kit->path;
> +}
> +
> +__bpf_kfunc void bpf_iter_path_destroy(struct bpf_iter_path *it)
> +{
> + struct bpf_iter_path_kern *kit = (void *)it;
> +
> + path_put(&kit->path);
Note: destroy() will be called even if construction of the iterator
fails or the iterator has been exhausted. So you need to make sure that
bpf_iter_path has a state in which you can detect that there is no path
present and skip path_put().
> +}
> +
> +__bpf_kfunc_end_defs();
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index a7d6e0c5928b..45b45cdfb223 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -7036,6 +7036,10 @@ BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket) {
> struct sock *sk;
> };
>
> +BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct path) {
> + struct dentry *dentry;
> +};
> +
> static bool type_is_rcu(struct bpf_verifier_env *env,
> struct bpf_reg_state *reg,
> const char *field_name, u32 btf_id)
> @@ -7076,6 +7080,7 @@ static bool type_is_trusted_or_null(struct bpf_verifier_env *env,
> const char *field_name, u32 btf_id)
> {
> BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket));
> + BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct path));
>
> return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id,
> "__safe_trusted_or_null");
> --
> 2.47.1
>
On Tue, Jun 3, 2025 at 11:40 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
[...]
> > +__bpf_kfunc struct path *bpf_iter_path_next(struct bpf_iter_path *it)
> > +{
> > + struct bpf_iter_path_kern *kit = (void *)it;
> > + struct path root = {};
> > +
> > + if (!path_walk_parent(&kit->path, &root))
> > + return NULL;
> > + return &kit->path;
> > +}
> > +
> > +__bpf_kfunc void bpf_iter_path_destroy(struct bpf_iter_path *it)
> > +{
> > + struct bpf_iter_path_kern *kit = (void *)it;
> > +
> > + path_put(&kit->path);
>
> note, destroy() will be called even if construction of iterator fails
> or we exhausted iterator. So you need to make sure that you have
> bpf_iter_path state where you can detect that there is no path present
> and skip path_put().
In bpf_iter_path_next(), when path_walk_parent() returns false, we
still hold a reference to kit->path, and _destroy() will then release
it. So we should be fine, no?
Thanks,
Song
>
> > +}
> > +
> > +__bpf_kfunc_end_defs();
> > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > index a7d6e0c5928b..45b45cdfb223 100644
> > --- a/kernel/bpf/verifier.c
> > +++ b/kernel/bpf/verifier.c
> > @@ -7036,6 +7036,10 @@ BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket) {
> > struct sock *sk;
> > };
> >
> > +BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct path) {
> > + struct dentry *dentry;
> > +};
> > +
> > static bool type_is_rcu(struct bpf_verifier_env *env,
> > struct bpf_reg_state *reg,
> > const char *field_name, u32 btf_id)
> > @@ -7076,6 +7080,7 @@ static bool type_is_trusted_or_null(struct bpf_verifier_env *env,
> > const char *field_name, u32 btf_id)
> > {
> > BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket));
> > + BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct path));
> >
> > return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id,
> > "__safe_trusted_or_null");
> > --
> > 2.47.1
> >
>
On Tue, Jun 3, 2025 at 2:09 PM Song Liu <song@kernel.org> wrote:
>
> On Tue, Jun 3, 2025 at 11:40 AM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> [...]
> > > +__bpf_kfunc struct path *bpf_iter_path_next(struct bpf_iter_path *it)
> > > +{
> > > + struct bpf_iter_path_kern *kit = (void *)it;
> > > + struct path root = {};
> > > +
> > > + if (!path_walk_parent(&kit->path, &root))
> > > + return NULL;
> > > + return &kit->path;
> > > +}
> > > +
> > > +__bpf_kfunc void bpf_iter_path_destroy(struct bpf_iter_path *it)
> > > +{
> > > + struct bpf_iter_path_kern *kit = (void *)it;
> > > +
> > > + path_put(&kit->path);
> >
> > note, destroy() will be called even if construction of iterator fails
> > or we exhausted iterator. So you need to make sure that you have
> > bpf_iter_path state where you can detect that there is no path present
> > and skip path_put().
>
> In bpf_iter_path_next(), when path_walk_parent() returns false, we
> still hold reference to kit->path, then _destroy() will release it. So we
> should be fine, no?
you still need to handle iterators that failed to be initialized,
though? And one can argue that if path_walk_parent() returns false, we
need to put that last path before returning NULL, no?
>
> Thanks,
> Song
>
> >
> > > +}
> > > +
> > > +__bpf_kfunc_end_defs();
> > > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > > index a7d6e0c5928b..45b45cdfb223 100644
> > > --- a/kernel/bpf/verifier.c
> > > +++ b/kernel/bpf/verifier.c
> > > @@ -7036,6 +7036,10 @@ BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket) {
> > > struct sock *sk;
> > > };
> > >
> > > +BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct path) {
> > > + struct dentry *dentry;
> > > +};
> > > +
> > > static bool type_is_rcu(struct bpf_verifier_env *env,
> > > struct bpf_reg_state *reg,
> > > const char *field_name, u32 btf_id)
> > > @@ -7076,6 +7080,7 @@ static bool type_is_trusted_or_null(struct bpf_verifier_env *env,
> > > const char *field_name, u32 btf_id)
> > > {
> > > BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket));
> > > + BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct path));
> > >
> > > return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id,
> > > "__safe_trusted_or_null");
> > > --
> > > 2.47.1
> > >
> >
On Tue, Jun 3, 2025 at 2:45 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Tue, Jun 3, 2025 at 2:09 PM Song Liu <song@kernel.org> wrote:
> >
> > On Tue, Jun 3, 2025 at 11:40 AM Andrii Nakryiko
> > <andrii.nakryiko@gmail.com> wrote:
> > [...]
> > > > +__bpf_kfunc struct path *bpf_iter_path_next(struct bpf_iter_path *it)
> > > > +{
> > > > + struct bpf_iter_path_kern *kit = (void *)it;
> > > > + struct path root = {};
> > > > +
> > > > + if (!path_walk_parent(&kit->path, &root))
> > > > + return NULL;
> > > > + return &kit->path;
> > > > +}
> > > > +
> > > > +__bpf_kfunc void bpf_iter_path_destroy(struct bpf_iter_path *it)
> > > > +{
> > > > + struct bpf_iter_path_kern *kit = (void *)it;
> > > > +
> > > > + path_put(&kit->path);
> > >
> > > note, destroy() will be called even if construction of iterator fails
> > > or we exhausted iterator. So you need to make sure that you have
> > > bpf_iter_path state where you can detect that there is no path present
> > > and skip path_put().
> >
> > In bpf_iter_path_next(), when path_walk_parent() returns false, we
> > still hold reference to kit->path, then _destroy() will release it. So we
> > should be fine, no?
>
> you still need to handle iterators that failed to be initialized,
> though? And one can argue that if path_walk_parent() returns false, we
> need to put that last path before returning NULL, no?
kit->path is zeroed on initialization failures, so we can path_put() it
safely. For the case where _next() returns NULL, we can either put
kit->path in _destroy(), which is the logic now, or put kit->path in the
last _next() call and make _destroy() a no-op in that case. I don't have
a strong preference either way.
Thanks,
Song
On Tue, Jun 3, 2025 at 4:20 PM Song Liu <song@kernel.org> wrote:
>
> On Tue, Jun 3, 2025 at 2:45 PM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > On Tue, Jun 3, 2025 at 2:09 PM Song Liu <song@kernel.org> wrote:
> > >
> > > On Tue, Jun 3, 2025 at 11:40 AM Andrii Nakryiko
> > > <andrii.nakryiko@gmail.com> wrote:
> > > [...]
> > > > > +__bpf_kfunc struct path *bpf_iter_path_next(struct bpf_iter_path *it)
> > > > > +{
> > > > > + struct bpf_iter_path_kern *kit = (void *)it;
> > > > > + struct path root = {};
> > > > > +
> > > > > + if (!path_walk_parent(&kit->path, &root))
> > > > > + return NULL;
> > > > > + return &kit->path;
> > > > > +}
> > > > > +
> > > > > +__bpf_kfunc void bpf_iter_path_destroy(struct bpf_iter_path *it)
> > > > > +{
> > > > > + struct bpf_iter_path_kern *kit = (void *)it;
> > > > > +
> > > > > + path_put(&kit->path);
> > > >
> > > > note, destroy() will be called even if construction of iterator fails
> > > > or we exhausted iterator. So you need to make sure that you have
> > > > bpf_iter_path state where you can detect that there is no path present
> > > > and skip path_put().
> > >
> > > In bpf_iter_path_next(), when path_walk_parent() returns false, we
> > > still hold reference to kit->path, then _destroy() will release it. So we
> > > should be fine, no?
> >
> > you still need to handle iterators that failed to be initialized,
> > though? And one can argue that if path_walk_parent() returns false, we
> > need to put that last path before returning NULL, no?
>
> kit->path is zero'ed on initialization failures, so we can path_put() it
> safely. For _next() returns NULL case, we can either put kit->path
> in _destroy(), which is the logic now, or put kit->path in the last
> _next() call and make _destroy() a no-op in that case. I don't have
> a strong preference either way.
I didn't realize path_put() is a no-op for a zeroed-out struct path. I'd
probably leave a comment for our future selves; I don't have a strong
preference otherwise.
>
> Thanks,
> Song
On 6/3/25 11:40 AM, Andrii Nakryiko wrote:
> On Mon, Jun 2, 2025 at 11:59 PM Song Liu <song@kernel.org> wrote:
>> Introduce a path iterator, which reliably walk a struct path toward
>> the root. This path iterator is based on path_walk_parent. A fixed
>> zero'ed root is passed to path_walk_parent(). Therefore, unless the
>> user terminates it earlier, the iterator will terminate at the real
>> root.
>>
>> Signed-off-by: Song Liu <song@kernel.org>
>> ---
>> kernel/bpf/Makefile | 1 +
>> kernel/bpf/helpers.c | 3 +++
>> kernel/bpf/path_iter.c | 58 ++++++++++++++++++++++++++++++++++++++++++
>> kernel/bpf/verifier.c | 5 ++++
>> 4 files changed, 67 insertions(+)
>> create mode 100644 kernel/bpf/path_iter.c
>>
>> diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
>> index 3a335c50e6e3..454a650d934e 100644
>> --- a/kernel/bpf/Makefile
>> +++ b/kernel/bpf/Makefile
>> @@ -56,6 +56,7 @@ obj-$(CONFIG_BPF_SYSCALL) += kmem_cache_iter.o
>> ifeq ($(CONFIG_DMA_SHARED_BUFFER),y)
>> obj-$(CONFIG_BPF_SYSCALL) += dmabuf_iter.o
>> endif
>> +obj-$(CONFIG_BPF_SYSCALL) += path_iter.o
>>
>> CFLAGS_REMOVE_percpu_freelist.o = $(CC_FLAGS_FTRACE)
>> CFLAGS_REMOVE_bpf_lru_list.o = $(CC_FLAGS_FTRACE)
>> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
>> index b71e428ad936..b190c78e40f6 100644
>> --- a/kernel/bpf/helpers.c
>> +++ b/kernel/bpf/helpers.c
>> @@ -3397,6 +3397,9 @@ BTF_ID_FLAGS(func, bpf_iter_dmabuf_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPAB
>> BTF_ID_FLAGS(func, bpf_iter_dmabuf_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
>> #endif
>> BTF_ID_FLAGS(func, __bpf_trap)
>> +BTF_ID_FLAGS(func, bpf_iter_path_new, KF_ITER_NEW | KF_SLEEPABLE)
>> +BTF_ID_FLAGS(func, bpf_iter_path_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPABLE)
>> +BTF_ID_FLAGS(func, bpf_iter_path_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
>> BTF_KFUNCS_END(common_btf_ids)
>>
>> static const struct btf_kfunc_id_set common_kfunc_set = {
>> diff --git a/kernel/bpf/path_iter.c b/kernel/bpf/path_iter.c
>> new file mode 100644
>> index 000000000000..0d972ec84beb
>> --- /dev/null
>> +++ b/kernel/bpf/path_iter.c
>> @@ -0,0 +1,58 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
>> +#include <linux/bpf.h>
>> +#include <linux/bpf_mem_alloc.h>
>> +#include <linux/namei.h>
>> +#include <linux/path.h>
>> +
>> +/* open-coded iterator */
>> +struct bpf_iter_path {
>> + __u64 __opaque[3];
>> +} __aligned(8);
>> +
>> +struct bpf_iter_path_kern {
>> + struct path path;
>> + __u64 flags;
>> +} __aligned(8);
>> +
>> +__bpf_kfunc_start_defs();
>> +
>> +__bpf_kfunc int bpf_iter_path_new(struct bpf_iter_path *it,
>> + struct path *start,
>> + __u64 flags)
>> +{
>> + struct bpf_iter_path_kern *kit = (void *)it;
>> +
>> + BUILD_BUG_ON(sizeof(*kit) > sizeof(*it));
>> + BUILD_BUG_ON(__alignof__(*kit) != __alignof__(*it));
>> +
>> + if (flags) {
>> + memset(&kit->path, 0, sizeof(struct path));
>> + return -EINVAL;
>> + }
>> +
>> + kit->path = *start;
>> + path_get(&kit->path);
>> + kit->flags = flags;
>> +
>> + return 0;
>> +}
>> +
>> +__bpf_kfunc struct path *bpf_iter_path_next(struct bpf_iter_path *it)
>> +{
>> + struct bpf_iter_path_kern *kit = (void *)it;
>> + struct path root = {};
>> +
>> + if (!path_walk_parent(&kit->path, &root))
>> + return NULL;
>> + return &kit->path;
>> +}
>> +
>> +__bpf_kfunc void bpf_iter_path_destroy(struct bpf_iter_path *it)
>> +{
>> + struct bpf_iter_path_kern *kit = (void *)it;
>> +
>> + path_put(&kit->path);
> note, destroy() will be called even if construction of iterator fails
> or we exhausted iterator. So you need to make sure that you have
> bpf_iter_path state where you can detect that there is no path present
> and skip path_put().
In rare cases, it is possible that the &kit->path address could be
destroyed and reused, right? Maybe we need more state in kit to detect
the change?
>
>> +}
>> +
>> +__bpf_kfunc_end_defs();
>> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
>> index a7d6e0c5928b..45b45cdfb223 100644
>> --- a/kernel/bpf/verifier.c
>> +++ b/kernel/bpf/verifier.c
>> @@ -7036,6 +7036,10 @@ BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket) {
>> struct sock *sk;
>> };
>>
>> +BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct path) {
>> + struct dentry *dentry;
>> +};
>> +
>> static bool type_is_rcu(struct bpf_verifier_env *env,
>> struct bpf_reg_state *reg,
>> const char *field_name, u32 btf_id)
>> @@ -7076,6 +7080,7 @@ static bool type_is_trusted_or_null(struct bpf_verifier_env *env,
>> const char *field_name, u32 btf_id)
>> {
>> BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket));
>> + BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct path));
>>
>> return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id,
>> "__safe_trusted_or_null");
>> --
>> 2.47.1
>>
On Tue, Jun 3, 2025 at 1:50 PM Yonghong Song <yonghong.song@linux.dev> wrote:
>
>
>
> On 6/3/25 11:40 AM, Andrii Nakryiko wrote:
> > On Mon, Jun 2, 2025 at 11:59 PM Song Liu <song@kernel.org> wrote:
> >> Introduce a path iterator, which reliably walk a struct path toward
> >> the root. This path iterator is based on path_walk_parent. A fixed
> >> zero'ed root is passed to path_walk_parent(). Therefore, unless the
> >> user terminates it earlier, the iterator will terminate at the real
> >> root.
> >>
> >> Signed-off-by: Song Liu <song@kernel.org>
> >> ---
> >> kernel/bpf/Makefile | 1 +
> >> kernel/bpf/helpers.c | 3 +++
> >> kernel/bpf/path_iter.c | 58 ++++++++++++++++++++++++++++++++++++++++++
> >> kernel/bpf/verifier.c | 5 ++++
> >> 4 files changed, 67 insertions(+)
> >> create mode 100644 kernel/bpf/path_iter.c
> >>
> >> diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
> >> index 3a335c50e6e3..454a650d934e 100644
> >> --- a/kernel/bpf/Makefile
> >> +++ b/kernel/bpf/Makefile
> >> @@ -56,6 +56,7 @@ obj-$(CONFIG_BPF_SYSCALL) += kmem_cache_iter.o
> >> ifeq ($(CONFIG_DMA_SHARED_BUFFER),y)
> >> obj-$(CONFIG_BPF_SYSCALL) += dmabuf_iter.o
> >> endif
> >> +obj-$(CONFIG_BPF_SYSCALL) += path_iter.o
> >>
> >> CFLAGS_REMOVE_percpu_freelist.o = $(CC_FLAGS_FTRACE)
> >> CFLAGS_REMOVE_bpf_lru_list.o = $(CC_FLAGS_FTRACE)
> >> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> >> index b71e428ad936..b190c78e40f6 100644
> >> --- a/kernel/bpf/helpers.c
> >> +++ b/kernel/bpf/helpers.c
> >> @@ -3397,6 +3397,9 @@ BTF_ID_FLAGS(func, bpf_iter_dmabuf_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPAB
> >> BTF_ID_FLAGS(func, bpf_iter_dmabuf_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
> >> #endif
> >> BTF_ID_FLAGS(func, __bpf_trap)
> >> +BTF_ID_FLAGS(func, bpf_iter_path_new, KF_ITER_NEW | KF_SLEEPABLE)
> >> +BTF_ID_FLAGS(func, bpf_iter_path_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPABLE)
> >> +BTF_ID_FLAGS(func, bpf_iter_path_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
> >> BTF_KFUNCS_END(common_btf_ids)
> >>
> >> static const struct btf_kfunc_id_set common_kfunc_set = {
> >> diff --git a/kernel/bpf/path_iter.c b/kernel/bpf/path_iter.c
> >> new file mode 100644
> >> index 000000000000..0d972ec84beb
> >> --- /dev/null
> >> +++ b/kernel/bpf/path_iter.c
> >> @@ -0,0 +1,58 @@
> >> +// SPDX-License-Identifier: GPL-2.0-only
> >> +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
> >> +#include <linux/bpf.h>
> >> +#include <linux/bpf_mem_alloc.h>
> >> +#include <linux/namei.h>
> >> +#include <linux/path.h>
> >> +
> >> +/* open-coded iterator */
> >> +struct bpf_iter_path {
> >> + __u64 __opaque[3];
> >> +} __aligned(8);
> >> +
> >> +struct bpf_iter_path_kern {
> >> + struct path path;
> >> + __u64 flags;
> >> +} __aligned(8);
> >> +
> >> +__bpf_kfunc_start_defs();
> >> +
> >> +__bpf_kfunc int bpf_iter_path_new(struct bpf_iter_path *it,
> >> + struct path *start,
> >> + __u64 flags)
> >> +{
> >> + struct bpf_iter_path_kern *kit = (void *)it;
> >> +
> >> + BUILD_BUG_ON(sizeof(*kit) > sizeof(*it));
> >> + BUILD_BUG_ON(__alignof__(*kit) != __alignof__(*it));
> >> +
> >> + if (flags) {
> >> + memset(&kit->path, 0, sizeof(struct path));
> >> + return -EINVAL;
> >> + }
> >> +
> >> + kit->path = *start;
> >> + path_get(&kit->path);
> >> + kit->flags = flags;
> >> +
> >> + return 0;
> >> +}
> >> +
> >> +__bpf_kfunc struct path *bpf_iter_path_next(struct bpf_iter_path *it)
> >> +{
> >> + struct bpf_iter_path_kern *kit = (void *)it;
> >> + struct path root = {};
> >> +
> >> + if (!path_walk_parent(&kit->path, &root))
> >> + return NULL;
> >> + return &kit->path;
> >> +}
> >> +
> >> +__bpf_kfunc void bpf_iter_path_destroy(struct bpf_iter_path *it)
> >> +{
> >> + struct bpf_iter_path_kern *kit = (void *)it;
> >> +
> >> + path_put(&kit->path);
> > Note: destroy() will be called even if construction of the iterator fails
> > or the iterator has been exhausted. So you need to make sure that
> > bpf_iter_path has a state in which you can detect that there is no path
> > present and skip path_put().
>
> In rare cases, it is possible that the &kit->path address could be destroyed
> and reused, right? Maybe we need more state in kit to detect the change?
kit->path is always referenced, so this should not happen.
Thanks,
Song
On Mon, Jun 2, 2025 at 11:59 PM Song Liu <song@kernel.org> wrote:
>
> Introduce a path iterator, which reliably walks a struct path toward
> the root. This path iterator is based on path_walk_parent. A fixed
> zero'ed root is passed to path_walk_parent(). Therefore, unless the
> user terminates it earlier, the iterator will terminate at the real
> root.
>
> Signed-off-by: Song Liu <song@kernel.org>
> ---
> kernel/bpf/Makefile | 1 +
> kernel/bpf/helpers.c | 3 +++
> kernel/bpf/path_iter.c | 58 ++++++++++++++++++++++++++++++++++++++++++
> kernel/bpf/verifier.c | 5 ++++
> 4 files changed, 67 insertions(+)
> create mode 100644 kernel/bpf/path_iter.c
>
> diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
> index 3a335c50e6e3..454a650d934e 100644
> --- a/kernel/bpf/Makefile
> +++ b/kernel/bpf/Makefile
> @@ -56,6 +56,7 @@ obj-$(CONFIG_BPF_SYSCALL) += kmem_cache_iter.o
> ifeq ($(CONFIG_DMA_SHARED_BUFFER),y)
> obj-$(CONFIG_BPF_SYSCALL) += dmabuf_iter.o
> endif
> +obj-$(CONFIG_BPF_SYSCALL) += path_iter.o
>
> CFLAGS_REMOVE_percpu_freelist.o = $(CC_FLAGS_FTRACE)
> CFLAGS_REMOVE_bpf_lru_list.o = $(CC_FLAGS_FTRACE)
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index b71e428ad936..b190c78e40f6 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -3397,6 +3397,9 @@ BTF_ID_FLAGS(func, bpf_iter_dmabuf_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPAB
> BTF_ID_FLAGS(func, bpf_iter_dmabuf_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
> #endif
> BTF_ID_FLAGS(func, __bpf_trap)
> +BTF_ID_FLAGS(func, bpf_iter_path_new, KF_ITER_NEW | KF_SLEEPABLE)
> +BTF_ID_FLAGS(func, bpf_iter_path_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPABLE)
> +BTF_ID_FLAGS(func, bpf_iter_path_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
> BTF_KFUNCS_END(common_btf_ids)
>
> static const struct btf_kfunc_id_set common_kfunc_set = {
> diff --git a/kernel/bpf/path_iter.c b/kernel/bpf/path_iter.c
> new file mode 100644
> index 000000000000..0d972ec84beb
> --- /dev/null
> +++ b/kernel/bpf/path_iter.c
I think Christian's preference was to keep
everything in fs/bpf_fs_kfuncs.c
Don't add a new file. Just add this iter there.
> @@ -0,0 +1,58 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
> +#include <linux/bpf.h>
> +#include <linux/bpf_mem_alloc.h>
> +#include <linux/namei.h>
> +#include <linux/path.h>
> +
> +/* open-coded iterator */
> +struct bpf_iter_path {
> + __u64 __opaque[3];
> +} __aligned(8);
> +
> +struct bpf_iter_path_kern {
> + struct path path;
> + __u64 flags;
Why? flags is unused. Don't waste space for it.
On Tue, Jun 03, 2025 at 08:13:18AM -0700, Alexei Starovoitov wrote:
> On Mon, Jun 2, 2025 at 11:59 PM Song Liu <song@kernel.org> wrote:
> >
> > Introduce a path iterator, which reliably walks a struct path toward
> > the root. This path iterator is based on path_walk_parent. A fixed
> > zero'ed root is passed to path_walk_parent(). Therefore, unless the
> > user terminates it earlier, the iterator will terminate at the real
> > root.
> >
> > Signed-off-by: Song Liu <song@kernel.org>
> > ---
> > kernel/bpf/Makefile | 1 +
> > kernel/bpf/helpers.c | 3 +++
> > kernel/bpf/path_iter.c | 58 ++++++++++++++++++++++++++++++++++++++++++
> > kernel/bpf/verifier.c | 5 ++++
> > 4 files changed, 67 insertions(+)
> > create mode 100644 kernel/bpf/path_iter.c
> >
> > diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
> > index 3a335c50e6e3..454a650d934e 100644
> > --- a/kernel/bpf/Makefile
> > +++ b/kernel/bpf/Makefile
> > @@ -56,6 +56,7 @@ obj-$(CONFIG_BPF_SYSCALL) += kmem_cache_iter.o
> > ifeq ($(CONFIG_DMA_SHARED_BUFFER),y)
> > obj-$(CONFIG_BPF_SYSCALL) += dmabuf_iter.o
> > endif
> > +obj-$(CONFIG_BPF_SYSCALL) += path_iter.o
> >
> > CFLAGS_REMOVE_percpu_freelist.o = $(CC_FLAGS_FTRACE)
> > CFLAGS_REMOVE_bpf_lru_list.o = $(CC_FLAGS_FTRACE)
> > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> > index b71e428ad936..b190c78e40f6 100644
> > --- a/kernel/bpf/helpers.c
> > +++ b/kernel/bpf/helpers.c
> > @@ -3397,6 +3397,9 @@ BTF_ID_FLAGS(func, bpf_iter_dmabuf_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPAB
> > BTF_ID_FLAGS(func, bpf_iter_dmabuf_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
> > #endif
> > BTF_ID_FLAGS(func, __bpf_trap)
> > +BTF_ID_FLAGS(func, bpf_iter_path_new, KF_ITER_NEW | KF_SLEEPABLE)
> > +BTF_ID_FLAGS(func, bpf_iter_path_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPABLE)
> > +BTF_ID_FLAGS(func, bpf_iter_path_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
> > BTF_KFUNCS_END(common_btf_ids)
> >
> > static const struct btf_kfunc_id_set common_kfunc_set = {
> > diff --git a/kernel/bpf/path_iter.c b/kernel/bpf/path_iter.c
> > new file mode 100644
> > index 000000000000..0d972ec84beb
> > --- /dev/null
> > +++ b/kernel/bpf/path_iter.c
>
> I think Christian's preference was to keep
> everything in fs/bpf_fs_kfuncs.c
Yes. And since that also adds new fs helpers I want to take that through
the VFS tree, please. I'll provide a stable branch as we do with all
other subsystems.
© 2016 - 2026 Red Hat, Inc.