Currently, many 'BPF_MAP_CREATE' failures return '-EINVAL' without
providing any explanation to user space.
With the extended BPF syscall support, detailed error messages can now be
reported. This allows users to understand the specific reason for a
failed map creation, rather than just receiving a generic '-EINVAL'.
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
kernel/bpf/syscall.c | 88 +++++++++++++++++++++++++++++++++++++++-----
1 file changed, 79 insertions(+), 9 deletions(-)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 14fc5738f2b9..e64cc7504731 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1366,23 +1366,72 @@ static bool bpf_net_capable(void)
return capable(CAP_NET_ADMIN) || capable(CAP_SYS_ADMIN);
}
+struct bpf_vlog_wrapper {
+ struct bpf_common_attr *attr;
+ struct bpf_verifier_log *log;
+};
+
+static void bpf_vlog_wrapper_destructor(struct bpf_vlog_wrapper *w)
+{
+ if (!w->log)
+ return;
+
+ (void) bpf_vlog_finalize(w->log, &w->attr->log_true_size);
+ kfree(w->log);
+}
+
+#define DEFINE_BPF_VLOG_WRAPPER(name, common_attrs) \
+ struct bpf_vlog_wrapper name __cleanup(bpf_vlog_wrapper_destructor) = { \
+ .attr = common_attrs, \
+ }
+
+static int bpf_vlog_wrapper_init(struct bpf_vlog_wrapper *w)
+{
+ struct bpf_common_attr *attr = w->attr;
+ struct bpf_verifier_log *log;
+ int err;
+
+ if (!attr->log_buf)
+ return 0;
+
+ log = kzalloc(sizeof(*log), GFP_KERNEL);
+ if (!log)
+ return -ENOMEM;
+
+ err = bpf_vlog_init(log, attr->log_level, u64_to_user_ptr(attr->log_buf), attr->log_size);
+ if (err) {
+ kfree(log);
+ return err;
+ }
+
+ w->log = log;
+ return 0;
+}
+
#define BPF_MAP_CREATE_LAST_FIELD excl_prog_hash_size
/* called via syscall */
-static int map_create(union bpf_attr *attr, bpfptr_t uattr)
+static int map_create(union bpf_attr *attr, bpfptr_t uattr, struct bpf_common_attr *common_attrs)
{
const struct bpf_map_ops *ops;
struct bpf_token *token = NULL;
int numa_node = bpf_map_attr_numa_node(attr);
u32 map_type = attr->map_type;
+ struct bpf_verifier_log *log;
struct bpf_map *map;
bool token_flag;
int f_flags;
int err;
+ DEFINE_BPF_VLOG_WRAPPER(log_wrapper, common_attrs);
err = CHECK_ATTR(BPF_MAP_CREATE);
if (err)
return -EINVAL;
+ err = bpf_vlog_wrapper_init(&log_wrapper);
+ if (err)
+ return err;
+ log = log_wrapper.log;
+
/* check BPF_F_TOKEN_FD flag, remember if it's set, and then clear it
* to avoid per-map type checks tripping on unknown flag
*/
@@ -1390,17 +1439,25 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr)
attr->map_flags &= ~BPF_F_TOKEN_FD;
if (attr->btf_vmlinux_value_type_id) {
- if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS ||
- attr->btf_key_type_id || attr->btf_value_type_id)
+ if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS) {
+ bpf_log(log, "btf_vmlinux_value_type_id can only be used with struct_ops maps.\n");
return -EINVAL;
+ }
+ if (attr->btf_key_type_id || attr->btf_value_type_id) {
+ bpf_log(log, "btf_vmlinux_value_type_id is mutually exclusive with btf_key_type_id and btf_value_type_id.\n");
+ return -EINVAL;
+ }
} else if (attr->btf_key_type_id && !attr->btf_value_type_id) {
+ bpf_log(log, "Invalid btf_value_type_id.\n");
return -EINVAL;
}
if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER &&
attr->map_type != BPF_MAP_TYPE_ARENA &&
- attr->map_extra != 0)
+ attr->map_extra != 0) {
+ bpf_log(log, "Invalid map_extra.\n");
return -EINVAL;
+ }
f_flags = bpf_get_file_flag(attr->map_flags);
if (f_flags < 0)
@@ -1408,13 +1465,17 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr)
if (numa_node != NUMA_NO_NODE &&
((unsigned int)numa_node >= nr_node_ids ||
- !node_online(numa_node)))
+ !node_online(numa_node))) {
+ bpf_log(log, "Invalid numa_node.\n");
return -EINVAL;
+ }
/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
map_type = attr->map_type;
- if (map_type >= ARRAY_SIZE(bpf_map_types))
+ if (map_type >= ARRAY_SIZE(bpf_map_types)) {
+ bpf_log(log, "Invalid map_type.\n");
return -EINVAL;
+ }
map_type = array_index_nospec(map_type, ARRAY_SIZE(bpf_map_types));
ops = bpf_map_types[map_type];
if (!ops)
@@ -1432,8 +1493,10 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr)
if (token_flag) {
token = bpf_token_get_from_fd(attr->map_token_fd);
- if (IS_ERR(token))
+ if (IS_ERR(token)) {
+ bpf_log(log, "Invalid map_token_fd.\n");
return PTR_ERR(token);
+ }
/* if current token doesn't grant map creation permissions,
* then we can't use this token, so ignore it and rely on
@@ -1516,8 +1579,10 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr)
err = bpf_obj_name_cpy(map->name, attr->map_name,
sizeof(attr->map_name));
- if (err < 0)
+ if (err < 0) {
+ bpf_log(log, "Invalid map_name.\n");
goto free_map;
+ }
preempt_disable();
map->cookie = gen_cookie_next(&bpf_map_cookie);
@@ -1540,6 +1605,7 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr)
btf = btf_get_by_fd(attr->btf_fd);
if (IS_ERR(btf)) {
+ bpf_log(log, "Invalid btf_fd.\n");
err = PTR_ERR(btf);
goto free_map;
}
@@ -6279,7 +6345,11 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
switch (cmd) {
case BPF_MAP_CREATE:
- err = map_create(&attr, uattr);
+ common_attrs.log_true_size = 0;
+ err = map_create(&attr, uattr, &common_attrs);
+ ret = copy_common_attr_log_true_size(uattr_common, size_common,
+ &common_attrs.log_true_size);
+ err = ret ? ret : err;
break;
case BPF_MAP_LOOKUP_ELEM:
err = map_lookup_elem(&attr);
--
2.52.0
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 14fc5738f2b9..e64cc7504731 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
[ ... ]
> @@ -6279,7 +6345,11 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
>
> switch (cmd) {
> case BPF_MAP_CREATE:
> - err = map_create(&attr, uattr);
> + common_attrs.log_true_size = 0;
> + err = map_create(&attr, uattr, &common_attrs);
> + ret = copy_common_attr_log_true_size(uattr_common, size_common,
> + &common_attrs.log_true_size);
> + err = ret ? ret : err;
When map_create() succeeds, it returns a file descriptor that is already
installed in the caller's fd table via bpf_map_new_fd(). If
copy_common_attr_log_true_size() then fails (e.g., user provided a
read-only buffer for uattr_common), the syscall returns -EFAULT but the
fd remains installed.
Could this leak the file descriptor? The user gets an error and has no
way to know what fd number was allocated, so they cannot close it.
The sequence would be:
1. map_create() succeeds, returns fd (e.g., 5)
2. copy_common_attr_log_true_size() fails, ret = -EFAULT
3. err = ret ? ret : err = -EFAULT
4. syscall returns -EFAULT
5. map and fd persist, but user cannot close fd 5
> + break;
> case BPF_MAP_LOOKUP_ELEM:
> err = map_lookup_elem(&attr);
---
AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
CI run summary: https://github.com/kernel-patches/bpf/actions/runs/20756616585
On 7/1/26 01:44, bot+bpf-ci@kernel.org wrote:
>> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
>> index 14fc5738f2b9..e64cc7504731 100644
>> --- a/kernel/bpf/syscall.c
>> +++ b/kernel/bpf/syscall.c
>
> [ ... ]
>
>> @@ -6279,7 +6345,11 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
>>
>> switch (cmd) {
>> case BPF_MAP_CREATE:
>> - err = map_create(&attr, uattr);
>> + common_attrs.log_true_size = 0;
>> + err = map_create(&attr, uattr, &common_attrs);
>> + ret = copy_common_attr_log_true_size(uattr_common, size_common,
>> + &common_attrs.log_true_size);
>> + err = ret ? ret : err;
>
> When map_create() succeeds, it returns a file descriptor that is already
> installed in the caller's fd table via bpf_map_new_fd(). If
> copy_common_attr_log_true_size() then fails (e.g., user provided a
> read-only buffer for uattr_common), the syscall returns -EFAULT but the
> fd remains installed.
>
> Could this leak the file descriptor? The user gets an error and has no
> way to know what fd number was allocated, so they cannot close it.
>
Good catch — you’re right.
If 'map_create()' succeeds and 'copy_common_attr_log_true_size()' later
fails (e.g. returning -EFAULT), the newly created file descriptor would
remain installed and could be leaked.
I’ll fix this in the next revision by explicitly closing the fd when
ret is non-zero.
Thanks,
Leon
> The sequence would be:
> 1. map_create() succeeds, returns fd (e.g., 5)
> 2. copy_common_attr_log_true_size() fails, ret = -EFAULT
> 3. err = ret ? ret : err = -EFAULT
> 4. syscall returns -EFAULT
> 5. map and fd persist, but user cannot close fd 5
>
>> + break;
>> case BPF_MAP_LOOKUP_ELEM:
>> err = map_lookup_elem(&attr);
>
>
> ---
> AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
> See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
>
> CI run summary: https://github.com/kernel-patches/bpf/actions/runs/20756616585
On Tue, Jan 6, 2026 at 9:57 PM Leon Hwang <leon.hwang@linux.dev> wrote:
>
>
>
> On 7/1/26 01:44, bot+bpf-ci@kernel.org wrote:
> >> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> >> index 14fc5738f2b9..e64cc7504731 100644
> >> --- a/kernel/bpf/syscall.c
> >> +++ b/kernel/bpf/syscall.c
> >
> > [ ... ]
> >
> >> @@ -6279,7 +6345,11 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
> >>
> >> switch (cmd) {
> >> case BPF_MAP_CREATE:
> >> - err = map_create(&attr, uattr);
> >> + common_attrs.log_true_size = 0;
> >> + err = map_create(&attr, uattr, &common_attrs);
> >> + ret = copy_common_attr_log_true_size(uattr_common, size_common,
> >> + &common_attrs.log_true_size);
> >> + err = ret ? ret : err;
> >
> > When map_create() succeeds, it returns a file descriptor that is already
> > installed in the caller's fd table via bpf_map_new_fd(). If
> > copy_common_attr_log_true_size() then fails (e.g., user provided a
> > read-only buffer for uattr_common), the syscall returns -EFAULT but the
> > fd remains installed.
> >
> > Could this leak the file descriptor? The user gets an error and has no
> > way to know what fd number was allocated, so they cannot close it.
> >
>
> Good catch — you’re right.
>
> If 'map_create()' succeeds and 'copy_common_attr_log_true_size()' later
> fails (e.g. returning -EFAULT), the newly created file descriptor would
> remain installed and could be leaked.
>
> I’ll fix this in the next revision by explicitly closing the fd when
> ret is non-zero.
No. The refactoring was wrong. Don't make the kernel do extra work.
Patch 3 introduced a bug and closing fd is not a solution.
Such a pattern can be exploited for DoS.
On 7/1/26 14:20, Alexei Starovoitov wrote:
> On Tue, Jan 6, 2026 at 9:57 PM Leon Hwang <leon.hwang@linux.dev> wrote:
>>
>>
>>
>> On 7/1/26 01:44, bot+bpf-ci@kernel.org wrote:
>>>> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
>>>> index 14fc5738f2b9..e64cc7504731 100644
>>>> --- a/kernel/bpf/syscall.c
>>>> +++ b/kernel/bpf/syscall.c
>>>
>>> [ ... ]
>>>
>>>> @@ -6279,7 +6345,11 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
>>>>
>>>> switch (cmd) {
>>>> case BPF_MAP_CREATE:
>>>> - err = map_create(&attr, uattr);
>>>> + common_attrs.log_true_size = 0;
>>>> + err = map_create(&attr, uattr, &common_attrs);
>>>> + ret = copy_common_attr_log_true_size(uattr_common, size_common,
>>>> + &common_attrs.log_true_size);
>>>> + err = ret ? ret : err;
>>>
>>> When map_create() succeeds, it returns a file descriptor that is already
>>> installed in the caller's fd table via bpf_map_new_fd(). If
>>> copy_common_attr_log_true_size() then fails (e.g., user provided a
>>> read-only buffer for uattr_common), the syscall returns -EFAULT but the
>>> fd remains installed.
>>>
>>> Could this leak the file descriptor? The user gets an error and has no
>>> way to know what fd number was allocated, so they cannot close it.
>>>
>>
>> Good catch — you’re right.
>>
>> If 'map_create()' succeeds and 'copy_common_attr_log_true_size()' later
>> fails (e.g. returning -EFAULT), the newly created file descriptor would
>> remain installed and could be leaked.
>>
>> I’ll fix this in the next revision by explicitly closing the fd when
>> ret is non-zero.
>
> No. The refactoring was wrong. Don't make the kernel do extra work.
> Patch 3 introduced a bug and closing fd is not a solution.
> Such a pattern can be exploited for DoS.
You’re right — closing the fd after the fact is not the correct
solution, and introducing extra work in the kernel is undesirable. Doing
so could also open the door to DoS-style abuse.
The correct approach is to copy log_true_size into common_attrs
before allocating and installing the new fd, so that a failure in
copying cannot leave behind a partially created object.
I’ll rework this accordingly in the next revision.
Thanks,
Leon
On Tue, Jan 6, 2026 at 10:32 PM Leon Hwang <leon.hwang@linux.dev> wrote:
>
>
>
> On 7/1/26 14:20, Alexei Starovoitov wrote:
> > On Tue, Jan 6, 2026 at 9:57 PM Leon Hwang <leon.hwang@linux.dev> wrote:
> >>
> >>
> >>
> >> On 7/1/26 01:44, bot+bpf-ci@kernel.org wrote:
> >>>> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> >>>> index 14fc5738f2b9..e64cc7504731 100644
> >>>> --- a/kernel/bpf/syscall.c
> >>>> +++ b/kernel/bpf/syscall.c
> >>>
> >>> [ ... ]
> >>>
> >>>> @@ -6279,7 +6345,11 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
> >>>>
> >>>> switch (cmd) {
> >>>> case BPF_MAP_CREATE:
> >>>> - err = map_create(&attr, uattr);
> >>>> + common_attrs.log_true_size = 0;
> >>>> + err = map_create(&attr, uattr, &common_attrs);
> >>>> + ret = copy_common_attr_log_true_size(uattr_common, size_common,
> >>>> + &common_attrs.log_true_size);
> >>>> + err = ret ? ret : err;
> >>>
> >>> When map_create() succeeds, it returns a file descriptor that is already
> >>> installed in the caller's fd table via bpf_map_new_fd(). If
> >>> copy_common_attr_log_true_size() then fails (e.g., user provided a
> >>> read-only buffer for uattr_common), the syscall returns -EFAULT but the
> >>> fd remains installed.
> >>>
> >>> Could this leak the file descriptor? The user gets an error and has no
> >>> way to know what fd number was allocated, so they cannot close it.
> >>>
> >>
> >> Good catch — you’re right.
> >>
> >> If 'map_create()' succeeds and 'copy_common_attr_log_true_size()' later
> >> fails (e.g. returning -EFAULT), the newly created file descriptor would
> >> remain installed and could be leaked.
> >>
> >> I’ll fix this in the next revision by explicitly closing the fd when
> >> ret is non-zero.
> >
> > No. The refactoring was wrong. Don't make the kernel do extra work.
> > Patch 3 introduced a bug and closing fd is not a solution.
> > Such a pattern can be exploited for DoS.
>
> You’re right — closing the fd after the fact is not the correct
> solution, and introducing extra work in the kernel is undesirable. Doing
> so could also open the door to DoS-style abuse.
>
> The correct approach is to copy log_true_size into common_attrs
> before allocating and installing the new fd, so that a failure in
> copying cannot leave behind a partially created object.
Why move it at all?
I don't think you should be moving
copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, log_true_size),
&log_true_size, sizeof(log_true_size))
from where it is in verifier.c
© 2016 - 2026 Red Hat, Inc.