User space needs access to kernel BTF for many modern features of BPF.
Right now each process needs to read the BTF blob either in pieces or
as a whole. Allow mmapping the sysfs file so that processes can directly
access the memory allocated for it in the kernel.
Signed-off-by: Lorenz Bauer <lmb@isovalent.com>
---
include/asm-generic/vmlinux.lds.h | 3 ++-
kernel/bpf/sysfs_btf.c | 37 +++++++++++++++++++++++++++++++++++++
2 files changed, 39 insertions(+), 1 deletion(-)
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 58a635a6d5bdf0c53c267c2a3d21a5ed8678ce73..1750390735fac7637cc4d2fa05f96cb2a36aa448 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -667,10 +667,11 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG)
*/
#ifdef CONFIG_DEBUG_INFO_BTF
#define BTF \
+ . = ALIGN(PAGE_SIZE); \
.BTF : AT(ADDR(.BTF) - LOAD_OFFSET) { \
BOUNDED_SECTION_BY(.BTF, _BTF) \
} \
- . = ALIGN(4); \
+ . = ALIGN(PAGE_SIZE); \
.BTF_ids : AT(ADDR(.BTF_ids) - LOAD_OFFSET) { \
*(.BTF_ids) \
}
diff --git a/kernel/bpf/sysfs_btf.c b/kernel/bpf/sysfs_btf.c
index 81d6cf90584a7157929c50f62a5c6862e7a3d081..37278d7f38ae72f2d7efcfa859e86aaf12e39a25 100644
--- a/kernel/bpf/sysfs_btf.c
+++ b/kernel/bpf/sysfs_btf.c
@@ -7,14 +7,51 @@
#include <linux/kobject.h>
#include <linux/init.h>
#include <linux/sysfs.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <linux/btf.h>
/* See scripts/link-vmlinux.sh, gen_btf() func for details */
extern char __start_BTF[];
extern char __stop_BTF[];
+static int btf_sysfs_vmlinux_mmap(struct file *filp, struct kobject *kobj,
+ const struct bin_attribute *attr,
+ struct vm_area_struct *vma)
+{
+ unsigned long pages = PAGE_ALIGN(attr->size) >> PAGE_SHIFT;
+ size_t vm_size = vma->vm_end - vma->vm_start;
+ unsigned long addr = (unsigned long)attr->private;
+ int i, err = 0;
+
+ if (addr != (unsigned long)__start_BTF || !PAGE_ALIGNED(addr))
+ return -EINVAL;
+
+ if (vma->vm_pgoff)
+ return -EINVAL;
+
+ if (vma->vm_flags & (VM_WRITE | VM_EXEC | VM_MAYSHARE))
+ return -EACCES;
+
+ if (vm_size >> PAGE_SHIFT > pages)
+ return -EINVAL;
+
+ vm_flags_mod(vma, VM_DONTDUMP, VM_MAYEXEC | VM_MAYWRITE);
+
+ for (i = 0; i < pages && !err; i++, addr += PAGE_SIZE)
+ err = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE,
+ virt_to_page(addr));
+
+ if (err)
+ zap_vma_pages(vma);
+
+ return err;
+}
+
static struct bin_attribute bin_attr_btf_vmlinux __ro_after_init = {
.attr = { .name = "vmlinux", .mode = 0444, },
.read_new = sysfs_bin_attr_simple_read,
+ .mmap = btf_sysfs_vmlinux_mmap,
};
struct kobject *btf_kobj;
--
2.49.0
Hi Lorenz,
kernel test robot noticed the following build warnings:
[auto build test WARNING on 38d976c32d85ef12dcd2b8a231196f7049548477]
url: https://github.com/intel-lab-lkp/linux/commits/Lorenz-Bauer/btf-allow-mmap-of-vmlinux-btf/20250506-024103
base: 38d976c32d85ef12dcd2b8a231196f7049548477
patch link: https://lore.kernel.org/r/20250505-vmlinux-mmap-v3-1-5d53afa060e8%40isovalent.com
patch subject: [PATCH bpf-next v3 1/3] btf: allow mmap of vmlinux btf
config: arc-randconfig-r073-20250508 (https://download.01.org/0day-ci/archive/20250509/202505091116.jHtyWJW4-lkp@intel.com/config)
compiler: arc-linux-gcc (GCC) 12.4.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250509/202505091116.jHtyWJW4-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202505091116.jHtyWJW4-lkp@intel.com/
All warnings (new ones prefixed by >>):
In file included from arch/arc/include/asm/page.h:136,
from arch/arc/include/asm/thread_info.h:16,
from include/linux/thread_info.h:60,
from include/asm-generic/preempt.h:5,
from ./arch/arc/include/generated/asm/preempt.h:1,
from include/linux/preempt.h:79,
from include/linux/spinlock.h:56,
from include/linux/mmzone.h:8,
from include/linux/gfp.h:7,
from include/linux/umh.h:4,
from include/linux/kmod.h:9,
from include/linux/module.h:17,
from kernel/bpf/sysfs_btf.c:6:
kernel/bpf/sysfs_btf.c: In function 'btf_sysfs_vmlinux_mmap':
>> kernel/bpf/sysfs_btf.c:43:51: warning: passing argument 1 of 'virt_to_pfn' makes pointer from integer without a cast [-Wint-conversion]
43 | virt_to_page(addr));
| ^~~~
| |
| long unsigned int
include/asm-generic/memory_model.h:18:46: note: in definition of macro '__pfn_to_page'
18 | #define __pfn_to_page(pfn) (mem_map + ((pfn) - ARCH_PFN_OFFSET))
| ^~~
kernel/bpf/sysfs_btf.c:43:38: note: in expansion of macro 'virt_to_page'
43 | virt_to_page(addr));
| ^~~~~~~~~~~~
arch/arc/include/asm/page.h:123:53: note: expected 'const void *' but argument is of type 'long unsigned int'
123 | static inline unsigned long virt_to_pfn(const void *kaddr)
| ~~~~~~~~~~~~^~~~~
vim +/virt_to_pfn +43 kernel/bpf/sysfs_btf.c
17
18 static int btf_sysfs_vmlinux_mmap(struct file *filp, struct kobject *kobj,
19 const struct bin_attribute *attr,
20 struct vm_area_struct *vma)
21 {
22 unsigned long pages = PAGE_ALIGN(attr->size) >> PAGE_SHIFT;
23 size_t vm_size = vma->vm_end - vma->vm_start;
24 unsigned long addr = (unsigned long)attr->private;
25 int i, err = 0;
26
27 if (addr != (unsigned long)__start_BTF || !PAGE_ALIGNED(addr))
28 return -EINVAL;
29
30 if (vma->vm_pgoff)
31 return -EINVAL;
32
33 if (vma->vm_flags & (VM_WRITE | VM_EXEC | VM_MAYSHARE))
34 return -EACCES;
35
36 if (vm_size >> PAGE_SHIFT > pages)
37 return -EINVAL;
38
39 vm_flags_mod(vma, VM_DONTDUMP, VM_MAYEXEC | VM_MAYWRITE);
40
41 for (i = 0; i < pages && !err; i++, addr += PAGE_SIZE)
42 err = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE,
> 43 virt_to_page(addr));
44
45 if (err)
46 zap_vma_pages(vma);
47
48 return err;
49 }
50
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
On Mon, May 5, 2025 at 11:39 AM Lorenz Bauer <lmb@isovalent.com> wrote:
>
> User space needs access to kernel BTF for many modern features of BPF.
> Right now each process needs to read the BTF blob either in pieces or
> as a whole. Allow mmaping the sysfs file so that processes can directly
> access the memory allocated for it in the kernel.
>
> Signed-off-by: Lorenz Bauer <lmb@isovalent.com>
> ---
> include/asm-generic/vmlinux.lds.h | 3 ++-
> kernel/bpf/sysfs_btf.c | 37 +++++++++++++++++++++++++++++++++++++
> 2 files changed, 39 insertions(+), 1 deletion(-)
>
> diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
> index 58a635a6d5bdf0c53c267c2a3d21a5ed8678ce73..1750390735fac7637cc4d2fa05f96cb2a36aa448 100644
> --- a/include/asm-generic/vmlinux.lds.h
> +++ b/include/asm-generic/vmlinux.lds.h
> @@ -667,10 +667,11 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG)
> */
> #ifdef CONFIG_DEBUG_INFO_BTF
> #define BTF \
> + . = ALIGN(PAGE_SIZE); \
> .BTF : AT(ADDR(.BTF) - LOAD_OFFSET) { \
> BOUNDED_SECTION_BY(.BTF, _BTF) \
> } \
> - . = ALIGN(4); \
> + . = ALIGN(PAGE_SIZE); \
> .BTF_ids : AT(ADDR(.BTF_ids) - LOAD_OFFSET) { \
> *(.BTF_ids) \
> }
> diff --git a/kernel/bpf/sysfs_btf.c b/kernel/bpf/sysfs_btf.c
> index 81d6cf90584a7157929c50f62a5c6862e7a3d081..37278d7f38ae72f2d7efcfa859e86aaf12e39a25 100644
> --- a/kernel/bpf/sysfs_btf.c
> +++ b/kernel/bpf/sysfs_btf.c
> @@ -7,14 +7,51 @@
> #include <linux/kobject.h>
> #include <linux/init.h>
> #include <linux/sysfs.h>
> +#include <linux/mm.h>
> +#include <linux/io.h>
> +#include <linux/btf.h>
>
> /* See scripts/link-vmlinux.sh, gen_btf() func for details */
> extern char __start_BTF[];
> extern char __stop_BTF[];
>
> +static int btf_sysfs_vmlinux_mmap(struct file *filp, struct kobject *kobj,
> + const struct bin_attribute *attr,
> + struct vm_area_struct *vma)
> +{
> + unsigned long pages = PAGE_ALIGN(attr->size) >> PAGE_SHIFT;
> + size_t vm_size = vma->vm_end - vma->vm_start;
> + unsigned long addr = (unsigned long)attr->private;
> + int i, err = 0;
> +
> + if (addr != (unsigned long)__start_BTF || !PAGE_ALIGNED(addr))
> + return -EINVAL;
> +
> + if (vma->vm_pgoff)
> + return -EINVAL;
any particular reason to not allow vm_pgoff?
> +
> + if (vma->vm_flags & (VM_WRITE | VM_EXEC | VM_MAYSHARE))
> + return -EACCES;
> +
> + if (vm_size >> PAGE_SHIFT > pages)
() around shift operation, please, for those of us who haven't
memorized the entire C operator precedence table ;)
> + return -EINVAL;
> +
> + vm_flags_mod(vma, VM_DONTDUMP, VM_MAYEXEC | VM_MAYWRITE);
> +
> + for (i = 0; i < pages && !err; i++, addr += PAGE_SIZE)
> + err = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE,
> + virt_to_page(addr));
> +
> + if (err)
> + zap_vma_pages(vma);
it's certainly subjective, but I find this error handling with !err in
the for loop condition hard to follow. What's wrong with the arguably more
straightforward version (and as you can see, I'm not a big fan of a mutated
addr next to a calculated vma->vm_start + i * PAGE_SIZE: pick one style and
follow it for both entities):
for (i = 0; i < pages; i++) {
err = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE,
virt_to_page(addr + i * PAGE_SIZE));
if (err) {
zap_vma_pages(vma);
return err;
}
}
return 0;
?
> +
> + return err;
> +}
> +
> static struct bin_attribute bin_attr_btf_vmlinux __ro_after_init = {
> .attr = { .name = "vmlinux", .mode = 0444, },
> .read_new = sysfs_bin_attr_simple_read,
> + .mmap = btf_sysfs_vmlinux_mmap,
> };
>
> struct kobject *btf_kobj;
>
> --
> 2.49.0
>
On Tue, May 6, 2025 at 10:39 PM Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
>
> > + if (vma->vm_pgoff)
> > + return -EINVAL;
>
> any particular reason to not allow vm_pgoff?
Doesn't seem particularly useful because the header is at offset 0,
and I don't trust myself to get the overflow checks done right.
> it's certainly subjective, but I find this error handling with !err in
> for loop condition hard to follow. What's wrong with arguably more
> straightforward (and as you can see I'm not a big fan of mutated addr
> but calculated vma->vm_start + i * PAGE_SIZE: pick one style one
> follow it for both entities?):
Yeah that's nicer, I was just going off of what Alexei proposed.
© 2016 - 2025 Red Hat, Inc.