From: Josh Poimboeuf <jpoimboe@kernel.org>
Objtool warns about calling pr_debug() from uaccess-enabled regions, and
rightfully so. Add a dbg_sec_uaccess() macro which temporarily disables
uaccess before doing the dynamic printk, and use that to add debug
messages throughout the uaccess-enabled regions.
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
kernel/unwind/sframe.c | 60 ++++++++++++++++++++++++++++--------
kernel/unwind/sframe_debug.h | 31 +++++++++++++++++++
2 files changed, 78 insertions(+), 13 deletions(-)
diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c
index 66d3ba3c8389..3972bce40fc7 100644
--- a/kernel/unwind/sframe.c
+++ b/kernel/unwind/sframe.c
@@ -53,12 +53,15 @@ static __always_inline int __read_fde(struct sframe_section *sec,
sizeof(struct sframe_fde), Efault);
ip = sec->sframe_start + fde->start_addr;
- if (ip < sec->text_start || ip > sec->text_end)
+ if (ip < sec->text_start || ip > sec->text_end) {
+ dbg_sec_uaccess("bad fde num %d\n", fde_num);
return -EINVAL;
+ }
return 0;
Efault:
+ dbg_sec_uaccess("fde %d usercopy failed\n", fde_num);
return -EFAULT;
}
@@ -85,16 +88,22 @@ static __always_inline int __find_fde(struct sframe_section *sec,
unsafe_get_user(func_off, (s32 __user *)mid, Efault);
if (ip_off >= func_off) {
- if (func_off < func_off_low)
+ if (func_off < func_off_low) {
+ dbg_sec_uaccess("fde %u not sorted\n",
+ (unsigned int)(mid - first));
return -EFAULT;
+ }
func_off_low = func_off;
found = mid;
low = mid + 1;
} else {
- if (func_off > func_off_high)
+ if (func_off > func_off_high) {
+ dbg_sec_uaccess("fde %u not sorted\n",
+ (unsigned int)(mid - first));
return -EFAULT;
+ }
func_off_high = func_off;
@@ -116,6 +125,7 @@ static __always_inline int __find_fde(struct sframe_section *sec,
return 0;
Efault:
+ dbg_sec_uaccess("fde usercopy failed\n");
return -EFAULT;
}
@@ -140,6 +150,8 @@ static __always_inline int __find_fde(struct sframe_section *sec,
____UNSAFE_GET_USER_INC(to, from, u_or_s##32, label); \
break; \
default: \
+ dbg_sec_uaccess("%d: bad UNSAFE_GET_USER_INC size %u\n",\
+ __LINE__, size); \
return -EFAULT; \
} \
})
@@ -174,24 +186,34 @@ static __always_inline int __read_fre(struct sframe_section *sec,
u8 info;
addr_size = fre_type_to_size(fre_type);
- if (!addr_size)
+ if (!addr_size) {
+ dbg_sec_uaccess("bad addr_size in fde info %u\n", fde->info);
return -EFAULT;
+ }
- if (fre_addr + addr_size + 1 > sec->fres_end)
+ if (fre_addr + addr_size + 1 > sec->fres_end) {
+ dbg_sec_uaccess("fre addr+info goes past end of subsection\n");
return -EFAULT;
+ }
UNSAFE_GET_USER_INC(ip_off, cur, addr_size, Efault);
- if (fde_type == SFRAME_FDE_TYPE_PCINC && ip_off > fde->func_size)
+ if (fde_type == SFRAME_FDE_TYPE_PCINC && ip_off > fde->func_size) {
+ dbg_sec_uaccess("fre starts past end of function: ip_off=0x%x, func_size=0x%x\n",
+ ip_off, fde->func_size);
return -EFAULT;
+ }
UNSAFE_GET_USER_INC(info, cur, 1, Efault);
offset_count = SFRAME_FRE_OFFSET_COUNT(info);
offset_size = offset_size_enum_to_size(SFRAME_FRE_OFFSET_SIZE(info));
- if (!offset_count || !offset_size)
+ if (!offset_count || !offset_size) {
+ dbg_sec_uaccess("zero offset_count or size in fre info %u\n",info);
return -EFAULT;
-
- if (cur + (offset_count * offset_size) > sec->fres_end)
+ }
+ if (cur + (offset_count * offset_size) > sec->fres_end) {
+ dbg_sec_uaccess("fre goes past end of subsection\n");
return -EFAULT;
+ }
fre->size = addr_size + 1 + (offset_count * offset_size);
@@ -200,8 +222,10 @@ static __always_inline int __read_fre(struct sframe_section *sec,
ra_off = sec->ra_off;
if (!ra_off) {
- if (!offset_count--)
+ if (!offset_count--) {
+ dbg_sec_uaccess("zero offset_count, can't find ra_off\n");
return -EFAULT;
+ }
UNSAFE_GET_USER_INC(ra_off, cur, offset_size, Efault);
}
@@ -212,8 +236,10 @@ static __always_inline int __read_fre(struct sframe_section *sec,
UNSAFE_GET_USER_INC(fp_off, cur, offset_size, Efault);
}
- if (offset_count)
+ if (offset_count) {
+ dbg_sec_uaccess("non-zero offset_count after reading fre\n");
return -EFAULT;
+ }
fre->ip_off = ip_off;
fre->cfa_off = cfa_off;
@@ -224,6 +250,7 @@ static __always_inline int __read_fre(struct sframe_section *sec,
return 0;
Efault:
+ dbg_sec_uaccess("fre usercopy failed\n");
return -EFAULT;
}
@@ -257,13 +284,20 @@ static __always_inline int __find_fre(struct sframe_section *sec,
which = !which;
ret = __read_fre(sec, fde, fre_addr, fre);
- if (ret)
+ if (ret) {
+ dbg_sec_uaccess("fde addr 0x%x: __read_fre(%u) failed\n",
+ fde->start_addr, i);
+ dbg_print_fde_uaccess(sec, fde);
return ret;
+ }
fre_addr += fre->size;
- if (prev_fre && fre->ip_off <= prev_fre->ip_off)
+ if (prev_fre && fre->ip_off <= prev_fre->ip_off) {
+ dbg_sec_uaccess("fde addr 0x%x: fre %u not sorted\n",
+ fde->start_addr, i);
return -EFAULT;
+ }
if (fre->ip_off > ip_off)
break;
diff --git a/kernel/unwind/sframe_debug.h b/kernel/unwind/sframe_debug.h
index 7794bf0bd78c..045e9c0b16c9 100644
--- a/kernel/unwind/sframe_debug.h
+++ b/kernel/unwind/sframe_debug.h
@@ -13,6 +13,26 @@
#define dbg_sec(fmt, ...) \
dbg("%s: " fmt, sec->filename, ##__VA_ARGS__)
+#define __dbg_sec_descriptor(fmt, ...) \
+ __dynamic_pr_debug(&descriptor, "sframe: %s: " fmt, \
+ sec->filename, ##__VA_ARGS__)
+
+/*
+ * To avoid breaking uaccess rules, temporarily disable uaccess
+ * before calling printk.
+ */
+#define dbg_sec_uaccess(fmt, ...) \
+({ \
+ DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \
+ if (DYNAMIC_DEBUG_BRANCH(descriptor)) { \
+ user_read_access_end(); \
+ __dbg_sec_descriptor(fmt, ##__VA_ARGS__); \
+ BUG_ON(!user_read_access_begin( \
+ (void __user *)sec->sframe_start, \
+ sec->sframe_end - sec->sframe_start)); \
+ } \
+})
+
static __always_inline void dbg_print_header(struct sframe_section *sec)
{
unsigned long fdes_end;
@@ -27,6 +47,15 @@ static __always_inline void dbg_print_header(struct sframe_section *sec)
sec->ra_off, sec->fp_off);
}
+static __always_inline void dbg_print_fde_uaccess(struct sframe_section *sec,
+ struct sframe_fde *fde)
+{
+ dbg_sec_uaccess("FDE: start_addr:0x%x func_size:0x%x "
+ "fres_off:0x%x fres_num:%d info:%u rep_size:%u\n",
+ fde->start_addr, fde->func_size,
+ fde->fres_off, fde->fres_num, fde->info, fde->rep_size);
+}
+
static inline void dbg_init(struct sframe_section *sec)
{
struct mm_struct *mm = current->mm;
@@ -57,8 +86,10 @@ static inline void dbg_free(struct sframe_section *sec)
#define dbg(args...) no_printk(args)
#define dbg_sec(args... ) no_printk(args)
+#define dbg_sec_uaccess(args...) no_printk(args)
static inline void dbg_print_header(struct sframe_section *sec) {}
+static inline void dbg_print_fde_uaccess(struct sframe_section *sec, struct sframe_fde *fde) {}
static inline void dbg_init(struct sframe_section *sec) {}
static inline void dbg_free(struct sframe_section *sec) {}
--
2.47.2
On Mon, 7 Jul 2025 at 19:12, Steven Rostedt <rostedt@kernel.org> wrote: > > From: Josh Poimboeuf <jpoimboe@kernel.org> > > Objtool warns about calling pr_debug() from uaccess-enabled regions, and > rightfully so. Add a dbg_sec_uaccess() macro which temporarily disables > uaccess before doing the dynamic printk, and use that to add debug > messages throughout the uaccess-enabled regions. Please kill this patch, and stop doing stupid things. The whole AND ONLY point of using the unsafe user access macros is performance. You are now actively breaking the whole point of them. If you need to add debug printouts to user accesses, just don't use the 'unsafe' ones. Or add the debug to after the unsafe region has finished. Not this way. This patch is disgusting, in other words. It's wrong. STOP IT. Linus
On Mon, 7 Jul 2025 20:38:35 -0700 Linus Torvalds <torvalds@linux-foundation.org> wrote: > On Mon, 7 Jul 2025 at 19:12, Steven Rostedt <rostedt@kernel.org> wrote: > This patch is disgusting, in other words. It's wrong. STOP IT. > No problem, I can easily drop it. I just took Josh's PoC patches and posted them. This particular series still needs a bit of work. That's one of the reasons I split it out of the other series. The core series is at the stage for acceptance and looking for feedback from other maintainers. This series is still a work in progress. Others have asked me to post them so they can start playing with it. We still need to come up with a system call with a robust API to allow the dynamic linker to tell the kernel where the SFrames are for the libraries it loads. Hence the "DO NOT APPLY" patch at the end (which I just noticed the subject text got dropped when I pulled it into git from patchwork and sent out this version, at least the change long still suggest it shouldn't be applied). But I will remove this patch from the queue. I never even used this debugging. What I did was inject trace_printk() all over the place to make sure it was working as expected. Thanks, -- Steve
On Tue, Jul 08, 2025 at 09:23:51AM -0400, Steven Rostedt wrote: > On Mon, 7 Jul 2025 20:38:35 -0700 > Linus Torvalds <torvalds@linux-foundation.org> wrote: > > > On Mon, 7 Jul 2025 at 19:12, Steven Rostedt <rostedt@kernel.org> wrote: > > > This patch is disgusting, in other words. It's wrong. STOP IT. > > > > No problem, I can easily drop it. > > I just took Josh's PoC patches and posted them. This particular series > still needs a bit of work. That's one of the reasons I split it out of the > other series. The core series is at the stage for acceptance and looking > for feedback from other maintainers. This series is still a work in > progress. Others have asked me to post them so they can start playing with > it. > > We still need to come up with a system call with a robust API to allow the > dynamic linker to tell the kernel where the SFrames are for the libraries > it loads. Hence the "DO NOT APPLY" patch at the end (which I just noticed the > subject text got dropped when I pulled it into git from patchwork and sent > out this version, at least the change long still suggest it shouldn't be > applied). > > But I will remove this patch from the queue. I never even used this > debugging. What I did was inject trace_printk() all over the place to make > sure it was working as expected. I had found those debug printks really useful for debugging corrupt/missing .sframe data, but yeah, this patch is ridiculously bad. Sorry for putting that out into the world ;-) And those are all error paths, so it's rather pointless to do that whole uaccess disable/enable/disable dance. So yeah, drop it for now and I can replace it with something better later on. -- Josh
On Tue, 8 Jul 2025 at 07:34, Josh Poimboeuf <jpoimboe@kernel.org> wrote: > > I had found those debug printks really useful for debugging > corrupt/missing .sframe data, but yeah, this patch is ridiculously bad. > Sorry for putting that out into the world ;-) I suspect that code that is still needs that level of debugging should just not use the 'unsafe' user access helpers. They really are meant for "this sequence turns into three CPU instructions" kind of uses, and the "unsafe" part of the naming was very much intended to be a "please don't use this unless you are being very careful and limited" marker. Now, I do think that the "goto label for exceptions" part of the unsafe accessors can be very convenient, so maybe we should make _that_ part of the interface more widely available. IOW, without the whole user_read_access_begin/user_read_access_end dance? That model is already used by "__{get,put}_kernel_nofault()", but I think it's limited to just some unusual code in mm/maccess.c. Linus
On Tue, 8 Jul 2025 07:34:36 -0700 Josh Poimboeuf <jpoimboe@kernel.org> wrote: > I had found those debug printks really useful for debugging > corrupt/missing .sframe data, but yeah, this patch is ridiculously bad. > Sorry for putting that out into the world ;-) > > And those are all error paths, so it's rather pointless to do that whole > uaccess disable/enable/disable dance. > > So yeah, drop it for now and I can replace it with something better > later on. Would something like this work? If someone enables the config to enable the validation, I don't think we need dynamic printk to do it (as that requires passing in the format directly and not via a pointer). -- Steve diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c index 0060cc576776..524738e2b823 100644 --- a/kernel/unwind/sframe.c +++ b/kernel/unwind/sframe.c @@ -321,11 +321,24 @@ int sframe_find(unsigned long ip, struct unwind_user_frame *frame) #ifdef CONFIG_SFRAME_VALIDATION -static __always_inline int __sframe_validate_section(struct sframe_section *sec) +/* Used to save error messages in uaccess sections */ +struct err_msg { + const char *fmt; + int param1; + int param2; + long param3; + long param4; +}; + +static __always_inline +int __sframe_validate_section(struct sframe_section *sec, struct err_msg *err) { unsigned long prev_ip = 0; unsigned int i; +/* current->comm, current->pid, sec->filename */ +#define ERR_HDR KERN_WARNING "%s (%d) %s: " + for (i = 0; i < sec->num_fdes; i++) { struct sframe_fre *fre, *prev_fre = NULL; unsigned long ip, fre_addr; @@ -341,7 +354,8 @@ static __always_inline int __sframe_validate_section(struct sframe_section *sec) ip = sec->sframe_start + fde.start_addr; if (ip <= prev_ip) { - dbg_sec_uaccess("fde %u not sorted\n", i); + err->fmt = ERR_HDR "fde %u not sorted\n"; + err->param1 = i; return -EFAULT; } prev_ip = ip; @@ -355,15 +369,23 @@ static __always_inline int __sframe_validate_section(struct sframe_section *sec) ret = __read_fre(sec, &fde, fre_addr, fre); if (ret) { - dbg_sec_uaccess("fde %u: __read_fre(%u) failed\n", i, j); - dbg_print_fde_uaccess(sec, &fde); + err->fmt = ERR_HDR + "fde %u: __read_fre(%u) failed\n" + " frame_start=%lx frame_end=%lx\n"; + err->param1 = i; + err->param2 = j; + err->param3 = (long)sec->sframe_start; + err->param4 = (long)sec->sframe_end; return ret; } fre_addr += fre->size; if (prev_fre && fre->ip_off <= prev_fre->ip_off) { - dbg_sec_uaccess("fde %u: fre %u not sorted\n", i, j); + err->fmt = ERR_HDR + "fde %u: fre %u not sorted\n"; + err->param1 = i; + err->param2 = j; return -EFAULT; } @@ -376,16 +398,26 @@ static __always_inline int __sframe_validate_section(struct sframe_section *sec) static int sframe_validate_section(struct sframe_section *sec) { + struct err_msg err; int ret; + memset(&err, 0, sizeof(err)); + if (!user_read_access_begin((void __user *)sec->sframe_start, sec->sframe_end - sec->sframe_start)) { - dbg_sec("section usercopy failed\n"); + pr_warn("%s (%d): section usercopy failed\n", + current->comm, current->pid); return -EFAULT; } - ret = __sframe_validate_section(sec); + ret = __sframe_validate_section(sec, &err); user_read_access_end(); + + if (ret) { + printk(err.fmt, current->comm, current->pid, + sec->filename, err.param1, err.param2, + err.param3, err.param4); + } return ret; }
On Tue, 8 Jul 2025 at 07:41, Steven Rostedt <rostedt@goodmis.org> wrote: > > Would something like this work? If someone enables the config to enable the > validation, I don't think we need dynamic printk to do it (as that requires > passing in the format directly and not via a pointer). I really think you should just not use 'user_access_begin()" AT ALL if you need to play these kinds of games. Linus
On Tue, 8 Jul 2025 08:53:56 -0700 Linus Torvalds <torvalds@linux-foundation.org> wrote: > On Tue, 8 Jul 2025 at 07:41, Steven Rostedt <rostedt@goodmis.org> wrote: > > > > Would something like this work? If someone enables the config to enable the > > validation, I don't think we need dynamic printk to do it (as that requires > > passing in the format directly and not via a pointer). > > I really think you should just not use 'user_access_begin()" AT ALL if > you need to play these kinds of games. > Looking at the code a bit deeper, I don't think we need to play these games and still keep the user_read_access_begin(). The places that are more performance critical (where it reads the sframe during normal stack walking during profiling) has no debug output, and there's nothing there that needs to take it out of the user_read_access area. It's the validator that adds these hacks. I don't think it needs to. It can just wrap the calls to the code that requires user_read_access and then check the return value. The validator is just a debugging feature and performance should not be an issue. But I do think performance is something to care about during normal operations where the one big user_read_access_begin() can help. What about something like this? It adds "safe" versions of the user space access functions and uses them only in the slow (we don't care about performance) validator: -- Steve diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c index 0060cc576776..79ff3c0fc11f 100644 --- a/kernel/unwind/sframe.c +++ b/kernel/unwind/sframe.c @@ -321,7 +321,34 @@ int sframe_find(unsigned long ip, struct unwind_user_frame *frame) #ifdef CONFIG_SFRAME_VALIDATION -static __always_inline int __sframe_validate_section(struct sframe_section *sec) +static int safe_read_fde(struct sframe_section *sec, + unsigned int fde_num, struct sframe_fde *fde) +{ + int ret; + + if (!user_read_access_begin((void __user *)sec->sframe_start, + sec->sframe_end - sec->sframe_start)) + return -EFAULT; + ret = __read_fde(sec, fde_num, fde); + user_read_access_end(); + return ret; +} + +static int safe_read_fre(struct sframe_section *sec, + struct sframe_fde *fde, unsigned long fre_addr, + struct sframe_fre *fre) +{ + int ret; + + if (!user_read_access_begin((void __user *)sec->sframe_start, + sec->sframe_end - sec->sframe_start)) + return -EFAULT; + ret = __read_fre(sec, fde, fre_addr, fre); + user_read_access_end(); + return ret; +} + +static int sframe_validate_section(struct sframe_section *sec) { unsigned long prev_ip = 0; unsigned int i; @@ -335,13 +362,13 @@ static __always_inline int __sframe_validate_section(struct sframe_section *sec) unsigned int j; int ret; - ret = __read_fde(sec, i, &fde); + ret = safe_read_fde(sec, i, &fde); if (ret) return ret; ip = sec->sframe_start + fde.start_addr; if (ip <= prev_ip) { - dbg_sec_uaccess("fde %u not sorted\n", i); + dbg_sec("fde %u not sorted\n", i); return -EFAULT; } prev_ip = ip; @@ -353,17 +380,20 @@ static __always_inline int __sframe_validate_section(struct sframe_section *sec) fre = which ? fres : fres + 1; which = !which; - ret = __read_fre(sec, &fde, fre_addr, fre); + ret = safe_read_fre(sec, &fde, fre_addr, fre); if (ret) { - dbg_sec_uaccess("fde %u: __read_fre(%u) failed\n", i, j); - dbg_print_fde_uaccess(sec, &fde); + dbg_sec("fde %u: __read_fre(%u) failed\n", i, j); + dbg_sec("FDE: start_addr:0x%x func_size:0x%x fres_off:0x%x fres_num:%d info:%u rep_size:%u\n", + fde.start_addr, fde.func_size, + fde.fres_off, fde.fres_num, + fde.info, fde.rep_size); return ret; } fre_addr += fre->size; if (prev_fre && fre->ip_off <= prev_fre->ip_off) { - dbg_sec_uaccess("fde %u: fre %u not sorted\n", i, j); + dbg_sec("fde %u: fre %u not sorted\n", i, j); return -EFAULT; } @@ -374,21 +404,6 @@ static __always_inline int __sframe_validate_section(struct sframe_section *sec) return 0; } -static int sframe_validate_section(struct sframe_section *sec) -{ - int ret; - - if (!user_read_access_begin((void __user *)sec->sframe_start, - sec->sframe_end - sec->sframe_start)) { - dbg_sec("section usercopy failed\n"); - return -EFAULT; - } - - ret = __sframe_validate_section(sec); - user_read_access_end(); - return ret; -} - #else /* !CONFIG_SFRAME_VALIDATION */ static int sframe_validate_section(struct sframe_section *sec) { return 0; }
On Tue, Jul 08, 2025 at 12:31:20PM -0400, Steven Rostedt wrote: > On Tue, 8 Jul 2025 08:53:56 -0700 > Linus Torvalds <torvalds@linux-foundation.org> wrote: > > > On Tue, 8 Jul 2025 at 07:41, Steven Rostedt <rostedt@goodmis.org> wrote: > > > > > > Would something like this work? If someone enables the config to enable the > > > validation, I don't think we need dynamic printk to do it (as that requires > > > passing in the format directly and not via a pointer). > > > > I really think you should just not use 'user_access_begin()" AT ALL if > > you need to play these kinds of games. > > > > Looking at the code a bit deeper, I don't think we need to play these games > and still keep the user_read_access_begin(). > > The places that are more performance critical (where it reads the sframe > during normal stack walking during profiling) has no debug output, and > there's nothing there that needs to take it out of the user_read_access > area. > > It's the validator that adds these hacks. I don't think it needs to. It can > just wrap the calls to the code that requires user_read_access and then > check the return value. The validator is just a debugging feature and > performance should not be an issue. > > But I do think performance is something to care about during normal > operations where the one big user_read_access_begin() can help. > > What about something like this? It adds "safe" versions of the user space > access functions and uses them only in the slow (we don't care about > performance) validator: Looks good to me, thanks! -- Josh
© 2016 - 2025 Red Hat, Inc.