On Fri, Mar 03, 2023 at 03:00:13PM +0100, Petr Mladek wrote:
> > MAX_STACK_ENTRIES is 100, which seems excessive. If we halved that, the
> > array would be "only" 400 bytes, which is *almost* reasonable to
> > allocate on the stack?
>
> It is just for the stack in the process context. Right?
>
> I think that I have never seen a stack with more than 50 entries. And in
> the worst case, a bigger number of entries would "just" result in
> a non-reliable stack, which might be acceptable.
>
> It looks acceptable to me.
>
> > Alternatively we could have a percpu entries array... :-/
>
> That said, percpu entries would be fine as well. It sounds like
> a reasonable price to pay for the livepatching feature. I think that
> livepatching is used on big systems anyway.
>
> I slightly prefer the per-cpu solution.
Booting a kernel with PREEMPT+LOCKDEP gave me a high-water mark of 60+
stack entries, seen when probing a device. I decided not to mess with
MAX_STACK_ENTRIES, and instead just convert the entries to percpu. This
patch could be inserted at the beginning of the set.
---8<---
Subject: [PATCH 0.5/3] livepatch: Convert stack entries array to percpu
The entries array in klp_check_stack() is static local because it's too
big to be reasonably allocated on the stack. Serialized access is
enforced by the klp_mutex.
In preparation for calling klp_check_stack() without the mutex (from
cond_resched), convert it to a percpu variable.
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
---
kernel/livepatch/transition.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c
index f1b25ec581e0..135fc73e2e5d 100644
--- a/kernel/livepatch/transition.c
+++ b/kernel/livepatch/transition.c
@@ -14,6 +14,8 @@
 #include "transition.h"
 
 #define MAX_STACK_ENTRIES 100
+DEFINE_PER_CPU(unsigned long[MAX_STACK_ENTRIES], klp_stack_entries);
+
 #define STACK_ERR_BUF_SIZE 128
 
 #define SIGNALS_TIMEOUT 15
@@ -240,12 +242,15 @@ static int klp_check_stack_func(struct klp_func *func, unsigned long *entries,
  */
 static int klp_check_stack(struct task_struct *task, const char **oldname)
 {
-	static unsigned long entries[MAX_STACK_ENTRIES];
+	unsigned long *entries = this_cpu_ptr(klp_stack_entries);
 	struct klp_object *obj;
 	struct klp_func *func;
 	int ret, nr_entries;
 
-	ret = stack_trace_save_tsk_reliable(task, entries, ARRAY_SIZE(entries));
+	/* Protect 'klp_stack_entries' */
+	lockdep_assert_preemption_disabled();
+
+	ret = stack_trace_save_tsk_reliable(task, entries, MAX_STACK_ENTRIES);
 	if (ret < 0)
 		return -EINVAL;
 	nr_entries = ret;
--
2.39.2
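The pattern the patch relies on is a per-CPU scratch buffer that may only be
touched while preemption is disabled on the current CPU. Below is a minimal
sketch of that pattern, not the livepatch code itself; the demo_* names are
made up for illustration, while DEFINE_PER_CPU, this_cpu_ptr() and
preempt_disable()/preempt_enable() are the real kernel APIs involved.

#include <linux/percpu.h>
#include <linux/preempt.h>

#define DEMO_MAX_ENTRIES 100

/* One scratch buffer per CPU instead of a single shared static array. */
static DEFINE_PER_CPU(unsigned long[DEMO_MAX_ENTRIES], demo_entries);

static void demo_fill_buffer(void)
{
	unsigned long *entries;
	int i;

	/*
	 * The buffer belongs to whatever runs on this CPU, so it is only
	 * safe to use while this CPU cannot be preempted; otherwise another
	 * task could be switched in and scribble over the same buffer.
	 */
	preempt_disable();

	entries = this_cpu_ptr(demo_entries);
	for (i = 0; i < DEMO_MAX_ENTRIES; i++)
		entries[i] = i;

	preempt_enable();
}

In klp_check_stack() itself the caller is expected to have preemption
disabled already; the lockdep_assert_preemption_disabled() call added by the
patch documents and (with lockdep enabled) verifies that assumption.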
On Mon 2023-03-13 16:33:46, Josh Poimboeuf wrote:
> On Fri, Mar 03, 2023 at 03:00:13PM +0100, Petr Mladek wrote:
> > > MAX_STACK_ENTRIES is 100, which seems excessive. If we halved that, the
> > > array would be "only" 400 bytes, which is *almost* reasonable to
> > > allocate on the stack?
> >
> > It is just for the stack in the process context. Right?
> >
> > I think that I have never seen a stack with more than 50 entries. And in
> > the worst case, a bigger number of entries would "just" result in
> > a non-reliable stack, which might be acceptable.
> >
> > It looks acceptable to me.
> >
> > > Alternatively we could have a percpu entries array... :-/
> >
> > That said, percpu entries would be fine as well. It sounds like
> > a reasonable price to pay for the livepatching feature. I think that
> > livepatching is used on big systems anyway.
> >
> > I slightly prefer the per-cpu solution.
>
> Booting a kernel with PREEMPT+LOCKDEP gave me a high-water mark of 60+
> stack entries, seen when probing a device. I decided not to mess with
> MAX_STACK_ENTRIES, and instead just convert the entries to percpu. This
> patch could be inserted at the beginning of the set.
Good to know.
>
> ---8<---
>
> Subject: [PATCH 0.5/3] livepatch: Convert stack entries array to percpu
>
> --- a/kernel/livepatch/transition.c
> +++ b/kernel/livepatch/transition.c
> @@ -240,12 +242,15 @@ static int klp_check_stack_func(struct klp_func *func, unsigned long *entries,
>   */
>  static int klp_check_stack(struct task_struct *task, const char **oldname)
>  {
> -	static unsigned long entries[MAX_STACK_ENTRIES];
> +	unsigned long *entries = this_cpu_ptr(klp_stack_entries);
>  	struct klp_object *obj;
>  	struct klp_func *func;
>  	int ret, nr_entries;
> 
> -	ret = stack_trace_save_tsk_reliable(task, entries, ARRAY_SIZE(entries));
> +	/* Protect 'klp_stack_entries' */
> +	lockdep_assert_preemption_disabled();
I am thinking about adding:

	/*
	 * Stay on the safe side even when cond_resched() is called from
	 * an IRQ context by mistake.
	 */
	if (!in_task())
		return -EINVAL;

Or is this prevented another way, please?
> +
> +	ret = stack_trace_save_tsk_reliable(task, entries, MAX_STACK_ENTRIES);
>  	if (ret < 0)
>  		return -EINVAL;
>  	nr_entries = ret;
Otherwise, it looks good to me.
Best Regards,
Petr
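Folded into the function from the patch above, Petr's suggested guard would
look roughly like this; a sketch of the combination only, not a posted patch:

static int klp_check_stack(struct task_struct *task, const char **oldname)
{
	unsigned long *entries = this_cpu_ptr(klp_stack_entries);
	struct klp_object *obj;
	struct klp_func *func;
	int ret, nr_entries;

	/* Protect 'klp_stack_entries' */
	lockdep_assert_preemption_disabled();

	/*
	 * Stay on the safe side even when cond_resched() is called from
	 * an IRQ context by mistake.
	 */
	if (!in_task())
		return -EINVAL;

	ret = stack_trace_save_tsk_reliable(task, entries, MAX_STACK_ENTRIES);
	if (ret < 0)
		return -EINVAL;
	nr_entries = ret;

	/* ... rest of the function unchanged ... */
}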
On Tue, Mar 14, 2023 at 11:50:21AM +0100, Petr Mladek wrote:
> >  static int klp_check_stack(struct task_struct *task, const char **oldname)
> >  {
> > -	static unsigned long entries[MAX_STACK_ENTRIES];
> > +	unsigned long *entries = this_cpu_ptr(klp_stack_entries);
> >  	struct klp_object *obj;
> >  	struct klp_func *func;
> >  	int ret, nr_entries;
> > 
> > -	ret = stack_trace_save_tsk_reliable(task, entries, ARRAY_SIZE(entries));
> > +	/* Protect 'klp_stack_entries' */
> > +	lockdep_assert_preemption_disabled();
>
> I am thinking about adding:
> 
> 	/*
> 	 * Stay on the safe side even when cond_resched() is called from
> 	 * an IRQ context by mistake.
> 	 */
> 	if (!in_task())
> 		return -EINVAL;
> 
> Or is this prevented another way, please?
An IRQ handler trying to schedule would be a pretty major bug, no?
I think there are already several different checks for that. For
example, the __might_resched() call in cond_resched().
--
Josh
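The check Josh refers to is the debug hook in the non-preemptible definition
of cond_resched(), roughly the following; this is paraphrased from
include/linux/sched.h and the exact arguments depend on the preemption
config:

#define cond_resched() ({			\
	__might_resched(__FILE__, __LINE__, 0);	\
	_cond_resched();			\
})

__might_resched() feeds the might-sleep debug machinery, so with
CONFIG_DEBUG_ATOMIC_SLEEP a cond_resched() issued from IRQ or other atomic
context already produces a warning, independent of any extra in_task() check
in klp_check_stack().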
The following commit has been merged into the sched/core branch of tip:
Commit-ID: e92606fa172f63a26054885b9715be86c643229d
Gitweb: https://git.kernel.org/tip/e92606fa172f63a26054885b9715be86c643229d
Author: Josh Poimboeuf <jpoimboe@kernel.org>
AuthorDate: Mon, 13 Mar 2023 16:33:46 -07:00
Committer: Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 22 Mar 2023 17:09:28 +01:00
livepatch: Convert stack entries array to percpu
The entries array in klp_check_stack() is static local because it's too
big to be reasonably allocated on the stack. Serialized access is
enforced by the klp_mutex.
In preparation for calling klp_check_stack() without the mutex (from
cond_resched), convert it to a percpu variable.
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20230313233346.kayh4t2lpicjkpsv@treble
---
kernel/livepatch/transition.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c
index f1b25ec..135fc73 100644
--- a/kernel/livepatch/transition.c
+++ b/kernel/livepatch/transition.c
@@ -14,6 +14,8 @@
 #include "transition.h"
 
 #define MAX_STACK_ENTRIES 100
+DEFINE_PER_CPU(unsigned long[MAX_STACK_ENTRIES], klp_stack_entries);
+
 #define STACK_ERR_BUF_SIZE 128
 
 #define SIGNALS_TIMEOUT 15
@@ -240,12 +242,15 @@ static int klp_check_stack_func(struct klp_func *func, unsigned long *entries,
  */
 static int klp_check_stack(struct task_struct *task, const char **oldname)
 {
-	static unsigned long entries[MAX_STACK_ENTRIES];
+	unsigned long *entries = this_cpu_ptr(klp_stack_entries);
 	struct klp_object *obj;
 	struct klp_func *func;
 	int ret, nr_entries;
 
-	ret = stack_trace_save_tsk_reliable(task, entries, ARRAY_SIZE(entries));
+	/* Protect 'klp_stack_entries' */
+	lockdep_assert_preemption_disabled();
+
+	ret = stack_trace_save_tsk_reliable(task, entries, MAX_STACK_ENTRIES);
 	if (ret < 0)
 		return -EINVAL;
 	nr_entries = ret;