With the goal of deprecating / removing VOLUNTARY preempt, live-patch
needs to stop relying on cond_resched() to make forward progress.
Instead, rely on schedule() with TASK_FREEZABLE set. Just like
live-patching, the freezer needs to be able to stop tasks in a safe /
known state.
Compile tested only.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
include/linux/livepatch_sched.h | 15 +++++--------
include/linux/sched.h | 6 -----
kernel/livepatch/transition.c | 30 ++++++-------------------
kernel/sched/core.c | 50 +++++++----------------------------------
4 files changed, 21 insertions(+), 80 deletions(-)
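
Distilled from the hunks below, the net effect is roughly the following
(an editorial sketch for orientation, not part of the patch itself):

#include <linux/jump_label.h>
#include <linux/sched.h>

DECLARE_STATIC_KEY_FALSE(klp_sched_try_switch_key);
void __klp_sched_try_switch(void);

/*
 * include/linux/livepatch_sched.h: the helper now takes the task being
 * scheduled out and only fires for tasks entering a freezable sleep.
 */
static __always_inline void klp_sched_try_switch(struct task_struct *curr)
{
	if (static_branch_unlikely(&klp_sched_try_switch_key) &&
	    (READ_ONCE(curr->__state) & TASK_FREEZABLE))
		__klp_sched_try_switch();
}

/*
 * kernel/sched/core.c: __schedule() calls klp_sched_try_switch(prev) for the
 * outgoing task, replacing the old hooks in the cond_resched() paths.
 */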
diff --git a/include/linux/livepatch_sched.h b/include/linux/livepatch_sched.h
index 013794fb5da0..7e8171226dd7 100644
--- a/include/linux/livepatch_sched.h
+++ b/include/linux/livepatch_sched.h
@@ -3,27 +3,24 @@
#define _LINUX_LIVEPATCH_SCHED_H_
#include <linux/jump_label.h>
-#include <linux/static_call_types.h>
+#include <linux/sched.h>
+
#ifdef CONFIG_LIVEPATCH
void __klp_sched_try_switch(void);
-#if !defined(CONFIG_PREEMPT_DYNAMIC) || !defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
-
DECLARE_STATIC_KEY_FALSE(klp_sched_try_switch_key);
-static __always_inline void klp_sched_try_switch(void)
+static __always_inline void klp_sched_try_switch(struct task_struct *curr)
{
- if (static_branch_unlikely(&klp_sched_try_switch_key))
+ if (static_branch_unlikely(&klp_sched_try_switch_key) &&
+ READ_ONCE(curr->__state) & TASK_FREEZABLE)
__klp_sched_try_switch();
}
-#endif /* !CONFIG_PREEMPT_DYNAMIC || !CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */
-
#else /* !CONFIG_LIVEPATCH */
-static inline void klp_sched_try_switch(void) {}
-static inline void __klp_sched_try_switch(void) {}
+static inline void klp_sched_try_switch(struct task_struct *curr) {}
#endif /* CONFIG_LIVEPATCH */
#endif /* _LINUX_LIVEPATCH_SCHED_H_ */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6e5c38718ff5..b988e1ae9bd9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -44,7 +44,6 @@
#include <linux/seqlock_types.h>
#include <linux/kcsan.h>
#include <linux/rv.h>
-#include <linux/livepatch_sched.h>
#include <linux/uidgid_types.h>
#include <asm/kmap_size.h>
@@ -2069,9 +2068,6 @@ extern int __cond_resched(void);
#if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
-void sched_dynamic_klp_enable(void);
-void sched_dynamic_klp_disable(void);
-
DECLARE_STATIC_CALL(cond_resched, __cond_resched);
static __always_inline int _cond_resched(void)
@@ -2092,7 +2088,6 @@ static __always_inline int _cond_resched(void)
static inline int _cond_resched(void)
{
- klp_sched_try_switch();
return __cond_resched();
}
@@ -2102,7 +2097,6 @@ static inline int _cond_resched(void)
static inline int _cond_resched(void)
{
- klp_sched_try_switch();
return 0;
}
diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c
index ba069459c101..2676c43642ff 100644
--- a/kernel/livepatch/transition.c
+++ b/kernel/livepatch/transition.c
@@ -29,22 +29,13 @@ static unsigned int klp_signals_cnt;
/*
* When a livepatch is in progress, enable klp stack checking in
- * cond_resched(). This helps CPU-bound kthreads get patched.
+ * schedule(). This helps CPU-bound kthreads get patched.
*/
-#if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
-
-#define klp_cond_resched_enable() sched_dynamic_klp_enable()
-#define klp_cond_resched_disable() sched_dynamic_klp_disable()
-
-#else /* !CONFIG_PREEMPT_DYNAMIC || !CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */
DEFINE_STATIC_KEY_FALSE(klp_sched_try_switch_key);
-EXPORT_SYMBOL(klp_sched_try_switch_key);
-#define klp_cond_resched_enable() static_branch_enable(&klp_sched_try_switch_key)
-#define klp_cond_resched_disable() static_branch_disable(&klp_sched_try_switch_key)
-
-#endif /* CONFIG_PREEMPT_DYNAMIC && CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */
+#define klp_resched_enable() static_branch_enable(&klp_sched_try_switch_key)
+#define klp_resched_disable() static_branch_disable(&klp_sched_try_switch_key)
/*
* This work can be performed periodically to finish patching or unpatching any
@@ -365,9 +356,6 @@ static bool klp_try_switch_task(struct task_struct *task)
void __klp_sched_try_switch(void)
{
- if (likely(!klp_patch_pending(current)))
- return;
-
/*
* This function is called from cond_resched() which is called in many
* places throughout the kernel. Using the klp_mutex here might
@@ -377,14 +365,14 @@ void __klp_sched_try_switch(void)
* klp_try_switch_task(). Thanks to task_call_func() they won't be
* able to switch this task while it's running.
*/
- preempt_disable();
+ lockdep_assert_preemption_disabled();
/*
* Make sure current didn't get patched between the above check and
* preempt_disable().
*/
if (unlikely(!klp_patch_pending(current)))
- goto out;
+ return;
/*
* Enforce the order of the TIF_PATCH_PENDING read above and the
@@ -395,11 +383,7 @@ void __klp_sched_try_switch(void)
smp_rmb();
klp_try_switch_task(current);
-
-out:
- preempt_enable();
}
-EXPORT_SYMBOL(__klp_sched_try_switch);
/*
* Sends a fake signal to all non-kthread tasks with TIF_PATCH_PENDING set.
@@ -508,7 +492,7 @@ void klp_try_complete_transition(void)
}
/* Done! Now cleanup the data structures. */
- klp_cond_resched_disable();
+ klp_resched_disable();
patch = klp_transition_patch;
klp_complete_transition();
@@ -560,7 +544,7 @@ void klp_start_transition(void)
set_tsk_thread_flag(task, TIF_PATCH_PENDING);
}
- klp_cond_resched_enable();
+ klp_resched_enable();
klp_signals_cnt = 0;
}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4d617946d6e8..e6bfcdfb631e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -66,6 +66,7 @@
#include <linux/vtime.h>
#include <linux/wait_api.h>
#include <linux/workqueue_api.h>
+#include <linux/livepatch_sched.h>
#ifdef CONFIG_PREEMPT_DYNAMIC
# ifdef CONFIG_GENERIC_ENTRY
@@ -6654,6 +6655,8 @@ static void __sched notrace __schedule(int sched_mode)
if (sched_feat(HRTICK) || sched_feat(HRTICK_DL))
hrtick_clear(rq);
+ klp_sched_try_switch(prev);
+
local_irq_disable();
rcu_note_context_switch(preempt);
@@ -7322,7 +7325,6 @@ EXPORT_STATIC_CALL_TRAMP(might_resched);
static DEFINE_STATIC_KEY_FALSE(sk_dynamic_cond_resched);
int __sched dynamic_cond_resched(void)
{
- klp_sched_try_switch();
if (!static_branch_unlikely(&sk_dynamic_cond_resched))
return 0;
return __cond_resched();
@@ -7494,7 +7496,6 @@ int sched_dynamic_mode(const char *str)
#endif
static DEFINE_MUTEX(sched_dynamic_mutex);
-static bool klp_override;
static void __sched_dynamic_update(int mode)
{
@@ -7502,8 +7503,7 @@ static void __sched_dynamic_update(int mode)
* Avoid {NONE,VOLUNTARY} -> FULL transitions from ever ending up in
* the ZERO state, which is invalid.
*/
- if (!klp_override)
- preempt_dynamic_enable(cond_resched);
+ preempt_dynamic_enable(cond_resched);
preempt_dynamic_enable(might_resched);
preempt_dynamic_enable(preempt_schedule);
preempt_dynamic_enable(preempt_schedule_notrace);
@@ -7512,8 +7512,7 @@ static void __sched_dynamic_update(int mode)
switch (mode) {
case preempt_dynamic_none:
- if (!klp_override)
- preempt_dynamic_enable(cond_resched);
+ preempt_dynamic_enable(cond_resched);
preempt_dynamic_disable(might_resched);
preempt_dynamic_disable(preempt_schedule);
preempt_dynamic_disable(preempt_schedule_notrace);
@@ -7524,8 +7523,7 @@ static void __sched_dynamic_update(int mode)
break;
case preempt_dynamic_voluntary:
- if (!klp_override)
- preempt_dynamic_enable(cond_resched);
+ preempt_dynamic_enable(cond_resched);
preempt_dynamic_enable(might_resched);
preempt_dynamic_disable(preempt_schedule);
preempt_dynamic_disable(preempt_schedule_notrace);
@@ -7536,8 +7534,7 @@ static void __sched_dynamic_update(int mode)
break;
case preempt_dynamic_full:
- if (!klp_override)
- preempt_dynamic_disable(cond_resched);
+ preempt_dynamic_disable(cond_resched);
preempt_dynamic_disable(might_resched);
preempt_dynamic_enable(preempt_schedule);
preempt_dynamic_enable(preempt_schedule_notrace);
@@ -7548,8 +7545,7 @@ static void __sched_dynamic_update(int mode)
break;
case preempt_dynamic_lazy:
- if (!klp_override)
- preempt_dynamic_disable(cond_resched);
+ preempt_dynamic_disable(cond_resched);
preempt_dynamic_disable(might_resched);
preempt_dynamic_enable(preempt_schedule);
preempt_dynamic_enable(preempt_schedule_notrace);
@@ -7570,36 +7566,6 @@ void sched_dynamic_update(int mode)
mutex_unlock(&sched_dynamic_mutex);
}
-#ifdef CONFIG_HAVE_PREEMPT_DYNAMIC_CALL
-
-static int klp_cond_resched(void)
-{
- __klp_sched_try_switch();
- return __cond_resched();
-}
-
-void sched_dynamic_klp_enable(void)
-{
- mutex_lock(&sched_dynamic_mutex);
-
- klp_override = true;
- static_call_update(cond_resched, klp_cond_resched);
-
- mutex_unlock(&sched_dynamic_mutex);
-}
-
-void sched_dynamic_klp_disable(void)
-{
- mutex_lock(&sched_dynamic_mutex);
-
- klp_override = false;
- __sched_dynamic_update(preempt_dynamic_mode);
-
- mutex_unlock(&sched_dynamic_mutex);
-}
-
-#endif /* CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */
-
static int __init setup_preempt_mode(char *str)
{
int mode = sched_dynamic_mode(str);
> void __klp_sched_try_switch(void)
> {
> - if (likely(!klp_patch_pending(current)))
> - return;
> -
> /*
> * This function is called from cond_resched() which is called in many
> * places throughout the kernel. Using the klp_mutex here might
> @@ -377,14 +365,14 @@ void __klp_sched_try_switch(void)
> * klp_try_switch_task(). Thanks to task_call_func() they won't be
> * able to switch this task while it's running.
> */
> - preempt_disable();
> + lockdep_assert_preemption_disabled();
>
> /*
> * Make sure current didn't get patched between the above check and
> * preempt_disable().
> */
> if (unlikely(!klp_patch_pending(current)))
> - goto out;
> + return;
This does not look correct. With the first check removed, we now only make
sure here that klp_patch_pending(current) did not change in between, which
would be highly unlikely. However, we should keep the likely way out (the
first removed condition above), so let's also s/unlikely/likely/ here.
And the comments in the function should be updated as well.
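
For illustration only, with those adjustments the function might end up
looking roughly like this (an editorial sketch, not an actual follow-up patch):

void __klp_sched_try_switch(void)
{
	/*
	 * Called from __schedule() with preemption already disabled, so
	 * task_call_func() in klp_try_switch_task() cannot switch this
	 * task while it is running.
	 */
	lockdep_assert_preemption_disabled();

	/* Most tasks have no patch pending; take the likely way out. */
	if (likely(!klp_patch_pending(current)))
		return;

	/*
	 * Order the TIF_PATCH_PENDING read above against the reads in
	 * klp_try_switch_task(), as in the existing comment.
	 */
	smp_rmb();

	klp_try_switch_task(current);
}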
Miroslav
On Mon 2025-03-24 14:49:09, Peter Zijlstra wrote:
>
> With the goal of deprecating / removing VOLUNTARY preempt, live-patch
> needs to stop relying on cond_resched() to make forward progress.
>
> Instead, rely on schedule() with TASK_FREEZABLE set. Just like
> live-patching, the freezer needs to be able to stop tasks in a safe /
> known state.
> Compile tested only.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
> include/linux/livepatch_sched.h | 15 +++++--------
> include/linux/sched.h | 6 -----
> kernel/livepatch/transition.c | 30 ++++++-------------------
> kernel/sched/core.c | 50 +++++++----------------------------------
> 4 files changed, 21 insertions(+), 80 deletions(-)
>
> diff --git a/include/linux/livepatch_sched.h b/include/linux/livepatch_sched.h
> index 013794fb5da0..7e8171226dd7 100644
> --- a/include/linux/livepatch_sched.h
> +++ b/include/linux/livepatch_sched.h
> @@ -3,27 +3,24 @@
> #define _LINUX_LIVEPATCH_SCHED_H_
>
> #include <linux/jump_label.h>
> -#include <linux/static_call_types.h>
> +#include <linux/sched.h>
> +
>
> #ifdef CONFIG_LIVEPATCH
>
> void __klp_sched_try_switch(void);
>
> -#if !defined(CONFIG_PREEMPT_DYNAMIC) || !defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
> -
> DECLARE_STATIC_KEY_FALSE(klp_sched_try_switch_key);
>
> -static __always_inline void klp_sched_try_switch(void)
> +static __always_inline void klp_sched_try_switch(struct task_struct *curr)
> {
> - if (static_branch_unlikely(&klp_sched_try_switch_key))
> + if (static_branch_unlikely(&klp_sched_try_switch_key) &&
> + READ_ONCE(curr->__state) & TASK_FREEZABLE)
> __klp_sched_try_switch();
> }
Do we really need to check the TASK_FREEZABLE state, please?
My understanding is that TASK_FREEZABLE is set when kernel kthreads go into
a "freezable" sleep, e.g. wait_event_freezable().
But __klp_sched_try_switch() should be safe when the task is not
running and the stack is reliable. IMHO, it should be safe anytime
it is being scheduled out.
Note that wait_event_freezable() is a good location. It is usually called in
the main loop of the kthread, where the stack is small, so the chance that
the task is not running a livepatched function is higher than at another
random schedulable location.
But we actually wanted to have it in cond_resched() because
it might take a long time to reach the main loop, and sleep there.
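
As a rough illustration of the pattern described above: my_kthread_fn(),
my_wq, my_have_work() and do_the_work() below are made-up names, while
kthread_should_stop() and wait_event_freezable() are the real APIs.

#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/freezer.h>

static DECLARE_WAIT_QUEUE_HEAD(my_wq);	/* hypothetical wait queue */
extern bool my_have_work(void);		/* hypothetical work check */
extern void do_the_work(void);		/* hypothetical work item */

static int my_kthread_fn(void *data)
{
	while (!kthread_should_stop()) {
		/*
		 * Freezable sleep at the top of the main loop: the stack is
		 * shallow here and __state carries TASK_FREEZABLE while the
		 * task waits, which is the state the new hook in __schedule()
		 * keys off.
		 */
		wait_event_freezable(my_wq,
				     my_have_work() || kthread_should_stop());

		if (my_have_work())
			do_the_work();
	}

	return 0;
}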
Best Regards,
Petr
On Wed, Mar 26, 2025 at 10:49:10AM +0100, Petr Mladek wrote:
> On Mon 2025-03-24 14:49:09, Peter Zijlstra wrote:
> >
> > With the goal of deprecating / removing VOLUNTARY preempt, live-patch
> > needs to stop relying on cond_resched() to make forward progress.
> >
> > Instead, rely on schedule() with TASK_FREEZABLE set. Just like
> > live-patching, the freezer needs to be able to stop tasks in a safe /
> > known state.
>
> > Compile tested only.
> >
> > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > ---
> > include/linux/livepatch_sched.h | 15 +++++--------
> > include/linux/sched.h | 6 -----
> > kernel/livepatch/transition.c | 30 ++++++-------------------
> > kernel/sched/core.c | 50 +++++++----------------------------------
> > 4 files changed, 21 insertions(+), 80 deletions(-)
> >
> > diff --git a/include/linux/livepatch_sched.h b/include/linux/livepatch_sched.h
> > index 013794fb5da0..7e8171226dd7 100644
> > --- a/include/linux/livepatch_sched.h
> > +++ b/include/linux/livepatch_sched.h
> > @@ -3,27 +3,24 @@
> > #define _LINUX_LIVEPATCH_SCHED_H_
> >
> > #include <linux/jump_label.h>
> > -#include <linux/static_call_types.h>
> > +#include <linux/sched.h>
> > +
> >
> > #ifdef CONFIG_LIVEPATCH
> >
> > void __klp_sched_try_switch(void);
> >
> > -#if !defined(CONFIG_PREEMPT_DYNAMIC) || !defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
> > -
> > DECLARE_STATIC_KEY_FALSE(klp_sched_try_switch_key);
> >
> > -static __always_inline void klp_sched_try_switch(void)
> > +static __always_inline void klp_sched_try_switch(struct task_struct *curr)
> > {
> > - if (static_branch_unlikely(&klp_sched_try_switch_key))
> > + if (static_branch_unlikely(&klp_sched_try_switch_key) &&
> > + READ_ONCE(curr->__state) & TASK_FREEZABLE)
> > __klp_sched_try_switch();
> > }
>
> Do we really need to check the TASK_FREEZABLE state, please?
>
> My understanding is that TASK_FREEZABLE is set when kernel kthreads go into
> a "freezable" sleep, e.g. wait_event_freezable().
Right.
> But __klp_sched_try_switch() should be safe when the task is not
> running and the stack is reliable. IMHO, it should be safe anytime
> it is being scheduled out.
So for the reasons you touched upon in the next paragraph, FREEZABLE
seemed like a more suitable location.
> Note that wait_event_freezable() is a good location. It is usually called in
> the main loop of the kthread where the stack is small. So that the chance
> that it is not running a livepatched function is higher than on
> another random schedulable location.
Right, it is the natural quiescent state of the kthread; it holds no
resources.
> But we actually wanted to have it in cond_resched() because
> it might take a long time to reach the main loop, and sleep there.
Well, cond_resched() is going to get deleted, so we need to find
something else. And I was thinking that the suspend people want
reasonable timeliness too -- you don't want your laptop to continue
running for many seconds after you close the lid and stuff it in your
bag, now do you.
So per that reasoning I figured FREEZABLE should be good enough.
Sharing the pain with suspend can only lead to improving both -- faster
patching progress leads to faster suspend and vice-versa.
Hi,
On Wed, 26 Mar 2025, Peter Zijlstra wrote:
> On Wed, Mar 26, 2025 at 10:49:10AM +0100, Petr Mladek wrote:
> > On Mon 2025-03-24 14:49:09, Peter Zijlstra wrote:
> > >
> > > With the goal of deprecating / removing VOLUNTARY preempt, live-patch
> > > needs to stop relying on cond_resched() to make forward progress.
> > >
> > > Instead, rely on schedule() with TASK_FREEZABLE set. Just like
> > > live-patching, the freezer needs to be able to stop tasks in a safe /
> > > known state.
> >
> > > Compile tested only.
> > >
> > > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > > ---
> > > include/linux/livepatch_sched.h | 15 +++++--------
> > > include/linux/sched.h | 6 -----
> > > kernel/livepatch/transition.c | 30 ++++++-------------------
> > > kernel/sched/core.c | 50 +++++++----------------------------------
> > > 4 files changed, 21 insertions(+), 80 deletions(-)
> > >
> > > diff --git a/include/linux/livepatch_sched.h b/include/linux/livepatch_sched.h
> > > index 013794fb5da0..7e8171226dd7 100644
> > > --- a/include/linux/livepatch_sched.h
> > > +++ b/include/linux/livepatch_sched.h
> > > @@ -3,27 +3,24 @@
> > > #define _LINUX_LIVEPATCH_SCHED_H_
> > >
> > > #include <linux/jump_label.h>
> > > -#include <linux/static_call_types.h>
> > > +#include <linux/sched.h>
> > > +
> > >
> > > #ifdef CONFIG_LIVEPATCH
> > >
> > > void __klp_sched_try_switch(void);
> > >
> > > -#if !defined(CONFIG_PREEMPT_DYNAMIC) || !defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
> > > -
> > > DECLARE_STATIC_KEY_FALSE(klp_sched_try_switch_key);
> > >
> > > -static __always_inline void klp_sched_try_switch(void)
> > > +static __always_inline void klp_sched_try_switch(struct task_struct *curr)
> > > {
> > > - if (static_branch_unlikely(&klp_sched_try_switch_key))
> > > + if (static_branch_unlikely(&klp_sched_try_switch_key) &&
> > > + READ_ONCE(curr->__state) & TASK_FREEZABLE)
> > > __klp_sched_try_switch();
> > > }
> >
> > Do we really need to check the TASK_FREEZABLE state, please?
> >
> > My understanding is that TASK_FREEZABLE is set when kernel kthreads go into
> > a "freezable" sleep, e.g. wait_event_freezable().
>
> Right.
>
> > But __klp_sched_try_switch() should be safe when the task is not
> > running and the stack is reliable. IMHO, it should be safe anytime
> > it is being scheduled out.
>
> So for the reasons you touched upon in the next paragraph, FREEZABLE
> seemed like a more suitable location.
>
> > Note that wait_event_freezable() is a good location. It is usually called in
> > the main loop of the kthread where the stack is small. So that the chance
> > that it is not running a livepatched function is higher than on
> > another random schedulable location.
>
> Right, it is the natural quiescent state of the kthread, it holds no
> resources.
>
> > But we actually wanted to have it in cond_resched() because
> > it might take a long time to reach the main loop, and sleep there.
>
> Well, cond_resched() is going to get deleted, so we need to find
> something else. And I was thinking that the suspend people want
> reasonable timeliness too -- you don't want your laptop to continue
> running for many seconds after you close the lid and stuff it in your
> bag, now do you.
>
> So per that reasoning I figured FREEZABLE should be good enough.
>
> Sharing the pain with suspend can only lead to improving both -- faster
> patching progress leads to faster suspend and vice-versa.
If I remember correctly, we had something like this in the old kGraft
implementation of the live patching (SUSE way). We exactly had a hook
somewhere in the kthread freezing code. This looks much cleaner and as far
as I know the fridge went through improvements recently.
Peter, so that I understand it correctly... we would rely on all kthreads
becoming freezable eventually so that both suspend and livepatch benefit.
Is that what you meant by the above?
Miroslav
On Wed, Mar 26, 2025 at 03:37:50PM +0100, Miroslav Benes wrote:

> If I remember correctly, we had something like this in the old kGraft
> implementation of the live patching (SUSE way). We exactly had a hook
> somewhere in the kthread freezing code. This looks much cleaner and as far
> as I know the fridge went through improvements recently.

Yeah, I rewrote it a while ago :-)

> Peter, so that I understand it correctly... we would rely on all kthreads
> becoming freezable eventually so that both suspend and livepatch benefit.
> Is that what you meant by the above?

Well, IIRC (its been a while already) all kthreads should have a
FREEZABLE already. Things like suspend-to-idle don't hit the hotplug
path at all anymore and everything must freeze, otherwise they fail.

I was more meaning the time-to-freeze; if some kthreads take a long time
to freeze/patch then this would want improving on both ends.
On Wed, 26 Mar 2025, Peter Zijlstra wrote:

> On Wed, Mar 26, 2025 at 03:37:50PM +0100, Miroslav Benes wrote:
>
> > If I remember correctly, we had something like this in the old kGraft
> > implementation of the live patching (SUSE way). We exactly had a hook
> > somewhere in the kthread freezing code. This looks much cleaner and as far
> > as I know the fridge went through improvements recently.
>
> Yeah, I rewrote it a while ago :-)

Right :)

> > Peter, so that I understand it correctly... we would rely on all kthreads
> > becoming freezable eventually so that both suspend and livepatch benefit.
> > Is that what you meant by the above?
>
> Well, IIRC (its been a while already) all kthreads should have a
> FREEZABLE already. Things like suspend-to-idle don't hit the hotplug
> path at all anymore and everything must freeze, otherwise they fail.

Good.

> I was more meaning the time-to-freeze; if some kthreads take a long time
> to freeze/patch then this would want improving on both ends.

Makes sense. So I am all for the patch. See my comment elsewhere though.

Miroslav