Taking the tasklist_lock during the KLP transition can cause high latency
under certain workloads. To address this, we can replace it with RCU.

When a new task is forked, its kernel stack is guaranteed to be empty[0],
so such tasks can be set to the KLP_TRANSITION_IDLE state immediately after
forking. If a task is forked during the KLP transition but before
klp_check_and_switch_task() has visited it, it is safe to switch it to the
klp_target_state within klp_check_and_switch_task(). Likewise, if
klp_ftrace_handler() is triggered before the task has been switched to the
klp_target_state, it is safe to perform the state transition from within
the ftrace handler[1].
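In short, the fork-time hook reduces to the following (a condensed view of
the kernel/fork.c and kernel/livepatch/transition.c hunks below, with an
explanatory comment added):

	/* Called from copy_process() during fork */
	void klp_init_process(struct task_struct *child)
	{
		/*
		 * A freshly forked task starts with an empty kernel stack,
		 * so it never needs to inherit the parent's pending
		 * transition state. It is switched to klp_target_state
		 * later, by klp_check_and_switch_task() or by
		 * klp_ftrace_handler().
		 */
		clear_tsk_thread_flag(child, TIF_PATCH_PENDING);
		child->patch_state = KLP_TRANSITION_IDLE;
	}
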
With these changes, we can safely replace the tasklist_lock with RCU.
Link: https://lore.kernel.org/all/20250213173253.ovivhuq2c5rmvkhj@jpoimboe/ [0]
Link: https://lore.kernel.org/all/20250214181206.xkvxohoc4ft26uhf@jpoimboe/ [1]
Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
include/linux/livepatch.h | 4 ++--
kernel/fork.c | 2 +-
kernel/livepatch/patch.c | 7 ++++++-
kernel/livepatch/transition.c | 35 ++++++++++++++---------------------
kernel/livepatch/transition.h | 1 +
5 files changed, 24 insertions(+), 25 deletions(-)
diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h
index 51a258c24ff5..41c424120f49 100644
--- a/include/linux/livepatch.h
+++ b/include/linux/livepatch.h
@@ -198,7 +198,7 @@ int klp_enable_patch(struct klp_patch *);
int klp_module_coming(struct module *mod);
void klp_module_going(struct module *mod);
-void klp_copy_process(struct task_struct *child);
+void klp_init_process(struct task_struct *child);
void klp_update_patch_state(struct task_struct *task);
static inline bool klp_patch_pending(struct task_struct *task)
@@ -241,7 +241,7 @@ static inline int klp_module_coming(struct module *mod) { return 0; }
static inline void klp_module_going(struct module *mod) {}
static inline bool klp_patch_pending(struct task_struct *task) { return false; }
static inline void klp_update_patch_state(struct task_struct *task) {}
-static inline void klp_copy_process(struct task_struct *child) {}
+static inline void klp_init_process(struct task_struct *child) {}
static inline
int klp_apply_section_relocs(struct module *pmod, Elf_Shdr *sechdrs,
diff --git a/kernel/fork.c b/kernel/fork.c
index 735405a9c5f3..da247c4d5ec5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2544,7 +2544,7 @@ __latent_entropy struct task_struct *copy_process(
p->exit_signal = args->exit_signal;
}
- klp_copy_process(p);
+ klp_init_process(p);
sched_core_fork(p);
diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c
index 90408500e5a3..5e523a3fbb3c 100644
--- a/kernel/livepatch/patch.c
+++ b/kernel/livepatch/patch.c
@@ -95,7 +95,12 @@ static void notrace klp_ftrace_handler(unsigned long ip,
patch_state = current->patch_state;
- WARN_ON_ONCE(patch_state == KLP_TRANSITION_IDLE);
+ /* If the patch_state is KLP_TRANSITION_IDLE, it indicates the
+ * task was forked after klp_init_transition(). For this newly
+ * forked task, it is safe to switch it to klp_target_state.
+ */
+ if (patch_state == KLP_TRANSITION_IDLE)
+ current->patch_state = klp_target_state;
if (patch_state == KLP_TRANSITION_UNPATCHED) {
/*
diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c
index ba069459c101..ae4512e2acc9 100644
--- a/kernel/livepatch/transition.c
+++ b/kernel/livepatch/transition.c
@@ -23,7 +23,7 @@ static DEFINE_PER_CPU(unsigned long[MAX_STACK_ENTRIES], klp_stack_entries);
struct klp_patch *klp_transition_patch;
-static int klp_target_state = KLP_TRANSITION_IDLE;
+int klp_target_state = KLP_TRANSITION_IDLE;
static unsigned int klp_signals_cnt;
@@ -294,6 +294,13 @@ static int klp_check_and_switch_task(struct task_struct *task, void *arg)
{
int ret;
+ /* If the patch_state remains KLP_TRANSITION_IDLE at this point, it
+ * indicates that the task was forked after klp_init_transition(). For
+ * this newly forked task, it is now safe to perform the switch.
+ */
+ if (task->patch_state == KLP_TRANSITION_IDLE)
+ goto out;
+
if (task_curr(task) && task != current)
return -EBUSY;
@@ -301,6 +308,7 @@ static int klp_check_and_switch_task(struct task_struct *task, void *arg)
if (ret)
return ret;
+out:
clear_tsk_thread_flag(task, TIF_PATCH_PENDING);
task->patch_state = klp_target_state;
return 0;
@@ -466,11 +474,11 @@ void klp_try_complete_transition(void)
* Usually this will transition most (or all) of the tasks on a system
* unless the patch includes changes to a very common function.
*/
- read_lock(&tasklist_lock);
+ rcu_read_lock();
for_each_process_thread(g, task)
if (!klp_try_switch_task(task))
complete = false;
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
/*
* Ditto for the idle "swapper" tasks.
@@ -694,25 +702,10 @@ void klp_reverse_transition(void)
}
/* Called from copy_process() during fork */
-void klp_copy_process(struct task_struct *child)
+void klp_init_process(struct task_struct *child)
{
-
- /*
- * The parent process may have gone through a KLP transition since
- * the thread flag was copied in setup_thread_stack earlier. Bring
- * the task flag up to date with the parent here.
- *
- * The operation is serialized against all klp_*_transition()
- * operations by the tasklist_lock. The only exceptions are
- * klp_update_patch_state(current) and __klp_sched_try_switch(), but we
- * cannot race with them because we are current.
- */
- if (test_tsk_thread_flag(current, TIF_PATCH_PENDING))
- set_tsk_thread_flag(child, TIF_PATCH_PENDING);
- else
- clear_tsk_thread_flag(child, TIF_PATCH_PENDING);
-
- child->patch_state = current->patch_state;
+ clear_tsk_thread_flag(child, TIF_PATCH_PENDING);
+ child->patch_state = KLP_TRANSITION_IDLE;
}
/*
diff --git a/kernel/livepatch/transition.h b/kernel/livepatch/transition.h
index 322db16233de..febcf1d50fc5 100644
--- a/kernel/livepatch/transition.h
+++ b/kernel/livepatch/transition.h
@@ -5,6 +5,7 @@
#include <linux/livepatch.h>
extern struct klp_patch *klp_transition_patch;
+extern int klp_target_state;
void klp_init_transition(struct klp_patch *patch, int state);
void klp_cancel_transition(void);
--
2.43.5
On Sun, Feb 23, 2025 at 02:20:46PM +0800, Yafang Shao wrote:
> +++ b/kernel/livepatch/patch.c
> @@ -95,7 +95,12 @@ static void notrace klp_ftrace_handler(unsigned long ip,
>
> patch_state = current->patch_state;
>
> - WARN_ON_ONCE(patch_state == KLP_TRANSITION_IDLE);
> + /* If the patch_state is KLP_TRANSITION_IDLE, it indicates the
> + * task was forked after klp_init_transition(). For this newly
> + * forked task, it is safe to switch it to klp_target_state.
> + */
> + if (patch_state == KLP_TRANSITION_IDLE)
> + current->patch_state = klp_target_state;
Hm, but then the following line is:
> if (patch_state == KLP_TRANSITION_UNPATCHED) {
Shouldn't the local 'patch_state' variable be updated?
It also seems unnecessary to update 'current->patch_state' here.
> @@ -294,6 +294,13 @@ static int klp_check_and_switch_task(struct task_struct *task, void *arg)
> {
> int ret;
>
> + /* If the patch_state remains KLP_TRANSITION_IDLE at this point, it
> + * indicates that the task was forked after klp_init_transition(). For
> + * this newly forked task, it is now safe to perform the switch.
> + */
> + if (task->patch_state == KLP_TRANSITION_IDLE)
> + goto out;
> +
This also seems unnecessary. No need to transition the patch if the
ftrace handler is already doing the right thing. klp_try_switch_task()
can just return early on !TIF_PATCH_PENDING.
> @@ -466,11 +474,11 @@ void klp_try_complete_transition(void)
> * Usually this will transition most (or all) of the tasks on a system
> * unless the patch includes changes to a very common function.
> */
> - read_lock(&tasklist_lock);
> + rcu_read_lock();
> for_each_process_thread(g, task)
> if (!klp_try_switch_task(task))
> complete = false;
> - read_unlock(&tasklist_lock);
> + rcu_read_unlock();
Can this also be done for the idle tasks?
--
Josh
On Wed, Feb 26, 2025 at 2:33 AM Josh Poimboeuf <jpoimboe@kernel.org> wrote:
>
> On Sun, Feb 23, 2025 at 02:20:46PM +0800, Yafang Shao wrote:
> > +++ b/kernel/livepatch/patch.c
> > @@ -95,7 +95,12 @@ static void notrace klp_ftrace_handler(unsigned long ip,
> >
> > patch_state = current->patch_state;
> >
> > - WARN_ON_ONCE(patch_state == KLP_TRANSITION_IDLE);
> > + /* If the patch_state is KLP_TRANSITION_IDLE, it indicates the
> > + * task was forked after klp_init_transition(). For this newly
> > + * forked task, it is safe to switch it to klp_target_state.
> > + */
> > + if (patch_state == KLP_TRANSITION_IDLE)
> > + current->patch_state = klp_target_state;
>
> Hm, but then the following line is:
>
> > if (patch_state == KLP_TRANSITION_UNPATCHED) {
>
> Shouldn't the local 'patch_state' variable be updated?
Ah, I missed it.
>
> It also seems unnecessary to update 'current->patch_state' here.
Got it.
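Something like this, then (an untested sketch, just to make sure I follow)?

	patch_state = current->patch_state;

	/*
	 * A task forked after klp_init_transition() is still in
	 * KLP_TRANSITION_IDLE. Treat it as if it were already in the
	 * target state and leave task->patch_state untouched here.
	 */
	if (patch_state == KLP_TRANSITION_IDLE)
		patch_state = klp_target_state;

	if (patch_state == KLP_TRANSITION_UNPATCHED) {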
>
> > @@ -294,6 +294,13 @@ static int klp_check_and_switch_task(struct task_struct *task, void *arg)
> > {
> > int ret;
> >
> > + /* If the patch_state remains KLP_TRANSITION_IDLE at this point, it
> > + * indicates that the task was forked after klp_init_transition(). For
> > + * this newly forked task, it is now safe to perform the switch.
> > + */
> > + if (task->patch_state == KLP_TRANSITION_IDLE)
> > + goto out;
> > +
>
> This also seems unnecessary. No need to transition the patch if the
> ftrace handler is already doing the right thing. klp_try_switch_task()
> can just return early on !TIF_PATCH_PENDING.
Good suggestion.
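I will rework it along these lines in the next version (untested):

	static bool klp_try_switch_task(struct task_struct *task)
	{
		...
		/*
		 * Nothing to do if no transition is pending for this task.
		 * This also covers tasks forked after klp_init_transition(),
		 * which start in KLP_TRANSITION_IDLE with TIF_PATCH_PENDING
		 * cleared and are handled by klp_ftrace_handler() instead.
		 */
		if (!test_tsk_thread_flag(task, TIF_PATCH_PENDING))
			return true;
		...
	}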
>
> > @@ -466,11 +474,11 @@ void klp_try_complete_transition(void)
> > * Usually this will transition most (or all) of the tasks on a system
> > * unless the patch includes changes to a very common function.
> > */
> > - read_lock(&tasklist_lock);
> > + rcu_read_lock();
> > for_each_process_thread(g, task)
> > if (!klp_try_switch_task(task))
> > complete = false;
> > - read_unlock(&tasklist_lock);
> > + rcu_read_unlock();
>
> Can this also be done for the idle tasks?
The cpus_read_lock() around the idle tasks is there to protect against CPU
hotplug operations. If we wanted to eliminate that lock during the KLP
transition as well, the CPU hotplug logic would need to be adjusted
accordingly; for instance, we would have to decide how to handle
wake_up_if_idle() while a CPU is being hot-plugged.

Given that the number of CPUs is unlikely to be large enough to make this
loop a bottleneck in the current implementation, optimizing it is probably
not a priority at the moment. It might be more practical to address this at
a later stage.
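For reference, the existing idle-task loop in klp_try_complete_transition()
looks roughly like this (abridged from current mainline):

	cpus_read_lock();
	for_each_possible_cpu(cpu) {
		task = idle_task(cpu);
		if (cpu_online(cpu)) {
			if (!klp_try_switch_task(task)) {
				complete = false;
				/* Make idle task go through the main loop. */
				wake_up_if_idle(cpu);
			}
		} else if (task->patch_state != klp_target_state) {
			/* Offline idle tasks can be switched immediately. */
			clear_tsk_thread_flag(task, TIF_PATCH_PENDING);
			task->patch_state = klp_target_state;
		}
	}
	cpus_read_unlock();

So any RCU conversion there would also have to reason about wake_up_if_idle()
racing with a CPU going offline.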
--
Regards
Yafang