[RFC PATCH 07/86] Revert "livepatch,sched: Add livepatch task switching to cond_resched()"

[RFC PATCH 07/86] Revert "livepatch,sched: Add livepatch task switching to cond_resched()"
Posted by Ankur Arora 2 years, 1 month ago
This reverts commit e3ff7c609f39671d1aaff4fb4a8594e14f3e03f8.

Note that removing this commit reintroduces "live patches failing to
complete within a reasonable amount of time due to CPU-bound kthreads."

Unfortunately this fix depends quite critically on PREEMPT_DYNAMIC and
existence of cond_resched() so this will need an alternate fix.

Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
 include/linux/livepatch.h       |   1 -
 include/linux/livepatch_sched.h |  29 ---------
 include/linux/sched.h           |  20 ++----
 kernel/livepatch/core.c         |   1 -
 kernel/livepatch/transition.c   | 107 +++++---------------------------
 kernel/sched/core.c             |  64 +++----------------
 6 files changed, 28 insertions(+), 194 deletions(-)
 delete mode 100644 include/linux/livepatch_sched.h

diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h
index 9b9b38e89563..293e29960c6e 100644
--- a/include/linux/livepatch.h
+++ b/include/linux/livepatch.h
@@ -13,7 +13,6 @@
 #include <linux/ftrace.h>
 #include <linux/completion.h>
 #include <linux/list.h>
-#include <linux/livepatch_sched.h>
 
 #if IS_ENABLED(CONFIG_LIVEPATCH)
 
diff --git a/include/linux/livepatch_sched.h b/include/linux/livepatch_sched.h
deleted file mode 100644
index 013794fb5da0..000000000000
--- a/include/linux/livepatch_sched.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-#ifndef _LINUX_LIVEPATCH_SCHED_H_
-#define _LINUX_LIVEPATCH_SCHED_H_
-
-#include <linux/jump_label.h>
-#include <linux/static_call_types.h>
-
-#ifdef CONFIG_LIVEPATCH
-
-void __klp_sched_try_switch(void);
-
-#if !defined(CONFIG_PREEMPT_DYNAMIC) || !defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
-
-DECLARE_STATIC_KEY_FALSE(klp_sched_try_switch_key);
-
-static __always_inline void klp_sched_try_switch(void)
-{
-	if (static_branch_unlikely(&klp_sched_try_switch_key))
-		__klp_sched_try_switch();
-}
-
-#endif /* !CONFIG_PREEMPT_DYNAMIC || !CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */
-
-#else /* !CONFIG_LIVEPATCH */
-static inline void klp_sched_try_switch(void) {}
-static inline void __klp_sched_try_switch(void) {}
-#endif /* CONFIG_LIVEPATCH */
-
-#endif /* _LINUX_LIVEPATCH_SCHED_H_ */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5bdf80136e42..c5b0ef1ecfe4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -36,7 +36,6 @@
 #include <linux/seqlock.h>
 #include <linux/kcsan.h>
 #include <linux/rv.h>
-#include <linux/livepatch_sched.h>
 #include <asm/kmap_size.h>
 
 /* task_struct member predeclarations (sorted alphabetically): */
@@ -2087,9 +2086,6 @@ extern int __cond_resched(void);
 
 #if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
 
-void sched_dynamic_klp_enable(void);
-void sched_dynamic_klp_disable(void);
-
 DECLARE_STATIC_CALL(cond_resched, __cond_resched);
 
 static __always_inline int _cond_resched(void)
@@ -2098,7 +2094,6 @@ static __always_inline int _cond_resched(void)
 }
 
 #elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
-
 extern int dynamic_cond_resched(void);
 
 static __always_inline int _cond_resched(void)
@@ -2106,25 +2101,20 @@ static __always_inline int _cond_resched(void)
 	return dynamic_cond_resched();
 }
 
-#else /* !CONFIG_PREEMPTION */
+#else
 
 static inline int _cond_resched(void)
 {
-	klp_sched_try_switch();
 	return __cond_resched();
 }
 
-#endif /* PREEMPT_DYNAMIC && CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */
+#endif /* CONFIG_PREEMPT_DYNAMIC */
 
-#else /* CONFIG_PREEMPTION && !CONFIG_PREEMPT_DYNAMIC */
+#else
 
-static inline int _cond_resched(void)
-{
-	klp_sched_try_switch();
-	return 0;
-}
+static inline int _cond_resched(void) { return 0; }
 
-#endif /* !CONFIG_PREEMPTION || CONFIG_PREEMPT_DYNAMIC */
+#endif /* !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) */
 
 #define cond_resched() ({			\
 	__might_resched(__FILE__, __LINE__, 0);	\
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index 61328328c474..fc851455740c 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -33,7 +33,6 @@
  *
  * - klp_ftrace_handler()
  * - klp_update_patch_state()
- * - __klp_sched_try_switch()
  */
 DEFINE_MUTEX(klp_mutex);
 
diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c
index e54c3d60a904..70bc38f27af7 100644
--- a/kernel/livepatch/transition.c
+++ b/kernel/livepatch/transition.c
@@ -9,7 +9,6 @@
 
 #include <linux/cpu.h>
 #include <linux/stacktrace.h>
-#include <linux/static_call.h>
 #include "core.h"
 #include "patch.h"
 #include "transition.h"
@@ -27,25 +26,6 @@ static int klp_target_state = KLP_UNDEFINED;
 
 static unsigned int klp_signals_cnt;
 
-/*
- * When a livepatch is in progress, enable klp stack checking in
- * cond_resched().  This helps CPU-bound kthreads get patched.
- */
-#if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
-
-#define klp_cond_resched_enable() sched_dynamic_klp_enable()
-#define klp_cond_resched_disable() sched_dynamic_klp_disable()
-
-#else /* !CONFIG_PREEMPT_DYNAMIC || !CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */
-
-DEFINE_STATIC_KEY_FALSE(klp_sched_try_switch_key);
-EXPORT_SYMBOL(klp_sched_try_switch_key);
-
-#define klp_cond_resched_enable() static_branch_enable(&klp_sched_try_switch_key)
-#define klp_cond_resched_disable() static_branch_disable(&klp_sched_try_switch_key)
-
-#endif /* CONFIG_PREEMPT_DYNAMIC && CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */
-
 /*
  * This work can be performed periodically to finish patching or unpatching any
  * "straggler" tasks which failed to transition in the first attempt.
@@ -194,8 +174,8 @@ void klp_update_patch_state(struct task_struct *task)
 	 * barrier (smp_rmb) for two cases:
 	 *
 	 * 1) Enforce the order of the TIF_PATCH_PENDING read and the
-	 *    klp_target_state read.  The corresponding write barriers are in
-	 *    klp_init_transition() and klp_reverse_transition().
+	 *    klp_target_state read.  The corresponding write barrier is in
+	 *    klp_init_transition().
 	 *
 	 * 2) Enforce the order of the TIF_PATCH_PENDING read and a future read
 	 *    of func->transition, if klp_ftrace_handler() is called later on
@@ -363,44 +343,6 @@ static bool klp_try_switch_task(struct task_struct *task)
 	return !ret;
 }
 
-void __klp_sched_try_switch(void)
-{
-	if (likely(!klp_patch_pending(current)))
-		return;
-
-	/*
-	 * This function is called from cond_resched() which is called in many
-	 * places throughout the kernel.  Using the klp_mutex here might
-	 * deadlock.
-	 *
-	 * Instead, disable preemption to prevent racing with other callers of
-	 * klp_try_switch_task().  Thanks to task_call_func() they won't be
-	 * able to switch this task while it's running.
-	 */
-	preempt_disable();
-
-	/*
-	 * Make sure current didn't get patched between the above check and
-	 * preempt_disable().
-	 */
-	if (unlikely(!klp_patch_pending(current)))
-		goto out;
-
-	/*
-	 * Enforce the order of the TIF_PATCH_PENDING read above and the
-	 * klp_target_state read in klp_try_switch_task().  The corresponding
-	 * write barriers are in klp_init_transition() and
-	 * klp_reverse_transition().
-	 */
-	smp_rmb();
-
-	klp_try_switch_task(current);
-
-out:
-	preempt_enable();
-}
-EXPORT_SYMBOL(__klp_sched_try_switch);
-
 /*
  * Sends a fake signal to all non-kthread tasks with TIF_PATCH_PENDING set.
  * Kthreads with TIF_PATCH_PENDING set are woken up.
@@ -507,8 +449,7 @@ void klp_try_complete_transition(void)
 		return;
 	}
 
-	/* Done!  Now cleanup the data structures. */
-	klp_cond_resched_disable();
+	/* we're done, now cleanup the data structures */
 	patch = klp_transition_patch;
 	klp_complete_transition();
 
@@ -560,8 +501,6 @@ void klp_start_transition(void)
 			set_tsk_thread_flag(task, TIF_PATCH_PENDING);
 	}
 
-	klp_cond_resched_enable();
-
 	klp_signals_cnt = 0;
 }
 
@@ -617,9 +556,8 @@ void klp_init_transition(struct klp_patch *patch, int state)
 	 * see a func in transition with a task->patch_state of KLP_UNDEFINED.
 	 *
 	 * Also enforce the order of the klp_target_state write and future
-	 * TIF_PATCH_PENDING writes to ensure klp_update_patch_state() and
-	 * __klp_sched_try_switch() don't set a task->patch_state to
-	 * KLP_UNDEFINED.
+	 * TIF_PATCH_PENDING writes to ensure klp_update_patch_state() doesn't
+	 * set a task->patch_state to KLP_UNDEFINED.
 	 */
 	smp_wmb();
 
@@ -655,10 +593,14 @@ void klp_reverse_transition(void)
 		 klp_target_state == KLP_PATCHED ? "patching to unpatching" :
 						   "unpatching to patching");
 
+	klp_transition_patch->enabled = !klp_transition_patch->enabled;
+
+	klp_target_state = !klp_target_state;
+
 	/*
 	 * Clear all TIF_PATCH_PENDING flags to prevent races caused by
-	 * klp_update_patch_state() or __klp_sched_try_switch() running in
-	 * parallel with the reverse transition.
+	 * klp_update_patch_state() running in parallel with
+	 * klp_start_transition().
 	 */
 	read_lock(&tasklist_lock);
 	for_each_process_thread(g, task)
@@ -668,28 +610,9 @@ void klp_reverse_transition(void)
 	for_each_possible_cpu(cpu)
 		clear_tsk_thread_flag(idle_task(cpu), TIF_PATCH_PENDING);
 
-	/*
-	 * Make sure all existing invocations of klp_update_patch_state() and
-	 * __klp_sched_try_switch() see the cleared TIF_PATCH_PENDING before
-	 * starting the reverse transition.
-	 */
+	/* Let any remaining calls to klp_update_patch_state() complete */
 	klp_synchronize_transition();
 
-	/*
-	 * All patching has stopped, now re-initialize the global variables to
-	 * prepare for the reverse transition.
-	 */
-	klp_transition_patch->enabled = !klp_transition_patch->enabled;
-	klp_target_state = !klp_target_state;
-
-	/*
-	 * Enforce the order of the klp_target_state write and the
-	 * TIF_PATCH_PENDING writes in klp_start_transition() to ensure
-	 * klp_update_patch_state() and __klp_sched_try_switch() don't set
-	 * task->patch_state to the wrong value.
-	 */
-	smp_wmb();
-
 	klp_start_transition();
 }
 
@@ -703,9 +626,9 @@ void klp_copy_process(struct task_struct *child)
 	 * the task flag up to date with the parent here.
 	 *
 	 * The operation is serialized against all klp_*_transition()
-	 * operations by the tasklist_lock. The only exceptions are
-	 * klp_update_patch_state(current) and __klp_sched_try_switch(), but we
-	 * cannot race with them because we are current.
+	 * operations by the tasklist_lock. The only exception is
+	 * klp_update_patch_state(current), but we cannot race with
+	 * that because we are current.
 	 */
 	if (test_tsk_thread_flag(current, TIF_PATCH_PENDING))
 		set_tsk_thread_flag(child, TIF_PATCH_PENDING);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0e8764d63041..b43fda3c5733 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8597,7 +8597,6 @@ EXPORT_STATIC_CALL_TRAMP(might_resched);
 static DEFINE_STATIC_KEY_FALSE(sk_dynamic_cond_resched);
 int __sched dynamic_cond_resched(void)
 {
-	klp_sched_try_switch();
 	if (!static_branch_unlikely(&sk_dynamic_cond_resched))
 		return 0;
 	return __cond_resched();
@@ -8746,17 +8745,13 @@ int sched_dynamic_mode(const char *str)
 #error "Unsupported PREEMPT_DYNAMIC mechanism"
 #endif
 
-DEFINE_MUTEX(sched_dynamic_mutex);
-static bool klp_override;
-
-static void __sched_dynamic_update(int mode)
+void sched_dynamic_update(int mode)
 {
 	/*
 	 * Avoid {NONE,VOLUNTARY} -> FULL transitions from ever ending up in
 	 * the ZERO state, which is invalid.
 	 */
-	if (!klp_override)
-		preempt_dynamic_enable(cond_resched);
+	preempt_dynamic_enable(cond_resched);
 	preempt_dynamic_enable(might_resched);
 	preempt_dynamic_enable(preempt_schedule);
 	preempt_dynamic_enable(preempt_schedule_notrace);
@@ -8764,79 +8759,36 @@ static void __sched_dynamic_update(int mode)
 
 	switch (mode) {
 	case preempt_dynamic_none:
-		if (!klp_override)
-			preempt_dynamic_enable(cond_resched);
+		preempt_dynamic_enable(cond_resched);
 		preempt_dynamic_disable(might_resched);
 		preempt_dynamic_disable(preempt_schedule);
 		preempt_dynamic_disable(preempt_schedule_notrace);
 		preempt_dynamic_disable(irqentry_exit_cond_resched);
-		if (mode != preempt_dynamic_mode)
-			pr_info("Dynamic Preempt: none\n");
+		pr_info("Dynamic Preempt: none\n");
 		break;
 
 	case preempt_dynamic_voluntary:
-		if (!klp_override)
-			preempt_dynamic_enable(cond_resched);
+		preempt_dynamic_enable(cond_resched);
 		preempt_dynamic_enable(might_resched);
 		preempt_dynamic_disable(preempt_schedule);
 		preempt_dynamic_disable(preempt_schedule_notrace);
 		preempt_dynamic_disable(irqentry_exit_cond_resched);
-		if (mode != preempt_dynamic_mode)
-			pr_info("Dynamic Preempt: voluntary\n");
+		pr_info("Dynamic Preempt: voluntary\n");
 		break;
 
 	case preempt_dynamic_full:
-		if (!klp_override)
-			preempt_dynamic_disable(cond_resched);
+		preempt_dynamic_disable(cond_resched);
 		preempt_dynamic_disable(might_resched);
 		preempt_dynamic_enable(preempt_schedule);
 		preempt_dynamic_enable(preempt_schedule_notrace);
 		preempt_dynamic_enable(irqentry_exit_cond_resched);
-		if (mode != preempt_dynamic_mode)
-			pr_info("Dynamic Preempt: full\n");
+		pr_info("Dynamic Preempt: full\n");
 		break;
 	}
 
 	preempt_dynamic_mode = mode;
 }
 
-void sched_dynamic_update(int mode)
-{
-	mutex_lock(&sched_dynamic_mutex);
-	__sched_dynamic_update(mode);
-	mutex_unlock(&sched_dynamic_mutex);
-}
-
-#ifdef CONFIG_HAVE_PREEMPT_DYNAMIC_CALL
-
-static int klp_cond_resched(void)
-{
-	__klp_sched_try_switch();
-	return __cond_resched();
-}
-
-void sched_dynamic_klp_enable(void)
-{
-	mutex_lock(&sched_dynamic_mutex);
-
-	klp_override = true;
-	static_call_update(cond_resched, klp_cond_resched);
-
-	mutex_unlock(&sched_dynamic_mutex);
-}
-
-void sched_dynamic_klp_disable(void)
-{
-	mutex_lock(&sched_dynamic_mutex);
-
-	klp_override = false;
-	__sched_dynamic_update(preempt_dynamic_mode);
-
-	mutex_unlock(&sched_dynamic_mutex);
-}
-
-#endif /* CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */
-
 static int __init setup_preempt_mode(char *str)
 {
 	int mode = sched_dynamic_mode(str);
-- 
2.31.1
Re: [RFC PATCH 07/86] Revert "livepatch,sched: Add livepatch task switching to cond_resched()"
Posted by Steven Rostedt 2 years, 1 month ago
On Tue,  7 Nov 2023 13:56:53 -0800
Ankur Arora <ankur.a.arora@oracle.com> wrote:

> This reverts commit e3ff7c609f39671d1aaff4fb4a8594e14f3e03f8.
> 
> Note that removing this commit reintroduces "live patches failing to
> complete within a reasonable amount of time due to CPU-bound kthreads."
> 
> Unfortunately this fix depends quite critically on PREEMPT_DYNAMIC and
> existence of cond_resched() so this will need an alternate fix.
> 

Then it would probably be a good idea to Cc the live patching maintainers!

-- Steve

Re: [RFC PATCH 07/86] Revert "livepatch,sched: Add livepatch task switching to cond_resched()"
Posted by Josh Poimboeuf 2 years, 1 month ago
On Tue, Nov 07, 2023 at 06:16:09PM -0500, Steven Rostedt wrote:
> On Tue,  7 Nov 2023 13:56:53 -0800
> Ankur Arora <ankur.a.arora@oracle.com> wrote:
> 
> > This reverts commit e3ff7c609f39671d1aaff4fb4a8594e14f3e03f8.
> > 
> > Note that removing this commit reintroduces "live patches failing to
> > complete within a reasonable amount of time due to CPU-bound kthreads."
> > 
> > Unfortunately this fix depends quite critically on PREEMPT_DYNAMIC and
> > existence of cond_resched() so this will need an alternate fix.

We definitely don't want to introduce a regression, something will need
to be figured out before removing cond_resched().

We could hook into preempt_schedule_irq(), but that wouldn't work for
non-ORC.

Another option would be to hook into schedule().  Then livepatch could
set TIF_NEED_RESCHED on remaining unpatched tasks.  But again if they go
through the preemption path then we have the same problem for non-ORC.

Worst case we'll need to sprinkle cond_livepatch() everywhere :-/
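
For illustration, a minimal sketch of that schedule()-hook idea
(klp_patch_pending() and klp_try_switch_task() are the existing
livepatch helpers; the two functions below and the exact call sites are
assumptions):

static void klp_nudge_unpatched_tasks(void)
{
	struct task_struct *g, *task;

	/* Ask still-unpatched tasks to pass through schedule() soon. */
	read_lock(&tasklist_lock);
	for_each_process_thread(g, task) {
		if (klp_patch_pending(task))
			set_tsk_need_resched(task);
	}
	read_unlock(&tasklist_lock);
}

/* Hypothetical hook in __schedule(), i.e. run in the task's own context
 * with preemption already disabled; on non-ORC the reliable unwind will
 * still fail if we got here via the preemption path. */
static void klp_sched_hook(void)
{
	if (unlikely(klp_patch_pending(current)))
		klp_try_switch_task(current);
}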

-- 
Josh
Re: [RFC PATCH 07/86] Revert "livepatch,sched: Add livepatch task switching to cond_resched()"
Posted by Steven Rostedt 2 years, 1 month ago
On Thu, 9 Nov 2023 09:26:37 -0800
Josh Poimboeuf <jpoimboe@kernel.org> wrote:

> On Tue, Nov 07, 2023 at 06:16:09PM -0500, Steven Rostedt wrote:
> > On Tue,  7 Nov 2023 13:56:53 -0800
> > Ankur Arora <ankur.a.arora@oracle.com> wrote:
> >   
> > > This reverts commit e3ff7c609f39671d1aaff4fb4a8594e14f3e03f8.
> > > 
> > > Note that removing this commit reintroduces "live patches failing to
> > > complete within a reasonable amount of time due to CPU-bound kthreads."
> > > 
> > > Unfortunately this fix depends quite critically on PREEMPT_DYNAMIC and
> > > existence of cond_resched() so this will need an alternate fix.  
> 
> We definitely don't want to introduce a regression, something will need
> to be figured out before removing cond_resched().
> 
> We could hook into preempt_schedule_irq(), but that wouldn't work for
> non-ORC.
> 
> Another option would be to hook into schedule().  Then livepatch could
> set TIF_NEED_RESCHED on remaining unpatched tasks.  But again if they go
> through the preemption path then we have the same problem for non-ORC.
> 
> Worst case we'll need to sprinkle cond_livepatch() everywhere :-/
> 

I guess I'm not fully understanding what the cond rescheds are for. But
would an IPI to all CPUs setting NEED_RESCHED, fix it?

-- Steve
Re: [RFC PATCH 07/86] Revert "livepatch,sched: Add livepatch task switching to cond_resched()"
Posted by Josh Poimboeuf 2 years, 1 month ago
On Thu, Nov 09, 2023 at 12:31:47PM -0500, Steven Rostedt wrote:
> On Thu, 9 Nov 2023 09:26:37 -0800
> Josh Poimboeuf <jpoimboe@kernel.org> wrote:
> 
> > On Tue, Nov 07, 2023 at 06:16:09PM -0500, Steven Rostedt wrote:
> > > On Tue,  7 Nov 2023 13:56:53 -0800
> > > Ankur Arora <ankur.a.arora@oracle.com> wrote:
> > >   
> > > > This reverts commit e3ff7c609f39671d1aaff4fb4a8594e14f3e03f8.
> > > > 
> > > > Note that removing this commit reintroduces "live patches failing to
> > > > complete within a reasonable amount of time due to CPU-bound kthreads."
> > > > 
> > > > Unfortunately this fix depends quite critically on PREEMPT_DYNAMIC and
> > > > existence of cond_resched() so this will need an alternate fix.  
> > 
> > We definitely don't want to introduce a regression, something will need
> > to be figured out before removing cond_resched().
> > 
> > We could hook into preempt_schedule_irq(), but that wouldn't work for
> > non-ORC.
> > 
> > Another option would be to hook into schedule().  Then livepatch could
> > set TIF_NEED_RESCHED on remaining unpatched tasks.  But again if they go
> > through the preemption path then we have the same problem for non-ORC.
> > 
> > Worst case we'll need to sprinkle cond_livepatch() everywhere :-/
> > 
> 
> I guess I'm not fully understanding what the cond rescheds are for. But
> would an IPI to all CPUs setting NEED_RESCHED, fix it?

If all livepatch arches had the ORC unwinder, yes.

The problem is that frame pointer (and similar) unwinders can't reliably
unwind past an interrupt frame.

-- 
Josh
Re: [RFC PATCH 07/86] Revert "livepatch,sched: Add livepatch task switching to cond_resched()"
Posted by Steven Rostedt 2 years, 1 month ago
On Thu, 9 Nov 2023 09:51:18 -0800
Josh Poimboeuf <jpoimboe@kernel.org> wrote:

> > I guess I'm not fully understanding what the cond rescheds are for. But
> > would an IPI to all CPUs setting NEED_RESCHED, fix it?  
> 
> If all livepatch arches had the ORC unwinder, yes.
> 
> The problem is that frame pointer (and similar) unwinders can't reliably
> unwind past an interrupt frame.

Perhaps we can use this to push those archs with bad unwinders to port over
ORC unwinding ;-)

-- Steve
Re: [RFC PATCH 07/86] Revert "livepatch,sched: Add livepatch task switching to cond_resched()"
Posted by Ankur Arora 2 years, 1 month ago
Josh Poimboeuf <jpoimboe@kernel.org> writes:

> On Thu, Nov 09, 2023 at 12:31:47PM -0500, Steven Rostedt wrote:
>> On Thu, 9 Nov 2023 09:26:37 -0800
>> Josh Poimboeuf <jpoimboe@kernel.org> wrote:
>>
>> > On Tue, Nov 07, 2023 at 06:16:09PM -0500, Steven Rostedt wrote:
>> > > On Tue,  7 Nov 2023 13:56:53 -0800
>> > > Ankur Arora <ankur.a.arora@oracle.com> wrote:
>> > >
>> > > > This reverts commit e3ff7c609f39671d1aaff4fb4a8594e14f3e03f8.
>> > > >
>> > > > Note that removing this commit reintroduces "live patches failing to
>> > > > complete within a reasonable amount of time due to CPU-bound kthreads."
>> > > >
>> > > > Unfortunately this fix depends quite critically on PREEMPT_DYNAMIC and
>> > > > existence of cond_resched() so this will need an alternate fix.
>> >
>> > We definitely don't want to introduce a regression, something will need
>> > to be figured out before removing cond_resched().
>> >
>> > We could hook into preempt_schedule_irq(), but that wouldn't work for
>> > non-ORC.
>> >
>> > Another option would be to hook into schedule().  Then livepatch could
>> > set TIF_NEED_RESCHED on remaining unpatched tasks.  But again if they go
>> > through the preemption path then we have the same problem for non-ORC.
>> >
>> > Worst case we'll need to sprinkle cond_livepatch() everywhere :-/
>> >
>>
>> I guess I'm not fully understanding what the cond rescheds are for. But
>> would an IPI to all CPUs setting NEED_RESCHED, fix it?

Yeah. We could just temporarily toggle to full preemption, when
NEED_RESCHED_LAZY is always upgraded to NEED_RESCHED which will
then send IPIs.

> If all livepatch arches had the ORC unwinder, yes.
>
> The problem is that frame pointer (and similar) unwinders can't reliably
> unwind past an interrupt frame.

Ah, I wonder if we could just disable the preempt_schedule_irq() path
temporarily? Hooking into schedule() alongside something like this:

@@ -379,7 +379,7 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)

 void irqentry_exit_cond_resched(void)
 {
-       if (!preempt_count()) {
+       if (klp_cond_resched_disable() && !preempt_count()) {

The problem would be tasks that don't go through any preemptible
sections.
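
For illustration, a rough sketch of the "temporarily toggle to full
preemption" part, assuming it sits next to sched_dynamic_update() in
kernel/sched/core.c (the helpers and the saved-mode variable below are
invented names):

static int klp_saved_preempt_mode;

/* Force full preemption for the duration of a livepatch transition so
 * NEED_RESCHED is acted on promptly everywhere. */
void sched_klp_force_full_preemption(void)
{
	klp_saved_preempt_mode = preempt_dynamic_mode;
	sched_dynamic_update(preempt_dynamic_full);
}

/* Put the previously configured preemption mode back once the
 * transition completes. */
void sched_klp_restore_preemption(void)
{
	sched_dynamic_update(klp_saved_preempt_mode);
}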

--
ankur
Re: [RFC PATCH 07/86] Revert "livepatch,sched: Add livepatch task switching to cond_resched()"
Posted by Josh Poimboeuf 2 years, 1 month ago
On Thu, Nov 09, 2023 at 02:50:48PM -0800, Ankur Arora wrote:
> >> I guess I'm not fully understanding what the cond rescheds are for. But
> >> would an IPI to all CPUs setting NEED_RESCHED, fix it?
> 
> Yeah. We could just temporarily toggle to full preemption, when
> NEED_RESCHED_LAZY is always upgraded to NEED_RESCHED which will
> then send IPIs.
> 
> > If all livepatch arches had the ORC unwinder, yes.
> >
> > The problem is that frame pointer (and similar) unwinders can't reliably
> > unwind past an interrupt frame.
> 
> Ah, I wonder if we could just disable the preempt_schedule_irq() path
> temporarily? Hooking into schedule() alongside something like this:
> 
> @@ -379,7 +379,7 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
> 
>  void irqentry_exit_cond_resched(void)
>  {
> -       if (!preempt_count()) {
> +       if (klp_cond_resched_disable() && !preempt_count()) {
> 
> The problem would be tasks that don't go through any preemptible
> sections.

Let me back up a bit and explain what klp is trying to do.

When a livepatch is applied, klp needs to unwind all the tasks,
preferably within a reasonable amount of time.

We can't unwind task A from task B while task A is running, since task A
could be changing the stack during the unwind.  So task A needs to be
blocked or asleep.  The only exception to that is if the unwind happens
in the context of task A itself.

The problem we were seeing was CPU-bound kthreads (e.g., vhost_worker)
not getting patched within a reasonable amount of time.  We fixed it by
hooking the klp unwind into cond_resched() so it can unwind from the
task itself.

It only worked because we had a non-preempted hook (because non-ORC
unwinders can't unwind reliably through preemption) which called klp to
unwind from the context of the task.

Without something to hook into, we have a problem.  We could of course
hook into schedule(), but if the kthread never calls schedule() from a
non-preempted context then it still doesn't help.
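
In essence (modulo the memory-barrier details), the cond_resched() hook
being reverted above boils down to running this from the task's own,
non-preempted context:

	if (klp_patch_pending(current)) {
		preempt_disable();
		if (klp_patch_pending(current))
			klp_try_switch_task(current);
		preempt_enable();
	}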

-- 
Josh
Re: [RFC PATCH 07/86] Revert "livepatch,sched: Add livepatch task switching to cond_resched()"
Posted by Ankur Arora 2 years, 1 month ago
Josh Poimboeuf <jpoimboe@kernel.org> writes:

> On Thu, Nov 09, 2023 at 02:50:48PM -0800, Ankur Arora wrote:
>> >> I guess I'm not fully understanding what the cond rescheds are for. But
>> >> would an IPI to all CPUs setting NEED_RESCHED, fix it?
>>
>> Yeah. We could just temporarily toggle to full preemption, when
>> NEED_RESCHED_LAZY is always upgraded to NEED_RESCHED which will
>> then send IPIs.
>>
>> > If all livepatch arches had the ORC unwinder, yes.
>> >
>> > The problem is that frame pointer (and similar) unwinders can't reliably
>> > unwind past an interrupt frame.
>>
>> Ah, I wonder if we could just disable the preempt_schedule_irq() path
>> temporarily? Hooking into schedule() alongside something like this:
>>
>> @@ -379,7 +379,7 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
>>
>>  void irqentry_exit_cond_resched(void)
>>  {
>> -       if (!preempt_count()) {
>> +       if (klp_cond_resched_disable() && !preempt_count()) {
>>
>> The problem would be tasks that don't go through any preemptible
>> sections.
>
> Let me back up a bit and explain what klp is trying to do.
>
> When a livepatch is applied, klp needs to unwind all the tasks,
> preferably within a reasonable amount of time.
>
> We can't unwind task A from task B while task A is running, since task A
> could be changing the stack during the unwind.  So task A needs to be
> blocked or asleep.  The only exception to that is if the unwind happens
> in the context of task A itself.

> The problem we were seeing was CPU-bound kthreads (e.g., vhost_worker)
> not getting patched within a reasonable amount of time.  We fixed it by
> hooking the klp unwind into cond_resched() so it can unwind from the
> task itself.

Right, so the task calls schedule() itself via cond_resched() and that
works. If the task schedules out by calling preempt_enable() that
presumably works as well.

So, that leaves two paths where we can't unwind:

 1. a task that never enters or leaves a preemptible section
 2. a task that gets preempted in irqentry_exit_cond_resched();
    this path we could disable temporarily.

> It only worked because we had a non-preempted hook (because non-ORC
> unwinders can't unwind reliably through preemption) which called klp to
> unwind from the context of the task.
>
> Without something to hook into, we have a problem.  We could of course
> hook into schedule(), but if the kthread never calls schedule() from a
> non-preempted context then it still doesn't help.

Yeah, agreed. The first one is a problem, and it's a problem with the
removal of cond_resched() generally, because the typical way to fix
case 1 was to add a cond_resched() wherever softlockups were seen, or
during code review.

--
ankur
Re: [RFC PATCH 07/86] Revert "livepatch,sched: Add livepatch task switching to cond_resched()"
Posted by Ankur Arora 2 years, 1 month ago
Steven Rostedt <rostedt@goodmis.org> writes:

> On Tue,  7 Nov 2023 13:56:53 -0800
> Ankur Arora <ankur.a.arora@oracle.com> wrote:
>
>> This reverts commit e3ff7c609f39671d1aaff4fb4a8594e14f3e03f8.
>>
>> Note that removing this commit reintroduces "live patches failing to
>> complete within a reasonable amount of time due to CPU-bound kthreads."
>>
>> Unfortunately this fix depends quite critically on PREEMPT_DYNAMIC and
>> existence of cond_resched() so this will need an alternate fix.
>>
>
> Then it would probably be a good idea to Cc the live patching maintainers!

Indeed. Could have sworn that I had. But clearly not.

Apologies and thanks for adding them.

--
ankur