Currently, migrate_enable and migrate_disable are global functions, which
makes them hotspots in some cases. Take BPF for example: the calls to
migrate_enable and migrate_disable in the BPF trampoline can introduce
significant overhead. The following is the 'perf top' output of the FENTRY
benchmark (./tools/testing/selftests/bpf/bench trig-fentry):
54.63% bpf_prog_2dcccf652aac1793_bench_trigger_fentry [k] bpf_prog_2dcccf652aac1793_bench_trigger_fentry
10.43% [kernel] [k] migrate_enable
10.07% bpf_trampoline_6442517037 [k] bpf_trampoline_6442517037
8.06% [kernel] [k] __bpf_prog_exit_recur
4.11% libc.so.6 [.] syscall
2.15% [kernel] [k] entry_SYSCALL_64
1.48% [kernel] [k] memchr_inv
1.32% [kernel] [k] fput
1.16% [kernel] [k] _copy_to_user
0.73% [kernel] [k] bpf_prog_test_run_raw_tp
So in this commit, we make migrate_enable/migrate_disable inline to obtain
better performance. The struct rq is defined internally in
kernel/sched/sched.h, and the field "nr_pinned" is accessed in
migrate_enable/migrate_disable, which makes it hard to make them inline.
Alexei Starovoitov suggested generating the offset of "nr_pinned" in [1],
so that we can define migrate_enable/migrate_disable in
include/linux/sched.h and access "this_rq()->nr_pinned" with
"(void *)this_rq() + RQ_nr_pinned".
The offset of "nr_pinned" is generated in include/generated/rq-offsets.h
by kernel/sched/rq-offsets.c.
Generally speaking, we move the definitions of migrate_enable and
migrate_disable from kernel/sched/core.c to include/linux/sched.h. The
call to __set_cpus_allowed_ptr() is left in __migrate_enable().
The "struct rq" is not available in include/linux/sched.h, so we can't
access "runqueues" with this_cpu_ptr(), as the compilation would fail
in this_cpu_ptr() -> raw_cpu_ptr() -> __verify_pcpu_ptr():
typeof((ptr) + 0)
So we introduce this_rq_raw() and access the runqueues with
arch_raw_cpu_ptr() directly.
Before this patch, the performance of BPF FENTRY is:
fentry : 113.030 ± 0.149M/s
fentry : 112.501 ± 0.187M/s
fentry : 112.828 ± 0.267M/s
fentry : 115.287 ± 0.241M/s
After this patch, the performance of BPF FENTRY increases to:
fentry : 143.644 ± 0.670M/s
fentry : 149.764 ± 0.362M/s
fentry : 149.642 ± 0.156M/s
fentry : 145.263 ± 0.221M/s
Link: https://lore.kernel.org/bpf/CAADnVQ+5sEDKHdsJY5ZsfGDO_1SEhhQWHrt2SMBG5SYyQ+jt7w@mail.gmail.com/ [1]
Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
---
Kbuild | 13 ++++++-
include/linux/preempt.h | 3 --
include/linux/sched.h | 72 +++++++++++++++++++++++++++++++++++++++
kernel/bpf/verifier.c | 3 +-
kernel/sched/core.c | 56 +++---------------------------
kernel/sched/rq-offsets.c | 12 +++++++
6 files changed, 101 insertions(+), 58 deletions(-)
create mode 100644 kernel/sched/rq-offsets.c
diff --git a/Kbuild b/Kbuild
index f327ca86990c..13324b4bbe23 100644
--- a/Kbuild
+++ b/Kbuild
@@ -34,13 +34,24 @@ arch/$(SRCARCH)/kernel/asm-offsets.s: $(timeconst-file) $(bounds-file)
$(offsets-file): arch/$(SRCARCH)/kernel/asm-offsets.s FORCE
$(call filechk,offsets,__ASM_OFFSETS_H__)
+# Generate rq-offsets.h
+
+rq-offsets-file := include/generated/rq-offsets.h
+
+targets += kernel/sched/rq-offsets.s
+
+kernel/sched/rq-offsets.s: $(offsets-file)
+
+$(rq-offsets-file): kernel/sched/rq-offsets.s FORCE
+ $(call filechk,offsets,__RQ_OFFSETS_H__)
+
# Check for missing system calls
quiet_cmd_syscalls = CALL $<
cmd_syscalls = $(CONFIG_SHELL) $< $(CC) $(c_flags) $(missing_syscalls_flags)
PHONY += missing-syscalls
-missing-syscalls: scripts/checksyscalls.sh $(offsets-file)
+missing-syscalls: scripts/checksyscalls.sh $(rq-offsets-file)
$(call cmd,syscalls)
# Check the manual modification of atomic headers
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 1fad1c8a4c76..92237c319035 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -424,8 +424,6 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier,
* work-conserving schedulers.
*
*/
-extern void migrate_disable(void);
-extern void migrate_enable(void);
/**
* preempt_disable_nested - Disable preemption inside a normally preempt disabled section
@@ -471,7 +469,6 @@ static __always_inline void preempt_enable_nested(void)
DEFINE_LOCK_GUARD_0(preempt, preempt_disable(), preempt_enable())
DEFINE_LOCK_GUARD_0(preempt_notrace, preempt_disable_notrace(), preempt_enable_notrace())
-DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable())
#ifdef CONFIG_PREEMPT_DYNAMIC
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2b272382673d..be489558207f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -49,6 +49,9 @@
#include <linux/tracepoint-defs.h>
#include <linux/unwind_deferred_types.h>
#include <asm/kmap_size.h>
+#ifndef COMPILE_OFFSETS
+#include <generated/rq-offsets.h>
+#endif
/* task_struct member predeclarations (sorted alphabetically): */
struct audit_context;
@@ -2307,4 +2310,73 @@ static __always_inline void alloc_tag_restore(struct alloc_tag *tag, struct allo
#define alloc_tag_restore(_tag, _old) do {} while (0)
#endif
+#if defined(CONFIG_SMP) && !defined(COMPILE_OFFSETS)
+
+extern void __migrate_enable(void);
+
+struct rq;
+DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
+#define this_rq_raw() arch_raw_cpu_ptr(&runqueues)
+
+static inline void migrate_enable(void)
+{
+ struct task_struct *p = current;
+
+#ifdef CONFIG_DEBUG_PREEMPT
+ /*
+ * Check both overflow from migrate_disable() and superfluous
+ * migrate_enable().
+ */
+ if (WARN_ON_ONCE((s16)p->migration_disabled <= 0))
+ return;
+#endif
+
+ if (p->migration_disabled > 1) {
+ p->migration_disabled--;
+ return;
+ }
+
+ /*
+ * Ensure stop_task runs either before or after this, and that
+ * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule().
+ */
+ guard(preempt)();
+ if (unlikely(p->cpus_ptr != &p->cpus_mask))
+ __migrate_enable();
+ /*
+ * Mustn't clear migration_disabled() until cpus_ptr points back at the
+ * regular cpus_mask, otherwise things that race (eg.
+ * select_fallback_rq) get confused.
+ */
+ barrier();
+ p->migration_disabled = 0;
+ (*(unsigned int *)((void *)this_rq_raw() + RQ_nr_pinned))--;
+}
+
+static inline void migrate_disable(void)
+{
+ struct task_struct *p = current;
+
+ if (p->migration_disabled) {
+#ifdef CONFIG_DEBUG_PREEMPT
+ /*
+ *Warn about overflow half-way through the range.
+ */
+ WARN_ON_ONCE((s16)p->migration_disabled < 0);
+#endif
+ p->migration_disabled++;
+ return;
+ }
+
+ guard(preempt)();
+ (*(unsigned int *)((void *)this_rq_raw() + RQ_nr_pinned))++;
+ p->migration_disabled = 1;
+}
+#else
+static inline void migrate_disable(void) { }
+static inline void migrate_enable(void) { }
+#endif
+
+DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable())
+
#endif
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 0806295945e4..bfba29a4fb10 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -23853,8 +23853,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
BTF_SET_START(btf_id_deny)
BTF_ID_UNUSED
#ifdef CONFIG_SMP
-BTF_ID(func, migrate_disable)
-BTF_ID(func, migrate_enable)
+BTF_ID(func, __migrate_enable)
#endif
#if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
BTF_ID(func, rcu_read_unlock_strict)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index be00629f0ba4..00383fed9f63 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -119,6 +119,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(sched_update_nr_running_tp);
EXPORT_TRACEPOINT_SYMBOL_GPL(sched_compute_energy_tp);
DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
+EXPORT_SYMBOL_GPL(runqueues);
#ifdef CONFIG_SCHED_PROXY_EXEC
DEFINE_STATIC_KEY_TRUE(__sched_proxy_exec);
@@ -2381,28 +2382,7 @@ static void migrate_disable_switch(struct rq *rq, struct task_struct *p)
__do_set_cpus_allowed(p, &ac);
}
-void migrate_disable(void)
-{
- struct task_struct *p = current;
-
- if (p->migration_disabled) {
-#ifdef CONFIG_DEBUG_PREEMPT
- /*
- *Warn about overflow half-way through the range.
- */
- WARN_ON_ONCE((s16)p->migration_disabled < 0);
-#endif
- p->migration_disabled++;
- return;
- }
-
- guard(preempt)();
- this_rq()->nr_pinned++;
- p->migration_disabled = 1;
-}
-EXPORT_SYMBOL_GPL(migrate_disable);
-
-void migrate_enable(void)
+void __migrate_enable(void)
{
struct task_struct *p = current;
struct affinity_context ac = {
@@ -2410,37 +2390,9 @@ void migrate_enable(void)
.flags = SCA_MIGRATE_ENABLE,
};
-#ifdef CONFIG_DEBUG_PREEMPT
- /*
- * Check both overflow from migrate_disable() and superfluous
- * migrate_enable().
- */
- if (WARN_ON_ONCE((s16)p->migration_disabled <= 0))
- return;
-#endif
-
- if (p->migration_disabled > 1) {
- p->migration_disabled--;
- return;
- }
-
- /*
- * Ensure stop_task runs either before or after this, and that
- * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule().
- */
- guard(preempt)();
- if (p->cpus_ptr != &p->cpus_mask)
- __set_cpus_allowed_ptr(p, &ac);
- /*
- * Mustn't clear migration_disabled() until cpus_ptr points back at the
- * regular cpus_mask, otherwise things that race (eg.
- * select_fallback_rq) get confused.
- */
- barrier();
- p->migration_disabled = 0;
- this_rq()->nr_pinned--;
+ __set_cpus_allowed_ptr(p, &ac);
}
-EXPORT_SYMBOL_GPL(migrate_enable);
+EXPORT_SYMBOL_GPL(__migrate_enable);
static inline bool rq_has_pinned_tasks(struct rq *rq)
{
diff --git a/kernel/sched/rq-offsets.c b/kernel/sched/rq-offsets.c
new file mode 100644
index 000000000000..a23747bbe25b
--- /dev/null
+++ b/kernel/sched/rq-offsets.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+#define COMPILE_OFFSETS
+#include <linux/kbuild.h>
+#include <linux/types.h>
+#include "sched.h"
+
+int main(void)
+{
+ DEFINE(RQ_nr_pinned, offsetof(struct rq, nr_pinned));
+
+ return 0;
+}
--
2.50.1
Hello, kernel test robot noticed "BUG:using_smp_processor_id()in_preemptible" on: commit: 10f0da02e16093e603e5f82fe735f836a3791ca0 ("[PATCH tip 2/3] sched: make migrate_enable/migrate_disable inline") url: https://github.com/intel-lab-lkp/linux/commits/Menglong-Dong/arch-add-the-macro-COMPILE_OFFSETS-to-all-the-asm-offsets-c/20250810-110846 base: https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git b2fc521b40b9e94f6fe2cc9820b14ae67d8fe891 patch link: https://lore.kernel.org/all/20250810030442.246974-3-dongml2@chinatelecom.cn/ patch subject: [PATCH tip 2/3] sched: make migrate_enable/migrate_disable inline in testcase: boot config: i386-randconfig-007-20250811 compiler: gcc-12 test machine: qemu-system-i386 -enable-kvm -cpu SandyBridge -smp 2 -m 4G (please refer to attached dmesg/kmsg for entire log/backtrace) +--------------------------------------------+------------+------------+ | | 51b2cbc934 | 10f0da02e1 | +--------------------------------------------+------------+------------+ | BUG:using_smp_processor_id()in_preemptible | 0 | 6 | +--------------------------------------------+------------+------------+ If you fix the issue in a separate patch/commit (i.e. 
not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <oliver.sang@intel.com> | Closes: https://lore.kernel.org/oe-lkp/202508120411.6173c4f-lkp@intel.com [ 25.544225][ T1] BUG: using smp_processor_id() in preemptible [00000000] code: swapper/0/1 [ 25.643781][ T1] caller is debug_smp_processor_id (lib/smp_processor_id.c:59) [ 25.657229][ T1] CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.16.0-12127-g10f0da02e160 #1 PREEMPTLAZY [ 25.658066][ T1] Call Trace: [ 25.658066][ T1] dump_stack_lvl (lib/dump_stack.c:123) [ 25.658066][ T1] dump_stack (lib/dump_stack.c:130) [ 25.658066][ T1] check_preemption_disabled (arch/x86/include/asm/preempt.h:85 lib/smp_processor_id.c:51) [ 25.658066][ T1] debug_smp_processor_id (lib/smp_processor_id.c:59) [ 25.658066][ T1] __kvm_is_vmx_supported (arch/x86/include/asm/cpuid/api.h:29 arch/x86/include/asm/cpuid/api.h:74 arch/x86/include/asm/cpuid/api.h:113 arch/x86/kvm/vmx/vmx.c:2743) [ 25.658066][ T1] vmx_init (arch/x86/kvm/vmx/vmx.c:8557) [ 25.658066][ T1] ? pi_init_cpu (arch/x86/kvm/vmx/main.c:1028) [ 25.658066][ T1] vt_init (arch/x86/kvm/vmx/main.c:1033) [ 25.658066][ T1] do_one_initcall (init/main.c:1269) [ 25.658066][ T1] do_initcalls (init/main.c:1330 init/main.c:1347) [ 25.658066][ T1] kernel_init_freeable (init/main.c:1583) [ 25.658066][ T1] ? rest_init (init/main.c:1461) [ 25.658066][ T1] kernel_init (init/main.c:1471) [ 25.658066][ T1] ret_from_fork (arch/x86/kernel/process.c:154) [ 25.658066][ T1] ? 
rest_init (init/main.c:1461) [ 25.658066][ T1] ret_from_fork_asm (arch/x86/entry/entry_32.S:737) [ 25.658066][ T1] entry_INT80_32 (arch/x86/entry/entry_32.S:945) [ 25.815107][ T1] kvm_intel: VMX not supported by CPU 0 [ 25.822075][ T1] BUG: using smp_processor_id() in preemptible [00000000] code: swapper/0/1 [ 25.833038][ T1] caller is debug_smp_processor_id (lib/smp_processor_id.c:59) [ 25.842786][ T1] CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.16.0-12127-g10f0da02e160 #1 PREEMPTLAZY [ 25.843623][ T1] Call Trace: [ 25.843623][ T1] dump_stack_lvl (lib/dump_stack.c:123) [ 25.843623][ T1] dump_stack (lib/dump_stack.c:130) [ 25.843623][ T1] check_preemption_disabled (arch/x86/include/asm/preempt.h:85 lib/smp_processor_id.c:51) [ 25.843623][ T1] ? vt_init (arch/x86/kvm/svm/svm.c:5494) [ 25.843623][ T1] debug_smp_processor_id (lib/smp_processor_id.c:59) [ 25.843623][ T1] __kvm_is_svm_supported (arch/x86/kvm/svm/svm.c:429) [ 25.843623][ T1] svm_init (arch/x86/kvm/svm/svm.c:456 arch/x86/kvm/svm/svm.c:5501) [ 25.843623][ T1] do_one_initcall (init/main.c:1269) [ 25.843623][ T1] do_initcalls (init/main.c:1330 init/main.c:1347) [ 25.843623][ T1] kernel_init_freeable (init/main.c:1583) [ 25.843623][ T1] ? rest_init (init/main.c:1461) [ 25.843623][ T1] kernel_init (init/main.c:1471) [ 25.843623][ T1] ret_from_fork (arch/x86/kernel/process.c:154) [ 25.843623][ T1] ? rest_init (init/main.c:1461) [ 25.843623][ T1] ret_from_fork_asm (arch/x86/entry/entry_32.S:737) [ 25.843623][ T1] entry_INT80_32 (arch/x86/entry/entry_32.S:945) The kernel config and materials to reproduce are available at: https://download.01.org/0day-ci/archive/20250812/202508120411.6173c4f-lkp@intel.com -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
Hi Menglong, kernel test robot noticed the following build warnings: [auto build test WARNING on tip/master] url: https://github.com/intel-lab-lkp/linux/commits/Menglong-Dong/arch-add-the-macro-COMPILE_OFFSETS-to-all-the-asm-offsets-c/20250810-110846 base: tip/master patch link: https://lore.kernel.org/r/20250810030442.246974-3-dongml2%40chinatelecom.cn patch subject: [PATCH tip 2/3] sched: make migrate_enable/migrate_disable inline config: arc-randconfig-001-20250810 (https://download.01.org/0day-ci/archive/20250810/202508101230.2KBZa0Ql-lkp@intel.com/config) compiler: arc-linux-gcc (GCC) 8.5.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250810/202508101230.2KBZa0Ql-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202508101230.2KBZa0Ql-lkp@intel.com/ All warnings (new ones prefixed by >>): >> kernel/sched/core.c:2385:6: warning: no previous prototype for '__migrate_enable' [-Wmissing-prototypes] void __migrate_enable(void) ^~~~~~~~~~~~~~~~ vim +/__migrate_enable +2385 kernel/sched/core.c 2384 > 2385 void __migrate_enable(void) 2386 { 2387 struct task_struct *p = current; 2388 struct affinity_context ac = { 2389 .new_mask = &p->cpus_mask, 2390 .flags = SCA_MIGRATE_ENABLE, 2391 }; 2392 2393 __set_cpus_allowed_ptr(p, &ac); 2394 } 2395 EXPORT_SYMBOL_GPL(__migrate_enable); 2396 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
© 2016 - 2025 Red Hat, Inc.