Documentation/admin-guide/kernel-parameters.txt | 4 arch/Kconfig | 4 arch/loongarch/Kconfig | 1 arch/loongarch/include/asm/thread_info.h | 76 +- arch/riscv/Kconfig | 1 arch/riscv/include/asm/thread_info.h | 31 - arch/s390/Kconfig | 1 arch/s390/include/asm/thread_info.h | 44 - arch/x86/Kconfig | 1 arch/x86/entry/syscall_32.c | 3 arch/x86/include/asm/thread_info.h | 76 +- drivers/hv/mshv_root_main.c | 3 fs/binfmt_elf.c | 2 fs/exec.c | 2 include/asm-generic/thread_info_tif.h | 51 + include/linux/entry-common.h | 38 - include/linux/irq-entry-common.h | 68 ++ include/linux/mm.h | 25 include/linux/resume_user_mode.h | 2 include/linux/rseq.h | 235 +++++---- include/linux/rseq_entry.h | 607 +++++++++++++++++++++++ include/linux/rseq_types.h | 93 +++ include/linux/sched.h | 48 + include/linux/thread_info.h | 5 include/trace/events/rseq.h | 4 include/uapi/linux/rseq.h | 21 init/Kconfig | 28 + kernel/entry/common.c | 39 - kernel/entry/syscall-common.c | 8 kernel/ptrace.c | 6 kernel/rseq.c | 623 +++++++++--------------- kernel/sched/core.c | 10 kernel/sched/membarrier.c | 8 kernel/sched/sched.h | 5 virt/kvm/kvm_main.c | 3 35 files changed, 1450 insertions(+), 726 deletions(-)
This is a follow up on the V3 series, which can be found here:
https://lore.kernel.org/all/20250904185336.943880027@linutronix.de
The V2 posting contains a detailed list of the addressed problems. TLDR:
- A significant amount of pointless RSEQ operations on exit to user
space, which have been reported by people as measurable impact after
glibc switched to use RSEQ
- Suboptimal hotpath handling both in the scheduler and on exit to user
space.
This series addresses these issues by:
1) Limiting the RSEQ work to the actual conditions where it is
required. The full benefit is only available for architectures using
the generic entry infrastructure. All others get at least the basic
improvements.
2) Re-implementing the whole user space handling based on proper data
structures and by actually looking at the impact it creates in the
fast path.
3) Moving the actual handling of RSEQ out to the latest point in the exit
path, where possible. This is fully inlined into the fast path to keep
the impact confined.
Changes vs. V3:
1) Move _all_ RSEQ related data into a umbrella data structure
When trying to adopt the time slice extension PoC to the rework, I
noticed that just moving the event and IDs into a data structure is
dumb.
Moving all rseq related data into an umbrella struct allows to
simplify fork/exec by reducing them to memset/memcpy() plus minimal
extra work, which pretty much avoids having to copy or reset further
additions later on.
That's a purely mechanical change done with coccinelle on top of V3
and then gradually folded back with scripting into the series.
2) Further simplification of the exit_to_user_mode() integration.
The games with ti_work returned by rseq_exit_to_user_mode() are not
necessary at all and just complicate things.
The point is that the RSEQ code is only invoked when all other TIF
bits except TIF_RSEQ have been processed. So moving the handling out
of the exit_to_user_mode() loop into a surrounding loop makes it way
simpler and the resulting ASM is more straightforward. The unlikely
error case is just looping back once more into the inner loop.
That eliminates the extra TIF_RSEQ optimization of the previous series
(patch 37/37) as it now comes for free immediately when hooking up
TIF_RSEQ. That means the related helper and the ugly goto are gone
too.
Delta patch to V3 is below.
As for the previous version these patches have a pile of dependencies:
The series depends on the separately posted rseq bugfix:
https://lore.kernel.org/lkml/87o6sj6z95.ffs@tglx/
and the uaccess generic helper series:
https://lore.kernel.org/lkml/20250813150610.521355442@linutronix.de/
and a related futex fix in
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/urgent
The combination of all of them and some other related fixes (rseq
selftests) is available here:
git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git rseq/base
For your convenience all of it is also available as a conglomerate from
git:
git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git rseq/perf
Thanks,
tglx
---
Documentation/admin-guide/kernel-parameters.txt | 4
arch/Kconfig | 4
arch/loongarch/Kconfig | 1
arch/loongarch/include/asm/thread_info.h | 76 +-
arch/riscv/Kconfig | 1
arch/riscv/include/asm/thread_info.h | 31 -
arch/s390/Kconfig | 1
arch/s390/include/asm/thread_info.h | 44 -
arch/x86/Kconfig | 1
arch/x86/entry/syscall_32.c | 3
arch/x86/include/asm/thread_info.h | 76 +-
drivers/hv/mshv_root_main.c | 3
fs/binfmt_elf.c | 2
fs/exec.c | 2
include/asm-generic/thread_info_tif.h | 51 +
include/linux/entry-common.h | 38 -
include/linux/irq-entry-common.h | 68 ++
include/linux/mm.h | 25
include/linux/resume_user_mode.h | 2
include/linux/rseq.h | 235 +++++----
include/linux/rseq_entry.h | 607 +++++++++++++++++++++++
include/linux/rseq_types.h | 93 +++
include/linux/sched.h | 48 +
include/linux/thread_info.h | 5
include/trace/events/rseq.h | 4
include/uapi/linux/rseq.h | 21
init/Kconfig | 28 +
kernel/entry/common.c | 39 -
kernel/entry/syscall-common.c | 8
kernel/ptrace.c | 6
kernel/rseq.c | 623 +++++++++---------------
kernel/sched/core.c | 10
kernel/sched/membarrier.c | 8
kernel/sched/sched.h | 5
virt/kvm/kvm_main.c | 3
35 files changed, 1450 insertions(+), 726 deletions(-)
---
Delta to V3:
diff --git a/include/linux/rseq.h b/include/linux/rseq.h
index 2f166e93f016..b8ea95011ec3 100644
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -11,11 +11,11 @@ void __rseq_handle_slowpath(struct pt_regs *regs);
static inline void rseq_handle_slowpath(struct pt_regs *regs)
{
if (IS_ENABLED(CONFIG_GENERIC_ENTRY)) {
- if (current->rseq_event.slowpath)
+ if (current->rseq.event.slowpath)
__rseq_handle_slowpath(regs);
} else {
/* '&' is intentional to spare one conditional branch */
- if (current->rseq_event.sched_switch & current->rseq_event.has_rseq)
+ if (current->rseq.event.sched_switch & current->rseq.event.has_rseq)
__rseq_handle_slowpath(regs);
}
}
@@ -30,10 +30,10 @@ static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *reg
{
if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
/* '&' is intentional to spare one conditional branch */
- if (current->rseq_event.has_rseq & current->rseq_event.user_irq)
+ if (current->rseq.event.has_rseq & current->rseq.event.user_irq)
__rseq_signal_deliver(ksig->sig, regs);
} else {
- if (current->rseq_event.has_rseq)
+ if (current->rseq.event.has_rseq)
__rseq_signal_deliver(ksig->sig, regs);
}
}
@@ -46,7 +46,7 @@ static inline void rseq_raise_notify_resume(struct task_struct *t)
/* Invoked from context switch to force evaluation on exit to user */
static __always_inline void rseq_sched_switch_event(struct task_struct *t)
{
- struct rseq_event *ev = &t->rseq_event;
+ struct rseq_event *ev = &t->rseq.event;
if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
/*
@@ -64,7 +64,7 @@ static __always_inline void rseq_sched_switch_event(struct task_struct *t)
}
} else {
if (ev->has_rseq) {
- t->rseq_event.sched_switch = true;
+ t->rseq.event.sched_switch = true;
rseq_raise_notify_resume(t);
}
}
@@ -79,7 +79,7 @@ static __always_inline void rseq_sched_switch_event(struct task_struct *t)
*/
static __always_inline void rseq_sched_set_task_cpu(struct task_struct *t, unsigned int cpu)
{
- t->rseq_event.ids_changed = true;
+ t->rseq.event.ids_changed = true;
}
/*
@@ -96,16 +96,16 @@ static __always_inline void rseq_sched_set_task_mm_cid(struct task_struct *t, un
* provide a conditional for it readily. So avoid excessive updates
* when nothing changes.
*/
- if (t->rseq_ids.mm_cid != cid)
- t->rseq_event.ids_changed = true;
+ if (t->rseq.ids.mm_cid != cid)
+ t->rseq.event.ids_changed = true;
}
/* Enforce a full update after RSEQ registration and when execve() failed */
static inline void rseq_force_update(void)
{
- if (current->rseq_event.has_rseq) {
- current->rseq_event.ids_changed = true;
- current->rseq_event.sched_switch = true;
+ if (current->rseq.event.has_rseq) {
+ current->rseq.event.ids_changed = true;
+ current->rseq.event.sched_switch = true;
rseq_raise_notify_resume(current);
}
}
@@ -124,15 +124,27 @@ static inline void rseq_force_update(void)
*/
static inline void rseq_virt_userspace_exit(void)
{
+ if (current->rseq.event.sched_switch)
/*
* The generic optimization for deferring RSEQ updates until the next
* exit relies on having a dedicated TIF_RSEQ.
*/
if (!IS_ENABLED(CONFIG_HAVE_GENERIC_TIF_BITS) &&
- current->rseq_event.sched_switch)
+ current->rseq.event.sched_switch)
rseq_raise_notify_resume(current);
}
+static inline void rseq_reset(struct task_struct *t)
+{
+ memset(&t->rseq, 0, sizeof(t->rseq));
+ t->rseq.ids.cpu_cid = ~0ULL;
+}
+
+static inline void rseq_execve(struct task_struct *t)
+{
+ rseq_reset(t);
+}
+
/*
* If parent process has a registered restartable sequences area, the
* child inherits. Unregister rseq for a clone with CLONE_VM set.
@@ -140,17 +152,10 @@ static inline void rseq_virt_userspace_exit(void)
static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
{
if (clone_flags & CLONE_VM) {
- t->rseq = NULL;
- t->rseq_len = 0;
- t->rseq_sig = 0;
- t->rseq_ids.cpu_cid = ~0ULL;
- t->rseq_event.all = 0;
+ rseq_reset(t);
} else {
t->rseq = current->rseq;
- t->rseq_len = current->rseq_len;
- t->rseq_sig = current->rseq_sig;
- t->rseq_ids.cpu_cid = ~0ULL;
- t->rseq_event = current->rseq_event;
+ t->rseq.ids.cpu_cid = ~0ULL;
/*
* If it has rseq, force it into the slow path right away
* because it is guaranteed to fault.
@@ -160,22 +165,13 @@ static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
* for those who do it's required to enforce the slow path
* as the scheduler sets only TIF_RSEQ.
*/
- if (t->rseq_event.has_rseq) {
- t->rseq_event.slowpath = true;
+ if (t->rseq.event.has_rseq) {
+ t->rseq.event.slowpath = true;
set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
}
}
}
-static inline void rseq_execve(struct task_struct *t)
-{
- t->rseq = NULL;
- t->rseq_len = 0;
- t->rseq_sig = 0;
- t->rseq_ids.cpu_cid = ~0ULL;
- t->rseq_event.all = 0;
-}
-
#else /* CONFIG_RSEQ */
static inline void rseq_handle_slowpath(struct pt_regs *regs) { }
static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { }
diff --git a/include/linux/rseq_entry.h b/include/linux/rseq_entry.h
index fe6b3cc54d54..ce1ad66c48c3 100644
--- a/include/linux/rseq_entry.h
+++ b/include/linux/rseq_entry.h
@@ -11,7 +11,6 @@ struct rseq_stats {
unsigned long signal;
unsigned long slowpath;
unsigned long fastpath;
- unsigned long quicktif;
unsigned long ids;
unsigned long cs;
unsigned long clear;
@@ -65,7 +64,7 @@ static inline void rseq_trace_ip_fixup(unsigned long ip, unsigned long start_ip,
}
#else /* CONFIG_TRACEPOINT */
-static inline void rseq_trace_update(struct task_struct *t) { }
+static inline void rseq_trace_update(struct task_struct *t, struct rseq_ids *ids) { }
static inline void rseq_trace_ip_fixup(unsigned long ip, unsigned long start_ip,
unsigned long offset, unsigned long abort_ip) { }
#endif /* !CONFIG_TRACEPOINT */
@@ -84,7 +83,7 @@ bool rseq_debug_validate_ids(struct task_struct *t);
static __always_inline void rseq_note_user_irq_entry(void)
{
if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY))
- current->rseq_event.user_irq = true;
+ current->rseq.event.user_irq = true;
}
/*
@@ -172,17 +171,17 @@ bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsi
/* abort_ip - 4 is >= 0. See abort_ip check above */
uc_sig = (u32 __user *)(unsigned long)(abort_ip - sizeof(*uc_sig));
unsafe_get_user(usig, uc_sig, fail);
- if (unlikely(usig != t->rseq_sig))
+ if (unlikely(usig != t->rseq.sig))
goto die;
/* rseq_event.user_irq is only valid if CONFIG_GENERIC_IRQ_ENTRY=y */
if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
/* If not in interrupt from user context, let it die */
- if (unlikely(!t->rseq_event.user_irq))
+ if (unlikely(!t->rseq.event.user_irq))
goto die;
}
- unsafe_put_user(0ULL, &t->rseq->rseq_cs, fail);
+ unsafe_put_user(0ULL, &t->rseq.usrptr->rseq_cs, fail);
user_access_end();
instruction_pointer_set(regs, (unsigned long)abort_ip);
@@ -191,12 +190,12 @@ bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsi
rseq_trace_ip_fixup(ip, start_ip, offset, abort_ip);
return true;
clear:
- unsafe_put_user(0ULL, &t->rseq->rseq_cs, fail);
+ unsafe_put_user(0ULL, &t->rseq.usrptr->rseq_cs, fail);
user_access_end();
rseq_stat_inc(rseq_stats.clear);
return true;
die:
- t->rseq_event.fatal = true;
+ t->rseq.event.fatal = true;
fail:
user_access_end();
return false;
@@ -209,23 +208,23 @@ bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsi
*/
bool rseq_debug_validate_ids(struct task_struct *t)
{
- struct rseq __user *rseq = t->rseq;
+ struct rseq __user *rseq = t->rseq.usrptr;
u32 cpu_id, uval, node_id;
- if (t->rseq_ids.cpu_cid == ~0)
+ if (t->rseq.ids.cpu_cid == ~0)
return true;
/*
* Look it up outside of the user access section as cpu_to_node()
* can end up in debug code.
*/
- node_id = cpu_to_node(t->rseq_ids.cpu_id);
+ node_id = cpu_to_node(t->rseq.ids.cpu_id);
if (!user_read_masked_begin(rseq))
return false;
unsafe_get_user(cpu_id, &rseq->cpu_id_start, efault);
- if (cpu_id != t->rseq_ids.cpu_id)
+ if (cpu_id != t->rseq.ids.cpu_id)
goto die;
unsafe_get_user(uval, &rseq->cpu_id, efault);
if (uval != cpu_id)
@@ -234,12 +233,12 @@ bool rseq_debug_validate_ids(struct task_struct *t)
if (uval != node_id)
goto die;
unsafe_get_user(uval, &rseq->mm_cid, efault);
- if (uval != t->rseq_ids.mm_cid)
+ if (uval != t->rseq.ids.mm_cid)
goto die;
user_access_end();
return true;
die:
- t->rseq_event.fatal = true;
+ t->rseq.event.fatal = true;
efault:
user_access_end();
return false;
@@ -263,7 +262,7 @@ rseq_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long c
rseq_stat_inc(rseq_stats.cs);
if (unlikely(csaddr >= tasksize)) {
- t->rseq_event.fatal = true;
+ t->rseq.event.fatal = true;
return false;
}
@@ -307,11 +306,11 @@ rseq_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long c
/* The address is guaranteed to be >= 0 and < TASK_SIZE */
uc_sig = (u32 __user *)(unsigned long)(abort_ip - sizeof(*uc_sig));
unsafe_get_user(usig, uc_sig, fail);
- if (unlikely(usig != t->rseq_sig))
+ if (unlikely(usig != t->rseq.sig))
goto die;
/* Invalidate the critical section */
- unsafe_put_user(0ULL, &t->rseq->rseq_cs, fail);
+ unsafe_put_user(0ULL, &t->rseq.usrptr->rseq_cs, fail);
user_access_end();
/* Update the instruction pointer */
@@ -321,12 +320,12 @@ rseq_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long c
rseq_trace_ip_fixup(ip, start_ip, offset, abort_ip);
return true;
clear:
- unsafe_put_user(0ULL, &t->rseq->rseq_cs, fail);
+ unsafe_put_user(0ULL, &t->rseq.usrptr->rseq_cs, fail);
user_access_end();
rseq_stat_inc(rseq_stats.clear);
return true;
die:
- t->rseq_event.fatal = true;
+ t->rseq.event.fatal = true;
fail:
user_access_end();
return false;
@@ -358,7 +357,7 @@ static rseq_inline
bool rseq_set_ids_get_csaddr(struct task_struct *t, struct rseq_ids *ids,
u32 node_id, u64 *csaddr)
{
- struct rseq __user *rseq = t->rseq;
+ struct rseq __user *rseq = t->rseq.usrptr;
if (static_branch_unlikely(&rseq_debug_enabled)) {
if (!rseq_debug_validate_ids(t))
@@ -377,7 +376,7 @@ bool rseq_set_ids_get_csaddr(struct task_struct *t, struct rseq_ids *ids,
user_access_end();
/* Cache the new values */
- t->rseq_ids.cpu_cid = ids->cpu_cid;
+ t->rseq.ids.cpu_cid = ids->cpu_cid;
rseq_stat_inc(rseq_stats.ids);
rseq_trace_update(t, ids);
return true;
@@ -405,7 +404,7 @@ static rseq_inline bool rseq_update_usr(struct task_struct *t, struct pt_regs *r
*/
if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
if (!static_branch_unlikely(&rseq_debug_enabled)) {
- if (likely(!t->rseq_event.user_irq))
+ if (likely(!t->rseq.event.user_irq))
return true;
}
}
@@ -476,21 +475,21 @@ static __always_inline bool __rseq_exit_to_user_mode_restart(struct pt_regs *reg
* A sane compiler requires four instructions for the nothing to do
* case including clearing the events, but your milage might vary.
*/
- if (likely(!(t->rseq_event.sched_switch & t->rseq_event.has_rseq)))
+ if (likely(!(t->rseq.event.sched_switch & t->rseq.event.has_rseq)))
goto done;
rseq_stat_inc(rseq_stats.fastpath);
pagefault_disable();
- if (likely(!t->rseq_event.ids_changed)) {
+ if (likely(!t->rseq.event.ids_changed)) {
/*
* If IDs have not changed rseq_event::user_irq must be true
* See rseq_sched_switch_event().
*/
u64 csaddr;
- if (unlikely(get_user_masked_u64(&csaddr, &t->rseq->rseq_cs)))
+ if (unlikely(get_user_masked_u64(&csaddr, &t->rseq.usrptr->rseq_cs)))
goto fail;
if (static_branch_unlikely(&rseq_debug_enabled) || unlikely(csaddr)) {
@@ -512,61 +511,55 @@ static __always_inline bool __rseq_exit_to_user_mode_restart(struct pt_regs *reg
done:
/* Clear state so next entry starts from a clean slate */
- t->rseq_event.events = 0;
+ t->rseq.event.events = 0;
return false;
fail:
pagefault_enable();
/* Force it into the slow path. Don't clear the state! */
- t->rseq_event.slowpath = true;
+ t->rseq.event.slowpath = true;
set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
return true;
}
+/*
+ * Required to allow conversion to GENERIC_ENTRY w/o GENERIC_TIF_BITS
+ * as that's not upstream yet.
+ */
#ifdef CONFIG_HAVE_GENERIC_TIF_BITS
-# define CHECK_TIF_RSEQ _TIF_RSEQ
+static __always_inline bool test_tif_rseq(unsigned long ti_work)
+{
+ return ti_work & _TIF_RSEQ;
+}
+
static __always_inline void clear_tif_rseq(void)
{
static_assert(TIF_RSEQ != TIF_NOTIFY_RESUME);
clear_thread_flag(TIF_RSEQ);
}
#else
-# define CHECK_TIF_RSEQ 0UL
-static inline void clear_tif_rseq(void) { }
+static __always_inline bool test_tif_rseq(unsigned long ti_work) { return true; }
+static __always_inline void clear_tif_rseq(void) { }
#endif
-static __always_inline unsigned long
-rseq_exit_to_user_mode_work(struct pt_regs *regs, unsigned long ti_work, const unsigned long mask)
+static __always_inline bool
+rseq_exit_to_user_mode_restart(struct pt_regs *regs, unsigned long ti_work)
{
- /*
- * Check if all work bits have been cleared before handling rseq.
- *
- * In case of a seperate TIF_RSEQ this checks for all other bits to
- * be cleared and TIF_RSEQ to be set.
- */
- if ((ti_work & mask) != CHECK_TIF_RSEQ)
- return ti_work;
+ if (likely(!test_tif_rseq(ti_work)))
+ return false;
- if (likely(!__rseq_exit_to_user_mode_restart(regs))) {
- clear_tif_rseq();
- return ti_work & ~CHECK_TIF_RSEQ;
- }
- return ti_work | _TIF_NOTIFY_RESUME;
-}
+ if (unlikely(__rseq_exit_to_user_mode_restart(regs)))
+ return true;
-static __always_inline bool
-rseq_exit_to_user_mode_early(unsigned long ti_work, const unsigned long mask)
-{
- if (IS_ENABLED(CONFIG_HAVE_GENERIC_TIF_BITS))
- return (ti_work & mask) == CHECK_TIF_RSEQ;
+ clear_tif_rseq();
return false;
}
-#endif /* !CONFIG_GENERIC_ENTRY */
+#endif /* CONFIG_GENERIC_ENTRY */
static __always_inline void rseq_syscall_exit_to_user_mode(void)
{
- struct rseq_event *ev = &current->rseq_event;
+ struct rseq_event *ev = &current->rseq.event;
rseq_stat_inc(rseq_stats.exit);
@@ -579,7 +572,7 @@ static __always_inline void rseq_syscall_exit_to_user_mode(void)
static __always_inline void rseq_irqentry_exit_to_user_mode(void)
{
- struct rseq_event *ev = &current->rseq_event;
+ struct rseq_event *ev = &current->rseq.event;
rseq_stat_inc(rseq_stats.exit);
@@ -601,18 +594,11 @@ static inline void rseq_debug_syscall_return(struct pt_regs *regs)
__rseq_debug_syscall_return(regs);
}
#else /* CONFIG_RSEQ */
-static inline unsigned long rseq_exit_to_user_mode_work(struct pt_regs *regs,
- unsigned long ti_work,
- const unsigned long mask)
-{
- return ti_work;
-}
-
-static inline bool rseq_exit_to_user_mode_early(unsigned long ti_work, const unsigned long mask)
+static inline void rseq_note_user_irq_entry(void) { }
+static inline bool rseq_exit_to_user_mode_restart(struct pt_regs *regs)
{
return false;
}
-static inline void rseq_note_user_irq_entry(void) { }
static inline void rseq_syscall_exit_to_user_mode(void) { }
static inline void rseq_irqentry_exit_to_user_mode(void) { }
static inline void rseq_debug_syscall_return(struct pt_regs *regs) { }
diff --git a/include/linux/rseq_types.h b/include/linux/rseq_types.h
index 68dfc215bbff..9c7a34154de8 100644
--- a/include/linux/rseq_types.h
+++ b/include/linux/rseq_types.h
@@ -3,10 +3,11 @@
#define _LINUX_RSEQ_TYPES_H
#include <linux/types.h>
-/* Forward declaration for sched.h */
+
+#ifdef CONFIG_RSEQ
struct rseq;
-/*
+/**
* struct rseq_event - Storage for rseq related event management
* @all: Compound to initialize and clear the data efficiently
* @events: Compound to access events with a single load/store
@@ -50,7 +51,7 @@ struct rseq_event {
};
};
-/*
+/**
* struct rseq_ids - Cache for ids, which need to be updated
* @cpu_cid: Compound of @cpu_id and @mm_cid to make the
* compiler emit a single compare on 64-bit
@@ -69,4 +70,24 @@ struct rseq_ids {
};
};
+/**
+ * struct rseq_data - Storage for all rseq related data
+ * @usrptr: Pointer to the registered user space RSEQ memory
+ * @len: Length of the RSEQ region
+ * @sig: Signature of critical section abort IPs
+ * @event: Storage for event management
+ * @ids: Storage for cached CPU ID and MM CID
+ */
+struct rseq_data {
+ struct rseq __user *usrptr;
+ u32 len;
+ u32 sig;
+ struct rseq_event event;
+ struct rseq_ids ids;
+};
+
+#else /* CONFIG_RSEQ */
+struct rseq_data { };
+#endif /* !CONFIG_RSEQ */
+
#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5ba86a668980..857ed17d443b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1400,13 +1400,7 @@ struct task_struct {
unsigned long numa_pages_migrated;
#endif /* CONFIG_NUMA_BALANCING */
-#ifdef CONFIG_RSEQ
- struct rseq __user *rseq;
- u32 rseq_len;
- u32 rseq_sig;
- struct rseq_event rseq_event;
- struct rseq_ids rseq_ids;
-#endif
+ struct rseq_data rseq;
#ifdef CONFIG_SCHED_MM_CID
int mm_cid; /* Current cid in mm */
diff --git a/include/trace/events/rseq.h b/include/trace/events/rseq.h
index a6ec3f0c8ae7..ce85d650bf4b 100644
--- a/include/trace/events/rseq.h
+++ b/include/trace/events/rseq.h
@@ -21,9 +21,9 @@ TRACE_EVENT(rseq_update,
),
TP_fast_assign(
- __entry->cpu_id = t->rseq_ids.cpu_id;
+ __entry->cpu_id = t->rseq.ids.cpu_id;
__entry->node_id = cpu_to_node(__entry->cpu_id);
- __entry->mm_cid = t->rseq_ids.mm_cid;
+ __entry->mm_cid = t->rseq.ids.mm_cid;
),
TP_printk("cpu_id=%d node_id=%d mm_cid=%d", __entry->cpu_id,
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index 73028b98aa6a..cca17016c5da 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -11,26 +11,21 @@
/* Workaround to allow gradual conversion of architecture code */
void __weak arch_do_signal_or_restart(struct pt_regs *regs) { }
-/**
- * exit_to_user_mode_loop - do any pending work before leaving to user space
- * @regs: Pointer to pt_regs on entry stack
- * @ti_work: TIF work flags as read by the caller
- */
-__always_inline unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
- unsigned long ti_work)
+#ifdef CONFIG_HAVE_GENERIC_TIF_BITS
+#define EXIT_TO_USER_MODE_WORK_LOOP (EXIT_TO_USER_MODE_WORK & ~_TIF_RSEQ)
+#else
+#define EXIT_TO_USER_MODE_WORK_LOOP (EXIT_TO_USER_MODE_WORK)
+#endif
+
+static __always_inline unsigned long __exit_to_user_mode_loop(struct pt_regs *regs,
+ unsigned long ti_work)
{
/*
* Before returning to user space ensure that all pending work
* items have been completed.
- *
- * Optimize for TIF_RSEQ being the only bit set.
*/
- if (rseq_exit_to_user_mode_early(ti_work, EXIT_TO_USER_MODE_WORK)) {
- rseq_stat_inc(rseq_stats.quicktif);
- goto do_rseq;
- }
+ while (ti_work & EXIT_TO_USER_MODE_WORK_LOOP) {
- do {
local_irq_enable_exit_to_user(ti_work);
if (ti_work & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY))
@@ -62,26 +57,29 @@ __always_inline unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
tick_nohz_user_enter_prepare();
ti_work = read_thread_flags();
-
- do_rseq:
- /*
- * This returns the unmodified ti_work, when ti_work is not
- * empty (except for TIF_RSEQ). In that case it waits for
- * the next round to avoid multiple updates in case of
- * rescheduling.
- *
- * When it handles rseq it returns either with empty work
- * on success or with TIF_NOTIFY_RESUME set on failure to
- * kick the handling into the slow path.
- */
- ti_work = rseq_exit_to_user_mode_work(regs, ti_work, EXIT_TO_USER_MODE_WORK);
-
- } while (ti_work & EXIT_TO_USER_MODE_WORK);
+ }
/* Return the latest work state for arch_exit_to_user_mode() */
return ti_work;
}
+/**
+ * exit_to_user_mode_loop - do any pending work before leaving to user space
+ * @regs: Pointer to pt_regs on entry stack
+ * @ti_work: TIF work flags as read by the caller
+ */
+__always_inline unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
+ unsigned long ti_work)
+{
+ for (;;) {
+ ti_work = __exit_to_user_mode_loop(regs, ti_work);
+
+ if (likely(!rseq_exit_to_user_mode_restart(regs, ti_work)))
+ return ti_work;
+ ti_work = read_thread_flags();
+ }
+}
+
noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
{
irqentry_state_t ret = {
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 75a84efad40f..392ec2f75f01 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -793,9 +793,9 @@ static long ptrace_get_rseq_configuration(struct task_struct *task,
unsigned long size, void __user *data)
{
struct ptrace_rseq_configuration conf = {
- .rseq_abi_pointer = (u64)(uintptr_t)task->rseq,
- .rseq_abi_size = task->rseq_len,
- .signature = task->rseq_sig,
+ .rseq_abi_pointer = (u64)(uintptr_t)task->rseq.usrptr,
+ .rseq_abi_size = task->rseq.len,
+ .signature = task->rseq.sig,
.flags = 0,
};
diff --git a/kernel/rseq.c b/kernel/rseq.c
index 01c7402d13a3..52b276a5e004 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -134,7 +134,6 @@ static int rseq_stats_show(struct seq_file *m, void *p)
stats.signal += data_race(per_cpu(rseq_stats.signal, cpu));
stats.slowpath += data_race(per_cpu(rseq_stats.slowpath, cpu));
stats.fastpath += data_race(per_cpu(rseq_stats.fastpath, cpu));
- stats.quicktif += data_race(per_cpu(rseq_stats.quicktif, cpu));
stats.ids += data_race(per_cpu(rseq_stats.ids, cpu));
stats.cs += data_race(per_cpu(rseq_stats.cs, cpu));
stats.clear += data_race(per_cpu(rseq_stats.clear, cpu));
@@ -145,7 +144,6 @@ static int rseq_stats_show(struct seq_file *m, void *p)
seq_printf(m, "signal: %16lu\n", stats.signal);
seq_printf(m, "slowp: %16lu\n", stats.slowpath);
seq_printf(m, "fastp: %16lu\n", stats.fastpath);
- seq_printf(m, "quickt: %16lu\n", stats.quicktif);
seq_printf(m, "ids: %16lu\n", stats.ids);
seq_printf(m, "cs: %16lu\n", stats.cs);
seq_printf(m, "clear: %16lu\n", stats.clear);
@@ -227,7 +225,7 @@ static bool rseq_handle_cs(struct task_struct *t, struct pt_regs *regs)
{
u64 csaddr;
- if (get_user_masked_u64(&csaddr, &t->rseq->rseq_cs))
+ if (get_user_masked_u64(&csaddr, &t->rseq.usrptr->rseq_cs))
return false;
if (likely(!csaddr))
return true;
@@ -271,10 +269,10 @@ static void rseq_slowpath_update_usr(struct pt_regs *regs)
* inconsistencies.
*/
scoped_guard(irq) {
+ event = t->rseq.event.sched_switch;
+ t->rseq.event.all &= evt_mask.all;
ids.cpu_id = task_cpu(t);
ids.mm_cid = task_mm_cid(t);
- event = t->rseq_event.sched_switch;
- t->rseq_event.all &= evt_mask.all;
}
if (!event)
@@ -287,7 +285,7 @@ static void rseq_slowpath_update_usr(struct pt_regs *regs)
* Clear the errors just in case this might survive magically, but
* leave the rest intact.
*/
- t->rseq_event.error = 0;
+ t->rseq.event.error = 0;
force_sig(SIGSEGV);
}
}
@@ -325,7 +323,7 @@ void __rseq_signal_deliver(int sig, struct pt_regs *regs)
* Clear the errors just in case this might survive
* magically, but leave the rest intact.
*/
- current->rseq_event.error = 0;
+ current->rseq.event.error = 0;
force_sigsegv(sig);
}
}
@@ -335,9 +333,9 @@ void __rseq_debug_syscall_return(struct pt_regs *regs)
struct task_struct *t = current;
u64 csaddr;
- if (!t->rseq_event.has_rseq)
+ if (!t->rseq.event.has_rseq)
return;
- if (get_user_masked_u64(&csaddr, &t->rseq->rseq_cs))
+ if (get_user_masked_u64(&csaddr, &t->rseq.usrptr->rseq_cs))
goto fail;
if (likely(!csaddr))
return;
@@ -393,33 +391,30 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32
if (flags & ~RSEQ_FLAG_UNREGISTER)
return -EINVAL;
/* Unregister rseq for current thread. */
- if (current->rseq != rseq || !current->rseq)
+ if (current->rseq.usrptr != rseq || !current->rseq.usrptr)
return -EINVAL;
- if (rseq_len != current->rseq_len)
+ if (rseq_len != current->rseq.len)
return -EINVAL;
- if (current->rseq_sig != sig)
+ if (current->rseq.sig != sig)
return -EPERM;
if (!rseq_reset_ids())
return -EFAULT;
- current->rseq = NULL;
- current->rseq_sig = 0;
- current->rseq_len = 0;
- current->rseq_event.all = 0;
+ rseq_reset(current);
return 0;
}
if (unlikely(flags))
return -EINVAL;
- if (current->rseq) {
+ if (current->rseq.usrptr) {
/*
* If rseq is already registered, check whether
* the provided address differs from the prior
* one.
*/
- if (current->rseq != rseq || rseq_len != current->rseq_len)
+ if (current->rseq.usrptr != rseq || rseq_len != current->rseq.len)
return -EINVAL;
- if (current->rseq_sig != sig)
+ if (current->rseq.sig != sig)
return -EPERM;
/* Already registered. */
return -EBUSY;
@@ -457,16 +452,16 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32
* Activate the registration by setting the rseq area address, length
* and signature in the task struct.
*/
- current->rseq = rseq;
- current->rseq_len = rseq_len;
- current->rseq_sig = sig;
+ current->rseq.usrptr = rseq;
+ current->rseq.len = rseq_len;
+ current->rseq.sig = sig;
/*
* If rseq was previously inactive, and has just been
* registered, ensure the cpu_id_start and cpu_id fields
* are updated before returning to user-space.
*/
- current->rseq_event.has_rseq = true;
+ current->rseq.event.has_rseq = true;
rseq_force_update();
return 0;
On 9/8/25 3:31 PM, Thomas Gleixner wrote: > For your convenience all of it is also available as a conglomerate from > git: > > git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git rseq/perf I used this branch for some quick testing. Since I last looked at the rseq performance overhead, glibc must have improved a few things. FWIW, box is running libc 2.41 at the moment. Test box is on debian unstable, so gets frequent updates. In any case, for one of my usual kernel overhead runs of checking running a basic IOPS based test, I see the following on the stock (-rc5 + 6.18 targeted changes) kernel running that test: + 0.89% io_uring [kernel.kallsyms] [k] __get_user_8 + 0.58% io_uring [kernel.kallsyms] [k] __put_user_8 + 1.13% io_uring [kernel.kallsyms] [k] __rseq_handle_notify_resume which is about 2.6% of purely rseq related overhead. Pulling in the above branch and running the exact same test, all of the above are gone and perusing the profile has nothing jump out at me in terms of shifting those cycles to other bookkeeping. So yes, this work does make a very noticeable difference! -- Jens Axboe
On Wed, Sep 10 2025 at 07:55, Jens Axboe wrote:
> On 9/8/25 3:31 PM, Thomas Gleixner wrote:
>> For your convenience all of it is also available as a conglomerate from
>> git:
>>
>> git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git rseq/perf
>
> I used this branch for some quick testing. Since I last looked at the
> rseq performance overhead, glibc must have improved a few things. FWIW,
> box is running libc 2.41 at the moment. Test box is on debian unstable,
> so gets frequent updates. In any case, for one of my usual kernel
> overhead runs of checking running a basic IOPS based test, I see the
> following on the stock (-rc5 + 6.18 targeted changes) kernel running
> that test:
>
> + 0.89% io_uring [kernel.kallsyms] [k] __get_user_8
> + 0.58% io_uring [kernel.kallsyms] [k] __put_user_8
> + 1.13% io_uring [kernel.kallsyms] [k] __rseq_handle_notify_resume
>
> which is about 2.6% of purely rseq related overhead. Pulling in the
> above branch and running the exact same test, all of the above are gone
> and perusing the profile has nothing jump out at me in terms of shifting
> those cycles to other bookkeeping.
>
> So yes, this work does make a very noticeable difference!
I would have been really surprised if not :)
Thanks a lot for testing!
tglx
On 2025-09-10 09:55, Jens Axboe wrote: > On 9/8/25 3:31 PM, Thomas Gleixner wrote: >> For your convenience all of it is also available as a conglomerate from >> git: >> >> git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git rseq/perf > > I used this branch for some quick testing. Since I last looked at the > rseq performance overhead, glibc must have improved a few things. FWIW, > box is running libc 2.41 at the moment. Test box is on debian unstable, > so gets frequent updates. In any case, for one of my usual kernel > overhead runs of checking running a basic IOPS based test, I see the > following on the stock (-rc5 + 6.18 targeted changes) kernel running > that test: > > + 0.89% io_uring [kernel.kallsyms] [k] __get_user_8 > + 0.58% io_uring [kernel.kallsyms] [k] __put_user_8 > + 1.13% io_uring [kernel.kallsyms] [k] __rseq_handle_notify_resume > > which is about 2.6% of purely rseq related overhead. Pulling in the > above branch and running the exact same test, all of the above are gone > and perusing the profile has nothing jump out at me in terms of shifting > those cycles to other bookkeeping. > > So yes, this work does make a very noticeable difference! If you have time, could you also run the same test on this branch with glibc rseq registration disabled, just to see of there is some noticable differences. To disable glibc automatic registration, just export this variable: GLIBC_TUNABLES="glibc.pthread.rseq=0" Thanks!
On 9/10/25 8:45 AM, Michael Jeanson wrote: > On 2025-09-10 09:55, Jens Axboe wrote: >> On 9/8/25 3:31 PM, Thomas Gleixner wrote: >>> For your convenience all of it is also available as a conglomerate from >>> git: >>> >>> git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git rseq/perf >> >> I used this branch for some quick testing. Since I last looked at the >> rseq performance overhead, glibc must have improved a few things. FWIW, >> box is running libc 2.41 at the moment. Test box is on debian unstable, >> so gets frequent updates. In any case, for one of my usual kernel >> overhead runs of checking running a basic IOPS based test, I see the >> following on the stock (-rc5 + 6.18 targeted changes) kernel running >> that test: >> >> + 0.89% io_uring [kernel.kallsyms] [k] __get_user_8 >> + 0.58% io_uring [kernel.kallsyms] [k] __put_user_8 >> + 1.13% io_uring [kernel.kallsyms] [k] __rseq_handle_notify_resume >> >> which is about 2.6% of purely rseq related overhead. Pulling in the >> above branch and running the exact same test, all of the above are gone >> and perusing the profile has nothing jump out at me in terms of shifting >> those cycles to other bookkeeping. >> >> So yes, this work does make a very noticeable difference! > > If you have time, could you also run the same test on this branch with glibc rseq registration disabled, just to see of there is some noticable differences. > > To disable glibc automatic registration, just export this variable: > > GLIBC_TUNABLES="glibc.pthread.rseq=0" > Thanks! Running that same branch (-rc5 + pending stuff + tglx rseq changes) doesn't show any noticeable differences in perf diff between having rseq enabled or not via the above tunable. All I see are really minor fluctuations between unrelated functions, as the run-to-run consistency isn't entirely 0. But the trace doesn't have any signs of any rseq/exit_to_usermode activity even with rseq enabled at this point. -- Jens Axboe
From: Thomas Gleixner <tglx@linutronix.de>
The RSEQ critical section mechanism only clears the event mask when a
critical section is registered, otherwise it is stale and collects
bits.
That means once a critical section is installed the first invocation of
that code when TIF_NOTIFY_RESUME is set will abort the critical section,
even when the TIF bit was not raised by the rseq preempt/migrate/signal
helpers.
This also has a performance implication because TIF_NOTIFY_RESUME is a
multiplexing TIF bit, which is utilized by quite some infrastructure. That
means every invocation of __rseq_notify_resume() goes unconditionally
through the heavy lifting of user space access and consistency checks even
if there is no reason to do so.
Keeping the stale event mask around when exiting to user space also
prevents it from being utilized by the upcoming time slice extension
mechanism.
Avoid this by reading and clearing the event mask before doing the user
space critical section access with interrupts or preemption disabled, which
ensures that the read and clear operation is CPU local atomic versus
scheduling and the membarrier IPI.
This is correct as after re-enabling interrupts/preemption any relevant
event will set the bit again and raise TIF_NOTIFY_RESUME, which makes the
user space exit code take another round of TIF bit clearing.
If the event mask was non-zero, invoke the slow path. On debug kernels the
slow path is invoked unconditionally and the result of the event mask
evaluation is handed in.
Add an exit path check after the TIF bit loop, which validates on debug
kernels that the event mask is zero before exiting to user space.
While at it reword the convoluted comment why the pt_regs pointer can be
NULL under certain circumstances.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
---
include/linux/irq-entry-common.h | 7 ++--
include/linux/rseq.h | 10 +++++
kernel/rseq.c | 66 ++++++++++++++++++++++++++-------------
3 files changed, 58 insertions(+), 25 deletions(-)
---
--- a/include/linux/irq-entry-common.h
+++ b/include/linux/irq-entry-common.h
@@ -2,11 +2,12 @@
#ifndef __LINUX_IRQENTRYCOMMON_H
#define __LINUX_IRQENTRYCOMMON_H
+#include <linux/context_tracking.h>
+#include <linux/kmsan.h>
+#include <linux/rseq.h>
#include <linux/static_call_types.h>
#include <linux/syscalls.h>
-#include <linux/context_tracking.h>
#include <linux/tick.h>
-#include <linux/kmsan.h>
#include <linux/unwind_deferred.h>
#include <asm/entry-common.h>
@@ -226,6 +227,8 @@ static __always_inline void exit_to_user
arch_exit_to_user_mode_prepare(regs, ti_work);
+ rseq_exit_to_user_mode();
+
/* Ensure that kernel state is sane for a return to userspace */
kmap_assert_nomap();
lockdep_assert_irqs_disabled();
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -66,6 +66,14 @@ static inline void rseq_migrate(struct t
rseq_set_notify_resume(t);
}
+static __always_inline void rseq_exit_to_user_mode(void)
+{
+ if (IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
+ if (WARN_ON_ONCE(current->rseq && current->rseq_event_mask))
+ current->rseq_event_mask = 0;
+ }
+}
+
/*
* If parent process has a registered restartable sequences area, the
* child inherits. Unregister rseq for a clone with CLONE_VM set.
@@ -118,7 +126,7 @@ static inline void rseq_fork(struct task
static inline void rseq_execve(struct task_struct *t)
{
}
-
+static inline void rseq_exit_to_user_mode(void) { }
#endif
#ifdef CONFIG_DEBUG_RSEQ
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -324,9 +324,9 @@ static bool rseq_warn_flags(const char *
return true;
}
-static int rseq_need_restart(struct task_struct *t, u32 cs_flags)
+static int rseq_check_flags(struct task_struct *t, u32 cs_flags)
{
- u32 flags, event_mask;
+ u32 flags;
int ret;
if (rseq_warn_flags("rseq_cs", cs_flags))
@@ -339,17 +339,7 @@ static int rseq_need_restart(struct task
if (rseq_warn_flags("rseq", flags))
return -EINVAL;
-
- /*
- * Load and clear event mask atomically with respect to
- * scheduler preemption and membarrier IPIs.
- */
- scoped_guard(RSEQ_EVENT_GUARD) {
- event_mask = t->rseq_event_mask;
- t->rseq_event_mask = 0;
- }
-
- return !!event_mask;
+ return 0;
}
static int clear_rseq_cs(struct rseq __user *rseq)
@@ -380,7 +370,7 @@ static bool in_rseq_cs(unsigned long ip,
return ip - rseq_cs->start_ip < rseq_cs->post_commit_offset;
}
-static int rseq_ip_fixup(struct pt_regs *regs)
+static int rseq_ip_fixup(struct pt_regs *regs, bool abort)
{
unsigned long ip = instruction_pointer(regs);
struct task_struct *t = current;
@@ -398,9 +388,11 @@ static int rseq_ip_fixup(struct pt_regs
*/
if (!in_rseq_cs(ip, &rseq_cs))
return clear_rseq_cs(t->rseq);
- ret = rseq_need_restart(t, rseq_cs.flags);
- if (ret <= 0)
+ ret = rseq_check_flags(t, rseq_cs.flags);
+ if (ret < 0)
return ret;
+ if (!abort)
+ return 0;
ret = clear_rseq_cs(t->rseq);
if (ret)
return ret;
@@ -430,14 +422,44 @@ void __rseq_handle_notify_resume(struct
return;
/*
- * regs is NULL if and only if the caller is in a syscall path. Skip
- * fixup and leave rseq_cs as is so that rseq_sycall() will detect and
- * kill a misbehaving userspace on debug kernels.
+ * If invoked from hypervisors or IO-URING, then @regs is a NULL
+ * pointer, so fixup cannot be done. If the syscall which led to
+ * this invocation was invoked inside a critical section, then it
+ * will either end up in this code again or a possible violation of
+ * a syscall inside a critical region can only be detected by the
+ * debug code in rseq_syscall() in a debug enabled kernel.
*/
if (regs) {
- ret = rseq_ip_fixup(regs);
- if (unlikely(ret < 0))
- goto error;
+ /*
+ * Read and clear the event mask first. If the task was not
+ * preempted or migrated or a signal is on the way, there
+ * is no point in doing any of the heavy lifting here on
+ * production kernels. In that case TIF_NOTIFY_RESUME was
+ * raised by some other functionality.
+ *
+ * This is correct because the read/clear operation is
+ * guarded against scheduler preemption, which makes it CPU
+ * local atomic. If the task is preempted right after
+ * re-enabling preemption then TIF_NOTIFY_RESUME is set
+ * again and this function is invoked another time _before_
+ * the task is able to return to user mode.
+ *
+ * On a debug kernel, invoke the fixup code unconditionally
+ * with the result handed in to allow the detection of
+ * inconsistencies.
+ */
+ u32 event_mask;
+
+ scoped_guard(RSEQ_EVENT_GUARD) {
+ event_mask = t->rseq_event_mask;
+ t->rseq_event_mask = 0;
+ }
+
+ if (IS_ENABLED(CONFIG_DEBUG_RSEQ) || event_mask) {
+ ret = rseq_ip_fixup(regs, !!event_mask);
+ if (unlikely(ret < 0))
+ goto error;
+ }
}
if (unlikely(rseq_update_cpu_node_id(t)))
goto error;
From: Thomas Gleixner <tglx@linutronix.de>
Scrolling over tons of pointless
{
}
lines to find the actual code is annoying at best.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
---
include/linux/rseq.h | 47 ++++++++++++-----------------------------------
1 file changed, 12 insertions(+), 35 deletions(-)
---
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -101,44 +101,21 @@ static inline void rseq_execve(struct ta
t->rseq_event_mask = 0;
}
-#else
-
-static inline void rseq_set_notify_resume(struct task_struct *t)
-{
-}
-static inline void rseq_handle_notify_resume(struct ksignal *ksig,
- struct pt_regs *regs)
-{
-}
-static inline void rseq_signal_deliver(struct ksignal *ksig,
- struct pt_regs *regs)
-{
-}
-static inline void rseq_preempt(struct task_struct *t)
-{
-}
-static inline void rseq_migrate(struct task_struct *t)
-{
-}
-static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
-{
-}
-static inline void rseq_execve(struct task_struct *t)
-{
-}
+#else /* CONFIG_RSEQ */
+static inline void rseq_set_notify_resume(struct task_struct *t) { }
+static inline void rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs) { }
+static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { }
+static inline void rseq_preempt(struct task_struct *t) { }
+static inline void rseq_migrate(struct task_struct *t) { }
+static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) { }
+static inline void rseq_execve(struct task_struct *t) { }
static inline void rseq_exit_to_user_mode(void) { }
-#endif
+#endif /* !CONFIG_RSEQ */
#ifdef CONFIG_DEBUG_RSEQ
-
void rseq_syscall(struct pt_regs *regs);
-
-#else
-
-static inline void rseq_syscall(struct pt_regs *regs)
-{
-}
-
-#endif
+#else /* CONFIG_DEBUG_RSEQ */
+static inline void rseq_syscall(struct pt_regs *regs) { }
+#endif /* !CONFIG_DEBUG_RSEQ */
#endif /* _LINUX_RSEQ_H */
Move the comment which documents the RSEQ algorithm to the top of the file,
so it does not create horrible diffs later when the actual implementation
is fed into the mincer.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
kernel/rseq.c | 119 ++++++++++++++++++++++++++++------------------------------
1 file changed, 59 insertions(+), 60 deletions(-)
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -8,6 +8,65 @@
* Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
*/
+/*
+ * Restartable sequences are a lightweight interface that allows
+ * user-level code to be executed atomically relative to scheduler
+ * preemption and signal delivery. Typically used for implementing
+ * per-cpu operations.
+ *
+ * It allows user-space to perform update operations on per-cpu data
+ * without requiring heavy-weight atomic operations.
+ *
+ * Detailed algorithm of rseq user-space assembly sequences:
+ *
+ * init(rseq_cs)
+ * cpu = TLS->rseq::cpu_id_start
+ * [1] TLS->rseq::rseq_cs = rseq_cs
+ * [start_ip] ----------------------------
+ * [2] if (cpu != TLS->rseq::cpu_id)
+ * goto abort_ip;
+ * [3] <last_instruction_in_cs>
+ * [post_commit_ip] ----------------------------
+ *
+ * The address of jump target abort_ip must be outside the critical
+ * region, i.e.:
+ *
+ * [abort_ip] < [start_ip] || [abort_ip] >= [post_commit_ip]
+ *
+ * Steps [2]-[3] (inclusive) need to be a sequence of instructions in
+ * userspace that can handle being interrupted between any of those
+ * instructions, and then resumed to the abort_ip.
+ *
+ * 1. Userspace stores the address of the struct rseq_cs assembly
+ * block descriptor into the rseq_cs field of the registered
+ * struct rseq TLS area. This update is performed through a single
+ * store within the inline assembly instruction sequence.
+ * [start_ip]
+ *
+ * 2. Userspace tests to check whether the current cpu_id field matches
+ * the cpu number loaded before start_ip, branching to abort_ip
+ * in case of a mismatch.
+ *
+ * If the sequence is preempted or interrupted by a signal
+ * at or after start_ip and before post_commit_ip, then the kernel
+ * clears TLS->__rseq_abi::rseq_cs, and sets the user-space return
+ * ip to abort_ip before returning to user-space, so the preempted
+ * execution resumes at abort_ip.
+ *
+ * 3. Userspace critical section final instruction before
+ * post_commit_ip is the commit. The critical section is
+ * self-terminating.
+ * [post_commit_ip]
+ *
+ * 4. <success>
+ *
+ * On failure at [2], or if interrupted by preempt or signal delivery
+ * between [1] and [3]:
+ *
+ * [abort_ip]
+ * F1. <failure>
+ */
+
#include <linux/sched.h>
#include <linux/uaccess.h>
#include <linux/syscalls.h>
@@ -98,66 +157,6 @@ static int rseq_validate_ro_fields(struc
unsafe_put_user(value, &t->rseq->field, error_label)
#endif
-/*
- *
- * Restartable sequences are a lightweight interface that allows
- * user-level code to be executed atomically relative to scheduler
- * preemption and signal delivery. Typically used for implementing
- * per-cpu operations.
- *
- * It allows user-space to perform update operations on per-cpu data
- * without requiring heavy-weight atomic operations.
- *
- * Detailed algorithm of rseq user-space assembly sequences:
- *
- * init(rseq_cs)
- * cpu = TLS->rseq::cpu_id_start
- * [1] TLS->rseq::rseq_cs = rseq_cs
- * [start_ip] ----------------------------
- * [2] if (cpu != TLS->rseq::cpu_id)
- * goto abort_ip;
- * [3] <last_instruction_in_cs>
- * [post_commit_ip] ----------------------------
- *
- * The address of jump target abort_ip must be outside the critical
- * region, i.e.:
- *
- * [abort_ip] < [start_ip] || [abort_ip] >= [post_commit_ip]
- *
- * Steps [2]-[3] (inclusive) need to be a sequence of instructions in
- * userspace that can handle being interrupted between any of those
- * instructions, and then resumed to the abort_ip.
- *
- * 1. Userspace stores the address of the struct rseq_cs assembly
- * block descriptor into the rseq_cs field of the registered
- * struct rseq TLS area. This update is performed through a single
- * store within the inline assembly instruction sequence.
- * [start_ip]
- *
- * 2. Userspace tests to check whether the current cpu_id field match
- * the cpu number loaded before start_ip, branching to abort_ip
- * in case of a mismatch.
- *
- * If the sequence is preempted or interrupted by a signal
- * at or after start_ip and before post_commit_ip, then the kernel
- * clears TLS->__rseq_abi::rseq_cs, and sets the user-space return
- * ip to abort_ip before returning to user-space, so the preempted
- * execution resumes at abort_ip.
- *
- * 3. Userspace critical section final instruction before
- * post_commit_ip is the commit. The critical section is
- * self-terminating.
- * [post_commit_ip]
- *
- * 4. <success>
- *
- * On failure at [2], or if interrupted by preempt or signal delivery
- * between [1] and [3]:
- *
- * [abort_ip]
- * F1. <failure>
- */
-
static int rseq_update_cpu_node_id(struct task_struct *t)
{
struct rseq __user *rseq = t->rseq;
There is no point for this being visible in the resume_to_user_mode()
handling.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
include/linux/resume_user_mode.h | 2 +-
include/linux/rseq.h | 13 +++++++------
2 files changed, 8 insertions(+), 7 deletions(-)
--- a/include/linux/resume_user_mode.h
+++ b/include/linux/resume_user_mode.h
@@ -59,7 +59,7 @@ static inline void resume_user_mode_work
mem_cgroup_handle_over_high(GFP_KERNEL);
blkcg_maybe_throttle_current();
- rseq_handle_notify_resume(NULL, regs);
+ rseq_handle_notify_resume(regs);
}
#endif /* LINUX_RESUME_USER_MODE_H */
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -37,19 +37,20 @@ static inline void rseq_set_notify_resum
void __rseq_handle_notify_resume(struct ksignal *sig, struct pt_regs *regs);
-static inline void rseq_handle_notify_resume(struct ksignal *ksig,
- struct pt_regs *regs)
+static inline void rseq_handle_notify_resume(struct pt_regs *regs)
{
if (current->rseq)
- __rseq_handle_notify_resume(ksig, regs);
+ __rseq_handle_notify_resume(NULL, regs);
}
static inline void rseq_signal_deliver(struct ksignal *ksig,
struct pt_regs *regs)
{
- scoped_guard(RSEQ_EVENT_GUARD)
- __set_bit(RSEQ_EVENT_SIGNAL_BIT, ¤t->rseq_event_mask);
- rseq_handle_notify_resume(ksig, regs);
+ if (current->rseq) {
+ scoped_guard(RSEQ_EVENT_GUARD)
+ __set_bit(RSEQ_EVENT_SIGNAL_BIT, ¤t->rseq_event_mask);
+ __rseq_handle_notify_resume(ksig, regs);
+ }
}
/* rseq_preempt() requires preemption to be disabled. */
There is no point to read the critical section element in the newly
registered user space RSEQ struct first in order to clear it.
Just clear it and be done with it.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
kernel/rseq.c | 10 +++-------
1 file changed, 3 insertions(+), 7 deletions(-)
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -492,11 +492,9 @@ void rseq_syscall(struct pt_regs *regs)
/*
* sys_rseq - setup restartable sequences for caller thread.
*/
-SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
- int, flags, u32, sig)
+SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32, sig)
{
int ret;
- u64 rseq_cs;
if (flags & RSEQ_FLAG_UNREGISTER) {
if (flags & ~RSEQ_FLAG_UNREGISTER)
@@ -557,11 +555,9 @@ SYSCALL_DEFINE4(rseq, struct rseq __user
* avoid a potential segfault on return to user-space. The proper thing
* to do would have been to fail the registration but this would break
* older libcs that reuse the rseq area for new threads without
- * clearing the fields.
+ * clearing the fields. Don't bother reading it, just reset it.
*/
- if (rseq_get_rseq_cs_ptr_val(rseq, &rseq_cs))
- return -EFAULT;
- if (rseq_cs && clear_rseq_cs(rseq))
+ if (put_user_masked_u64(0UL, &rseq->rseq_cs))
return -EFAULT;
#ifdef CONFIG_DEBUG_RSEQ
Since commit 0190e4198e47 ("rseq: Deprecate RSEQ_CS_FLAG_NO_RESTART_ON_*
flags") the bits in task::rseq_event_mask are meaningless and just extra
work in terms of setting them individually.
Aside from that, the only relevant point where an event has to be raised is
context switch. Neither the CPU nor MM CID can change without going through
a context switch.
Collapse them all into a single boolean which simplifies the code a lot and
remove the pointless invocations which have been sprinkled all over the
place for no value.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
---
V2: Reduce it to the sched switch event.
---
fs/exec.c | 2 -
include/linux/rseq.h | 66 +++++++++-------------------------------------
include/linux/sched.h | 10 +++---
include/uapi/linux/rseq.h | 21 ++++----------
kernel/rseq.c | 28 +++++++++++--------
kernel/sched/core.c | 5 ---
kernel/sched/membarrier.c | 8 ++---
7 files changed, 48 insertions(+), 92 deletions(-)
---
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1775,7 +1775,7 @@ static int bprm_execve(struct linux_binp
force_fatal_sig(SIGSEGV);
sched_mm_cid_after_execve(current);
- rseq_set_notify_resume(current);
+ rseq_sched_switch_event(current);
current->in_execve = 0;
return retval;
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -3,38 +3,8 @@
#define _LINUX_RSEQ_H
#ifdef CONFIG_RSEQ
-
-#include <linux/preempt.h>
#include <linux/sched.h>
-#ifdef CONFIG_MEMBARRIER
-# define RSEQ_EVENT_GUARD irq
-#else
-# define RSEQ_EVENT_GUARD preempt
-#endif
-
-/*
- * Map the event mask on the user-space ABI enum rseq_cs_flags
- * for direct mask checks.
- */
-enum rseq_event_mask_bits {
- RSEQ_EVENT_PREEMPT_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT,
- RSEQ_EVENT_SIGNAL_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT,
- RSEQ_EVENT_MIGRATE_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT,
-};
-
-enum rseq_event_mask {
- RSEQ_EVENT_PREEMPT = (1U << RSEQ_EVENT_PREEMPT_BIT),
- RSEQ_EVENT_SIGNAL = (1U << RSEQ_EVENT_SIGNAL_BIT),
- RSEQ_EVENT_MIGRATE = (1U << RSEQ_EVENT_MIGRATE_BIT),
-};
-
-static inline void rseq_set_notify_resume(struct task_struct *t)
-{
- if (t->rseq)
- set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
-}
-
void __rseq_handle_notify_resume(struct ksignal *sig, struct pt_regs *regs);
static inline void rseq_handle_notify_resume(struct pt_regs *regs)
@@ -43,35 +13,27 @@ static inline void rseq_handle_notify_re
__rseq_handle_notify_resume(NULL, regs);
}
-static inline void rseq_signal_deliver(struct ksignal *ksig,
- struct pt_regs *regs)
+static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs)
{
if (current->rseq) {
- scoped_guard(RSEQ_EVENT_GUARD)
- __set_bit(RSEQ_EVENT_SIGNAL_BIT, ¤t->rseq_event_mask);
+ current->rseq_event_pending = true;
__rseq_handle_notify_resume(ksig, regs);
}
}
-/* rseq_preempt() requires preemption to be disabled. */
-static inline void rseq_preempt(struct task_struct *t)
+static inline void rseq_sched_switch_event(struct task_struct *t)
{
- __set_bit(RSEQ_EVENT_PREEMPT_BIT, &t->rseq_event_mask);
- rseq_set_notify_resume(t);
-}
-
-/* rseq_migrate() requires preemption to be disabled. */
-static inline void rseq_migrate(struct task_struct *t)
-{
- __set_bit(RSEQ_EVENT_MIGRATE_BIT, &t->rseq_event_mask);
- rseq_set_notify_resume(t);
+ if (t->rseq) {
+ t->rseq_event_pending = true;
+ set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
+ }
}
static __always_inline void rseq_exit_to_user_mode(void)
{
if (IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
- if (WARN_ON_ONCE(current->rseq && current->rseq_event_mask))
- current->rseq_event_mask = 0;
+ if (WARN_ON_ONCE(current->rseq && current->rseq_event_pending))
+ current->rseq_event_pending = false;
}
}
@@ -85,12 +47,12 @@ static inline void rseq_fork(struct task
t->rseq = NULL;
t->rseq_len = 0;
t->rseq_sig = 0;
- t->rseq_event_mask = 0;
+ t->rseq_event_pending = false;
} else {
t->rseq = current->rseq;
t->rseq_len = current->rseq_len;
t->rseq_sig = current->rseq_sig;
- t->rseq_event_mask = current->rseq_event_mask;
+ t->rseq_event_pending = current->rseq_event_pending;
}
}
@@ -99,15 +61,13 @@ static inline void rseq_execve(struct ta
t->rseq = NULL;
t->rseq_len = 0;
t->rseq_sig = 0;
- t->rseq_event_mask = 0;
+ t->rseq_event_pending = false;
}
#else /* CONFIG_RSEQ */
-static inline void rseq_set_notify_resume(struct task_struct *t) { }
static inline void rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs) { }
static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { }
-static inline void rseq_preempt(struct task_struct *t) { }
-static inline void rseq_migrate(struct task_struct *t) { }
+static inline void rseq_sched_switch_event(struct task_struct *t) { }
static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) { }
static inline void rseq_execve(struct task_struct *t) { }
static inline void rseq_exit_to_user_mode(void) { }
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1401,14 +1401,14 @@ struct task_struct {
#endif /* CONFIG_NUMA_BALANCING */
#ifdef CONFIG_RSEQ
- struct rseq __user *rseq;
- u32 rseq_len;
- u32 rseq_sig;
+ struct rseq __user *rseq;
+ u32 rseq_len;
+ u32 rseq_sig;
/*
- * RmW on rseq_event_mask must be performed atomically
+ * RmW on rseq_event_pending must be performed atomically
* with respect to preemption.
*/
- unsigned long rseq_event_mask;
+ bool rseq_event_pending;
# ifdef CONFIG_DEBUG_RSEQ
/*
* This is a place holder to save a copy of the rseq fields for
--- a/include/uapi/linux/rseq.h
+++ b/include/uapi/linux/rseq.h
@@ -114,20 +114,13 @@ struct rseq {
/*
* Restartable sequences flags field.
*
- * This field should only be updated by the thread which
- * registered this data structure. Read by the kernel.
- * Mainly used for single-stepping through rseq critical sections
- * with debuggers.
- *
- * - RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT
- * Inhibit instruction sequence block restart on preemption
- * for this thread.
- * - RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL
- * Inhibit instruction sequence block restart on signal
- * delivery for this thread.
- * - RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE
- * Inhibit instruction sequence block restart on migration for
- * this thread.
+ * This field was initially intended to allow event masking for
+ * single-stepping through rseq critical sections with debuggers.
+ * The kernel does not support this anymore and the relevant bits
+ * are checked for being always false:
+ * - RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT
+ * - RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL
+ * - RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE
*/
__u32 flags;
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -78,6 +78,12 @@
#define CREATE_TRACE_POINTS
#include <trace/events/rseq.h>
+#ifdef CONFIG_MEMBARRIER
+# define RSEQ_EVENT_GUARD irq
+#else
+# define RSEQ_EVENT_GUARD preempt
+#endif
+
/* The original rseq structure size (including padding) is 32 bytes. */
#define ORIG_RSEQ_SIZE 32
@@ -430,11 +436,11 @@ void __rseq_handle_notify_resume(struct
*/
if (regs) {
/*
- * Read and clear the event mask first. If the task was not
- * preempted or migrated or a signal is on the way, there
- * is no point in doing any of the heavy lifting here on
- * production kernels. In that case TIF_NOTIFY_RESUME was
- * raised by some other functionality.
+ * Read and clear the event pending bit first. If the task
+ * was not preempted or migrated or a signal is on the way,
+ * there is no point in doing any of the heavy lifting here
+ * on production kernels. In that case TIF_NOTIFY_RESUME
+ * was raised by some other functionality.
*
* This is correct because the read/clear operation is
* guarded against scheduler preemption, which makes it CPU
@@ -447,15 +453,15 @@ void __rseq_handle_notify_resume(struct
* with the result handed in to allow the detection of
* inconsistencies.
*/
- u32 event_mask;
+ bool event;
scoped_guard(RSEQ_EVENT_GUARD) {
- event_mask = t->rseq_event_mask;
- t->rseq_event_mask = 0;
+ event = t->rseq_event_pending;
+ t->rseq_event_pending = false;
}
- if (IS_ENABLED(CONFIG_DEBUG_RSEQ) || event_mask) {
- ret = rseq_ip_fixup(regs, !!event_mask);
+ if (IS_ENABLED(CONFIG_DEBUG_RSEQ) || event) {
+ ret = rseq_ip_fixup(regs, event);
if (unlikely(ret < 0))
goto error;
}
@@ -584,7 +590,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user
* registered, ensure the cpu_id_start and cpu_id fields
* are updated before returning to user-space.
*/
- rseq_set_notify_resume(current);
+ rseq_sched_switch_event(current);
return 0;
}
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3364,7 +3364,6 @@ void set_task_cpu(struct task_struct *p,
if (p->sched_class->migrate_task_rq)
p->sched_class->migrate_task_rq(p, new_cpu);
p->se.nr_migrations++;
- rseq_migrate(p);
sched_mm_cid_migrate_from(p);
perf_event_task_migrate(p);
}
@@ -4795,7 +4794,6 @@ int sched_cgroup_fork(struct task_struct
p->sched_task_group = tg;
}
#endif
- rseq_migrate(p);
/*
* We're setting the CPU for the first time, we don't migrate,
* so use __set_task_cpu().
@@ -4859,7 +4857,6 @@ void wake_up_new_task(struct task_struct
* as we're not fully set-up yet.
*/
p->recent_used_cpu = task_cpu(p);
- rseq_migrate(p);
__set_task_cpu(p, select_task_rq(p, task_cpu(p), &wake_flags));
rq = __task_rq_lock(p, &rf);
update_rq_clock(rq);
@@ -5153,7 +5150,7 @@ prepare_task_switch(struct rq *rq, struc
kcov_prepare_switch(prev);
sched_info_switch(rq, prev, next);
perf_event_task_sched_out(prev, next);
- rseq_preempt(prev);
+ rseq_sched_switch_event(prev);
fire_sched_out_preempt_notifiers(prev, next);
kmap_local_sched_out();
prepare_task(next);
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -199,7 +199,7 @@ static void ipi_rseq(void *info)
* is negligible.
*/
smp_mb();
- rseq_preempt(current);
+ rseq_sched_switch_event(current);
}
static void ipi_sync_rq_state(void *info)
@@ -407,9 +407,9 @@ static int membarrier_private_expedited(
* membarrier, we will end up with some thread in the mm
* running without a core sync.
*
- * For RSEQ, don't rseq_preempt() the caller. User code
- * is not supposed to issue syscalls at all from inside an
- * rseq critical section.
+ * For RSEQ, don't invoke rseq_sched_switch_event() on the
+ * caller. User code is not supposed to issue syscalls at
+ * all from inside an rseq critical section.
*/
if (flags != MEMBARRIER_FLAG_SYNC_CORE) {
preempt_disable();
On 2025-09-08 17:31, Thomas Gleixner wrote:
> Since commit 0190e4198e47 ("rseq: Deprecate RSEQ_CS_FLAG_NO_RESTART_ON_*
> flags") the bits in task::rseq_event_mask are meaningless and just extra
> work in terms of setting them individually.
>
> Aside of that the only relevant point where an event has to be raised is
> context switch. Neither the CPU nor MM CID can change without going through
> a context switch.
>
> Collapse them all into a single boolean which simplifies the code a lot and
> remove the pointless invocations which have been sprinkled all over the
> place for no value.
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> Cc: "Paul E. McKenney" <paulmck@kernel.org>
> Cc: Boqun Feng <boqun.feng@gmail.com>
>
> ---
> V2: Reduce it to the sched switch event.
> ---
> fs/exec.c | 2 -
> include/linux/rseq.h | 66 +++++++++-------------------------------------
> include/linux/sched.h | 10 +++---
> include/uapi/linux/rseq.h | 21 ++++----------
> kernel/rseq.c | 28 +++++++++++--------
> kernel/sched/core.c | 5 ---
> kernel/sched/membarrier.c | 8 ++---
> 7 files changed, 48 insertions(+), 92 deletions(-)
> ---
>
[...]
> --- a/include/uapi/linux/rseq.h
> +++ b/include/uapi/linux/rseq.h
> @@ -114,20 +114,13 @@ struct rseq {
> /*
> * Restartable sequences flags field.
> *
> - * This field should only be updated by the thread which
> - * registered this data structure. Read by the kernel.
> - * Mainly used for single-stepping through rseq critical sections
> - * with debuggers.
> - *
> - * - RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT
> - * Inhibit instruction sequence block restart on preemption
> - * for this thread.
> - * - RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL
> - * Inhibit instruction sequence block restart on signal
> - * delivery for this thread.
> - * - RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE
> - * Inhibit instruction sequence block restart on migration for
> - * this thread.
> + * This field was initialy intended to allow event masking for for
initialy -> initially
for for -> for
Other than those nits:
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
--
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com
Hypervisors invoke resume_user_mode_work() before entering the guest, which
clears TIF_NOTIFY_RESUME. The @regs argument is NULL as there is no user
space context available to them, so the rseq notify handler skips
inspecting the critical section, but updates the CPU/MM CID values
unconditionally so that a potentially pending rseq event is not lost on the
way to user space.
This is a pointless exercise as the task might be rescheduled before
actually returning to user space and it creates unnecessary work in the
vcpu_run() loops.
It's way more efficient to ignore that invocation based on @regs == NULL
and let the hypervisors re-raise TIF_NOTIFY_RESUME after returning from the
vcpu_run() loop before returning from the ioctl().
This ensures that a pending RSEQ update is not lost and the IDs are updated
before returning to user space.
Once the RSEQ handling is decoupled from TIF_NOTIFY_RESUME, this turns into
a NOOP.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Wei Liu <wei.liu@kernel.org>
Cc: Dexuan Cui <decui@microsoft.com>
---
V3: Add the missing rseq.h include for HV - 0-day
---
drivers/hv/mshv_root_main.c | 3 +
include/linux/rseq.h | 17 +++++++++
kernel/rseq.c | 76 +++++++++++++++++++++++---------------------
virt/kvm/kvm_main.c | 3 +
4 files changed, 63 insertions(+), 36 deletions(-)
--- a/drivers/hv/mshv_root_main.c
+++ b/drivers/hv/mshv_root_main.c
@@ -28,6 +28,7 @@
#include <linux/crash_dump.h>
#include <linux/panic_notifier.h>
#include <linux/vmalloc.h>
+#include <linux/rseq.h>
#include "mshv_eventfd.h"
#include "mshv.h"
@@ -585,6 +586,8 @@ static long mshv_run_vp_with_root_schedu
}
} while (!vp->run.flags.intercept_suspend);
+ rseq_virt_userspace_exit();
+
return ret;
}
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -38,6 +38,22 @@ static __always_inline void rseq_exit_to
}
/*
+ * KVM/HYPERV invoke resume_user_mode_work() before entering guest mode,
+ * which clears TIF_NOTIFY_RESUME. To avoid updating user space RSEQ in
+ * that case just to do it eventually again before returning to user space,
+ * the entry resume_user_mode_work() invocation is ignored as the register
+ * argument is NULL.
+ *
+ * After returning from guest mode, they have to invoke this function to
+ * re-raise TIF_NOTIFY_RESUME if necessary.
+ */
+static inline void rseq_virt_userspace_exit(void)
+{
+ if (current->rseq_event_pending)
+ set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
+}
+
+/*
* If parent process has a registered restartable sequences area, the
* child inherits. Unregister rseq for a clone with CLONE_VM set.
*/
@@ -68,6 +84,7 @@ static inline void rseq_execve(struct ta
static inline void rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs) { }
static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { }
static inline void rseq_sched_switch_event(struct task_struct *t) { }
+static inline void rseq_virt_userspace_exit(void) { }
static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) { }
static inline void rseq_execve(struct task_struct *t) { }
static inline void rseq_exit_to_user_mode(void) { }
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -422,50 +422,54 @@ void __rseq_handle_notify_resume(struct
{
struct task_struct *t = current;
int ret, sig;
+ bool event;
+
+ /*
+ * If invoked from hypervisors before entering the guest via
+ * resume_user_mode_work(), then @regs is a NULL pointer.
+ *
+ * resume_user_mode_work() clears TIF_NOTIFY_RESUME and re-raises
+ * it before returning from the ioctl() to user space when
+ * rseq_event.sched_switch is set.
+ *
+ * So it's safe to ignore here instead of pointlessly updating it
+ * in the vcpu_run() loop.
+ */
+ if (!regs)
+ return;
if (unlikely(t->flags & PF_EXITING))
return;
/*
- * If invoked from hypervisors or IO-URING, then @regs is a NULL
- * pointer, so fixup cannot be done. If the syscall which led to
- * this invocation was invoked inside a critical section, then it
- * will either end up in this code again or a possible violation of
- * a syscall inside a critical region can only be detected by the
- * debug code in rseq_syscall() in a debug enabled kernel.
+ * Read and clear the event pending bit first. If the task
+ * was not preempted or migrated or a signal is on the way,
+ * there is no point in doing any of the heavy lifting here
+ * on production kernels. In that case TIF_NOTIFY_RESUME
+ * was raised by some other functionality.
+ *
+ * This is correct because the read/clear operation is
+ * guarded against scheduler preemption, which makes it CPU
+ * local atomic. If the task is preempted right after
+ * re-enabling preemption then TIF_NOTIFY_RESUME is set
+ * again and this function is invoked another time _before_
+ * the task is able to return to user mode.
+ *
+ * On a debug kernel, invoke the fixup code unconditionally
+ * with the result handed in to allow the detection of
+ * inconsistencies.
*/
- if (regs) {
- /*
- * Read and clear the event pending bit first. If the task
- * was not preempted or migrated or a signal is on the way,
- * there is no point in doing any of the heavy lifting here
- * on production kernels. In that case TIF_NOTIFY_RESUME
- * was raised by some other functionality.
- *
- * This is correct because the read/clear operation is
- * guarded against scheduler preemption, which makes it CPU
- * local atomic. If the task is preempted right after
- * re-enabling preemption then TIF_NOTIFY_RESUME is set
- * again and this function is invoked another time _before_
- * the task is able to return to user mode.
- *
- * On a debug kernel, invoke the fixup code unconditionally
- * with the result handed in to allow the detection of
- * inconsistencies.
- */
- bool event;
-
- scoped_guard(RSEQ_EVENT_GUARD) {
- event = t->rseq_event_pending;
- t->rseq_event_pending = false;
- }
+ scoped_guard(RSEQ_EVENT_GUARD) {
+ event = t->rseq_event_pending;
+ t->rseq_event_pending = false;
+ }
- if (IS_ENABLED(CONFIG_DEBUG_RSEQ) || event) {
- ret = rseq_ip_fixup(regs, event);
- if (unlikely(ret < 0))
- goto error;
- }
+ if (IS_ENABLED(CONFIG_DEBUG_RSEQ) || event) {
+ ret = rseq_ip_fixup(regs, event);
+ if (unlikely(ret < 0))
+ goto error;
}
+
if (unlikely(rseq_update_cpu_node_id(t)))
goto error;
return;
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -49,6 +49,7 @@
#include <linux/lockdep.h>
#include <linux/kthread.h>
#include <linux/suspend.h>
+#include <linux/rseq.h>
#include <asm/processor.h>
#include <asm/ioctl.h>
@@ -4466,6 +4467,8 @@ static long kvm_vcpu_ioctl(struct file *
r = kvm_arch_vcpu_ioctl_run(vcpu);
vcpu->wants_to_run = false;
+ rseq_virt_userspace_exit();
+
trace_kvm_userspace_exit(vcpu->run->exit_reason, r);
break;
}
On 2025-09-08 17:31, Thomas Gleixner wrote:
> Hypervisors invoke resume_user_mode_work() before entering the guest, which
> clears TIF_NOTIFY_RESUME. The @regs argument is NULL as there is no user
> space context available to them, so the rseq notify handler skips
> inspecting the critical section, but updates the CPU/MM CID values
> unconditionally so that the eventual pending rseq event is not lost on the
> way to user space.
>
> This is a pointless exercise as the task might be rescheduled before
> actually returning to user space and it creates unnecessary work in the
> vcpu_run() loops.
>
> It's way more efficient to ignore that invocation based on @regs == NULL
> and let the hypervisors re-raise TIF_NOTIFY_RESUME after returning from the
> vcpu_run() loop before returning from the ioctl().
>
> This ensures that a pending RSEQ update is not lost and the IDs are updated
> before returning to user space.
>
> Once the RSEQ handling is decoupled from TIF_NOTIFY_RESUME, this turns into
> a NOOP.
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Sean Christopherson <seanjc@google.com>
> Cc: Wei Liu <wei.liu@kernel.org>
> Cc: Dexuan Cui <decui@microsoft.com>
> ---
> V3: Add the missing rseq.h include for HV - 0-day
> ---
> drivers/hv/mshv_root_main.c | 3 +
> include/linux/rseq.h | 17 +++++++++
> kernel/rseq.c | 76 +++++++++++++++++++++++---------------------
> virt/kvm/kvm_main.c | 3 +
> 4 files changed, 63 insertions(+), 36 deletions(-)
>
> --- a/drivers/hv/mshv_root_main.c
> +++ b/drivers/hv/mshv_root_main.c
> @@ -28,6 +28,7 @@
> #include <linux/crash_dump.h>
> #include <linux/panic_notifier.h>
> #include <linux/vmalloc.h>
> +#include <linux/rseq.h>
>
> #include "mshv_eventfd.h"
> #include "mshv.h"
> @@ -585,6 +586,8 @@ static long mshv_run_vp_with_root_schedu
> }
> } while (!vp->run.flags.intercept_suspend);
>
> + rseq_virt_userspace_exit();
> +
> return ret;
> }
>
> --- a/include/linux/rseq.h
> +++ b/include/linux/rseq.h
> @@ -38,6 +38,22 @@ static __always_inline void rseq_exit_to
> }
>
> /*
> + * KVM/HYPERV invoke resume_user_mode_work() before entering guest mode,
> + * which clears TIF_NOTIFY_RESUME. To avoid updating user space RSEQ in
> + * that case just to do it eventually again before returning to user space,
> + * the entry resume_user_mode_work() invocation is ignored as the register
> + * argument is NULL.
> + *
> + * After returning from guest mode, they have to invoke this function to
> + * re-raise TIF_NOTIFY_RESUME if necessary.
> + */
> +static inline void rseq_virt_userspace_exit(void)
> +{
> + if (current->rseq_event_pending)
> + set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
> +}
> +
> +/*
> * If parent process has a registered restartable sequences area, the
> * child inherits. Unregister rseq for a clone with CLONE_VM set.
> */
> @@ -68,6 +84,7 @@ static inline void rseq_execve(struct ta
> static inline void rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs) { }
> static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { }
> static inline void rseq_sched_switch_event(struct task_struct *t) { }
> +static inline void rseq_virt_userspace_exit(void) { }
> static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) { }
> static inline void rseq_execve(struct task_struct *t) { }
> static inline void rseq_exit_to_user_mode(void) { }
> --- a/kernel/rseq.c
> +++ b/kernel/rseq.c
> @@ -422,50 +422,54 @@ void __rseq_handle_notify_resume(struct
> {
> struct task_struct *t = current;
> int ret, sig;
> + bool event;
> +
> + /*
> + * If invoked from hypervisors before entering the guest via
> + * resume_user_mode_work(), then @regs is a NULL pointer.
> + *
> + * resume_user_mode_work() clears TIF_NOTIFY_RESUME and re-raises
> + * it before returning from the ioctl() to user space when
> + * rseq_event.sched_switch is set.
> + *
> + * So it's safe to ignore here instead of pointlessly updating it
> + * in the vcpu_run() loop.
> + */
> + if (!regs)
> + return;
>
> if (unlikely(t->flags & PF_EXITING))
> return;
>
> /*
> - * If invoked from hypervisors or IO-URING, then @regs is a NULL
> - * pointer, so fixup cannot be done. If the syscall which led to
> - * this invocation was invoked inside a critical section, then it
> - * will either end up in this code again or a possible violation of
> - * a syscall inside a critical region can only be detected by the
> - * debug code in rseq_syscall() in a debug enabled kernel.
> + * Read and clear the event pending bit first. If the task
> + * was not preempted or migrated or a signal is on the way,
> + * there is no point in doing any of the heavy lifting here
> + * on production kernels. In that case TIF_NOTIFY_RESUME
> + * was raised by some other functionality.
> + *
> + * This is correct because the read/clear operation is
> + * guarded against scheduler preemption, which makes it CPU
> + * local atomic. If the task is preempted right after
> + * re-enabling preemption then TIF_NOTIFY_RESUME is set
> + * again and this function is invoked another time _before_
> + * the task is able to return to user mode.
> + *
> + * On a debug kernel, invoke the fixup code unconditionally
> + * with the result handed in to allow the detection of
> + * inconsistencies.
> */
> - if (regs) {
> - /*
> - * Read and clear the event pending bit first. If the task
> - * was not preempted or migrated or a signal is on the way,
> - * there is no point in doing any of the heavy lifting here
> - * on production kernels. In that case TIF_NOTIFY_RESUME
> - * was raised by some other functionality.
> - *
> - * This is correct because the read/clear operation is
> - * guarded against scheduler preemption, which makes it CPU
> - * local atomic. If the task is preempted right after
> - * re-enabling preemption then TIF_NOTIFY_RESUME is set
> - * again and this function is invoked another time _before_
> - * the task is able to return to user mode.
> - *
> - * On a debug kernel, invoke the fixup code unconditionally
> - * with the result handed in to allow the detection of
> - * inconsistencies.
> - */
> - bool event;
> -
> - scoped_guard(RSEQ_EVENT_GUARD) {
> - event = t->rseq_event_pending;
> - t->rseq_event_pending = false;
> - }
> + scoped_guard(RSEQ_EVENT_GUARD) {
> + event = t->rseq_event_pending;
> + t->rseq_event_pending = false;
> + }
>
> - if (IS_ENABLED(CONFIG_DEBUG_RSEQ) || event) {
> - ret = rseq_ip_fixup(regs, event);
> - if (unlikely(ret < 0))
> - goto error;
> - }
> + if (IS_ENABLED(CONFIG_DEBUG_RSEQ) || event) {
> + ret = rseq_ip_fixup(regs, event);
> + if (unlikely(ret < 0))
> + goto error;
> }
> +
> if (unlikely(rseq_update_cpu_node_id(t)))
> goto error;
> return;
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -49,6 +49,7 @@
> #include <linux/lockdep.h>
> #include <linux/kthread.h>
> #include <linux/suspend.h>
> +#include <linux/rseq.h>
>
> #include <asm/processor.h>
> #include <asm/ioctl.h>
> @@ -4466,6 +4467,8 @@ static long kvm_vcpu_ioctl(struct file *
> r = kvm_arch_vcpu_ioctl_run(vcpu);
> vcpu->wants_to_run = false;
>
> + rseq_virt_userspace_exit();
> +
> trace_kvm_userspace_exit(vcpu->run->exit_reason, r);
> break;
> }
>
--
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com
On Mon, Sep 08, 2025, Thomas Gleixner wrote: > --- a/virt/kvm/kvm_main.c > +++ b/virt/kvm/kvm_main.c > @@ -49,6 +49,7 @@ > #include <linux/lockdep.h> > #include <linux/kthread.h> > #include <linux/suspend.h> > +#include <linux/rseq.h> > > #include <asm/processor.h> > #include <asm/ioctl.h> > @@ -4466,6 +4467,8 @@ static long kvm_vcpu_ioctl(struct file * > r = kvm_arch_vcpu_ioctl_run(vcpu); > vcpu->wants_to_run = false; > Finally had a lightbulb moment as to how to eat this hack while not stonewalling the entire series. Can you add something like: /* * FIXME: Remove this hack once all KVM architectures support * the generic TIF bits, i.e. a dedicated TIF_RSEQ. */ to discourage further abuse, and to make it clear that such ugliness isn't anyone's first choice. With that, Acked-by: Sean Christopherson <seanjc@google.com> > + rseq_virt_userspace_exit(); > + > trace_kvm_userspace_exit(vcpu->run->exit_reason, r); > break; > } >
On Mon, Sep 08 2025 at 17:00, Sean Christopherson wrote: > On Mon, Sep 08, 2025, Thomas Gleixner wrote: >> --- a/virt/kvm/kvm_main.c >> +++ b/virt/kvm/kvm_main.c >> @@ -49,6 +49,7 @@ >> #include <linux/lockdep.h> >> #include <linux/kthread.h> >> #include <linux/suspend.h> >> +#include <linux/rseq.h> >> >> #include <asm/processor.h> >> #include <asm/ioctl.h> >> @@ -4466,6 +4467,8 @@ static long kvm_vcpu_ioctl(struct file * >> r = kvm_arch_vcpu_ioctl_run(vcpu); >> vcpu->wants_to_run = false; >> > > Finally had a lightbulb moment as to how to eat this hack while not stonewalling > the entire series. Can you add something like: > > /* > * FIXME: Remove this hack once all KVM architectures support > * the generic TIF bits, i.e. a dedicated TIF_RSEQ. > */ > > to discourage further abuse, and to make it clear that such ugliness isn't anyone's > first choice. With that, Fair enough.
There is no need to update these values unconditionally if there is no
event pending.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
kernel/rseq.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -464,11 +464,12 @@ void __rseq_handle_notify_resume(struct
t->rseq_event_pending = false;
}
- if (IS_ENABLED(CONFIG_DEBUG_RSEQ) || event) {
- ret = rseq_ip_fixup(regs, event);
- if (unlikely(ret < 0))
- goto error;
- }
+ if (!IS_ENABLED(CONFIG_DEBUG_RSEQ) && !event)
+ return;
+
+ ret = rseq_ip_fixup(regs, event);
+ if (unlikely(ret < 0))
+ goto error;
if (unlikely(rseq_update_cpu_node_id(t)))
goto error;
On 2025-09-08 17:31, Thomas Gleixner wrote:
> There is no need to update these values unconditionally if there is no
> event pending.
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
>
> ---
> kernel/rseq.c | 11 ++++++-----
> 1 file changed, 6 insertions(+), 5 deletions(-)
>
> --- a/kernel/rseq.c
> +++ b/kernel/rseq.c
> @@ -464,11 +464,12 @@ void __rseq_handle_notify_resume(struct
> t->rseq_event_pending = false;
> }
>
> - if (IS_ENABLED(CONFIG_DEBUG_RSEQ) || event) {
> - ret = rseq_ip_fixup(regs, event);
> - if (unlikely(ret < 0))
> - goto error;
> - }
> + if (!IS_ENABLED(CONFIG_DEBUG_RSEQ) && !event)
> + return;
> +
> + ret = rseq_ip_fixup(regs, event);
> + if (unlikely(ret < 0))
> + goto error;
>
> if (unlikely(rseq_update_cpu_node_id(t)))
> goto error;
>
>
--
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com
In preparation for a major rewrite of this code, provide a data structure
for rseq management.
Put all the rseq related data into it (except for the debug part), which
makes it possible to simplify fork/execve by using memset() and memcpy()
instead of adding new fields to initialize over and over.
Create a storage struct for event management as well and put the
sched_switch event and an indicator for RSEQ on a task into it as a
start. That uses a union, which makes it possible to mask and clear the
whole lot efficiently.
The indicators are explicitly not a bit field. Bit fields generate abysmal
code.
The boolean members are defined as u8 as that actually guarantees that it
fits. There seem to be strange architecture ABIs which need more than 8 bits
for a boolean.
The has_rseq member is redundant vs. task::rseq, but it turns out that
boolean operations and quick checks on the union generate better code than
fiddling with separate entities and data types.
This struct will be extended over time to carry more information.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
V4: Move all rseq related data into a dedicated umbrella struct
---
include/linux/rseq.h | 48 +++++++++++++++-------------------
include/linux/rseq_types.h | 51 ++++++++++++++++++++++++++++++++++++
include/linux/sched.h | 14 ++--------
kernel/ptrace.c | 6 ++--
kernel/rseq.c | 63 ++++++++++++++++++++++-----------------------
5 files changed, 110 insertions(+), 72 deletions(-)
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -9,22 +9,22 @@ void __rseq_handle_notify_resume(struct
static inline void rseq_handle_notify_resume(struct pt_regs *regs)
{
- if (current->rseq)
+ if (current->rseq.event.has_rseq)
__rseq_handle_notify_resume(NULL, regs);
}
static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs)
{
- if (current->rseq) {
- current->rseq_event_pending = true;
+ if (current->rseq.event.has_rseq) {
+ current->rseq.event.sched_switch = true;
__rseq_handle_notify_resume(ksig, regs);
}
}
static inline void rseq_sched_switch_event(struct task_struct *t)
{
- if (t->rseq) {
- t->rseq_event_pending = true;
+ if (t->rseq.event.has_rseq) {
+ t->rseq.event.sched_switch = true;
set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
}
}
@@ -32,8 +32,9 @@ static inline void rseq_sched_switch_eve
static __always_inline void rseq_exit_to_user_mode(void)
{
if (IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
- if (WARN_ON_ONCE(current->rseq && current->rseq_event_pending))
- current->rseq_event_pending = false;
+ if (WARN_ON_ONCE(current->rseq.event.has_rseq &&
+ current->rseq.event.events))
+ current->rseq.event.events = 0;
}
}
@@ -49,35 +50,30 @@ static __always_inline void rseq_exit_to
*/
static inline void rseq_virt_userspace_exit(void)
{
- if (current->rseq_event_pending)
+ if (current->rseq.event.sched_switch)
set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
}
+static inline void rseq_reset(struct task_struct *t)
+{
+ memset(&t->rseq, 0, sizeof(t->rseq));
+}
+
+static inline void rseq_execve(struct task_struct *t)
+{
+ rseq_reset(t);
+}
+
/*
* If parent process has a registered restartable sequences area, the
* child inherits. Unregister rseq for a clone with CLONE_VM set.
*/
static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
{
- if (clone_flags & CLONE_VM) {
- t->rseq = NULL;
- t->rseq_len = 0;
- t->rseq_sig = 0;
- t->rseq_event_pending = false;
- } else {
+ if (clone_flags & CLONE_VM)
+ rseq_reset(t);
+ else
t->rseq = current->rseq;
- t->rseq_len = current->rseq_len;
- t->rseq_sig = current->rseq_sig;
- t->rseq_event_pending = current->rseq_event_pending;
- }
-}
-
-static inline void rseq_execve(struct task_struct *t)
-{
- t->rseq = NULL;
- t->rseq_len = 0;
- t->rseq_sig = 0;
- t->rseq_event_pending = false;
}
#else /* CONFIG_RSEQ */
--- /dev/null
+++ b/include/linux/rseq_types.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_RSEQ_TYPES_H
+#define _LINUX_RSEQ_TYPES_H
+
+#include <linux/types.h>
+
+#ifdef CONFIG_RSEQ
+struct rseq;
+
+/**
+ * struct rseq_event - Storage for rseq related event management
+ * @all: Compound to initialize and clear the data efficiently
+ * @events: Compound to access events with a single load/store
+ * @sched_switch: True if the task was scheduled out
+ * @has_rseq: True if the task has a rseq pointer installed
+ */
+struct rseq_event {
+ union {
+ u32 all;
+ struct {
+ union {
+ u16 events;
+ struct {
+ u8 sched_switch;
+ };
+ };
+
+ u8 has_rseq;
+ };
+ };
+};
+
+/**
+ * struct rseq_data - Storage for all rseq related data
+ * @usrptr: Pointer to the registered user space RSEQ memory
+ * @len: Length of the RSEQ region
+ * @sig:		Signature of critical section abort IPs
+ * @event: Storage for event management
+ */
+struct rseq_data {
+ struct rseq __user *usrptr;
+ u32 len;
+ u32 sig;
+ struct rseq_event event;
+};
+
+#else /* CONFIG_RSEQ */
+struct rseq_data { };
+#endif /* !CONFIG_RSEQ */
+
+#endif
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -41,6 +41,7 @@
#include <linux/task_io_accounting.h>
#include <linux/posix-timers_types.h>
#include <linux/restart_block.h>
+#include <linux/rseq_types.h>
#include <uapi/linux/rseq.h>
#include <linux/seqlock_types.h>
#include <linux/kcsan.h>
@@ -1400,16 +1401,8 @@ struct task_struct {
unsigned long numa_pages_migrated;
#endif /* CONFIG_NUMA_BALANCING */
-#ifdef CONFIG_RSEQ
- struct rseq __user *rseq;
- u32 rseq_len;
- u32 rseq_sig;
- /*
- * RmW on rseq_event_pending must be performed atomically
- * with respect to preemption.
- */
- bool rseq_event_pending;
-# ifdef CONFIG_DEBUG_RSEQ
+ struct rseq_data rseq;
+#ifdef CONFIG_DEBUG_RSEQ
/*
* This is a place holder to save a copy of the rseq fields for
* validation of read-only fields. The struct rseq has a
@@ -1417,7 +1410,6 @@ struct task_struct {
* directly. Reserve a size large enough for the known fields.
*/
char rseq_fields[sizeof(struct rseq)];
-# endif
#endif
#ifdef CONFIG_SCHED_MM_CID
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -793,9 +793,9 @@ static long ptrace_get_rseq_configuratio
unsigned long size, void __user *data)
{
struct ptrace_rseq_configuration conf = {
- .rseq_abi_pointer = (u64)(uintptr_t)task->rseq,
- .rseq_abi_size = task->rseq_len,
- .signature = task->rseq_sig,
+ .rseq_abi_pointer = (u64)(uintptr_t)task->rseq.usrptr,
+ .rseq_abi_size = task->rseq.len,
+ .signature = task->rseq.sig,
.flags = 0,
};
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -103,13 +103,13 @@ static int rseq_validate_ro_fields(struc
DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
u32 cpu_id_start, cpu_id, node_id, mm_cid;
- struct rseq __user *rseq = t->rseq;
+ struct rseq __user *rseq = t->rseq.usrptr;
/*
* Validate fields which are required to be read-only by
* user-space.
*/
- if (!user_read_access_begin(rseq, t->rseq_len))
+ if (!user_read_access_begin(rseq, t->rseq.len))
goto efault;
unsafe_get_user(cpu_id_start, &rseq->cpu_id_start, efault_end);
unsafe_get_user(cpu_id, &rseq->cpu_id, efault_end);
@@ -147,10 +147,10 @@ static int rseq_validate_ro_fields(struc
* Update an rseq field and its in-kernel copy in lock-step to keep a coherent
* state.
*/
-#define rseq_unsafe_put_user(t, value, field, error_label) \
- do { \
- unsafe_put_user(value, &t->rseq->field, error_label); \
- rseq_kernel_fields(t)->field = value; \
+#define rseq_unsafe_put_user(t, value, field, error_label) \
+ do { \
+ unsafe_put_user(value, &t->rseq.usrptr->field, error_label); \
+ rseq_kernel_fields(t)->field = value; \
} while (0)
#else
@@ -160,12 +160,12 @@ static int rseq_validate_ro_fields(struc
}
#define rseq_unsafe_put_user(t, value, field, error_label) \
- unsafe_put_user(value, &t->rseq->field, error_label)
+ unsafe_put_user(value, &t->rseq.usrptr->field, error_label)
#endif
static int rseq_update_cpu_node_id(struct task_struct *t)
{
- struct rseq __user *rseq = t->rseq;
+ struct rseq __user *rseq = t->rseq.usrptr;
u32 cpu_id = raw_smp_processor_id();
u32 node_id = cpu_to_node(cpu_id);
u32 mm_cid = task_mm_cid(t);
@@ -176,7 +176,7 @@ static int rseq_update_cpu_node_id(struc
if (rseq_validate_ro_fields(t))
goto efault;
WARN_ON_ONCE((int) mm_cid < 0);
- if (!user_write_access_begin(rseq, t->rseq_len))
+ if (!user_write_access_begin(rseq, t->rseq.len))
goto efault;
rseq_unsafe_put_user(t, cpu_id, cpu_id_start, efault_end);
@@ -201,7 +201,7 @@ static int rseq_update_cpu_node_id(struc
static int rseq_reset_rseq_cpu_node_id(struct task_struct *t)
{
- struct rseq __user *rseq = t->rseq;
+ struct rseq __user *rseq = t->rseq.usrptr;
u32 cpu_id_start = 0, cpu_id = RSEQ_CPU_ID_UNINITIALIZED, node_id = 0,
mm_cid = 0;
@@ -211,7 +211,7 @@ static int rseq_reset_rseq_cpu_node_id(s
if (rseq_validate_ro_fields(t))
goto efault;
- if (!user_write_access_begin(rseq, t->rseq_len))
+ if (!user_write_access_begin(rseq, t->rseq.len))
goto efault;
/*
@@ -272,7 +272,7 @@ static int rseq_get_rseq_cs(struct task_
u32 sig;
int ret;
- ret = rseq_get_rseq_cs_ptr_val(t->rseq, &ptr);
+ ret = rseq_get_rseq_cs_ptr_val(t->rseq.usrptr, &ptr);
if (ret)
return ret;
@@ -305,10 +305,10 @@ static int rseq_get_rseq_cs(struct task_
if (ret)
return ret;
- if (current->rseq_sig != sig) {
+ if (current->rseq.sig != sig) {
printk_ratelimited(KERN_WARNING
"Possible attack attempt. Unexpected rseq signature 0x%x, expecting 0x%x (pid=%d, addr=%p).\n",
- sig, current->rseq_sig, current->pid, usig);
+ sig, current->rseq.sig, current->pid, usig);
return -EINVAL;
}
return 0;
@@ -338,7 +338,7 @@ static int rseq_check_flags(struct task_
return -EINVAL;
/* Get thread flags. */
- ret = get_user(flags, &t->rseq->flags);
+ ret = get_user(flags, &t->rseq.usrptr->flags);
if (ret)
return ret;
@@ -392,13 +392,13 @@ static int rseq_ip_fixup(struct pt_regs
* Clear the rseq_cs pointer and return.
*/
if (!in_rseq_cs(ip, &rseq_cs))
- return clear_rseq_cs(t->rseq);
+ return clear_rseq_cs(t->rseq.usrptr);
ret = rseq_check_flags(t, rseq_cs.flags);
if (ret < 0)
return ret;
if (!abort)
return 0;
- ret = clear_rseq_cs(t->rseq);
+ ret = clear_rseq_cs(t->rseq.usrptr);
if (ret)
return ret;
trace_rseq_ip_fixup(ip, rseq_cs.start_ip, rseq_cs.post_commit_offset,
@@ -460,8 +460,8 @@ void __rseq_handle_notify_resume(struct
* inconsistencies.
*/
scoped_guard(RSEQ_EVENT_GUARD) {
- event = t->rseq_event_pending;
- t->rseq_event_pending = false;
+ event = t->rseq.event.sched_switch;
+ t->rseq.event.sched_switch = false;
}
if (!IS_ENABLED(CONFIG_DEBUG_RSEQ) && !event)
@@ -492,7 +492,7 @@ void rseq_syscall(struct pt_regs *regs)
struct task_struct *t = current;
struct rseq_cs rseq_cs;
- if (!t->rseq)
+ if (!t->rseq.usrptr)
return;
if (rseq_get_rseq_cs(t, &rseq_cs) || in_rseq_cs(ip, &rseq_cs))
force_sig(SIGSEGV);
@@ -511,33 +511,31 @@ SYSCALL_DEFINE4(rseq, struct rseq __user
if (flags & ~RSEQ_FLAG_UNREGISTER)
return -EINVAL;
/* Unregister rseq for current thread. */
- if (current->rseq != rseq || !current->rseq)
+ if (current->rseq.usrptr != rseq || !current->rseq.usrptr)
return -EINVAL;
- if (rseq_len != current->rseq_len)
+ if (rseq_len != current->rseq.len)
return -EINVAL;
- if (current->rseq_sig != sig)
+ if (current->rseq.sig != sig)
return -EPERM;
ret = rseq_reset_rseq_cpu_node_id(current);
if (ret)
return ret;
- current->rseq = NULL;
- current->rseq_sig = 0;
- current->rseq_len = 0;
+ rseq_reset(current);
return 0;
}
if (unlikely(flags))
return -EINVAL;
- if (current->rseq) {
+ if (current->rseq.usrptr) {
/*
* If rseq is already registered, check whether
* the provided address differs from the prior
* one.
*/
- if (current->rseq != rseq || rseq_len != current->rseq_len)
+ if (current->rseq.usrptr != rseq || rseq_len != current->rseq.len)
return -EINVAL;
- if (current->rseq_sig != sig)
+ if (current->rseq.sig != sig)
return -EPERM;
/* Already registered. */
return -EBUSY;
@@ -586,15 +584,16 @@ SYSCALL_DEFINE4(rseq, struct rseq __user
* Activate the registration by setting the rseq area address, length
* and signature in the task struct.
*/
- current->rseq = rseq;
- current->rseq_len = rseq_len;
- current->rseq_sig = sig;
+ current->rseq.usrptr = rseq;
+ current->rseq.len = rseq_len;
+ current->rseq.sig = sig;
/*
* If rseq was previously inactive, and has just been
* registered, ensure the cpu_id_start and cpu_id fields
* are updated before returning to user-space.
*/
+ current->rseq.event.has_rseq = true;
rseq_sched_switch_event(current);
return 0;
On 2025-09-08 17:31, Thomas Gleixner wrote:
> In preparation for a major rewrite of this code, provide a data structure
> for rseq management.
>
> Put all the rseq related data into it (except for the debug part), which
> allows to simplify fork/execve by using memset() and memcpy() instead of
> adding new fields to initialize over and over.
>
> Create a storage struct for event management as well and put the
> sched_switch event and an indicator for RSEQ on a task into it as a
> start. That uses a union, which allows to mask and clear the whole lot
> efficiently.
>
> The indicators are explicitly not a bit field. Bit fields generate abysmal
> code.
>
> The boolean members are defined as u8 as that actually guarantees that it
> fits. There seem to be strange architecture ABIs which need more than 8 bits
seem -> seems
Other than this nit:
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> for a boolean.
>
> The has_rseq member is redundant vs. task::rseq, but it turns out that
> boolean operations and quick checks on the union generate better code than
> fiddling with separate entities and data types.
>
> This struct will be extended over time to carry more information.
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
> V4: Move all rseq related data into a dedicated umbrella struct
> ---
> include/linux/rseq.h | 48 +++++++++++++++-------------------
> include/linux/rseq_types.h | 51 ++++++++++++++++++++++++++++++++++++
> include/linux/sched.h | 14 ++--------
> kernel/ptrace.c | 6 ++--
> kernel/rseq.c | 63 ++++++++++++++++++++++-----------------------
> 5 files changed, 110 insertions(+), 72 deletions(-)
>
> --- a/include/linux/rseq.h
> +++ b/include/linux/rseq.h
> @@ -9,22 +9,22 @@ void __rseq_handle_notify_resume(struct
>
> static inline void rseq_handle_notify_resume(struct pt_regs *regs)
> {
> - if (current->rseq)
> + if (current->rseq.event.has_rseq)
> __rseq_handle_notify_resume(NULL, regs);
> }
>
> static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs)
> {
> - if (current->rseq) {
> - current->rseq_event_pending = true;
> + if (current->rseq.event.has_rseq) {
> + current->rseq.event.sched_switch = true;
> __rseq_handle_notify_resume(ksig, regs);
> }
> }
>
> static inline void rseq_sched_switch_event(struct task_struct *t)
> {
> - if (t->rseq) {
> - t->rseq_event_pending = true;
> + if (t->rseq.event.has_rseq) {
> + t->rseq.event.sched_switch = true;
> set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
> }
> }
> @@ -32,8 +32,9 @@ static inline void rseq_sched_switch_eve
> static __always_inline void rseq_exit_to_user_mode(void)
> {
> if (IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
> - if (WARN_ON_ONCE(current->rseq && current->rseq_event_pending))
> - current->rseq_event_pending = false;
> + if (WARN_ON_ONCE(current->rseq.event.has_rseq &&
> + current->rseq.event.events))
> + current->rseq.event.events = 0;
> }
> }
>
> @@ -49,35 +50,30 @@ static __always_inline void rseq_exit_to
> */
> static inline void rseq_virt_userspace_exit(void)
> {
> - if (current->rseq_event_pending)
> + if (current->rseq.event.sched_switch)
> set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
> }
>
> +static inline void rseq_reset(struct task_struct *t)
> +{
> + memset(&t->rseq, 0, sizeof(t->rseq));
> +}
> +
> +static inline void rseq_execve(struct task_struct *t)
> +{
> + rseq_reset(t);
> +}
> +
> /*
> * If parent process has a registered restartable sequences area, the
> * child inherits. Unregister rseq for a clone with CLONE_VM set.
> */
> static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
> {
> - if (clone_flags & CLONE_VM) {
> - t->rseq = NULL;
> - t->rseq_len = 0;
> - t->rseq_sig = 0;
> - t->rseq_event_pending = false;
> - } else {
> + if (clone_flags & CLONE_VM)
> + rseq_reset(t);
> + else
> t->rseq = current->rseq;
> - t->rseq_len = current->rseq_len;
> - t->rseq_sig = current->rseq_sig;
> - t->rseq_event_pending = current->rseq_event_pending;
> - }
> -}
> -
> -static inline void rseq_execve(struct task_struct *t)
> -{
> - t->rseq = NULL;
> - t->rseq_len = 0;
> - t->rseq_sig = 0;
> - t->rseq_event_pending = false;
> }
>
> #else /* CONFIG_RSEQ */
> --- /dev/null
> +++ b/include/linux/rseq_types.h
> @@ -0,0 +1,51 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _LINUX_RSEQ_TYPES_H
> +#define _LINUX_RSEQ_TYPES_H
> +
> +#include <linux/types.h>
> +
> +#ifdef CONFIG_RSEQ
> +struct rseq;
> +
> +/**
> + * struct rseq_event - Storage for rseq related event management
> + * @all: Compound to initialize and clear the data efficiently
> + * @events: Compound to access events with a single load/store
> + * @sched_switch: True if the task was scheduled out
> + * @has_rseq: True if the task has a rseq pointer installed
> + */
> +struct rseq_event {
> + union {
> + u32 all;
> + struct {
> + union {
> + u16 events;
> + struct {
> + u8 sched_switch;
> + };
> + };
> +
> + u8 has_rseq;
> + };
> + };
> +};
> +
> +/**
> + * struct rseq_data - Storage for all rseq related data
> + * @usrptr: Pointer to the registered user space RSEQ memory
> + * @len: Length of the RSEQ region
> + * @sig: Signature of critical section abort IPs
> + * @event: Storage for event management
> + */
> +struct rseq_data {
> + struct rseq __user *usrptr;
> + u32 len;
> + u32 sig;
> + struct rseq_event event;
> +};
> +
> +#else /* CONFIG_RSEQ */
> +struct rseq_data { };
> +#endif /* !CONFIG_RSEQ */
> +
> +#endif
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -41,6 +41,7 @@
> #include <linux/task_io_accounting.h>
> #include <linux/posix-timers_types.h>
> #include <linux/restart_block.h>
> +#include <linux/rseq_types.h>
> #include <uapi/linux/rseq.h>
> #include <linux/seqlock_types.h>
> #include <linux/kcsan.h>
> @@ -1400,16 +1401,8 @@ struct task_struct {
> unsigned long numa_pages_migrated;
> #endif /* CONFIG_NUMA_BALANCING */
>
> -#ifdef CONFIG_RSEQ
> - struct rseq __user *rseq;
> - u32 rseq_len;
> - u32 rseq_sig;
> - /*
> - * RmW on rseq_event_pending must be performed atomically
> - * with respect to preemption.
> - */
> - bool rseq_event_pending;
> -# ifdef CONFIG_DEBUG_RSEQ
> + struct rseq_data rseq;
> +#ifdef CONFIG_DEBUG_RSEQ
> /*
> * This is a place holder to save a copy of the rseq fields for
> * validation of read-only fields. The struct rseq has a
> @@ -1417,7 +1410,6 @@ struct task_struct {
> * directly. Reserve a size large enough for the known fields.
> */
> char rseq_fields[sizeof(struct rseq)];
> -# endif
> #endif
>
> #ifdef CONFIG_SCHED_MM_CID
> --- a/kernel/ptrace.c
> +++ b/kernel/ptrace.c
> @@ -793,9 +793,9 @@ static long ptrace_get_rseq_configuratio
> unsigned long size, void __user *data)
> {
> struct ptrace_rseq_configuration conf = {
> - .rseq_abi_pointer = (u64)(uintptr_t)task->rseq,
> - .rseq_abi_size = task->rseq_len,
> - .signature = task->rseq_sig,
> + .rseq_abi_pointer = (u64)(uintptr_t)task->rseq.usrptr,
> + .rseq_abi_size = task->rseq.len,
> + .signature = task->rseq.sig,
> .flags = 0,
> };
>
> --- a/kernel/rseq.c
> +++ b/kernel/rseq.c
> @@ -103,13 +103,13 @@ static int rseq_validate_ro_fields(struc
> DEFAULT_RATELIMIT_INTERVAL,
> DEFAULT_RATELIMIT_BURST);
> u32 cpu_id_start, cpu_id, node_id, mm_cid;
> - struct rseq __user *rseq = t->rseq;
> + struct rseq __user *rseq = t->rseq.usrptr;
>
> /*
> * Validate fields which are required to be read-only by
> * user-space.
> */
> - if (!user_read_access_begin(rseq, t->rseq_len))
> + if (!user_read_access_begin(rseq, t->rseq.len))
> goto efault;
> unsafe_get_user(cpu_id_start, &rseq->cpu_id_start, efault_end);
> unsafe_get_user(cpu_id, &rseq->cpu_id, efault_end);
> @@ -147,10 +147,10 @@ static int rseq_validate_ro_fields(struc
> * Update an rseq field and its in-kernel copy in lock-step to keep a coherent
> * state.
> */
> -#define rseq_unsafe_put_user(t, value, field, error_label) \
> - do { \
> - unsafe_put_user(value, &t->rseq->field, error_label); \
> - rseq_kernel_fields(t)->field = value; \
> +#define rseq_unsafe_put_user(t, value, field, error_label) \
> + do { \
> + unsafe_put_user(value, &t->rseq.usrptr->field, error_label); \
> + rseq_kernel_fields(t)->field = value; \
> } while (0)
>
> #else
> @@ -160,12 +160,12 @@ static int rseq_validate_ro_fields(struc
> }
>
> #define rseq_unsafe_put_user(t, value, field, error_label) \
> - unsafe_put_user(value, &t->rseq->field, error_label)
> + unsafe_put_user(value, &t->rseq.usrptr->field, error_label)
> #endif
>
> static int rseq_update_cpu_node_id(struct task_struct *t)
> {
> - struct rseq __user *rseq = t->rseq;
> + struct rseq __user *rseq = t->rseq.usrptr;
> u32 cpu_id = raw_smp_processor_id();
> u32 node_id = cpu_to_node(cpu_id);
> u32 mm_cid = task_mm_cid(t);
> @@ -176,7 +176,7 @@ static int rseq_update_cpu_node_id(struc
> if (rseq_validate_ro_fields(t))
> goto efault;
> WARN_ON_ONCE((int) mm_cid < 0);
> - if (!user_write_access_begin(rseq, t->rseq_len))
> + if (!user_write_access_begin(rseq, t->rseq.len))
> goto efault;
>
> rseq_unsafe_put_user(t, cpu_id, cpu_id_start, efault_end);
> @@ -201,7 +201,7 @@ static int rseq_update_cpu_node_id(struc
>
> static int rseq_reset_rseq_cpu_node_id(struct task_struct *t)
> {
> - struct rseq __user *rseq = t->rseq;
> + struct rseq __user *rseq = t->rseq.usrptr;
> u32 cpu_id_start = 0, cpu_id = RSEQ_CPU_ID_UNINITIALIZED, node_id = 0,
> mm_cid = 0;
>
> @@ -211,7 +211,7 @@ static int rseq_reset_rseq_cpu_node_id(s
> if (rseq_validate_ro_fields(t))
> goto efault;
>
> - if (!user_write_access_begin(rseq, t->rseq_len))
> + if (!user_write_access_begin(rseq, t->rseq.len))
> goto efault;
>
> /*
> @@ -272,7 +272,7 @@ static int rseq_get_rseq_cs(struct task_
> u32 sig;
> int ret;
>
> - ret = rseq_get_rseq_cs_ptr_val(t->rseq, &ptr);
> + ret = rseq_get_rseq_cs_ptr_val(t->rseq.usrptr, &ptr);
> if (ret)
> return ret;
>
> @@ -305,10 +305,10 @@ static int rseq_get_rseq_cs(struct task_
> if (ret)
> return ret;
>
> - if (current->rseq_sig != sig) {
> + if (current->rseq.sig != sig) {
> printk_ratelimited(KERN_WARNING
> "Possible attack attempt. Unexpected rseq signature 0x%x, expecting 0x%x (pid=%d, addr=%p).\n",
> - sig, current->rseq_sig, current->pid, usig);
> + sig, current->rseq.sig, current->pid, usig);
> return -EINVAL;
> }
> return 0;
> @@ -338,7 +338,7 @@ static int rseq_check_flags(struct task_
> return -EINVAL;
>
> /* Get thread flags. */
> - ret = get_user(flags, &t->rseq->flags);
> + ret = get_user(flags, &t->rseq.usrptr->flags);
> if (ret)
> return ret;
>
> @@ -392,13 +392,13 @@ static int rseq_ip_fixup(struct pt_regs
> * Clear the rseq_cs pointer and return.
> */
> if (!in_rseq_cs(ip, &rseq_cs))
> - return clear_rseq_cs(t->rseq);
> + return clear_rseq_cs(t->rseq.usrptr);
> ret = rseq_check_flags(t, rseq_cs.flags);
> if (ret < 0)
> return ret;
> if (!abort)
> return 0;
> - ret = clear_rseq_cs(t->rseq);
> + ret = clear_rseq_cs(t->rseq.usrptr);
> if (ret)
> return ret;
> trace_rseq_ip_fixup(ip, rseq_cs.start_ip, rseq_cs.post_commit_offset,
> @@ -460,8 +460,8 @@ void __rseq_handle_notify_resume(struct
> * inconsistencies.
> */
> scoped_guard(RSEQ_EVENT_GUARD) {
> - event = t->rseq_event_pending;
> - t->rseq_event_pending = false;
> + event = t->rseq.event.sched_switch;
> + t->rseq.event.sched_switch = false;
> }
>
> if (!IS_ENABLED(CONFIG_DEBUG_RSEQ) && !event)
> @@ -492,7 +492,7 @@ void rseq_syscall(struct pt_regs *regs)
> struct task_struct *t = current;
> struct rseq_cs rseq_cs;
>
> - if (!t->rseq)
> + if (!t->rseq.usrptr)
> return;
> if (rseq_get_rseq_cs(t, &rseq_cs) || in_rseq_cs(ip, &rseq_cs))
> force_sig(SIGSEGV);
> @@ -511,33 +511,31 @@ SYSCALL_DEFINE4(rseq, struct rseq __user
> if (flags & ~RSEQ_FLAG_UNREGISTER)
> return -EINVAL;
> /* Unregister rseq for current thread. */
> - if (current->rseq != rseq || !current->rseq)
> + if (current->rseq.usrptr != rseq || !current->rseq.usrptr)
> return -EINVAL;
> - if (rseq_len != current->rseq_len)
> + if (rseq_len != current->rseq.len)
> return -EINVAL;
> - if (current->rseq_sig != sig)
> + if (current->rseq.sig != sig)
> return -EPERM;
> ret = rseq_reset_rseq_cpu_node_id(current);
> if (ret)
> return ret;
> - current->rseq = NULL;
> - current->rseq_sig = 0;
> - current->rseq_len = 0;
> + rseq_reset(current);
> return 0;
> }
>
> if (unlikely(flags))
> return -EINVAL;
>
> - if (current->rseq) {
> + if (current->rseq.usrptr) {
> /*
> * If rseq is already registered, check whether
> * the provided address differs from the prior
> * one.
> */
> - if (current->rseq != rseq || rseq_len != current->rseq_len)
> + if (current->rseq.usrptr != rseq || rseq_len != current->rseq.len)
> return -EINVAL;
> - if (current->rseq_sig != sig)
> + if (current->rseq.sig != sig)
> return -EPERM;
> /* Already registered. */
> return -EBUSY;
> @@ -586,15 +584,16 @@ SYSCALL_DEFINE4(rseq, struct rseq __user
> * Activate the registration by setting the rseq area address, length
> * and signature in the task struct.
> */
> - current->rseq = rseq;
> - current->rseq_len = rseq_len;
> - current->rseq_sig = sig;
> + current->rseq.usrptr = rseq;
> + current->rseq.len = rseq_len;
> + current->rseq.sig = sig;
>
> /*
> * If rseq was previously inactive, and has just been
> * registered, ensure the cpu_id_start and cpu_id fields
> * are updated before returning to user-space.
> */
> + current->rseq.event.has_rseq = true;
> rseq_sched_switch_event(current);
>
> return 0;
>
--
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com
On Tue, Sep 09 2025 at 09:30, Mathieu Desnoyers wrote:
> On 2025-09-08 17:31, Thomas Gleixner wrote:
>> The boolean members are defined as u8 as that actually guarantees that it
>> fits. There seem to be strange architecture ABIs which need more than 8 bits
>
> seem -> seems
This is an inverted sentence, i.e. the subject is after the verb. Let me
rephrase it in the regular sentence order:
Strange architecture ABIs seem to require more than 8 bits ...
The subject is 'ABIs' which is plural. Therefore it requires the plural
form of the verb 'seem', i.e. no 's' at the end. The inversion of the
sentence does not change that at all.
Think about this variant of the sentence:
There are strange architecture ABIs ...
According to your logic this would be:
There is strange architecture ABIs ...
which is obviously wrong, no?
I'm not a native speaker, but basic grammar rules are exactly the same
for native and non-native speakers as far as I know.
Thanks,
tglx
On 2025-09-12 16:44, Thomas Gleixner wrote: > On Tue, Sep 09 2025 at 09:30, Mathieu Desnoyers wrote: >> On 2025-09-08 17:31, Thomas Gleixner wrote: >>> The boolean members are defined as u8 as that actually guarantees that it >>> fits. There seem to be strange architecture ABIs which need more than 8 bits >> >> seem -> seems > > This is an inverted sentence, i.e. the subject is after the verb. Let me > rephrase it in the regular sentence order: > > Strange architecture ABIs seem to require more than 8 bits ... > > The subject is 'ABIs' which is plural. Therefore it requires the plural > form of the verb 'seem', i.e. no 's' at the end. You're right, I mistakenly thought the subject was "architecture". My bad. > I'm not a native speaker, but basic grammar rules are exactly the same > for native and non-native speakers as far as I know. Neither am I. ;) Thanks, Mathieu -- Mathieu Desnoyers EfficiOS Inc. https://www.efficios.com
From: Thomas Gleixner <tglx@linutronix.de>
Cleanup the include ordering, kernel-doc and other trivialities before
making further changes.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Peter Zijlstra <peterz@infradead.org>
---
include/linux/entry-common.h | 8 ++++----
include/linux/irq-entry-common.h | 2 ++
2 files changed, 6 insertions(+), 4 deletions(-)
---
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -3,11 +3,11 @@
#define __LINUX_ENTRYCOMMON_H
#include <linux/irq-entry-common.h>
+#include <linux/livepatch.h>
#include <linux/ptrace.h>
+#include <linux/resume_user_mode.h>
#include <linux/seccomp.h>
#include <linux/sched.h>
-#include <linux/livepatch.h>
-#include <linux/resume_user_mode.h>
#include <asm/entry-common.h>
#include <asm/syscall.h>
@@ -37,6 +37,7 @@
SYSCALL_WORK_SYSCALL_AUDIT | \
SYSCALL_WORK_SYSCALL_USER_DISPATCH | \
ARCH_SYSCALL_WORK_ENTER)
+
#define SYSCALL_WORK_EXIT (SYSCALL_WORK_SYSCALL_TRACEPOINT | \
SYSCALL_WORK_SYSCALL_TRACE | \
SYSCALL_WORK_SYSCALL_AUDIT | \
@@ -61,8 +62,7 @@
*/
void syscall_enter_from_user_mode_prepare(struct pt_regs *regs);
-long syscall_trace_enter(struct pt_regs *regs, long syscall,
- unsigned long work);
+long syscall_trace_enter(struct pt_regs *regs, long syscall, unsigned long work);
/**
* syscall_enter_from_user_mode_work - Check and handle work before invoking
--- a/include/linux/irq-entry-common.h
+++ b/include/linux/irq-entry-common.h
@@ -68,6 +68,7 @@ static __always_inline bool arch_in_rcu_
/**
* enter_from_user_mode - Establish state when coming from user mode
+ * @regs: Pointer to current's pt_regs
*
* Syscall/interrupt entry disables interrupts, but user mode is traced as
* interrupts enabled. Also with NO_HZ_FULL RCU might be idle.
@@ -357,6 +358,7 @@ irqentry_state_t noinstr irqentry_enter(
* Conditional reschedule with additional sanity checks.
*/
void raw_irqentry_exit_cond_resched(void);
+
#ifdef CONFIG_PREEMPT_DYNAMIC
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
#define irqentry_exit_cond_resched_dynamic_enabled raw_irqentry_exit_cond_resched
Open code the only user in the x86 syscall code and reduce the zoo of
functions.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: x86@kernel.org
---
arch/x86/entry/syscall_32.c | 3 ++-
include/linux/entry-common.h | 26 +++++---------------------
kernel/entry/syscall-common.c | 8 --------
3 files changed, 7 insertions(+), 30 deletions(-)
--- a/arch/x86/entry/syscall_32.c
+++ b/arch/x86/entry/syscall_32.c
@@ -274,9 +274,10 @@ static noinstr bool __do_fast_syscall_32
* fetch EBP before invoking any of the syscall entry work
* functions.
*/
- syscall_enter_from_user_mode_prepare(regs);
+ enter_from_user_mode(regs);
instrumentation_begin();
+ local_irq_enable();
/* Fetch EBP from where the vDSO stashed it. */
if (IS_ENABLED(CONFIG_X86_64)) {
/*
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -45,23 +45,6 @@
SYSCALL_WORK_SYSCALL_EXIT_TRAP | \
ARCH_SYSCALL_WORK_EXIT)
-/**
- * syscall_enter_from_user_mode_prepare - Establish state and enable interrupts
- * @regs: Pointer to currents pt_regs
- *
- * Invoked from architecture specific syscall entry code with interrupts
- * disabled. The calling code has to be non-instrumentable. When the
- * function returns all state is correct, interrupts are enabled and the
- * subsequent functions can be instrumented.
- *
- * This handles lockdep, RCU (context tracking) and tracing state, i.e.
- * the functionality provided by enter_from_user_mode().
- *
- * This is invoked when there is extra architecture specific functionality
- * to be done between establishing state and handling user mode entry work.
- */
-void syscall_enter_from_user_mode_prepare(struct pt_regs *regs);
-
long syscall_trace_enter(struct pt_regs *regs, long syscall, unsigned long work);
/**
@@ -71,8 +54,8 @@ long syscall_trace_enter(struct pt_regs
* @syscall: The syscall number
*
* Invoked from architecture specific syscall entry code with interrupts
- * enabled after invoking syscall_enter_from_user_mode_prepare() and extra
- * architecture specific work.
+ * enabled after invoking enter_from_user_mode(), enabling interrupts and
+ * extra architecture specific work.
*
* Returns: The original or a modified syscall number
*
@@ -108,8 +91,9 @@ static __always_inline long syscall_ente
* function returns all state is correct, interrupts are enabled and the
* subsequent functions can be instrumented.
*
- * This is combination of syscall_enter_from_user_mode_prepare() and
- * syscall_enter_from_user_mode_work().
+ * This is the combination of enter_from_user_mode() and
+ * syscall_enter_from_user_mode_work() to be used when there is no
+ * architecture specific work to be done between the two.
*
* Returns: The original or a modified syscall number. See
* syscall_enter_from_user_mode_work() for further explanation.
--- a/kernel/entry/syscall-common.c
+++ b/kernel/entry/syscall-common.c
@@ -63,14 +63,6 @@ long syscall_trace_enter(struct pt_regs
return ret ? : syscall;
}
-noinstr void syscall_enter_from_user_mode_prepare(struct pt_regs *regs)
-{
- enter_from_user_mode(regs);
- instrumentation_begin();
- local_irq_enable();
- instrumentation_end();
-}
-
/*
* If SYSCALL_EMU is set, then the only reason to report is when
* SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall
On 2025-09-08 17:31, Thomas Gleixner wrote:
> Open code the only user in the x86 syscall code and reduce the zoo of
> functions.
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Cc: x86@kernel.org
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
>
> ---
> arch/x86/entry/syscall_32.c | 3 ++-
> include/linux/entry-common.h | 26 +++++---------------------
> kernel/entry/syscall-common.c | 8 --------
> 3 files changed, 7 insertions(+), 30 deletions(-)
>
> --- a/arch/x86/entry/syscall_32.c
> +++ b/arch/x86/entry/syscall_32.c
> @@ -274,9 +274,10 @@ static noinstr bool __do_fast_syscall_32
> * fetch EBP before invoking any of the syscall entry work
> * functions.
> */
> - syscall_enter_from_user_mode_prepare(regs);
> + enter_from_user_mode(regs);
>
> instrumentation_begin();
> + local_irq_enable();
> /* Fetch EBP from where the vDSO stashed it. */
> if (IS_ENABLED(CONFIG_X86_64)) {
> /*
> --- a/include/linux/entry-common.h
> +++ b/include/linux/entry-common.h
> @@ -45,23 +45,6 @@
> SYSCALL_WORK_SYSCALL_EXIT_TRAP | \
> ARCH_SYSCALL_WORK_EXIT)
>
> -/**
> - * syscall_enter_from_user_mode_prepare - Establish state and enable interrupts
> - * @regs: Pointer to currents pt_regs
> - *
> - * Invoked from architecture specific syscall entry code with interrupts
> - * disabled. The calling code has to be non-instrumentable. When the
> - * function returns all state is correct, interrupts are enabled and the
> - * subsequent functions can be instrumented.
> - *
> - * This handles lockdep, RCU (context tracking) and tracing state, i.e.
> - * the functionality provided by enter_from_user_mode().
> - *
> - * This is invoked when there is extra architecture specific functionality
> - * to be done between establishing state and handling user mode entry work.
> - */
> -void syscall_enter_from_user_mode_prepare(struct pt_regs *regs);
> -
> long syscall_trace_enter(struct pt_regs *regs, long syscall, unsigned long work);
>
> /**
> @@ -71,8 +54,8 @@ long syscall_trace_enter(struct pt_regs
> * @syscall: The syscall number
> *
> * Invoked from architecture specific syscall entry code with interrupts
> - * enabled after invoking syscall_enter_from_user_mode_prepare() and extra
> - * architecture specific work.
> + * enabled after invoking enter_from_user_mode(), enabling interrupts and
> + * extra architecture specific work.
> *
> * Returns: The original or a modified syscall number
> *
> @@ -108,8 +91,9 @@ static __always_inline long syscall_ente
> * function returns all state is correct, interrupts are enabled and the
> * subsequent functions can be instrumented.
> *
> - * This is combination of syscall_enter_from_user_mode_prepare() and
> - * syscall_enter_from_user_mode_work().
> + * This is the combination of enter_from_user_mode() and
> + * syscall_enter_from_user_mode_work() to be used when there is no
> + * architecture specific work to be done between the two.
> *
> * Returns: The original or a modified syscall number. See
> * syscall_enter_from_user_mode_work() for further explanation.
> --- a/kernel/entry/syscall-common.c
> +++ b/kernel/entry/syscall-common.c
> @@ -63,14 +63,6 @@ long syscall_trace_enter(struct pt_regs
> return ret ? : syscall;
> }
>
> -noinstr void syscall_enter_from_user_mode_prepare(struct pt_regs *regs)
> -{
> - enter_from_user_mode(regs);
> - instrumentation_begin();
> - local_irq_enable();
> - instrumentation_end();
> -}
> -
> /*
> * If SYSCALL_EMU is set, then the only reason to report is when
> * SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall
>
>
--
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com
There is no point to have this as a function which just inlines
enter_from_user_mode(). The function call overhead is larger than the
function itself.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/irq-entry-common.h | 13 +++++++++++--
kernel/entry/common.c | 13 -------------
2 files changed, 11 insertions(+), 15 deletions(-)
--- a/include/linux/irq-entry-common.h
+++ b/include/linux/irq-entry-common.h
@@ -278,7 +278,10 @@ static __always_inline void exit_to_user
*
* The function establishes state (lockdep, RCU (context tracking), tracing)
*/
-void irqentry_enter_from_user_mode(struct pt_regs *regs);
+static __always_inline void irqentry_enter_from_user_mode(struct pt_regs *regs)
+{
+ enter_from_user_mode(regs);
+}
/**
* irqentry_exit_to_user_mode - Interrupt exit work
@@ -293,7 +296,13 @@ void irqentry_enter_from_user_mode(struc
* Interrupt exit is not invoking #1 which is the syscall specific one time
* work.
*/
-void irqentry_exit_to_user_mode(struct pt_regs *regs);
+static __always_inline void irqentry_exit_to_user_mode(struct pt_regs *regs)
+{
+ instrumentation_begin();
+ exit_to_user_mode_prepare(regs);
+ instrumentation_end();
+ exit_to_user_mode();
+}
#ifndef irqentry_state
/**
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -62,19 +62,6 @@ void __weak arch_do_signal_or_restart(st
return ti_work;
}
-noinstr void irqentry_enter_from_user_mode(struct pt_regs *regs)
-{
- enter_from_user_mode(regs);
-}
-
-noinstr void irqentry_exit_to_user_mode(struct pt_regs *regs)
-{
- instrumentation_begin();
- exit_to_user_mode_prepare(regs);
- instrumentation_end();
- exit_to_user_mode();
-}
-
noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
{
irqentry_state_t ret = {
On 2025-09-08 17:31, Thomas Gleixner wrote:
> There is no point to have this as a function which just inlines
> enter_from_user_mode(). The function call overhead is larger than the
> function itself.
I'm wondering if there is a reason for making those actual functions and
not inlines.
The functions sit in kernel/entry/common.c, which are built with
specific compiler flags in kernel/entry/Makefile:
# Prevent the noinstr section from being pestered by sanitizer and other
goodies
# as long as these things cannot be disabled per function.
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
KCOV_INSTRUMENT := n
# Branch profiling isn't noinstr-safe
ccflags-$(CONFIG_TRACE_BRANCH_PROFILING) += -DDISABLE_BRANCH_PROFILING
CFLAGS_REMOVE_common.o = -fstack-protector -fstack-protector-strong
CFLAGS_common.o += -fno-stack-protector
So I wonder if we're not breaking something in the area of "noinstr"
tagging by inlining those into their caller ?
Thanks,
Mathieu
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
>
> ---
> include/linux/irq-entry-common.h | 13 +++++++++++--
> kernel/entry/common.c | 13 -------------
> 2 files changed, 11 insertions(+), 15 deletions(-)
>
> --- a/include/linux/irq-entry-common.h
> +++ b/include/linux/irq-entry-common.h
> @@ -278,7 +278,10 @@ static __always_inline void exit_to_user
> *
> * The function establishes state (lockdep, RCU (context tracking), tracing)
> */
> -void irqentry_enter_from_user_mode(struct pt_regs *regs);
> +static __always_inline void irqentry_enter_from_user_mode(struct pt_regs *regs)
> +{
> + enter_from_user_mode(regs);
> +}
>
> /**
> * irqentry_exit_to_user_mode - Interrupt exit work
> @@ -293,7 +296,13 @@ void irqentry_enter_from_user_mode(struc
> * Interrupt exit is not invoking #1 which is the syscall specific one time
> * work.
> */
> -void irqentry_exit_to_user_mode(struct pt_regs *regs);
> +static __always_inline void irqentry_exit_to_user_mode(struct pt_regs *regs)
> +{
> + instrumentation_begin();
> + exit_to_user_mode_prepare(regs);
> + instrumentation_end();
> + exit_to_user_mode();
> +}
>
> #ifndef irqentry_state
> /**
> --- a/kernel/entry/common.c
> +++ b/kernel/entry/common.c
> @@ -62,19 +62,6 @@ void __weak arch_do_signal_or_restart(st
> return ti_work;
> }
>
> -noinstr void irqentry_enter_from_user_mode(struct pt_regs *regs)
> -{
> - enter_from_user_mode(regs);
> -}
> -
> -noinstr void irqentry_exit_to_user_mode(struct pt_regs *regs)
> -{
> - instrumentation_begin();
> - exit_to_user_mode_prepare(regs);
> - instrumentation_end();
> - exit_to_user_mode();
> -}
> -
> noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
> {
> irqentry_state_t ret = {
>
>
--
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com
On Tue, Sep 09 2025 at 09:38, Mathieu Desnoyers wrote: > On 2025-09-08 17:31, Thomas Gleixner wrote: >> There is no point to have this as a function which just inlines >> enter_from_user_mode(). The function call overhead is larger than the >> function itself. > > I'm wondering if there is a reason for making those actual functions and > not inlines. > > The functions sit in kernel/entry/common.c, which are built with > specific compiler flags in kernel/entry/Makefile: > > # Prevent the noinstr section from being pestered by sanitizer and other > goodies > # as long as these things cannot be disabled per function. > KASAN_SANITIZE := n > UBSAN_SANITIZE := n > KCOV_INSTRUMENT := n > > # Branch profiling isn't noinstr-safe > ccflags-$(CONFIG_TRACE_BRANCH_PROFILING) += -DDISABLE_BRANCH_PROFILING > > CFLAGS_REMOVE_common.o = -fstack-protector -fstack-protector-strong > CFLAGS_common.o += -fno-stack-protector > > So I wonder if we're not breaking something in the area of "noinstr" > tagging by inlining those into their caller ? No, because the call sites have to be non-instrumented as well.
On 2025-09-09 10:10, Thomas Gleixner wrote: > On Tue, Sep 09 2025 at 09:38, Mathieu Desnoyers wrote: > >> On 2025-09-08 17:31, Thomas Gleixner wrote: >>> There is no point to have this as a function which just inlines >>> enter_from_user_mode(). The function call overhead is larger than the >>> function itself. >> >> I'm wondering if there is a reason for making those actual functions and >> not inlines. >> >> The functions sit in kernel/entry/common.c, which are built with >> specific compiler flags in kernel/entry/Makefile: >> >> # Prevent the noinstr section from being pestered by sanitizer and other >> goodies >> # as long as these things cannot be disabled per function. >> KASAN_SANITIZE := n >> UBSAN_SANITIZE := n >> KCOV_INSTRUMENT := n >> >> # Branch profiling isn't noinstr-safe >> ccflags-$(CONFIG_TRACE_BRANCH_PROFILING) += -DDISABLE_BRANCH_PROFILING >> >> CFLAGS_REMOVE_common.o = -fstack-protector -fstack-protector-strong >> CFLAGS_common.o += -fno-stack-protector >> >> So I wonder if we're not breaking something in the area of "noinstr" >> tagging by inlining those into their caller ? > > No, because the call sites have to be non-instrumented as well. OK. Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> -- Mathieu Desnoyers EfficiOS Inc. https://www.efficios.com
There is nothing mm specific in that and including mm.h can cause header
recursion hell.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
include/linux/mm.h | 25 -------------------------
include/linux/sched.h | 26 ++++++++++++++++++++++++++
2 files changed, 26 insertions(+), 25 deletions(-)
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2310,31 +2310,6 @@ struct zap_details {
/* Set in unmap_vmas() to indicate a final unmap call. Only used by hugetlb */
#define ZAP_FLAG_UNMAP ((__force zap_flags_t) BIT(1))
-#ifdef CONFIG_SCHED_MM_CID
-void sched_mm_cid_before_execve(struct task_struct *t);
-void sched_mm_cid_after_execve(struct task_struct *t);
-void sched_mm_cid_fork(struct task_struct *t);
-void sched_mm_cid_exit_signals(struct task_struct *t);
-static inline int task_mm_cid(struct task_struct *t)
-{
- return t->mm_cid;
-}
-#else
-static inline void sched_mm_cid_before_execve(struct task_struct *t) { }
-static inline void sched_mm_cid_after_execve(struct task_struct *t) { }
-static inline void sched_mm_cid_fork(struct task_struct *t) { }
-static inline void sched_mm_cid_exit_signals(struct task_struct *t) { }
-static inline int task_mm_cid(struct task_struct *t)
-{
- /*
- * Use the processor id as a fall-back when the mm cid feature is
- * disabled. This provides functional per-cpu data structure accesses
- * in user-space, althrough it won't provide the memory usage benefits.
- */
- return raw_smp_processor_id();
-}
-#endif
-
#ifdef CONFIG_MMU
extern bool can_do_mlock(void);
#else
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2304,4 +2304,30 @@ static __always_inline void alloc_tag_re
#define alloc_tag_restore(_tag, _old) do {} while (0)
#endif
+/* Avoids recursive inclusion hell */
+#ifdef CONFIG_SCHED_MM_CID
+void sched_mm_cid_before_execve(struct task_struct *t);
+void sched_mm_cid_after_execve(struct task_struct *t);
+void sched_mm_cid_fork(struct task_struct *t);
+void sched_mm_cid_exit_signals(struct task_struct *t);
+static inline int task_mm_cid(struct task_struct *t)
+{
+ return t->mm_cid;
+}
+#else
+static inline void sched_mm_cid_before_execve(struct task_struct *t) { }
+static inline void sched_mm_cid_after_execve(struct task_struct *t) { }
+static inline void sched_mm_cid_fork(struct task_struct *t) { }
+static inline void sched_mm_cid_exit_signals(struct task_struct *t) { }
+static inline int task_mm_cid(struct task_struct *t)
+{
+ /*
+ * Use the processor id as a fall-back when the mm cid feature is
+ * disabled. This provides functional per-cpu data structure accesses
+ * in user-space, althrough it won't provide the memory usage benefits.
+ */
+ return task_cpu(t);
+}
+#endif
+
#endif
In preparation for rewriting RSEQ exit to user space handling provide
storage to cache the CPU ID and MM CID values which were written to user
space. That prepares for a quick check, which avoids the update when
nothing changed.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/rseq.h | 7 +++++--
include/linux/rseq_types.h | 21 +++++++++++++++++++++
include/trace/events/rseq.h | 4 ++--
kernel/rseq.c | 4 ++++
4 files changed, 32 insertions(+), 4 deletions(-)
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -57,6 +57,7 @@ static inline void rseq_virt_userspace_e
static inline void rseq_reset(struct task_struct *t)
{
memset(&t->rseq, 0, sizeof(t->rseq));
+ t->rseq.ids.cpu_cid = ~0ULL;
}
static inline void rseq_execve(struct task_struct *t)
@@ -70,10 +71,12 @@ static inline void rseq_execve(struct ta
*/
static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
{
- if (clone_flags & CLONE_VM)
+ if (clone_flags & CLONE_VM) {
rseq_reset(t);
- else
+ } else {
t->rseq = current->rseq;
+ t->rseq.ids.cpu_cid = ~0ULL;
+ }
}
#else /* CONFIG_RSEQ */
--- a/include/linux/rseq_types.h
+++ b/include/linux/rseq_types.h
@@ -31,17 +31,38 @@ struct rseq_event {
};
/**
+ * struct rseq_ids - Cache for ids, which need to be updated
+ * @cpu_cid: Compound of @cpu_id and @mm_cid to make the
+ * compiler emit a single compare on 64-bit
+ * @cpu_id: The CPU ID which was written last to user space
+ * @mm_cid: The MM CID which was written last to user space
+ *
+ * @cpu_id and @mm_cid are updated when the data is written to user space.
+ */
+struct rseq_ids {
+ union {
+ u64 cpu_cid;
+ struct {
+ u32 cpu_id;
+ u32 mm_cid;
+ };
+ };
+};
+
+/**
* struct rseq_data - Storage for all rseq related data
* @usrptr: Pointer to the registered user space RSEQ memory
* @len: Length of the RSEQ region
* @sig: Signature of critial section abort IPs
* @event: Storage for event management
+ * @ids: Storage for cached CPU ID and MM CID
*/
struct rseq_data {
struct rseq __user *usrptr;
u32 len;
u32 sig;
struct rseq_event event;
+ struct rseq_ids ids;
};
#else /* CONFIG_RSEQ */
--- a/include/trace/events/rseq.h
+++ b/include/trace/events/rseq.h
@@ -21,9 +21,9 @@ TRACE_EVENT(rseq_update,
),
TP_fast_assign(
- __entry->cpu_id = raw_smp_processor_id();
+ __entry->cpu_id = t->rseq.ids.cpu_id;
__entry->node_id = cpu_to_node(__entry->cpu_id);
- __entry->mm_cid = task_mm_cid(t);
+ __entry->mm_cid = t->rseq.ids.mm_cid;
),
TP_printk("cpu_id=%d node_id=%d mm_cid=%d", __entry->cpu_id,
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -184,6 +184,10 @@ static int rseq_update_cpu_node_id(struc
rseq_unsafe_put_user(t, node_id, node_id, efault_end);
rseq_unsafe_put_user(t, mm_cid, mm_cid, efault_end);
+ /* Cache the user space values */
+ t->rseq.ids.cpu_id = cpu_id;
+ t->rseq.ids.mm_cid = mm_cid;
+
/*
* Additional feature fields added after ORIG_RSEQ_SIZE
* need to be conditionally updated only if
On 2025-09-08 17:31, Thomas Gleixner wrote:
> In preparation for rewriting RSEQ exit to user space handling provide
> storage to cache the CPU ID and MM CID values which were written to user
> space. That prepares for a quick check, which avoids the update when
> nothing changed.
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
>
> ---
> include/linux/rseq.h | 7 +++++--
> include/linux/rseq_types.h | 21 +++++++++++++++++++++
> include/trace/events/rseq.h | 4 ++--
> kernel/rseq.c | 4 ++++
> 4 files changed, 32 insertions(+), 4 deletions(-)
>
> --- a/include/linux/rseq.h
> +++ b/include/linux/rseq.h
> @@ -57,6 +57,7 @@ static inline void rseq_virt_userspace_e
> static inline void rseq_reset(struct task_struct *t)
> {
> memset(&t->rseq, 0, sizeof(t->rseq));
> + t->rseq.ids.cpu_cid = ~0ULL;
> }
>
> static inline void rseq_execve(struct task_struct *t)
> @@ -70,10 +71,12 @@ static inline void rseq_execve(struct ta
> */
> static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
> {
> - if (clone_flags & CLONE_VM)
> + if (clone_flags & CLONE_VM) {
> rseq_reset(t);
> - else
> + } else {
> t->rseq = current->rseq;
> + t->rseq.ids.cpu_cid = ~0ULL;
> + }
> }
>
> #else /* CONFIG_RSEQ */
> --- a/include/linux/rseq_types.h
> +++ b/include/linux/rseq_types.h
> @@ -31,17 +31,38 @@ struct rseq_event {
> };
>
> /**
> + * struct rseq_ids - Cache for ids, which need to be updated
need -> needs
> + * @cpu_cid: Compound of @cpu_id and @mm_cid to make the
> + * compiler emit a single compare on 64-bit
> + * @cpu_id: The CPU ID which was written last to user space
> + * @mm_cid: The MM CID which was written last to user space
> + *
> + * @cpu_id and @mm_cid are updated when the data is written to user space.
> + */
> +struct rseq_ids {
> + union {
> + u64 cpu_cid;
> + struct {
> + u32 cpu_id;
> + u32 mm_cid;
> + };
> + };
> +};
> +
> +/**
> * struct rseq_data - Storage for all rseq related data
> * @usrptr: Pointer to the registered user space RSEQ memory
> * @len: Length of the RSEQ region
> * @sig: Signature of critial section abort IPs
> * @event: Storage for event management
> + * @ids: Storage for cached CPU ID and MM CID
It's far from clear from the diff, but is there a missing space at the
beginning of the line above ?
Other than that:
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> */
> struct rseq_data {
> struct rseq __user *usrptr;
> u32 len;
> u32 sig;
> struct rseq_event event;
> + struct rseq_ids ids;
> };
>
> #else /* CONFIG_RSEQ */
> --- a/include/trace/events/rseq.h
> +++ b/include/trace/events/rseq.h
> @@ -21,9 +21,9 @@ TRACE_EVENT(rseq_update,
> ),
>
> TP_fast_assign(
> - __entry->cpu_id = raw_smp_processor_id();
> + __entry->cpu_id = t->rseq.ids.cpu_id;
> __entry->node_id = cpu_to_node(__entry->cpu_id);
> - __entry->mm_cid = task_mm_cid(t);
> + __entry->mm_cid = t->rseq.ids.mm_cid;
> ),
>
> TP_printk("cpu_id=%d node_id=%d mm_cid=%d", __entry->cpu_id,
> --- a/kernel/rseq.c
> +++ b/kernel/rseq.c
> @@ -184,6 +184,10 @@ static int rseq_update_cpu_node_id(struc
> rseq_unsafe_put_user(t, node_id, node_id, efault_end);
> rseq_unsafe_put_user(t, mm_cid, mm_cid, efault_end);
>
> + /* Cache the user space values */
> + t->rseq.ids.cpu_id = cpu_id;
> + t->rseq.ids.mm_cid = mm_cid;
> +
> /*
> * Additional feature fields added after ORIG_RSEQ_SIZE
> * need to be conditionally updated only if
>
--
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com
On Tue, Sep 09 2025 at 09:43, Mathieu Desnoyers wrote:
> On 2025-09-08 17:31, Thomas Gleixner wrote:
>> /**
>> + * struct rseq_ids - Cache for ids, which need to be updated
>
> need -> needs
ids is plural, so 'need' is correct, no?
>> + * @cpu_cid: Compound of @cpu_id and @mm_cid to make the
>> + * compiler emit a single compare on 64-bit
>> + * @cpu_id: The CPU ID which was written last to user space
>> + * @mm_cid: The MM CID which was written last to user space
>> + *
>> + * @cpu_id and @mm_cid are updated when the data is written to user space.
>> + */
>> +struct rseq_ids {
>> + union {
>> + u64 cpu_cid;
>> + struct {
>> + u32 cpu_id;
>> + u32 mm_cid;
>> + };
>> + };
>> +};
>> +
>> +/**
>> * struct rseq_data - Storage for all rseq related data
>> * @usrptr: Pointer to the registered user space RSEQ memory
>> * @len: Length of the RSEQ region
>> * @sig: Signature of critial section abort IPs
>> * @event: Storage for event management
>> + * @ids: Storage for cached CPU ID and MM CID
>
> It's far from clear from the diff, but is there a missing space at the
> beginning of the line above ?
No. The actual diff is:
* @event: Storage for event management
+ * @ids: Storage for cached CPU ID and MM CID
*/
It's just the reply quoting which makes it ugly.
On 2025-09-09 10:13, Thomas Gleixner wrote:
> On Tue, Sep 09 2025 at 09:43, Mathieu Desnoyers wrote:
>> On 2025-09-08 17:31, Thomas Gleixner wrote:
>>> /**
>>> + * struct rseq_ids - Cache for ids, which need to be updated
>>
>> need -> needs
>
> ids is plural, so 'need' is correct, no?
It's the cache that needs to be updated (cache for ids). So
technically the verb conjugates with "cache" (singular) and not
"ids".
>
>>> + * @cpu_cid: Compound of @cpu_id and @mm_cid to make the
>>> + * compiler emit a single compare on 64-bit
>>> + * @cpu_id: The CPU ID which was written last to user space
>>> + * @mm_cid: The MM CID which was written last to user space
>>> + *
>>> + * @cpu_id and @mm_cid are updated when the data is written to user space.
>>> + */
>>> +struct rseq_ids {
>>> + union {
>>> + u64 cpu_cid;
>>> + struct {
>>> + u32 cpu_id;
>>> + u32 mm_cid;
>>> + };
>>> + };
>>> +};
>>> +
>>> +/**
>>> * struct rseq_data - Storage for all rseq related data
>>> * @usrptr: Pointer to the registered user space RSEQ memory
>>> * @len: Length of the RSEQ region
>>> * @sig: Signature of critial section abort IPs
>>> * @event: Storage for event management
>>> + * @ids: Storage for cached CPU ID and MM CID
>>
>> It's far from clear from the diff, but is there a missing space at the
>> beginning of the line above ?
>
> No. The actual diff is:
>
> * @event: Storage for event management
> + * @ids: Storage for cached CPU ID and MM CID
> */
>
> It's just the reply quoting which makes it ugly.
Sounds good.
Thanks,
Mathieu
>
--
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com
For RSEQ the only relevant reason to inspect and eventually fixup (abort)
user space critical sections is when user space was interrupted and the
task was scheduled out.
If the user to kernel entry was from a syscall no fixup is required. If
user space invokes a syscall from a critical section it can keep the
pieces as documented.
This is only supported on architectures which utilize the generic entry
code. If your architecture does not use it, bad luck.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/irq-entry-common.h | 3 ++-
include/linux/rseq.h | 16 +++++++++++-----
include/linux/rseq_entry.h | 18 ++++++++++++++++++
include/linux/rseq_types.h | 2 ++
4 files changed, 33 insertions(+), 6 deletions(-)
--- a/include/linux/irq-entry-common.h
+++ b/include/linux/irq-entry-common.h
@@ -4,7 +4,7 @@
#include <linux/context_tracking.h>
#include <linux/kmsan.h>
-#include <linux/rseq.h>
+#include <linux/rseq_entry.h>
#include <linux/static_call_types.h>
#include <linux/syscalls.h>
#include <linux/tick.h>
@@ -281,6 +281,7 @@ static __always_inline void exit_to_user
static __always_inline void irqentry_enter_from_user_mode(struct pt_regs *regs)
{
enter_from_user_mode(regs);
+ rseq_note_user_irq_entry();
}
/**
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -31,11 +31,17 @@ static inline void rseq_sched_switch_eve
static __always_inline void rseq_exit_to_user_mode(void)
{
- if (IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
- if (WARN_ON_ONCE(current->rseq.event.has_rseq &&
- current->rseq.event.events))
- current->rseq.event.events = 0;
- }
+ struct rseq_event *ev = &current->rseq.event;
+
+ if (IS_ENABLED(CONFIG_DEBUG_RSEQ))
+ WARN_ON_ONCE(ev->sched_switch);
+
+ /*
+ * Ensure that event (especially user_irq) is cleared when the
+ * interrupt did not result in a schedule and therefore the
+ * rseq processing did not clear it.
+ */
+ ev->events = 0;
}
/*
--- /dev/null
+++ b/include/linux/rseq_entry.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_RSEQ_ENTRY_H
+#define _LINUX_RSEQ_ENTRY_H
+
+#ifdef CONFIG_RSEQ
+#include <linux/rseq.h>
+
+static __always_inline void rseq_note_user_irq_entry(void)
+{
+ if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY))
+ current->rseq.event.user_irq = true;
+}
+
+#else /* CONFIG_RSEQ */
+static inline void rseq_note_user_irq_entry(void) { }
+#endif /* !CONFIG_RSEQ */
+
+#endif /* _LINUX_RSEQ_ENTRY_H */
--- a/include/linux/rseq_types.h
+++ b/include/linux/rseq_types.h
@@ -12,6 +12,7 @@ struct rseq;
* @all: Compound to initialize and clear the data efficiently
* @events: Compound to access events with a single load/store
* @sched_switch: True if the task was scheduled out
+ * @user_irq: True on interrupt entry from user mode
* @has_rseq: True if the task has a rseq pointer installed
*/
struct rseq_event {
@@ -22,6 +23,7 @@ struct rseq_event {
u16 events;
struct {
u8 sched_switch;
+ u8 user_irq;
};
};
On 2025-09-08 17:31, Thomas Gleixner wrote:
> For RSEQ the only relevant reason to inspect and eventually fixup (abort)
> user space critical sections is when user space was interrupted and the
> task was scheduled out.
>
> If the user to kernel entry was from a syscall no fixup is required. If
> user space invokes a syscall from a critical section it can keep the
> pieces as documented.
>
> This is only supported on architectures which utilize the generic entry
> code. If your architecture does not use it, bad luck.
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
>
> ---
> include/linux/irq-entry-common.h | 3 ++-
> include/linux/rseq.h | 16 +++++++++++-----
> include/linux/rseq_entry.h | 18 ++++++++++++++++++
> include/linux/rseq_types.h | 2 ++
> 4 files changed, 33 insertions(+), 6 deletions(-)
>
> --- a/include/linux/irq-entry-common.h
> +++ b/include/linux/irq-entry-common.h
> @@ -4,7 +4,7 @@
>
> #include <linux/context_tracking.h>
> #include <linux/kmsan.h>
> -#include <linux/rseq.h>
> +#include <linux/rseq_entry.h>
> #include <linux/static_call_types.h>
> #include <linux/syscalls.h>
> #include <linux/tick.h>
> @@ -281,6 +281,7 @@ static __always_inline void exit_to_user
> static __always_inline void irqentry_enter_from_user_mode(struct pt_regs *regs)
> {
> enter_from_user_mode(regs);
> + rseq_note_user_irq_entry();
> }
>
> /**
> --- a/include/linux/rseq.h
> +++ b/include/linux/rseq.h
> @@ -31,11 +31,17 @@ static inline void rseq_sched_switch_eve
>
> static __always_inline void rseq_exit_to_user_mode(void)
> {
> - if (IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
> - if (WARN_ON_ONCE(current->rseq.event.has_rseq &&
> - current->rseq.event.events))
> - current->rseq.event.events = 0;
> - }
> + struct rseq_event *ev = &current->rseq.event;
> +
> + if (IS_ENABLED(CONFIG_DEBUG_RSEQ))
> + WARN_ON_ONCE(ev->sched_switch);
OK. Now I'm confused.
It is perfectly legal to issue a system call from userspace as long
as it's not from within an rseq critical section.
That system call can be scheduled out, and can set the ev->sched_switch.
This would cause the rseq_exit_to_user_mode from system call to
hit this.
What is disallowed is only issuing a system call from a rseq critical
section. The other parts of rseq (updates of cpu id and mm cid) still
have to happen when returning from a system call.
What am I missing ?
Thanks,
Mathieu
> +
> + /*
> + * Ensure that event (especially user_irq) is cleared when the
> + * interrupt did not result in a schedule and therefore the
> + * rseq processing did not clear it.
> + */
> + ev->events = 0;
> }
>
> /*
> --- /dev/null
> +++ b/include/linux/rseq_entry.h
> @@ -0,0 +1,18 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _LINUX_RSEQ_ENTRY_H
> +#define _LINUX_RSEQ_ENTRY_H
> +
> +#ifdef CONFIG_RSEQ
> +#include <linux/rseq.h>
> +
> +static __always_inline void rseq_note_user_irq_entry(void)
> +{
> + if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY))
> + current->rseq.event.user_irq = true;
> +}
> +
> +#else /* CONFIG_RSEQ */
> +static inline void rseq_note_user_irq_entry(void) { }
> +#endif /* !CONFIG_RSEQ */
> +
> +#endif /* _LINUX_RSEQ_ENTRY_H */
> --- a/include/linux/rseq_types.h
> +++ b/include/linux/rseq_types.h
> @@ -12,6 +12,7 @@ struct rseq;
> * @all: Compound to initialize and clear the data efficiently
> * @events: Compound to access events with a single load/store
> * @sched_switch: True if the task was scheduled out
> + * @user_irq: True on interrupt entry from user mode
> * @has_rseq: True if the task has a rseq pointer installed
> */
> struct rseq_event {
> @@ -22,6 +23,7 @@ struct rseq_event {
> u16 events;
> struct {
> u8 sched_switch;
> + u8 user_irq;
> };
> };
>
>
--
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com
On Tue, Sep 09 2025 at 09:53, Mathieu Desnoyers wrote:
>> static __always_inline void rseq_exit_to_user_mode(void)
>> {
>> - if (IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
>> - if (WARN_ON_ONCE(current->rseq.event.has_rseq &&
>> - current->rseq.event.events))
>> - current->rseq.event.events = 0;
>> - }
>> + struct rseq_event *ev = &current->rseq.event;
>> +
>> + if (IS_ENABLED(CONFIG_DEBUG_RSEQ))
>> + WARN_ON_ONCE(ev->sched_switch);
>
> OK. Now I'm confused.
>
> It is perfectly legal to issue a system call from userspace as long
> as it's not from within an rseq critical section.
>
> That system call can be scheduled out, and can set the ev->sched_switch.
>
> This would cause the rseq_exit_to_user_mode from system call to
> hit this.
>
> What is disallowed is only issuing a system call from a rseq critical
> section. The other parts of rseq (updates of cpu id and mm cid) still
> have to happen when returning from a system call.
>
> What am I missing ?
The fact that any setting of ev->sched_switch has to be handled on the
way out independent of user interrupt or not as MM CID can change
obviously.
This is not any different from the state before this patch. Just that it
now only looks at sched_switch instead of the full event as that might
contain a set user_irq bit w/o sched_switch being set, no?
Thanks,
tglx
On 2025-09-09 10:17, Thomas Gleixner wrote:
> On Tue, Sep 09 2025 at 09:53, Mathieu Desnoyers wrote:
>>> static __always_inline void rseq_exit_to_user_mode(void)
>>> {
>>> - if (IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
>>> - if (WARN_ON_ONCE(current->rseq.event.has_rseq &&
>>> - current->rseq.event.events))
>>> - current->rseq.event.events = 0;
>>> - }
>>> + struct rseq_event *ev = &current->rseq.event;
>>> +
>>> + if (IS_ENABLED(CONFIG_DEBUG_RSEQ))
>>> + WARN_ON_ONCE(ev->sched_switch);
>>
>> OK. Now I'm confused.
>>
>> It is perfectly legal to issue a system call from userspace as long
>> as it's not from within an rseq critical section.
>>
>> That system call can be scheduled out, and can set the ev->sched_switch.
>>
>> This would cause the rseq_exit_to_user_mode from system call to
>> hit this.
>>
>> What is disallowed is only issuing a system call from a rseq critical
>> section. The other parts of rseq (updates of cpu id and mm cid) still
>> have to happen when returning from a system call.
>>
>> What am I missing ?
>
> The fact that any setting of ev->sched_switch has to be handled on the
> way out independent of user interrupt or not as MM CID can change
> obviously.
>
> This is not any different from the state before this patch. Just that it
> now only looks at sched_switch instead of the full event as that might
> contain a set user_irq bit w/o sched_switch being set, no?
OK, so this is called after the events were handled, and this is just
a sanity check.
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
>
> Thanks,
>
> tglx
>
>
--
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com
Provide tracepoint wrappers for the upcoming RSEQ exit to user space inline
fast path, so that the header can be safely included by code which defines
actual trace points.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
V4: Fix the fallback stub
V3: Get rid of one indentation level - Mathieu
---
include/linux/rseq_entry.h | 28 ++++++++++++++++++++++++++++
kernel/rseq.c | 17 +++++++++++++++++
2 files changed, 45 insertions(+)
--- a/include/linux/rseq_entry.h
+++ b/include/linux/rseq_entry.h
@@ -5,6 +5,34 @@
#ifdef CONFIG_RSEQ
#include <linux/rseq.h>
+#include <linux/tracepoint-defs.h>
+
+#ifdef CONFIG_TRACEPOINTS
+DECLARE_TRACEPOINT(rseq_update);
+DECLARE_TRACEPOINT(rseq_ip_fixup);
+void __rseq_trace_update(struct task_struct *t);
+void __rseq_trace_ip_fixup(unsigned long ip, unsigned long start_ip,
+ unsigned long offset, unsigned long abort_ip);
+
+static inline void rseq_trace_update(struct task_struct *t, struct rseq_ids *ids)
+{
+ if (tracepoint_enabled(rseq_update) && ids)
+ __rseq_trace_update(t);
+}
+
+static inline void rseq_trace_ip_fixup(unsigned long ip, unsigned long start_ip,
+ unsigned long offset, unsigned long abort_ip)
+{
+ if (tracepoint_enabled(rseq_ip_fixup))
+ __rseq_trace_ip_fixup(ip, start_ip, offset, abort_ip);
+}
+
+#else /* CONFIG_TRACEPOINT */
+static inline void rseq_trace_update(struct task_struct *t, struct rseq_ids *ids) { }
+static inline void rseq_trace_ip_fixup(unsigned long ip, unsigned long start_ip,
+ unsigned long offset, unsigned long abort_ip) { }
+#endif /* !CONFIG_TRACEPOINT */
+
static __always_inline void rseq_note_user_irq_entry(void)
{
if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY))
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -91,6 +91,23 @@
RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL | \
RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE)
+#ifdef CONFIG_TRACEPOINTS
+/*
+ * Out of line, so the actual update functions can be in a header to be
+ * inlined into the exit to user code.
+ */
+void __rseq_trace_update(struct task_struct *t)
+{
+ trace_rseq_update(t);
+}
+
+void __rseq_trace_ip_fixup(unsigned long ip, unsigned long start_ip,
+ unsigned long offset, unsigned long abort_ip)
+{
+ trace_rseq_ip_fixup(ip, start_ip, offset, abort_ip);
+}
+#endif /* CONFIG_TRACEPOINTS */
+
#ifdef CONFIG_DEBUG_RSEQ
static struct rseq *rseq_kernel_fields(struct task_struct *t)
{
Analyzing the call frequency without actually using tracing is helpful for
analysis of this infrastructure. The overhead is minimal as it just
increments a per CPU counter associated to each operation.
The debugfs readout provides a racy sum of all counters.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
include/linux/rseq.h | 16 ---------
include/linux/rseq_entry.h | 49 +++++++++++++++++++++++++++
init/Kconfig | 12 ++++++
kernel/rseq.c | 79 +++++++++++++++++++++++++++++++++++++++++----
4 files changed, 133 insertions(+), 23 deletions(-)
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -29,21 +29,6 @@ static inline void rseq_sched_switch_eve
}
}
-static __always_inline void rseq_exit_to_user_mode(void)
-{
- struct rseq_event *ev = &current->rseq.event;
-
- if (IS_ENABLED(CONFIG_DEBUG_RSEQ))
- WARN_ON_ONCE(ev->sched_switch);
-
- /*
- * Ensure that event (especially user_irq) is cleared when the
- * interrupt did not result in a schedule and therefore the
- * rseq processing did not clear it.
- */
- ev->events = 0;
-}
-
/*
* KVM/HYPERV invoke resume_user_mode_work() before entering guest mode,
* which clears TIF_NOTIFY_RESUME. To avoid updating user space RSEQ in
@@ -92,7 +77,6 @@ static inline void rseq_sched_switch_eve
static inline void rseq_virt_userspace_exit(void) { }
static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) { }
static inline void rseq_execve(struct task_struct *t) { }
-static inline void rseq_exit_to_user_mode(void) { }
#endif /* !CONFIG_RSEQ */
#ifdef CONFIG_DEBUG_RSEQ
--- a/include/linux/rseq_entry.h
+++ b/include/linux/rseq_entry.h
@@ -2,6 +2,37 @@
#ifndef _LINUX_RSEQ_ENTRY_H
#define _LINUX_RSEQ_ENTRY_H
+/* Must be outside the CONFIG_RSEQ guard to resolve the stubs */
+#ifdef CONFIG_RSEQ_STATS
+#include <linux/percpu.h>
+
+struct rseq_stats {
+ unsigned long exit;
+ unsigned long signal;
+ unsigned long slowpath;
+ unsigned long ids;
+ unsigned long cs;
+ unsigned long clear;
+ unsigned long fixup;
+};
+
+DECLARE_PER_CPU(struct rseq_stats, rseq_stats);
+
+/*
+ * Slow path has interrupts and preemption enabled, but the fast path
+ * runs with interrupts disabled so there is no point in having the
+ * preemption checks implied in __this_cpu_inc() for every operation.
+ */
+#ifdef RSEQ_BUILD_SLOW_PATH
+#define rseq_stat_inc(which) this_cpu_inc((which))
+#else
+#define rseq_stat_inc(which) raw_cpu_inc((which))
+#endif
+
+#else /* CONFIG_RSEQ_STATS */
+#define rseq_stat_inc(x) do { } while (0)
+#endif /* !CONFIG_RSEQ_STATS */
+
#ifdef CONFIG_RSEQ
#include <linux/rseq.h>
@@ -39,8 +70,26 @@ static __always_inline void rseq_note_us
current->rseq.event.user_irq = true;
}
+static __always_inline void rseq_exit_to_user_mode(void)
+{
+ struct rseq_event *ev = &current->rseq.event;
+
+ rseq_stat_inc(rseq_stats.exit);
+
+ if (IS_ENABLED(CONFIG_DEBUG_RSEQ))
+ WARN_ON_ONCE(ev->sched_switch);
+
+ /*
+ * Ensure that event (especially user_irq) is cleared when the
+ * interrupt did not result in a schedule and therefore the
+ * rseq processing did not clear it.
+ */
+ ev->events = 0;
+}
+
#else /* CONFIG_RSEQ */
static inline void rseq_note_user_irq_entry(void) { }
+static inline void rseq_exit_to_user_mode(void) { }
#endif /* !CONFIG_RSEQ */
#endif /* _LINUX_RSEQ_ENTRY_H */
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1883,6 +1883,18 @@ config RSEQ
If unsure, say Y.
+config RSEQ_STATS
+ default n
+ bool "Enable lightweight statistics of restartable sequences" if EXPERT
+ depends on RSEQ && DEBUG_FS
+ help
+ Enable lightweight counters which expose information about the
+ frequency of RSEQ operations via debugfs. Mostly interesting for
+ kernel debugging or performance analysis. While lightweight it's
+ still adding code into the user/kernel mode transitions.
+
+ If unsure, say N.
+
config DEBUG_RSEQ
default n
bool "Enable debugging of rseq() system call" if EXPERT
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -67,12 +67,16 @@
* F1. <failure>
*/
+/* Required to select the proper per_cpu ops for rseq_stats_inc() */
+#define RSEQ_BUILD_SLOW_PATH
+
+#include <linux/debugfs.h>
+#include <linux/ratelimit.h>
+#include <linux/rseq_entry.h>
#include <linux/sched.h>
-#include <linux/uaccess.h>
#include <linux/syscalls.h>
-#include <linux/rseq.h>
+#include <linux/uaccess.h>
#include <linux/types.h>
-#include <linux/ratelimit.h>
#include <asm/ptrace.h>
#define CREATE_TRACE_POINTS
@@ -108,6 +112,56 @@ void __rseq_trace_ip_fixup(unsigned long
}
#endif /* CONFIG_TRACEPOINTS */
+#ifdef CONFIG_RSEQ_STATS
+DEFINE_PER_CPU(struct rseq_stats, rseq_stats);
+
+static int rseq_debug_show(struct seq_file *m, void *p)
+{
+ struct rseq_stats stats = { };
+ unsigned int cpu;
+
+ for_each_possible_cpu(cpu) {
+ stats.exit += data_race(per_cpu(rseq_stats.exit, cpu));
+ stats.signal += data_race(per_cpu(rseq_stats.signal, cpu));
+ stats.slowpath += data_race(per_cpu(rseq_stats.slowpath, cpu));
+ stats.ids += data_race(per_cpu(rseq_stats.ids, cpu));
+ stats.cs += data_race(per_cpu(rseq_stats.cs, cpu));
+ stats.clear += data_race(per_cpu(rseq_stats.clear, cpu));
+ stats.fixup += data_race(per_cpu(rseq_stats.fixup, cpu));
+ }
+
+ seq_printf(m, "exit: %16lu\n", stats.exit);
+ seq_printf(m, "signal: %16lu\n", stats.signal);
+ seq_printf(m, "slowp: %16lu\n", stats.slowpath);
+ seq_printf(m, "ids: %16lu\n", stats.ids);
+ seq_printf(m, "cs: %16lu\n", stats.cs);
+ seq_printf(m, "clear: %16lu\n", stats.clear);
+ seq_printf(m, "fixup: %16lu\n", stats.fixup);
+ return 0;
+}
+
+static int rseq_debug_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, rseq_debug_show, inode->i_private);
+}
+
+static const struct file_operations dfs_ops = {
+ .open = rseq_debug_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int __init rseq_debugfs_init(void)
+{
+ struct dentry *root_dir = debugfs_create_dir("rseq", NULL);
+
+ debugfs_create_file("stats", 0444, root_dir, NULL, &dfs_ops);
+ return 0;
+}
+__initcall(rseq_debugfs_init);
+#endif /* CONFIG_RSEQ_STATS */
+
#ifdef CONFIG_DEBUG_RSEQ
static struct rseq *rseq_kernel_fields(struct task_struct *t)
{
@@ -187,12 +241,13 @@ static int rseq_update_cpu_node_id(struc
u32 node_id = cpu_to_node(cpu_id);
u32 mm_cid = task_mm_cid(t);
- /*
- * Validate read-only rseq fields.
- */
+ rseq_stat_inc(rseq_stats.ids);
+
+ /* Validate read-only rseq fields on debug kernels */
if (rseq_validate_ro_fields(t))
goto efault;
WARN_ON_ONCE((int) mm_cid < 0);
+
if (!user_write_access_begin(rseq, t->rseq.len))
goto efault;
@@ -403,6 +458,8 @@ static int rseq_ip_fixup(struct pt_regs
struct rseq_cs rseq_cs;
int ret;
+ rseq_stat_inc(rseq_stats.cs);
+
ret = rseq_get_rseq_cs(t, &rseq_cs);
if (ret)
return ret;
@@ -412,8 +469,10 @@ static int rseq_ip_fixup(struct pt_regs
* If not nested over a rseq critical section, restart is useless.
* Clear the rseq_cs pointer and return.
*/
- if (!in_rseq_cs(ip, &rseq_cs))
+ if (!in_rseq_cs(ip, &rseq_cs)) {
+ rseq_stat_inc(rseq_stats.clear);
return clear_rseq_cs(t->rseq.usrptr);
+ }
ret = rseq_check_flags(t, rseq_cs.flags);
if (ret < 0)
return ret;
@@ -422,6 +481,7 @@ static int rseq_ip_fixup(struct pt_regs
ret = clear_rseq_cs(t->rseq.usrptr);
if (ret)
return ret;
+ rseq_stat_inc(rseq_stats.fixup);
trace_rseq_ip_fixup(ip, rseq_cs.start_ip, rseq_cs.post_commit_offset,
rseq_cs.abort_ip);
instruction_pointer_set(regs, (unsigned long)rseq_cs.abort_ip);
@@ -462,6 +522,11 @@ void __rseq_handle_notify_resume(struct
if (unlikely(t->flags & PF_EXITING))
return;
+ if (ksig)
+ rseq_stat_inc(rseq_stats.signal);
+ else
+ rseq_stat_inc(rseq_stats.slowpath);
+
/*
* Read and clear the event pending bit first. If the task
* was not preempted or migrated or a signal is on the way,
Config based debug is rarely turned on and is not available easily when
things go wrong.
Provide a static branch to allow permanent integration of debug mechanisms
along with the usual toggles in Kconfig, command line and debugfs.
Requested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
V3: Fix __setup() return value - Michael
---
Documentation/admin-guide/kernel-parameters.txt | 4 +
include/linux/rseq_entry.h | 3
init/Kconfig | 14 ++++
kernel/rseq.c | 73 ++++++++++++++++++++++--
4 files changed, 90 insertions(+), 4 deletions(-)
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -6443,6 +6443,10 @@
Memory area to be used by remote processor image,
managed by CMA.
+ rseq_debug= [KNL] Enable or disable restartable sequence
+ debug mode. Defaults to CONFIG_RSEQ_DEBUG_DEFAULT_ENABLE.
+ Format: <bool>
+
rt_group_sched= [KNL] Enable or disable SCHED_RR/FIFO group scheduling
when CONFIG_RT_GROUP_SCHED=y. Defaults to
!CONFIG_RT_GROUP_SCHED_DEFAULT_DISABLED.
--- a/include/linux/rseq_entry.h
+++ b/include/linux/rseq_entry.h
@@ -34,6 +34,7 @@ DECLARE_PER_CPU(struct rseq_stats, rseq_
#endif /* !CONFIG_RSEQ_STATS */
#ifdef CONFIG_RSEQ
+#include <linux/jump_label.h>
#include <linux/rseq.h>
#include <linux/tracepoint-defs.h>
@@ -64,6 +65,8 @@ static inline void rseq_trace_ip_fixup(u
unsigned long offset, unsigned long abort_ip) { }
#endif /* !CONFIG_TRACEPOINT */
+DECLARE_STATIC_KEY_MAYBE(CONFIG_RSEQ_DEBUG_DEFAULT_ENABLE, rseq_debug_enabled);
+
static __always_inline void rseq_note_user_irq_entry(void)
{
if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY))
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1895,10 +1895,24 @@ config RSEQ_STATS
If unsure, say N.
+config RSEQ_DEBUG_DEFAULT_ENABLE
+ default n
+ bool "Enable restartable sequences debug mode by default" if EXPERT
+ depends on RSEQ
+ help
+ This enables the static branch for debug mode of restartable
+ sequences.
+
+ This also can be controlled on the kernel command line via the
+ command line parameter "rseq_debug=0/1" and through debugfs.
+
+ If unsure, say N.
+
config DEBUG_RSEQ
default n
bool "Enable debugging of rseq() system call" if EXPERT
depends on RSEQ && DEBUG_KERNEL
+ select RSEQ_DEBUG_DEFAULT_ENABLE
help
Enable extra debugging checks for the rseq system call.
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -95,6 +95,27 @@
RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL | \
RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE)
+DEFINE_STATIC_KEY_MAYBE(CONFIG_RSEQ_DEBUG_DEFAULT_ENABLE, rseq_debug_enabled);
+
+static inline void rseq_control_debug(bool on)
+{
+ if (on)
+ static_branch_enable(&rseq_debug_enabled);
+ else
+ static_branch_disable(&rseq_debug_enabled);
+}
+
+static int __init rseq_setup_debug(char *str)
+{
+ bool on;
+
+ if (kstrtobool(str, &on))
+ return -EINVAL;
+ rseq_control_debug(on);
+ return 1;
+}
+__setup("rseq_debug=", rseq_setup_debug);
+
#ifdef CONFIG_TRACEPOINTS
/*
* Out of line, so the actual update functions can be in a header to be
@@ -112,10 +133,11 @@ void __rseq_trace_ip_fixup(unsigned long
}
#endif /* CONFIG_TRACEPOINTS */
+#ifdef CONFIG_DEBUG_FS
#ifdef CONFIG_RSEQ_STATS
DEFINE_PER_CPU(struct rseq_stats, rseq_stats);
-static int rseq_debug_show(struct seq_file *m, void *p)
+static int rseq_stats_show(struct seq_file *m, void *p)
{
struct rseq_stats stats = { };
unsigned int cpu;
@@ -140,14 +162,56 @@ static int rseq_debug_show(struct seq_fi
return 0;
}
+static int rseq_stats_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, rseq_stats_show, inode->i_private);
+}
+
+static const struct file_operations stat_ops = {
+ .open = rseq_stats_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int __init rseq_stats_init(struct dentry *root_dir)
+{
+ debugfs_create_file("stats", 0444, root_dir, NULL, &stat_ops);
+ return 0;
+}
+#else
+static inline void rseq_stats_init(struct dentry *root_dir) { }
+#endif /* CONFIG_RSEQ_STATS */
+
+static int rseq_debug_show(struct seq_file *m, void *p)
+{
+ bool on = static_branch_unlikely(&rseq_debug_enabled);
+
+ seq_printf(m, "%d\n", on);
+ return 0;
+}
+
+static ssize_t rseq_debug_write(struct file *file, const char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ bool on;
+
+ if (kstrtobool_from_user(ubuf, count, &on))
+ return -EINVAL;
+
+ rseq_control_debug(on);
+ return count;
+}
+
static int rseq_debug_open(struct inode *inode, struct file *file)
{
return single_open(file, rseq_debug_show, inode->i_private);
}
-static const struct file_operations dfs_ops = {
+static const struct file_operations debug_ops = {
.open = rseq_debug_open,
.read = seq_read,
+ .write = rseq_debug_write,
.llseek = seq_lseek,
.release = single_release,
};
@@ -156,11 +220,12 @@ static int __init rseq_debugfs_init(void
{
struct dentry *root_dir = debugfs_create_dir("rseq", NULL);
- debugfs_create_file("stats", 0444, root_dir, NULL, &dfs_ops);
+ debugfs_create_file("debug", 0644, root_dir, NULL, &debug_ops);
+ rseq_stats_init(root_dir);
return 0;
}
__initcall(rseq_debugfs_init);
-#endif /* CONFIG_RSEQ_STATS */
+#endif /* CONFIG_DEBUG_FS */
#ifdef CONFIG_DEBUG_RSEQ
static struct rseq *rseq_kernel_fields(struct task_struct *t)
Provide a straightforward implementation to check for and eventually
clear/fixup critical sections in user space.
The non-debug version does only the minimal sanity checks and aims for
efficiency.
There are two attack vectors, which are checked for:
1) An abort IP which is in the kernel address space. That would cause at
least x86 to return to kernel space via IRET.
2) A rogue critical section descriptor with an abort IP pointing to some
arbitrary address, which is not preceded by the RSEQ signature.
If the section descriptors are invalid then the resulting misbehaviour of
the user space application is not the kernel's problem.
The kernel provides a run-time switchable debug slow path, which implements
the full zoo of checks including termination of the task when one of the
gazillion conditions is not met.
Replace the zoo in rseq.c with it and invoke it from the TIF_NOTIFY_RESUME
handler. Move the remainders into the CONFIG_DEBUG_RSEQ section, which will
be replaced and removed in a subsequent step.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
V3: Brought back the signature check along with a comment - Mathieu
---
include/linux/rseq_entry.h | 209 +++++++++++++++++++++++++++++++++++++++
include/linux/rseq_types.h | 11 +-
kernel/rseq.c | 238 +++++++++++++--------------------------------
3 files changed, 288 insertions(+), 170 deletions(-)
--- a/include/linux/rseq_entry.h
+++ b/include/linux/rseq_entry.h
@@ -36,6 +36,7 @@ DECLARE_PER_CPU(struct rseq_stats, rseq_
#ifdef CONFIG_RSEQ
#include <linux/jump_label.h>
#include <linux/rseq.h>
+#include <linux/uaccess.h>
#include <linux/tracepoint-defs.h>
@@ -67,12 +68,220 @@ static inline void rseq_trace_ip_fixup(u
DECLARE_STATIC_KEY_MAYBE(CONFIG_RSEQ_DEBUG_DEFAULT_ENABLE, rseq_debug_enabled);
+#ifdef RSEQ_BUILD_SLOW_PATH
+#define rseq_inline
+#else
+#define rseq_inline __always_inline
+#endif
+
+bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr);
+
static __always_inline void rseq_note_user_irq_entry(void)
{
if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY))
current->rseq.event.user_irq = true;
}
+/*
+ * Check whether there is a valid critical section and whether the
+ * instruction pointer in @regs is inside the critical section.
+ *
+ * - If the critical section is invalid, terminate the task.
+ *
+ * - If valid and the instruction pointer is inside, set it to the abort IP
+ *
+ * - If valid and the instruction pointer is outside, clear the critical
+ * section address.
+ *
+ * Returns true, if the section was valid and either fixup or clear was
+ * done, false otherwise.
+ *
+ * In the failure case task::rseq_event::fatal is set when an invalid
+ * section was found. It's clear when the failure was an unresolved page
+ * fault.
+ *
+ * If inlined into the exit to user path with interrupts disabled, the
+ * caller has to protect against page faults with pagefault_disable().
+ *
+ * In preemptible task context this would be counterproductive as the page
+ * faults could not be fully resolved. As a consequence unresolved page
+ * faults in task context are fatal too.
+ */
+
+#ifdef RSEQ_BUILD_SLOW_PATH
+/*
+ * The debug version is put out of line, but kept here so the code stays
+ * together.
+ *
+ * @csaddr has already been checked by the caller to be in user space
+ */
+bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr)
+{
+ struct rseq_cs __user *ucs = (struct rseq_cs __user *)(unsigned long)csaddr;
+ u64 start_ip, abort_ip, offset, cs_end, head, tasksize = TASK_SIZE;
+ unsigned long ip = instruction_pointer(regs);
+ u64 __user *uc_head = (u64 __user *) ucs;
+ u32 usig, __user *uc_sig;
+
+ if (!user_rw_masked_begin(ucs))
+ return false;
+
+ /*
+ * Evaluate the user pile and exit if one of the conditions is not
+ * fulfilled.
+ */
+ unsafe_get_user(start_ip, &ucs->start_ip, fail);
+ if (unlikely(start_ip >= tasksize))
+ goto die;
+ /* If outside, just clear the critical section. */
+ if (ip < start_ip)
+ goto clear;
+
+ unsafe_get_user(offset, &ucs->post_commit_offset, fail);
+ cs_end = start_ip + offset;
+ /* Check for overflow and wraparound */
+ if (unlikely(cs_end >= tasksize || cs_end < start_ip))
+ goto die;
+
+ /* If not inside, clear it. */
+ if (ip >= cs_end)
+ goto clear;
+
+ unsafe_get_user(abort_ip, &ucs->abort_ip, fail);
+ /* Ensure it's "valid" */
+ if (unlikely(abort_ip >= tasksize || abort_ip < sizeof(*uc_sig)))
+ goto die;
+ /* Validate that the abort IP is not in the critical section */
+ if (unlikely(abort_ip - start_ip < offset))
+ goto die;
+
+ /*
+ * Check version and flags for 0. No point in emitting deprecated
+ * warnings before dying. That could be done in the slow path
+ * eventually, but *shrug*.
+ */
+ unsafe_get_user(head, uc_head, fail);
+ if (unlikely(head))
+ goto die;
+
+ /* abort_ip - 4 is >= 0. See abort_ip check above */
+ uc_sig = (u32 __user *)(unsigned long)(abort_ip - sizeof(*uc_sig));
+ unsafe_get_user(usig, uc_sig, fail);
+ if (unlikely(usig != t->rseq.sig))
+ goto die;
+
+ /* rseq_event.user_irq is only valid if CONFIG_GENERIC_IRQ_ENTRY=y */
+ if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
+ /* If not in interrupt from user context, let it die */
+ if (unlikely(!t->rseq.event.user_irq))
+ goto die;
+ }
+
+ unsafe_put_user(0ULL, &t->rseq.usrptr->rseq_cs, fail);
+ user_access_end();
+
+ instruction_pointer_set(regs, (unsigned long)abort_ip);
+
+ rseq_stat_inc(rseq_stats.fixup);
+ rseq_trace_ip_fixup(ip, start_ip, offset, abort_ip);
+ return true;
+clear:
+ unsafe_put_user(0ULL, &t->rseq.usrptr->rseq_cs, fail);
+ user_access_end();
+ rseq_stat_inc(rseq_stats.clear);
+ return true;
+die:
+ t->rseq.event.fatal = true;
+fail:
+ user_access_end();
+ return false;
+}
+#endif /* RSEQ_BUILD_SLOW_PATH */
+
+/*
+ * This only ensures that abort_ip is in the user address space by masking
+ * it and that the abort IP is preceded by the RSEQ signature. All other
+ * sanity checks are left to the debug code.
+ */
+static rseq_inline bool
+rseq_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr)
+{
+ struct rseq_cs __user *ucs = (struct rseq_cs __user *)(unsigned long)csaddr;
+ unsigned long ip = instruction_pointer(regs);
+ u64 start_ip, abort_ip, offset;
+ u32 usig, __user *uc_sig;
+
+ rseq_stat_inc(rseq_stats.cs);
+
+ if (unlikely(csaddr >= TASK_SIZE)) {
+ t->rseq.event.fatal = true;
+ return false;
+ }
+
+ if (static_branch_unlikely(&rseq_debug_enabled))
+ return rseq_debug_update_user_cs(t, regs, csaddr);
+
+ if (!user_rw_masked_begin(ucs))
+ return false;
+
+ unsafe_get_user(start_ip, &ucs->start_ip, fail);
+ unsafe_get_user(offset, &ucs->post_commit_offset, fail);
+ unsafe_get_user(abort_ip, &ucs->abort_ip, fail);
+
+ /*
+ * No sanity checks. If user space screwed it up, it can
+ * keep the pieces. That's what debug code is for.
+ *
+ * If outside, just clear the critical section.
+ */
+ if (ip - start_ip >= offset)
+ goto clear;
+
+ /*
+ * Two requirements for @abort_ip:
+ * - Must be in user space as x86 IRET would happily return to
+ * the kernel.
+ * - The four bytes preceding the instruction at @abort_ip must
+ * contain the signature.
+ *
+ * The latter protects against the following attack vector:
+ *
+ * An attacker with limited abilities to write, creates a critical
+ * section descriptor, sets the abort IP to a library function or
+ * some other ROP gadget and stores the address of the descriptor
+ * in TLS::rseq::rseq_cs. An RSEQ abort would then evade ROP
+ * protection.
+ */
+ if (abort_ip >= TASK_SIZE || abort_ip < sizeof(*uc_sig))
+ goto die;
+
+ /* The address is guaranteed to be >= 0 and < TASK_SIZE */
+ uc_sig = (u32 __user *)(unsigned long)(abort_ip - sizeof(*uc_sig));
+ unsafe_get_user(usig, uc_sig, fail);
+ if (unlikely(usig != t->rseq.sig))
+ goto die;
+
+ /* Invalidate the critical section */
+ unsafe_put_user(0ULL, &t->rseq.usrptr->rseq_cs, fail);
+ user_access_end();
+
+ /* Update the instruction pointer */
+ instruction_pointer_set(regs, (unsigned long)abort_ip);
+
+ rseq_stat_inc(rseq_stats.fixup);
+ rseq_trace_ip_fixup(ip, start_ip, offset, abort_ip);
+ return true;
+clear:
+ unsafe_put_user(0ULL, &t->rseq.usrptr->rseq_cs, fail);
+ user_access_end();
+ rseq_stat_inc(rseq_stats.clear);
+ return true;
+die:
+ t->rseq.event.fatal = true;
+fail:
+ user_access_end();
+ return false;
+}
+
static __always_inline void rseq_exit_to_user_mode(void)
{
struct rseq_event *ev = ¤t->rseq.event;
--- a/include/linux/rseq_types.h
+++ b/include/linux/rseq_types.h
@@ -14,10 +14,12 @@ struct rseq;
* @sched_switch: True if the task was scheduled out
* @user_irq: True on interrupt entry from user mode
* @has_rseq: True if the task has a rseq pointer installed
+ * @error: Compound error code for the slow path to analyze
+ * @fatal: User space data corrupted or invalid
*/
struct rseq_event {
union {
- u32 all;
+ u64 all;
struct {
union {
u16 events;
@@ -28,6 +30,13 @@ struct rseq_event {
};
u8 has_rseq;
+ u8 __pad;
+ union {
+ u16 error;
+ struct {
+ u8 fatal;
+ };
+ };
};
};
};
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -382,175 +382,15 @@ static int rseq_reset_rseq_cpu_node_id(s
return -EFAULT;
}
-/*
- * Get the user-space pointer value stored in the 'rseq_cs' field.
- */
-static int rseq_get_rseq_cs_ptr_val(struct rseq __user *rseq, u64 *rseq_cs)
-{
- if (!rseq_cs)
- return -EFAULT;
-
-#ifdef CONFIG_64BIT
- if (get_user(*rseq_cs, &rseq->rseq_cs))
- return -EFAULT;
-#else
- if (copy_from_user(rseq_cs, &rseq->rseq_cs, sizeof(*rseq_cs)))
- return -EFAULT;
-#endif
-
- return 0;
-}
-
-/*
- * If the rseq_cs field of 'struct rseq' contains a valid pointer to
- * user-space, copy 'struct rseq_cs' from user-space and validate its fields.
- */
-static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs)
-{
- struct rseq_cs __user *urseq_cs;
- u64 ptr;
- u32 __user *usig;
- u32 sig;
- int ret;
-
- ret = rseq_get_rseq_cs_ptr_val(t->rseq.usrptr, &ptr);
- if (ret)
- return ret;
-
- /* If the rseq_cs pointer is NULL, return a cleared struct rseq_cs. */
- if (!ptr) {
- memset(rseq_cs, 0, sizeof(*rseq_cs));
- return 0;
- }
- /* Check that the pointer value fits in the user-space process space. */
- if (ptr >= TASK_SIZE)
- return -EINVAL;
- urseq_cs = (struct rseq_cs __user *)(unsigned long)ptr;
- if (copy_from_user(rseq_cs, urseq_cs, sizeof(*rseq_cs)))
- return -EFAULT;
-
- if (rseq_cs->start_ip >= TASK_SIZE ||
- rseq_cs->start_ip + rseq_cs->post_commit_offset >= TASK_SIZE ||
- rseq_cs->abort_ip >= TASK_SIZE ||
- rseq_cs->version > 0)
- return -EINVAL;
- /* Check for overflow. */
- if (rseq_cs->start_ip + rseq_cs->post_commit_offset < rseq_cs->start_ip)
- return -EINVAL;
- /* Ensure that abort_ip is not in the critical section. */
- if (rseq_cs->abort_ip - rseq_cs->start_ip < rseq_cs->post_commit_offset)
- return -EINVAL;
-
- usig = (u32 __user *)(unsigned long)(rseq_cs->abort_ip - sizeof(u32));
- ret = get_user(sig, usig);
- if (ret)
- return ret;
-
- if (current->rseq.sig != sig) {
- printk_ratelimited(KERN_WARNING
- "Possible attack attempt. Unexpected rseq signature 0x%x, expecting 0x%x (pid=%d, addr=%p).\n",
- sig, current->rseq.sig, current->pid, usig);
- return -EINVAL;
- }
- return 0;
-}
-
-static bool rseq_warn_flags(const char *str, u32 flags)
+static bool rseq_handle_cs(struct task_struct *t, struct pt_regs *regs)
{
- u32 test_flags;
+ u64 csaddr;
- if (!flags)
+ if (get_user_masked_u64(&csaddr, &t->rseq.usrptr->rseq_cs))
return false;
- test_flags = flags & RSEQ_CS_NO_RESTART_FLAGS;
- if (test_flags)
- pr_warn_once("Deprecated flags (%u) in %s ABI structure", test_flags, str);
- test_flags = flags & ~RSEQ_CS_NO_RESTART_FLAGS;
- if (test_flags)
- pr_warn_once("Unknown flags (%u) in %s ABI structure", test_flags, str);
- return true;
-}
-
-static int rseq_check_flags(struct task_struct *t, u32 cs_flags)
-{
- u32 flags;
- int ret;
-
- if (rseq_warn_flags("rseq_cs", cs_flags))
- return -EINVAL;
-
- /* Get thread flags. */
- ret = get_user(flags, &t->rseq.usrptr->flags);
- if (ret)
- return ret;
-
- if (rseq_warn_flags("rseq", flags))
- return -EINVAL;
- return 0;
-}
-
-static int clear_rseq_cs(struct rseq __user *rseq)
-{
- /*
- * The rseq_cs field is set to NULL on preemption or signal
- * delivery on top of rseq assembly block, as well as on top
- * of code outside of the rseq assembly block. This performs
- * a lazy clear of the rseq_cs field.
- *
- * Set rseq_cs to NULL.
- */
-#ifdef CONFIG_64BIT
- return put_user(0UL, &rseq->rseq_cs);
-#else
- if (clear_user(&rseq->rseq_cs, sizeof(rseq->rseq_cs)))
- return -EFAULT;
- return 0;
-#endif
-}
-
-/*
- * Unsigned comparison will be true when ip >= start_ip, and when
- * ip < start_ip + post_commit_offset.
- */
-static bool in_rseq_cs(unsigned long ip, struct rseq_cs *rseq_cs)
-{
- return ip - rseq_cs->start_ip < rseq_cs->post_commit_offset;
-}
-
-static int rseq_ip_fixup(struct pt_regs *regs, bool abort)
-{
- unsigned long ip = instruction_pointer(regs);
- struct task_struct *t = current;
- struct rseq_cs rseq_cs;
- int ret;
-
- rseq_stat_inc(rseq_stats.cs);
-
- ret = rseq_get_rseq_cs(t, &rseq_cs);
- if (ret)
- return ret;
-
- /*
- * Handle potentially not being within a critical section.
- * If not nested over a rseq critical section, restart is useless.
- * Clear the rseq_cs pointer and return.
- */
- if (!in_rseq_cs(ip, &rseq_cs)) {
- rseq_stat_inc(rseq_stats.clear);
- return clear_rseq_cs(t->rseq.usrptr);
- }
- ret = rseq_check_flags(t, rseq_cs.flags);
- if (ret < 0)
- return ret;
- if (!abort)
- return 0;
- ret = clear_rseq_cs(t->rseq.usrptr);
- if (ret)
- return ret;
- rseq_stat_inc(rseq_stats.fixup);
- trace_rseq_ip_fixup(ip, rseq_cs.start_ip, rseq_cs.post_commit_offset,
- rseq_cs.abort_ip);
- instruction_pointer_set(regs, (unsigned long)rseq_cs.abort_ip);
- return 0;
+ if (likely(!csaddr))
+ return true;
+ return rseq_update_user_cs(t, regs, csaddr);
}
/*
@@ -567,8 +407,8 @@ static int rseq_ip_fixup(struct pt_regs
void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs)
{
struct task_struct *t = current;
- int ret, sig;
bool event;
+ int sig;
/*
* If invoked from hypervisors before entering the guest via
@@ -618,8 +458,7 @@ void __rseq_handle_notify_resume(struct
if (!IS_ENABLED(CONFIG_DEBUG_RSEQ) && !event)
return;
- ret = rseq_ip_fixup(regs, event);
- if (unlikely(ret < 0))
+ if (!rseq_handle_cs(t, regs))
goto error;
if (unlikely(rseq_update_cpu_node_id(t)))
@@ -632,6 +471,67 @@ void __rseq_handle_notify_resume(struct
}
#ifdef CONFIG_DEBUG_RSEQ
+/*
+ * Unsigned comparison will be true when ip >= start_ip, and when
+ * ip < start_ip + post_commit_offset.
+ */
+static bool in_rseq_cs(unsigned long ip, struct rseq_cs *rseq_cs)
+{
+ return ip - rseq_cs->start_ip < rseq_cs->post_commit_offset;
+}
+
+/*
+ * If the rseq_cs field of 'struct rseq' contains a valid pointer to
+ * user-space, copy 'struct rseq_cs' from user-space and validate its fields.
+ */
+static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs)
+{
+ struct rseq_cs __user *urseq_cs;
+ u64 ptr;
+ u32 __user *usig;
+ u32 sig;
+ int ret;
+
+ if (get_user_masked_u64(&ptr, &t->rseq.usrptr->rseq_cs))
+ return -EFAULT;
+
+ /* If the rseq_cs pointer is NULL, return a cleared struct rseq_cs. */
+ if (!ptr) {
+ memset(rseq_cs, 0, sizeof(*rseq_cs));
+ return 0;
+ }
+ /* Check that the pointer value fits in the user-space process space. */
+ if (ptr >= TASK_SIZE)
+ return -EINVAL;
+ urseq_cs = (struct rseq_cs __user *)(unsigned long)ptr;
+ if (copy_from_user(rseq_cs, urseq_cs, sizeof(*rseq_cs)))
+ return -EFAULT;
+
+ if (rseq_cs->start_ip >= TASK_SIZE ||
+ rseq_cs->start_ip + rseq_cs->post_commit_offset >= TASK_SIZE ||
+ rseq_cs->abort_ip >= TASK_SIZE ||
+ rseq_cs->version > 0)
+ return -EINVAL;
+ /* Check for overflow. */
+ if (rseq_cs->start_ip + rseq_cs->post_commit_offset < rseq_cs->start_ip)
+ return -EINVAL;
+ /* Ensure that abort_ip is not in the critical section. */
+ if (rseq_cs->abort_ip - rseq_cs->start_ip < rseq_cs->post_commit_offset)
+ return -EINVAL;
+
+ usig = (u32 __user *)(unsigned long)(rseq_cs->abort_ip - sizeof(u32));
+ ret = get_user(sig, usig);
+ if (ret)
+ return ret;
+
+ if (current->rseq.sig != sig) {
+ printk_ratelimited(KERN_WARNING
+ "Possible attack attempt. Unexpected rseq signature 0x%x, expecting 0x%x (pid=%d, addr=%p).\n",
+ sig, current->rseq.sig, current->pid, usig);
+ return -EINVAL;
+ }
+ return 0;
+}
/*
* Terminate the process if a syscall is issued within a restartable
On 2025-09-08 17:32, Thomas Gleixner wrote:
> Provide a straight forward implementation to check for and eventually
> clear/fixup critical sections in user space.
>
> The non-debug version does only the minimal sanity checks and aims for
> efficiency.
>
> There are two attack vectors, which are checked for:
>
> 1) An abort IP which is in the kernel address space. That would cause at
> least x86 to return to kernel space via IRET.
>
> 2) A rogue critical section descriptor with an abort IP pointing to some
> arbitrary address, which is not preceeded by the RSEQ signature.
preceeded -> preceded
>
> If the section descriptors are invalid then the resulting misbehaviour of
> the user space application is not the kernels problem.
>
> The kernel provides a run-time switchable debug slow path, which implements
> the full zoo of checks including termination of the task when one of the
> gazillion conditions is not met.
>
> Replace the zoo in rseq.c with it and invoke it from the TIF_NOTIFY_RESUME
> handler. Move the reminders into the CONFIG_DEBUG_RSEQ section, which will
reminders -> remainders
> be replaced and removed in a subsequent step.
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
> V3: Brought back the signature check along with a comment - Mathieu
> ---
> include/linux/rseq_entry.h | 209 +++++++++++++++++++++++++++++++++++++++
> include/linux/rseq_types.h | 11 +-
> kernel/rseq.c | 238 +++++++++++++--------------------------------
> 3 files changed, 288 insertions(+), 170 deletions(-)
>
> --- a/include/linux/rseq_entry.h
> +++ b/include/linux/rseq_entry.h
> @@ -36,6 +36,7 @@ DECLARE_PER_CPU(struct rseq_stats, rseq_
> #ifdef CONFIG_RSEQ
> #include <linux/jump_label.h>
> #include <linux/rseq.h>
> +#include <linux/uaccess.h>
>
> #include <linux/tracepoint-defs.h>
>
> @@ -67,12 +68,220 @@ static inline void rseq_trace_ip_fixup(u
>
> DECLARE_STATIC_KEY_MAYBE(CONFIG_RSEQ_DEBUG_DEFAULT_ENABLE, rseq_debug_enabled);
>
> +#ifdef RSEQ_BUILD_SLOW_PATH
> +#define rseq_inline
> +#else
> +#define rseq_inline __always_inline
> +#endif
> +
> +bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr);
> +
> static __always_inline void rseq_note_user_irq_entry(void)
> {
> if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY))
> current->rseq.event.user_irq = true;
> }
>
> +/*
> + * Check whether there is a valid critical section and whether the
> + * instruction pointer in @regs is inside the critical section.
> + *
> + * - If the critical section is invalid, terminate the task.
> + *
> + * - If valid and the instruction pointer is inside, set it to the abort IP
> + *
> + * - If valid and the instruction pointer is outside, clear the critical
> + * section address.
> + *
> + * Returns true, if the section was valid and either fixup or clear was
> + * done, false otherwise.
> + *
> + * In the failure case task::rseq_event::fatal is set when a invalid
> + * section was found. It's clear when the failure was an unresolved page
> + * fault.
> + *
> + * If inlined into the exit to user path with interrupts disabled, the
> + * caller has to protect against page faults with pagefault_disable().
> + *
> + * In preemptible task context this would be counterproductive as the page
> + * faults could not be fully resolved. As a consequence unresolved page
> + * faults in task context are fatal too.
> + */
> +
> +#ifdef RSEQ_BUILD_SLOW_PATH
> +/*
> + * The debug version is put out of line, but kept here so the code stays
> + * together.
> + *
> + * @csaddr has already been checked by the caller to be in user space
> + */
> +bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr)
> +{
> + struct rseq_cs __user *ucs = (struct rseq_cs __user *)(unsigned long)csaddr;
> + u64 start_ip, abort_ip, offset, cs_end, head, tasksize = TASK_SIZE;
> + unsigned long ip = instruction_pointer(regs);
> + u64 __user *uc_head = (u64 __user *) ucs;
> + u32 usig, __user *uc_sig;
> +
> + if (!user_rw_masked_begin(ucs))
> + return false;
> +
> + /*
> + * Evaluate the user pile and exit if one of the conditions is not
> + * fulfilled.
> + */
> + unsafe_get_user(start_ip, &ucs->start_ip, fail);
> + if (unlikely(start_ip >= tasksize))
> + goto die;
> + /* If outside, just clear the critical section. */
> + if (ip < start_ip)
> + goto clear;
> +
> + unsafe_get_user(offset, &ucs->post_commit_offset, fail);
> + cs_end = start_ip + offset;
> + /* Check for overflow and wraparound */
> + if (unlikely(cs_end >= tasksize || cs_end < start_ip))
> + goto die;
> +
> + /* If not inside, clear it. */
> + if (ip >= cs_end)
> + goto clear;
> +
> + unsafe_get_user(abort_ip, &ucs->abort_ip, fail);
> + /* Ensure it's "valid" */
> + if (unlikely(abort_ip >= tasksize || abort_ip < sizeof(*uc_sig)))
> + goto die;
> + /* Validate that the abort IP is not in the critical section */
> + if (unlikely(abort_ip - start_ip < offset))
> + goto die;
> +
> + /*
> + * Check version and flags for 0. No point in emitting deprecated
> + * warnings before dying. That could be done in the slow path
> + * eventually, but *shrug*.
> + */
> + unsafe_get_user(head, uc_head, fail);
> + if (unlikely(head))
> + goto die;
> +
> + /* abort_ip - 4 is >= 0. See abort_ip check above */
> + uc_sig = (u32 __user *)(unsigned long)(abort_ip - sizeof(*uc_sig));
> + unsafe_get_user(usig, uc_sig, fail);
> + if (unlikely(usig != t->rseq.sig))
> + goto die;
> +
> + /* rseq_event.user_irq is only valid if CONFIG_GENERIC_IRQ_ENTRY=y */
> + if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
> + /* If not in interrupt from user context, let it die */
> + if (unlikely(!t->rseq.event.user_irq))
> + goto die;
> + }
> +
> + unsafe_put_user(0ULL, &t->rseq.usrptr->rseq_cs, fail);
> + user_access_end();
> +
> + instruction_pointer_set(regs, (unsigned long)abort_ip);
> +
> + rseq_stat_inc(rseq_stats.fixup);
> + rseq_trace_ip_fixup(ip, start_ip, offset, abort_ip);
> + return true;
> +clear:
> + unsafe_put_user(0ULL, &t->rseq.usrptr->rseq_cs, fail);
> + user_access_end();
> + rseq_stat_inc(rseq_stats.clear);
> + return true;
> +die:
> + t->rseq.event.fatal = true;
> +fail:
> + user_access_end();
> + return false;
> +}
> +#endif /* RSEQ_BUILD_SLOW_PATH */
> +
> +/*
> + * This only ensures that abort_ip is in the user address space by masking it.
> + * No other sanity checks are done here, that's what the debug code is for.
This comment should be updated to state that the signature is also
checked.
> + */
> +static rseq_inline bool
> +rseq_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr)
> +{
> + struct rseq_cs __user *ucs = (struct rseq_cs __user *)(unsigned long)csaddr;
> + unsigned long ip = instruction_pointer(regs);
> + u64 start_ip, abort_ip, offset;
> + u32 usig, __user *uc_sig;
> +
> + rseq_stat_inc(rseq_stats.cs);
> +
> + if (unlikely(csaddr >= TASK_SIZE)) {
> + t->rseq.event.fatal = true;
> + return false;
> + }
> +
> + if (static_branch_unlikely(&rseq_debug_enabled))
> + return rseq_debug_update_user_cs(t, regs, csaddr);
> +
> + if (!user_rw_masked_begin(ucs))
> + return false;
> +
> + unsafe_get_user(start_ip, &ucs->start_ip, fail);
> + unsafe_get_user(offset, &ucs->post_commit_offset, fail);
> + unsafe_get_user(abort_ip, &ucs->abort_ip, fail);
> +
> + /*
> + * No sanity checks. If user space screwed it up, it can
> + * keep the pieces. That's what debug code is for.
> + *
> + * If outside, just clear the critical section.
> + */
> + if (ip - start_ip >= offset)
> + goto clear;
> +
> + /*
> + * Two requirements for @abort_ip:
> + * - Must be in user space as x86 IRET would happily return to
> + * the kernel.
> + * - The four bytes preceeding the instruction at @abort_ip must
preceeding -> preceding
> + * contain the signature.
> + *
> + * The latter protects against the following attack vector:
> + *
> + * An attacker with limited abilities to write, creates a critical
> + * section descriptor, sets the abort IP to a library function or
> + * some other ROP gadget and stores the address of the descriptor
> + * in TLS::rseq::rseq_cs. An RSEQ abort would then evade ROP
> + * protection.
Thanks for documenting this.
Mathieu
> + */
> + if (abort_ip >= TASK_SIZE || abort_ip < sizeof(*uc_sig))
> + goto die;
> +
> + /* The address is guaranteed to be >= 0 and < TASK_SIZE */
> + uc_sig = (u32 __user *)(unsigned long)(abort_ip - sizeof(*uc_sig));
> + unsafe_get_user(usig, uc_sig, fail);
> + if (unlikely(usig != t->rseq.sig))
> + goto die;
> +
> + /* Invalidate the critical section */
> + unsafe_put_user(0ULL, &t->rseq.usrptr->rseq_cs, fail);
> + user_access_end();
> +
> + /* Update the instruction pointer */
> + instruction_pointer_set(regs, (unsigned long)abort_ip);
> +
> + rseq_stat_inc(rseq_stats.fixup);
> + rseq_trace_ip_fixup(ip, start_ip, offset, abort_ip);
> + return true;
> +clear:
> + unsafe_put_user(0ULL, &t->rseq.usrptr->rseq_cs, fail);
> + user_access_end();
> + rseq_stat_inc(rseq_stats.clear);
> + return true;
> +die:
> + t->rseq.event.fatal = true;
> +fail:
> + user_access_end();
> + return false;
> +}
> +
> static __always_inline void rseq_exit_to_user_mode(void)
> {
> struct rseq_event *ev = ¤t->rseq.event;
> --- a/include/linux/rseq_types.h
> +++ b/include/linux/rseq_types.h
> @@ -14,10 +14,12 @@ struct rseq;
> * @sched_switch: True if the task was scheduled out
> * @user_irq: True on interrupt entry from user mode
> * @has_rseq: True if the task has a rseq pointer installed
> + * @error: Compound error code for the slow path to analyze
> + * @fatal: User space data corrupted or invalid
> */
> struct rseq_event {
> union {
> - u32 all;
> + u64 all;
> struct {
> union {
> u16 events;
> @@ -28,6 +30,13 @@ struct rseq_event {
> };
>
> u8 has_rseq;
> + u8 __pad;
> + union {
> + u16 error;
> + struct {
> + u8 fatal;
> + };
> + };
> };
> };
> };
> --- a/kernel/rseq.c
> +++ b/kernel/rseq.c
> @@ -382,175 +382,15 @@ static int rseq_reset_rseq_cpu_node_id(s
> return -EFAULT;
> }
>
> -/*
> - * Get the user-space pointer value stored in the 'rseq_cs' field.
> - */
> -static int rseq_get_rseq_cs_ptr_val(struct rseq __user *rseq, u64 *rseq_cs)
> -{
> - if (!rseq_cs)
> - return -EFAULT;
> -
> -#ifdef CONFIG_64BIT
> - if (get_user(*rseq_cs, &rseq->rseq_cs))
> - return -EFAULT;
> -#else
> - if (copy_from_user(rseq_cs, &rseq->rseq_cs, sizeof(*rseq_cs)))
> - return -EFAULT;
> -#endif
> -
> - return 0;
> -}
> -
> -/*
> - * If the rseq_cs field of 'struct rseq' contains a valid pointer to
> - * user-space, copy 'struct rseq_cs' from user-space and validate its fields.
> - */
> -static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs)
> -{
> - struct rseq_cs __user *urseq_cs;
> - u64 ptr;
> - u32 __user *usig;
> - u32 sig;
> - int ret;
> -
> - ret = rseq_get_rseq_cs_ptr_val(t->rseq.usrptr, &ptr);
> - if (ret)
> - return ret;
> -
> - /* If the rseq_cs pointer is NULL, return a cleared struct rseq_cs. */
> - if (!ptr) {
> - memset(rseq_cs, 0, sizeof(*rseq_cs));
> - return 0;
> - }
> - /* Check that the pointer value fits in the user-space process space. */
> - if (ptr >= TASK_SIZE)
> - return -EINVAL;
> - urseq_cs = (struct rseq_cs __user *)(unsigned long)ptr;
> - if (copy_from_user(rseq_cs, urseq_cs, sizeof(*rseq_cs)))
> - return -EFAULT;
> -
> - if (rseq_cs->start_ip >= TASK_SIZE ||
> - rseq_cs->start_ip + rseq_cs->post_commit_offset >= TASK_SIZE ||
> - rseq_cs->abort_ip >= TASK_SIZE ||
> - rseq_cs->version > 0)
> - return -EINVAL;
> - /* Check for overflow. */
> - if (rseq_cs->start_ip + rseq_cs->post_commit_offset < rseq_cs->start_ip)
> - return -EINVAL;
> - /* Ensure that abort_ip is not in the critical section. */
> - if (rseq_cs->abort_ip - rseq_cs->start_ip < rseq_cs->post_commit_offset)
> - return -EINVAL;
> -
> - usig = (u32 __user *)(unsigned long)(rseq_cs->abort_ip - sizeof(u32));
> - ret = get_user(sig, usig);
> - if (ret)
> - return ret;
> -
> - if (current->rseq.sig != sig) {
> - printk_ratelimited(KERN_WARNING
> - "Possible attack attempt. Unexpected rseq signature 0x%x, expecting 0x%x (pid=%d, addr=%p).\n",
> - sig, current->rseq.sig, current->pid, usig);
> - return -EINVAL;
> - }
> - return 0;
> -}
> -
> -static bool rseq_warn_flags(const char *str, u32 flags)
> +static bool rseq_handle_cs(struct task_struct *t, struct pt_regs *regs)
> {
> - u32 test_flags;
> + u64 csaddr;
>
> - if (!flags)
> + if (get_user_masked_u64(&csaddr, &t->rseq.usrptr->rseq_cs))
> return false;
> - test_flags = flags & RSEQ_CS_NO_RESTART_FLAGS;
> - if (test_flags)
> - pr_warn_once("Deprecated flags (%u) in %s ABI structure", test_flags, str);
> - test_flags = flags & ~RSEQ_CS_NO_RESTART_FLAGS;
> - if (test_flags)
> - pr_warn_once("Unknown flags (%u) in %s ABI structure", test_flags, str);
> - return true;
> -}
> -
> -static int rseq_check_flags(struct task_struct *t, u32 cs_flags)
> -{
> - u32 flags;
> - int ret;
> -
> - if (rseq_warn_flags("rseq_cs", cs_flags))
> - return -EINVAL;
> -
> - /* Get thread flags. */
> - ret = get_user(flags, &t->rseq.usrptr->flags);
> - if (ret)
> - return ret;
> -
> - if (rseq_warn_flags("rseq", flags))
> - return -EINVAL;
> - return 0;
> -}
> -
> -static int clear_rseq_cs(struct rseq __user *rseq)
> -{
> - /*
> - * The rseq_cs field is set to NULL on preemption or signal
> - * delivery on top of rseq assembly block, as well as on top
> - * of code outside of the rseq assembly block. This performs
> - * a lazy clear of the rseq_cs field.
> - *
> - * Set rseq_cs to NULL.
> - */
> -#ifdef CONFIG_64BIT
> - return put_user(0UL, &rseq->rseq_cs);
> -#else
> - if (clear_user(&rseq->rseq_cs, sizeof(rseq->rseq_cs)))
> - return -EFAULT;
> - return 0;
> -#endif
> -}
> -
> -/*
> - * Unsigned comparison will be true when ip >= start_ip, and when
> - * ip < start_ip + post_commit_offset.
> - */
> -static bool in_rseq_cs(unsigned long ip, struct rseq_cs *rseq_cs)
> -{
> - return ip - rseq_cs->start_ip < rseq_cs->post_commit_offset;
> -}
> -
> -static int rseq_ip_fixup(struct pt_regs *regs, bool abort)
> -{
> - unsigned long ip = instruction_pointer(regs);
> - struct task_struct *t = current;
> - struct rseq_cs rseq_cs;
> - int ret;
> -
> - rseq_stat_inc(rseq_stats.cs);
> -
> - ret = rseq_get_rseq_cs(t, &rseq_cs);
> - if (ret)
> - return ret;
> -
> - /*
> - * Handle potentially not being within a critical section.
> - * If not nested over a rseq critical section, restart is useless.
> - * Clear the rseq_cs pointer and return.
> - */
> - if (!in_rseq_cs(ip, &rseq_cs)) {
> - rseq_stat_inc(rseq_stats.clear);
> - return clear_rseq_cs(t->rseq.usrptr);
> - }
> - ret = rseq_check_flags(t, rseq_cs.flags);
> - if (ret < 0)
> - return ret;
> - if (!abort)
> - return 0;
> - ret = clear_rseq_cs(t->rseq.usrptr);
> - if (ret)
> - return ret;
> - rseq_stat_inc(rseq_stats.fixup);
> - trace_rseq_ip_fixup(ip, rseq_cs.start_ip, rseq_cs.post_commit_offset,
> - rseq_cs.abort_ip);
> - instruction_pointer_set(regs, (unsigned long)rseq_cs.abort_ip);
> - return 0;
> + if (likely(!csaddr))
> + return true;
> + return rseq_update_user_cs(t, regs, csaddr);
> }
>
> /*
> @@ -567,8 +407,8 @@ static int rseq_ip_fixup(struct pt_regs
> void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs)
> {
> struct task_struct *t = current;
> - int ret, sig;
> bool event;
> + int sig;
>
> /*
> * If invoked from hypervisors before entering the guest via
> @@ -618,8 +458,7 @@ void __rseq_handle_notify_resume(struct
> if (!IS_ENABLED(CONFIG_DEBUG_RSEQ) && !event)
> return;
>
> - ret = rseq_ip_fixup(regs, event);
> - if (unlikely(ret < 0))
> + if (!rseq_handle_cs(t, regs))
> goto error;
>
> if (unlikely(rseq_update_cpu_node_id(t)))
> @@ -632,6 +471,67 @@ void __rseq_handle_notify_resume(struct
> }
>
> #ifdef CONFIG_DEBUG_RSEQ
> +/*
> + * Unsigned comparison will be true when ip >= start_ip, and when
> + * ip < start_ip + post_commit_offset.
> + */
> +static bool in_rseq_cs(unsigned long ip, struct rseq_cs *rseq_cs)
> +{
> + return ip - rseq_cs->start_ip < rseq_cs->post_commit_offset;
> +}
> +
> +/*
> + * If the rseq_cs field of 'struct rseq' contains a valid pointer to
> + * user-space, copy 'struct rseq_cs' from user-space and validate its fields.
> + */
> +static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs)
> +{
> + struct rseq_cs __user *urseq_cs;
> + u64 ptr;
> + u32 __user *usig;
> + u32 sig;
> + int ret;
> +
> + if (get_user_masked_u64(&ptr, &t->rseq.usrptr->rseq_cs))
> + return -EFAULT;
> +
> + /* If the rseq_cs pointer is NULL, return a cleared struct rseq_cs. */
> + if (!ptr) {
> + memset(rseq_cs, 0, sizeof(*rseq_cs));
> + return 0;
> + }
> + /* Check that the pointer value fits in the user-space process space. */
> + if (ptr >= TASK_SIZE)
> + return -EINVAL;
> + urseq_cs = (struct rseq_cs __user *)(unsigned long)ptr;
> + if (copy_from_user(rseq_cs, urseq_cs, sizeof(*rseq_cs)))
> + return -EFAULT;
> +
> + if (rseq_cs->start_ip >= TASK_SIZE ||
> + rseq_cs->start_ip + rseq_cs->post_commit_offset >= TASK_SIZE ||
> + rseq_cs->abort_ip >= TASK_SIZE ||
> + rseq_cs->version > 0)
> + return -EINVAL;
> + /* Check for overflow. */
> + if (rseq_cs->start_ip + rseq_cs->post_commit_offset < rseq_cs->start_ip)
> + return -EINVAL;
> + /* Ensure that abort_ip is not in the critical section. */
> + if (rseq_cs->abort_ip - rseq_cs->start_ip < rseq_cs->post_commit_offset)
> + return -EINVAL;
> +
> + usig = (u32 __user *)(unsigned long)(rseq_cs->abort_ip - sizeof(u32));
> + ret = get_user(sig, usig);
> + if (ret)
> + return ret;
> +
> + if (current->rseq.sig != sig) {
> + printk_ratelimited(KERN_WARNING
> + "Possible attack attempt. Unexpected rseq signature 0x%x, expecting 0x%x (pid=%d, addr=%p).\n",
> + sig, current->rseq.sig, current->pid, usig);
> + return -EINVAL;
> + }
> + return 0;
> +}
>
> /*
> * Terminate the process if a syscall is issued within a restartable
>
--
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com
Just utilize the new infrastructure and put the original one to rest.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
kernel/rseq.c | 80 ++++++++--------------------------------------------------
1 file changed, 12 insertions(+), 68 deletions(-)
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -472,83 +472,27 @@ void __rseq_handle_notify_resume(struct
#ifdef CONFIG_DEBUG_RSEQ
/*
- * Unsigned comparison will be true when ip >= start_ip, and when
- * ip < start_ip + post_commit_offset.
- */
-static bool in_rseq_cs(unsigned long ip, struct rseq_cs *rseq_cs)
-{
- return ip - rseq_cs->start_ip < rseq_cs->post_commit_offset;
-}
-
-/*
- * If the rseq_cs field of 'struct rseq' contains a valid pointer to
- * user-space, copy 'struct rseq_cs' from user-space and validate its fields.
- */
-static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs)
-{
- struct rseq_cs __user *urseq_cs;
- u64 ptr;
- u32 __user *usig;
- u32 sig;
- int ret;
-
- if (get_user_masked_u64(&ptr, &t->rseq.usrptr->rseq_cs))
- return -EFAULT;
-
- /* If the rseq_cs pointer is NULL, return a cleared struct rseq_cs. */
- if (!ptr) {
- memset(rseq_cs, 0, sizeof(*rseq_cs));
- return 0;
- }
- /* Check that the pointer value fits in the user-space process space. */
- if (ptr >= TASK_SIZE)
- return -EINVAL;
- urseq_cs = (struct rseq_cs __user *)(unsigned long)ptr;
- if (copy_from_user(rseq_cs, urseq_cs, sizeof(*rseq_cs)))
- return -EFAULT;
-
- if (rseq_cs->start_ip >= TASK_SIZE ||
- rseq_cs->start_ip + rseq_cs->post_commit_offset >= TASK_SIZE ||
- rseq_cs->abort_ip >= TASK_SIZE ||
- rseq_cs->version > 0)
- return -EINVAL;
- /* Check for overflow. */
- if (rseq_cs->start_ip + rseq_cs->post_commit_offset < rseq_cs->start_ip)
- return -EINVAL;
- /* Ensure that abort_ip is not in the critical section. */
- if (rseq_cs->abort_ip - rseq_cs->start_ip < rseq_cs->post_commit_offset)
- return -EINVAL;
-
- usig = (u32 __user *)(unsigned long)(rseq_cs->abort_ip - sizeof(u32));
- ret = get_user(sig, usig);
- if (ret)
- return ret;
-
- if (current->rseq.sig != sig) {
- printk_ratelimited(KERN_WARNING
- "Possible attack attempt. Unexpected rseq signature 0x%x, expecting 0x%x (pid=%d, addr=%p).\n",
- sig, current->rseq.sig, current->pid, usig);
- return -EINVAL;
- }
- return 0;
-}
-
-/*
* Terminate the process if a syscall is issued within a restartable
* sequence.
*/
void rseq_syscall(struct pt_regs *regs)
{
- unsigned long ip = instruction_pointer(regs);
struct task_struct *t = current;
- struct rseq_cs rseq_cs;
+ u64 csaddr;
- if (!t->rseq.usrptr)
+ if (!t->rseq.event.has_rseq)
+ return;
+ if (get_user_masked_u64(&csaddr, &t->rseq.usrptr->rseq_cs))
+ goto fail;
+ if (likely(!csaddr))
return;
- if (rseq_get_rseq_cs(t, &rseq_cs) || in_rseq_cs(ip, &rseq_cs))
- force_sig(SIGSEGV);
+ if (unlikely(csaddr >= TASK_SIZE))
+ goto fail;
+ if (rseq_debug_update_user_cs(t, regs, csaddr))
+ return;
+fail:
+ force_sig(SIGSEGV);
}
-
#endif
/*
Disconnect it from the config switch and use the static debug branch. This
is a temporary measure for validating the rework. At the end this check
needs to be hidden behind lockdep as it has nothing to do with the other
debug infrastructure, which mainly aids user space debugging by enabling a
zoo of checks which terminate misbehaving tasks instead of letting them
keep the hard to diagnose pieces.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
include/linux/rseq_entry.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/include/linux/rseq_entry.h
+++ b/include/linux/rseq_entry.h
@@ -288,7 +288,7 @@ static __always_inline void rseq_exit_to
rseq_stat_inc(rseq_stats.exit);
- if (IS_ENABLED(CONFIG_DEBUG_RSEQ))
+ if (static_branch_unlikely(&rseq_debug_enabled))
WARN_ON_ONCE(ev->sched_switch);
/*
Make the syscall exit debug mechanism available via the static branch on
architectures which utilize the generic entry code.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
include/linux/entry-common.h | 2 +-
include/linux/rseq_entry.h | 9 +++++++++
kernel/rseq.c | 19 +++++++++++++------
3 files changed, 23 insertions(+), 7 deletions(-)
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -146,7 +146,7 @@ static __always_inline void syscall_exit
local_irq_enable();
}
- rseq_syscall(regs);
+ rseq_debug_syscall_return(regs);
/*
* Do one-time syscall specific work. If these work items are
--- a/include/linux/rseq_entry.h
+++ b/include/linux/rseq_entry.h
@@ -286,9 +286,18 @@ static __always_inline void rseq_exit_to
ev->events = 0;
}
+void __rseq_debug_syscall_return(struct pt_regs *regs);
+
+static inline void rseq_debug_syscall_return(struct pt_regs *regs)
+{
+ if (static_branch_unlikely(&rseq_debug_enabled))
+ __rseq_debug_syscall_return(regs);
+}
+
#else /* CONFIG_RSEQ */
static inline void rseq_note_user_irq_entry(void) { }
static inline void rseq_exit_to_user_mode(void) { }
+static inline void rseq_debug_syscall_return(struct pt_regs *regs) { }
#endif /* !CONFIG_RSEQ */
#endif /* _LINUX_RSEQ_ENTRY_H */
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -470,12 +470,7 @@ void __rseq_handle_notify_resume(struct
force_sigsegv(sig);
}
-#ifdef CONFIG_DEBUG_RSEQ
-/*
- * Terminate the process if a syscall is issued within a restartable
- * sequence.
- */
-void rseq_syscall(struct pt_regs *regs)
+void __rseq_debug_syscall_return(struct pt_regs *regs)
{
struct task_struct *t = current;
u64 csaddr;
@@ -493,6 +488,18 @@ void rseq_syscall(struct pt_regs *regs)
fail:
force_sig(SIGSEGV);
}
+
+#ifdef CONFIG_DEBUG_RSEQ
+/*
+ * Kept around to keep GENERIC_ENTRY=n architectures supported.
+ *
+ * Terminate the process if a syscall is issued within a restartable
+ * sequence.
+ */
+void rseq_syscall(struct pt_regs *regs)
+{
+ __rseq_debug_syscall_return(regs);
+}
#endif
/*
Provide a new and straight forward implementation to set the IDs (CPU ID,
Node ID and MM CID), which can be later inlined into the fast path.
It does all operations in one user_rw_masked_begin() section and retrieves
also the critical section member (rseq::cs_rseq) from user space to avoid
another user..begin/end() pair. This is in preparation for optimizing the
fast path to avoid extra work when not required.
Use it to replace the whole related zoo in rseq.c
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
V3: Fixed the node ID comparison in the debug path - Mathieu
---
fs/binfmt_elf.c | 2
include/linux/rseq_entry.h | 101 +++++++++++++++++++++
include/linux/sched.h | 10 --
kernel/rseq.c | 208 ++++++---------------------------------------
4 files changed, 134 insertions(+), 187 deletions(-)
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -46,7 +46,7 @@
#include <linux/cred.h>
#include <linux/dax.h>
#include <linux/uaccess.h>
-#include <linux/rseq.h>
+#include <uapi/linux/rseq.h>
#include <asm/param.h>
#include <asm/page.h>
--- a/include/linux/rseq_entry.h
+++ b/include/linux/rseq_entry.h
@@ -38,6 +38,8 @@ DECLARE_PER_CPU(struct rseq_stats, rseq_
#include <linux/rseq.h>
#include <linux/uaccess.h>
+#include <uapi/linux/rseq.h>
+
#include <linux/tracepoint-defs.h>
#ifdef CONFIG_TRACEPOINTS
@@ -75,6 +77,7 @@ DECLARE_STATIC_KEY_MAYBE(CONFIG_RSEQ_DEB
#endif
bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr);
+bool rseq_debug_validate_ids(struct task_struct *t);
static __always_inline void rseq_note_user_irq_entry(void)
{
@@ -196,6 +199,50 @@ bool rseq_debug_update_user_cs(struct ta
user_access_end();
return false;
}
+
+/*
+ * On debug kernels validate that user space did not mess with it if
+ * DEBUG_RSEQ is enabled, but don't on the first exit to user space. In
+ * that case cpu_cid is ~0. See fork/execve.
+ */
+bool rseq_debug_validate_ids(struct task_struct *t)
+{
+ struct rseq __user *rseq = t->rseq.usrptr;
+ u32 cpu_id, uval, node_id;
+
+ if (t->rseq.ids.cpu_cid == ~0)
+ return true;
+
+ /*
+ * Look it up outside of the user access section as cpu_to_node()
+ * can end up in debug code.
+ */
+ node_id = cpu_to_node(t->rseq.ids.cpu_id);
+
+ if (!user_read_masked_begin(rseq))
+ return false;
+
+ unsafe_get_user(cpu_id, &rseq->cpu_id_start, efault);
+ if (cpu_id != t->rseq.ids.cpu_id)
+ goto die;
+ unsafe_get_user(uval, &rseq->cpu_id, efault);
+ if (uval != cpu_id)
+ goto die;
+ unsafe_get_user(uval, &rseq->node_id, efault);
+ if (uval != node_id)
+ goto die;
+ unsafe_get_user(uval, &rseq->mm_cid, efault);
+ if (uval != t->rseq.ids.mm_cid)
+ goto die;
+ user_access_end();
+ return true;
+die:
+ t->rseq.event.fatal = true;
+efault:
+ user_access_end();
+ return false;
+}
+
#endif /* RSEQ_BUILD_SLOW_PATH */
/*
@@ -281,6 +328,60 @@ rseq_update_user_cs(struct task_struct *
user_access_end();
return false;
}
+
+/*
+ * Updates CPU ID, Node ID and MM CID and reads the critical section
+ * address, when @csaddr != NULL. This allows to put the ID update and the
+ * read under the same uaccess region to spare a separate begin/end.
+ *
+ * As this is either invoked from a C wrapper with @csaddr = NULL or from
+ * the fast path code with a valid pointer, a clever compiler should be
+ * able to optimize the read out. Spares a duplicate implementation.
+ *
+ * Returns true, if the operation was successful, false otherwise.
+ *
+ * In the failure case task::rseq_event::fatal is set when invalid data
+ * was found on debug kernels. It's clear when the failure was an unresolved page
+ * fault.
+ *
+ * If inlined into the exit to user path with interrupts disabled, the
+ * caller has to protect against page faults with pagefault_disable().
+ *
+ * In preemptible task context this would be counterproductive as the page
+ * faults could not be fully resolved. As a consequence unresolved page
+ * faults in task context are fatal too.
+ */
+static rseq_inline
+bool rseq_set_ids_get_csaddr(struct task_struct *t, struct rseq_ids *ids,
+ u32 node_id, u64 *csaddr)
+{
+ struct rseq __user *rseq = t->rseq.usrptr;
+
+ if (static_branch_unlikely(&rseq_debug_enabled)) {
+ if (!rseq_debug_validate_ids(t))
+ return false;
+ }
+
+ if (!user_rw_masked_begin(rseq))
+ return false;
+
+ unsafe_put_user(ids->cpu_id, &rseq->cpu_id_start, efault);
+ unsafe_put_user(ids->cpu_id, &rseq->cpu_id, efault);
+ unsafe_put_user(node_id, &rseq->node_id, efault);
+ unsafe_put_user(ids->mm_cid, &rseq->mm_cid, efault);
+ if (csaddr)
+ unsafe_get_user(*csaddr, &rseq->rseq_cs, efault);
+ user_access_end();
+
+ /* Cache the new values */
+ t->rseq.ids.cpu_cid = ids->cpu_cid;
+ rseq_stat_inc(rseq_stats.ids);
+ rseq_trace_update(t, ids);
+ return true;
+efault:
+ user_access_end();
+ return false;
+}
static __always_inline void rseq_exit_to_user_mode(void)
{
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -42,7 +42,6 @@
#include <linux/posix-timers_types.h>
#include <linux/restart_block.h>
#include <linux/rseq_types.h>
-#include <uapi/linux/rseq.h>
#include <linux/seqlock_types.h>
#include <linux/kcsan.h>
#include <linux/rv.h>
@@ -1402,15 +1401,6 @@ struct task_struct {
#endif /* CONFIG_NUMA_BALANCING */
struct rseq_data rseq;
-#ifdef CONFIG_DEBUG_RSEQ
- /*
- * This is a place holder to save a copy of the rseq fields for
- * validation of read-only fields. The struct rseq has a
- * variable-length array at the end, so it cannot be used
- * directly. Reserve a size large enough for the known fields.
- */
- char rseq_fields[sizeof(struct rseq)];
-#endif
#ifdef CONFIG_SCHED_MM_CID
int mm_cid; /* Current cid in mm */
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -88,13 +88,6 @@
# define RSEQ_EVENT_GUARD preempt
#endif
-/* The original rseq structure size (including padding) is 32 bytes. */
-#define ORIG_RSEQ_SIZE 32
-
-#define RSEQ_CS_NO_RESTART_FLAGS (RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT | \
- RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL | \
- RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE)
-
DEFINE_STATIC_KEY_MAYBE(CONFIG_RSEQ_DEBUG_DEFAULT_ENABLE, rseq_debug_enabled);
static inline void rseq_control_debug(bool on)
@@ -227,159 +220,9 @@ static int __init rseq_debugfs_init(void
__initcall(rseq_debugfs_init);
#endif /* CONFIG_DEBUG_FS */
-#ifdef CONFIG_DEBUG_RSEQ
-static struct rseq *rseq_kernel_fields(struct task_struct *t)
-{
- return (struct rseq *) t->rseq_fields;
-}
-
-static int rseq_validate_ro_fields(struct task_struct *t)
-{
- static DEFINE_RATELIMIT_STATE(_rs,
- DEFAULT_RATELIMIT_INTERVAL,
- DEFAULT_RATELIMIT_BURST);
- u32 cpu_id_start, cpu_id, node_id, mm_cid;
- struct rseq __user *rseq = t->rseq.usrptr;
-
- /*
- * Validate fields which are required to be read-only by
- * user-space.
- */
- if (!user_read_access_begin(rseq, t->rseq.len))
- goto efault;
- unsafe_get_user(cpu_id_start, &rseq->cpu_id_start, efault_end);
- unsafe_get_user(cpu_id, &rseq->cpu_id, efault_end);
- unsafe_get_user(node_id, &rseq->node_id, efault_end);
- unsafe_get_user(mm_cid, &rseq->mm_cid, efault_end);
- user_read_access_end();
-
- if ((cpu_id_start != rseq_kernel_fields(t)->cpu_id_start ||
- cpu_id != rseq_kernel_fields(t)->cpu_id ||
- node_id != rseq_kernel_fields(t)->node_id ||
- mm_cid != rseq_kernel_fields(t)->mm_cid) && __ratelimit(&_rs)) {
-
- pr_warn("Detected rseq corruption for pid: %d, name: %s\n"
- "\tcpu_id_start: %u ?= %u\n"
- "\tcpu_id: %u ?= %u\n"
- "\tnode_id: %u ?= %u\n"
- "\tmm_cid: %u ?= %u\n",
- t->pid, t->comm,
- cpu_id_start, rseq_kernel_fields(t)->cpu_id_start,
- cpu_id, rseq_kernel_fields(t)->cpu_id,
- node_id, rseq_kernel_fields(t)->node_id,
- mm_cid, rseq_kernel_fields(t)->mm_cid);
- }
-
- /* For now, only print a console warning on mismatch. */
- return 0;
-
-efault_end:
- user_read_access_end();
-efault:
- return -EFAULT;
-}
-
-/*
- * Update an rseq field and its in-kernel copy in lock-step to keep a coherent
- * state.
- */
-#define rseq_unsafe_put_user(t, value, field, error_label) \
- do { \
- unsafe_put_user(value, &t->rseq.usrptr->field, error_label); \
- rseq_kernel_fields(t)->field = value; \
- } while (0)
-
-#else
-static int rseq_validate_ro_fields(struct task_struct *t)
-{
- return 0;
-}
-
-#define rseq_unsafe_put_user(t, value, field, error_label) \
- unsafe_put_user(value, &t->rseq.usrptr->field, error_label)
-#endif
-
-static int rseq_update_cpu_node_id(struct task_struct *t)
-{
- struct rseq __user *rseq = t->rseq.usrptr;
- u32 cpu_id = raw_smp_processor_id();
- u32 node_id = cpu_to_node(cpu_id);
- u32 mm_cid = task_mm_cid(t);
-
- rseq_stat_inc(rseq_stats.ids);
-
- /* Validate read-only rseq fields on debug kernels */
- if (rseq_validate_ro_fields(t))
- goto efault;
- WARN_ON_ONCE((int) mm_cid < 0);
-
- if (!user_write_access_begin(rseq, t->rseq.len))
- goto efault;
-
- rseq_unsafe_put_user(t, cpu_id, cpu_id_start, efault_end);
- rseq_unsafe_put_user(t, cpu_id, cpu_id, efault_end);
- rseq_unsafe_put_user(t, node_id, node_id, efault_end);
- rseq_unsafe_put_user(t, mm_cid, mm_cid, efault_end);
-
- /* Cache the user space values */
- t->rseq.ids.cpu_id = cpu_id;
- t->rseq.ids.mm_cid = mm_cid;
-
- /*
- * Additional feature fields added after ORIG_RSEQ_SIZE
- * need to be conditionally updated only if
- * t->rseq_len != ORIG_RSEQ_SIZE.
- */
- user_write_access_end();
- trace_rseq_update(t);
- return 0;
-
-efault_end:
- user_write_access_end();
-efault:
- return -EFAULT;
-}
-
-static int rseq_reset_rseq_cpu_node_id(struct task_struct *t)
+static bool rseq_set_ids(struct task_struct *t, struct rseq_ids *ids, u32 node_id)
{
- struct rseq __user *rseq = t->rseq.usrptr;
- u32 cpu_id_start = 0, cpu_id = RSEQ_CPU_ID_UNINITIALIZED, node_id = 0,
- mm_cid = 0;
-
- /*
- * Validate read-only rseq fields.
- */
- if (rseq_validate_ro_fields(t))
- goto efault;
-
- if (!user_write_access_begin(rseq, t->rseq.len))
- goto efault;
-
- /*
- * Reset all fields to their initial state.
- *
- * All fields have an initial state of 0 except cpu_id which is set to
- * RSEQ_CPU_ID_UNINITIALIZED, so that any user coming in after
- * unregistration can figure out that rseq needs to be registered
- * again.
- */
- rseq_unsafe_put_user(t, cpu_id_start, cpu_id_start, efault_end);
- rseq_unsafe_put_user(t, cpu_id, cpu_id, efault_end);
- rseq_unsafe_put_user(t, node_id, node_id, efault_end);
- rseq_unsafe_put_user(t, mm_cid, mm_cid, efault_end);
-
- /*
- * Additional feature fields added after ORIG_RSEQ_SIZE
- * need to be conditionally reset only if
- * t->rseq_len != ORIG_RSEQ_SIZE.
- */
- user_write_access_end();
- return 0;
-
-efault_end:
- user_write_access_end();
-efault:
- return -EFAULT;
+ return rseq_set_ids_get_csaddr(t, ids, node_id, NULL);
}
static bool rseq_handle_cs(struct task_struct *t, struct pt_regs *regs)
@@ -407,6 +250,8 @@ static bool rseq_handle_cs(struct task_s
void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs)
{
struct task_struct *t = current;
+ struct rseq_ids ids;
+ u32 node_id;
bool event;
int sig;
@@ -453,6 +298,8 @@ void __rseq_handle_notify_resume(struct
scoped_guard(RSEQ_EVENT_GUARD) {
event = t->rseq.event.sched_switch;
t->rseq.event.sched_switch = false;
+ ids.cpu_id = task_cpu(t);
+ ids.mm_cid = task_mm_cid(t);
}
if (!IS_ENABLED(CONFIG_DEBUG_RSEQ) && !event)
@@ -461,7 +308,8 @@ void __rseq_handle_notify_resume(struct
if (!rseq_handle_cs(t, regs))
goto error;
- if (unlikely(rseq_update_cpu_node_id(t)))
+ node_id = cpu_to_node(ids.cpu_id);
+ if (!rseq_set_ids(t, &ids, node_id))
goto error;
return;
@@ -502,13 +350,33 @@ void rseq_syscall(struct pt_regs *regs)
}
#endif
+static bool rseq_reset_ids(void)
+{
+ struct rseq_ids ids = {
+ .cpu_id = RSEQ_CPU_ID_UNINITIALIZED,
+ .mm_cid = 0,
+ };
+
+ /*
+ * If this fails, terminate it because this leaves the kernel in
+ * stupid state as exit to user space will try to fixup the ids
+ * again.
+ */
+ if (rseq_set_ids(current, &ids, 0))
+ return true;
+
+ force_sig(SIGSEGV);
+ return false;
+}
+
+/* The original rseq structure size (including padding) is 32 bytes. */
+#define ORIG_RSEQ_SIZE 32
+
/*
* sys_rseq - setup restartable sequences for caller thread.
*/
SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32, sig)
{
- int ret;
-
if (flags & RSEQ_FLAG_UNREGISTER) {
if (flags & ~RSEQ_FLAG_UNREGISTER)
return -EINVAL;
@@ -519,9 +387,8 @@ SYSCALL_DEFINE4(rseq, struct rseq __user
return -EINVAL;
if (current->rseq.sig != sig)
return -EPERM;
- ret = rseq_reset_rseq_cpu_node_id(current);
- if (ret)
- return ret;
+ if (!rseq_reset_ids())
+ return -EFAULT;
rseq_reset(current);
return 0;
}
@@ -571,17 +438,6 @@ SYSCALL_DEFINE4(rseq, struct rseq __user
if (put_user_masked_u64(0UL, &rseq->rseq_cs))
return -EFAULT;
-#ifdef CONFIG_DEBUG_RSEQ
- /*
- * Initialize the in-kernel rseq fields copy for validation of
- * read-only fields.
- */
- if (get_user(rseq_kernel_fields(current)->cpu_id_start, &rseq->cpu_id_start) ||
- get_user(rseq_kernel_fields(current)->cpu_id, &rseq->cpu_id) ||
- get_user(rseq_kernel_fields(current)->node_id, &rseq->node_id) ||
- get_user(rseq_kernel_fields(current)->mm_cid, &rseq->mm_cid))
- return -EFAULT;
-#endif
/*
* Activate the registration by setting the rseq area address, length
* and signature in the task struct.
On 2025-09-08 17:32, Thomas Gleixner wrote:
> Provide a new and straight forward implementation to set the IDs (CPU ID,
> Node ID and MM CID), which can be later inlined into the fast path.
>
> It does all operations in one user_rw_masked_begin() section and retrieves
> also the critical section member (rseq::cs_rseq) from user space to avoid
> another user..begin/end() pair. This is in preparation for optimizing the
> fast path to avoid extra work when not required.
>
> Use it to replace the whole related zoo in rseq.c
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
> V3: Fixed the node ID comparison in the debug path - Mathieu
> ---
> fs/binfmt_elf.c | 2
> include/linux/rseq_entry.h | 101 +++++++++++++++++++++
> include/linux/sched.h | 10 --
> kernel/rseq.c | 208 ++++++---------------------------------------
> 4 files changed, 134 insertions(+), 187 deletions(-)
>
> --- a/fs/binfmt_elf.c
> +++ b/fs/binfmt_elf.c
> @@ -46,7 +46,7 @@
> #include <linux/cred.h>
> #include <linux/dax.h>
> #include <linux/uaccess.h>
> -#include <linux/rseq.h>
> +#include <uapi/linux/rseq.h>
> #include <asm/param.h>
> #include <asm/page.h>
>
> --- a/include/linux/rseq_entry.h
> +++ b/include/linux/rseq_entry.h
> @@ -38,6 +38,8 @@ DECLARE_PER_CPU(struct rseq_stats, rseq_
> #include <linux/rseq.h>
> #include <linux/uaccess.h>
>
> +#include <uapi/linux/rseq.h>
> +
> #include <linux/tracepoint-defs.h>
>
> #ifdef CONFIG_TRACEPOINTS
> @@ -75,6 +77,7 @@ DECLARE_STATIC_KEY_MAYBE(CONFIG_RSEQ_DEB
> #endif
>
> bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr);
> +bool rseq_debug_validate_ids(struct task_struct *t);
>
> static __always_inline void rseq_note_user_irq_entry(void)
> {
> @@ -196,6 +199,50 @@ bool rseq_debug_update_user_cs(struct ta
> user_access_end();
> return false;
> }
> +
> +/*
> + * On debug kernels validate that user space did not mess with it if
> + * DEBUG_RSEQ is enabled, but don't on the first exit to user space. In
> + * that case cpu_cid is ~0. See fork/execve.
> + */
> +bool rseq_debug_validate_ids(struct task_struct *t)
> +{
> + struct rseq __user *rseq = t->rseq.usrptr;
Why not check on NULL rseq user pointer rather than skip using
the ~0ULL cpu_cid ?
> + u32 cpu_id, uval, node_id;
> +
> + if (t->rseq.ids.cpu_cid == ~0)
Here we are using ~0 and where cpu_cid is set in rseq_set() ~0ULL is
used. Now I understand that because of sign-extend and type promotion
this happens to provide all bits set on a 64-bit, but can we please just
pick one, e.g. ~0ULL ?
Also why does rseq_fork() set it to ~0ULL rather than keep the value
from the parent when forking a new process ?
Whatever was present in the parent process in the rseq area should
still be in the child, including the rseq registration.
Or am missing something ?
> + return true;
> +
> + /*
> + * Look it up outside of the user access section as cpu_to_node()
> + * can end up in debug code.
> + */
> + node_id = cpu_to_node(t->rseq.ids.cpu_id);
> +
> + if (!user_read_masked_begin(rseq))
> + return false;
> +
> + unsafe_get_user(cpu_id, &rseq->cpu_id_start, efault);
> + if (cpu_id != t->rseq.ids.cpu_id)
> + goto die;
> + unsafe_get_user(uval, &rseq->cpu_id, efault);
> + if (uval != cpu_id)
> + goto die;
> + unsafe_get_user(uval, &rseq->node_id, efault);
> + if (uval != node_id)
> + goto die;
> + unsafe_get_user(uval, &rseq->mm_cid, efault);
> + if (uval != t->rseq.ids.mm_cid)
> + goto die;
> + user_access_end();
> + return true;
> +die:
> + t->rseq.event.fatal = true;
> +efault:
> + user_access_end();
> + return false;
> +}
> +
> #endif /* RSEQ_BUILD_SLOW_PATH */
>
> /*
> @@ -281,6 +328,60 @@ rseq_update_user_cs(struct task_struct *
> user_access_end();
> return false;
> }
> +
> +/*
> + * Updates CPU ID, Node ID and MM CID and reads the critical section
> + * address, when @csaddr != NULL. This allows to put the ID update and the
> + * read under the same uaccess region to spare a seperate begin/end.
separate
> + *
> + * As this is either invoked from a C wrapper with @csaddr = NULL or from
> + * the fast path code with a valid pointer, a clever compiler should be
> + * able to optimize the read out. Spares a duplicate implementation.
> + *
> + * Returns true, if the operation was successful, false otherwise.
> + *
> + * In the failure case task::rseq_event::fatal is set when invalid data
> + * was found on debug kernels. It's clear when the failure was an unresolved page
> + * fault.
> + *
> + * If inlined into the exit to user path with interrupts disabled, the
> + * caller has to protect against page faults with pagefault_disable().
> + *
> + * In preemptible task context this would be counterproductive as the page
> + * faults could not be fully resolved. As a consequence unresolved page
> + * faults in task context are fatal too.
> + */
> +static rseq_inline
> +bool rseq_set_ids_get_csaddr(struct task_struct *t, struct rseq_ids *ids,
> + u32 node_id, u64 *csaddr)
> +{
> + struct rseq __user *rseq = t->rseq.usrptr;
> +
> + if (static_branch_unlikely(&rseq_debug_enabled)) {
> + if (!rseq_debug_validate_ids(t))
> + return false;
> + }
> +
> + if (!user_rw_masked_begin(rseq))
> + return false;
> +
> + unsafe_put_user(ids->cpu_id, &rseq->cpu_id_start, efault);
> + unsafe_put_user(ids->cpu_id, &rseq->cpu_id, efault);
> + unsafe_put_user(node_id, &rseq->node_id, efault);
> + unsafe_put_user(ids->mm_cid, &rseq->mm_cid, efault);
> + if (csaddr)
> + unsafe_get_user(*csaddr, &rseq->rseq_cs, efault);
> + user_access_end();
> +
> + /* Cache the new values */
> + t->rseq.ids.cpu_cid = ids->cpu_cid;
> + rseq_stat_inc(rseq_stats.ids);
> + rseq_trace_update(t, ids);
> + return true;
> +efault:
> + user_access_end();
> + return false;
> +}
>
> static __always_inline void rseq_exit_to_user_mode(void)
> {
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -42,7 +42,6 @@
> #include <linux/posix-timers_types.h>
> #include <linux/restart_block.h>
> #include <linux/rseq_types.h>
> -#include <uapi/linux/rseq.h>
> #include <linux/seqlock_types.h>
> #include <linux/kcsan.h>
> #include <linux/rv.h>
> @@ -1402,15 +1401,6 @@ struct task_struct {
> #endif /* CONFIG_NUMA_BALANCING */
>
> struct rseq_data rseq;
> -#ifdef CONFIG_DEBUG_RSEQ
> - /*
> - * This is a place holder to save a copy of the rseq fields for
> - * validation of read-only fields. The struct rseq has a
> - * variable-length array at the end, so it cannot be used
> - * directly. Reserve a size large enough for the known fields.
> - */
> - char rseq_fields[sizeof(struct rseq)];
> -#endif
>
> #ifdef CONFIG_SCHED_MM_CID
> int mm_cid; /* Current cid in mm */
> --- a/kernel/rseq.c
> +++ b/kernel/rseq.c
> @@ -88,13 +88,6 @@
> # define RSEQ_EVENT_GUARD preempt
> #endif
>
> -/* The original rseq structure size (including padding) is 32 bytes. */
> -#define ORIG_RSEQ_SIZE 32
> -
> -#define RSEQ_CS_NO_RESTART_FLAGS (RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT | \
> - RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL | \
> - RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE)
> -
> DEFINE_STATIC_KEY_MAYBE(CONFIG_RSEQ_DEBUG_DEFAULT_ENABLE, rseq_debug_enabled);
>
> static inline void rseq_control_debug(bool on)
> @@ -227,159 +220,9 @@ static int __init rseq_debugfs_init(void
> __initcall(rseq_debugfs_init);
> #endif /* CONFIG_DEBUG_FS */
>
> -#ifdef CONFIG_DEBUG_RSEQ
> -static struct rseq *rseq_kernel_fields(struct task_struct *t)
> -{
> - return (struct rseq *) t->rseq_fields;
> -}
> -
> -static int rseq_validate_ro_fields(struct task_struct *t)
> -{
> - static DEFINE_RATELIMIT_STATE(_rs,
> - DEFAULT_RATELIMIT_INTERVAL,
> - DEFAULT_RATELIMIT_BURST);
> - u32 cpu_id_start, cpu_id, node_id, mm_cid;
> - struct rseq __user *rseq = t->rseq.usrptr;
> -
> - /*
> - * Validate fields which are required to be read-only by
> - * user-space.
> - */
> - if (!user_read_access_begin(rseq, t->rseq.len))
> - goto efault;
> - unsafe_get_user(cpu_id_start, &rseq->cpu_id_start, efault_end);
> - unsafe_get_user(cpu_id, &rseq->cpu_id, efault_end);
> - unsafe_get_user(node_id, &rseq->node_id, efault_end);
> - unsafe_get_user(mm_cid, &rseq->mm_cid, efault_end);
> - user_read_access_end();
> -
> - if ((cpu_id_start != rseq_kernel_fields(t)->cpu_id_start ||
> - cpu_id != rseq_kernel_fields(t)->cpu_id ||
> - node_id != rseq_kernel_fields(t)->node_id ||
> - mm_cid != rseq_kernel_fields(t)->mm_cid) && __ratelimit(&_rs)) {
> -
> - pr_warn("Detected rseq corruption for pid: %d, name: %s\n"
> - "\tcpu_id_start: %u ?= %u\n"
> - "\tcpu_id: %u ?= %u\n"
> - "\tnode_id: %u ?= %u\n"
> - "\tmm_cid: %u ?= %u\n",
> - t->pid, t->comm,
> - cpu_id_start, rseq_kernel_fields(t)->cpu_id_start,
> - cpu_id, rseq_kernel_fields(t)->cpu_id,
> - node_id, rseq_kernel_fields(t)->node_id,
> - mm_cid, rseq_kernel_fields(t)->mm_cid);
> - }
> -
> - /* For now, only print a console warning on mismatch. */
> - return 0;
> -
> -efault_end:
> - user_read_access_end();
> -efault:
> - return -EFAULT;
> -}
> -
> -/*
> - * Update an rseq field and its in-kernel copy in lock-step to keep a coherent
> - * state.
> - */
> -#define rseq_unsafe_put_user(t, value, field, error_label) \
> - do { \
> - unsafe_put_user(value, &t->rseq.usrptr->field, error_label); \
> - rseq_kernel_fields(t)->field = value; \
> - } while (0)
> -
> -#else
> -static int rseq_validate_ro_fields(struct task_struct *t)
> -{
> - return 0;
> -}
> -
> -#define rseq_unsafe_put_user(t, value, field, error_label) \
> - unsafe_put_user(value, &t->rseq.usrptr->field, error_label)
> -#endif
> -
> -static int rseq_update_cpu_node_id(struct task_struct *t)
> -{
> - struct rseq __user *rseq = t->rseq.usrptr;
> - u32 cpu_id = raw_smp_processor_id();
> - u32 node_id = cpu_to_node(cpu_id);
> - u32 mm_cid = task_mm_cid(t);
> -
> - rseq_stat_inc(rseq_stats.ids);
> -
> - /* Validate read-only rseq fields on debug kernels */
> - if (rseq_validate_ro_fields(t))
> - goto efault;
> - WARN_ON_ONCE((int) mm_cid < 0);
> -
> - if (!user_write_access_begin(rseq, t->rseq.len))
> - goto efault;
> -
> - rseq_unsafe_put_user(t, cpu_id, cpu_id_start, efault_end);
> - rseq_unsafe_put_user(t, cpu_id, cpu_id, efault_end);
> - rseq_unsafe_put_user(t, node_id, node_id, efault_end);
> - rseq_unsafe_put_user(t, mm_cid, mm_cid, efault_end);
> -
> - /* Cache the user space values */
> - t->rseq.ids.cpu_id = cpu_id;
> - t->rseq.ids.mm_cid = mm_cid;
> -
> - /*
> - * Additional feature fields added after ORIG_RSEQ_SIZE
> - * need to be conditionally updated only if
> - * t->rseq_len != ORIG_RSEQ_SIZE.
> - */
> - user_write_access_end();
> - trace_rseq_update(t);
> - return 0;
> -
> -efault_end:
> - user_write_access_end();
> -efault:
> - return -EFAULT;
> -}
> -
> -static int rseq_reset_rseq_cpu_node_id(struct task_struct *t)
> +static bool rseq_set_ids(struct task_struct *t, struct rseq_ids *ids, u32 node_id)
> {
> - struct rseq __user *rseq = t->rseq.usrptr;
> - u32 cpu_id_start = 0, cpu_id = RSEQ_CPU_ID_UNINITIALIZED, node_id = 0,
> - mm_cid = 0;
> -
> - /*
> - * Validate read-only rseq fields.
> - */
> - if (rseq_validate_ro_fields(t))
> - goto efault;
> -
> - if (!user_write_access_begin(rseq, t->rseq.len))
> - goto efault;
> -
> - /*
> - * Reset all fields to their initial state.
> - *
> - * All fields have an initial state of 0 except cpu_id which is set to
> - * RSEQ_CPU_ID_UNINITIALIZED, so that any user coming in after
> - * unregistration can figure out that rseq needs to be registered
> - * again.
> - */
> - rseq_unsafe_put_user(t, cpu_id_start, cpu_id_start, efault_end);
> - rseq_unsafe_put_user(t, cpu_id, cpu_id, efault_end);
> - rseq_unsafe_put_user(t, node_id, node_id, efault_end);
> - rseq_unsafe_put_user(t, mm_cid, mm_cid, efault_end);
> -
> - /*
> - * Additional feature fields added after ORIG_RSEQ_SIZE
> - * need to be conditionally reset only if
> - * t->rseq_len != ORIG_RSEQ_SIZE.
> - */
> - user_write_access_end();
> - return 0;
> -
> -efault_end:
> - user_write_access_end();
> -efault:
> - return -EFAULT;
> + return rseq_set_ids_get_csaddr(t, ids, node_id, NULL);
> }
>
> static bool rseq_handle_cs(struct task_struct *t, struct pt_regs *regs)
> @@ -407,6 +250,8 @@ static bool rseq_handle_cs(struct task_s
> void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs)
> {
> struct task_struct *t = current;
> + struct rseq_ids ids;
> + u32 node_id;
> bool event;
> int sig;
>
> @@ -453,6 +298,8 @@ void __rseq_handle_notify_resume(struct
> scoped_guard(RSEQ_EVENT_GUARD) {
> event = t->rseq.event.sched_switch;
> t->rseq.event.sched_switch = false;
> + ids.cpu_id = task_cpu(t);
> + ids.mm_cid = task_mm_cid(t);
> }
>
> if (!IS_ENABLED(CONFIG_DEBUG_RSEQ) && !event)
> @@ -461,7 +308,8 @@ void __rseq_handle_notify_resume(struct
> if (!rseq_handle_cs(t, regs))
> goto error;
>
> - if (unlikely(rseq_update_cpu_node_id(t)))
> + node_id = cpu_to_node(ids.cpu_id);
> + if (!rseq_set_ids(t, &ids, node_id))
> goto error;
> return;
>
> @@ -502,13 +350,33 @@ void rseq_syscall(struct pt_regs *regs)
> }
> #endif
>
> +static bool rseq_reset_ids(void)
> +{
> + struct rseq_ids ids = {
> + .cpu_id = RSEQ_CPU_ID_UNINITIALIZED,
> + .mm_cid = 0,
> + };
> +
> + /*
> + * If this fails, terminate it because this leaves the kernel in
> + * stupid state as exit to user space will try to fixup the ids
> + * again.
> + */
> + if (rseq_set_ids(current, &ids, 0))
> + return true;
> +
> + force_sig(SIGSEGV);
> + return false;
> +}
> +
> +/* The original rseq structure size (including padding) is 32 bytes. */
> +#define ORIG_RSEQ_SIZE 32
> +
> /*
> * sys_rseq - setup restartable sequences for caller thread.
> */
> SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32, sig)
> {
> - int ret;
> -
> if (flags & RSEQ_FLAG_UNREGISTER) {
> if (flags & ~RSEQ_FLAG_UNREGISTER)
> return -EINVAL;
> @@ -519,9 +387,8 @@ SYSCALL_DEFINE4(rseq, struct rseq __user
> return -EINVAL;
> if (current->rseq.sig != sig)
> return -EPERM;
> - ret = rseq_reset_rseq_cpu_node_id(current);
> - if (ret)
> - return ret;
> + if (!rseq_reset_ids())
> + return -EFAULT;
AFAIU, this is only done to have the debug checks, stats, and
tracing side-effects, because the content of cpu_cid will be set
to ~0ULL by the following rseq_reset, am I correct ?
It's kind of weird to do some reset work to have it immediately
overwritten.
> rseq_reset(current);
> return 0;
> }
> @@ -571,17 +438,6 @@ SYSCALL_DEFINE4(rseq, struct rseq __user
> if (put_user_masked_u64(0UL, &rseq->rseq_cs))
> return -EFAULT;
>
> -#ifdef CONFIG_DEBUG_RSEQ
> - /*
> - * Initialize the in-kernel rseq fields copy for validation of
> - * read-only fields.
> - */
> - if (get_user(rseq_kernel_fields(current)->cpu_id_start, &rseq->cpu_id_start) ||
> - get_user(rseq_kernel_fields(current)->cpu_id, &rseq->cpu_id) ||
> - get_user(rseq_kernel_fields(current)->node_id, &rseq->node_id) ||
> - get_user(rseq_kernel_fields(current)->mm_cid, &rseq->mm_cid))
> - return -EFAULT;
> -#endif
OK I think I get why you are using cpu_cid != ~0ULL rather than the rseq
user pointer in the debug code. After rseq registration, now that you
remove those get_user, the kernel vs userspace fields are out of sync,
so you need a way to figure this out on return to userspace from
sys_rseq register.
Thanks,
Mathieu
> /*
> * Activate the registration by setting the rseq area address, length
> * and signature in the task struct.
>
--
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com
On Thu, Sep 11 2025 at 09:40, Mathieu Desnoyers wrote:
> On 2025-09-08 17:32, Thomas Gleixner wrote:
>> +/*
>> + * On debug kernels validate that user space did not mess with it if
>> + * DEBUG_RSEQ is enabled, but don't on the first exit to user space. In
>> + * that case cpu_cid is ~0. See fork/execve.
>> + */
>> +bool rseq_debug_validate_ids(struct task_struct *t)
>> +{
>> + struct rseq __user *rseq = t->rseq.usrptr;
>
> Why not check on NULL rseq user pointer rather than skip using
> the ~0ULL cpu_cid ?
It's not about the NULL rseq pointer.
At the point of a freshly installed RSEQ the cached values are invalid
when the task goes out to user space for the first time because the
write out obviously never happened before that.
We could write out some defined value when RSEQ is installed and cache
that, but I did not think about that and this is debug code so I did not
care about the performance that much.
>> + u32 cpu_id, uval, node_id;
>> +
>> + if (t->rseq.ids.cpu_cid == ~0)
>
> Here we are using ~0 and where cpu_cid is set in rseq_set() ~0ULL is
> used. Now I understand that because of sign-extend and type promotion
> this happens to provide all bits set on a 64-bit, but can we please just
> pick one, e.g. ~0ULL ?
Sure. Trivial enough
> Also why does rseq_fork() set it to ~0ULL rather than keep the value
> from the parent when forking a new process ?
>
> Whatever was present in the parent process in the rseq area should
> still be in the child, including the rseq registration.
>
> Or am missing something ?
Not really. I just made all those cases (install, fork, exec...) behave
the same way. It's harmless enough, no?
Thanks,
tglx
On 2025-09-11 12:02, Thomas Gleixner wrote:
> On Thu, Sep 11 2025 at 09:40, Mathieu Desnoyers wrote:
>> On 2025-09-08 17:32, Thomas Gleixner wrote:
[...]
>
>> Also why does rseq_fork() set it to ~0ULL rather than keep the value
>> from the parent when forking a new process ?
>>
>> Whatever was present in the parent process in the rseq area should
>> still be in the child, including the rseq registration.
>>
>> Or am missing something ?
>
> Not really. I just made all those cases (install, fork, exec...) behave
> the same way. It's harmless enough, no?

It does prevent validation of the rseq area content across fork in the
child process. I do not expect this to have a significant impact in
practice though.

Thanks,

Mathieu

--
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com
Completely separate the signal delivery path from the notify handler as
they have different semantics versus the event handling.
The signal delivery only needs to ensure that the interrupted user context
was not in a critical section or the section is aborted before it switches
to the signal frame context. The signal frame context does not have the
original instruction pointer anymore, so that can't be handled on exit to
user space.
No point in updating the CPU/CID ids as they might change again before the
task returns to user space for real.
The fast path optimization, which checks for the 'entry from user via
interrupt' condition is only available for architectures which use the
generic entry code.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
V3: Move rseq_update_usr() to the next patch - Mathieu
---
include/linux/rseq.h | 21 ++++++++++++++++-----
kernel/rseq.c | 30 ++++++++++++++++++++++--------
2 files changed, 38 insertions(+), 13 deletions(-)
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -5,22 +5,33 @@
#ifdef CONFIG_RSEQ
#include <linux/sched.h>
-void __rseq_handle_notify_resume(struct ksignal *sig, struct pt_regs *regs);
+void __rseq_handle_notify_resume(struct pt_regs *regs);
static inline void rseq_handle_notify_resume(struct pt_regs *regs)
{
if (current->rseq.event.has_rseq)
- __rseq_handle_notify_resume(NULL, regs);
+ __rseq_handle_notify_resume(regs);
}
+void __rseq_signal_deliver(int sig, struct pt_regs *regs);
+
+/*
+ * Invoked from signal delivery to fixup based on the register context before
+ * switching to the signal delivery context.
+ */
static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs)
{
- if (current->rseq.event.has_rseq) {
- current->rseq.event.sched_switch = true;
- __rseq_handle_notify_resume(ksig, regs);
+ if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
+ /* '&' is intentional to spare one conditional branch */
+ if (current->rseq.event.has_rseq & current->rseq.event.user_irq)
+ __rseq_signal_deliver(ksig->sig, regs);
+ } else {
+ if (current->rseq.event.has_rseq)
+ __rseq_signal_deliver(ksig->sig, regs);
}
}
+/* Raised from context switch and exevce to force evaluation on exit to user */
static inline void rseq_sched_switch_event(struct task_struct *t)
{
if (t->rseq.event.has_rseq) {
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -247,13 +247,12 @@ static bool rseq_handle_cs(struct task_s
* respect to other threads scheduled on the same CPU, and with respect
* to signal handlers.
*/
-void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs)
+void __rseq_handle_notify_resume(struct pt_regs *regs)
{
struct task_struct *t = current;
struct rseq_ids ids;
u32 node_id;
bool event;
- int sig;
/*
* If invoked from hypervisors before entering the guest via
@@ -272,10 +271,7 @@ void __rseq_handle_notify_resume(struct
if (unlikely(t->flags & PF_EXITING))
return;
- if (ksig)
- rseq_stat_inc(rseq_stats.signal);
- else
- rseq_stat_inc(rseq_stats.slowpath);
+ rseq_stat_inc(rseq_stats.slowpath);
/*
* Read and clear the event pending bit first. If the task
@@ -314,8 +310,26 @@ void __rseq_handle_notify_resume(struct
return;
error:
- sig = ksig ? ksig->sig : 0;
- force_sigsegv(sig);
+ force_sig(SIGSEGV);
+}
+
+void __rseq_signal_deliver(int sig, struct pt_regs *regs)
+{
+ rseq_stat_inc(rseq_stats.signal);
+ /*
+ * Don't update IDs, they are handled on exit to user if
+ * necessary. The important thing is to abort a critical section of
+ * the interrupted context as after this point the instruction
+ * pointer in @regs points to the signal handler.
+ */
+ if (unlikely(!rseq_handle_cs(current, regs))) {
+ /*
+ * Clear the errors just in case this might survive
+ * magically, but leave the rest intact.
+ */
+ current->rseq.event.error = 0;
+ force_sigsegv(sig);
+ }
}
void __rseq_debug_syscall_return(struct pt_regs *regs)
Replace the whole logic with a new implementation, which is shared with
signal delivery and the upcoming exit fast path.
Contrary to the original implementation, this ignores invocations from
KVM/IO-uring, which invoke resume_user_mode_work() with the @regs argument
set to NULL.
The original implementation updated the CPU/Node/MM CID fields, but that
was just a side effect, which was addressing the problem that this
invocation cleared TIF_NOTIFY_RESUME, which in turn could cause an update
on return to user space to be lost.
This problem has been addressed differently, so that it's no longer
required to do that update before entering the guest.
That might be considered a user-visible change, when the host's thread TLS
memory is mapped into the guest, but as this was never intentionally
supported, this abuse of kernel internal implementation details is not
considered an ABI break.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
V3: Moved rseq_update_usr() to this one - Mathieu
Documented the KVM visible change - Sean
---
include/linux/rseq_entry.h | 29 +++++++++++++++++
kernel/rseq.c | 76 +++++++++++++++++++--------------------------
2 files changed, 62 insertions(+), 43 deletions(-)
--- a/include/linux/rseq_entry.h
+++ b/include/linux/rseq_entry.h
@@ -383,6 +383,35 @@ bool rseq_set_ids_get_csaddr(struct task
return false;
}
+/*
+ * Update user space with new IDs and conditionally check whether the task
+ * is in a critical section.
+ */
+static rseq_inline bool rseq_update_usr(struct task_struct *t, struct pt_regs *regs,
+ struct rseq_ids *ids, u32 node_id)
+{
+ u64 csaddr;
+
+ if (!rseq_set_ids_get_csaddr(t, ids, node_id, &csaddr))
+ return false;
+
+ /*
+ * On architectures which utilize the generic entry code this
+ * allows to skip the critical section when the entry was not from
+ * a user space interrupt, unless debug mode is enabled.
+ */
+ if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
+ if (!static_branch_unlikely(&rseq_debug_enabled)) {
+ if (likely(!t->rseq.event.user_irq))
+ return true;
+ }
+ }
+ if (likely(!csaddr))
+ return true;
+ /* Sigh, this really needs to do work */
+ return rseq_update_user_cs(t, regs, csaddr);
+}
+
static __always_inline void rseq_exit_to_user_mode(void)
{
struct rseq_event *ev = ¤t->rseq.event;
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -82,12 +82,6 @@
#define CREATE_TRACE_POINTS
#include <trace/events/rseq.h>
-#ifdef CONFIG_MEMBARRIER
-# define RSEQ_EVENT_GUARD irq
-#else
-# define RSEQ_EVENT_GUARD preempt
-#endif
-
DEFINE_STATIC_KEY_MAYBE(CONFIG_RSEQ_DEBUG_DEFAULT_ENABLE, rseq_debug_enabled);
static inline void rseq_control_debug(bool on)
@@ -236,38 +230,15 @@ static bool rseq_handle_cs(struct task_s
return rseq_update_user_cs(t, regs, csaddr);
}
-/*
- * This resume handler must always be executed between any of:
- * - preemption,
- * - signal delivery,
- * and return to user-space.
- *
- * This is how we can ensure that the entire rseq critical section
- * will issue the commit instruction only if executed atomically with
- * respect to other threads scheduled on the same CPU, and with respect
- * to signal handlers.
- */
-void __rseq_handle_notify_resume(struct pt_regs *regs)
+static void rseq_slowpath_update_usr(struct pt_regs *regs)
{
+ /* Preserve rseq state and user_irq state for exit to user */
+ const struct rseq_event evt_mask = { .has_rseq = true, .user_irq = true, };
struct task_struct *t = current;
struct rseq_ids ids;
u32 node_id;
bool event;
- /*
- * If invoked from hypervisors before entering the guest via
- * resume_user_mode_work(), then @regs is a NULL pointer.
- *
- * resume_user_mode_work() clears TIF_NOTIFY_RESUME and re-raises
- * it before returning from the ioctl() to user space when
- * rseq_event.sched_switch is set.
- *
- * So it's safe to ignore here instead of pointlessly updating it
- * in the vcpu_run() loop.
- */
- if (!regs)
- return;
-
if (unlikely(t->flags & PF_EXITING))
return;
@@ -291,26 +262,45 @@ void __rseq_handle_notify_resume(struct
* with the result handed in to allow the detection of
* inconsistencies.
*/
- scoped_guard(RSEQ_EVENT_GUARD) {
+ scoped_guard(irq) {
event = t->rseq.event.sched_switch;
- t->rseq.event.sched_switch = false;
+ t->rseq.event.all &= evt_mask.all;
ids.cpu_id = task_cpu(t);
ids.mm_cid = task_mm_cid(t);
}
- if (!IS_ENABLED(CONFIG_DEBUG_RSEQ) && !event)
+ if (!event)
return;
- if (!rseq_handle_cs(t, regs))
- goto error;
-
node_id = cpu_to_node(ids.cpu_id);
- if (!rseq_set_ids(t, &ids, node_id))
- goto error;
- return;
-error:
- force_sig(SIGSEGV);
+ if (unlikely(!rseq_update_usr(t, regs, &ids, node_id))) {
+ /*
+ * Clear the errors just in case this might survive magically, but
+ * leave the rest intact.
+ */
+ t->rseq.event.error = 0;
+ force_sig(SIGSEGV);
+ }
+}
+
+void __rseq_handle_notify_resume(struct pt_regs *regs)
+{
+ /*
+ * If invoked from hypervisors before entering the guest via
+ * resume_user_mode_work(), then @regs is a NULL pointer.
+ *
+ * resume_user_mode_work() clears TIF_NOTIFY_RESUME and re-raises
+ * it before returning from the ioctl() to user space when
+ * rseq_event.sched_switch is set.
+ *
+ * So it's safe to ignore here instead of pointlessly updating it
+ * in the vcpu_run() loop.
+ */
+ if (!regs)
+ return;
+
+ rseq_slowpath_update_usr(regs);
}
void __rseq_signal_deliver(int sig, struct pt_regs *regs)
After removing the various condition bits earlier it turns out that one
extra piece of information is needed to avoid setting event::sched_switch and
TIF_NOTIFY_RESUME unconditionally on every context switch.
The update of the RSEQ user space memory is only required, when either
the task was interrupted in user space and schedules
or
the CPU or MM CID changes in schedule() independent of the entry mode
Right now only the interrupt from user information is available.
Add an event flag, which is set when the CPU or MM CID or both change.
Evaluate this event in the scheduler to decide whether the sched_switch
event and the TIF bit need to be set.
It's an extra conditional in context_switch(), but the downside of
unconditionally handling RSEQ after a context switch to user is way more
significant. The utilized boolean logic minimizes this to a single
conditional branch.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
fs/exec.c | 2 -
include/linux/rseq.h | 81 +++++++++++++++++++++++++++++++++++++++++----
include/linux/rseq_types.h | 11 +++++-
kernel/rseq.c | 2 -
kernel/sched/core.c | 7 +++
kernel/sched/sched.h | 5 ++
6 files changed, 95 insertions(+), 13 deletions(-)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1775,7 +1775,7 @@ static int bprm_execve(struct linux_binp
force_fatal_sig(SIGSEGV);
sched_mm_cid_after_execve(current);
- rseq_sched_switch_event(current);
+ rseq_force_update();
current->in_execve = 0;
return retval;
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -9,7 +9,8 @@ void __rseq_handle_notify_resume(struct
static inline void rseq_handle_notify_resume(struct pt_regs *regs)
{
- if (current->rseq.event.has_rseq)
+ /* '&' is intentional to spare one conditional branch */
+ if (current->rseq.event.sched_switch & current->rseq.event.has_rseq)
__rseq_handle_notify_resume(regs);
}
@@ -31,12 +32,75 @@ static inline void rseq_signal_deliver(s
}
}
-/* Raised from context switch and exevce to force evaluation on exit to user */
-static inline void rseq_sched_switch_event(struct task_struct *t)
+static inline void rseq_raise_notify_resume(struct task_struct *t)
{
- if (t->rseq.event.has_rseq) {
- t->rseq.event.sched_switch = true;
- set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
+ set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
+}
+
+/* Invoked from context switch to force evaluation on exit to user */
+static __always_inline void rseq_sched_switch_event(struct task_struct *t)
+{
+ struct rseq_event *ev = &t->rseq.event;
+
+ if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
+ /*
+ * Avoid a boat load of conditionals by using simple logic
+ * to determine whether NOTIFY_RESUME needs to be raised.
+ *
+ * It's required when the CPU or MM CID has changed or
+ * the entry was from user space.
+ */
+ bool raise = (ev->user_irq | ev->ids_changed) & ev->has_rseq;
+
+ if (raise) {
+ ev->sched_switch = true;
+ rseq_raise_notify_resume(t);
+ }
+ } else {
+ if (ev->has_rseq) {
+ t->rseq.event.sched_switch = true;
+ rseq_raise_notify_resume(t);
+ }
+ }
+}
+
+/*
+ * Invoked from __set_task_cpu() when a task migrates to enforce an IDs
+ * update.
+ *
+ * This does not raise TIF_NOTIFY_RESUME as that happens in
+ * rseq_sched_switch_event().
+ */
+static __always_inline void rseq_sched_set_task_cpu(struct task_struct *t, unsigned int cpu)
+{
+ t->rseq.event.ids_changed = true;
+}
+
+/*
+ * Invoked from switch_mm_cid() in context switch when the task gets a MM
+ * CID assigned.
+ *
+ * This does not raise TIF_NOTIFY_RESUME as that happens in
+ * rseq_sched_switch_event().
+ */
+static __always_inline void rseq_sched_set_task_mm_cid(struct task_struct *t, unsigned int cid)
+{
+ /*
+ * Requires a comparison as the switch_mm_cid() code does not
+ * provide a conditional for it readily. So avoid excessive updates
+ * when nothing changes.
+ */
+ if (t->rseq.ids.mm_cid != cid)
+ t->rseq.event.ids_changed = true;
+}
+
+/* Enforce a full update after RSEQ registration and when execve() failed */
+static inline void rseq_force_update(void)
+{
+ if (current->rseq.event.has_rseq) {
+ current->rseq.event.ids_changed = true;
+ current->rseq.event.sched_switch = true;
+ rseq_raise_notify_resume(current);
}
}
@@ -53,7 +117,7 @@ static inline void rseq_sched_switch_eve
static inline void rseq_virt_userspace_exit(void)
{
if (current->rseq.event.sched_switch)
- set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
+ rseq_raise_notify_resume(current);
}
static inline void rseq_reset(struct task_struct *t)
@@ -85,6 +149,9 @@ static inline void rseq_fork(struct task
static inline void rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs) { }
static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { }
static inline void rseq_sched_switch_event(struct task_struct *t) { }
+static inline void rseq_sched_set_task_cpu(struct task_struct *t, unsigned int cpu) { }
+static inline void rseq_sched_set_task_mm_cid(struct task_struct *t, unsigned int cid) { }
+static inline void rseq_force_update(void) { }
static inline void rseq_virt_userspace_exit(void) { }
static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) { }
static inline void rseq_execve(struct task_struct *t) { }
--- a/include/linux/rseq_types.h
+++ b/include/linux/rseq_types.h
@@ -11,20 +11,27 @@ struct rseq;
* struct rseq_event - Storage for rseq related event management
* @all: Compound to initialize and clear the data efficiently
* @events: Compound to access events with a single load/store
- * @sched_switch: True if the task was scheduled out
+ * @sched_switch: True if the task was scheduled and needs update on
+ * exit to user
+ * @ids_changed: Indicator that IDs need to be updated
* @user_irq: True on interrupt entry from user mode
* @has_rseq: True if the task has a rseq pointer installed
* @error: Compound error code for the slow path to analyze
* @fatal: User space data corrupted or invalid
+ *
+ * @sched_switch and @ids_changed must be adjacent and the combo must be
+ * 16bit aligned to allow a single store, when both are set at the same
+ * time in the scheduler.
*/
struct rseq_event {
union {
u64 all;
struct {
union {
- u16 events;
+ u32 events;
struct {
u8 sched_switch;
+ u8 ids_changed;
u8 user_irq;
};
};
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -456,7 +456,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user
* are updated before returning to user-space.
*/
current->rseq.event.has_rseq = true;
- rseq_sched_switch_event(current);
+ rseq_force_update();
return 0;
}
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5150,7 +5150,6 @@ prepare_task_switch(struct rq *rq, struc
kcov_prepare_switch(prev);
sched_info_switch(rq, prev, next);
perf_event_task_sched_out(prev, next);
- rseq_sched_switch_event(prev);
fire_sched_out_preempt_notifiers(prev, next);
kmap_local_sched_out();
prepare_task(next);
@@ -5348,6 +5347,12 @@ context_switch(struct rq *rq, struct tas
/* switch_mm_cid() requires the memory barriers above. */
switch_mm_cid(rq, prev, next);
+ /*
+ * Tell rseq that the task was scheduled in. Must be after
+ * switch_mm_cid() to get the TIF flag set.
+ */
+ rseq_sched_switch_event(next);
+
prepare_lock_switch(rq, next, rf);
/* Here we just switch the register state and the stack. */
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2181,6 +2181,7 @@ static inline void __set_task_cpu(struct
smp_wmb();
WRITE_ONCE(task_thread_info(p)->cpu, cpu);
p->wake_cpu = cpu;
+ rseq_sched_set_task_cpu(p, cpu);
#endif /* CONFIG_SMP */
}
@@ -3778,8 +3779,10 @@ static inline void switch_mm_cid(struct
mm_cid_put_lazy(prev);
prev->mm_cid = -1;
}
- if (next->mm_cid_active)
+ if (next->mm_cid_active) {
next->last_mm_cid = next->mm_cid = mm_cid_get(rq, next, next->mm);
+ rseq_sched_set_task_mm_cid(next, next->mm_cid);
+ }
}
#else /* !CONFIG_SCHED_MM_CID: */
On 2025-09-08 17:32, Thomas Gleixner wrote:
> After removing the various condition bits earlier it turns out that one
> extra information is needed to avoid setting event::sched_switch and
> TIF_NOTIFY_RESUME unconditionally on every context switch.
>
> The update of the RSEQ user space memory is only required, when either
>
> the task was interrupted in user space and schedules
>
> or
>
> the CPU or MM CID changes in schedule() independent of the entry mode
>
> Right now only the interrupt from user information is available.
>
> Add a event flag, which is set when the CPU or MM CID or both change.
a event -> an event
>
> Evaluate this event in the scheduler to decide whether the sched_switch
> event and the TIF bit need to be set.
>
> It's an extra conditional in context_switch(), but the downside of
> unconditionally handling RSEQ after a context switch to user is way more
> significant. The utilized boolean logic minimizes this to a single
> conditional branch.
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
>
> ---
> fs/exec.c | 2 -
> include/linux/rseq.h | 81 +++++++++++++++++++++++++++++++++++++++++----
> include/linux/rseq_types.h | 11 +++++-
> kernel/rseq.c | 2 -
> kernel/sched/core.c | 7 +++
> kernel/sched/sched.h | 5 ++
> 6 files changed, 95 insertions(+), 13 deletions(-)
>
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -1775,7 +1775,7 @@ static int bprm_execve(struct linux_binp
> force_fatal_sig(SIGSEGV);
>
> sched_mm_cid_after_execve(current);
> - rseq_sched_switch_event(current);
> + rseq_force_update();
> current->in_execve = 0;
>
> return retval;
> --- a/include/linux/rseq.h
> +++ b/include/linux/rseq.h
> @@ -9,7 +9,8 @@ void __rseq_handle_notify_resume(struct
>
> static inline void rseq_handle_notify_resume(struct pt_regs *regs)
> {
> - if (current->rseq.event.has_rseq)
> + /* '&' is intentional to spare one conditional branch */
> + if (current->rseq.event.sched_switch & current->rseq.event.has_rseq)
I wonder.. except for the corner case of rseq unregistration,
when can we have sched_switch set but not has_rseq ?
We could remove a load from the fast path and the AND if we
clear the sched_switch flag on rseq unregistration.
Thanks,
Mathieu
> __rseq_handle_notify_resume(regs);
> }
>
> @@ -31,12 +32,75 @@ static inline void rseq_signal_deliver(s
> }
> }
>
> -/* Raised from context switch and exevce to force evaluation on exit to user */
> -static inline void rseq_sched_switch_event(struct task_struct *t)
> +static inline void rseq_raise_notify_resume(struct task_struct *t)
> {
> - if (t->rseq.event.has_rseq) {
> - t->rseq.event.sched_switch = true;
> - set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
> + set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
> +}
> +
> +/* Invoked from context switch to force evaluation on exit to user */
> +static __always_inline void rseq_sched_switch_event(struct task_struct *t)
> +{
> + struct rseq_event *ev = &t->rseq.event;
> +
> + if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
> + /*
> + * Avoid a boat load of conditionals by using simple logic
> + * to determine whether NOTIFY_RESUME needs to be raised.
> + *
> + * It's required when the CPU or MM CID has changed or
> + * the entry was from user space.
> + */
> + bool raise = (ev->user_irq | ev->ids_changed) & ev->has_rseq;
> +
> + if (raise) {
> + ev->sched_switch = true;
> + rseq_raise_notify_resume(t);
> + }
> + } else {
> + if (ev->has_rseq) {
> + t->rseq.event.sched_switch = true;
> + rseq_raise_notify_resume(t);
> + }
> + }
> +}
> +
> +/*
> + * Invoked from __set_task_cpu() when a task migrates to enforce an IDs
> + * update.
> + *
> + * This does not raise TIF_NOTIFY_RESUME as that happens in
> + * rseq_sched_switch_event().
> + */
> +static __always_inline void rseq_sched_set_task_cpu(struct task_struct *t, unsigned int cpu)
> +{
> + t->rseq.event.ids_changed = true;
> +}
> +
> +/*
> + * Invoked from switch_mm_cid() in context switch when the task gets a MM
> + * CID assigned.
> + *
> + * This does not raise TIF_NOTIFY_RESUME as that happens in
> + * rseq_sched_switch_event().
> + */
> +static __always_inline void rseq_sched_set_task_mm_cid(struct task_struct *t, unsigned int cid)
> +{
> + /*
> + * Requires a comparison as the switch_mm_cid() code does not
> + * provide a conditional for it readily. So avoid excessive updates
> + * when nothing changes.
> + */
> + if (t->rseq.ids.mm_cid != cid)
> + t->rseq.event.ids_changed = true;
> +}
> +
> +/* Enforce a full update after RSEQ registration and when execve() failed */
> +static inline void rseq_force_update(void)
> +{
> + if (current->rseq.event.has_rseq) {
> + current->rseq.event.ids_changed = true;
> + current->rseq.event.sched_switch = true;
> + rseq_raise_notify_resume(current);
> }
> }
>
> @@ -53,7 +117,7 @@ static inline void rseq_sched_switch_eve
> static inline void rseq_virt_userspace_exit(void)
> {
> if (current->rseq.event.sched_switch)
> - set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
> + rseq_raise_notify_resume(current);
> }
>
> static inline void rseq_reset(struct task_struct *t)
> @@ -85,6 +149,9 @@ static inline void rseq_fork(struct task
> static inline void rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs) { }
> static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { }
> static inline void rseq_sched_switch_event(struct task_struct *t) { }
> +static inline void rseq_sched_set_task_cpu(struct task_struct *t, unsigned int cpu) { }
> +static inline void rseq_sched_set_task_mm_cid(struct task_struct *t, unsigned int cid) { }
> +static inline void rseq_force_update(void) { }
> static inline void rseq_virt_userspace_exit(void) { }
> static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) { }
> static inline void rseq_execve(struct task_struct *t) { }
> --- a/include/linux/rseq_types.h
> +++ b/include/linux/rseq_types.h
> @@ -11,20 +11,27 @@ struct rseq;
> * struct rseq_event - Storage for rseq related event management
> * @all: Compound to initialize and clear the data efficiently
> * @events: Compound to access events with a single load/store
> - * @sched_switch: True if the task was scheduled out
> + * @sched_switch: True if the task was scheduled and needs update on
> + * exit to user
> + * @ids_changed: Indicator that IDs need to be updated
> * @user_irq: True on interrupt entry from user mode
> * @has_rseq: True if the task has a rseq pointer installed
> * @error: Compound error code for the slow path to analyze
> * @fatal: User space data corrupted or invalid
> + *
> + * @sched_switch and @ids_changed must be adjacent and the combo must be
> + * 16bit aligned to allow a single store, when both are set at the same
> + * time in the scheduler.
> */
> struct rseq_event {
> union {
> u64 all;
> struct {
> union {
> - u16 events;
> + u32 events;
> struct {
> u8 sched_switch;
> + u8 ids_changed;
> u8 user_irq;
> };
> };
> --- a/kernel/rseq.c
> +++ b/kernel/rseq.c
> @@ -456,7 +456,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user
> * are updated before returning to user-space.
> */
> current->rseq.event.has_rseq = true;
> - rseq_sched_switch_event(current);
> + rseq_force_update();
>
> return 0;
> }
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -5150,7 +5150,6 @@ prepare_task_switch(struct rq *rq, struc
> kcov_prepare_switch(prev);
> sched_info_switch(rq, prev, next);
> perf_event_task_sched_out(prev, next);
> - rseq_sched_switch_event(prev);
> fire_sched_out_preempt_notifiers(prev, next);
> kmap_local_sched_out();
> prepare_task(next);
> @@ -5348,6 +5347,12 @@ context_switch(struct rq *rq, struct tas
> /* switch_mm_cid() requires the memory barriers above. */
> switch_mm_cid(rq, prev, next);
>
> + /*
> + * Tell rseq that the task was scheduled in. Must be after
> + * switch_mm_cid() to get the TIF flag set.
> + */
> + rseq_sched_switch_event(next);
> +
> prepare_lock_switch(rq, next, rf);
>
> /* Here we just switch the register state and the stack. */
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -2181,6 +2181,7 @@ static inline void __set_task_cpu(struct
> smp_wmb();
> WRITE_ONCE(task_thread_info(p)->cpu, cpu);
> p->wake_cpu = cpu;
> + rseq_sched_set_task_cpu(p, cpu);
> #endif /* CONFIG_SMP */
> }
>
> @@ -3778,8 +3779,10 @@ static inline void switch_mm_cid(struct
> mm_cid_put_lazy(prev);
> prev->mm_cid = -1;
> }
> - if (next->mm_cid_active)
> + if (next->mm_cid_active) {
> next->last_mm_cid = next->mm_cid = mm_cid_get(rq, next, next->mm);
> + rseq_sched_set_task_mm_cid(next, next->mm_cid);
> + }
> }
>
> #else /* !CONFIG_SCHED_MM_CID: */
>
--
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com
On Thu, Sep 11 2025 at 10:03, Mathieu Desnoyers wrote:
> On 2025-09-08 17:32, Thomas Gleixner wrote:
>> static inline void rseq_handle_notify_resume(struct pt_regs *regs)
>> {
>> - if (current->rseq.event.has_rseq)
>> + /* '&' is intentional to spare one conditional branch */
>> + if (current->rseq.event.sched_switch & current->rseq.event.has_rseq)
>
> I wonder.. except for the corner case of rseq unregistration,
> when can we have sched_switch set but not has_rseq ?
>
> We could remove a load from the fast path and the AND if we
> clear the sched_switch flag on rseq unregistration.
We probably could. Though I doubt it matters much and I opted for
correctness instead of premature optimization.
Thanks,
tglx
On 2025-09-11 12:06, Thomas Gleixner wrote:
> On Thu, Sep 11 2025 at 10:03, Mathieu Desnoyers wrote:
>> On 2025-09-08 17:32, Thomas Gleixner wrote:
>>> static inline void rseq_handle_notify_resume(struct pt_regs *regs)
>>> {
>>> - if (current->rseq.event.has_rseq)
>>> + /* '&' is intentional to spare one conditional branch */
>>> + if (current->rseq.event.sched_switch & current->rseq.event.has_rseq)
>>
>> I wonder.. except for the corner case of rseq unregistration,
>> when can we have sched_switch set but not has_rseq ?
>>
>> We could remove a load from the fast path and the AND if we
>> clear the sched_switch flag on rseq unregistration.
>
> We probably could. Though I doubt it matters much and I opted for
> correctness instead of premature optimization.
Note that this concerns code that documents an explicit " & " as
intentional to spare a branch, which led me to assume that optimizing
it was important.
Thanks,
Mathieu
--
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com
On Thu, Sep 11 2025 at 13:15, Mathieu Desnoyers wrote:
> On 2025-09-11 12:06, Thomas Gleixner wrote:
>> On Thu, Sep 11 2025 at 10:03, Mathieu Desnoyers wrote:
>>> On 2025-09-08 17:32, Thomas Gleixner wrote:
>>>> static inline void rseq_handle_notify_resume(struct pt_regs *regs)
>>>> {
>>>> - if (current->rseq.event.has_rseq)
>>>> + /* '&' is intentional to spare one conditional branch */
>>>> + if (current->rseq.event.sched_switch & current->rseq.event.has_rseq)
>>>
>>> I wonder.. except for the corner case of rseq unregistration,
>>> when can we have sched_switch set but not has_rseq ?
>>>
>>> We could remove a load from the fast path and the AND if we
>>> clear the sched_switch flag on rseq unregistration.
>>
>> We probably could. Though I doubt it matters much and I opted for
>> correctness instead of premature optimization.
> Note that this concerns code that documents an explicit " & " as
> intentional to spare a branch, which led me to assume that optimizing
> it was important.
Correct. Optimization and correctness are not mutually exclusive and it
was an obvious optimization to spare the second branch which the
compiler is forced to emit with &&.
A load/AND from the same cache line is definitely cheaper than an extra
conditional branch.
I'm happy to look into that once the dust has settled...
Thanks,
tglx
Implement the actual logic for handling RSEQ updates in a fast path after
handling the TIF work and at the point where the task is actually returning
to user space.
This is the right point to do that because at this point the CPU and the MM
CID are stable and can no longer change due to yet another reschedule.
That happens when the task is handling it via TIF_NOTIFY_RESUME in
resume_user_mode_work(), which is invoked from the exit to user mode work
loop.
The function is invoked after the TIF work is handled and runs with
interrupts disabled, which means it cannot resolve page faults. It
therefore disables page faults and in case the access to the user space
memory faults, it:
- notes the fail in the event struct
- raises TIF_NOTIFY_RESUME
- returns false to the caller
The caller has to go back to the TIF work, which runs with interrupts
enabled and therefore can resolve the page faults. This happens mostly on
fork() when the memory is marked COW. That will be optimized by setting the
failure flag and raising TIF_NOTIFY_RESUME right on fork to avoid the
otherwise unavoidable round trip.
If the user memory inspection finds invalid data, the function returns
false as well and sets the fatal flag in the event struct along with
TIF_NOTIFY_RESUME. The slow path notify handler has to evaluate that flag
and terminate the task with SIGSEGV as documented.
The initial decision to invoke any of this is based on two flags in the
event struct: @has_rseq and @sched_switch. The decision is in pseudo ASM:
load tsk::event::has_rseq
and tsk::event::sched_switch
jnz inspect_user_space
mov $0, tsk::event::events
...
leave
So for the common case where the task was not scheduled out, this really
boils down to four instructions before going out if the compiler is not
completely stupid (and yes, some of them are).
If the condition is true, then it checks, whether CPU ID or MM CID have
changed. If so, then the CPU/MM IDs have to be updated and are thereby
cached for the next round. The update unconditionally retrieves the user
space critical section address to spare another user*begin/end() pair. If
that's not zero and tsk::event::user_irq is set, then the critical section
is analyzed and acted upon. If either zero or the entry came via syscall
the critical section analysis is skipped.
If the comparison is false then the critical section has to be analyzed
because the event flag is then only true when entry from user was by
interrupt.
This is provided without the actual hookup to let reviewers focus on the
implementation details. The hookup happens in the next step.
Note: As with quite some other optimizations this depends on the generic
entry infrastructure and is not enabled to be sucked into random
architecture implementations.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/rseq_entry.h | 133 +++++++++++++++++++++++++++++++++++++++++++--
include/linux/rseq_types.h | 3 +
kernel/rseq.c | 2
3 files changed, 133 insertions(+), 5 deletions(-)
--- a/include/linux/rseq_entry.h
+++ b/include/linux/rseq_entry.h
@@ -10,6 +10,7 @@ struct rseq_stats {
unsigned long exit;
unsigned long signal;
unsigned long slowpath;
+ unsigned long fastpath;
unsigned long ids;
unsigned long cs;
unsigned long clear;
@@ -202,8 +203,8 @@ bool rseq_debug_update_user_cs(struct ta
/*
* On debug kernels validate that user space did not mess with it if
- * DEBUG_RSEQ is enabled, but don't on the first exit to user space. In
- * that case cpu_cid is ~0. See fork/execve.
+ * debugging is enabled, but don't do that on the first exit to user
+ * space. In that case cpu_cid is ~0. See fork/execve.
*/
bool rseq_debug_validate_ids(struct task_struct *t)
{
@@ -254,12 +255,13 @@ rseq_update_user_cs(struct task_struct *
{
struct rseq_cs __user *ucs = (struct rseq_cs __user *)(unsigned long)csaddr;
unsigned long ip = instruction_pointer(regs);
+ unsigned long tasksize = TASK_SIZE;
u64 start_ip, abort_ip, offset;
u32 usig, __user *uc_sig;
rseq_stat_inc(rseq_stats.cs);
- if (unlikely(csaddr >= TASK_SIZE)) {
+ if (unlikely(csaddr >= tasksize)) {
t->rseq.event.fatal = true;
return false;
}
@@ -298,7 +300,7 @@ rseq_update_user_cs(struct task_struct *
* in TLS::rseq::rseq_cs. An RSEQ abort would then evade ROP
* protection.
*/
- if (abort_ip >= TASK_SIZE || abort_ip < sizeof(*uc_sig))
+ if (unlikely(abort_ip >= tasksize || abort_ip < sizeof(*uc_sig)))
goto die;
/* The address is guaranteed to be >= 0 and < TASK_SIZE */
@@ -412,6 +414,124 @@ static rseq_inline bool rseq_update_usr(
return rseq_update_user_cs(t, regs, csaddr);
}
+/*
+ * If you want to use this then convert your architecture to the generic
+ * entry code. I'm tired of building workarounds for people who can't be
+ * bothered to make the maintainence of generic infrastructure less
+ * burdensome. Just sucking everything into the architecture code and
+ * thereby making others chase the horrible hacks and keep them working is
+ * neither acceptable nor sustainable.
+ */
+#ifdef CONFIG_GENERIC_ENTRY
+
+/*
+ * This is inlined into the exit path because:
+ *
+ * 1) It's a one time comparison in the fast path when there is no event to
+ * handle
+ *
+ * 2) The access to the user space rseq memory (TLS) is unlikely to fault
+ * so the straight inline operation is:
+ *
+ * - Four 32-bit stores only if CPU ID/ MM CID need to be updated
+ * - One 64-bit load to retrieve the critical section address
+ *
+ * 3) In the unlikely case that the critical section address is != NULL:
+ *
+ * - One 64-bit load to retrieve the start IP
+ * - One 64-bit load to retrieve the offset for calculating the end
+ * - One 64-bit load to retrieve the abort IP
+ * - One store to clear the critical section address
+ *
+ * The non-debug case implements only the minimal required checking and
+ * protection against a rogue abort IP in kernel space, which would be
+ * exploitable at least on x86. Any fallout from invalid critical section
+ * descriptors is a user space problem. The debug case provides the full
+ * set of checks and terminates the task if a condition is not met.
+ *
+ * In case of a fault or an invalid value, this sets TIF_NOTIFY_RESUME and
+ * tells the caller to loop back into exit_to_user_mode_loop(). The rseq
+ * slow path there will handle the fail.
+ */
+static __always_inline bool __rseq_exit_to_user_mode_restart(struct pt_regs *regs)
+{
+ struct task_struct *t = current;
+
+ /*
+ * If the task did not go through schedule or got the flag enforced
+ * by the rseq syscall or execve, then nothing to do here.
+ *
+ * CPU ID and MM CID can only change when going through a context
+ * switch.
+ *
+ * This can only be done when rseq_event::has_rseq is true.
+ * rseq_sched_switch_event() sets rseq_event::sched unconditionally
+ * true to avoid a load of rseq_event::has_rseq in the context
+ * switch path.
+ *
+ * This check uses a '&' and not a '&&' to force the compiler to do
+ * an actual AND operation instead of two seperate conditionals.
+ *
+ * A sane compiler requires four instructions for the nothing to do
+ * case including clearing the events, but your milage might vary.
+ */
+ if (likely(!(t->rseq.event.sched_switch & t->rseq.event.has_rseq)))
+ goto done;
+
+ rseq_stat_inc(rseq_stats.fastpath);
+
+ pagefault_disable();
+
+ if (likely(!t->rseq.event.ids_changed)) {
+ /*
+ * If IDs have not changed rseq_event::user_irq must be true
+ * See rseq_sched_switch_event().
+ */
+ u64 csaddr;
+
+ if (unlikely(get_user_masked_u64(&csaddr, &t->rseq.usrptr->rseq_cs)))
+ goto fail;
+
+ if (static_branch_unlikely(&rseq_debug_enabled) || unlikely(csaddr)) {
+ if (unlikely(!rseq_update_user_cs(t, regs, csaddr)))
+ goto fail;
+ }
+ } else {
+ struct rseq_ids ids = {
+ .cpu_id = task_cpu(t),
+ .mm_cid = task_mm_cid(t),
+ };
+ u32 node_id = cpu_to_node(ids.cpu_id);
+
+ if (unlikely(!rseq_update_usr(t, regs, &ids, node_id)))
+ goto fail;
+ }
+
+ pagefault_enable();
+
+done:
+ /* Clear state so next entry starts from a clean slate */
+ t->rseq.event.events = 0;
+ return false;
+
+fail:
+ pagefault_enable();
+ /* Force it into the slow path. Don't clear the state! */
+ t->rseq.event.slowpath = true;
+ set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
+ return true;
+}
+
+static __always_inline bool rseq_exit_to_user_mode_restart(struct pt_regs *regs)
+{
+ if (unlikely(__rseq_exit_to_user_mode_restart(regs)))
+ return true;
+
+ return false;
+}
+
+#endif /* CONFIG_GENERIC_ENTRY */
+
static __always_inline void rseq_exit_to_user_mode(void)
{
struct rseq_event *ev = ¤t->rseq.event;
@@ -436,9 +556,12 @@ static inline void rseq_debug_syscall_re
if (static_branch_unlikely(&rseq_debug_enabled))
__rseq_debug_syscall_return(regs);
}
-
#else /* CONFIG_RSEQ */
static inline void rseq_note_user_irq_entry(void) { }
+static inline bool rseq_exit_to_user_mode_restart(struct pt_regs *regs)
+{
+ return false;
+}
static inline void rseq_exit_to_user_mode(void) { }
static inline void rseq_debug_syscall_return(struct pt_regs *regs) { }
#endif /* !CONFIG_RSEQ */
--- a/include/linux/rseq_types.h
+++ b/include/linux/rseq_types.h
@@ -18,6 +18,8 @@ struct rseq;
* @has_rseq: True if the task has a rseq pointer installed
* @error: Compound error code for the slow path to analyze
* @fatal: User space data corrupted or invalid
+ * @slowpath: Indicator that slow path processing via TIF_NOTIFY_RESUME
+ * is required
*
* @sched_switch and @ids_changed must be adjacent and the combo must be
* 16bit aligned to allow a single store, when both are set at the same
@@ -42,6 +44,7 @@ struct rseq_event {
u16 error;
struct {
u8 fatal;
+ u8 slowpath;
};
};
};
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -133,6 +133,7 @@ static int rseq_stats_show(struct seq_fi
stats.exit += data_race(per_cpu(rseq_stats.exit, cpu));
stats.signal += data_race(per_cpu(rseq_stats.signal, cpu));
stats.slowpath += data_race(per_cpu(rseq_stats.slowpath, cpu));
+ stats.fastpath += data_race(per_cpu(rseq_stats.fastpath, cpu));
stats.ids += data_race(per_cpu(rseq_stats.ids, cpu));
stats.cs += data_race(per_cpu(rseq_stats.cs, cpu));
stats.clear += data_race(per_cpu(rseq_stats.clear, cpu));
@@ -142,6 +143,7 @@ static int rseq_stats_show(struct seq_fi
seq_printf(m, "exit: %16lu\n", stats.exit);
seq_printf(m, "signal: %16lu\n", stats.signal);
seq_printf(m, "slowp: %16lu\n", stats.slowpath);
+ seq_printf(m, "fastp: %16lu\n", stats.fastpath);
seq_printf(m, "ids: %16lu\n", stats.ids);
seq_printf(m, "cs: %16lu\n", stats.cs);
seq_printf(m, "clear: %16lu\n", stats.clear);
On 2025-09-08 17:32, Thomas Gleixner wrote:
> Implement the actual logic for handling RSEQ updates in a fast path after
> handling the TIF work and at the point where the task is actually returning
> to user space.
>
> This is the right point to do that because at this point the CPU and the MM
> CID are stable and cannot longer change due to yet another reschedule.
> That happens when the task is handling it via TIF_NOTIFY_RESUME in
> resume_user_mode_work(), which is invoked from the exit to user mode work
> loop.
>
> The function is invoked after the TIF work is handled and runs with
> interrupts disabled, which means it cannot resolve page faults. It
> therefore disables page faults and in case the access to the user space
> memory faults, it:
>
> - notes the fail in the event struct
> - raises TIF_NOTIFY_RESUME
> - returns false to the caller
>
> The caller has to go back to the TIF work, which runs with interrupts
> enabled and therefore can resolve the page faults. This happens mostly on
> fork() when the memory is marked COW. That will be optimized by setting the
> failure flag and raising TIF_NOTIFY_RESUME right on fork to avoid the
> otherwise unavoidable round trip.
>
> If the user memory inspection finds invalid data, the function returns
> false as well and sets the fatal flag in the event struct along with
> TIF_NOTIFY_RESUME. The slow path notify handler has to evaluate that flag
> and terminate the task with SIGSEGV as documented.
>
> The initial decision to invoke any of this is based on two flags in the
> event struct: @has_rseq and @sched_switch. The decision is in pseudo ASM:
>
> load tsk::event::has_rseq
> and tsk::event::sched_switch
I suspect that my earlier comment about has_rseq and AND applies here
as well. We could probably remove the has_rseq load and the AND
altogether and just load sched_switch for the conditional branch ?
> jnz inspect_user_space
> mov $0, tsk::event::events
> ...
> leave
>
> So for the common case where the task was not scheduled out, this really
> boils down to four instructions before going out if the compiler is not
> completely stupid (and yes, some of them are).
Or three.
>
> If the condition is true, then it checks, whether CPU ID or MM CID have
> changed. If so, then the CPU/MM IDs have to be updated and are thereby
> cached for the next round. The update unconditionally retrieves the user
> space critical section address to spare another user*begin/end() pair. If
> that's not zero and tsk::event::user_irq is set, then the critical section
> is analyzed and acted upon. If either zero or the entry came via syscall
> the critical section analysis is skipped.
>
> If the comparison is false then the critical section has to be analyzed
> because the event flag is then only true when entry from user was by
> interrupt.
>
> This is provided without the actual hookup to let reviewers focus on the
> implementation details. The hookup happens in the next step.
>
> Note: As with quite some other optimizations this depends on the generic
> entry infrastructure and is not enabled to be sucked into random
> architecture implementations.
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
>
> ---
> include/linux/rseq_entry.h | 133 +++++++++++++++++++++++++++++++++++++++++++--
> include/linux/rseq_types.h | 3 +
> kernel/rseq.c | 2
> 3 files changed, 133 insertions(+), 5 deletions(-)
>
> --- a/include/linux/rseq_entry.h
> +++ b/include/linux/rseq_entry.h
> @@ -10,6 +10,7 @@ struct rseq_stats {
> unsigned long exit;
> unsigned long signal;
> unsigned long slowpath;
> + unsigned long fastpath;
> unsigned long ids;
> unsigned long cs;
> unsigned long clear;
> @@ -202,8 +203,8 @@ bool rseq_debug_update_user_cs(struct ta
>
> /*
> * On debug kernels validate that user space did not mess with it if
> - * DEBUG_RSEQ is enabled, but don't on the first exit to user space. In
> - * that case cpu_cid is ~0. See fork/execve.
> + * debugging is enabled, but don't do that on the first exit to user
> + * space. In that case cpu_cid is ~0. See fork/execve.
see earlier comment about using either ~0 or ~0ULL for consistency.
> */
> bool rseq_debug_validate_ids(struct task_struct *t)
> {
> @@ -254,12 +255,13 @@ rseq_update_user_cs(struct task_struct *
> {
> struct rseq_cs __user *ucs = (struct rseq_cs __user *)(unsigned long)csaddr;
> unsigned long ip = instruction_pointer(regs);
> + unsigned long tasksize = TASK_SIZE;
> u64 start_ip, abort_ip, offset;
> u32 usig, __user *uc_sig;
>
> rseq_stat_inc(rseq_stats.cs);
>
> - if (unlikely(csaddr >= TASK_SIZE)) {
> + if (unlikely(csaddr >= tasksize)) {
> t->rseq.event.fatal = true;
> return false;
> }
> @@ -298,7 +300,7 @@ rseq_update_user_cs(struct task_struct *
> * in TLS::rseq::rseq_cs. An RSEQ abort would then evade ROP
> * protection.
> */
> - if (abort_ip >= TASK_SIZE || abort_ip < sizeof(*uc_sig))
> + if (unlikely(abort_ip >= tasksize || abort_ip < sizeof(*uc_sig)))
> goto die;
>
> /* The address is guaranteed to be >= 0 and < TASK_SIZE */
> @@ -412,6 +414,124 @@ static rseq_inline bool rseq_update_usr(
> return rseq_update_user_cs(t, regs, csaddr);
> }
>
> +/*
> + * If you want to use this then convert your architecture to the generic
> + * entry code. I'm tired of building workarounds for people who can't be
> + * bothered to make the maintainence of generic infrastructure less
maintenance
> + * burdensome. Just sucking everything into the architecture code and
> + * thereby making others chase the horrible hacks and keep them working is
> + * neither acceptable nor sustainable.
> + */
> +#ifdef CONFIG_GENERIC_ENTRY
> +
> +/*
> + * This is inlined into the exit path because:
> + *
> + * 1) It's a one time comparison in the fast path when there is no event to
> + * handle
> + *
> + * 2) The access to the user space rseq memory (TLS) is unlikely to fault
> + * so the straight inline operation is:
> + *
> + * - Four 32-bit stores only if CPU ID/ MM CID need to be updated
> + * - One 64-bit load to retrieve the critical section address
> + *
> + * 3) In the unlikely case that the critical section address is != NULL:
> + *
> + * - One 64-bit load to retrieve the start IP
> + * - One 64-bit load to retrieve the offset for calculating the end
> + * - One 64-bit load to retrieve the abort IP
> + * - One store to clear the critical section address
> + *
> + * The non-debug case implements only the minimal required checking and
> + * protection against a rogue abort IP in kernel space, which would be
> + * exploitable at least on x86. Any fallout from invalid critical section
> + * descriptors is a user space problem. The debug case provides the full
> + * set of checks and terminates the task if a condition is not met.
> + *
> + * In case of a fault or an invalid value, this sets TIF_NOTIFY_RESUME and
> + * tells the caller to loop back into exit_to_user_mode_loop(). The rseq
> + * slow path there will handle the fail.
> + */
> +static __always_inline bool __rseq_exit_to_user_mode_restart(struct pt_regs *regs)
> +{
> + struct task_struct *t = current;
> +
> + /*
> + * If the task did not go through schedule or got the flag enforced
> + * by the rseq syscall or execve, then nothing to do here.
> + *
> + * CPU ID and MM CID can only change when going through a context
> + * switch.
> + *
> + * This can only be done when rseq_event::has_rseq is true.
> + * rseq_sched_switch_event() sets rseq_event::sched unconditionally
> + * true to avoid a load of rseq_event::has_rseq in the context
> + * switch path.
Based on the patch just before this one, I don't see that the
implementation of rseq_sched_switch_event() matches this comment.
It does check ev->has_rseq.
So I guess we need to pick one here:
Either we skip a has_rseq check in the scheduler and always set the
sched_switch event, in which case we need the sched_switch & has_rseq
check on return to userspace,
or we check has_rseq in the scheduler, and we don't need to load it
on return to userspace.
I suspect that this series went from one approach to the other without
entirely updating the relevant return to userspace code.
> + *
> + * This check uses a '&' and not a '&&' to force the compiler to do
> + * an actual AND operation instead of two seperate conditionals.
separate
> + *
> + * A sane compiler requires four instructions for the nothing to do
> + * case including clearing the events, but your milage might vary.
mileage
> + */
> + if (likely(!(t->rseq.event.sched_switch & t->rseq.event.has_rseq)))
Same comment as earlier about checking has_rseq which may be redundant.
Thanks,
Mathieu
> + goto done;
> +
> + rseq_stat_inc(rseq_stats.fastpath);
> +
> + pagefault_disable();
> +
> + if (likely(!t->rseq.event.ids_changed)) {
> + /*
> + * If IDs have not changed rseq_event::user_irq must be true
> + * See rseq_sched_switch_event().
> + */
> + u64 csaddr;
> +
> + if (unlikely(get_user_masked_u64(&csaddr, &t->rseq.usrptr->rseq_cs)))
> + goto fail;
> +
> + if (static_branch_unlikely(&rseq_debug_enabled) || unlikely(csaddr)) {
> + if (unlikely(!rseq_update_user_cs(t, regs, csaddr)))
> + goto fail;
> + }
> + } else {
> + struct rseq_ids ids = {
> + .cpu_id = task_cpu(t),
> + .mm_cid = task_mm_cid(t),
> + };
> + u32 node_id = cpu_to_node(ids.cpu_id);
> +
> + if (unlikely(!rseq_update_usr(t, regs, &ids, node_id)))
> + goto fail;
> + }
> +
> + pagefault_enable();
> +
> +done:
> + /* Clear state so next entry starts from a clean slate */
> + t->rseq.event.events = 0;
> + return false;
> +
> +fail:
> + pagefault_enable();
> + /* Force it into the slow path. Don't clear the state! */
> + t->rseq.event.slowpath = true;
> + set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
> + return true;
> +}
> +
> +static __always_inline bool rseq_exit_to_user_mode_restart(struct pt_regs *regs)
> +{
> + if (unlikely(__rseq_exit_to_user_mode_restart(regs)))
> + return true;
> +
> + return false;
> +}
> +
> +#endif /* CONFIG_GENERIC_ENTRY */
> +
> static __always_inline void rseq_exit_to_user_mode(void)
> {
> struct rseq_event *ev = ¤t->rseq.event;
> @@ -436,9 +556,12 @@ static inline void rseq_debug_syscall_re
> if (static_branch_unlikely(&rseq_debug_enabled))
> __rseq_debug_syscall_return(regs);
> }
> -
> #else /* CONFIG_RSEQ */
> static inline void rseq_note_user_irq_entry(void) { }
> +static inline bool rseq_exit_to_user_mode_restart(struct pt_regs *regs)
> +{
> + return false;
> +}
> static inline void rseq_exit_to_user_mode(void) { }
> static inline void rseq_debug_syscall_return(struct pt_regs *regs) { }
> #endif /* !CONFIG_RSEQ */
> --- a/include/linux/rseq_types.h
> +++ b/include/linux/rseq_types.h
> @@ -18,6 +18,8 @@ struct rseq;
> * @has_rseq: True if the task has a rseq pointer installed
> * @error: Compound error code for the slow path to analyze
> * @fatal: User space data corrupted or invalid
> + * @slowpath: Indicator that slow path processing via TIF_NOTIFY_RESUME
> + * is required
> *
> * @sched_switch and @ids_changed must be adjacent and the combo must be
> * 16bit aligned to allow a single store, when both are set at the same
> @@ -42,6 +44,7 @@ struct rseq_event {
> u16 error;
> struct {
> u8 fatal;
> + u8 slowpath;
> };
> };
> };
> --- a/kernel/rseq.c
> +++ b/kernel/rseq.c
> @@ -133,6 +133,7 @@ static int rseq_stats_show(struct seq_fi
> stats.exit += data_race(per_cpu(rseq_stats.exit, cpu));
> stats.signal += data_race(per_cpu(rseq_stats.signal, cpu));
> stats.slowpath += data_race(per_cpu(rseq_stats.slowpath, cpu));
> + stats.fastpath += data_race(per_cpu(rseq_stats.fastpath, cpu));
> stats.ids += data_race(per_cpu(rseq_stats.ids, cpu));
> stats.cs += data_race(per_cpu(rseq_stats.cs, cpu));
> stats.clear += data_race(per_cpu(rseq_stats.clear, cpu));
> @@ -142,6 +143,7 @@ static int rseq_stats_show(struct seq_fi
> seq_printf(m, "exit: %16lu\n", stats.exit);
> seq_printf(m, "signal: %16lu\n", stats.signal);
> seq_printf(m, "slowp: %16lu\n", stats.slowpath);
> + seq_printf(m, "fastp: %16lu\n", stats.fastpath);
> seq_printf(m, "ids: %16lu\n", stats.ids);
> seq_printf(m, "cs: %16lu\n", stats.cs);
> seq_printf(m, "clear: %16lu\n", stats.clear);
>
--
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com
On Thu, Sep 11 2025 at 10:27, Mathieu Desnoyers wrote: > On 2025-09-08 17:32, Thomas Gleixner wrote: >> The initial decision to invoke any of this is based on two flags in the >> event struct: @has_rseq and @sched_switch. The decision is in pseudo ASM: >> >> load tsk::event::has_rseq >> and tsk::event::sched_switch > > I suspect that my earlier comment about has_rseq and AND applies here > as well. We could probably remove the has_rseq load and the AND > altogether and just load sched_switch for the conditional branch ? Same comment as before.
Now that all bits and pieces are in place, hook the RSEQ handling fast path
function into exit_to_user_mode_prepare() after the TIF work bits have been
handled. In case of fast path failure, TIF_NOTIFY_RESUME has been raised
and the caller needs to take another turn through the TIF handling slow
path.
This only works for architectures which use the generic entry code.
Architectures which still have their own incomplete hacks are not supported
and won't be.
This results in the following improvements:
Kernel build Before After Reduction
exit to user 80692981 80514451
signal checks: 32581 121 99%
slowpath runs: 1201408 1.49% 198 0.00% 100%
fastpath runs: 675941 0.84% N/A
id updates: 1233989 1.53% 50541 0.06% 96%
cs checks: 1125366 1.39% 0 0.00% 100%
cs cleared: 1125366 100% 0 100%
cs fixup: 0 0% 0
RSEQ selftests Before After Reduction
exit to user: 386281778 387373750
signal checks: 35661203 0 100%
slowpath runs: 140542396 36.38% 100 0.00% 100%
fastpath runs: 9509789 2.51% N/A
id updates: 176203599 45.62% 9087994 2.35% 95%
cs checks: 175587856 45.46% 4728394 1.22% 98%
cs cleared: 172359544 98.16% 1319307 27.90% 99%
cs fixup: 3228312 1.84% 3409087 72.10%
The 'cs cleared' and 'cs fixup' percentages are not relative to the exit
to user invocations, they are relative to the actual 'cs check'
invocations.
While some of this could have been avoided in the original code, like the
obvious clearing of CS when it's already clear, the main problem of going
through TIF_NOTIFY_RESUME cannot be solved. In some workloads the RSEQ
notify handler is invoked more than once before going out to user
space. Doing this once when everything has stabilized is the only solution
to avoid this.
The initial attempt to completely decouple it from the TIF work turned out
to be suboptimal for workloads which do a lot of quick and short system
calls. Even if the fast path decision is only 4 instructions (including a
conditional branch), this adds up quickly and becomes measurable when the
rate for actually having to handle rseq is in the low single digit
percentage range of user/kernel transitions.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
V4: Move the rseq handling into a separate loop to avoid gotos later on
---
include/linux/irq-entry-common.h | 7 ++-----
include/linux/resume_user_mode.h | 2 +-
include/linux/rseq.h | 23 +++++++++++++++++------
init/Kconfig | 2 +-
kernel/entry/common.c | 26 +++++++++++++++++++-------
kernel/rseq.c | 8 ++++++--
6 files changed, 46 insertions(+), 22 deletions(-)
--- a/include/linux/irq-entry-common.h
+++ b/include/linux/irq-entry-common.h
@@ -197,11 +197,8 @@ static __always_inline void arch_exit_to
*/
void arch_do_signal_or_restart(struct pt_regs *regs);
-/**
- * exit_to_user_mode_loop - do any pending work before leaving to user space
- */
-unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
- unsigned long ti_work);
+/* Handle pending TIF work */
+unsigned long exit_to_user_mode_loop(struct pt_regs *regs, unsigned long ti_work);
/**
* exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required
--- a/include/linux/resume_user_mode.h
+++ b/include/linux/resume_user_mode.h
@@ -59,7 +59,7 @@ static inline void resume_user_mode_work
mem_cgroup_handle_over_high(GFP_KERNEL);
blkcg_maybe_throttle_current();
- rseq_handle_notify_resume(regs);
+ rseq_handle_slowpath(regs);
}
#endif /* LINUX_RESUME_USER_MODE_H */
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -5,13 +5,19 @@
#ifdef CONFIG_RSEQ
#include <linux/sched.h>
-void __rseq_handle_notify_resume(struct pt_regs *regs);
+void __rseq_handle_slowpath(struct pt_regs *regs);
-static inline void rseq_handle_notify_resume(struct pt_regs *regs)
+/* Invoked from resume_user_mode_work() */
+static inline void rseq_handle_slowpath(struct pt_regs *regs)
{
- /* '&' is intentional to spare one conditional branch */
- if (current->rseq.event.sched_switch & current->rseq.event.has_rseq)
- __rseq_handle_notify_resume(regs);
+ if (IS_ENABLED(CONFIG_GENERIC_ENTRY)) {
+ if (current->rseq.event.slowpath)
+ __rseq_handle_slowpath(regs);
+ } else {
+ /* '&' is intentional to spare one conditional branch */
+ if (current->rseq.event.sched_switch & current->rseq.event.has_rseq)
+ __rseq_handle_slowpath(regs);
+ }
}
void __rseq_signal_deliver(int sig, struct pt_regs *regs);
@@ -142,11 +148,16 @@ static inline void rseq_fork(struct task
} else {
t->rseq = current->rseq;
t->rseq.ids.cpu_cid = ~0ULL;
+ /*
+ * If it has rseq, force it into the slow path right away
+ * because it is guaranteed to fault.
+ */
+ t->rseq.event.slowpath = t->rseq.event.has_rseq;
}
}
#else /* CONFIG_RSEQ */
-static inline void rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs) { }
+static inline void rseq_handle_slowpath(struct pt_regs *regs) { }
static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { }
static inline void rseq_sched_switch_event(struct task_struct *t) { }
static inline void rseq_sched_set_task_cpu(struct task_struct *t, unsigned int cpu) { }
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1911,7 +1911,7 @@ config RSEQ_DEBUG_DEFAULT_ENABLE
config DEBUG_RSEQ
default n
bool "Enable debugging of rseq() system call" if EXPERT
- depends on RSEQ && DEBUG_KERNEL
+ depends on RSEQ && DEBUG_KERNEL && !GENERIC_ENTRY
select RSEQ_DEBUG_DEFAULT_ENABLE
help
Enable extra debugging checks for the rseq system call.
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -11,13 +11,8 @@
/* Workaround to allow gradual conversion of architecture code */
void __weak arch_do_signal_or_restart(struct pt_regs *regs) { }
-/**
- * exit_to_user_mode_loop - do any pending work before leaving to user space
- * @regs: Pointer to pt_regs on entry stack
- * @ti_work: TIF work flags as read by the caller
- */
-__always_inline unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
- unsigned long ti_work)
+static __always_inline unsigned long __exit_to_user_mode_loop(struct pt_regs *regs,
+ unsigned long ti_work)
{
/*
* Before returning to user space ensure that all pending work
@@ -62,6 +57,23 @@ void __weak arch_do_signal_or_restart(st
return ti_work;
}
+/**
+ * exit_to_user_mode_loop - do any pending work before leaving to user space
+ * @regs: Pointer to pt_regs on entry stack
+ * @ti_work: TIF work flags as read by the caller
+ */
+__always_inline unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
+ unsigned long ti_work)
+{
+ for (;;) {
+ ti_work = __exit_to_user_mode_loop(regs, ti_work);
+
+ if (likely(!rseq_exit_to_user_mode_restart(regs)))
+ return ti_work;
+ ti_work = read_thread_flags();
+ }
+}
+
noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
{
irqentry_state_t ret = {
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -234,7 +234,11 @@ static bool rseq_handle_cs(struct task_s
static void rseq_slowpath_update_usr(struct pt_regs *regs)
{
- /* Preserve rseq state and user_irq state for exit to user */
+ /*
+ * Preserve rseq state and user_irq state. The generic entry code
+ * clears user_irq on the way out, the non-generic entry
+ * architectures are not having user_irq.
+ */
const struct rseq_event evt_mask = { .has_rseq = true, .user_irq = true, };
struct task_struct *t = current;
struct rseq_ids ids;
@@ -286,7 +290,7 @@ static void rseq_slowpath_update_usr(str
}
}
-void __rseq_handle_notify_resume(struct pt_regs *regs)
+void __rseq_handle_slowpath(struct pt_regs *regs)
{
/*
* If invoked from hypervisors before entering the guest via
On 2025-09-08 17:32, Thomas Gleixner wrote:
> Now that all bits and pieces are in place, hook the RSEQ handling fast path
> function into exit_to_user_mode_prepare() after the TIF work bits have been
> handled. If case of fast path failure, TIF_NOTIFY_RESUME has been raised
> and the caller needs to take another turn through the TIF handling slow
> path.
>
> This only works for architectures, which use the generic entry code.
Remove comma after "architectures"
> Architectures, who still have their own incomplete hacks are not supported
Remove comma after "Architectures"
> and won't be.
>
> This results in the following improvements:
>
> Kernel build Before After Reduction
>
> exit to user 80692981 80514451
> signal checks: 32581 121 99%
> slowpath runs: 1201408 1.49% 198 0.00% 100%
> fastpath runs: 675941 0.84% N/A
> id updates: 1233989 1.53% 50541 0.06% 96%
> cs checks: 1125366 1.39% 0 0.00% 100%
> cs cleared: 1125366 100% 0 100%
> cs fixup: 0 0% 0
>
> RSEQ selftests Before After Reduction
>
> exit to user: 386281778 387373750
> signal checks: 35661203 0 100%
> slowpath runs: 140542396 36.38% 100 0.00% 100%
> fastpath runs: 9509789 2.51% N/A
> id updates: 176203599 45.62% 9087994 2.35% 95%
> cs checks: 175587856 45.46% 4728394 1.22% 98%
> cs cleared: 172359544 98.16% 1319307 27.90% 99%
> cs fixup: 3228312 1.84% 3409087 72.10%
>
> The 'cs cleared' and 'cs fixup' percentanges are not relative to the exit
percentages
> to user invocations, they are relative to the actual 'cs check'
> invocations.
>
> While some of this could have been avoided in the original code, like the
> obvious clearing of CS when it's already clear, the main problem of going
> through TIF_NOTIFY_RESUME cannot be solved. In some workloads the RSEQ
> notify handler is invoked more than once before going out to user
> space. Doing this once when everything has stabilized is the only solution
> to avoid this.
>
> The initial attempt to completely decouple it from the TIF work turned out
> to be suboptimal for workloads, which do a lot of quick and short system
> calls. Even if the fast path decision is only 4 instructions (including a
> conditional branch), this adds up quickly and becomes measurable when the
> rate for actually having to handle rseq is in the low single digit
> percentage range of user/kernel transitions.
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
> V4: Move the rseq handling into a separate loop to avoid gotos later on
> ---
> include/linux/irq-entry-common.h | 7 ++-----
> include/linux/resume_user_mode.h | 2 +-
> include/linux/rseq.h | 23 +++++++++++++++++------
> init/Kconfig | 2 +-
> kernel/entry/common.c | 26 +++++++++++++++++++-------
> kernel/rseq.c | 8 ++++++--
> 6 files changed, 46 insertions(+), 22 deletions(-)
>
> --- a/include/linux/irq-entry-common.h
> +++ b/include/linux/irq-entry-common.h
> @@ -197,11 +197,8 @@ static __always_inline void arch_exit_to
> */
> void arch_do_signal_or_restart(struct pt_regs *regs);
>
> -/**
> - * exit_to_user_mode_loop - do any pending work before leaving to user space
> - */
> -unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
> - unsigned long ti_work);
> +/* Handle pending TIF work */
> +unsigned long exit_to_user_mode_loop(struct pt_regs *regs, unsigned long ti_work);
>
> /**
> * exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required
> --- a/include/linux/resume_user_mode.h
> +++ b/include/linux/resume_user_mode.h
> @@ -59,7 +59,7 @@ static inline void resume_user_mode_work
> mem_cgroup_handle_over_high(GFP_KERNEL);
> blkcg_maybe_throttle_current();
>
> - rseq_handle_notify_resume(regs);
> + rseq_handle_slowpath(regs);
> }
>
> #endif /* LINUX_RESUME_USER_MODE_H */
> --- a/include/linux/rseq.h
> +++ b/include/linux/rseq.h
> @@ -5,13 +5,19 @@
> #ifdef CONFIG_RSEQ
> #include <linux/sched.h>
>
> -void __rseq_handle_notify_resume(struct pt_regs *regs);
> +void __rseq_handle_slowpath(struct pt_regs *regs);
>
> -static inline void rseq_handle_notify_resume(struct pt_regs *regs)
> +/* Invoked from resume_user_mode_work() */
> +static inline void rseq_handle_slowpath(struct pt_regs *regs)
> {
> - /* '&' is intentional to spare one conditional branch */
> - if (current->rseq.event.sched_switch & current->rseq.event.has_rseq)
> - __rseq_handle_notify_resume(regs);
> + if (IS_ENABLED(CONFIG_GENERIC_ENTRY)) {
> + if (current->rseq.event.slowpath)
> + __rseq_handle_slowpath(regs);
> + } else {
> + /* '&' is intentional to spare one conditional branch */
> + if (current->rseq.event.sched_switch & current->rseq.event.has_rseq)
Ref. to earlier comment about has_rseq check perhaps redundant.
> + __rseq_handle_slowpath(regs);
> + }
> }
>
> void __rseq_signal_deliver(int sig, struct pt_regs *regs);
> @@ -142,11 +148,16 @@ static inline void rseq_fork(struct task
> } else {
> t->rseq = current->rseq;
> t->rseq.ids.cpu_cid = ~0ULL;
As discussed earlier, do we really want to clear cpu_cid here, or
copy from parent ? If we keep the parent's cached values, I suspect
we can skip the page fault on return from fork in many cases.
> + /*
> + * If it has rseq, force it into the slow path right away
> + * because it is guaranteed to fault.
> + */
> + t->rseq.event.slowpath = t->rseq.event.has_rseq;
I think we can do better here. It's only guaranteed to fault if:
- has_rseq is set, AND
- cpu or cid has changed compared to the cached value OR
- rseq_cs user pointer is non-NULL.
Otherwise we should be able to handle the return from fork from the fast
path just with loads from the rseq area, or am I missing something ?
Thanks,
Mathieu
> }
> }
>
> #else /* CONFIG_RSEQ */
> -static inline void rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs) { }
> +static inline void rseq_handle_slowpath(struct pt_regs *regs) { }
> static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { }
> static inline void rseq_sched_switch_event(struct task_struct *t) { }
> static inline void rseq_sched_set_task_cpu(struct task_struct *t, unsigned int cpu) { }
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -1911,7 +1911,7 @@ config RSEQ_DEBUG_DEFAULT_ENABLE
> config DEBUG_RSEQ
> default n
> bool "Enable debugging of rseq() system call" if EXPERT
> - depends on RSEQ && DEBUG_KERNEL
> + depends on RSEQ && DEBUG_KERNEL && !GENERIC_ENTRY
I'm confused about this hunk. Perhaps this belongs to a different
commit ?
Thanks,
Mathieu
> select RSEQ_DEBUG_DEFAULT_ENABLE
> help
> Enable extra debugging checks for the rseq system call.
> --- a/kernel/entry/common.c
> +++ b/kernel/entry/common.c
> @@ -11,13 +11,8 @@
> /* Workaround to allow gradual conversion of architecture code */
> void __weak arch_do_signal_or_restart(struct pt_regs *regs) { }
>
> -/**
> - * exit_to_user_mode_loop - do any pending work before leaving to user space
> - * @regs: Pointer to pt_regs on entry stack
> - * @ti_work: TIF work flags as read by the caller
> - */
> -__always_inline unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
> - unsigned long ti_work)
> +static __always_inline unsigned long __exit_to_user_mode_loop(struct pt_regs *regs,
> + unsigned long ti_work)
> {
> /*
> * Before returning to user space ensure that all pending work
> @@ -62,6 +57,23 @@ void __weak arch_do_signal_or_restart(st
> return ti_work;
> }
>
> +/**
> + * exit_to_user_mode_loop - do any pending work before leaving to user space
> + * @regs: Pointer to pt_regs on entry stack
> + * @ti_work: TIF work flags as read by the caller
> + */
> +__always_inline unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
> + unsigned long ti_work)
> +{
> + for (;;) {
> + ti_work = __exit_to_user_mode_loop(regs, ti_work);
> +
> + if (likely(!rseq_exit_to_user_mode_restart(regs)))
> + return ti_work;
> + ti_work = read_thread_flags();
> + }
> +}
> +
> noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
> {
> irqentry_state_t ret = {
> --- a/kernel/rseq.c
> +++ b/kernel/rseq.c
> @@ -234,7 +234,11 @@ static bool rseq_handle_cs(struct task_s
>
> static void rseq_slowpath_update_usr(struct pt_regs *regs)
> {
> - /* Preserve rseq state and user_irq state for exit to user */
> + /*
> + * Preserve rseq state and user_irq state. The generic entry code
> + * clears user_irq on the way out, the non-generic entry
> + * architectures are not having user_irq.
> + */
> const struct rseq_event evt_mask = { .has_rseq = true, .user_irq = true, };
> struct task_struct *t = current;
> struct rseq_ids ids;
> @@ -286,7 +290,7 @@ static void rseq_slowpath_update_usr(str
> }
> }
>
> -void __rseq_handle_notify_resume(struct pt_regs *regs)
> +void __rseq_handle_slowpath(struct pt_regs *regs)
> {
> /*
> * If invoked from hypervisors before entering the guest via
>
--
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com
On Thu, Sep 11 2025 at 10:44, Mathieu Desnoyers wrote:
> On 2025-09-08 17:32, Thomas Gleixner wrote:
>> void __rseq_signal_deliver(int sig, struct pt_regs *regs);
>> @@ -142,11 +148,16 @@ static inline void rseq_fork(struct task
>> } else {
>> t->rseq = current->rseq;
>> t->rseq.ids.cpu_cid = ~0ULL;
>
> As discussed earlier, do we really want to clear cpu_cid here, or
> copy from parent ? If we keep the parent's cached values, I suspect
Suspicion is not really a good engineering principle.
> we can skip the page fault on return from fork in many cases.
I doubt that it's many cases, see below.
>> + /*
>> + * If it has rseq, force it into the slow path right away
>> + * because it is guaranteed to fault.
>> + */
>> + t->rseq.event.slowpath = t->rseq.event.has_rseq;
>
> I think we can do better here. It's only guaranteed to fault if:
>
> - has_rseq is set, AND
> - cpu or cid has changed compared to the cached value OR
> - rseq_cs user pointer is non-NULL.
>
> Otherwise we should be able to handle the return from fork from the fast
> path just with loads from the rseq area, or am I missing something ?
The normal case is that the fault is pretty much guaranteed to happen
because the scheduler places the child on a different CPU and therefore
the CPU/MM IDs need to be updated anyway.
The only cases where this is not true, are when there is no capacity to
do so or on UP or the parent was affine to a single CPU, which is what
the child inherits.
Now for those cases the only time where such an optimization would be
meaningful is when the child is so short lived that it does not
schedule. Otherwise you just procrastinate the fault to a later point
in time.
The point is that it will fault in the vast majority of cases and then
the fault will happen in the fast path first, which means the exit code
has to go another round through the work loop instead of forcing the
fault right away on the first exit in the slowpath, where it can be
actually resolved.
> Thanks,
>
> Mathieu
Your method of ending a mail with a greeting and then continuing to
reply downwards is pretty annoying. I almost missed the below if I
wouldn't have tried to trim the reply.
>> }
>> }
>>
>> #else /* CONFIG_RSEQ */
>> -static inline void rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs) { }
>> +static inline void rseq_handle_slowpath(struct pt_regs *regs) { }
>> static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { }
>> static inline void rseq_sched_switch_event(struct task_struct *t) { }
>> static inline void rseq_sched_set_task_cpu(struct task_struct *t, unsigned int cpu) { }
>> --- a/init/Kconfig
>> +++ b/init/Kconfig
>> @@ -1911,7 +1911,7 @@ config RSEQ_DEBUG_DEFAULT_ENABLE
>> config DEBUG_RSEQ
>> default n
>> bool "Enable debugging of rseq() system call" if EXPERT
>> - depends on RSEQ && DEBUG_KERNEL
>> + depends on RSEQ && DEBUG_KERNEL && !GENERIC_ENTRY
>
> I'm confused about this hunk. Perhaps this belongs to a different
> commit ?
This is attached to wiring up the fast path in GENERIC_ENTRY, because
the fastpath, which does debug when enabled through the static key, is
at the end of the exit to user work loop and therefore the extra round
through the detached debug muck can be avoided.
While on !GENERIC_ENTRY architectures the processing happens always in
the TIF_NOTIFY_RESUME slowpath, so the extra debug muck is required, no?
> Mathieu
<TRIM pointless leftovers>
Can you please trim your replies?
</>
Thanks,
tglx
On 2025-09-11 12:47, Thomas Gleixner wrote:
> On Thu, Sep 11 2025 at 10:44, Mathieu Desnoyers wrote:
>> On 2025-09-08 17:32, Thomas Gleixner wrote:
>>> void __rseq_signal_deliver(int sig, struct pt_regs *regs);
>>> @@ -142,11 +148,16 @@ static inline void rseq_fork(struct task
>>> } else {
>>> t->rseq = current->rseq;
>>> t->rseq.ids.cpu_cid = ~0ULL;
>>
>> As discussed earlier, do we really want to clear cpu_cid here, or
>> copy from parent ? If we keep the parent's cached values, I suspect
>
> Suspicion is not really a good engineering principle.
Nor is hand waving away concerns without evidence.
>
>> we can skip the page fault on return from fork in many cases.
>
> I doubt that it's many cases, see below.
>
>>> + /*
>>> + * If it has rseq, force it into the slow path right away
>>> + * because it is guaranteed to fault.
>>> + */
>>> + t->rseq.event.slowpath = t->rseq.event.has_rseq;
>>
>> I think we can do better here. It's only guaranteed to fault if:
>>
>> - has_rseq is set, AND
>> - cpu or cid has changed compared to the cached value OR
>> - rseq_cs user pointer is non-NULL.
>>
>> Otherwise we should be able to handle the return from fork from the fast
>> path just with loads from the rseq area, or am I missing something ?
>
> The normal case is that the fault is pretty much guaranteed to happen
> because the scheduler places the child on a different CPU and therefore
> the CPU/MM IDs need to be updated anyway.
Is this "normal case" the behavior you wish the scheduler to have or
is it based on metrics ?
Here is the result of a sched_fork() instrumentation vs the first
pick_next_task that follows for each thread (accounted with schedstats
see diff at the end of this email, followed by my kernel config), and we
can observe the following pattern with a parallel kernel build workload
(64-core VM, on a physical machine that has 384 logical cpus):
- When undercommitted, in which case the scheduler has idle core
targets to select (make -j32 on a 64-core VM), indeed the behavior
favors spreading the workload to different CPUs (in which case we
would hit a page fault on return from fork, as you predicted):
543 same-cpu vs 62k different-cpu.
Most of the same-cpu are on CPU 0 (402), surprisingly,
- When using all the cores (make -j64 on a 64-core VM) (and
also for overcommit scenarios tested up to -j400), the
picture differs:
27k same-cpu vs 32k different-cpu, which is pretty much even.
This is a kernel build workload, which consists mostly of
single-threaded processes (make, gcc). I therefore expect the
rseq mm_cid to be always 0 for those cases.
>
> The only cases where this is not true, are when there is no capacity to
> do so or on UP or the parent was affine to a single CPU, which is what
> the child inherits.
Wrong. See above.
Moreover, a common userspace process execution pattern is fork followed
by exec, in which case whatever page faults happen on fork become
irrelevant as soon as exec replaces the entire mapping.
>
> Now for those cases the only time, where such an optimization would be
> meaningful is when the child is so short lived,
Yes.
> that it does not
> schedule.
This is not quite right. The child can schedule as many times as it
wants without requiring stores to the rseq area, as long as it does not
migrate, nor change mm_cid value. Of course we should also consider
that it would trigger a page fault as soon as it stores to the rseq
area rseq_cs field from userspace.
> Otherwise you just procrastinate the fault to a later point
> in time.
Which may very well be never for short-lived processes and
exec-after-fork, in which case this is a win.
>
> The point is that it will fault in the vast majority of cases and then
No it won't.
> the fault will happen in the fast path first, which means the exit code
> has to go another round through the work loop instead of forcing the
> fault right away on the first exit in the slowpath, where it can be
> actually resolved.
I think you misunderstood my improvement suggestion: I do _not_
recommend that we pointlessly take the fast-path on fork, but rather
than we keep the cached cpu_cid value (from the parent) around to check
whether we actually need to store to userspace and take a page fault.
Based on this, we can choose whether we use the fast-path or go directly
to the slow-path.
>>> --- a/init/Kconfig
>>> +++ b/init/Kconfig
>>> @@ -1911,7 +1911,7 @@ config RSEQ_DEBUG_DEFAULT_ENABLE
>>> config DEBUG_RSEQ
>>> default n
>>> bool "Enable debugging of rseq() system call" if EXPERT
>>> - depends on RSEQ && DEBUG_KERNEL
>>> + depends on RSEQ && DEBUG_KERNEL && !GENERIC_ENTRY
>>
>> I'm confused about this hunk. Perhaps this belongs to a different
>> commit ?
>
> This is attached to wiring up the fast path in GENERIC_ENTRY, because
> the fastpath, which does debug when enabled through the static key, is
> at the end of the exit to user work loop and therefore the extra round
> through the detached debug muck can be avoided.
>
> While on !GENERIC_ENTRY architectures the processing happens always in
> the TIF_NOTIFY_RESUME slowpath, so the extra debug muck is required, no?
OK.
Thanks,
Mathieu
---
diff --git a/include/linux/sched.h b/include/linux/sched.h
index aa9c5be7a632..81f1106e5643 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -854,6 +854,7 @@ struct task_struct {
*/
int recent_used_cpu;
int wake_cpu;
+ int parent_cpu; /* -1 if cleared */
#endif
int on_rq;
diff --git a/kernel/fork.c b/kernel/fork.c
index 1ee8eb11f38b..98e8498cd33b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2136,6 +2136,7 @@ __latent_entropy struct task_struct *copy_process(
#endif
/* Perform scheduler related setup. Assign this task to a CPU. */
+ // XXX: does it ?
retval = sched_fork(clone_flags, p);
if (retval)
goto bad_fork_cleanup_policy;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 81c6df746df1..9c9bb92fda88 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4491,6 +4491,7 @@ int wake_up_state(struct task_struct *p, unsigned
int state)
*/
static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
{
+ p->parent_cpu = -1;
p->on_rq = 0;
p->se.on_rq = 0;
@@ -4791,6 +4792,7 @@ int sched_fork(unsigned long clone_flags, struct
task_struct *p)
#if defined(CONFIG_SMP)
p->on_cpu = 0;
#endif
+ p->parent_cpu = cpu_of(this_rq());
init_task_preempt_count(p);
#ifdef CONFIG_SMP
plist_node_init(&p->pushable_tasks, MAX_PRIO);
@@ -6128,8 +6130,10 @@ pick_next_task(struct rq *rq, struct task_struct
*prev, struct rq_flags *rf)
struct rq *rq_i;
bool need_sync;
- if (!sched_core_enabled(rq))
- return __pick_next_task(rq, prev, rf);
+ if (!sched_core_enabled(rq)) {
+ next = __pick_next_task(rq, prev, rf);
+ goto out;
+ }
cpu = cpu_of(rq);
@@ -6142,7 +6146,8 @@ pick_next_task(struct rq *rq, struct task_struct
*prev, struct rq_flags *rf)
*/
rq->core_pick = NULL;
rq->core_dl_server = NULL;
- return __pick_next_task(rq, prev, rf);
+ next = __pick_next_task(rq, prev, rf);
+ goto out;
}
/*
@@ -6343,6 +6348,16 @@ pick_next_task(struct rq *rq, struct task_struct
*prev, struct rq_flags *rf)
if (rq->core->core_forceidle_count && next == rq->idle)
queue_core_balance(rq);
+out:
+ if (next->parent_cpu >= 0) {
+ if (next->parent_cpu == cpu) {
+ schedstat_inc(this_rq()->wakeup_new_same_cpu);
+ } else {
+ schedstat_inc(this_rq()->wakeup_new_diff_cpu);
+ }
+ next->parent_cpu = -1;
+ }
+
return next;
}
@@ -6563,7 +6578,17 @@ static inline void sched_core_cpu_dying(unsigned
int cpu) {}
static struct task_struct *
pick_next_task(struct rq *rq, struct task_struct *prev, struct
rq_flags *rf)
{
- return __pick_next_task(rq, prev, rf);
+ struct task_struct *next = __pick_next_task(rq, prev, rf);
+
+ if (next->parent_cpu >= 0) {
+ if (next->parent_cpu == cpu_of(rq)) {
+ schedstat_inc(this_rq()->wakeup_new_same_cpu);
+ } else {
+ schedstat_inc(this_rq()->wakeup_new_diff_cpu);
+ }
+ next->parent_cpu = -1;
+ }
+ return next;
}
#endif /* CONFIG_SCHED_CORE */
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 557246880a7e..4e3c197c1ff8 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -990,6 +990,8 @@ do { \
P(sched_goidle);
P(ttwu_count);
P(ttwu_local);
+ P(wakeup_new_same_cpu);
+ P(wakeup_new_diff_cpu);
}
#undef P
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 83e3aa917142..aae15f83790d 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1264,6 +1264,9 @@ struct rq {
/* try_to_wake_up() stats */
unsigned int ttwu_count;
unsigned int ttwu_local;
+
+ unsigned int wakeup_new_same_cpu;
+ unsigned int wakeup_new_diff_cpu;
#endif
#ifdef CONFIG_CPU_IDLE
diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c
index 4346fd81c31f..6c1929cece77 100644
--- a/kernel/sched/stats.c
+++ b/kernel/sched/stats.c
@@ -123,12 +123,13 @@ static int show_schedstat(struct seq_file *seq,
void *v)
/* runqueue-specific stats */
seq_printf(seq,
- "cpu%d %u 0 %u %u %u %u %llu %llu %lu",
+ "cpu%d %u 0 %u %u %u %u %llu %llu %lu %u %u",
cpu, rq->yld_count,
rq->sched_count, rq->sched_goidle,
rq->ttwu_count, rq->ttwu_local,
rq->rq_cpu_time,
- rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);
+ rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount,
+ rq->wakeup_new_same_cpu, rq->wakeup_new_diff_cpu);
seq_printf(seq, "\n");
.config
------------------------
#
# Automatically generated file; DO NOT EDIT.
# Linux/x86 6.16.7 Kernel Configuration
#
CONFIG_CC_VERSION_TEXT="gcc (Ubuntu 13.3.0-6ubuntu2~24.04) 13.3.0"
CONFIG_CC_IS_GCC=y
CONFIG_GCC_VERSION=130300
CONFIG_CLANG_VERSION=0
CONFIG_AS_IS_GNU=y
CONFIG_AS_VERSION=24200
CONFIG_LD_IS_BFD=y
CONFIG_LD_VERSION=24200
CONFIG_LLD_VERSION=0
CONFIG_RUSTC_VERSION=0
CONFIG_RUSTC_LLVM_VERSION=0
CONFIG_CC_CAN_LINK=y
CONFIG_CC_HAS_ASM_GOTO_OUTPUT=y
CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT=y
CONFIG_TOOLS_SUPPORT_RELR=y
CONFIG_CC_HAS_ASM_INLINE=y
CONFIG_CC_HAS_NO_PROFILE_FN_ATTR=y
CONFIG_LD_CAN_USE_KEEP_IN_OVERLAY=y
CONFIG_PAHOLE_VERSION=125
CONFIG_IRQ_WORK=y
CONFIG_BUILDTIME_TABLE_SORT=y
CONFIG_THREAD_INFO_IN_TASK=y
#
# General setup
#
CONFIG_INIT_ENV_ARG_LIMIT=32
# CONFIG_COMPILE_TEST is not set
# CONFIG_WERROR is not set
CONFIG_LOCALVERSION=""
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_BUILD_SALT=""
CONFIG_HAVE_KERNEL_GZIP=y
CONFIG_HAVE_KERNEL_BZIP2=y
CONFIG_HAVE_KERNEL_LZMA=y
CONFIG_HAVE_KERNEL_XZ=y
CONFIG_HAVE_KERNEL_LZO=y
CONFIG_HAVE_KERNEL_LZ4=y
CONFIG_HAVE_KERNEL_ZSTD=y
CONFIG_KERNEL_GZIP=y
# CONFIG_KERNEL_BZIP2 is not set
# CONFIG_KERNEL_LZMA is not set
# CONFIG_KERNEL_XZ is not set
# CONFIG_KERNEL_LZO is not set
# CONFIG_KERNEL_LZ4 is not set
# CONFIG_KERNEL_ZSTD is not set
CONFIG_DEFAULT_INIT=""
CONFIG_DEFAULT_HOSTNAME="(none)"
CONFIG_SYSVIPC=y
CONFIG_SYSVIPC_SYSCTL=y
CONFIG_SYSVIPC_COMPAT=y
CONFIG_POSIX_MQUEUE=y
CONFIG_POSIX_MQUEUE_SYSCTL=y
# CONFIG_WATCH_QUEUE is not set
CONFIG_CROSS_MEMORY_ATTACH=y
CONFIG_AUDIT=y
CONFIG_HAVE_ARCH_AUDITSYSCALL=y
CONFIG_AUDITSYSCALL=y
#
# IRQ subsystem
#
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_GENERIC_IRQ_SHOW=y
CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK=y
CONFIG_GENERIC_PENDING_IRQ=y
CONFIG_GENERIC_IRQ_MIGRATION=y
CONFIG_HARDIRQS_SW_RESEND=y
CONFIG_IRQ_DOMAIN=y
CONFIG_IRQ_DOMAIN_HIERARCHY=y
CONFIG_GENERIC_MSI_IRQ=y
CONFIG_GENERIC_IRQ_MATRIX_ALLOCATOR=y
CONFIG_GENERIC_IRQ_RESERVATION_MODE=y
CONFIG_IRQ_FORCED_THREADING=y
CONFIG_SPARSE_IRQ=y
# CONFIG_GENERIC_IRQ_DEBUGFS is not set
# end of IRQ subsystem
CONFIG_CLOCKSOURCE_WATCHDOG=y
CONFIG_ARCH_CLOCKSOURCE_INIT=y
CONFIG_GENERIC_TIME_VSYSCALL=y
CONFIG_GENERIC_CLOCKEVENTS=y
CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y
CONFIG_GENERIC_CLOCKEVENTS_BROADCAST_IDLE=y
CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST=y
CONFIG_GENERIC_CMOS_UPDATE=y
CONFIG_HAVE_POSIX_CPU_TIMERS_TASK_WORK=y
CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y
CONFIG_CONTEXT_TRACKING=y
CONFIG_CONTEXT_TRACKING_IDLE=y
#
# Timers subsystem
#
CONFIG_TICK_ONESHOT=y
CONFIG_NO_HZ_COMMON=y
# CONFIG_HZ_PERIODIC is not set
# CONFIG_NO_HZ_IDLE is not set
CONFIG_NO_HZ_FULL=y
CONFIG_CONTEXT_TRACKING_USER=y
# CONFIG_CONTEXT_TRACKING_USER_FORCE is not set
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US=100
# end of Timers subsystem
CONFIG_BPF=y
CONFIG_HAVE_EBPF_JIT=y
CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y
#
# BPF subsystem
#
CONFIG_BPF_SYSCALL=y
CONFIG_BPF_JIT=y
# CONFIG_BPF_JIT_ALWAYS_ON is not set
CONFIG_BPF_JIT_DEFAULT_ON=y
# CONFIG_BPF_UNPRIV_DEFAULT_OFF is not set
# CONFIG_BPF_PRELOAD is not set
# CONFIG_BPF_LSM is not set
# end of BPF subsystem
CONFIG_PREEMPT_BUILD=y
CONFIG_ARCH_HAS_PREEMPT_LAZY=y
# CONFIG_PREEMPT_NONE is not set
CONFIG_PREEMPT_VOLUNTARY=y
# CONFIG_PREEMPT is not set
# CONFIG_PREEMPT_LAZY is not set
# CONFIG_PREEMPT_RT is not set
CONFIG_PREEMPT_COUNT=y
CONFIG_PREEMPTION=y
CONFIG_PREEMPT_DYNAMIC=y
# CONFIG_SCHED_CORE is not set
#
# CPU/Task time and stats accounting
#
CONFIG_VIRT_CPU_ACCOUNTING=y
CONFIG_VIRT_CPU_ACCOUNTING_GEN=y
# CONFIG_IRQ_TIME_ACCOUNTING is not set
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_BSD_PROCESS_ACCT_V3=y
CONFIG_TASKSTATS=y
CONFIG_TASK_DELAY_ACCT=y
CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
# CONFIG_PSI is not set
# end of CPU/Task time and stats accounting
CONFIG_CPU_ISOLATION=y
#
# RCU Subsystem
#
CONFIG_TREE_RCU=y
CONFIG_PREEMPT_RCU=y
# CONFIG_RCU_EXPERT is not set
CONFIG_TREE_SRCU=y
CONFIG_TASKS_RCU_GENERIC=y
CONFIG_NEED_TASKS_RCU=y
CONFIG_TASKS_RCU=y
CONFIG_TASKS_RUDE_RCU=y
CONFIG_TASKS_TRACE_RCU=y
CONFIG_RCU_STALL_COMMON=y
CONFIG_RCU_NEED_SEGCBLIST=y
CONFIG_RCU_NOCB_CPU=y
# CONFIG_RCU_NOCB_CPU_DEFAULT_ALL is not set
# CONFIG_RCU_LAZY is not set
# end of RCU Subsystem
CONFIG_IKCONFIG=m
# CONFIG_IKCONFIG_PROC is not set
# CONFIG_IKHEADERS is not set
CONFIG_LOG_BUF_SHIFT=18
CONFIG_LOG_CPU_MAX_BUF_SHIFT=12
# CONFIG_PRINTK_INDEX is not set
CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y
#
# Scheduler features
#
# CONFIG_UCLAMP_TASK is not set
# end of Scheduler features
CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y
CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH=y
CONFIG_CC_HAS_INT128=y
CONFIG_CC_IMPLICIT_FALLTHROUGH="-Wimplicit-fallthrough=5"
CONFIG_GCC10_NO_ARRAY_BOUNDS=y
CONFIG_CC_NO_ARRAY_BOUNDS=y
CONFIG_GCC_NO_STRINGOP_OVERFLOW=y
CONFIG_CC_NO_STRINGOP_OVERFLOW=y
CONFIG_ARCH_SUPPORTS_INT128=y
CONFIG_NUMA_BALANCING=y
CONFIG_NUMA_BALANCING_DEFAULT_ENABLED=y
CONFIG_SLAB_OBJ_EXT=y
CONFIG_CGROUPS=y
CONFIG_PAGE_COUNTER=y
# CONFIG_CGROUP_FAVOR_DYNMODS is not set
CONFIG_MEMCG=y
# CONFIG_MEMCG_V1 is not set
CONFIG_BLK_CGROUP=y
CONFIG_CGROUP_WRITEBACK=y
CONFIG_CGROUP_SCHED=y
CONFIG_GROUP_SCHED_WEIGHT=y
CONFIG_FAIR_GROUP_SCHED=y
CONFIG_CFS_BANDWIDTH=y
# CONFIG_RT_GROUP_SCHED is not set
CONFIG_SCHED_MM_CID=y
# CONFIG_CGROUP_PIDS is not set
# CONFIG_CGROUP_RDMA is not set
# CONFIG_CGROUP_DMEM is not set
CONFIG_CGROUP_FREEZER=y
CONFIG_CGROUP_HUGETLB=y
CONFIG_CPUSETS=y
# CONFIG_CPUSETS_V1 is not set
CONFIG_CGROUP_DEVICE=y
CONFIG_CGROUP_CPUACCT=y
CONFIG_CGROUP_PERF=y
CONFIG_CGROUP_BPF=y
# CONFIG_CGROUP_MISC is not set
# CONFIG_CGROUP_DEBUG is not set
CONFIG_SOCK_CGROUP_DATA=y
CONFIG_NAMESPACES=y
CONFIG_UTS_NS=y
CONFIG_TIME_NS=y
CONFIG_IPC_NS=y
CONFIG_USER_NS=y
CONFIG_PID_NS=y
CONFIG_NET_NS=y
CONFIG_CHECKPOINT_RESTORE=y
CONFIG_SCHED_AUTOGROUP=y
CONFIG_RELAY=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_INITRAMFS_SOURCE=""
CONFIG_RD_GZIP=y
CONFIG_RD_BZIP2=y
CONFIG_RD_LZMA=y
CONFIG_RD_XZ=y
CONFIG_RD_LZO=y
CONFIG_RD_LZ4=y
CONFIG_RD_ZSTD=y
# CONFIG_BOOT_CONFIG is not set
CONFIG_INITRAMFS_PRESERVE_MTIME=y
CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_LD_ORPHAN_WARN=y
CONFIG_LD_ORPHAN_WARN_LEVEL="warn"
CONFIG_SYSCTL=y
CONFIG_HAVE_UID16=y
CONFIG_SYSCTL_EXCEPTION_TRACE=y
CONFIG_SYSFS_SYSCALL=y
CONFIG_HAVE_PCSPKR_PLATFORM=y
CONFIG_EXPERT=y
CONFIG_UID16=y
CONFIG_MULTIUSER=y
CONFIG_SGETMASK_SYSCALL=y
CONFIG_FHANDLE=y
CONFIG_POSIX_TIMERS=y
CONFIG_PRINTK=y
CONFIG_BUG=y
CONFIG_ELF_CORE=y
CONFIG_PCSPKR_PLATFORM=y
# CONFIG_BASE_SMALL is not set
CONFIG_FUTEX=y
CONFIG_FUTEX_PI=y
CONFIG_FUTEX_MPOL=y
CONFIG_EPOLL=y
CONFIG_SIGNALFD=y
CONFIG_TIMERFD=y
CONFIG_EVENTFD=y
CONFIG_SHMEM=y
CONFIG_AIO=y
CONFIG_IO_URING=y
CONFIG_ADVISE_SYSCALLS=y
CONFIG_MEMBARRIER=y
CONFIG_KCMP=y
CONFIG_RSEQ=y
CONFIG_DEBUG_RSEQ=y
CONFIG_CACHESTAT_SYSCALL=y
# CONFIG_PC104 is not set
CONFIG_KALLSYMS=y
# CONFIG_KALLSYMS_SELFTEST is not set
CONFIG_KALLSYMS_ALL=y
CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE=y
CONFIG_ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS=y
CONFIG_HAVE_PERF_EVENTS=y
CONFIG_GUEST_PERF_EVENTS=y
#
# Kernel Performance Events And Counters
#
CONFIG_PERF_EVENTS=y
# CONFIG_DEBUG_PERF_USE_VMALLOC is not set
# end of Kernel Performance Events And Counters
CONFIG_SYSTEM_DATA_VERIFICATION=y
CONFIG_PROFILING=y
CONFIG_TRACEPOINTS=y
#
# Kexec and crash features
#
CONFIG_CRASH_RESERVE=y
CONFIG_VMCORE_INFO=y
CONFIG_KEXEC_CORE=y
CONFIG_KEXEC=y
# CONFIG_KEXEC_FILE is not set
CONFIG_KEXEC_JUMP=y
# CONFIG_KEXEC_HANDOVER is not set
CONFIG_CRASH_DUMP=y
CONFIG_CRASH_HOTPLUG=y
CONFIG_CRASH_MAX_MEMORY_RANGES=8192
# end of Kexec and crash features
# end of General setup
CONFIG_64BIT=y
CONFIG_X86_64=y
CONFIG_X86=y
CONFIG_INSTRUCTION_DECODER=y
CONFIG_OUTPUT_FORMAT="elf64-x86-64"
CONFIG_LOCKDEP_SUPPORT=y
CONFIG_STACKTRACE_SUPPORT=y
CONFIG_MMU=y
CONFIG_ARCH_MMAP_RND_BITS_MIN=28
CONFIG_ARCH_MMAP_RND_BITS_MAX=32
CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=8
CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX=16
CONFIG_GENERIC_ISA_DMA=y
CONFIG_GENERIC_BUG=y
CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y
CONFIG_ARCH_MAY_HAVE_PC_FDC=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
CONFIG_ARCH_HAS_CPU_RELAX=y
CONFIG_ARCH_HIBERNATION_POSSIBLE=y
CONFIG_ARCH_SUSPEND_POSSIBLE=y
CONFIG_AUDIT_ARCH=y
CONFIG_HAVE_INTEL_TXT=y
CONFIG_X86_64_SMP=y
CONFIG_ARCH_SUPPORTS_UPROBES=y
CONFIG_FIX_EARLYCON_MEM=y
CONFIG_PGTABLE_LEVELS=5
#
# Processor type and features
#
CONFIG_SMP=y
CONFIG_X86_X2APIC=y
# CONFIG_X86_POSTED_MSI is not set
CONFIG_X86_MPPARSE=y
# CONFIG_X86_CPU_RESCTRL is not set
# CONFIG_X86_FRED is not set
CONFIG_X86_EXTENDED_PLATFORM=y
CONFIG_X86_NUMACHIP=y
# CONFIG_X86_VSMP is not set
# CONFIG_X86_UV is not set
# CONFIG_X86_INTEL_MID is not set
# CONFIG_X86_GOLDFISH is not set
CONFIG_X86_INTEL_LPSS=y
# CONFIG_X86_AMD_PLATFORM_DEVICE is not set
CONFIG_IOSF_MBI=y
# CONFIG_IOSF_MBI_DEBUG is not set
CONFIG_X86_SUPPORTS_MEMORY_FAILURE=y
CONFIG_SCHED_OMIT_FRAME_POINTER=y
CONFIG_HYPERVISOR_GUEST=y
CONFIG_PARAVIRT=y
CONFIG_PARAVIRT_XXL=y
# CONFIG_PARAVIRT_DEBUG is not set
CONFIG_PARAVIRT_SPINLOCKS=y
CONFIG_X86_HV_CALLBACK_VECTOR=y
CONFIG_XEN=y
CONFIG_XEN_PV=y
CONFIG_XEN_512GB=y
CONFIG_XEN_PV_SMP=y
CONFIG_XEN_PV_DOM0=y
CONFIG_XEN_PVHVM=y
CONFIG_XEN_PVHVM_SMP=y
CONFIG_XEN_PVHVM_GUEST=y
CONFIG_XEN_SAVE_RESTORE=y
# CONFIG_XEN_DEBUG_FS is not set
# CONFIG_XEN_PVH is not set
CONFIG_XEN_DOM0=y
CONFIG_XEN_PV_MSR_SAFE=y
CONFIG_KVM_GUEST=y
CONFIG_ARCH_CPUIDLE_HALTPOLL=y
# CONFIG_PVH is not set
# CONFIG_PARAVIRT_TIME_ACCOUNTING is not set
CONFIG_PARAVIRT_CLOCK=y
# CONFIG_JAILHOUSE_GUEST is not set
# CONFIG_ACRN_GUEST is not set
# CONFIG_INTEL_TDX_GUEST is not set
CONFIG_CC_HAS_MARCH_NATIVE=y
# CONFIG_X86_NATIVE_CPU is not set
CONFIG_X86_INTERNODE_CACHE_SHIFT=6
CONFIG_X86_L1_CACHE_SHIFT=6
CONFIG_X86_TSC=y
CONFIG_X86_HAVE_PAE=y
CONFIG_X86_CX8=y
CONFIG_X86_CMOV=y
CONFIG_X86_MINIMUM_CPU_FAMILY=64
CONFIG_X86_DEBUGCTLMSR=y
CONFIG_IA32_FEAT_CTL=y
CONFIG_X86_VMX_FEATURE_NAMES=y
CONFIG_PROCESSOR_SELECT=y
CONFIG_BROADCAST_TLB_FLUSH=y
CONFIG_CPU_SUP_INTEL=y
CONFIG_CPU_SUP_AMD=y
CONFIG_CPU_SUP_HYGON=y
CONFIG_CPU_SUP_CENTAUR=y
CONFIG_CPU_SUP_ZHAOXIN=y
CONFIG_HPET_TIMER=y
CONFIG_HPET_EMULATE_RTC=y
CONFIG_DMI=y
CONFIG_GART_IOMMU=y
CONFIG_BOOT_VESA_SUPPORT=y
# CONFIG_MAXSMP is not set
CONFIG_NR_CPUS_RANGE_BEGIN=2
CONFIG_NR_CPUS_RANGE_END=512
CONFIG_NR_CPUS_DEFAULT=64
CONFIG_NR_CPUS=256
CONFIG_SCHED_CLUSTER=y
CONFIG_SCHED_SMT=y
CONFIG_SCHED_MC=y
CONFIG_SCHED_MC_PRIO=y
CONFIG_X86_LOCAL_APIC=y
CONFIG_ACPI_MADT_WAKEUP=y
CONFIG_X86_IO_APIC=y
CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y
CONFIG_X86_MCE=y
# CONFIG_X86_MCELOG_LEGACY is not set
CONFIG_X86_MCE_INTEL=y
CONFIG_X86_MCE_AMD=y
CONFIG_X86_MCE_THRESHOLD=y
# CONFIG_X86_MCE_INJECT is not set
#
# Performance monitoring
#
CONFIG_PERF_EVENTS_INTEL_UNCORE=y
CONFIG_PERF_EVENTS_INTEL_RAPL=y
CONFIG_PERF_EVENTS_INTEL_CSTATE=y
# CONFIG_PERF_EVENTS_AMD_POWER is not set
CONFIG_PERF_EVENTS_AMD_UNCORE=y
# CONFIG_PERF_EVENTS_AMD_BRS is not set
# end of Performance monitoring
CONFIG_X86_16BIT=y
CONFIG_X86_ESPFIX64=y
CONFIG_X86_VSYSCALL_EMULATION=y
CONFIG_X86_IOPL_IOPERM=y
CONFIG_MICROCODE=y
# CONFIG_MICROCODE_LATE_LOADING is not set
# CONFIG_X86_MSR is not set
# CONFIG_X86_CPUID is not set
CONFIG_X86_DIRECT_GBPAGES=y
# CONFIG_X86_CPA_STATISTICS is not set
# CONFIG_AMD_MEM_ENCRYPT is not set
CONFIG_NUMA=y
CONFIG_AMD_NUMA=y
CONFIG_X86_64_ACPI_NUMA=y
CONFIG_NODES_SHIFT=6
CONFIG_ARCH_SPARSEMEM_ENABLE=y
CONFIG_ARCH_SPARSEMEM_DEFAULT=y
CONFIG_ARCH_MEMORY_PROBE=y
CONFIG_ARCH_PROC_KCORE_TEXT=y
CONFIG_ILLEGAL_POINTER_VALUE=0xdead000000000000
# CONFIG_X86_PMEM_LEGACY is not set
CONFIG_X86_CHECK_BIOS_CORRUPTION=y
CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y
CONFIG_MTRR=y
CONFIG_MTRR_SANITIZER=y
CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT=1
CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT=1
CONFIG_X86_PAT=y
CONFIG_X86_UMIP=y
CONFIG_CC_HAS_IBT=y
# CONFIG_X86_KERNEL_IBT is not set
CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS=y
CONFIG_ARCH_PKEY_BITS=4
CONFIG_X86_INTEL_TSX_MODE_OFF=y
# CONFIG_X86_INTEL_TSX_MODE_ON is not set
# CONFIG_X86_INTEL_TSX_MODE_AUTO is not set
# CONFIG_X86_SGX is not set
# CONFIG_X86_USER_SHADOW_STACK is not set
CONFIG_EFI=y
CONFIG_EFI_STUB=y
CONFIG_EFI_HANDOVER_PROTOCOL=y
# CONFIG_EFI_MIXED is not set
CONFIG_EFI_RUNTIME_MAP=y
# CONFIG_HZ_100 is not set
CONFIG_HZ_250=y
# CONFIG_HZ_300 is not set
# CONFIG_HZ_1000 is not set
CONFIG_HZ=250
CONFIG_SCHED_HRTICK=y
CONFIG_ARCH_SUPPORTS_KEXEC=y
CONFIG_ARCH_SUPPORTS_KEXEC_FILE=y
CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY=y
CONFIG_ARCH_SUPPORTS_KEXEC_SIG=y
CONFIG_ARCH_SUPPORTS_KEXEC_SIG_FORCE=y
CONFIG_ARCH_SUPPORTS_KEXEC_BZIMAGE_VERIFY_SIG=y
CONFIG_ARCH_SUPPORTS_KEXEC_JUMP=y
CONFIG_ARCH_SUPPORTS_KEXEC_HANDOVER=y
CONFIG_ARCH_SUPPORTS_CRASH_DUMP=y
CONFIG_ARCH_DEFAULT_CRASH_DUMP=y
CONFIG_ARCH_SUPPORTS_CRASH_HOTPLUG=y
CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION=y
CONFIG_PHYSICAL_START=0x1000000
CONFIG_RELOCATABLE=y
CONFIG_RANDOMIZE_BASE=y
CONFIG_X86_NEED_RELOCS=y
CONFIG_PHYSICAL_ALIGN=0x1000000
CONFIG_RANDOMIZE_MEMORY=y
CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING=0xa
CONFIG_HOTPLUG_CPU=y
# CONFIG_COMPAT_VDSO is not set
CONFIG_LEGACY_VSYSCALL_XONLY=y
# CONFIG_LEGACY_VSYSCALL_NONE is not set
# CONFIG_CMDLINE_BOOL is not set
CONFIG_MODIFY_LDT_SYSCALL=y
# CONFIG_STRICT_SIGALTSTACK_SIZE is not set
CONFIG_HAVE_LIVEPATCH=y
# CONFIG_LIVEPATCH is not set
CONFIG_X86_BUS_LOCK_DETECT=y
# end of Processor type and features
CONFIG_CC_HAS_NAMED_AS=y
CONFIG_CC_HAS_NAMED_AS_FIXED_SANITIZERS=y
CONFIG_USE_X86_SEG_SUPPORT=y
CONFIG_CC_HAS_SLS=y
CONFIG_CC_HAS_RETURN_THUNK=y
CONFIG_CC_HAS_ENTRY_PADDING=y
CONFIG_FUNCTION_PADDING_CFI=11
CONFIG_FUNCTION_PADDING_BYTES=16
CONFIG_CALL_PADDING=y
CONFIG_HAVE_CALL_THUNKS=y
CONFIG_CALL_THUNKS=y
CONFIG_PREFIX_SYMBOLS=y
CONFIG_CPU_MITIGATIONS=y
CONFIG_MITIGATION_PAGE_TABLE_ISOLATION=y
CONFIG_MITIGATION_RETPOLINE=y
CONFIG_MITIGATION_RETHUNK=y
CONFIG_MITIGATION_UNRET_ENTRY=y
CONFIG_MITIGATION_CALL_DEPTH_TRACKING=y
# CONFIG_CALL_THUNKS_DEBUG is not set
CONFIG_MITIGATION_IBPB_ENTRY=y
CONFIG_MITIGATION_IBRS_ENTRY=y
CONFIG_MITIGATION_SRSO=y
# CONFIG_MITIGATION_SLS is not set
CONFIG_MITIGATION_GDS=y
CONFIG_MITIGATION_RFDS=y
CONFIG_MITIGATION_SPECTRE_BHI=y
CONFIG_MITIGATION_MDS=y
CONFIG_MITIGATION_TAA=y
CONFIG_MITIGATION_MMIO_STALE_DATA=y
CONFIG_MITIGATION_L1TF=y
CONFIG_MITIGATION_RETBLEED=y
CONFIG_MITIGATION_SPECTRE_V1=y
CONFIG_MITIGATION_SPECTRE_V2=y
CONFIG_MITIGATION_SRBDS=y
CONFIG_MITIGATION_SSB=y
CONFIG_MITIGATION_ITS=y
CONFIG_MITIGATION_TSA=y
CONFIG_ARCH_HAS_ADD_PAGES=y
#
# Power management and ACPI options
#
CONFIG_ARCH_HIBERNATION_HEADER=y
CONFIG_SUSPEND=y
CONFIG_SUSPEND_FREEZER=y
# CONFIG_SUSPEND_SKIP_SYNC is not set
CONFIG_HIBERNATE_CALLBACKS=y
CONFIG_HIBERNATION=y
CONFIG_HIBERNATION_SNAPSHOT_DEV=y
CONFIG_HIBERNATION_COMP_LZO=y
CONFIG_HIBERNATION_DEF_COMP="lzo"
CONFIG_PM_STD_PARTITION=""
CONFIG_PM_SLEEP=y
CONFIG_PM_SLEEP_SMP=y
# CONFIG_PM_AUTOSLEEP is not set
# CONFIG_PM_USERSPACE_AUTOSLEEP is not set
# CONFIG_PM_WAKELOCKS is not set
CONFIG_PM=y
CONFIG_PM_DEBUG=y
CONFIG_PM_ADVANCED_DEBUG=y
# CONFIG_PM_TEST_SUSPEND is not set
CONFIG_PM_SLEEP_DEBUG=y
# CONFIG_DPM_WATCHDOG is not set
CONFIG_PM_TRACE=y
CONFIG_PM_TRACE_RTC=y
CONFIG_PM_CLK=y
CONFIG_WQ_POWER_EFFICIENT_DEFAULT=y
CONFIG_ENERGY_MODEL=y
CONFIG_ARCH_SUPPORTS_ACPI=y
CONFIG_ACPI=y
CONFIG_ACPI_LEGACY_TABLES_LOOKUP=y
CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC=y
CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT=y
CONFIG_ACPI_THERMAL_LIB=y
# CONFIG_ACPI_DEBUGGER is not set
CONFIG_ACPI_SPCR_TABLE=y
# CONFIG_ACPI_FPDT is not set
CONFIG_ACPI_LPIT=y
CONFIG_ACPI_SLEEP=y
CONFIG_ACPI_REV_OVERRIDE_POSSIBLE=y
CONFIG_ACPI_EC=y
# CONFIG_ACPI_EC_DEBUGFS is not set
CONFIG_ACPI_AC=y
CONFIG_ACPI_BATTERY=y
CONFIG_ACPI_BUTTON=y
# CONFIG_ACPI_VIDEO is not set
CONFIG_ACPI_FAN=y
# CONFIG_ACPI_TAD is not set
CONFIG_ACPI_DOCK=y
CONFIG_ACPI_CPU_FREQ_PSS=y
CONFIG_ACPI_PROCESSOR_CSTATE=y
CONFIG_ACPI_PROCESSOR_IDLE=y
CONFIG_ACPI_CPPC_LIB=y
CONFIG_ACPI_PROCESSOR=y
# CONFIG_ACPI_IPMI is not set
CONFIG_ACPI_HOTPLUG_CPU=y
CONFIG_ACPI_PROCESSOR_AGGREGATOR=y
CONFIG_ACPI_THERMAL=y
CONFIG_ACPI_CUSTOM_DSDT_FILE=""
CONFIG_ARCH_HAS_ACPI_TABLE_UPGRADE=y
CONFIG_ACPI_TABLE_UPGRADE=y
# CONFIG_ACPI_DEBUG is not set
CONFIG_ACPI_PCI_SLOT=y
CONFIG_ACPI_CONTAINER=y
CONFIG_ACPI_HOTPLUG_MEMORY=y
CONFIG_ACPI_HOTPLUG_IOAPIC=y
# CONFIG_ACPI_SBS is not set
CONFIG_ACPI_HED=y
CONFIG_ACPI_BGRT=y
# CONFIG_ACPI_REDUCED_HARDWARE_ONLY is not set
# CONFIG_ACPI_NFIT is not set
CONFIG_ACPI_NUMA=y
# CONFIG_ACPI_HMAT is not set
CONFIG_HAVE_ACPI_APEI=y
CONFIG_HAVE_ACPI_APEI_NMI=y
CONFIG_ACPI_APEI=y
CONFIG_ACPI_APEI_GHES=y
CONFIG_ACPI_APEI_PCIEAER=y
CONFIG_ACPI_APEI_MEMORY_FAILURE=y
# CONFIG_ACPI_APEI_EINJ is not set
# CONFIG_ACPI_APEI_ERST_DEBUG is not set
# CONFIG_ACPI_DPTF is not set
# CONFIG_ACPI_EXTLOG is not set
# CONFIG_ACPI_CONFIGFS is not set
# CONFIG_ACPI_PFRUT is not set
CONFIG_ACPI_PCC=y
# CONFIG_ACPI_FFH is not set
CONFIG_ACPI_MRRM=y
# CONFIG_PMIC_OPREGION is not set
CONFIG_ACPI_PRMT=y
CONFIG_X86_PM_TIMER=y
#
# CPU Frequency scaling
#
CONFIG_CPU_FREQ=y
CONFIG_CPU_FREQ_GOV_ATTR_SET=y
CONFIG_CPU_FREQ_GOV_COMMON=y
CONFIG_CPU_FREQ_STAT=y
CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y
# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set
# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set
# CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL is not set
CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
CONFIG_CPU_FREQ_GOV_POWERSAVE=y
CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_GOV_ONDEMAND=y
CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y
#
# CPU frequency scaling drivers
#
CONFIG_X86_INTEL_PSTATE=y
CONFIG_X86_PCC_CPUFREQ=y
CONFIG_X86_AMD_PSTATE=y
CONFIG_X86_AMD_PSTATE_DEFAULT_MODE=3
# CONFIG_X86_AMD_PSTATE_UT is not set
CONFIG_X86_ACPI_CPUFREQ=y
CONFIG_X86_ACPI_CPUFREQ_CPB=y
CONFIG_X86_POWERNOW_K8=y
# CONFIG_X86_AMD_FREQ_SENSITIVITY is not set
CONFIG_X86_SPEEDSTEP_CENTRINO=y
# CONFIG_X86_P4_CLOCKMOD is not set
#
# shared options
#
CONFIG_CPUFREQ_ARCH_CUR_FREQ=y
# end of CPU Frequency scaling
#
# CPU Idle
#
CONFIG_CPU_IDLE=y
CONFIG_CPU_IDLE_GOV_LADDER=y
CONFIG_CPU_IDLE_GOV_MENU=y
# CONFIG_CPU_IDLE_GOV_TEO is not set
CONFIG_CPU_IDLE_GOV_HALTPOLL=y
CONFIG_HALTPOLL_CPUIDLE=y
# end of CPU Idle
CONFIG_INTEL_IDLE=y
# end of Power management and ACPI options
#
# Bus options (PCI etc.)
#
CONFIG_PCI_DIRECT=y
CONFIG_PCI_MMCONFIG=y
CONFIG_PCI_XEN=y
CONFIG_MMCONF_FAM10H=y
# CONFIG_ISA_BUS is not set
CONFIG_ISA_DMA_API=y
CONFIG_AMD_NB=y
CONFIG_AMD_NODE=y
# end of Bus options (PCI etc.)
#
# Binary Emulations
#
CONFIG_IA32_EMULATION=y
# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set
CONFIG_X86_X32_ABI=y
CONFIG_COMPAT_32=y
CONFIG_COMPAT=y
CONFIG_COMPAT_FOR_U64_ALIGNMENT=y
# end of Binary Emulations
CONFIG_VIRTUALIZATION=y
# CONFIG_KVM is not set
CONFIG_X86_REQUIRED_FEATURE_ALWAYS=y
CONFIG_X86_REQUIRED_FEATURE_NOPL=y
CONFIG_X86_REQUIRED_FEATURE_CX8=y
CONFIG_X86_REQUIRED_FEATURE_CMOV=y
CONFIG_X86_REQUIRED_FEATURE_CPUID=y
CONFIG_X86_REQUIRED_FEATURE_FPU=y
CONFIG_X86_REQUIRED_FEATURE_PAE=y
CONFIG_X86_REQUIRED_FEATURE_MSR=y
CONFIG_X86_REQUIRED_FEATURE_FXSR=y
CONFIG_X86_REQUIRED_FEATURE_XMM=y
CONFIG_X86_REQUIRED_FEATURE_XMM2=y
CONFIG_X86_REQUIRED_FEATURE_LM=y
CONFIG_X86_DISABLED_FEATURE_VME=y
CONFIG_X86_DISABLED_FEATURE_K6_MTRR=y
CONFIG_X86_DISABLED_FEATURE_CYRIX_ARR=y
CONFIG_X86_DISABLED_FEATURE_CENTAUR_MCR=y
CONFIG_X86_DISABLED_FEATURE_LAM=y
CONFIG_X86_DISABLED_FEATURE_ENQCMD=y
CONFIG_X86_DISABLED_FEATURE_SGX=y
CONFIG_X86_DISABLED_FEATURE_TDX_GUEST=y
CONFIG_X86_DISABLED_FEATURE_USER_SHSTK=y
CONFIG_X86_DISABLED_FEATURE_IBT=y
CONFIG_X86_DISABLED_FEATURE_FRED=y
CONFIG_X86_DISABLED_FEATURE_SEV_SNP=y
CONFIG_AS_AVX512=y
CONFIG_AS_GFNI=y
CONFIG_AS_VAES=y
CONFIG_AS_VPCLMULQDQ=y
CONFIG_AS_WRUSS=y
CONFIG_ARCH_CONFIGURES_CPU_MITIGATIONS=y
CONFIG_ARCH_HAS_DMA_OPS=y
#
# General architecture-dependent options
#
CONFIG_HOTPLUG_SMT=y
CONFIG_HOTPLUG_CORE_SYNC=y
CONFIG_HOTPLUG_CORE_SYNC_DEAD=y
CONFIG_HOTPLUG_CORE_SYNC_FULL=y
CONFIG_HOTPLUG_SPLIT_STARTUP=y
CONFIG_HOTPLUG_PARALLEL=y
CONFIG_GENERIC_ENTRY=y
CONFIG_KPROBES=y
CONFIG_JUMP_LABEL=y
# CONFIG_STATIC_KEYS_SELFTEST is not set
# CONFIG_STATIC_CALL_SELFTEST is not set
CONFIG_OPTPROBES=y
CONFIG_KPROBES_ON_FTRACE=y
CONFIG_UPROBES=y
CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y
CONFIG_ARCH_USE_BUILTIN_BSWAP=y
CONFIG_KRETPROBES=y
CONFIG_KRETPROBE_ON_RETHOOK=y
CONFIG_HAVE_IOREMAP_PROT=y
CONFIG_HAVE_KPROBES=y
CONFIG_HAVE_KRETPROBES=y
CONFIG_HAVE_OPTPROBES=y
CONFIG_HAVE_KPROBES_ON_FTRACE=y
CONFIG_ARCH_CORRECT_STACKTRACE_ON_KRETPROBE=y
CONFIG_HAVE_FUNCTION_ERROR_INJECTION=y
CONFIG_HAVE_NMI=y
CONFIG_TRACE_IRQFLAGS_SUPPORT=y
CONFIG_TRACE_IRQFLAGS_NMI_SUPPORT=y
CONFIG_HAVE_ARCH_TRACEHOOK=y
CONFIG_HAVE_DMA_CONTIGUOUS=y
CONFIG_GENERIC_SMP_IDLE_THREAD=y
CONFIG_ARCH_HAS_FORTIFY_SOURCE=y
CONFIG_ARCH_HAS_SET_MEMORY=y
CONFIG_ARCH_HAS_SET_DIRECT_MAP=y
CONFIG_ARCH_HAS_CPU_FINALIZE_INIT=y
CONFIG_ARCH_HAS_CPU_PASID=y
CONFIG_HAVE_ARCH_THREAD_STRUCT_WHITELIST=y
CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT=y
CONFIG_ARCH_WANTS_NO_INSTR=y
CONFIG_HAVE_ASM_MODVERSIONS=y
CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y
CONFIG_HAVE_RSEQ=y
CONFIG_HAVE_RUST=y
CONFIG_HAVE_FUNCTION_ARG_ACCESS_API=y
CONFIG_HAVE_HW_BREAKPOINT=y
CONFIG_HAVE_MIXED_BREAKPOINTS_REGS=y
CONFIG_HAVE_USER_RETURN_NOTIFIER=y
CONFIG_HAVE_PERF_EVENTS_NMI=y
CONFIG_HAVE_HARDLOCKUP_DETECTOR_PERF=y
CONFIG_HAVE_PERF_REGS=y
CONFIG_HAVE_PERF_USER_STACK_DUMP=y
CONFIG_HAVE_ARCH_JUMP_LABEL=y
CONFIG_HAVE_ARCH_JUMP_LABEL_RELATIVE=y
CONFIG_MMU_GATHER_TABLE_FREE=y
CONFIG_MMU_GATHER_RCU_TABLE_FREE=y
CONFIG_MMU_GATHER_MERGE_VMAS=y
CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM=y
CONFIG_MMU_LAZY_TLB_REFCOUNT=y
CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG=y
CONFIG_ARCH_HAVE_EXTRA_ELF_NOTES=y
CONFIG_ARCH_HAS_NMI_SAFE_THIS_CPU_OPS=y
CONFIG_HAVE_ALIGNED_STRUCT_PAGE=y
CONFIG_HAVE_CMPXCHG_LOCAL=y
CONFIG_HAVE_CMPXCHG_DOUBLE=y
CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION=y
CONFIG_ARCH_WANT_OLD_COMPAT_IPC=y
CONFIG_HAVE_ARCH_SECCOMP=y
CONFIG_HAVE_ARCH_SECCOMP_FILTER=y
CONFIG_SECCOMP=y
CONFIG_SECCOMP_FILTER=y
# CONFIG_SECCOMP_CACHE_DEBUG is not set
CONFIG_HAVE_ARCH_STACKLEAK=y
CONFIG_HAVE_STACKPROTECTOR=y
CONFIG_STACKPROTECTOR=y
CONFIG_STACKPROTECTOR_STRONG=y
CONFIG_ARCH_SUPPORTS_LTO_CLANG=y
CONFIG_ARCH_SUPPORTS_LTO_CLANG_THIN=y
CONFIG_LTO_NONE=y
CONFIG_ARCH_SUPPORTS_AUTOFDO_CLANG=y
CONFIG_ARCH_SUPPORTS_PROPELLER_CLANG=y
CONFIG_ARCH_SUPPORTS_CFI_CLANG=y
CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES=y
CONFIG_HAVE_CONTEXT_TRACKING_USER=y
CONFIG_HAVE_CONTEXT_TRACKING_USER_OFFSTACK=y
CONFIG_HAVE_VIRT_CPU_ACCOUNTING_GEN=y
CONFIG_HAVE_IRQ_TIME_ACCOUNTING=y
CONFIG_HAVE_MOVE_PUD=y
CONFIG_HAVE_MOVE_PMD=y
CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE=y
CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD=y
CONFIG_HAVE_ARCH_HUGE_VMAP=y
CONFIG_HAVE_ARCH_HUGE_VMALLOC=y
CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y
CONFIG_ARCH_WANT_PMD_MKWRITE=y
CONFIG_HAVE_ARCH_SOFT_DIRTY=y
CONFIG_HAVE_MOD_ARCH_SPECIFIC=y
CONFIG_MODULES_USE_ELF_RELA=y
CONFIG_ARCH_HAS_EXECMEM_ROX=y
CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK=y
CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK=y
CONFIG_SOFTIRQ_ON_OWN_STACK=y
CONFIG_ARCH_HAS_ELF_RANDOMIZE=y
CONFIG_HAVE_ARCH_MMAP_RND_BITS=y
CONFIG_HAVE_EXIT_THREAD=y
CONFIG_ARCH_MMAP_RND_BITS=28
CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS=y
CONFIG_ARCH_MMAP_RND_COMPAT_BITS=8
CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES=y
CONFIG_HAVE_PAGE_SIZE_4KB=y
CONFIG_PAGE_SIZE_4KB=y
CONFIG_PAGE_SIZE_LESS_THAN_64KB=y
CONFIG_PAGE_SIZE_LESS_THAN_256KB=y
CONFIG_PAGE_SHIFT=12
CONFIG_HAVE_OBJTOOL=y
CONFIG_HAVE_JUMP_LABEL_HACK=y
CONFIG_HAVE_NOINSTR_HACK=y
CONFIG_HAVE_NOINSTR_VALIDATION=y
CONFIG_HAVE_UACCESS_VALIDATION=y
CONFIG_HAVE_STACK_VALIDATION=y
CONFIG_HAVE_RELIABLE_STACKTRACE=y
CONFIG_OLD_SIGSUSPEND3=y
CONFIG_COMPAT_OLD_SIGACTION=y
CONFIG_COMPAT_32BIT_TIME=y
CONFIG_ARCH_SUPPORTS_RT=y
CONFIG_HAVE_ARCH_VMAP_STACK=y
CONFIG_VMAP_STACK=y
CONFIG_HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET=y
CONFIG_RANDOMIZE_KSTACK_OFFSET=y
# CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT is not set
CONFIG_ARCH_HAS_STRICT_KERNEL_RWX=y
CONFIG_STRICT_KERNEL_RWX=y
CONFIG_ARCH_HAS_STRICT_MODULE_RWX=y
CONFIG_STRICT_MODULE_RWX=y
CONFIG_HAVE_ARCH_PREL32_RELOCATIONS=y
CONFIG_ARCH_USE_MEMREMAP_PROT=y
# CONFIG_LOCK_EVENT_COUNTS is not set
CONFIG_ARCH_HAS_MEM_ENCRYPT=y
CONFIG_HAVE_STATIC_CALL=y
CONFIG_HAVE_STATIC_CALL_INLINE=y
CONFIG_HAVE_PREEMPT_DYNAMIC=y
CONFIG_HAVE_PREEMPT_DYNAMIC_CALL=y
CONFIG_ARCH_WANT_LD_ORPHAN_WARN=y
CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y
CONFIG_ARCH_SUPPORTS_PAGE_TABLE_CHECK=y
CONFIG_ARCH_HAS_ELFCORE_COMPAT=y
CONFIG_ARCH_HAS_PARANOID_L1D_FLUSH=y
CONFIG_DYNAMIC_SIGFRAME=y
CONFIG_ARCH_HAS_HW_PTE_YOUNG=y
CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG=y
CONFIG_ARCH_HAS_KERNEL_FPU_SUPPORT=y
CONFIG_ARCH_VMLINUX_NEEDS_RELOCS=y
#
# GCOV-based kernel profiling
#
# CONFIG_GCOV_KERNEL is not set
CONFIG_ARCH_HAS_GCOV_PROFILE_ALL=y
# end of GCOV-based kernel profiling
CONFIG_HAVE_GCC_PLUGINS=y
CONFIG_FUNCTION_ALIGNMENT_4B=y
CONFIG_FUNCTION_ALIGNMENT_16B=y
CONFIG_FUNCTION_ALIGNMENT=16
# end of General architecture-dependent options
CONFIG_RT_MUTEXES=y
CONFIG_MODULE_SIG_FORMAT=y
CONFIG_MODULES=y
# CONFIG_MODULE_DEBUG is not set
# CONFIG_MODULE_FORCE_LOAD is not set
CONFIG_MODULE_UNLOAD=y
# CONFIG_MODULE_FORCE_UNLOAD is not set
# CONFIG_MODULE_UNLOAD_TAINT_TRACKING is not set
CONFIG_MODVERSIONS=y
CONFIG_GENKSYMS=y
# CONFIG_GENDWARFKSYMS is not set
CONFIG_ASM_MODVERSIONS=y
# CONFIG_EXTENDED_MODVERSIONS is not set
CONFIG_BASIC_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
CONFIG_MODULE_SIG=y
# CONFIG_MODULE_SIG_FORCE is not set
CONFIG_MODULE_SIG_ALL=y
# CONFIG_MODULE_SIG_SHA1 is not set
# CONFIG_MODULE_SIG_SHA256 is not set
# CONFIG_MODULE_SIG_SHA384 is not set
CONFIG_MODULE_SIG_SHA512=y
# CONFIG_MODULE_SIG_SHA3_256 is not set
# CONFIG_MODULE_SIG_SHA3_384 is not set
# CONFIG_MODULE_SIG_SHA3_512 is not set
CONFIG_MODULE_SIG_HASH="sha512"
# CONFIG_MODULE_COMPRESS is not set
# CONFIG_MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS is not set
CONFIG_MODPROBE_PATH="/sbin/modprobe"
# CONFIG_TRIM_UNUSED_KSYMS is not set
CONFIG_MODULES_TREE_LOOKUP=y
CONFIG_BLOCK=y
CONFIG_BLOCK_LEGACY_AUTOLOAD=y
CONFIG_BLK_CGROUP_RWSTAT=y
CONFIG_BLK_CGROUP_PUNT_BIO=y
CONFIG_BLK_DEV_BSG_COMMON=y
CONFIG_BLK_DEV_BSGLIB=y
CONFIG_BLK_DEV_INTEGRITY=y
CONFIG_BLK_DEV_WRITE_MOUNTED=y
# CONFIG_BLK_DEV_ZONED is not set
CONFIG_BLK_DEV_THROTTLING=y
# CONFIG_BLK_WBT is not set
# CONFIG_BLK_CGROUP_IOLATENCY is not set
# CONFIG_BLK_CGROUP_IOCOST is not set
# CONFIG_BLK_CGROUP_IOPRIO is not set
CONFIG_BLK_DEBUG_FS=y
# CONFIG_BLK_SED_OPAL is not set
# CONFIG_BLK_INLINE_ENCRYPTION is not set
#
# Partition Types
#
CONFIG_PARTITION_ADVANCED=y
CONFIG_ACORN_PARTITION=y
# CONFIG_ACORN_PARTITION_CUMANA is not set
# CONFIG_ACORN_PARTITION_EESOX is not set
CONFIG_ACORN_PARTITION_ICS=y
# CONFIG_ACORN_PARTITION_ADFS is not set
# CONFIG_ACORN_PARTITION_POWERTEC is not set
CONFIG_ACORN_PARTITION_RISCIX=y
CONFIG_AIX_PARTITION=y
CONFIG_OSF_PARTITION=y
CONFIG_AMIGA_PARTITION=y
CONFIG_ATARI_PARTITION=y
CONFIG_MAC_PARTITION=y
CONFIG_MSDOS_PARTITION=y
CONFIG_BSD_DISKLABEL=y
CONFIG_MINIX_SUBPARTITION=y
CONFIG_SOLARIS_X86_PARTITION=y
CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_LDM_PARTITION=y
# CONFIG_LDM_DEBUG is not set
CONFIG_SGI_PARTITION=y
CONFIG_ULTRIX_PARTITION=y
CONFIG_SUN_PARTITION=y
CONFIG_KARMA_PARTITION=y
CONFIG_EFI_PARTITION=y
CONFIG_SYSV68_PARTITION=y
CONFIG_CMDLINE_PARTITION=y
# end of Partition Types
CONFIG_BLK_PM=y
CONFIG_BLOCK_HOLDER_DEPRECATED=y
CONFIG_BLK_MQ_STACKING=y
#
# IO Schedulers
#
CONFIG_MQ_IOSCHED_DEADLINE=y
CONFIG_MQ_IOSCHED_KYBER=y
# CONFIG_IOSCHED_BFQ is not set
# end of IO Schedulers
CONFIG_PADATA=y
CONFIG_ASN1=y
CONFIG_UNINLINE_SPIN_UNLOCK=y
CONFIG_ARCH_SUPPORTS_ATOMIC_RMW=y
CONFIG_MUTEX_SPIN_ON_OWNER=y
CONFIG_RWSEM_SPIN_ON_OWNER=y
CONFIG_LOCK_SPIN_ON_OWNER=y
CONFIG_ARCH_USE_QUEUED_SPINLOCKS=y
CONFIG_QUEUED_SPINLOCKS=y
CONFIG_ARCH_USE_QUEUED_RWLOCKS=y
CONFIG_QUEUED_RWLOCKS=y
CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE=y
CONFIG_ARCH_HAS_SYNC_CORE_BEFORE_USERMODE=y
CONFIG_ARCH_HAS_SYSCALL_WRAPPER=y
CONFIG_FREEZER=y
#
# Executable file formats
#
CONFIG_BINFMT_ELF=y
CONFIG_COMPAT_BINFMT_ELF=y
CONFIG_ELFCORE=y
CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
CONFIG_BINFMT_SCRIPT=y
# CONFIG_BINFMT_MISC is not set
CONFIG_COREDUMP=y
# end of Executable file formats
#
# Memory Management options
#
CONFIG_ZPOOL=y
CONFIG_SWAP=y
CONFIG_ZSWAP=y
# CONFIG_ZSWAP_DEFAULT_ON is not set
# CONFIG_ZSWAP_SHRINKER_DEFAULT_ON is not set
# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_DEFLATE is not set
CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZO=y
# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_842 is not set
# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4 is not set
# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4HC is not set
# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_ZSTD is not set
CONFIG_ZSWAP_COMPRESSOR_DEFAULT="lzo"
CONFIG_ZSWAP_ZPOOL_DEFAULT_ZSMALLOC=y
CONFIG_ZSWAP_ZPOOL_DEFAULT="zsmalloc"
CONFIG_ZSMALLOC=y
# CONFIG_ZSMALLOC_STAT is not set
CONFIG_ZSMALLOC_CHAIN_SIZE=8
#
# Slab allocator options
#
CONFIG_SLUB=y
CONFIG_KVFREE_RCU_BATCHED=y
# CONFIG_SLUB_TINY is not set
CONFIG_SLAB_MERGE_DEFAULT=y
# CONFIG_SLAB_FREELIST_RANDOM is not set
# CONFIG_SLAB_FREELIST_HARDENED is not set
# CONFIG_SLAB_BUCKETS is not set
# CONFIG_SLUB_STATS is not set
CONFIG_SLUB_CPU_PARTIAL=y
# CONFIG_RANDOM_KMALLOC_CACHES is not set
# end of Slab allocator options
# CONFIG_SHUFFLE_PAGE_ALLOCATOR is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_SPARSEMEM=y
CONFIG_SPARSEMEM_EXTREME=y
CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
CONFIG_SPARSEMEM_VMEMMAP=y
CONFIG_SPARSEMEM_VMEMMAP_PREINIT=y
CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP=y
CONFIG_ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP=y
CONFIG_ARCH_WANT_HUGETLB_VMEMMAP_PREINIT=y
CONFIG_HAVE_GUP_FAST=y
CONFIG_NUMA_KEEP_MEMINFO=y
CONFIG_MEMORY_ISOLATION=y
CONFIG_EXCLUSIVE_SYSTEM_RAM=y
CONFIG_HAVE_BOOTMEM_INFO_NODE=y
CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y
CONFIG_MEMORY_HOTPLUG=y
CONFIG_MHP_DEFAULT_ONLINE_TYPE_OFFLINE=y
# CONFIG_MHP_DEFAULT_ONLINE_TYPE_ONLINE_AUTO is not set
# CONFIG_MHP_DEFAULT_ONLINE_TYPE_ONLINE_KERNEL is not set
# CONFIG_MHP_DEFAULT_ONLINE_TYPE_ONLINE_MOVABLE is not set
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_MHP_MEMMAP_ON_MEMORY=y
CONFIG_ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE=y
CONFIG_SPLIT_PTE_PTLOCKS=y
CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK=y
CONFIG_SPLIT_PMD_PTLOCKS=y
CONFIG_MEMORY_BALLOON=y
CONFIG_BALLOON_COMPACTION=y
CONFIG_COMPACTION=y
CONFIG_COMPACT_UNEVICTABLE_DEFAULT=1
CONFIG_PAGE_REPORTING=y
CONFIG_MIGRATION=y
CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION=y
CONFIG_ARCH_ENABLE_THP_MIGRATION=y
CONFIG_CONTIG_ALLOC=y
CONFIG_PCP_BATCH_SCALE_MAX=5
CONFIG_PHYS_ADDR_T_64BIT=y
CONFIG_MMU_NOTIFIER=y
# CONFIG_KSM is not set
CONFIG_DEFAULT_MMAP_MIN_ADDR=65536
CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE=y
CONFIG_MEMORY_FAILURE=y
# CONFIG_HWPOISON_INJECT is not set
CONFIG_ARCH_WANT_GENERAL_HUGETLB=y
CONFIG_ARCH_WANTS_THP_SWAP=y
CONFIG_MM_ID=y
CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y
# CONFIG_TRANSPARENT_HUGEPAGE_MADVISE is not set
# CONFIG_TRANSPARENT_HUGEPAGE_NEVER is not set
CONFIG_THP_SWAP=y
# CONFIG_READ_ONLY_THP_FOR_FS is not set
# CONFIG_NO_PAGE_MAPCOUNT is not set
CONFIG_PAGE_MAPCOUNT=y
CONFIG_PGTABLE_HAS_HUGE_LEAVES=y
CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP=y
CONFIG_ARCH_SUPPORTS_PMD_PFNMAP=y
CONFIG_ARCH_SUPPORTS_PUD_PFNMAP=y
CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y
CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y
CONFIG_USE_PERCPU_NUMA_NODE_ID=y
CONFIG_HAVE_SETUP_PER_CPU_AREA=y
CONFIG_CMA=y
# CONFIG_CMA_DEBUGFS is not set
# CONFIG_CMA_SYSFS is not set
CONFIG_CMA_AREAS=7
CONFIG_PAGE_BLOCK_ORDER=10
CONFIG_MEM_SOFT_DIRTY=y
CONFIG_GENERIC_EARLY_IOREMAP=y
# CONFIG_DEFERRED_STRUCT_PAGE_INIT is not set
# CONFIG_IDLE_PAGE_TRACKING is not set
CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y
CONFIG_ARCH_HAS_CURRENT_STACK_POINTER=y
CONFIG_ARCH_HAS_PTE_DEVMAP=y
CONFIG_ARCH_HAS_ZONE_DMA_SET=y
CONFIG_ZONE_DMA=y
CONFIG_ZONE_DMA32=y
# CONFIG_ZONE_DEVICE is not set
CONFIG_ARCH_USES_HIGH_VMA_FLAGS=y
CONFIG_ARCH_HAS_PKEYS=y
CONFIG_ARCH_USES_PG_ARCH_2=y
CONFIG_VM_EVENT_COUNTERS=y
# CONFIG_PERCPU_STATS is not set
# CONFIG_GUP_TEST is not set
# CONFIG_DMAPOOL_TEST is not set
CONFIG_ARCH_HAS_PTE_SPECIAL=y
CONFIG_MEMFD_CREATE=y
CONFIG_SECRETMEM=y
# CONFIG_ANON_VMA_NAME is not set
CONFIG_HAVE_ARCH_USERFAULTFD_WP=y
CONFIG_HAVE_ARCH_USERFAULTFD_MINOR=y
CONFIG_USERFAULTFD=y
CONFIG_PTE_MARKER_UFFD_WP=y
# CONFIG_LRU_GEN is not set
CONFIG_ARCH_SUPPORTS_PER_VMA_LOCK=y
CONFIG_PER_VMA_LOCK=y
CONFIG_LOCK_MM_AND_FIND_VMA=y
CONFIG_IOMMU_MM_DATA=y
CONFIG_EXECMEM=y
CONFIG_NUMA_MEMBLKS=y
# CONFIG_NUMA_EMU is not set
CONFIG_ARCH_SUPPORTS_PT_RECLAIM=y
CONFIG_PT_RECLAIM=y
#
# Data Access Monitoring
#
# CONFIG_DAMON is not set
# end of Data Access Monitoring
# end of Memory Management options
CONFIG_NET=y
CONFIG_NET_INGRESS=y
CONFIG_NET_EGRESS=y
CONFIG_NET_XGRESS=y
CONFIG_SKB_EXTENSIONS=y
CONFIG_NET_DEVMEM=y
CONFIG_NET_CRC32C=y
#
# Networking options
#
CONFIG_PACKET=y
# CONFIG_PACKET_DIAG is not set
CONFIG_UNIX=y
CONFIG_AF_UNIX_OOB=y
# CONFIG_UNIX_DIAG is not set
# CONFIG_TLS is not set
# CONFIG_XFRM_USER is not set
# CONFIG_NET_KEY is not set
# CONFIG_XDP_SOCKETS is not set
CONFIG_NET_HANDSHAKE=y
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_FIB_TRIE_STATS=y
CONFIG_IP_MULTIPLE_TABLES=y
CONFIG_IP_ROUTE_MULTIPATH=y
CONFIG_IP_ROUTE_VERBOSE=y
CONFIG_IP_ROUTE_CLASSID=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
# CONFIG_IP_PNP_BOOTP is not set
# CONFIG_IP_PNP_RARP is not set
# CONFIG_NET_IPIP is not set
# CONFIG_NET_IPGRE_DEMUX is not set
CONFIG_IP_MROUTE_COMMON=y
CONFIG_IP_MROUTE=y
# CONFIG_IP_MROUTE_MULTIPLE_TABLES is not set
CONFIG_IP_PIMSM_V1=y
CONFIG_IP_PIMSM_V2=y
CONFIG_SYN_COOKIES=y
# CONFIG_NET_IPVTI is not set
# CONFIG_NET_FOU is not set
# CONFIG_INET_AH is not set
# CONFIG_INET_ESP is not set
# CONFIG_INET_IPCOMP is not set
CONFIG_INET_TABLE_PERTURB_ORDER=16
# CONFIG_INET_DIAG is not set
CONFIG_TCP_CONG_ADVANCED=y
# CONFIG_TCP_CONG_BIC is not set
CONFIG_TCP_CONG_CUBIC=y
# CONFIG_TCP_CONG_WESTWOOD is not set
# CONFIG_TCP_CONG_HTCP is not set
# CONFIG_TCP_CONG_HSTCP is not set
# CONFIG_TCP_CONG_HYBLA is not set
# CONFIG_TCP_CONG_VEGAS is not set
# CONFIG_TCP_CONG_NV is not set
# CONFIG_TCP_CONG_SCALABLE is not set
# CONFIG_TCP_CONG_LP is not set
# CONFIG_TCP_CONG_VENO is not set
# CONFIG_TCP_CONG_YEAH is not set
# CONFIG_TCP_CONG_ILLINOIS is not set
# CONFIG_TCP_CONG_DCTCP is not set
# CONFIG_TCP_CONG_CDG is not set
# CONFIG_TCP_CONG_BBR is not set
CONFIG_DEFAULT_CUBIC=y
# CONFIG_DEFAULT_RENO is not set
CONFIG_DEFAULT_TCP_CONG="cubic"
CONFIG_TCP_SIGPOOL=y
# CONFIG_TCP_AO is not set
CONFIG_TCP_MD5SIG=y
CONFIG_IPV6=y
CONFIG_IPV6_ROUTER_PREF=y
CONFIG_IPV6_ROUTE_INFO=y
# CONFIG_IPV6_OPTIMISTIC_DAD is not set
# CONFIG_INET6_AH is not set
# CONFIG_INET6_ESP is not set
# CONFIG_INET6_IPCOMP is not set
# CONFIG_IPV6_MIP6 is not set
# CONFIG_IPV6_ILA is not set
# CONFIG_IPV6_VTI is not set
# CONFIG_IPV6_SIT is not set
# CONFIG_IPV6_TUNNEL is not set
CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_IPV6_SUBTREES=y
CONFIG_IPV6_MROUTE=y
CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y
CONFIG_IPV6_PIMSM_V2=y
# CONFIG_IPV6_SEG6_LWTUNNEL is not set
# CONFIG_IPV6_SEG6_HMAC is not set
# CONFIG_IPV6_RPL_LWTUNNEL is not set
# CONFIG_IPV6_IOAM6_LWTUNNEL is not set
CONFIG_NETLABEL=y
# CONFIG_MPTCP is not set
CONFIG_NETWORK_SECMARK=y
CONFIG_NET_PTP_CLASSIFY=y
# CONFIG_NETWORK_PHY_TIMESTAMPING is not set
CONFIG_NETFILTER=y
CONFIG_NETFILTER_ADVANCED=y
CONFIG_BRIDGE_NETFILTER=y
#
# Core Netfilter Configuration
#
CONFIG_NETFILTER_INGRESS=y
CONFIG_NETFILTER_EGRESS=y
CONFIG_NETFILTER_SKIP_EGRESS=y
CONFIG_NETFILTER_NETLINK=m
CONFIG_NETFILTER_FAMILY_BRIDGE=y
CONFIG_NETFILTER_FAMILY_ARP=y
CONFIG_NETFILTER_BPF_LINK=y
CONFIG_NETFILTER_NETLINK_HOOK=m
# CONFIG_NETFILTER_NETLINK_ACCT is not set
# CONFIG_NETFILTER_NETLINK_QUEUE is not set
# CONFIG_NETFILTER_NETLINK_LOG is not set
CONFIG_NETFILTER_NETLINK_OSF=m
CONFIG_NF_CONNTRACK=m
# CONFIG_NF_LOG_SYSLOG is not set
CONFIG_NETFILTER_CONNCOUNT=m
CONFIG_NF_CONNTRACK_MARK=y
# CONFIG_NF_CONNTRACK_SECMARK is not set
# CONFIG_NF_CONNTRACK_ZONES is not set
# CONFIG_NF_CONNTRACK_PROCFS is not set
# CONFIG_NF_CONNTRACK_EVENTS is not set
# CONFIG_NF_CONNTRACK_TIMEOUT is not set
# CONFIG_NF_CONNTRACK_TIMESTAMP is not set
# CONFIG_NF_CONNTRACK_LABELS is not set
CONFIG_NF_CONNTRACK_OVS=y
CONFIG_NF_CT_PROTO_DCCP=y
CONFIG_NF_CT_PROTO_SCTP=y
CONFIG_NF_CT_PROTO_UDPLITE=y
# CONFIG_NF_CONNTRACK_AMANDA is not set
# CONFIG_NF_CONNTRACK_FTP is not set
# CONFIG_NF_CONNTRACK_H323 is not set
# CONFIG_NF_CONNTRACK_IRC is not set
# CONFIG_NF_CONNTRACK_NETBIOS_NS is not set
# CONFIG_NF_CONNTRACK_SNMP is not set
# CONFIG_NF_CONNTRACK_PPTP is not set
# CONFIG_NF_CONNTRACK_SANE is not set
# CONFIG_NF_CONNTRACK_SIP is not set
# CONFIG_NF_CONNTRACK_TFTP is not set
# CONFIG_NF_CT_NETLINK is not set
CONFIG_NF_NAT=m
CONFIG_NF_NAT_REDIRECT=y
CONFIG_NF_NAT_MASQUERADE=y
CONFIG_NF_NAT_OVS=y
CONFIG_NETFILTER_SYNPROXY=m
CONFIG_NF_TABLES=m
CONFIG_NF_TABLES_INET=y
CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_NUMGEN=m
CONFIG_NFT_CT=m
CONFIG_NFT_FLOW_OFFLOAD=m
CONFIG_NFT_CONNLIMIT=m
CONFIG_NFT_LOG=m
CONFIG_NFT_LIMIT=m
CONFIG_NFT_MASQ=m
CONFIG_NFT_REDIR=m
CONFIG_NFT_NAT=m
CONFIG_NFT_TUNNEL=m
CONFIG_NFT_QUOTA=m
CONFIG_NFT_REJECT=m
CONFIG_NFT_REJECT_INET=m
CONFIG_NFT_COMPAT=m
CONFIG_NFT_HASH=m
CONFIG_NFT_FIB=m
CONFIG_NFT_FIB_INET=m
CONFIG_NFT_SOCKET=m
CONFIG_NFT_OSF=m
CONFIG_NFT_TPROXY=m
CONFIG_NFT_SYNPROXY=m
CONFIG_NF_DUP_NETDEV=m
CONFIG_NFT_DUP_NETDEV=m
CONFIG_NFT_FWD_NETDEV=m
CONFIG_NFT_FIB_NETDEV=m
CONFIG_NFT_REJECT_NETDEV=m
CONFIG_NF_FLOW_TABLE_INET=m
CONFIG_NF_FLOW_TABLE=m
# CONFIG_NF_FLOW_TABLE_PROCFS is not set
CONFIG_NETFILTER_XTABLES=y
CONFIG_NETFILTER_XTABLES_COMPAT=y
#
# Xtables combined modules
#
CONFIG_NETFILTER_XT_MARK=m
CONFIG_NETFILTER_XT_CONNMARK=m
#
# Xtables targets
#
# CONFIG_NETFILTER_XT_TARGET_AUDIT is not set
# CONFIG_NETFILTER_XT_TARGET_CHECKSUM is not set
# CONFIG_NETFILTER_XT_TARGET_CLASSIFY is not set
# CONFIG_NETFILTER_XT_TARGET_CONNMARK is not set
# CONFIG_NETFILTER_XT_TARGET_CT is not set
# CONFIG_NETFILTER_XT_TARGET_DSCP is not set
# CONFIG_NETFILTER_XT_TARGET_HL is not set
# CONFIG_NETFILTER_XT_TARGET_HMARK is not set
# CONFIG_NETFILTER_XT_TARGET_IDLETIMER is not set
# CONFIG_NETFILTER_XT_TARGET_LED is not set
# CONFIG_NETFILTER_XT_TARGET_LOG is not set
# CONFIG_NETFILTER_XT_TARGET_MARK is not set
CONFIG_NETFILTER_XT_NAT=m
CONFIG_NETFILTER_XT_TARGET_NETMAP=m
# CONFIG_NETFILTER_XT_TARGET_NFLOG is not set
# CONFIG_NETFILTER_XT_TARGET_NFQUEUE is not set
# CONFIG_NETFILTER_XT_TARGET_RATEEST is not set
CONFIG_NETFILTER_XT_TARGET_REDIRECT=m
CONFIG_NETFILTER_XT_TARGET_MASQUERADE=m
# CONFIG_NETFILTER_XT_TARGET_TEE is not set
# CONFIG_NETFILTER_XT_TARGET_TPROXY is not set
# CONFIG_NETFILTER_XT_TARGET_SECMARK is not set
# CONFIG_NETFILTER_XT_TARGET_TCPMSS is not set
# CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP is not set
#
# Xtables matches
#
CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
# CONFIG_NETFILTER_XT_MATCH_BPF is not set
# CONFIG_NETFILTER_XT_MATCH_CGROUP is not set
# CONFIG_NETFILTER_XT_MATCH_CLUSTER is not set
# CONFIG_NETFILTER_XT_MATCH_COMMENT is not set
# CONFIG_NETFILTER_XT_MATCH_CONNBYTES is not set
# CONFIG_NETFILTER_XT_MATCH_CONNLABEL is not set
# CONFIG_NETFILTER_XT_MATCH_CONNLIMIT is not set
CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
# CONFIG_NETFILTER_XT_MATCH_CPU is not set
# CONFIG_NETFILTER_XT_MATCH_DCCP is not set
# CONFIG_NETFILTER_XT_MATCH_DEVGROUP is not set
# CONFIG_NETFILTER_XT_MATCH_DSCP is not set
# CONFIG_NETFILTER_XT_MATCH_ECN is not set
# CONFIG_NETFILTER_XT_MATCH_ESP is not set
# CONFIG_NETFILTER_XT_MATCH_HASHLIMIT is not set
# CONFIG_NETFILTER_XT_MATCH_HELPER is not set
# CONFIG_NETFILTER_XT_MATCH_HL is not set
CONFIG_NETFILTER_XT_MATCH_IPCOMP=m
CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
# CONFIG_NETFILTER_XT_MATCH_L2TP is not set
# CONFIG_NETFILTER_XT_MATCH_LENGTH is not set
CONFIG_NETFILTER_XT_MATCH_LIMIT=m
# CONFIG_NETFILTER_XT_MATCH_MAC is not set
CONFIG_NETFILTER_XT_MATCH_MARK=m
# CONFIG_NETFILTER_XT_MATCH_MULTIPORT is not set
# CONFIG_NETFILTER_XT_MATCH_NFACCT is not set
# CONFIG_NETFILTER_XT_MATCH_OSF is not set
# CONFIG_NETFILTER_XT_MATCH_OWNER is not set
# CONFIG_NETFILTER_XT_MATCH_PHYSDEV is not set
# CONFIG_NETFILTER_XT_MATCH_PKTTYPE is not set
# CONFIG_NETFILTER_XT_MATCH_QUOTA is not set
# CONFIG_NETFILTER_XT_MATCH_RATEEST is not set
# CONFIG_NETFILTER_XT_MATCH_REALM is not set
# CONFIG_NETFILTER_XT_MATCH_RECENT is not set
# CONFIG_NETFILTER_XT_MATCH_SCTP is not set
# CONFIG_NETFILTER_XT_MATCH_SOCKET is not set
# CONFIG_NETFILTER_XT_MATCH_STATE is not set
# CONFIG_NETFILTER_XT_MATCH_STATISTIC is not set
# CONFIG_NETFILTER_XT_MATCH_STRING is not set
# CONFIG_NETFILTER_XT_MATCH_TCPMSS is not set
# CONFIG_NETFILTER_XT_MATCH_TIME is not set
# CONFIG_NETFILTER_XT_MATCH_U32 is not set
# end of Core Netfilter Configuration
# CONFIG_IP_SET is not set
# CONFIG_IP_VS is not set
#
# IP: Netfilter Configuration
#
CONFIG_NF_DEFRAG_IPV4=m
CONFIG_IP_NF_IPTABLES_LEGACY=y
CONFIG_NF_SOCKET_IPV4=m
CONFIG_NF_TPROXY_IPV4=m
CONFIG_NF_TABLES_IPV4=y
CONFIG_NFT_REJECT_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_NF_DUP_IPV4=m
# CONFIG_NF_LOG_ARP is not set
# CONFIG_NF_LOG_IPV4 is not set
CONFIG_NF_REJECT_IPV4=m
CONFIG_IP_NF_IPTABLES=y
# CONFIG_IP_NF_MATCH_AH is not set
# CONFIG_IP_NF_MATCH_ECN is not set
# CONFIG_IP_NF_MATCH_RPFILTER is not set
# CONFIG_IP_NF_MATCH_TTL is not set
CONFIG_IP_NF_FILTER=y
# CONFIG_IP_NF_TARGET_REJECT is not set
# CONFIG_IP_NF_TARGET_SYNPROXY is not set
CONFIG_IP_NF_NAT=m
CONFIG_IP_NF_TARGET_MASQUERADE=m
CONFIG_IP_NF_TARGET_NETMAP=m
CONFIG_IP_NF_TARGET_REDIRECT=m
# CONFIG_IP_NF_MANGLE is not set
# CONFIG_IP_NF_TARGET_ECN is not set
# CONFIG_IP_NF_RAW is not set
# CONFIG_IP_NF_SECURITY is not set
# CONFIG_IP_NF_ARPTABLES is not set
CONFIG_NFT_COMPAT_ARP=m
# CONFIG_IP_NF_ARPFILTER is not set
# CONFIG_IP_NF_ARP_MANGLE is not set
# end of IP: Netfilter Configuration
#
# IPv6: Netfilter Configuration
#
CONFIG_IP6_NF_IPTABLES_LEGACY=y
CONFIG_NF_SOCKET_IPV6=m
CONFIG_NF_TPROXY_IPV6=m
CONFIG_NF_TABLES_IPV6=y
CONFIG_NFT_REJECT_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_DUP_IPV6=m
CONFIG_NF_REJECT_IPV6=m
# CONFIG_NF_LOG_IPV6 is not set
CONFIG_IP6_NF_IPTABLES=y
# CONFIG_IP6_NF_MATCH_AH is not set
# CONFIG_IP6_NF_MATCH_EUI64 is not set
# CONFIG_IP6_NF_MATCH_FRAG is not set
# CONFIG_IP6_NF_MATCH_OPTS is not set
# CONFIG_IP6_NF_MATCH_HL is not set
# CONFIG_IP6_NF_MATCH_IPV6HEADER is not set
# CONFIG_IP6_NF_MATCH_MH is not set
# CONFIG_IP6_NF_MATCH_RPFILTER is not set
# CONFIG_IP6_NF_MATCH_RT is not set
# CONFIG_IP6_NF_MATCH_SRH is not set
CONFIG_IP6_NF_FILTER=y
# CONFIG_IP6_NF_TARGET_REJECT is not set
# CONFIG_IP6_NF_TARGET_SYNPROXY is not set
# CONFIG_IP6_NF_MANGLE is not set
# CONFIG_IP6_NF_RAW is not set
# CONFIG_IP6_NF_SECURITY is not set
CONFIG_IP6_NF_NAT=m
CONFIG_IP6_NF_TARGET_MASQUERADE=m
CONFIG_IP6_NF_TARGET_NPT=m
# end of IPv6: Netfilter Configuration
CONFIG_NF_DEFRAG_IPV6=m
CONFIG_NF_TABLES_BRIDGE=m
CONFIG_NFT_BRIDGE_META=m
CONFIG_NFT_BRIDGE_REJECT=m
# CONFIG_NF_CONNTRACK_BRIDGE is not set
CONFIG_BRIDGE_NF_EBTABLES_LEGACY=y
CONFIG_BRIDGE_NF_EBTABLES=y
# CONFIG_BRIDGE_EBT_BROUTE is not set
# CONFIG_BRIDGE_EBT_T_FILTER is not set
CONFIG_BRIDGE_EBT_T_NAT=y
# CONFIG_BRIDGE_EBT_802_3 is not set
# CONFIG_BRIDGE_EBT_AMONG is not set
# CONFIG_BRIDGE_EBT_ARP is not set
# CONFIG_BRIDGE_EBT_IP is not set
# CONFIG_BRIDGE_EBT_IP6 is not set
# CONFIG_BRIDGE_EBT_LIMIT is not set
# CONFIG_BRIDGE_EBT_MARK is not set
# CONFIG_BRIDGE_EBT_PKTTYPE is not set
# CONFIG_BRIDGE_EBT_STP is not set
# CONFIG_BRIDGE_EBT_VLAN is not set
# CONFIG_BRIDGE_EBT_ARPREPLY is not set
# CONFIG_BRIDGE_EBT_DNAT is not set
# CONFIG_BRIDGE_EBT_MARK_T is not set
# CONFIG_BRIDGE_EBT_REDIRECT is not set
# CONFIG_BRIDGE_EBT_SNAT is not set
# CONFIG_BRIDGE_EBT_LOG is not set
# CONFIG_BRIDGE_EBT_NFLOG is not set
# CONFIG_IP_SCTP is not set
# CONFIG_RDS is not set
# CONFIG_TIPC is not set
# CONFIG_ATM is not set
# CONFIG_L2TP is not set
CONFIG_STP=y
CONFIG_BRIDGE=y
CONFIG_BRIDGE_IGMP_SNOOPING=y
# CONFIG_BRIDGE_MRP is not set
# CONFIG_BRIDGE_CFM is not set
# CONFIG_NET_DSA is not set
# CONFIG_VLAN_8021Q is not set
CONFIG_LLC=y
# CONFIG_LLC2 is not set
# CONFIG_ATALK is not set
# CONFIG_X25 is not set
# CONFIG_LAPB is not set
# CONFIG_PHONET is not set
# CONFIG_6LOWPAN is not set
# CONFIG_IEEE802154 is not set
CONFIG_NET_SCHED=y
#
# Queueing/Scheduling
#
CONFIG_NET_SCH_HTB=y
CONFIG_NET_SCH_HFSC=y
CONFIG_NET_SCH_PRIO=y
CONFIG_NET_SCH_MULTIQ=y
CONFIG_NET_SCH_RED=y
CONFIG_NET_SCH_SFB=y
CONFIG_NET_SCH_SFQ=y
CONFIG_NET_SCH_TEQL=y
CONFIG_NET_SCH_TBF=y
CONFIG_NET_SCH_CBS=y
CONFIG_NET_SCH_ETF=y
CONFIG_NET_SCH_MQPRIO_LIB=y
CONFIG_NET_SCH_TAPRIO=y
CONFIG_NET_SCH_GRED=y
CONFIG_NET_SCH_NETEM=y
CONFIG_NET_SCH_DRR=y
CONFIG_NET_SCH_MQPRIO=y
CONFIG_NET_SCH_SKBPRIO=y
CONFIG_NET_SCH_CHOKE=y
CONFIG_NET_SCH_QFQ=y
CONFIG_NET_SCH_CODEL=y
CONFIG_NET_SCH_FQ_CODEL=y
CONFIG_NET_SCH_CAKE=y
CONFIG_NET_SCH_FQ=y
CONFIG_NET_SCH_HHF=y
CONFIG_NET_SCH_PIE=y
# CONFIG_NET_SCH_FQ_PIE is not set
CONFIG_NET_SCH_INGRESS=y
CONFIG_NET_SCH_PLUG=y
# CONFIG_NET_SCH_ETS is not set
CONFIG_NET_SCH_DEFAULT=y
# CONFIG_DEFAULT_FQ is not set
# CONFIG_DEFAULT_CODEL is not set
# CONFIG_DEFAULT_FQ_CODEL is not set
# CONFIG_DEFAULT_SFQ is not set
CONFIG_DEFAULT_PFIFO_FAST=y
CONFIG_DEFAULT_NET_SCH="pfifo_fast"
#
# Classification
#
CONFIG_NET_CLS=y
CONFIG_NET_CLS_BASIC=y
CONFIG_NET_CLS_ROUTE4=y
CONFIG_NET_CLS_FW=y
CONFIG_NET_CLS_U32=y
CONFIG_CLS_U32_PERF=y
CONFIG_CLS_U32_MARK=y
CONFIG_NET_CLS_FLOW=y
CONFIG_NET_CLS_CGROUP=y
CONFIG_NET_CLS_BPF=y
CONFIG_NET_CLS_FLOWER=y
CONFIG_NET_CLS_MATCHALL=y
CONFIG_NET_EMATCH=y
CONFIG_NET_EMATCH_STACK=32
CONFIG_NET_EMATCH_CMP=y
CONFIG_NET_EMATCH_NBYTE=y
CONFIG_NET_EMATCH_U32=y
CONFIG_NET_EMATCH_META=y
CONFIG_NET_EMATCH_TEXT=y
CONFIG_NET_EMATCH_IPT=y
CONFIG_NET_CLS_ACT=y
CONFIG_NET_ACT_POLICE=y
CONFIG_NET_ACT_GACT=y
CONFIG_GACT_PROB=y
CONFIG_NET_ACT_MIRRED=y
CONFIG_NET_ACT_SAMPLE=y
CONFIG_NET_ACT_NAT=y
CONFIG_NET_ACT_PEDIT=y
CONFIG_NET_ACT_SIMP=y
CONFIG_NET_ACT_SKBEDIT=y
CONFIG_NET_ACT_CSUM=y
# CONFIG_NET_ACT_MPLS is not set
CONFIG_NET_ACT_VLAN=y
CONFIG_NET_ACT_BPF=y
# CONFIG_NET_ACT_CONNMARK is not set
# CONFIG_NET_ACT_CTINFO is not set
CONFIG_NET_ACT_SKBMOD=y
CONFIG_NET_ACT_IFE=y
CONFIG_NET_ACT_TUNNEL_KEY=y
CONFIG_NET_ACT_CT=m
# CONFIG_NET_ACT_GATE is not set
CONFIG_NET_IFE_SKBMARK=y
CONFIG_NET_IFE_SKBPRIO=y
CONFIG_NET_IFE_SKBTCINDEX=y
# CONFIG_NET_TC_SKB_EXT is not set
CONFIG_NET_SCH_FIFO=y
CONFIG_DCB=y
CONFIG_DNS_RESOLVER=y
# CONFIG_BATMAN_ADV is not set
# CONFIG_OPENVSWITCH is not set
# CONFIG_VSOCKETS is not set
# CONFIG_NETLINK_DIAG is not set
# CONFIG_MPLS is not set
CONFIG_NET_NSH=y
# CONFIG_HSR is not set
# CONFIG_NET_SWITCHDEV is not set
CONFIG_NET_L3_MASTER_DEV=y
# CONFIG_QRTR is not set
# CONFIG_NET_NCSI is not set
CONFIG_PCPU_DEV_REFCNT=y
CONFIG_MAX_SKB_FRAGS=17
CONFIG_RPS=y
CONFIG_RFS_ACCEL=y
CONFIG_SOCK_RX_QUEUE_MAPPING=y
CONFIG_XPS=y
# CONFIG_CGROUP_NET_PRIO is not set
CONFIG_CGROUP_NET_CLASSID=y
CONFIG_NET_RX_BUSY_POLL=y
CONFIG_BQL=y
CONFIG_BPF_STREAM_PARSER=y
CONFIG_NET_FLOW_LIMIT=y
#
# Network testing
#
# CONFIG_NET_PKTGEN is not set
# CONFIG_NET_DROP_MONITOR is not set
# end of Network testing
# end of Networking options
CONFIG_HAMRADIO=y
#
# Packet Radio protocols
#
# CONFIG_AX25 is not set
# CONFIG_CAN is not set
# CONFIG_BT is not set
# CONFIG_AF_RXRPC is not set
# CONFIG_AF_KCM is not set
CONFIG_STREAM_PARSER=y
# CONFIG_MCTP is not set
CONFIG_FIB_RULES=y
CONFIG_WIRELESS=y
# CONFIG_CFG80211 is not set
#
# CFG80211 needs to be enabled for MAC80211
#
CONFIG_MAC80211_STA_HASH_MAX_SIZE=0
CONFIG_RFKILL=y
CONFIG_RFKILL_LEDS=y
CONFIG_RFKILL_INPUT=y
# CONFIG_RFKILL_GPIO is not set
# CONFIG_NET_9P is not set
CONFIG_CAIF=y
# CONFIG_CAIF_DEBUG is not set
# CONFIG_CAIF_NETDEV is not set
# CONFIG_CAIF_USB is not set
# CONFIG_CEPH_LIB is not set
# CONFIG_NFC is not set
CONFIG_PSAMPLE=y
CONFIG_NET_IFE=y
# CONFIG_LWTUNNEL is not set
CONFIG_NET_SELFTESTS=y
CONFIG_NET_SOCK_MSG=y
CONFIG_NET_DEVLINK=y
CONFIG_PAGE_POOL=y
CONFIG_PAGE_POOL_STATS=y
CONFIG_FAILOVER=y
CONFIG_ETHTOOL_NETLINK=y
#
# Device Drivers
#
CONFIG_HAVE_PCI=y
CONFIG_GENERIC_PCI_IOMAP=y
CONFIG_PCI=y
CONFIG_PCI_DOMAINS=y
CONFIG_PCIEPORTBUS=y
CONFIG_HOTPLUG_PCI_PCIE=y
CONFIG_PCIEAER=y
# CONFIG_PCIEAER_INJECT is not set
# CONFIG_PCIE_ECRC is not set
CONFIG_PCIEASPM=y
CONFIG_PCIEASPM_DEFAULT=y
# CONFIG_PCIEASPM_POWERSAVE is not set
# CONFIG_PCIEASPM_POWER_SUPERSAVE is not set
# CONFIG_PCIEASPM_PERFORMANCE is not set
CONFIG_PCIE_PME=y
# CONFIG_PCIE_DPC is not set
# CONFIG_PCIE_PTM is not set
CONFIG_PCI_MSI=y
CONFIG_PCI_QUIRKS=y
# CONFIG_PCI_DEBUG is not set
CONFIG_PCI_REALLOC_ENABLE_AUTO=y
# CONFIG_PCI_STUB is not set
# CONFIG_PCI_PF_STUB is not set
# CONFIG_XEN_PCIDEV_FRONTEND is not set
CONFIG_PCI_ATS=y
# CONFIG_PCI_DOE is not set
CONFIG_PCI_LOCKLESS_CONFIG=y
CONFIG_PCI_IOV=y
# CONFIG_PCI_NPEM is not set
CONFIG_PCI_PRI=y
CONFIG_PCI_PASID=y
# CONFIG_PCIE_TPH is not set
CONFIG_PCI_LABEL=y
# CONFIG_PCIE_BUS_TUNE_OFF is not set
CONFIG_PCIE_BUS_DEFAULT=y
# CONFIG_PCIE_BUS_SAFE is not set
# CONFIG_PCIE_BUS_PERFORMANCE is not set
# CONFIG_PCIE_BUS_PEER2PEER is not set
CONFIG_VGA_ARB=y
CONFIG_VGA_ARB_MAX_GPUS=16
CONFIG_HOTPLUG_PCI=y
CONFIG_HOTPLUG_PCI_ACPI=y
# CONFIG_HOTPLUG_PCI_ACPI_IBM is not set
CONFIG_HOTPLUG_PCI_CPCI=y
# CONFIG_HOTPLUG_PCI_CPCI_ZT5550 is not set
# CONFIG_HOTPLUG_PCI_CPCI_GENERIC is not set
# CONFIG_HOTPLUG_PCI_OCTEONEP is not set
CONFIG_HOTPLUG_PCI_SHPC=y
#
# PCI controller drivers
#
# CONFIG_VMD is not set
#
# Cadence-based PCIe controllers
#
# end of Cadence-based PCIe controllers
#
# DesignWare-based PCIe controllers
#
CONFIG_PCIE_DW=y
# CONFIG_PCIE_DW_DEBUGFS is not set
CONFIG_PCIE_DW_HOST=y
# CONFIG_PCI_MESON is not set
CONFIG_PCIE_DW_PLAT=y
CONFIG_PCIE_DW_PLAT_HOST=y
# end of DesignWare-based PCIe controllers
#
# Mobiveil-based PCIe controllers
#
# end of Mobiveil-based PCIe controllers
#
# PLDA-based PCIe controllers
#
# end of PLDA-based PCIe controllers
# end of PCI controller drivers
#
# PCI Endpoint
#
# CONFIG_PCI_ENDPOINT is not set
# end of PCI Endpoint
#
# PCI switch controller drivers
#
# CONFIG_PCI_SW_SWITCHTEC is not set
# end of PCI switch controller drivers
# CONFIG_PCI_PWRCTRL_SLOT is not set
# CONFIG_CXL_BUS is not set
# CONFIG_PCCARD is not set
CONFIG_RAPIDIO=y
# CONFIG_RAPIDIO_TSI721 is not set
CONFIG_RAPIDIO_DISC_TIMEOUT=30
# CONFIG_RAPIDIO_ENABLE_RX_TX_PORTS is not set
CONFIG_RAPIDIO_DMA_ENGINE=y
# CONFIG_RAPIDIO_DEBUG is not set
# CONFIG_RAPIDIO_ENUM_BASIC is not set
# CONFIG_RAPIDIO_CHMAN is not set
# CONFIG_RAPIDIO_MPORT_CDEV is not set
#
# RapidIO Switch drivers
#
# CONFIG_RAPIDIO_CPS_XX is not set
# CONFIG_RAPIDIO_CPS_GEN2 is not set
# CONFIG_RAPIDIO_RXS_GEN3 is not set
# end of RapidIO Switch drivers
#
# Generic Driver Options
#
CONFIG_UEVENT_HELPER=y
CONFIG_UEVENT_HELPER_PATH=""
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
# CONFIG_DEVTMPFS_SAFE is not set
# CONFIG_STANDALONE is not set
CONFIG_PREVENT_FIRMWARE_BUILD=y
#
# Firmware loader
#
CONFIG_FW_LOADER=y
CONFIG_FW_LOADER_DEBUG=y
CONFIG_EXTRA_FIRMWARE=""
# CONFIG_FW_LOADER_USER_HELPER is not set
# CONFIG_FW_LOADER_COMPRESS is not set
CONFIG_FW_CACHE=y
# CONFIG_FW_UPLOAD is not set
# end of Firmware loader
CONFIG_ALLOW_DEV_COREDUMP=y
# CONFIG_DEBUG_DRIVER is not set
# CONFIG_DEBUG_DEVRES is not set
# CONFIG_DEBUG_TEST_DRIVER_REMOVE is not set
# CONFIG_TEST_ASYNC_DRIVER_PROBE is not set
CONFIG_SYS_HYPERVISOR=y
CONFIG_GENERIC_CPU_DEVICES=y
CONFIG_GENERIC_CPU_AUTOPROBE=y
CONFIG_GENERIC_CPU_VULNERABILITIES=y
CONFIG_REGMAP=y
CONFIG_REGMAP_I2C=y
CONFIG_REGMAP_SPI=y
CONFIG_REGMAP_MMIO=y
CONFIG_REGMAP_IRQ=y
CONFIG_DMA_SHARED_BUFFER=y
# CONFIG_DMA_FENCE_TRACE is not set
# CONFIG_FW_DEVLINK_SYNC_STATE_TIMEOUT is not set
# end of Generic Driver Options
#
# Bus devices
#
# CONFIG_MHI_BUS is not set
# CONFIG_MHI_BUS_EP is not set
# end of Bus devices
#
# Cache Drivers
#
# end of Cache Drivers
CONFIG_CONNECTOR=y
CONFIG_PROC_EVENTS=y
#
# Firmware Drivers
#
#
# ARM System Control and Management Interface Protocol
#
# end of ARM System Control and Management Interface Protocol
CONFIG_EDD=y
CONFIG_EDD_OFF=y
CONFIG_FIRMWARE_MEMMAP=y
CONFIG_DMIID=y
# CONFIG_DMI_SYSFS is not set
CONFIG_DMI_SCAN_MACHINE_NON_EFI_FALLBACK=y
# CONFIG_ISCSI_IBFT is not set
# CONFIG_FW_CFG_SYSFS is not set
CONFIG_SYSFB=y
# CONFIG_SYSFB_SIMPLEFB is not set
# CONFIG_GOOGLE_FIRMWARE is not set
#
# EFI (Extensible Firmware Interface) Support
#
CONFIG_EFI_ESRT=y
# CONFIG_EFI_VARS_PSTORE is not set
CONFIG_EFI_DXE_MEM_ATTRIBUTES=y
CONFIG_EFI_RUNTIME_WRAPPERS=y
# CONFIG_EFI_BOOTLOADER_CONTROL is not set
# CONFIG_EFI_CAPSULE_LOADER is not set
# CONFIG_EFI_TEST is not set
# CONFIG_APPLE_PROPERTIES is not set
# CONFIG_RESET_ATTACK_MITIGATION is not set
# CONFIG_EFI_RCI2_TABLE is not set
# CONFIG_EFI_DISABLE_PCI_DMA is not set
CONFIG_EFI_EARLYCON=y
CONFIG_EFI_CUSTOM_SSDT_OVERLAYS=y
# CONFIG_EFI_DISABLE_RUNTIME is not set
# CONFIG_EFI_COCO_SECRET is not set
# end of EFI (Extensible Firmware Interface) Support
CONFIG_UEFI_CPER=y
CONFIG_UEFI_CPER_X86=y
#
# Qualcomm firmware drivers
#
# end of Qualcomm firmware drivers
#
# Tegra firmware driver
#
# end of Tegra firmware driver
# end of Firmware Drivers
# CONFIG_FWCTL is not set
# CONFIG_GNSS is not set
# CONFIG_MTD is not set
# CONFIG_OF is not set
CONFIG_ARCH_MIGHT_HAVE_PC_PARPORT=y
CONFIG_PARPORT=y
# CONFIG_PARPORT_PC is not set
CONFIG_PARPORT_1284=y
CONFIG_PNP=y
# CONFIG_PNP_DEBUG_MESSAGES is not set
#
# Protocols
#
CONFIG_PNPACPI=y
CONFIG_BLK_DEV=y
# CONFIG_BLK_DEV_NULL_BLK is not set
# CONFIG_BLK_DEV_FD is not set
CONFIG_CDROM=y
# CONFIG_BLK_DEV_PCIESSD_MTIP32XX is not set
# CONFIG_ZRAM is not set
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_LOOP_MIN_COUNT=8
# CONFIG_BLK_DEV_DRBD is not set
# CONFIG_BLK_DEV_NBD is not set
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_COUNT=16
CONFIG_BLK_DEV_RAM_SIZE=65536
# CONFIG_CDROM_PKTCDVD is not set
# CONFIG_ATA_OVER_ETH is not set
CONFIG_XEN_BLKDEV_FRONTEND=y
# CONFIG_XEN_BLKDEV_BACKEND is not set
CONFIG_VIRTIO_BLK=y
# CONFIG_BLK_DEV_RBD is not set
# CONFIG_BLK_DEV_UBLK is not set
#
# NVME Support
#
# CONFIG_BLK_DEV_NVME is not set
# CONFIG_NVME_FC is not set
# CONFIG_NVME_TCP is not set
# CONFIG_NVME_TARGET is not set
# end of NVME Support
#
# Misc devices
#
# CONFIG_AD525X_DPOT is not set
# CONFIG_DUMMY_IRQ is not set
# CONFIG_IBM_ASM is not set
# CONFIG_PHANTOM is not set
# CONFIG_RPMB is not set
# CONFIG_TI_FPC202 is not set
# CONFIG_TIFM_CORE is not set
# CONFIG_ICS932S401 is not set
# CONFIG_ENCLOSURE_SERVICES is not set
# CONFIG_HP_ILO is not set
# CONFIG_APDS9802ALS is not set
# CONFIG_ISL29003 is not set
# CONFIG_ISL29020 is not set
# CONFIG_SENSORS_TSL2550 is not set
# CONFIG_SENSORS_BH1770 is not set
# CONFIG_SENSORS_APDS990X is not set
# CONFIG_HMC6352 is not set
# CONFIG_DS1682 is not set
# CONFIG_LATTICE_ECP3_CONFIG is not set
CONFIG_SRAM=y
# CONFIG_DW_XDATA_PCIE is not set
# CONFIG_PCI_ENDPOINT_TEST is not set
# CONFIG_XILINX_SDFEC is not set
# CONFIG_NTSYNC is not set
# CONFIG_NSM is not set
# CONFIG_C2PORT is not set
#
# EEPROM support
#
# CONFIG_EEPROM_AT24 is not set
# CONFIG_EEPROM_AT25 is not set
# CONFIG_EEPROM_MAX6875 is not set
CONFIG_EEPROM_93CX6=y
# CONFIG_EEPROM_93XX46 is not set
# CONFIG_EEPROM_IDT_89HPESX is not set
# CONFIG_EEPROM_EE1004 is not set
# end of EEPROM support
# CONFIG_CB710_CORE is not set
# CONFIG_SENSORS_LIS3_I2C is not set
# CONFIG_ALTERA_STAPL is not set
CONFIG_INTEL_MEI=y
CONFIG_INTEL_MEI_ME=y
# CONFIG_INTEL_MEI_TXE is not set
# CONFIG_INTEL_MEI_VSC_HW is not set
# CONFIG_VMWARE_VMCI is not set
# CONFIG_GENWQE is not set
# CONFIG_BCM_VK is not set
# CONFIG_MISC_ALCOR_PCI is not set
# CONFIG_MISC_RTSX_PCI is not set
# CONFIG_MISC_RTSX_USB is not set
# CONFIG_UACCE is not set
# CONFIG_PVPANIC is not set
# CONFIG_GP_PCI1XXXX is not set
# CONFIG_KEBA_CP500 is not set
# CONFIG_AMD_SBRMI_I2C is not set
# end of Misc devices
#
# SCSI device support
#
CONFIG_SCSI_MOD=y
# CONFIG_RAID_ATTRS is not set
CONFIG_SCSI_COMMON=y
CONFIG_SCSI=y
CONFIG_SCSI_DMA=y
CONFIG_SCSI_PROC_FS=y
#
# SCSI support type (disk, tape, CD-ROM)
#
CONFIG_BLK_DEV_SD=y
# CONFIG_CHR_DEV_ST is not set
CONFIG_BLK_DEV_SR=y
CONFIG_CHR_DEV_SG=y
CONFIG_BLK_DEV_BSG=y
# CONFIG_CHR_DEV_SCH is not set
CONFIG_SCSI_CONSTANTS=y
CONFIG_SCSI_LOGGING=y
CONFIG_SCSI_SCAN_ASYNC=y
#
# SCSI Transports
#
CONFIG_SCSI_SPI_ATTRS=y
# CONFIG_SCSI_FC_ATTRS is not set
# CONFIG_SCSI_ISCSI_ATTRS is not set
# CONFIG_SCSI_SAS_ATTRS is not set
# CONFIG_SCSI_SAS_LIBSAS is not set
# CONFIG_SCSI_SRP_ATTRS is not set
# end of SCSI Transports
CONFIG_SCSI_LOWLEVEL=y
# CONFIG_ISCSI_TCP is not set
# CONFIG_ISCSI_BOOT_SYSFS is not set
# CONFIG_SCSI_CXGB3_ISCSI is not set
# CONFIG_SCSI_CXGB4_ISCSI is not set
# CONFIG_SCSI_BNX2_ISCSI is not set
# CONFIG_BE2ISCSI is not set
# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
# CONFIG_SCSI_HPSA is not set
# CONFIG_SCSI_3W_9XXX is not set
# CONFIG_SCSI_3W_SAS is not set
# CONFIG_SCSI_ACARD is not set
# CONFIG_SCSI_AACRAID is not set
# CONFIG_SCSI_AIC7XXX is not set
# CONFIG_SCSI_AIC79XX is not set
# CONFIG_SCSI_AIC94XX is not set
# CONFIG_SCSI_MVSAS is not set
# CONFIG_SCSI_MVUMI is not set
# CONFIG_SCSI_ADVANSYS is not set
# CONFIG_SCSI_ARCMSR is not set
# CONFIG_SCSI_ESAS2R is not set
CONFIG_MEGARAID_NEWGEN=y
# CONFIG_MEGARAID_MM is not set
# CONFIG_MEGARAID_LEGACY is not set
# CONFIG_MEGARAID_SAS is not set
# CONFIG_SCSI_MPT3SAS is not set
# CONFIG_SCSI_MPT2SAS is not set
# CONFIG_SCSI_MPI3MR is not set
# CONFIG_SCSI_SMARTPQI is not set
# CONFIG_SCSI_HPTIOP is not set
# CONFIG_SCSI_BUSLOGIC is not set
# CONFIG_SCSI_MYRB is not set
# CONFIG_SCSI_MYRS is not set
# CONFIG_VMWARE_PVSCSI is not set
# CONFIG_XEN_SCSI_FRONTEND is not set
# CONFIG_SCSI_SNIC is not set
# CONFIG_SCSI_DMX3191D is not set
# CONFIG_SCSI_FDOMAIN_PCI is not set
# CONFIG_SCSI_ISCI is not set
# CONFIG_SCSI_IPS is not set
# CONFIG_SCSI_INITIO is not set
# CONFIG_SCSI_INIA100 is not set
# CONFIG_SCSI_STEX is not set
# CONFIG_SCSI_SYM53C8XX_2 is not set
# CONFIG_SCSI_IPR is not set
# CONFIG_SCSI_QLOGIC_1280 is not set
# CONFIG_SCSI_QLA_ISCSI is not set
# CONFIG_SCSI_DC395x is not set
# CONFIG_SCSI_AM53C974 is not set
# CONFIG_SCSI_WD719X is not set
# CONFIG_SCSI_DEBUG is not set
# CONFIG_SCSI_PMCRAID is not set
# CONFIG_SCSI_PM8001 is not set
# CONFIG_SCSI_VIRTIO is not set
# CONFIG_SCSI_DH is not set
# end of SCSI device support
CONFIG_ATA=y
CONFIG_SATA_HOST=y
CONFIG_PATA_TIMINGS=y
CONFIG_ATA_VERBOSE_ERROR=y
CONFIG_ATA_FORCE=y
CONFIG_ATA_ACPI=y
CONFIG_SATA_ZPODD=y
CONFIG_SATA_PMP=y
#
# Controllers with non-SFF native interface
#
CONFIG_SATA_AHCI=y
CONFIG_SATA_MOBILE_LPM_POLICY=0
CONFIG_SATA_AHCI_PLATFORM=y
# CONFIG_AHCI_DWC is not set
# CONFIG_SATA_INIC162X is not set
CONFIG_SATA_ACARD_AHCI=y
# CONFIG_SATA_SIL24 is not set
CONFIG_ATA_SFF=y
#
# SFF controllers with custom DMA interface
#
# CONFIG_PDC_ADMA is not set
# CONFIG_SATA_QSTOR is not set
# CONFIG_SATA_SX4 is not set
CONFIG_ATA_BMDMA=y
#
# SATA SFF controllers with BMDMA
#
CONFIG_ATA_PIIX=y
# CONFIG_SATA_DWC is not set
# CONFIG_SATA_MV is not set
# CONFIG_SATA_NV is not set
# CONFIG_SATA_PROMISE is not set
# CONFIG_SATA_SIL is not set
# CONFIG_SATA_SIS is not set
# CONFIG_SATA_SVW is not set
# CONFIG_SATA_ULI is not set
# CONFIG_SATA_VIA is not set
# CONFIG_SATA_VITESSE is not set
#
# PATA SFF controllers with BMDMA
#
# CONFIG_PATA_ALI is not set
# CONFIG_PATA_AMD is not set
# CONFIG_PATA_ARTOP is not set
# CONFIG_PATA_ATIIXP is not set
# CONFIG_PATA_ATP867X is not set
# CONFIG_PATA_CMD64X is not set
# CONFIG_PATA_CYPRESS is not set
# CONFIG_PATA_EFAR is not set
# CONFIG_PATA_HPT366 is not set
# CONFIG_PATA_HPT37X is not set
# CONFIG_PATA_HPT3X2N is not set
# CONFIG_PATA_HPT3X3 is not set
# CONFIG_PATA_IT8213 is not set
# CONFIG_PATA_IT821X is not set
# CONFIG_PATA_JMICRON is not set
# CONFIG_PATA_MARVELL is not set
# CONFIG_PATA_NETCELL is not set
# CONFIG_PATA_NINJA32 is not set
# CONFIG_PATA_NS87415 is not set
# CONFIG_PATA_OLDPIIX is not set
# CONFIG_PATA_OPTIDMA is not set
# CONFIG_PATA_PDC2027X is not set
# CONFIG_PATA_PDC_OLD is not set
# CONFIG_PATA_RADISYS is not set
# CONFIG_PATA_RDC is not set
# CONFIG_PATA_SCH is not set
# CONFIG_PATA_SERVERWORKS is not set
# CONFIG_PATA_SIL680 is not set
CONFIG_PATA_SIS=y
# CONFIG_PATA_TOSHIBA is not set
# CONFIG_PATA_TRIFLEX is not set
# CONFIG_PATA_VIA is not set
# CONFIG_PATA_WINBOND is not set
#
# PIO-only SFF controllers
#
# CONFIG_PATA_CMD640_PCI is not set
# CONFIG_PATA_MPIIX is not set
# CONFIG_PATA_NS87410 is not set
# CONFIG_PATA_OPTI is not set
# CONFIG_PATA_RZ1000 is not set
#
# Generic fallback / legacy drivers
#
# CONFIG_PATA_ACPI is not set
CONFIG_ATA_GENERIC=y
# CONFIG_PATA_LEGACY is not set
CONFIG_MD=y
CONFIG_BLK_DEV_MD=y
CONFIG_MD_AUTODETECT=y
CONFIG_MD_BITMAP_FILE=y
# CONFIG_MD_LINEAR is not set
# CONFIG_MD_RAID0 is not set
# CONFIG_MD_RAID1 is not set
# CONFIG_MD_RAID10 is not set
# CONFIG_MD_RAID456 is not set
# CONFIG_BCACHE is not set
CONFIG_BLK_DEV_DM_BUILTIN=y
CONFIG_BLK_DEV_DM=y
# CONFIG_DM_DEBUG is not set
CONFIG_DM_BUFIO=m
# CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING is not set
CONFIG_DM_BIO_PRISON=m
CONFIG_DM_PERSISTENT_DATA=m
# CONFIG_DM_UNSTRIPED is not set
CONFIG_DM_CRYPT=y
# CONFIG_DM_SNAPSHOT is not set
CONFIG_DM_THIN_PROVISIONING=m
# CONFIG_DM_CACHE is not set
# CONFIG_DM_WRITECACHE is not set
# CONFIG_DM_EBS is not set
# CONFIG_DM_ERA is not set
# CONFIG_DM_CLONE is not set
# CONFIG_DM_MIRROR is not set
# CONFIG_DM_RAID is not set
# CONFIG_DM_ZERO is not set
# CONFIG_DM_MULTIPATH is not set
# CONFIG_DM_DELAY is not set
# CONFIG_DM_DUST is not set
# CONFIG_DM_INIT is not set
CONFIG_DM_UEVENT=y
# CONFIG_DM_FLAKEY is not set
# CONFIG_DM_VERITY is not set
# CONFIG_DM_SWITCH is not set
# CONFIG_DM_LOG_WRITES is not set
# CONFIG_DM_INTEGRITY is not set
# CONFIG_DM_AUDIT is not set
# CONFIG_DM_VDO is not set
# CONFIG_TARGET_CORE is not set
CONFIG_FUSION=y
# CONFIG_FUSION_SPI is not set
# CONFIG_FUSION_SAS is not set
CONFIG_FUSION_MAX_SGE=128
CONFIG_FUSION_LOGGING=y
#
# IEEE 1394 (FireWire) support
#
# CONFIG_FIREWIRE is not set
# CONFIG_FIREWIRE_NOSY is not set
# end of IEEE 1394 (FireWire) support
CONFIG_MACINTOSH_DRIVERS=y
CONFIG_MAC_EMUMOUSEBTN=y
CONFIG_NETDEVICES=y
CONFIG_NET_CORE=y
# CONFIG_BONDING is not set
# CONFIG_DUMMY is not set
# CONFIG_WIREGUARD is not set
# CONFIG_OVPN is not set
# CONFIG_EQUALIZER is not set
CONFIG_NET_FC=y
# CONFIG_IFB is not set
# CONFIG_NET_TEAM is not set
CONFIG_MACVLAN=y
CONFIG_MACVTAP=y
# CONFIG_IPVLAN is not set
# CONFIG_VXLAN is not set
# CONFIG_GENEVE is not set
# CONFIG_BAREUDP is not set
# CONFIG_GTP is not set
# CONFIG_PFCP is not set
# CONFIG_AMT is not set
# CONFIG_MACSEC is not set
# CONFIG_NETCONSOLE is not set
# CONFIG_RIONET is not set
CONFIG_TUN=y
CONFIG_TAP=y
# CONFIG_TUN_VNET_CROSS_LE is not set
CONFIG_VETH=y
CONFIG_VIRTIO_NET=y
# CONFIG_NLMON is not set
# CONFIG_NETKIT is not set
CONFIG_NET_VRF=y
# CONFIG_ARCNET is not set
# CONFIG_CAIF_DRIVERS is not set
CONFIG_ETHERNET=y
CONFIG_MDIO=y
CONFIG_NET_VENDOR_3COM=y
# CONFIG_VORTEX is not set
# CONFIG_TYPHOON is not set
CONFIG_NET_VENDOR_ADAPTEC=y
# CONFIG_ADAPTEC_STARFIRE is not set
CONFIG_NET_VENDOR_AGERE=y
# CONFIG_ET131X is not set
CONFIG_NET_VENDOR_ALACRITECH=y
# CONFIG_SLICOSS is not set
CONFIG_NET_VENDOR_ALTEON=y
# CONFIG_ACENIC is not set
# CONFIG_ALTERA_TSE is not set
CONFIG_NET_VENDOR_AMAZON=y
# CONFIG_ENA_ETHERNET is not set
CONFIG_NET_VENDOR_AMD=y
# CONFIG_AMD8111_ETH is not set
# CONFIG_PCNET32 is not set
# CONFIG_AMD_XGBE is not set
# CONFIG_PDS_CORE is not set
CONFIG_NET_VENDOR_AQUANTIA=y
# CONFIG_AQTION is not set
CONFIG_NET_VENDOR_ARC=y
CONFIG_NET_VENDOR_ASIX=y
# CONFIG_SPI_AX88796C is not set
CONFIG_NET_VENDOR_ATHEROS=y
# CONFIG_ATL2 is not set
# CONFIG_ATL1 is not set
# CONFIG_ATL1E is not set
# CONFIG_ATL1C is not set
# CONFIG_ALX is not set
# CONFIG_CX_ECAT is not set
CONFIG_NET_VENDOR_BROADCOM=y
# CONFIG_B44 is not set
# CONFIG_BCMGENET is not set
# CONFIG_BNX2 is not set
# CONFIG_CNIC is not set
# CONFIG_TIGON3 is not set
# CONFIG_BNX2X is not set
# CONFIG_SYSTEMPORT is not set
# CONFIG_BNXT is not set
CONFIG_NET_VENDOR_CADENCE=y
# CONFIG_MACB is not set
CONFIG_NET_VENDOR_CAVIUM=y
# CONFIG_THUNDER_NIC_PF is not set
# CONFIG_THUNDER_NIC_VF is not set
# CONFIG_THUNDER_NIC_BGX is not set
# CONFIG_THUNDER_NIC_RGX is not set
CONFIG_CAVIUM_PTP=y
# CONFIG_LIQUIDIO is not set
# CONFIG_LIQUIDIO_VF is not set
CONFIG_NET_VENDOR_CHELSIO=y
# CONFIG_CHELSIO_T1 is not set
# CONFIG_CHELSIO_T3 is not set
# CONFIG_CHELSIO_T4 is not set
# CONFIG_CHELSIO_T4VF is not set
CONFIG_NET_VENDOR_CISCO=y
# CONFIG_ENIC is not set
CONFIG_NET_VENDOR_CORTINA=y
CONFIG_NET_VENDOR_DAVICOM=y
# CONFIG_DM9051 is not set
# CONFIG_DNET is not set
CONFIG_NET_VENDOR_DEC=y
CONFIG_NET_TULIP=y
# CONFIG_DE2104X is not set
# CONFIG_TULIP is not set
# CONFIG_WINBOND_840 is not set
# CONFIG_DM9102 is not set
# CONFIG_ULI526X is not set
CONFIG_NET_VENDOR_DLINK=y
# CONFIG_DL2K is not set
CONFIG_NET_VENDOR_EMULEX=y
# CONFIG_BE2NET is not set
CONFIG_NET_VENDOR_ENGLEDER=y
# CONFIG_TSNEP is not set
CONFIG_NET_VENDOR_EZCHIP=y
CONFIG_NET_VENDOR_FUNGIBLE=y
# CONFIG_FUN_ETH is not set
CONFIG_NET_VENDOR_GOOGLE=y
# CONFIG_GVE is not set
CONFIG_NET_VENDOR_HISILICON=y
# CONFIG_HIBMCGE is not set
CONFIG_NET_VENDOR_HUAWEI=y
# CONFIG_HINIC is not set
# CONFIG_HINIC3 is not set
CONFIG_NET_VENDOR_I825XX=y
CONFIG_NET_VENDOR_INTEL=y
# CONFIG_E100 is not set
# CONFIG_E1000 is not set
# CONFIG_E1000E is not set
CONFIG_IGB=y
CONFIG_IGB_HWMON=y
CONFIG_IGB_DCA=y
# CONFIG_IGBVF is not set
CONFIG_IXGBE=y
CONFIG_IXGBE_HWMON=y
CONFIG_IXGBE_DCA=y
CONFIG_IXGBE_DCB=y
# CONFIG_IXGBEVF is not set
# CONFIG_I40E is not set
# CONFIG_I40EVF is not set
# CONFIG_ICE is not set
# CONFIG_FM10K is not set
# CONFIG_IGC is not set
# CONFIG_IDPF is not set
# CONFIG_JME is not set
CONFIG_NET_VENDOR_ADI=y
CONFIG_NET_VENDOR_LITEX=y
CONFIG_NET_VENDOR_MARVELL=y
# CONFIG_MVMDIO is not set
# CONFIG_SKGE is not set
# CONFIG_SKY2 is not set
# CONFIG_OCTEON_EP is not set
# CONFIG_OCTEON_EP_VF is not set
CONFIG_NET_VENDOR_MELLANOX=y
# CONFIG_MLX4_EN is not set
# CONFIG_MLX5_CORE is not set
# CONFIG_MLXSW_CORE is not set
# CONFIG_MLXFW is not set
CONFIG_NET_VENDOR_META=y
# CONFIG_FBNIC is not set
CONFIG_NET_VENDOR_MICREL=y
# CONFIG_KS8842 is not set
# CONFIG_KS8851 is not set
# CONFIG_KS8851_MLL is not set
# CONFIG_KSZ884X_PCI is not set
CONFIG_NET_VENDOR_MICROCHIP=y
# CONFIG_ENC28J60 is not set
# CONFIG_ENCX24J600 is not set
# CONFIG_LAN743X is not set
# CONFIG_LAN865X is not set
# CONFIG_VCAP is not set
CONFIG_NET_VENDOR_MICROSEMI=y
CONFIG_NET_VENDOR_MICROSOFT=y
CONFIG_NET_VENDOR_MYRI=y
# CONFIG_MYRI10GE is not set
# CONFIG_FEALNX is not set
CONFIG_NET_VENDOR_NI=y
# CONFIG_NI_XGE_MANAGEMENT_ENET is not set
CONFIG_NET_VENDOR_NATSEMI=y
# CONFIG_NATSEMI is not set
# CONFIG_NS83820 is not set
CONFIG_NET_VENDOR_NETERION=y
# CONFIG_S2IO is not set
CONFIG_NET_VENDOR_NETRONOME=y
# CONFIG_NFP is not set
CONFIG_NET_VENDOR_8390=y
# CONFIG_NE2K_PCI is not set
CONFIG_NET_VENDOR_NVIDIA=y
# CONFIG_FORCEDETH is not set
CONFIG_NET_VENDOR_OKI=y
# CONFIG_ETHOC is not set
CONFIG_NET_VENDOR_PACKET_ENGINES=y
# CONFIG_HAMACHI is not set
# CONFIG_YELLOWFIN is not set
CONFIG_NET_VENDOR_PENSANDO=y
# CONFIG_IONIC is not set
CONFIG_NET_VENDOR_QLOGIC=y
# CONFIG_QLA3XXX is not set
# CONFIG_QLCNIC is not set
# CONFIG_NETXEN_NIC is not set
# CONFIG_QED is not set
CONFIG_NET_VENDOR_BROCADE=y
# CONFIG_BNA is not set
CONFIG_NET_VENDOR_QUALCOMM=y
# CONFIG_QCOM_EMAC is not set
# CONFIG_RMNET is not set
CONFIG_NET_VENDOR_RDC=y
# CONFIG_R6040 is not set
CONFIG_NET_VENDOR_REALTEK=y
# CONFIG_ATP is not set
# CONFIG_8139CP is not set
# CONFIG_8139TOO is not set
# CONFIG_R8169 is not set
# CONFIG_RTASE is not set
CONFIG_NET_VENDOR_RENESAS=y
CONFIG_NET_VENDOR_ROCKER=y
CONFIG_NET_VENDOR_SAMSUNG=y
# CONFIG_SXGBE_ETH is not set
CONFIG_NET_VENDOR_SEEQ=y
CONFIG_NET_VENDOR_SILAN=y
# CONFIG_SC92031 is not set
CONFIG_NET_VENDOR_SIS=y
# CONFIG_SIS900 is not set
# CONFIG_SIS190 is not set
CONFIG_NET_VENDOR_SOLARFLARE=y
# CONFIG_SFC is not set
# CONFIG_SFC_FALCON is not set
# CONFIG_SFC_SIENA is not set
CONFIG_NET_VENDOR_SMSC=y
# CONFIG_EPIC100 is not set
# CONFIG_SMSC911X is not set
# CONFIG_SMSC9420 is not set
CONFIG_NET_VENDOR_SOCIONEXT=y
CONFIG_NET_VENDOR_STMICRO=y
# CONFIG_STMMAC_ETH is not set
CONFIG_NET_VENDOR_SUN=y
# CONFIG_HAPPYMEAL is not set
# CONFIG_SUNGEM is not set
# CONFIG_CASSINI is not set
# CONFIG_NIU is not set
CONFIG_NET_VENDOR_SYNOPSYS=y
# CONFIG_DWC_XLGMAC is not set
CONFIG_NET_VENDOR_TEHUTI=y
# CONFIG_TEHUTI is not set
# CONFIG_TEHUTI_TN40 is not set
CONFIG_NET_VENDOR_TI=y
# CONFIG_TI_CPSW_PHY_SEL is not set
# CONFIG_TLAN is not set
CONFIG_NET_VENDOR_VERTEXCOM=y
# CONFIG_MSE102X is not set
CONFIG_NET_VENDOR_VIA=y
# CONFIG_VIA_RHINE is not set
# CONFIG_VIA_VELOCITY is not set
CONFIG_NET_VENDOR_WANGXUN=y
# CONFIG_NGBE is not set
CONFIG_NET_VENDOR_WIZNET=y
# CONFIG_WIZNET_W5100 is not set
# CONFIG_WIZNET_W5300 is not set
CONFIG_NET_VENDOR_XILINX=y
# CONFIG_XILINX_EMACLITE is not set
# CONFIG_XILINX_LL_TEMAC is not set
CONFIG_FDDI=y
# CONFIG_DEFXX is not set
# CONFIG_SKFP is not set
# CONFIG_HIPPI is not set
CONFIG_PHYLIB=y
CONFIG_SWPHY=y
# CONFIG_LED_TRIGGER_PHY is not set
CONFIG_FIXED_PHY=y
#
# MII PHY device drivers
#
# CONFIG_AS21XXX_PHY is not set
# CONFIG_AIR_EN8811H_PHY is not set
CONFIG_AMD_PHY=y
# CONFIG_ADIN_PHY is not set
# CONFIG_ADIN1100_PHY is not set
# CONFIG_AQUANTIA_PHY is not set
# CONFIG_AX88796B_PHY is not set
CONFIG_BROADCOM_PHY=y
# CONFIG_BCM54140_PHY is not set
# CONFIG_BCM7XXX_PHY is not set
# CONFIG_BCM84881_PHY is not set
CONFIG_BCM87XX_PHY=y
CONFIG_BCM_NET_PHYLIB=y
CONFIG_CICADA_PHY=y
# CONFIG_CORTINA_PHY is not set
CONFIG_DAVICOM_PHY=y
CONFIG_ICPLUS_PHY=y
CONFIG_LXT_PHY=y
# CONFIG_INTEL_XWAY_PHY is not set
CONFIG_LSI_ET1011C_PHY=y
CONFIG_MARVELL_PHY=y
# CONFIG_MARVELL_10G_PHY is not set
# CONFIG_MARVELL_88Q2XXX_PHY is not set
# CONFIG_MARVELL_88X2222_PHY is not set
# CONFIG_MAXLINEAR_GPHY is not set
# CONFIG_MAXLINEAR_86110_PHY is not set
# CONFIG_MEDIATEK_GE_PHY is not set
CONFIG_MICREL_PHY=y
# CONFIG_MICROCHIP_T1S_PHY is not set
# CONFIG_MICROCHIP_PHY is not set
# CONFIG_MICROCHIP_T1_PHY is not set
# CONFIG_MICROSEMI_PHY is not set
# CONFIG_MOTORCOMM_PHY is not set
CONFIG_NATIONAL_PHY=y
# CONFIG_NXP_CBTX_PHY is not set
# CONFIG_NXP_C45_TJA11XX_PHY is not set
# CONFIG_NXP_TJA11XX_PHY is not set
# CONFIG_NCN26000_PHY is not set
CONFIG_QCOM_NET_PHYLIB=y
CONFIG_AT803X_PHY=y
# CONFIG_QCA83XX_PHY is not set
# CONFIG_QCA808X_PHY is not set
CONFIG_QSEMI_PHY=y
CONFIG_REALTEK_PHY=y
CONFIG_REALTEK_PHY_HWMON=y
# CONFIG_RENESAS_PHY is not set
# CONFIG_ROCKCHIP_PHY is not set
CONFIG_SMSC_PHY=y
CONFIG_STE10XP=y
# CONFIG_TERANETICS_PHY is not set
# CONFIG_DP83822_PHY is not set
# CONFIG_DP83TC811_PHY is not set
# CONFIG_DP83848_PHY is not set
# CONFIG_DP83867_PHY is not set
# CONFIG_DP83869_PHY is not set
# CONFIG_DP83TD510_PHY is not set
# CONFIG_DP83TG720_PHY is not set
CONFIG_VITESSE_PHY=y
# CONFIG_XILINX_GMII2RGMII is not set
# CONFIG_MICREL_KS8995MA is not set
# CONFIG_PSE_CONTROLLER is not set
CONFIG_MDIO_BUS=y
CONFIG_FWNODE_MDIO=y
CONFIG_ACPI_MDIO=y
CONFIG_MDIO_BITBANG=y
# CONFIG_MDIO_BCM_UNIMAC is not set
CONFIG_MDIO_GPIO=y
# CONFIG_MDIO_MVUSB is not set
# CONFIG_MDIO_MSCC_MIIM is not set
# CONFIG_MDIO_THUNDER is not set
#
# MDIO Multiplexers
#
#
# PCS device drivers
#
# CONFIG_PCS_XPCS is not set
# end of PCS device drivers
# CONFIG_PLIP is not set
CONFIG_PPP=y
# CONFIG_PPP_BSDCOMP is not set
# CONFIG_PPP_DEFLATE is not set
CONFIG_PPP_FILTER=y
# CONFIG_PPP_MPPE is not set
CONFIG_PPP_MULTILINK=y
# CONFIG_PPPOE is not set
CONFIG_PPPOE_HASH_BITS=4
# CONFIG_PPP_ASYNC is not set
# CONFIG_PPP_SYNC_TTY is not set
# CONFIG_SLIP is not set
CONFIG_SLHC=y
CONFIG_USB_NET_DRIVERS=y
# CONFIG_USB_CATC is not set
# CONFIG_USB_KAWETH is not set
# CONFIG_USB_PEGASUS is not set
# CONFIG_USB_RTL8150 is not set
# CONFIG_USB_RTL8152 is not set
# CONFIG_USB_LAN78XX is not set
# CONFIG_USB_USBNET is not set
# CONFIG_USB_HSO is not set
# CONFIG_USB_IPHETH is not set
CONFIG_WLAN=y
CONFIG_WLAN_VENDOR_ADMTEK=y
CONFIG_WLAN_VENDOR_ATH=y
# CONFIG_ATH_DEBUG is not set
# CONFIG_ATH5K_PCI is not set
CONFIG_WLAN_VENDOR_ATMEL=y
CONFIG_WLAN_VENDOR_BROADCOM=y
CONFIG_WLAN_VENDOR_INTEL=y
CONFIG_WLAN_VENDOR_INTERSIL=y
CONFIG_WLAN_VENDOR_MARVELL=y
CONFIG_WLAN_VENDOR_MEDIATEK=y
CONFIG_WLAN_VENDOR_MICROCHIP=y
CONFIG_WLAN_VENDOR_PURELIFI=y
CONFIG_WLAN_VENDOR_RALINK=y
CONFIG_WLAN_VENDOR_REALTEK=y
CONFIG_WLAN_VENDOR_RSI=y
CONFIG_WLAN_VENDOR_SILABS=y
CONFIG_WLAN_VENDOR_ST=y
CONFIG_WLAN_VENDOR_TI=y
CONFIG_WLAN_VENDOR_ZYDAS=y
CONFIG_WLAN_VENDOR_QUANTENNA=y
CONFIG_WAN=y
# CONFIG_HDLC is not set
# CONFIG_FRAMER is not set
#
# Wireless WAN
#
# CONFIG_WWAN is not set
# end of Wireless WAN
CONFIG_XEN_NETDEV_FRONTEND=y
# CONFIG_XEN_NETDEV_BACKEND is not set
# CONFIG_VMXNET3 is not set
# CONFIG_FUJITSU_ES is not set
# CONFIG_NETDEVSIM is not set
CONFIG_NET_FAILOVER=y
CONFIG_ISDN=y
# CONFIG_MISDN is not set
#
# Input device support
#
CONFIG_INPUT=y
CONFIG_INPUT_LEDS=y
# CONFIG_INPUT_FF_MEMLESS is not set
# CONFIG_INPUT_SPARSEKMAP is not set
# CONFIG_INPUT_MATRIXKMAP is not set
CONFIG_INPUT_VIVALDIFMAP=y
#
# Userland interfaces
#
CONFIG_INPUT_MOUSEDEV=y
CONFIG_INPUT_MOUSEDEV_PSAUX=y
CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
CONFIG_INPUT_JOYDEV=y
CONFIG_INPUT_EVDEV=y
#
# Input Device Drivers
#
CONFIG_INPUT_KEYBOARD=y
# CONFIG_KEYBOARD_ADP5520 is not set
# CONFIG_KEYBOARD_ADP5588 is not set
# CONFIG_KEYBOARD_ADP5589 is not set
# CONFIG_KEYBOARD_APPLESPI is not set
CONFIG_KEYBOARD_ATKBD=y
# CONFIG_KEYBOARD_QT1050 is not set
# CONFIG_KEYBOARD_QT1070 is not set
# CONFIG_KEYBOARD_QT2160 is not set
# CONFIG_KEYBOARD_DLINK_DIR685 is not set
# CONFIG_KEYBOARD_LKKBD is not set
# CONFIG_KEYBOARD_GPIO is not set
# CONFIG_KEYBOARD_GPIO_POLLED is not set
# CONFIG_KEYBOARD_TCA6416 is not set
# CONFIG_KEYBOARD_TCA8418 is not set
# CONFIG_KEYBOARD_MATRIX is not set
# CONFIG_KEYBOARD_LM8323 is not set
# CONFIG_KEYBOARD_LM8333 is not set
# CONFIG_KEYBOARD_MAX7359 is not set
# CONFIG_KEYBOARD_MPR121 is not set
# CONFIG_KEYBOARD_NEWTON is not set
# CONFIG_KEYBOARD_OPENCORES is not set
# CONFIG_KEYBOARD_PINEPHONE is not set
# CONFIG_KEYBOARD_SAMSUNG is not set
# CONFIG_KEYBOARD_STOWAWAY is not set
# CONFIG_KEYBOARD_SUNKBD is not set
# CONFIG_KEYBOARD_TM2_TOUCHKEY is not set
# CONFIG_KEYBOARD_XTKBD is not set
# CONFIG_KEYBOARD_CYPRESS_SF is not set
CONFIG_INPUT_MOUSE=y
# CONFIG_MOUSE_PS2 is not set
# CONFIG_MOUSE_SERIAL is not set
# CONFIG_MOUSE_APPLETOUCH is not set
# CONFIG_MOUSE_BCM5974 is not set
# CONFIG_MOUSE_CYAPA is not set
# CONFIG_MOUSE_ELAN_I2C is not set
# CONFIG_MOUSE_VSXXXAA is not set
# CONFIG_MOUSE_GPIO is not set
# CONFIG_MOUSE_SYNAPTICS_I2C is not set
# CONFIG_MOUSE_SYNAPTICS_USB is not set
CONFIG_INPUT_JOYSTICK=y
# CONFIG_JOYSTICK_ANALOG is not set
# CONFIG_JOYSTICK_A3D is not set
# CONFIG_JOYSTICK_ADI is not set
# CONFIG_JOYSTICK_COBRA is not set
# CONFIG_JOYSTICK_GF2K is not set
# CONFIG_JOYSTICK_GRIP is not set
# CONFIG_JOYSTICK_GRIP_MP is not set
# CONFIG_JOYSTICK_GUILLEMOT is not set
# CONFIG_JOYSTICK_INTERACT is not set
# CONFIG_JOYSTICK_SIDEWINDER is not set
# CONFIG_JOYSTICK_TMDC is not set
# CONFIG_JOYSTICK_IFORCE is not set
# CONFIG_JOYSTICK_WARRIOR is not set
# CONFIG_JOYSTICK_MAGELLAN is not set
# CONFIG_JOYSTICK_SPACEORB is not set
# CONFIG_JOYSTICK_SPACEBALL is not set
# CONFIG_JOYSTICK_STINGER is not set
# CONFIG_JOYSTICK_TWIDJOY is not set
# CONFIG_JOYSTICK_ZHENHUA is not set
# CONFIG_JOYSTICK_DB9 is not set
# CONFIG_JOYSTICK_GAMECON is not set
# CONFIG_JOYSTICK_TURBOGRAFX is not set
# CONFIG_JOYSTICK_AS5011 is not set
# CONFIG_JOYSTICK_JOYDUMP is not set
# CONFIG_JOYSTICK_XPAD is not set
# CONFIG_JOYSTICK_WALKERA0701 is not set
# CONFIG_JOYSTICK_PSXPAD_SPI is not set
# CONFIG_JOYSTICK_PXRC is not set
# CONFIG_JOYSTICK_QWIIC is not set
# CONFIG_JOYSTICK_FSIA6B is not set
# CONFIG_JOYSTICK_SENSEHAT is not set
# CONFIG_JOYSTICK_SEESAW is not set
CONFIG_INPUT_TABLET=y
# CONFIG_TABLET_USB_ACECAD is not set
# CONFIG_TABLET_USB_AIPTEK is not set
# CONFIG_TABLET_USB_HANWANG is not set
# CONFIG_TABLET_USB_KBTAB is not set
# CONFIG_TABLET_USB_PEGASUS is not set
# CONFIG_TABLET_SERIAL_WACOM4 is not set
CONFIG_INPUT_TOUCHSCREEN=y
# CONFIG_TOUCHSCREEN_88PM860X is not set
# CONFIG_TOUCHSCREEN_ADS7846 is not set
# CONFIG_TOUCHSCREEN_AD7877 is not set
# CONFIG_TOUCHSCREEN_AD7879 is not set
# CONFIG_TOUCHSCREEN_ATMEL_MXT is not set
# CONFIG_TOUCHSCREEN_AUO_PIXCIR is not set
# CONFIG_TOUCHSCREEN_BU21013 is not set
# CONFIG_TOUCHSCREEN_BU21029 is not set
# CONFIG_TOUCHSCREEN_CHIPONE_ICN8505 is not set
# CONFIG_TOUCHSCREEN_CY8CTMA140 is not set
# CONFIG_TOUCHSCREEN_CY8CTMG110 is not set
# CONFIG_TOUCHSCREEN_CYTTSP_CORE is not set
# CONFIG_TOUCHSCREEN_CYTTSP5 is not set
# CONFIG_TOUCHSCREEN_DA9034 is not set
# CONFIG_TOUCHSCREEN_DA9052 is not set
# CONFIG_TOUCHSCREEN_DYNAPRO is not set
# CONFIG_TOUCHSCREEN_HAMPSHIRE is not set
# CONFIG_TOUCHSCREEN_EETI is not set
# CONFIG_TOUCHSCREEN_EGALAX_SERIAL is not set
# CONFIG_TOUCHSCREEN_EXC3000 is not set
# CONFIG_TOUCHSCREEN_FUJITSU is not set
# CONFIG_TOUCHSCREEN_GOODIX is not set
# CONFIG_TOUCHSCREEN_GOODIX_BERLIN_I2C is not set
# CONFIG_TOUCHSCREEN_GOODIX_BERLIN_SPI is not set
# CONFIG_TOUCHSCREEN_HIDEEP is not set
# CONFIG_TOUCHSCREEN_HYCON_HY46XX is not set
# CONFIG_TOUCHSCREEN_HYNITRON_CSTXXX is not set
# CONFIG_TOUCHSCREEN_ILI210X is not set
# CONFIG_TOUCHSCREEN_ILITEK is not set
# CONFIG_TOUCHSCREEN_S6SY761 is not set
# CONFIG_TOUCHSCREEN_GUNZE is not set
# CONFIG_TOUCHSCREEN_EKTF2127 is not set
# CONFIG_TOUCHSCREEN_ELAN is not set
# CONFIG_TOUCHSCREEN_ELO is not set
# CONFIG_TOUCHSCREEN_WACOM_W8001 is not set
# CONFIG_TOUCHSCREEN_WACOM_I2C is not set
# CONFIG_TOUCHSCREEN_MAX11801 is not set
# CONFIG_TOUCHSCREEN_MMS114 is not set
# CONFIG_TOUCHSCREEN_MELFAS_MIP4 is not set
# CONFIG_TOUCHSCREEN_MSG2638 is not set
# CONFIG_TOUCHSCREEN_MTOUCH is not set
# CONFIG_TOUCHSCREEN_NOVATEK_NVT_TS is not set
# CONFIG_TOUCHSCREEN_IMAGIS is not set
# CONFIG_TOUCHSCREEN_INEXIO is not set
# CONFIG_TOUCHSCREEN_PENMOUNT is not set
# CONFIG_TOUCHSCREEN_EDT_FT5X06 is not set
# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set
# CONFIG_TOUCHSCREEN_TOUCHWIN is not set
# CONFIG_TOUCHSCREEN_PIXCIR is not set
# CONFIG_TOUCHSCREEN_WDT87XX_I2C is not set
# CONFIG_TOUCHSCREEN_WM831X is not set
# CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set
# CONFIG_TOUCHSCREEN_TOUCHIT213 is not set
# CONFIG_TOUCHSCREEN_TSC_SERIO is not set
# CONFIG_TOUCHSCREEN_TSC2004 is not set
# CONFIG_TOUCHSCREEN_TSC2005 is not set
# CONFIG_TOUCHSCREEN_TSC2007 is not set
# CONFIG_TOUCHSCREEN_PCAP is not set
# CONFIG_TOUCHSCREEN_RM_TS is not set
# CONFIG_TOUCHSCREEN_SILEAD is not set
# CONFIG_TOUCHSCREEN_SIS_I2C is not set
# CONFIG_TOUCHSCREEN_ST1232 is not set
# CONFIG_TOUCHSCREEN_STMFTS is not set
# CONFIG_TOUCHSCREEN_SURFACE3_SPI is not set
# CONFIG_TOUCHSCREEN_SX8654 is not set
# CONFIG_TOUCHSCREEN_TPS6507X is not set
# CONFIG_TOUCHSCREEN_ZET6223 is not set
# CONFIG_TOUCHSCREEN_ZFORCE is not set
# CONFIG_TOUCHSCREEN_ROHM_BU21023 is not set
# CONFIG_TOUCHSCREEN_IQS5XX is not set
# CONFIG_TOUCHSCREEN_IQS7211 is not set
# CONFIG_TOUCHSCREEN_ZINITIX is not set
# CONFIG_TOUCHSCREEN_HIMAX_HX83112B is not set
CONFIG_INPUT_MISC=y
# CONFIG_INPUT_88PM860X_ONKEY is not set
# CONFIG_INPUT_AD714X is not set
# CONFIG_INPUT_BMA150 is not set
# CONFIG_INPUT_E3X0_BUTTON is not set
# CONFIG_INPUT_PCSPKR is not set
# CONFIG_INPUT_MAX77693_HAPTIC is not set
# CONFIG_INPUT_MAX8925_ONKEY is not set
# CONFIG_INPUT_MAX8997_HAPTIC is not set
# CONFIG_INPUT_MMA8450 is not set
# CONFIG_INPUT_APANEL is not set
# CONFIG_INPUT_GPIO_BEEPER is not set
# CONFIG_INPUT_GPIO_DECODER is not set
# CONFIG_INPUT_GPIO_VIBRA is not set
# CONFIG_INPUT_ATLAS_BTNS is not set
# CONFIG_INPUT_ATI_REMOTE2 is not set
# CONFIG_INPUT_KEYSPAN_REMOTE is not set
# CONFIG_INPUT_KXTJ9 is not set
# CONFIG_INPUT_POWERMATE is not set
# CONFIG_INPUT_YEALINK is not set
# CONFIG_INPUT_CM109 is not set
# CONFIG_INPUT_REGULATOR_HAPTIC is not set
# CONFIG_INPUT_TWL6040_VIBRA is not set
CONFIG_INPUT_UINPUT=y
# CONFIG_INPUT_PALMAS_PWRBUTTON is not set
# CONFIG_INPUT_PCF8574 is not set
# CONFIG_INPUT_PWM_BEEPER is not set
# CONFIG_INPUT_PWM_VIBRA is not set
# CONFIG_INPUT_GPIO_ROTARY_ENCODER is not set
# CONFIG_INPUT_DA7280_HAPTICS is not set
# CONFIG_INPUT_DA9052_ONKEY is not set
# CONFIG_INPUT_DA9055_ONKEY is not set
# CONFIG_INPUT_DA9063_ONKEY is not set
# CONFIG_INPUT_WM831X_ON is not set
# CONFIG_INPUT_PCAP is not set
# CONFIG_INPUT_ADXL34X is not set
# CONFIG_INPUT_IMS_PCU is not set
# CONFIG_INPUT_IQS269A is not set
# CONFIG_INPUT_IQS626A is not set
# CONFIG_INPUT_IQS7222 is not set
# CONFIG_INPUT_CMA3000 is not set
# CONFIG_INPUT_XEN_KBDDEV_FRONTEND is not set
# CONFIG_INPUT_IDEAPAD_SLIDEBAR is not set
# CONFIG_INPUT_DRV260X_HAPTICS is not set
# CONFIG_INPUT_DRV2665_HAPTICS is not set
# CONFIG_INPUT_DRV2667_HAPTICS is not set
# CONFIG_RMI4_CORE is not set
#
# Hardware I/O ports
#
CONFIG_SERIO=y
CONFIG_ARCH_MIGHT_HAVE_PC_SERIO=y
CONFIG_SERIO_I8042=y
# CONFIG_SERIO_SERPORT is not set
# CONFIG_SERIO_CT82C710 is not set
# CONFIG_SERIO_PARKBD is not set
# CONFIG_SERIO_PCIPS2 is not set
CONFIG_SERIO_LIBPS2=y
# CONFIG_SERIO_RAW is not set
# CONFIG_SERIO_ALTERA_PS2 is not set
# CONFIG_SERIO_PS2MULT is not set
# CONFIG_SERIO_ARC_PS2 is not set
# CONFIG_SERIO_GPIO_PS2 is not set
# CONFIG_USERIO is not set
# CONFIG_GAMEPORT is not set
# end of Hardware I/O ports
# end of Input device support
#
# Character devices
#
CONFIG_TTY=y
CONFIG_VT=y
CONFIG_CONSOLE_TRANSLATIONS=y
CONFIG_VT_CONSOLE=y
CONFIG_VT_CONSOLE_SLEEP=y
CONFIG_VT_HW_CONSOLE_BINDING=y
CONFIG_UNIX98_PTYS=y
CONFIG_LEGACY_PTYS=y
CONFIG_LEGACY_PTY_COUNT=0
CONFIG_LEGACY_TIOCSTI=y
CONFIG_LDISC_AUTOLOAD=y
#
# Serial drivers
#
CONFIG_SERIAL_EARLYCON=y
CONFIG_SERIAL_8250=y
# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set
CONFIG_SERIAL_8250_PNP=y
# CONFIG_SERIAL_8250_16550A_VARIANTS is not set
# CONFIG_SERIAL_8250_FINTEK is not set
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_8250_DMA=y
CONFIG_SERIAL_8250_PCILIB=y
CONFIG_SERIAL_8250_PCI=y
CONFIG_SERIAL_8250_EXAR=y
CONFIG_SERIAL_8250_NR_UARTS=48
CONFIG_SERIAL_8250_RUNTIME_UARTS=32
CONFIG_SERIAL_8250_EXTENDED=y
CONFIG_SERIAL_8250_MANY_PORTS=y
# CONFIG_SERIAL_8250_PCI1XXXX is not set
CONFIG_SERIAL_8250_SHARE_IRQ=y
# CONFIG_SERIAL_8250_DETECT_IRQ is not set
CONFIG_SERIAL_8250_RSA=y
CONFIG_SERIAL_8250_DWLIB=y
# CONFIG_SERIAL_8250_DW is not set
# CONFIG_SERIAL_8250_RT288X is not set
CONFIG_SERIAL_8250_LPSS=y
# CONFIG_SERIAL_8250_MID is not set
CONFIG_SERIAL_8250_PERICOM=y
# CONFIG_SERIAL_8250_NI is not set
#
# Non-8250 serial port support
#
# CONFIG_SERIAL_MAX3100 is not set
CONFIG_SERIAL_MAX310X=y
# CONFIG_SERIAL_UARTLITE is not set
CONFIG_SERIAL_CORE=y
CONFIG_SERIAL_CORE_CONSOLE=y
CONFIG_CONSOLE_POLL=y
# CONFIG_SERIAL_JSM is not set
# CONFIG_SERIAL_LANTIQ is not set
CONFIG_SERIAL_SCCNXP=y
CONFIG_SERIAL_SCCNXP_CONSOLE=y
# CONFIG_SERIAL_SC16IS7XX is not set
# CONFIG_SERIAL_ALTERA_JTAGUART is not set
# CONFIG_SERIAL_ALTERA_UART is not set
# CONFIG_SERIAL_ARC is not set
# CONFIG_SERIAL_RP2 is not set
# CONFIG_SERIAL_FSL_LPUART is not set
# CONFIG_SERIAL_FSL_LINFLEXUART is not set
# CONFIG_SERIAL_SPRD is not set
# end of Serial drivers
CONFIG_SERIAL_MCTRL_GPIO=y
CONFIG_SERIAL_NONSTANDARD=y
# CONFIG_MOXA_INTELLIO is not set
# CONFIG_MOXA_SMARTIO is not set
# CONFIG_N_HDLC is not set
# CONFIG_N_GSM is not set
# CONFIG_NOZOMI is not set
# CONFIG_NULL_TTY is not set
CONFIG_HVC_DRIVER=y
CONFIG_HVC_IRQ=y
CONFIG_HVC_XEN=y
CONFIG_HVC_XEN_FRONTEND=y
# CONFIG_SERIAL_DEV_BUS is not set
CONFIG_TTY_PRINTK=y
CONFIG_TTY_PRINTK_LEVEL=6
CONFIG_PRINTER=y
# CONFIG_LP_CONSOLE is not set
# CONFIG_PPDEV is not set
CONFIG_VIRTIO_CONSOLE=y
CONFIG_IPMI_HANDLER=y
CONFIG_IPMI_DMI_DECODE=y
CONFIG_IPMI_PLAT_DATA=y
# CONFIG_IPMI_PANIC_EVENT is not set
# CONFIG_IPMI_DEVICE_INTERFACE is not set
CONFIG_IPMI_SI=y
# CONFIG_IPMI_SSIF is not set
# CONFIG_IPMI_WATCHDOG is not set
# CONFIG_IPMI_POWEROFF is not set
CONFIG_HW_RANDOM=y
# CONFIG_HW_RANDOM_TIMERIOMEM is not set
# CONFIG_HW_RANDOM_INTEL is not set
# CONFIG_HW_RANDOM_AMD is not set
# CONFIG_HW_RANDOM_BA431 is not set
# CONFIG_HW_RANDOM_VIA is not set
# CONFIG_HW_RANDOM_VIRTIO is not set
# CONFIG_HW_RANDOM_XIPHERA is not set
# CONFIG_APPLICOM is not set
# CONFIG_MWAVE is not set
CONFIG_DEVMEM=y
# CONFIG_NVRAM is not set
CONFIG_DEVPORT=y
CONFIG_HPET=y
CONFIG_HPET_MMAP=y
CONFIG_HPET_MMAP_DEFAULT=y
# CONFIG_HANGCHECK_TIMER is not set
CONFIG_TCG_TPM=y
CONFIG_TCG_TPM2_HMAC=y
# CONFIG_HW_RANDOM_TPM is not set
CONFIG_TCG_TIS_CORE=y
CONFIG_TCG_TIS=y
# CONFIG_TCG_TIS_SPI is not set
# CONFIG_TCG_TIS_I2C is not set
# CONFIG_TCG_TIS_I2C_CR50 is not set
# CONFIG_TCG_TIS_I2C_ATMEL is not set
# CONFIG_TCG_TIS_I2C_INFINEON is not set
# CONFIG_TCG_TIS_I2C_NUVOTON is not set
# CONFIG_TCG_NSC is not set
# CONFIG_TCG_ATMEL is not set
# CONFIG_TCG_INFINEON is not set
# CONFIG_TCG_XEN is not set
CONFIG_TCG_CRB=y
# CONFIG_TCG_VTPM_PROXY is not set
# CONFIG_TCG_TIS_ST33ZP24_I2C is not set
# CONFIG_TCG_TIS_ST33ZP24_SPI is not set
# CONFIG_TELCLOCK is not set
# CONFIG_XILLYBUS is not set
# CONFIG_XILLYUSB is not set
# end of Character devices
#
# I2C support
#
CONFIG_I2C=y
CONFIG_ACPI_I2C_OPREGION=y
CONFIG_I2C_BOARDINFO=y
# CONFIG_I2C_CHARDEV is not set
# CONFIG_I2C_MUX is not set
CONFIG_I2C_HELPER_AUTO=y
CONFIG_I2C_ALGOBIT=y
#
# I2C Hardware Bus support
#
#
# PC SMBus host controller drivers
#
# CONFIG_I2C_ALI1535 is not set
# CONFIG_I2C_ALI1563 is not set
# CONFIG_I2C_ALI15X3 is not set
# CONFIG_I2C_AMD756 is not set
# CONFIG_I2C_AMD8111 is not set
# CONFIG_I2C_AMD_MP2 is not set
# CONFIG_I2C_I801 is not set
# CONFIG_I2C_ISCH is not set
# CONFIG_I2C_ISMT is not set
# CONFIG_I2C_PIIX4 is not set
# CONFIG_I2C_NFORCE2 is not set
# CONFIG_I2C_NVIDIA_GPU is not set
# CONFIG_I2C_SIS5595 is not set
# CONFIG_I2C_SIS630 is not set
# CONFIG_I2C_SIS96X is not set
# CONFIG_I2C_VIA is not set
# CONFIG_I2C_VIAPRO is not set
# CONFIG_I2C_ZHAOXIN is not set
#
# ACPI drivers
#
# CONFIG_I2C_SCMI is not set
#
# I2C system bus drivers (mostly embedded / system-on-chip)
#
# CONFIG_I2C_CBUS_GPIO is not set
# CONFIG_I2C_DESIGNWARE_CORE is not set
# CONFIG_I2C_EMEV2 is not set
# CONFIG_I2C_GPIO is not set
# CONFIG_I2C_OCORES is not set
# CONFIG_I2C_PCA_PLATFORM is not set
# CONFIG_I2C_SIMTEC is not set
# CONFIG_I2C_XILINX is not set
#
# External I2C/SMBus adapter drivers
#
# CONFIG_I2C_DIOLAN_U2C is not set
# CONFIG_I2C_CP2615 is not set
# CONFIG_I2C_PARPORT is not set
# CONFIG_I2C_PCI1XXXX is not set
# CONFIG_I2C_ROBOTFUZZ_OSIF is not set
# CONFIG_I2C_TAOS_EVM is not set
# CONFIG_I2C_TINY_USB is not set
#
# Other I2C/SMBus bus drivers
#
# CONFIG_I2C_MLXCPLD is not set
# CONFIG_I2C_VIRTIO is not set
# end of I2C Hardware Bus support
# CONFIG_I2C_STUB is not set
# CONFIG_I2C_SLAVE is not set
# CONFIG_I2C_DEBUG_CORE is not set
# CONFIG_I2C_DEBUG_ALGO is not set
# CONFIG_I2C_DEBUG_BUS is not set
# end of I2C support
# CONFIG_I3C is not set
CONFIG_SPI=y
# CONFIG_SPI_DEBUG is not set
CONFIG_SPI_MASTER=y
# CONFIG_SPI_MEM is not set
#
# SPI Master Controller Drivers
#
# CONFIG_SPI_ALTERA is not set
# CONFIG_SPI_AXI_SPI_ENGINE is not set
# CONFIG_SPI_BITBANG is not set
# CONFIG_SPI_BUTTERFLY is not set
# CONFIG_SPI_CADENCE is not set
# CONFIG_SPI_CH341 is not set
# CONFIG_SPI_DESIGNWARE is not set
# CONFIG_SPI_GPIO is not set
# CONFIG_SPI_LM70_LLP is not set
# CONFIG_SPI_MICROCHIP_CORE is not set
# CONFIG_SPI_MICROCHIP_CORE_QSPI is not set
# CONFIG_SPI_LANTIQ_SSC is not set
# CONFIG_SPI_OC_TINY is not set
# CONFIG_SPI_PCI1XXXX is not set
# CONFIG_SPI_PXA2XX is not set
# CONFIG_SPI_SC18IS602 is not set
# CONFIG_SPI_SIFIVE is not set
# CONFIG_SPI_MXIC is not set
# CONFIG_SPI_XCOMM is not set
# CONFIG_SPI_XILINX is not set
#
# SPI Multiplexer support
#
# CONFIG_SPI_MUX is not set
#
# SPI Protocol Masters
#
# CONFIG_SPI_SPIDEV is not set
# CONFIG_SPI_LOOPBACK_TEST is not set
# CONFIG_SPI_TLE62X0 is not set
# CONFIG_SPI_SLAVE is not set
CONFIG_SPI_DYNAMIC=y
# CONFIG_SPMI is not set
# CONFIG_HSI is not set
CONFIG_PPS=y
# CONFIG_PPS_DEBUG is not set
#
# PPS clients support
#
# CONFIG_PPS_CLIENT_KTIMER is not set
# CONFIG_PPS_CLIENT_LDISC is not set
# CONFIG_PPS_CLIENT_PARPORT is not set
# CONFIG_PPS_CLIENT_GPIO is not set
# CONFIG_PPS_GENERATOR is not set
#
# PTP clock support
#
CONFIG_PTP_1588_CLOCK=y
CONFIG_PTP_1588_CLOCK_OPTIONAL=y
#
# Enable PHYLIB and NETWORK_PHY_TIMESTAMPING to see the additional clocks.
#
CONFIG_PTP_1588_CLOCK_KVM=y
CONFIG_PTP_1588_CLOCK_VMCLOCK=y
# CONFIG_PTP_1588_CLOCK_IDT82P33 is not set
# CONFIG_PTP_1588_CLOCK_IDTCM is not set
# CONFIG_PTP_1588_CLOCK_FC3W is not set
# CONFIG_PTP_1588_CLOCK_MOCK is not set
# CONFIG_PTP_1588_CLOCK_VMW is not set
# end of PTP clock support
CONFIG_PINCTRL=y
CONFIG_PINMUX=y
CONFIG_PINCONF=y
CONFIG_GENERIC_PINCONF=y
# CONFIG_DEBUG_PINCTRL is not set
# CONFIG_PINCTRL_AMD is not set
# CONFIG_PINCTRL_CY8C95X0 is not set
# CONFIG_PINCTRL_MCP23S08 is not set
CONFIG_PINCTRL_SX150X=y
#
# Intel pinctrl drivers
#
CONFIG_PINCTRL_BAYTRAIL=y
# CONFIG_PINCTRL_CHERRYVIEW is not set
# CONFIG_PINCTRL_LYNXPOINT is not set
CONFIG_PINCTRL_INTEL=y
# CONFIG_PINCTRL_INTEL_PLATFORM is not set
# CONFIG_PINCTRL_ALDERLAKE is not set
# CONFIG_PINCTRL_BROXTON is not set
# CONFIG_PINCTRL_CANNONLAKE is not set
# CONFIG_PINCTRL_CEDARFORK is not set
# CONFIG_PINCTRL_DENVERTON is not set
# CONFIG_PINCTRL_ELKHARTLAKE is not set
# CONFIG_PINCTRL_EMMITSBURG is not set
# CONFIG_PINCTRL_GEMINILAKE is not set
# CONFIG_PINCTRL_ICELAKE is not set
# CONFIG_PINCTRL_JASPERLAKE is not set
# CONFIG_PINCTRL_LAKEFIELD is not set
# CONFIG_PINCTRL_LEWISBURG is not set
# CONFIG_PINCTRL_METEORLAKE is not set
# CONFIG_PINCTRL_METEORPOINT is not set
# CONFIG_PINCTRL_SUNRISEPOINT is not set
# CONFIG_PINCTRL_TIGERLAKE is not set
# end of Intel pinctrl drivers
#
# Renesas pinctrl drivers
#
# end of Renesas pinctrl drivers
CONFIG_GPIOLIB=y
CONFIG_GPIOLIB_FASTPATH_LIMIT=512
CONFIG_GPIO_ACPI=y
CONFIG_GPIOLIB_IRQCHIP=y
# CONFIG_DEBUG_GPIO is not set
CONFIG_GPIO_SYSFS=y
CONFIG_GPIO_CDEV=y
CONFIG_GPIO_CDEV_V1=y
#
# Memory mapped GPIO drivers
#
# CONFIG_GPIO_ALTERA is not set
# CONFIG_GPIO_AMDPT is not set
# CONFIG_GPIO_DWAPB is not set
# CONFIG_GPIO_EXAR is not set
# CONFIG_GPIO_GENERIC_PLATFORM is not set
# CONFIG_GPIO_GRANITERAPIDS is not set
CONFIG_GPIO_ICH=y
# CONFIG_GPIO_MB86S7X is not set
# CONFIG_GPIO_POLARFIRE_SOC is not set
# CONFIG_GPIO_XILINX is not set
# CONFIG_GPIO_AMD_FCH is not set
# end of Memory mapped GPIO drivers
#
# Port-mapped I/O GPIO drivers
#
# CONFIG_GPIO_VX855 is not set
# CONFIG_GPIO_F7188X is not set
# CONFIG_GPIO_IT87 is not set
# CONFIG_GPIO_SCH311X is not set
# CONFIG_GPIO_WINBOND is not set
# CONFIG_GPIO_WS16C48 is not set
# end of Port-mapped I/O GPIO drivers
#
# I2C GPIO expanders
#
# CONFIG_GPIO_FXL6408 is not set
# CONFIG_GPIO_DS4520 is not set
# CONFIG_GPIO_MAX7300 is not set
# CONFIG_GPIO_MAX732X is not set
# CONFIG_GPIO_PCA953X is not set
# CONFIG_GPIO_PCA9570 is not set
# CONFIG_GPIO_PCF857X is not set
# CONFIG_GPIO_TPIC2810 is not set
# end of I2C GPIO expanders
#
# MFD GPIO expanders
#
# CONFIG_GPIO_ADP5520 is not set
# CONFIG_GPIO_DA9052 is not set
# CONFIG_GPIO_DA9055 is not set
# CONFIG_GPIO_ELKHARTLAKE is not set
CONFIG_GPIO_PALMAS=y
CONFIG_GPIO_RC5T583=y
CONFIG_GPIO_TPS6586X=y
CONFIG_GPIO_TPS65910=y
# CONFIG_GPIO_TPS65912 is not set
# CONFIG_GPIO_TWL6040 is not set
# CONFIG_GPIO_WM831X is not set
# CONFIG_GPIO_WM8350 is not set
# CONFIG_GPIO_WM8994 is not set
# end of MFD GPIO expanders
#
# PCI GPIO expanders
#
# CONFIG_GPIO_AMD8111 is not set
# CONFIG_GPIO_BT8XX is not set
# CONFIG_GPIO_ML_IOH is not set
# CONFIG_GPIO_PCI_IDIO_16 is not set
# CONFIG_GPIO_PCIE_IDIO_24 is not set
# CONFIG_GPIO_RDC321X is not set
# end of PCI GPIO expanders
#
# SPI GPIO expanders
#
# CONFIG_GPIO_74X164 is not set
# CONFIG_GPIO_MAX3191X is not set
# CONFIG_GPIO_MAX7301 is not set
# CONFIG_GPIO_MC33880 is not set
# CONFIG_GPIO_PISOSR is not set
# CONFIG_GPIO_XRA1403 is not set
# end of SPI GPIO expanders
#
# USB GPIO expanders
#
# CONFIG_GPIO_MPSSE is not set
# end of USB GPIO expanders
#
# Virtual GPIO drivers
#
# CONFIG_GPIO_AGGREGATOR is not set
# CONFIG_GPIO_LATCH is not set
# CONFIG_GPIO_MOCKUP is not set
# CONFIG_GPIO_VIRTIO is not set
# CONFIG_GPIO_SIM is not set
# end of Virtual GPIO drivers
#
# GPIO Debugging utilities
#
# CONFIG_GPIO_SLOPPY_LOGIC_ANALYZER is not set
# CONFIG_GPIO_VIRTUSER is not set
# end of GPIO Debugging utilities
# CONFIG_W1 is not set
CONFIG_POWER_RESET=y
# CONFIG_POWER_RESET_RESTART is not set
# CONFIG_POWER_SEQUENCING is not set
CONFIG_POWER_SUPPLY=y
# CONFIG_POWER_SUPPLY_DEBUG is not set
CONFIG_POWER_SUPPLY_HWMON=y
# CONFIG_IP5XXX_POWER is not set
# CONFIG_MAX8925_POWER is not set
# CONFIG_WM831X_BACKUP is not set
# CONFIG_WM831X_POWER is not set
# CONFIG_WM8350_POWER is not set
# CONFIG_TEST_POWER is not set
# CONFIG_BATTERY_88PM860X is not set
# CONFIG_CHARGER_ADP5061 is not set
# CONFIG_BATTERY_CHAGALL is not set
# CONFIG_BATTERY_CW2015 is not set
# CONFIG_BATTERY_DS2780 is not set
# CONFIG_BATTERY_DS2781 is not set
# CONFIG_BATTERY_DS2782 is not set
# CONFIG_BATTERY_SAMSUNG_SDI is not set
# CONFIG_BATTERY_SBS is not set
# CONFIG_CHARGER_SBS is not set
# CONFIG_BATTERY_BQ27XXX is not set
# CONFIG_BATTERY_DA9030 is not set
# CONFIG_BATTERY_DA9052 is not set
# CONFIG_BATTERY_MAX17042 is not set
# CONFIG_BATTERY_MAX1720X is not set
# CONFIG_CHARGER_MAX8903 is not set
# CONFIG_CHARGER_LP8727 is not set
# CONFIG_CHARGER_GPIO is not set
CONFIG_CHARGER_MANAGER=y
# CONFIG_CHARGER_LT3651 is not set
# CONFIG_CHARGER_LTC4162L is not set
# CONFIG_CHARGER_MAX77693 is not set
# CONFIG_CHARGER_MAX77976 is not set
# CONFIG_CHARGER_MAX8971 is not set
# CONFIG_CHARGER_BQ2415X is not set
# CONFIG_CHARGER_BQ24190 is not set
# CONFIG_CHARGER_BQ24257 is not set
# CONFIG_CHARGER_BQ24735 is not set
# CONFIG_CHARGER_BQ2515X is not set
# CONFIG_CHARGER_BQ25890 is not set
# CONFIG_CHARGER_BQ25980 is not set
# CONFIG_CHARGER_BQ256XX is not set
# CONFIG_CHARGER_SMB347 is not set
# CONFIG_CHARGER_TPS65090 is not set
# CONFIG_BATTERY_GAUGE_LTC2941 is not set
# CONFIG_BATTERY_GOLDFISH is not set
# CONFIG_BATTERY_RT5033 is not set
# CONFIG_CHARGER_RT9455 is not set
# CONFIG_CHARGER_RT9467 is not set
# CONFIG_CHARGER_RT9471 is not set
# CONFIG_CHARGER_BD99954 is not set
# CONFIG_BATTERY_UG3105 is not set
# CONFIG_FUEL_GAUGE_MM8013 is not set
CONFIG_HWMON=y
# CONFIG_HWMON_DEBUG_CHIP is not set
#
# Native drivers
#
# CONFIG_SENSORS_ABITUGURU is not set
# CONFIG_SENSORS_ABITUGURU3 is not set
# CONFIG_SENSORS_AD7314 is not set
# CONFIG_SENSORS_AD7414 is not set
# CONFIG_SENSORS_AD7418 is not set
# CONFIG_SENSORS_ADM1025 is not set
# CONFIG_SENSORS_ADM1026 is not set
# CONFIG_SENSORS_ADM1029 is not set
# CONFIG_SENSORS_ADM1031 is not set
# CONFIG_SENSORS_ADM1177 is not set
# CONFIG_SENSORS_ADM9240 is not set
# CONFIG_SENSORS_ADT7310 is not set
# CONFIG_SENSORS_ADT7410 is not set
# CONFIG_SENSORS_ADT7411 is not set
# CONFIG_SENSORS_ADT7462 is not set
# CONFIG_SENSORS_ADT7470 is not set
# CONFIG_SENSORS_ADT7475 is not set
# CONFIG_SENSORS_AHT10 is not set
# CONFIG_SENSORS_AQUACOMPUTER_D5NEXT is not set
# CONFIG_SENSORS_AS370 is not set
# CONFIG_SENSORS_ASC7621 is not set
# CONFIG_SENSORS_ASUS_ROG_RYUJIN is not set
# CONFIG_SENSORS_AXI_FAN_CONTROL is not set
# CONFIG_SENSORS_K8TEMP is not set
# CONFIG_SENSORS_K10TEMP is not set
# CONFIG_SENSORS_FAM15H_POWER is not set
# CONFIG_SENSORS_APPLESMC is not set
# CONFIG_SENSORS_ASB100 is not set
# CONFIG_SENSORS_ATXP1 is not set
# CONFIG_SENSORS_CHIPCAP2 is not set
# CONFIG_SENSORS_CORSAIR_CPRO is not set
# CONFIG_SENSORS_CORSAIR_PSU is not set
# CONFIG_SENSORS_DRIVETEMP is not set
# CONFIG_SENSORS_DS620 is not set
# CONFIG_SENSORS_DS1621 is not set
# CONFIG_SENSORS_DELL_SMM is not set
# CONFIG_SENSORS_DA9052_ADC is not set
# CONFIG_SENSORS_DA9055 is not set
# CONFIG_SENSORS_I5K_AMB is not set
# CONFIG_SENSORS_F71805F is not set
# CONFIG_SENSORS_F71882FG is not set
# CONFIG_SENSORS_F75375S is not set
# CONFIG_SENSORS_FSCHMD is not set
# CONFIG_SENSORS_FTSTEUTATES is not set
# CONFIG_SENSORS_GIGABYTE_WATERFORCE is not set
# CONFIG_SENSORS_GL518SM is not set
# CONFIG_SENSORS_GL520SM is not set
# CONFIG_SENSORS_G760A is not set
# CONFIG_SENSORS_G762 is not set
# CONFIG_SENSORS_HIH6130 is not set
# CONFIG_SENSORS_HS3001 is not set
# CONFIG_SENSORS_HTU31 is not set
# CONFIG_SENSORS_IBMAEM is not set
# CONFIG_SENSORS_IBMPEX is not set
# CONFIG_SENSORS_I5500 is not set
CONFIG_SENSORS_CORETEMP=y
# CONFIG_SENSORS_ISL28022 is not set
# CONFIG_SENSORS_IT87 is not set
# CONFIG_SENSORS_JC42 is not set
# CONFIG_SENSORS_POWERZ is not set
# CONFIG_SENSORS_POWR1220 is not set
# CONFIG_SENSORS_LENOVO_EC is not set
# CONFIG_SENSORS_LINEAGE is not set
# CONFIG_SENSORS_LTC2945 is not set
# CONFIG_SENSORS_LTC2947_I2C is not set
# CONFIG_SENSORS_LTC2947_SPI is not set
# CONFIG_SENSORS_LTC2990 is not set
# CONFIG_SENSORS_LTC2991 is not set
# CONFIG_SENSORS_LTC2992 is not set
# CONFIG_SENSORS_LTC4151 is not set
# CONFIG_SENSORS_LTC4215 is not set
# CONFIG_SENSORS_LTC4222 is not set
# CONFIG_SENSORS_LTC4245 is not set
# CONFIG_SENSORS_LTC4260 is not set
# CONFIG_SENSORS_LTC4261 is not set
# CONFIG_SENSORS_LTC4282 is not set
# CONFIG_SENSORS_MAX1111 is not set
# CONFIG_SENSORS_MAX127 is not set
# CONFIG_SENSORS_MAX16065 is not set
# CONFIG_SENSORS_MAX1619 is not set
# CONFIG_SENSORS_MAX1668 is not set
# CONFIG_SENSORS_MAX197 is not set
# CONFIG_SENSORS_MAX31722 is not set
# CONFIG_SENSORS_MAX31730 is not set
# CONFIG_SENSORS_MAX31760 is not set
# CONFIG_MAX31827 is not set
# CONFIG_SENSORS_MAX6620 is not set
# CONFIG_SENSORS_MAX6621 is not set
# CONFIG_SENSORS_MAX6639 is not set
# CONFIG_SENSORS_MAX6650 is not set
# CONFIG_SENSORS_MAX6697 is not set
# CONFIG_SENSORS_MAX31790 is not set
# CONFIG_SENSORS_MC34VR500 is not set
# CONFIG_SENSORS_MCP3021 is not set
# CONFIG_SENSORS_TC654 is not set
# CONFIG_SENSORS_TPS23861 is not set
# CONFIG_SENSORS_MR75203 is not set
# CONFIG_SENSORS_ADCXX is not set
# CONFIG_SENSORS_LM63 is not set
# CONFIG_SENSORS_LM70 is not set
# CONFIG_SENSORS_LM73 is not set
# CONFIG_SENSORS_LM75 is not set
# CONFIG_SENSORS_LM77 is not set
# CONFIG_SENSORS_LM78 is not set
# CONFIG_SENSORS_LM80 is not set
# CONFIG_SENSORS_LM83 is not set
# CONFIG_SENSORS_LM85 is not set
# CONFIG_SENSORS_LM87 is not set
# CONFIG_SENSORS_LM90 is not set
# CONFIG_SENSORS_LM92 is not set
# CONFIG_SENSORS_LM93 is not set
# CONFIG_SENSORS_LM95234 is not set
# CONFIG_SENSORS_LM95241 is not set
# CONFIG_SENSORS_LM95245 is not set
# CONFIG_SENSORS_PC87360 is not set
# CONFIG_SENSORS_PC87427 is not set
# CONFIG_SENSORS_NCT6683 is not set
# CONFIG_SENSORS_NCT6775 is not set
# CONFIG_SENSORS_NCT6775_I2C is not set
# CONFIG_SENSORS_NCT7363 is not set
# CONFIG_SENSORS_NCT7802 is not set
# CONFIG_SENSORS_NCT7904 is not set
# CONFIG_SENSORS_NPCM7XX is not set
# CONFIG_SENSORS_NZXT_KRAKEN2 is not set
# CONFIG_SENSORS_NZXT_KRAKEN3 is not set
# CONFIG_SENSORS_NZXT_SMART2 is not set
# CONFIG_SENSORS_OCC_P8_I2C is not set
# CONFIG_SENSORS_PCF8591 is not set
# CONFIG_PMBUS is not set
# CONFIG_SENSORS_PT5161L is not set
# CONFIG_SENSORS_PWM_FAN is not set
# CONFIG_SENSORS_SBTSI is not set
# CONFIG_SENSORS_SHT15 is not set
# CONFIG_SENSORS_SHT21 is not set
# CONFIG_SENSORS_SHT3x is not set
# CONFIG_SENSORS_SHT4x is not set
# CONFIG_SENSORS_SHTC1 is not set
# CONFIG_SENSORS_SIS5595 is not set
# CONFIG_SENSORS_DME1737 is not set
# CONFIG_SENSORS_EMC1403 is not set
# CONFIG_SENSORS_EMC2103 is not set
# CONFIG_SENSORS_EMC2305 is not set
# CONFIG_SENSORS_EMC6W201 is not set
# CONFIG_SENSORS_SMSC47M1 is not set
# CONFIG_SENSORS_SMSC47M192 is not set
# CONFIG_SENSORS_SMSC47B397 is not set
# CONFIG_SENSORS_SCH5627 is not set
# CONFIG_SENSORS_SCH5636 is not set
# CONFIG_SENSORS_STTS751 is not set
# CONFIG_SENSORS_ADC128D818 is not set
# CONFIG_SENSORS_ADS7828 is not set
# CONFIG_SENSORS_ADS7871 is not set
# CONFIG_SENSORS_AMC6821 is not set
# CONFIG_SENSORS_INA209 is not set
# CONFIG_SENSORS_INA2XX is not set
# CONFIG_SENSORS_INA238 is not set
# CONFIG_SENSORS_INA3221 is not set
# CONFIG_SENSORS_SPD5118 is not set
# CONFIG_SENSORS_TC74 is not set
# CONFIG_SENSORS_THMC50 is not set
# CONFIG_SENSORS_TMP102 is not set
# CONFIG_SENSORS_TMP103 is not set
# CONFIG_SENSORS_TMP108 is not set
# CONFIG_SENSORS_TMP401 is not set
# CONFIG_SENSORS_TMP421 is not set
# CONFIG_SENSORS_TMP464 is not set
# CONFIG_SENSORS_TMP513 is not set
# CONFIG_SENSORS_VIA_CPUTEMP is not set
# CONFIG_SENSORS_VIA686A is not set
# CONFIG_SENSORS_VT1211 is not set
# CONFIG_SENSORS_VT8231 is not set
# CONFIG_SENSORS_W83773G is not set
# CONFIG_SENSORS_W83781D is not set
# CONFIG_SENSORS_W83791D is not set
# CONFIG_SENSORS_W83792D is not set
# CONFIG_SENSORS_W83793 is not set
# CONFIG_SENSORS_W83795 is not set
# CONFIG_SENSORS_W83L785TS is not set
# CONFIG_SENSORS_W83L786NG is not set
# CONFIG_SENSORS_W83627HF is not set
# CONFIG_SENSORS_W83627EHF is not set
# CONFIG_SENSORS_WM831X is not set
# CONFIG_SENSORS_WM8350 is not set
# CONFIG_SENSORS_XGENE is not set
#
# ACPI drivers
#
CONFIG_SENSORS_ACPI_POWER=y
# CONFIG_SENSORS_ATK0110 is not set
# CONFIG_SENSORS_ASUS_WMI is not set
# CONFIG_SENSORS_ASUS_EC is not set
# CONFIG_SENSORS_HP_WMI is not set
CONFIG_THERMAL=y
CONFIG_THERMAL_NETLINK=y
# CONFIG_THERMAL_STATISTICS is not set
# CONFIG_THERMAL_DEBUGFS is not set
# CONFIG_THERMAL_CORE_TESTING is not set
CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS=0
CONFIG_THERMAL_HWMON=y
CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE=y
# CONFIG_THERMAL_DEFAULT_GOV_FAIR_SHARE is not set
# CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE is not set
# CONFIG_THERMAL_DEFAULT_GOV_POWER_ALLOCATOR is not set
CONFIG_THERMAL_GOV_FAIR_SHARE=y
CONFIG_THERMAL_GOV_STEP_WISE=y
# CONFIG_THERMAL_GOV_BANG_BANG is not set
CONFIG_THERMAL_GOV_USER_SPACE=y
CONFIG_THERMAL_GOV_POWER_ALLOCATOR=y
# CONFIG_DEVFREQ_THERMAL is not set
# CONFIG_PCIE_THERMAL is not set
CONFIG_THERMAL_EMULATION=y
#
# Intel thermal drivers
#
CONFIG_INTEL_POWERCLAMP=y
CONFIG_X86_THERMAL_VECTOR=y
CONFIG_INTEL_TCC=y
CONFIG_X86_PKG_TEMP_THERMAL=y
# CONFIG_INTEL_SOC_DTS_THERMAL is not set
#
# ACPI INT340X thermal drivers
#
# CONFIG_INT340X_THERMAL is not set
# end of ACPI INT340X thermal drivers
# CONFIG_INTEL_PCH_THERMAL is not set
# CONFIG_INTEL_TCC_COOLING is not set
# CONFIG_INTEL_HFI_THERMAL is not set
# end of Intel thermal drivers
CONFIG_WATCHDOG=y
CONFIG_WATCHDOG_CORE=y
# CONFIG_WATCHDOG_NOWAYOUT is not set
CONFIG_WATCHDOG_HANDLE_BOOT_ENABLED=y
CONFIG_WATCHDOG_OPEN_TIMEOUT=0
# CONFIG_WATCHDOG_SYSFS is not set
# CONFIG_WATCHDOG_HRTIMER_PRETIMEOUT is not set
#
# Watchdog Pretimeout Governors
#
# CONFIG_WATCHDOG_PRETIMEOUT_GOV is not set
#
# Watchdog Device Drivers
#
# CONFIG_SOFT_WATCHDOG is not set
# CONFIG_DA9052_WATCHDOG is not set
# CONFIG_DA9055_WATCHDOG is not set
# CONFIG_DA9063_WATCHDOG is not set
# CONFIG_LENOVO_SE10_WDT is not set
# CONFIG_LENOVO_SE30_WDT is not set
# CONFIG_WDAT_WDT is not set
# CONFIG_WM831X_WATCHDOG is not set
# CONFIG_WM8350_WATCHDOG is not set
# CONFIG_XILINX_WATCHDOG is not set
# CONFIG_ZIIRAVE_WATCHDOG is not set
# CONFIG_CADENCE_WATCHDOG is not set
# CONFIG_DW_WATCHDOG is not set
# CONFIG_MAX63XX_WATCHDOG is not set
# CONFIG_ACQUIRE_WDT is not set
# CONFIG_ADVANTECH_WDT is not set
# CONFIG_ADVANTECH_EC_WDT is not set
# CONFIG_ALIM1535_WDT is not set
# CONFIG_ALIM7101_WDT is not set
# CONFIG_EBC_C384_WDT is not set
# CONFIG_EXAR_WDT is not set
# CONFIG_F71808E_WDT is not set
# CONFIG_SP5100_TCO is not set
# CONFIG_SBC_FITPC2_WATCHDOG is not set
# CONFIG_EUROTECH_WDT is not set
# CONFIG_IB700_WDT is not set
# CONFIG_IBMASR is not set
# CONFIG_WAFER_WDT is not set
# CONFIG_I6300ESB_WDT is not set
# CONFIG_IE6XX_WDT is not set
# CONFIG_INTEL_OC_WATCHDOG is not set
# CONFIG_ITCO_WDT is not set
# CONFIG_IT8712F_WDT is not set
# CONFIG_IT87_WDT is not set
# CONFIG_HP_WATCHDOG is not set
# CONFIG_SC1200_WDT is not set
# CONFIG_PC87413_WDT is not set
# CONFIG_NV_TCO is not set
# CONFIG_60XX_WDT is not set
# CONFIG_SMSC_SCH311X_WDT is not set
# CONFIG_SMSC37B787_WDT is not set
# CONFIG_TQMX86_WDT is not set
# CONFIG_VIA_WDT is not set
# CONFIG_W83627HF_WDT is not set
# CONFIG_W83877F_WDT is not set
# CONFIG_W83977F_WDT is not set
# CONFIG_MACHZ_WDT is not set
# CONFIG_SBC_EPX_C3_WATCHDOG is not set
# CONFIG_INTEL_MEI_WDT is not set
# CONFIG_NI903X_WDT is not set
# CONFIG_NIC7018_WDT is not set
# CONFIG_MEN_A21_WDT is not set
# CONFIG_XEN_WDT is not set
#
# PCI-based Watchdog Cards
#
# CONFIG_PCIPCWATCHDOG is not set
# CONFIG_WDTPCI is not set
#
# USB-based Watchdog Cards
#
# CONFIG_USBPCWATCHDOG is not set
CONFIG_SSB_POSSIBLE=y
# CONFIG_SSB is not set
CONFIG_BCMA_POSSIBLE=y
# CONFIG_BCMA is not set
#
# Multifunction device drivers
#
CONFIG_MFD_CORE=y
CONFIG_MFD_AS3711=y
# CONFIG_MFD_SMPRO is not set
CONFIG_PMIC_ADP5520=y
CONFIG_MFD_AAT2870_CORE=y
# CONFIG_MFD_BCM590XX is not set
# CONFIG_MFD_BD9571MWV is not set
# CONFIG_MFD_AXP20X_I2C is not set
# CONFIG_MFD_CGBC is not set
# CONFIG_MFD_CS42L43_I2C is not set
# CONFIG_MFD_MADERA is not set
CONFIG_PMIC_DA903X=y
CONFIG_PMIC_DA9052=y
CONFIG_MFD_DA9052_SPI=y
CONFIG_MFD_DA9052_I2C=y
CONFIG_MFD_DA9055=y
# CONFIG_MFD_DA9062 is not set
CONFIG_MFD_DA9063=y
# CONFIG_MFD_DA9150 is not set
# CONFIG_MFD_DLN2 is not set
# CONFIG_MFD_MC13XXX_SPI is not set
# CONFIG_MFD_MC13XXX_I2C is not set
# CONFIG_MFD_MP2629 is not set
# CONFIG_MFD_INTEL_QUARK_I2C_GPIO is not set
CONFIG_LPC_ICH=y
# CONFIG_LPC_SCH is not set
# CONFIG_MFD_INTEL_LPSS_ACPI is not set
# CONFIG_MFD_INTEL_LPSS_PCI is not set
# CONFIG_MFD_INTEL_PMC_BXT is not set
# CONFIG_MFD_IQS62X is not set
# CONFIG_MFD_JANZ_CMODIO is not set
# CONFIG_MFD_KEMPLD is not set
# CONFIG_MFD_88PM800 is not set
# CONFIG_MFD_88PM805 is not set
CONFIG_MFD_88PM860X=y
# CONFIG_MFD_MAX14577 is not set
# CONFIG_MFD_MAX77541 is not set
CONFIG_MFD_MAX77693=y
# CONFIG_MFD_MAX77705 is not set
# CONFIG_MFD_MAX77843 is not set
# CONFIG_MFD_MAX8907 is not set
CONFIG_MFD_MAX8925=y
CONFIG_MFD_MAX8997=y
CONFIG_MFD_MAX8998=y
# CONFIG_MFD_MT6360 is not set
# CONFIG_MFD_MT6370 is not set
# CONFIG_MFD_MT6397 is not set
# CONFIG_MFD_MENF21BMC is not set
# CONFIG_MFD_OCELOT is not set
CONFIG_EZX_PCAP=y
# CONFIG_MFD_VIPERBOARD is not set
# CONFIG_MFD_RETU is not set
# CONFIG_MFD_SY7636A is not set
# CONFIG_MFD_RDC321X is not set
# CONFIG_MFD_RT4831 is not set
# CONFIG_MFD_RT5033 is not set
# CONFIG_MFD_RT5120 is not set
CONFIG_MFD_RC5T583=y
# CONFIG_MFD_SI476X_CORE is not set
# CONFIG_MFD_SM501 is not set
# CONFIG_MFD_SKY81452 is not set
CONFIG_MFD_SYSCON=y
# CONFIG_MFD_LP3943 is not set
CONFIG_MFD_LP8788=y
# CONFIG_MFD_TI_LMU is not set
CONFIG_MFD_PALMAS=y
# CONFIG_TPS6105X is not set
# CONFIG_TPS65010 is not set
# CONFIG_TPS6507X is not set
# CONFIG_MFD_TPS65086 is not set
CONFIG_MFD_TPS65090=y
# CONFIG_MFD_TI_LP873X is not set
CONFIG_MFD_TPS6586X=y
CONFIG_MFD_TPS65910=y
CONFIG_MFD_TPS65912=y
CONFIG_MFD_TPS65912_I2C=y
CONFIG_MFD_TPS65912_SPI=y
# CONFIG_MFD_TPS6594_I2C is not set
# CONFIG_MFD_TPS6594_SPI is not set
# CONFIG_TWL4030_CORE is not set
CONFIG_TWL6040_CORE=y
# CONFIG_MFD_WL1273_CORE is not set
# CONFIG_MFD_LM3533 is not set
# CONFIG_MFD_TQMX86 is not set
# CONFIG_MFD_VX855 is not set
# CONFIG_MFD_ARIZONA_I2C is not set
# CONFIG_MFD_ARIZONA_SPI is not set
CONFIG_MFD_WM8400=y
CONFIG_MFD_WM831X=y
CONFIG_MFD_WM831X_I2C=y
CONFIG_MFD_WM831X_SPI=y
CONFIG_MFD_WM8350=y
CONFIG_MFD_WM8350_I2C=y
CONFIG_MFD_WM8994=y
# CONFIG_MFD_ATC260X_I2C is not set
# CONFIG_MFD_CS40L50_I2C is not set
# CONFIG_MFD_CS40L50_SPI is not set
# CONFIG_MFD_INTEL_M10_BMC_SPI is not set
# CONFIG_MFD_UPBOARD_FPGA is not set
# end of Multifunction device drivers
CONFIG_REGULATOR=y
# CONFIG_REGULATOR_DEBUG is not set
# CONFIG_REGULATOR_FIXED_VOLTAGE is not set
# CONFIG_REGULATOR_VIRTUAL_CONSUMER is not set
# CONFIG_REGULATOR_USERSPACE_CONSUMER is not set
# CONFIG_REGULATOR_NETLINK_EVENTS is not set
# CONFIG_REGULATOR_88PG86X is not set
CONFIG_REGULATOR_88PM8607=y
# CONFIG_REGULATOR_ACT8865 is not set
# CONFIG_REGULATOR_AD5398 is not set
# CONFIG_REGULATOR_ADP5055 is not set
# CONFIG_REGULATOR_AAT2870 is not set
# CONFIG_REGULATOR_AS3711 is not set
# CONFIG_REGULATOR_AW37503 is not set
# CONFIG_REGULATOR_DA903X is not set
# CONFIG_REGULATOR_DA9052 is not set
# CONFIG_REGULATOR_DA9055 is not set
# CONFIG_REGULATOR_DA9210 is not set
# CONFIG_REGULATOR_DA9211 is not set
# CONFIG_REGULATOR_FAN53555 is not set
# CONFIG_REGULATOR_GPIO is not set
# CONFIG_REGULATOR_ISL9305 is not set
# CONFIG_REGULATOR_ISL6271A is not set
# CONFIG_REGULATOR_LP3971 is not set
# CONFIG_REGULATOR_LP3972 is not set
CONFIG_REGULATOR_LP872X=y
# CONFIG_REGULATOR_LP8755 is not set
CONFIG_REGULATOR_LP8788=y
# CONFIG_REGULATOR_LTC3589 is not set
# CONFIG_REGULATOR_LTC3676 is not set
# CONFIG_REGULATOR_MAX1586 is not set
# CONFIG_REGULATOR_MAX77503 is not set
# CONFIG_REGULATOR_MAX77857 is not set
# CONFIG_REGULATOR_MAX8649 is not set
# CONFIG_REGULATOR_MAX8660 is not set
# CONFIG_REGULATOR_MAX8893 is not set
# CONFIG_REGULATOR_MAX8925 is not set
# CONFIG_REGULATOR_MAX8952 is not set
# CONFIG_REGULATOR_MAX8997 is not set
# CONFIG_REGULATOR_MAX8998 is not set
# CONFIG_REGULATOR_MAX20086 is not set
# CONFIG_REGULATOR_MAX20411 is not set
# CONFIG_REGULATOR_MAX77693 is not set
# CONFIG_REGULATOR_MAX77826 is not set
# CONFIG_REGULATOR_MP8859 is not set
# CONFIG_REGULATOR_MT6311 is not set
# CONFIG_REGULATOR_PALMAS is not set
# CONFIG_REGULATOR_PCA9450 is not set
# CONFIG_REGULATOR_PF9453 is not set
# CONFIG_REGULATOR_PCAP is not set
# CONFIG_REGULATOR_PV88060 is not set
# CONFIG_REGULATOR_PV88080 is not set
# CONFIG_REGULATOR_PV88090 is not set
# CONFIG_REGULATOR_PWM is not set
# CONFIG_REGULATOR_RAA215300 is not set
# CONFIG_REGULATOR_RC5T583 is not set
# CONFIG_REGULATOR_RT4801 is not set
# CONFIG_REGULATOR_RT4803 is not set
# CONFIG_REGULATOR_RT5190A is not set
# CONFIG_REGULATOR_RT5739 is not set
# CONFIG_REGULATOR_RT5759 is not set
# CONFIG_REGULATOR_RT6160 is not set
# CONFIG_REGULATOR_RT6190 is not set
# CONFIG_REGULATOR_RT6245 is not set
# CONFIG_REGULATOR_RTQ2134 is not set
# CONFIG_REGULATOR_RTMV20 is not set
# CONFIG_REGULATOR_RTQ6752 is not set
# CONFIG_REGULATOR_RTQ2208 is not set
# CONFIG_REGULATOR_SLG51000 is not set
# CONFIG_REGULATOR_TPS51632 is not set
# CONFIG_REGULATOR_TPS62360 is not set
# CONFIG_REGULATOR_TPS65023 is not set
# CONFIG_REGULATOR_TPS6507X is not set
# CONFIG_REGULATOR_TPS65090 is not set
# CONFIG_REGULATOR_TPS65132 is not set
# CONFIG_REGULATOR_TPS6524X is not set
# CONFIG_REGULATOR_TPS6586X is not set
# CONFIG_REGULATOR_TPS65910 is not set
# CONFIG_REGULATOR_TPS65912 is not set
# CONFIG_REGULATOR_WM831X is not set
# CONFIG_REGULATOR_WM8350 is not set
# CONFIG_REGULATOR_WM8400 is not set
# CONFIG_REGULATOR_WM8994 is not set
CONFIG_RC_CORE=y
# CONFIG_LIRC is not set
CONFIG_RC_MAP=y
CONFIG_RC_DECODERS=y
# CONFIG_IR_IMON_DECODER is not set
CONFIG_IR_JVC_DECODER=y
CONFIG_IR_MCE_KBD_DECODER=y
CONFIG_IR_NEC_DECODER=y
CONFIG_IR_RC5_DECODER=y
CONFIG_IR_RC6_DECODER=y
# CONFIG_IR_RCMM_DECODER is not set
CONFIG_IR_SANYO_DECODER=y
CONFIG_IR_SHARP_DECODER=y
CONFIG_IR_SONY_DECODER=y
CONFIG_IR_XMP_DECODER=y
# CONFIG_RC_DEVICES is not set
#
# CEC support
#
# CONFIG_MEDIA_CEC_SUPPORT is not set
# end of CEC support
# CONFIG_MEDIA_SUPPORT is not set
#
# Graphics support
#
CONFIG_APERTURE_HELPERS=y
CONFIG_SCREEN_INFO=y
CONFIG_VIDEO=y
CONFIG_AUXDISPLAY=y
# CONFIG_HD44780 is not set
# CONFIG_LCD2S is not set
# CONFIG_PARPORT_PANEL is not set
# CONFIG_CHARLCD_BL_OFF is not set
# CONFIG_CHARLCD_BL_ON is not set
CONFIG_CHARLCD_BL_FLASH=y
# CONFIG_IMG_ASCII_LCD is not set
# CONFIG_HT16K33 is not set
# CONFIG_MAX6959 is not set
# CONFIG_SEG_LED_GPIO is not set
# CONFIG_PANEL is not set
CONFIG_AGP=y
CONFIG_AGP_AMD64=y
CONFIG_AGP_INTEL=y
# CONFIG_AGP_SIS is not set
CONFIG_AGP_VIA=y
CONFIG_INTEL_GTT=y
CONFIG_VGA_SWITCHEROO=y
CONFIG_DRM=y
#
# DRM debugging options
#
# CONFIG_DRM_WERROR is not set
# CONFIG_DRM_DEBUG_MM is not set
# end of DRM debugging options
CONFIG_DRM_KMS_HELPER=y
# CONFIG_DRM_PANIC is not set
# CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS is not set
CONFIG_DRM_DEBUG_MODESET_LOCK=y
CONFIG_DRM_CLIENT=y
CONFIG_DRM_CLIENT_LIB=y
CONFIG_DRM_CLIENT_SELECTION=y
CONFIG_DRM_CLIENT_SETUP=y
#
# Supported DRM clients
#
CONFIG_DRM_FBDEV_EMULATION=y
CONFIG_DRM_FBDEV_OVERALLOC=100
# CONFIG_DRM_FBDEV_LEAK_PHYS_SMEM is not set
# CONFIG_DRM_CLIENT_LOG is not set
CONFIG_DRM_CLIENT_DEFAULT_FBDEV=y
CONFIG_DRM_CLIENT_DEFAULT="fbdev"
# end of Supported DRM clients
CONFIG_DRM_LOAD_EDID_FIRMWARE=y
CONFIG_DRM_GEM_SHMEM_HELPER=y
#
# Drivers for system framebuffers
#
# CONFIG_DRM_EFIDRM is not set
# CONFIG_DRM_SIMPLEDRM is not set
# CONFIG_DRM_VESADRM is not set
# end of Drivers for system framebuffers
#
# ARM devices
#
# end of ARM devices
# CONFIG_DRM_RADEON is not set
# CONFIG_DRM_AMDGPU is not set
# CONFIG_DRM_NOUVEAU is not set
# CONFIG_DRM_I915 is not set
# CONFIG_DRM_XE is not set
# CONFIG_DRM_VGEM is not set
# CONFIG_DRM_VKMS is not set
# CONFIG_DRM_VMWGFX is not set
# CONFIG_DRM_GMA500 is not set
# CONFIG_DRM_UDL is not set
CONFIG_DRM_AST=y
# CONFIG_DRM_MGAG200 is not set
# CONFIG_DRM_QXL is not set
# CONFIG_DRM_VIRTIO_GPU is not set
CONFIG_DRM_PANEL=y
#
# Display Panels
#
# CONFIG_DRM_PANEL_AUO_A030JTN01 is not set
# CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set
# CONFIG_DRM_PANEL_ORISETECH_OTA5601A is not set
# CONFIG_DRM_PANEL_WIDECHIPS_WS2401 is not set
# end of Display Panels
CONFIG_DRM_BRIDGE=y
CONFIG_DRM_PANEL_BRIDGE=y
#
# Display Interface Bridges
#
# CONFIG_DRM_I2C_NXP_TDA998X is not set
# CONFIG_DRM_ANALOGIX_ANX78XX is not set
# end of Display Interface Bridges
# CONFIG_DRM_ETNAVIV is not set
# CONFIG_DRM_HISI_HIBMC is not set
# CONFIG_DRM_APPLETBDRM is not set
# CONFIG_DRM_BOCHS is not set
# CONFIG_DRM_CIRRUS_QEMU is not set
# CONFIG_DRM_GM12U320 is not set
# CONFIG_DRM_PANEL_MIPI_DBI is not set
# CONFIG_TINYDRM_HX8357D is not set
# CONFIG_TINYDRM_ILI9163 is not set
# CONFIG_TINYDRM_ILI9225 is not set
# CONFIG_TINYDRM_ILI9341 is not set
# CONFIG_TINYDRM_ILI9486 is not set
# CONFIG_TINYDRM_MI0283QT is not set
# CONFIG_TINYDRM_REPAPER is not set
# CONFIG_TINYDRM_SHARP_MEMORY is not set
# CONFIG_DRM_XEN_FRONTEND is not set
# CONFIG_DRM_VBOXVIDEO is not set
# CONFIG_DRM_GUD is not set
# CONFIG_DRM_ST7571_I2C is not set
# CONFIG_DRM_ST7586 is not set
# CONFIG_DRM_ST7735R is not set
# CONFIG_DRM_SSD130X is not set
CONFIG_DRM_PANEL_ORIENTATION_QUIRKS=y
#
# Frame buffer Devices
#
CONFIG_FB=y
# CONFIG_FB_CIRRUS is not set
# CONFIG_FB_PM2 is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_ARC is not set
CONFIG_FB_ASILIANT=y
CONFIG_FB_IMSTT=y
# CONFIG_FB_VGA16 is not set
# CONFIG_FB_UVESA is not set
CONFIG_FB_VESA=y
CONFIG_FB_EFI=y
# CONFIG_FB_N411 is not set
# CONFIG_FB_HGA is not set
# CONFIG_FB_OPENCORES is not set
# CONFIG_FB_S1D13XXX is not set
# CONFIG_FB_NVIDIA is not set
# CONFIG_FB_RIVA is not set
# CONFIG_FB_I740 is not set
# CONFIG_FB_MATROX is not set
# CONFIG_FB_RADEON is not set
# CONFIG_FB_ATY128 is not set
# CONFIG_FB_ATY is not set
# CONFIG_FB_S3 is not set
# CONFIG_FB_SAVAGE is not set
# CONFIG_FB_SIS is not set
# CONFIG_FB_VIA is not set
# CONFIG_FB_NEOMAGIC is not set
# CONFIG_FB_KYRO is not set
# CONFIG_FB_3DFX is not set
# CONFIG_FB_VOODOO1 is not set
# CONFIG_FB_VT8623 is not set
# CONFIG_FB_TRIDENT is not set
# CONFIG_FB_ARK is not set
# CONFIG_FB_PM3 is not set
# CONFIG_FB_CARMINE is not set
# CONFIG_FB_SMSCUFX is not set
# CONFIG_FB_UDL is not set
# CONFIG_FB_IBM_GXT4500 is not set
# CONFIG_FB_VIRTUAL is not set
# CONFIG_XEN_FBDEV_FRONTEND is not set
# CONFIG_FB_METRONOME is not set
# CONFIG_FB_MB862XX is not set
CONFIG_FB_SIMPLE=y
# CONFIG_FB_SSD1307 is not set
# CONFIG_FB_SM712 is not set
CONFIG_FB_CORE=y
CONFIG_FB_NOTIFY=y
CONFIG_FIRMWARE_EDID=y
CONFIG_FB_DEVICE=y
CONFIG_FB_CFB_FILLRECT=y
CONFIG_FB_CFB_COPYAREA=y
CONFIG_FB_CFB_IMAGEBLIT=y
CONFIG_FB_SYS_FILLRECT=y
CONFIG_FB_SYS_COPYAREA=y
CONFIG_FB_SYS_IMAGEBLIT=y
# CONFIG_FB_FOREIGN_ENDIAN is not set
CONFIG_FB_SYSMEM_FOPS=y
CONFIG_FB_DEFERRED_IO=y
CONFIG_FB_IOMEM_FOPS=y
CONFIG_FB_IOMEM_HELPERS=y
CONFIG_FB_SYSMEM_HELPERS=y
CONFIG_FB_SYSMEM_HELPERS_DEFERRED=y
CONFIG_FB_MODE_HELPERS=y
CONFIG_FB_TILEBLITTING=y
# end of Frame buffer Devices
#
# Backlight & LCD device support
#
# CONFIG_LCD_CLASS_DEVICE is not set
CONFIG_BACKLIGHT_CLASS_DEVICE=y
# CONFIG_BACKLIGHT_KTD253 is not set
# CONFIG_BACKLIGHT_KTD2801 is not set
# CONFIG_BACKLIGHT_KTZ8866 is not set
# CONFIG_BACKLIGHT_PWM is not set
# CONFIG_BACKLIGHT_DA903X is not set
# CONFIG_BACKLIGHT_DA9052 is not set
# CONFIG_BACKLIGHT_MAX8925 is not set
# CONFIG_BACKLIGHT_APPLE is not set
# CONFIG_BACKLIGHT_QCOM_WLED is not set
# CONFIG_BACKLIGHT_SAHARA is not set
# CONFIG_BACKLIGHT_WM831X is not set
# CONFIG_BACKLIGHT_ADP5520 is not set
# CONFIG_BACKLIGHT_ADP8860 is not set
# CONFIG_BACKLIGHT_ADP8870 is not set
# CONFIG_BACKLIGHT_88PM860X is not set
# CONFIG_BACKLIGHT_AAT2870 is not set
# CONFIG_BACKLIGHT_LM3509 is not set
# CONFIG_BACKLIGHT_LM3630A is not set
# CONFIG_BACKLIGHT_LM3639 is not set
# CONFIG_BACKLIGHT_LP855X is not set
# CONFIG_BACKLIGHT_LP8788 is not set
# CONFIG_BACKLIGHT_MP3309C is not set
# CONFIG_BACKLIGHT_AS3711 is not set
# CONFIG_BACKLIGHT_GPIO is not set
# CONFIG_BACKLIGHT_LV5207LP is not set
# CONFIG_BACKLIGHT_BD6107 is not set
# CONFIG_BACKLIGHT_ARCXCNN is not set
# end of Backlight & LCD device support
CONFIG_HDMI=y
#
# Console display driver support
#
CONFIG_VGA_CONSOLE=y
CONFIG_DUMMY_CONSOLE=y
CONFIG_DUMMY_CONSOLE_COLUMNS=80
CONFIG_DUMMY_CONSOLE_ROWS=25
CONFIG_FRAMEBUFFER_CONSOLE=y
# CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION is not set
CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y
# CONFIG_FRAMEBUFFER_CONSOLE_DEFERRED_TAKEOVER is not set
# end of Console display driver support
# CONFIG_LOGO is not set
# end of Graphics support
# CONFIG_DRM_ACCEL is not set
# CONFIG_SOUND is not set
CONFIG_HID_SUPPORT=y
CONFIG_HID=y
# CONFIG_HID_BATTERY_STRENGTH is not set
CONFIG_HIDRAW=y
# CONFIG_UHID is not set
CONFIG_HID_GENERIC=y
#
# Special HID drivers
#
# CONFIG_HID_A4TECH is not set
# CONFIG_HID_ACCUTOUCH is not set
# CONFIG_HID_ACRUX is not set
# CONFIG_HID_APPLE is not set
# CONFIG_HID_APPLEIR is not set
# CONFIG_HID_APPLETB_BL is not set
# CONFIG_HID_APPLETB_KBD is not set
# CONFIG_HID_ASUS is not set
# CONFIG_HID_AUREAL is not set
# CONFIG_HID_BELKIN is not set
# CONFIG_HID_BETOP_FF is not set
# CONFIG_HID_BIGBEN_FF is not set
# CONFIG_HID_CHERRY is not set
# CONFIG_HID_CHICONY is not set
# CONFIG_HID_CORSAIR is not set
# CONFIG_HID_COUGAR is not set
# CONFIG_HID_MACALLY is not set
# CONFIG_HID_CMEDIA is not set
# CONFIG_HID_CP2112 is not set
# CONFIG_HID_CREATIVE_SB0540 is not set
# CONFIG_HID_CYPRESS is not set
# CONFIG_HID_DRAGONRISE is not set
# CONFIG_HID_EMS_FF is not set
# CONFIG_HID_ELAN is not set
# CONFIG_HID_ELECOM is not set
# CONFIG_HID_ELO is not set
# CONFIG_HID_EVISION is not set
# CONFIG_HID_EZKEY is not set
# CONFIG_HID_FT260 is not set
# CONFIG_HID_GEMBIRD is not set
# CONFIG_HID_GFRM is not set
# CONFIG_HID_GLORIOUS is not set
# CONFIG_HID_HOLTEK is not set
# CONFIG_HID_GOODIX_SPI is not set
# CONFIG_HID_GOOGLE_STADIA_FF is not set
# CONFIG_HID_VIVALDI is not set
# CONFIG_HID_GT683R is not set
# CONFIG_HID_KEYTOUCH is not set
# CONFIG_HID_KYE is not set
# CONFIG_HID_KYSONA is not set
# CONFIG_HID_UCLOGIC is not set
# CONFIG_HID_WALTOP is not set
# CONFIG_HID_VIEWSONIC is not set
# CONFIG_HID_VRC2 is not set
# CONFIG_HID_XIAOMI is not set
# CONFIG_HID_GYRATION is not set
# CONFIG_HID_ICADE is not set
# CONFIG_HID_ITE is not set
# CONFIG_HID_JABRA is not set
# CONFIG_HID_TWINHAN is not set
# CONFIG_HID_KENSINGTON is not set
# CONFIG_HID_LCPOWER is not set
# CONFIG_HID_LED is not set
# CONFIG_HID_LENOVO is not set
# CONFIG_HID_LETSKETCH is not set
# CONFIG_HID_MAGICMOUSE is not set
# CONFIG_HID_MALTRON is not set
# CONFIG_HID_MAYFLASH is not set
# CONFIG_HID_MEGAWORLD_FF is not set
# CONFIG_HID_REDRAGON is not set
# CONFIG_HID_MICROSOFT is not set
# CONFIG_HID_MONTEREY is not set
# CONFIG_HID_MULTITOUCH is not set
# CONFIG_HID_NINTENDO is not set
# CONFIG_HID_NTI is not set
# CONFIG_HID_NTRIG is not set
# CONFIG_HID_ORTEK is not set
# CONFIG_HID_PANTHERLORD is not set
# CONFIG_HID_PENMOUNT is not set
# CONFIG_HID_PETALYNX is not set
# CONFIG_HID_PICOLCD is not set
# CONFIG_HID_PLANTRONICS is not set
# CONFIG_HID_PXRC is not set
# CONFIG_HID_RAZER is not set
# CONFIG_HID_PRIMAX is not set
# CONFIG_HID_RETRODE is not set
# CONFIG_HID_ROCCAT is not set
# CONFIG_HID_SAITEK is not set
# CONFIG_HID_SAMSUNG is not set
# CONFIG_HID_SEMITEK is not set
# CONFIG_HID_SIGMAMICRO is not set
# CONFIG_HID_SONY is not set
# CONFIG_HID_SPEEDLINK is not set
# CONFIG_HID_STEAM is not set
# CONFIG_HID_STEELSERIES is not set
# CONFIG_HID_SUNPLUS is not set
# CONFIG_HID_RMI is not set
# CONFIG_HID_GREENASIA is not set
# CONFIG_HID_SMARTJOYPLUS is not set
# CONFIG_HID_TIVO is not set
# CONFIG_HID_TOPSEED is not set
# CONFIG_HID_TOPRE is not set
# CONFIG_HID_THINGM is not set
# CONFIG_HID_THRUSTMASTER is not set
# CONFIG_HID_UDRAW_PS3 is not set
# CONFIG_HID_U2FZERO is not set
# CONFIG_HID_UNIVERSAL_PIDFF is not set
# CONFIG_HID_WACOM is not set
# CONFIG_HID_WIIMOTE is not set
# CONFIG_HID_WINWING is not set
# CONFIG_HID_XINMO is not set
# CONFIG_HID_ZEROPLUS is not set
# CONFIG_HID_ZYDACRON is not set
# CONFIG_HID_SENSOR_HUB is not set
# CONFIG_HID_ALPS is not set
# CONFIG_HID_MCP2200 is not set
# CONFIG_HID_MCP2221 is not set
# end of Special HID drivers
#
# HID-BPF support
#
# CONFIG_HID_BPF is not set
# end of HID-BPF support
CONFIG_I2C_HID=y
# CONFIG_I2C_HID_ACPI is not set
# CONFIG_I2C_HID_OF is not set
#
# Intel ISH HID support
#
# CONFIG_INTEL_ISH_HID is not set
# end of Intel ISH HID support
#
# AMD SFH HID Support
#
# CONFIG_AMD_SFH_HID is not set
# end of AMD SFH HID Support
#
# Intel THC HID Support
#
# CONFIG_INTEL_THC_HID is not set
# end of Intel THC HID Support
#
# USB HID support
#
CONFIG_USB_HID=y
CONFIG_HID_PID=y
CONFIG_USB_HIDDEV=y
# end of USB HID support
CONFIG_USB_OHCI_LITTLE_ENDIAN=y
CONFIG_USB_SUPPORT=y
CONFIG_USB_COMMON=y
# CONFIG_USB_LED_TRIG is not set
# CONFIG_USB_ULPI_BUS is not set
# CONFIG_USB_CONN_GPIO is not set
CONFIG_USB_ARCH_HAS_HCD=y
CONFIG_USB=y
CONFIG_USB_PCI=y
CONFIG_USB_PCI_AMD=y
CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
#
# Miscellaneous USB options
#
CONFIG_USB_DEFAULT_PERSIST=y
# CONFIG_USB_FEW_INIT_RETRIES is not set
CONFIG_USB_DYNAMIC_MINORS=y
# CONFIG_USB_OTG is not set
# CONFIG_USB_OTG_PRODUCTLIST is not set
# CONFIG_USB_OTG_DISABLE_EXTERNAL_HUB is not set
# CONFIG_USB_LEDS_TRIGGER_USBPORT is not set
CONFIG_USB_AUTOSUSPEND_DELAY=2
CONFIG_USB_DEFAULT_AUTHORIZATION_MODE=1
# CONFIG_USB_MON is not set
#
# USB Host Controller Drivers
#
# CONFIG_USB_C67X00_HCD is not set
CONFIG_USB_XHCI_HCD=y
# CONFIG_USB_XHCI_DBGCAP is not set
CONFIG_USB_XHCI_PCI=y
# CONFIG_USB_XHCI_PCI_RENESAS is not set
# CONFIG_USB_XHCI_PLATFORM is not set
# CONFIG_USB_XHCI_SIDEBAND is not set
CONFIG_USB_EHCI_HCD=y
CONFIG_USB_EHCI_ROOT_HUB_TT=y
CONFIG_USB_EHCI_TT_NEWSCHED=y
CONFIG_USB_EHCI_PCI=y
# CONFIG_USB_EHCI_FSL is not set
CONFIG_USB_EHCI_HCD_PLATFORM=y
# CONFIG_USB_OXU210HP_HCD is not set
# CONFIG_USB_ISP116X_HCD is not set
# CONFIG_USB_MAX3421_HCD is not set
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_OHCI_HCD_PCI=y
CONFIG_USB_OHCI_HCD_PLATFORM=y
CONFIG_USB_UHCI_HCD=y
# CONFIG_USB_SL811_HCD is not set
# CONFIG_USB_R8A66597_HCD is not set
# CONFIG_USB_HCD_TEST_MODE is not set
# CONFIG_USB_XEN_HCD is not set
#
# USB Device Class drivers
#
# CONFIG_USB_ACM is not set
# CONFIG_USB_PRINTER is not set
# CONFIG_USB_WDM is not set
# CONFIG_USB_TMC is not set
#
# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed;
# see USB_STORAGE Help for more info
#
# CONFIG_USB_STORAGE is not set
#
# USB Imaging devices
#
# CONFIG_USB_MDC800 is not set
# CONFIG_USB_MICROTEK is not set
# CONFIG_USBIP_CORE is not set
#
# USB dual-mode controller drivers
#
# CONFIG_USB_CDNS_SUPPORT is not set
# CONFIG_USB_MUSB_HDRC is not set
# CONFIG_USB_DWC3 is not set
# CONFIG_USB_DWC2 is not set
# CONFIG_USB_CHIPIDEA is not set
# CONFIG_USB_ISP1760 is not set
#
# USB port drivers
#
# CONFIG_USB_SERIAL is not set
#
# USB Miscellaneous drivers
#
# CONFIG_USB_USS720 is not set
# CONFIG_USB_EMI62 is not set
# CONFIG_USB_EMI26 is not set
# CONFIG_USB_ADUTUX is not set
# CONFIG_USB_SEVSEG is not set
# CONFIG_USB_LEGOTOWER is not set
# CONFIG_USB_LCD is not set
# CONFIG_USB_CYPRESS_CY7C63 is not set
# CONFIG_USB_CYTHERM is not set
# CONFIG_USB_IDMOUSE is not set
# CONFIG_USB_APPLEDISPLAY is not set
# CONFIG_APPLE_MFI_FASTCHARGE is not set
# CONFIG_USB_LJCA is not set
# CONFIG_USB_SISUSBVGA is not set
# CONFIG_USB_LD is not set
# CONFIG_USB_TRANCEVIBRATOR is not set
# CONFIG_USB_IOWARRIOR is not set
# CONFIG_USB_TEST is not set
# CONFIG_USB_EHSET_TEST_FIXTURE is not set
# CONFIG_USB_ISIGHTFW is not set
# CONFIG_USB_YUREX is not set
# CONFIG_USB_EZUSB_FX2 is not set
# CONFIG_USB_HUB_USB251XB is not set
# CONFIG_USB_HSIC_USB3503 is not set
# CONFIG_USB_HSIC_USB4604 is not set
# CONFIG_USB_LINK_LAYER_TEST is not set
# CONFIG_USB_CHAOSKEY is not set
#
# USB Physical Layer drivers
#
# CONFIG_NOP_USB_XCEIV is not set
# CONFIG_USB_GPIO_VBUS is not set
# CONFIG_USB_ISP1301 is not set
# end of USB Physical Layer drivers
# CONFIG_USB_GADGET is not set
# CONFIG_TYPEC is not set
# CONFIG_USB_ROLE_SWITCH is not set
CONFIG_MMC=y
# CONFIG_MMC_BLOCK is not set
# CONFIG_SDIO_UART is not set
# CONFIG_MMC_TEST is not set
#
# MMC/SD/SDIO Host Controller Drivers
#
# CONFIG_MMC_DEBUG is not set
# CONFIG_MMC_SDHCI is not set
# CONFIG_MMC_WBSD is not set
# CONFIG_MMC_TIFM_SD is not set
# CONFIG_MMC_SPI is not set
# CONFIG_MMC_CB710 is not set
# CONFIG_MMC_VIA_SDMMC is not set
# CONFIG_MMC_VUB300 is not set
# CONFIG_MMC_USHC is not set
# CONFIG_MMC_USDHI6ROL0 is not set
# CONFIG_MMC_CQHCI is not set
# CONFIG_MMC_HSQ is not set
# CONFIG_MMC_TOSHIBA_PCI is not set
# CONFIG_MMC_MTK is not set
# CONFIG_SCSI_UFSHCD is not set
# CONFIG_MEMSTICK is not set
CONFIG_NEW_LEDS=y
CONFIG_LEDS_CLASS=y
# CONFIG_LEDS_CLASS_FLASH is not set
# CONFIG_LEDS_CLASS_MULTICOLOR is not set
# CONFIG_LEDS_BRIGHTNESS_HW_CHANGED is not set
#
# LED drivers
#
# CONFIG_LEDS_88PM860X is not set
# CONFIG_LEDS_APU is not set
# CONFIG_LEDS_AW200XX is not set
# CONFIG_LEDS_LM3530 is not set
# CONFIG_LEDS_LM3532 is not set
# CONFIG_LEDS_LM3642 is not set
# CONFIG_LEDS_PCA9532 is not set
# CONFIG_LEDS_GPIO is not set
# CONFIG_LEDS_LP3944 is not set
# CONFIG_LEDS_LP3952 is not set
# CONFIG_LEDS_LP8788 is not set
# CONFIG_LEDS_PCA955X is not set
# CONFIG_LEDS_PCA963X is not set
# CONFIG_LEDS_PCA995X is not set
# CONFIG_LEDS_WM831X_STATUS is not set
# CONFIG_LEDS_WM8350 is not set
# CONFIG_LEDS_DA903X is not set
# CONFIG_LEDS_DA9052 is not set
# CONFIG_LEDS_DAC124S085 is not set
# CONFIG_LEDS_PWM is not set
# CONFIG_LEDS_REGULATOR is not set
# CONFIG_LEDS_BD2606MVV is not set
# CONFIG_LEDS_BD2802 is not set
# CONFIG_LEDS_INTEL_SS4200 is not set
# CONFIG_LEDS_LT3593 is not set
# CONFIG_LEDS_ADP5520 is not set
# CONFIG_LEDS_TCA6507 is not set
# CONFIG_LEDS_TLC591XX is not set
# CONFIG_LEDS_MAX8997 is not set
# CONFIG_LEDS_LM355x is not set
# CONFIG_LEDS_IS31FL319X is not set
#
# LED driver for blink(1) USB RGB LED is under Special HID drivers
# (HID_THINGM)
#
# CONFIG_LEDS_BLINKM is not set
# CONFIG_LEDS_MLXCPLD is not set
# CONFIG_LEDS_MLXREG is not set
# CONFIG_LEDS_USER is not set
# CONFIG_LEDS_NIC78BX is not set
# CONFIG_LEDS_SPI_BYTE is not set
#
# Flash and Torch LED drivers
#
#
# RGB LED drivers
#
#
# LED Triggers
#
CONFIG_LEDS_TRIGGERS=y
# CONFIG_LEDS_TRIGGER_TIMER is not set
# CONFIG_LEDS_TRIGGER_ONESHOT is not set
# CONFIG_LEDS_TRIGGER_DISK is not set
# CONFIG_LEDS_TRIGGER_HEARTBEAT is not set
# CONFIG_LEDS_TRIGGER_BACKLIGHT is not set
CONFIG_LEDS_TRIGGER_CPU=y
# CONFIG_LEDS_TRIGGER_ACTIVITY is not set
# CONFIG_LEDS_TRIGGER_GPIO is not set
# CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set
#
# iptables trigger is under Netfilter config (LED target)
#
# CONFIG_LEDS_TRIGGER_TRANSIENT is not set
# CONFIG_LEDS_TRIGGER_CAMERA is not set
# CONFIG_LEDS_TRIGGER_PANIC is not set
# CONFIG_LEDS_TRIGGER_NETDEV is not set
# CONFIG_LEDS_TRIGGER_PATTERN is not set
# CONFIG_LEDS_TRIGGER_TTY is not set
# CONFIG_LEDS_TRIGGER_INPUT_EVENTS is not set
#
# Simatic LED drivers
#
# CONFIG_ACCESSIBILITY is not set
# CONFIG_INFINIBAND is not set
CONFIG_EDAC_ATOMIC_SCRUB=y
CONFIG_EDAC_SUPPORT=y
CONFIG_EDAC=y
CONFIG_EDAC_LEGACY_SYSFS=y
# CONFIG_EDAC_DEBUG is not set
# CONFIG_EDAC_DECODE_MCE is not set
# CONFIG_EDAC_GHES is not set
# CONFIG_EDAC_SCRUB is not set
# CONFIG_EDAC_ECS is not set
# CONFIG_EDAC_MEM_REPAIR is not set
# CONFIG_EDAC_E752X is not set
# CONFIG_EDAC_I82975X is not set
# CONFIG_EDAC_I3000 is not set
# CONFIG_EDAC_I3200 is not set
# CONFIG_EDAC_IE31200 is not set
# CONFIG_EDAC_X38 is not set
# CONFIG_EDAC_I5400 is not set
# CONFIG_EDAC_I7CORE is not set
# CONFIG_EDAC_I5100 is not set
# CONFIG_EDAC_I7300 is not set
# CONFIG_EDAC_SBRIDGE is not set
# CONFIG_EDAC_SKX is not set
# CONFIG_EDAC_I10NM is not set
# CONFIG_EDAC_PND2 is not set
# CONFIG_EDAC_IGEN6 is not set
CONFIG_RTC_LIB=y
CONFIG_RTC_MC146818_LIB=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_HCTOSYS=y
CONFIG_RTC_HCTOSYS_DEVICE="rtc0"
CONFIG_RTC_SYSTOHC=y
CONFIG_RTC_SYSTOHC_DEVICE="rtc0"
# CONFIG_RTC_DEBUG is not set
CONFIG_RTC_NVMEM=y
#
# RTC interfaces
#
CONFIG_RTC_INTF_SYSFS=y
CONFIG_RTC_INTF_PROC=y
CONFIG_RTC_INTF_DEV=y
# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set
# CONFIG_RTC_DRV_TEST is not set
#
# I2C RTC drivers
#
# CONFIG_RTC_DRV_88PM860X is not set
# CONFIG_RTC_DRV_ABB5ZES3 is not set
# CONFIG_RTC_DRV_ABEOZ9 is not set
# CONFIG_RTC_DRV_ABX80X is not set
# CONFIG_RTC_DRV_DS1307 is not set
# CONFIG_RTC_DRV_DS1374 is not set
# CONFIG_RTC_DRV_DS1672 is not set
# CONFIG_RTC_DRV_LP8788 is not set
# CONFIG_RTC_DRV_MAX6900 is not set
# CONFIG_RTC_DRV_MAX8925 is not set
# CONFIG_RTC_DRV_MAX8998 is not set
# CONFIG_RTC_DRV_MAX8997 is not set
# CONFIG_RTC_DRV_MAX31335 is not set
# CONFIG_RTC_DRV_RS5C372 is not set
# CONFIG_RTC_DRV_ISL1208 is not set
# CONFIG_RTC_DRV_ISL12022 is not set
# CONFIG_RTC_DRV_X1205 is not set
# CONFIG_RTC_DRV_PCF8523 is not set
# CONFIG_RTC_DRV_PCF85063 is not set
# CONFIG_RTC_DRV_PCF85363 is not set
# CONFIG_RTC_DRV_PCF8563 is not set
# CONFIG_RTC_DRV_PCF8583 is not set
# CONFIG_RTC_DRV_M41T80 is not set
# CONFIG_RTC_DRV_BQ32K is not set
# CONFIG_RTC_DRV_PALMAS is not set
# CONFIG_RTC_DRV_TPS6586X is not set
# CONFIG_RTC_DRV_TPS65910 is not set
# CONFIG_RTC_DRV_RC5T583 is not set
# CONFIG_RTC_DRV_S35390A is not set
# CONFIG_RTC_DRV_FM3130 is not set
# CONFIG_RTC_DRV_RX8010 is not set
# CONFIG_RTC_DRV_RX8111 is not set
# CONFIG_RTC_DRV_RX8581 is not set
# CONFIG_RTC_DRV_RX8025 is not set
# CONFIG_RTC_DRV_EM3027 is not set
# CONFIG_RTC_DRV_RV3028 is not set
# CONFIG_RTC_DRV_RV3032 is not set
# CONFIG_RTC_DRV_RV8803 is not set
# CONFIG_RTC_DRV_SD2405AL is not set
# CONFIG_RTC_DRV_SD3078 is not set
#
# SPI RTC drivers
#
# CONFIG_RTC_DRV_M41T93 is not set
# CONFIG_RTC_DRV_M41T94 is not set
# CONFIG_RTC_DRV_DS1302 is not set
# CONFIG_RTC_DRV_DS1305 is not set
# CONFIG_RTC_DRV_DS1343 is not set
# CONFIG_RTC_DRV_DS1347 is not set
# CONFIG_RTC_DRV_DS1390 is not set
# CONFIG_RTC_DRV_MAX6916 is not set
# CONFIG_RTC_DRV_R9701 is not set
# CONFIG_RTC_DRV_RX4581 is not set
# CONFIG_RTC_DRV_RS5C348 is not set
# CONFIG_RTC_DRV_MAX6902 is not set
# CONFIG_RTC_DRV_PCF2123 is not set
# CONFIG_RTC_DRV_MCP795 is not set
CONFIG_RTC_I2C_AND_SPI=y
#
# SPI and I2C RTC drivers
#
# CONFIG_RTC_DRV_DS3232 is not set
# CONFIG_RTC_DRV_PCF2127 is not set
# CONFIG_RTC_DRV_RV3029C2 is not set
# CONFIG_RTC_DRV_RX6110 is not set
#
# Platform RTC drivers
#
CONFIG_RTC_DRV_CMOS=y
# CONFIG_RTC_DRV_DS1286 is not set
# CONFIG_RTC_DRV_DS1511 is not set
# CONFIG_RTC_DRV_DS1553 is not set
# CONFIG_RTC_DRV_DS1685_FAMILY is not set
# CONFIG_RTC_DRV_DS1742 is not set
# CONFIG_RTC_DRV_DS2404 is not set
# CONFIG_RTC_DRV_DA9052 is not set
# CONFIG_RTC_DRV_DA9055 is not set
# CONFIG_RTC_DRV_DA9063 is not set
# CONFIG_RTC_DRV_STK17TA8 is not set
# CONFIG_RTC_DRV_M48T86 is not set
# CONFIG_RTC_DRV_M48T35 is not set
# CONFIG_RTC_DRV_M48T59 is not set
# CONFIG_RTC_DRV_MSM6242 is not set
# CONFIG_RTC_DRV_RP5C01 is not set
# CONFIG_RTC_DRV_WM831X is not set
# CONFIG_RTC_DRV_WM8350 is not set
#
# on-CPU RTC drivers
#
# CONFIG_RTC_DRV_FTRTC010 is not set
# CONFIG_RTC_DRV_PCAP is not set
#
# HID Sensor RTC drivers
#
# CONFIG_RTC_DRV_GOLDFISH is not set
CONFIG_DMADEVICES=y
# CONFIG_DMADEVICES_DEBUG is not set
#
# DMA Devices
#
CONFIG_DMA_ENGINE=y
CONFIG_DMA_ACPI=y
# CONFIG_ALTERA_MSGDMA is not set
# CONFIG_INTEL_IDMA64 is not set
# CONFIG_INTEL_IDXD is not set
# CONFIG_INTEL_IDXD_COMPAT is not set
CONFIG_INTEL_IOATDMA=y
# CONFIG_PLX_DMA is not set
# CONFIG_XILINX_DMA is not set
# CONFIG_XILINX_XDMA is not set
# CONFIG_AMD_PTDMA is not set
# CONFIG_AMD_QDMA is not set
# CONFIG_QCOM_HIDMA_MGMT is not set
# CONFIG_QCOM_HIDMA is not set
CONFIG_DW_DMAC_CORE=y
# CONFIG_DW_DMAC is not set
CONFIG_DW_DMAC_PCI=y
# CONFIG_DW_EDMA is not set
# CONFIG_SF_PDMA is not set
# CONFIG_INTEL_LDMA is not set
#
# DMA Clients
#
# CONFIG_ASYNC_TX_DMA is not set
# CONFIG_DMATEST is not set
CONFIG_DMA_ENGINE_RAID=y
#
# DMABUF options
#
CONFIG_SYNC_FILE=y
# CONFIG_SW_SYNC is not set
# CONFIG_UDMABUF is not set
# CONFIG_DMABUF_MOVE_NOTIFY is not set
# CONFIG_DMABUF_DEBUG is not set
# CONFIG_DMABUF_SELFTESTS is not set
# CONFIG_DMABUF_HEAPS is not set
# CONFIG_DMABUF_SYSFS_STATS is not set
# end of DMABUF options
CONFIG_DCA=y
# CONFIG_UIO is not set
# CONFIG_VFIO is not set
CONFIG_VIRT_DRIVERS=y
CONFIG_VMGENID=y
# CONFIG_VBOXGUEST is not set
# CONFIG_NITRO_ENCLAVES is not set
# CONFIG_EFI_SECRET is not set
CONFIG_VIRTIO_ANCHOR=y
CONFIG_VIRTIO=y
CONFIG_VIRTIO_PCI_LIB=y
CONFIG_VIRTIO_PCI_LIB_LEGACY=y
CONFIG_VIRTIO_MENU=y
CONFIG_VIRTIO_PCI=y
CONFIG_VIRTIO_PCI_ADMIN_LEGACY=y
CONFIG_VIRTIO_PCI_LEGACY=y
CONFIG_VIRTIO_BALLOON=y
CONFIG_VIRTIO_MEM=m
# CONFIG_VIRTIO_INPUT is not set
CONFIG_VIRTIO_MMIO=y
CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y
# CONFIG_VIRTIO_DEBUG is not set
# CONFIG_VIRTIO_RTC is not set
# CONFIG_VDPA is not set
CONFIG_VHOST_IOTLB=y
CONFIG_VHOST_TASK=y
CONFIG_VHOST=y
CONFIG_VHOST_MENU=y
CONFIG_VHOST_NET=y
# CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set
CONFIG_VHOST_ENABLE_FORK_OWNER_CONTROL=y
#
# Microsoft Hyper-V guest support
#
# CONFIG_HYPERV is not set
# end of Microsoft Hyper-V guest support
#
# Xen driver support
#
CONFIG_XEN_BALLOON=y
CONFIG_XEN_BALLOON_MEMORY_HOTPLUG=y
CONFIG_XEN_MEMORY_HOTPLUG_LIMIT=512
CONFIG_XEN_SCRUB_PAGES_DEFAULT=y
# CONFIG_XEN_DEV_EVTCHN is not set
CONFIG_XEN_BACKEND=y
# CONFIG_XENFS is not set
CONFIG_XEN_SYS_HYPERVISOR=y
CONFIG_XEN_XENBUS_FRONTEND=y
# CONFIG_XEN_GNTDEV is not set
# CONFIG_XEN_GRANT_DEV_ALLOC is not set
# CONFIG_XEN_GRANT_DMA_ALLOC is not set
CONFIG_SWIOTLB_XEN=y
# CONFIG_XEN_PCIDEV_BACKEND is not set
# CONFIG_XEN_PVCALLS_FRONTEND is not set
# CONFIG_XEN_PVCALLS_BACKEND is not set
CONFIG_XEN_PRIVCMD=m
CONFIG_XEN_ACPI_PROCESSOR=y
CONFIG_XEN_MCE_LOG=y
CONFIG_XEN_HAVE_PVMMU=y
CONFIG_XEN_EFI=y
CONFIG_XEN_AUTO_XLATE=y
CONFIG_XEN_ACPI=y
CONFIG_XEN_HAVE_VPMU=y
# CONFIG_XEN_VIRTIO is not set
# end of Xen driver support
# CONFIG_GREYBUS is not set
# CONFIG_COMEDI is not set
CONFIG_STAGING=y
# CONFIG_FB_SM750 is not set
CONFIG_STAGING_MEDIA=y
# CONFIG_FB_TFT is not set
# CONFIG_VME_BUS is not set
# CONFIG_GPIB is not set
# CONFIG_GOLDFISH is not set
CONFIG_CHROME_PLATFORMS=y
# CONFIG_CHROMEOS_ACPI is not set
# CONFIG_CHROMEOS_LAPTOP is not set
# CONFIG_CHROMEOS_PSTORE is not set
# CONFIG_CHROMEOS_TBMC is not set
# CONFIG_CROS_EC is not set
# CONFIG_CROS_KBD_LED_BACKLIGHT is not set
# CONFIG_CROS_HPS_I2C is not set
# CONFIG_CHROMEOS_PRIVACY_SCREEN is not set
# CONFIG_MELLANOX_PLATFORM is not set
CONFIG_SURFACE_PLATFORMS=y
# CONFIG_SURFACE3_WMI is not set
# CONFIG_SURFACE_3_POWER_OPREGION is not set
# CONFIG_SURFACE_GPE is not set
# CONFIG_SURFACE_HOTPLUG is not set
# CONFIG_SURFACE_PRO3_BUTTON is not set
CONFIG_X86_PLATFORM_DEVICES=y
CONFIG_ACPI_WMI=y
CONFIG_WMI_BMOF=y
# CONFIG_HUAWEI_WMI is not set
# CONFIG_MXM_WMI is not set
# CONFIG_XIAOMI_WMI is not set
# CONFIG_GIGABYTE_WMI is not set
# CONFIG_YOGABOOK is not set
# CONFIG_ACERHDF is not set
# CONFIG_ACER_WIRELESS is not set
# CONFIG_ACER_WMI is not set
#
# AMD HSMP Driver
#
# CONFIG_AMD_HSMP_ACPI is not set
# CONFIG_AMD_HSMP_PLAT is not set
# end of AMD HSMP Driver
# CONFIG_AMD_PMC is not set
# CONFIG_AMD_3D_VCACHE is not set
# CONFIG_AMD_WBRF is not set
# CONFIG_AMD_ISP_PLATFORM is not set
# CONFIG_ADV_SWBUTTON is not set
# CONFIG_APPLE_GMUX is not set
# CONFIG_ASUS_LAPTOP is not set
# CONFIG_ASUS_WIRELESS is not set
# CONFIG_ASUS_WMI is not set
# CONFIG_ASUS_TF103C_DOCK is not set
# CONFIG_MERAKI_MX100 is not set
# CONFIG_EEEPC_LAPTOP is not set
# CONFIG_X86_PLATFORM_DRIVERS_DELL is not set
# CONFIG_AMILO_RFKILL is not set
# CONFIG_FUJITSU_LAPTOP is not set
# CONFIG_FUJITSU_TABLET is not set
# CONFIG_GPD_POCKET_FAN is not set
# CONFIG_X86_PLATFORM_DRIVERS_HP is not set
# CONFIG_WIRELESS_HOTKEY is not set
# CONFIG_IBM_RTL is not set
# CONFIG_IDEAPAD_LAPTOP is not set
# CONFIG_LENOVO_WMI_HOTKEY_UTILITIES is not set
# CONFIG_SENSORS_HDAPS is not set
# CONFIG_THINKPAD_ACPI is not set
# CONFIG_THINKPAD_LMI is not set
# CONFIG_INTEL_ATOMISP2_PM is not set
# CONFIG_INTEL_IFS is not set
# CONFIG_INTEL_SAR_INT1092 is not set
# CONFIG_INTEL_SKL_INT3472 is not set
#
# Intel Speed Select Technology interface support
#
# CONFIG_INTEL_SPEED_SELECT_INTERFACE is not set
# end of Intel Speed Select Technology interface support
# CONFIG_INTEL_WMI_SBL_FW_UPDATE is not set
# CONFIG_INTEL_WMI_THUNDERBOLT is not set
#
# Intel Uncore Frequency Control
#
# CONFIG_INTEL_UNCORE_FREQ_CONTROL is not set
# end of Intel Uncore Frequency Control
# CONFIG_INTEL_HID_EVENT is not set
# CONFIG_INTEL_VBTN is not set
# CONFIG_INTEL_INT0002_VGPIO is not set
# CONFIG_INTEL_OAKTRAIL is not set
# CONFIG_INTEL_PUNIT_IPC is not set
# CONFIG_INTEL_RST is not set
# CONFIG_INTEL_SMARTCONNECT is not set
# CONFIG_INTEL_TURBO_MAX_3 is not set
# CONFIG_INTEL_VSEC is not set
# CONFIG_ACPI_QUICKSTART is not set
# CONFIG_MEEGOPAD_ANX7428 is not set
# CONFIG_MSI_EC is not set
# CONFIG_MSI_LAPTOP is not set
# CONFIG_MSI_WMI is not set
# CONFIG_MSI_WMI_PLATFORM is not set
# CONFIG_PCENGINES_APU2 is not set
# CONFIG_PORTWELL_EC is not set
# CONFIG_BARCO_P50_GPIO is not set
# CONFIG_SAMSUNG_GALAXYBOOK is not set
# CONFIG_SAMSUNG_LAPTOP is not set
# CONFIG_SAMSUNG_Q10 is not set
# CONFIG_TOSHIBA_BT_RFKILL is not set
# CONFIG_TOSHIBA_HAPS is not set
# CONFIG_TOSHIBA_WMI is not set
# CONFIG_ACPI_CMPC is not set
# CONFIG_COMPAL_LAPTOP is not set
# CONFIG_LG_LAPTOP is not set
# CONFIG_PANASONIC_LAPTOP is not set
# CONFIG_SONY_LAPTOP is not set
# CONFIG_SYSTEM76_ACPI is not set
# CONFIG_TOPSTAR_LAPTOP is not set
# CONFIG_SERIAL_MULTI_INSTANTIATE is not set
# CONFIG_INSPUR_PLATFORM_PROFILE is not set
# CONFIG_LENOVO_WMI_CAMERA is not set
# CONFIG_DASHARO_ACPI is not set
# CONFIG_INTEL_IPS is not set
# CONFIG_INTEL_SCU_PCI is not set
# CONFIG_INTEL_SCU_PLATFORM is not set
# CONFIG_SIEMENS_SIMATIC_IPC is not set
# CONFIG_WINMATE_FM07_KEYS is not set
# CONFIG_OXP_EC is not set
# CONFIG_TUXEDO_NB04_WMI_AB is not set
CONFIG_P2SB=y
CONFIG_HAVE_CLK=y
CONFIG_HAVE_CLK_PREPARE=y
CONFIG_COMMON_CLK=y
# CONFIG_COMMON_CLK_WM831X is not set
# CONFIG_LMK04832 is not set
# CONFIG_COMMON_CLK_MAX9485 is not set
# CONFIG_COMMON_CLK_SI5341 is not set
# CONFIG_COMMON_CLK_SI5351 is not set
# CONFIG_COMMON_CLK_SI544 is not set
# CONFIG_COMMON_CLK_CDCE706 is not set
# CONFIG_COMMON_CLK_CS2000_CP is not set
# CONFIG_CLK_TWL6040 is not set
# CONFIG_COMMON_CLK_PALMAS is not set
# CONFIG_COMMON_CLK_PWM is not set
# CONFIG_XILINX_VCU is not set
# CONFIG_HWSPINLOCK is not set
#
# Clock Source drivers
#
CONFIG_CLKEVT_I8253=y
CONFIG_I8253_LOCK=y
CONFIG_CLKBLD_I8253=y
# end of Clock Source drivers
CONFIG_MAILBOX=y
CONFIG_PCC=y
# CONFIG_ALTERA_MBOX is not set
CONFIG_IOMMU_IOVA=y
CONFIG_IOMMU_API=y
CONFIG_IOMMU_SUPPORT=y
#
# Generic IOMMU Pagetable Support
#
CONFIG_IOMMU_IO_PGTABLE=y
# end of Generic IOMMU Pagetable Support
# CONFIG_IOMMU_DEBUGFS is not set
# CONFIG_IOMMU_DEFAULT_DMA_STRICT is not set
CONFIG_IOMMU_DEFAULT_DMA_LAZY=y
# CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set
CONFIG_IOMMU_DMA=y
CONFIG_IOMMU_SVA=y
CONFIG_IOMMU_IOPF=y
CONFIG_AMD_IOMMU=y
CONFIG_DMAR_TABLE=y
CONFIG_INTEL_IOMMU=y
# CONFIG_INTEL_IOMMU_SVM is not set
# CONFIG_INTEL_IOMMU_DEFAULT_ON is not set
CONFIG_INTEL_IOMMU_FLOPPY_WA=y
# CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON is not set
CONFIG_INTEL_IOMMU_PERF_EVENTS=y
# CONFIG_IOMMUFD is not set
CONFIG_IRQ_REMAP=y
# CONFIG_VIRTIO_IOMMU is not set
#
# Remoteproc drivers
#
# CONFIG_REMOTEPROC is not set
# end of Remoteproc drivers
#
# Rpmsg drivers
#
# CONFIG_RPMSG_QCOM_GLINK_RPM is not set
# CONFIG_RPMSG_VIRTIO is not set
# end of Rpmsg drivers
#
# SOC (System On Chip) specific Drivers
#
#
# Amlogic SoC drivers
#
# end of Amlogic SoC drivers
#
# Broadcom SoC drivers
#
# end of Broadcom SoC drivers
#
# NXP/Freescale QorIQ SoC drivers
#
# end of NXP/Freescale QorIQ SoC drivers
#
# fujitsu SoC drivers
#
# end of fujitsu SoC drivers
#
# i.MX SoC drivers
#
# end of i.MX SoC drivers
#
# Enable LiteX SoC Builder specific drivers
#
# end of Enable LiteX SoC Builder specific drivers
# CONFIG_WPCM450_SOC is not set
#
# Qualcomm SoC drivers
#
# end of Qualcomm SoC drivers
# CONFIG_SOC_TI is not set
#
# Xilinx SoC drivers
#
# end of Xilinx SoC drivers
# end of SOC (System On Chip) specific Drivers
#
# PM Domains
#
#
# Amlogic PM Domains
#
# end of Amlogic PM Domains
#
# Broadcom PM Domains
#
# end of Broadcom PM Domains
#
# i.MX PM Domains
#
# end of i.MX PM Domains
#
# Qualcomm PM Domains
#
# end of Qualcomm PM Domains
# end of PM Domains
CONFIG_PM_DEVFREQ=y
#
# DEVFREQ Governors
#
CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=y
CONFIG_DEVFREQ_GOV_PERFORMANCE=y
CONFIG_DEVFREQ_GOV_POWERSAVE=y
CONFIG_DEVFREQ_GOV_USERSPACE=y
# CONFIG_DEVFREQ_GOV_PASSIVE is not set
#
# DEVFREQ Drivers
#
# CONFIG_PM_DEVFREQ_EVENT is not set
CONFIG_EXTCON=y
#
# Extcon Device Drivers
#
# CONFIG_EXTCON_FSA9480 is not set
# CONFIG_EXTCON_GPIO is not set
# CONFIG_EXTCON_INTEL_INT3496 is not set
# CONFIG_EXTCON_LC824206XA is not set
# CONFIG_EXTCON_MAX3355 is not set
# CONFIG_EXTCON_MAX77693 is not set
# CONFIG_EXTCON_MAX8997 is not set
# CONFIG_EXTCON_PALMAS is not set
# CONFIG_EXTCON_PTN5150 is not set
# CONFIG_EXTCON_RT8973A is not set
# CONFIG_EXTCON_SM5502 is not set
# CONFIG_EXTCON_USB_GPIO is not set
CONFIG_MEMORY=y
# CONFIG_IIO is not set
# CONFIG_NTB is not set
CONFIG_PWM=y
# CONFIG_PWM_DEBUG is not set
# CONFIG_PWM_CLK is not set
# CONFIG_PWM_DWC is not set
# CONFIG_PWM_GPIO is not set
# CONFIG_PWM_LPSS_PCI is not set
# CONFIG_PWM_LPSS_PLATFORM is not set
# CONFIG_PWM_PCA9685 is not set
#
# IRQ chip support
#
# end of IRQ chip support
# CONFIG_IPACK_BUS is not set
CONFIG_RESET_CONTROLLER=y
# CONFIG_RESET_GPIO is not set
# CONFIG_RESET_SIMPLE is not set
# CONFIG_RESET_TI_SYSCON is not set
# CONFIG_RESET_TI_TPS380X is not set
#
# PHY Subsystem
#
# CONFIG_GENERIC_PHY is not set
# CONFIG_USB_LGM_PHY is not set
# CONFIG_PHY_CAN_TRANSCEIVER is not set
#
# PHY drivers for Broadcom platforms
#
# CONFIG_BCM_KONA_USB2_PHY is not set
# end of PHY drivers for Broadcom platforms
# CONFIG_PHY_PXA_28NM_HSIC is not set
# CONFIG_PHY_PXA_28NM_USB2 is not set
# CONFIG_PHY_INTEL_LGM_EMMC is not set
# end of PHY Subsystem
CONFIG_POWERCAP=y
# CONFIG_INTEL_RAPL is not set
CONFIG_IDLE_INJECT=y
# CONFIG_MCB is not set
#
# Performance monitor support
#
# CONFIG_DWC_PCIE_PMU is not set
# end of Performance monitor support
CONFIG_RAS=y
# CONFIG_RAS_CEC is not set
# CONFIG_AMD_ATL is not set
# CONFIG_USB4 is not set
#
# Android
#
# CONFIG_ANDROID_BINDER_IPC is not set
# end of Android
# CONFIG_LIBNVDIMM is not set
CONFIG_DAX=y
# CONFIG_DEV_DAX is not set
CONFIG_NVMEM=y
CONFIG_NVMEM_SYSFS=y
# CONFIG_NVMEM_LAYOUTS is not set
# CONFIG_NVMEM_RMEM is not set
#
# HW tracing support
#
# CONFIG_STM is not set
# CONFIG_INTEL_TH is not set
# end of HW tracing support
# CONFIG_FPGA is not set
# CONFIG_TEE is not set
CONFIG_PM_OPP=y
# CONFIG_SIOX is not set
# CONFIG_SLIMBUS is not set
# CONFIG_INTERCONNECT is not set
# CONFIG_COUNTER is not set
# CONFIG_PECI is not set
# CONFIG_HTE is not set
# end of Device Drivers
#
# File systems
#
CONFIG_DCACHE_WORD_ACCESS=y
CONFIG_VALIDATE_FS_PARSER=y
CONFIG_FS_IOMAP=y
CONFIG_FS_STACK=y
CONFIG_BUFFER_HEAD=y
CONFIG_LEGACY_DIRECT_IO=y
# CONFIG_EXT2_FS is not set
# CONFIG_EXT3_FS is not set
CONFIG_EXT4_FS=y
CONFIG_EXT4_USE_FOR_EXT2=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
# CONFIG_EXT4_DEBUG is not set
CONFIG_JBD2=y
# CONFIG_JBD2_DEBUG is not set
CONFIG_FS_MBCACHE=y
# CONFIG_JFS_FS is not set
# CONFIG_XFS_FS is not set
# CONFIG_GFS2_FS is not set
CONFIG_BTRFS_FS=y
CONFIG_BTRFS_FS_POSIX_ACL=y
# CONFIG_BTRFS_FS_RUN_SANITY_TESTS is not set
CONFIG_BTRFS_DEBUG=y
CONFIG_BTRFS_ASSERT=y
# CONFIG_BTRFS_EXPERIMENTAL is not set
CONFIG_BTRFS_FS_REF_VERIFY=y
# CONFIG_NILFS2_FS is not set
# CONFIG_F2FS_FS is not set
# CONFIG_BCACHEFS_FS is not set
CONFIG_FS_POSIX_ACL=y
CONFIG_EXPORTFS=y
# CONFIG_EXPORTFS_BLOCK_OPS is not set
CONFIG_FILE_LOCKING=y
# CONFIG_FS_ENCRYPTION is not set
# CONFIG_FS_VERITY is not set
CONFIG_FSNOTIFY=y
CONFIG_DNOTIFY=y
CONFIG_INOTIFY_USER=y
CONFIG_FANOTIFY=y
CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
CONFIG_QUOTA=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_QUOTA_DEBUG is not set
# CONFIG_QFMT_V1 is not set
# CONFIG_QFMT_V2 is not set
CONFIG_QUOTACTL=y
# CONFIG_AUTOFS_FS is not set
CONFIG_FUSE_FS=y
# CONFIG_CUSE is not set
# CONFIG_VIRTIO_FS is not set
CONFIG_FUSE_PASSTHROUGH=y
CONFIG_FUSE_IO_URING=y
CONFIG_OVERLAY_FS=y
# CONFIG_OVERLAY_FS_REDIRECT_DIR is not set
CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW=y
# CONFIG_OVERLAY_FS_INDEX is not set
# CONFIG_OVERLAY_FS_XINO_AUTO is not set
# CONFIG_OVERLAY_FS_METACOPY is not set
# CONFIG_OVERLAY_FS_DEBUG is not set
#
# Caches
#
# end of Caches
#
# CD-ROM/DVD Filesystems
#
# CONFIG_ISO9660_FS is not set
# CONFIG_UDF_FS is not set
# end of CD-ROM/DVD Filesystems
#
# DOS/FAT/EXFAT/NT Filesystems
#
CONFIG_FAT_FS=y
# CONFIG_MSDOS_FS is not set
CONFIG_VFAT_FS=y
CONFIG_FAT_DEFAULT_CODEPAGE=437
CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
# CONFIG_FAT_DEFAULT_UTF8 is not set
# CONFIG_EXFAT_FS is not set
# CONFIG_NTFS3_FS is not set
# CONFIG_NTFS_FS is not set
# end of DOS/FAT/EXFAT/NT Filesystems
#
# Pseudo filesystems
#
CONFIG_PROC_FS=y
CONFIG_PROC_KCORE=y
CONFIG_PROC_VMCORE=y
# CONFIG_PROC_VMCORE_DEVICE_DUMP is not set
CONFIG_PROC_SYSCTL=y
CONFIG_PROC_PAGE_MONITOR=y
CONFIG_PROC_CHILDREN=y
CONFIG_PROC_PID_ARCH_STATUS=y
CONFIG_KERNFS=y
CONFIG_SYSFS=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_TMPFS_XATTR=y
# CONFIG_TMPFS_INODE64 is not set
# CONFIG_TMPFS_QUOTA is not set
CONFIG_HUGETLBFS=y
# CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON is not set
CONFIG_HUGETLB_PAGE=y
CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP=y
CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING=y
CONFIG_ARCH_HAS_GIGANTIC_PAGE=y
# CONFIG_CONFIGFS_FS is not set
CONFIG_EFIVAR_FS=y
# end of Pseudo filesystems
CONFIG_MISC_FILESYSTEMS=y
# CONFIG_ORANGEFS_FS is not set
# CONFIG_ADFS_FS is not set
# CONFIG_AFFS_FS is not set
CONFIG_ECRYPT_FS=y
CONFIG_ECRYPT_FS_MESSAGING=y
# CONFIG_HFS_FS is not set
# CONFIG_HFSPLUS_FS is not set
# CONFIG_BEFS_FS is not set
# CONFIG_BFS_FS is not set
# CONFIG_EFS_FS is not set
# CONFIG_CRAMFS is not set
# CONFIG_SQUASHFS is not set
# CONFIG_VXFS_FS is not set
# CONFIG_MINIX_FS is not set
# CONFIG_OMFS_FS is not set
# CONFIG_HPFS_FS is not set
# CONFIG_QNX4FS_FS is not set
# CONFIG_QNX6FS_FS is not set
# CONFIG_ROMFS_FS is not set
CONFIG_PSTORE=y
CONFIG_PSTORE_DEFAULT_KMSG_BYTES=10240
CONFIG_PSTORE_COMPRESS=y
# CONFIG_PSTORE_CONSOLE is not set
# CONFIG_PSTORE_PMSG is not set
# CONFIG_PSTORE_FTRACE is not set
# CONFIG_PSTORE_RAM is not set
# CONFIG_PSTORE_BLK is not set
# CONFIG_UFS_FS is not set
# CONFIG_EROFS_FS is not set
CONFIG_NETWORK_FILESYSTEMS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V2=y
CONFIG_NFS_V3=y
CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=y
CONFIG_NFS_SWAP=y
CONFIG_NFS_V4_1=y
CONFIG_NFS_V4_2=y
CONFIG_PNFS_FILE_LAYOUT=y
CONFIG_PNFS_BLOCK=y
CONFIG_PNFS_FLEXFILE_LAYOUT=y
CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN="kernel.org"
CONFIG_NFS_V4_1_MIGRATION=y
CONFIG_NFS_V4_SECURITY_LABEL=y
CONFIG_ROOT_NFS=y
# CONFIG_NFS_FSCACHE is not set
# CONFIG_NFS_USE_LEGACY_DNS is not set
CONFIG_NFS_USE_KERNEL_DNS=y
CONFIG_NFS_DEBUG=y
CONFIG_NFS_DISABLE_UDP_SUPPORT=y
# CONFIG_NFS_V4_2_READ_PLUS is not set
CONFIG_NFSD=y
# CONFIG_NFSD_V2 is not set
CONFIG_NFSD_V3_ACL=y
CONFIG_NFSD_V4=y
# CONFIG_NFSD_BLOCKLAYOUT is not set
# CONFIG_NFSD_SCSILAYOUT is not set
# CONFIG_NFSD_FLEXFILELAYOUT is not set
# CONFIG_NFSD_V4_2_INTER_SSC is not set
CONFIG_NFSD_V4_SECURITY_LABEL=y
# CONFIG_NFSD_LEGACY_CLIENT_TRACKING is not set
# CONFIG_NFSD_V4_DELEG_TIMESTAMPS is not set
CONFIG_GRACE_PERIOD=y
CONFIG_LOCKD=y
CONFIG_LOCKD_V4=y
CONFIG_NFS_ACL_SUPPORT=y
CONFIG_NFS_COMMON=y
# CONFIG_NFS_LOCALIO is not set
CONFIG_NFS_V4_2_SSC_HELPER=y
CONFIG_SUNRPC=y
CONFIG_SUNRPC_GSS=y
CONFIG_SUNRPC_BACKCHANNEL=y
CONFIG_SUNRPC_SWAP=y
CONFIG_RPCSEC_GSS_KRB5=y
CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA1=y
# CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA2 is not set
CONFIG_SUNRPC_DEBUG=y
# CONFIG_CEPH_FS is not set
# CONFIG_CIFS is not set
# CONFIG_SMB_SERVER is not set
# CONFIG_CODA_FS is not set
# CONFIG_AFS_FS is not set
CONFIG_NLS=y
CONFIG_NLS_DEFAULT="utf8"
CONFIG_NLS_CODEPAGE_437=y
# CONFIG_NLS_CODEPAGE_737 is not set
# CONFIG_NLS_CODEPAGE_775 is not set
# CONFIG_NLS_CODEPAGE_850 is not set
# CONFIG_NLS_CODEPAGE_852 is not set
# CONFIG_NLS_CODEPAGE_855 is not set
# CONFIG_NLS_CODEPAGE_857 is not set
# CONFIG_NLS_CODEPAGE_860 is not set
# CONFIG_NLS_CODEPAGE_861 is not set
# CONFIG_NLS_CODEPAGE_862 is not set
# CONFIG_NLS_CODEPAGE_863 is not set
# CONFIG_NLS_CODEPAGE_864 is not set
# CONFIG_NLS_CODEPAGE_865 is not set
# CONFIG_NLS_CODEPAGE_866 is not set
# CONFIG_NLS_CODEPAGE_869 is not set
# CONFIG_NLS_CODEPAGE_936 is not set
# CONFIG_NLS_CODEPAGE_950 is not set
# CONFIG_NLS_CODEPAGE_932 is not set
# CONFIG_NLS_CODEPAGE_949 is not set
# CONFIG_NLS_CODEPAGE_874 is not set
# CONFIG_NLS_ISO8859_8 is not set
# CONFIG_NLS_CODEPAGE_1250 is not set
# CONFIG_NLS_CODEPAGE_1251 is not set
# CONFIG_NLS_ASCII is not set
# CONFIG_NLS_ISO8859_1 is not set
# CONFIG_NLS_ISO8859_2 is not set
# CONFIG_NLS_ISO8859_3 is not set
# CONFIG_NLS_ISO8859_4 is not set
# CONFIG_NLS_ISO8859_5 is not set
# CONFIG_NLS_ISO8859_6 is not set
# CONFIG_NLS_ISO8859_7 is not set
# CONFIG_NLS_ISO8859_9 is not set
# CONFIG_NLS_ISO8859_13 is not set
# CONFIG_NLS_ISO8859_14 is not set
# CONFIG_NLS_ISO8859_15 is not set
# CONFIG_NLS_KOI8_R is not set
# CONFIG_NLS_KOI8_U is not set
# CONFIG_NLS_MAC_ROMAN is not set
# CONFIG_NLS_MAC_CELTIC is not set
# CONFIG_NLS_MAC_CENTEURO is not set
# CONFIG_NLS_MAC_CROATIAN is not set
# CONFIG_NLS_MAC_CYRILLIC is not set
# CONFIG_NLS_MAC_GAELIC is not set
# CONFIG_NLS_MAC_GREEK is not set
# CONFIG_NLS_MAC_ICELAND is not set
# CONFIG_NLS_MAC_INUIT is not set
# CONFIG_NLS_MAC_ROMANIAN is not set
# CONFIG_NLS_MAC_TURKISH is not set
# CONFIG_NLS_UTF8 is not set
# CONFIG_UNICODE is not set
CONFIG_IO_WQ=y
# end of File systems
#
# Security options
#
CONFIG_KEYS=y
# CONFIG_KEYS_REQUEST_CACHE is not set
CONFIG_PERSISTENT_KEYRINGS=y
# CONFIG_BIG_KEYS is not set
CONFIG_TRUSTED_KEYS=y
CONFIG_HAVE_TRUSTED_KEYS=y
CONFIG_TRUSTED_KEYS_TPM=y
CONFIG_ENCRYPTED_KEYS=y
# CONFIG_USER_DECRYPTED_DATA is not set
# CONFIG_KEY_DH_OPERATIONS is not set
# CONFIG_SECURITY_DMESG_RESTRICT is not set
CONFIG_PROC_MEM_ALWAYS_FORCE=y
# CONFIG_PROC_MEM_FORCE_PTRACE is not set
# CONFIG_PROC_MEM_NO_FORCE is not set
CONFIG_SECURITY=y
CONFIG_HAS_SECURITY_AUDIT=y
CONFIG_SECURITYFS=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_PATH=y
CONFIG_INTEL_TXT=y
CONFIG_LSM_MMAP_MIN_ADDR=0
# CONFIG_STATIC_USERMODEHELPER is not set
CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_SELINUX_DEVELOP=y
CONFIG_SECURITY_SELINUX_AVC_STATS=y
CONFIG_SECURITY_SELINUX_SIDTAB_HASH_BITS=9
CONFIG_SECURITY_SELINUX_SID2STR_CACHE_SIZE=256
# CONFIG_SECURITY_SELINUX_DEBUG is not set
CONFIG_SECURITY_SMACK=y
# CONFIG_SECURITY_SMACK_BRINGUP is not set
# CONFIG_SECURITY_SMACK_NETFILTER is not set
# CONFIG_SECURITY_SMACK_APPEND_SIGNALS is not set
CONFIG_SECURITY_TOMOYO=y
CONFIG_SECURITY_TOMOYO_MAX_ACCEPT_ENTRY=2048
CONFIG_SECURITY_TOMOYO_MAX_AUDIT_LOG=1024
# CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER is not set
CONFIG_SECURITY_TOMOYO_POLICY_LOADER="/sbin/tomoyo-init"
CONFIG_SECURITY_TOMOYO_ACTIVATION_TRIGGER="/sbin/init"
# CONFIG_SECURITY_TOMOYO_INSECURE_BUILTIN_SETTING is not set
CONFIG_SECURITY_APPARMOR=y
# CONFIG_SECURITY_APPARMOR_DEBUG is not set
CONFIG_SECURITY_APPARMOR_INTROSPECT_POLICY=y
CONFIG_SECURITY_APPARMOR_HASH=y
CONFIG_SECURITY_APPARMOR_HASH_DEFAULT=y
CONFIG_SECURITY_APPARMOR_EXPORT_BINARY=y
CONFIG_SECURITY_APPARMOR_PARANOID_LOAD=y
# CONFIG_SECURITY_LOADPIN is not set
CONFIG_SECURITY_YAMA=y
# CONFIG_SECURITY_SAFESETID is not set
# CONFIG_SECURITY_LOCKDOWN_LSM is not set
# CONFIG_SECURITY_LANDLOCK is not set
# CONFIG_SECURITY_IPE is not set
CONFIG_INTEGRITY=y
CONFIG_INTEGRITY_SIGNATURE=y
CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
CONFIG_INTEGRITY_TRUSTED_KEYRING=y
CONFIG_INTEGRITY_AUDIT=y
CONFIG_IMA=y
CONFIG_IMA_MEASURE_PCR_IDX=10
CONFIG_IMA_LSM_RULES=y
CONFIG_IMA_NG_TEMPLATE=y
# CONFIG_IMA_SIG_TEMPLATE is not set
CONFIG_IMA_DEFAULT_TEMPLATE="ima-ng"
CONFIG_IMA_DEFAULT_HASH_SHA1=y
# CONFIG_IMA_DEFAULT_HASH_SHA256 is not set
# CONFIG_IMA_DEFAULT_HASH_SHA512 is not set
CONFIG_IMA_DEFAULT_HASH="sha1"
# CONFIG_IMA_WRITE_POLICY is not set
# CONFIG_IMA_READ_POLICY is not set
CONFIG_IMA_APPRAISE=y
# CONFIG_IMA_ARCH_POLICY is not set
# CONFIG_IMA_APPRAISE_BUILD_POLICY is not set
CONFIG_IMA_APPRAISE_BOOTPARAM=y
# CONFIG_IMA_APPRAISE_MODSIG is not set
# CONFIG_IMA_BLACKLIST_KEYRING is not set
# CONFIG_IMA_LOAD_X509 is not set
CONFIG_IMA_MEASURE_ASYMMETRIC_KEYS=y
CONFIG_IMA_QUEUE_EARLY_BOOT_KEYS=y
# CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT is not set
# CONFIG_IMA_DISABLE_HTABLE is not set
CONFIG_EVM=y
CONFIG_EVM_ATTR_FSUUID=y
# CONFIG_EVM_EXTRA_SMACK_XATTRS is not set
# CONFIG_EVM_ADD_XATTRS is not set
# CONFIG_EVM_LOAD_X509 is not set
# CONFIG_DEFAULT_SECURITY_SELINUX is not set
# CONFIG_DEFAULT_SECURITY_SMACK is not set
# CONFIG_DEFAULT_SECURITY_TOMOYO is not set
CONFIG_DEFAULT_SECURITY_APPARMOR=y
# CONFIG_DEFAULT_SECURITY_DAC is not set
CONFIG_LSM="yama,loadpin,safesetid,integrity,apparmor,selinux,smack,tomoyo"
#
# Kernel hardening options
#
#
# Memory initialization
#
CONFIG_CC_HAS_AUTO_VAR_INIT_PATTERN=y
CONFIG_CC_HAS_AUTO_VAR_INIT_ZERO_BARE=y
CONFIG_CC_HAS_AUTO_VAR_INIT_ZERO=y
CONFIG_INIT_STACK_NONE=y
# CONFIG_INIT_STACK_ALL_PATTERN is not set
# CONFIG_INIT_STACK_ALL_ZERO is not set
# CONFIG_INIT_ON_ALLOC_DEFAULT_ON is not set
# CONFIG_INIT_ON_FREE_DEFAULT_ON is not set
CONFIG_CC_HAS_ZERO_CALL_USED_REGS=y
# CONFIG_ZERO_CALL_USED_REGS is not set
# end of Memory initialization
#
# Bounds checking
#
CONFIG_FORTIFY_SOURCE=y
CONFIG_HARDENED_USERCOPY=y
CONFIG_HARDENED_USERCOPY_DEFAULT_ON=y
# end of Bounds checking
#
# Hardening of kernel data structures
#
# CONFIG_LIST_HARDENED is not set
# CONFIG_BUG_ON_DATA_CORRUPTION is not set
# end of Hardening of kernel data structures
CONFIG_RANDSTRUCT_NONE=y
# end of Kernel hardening options
# end of Security options
CONFIG_XOR_BLOCKS=y
CONFIG_CRYPTO=y
#
# Crypto core or helper
#
CONFIG_CRYPTO_ALGAPI=y
CONFIG_CRYPTO_ALGAPI2=y
CONFIG_CRYPTO_AEAD=y
CONFIG_CRYPTO_AEAD2=y
CONFIG_CRYPTO_SIG=y
CONFIG_CRYPTO_SIG2=y
CONFIG_CRYPTO_SKCIPHER=y
CONFIG_CRYPTO_SKCIPHER2=y
CONFIG_CRYPTO_HASH=y
CONFIG_CRYPTO_HASH2=y
CONFIG_CRYPTO_RNG=y
CONFIG_CRYPTO_RNG2=y
CONFIG_CRYPTO_RNG_DEFAULT=y
CONFIG_CRYPTO_AKCIPHER2=y
CONFIG_CRYPTO_AKCIPHER=y
CONFIG_CRYPTO_KPP2=y
CONFIG_CRYPTO_KPP=y
CONFIG_CRYPTO_ACOMP2=y
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_MANAGER2=y
# CONFIG_CRYPTO_USER is not set
# CONFIG_CRYPTO_SELFTESTS is not set
CONFIG_CRYPTO_NULL=y
# CONFIG_CRYPTO_PCRYPT is not set
CONFIG_CRYPTO_CRYPTD=y
CONFIG_CRYPTO_AUTHENC=y
# CONFIG_CRYPTO_KRB5ENC is not set
# CONFIG_CRYPTO_BENCHMARK is not set
CONFIG_CRYPTO_ENGINE=m
# end of Crypto core or helper
#
# Public-key cryptography
#
CONFIG_CRYPTO_RSA=y
# CONFIG_CRYPTO_DH is not set
CONFIG_CRYPTO_ECC=y
CONFIG_CRYPTO_ECDH=y
# CONFIG_CRYPTO_ECDSA is not set
# CONFIG_CRYPTO_ECRDSA is not set
# CONFIG_CRYPTO_CURVE25519 is not set
# end of Public-key cryptography
#
# Block ciphers
#
CONFIG_CRYPTO_AES=y
# CONFIG_CRYPTO_AES_TI is not set
# CONFIG_CRYPTO_ARIA is not set
# CONFIG_CRYPTO_BLOWFISH is not set
# CONFIG_CRYPTO_CAMELLIA is not set
# CONFIG_CRYPTO_CAST5 is not set
# CONFIG_CRYPTO_CAST6 is not set
CONFIG_CRYPTO_DES=y
# CONFIG_CRYPTO_FCRYPT is not set
# CONFIG_CRYPTO_SERPENT is not set
# CONFIG_CRYPTO_SM4_GENERIC is not set
# CONFIG_CRYPTO_TWOFISH is not set
# end of Block ciphers
#
# Length-preserving ciphers and modes
#
# CONFIG_CRYPTO_ADIANTUM is not set
# CONFIG_CRYPTO_CHACHA20 is not set
CONFIG_CRYPTO_CBC=y
# CONFIG_CRYPTO_CTR is not set
CONFIG_CRYPTO_CTS=y
CONFIG_CRYPTO_ECB=y
# CONFIG_CRYPTO_HCTR2 is not set
CONFIG_CRYPTO_LRW=y
# CONFIG_CRYPTO_PCBC is not set
CONFIG_CRYPTO_XTS=y
# end of Length-preserving ciphers and modes
#
# AEAD (authenticated encryption with associated data) ciphers
#
# CONFIG_CRYPTO_AEGIS128 is not set
# CONFIG_CRYPTO_CHACHA20POLY1305 is not set
# CONFIG_CRYPTO_CCM is not set
# CONFIG_CRYPTO_GCM is not set
CONFIG_CRYPTO_GENIV=m
# CONFIG_CRYPTO_SEQIV is not set
CONFIG_CRYPTO_ECHAINIV=m
CONFIG_CRYPTO_ESSIV=y
# end of AEAD (authenticated encryption with associated data) ciphers
#
# Hashes, digests, and MACs
#
CONFIG_CRYPTO_BLAKE2B=y
# CONFIG_CRYPTO_CMAC is not set
# CONFIG_CRYPTO_GHASH is not set
CONFIG_CRYPTO_HMAC=y
# CONFIG_CRYPTO_MD4 is not set
CONFIG_CRYPTO_MD5=y
# CONFIG_CRYPTO_MICHAEL_MIC is not set
# CONFIG_CRYPTO_RMD160 is not set
CONFIG_CRYPTO_SHA1=y
CONFIG_CRYPTO_SHA256=y
CONFIG_CRYPTO_SHA512=y
CONFIG_CRYPTO_SHA3=y
# CONFIG_CRYPTO_SM3_GENERIC is not set
# CONFIG_CRYPTO_STREEBOG is not set
# CONFIG_CRYPTO_WP512 is not set
# CONFIG_CRYPTO_XCBC is not set
CONFIG_CRYPTO_XXHASH=y
# end of Hashes, digests, and MACs
#
# CRCs (cyclic redundancy checks)
#
CONFIG_CRYPTO_CRC32C=y
# CONFIG_CRYPTO_CRC32 is not set
# end of CRCs (cyclic redundancy checks)
#
# Compression
#
CONFIG_CRYPTO_DEFLATE=y
CONFIG_CRYPTO_LZO=y
# CONFIG_CRYPTO_842 is not set
# CONFIG_CRYPTO_LZ4 is not set
# CONFIG_CRYPTO_LZ4HC is not set
# CONFIG_CRYPTO_ZSTD is not set
# end of Compression
#
# Random number generation
#
# CONFIG_CRYPTO_ANSI_CPRNG is not set
CONFIG_CRYPTO_DRBG_MENU=y
CONFIG_CRYPTO_DRBG_HMAC=y
# CONFIG_CRYPTO_DRBG_HASH is not set
# CONFIG_CRYPTO_DRBG_CTR is not set
CONFIG_CRYPTO_DRBG=y
CONFIG_CRYPTO_JITTERENTROPY=y
CONFIG_CRYPTO_JITTERENTROPY_MEMORY_BLOCKS=64
CONFIG_CRYPTO_JITTERENTROPY_MEMORY_BLOCKSIZE=32
CONFIG_CRYPTO_JITTERENTROPY_OSR=1
# end of Random number generation
#
# Userspace interface
#
# CONFIG_CRYPTO_USER_API_HASH is not set
# CONFIG_CRYPTO_USER_API_SKCIPHER is not set
# CONFIG_CRYPTO_USER_API_RNG is not set
# CONFIG_CRYPTO_USER_API_AEAD is not set
# end of Userspace interface
CONFIG_CRYPTO_HASH_INFO=y
#
# Accelerated Cryptographic Algorithms for CPU (x86)
#
CONFIG_CRYPTO_AES_NI_INTEL=y
# CONFIG_CRYPTO_BLOWFISH_X86_64 is not set
# CONFIG_CRYPTO_CAMELLIA_X86_64 is not set
# CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64 is not set
# CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64 is not set
# CONFIG_CRYPTO_CAST5_AVX_X86_64 is not set
# CONFIG_CRYPTO_CAST6_AVX_X86_64 is not set
# CONFIG_CRYPTO_DES3_EDE_X86_64 is not set
# CONFIG_CRYPTO_SERPENT_SSE2_X86_64 is not set
# CONFIG_CRYPTO_SERPENT_AVX_X86_64 is not set
# CONFIG_CRYPTO_SERPENT_AVX2_X86_64 is not set
# CONFIG_CRYPTO_SM4_AESNI_AVX_X86_64 is not set
# CONFIG_CRYPTO_SM4_AESNI_AVX2_X86_64 is not set
# CONFIG_CRYPTO_TWOFISH_X86_64 is not set
# CONFIG_CRYPTO_TWOFISH_X86_64_3WAY is not set
# CONFIG_CRYPTO_TWOFISH_AVX_X86_64 is not set
# CONFIG_CRYPTO_ARIA_AESNI_AVX_X86_64 is not set
# CONFIG_CRYPTO_ARIA_AESNI_AVX2_X86_64 is not set
# CONFIG_CRYPTO_ARIA_GFNI_AVX512_X86_64 is not set
# CONFIG_CRYPTO_AEGIS128_AESNI_SSE2 is not set
# CONFIG_CRYPTO_NHPOLY1305_SSE2 is not set
# CONFIG_CRYPTO_NHPOLY1305_AVX2 is not set
# CONFIG_CRYPTO_POLYVAL_CLMUL_NI is not set
# CONFIG_CRYPTO_SHA1_SSSE3 is not set
# CONFIG_CRYPTO_SHA512_SSSE3 is not set
# CONFIG_CRYPTO_SM3_AVX_X86_64 is not set
# CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL is not set
# end of Accelerated Cryptographic Algorithms for CPU (x86)
CONFIG_CRYPTO_HW=y
CONFIG_CRYPTO_DEV_PADLOCK=y
# CONFIG_CRYPTO_DEV_PADLOCK_AES is not set
# CONFIG_CRYPTO_DEV_PADLOCK_SHA is not set
# CONFIG_CRYPTO_DEV_ATMEL_ECC is not set
# CONFIG_CRYPTO_DEV_ATMEL_SHA204A is not set
# CONFIG_CRYPTO_DEV_CCP is not set
# CONFIG_CRYPTO_DEV_NITROX_CNN55XX is not set
# CONFIG_CRYPTO_DEV_QAT_DH895xCC is not set
# CONFIG_CRYPTO_DEV_QAT_C3XXX is not set
# CONFIG_CRYPTO_DEV_QAT_C62X is not set
# CONFIG_CRYPTO_DEV_QAT_4XXX is not set
# CONFIG_CRYPTO_DEV_QAT_420XX is not set
# CONFIG_CRYPTO_DEV_QAT_6XXX is not set
# CONFIG_CRYPTO_DEV_QAT_DH895xCCVF is not set
# CONFIG_CRYPTO_DEV_QAT_C3XXXVF is not set
# CONFIG_CRYPTO_DEV_QAT_C62XVF is not set
CONFIG_CRYPTO_DEV_VIRTIO=m
# CONFIG_CRYPTO_DEV_SAFEXCEL is not set
# CONFIG_CRYPTO_DEV_AMLOGIC_GXL is not set
CONFIG_ASYMMETRIC_KEY_TYPE=y
CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y
CONFIG_X509_CERTIFICATE_PARSER=y
# CONFIG_PKCS8_PRIVATE_KEY_PARSER is not set
CONFIG_PKCS7_MESSAGE_PARSER=y
# CONFIG_PKCS7_TEST_KEY is not set
# CONFIG_SIGNED_PE_FILE_VERIFICATION is not set
# CONFIG_FIPS_SIGNATURE_SELFTEST is not set
#
# Certificates for signature checking
#
CONFIG_MODULE_SIG_KEY="certs/signing_key.pem"
CONFIG_MODULE_SIG_KEY_TYPE_RSA=y
# CONFIG_MODULE_SIG_KEY_TYPE_ECDSA is not set
CONFIG_SYSTEM_TRUSTED_KEYRING=y
CONFIG_SYSTEM_TRUSTED_KEYS=""
# CONFIG_SYSTEM_EXTRA_CERTIFICATE is not set
# CONFIG_SECONDARY_TRUSTED_KEYRING is not set
# CONFIG_SYSTEM_BLACKLIST_KEYRING is not set
# end of Certificates for signature checking
# CONFIG_CRYPTO_KRB5 is not set
CONFIG_BINARY_PRINTF=y
#
# Library routines
#
CONFIG_RAID6_PQ=y
CONFIG_RAID6_PQ_BENCHMARK=y
CONFIG_LINEAR_RANGES=y
# CONFIG_PACKING is not set
CONFIG_BITREVERSE=y
CONFIG_GENERIC_STRNCPY_FROM_USER=y
CONFIG_GENERIC_STRNLEN_USER=y
CONFIG_GENERIC_NET_UTILS=y
# CONFIG_CORDIC is not set
# CONFIG_PRIME_NUMBERS is not set
CONFIG_RATIONAL=y
CONFIG_GENERIC_IOMAP=y
CONFIG_ARCH_USE_CMPXCHG_LOCKREF=y
CONFIG_ARCH_HAS_FAST_MULTIPLIER=y
CONFIG_ARCH_USE_SYM_ANNOTATIONS=y
#
# Crypto library routines
#
CONFIG_CRYPTO_LIB_UTILS=y
CONFIG_CRYPTO_LIB_AES=y
CONFIG_CRYPTO_LIB_AESCFB=y
CONFIG_CRYPTO_LIB_GF128MUL=y
CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC=y
CONFIG_CRYPTO_LIB_DES=y
CONFIG_CRYPTO_LIB_POLY1305_RSIZE=11
CONFIG_CRYPTO_LIB_SHA1=y
CONFIG_CRYPTO_LIB_SHA256=y
CONFIG_CRYPTO_ARCH_HAVE_LIB_SHA256=y
CONFIG_CRYPTO_ARCH_HAVE_LIB_SHA256_SIMD=y
CONFIG_CRYPTO_LIB_SHA256_GENERIC=y
# CONFIG_CRYPTO_BLAKE2S_X86 is not set
CONFIG_CRYPTO_SHA256_X86_64=y
# end of Crypto library routines
CONFIG_CRC_CCITT=y
CONFIG_CRC16=y
CONFIG_CRC_T10DIF=y
CONFIG_ARCH_HAS_CRC_T10DIF=y
CONFIG_CRC_T10DIF_ARCH=y
CONFIG_CRC32=y
CONFIG_ARCH_HAS_CRC32=y
CONFIG_CRC32_ARCH=y
CONFIG_CRC64=y
CONFIG_ARCH_HAS_CRC64=y
CONFIG_CRC64_ARCH=y
CONFIG_CRC_OPTIMIZATIONS=y
CONFIG_XXHASH=y
# CONFIG_RANDOM32_SELFTEST is not set
CONFIG_ZLIB_INFLATE=y
CONFIG_ZLIB_DEFLATE=y
CONFIG_LZO_COMPRESS=y
CONFIG_LZO_DECOMPRESS=y
CONFIG_LZ4_DECOMPRESS=y
CONFIG_ZSTD_COMMON=y
CONFIG_ZSTD_COMPRESS=y
CONFIG_ZSTD_DECOMPRESS=y
CONFIG_XZ_DEC=y
CONFIG_XZ_DEC_X86=y
CONFIG_XZ_DEC_POWERPC=y
CONFIG_XZ_DEC_ARM=y
CONFIG_XZ_DEC_ARMTHUMB=y
CONFIG_XZ_DEC_ARM64=y
CONFIG_XZ_DEC_SPARC=y
CONFIG_XZ_DEC_RISCV=y
# CONFIG_XZ_DEC_MICROLZMA is not set
CONFIG_XZ_DEC_BCJ=y
# CONFIG_XZ_DEC_TEST is not set
CONFIG_DECOMPRESS_GZIP=y
CONFIG_DECOMPRESS_BZIP2=y
CONFIG_DECOMPRESS_LZMA=y
CONFIG_DECOMPRESS_XZ=y
CONFIG_DECOMPRESS_LZO=y
CONFIG_DECOMPRESS_LZ4=y
CONFIG_DECOMPRESS_ZSTD=y
CONFIG_GENERIC_ALLOCATOR=y
CONFIG_TEXTSEARCH=y
CONFIG_TEXTSEARCH_KMP=y
CONFIG_TEXTSEARCH_BM=y
CONFIG_TEXTSEARCH_FSM=y
CONFIG_INTERVAL_TREE=y
CONFIG_XARRAY_MULTI=y
CONFIG_ASSOCIATIVE_ARRAY=y
CONFIG_HAS_IOMEM=y
CONFIG_HAS_IOPORT=y
CONFIG_HAS_IOPORT_MAP=y
CONFIG_HAS_DMA=y
CONFIG_DMA_OPS_HELPERS=y
CONFIG_NEED_SG_DMA_FLAGS=y
CONFIG_NEED_SG_DMA_LENGTH=y
CONFIG_NEED_DMA_MAP_STATE=y
CONFIG_ARCH_DMA_ADDR_T_64BIT=y
CONFIG_SWIOTLB=y
# CONFIG_SWIOTLB_DYNAMIC is not set
CONFIG_DMA_NEED_SYNC=y
# CONFIG_DMA_CMA is not set
# CONFIG_DMA_API_DEBUG is not set
# CONFIG_DMA_MAP_BENCHMARK is not set
CONFIG_SGL_ALLOC=y
CONFIG_IOMMU_HELPER=y
CONFIG_CPU_RMAP=y
CONFIG_DQL=y
CONFIG_GLOB=y
# CONFIG_GLOB_SELFTEST is not set
CONFIG_NLATTR=y
CONFIG_CLZ_TAB=y
# CONFIG_IRQ_POLL is not set
CONFIG_MPILIB=y
CONFIG_SIGNATURE=y
CONFIG_DIMLIB=y
CONFIG_OID_REGISTRY=y
CONFIG_UCS2_STRING=y
CONFIG_HAVE_GENERIC_VDSO=y
CONFIG_GENERIC_GETTIMEOFDAY=y
CONFIG_GENERIC_VDSO_TIME_NS=y
CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT=y
CONFIG_VDSO_GETRANDOM=y
CONFIG_GENERIC_VDSO_DATA_STORE=y
CONFIG_FONT_SUPPORT=y
# CONFIG_FONTS is not set
CONFIG_FONT_8x8=y
CONFIG_FONT_8x16=y
CONFIG_SG_POOL=y
CONFIG_ARCH_HAS_PMEM_API=y
CONFIG_ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION=y
CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE=y
CONFIG_ARCH_HAS_COPY_MC=y
CONFIG_ARCH_STACKWALK=y
CONFIG_STACKDEPOT=y
CONFIG_STACKDEPOT_MAX_FRAMES=64
CONFIG_SBITMAP=y
# CONFIG_LWQ_TEST is not set
# end of Library routines
CONFIG_PLDMFW=y
CONFIG_ASN1_ENCODER=y
CONFIG_FIRMWARE_TABLE=y
CONFIG_UNION_FIND=y
#
# Kernel hacking
#
#
# printk and dmesg options
#
CONFIG_PRINTK_TIME=y
# CONFIG_PRINTK_CALLER is not set
# CONFIG_STACKTRACE_BUILD_ID is not set
CONFIG_CONSOLE_LOGLEVEL_DEFAULT=7
CONFIG_CONSOLE_LOGLEVEL_QUIET=4
CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4
CONFIG_BOOT_PRINTK_DELAY=y
CONFIG_DYNAMIC_DEBUG=y
CONFIG_DYNAMIC_DEBUG_CORE=y
CONFIG_SYMBOLIC_ERRNAME=y
CONFIG_DEBUG_BUGVERBOSE=y
# end of printk and dmesg options
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_MISC=y
#
# Compile-time checks and compiler options
#
CONFIG_DEBUG_INFO=y
CONFIG_AS_HAS_NON_CONST_ULEB128=y
# CONFIG_DEBUG_INFO_NONE is not set
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
# CONFIG_DEBUG_INFO_DWARF4 is not set
# CONFIG_DEBUG_INFO_DWARF5 is not set
# CONFIG_DEBUG_INFO_REDUCED is not set
CONFIG_DEBUG_INFO_COMPRESSED_NONE=y
# CONFIG_DEBUG_INFO_COMPRESSED_ZLIB is not set
# CONFIG_DEBUG_INFO_COMPRESSED_ZSTD is not set
# CONFIG_DEBUG_INFO_SPLIT is not set
# CONFIG_DEBUG_INFO_BTF is not set
CONFIG_PAHOLE_HAS_SPLIT_BTF=y
CONFIG_PAHOLE_HAS_LANG_EXCLUDE=y
# CONFIG_GDB_SCRIPTS is not set
CONFIG_FRAME_WARN=1024
# CONFIG_STRIP_ASM_SYMS is not set
# CONFIG_READABLE_ASM is not set
# CONFIG_HEADERS_INSTALL is not set
# CONFIG_DEBUG_SECTION_MISMATCH is not set
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
# CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B is not set
CONFIG_OBJTOOL=y
# CONFIG_OBJTOOL_WERROR is not set
# CONFIG_VMLINUX_MAP is not set
# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set
# end of Compile-time checks and compiler options
#
# Generic Kernel Debugging Instruments
#
CONFIG_MAGIC_SYSRQ=y
CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE=0x1
CONFIG_MAGIC_SYSRQ_SERIAL=y
CONFIG_MAGIC_SYSRQ_SERIAL_SEQUENCE=""
CONFIG_DEBUG_FS=y
CONFIG_DEBUG_FS_ALLOW_ALL=y
# CONFIG_DEBUG_FS_DISALLOW_MOUNT is not set
# CONFIG_DEBUG_FS_ALLOW_NONE is not set
CONFIG_HAVE_ARCH_KGDB=y
CONFIG_KGDB=y
CONFIG_KGDB_HONOUR_BLOCKLIST=y
CONFIG_KGDB_SERIAL_CONSOLE=y
# CONFIG_KGDB_TESTS is not set
CONFIG_KGDB_LOW_LEVEL_TRAP=y
CONFIG_KGDB_KDB=y
CONFIG_KDB_DEFAULT_ENABLE=0x1
CONFIG_KDB_KEYBOARD=y
CONFIG_KDB_CONTINUE_CATASTROPHIC=0
CONFIG_ARCH_HAS_EARLY_DEBUG=y
CONFIG_ARCH_HAS_UBSAN=y
# CONFIG_UBSAN is not set
CONFIG_HAVE_ARCH_KCSAN=y
CONFIG_HAVE_KCSAN_COMPILER=y
# CONFIG_KCSAN is not set
# end of Generic Kernel Debugging Instruments
#
# Networking Debugging
#
# CONFIG_NET_DEV_REFCNT_TRACKER is not set
# CONFIG_NET_NS_REFCNT_TRACKER is not set
# CONFIG_DEBUG_NET is not set
# CONFIG_DEBUG_NET_SMALL_RTNL is not set
# end of Networking Debugging
#
# Memory Debugging
#
# CONFIG_PAGE_EXTENSION is not set
# CONFIG_DEBUG_PAGEALLOC is not set
CONFIG_SLUB_DEBUG=y
# CONFIG_SLUB_DEBUG_ON is not set
# CONFIG_PAGE_OWNER is not set
# CONFIG_PAGE_TABLE_CHECK is not set
# CONFIG_PAGE_POISONING is not set
# CONFIG_DEBUG_PAGE_REF is not set
# CONFIG_DEBUG_RODATA_TEST is not set
CONFIG_ARCH_HAS_DEBUG_WX=y
# CONFIG_DEBUG_WX is not set
CONFIG_ARCH_HAS_PTDUMP=y
# CONFIG_PTDUMP_DEBUGFS is not set
CONFIG_HAVE_DEBUG_KMEMLEAK=y
# CONFIG_DEBUG_KMEMLEAK is not set
# CONFIG_PER_VMA_LOCK_STATS is not set
# CONFIG_DEBUG_OBJECTS is not set
# CONFIG_SHRINKER_DEBUG is not set
# CONFIG_DEBUG_STACK_USAGE is not set
# CONFIG_SCHED_STACK_END_CHECK is not set
CONFIG_ARCH_HAS_DEBUG_VM_PGTABLE=y
# CONFIG_DEBUG_VFS is not set
# CONFIG_DEBUG_VM is not set
# CONFIG_DEBUG_VM_PGTABLE is not set
CONFIG_ARCH_HAS_DEBUG_VIRTUAL=y
# CONFIG_DEBUG_VIRTUAL is not set
# CONFIG_DEBUG_MEMORY_INIT is not set
# CONFIG_DEBUG_PER_CPU_MAPS is not set
CONFIG_ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP=y
# CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP is not set
# CONFIG_MEM_ALLOC_PROFILING is not set
CONFIG_HAVE_ARCH_KASAN=y
CONFIG_HAVE_ARCH_KASAN_VMALLOC=y
CONFIG_CC_HAS_KASAN_GENERIC=y
CONFIG_CC_HAS_WORKING_NOSANITIZE_ADDRESS=y
# CONFIG_KASAN is not set
CONFIG_HAVE_ARCH_KFENCE=y
# CONFIG_KFENCE is not set
CONFIG_HAVE_ARCH_KMSAN=y
# end of Memory Debugging
# CONFIG_DEBUG_SHIRQ is not set
#
# Debug Oops, Lockups and Hangs
#
# CONFIG_PANIC_ON_OOPS is not set
CONFIG_PANIC_ON_OOPS_VALUE=0
CONFIG_PANIC_TIMEOUT=0
CONFIG_LOCKUP_DETECTOR=y
CONFIG_SOFTLOCKUP_DETECTOR=y
# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
CONFIG_HAVE_HARDLOCKUP_DETECTOR_BUDDY=y
CONFIG_HARDLOCKUP_DETECTOR=y
# CONFIG_HARDLOCKUP_DETECTOR_PREFER_BUDDY is not set
CONFIG_HARDLOCKUP_DETECTOR_PERF=y
# CONFIG_HARDLOCKUP_DETECTOR_BUDDY is not set
# CONFIG_HARDLOCKUP_DETECTOR_ARCH is not set
CONFIG_HARDLOCKUP_DETECTOR_COUNTS_HRTIMER=y
CONFIG_HARDLOCKUP_CHECK_TIMESTAMP=y
# CONFIG_BOOTPARAM_HARDLOCKUP_PANIC is not set
CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=120
# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
CONFIG_DETECT_HUNG_TASK_BLOCKER=y
# CONFIG_WQ_WATCHDOG is not set
# CONFIG_WQ_CPU_INTENSIVE_REPORT is not set
# CONFIG_TEST_LOCKUP is not set
# end of Debug Oops, Lockups and Hangs
#
# Scheduler Debugging
#
CONFIG_SCHED_INFO=y
CONFIG_SCHEDSTATS=y
# end of Scheduler Debugging
# CONFIG_DEBUG_PREEMPT is not set
#
# Lock Debugging (spinlocks, mutexes, etc...)
#
CONFIG_LOCK_DEBUGGING_SUPPORT=y
# CONFIG_PROVE_LOCKING is not set
# CONFIG_LOCK_STAT is not set
# CONFIG_DEBUG_RT_MUTEXES is not set
# CONFIG_DEBUG_SPINLOCK is not set
# CONFIG_DEBUG_MUTEXES is not set
# CONFIG_DEBUG_WW_MUTEX_SLOWPATH is not set
# CONFIG_DEBUG_RWSEMS is not set
# CONFIG_DEBUG_LOCK_ALLOC is not set
# CONFIG_DEBUG_ATOMIC_SLEEP is not set
# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
# CONFIG_LOCK_TORTURE_TEST is not set
# CONFIG_WW_MUTEX_SELFTEST is not set
# CONFIG_SCF_TORTURE_TEST is not set
# CONFIG_CSD_LOCK_WAIT_DEBUG is not set
# end of Lock Debugging (spinlocks, mutexes, etc...)
# CONFIG_NMI_CHECK_CPU is not set
CONFIG_DEBUG_IRQFLAGS=y
CONFIG_STACKTRACE=y
# CONFIG_WARN_ALL_UNSEEDED_RANDOM is not set
# CONFIG_DEBUG_KOBJECT is not set
#
# Debug kernel data structures
#
# CONFIG_DEBUG_LIST is not set
# CONFIG_DEBUG_PLIST is not set
# CONFIG_DEBUG_SG is not set
# CONFIG_DEBUG_NOTIFIERS is not set
# CONFIG_DEBUG_MAPLE_TREE is not set
# end of Debug kernel data structures
#
# RCU Debugging
#
# CONFIG_RCU_SCALE_TEST is not set
# CONFIG_RCU_TORTURE_TEST is not set
# CONFIG_RCU_REF_SCALE_TEST is not set
CONFIG_RCU_CPU_STALL_TIMEOUT=60
CONFIG_RCU_EXP_CPU_STALL_TIMEOUT=0
# CONFIG_RCU_CPU_STALL_CPUTIME is not set
# CONFIG_RCU_TRACE is not set
# CONFIG_RCU_EQS_DEBUG is not set
# end of RCU Debugging
# CONFIG_DEBUG_WQ_FORCE_RR_CPU is not set
# CONFIG_CPU_HOTPLUG_STATE_CONTROL is not set
CONFIG_LATENCYTOP=y
# CONFIG_DEBUG_CGROUP_REF is not set
CONFIG_USER_STACKTRACE_SUPPORT=y
CONFIG_NOP_TRACER=y
CONFIG_HAVE_RETHOOK=y
CONFIG_RETHOOK=y
CONFIG_HAVE_FUNCTION_TRACER=y
CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y
CONFIG_HAVE_FUNCTION_GRAPH_FREGS=y
CONFIG_HAVE_FTRACE_GRAPH_FUNC=y
CONFIG_HAVE_DYNAMIC_FTRACE=y
CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS=y
CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y
CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS=y
CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS=y
CONFIG_HAVE_DYNAMIC_FTRACE_NO_PATCHABLE=y
CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
CONFIG_HAVE_SYSCALL_TRACEPOINTS=y
CONFIG_HAVE_FENTRY=y
CONFIG_HAVE_OBJTOOL_MCOUNT=y
CONFIG_HAVE_OBJTOOL_NOP_MCOUNT=y
CONFIG_HAVE_C_RECORDMCOUNT=y
CONFIG_HAVE_BUILDTIME_MCOUNT_SORT=y
CONFIG_BUILDTIME_MCOUNT_SORT=y
CONFIG_TRACER_MAX_TRACE=y
CONFIG_TRACE_CLOCK=y
CONFIG_RING_BUFFER=y
CONFIG_EVENT_TRACING=y
CONFIG_CONTEXT_SWITCH_TRACER=y
CONFIG_TRACING=y
CONFIG_GENERIC_TRACER=y
CONFIG_TRACING_SUPPORT=y
CONFIG_FTRACE=y
# CONFIG_BOOTTIME_TRACING is not set
CONFIG_FUNCTION_TRACER=y
CONFIG_FUNCTION_GRAPH_TRACER=y
# CONFIG_FUNCTION_GRAPH_RETVAL is not set
# CONFIG_FUNCTION_GRAPH_RETADDR is not set
CONFIG_DYNAMIC_FTRACE=y
CONFIG_DYNAMIC_FTRACE_WITH_REGS=y
CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y
CONFIG_DYNAMIC_FTRACE_WITH_ARGS=y
CONFIG_FPROBE=y
CONFIG_FUNCTION_PROFILER=y
CONFIG_STACK_TRACER=y
# CONFIG_IRQSOFF_TRACER is not set
# CONFIG_PREEMPT_TRACER is not set
CONFIG_SCHED_TRACER=y
# CONFIG_HWLAT_TRACER is not set
# CONFIG_OSNOISE_TRACER is not set
# CONFIG_TIMERLAT_TRACER is not set
CONFIG_MMIOTRACE=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_TRACER_SNAPSHOT=y
# CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP is not set
CONFIG_BRANCH_PROFILE_NONE=y
# CONFIG_PROFILE_ANNOTATED_BRANCHES is not set
CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_FPROBE_EVENTS=y
CONFIG_KPROBE_EVENTS=y
# CONFIG_KPROBE_EVENTS_ON_NOTRACE is not set
CONFIG_UPROBE_EVENTS=y
CONFIG_BPF_EVENTS=y
CONFIG_DYNAMIC_EVENTS=y
CONFIG_PROBE_EVENTS=y
# CONFIG_BPF_KPROBE_OVERRIDE is not set
CONFIG_FTRACE_MCOUNT_RECORD=y
CONFIG_FTRACE_MCOUNT_USE_CC=y
# CONFIG_SYNTH_EVENTS is not set
# CONFIG_USER_EVENTS is not set
# CONFIG_HIST_TRIGGERS is not set
# CONFIG_TRACE_EVENT_INJECT is not set
CONFIG_TRACEPOINT_BENCHMARK=y
# CONFIG_RING_BUFFER_BENCHMARK is not set
# CONFIG_TRACE_EVAL_MAP_FILE is not set
# CONFIG_FTRACE_RECORD_RECURSION is not set
# CONFIG_FTRACE_VALIDATE_RCU_IS_WATCHING is not set
# CONFIG_FTRACE_STARTUP_TEST is not set
# CONFIG_FTRACE_SORT_STARTUP_TEST is not set
# CONFIG_RING_BUFFER_STARTUP_TEST is not set
# CONFIG_RING_BUFFER_VALIDATE_TIME_DELTAS is not set
# CONFIG_MMIOTRACE_TEST is not set
# CONFIG_PREEMPTIRQ_DELAY_TEST is not set
# CONFIG_KPROBE_EVENT_GEN_TEST is not set
# CONFIG_RV is not set
# CONFIG_PROVIDE_OHCI1394_DMA_INIT is not set
# CONFIG_SAMPLES is not set
CONFIG_HAVE_SAMPLE_FTRACE_DIRECT=y
CONFIG_HAVE_SAMPLE_FTRACE_DIRECT_MULTI=y
CONFIG_ARCH_HAS_DEVMEM_IS_ALLOWED=y
CONFIG_STRICT_DEVMEM=y
# CONFIG_IO_STRICT_DEVMEM is not set
#
# x86 Debugging
#
CONFIG_EARLY_PRINTK_USB=y
# CONFIG_X86_VERBOSE_BOOTUP is not set
CONFIG_EARLY_PRINTK=y
CONFIG_EARLY_PRINTK_DBGP=y
# CONFIG_EARLY_PRINTK_USB_XDBC is not set
# CONFIG_EFI_PGT_DUMP is not set
# CONFIG_DEBUG_TLBFLUSH is not set
# CONFIG_IOMMU_DEBUG is not set
CONFIG_HAVE_MMIOTRACE_SUPPORT=y
# CONFIG_X86_DECODER_SELFTEST is not set
# CONFIG_IO_DELAY_0X80 is not set
CONFIG_IO_DELAY_0XED=y
# CONFIG_IO_DELAY_UDELAY is not set
# CONFIG_IO_DELAY_NONE is not set
# CONFIG_DEBUG_BOOT_PARAMS is not set
# CONFIG_CPA_DEBUG is not set
# CONFIG_DEBUG_ENTRY is not set
# CONFIG_DEBUG_NMI_SELFTEST is not set
CONFIG_X86_DEBUG_FPU=y
# CONFIG_PUNIT_ATOM_DEBUG is not set
CONFIG_UNWINDER_ORC=y
# CONFIG_UNWINDER_FRAME_POINTER is not set
# end of x86 Debugging
#
# Kernel Testing and Coverage
#
# CONFIG_KUNIT is not set
# CONFIG_NOTIFIER_ERROR_INJECTION is not set
CONFIG_FUNCTION_ERROR_INJECTION=y
# CONFIG_FAULT_INJECTION is not set
CONFIG_ARCH_HAS_KCOV=y
# CONFIG_KCOV is not set
CONFIG_RUNTIME_TESTING_MENU=y
# CONFIG_TEST_DHRY is not set
# CONFIG_LKDTM is not set
# CONFIG_TEST_MIN_HEAP is not set
# CONFIG_TEST_DIV64 is not set
# CONFIG_TEST_MULDIV64 is not set
# CONFIG_BACKTRACE_SELF_TEST is not set
# CONFIG_TEST_REF_TRACKER is not set
# CONFIG_RBTREE_TEST is not set
# CONFIG_REED_SOLOMON_TEST is not set
# CONFIG_INTERVAL_TREE_TEST is not set
# CONFIG_PERCPU_TEST is not set
# CONFIG_ATOMIC64_SELFTEST is not set
# CONFIG_TEST_HEXDUMP is not set
# CONFIG_TEST_KSTRTOX is not set
# CONFIG_TEST_BITMAP is not set
# CONFIG_TEST_UUID is not set
# CONFIG_TEST_XARRAY is not set
# CONFIG_TEST_MAPLE_TREE is not set
# CONFIG_TEST_RHASHTABLE is not set
# CONFIG_TEST_IDA is not set
# CONFIG_TEST_LKM is not set
# CONFIG_TEST_BITOPS is not set
# CONFIG_TEST_VMALLOC is not set
# CONFIG_TEST_BPF is not set
# CONFIG_FIND_BIT_BENCHMARK is not set
# CONFIG_TEST_FIRMWARE is not set
# CONFIG_TEST_SYSCTL is not set
# CONFIG_TEST_UDELAY is not set
# CONFIG_TEST_STATIC_KEYS is not set
# CONFIG_TEST_DYNAMIC_DEBUG is not set
# CONFIG_TEST_KMOD is not set
# CONFIG_TEST_KALLSYMS is not set
# CONFIG_TEST_MEMCAT_P is not set
# CONFIG_TEST_MEMINIT is not set
# CONFIG_TEST_FREE_PAGES is not set
# CONFIG_TEST_FPU is not set
# CONFIG_TEST_CLOCKSOURCE_WATCHDOG is not set
# CONFIG_TEST_OBJPOOL is not set
CONFIG_ARCH_USE_MEMTEST=y
CONFIG_MEMTEST=y
# end of Kernel Testing and Coverage
#
# Rust hacking
#
# end of Rust hacking
# end of Kernel hacking
CONFIG_IO_URING_ZCRX=y
--
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com
On Thu, Sep 11 2025 at 16:00, Mathieu Desnoyers wrote:
> On 2025-09-11 12:47, Thomas Gleixner wrote:
>> The normal case is that the fault is pretty much guaranteed to happen
>> because the scheduler places the child on a different CPU and therefore
>> the CPU/MM IDs need to be updated anyway.
>
> Is this "normal case" the behavior you wish the scheduler to have or
> is it based on metrics ?
I did measurements and that's how I came to the conclusion. I measured
forks vs. faults in the page which holds the RSEQ memory, aka. TLS.
> Here is the result of a sched_fork() instrumentation vs the first
> pick_next_task that follows for each thread (accounted with schedstats
> see diff at the end of this email, followed by my kernel config), and we
> can observe the following pattern with a parallel kernel build workload
> (64-core VM, on a physical machine that has 384 logical cpus):
>
> - When undercommitted, in which case the scheduler has idle core
> targets to select (make -j32 on a 64-core VM), indeed the behavior
> favors spreading the workload to different CPUs (in which case we
> would hit a page fault on return from fork, as you predicted):
> 543 same-cpu vs 62k different-cpu.
> Most of the same-cpu are on CPU 0 (402), surprisingly,
>
> - When using all the cores (make -j64 on a 64-core VM) (and
> also for overcommit scenarios tested up to -j400), the
> picture differs:
> 27k same-cpu vs 32k different-cpu, which is pretty much even.
Sure. In overcommit there is obviously a higher probability to stay on
the same CPU.
> This is a kernel build workload, which consist mostly of
> single-threaded processes (make, gcc). I therefore expect the
> rseq mm_cid to be always 0 for those cases.
Sure, but that does not mean the CPU stays the same.
>> The only cases where this is not true, are when there is no capacity to
>> do so or on UP or the parent was affine to a single CPU, which is what
>> the child inherits.
>
> Wrong. See above.
Huch? You just confirmed what I was saying. It's not true for
overcommit, i.e. there is no capacity to place it on a different CPU
right away.
> Moreover, a common userspace process execution pattern is fork followed
> by exec, in which case whatever page faults happen on fork become
> irrelevant as soon as exec replaces the entire mapping.
That's correct, but for exec to happen it needs to complete the fork
first and go out to user space to do the exec without touching TLS and
without being placed on a different CPU, right?
So again, that depends on the ability of placement where this ends up.
>> the fault will happen in the fast path first, which means the exit code
>> has to go another round through the work loop instead of forcing the
>> fault right away on the first exit in the slowpath, where it can be
>> actually resolved.
>
> I think you misunderstood my improvement suggestion: I do _not_
> recommend that we pointlessly take the fast-path on fork, but rather
> than we keep the cached cpu_cid value (from the parent) around to check
> whether we actually need to store to userspace and take a page fault.
> Based on this, we can choose whether we use the fast-path or go directly
> to the slow-path.
That's not that trivial because you need an extra decision in the
scheduler to take the slowpath based on information whether that's the
first schedule and change after fork so the scheduler can set either
TIF_RSEQ or NOTIFY along with the slowpath flag.
But that's just not worth it. So forcing the slowpath after fork seemed
to be a sensible thing to do.
Anyway, I just took the numbers for a make -j128 build on a 64CPU
systems again with the force removed. I have to correct my claim about
vast majority for that case, but 69% of forked children which fault in the
TLS/RSEQ page before exit or exec is definitely not a small number.
The pattern, which brought me to actually force the slowpath
unconditionally was:
child()
...
return to user:
fast_path -> faults
slow_path -> faults and handles the fault
fast_path (usually a NOOP, but not necessarily)
done
Though I'm not insisting on keeping the fork force slowpath mechanics.
As we might eventually agree on, it depends on the workload and the
state of the system, so the mileage may vary pretty much.
Thanks,
tglx
On 2025-09-12 10:22, Thomas Gleixner wrote: > On Thu, Sep 11 2025 at 16:00, Mathieu Desnoyers wrote: >> On 2025-09-11 12:47, Thomas Gleixner wrote: >>> The only cases where this is not true, are when there is no capacity to >>> do so or on UP or the parent was affine to a single CPU, which is what >>> the child inherits. >> >> Wrong. See above. > > Huch? You just confirmed what I was saying. It's not true for > overcommit, i.e. there is no capacity to do place it on a different CPU > right away. I agree considering that being "in no capacity to do so" includes the fact that no other CPU are immediately available to run it. I misunderstood this as being a longer-term configuration constraint rather than a transient thing. > >>> the fault will happen in the fast path first, which means the exit code >>> has to go another round through the work loop instead of forcing the >>> fault right away on the first exit in the slowpath, where it can be >>> actually resolved. >> >> I think you misunderstood my improvement suggestion: I do _not_ >> recommend that we pointlessly take the fast-path on fork, but rather >> than we keep the cached cpu_cid value (from the parent) around to check >> whether we actually need to store to userspace and take a page fault. >> Based on this, we can choose whether we use the fast-path or go directly >> to the slow-path. > > That's not that trivial because you need extra an extra decision in the > scheduler to take the slowpath based on information whether that's the > first schedule and change after fork so the scheduler can set either > TIF_RSEQ or NOTIFY along with the slowpath flag. > > But that's just not worth it. So forcing the slowpath after fork seemed > to be a sensible thing to do. I agree that it may not be worth the complexity. > > Anyway, I just took the numbers for a make -j128 build on a 64CPU > systems again with the force removed. 
I have to correct my claim about > vast majority for that case, but 69% of forked childs which fault in the > TLS/RSEQ page before exit or exec is definitely not a small number. > > The pattern, which brought me to actually force the slowpath > unconditionally was: > > child() > ... > return to user: > > fast_path -> faults > slow_path -> faults and handles the fault > fast_path (usually a NOOP, but not necessarily) > done > > Though I'm not insisting on keeping the fork force slowpath mechanics. I'm OK forcing the slowpath on return to user from fork, but the comments justifying why we do this should be worded in more nuanced terms with regards to the ratio of expected faults on fork and variability with workloads. That opens the door to further improvements in the future based on additional metrics. > > As we might eventually agree on, it depends on the workload and the > state of the system, so the milage may vary pretty much. Yes. I'm OK with just altering the comments and commit message to a wording that is less absolute about the optimization choices made here. Thanks, Mathieu -- Mathieu Desnoyers EfficiOS Inc. https://www.efficios.com
On 2025-09-11 10:44, Mathieu Desnoyers wrote:
> On 2025-09-08 17:32, Thomas Gleixner wrote:
>> Now that all bits and pieces are in place, hook the RSEQ handling fast
>> path
>> function into exit_to_user_mode_prepare() after the TIF work bits have
>> been
>> handled. In case of fast path failure, TIF_NOTIFY_RESUME has been raised
>> and the caller needs to take another turn through the TIF handling slow
>> path.
>>
>> This only works for architectures, which use the generic entry code.
>
> Remove comma after "architectures"
>
>> Architectures, who still have their own incomplete hacks are not
>> supported
>
> Remove comma after "Architectures"
>
>> and won't be.
>>
>> This results in the following improvements:
>>
>> Kernel build Before After Reduction
>>
>> exit to user 80692981 80514451
>> signal checks: 32581 121 99%
>> slowpath runs: 1201408 1.49% 198 0.00% 100%
>> fastpath runs: 675941 0.84% N/A
>> id updates: 1233989 1.53% 50541 0.06% 96%
>> cs checks: 1125366 1.39% 0 0.00% 100%
>> cs cleared: 1125366 100% 0 100%
>> cs fixup: 0 0% 0
>>
>> RSEQ selftests Before After Reduction
>>
>> exit to user: 386281778 387373750
>> signal checks: 35661203 0 100%
>> slowpath runs: 140542396 36.38% 100 0.00% 100%
>> fastpath runs: 9509789 2.51% N/A
>> id updates: 176203599 45.62% 9087994 2.35% 95%
>> cs checks: 175587856 45.46% 4728394 1.22% 98%
>> cs cleared: 172359544 98.16% 1319307 27.90% 99%
>> cs fixup: 3228312 1.84% 3409087 72.10%
>>
>> The 'cs cleared' and 'cs fixup' percentanges are not relative to the exit
>
> percentages
>
>> to user invocations, they are relative to the actual 'cs check'
>> invocations.
>>
>> While some of this could have been avoided in the original code, like the
>> obvious clearing of CS when it's already clear, the main problem of going
>> through TIF_NOTIFY_RESUME cannot be solved. In some workloads the RSEQ
>> notify handler is invoked more than once before going out to user
>> space. Doing this once when everything has stabilized is the only
>> solution
>> to avoid this.
>>
>> The initial attempt to completely decouple it from the TIF work turned
>> out
>> to be suboptimal for workloads, which do a lot of quick and short system
>> calls. Even if the fast path decision is only 4 instructions (including a
>> conditional branch), this adds up quickly and becomes measurable when the
>> rate for actually having to handle rseq is in the low single digit
>> percentage range of user/kernel transitions.
>>
>> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
>> ---
>> V4: Move the rseq handling into a separate loop to avoid gotos later on
>> ---
>> include/linux/irq-entry-common.h | 7 ++-----
>> include/linux/resume_user_mode.h | 2 +-
>> include/linux/rseq.h | 23 +++++++++++++++++------
>> init/Kconfig | 2 +-
>> kernel/entry/common.c | 26 +++++++++++++++++++-------
>> kernel/rseq.c | 8 ++++++--
>> 6 files changed, 46 insertions(+), 22 deletions(-)
>>
>> --- a/include/linux/irq-entry-common.h
>> +++ b/include/linux/irq-entry-common.h
>> @@ -197,11 +197,8 @@ static __always_inline void arch_exit_to
>> */
>> void arch_do_signal_or_restart(struct pt_regs *regs);
>> -/**
>> - * exit_to_user_mode_loop - do any pending work before leaving to
>> user space
>> - */
>> -unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
>> - unsigned long ti_work);
>> +/* Handle pending TIF work */
>> +unsigned long exit_to_user_mode_loop(struct pt_regs *regs, unsigned
>> long ti_work);
>> /**
>> * exit_to_user_mode_prepare - call exit_to_user_mode_loop() if
>> required
>> --- a/include/linux/resume_user_mode.h
>> +++ b/include/linux/resume_user_mode.h
>> @@ -59,7 +59,7 @@ static inline void resume_user_mode_work
>> mem_cgroup_handle_over_high(GFP_KERNEL);
>> blkcg_maybe_throttle_current();
>> - rseq_handle_notify_resume(regs);
>> + rseq_handle_slowpath(regs);
>> }
>> #endif /* LINUX_RESUME_USER_MODE_H */
>> --- a/include/linux/rseq.h
>> +++ b/include/linux/rseq.h
>> @@ -5,13 +5,19 @@
>> #ifdef CONFIG_RSEQ
>> #include <linux/sched.h>
>> -void __rseq_handle_notify_resume(struct pt_regs *regs);
>> +void __rseq_handle_slowpath(struct pt_regs *regs);
>> -static inline void rseq_handle_notify_resume(struct pt_regs *regs)
>> +/* Invoked from resume_user_mode_work() */
>> +static inline void rseq_handle_slowpath(struct pt_regs *regs)
>> {
>> - /* '&' is intentional to spare one conditional branch */
>> - if (current->rseq.event.sched_switch & current->rseq.event.has_rseq)
>> - __rseq_handle_notify_resume(regs);
>> + if (IS_ENABLED(CONFIG_GENERIC_ENTRY)) {
>> + if (current->rseq.event.slowpath)
>> + __rseq_handle_slowpath(regs);
>> + } else {
>> + /* '&' is intentional to spare one conditional branch */
>> + if (current->rseq.event.sched_switch & current-
>> >rseq.event.has_rseq)
>
> Ref. to earlier comment about has_rseq check perhaps redundant.
>
>> + __rseq_handle_slowpath(regs);
>> + }
>> }
>> void __rseq_signal_deliver(int sig, struct pt_regs *regs);
>> @@ -142,11 +148,16 @@ static inline void rseq_fork(struct task
>> } else {
>> t->rseq = current->rseq;
>> t->rseq.ids.cpu_cid = ~0ULL;
>
> As discussed earlier, do we really want to clear cpu_cid here, or
> copy from parent ? If we keep the parent's cached values, I suspect
> we can skip the page fault on return from fork in many cases.
>
>> + /*
>> + * If it has rseq, force it into the slow path right away
>> + * because it is guaranteed to fault.
>> + */
>> + t->rseq.event.slowpath = t->rseq.event.has_rseq;
>
> I think we can do better here. It's only guaranteed to fault if:
>
> - has_rseq is set, AND
> - cpu or cid has changed compared to the cached value OR
> - rseq_cs user pointer is non-NULL.
>
> Otherwise we should be able to handle the return from fork from the fast
> path just with loads from the rseq area, or am I missing something ?
>
> Thanks,
>
> Mathieu
>
Just making sure you don't miss one additional comment below...
>> }
>> }
>> #else /* CONFIG_RSEQ */
>> -static inline void rseq_handle_notify_resume(struct ksignal *ksig,
>> struct pt_regs *regs) { }
>> +static inline void rseq_handle_slowpath(struct pt_regs *regs) { }
>> static inline void rseq_signal_deliver(struct ksignal *ksig, struct
>> pt_regs *regs) { }
>> static inline void rseq_sched_switch_event(struct task_struct *t) { }
>> static inline void rseq_sched_set_task_cpu(struct task_struct *t,
>> unsigned int cpu) { }
>> --- a/init/Kconfig
>> +++ b/init/Kconfig
>> @@ -1911,7 +1911,7 @@ config RSEQ_DEBUG_DEFAULT_ENABLE
>> config DEBUG_RSEQ
>> default n
>> bool "Enable debugging of rseq() system call" if EXPERT
>> - depends on RSEQ && DEBUG_KERNEL
>> + depends on RSEQ && DEBUG_KERNEL && !GENERIC_ENTRY
>
> I'm confused about this hunk. Perhaps this belongs to a different
> commit ?
^ here.
Thanks,
Mathieu
>
> Thanks,
>
> Mathieu
>
>> select RSEQ_DEBUG_DEFAULT_ENABLE
>> help
>> Enable extra debugging checks for the rseq system call.
>> --- a/kernel/entry/common.c
>> +++ b/kernel/entry/common.c
>> @@ -11,13 +11,8 @@
>> /* Workaround to allow gradual conversion of architecture code */
>> void __weak arch_do_signal_or_restart(struct pt_regs *regs) { }
>> -/**
>> - * exit_to_user_mode_loop - do any pending work before leaving to
>> user space
>> - * @regs: Pointer to pt_regs on entry stack
>> - * @ti_work: TIF work flags as read by the caller
>> - */
>> -__always_inline unsigned long exit_to_user_mode_loop(struct pt_regs
>> *regs,
>> - unsigned long ti_work)
>> +static __always_inline unsigned long __exit_to_user_mode_loop(struct
>> pt_regs *regs,
>> + unsigned long ti_work)
>> {
>> /*
>> * Before returning to user space ensure that all pending work
>> @@ -62,6 +57,23 @@ void __weak arch_do_signal_or_restart(st
>> return ti_work;
>> }
>> +/**
>> + * exit_to_user_mode_loop - do any pending work before leaving to
>> user space
>> + * @regs: Pointer to pt_regs on entry stack
>> + * @ti_work: TIF work flags as read by the caller
>> + */
>> +__always_inline unsigned long exit_to_user_mode_loop(struct pt_regs
>> *regs,
>> + unsigned long ti_work)
>> +{
>> + for (;;) {
>> + ti_work = __exit_to_user_mode_loop(regs, ti_work);
>> +
>> + if (likely(!rseq_exit_to_user_mode_restart(regs)))
>> + return ti_work;
>> + ti_work = read_thread_flags();
>> + }
>> +}
>> +
>> noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
>> {
>> irqentry_state_t ret = {
>> --- a/kernel/rseq.c
>> +++ b/kernel/rseq.c
>> @@ -234,7 +234,11 @@ static bool rseq_handle_cs(struct task_s
>> static void rseq_slowpath_update_usr(struct pt_regs *regs)
>> {
>> - /* Preserve rseq state and user_irq state for exit to user */
>> + /*
>> + * Preserve rseq state and user_irq state. The generic entry code
>> + * clears user_irq on the way out, the non-generic entry
>> + * architectures are not having user_irq.
>> + */
>> const struct rseq_event evt_mask = { .has_rseq = true, .user_irq
>> = true, };
>> struct task_struct *t = current;
>> struct rseq_ids ids;
>> @@ -286,7 +290,7 @@ static void rseq_slowpath_update_usr(str
>> }
>> }
>> -void __rseq_handle_notify_resume(struct pt_regs *regs)
>> +void __rseq_handle_slowpath(struct pt_regs *regs)
>> {
>> /*
>> * If invoked from hypervisors before entering the guest via
>>
>
>
--
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com
On Thu, Sep 11 2025 at 10:45, Mathieu Desnoyers wrote:
>> Mathieu
>>
<REMOVING ~170 lines of non-content to find a single line of information>
Seriously?
</>
>
> Just making sure you don't miss one additional comment below...
Contrary to you, I trim my replies, so I found it...
exit_to_user_mode_prepare() is used for both interrupts and syscalls, but
there is extra rseq work, which is only required in the interrupt exit
case.
Split up the function and provide wrappers for syscalls and interrupts,
which allows to separate the rseq exit work in the next step.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
include/linux/entry-common.h | 2 -
include/linux/irq-entry-common.h | 42 ++++++++++++++++++++++++++++++++++-----
2 files changed, 38 insertions(+), 6 deletions(-)
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -156,7 +156,7 @@ static __always_inline void syscall_exit
if (unlikely(work & SYSCALL_WORK_EXIT))
syscall_exit_work(regs, work);
local_irq_disable_exit_to_user();
- exit_to_user_mode_prepare(regs);
+ syscall_exit_to_user_mode_prepare(regs);
}
/**
--- a/include/linux/irq-entry-common.h
+++ b/include/linux/irq-entry-common.h
@@ -201,7 +201,7 @@ void arch_do_signal_or_restart(struct pt
unsigned long exit_to_user_mode_loop(struct pt_regs *regs, unsigned long ti_work);
/**
- * exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required
+ * __exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required
* @regs: Pointer to pt_regs on entry stack
*
* 1) check that interrupts are disabled
@@ -209,8 +209,10 @@ unsigned long exit_to_user_mode_loop(str
* 3) call exit_to_user_mode_loop() if any flags from
* EXIT_TO_USER_MODE_WORK are set
* 4) check that interrupts are still disabled
+ *
+ * Don't invoke directly, use the syscall/irqentry_ prefixed variants below
*/
-static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs)
+static __always_inline void __exit_to_user_mode_prepare(struct pt_regs *regs)
{
unsigned long ti_work;
@@ -224,15 +226,45 @@ static __always_inline void exit_to_user
ti_work = exit_to_user_mode_loop(regs, ti_work);
arch_exit_to_user_mode_prepare(regs, ti_work);
+}
- rseq_exit_to_user_mode();
-
+static __always_inline void __exit_to_user_mode_validate(void)
+{
/* Ensure that kernel state is sane for a return to userspace */
kmap_assert_nomap();
lockdep_assert_irqs_disabled();
lockdep_sys_exit();
}
+
+/**
+ * syscall_exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required
+ * @regs: Pointer to pt_regs on entry stack
+ *
+ * Wrapper around __exit_to_user_mode_prepare() to separate the exit work for
+ * syscalls and interrupts.
+ */
+static __always_inline void syscall_exit_to_user_mode_prepare(struct pt_regs *regs)
+{
+ __exit_to_user_mode_prepare(regs);
+ rseq_exit_to_user_mode();
+ __exit_to_user_mode_validate();
+}
+
+/**
+ * irqentry_exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required
+ * @regs: Pointer to pt_regs on entry stack
+ *
+ * Wrapper around __exit_to_user_mode_prepare() to separate the exit work for
+ * syscalls and interrupts.
+ */
+static __always_inline void irqentry_exit_to_user_mode_prepare(struct pt_regs *regs)
+{
+ __exit_to_user_mode_prepare(regs);
+ rseq_exit_to_user_mode();
+ __exit_to_user_mode_validate();
+}
+
/**
* exit_to_user_mode - Fixup state when exiting to user mode
*
@@ -297,7 +329,7 @@ static __always_inline void irqentry_ent
static __always_inline void irqentry_exit_to_user_mode(struct pt_regs *regs)
{
instrumentation_begin();
- exit_to_user_mode_prepare(regs);
+ irqentry_exit_to_user_mode_prepare(regs);
instrumentation_end();
exit_to_user_mode();
}
Separate the interrupt and syscall exit handling. Syscall exit does not
require clearing the user_irq bit as it can't be set. On interrupt exit it
can be set when the interrupt did not result in a scheduling event and
therefore the return path did not invoke the TIF work handling, which would
have cleared it.
The debug check for the event state is also not really required even when
debug mode is enabled via the static key. Debug mode is largely aiding user
space by enabling a larger amount of validation checks, which cause a
segfault when a malformed critical section is detected. In production mode
the critical section handling takes the content mostly as is and lets user
space keep the pieces when it screwed up.
On kernel changes in that area the state check is useful, but that can be
done when lockdep is enabled, which is anyway a required test scenario for
fundamental changes.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
include/linux/irq-entry-common.h | 4 ++--
include/linux/rseq_entry.h | 21 +++++++++++++++++----
2 files changed, 19 insertions(+), 6 deletions(-)
--- a/include/linux/irq-entry-common.h
+++ b/include/linux/irq-entry-common.h
@@ -247,7 +247,7 @@ static __always_inline void __exit_to_us
static __always_inline void syscall_exit_to_user_mode_prepare(struct pt_regs *regs)
{
__exit_to_user_mode_prepare(regs);
- rseq_exit_to_user_mode();
+ rseq_syscall_exit_to_user_mode();
__exit_to_user_mode_validate();
}
@@ -261,7 +261,7 @@ static __always_inline void syscall_exit
static __always_inline void irqentry_exit_to_user_mode_prepare(struct pt_regs *regs)
{
__exit_to_user_mode_prepare(regs);
- rseq_exit_to_user_mode();
+ rseq_irqentry_exit_to_user_mode();
__exit_to_user_mode_validate();
}
--- a/include/linux/rseq_entry.h
+++ b/include/linux/rseq_entry.h
@@ -532,19 +532,31 @@ static __always_inline bool rseq_exit_to
#endif /* CONFIG_GENERIC_ENTRY */
-static __always_inline void rseq_exit_to_user_mode(void)
+static __always_inline void rseq_syscall_exit_to_user_mode(void)
{
struct rseq_event *ev = ¤t->rseq.event;
rseq_stat_inc(rseq_stats.exit);
- if (static_branch_unlikely(&rseq_debug_enabled))
+ /* Needed to remove the store for the !lockdep case */
+ if (IS_ENABLED(CONFIG_LOCKDEP)) {
WARN_ON_ONCE(ev->sched_switch);
+ ev->events = 0;
+ }
+}
+
+static __always_inline void rseq_irqentry_exit_to_user_mode(void)
+{
+ struct rseq_event *ev = ¤t->rseq.event;
+
+ rseq_stat_inc(rseq_stats.exit);
+
+ lockdep_assert_once(!ev->sched_switch);
/*
* Ensure that event (especially user_irq) is cleared when the
* interrupt did not result in a schedule and therefore the
- * rseq processing did not clear it.
+ * rseq processing could not clear it.
*/
ev->events = 0;
}
@@ -562,7 +574,8 @@ static inline bool rseq_exit_to_user_mod
{
return false;
}
-static inline void rseq_exit_to_user_mode(void) { }
+static inline void rseq_syscall_exit_to_user_mode(void) { }
+static inline void rseq_irqentry_exit_to_user_mode(void) { }
static inline void rseq_debug_syscall_return(struct pt_regs *regs) { }
#endif /* !CONFIG_RSEQ */
Common TIF bits do not have to be defined by every architecture. They can
be defined in a generic header.
That allows adding generic TIF bits without chasing a gazillion of
architecture headers, which is again an unjustified burden on anyone who
works on generic infrastructure as it always needs a boat load of work to
keep existing architecture code working when adding new stuff.
While it is not as horrible as the ignorance of the generic entry
infrastructure, it is a welcome mechanism to make architecture people
rethink their approach of just leaching generic improvements into
architecture code and thereby making it increasingly harder to maintain
and improve generic code. It's about time that this changes.
Provide the infrastructure and split the TIF space in half, 16 generic and
16 architecture specific bits.
This could probably be extended by TIF_SINGLESTEP and BLOCKSTEP, but those
are only used in architecture specific code. So leave them alone for now.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
---
arch/Kconfig | 4 ++
include/asm-generic/thread_info_tif.h | 48 ++++++++++++++++++++++++++++++++++
2 files changed, 52 insertions(+)
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1730,6 +1730,10 @@ config ARCH_VMLINUX_NEEDS_RELOCS
relocations preserved. This is used by some architectures to
construct bespoke relocation tables for KASLR.
+# Select if architecture uses the common generic TIF bits
+config HAVE_GENERIC_TIF_BITS
+ bool
+
source "kernel/gcov/Kconfig"
source "scripts/gcc-plugins/Kconfig"
--- /dev/null
+++ b/include/asm-generic/thread_info_tif.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_GENERIC_THREAD_INFO_TIF_H_
+#define _ASM_GENERIC_THREAD_INFO_TIF_H_
+
+#include <vdso/bits.h>
+
+/* Bits 16-31 are reserved for architecture specific purposes */
+
+#define TIF_NOTIFY_RESUME 0 // callback before returning to user
+#define _TIF_NOTIFY_RESUME BIT(TIF_NOTIFY_RESUME)
+
+#define TIF_SIGPENDING 1 // signal pending
+#define _TIF_SIGPENDING BIT(TIF_SIGPENDING)
+
+#define TIF_NOTIFY_SIGNAL 2 // signal notifications exist
+#define _TIF_NOTIFY_SIGNAL BIT(TIF_NOTIFY_SIGNAL)
+
+#define TIF_MEMDIE 3 // is terminating due to OOM killer
+#define _TIF_MEMDIE BIT(TIF_MEMDIE)
+
+#define TIF_NEED_RESCHED 4 // rescheduling necessary
+#define _TIF_NEED_RESCHED BIT(TIF_NEED_RESCHED)
+
+#ifdef HAVE_TIF_NEED_RESCHED_LAZY
+# define TIF_NEED_RESCHED_LAZY 5 // Lazy rescheduling needed
+# define _TIF_NEED_RESCHED_LAZY BIT(TIF_NEED_RESCHED_LAZY)
+#endif
+
+#ifdef HAVE_TIF_POLLING_NRFLAG
+# define TIF_POLLING_NRFLAG 6 // idle is polling for TIF_NEED_RESCHED
+# define _TIF_POLLING_NRFLAG BIT(TIF_POLLING_NRFLAG)
+#endif
+
+#define TIF_USER_RETURN_NOTIFY 7 // notify kernel of userspace return
+#define _TIF_USER_RETURN_NOTIFY BIT(TIF_USER_RETURN_NOTIFY)
+
+#define TIF_UPROBE 8 // breakpointed or singlestepping
+#define _TIF_UPROBE BIT(TIF_UPROBE)
+
+#define TIF_PATCH_PENDING 9 // pending live patching update
+#define _TIF_PATCH_PENDING BIT(TIF_PATCH_PENDING)
+
+#ifdef HAVE_TIF_RESTORE_SIGMASK
+# define TIF_RESTORE_SIGMASK 10 // Restore signal mask in do_signal() */
+# define _TIF_RESTORE_SIGMASK BIT(TIF_RESTORE_SIGMASK)
+#endif
+
+#endif /* _ASM_GENERIC_THREAD_INFO_TIF_H_ */
The following commit has been merged into the core/core branch of tip:
Commit-ID: 29589343488e116ac31f6f3cfa83e43949a2207a
Gitweb: https://git.kernel.org/tip/29589343488e116ac31f6f3cfa83e43949a2207a
Author: Thomas Gleixner <tglx@linutronix.de>
AuthorDate: Mon, 08 Sep 2025 23:32:30 +02:00
Committer: Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Wed, 17 Sep 2025 08:14:03 +02:00
asm-generic: Provide generic TIF infrastructure
Common TIF bits do not have to be defined by every architecture. They can
be defined in a generic header.
That allows adding generic TIF bits without chasing a gazillion of
architecture headers, which is again an unjustified burden on anyone who
works on generic infrastructure as it always needs a boat load of work to
keep existing architecture code working when adding new stuff.
While it is not as horrible as the ignorance of the generic entry
infrastructure, it is a welcome mechanism to make architecture people
rethink their approach of just leaching generic improvements into
architecture code and thereby making it increasingly harder to maintain
and improve generic code. It's about time that this changes.
Provide the infrastructure and split the TIF space in half, 16 generic and
16 architecture specific bits.
This could probably be extended by TIF_SINGLESTEP and BLOCKSTEP, but those
are only used in architecture specific code. So leave them alone for now.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
---
arch/Kconfig | 4 ++-
include/asm-generic/thread_info_tif.h | 48 ++++++++++++++++++++++++++-
2 files changed, 52 insertions(+)
create mode 100644 include/asm-generic/thread_info_tif.h
diff --git a/arch/Kconfig b/arch/Kconfig
index d1b4ffd..c20df40 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1730,6 +1730,10 @@ config ARCH_VMLINUX_NEEDS_RELOCS
relocations preserved. This is used by some architectures to
construct bespoke relocation tables for KASLR.
+# Select if architecture uses the common generic TIF bits
+config HAVE_GENERIC_TIF_BITS
+ bool
+
source "kernel/gcov/Kconfig"
source "scripts/gcc-plugins/Kconfig"
diff --git a/include/asm-generic/thread_info_tif.h b/include/asm-generic/thread_info_tif.h
new file mode 100644
index 0000000..ee3793e
--- /dev/null
+++ b/include/asm-generic/thread_info_tif.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_GENERIC_THREAD_INFO_TIF_H_
+#define _ASM_GENERIC_THREAD_INFO_TIF_H_
+
+#include <vdso/bits.h>
+
+/* Bits 16-31 are reserved for architecture specific purposes */
+
+#define TIF_NOTIFY_RESUME 0 // callback before returning to user
+#define _TIF_NOTIFY_RESUME BIT(TIF_NOTIFY_RESUME)
+
+#define TIF_SIGPENDING 1 // signal pending
+#define _TIF_SIGPENDING BIT(TIF_SIGPENDING)
+
+#define TIF_NOTIFY_SIGNAL 2 // signal notifications exist
+#define _TIF_NOTIFY_SIGNAL BIT(TIF_NOTIFY_SIGNAL)
+
+#define TIF_MEMDIE 3 // is terminating due to OOM killer
+#define _TIF_MEMDIE BIT(TIF_MEMDIE)
+
+#define TIF_NEED_RESCHED 4 // rescheduling necessary
+#define _TIF_NEED_RESCHED BIT(TIF_NEED_RESCHED)
+
+#ifdef HAVE_TIF_NEED_RESCHED_LAZY
+# define TIF_NEED_RESCHED_LAZY 5 // Lazy rescheduling needed
+# define _TIF_NEED_RESCHED_LAZY BIT(TIF_NEED_RESCHED_LAZY)
+#endif
+
+#ifdef HAVE_TIF_POLLING_NRFLAG
+# define TIF_POLLING_NRFLAG 6 // idle is polling for TIF_NEED_RESCHED
+# define _TIF_POLLING_NRFLAG BIT(TIF_POLLING_NRFLAG)
+#endif
+
+#define TIF_USER_RETURN_NOTIFY 7 // notify kernel of userspace return
+#define _TIF_USER_RETURN_NOTIFY BIT(TIF_USER_RETURN_NOTIFY)
+
+#define TIF_UPROBE 8 // breakpointed or singlestepping
+#define _TIF_UPROBE BIT(TIF_UPROBE)
+
+#define TIF_PATCH_PENDING 9 // pending live patching update
+#define _TIF_PATCH_PENDING BIT(TIF_PATCH_PENDING)
+
+#ifdef HAVE_TIF_RESTORE_SIGMASK
+# define TIF_RESTORE_SIGMASK 10 // Restore signal mask in do_signal() */
+# define _TIF_RESTORE_SIGMASK BIT(TIF_RESTORE_SIGMASK)
+#endif
+
+#endif /* _ASM_GENERIC_THREAD_INFO_TIF_H_ */
No point in defining generic items and the upcoming RSEQ optimizations are
only available with this _and_ the generic entry infrastructure, which is
already used by x86. So no further action required here.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: x86@kernel.org
---
arch/x86/Kconfig | 1
arch/x86/include/asm/thread_info.h | 74 +++++++++++++++----------------------
2 files changed, 31 insertions(+), 44 deletions(-)
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -239,6 +239,7 @@ config X86
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_EISA if X86_32
select HAVE_EXIT_THREAD
+ select HAVE_GENERIC_TIF_BITS
select HAVE_GUP_FAST
select HAVE_FENTRY if X86_64 || DYNAMIC_FTRACE
select HAVE_FTRACE_GRAPH_FUNC if HAVE_FUNCTION_GRAPH_TRACER
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -80,56 +80,42 @@ struct thread_info {
#endif
/*
- * thread information flags
- * - these are process state flags that various assembly files
- * may need to access
+ * Tell the generic TIF infrastructure which bits x86 supports
*/
-#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */
-#define TIF_SIGPENDING 2 /* signal pending */
-#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
-#define TIF_NEED_RESCHED_LAZY 4 /* Lazy rescheduling needed */
-#define TIF_SINGLESTEP 5 /* reenable singlestep on user return*/
-#define TIF_SSBD 6 /* Speculative store bypass disable */
-#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */
-#define TIF_SPEC_L1D_FLUSH 10 /* Flush L1D on mm switches (processes) */
-#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
-#define TIF_UPROBE 12 /* breakpointed or singlestepping */
-#define TIF_PATCH_PENDING 13 /* pending live patching update */
-#define TIF_NEED_FPU_LOAD 14 /* load FPU on return to userspace */
-#define TIF_NOCPUID 15 /* CPUID is not accessible in userland */
-#define TIF_NOTSC 16 /* TSC is not accessible in userland */
-#define TIF_NOTIFY_SIGNAL 17 /* signal notifications exist */
-#define TIF_MEMDIE 20 /* is terminating due to OOM killer */
-#define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */
+#define HAVE_TIF_NEED_RESCHED_LAZY
+#define HAVE_TIF_POLLING_NRFLAG
+#define HAVE_TIF_SINGLESTEP
+
+#include <asm-generic/thread_info_tif.h>
+
+/* Architecture specific TIF space starts at 16 */
+#define TIF_SSBD 16 /* Speculative store bypass disable */
+#define TIF_SPEC_IB 17 /* Indirect branch speculation mitigation */
+#define TIF_SPEC_L1D_FLUSH 18 /* Flush L1D on mm switches (processes) */
+#define TIF_NEED_FPU_LOAD 19 /* load FPU on return to userspace */
+#define TIF_NOCPUID 20 /* CPUID is not accessible in userland */
+#define TIF_NOTSC 21 /* TSC is not accessible in userland */
#define TIF_IO_BITMAP 22 /* uses I/O bitmap */
#define TIF_SPEC_FORCE_UPDATE 23 /* Force speculation MSR update in context switch */
#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */
-#define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */
+#define TIF_SINGLESTEP 25 /* reenable singlestep on user return*/
+#define TIF_BLOCKSTEP 26 /* set when we want DEBUGCTLMSR_BTF */
#define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */
-#define TIF_ADDR32 29 /* 32-bit address space on 64 bits */
+#define TIF_ADDR32 28 /* 32-bit address space on 64 bits */
-#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
-#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
-#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
-#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
-#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
-#define _TIF_SSBD (1 << TIF_SSBD)
-#define _TIF_SPEC_IB (1 << TIF_SPEC_IB)
-#define _TIF_SPEC_L1D_FLUSH (1 << TIF_SPEC_L1D_FLUSH)
-#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
-#define _TIF_UPROBE (1 << TIF_UPROBE)
-#define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING)
-#define _TIF_NEED_FPU_LOAD (1 << TIF_NEED_FPU_LOAD)
-#define _TIF_NOCPUID (1 << TIF_NOCPUID)
-#define _TIF_NOTSC (1 << TIF_NOTSC)
-#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
-#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
-#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
-#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE)
-#define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
-#define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP)
-#define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES)
-#define _TIF_ADDR32 (1 << TIF_ADDR32)
+#define _TIF_SSBD BIT(TIF_SSBD)
+#define _TIF_SPEC_IB BIT(TIF_SPEC_IB)
+#define _TIF_SPEC_L1D_FLUSH BIT(TIF_SPEC_L1D_FLUSH)
+#define _TIF_NEED_FPU_LOAD BIT(TIF_NEED_FPU_LOAD)
+#define _TIF_NOCPUID BIT(TIF_NOCPUID)
+#define _TIF_NOTSC BIT(TIF_NOTSC)
+#define _TIF_IO_BITMAP BIT(TIF_IO_BITMAP)
+#define _TIF_SPEC_FORCE_UPDATE BIT(TIF_SPEC_FORCE_UPDATE)
+#define _TIF_FORCED_TF BIT(TIF_FORCED_TF)
+#define _TIF_BLOCKSTEP BIT(TIF_BLOCKSTEP)
+#define _TIF_SINGLESTEP BIT(TIF_SINGLESTEP)
+#define _TIF_LAZY_MMU_UPDATES BIT(TIF_LAZY_MMU_UPDATES)
+#define _TIF_ADDR32 BIT(TIF_ADDR32)
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW_BASE \
The following commit has been merged into the core/core branch of tip:
Commit-ID: da3f033a9fbfdb88826c1b0486fe1522fe4f94aa
Gitweb: https://git.kernel.org/tip/da3f033a9fbfdb88826c1b0486fe1522fe4f94aa
Author: Thomas Gleixner <tglx@linutronix.de>
AuthorDate: Mon, 08 Sep 2025 23:32:32 +02:00
Committer: Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Wed, 17 Sep 2025 08:14:04 +02:00
x86: Use generic TIF bits
No point in defining generic items and the upcoming RSEQ optimizations are
only available with this _and_ the generic entry infrastructure, which is
already used by x86. So no further action required here.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
arch/x86/Kconfig | 1 +-
arch/x86/include/asm/thread_info.h | 76 +++++++++++------------------
2 files changed, 32 insertions(+), 45 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 52c8910..70b94e0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -239,6 +239,7 @@ config X86
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_EISA if X86_32
select HAVE_EXIT_THREAD
+ select HAVE_GENERIC_TIF_BITS
select HAVE_GUP_FAST
select HAVE_FENTRY if X86_64 || DYNAMIC_FTRACE
select HAVE_FTRACE_GRAPH_FUNC if HAVE_FUNCTION_GRAPH_TRACER
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 9282465..e71e0e8 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -80,56 +80,42 @@ struct thread_info {
#endif
/*
- * thread information flags
- * - these are process state flags that various assembly files
- * may need to access
+ * Tell the generic TIF infrastructure which bits x86 supports
*/
-#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */
-#define TIF_SIGPENDING 2 /* signal pending */
-#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
-#define TIF_NEED_RESCHED_LAZY 4 /* Lazy rescheduling needed */
-#define TIF_SINGLESTEP 5 /* reenable singlestep on user return*/
-#define TIF_SSBD 6 /* Speculative store bypass disable */
-#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */
-#define TIF_SPEC_L1D_FLUSH 10 /* Flush L1D on mm switches (processes) */
-#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
-#define TIF_UPROBE 12 /* breakpointed or singlestepping */
-#define TIF_PATCH_PENDING 13 /* pending live patching update */
-#define TIF_NEED_FPU_LOAD 14 /* load FPU on return to userspace */
-#define TIF_NOCPUID 15 /* CPUID is not accessible in userland */
-#define TIF_NOTSC 16 /* TSC is not accessible in userland */
-#define TIF_NOTIFY_SIGNAL 17 /* signal notifications exist */
-#define TIF_MEMDIE 20 /* is terminating due to OOM killer */
-#define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */
+#define HAVE_TIF_NEED_RESCHED_LAZY
+#define HAVE_TIF_POLLING_NRFLAG
+#define HAVE_TIF_SINGLESTEP
+
+#include <asm-generic/thread_info_tif.h>
+
+/* Architecture specific TIF space starts at 16 */
+#define TIF_SSBD 16 /* Speculative store bypass disable */
+#define TIF_SPEC_IB 17 /* Indirect branch speculation mitigation */
+#define TIF_SPEC_L1D_FLUSH 18 /* Flush L1D on mm switches (processes) */
+#define TIF_NEED_FPU_LOAD 19 /* load FPU on return to userspace */
+#define TIF_NOCPUID 20 /* CPUID is not accessible in userland */
+#define TIF_NOTSC 21 /* TSC is not accessible in userland */
#define TIF_IO_BITMAP 22 /* uses I/O bitmap */
#define TIF_SPEC_FORCE_UPDATE 23 /* Force speculation MSR update in context switch */
#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */
-#define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */
+#define TIF_SINGLESTEP 25 /* reenable singlestep on user return*/
+#define TIF_BLOCKSTEP 26 /* set when we want DEBUGCTLMSR_BTF */
#define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */
-#define TIF_ADDR32 29 /* 32-bit address space on 64 bits */
-
-#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
-#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
-#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
-#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
-#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
-#define _TIF_SSBD (1 << TIF_SSBD)
-#define _TIF_SPEC_IB (1 << TIF_SPEC_IB)
-#define _TIF_SPEC_L1D_FLUSH (1 << TIF_SPEC_L1D_FLUSH)
-#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
-#define _TIF_UPROBE (1 << TIF_UPROBE)
-#define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING)
-#define _TIF_NEED_FPU_LOAD (1 << TIF_NEED_FPU_LOAD)
-#define _TIF_NOCPUID (1 << TIF_NOCPUID)
-#define _TIF_NOTSC (1 << TIF_NOTSC)
-#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
-#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
-#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
-#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE)
-#define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
-#define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP)
-#define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES)
-#define _TIF_ADDR32 (1 << TIF_ADDR32)
+#define TIF_ADDR32 28 /* 32-bit address space on 64 bits */
+
+#define _TIF_SSBD BIT(TIF_SSBD)
+#define _TIF_SPEC_IB BIT(TIF_SPEC_IB)
+#define _TIF_SPEC_L1D_FLUSH BIT(TIF_SPEC_L1D_FLUSH)
+#define _TIF_NEED_FPU_LOAD BIT(TIF_NEED_FPU_LOAD)
+#define _TIF_NOCPUID BIT(TIF_NOCPUID)
+#define _TIF_NOTSC BIT(TIF_NOTSC)
+#define _TIF_IO_BITMAP BIT(TIF_IO_BITMAP)
+#define _TIF_SPEC_FORCE_UPDATE BIT(TIF_SPEC_FORCE_UPDATE)
+#define _TIF_FORCED_TF BIT(TIF_FORCED_TF)
+#define _TIF_BLOCKSTEP BIT(TIF_BLOCKSTEP)
+#define _TIF_SINGLESTEP BIT(TIF_SINGLESTEP)
+#define _TIF_LAZY_MMU_UPDATES BIT(TIF_LAZY_MMU_UPDATES)
+#define _TIF_ADDR32 BIT(TIF_ADDR32)
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW_BASE \
No point in defining generic items and the upcoming RSEQ optimizations are
only available with this _and_ the generic entry infrastructure, which is
already used by s390. So no further action required here.
This leaves a comment about the AUDIT/TRACE/SECCOMP bits which are handled
by SYSCALL_WORK in the generic code, so they seem redundant, but that's a
problem for the s390 wizards to think about.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
---
arch/s390/Kconfig | 1
arch/s390/include/asm/thread_info.h | 44 ++++++++++++++----------------------
2 files changed, 19 insertions(+), 26 deletions(-)
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -199,6 +199,7 @@ config S390
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_EBPF_JIT if HAVE_MARCH_Z196_FEATURES
select HAVE_EFFICIENT_UNALIGNED_ACCESS
+ select HAVE_GENERIC_TIF_BITS
select HAVE_GUP_FAST
select HAVE_FENTRY
select HAVE_FTRACE_GRAPH_FUNC
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -56,43 +56,35 @@ void arch_setup_new_exec(void);
/*
* thread information flags bit numbers
+ *
+ * Tell the generic TIF infrastructure which special bits s390 supports
*/
-#define TIF_NOTIFY_RESUME 0 /* callback before returning to user */
-#define TIF_SIGPENDING 1 /* signal pending */
-#define TIF_NEED_RESCHED 2 /* rescheduling necessary */
-#define TIF_NEED_RESCHED_LAZY 3 /* lazy rescheduling needed */
-#define TIF_UPROBE 4 /* breakpointed or single-stepping */
-#define TIF_PATCH_PENDING 5 /* pending live patching update */
-#define TIF_ASCE_PRIMARY 6 /* primary asce is kernel asce */
-#define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */
-#define TIF_GUARDED_STORAGE 8 /* load guarded storage control block */
-#define TIF_ISOLATE_BP_GUEST 9 /* Run KVM guests with isolated BP */
-#define TIF_PER_TRAP 10 /* Need to handle PER trap on exit to usermode */
-#define TIF_31BIT 16 /* 32bit process */
-#define TIF_MEMDIE 17 /* is terminating due to OOM killer */
-#define TIF_RESTORE_SIGMASK 18 /* restore signal mask in do_signal() */
-#define TIF_SINGLE_STEP 19 /* This task is single stepped */
-#define TIF_BLOCK_STEP 20 /* This task is block stepped */
-#define TIF_UPROBE_SINGLESTEP 21 /* This task is uprobe single stepped */
+#define HAVE_TIF_NEED_RESCHED_LAZY
+#define HAVE_TIF_RESTORE_SIGMASK
+
+#include <asm-generic/thread_info_tif.h>
+
+/* Architecture specific bits */
+#define TIF_ASCE_PRIMARY 16 /* primary asce is kernel asce */
+#define TIF_GUARDED_STORAGE 17 /* load guarded storage control block */
+#define TIF_ISOLATE_BP_GUEST 18 /* Run KVM guests with isolated BP */
+#define TIF_PER_TRAP 19 /* Need to handle PER trap on exit to usermode */
+#define TIF_31BIT 20 /* 32bit process */
+#define TIF_SINGLE_STEP 21 /* This task is single stepped */
+#define TIF_BLOCK_STEP 22 /* This task is block stepped */
+#define TIF_UPROBE_SINGLESTEP 23 /* This task is uprobe single stepped */
+
+/* These could move over to SYSCALL_WORK bits, no? */
#define TIF_SYSCALL_TRACE 24 /* syscall trace active */
#define TIF_SYSCALL_AUDIT 25 /* syscall auditing active */
#define TIF_SECCOMP 26 /* secure computing */
#define TIF_SYSCALL_TRACEPOINT 27 /* syscall tracepoint instrumentation */
-#define _TIF_NOTIFY_RESUME BIT(TIF_NOTIFY_RESUME)
-#define _TIF_SIGPENDING BIT(TIF_SIGPENDING)
-#define _TIF_NEED_RESCHED BIT(TIF_NEED_RESCHED)
-#define _TIF_NEED_RESCHED_LAZY BIT(TIF_NEED_RESCHED_LAZY)
-#define _TIF_UPROBE BIT(TIF_UPROBE)
-#define _TIF_PATCH_PENDING BIT(TIF_PATCH_PENDING)
#define _TIF_ASCE_PRIMARY BIT(TIF_ASCE_PRIMARY)
-#define _TIF_NOTIFY_SIGNAL BIT(TIF_NOTIFY_SIGNAL)
#define _TIF_GUARDED_STORAGE BIT(TIF_GUARDED_STORAGE)
#define _TIF_ISOLATE_BP_GUEST BIT(TIF_ISOLATE_BP_GUEST)
#define _TIF_PER_TRAP BIT(TIF_PER_TRAP)
#define _TIF_31BIT BIT(TIF_31BIT)
-#define _TIF_MEMDIE BIT(TIF_MEMDIE)
-#define _TIF_RESTORE_SIGMASK BIT(TIF_RESTORE_SIGMASK)
#define _TIF_SINGLE_STEP BIT(TIF_SINGLE_STEP)
#define _TIF_BLOCK_STEP BIT(TIF_BLOCK_STEP)
#define _TIF_UPROBE_SINGLESTEP BIT(TIF_UPROBE_SINGLESTEP)
On Mon, Sep 08, 2025 at 11:32:34PM +0200, Thomas Gleixner wrote: > No point in defining generic items and the upcoming RSEQ optimizations are > only available with this _and_ the generic entry infrastructure, which is > already used by s390. So no further action required here. > > This leaves a comment about the AUDIT/TRACE/SECCOMP bits which are handled > by SYSCALL_WORK in the generic code, so they seem redundant, but that's a > problem for the s390 wizards to think about. > > Signed-off-by: Thomas Gleixner <tglx@linutronix.de> > Cc: Heiko Carstens <hca@linux.ibm.com> > Cc: Christian Borntraeger <borntraeger@linux.ibm.com> > Cc: Sven Schnelle <svens@linux.ibm.com> > > --- > arch/s390/Kconfig | 1 > arch/s390/include/asm/thread_info.h | 44 ++++++++++++++---------------------- > 2 files changed, 19 insertions(+), 26 deletions(-) FWIW: Acked-by: Heiko Carstens <hca@linux.ibm.com>
Thomas Gleixner <tglx@linutronix.de> writes: > No point in defining generic items and the upcoming RSEQ optimizations are > only available with this _and_ the generic entry infrastructure, which is > already used by s390. So no further action required here. > > This leaves a comment about the AUDIT/TRACE/SECCOMP bits which are handled > by SYSCALL_WORK in the generic code, so they seem redundant, but that's a > problem for the s390 wizards to think about. > > Signed-off-by: Thomas Gleixner <tglx@linutronix.de> > Cc: Heiko Carstens <hca@linux.ibm.com> > Cc: Christian Borntraeger <borntraeger@linux.ibm.com> > Cc: Sven Schnelle <svens@linux.ibm.com> > > --- a/arch/s390/include/asm/thread_info.h > +++ b/arch/s390/include/asm/thread_info.h > @@ -56,43 +56,35 @@ void arch_setup_new_exec(void); > [..] > +/* These could move over to SYSCALL_WORK bits, no? */ > #define TIF_SYSCALL_TRACE 24 /* syscall trace active */ > #define TIF_SYSCALL_AUDIT 25 /* syscall auditing active */ > #define TIF_SECCOMP 26 /* secure computing */ > #define TIF_SYSCALL_TRACEPOINT 27 /* syscall tracepoint instrumentation */ Looks like these flags are a leftover of the conversion to generic entry and are no longer used. I'll send a patch on top of your patchset which remove them. Thanks! Sven
The following commit has been merged into the core/core branch of tip:
Commit-ID: 06e5b72858b168fcedb0402b8dfdb896276fc08e
Gitweb: https://git.kernel.org/tip/06e5b72858b168fcedb0402b8dfdb896276fc08e
Author: Thomas Gleixner <tglx@linutronix.de>
AuthorDate: Mon, 08 Sep 2025 23:32:34 +02:00
Committer: Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Wed, 17 Sep 2025 08:14:04 +02:00
s390: Use generic TIF bits
No point in defining generic items and the upcoming RSEQ optimizations are
only available with this _and_ the generic entry infrastructure, which is
already used by s390. So no further action required here.
This leaves a comment about the AUDIT/TRACE/SECCOMP bits which are handled
by SYSCALL_WORK in the generic code, so they seem redundant, but that's a
problem for the s390 wizards to think about.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Heiko Carstens <hca@linux.ibm.com>
---
arch/s390/Kconfig | 1 +-
arch/s390/include/asm/thread_info.h | 44 +++++++++++-----------------
2 files changed, 19 insertions(+), 26 deletions(-)
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index bf680c2..f991ab9 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -199,6 +199,7 @@ config S390
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_EBPF_JIT if HAVE_MARCH_Z196_FEATURES
select HAVE_EFFICIENT_UNALIGNED_ACCESS
+ select HAVE_GENERIC_TIF_BITS
select HAVE_GUP_FAST
select HAVE_FENTRY
select HAVE_FTRACE_GRAPH_FUNC
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index f6ed2c8..fe6da06 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -56,43 +56,35 @@ void arch_setup_new_exec(void);
/*
* thread information flags bit numbers
+ *
+ * Tell the generic TIF infrastructure which special bits s390 supports
*/
-#define TIF_NOTIFY_RESUME 0 /* callback before returning to user */
-#define TIF_SIGPENDING 1 /* signal pending */
-#define TIF_NEED_RESCHED 2 /* rescheduling necessary */
-#define TIF_NEED_RESCHED_LAZY 3 /* lazy rescheduling needed */
-#define TIF_UPROBE 4 /* breakpointed or single-stepping */
-#define TIF_PATCH_PENDING 5 /* pending live patching update */
-#define TIF_ASCE_PRIMARY 6 /* primary asce is kernel asce */
-#define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */
-#define TIF_GUARDED_STORAGE 8 /* load guarded storage control block */
-#define TIF_ISOLATE_BP_GUEST 9 /* Run KVM guests with isolated BP */
-#define TIF_PER_TRAP 10 /* Need to handle PER trap on exit to usermode */
-#define TIF_31BIT 16 /* 32bit process */
-#define TIF_MEMDIE 17 /* is terminating due to OOM killer */
-#define TIF_RESTORE_SIGMASK 18 /* restore signal mask in do_signal() */
-#define TIF_SINGLE_STEP 19 /* This task is single stepped */
-#define TIF_BLOCK_STEP 20 /* This task is block stepped */
-#define TIF_UPROBE_SINGLESTEP 21 /* This task is uprobe single stepped */
+#define HAVE_TIF_NEED_RESCHED_LAZY
+#define HAVE_TIF_RESTORE_SIGMASK
+
+#include <asm-generic/thread_info_tif.h>
+
+/* Architecture specific bits */
+#define TIF_ASCE_PRIMARY 16 /* primary asce is kernel asce */
+#define TIF_GUARDED_STORAGE 17 /* load guarded storage control block */
+#define TIF_ISOLATE_BP_GUEST 18 /* Run KVM guests with isolated BP */
+#define TIF_PER_TRAP 19 /* Need to handle PER trap on exit to usermode */
+#define TIF_31BIT 20 /* 32bit process */
+#define TIF_SINGLE_STEP 21 /* This task is single stepped */
+#define TIF_BLOCK_STEP 22 /* This task is block stepped */
+#define TIF_UPROBE_SINGLESTEP 23 /* This task is uprobe single stepped */
+
+/* These could move over to SYSCALL_WORK bits, no? */
#define TIF_SYSCALL_TRACE 24 /* syscall trace active */
#define TIF_SYSCALL_AUDIT 25 /* syscall auditing active */
#define TIF_SECCOMP 26 /* secure computing */
#define TIF_SYSCALL_TRACEPOINT 27 /* syscall tracepoint instrumentation */
-#define _TIF_NOTIFY_RESUME BIT(TIF_NOTIFY_RESUME)
-#define _TIF_SIGPENDING BIT(TIF_SIGPENDING)
-#define _TIF_NEED_RESCHED BIT(TIF_NEED_RESCHED)
-#define _TIF_NEED_RESCHED_LAZY BIT(TIF_NEED_RESCHED_LAZY)
-#define _TIF_UPROBE BIT(TIF_UPROBE)
-#define _TIF_PATCH_PENDING BIT(TIF_PATCH_PENDING)
#define _TIF_ASCE_PRIMARY BIT(TIF_ASCE_PRIMARY)
-#define _TIF_NOTIFY_SIGNAL BIT(TIF_NOTIFY_SIGNAL)
#define _TIF_GUARDED_STORAGE BIT(TIF_GUARDED_STORAGE)
#define _TIF_ISOLATE_BP_GUEST BIT(TIF_ISOLATE_BP_GUEST)
#define _TIF_PER_TRAP BIT(TIF_PER_TRAP)
#define _TIF_31BIT BIT(TIF_31BIT)
-#define _TIF_MEMDIE BIT(TIF_MEMDIE)
-#define _TIF_RESTORE_SIGMASK BIT(TIF_RESTORE_SIGMASK)
#define _TIF_SINGLE_STEP BIT(TIF_SINGLE_STEP)
#define _TIF_BLOCK_STEP BIT(TIF_BLOCK_STEP)
#define _TIF_UPROBE_SINGLESTEP BIT(TIF_UPROBE_SINGLESTEP)
No point in defining generic items and the upcoming RSEQ optimizations are
only available with this _and_ the generic entry infrastructure, which is
already used by loongarch. So no further action required here.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Huacai Chen <chenhuacai@kernel.org>
---
arch/loongarch/Kconfig | 1
arch/loongarch/include/asm/thread_info.h | 76 +++++++++++++------------------
2 files changed, 35 insertions(+), 42 deletions(-)
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -140,6 +140,7 @@ config LOONGARCH
select HAVE_EBPF_JIT
select HAVE_EFFICIENT_UNALIGNED_ACCESS if !ARCH_STRICT_ALIGN
select HAVE_EXIT_THREAD
+ select HAVE_GENERIC_TIF_BITS
select HAVE_GUP_FAST
select HAVE_FTRACE_GRAPH_FUNC
select HAVE_FUNCTION_ARG_ACCESS_API
--- a/arch/loongarch/include/asm/thread_info.h
+++ b/arch/loongarch/include/asm/thread_info.h
@@ -65,50 +65,42 @@ register unsigned long current_stack_poi
* access
* - pending work-to-be-done flags are in LSW
* - other flags in MSW
+ *
+ * Tell the generic TIF infrastructure which special bits loongarch supports
*/
-#define TIF_NEED_RESCHED 0 /* rescheduling necessary */
-#define TIF_NEED_RESCHED_LAZY 1 /* lazy rescheduling necessary */
-#define TIF_SIGPENDING 2 /* signal pending */
-#define TIF_NOTIFY_RESUME 3 /* callback before returning to user */
-#define TIF_NOTIFY_SIGNAL 4 /* signal notifications exist */
-#define TIF_RESTORE_SIGMASK 5 /* restore signal mask in do_signal() */
-#define TIF_NOHZ 6 /* in adaptive nohz mode */
-#define TIF_UPROBE 7 /* breakpointed or singlestepping */
-#define TIF_USEDFPU 8 /* FPU was used by this task this quantum (SMP) */
-#define TIF_USEDSIMD 9 /* SIMD has been used this quantum */
-#define TIF_MEMDIE 10 /* is terminating due to OOM killer */
-#define TIF_FIXADE 11 /* Fix address errors in software */
-#define TIF_LOGADE 12 /* Log address errors to syslog */
-#define TIF_32BIT_REGS 13 /* 32-bit general purpose registers */
-#define TIF_32BIT_ADDR 14 /* 32-bit address space */
-#define TIF_LOAD_WATCH 15 /* If set, load watch registers */
-#define TIF_SINGLESTEP 16 /* Single Step */
-#define TIF_LSX_CTX_LIVE 17 /* LSX context must be preserved */
-#define TIF_LASX_CTX_LIVE 18 /* LASX context must be preserved */
-#define TIF_USEDLBT 19 /* LBT was used by this task this quantum (SMP) */
-#define TIF_LBT_CTX_LIVE 20 /* LBT context must be preserved */
-#define TIF_PATCH_PENDING 21 /* pending live patching update */
+#define HAVE_TIF_NEED_RESCHED_LAZY
+#define HAVE_TIF_RESTORE_SIGMASK
-#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
-#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY)
-#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
-#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
-#define _TIF_NOTIFY_SIGNAL (1<<TIF_NOTIFY_SIGNAL)
-#define _TIF_NOHZ (1<<TIF_NOHZ)
-#define _TIF_UPROBE (1<<TIF_UPROBE)
-#define _TIF_USEDFPU (1<<TIF_USEDFPU)
-#define _TIF_USEDSIMD (1<<TIF_USEDSIMD)
-#define _TIF_FIXADE (1<<TIF_FIXADE)
-#define _TIF_LOGADE (1<<TIF_LOGADE)
-#define _TIF_32BIT_REGS (1<<TIF_32BIT_REGS)
-#define _TIF_32BIT_ADDR (1<<TIF_32BIT_ADDR)
-#define _TIF_LOAD_WATCH (1<<TIF_LOAD_WATCH)
-#define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP)
-#define _TIF_LSX_CTX_LIVE (1<<TIF_LSX_CTX_LIVE)
-#define _TIF_LASX_CTX_LIVE (1<<TIF_LASX_CTX_LIVE)
-#define _TIF_USEDLBT (1<<TIF_USEDLBT)
-#define _TIF_LBT_CTX_LIVE (1<<TIF_LBT_CTX_LIVE)
-#define _TIF_PATCH_PENDING (1<<TIF_PATCH_PENDING)
+#include <asm-generic/thread_info_tif.h>
+
+/* Architecture specific bits */
+#define TIF_NOHZ 16 /* in adaptive nohz mode */
+#define TIF_USEDFPU 17 /* FPU was used by this task this quantum (SMP) */
+#define TIF_USEDSIMD 18 /* SIMD has been used this quantum */
+#define TIF_FIXADE 19 /* Fix address errors in software */
+#define TIF_LOGADE 20 /* Log address errors to syslog */
+#define TIF_32BIT_REGS 21 /* 32-bit general purpose registers */
+#define TIF_32BIT_ADDR 22 /* 32-bit address space */
+#define TIF_LOAD_WATCH 23 /* If set, load watch registers */
+#define TIF_SINGLESTEP 24 /* Single Step */
+#define TIF_LSX_CTX_LIVE 25 /* LSX context must be preserved */
+#define TIF_LASX_CTX_LIVE 26 /* LASX context must be preserved */
+#define TIF_USEDLBT 27 /* LBT was used by this task this quantum (SMP) */
+#define TIF_LBT_CTX_LIVE 28 /* LBT context must be preserved */
+
+#define _TIF_NOHZ BIT(TIF_NOHZ)
+#define _TIF_USEDFPU BIT(TIF_USEDFPU)
+#define _TIF_USEDSIMD BIT(TIF_USEDSIMD)
+#define _TIF_FIXADE BIT(TIF_FIXADE)
+#define _TIF_LOGADE BIT(TIF_LOGADE)
+#define _TIF_32BIT_REGS BIT(TIF_32BIT_REGS)
+#define _TIF_32BIT_ADDR BIT(TIF_32BIT_ADDR)
+#define _TIF_LOAD_WATCH BIT(TIF_LOAD_WATCH)
+#define _TIF_SINGLESTEP BIT(TIF_SINGLESTEP)
+#define _TIF_LSX_CTX_LIVE BIT(TIF_LSX_CTX_LIVE)
+#define _TIF_LASX_CTX_LIVE BIT(TIF_LASX_CTX_LIVE)
+#define _TIF_USEDLBT BIT(TIF_USEDLBT)
+#define _TIF_LBT_CTX_LIVE BIT(TIF_LBT_CTX_LIVE)
#endif /* __KERNEL__ */
#endif /* _ASM_THREAD_INFO_H */
The following commit has been merged into the core/core branch of tip:
Commit-ID: f9629891d407e455dc1334e1594108c73074fe8b
Gitweb: https://git.kernel.org/tip/f9629891d407e455dc1334e1594108c73074fe8b
Author: Thomas Gleixner <tglx@linutronix.de>
AuthorDate: Mon, 08 Sep 2025 23:32:36 +02:00
Committer: Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Wed, 17 Sep 2025 08:14:04 +02:00
loongarch: Use generic TIF bits
No point in defining generic items and the upcoming RSEQ optimizations are
only available with this _and_ the generic entry infrastructure, which is
already used by loongarch. So no further action required here.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/loongarch/Kconfig | 1 +-
arch/loongarch/include/asm/thread_info.h | 76 ++++++++++-------------
2 files changed, 35 insertions(+), 42 deletions(-)
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index f0abc38..2e90d86 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -140,6 +140,7 @@ config LOONGARCH
select HAVE_EBPF_JIT
select HAVE_EFFICIENT_UNALIGNED_ACCESS if !ARCH_STRICT_ALIGN
select HAVE_EXIT_THREAD
+ select HAVE_GENERIC_TIF_BITS
select HAVE_GUP_FAST
select HAVE_FTRACE_GRAPH_FUNC
select HAVE_FUNCTION_ARG_ACCESS_API
diff --git a/arch/loongarch/include/asm/thread_info.h b/arch/loongarch/include/asm/thread_info.h
index 9dfa2ef..def7cb1 100644
--- a/arch/loongarch/include/asm/thread_info.h
+++ b/arch/loongarch/include/asm/thread_info.h
@@ -65,50 +65,42 @@ register unsigned long current_stack_pointer __asm__("$sp");
* access
* - pending work-to-be-done flags are in LSW
* - other flags in MSW
+ *
+ * Tell the generic TIF infrastructure which special bits loongarch supports
*/
-#define TIF_NEED_RESCHED 0 /* rescheduling necessary */
-#define TIF_NEED_RESCHED_LAZY 1 /* lazy rescheduling necessary */
-#define TIF_SIGPENDING 2 /* signal pending */
-#define TIF_NOTIFY_RESUME 3 /* callback before returning to user */
-#define TIF_NOTIFY_SIGNAL 4 /* signal notifications exist */
-#define TIF_RESTORE_SIGMASK 5 /* restore signal mask in do_signal() */
-#define TIF_NOHZ 6 /* in adaptive nohz mode */
-#define TIF_UPROBE 7 /* breakpointed or singlestepping */
-#define TIF_USEDFPU 8 /* FPU was used by this task this quantum (SMP) */
-#define TIF_USEDSIMD 9 /* SIMD has been used this quantum */
-#define TIF_MEMDIE 10 /* is terminating due to OOM killer */
-#define TIF_FIXADE 11 /* Fix address errors in software */
-#define TIF_LOGADE 12 /* Log address errors to syslog */
-#define TIF_32BIT_REGS 13 /* 32-bit general purpose registers */
-#define TIF_32BIT_ADDR 14 /* 32-bit address space */
-#define TIF_LOAD_WATCH 15 /* If set, load watch registers */
-#define TIF_SINGLESTEP 16 /* Single Step */
-#define TIF_LSX_CTX_LIVE 17 /* LSX context must be preserved */
-#define TIF_LASX_CTX_LIVE 18 /* LASX context must be preserved */
-#define TIF_USEDLBT 19 /* LBT was used by this task this quantum (SMP) */
-#define TIF_LBT_CTX_LIVE 20 /* LBT context must be preserved */
-#define TIF_PATCH_PENDING 21 /* pending live patching update */
+#define HAVE_TIF_NEED_RESCHED_LAZY
+#define HAVE_TIF_RESTORE_SIGMASK
+
+#include <asm-generic/thread_info_tif.h>
+
+/* Architecture specific bits */
+#define TIF_NOHZ 16 /* in adaptive nohz mode */
+#define TIF_USEDFPU 17 /* FPU was used by this task this quantum (SMP) */
+#define TIF_USEDSIMD 18 /* SIMD has been used this quantum */
+#define TIF_FIXADE 19 /* Fix address errors in software */
+#define TIF_LOGADE 20 /* Log address errors to syslog */
+#define TIF_32BIT_REGS 21 /* 32-bit general purpose registers */
+#define TIF_32BIT_ADDR 22 /* 32-bit address space */
+#define TIF_LOAD_WATCH 23 /* If set, load watch registers */
+#define TIF_SINGLESTEP 24 /* Single Step */
+#define TIF_LSX_CTX_LIVE 25 /* LSX context must be preserved */
+#define TIF_LASX_CTX_LIVE 26 /* LASX context must be preserved */
+#define TIF_USEDLBT 27 /* LBT was used by this task this quantum (SMP) */
+#define TIF_LBT_CTX_LIVE 28 /* LBT context must be preserved */
-#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
-#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY)
-#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
-#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
-#define _TIF_NOTIFY_SIGNAL (1<<TIF_NOTIFY_SIGNAL)
-#define _TIF_NOHZ (1<<TIF_NOHZ)
-#define _TIF_UPROBE (1<<TIF_UPROBE)
-#define _TIF_USEDFPU (1<<TIF_USEDFPU)
-#define _TIF_USEDSIMD (1<<TIF_USEDSIMD)
-#define _TIF_FIXADE (1<<TIF_FIXADE)
-#define _TIF_LOGADE (1<<TIF_LOGADE)
-#define _TIF_32BIT_REGS (1<<TIF_32BIT_REGS)
-#define _TIF_32BIT_ADDR (1<<TIF_32BIT_ADDR)
-#define _TIF_LOAD_WATCH (1<<TIF_LOAD_WATCH)
-#define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP)
-#define _TIF_LSX_CTX_LIVE (1<<TIF_LSX_CTX_LIVE)
-#define _TIF_LASX_CTX_LIVE (1<<TIF_LASX_CTX_LIVE)
-#define _TIF_USEDLBT (1<<TIF_USEDLBT)
-#define _TIF_LBT_CTX_LIVE (1<<TIF_LBT_CTX_LIVE)
-#define _TIF_PATCH_PENDING (1<<TIF_PATCH_PENDING)
+#define _TIF_NOHZ BIT(TIF_NOHZ)
+#define _TIF_USEDFPU BIT(TIF_USEDFPU)
+#define _TIF_USEDSIMD BIT(TIF_USEDSIMD)
+#define _TIF_FIXADE BIT(TIF_FIXADE)
+#define _TIF_LOGADE BIT(TIF_LOGADE)
+#define _TIF_32BIT_REGS BIT(TIF_32BIT_REGS)
+#define _TIF_32BIT_ADDR BIT(TIF_32BIT_ADDR)
+#define _TIF_LOAD_WATCH BIT(TIF_LOAD_WATCH)
+#define _TIF_SINGLESTEP BIT(TIF_SINGLESTEP)
+#define _TIF_LSX_CTX_LIVE BIT(TIF_LSX_CTX_LIVE)
+#define _TIF_LASX_CTX_LIVE BIT(TIF_LASX_CTX_LIVE)
+#define _TIF_USEDLBT BIT(TIF_USEDLBT)
+#define _TIF_LBT_CTX_LIVE BIT(TIF_LBT_CTX_LIVE)
#endif /* __KERNEL__ */
#endif /* _ASM_THREAD_INFO_H */
No point in defining generic items and the upcoming RSEQ optimizations are
only available with this _and_ the generic entry infrastructure, which is
already used by RISCV. So no further action required here.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
---
arch/riscv/Kconfig | 1 +
arch/riscv/include/asm/thread_info.h | 29 ++++++++++++-----------------
2 files changed, 13 insertions(+), 17 deletions(-)
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -161,6 +161,7 @@ config RISCV
select HAVE_FUNCTION_GRAPH_FREGS
select HAVE_FUNCTION_TRACER if !XIP_KERNEL && HAVE_DYNAMIC_FTRACE
select HAVE_EBPF_JIT if MMU
+ select HAVE_GENERIC_TIF_BITS
select HAVE_GUP_FAST if MMU
select HAVE_FUNCTION_ARG_ACCESS_API
select HAVE_FUNCTION_ERROR_INJECTION
--- a/arch/riscv/include/asm/thread_info.h
+++ b/arch/riscv/include/asm/thread_info.h
@@ -107,23 +107,18 @@ int arch_dup_task_struct(struct task_str
* - pending work-to-be-done flags are in lowest half-word
* - other flags in upper half-word(s)
*/
-#define TIF_NEED_RESCHED 0 /* rescheduling necessary */
-#define TIF_NEED_RESCHED_LAZY 1 /* Lazy rescheduling needed */
-#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
-#define TIF_SIGPENDING 3 /* signal pending */
-#define TIF_RESTORE_SIGMASK 4 /* restore signal mask in do_signal() */
-#define TIF_MEMDIE 5 /* is terminating due to OOM killer */
-#define TIF_NOTIFY_SIGNAL 9 /* signal notifications exist */
-#define TIF_UPROBE 10 /* uprobe breakpoint or singlestep */
-#define TIF_32BIT 11 /* compat-mode 32bit process */
-#define TIF_RISCV_V_DEFER_RESTORE 12 /* restore Vector before returing to user */
-#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
-#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
-#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
-#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
-#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
-#define _TIF_UPROBE (1 << TIF_UPROBE)
-#define _TIF_RISCV_V_DEFER_RESTORE (1 << TIF_RISCV_V_DEFER_RESTORE)
+/*
+ * Tell the generic TIF infrastructure which bits riscv supports
+ */
+#define HAVE_TIF_NEED_RESCHED_LAZY
+#define HAVE_TIF_RESTORE_SIGMASK
+
+#include <asm-generic/thread_info_tif.h>
+
+#define TIF_32BIT 16 /* compat-mode 32bit process */
+#define TIF_RISCV_V_DEFER_RESTORE 17 /* restore Vector before returning to user */
+
+#define _TIF_RISCV_V_DEFER_RESTORE BIT(TIF_RISCV_V_DEFER_RESTORE)
#endif /* _ASM_RISCV_THREAD_INFO_H */
The following commit has been merged into the core/core branch of tip:
Commit-ID: 41e871f2b63edaf8da20907d512cd5d91ba51476
Gitweb: https://git.kernel.org/tip/41e871f2b63edaf8da20907d512cd5d91ba51476
Author: Thomas Gleixner <tglx@linutronix.de>
AuthorDate: Mon, 08 Sep 2025 23:32:38 +02:00
Committer: Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Wed, 17 Sep 2025 08:14:05 +02:00
riscv: Use generic TIF bits
No point in defining generic items and the upcoming RSEQ optimizations are
only available with this _and_ the generic entry infrastructure, which is
already used by RISCV. So no further action required here.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/riscv/Kconfig | 1 +-
arch/riscv/include/asm/thread_info.h | 31 +++++++++++----------------
2 files changed, 14 insertions(+), 18 deletions(-)
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 51dcd8e..0c28061 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -161,6 +161,7 @@ config RISCV
select HAVE_FUNCTION_GRAPH_FREGS
select HAVE_FUNCTION_TRACER if !XIP_KERNEL && HAVE_DYNAMIC_FTRACE
select HAVE_EBPF_JIT if MMU
+ select HAVE_GENERIC_TIF_BITS
select HAVE_GUP_FAST if MMU
select HAVE_FUNCTION_ARG_ACCESS_API
select HAVE_FUNCTION_ERROR_INJECTION
diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
index f5916a7..a315b02 100644
--- a/arch/riscv/include/asm/thread_info.h
+++ b/arch/riscv/include/asm/thread_info.h
@@ -107,23 +107,18 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
* - pending work-to-be-done flags are in lowest half-word
* - other flags in upper half-word(s)
*/
-#define TIF_NEED_RESCHED 0 /* rescheduling necessary */
-#define TIF_NEED_RESCHED_LAZY 1 /* Lazy rescheduling needed */
-#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
-#define TIF_SIGPENDING 3 /* signal pending */
-#define TIF_RESTORE_SIGMASK 4 /* restore signal mask in do_signal() */
-#define TIF_MEMDIE 5 /* is terminating due to OOM killer */
-#define TIF_NOTIFY_SIGNAL 9 /* signal notifications exist */
-#define TIF_UPROBE 10 /* uprobe breakpoint or singlestep */
-#define TIF_32BIT 11 /* compat-mode 32bit process */
-#define TIF_RISCV_V_DEFER_RESTORE 12 /* restore Vector before returing to user */
-
-#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
-#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
-#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
-#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
-#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
-#define _TIF_UPROBE (1 << TIF_UPROBE)
-#define _TIF_RISCV_V_DEFER_RESTORE (1 << TIF_RISCV_V_DEFER_RESTORE)
+
+/*
+ * Tell the generic TIF infrastructure which bits riscv supports
+ */
+#define HAVE_TIF_NEED_RESCHED_LAZY
+#define HAVE_TIF_RESTORE_SIGMASK
+
+#include <asm-generic/thread_info_tif.h>
+
+#define TIF_32BIT 16 /* compat-mode 32bit process */
+#define TIF_RISCV_V_DEFER_RESTORE 17 /* restore Vector before returning to user */
+
+#define _TIF_RISCV_V_DEFER_RESTORE BIT(TIF_RISCV_V_DEFER_RESTORE)
#endif /* _ASM_RISCV_THREAD_INFO_H */
TIF_NOTIFY_RESUME is a multiplexing TIF bit, which is suboptimal especially
with the RSEQ fast path depending on it, but not really handling it.
Define a separate TIF_RSEQ in the generic TIF space and enable the full
separation of fast and slow path for architectures which utilize that.
That avoids the hassle with invocations of resume_user_mode_work() from
hypervisors, which clear TIF_NOTIFY_RESUME. It makes the therefore required
re-evaluation at the end of vcpu_run() a NOOP on architectures which
utilize the generic TIF space and have a separate TIF_RSEQ.
The hypervisor TIF handling does not include the separate TIF_RSEQ as there
is no point in doing so. The guest does neither know nor care about the VMM
host applications RSEQ state. That state is only relevant when the ioctl()
returns to user space.
The fastpath implementation still utilizes TIF_NOTIFY_RESUME for failure
handling, but this only happens within exit_to_user_mode_loop(), so
arguably the hypervisor ioctl() code is long done when this happens.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
V4: Adjust it to the new outer loop mechanism
V3: Updated the comment for rseq_virt_userspace_exit() - Sean
Added a static assert for TIF_RSEQ != TIF_NOTIFY_RESUME - Sean
---
include/asm-generic/thread_info_tif.h | 3 +++
include/linux/irq-entry-common.h | 2 +-
include/linux/rseq.h | 32 ++++++++++++++++++++++++--------
include/linux/rseq_entry.h | 29 +++++++++++++++++++++++++++--
include/linux/thread_info.h | 5 +++++
kernel/entry/common.c | 10 ++++++++--
6 files changed, 68 insertions(+), 13 deletions(-)
--- a/include/asm-generic/thread_info_tif.h
+++ b/include/asm-generic/thread_info_tif.h
@@ -45,4 +45,7 @@
# define _TIF_RESTORE_SIGMASK BIT(TIF_RESTORE_SIGMASK)
#endif
+#define TIF_RSEQ 11 // Run RSEQ fast path
+#define _TIF_RSEQ BIT(TIF_RSEQ)
+
#endif /* _ASM_GENERIC_THREAD_INFO_TIF_H_ */
--- a/include/linux/irq-entry-common.h
+++ b/include/linux/irq-entry-common.h
@@ -30,7 +30,7 @@
#define EXIT_TO_USER_MODE_WORK \
(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \
- _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \
+ _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | _TIF_RSEQ | \
ARCH_EXIT_TO_USER_MODE_WORK)
/**
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -40,7 +40,7 @@ static inline void rseq_signal_deliver(s
static inline void rseq_raise_notify_resume(struct task_struct *t)
{
- set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
+ set_tsk_thread_flag(t, TIF_RSEQ);
}
/* Invoked from context switch to force evaluation on exit to user */
@@ -112,17 +112,25 @@ static inline void rseq_force_update(voi
/*
* KVM/HYPERV invoke resume_user_mode_work() before entering guest mode,
- * which clears TIF_NOTIFY_RESUME. To avoid updating user space RSEQ in
- * that case just to do it eventually again before returning to user space,
- * the entry resume_user_mode_work() invocation is ignored as the register
- * argument is NULL.
+ * which clears TIF_NOTIFY_RESUME on architectures that don't use the
+ * generic TIF bits and therefore can't provide a separate TIF_RSEQ flag.
*
- * After returning from guest mode, they have to invoke this function to
- * re-raise TIF_NOTIFY_RESUME if necessary.
+ * This avoids updating user space RSEQ in that case just to do it
+ * eventually again before returning to user space: __rseq_handle_slowpath()
+ * does nothing when invoked with NULL register state.
+ *
+ * After returning from guest mode, before exiting to userspace, hypervisors
+ * must invoke this function to re-raise TIF_NOTIFY_RESUME if necessary.
*/
static inline void rseq_virt_userspace_exit(void)
{
if (current->rseq.event.sched_switch)
+ /*
+ * The generic optimization for deferring RSEQ updates until the next
+ * exit relies on having a dedicated TIF_RSEQ.
+ */
+ if (!IS_ENABLED(CONFIG_HAVE_GENERIC_TIF_BITS) &&
+ current->rseq.event.sched_switch)
rseq_raise_notify_resume(current);
}
@@ -151,8 +159,16 @@ static inline void rseq_fork(struct task
/*
* If it has rseq, force it into the slow path right away
* because it is guaranteed to fault.
+ *
+ * Setting TIF_NOTIFY_RESUME is redundant but harmless for
+ * architectures which do not have a separate TIF_RSEQ, but
+ * for those who do it's required to enforce the slow path
+ * as the scheduler sets only TIF_RSEQ.
*/
- t->rseq.event.slowpath = t->rseq.event.has_rseq;
+ if (t->rseq.event.has_rseq) {
+ t->rseq.event.slowpath = true;
+ set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
+ }
}
}
--- a/include/linux/rseq_entry.h
+++ b/include/linux/rseq_entry.h
@@ -522,11 +522,36 @@ static __always_inline bool __rseq_exit_
return true;
}
-static __always_inline bool rseq_exit_to_user_mode_restart(struct pt_regs *regs)
+/*
+ * Required to allow conversion to GENERIC_ENTRY w/o GENERIC_TIF_BITS
+ * as that's not upstream yet.
+ */
+#ifdef CONFIG_HAVE_GENERIC_TIF_BITS
+static __always_inline bool test_tif_rseq(unsigned long ti_work)
{
+ return ti_work & _TIF_RSEQ;
+}
+
+static __always_inline void clear_tif_rseq(void)
+{
+ static_assert(TIF_RSEQ != TIF_NOTIFY_RESUME);
+ clear_thread_flag(TIF_RSEQ);
+}
+#else
+static __always_inline bool test_tif_rseq(unsigned long ti_work) { return true; }
+static __always_inline void clear_tif_rseq(void) { }
+#endif
+
+static __always_inline bool
+rseq_exit_to_user_mode_restart(struct pt_regs *regs, unsigned long ti_work)
+{
+ if (likely(!test_tif_rseq(ti_work)))
+ return false;
+
if (unlikely(__rseq_exit_to_user_mode_restart(regs)))
return true;
+ clear_tif_rseq();
return false;
}
@@ -570,7 +595,7 @@ static inline void rseq_debug_syscall_re
}
#else /* CONFIG_RSEQ */
static inline void rseq_note_user_irq_entry(void) { }
-static inline bool rseq_exit_to_user_mode_restart(struct pt_regs *regs)
+static inline bool rseq_exit_to_user_mode_restart(struct pt_regs *regs, unsigned long ti_work)
{
return false;
}
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -67,6 +67,11 @@ enum syscall_work_bit {
#define _TIF_NEED_RESCHED_LAZY _TIF_NEED_RESCHED
#endif
+#ifndef TIF_RSEQ
+# define TIF_RSEQ TIF_NOTIFY_RESUME
+# define _TIF_RSEQ _TIF_NOTIFY_RESUME
+#endif
+
#ifdef __KERNEL__
#ifndef arch_set_restart_data
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -11,6 +11,12 @@
/* Workaround to allow gradual conversion of architecture code */
void __weak arch_do_signal_or_restart(struct pt_regs *regs) { }
+#ifdef CONFIG_HAVE_GENERIC_TIF_BITS
+#define EXIT_TO_USER_MODE_WORK_LOOP (EXIT_TO_USER_MODE_WORK & ~_TIF_RSEQ)
+#else
+#define EXIT_TO_USER_MODE_WORK_LOOP (EXIT_TO_USER_MODE_WORK)
+#endif
+
static __always_inline unsigned long __exit_to_user_mode_loop(struct pt_regs *regs,
unsigned long ti_work)
{
@@ -18,7 +24,7 @@ static __always_inline unsigned long __e
* Before returning to user space ensure that all pending work
* items have been completed.
*/
- while (ti_work & EXIT_TO_USER_MODE_WORK) {
+ while (ti_work & EXIT_TO_USER_MODE_WORK_LOOP) {
local_irq_enable_exit_to_user(ti_work);
@@ -68,7 +74,7 @@ static __always_inline unsigned long __e
for (;;) {
ti_work = __exit_to_user_mode_loop(regs, ti_work);
- if (likely(!rseq_exit_to_user_mode_restart(regs)))
+ if (likely(!rseq_exit_to_user_mode_restart(regs, ti_work)))
return ti_work;
ti_work = read_thread_flags();
}
© 2016 - 2026 Red Hat, Inc.