zicfiss / zicfilp introduce a new exception to the priv isa, the `software
check exception`, with cause code = 18. This patch implements the software
check exception.
Additionally, it implements a cfi violation handler which checks the code
reported in xtval. If xtval=2, the sw check exception happened because an
indirect branch did not land on a 4 byte aligned PC, did not land on an
`lpad` instruction, or the label value embedded in `lpad` did not match the
label value set up in `x7`. If xtval=3, the sw check exception happened
because of a mismatch between the link register (x1 or x5) and the top of
the shadow stack (on execution of `sspopchk`).
In case of cfi violation, SIGSEGV is raised with code=SEGV_CPERR.
SEGV_CPERR was introduced by x86 shadow stack patches.
To keep uprobes working, handle the uprobe event first, before reporting
the CFI violation in the software-check exception handler. When the landing
pad is activated and the uprobe point is set at the lpad instruction at the
beginning of a function, the system triggers a software-check exception
instead of an ebreak exception due to the exception priority, so without
this ordering the uprobe would not work.
Co-developed-by: Zong Li <zong.li@sifive.com>
Reviewed-by: Zong Li <zong.li@sifive.com>
Signed-off-by: Zong Li <zong.li@sifive.com>
Signed-off-by: Deepak Gupta <debug@rivosinc.com>
---
arch/riscv/include/asm/asm-prototypes.h | 1 +
arch/riscv/include/asm/entry-common.h | 2 ++
arch/riscv/kernel/entry.S | 3 ++
arch/riscv/kernel/traps.c | 51 +++++++++++++++++++++++++++++++++
4 files changed, 57 insertions(+)
diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h
index cd627ec289f1..5a27cefd7805 100644
--- a/arch/riscv/include/asm/asm-prototypes.h
+++ b/arch/riscv/include/asm/asm-prototypes.h
@@ -51,6 +51,7 @@ DECLARE_DO_ERROR_INFO(do_trap_ecall_u);
DECLARE_DO_ERROR_INFO(do_trap_ecall_s);
DECLARE_DO_ERROR_INFO(do_trap_ecall_m);
DECLARE_DO_ERROR_INFO(do_trap_break);
+DECLARE_DO_ERROR_INFO(do_trap_software_check);
asmlinkage void handle_bad_stack(struct pt_regs *regs);
asmlinkage void do_page_fault(struct pt_regs *regs);
diff --git a/arch/riscv/include/asm/entry-common.h b/arch/riscv/include/asm/entry-common.h
index b28ccc6cdeea..34ed149af5d1 100644
--- a/arch/riscv/include/asm/entry-common.h
+++ b/arch/riscv/include/asm/entry-common.h
@@ -40,4 +40,6 @@ static inline int handle_misaligned_store(struct pt_regs *regs)
}
#endif
+bool handle_user_cfi_violation(struct pt_regs *regs);
+
#endif /* _ASM_RISCV_ENTRY_COMMON_H */
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 978115567bca..8d25837a9384 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -474,6 +474,9 @@ SYM_DATA_START_LOCAL(excp_vect_table)
RISCV_PTR do_page_fault /* load page fault */
RISCV_PTR do_trap_unknown
RISCV_PTR do_page_fault /* store page fault */
+ RISCV_PTR do_trap_unknown /* cause=16 */
+ RISCV_PTR do_trap_unknown /* cause=17 */
+ RISCV_PTR do_trap_software_check /* cause=18 is sw check exception */
SYM_DATA_END_LABEL(excp_vect_table, SYM_L_LOCAL, excp_vect_table_end)
#ifndef CONFIG_MMU
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index 8ff8e8b36524..64388370e1ad 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -354,6 +354,57 @@ void do_trap_ecall_u(struct pt_regs *regs)
}
+#define CFI_TVAL_FCFI_CODE 2
+#define CFI_TVAL_BCFI_CODE 3
+/* Handle user cfi violations; returns true if the trap was consumed here. */
+bool handle_user_cfi_violation(struct pt_regs *regs)
+{
+ unsigned long tval = csr_read(CSR_TVAL);
+ bool is_fcfi = (tval == CFI_TVAL_FCFI_CODE && cpu_supports_indirect_br_lp_instr());
+ bool is_bcfi = (tval == CFI_TVAL_BCFI_CODE && cpu_supports_shadow_stack());
+
+ /*
+ * Handle a uprobe event first. A probe point can sit on a valid target
+ * of an indirect jump or call; in that case a forward cfi violation is
+ * raised instead of a breakpoint exception.
+ */
+ if (is_fcfi && probe_breakpoint_handler(regs))
+ return true;
+
+ if (is_fcfi || is_bcfi) {
+ do_trap_error(regs, SIGSEGV, SEGV_CPERR, regs->epc,
+ "Oops - control flow violation");
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * The software check exception (cause 18) is defined by the RISC-V cfi spec.
+ * It is raised when:
+ * a) An indirect branch doesn't land on a 4 byte aligned PC or on an `lpad`
+ * instruction, or the `label` value encoded in the `lpad` instr doesn't
+ * match the value set up in `x7`. The code reported in `xtval` is 2.
+ * b) An `sspopchk` instruction finds a mismatch between the top of the shadow
+ * stack (ssp) and x1/x5. The code reported in `xtval` is 3.
+ */
+asmlinkage __visible __trap_section void do_trap_software_check(struct pt_regs *regs)
+{
+ if (user_mode(regs)) {
+ irqentry_enter_from_user_mode(regs);
+
+ /* Not a cfi violation: fall through to the unknown trap handler. */
+ if (!handle_user_cfi_violation(regs))
+ do_trap_unknown(regs);
+
+ irqentry_exit_to_user_mode(regs);
+ } else {
+ /* A sw check exception taken from kernel mode is a kernel bug. */
+ die(regs, "Kernel BUG");
+ }
+}
+
#ifdef CONFIG_MMU
asmlinkage __visible noinstr void do_page_fault(struct pt_regs *regs)
{
--
2.43.0
On Thu, Jun 5, 2025 at 1:17 AM Deepak Gupta <debug@rivosinc.com> wrote: > > zicfiss / zicfilp introduces a new exception to priv isa `software check > exception` with cause code = 18. This patch implements software check > exception. > > Additionally it implements a cfi violation handler which checks for code > in xtval. If xtval=2, it means that sw check exception happened because of > an indirect branch not landing on 4 byte aligned PC or not landing on > `lpad` instruction or label value embedded in `lpad` not matching label > value setup in `x7`. If xtval=3, it means that sw check exception happened > because of mismatch between link register (x1 or x5) and top of shadow > stack (on execution of `sspopchk`). > > In case of cfi violation, SIGSEGV is raised with code=SEGV_CPERR. > SEGV_CPERR was introduced by x86 shadow stack patches. > > To keep uprobes working, handle the uprobe event first before reporting > the CFI violation in software-check exception handler. Because when the > landing pad is activated, if the uprobe point is set at the lpad > instruction at the beginning of a function, the system triggers a software > -check exception instead of an ebreak exception due to the exception > priority, then uprobe can't work successfully. 
> > Co-developed-by: Zong Li <zong.li@sifive.com> > Reviewed-by: Zong Li <zong.li@sifive.com> > Signed-off-by: Zong Li <zong.li@sifive.com> > Signed-off-by: Deepak Gupta <debug@rivosinc.com> > --- > arch/riscv/include/asm/asm-prototypes.h | 1 + > arch/riscv/include/asm/entry-common.h | 2 ++ > arch/riscv/kernel/entry.S | 3 ++ > arch/riscv/kernel/traps.c | 51 +++++++++++++++++++++++++++++++++ > 4 files changed, 57 insertions(+) > > diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h > index cd627ec289f1..5a27cefd7805 100644 > --- a/arch/riscv/include/asm/asm-prototypes.h > +++ b/arch/riscv/include/asm/asm-prototypes.h > @@ -51,6 +51,7 @@ DECLARE_DO_ERROR_INFO(do_trap_ecall_u); > DECLARE_DO_ERROR_INFO(do_trap_ecall_s); > DECLARE_DO_ERROR_INFO(do_trap_ecall_m); > DECLARE_DO_ERROR_INFO(do_trap_break); > +DECLARE_DO_ERROR_INFO(do_trap_software_check); > > asmlinkage void handle_bad_stack(struct pt_regs *regs); > asmlinkage void do_page_fault(struct pt_regs *regs); > diff --git a/arch/riscv/include/asm/entry-common.h b/arch/riscv/include/asm/entry-common.h > index b28ccc6cdeea..34ed149af5d1 100644 > --- a/arch/riscv/include/asm/entry-common.h > +++ b/arch/riscv/include/asm/entry-common.h > @@ -40,4 +40,6 @@ static inline int handle_misaligned_store(struct pt_regs *regs) > } > #endif > > +bool handle_user_cfi_violation(struct pt_regs *regs); > + > #endif /* _ASM_RISCV_ENTRY_COMMON_H */ > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S > index 978115567bca..8d25837a9384 100644 > --- a/arch/riscv/kernel/entry.S > +++ b/arch/riscv/kernel/entry.S > @@ -474,6 +474,9 @@ SYM_DATA_START_LOCAL(excp_vect_table) > RISCV_PTR do_page_fault /* load page fault */ > RISCV_PTR do_trap_unknown > RISCV_PTR do_page_fault /* store page fault */ > + RISCV_PTR do_trap_unknown /* cause=16 */ > + RISCV_PTR do_trap_unknown /* cause=17 */ > + RISCV_PTR do_trap_software_check /* cause=18 is sw check exception */ > 
SYM_DATA_END_LABEL(excp_vect_table, SYM_L_LOCAL, excp_vect_table_end) > > #ifndef CONFIG_MMU > diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c > index 8ff8e8b36524..64388370e1ad 100644 > --- a/arch/riscv/kernel/traps.c > +++ b/arch/riscv/kernel/traps.c > @@ -354,6 +354,57 @@ void do_trap_ecall_u(struct pt_regs *regs) > > } > > +#define CFI_TVAL_FCFI_CODE 2 > +#define CFI_TVAL_BCFI_CODE 3 > +/* handle cfi violations */ > +bool handle_user_cfi_violation(struct pt_regs *regs) > +{ > + unsigned long tval = csr_read(CSR_TVAL); > + bool is_fcfi = (tval == CFI_TVAL_FCFI_CODE && cpu_supports_indirect_br_lp_instr()); > + bool is_bcfi = (tval == CFI_TVAL_BCFI_CODE && cpu_supports_shadow_stack()); > + > + /* > + * Handle uprobe event first. The probe point can be a valid target > + * of indirect jumps or calls, in this case, forward cfi violation > + * will be triggered instead of breakpoint exception. > + */ > + if (is_fcfi && probe_breakpoint_handler(regs)) > + return true; Hi Deepak, Sorry for missing something earlier. I think we would like to clear sstatus.SPELP in the uprobe handling case. For example: diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index c2ea999c1167..e8492bb57e09 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -349,8 +349,10 @@ bool handle_user_cfi_violation(struct pt_regs *regs) bool is_fcfi = (tval == CFI_TVAL_FCFI_CODE && cpu_supports_indirect_br_lp_instr()); bool is_bcfi = (tval == CFI_TVAL_BCFI_CODE && cpu_supports_shadow_stack()); - if (is_fcfi && probe_breakpoint_handler(regs)) + if (is_fcfi && probe_breakpoint_handler(regs)) { + regs->status = regs->status & ~SR_ELP; return true; + } if (is_fcfi || is_bcfi) { do_trap_error(regs, SIGSEGV, SEGV_CPERR, regs->epc, When a user mode CFI violation occurs, the ELP state should be 1, and the system traps into supervisor mode. During this trap, sstatus.SPELP is set to 1, and the ELP state is reset to 0. 
If we don't clear sstatus.SPELP, the ELP state will become 1 again after executing the sret instruction. As a result, the system might trigger another forward CFI violation upon executing the next instruction in the user program, unless it happens to be a lpad instruction. The previous patch was tested on QEMU, but QEMU does not set the sstatus.SPELP bit to 1 when a forward CFI violation occurs. Therefore, I suspect that QEMU might also require some fixes. Thanks > + > + if (is_fcfi || is_bcfi) { > + do_trap_error(regs, SIGSEGV, SEGV_CPERR, regs->epc, > + "Oops - control flow violation"); > + return true; > + } > + > + return false; > +} > + > +/* > + * software check exception is defined with risc-v cfi spec. Software check > + * exception is raised when:- > + * a) An indirect branch doesn't land on 4 byte aligned PC or `lpad` > + * instruction or `label` value programmed in `lpad` instr doesn't > + * match with value setup in `x7`. reported code in `xtval` is 2. > + * b) `sspopchk` instruction finds a mismatch between top of shadow stack (ssp) > + * and x1/x5. reported code in `xtval` is 3. > + */ > +asmlinkage __visible __trap_section void do_trap_software_check(struct pt_regs *regs) > +{ > + if (user_mode(regs)) { > + irqentry_enter_from_user_mode(regs); > + > + /* not a cfi violation, then merge into flow of unknown trap handler */ > + if (!handle_user_cfi_violation(regs)) > + do_trap_unknown(regs); > + > + irqentry_exit_to_user_mode(regs); > + } else { > + /* sw check exception coming from kernel is a bug in kernel */ > + die(regs, "Kernel BUG"); > + } > +} > + > #ifdef CONFIG_MMU > asmlinkage __visible noinstr void do_page_fault(struct pt_regs *regs) > { > > -- > 2.43.0 >
On Mon, Jun 16, 2025 at 3:31 PM Zong Li <zong.li@sifive.com> wrote: > > On Thu, Jun 5, 2025 at 1:17 AM Deepak Gupta <debug@rivosinc.com> wrote: > > > > zicfiss / zicfilp introduces a new exception to priv isa `software check > > exception` with cause code = 18. This patch implements software check > > exception. > > > > Additionally it implements a cfi violation handler which checks for code > > in xtval. If xtval=2, it means that sw check exception happened because of > > an indirect branch not landing on 4 byte aligned PC or not landing on > > `lpad` instruction or label value embedded in `lpad` not matching label > > value setup in `x7`. If xtval=3, it means that sw check exception happened > > because of mismatch between link register (x1 or x5) and top of shadow > > stack (on execution of `sspopchk`). > > > > In case of cfi violation, SIGSEGV is raised with code=SEGV_CPERR. > > SEGV_CPERR was introduced by x86 shadow stack patches. > > > > To keep uprobes working, handle the uprobe event first before reporting > > the CFI violation in software-check exception handler. Because when the > > landing pad is activated, if the uprobe point is set at the lpad > > instruction at the beginning of a function, the system triggers a software > > -check exception instead of an ebreak exception due to the exception > > priority, then uprobe can't work successfully. 
> > > > Co-developed-by: Zong Li <zong.li@sifive.com> > > Reviewed-by: Zong Li <zong.li@sifive.com> > > Signed-off-by: Zong Li <zong.li@sifive.com> > > Signed-off-by: Deepak Gupta <debug@rivosinc.com> > > --- > > arch/riscv/include/asm/asm-prototypes.h | 1 + > > arch/riscv/include/asm/entry-common.h | 2 ++ > > arch/riscv/kernel/entry.S | 3 ++ > > arch/riscv/kernel/traps.c | 51 +++++++++++++++++++++++++++++++++ > > 4 files changed, 57 insertions(+) > > > > diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h > > index cd627ec289f1..5a27cefd7805 100644 > > --- a/arch/riscv/include/asm/asm-prototypes.h > > +++ b/arch/riscv/include/asm/asm-prototypes.h > > @@ -51,6 +51,7 @@ DECLARE_DO_ERROR_INFO(do_trap_ecall_u); > > DECLARE_DO_ERROR_INFO(do_trap_ecall_s); > > DECLARE_DO_ERROR_INFO(do_trap_ecall_m); > > DECLARE_DO_ERROR_INFO(do_trap_break); > > +DECLARE_DO_ERROR_INFO(do_trap_software_check); > > > > asmlinkage void handle_bad_stack(struct pt_regs *regs); > > asmlinkage void do_page_fault(struct pt_regs *regs); > > diff --git a/arch/riscv/include/asm/entry-common.h b/arch/riscv/include/asm/entry-common.h > > index b28ccc6cdeea..34ed149af5d1 100644 > > --- a/arch/riscv/include/asm/entry-common.h > > +++ b/arch/riscv/include/asm/entry-common.h > > @@ -40,4 +40,6 @@ static inline int handle_misaligned_store(struct pt_regs *regs) > > } > > #endif > > > > +bool handle_user_cfi_violation(struct pt_regs *regs); > > + > > #endif /* _ASM_RISCV_ENTRY_COMMON_H */ > > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S > > index 978115567bca..8d25837a9384 100644 > > --- a/arch/riscv/kernel/entry.S > > +++ b/arch/riscv/kernel/entry.S > > @@ -474,6 +474,9 @@ SYM_DATA_START_LOCAL(excp_vect_table) > > RISCV_PTR do_page_fault /* load page fault */ > > RISCV_PTR do_trap_unknown > > RISCV_PTR do_page_fault /* store page fault */ > > + RISCV_PTR do_trap_unknown /* cause=16 */ > > + RISCV_PTR do_trap_unknown /* cause=17 */ > > + 
RISCV_PTR do_trap_software_check /* cause=18 is sw check exception */ > > SYM_DATA_END_LABEL(excp_vect_table, SYM_L_LOCAL, excp_vect_table_end) > > > > #ifndef CONFIG_MMU > > diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c > > index 8ff8e8b36524..64388370e1ad 100644 > > --- a/arch/riscv/kernel/traps.c > > +++ b/arch/riscv/kernel/traps.c > > @@ -354,6 +354,57 @@ void do_trap_ecall_u(struct pt_regs *regs) > > > > } > > > > +#define CFI_TVAL_FCFI_CODE 2 > > +#define CFI_TVAL_BCFI_CODE 3 > > +/* handle cfi violations */ > > +bool handle_user_cfi_violation(struct pt_regs *regs) > > +{ > > + unsigned long tval = csr_read(CSR_TVAL); > > + bool is_fcfi = (tval == CFI_TVAL_FCFI_CODE && cpu_supports_indirect_br_lp_instr()); > > + bool is_bcfi = (tval == CFI_TVAL_BCFI_CODE && cpu_supports_shadow_stack()); > > + > > + /* > > + * Handle uprobe event first. The probe point can be a valid target > > + * of indirect jumps or calls, in this case, forward cfi violation > > + * will be triggered instead of breakpoint exception. > > + */ > > + if (is_fcfi && probe_breakpoint_handler(regs)) > > + return true; > > Hi Deepak, > Sorry for missing something earlier. I think we would like to clear > sstatus.SPELP in the uprobe handling case. 
For example: > > diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c > index c2ea999c1167..e8492bb57e09 100644 > --- a/arch/riscv/kernel/traps.c > +++ b/arch/riscv/kernel/traps.c > @@ -349,8 +349,10 @@ bool handle_user_cfi_violation(struct pt_regs *regs) > bool is_fcfi = (tval == CFI_TVAL_FCFI_CODE && > cpu_supports_indirect_br_lp_instr()); > bool is_bcfi = (tval == CFI_TVAL_BCFI_CODE && > cpu_supports_shadow_stack()); > > - if (is_fcfi && probe_breakpoint_handler(regs)) > + if (is_fcfi && probe_breakpoint_handler(regs)) { > + regs->status = regs->status & ~SR_ELP; > return true; > + } > > if (is_fcfi || is_bcfi) { > do_trap_error(regs, SIGSEGV, SEGV_CPERR, regs->epc, > > > When a user mode CFI violation occurs, the ELP state should be 1, and > the system traps into supervisor mode. During this trap, sstatus.SPELP > is set to 1, and the ELP state is reset to 0. If we don’t clear > sstatus.SPELP, the ELP state will become 1 again after executing the > sret instruction. As a result, the system might trigger another > forward CFI violation upon executing the next instruction in the user > program, unless it happens to be a lpad instruction. > > The previous patch was tested on QEMU, but QEMU does not set the > sstatus.SPELP bit to 1 when a forward CFI violation occurs. Therefore, > I suspect that QEMU might also require some fixes. Hi Deepak, The issue with QEMU was that the sw-check exception bit in medeleg couldn't be set. This has been fixed in the latest QEMU mainline. I have re-tested the latest QEMU version, and it works. > > Thanks > > > + > > + if (is_fcfi || is_bcfi) { > > + do_trap_error(regs, SIGSEGV, SEGV_CPERR, regs->epc, > > + "Oops - control flow violation"); > > + return true; > > + } > > + > > + return false; > > +} > > + > > +/* > > + * software check exception is defined with risc-v cfi spec. 
Software check > > + * exception is raised when:- > > + * a) An indirect branch doesn't land on 4 byte aligned PC or `lpad` > > + * instruction or `label` value programmed in `lpad` instr doesn't > > + * match with value setup in `x7`. reported code in `xtval` is 2. > > + * b) `sspopchk` instruction finds a mismatch between top of shadow stack (ssp) > > + * and x1/x5. reported code in `xtval` is 3. > > + */ > > +asmlinkage __visible __trap_section void do_trap_software_check(struct pt_regs *regs) > > +{ > > + if (user_mode(regs)) { > > + irqentry_enter_from_user_mode(regs); > > + > > + /* not a cfi violation, then merge into flow of unknown trap handler */ > > + if (!handle_user_cfi_violation(regs)) > > + do_trap_unknown(regs); > > + > > + irqentry_exit_to_user_mode(regs); > > + } else { > > + /* sw check exception coming from kernel is a bug in kernel */ > > + die(regs, "Kernel BUG"); > > + } > > +} > > + > > #ifdef CONFIG_MMU > > asmlinkage __visible noinstr void do_page_fault(struct pt_regs *regs) > > { > > > > -- > > 2.43.0 > >
Hi Zong, On Thu, Jun 19, 2025 at 7:16 PM Zong Li <zong.li@sifive.com> wrote: > > On Mon, Jun 16, 2025 at 3:31 PM Zong Li <zong.li@sifive.com> wrote: > > > > On Thu, Jun 5, 2025 at 1:17 AM Deepak Gupta <debug@rivosinc.com> wrote: > > > > > > zicfiss / zicfilp introduces a new exception to priv isa `software check > > > exception` with cause code = 18. This patch implements software check > > > exception. > > > ..... > > When a user mode CFI violation occurs, the ELP state should be 1, and > > the system traps into supervisor mode. During this trap, sstatus.SPELP > > is set to 1, and the ELP state is reset to 0. If we don’t clear > > sstatus.SPELP, the ELP state will become 1 again after executing the > > sret instruction. As a result, the system might trigger another > > forward CFI violation upon executing the next instruction in the user > > program, unless it happens to be a lpad instruction. > > > > The previous patch was tested on QEMU, but QEMU does not set the > > sstatus.SPELP bit to 1 when a forward CFI violation occurs. Therefore, > > I suspect that QEMU might also require some fixes. > > Hi Deepak, > The issue with QEMU was that the sw-check exception bit in medeleg > couldn't be set. This has been fixed in the latest QEMU mainline. I > have re-tested the latest QEMU version, and it works. What was this issue, can you point me to the patch in mainline? > > > > > Thanks > > > > > + > > > + if (is_fcfi || is_bcfi) { > > > + do_trap_error(regs, SIGSEGV, SEGV_CPERR, regs->epc, > > > + "Oops - control flow violation"); > > > + return true; > > > + } > > > + > > > + return false; > > > +} > > > + > > > +/* > > > + * software check exception is defined with risc-v cfi spec. Software check > > > + * exception is raised when:- > > > + * a) An indirect branch doesn't land on 4 byte aligned PC or `lpad` > > > + * instruction or `label` value programmed in `lpad` instr doesn't > > > + * match with value setup in `x7`. reported code in `xtval` is 2. 
> > > + * b) `sspopchk` instruction finds a mismatch between top of shadow stack (ssp) > > > + * and x1/x5. reported code in `xtval` is 3. > > > + */ > > > +asmlinkage __visible __trap_section void do_trap_software_check(struct pt_regs *regs) > > > +{ > > > + if (user_mode(regs)) { > > > + irqentry_enter_from_user_mode(regs); > > > + > > > + /* not a cfi violation, then merge into flow of unknown trap handler */ > > > + if (!handle_user_cfi_violation(regs)) > > > + do_trap_unknown(regs); > > > + > > > + irqentry_exit_to_user_mode(regs); > > > + } else { > > > + /* sw check exception coming from kernel is a bug in kernel */ > > > + die(regs, "Kernel BUG"); > > > + } > > > +} > > > + > > > #ifdef CONFIG_MMU > > > asmlinkage __visible noinstr void do_page_fault(struct pt_regs *regs) > > > { > > > > > > -- > > > 2.43.0 > > >
On Wed, Jul 16, 2025 at 5:34 AM Deepak Gupta <debug@rivosinc.com> wrote: > > Hi Zong, > > > On Thu, Jun 19, 2025 at 7:16 PM Zong Li <zong.li@sifive.com> wrote: > > > > On Mon, Jun 16, 2025 at 3:31 PM Zong Li <zong.li@sifive.com> wrote: > > > > > > On Thu, Jun 5, 2025 at 1:17 AM Deepak Gupta <debug@rivosinc.com> wrote: > > > > > > > > zicfiss / zicfilp introduces a new exception to priv isa `software check > > > > exception` with cause code = 18. This patch implements software check > > > > exception. > > > > > ..... > > > > When a user mode CFI violation occurs, the ELP state should be 1, and > > > the system traps into supervisor mode. During this trap, sstatus.SPELP > > > is set to 1, and the ELP state is reset to 0. If we don’t clear > > > sstatus.SPELP, the ELP state will become 1 again after executing the > > > sret instruction. As a result, the system might trigger another > > > forward CFI violation upon executing the next instruction in the user > > > program, unless it happens to be a lpad instruction. > > > > > > The previous patch was tested on QEMU, but QEMU does not set the > > > sstatus.SPELP bit to 1 when a forward CFI violation occurs. Therefore, > > > I suspect that QEMU might also require some fixes. > > > > Hi Deepak, > > The issue with QEMU was that the sw-check exception bit in medeleg > > couldn't be set. This has been fixed in the latest QEMU mainline. I > > have re-tested the latest QEMU version, and it works. > > What was this issue, can you point me to the patch in mainline? 
Hi Deepak The issue was that my QEMU setup somehow missed the change of `target/riscv/csr.c` in your following patch: https://github.com/qemu/qemu/commit/6031102401ae8a69a87b20fbec2aae666625d96a After I upgraded to the latest QEMU source, I found the kernel issue if we didn't clear sstatus.SPELP in the handler Thanks > > > > > > > > > Thanks > > > > > > > + > > > > + if (is_fcfi || is_bcfi) { > > > > + do_trap_error(regs, SIGSEGV, SEGV_CPERR, regs->epc, > > > > + "Oops - control flow violation"); > > > > + return true; > > > > + } > > > > + > > > > + return false; > > > > +} > > > > + > > > > +/* > > > > + * software check exception is defined with risc-v cfi spec. Software check > > > > + * exception is raised when:- > > > > + * a) An indirect branch doesn't land on 4 byte aligned PC or `lpad` > > > > + * instruction or `label` value programmed in `lpad` instr doesn't > > > > + * match with value setup in `x7`. reported code in `xtval` is 2. > > > > + * b) `sspopchk` instruction finds a mismatch between top of shadow stack (ssp) > > > > + * and x1/x5. reported code in `xtval` is 3. > > > > + */ > > > > +asmlinkage __visible __trap_section void do_trap_software_check(struct pt_regs *regs) > > > > +{ > > > > + if (user_mode(regs)) { > > > > + irqentry_enter_from_user_mode(regs); > > > > + > > > > + /* not a cfi violation, then merge into flow of unknown trap handler */ > > > > + if (!handle_user_cfi_violation(regs)) > > > > + do_trap_unknown(regs); > > > > + > > > > + irqentry_exit_to_user_mode(regs); > > > > + } else { > > > > + /* sw check exception coming from kernel is a bug in kernel */ > > > > + die(regs, "Kernel BUG"); > > > > + } > > > > +} > > > > + > > > > #ifdef CONFIG_MMU > > > > asmlinkage __visible noinstr void do_page_fault(struct pt_regs *regs) > > > > { > > > > > > > > -- > > > > 2.43.0 > > > >
On Tue, Jul 15, 2025 at 7:06 PM Zong Li <zong.li@sifive.com> wrote: > > On Wed, Jul 16, 2025 at 5:34 AM Deepak Gupta <debug@rivosinc.com> wrote: > > > > Hi Zong, > > > > > > On Thu, Jun 19, 2025 at 7:16 PM Zong Li <zong.li@sifive.com> wrote: > > > > > > On Mon, Jun 16, 2025 at 3:31 PM Zong Li <zong.li@sifive.com> wrote: > > > > > > > > On Thu, Jun 5, 2025 at 1:17 AM Deepak Gupta <debug@rivosinc.com> wrote: > > > > > > > > > > zicfiss / zicfilp introduces a new exception to priv isa `software check > > > > > exception` with cause code = 18. This patch implements software check > > > > > exception. > > > > > > > ..... > > > > > > When a user mode CFI violation occurs, the ELP state should be 1, and > > > > the system traps into supervisor mode. During this trap, sstatus.SPELP > > > > is set to 1, and the ELP state is reset to 0. If we don’t clear > > > > sstatus.SPELP, the ELP state will become 1 again after executing the > > > > sret instruction. As a result, the system might trigger another > > > > forward CFI violation upon executing the next instruction in the user > > > > program, unless it happens to be a lpad instruction. > > > > > > > > The previous patch was tested on QEMU, but QEMU does not set the > > > > sstatus.SPELP bit to 1 when a forward CFI violation occurs. Therefore, > > > > I suspect that QEMU might also require some fixes. > > > > > > Hi Deepak, > > > The issue with QEMU was that the sw-check exception bit in medeleg > > > couldn't be set. This has been fixed in the latest QEMU mainline. I > > > have re-tested the latest QEMU version, and it works. > > > > What was this issue, can you point me to the patch in mainline? 
> > Hi Deepak > The issue was that my QEMU setup somehow missed the change of > `target/riscv/csr.c` in your following patch: > https://github.com/qemu/qemu/commit/6031102401ae8a69a87b20fbec2aae666625d96a > After I upgraded to the latest QEMU source, I found the kernel issue > if we didn't clear sstatus.SPELP in the handler > Thanks Aah ok, got it. > > > > > > > > > > > > > > Thanks > > > > > > > > > + > > > > > + if (is_fcfi || is_bcfi) { > > > > > + do_trap_error(regs, SIGSEGV, SEGV_CPERR, regs->epc, > > > > > + "Oops - control flow violation"); > > > > > + return true; > > > > > + } > > > > > + > > > > > + return false; > > > > > +} > > > > > + > > > > > +/* > > > > > + * software check exception is defined with risc-v cfi spec. Software check > > > > > + * exception is raised when:- > > > > > + * a) An indirect branch doesn't land on 4 byte aligned PC or `lpad` > > > > > + * instruction or `label` value programmed in `lpad` instr doesn't > > > > > + * match with value setup in `x7`. reported code in `xtval` is 2. > > > > > + * b) `sspopchk` instruction finds a mismatch between top of shadow stack (ssp) > > > > > + * and x1/x5. reported code in `xtval` is 3. > > > > > + */ > > > > > +asmlinkage __visible __trap_section void do_trap_software_check(struct pt_regs *regs) > > > > > +{ > > > > > + if (user_mode(regs)) { > > > > > + irqentry_enter_from_user_mode(regs); > > > > > + > > > > > + /* not a cfi violation, then merge into flow of unknown trap handler */ > > > > > + if (!handle_user_cfi_violation(regs)) > > > > > + do_trap_unknown(regs); > > > > > + > > > > > + irqentry_exit_to_user_mode(regs); > > > > > + } else { > > > > > + /* sw check exception coming from kernel is a bug in kernel */ > > > > > + die(regs, "Kernel BUG"); > > > > > + } > > > > > +} > > > > > + > > > > > #ifdef CONFIG_MMU > > > > > asmlinkage __visible noinstr void do_page_fault(struct pt_regs *regs) > > > > > { > > > > > > > > > > -- > > > > > 2.43.0 > > > > >
On Thu, Jun 19, 2025 at 7:16 PM Zong Li <zong.li@sifive.com> wrote: > > On Mon, Jun 16, 2025 at 3:31 PM Zong Li <zong.li@sifive.com> wrote: > > > > On Thu, Jun 5, 2025 at 1:17 AM Deepak Gupta <debug@rivosinc.com> wrote: > > > > > > zicfiss / zicfilp introduces a new exception to priv isa `software check > > > exception` with cause code = 18. This patch implements software check > > > exception. > > > > > > Additionally it implements a cfi violation handler which checks for code > > > in xtval. If xtval=2, it means that sw check exception happened because of > > > an indirect branch not landing on 4 byte aligned PC or not landing on > > > `lpad` instruction or label value embedded in `lpad` not matching label > > > value setup in `x7`. If xtval=3, it means that sw check exception happened > > > because of mismatch between link register (x1 or x5) and top of shadow > > > stack (on execution of `sspopchk`). > > > > > > In case of cfi violation, SIGSEGV is raised with code=SEGV_CPERR. > > > SEGV_CPERR was introduced by x86 shadow stack patches. > > > > > > To keep uprobes working, handle the uprobe event first before reporting > > > the CFI violation in software-check exception handler. Because when the > > > landing pad is activated, if the uprobe point is set at the lpad > > > instruction at the beginning of a function, the system triggers a software > > > -check exception instead of an ebreak exception due to the exception > > > priority, then uprobe can't work successfully. 
> > > > > > Co-developed-by: Zong Li <zong.li@sifive.com> > > > Reviewed-by: Zong Li <zong.li@sifive.com> > > > Signed-off-by: Zong Li <zong.li@sifive.com> > > > Signed-off-by: Deepak Gupta <debug@rivosinc.com> > > > --- > > > arch/riscv/include/asm/asm-prototypes.h | 1 + > > > arch/riscv/include/asm/entry-common.h | 2 ++ > > > arch/riscv/kernel/entry.S | 3 ++ > > > arch/riscv/kernel/traps.c | 51 +++++++++++++++++++++++++++++++++ > > > 4 files changed, 57 insertions(+) > > > > > > diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h > > > index cd627ec289f1..5a27cefd7805 100644 > > > --- a/arch/riscv/include/asm/asm-prototypes.h > > > +++ b/arch/riscv/include/asm/asm-prototypes.h > > > @@ -51,6 +51,7 @@ DECLARE_DO_ERROR_INFO(do_trap_ecall_u); > > > DECLARE_DO_ERROR_INFO(do_trap_ecall_s); > > > DECLARE_DO_ERROR_INFO(do_trap_ecall_m); > > > DECLARE_DO_ERROR_INFO(do_trap_break); > > > +DECLARE_DO_ERROR_INFO(do_trap_software_check); > > > > > > asmlinkage void handle_bad_stack(struct pt_regs *regs); > > > asmlinkage void do_page_fault(struct pt_regs *regs); > > > diff --git a/arch/riscv/include/asm/entry-common.h b/arch/riscv/include/asm/entry-common.h > > > index b28ccc6cdeea..34ed149af5d1 100644 > > > --- a/arch/riscv/include/asm/entry-common.h > > > +++ b/arch/riscv/include/asm/entry-common.h > > > @@ -40,4 +40,6 @@ static inline int handle_misaligned_store(struct pt_regs *regs) > > > } > > > #endif > > > > > > +bool handle_user_cfi_violation(struct pt_regs *regs); > > > + > > > #endif /* _ASM_RISCV_ENTRY_COMMON_H */ > > > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S > > > index 978115567bca..8d25837a9384 100644 > > > --- a/arch/riscv/kernel/entry.S > > > +++ b/arch/riscv/kernel/entry.S > > > @@ -474,6 +474,9 @@ SYM_DATA_START_LOCAL(excp_vect_table) > > > RISCV_PTR do_page_fault /* load page fault */ > > > RISCV_PTR do_trap_unknown > > > RISCV_PTR do_page_fault /* store page fault */ > > > + RISCV_PTR 
do_trap_unknown /* cause=16 */ > > > + RISCV_PTR do_trap_unknown /* cause=17 */ > > > + RISCV_PTR do_trap_software_check /* cause=18 is sw check exception */ > > > SYM_DATA_END_LABEL(excp_vect_table, SYM_L_LOCAL, excp_vect_table_end) > > > > > > #ifndef CONFIG_MMU > > > diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c > > > index 8ff8e8b36524..64388370e1ad 100644 > > > --- a/arch/riscv/kernel/traps.c > > > +++ b/arch/riscv/kernel/traps.c > > > @@ -354,6 +354,57 @@ void do_trap_ecall_u(struct pt_regs *regs) > > > > > > } > > > > > > +#define CFI_TVAL_FCFI_CODE 2 > > > +#define CFI_TVAL_BCFI_CODE 3 > > > +/* handle cfi violations */ > > > +bool handle_user_cfi_violation(struct pt_regs *regs) > > > +{ > > > + unsigned long tval = csr_read(CSR_TVAL); > > > + bool is_fcfi = (tval == CFI_TVAL_FCFI_CODE && cpu_supports_indirect_br_lp_instr()); > > > + bool is_bcfi = (tval == CFI_TVAL_BCFI_CODE && cpu_supports_shadow_stack()); > > > + > > > + /* > > > + * Handle uprobe event first. The probe point can be a valid target > > > + * of indirect jumps or calls, in this case, forward cfi violation > > > + * will be triggered instead of breakpoint exception. > > > + */ > > > + if (is_fcfi && probe_breakpoint_handler(regs)) > > > + return true; > > > > Hi Deepak, > > Sorry for missing something earlier. I think we would like to clear > > sstatus.SPELP in the uprobe handling case. 
For example: > > > > diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c > > index c2ea999c1167..e8492bb57e09 100644 > > --- a/arch/riscv/kernel/traps.c > > +++ b/arch/riscv/kernel/traps.c > > @@ -349,8 +349,10 @@ bool handle_user_cfi_violation(struct pt_regs *regs) > > bool is_fcfi = (tval == CFI_TVAL_FCFI_CODE && > > cpu_supports_indirect_br_lp_instr()); > > bool is_bcfi = (tval == CFI_TVAL_BCFI_CODE && > > cpu_supports_shadow_stack()); > > > > - if (is_fcfi && probe_breakpoint_handler(regs)) > > + if (is_fcfi && probe_breakpoint_handler(regs)) { > > + regs->status = regs->status & ~SR_ELP; > > return true; > > + } Make sense. I'll pick it up in the next version. Thanks. > > > > if (is_fcfi || is_bcfi) { > > do_trap_error(regs, SIGSEGV, SEGV_CPERR, regs->epc, > > > > > > When a user mode CFI violation occurs, the ELP state should be 1, and > > the system traps into supervisor mode. During this trap, sstatus.SPELP > > is set to 1, and the ELP state is reset to 0. If we don’t clear > > sstatus.SPELP, the ELP state will become 1 again after executing the > > sret instruction. As a result, the system might trigger another > > forward CFI violation upon executing the next instruction in the user > > program, unless it happens to be a lpad instruction. > > > > The previous patch was tested on QEMU, but QEMU does not set the > > sstatus.SPELP bit to 1 when a forward CFI violation occurs. Therefore, > > I suspect that QEMU might also require some fixes. > > Hi Deepak, > The issue with QEMU was that the sw-check exception bit in medeleg > couldn't be set. This has been fixed in the latest QEMU mainline. I > have re-tested the latest QEMU version, and it works. Thanks for the fix. 
> > > > > Thanks > > > > > + > > > + if (is_fcfi || is_bcfi) { > > > + do_trap_error(regs, SIGSEGV, SEGV_CPERR, regs->epc, > > > + "Oops - control flow violation"); > > > + return true; > > > + } > > > + > > > + return false; > > > +} > > > + > > > +/* > > > + * software check exception is defined with risc-v cfi spec. Software check > > > + * exception is raised when:- > > > + * a) An indirect branch doesn't land on 4 byte aligned PC or `lpad` > > > + * instruction or `label` value programmed in `lpad` instr doesn't > > > + * match with value setup in `x7`. reported code in `xtval` is 2. > > > + * b) `sspopchk` instruction finds a mismatch between top of shadow stack (ssp) > > > + * and x1/x5. reported code in `xtval` is 3. > > > + */ > > > +asmlinkage __visible __trap_section void do_trap_software_check(struct pt_regs *regs) > > > +{ > > > + if (user_mode(regs)) { > > > + irqentry_enter_from_user_mode(regs); > > > + > > > + /* not a cfi violation, then merge into flow of unknown trap handler */ > > > + if (!handle_user_cfi_violation(regs)) > > > + do_trap_unknown(regs); > > > + > > > + irqentry_exit_to_user_mode(regs); > > > + } else { > > > + /* sw check exception coming from kernel is a bug in kernel */ > > > + die(regs, "Kernel BUG"); > > > + } > > > +} > > > + > > > #ifdef CONFIG_MMU > > > asmlinkage __visible noinstr void do_page_fault(struct pt_regs *regs) > > > { > > > > > > -- > > > 2.43.0 > > >
© 2016 - 2025 Red Hat, Inc.