[PATCH v12 05/28] riscv: usercfi state for task and save/restore of CSR_SSP on trap entry/exit

Deepak Gupta posted 28 patches 11 months ago
There is a newer version of this series
[PATCH v12 05/28] riscv: usercfi state for task and save/restore of CSR_SSP on trap entry/exit
Posted by Deepak Gupta 11 months ago
Carves out space in arch specific thread struct for cfi status and shadow
stack in usermode on riscv.

This patch does following
- defines a new structure cfi_status with status bit for cfi feature
- defines shadow stack pointer, base and size in cfi_status structure
- defines offsets to new member fields in thread in asm-offsets.c
- Saves and restore shadow stack pointer on trap entry (U --> S) and exit
  (S --> U)

Shadow stack save/restore is gated on feature availability and implemented
using alternative. CSR can be context switched in `switch_to` as well, but
as soon as kernel shadow stack support gets rolled in, shadow stack pointer
will need to be switched at trap entry/exit point (much like `sp`). It can
be argued that the kernel shadow stack deployment scenario may not be as
prevalent as user mode using this feature. But even if there is some
minimal deployment of kernel shadow stack, that means that it needs to be
supported. And thus save/restore of shadow stack pointer in entry.S instead
of in `switch_to.h`.

Reviewed-by: Charlie Jenkins <charlie@rivosinc.com>
Reviewed-by: Zong Li <zong.li@sifive.com>
Signed-off-by: Deepak Gupta <debug@rivosinc.com>
---
 arch/riscv/include/asm/processor.h   |  1 +
 arch/riscv/include/asm/thread_info.h |  3 +++
 arch/riscv/include/asm/usercfi.h     | 24 ++++++++++++++++++++++++
 arch/riscv/kernel/asm-offsets.c      |  4 ++++
 arch/riscv/kernel/entry.S            | 26 ++++++++++++++++++++++++++
 5 files changed, 58 insertions(+)

diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index e3aba3336e63..d851bb5c6da0 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -14,6 +14,7 @@
 
 #include <asm/ptrace.h>
 #include <asm/hwcap.h>
+#include <asm/usercfi.h>
 
 #define arch_get_mmap_end(addr, len, flags)			\
 ({								\
diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
index f5916a70879a..a0cfe00c2ca6 100644
--- a/arch/riscv/include/asm/thread_info.h
+++ b/arch/riscv/include/asm/thread_info.h
@@ -62,6 +62,9 @@ struct thread_info {
 	long			user_sp;	/* User stack pointer */
 	int			cpu;
 	unsigned long		syscall_work;	/* SYSCALL_WORK_ flags */
+#ifdef CONFIG_RISCV_USER_CFI
+	struct cfi_status	user_cfi_state;
+#endif
 #ifdef CONFIG_SHADOW_CALL_STACK
 	void			*scs_base;
 	void			*scs_sp;
diff --git a/arch/riscv/include/asm/usercfi.h b/arch/riscv/include/asm/usercfi.h
new file mode 100644
index 000000000000..5f2027c51917
--- /dev/null
+++ b/arch/riscv/include/asm/usercfi.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (C) 2024 Rivos, Inc.
+ * Deepak Gupta <debug@rivosinc.com>
+ */
+#ifndef _ASM_RISCV_USERCFI_H
+#define _ASM_RISCV_USERCFI_H
+
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+
+#ifdef CONFIG_RISCV_USER_CFI
+struct cfi_status {
+	unsigned long ubcfi_en : 1; /* Enable for backward cfi. */
+	unsigned long rsvd : ((sizeof(unsigned long) * 8) - 1);
+	unsigned long user_shdw_stk; /* Current user shadow stack pointer */
+	unsigned long shdw_stk_base; /* Base address of shadow stack */
+	unsigned long shdw_stk_size; /* size of shadow stack */
+};
+
+#endif /* CONFIG_RISCV_USER_CFI */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_RISCV_USERCFI_H */
diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
index e89455a6a0e5..0c188aaf3925 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -50,6 +50,10 @@ void asm_offsets(void)
 #endif
 
 	OFFSET(TASK_TI_CPU_NUM, task_struct, thread_info.cpu);
+#ifdef CONFIG_RISCV_USER_CFI
+	OFFSET(TASK_TI_CFI_STATUS, task_struct, thread_info.user_cfi_state);
+	OFFSET(TASK_TI_USER_SSP, task_struct, thread_info.user_cfi_state.user_shdw_stk);
+#endif
 	OFFSET(TASK_THREAD_F0,  task_struct, thread.fstate.f[0]);
 	OFFSET(TASK_THREAD_F1,  task_struct, thread.fstate.f[1]);
 	OFFSET(TASK_THREAD_F2,  task_struct, thread.fstate.f[2]);
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 33a5a9f2a0d4..68c99124ea55 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -147,6 +147,20 @@ SYM_CODE_START(handle_exception)
 
 	REG_L s0, TASK_TI_USER_SP(tp)
 	csrrc s1, CSR_STATUS, t0
+	/*
+	 * If previous mode was U, capture shadow stack pointer and save it away
+	 * Zero CSR_SSP at the same time for sanitization.
+	 */
+	ALTERNATIVE("nop; nop; nop; nop",
+				__stringify(			\
+				andi s2, s1, SR_SPP;	\
+				bnez s2, skip_ssp_save;	\
+				csrrw s2, CSR_SSP, x0;	\
+				REG_S s2, TASK_TI_USER_SSP(tp); \
+				skip_ssp_save:),
+				0,
+				RISCV_ISA_EXT_ZICFISS,
+				CONFIG_RISCV_USER_CFI)
 	csrr s2, CSR_EPC
 	csrr s3, CSR_TVAL
 	csrr s4, CSR_CAUSE
@@ -236,6 +250,18 @@ SYM_CODE_START_NOALIGN(ret_from_exception)
 	 * structures again.
 	 */
 	csrw CSR_SCRATCH, tp
+
+	/*
+	 * Going back to U mode, restore shadow stack pointer
+	 */
+	ALTERNATIVE("nop; nop",
+				__stringify(					\
+				REG_L s3, TASK_TI_USER_SSP(tp); \
+				csrw CSR_SSP, s3),
+				0,
+				RISCV_ISA_EXT_ZICFISS,
+				CONFIG_RISCV_USER_CFI)
+
 1:
 #ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
 	move a0, sp

-- 
2.34.1
Re: [PATCH v12 05/28] riscv: usercfi state for task and save/restore of CSR_SSP on trap entry/exit
Posted by Radim Krčmář 10 months ago
2025-03-14T14:39:24-07:00, Deepak Gupta <debug@rivosinc.com>:
> diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
> @@ -62,6 +62,9 @@ struct thread_info {
>  	long			user_sp;	/* User stack pointer */
>  	int			cpu;
>  	unsigned long		syscall_work;	/* SYSCALL_WORK_ flags */
> +#ifdef CONFIG_RISCV_USER_CFI
> +	struct cfi_status	user_cfi_state;
> +#endif

I don't think it makes sense to put all the data in thread_info.
kernel_ssp and user_ssp is more than enough and the rest can comfortably
live elsewhere in task_struct.

thread_info is supposed to be as small as possible -- just spanning
multiple cache-lines could be noticeable.

> diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> @@ -147,6 +147,20 @@ SYM_CODE_START(handle_exception)
>  
>  	REG_L s0, TASK_TI_USER_SP(tp)
>  	csrrc s1, CSR_STATUS, t0
> +	/*
> +	 * If previous mode was U, capture shadow stack pointer and save it away
> +	 * Zero CSR_SSP at the same time for sanitization.
> +	 */
> +	ALTERNATIVE("nop; nop; nop; nop",
> +				__stringify(			\
> +				andi s2, s1, SR_SPP;	\
> +				bnez s2, skip_ssp_save;	\
> +				csrrw s2, CSR_SSP, x0;	\
> +				REG_S s2, TASK_TI_USER_SSP(tp); \
> +				skip_ssp_save:),
> +				0,
> +				RISCV_ISA_EXT_ZICFISS,
> +				CONFIG_RISCV_USER_CFI)

(I'd prefer this closer to the user_sp and kernel_sp swap, it's breaking
 the flow here.  We also already know if we've returned from userspace
 or not even without SR_SPP, but reusing the information might tangle
 the logic.)

>  	csrr s2, CSR_EPC
>  	csrr s3, CSR_TVAL
>  	csrr s4, CSR_CAUSE
> @@ -236,6 +250,18 @@ SYM_CODE_START_NOALIGN(ret_from_exception)
>  	csrw CSR_SCRATCH, tp
> +
> +	/*
> +	 * Going back to U mode, restore shadow stack pointer
> +	 */

Are we?  I think we can be just as well returning back to kernel-space.
Similar to how we can enter the exception handler from kernel-space.

> +	ALTERNATIVE("nop; nop",
> +				__stringify(					\
> +				REG_L s3, TASK_TI_USER_SSP(tp); \
> +				csrw CSR_SSP, s3),
> +				0,
> +				RISCV_ISA_EXT_ZICFISS,
> +				CONFIG_RISCV_USER_CFI)
> +

Thanks.
Re: [PATCH v12 05/28] riscv: usercfi state for task and save/restore of CSR_SSP on trap entry/exit
Posted by Deepak Gupta 9 months, 3 weeks ago
Sorry forgot to respond to rest of your comments on this thread.


On Thu, Apr 10, 2025 at 01:04:39PM +0200, Radim Krčmář wrote:
>2025-03-14T14:39:24-07:00, Deepak Gupta <debug@rivosinc.com>:
>> diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
>> @@ -62,6 +62,9 @@ struct thread_info {
>>  	long			user_sp;	/* User stack pointer */
>>  	int			cpu;
>>  	unsigned long		syscall_work;	/* SYSCALL_WORK_ flags */
>> +#ifdef CONFIG_RISCV_USER_CFI
>> +	struct cfi_status	user_cfi_state;
>> +#endif

<... snipped ...>

>
>
>> diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
>> @@ -147,6 +147,20 @@ SYM_CODE_START(handle_exception)
>>
>>  	REG_L s0, TASK_TI_USER_SP(tp)
>>  	csrrc s1, CSR_STATUS, t0
>> +	/*
>> +	 * If previous mode was U, capture shadow stack pointer and save it away
>> +	 * Zero CSR_SSP at the same time for sanitization.
>> +	 */
>> +	ALTERNATIVE("nop; nop; nop; nop",
>> +				__stringify(			\
>> +				andi s2, s1, SR_SPP;	\
>> +				bnez s2, skip_ssp_save;	\
>> +				csrrw s2, CSR_SSP, x0;	\
>> +				REG_S s2, TASK_TI_USER_SSP(tp); \
>> +				skip_ssp_save:),
>> +				0,
>> +				RISCV_ISA_EXT_ZICFISS,
>> +				CONFIG_RISCV_USER_CFI)
>
>(I'd prefer this closer to the user_sp and kernel_sp swap, it's breaking
> the flow here.  We also already know if we've returned from userspace
> or not even without SR_SPP, but reusing the information might tangle
> the logic.)

If CSR_SCRATCH was 0, then we would be coming from kernel else flow goes
to `.Lsave_context`. If we were coming from kernel mode, then eventually
flow merges to `.Lsave_context`.

So we will be saving CSR_SSP on all kernel --> kernel trap handling. That
would be unnecessary. IIRC, this was one of the first review comments in
early RFC series of these patch series (to not touch CSR_SSP un-necessarily)

We can avoid that by ensuring when we branch by determining if we are coming
from user to something like `.Lsave_ssp` which eventually merges into
".Lsave_context". And if we were coming from kernel then we would branch to
`.Lsave_context` and thus skipping ssp save logic. But # of branches it
introduces in early exception handling is equivalent to what current patches
do. So I don't see any value in doing that.

Let me know if I am missing something.

>
>>  	csrr s2, CSR_EPC
>>  	csrr s3, CSR_TVAL
>>  	csrr s4, CSR_CAUSE
>> @@ -236,6 +250,18 @@ SYM_CODE_START_NOALIGN(ret_from_exception)
>>  	csrw CSR_SCRATCH, tp
>> +
>> +	/*
>> +	 * Going back to U mode, restore shadow stack pointer
>> +	 */
I can remove my comment because it's obvious.

>
>Are we?  I think we can be just as well returning back to kernel-space.
>Similar to how we can enter the exception handler from kernel-space.

Yes we are. See excerpt from `ret_from_exception` in `entry.S`

"""
SYM_CODE_START_NOALIGN(ret_from_exception)
	REG_L s0, PT_STATUS(sp)
#ifdef CONFIG_RISCV_M_MODE
	/* the MPP value is too large to be used as an immediate arg for addi */
	li t0, SR_MPP
	and s0, s0, t0
#else
	andi s0, s0, SR_SPP
#endif
	bnez s0, 1f

<... snipped ...>

	/*
	 * Going back to U mode, restore shadow stack pointer
	 */
	ALTERNATIVE("nops(2)",
				__stringify(			\
				REG_L s3, TASK_TI_USER_SSP(tp); \
				csrw CSR_SSP, s3),
				0,
				RISCV_ISA_EXT_ZICFISS,
				CONFIG_RISCV_USER_CFI)

1:
#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
	move a0, sp
	call riscv_v_context_nesting_end

<... snipped ...>

"""


>
>> +	ALTERNATIVE("nop; nop",
>> +				__stringify(					\
>> +				REG_L s3, TASK_TI_USER_SSP(tp); \
>> +				csrw CSR_SSP, s3),
>> +				0,
>> +				RISCV_ISA_EXT_ZICFISS,
>> +				CONFIG_RISCV_USER_CFI)
>> +
>
>Thanks.
Re: [PATCH v12 05/28] riscv: usercfi state for task and save/restore of CSR_SSP on trap entry/exit
Posted by Radim Krčmář 9 months, 3 weeks ago
2025-04-23T17:23:56-07:00, Deepak Gupta <debug@rivosinc.com>:
> On Thu, Apr 10, 2025 at 01:04:39PM +0200, Radim Krčmář wrote:
>>2025-03-14T14:39:24-07:00, Deepak Gupta <debug@rivosinc.com>:
>>> diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
>>> @@ -147,6 +147,20 @@ SYM_CODE_START(handle_exception)
>>>
>>>  	REG_L s0, TASK_TI_USER_SP(tp)
>>>  	csrrc s1, CSR_STATUS, t0
>>> +	/*
>>> +	 * If previous mode was U, capture shadow stack pointer and save it away
>>> +	 * Zero CSR_SSP at the same time for sanitization.
>>> +	 */
>>> +	ALTERNATIVE("nop; nop; nop; nop",
>>> +				__stringify(			\
>>> +				andi s2, s1, SR_SPP;	\
>>> +				bnez s2, skip_ssp_save;	\
>>> +				csrrw s2, CSR_SSP, x0;	\
>>> +				REG_S s2, TASK_TI_USER_SSP(tp); \
>>> +				skip_ssp_save:),
>>> +				0,
>>> +				RISCV_ISA_EXT_ZICFISS,
>>> +				CONFIG_RISCV_USER_CFI)
>>
>>(I'd prefer this closer to the user_sp and kernel_sp swap, it's breaking
>> the flow here.  We also already know if we've returned from userspace
>> or not even without SR_SPP, but reusing the information might tangle
>> the logic.)
>
> If CSR_SCRATCH was 0, then we would be coming from kernel else flow goes
> to `.Lsave_context`. If we were coming from kernel mode, then eventually
> flow merges to `.Lsave_context`.
>
> So we will be saving CSR_SSP on all kernel -- > kernel trap handling. That
> would be unnecessary. IIRC, this was one of the first review comments in
> early RFC series of these patch series (to not touch CSR_SSP un-necessarily)
>
> We can avoid that by ensuring when we branch by determining if we are coming
> from user to something like `.Lsave_ssp` which eventually merges into
> ".Lsave_context". And if we were coming from kernel then we would branch to
> `.Lsave_context` and thus skipping ssp save logic. But # of branches it
> introduces in early exception handling is equivalent to what current patches
> do. So I don't see any value in doing that.
>
> Let me know if I am missing something.

Right, it's hard to avoid the extra branches.

I think we could modify the entry point (STVEC), so we start at
different paths based on kernel/userspace trap and only jump once to the
common code, like:

  SYM_CODE_START(handle_exception_kernel)
    /* kernel setup magic */
    j handle_exception_common
  SYM_CODE_START(handle_exception_user)
    /* userspace setup magic */
  handle_exception_common:

This is not a suggestion for this series.  I would be perfectly happy
with just a cleaner code.

Would it be possible to hide the ALTERNATIVE ugliness behind a macro and
move it outside the code block that saves pt_regs?

Thanks.
Re: [PATCH v12 05/28] riscv: usercfi state for task and save/restore of CSR_SSP on trap entry/exit
Posted by Deepak Gupta 9 months, 3 weeks ago
On Thu, Apr 24, 2025 at 02:16:32PM +0200, Radim Krčmář wrote:
>2025-04-23T17:23:56-07:00, Deepak Gupta <debug@rivosinc.com>:
>> On Thu, Apr 10, 2025 at 01:04:39PM +0200, Radim Krčmář wrote:
>>>2025-03-14T14:39:24-07:00, Deepak Gupta <debug@rivosinc.com>:
>>>> diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
>>>> @@ -147,6 +147,20 @@ SYM_CODE_START(handle_exception)
>>>>
>>>>  	REG_L s0, TASK_TI_USER_SP(tp)
>>>>  	csrrc s1, CSR_STATUS, t0
>>>> +	/*
>>>> +	 * If previous mode was U, capture shadow stack pointer and save it away
>>>> +	 * Zero CSR_SSP at the same time for sanitization.
>>>> +	 */
>>>> +	ALTERNATIVE("nop; nop; nop; nop",
>>>> +				__stringify(			\
>>>> +				andi s2, s1, SR_SPP;	\
>>>> +				bnez s2, skip_ssp_save;	\
>>>> +				csrrw s2, CSR_SSP, x0;	\
>>>> +				REG_S s2, TASK_TI_USER_SSP(tp); \
>>>> +				skip_ssp_save:),
>>>> +				0,
>>>> +				RISCV_ISA_EXT_ZICFISS,
>>>> +				CONFIG_RISCV_USER_CFI)
>>>
>>>(I'd prefer this closer to the user_sp and kernel_sp swap, it's breaking
>>> the flow here.  We also already know if we've returned from userspace
>>> or not even without SR_SPP, but reusing the information might tangle
>>> the logic.)
>>
>> If CSR_SCRATCH was 0, then we would be coming from kernel else flow goes
>> to `.Lsave_context`. If we were coming from kernel mode, then eventually
>> flow merges to `.Lsave_context`.
>>
>> So we will be saving CSR_SSP on all kernel -- > kernel trap handling. That
>> would be unnecessary. IIRC, this was one of the first review comments in
>> early RFC series of these patch series (to not touch CSR_SSP un-necessarily)
>>
>> We can avoid that by ensuring when we branch by determining if we are coming
>> from user to something like `.Lsave_ssp` which eventually merges into
>> ".Lsave_context". And if we were coming from kernel then we would branch to
>> `.Lsave_context` and thus skipping ssp save logic. But # of branches it
>> introduces in early exception handling is equivalent to what current patches
>> do. So I don't see any value in doing that.
>>
>> Let me know if I am missing something.
>
>Right, it's hard to avoid the extra branches.
>
>I think we could modify the entry point (STVEC), so we start at
>different paths based on kernel/userspace trap and only jump once to the
>common code, like:
>
>  SYM_CODE_START(handle_exception_kernel)
>    /* kernel setup magic */
>    j handle_exception_common
>  SYM_CODE_START(handle_exception_user)
>    /* userspace setup magic */
>  handle_exception_common:

Hmm... This can be done. But then it would require to constantly modify `stvec`
When you're going back to user mode, you would have to write `stvec` with addr
of `handle_exception_user`. But then you can easily get a NMI. It can become
ugly. Needs much more thought and on first glance feels error prone.

Only if we have an extension that allows different trap address depending on
mode you're coming from (arm does that, right?, I think x86 FRED also does
that)
>
>This is not a suggestion for this series.  I would be perfectly happy
>with just a cleaner code.
>
>Would it be possible to hide the ALTERNATIVE ugliness behind a macro and
>move it outside the code block that saves pt_regs?

Sure, I'll do something about it.

>
>Thanks.
Re: [PATCH v12 05/28] riscv: usercfi state for task and save/restore of CSR_SSP on trap entry/exit
Posted by Radim Krčmář 9 months, 2 weeks ago
2025-04-24T11:03:59-07:00, Deepak Gupta <debug@rivosinc.com>:
> On Thu, Apr 24, 2025 at 02:16:32PM +0200, Radim Krčmář wrote:
>>2025-04-23T17:23:56-07:00, Deepak Gupta <debug@rivosinc.com>:
>>> On Thu, Apr 10, 2025 at 01:04:39PM +0200, Radim Krčmář wrote:
>>>>2025-03-14T14:39:24-07:00, Deepak Gupta <debug@rivosinc.com>:
>>>>> diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
>>>>> @@ -147,6 +147,20 @@ SYM_CODE_START(handle_exception)
>>>>>
>>>>>  	REG_L s0, TASK_TI_USER_SP(tp)
>>>>>  	csrrc s1, CSR_STATUS, t0
>>>>> +	/*
>>>>> +	 * If previous mode was U, capture shadow stack pointer and save it away
>>>>> +	 * Zero CSR_SSP at the same time for sanitization.
>>>>> +	 */
>>>>> +	ALTERNATIVE("nop; nop; nop; nop",
>>>>> +				__stringify(			\
>>>>> +				andi s2, s1, SR_SPP;	\
>>>>> +				bnez s2, skip_ssp_save;	\
>>>>> +				csrrw s2, CSR_SSP, x0;	\
>>>>> +				REG_S s2, TASK_TI_USER_SSP(tp); \
>>>>> +				skip_ssp_save:),
>>>>> +				0,
>>>>> +				RISCV_ISA_EXT_ZICFISS,
>>>>> +				CONFIG_RISCV_USER_CFI)
>>>>
>>>>(I'd prefer this closer to the user_sp and kernel_sp swap, it's breaking
>>>> the flow here.  We also already know if we've returned from userspace
>>>> or not even without SR_SPP, but reusing the information might tangle
>>>> the logic.)
>>>
>>> If CSR_SCRATCH was 0, then we would be coming from kernel else flow goes
>>> to `.Lsave_context`. If we were coming from kernel mode, then eventually
>>> flow merges to `.Lsave_context`.
>>>
>>> So we will be saving CSR_SSP on all kernel -- > kernel trap handling. That
>>> would be unnecessary. IIRC, this was one of the first review comments in
>>> early RFC series of these patch series (to not touch CSR_SSP un-necessarily)
>>>
>>> We can avoid that by ensuring when we branch by determining if we are coming
>>> from user to something like `.Lsave_ssp` which eventually merges into
>>> ".Lsave_context". And if we were coming from kernel then we would branch to
>>> `.Lsave_context` and thus skipping ssp save logic. But # of branches it
>>> introduces in early exception handling is equivalent to what current patches
>>> do. So I don't see any value in doing that.
>>>
>>> Let me know if I am missing something.
>>
>>Right, it's hard to avoid the extra branches.
>>
>>I think we could modify the entry point (STVEC), so we start at
>>different paths based on kernel/userspace trap and only jump once to the
>>common code, like:
>>
>>  SYM_CODE_START(handle_exception_kernel)
>>    /* kernel setup magic */
>>    j handle_exception_common
>>  SYM_CODE_START(handle_exception_user)
>>    /* userspace setup magic */
>>  handle_exception_common:
>
> Hmm... This can be done. But then it would require to constantly modify `stvec`
> When you're going back to user mode, you would have to write `stvec` with addr
> of `handle_exception_user`.

We'd just be writing STVEC instead of SSCRATCH, probably at the very
same places.
It's possible that some micro-architectures will be disturbed more by
writing STVEC than SSCRATCH, though, so it's not an easy change to make.

>                             But then you can easily get a NMI. It can become
> ugly. Needs much more thought and on first glance feels error prone.

Yeah, the M-mode Linux adds a lot of fun.  I don't see support for the
Smrnmi extension, so unlucky NMIs should be fatal even now.
Re: [PATCH v12 05/28] riscv: usercfi state for task and save/restore of CSR_SSP on trap entry/exit
Posted by Deepak Gupta 9 months, 3 weeks ago
On Thu, Apr 10, 2025 at 01:04:39PM +0200, Radim Krčmář wrote:
>2025-03-14T14:39:24-07:00, Deepak Gupta <debug@rivosinc.com>:
>> diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
>> @@ -62,6 +62,9 @@ struct thread_info {
>>  	long			user_sp;	/* User stack pointer */
>>  	int			cpu;
>>  	unsigned long		syscall_work;	/* SYSCALL_WORK_ flags */
>> +#ifdef CONFIG_RISCV_USER_CFI
>> +	struct cfi_status	user_cfi_state;
>> +#endif
>
>I don't think it makes sense to put all the data in thread_info.
>kernel_ssp and user_ssp is more than enough and the rest can comfortably
>live elsewhere in task_struct.
>
>thread_info is supposed to be as small as possible -- just spanning
>multiple cache-lines could be noticeable.

I can change it to only include only `user_ssp`, base and size.

But before we go there, see below:

$ pahole -C thread_info kbuild/vmlinux
struct thread_info {
         long unsigned int          flags;                /*     0     8 */
         int                        preempt_count;        /*     8     4 */

         /* XXX 4 bytes hole, try to pack */

         long int                   kernel_sp;            /*    16     8 */
         long int                   user_sp;              /*    24     8 */
         int                        cpu;                  /*    32     4 */

         /* XXX 4 bytes hole, try to pack */

         long unsigned int          syscall_work;         /*    40     8 */
         struct cfi_status          user_cfi_state;       /*    48    32 */
         /* --- cacheline 1 boundary (64 bytes) was 16 bytes ago --- */
         long unsigned int          a0;                   /*    80     8 */
         long unsigned int          a1;                   /*    88     8 */
         long unsigned int          a2;                   /*    96     8 */

         /* size: 104, cachelines: 2, members: 10 */
         /* sum members: 96, holes: 2, sum holes: 8 */
         /* last cacheline: 40 bytes */
};

If we were to remove entire `cfi_status`, it would still be 72 bytes (88 bytes
if shadow call stack were enabled) and already spans across two cachelines. I
did see the comment above that it should fit inside a cacheline. Although I
assumed its stale comment given that it already spans across cacheline and I
didn't see any special mention in commit messages of changes which grew this
structure above one cacheline. So I assumed this was a stale comment.

On the other hand, whenever enable/lock bits are checked, there is a high
likelihood that user_ssp and other fields are going to be accessed and
thus it actually might be helpful to have it all in one cacheline during
runtime.

So I am not sure if its helpful sticking to the comment which already is stale.

>
>> diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
>> @@ -147,6 +147,20 @@ SYM_CODE_START(handle_exception)
>>
>>  	REG_L s0, TASK_TI_USER_SP(tp)
>>  	csrrc s1, CSR_STATUS, t0
>> +	/*
>> +	 * If previous mode was U, capture shadow stack pointer and save it away
>> +	 * Zero CSR_SSP at the same time for sanitization.
>> +	 */
>> +	ALTERNATIVE("nop; nop; nop; nop",
>> +				__stringify(			\
>> +				andi s2, s1, SR_SPP;	\
>> +				bnez s2, skip_ssp_save;	\
>> +				csrrw s2, CSR_SSP, x0;	\
>> +				REG_S s2, TASK_TI_USER_SSP(tp); \
>> +				skip_ssp_save:),
>> +				0,
>> +				RISCV_ISA_EXT_ZICFISS,
>> +				CONFIG_RISCV_USER_CFI)
>
>(I'd prefer this closer to the user_sp and kernel_sp swap, it's breaking
> the flow here.  We also already know if we've returned from userspace
> or not even without SR_SPP, but reusing the information might tangle
> the logic.)
>
>>  	csrr s2, CSR_EPC
>>  	csrr s3, CSR_TVAL
>>  	csrr s4, CSR_CAUSE
>> @@ -236,6 +250,18 @@ SYM_CODE_START_NOALIGN(ret_from_exception)
>>  	csrw CSR_SCRATCH, tp
>> +
>> +	/*
>> +	 * Going back to U mode, restore shadow stack pointer
>> +	 */
>
>Are we?  I think we can be just as well returning back to kernel-space.
>Similar to how we can enter the exception handler from kernel-space.
>
>> +	ALTERNATIVE("nop; nop",
>> +				__stringify(					\
>> +				REG_L s3, TASK_TI_USER_SSP(tp); \
>> +				csrw CSR_SSP, s3),
>> +				0,
>> +				RISCV_ISA_EXT_ZICFISS,
>> +				CONFIG_RISCV_USER_CFI)
>> +
>
>Thanks.
Re: [PATCH v12 05/28] riscv: usercfi state for task and save/restore of CSR_SSP on trap entry/exit
Posted by Radim Krčmář 9 months, 3 weeks ago
2025-04-23T17:00:29-07:00, Deepak Gupta <debug@rivosinc.com>:
> On Thu, Apr 10, 2025 at 01:04:39PM +0200, Radim Krčmář wrote:
>>2025-03-14T14:39:24-07:00, Deepak Gupta <debug@rivosinc.com>:
>>> diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
>>> @@ -62,6 +62,9 @@ struct thread_info {
>>>  	long			user_sp;	/* User stack pointer */
>>>  	int			cpu;
>>>  	unsigned long		syscall_work;	/* SYSCALL_WORK_ flags */
>>> +#ifdef CONFIG_RISCV_USER_CFI
>>> +	struct cfi_status	user_cfi_state;
>>> +#endif
>>
>>I don't think it makes sense to put all the data in thread_info.
>>kernel_ssp and user_ssp is more than enough and the rest can comfortably
>>live elsewhere in task_struct.
>>
>>thread_info is supposed to be as small as possible -- just spanning
>>multiple cache-lines could be noticeable.
>
> I can change it to only include only `user_ssp`, base and size.

No need for base and size either -- we don't touch that in the common
exception code.

> But before we go there, see below:
>
> $ pahole -C thread_info kbuild/vmlinux
> struct thread_info {
>          long unsigned int          flags;                /*     0     8 */
>          int                        preempt_count;        /*     8     4 */
>
>          /* XXX 4 bytes hole, try to pack */
>
>          long int                   kernel_sp;            /*    16     8 */
>          long int                   user_sp;              /*    24     8 */
>          int                        cpu;                  /*    32     4 */
>
>          /* XXX 4 bytes hole, try to pack */
>
>          long unsigned int          syscall_work;         /*    40     8 */
>          struct cfi_status          user_cfi_state;       /*    48    32 */
>          /* --- cacheline 1 boundary (64 bytes) was 16 bytes ago --- */
>          long unsigned int          a0;                   /*    80     8 */
>          long unsigned int          a1;                   /*    88     8 */
>          long unsigned int          a2;                   /*    96     8 */
>
>          /* size: 104, cachelines: 2, members: 10 */
>          /* sum members: 96, holes: 2, sum holes: 8 */
>          /* last cacheline: 40 bytes */
> };
>
> If we were to remove entire `cfi_status`, it would still be 72 bytes (88 bytes
> if shadow call stack were enabled) and already spans across two cachelines.

It has only 64 bytes of data without shadow call stack, but it wasted 8
bytes on the holes.
a2 is somewhat an outlier that is not used most exception paths and
excluding it makes everything fit nicely even now.

> if shadow call stack were enabled) and already spans across two cachelines. I
> did see the comment above that it should fit inside a cacheline. Although I
> assumed its stale comment given that it already spans across cacheline and I
> didn't see any special mention in commit messages of changes which grew this
> structure above one cacheline. So I assumed this was a stale comment.
>
> On the other hand, whenever enable/lock bits are checked, there is a high
> likelyhood that user_ssp and other fields are going to be accessed and
> thus it actually might be helpful to have it all in one cacheline during
> runtime.

Yes, although accessing enable/lock bits will be relatively rare.
It seems better to have the overhead during thread setup, rather than on
every trap.

> So I am not sure if its helpful sticking to the comment which already is stale.

We could fix the holes and also use sp instead of a0 in the
new_vmalloc_check, so everything would fit better.

We are really close to fitting into a single cache-line, so I'd prefer
if shadow stack only filled thread_info with data that is used very
often in the exception handling code.

I think we could do without user_sp in thread_info as well, so there are
other packing options.

Btw. could ssp be added to pt_regs?

Thanks.
Re: [PATCH v12 05/28] riscv: usercfi state for task and save/restore of CSR_SSP on trap entry/exit
Posted by Deepak Gupta 9 months, 3 weeks ago
On Thu, Apr 24, 2025 at 01:52:43PM +0200, Radim Krčmář wrote:
>2025-04-23T17:00:29-07:00, Deepak Gupta <debug@rivosinc.com>:
>> On Thu, Apr 10, 2025 at 01:04:39PM +0200, Radim Krčmář wrote:
>>>2025-03-14T14:39:24-07:00, Deepak Gupta <debug@rivosinc.com>:
>>>> diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
>>>> @@ -62,6 +62,9 @@ struct thread_info {
>>>>  	long			user_sp;	/* User stack pointer */
>>>>  	int			cpu;
>>>>  	unsigned long		syscall_work;	/* SYSCALL_WORK_ flags */
>>>> +#ifdef CONFIG_RISCV_USER_CFI
>>>> +	struct cfi_status	user_cfi_state;
>>>> +#endif
>>>
>>>I don't think it makes sense to put all the data in thread_info.
>>>kernel_ssp and user_ssp is more than enough and the rest can comfortably
>>>live elsewhere in task_struct.
>>>
>>>thread_info is supposed to be as small as possible -- just spanning
>>>multiple cache-lines could be noticeable.
>>
>> I can change it to only include only `user_ssp`, base and size.
>
>No need for base and size either -- we don't touch that in the common
>exception code.

got it.

>
>> But before we go there, see below:
>>
>> $ pahole -C thread_info kbuild/vmlinux
>> struct thread_info {
>>          long unsigned int          flags;                /*     0     8 */
>>          int                        preempt_count;        /*     8     4 */
>>
>>          /* XXX 4 bytes hole, try to pack */
>>
>>          long int                   kernel_sp;            /*    16     8 */
>>          long int                   user_sp;              /*    24     8 */
>>          int                        cpu;                  /*    32     4 */
>>
>>          /* XXX 4 bytes hole, try to pack */
>>
>>          long unsigned int          syscall_work;         /*    40     8 */
>>          struct cfi_status          user_cfi_state;       /*    48    32 */
>>          /* --- cacheline 1 boundary (64 bytes) was 16 bytes ago --- */
>>          long unsigned int          a0;                   /*    80     8 */
>>          long unsigned int          a1;                   /*    88     8 */
>>          long unsigned int          a2;                   /*    96     8 */
>>
>>          /* size: 104, cachelines: 2, members: 10 */
>>          /* sum members: 96, holes: 2, sum holes: 8 */
>>          /* last cacheline: 40 bytes */
>> };
>>
>> If we were to remove entire `cfi_status`, it would still be 72 bytes (88 bytes
>> if shadow call stack were enabled) and already spans across two cachelines.
>
>It has only 64 bytes of data without shadow call stack, but it wasted 8
>bytes on the holes.
>a2 is somewhat an outlier that is not used most exception paths and
>excluding it makes everything fit nicely even now.

But we can't exclude the shadow call stack. It'll lead to an increased size if
that config is selected. A solution has to work for all the cases, not be a
half-hearted effort.

>
>> if shadow call stack were enabled) and already spans across two cachelines. I
>> did see the comment above that it should fit inside a cacheline. Although I
>> assumed its stale comment given that it already spans across cacheline and I
>> didn't see any special mention in commit messages of changes which grew this
>> structure above one cacheline. So I assumed this was a stale comment.
>>
>> On the other hand, whenever enable/lock bits are checked, there is a high
>> likelyhood that user_ssp and other fields are going to be accessed and
>> thus it actually might be helpful to have it all in one cacheline during
>> runtime.
>
>Yes, although accessing enable/lock bits will be relatively rare.
>It seems better to have the overhead during thread setup, rather than on
>every trap.
>
>> So I am not sure if its helpful sticking to the comment which already is stale.
>
>We could fix the holes and also use sp instead of a0 in the
>new_vmalloc_check, so everything would fit better.
>
>We are really close to fitting into a single cache-line, so I'd prefer
>if shadow stack only filled thread_info with data that is used very
>often in the exception handling code.

I don't get what the big deal is if it results in two cachelines. We can
(re)organize the data structure in a way that the most frequently accessed
members are together in a single cacheline. We just need to find those members.

In the hot path of exception handling, I see accesses to pt_regs on the stack as
well. These are definitely in a different cacheline than thread_info.

I understand the argument of one member field crossing into two cachelines can
have undesired perf effects. I do not understand reasoning that thread_info
exactly has to fit inside one cacheline.

If this was always supposed to fit in a single cacheline, clearly this
invariant isn't/wasn't maintained as changes trickled in. I would like to see
what maintainers have to say or someone who did data analysis on this.


>
>I think we could do without user_sp in thread_info as well, so there are
>other packing options.

Sure, probably somewhere in task_struct. But the fact of the matter is that it has
to be saved/restored during exception entry/exit. But then a load/store to
task_struct is essentially a different cacheline. Not sure what we will achieve
here?

>
>Btw. could ssp be added to pt_regs?

I had that earlier. It breaks user abi. And it was a no go.

>
>Thanks.
Re: [PATCH v12 05/28] riscv: usercfi state for task and save/restore of CSR_SSP on trap entry/exit
Posted by Radim Krčmář 9 months, 2 weeks ago
2025-04-24T10:56:34-07:00, Deepak Gupta <debug@rivosinc.com>:
> On Thu, Apr 24, 2025 at 01:52:43PM +0200, Radim Krčmář wrote:
>>2025-04-23T17:00:29-07:00, Deepak Gupta <debug@rivosinc.com>:
>>> On Thu, Apr 10, 2025 at 01:04:39PM +0200, Radim Krčmář wrote:
>>>>2025-03-14T14:39:24-07:00, Deepak Gupta <debug@rivosinc.com>:
>>>>> diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
>>>>> @@ -62,6 +62,9 @@ struct thread_info {
>>>>>  	long			user_sp;	/* User stack pointer */
>>>>>  	int			cpu;
>>>>>  	unsigned long		syscall_work;	/* SYSCALL_WORK_ flags */
>>>>> +#ifdef CONFIG_RISCV_USER_CFI
>>>>> +	struct cfi_status	user_cfi_state;
>>>>> +#endif
>>>>
>>>>I don't think it makes sense to put all the data in thread_info.
>>>>kernel_ssp and user_ssp is more than enough and the rest can comfortably
>>>>live elsewhere in task_struct.
>>>>
>>>>thread_info is supposed to be as small as possible -- just spanning
>>>>multiple cache-lines could be noticeable.
>>>
>>> I can change it to only include only `user_ssp`, base and size.
>>
>>No need for base and size either -- we don't touch that in the common
>>exception code.
>
> got it.
>
>>
>>> But before we go there, see below:
>>>
>>> $ pahole -C thread_info kbuild/vmlinux
>>> struct thread_info {
>>>          long unsigned int          flags;                /*     0     8 */
>>>          int                        preempt_count;        /*     8     4 */
>>>
>>>          /* XXX 4 bytes hole, try to pack */
>>>
>>>          long int                   kernel_sp;            /*    16     8 */
>>>          long int                   user_sp;              /*    24     8 */
>>>          int                        cpu;                  /*    32     4 */
>>>
>>>          /* XXX 4 bytes hole, try to pack */
>>>
>>>          long unsigned int          syscall_work;         /*    40     8 */
>>>          struct cfi_status          user_cfi_state;       /*    48    32 */
>>>          /* --- cacheline 1 boundary (64 bytes) was 16 bytes ago --- */
>>>          long unsigned int          a0;                   /*    80     8 */
>>>          long unsigned int          a1;                   /*    88     8 */
>>>          long unsigned int          a2;                   /*    96     8 */
>>>
>>>          /* size: 104, cachelines: 2, members: 10 */
>>>          /* sum members: 96, holes: 2, sum holes: 8 */
>>>          /* last cacheline: 40 bytes */
>>> };
>>>
>>> If we were to remove entire `cfi_status`, it would still be 72 bytes (88 bytes
>>> if shadow call stack were enabled) and already spans across two cachelines.
>>
>>It has only 64 bytes of data without shadow call stack, but it wasted 8
>>bytes on the holes.
>>a2 is somewhat an outlier that is not used most exception paths and
>>excluding it makes everything fit nicely even now.
>
> But we can't exclude shadow call stack. It'll lead to increased size if that
> config is selected. A solution has to work for all the cases and not half
> hearted effort.

We could drop a0 or user_sp and place the two ints next to each other,
saving at least 16 bytes.

(user_sp, a0, a1, and a2 are just temporary storage.  I think it would be
 fine with just two temporaries + kernel_sp, to provide three registers
 for new_vmalloc_check, and we never need more.)

>>> if shadow call stack were enabled) and already spans across two cachelines. I
>>> did see the comment above that it should fit inside a cacheline. Although I
>>> assumed its stale comment given that it already spans across cacheline and I
>>> didn't see any special mention in commit messages of changes which grew this
>>> structure above one cacheline. So I assumed this was a stale comment.
>>>
>>> On the other hand, whenever enable/lock bits are checked, there is a high
>>> likelyhood that user_ssp and other fields are going to be accessed and
>>> thus it actually might be helpful to have it all in one cacheline during
>>> runtime.
>>
>>Yes, although accessing enable/lock bits will be relatively rare.
>>It seems better to have the overhead during thread setup, rather than on
>>every trap.
>>
>>> So I am not sure if its helpful sticking to the comment which already is stale.
>>
>>We could fix the holes and also use sp instead of a0 in the
>>new_vmalloc_check, so everything would fit better.
>>
>>We are really close to fitting into a single cache-line, so I'd prefer
>>if shadow stack only filled thread_info with data that is used very
>>often in the exception handling code.
>
> I don't get what's the big deal if it results in two cachelines. We can
> (re)organize data structure in a way the most frequently accessed members are
> together in a single cacheline. We just need to find those members.

Yes, and because this patch is reorganizing the structure, I thought it
would be better to do the analysis now, rather than to incur additional
debt.

thread_info members are accessed during the first instructions after a
trap.  We want to maximize the chance that the execution doesn't stall
until uarch has time to engage its crystal ball.

> In the hot path of exception handling, I see accesses to pt_regs on stack as
> well. These are definitley different cacheline than thread_info.

Right, and we also access cache-lines for the code.

I don't know how well each uarch keeps the early trap data/code in
caches, but it doesn't seem like a bad idea to minimize the amount of
cache-lines that are accessed early after trap.

> I understand the argument of one member field crossing into two cachelines can
> have undesired perf effects. I do not understand reasoning that thread_info
> exactly has to fit inside one cacheline.

I agree that we could probably lift the constraint for some values --
it's a lot of performance modeling and convincing, though...

In this series, I think it would be good to avoid splitting kernel_sp
and a0/a1 into two cache-lines.  kernel_sp and a0/a1 are accessed within
the first few instructions.

> If this was always supposed to fit in a single cacheline, clearly this
> invariant isn't/wasn't maintained as changes trickled in. I would like to see
> what maintainers have to say or someone who did data analysis on this.

I don't think it is necessary to fix the rest, just not making things
worse is already great.

>>I think we could do without user_sp in thread_info as well, so there are
>>other packing options.
>
> Sure, probably somewhere in task_struct. But fact of the matter is that it has
> to be saved/restore during exception entry/exit. But then load/store to
> task_struct is essentially a different cachline. Not sure what we will achieve
> here?

user_sp is only temporary storage space in thread_info.
The sp register is restored from pt_regs, so we could refactor the code
to drop user_sp from thread_info.

e.g. use a0, a1, or a2 for the temporary storage: user_sp is not even
the userspace sp, it is sp of the previous sp "user", which might have
been the kernel.

>>Btw. could ssp be added to pt_regs?
>
> I had that earlier. It breaks user abi. And it was a no go.

Thanks, I was afraid of that. :)

We might want to eventually push ssp to the stack to follow the same
design for trap nesting as sp has, but that can happen when implementing
ssp for the kernel.  Squeezing into thread_info should work for now.
Re: [PATCH v12 05/28] riscv: usercfi state for task and save/restore of CSR_SSP on trap entry/exit
Posted by Alexandre Ghiti 10 months, 1 week ago
On 14/03/2025 22:39, Deepak Gupta wrote:
> Carves out space in arch specific thread struct for cfi status and shadow
> stack in usermode on riscv.
>
> This patch does following
> - defines a new structure cfi_status with status bit for cfi feature
> - defines shadow stack pointer, base and size in cfi_status structure
> - defines offsets to new member fields in thread in asm-offsets.c
> - Saves and restore shadow stack pointer on trap entry (U --> S) and exit
>    (S --> U)
>
> Shadow stack save/restore is gated on feature availiblity and implemented
> using alternative. CSR can be context switched in `switch_to` as well but
> soon as kernel shadow stack support gets rolled in, shadow stack pointer
> will need to be switched at trap entry/exit point (much like `sp`). It can
> be argued that kernel using shadow stack deployment scenario may not be as
> prevalant as user mode using this feature. But even if there is some
> minimal deployment of kernel shadow stack, that means that it needs to be
> supported. And thus save/restore of shadow stack pointer in entry.S instead
> of in `switch_to.h`.
>
> Reviewed-by: Charlie Jenkins <charlie@rivosinc.com>
> Reviewed-by: Zong Li <zong.li@sifive.com>
> Signed-off-by: Deepak Gupta <debug@rivosinc.com>
> ---
>   arch/riscv/include/asm/processor.h   |  1 +
>   arch/riscv/include/asm/thread_info.h |  3 +++
>   arch/riscv/include/asm/usercfi.h     | 24 ++++++++++++++++++++++++
>   arch/riscv/kernel/asm-offsets.c      |  4 ++++
>   arch/riscv/kernel/entry.S            | 26 ++++++++++++++++++++++++++
>   5 files changed, 58 insertions(+)
>
> diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
> index e3aba3336e63..d851bb5c6da0 100644
> --- a/arch/riscv/include/asm/processor.h
> +++ b/arch/riscv/include/asm/processor.h
> @@ -14,6 +14,7 @@
>   
>   #include <asm/ptrace.h>
>   #include <asm/hwcap.h>
> +#include <asm/usercfi.h>
>   
>   #define arch_get_mmap_end(addr, len, flags)			\
>   ({								\
> diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
> index f5916a70879a..a0cfe00c2ca6 100644
> --- a/arch/riscv/include/asm/thread_info.h
> +++ b/arch/riscv/include/asm/thread_info.h
> @@ -62,6 +62,9 @@ struct thread_info {
>   	long			user_sp;	/* User stack pointer */
>   	int			cpu;
>   	unsigned long		syscall_work;	/* SYSCALL_WORK_ flags */
> +#ifdef CONFIG_RISCV_USER_CFI
> +	struct cfi_status	user_cfi_state;
> +#endif
>   #ifdef CONFIG_SHADOW_CALL_STACK
>   	void			*scs_base;
>   	void			*scs_sp;
> diff --git a/arch/riscv/include/asm/usercfi.h b/arch/riscv/include/asm/usercfi.h
> new file mode 100644
> index 000000000000..5f2027c51917
> --- /dev/null
> +++ b/arch/riscv/include/asm/usercfi.h
> @@ -0,0 +1,24 @@
> +/* SPDX-License-Identifier: GPL-2.0
> + * Copyright (C) 2024 Rivos, Inc.
> + * Deepak Gupta <debug@rivosinc.com>
> + */
> +#ifndef _ASM_RISCV_USERCFI_H
> +#define _ASM_RISCV_USERCFI_H
> +
> +#ifndef __ASSEMBLY__
> +#include <linux/types.h>
> +
> +#ifdef CONFIG_RISCV_USER_CFI
> +struct cfi_status {
> +	unsigned long ubcfi_en : 1; /* Enable for backward cfi. */
> +	unsigned long rsvd : ((sizeof(unsigned long) * 8) - 1);
> +	unsigned long user_shdw_stk; /* Current user shadow stack pointer */
> +	unsigned long shdw_stk_base; /* Base address of shadow stack */
> +	unsigned long shdw_stk_size; /* size of shadow stack */
> +};
> +
> +#endif /* CONFIG_RISCV_USER_CFI */
> +
> +#endif /* __ASSEMBLY__ */
> +
> +#endif /* _ASM_RISCV_USERCFI_H */
> diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
> index e89455a6a0e5..0c188aaf3925 100644
> --- a/arch/riscv/kernel/asm-offsets.c
> +++ b/arch/riscv/kernel/asm-offsets.c
> @@ -50,6 +50,10 @@ void asm_offsets(void)
>   #endif
>   
>   	OFFSET(TASK_TI_CPU_NUM, task_struct, thread_info.cpu);
> +#ifdef CONFIG_RISCV_USER_CFI
> +	OFFSET(TASK_TI_CFI_STATUS, task_struct, thread_info.user_cfi_state);
> +	OFFSET(TASK_TI_USER_SSP, task_struct, thread_info.user_cfi_state.user_shdw_stk);
> +#endif
>   	OFFSET(TASK_THREAD_F0,  task_struct, thread.fstate.f[0]);
>   	OFFSET(TASK_THREAD_F1,  task_struct, thread.fstate.f[1]);
>   	OFFSET(TASK_THREAD_F2,  task_struct, thread.fstate.f[2]);
> diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> index 33a5a9f2a0d4..68c99124ea55 100644
> --- a/arch/riscv/kernel/entry.S
> +++ b/arch/riscv/kernel/entry.S
> @@ -147,6 +147,20 @@ SYM_CODE_START(handle_exception)
>   
>   	REG_L s0, TASK_TI_USER_SP(tp)
>   	csrrc s1, CSR_STATUS, t0
> +	/*
> +	 * If previous mode was U, capture shadow stack pointer and save it away
> +	 * Zero CSR_SSP at the same time for sanitization.
> +	 */
> +	ALTERNATIVE("nop; nop; nop; nop",


You could use __nops(4) here instead.


> +				__stringify(			\
> +				andi s2, s1, SR_SPP;	\
> +				bnez s2, skip_ssp_save;	\
> +				csrrw s2, CSR_SSP, x0;	\
> +				REG_S s2, TASK_TI_USER_SSP(tp); \
> +				skip_ssp_save:),
> +				0,
> +				RISCV_ISA_EXT_ZICFISS,
> +				CONFIG_RISCV_USER_CFI)
>   	csrr s2, CSR_EPC
>   	csrr s3, CSR_TVAL
>   	csrr s4, CSR_CAUSE
> @@ -236,6 +250,18 @@ SYM_CODE_START_NOALIGN(ret_from_exception)
>   	 * structures again.
>   	 */
>   	csrw CSR_SCRATCH, tp
> +
> +	/*
> +	 * Going back to U mode, restore shadow stack pointer
> +	 */
> +	ALTERNATIVE("nop; nop",


Ditto


> +				__stringify(					\
> +				REG_L s3, TASK_TI_USER_SSP(tp); \
> +				csrw CSR_SSP, s3),
> +				0,
> +				RISCV_ISA_EXT_ZICFISS,
> +				CONFIG_RISCV_USER_CFI)
> +
>   1:
>   #ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
>   	move a0, sp
>
Apart from the nits above, you can add:

Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>

Thanks,

Alex