[PATCH v4 5/8] arm64: probes: Add GCS support to bl/blr/ret

Jeremy Linton posted 8 patches 2 months, 2 weeks ago
There is a newer version of this series
[PATCH v4 5/8] arm64: probes: Add GCS support to bl/blr/ret
Posted by Jeremy Linton 2 months, 2 weeks ago
The arm64 probe simulation doesn't currently have logic in place
to deal with GCS and this results in core dumps if probes are inserted
at control flow locations. Fix-up bl, blr and ret to manipulate the
shadow stack as needed.

While we manipulate and validate the shadow stack correctly, the
hardware provides additional security by only allowing GCS operations
against pages which are marked to support GCS. For writing there is
gcssttr() which enforces this, but there isn't an equivalent for
reading. This means that uprobe users should be aware that probing on
control flow instructions which require reading the shadow stack (ex:
ret) offers lower security guarantees than what is achieved without
the uprobe active.

Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
---
 arch/arm64/kernel/probes/simulate-insn.c | 35 ++++++++++++++++++++----
 1 file changed, 29 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/kernel/probes/simulate-insn.c b/arch/arm64/kernel/probes/simulate-insn.c
index 09a0b36122d0..c75dce7bbe13 100644
--- a/arch/arm64/kernel/probes/simulate-insn.c
+++ b/arch/arm64/kernel/probes/simulate-insn.c
@@ -13,6 +13,7 @@
 #include <asm/traps.h>
 
 #include "simulate-insn.h"
+#include "asm/gcs.h"
 
 #define bbl_displacement(insn)		\
 	sign_extend32(((insn) & 0x3ffffff) << 2, 27)
@@ -49,6 +50,20 @@ static inline u32 get_w_reg(struct pt_regs *regs, int reg)
 	return lower_32_bits(pt_regs_read_reg(regs, reg));
 }
 
+static inline void update_lr(struct pt_regs *regs, long addr)
+{
+	int err = 0;
+
+	if (user_mode(regs) && task_gcs_el0_enabled(current)) {
+		push_user_gcs(addr + 4,	 &err);
+		if (err) {
+			force_sig(SIGSEGV);
+			return;
+		}
+	}
+	procedure_link_pointer_set(regs, addr + 4);
+}
+
 static bool __kprobes check_cbz(u32 opcode, struct pt_regs *regs)
 {
 	int xn = opcode & 0x1f;
@@ -107,9 +122,8 @@ simulate_b_bl(u32 opcode, long addr, struct pt_regs *regs)
 {
 	int disp = bbl_displacement(opcode);
 
-	/* Link register is x30 */
 	if (opcode & (1 << 31))
-		set_x_reg(regs, 30, addr + 4);
+		update_lr(regs, addr);
 
 	instruction_pointer_set(regs, addr + disp);
 }
@@ -133,17 +147,26 @@ simulate_br_blr(u32 opcode, long addr, struct pt_regs *regs)
 	/* update pc first in case we're doing a "blr lr" */
 	instruction_pointer_set(regs, get_x_reg(regs, xn));
 
-	/* Link register is x30 */
 	if (((opcode >> 21) & 0x3) == 1)
-		set_x_reg(regs, 30, addr + 4);
+		update_lr(regs, addr);
 }
 
 void __kprobes
 simulate_ret(u32 opcode, long addr, struct pt_regs *regs)
 {
-	int xn = (opcode >> 5) & 0x1f;
+	u64 ret_addr;
+	int err = 0;
+	unsigned long lr = procedure_link_pointer(regs);
 
-	instruction_pointer_set(regs, get_x_reg(regs, xn));
+	if (user_mode(regs) && task_gcs_el0_enabled(current)) {
+		ret_addr = pop_user_gcs(&err);
+		if (err || ret_addr != lr) {
+			force_sig(SIGSEGV);
+			return;
+		}
+	}
+
+	instruction_pointer_set(regs, lr);
 }
 
 void __kprobes
-- 
2.50.1
Re: [PATCH v4 5/8] arm64: probes: Add GCS support to bl/blr/ret
Posted by Catalin Marinas 2 months, 2 weeks ago
On Fri, Jul 18, 2025 at 11:37:37PM -0500, Jeremy Linton wrote:
> diff --git a/arch/arm64/kernel/probes/simulate-insn.c b/arch/arm64/kernel/probes/simulate-insn.c
> index 09a0b36122d0..c75dce7bbe13 100644
> --- a/arch/arm64/kernel/probes/simulate-insn.c
> +++ b/arch/arm64/kernel/probes/simulate-insn.c
> @@ -13,6 +13,7 @@
>  #include <asm/traps.h>
>  
>  #include "simulate-insn.h"
> +#include "asm/gcs.h"
>  
>  #define bbl_displacement(insn)		\
>  	sign_extend32(((insn) & 0x3ffffff) << 2, 27)
> @@ -49,6 +50,20 @@ static inline u32 get_w_reg(struct pt_regs *regs, int reg)
>  	return lower_32_bits(pt_regs_read_reg(regs, reg));
>  }
>  
> +static inline void update_lr(struct pt_regs *regs, long addr)
> +{
> +	int err = 0;
> +
> +	if (user_mode(regs) && task_gcs_el0_enabled(current)) {
> +		push_user_gcs(addr + 4,	 &err);
> +		if (err) {
> +			force_sig(SIGSEGV);
> +			return;
> +		}
> +	}
> +	procedure_link_pointer_set(regs, addr + 4);
> +}
> +
>  static bool __kprobes check_cbz(u32 opcode, struct pt_regs *regs)
>  {
>  	int xn = opcode & 0x1f;
> @@ -107,9 +122,8 @@ simulate_b_bl(u32 opcode, long addr, struct pt_regs *regs)
>  {
>  	int disp = bbl_displacement(opcode);
>  
> -	/* Link register is x30 */
>  	if (opcode & (1 << 31))
> -		set_x_reg(regs, 30, addr + 4);
> +		update_lr(regs, addr);

Why not pass (addr + 4) here and skip the addition in update_lr()?

>  
>  	instruction_pointer_set(regs, addr + disp);
>  }
> @@ -133,17 +147,26 @@ simulate_br_blr(u32 opcode, long addr, struct pt_regs *regs)
>  	/* update pc first in case we're doing a "blr lr" */
>  	instruction_pointer_set(regs, get_x_reg(regs, xn));
>  
> -	/* Link register is x30 */
>  	if (((opcode >> 21) & 0x3) == 1)
> -		set_x_reg(regs, 30, addr + 4);
> +		update_lr(regs, addr);
>  }

I can see why this function was originally updating PC (in case of a blr
lr) but updating the LR was not supposed to fail. With GCS, I think we
should follow similar logic to simulate_b_bl() and skip updating PC/LR
if the write to the GCS failed (assuming that's what the hardware does,
I haven't checked the spec).

>  void __kprobes
>  simulate_ret(u32 opcode, long addr, struct pt_regs *regs)
>  {
> -	int xn = (opcode >> 5) & 0x1f;
> +	u64 ret_addr;
> +	int err = 0;
> +	unsigned long lr = procedure_link_pointer(regs);
>  
> -	instruction_pointer_set(regs, get_x_reg(regs, xn));
> +	if (user_mode(regs) && task_gcs_el0_enabled(current)) {
> +		ret_addr = pop_user_gcs(&err);
> +		if (err || ret_addr != lr) {
> +			force_sig(SIGSEGV);
> +			return;
> +		}
> +	}
> +
> +	instruction_pointer_set(regs, lr);
>  }

What happened to the RET Xn case?

-- 
Catalin
Re: [PATCH v4 5/8] arm64: probes: Add GCS support to bl/blr/ret
Posted by Jeremy Linton 2 months, 2 weeks ago
Hi,


Thanks for catching a bug!

On 7/23/25 5:00 AM, Catalin Marinas wrote:
> On Fri, Jul 18, 2025 at 11:37:37PM -0500, Jeremy Linton wrote:
>> diff --git a/arch/arm64/kernel/probes/simulate-insn.c b/arch/arm64/kernel/probes/simulate-insn.c
>> index 09a0b36122d0..c75dce7bbe13 100644
>> --- a/arch/arm64/kernel/probes/simulate-insn.c
>> +++ b/arch/arm64/kernel/probes/simulate-insn.c
>> @@ -13,6 +13,7 @@
>>   #include <asm/traps.h>
>>   
>>   #include "simulate-insn.h"
>> +#include "asm/gcs.h"
>>   
>>   #define bbl_displacement(insn)		\
>>   	sign_extend32(((insn) & 0x3ffffff) << 2, 27)
>> @@ -49,6 +50,20 @@ static inline u32 get_w_reg(struct pt_regs *regs, int reg)
>>   	return lower_32_bits(pt_regs_read_reg(regs, reg));
>>   }
>>   
>> +static inline void update_lr(struct pt_regs *regs, long addr)
>> +{
>> +	int err = 0;
>> +
>> +	if (user_mode(regs) && task_gcs_el0_enabled(current)) {
>> +		push_user_gcs(addr + 4,	 &err);
>> +		if (err) {
>> +			force_sig(SIGSEGV);
>> +			return;
>> +		}
>> +	}
>> +	procedure_link_pointer_set(regs, addr + 4);
>> +}
>> +
>>   static bool __kprobes check_cbz(u32 opcode, struct pt_regs *regs)
>>   {
>>   	int xn = opcode & 0x1f;
>> @@ -107,9 +122,8 @@ simulate_b_bl(u32 opcode, long addr, struct pt_regs *regs)
>>   {
>>   	int disp = bbl_displacement(opcode);
>>   
>> -	/* Link register is x30 */
>>   	if (opcode & (1 << 31))
>> -		set_x_reg(regs, 30, addr + 4);
>> +		update_lr(regs, addr);
> 
> Why not pass (addr + 4) here and skip the addition in update_lr()?

Seemed to make more sense to do the adds in the function using the 
offset values.
> 
>>   
>>   	instruction_pointer_set(regs, addr + disp);
>>   }
>> @@ -133,17 +147,26 @@ simulate_br_blr(u32 opcode, long addr, struct pt_regs *regs)
>>   	/* update pc first in case we're doing a "blr lr" */
>>   	instruction_pointer_set(regs, get_x_reg(regs, xn));
>>   
>> -	/* Link register is x30 */
>>   	if (((opcode >> 21) & 0x3) == 1)
>> -		set_x_reg(regs, 30, addr + 4);
>> +		update_lr(regs, addr);
>>   }
> 
> I can see why this function was originally updating PC (in case of a blr
> lr) but updating the LR was not supposed to fail. With GCS, I think we
> should follow similar logic to simulate_b_bl() and skip updating PC/LR
> if the write to the GCS failed (assuming that's what the hardware does,
> I haven't checked the spec).

Yes I 'fixed' this in simulate ret below, which is probably when I 
dropped the xn because I was just testing it with compiled code that was 
always using lr.


> 
>>   void __kprobes
>>   simulate_ret(u32 opcode, long addr, struct pt_regs *regs)
>>   {
>> -	int xn = (opcode >> 5) & 0x1f;
>> +	u64 ret_addr;
>> +	int err = 0;
>> +	unsigned long lr = procedure_link_pointer(regs);
>>   
>> -	instruction_pointer_set(regs, get_x_reg(regs, xn));
>> +	if (user_mode(regs) && task_gcs_el0_enabled(current)) {
>> +		ret_addr = pop_user_gcs(&err);
>> +		if (err || ret_addr != lr) {
>> +			force_sig(SIGSEGV);
>> +			return;
>> +		}
>> +	}
>> +
>> +	instruction_pointer_set(regs, lr);
>>   }
> 
> What happened to the RET Xn case?
> 

I broke it because my testing code/app never generated it.
Re: [PATCH v4 5/8] arm64: probes: Add GCS support to bl/blr/ret
Posted by Mark Brown 2 months, 2 weeks ago
On Wed, Jul 23, 2025 at 11:00:33AM +0100, Catalin Marinas wrote:
> On Fri, Jul 18, 2025 at 11:37:37PM -0500, Jeremy Linton wrote:

> > @@ -133,17 +147,26 @@ simulate_br_blr(u32 opcode, long addr, struct pt_regs *regs)
> >  	/* update pc first in case we're doing a "blr lr" */
> >  	instruction_pointer_set(regs, get_x_reg(regs, xn));
> >  
> > -	/* Link register is x30 */
> >  	if (((opcode >> 21) & 0x3) == 1)
> > -		set_x_reg(regs, 30, addr + 4);
> > +		update_lr(regs, addr);
> >  }

> I can see why this function was originally updating PC (in case of a blr
> lr) but updating the LR was not supposed to fail. With GCS, I think we
> should follow similar logic to simulate_b_bl() and skip updating PC/LR
> if the write to the GCS failed (assuming that's what the hardware does,
> I haven't checked the spec).

Yes, the pseudocode does the GCS validation before it starts updating
BTYPE or any of the registers.