The arm64 probe simulation doesn't currently have logic in place
to deal with GCS and this results in core dumps if probes are inserted
at control flow locations. Fix-up bl, blr and ret to manipulate the
shadow stack as needed.
While we manipulate and validate the shadow stack correctly, the
hardware provides additional security by only allowing GCS operations
against pages which are marked to support GCS. For writing there is
gcssttr() which enforces this, but there isn't an equivalent for
reading. This means that uprobe users should be aware that probing on
control flow instructions which require reading the shadow stack (ex:
ret) offers lower security guarantees than what is achieved without
the uprobe active.
Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
---
arch/arm64/kernel/probes/simulate-insn.c | 35 ++++++++++++++++++++----
1 file changed, 29 insertions(+), 6 deletions(-)
diff --git a/arch/arm64/kernel/probes/simulate-insn.c b/arch/arm64/kernel/probes/simulate-insn.c
index 09a0b36122d0..c75dce7bbe13 100644
--- a/arch/arm64/kernel/probes/simulate-insn.c
+++ b/arch/arm64/kernel/probes/simulate-insn.c
@@ -13,6 +13,7 @@
#include <asm/traps.h>
#include "simulate-insn.h"
+#include "asm/gcs.h"
#define bbl_displacement(insn) \
sign_extend32(((insn) & 0x3ffffff) << 2, 27)
@@ -49,6 +50,20 @@ static inline u32 get_w_reg(struct pt_regs *regs, int reg)
return lower_32_bits(pt_regs_read_reg(regs, reg));
}
+static inline void update_lr(struct pt_regs *regs, long addr)
+{
+ int err = 0;
+
+ if (user_mode(regs) && task_gcs_el0_enabled(current)) {
+ push_user_gcs(addr + 4, &err);
+ if (err) {
+ force_sig(SIGSEGV);
+ return;
+ }
+ }
+ procedure_link_pointer_set(regs, addr + 4);
+}
+
static bool __kprobes check_cbz(u32 opcode, struct pt_regs *regs)
{
int xn = opcode & 0x1f;
@@ -107,9 +122,8 @@ simulate_b_bl(u32 opcode, long addr, struct pt_regs *regs)
{
int disp = bbl_displacement(opcode);
- /* Link register is x30 */
if (opcode & (1 << 31))
- set_x_reg(regs, 30, addr + 4);
+ update_lr(regs, addr);
instruction_pointer_set(regs, addr + disp);
}
@@ -133,17 +147,26 @@ simulate_br_blr(u32 opcode, long addr, struct pt_regs *regs)
/* update pc first in case we're doing a "blr lr" */
instruction_pointer_set(regs, get_x_reg(regs, xn));
- /* Link register is x30 */
if (((opcode >> 21) & 0x3) == 1)
- set_x_reg(regs, 30, addr + 4);
+ update_lr(regs, addr);
}
void __kprobes
simulate_ret(u32 opcode, long addr, struct pt_regs *regs)
{
- int xn = (opcode >> 5) & 0x1f;
+ u64 ret_addr;
+ int err = 0;
+ unsigned long lr = procedure_link_pointer(regs);
- instruction_pointer_set(regs, get_x_reg(regs, xn));
+ if (user_mode(regs) && task_gcs_el0_enabled(current)) {
+ ret_addr = pop_user_gcs(&err);
+ if (err || ret_addr != lr) {
+ force_sig(SIGSEGV);
+ return;
+ }
+ }
+
+ instruction_pointer_set(regs, lr);
}
void __kprobes
--
2.50.1
On Fri, Jul 18, 2025 at 11:37:37PM -0500, Jeremy Linton wrote:
> diff --git a/arch/arm64/kernel/probes/simulate-insn.c b/arch/arm64/kernel/probes/simulate-insn.c
> index 09a0b36122d0..c75dce7bbe13 100644
> --- a/arch/arm64/kernel/probes/simulate-insn.c
> +++ b/arch/arm64/kernel/probes/simulate-insn.c
> @@ -13,6 +13,7 @@
> #include <asm/traps.h>
>
> #include "simulate-insn.h"
> +#include "asm/gcs.h"
>
> #define bbl_displacement(insn) \
> sign_extend32(((insn) & 0x3ffffff) << 2, 27)
> @@ -49,6 +50,20 @@ static inline u32 get_w_reg(struct pt_regs *regs, int reg)
> return lower_32_bits(pt_regs_read_reg(regs, reg));
> }
>
> +static inline void update_lr(struct pt_regs *regs, long addr)
> +{
> + int err = 0;
> +
> + if (user_mode(regs) && task_gcs_el0_enabled(current)) {
> + push_user_gcs(addr + 4, &err);
> + if (err) {
> + force_sig(SIGSEGV);
> + return;
> + }
> + }
> + procedure_link_pointer_set(regs, addr + 4);
> +}
> +
> static bool __kprobes check_cbz(u32 opcode, struct pt_regs *regs)
> {
> int xn = opcode & 0x1f;
> @@ -107,9 +122,8 @@ simulate_b_bl(u32 opcode, long addr, struct pt_regs *regs)
> {
> int disp = bbl_displacement(opcode);
>
> - /* Link register is x30 */
> if (opcode & (1 << 31))
> - set_x_reg(regs, 30, addr + 4);
> + update_lr(regs, addr);
Why not pass (addr + 4) here and skip the addition in update_lr()?
>
> instruction_pointer_set(regs, addr + disp);
> }
> @@ -133,17 +147,26 @@ simulate_br_blr(u32 opcode, long addr, struct pt_regs *regs)
> /* update pc first in case we're doing a "blr lr" */
> instruction_pointer_set(regs, get_x_reg(regs, xn));
>
> - /* Link register is x30 */
> if (((opcode >> 21) & 0x3) == 1)
> - set_x_reg(regs, 30, addr + 4);
> + update_lr(regs, addr);
> }
I can see why this function was originally updating PC (in case of a blr
lr) but updating the LR was not supposed to fail. With GCS, I think we
should follow similar logic to simulate_b_bl() and skip updating PC/LR
if the write to the GCS failed (assuming that's what the hardware does,
I haven't checked the spec).
> void __kprobes
> simulate_ret(u32 opcode, long addr, struct pt_regs *regs)
> {
> - int xn = (opcode >> 5) & 0x1f;
> + u64 ret_addr;
> + int err = 0;
> + unsigned long lr = procedure_link_pointer(regs);
>
> - instruction_pointer_set(regs, get_x_reg(regs, xn));
> + if (user_mode(regs) && task_gcs_el0_enabled(current)) {
> + ret_addr = pop_user_gcs(&err);
> + if (err || ret_addr != lr) {
> + force_sig(SIGSEGV);
> + return;
> + }
> + }
> +
> + instruction_pointer_set(regs, lr);
> }
What happened to the RET Xn case?
--
Catalin
Hi,
Thanks for catching a bug!
On 7/23/25 5:00 AM, Catalin Marinas wrote:
> On Fri, Jul 18, 2025 at 11:37:37PM -0500, Jeremy Linton wrote:
>> diff --git a/arch/arm64/kernel/probes/simulate-insn.c b/arch/arm64/kernel/probes/simulate-insn.c
>> index 09a0b36122d0..c75dce7bbe13 100644
>> --- a/arch/arm64/kernel/probes/simulate-insn.c
>> +++ b/arch/arm64/kernel/probes/simulate-insn.c
>> @@ -13,6 +13,7 @@
>> #include <asm/traps.h>
>>
>> #include "simulate-insn.h"
>> +#include "asm/gcs.h"
>>
>> #define bbl_displacement(insn) \
>> sign_extend32(((insn) & 0x3ffffff) << 2, 27)
>> @@ -49,6 +50,20 @@ static inline u32 get_w_reg(struct pt_regs *regs, int reg)
>> return lower_32_bits(pt_regs_read_reg(regs, reg));
>> }
>>
>> +static inline void update_lr(struct pt_regs *regs, long addr)
>> +{
>> + int err = 0;
>> +
>> + if (user_mode(regs) && task_gcs_el0_enabled(current)) {
>> + push_user_gcs(addr + 4, &err);
>> + if (err) {
>> + force_sig(SIGSEGV);
>> + return;
>> + }
>> + }
>> + procedure_link_pointer_set(regs, addr + 4);
>> +}
>> +
>> static bool __kprobes check_cbz(u32 opcode, struct pt_regs *regs)
>> {
>> int xn = opcode & 0x1f;
>> @@ -107,9 +122,8 @@ simulate_b_bl(u32 opcode, long addr, struct pt_regs *regs)
>> {
>> int disp = bbl_displacement(opcode);
>>
>> - /* Link register is x30 */
>> if (opcode & (1 << 31))
>> - set_x_reg(regs, 30, addr + 4);
>> + update_lr(regs, addr);
>
> Why not pass (addr + 4) here and skip the addition in update_lr()?
Seemed to make more sense to do the adds in the function using the
offset values.
>
>>
>> instruction_pointer_set(regs, addr + disp);
>> }
>> @@ -133,17 +147,26 @@ simulate_br_blr(u32 opcode, long addr, struct pt_regs *regs)
>> /* update pc first in case we're doing a "blr lr" */
>> instruction_pointer_set(regs, get_x_reg(regs, xn));
>>
>> - /* Link register is x30 */
>> if (((opcode >> 21) & 0x3) == 1)
>> - set_x_reg(regs, 30, addr + 4);
>> + update_lr(regs, addr);
>> }
>
> I can see why this function was originally updating PC (in case of a blr
> lr) but updating the LR was not supposed to fail. With GCS, I think we
> should follow similar logic to simulate_b_bl() and skip updating PC/LR
> if the write to the GCS failed (assuming that's what the hardware does,
> I haven't checked the spec).
Yes I 'fixed' this in simulate ret below, which is probably when I
dropped the xn because I was just testing it with compiled code that was
always using lr.
>
>> void __kprobes
>> simulate_ret(u32 opcode, long addr, struct pt_regs *regs)
>> {
>> - int xn = (opcode >> 5) & 0x1f;
>> + u64 ret_addr;
>> + int err = 0;
>> + unsigned long lr = procedure_link_pointer(regs);
>>
>> - instruction_pointer_set(regs, get_x_reg(regs, xn));
>> + if (user_mode(regs) && task_gcs_el0_enabled(current)) {
>> + ret_addr = pop_user_gcs(&err);
>> + if (err || ret_addr != lr) {
>> + force_sig(SIGSEGV);
>> + return;
>> + }
>> + }
>> +
>> + instruction_pointer_set(regs, lr);
>> }
>
> What happened to the RET Xn case?
>
I broke it because my testing code/app never generated it.
On Wed, Jul 23, 2025 at 11:00:33AM +0100, Catalin Marinas wrote: > On Fri, Jul 18, 2025 at 11:37:37PM -0500, Jeremy Linton wrote: > > @@ -133,17 +147,26 @@ simulate_br_blr(u32 opcode, long addr, struct pt_regs *regs) > > /* update pc first in case we're doing a "blr lr" */ > > instruction_pointer_set(regs, get_x_reg(regs, xn)); > > > > - /* Link register is x30 */ > > if (((opcode >> 21) & 0x3) == 1) > > - set_x_reg(regs, 30, addr + 4); > > + update_lr(regs, addr); > > } > I can see why this function was originally updating PC (in case of a blr > lr) but updating the LR was not supposed to fail. With GCS, I think we > should follow similar logic to simulate_b_bl() and skip updating PC/LR > if the write to the GCS failed (assuming that's what the hardware does, > I haven't checked the spec). Yes, the pseudocode does the GCS validation before it starts updating BTYPE or any of the registers.
© 2016 - 2025 Red Hat, Inc.