[PATCH v2 2/2] arm64/module: Use text-poke API for late relocations.

Dylan Hatch posted 2 patches 10 months ago
[PATCH v2 2/2] arm64/module: Use text-poke API for late relocations.
Posted by Dylan Hatch 10 months ago
To enable late module patching, livepatch modules need to be able to
apply some of their relocations well after being loaded. In this
scenario, use the text-poking API to allow this, even with
STRICT_MODULE_RWX.

This patch is largely based off commit 88fc078a7a8f6 ("x86/module: Use
text_poke() for late relocations").

Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
---
 arch/arm64/kernel/module.c | 129 ++++++++++++++++++++++++-------------
 1 file changed, 83 insertions(+), 46 deletions(-)

diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 06bb680bfe975..0703502d9dc37 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -18,11 +18,13 @@
 #include <linux/moduleloader.h>
 #include <linux/random.h>
 #include <linux/scs.h>
+#include <linux/memory.h>
 
 #include <asm/alternative.h>
 #include <asm/insn.h>
 #include <asm/scs.h>
 #include <asm/sections.h>
+#include <asm/text-patching.h>
 
 enum aarch64_reloc_op {
 	RELOC_OP_NONE,
@@ -48,7 +50,8 @@ static u64 do_reloc(enum aarch64_reloc_op reloc_op, __le32 *place, u64 val)
 	return 0;
 }
 
-static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
+static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len,
+		      void *(*write)(void *dest, const void *src, size_t len))
 {
 	s64 sval = do_reloc(op, place, val);
 
@@ -66,7 +69,7 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
 
 	switch (len) {
 	case 16:
-		*(s16 *)place = sval;
+		write(place, &sval, sizeof(s16));
 		switch (op) {
 		case RELOC_OP_ABS:
 			if (sval < 0 || sval > U16_MAX)
@@ -82,7 +85,7 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
 		}
 		break;
 	case 32:
-		*(s32 *)place = sval;
+		write(place, &sval, sizeof(s32));
 		switch (op) {
 		case RELOC_OP_ABS:
 			if (sval < 0 || sval > U32_MAX)
@@ -98,7 +101,7 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
 		}
 		break;
 	case 64:
-		*(s64 *)place = sval;
+		write(place, &sval, sizeof(s64));
 		break;
 	default:
 		pr_err("Invalid length (%d) for data relocation\n", len);
@@ -113,11 +116,13 @@ enum aarch64_insn_movw_imm_type {
 };
 
 static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
-			   int lsb, enum aarch64_insn_movw_imm_type imm_type)
+			   int lsb, enum aarch64_insn_movw_imm_type imm_type,
+			   void *(*write)(void *dest, const void *src, size_t len))
 {
 	u64 imm;
 	s64 sval;
 	u32 insn = le32_to_cpu(*place);
+	__le32 le_insn;
 
 	sval = do_reloc(op, place, val);
 	imm = sval >> lsb;
@@ -145,7 +150,8 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
 
 	/* Update the instruction with the new encoding. */
 	insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, imm);
-	*place = cpu_to_le32(insn);
+	le_insn = cpu_to_le32(insn);
+	write(place, &le_insn, sizeof(le_insn));
 
 	if (imm > U16_MAX)
 		return -ERANGE;
@@ -154,11 +160,13 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
 }
 
 static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val,
-			  int lsb, int len, enum aarch64_insn_imm_type imm_type)
+			  int lsb, int len, enum aarch64_insn_imm_type imm_type,
+			  void *(*write)(void *dest, const void *src, size_t len))
 {
 	u64 imm, imm_mask;
 	s64 sval;
 	u32 insn = le32_to_cpu(*place);
+	__le32 le_insn;
 
 	/* Calculate the relocation value. */
 	sval = do_reloc(op, place, val);
@@ -170,7 +178,8 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val,
 
 	/* Update the instruction's immediate field. */
 	insn = aarch64_insn_encode_immediate(imm_type, insn, imm);
-	*place = cpu_to_le32(insn);
+	le_insn = cpu_to_le32(insn);
+	write(place, &le_insn, sizeof(le_insn));
 
 	/*
 	 * Extract the upper value bits (including the sign bit) and
@@ -189,17 +198,19 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val,
 }
 
 static int reloc_insn_adrp(struct module *mod, Elf64_Shdr *sechdrs,
-			   __le32 *place, u64 val)
+			   __le32 *place, u64 val,
+			   void *(*write)(void *dest, const void *src, size_t len))
 {
 	u32 insn;
+	__le32 le_insn;
 
 	if (!is_forbidden_offset_for_adrp(place))
 		return reloc_insn_imm(RELOC_OP_PAGE, place, val, 12, 21,
-				      AARCH64_INSN_IMM_ADR);
+				      AARCH64_INSN_IMM_ADR, write);
 
 	/* patch ADRP to ADR if it is in range */
 	if (!reloc_insn_imm(RELOC_OP_PREL, place, val & ~0xfff, 0, 21,
-			    AARCH64_INSN_IMM_ADR)) {
+			    AARCH64_INSN_IMM_ADR, write)) {
 		insn = le32_to_cpu(*place);
 		insn &= ~BIT(31);
 	} else {
@@ -211,15 +222,17 @@ static int reloc_insn_adrp(struct module *mod, Elf64_Shdr *sechdrs,
 						   AARCH64_INSN_BRANCH_NOLINK);
 	}
 
-	*place = cpu_to_le32(insn);
+	le_insn = cpu_to_le32(insn);
+	write(place, &le_insn, sizeof(le_insn));
 	return 0;
 }
 
-int apply_relocate_add(Elf64_Shdr *sechdrs,
+static int __apply_relocate_add(Elf64_Shdr *sechdrs,
 		       const char *strtab,
 		       unsigned int symindex,
 		       unsigned int relsec,
-		       struct module *me)
+		       struct module *me,
+		       void *(*write)(void *dest, const void *src, size_t len))
 {
 	unsigned int i;
 	int ovf;
@@ -255,23 +268,23 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 		/* Data relocations. */
 		case R_AARCH64_ABS64:
 			overflow_check = false;
-			ovf = reloc_data(RELOC_OP_ABS, loc, val, 64);
+			ovf = reloc_data(RELOC_OP_ABS, loc, val, 64, write);
 			break;
 		case R_AARCH64_ABS32:
-			ovf = reloc_data(RELOC_OP_ABS, loc, val, 32);
+			ovf = reloc_data(RELOC_OP_ABS, loc, val, 32, write);
 			break;
 		case R_AARCH64_ABS16:
-			ovf = reloc_data(RELOC_OP_ABS, loc, val, 16);
+			ovf = reloc_data(RELOC_OP_ABS, loc, val, 16, write);
 			break;
 		case R_AARCH64_PREL64:
 			overflow_check = false;
-			ovf = reloc_data(RELOC_OP_PREL, loc, val, 64);
+			ovf = reloc_data(RELOC_OP_PREL, loc, val, 64, write);
 			break;
 		case R_AARCH64_PREL32:
-			ovf = reloc_data(RELOC_OP_PREL, loc, val, 32);
+			ovf = reloc_data(RELOC_OP_PREL, loc, val, 32, write);
 			break;
 		case R_AARCH64_PREL16:
-			ovf = reloc_data(RELOC_OP_PREL, loc, val, 16);
+			ovf = reloc_data(RELOC_OP_PREL, loc, val, 16, write);
 			break;
 
 		/* MOVW instruction relocations. */
@@ -280,88 +293,88 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 			fallthrough;
 		case R_AARCH64_MOVW_UABS_G0:
 			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0,
-					      AARCH64_INSN_IMM_MOVKZ);
+					      AARCH64_INSN_IMM_MOVKZ, write);
 			break;
 		case R_AARCH64_MOVW_UABS_G1_NC:
 			overflow_check = false;
 			fallthrough;
 		case R_AARCH64_MOVW_UABS_G1:
 			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16,
-					      AARCH64_INSN_IMM_MOVKZ);
+					      AARCH64_INSN_IMM_MOVKZ, write);
 			break;
 		case R_AARCH64_MOVW_UABS_G2_NC:
 			overflow_check = false;
 			fallthrough;
 		case R_AARCH64_MOVW_UABS_G2:
 			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32,
-					      AARCH64_INSN_IMM_MOVKZ);
+					      AARCH64_INSN_IMM_MOVKZ, write);
 			break;
 		case R_AARCH64_MOVW_UABS_G3:
 			/* We're using the top bits so we can't overflow. */
 			overflow_check = false;
 			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 48,
-					      AARCH64_INSN_IMM_MOVKZ);
+					      AARCH64_INSN_IMM_MOVKZ, write);
 			break;
 		case R_AARCH64_MOVW_SABS_G0:
 			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0,
-					      AARCH64_INSN_IMM_MOVNZ);
+					      AARCH64_INSN_IMM_MOVNZ, write);
 			break;
 		case R_AARCH64_MOVW_SABS_G1:
 			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16,
-					      AARCH64_INSN_IMM_MOVNZ);
+					      AARCH64_INSN_IMM_MOVNZ, write);
 			break;
 		case R_AARCH64_MOVW_SABS_G2:
 			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32,
-					      AARCH64_INSN_IMM_MOVNZ);
+					      AARCH64_INSN_IMM_MOVNZ, write);
 			break;
 		case R_AARCH64_MOVW_PREL_G0_NC:
 			overflow_check = false;
 			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0,
-					      AARCH64_INSN_IMM_MOVKZ);
+					      AARCH64_INSN_IMM_MOVKZ, write);
 			break;
 		case R_AARCH64_MOVW_PREL_G0:
 			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0,
-					      AARCH64_INSN_IMM_MOVNZ);
+					      AARCH64_INSN_IMM_MOVNZ, write);
 			break;
 		case R_AARCH64_MOVW_PREL_G1_NC:
 			overflow_check = false;
 			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16,
-					      AARCH64_INSN_IMM_MOVKZ);
+					      AARCH64_INSN_IMM_MOVKZ, write);
 			break;
 		case R_AARCH64_MOVW_PREL_G1:
 			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16,
-					      AARCH64_INSN_IMM_MOVNZ);
+					      AARCH64_INSN_IMM_MOVNZ, write);
 			break;
 		case R_AARCH64_MOVW_PREL_G2_NC:
 			overflow_check = false;
 			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32,
-					      AARCH64_INSN_IMM_MOVKZ);
+					      AARCH64_INSN_IMM_MOVKZ, write);
 			break;
 		case R_AARCH64_MOVW_PREL_G2:
 			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32,
-					      AARCH64_INSN_IMM_MOVNZ);
+					      AARCH64_INSN_IMM_MOVNZ, write);
 			break;
 		case R_AARCH64_MOVW_PREL_G3:
 			/* We're using the top bits so we can't overflow. */
 			overflow_check = false;
 			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 48,
-					      AARCH64_INSN_IMM_MOVNZ);
+					      AARCH64_INSN_IMM_MOVNZ, write);
 			break;
 
 		/* Immediate instruction relocations. */
 		case R_AARCH64_LD_PREL_LO19:
 			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19,
-					     AARCH64_INSN_IMM_19);
+					     AARCH64_INSN_IMM_19, write);
 			break;
 		case R_AARCH64_ADR_PREL_LO21:
 			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21,
-					     AARCH64_INSN_IMM_ADR);
+					     AARCH64_INSN_IMM_ADR, write);
 			break;
 		case R_AARCH64_ADR_PREL_PG_HI21_NC:
 			overflow_check = false;
 			fallthrough;
 		case R_AARCH64_ADR_PREL_PG_HI21:
-			ovf = reloc_insn_adrp(me, sechdrs, loc, val);
+			ovf = reloc_insn_adrp(me, sechdrs, loc, val, write);
 			if (ovf && ovf != -ERANGE)
 				return ovf;
 			break;
@@ -369,46 +382,46 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 		case R_AARCH64_LDST8_ABS_LO12_NC:
 			overflow_check = false;
 			ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 0, 12,
-					     AARCH64_INSN_IMM_12);
+					     AARCH64_INSN_IMM_12, write);
 			break;
 		case R_AARCH64_LDST16_ABS_LO12_NC:
 			overflow_check = false;
 			ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 1, 11,
-					     AARCH64_INSN_IMM_12);
+					     AARCH64_INSN_IMM_12, write);
 			break;
 		case R_AARCH64_LDST32_ABS_LO12_NC:
 			overflow_check = false;
 			ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 2, 10,
-					     AARCH64_INSN_IMM_12);
+					     AARCH64_INSN_IMM_12, write);
 			break;
 		case R_AARCH64_LDST64_ABS_LO12_NC:
 			overflow_check = false;
 			ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 3, 9,
-					     AARCH64_INSN_IMM_12);
+					     AARCH64_INSN_IMM_12, write);
 			break;
 		case R_AARCH64_LDST128_ABS_LO12_NC:
 			overflow_check = false;
 			ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 4, 8,
-					     AARCH64_INSN_IMM_12);
+					     AARCH64_INSN_IMM_12, write);
 			break;
 		case R_AARCH64_TSTBR14:
 			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 14,
-					     AARCH64_INSN_IMM_14);
+					     AARCH64_INSN_IMM_14, write);
 			break;
 		case R_AARCH64_CONDBR19:
 			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19,
-					     AARCH64_INSN_IMM_19);
+					     AARCH64_INSN_IMM_19, write);
 			break;
 		case R_AARCH64_JUMP26:
 		case R_AARCH64_CALL26:
 			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26,
-					     AARCH64_INSN_IMM_26);
+					     AARCH64_INSN_IMM_26, write);
 			if (ovf == -ERANGE) {
 				val = module_emit_plt_entry(me, sechdrs, loc, &rel[i], sym);
 				if (!val)
 					return -ENOEXEC;
 				ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2,
-						     26, AARCH64_INSN_IMM_26);
+						     26, AARCH64_INSN_IMM_26, write);
 			}
 			break;
 
@@ -431,6 +444,30 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 	return -ENOEXEC;
 }
 
+int apply_relocate_add(Elf64_Shdr *sechdrs,
+		       const char *strtab,
+		       unsigned int symindex,
+		       unsigned int relsec,
+		       struct module *me)
+{
+	int ret;
+	bool early = me->state == MODULE_STATE_UNFORMED;
+	void *(*write)(void *, const void *, size_t) = memcpy;
+
+	if (!early) {
+		write = text_poke;
+		mutex_lock(&text_mutex);
+	}
+
+	ret = __apply_relocate_add(sechdrs, strtab, symindex, relsec, me,
+				   write);
+
+	if (!early)
+		mutex_unlock(&text_mutex);
+
+	return ret;
+}
+
 static inline void __init_plt(struct plt_entry *plt, unsigned long addr)
 {
 	*plt = get_plt_entry(addr, plt);
-- 
2.49.0.604.gff1f9ca942-goog
Re: [PATCH v2 2/2] arm64/module: Use text-poke API for late relocations.
Posted by Will Deacon 9 months ago
Hi Dylan,

On Sat, Apr 12, 2025 at 01:09:39AM +0000, Dylan Hatch wrote:
> To enable late module patching, livepatch modules need to be able to
> apply some of their relocations well after being loaded. In this
> scenario, use the text-poking API to allow this, even with
> STRICT_MODULE_RWX.
> 
> This patch is largely based off commit 88fc078a7a8f6 ("x86/module: Use
> text_poke() for late relocations").
> 
> Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
> ---
>  arch/arm64/kernel/module.c | 129 ++++++++++++++++++++++++-------------
>  1 file changed, 83 insertions(+), 46 deletions(-)

On its own, this isn't gaining us an awful lot upstream as we don't have
livepatch support (arm64 does not select HAVE_LIVEPATCH), however I'm
not against incremental changes towards enabling that. Are you planning
to work on follow-up changes for the rest of the support?

> diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
> index 06bb680bfe975..0703502d9dc37 100644
> --- a/arch/arm64/kernel/module.c
> +++ b/arch/arm64/kernel/module.c
> @@ -18,11 +18,13 @@
>  #include <linux/moduleloader.h>
>  #include <linux/random.h>
>  #include <linux/scs.h>
> +#include <linux/memory.h>
>  
>  #include <asm/alternative.h>
>  #include <asm/insn.h>
>  #include <asm/scs.h>
>  #include <asm/sections.h>
> +#include <asm/text-patching.h>
>  
>  enum aarch64_reloc_op {
>  	RELOC_OP_NONE,
> @@ -48,7 +50,8 @@ static u64 do_reloc(enum aarch64_reloc_op reloc_op, __le32 *place, u64 val)
>  	return 0;
>  }
>  
> -static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
> +static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len,
> +		      void *(*write)(void *dest, const void *src, size_t len))
>  {

I think it's a bit grotty indirecting the write when in reality we either
need a straight-forward assignment (like we have today) or a call to
an instruction-patching helper.

In particular, when you get to functions such as:

>  static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
> -			   int lsb, enum aarch64_insn_movw_imm_type imm_type)
> +			   int lsb, enum aarch64_insn_movw_imm_type imm_type,
> +			   void *(*write)(void *dest, const void *src, size_t len))
>  {
>  	u64 imm;
>  	s64 sval;
>  	u32 insn = le32_to_cpu(*place);
> +	__le32 le_insn;
>  
>  	sval = do_reloc(op, place, val);
>  	imm = sval >> lsb;
> @@ -145,7 +150,8 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
>  
>  	/* Update the instruction with the new encoding. */
>  	insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, imm);
> -	*place = cpu_to_le32(insn);
> +	le_insn = cpu_to_le32(insn);
> +	write(place, &le_insn, sizeof(le_insn));

we're forced into passing &le_insn because we need the same function
prototype as memcpy().

Instead, how about we pass the 'struct module *mod' pointer down from
apply_relocate_add()? That's already done for reloc_insn_adrp() and it
would mean that the above could be written as:


static int reloc_insn_movw(struct module *mod, enum aarch64_reloc_op op,
			   __le32 *place, u64 val, int lsb,
			   enum aarch64_insn_movw_imm_type imm_type)
{
	...

	insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, imm);
	insn = cpu_to_le32(insn);

	if (mod->state != MODULE_STATE_UNFORMED)
		aarch64_insn_set(place, insn, sizeof(insn));
	else
		*place = insn;


meaning we can also drop the first patch, because I don't think we need
a text_poke() helper.

> +int apply_relocate_add(Elf64_Shdr *sechdrs,
> +		       const char *strtab,
> +		       unsigned int symindex,
> +		       unsigned int relsec,
> +		       struct module *me)
> +{
> +	int ret;
> +	bool early = me->state == MODULE_STATE_UNFORMED;
> +	void *(*write)(void *, const void *, size_t) = memcpy;
> +
> +	if (!early) {
> +		write = text_poke;
> +		mutex_lock(&text_mutex);
> +	}
> +
> +	ret = __apply_relocate_add(sechdrs, strtab, symindex, relsec, me,
> +				   write);
> +
> +	if (!early)
> +		mutex_unlock(&text_mutex);

Why is the responsibility of the arch code to take the 'text_mutex' here?
The core code should be able to do that when the module state is !=
MODULE_STATE_UNFORMED.

Cheers,

Will
Re: [PATCH v2 2/2] arm64/module: Use text-poke API for late relocations.
Posted by Dylan Hatch 8 months, 2 weeks ago
Hi Will,

I've sent a v4 of these patches that should address your comments,
feel free to have a look.

On Fri, May 9, 2025 at 9:15 AM Will Deacon <will@kernel.org> wrote:
>
> Hi Dylan,
>
> On Sat, Apr 12, 2025 at 01:09:39AM +0000, Dylan Hatch wrote:
> > To enable late module patching, livepatch modules need to be able to
> > apply some of their relocations well after being loaded. In this
> > scenario, use the text-poking API to allow this, even with
> > STRICT_MODULE_RWX.
> >
> > This patch is largely based off commit 88fc078a7a8f6 ("x86/module: Use
> > text_poke() for late relocations").
> >
> > Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
> > ---
> >  arch/arm64/kernel/module.c | 129 ++++++++++++++++++++++++-------------
> >  1 file changed, 83 insertions(+), 46 deletions(-)
>
> On its own, this isn't gaining us an awful lot upstream as we don't have
> livepatch support (arm64 does not select HAVE_LIVEPATCH), however I'm
> not against incremental changes towards enabling that. Are you planning
> to work on follow-up changes for the rest of the support?

As Song mentioned, hopefully
https://lore.kernel.org/linux-arm-kernel/20250320171559.3423224-1-song@kernel.org/
in combination with this patch should be enough for initial support of
livepatch on arm64. I'll need to follow up with Weinan on next steps
for the SFrame approach.

>
> > diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
> > index 06bb680bfe975..0703502d9dc37 100644
> > --- a/arch/arm64/kernel/module.c
> > +++ b/arch/arm64/kernel/module.c
> > @@ -18,11 +18,13 @@
> >  #include <linux/moduleloader.h>
> >  #include <linux/random.h>
> >  #include <linux/scs.h>
> > +#include <linux/memory.h>
> >
> >  #include <asm/alternative.h>
> >  #include <asm/insn.h>
> >  #include <asm/scs.h>
> >  #include <asm/sections.h>
> > +#include <asm/text-patching.h>
> >
> >  enum aarch64_reloc_op {
> >       RELOC_OP_NONE,
> > @@ -48,7 +50,8 @@ static u64 do_reloc(enum aarch64_reloc_op reloc_op, __le32 *place, u64 val)
> >       return 0;
> >  }
> >
> > -static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
> > +static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len,
> > +                   void *(*write)(void *dest, const void *src, size_t len))
> >  {
>
> I think it's a bit grotty indirecting the write when in reality we either
> need a straight-forward assignment (like we have today) or a call to
> an instruction-patching helper.
>
> In particular, when you get to functions such as:
>
> >  static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
> > -                        int lsb, enum aarch64_insn_movw_imm_type imm_type)
> > +                        int lsb, enum aarch64_insn_movw_imm_type imm_type,
> > +                        void *(*write)(void *dest, const void *src, size_t len))
> >  {
> >       u64 imm;
> >       s64 sval;
> >       u32 insn = le32_to_cpu(*place);
> > +     __le32 le_insn;
> >
> >       sval = do_reloc(op, place, val);
> >       imm = sval >> lsb;
> > @@ -145,7 +150,8 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
> >
> >       /* Update the instruction with the new encoding. */
> >       insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, imm);
> > -     *place = cpu_to_le32(insn);
> > +     le_insn = cpu_to_le32(insn);
> > +     write(place, &le_insn, sizeof(le_insn));
>
> we're forced into passing &le_insn because we need the same function
> prototype as memcpy().
>
> Instead, how about we pass the 'struct module *mod' pointer down from
> apply_relocate_add()? That's already done for reloc_insn_adrp() and it
> would mean that the above could be written as:
>
>
> static int reloc_insn_movw(struct module *mod, enum aarch64_reloc_op op,
>                            __le32 *place, u64 val, int lsb,
>                            enum aarch64_insn_movw_imm_type imm_type)
> {
>         ...
>
>         insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, imm);
>         insn = cpu_to_le32(insn);
>
>         if (mod->state != MODULE_STATE_UNFORMED)
>                 aarch64_insn_set(place, insn, sizeof(insn));
>         else
>                 *place = insn;
>
>
> meaning we can also drop the first patch, because I don't think we need
> a text_poke() helper.

Dropped the first patch in v4 and switched to the method suggested
here, so we use a normal assignment for the non-late case. Though, it
does seem a little repetitive, with 6 different sites doing this
module state check. If having a straightforward assignment directly in
the reloc_* functions isn't too important, I wonder if we can make
local memset/memcpy-like helpers to contain this check? Like:

static void *write_insn(void *place, u32 insn, struct module *me);
static void *write_data(void *place, s64 *sval, struct module *me);

>
> > +int apply_relocate_add(Elf64_Shdr *sechdrs,
> > +                    const char *strtab,
> > +                    unsigned int symindex,
> > +                    unsigned int relsec,
> > +                    struct module *me)
> > +{
> > +     int ret;
> > +     bool early = me->state == MODULE_STATE_UNFORMED;
> > +     void *(*write)(void *, const void *, size_t) = memcpy;
> > +
> > +     if (!early) {
> > +             write = text_poke;
> > +             mutex_lock(&text_mutex);
> > +     }
> > +
> > +     ret = __apply_relocate_add(sechdrs, strtab, symindex, relsec, me,
> > +                                write);
> > +
> > +     if (!early)
> > +             mutex_unlock(&text_mutex);
>
> Why is the responsibility of the arch code to take the 'text_mutex' here?
> The core code should be able to do that when the module state is !=
> MODULE_STATE_UNFORMED.
>

Moved the locking to kernel/livepatch/core.c in v4, since other call
sites to apply_relocate_add() don't attempt late relocations. Since
the locking was already being done in the x86 module code I had to
remove this. It also made sense to me to split out the text_mutex
changes into a separate patch because they only touch module/x86 and
livepatch code, so that's how I did it in v4. But they can just as
easily be squashed into one patch.

Thanks,
Dylan
Re: [PATCH v2 2/2] arm64/module: Use text-poke API for late relocations.
Posted by Song Liu 9 months ago
Hi Will,

On Fri, May 9, 2025 at 9:15 AM Will Deacon <will@kernel.org> wrote:
>
> Hi Dylan,
>
> On Sat, Apr 12, 2025 at 01:09:39AM +0000, Dylan Hatch wrote:
> > To enable late module patching, livepatch modules need to be able to
> > apply some of their relocations well after being loaded. In this
> > scenario, use the text-poking API to allow this, even with
> > STRICT_MODULE_RWX.
> >
> > This patch is largely based off commit 88fc078a7a8f6 ("x86/module: Use
> > text_poke() for late relocations").
> >
> > Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
> > ---
> >  arch/arm64/kernel/module.c | 129 ++++++++++++++++++++++++-------------
> >  1 file changed, 83 insertions(+), 46 deletions(-)
>
> On its own, this isn't gaining us an awful lot upstream as we don't have
> livepatch support (arm64 does not select HAVE_LIVEPATCH), however I'm
> not against incremental changes towards enabling that. Are you planning
> to work on follow-up changes for the rest of the support?

There are two patchsets that are trying to enable HAVE_LIVEPATCH
for arm64:

[1] https://lore.kernel.org/all/20250127213310.2496133-1-wnliu@google.com/
[2] https://lore.kernel.org/linux-arm-kernel/20250320171559.3423224-1-song@kernel.org/

Toshiyuki has tested this patch with both approaches.

Since patchset [1] depends on SFrame, which is less mature at the
moment (clang doesn't support it, kernel side code is very new), live patch
folks think [2] is a better approach at the moment. Could you please share
your thoughts on [2]. If it looks good, we hope to ship it to 6.16 kernels, as
we (Meta) want to use livepatch in our fleet.

Thanks,
Song
Re: [PATCH v2 2/2] arm64/module: Use text-poke API for late relocations.
Posted by Dylan Hatch 9 months, 1 week ago
On Fri, Apr 11, 2025 at 6:10 PM Dylan Hatch <dylanbhatch@google.com> wrote:
>
> To enable late module patching, livepatch modules need to be able to
> apply some of their relocations well after being loaded. In this
> scenario, use the text-poking API to allow this, even with
> STRICT_MODULE_RWX.
>
> This patch is largely based off commit 88fc078a7a8f6 ("x86/module: Use
> text_poke() for late relocations").
>
> Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
> ---
>  arch/arm64/kernel/module.c | 129 ++++++++++++++++++++++++-------------
>  1 file changed, 83 insertions(+), 46 deletions(-)
>
> diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
> index 06bb680bfe975..0703502d9dc37 100644
> --- a/arch/arm64/kernel/module.c
> +++ b/arch/arm64/kernel/module.c
> @@ -18,11 +18,13 @@
>  #include <linux/moduleloader.h>
>  #include <linux/random.h>
>  #include <linux/scs.h>
> +#include <linux/memory.h>
>
>  #include <asm/alternative.h>
>  #include <asm/insn.h>
>  #include <asm/scs.h>
>  #include <asm/sections.h>
> +#include <asm/text-patching.h>
>
>  enum aarch64_reloc_op {
>         RELOC_OP_NONE,
> @@ -48,7 +50,8 @@ static u64 do_reloc(enum aarch64_reloc_op reloc_op, __le32 *place, u64 val)
>         return 0;
>  }
>
> -static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
> +static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len,
> +                     void *(*write)(void *dest, const void *src, size_t len))
>  {
>         s64 sval = do_reloc(op, place, val);
>
> @@ -66,7 +69,7 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
>
>         switch (len) {
>         case 16:
> -               *(s16 *)place = sval;
> +               write(place, &sval, sizeof(s16));
>                 switch (op) {
>                 case RELOC_OP_ABS:
>                         if (sval < 0 || sval > U16_MAX)
> @@ -82,7 +85,7 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
>                 }
>                 break;
>         case 32:
> -               *(s32 *)place = sval;
> +               write(place, &sval, sizeof(s32));
>                 switch (op) {
>                 case RELOC_OP_ABS:
>                         if (sval < 0 || sval > U32_MAX)
> @@ -98,7 +101,7 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
>                 }
>                 break;
>         case 64:
> -               *(s64 *)place = sval;
> +               write(place, &sval, sizeof(s64));
>                 break;
>         default:
>                 pr_err("Invalid length (%d) for data relocation\n", len);
> @@ -113,11 +116,13 @@ enum aarch64_insn_movw_imm_type {
>  };
>
>  static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
> -                          int lsb, enum aarch64_insn_movw_imm_type imm_type)
> +                          int lsb, enum aarch64_insn_movw_imm_type imm_type,
> +                          void *(*write)(void *dest, const void *src, size_t len))
>  {
>         u64 imm;
>         s64 sval;
>         u32 insn = le32_to_cpu(*place);
> +       __le32 le_insn;
>
>         sval = do_reloc(op, place, val);
>         imm = sval >> lsb;
> @@ -145,7 +150,8 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
>
>         /* Update the instruction with the new encoding. */
>         insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, imm);
> -       *place = cpu_to_le32(insn);
> +       le_insn = cpu_to_le32(insn);
> +       write(place, &le_insn, sizeof(le_insn));
>
>         if (imm > U16_MAX)
>                 return -ERANGE;
> @@ -154,11 +160,13 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
>  }
>
>  static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val,
> -                         int lsb, int len, enum aarch64_insn_imm_type imm_type)
> +                         int lsb, int len, enum aarch64_insn_imm_type imm_type,
> +                         void *(*write)(void *dest, const void *src, size_t len))
>  {
>         u64 imm, imm_mask;
>         s64 sval;
>         u32 insn = le32_to_cpu(*place);
> +       __le32 le_insn;
>
>         /* Calculate the relocation value. */
>         sval = do_reloc(op, place, val);
> @@ -170,7 +178,8 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val,
>
>         /* Update the instruction's immediate field. */
>         insn = aarch64_insn_encode_immediate(imm_type, insn, imm);
> -       *place = cpu_to_le32(insn);
> +       le_insn = cpu_to_le32(insn);
> +       write(place, &le_insn, sizeof(le_insn));
>
>         /*
>          * Extract the upper value bits (including the sign bit) and
> @@ -189,17 +198,19 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val,
>  }
>
>  static int reloc_insn_adrp(struct module *mod, Elf64_Shdr *sechdrs,
> -                          __le32 *place, u64 val)
> +                          __le32 *place, u64 val,
> +                          void *(*write)(void *dest, const void *src, size_t len))
>  {
>         u32 insn;
> +       __le32 le_insn;
>
>         if (!is_forbidden_offset_for_adrp(place))
>                 return reloc_insn_imm(RELOC_OP_PAGE, place, val, 12, 21,
> -                                     AARCH64_INSN_IMM_ADR);
> +                                     AARCH64_INSN_IMM_ADR, write);
>
>         /* patch ADRP to ADR if it is in range */
>         if (!reloc_insn_imm(RELOC_OP_PREL, place, val & ~0xfff, 0, 21,
> -                           AARCH64_INSN_IMM_ADR)) {
> +                           AARCH64_INSN_IMM_ADR, write)) {
>                 insn = le32_to_cpu(*place);
>                 insn &= ~BIT(31);
>         } else {
> @@ -211,15 +222,17 @@ static int reloc_insn_adrp(struct module *mod, Elf64_Shdr *sechdrs,
>                                                    AARCH64_INSN_BRANCH_NOLINK);
>         }
>
> -       *place = cpu_to_le32(insn);
> +       le_insn = cpu_to_le32(insn);
> +       write(place, &le_insn, sizeof(le_insn));
>         return 0;
>  }
>
> -int apply_relocate_add(Elf64_Shdr *sechdrs,
> +static int __apply_relocate_add(Elf64_Shdr *sechdrs,
>                        const char *strtab,
>                        unsigned int symindex,
>                        unsigned int relsec,
> -                      struct module *me)
> +                      struct module *me,
> +                      void *(*write)(void *dest, const void *src, size_t len))
>  {
>         unsigned int i;
>         int ovf;
> @@ -255,23 +268,23 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
>                 /* Data relocations. */
>                 case R_AARCH64_ABS64:
>                         overflow_check = false;
> -                       ovf = reloc_data(RELOC_OP_ABS, loc, val, 64);
> +                       ovf = reloc_data(RELOC_OP_ABS, loc, val, 64, write);
>                         break;
>                 case R_AARCH64_ABS32:
> -                       ovf = reloc_data(RELOC_OP_ABS, loc, val, 32);
> +                       ovf = reloc_data(RELOC_OP_ABS, loc, val, 32, write);
>                         break;
>                 case R_AARCH64_ABS16:
> -                       ovf = reloc_data(RELOC_OP_ABS, loc, val, 16);
> +                       ovf = reloc_data(RELOC_OP_ABS, loc, val, 16, write);
>                         break;
>                 case R_AARCH64_PREL64:
>                         overflow_check = false;
> -                       ovf = reloc_data(RELOC_OP_PREL, loc, val, 64);
> +                       ovf = reloc_data(RELOC_OP_PREL, loc, val, 64, write);
>                         break;
>                 case R_AARCH64_PREL32:
> -                       ovf = reloc_data(RELOC_OP_PREL, loc, val, 32);
> +                       ovf = reloc_data(RELOC_OP_PREL, loc, val, 32, write);
>                         break;
>                 case R_AARCH64_PREL16:
> -                       ovf = reloc_data(RELOC_OP_PREL, loc, val, 16);
> +                       ovf = reloc_data(RELOC_OP_PREL, loc, val, 16, write);
>                         break;
>
>                 /* MOVW instruction relocations. */
> @@ -280,88 +293,88 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
>                         fallthrough;
>                 case R_AARCH64_MOVW_UABS_G0:
>                         ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0,
> -                                             AARCH64_INSN_IMM_MOVKZ);
> +                                             AARCH64_INSN_IMM_MOVKZ, write);
>                         break;
>                 case R_AARCH64_MOVW_UABS_G1_NC:
>                         overflow_check = false;
>                         fallthrough;
>                 case R_AARCH64_MOVW_UABS_G1:
>                         ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16,
> -                                             AARCH64_INSN_IMM_MOVKZ);
> +                                             AARCH64_INSN_IMM_MOVKZ, write);
>                         break;
>                 case R_AARCH64_MOVW_UABS_G2_NC:
>                         overflow_check = false;
>                         fallthrough;
>                 case R_AARCH64_MOVW_UABS_G2:
>                         ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32,
> -                                             AARCH64_INSN_IMM_MOVKZ);
> +                                             AARCH64_INSN_IMM_MOVKZ, write);
>                         break;
>                 case R_AARCH64_MOVW_UABS_G3:
>                         /* We're using the top bits so we can't overflow. */
>                         overflow_check = false;
>                         ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 48,
> -                                             AARCH64_INSN_IMM_MOVKZ);
> +                                             AARCH64_INSN_IMM_MOVKZ, write);
>                         break;
>                 case R_AARCH64_MOVW_SABS_G0:
>                         ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0,
> -                                             AARCH64_INSN_IMM_MOVNZ);
> +                                             AARCH64_INSN_IMM_MOVNZ, write);
>                         break;
>                 case R_AARCH64_MOVW_SABS_G1:
>                         ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16,
> -                                             AARCH64_INSN_IMM_MOVNZ);
> +                                             AARCH64_INSN_IMM_MOVNZ, write);
>                         break;
>                 case R_AARCH64_MOVW_SABS_G2:
>                         ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32,
> -                                             AARCH64_INSN_IMM_MOVNZ);
> +                                             AARCH64_INSN_IMM_MOVNZ, write);
>                         break;
>                 case R_AARCH64_MOVW_PREL_G0_NC:
>                         overflow_check = false;
>                         ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0,
> -                                             AARCH64_INSN_IMM_MOVKZ);
> +                                             AARCH64_INSN_IMM_MOVKZ, write);
>                         break;
>                 case R_AARCH64_MOVW_PREL_G0:
>                         ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0,
> -                                             AARCH64_INSN_IMM_MOVNZ);
> +                                             AARCH64_INSN_IMM_MOVNZ, write);
>                         break;
>                 case R_AARCH64_MOVW_PREL_G1_NC:
>                         overflow_check = false;
>                         ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16,
> -                                             AARCH64_INSN_IMM_MOVKZ);
> +                                             AARCH64_INSN_IMM_MOVKZ, write);
>                         break;
>                 case R_AARCH64_MOVW_PREL_G1:
>                         ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16,
> -                                             AARCH64_INSN_IMM_MOVNZ);
> +                                             AARCH64_INSN_IMM_MOVNZ, write);
>                         break;
>                 case R_AARCH64_MOVW_PREL_G2_NC:
>                         overflow_check = false;
>                         ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32,
> -                                             AARCH64_INSN_IMM_MOVKZ);
> +                                             AARCH64_INSN_IMM_MOVKZ, write);
>                         break;
>                 case R_AARCH64_MOVW_PREL_G2:
>                         ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32,
> -                                             AARCH64_INSN_IMM_MOVNZ);
> +                                             AARCH64_INSN_IMM_MOVNZ, write);
>                         break;
>                 case R_AARCH64_MOVW_PREL_G3:
>                         /* We're using the top bits so we can't overflow. */
>                         overflow_check = false;
>                         ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 48,
> -                                             AARCH64_INSN_IMM_MOVNZ);
> +                                             AARCH64_INSN_IMM_MOVNZ, write);
>                         break;
>
>                 /* Immediate instruction relocations. */
>                 case R_AARCH64_LD_PREL_LO19:
>                         ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19,
> -                                            AARCH64_INSN_IMM_19);
> +                                            AARCH64_INSN_IMM_19, write);
>                         break;
>                 case R_AARCH64_ADR_PREL_LO21:
>                         ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21,
> -                                            AARCH64_INSN_IMM_ADR);
> +                                            AARCH64_INSN_IMM_ADR, write);
>                         break;
>                 case R_AARCH64_ADR_PREL_PG_HI21_NC:
>                         overflow_check = false;
>                         fallthrough;
>                 case R_AARCH64_ADR_PREL_PG_HI21:
> -                       ovf = reloc_insn_adrp(me, sechdrs, loc, val);
> +                       ovf = reloc_insn_adrp(me, sechdrs, loc, val, write);
>                         if (ovf && ovf != -ERANGE)
>                                 return ovf;
>                         break;
> @@ -369,46 +382,46 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
>                 case R_AARCH64_LDST8_ABS_LO12_NC:
>                         overflow_check = false;
>                         ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 0, 12,
> -                                            AARCH64_INSN_IMM_12);
> +                                            AARCH64_INSN_IMM_12, write);
>                         break;
>                 case R_AARCH64_LDST16_ABS_LO12_NC:
>                         overflow_check = false;
>                         ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 1, 11,
> -                                            AARCH64_INSN_IMM_12);
> +                                            AARCH64_INSN_IMM_12, write);
>                         break;
>                 case R_AARCH64_LDST32_ABS_LO12_NC:
>                         overflow_check = false;
>                         ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 2, 10,
> -                                            AARCH64_INSN_IMM_12);
> +                                            AARCH64_INSN_IMM_12, write);
>                         break;
>                 case R_AARCH64_LDST64_ABS_LO12_NC:
>                         overflow_check = false;
>                         ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 3, 9,
> -                                            AARCH64_INSN_IMM_12);
> +                                            AARCH64_INSN_IMM_12, write);
>                         break;
>                 case R_AARCH64_LDST128_ABS_LO12_NC:
>                         overflow_check = false;
>                         ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 4, 8,
> -                                            AARCH64_INSN_IMM_12);
> +                                            AARCH64_INSN_IMM_12, write);
>                         break;
>                 case R_AARCH64_TSTBR14:
>                         ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 14,
> -                                            AARCH64_INSN_IMM_14);
> +                                            AARCH64_INSN_IMM_14, write);
>                         break;
>                 case R_AARCH64_CONDBR19:
>                         ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19,
> -                                            AARCH64_INSN_IMM_19);
> +                                            AARCH64_INSN_IMM_19, write);
>                         break;
>                 case R_AARCH64_JUMP26:
>                 case R_AARCH64_CALL26:
>                         ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26,
> -                                            AARCH64_INSN_IMM_26);
> +                                            AARCH64_INSN_IMM_26, write);
>                         if (ovf == -ERANGE) {
>                                 val = module_emit_plt_entry(me, sechdrs, loc, &rel[i], sym);
>                                 if (!val)
>                                         return -ENOEXEC;
>                                 ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2,
> -                                                    26, AARCH64_INSN_IMM_26);
> +                                                    26, AARCH64_INSN_IMM_26, write);
>                         }
>                         break;
>
> @@ -431,6 +444,30 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
>         return -ENOEXEC;
>  }
>
> +int apply_relocate_add(Elf64_Shdr *sechdrs,
> +                      const char *strtab,
> +                      unsigned int symindex,
> +                      unsigned int relsec,
> +                      struct module *me)
> +{
> +       int ret;
> +       bool early = me->state == MODULE_STATE_UNFORMED;
> +       void *(*write)(void *, const void *, size_t) = memcpy;
> +
> +       if (!early) {
> +               write = text_poke;
> +               mutex_lock(&text_mutex);
> +       }
> +
> +       ret = __apply_relocate_add(sechdrs, strtab, symindex, relsec, me,
> +                                  write);
> +
> +       if (!early)
> +               mutex_unlock(&text_mutex);
> +
> +       return ret;
> +}
> +
>  static inline void __init_plt(struct plt_entry *plt, unsigned long addr)
>  {
>         *plt = get_plt_entry(addr, plt);
> --
> 2.49.0.604.gff1f9ca942-goog
>

Catalin and Will,

Are you comfortable with taking these patches? Is this a workable
approach to enable livepatching arm64 modules?

Thanks,
Dylan
Re: [PATCH v2 2/2] arm64/module: Use text-poke API for late relocations.
Posted by Song Liu 9 months, 3 weeks ago
On Fri, Apr 11, 2025 at 6:10 PM Dylan Hatch <dylanbhatch@google.com> wrote:
>
> To enable late module patching, livepatch modules need to be able to
> apply some of their relocations well after being loaded. In this
> scenario, use the text-poking API to allow this, even with
> STRICT_MODULE_RWX.
>
> This patch is largely based off commit 88fc078a7a8f6 ("x86/module: Use
> text_poke() for late relocations").
>
> Signed-off-by: Dylan Hatch <dylanbhatch@google.com>

Acked-by: Song Liu <song@kernel.org>
Re: [PATCH v2 2/2] arm64/module: Use text-poke API for late relocations.
Posted by Song Liu 9 months, 3 weeks ago
On Mon, Apr 21, 2025 at 5:35 PM Song Liu <song@kernel.org> wrote:
>
> On Fri, Apr 11, 2025 at 6:10 PM Dylan Hatch <dylanbhatch@google.com> wrote:
> >
> > To enable late module patching, livepatch modules need to be able to
> > apply some of their relocations well after being loaded. In this
> > scenario, use the text-poking API to allow this, even with
> > STRICT_MODULE_RWX.
> >
> > This patch is largely based off commit 88fc078a7a8f6 ("x86/module: Use
> > text_poke() for late relocations").
> >
> > Signed-off-by: Dylan Hatch <dylanbhatch@google.com>

Could you please share how you test this?

Thanks,
Song


> Acked-by: Song Liu <song@kernel.org>
Re: [PATCH v2 2/2] arm64/module: Use text-poke API for late relocations.
Posted by Dylan Hatch 9 months, 2 weeks ago
On Mon, Apr 21, 2025 at 11:25 PM Song Liu <song@kernel.org> wrote:
>
> On Mon, Apr 21, 2025 at 5:35 PM Song Liu <song@kernel.org> wrote:
> >
> > On Fri, Apr 11, 2025 at 6:10 PM Dylan Hatch <dylanbhatch@google.com> wrote:
> > >
> > > To enable late module patching, livepatch modules need to be able to
> > > apply some of their relocations well after being loaded. In this
> > > scenario, use the text-poking API to allow this, even with
> > > STRICT_MODULE_RWX.
> > >
> > > This patch is largely based off commit 88fc078a7a8f6 ("x86/module: Use
> > > text_poke() for late relocations").
> > >
> > > Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
>
> Could you please share how you test this?
>

For context, we enable livepatch for arm64 by porting this RFC series
(along with other internal patches) into our kernel:
https://lore.kernel.org/all/20230202074036.507249-1-madvenka@linux.microsoft.com/.

The way I tested this patch is: with STRICT_MODULE_RWX, load a module
and a livepatch that touches that module (in either order), and
confirm the kernel doesn't crash.

Without this patch, a crash is caused in apply_relocate_add() if both
a module and a livepatch that touches the module are both loaded. This
happens through one of two code paths:

  1. If the module is already loaded when the livepatch is applied,
through the module_init() callback.
  2. If the module is loaded after the livepatch is applied, through
prepare_coming_module().

In both scenarios, the livepatch module's text is already RX-only.

Thanks,
Dylan
Re: [PATCH v2 2/2] arm64/module: Use text-poke API for late relocations.
Posted by Song Liu 9 months, 2 weeks ago
On Tue, Apr 22, 2025 at 5:27 PM Dylan Hatch <dylanbhatch@google.com> wrote:
>
> On Mon, Apr 21, 2025 at 11:25 PM Song Liu <song@kernel.org> wrote:
> >
> > On Mon, Apr 21, 2025 at 5:35 PM Song Liu <song@kernel.org> wrote:
> > >
> > > On Fri, Apr 11, 2025 at 6:10 PM Dylan Hatch <dylanbhatch@google.com> wrote:
> > > >
> > > > To enable late module patching, livepatch modules need to be able to
> > > > apply some of their relocations well after being loaded. In this
> > > > scenario, use the text-poking API to allow this, even with
> > > > STRICT_MODULE_RWX.
> > > >
> > > > This patch is largely based off commit 88fc078a7a8f6 ("x86/module: Use
> > > > text_poke() for late relocations").
> > > >
> > > > Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
> >
> > Could you please share how you test this?
> >
>
> For context, we enable livepatch for arm64 by porting this RFC series
> (along with other internal patches) into our kernel:
> https://lore.kernel.org/all/20230202074036.507249-1-madvenka@linux.microsoft.com/.
>
> The way I tested this patch is: with STRICT_MODULE_RWX, load a module
> and a livepatch that touches that module (in either order), and
> confirm the kernel doesn't crash.
>
> Without this patch, a crash is caused in apply_relocate_add() if both
> a module and a livepatch that touches the module are both loaded. This
> happens through one of two code paths:
>
>   1. If the module is already loaded when the livepatch is applied,
> through the module_init() callback.
>   2. If the module is loaded after the livepatch is applied, through
> prepare_coming_module().
>
> In both scenarios, the livepatch module's text is already RX-only.

Thanks for sharing the information!

Could you please help test this set with [1]? This is a different approach
than the one by Madhavan. We were hoping to ship it soon.

Thanks,
Song

[1] https://lore.kernel.org/live-patching/20250320171559.3423224-1-song@kernel.org/