[PATCH v2 2/2] arm64/module: Use text-poke API for late relocations.

Dylan Hatch posted 2 patches 8 months, 1 week ago
[PATCH v2 2/2] arm64/module: Use text-poke API for late relocations.
Posted by Dylan Hatch 8 months, 1 week ago
To enable late module patching, livepatch modules need to be able to
apply some of their relocations well after being loaded. In this
scenario, use the text-poking API to allow this, even with
STRICT_MODULE_RWX.

This patch is largely based off commit 88fc078a7a8f6 ("x86/module: Use
text_poke() for late relocations").

Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
---
 arch/arm64/kernel/module.c | 129 ++++++++++++++++++++++++-------------
 1 file changed, 83 insertions(+), 46 deletions(-)

diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 06bb680bfe975..0703502d9dc37 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -18,11 +18,13 @@
 #include <linux/moduleloader.h>
 #include <linux/random.h>
 #include <linux/scs.h>
+#include <linux/memory.h>
 
 #include <asm/alternative.h>
 #include <asm/insn.h>
 #include <asm/scs.h>
 #include <asm/sections.h>
+#include <asm/text-patching.h>
 
 enum aarch64_reloc_op {
 	RELOC_OP_NONE,
@@ -48,7 +50,8 @@ static u64 do_reloc(enum aarch64_reloc_op reloc_op, __le32 *place, u64 val)
 	return 0;
 }
 
-static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
+static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len,
+		      void *(*write)(void *dest, const void *src, size_t len))
 {
 	s64 sval = do_reloc(op, place, val);
 
@@ -66,7 +69,7 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
 
 	switch (len) {
 	case 16:
-		*(s16 *)place = sval;
+		write(place, &sval, sizeof(s16));
 		switch (op) {
 		case RELOC_OP_ABS:
 			if (sval < 0 || sval > U16_MAX)
@@ -82,7 +85,7 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
 		}
 		break;
 	case 32:
-		*(s32 *)place = sval;
+		write(place, &sval, sizeof(s32));
 		switch (op) {
 		case RELOC_OP_ABS:
 			if (sval < 0 || sval > U32_MAX)
@@ -98,7 +101,7 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
 		}
 		break;
 	case 64:
-		*(s64 *)place = sval;
+		write(place, &sval, sizeof(s64));
 		break;
 	default:
 		pr_err("Invalid length (%d) for data relocation\n", len);
@@ -113,11 +116,13 @@ enum aarch64_insn_movw_imm_type {
 };
 
 static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
-			   int lsb, enum aarch64_insn_movw_imm_type imm_type)
+			   int lsb, enum aarch64_insn_movw_imm_type imm_type,
+			   void *(*write)(void *dest, const void *src, size_t len))
 {
 	u64 imm;
 	s64 sval;
 	u32 insn = le32_to_cpu(*place);
+	__le32 le_insn;
 
 	sval = do_reloc(op, place, val);
 	imm = sval >> lsb;
@@ -145,7 +150,8 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
 
 	/* Update the instruction with the new encoding. */
 	insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, imm);
-	*place = cpu_to_le32(insn);
+	le_insn = cpu_to_le32(insn);
+	write(place, &le_insn, sizeof(le_insn));
 
 	if (imm > U16_MAX)
 		return -ERANGE;
@@ -154,11 +160,13 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
 }
 
 static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val,
-			  int lsb, int len, enum aarch64_insn_imm_type imm_type)
+			  int lsb, int len, enum aarch64_insn_imm_type imm_type,
+			  void *(*write)(void *dest, const void *src, size_t len))
 {
 	u64 imm, imm_mask;
 	s64 sval;
 	u32 insn = le32_to_cpu(*place);
+	__le32 le_insn;
 
 	/* Calculate the relocation value. */
 	sval = do_reloc(op, place, val);
@@ -170,7 +178,8 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val,
 
 	/* Update the instruction's immediate field. */
 	insn = aarch64_insn_encode_immediate(imm_type, insn, imm);
-	*place = cpu_to_le32(insn);
+	le_insn = cpu_to_le32(insn);
+	write(place, &le_insn, sizeof(le_insn));
 
 	/*
 	 * Extract the upper value bits (including the sign bit) and
@@ -189,17 +198,19 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val,
 }
 
 static int reloc_insn_adrp(struct module *mod, Elf64_Shdr *sechdrs,
-			   __le32 *place, u64 val)
+			   __le32 *place, u64 val,
+			   void *(*write)(void *dest, const void *src, size_t len))
 {
 	u32 insn;
+	__le32 le_insn;
 
 	if (!is_forbidden_offset_for_adrp(place))
 		return reloc_insn_imm(RELOC_OP_PAGE, place, val, 12, 21,
-				      AARCH64_INSN_IMM_ADR);
+				      AARCH64_INSN_IMM_ADR, write);
 
 	/* patch ADRP to ADR if it is in range */
 	if (!reloc_insn_imm(RELOC_OP_PREL, place, val & ~0xfff, 0, 21,
-			    AARCH64_INSN_IMM_ADR)) {
+			    AARCH64_INSN_IMM_ADR, write)) {
 		insn = le32_to_cpu(*place);
 		insn &= ~BIT(31);
 	} else {
@@ -211,15 +222,17 @@ static int reloc_insn_adrp(struct module *mod, Elf64_Shdr *sechdrs,
 						   AARCH64_INSN_BRANCH_NOLINK);
 	}
 
-	*place = cpu_to_le32(insn);
+	le_insn = cpu_to_le32(insn);
+	write(place, &le_insn, sizeof(le_insn));
 	return 0;
 }
 
-int apply_relocate_add(Elf64_Shdr *sechdrs,
+static int __apply_relocate_add(Elf64_Shdr *sechdrs,
 		       const char *strtab,
 		       unsigned int symindex,
 		       unsigned int relsec,
-		       struct module *me)
+		       struct module *me,
+		       void *(*write)(void *dest, const void *src, size_t len))
 {
 	unsigned int i;
 	int ovf;
@@ -255,23 +268,23 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 		/* Data relocations. */
 		case R_AARCH64_ABS64:
 			overflow_check = false;
-			ovf = reloc_data(RELOC_OP_ABS, loc, val, 64);
+			ovf = reloc_data(RELOC_OP_ABS, loc, val, 64, write);
 			break;
 		case R_AARCH64_ABS32:
-			ovf = reloc_data(RELOC_OP_ABS, loc, val, 32);
+			ovf = reloc_data(RELOC_OP_ABS, loc, val, 32, write);
 			break;
 		case R_AARCH64_ABS16:
-			ovf = reloc_data(RELOC_OP_ABS, loc, val, 16);
+			ovf = reloc_data(RELOC_OP_ABS, loc, val, 16, write);
 			break;
 		case R_AARCH64_PREL64:
 			overflow_check = false;
-			ovf = reloc_data(RELOC_OP_PREL, loc, val, 64);
+			ovf = reloc_data(RELOC_OP_PREL, loc, val, 64, write);
 			break;
 		case R_AARCH64_PREL32:
-			ovf = reloc_data(RELOC_OP_PREL, loc, val, 32);
+			ovf = reloc_data(RELOC_OP_PREL, loc, val, 32, write);
 			break;
 		case R_AARCH64_PREL16:
-			ovf = reloc_data(RELOC_OP_PREL, loc, val, 16);
+			ovf = reloc_data(RELOC_OP_PREL, loc, val, 16, write);
 			break;
 
 		/* MOVW instruction relocations. */
@@ -280,88 +293,88 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 			fallthrough;
 		case R_AARCH64_MOVW_UABS_G0:
 			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0,
-					      AARCH64_INSN_IMM_MOVKZ);
+					      AARCH64_INSN_IMM_MOVKZ, write);
 			break;
 		case R_AARCH64_MOVW_UABS_G1_NC:
 			overflow_check = false;
 			fallthrough;
 		case R_AARCH64_MOVW_UABS_G1:
 			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16,
-					      AARCH64_INSN_IMM_MOVKZ);
+					      AARCH64_INSN_IMM_MOVKZ, write);
 			break;
 		case R_AARCH64_MOVW_UABS_G2_NC:
 			overflow_check = false;
 			fallthrough;
 		case R_AARCH64_MOVW_UABS_G2:
 			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32,
-					      AARCH64_INSN_IMM_MOVKZ);
+					      AARCH64_INSN_IMM_MOVKZ, write);
 			break;
 		case R_AARCH64_MOVW_UABS_G3:
 			/* We're using the top bits so we can't overflow. */
 			overflow_check = false;
 			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 48,
-					      AARCH64_INSN_IMM_MOVKZ);
+					      AARCH64_INSN_IMM_MOVKZ, write);
 			break;
 		case R_AARCH64_MOVW_SABS_G0:
 			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0,
-					      AARCH64_INSN_IMM_MOVNZ);
+					      AARCH64_INSN_IMM_MOVNZ, write);
 			break;
 		case R_AARCH64_MOVW_SABS_G1:
 			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16,
-					      AARCH64_INSN_IMM_MOVNZ);
+					      AARCH64_INSN_IMM_MOVNZ, write);
 			break;
 		case R_AARCH64_MOVW_SABS_G2:
 			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32,
-					      AARCH64_INSN_IMM_MOVNZ);
+					      AARCH64_INSN_IMM_MOVNZ, write);
 			break;
 		case R_AARCH64_MOVW_PREL_G0_NC:
 			overflow_check = false;
 			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0,
-					      AARCH64_INSN_IMM_MOVKZ);
+					      AARCH64_INSN_IMM_MOVKZ, write);
 			break;
 		case R_AARCH64_MOVW_PREL_G0:
 			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0,
-					      AARCH64_INSN_IMM_MOVNZ);
+					      AARCH64_INSN_IMM_MOVNZ, write);
 			break;
 		case R_AARCH64_MOVW_PREL_G1_NC:
 			overflow_check = false;
 			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16,
-					      AARCH64_INSN_IMM_MOVKZ);
+					      AARCH64_INSN_IMM_MOVKZ, write);
 			break;
 		case R_AARCH64_MOVW_PREL_G1:
 			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16,
-					      AARCH64_INSN_IMM_MOVNZ);
+					      AARCH64_INSN_IMM_MOVNZ, write);
 			break;
 		case R_AARCH64_MOVW_PREL_G2_NC:
 			overflow_check = false;
 			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32,
-					      AARCH64_INSN_IMM_MOVKZ);
+					      AARCH64_INSN_IMM_MOVKZ, write);
 			break;
 		case R_AARCH64_MOVW_PREL_G2:
 			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32,
-					      AARCH64_INSN_IMM_MOVNZ);
+					      AARCH64_INSN_IMM_MOVNZ, write);
 			break;
 		case R_AARCH64_MOVW_PREL_G3:
 			/* We're using the top bits so we can't overflow. */
 			overflow_check = false;
 			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 48,
-					      AARCH64_INSN_IMM_MOVNZ);
+					      AARCH64_INSN_IMM_MOVNZ, write);
 			break;
 
 		/* Immediate instruction relocations. */
 		case R_AARCH64_LD_PREL_LO19:
 			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19,
-					     AARCH64_INSN_IMM_19);
+					     AARCH64_INSN_IMM_19, write);
 			break;
 		case R_AARCH64_ADR_PREL_LO21:
 			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21,
-					     AARCH64_INSN_IMM_ADR);
+					     AARCH64_INSN_IMM_ADR, write);
 			break;
 		case R_AARCH64_ADR_PREL_PG_HI21_NC:
 			overflow_check = false;
 			fallthrough;
 		case R_AARCH64_ADR_PREL_PG_HI21:
-			ovf = reloc_insn_adrp(me, sechdrs, loc, val);
+			ovf = reloc_insn_adrp(me, sechdrs, loc, val, write);
 			if (ovf && ovf != -ERANGE)
 				return ovf;
 			break;
@@ -369,46 +382,46 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 		case R_AARCH64_LDST8_ABS_LO12_NC:
 			overflow_check = false;
 			ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 0, 12,
-					     AARCH64_INSN_IMM_12);
+					     AARCH64_INSN_IMM_12, write);
 			break;
 		case R_AARCH64_LDST16_ABS_LO12_NC:
 			overflow_check = false;
 			ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 1, 11,
-					     AARCH64_INSN_IMM_12);
+					     AARCH64_INSN_IMM_12, write);
 			break;
 		case R_AARCH64_LDST32_ABS_LO12_NC:
 			overflow_check = false;
 			ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 2, 10,
-					     AARCH64_INSN_IMM_12);
+					     AARCH64_INSN_IMM_12, write);
 			break;
 		case R_AARCH64_LDST64_ABS_LO12_NC:
 			overflow_check = false;
 			ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 3, 9,
-					     AARCH64_INSN_IMM_12);
+					     AARCH64_INSN_IMM_12, write);
 			break;
 		case R_AARCH64_LDST128_ABS_LO12_NC:
 			overflow_check = false;
 			ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 4, 8,
-					     AARCH64_INSN_IMM_12);
+					     AARCH64_INSN_IMM_12, write);
 			break;
 		case R_AARCH64_TSTBR14:
 			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 14,
-					     AARCH64_INSN_IMM_14);
+					     AARCH64_INSN_IMM_14, write);
 			break;
 		case R_AARCH64_CONDBR19:
 			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19,
-					     AARCH64_INSN_IMM_19);
+					     AARCH64_INSN_IMM_19, write);
 			break;
 		case R_AARCH64_JUMP26:
 		case R_AARCH64_CALL26:
 			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26,
-					     AARCH64_INSN_IMM_26);
+					     AARCH64_INSN_IMM_26, write);
 			if (ovf == -ERANGE) {
 				val = module_emit_plt_entry(me, sechdrs, loc, &rel[i], sym);
 				if (!val)
 					return -ENOEXEC;
 				ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2,
-						     26, AARCH64_INSN_IMM_26);
+						     26, AARCH64_INSN_IMM_26, write);
 			}
 			break;
 
@@ -431,6 +444,30 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 	return -ENOEXEC;
 }
 
+int apply_relocate_add(Elf64_Shdr *sechdrs,
+		       const char *strtab,
+		       unsigned int symindex,
+		       unsigned int relsec,
+		       struct module *me)
+{
+	int ret;
+	bool early = me->state == MODULE_STATE_UNFORMED;
+	void *(*write)(void *, const void *, size_t) = memcpy;
+
+	if (!early) {
+		write = text_poke;
+		mutex_lock(&text_mutex);
+	}
+
+	ret = __apply_relocate_add(sechdrs, strtab, symindex, relsec, me,
+				   write);
+
+	if (!early)
+		mutex_unlock(&text_mutex);
+
+	return ret;
+}
+
 static inline void __init_plt(struct plt_entry *plt, unsigned long addr)
 {
 	*plt = get_plt_entry(addr, plt);
-- 
2.49.0.604.gff1f9ca942-goog
Re: [PATCH v2 2/2] arm64/module: Use text-poke API for late relocations.
Posted by Will Deacon 7 months, 1 week ago
Hi Dylan,

On Sat, Apr 12, 2025 at 01:09:39AM +0000, Dylan Hatch wrote:
> To enable late module patching, livepatch modules need to be able to
> apply some of their relocations well after being loaded. In this
> scenario, use the text-poking API to allow this, even with
> STRICT_MODULE_RWX.
> 
> This patch is largely based off commit 88fc078a7a8f6 ("x86/module: Use
> text_poke() for late relocations").
> 
> Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
> ---
>  arch/arm64/kernel/module.c | 129 ++++++++++++++++++++++++-------------
>  1 file changed, 83 insertions(+), 46 deletions(-)

On its own, this isn't gaining us an awful lot upstream as we don't have
livepatch support (arm64 does not select HAVE_LIVEPATCH), however I'm
not against incremental changes towards enabling that. Are you planning
to work on follow-up changes for the rest of the support?

> diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
> index 06bb680bfe975..0703502d9dc37 100644
> --- a/arch/arm64/kernel/module.c
> +++ b/arch/arm64/kernel/module.c
> @@ -18,11 +18,13 @@
>  #include <linux/moduleloader.h>
>  #include <linux/random.h>
>  #include <linux/scs.h>
> +#include <linux/memory.h>
>  
>  #include <asm/alternative.h>
>  #include <asm/insn.h>
>  #include <asm/scs.h>
>  #include <asm/sections.h>
> +#include <asm/text-patching.h>
>  
>  enum aarch64_reloc_op {
>  	RELOC_OP_NONE,
> @@ -48,7 +50,8 @@ static u64 do_reloc(enum aarch64_reloc_op reloc_op, __le32 *place, u64 val)
>  	return 0;
>  }
>  
> -static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
> +static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len,
> +		      void *(*write)(void *dest, const void *src, size_t len))
>  {

I think it's a bit grotty indirecting the write when in reality we either
need a straight-forward assignment (like we have today) or a call to
an instruction-patching helper.

In particular, when you get to functions such as:

>  static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
> -			   int lsb, enum aarch64_insn_movw_imm_type imm_type)
> +			   int lsb, enum aarch64_insn_movw_imm_type imm_type,
> +			   void *(*write)(void *dest, const void *src, size_t len))
>  {
>  	u64 imm;
>  	s64 sval;
>  	u32 insn = le32_to_cpu(*place);
> +	__le32 le_insn;
>  
>  	sval = do_reloc(op, place, val);
>  	imm = sval >> lsb;
> @@ -145,7 +150,8 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
>  
>  	/* Update the instruction with the new encoding. */
>  	insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, imm);
> -	*place = cpu_to_le32(insn);
> +	le_insn = cpu_to_le32(insn);
> +	write(place, &le_insn, sizeof(le_insn));

we're forced into passing &le_insn because we need the same function
prototype as memcpy().

Instead, how about we pass the 'struct module *mod' pointer down from
apply_relocate_add()? That's already done for reloc_insn_adrp() and it
would mean that the above could be written as:


static int reloc_insn_movw(struct module *mod, enum aarch64_reloc_op op,
			   __le32 *place, u64 val, int lsb,
			   enum aarch64_insn_movw_imm_type imm_type)
{
	...

	insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, imm);
	insn = cpu_to_le32(insn);

	if (mod->state != MODULE_STATE_UNFORMED)
		aarch64_insn_set(place, insn, sizeof(insn));
	else
		*place = insn;


meaning we can also drop the first patch, because I don't think we need
a text_poke() helper.

> +int apply_relocate_add(Elf64_Shdr *sechdrs,
> +		       const char *strtab,
> +		       unsigned int symindex,
> +		       unsigned int relsec,
> +		       struct module *me)
> +{
> +	int ret;
> +	bool early = me->state == MODULE_STATE_UNFORMED;
> +	void *(*write)(void *, const void *, size_t) = memcpy;
> +
> +	if (!early) {
> +		write = text_poke;
> +		mutex_lock(&text_mutex);
> +	}
> +
> +	ret = __apply_relocate_add(sechdrs, strtab, symindex, relsec, me,
> +				   write);
> +
> +	if (!early)
> +		mutex_unlock(&text_mutex);

Why is the responsibility of the arch code to take the 'text_mutex' here?
The core code should be able to do that when the module state is !=
MODULE_STATE_UNFORMED.

Cheers,

Will
Re: [PATCH v2 2/2] arm64/module: Use text-poke API for late relocations.
Posted by Dylan Hatch 7 months ago
Hi Will,

I've sent a v4 of these patches that should address your comments,
feel free to have a look.

On Fri, May 9, 2025 at 9:15 AM Will Deacon <will@kernel.org> wrote:
>
> Hi Dylan,
>
> On Sat, Apr 12, 2025 at 01:09:39AM +0000, Dylan Hatch wrote:
> > To enable late module patching, livepatch modules need to be able to
> > apply some of their relocations well after being loaded. In this
> > scenario, use the text-poking API to allow this, even with
> > STRICT_MODULE_RWX.
> >
> > This patch is largely based off commit 88fc078a7a8f6 ("x86/module: Use
> > text_poke() for late relocations").
> >
> > Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
> > ---
> >  arch/arm64/kernel/module.c | 129 ++++++++++++++++++++++++-------------
> >  1 file changed, 83 insertions(+), 46 deletions(-)
>
> On its own, this isn't gaining us an awful lot upstream as we don't have
> livepatch support (arm64 does not select HAVE_LIVEPATCH), however I'm
> not against incremental changes towards enabling that. Are you planning
> to work on follow-up changes for the rest of the support?

As Song mentioned, hopefully
https://lore.kernel.org/linux-arm-kernel/20250320171559.3423224-1-song@kernel.org/
in combination with this patch should be enough for initial support of
livepatch on arm64. I'll need to follow up with Weinan on next steps
for the SFrame approach.

>
> > diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
> > index 06bb680bfe975..0703502d9dc37 100644
> > --- a/arch/arm64/kernel/module.c
> > +++ b/arch/arm64/kernel/module.c
> > @@ -18,11 +18,13 @@
> >  #include <linux/moduleloader.h>
> >  #include <linux/random.h>
> >  #include <linux/scs.h>
> > +#include <linux/memory.h>
> >
> >  #include <asm/alternative.h>
> >  #include <asm/insn.h>
> >  #include <asm/scs.h>
> >  #include <asm/sections.h>
> > +#include <asm/text-patching.h>
> >
> >  enum aarch64_reloc_op {
> >       RELOC_OP_NONE,
> > @@ -48,7 +50,8 @@ static u64 do_reloc(enum aarch64_reloc_op reloc_op, __le32 *place, u64 val)
> >       return 0;
> >  }
> >
> > -static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
> > +static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len,
> > +                   void *(*write)(void *dest, const void *src, size_t len))
> >  {
>
> I think it's a bit grotty indirecting the write when in reality we either
> need a straight-forward assignment (like we have today) or a call to
> an instruction-patching helper.
>
> In particular, when you get to functions such as:
>
> >  static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
> > -                        int lsb, enum aarch64_insn_movw_imm_type imm_type)
> > +                        int lsb, enum aarch64_insn_movw_imm_type imm_type,
> > +                        void *(*write)(void *dest, const void *src, size_t len))
> >  {
> >       u64 imm;
> >       s64 sval;
> >       u32 insn = le32_to_cpu(*place);
> > +     __le32 le_insn;
> >
> >       sval = do_reloc(op, place, val);
> >       imm = sval >> lsb;
> > @@ -145,7 +150,8 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
> >
> >       /* Update the instruction with the new encoding. */
> >       insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, imm);
> > -     *place = cpu_to_le32(insn);
> > +     le_insn = cpu_to_le32(insn);
> > +     write(place, &le_insn, sizeof(le_insn));
>
> we're forced into passing &le_insn because we need the same function
> prototype as memcpy().
>
> Instead, how about we pass the 'struct module *mod' pointer down from
> apply_relocate_add()? That's already done for reloc_insn_adrp() and it
> would mean that the above could be written as:
>
>
> static int reloc_insn_movw(struct module *mod, enum aarch64_reloc_op op,
>                            __le32 *place, u64 val, int lsb,
>                            enum aarch64_insn_movw_imm_type imm_type)
> {
>         ...
>
>         insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, imm);
>         insn = cpu_to_le32(insn);
>
>         if (mod->state != MODULE_STATE_UNFORMED)
>                 aarch64_insn_set(place, insn, sizeof(insn));
>         else
>                 *place = insn;
>
>
> meaning we can also drop the first patch, because I don't think we need
> a text_poke() helper.

Dropped the first patch in v4 and switched to the method suggested
here, so we use a normal assignment for the non-late case. Though, it
does seem a little repetitive, with 6 different sites doing this
module state check. If having a straightforward assignment directly in
the reloc_* functions isn't too important, I wonder if we can make
local memset/memcpy-like helpers to contain this check? Like:

static void *write_insn(void *place, u32 insn, struct module *me);
static void *write_data(void *place, s64 *sval, struct module *me);

>
> > +int apply_relocate_add(Elf64_Shdr *sechdrs,
> > +                    const char *strtab,
> > +                    unsigned int symindex,
> > +                    unsigned int relsec,
> > +                    struct module *me)
> > +{
> > +     int ret;
> > +     bool early = me->state == MODULE_STATE_UNFORMED;
> > +     void *(*write)(void *, const void *, size_t) = memcpy;
> > +
> > +     if (!early) {
> > +             write = text_poke;
> > +             mutex_lock(&text_mutex);
> > +     }
> > +
> > +     ret = __apply_relocate_add(sechdrs, strtab, symindex, relsec, me,
> > +                                write);
> > +
> > +     if (!early)
> > +             mutex_unlock(&text_mutex);
>
> Why is the responsibility of the arch code to take the 'text_mutex' here?
> The core code should be able to do that when the module state is !=
> MODULE_STATE_UNFORMED.
>

Moved the locking to kernel/livepatch/core.c in v4, since other call
sites to apply_relocate_add() don't attempt late relocations. Since
the locking was already being done in the x86 module code I had to
remove this. It also made sense to me to split out the text_mutex
changes into a separate patch because they only touch module/x86 and
livepatch code, so that's how I did it in v4. But they can just as
easily be squashed into one patch.

Thanks,
Dylan
Re: [PATCH v2 2/2] arm64/module: Use text-poke API for late relocations.
Posted by Song Liu 7 months, 1 week ago
Hi Will,

On Fri, May 9, 2025 at 9:15 AM Will Deacon <will@kernel.org> wrote:
>
> Hi Dylan,
>
> On Sat, Apr 12, 2025 at 01:09:39AM +0000, Dylan Hatch wrote:
> > To enable late module patching, livepatch modules need to be able to
> > apply some of their relocations well after being loaded. In this
> > scenario, use the text-poking API to allow this, even with
> > STRICT_MODULE_RWX.
> >
> > This patch is largely based off commit 88fc078a7a8f6 ("x86/module: Use
> > text_poke() for late relocations").
> >
> > Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
> > ---
> >  arch/arm64/kernel/module.c | 129 ++++++++++++++++++++++++-------------
> >  1 file changed, 83 insertions(+), 46 deletions(-)
>
> On its own, this isn't gaining us an awful lot upstream as we don't have
> livepatch support (arm64 does not select HAVE_LIVEPATCH), however I'm
> not against incremental changes towards enabling that. Are you planning
> to work on follow-up changes for the rest of the support?

There are two patchsets that are trying to enable HAVE_LIVEPATCH
for arm64:

[1] https://lore.kernel.org/all/20250127213310.2496133-1-wnliu@google.com/
[2] https://lore.kernel.org/linux-arm-kernel/20250320171559.3423224-1-song@kernel.org/

Toshiyuki has tested this patch with both approaches.

Since patchset [1] depends on SFrame, which is less mature at the
moment (clang doesn't support it, kernel side code is very new), live patch
folks think [2] is a better approach at the moment. Could you please share
your thoughts on [2]. If it looks good, we hope to ship it to 6.16 kernels, as
we (Meta) want to use livepatch in our fleet.

Thanks,
Song
Re: [PATCH v2 2/2] arm64/module: Use text-poke API for late relocations.
Posted by Dylan Hatch 7 months, 3 weeks ago
On Fri, Apr 11, 2025 at 6:10 PM Dylan Hatch <dylanbhatch@google.com> wrote:
>
> To enable late module patching, livepatch modules need to be able to
> apply some of their relocations well after being loaded. In this
> scenario, use the text-poking API to allow this, even with
> STRICT_MODULE_RWX.
>
> This patch is largely based off commit 88fc078a7a8f6 ("x86/module: Use
> text_poke() for late relocations").
>
> Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
> ---
>  arch/arm64/kernel/module.c | 129 ++++++++++++++++++++++++-------------
>  1 file changed, 83 insertions(+), 46 deletions(-)
>
> diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
> index 06bb680bfe975..0703502d9dc37 100644
> --- a/arch/arm64/kernel/module.c
> +++ b/arch/arm64/kernel/module.c
> @@ -18,11 +18,13 @@
>  #include <linux/moduleloader.h>
>  #include <linux/random.h>
>  #include <linux/scs.h>
> +#include <linux/memory.h>
>
>  #include <asm/alternative.h>
>  #include <asm/insn.h>
>  #include <asm/scs.h>
>  #include <asm/sections.h>
> +#include <asm/text-patching.h>
>
>  enum aarch64_reloc_op {
>         RELOC_OP_NONE,
> @@ -48,7 +50,8 @@ static u64 do_reloc(enum aarch64_reloc_op reloc_op, __le32 *place, u64 val)
>         return 0;
>  }
>
> -static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
> +static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len,
> +                     void *(*write)(void *dest, const void *src, size_t len))
>  {
>         s64 sval = do_reloc(op, place, val);
>
> @@ -66,7 +69,7 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
>
>         switch (len) {
>         case 16:
> -               *(s16 *)place = sval;
> +               write(place, &sval, sizeof(s16));
>                 switch (op) {
>                 case RELOC_OP_ABS:
>                         if (sval < 0 || sval > U16_MAX)
> @@ -82,7 +85,7 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
>                 }
>                 break;
>         case 32:
> -               *(s32 *)place = sval;
> +               write(place, &sval, sizeof(s32));
>                 switch (op) {
>                 case RELOC_OP_ABS:
>                         if (sval < 0 || sval > U32_MAX)
> @@ -98,7 +101,7 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
>                 }
>                 break;
>         case 64:
> -               *(s64 *)place = sval;
> +               write(place, &sval, sizeof(s64));
>                 break;
>         default:
>                 pr_err("Invalid length (%d) for data relocation\n", len);
> @@ -113,11 +116,13 @@ enum aarch64_insn_movw_imm_type {
>  };
>
>  static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
> -                          int lsb, enum aarch64_insn_movw_imm_type imm_type)
> +                          int lsb, enum aarch64_insn_movw_imm_type imm_type,
> +                          void *(*write)(void *dest, const void *src, size_t len))
>  {
>         u64 imm;
>         s64 sval;
>         u32 insn = le32_to_cpu(*place);
> +       __le32 le_insn;
>
>         sval = do_reloc(op, place, val);
>         imm = sval >> lsb;
> @@ -145,7 +150,8 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
>
>         /* Update the instruction with the new encoding. */
>         insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, imm);
> -       *place = cpu_to_le32(insn);
> +       le_insn = cpu_to_le32(insn);
> +       write(place, &le_insn, sizeof(le_insn));
>
>         if (imm > U16_MAX)
>                 return -ERANGE;
> @@ -154,11 +160,13 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
>  }
>
>  static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val,
> -                         int lsb, int len, enum aarch64_insn_imm_type imm_type)
> +                         int lsb, int len, enum aarch64_insn_imm_type imm_type,
> +                         void *(*write)(void *dest, const void *src, size_t len))
>  {
>         u64 imm, imm_mask;
>         s64 sval;
>         u32 insn = le32_to_cpu(*place);
> +       __le32 le_insn;
>
>         /* Calculate the relocation value. */
>         sval = do_reloc(op, place, val);
> @@ -170,7 +178,8 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val,
>
>         /* Update the instruction's immediate field. */
>         insn = aarch64_insn_encode_immediate(imm_type, insn, imm);
> -       *place = cpu_to_le32(insn);
> +       le_insn = cpu_to_le32(insn);
> +       write(place, &le_insn, sizeof(le_insn));
>
>         /*
>          * Extract the upper value bits (including the sign bit) and
> @@ -189,17 +198,19 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val,
>  }
>
>  static int reloc_insn_adrp(struct module *mod, Elf64_Shdr *sechdrs,
> -                          __le32 *place, u64 val)
> +                          __le32 *place, u64 val,
> +                          void *(*write)(void *dest, const void *src, size_t len))
>  {
>         u32 insn;
> +       __le32 le_insn;
>
>         if (!is_forbidden_offset_for_adrp(place))
>                 return reloc_insn_imm(RELOC_OP_PAGE, place, val, 12, 21,
> -                                     AARCH64_INSN_IMM_ADR);
> +                                     AARCH64_INSN_IMM_ADR, write);
>
>         /* patch ADRP to ADR if it is in range */
>         if (!reloc_insn_imm(RELOC_OP_PREL, place, val & ~0xfff, 0, 21,
> -                           AARCH64_INSN_IMM_ADR)) {
> +                           AARCH64_INSN_IMM_ADR, write)) {
>                 insn = le32_to_cpu(*place);
>                 insn &= ~BIT(31);
>         } else {
> @@ -211,15 +222,17 @@ static int reloc_insn_adrp(struct module *mod, Elf64_Shdr *sechdrs,
>                                                    AARCH64_INSN_BRANCH_NOLINK);
>         }
>
> -       *place = cpu_to_le32(insn);
> +       le_insn = cpu_to_le32(insn);
> +       write(place, &le_insn, sizeof(le_insn));
>         return 0;
>  }
>
> -int apply_relocate_add(Elf64_Shdr *sechdrs,
> +static int __apply_relocate_add(Elf64_Shdr *sechdrs,
>                        const char *strtab,
>                        unsigned int symindex,
>                        unsigned int relsec,
> -                      struct module *me)
> +                      struct module *me,
> +                      void *(*write)(void *dest, const void *src, size_t len))
>  {
>         unsigned int i;
>         int ovf;
> @@ -255,23 +268,23 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
>                 /* Data relocations. */
>                 case R_AARCH64_ABS64:
>                         overflow_check = false;
> -                       ovf = reloc_data(RELOC_OP_ABS, loc, val, 64);
> +                       ovf = reloc_data(RELOC_OP_ABS, loc, val, 64, write);
>                         break;
>                 case R_AARCH64_ABS32:
> -                       ovf = reloc_data(RELOC_OP_ABS, loc, val, 32);
> +                       ovf = reloc_data(RELOC_OP_ABS, loc, val, 32, write);
>                         break;
>                 case R_AARCH64_ABS16:
> -                       ovf = reloc_data(RELOC_OP_ABS, loc, val, 16);
> +                       ovf = reloc_data(RELOC_OP_ABS, loc, val, 16, write);
>                         break;
>                 case R_AARCH64_PREL64:
>                         overflow_check = false;
> -                       ovf = reloc_data(RELOC_OP_PREL, loc, val, 64);
> +                       ovf = reloc_data(RELOC_OP_PREL, loc, val, 64, write);
>                         break;
>                 case R_AARCH64_PREL32:
> -                       ovf = reloc_data(RELOC_OP_PREL, loc, val, 32);
> +                       ovf = reloc_data(RELOC_OP_PREL, loc, val, 32, write);
>                         break;
>                 case R_AARCH64_PREL16:
> -                       ovf = reloc_data(RELOC_OP_PREL, loc, val, 16);
> +                       ovf = reloc_data(RELOC_OP_PREL, loc, val, 16, write);
>                         break;
>
>                 /* MOVW instruction relocations. */
> @@ -280,88 +293,88 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
>                         fallthrough;
>                 case R_AARCH64_MOVW_UABS_G0:
>                         ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0,
> -                                             AARCH64_INSN_IMM_MOVKZ);
> +                                             AARCH64_INSN_IMM_MOVKZ, write);
>                         break;
>                 case R_AARCH64_MOVW_UABS_G1_NC:
>                         overflow_check = false;
>                         fallthrough;
>                 case R_AARCH64_MOVW_UABS_G1:
>                         ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16,
> -                                             AARCH64_INSN_IMM_MOVKZ);
> +                                             AARCH64_INSN_IMM_MOVKZ, write);
>                         break;
>                 case R_AARCH64_MOVW_UABS_G2_NC:
>                         overflow_check = false;
>                         fallthrough;
>                 case R_AARCH64_MOVW_UABS_G2:
>                         ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32,
> -                                             AARCH64_INSN_IMM_MOVKZ);
> +                                             AARCH64_INSN_IMM_MOVKZ, write);
>                         break;
>                 case R_AARCH64_MOVW_UABS_G3:
>                         /* We're using the top bits so we can't overflow. */
>                         overflow_check = false;
>                         ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 48,
> -                                             AARCH64_INSN_IMM_MOVKZ);
> +                                             AARCH64_INSN_IMM_MOVKZ, write);
>                         break;
>                 case R_AARCH64_MOVW_SABS_G0:
>                         ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0,
> -                                             AARCH64_INSN_IMM_MOVNZ);
> +                                             AARCH64_INSN_IMM_MOVNZ, write);
>                         break;
>                 case R_AARCH64_MOVW_SABS_G1:
>                         ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16,
> -                                             AARCH64_INSN_IMM_MOVNZ);
> +                                             AARCH64_INSN_IMM_MOVNZ, write);
>                         break;
>                 case R_AARCH64_MOVW_SABS_G2:
>                         ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32,
> -                                             AARCH64_INSN_IMM_MOVNZ);
> +                                             AARCH64_INSN_IMM_MOVNZ, write);
>                         break;
>                 case R_AARCH64_MOVW_PREL_G0_NC:
>                         overflow_check = false;
>                         ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0,
> -                                             AARCH64_INSN_IMM_MOVKZ);
> +                                             AARCH64_INSN_IMM_MOVKZ, write);
>                         break;
>                 case R_AARCH64_MOVW_PREL_G0:
>                         ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0,
> -                                             AARCH64_INSN_IMM_MOVNZ);
> +                                             AARCH64_INSN_IMM_MOVNZ, write);
>                         break;
>                 case R_AARCH64_MOVW_PREL_G1_NC:
>                         overflow_check = false;
>                         ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16,
> -                                             AARCH64_INSN_IMM_MOVKZ);
> +                                             AARCH64_INSN_IMM_MOVKZ, write);
>                         break;
>                 case R_AARCH64_MOVW_PREL_G1:
>                         ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16,
> -                                             AARCH64_INSN_IMM_MOVNZ);
> +                                             AARCH64_INSN_IMM_MOVNZ, write);
>                         break;
>                 case R_AARCH64_MOVW_PREL_G2_NC:
>                         overflow_check = false;
>                         ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32,
> -                                             AARCH64_INSN_IMM_MOVKZ);
> +                                             AARCH64_INSN_IMM_MOVKZ, write);
>                         break;
>                 case R_AARCH64_MOVW_PREL_G2:
>                         ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32,
> -                                             AARCH64_INSN_IMM_MOVNZ);
> +                                             AARCH64_INSN_IMM_MOVNZ, write);
>                         break;
>                 case R_AARCH64_MOVW_PREL_G3:
>                         /* We're using the top bits so we can't overflow. */
>                         overflow_check = false;
>                         ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 48,
> -                                             AARCH64_INSN_IMM_MOVNZ);
> +                                             AARCH64_INSN_IMM_MOVNZ, write);
>                         break;
>
>                 /* Immediate instruction relocations. */
>                 case R_AARCH64_LD_PREL_LO19:
>                         ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19,
> -                                            AARCH64_INSN_IMM_19);
> +                                            AARCH64_INSN_IMM_19, write);
>                         break;
>                 case R_AARCH64_ADR_PREL_LO21:
>                         ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21,
> -                                            AARCH64_INSN_IMM_ADR);
> +                                            AARCH64_INSN_IMM_ADR, write);
>                         break;
>                 case R_AARCH64_ADR_PREL_PG_HI21_NC:
>                         overflow_check = false;
>                         fallthrough;
>                 case R_AARCH64_ADR_PREL_PG_HI21:
> -                       ovf = reloc_insn_adrp(me, sechdrs, loc, val);
> +                       ovf = reloc_insn_adrp(me, sechdrs, loc, val, write);
>                         if (ovf && ovf != -ERANGE)
>                                 return ovf;
>                         break;
> @@ -369,46 +382,46 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
>                 case R_AARCH64_LDST8_ABS_LO12_NC:
>                         overflow_check = false;
>                         ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 0, 12,
> -                                            AARCH64_INSN_IMM_12);
> +                                            AARCH64_INSN_IMM_12, write);
>                         break;
>                 case R_AARCH64_LDST16_ABS_LO12_NC:
>                         overflow_check = false;
>                         ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 1, 11,
> -                                            AARCH64_INSN_IMM_12);
> +                                            AARCH64_INSN_IMM_12, write);
>                         break;
>                 case R_AARCH64_LDST32_ABS_LO12_NC:
>                         overflow_check = false;
>                         ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 2, 10,
> -                                            AARCH64_INSN_IMM_12);
> +                                            AARCH64_INSN_IMM_12, write);
>                         break;
>                 case R_AARCH64_LDST64_ABS_LO12_NC:
>                         overflow_check = false;
>                         ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 3, 9,
> -                                            AARCH64_INSN_IMM_12);
> +                                            AARCH64_INSN_IMM_12, write);
>                         break;
>                 case R_AARCH64_LDST128_ABS_LO12_NC:
>                         overflow_check = false;
>                         ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 4, 8,
> -                                            AARCH64_INSN_IMM_12);
> +                                            AARCH64_INSN_IMM_12, write);
>                         break;
>                 case R_AARCH64_TSTBR14:
>                         ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 14,
> -                                            AARCH64_INSN_IMM_14);
> +                                            AARCH64_INSN_IMM_14, write);
>                         break;
>                 case R_AARCH64_CONDBR19:
>                         ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19,
> -                                            AARCH64_INSN_IMM_19);
> +                                            AARCH64_INSN_IMM_19, write);
>                         break;
>                 case R_AARCH64_JUMP26:
>                 case R_AARCH64_CALL26:
>                         ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26,
> -                                            AARCH64_INSN_IMM_26);
> +                                            AARCH64_INSN_IMM_26, write);
>                         if (ovf == -ERANGE) {
>                                 val = module_emit_plt_entry(me, sechdrs, loc, &rel[i], sym);
>                                 if (!val)
>                                         return -ENOEXEC;
>                                 ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2,
> -                                                    26, AARCH64_INSN_IMM_26);
> +                                                    26, AARCH64_INSN_IMM_26, write);
>                         }
>                         break;
>
> @@ -431,6 +444,30 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
>         return -ENOEXEC;
>  }
>
> +int apply_relocate_add(Elf64_Shdr *sechdrs,
> +                      const char *strtab,
> +                      unsigned int symindex,
> +                      unsigned int relsec,
> +                      struct module *me)
> +{
> +       int ret;
> +       bool early = me->state == MODULE_STATE_UNFORMED;
> +       void *(*write)(void *, const void *, size_t) = memcpy;
> +
> +       if (!early) {
> +               write = text_poke;
> +               mutex_lock(&text_mutex);
> +       }
> +
> +       ret = __apply_relocate_add(sechdrs, strtab, symindex, relsec, me,
> +                                  write);
> +
> +       if (!early)
> +               mutex_unlock(&text_mutex);
> +
> +       return ret;
> +}
> +
>  static inline void __init_plt(struct plt_entry *plt, unsigned long addr)
>  {
>         *plt = get_plt_entry(addr, plt);
> --
> 2.49.0.604.gff1f9ca942-goog
>

Catalin and Will,

Are you comfortable with taking these patches? Is this a workable
approach to enable livepatching arm64 modules?

Thanks,
Dylan
Re: [PATCH v2 2/2] arm64/module: Use text-poke API for late relocations.
Posted by Song Liu 8 months ago
On Fri, Apr 11, 2025 at 6:10 PM Dylan Hatch <dylanbhatch@google.com> wrote:
>
> To enable late module patching, livepatch modules need to be able to
> apply some of their relocations well after being loaded. In this
> scenario, use the text-poking API to allow this, even with
> STRICT_MODULE_RWX.
>
> This patch is largely based off commit 88fc078a7a8f6 ("x86/module: Use
> text_poke() for late relocations").
>
> Signed-off-by: Dylan Hatch <dylanbhatch@google.com>

Acked-by: Song Liu <song@kernel.org>
Re: [PATCH v2 2/2] arm64/module: Use text-poke API for late relocations.
Posted by Song Liu 8 months ago
On Mon, Apr 21, 2025 at 5:35 PM Song Liu <song@kernel.org> wrote:
>
> On Fri, Apr 11, 2025 at 6:10 PM Dylan Hatch <dylanbhatch@google.com> wrote:
> >
> > To enable late module patching, livepatch modules need to be able to
> > apply some of their relocations well after being loaded. In this
> > scenario, use the text-poking API to allow this, even with
> > STRICT_MODULE_RWX.
> >
> > This patch is largely based off commit 88fc078a7a8f6 ("x86/module: Use
> > text_poke() for late relocations").
> >
> > Signed-off-by: Dylan Hatch <dylanbhatch@google.com>

Could you please share how you test this?

Thanks,
Song


> Acked-by: Song Liu <song@kernel.org>
Re: [PATCH v2 2/2] arm64/module: Use text-poke API for late relocations.
Posted by Dylan Hatch 8 months ago
On Mon, Apr 21, 2025 at 11:25 PM Song Liu <song@kernel.org> wrote:
>
> On Mon, Apr 21, 2025 at 5:35 PM Song Liu <song@kernel.org> wrote:
> >
> > On Fri, Apr 11, 2025 at 6:10 PM Dylan Hatch <dylanbhatch@google.com> wrote:
> > >
> > > To enable late module patching, livepatch modules need to be able to
> > > apply some of their relocations well after being loaded. In this
> > > scenario, use the text-poking API to allow this, even with
> > > STRICT_MODULE_RWX.
> > >
> > > This patch is largely based off commit 88fc078a7a8f6 ("x86/module: Use
> > > text_poke() for late relocations").
> > >
> > > Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
>
> Could you please share how you test this?
>

For context, we enable livepatch for arm64 by porting this RFC series
(along with other internal patches) into our kernel:
https://lore.kernel.org/all/20230202074036.507249-1-madvenka@linux.microsoft.com/.

The way I tested this patch is: with STRICT_MODULE_RWX, load a module
and a livepatch that touches that module (in either order), and
confirm the kernel doesn't crash.

Without this patch, a crash is caused in apply_relocate_add() if both
a module and a livepatch that touches the module are both loaded. This
happens through one of two code paths:

  1. If the module is already loaded when the livepatch is applied,
through the module_init() callback.
  2. If the module is loaded after the livepatch is applied, through
prepare_coming_module().

In both scenarios, the livepatch module's text is already RX-only.

Thanks,
Dylan
Re: [PATCH v2 2/2] arm64/module: Use text-poke API for late relocations.
Posted by Song Liu 8 months ago
On Tue, Apr 22, 2025 at 5:27 PM Dylan Hatch <dylanbhatch@google.com> wrote:
>
> On Mon, Apr 21, 2025 at 11:25 PM Song Liu <song@kernel.org> wrote:
> >
> > On Mon, Apr 21, 2025 at 5:35 PM Song Liu <song@kernel.org> wrote:
> > >
> > > On Fri, Apr 11, 2025 at 6:10 PM Dylan Hatch <dylanbhatch@google.com> wrote:
> > > >
> > > > To enable late module patching, livepatch modules need to be able to
> > > > apply some of their relocations well after being loaded. In this
> > > > scenario, use the text-poking API to allow this, even with
> > > > STRICT_MODULE_RWX.
> > > >
> > > > This patch is largely based off commit 88fc078a7a8f6 ("x86/module: Use
> > > > text_poke() for late relocations").
> > > >
> > > > Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
> >
> > Could you please share how you test this?
> >
>
> For context, we enable livepatch for arm64 by porting this RFC series
> (along with other internal patches) into our kernel:
> https://lore.kernel.org/all/20230202074036.507249-1-madvenka@linux.microsoft.com/.
>
> The way I tested this patch is: with STRICT_MODULE_RWX, load a module
> and a livepatch that touches that module (in either order), and
> confirm the kernel doesn't crash.
>
> Without this patch, a crash is caused in apply_relocate_add() if both
> a module and a livepatch that touches the module are both loaded. This
> happens through one of two code paths:
>
>   1. If the module is already loaded when the livepatch is applied,
> through the module_init() callback.
>   2. If the module is loaded after the livepatch is applied, through
> prepare_coming_module().
>
> In both scenarios, the livepatch module's text is already RX-only.

Thanks for sharing the information!

Could you please help test this set with [1]? This is a different approach
than the one by Madhavan. We were hoping to ship it soon.

Thanks,
Song

[1] https://lore.kernel.org/live-patching/20250320171559.3423224-1-song@kernel.org/