[PATCH 7/7] perf annotate-data: Handle the access to the 'current' pointer on arm64

Li Huafei posted 7 patches 9 months, 1 week ago
[PATCH 7/7] perf annotate-data: Handle the access to the 'current' pointer on arm64
Posted by Li Huafei 9 months, 1 week ago
According to the implementation of the 'current' macro on ARM64, the
sp_el0 register stores the pointer to the current task's task_struct.
For example:

 mrs x1, sp_el0
 ldr x2, [x1, #1896]

We can infer that the ldr instruction is accessing a member of the
task_struct structure at an offset of 1896. The key is to construct the
data type for x1. The instruction 'mrs x1, sp_el0' belongs to the inline
function get_current(). We find the DIE of that inline function through
the instruction address and then obtain the DIE of its return type,
which should be 'struct task_struct *'. Then, we update the register
state of x1 with this type information.

Signed-off-by: Li Huafei <lihuafei1@huawei.com>
---
 tools/perf/arch/arm64/annotate/instructions.c | 71 +++++++++++++++----
 1 file changed, 57 insertions(+), 14 deletions(-)

diff --git a/tools/perf/arch/arm64/annotate/instructions.c b/tools/perf/arch/arm64/annotate/instructions.c
index f2053e7f60a8..c5a0a6381547 100644
--- a/tools/perf/arch/arm64/annotate/instructions.c
+++ b/tools/perf/arch/arm64/annotate/instructions.c
@@ -263,6 +263,20 @@ update_insn_state_arm64(struct type_state *state, struct data_loc_info *dloc,
 	Dwarf_Die type_die;
 	int sreg, dreg;
 	u32 insn_offset = dl->al.offset;
+	static regex_t add_regex, mrs_regex;
+	static bool regex_compiled;
+
+	if (!regex_compiled) {
+		/*
+		 * Matching the operand assembly syntax of the add instruction:
+		 *
+		 *  <Xd|SP>, <Xn|SP>, #<imm>
+		 */
+		regcomp(&add_regex, "^([xw][0-9]{1,2}|sp), ([xw][0-9]{1,2}|sp), #(0x[0-9a-f]+)",
+			REG_EXTENDED);
+		regcomp(&mrs_regex, "^(x[0-9]{1,2}), sp_el0", REG_EXTENDED);
+		regex_compiled = true;
+	}
 
 	/* Access global variables via PC relative addressing, for example:
 	 *
@@ -296,20 +310,6 @@ update_insn_state_arm64(struct type_state *state, struct data_loc_info *dloc,
 		regmatch_t match[4];
 		char *ops = strdup(dl->ops.raw);
 		u64 offset;
-		static regex_t add_regex;
-		static bool regex_compiled;
-
-		/*
-		 * Matching the operand assembly syntax of the add instruction:
-		 *
-		 *  <Xd|SP>, <Xn|SP>, #<imm>
-		 */
-		if (!regex_compiled) {
-			regcomp(&add_regex,
-				"^([xw][0-9]{1,2}|sp), ([xw][0-9]{1,2}|sp), #(0x[0-9a-f]+)",
-				REG_EXTENDED);
-			regex_compiled = true;
-		}
 
 		if (!ops)
 			return;
@@ -351,6 +351,49 @@ update_insn_state_arm64(struct type_state *state, struct data_loc_info *dloc,
 		return;
 	}
 
+	if (!strncmp(dl->ins.name, "mrs", 3)) {
+		regmatch_t match[2];
+		char *ops = strdup(dl->ops.raw);
+		Dwarf_Die func_die;
+		Dwarf_Attribute attr;
+		u64 ip = dloc->ms->sym->start + dl->al.offset;
+		u64 pc = map__rip_2objdump(dloc->ms->map, ip);
+
+		if (!ops)
+			return;
+
+		if (regexec(&mrs_regex, dl->ops.raw, 2, match, 0))
+			return;
+
+		ops[match[1].rm_eo] = '\0';
+		sreg = get_arm64_regnum(ops + match[1].rm_so);
+		if (sreg < 0 || !has_reg_type(state, sreg)) {
+			free(ops);
+			return;
+		}
+
+		/*
+		 * Find the inline function 'get_current()' Dwarf_Die and
+		 * obtain its return value data type, which should be
+		 * 'struct task_struct *'.
+		 */
+		if (!die_find_inlinefunc(cu_die, pc, &func_die) ||
+		    !dwarf_attr_integrate(&func_die, DW_AT_type, &attr) ||
+		    !dwarf_formref_die(&attr, &type_die)) {
+			free(ops);
+			return;
+		}
+
+		tsr = &state->regs[sreg];
+		tsr->type = type_die;
+		tsr->kind = TSR_KIND_TYPE;
+		tsr->ok = true;
+
+		pr_debug_dtp("mrs sp_el0 [%x] -> reg%d", insn_offset, sreg);
+		free(ops);
+		return;
+	}
+
 	if (strncmp(dl->ins.name, "ld", 2))
 		return;
 
-- 
2.25.1
Re: [PATCH 7/7] perf annotate-data: Handle the access to the 'current' pointer on arm64
Posted by Namhyung Kim 9 months ago
On Sat, Mar 15, 2025 at 12:21:37AM +0800, Li Huafei wrote:
> According to the implementation of the 'current' macro on ARM64, the
> sp_el0 register stores the pointer to the current task's task_struct.
> For example:
> 
>  mrs x1, sp_el0
>  ldr x2, [x1, #1896]

Same here.  It'd be great if you could share a real example where it
found the current for x1 in the second instruction.

> 
> We can infer that the ldr instruction is accessing a member of the
> task_struct structure at an offset of 1896. The key is to construct the
> data type for x1. The instruction 'mrs x1, sp_el0' belongs to the inline
> function get_current(). By finding the DIE of the inline function
> through its instruction address, and then obtaining the DIE for its
> return type, which should be 'struct task_struct *'. Then, we update the
> register state of x1 with this type information.
> 
> Signed-off-by: Li Huafei <lihuafei1@huawei.com>
> ---
>  tools/perf/arch/arm64/annotate/instructions.c | 71 +++++++++++++++----
>  1 file changed, 57 insertions(+), 14 deletions(-)
> 
> diff --git a/tools/perf/arch/arm64/annotate/instructions.c b/tools/perf/arch/arm64/annotate/instructions.c
> index f2053e7f60a8..c5a0a6381547 100644
> --- a/tools/perf/arch/arm64/annotate/instructions.c
> +++ b/tools/perf/arch/arm64/annotate/instructions.c
> @@ -263,6 +263,20 @@ update_insn_state_arm64(struct type_state *state, struct data_loc_info *dloc,
>  	Dwarf_Die type_die;
>  	int sreg, dreg;
>  	u32 insn_offset = dl->al.offset;
> +	static regex_t add_regex, mrs_regex;
> +	static bool regex_compiled;
> +
> +	if (!regex_compiled) {
> +		/*
> +		 * Matching the operand assembly syntax of the add instruction:
> +		 *
> +		 *  <Xd|SP>, <Xn|SP>, #<imm>
> +		 */
> +		regcomp(&add_regex, "^([xw][0-9]{1,2}|sp), ([xw][0-9]{1,2}|sp), #(0x[0-9a-f]+)",
> +			REG_EXTENDED);
> +		regcomp(&mrs_regex, "^(x[0-9]{1,2}), sp_el0", REG_EXTENDED);
> +		regex_compiled = true;
> +	}
>  
>  	/* Access global variables via PC relative addressing, for example:
>  	 *
> @@ -296,20 +310,6 @@ update_insn_state_arm64(struct type_state *state, struct data_loc_info *dloc,
>  		regmatch_t match[4];
>  		char *ops = strdup(dl->ops.raw);
>  		u64 offset;
> -		static regex_t add_regex;
> -		static bool regex_compiled;
> -
> -		/*
> -		 * Matching the operand assembly syntax of the add instruction:
> -		 *
> -		 *  <Xd|SP>, <Xn|SP>, #<imm>
> -		 */
> -		if (!regex_compiled) {
> -			regcomp(&add_regex,
> -				"^([xw][0-9]{1,2}|sp), ([xw][0-9]{1,2}|sp), #(0x[0-9a-f]+)",
> -				REG_EXTENDED);
> -			regex_compiled = true;
> -		}
>  
>  		if (!ops)
>  			return;
> @@ -351,6 +351,49 @@ update_insn_state_arm64(struct type_state *state, struct data_loc_info *dloc,
>  		return;
>  	}
>  
> +	if (!strncmp(dl->ins.name, "mrs", 3)) {

This handling should be kernel-specific; you may want to add a check for
it, such as __map__is_kernel(dloc->ms->map).

Thanks,
Namhyung


> +		regmatch_t match[2];
> +		char *ops = strdup(dl->ops.raw);
> +		Dwarf_Die func_die;
> +		Dwarf_Attribute attr;
> +		u64 ip = dloc->ms->sym->start + dl->al.offset;
> +		u64 pc = map__rip_2objdump(dloc->ms->map, ip);
> +
> +		if (!ops)
> +			return;
> +
> +		if (regexec(&mrs_regex, dl->ops.raw, 2, match, 0))
> +			return;
> +
> +		ops[match[1].rm_eo] = '\0';
> +		sreg = get_arm64_regnum(ops + match[1].rm_so);
> +		if (sreg < 0 || !has_reg_type(state, sreg)) {
> +			free(ops);
> +			return;
> +		}
> +
> +		/*
> +		 * Find the inline function 'get_current()' Dwarf_Die and
> +		 * obtain its return value data type, which should be
> +		 * 'struct task_struct *'.
> +		 */
> +		if (!die_find_inlinefunc(cu_die, pc, &func_die) ||
> +		    !dwarf_attr_integrate(&func_die, DW_AT_type, &attr) ||
> +		    !dwarf_formref_die(&attr, &type_die)) {
> +			free(ops);
> +			return;
> +		}
> +
> +		tsr = &state->regs[sreg];
> +		tsr->type = type_die;
> +		tsr->kind = TSR_KIND_TYPE;
> +		tsr->ok = true;
> +
> +		pr_debug_dtp("mrs sp_el0 [%x] -> reg%d", insn_offset, sreg);
> +		free(ops);
> +		return;
> +	}
> +
>  	if (strncmp(dl->ins.name, "ld", 2))
>  		return;
>  
> -- 
> 2.25.1
>