[PATCH v1 2/6] perf dwarf-aux: More accurate variable type match for breg

Zecheng Li posted 6 patches 2 months, 1 week ago
There is a newer version of this series
[PATCH v1 2/6] perf dwarf-aux: More accurate variable type match for breg
Posted by Zecheng Li 2 months, 1 week ago
Introduces the function is_breg_access_indirect to determine whether a
memory access involving a DW_OP_breg* operation refers to the variable's
value directly or requires dereferencing the variable's type as a
pointer based on the DWARF expression. Previously, all breg based
accesses were assumed to directly access the variable's value
(is_pointer = false).

The is_breg_access_indirect function handles three main cases:

1. Base register + offset only: (e.g., DW_OP_breg7 RSP+88) The
   calculated address is the location of the variable. The access is
   direct, so no type dereference is needed. Returns false.

2. Base register + offset, followed by other operations ending in
   DW_OP_stack_value, including DW_OP_deref: (e.g., DW_OP_breg7 RSP+96,
   DW_OP_deref, DW_OP_plus_uconst 0x64, DW_OP_stack_value) The DWARF
   expression computes the variable's value, but that value requires a
   dereference. The memory access is fetching that value, so no type
   dereference is needed. Returns false.

3. Base register + offset, followed only by DW_OP_stack_value: (e.g.,
   DW_OP_breg7 RSP+176, DW_OP_stack_value) This indicates the value at
   the base + offset is the variable's value. Since this value is being
   used as an address in the memory access, the variable's type is
   treated as a pointer and requires a type dereference. Returns true.

The is_pointer argument passed to match_var_offset is now set by
is_breg_access_indirect for breg accesses.

Signed-off-by: Zecheng Li <zecheng@google.com>
---
 tools/perf/util/dwarf-aux.c | 42 ++++++++++++++++++++++++++++++++-----
 1 file changed, 37 insertions(+), 5 deletions(-)

diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index bf906dff9ef0..814c96ea509f 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -1424,6 +1424,38 @@ static bool match_var_offset(Dwarf_Die *die_mem, struct find_var_data *data,
 	return true;
 }
 
+/**
+ * is_breg_access_indirect - Check if breg based access implies type dereference
+ * @ops: DWARF operations array
+ * @nops: Number of operations in @ops
+ *
+ * Returns true if the DWARF expression indicates the variable's value is
+ * a pointer that the memory access dereferences.
+ * Returns false if the expression evaluates to the variable's value directly.
+ * This is called after check_allowed_ops.
+ */
+static bool is_breg_access_indirect(Dwarf_Op *ops, size_t nops)
+{
+	ops++;
+	nops--;
+
+	/* only the base register */
+	if (nops == 0)
+		return false;
+
+	switch (ops->atom) {
+	case DW_OP_stack_value:
+		return true;
+	case DW_OP_deref_size:
+	case DW_OP_deref:
+	case DW_OP_piece:
+		return false;
+	default:
+		/* unreachable, OP not supported */
+		return false;
+	}
+}
+
 /* Only checks direct child DIEs in the given scope. */
 static int __die_find_var_reg_cb(Dwarf_Die *die_mem, void *arg)
 {
@@ -1452,7 +1484,7 @@ static int __die_find_var_reg_cb(Dwarf_Die *die_mem, void *arg)
 		if (data->is_fbreg && ops->atom == DW_OP_fbreg &&
 		    check_allowed_ops(ops, nops) &&
 		    match_var_offset(die_mem, data, data->offset, ops->number,
-				     /*is_pointer=*/false))
+				     /*is_pointer=*/is_breg_access_indirect(ops, nops)))
 			return DIE_FIND_CB_END;
 
 		/* Only match with a simple case */
@@ -1464,11 +1496,11 @@ static int __die_find_var_reg_cb(Dwarf_Die *die_mem, void *arg)
 					     /*is_pointer=*/true))
 				return DIE_FIND_CB_END;
 
-			/* Local variables accessed by a register + offset */
+			/* variables accessed by a register + offset */
 			if (ops->atom == (DW_OP_breg0 + data->reg) &&
 			    check_allowed_ops(ops, nops) &&
 			    match_var_offset(die_mem, data, data->offset, ops->number,
-					     /*is_pointer=*/false))
+					     /*is_pointer=*/is_breg_access_indirect(ops, nops)))
 				return DIE_FIND_CB_END;
 		} else {
 			/* pointer variables saved in a register 32 or above */
@@ -1478,11 +1510,11 @@ static int __die_find_var_reg_cb(Dwarf_Die *die_mem, void *arg)
 					     /*is_pointer=*/true))
 				return DIE_FIND_CB_END;
 
-			/* Local variables accessed by a register + offset */
+			/* variables accessed by a register + offset */
 			if (ops->atom == DW_OP_bregx && data->reg == ops->number &&
 			    check_allowed_ops(ops, nops) &&
 			    match_var_offset(die_mem, data, data->offset, ops->number2,
-					     /*is_poitner=*/false))
+					     /*is_pointer=*/is_breg_access_indirect(ops, nops)))
 				return DIE_FIND_CB_END;
 		}
 	}
-- 
2.50.1.470.g6ba607880d-goog
Re: [PATCH v1 2/6] perf dwarf-aux: More accurate variable type match for breg
Posted by Namhyung Kim 2 months, 1 week ago
On Fri, Jul 25, 2025 at 08:28:05PM +0000, Zecheng Li wrote:
> Introduces the function is_breg_access_indirect to determine whether a
> memory access involving a DW_OP_breg* operation refers to the variable's
> value directly or requires dereferencing the variable's type as a
> pointer based on the DWARF expression. Previously, all breg based
> accesses were assumed to directly access the variable's value
> (is_pointer = false).
> 
> The is_breg_access_indirect function handles three main cases:
> 
> 1. Base register + offset only: (e.g., DW_OP_breg7 RSP+88) The
>    calculated address is the location of the variable. The access is
>    direct, so no type dereference is needed. Returns false.

This is the common case.  Either a basic type is stored on the stack or
a pointer type is spilled onto the stack.

> 
> 2. Base register + offset, followed by other operations ending in
>    DW_OP_stack_value, including DW_OP_deref: (e.g., DW_OP_breg7 RSP+96,
>    DW_OP_deref, DW_OP_plus_uconst 0x64, DW_OP_stack_value) The DWARF
>    expression computes the variable's value, but that value requires a
>    dereference. The memory access is fetching that value, so no type
>    dereference is needed. Returns false.

This is a complex case: the variable needs a pointer calculation.  We
don't support those (complex) expressions for now.

> 
> 3. Base register + offset, followed only by DW_OP_stack_value: (e.g.,
>    DW_OP_breg7 RSP+176, DW_OP_stack_value) This indicates the value at
>    the base + offset is the variable's value. Since this value is being
>    used as an address in the memory access, the variable's type is
>    treated as a pointer and requires a type dereference. Returns true.

The variable has a value.  But I guess the type dereference is only
required if the base register points to the stack.

> 
> The is_pointer argument passed to match_var_offset is now set by
> is_breg_access_indirect for breg accesses.
> 
> Signed-off-by: Zecheng Li <zecheng@google.com>
> ---
>  tools/perf/util/dwarf-aux.c | 42 ++++++++++++++++++++++++++++++++-----
>  1 file changed, 37 insertions(+), 5 deletions(-)
> 
> diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
> index bf906dff9ef0..814c96ea509f 100644
> --- a/tools/perf/util/dwarf-aux.c
> +++ b/tools/perf/util/dwarf-aux.c
> @@ -1424,6 +1424,38 @@ static bool match_var_offset(Dwarf_Die *die_mem, struct find_var_data *data,
>  	return true;
>  }
>  
> +/**
> + * is_breg_access_indirect - Check if breg based access implies type dereference
> + * @ops: DWARF operations array
> + * @nops: Number of operations in @ops
> + *
> + * Returns true if the DWARF expression indicates the variable's value is
> + * a pointer that the memory access dereferences.
> + * Returns false if the expression evaluates to the variable's value directly.
> + * This is called after check_allowed_ops.
> + */
> +static bool is_breg_access_indirect(Dwarf_Op *ops, size_t nops)
> +{
> +	ops++;
> +	nops--;
> +
> +	/* only the base register */
> +	if (nops == 0)
> +		return false;
> +
> +	switch (ops->atom) {
> +	case DW_OP_stack_value:
> +		return true;

As I said, I think it also needs to check if the base is the stack.


> +	case DW_OP_deref_size:
> +	case DW_OP_deref:
> +	case DW_OP_piece:
> +		return false;

I'm not sure if it's always false.  I sometimes see this pattern

  DW_OP_bregN, DW_OP_deref*, DW_OP_stack_value

which I believe is almost the same as just

  DW_OP_bregN

No?

> +	default:
> +		/* unreachable, OP not supported */
> +		return false;
> +	}
> +}
> +
>  /* Only checks direct child DIEs in the given scope. */
>  static int __die_find_var_reg_cb(Dwarf_Die *die_mem, void *arg)
>  {
> @@ -1452,7 +1484,7 @@ static int __die_find_var_reg_cb(Dwarf_Die *die_mem, void *arg)
>  		if (data->is_fbreg && ops->atom == DW_OP_fbreg &&
>  		    check_allowed_ops(ops, nops) &&
>  		    match_var_offset(die_mem, data, data->offset, ops->number,
> -				     /*is_pointer=*/false))
> +				     /*is_pointer=*/is_breg_access_indirect(ops, nops)))

An annotation like /*is_pointer=*/ is used for constant arguments.
You can delete it here and below.

Thanks,
Namhyung

>  			return DIE_FIND_CB_END;
>  
>  		/* Only match with a simple case */
> @@ -1464,11 +1496,11 @@ static int __die_find_var_reg_cb(Dwarf_Die *die_mem, void *arg)
>  					     /*is_pointer=*/true))
>  				return DIE_FIND_CB_END;
>  
> -			/* Local variables accessed by a register + offset */
> +			/* variables accessed by a register + offset */
>  			if (ops->atom == (DW_OP_breg0 + data->reg) &&
>  			    check_allowed_ops(ops, nops) &&
>  			    match_var_offset(die_mem, data, data->offset, ops->number,
> -					     /*is_pointer=*/false))
> +					     /*is_pointer=*/is_breg_access_indirect(ops, nops)))
>  				return DIE_FIND_CB_END;
>  		} else {
>  			/* pointer variables saved in a register 32 or above */
> @@ -1478,11 +1510,11 @@ static int __die_find_var_reg_cb(Dwarf_Die *die_mem, void *arg)
>  					     /*is_pointer=*/true))
>  				return DIE_FIND_CB_END;
>  
> -			/* Local variables accessed by a register + offset */
> +			/* variables accessed by a register + offset */
>  			if (ops->atom == DW_OP_bregx && data->reg == ops->number &&
>  			    check_allowed_ops(ops, nops) &&
>  			    match_var_offset(die_mem, data, data->offset, ops->number2,
> -					     /*is_poitner=*/false))
> +					     /*is_pointer=*/is_breg_access_indirect(ops, nops)))
>  				return DIE_FIND_CB_END;
>  		}
>  	}
> -- 
> 2.50.1.470.g6ba607880d-goog
>