[PATCH v3 10/20] gendwarfksyms: Expand structure types

Sami Tolvanen posted 20 patches 2 months ago
There is a newer version of this series
[PATCH v3 10/20] gendwarfksyms: Expand structure types
Posted by Sami Tolvanen 2 months ago
Recursively expand DWARF structure types, i.e. structs, unions, and
enums. Also include relevant DWARF attributes in type strings to
encode structure layout, for example.

Example output with --dump-dies:

  subprogram (
    formal_parameter structure_type &str {
      member pointer_type {
        base_type u8 byte_size(1) encoding(7)
      } data_ptr data_member_location(0) ,
      member base_type usize byte_size(8) encoding(7) length data_member_location(8)
    } byte_size(16) alignment(8) msg
  )
  -> base_type void

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
---
 scripts/gendwarfksyms/dwarf.c         | 137 +++++++++++++++++++++++++-
 scripts/gendwarfksyms/gendwarfksyms.h |   5 +
 2 files changed, 140 insertions(+), 2 deletions(-)

diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
index caf25da0a9b9..b7f1dc29cb9c 100644
--- a/scripts/gendwarfksyms/dwarf.c
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -205,9 +205,13 @@ static void process_fqn(struct die *cache, Dwarf_Die *die)
 				    value);                                \
 	}
 
+DEFINE_PROCESS_UDATA_ATTRIBUTE(accessibility)
 DEFINE_PROCESS_UDATA_ATTRIBUTE(alignment)
+DEFINE_PROCESS_UDATA_ATTRIBUTE(bit_size)
 DEFINE_PROCESS_UDATA_ATTRIBUTE(byte_size)
 DEFINE_PROCESS_UDATA_ATTRIBUTE(encoding)
+DEFINE_PROCESS_UDATA_ATTRIBUTE(data_bit_offset)
+DEFINE_PROCESS_UDATA_ATTRIBUTE(data_member_location)
 
 /* Match functions -- die_match_callback_t */
 #define DEFINE_MATCH(type)                                     \
@@ -216,8 +220,11 @@ DEFINE_PROCESS_UDATA_ATTRIBUTE(encoding)
 		return dwarf_tag(die) == DW_TAG_##type##_type; \
 	}
 
+DEFINE_MATCH(enumerator)
 DEFINE_MATCH(formal_parameter)
+DEFINE_MATCH(member)
 DEFINE_MATCH(subrange)
+DEFINE_MATCH(variant)
 
 bool match_all(Dwarf_Die *die)
 {
@@ -295,6 +302,10 @@ static void __process_list_type(struct state *state, struct die *cache,
 		process(cache, " ");
 		process(cache, name);
 	}
+	process_accessibility_attr(cache, die);
+	process_bit_size_attr(cache, die);
+	process_data_bit_offset_attr(cache, die);
+	process_data_member_location_attr(cache, die);
 }
 
 #define DEFINE_PROCESS_LIST_TYPE(type)                                       \
@@ -305,6 +316,7 @@ static void __process_list_type(struct state *state, struct die *cache,
 	}
 
 DEFINE_PROCESS_LIST_TYPE(formal_parameter)
+DEFINE_PROCESS_LIST_TYPE(member)
 
 /* Container types with DW_AT_type */
 static void __process_type(struct state *state, struct die *cache,
@@ -337,6 +349,7 @@ DEFINE_PROCESS_TYPE(reference)
 DEFINE_PROCESS_TYPE(restrict)
 DEFINE_PROCESS_TYPE(rvalue_reference)
 DEFINE_PROCESS_TYPE(shared)
+DEFINE_PROCESS_TYPE(template_type_parameter)
 DEFINE_PROCESS_TYPE(volatile)
 DEFINE_PROCESS_TYPE(typedef)
 
@@ -390,6 +403,106 @@ static void process_subroutine_type(struct state *state, struct die *cache,
 	__process_subroutine_type(state, cache, die, "subroutine_type");
 }
 
+static void process_variant_type(struct state *state, struct die *cache,
+				 Dwarf_Die *die)
+{
+	process_list_comma(state, cache);
+	process(cache, "variant {");
+	process_linebreak(cache, 1);
+	check(process_die_container(state, cache, die, process_type,
+				    match_member_type));
+	process_linebreak(cache, -1);
+	process(cache, "}");
+}
+
+static void process_variant_part_type(struct state *state, struct die *cache,
+				      Dwarf_Die *die)
+{
+	process_list_comma(state, cache);
+	process(cache, "variant_part {");
+	process_linebreak(cache, 1);
+	check(process_die_container(state, cache, die, process_type,
+				    match_variant_type));
+	process_linebreak(cache, -1);
+	process(cache, "}");
+}
+
+static int ___process_structure_type(struct state *state, struct die *cache,
+				     Dwarf_Die *die)
+{
+	switch (dwarf_tag(die)) {
+	case DW_TAG_member:
+	case DW_TAG_variant_part:
+		return check(process_type(state, cache, die));
+	case DW_TAG_class_type:
+	case DW_TAG_enumeration_type:
+	case DW_TAG_structure_type:
+	case DW_TAG_template_type_parameter:
+	case DW_TAG_union_type:
+	case DW_TAG_subprogram:
+		/* Skip non-member types, including member functions */
+		return 0;
+	default:
+		error("unexpected structure_type child: %x", dwarf_tag(die));
+	}
+}
+
+static void __process_structure_type(struct state *state, struct die *cache,
+				     Dwarf_Die *die, const char *type,
+				     die_callback_t process_func,
+				     die_match_callback_t match_func)
+{
+	process(cache, type);
+	process_fqn(cache, die);
+	process(cache, " {");
+	process_linebreak(cache, 1);
+
+	check(process_die_container(state, cache, die, process_func,
+				    match_func));
+
+	process_linebreak(cache, -1);
+	process(cache, "}");
+
+	process_byte_size_attr(cache, die);
+	process_alignment_attr(cache, die);
+}
+
+#define DEFINE_PROCESS_STRUCTURE_TYPE(structure)                        \
+	static void process_##structure##_type(                         \
+		struct state *state, struct die *cache, Dwarf_Die *die) \
+	{                                                               \
+		__process_structure_type(state, cache, die,             \
+					 #structure "_type",            \
+					 ___process_structure_type,     \
+					 match_all);                    \
+	}
+
+DEFINE_PROCESS_STRUCTURE_TYPE(class)
+DEFINE_PROCESS_STRUCTURE_TYPE(structure)
+DEFINE_PROCESS_STRUCTURE_TYPE(union)
+
+static void process_enumerator_type(struct state *state, struct die *cache,
+				    Dwarf_Die *die)
+{
+	Dwarf_Word value;
+
+	process_list_comma(state, cache);
+	process(cache, "enumerator");
+	process_fqn(cache, die);
+
+	if (get_udata_attr(die, DW_AT_const_value, &value)) {
+		process(cache, " = ");
+		process_fmt(cache, "%" PRIu64, value);
+	}
+}
+
+static void process_enumeration_type(struct state *state, struct die *cache,
+				     Dwarf_Die *die)
+{
+	__process_structure_type(state, cache, die, "enumeration_type",
+				 process_type, match_enumerator_type);
+}
+
 static void process_base_type(struct state *state, struct die *cache,
 			      Dwarf_Die *die)
 {
@@ -400,6 +513,16 @@ static void process_base_type(struct state *state, struct die *cache,
 	process_alignment_attr(cache, die);
 }
 
+static void process_unspecified_type(struct state *state, struct die *cache,
+				     Dwarf_Die *die)
+{
+	/*
+	 * These can be emitted for stand-elone assembly code, which means we
+	 * might run into them in vmlinux.o.
+	 */
+	process(cache, "unspecified_type");
+}
+
 static void process_cached(struct state *state, struct die *cache,
 			   Dwarf_Die *die)
 {
@@ -460,17 +583,27 @@ static int process_type(struct state *state, struct die *parent, Dwarf_Die *die)
 	PROCESS_TYPE(rvalue_reference)
 	PROCESS_TYPE(shared)
 	PROCESS_TYPE(volatile)
+	/* Container types */
+	PROCESS_TYPE(class)
+	PROCESS_TYPE(structure)
+	PROCESS_TYPE(union)
+	PROCESS_TYPE(enumeration)
 	/* Subtypes */
+	PROCESS_TYPE(enumerator)
 	PROCESS_TYPE(formal_parameter)
+	PROCESS_TYPE(member)
 	PROCESS_TYPE(subrange)
+	PROCESS_TYPE(template_type_parameter)
+	PROCESS_TYPE(variant)
+	PROCESS_TYPE(variant_part)
 	/* Other types */
 	PROCESS_TYPE(array)
 	PROCESS_TYPE(base)
 	PROCESS_TYPE(subroutine)
 	PROCESS_TYPE(typedef)
+	PROCESS_TYPE(unspecified)
 	default:
-		debug("unimplemented type: %x", tag);
-		break;
+		error("unexpected type: %x", tag);
 	}
 
 	/* Update cache state and append to the parent (if any) */
diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
index d5186472f705..ad50e35e3351 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.h
+++ b/scripts/gendwarfksyms/gendwarfksyms.h
@@ -63,8 +63,13 @@ extern int dump_dies;
 #define checkp(expr) __check(expr, __res < 0)
 
 /* Consistent aliases (DW_TAG_<type>_type) for DWARF tags */
+#define DW_TAG_enumerator_type DW_TAG_enumerator
 #define DW_TAG_formal_parameter_type DW_TAG_formal_parameter
+#define DW_TAG_member_type DW_TAG_member
+#define DW_TAG_template_type_parameter_type DW_TAG_template_type_parameter
 #define DW_TAG_typedef_type DW_TAG_typedef
+#define DW_TAG_variant_part_type DW_TAG_variant_part
+#define DW_TAG_variant_type DW_TAG_variant
 
 /*
  * symbols.c
-- 
2.46.0.792.g87dc391469-goog
Re: [PATCH v3 10/20] gendwarfksyms: Expand structure types
Posted by Petr Pavlu 1 month, 4 weeks ago
On 9/23/24 20:18, Sami Tolvanen wrote:
> Recursively expand DWARF structure types, i.e. structs, unions, and
> enums. Also include relevant DWARF attributes in type strings to
> encode structure layout, for example.
> 
> Example output with --dump-dies:
> 
>   subprogram (
>     formal_parameter structure_type &str {
>       member pointer_type {
>         base_type u8 byte_size(1) encoding(7)
>       } data_ptr data_member_location(0) ,
>       member base_type usize byte_size(8) encoding(7) length data_member_location(8)
>     } byte_size(16) alignment(8) msg
>   )
>   -> base_type void
> 
> Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
> ---
>  scripts/gendwarfksyms/dwarf.c         | 137 +++++++++++++++++++++++++-
>  scripts/gendwarfksyms/gendwarfksyms.h |   5 +
>  2 files changed, 140 insertions(+), 2 deletions(-)
> 
> diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
> index caf25da0a9b9..b7f1dc29cb9c 100644
> --- a/scripts/gendwarfksyms/dwarf.c
> +++ b/scripts/gendwarfksyms/dwarf.c
> @@ -205,9 +205,13 @@ static void process_fqn(struct die *cache, Dwarf_Die *die)
>  				    value);                                \
>  	}
>  
> +DEFINE_PROCESS_UDATA_ATTRIBUTE(accessibility)
>  DEFINE_PROCESS_UDATA_ATTRIBUTE(alignment)
> +DEFINE_PROCESS_UDATA_ATTRIBUTE(bit_size)
>  DEFINE_PROCESS_UDATA_ATTRIBUTE(byte_size)
>  DEFINE_PROCESS_UDATA_ATTRIBUTE(encoding)
> +DEFINE_PROCESS_UDATA_ATTRIBUTE(data_bit_offset)
> +DEFINE_PROCESS_UDATA_ATTRIBUTE(data_member_location)
>  
>  /* Match functions -- die_match_callback_t */
>  #define DEFINE_MATCH(type)                                     \
> @@ -216,8 +220,11 @@ DEFINE_PROCESS_UDATA_ATTRIBUTE(encoding)
>  		return dwarf_tag(die) == DW_TAG_##type##_type; \
>  	}
>  
> +DEFINE_MATCH(enumerator)
>  DEFINE_MATCH(formal_parameter)
> +DEFINE_MATCH(member)
>  DEFINE_MATCH(subrange)
> +DEFINE_MATCH(variant)
>  
>  bool match_all(Dwarf_Die *die)
>  {
> @@ -295,6 +302,10 @@ static void __process_list_type(struct state *state, struct die *cache,
>  		process(cache, " ");
>  		process(cache, name);
>  	}
> +	process_accessibility_attr(cache, die);
> +	process_bit_size_attr(cache, die);
> +	process_data_bit_offset_attr(cache, die);
> +	process_data_member_location_attr(cache, die);
>  }
>  
>  #define DEFINE_PROCESS_LIST_TYPE(type)                                       \
> @@ -305,6 +316,7 @@ static void __process_list_type(struct state *state, struct die *cache,
>  	}
>  
>  DEFINE_PROCESS_LIST_TYPE(formal_parameter)
> +DEFINE_PROCESS_LIST_TYPE(member)
>  
>  /* Container types with DW_AT_type */
>  static void __process_type(struct state *state, struct die *cache,
> @@ -337,6 +349,7 @@ DEFINE_PROCESS_TYPE(reference)
>  DEFINE_PROCESS_TYPE(restrict)
>  DEFINE_PROCESS_TYPE(rvalue_reference)
>  DEFINE_PROCESS_TYPE(shared)
> +DEFINE_PROCESS_TYPE(template_type_parameter)
>  DEFINE_PROCESS_TYPE(volatile)
>  DEFINE_PROCESS_TYPE(typedef)
>  
> @@ -390,6 +403,106 @@ static void process_subroutine_type(struct state *state, struct die *cache,
>  	__process_subroutine_type(state, cache, die, "subroutine_type");
>  }
>  
> +static void process_variant_type(struct state *state, struct die *cache,
> +				 Dwarf_Die *die)
> +{
> +	process_list_comma(state, cache);
> +	process(cache, "variant {");
> +	process_linebreak(cache, 1);
> +	check(process_die_container(state, cache, die, process_type,
> +				    match_member_type));
> +	process_linebreak(cache, -1);
> +	process(cache, "}");
> +}
> +
> +static void process_variant_part_type(struct state *state, struct die *cache,
> +				      Dwarf_Die *die)
> +{
> +	process_list_comma(state, cache);
> +	process(cache, "variant_part {");
> +	process_linebreak(cache, 1);
> +	check(process_die_container(state, cache, die, process_type,
> +				    match_variant_type));
> +	process_linebreak(cache, -1);
> +	process(cache, "}");
> +}

For variant types, should the tool worry also about DW_AT_discr and
DW_AT_discr_value?

> +
> +static int ___process_structure_type(struct state *state, struct die *cache,
> +				     Dwarf_Die *die)
> +{
> +	switch (dwarf_tag(die)) {
> +	case DW_TAG_member:
> +	case DW_TAG_variant_part:
> +		return check(process_type(state, cache, die));
> +	case DW_TAG_class_type:
> +	case DW_TAG_enumeration_type:
> +	case DW_TAG_structure_type:
> +	case DW_TAG_template_type_parameter:
> +	case DW_TAG_union_type:
> +	case DW_TAG_subprogram:
> +		/* Skip non-member types, including member functions */
> +		return 0;
> +	default:
> +		error("unexpected structure_type child: %x", dwarf_tag(die));
> +	}
> +}
> +
> +static void __process_structure_type(struct state *state, struct die *cache,
> +				     Dwarf_Die *die, const char *type,
> +				     die_callback_t process_func,
> +				     die_match_callback_t match_func)
> +{
> +	process(cache, type);
> +	process_fqn(cache, die);
> +	process(cache, " {");
> +	process_linebreak(cache, 1);
> +
> +	check(process_die_container(state, cache, die, process_func,
> +				    match_func));
> +
> +	process_linebreak(cache, -1);
> +	process(cache, "}");
> +
> +	process_byte_size_attr(cache, die);
> +	process_alignment_attr(cache, die);
> +}
> +
> +#define DEFINE_PROCESS_STRUCTURE_TYPE(structure)                        \
> +	static void process_##structure##_type(                         \
> +		struct state *state, struct die *cache, Dwarf_Die *die) \
> +	{                                                               \
> +		__process_structure_type(state, cache, die,             \
> +					 #structure "_type",            \
> +					 ___process_structure_type,     \
> +					 match_all);                    \
> +	}
> +
> +DEFINE_PROCESS_STRUCTURE_TYPE(class)
> +DEFINE_PROCESS_STRUCTURE_TYPE(structure)
> +DEFINE_PROCESS_STRUCTURE_TYPE(union)
> +
> +static void process_enumerator_type(struct state *state, struct die *cache,
> +				    Dwarf_Die *die)
> +{
> +	Dwarf_Word value;
> +
> +	process_list_comma(state, cache);
> +	process(cache, "enumerator");
> +	process_fqn(cache, die);
> +
> +	if (get_udata_attr(die, DW_AT_const_value, &value)) {
> +		process(cache, " = ");
> +		process_fmt(cache, "%" PRIu64, value);
> +	}
> +}
> +
> +static void process_enumeration_type(struct state *state, struct die *cache,
> +				     Dwarf_Die *die)
> +{
> +	__process_structure_type(state, cache, die, "enumeration_type",
> +				 process_type, match_enumerator_type);
> +}
> +
>  static void process_base_type(struct state *state, struct die *cache,
>  			      Dwarf_Die *die)
>  {
> @@ -400,6 +513,16 @@ static void process_base_type(struct state *state, struct die *cache,
>  	process_alignment_attr(cache, die);
>  }
>  
> +static void process_unspecified_type(struct state *state, struct die *cache,
> +				     Dwarf_Die *die)
> +{
> +	/*
> +	 * These can be emitted for stand-elone assembly code, which means we
> +	 * might run into them in vmlinux.o.
> +	 */

Nit: stand-elone -> stand-alone.

> +	process(cache, "unspecified_type");
> +}
> +
>  static void process_cached(struct state *state, struct die *cache,
>  			   Dwarf_Die *die)
>  {
> @@ -460,17 +583,27 @@ static int process_type(struct state *state, struct die *parent, Dwarf_Die *die)
>  	PROCESS_TYPE(rvalue_reference)
>  	PROCESS_TYPE(shared)
>  	PROCESS_TYPE(volatile)
> +	/* Container types */
> +	PROCESS_TYPE(class)
> +	PROCESS_TYPE(structure)
> +	PROCESS_TYPE(union)
> +	PROCESS_TYPE(enumeration)
>  	/* Subtypes */
> +	PROCESS_TYPE(enumerator)
>  	PROCESS_TYPE(formal_parameter)
> +	PROCESS_TYPE(member)
>  	PROCESS_TYPE(subrange)
> +	PROCESS_TYPE(template_type_parameter)
> +	PROCESS_TYPE(variant)
> +	PROCESS_TYPE(variant_part)
>  	/* Other types */
>  	PROCESS_TYPE(array)
>  	PROCESS_TYPE(base)
>  	PROCESS_TYPE(subroutine)
>  	PROCESS_TYPE(typedef)
> +	PROCESS_TYPE(unspecified)
>  	default:
> -		debug("unimplemented type: %x", tag);
> -		break;
> +		error("unexpected type: %x", tag);
>  	}
>  
>  	/* Update cache state and append to the parent (if any) */
> diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
> index d5186472f705..ad50e35e3351 100644
> --- a/scripts/gendwarfksyms/gendwarfksyms.h
> +++ b/scripts/gendwarfksyms/gendwarfksyms.h
> @@ -63,8 +63,13 @@ extern int dump_dies;
>  #define checkp(expr) __check(expr, __res < 0)
>  
>  /* Consistent aliases (DW_TAG_<type>_type) for DWARF tags */
> +#define DW_TAG_enumerator_type DW_TAG_enumerator
>  #define DW_TAG_formal_parameter_type DW_TAG_formal_parameter
> +#define DW_TAG_member_type DW_TAG_member
> +#define DW_TAG_template_type_parameter_type DW_TAG_template_type_parameter
>  #define DW_TAG_typedef_type DW_TAG_typedef
> +#define DW_TAG_variant_part_type DW_TAG_variant_part
> +#define DW_TAG_variant_type DW_TAG_variant
>  
>  /*
>   * symbols.c

-- 
Thanks,
Petr
Re: [PATCH v3 10/20] gendwarfksyms: Expand structure types
Posted by Sami Tolvanen 1 month, 4 weeks ago
On Tue, Oct 1, 2024 at 2:16 PM Petr Pavlu <petr.pavlu@suse.com> wrote:
>
> On 9/23/24 20:18, Sami Tolvanen wrote:
> > +static void process_variant_type(struct state *state, struct die *cache,
> > +                              Dwarf_Die *die)
> > +{
> > +     process_list_comma(state, cache);
> > +     process(cache, "variant {");
> > +     process_linebreak(cache, 1);
> > +     check(process_die_container(state, cache, die, process_type,
> > +                                 match_member_type));
> > +     process_linebreak(cache, -1);
> > +     process(cache, "}");
> > +}
> > +
> > +static void process_variant_part_type(struct state *state, struct die *cache,
> > +                                   Dwarf_Die *die)
> > +{
> > +     process_list_comma(state, cache);
> > +     process(cache, "variant_part {");
> > +     process_linebreak(cache, 1);
> > +     check(process_die_container(state, cache, die, process_type,
> > +                                 match_variant_type));
> > +     process_linebreak(cache, -1);
> > +     process(cache, "}");
> > +}
>
> For variant types, should the tool worry also about DW_AT_discr and
> DW_AT_discr_value?

Hmm, I initially thought they were not necessary, but looking at the
rustc DWARF output a bit more closely, it might actually be a good
idea to include this information. I'll add both of these, although since
DW_AT_discr just points to a member DIE, it's probably enough to just
process all child DIEs for the variant_part instead of limiting
ourselves to variants only.

> > +static void process_unspecified_type(struct state *state, struct die *cache,
> > +                                  Dwarf_Die *die)
> > +{
> > +     /*
> > +      * These can be emitted for stand-elone assembly code, which means we
> > +      * might run into them in vmlinux.o.
> > +      */
>
> Nit: stand-elone -> stand-alone.

Oops, I'll fix this too. Thanks!

Sami