[v2] docs: kdoc: thrash up dump_struct()

[PATCH v2 10/12] docs: kdoc: further rewrite_struct_members() cleanup

Posted by Jonathan Corbet 6 months, 1 week ago

Get rid of some redundant checks, and generally tighten up the code; no
logical change.

Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 scripts/lib/kdoc/kdoc_parser.py | 86 ++++++++++++++++-----------------
 1 file changed, 41 insertions(+), 45 deletions(-)

diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py
index e3d0270b1a19..b3f937901037 100644
--- a/scripts/lib/kdoc/kdoc_parser.py
+++ b/scripts/lib/kdoc/kdoc_parser.py
@@ -673,73 +673,69 @@ class KernelDoc:
         while tuples:
             for t in tuples:
                 newmember = ""
-                maintype = t[0]
-                s_ids = t[5]
-                content = t[3]
-
-                oldmember = "".join(t)
-
-                for s_id in s_ids.split(','):
+                oldmember = "".join(t) # Reconstruct the original formatting
+                dtype, name, lbr, content, rbr, rest, semi = t
+                #
+                # Pass through each field name, normalizing the form and formatting.
+                #
+                for s_id in rest.split(','):
                     s_id = s_id.strip()
-
-                    newmember += f"{maintype} {s_id}; "
+                    newmember += f"{dtype} {s_id}; "
+                    #
+                    # Remove bitfield/array/pointer info, getting the bare name.
+                    #
                     s_id = KernRe(r'[:\[].*').sub('', s_id)
                     s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id)
-
+                    #
+                    # Pass through the members of this inner structure/union.
+                    #
                     for arg in content.split(';'):
                         arg = arg.strip()
-
-                        if not arg:
-                            continue
-
+                        #
+                        # Look for (type)(*name)(args) - pointer to function
+                        #
                         r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)')
                         if r.match(arg):
+                            dtype, name, extra = r.group(1), r.group(2), r.group(3)
                             # Pointer-to-function
-                            dtype = r.group(1)
-                            name = r.group(2)
-                            extra = r.group(3)
-
-                            if not name:
-                                continue
-
                             if not s_id:
                                 # Anonymous struct/union
                                 newmember += f"{dtype}{name}{extra}; "
                             else:
                                 newmember += f"{dtype}{s_id}.{name}{extra}; "
-
+                        #
+                        # Otherwise a non-function member.
+                        #
                         else:
-                            # Handle bitmaps
+                            #
+                            # Remove bitmap and array portions and spaces around commas
+                            #
                             arg = KernRe(r':\s*\d+\s*').sub('', arg)
-
-                            # Handle arrays
                             arg = KernRe(r'\[.*\]').sub('', arg)
-
-                            # Handle multiple IDs
                             arg = KernRe(r'\s*,\s*').sub(',', arg)
-
+                            #
+                            # Look for a normal decl - "type name[,name...]"
+                            #
                             r = KernRe(r'(.*)\s+([\S+,]+)')
-
                             if r.search(arg):
-                                dtype = r.group(1)
-                                names = r.group(2)
+                                for name in r.group(2).split(','):
+                                    name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name)
+                                    if not s_id:
+                                        # Anonymous struct/union
+                                        newmember += f"{r.group(1)} {name}; "
+                                    else:
+                                        newmember += f"{r.group(1)} {s_id}.{name}; "
                             else:
                                 newmember += f"{arg}; "
-                                continue
-
-                            for name in names.split(','):
-                                name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name).strip()
-
-                                if not name:
-                                    continue
-
-                                if not s_id:
-                                    # Anonymous struct/union
-                                    newmember += f"{dtype} {name}; "
-                                else:
-                                    newmember += f"{dtype} {s_id}.{name}; "
-
+                #
+                # At the end of the s_id loop, replace the original declaration with
+                # the munged version.
+                #
                 members = members.replace(oldmember, newmember)
+            #
+            # End of the tuple loop - search again and see if there are outer members
+            # that now turn up.
+            #
             tuples = struct_members.findall(members)
         return members
 
-- 
2.50.1

Re: [PATCH v2 10/12] docs: kdoc: further rewrite_struct_members() cleanup

Posted by Mauro Carvalho Chehab 6 months ago

Em Thu,  7 Aug 2025 15:16:37 -0600
Jonathan Corbet <corbet@lwn.net> escreveu:

> Get rid of some redundant checks, and generally tighten up the code; no
> logical change.

LGTM, but see below:

> 
> Signed-off-by: Jonathan Corbet <corbet@lwn.net>
> ---
>  scripts/lib/kdoc/kdoc_parser.py | 86 ++++++++++++++++-----------------
>  1 file changed, 41 insertions(+), 45 deletions(-)
> 
> diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py
> index e3d0270b1a19..b3f937901037 100644
> --- a/scripts/lib/kdoc/kdoc_parser.py
> +++ b/scripts/lib/kdoc/kdoc_parser.py
> @@ -673,73 +673,69 @@ class KernelDoc:
>          while tuples:
>              for t in tuples:
>                  newmember = ""
> -                maintype = t[0]
> -                s_ids = t[5]
> -                content = t[3]
> -
> -                oldmember = "".join(t)
> -
> -                for s_id in s_ids.split(','):
> +                oldmember = "".join(t) # Reconstruct the original formatting
> +                dtype, name, lbr, content, rbr, rest, semi = t

Here, I would either use non-capturing groups in the regex or use "_" for
the variables we're just ignoring.

IMO, the cleanest approach without using finditer would be:


	struct_members = KernRe("("			# 0: the entire pattern
				r'(struct|union)'	# 1: declaration type
				r'(?:[^\{\};]+)'
				r'(?:\{)'
				r'([^\{\}]*)'		# 2: Contents of declaration
				r'(?:\})'
				r'([^\{\};]*)(?:;)'	# 3: Remaining stuff after declaration
				")")

	tuples = struct_members.findall(members)
	while tuples:
	    for t in tuples:
		oldmember, maintype, content, s_ids = t

I wonder if using finditer would avoid the first while loop - I guess not,
as the logic here picks up multi-level members - but if it works, then
it would be a nice improvement to use it.

Anyway, such cleanup can be done later. So:

Reviewed-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>

> +                #
> +                # Pass through each field name, normalizing the form and formatting.
> +                #
> +                for s_id in rest.split(','):
>                      s_id = s_id.strip()
> -
> -                    newmember += f"{maintype} {s_id}; "
> +                    newmember += f"{dtype} {s_id}; "
> +                    #
> +                    # Remove bitfield/array/pointer info, getting the bare name.
> +                    #
>                      s_id = KernRe(r'[:\[].*').sub('', s_id)
>                      s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id)
> -
> +                    #
> +                    # Pass through the members of this inner structure/union.
> +                    #
>                      for arg in content.split(';'):
>                          arg = arg.strip()
> -
> -                        if not arg:
> -                            continue
> -
> +                        #
> +                        # Look for (type)(*name)(args) - pointer to function
> +                        #
>                          r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)')
>                          if r.match(arg):
> +                            dtype, name, extra = r.group(1), r.group(2), r.group(3)
>                              # Pointer-to-function
> -                            dtype = r.group(1)
> -                            name = r.group(2)
> -                            extra = r.group(3)
> -
> -                            if not name:
> -                                continue
> -
>                              if not s_id:
>                                  # Anonymous struct/union
>                                  newmember += f"{dtype}{name}{extra}; "
>                              else:
>                                  newmember += f"{dtype}{s_id}.{name}{extra}; "
> -
> +                        #
> +                        # Otherwise a non-function member.
> +                        #
>                          else:
> -                            # Handle bitmaps
> +                            #
> +                            # Remove bitmap and array portions and spaces around commas
> +                            #
>                              arg = KernRe(r':\s*\d+\s*').sub('', arg)
> -
> -                            # Handle arrays
>                              arg = KernRe(r'\[.*\]').sub('', arg)
> -
> -                            # Handle multiple IDs
>                              arg = KernRe(r'\s*,\s*').sub(',', arg)
> -
> +                            #
> +                            # Look for a normal decl - "type name[,name...]"
> +                            #
>                              r = KernRe(r'(.*)\s+([\S+,]+)')
> -
>                              if r.search(arg):
> -                                dtype = r.group(1)
> -                                names = r.group(2)
> +                                for name in r.group(2).split(','):
> +                                    name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name)
> +                                    if not s_id:
> +                                        # Anonymous struct/union
> +                                        newmember += f"{r.group(1)} {name}; "
> +                                    else:
> +                                        newmember += f"{r.group(1)} {s_id}.{name}; "
>                              else:
>                                  newmember += f"{arg}; "
> -                                continue
> -
> -                            for name in names.split(','):
> -                                name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name).strip()
> -
> -                                if not name:
> -                                    continue
> -
> -                                if not s_id:
> -                                    # Anonymous struct/union
> -                                    newmember += f"{dtype} {name}; "
> -                                else:
> -                                    newmember += f"{dtype} {s_id}.{name}; "
> -
> +                #
> +                # At the end of the s_id loop, replace the original declaration with
> +                # the munged version.
> +                #
>                  members = members.replace(oldmember, newmember)
> +            #
> +            # End of the tuple loop - search again and see if there are outer members
> +            # that now turn up.
> +            #
>              tuples = struct_members.findall(members)
>          return members
>  



Thanks,
Mauro