[v1] Thrash up the parser/output interface

[PATCH 04/12] docs: kdoc: Centralize handling of the item section list

Posted by Jonathan Corbet 7 months, 1 week ago

The section list always comes directly from the under-construction entry
and is used uniformly.  Formalize section handling in the KdocItem class,
and have output_declaration() load the sections directly from the entry,
eliminating a lot of duplicated, verbose parameters.

Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 scripts/lib/kdoc/kdoc_item.py   |  8 ++++++++
 scripts/lib/kdoc/kdoc_output.py | 36 ++++++++++++---------------------
 scripts/lib/kdoc/kdoc_parser.py | 20 +++---------------
 3 files changed, 24 insertions(+), 40 deletions(-)

diff --git a/scripts/lib/kdoc/kdoc_item.py b/scripts/lib/kdoc/kdoc_item.py
index add2cc772fec..c8329019a219 100644
--- a/scripts/lib/kdoc/kdoc_item.py
+++ b/scripts/lib/kdoc/kdoc_item.py
@@ -9,6 +9,7 @@ class KdocItem:
         self.name = name
         self.type = type
         self.declaration_start_line = start_line
+        self.sections = self.sections_start_lines = { }
         #
         # Just save everything else into our own dict so that the output
         # side can grab it directly as before.  As we move things into more
@@ -24,3 +25,10 @@ class KdocItem:
 
     def __getitem__(self, key):
         return self.get(key)
+
+    #
+    # Tracking of section information.
+    #
+    def set_sections(self, sections, start_lines):
+        self.sections = sections
+        self.section_start_lines = start_lines
diff --git a/scripts/lib/kdoc/kdoc_output.py b/scripts/lib/kdoc/kdoc_output.py
index 4895c80e4b81..15cb89f91987 100644
--- a/scripts/lib/kdoc/kdoc_output.py
+++ b/scripts/lib/kdoc/kdoc_output.py
@@ -338,11 +338,7 @@ class RestFormat(OutputFormat):
         starts by putting out the name of the doc section itself, but that
         tends to duplicate a header already in the template file.
         """
-
-        sections = args.get('sections', {})
-        section_start_lines = args.get('section_start_lines', {})
-
-        for section in sections:
+        for section, text in args.sections.items():
             # Skip sections that are in the nosymbol_table
             if section in self.nosymbol:
                 continue
@@ -354,8 +350,8 @@ class RestFormat(OutputFormat):
             else:
                 self.data += f'{self.lineprefix}**{section}**\n\n'
 
-            self.print_lineno(section_start_lines.get(section, 0))
-            self.output_highlight(sections[section])
+            self.print_lineno(args.section_start_lines.get(section, 0))
+            self.output_highlight(text)
             self.data += "\n"
         self.data += "\n"
 
@@ -635,23 +631,20 @@ class ManFormat(OutputFormat):
                 self.data += line + "\n"
 
     def out_doc(self, fname, name, args):
-        sections = args.get('sections', {})
-
         if not self.check_doc(name, args):
             return
 
         self.data += f'.TH "{self.modulename}" 9 "{self.modulename}" "{self.man_date}" "API Manual" LINUX' + "\n"
 
-        for section in sections:
+        for section, text in args.sections.items():
             self.data += f'.SH "{section}"' + "\n"
-            self.output_highlight(sections.get(section))
+            self.output_highlight(text)
 
     def out_function(self, fname, name, args):
         """output function in man"""
 
         parameterlist = args.get('parameterlist', [])
         parameterdescs = args.get('parameterdescs', {})
-        sections = args.get('sections', {})
 
         self.data += f'.TH "{args["function"]}" 9 "{args["function"]}" "{self.man_date}" "Kernel Hacker\'s Manual" LINUX' + "\n"
 
@@ -692,15 +685,14 @@ class ManFormat(OutputFormat):
             self.data += f'.IP "{parameter}" 12' + "\n"
             self.output_highlight(parameterdescs.get(parameter_name, ""))
 
-        for section in sections:
+        for section, text in args.sections.items():
             self.data += f'.SH "{section.upper()}"' + "\n"
-            self.output_highlight(sections[section])
+            self.output_highlight(text)
 
     def out_enum(self, fname, name, args):
 
         name = args.get('enum', '')
         parameterlist = args.get('parameterlist', [])
-        sections = args.get('sections', {})
 
         self.data += f'.TH "{self.modulename}" 9 "enum {args["enum"]}" "{self.man_date}" "API Manual" LINUX' + "\n"
 
@@ -727,24 +719,23 @@ class ManFormat(OutputFormat):
             self.data += f'.IP "{parameter}" 12' + "\n"
             self.output_highlight(args['parameterdescs'].get(parameter_name, ""))
 
-        for section in sections:
+        for section, text in args.sections.items():
             self.data += f'.SH "{section}"' + "\n"
-            self.output_highlight(sections[section])
+            self.output_highlight(text)
 
     def out_typedef(self, fname, name, args):
         module = self.modulename
         typedef = args.get('typedef')
         purpose = args.get('purpose')
-        sections = args.get('sections', {})
 
         self.data += f'.TH "{module}" 9 "{typedef}" "{self.man_date}" "API Manual" LINUX' + "\n"
 
         self.data += ".SH NAME\n"
         self.data += f"typedef {typedef} \\- {purpose}\n"
 
-        for section in sections:
+        for section, text in args.sections.items():
             self.data += f'.SH "{section}"' + "\n"
-            self.output_highlight(sections.get(section))
+            self.output_highlight(text)
 
     def out_struct(self, fname, name, args):
         module = self.modulename
@@ -753,7 +744,6 @@ class ManFormat(OutputFormat):
         purpose = args.get('purpose')
         definition = args.get('definition')
         parameterlist = args.get('parameterlist', [])
-        sections = args.get('sections', {})
         parameterdescs = args.get('parameterdescs', {})
 
         self.data += f'.TH "{module}" 9 "{struct_type} {struct_name}" "{self.man_date}" "API Manual" LINUX' + "\n"
@@ -782,6 +772,6 @@ class ManFormat(OutputFormat):
             self.data += f'.IP "{parameter}" 12' + "\n"
             self.output_highlight(parameterdescs.get(parameter_name))
 
-        for section in sections:
+        for section, text in args.sections.items():
             self.data += f'.SH "{section}"' + "\n"
-            self.output_highlight(sections.get(section))
+            self.output_highlight(text)
diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py
index 2e00c8b3a5f2..608f3a1045dc 100644
--- a/scripts/lib/kdoc/kdoc_parser.py
+++ b/scripts/lib/kdoc/kdoc_parser.py
@@ -272,13 +272,13 @@ class KernelDoc:
         item = KdocItem(name, dtype, self.entry.declaration_start_line, **args)
         item.warnings = self.entry.warnings
 
-        sections = item.get('sections', {})
-
         # Drop empty sections
         # TODO: improve empty sections logic to emit warnings
+        sections = self.entry.sections
         for section in ["Description", "Return"]:
             if section in sections and not sections[section].rstrip():
                 del sections[section]
+        item.set_sections(sections, self.entry.section_start_lines)
 
         self.entries.append(item)
 
@@ -824,8 +824,6 @@ class KernelDoc:
                                 parameterdescs=self.entry.parameterdescs,
                                 parametertypes=self.entry.parametertypes,
                                 parameterdesc_start_lines=self.entry.parameterdesc_start_lines,
-                                sections=self.entry.sections,
-                                section_start_lines=self.entry.section_start_lines,
                                 purpose=self.entry.declaration_purpose)
 
     def dump_enum(self, ln, proto):
@@ -908,8 +906,6 @@ class KernelDoc:
                                 parameterlist=self.entry.parameterlist,
                                 parameterdescs=self.entry.parameterdescs,
                                 parameterdesc_start_lines=self.entry.parameterdesc_start_lines,
-                                sections=self.entry.sections,
-                                section_start_lines=self.entry.section_start_lines,
                                 purpose=self.entry.declaration_purpose)
 
     def dump_declaration(self, ln, prototype):
@@ -1079,8 +1075,6 @@ class KernelDoc:
                                     parameterdescs=self.entry.parameterdescs,
                                     parametertypes=self.entry.parametertypes,
                                     parameterdesc_start_lines=self.entry.parameterdesc_start_lines,
-                                    sections=self.entry.sections,
-                                    section_start_lines=self.entry.section_start_lines,
                                     purpose=self.entry.declaration_purpose,
                                     func_macro=func_macro)
         else:
@@ -1092,8 +1086,6 @@ class KernelDoc:
                                     parameterdescs=self.entry.parameterdescs,
                                     parametertypes=self.entry.parametertypes,
                                     parameterdesc_start_lines=self.entry.parameterdesc_start_lines,
-                                    sections=self.entry.sections,
-                                    section_start_lines=self.entry.section_start_lines,
                                     purpose=self.entry.declaration_purpose,
                                     func_macro=func_macro)
 
@@ -1137,8 +1129,6 @@ class KernelDoc:
                                     parameterdescs=self.entry.parameterdescs,
                                     parametertypes=self.entry.parametertypes,
                                     parameterdesc_start_lines=self.entry.parameterdesc_start_lines,
-                                    sections=self.entry.sections,
-                                    section_start_lines=self.entry.section_start_lines,
                                     purpose=self.entry.declaration_purpose)
             return
 
@@ -1159,8 +1149,6 @@ class KernelDoc:
 
             self.output_declaration('typedef', declaration_name,
                                     typedef=declaration_name,
-                                    sections=self.entry.sections,
-                                    section_start_lines=self.entry.section_start_lines,
                                     purpose=self.entry.declaration_purpose)
             return
 
@@ -1642,9 +1630,7 @@ class KernelDoc:
 
         if doc_end.search(line):
             self.dump_section()
-            self.output_declaration("doc", self.entry.identifier,
-                                    sections=self.entry.sections,
-                                    section_start_lines=self.entry.section_start_lines)
+            self.output_declaration("doc", self.entry.identifier)
             self.reset_state(ln)
 
         elif doc_content.search(line):
-- 
2.49.0

Re: [PATCH 04/12] docs: kdoc: Centralize handling of the item section list

Posted by Mauro Carvalho Chehab 7 months ago

Em Wed,  2 Jul 2025 16:35:16 -0600
Jonathan Corbet <corbet@lwn.net> escreveu:

> The section list always comes directly from the under-construction entry
> and is used uniformly.  Formalize section handling in the KdocItem class,
> and have output_declaration() load the sections directly from the entry,
> eliminating a lot of duplicated, verbose parameters.
> 
> Signed-off-by: Jonathan Corbet <corbet@lwn.net>
> ---
>  scripts/lib/kdoc/kdoc_item.py   |  8 ++++++++
>  scripts/lib/kdoc/kdoc_output.py | 36 ++++++++++++---------------------
>  scripts/lib/kdoc/kdoc_parser.py | 20 +++---------------
>  3 files changed, 24 insertions(+), 40 deletions(-)
> 
> diff --git a/scripts/lib/kdoc/kdoc_item.py b/scripts/lib/kdoc/kdoc_item.py
> index add2cc772fec..c8329019a219 100644
> --- a/scripts/lib/kdoc/kdoc_item.py
> +++ b/scripts/lib/kdoc/kdoc_item.py
> @@ -9,6 +9,7 @@ class KdocItem:
>          self.name = name
>          self.type = type
>          self.declaration_start_line = start_line
> +        self.sections = self.sections_start_lines = { }

Nitpicks:
- to make coding-style uniform, please use "{}" without spaces;
- Please place one statement per line, just like we (usually) do in Kernel. 

  In this specific case, I strongly suspect that what you coded is not
  implementing the semantics you want. See:

	1. are you creating a single dict and placing the same dict on two
	   variables?
  or:
	2. are you initializing two different vars with their own empty
	   dict?

The subsequent code assumes (2), but a quick check with python3 command
line:

	>>> a = b = {}
	>>> a["foo"] = "bar"
	>>> print(b)
	{'foo': 'bar'}

Shows that Python is doing (1) here: it basically creates a single
dict and assign pointers to it for both self.declaration_start_line
and self.sections. Clearly, this is not what we want. 

This is not a problem in practice with the current code, as 
set_sections will replace both at the same time, but if we ever
handle them using different functions, this will become a problem.

The rest of the code looks sane to me.

>          #
>          # Just save everything else into our own dict so that the output
>          # side can grab it directly as before.  As we move things into more
> @@ -24,3 +25,10 @@ class KdocItem:
>  
>      def __getitem__(self, key):
>          return self.get(key)
> +
> +    #
> +    # Tracking of section information.
> +    #
> +    def set_sections(self, sections, start_lines):
> +        self.sections = sections
> +        self.section_start_lines = start_lines
> diff --git a/scripts/lib/kdoc/kdoc_output.py b/scripts/lib/kdoc/kdoc_output.py
> index 4895c80e4b81..15cb89f91987 100644
> --- a/scripts/lib/kdoc/kdoc_output.py
> +++ b/scripts/lib/kdoc/kdoc_output.py
> @@ -338,11 +338,7 @@ class RestFormat(OutputFormat):
>          starts by putting out the name of the doc section itself, but that
>          tends to duplicate a header already in the template file.
>          """
> -
> -        sections = args.get('sections', {})
> -        section_start_lines = args.get('section_start_lines', {})
> -
> -        for section in sections:
> +        for section, text in args.sections.items():
>              # Skip sections that are in the nosymbol_table
>              if section in self.nosymbol:
>                  continue
> @@ -354,8 +350,8 @@ class RestFormat(OutputFormat):
>              else:
>                  self.data += f'{self.lineprefix}**{section}**\n\n'
>  
> -            self.print_lineno(section_start_lines.get(section, 0))
> -            self.output_highlight(sections[section])
> +            self.print_lineno(args.section_start_lines.get(section, 0))
> +            self.output_highlight(text)
>              self.data += "\n"
>          self.data += "\n"
>  
> @@ -635,23 +631,20 @@ class ManFormat(OutputFormat):
>                  self.data += line + "\n"
>  
>      def out_doc(self, fname, name, args):
> -        sections = args.get('sections', {})
> -
>          if not self.check_doc(name, args):
>              return
>  
>          self.data += f'.TH "{self.modulename}" 9 "{self.modulename}" "{self.man_date}" "API Manual" LINUX' + "\n"
>  
> -        for section in sections:
> +        for section, text in args.sections.items():
>              self.data += f'.SH "{section}"' + "\n"
> -            self.output_highlight(sections.get(section))
> +            self.output_highlight(text)
>  
>      def out_function(self, fname, name, args):
>          """output function in man"""
>  
>          parameterlist = args.get('parameterlist', [])
>          parameterdescs = args.get('parameterdescs', {})
> -        sections = args.get('sections', {})
>  
>          self.data += f'.TH "{args["function"]}" 9 "{args["function"]}" "{self.man_date}" "Kernel Hacker\'s Manual" LINUX' + "\n"
>  
> @@ -692,15 +685,14 @@ class ManFormat(OutputFormat):
>              self.data += f'.IP "{parameter}" 12' + "\n"
>              self.output_highlight(parameterdescs.get(parameter_name, ""))
>  
> -        for section in sections:
> +        for section, text in args.sections.items():
>              self.data += f'.SH "{section.upper()}"' + "\n"
> -            self.output_highlight(sections[section])
> +            self.output_highlight(text)
>  
>      def out_enum(self, fname, name, args):
>  
>          name = args.get('enum', '')
>          parameterlist = args.get('parameterlist', [])
> -        sections = args.get('sections', {})
>  
>          self.data += f'.TH "{self.modulename}" 9 "enum {args["enum"]}" "{self.man_date}" "API Manual" LINUX' + "\n"
>  
> @@ -727,24 +719,23 @@ class ManFormat(OutputFormat):
>              self.data += f'.IP "{parameter}" 12' + "\n"
>              self.output_highlight(args['parameterdescs'].get(parameter_name, ""))
>  
> -        for section in sections:
> +        for section, text in args.sections.items():
>              self.data += f'.SH "{section}"' + "\n"
> -            self.output_highlight(sections[section])
> +            self.output_highlight(text)
>  
>      def out_typedef(self, fname, name, args):
>          module = self.modulename
>          typedef = args.get('typedef')
>          purpose = args.get('purpose')
> -        sections = args.get('sections', {})
>  
>          self.data += f'.TH "{module}" 9 "{typedef}" "{self.man_date}" "API Manual" LINUX' + "\n"
>  
>          self.data += ".SH NAME\n"
>          self.data += f"typedef {typedef} \\- {purpose}\n"
>  
> -        for section in sections:
> +        for section, text in args.sections.items():
>              self.data += f'.SH "{section}"' + "\n"
> -            self.output_highlight(sections.get(section))
> +            self.output_highlight(text)
>  
>      def out_struct(self, fname, name, args):
>          module = self.modulename
> @@ -753,7 +744,6 @@ class ManFormat(OutputFormat):
>          purpose = args.get('purpose')
>          definition = args.get('definition')
>          parameterlist = args.get('parameterlist', [])
> -        sections = args.get('sections', {})
>          parameterdescs = args.get('parameterdescs', {})
>  
>          self.data += f'.TH "{module}" 9 "{struct_type} {struct_name}" "{self.man_date}" "API Manual" LINUX' + "\n"
> @@ -782,6 +772,6 @@ class ManFormat(OutputFormat):
>              self.data += f'.IP "{parameter}" 12' + "\n"
>              self.output_highlight(parameterdescs.get(parameter_name))
>  
> -        for section in sections:
> +        for section, text in args.sections.items():
>              self.data += f'.SH "{section}"' + "\n"
> -            self.output_highlight(sections.get(section))
> +            self.output_highlight(text)
> diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py
> index 2e00c8b3a5f2..608f3a1045dc 100644
> --- a/scripts/lib/kdoc/kdoc_parser.py
> +++ b/scripts/lib/kdoc/kdoc_parser.py
> @@ -272,13 +272,13 @@ class KernelDoc:
>          item = KdocItem(name, dtype, self.entry.declaration_start_line, **args)
>          item.warnings = self.entry.warnings
>  
> -        sections = item.get('sections', {})
> -
>          # Drop empty sections
>          # TODO: improve empty sections logic to emit warnings
> +        sections = self.entry.sections
>          for section in ["Description", "Return"]:
>              if section in sections and not sections[section].rstrip():
>                  del sections[section]
> +        item.set_sections(sections, self.entry.section_start_lines)
>  
>          self.entries.append(item)
>  
> @@ -824,8 +824,6 @@ class KernelDoc:
>                                  parameterdescs=self.entry.parameterdescs,
>                                  parametertypes=self.entry.parametertypes,
>                                  parameterdesc_start_lines=self.entry.parameterdesc_start_lines,
> -                                sections=self.entry.sections,
> -                                section_start_lines=self.entry.section_start_lines,
>                                  purpose=self.entry.declaration_purpose)
>  
>      def dump_enum(self, ln, proto):
> @@ -908,8 +906,6 @@ class KernelDoc:
>                                  parameterlist=self.entry.parameterlist,
>                                  parameterdescs=self.entry.parameterdescs,
>                                  parameterdesc_start_lines=self.entry.parameterdesc_start_lines,
> -                                sections=self.entry.sections,
> -                                section_start_lines=self.entry.section_start_lines,
>                                  purpose=self.entry.declaration_purpose)
>  
>      def dump_declaration(self, ln, prototype):
> @@ -1079,8 +1075,6 @@ class KernelDoc:
>                                      parameterdescs=self.entry.parameterdescs,
>                                      parametertypes=self.entry.parametertypes,
>                                      parameterdesc_start_lines=self.entry.parameterdesc_start_lines,
> -                                    sections=self.entry.sections,
> -                                    section_start_lines=self.entry.section_start_lines,
>                                      purpose=self.entry.declaration_purpose,
>                                      func_macro=func_macro)
>          else:
> @@ -1092,8 +1086,6 @@ class KernelDoc:
>                                      parameterdescs=self.entry.parameterdescs,
>                                      parametertypes=self.entry.parametertypes,
>                                      parameterdesc_start_lines=self.entry.parameterdesc_start_lines,
> -                                    sections=self.entry.sections,
> -                                    section_start_lines=self.entry.section_start_lines,
>                                      purpose=self.entry.declaration_purpose,
>                                      func_macro=func_macro)
>  
> @@ -1137,8 +1129,6 @@ class KernelDoc:
>                                      parameterdescs=self.entry.parameterdescs,
>                                      parametertypes=self.entry.parametertypes,
>                                      parameterdesc_start_lines=self.entry.parameterdesc_start_lines,
> -                                    sections=self.entry.sections,
> -                                    section_start_lines=self.entry.section_start_lines,
>                                      purpose=self.entry.declaration_purpose)
>              return
>  
> @@ -1159,8 +1149,6 @@ class KernelDoc:
>  
>              self.output_declaration('typedef', declaration_name,
>                                      typedef=declaration_name,
> -                                    sections=self.entry.sections,
> -                                    section_start_lines=self.entry.section_start_lines,
>                                      purpose=self.entry.declaration_purpose)
>              return
>  
> @@ -1642,9 +1630,7 @@ class KernelDoc:
>  
>          if doc_end.search(line):
>              self.dump_section()
> -            self.output_declaration("doc", self.entry.identifier,
> -                                    sections=self.entry.sections,
> -                                    section_start_lines=self.entry.section_start_lines)
> +            self.output_declaration("doc", self.entry.identifier)
>              self.reset_state(ln)
>  
>          elif doc_content.search(line):



Thanks,
Mauro

Re: [PATCH 04/12] docs: kdoc: Centralize handling of the item section list

Posted by Jonathan Corbet 7 months ago

Mauro Carvalho Chehab <mchehab+huawei@kernel.org> writes:

> Em Wed,  2 Jul 2025 16:35:16 -0600
> Jonathan Corbet <corbet@lwn.net> escreveu:
>
>> The section list always comes directly from the under-construction entry
>> and is used uniformly.  Formalize section handling in the KdocItem class,
>> and have output_declaration() load the sections directly from the entry,
>> eliminating a lot of duplicated, verbose parameters.
>> 
>> Signed-off-by: Jonathan Corbet <corbet@lwn.net>
>> ---
>>  scripts/lib/kdoc/kdoc_item.py   |  8 ++++++++
>>  scripts/lib/kdoc/kdoc_output.py | 36 ++++++++++++---------------------
>>  scripts/lib/kdoc/kdoc_parser.py | 20 +++---------------
>>  3 files changed, 24 insertions(+), 40 deletions(-)
>> 
>> diff --git a/scripts/lib/kdoc/kdoc_item.py b/scripts/lib/kdoc/kdoc_item.py
>> index add2cc772fec..c8329019a219 100644
>> --- a/scripts/lib/kdoc/kdoc_item.py
>> +++ b/scripts/lib/kdoc/kdoc_item.py
>> @@ -9,6 +9,7 @@ class KdocItem:
>>          self.name = name
>>          self.type = type
>>          self.declaration_start_line = start_line
>> +        self.sections = self.sections_start_lines = { }
>
> Nitpicks:
> - to make coding-style uniform, please use "{}" without spaces;
> - Please place one statement per line, just like we (usually) do in Kernel. 

Sure, fine.

>   In this specific case, I strongly suspect that what you coded is not
>   implementing the semantics you want. See:
>
> 	1. are you creating a single dict and placing the same dict on two
> 	   variables?
>   or:
> 	2. are you initializing two different vars with their own empty
> 	   dict?
>
> The subsequent code assumes (2), but a quick check with python3 command
> line:

As you note, the subsequent code does *not* actually assume that; I know
the way Python semantics work :)  But I can separate the lines and make
things explicit.

Thanks,

jon