[v3] kernel-doc: make it parse new functions and structs

[PATCH v3 19/30] docs: kdoc_re: make NestedMatch use KernRe

Posted by Mauro Carvalho Chehab 1 week, 3 days ago

Instead of using re.compile, let's create the class with the
regex and use KernRe to keep it cached.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py | 55 ++++++++--------------------
 tools/lib/python/kdoc/kdoc_re.py     | 22 ++++++++---
 2 files changed, 32 insertions(+), 45 deletions(-)

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index 4d52a00acfad..3a5614106af7 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -153,32 +153,7 @@ struct_xforms = [
     (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'),
     (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'),
     (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'),
-]
-#
-# Struct regexes here are guaranteed to have the end delimiter matching
-# the start delimiter. Yet, right now, only one replace group
-# is allowed.
-#
-struct_nested_prefixes = [
-    (re.compile(r'\bSTRUCT_GROUP\('), r'\0'),
-]
-
-#
-# Function Regexes here are guaranteed to have the end delimiter matching
-# the start delimiter.
-#
-function_nested_prefixes = [
-    (re.compile(r"__cond_acquires\s*\("), ""),
-    (re.compile(r"__cond_releases\s*\("), ""),
-    (re.compile(r"__acquires\s*\("), ""),
-    (re.compile(r"__releases\s*\("), ""),
-    (re.compile(r"__must_hold\s*\("), ""),
-    (re.compile(r"__must_not_hold\s*\("), ""),
-    (re.compile(r"__must_hold_shared\s*\("), ""),
-    (re.compile(r"__cond_acquires_shared\s*\("), ""),
-    (re.compile(r"__acquires_shared\s*\("), ""),
-    (re.compile(r"__releases_shared\s*\("), ""),
-    (re.compile(r"__attribute__\s*\("), ""),
+    (NestedMatch(r'\bSTRUCT_GROUP\('), r'\0'),
 ]
 
 #
@@ -210,6 +185,17 @@ function_xforms = [
     (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"),
     (KernRe(r"__no_context_analysis\s*"), ""),
     (KernRe(r"__attribute_const__ +"), ""),
+    (NestedMatch(r"__cond_acquires\s*\("), ""),
+    (NestedMatch(r"__cond_releases\s*\("), ""),
+    (NestedMatch(r"__acquires\s*\("), ""),
+    (NestedMatch(r"__releases\s*\("), ""),
+    (NestedMatch(r"__must_hold\s*\("), ""),
+    (NestedMatch(r"__must_not_hold\s*\("), ""),
+    (NestedMatch(r"__must_hold_shared\s*\("), ""),
+    (NestedMatch(r"__cond_acquires_shared\s*\("), ""),
+    (NestedMatch(r"__acquires_shared\s*\("), ""),
+    (NestedMatch(r"__releases_shared\s*\("), ""),
+    (NestedMatch(r"__attribute__\s*\("), ""),
 ]
 
 #
@@ -230,7 +216,6 @@ var_xforms = [
 # Ancillary functions
 #
 
-
 multi_space = KernRe(r'\s\s+')
 def trim_whitespace(s):
     """
@@ -424,8 +409,6 @@ class KernelDoc:
         # Place all potential outputs into an array
         self.entries = []
 
-        self.nested = NestedMatch()
-
         #
         # We need Python 3.7 for its "dicts remember the insertion
         # order" guarantee
@@ -523,14 +506,11 @@ class KernelDoc:
         # State flags
         self.state = state.NORMAL
 
-    def apply_transforms(self, regex_xforms, nested_xforms, text):
+    def apply_transforms(self, xforms, text):
         """Apply a set of transforms to a block of text."""
-        for search, subst in regex_xforms:
+        for search, subst in xforms:
             text = search.sub(subst, text)
 
-        for search, sub in nested_xforms:
-            text = self.nested.sub(search, sub, text)
-
         return text.strip()
 
     def push_parameter(self, ln, decl_type, param, dtype,
@@ -909,8 +889,7 @@ class KernelDoc:
         # Go through the list of members applying all of our transformations.
         #
         members = trim_private_members(members)
-        members = self.apply_transforms(struct_xforms, struct_nested_prefixes,
-                                        members)
+        members = self.apply_transforms(struct_xforms, members)
 
         #
         # Deal with embedded struct and union members, and drop enums entirely.
@@ -1125,9 +1104,7 @@ class KernelDoc:
             #
             # Apply the initial transformations.
             #
-            prototype = self.apply_transforms(function_xforms,
-                                              function_nested_prefixes,
-                                              prototype)
+            prototype = self.apply_transforms(function_xforms, prototype)
 
         # Yes, this truly is vile.  We are looking for:
         # 1. Return type (may be nothing if we're looking at a macro)
diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py
index 8933e1a62776..e34d55c25680 100644
--- a/tools/lib/python/kdoc/kdoc_re.py
+++ b/tools/lib/python/kdoc/kdoc_re.py
@@ -200,7 +200,10 @@ class NestedMatch:
     #
     #   FOO(arg1, arg2, arg3)
 
-    def _search(self, regex, line):
+    def __init__(self, regex):
+        self.regex = KernRe(regex)
+
+    def _search(self, line):
         """
         Finds paired blocks for a regex that ends with a delimiter.
 
@@ -222,7 +225,7 @@ class NestedMatch:
 
         stack = []
 
-        for match_re in regex.finditer(line):
+        for match_re in self.regex.finditer(line):
             start = match_re.start()
             offset = match_re.end()
             string_char = None
@@ -270,7 +273,7 @@ class NestedMatch:
                         yield start, offset, pos + 1
                         break
 
-    def search(self, regex, line):
+    def search(self, line):
         """
         This is similar to re.search:
 
@@ -278,11 +281,11 @@ class NestedMatch:
         returning occurrences only if all delimiters are paired.
         """
 
-        for t in self._search(regex, line):
+        for t in self._search(line):
 
             yield line[t[0]:t[2]]
 
-    def sub(self, regex, sub, line, count=0):
+    def sub(self, sub, line, count=0):
         """
         This is similar to re.sub:
 
@@ -301,7 +304,7 @@ class NestedMatch:
         cur_pos = 0
         n = 0
 
-        for start, end, pos in self._search(regex, line):
+        for start, end, pos in self._search(line):
             out += line[cur_pos:start]
 
             # Value, ignoring start/end delimiters
@@ -328,3 +331,10 @@ class NestedMatch:
         out += line[cur_pos:l]
 
         return out
+
+    def __repr__(self):
+        """
+        Returns a displayable version of the class init.
+        """
+
+        return f'NestedMatch("{self.regex.regex.pattern}")'
-- 
2.52.0

RE: [Intel-wired-lan] [PATCH v3 19/30] docs: kdoc_re: make NextedMatch use KernRe

Posted by Kwapulinski, Piotr 1 week, 2 days ago

>-----Original Message-----
>From: Intel-wired-lan <intel-wired-lan-bounces@osuosl.org> On Behalf Of Mauro Carvalho Chehab
>Sent: Thursday, January 29, 2026 9:08 AM
>To: Jonathan Corbet <corbet@lwn.net>; Linux Doc Mailing List <linux-doc@vger.kernel.org>
>Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>; bpf@vger.kernel.org; intel-wired-lan@lists.osuosl.org; linux-hardening@vger.kernel.org; linux-kernel@vger.kernel.org; netdev@vger.kernel.org; Mauro Carvalho Chehab <mchehab@kernel.org>; Randy Dunlap <rdunlap@infradead.org>
>Subject: [Intel-wired-lan] [PATCH v3 19/30] docs: kdoc_re: make NextedMatch use KernRe
>
>Instead of using re_compile, let's create the class with the rejex and use KernRe to keep it cached.
Thank you for the patch.
Did you mean "regex" ?
Piotr

[...]

RE: [Intel-wired-lan] [PATCH v3 19/30] docs: kdoc_re: make NextedMatch use KernRe

Posted by Loktionov, Aleksandr 1 week, 3 days ago


> -----Original Message-----
> From: Intel-wired-lan <intel-wired-lan-bounces@osuosl.org> On Behalf
> Of Mauro Carvalho Chehab
> Sent: Thursday, January 29, 2026 9:08 AM
> To: Jonathan Corbet <corbet@lwn.net>; Linux Doc Mailing List <linux-
> doc@vger.kernel.org>
> Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>;
> bpf@vger.kernel.org; intel-wired-lan@lists.osuosl.org; linux-
> hardening@vger.kernel.org; linux-kernel@vger.kernel.org;
> netdev@vger.kernel.org; Mauro Carvalho Chehab <mchehab@kernel.org>;
> Randy Dunlap <rdunlap@infradead.org>
> Subject: [Intel-wired-lan] [PATCH v3 19/30] docs: kdoc_re: make
> NextedMatch use KernRe
> 
> Instead of using re_compile, let's create the class with the rejex and
> use KernRe to keep it cached.
> 
> Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
> ---
>  tools/lib/python/kdoc/kdoc_parser.py | 55 ++++++++-------------------
> -
>  tools/lib/python/kdoc/kdoc_re.py     | 22 ++++++++---
>  2 files changed, 32 insertions(+), 45 deletions(-)
> 
> diff --git a/tools/lib/python/kdoc/kdoc_parser.py
> b/tools/lib/python/kdoc/kdoc_parser.py
> index 4d52a00acfad..3a5614106af7 100644
> --- a/tools/lib/python/kdoc/kdoc_parser.py
> +++ b/tools/lib/python/kdoc/kdoc_parser.py
> @@ -153,32 +153,7 @@ struct_xforms = [
>      (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern +
> r'\)', re.S), r'dma_addr_t \1'),
>      (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern +
> r'\)', re.S), r'__u32 \1'),
>      (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64
> \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'), -] -# -# Struct regexes
> here are guaranteed to have the end delimiter matching -# the start
> delimiter. Yet, right now, only one replace group -# is allowed.
> -#
> -struct_nested_prefixes = [
> -    (re.compile(r'\bSTRUCT_GROUP\('), r'\0'),
> -]
> -
> -#
> -# Function Regexes here are guaranteed to have the end delimiter
> matching -# the start delimiter.
> -#
> -function_nested_prefixes = [
> -    (re.compile(r"__cond_acquires\s*\("), ""),
> -    (re.compile(r"__cond_releases\s*\("), ""),
> -    (re.compile(r"__acquires\s*\("), ""),
> -    (re.compile(r"__releases\s*\("), ""),
> -    (re.compile(r"__must_hold\s*\("), ""),
> -    (re.compile(r"__must_not_hold\s*\("), ""),
> -    (re.compile(r"__must_hold_shared\s*\("), ""),
> -    (re.compile(r"__cond_acquires_shared\s*\("), ""),
> -    (re.compile(r"__acquires_shared\s*\("), ""),
> -    (re.compile(r"__releases_shared\s*\("), ""),
> -    (re.compile(r"__attribute__\s*\("), ""),
> +    (NestedMatch(r'\bSTRUCT_GROUP\('), r'\0'),
>  ]
> 
>  #
> @@ -210,6 +185,17 @@ function_xforms = [
>      (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"),
> r"\1, \2"),
>      (KernRe(r"__no_context_analysis\s*"), ""),
>      (KernRe(r"__attribute_const__ +"), ""),
> +    (NestedMatch(r"__cond_acquires\s*\("), ""),
> +    (NestedMatch(r"__cond_releases\s*\("), ""),
> +    (NestedMatch(r"__acquires\s*\("), ""),
> +    (NestedMatch(r"__releases\s*\("), ""),
> +    (NestedMatch(r"__must_hold\s*\("), ""),
> +    (NestedMatch(r"__must_not_hold\s*\("), ""),
> +    (NestedMatch(r"__must_hold_shared\s*\("), ""),
> +    (NestedMatch(r"__cond_acquires_shared\s*\("), ""),
> +    (NestedMatch(r"__acquires_shared\s*\("), ""),
> +    (NestedMatch(r"__releases_shared\s*\("), ""),
> +    (NestedMatch(r"__attribute__\s*\("), ""),
>  ]
> 
>  #
> @@ -230,7 +216,6 @@ var_xforms = [
>  # Ancillary functions
>  #
> 
> -
>  multi_space = KernRe(r'\s\s+')
>  def trim_whitespace(s):
>      """
> @@ -424,8 +409,6 @@ class KernelDoc:
>          # Place all potential outputs into an array
>          self.entries = []
> 
> -        self.nested = NestedMatch()
> -
>          #
>          # We need Python 3.7 for its "dicts remember the insertion
>          # order" guarantee
> @@ -523,14 +506,11 @@ class KernelDoc:
>          # State flags
>          self.state = state.NORMAL
> 
> -    def apply_transforms(self, regex_xforms, nested_xforms, text):
> +    def apply_transforms(self, xforms, text):
>          """Apply a set of transforms to a block of text."""
> -        for search, subst in regex_xforms:
> +        for search, subst in xforms:
>              text = search.sub(subst, text)
> 
> -        for search, sub in nested_xforms:
> -            text = self.nested.sub(search, sub, text)
> -
>          return text.strip()
> 
>      def push_parameter(self, ln, decl_type, param, dtype, @@ -909,8
> +889,7 @@ class KernelDoc:
>          # Go through the list of members applying all of our
> transformations.
>          #
>          members = trim_private_members(members)
> -        members = self.apply_transforms(struct_xforms,
> struct_nested_prefixes,
> -                                        members)
> +        members = self.apply_transforms(struct_xforms, members)
> 
>          #
>          # Deal with embedded struct and union members, and drop enums
> entirely.
> @@ -1125,9 +1104,7 @@ class KernelDoc:
>              #
>              # Apply the initial transformations.
>              #
> -            prototype = self.apply_transforms(function_xforms,
> -
> function_nested_prefixes,
> -                                              prototype)
> +            prototype = self.apply_transforms(function_xforms,
> + prototype)
> 
>          # Yes, this truly is vile.  We are looking for:
>          # 1. Return type (may be nothing if we're looking at a macro)
> diff --git a/tools/lib/python/kdoc/kdoc_re.py
> b/tools/lib/python/kdoc/kdoc_re.py
> index 8933e1a62776..e34d55c25680 100644
> --- a/tools/lib/python/kdoc/kdoc_re.py
> +++ b/tools/lib/python/kdoc/kdoc_re.py
> @@ -200,7 +200,10 @@ class NestedMatch:
>      #
>      #   FOO(arg1, arg2, arg3)
> 
> -    def _search(self, regex, line):
> +    def __init__(self, regex):
> +        self.regex = KernRe(regex)
> +
> +    def _search(self, line):
>          """
>          Finds paired blocks for a regex that ends with a delimiter.
> 
> @@ -222,7 +225,7 @@ class NestedMatch:
> 
>          stack = []
> 
> -        for match_re in regex.finditer(line):
> +        for match_re in self.regex.finditer(line):
>              start = match_re.start()
>              offset = match_re.end()
>              string_char = None
> @@ -270,7 +273,7 @@ class NestedMatch:
>                          yield start, offset, pos + 1
>                          break
> 
> -    def search(self, regex, line):
> +    def search(self, line):
>          """
>          This is similar to re.search:
> 
> @@ -278,11 +281,11 @@ class NestedMatch:
>          returning occurrences only if all delimiters are paired.
>          """
> 
> -        for t in self._search(regex, line):
> +        for t in self._search(line):
> 
>              yield line[t[0]:t[2]]
> 
> -    def sub(self, regex, sub, line, count=0):
> +    def sub(self, sub, line, count=0):
>          """
>          This is similar to re.sub:
> 
> @@ -301,7 +304,7 @@ class NestedMatch:
>          cur_pos = 0
>          n = 0
> 
> -        for start, end, pos in self._search(regex, line):
> +        for start, end, pos in self._search(line):
>              out += line[cur_pos:start]
> 
>              # Value, ignoring start/end delimiters @@ -328,3 +331,10
> @@ class NestedMatch:
>          out += line[cur_pos:l]
> 
>          return out
> +
> +    def __repr__(self):
> +        """
> +        Returns a displayable version of the class init.
> +        """
> +
> +        return f'NestedMatch("{self.regex.regex.pattern}")'
> --
> 2.52.0

Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>