[v3] kernel-doc: use a C lexical tokenizer for transforms

[PATCH v3 06/22] docs: kdoc: use tokenizer to handle comments on structs

Posted by Mauro Carvalho Chehab 3 weeks ago

Handle comments inside structs better. With these changes,
all unit tests now pass:

  test_private:
    TestPublicPrivate:
        test balanced_inner_private:                                 OK
        test balanced_non_greddy_private:                            OK
        test balanced_private:                                       OK
        test no private:                                             OK
        test unbalanced_inner_private:                               OK
        test unbalanced_private:                                     OK
        test unbalanced_struct_group_tagged_with_private:            OK
        test unbalanced_two_struct_group_tagged_first_with_private:  OK
        test unbalanced_without_end_of_line:                         OK

  Ran 9 tests

This also solves a bug when handling STRUCT_GROUP() with a private
comment on it:

	@@ -397134,7 +397134,7 @@ basic V4L2 device-level support.
	             unsigned int    max_len;
	             unsigned int    offset;
	             struct page_pool_params_slow  slow;
	-            STRUCT_GROUP( struct net_device *netdev;
	+            struct net_device *netdev;
	             unsigned int queue_idx;
	             unsigned int    flags;
	       };

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Message-ID: <f83ee9e8c38407eaab6ad10d4ccf155fb36683cc.1773074166.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index 4b3c555e6c8e..62d8030cf532 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -13,6 +13,7 @@ import sys
 import re
 from pprint import pformat
 
+from kdoc.c_lex import CTokenizer
 from kdoc.kdoc_re import NestedMatch, KernRe
 from kdoc.kdoc_item import KdocItem
 
@@ -84,15 +85,9 @@ def trim_private_members(text):
     """
     Remove ``struct``/``enum`` members that have been marked "private".
     """
-    # First look for a "public:" block that ends a private region, then
-    # handle the "private until the end" case.
-    #
-    text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text)
-    text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text)
-    #
-    # We needed the comments to do the above, but now we can take them out.
-    #
-    return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip()
+
+    tokens = CTokenizer(text)
+    return str(tokens)
 
 class state:
     """
-- 
2.52.0

RE: [PATCH v3 06/22] docs: kdoc: use tokenizer to handle comments on structs

Posted by Loktionov, Aleksandr 3 weeks ago


> -----Original Message-----
> From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
> Sent: Tuesday, March 17, 2026 7:09 PM
> To: Jonathan Corbet <corbet@lwn.net>; Linux Doc Mailing List <linux-doc@vger.kernel.org>
> Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>; linux-hardening@vger.kernel.org; linux-kernel@vger.kernel.org; Loktionov, Aleksandr <aleksandr.loktionov@intel.com>; Randy Dunlap <rdunlap@infradead.org>
> Subject: [PATCH v3 06/22] docs: kdoc: use tokenizer to handle comments on structs
> 
> Better handle comments inside structs. After those changes, all
> unittests now pass:
> 
>   test_private:
>     TestPublicPrivate:
>         test balanced_inner_private:                                 OK
>         test balanced_non_greddy_private:                            OK
>         test balanced_private:                                       OK
>         test no private:                                             OK
>         test unbalanced_inner_private:                               OK
>         test unbalanced_private:                                     OK
>         test unbalanced_struct_group_tagged_with_private:            OK
>         test unbalanced_two_struct_group_tagged_first_with_private:  OK
>         test unbalanced_without_end_of_line:                         OK
> 
>   Ran 9 tests
> 
> This also solves a bug when handling STRUCT_GROUP() with a private
> comment on it:
> 
> 	@@ -397134,7 +397134,7 @@ basic V4L2 device-level support.
> 	             unsigned int    max_len;
> 	             unsigned int    offset;
> 	             struct page_pool_params_slow  slow;
> 	-            STRUCT_GROUP( struct net_device *netdev;
> 	+            struct net_device *netdev;
> 	             unsigned int queue_idx;
> 	             unsigned int    flags;
> 	       };
> 
> Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
> Message-ID: <f83ee9e8c38407eaab6ad10d4ccf155fb36683cc.1773074166.git.mchehab+huawei@kernel.org>
> ---
>  tools/lib/python/kdoc/kdoc_parser.py | 13 ++++---------
>  1 file changed, 4 insertions(+), 9 deletions(-)
> 
> diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
> index 4b3c555e6c8e..62d8030cf532 100644
> --- a/tools/lib/python/kdoc/kdoc_parser.py
> +++ b/tools/lib/python/kdoc/kdoc_parser.py
> @@ -13,6 +13,7 @@ import sys
>  import re
>  from pprint import pformat
> 
> +from kdoc.c_lex import CTokenizer
>  from kdoc.kdoc_re import NestedMatch, KernRe
>  from kdoc.kdoc_item import KdocItem
> 
> @@ -84,15 +85,9 @@ def trim_private_members(text):
>      """
>      Remove ``struct``/``enum`` members that have been marked "private".
>      """
> -    # First look for a "public:" block that ends a private region, then
> -    # handle the "private until the end" case.
> -    #
> -    text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text)
> -    text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text)
> -    #
> -    # We needed the comments to do the above, but now we can take them out.
> -    #
> -    return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip()
> +
> +    tokens = CTokenizer(text)
> +    return str(tokens)
> 
>  class state:
>      """
> --
> 2.52.0

Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>