From nobody Tue Apr  7 18:00:33 2026
Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org
 [10.30.226.201])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id 0F5223AD537;
	Thu, 12 Mar 2026 07:12:34 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=10.30.226.201
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1773299554; cv=none;
 b=QIHIEipoBel4MgfNzdAW1F5x5foDhxUCsf6PepUFOzPCX5GsSifkXS3ruB6zS73/I72e3RXZCBzYJn7sUS3p/mBzJhDxqFtzzxzDmh43ep9NPf1t1MyLtID6fWBYrEodsj5CKyJQXNB2+1o5sNvxysY7u3ZxvxYUAxAdItjBLYw=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1773299554; c=relaxed/simple;
	bh=yNO5pOlAN1IiiLVxsvoOjfd9gNlGPseIkyDe7oIdI0A=;
	h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:
	 MIME-Version:Content-Type;
 b=cjgsLGTw0vLBkZWQwhcWyLZ4Va1I58z9/n49RGMp9uCk5LfI9cChekgakIJWenMPwpMdjVrOIyAG2q2HhVIBNVXZF1ItvMRUN1ZWCJs2P9Wi+baQkTrnaUJiMu0G4lfUBen4hRVVVj3XyVoSefIja6evi+U495JiAh8dgmn+S0s=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b=HXJyiqKD; arc=none smtp.client-ip=10.30.226.201
Authentication-Results: smtp.subspace.kernel.org;
	dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b="HXJyiqKD"
Received: by smtp.kernel.org (Postfix) with ESMTPSA id 5D5FEC2BCB0;
	Thu, 12 Mar 2026 07:12:33 +0000 (UTC)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org;
	s=k20201202; t=1773299553;
	bh=yNO5pOlAN1IiiLVxsvoOjfd9gNlGPseIkyDe7oIdI0A=;
	h=From:To:Cc:Subject:Date:In-Reply-To:References:From;
	b=HXJyiqKDaum8yUV9mqVXIQaANVZtGdLh0iO3IrpL1YcCPx5ayffY1Mt59VEfhIbls
	 LlKfGTwcy0UbwhHfR+CKEc9uXxYHHNgkP8yeP32PfZhrijzFNWsxWeWswHqxcJcZYF
	 wgBF2hc+V+SjCb84Zj/SP9YD84MZqJmJ/PyG2vG2tmJbu/5zFQgvpu5ah2sn/rMW/U
	 c1rz3GP6bmM9uwk7m1v67+7y2wpNBrovPqlu4PmT7/csX4viR++PGzu9WEB8mro2RP
	 QnS1rXkSaYlYiQCKr4Jn6A5omY/UqKW6mULdQLeIcU2oYEI3DkUmw9FR0DDb7ePthc
	 jIvLsVC73Fijg==
Received: from mchehab by mail.kernel.org with local (Exim 4.99.1)
	(envelope-from <mchehab+huawei@kernel.org>)
	id 1w0aDf-000000077g4-1J3u;
	Thu, 12 Mar 2026 08:12:31 +0100
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
To: Jonathan Corbet <corbet@lwn.net>,
	Linux Doc Mailing List <linux-doc@vger.kernel.org>
Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>,
	linux-hardening@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	Mauro Carvalho Chehab <mchehab@kernel.org>,
	Shuah Khan <skhan@linuxfoundation.org>
Subject: [PATCH v2 01/20] docs: python: add helpers to run unit tests
Date: Thu, 12 Mar 2026 08:12:09 +0100
Message-ID: 
 <d4c7b04f72ef1efb2c5f2a7a17a5de5ba0378767.1773297828.git.mchehab+huawei@kernel.org>
X-Mailer: git-send-email 2.53.0
In-Reply-To: <cover.1773297828.git.mchehab+huawei@kernel.org>
References: <cover.1773297828.git.mchehab+huawei@kernel.org>
Precedence: bulk
X-Mailing-List: linux-kernel@vger.kernel.org
List-Id: <linux-kernel.vger.kernel.org>
List-Subscribe: <mailto:linux-kernel+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-kernel+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
Sender: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>

While Python's internal libraries have support for unit tests, their
output is not nice. Add a helper module to improve it.

I wrote this module last year while testing some scripts I used
internally. The initial skeleton was generated with the help of
LLM tools, but it was highly modified to ensure that it will work
as I would expect.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Message-ID: <37999041f616ddef41e84cf2686c0264d1a51dc9.1773074166.git.mcheha=
b+huawei@kernel.org>
---
 Documentation/tools/python.rst      |   2 +
 Documentation/tools/unittest.rst    |  24 ++
 tools/lib/python/unittest_helper.py | 353 ++++++++++++++++++++++++++++
 3 files changed, 379 insertions(+)
 create mode 100644 Documentation/tools/unittest.rst
 create mode 100755 tools/lib/python/unittest_helper.py

diff --git a/Documentation/tools/python.rst b/Documentation/tools/python.rst
index 1444c1816735..3b7299161f20 100644
--- a/Documentation/tools/python.rst
+++ b/Documentation/tools/python.rst
@@ -11,3 +11,5 @@ Python libraries
    feat
    kdoc
    kabi
+
+   unittest
diff --git a/Documentation/tools/unittest.rst b/Documentation/tools/unittes=
t.rst
new file mode 100644
index 000000000000..14a2b2a65236
--- /dev/null
+++ b/Documentation/tools/unittest.rst
@@ -0,0 +1,24 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
+Python unittest
+=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
+
+Checking consistency of python modules can be complex. Sometimes, it is
+useful to define a set of unit tests to help check them.
+
+While the actual test implementation is usecase dependent, Python already
+provides a standard way to add unit tests by using ``import unittest``.
+
+Using such a class requires setting up a test suite. Also, the default for=
mat
+is a little bit awkward. To improve it and provide a more uniform way to
+report errors, some unittest classes and functions are defined.
+
+
+Unittest helper module
+=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
+
+.. automodule:: lib.python.unittest_helper
+   :members:
+   :show-inheritance:
+   :undoc-members:
diff --git a/tools/lib/python/unittest_helper.py b/tools/lib/python/unittes=
t_helper.py
new file mode 100755
index 000000000000..55d444cd73d4
--- /dev/null
+++ b/tools/lib/python/unittest_helper.py
@@ -0,0 +1,353 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2025-2026: Mauro Carvalho Chehab <mchehab@kernel.org>.
+#
+# pylint: disable=3DC0103,R0912,R0914,E1101
+
+"""
+Provides helper functions and classes to execute python unit tests.
+
+Those helper functions provide a nice colored output summary of each
+executed test and, when a test fails, they show the difference in diff
+format when running in verbose mode, like::
+
+    $ tools/unittests/nested_match.py -v
+    ...
+    Traceback (most recent call last):
+    File "/new_devel/docs/tools/unittests/nested_match.py", line 69, in te=
st_count_limit
+        self.assertEqual(replaced, "bar(a); bar(b); foo(c)")
+        ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+    AssertionError: 'bar(a) foo(b); foo(c)' !=3D 'bar(a); bar(b); foo(c)'
+    - bar(a) foo(b); foo(c)
+    ?       ^^^^
+    + bar(a); bar(b); foo(c)
+    ?       ^^^^^
+    ...
+
+It also allows filtering what tests will be executed via ``-k`` parameter.
+
+Typical usage is to do::
+
+    from unittest_helper import run_unittest
+    ...
+
+    if __name__ =3D=3D "__main__":
+        run_unittest(__file__)
+
+If passing arguments is needed, on a more complex scenario, it can be
+used like on this example::
+
+    from unittest_helper import TestUnits, run_unittest
+    ...
+    env =3D {'sudo': ""}
+    ...
+    if __name__ =3D=3D "__main__":
+        runner =3D TestUnits()
+        base_parser =3D runner.parse_args()
+        base_parser.add_argument('--sudo', action=3D'store_true',
+                                help=3D'Enable tests requiring sudo privil=
eges')
+
+        args =3D base_parser.parse_args()
+
+        # Update module-level flag
+        if args.sudo:
+            env['sudo'] =3D "1"
+
+        # Run tests with customized arguments
+        runner.run(__file__, parser=3Dbase_parser, args=3Dargs, env=3Denv)
+"""
+
+import argparse
+import atexit
+import os
+import re
+import unittest
+import sys
+
+from unittest.mock import patch
+
+
+class Summary(unittest.TestResult):
+    """
+    Overrides ``unittest.TestResult`` class to provide a nice colored
+    summary. When in verbose mode, displays actual/expected difference in
+    unified diff format.
+    """
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        #: Dictionary to store organized test results.
+        self.test_results =3D {}
+
+        #: max length of the test names.
+        self.max_name_length =3D 0
+
+    def startTest(self, test):
+        super().startTest(test)
+        test_id =3D test.id()
+        parts =3D test_id.split(".")
+
+        # Extract module, class, and method names
+        if len(parts) >=3D 3:
+            module_name =3D parts[-3]
+        else:
+            module_name =3D ""
+        if len(parts) >=3D 2:
+            class_name =3D parts[-2]
+        else:
+            class_name =3D ""
+
+        method_name =3D parts[-1]
+
+        # Build the hierarchical structure
+        if module_name not in self.test_results:
+            self.test_results[module_name] =3D {}
+
+        if class_name not in self.test_results[module_name]:
+            self.test_results[module_name][class_name] =3D []
+
+        # Track maximum test name length for alignment
+        display_name =3D f"{method_name}:"
+
+        self.max_name_length =3D max(len(display_name), self.max_name_leng=
th)
+
+    def _record_test(self, test, status):
+        test_id =3D test.id()
+        parts =3D test_id.split(".")
+        if len(parts) >=3D 3:
+            module_name =3D parts[-3]
+        else:
+            module_name =3D ""
+        if len(parts) >=3D 2:
+            class_name =3D parts[-2]
+        else:
+            class_name =3D ""
+        method_name =3D parts[-1]
+        self.test_results[module_name][class_name].append((method_name, st=
atus))
+
+    def addSuccess(self, test):
+        super().addSuccess(test)
+        self._record_test(test, "OK")
+
+    def addFailure(self, test, err):
+        super().addFailure(test, err)
+        self._record_test(test, "FAIL")
+
+    def addError(self, test, err):
+        super().addError(test, err)
+        self._record_test(test, "ERROR")
+
+    def addSkip(self, test, reason):
+        super().addSkip(test, reason)
+        self._record_test(test, f"SKIP ({reason})")
+
+    def printResults(self):
+        """
+        Print results using colors if tty.
+        """
+        # Check for ANSI color support
+        use_color =3D sys.stdout.isatty()
+        COLORS =3D {
+            "OK":            "\033[32m",   # Green
+            "FAIL":          "\033[31m",   # Red
+            "SKIP":          "\033[1;33m", # Yellow
+            "PARTIAL":       "\033[33m",   # Orange
+            "EXPECTED_FAIL": "\033[36m",   # Cyan
+            "reset":         "\033[0m",    # Reset to default terminal col=
or
+        }
+        if not use_color:
+            for c in COLORS:
+                COLORS[c] =3D ""
+
+        # Calculate maximum test name length
+        if not self.test_results:
+            return
+        try:
+            lengths =3D []
+            for module in self.test_results.values():
+                for tests in module.values():
+                    for test_name, _ in tests:
+                        lengths.append(len(test_name) + 1)  # +1 for colon
+            max_length =3D max(lengths) + 2  # Additional padding
+        except ValueError:
+            sys.exit("Test list is empty")
+
+        # Print results
+        for module_name, classes in self.test_results.items():
+            print(f"{module_name}:")
+            for class_name, tests in classes.items():
+                print(f"    {class_name}:")
+                for test_name, status in tests:
+                    # Get base status without reason for SKIP
+                    if status.startswith("SKIP"):
+                        status_code =3D status.split()[0]
+                    else:
+                        status_code =3D status
+                    color =3D COLORS.get(status_code, "")
+                    print(
+                        f"        {test_name + ':':<{max_length}}{color}{s=
tatus}{COLORS['reset']}"
+                    )
+            print()
+
+        # Print summary
+        print(f"\nRan {self.testsRun} tests", end=3D"")
+        if hasattr(self, "timeTaken"):
+            print(f" in {self.timeTaken:.3f}s", end=3D"")
+        print()
+
+        if not self.wasSuccessful():
+            print(f"\n{COLORS['FAIL']}FAILED (", end=3D"")
+            failures =3D getattr(self, "failures", [])
+            errors =3D getattr(self, "errors", [])
+            if failures:
+                print(f"failures=3D{len(failures)}", end=3D"")
+            if errors:
+                if failures:
+                    print(", ", end=3D"")
+                print(f"errors=3D{len(errors)}", end=3D"")
+            print(f"){COLORS['reset']}")
+
+
+def flatten_suite(suite):
+    """Flatten test suite hierarchy."""
+    tests =3D []
+    for item in suite:
+        if isinstance(item, unittest.TestSuite):
+            tests.extend(flatten_suite(item))
+        else:
+            tests.append(item)
+    return tests
+
+
+class TestUnits:
+    """
+    Helper class to set verbosity level.
+
+    This class discovers test files, imports their unittest classes and
+    executes the tests in them.
+    """
+    def parse_args(self):
+        """Returns a parser for command line arguments."""
+        parser =3D argparse.ArgumentParser(description=3D"Test runner with=
 regex filtering")
+        parser.add_argument("-v", "--verbose", action=3D"count", default=
=3D1)
+        parser.add_argument("-f", "--failfast", action=3D"store_true")
+        parser.add_argument("-k", "--keyword",
+                            help=3D"Regex pattern to filter test methods")
+        return parser
+
+    def run(self, caller_file=3DNone, pattern=3DNone,
+            suite=3DNone, parser=3DNone, args=3DNone, env=3DNone):
+        """
+        Execute all tests from the unit test file.
+
+        It contains several optional parameters:
+
+        ``caller_file``:
+            -  name of the file that contains test.
+
+               typical usage is to place __file__ at the caller test, e.g.=
::
+
+                    if __name__ =3D=3D "__main__":
+                        TestUnits().run(__file__)
+
+        ``pattern``:
+            - optional pattern to match multiple file names. Defaults
+              to basename of ``caller_file``.
+
+        ``suite``:
+            - an unittest suite initialized by the caller using
+              ``unittest.TestLoader().discover()``.
+
+        ``parser``:
+            - an argparse parser. If not defined, this helper will create
+              one.
+
+        ``args``:
+            - an ``argparse.Namespace`` data filled by the caller.
+
+        ``env``:
+            - environment variables that will be passed to the test suite
+
+        At least ``caller_file`` or ``suite`` must be used, otherwise a
+        ``TypeError`` will be raised.
+        """
+        if not args:
+            if not parser:
+                parser =3D self.parse_args()
+            args =3D parser.parse_args()
+
+        if not caller_file and not suite:
+            raise TypeError("Either caller_file or suite is needed at Test=
Units")
+
+        verbose =3D args.verbose
+
+        if not env:
+            env =3D os.environ.copy()
+
+        env["VERBOSE"] =3D f"{verbose}"
+
+        patcher =3D patch.dict(os.environ, env)
+        patcher.start()
+        # ensure it gets stopped after
+        atexit.register(patcher.stop)
+
+
+        if verbose >=3D 2:
+            unittest.TextTestRunner(verbosity=3Dverbose).run =3D lambda su=
ite: suite
+
+        # Load ONLY tests from the calling file
+        if not suite:
+            if not pattern:
+                pattern =3D caller_file
+
+            loader =3D unittest.TestLoader()
+            suite =3D loader.discover(start_dir=3Dos.path.dirname(caller_f=
ile),
+                                    pattern=3Dos.path.basename(caller_file=
))
+
+        # Flatten the suite for environment injection
+        tests_to_inject =3D flatten_suite(suite)
+
+        # Filter tests by method name if -k specified
+        if args.keyword:
+            try:
+                pattern =3D re.compile(args.keyword)
+                filtered_suite =3D unittest.TestSuite()
+                for test in tests_to_inject:  # Use the pre-flattened list
+                    method_name =3D test.id().split(".")[-1]
+                    if pattern.search(method_name):
+                        filtered_suite.addTest(test)
+                suite =3D filtered_suite
+            except re.error as e:
+                sys.stderr.write(f"Invalid regex pattern: {e}\n")
+                sys.exit(1)
+        else:
+            # Maintain original suite structure if no keyword filtering
+            suite =3D unittest.TestSuite(tests_to_inject)
+
+        if verbose >=3D 2:
+            resultclass =3D None
+        else:
+            resultclass =3D Summary
+
+        runner =3D unittest.TextTestRunner(verbosity=3Dargs.verbose,
+                                            resultclass=3Dresultclass,
+                                            failfast=3Dargs.failfast)
+        result =3D runner.run(suite)
+        if resultclass:
+            result.printResults()
+
+        sys.exit(not result.wasSuccessful())
+
+
+def run_unittest(fname):
+    """
+    Basic usage of TestUnits class.
+
+    Use it when there's no need to pass any extra argument to the tests
+    with. The recommended way is to place this at the end of each
+    unittest module::
+
+        if __name__ =3D=3D "__main__":
+            run_unittest(__file__)
+    """
+    TestUnits().run(fname)
--=20
2.53.0
From nobody Tue Apr  7 18:00:33 2026
Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org
 [10.30.226.201])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id DAAA43ACF16;
	Thu, 12 Mar 2026 07:12:33 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=10.30.226.201
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1773299553; cv=none;
 b=PeBF/UA926XmYYrN0qcZBNXWimdEK3fW9OS4wS63GRN+FbCAZE9UDR9WQF61jZ83nt8yEHLlsrMfqI0BAN3XvmbyNNhPih+lOkVjIG8GRo4Bpbo8AQtocna/fkgItAyj+xl9hCi4CjynOMDDdMGRqNonlCOzryqanZjF4nYAp2s=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1773299553; c=relaxed/simple;
	bh=XQhCrlUmkD9zSOZhprhPJWsYbmUCBPONnwHs9Zlqn5w=;
	h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:
	 MIME-Version:Content-Type;
 b=uXE1EYsjSDUkPNeF700HT9qQijclRak7efIqMng4i1JgPyFcX8+4a3jAZpgHTNtThsDT1fw+3VP4STPH4GZgfQ+REL2M0wmq6rNIWrDGEAuY1XjnKRX1yvWsn72T8dJUAlNpIx+NJnBzpkvmlMYseTZoU+ZRw7awNOmD5kfCjQw=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b=RmSMOjp7; arc=none smtp.client-ip=10.30.226.201
Authentication-Results: smtp.subspace.kernel.org;
	dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b="RmSMOjp7"
Received: by smtp.kernel.org (Postfix) with ESMTPSA id 5A054C2BCAF;
	Thu, 12 Mar 2026 07:12:33 +0000 (UTC)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org;
	s=k20201202; t=1773299553;
	bh=XQhCrlUmkD9zSOZhprhPJWsYbmUCBPONnwHs9Zlqn5w=;
	h=From:To:Cc:Subject:Date:In-Reply-To:References:From;
	b=RmSMOjp7oksgvSaQuWzZ3dchGEkqhrk64yEV0l9a7X+pHVxohCBkFAYEGTaie/HNA
	 i+Pw9Z3qS+W4zu1TMGwMNmjFcy+wlsIg162nGQcy2b4GZmm1GdHSLH/4H0MRU9Tmw4
	 s8DWyR9EOtpv2ViHIPoHu0NqniUCwfa9Q2bOLT6EoNZHYTMFamQxySNADjYYkYwLRp
	 MYRpl5LhdjTaaYNP4wxqL27hs4jehH3fSMmD6o4/Dp5IqGp0mKiSj9ihnGjk/T3jf4
	 fajJP17BOqax227J4N/PGPAjmw35zY8JhUEVdEwv9QUgBS71NqKvZbzf1DlkMiTXs0
	 GUdpLhc81NUoA==
Received: from mchehab by mail.kernel.org with local (Exim 4.99.1)
	(envelope-from <mchehab+huawei@kernel.org>)
	id 1w0aDf-000000077g8-1Q2J;
	Thu, 12 Mar 2026 08:12:31 +0100
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
To: Jonathan Corbet <corbet@lwn.net>,
	Linux Doc Mailing List <linux-doc@vger.kernel.org>
Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>,
	linux-hardening@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	Mauro Carvalho Chehab <mchehab@kernel.org>
Subject: [PATCH v2 02/20] unittests: add a testbench to check public/private
 kdoc comments
Date: Thu, 12 Mar 2026 08:12:10 +0100
Message-ID: 
 <947d8827008a65f9c195163a9a5381efa4f29e20.1773297828.git.mchehab+huawei@kernel.org>
X-Mailer: git-send-email 2.53.0
In-Reply-To: <cover.1773297828.git.mchehab+huawei@kernel.org>
References: <cover.1773297828.git.mchehab+huawei@kernel.org>
Precedence: bulk
X-Mailing-List: linux-kernel@vger.kernel.org
List-Id: <linux-kernel.vger.kernel.org>
List-Subscribe: <mailto:linux-kernel+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-kernel+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
Sender: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>

Add unit tests to check if the public/private and comments strip
is working properly.

Running it shows that, in several cases, public/private is not
doing what is expected:

  test_private:
    TestPublicPrivate:
        test balanced_inner_private:                                 OK
        test balanced_non_greddy_private:                            OK
        test balanced_private:                                       OK
        test no private:                                             OK
        test unbalanced_inner_private:                               FAIL
        test unbalanced_private:                                     FAIL
        test unbalanced_struct_group_tagged_with_private:            FAIL
        test unbalanced_two_struct_group_tagged_first_with_private:  FAIL
        test unbalanced_without_end_of_line:                         FAIL

  Ran 9 tests

  FAILED (failures=3D5)

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Message-ID: <144f4952e0cb74fe9c9adc117e9a21ec8aa1cc10.1773074166.git.mcheha=
b+huawei@kernel.org>
---
 tools/unittests/test_private.py | 331 ++++++++++++++++++++++++++++++++
 1 file changed, 331 insertions(+)
 create mode 100755 tools/unittests/test_private.py

diff --git a/tools/unittests/test_private.py b/tools/unittests/test_private=
.py
new file mode 100755
index 000000000000..eae245ae8a12
--- /dev/null
+++ b/tools/unittests/test_private.py
@@ -0,0 +1,331 @@
+#!/usr/bin/env python3
+
+"""
+Unit tests for struct/union member extractor class.
+"""
+
+
+import os
+import re
+import unittest
+import sys
+
+from unittest.mock import MagicMock
+
+SRC_DIR =3D os.path.dirname(os.path.realpath(__file__))
+sys.path.insert(0, os.path.join(SRC_DIR, "../lib/python"))
+
+from kdoc.kdoc_parser import trim_private_members
+from unittest_helper import run_unittest
+
+#
+# List of tests.
+#
+# The code will dynamically generate one test for each key on this diction=
ary.
+#
+
+#: Tests to check if CTokenizer is handling properly public/private commen=
ts.
+TESTS_PRIVATE =3D {
+    #
+    # Simplest case: no private. Ensure that trimming won't affect struct
+    #
+    "no private": {
+        "source": """
+            struct foo {
+                int a;
+                int b;
+                int c;
+            };
+        """,
+        "trimmed": """
+            struct foo {
+                int a;
+                int b;
+                int c;
+            };
+        """,
+    },
+
+    #
+    # Play "by the books" by always having a public in place
+    #
+
+    "balanced_private": {
+        "source": """
+            struct foo {
+                int a;
+                /* private: */
+                int b;
+                /* public: */
+                int c;
+            };
+        """,
+        "trimmed": """
+            struct foo {
+                int a;
+                int c;
+            };
+        """,
+    },
+
+    "balanced_non_greddy_private": {
+        "source": """
+            struct foo {
+                int a;
+                /* private: */
+                int b;
+                /* public: */
+                int c;
+                /* private: */
+                int d;
+                /* public: */
+                int e;
+
+            };
+        """,
+        "trimmed": """
+            struct foo {
+                int a;
+                int c;
+                int e;
+            };
+        """,
+    },
+
+    "balanced_inner_private": {
+        "source": """
+            struct foo {
+                struct {
+                    int a;
+                    /* private: ignore below */
+                    int b;
+                /* public: but this should not be ignored */
+                };
+                int b;
+            };
+        """,
+        "trimmed": """
+            struct foo {
+                struct {
+                    int a;
+                };
+                int b;
+            };
+        """,
+    },
+
+    #
+    # Test what happens if there's no public after private place
+    #
+
+    "unbalanced_private": {
+        "source": """
+            struct foo {
+                int a;
+                /* private: */
+                int b;
+                int c;
+            };
+        """,
+        "trimmed": """
+            struct foo {
+                int a;
+            };
+        """,
+    },
+
+    "unbalanced_inner_private": {
+        "source": """
+            struct foo {
+                struct {
+                    int a;
+                    /* private: ignore below */
+                    int b;
+                /* but this should not be ignored */
+                };
+                int b;
+            };
+        """,
+        "trimmed": """
+            struct foo {
+                struct {
+                    int a;
+                };
+                int b;
+            };
+        """,
+    },
+
+    "unbalanced_struct_group_tagged_with_private": {
+        "source": """
+            struct page_pool_params {
+                struct_group_tagged(page_pool_params_fast, fast,
+                        unsigned int    order;
+                        unsigned int    pool_size;
+                        int             nid;
+                        struct device   *dev;
+                        struct napi_struct *napi;
+                        enum dma_data_direction dma_dir;
+                        unsigned int    max_len;
+                        unsigned int    offset;
+                };
+                struct_group_tagged(page_pool_params_slow, slow,
+                        struct net_device *netdev;
+                        unsigned int queue_idx;
+                        unsigned int    flags;
+                        /* private: used by test code only */
+                        void (*init_callback)(netmem_ref netmem, void *arg=
);
+                        void *init_arg;
+                };
+            };
+        """,
+        "trimmed": """
+            struct page_pool_params {
+                struct_group_tagged(page_pool_params_fast, fast,
+                        unsigned int    order;
+                        unsigned int    pool_size;
+                        int             nid;
+                        struct device   *dev;
+                        struct napi_struct *napi;
+                        enum dma_data_direction dma_dir;
+                        unsigned int    max_len;
+                        unsigned int    offset;
+                };
+                struct_group_tagged(page_pool_params_slow, slow,
+                        struct net_device *netdev;
+                        unsigned int queue_idx;
+                        unsigned int    flags;
+                };
+            };
+        """,
+    },
+
+    "unbalanced_two_struct_group_tagged_first_with_private": {
+        "source": """
+            struct page_pool_params {
+                struct_group_tagged(page_pool_params_slow, slow,
+                        struct net_device *netdev;
+                        unsigned int queue_idx;
+                        unsigned int    flags;
+                        /* private: used by test code only */
+                        void (*init_callback)(netmem_ref netmem, void *arg=
);
+                        void *init_arg;
+                };
+                struct_group_tagged(page_pool_params_fast, fast,
+                        unsigned int    order;
+                        unsigned int    pool_size;
+                        int             nid;
+                        struct device   *dev;
+                        struct napi_struct *napi;
+                        enum dma_data_direction dma_dir;
+                        unsigned int    max_len;
+                        unsigned int    offset;
+                };
+            };
+        """,
+        "trimmed": """
+            struct page_pool_params {
+                struct_group_tagged(page_pool_params_slow, slow,
+                        struct net_device *netdev;
+                        unsigned int queue_idx;
+                        unsigned int    flags;
+                };
+                struct_group_tagged(page_pool_params_fast, fast,
+                        unsigned int    order;
+                        unsigned int    pool_size;
+                        int             nid;
+                        struct device   *dev;
+                        struct napi_struct *napi;
+                        enum dma_data_direction dma_dir;
+                        unsigned int    max_len;
+                        unsigned int    offset;
+                };
+            };
+        """,
+    },
+    "unbalanced_without_end_of_line": {
+        "source": """ \
+            struct page_pool_params { \
+                struct_group_tagged(page_pool_params_slow, slow, \
+                        struct net_device *netdev; \
+                        unsigned int queue_idx; \
+                        unsigned int    flags;
+                        /* private: used by test code only */
+                        void (*init_callback)(netmem_ref netmem, void *arg=
); \
+                        void *init_arg; \
+                }; \
+                struct_group_tagged(page_pool_params_fast, fast, \
+                        unsigned int    order; \
+                        unsigned int    pool_size; \
+                        int             nid; \
+                        struct device   *dev; \
+                        struct napi_struct *napi; \
+                        enum dma_data_direction dma_dir; \
+                        unsigned int    max_len; \
+                        unsigned int    offset; \
+                }; \
+            };
+        """,
+        "trimmed": """
+            struct page_pool_params {
+                struct_group_tagged(page_pool_params_slow, slow,
+                        struct net_device *netdev;
+                        unsigned int queue_idx;
+                        unsigned int    flags;
+                };
+                struct_group_tagged(page_pool_params_fast, fast,
+                        unsigned int    order;
+                        unsigned int    pool_size;
+                        int             nid;
+                        struct device   *dev;
+                        struct napi_struct *napi;
+                        enum dma_data_direction dma_dir;
+                        unsigned int    max_len;
+                        unsigned int    offset;
+                };
+            };
+        """,
+    },
+}
+
+
+class TestPublicPrivate(unittest.TestCase):
+    """
+    Tests for trim_private_members(). Populated dynamically at runtime.
+    """
+
+    def setUp(self):
+        self.maxDiff =3D None  # never truncate the diff on failure
+
+    def add_test(cls, name, source, trimmed):
+        """
+        Add a test checking that ``source`` is trimmed into ``trimmed``.
+        """
+        def test(self):
+            result =3D trim_private_members(source)
+            # Collapse whitespace; possessive "\s++" needs Python 3.11+
+            result =3D re.sub(r"\s+", " ", result).strip()
+            expected =3D re.sub(r"\s+", " ", trimmed).strip()
+
+            msg =3D f"failed when parsing this source:\n" + source
+
+            self.assertEqual(result, expected, msg=3Dmsg)
+
+        test.__name__ =3D f'test {name}'
+
+        setattr(TestPublicPrivate, test.__name__, test)
+
+
+#
+# Add one test method to TestPublicPrivate per TESTS_PRIVATE entry
+#
+test_class =3D TestPublicPrivate()
+for name, test in TESTS_PRIVATE.items():
+    test_class.add_test(name, test["source"], test["trimmed"])
+
+
+#
+# Run the tests when this file is executed as a script
+#
+if __name__ =3D=3D "__main__":
+    run_unittest(__file__)
--=20
2.53.0
From nobody Tue Apr  7 18:00:33 2026
Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org
 [10.30.226.201])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id ADB5F38229E;
	Thu, 12 Mar 2026 07:12:33 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=10.30.226.201
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1773299553; cv=none;
 b=H7iZ743ObBkzDPJ9MXSbJ4MtxK+ND+ad+3pWgwksYtqdcTeWwPVBSrJLveHOr0wBf50QwmZ/E+8juGB85tLCt9bvaJ7KkTOyWdnQYtvQP2yZuocoGNtB33cEFF+r/jpbTB4YC64zpz4gfwO+FvCuXcW+dhowHUc12EsBMi7GT9s=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1773299553; c=relaxed/simple;
	bh=csYZ9dA1O9VyUFjc5J0DpD/DghpaaqHq/9OSwq1kYgs=;
	h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:
	 MIME-Version:Content-Type;
 b=mkC3mHuh/2KoR3VrLi8XVtEwaqFLIfrtTgOkb9jEId+qKtx3hUWZF9xdkcdZkKKiqDtL0S/ocLNDWd9R3DJKm7rcIv82bMlHHtwHLRM+GdKFnqzmIyM4Br3TB1Sm7GjenKL4Icr0c2EH0eIoQ0GvUuD08rQZcVO+XPUODAAIZbw=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b=TbIhMUX+; arc=none smtp.client-ip=10.30.226.201
Authentication-Results: smtp.subspace.kernel.org;
	dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b="TbIhMUX+"
Received: by smtp.kernel.org (Postfix) with ESMTPSA id 57CDFC2BC9E;
	Thu, 12 Mar 2026 07:12:33 +0000 (UTC)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org;
	s=k20201202; t=1773299553;
	bh=csYZ9dA1O9VyUFjc5J0DpD/DghpaaqHq/9OSwq1kYgs=;
	h=From:To:Cc:Subject:Date:In-Reply-To:References:From;
	b=TbIhMUX+XkxadUrQIE31OR2aBPxFmGwzypQ/j7wVWH30AThJgrrJ8r89E/Ur4xJ1U
	 As0PRHw5/EWjVJag930+QqY+3iJ8LVPttM0/YT2tIK+38b9V3+vvea4jX+rDQZxlIl
	 umiSIsIsy8YBx7Hm5pBZmM7o2+OnYh05DpkwGpQ2+1m/FtG1yucV+pyJdGeFmUimbw
	 zsvm8DlNzA7WoHgIl8ClURKNFFUeY6VNVC8PQbzBaHR4DBOybEHr4gt6UT8ALu1au/
	 t9KHD4SF9LlXi4huy337VQx+tFWKpkSHX7ZPKCxatBWYUDAK5pyaRhKGYtjoxoPPTR
	 /zAvvwBhaTKGg==
Received: from mchehab by mail.kernel.org with local (Exim 4.99.1)
	(envelope-from <mchehab+huawei@kernel.org>)
	id 1w0aDf-000000077gC-1XAN;
	Thu, 12 Mar 2026 08:12:31 +0100
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
To: Jonathan Corbet <corbet@lwn.net>,
	Linux Doc Mailing List <linux-doc@vger.kernel.org>
Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>,
	linux-hardening@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	Aleksandr Loktionov <aleksandr.loktionov@intel.com>,
	Mauro Carvalho Chehab <mchehab@kernel.org>,
	Randy Dunlap <rdunlap@infradead.org>
Subject: [PATCH v2 03/20] docs: kdoc: don't add broken comments inside
 prototypes
Date: Thu, 12 Mar 2026 08:12:11 +0100
Message-ID: 
 <2b957decdb6cedab4268f71a166c25b7abdb9a61.1773297828.git.mchehab+huawei@kernel.org>
X-Mailer: git-send-email 2.53.0
In-Reply-To: <cover.1773297828.git.mchehab+huawei@kernel.org>
References: <cover.1773297828.git.mchehab+huawei@kernel.org>
Precedence: bulk
X-Mailing-List: linux-kernel@vger.kernel.org
List-Id: <linux-kernel.vger.kernel.org>
List-Subscribe: <mailto:linux-kernel+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-kernel+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
Sender: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>

Parsing a file like drivers/scsi/isci/host.h, which contains broken
kernel-doc markups, makes kernel-doc create a prototype that contains
unmatched end-of-comment markers.

That causes, for instance, struct sci_power_control to be shown with
this prototype:

    struct sci_power_control {
        * it is not. */ bool timer_started;
        */ struct sci_timer timer;
        * requesters field. */ u8 phys_waiting;
        */ u8 phys_granted_power;
        * mapped into requesters via struct sci_phy.phy_index */ struct isc=
i_phy *requesters[SCI_MAX_PHYS];
    };

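as the comments don't start with "/*" anymore.

Fix the logic to detect such cases: keep adding the comments to the
prototype, but prepend "/**" so that they remain well-formed and can
be removed later by the comment-stripping logic.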

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Message-ID: <18e577dbbd538dcc22945ff139fe3638344e14f0.1773074166.git.mcheha=
b+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/k=
doc_parser.py
index edf70ba139a5..086579d00b5c 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -1355,6 +1355,12 @@ class KernelDoc:
         elif doc_content.search(line):
             self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}=
")
             self.state =3D state.PROTO
+
+            #
+            # Don't let it add partial comments at the code, as breaks the
+            # logic meant to remove comments from prototypes.
+            #
+            self.process_proto_type(ln, "/**\n" + line)
         # else ... ??
=20
     def process_inline_text(self, ln, line):
--=20
2.53.0
From nobody Tue Apr  7 18:00:33 2026
Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org
 [10.30.226.201])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id ADA5836C9E3;
	Thu, 12 Mar 2026 07:12:33 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=10.30.226.201
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1773299553; cv=none;
 b=Nk8BSDoVcbS1YNSF/2W14q9K6RHjLvdUkzPsSRXxvEbK9ENQIHp/xw+se6gEZY3+juR+6QRzAqFW9R2zXyMYZWsQNlwjXtNwU6LSFBjCZ73qxAjHTwft9O1rS3rkPJzEeNSG6t7snkP1rqU5oK8ZF4BMVBQPwdDE6x6BhmcOoa0=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1773299553; c=relaxed/simple;
	bh=r7ZWNO+ghbGf9tKGR7rDEB46pt/FEZ69GWHK20SQis4=;
	h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:
	 MIME-Version:Content-Type;
 b=ehxQbJ/WB9YPy0zxL0g58DOWt9ug6mFGMS/6kZy2ehulY+CE1++h1+aYfFLV2hlR6UqURHlZinurs2Fvx09YcFnXex56JhoFSMVLFQ2lOe1Fig9jSnfBCPHOSFMOqoqe0GmiGozH+8TnnN4INDeh3nXhiFbIj32vqQ5q0skCYPM=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b=E14ncgOj; arc=none smtp.client-ip=10.30.226.201
Authentication-Results: smtp.subspace.kernel.org;
	dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b="E14ncgOj"
Received: by smtp.kernel.org (Postfix) with ESMTPSA id 66DA9C2BCB2;
	Thu, 12 Mar 2026 07:12:33 +0000 (UTC)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org;
	s=k20201202; t=1773299553;
	bh=r7ZWNO+ghbGf9tKGR7rDEB46pt/FEZ69GWHK20SQis4=;
	h=From:To:Cc:Subject:Date:In-Reply-To:References:From;
	b=E14ncgOjKLkgw64H+fWmzUQYizwUaLsQJzglUTFTaY+qFb8x2f2Pdjr7QOgsX0dTJ
	 IfxvyfjG94ZoCAeKlBnVsnfyFgzudGKfHJflA+cb4pw5aOe0JJ95x8c8aEprxB+dsb
	 Y5YNvMH9Hn+1MKWM7uT7+w9O2s0AYKPG02CLK4sKUW6g+N/HvmXS6ywbx2TwWTHOrQ
	 hUOFk+0x6jb1YqnaavIbnprDSO1VYw2ZLNJAEVCvw1hEF7c0P+twaQRjm/RDbMyRgZ
	 SxDhgbZ8yY+fmq/m2o2IO54m03nqd0V2G6TxinMJrKaX9JERC436NDGr50TdyEWYEG
	 wCNjLX+0rPDGg==
Received: from mchehab by mail.kernel.org with local (Exim 4.99.1)
	(envelope-from <mchehab+huawei@kernel.org>)
	id 1w0aDf-000000077gG-1e5M;
	Thu, 12 Mar 2026 08:12:31 +0100
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
To: Jonathan Corbet <corbet@lwn.net>,
	Linux Doc Mailing List <linux-doc@vger.kernel.org>
Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>,
	linux-hardening@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	Aleksandr Loktionov <aleksandr.loktionov@intel.com>,
	Mauro Carvalho Chehab <mchehab@kernel.org>,
	Randy Dunlap <rdunlap@infradead.org>
Subject: [PATCH v2 04/20] docs: kdoc: properly handle empty enum arguments
Date: Thu, 12 Mar 2026 08:12:12 +0100
Message-ID: 
 <abcc260f770c4fcb2ae40be58bd8eea5e44bf697.1773297828.git.mchehab+huawei@kernel.org>
X-Mailer: git-send-email 2.53.0
In-Reply-To: <cover.1773297828.git.mchehab+huawei@kernel.org>
References: <cover.1773297828.git.mchehab+huawei@kernel.org>
Precedence: bulk
X-Mailing-List: linux-kernel@vger.kernel.org
List-Id: <linux-kernel.vger.kernel.org>
List-Subscribe: <mailto:linux-kernel+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-kernel+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
Sender: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>

Depending on how the enum proto is written, a comma at the end
may incorrectly make kernel-doc parse an arg like " ".

Strip spaces before checking if arg is empty.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Message-ID: <4182bfb7e5f5b4bbaf05cee1bede691e56247eaf.1773074166.git.mcheha=
b+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/k=
doc_parser.py
index 086579d00b5c..4b3c555e6c8e 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -810,9 +810,10 @@ class KernelDoc:
         member_set =3D set()
         members =3D KernRe(r'\([^;)]*\)').sub('', members)
         for arg in members.split(','):
-            if not arg:
-                continue
             arg =3D KernRe(r'^\s*(\w+).*').sub(r'\1', arg)
+            if not arg.strip():
+                continue
+
             self.entry.parameterlist.append(arg)
             if arg not in self.entry.parameterdescs:
                 self.entry.parameterdescs[arg] =3D self.undescribed
--=20
2.53.0
From nobody Tue Apr  7 18:00:33 2026
Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org
 [10.30.226.201])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id 10C713AD53C;
	Thu, 12 Mar 2026 07:12:34 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=10.30.226.201
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1773299554; cv=none;
 b=rGfXrhJq3/msn+Vv9e0pKewi9fy+A5EII0XEo3VmXNSZS6nDk1Sgut8qCl1D9r3L9NkXnk5AWGZE2ycmU3KytthWTVH8NmZFhAzk8JI/xE23w+fA4cUqdXIm8TE4DZq+d8tgA69xIl8hcbQrvwGx5poHayvkyGKEnc5BnGkuZQg=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1773299554; c=relaxed/simple;
	bh=Znhgh5VpMegTURFUzFsOSwJlqZmT0mMIAtISYNXYSXM=;
	h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:
	 MIME-Version:Content-Type;
 b=Mol0A1MnoIMvNSU6ttph7BYbG0c17jkV/kqA6jikNiXpFTna8J89c1DhBoLxEbU9m3wCCR/9ocJvFp9fuCp4uEJPIXoftdiZ3bfSibO6SLTa8vQdBcfklorRKaJrMX6JSweaal4liBLNAwqcfoF5LgCQQrGo5lOCXW4rRneBjOM=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b=S8Ra4+cy; arc=none smtp.client-ip=10.30.226.201
Authentication-Results: smtp.subspace.kernel.org;
	dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b="S8Ra4+cy"
Received: by smtp.kernel.org (Postfix) with ESMTPSA id 64416C2BCB3;
	Thu, 12 Mar 2026 07:12:33 +0000 (UTC)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org;
	s=k20201202; t=1773299553;
	bh=Znhgh5VpMegTURFUzFsOSwJlqZmT0mMIAtISYNXYSXM=;
	h=From:To:Cc:Subject:Date:In-Reply-To:References:From;
	b=S8Ra4+cyC4cAuvdogezwt+o/z6U138dgnco6Lr6PABVV2eDL3RarhuTdCZqqKAqQF
	 hmL0ErRTpCt/L8WNZJ+vvwfiXRfybStNmQJZFnnV1+n1O0gxsjb9XZL0eRnODSZKzg
	 X/GjSdZKqtJ1qJ3s8QGk5FSeDlL7G3FPjArfDicI+SHJmTuh8dtdaGEyVLvNW1FRal
	 DpoDkfvroqXjNbearS+lZi603mR+jtZeiHwJXo6qDGdJXpd4FWxhVRfK3fGWROGcQG
	 V7gmY8ufafEM20tmB0bW4IHkZarmkT1P9x4N1nhUEQikNGsL4hEfSZfHZmRraOy1OW
	 umwd3OxIpT3Nw==
Received: from mchehab by mail.kernel.org with local (Exim 4.99.1)
	(envelope-from <mchehab+huawei@kernel.org>)
	id 1w0aDf-000000077gK-1lGt;
	Thu, 12 Mar 2026 08:12:31 +0100
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
To: Jonathan Corbet <corbet@lwn.net>,
	Linux Doc Mailing List <linux-doc@vger.kernel.org>
Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>,
	linux-hardening@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	Aleksandr Loktionov <aleksandr.loktionov@intel.com>,
	Mauro Carvalho Chehab <mchehab@kernel.org>,
	Randy Dunlap <rdunlap@infradead.org>
Subject: [PATCH v2 05/20] docs: kdoc_re: add a C tokenizer
Date: Thu, 12 Mar 2026 08:12:13 +0100
Message-ID: 
 <8541ffa469647db1a7154f274fb2d55b4c127dcb.1773297828.git.mchehab+huawei@kernel.org>
X-Mailer: git-send-email 2.53.0
In-Reply-To: <cover.1773297828.git.mchehab+huawei@kernel.org>
References: <cover.1773297828.git.mchehab+huawei@kernel.org>
Precedence: bulk
X-Mailing-List: linux-kernel@vger.kernel.org
List-Id: <linux-kernel.vger.kernel.org>
List-Subscribe: <mailto:linux-kernel+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-kernel+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
Sender: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>

Handling C code purely using regular expressions doesn't work well.

Add a C tokenizer to help do it the right way.

The tokenizer is based on the tokenizer example from the Python re
documentation:
	https://docs.python.org/3/library/re.html#writing-a-tokenizer

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Message-ID: <c63ad36c81fe043e9e33ca55630414893f127413.1773074166.git.mcheha=
b+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_re.py | 234 +++++++++++++++++++++++++++++++
 1 file changed, 234 insertions(+)

diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_=
re.py
index 085b89a4547c..7bed4e9a8810 100644
--- a/tools/lib/python/kdoc/kdoc_re.py
+++ b/tools/lib/python/kdoc/kdoc_re.py
@@ -141,6 +141,240 @@ class KernRe:
=20
         return self.last_match.groups()
=20
+class TokType():
+
+    @staticmethod
+    def __str__(val):
+        """Return the name of an enum value"""
+        return TokType._name_by_val.get(val, f"UNKNOWN({val})")
+
+class CToken():
+    """
+    Data class to define a C token.
+    """
+
+    # Tokens that can be used by the parser. Works like an C enum.
+
+    COMMENT =3D 0     #: A standard C or C99 comment, including delimiter.
+    STRING =3D 1      #: A string, including quotation marks.
+    CHAR =3D 2        #: A character, including apostophes.
+    NUMBER =3D 3      #: A number.
+    PUNC =3D 4        #: A puntuation mark: ``;`` / ``,`` / ``.``.
+    BEGIN =3D 5       #: A begin character: ``{`` / ``[`` / ``(``.
+    END =3D 6         #: A end character: ``}`` / ``]`` / ``)``.
+    CPP =3D 7         #: A preprocessor macro.
+    HASH =3D 8        #: The hash character - useful to handle other macro=
s.
+    OP =3D 9          #: A C operator (add, subtract, ...).
+    STRUCT =3D 10     #: A ``struct`` keyword.
+    UNION =3D 11      #: An ``union`` keyword.
+    ENUM =3D 12       #: A ``struct`` keyword.
+    TYPEDEF =3D 13    #: A ``typedef`` keyword.
+    NAME =3D 14       #: A name. Can be an ID or a type.
+    SPACE =3D 15      #: Any space characters, including new lines
+
+    MISMATCH =3D 255  #: an error indicator: should never happen in practi=
ce.
+
+    # Dict to convert from an enum interger into a string.
+    _name_by_val =3D {v: k for k, v in dict(vars()).items() if isinstance(=
v, int)}
+
+    # Dict to convert from string to an enum-like integer value.
+    _name_to_val =3D {k: v for v, k in _name_by_val.items()}
+
+    @staticmethod
+    def to_name(val):
+        """Convert from an integer value from CToken enum into a string"""
+
+        return CToken._name_by_val.get(val, f"UNKNOWN({val})")
+
+    @staticmethod
+    def from_name(name):
+        """Convert a string into a CToken enum value"""
+        if name in CToken._name_to_val:
+            return CToken._name_to_val[name]
+
+        return CToken.MISMATCH
+
+    def __init__(self, kind, value, pos,
+                 brace_level, paren_level, bracket_level):
+        self.kind =3D kind
+        self.value =3D value
+        self.pos =3D pos
+        self.brace_level =3D brace_level
+        self.paren_level =3D paren_level
+        self.bracket_level =3D bracket_level
+
+    def __repr__(self):
+        name =3D self.to_name(self.kind)
+        if isinstance(self.value, str):
+            value =3D '"' + self.value + '"'
+        else:
+            value =3D self.value
+
+        return f"CToken({name}, {value}, {self.pos}, " \
+               f"{self.brace_level}, {self.paren_level}, {self.bracket_lev=
el})"
+
+#: Tokens to parse C code.
+TOKEN_LIST =3D [
+    (CToken.COMMENT, r"//[^\n]*|/\*[\s\S]*?\*/"),
+
+    (CToken.STRING,  r'"(?:\\.|[^"\\])*"'),
+    (CToken.CHAR,    r"'(?:\\.|[^'\\])'"),
+
+    (CToken.NUMBER,  r"0[xX][0-9a-fA-F]+[uUlL]*|0[0-7]+[uUlL]*|"
+                     r"[0-9]+(\.[0-9]*)?([eE][+-]?[0-9]+)?[fFlL]*"),
+
+    (CToken.PUNC,    r"[;,\.]"),
+
+    (CToken.BEGIN,   r"[\[\(\{]"),
+
+    (CToken.END,     r"[\]\)\}]"),
+
+    (CToken.CPP,     r"#\s*(define|include|ifdef|ifndef|if|else|elif|endif=
|undef|pragma)\b"),
+
+    (CToken.HASH,    r"#"),
+
+    (CToken.OP,      r"\+\+|\-\-|\->|=3D=3D|\!=3D|<=3D|>=3D|&&|\|\||<<|>>|=
\+=3D|\-=3D|\*=3D|/=3D|%=3D"
+                     r"|&=3D|\|=3D|\^=3D|=3D|\+|\-|\*|/|%|<|>|&|\||\^|~|!|=
\?|\:"),
+
+    (CToken.STRUCT,  r"\bstruct\b"),
+    (CToken.UNION,   r"\bunion\b"),
+    (CToken.ENUM,    r"\benum\b"),
+    (CToken.TYPEDEF, r"\bkinddef\b"),
+
+    (CToken.NAME,      r"[A-Za-z_][A-Za-z0-9_]*"),
+
+    (CToken.SPACE,   r"[\s]+"),
+
+    (CToken.MISMATCH,r"."),
+]
+
+#: Handle C continuation lines.
+RE_CONT =3D KernRe(r"\\\n")
+
+RE_COMMENT_START =3D KernRe(r'/\*\s*')
+
+#: tokenizer regex. Will be filled at the first CTokenizer usage.
+re_scanner =3D None
+
+class CTokenizer():
+    """
+    Scan C statements and definitions and produce tokens.
+
+    When converted to string, it drops comments and handle public/private
+    values, respecting depth.
+    """
+
+    # This class is inspired and follows the basic concepts of:
+    #   https://docs.python.org/3/library/re.html#writing-a-tokenizer
+
+    def _tokenize(self, source):
+        """
+        Interactor that parses ``source``, splitting it into tokens, as de=
fined
+        at ``self.TOKEN_LIST``.
+
+        The interactor returns a CToken class object.
+        """
+
+        # Handle continuation lines. Note that kdoc_parser already has a
+        # logic to do that. Still, let's keep it for completeness, as we m=
ight
+        # end re-using this tokenizer outsize kernel-doc some day - or we =
may
+        # eventually remove from there as a future cleanup.
+        source =3D RE_CONT.sub("", source)
+
+        brace_level =3D 0
+        paren_level =3D 0
+        bracket_level =3D 0
+
+        for match in re_scanner.finditer(source):
+            kind =3D CToken.from_name(match.lastgroup)
+            pos =3D match.start()
+            value =3D match.group()
+
+            if kind =3D=3D CToken.MISMATCH:
+                raise RuntimeError(f"Unexpected token '{value}' on {pos}:\=
n\t{source}")
+            elif kind =3D=3D CToken.BEGIN:
+                if value =3D=3D '(':
+                    paren_level +=3D 1
+                elif value =3D=3D '[':
+                    bracket_level +=3D 1
+                else:  # value =3D=3D '{'
+                    brace_level +=3D 1
+
+            elif kind =3D=3D CToken.END:
+                if value =3D=3D ')' and paren_level > 0:
+                    paren_level -=3D 1
+                elif value =3D=3D ']' and bracket_level > 0:
+                    bracket_level -=3D 1
+                elif brace_level > 0:    # value =3D=3D '}'
+                    brace_level -=3D 1
+
+            yield CToken(kind, value, pos,
+                         brace_level, paren_level, bracket_level)
+
+    def __init__(self, source):
+        """
+        Create a regular expression to handle TOKEN_LIST.
+
+        While I generally don't like using regex group naming via:
+            (?P<name>...)
+
+        in this particular case, it makes sense, as we can pick the name
+        when matching a code via re_scanner().
+        """
+        global re_scanner
+
+        if not re_scanner:
+            re_tokens =3D []
+
+            for kind, pattern in TOKEN_LIST:
+                name =3D CToken.to_name(kind)
+                re_tokens.append(f"(?P<{name}>{pattern})")
+
+            re_scanner =3D KernRe("|".join(re_tokens), re.MULTILINE | re.D=
OTALL)
+
+        self.tokens =3D []
+        for tok in self._tokenize(source):
+            self.tokens.append(tok)
+
+    def __str__(self):
+        out=3D""
+        show_stack =3D [True]
+
+        for tok in self.tokens:
+            if tok.kind =3D=3D CToken.BEGIN:
+                show_stack.append(show_stack[-1])
+
+            elif tok.kind =3D=3D CToken.END:
+                prev =3D show_stack[-1]
+                if len(show_stack) > 1:
+                    show_stack.pop()
+
+                if not prev and show_stack[-1]:
+                    #
+                    # Try to preserve indent
+                    #
+                    out +=3D "\t" * (len(show_stack) - 1)
+
+                    out +=3D str(tok.value)
+                    continue
+
+            elif tok.kind =3D=3D CToken.COMMENT:
+                comment =3D RE_COMMENT_START.sub("", tok.value)
+
+                if comment.startswith("private:"):
+                    show_stack[-1] =3D False
+                    show =3D False
+                elif comment.startswith("public:"):
+                    show_stack[-1] =3D True
+
+                continue
+
+            if show_stack[-1]:
+                    out +=3D str(tok.value)
+
+        return out
+
+
 #: Nested delimited pairs (brackets and parenthesis)
 DELIMITER_PAIRS =3D {
     '{': '}',
--=20
2.53.0
From nobody Tue Apr  7 18:00:33 2026
Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org
 [10.30.226.201])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id AD910314A6F;
	Thu, 12 Mar 2026 07:12:33 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=10.30.226.201
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1773299553; cv=none;
 b=QMvJyTiVVckNaQngetRt347M3GlgvQSCsvodIVHDNCNvLHQLPuylCVa4zzLsT2aCG7Wq1+MhDfOiR+fbk/lu1h25gGHfoFcSr5EgzVGreIt+jmaxgkLAg3Z/K3GZfPkD+AoNqfj/OaJbZEQEW5tawE3JFtURAEvxkyBNL93FUWY=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1773299553; c=relaxed/simple;
	bh=4zE5duoA2GQsqDENdL1YG74aJauA3hcB054cECvfzE0=;
	h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:
	 MIME-Version:Content-Type;
 b=UujYo7b4aWU2ErGFzhOFq/iUJcu0sTDDNN1b5qk45WMoj05B5IUY/g+EcYp7l9LyiomGpCTqptSS3kw3Se7CfjFVa+ViBL8GE43a/C6jiJimd4/gQhrDGwV6JR9I9V9TIHdh3ZaPjnC70CO0CDh+4alLytQdU+FKfYwA5+LxwfY=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b=WlXxBgRu; arc=none smtp.client-ip=10.30.226.201
Authentication-Results: smtp.subspace.kernel.org;
	dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b="WlXxBgRu"
Received: by smtp.kernel.org (Postfix) with ESMTPSA id 4EED3C4CEF7;
	Thu, 12 Mar 2026 07:12:33 +0000 (UTC)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org;
	s=k20201202; t=1773299553;
	bh=4zE5duoA2GQsqDENdL1YG74aJauA3hcB054cECvfzE0=;
	h=From:To:Cc:Subject:Date:In-Reply-To:References:From;
	b=WlXxBgRuA5GSWJ9CE+evKx9kAPNc9AkZsoBgYNFMhsa9fomNm4TnYFGVnLDCkpJWk
	 FmmVCRggwXA9twvVLPSZk1lW68JIL1ioh+d0cxBtfQZMmjK+Q4DQ8RfxyxCpl46DAg
	 0FRcITWIh8WgRF8DpVtf9Mt4884YgOeXgnrP1oAwrLkrOj+oUP3BGNUvAtXXHPqTHW
	 o7BTWIFIxtfJcSKZsNIwq5e0okvs3932E+3Y5zC9lrkKoInFdD86fmE3zKu1ZCIQ4+
	 n9hIolIrGIaii+I50Yf4SpnqxeNQhv45iuaD87oYK3og/uvQYCF0ke0X0bB7HKQQpj
	 KCt2bSJ5qxEFA==
Received: from mchehab by mail.kernel.org with local (Exim 4.99.1)
	(envelope-from <mchehab+huawei@kernel.org>)
	id 1w0aDf-000000077gO-1sXv;
	Thu, 12 Mar 2026 08:12:31 +0100
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
To: Jonathan Corbet <corbet@lwn.net>,
	Linux Doc Mailing List <linux-doc@vger.kernel.org>
Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>,
	linux-hardening@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	Aleksandr Loktionov <aleksandr.loktionov@intel.com>,
	Mauro Carvalho Chehab <mchehab@kernel.org>,
	Randy Dunlap <rdunlap@infradead.org>
Subject: [PATCH v2 06/20] docs: kdoc: use tokenizer to handle comments on
 structs
Date: Thu, 12 Mar 2026 08:12:14 +0100
Message-ID: 
 <37117386c8ec0918a1d3a967bd785827fa2c2a1f.1773297828.git.mchehab+huawei@kernel.org>
X-Mailer: git-send-email 2.53.0
In-Reply-To: <cover.1773297828.git.mchehab+huawei@kernel.org>
References: <cover.1773297828.git.mchehab+huawei@kernel.org>
Precedence: bulk
X-Mailing-List: linux-kernel@vger.kernel.org
List-Id: <linux-kernel.vger.kernel.org>
List-Subscribe: <mailto:linux-kernel+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-kernel+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
Sender: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>

Better handle comments inside structs by using the C tokenizer to
trim private members. After these changes, all unit tests now pass:

  test_private:
    TestPublicPrivate:
        test balanced_inner_private:                                 OK
        test balanced_non_greddy_private:                            OK
        test balanced_private:                                       OK
        test no private:                                             OK
        test unbalanced_inner_private:                               OK
        test unbalanced_private:                                     OK
        test unbalanced_struct_group_tagged_with_private:            OK
        test unbalanced_two_struct_group_tagged_first_with_private:  OK
        test unbalanced_without_end_of_line:                         OK

  Ran 9 tests

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Message-ID: <f83ee9e8c38407eaab6ad10d4ccf155fb36683cc.1773074166.git.mcheha=
b+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/k=
doc_parser.py
index 4b3c555e6c8e..6b181ead3175 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -13,7 +13,7 @@ import sys
 import re
 from pprint import pformat
=20
-from kdoc.kdoc_re import NestedMatch, KernRe
+from kdoc.kdoc_re import NestedMatch, KernRe, CTokenizer
 from kdoc.kdoc_item import KdocItem
=20
 #
@@ -84,15 +84,9 @@ def trim_private_members(text):
     """
     Remove ``struct``/``enum`` members that have been marked "private".
     """
-    # First look for a "public:" block that ends a private region, then
-    # handle the "private until the end" case.
-    #
-    text =3D KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=3Dre.S)=
.sub('', text)
-    text =3D KernRe(r'/\*\s*private:.*', flags=3Dre.S).sub('', text)
-    #
-    # We needed the comments to do the above, but now we can take them out.
-    #
-    return KernRe(r'\s*/\*.*?\*/\s*', flags=3Dre.S).sub('', text).strip()
+
+    tokens =3D CTokenizer(text)
+    return str(tokens)
=20
 class state:
     """
--=20
2.53.0
From nobody Tue Apr  7 18:00:33 2026
Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org
 [10.30.226.201])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id 0A08F3AD52F;
	Thu, 12 Mar 2026 07:12:34 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=10.30.226.201
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1773299554; cv=none;
 b=EqUJFuuJb97rgEScTKOpu+lLEbg0Z4MfjgaQ5wliqVldKduc8Bsdb9gaBCqgCo1PDQUerWT/SwdKSp617OjXvWOPb6Z97Sl/YyXHhhFt92m0bz6TWPsurXlTmZupaFZcix9ylLZtOtsMmaNaiNZfR4mOIE1KCeQE+mG4D7DvvrM=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1773299554; c=relaxed/simple;
	bh=Iw4hGcMR++9b5HjvzcPazTJn71Wj6E8lsfP0l3hk0pA=;
	h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:
	 MIME-Version:Content-Type;
 b=msL2yFjp2QReGX7WZ7JC8cQyD7Me1gmyouX9tRop9L1qc7kOfsNRlKZYDNDB7CJw4+jWRNqT6vzBQvUBXslZ+OIj21PZHXrdDG5RQp7uFYIXvm4nuSp1jLnM6KQRpyfnc9X2GU6lrJjqzRlvTNyMvz8IkkjHYxttfdEFyniI0Dk=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b=l3xI6Pkd; arc=none smtp.client-ip=10.30.226.201
Authentication-Results: smtp.subspace.kernel.org;
	dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b="l3xI6Pkd"
Received: by smtp.kernel.org (Postfix) with ESMTPSA id 4EF07C116C6;
	Thu, 12 Mar 2026 07:12:33 +0000 (UTC)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org;
	s=k20201202; t=1773299553;
	bh=Iw4hGcMR++9b5HjvzcPazTJn71Wj6E8lsfP0l3hk0pA=;
	h=From:To:Cc:Subject:Date:In-Reply-To:References:From;
	b=l3xI6PkdHWVBFa/8D4ngGu/oG+Ag2LfI/jfiEHXV7WT200eFfHwzGr/PoraQedkOf
	 +aUaDlEX2C6rpI70apDEZSLo6G6dKmzKOdMrx124Oz2i3Sq8S/eOvQ+9jkO+Fbqygb
	 pCfgAX5bBoVBIcg5qbnt9QbyjprkojJ0+MXIRbA5LjHavGy/d6xfwIUZ16B40JFbOD
	 kPNKQiqv5hXRPAS+lTtdZ0PvTQInE8ehRDKzVjlcEjoyxt1GSDEZeDI3vOjduxHpA9
	 oxrKiHKc55WBI2NzSD20RO29MCe4FuGJq8ZonNXES7ddijc+g6/2yYuamuarqUFmJK
	 lSdjRkQbeff3Q==
Received: from mchehab by mail.kernel.org with local (Exim 4.99.1)
	(envelope-from <mchehab+huawei@kernel.org>)
	id 1w0aDf-000000077gS-1zZC;
	Thu, 12 Mar 2026 08:12:31 +0100
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
To: Jonathan Corbet <corbet@lwn.net>,
	Linux Doc Mailing List <linux-doc@vger.kernel.org>
Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>,
	linux-hardening@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	Aleksandr Loktionov <aleksandr.loktionov@intel.com>,
	Mauro Carvalho Chehab <mchehab@kernel.org>,
	Randy Dunlap <rdunlap@infradead.org>
Subject: [PATCH v2 07/20] docs: kdoc: move C Tokenizer to c_lex module
Date: Thu, 12 Mar 2026 08:12:15 +0100
Message-ID: 
 <6f325ef5c6be846c21b0c4df0f48bd0deeb236b0.1773297828.git.mchehab+huawei@kernel.org>
X-Mailer: git-send-email 2.53.0
In-Reply-To: <cover.1773297828.git.mchehab+huawei@kernel.org>
References: <cover.1773297828.git.mchehab+huawei@kernel.org>
Precedence: bulk
X-Mailing-List: linux-kernel@vger.kernel.org
List-Id: <linux-kernel.vger.kernel.org>
List-Subscribe: <mailto:linux-kernel+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-kernel+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
Sender: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>

Move the C tokenizer out of kdoc_re into its own module (c_lex).

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/c_lex.py       | 239 +++++++++++++++++++++++++++
 tools/lib/python/kdoc/kdoc_parser.py |   3 +-
 tools/lib/python/kdoc/kdoc_re.py     | 233 --------------------------
 3 files changed, 241 insertions(+), 234 deletions(-)
 create mode 100644 tools/lib/python/kdoc/c_lex.py

diff --git a/tools/lib/python/kdoc/c_lex.py b/tools/lib/python/kdoc/c_lex.py
new file mode 100644
index 000000000000..a104c29b63fb
--- /dev/null
+++ b/tools/lib/python/kdoc/c_lex.py
@@ -0,0 +1,239 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+
+"""
+Regular expression ancillary classes.
+
+Those help caching regular expressions and do matching for kernel-doc.
+"""
+
+import re
+
+from .kdoc_re import KernRe
+
+class CToken():
+    """
+    Data class to define a C token.
+    """
+
+    # Tokens that can be used by the parser. Works like an C enum.
+
+    COMMENT =3D 0     #: A standard C or C99 comment, including delimiter.
+    STRING =3D 1      #: A string, including quotation marks.
+    CHAR =3D 2        #: A character, including apostophes.
+    NUMBER =3D 3      #: A number.
+    PUNC =3D 4        #: A puntuation mark: ``;`` / ``,`` / ``.``.
+    BEGIN =3D 5       #: A begin character: ``{`` / ``[`` / ``(``.
+    END =3D 6         #: A end character: ``}`` / ``]`` / ``)``.
+    CPP =3D 7         #: A preprocessor macro.
+    HASH =3D 8        #: The hash character - useful to handle other macro=
s.
+    OP =3D 9          #: A C operator (add, subtract, ...).
+    STRUCT =3D 10     #: A ``struct`` keyword.
+    UNION =3D 11      #: An ``union`` keyword.
+    ENUM =3D 12       #: A ``struct`` keyword.
+    TYPEDEF =3D 13    #: A ``typedef`` keyword.
+    NAME =3D 14       #: A name. Can be an ID or a type.
+    SPACE =3D 15      #: Any space characters, including new lines
+
+    MISMATCH =3D 255  #: an error indicator: should never happen in practi=
ce.
+
+    # Dict to convert from an enum interger into a string.
+    _name_by_val =3D {v: k for k, v in dict(vars()).items() if isinstance(=
v, int)}
+
+    # Dict to convert from string to an enum-like integer value.
+    _name_to_val =3D {k: v for v, k in _name_by_val.items()}
+
+    @staticmethod
+    def to_name(val):
+        """Convert from an integer value from CToken enum into a string"""
+
+        return CToken._name_by_val.get(val, f"UNKNOWN({val})")
+
+    @staticmethod
+    def from_name(name):
+        """Convert a string into a CToken enum value"""
+        if name in CToken._name_to_val:
+            return CToken._name_to_val[name]
+
+        return CToken.MISMATCH
+
+    def __init__(self, kind, value, pos,
+                 brace_level, paren_level, bracket_level):
+        self.kind =3D kind
+        self.value =3D value
+        self.pos =3D pos
+        self.brace_level =3D brace_level
+        self.paren_level =3D paren_level
+        self.bracket_level =3D bracket_level
+
+    def __repr__(self):
+        name =3D self.to_name(self.kind)
+        if isinstance(self.value, str):
+            value =3D '"' + self.value + '"'
+        else:
+            value =3D self.value
+
+        return f"CToken({name}, {value}, {self.pos}, " \
+               f"{self.brace_level}, {self.paren_level}, {self.bracket_lev=
el})"
+
+#: Tokens to parse C code.
+TOKEN_LIST =3D [
+    (CToken.COMMENT, r"//[^\n]*|/\*[\s\S]*?\*/"),
+
+    (CToken.STRING,  r'"(?:\\.|[^"\\])*"'),
+    (CToken.CHAR,    r"'(?:\\.|[^'\\])'"),
+
+    (CToken.NUMBER,  r"0[xX][0-9a-fA-F]+[uUlL]*|0[0-7]+[uUlL]*|"
+                     r"[0-9]+(\.[0-9]*)?([eE][+-]?[0-9]+)?[fFlL]*"),
+
+    (CToken.PUNC,    r"[;,\.]"),
+
+    (CToken.BEGIN,   r"[\[\(\{]"),
+
+    (CToken.END,     r"[\]\)\}]"),
+
+    (CToken.CPP,     r"#\s*(define|include|ifdef|ifndef|if|else|elif|endif=
|undef|pragma)\b"),
+
+    (CToken.HASH,    r"#"),
+
+    (CToken.OP,      r"\+\+|\-\-|\->|=3D=3D|\!=3D|<=3D|>=3D|&&|\|\||<<|>>|=
\+=3D|\-=3D|\*=3D|/=3D|%=3D"
+                     r"|&=3D|\|=3D|\^=3D|=3D|\+|\-|\*|/|%|<|>|&|\||\^|~|!|=
\?|\:"),
+
+    (CToken.STRUCT,  r"\bstruct\b"),
+    (CToken.UNION,   r"\bunion\b"),
+    (CToken.ENUM,    r"\benum\b"),
+    (CToken.TYPEDEF, r"\bkinddef\b"),
+
+    (CToken.NAME,      r"[A-Za-z_][A-Za-z0-9_]*"),
+
+    (CToken.SPACE,   r"[\s]+"),
+
+    (CToken.MISMATCH,r"."),
+]
+
+#: Handle C continuation lines.
+RE_CONT =3D KernRe(r"\\\n")
+
+RE_COMMENT_START =3D KernRe(r'/\*\s*')
+
+#: tokenizer regex. Will be filled at the first CTokenizer usage.
+re_scanner =3D None
+
+class CTokenizer():
+    """
+    Scan C statements and definitions and produce tokens.
+
+    When converted to string, it drops comments and handle public/private
+    values, respecting depth.
+    """
+
+    # This class is inspired and follows the basic concepts of:
+    #   https://docs.python.org/3/library/re.html#writing-a-tokenizer
+
+    def _tokenize(self, source):
+        """
+        Interactor that parses ``source``, splitting it into tokens, as de=
fined
+        at ``self.TOKEN_LIST``.
+
+        The interactor returns a CToken class object.
+        """
+
+        # Handle continuation lines. Note that kdoc_parser already has a
+        # logic to do that. Still, let's keep it for completeness, as we m=
ight
+        # end re-using this tokenizer outsize kernel-doc some day - or we =
may
+        # eventually remove from there as a future cleanup.
+        source =3D RE_CONT.sub("", source)
+
+        brace_level =3D 0
+        paren_level =3D 0
+        bracket_level =3D 0
+
+        for match in re_scanner.finditer(source):
+            kind =3D CToken.from_name(match.lastgroup)
+            pos =3D match.start()
+            value =3D match.group()
+
+            if kind =3D=3D CToken.MISMATCH:
+                raise RuntimeError(f"Unexpected token '{value}' on {pos}:\=
n\t{source}")
+            elif kind =3D=3D CToken.BEGIN:
+                if value =3D=3D '(':
+                    paren_level +=3D 1
+                elif value =3D=3D '[':
+                    bracket_level +=3D 1
+                else:  # value =3D=3D '{'
+                    brace_level +=3D 1
+
+            elif kind =3D=3D CToken.END:
+                if value =3D=3D ')' and paren_level > 0:
+                    paren_level -=3D 1
+                elif value =3D=3D ']' and bracket_level > 0:
+                    bracket_level -=3D 1
+                elif brace_level > 0:    # value =3D=3D '}'
+                    brace_level -=3D 1
+
+            yield CToken(kind, value, pos,
+                         brace_level, paren_level, bracket_level)
+
+    def __init__(self, source):
+        """
+        Create a regular expression to handle TOKEN_LIST.
+
+        While I generally don't like using regex group naming via:
+            (?P<name>...)
+
+        in this particular case, it makes sense, as we can pick the name
+        when matching a code via re_scanner().
+        """
+        global re_scanner
+
+        if not re_scanner:
+            re_tokens =3D []
+
+            for kind, pattern in TOKEN_LIST:
+                name =3D CToken.to_name(kind)
+                re_tokens.append(f"(?P<{name}>{pattern})")
+
+            re_scanner =3D KernRe("|".join(re_tokens), re.MULTILINE | re.D=
OTALL)
+
+        self.tokens =3D []
+        for tok in self._tokenize(source):
+            self.tokens.append(tok)
+
+    def __str__(self):
+        out=3D""
+        show_stack =3D [True]
+
+        for tok in self.tokens:
+            if tok.kind =3D=3D CToken.BEGIN:
+                show_stack.append(show_stack[-1])
+
+            elif tok.kind =3D=3D CToken.END:
+                prev =3D show_stack[-1]
+                if len(show_stack) > 1:
+                    show_stack.pop()
+
+                if not prev and show_stack[-1]:
+                    #
+                    # Try to preserve indent
+                    #
+                    out +=3D "\t" * (len(show_stack) - 1)
+
+                    out +=3D str(tok.value)
+                    continue
+
+            elif tok.kind =3D=3D CToken.COMMENT:
+                comment =3D RE_COMMENT_START.sub("", tok.value)
+
+                if comment.startswith("private:"):
+                    show_stack[-1] =3D False
+                    show =3D False
+                elif comment.startswith("public:"):
+                    show_stack[-1] =3D True
+
+                continue
+
+            if show_stack[-1]:
+                    out +=3D str(tok.value)
+
+        return out
diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/k=
doc_parser.py
index 6b181ead3175..e804e61b09c0 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -13,7 +13,8 @@ import sys
 import re
 from pprint import pformat
=20
-from kdoc.kdoc_re import NestedMatch, KernRe, CTokenizer
+from kdoc.kdoc_re import NestedMatch, KernRe
+from kdoc.c_lex import CTokenizer
 from kdoc.kdoc_item import KdocItem
=20
 #
diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_=
re.py
index 7bed4e9a8810..ba601a4f5035 100644
--- a/tools/lib/python/kdoc/kdoc_re.py
+++ b/tools/lib/python/kdoc/kdoc_re.py
@@ -141,239 +141,6 @@ class KernRe:
=20
         return self.last_match.groups()
=20
-class TokType():
-
-    @staticmethod
-    def __str__(val):
-        """Return the name of an enum value"""
-        return TokType._name_by_val.get(val, f"UNKNOWN({val})")
-
-class CToken():
-    """
-    Data class to define a C token.
-    """
-
-    # Tokens that can be used by the parser. Works like an C enum.
-
-    COMMENT =3D 0     #: A standard C or C99 comment, including delimiter.
-    STRING =3D 1      #: A string, including quotation marks.
-    CHAR =3D 2        #: A character, including apostophes.
-    NUMBER =3D 3      #: A number.
-    PUNC =3D 4        #: A puntuation mark: ``;`` / ``,`` / ``.``.
-    BEGIN =3D 5       #: A begin character: ``{`` / ``[`` / ``(``.
-    END =3D 6         #: A end character: ``}`` / ``]`` / ``)``.
-    CPP =3D 7         #: A preprocessor macro.
-    HASH =3D 8        #: The hash character - useful to handle other macro=
s.
-    OP =3D 9          #: A C operator (add, subtract, ...).
-    STRUCT =3D 10     #: A ``struct`` keyword.
-    UNION =3D 11      #: An ``union`` keyword.
-    ENUM =3D 12       #: A ``struct`` keyword.
-    TYPEDEF =3D 13    #: A ``typedef`` keyword.
-    NAME =3D 14       #: A name. Can be an ID or a type.
-    SPACE =3D 15      #: Any space characters, including new lines
-
-    MISMATCH =3D 255  #: an error indicator: should never happen in practi=
ce.
-
-    # Dict to convert from an enum interger into a string.
-    _name_by_val =3D {v: k for k, v in dict(vars()).items() if isinstance(=
v, int)}
-
-    # Dict to convert from string to an enum-like integer value.
-    _name_to_val =3D {k: v for v, k in _name_by_val.items()}
-
-    @staticmethod
-    def to_name(val):
-        """Convert from an integer value from CToken enum into a string"""
-
-        return CToken._name_by_val.get(val, f"UNKNOWN({val})")
-
-    @staticmethod
-    def from_name(name):
-        """Convert a string into a CToken enum value"""
-        if name in CToken._name_to_val:
-            return CToken._name_to_val[name]
-
-        return CToken.MISMATCH
-
-    def __init__(self, kind, value, pos,
-                 brace_level, paren_level, bracket_level):
-        self.kind =3D kind
-        self.value =3D value
-        self.pos =3D pos
-        self.brace_level =3D brace_level
-        self.paren_level =3D paren_level
-        self.bracket_level =3D bracket_level
-
-    def __repr__(self):
-        name =3D self.to_name(self.kind)
-        if isinstance(self.value, str):
-            value =3D '"' + self.value + '"'
-        else:
-            value =3D self.value
-
-        return f"CToken({name}, {value}, {self.pos}, " \
-               f"{self.brace_level}, {self.paren_level}, {self.bracket_lev=
el})"
-
-#: Tokens to parse C code.
-TOKEN_LIST =3D [
-    (CToken.COMMENT, r"//[^\n]*|/\*[\s\S]*?\*/"),
-
-    (CToken.STRING,  r'"(?:\\.|[^"\\])*"'),
-    (CToken.CHAR,    r"'(?:\\.|[^'\\])'"),
-
-    (CToken.NUMBER,  r"0[xX][0-9a-fA-F]+[uUlL]*|0[0-7]+[uUlL]*|"
-                     r"[0-9]+(\.[0-9]*)?([eE][+-]?[0-9]+)?[fFlL]*"),
-
-    (CToken.PUNC,    r"[;,\.]"),
-
-    (CToken.BEGIN,   r"[\[\(\{]"),
-
-    (CToken.END,     r"[\]\)\}]"),
-
-    (CToken.CPP,     r"#\s*(define|include|ifdef|ifndef|if|else|elif|endif=
|undef|pragma)\b"),
-
-    (CToken.HASH,    r"#"),
-
-    (CToken.OP,      r"\+\+|\-\-|\->|=3D=3D|\!=3D|<=3D|>=3D|&&|\|\||<<|>>|=
\+=3D|\-=3D|\*=3D|/=3D|%=3D"
-                     r"|&=3D|\|=3D|\^=3D|=3D|\+|\-|\*|/|%|<|>|&|\||\^|~|!|=
\?|\:"),
-
-    (CToken.STRUCT,  r"\bstruct\b"),
-    (CToken.UNION,   r"\bunion\b"),
-    (CToken.ENUM,    r"\benum\b"),
-    (CToken.TYPEDEF, r"\bkinddef\b"),
-
-    (CToken.NAME,      r"[A-Za-z_][A-Za-z0-9_]*"),
-
-    (CToken.SPACE,   r"[\s]+"),
-
-    (CToken.MISMATCH,r"."),
-]
-
-#: Handle C continuation lines.
-RE_CONT =3D KernRe(r"\\\n")
-
-RE_COMMENT_START =3D KernRe(r'/\*\s*')
-
-#: tokenizer regex. Will be filled at the first CTokenizer usage.
-re_scanner =3D None
-
-class CTokenizer():
-    """
-    Scan C statements and definitions and produce tokens.
-
-    When converted to string, it drops comments and handle public/private
-    values, respecting depth.
-    """
-
-    # This class is inspired and follows the basic concepts of:
-    #   https://docs.python.org/3/library/re.html#writing-a-tokenizer
-
-    def _tokenize(self, source):
-        """
-        Interactor that parses ``source``, splitting it into tokens, as de=
fined
-        at ``self.TOKEN_LIST``.
-
-        The interactor returns a CToken class object.
-        """
-
-        # Handle continuation lines. Note that kdoc_parser already has a
-        # logic to do that. Still, let's keep it for completeness, as we m=
ight
-        # end re-using this tokenizer outsize kernel-doc some day - or we =
may
-        # eventually remove from there as a future cleanup.
-        source =3D RE_CONT.sub("", source)
-
-        brace_level =3D 0
-        paren_level =3D 0
-        bracket_level =3D 0
-
-        for match in re_scanner.finditer(source):
-            kind =3D CToken.from_name(match.lastgroup)
-            pos =3D match.start()
-            value =3D match.group()
-
-            if kind =3D=3D CToken.MISMATCH:
-                raise RuntimeError(f"Unexpected token '{value}' on {pos}:\=
n\t{source}")
-            elif kind =3D=3D CToken.BEGIN:
-                if value =3D=3D '(':
-                    paren_level +=3D 1
-                elif value =3D=3D '[':
-                    bracket_level +=3D 1
-                else:  # value =3D=3D '{'
-                    brace_level +=3D 1
-
-            elif kind =3D=3D CToken.END:
-                if value =3D=3D ')' and paren_level > 0:
-                    paren_level -=3D 1
-                elif value =3D=3D ']' and bracket_level > 0:
-                    bracket_level -=3D 1
-                elif brace_level > 0:    # value =3D=3D '}'
-                    brace_level -=3D 1
-
-            yield CToken(kind, value, pos,
-                         brace_level, paren_level, bracket_level)
-
-    def __init__(self, source):
-        """
-        Create a regular expression to handle TOKEN_LIST.
-
-        While I generally don't like using regex group naming via:
-            (?P<name>...)
-
-        in this particular case, it makes sense, as we can pick the name
-        when matching a code via re_scanner().
-        """
-        global re_scanner
-
-        if not re_scanner:
-            re_tokens =3D []
-
-            for kind, pattern in TOKEN_LIST:
-                name =3D CToken.to_name(kind)
-                re_tokens.append(f"(?P<{name}>{pattern})")
-
-            re_scanner =3D KernRe("|".join(re_tokens), re.MULTILINE | re.D=
OTALL)
-
-        self.tokens =3D []
-        for tok in self._tokenize(source):
-            self.tokens.append(tok)
-
-    def __str__(self):
-        out=3D""
-        show_stack =3D [True]
-
-        for tok in self.tokens:
-            if tok.kind =3D=3D CToken.BEGIN:
-                show_stack.append(show_stack[-1])
-
-            elif tok.kind =3D=3D CToken.END:
-                prev =3D show_stack[-1]
-                if len(show_stack) > 1:
-                    show_stack.pop()
-
-                if not prev and show_stack[-1]:
-                    #
-                    # Try to preserve indent
-                    #
-                    out +=3D "\t" * (len(show_stack) - 1)
-
-                    out +=3D str(tok.value)
-                    continue
-
-            elif tok.kind =3D=3D CToken.COMMENT:
-                comment =3D RE_COMMENT_START.sub("", tok.value)
-
-                if comment.startswith("private:"):
-                    show_stack[-1] =3D False
-                    show =3D False
-                elif comment.startswith("public:"):
-                    show_stack[-1] =3D True
-
-                continue
-
-            if show_stack[-1]:
-                    out +=3D str(tok.value)
-
-        return out
-
=20
 #: Nested delimited pairs (brackets and parenthesis)
 DELIMITER_PAIRS =3D {
--=20
2.53.0
From nobody Tue Apr  7 18:00:33 2026
Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org
 [10.30.226.201])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id ADAE0375F78;
	Thu, 12 Mar 2026 07:12:33 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=10.30.226.201
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1773299553; cv=none;
 b=tkkjv8FCoqBVLWOuqS2RdEMR6iDdeaRxkkDdXpU+5cuLNfL0FeS27if4wBY2Jxzr/5MTMmNTd/a7Mteq6rfyatrqdrmkGsOYayQTn7s89i8GsxvW63HtxfX4Q3US6U4SurSFsHzgl5ZmSx9aNnPAc5wF7rEFsMqGGXtEmrZ3o18=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1773299553; c=relaxed/simple;
	bh=GfkGEBCrWYbAWOv2b+GqnMJXIitm+in5/jhJ5VsTDNc=;
	h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:
	 MIME-Version:Content-Type;
 b=mrrfgaIS5mjbPiqHkxKQC27oVCj232c9XvlAtbWettIOH4CRCIct/5gV5Rv+B6CpyE+fACadELYQbVR9oyVIo7gPXo0Fuzw+TH6mNBjJQ3h98qRMwra+3UsSO/RQXqPIgtIjE5RE6I8+BUcD3Tq+q6KJZVKev8ByXE7VR19lHl8=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b=exeJ/KkF; arc=none smtp.client-ip=10.30.226.201
Authentication-Results: smtp.subspace.kernel.org;
	dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b="exeJ/KkF"
Received: by smtp.kernel.org (Postfix) with ESMTPSA id 50D07C4AF09;
	Thu, 12 Mar 2026 07:12:33 +0000 (UTC)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org;
	s=k20201202; t=1773299553;
	bh=GfkGEBCrWYbAWOv2b+GqnMJXIitm+in5/jhJ5VsTDNc=;
	h=From:To:Cc:Subject:Date:In-Reply-To:References:From;
	b=exeJ/KkFlke8EuCjetR+4XhTRz1RnUpXnB5etMyQxdguPpfXwY0yfVT1TO6AqIZTP
	 AagBrDxBzMNUlbuz/6f4yvwx/ChgA6v0B/1PNSIhjBBsnWRE3AwA/hEsBJ04HK2AXC
	 meI9j8DyyS14K98c4Mjtbtf0UBOKDtxiWwR+viqzVvPevvd59eB2MPWWq1FZQphATg
	 Q0pBHySrxVV0ArvkBpDqLMHImfu6XUdCX0ZDW/44WHQAYwNr2mIc0kqnMK5YSkHSbC
	 7NALyOvRhi/VvFCcm58C0ap3izhcwroLvRnaxgtlVvkgpzRfq0tVsyTRVQvZNs3YGc
	 ONlA1+C9aiLww==
Received: from mchehab by mail.kernel.org with local (Exim 4.99.1)
	(envelope-from <mchehab+huawei@kernel.org>)
	id 1w0aDf-000000077gW-26qb;
	Thu, 12 Mar 2026 08:12:31 +0100
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
To: Jonathan Corbet <corbet@lwn.net>,
	Linux Doc Mailing List <linux-doc@vger.kernel.org>
Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>,
	linux-hardening@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	Mauro Carvalho Chehab <mchehab@kernel.org>
Subject: [PATCH v2 08/20] unittests: test_private: modify it to use CTokenizer
 directly
Date: Thu, 12 Mar 2026 08:12:16 +0100
Message-ID: 
 <eeb01b2c5e7f6f67a4864f0361a1ba7ad58e3747.1773297828.git.mchehab+huawei@kernel.org>
X-Mailer: git-send-email 2.53.0
In-Reply-To: <cover.1773297828.git.mchehab+huawei@kernel.org>
References: <cover.1773297828.git.mchehab+huawei@kernel.org>
Precedence: bulk
X-Mailing-List: linux-kernel@vger.kernel.org
List-Id: <linux-kernel.vger.kernel.org>
List-Subscribe: <mailto:linux-kernel+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-kernel+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
Sender: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>

Change the logic to use the tokenizer directly. This allows
adding more unit tests to check the validity of the tokenizer
itself.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Message-ID: <2672257233ff73a9464c09b50924be51e25d4f59.1773074166.git.mcheha=
b+huawei@kernel.org>
---
 .../{test_private.py =3D> test_tokenizer.py}    | 76 +++++++++++++------
 1 file changed, 52 insertions(+), 24 deletions(-)
 rename tools/unittests/{test_private.py =3D> test_tokenizer.py} (85%)

diff --git a/tools/unittests/test_private.py b/tools/unittests/test_tokeniz=
er.py
similarity index 85%
rename from tools/unittests/test_private.py
rename to tools/unittests/test_tokenizer.py
index eae245ae8a12..da0f2c4c9e21 100755
--- a/tools/unittests/test_private.py
+++ b/tools/unittests/test_tokenizer.py
@@ -15,20 +15,44 @@ from unittest.mock import MagicMock
 SRC_DIR =3D os.path.dirname(os.path.realpath(__file__))
 sys.path.insert(0, os.path.join(SRC_DIR, "../lib/python"))
=20
-from kdoc.kdoc_parser import trim_private_members
+from kdoc.kdoc_re import CTokenizer
 from unittest_helper import run_unittest
=20
+
+
 #
 # List of tests.
 #
 # The code will dynamically generate one test for each key on this diction=
ary.
 #
=20
+def make_private_test(name, data):
+    """
+    Create a test named ``name`` using parameters given by ``data`` dict.
+    """
+
+    def test(self):
+        """In-lined lambda-like function to run the test"""
+        tokens =3D CTokenizer(data["source"])
+        result =3D str(tokens)
+
+        #
+        # Avoid whitespace false positives
+        #
+        result =3D re.sub(r"\s++", " ", result).strip()
+        expected =3D re.sub(r"\s++", " ", data["trimmed"]).strip()
+
+        msg =3D f"failed when parsing this source:\n{data['source']}"
+        self.assertEqual(result, expected, msg=3Dmsg)
+
+    return test
+
 #: Tests to check if CTokenizer is handling properly public/private commen=
ts.
 TESTS_PRIVATE =3D {
     #
     # Simplest case: no private. Ensure that trimming won't affect struct
     #
+    "__run__": make_private_test,
     "no private": {
         "source": """
             struct foo {
@@ -288,41 +312,45 @@ TESTS_PRIVATE =3D {
     },
 }
=20
+#: Dict containing all test groups fror CTokenizer
+TESTS =3D {
+    "TestPublicPrivate": TESTS_PRIVATE,
+}
=20
-class TestPublicPrivate(unittest.TestCase):
-    """
-    Main test class. Populated dynamically at runtime.
-    """
+def setUp(self):
+    self.maxDiff =3D None
=20
-    def setUp(self):
-        self.maxDiff =3D None
+def build_test_class(group_name, table):
+    """
+    Dynamically creates a class instance using type() as a generator
+    for a new class derivated from unittest.TestCase.
=20
-    def add_test(cls, name, source, trimmed):
-        """
-        Dynamically add a test to the class
-        """
-        def test(cls):
-            result =3D trim_private_members(source)
+    We're opting to do it inside a function to avoid the risk of
+    changing the globals() dictionary.
+    """
=20
-            result =3D re.sub(r"\s++", " ", result).strip()
-            expected =3D re.sub(r"\s++", " ", trimmed).strip()
+    class_dict =3D {
+        "setUp": setUp
+    }
=20
-            msg =3D f"failed when parsing this source:\n" + source
+    run =3D table["__run__"]
=20
-            cls.assertEqual(result, expected, msg=3Dmsg)
+    for test_name, data in table.items():
+        if test_name =3D=3D "__run__":
+            continue
=20
-        test.__name__ =3D f'test {name}'
+        class_dict[f"test_{test_name}"] =3D run(test_name, data)
=20
-        setattr(TestPublicPrivate, test.__name__, test)
+    cls =3D type(group_name, (unittest.TestCase,), class_dict)
=20
+    return cls.__name__, cls
=20
 #
-# Populate TestPublicPrivate class
+# Create classes and add them to the global dictionary
 #
-test_class =3D TestPublicPrivate()
-for name, test in TESTS_PRIVATE.items():
-    test_class.add_test(name, test["source"], test["trimmed"])
-
+for group, table in TESTS.items():
+    t =3D build_test_class(group, table)
+    globals()[t[0]] =3D t[1]
=20
 #
 # main
--=20
2.53.0
From nobody Tue Apr  7 18:00:33 2026
Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org
 [10.30.226.201])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id AD9AF358389;
	Thu, 12 Mar 2026 07:12:33 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=10.30.226.201
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1773299553; cv=none;
 b=amQEU19erHRzwB3qXJoc0z0VVhI3BTFIm64o3/2SBRZyA9kTscvluAeZw+iK/oaN2gsJCEv3CZiMRGU5zgHDbCjLRQ2Mqw+NTNW4CsVmTj1/Si+gfFk84yE92sdt4KCF6GKmA1U5tM5bMb6AHwQgTQQyJJuC3PnwBcZ+g67hxFA=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1773299553; c=relaxed/simple;
	bh=PBAIltyp/3ki4zWgumnkgvMbL5HEay2TKxHmsD6xLjo=;
	h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:
	 MIME-Version:Content-Type;
 b=BP9n4s7tXn1igx6FRF8d2RtzS7+F0D6viHmDZtmBP/qET2Mui1FNPj+2MGTFEcGl/zYsMHYA34Y4Gi/ijnbzIpiLGu9f5ZhplWH60KyLJoKZ4k7se7sTP6Fp/0j/GhpQEIbiNqlbucDmzgqS/rUCJdtXqj01UnNb1SRcxXouLBA=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b=f56AkCuL; arc=none smtp.client-ip=10.30.226.201
Authentication-Results: smtp.subspace.kernel.org;
	dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b="f56AkCuL"
Received: by smtp.kernel.org (Postfix) with ESMTPSA id 55455C2BC87;
	Thu, 12 Mar 2026 07:12:33 +0000 (UTC)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org;
	s=k20201202; t=1773299553;
	bh=PBAIltyp/3ki4zWgumnkgvMbL5HEay2TKxHmsD6xLjo=;
	h=From:To:Cc:Subject:Date:In-Reply-To:References:From;
	b=f56AkCuLttQwji7AenyaHJwygBufbLaJkHjnk3UTnr6cJa2OCYNR424ZrLvFZG661
	 cG4BZ5F2kLLxwAHPfIYiN0i5l+uiWho1+GVSHwKzT774ZKFUuyCJYpxF2bHbThsf/q
	 E/IfFN856SABMjR8eGQkQzQaUg7ILvqGA+ZcEyhXwWmoQiRGyguoSnjL8fyxgkJTLR
	 OdBEz+uY7jF+9mUrYBggGVmusKHBHwmr9ijrbXwWGdAkqTIcwRW8a6z5bCVpAtpkG8
	 64cPWU0hl4Xx56XLnZvfCI5MUFIVTBEYlQtNk1Dm6q5exKIiM1/dyZaiIN+Kk4JSJB
	 mmIoh6jRcNZyQ==
Received: from mchehab by mail.kernel.org with local (Exim 4.99.1)
	(envelope-from <mchehab+huawei@kernel.org>)
	id 1w0aDf-000000077ga-2Dsr;
	Thu, 12 Mar 2026 08:12:31 +0100
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
To: Jonathan Corbet <corbet@lwn.net>,
	Linux Doc Mailing List <linux-doc@vger.kernel.org>
Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>,
	linux-hardening@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	Mauro Carvalho Chehab <mchehab@kernel.org>
Subject: [PATCH v2 09/20] unittests: test_tokenizer: check if the tokenizer
 works
Date: Thu, 12 Mar 2026 08:12:17 +0100
Message-ID: 
 <6afe36c248f0f9280ce0ed456d878a1f718794a5.1773297828.git.mchehab+huawei@kernel.org>
X-Mailer: git-send-email 2.53.0
In-Reply-To: <cover.1773297828.git.mchehab+huawei@kernel.org>
References: <cover.1773297828.git.mchehab+huawei@kernel.org>
Precedence: bulk
X-Mailing-List: linux-kernel@vger.kernel.org
List-Id: <linux-kernel.vger.kernel.org>
List-Subscribe: <mailto:linux-kernel+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-kernel+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
Sender: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>

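Add extra tests to check if the tokenizer is working properly.

To keep the expected token lists short, give every CToken constructor
argument except the kind a default value.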

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/c_lex.py    |   4 +-
 tools/unittests/test_tokenizer.py | 109 +++++++++++++++++++++++++++++-
 2 files changed, 108 insertions(+), 5 deletions(-)

diff --git a/tools/lib/python/kdoc/c_lex.py b/tools/lib/python/kdoc/c_lex.py
index a104c29b63fb..38f70e836eb8 100644
--- a/tools/lib/python/kdoc/c_lex.py
+++ b/tools/lib/python/kdoc/c_lex.py
@@ -58,8 +58,8 @@ class CToken():
=20
         return CToken.MISMATCH
=20
-    def __init__(self, kind, value, pos,
-                 brace_level, paren_level, bracket_level):
+    def __init__(self, kind, value=3DNone, pos=3D0,
+                 brace_level=3D0, paren_level=3D0, bracket_level=3D0):
         self.kind =3D kind
         self.value =3D value
         self.pos =3D pos
diff --git a/tools/unittests/test_tokenizer.py b/tools/unittests/test_token=
izer.py
index da0f2c4c9e21..efb1d1687811 100755
--- a/tools/unittests/test_tokenizer.py
+++ b/tools/unittests/test_tokenizer.py
@@ -15,16 +15,118 @@ from unittest.mock import MagicMock
 SRC_DIR =3D os.path.dirname(os.path.realpath(__file__))
 sys.path.insert(0, os.path.join(SRC_DIR, "../lib/python"))
=20
-from kdoc.kdoc_re import CTokenizer
+from kdoc.c_lex import CToken, CTokenizer
 from unittest_helper import run_unittest
=20
-
-
 #
 # List of tests.
 #
 # The code will dynamically generate one test for each key on this diction=
ary.
 #
+def tokens_to_list(tokens):
+    tuples =3D []
+
+    for tok in tokens:
+        if tok.kind =3D=3D CToken.SPACE:
+            continue
+
+        tuples +=3D [(tok.kind, tok.value,
+                    tok.brace_level, tok.paren_level, tok.bracket_level)]
+
+    return tuples
+
+
+def make_tokenizer_test(name, data):
+    """
+    Create a test named ``name`` using parameters given by ``data`` dict.
+    """
+
+    def test(self):
+        """In-lined lambda-like function to run the test"""
+
+        #
+        # Check if exceptions are properly handled
+        #
+        if "raises" in data:
+            with self.assertRaises(data["raises"]):
+                CTokenizer(data["source"])
+            return
+
+        #
+        # Check if tokenizer is producing expected results
+        #
+        tokens =3D CTokenizer(data["source"]).tokens
+
+        result =3D tokens_to_list(tokens)
+        expected =3D tokens_to_list(data["expected"])
+
+        self.assertEqual(result, expected, msg=3Df"{name}")
+
+    return test
+
+#: Tokenizer tests.
+TESTS_TOKENIZER =3D {
+    "__run__": make_tokenizer_test,
+
+    "basic_tokens": {
+        "source": """
+            int a; // comment
+            float b =3D 1.23;
+        """,
+        "expected": [
+            CToken(CToken.NAME, "int"),
+            CToken(CToken.NAME, "a"),
+            CToken(CToken.PUNC, ";"),
+            CToken(CToken.COMMENT, "// comment"),
+            CToken(CToken.NAME, "float"),
+            CToken(CToken.NAME, "b"),
+            CToken(CToken.OP, "=3D"),
+            CToken(CToken.NUMBER, "1.23"),
+            CToken(CToken.PUNC, ";"),
+        ],
+    },
+
+    "depth_counters": {
+        "source": """
+            struct X {
+                int arr[10];
+                func(a[0], (b + c));
+            }
+        """,
+        "expected": [
+            CToken(CToken.STRUCT, "struct"),
+            CToken(CToken.NAME, "X"),
+            CToken(CToken.BEGIN, "{", brace_level=3D1),
+
+            CToken(CToken.NAME, "int", brace_level=3D1),
+            CToken(CToken.NAME, "arr", brace_level=3D1),
+            CToken(CToken.BEGIN, "[", brace_level=3D1, bracket_level=3D1),
+            CToken(CToken.NUMBER, "10", brace_level=3D1, bracket_level=3D1=
),
+            CToken(CToken.END, "]", brace_level=3D1),
+            CToken(CToken.PUNC, ";", brace_level=3D1),
+            CToken(CToken.NAME, "func", brace_level=3D1),
+            CToken(CToken.BEGIN, "(", brace_level=3D1, paren_level=3D1),
+            CToken(CToken.NAME, "a", brace_level=3D1, paren_level=3D1),
+            CToken(CToken.BEGIN, "[", brace_level=3D1, paren_level=3D1, br=
acket_level=3D1),
+            CToken(CToken.NUMBER, "0", brace_level=3D1, paren_level=3D1, b=
racket_level=3D1),
+            CToken(CToken.END, "]", brace_level=3D1, paren_level=3D1),
+            CToken(CToken.PUNC, ",", brace_level=3D1, paren_level=3D1),
+            CToken(CToken.BEGIN, "(", brace_level=3D1, paren_level=3D2),
+            CToken(CToken.NAME, "b", brace_level=3D1, paren_level=3D2),
+            CToken(CToken.OP, "+", brace_level=3D1, paren_level=3D2),
+            CToken(CToken.NAME, "c", brace_level=3D1, paren_level=3D2),
+            CToken(CToken.END, ")", brace_level=3D1, paren_level=3D1),
+            CToken(CToken.END, ")", brace_level=3D1),
+            CToken(CToken.PUNC, ";", brace_level=3D1),
+            CToken(CToken.END, "}"),
+        ],
+    },
+
+    "mismatch_error": {
+        "source": "int a$ =3D 5;",          # $ is illegal
+        "raises": RuntimeError,
+    },
+}
=20
 def make_private_test(name, data):
     """
@@ -315,6 +417,7 @@ TESTS_PRIVATE =3D {
 #: Dict containing all test groups fror CTokenizer
 TESTS =3D {
     "TestPublicPrivate": TESTS_PRIVATE,
+    "TestTokenizer": TESTS_TOKENIZER,
 }
=20
 def setUp(self):
--=20
2.53.0
From nobody Tue Apr  7 18:00:33 2026
Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org
 [10.30.226.201])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id DF2303AD506;
	Thu, 12 Mar 2026 07:12:33 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=10.30.226.201
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1773299553; cv=none;
 b=qkoMlkBq4W/qp4DxhrL0PyQF+YLQ1dsuo1lT+nLqSjgUYmvLYZ86I2JD+duNXdR1u11aoGA1mz9mm48422MG1Yc2hkNkW1BidAlwH//8vUgQT6bR7kzVR4IbhLLBRQDhLrkansGOs/sGwgGguhc8Nkp4BJsymK2JpK+TUDzXd6Q=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1773299553; c=relaxed/simple;
	bh=W96OAjKB97J1xqMhi+cvTRiL2/QBqbUoTe3vi3d5Tzg=;
	h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:
	 MIME-Version:Content-Type;
 b=hlK6HFnX5m6zR0JFHBbfyBxF2yVBSyxP8chpvA32gWNBpV0zYX9QOge/pSPy5rgOE8ILTn41MRU8yfJn4GAtm3N/Hsj010lSxTlizjSUT/mdti4kb18DuYpOzTHat3a1AIxPvwFOnmqHBFRkGzEfJPkcd9DRcIN6WsoGhXaktAU=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b=f3yLNonJ; arc=none smtp.client-ip=10.30.226.201
Authentication-Results: smtp.subspace.kernel.org;
	dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b="f3yLNonJ"
Received: by smtp.kernel.org (Postfix) with ESMTPSA id 693DAC2BCB4;
	Thu, 12 Mar 2026 07:12:33 +0000 (UTC)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org;
	s=k20201202; t=1773299553;
	bh=W96OAjKB97J1xqMhi+cvTRiL2/QBqbUoTe3vi3d5Tzg=;
	h=From:To:Cc:Subject:Date:In-Reply-To:References:From;
	b=f3yLNonJjnVHSI8xp/o9CqQfdBeRZL0XpKAhBwJpO+BFSU5s/C1zMyWhkq1XrtThR
	 0f8SoE6nIgSIxzJVQvBrN7soi5yP4fb/452HtZo2whWdMhinWRxvTKowFlQZc5DZcx
	 7/6XuAL6TEln5iKuTRyIHxmhgvWZIpPTyIwK4Djy974dD10FXJoFsxE/am7FTkNzbn
	 b2dH6+RODjhTU9c0P1GZ4oP/fVI1alfO8TGr1kTHLY7Du7oJ7czUbyNQqHoNdYVHNF
	 DecJVahFNXKHnP2cWAsTJLk9RUSpnvy22kfvEtUQ/uN1Oa2SMmrvi1K3D/WB/e1UNP
	 OPcetQ0C/KO4g==
Received: from mchehab by mail.kernel.org with local (Exim 4.99.1)
	(envelope-from <mchehab+huawei@kernel.org>)
	id 1w0aDf-000000077ge-2LMt;
	Thu, 12 Mar 2026 08:12:31 +0100
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
To: Jonathan Corbet <corbet@lwn.net>,
	Linux Doc Mailing List <linux-doc@vger.kernel.org>
Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>,
	linux-hardening@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	Mauro Carvalho Chehab <mchehab@kernel.org>
Subject: [PATCH v2 10/20] unittests: add a runner to execute all unittests
Date: Thu, 12 Mar 2026 08:12:18 +0100
Message-ID: 
 <fb0bcc317d30200b0e91a3c18150e7394844b6d8.1773297828.git.mchehab+huawei@kernel.org>
X-Mailer: git-send-email 2.53.0
In-Reply-To: <cover.1773297828.git.mchehab+huawei@kernel.org>
References: <cover.1773297828.git.mchehab+huawei@kernel.org>
Precedence: bulk
X-Mailing-List: linux-kernel@vger.kernel.org
List-Id: <linux-kernel.vger.kernel.org>
List-Subscribe: <mailto:linux-kernel+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-kernel+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
Sender: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>

We'll soon have multiple unit tests, add a runner that will
discover all of them and execute all tests.

Only files whose names start with "test" are discovered,
as this way unittest discovery won't try adding libraries or
other stuff that might not contain unittest classes.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 tools/unittests/run.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100755 tools/unittests/run.py

diff --git a/tools/unittests/run.py b/tools/unittests/run.py
new file mode 100755
index 000000000000..8c19036d43a1
--- /dev/null
+++ b/tools/unittests/run.py
@@ -0,0 +1,17 @@
+#!/bin/env python3
+import os
+import unittest
+import sys
+
+TOOLS_DIR=3Dos.path.join(os.path.dirname(os.path.realpath(__file__)), "..")
+sys.path.insert(0, TOOLS_DIR)
+
+from lib.python.unittest_helper import TestUnits
+
+if __name__ =3D=3D "__main__":
+    loader =3D unittest.TestLoader()
+
+    suite =3D loader.discover(start_dir=3Dos.path.join(TOOLS_DIR, "unittes=
ts"),
+                            pattern=3D"test*.py")
+
+    TestUnits().run("", suite=3Dsuite)
--=20
2.53.0
From nobody Tue Apr  7 18:00:33 2026
Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org
 [10.30.226.201])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id DED7A3AD505;
	Thu, 12 Mar 2026 07:12:33 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=10.30.226.201
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1773299553; cv=none;
 b=n4XyHCsortbtZ9bQrwhuoL76XoB6aHQAgVl3lBeSiJbKviEFUMIQOI4hARy0Vj4zcC1YQo5ZrLOsqldWdAtIouH9UvkAvD8Lca4m7+1ysPYkwvhlft1vNTaHycNRPDMs6Z4VYwBUTzvh04LmpL8tHDMaqB0//PltsgHepPKYLO0=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1773299553; c=relaxed/simple;
	bh=jMjtadqVu+g0IuJqU6X2U6Jm2wUokvvloT0ZwKYQV5c=;
	h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:
	 MIME-Version:Content-Type;
 b=Rbs0djCcTYue9CMavjuHs65kWsec9SIaqwDchM0Yec200Ua16uOONX65AejRzMUxgzHnMufukSM83KdQ/syGVcYEhH7u9iuXg/2L86lEwNqW+yukdclp1UHi6iKXfJTM0x8nWNEfF/4s9Nw6rAWTiOs6aWTIV/rqMxZX6GdMkP0=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b=fpfVBA7m; arc=none smtp.client-ip=10.30.226.201
Authentication-Results: smtp.subspace.kernel.org;
	dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b="fpfVBA7m"
Received: by smtp.kernel.org (Postfix) with ESMTPSA id 61EA4C19424;
	Thu, 12 Mar 2026 07:12:33 +0000 (UTC)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org;
	s=k20201202; t=1773299553;
	bh=jMjtadqVu+g0IuJqU6X2U6Jm2wUokvvloT0ZwKYQV5c=;
	h=From:To:Cc:Subject:Date:In-Reply-To:References:From;
	b=fpfVBA7mtXCDygsA6sOa5Rv0B+aL8HWyR4QLieSOvYQfRaFDs9YHLLwUPCI/UmKAv
	 58zSQlXJL+5mN0rlytcZkx6KJ5kkLVBxgxCIRzLAUCcEB6vKDvrzwq6lyFlbobv6bg
	 VWUx08PrJ1AJBfIR9Og7L0r9NGk/f9BcZ8yAdM89nUBtsyiQ9jEqIMVl6qing1nuPT
	 hWKjfRvsv+Urlyc/fI1nCzeW/5TAVQDVV3YiZW4xlvhVMHaKUWRejjhqJGBrmcymgF
	 juISIWhGUzwL2WEMnVmgptlBrQjGet3tHJfuLk8biKLVX+66cEy1vcEe45276yy9vS
	 hJrEntptKJ07Q==
Received: from mchehab by mail.kernel.org with local (Exim 4.99.1)
	(envelope-from <mchehab+huawei@kernel.org>)
	id 1w0aDf-000000077gi-2SJp;
	Thu, 12 Mar 2026 08:12:31 +0100
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
To: Jonathan Corbet <corbet@lwn.net>,
	Linux Doc Mailing List <linux-doc@vger.kernel.org>
Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>,
	linux-hardening@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	Mauro Carvalho Chehab <mchehab@kernel.org>
Subject: [PATCH v2 11/20] docs: kdoc: create a CMatch to match nested C blocks
Date: Thu, 12 Mar 2026 08:12:19 +0100
Message-ID: 
 <06d21e2c38a313aec8f7c8a6df4674c41c47c23b.1773297828.git.mchehab+huawei@kernel.org>
X-Mailer: git-send-email 2.53.0
In-Reply-To: <cover.1773297828.git.mchehab+huawei@kernel.org>
References: <cover.1773297828.git.mchehab+huawei@kernel.org>
Precedence: bulk
X-Mailing-List: linux-kernel@vger.kernel.org
List-Id: <linux-kernel.vger.kernel.org>
List-Subscribe: <mailto:linux-kernel+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-kernel+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
Sender: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>

The NestedMatch code is complex, and will become even more complex
if we add support for arguments to it.

Now that we have a tokenizer, we can use a better solution
that is easier to understand.

Yet, to improve performance, it is better to make it use
previously tokenized code, changing its API.

So, reimplement NestedMatch using the CTokenizer class. Once it
is done, we can drop NestedMatch.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/c_lex.py    | 222 +++++++++++++++++++++++++++---
 tools/unittests/test_tokenizer.py |   3 +-
 2 files changed, 203 insertions(+), 22 deletions(-)

diff --git a/tools/lib/python/kdoc/c_lex.py b/tools/lib/python/kdoc/c_lex.py
index 38f70e836eb8..e986a4ad73e3 100644
--- a/tools/lib/python/kdoc/c_lex.py
+++ b/tools/lib/python/kdoc/c_lex.py
@@ -58,14 +58,13 @@ class CToken():
=20
         return CToken.MISMATCH
=20
+
     def __init__(self, kind, value=3DNone, pos=3D0,
                  brace_level=3D0, paren_level=3D0, bracket_level=3D0):
         self.kind =3D kind
         self.value =3D value
         self.pos =3D pos
-        self.brace_level =3D brace_level
-        self.paren_level =3D paren_level
-        self.bracket_level =3D bracket_level
+        self.level =3D (bracket_level, paren_level, brace_level)
=20
     def __repr__(self):
         name =3D self.to_name(self.kind)
@@ -74,8 +73,7 @@ class CToken():
         else:
             value =3D self.value
=20
-        return f"CToken({name}, {value}, {self.pos}, " \
-               f"{self.brace_level}, {self.paren_level}, {self.bracket_lev=
el})"
+        return f"CToken(CToken.{name}, {value}, {self.pos}, {self.level})"
=20
 #: Tokens to parse C code.
 TOKEN_LIST =3D [
@@ -105,20 +103,30 @@ TOKEN_LIST =3D [
     (CToken.ENUM,    r"\benum\b"),
     (CToken.TYPEDEF, r"\bkinddef\b"),
=20
-    (CToken.NAME,      r"[A-Za-z_][A-Za-z0-9_]*"),
+    (CToken.NAME,    r"[A-Za-z_][A-Za-z0-9_]*"),
=20
     (CToken.SPACE,   r"[\s]+"),
=20
     (CToken.MISMATCH,r"."),
 ]
=20
+def fill_re_scanner(token_list):
+    """Ancillary routine to convert TOKEN_LIST into a finditer regex"""
+    re_tokens =3D []
+
+    for kind, pattern in token_list:
+        name =3D CToken.to_name(kind)
+        re_tokens.append(f"(?P<{name}>{pattern})")
+
+    return KernRe("|".join(re_tokens), re.MULTILINE | re.DOTALL)
+
 #: Handle C continuation lines.
 RE_CONT =3D KernRe(r"\\\n")
=20
 RE_COMMENT_START =3D KernRe(r'/\*\s*')
=20
 #: tokenizer regex. Will be filled at the first CTokenizer usage.
-re_scanner =3D None
+RE_SCANNER =3D fill_re_scanner(TOKEN_LIST)
=20
 class CTokenizer():
     """
@@ -149,7 +157,7 @@ class CTokenizer():
         paren_level =3D 0
         bracket_level =3D 0
=20
-        for match in re_scanner.finditer(source):
+        for match in RE_SCANNER.finditer(source):
             kind =3D CToken.from_name(match.lastgroup)
             pos =3D match.start()
             value =3D match.group()
@@ -175,7 +183,7 @@ class CTokenizer():
             yield CToken(kind, value, pos,
                          brace_level, paren_level, bracket_level)
=20
-    def __init__(self, source):
+    def __init__(self, source=3DNone):
         """
         Create a regular expression to handle TOKEN_LIST.
=20
@@ -183,20 +191,18 @@ class CTokenizer():
             (?P<name>...)
=20
         in this particular case, it makes sense, as we can pick the name
-        when matching a code via re_scanner().
+        when matching a code via RE_SCANNER.
         """
-        global re_scanner
-
-        if not re_scanner:
-            re_tokens =3D []
-
-            for kind, pattern in TOKEN_LIST:
-                name =3D CToken.to_name(kind)
-                re_tokens.append(f"(?P<{name}>{pattern})")
-
-            re_scanner =3D KernRe("|".join(re_tokens), re.MULTILINE | re.D=
OTALL)
=20
         self.tokens =3D []
+
+        if not source:
+            return
+
+        if isinstance(source, list):
+            self.tokens =3D source
+            return
+
         for tok in self._tokenize(source):
             self.tokens.append(tok)
=20
@@ -237,3 +243,179 @@ class CTokenizer():
                     out +=3D str(tok.value)
=20
         return out
+
+
+class CMatch:
+    """
+    Finding nested delimiters is hard with regular expressions. It is
+    even harder on Python with its normal re module, as there are several
+    advanced regular expressions that are missing.
+
+    This is the case of this pattern::
+
+            '\\bSTRUCT_GROUP(\\(((?:(?>[^)(]+)|(?1))*)\\))[^;]*;'
+
+    which is used to properly match open/close parentheses of the
+    string search STRUCT_GROUP(),
+
+    Add a class that counts pairs of delimiters, using it to match and
+    replace nested expressions.
+
+    The original approach was suggested by:
+
+        https://stackoverflow.com/questions/5454322/python-how-to-match-ne=
sted-parentheses-with-regex
+
+    Although I re-implemented it to make it more generic and match 3 types
+    of delimiters. The logic checks if delimiters are paired. If not, it
+    will ignore the search string.
+    """
+
+    # TODO: make CMatch handle multiple match groups
+    #
+    # Right now, regular expressions to match it are defined only up to
+    #       the start delimiter, e.g.:
+    #
+    #       \bSTRUCT_GROUP\(
+    #
+    # is similar to: STRUCT_GROUP\((.*)\)
+    # except that the content inside the match group is delimiter-aligned.
+    #
+    # The content inside parentheses is converted into a single replace
+    # group (e.g. r`\0').
+    #
+    # It would be nice to change such definition to support multiple
+    # match groups, allowing a regex equivalent to:
+    #
+    #   FOO\((.*), (.*), (.*)\)
+    #
+    # it is probably easier to define it not as a regular expression, but
+    # with some lexical definition like:
+    #
+    #   FOO(arg1, arg2, arg3)
+
+    def __init__(self, regex):
+        self.regex =3D KernRe(regex)
+
+    def _search(self, tokenizer):
+        """
+        Finds paired blocks for a regex that ends with a delimiter.
+
+        The suggestion of using finditer to match pairs came from:
+        https://stackoverflow.com/questions/5454322/python-how-to-match-ne=
sted-parentheses-with-regex
+        but I ended using a different implementation to align all three ty=
pes
+        of delimiters and seek for an initial regular expression.
+
+        The algorithm seeks for open/close paired delimiters and places th=
em
+        into a stack, yielding a start/stop position of each match when the
+        stack is zeroed.
+
+        The algorithm should work fine for properly paired lines, but will
+        silently ignore end delimiters that precede a start delimiter.
+        This should be OK for kernel-doc parser, as unaligned delimiters
+        would cause compilation errors. So, we don't need to raise excepti=
ons
+        to cover such issues.
+        """
+
+        start =3D None
+        offset =3D -1
+        started =3D False
+
+        import sys
+
+        stack =3D []
+
+        for i, tok in enumerate(tokenizer.tokens):
+            if start is None:
+                if tok.kind =3D=3D CToken.NAME and self.regex.match(tok.va=
lue):
+                    start =3D i
+                    stack.append((start, tok.level))
+                    started =3D False
+
+                continue
+
+            if not started and tok.kind =3D=3D CToken.BEGIN:
+                started =3D True
+                continue
+
+            if tok.kind =3D=3D CToken.END and tok.level =3D=3D stack[-1][1=
]:
+                start, level =3D stack.pop()
+                offset =3D i
+
+                yield CTokenizer(tokenizer.tokens[start:offset + 1])
+                start =3D None
+
+        #
+        # If an END zeroing levels is not there, return remaining stuff
+        # This is meant to solve cases where the caller logic might be
+        # picking an incomplete block.
+        #
+        if start and offset < 0:
+            print("WARNING: can't find an end", file=3Dsys.stderr)
+            yield CTokenizer(tokenizer.tokens[start:])
+
+    def search(self, source):
+        """
+        This is similar to re.search:
+
+        It matches a regex that it is followed by a delimiter,
+        returning occurrences only if all delimiters are paired.
+        """
+
+        if isinstance(source, CTokenizer):
+            tokenizer =3D source
+            is_token =3D True
+        else:
+            tokenizer =3D CTokenizer(source)
+            is_token =3D False
+
+        for new_tokenizer in self._search(tokenizer):
+            if is_token:
+                yield new_tokenizer
+            else:
+                yield str(new_tokenizer)
+
+    def sub(self, sub, line, count=3D0):
+        """
+        This is similar to re.sub:
+
+        It matches a regex that it is followed by a delimiter,
+        replacing occurrences only if all delimiters are paired.
+
+        if the sub argument contains::
+
+            r'\0'
+
+        it will work just like re: it places there the matched paired data
+        with the delimiter stripped.
+
+        If count is different than zero, it will replace at most count
+        items.
+        """
+        if isinstance(source, CTokenizer):
+            is_token =3D True
+            tokenizer =3D source
+        else:
+            is_token =3D False
+            tokenizer =3D CTokenizer(source)
+
+        new_tokenizer =3D CTokenizer()
+        cur_pos =3D 0
+        for start, end in self._search(tokenizer):
+            new_tokenizer.tokens +=3D tokenizer.tokens[cur_pos:start]
+#            new_tokenizer.tokens +=3D [sub_str]
+
+            cur_pos =3D end + 1
+
+        if cur_pos:
+            new_tokenizer.tokens +=3D tokenizer.tokens[cur_pos:]
+
+        print(new_tokenizer.tokens)
+
+        return str(new_tokenizer)
+
+    def __repr__(self):
+        """
+        Returns a displayable version of the class init.
+        """
+
+        return f'CMatch("{self.regex.regex.pattern}")'
diff --git a/tools/unittests/test_tokenizer.py b/tools/unittests/test_token=
izer.py
index efb1d1687811..3081f27a7786 100755
--- a/tools/unittests/test_tokenizer.py
+++ b/tools/unittests/test_tokenizer.py
@@ -30,8 +30,7 @@ def tokens_to_list(tokens):
         if tok.kind =3D=3D CToken.SPACE:
             continue
=20
-        tuples +=3D [(tok.kind, tok.value,
-                    tok.brace_level, tok.paren_level, tok.bracket_level)]
+        tuples +=3D [(tok.kind, tok.value, tok.level)]
=20
     return tuples
=20
--=20
2.53.0
From nobody Tue Apr  7 18:00:33 2026
Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org
 [10.30.226.201])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id DBA8D3ACF1A;
	Thu, 12 Mar 2026 07:12:33 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=10.30.226.201
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1773299553; cv=none;
 b=o55g4NZT46Lf+Nz9EhqZHHhWPgMJApHYDhIS2K5WSJeM4VvM58vZwBmeyUfxgjMEEJxYTS2KOFTzUICpDtB7i3Ww6vdfyQkBmb7sTlypOf8Pq66Bauf9oxU/Dne8Yv3Sq0RYDb1R9SXfb2kU18PDR+kK7Az7QhWLmZ0sWMqEUnI=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1773299553; c=relaxed/simple;
	bh=itfvAha849vueKehFwP19BJZ38xmpRLLjqZpZ/9bymI=;
	h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:
	 MIME-Version:Content-Type;
 b=SmVSBb1z4EVn44i/7XpSRlvbe2C9Evl/p1v6XqnFtLXKHuj6kNoaiK4O0aZb4jfUs3Q4oovNWy2gv2Z+wtdAGConLr6yok0WTllAncNlZvbwgigRDdL6XNsgA8RFZ2aBpaffF4whqWZG5GmFuh57irVS1RE5m92pTlUtkcsl1fM=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b=nJrl74lv; arc=none smtp.client-ip=10.30.226.201
Authentication-Results: smtp.subspace.kernel.org;
	dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b="nJrl74lv"
Received: by smtp.kernel.org (Postfix) with ESMTPSA id 69B9CC2BCB5;
	Thu, 12 Mar 2026 07:12:33 +0000 (UTC)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org;
	s=k20201202; t=1773299553;
	bh=itfvAha849vueKehFwP19BJZ38xmpRLLjqZpZ/9bymI=;
	h=From:To:Cc:Subject:Date:In-Reply-To:References:From;
	b=nJrl74lv4heo+wRfbFh751Z6W0/21tyRzKKNUEPVgx5zR/bRqM+3AYZvTpygYU8do
	 JCzbQd5NHbx5fwi4iM6WA2qEkhtdaiNqRPNJtYwIDAkNB/ETsHgr5Zz9h6Qn9WeQSf
	 3dNuiUlbGX5sQAdBuPyY7dHMqSZogXncEM4cl2cuyHfzRGnmYWXLibZoYpmjIKEHgd
	 Yri0SVVQ9rAruHdA10k2hssykhajamOgK/ZMVQWQe5fYat02m5PZ8x5Rgu6tVM7ZIw
	 3KhHTvmbYjEXTtZgs1udKZ8fJ7+1M+bDETEpRc5emBSl51LGaH6wCoMpOMQqJF4I9J
	 qWhQs6yw53lTg==
Received: from mchehab by mail.kernel.org with local (Exim 4.99.1)
	(envelope-from <mchehab+huawei@kernel.org>)
	id 1w0aDf-000000077gn-2ZCv;
	Thu, 12 Mar 2026 08:12:31 +0100
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
To: Jonathan Corbet <corbet@lwn.net>,
	Linux Doc Mailing List <linux-doc@vger.kernel.org>
Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>,
	linux-hardening@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	Mauro Carvalho Chehab <mchehab@kernel.org>
Subject: [PATCH v2 12/20] tools: unittests: add tests for CMatch
Date: Thu, 12 Mar 2026 08:12:20 +0100
Message-ID: 
 <6724c26ededd3232f778e68f55ca915e5ea35d27.1773297828.git.mchehab+huawei@kernel.org>
X-Mailer: git-send-email 2.53.0
In-Reply-To: <cover.1773297828.git.mchehab+huawei@kernel.org>
References: <cover.1773297828.git.mchehab+huawei@kernel.org>
Precedence: bulk
X-Mailing-List: linux-kernel@vger.kernel.org
List-Id: <linux-kernel.vger.kernel.org>
List-Subscribe: <mailto:linux-kernel+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-kernel+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
Sender: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>

The CMatch logic is complex enough to justify tests to ensure
that it is doing its job.

Add unittests to check the functionality provided by CMatch
by replicating expected patterns.

The CMatch class deals with complex macros. Add a unittest
to check if it's doing the right thing and detect eventual regressions
as we improve its code.

The initial version was generated using gpt-oss:latest LLM
on my local GPU, as LLMs aren't bad transforming patterns
into unittests.

Yet, the current version contains only the skeleton of what
the LLM produced, as I ended up heavily changing its content to be
more representative and to have real case scenarios.

The kdoc_xforms test suite contains 3 test groups. Two of
them test the basic functionality of CMatch to
replace patterns.

The last one (TestRealUsecases) contains real code snippets
from the Kernel with some cleanups to better fit in 80 columns
and uses the same transforms as kernel-doc, thus allowing
to test the logic used inside kdoc_parser to transform
functions, structs and variable patterns.

Its output is like this:

        $ tools/unittests/kdoc_xforms.py
        Ran 25 tests in 0.003s

        OK
	test_cmatch:
	    TestSearch:
	        test_search_acquires_multiple:      OK
	        test_search_acquires_nested_paren:  OK
	        test_search_acquires_simple:        OK
	        test_search_must_hold:              OK
	        test_search_must_hold_shared:       OK
	        test_search_no_false_positive:      OK
	        test_search_no_function:            OK
	        test_search_no_macro_remains:       OK

        Ran 8 tests

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 tools/unittests/test_cmatch.py | 95 ++++++++++++++++++++++++++++++++++
 1 file changed, 95 insertions(+)
 create mode 100755 tools/unittests/test_cmatch.py

diff --git a/tools/unittests/test_cmatch.py b/tools/unittests/test_cmatch.py
new file mode 100755
index 000000000000..53b25aa4dc4a
--- /dev/null
+++ b/tools/unittests/test_cmatch.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2026: Mauro Carvalho Chehab <mchehab@kernel.org>.
+#
+# pylint: disable=3DC0413,R0904
+
+
+"""
+Unit tests for kernel-doc CMatch.
+"""
+
+import os
+import re
+import sys
+import unittest
+
+
+# Import Python modules
+
+SRC_DIR =3D os.path.dirname(os.path.realpath(__file__))
+sys.path.insert(0, os.path.join(SRC_DIR, "../lib/python"))
+
+from kdoc.c_lex import CMatch
+from kdoc.xforms_lists import CTransforms
+from unittest_helper import run_unittest
+
+#
+# Override unittest.TestCase to better compare diffs ignoring whitespaces
+#
+class TestCaseDiff(unittest.TestCase):
+    """
+    Disable maximum limit on diffs and add a method to better
+    handle diffs with whitespace differences.
+    """
+
+    @classmethod
+    def setUpClass(cls):
+        """Ensure that there won't be limit for diffs"""
+        cls.maxDiff =3D None
+
+
+#
+# Tests doing with different macros
+#
+
+class TestSearch(TestCaseDiff):
+    """
+    Test search mechanism
+    """
+
+    def test_search_acquires_simple(self):
+        line =3D "__acquires(ctx) foo();"
+        result =3D ", ".join(CMatch("__acquires").search(line))
+        self.assertEqual(result, "__acquires(ctx)")
+
+    def test_search_acquires_multiple(self):
+        line =3D "__acquires(ctx) __acquires(other) bar();"
+        result =3D ", ".join(CMatch("__acquires").search(line))
+        self.assertEqual(result, "__acquires(ctx), __acquires(other)")
+
+    def test_search_acquires_nested_paren(self):
+        line =3D "__acquires((ctx1, ctx2)) baz();"
+        result =3D ", ".join(CMatch("__acquires").search(line))
+        self.assertEqual(result, "__acquires((ctx1, ctx2))")
+
+    def test_search_must_hold(self):
+        line =3D "__must_hold(&lock) do_something();"
+        result =3D ", ".join(CMatch("__must_hold").search(line))
+        self.assertEqual(result, "__must_hold(&lock)")
+
+    def test_search_must_hold_shared(self):
+        line =3D "__must_hold_shared(RCU) other();"
+        result =3D ", ".join(CMatch("__must_hold_shared").search(line))
+        self.assertEqual(result, "__must_hold_shared(RCU)")
+
+    def test_search_no_false_positive(self):
+        line =3D "call__acquires(foo);  // should stay intact"
+        result =3D ", ".join(CMatch(r"\b__acquires").search(line))
+        self.assertEqual(result, "")
+
+    def test_search_no_macro_remains(self):
+        line =3D "do_something_else();"
+        result =3D ", ".join(CMatch("__acquires").search(line))
+        self.assertEqual(result, "")
+
+    def test_search_no_function(self):
+        line =3D "something"
+        result =3D ", ".join(CMatch(line).search(line))
+        self.assertEqual(result, "")
+
+#
+# Run all tests
+#
+if __name__ =3D=3D "__main__":
+    run_unittest(__file__)
--=20
2.53.0
From nobody Tue Apr  7 18:00:33 2026
Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org
 [10.30.226.201])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id 0F9F33AD53A;
	Thu, 12 Mar 2026 07:12:34 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=10.30.226.201
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1773299554; cv=none;
 b=h6XisaRhwAYxw77AKH7NnPBvqHa9M1QXt/fHG0RikxySDWujaUjFrWu0TMMli9P0NnIIRsGrFYSmVXMLIgxOiUipYkA2nwWXNRg15osLEnYASfFiMSCYSEF4Uu3qWCDdtGY1jvTqB5BboMTYTuZnH7yjHEEY+AkeRCN+jnD14W4=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1773299554; c=relaxed/simple;
	bh=2ZlwXMaSRC68Pnv5x980Sgp39v0FitI5caNfm6ZBkZ4=;
	h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:
	 MIME-Version:Content-Type;
 b=OyZTaYS/b2NO35HoUglz6eFTtpbw8xlX1r3HWM782EfUu7GprX81+z4JTm2FIADxvVhHK2QxZlJNuPefkooAx3Rm9XPKkq1kuAmE01RSqrd2rezBUkf/gfXutgGd9NeFukEpMcPHNk/sUTDnEgKbh8E9625lo0JgJDEoe64FdZQ=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b=ZkPKmdYW; arc=none smtp.client-ip=10.30.226.201
Authentication-Results: smtp.subspace.kernel.org;
	dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b="ZkPKmdYW"
Received: by smtp.kernel.org (Postfix) with ESMTPSA id 6C573C4AF0B;
	Thu, 12 Mar 2026 07:12:33 +0000 (UTC)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org;
	s=k20201202; t=1773299553;
	bh=2ZlwXMaSRC68Pnv5x980Sgp39v0FitI5caNfm6ZBkZ4=;
	h=From:To:Cc:Subject:Date:In-Reply-To:References:From;
	b=ZkPKmdYW1a08Lkss0/YNU8mZr0LxJLyAd9oHRJeJoa96fzAquTofCi0c7nmRGuWai
	 NUiCLmxUq07dlnj1tfB4CnjK3QhChFrO9HbckfW7B/4lLT8L056ecIx58vjwV2aiHx
	 4fmy5FFRRvHCTV0i3bHhulM3KcbGBXF8OK4/ZtyJAKNVtIsjpYsWyqVDJqZQfjHAoC
	 G6iuuwnNYA8DrSlRgCrwmm455AaLhh8gkEJZuvqj7j/HQHUSjYouhEuzPxfho1TFIg
	 RtQRpm3GYXGKga3OLykYzeGe6pf+0zmWdfzPe9IR0Cw+v/8leoSj01WEj+G/5NDRcE
	 M48y7VYf2VPPA==
Received: from mchehab by mail.kernel.org with local (Exim 4.99.1)
	(envelope-from <mchehab+huawei@kernel.org>)
	id 1w0aDf-000000077h9-2gOm;
	Thu, 12 Mar 2026 08:12:31 +0100
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
To: Jonathan Corbet <corbet@lwn.net>,
	Linux Doc Mailing List <linux-doc@vger.kernel.org>
Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>,
	linux-hardening@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	Mauro Carvalho Chehab <mchehab@kernel.org>
Subject: [PATCH v2 13/20] docs: c_lex: properly implement a sub() method for
 CMatch
Date: Thu, 12 Mar 2026 08:12:21 +0100
Message-ID: 
 <4c88a55b21910b1bae6838d74c8ab6b32e5c8213.1773297828.git.mchehab+huawei@kernel.org>
X-Mailer: git-send-email 2.53.0
In-Reply-To: <cover.1773297828.git.mchehab+huawei@kernel.org>
References: <cover.1773297828.git.mchehab+huawei@kernel.org>
Precedence: bulk
X-Mailing-List: linux-kernel@vger.kernel.org
List-Id: <linux-kernel.vger.kernel.org>
List-Subscribe: <mailto:linux-kernel+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-kernel+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
Sender: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>

Change the sub() method to do what is expected of it, parsing
backref arguments like \0, \1, \2, ...

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/c_lex.py | 240 +++++++++++++++++++++++++++------
 1 file changed, 202 insertions(+), 38 deletions(-)

diff --git a/tools/lib/python/kdoc/c_lex.py b/tools/lib/python/kdoc/c_lex.py
index e986a4ad73e3..98031cb7907c 100644
--- a/tools/lib/python/kdoc/c_lex.py
+++ b/tools/lib/python/kdoc/c_lex.py
@@ -10,6 +10,8 @@ Those help caching regular expressions and do matching fo=
r kernel-doc.
=20
 import re
=20
+from copy import copy
+
 from .kdoc_re import KernRe
=20
 class CToken():
@@ -36,6 +38,8 @@ class CToken():
     NAME =3D 14       #: A name. Can be an ID or a type.
     SPACE =3D 15      #: Any space characters, including new lines
=20
+    BACKREF =3D 16  #: Not a valid C sequence, but used at sub regex patte=
rns.
+
     MISMATCH =3D 255  #: an error indicator: should never happen in practi=
ce.
=20
     # Dict to convert from an enum interger into a string.
@@ -107,6 +111,8 @@ TOKEN_LIST =3D [
=20
     (CToken.SPACE,   r"[\s]+"),
=20
+    (CToken.BACKREF, r"\\\d+"),
+
     (CToken.MISMATCH,r"."),
 ]
=20
@@ -245,6 +251,167 @@ class CTokenizer():
         return out
=20
=20
+class CTokenArgs:
+    """
+    Ancillary class to help using backrefs from sub matches.
+
+    If the highest backref is followed by a "+", the logic will be
+    greedy: that group also picks up all the remaining delimiters.
+
+    This is needed to parse struct_group macros that end with ``MEMBERS...=
``.
+    """
+    def __init__(self, sub_str):
+        self.sub_groups =3D set()
+        self.max_group =3D -1
+        self.greedy =3D None
+
+        for m in KernRe(r'\\(\d+)([+]?)').finditer(sub_str):
+            group =3D int(m.group(1))
+            if m.group(2) =3D=3D "+":
+                if self.greedy and self.greedy !=3D group:
+                    raise ValueError("There are multiple greedy patterns!")
+                self.greedy =3D group
+
+            self.sub_groups.add(group)
+            self.max_group =3D max(self.max_group, group)
+
+        if self.greedy:
+            if self.greedy !=3D self.max_group:
+                raise ValueError("Greedy pattern is not the last one!")
+
+            sub_str =3D KernRe(r'(\\\d+)[+]').sub(r"\1", sub_str)
+
+        self.sub_str =3D sub_str
+        self.sub_tokeninzer =3D CTokenizer(sub_str)
+
+    def groups(self, new_tokenizer):
+        """
+        Create replacement arguments for backrefs like:
+
+        ``\0``, ``\1``, ``\2``, ...``\n``
+
+        It also accepts a ``+`` character to the highest backref. When use=
d,
+        it means in practice to ignore delimiters after it, being greedy.
+
+        The logic is smart enough to only go up to the maximum required
+        argument, even if there are more.
+
+        If there is a backref for an argument above the limit, it will
+        raise an exception. Please notice that, on C, square brackets
+        don't have any separator on it. Trying to use ``\1``..``\n`` for
+        brackets also raise an exception.
+        """
+
+        level =3D (0, 0, 0)
+
+        if self.max_group < 0:
+            return level, []
+
+        tokens =3D new_tokenizer.tokens
+
+        #
+        # Fill \0 with the full token contents
+        #
+        groups_list =3D [ [] ]
+
+        if 0 in self.sub_groups:
+            inner_level =3D 0
+
+            for i in range(0, len(tokens)):
+                tok =3D tokens[i]
+
+                if tok.kind =3D=3D CToken.BEGIN:
+                    inner_level +=3D 1
+                    continue
+
+                if tok.kind =3D=3D CToken.END:
+                    inner_level -=3D 1
+                    if inner_level < 0:
+                        break
+
+                if inner_level:
+                    groups_list[0].append(tok)
+
+        if not self.max_group:
+            return level, groups_list
+
+        delim =3D None
+
+        #
+        # Ignore everything before BEGIN. The value of begin gives the
+        # delimiter to be used for the matches
+        #
+        for i in range(0, len(tokens)):
+            tok =3D tokens[i]
+            if tok.kind =3D=3D CToken.BEGIN:
+                if tok.value =3D=3D "{":
+                    delim =3D ";"
+                elif tok.value =3D=3D "(":
+                    delim =3D ","
+                else:
+                    raise ValueError(fr"Can't handle \1..\n on {sub_str}")
+
+                level =3D tok.level
+                break
+
+        pos =3D 1
+        groups_list.append([])
+
+        inner_level =3D 0
+        for i in range(i + 1, len(tokens)):
+            tok =3D tokens[i]
+
+            if tok.kind =3D=3D CToken.BEGIN:
+                inner_level +=3D 1
+            if tok.kind =3D=3D CToken.END:
+                inner_level -=3D 1
+                if inner_level < 0:
+                    break
+
+            if tok.kind =3D=3D CToken.PUNC and delim =3D=3D tok.value:
+                pos +=3D 1
+                if self.greedy and pos > self.max_group:
+                    pos -=3D 1
+                else:
+                    groups_list.append([])
+
+                    if pos > self.max_group:
+                        break
+
+                    continue
+
+            groups_list[pos].append(tok)
+
+        if pos < self.max_group:
+            raise ValueError(fr"{self.sub_str} groups are up to {pos} inst=
ead of {self.max_group}")
+
+        return level, groups_list
+
+    def tokens(self, new_tokenizer):
+        level, groups =3D self.groups(new_tokenizer)
+
+        new =3D CTokenizer()
+
+        for tok in self.sub_tokeninzer.tokens:
+            if tok.kind =3D=3D CToken.BACKREF:
+                group =3D int(tok.value[1:])
+
+                for group_tok in groups[group]:
+                    new_tok =3D copy(group_tok)
+
+                    new_level =3D [0, 0, 0]
+
+                    for i in range(0, len(level)):
+                        new_level[i] =3D new_tok.level[i] + level[i]
+
+                    new_tok.level =3D tuple(new_level)
+
+                    new.tokens +=3D [ new_tok ]
+            else:
+                new.tokens +=3D [ tok ]
+
+        return new.tokens
+
 class CMatch:
     """
     Finding nested delimiters is hard with regular expressions. It is
@@ -270,31 +437,9 @@ class CMatch:
     will ignore the search string.
     """
=20
-    # TODO: make CMatch handle multiple match groups
-    #
-    # Right now, regular expressions to match it are defined only up to
-    #       the start delimiter, e.g.:
-    #
-    #       \bSTRUCT_GROUP\(
-    #
-    # is similar to: STRUCT_GROUP\((.*)\)
-    # except that the content inside the match group is delimiter-aligned.
-    #
-    # The content inside parentheses is converted into a single replace
-    # group (e.g. r`\0').
-    #
-    # It would be nice to change such definition to support multiple
-    # match groups, allowing a regex equivalent to:
-    #
-    #   FOO\((.*), (.*), (.*)\)
-    #
-    # it is probably easier to define it not as a regular expression, but
-    # with some lexical definition like:
-    #
-    #   FOO(arg1, arg2, arg3)
=20
     def __init__(self, regex):
-        self.regex =3D KernRe(regex)
+        self.regex =3D KernRe("^" + regex + r"\b")
=20
     def _search(self, tokenizer):
         """
@@ -317,7 +462,6 @@ class CMatch:
         """
=20
         start =3D None
-        offset =3D -1
         started =3D False
=20
         import sys
@@ -339,9 +483,8 @@ class CMatch:
=20
             if tok.kind =3D=3D CToken.END and tok.level =3D=3D stack[-1][1=
]:
                 start, level =3D stack.pop()
-                offset =3D i
=20
-                yield CTokenizer(tokenizer.tokens[start:offset + 1])
+                yield start, i
                 start =3D None
=20
         #
@@ -349,9 +492,9 @@ class CMatch:
         # This is meant to solve cases where the caller logic might be
         # picking an incomplete block.
         #
-        if start and offset < 0:
+        if start and stack:
             print("WARNING: can't find an end", file=3Dsys.stderr)
-            yield CTokenizer(tokenizer.tokens[start:])
+            yield start, len(tokenizer.tokens)
=20
     def search(self, source):
         """
@@ -368,13 +511,15 @@ class CMatch:
             tokenizer =3D CTokenizer(source)
             is_token =3D False
=20
-        for new_tokenizer in self._search(tokenizer):
+        for start, end in self._search(tokenizer):
+            new_tokenizer =3D CTokenizer(tokenizer.tokens[start:end + 1])
+
             if is_token:
                 yield new_tokenizer
             else:
                 yield str(new_tokenizer)
=20
-    def sub(self, sub, line, count=3D0):
+    def sub(self, sub_str, source, count=3D0):
         """
         This is similar to re.sub:
=20
@@ -398,20 +543,39 @@ class CMatch:
             is_token =3D False
             tokenizer =3D CTokenizer(source)
=20
+        # Detect if sub_str contains sub arguments
+
+        args_match =3D CTokenArgs(sub_str)
+
         new_tokenizer =3D CTokenizer()
-        cur_pos =3D 0
+        pos =3D 0
+        n =3D 0
+
+        #
+        # NOTE: the code below doesn't consider overlays at sub.
+        # We may need to add some extra unit tests to check if those
+        # would cause problems. When replacing by "", this should not
+        # be a problem, but other transformations could be problematic
+        #
         for start, end in self._search(tokenizer):
-            new_tokenizer.tokens +=3D tokenizer.tokens[cur_pos:start]
-#            new_tokenizer.tokens +=3D [sub_str]
+            new_tokenizer.tokens +=3D tokenizer.tokens[pos:start]
=20
-            cur_pos =3D end + 1
+            new =3D CTokenizer(tokenizer.tokens[start:end + 1])
=20
-        if cur_pos:
-            new_tokenizer.tokens +=3D tokenizer.tokens[cur_pos:]
+            new_tokenizer.tokens +=3D args_match.tokens(new)
=20
-        print(new_tokenizer.tokens)
+            pos =3D end + 1
=20
-        return str(new_tokenizer)
+            n +=3D 1
+            if count and n >=3D count:
+                break
+
+        new_tokenizer.tokens +=3D tokenizer.tokens[pos:]
+
+        if not is_token:
+            return str(new_tokenizer)
+
+        return new_tokenizer
=20
     def __repr__(self):
         """
--=20
2.53.0
From nobody Tue Apr  7 18:00:33 2026
Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org
 [10.30.226.201])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id 0F4323AD534;
	Thu, 12 Mar 2026 07:12:34 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=10.30.226.201
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1773299554; cv=none;
 b=iWbw7l54qJHN8BjYNGTyE8/JLcRMtT3YX8mFei1jukdU4+czKsJRcz0yDi17BrkOxfIlluFXhHO6bQF5SSVBuPimE4SWYSbHBU1NNVgrbUlqpjlPT/1vhP+g5SWqN/vrNoHowqsL53T2jao+fRhcsHN9FVESCpW43VX9zXlNCu0=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1773299554; c=relaxed/simple;
	bh=024KPIqyz3ZfGXNW94ZUdDluTtjF6rZFufcUUDYWkXA=;
	h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:
	 MIME-Version:Content-Type;
 b=n1zaWcenaigevC0+4KPey8AMAAPmgTOrip8ct/dHYS/IpU/YbhkoRN2M2I79AXs7XBwEJr7BrAy+h6MezCww1AXcrL1f4x/NLpHlTjbi072X1iCTi2BWcyaUuwsogNP9onCP1GV595+mf5B+pOI9FzflfaVUxhBpIVQSy7J73U4=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b=gikbu3ag; arc=none smtp.client-ip=10.30.226.201
Authentication-Results: smtp.subspace.kernel.org;
	dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b="gikbu3ag"
Received: by smtp.kernel.org (Postfix) with ESMTPSA id 6C475C2BCB7;
	Thu, 12 Mar 2026 07:12:33 +0000 (UTC)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org;
	s=k20201202; t=1773299553;
	bh=024KPIqyz3ZfGXNW94ZUdDluTtjF6rZFufcUUDYWkXA=;
	h=From:To:Cc:Subject:Date:In-Reply-To:References:From;
	b=gikbu3agS1eM41ibUr0nT+7guTAR0vSqIVlcypBOOEPYNWUJtOJqPGUyE1vlK2Zi6
	 f0qVTUZ6aF7W58ny6a+SMEEg7iuQHMoeXR52UvVACgTT906BaLQIUj6C4EY8vmxmo0
	 7EHY1KYuSWQy5oyyT29wDJTZJ8UHNI33CYu4AEGMSYAVYbmyoDiKySnvKGuBF6oBUy
	 p53Ll67g1gagxYh0cxb1z4N0sRlTYyrtFul4fvTSK41DTQ38SILPV8yMVY8+MnKyyB
	 xXzzphjcLeQJ3ZGuFskhqEK+JfpDq8iGTiyBB+9RRmBWILxu8FZX6103Ep4Np8Avee
	 PwPgH0A2lRRwg==
Received: from mchehab by mail.kernel.org with local (Exim 4.99.1)
	(envelope-from <mchehab+huawei@kernel.org>)
	id 1w0aDf-000000077hE-2nq0;
	Thu, 12 Mar 2026 08:12:31 +0100
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
To: Jonathan Corbet <corbet@lwn.net>,
	Kees Cook <kees@kernel.org>,
	Linux Doc Mailing List <linux-doc@vger.kernel.org>
Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>,
	linux-hardening@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	"Gustavo A. R. Silva" <gustavoars@kernel.org>,
	Mauro Carvalho Chehab <mchehab@kernel.org>
Subject: [PATCH v2 14/20] unittests: test_cmatch: add tests for sub()
Date: Thu, 12 Mar 2026 08:12:22 +0100
Message-ID: 
 <4642a8e6d14e0ce7f134ae5bed88e5e94d425114.1773297828.git.mchehab+huawei@kernel.org>
X-Mailer: git-send-email 2.53.0
In-Reply-To: <cover.1773297828.git.mchehab+huawei@kernel.org>
References: <cover.1773297828.git.mchehab+huawei@kernel.org>
Precedence: bulk
X-Mailing-List: linux-kernel@vger.kernel.org
List-Id: <linux-kernel.vger.kernel.org>
List-Subscribe: <mailto:linux-kernel+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-kernel+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
Sender: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>

Now that we have code for sub(), test it.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 tools/unittests/test_cmatch.py | 721 ++++++++++++++++++++++++++++++++-
 1 file changed, 719 insertions(+), 2 deletions(-)

diff --git a/tools/unittests/test_cmatch.py b/tools/unittests/test_cmatch.py
index 53b25aa4dc4a..f6ccd2a942f1 100755
--- a/tools/unittests/test_cmatch.py
+++ b/tools/unittests/test_cmatch.py
@@ -21,7 +21,7 @@ SRC_DIR =3D os.path.dirname(os.path.realpath(__file__))
 sys.path.insert(0, os.path.join(SRC_DIR, "../lib/python"))
=20
 from kdoc.c_lex import CMatch
-from kdoc.xforms_lists import CTransforms
+from kdoc.kdoc_re import KernRe
 from unittest_helper import run_unittest
=20
 #
@@ -75,7 +75,7 @@ class TestSearch(TestCaseDiff):
=20
     def test_search_no_false_positive(self):
         line =3D "call__acquires(foo);  // should stay intact"
-        result =3D ", ".join(CMatch(r"\b__acquires").search(line))
+        result =3D ", ".join(CMatch(r"__acquires").search(line))
         self.assertEqual(result, "")
=20
     def test_search_no_macro_remains(self):
@@ -88,6 +88,723 @@ class TestSearch(TestCaseDiff):
         result =3D ", ".join(CMatch(line).search(line))
         self.assertEqual(result, "")
=20
+#
+# Override unittest.TestCase to better compare diffs ignoring whitespaces
+#
+class TestCaseDiff(unittest.TestCase):
+    """
+    Disable maximum limit on diffs and add a method to better
+    handle diffs with whitespace differences.
+    """
+
+    @classmethod
+    def setUpClass(cls):
+        """Ensure that there won't be limit for diffs"""
+        cls.maxDiff =3D None
+
+    def assertLogicallyEqual(self, a, b):
+        """
+        Compare two results ignoring multiple whitespace differences.
+
+        This is useful to check more complex matches picked from examples.
+        On a plus side, we also don't need to use dedent.
+        Please notice that line breaks still need to match. We might
+        remove it at the regex, but this way, checking the diff is easier.
+        """
+        a =3D re.sub(r"[\t ]+", " ", a.strip())
+        b =3D re.sub(r"[\t ]+", " ", b.strip())
+
+        a =3D re.sub(r"\s+\n", "\n", a)
+        b =3D re.sub(r"\s+\n", "\n", b)
+
+        a =3D re.sub(" ;", ";", a)
+        b =3D re.sub(" ;", ";", b)
+
+        self.assertEqual(a, b)
+
+#
+# Tests dealing with different macros
+#
+
+class TestSubMultipleMacros(TestCaseDiff):
+    """
+    Tests dealing with different macros.
+
+    Here, we won't use assertLogicallyEqual. Instead, we'll check if each
+    of the expected patterns are present at the answer.
+    """
+
+    def test_acquires_simple(self):
+        """Simple replacement test with __acquires"""
+        line =3D "__acquires(ctx) foo();"
+        result =3D CMatch(r"__acquires").sub("REPLACED", line)
+
+        self.assertEqual("REPLACED foo();", result)
+
+    def test_acquires_multiple(self):
+        """Multiple __acquires"""
+        line =3D "__acquires(ctx) __acquires(other) bar();"
+        result =3D CMatch(r"__acquires").sub("REPLACED", line)
+
+        self.assertEqual("REPLACED REPLACED bar();", result)
+
+    def test_acquires_nested_paren(self):
+        """__acquires with nested pattern"""
+        line =3D "__acquires((ctx1, ctx2)) baz();"
+        result =3D CMatch(r"__acquires").sub("REPLACED", line)
+
+        self.assertEqual("REPLACED baz();", result)
+
+    def test_must_hold(self):
+        """__must_hold with a pointer"""
+        line =3D "__must_hold(&lock) do_something();"
+        result =3D CMatch(r"__must_hold").sub("REPLACED", line)
+
+        self.assertNotIn("__must_hold(", result)
+        self.assertIn("do_something();", result)
+
+    def test_must_hold_shared(self):
+        """__must_hold_shared with an uppercase defined value"""
+        line =3D "__must_hold_shared(RCU) other();"
+        result =3D CMatch(r"__must_hold_shared").sub("REPLACED", line)
+
+        self.assertNotIn("__must_hold_shared(", result)
+        self.assertIn("other();", result)
+
+    def test_no_false_positive(self):
+        """
+        Ensure that unrelated text containing similar patterns is preserved
+        """
+        line =3D "call__acquires(foo);  // should stay intact"
+        result =3D CMatch(r"\b__acquires").sub("REPLACED", line)
+
+        self.assertLogicallyEqual(result, "call__acquires(foo);")
+
+    def test_mixed_macros(self):
+        """Add a mix of macros"""
+        line =3D "__acquires(ctx) __releases(ctx) __must_hold(&lock) foo()=
;"
+
+        result =3D CMatch(r"__acquires").sub("REPLACED", line)
+        result =3D CMatch(r"__releases").sub("REPLACED", result)
+        result =3D CMatch(r"__must_hold").sub("REPLACED", result)
+
+        self.assertNotIn("__acquires(", result)
+        self.assertNotIn("__releases(", result)
+        self.assertNotIn("__must_hold(", result)
+
+        self.assertIn("foo();", result)
+
+    def test_no_macro_remains(self):
+        """Ensures that unmatched macros are untouched"""
+        line =3D "do_something_else();"
+        result =3D CMatch(r"__acquires").sub("REPLACED", line)
+
+        self.assertEqual(result, line)
+
+    def test_no_function(self):
+        """Ensures that text without a function call remains untouched"""
+        line =3D "something"
+        result =3D CMatch(line).sub("REPLACED", line)
+
+        self.assertEqual(result, line)
+
+#
+# Check if the diff is logically equivalent. To simplify, the tests here
+# use a single macro name for all replacements.
+#
+
+class TestSubSimple(TestCaseDiff):
+    """
+    Test argument replacements.
+
+    Here, the function name can be anything. So, we picked __attribute__(),
+    to mimic a macro found in the Kernel, but none of the replacements
+    here has any relationship with the Kernel usage.
+    """
+
+    MACRO =3D "__attribute__"
+
+    @classmethod
+    def setUpClass(cls):
+        """Define a CMatch to be used for all tests"""
+        cls.matcher =3D CMatch(cls.MACRO)
+
+    def test_sub_with_capture(self):
+        """Test all arguments replacement with a single arg"""
+        line =3D f"{self.MACRO}(&ctx)\nfoo();"
+
+        result =3D self.matcher.sub(r"ACQUIRED(\0)", line)
+
+        self.assertLogicallyEqual("ACQUIRED(&ctx)\nfoo();", result)
+
+    def test_sub_zero_placeholder(self):
+        """Test all arguments replacement with multiple args"""
+        line =3D f"{self.MACRO}(arg1, arg2)\nbar();"
+
+        result =3D self.matcher.sub(r"REPLACED(\0)", line)
+
+        self.assertLogicallyEqual("REPLACED(arg1, arg2)\nbar();", result)
+
+    def test_sub_single_placeholder(self):
+        """Single replacement rule for \1"""
+        line =3D f"{self.MACRO}(ctx, boo)\nfoo();"
+        result =3D self.matcher.sub(r"ACQUIRED(\1)", line)
+
+        self.assertLogicallyEqual("ACQUIRED(ctx)\nfoo();", result)
+
+    def test_sub_multiple_placeholders(self):
+        """Replacement rule for both \1 and \2"""
+        line =3D f"{self.MACRO}(arg1, arg2)\nbar();"
+        result =3D self.matcher.sub(r"REPLACE(\1, \2)", line)
+
+        self.assertLogicallyEqual("REPLACE(arg1, arg2)\nbar();", result)
+
+    def test_sub_mixed_placeholders(self):
+        """Replacement rule for \0, \1 and additional text"""
+        line =3D f"{self.MACRO}(foo, bar)\nbaz();"
+        result =3D self.matcher.sub(r"ALL(\0) FIRST(\1)", line)
+
+        self.assertLogicallyEqual("ALL(foo, bar) FIRST(foo)\nbaz();", resu=
lt)
+
+    def test_sub_no_placeholder(self):
+        """Replacement without placeholders"""
+        line =3D f"{self.MACRO}(arg)\nfoo();"
+        result =3D self.matcher.sub(r"NO_BACKREFS()", line)
+
+        self.assertLogicallyEqual("NO_BACKREFS()\nfoo();", result)
+
+    def test_sub_count_parameter(self):
+        """Verify that the algorithm stops after the requested count"""
+        line =3D f"{self.MACRO}(a1) x();\n{self.MACRO}(a2) y();"
+        result =3D self.matcher.sub(r"ONLY_FIRST(\1) ", line, count=3D1)
+
+        self.assertLogicallyEqual(f"ONLY_FIRST(a1) x();\n{self.MACRO}(a2) =
y();",
+                                  result)
+
+    def test_strip_multiple_acquires(self):
+        """Check if spaces between removed delimiters will be dropped"""
+        line =3D f"int {self.MACRO}(1)  {self.MACRO}(2 )   {self.MACRO}(3)=
 foo;"
+        result =3D self.matcher.sub("", line)
+
+        self.assertLogicallyEqual(result, "int foo;")
+
+
+#
+# Test replacements with slashrefs
+#
+
+
+class TestSubWithLocalXforms(TestCaseDiff):
+    """
+    Test different use case patterns found in the Kernel.
+
+    Here, replacements using both CMatch and KernRe can be tested,
+    as it will import the actual replacement rules used by kernel-doc.
+    """
+
+    struct_xforms =3D [
+        (CMatch("__attribute__"), ' '),
+        (CMatch('__aligned'), ' '),
+        (CMatch('__counted_by'), ' '),
+        (CMatch('__counted_by_(le|be)'), ' '),
+        (CMatch('__guarded_by'), ' '),
+        (CMatch('__pt_guarded_by'), ' '),
+
+        (CMatch('__cacheline_group_(begin|end)'), ''),
+
+        (CMatch('struct_group'), r'\2'),
+        (CMatch('struct_group_attr'), r'\3'),
+        (CMatch('struct_group_tagged'), r'struct \1 { \3+ } \2;'),
+        (CMatch('__struct_group'), r'\4'),
+
+        (CMatch('__ETHTOOL_DECLARE_LINK_MODE_MASK'), r'DECLARE_BITMAP(\1, =
__ETHTOOL_LINK_MODE_MASK_NBITS)'),
+        (CMatch('DECLARE_PHY_INTERFACE_MASK',), r'DECLARE_BITMAP(\1, PHY_I=
NTERFACE_MODE_MAX)'),
+        (CMatch('DECLARE_BITMAP'), r'unsigned long \1[BITS_TO_LONGS(\2)]'),
+
+        (CMatch('DECLARE_HASHTABLE'), r'unsigned long \1[1 << ((\2) - 1)]'=
),
+        (CMatch('DECLARE_KFIFO'), r'\2 *\1'),
+        (CMatch('DECLARE_KFIFO_PTR'), r'\2 *\1'),
+        (CMatch('(?:__)?DECLARE_FLEX_ARRAY'), r'\1 \2[]'),
+        (CMatch('DEFINE_DMA_UNMAP_ADDR'), r'dma_addr_t \1'),
+        (CMatch('DEFINE_DMA_UNMAP_LEN'), r'__u32 \1'),
+        (CMatch('VIRTIO_DECLARE_FEATURES'), r'union { u64 \1; u64 \1_array=
[VIRTIO_FEATURES_U64S]; }'),
+    ]
+
+    function_xforms =3D [
+        (CMatch('__printf'), ""),
+        (CMatch('__(?:re)?alloc_size'), ""),
+        (CMatch("__diagnose_as"), ""),
+        (CMatch("DECL_BUCKET_PARAMS"), r"\1, \2"),
+
+        (CMatch("__cond_acquires"), ""),
+        (CMatch("__cond_releases"), ""),
+        (CMatch("__acquires"), ""),
+        (CMatch("__releases"), ""),
+        (CMatch("__must_hold"), ""),
+        (CMatch("__must_not_hold"), ""),
+        (CMatch("__must_hold_shared"), ""),
+        (CMatch("__cond_acquires_shared"), ""),
+        (CMatch("__acquires_shared"), ""),
+        (CMatch("__releases_shared"), ""),
+        (CMatch("__attribute__"), ""),
+    ]
+
+    var_xforms =3D [
+        (CMatch('__guarded_by'), ""),
+        (CMatch('__pt_guarded_by'), ""),
+        (CMatch("LIST_HEAD"), r"struct list_head \1"),
+    ]
+
+    #: Transforms main dictionary used at apply_transforms().
+    xforms =3D {
+        "struct": struct_xforms,
+        "func": function_xforms,
+        "var": var_xforms,
+    }
+
+    @classmethod
+    def apply_transforms(cls, xform_type, text):
+        """
+        Mimic the behavior of kdoc_parser.apply_transforms() method.
+
+        For each element of STRUCT_XFORMS, apply apply_transforms.
+
+        There are two parameters:
+
+        - ``xform_type``
+            Can be ``func``, ``struct`` or ``var``;
+        - ``text``
+            The text where the sub patterns from CTransforms will be appli=
ed.
+        """
+        for search, subst in cls.xforms.get(xform_type):
+            text =3D search.sub(subst, text)
+
+        return text.strip()
+
+        cls.matcher =3D CMatch(r"struct_group[\w\_]*")
+
+    def test_struct_group(self):
+        """
+        Test struct_group using a pattern from
+        drivers/net/ethernet/asix/ax88796c_main.h.
+        """
+        line =3D """
+            struct tx_pkt_info {
+                    struct_group(tx_overhead,
+                            struct tx_sop_header sop;
+                            struct tx_segment_header seg;
+                    );
+                    struct tx_eop_header eop;
+                    u16 pkt_len;
+                    u16 seq_num;
+            };
+        """
+        expected =3D """
+            struct tx_pkt_info {
+                    struct tx_sop_header sop;
+                    struct tx_segment_header seg;
+                    ;
+                    struct tx_eop_header eop;
+                    u16 pkt_len;
+                    u16 seq_num;
+            };
+        """
+
+        result =3D self.apply_transforms("struct", line)
+        self.assertLogicallyEqual(result, expected)
+
+    def test_struct_group_attr(self):
+        """
+        Test two struct_group_attr using patterns from fs/smb/client/cifsp=
du.h.
+        """
+        line =3D """
+            typedef struct smb_com_open_rsp {
+                struct smb_hdr hdr;     /* wct =3D 34 BB */
+                __u8 AndXCommand;
+                __u8 AndXReserved;
+                __le16 AndXOffset;
+                __u8 OplockLevel;
+                __u16 Fid;
+                __le32 CreateAction;
+                struct_group_attr(common_attributes,,
+                    __le64 CreationTime;
+                    __le64 LastAccessTime;
+                    __le64 LastWriteTime;
+                    __le64 ChangeTime;
+                    __le32 FileAttributes;
+                );
+                __le64 AllocationSize;
+                __le64 EndOfFile;
+                __le16 FileType;
+                __le16 DeviceState;
+                __u8 DirectoryFlag;
+                __u16 ByteCount;        /* bct =3D 0 */
+            } OPEN_RSP;
+            typedef struct {
+                struct_group_attr(common_attributes,,
+                    __le64 CreationTime;
+                    __le64 LastAccessTime;
+                    __le64 LastWriteTime;
+                    __le64 ChangeTime;
+                    __le32 Attributes;
+                );
+                __u32 Pad1;
+                __le64 AllocationSize;
+                __le64 EndOfFile;
+                __le32 NumberOfLinks;
+                __u8 DeletePending;
+                __u8 Directory;
+                __u16 Pad2;
+                __le32 EASize;
+                __le32 FileNameLength;
+                union {
+                    char __pad;
+                    DECLARE_FLEX_ARRAY(char, FileName);
+                };
+            } FILE_ALL_INFO;       /* level 0x107 QPathInfo */
+        """
+        expected =3D """
+            typedef struct smb_com_open_rsp {
+                struct smb_hdr hdr;
+                __u8 AndXCommand;
+                __u8 AndXReserved;
+                __le16 AndXOffset;
+                __u8 OplockLevel;
+                __u16 Fid;
+                __le32 CreateAction;
+                __le64 CreationTime;
+                __le64 LastAccessTime;
+                __le64 LastWriteTime;
+                __le64 ChangeTime;
+                __le32 FileAttributes;
+                ;
+                __le64 AllocationSize;
+                __le64 EndOfFile;
+                __le16 FileType;
+                __le16 DeviceState;
+                __u8 DirectoryFlag;
+                __u16 ByteCount;
+            } OPEN_RSP;
+        typedef struct {
+            __le64 CreationTime;
+            __le64 LastAccessTime;
+            __le64 LastWriteTime;
+            __le64 ChangeTime;
+            __le32 Attributes;
+            ;
+            __u32 Pad1;
+            __le64 AllocationSize;
+            __le64 EndOfFile;
+            __le32 NumberOfLinks;
+            __u8 DeletePending;
+            __u8 Directory;
+            __u16 Pad2;
+            __le32 EASize;
+            __le32 FileNameLength;
+            union {
+                char __pad;
+                char FileName[];
+            };
+        } FILE_ALL_INFO;
+        """
+
+        result =3D self.apply_transforms("struct", line)
+        self.assertLogicallyEqual(result, expected)
+
+    def test_raw_struct_group(self):
+        """
+        Test a __struct_group pattern from include/uapi/cxl/features.h.
+        """
+        line =3D """
+            struct cxl_mbox_get_sup_feats_out {
+                __struct_group(cxl_mbox_get_sup_feats_out_hdr, hdr, /* emp=
ty */,
+                    __le16 num_entries;
+                    __le16 supported_feats;
+                    __u8 reserved[4];
+                );
+                struct cxl_feat_entry ents[] __counted_by_le(num_entries);
+            } __attribute__ ((__packed__));
+        """
+        expected =3D """
+            struct cxl_mbox_get_sup_feats_out {
+                __le16 num_entries;
+                __le16 supported_feats;
+                __u8 reserved[4];
+                ;
+                struct cxl_feat_entry ents[];
+            };
+        """
+
+        result =3D self.apply_transforms("struct", line)
+        self.assertLogicallyEqual(result, expected)
+
+    def test_raw_struct_group_tagged(self):
+        """
+        Test cxl_regs with struct_group_tagged patterns from drivers/cxl/c=
xl.h.
+
+        NOTE:
+
+            This one has actually a violation from what kernel-doc would
+            expect: Kernel-doc regex expects only 3 members, but this is
+            actually defined as::
+
+                #define struct_group_tagged(TAG, NAME, MEMBERS...)
+
+            The replace expression there is::
+
+                struct \1 { \3 } \2;
+
+            but it should be really something like::
+
+                struct \1 { \3 \4 \5 \6 \7 \8 ... } \2;
+
+            a later fix would be needed to address it.
+
+        """
+        line =3D """
+            struct cxl_regs {
+                struct_group_tagged(cxl_component_regs, component,
+                    void __iomem *hdm_decoder;
+                    void __iomem *ras;
+                );
+
+
+                /* This is actually a violation: too much commas */
+                struct_group_tagged(cxl_device_regs, device_regs,
+                    void __iomem *status, *mbox, *memdev;
+                );
+
+                struct_group_tagged(cxl_pmu_regs, pmu_regs,
+                    void __iomem *pmu;
+                );
+
+                struct_group_tagged(cxl_rch_regs, rch_regs,
+                    void __iomem *dport_aer;
+                );
+
+                struct_group_tagged(cxl_rcd_regs, rcd_regs,
+                    void __iomem *rcd_pcie_cap;
+                );
+            };
+        """
+        expected =3D """
+        struct cxl_regs {
+            struct cxl_component_regs {
+                void __iomem *hdm_decoder;
+                void __iomem *ras;
+            } component;;
+
+            struct cxl_device_regs {
+                void __iomem *status, *mbox, *memdev;
+            } device_regs;;
+
+            struct cxl_pmu_regs {
+                void __iomem *pmu;
+            } pmu_regs;;
+
+            struct cxl_rch_regs {
+                void __iomem *dport_aer;
+            } rch_regs;;
+
+            struct cxl_rcd_regs {
+                void __iomem *rcd_pcie_cap;
+            } rcd_regs;;
+        };
+        """
+
+        result =3D self.apply_transforms("struct", line)
+        self.assertLogicallyEqual(result, expected)
+
+    def test_struct_group_tagged_with_private(self):
+        """
+        Replace struct_group_tagged with private, using the same regex
+        for the replacement as what happens in xforms_lists.py.
+
+        As the private removal happens outside NestedGroup class, we manua=
lly
+        dropped the remaining part of the struct, to simulate what happens
+        at kdoc_parser.
+
+        Taken from include/net/page_pool/types.h
+        """
+        line =3D """
+            struct page_pool_params {
+                struct_group_tagged(page_pool_params_slow, slow,
+                                    struct net_device *netdev;
+                                    unsigned int queue_idx;
+                                    unsigned int    flags;
+                                    /* private: only under "slow" struct */
+                                    unsigned int ignored;
+                );
+                /* Struct below shall not be ignored */
+                struct_group_tagged(page_pool_params_fast, fast,
+                                    unsigned int    order;
+                                    unsigned int    pool_size;
+                                    int             nid;
+                                    struct device   *dev;
+                                    struct napi_struct *napi;
+                                    enum dma_data_direction dma_dir;
+                                    unsigned int    max_len;
+                                    unsigned int    offset;
+                );
+            };
+        """
+        expected =3D """
+            struct page_pool_params {
+                struct page_pool_params_slow {
+                    struct net_device *netdev;
+                    unsigned int queue_idx;
+                    unsigned int    flags;
+                } slow;;
+                struct page_pool_params_fast {
+                    unsigned int order;
+                    unsigned int    pool_size;
+                    int             nid;
+                    struct device   *dev;
+                    struct napi_struct *napi;
+                    enum dma_data_direction dma_dir;
+                    unsigned int    max_len;
+                    unsigned int    offset;
+                } fast;;
+            };
+        """
+
+        result =3D self.apply_transforms("struct", line)
+        self.assertLogicallyEqual(result, expected)
+
+    def test_struct_kcov(self):
+        """
+        """
+        line =3D """
+            struct kcov {
+                refcount_t              refcount;
+                spinlock_t              lock;
+                enum kcov_mode          mode __guarded_by(&lock);
+                unsigned int            size __guarded_by(&lock);
+                void                    *area __guarded_by(&lock);
+                struct task_struct      *t __guarded_by(&lock);
+                bool                    remote;
+                unsigned int            remote_size;
+                int                     sequence;
+            };
+        """
+        expected =3D """
+        """
+
+        result =3D self.apply_transforms("struct", line)
+        self.assertLogicallyEqual(result, expected)
+
+
+    def test_struct_kcov(self):
+        """
+        Test a struct from kernel/kcov.c.
+        """
+        line =3D """
+            struct kcov {
+                refcount_t              refcount;
+                spinlock_t              lock;
+                enum kcov_mode          mode __guarded_by(&lock);
+                unsigned int            size __guarded_by(&lock);
+                void                    *area __guarded_by(&lock);
+                struct task_struct      *t __guarded_by(&lock);
+                bool                    remote;
+                unsigned int            remote_size;
+                int                     sequence;
+            };
+        """
+        expected =3D """
+            struct kcov {
+                refcount_t              refcount;
+                spinlock_t              lock;
+                enum kcov_mode          mode;
+                unsigned int            size;
+                void                    *area;
+                struct task_struct      *t;
+                bool                    remote;
+                unsigned int            remote_size;
+                int                     sequence;
+            };
+        """
+
+        result =3D self.apply_transforms("struct", line)
+        self.assertLogicallyEqual(result, expected)
+
+    def test_vars_stackdepot(self):
+        """
+        Test guarded_by on vars from lib/stackdepot.c.
+        """
+        line =3D """
+            size_t pool_offset __guarded_by(&pool_lock) =3D DEPOT_POOL_SIZ=
E;
+            __guarded_by(&pool_lock) LIST_HEAD(free_stacks);
+            void **stack_pools __pt_guarded_by(&pool_lock);
+        """
+        expected =3D """
+            size_t pool_offset =3D DEPOT_POOL_SIZE;
+            struct list_head free_stacks;
+            void **stack_pools;
+        """
+
+        result =3D self.apply_transforms("var", line)
+        self.assertLogicallyEqual(result, expected)
+
+    def test_functions_with_acquires_and_releases(self):
+        """
+        Test dropping acquires/releases annotations from functions.
+        """
+        line =3D """
+            bool prepare_report_consumer(unsigned long *flags,
+                                         const struct access_info *ai,
+                                         struct other_info *other_info) \
+                                        __cond_acquires(true, &report_lock=
);
+
+            int tcp_sigpool_start(unsigned int id, struct tcp_sigpool *c) \
+                                  __cond_acquires(0, RCU_BH);
+
+            bool undo_report_consumer(unsigned long *flags,
+                                      const struct access_info *ai,
+                                      struct other_info *other_info) \
+                                     __cond_releases(true, &report_lock);
+
+            void debugfs_enter_cancellation(struct file *file,
+                                            struct debugfs_cancellation *c=
) \
+                                           __acquires(cancellation);
+
+            void debugfs_leave_cancellation(struct file *file,
+                                            struct debugfs_cancellation *c=
) \
+                                           __releases(cancellation);
+
+            acpi_cpu_flags acpi_os_acquire_lock(acpi_spinlock lockp) \
+                                               __acquires(lockp);
+
+            void acpi_os_release_lock(acpi_spinlock lockp,
+                                      acpi_cpu_flags not_used) \
+                                     __releases(lockp)
+        """
+        expected =3D """
+            bool prepare_report_consumer(unsigned long *flags,
+                                         const struct access_info *ai,
+                                         struct other_info *other_info);
+
+            int tcp_sigpool_start(unsigned int id, struct tcp_sigpool *c);
+
+            bool undo_report_consumer(unsigned long *flags,
+                                      const struct access_info *ai,
+                                      struct other_info *other_info);
+
+            void debugfs_enter_cancellation(struct file *file,
+                                            struct debugfs_cancellation *c=
);
+
+            void debugfs_leave_cancellation(struct file *file,
+                                            struct debugfs_cancellation *c=
);
+
+            acpi_cpu_flags acpi_os_acquire_lock(acpi_spinlock lockp);
+
+            void acpi_os_release_lock(acpi_spinlock lockp,
+                                      acpi_cpu_flags not_used)
+        """
+
+        result =3D self.apply_transforms("func", line)
+        self.assertLogicallyEqual(result, expected)
+
 #
 # Run all tests
 #
--=20
2.53.0
From nobody Tue Apr  7 18:00:33 2026
Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org
 [10.30.226.201])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id DEC883AD504;
	Thu, 12 Mar 2026 07:12:33 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=10.30.226.201
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1773299553; cv=none;
 b=DhLmKyL+Z5CvhxXuDLR5xMEZWkx/Dvowky/l6+AIwN6kt7FCVDHgHbuXcWVSrbxeuOUvbpeDtGndB9aBdxmPNoM0S7VyTLYECg0M3KlqWf3qJgbxeSKXH8rgczlXMUbRmTerROsMMMGt3d4VO4DRdQCioNE6qZJmsGVAbcoSaNw=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1773299553; c=relaxed/simple;
	bh=R3MxZBF5LN8l5MLvY90olqXjlgo8oaUuezo5dZ4V5RY=;
	h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:
	 MIME-Version:Content-Type;
 b=uogxD3/gwUKZyTsfO/SQhtIcIUkrWxpOjxMcKdz4Yg2p6vozfR1ODDZJi/xoTTNon5T/3PgOEN8eGm1+TclPMuSvsLY9CGLU3loDHbkyBOrV3YrJEcRnO1vHcPADFHum0HTo3YYrRDxzDf5h+r6SWaf7FxQst4hppX8l3bMn86k=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b=CAczf86K; arc=none smtp.client-ip=10.30.226.201
Authentication-Results: smtp.subspace.kernel.org;
	dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b="CAczf86K"
Received: by smtp.kernel.org (Postfix) with ESMTPSA id 6EBC5C4AF0C;
	Thu, 12 Mar 2026 07:12:33 +0000 (UTC)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org;
	s=k20201202; t=1773299553;
	bh=R3MxZBF5LN8l5MLvY90olqXjlgo8oaUuezo5dZ4V5RY=;
	h=From:To:Cc:Subject:Date:In-Reply-To:References:From;
	b=CAczf86KiCXLWoDaYcGyXt39zqEz+ltMZE7GRr3GMSIo4uKI2JYZlcRut2jAFkTDu
	 eqzmAsNH6B7YmhQ5VQWRHGId7S0IpI325imkLBhu02+fEtkPpX1oatpSm9zoDOewSM
	 VByf/cyG+crUgHWuFFtNlM2fYKCya29LQDrVvRCqknqwXb4EJk1dL2AAxihTeB6r3I
	 x+MOwUFPSXX8ZmLtRcak0Q8kWtyMrTZrxivdfli/6n3yuxnXnYyJs9mFLBGE61owo8
	 lbRRIMx3uusFrvmZNYXnc4s+b7q+GQgPtHmdlAkQfgLpFCpTj9Jk3HvTIPn7QPcv3h
	 7Gy7TBzaG8Hmw==
Received: from mchehab by mail.kernel.org with local (Exim 4.99.1)
	(envelope-from <mchehab+huawei@kernel.org>)
	id 1w0aDf-000000077hI-2v25;
	Thu, 12 Mar 2026 08:12:31 +0100
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
To: Jonathan Corbet <corbet@lwn.net>,
	Linux Doc Mailing List <linux-doc@vger.kernel.org>
Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>,
	linux-hardening@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	Aleksandr Loktionov <aleksandr.loktionov@intel.com>,
	Mauro Carvalho Chehab <mchehab@kernel.org>,
	Randy Dunlap <rdunlap@infradead.org>
Subject: [PATCH v2 15/20] docs: kdoc: replace NestedMatch with CMatch
Date: Thu, 12 Mar 2026 08:12:23 +0100
Message-ID: 
 <6b21b7653ceaa4d20d62c651fa205cbe94401df1.1773297828.git.mchehab+huawei@kernel.org>
X-Mailer: git-send-email 2.53.0
In-Reply-To: <cover.1773297828.git.mchehab+huawei@kernel.org>
References: <cover.1773297828.git.mchehab+huawei@kernel.org>
Precedence: bulk
X-Mailing-List: linux-kernel@vger.kernel.org
List-Id: <linux-kernel.vger.kernel.org>
List-Subscribe: <mailto:linux-kernel+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-kernel+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
Sender: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>

Our previous approach to handling nested structs was to use
NestedMatch. It works well, but adding support for parsing delimiters
is very complex.

Instead, use CMatch, which uses a C tokenizer, making the code more
reliable and simpler.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py  |  2 +-
 tools/lib/python/kdoc/xforms_lists.py | 31 ++++++++++++++-------------
 2 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/k=
doc_parser.py
index e804e61b09c0..0da95b090a34 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -13,7 +13,7 @@ import sys
 import re
 from pprint import pformat
=20
-from kdoc.kdoc_re import NestedMatch, KernRe
+from kdoc.kdoc_re import KernRe
 from kdoc.c_lex import CTokenizer
 from kdoc.kdoc_item import KdocItem
=20
diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/=
xforms_lists.py
index c07cbe1e6349..7fa7f52cec7b 100644
--- a/tools/lib/python/kdoc/xforms_lists.py
+++ b/tools/lib/python/kdoc/xforms_lists.py
@@ -4,7 +4,8 @@
=20
 import re
=20
-from kdoc.kdoc_re import KernRe, NestedMatch
+from kdoc.kdoc_re import KernRe
+from kdoc.c_lex import CMatch
=20
 struct_args_pattern =3D r'([^,)]+)'
=20
@@ -60,7 +61,7 @@ class CTransforms:
         #
         # As it doesn't properly match the end parenthesis on some cases.
         #
-        # So, a better solution was crafted: there's now a NestedMatch
+        # So, a better solution was crafted: there's now a CMatch
         # class that ensures that delimiters after a search are properly
         # matched. So, the implementation to drop STRUCT_GROUP() will be
         # handled in separate.
@@ -72,9 +73,9 @@ class CTransforms:
         #
         # Replace macros
         #
-        # TODO: use NestedMatch for FOO($1, $2, ...) matches
+        # TODO: use CMatch for FOO($1, $2, ...) matches
         #
-        # it is better to also move those to the NestedMatch logic,
+        # it is better to also move those to the CMatch logic,
         # to ensure that parentheses will be properly matched.
         #
         (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S),
@@ -95,17 +96,17 @@ class CTransforms:
         (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)'=
, re.S), r'__u32 \1'),
         (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1;=
 u64 \1_array[VIRTIO_FEATURES_U64S]; }'),
=20
-        (NestedMatch(r"__cond_acquires\s*\("), ""),
-        (NestedMatch(r"__cond_releases\s*\("), ""),
-        (NestedMatch(r"__acquires\s*\("), ""),
-        (NestedMatch(r"__releases\s*\("), ""),
-        (NestedMatch(r"__must_hold\s*\("), ""),
-        (NestedMatch(r"__must_not_hold\s*\("), ""),
-        (NestedMatch(r"__must_hold_shared\s*\("), ""),
-        (NestedMatch(r"__cond_acquires_shared\s*\("), ""),
-        (NestedMatch(r"__acquires_shared\s*\("), ""),
-        (NestedMatch(r"__releases_shared\s*\("), ""),
-        (NestedMatch(r'\bSTRUCT_GROUP\('), r'\0'),
+        (CMatch(r"__cond_acquires"), ""),
+        (CMatch(r"__cond_releases"), ""),
+        (CMatch(r"__acquires"), ""),
+        (CMatch(r"__releases"), ""),
+        (CMatch(r"__must_hold"), ""),
+        (CMatch(r"__must_not_hold"), ""),
+        (CMatch(r"__must_hold_shared"), ""),
+        (CMatch(r"__cond_acquires_shared"), ""),
+        (CMatch(r"__acquires_shared"), ""),
+        (CMatch(r"__releases_shared"), ""),
+        (CMatch(r"STRUCT_GROUP"), r'\0'),
     ]
=20
     #: Transforms for function prototypes.
--=20
2.53.0
From nobody Tue Apr  7 18:00:33 2026
Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org
 [10.30.226.201])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id E03C43AD509;
	Thu, 12 Mar 2026 07:12:33 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=10.30.226.201
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1773299554; cv=none;
 b=qiLrSxfvLiz2RPDTvKFU6ukyM6y5rdLlgxS7ePj67mGQk08SxgS4DNRNDwcw61tls3eoW001nSQV/V2b8o6juJXULaWtb/aRNMvHE6UUFOzPXzgjt1vtbdfrpXEOEZIqsUgUCUayj07Q4BcPGMsfuFPdBBHub7jVUVADLjmPMZ0=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1773299554; c=relaxed/simple;
	bh=H4IWlHfCRm/9kMM9kyWHhyYxEDQxSEyHhWFeVtOIZww=;
	h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:
	 MIME-Version:Content-Type;
 b=C7HN4hn27bs3SORpcaxHAUq+RN3/KoDbmS8JPJMs0E/XF6niXcOn70U/m/71bz782DLgJxJSS5JGSp6xTudGhMsvnSP1wHO+v3MNN6kLKlb7LGGlRyBxEEJtexXJU7ZtXw8hqQCE/bDWxzVOCXNli7B1znQnALF3q1XrEhLXd7w=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b=l0WVh6Xw; arc=none smtp.client-ip=10.30.226.201
Authentication-Results: smtp.subspace.kernel.org;
	dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b="l0WVh6Xw"
Received: by smtp.kernel.org (Postfix) with ESMTPSA id 70AD2C4AF0D;
	Thu, 12 Mar 2026 07:12:33 +0000 (UTC)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org;
	s=k20201202; t=1773299553;
	bh=H4IWlHfCRm/9kMM9kyWHhyYxEDQxSEyHhWFeVtOIZww=;
	h=From:To:Cc:Subject:Date:In-Reply-To:References:From;
	b=l0WVh6Xw9sG8qxNXc+v9jsaSHIF7fB6437iMDyMuBJnDjV4leQS+ATrh5OOr4TpsH
	 tlnHzHE/6byWaGkHu5FoQDPciwg+n7sJWjBv+jsM2kQTR73OMyooA929fagR6gifds
	 M/NVejEcy0tE9270/OAI1LRijZvwSBBNKx7tLivXkLR4zTDspyopotFPmyRmCedh5O
	 FT5j360ZsFD9X55u06n5z5M+E2SLixFce1pWAXrGhk1gY0JrtNYYGJZ5Air59pFTLx
	 bAFGSjiURafoQ5lWAEwcr4ZAkyPup3420p5fVS4eQBTg26vSLj9jEdd787HZMDRe5B
	 Nzq9FNRzNNACQ==
Received: from mchehab by mail.kernel.org with local (Exim 4.99.1)
	(envelope-from <mchehab+huawei@kernel.org>)
	id 1w0aDf-000000077hN-31vY;
	Thu, 12 Mar 2026 08:12:31 +0100
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
To: Jonathan Corbet <corbet@lwn.net>,
	Linux Doc Mailing List <linux-doc@vger.kernel.org>
Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>,
	linux-hardening@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	Aleksandr Loktionov <aleksandr.loktionov@intel.com>,
	Mauro Carvalho Chehab <mchehab@kernel.org>,
	Randy Dunlap <rdunlap@infradead.org>
Subject: [PATCH v2 16/20] docs: kdoc_re: get rid of NestedMatch class
Date: Thu, 12 Mar 2026 08:12:24 +0100
Message-ID: 
 <f1618cace1b0c67245d110e7eb6345a6259eebf9.1773297828.git.mchehab+huawei@kernel.org>
X-Mailer: git-send-email 2.53.0
In-Reply-To: <cover.1773297828.git.mchehab+huawei@kernel.org>
References: <cover.1773297828.git.mchehab+huawei@kernel.org>
Precedence: bulk
X-Mailing-List: linux-kernel@vger.kernel.org
List-Id: <linux-kernel.vger.kernel.org>
List-Subscribe: <mailto:linux-kernel+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-kernel+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
Sender: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>

Now that everything was converted to CMatch, we can get rid of
the previous NestedMatch implementation.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_re.py | 202 -------------------------------
 1 file changed, 202 deletions(-)

diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_=
re.py
index ba601a4f5035..6f3ae28859ea 100644
--- a/tools/lib/python/kdoc/kdoc_re.py
+++ b/tools/lib/python/kdoc/kdoc_re.py
@@ -140,205 +140,3 @@ class KernRe:
         """
=20
         return self.last_match.groups()
-
-
-#: Nested delimited pairs (brackets and parenthesis)
-DELIMITER_PAIRS =3D {
-    '{': '}',
-    '(': ')',
-    '[': ']',
-}
-
-#: compiled delimiters
-RE_DELIM =3D KernRe(r'[\{\}\[\]\(\)]')
-
-
-class NestedMatch:
-    """
-    Finding nested delimiters is hard with regular expressions. It is
-    even harder on Python with its normal re module, as there are several
-    advanced regular expressions that are missing.
-
-    This is the case of this pattern::
-
-            '\\bSTRUCT_GROUP(\\(((?:(?>[^)(]+)|(?1))*)\\))[^;]*;'
-
-    which is used to properly match open/close parentheses of the
-    string search STRUCT_GROUP(),
-
-    Add a class that counts pairs of delimiters, using it to match and
-    replace nested expressions.
-
-    The original approach was suggested by:
-
-        https://stackoverflow.com/questions/5454322/python-how-to-match-ne=
sted-parentheses-with-regex
-
-    Although I re-implemented it to make it more generic and match 3 types
-    of delimiters. The logic checks if delimiters are paired. If not, it
-    will ignore the search string.
-    """
-
-    # TODO: make NestedMatch handle multiple match groups
-    #
-    # Right now, regular expressions to match it are defined only up to
-    #       the start delimiter, e.g.:
-    #
-    #       \bSTRUCT_GROUP\(
-    #
-    # is similar to: STRUCT_GROUP\((.*)\)
-    # except that the content inside the match group is delimiter-aligned.
-    #
-    # The content inside parentheses is converted into a single replace
-    # group (e.g. r`\0').
-    #
-    # It would be nice to change such definition to support multiple
-    # match groups, allowing a regex equivalent to:
-    #
-    #   FOO\((.*), (.*), (.*)\)
-    #
-    # it is probably easier to define it not as a regular expression, but
-    # with some lexical definition like:
-    #
-    #   FOO(arg1, arg2, arg3)
-
-    def __init__(self, regex):
-        self.regex =3D KernRe(regex)
-
-    def _search(self, line):
-        """
-        Finds paired blocks for a regex that ends with a delimiter.
-
-        The suggestion of using finditer to match pairs came from:
-        https://stackoverflow.com/questions/5454322/python-how-to-match-ne=
sted-parentheses-with-regex
-        but I ended using a different implementation to align all three ty=
pes
-        of delimiters and seek for an initial regular expression.
-
-        The algorithm seeks for open/close paired delimiters and places th=
em
-        into a stack, yielding a start/stop position of each match when the
-        stack is zeroed.
-
-        The algorithm should work fine for properly paired lines, but will
-        silently ignore end delimiters that precede a start delimiter.
-        This should be OK for kernel-doc parser, as unaligned delimiters
-        would cause compilation errors. So, we don't need to raise excepti=
ons
-        to cover such issues.
-        """
-
-        stack =3D []
-
-        for match_re in self.regex.finditer(line):
-            start =3D match_re.start()
-            offset =3D match_re.end()
-            string_char =3D None
-            escape =3D False
-
-            d =3D line[offset - 1]
-            if d not in DELIMITER_PAIRS:
-                continue
-
-            end =3D DELIMITER_PAIRS[d]
-            stack.append(end)
-
-            for match in RE_DELIM.finditer(line[offset:]):
-                pos =3D match.start() + offset
-
-                d =3D line[pos]
-
-                if escape:
-                    escape =3D False
-                    continue
-
-                if string_char:
-                    if d =3D=3D '\\':
-                        escape =3D True
-                    elif d =3D=3D string_char:
-                        string_char =3D None
-
-                    continue
-
-                if d in ('"', "'"):
-                    string_char =3D d
-                    continue
-
-                if d in DELIMITER_PAIRS:
-                    end =3D DELIMITER_PAIRS[d]
-
-                    stack.append(end)
-                    continue
-
-                # Does the end delimiter match what is expected?
-                if stack and d =3D=3D stack[-1]:
-                    stack.pop()
-
-                    if not stack:
-                        yield start, offset, pos + 1
-                        break
-
-    def search(self, line):
-        """
-        This is similar to re.search:
-
-        It matches a regex that it is followed by a delimiter,
-        returning occurrences only if all delimiters are paired.
-        """
-
-        for t in self._search(line):
-
-            yield line[t[0]:t[2]]
-
-    def sub(self, sub, line, count=3D0):
-        """
-        This is similar to re.sub:
-
-        It matches a regex that it is followed by a delimiter,
-        replacing occurrences only if all delimiters are paired.
-
-        if the sub argument contains::
-
-            r'\0'
-
-        it will work just like re: it places there the matched paired data
-        with the delimiter stripped.
-
-        If count is different than zero, it will replace at most count
-        items.
-        """
-        out =3D ""
-
-        cur_pos =3D 0
-        n =3D 0
-
-        for start, end, pos in self._search(line):
-            out +=3D line[cur_pos:start]
-
-            # Value, ignoring start/end delimiters
-            value =3D line[end:pos - 1]
-
-            # replaces \0 at the sub string, if \0 is used there
-            new_sub =3D sub
-            new_sub =3D new_sub.replace(r'\0', value)
-
-            out +=3D new_sub
-
-            # Drop end ';' if any
-            if pos < len(line) and line[pos] =3D=3D ';':
-                pos +=3D 1
-
-            cur_pos =3D pos
-            n +=3D 1
-
-            if count and count >=3D n:
-                break
-
-        # Append the remaining string
-        l =3D len(line)
-        out +=3D line[cur_pos:l]
-
-        return out
-
-    def __repr__(self):
-        """
-        Returns a displayable version of the class init.
-        """
-
-        return f'NestedMatch("{self.regex.regex.pattern}")'
--=20
2.53.0
From nobody Tue Apr  7 18:00:33 2026
Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org
 [10.30.226.201])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id E05343AD50A;
	Thu, 12 Mar 2026 07:12:33 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=10.30.226.201
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1773299554; cv=none;
 b=JH/9KkTrnUvU5ScuwCmOrQW2583oj2oVHhAaoBd7c7oC1QGyOke68pBt+zxuKD3OPPVSvUpc7z5/k9Qkw/CyUkVtOgGjGcsUaJ5AbtH82aR9TlltWGoTBwjfbLVhV3rbCZZM/9lxVqCHNbyMCKkxp/lBeG/pEa5tpv4Gz7cKl4s=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1773299554; c=relaxed/simple;
	bh=lfMdLsY4UUVbT4YZ8MjdKmp981RdmGtRwfnQ6yFTZLM=;
	h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:
	 MIME-Version:Content-Type;
 b=jiok3LeqTk5ZSuQlDk6jOZzqektyg5v3dO+QnDut4RYjUsJRQw5BCRH4esWoTUSQ252+SWmX8tIkzfJHkG/SG9Ly3ARqUzmkhOR5BovagG6300s4Tx42BoWW2KEcw/ZRILx48J6m8mZWddsf0+OlrI67+0re7DgQE1UR+gHy6Jc=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b=n1VcblTy; arc=none smtp.client-ip=10.30.226.201
Authentication-Results: smtp.subspace.kernel.org;
	dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b="n1VcblTy"
Received: by smtp.kernel.org (Postfix) with ESMTPSA id 760D5C2BCB8;
	Thu, 12 Mar 2026 07:12:33 +0000 (UTC)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org;
	s=k20201202; t=1773299553;
	bh=lfMdLsY4UUVbT4YZ8MjdKmp981RdmGtRwfnQ6yFTZLM=;
	h=From:To:Cc:Subject:Date:In-Reply-To:References:From;
	b=n1VcblTyC48yx9a5iP049td4c9IN5TbKgp78zOdDdr+e4aID5hiWnOMcbgFKOy0q5
	 QTL0YhLKIJ69KdF4PU5DDAx7LV50f4r7ZzqOtARe2s3dJu0J7ohmeJBNCOR07ZQk5W
	 e1FxItXvp0mQji9wA6K3GtRsUA4Rs0h0xLmAyOws3AN5qSk0GYAzI7UHALYMzIhz4h
	 tSlGVWO3wwNp3xZwsqlC9spx0w/JC+cdBeemQ1279KJCOzacjoQzN8a5q4KdcZzL7X
	 F0YoB5zW5LlZiDHwxp3G8Tn5yj9HDM5+KK0yYFwJOGGf8EzVnwWtGm2oOHnQ31u13N
	 uLiMj1oqGOteg==
Received: from mchehab by mail.kernel.org with local (Exim 4.99.1)
	(envelope-from <mchehab+huawei@kernel.org>)
	id 1w0aDf-000000077hR-399Q;
	Thu, 12 Mar 2026 08:12:31 +0100
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
To: Jonathan Corbet <corbet@lwn.net>,
	Linux Doc Mailing List <linux-doc@vger.kernel.org>
Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>,
	linux-hardening@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	Aleksandr Loktionov <aleksandr.loktionov@intel.com>,
	Mauro Carvalho Chehab <mchehab@kernel.org>,
	Randy Dunlap <rdunlap@infradead.org>
Subject: [PATCH v2 17/20] docs: xforms_lists: handle struct_group directly
Date: Thu, 12 Mar 2026 08:12:25 +0100
Message-ID: 
 <732fdf506327ffc183213e22ed618fb2e05e3fd1.1773297828.git.mchehab+huawei@kernel.org>
X-Mailer: git-send-email 2.53.0
In-Reply-To: <cover.1773297828.git.mchehab+huawei@kernel.org>
References: <cover.1773297828.git.mchehab+huawei@kernel.org>
Precedence: bulk
X-Mailing-List: linux-kernel@vger.kernel.org
List-Id: <linux-kernel.vger.kernel.org>
List-Subscribe: <mailto:linux-kernel+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-kernel+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
Sender: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>

The previous logic was handling struct_group in two steps.
Remove the previous approach, as CMatch can do it the right
way in a single step.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/xforms_lists.py | 53 +++------------------------
 1 file changed, 6 insertions(+), 47 deletions(-)

diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/=
xforms_lists.py
index 7fa7f52cec7b..98632c50a146 100644
--- a/tools/lib/python/kdoc/xforms_lists.py
+++ b/tools/lib/python/kdoc/xforms_lists.py
@@ -32,52 +32,6 @@ class CTransforms:
         (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '),
         (KernRe(r'\s*____cacheline_aligned', re.S), ' '),
         (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''),
-        #
-        # Unwrap struct_group macros based on this definition:
-        # __struct_group(TAG, NAME, ATTRS, MEMBERS...)
-        # which has variants like: struct_group(NAME, MEMBERS...)
-        # Only MEMBERS arguments require documentation.
-        #
-        # Parsing them happens on two steps:
-        #
-        # 1. drop struct group arguments that aren't at MEMBERS,
-        #    storing them as STRUCT_GROUP(MEMBERS)
-        #
-        # 2. remove STRUCT_GROUP() ancillary macro.
-        #
-        # The original logic used to remove STRUCT_GROUP() using an
-        # advanced regex:
-        #
-        #   \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;
-        #
-        # with two patterns that are incompatible with
-        # Python re module, as it has:
-        #
-        #   - a recursive pattern: (?1)
-        #   - an atomic grouping: (?>...)
-        #
-        # I tried a simpler version: but it didn't work either:
-        #   \bSTRUCT_GROUP\(([^\)]+)\)[^;]*;
-        #
-        # As it doesn't properly match the end parenthesis on some cases.
-        #
-        # So, a better solution was crafted: there's now a CMatch
-        # class that ensures that delimiters after a search are properly
-        # matched. So, the implementation to drop STRUCT_GROUP() will be
-        # handled in separate.
-        #
-        (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('),
-        (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GR=
OUP('),
-        (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'st=
ruct \1 \2; STRUCT_GROUP('),
-        (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP=
('),
-        #
-        # Replace macros
-        #
-        # TODO: use CMatch for FOO($1, $2, ...) matches
-        #
-        # it is better to also move those to the CMatch logic,
-        # to ensure that parentheses will be properly matched.
-        #
         (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S),
         r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'),
         (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S),
@@ -106,7 +60,12 @@ class CTransforms:
         (CMatch(r"__cond_acquires_shared"), ""),
         (CMatch(r"__acquires_shared"), ""),
         (CMatch(r"__releases_shared"), ""),
-        (CMatch(r"STRUCT_GROUP"), r'\0'),
+
+        (CMatch('struct_group'), r'\2'),
+        (CMatch('struct_group_attr'), r'\3'),
+        (CMatch('struct_group_tagged'), r'struct \1 \2; \3'),
+        (CMatch('__struct_group'), r'\4'),
+
     ]
=20
     #: Transforms for function prototypes.
--=20
2.53.0
From nobody Tue Apr  7 18:00:33 2026
Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org
 [10.30.226.201])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id DF41B3AD508;
	Thu, 12 Mar 2026 07:12:33 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=10.30.226.201
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1773299553; cv=none;
 b=pX/PabCRf3yqsEOzk6Ug0ZILuODSk5yIcAVHilu2v7T4b7kkNB5GqcAWQG51t0306zpE6tM/PMSVi1pxayi+L5r4QcLXiY0OHW9lZYFuBLBG6U6dyeSJ+gTqviNR6kGlJx2Ygq8D8FEdV+6VCRIYMg3ekCFDzUmSKOTKFC5IEAw=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1773299553; c=relaxed/simple;
	bh=xYvjmm/ULlst4Zx2kl8I7sdtVc4O1AC5AGaMbPXe+bU=;
	h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:
	 MIME-Version:Content-Type;
 b=FZh1c2FuXSVi0x4gJPX9piliqsdl2PJzA2vAS+KSh5PbrVgYl//FQoAikqxFPhpxYYURdyShlWPmvpvp+OUgzjiVIHBS6B4T+Stf4pguMjhXYa/5uh8x3zj052SykrIaVsTr7ul+R2DbM4s8CegRc7axeBgD58Tm1wmz5M3ll5c=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b=VZCwd75c; arc=none smtp.client-ip=10.30.226.201
Authentication-Results: smtp.subspace.kernel.org;
	dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b="VZCwd75c"
Received: by smtp.kernel.org (Postfix) with ESMTPSA id 7323BC4AF0E;
	Thu, 12 Mar 2026 07:12:33 +0000 (UTC)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org;
	s=k20201202; t=1773299553;
	bh=xYvjmm/ULlst4Zx2kl8I7sdtVc4O1AC5AGaMbPXe+bU=;
	h=From:To:Cc:Subject:Date:In-Reply-To:References:From;
	b=VZCwd75cklJHweNbgCPIRWxS16Tc7Zylq4VOQ3+NyqbaJO6zW52rgMkDfrtLQl11o
	 fcnPQ9gdWUQioELafgA6Nos9iTNTFBAZzS60OeGVQ7cs/ifahxnZQZJ/WVc0HWjxDQ
	 aY2Iva18OMHBo+mHCaUi+TtYZCjJ/Q4Rsnu/ZQwLQ7V9p9kwaWoyR9ThrfXNZqkEot
	 nwGVReJ0cZ73d7euGnoZG0jnL8sBVIV+B7OemfgQbE8z4NzlJsvzChZcAnXIR8MbVX
	 S7EKPLSeqxmpUkwodLeRcg12b2uAPXZW+XZnwiN/jlr6SqyWJL7swSUVTT7SYMYdAD
	 /eEPLgJJD3m3g==
Received: from mchehab by mail.kernel.org with local (Exim 4.99.1)
	(envelope-from <mchehab+huawei@kernel.org>)
	id 1w0aDf-000000077hV-3GQn;
	Thu, 12 Mar 2026 08:12:31 +0100
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
To: Jonathan Corbet <corbet@lwn.net>,
	Linux Doc Mailing List <linux-doc@vger.kernel.org>
Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>,
	linux-hardening@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	Aleksandr Loktionov <aleksandr.loktionov@intel.com>,
	Mauro Carvalho Chehab <mchehab@kernel.org>,
	Randy Dunlap <rdunlap@infradead.org>
Subject: [PATCH v2 18/20] docs: xforms_lists: better evaluate struct_group
 macros
Date: Thu, 12 Mar 2026 08:12:26 +0100
Message-ID: 
 <a7b64b6ac6454bf16d5c66b2e22834c6ee875856.1773297828.git.mchehab+huawei@kernel.org>
X-Mailer: git-send-email 2.53.0
In-Reply-To: <cover.1773297828.git.mchehab+huawei@kernel.org>
References: <cover.1773297828.git.mchehab+huawei@kernel.org>
Precedence: bulk
X-Mailing-List: linux-kernel@vger.kernel.org
List-Id: <linux-kernel.vger.kernel.org>
List-Subscribe: <mailto:linux-kernel+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-kernel+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
Sender: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>

The previous approach was to unwind nested structs/unions.

Now that we have logic that can handle it well, use it to
ensure that struct_group macros will properly reflect the
actual struct.

Note that the replacement logic still simplifies the code
a little bit, as the basic building block for struct group is:

	union { \
		struct { MEMBERS } ATTRS; \
		struct __struct_group_tag(TAG) { MEMBERS } ATTRS NAME; \
	} ATTRS

There:

- ATTRS is meant to add extra macro attributes like __packed
  which we already discard, as they aren't relevant to
  document struct members;

- TAG is used only when built with __cplusplus.

So, instead, convert them into just:

    struct { MEMBERS };

Please notice that here, we're using the greedy version of the
backrefs, as MEMBERS is actually MEMBERS... on all such macros.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/xforms_lists.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/=
xforms_lists.py
index 98632c50a146..2056572852fd 100644
--- a/tools/lib/python/kdoc/xforms_lists.py
+++ b/tools/lib/python/kdoc/xforms_lists.py
@@ -61,10 +61,16 @@ class CTransforms:
         (CMatch(r"__acquires_shared"), ""),
         (CMatch(r"__releases_shared"), ""),
=20
-        (CMatch('struct_group'), r'\2'),
-        (CMatch('struct_group_attr'), r'\3'),
-        (CMatch('struct_group_tagged'), r'struct \1 \2; \3'),
-        (CMatch('__struct_group'), r'\4'),
+        #
+        # Macro __struct_group() creates an union with an anonymous
+        # and a non-anonymous struct, depending on the parameters. We only
+        # need one of those at kernel-doc, as we won't be documenting the =
same
+        # members twice.
+        #
+        (CMatch('struct_group'), r'struct { \2+ };'),
+        (CMatch('struct_group_attr'), r'struct { \3+ };'),
+        (CMatch('struct_group_tagged'), r'struct { \3+ };'),
+        (CMatch('__struct_group'), r'struct { \4+ };'),
=20
     ]
=20
--=20
2.53.0
From nobody Tue Apr  7 18:00:33 2026
Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org
 [10.30.226.201])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id 09FF03AD52E;
	Thu, 12 Mar 2026 07:12:34 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=10.30.226.201
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1773299554; cv=none;
 b=p8hZnDeBUOd26FjigUrLH7xEudlRqmtHlK0CWtmcS7C3z4t21vqQR8Sqo1rCWZD2C+cuWbViTKASy8aU32g7t/lO+UyRoK467V7K+IFiwrTfZQOR1CFcrV/5ueisdZ2Rr/Q/jFqGT4mdIIMvQxucYKAWXK0dQ1eAMoQOpSC42cM=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1773299554; c=relaxed/simple;
	bh=0chB5Q8mELtk7nIsVOn7tboqMd7he9vt6/MdSJhiwBk=;
	h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:
	 MIME-Version:Content-Type;
 b=lpBiE6HN8ZgLEMErmEAF0eiP1Ipk5Lcu6pdyHoLLTm0jIz5IC7nHIZbWmBoMxeZ9UCsio/+N8mD6NOVS24ZAfMZaRzvUv/b5hSAjzoRH6avzc4GAPOLHfWU9r7b9euFG22esanSjegP32+qW6u3qOL8j157ZxqyRt+PZLlk4SAE=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b=QBDauuTQ; arc=none smtp.client-ip=10.30.226.201
Authentication-Results: smtp.subspace.kernel.org;
	dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b="QBDauuTQ"
Received: by smtp.kernel.org (Postfix) with ESMTPSA id 864C9C2BCFB;
	Thu, 12 Mar 2026 07:12:33 +0000 (UTC)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org;
	s=k20201202; t=1773299553;
	bh=0chB5Q8mELtk7nIsVOn7tboqMd7he9vt6/MdSJhiwBk=;
	h=From:To:Cc:Subject:Date:In-Reply-To:References:From;
	b=QBDauuTQpfbr9GhrzAdhauPG6l+HPthWj/CXUp9OrfOuFGVedv4HECwHBgBWmCMsu
	 XHklR+CcGIu9o7II1aOfTmNcYTrrZpwtF7I283Zb3C+m51GTCKGKQdQeTdbF8E6kRp
	 x0h5a5rITAcFAf4wBNn3dz6JuvXWahc2irK75aTmSSXR+4M6J9LUoXnXtcs60AoOfC
	 LjGQLo1Fc6qPv9BOdr6X08Et0U54zmqFzNQTBDRoJtmlyX3uh2nXk0sh4O3K8x2dV4
	 1qd7jjulFJmID8wEXGntt/E/P88sVVuRmWx18eEc+dxO2TytRjElKHFSqcQGPOAe2Z
	 y4/wMwcqvHxtw==
Received: from mchehab by mail.kernel.org with local (Exim 4.99.1)
	(envelope-from <mchehab+huawei@kernel.org>)
	id 1w0aDf-000000077hZ-3Ngw;
	Thu, 12 Mar 2026 08:12:31 +0100
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
To: Jonathan Corbet <corbet@lwn.net>,
	Linux Doc Mailing List <linux-doc@vger.kernel.org>
Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>,
	linux-hardening@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	Mauro Carvalho Chehab <mchehab@kernel.org>
Subject: [PATCH v2 19/20] docs: c_lex: add support to work with pure name ids
Date: Thu, 12 Mar 2026 08:12:27 +0100
Message-ID: 
 <8ad16ddc6d68d0cc09a16818b240df467a2c7d93.1773297828.git.mchehab+huawei@kernel.org>
X-Mailer: git-send-email 2.53.0
In-Reply-To: <cover.1773297828.git.mchehab+huawei@kernel.org>
References: <cover.1773297828.git.mchehab+huawei@kernel.org>
Precedence: bulk
X-Mailing-List: linux-kernel@vger.kernel.org
List-Id: <linux-kernel.vger.kernel.org>
List-Subscribe: <mailto:linux-kernel+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-kernel+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
Sender: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>

Most of CMatch's complexity is due to the need to parse macros
with arguments. Still, it is easy enough to also support simple
name identifiers.

Add support for it, as it simplifies xforms logic.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/c_lex.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/tools/lib/python/kdoc/c_lex.py b/tools/lib/python/kdoc/c_lex.py
index 98031cb7907c..689ad64ecbe4 100644
--- a/tools/lib/python/kdoc/c_lex.py
+++ b/tools/lib/python/kdoc/c_lex.py
@@ -477,9 +477,17 @@ class CMatch:
=20
                 continue
=20
-            if not started and tok.kind =3D=3D CToken.BEGIN:
-                started =3D True
-                continue
+            if not started:
+                if tok.kind =3D=3D CToken.SPACE:
+                    continue
+
+                if tok.kind =3D=3D CToken.BEGIN:
+                    started =3D True
+                    continue
+                else:
+                    # Name only token without BEGIN/END
+                    yield start, i
+                    start =3D None
=20
             if tok.kind =3D=3D CToken.END and tok.level =3D=3D stack[-1][1=
]:
                 start, level =3D stack.pop()
--=20
2.53.0
From nobody Tue Apr  7 18:00:33 2026
Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org
 [10.30.226.201])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id 0F3A43AD533;
	Thu, 12 Mar 2026 07:12:34 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=10.30.226.201
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1773299554; cv=none;
 b=LvRIXTteIgiayG5nngQ49kC/Z8CkCHKH5CtylpDGCw6zhWrXL+e1nUsU1cpYwimRjdfzVFu8rv1Hz24ZjAouGd58Bsg6EbYCz16EX6MFYfPYmWwdRbjXHRFiLSiLFsVyfKsu+v1WH+Cy43Cw6nn+QyJcX/SEhc9wvg/6MARfYfs=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1773299554; c=relaxed/simple;
	bh=VgpdCURNXribzlBJhLz5siGjBHNFlZu1TMJYSSZaQ28=;
	h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:
	 MIME-Version:Content-Type;
 b=WgCZnztRVs1iFty+wcf8vvf6uNCNfmFhNZiZvGdn9FpHAW7IGfnp8gah+SWi4iPlIiuFxE0PTQBJ83SRpjnZM1guFwEDsoLNx8LKykzBDwHF0KxoC1fMGkzyJk6/z6o2B/1+FP3ijYRywkNiKniZDc+yTG5UY1IT0bR2uOVoKQU=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b=jlhxQubG; arc=none smtp.client-ip=10.30.226.201
Authentication-Results: smtp.subspace.kernel.org;
	dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
 header.b="jlhxQubG"
Received: by smtp.kernel.org (Postfix) with ESMTPSA id 964E3C2BD05;
	Thu, 12 Mar 2026 07:12:33 +0000 (UTC)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org;
	s=k20201202; t=1773299553;
	bh=VgpdCURNXribzlBJhLz5siGjBHNFlZu1TMJYSSZaQ28=;
	h=From:To:Cc:Subject:Date:In-Reply-To:References:From;
	b=jlhxQubGEyCAqY4x/wuIED1hepP7i9MBpBPwqvemO6sYA3kjaULCAiYmlSbb3dYHl
	 7NHuHKwhRy17nvtVnMqijtrgNErG5vgs7u84iTuVq8zVToGl2pD8UGAo9cprSQQet4
	 7PUoqs3RtBp392CsSc7RCcIbeM4NZCCK2en95r7L2Klw16gkxMxkU50ZUKSvRwi8ZW
	 arDx6GjhIK8HACzJLPAtQynr66STkrD/E/S/eKaIezMfcyb7H0aB7UPZr9JLgLp0Pi
	 i/8bVLmHP2tsZPchHzfvSEODq7XBo82NUkv8jrLef1dgKWdTzqsIOw6Z+Svm7oZjlO
	 II+jcnpgUKQrw==
Received: from mchehab by mail.kernel.org with local (Exim 4.99.1)
	(envelope-from <mchehab+huawei@kernel.org>)
	id 1w0aDf-000000077hd-3V1B;
	Thu, 12 Mar 2026 08:12:31 +0100
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
To: Jonathan Corbet <corbet@lwn.net>,
	Kees Cook <kees@kernel.org>,
	Linux Doc Mailing List <linux-doc@vger.kernel.org>
Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>,
	linux-hardening@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	"Gustavo A. R. Silva" <gustavoars@kernel.org>,
	Aleksandr Loktionov <aleksandr.loktionov@intel.com>,
	Mauro Carvalho Chehab <mchehab@kernel.org>,
	Randy Dunlap <rdunlap@infradead.org>
Subject: [PATCH v2 20/20] docs: xforms_lists: use CMatch for all identifiers
Date: Thu, 12 Mar 2026 08:12:28 +0100
Message-ID: 
 <716c6dfa1d5d50c92ca14e9ecbd1ed9ebd07f052.1773297828.git.mchehab+huawei@kernel.org>
X-Mailer: git-send-email 2.53.0
In-Reply-To: <cover.1773297828.git.mchehab+huawei@kernel.org>
References: <cover.1773297828.git.mchehab+huawei@kernel.org>
Precedence: bulk
X-Mailing-List: linux-kernel@vger.kernel.org
List-Id: <linux-kernel.vger.kernel.org>
List-Subscribe: <mailto:linux-kernel+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-kernel+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
Sender: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>

CMatch is lexically correct and replaces only identifiers,
which is exactly where macro transformations happen.

Use it to make the output safer and ensure that all arguments
will be parsed the right way, even in complex cases.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/xforms_lists.py | 151 +++++++++++++-------------
 1 file changed, 78 insertions(+), 73 deletions(-)

diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/=
xforms_lists.py
index 2056572852fd..ebb4bf485c3a 100644
--- a/tools/lib/python/kdoc/xforms_lists.py
+++ b/tools/lib/python/kdoc/xforms_lists.py
@@ -18,48 +18,46 @@ class CTransforms:
=20
     #: Transforms for structs and unions.
     struct_xforms =3D [
-        # Strip attributes
-        (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=3Dre=
.I | re.S, cache=3DFalse), ' '),
-        (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '),
-        (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '),
-        (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '),
-        (KernRe(r'\s*__guarded_by\s*\([^\)]*\)', re.S), ' '),
-        (KernRe(r'\s*__pt_guarded_by\s*\([^\)]*\)', re.S), ' '),
-        (KernRe(r'\s*__packed\s*', re.S), ' '),
-        (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '),
-        (KernRe(r'\s*__private', re.S), ' '),
-        (KernRe(r'\s*__rcu', re.S), ' '),
-        (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '),
-        (KernRe(r'\s*____cacheline_aligned', re.S), ' '),
-        (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''),
-        (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S),
-        r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'),
-        (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S),
-        r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'),
-        (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + s=
truct_args_pattern + r'\)',
-                re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'),
-        (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' =
+ struct_args_pattern + r'\)',
-                re.S), r'unsigned long \1[1 << ((\2) - 1)]'),
-        (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + st=
ruct_args_pattern +
-                r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'),
-        (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' +
-                struct_args_pattern + r'\)', re.S), r'\2 *\1'),
-        (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + =
r',\s*' +
-                struct_args_pattern + r'\)', re.S), r'\1 \2[]'),
-        (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)=
', re.S), r'dma_addr_t \1'),
-        (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)'=
, re.S), r'__u32 \1'),
-        (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1;=
 u64 \1_array[VIRTIO_FEATURES_U64S]; }'),
+        (CMatch("__attribute__"), ""),
+        (CMatch('__aligned'), ""),
+        (CMatch('__counted_by'), ""),
+        (CMatch('__counted_by_(le|be)'), ""),
+        (CMatch('__guarded_by'), ""),
+        (CMatch('__pt_guarded_by'), ""),
=20
-        (CMatch(r"__cond_acquires"), ""),
-        (CMatch(r"__cond_releases"), ""),
-        (CMatch(r"__acquires"), ""),
-        (CMatch(r"__releases"), ""),
-        (CMatch(r"__must_hold"), ""),
-        (CMatch(r"__must_not_hold"), ""),
-        (CMatch(r"__must_hold_shared"), ""),
-        (CMatch(r"__cond_acquires_shared"), ""),
-        (CMatch(r"__acquires_shared"), ""),
-        (CMatch(r"__releases_shared"), ""),
+        (CMatch('__packed'), ""),
+        (CMatch('CRYPTO_MINALIGN_ATTR'), ""),
+        (CMatch('__private'), ""),
+        (CMatch('__rcu'), ""),
+        (CMatch('____cacheline_aligned_in_smp'), ""),
+        (CMatch('____cacheline_aligned'), ""),
+
+        (CMatch('__cacheline_group_(?:begin|end)'), ""),
+        (CMatch('__ETHTOOL_DECLARE_LINK_MODE_MASK'),
+                r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'),
+        (CMatch('DECLARE_PHY_INTERFACE_MASK',),
+                r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'),
+        (CMatch('DECLARE_BITMAP'), r'unsigned long \1[BITS_TO_LONGS(\2)]'),
+
+        (CMatch('DECLARE_HASHTABLE'), r'unsigned long \1[1 << ((\2) - 1)]'=
),
+        (CMatch('DECLARE_KFIFO'), r'\2 *\1'),
+        (CMatch('DECLARE_KFIFO_PTR'), r'\2 *\1'),
+        (CMatch('(?:__)?DECLARE_FLEX_ARRAY'), r'\1 \2[]'),
+        (CMatch('DEFINE_DMA_UNMAP_ADDR'), r'dma_addr_t \1'),
+        (CMatch('DEFINE_DMA_UNMAP_LEN'), r'__u32 \1'),
+        (CMatch('VIRTIO_DECLARE_FEATURES'), r'union { u64 \1; u64 \1_array=
[VIRTIO_FEATURES_U64S]; }'),
+
+        (CMatch("__cond_acquires"), ""),
+        (CMatch("__cond_releases"), ""),
+        (CMatch("__acquires"), ""),
+        (CMatch("__releases"), ""),
+        (CMatch("__must_hold"), ""),
+        (CMatch("__must_not_hold"), ""),
+        (CMatch("__must_hold_shared"), ""),
+        (CMatch("__cond_acquires_shared"), ""),
+        (CMatch("__acquires_shared"), ""),
+        (CMatch("__releases_shared"), ""),
+        (CMatch("__attribute__"), ""),
=20
         #
         # Macro __struct_group() creates an union with an anonymous
@@ -71,47 +69,54 @@ class CTransforms:
         (CMatch('struct_group_attr'), r'struct { \3+ };'),
         (CMatch('struct_group_tagged'), r'struct { \3+ };'),
         (CMatch('__struct_group'), r'struct { \4+ };'),
-
     ]
=20
     #: Transforms for function prototypes.
     function_xforms =3D [
-        (KernRe(r"^static +"), ""),
-        (KernRe(r"^extern +"), ""),
-        (KernRe(r"^asmlinkage +"), ""),
-        (KernRe(r"^inline +"), ""),
-        (KernRe(r"^__inline__ +"), ""),
-        (KernRe(r"^__inline +"), ""),
-        (KernRe(r"^__always_inline +"), ""),
-        (KernRe(r"^noinline +"), ""),
-        (KernRe(r"^__FORTIFY_INLINE +"), ""),
-        (KernRe(r"__init +"), ""),
-        (KernRe(r"__init_or_module +"), ""),
-        (KernRe(r"__exit +"), ""),
-        (KernRe(r"__deprecated +"), ""),
-        (KernRe(r"__flatten +"), ""),
-        (KernRe(r"__meminit +"), ""),
-        (KernRe(r"__must_check +"), ""),
-        (KernRe(r"__weak +"), ""),
-        (KernRe(r"__sched +"), ""),
+        (CMatch(r"static"), ""),
+        (CMatch(r"extern"), ""),
+        (CMatch(r"asmlinkage"), ""),
+        (CMatch(r"inline"), ""),
+        (CMatch(r"__inline__"), ""),
+        (CMatch(r"__inline"), ""),
+        (CMatch(r"__always_inline"), ""),
+        (CMatch(r"noinline"), ""),
+        (CMatch(r"__FORTIFY_INLINE"), ""),
+        (CMatch(r"__init"), ""),
+        (CMatch(r"__init_or_module"), ""),
+        (CMatch(r"__exit"), ""),
+        (CMatch(r"__deprecated"), ""),
+        (CMatch(r"__flatten"), ""),
+        (CMatch(r"__meminit"), ""),
+        (CMatch(r"__must_check"), ""),
+        (CMatch(r"__weak"), ""),
+        (CMatch(r"__sched"), ""),
+
+        #
+        # HACK: this is similar to process_export() hack. It is meant to
+        # drop _noprof from function name. See for instance:
+        # ahash_request_alloc kernel-doc declaration at include/crypto/has=
h.h.
+        #
         (KernRe(r"_noprof"), ""),
-        (KernRe(r"__always_unused *"), ""),
-        (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""),
-        (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), =
""),
-        (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""),
-        (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1=
, \2"),
-        (KernRe(r"__no_context_analysis\s*"), ""),
-        (KernRe(r"__attribute_const__ +"), ""),
-        (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\=
s+"), ""),
+
+        (CMatch(r"__always_unused"), ""),
+        (CMatch('__printf'), ""),
+        (CMatch('__(?:re)?alloc_size'), ""),
+        (CMatch("__diagnose_as"), ""),
+        (CMatch("DECL_BUCKET_PARAMS"), r"\1, \2"),
+        (CMatch(r"__no_context_analysis"), ""),
+        (CMatch(r"__attribute_const__"), ""),
+        (CMatch("__attribute__"), ""),
     ]
=20
     #: Transforms for variable prototypes.
     var_xforms =3D [
-        (KernRe(r"__read_mostly"), ""),
-        (KernRe(r"__ro_after_init"), ""),
-        (KernRe(r'\s*__guarded_by\s*\([^\)]*\)', re.S), ""),
-        (KernRe(r'\s*__pt_guarded_by\s*\([^\)]*\)', re.S), ""),
-        (KernRe(r"LIST_HEAD\(([\w_]+)\)"), r"struct list_head \1"),
+        (CMatch(r"__read_mostly"), ""),
+        (CMatch(r"__ro_after_init"), ""),
+        (CMatch('__guarded_by'), ""),
+        (CMatch('__pt_guarded_by'), ""),
+        (CMatch("LIST_HEAD"), r"struct list_head \1"),
+
         (KernRe(r"(?://.*)$"), ""),
         (KernRe(r"(?:/\*.*\*/)"), ""),
         (KernRe(r";$"), ""),
--=20
2.53.0