From nobody Sun Oct 5 18:19:59 2025 Received: from ms.lwn.net (ms.lwn.net [45.79.88.28]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 7A01D8F6F; Fri, 1 Aug 2025 00:13:43 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=45.79.88.28 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1754007226; cv=none; b=MCrpeVx99jkyMDh77SusxsxXvC8ZqF6jLhKf5h0+FrzmEFao/vQIJep7+dFXcK+xZ7+wDo4To2xSm8QKgRvuWyJL0KJbCtEXaW9qWmW3YXE0keLCsniuEir+pRh9t0J7KN7B2XENk5nIrT7wnFFFmTLPRs4feZfWUTkOyd4bNF8= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1754007226; c=relaxed/simple; bh=QHY5CU7DPP4vg/fuxV4AfA5GvYFe49NXylWjF6I+0eU=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=SMzw19pKepQhfGwQBYpNrDWKXhlCL4EI4iEy3GrBYTl/ZY7HASEKcouu0rV0eZgFZwELv5WTOldrEnD06cVcHAIbLjSMi6u9HBxfsbZ7MvmXaWdoJGe/bO2MZeH7YJL+6PCeD4zScN/ZIeU1GPM4KbCoPGj5tcS1qn3Mb55s0tY= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=lwn.net; spf=pass smtp.mailfrom=lwn.net; dkim=pass (2048-bit key) header.d=lwn.net header.i=@lwn.net header.b=Af4DUZAQ; arc=none smtp.client-ip=45.79.88.28 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=lwn.net Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=lwn.net Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=lwn.net header.i=@lwn.net header.b="Af4DUZAQ" DKIM-Filter: OpenDKIM Filter v2.11.0 ms.lwn.net D1AAF40AE9 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=lwn.net; s=20201203; t=1754007217; bh=K1rLw1lGMZaw6VPlI0c2e+hxCM9FeMejujw6wy4PgsQ=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=Af4DUZAQQJD/H6KDcjSoOXfM8IYABuJKpeIvQVC30XvH4ZO+GPPbnfsfEkFgbW100 qBOeY4Erya6pukLLBg2vYJKO1zkv3Petm2aEcMTtx2trDv/SkzQ2TPWnLgI3r0AWuK /rkwbk8nM0iNnmr/MigY4VyFLlIJH3tWDnak2A6MAZ5yUUDajc19GK4UC9xQ7tEBk/ 6iGD9WcueyPubcB9a5m0+DP31czCyXoAnyjdceXHlW9GiDV0wbwm5q05F8+14HWWvd 78CHobWIIMwCrcG0rbTMoawCplXB9rnHoRUkBMHZXTO2w3ac6ya+2VSuo/QtBAHyVE 7/VUh9ZvQHS2w== Received: from trenco.lwn.net (unknown [IPv6:2601:280:4600:2da9::1fe]) by ms.lwn.net (Postfix) with ESMTPA id D1AAF40AE9; Fri, 1 Aug 2025 00:13:36 +0000 (UTC) From: Jonathan Corbet To: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org, Mauro Carvalho Chehab , Akira Yokosawa , Jonathan Corbet Subject: [PATCH 04/12] docs: kdoc: move the prefix transforms out of dump_struct() Date: Thu, 31 Jul 2025 18:13:18 -0600 Message-ID: <20250801001326.924276-5-corbet@lwn.net> X-Mailer: git-send-email 2.50.1 In-Reply-To: <20250801001326.924276-1-corbet@lwn.net> References: <20250801001326.924276-1-corbet@lwn.net> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" dump_struct is one of the longest functions in the kdoc_parser class, making it hard to read and reason about. Move the definition of the prefix transformations out of the function, join them with the definition of "attribute" (which was defined at the top of the file but only used here), and reformat the code slightly for shorter line widths. Just code movement in the end. Signed-off-by: Jonathan Corbet Reviewed-by: Mauro Carvalho Chehab --- scripts/lib/kdoc/kdoc_parser.py | 178 +++++++++++++++++--------------- 1 file changed, 96 insertions(+), 82 deletions(-) diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser= .py index e1efa65a3480..5e375318df9c 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -54,8 +54,6 @@ doc_inline_start =3D KernRe(r'^\s*/\*\*\s*$', cache=3DFal= se) doc_inline_sect =3D KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=3DF= alse) doc_inline_end =3D KernRe(r'^\s*\*/\s*$', cache=3DFalse) doc_inline_oneline =3D KernRe(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$',= cache=3DFalse) -attribute =3D KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", - flags=3Dre.I | re.S, cache=3DFalse) =20 export_symbol =3D KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*',= cache=3DFalse) export_symbol_ns =3D KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,= \s*"\S+"\)\s*', cache=3DFalse) @@ -74,6 +72,97 @@ doc_begin_func =3D KernRe(str(doc_com) + # initial " *= ' r'(?:[-:].*)?$', # description (not captured) cache =3D False) =20 +# +# Here begins a long set of transformations to turn structure member prefi= xes +# and macro invocations into something we can parse and generate kdoc for. +# +struct_attribute =3D KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", + flags=3Dre.I | re.S, cache=3DFalse) +struct_args_pattern =3D r'([^,)]+)' + +struct_prefixes =3D [ + # Strip attributes + (struct_attribute, ' '), + (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__packed\s*', re.S), ' '), + (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), + (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), + (KernRe(r'\s*____cacheline_aligned', re.S), ' '), + # + # Unwrap struct_group macros based on this definition: + # __struct_group(TAG, NAME, ATTRS, MEMBERS...) + # which has variants like: struct_group(NAME, MEMBERS...) + # Only MEMBERS arguments require documentation. + # + # Parsing them happens on two steps: + # + # 1. drop struct group arguments that aren't at MEMBERS, + # storing them as STRUCT_GROUP(MEMBERS) + # + # 2. remove STRUCT_GROUP() ancillary macro. + # + # The original logic used to remove STRUCT_GROUP() using an + # advanced regex: + # + # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; + # + # with two patterns that are incompatible with + # Python re module, as it has: + # + # - a recursive pattern: (?1) + # - an atomic grouping: (?>...) + # + # I tried a simpler version: but it didn't work either: + # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; + # + # As it doesn't properly match the end parenthesis on some cases. + # + # So, a better solution was crafted: there's now a NestedMatch + # class that ensures that delimiters after a search are properly + # matched. So, the implementation to drop STRUCT_GROUP() will be + # handled in separate. + # + (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), + (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP(= '), + (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct= \1 \2; STRUCT_GROUP('), + (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), + # + # Replace macros + # + # TODO: use NestedMatch for FOO($1, $2, ...) matches + # + # it is better to also move those to the NestedMatch logic, + # to ensure that parenthesis will be properly matched. + # + (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^)]+)\)', re.S), + r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), + (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^)]+)\)', re.S), + r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), + (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struc= t_args_pattern + r'\)', + re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), + (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + st= ruct_args_pattern + r'\)', + re.S), r'unsigned long \1[1 << ((\2) - 1)]'), + (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct= _args_pattern + + r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'), + (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' + + struct_args_pattern + r'\)', re.S), r'\2 *\1'), + (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\= s*' + + struct_args_pattern + r'\)', re.S), r'\1 \2[]'), + (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', r= e.S), r'dma_addr_t \1'), + (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re= .S), r'__u32 \1'), +] +# +# Regexes here are guaranteed to have the end limiter matching +# the start delimiter. Yet, right now, only one replace group +# is allowed. +# +struct_nested_prefixes =3D [ + (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), +] + + # # A little helper to get rid of excess white space # @@ -579,90 +668,15 @@ class KernelDoc: f"expecting prototype for {decl_type} {self.entr= y.identifier}. Prototype was for {decl_type} {declaration_name} instead\n") return =20 - args_pattern =3D r'([^,)]+)' - - sub_prefixes =3D [ - # Strip attributes - (attribute, ' '), - (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__packed\s*', re.S), ' '), - (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), - (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), - (KernRe(r'\s*____cacheline_aligned', re.S), ' '), - - # Unwrap struct_group macros based on this definition: - # __struct_group(TAG, NAME, ATTRS, MEMBERS...) - # which has variants like: struct_group(NAME, MEMBERS...) - # Only MEMBERS arguments require documentation. - # - # Parsing them happens on two steps: - # - # 1. drop struct group arguments that aren't at MEMBERS, - # storing them as STRUCT_GROUP(MEMBERS) - # - # 2. remove STRUCT_GROUP() ancillary macro. - # - # The original logic used to remove STRUCT_GROUP() using an - # advanced regex: - # - # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; - # - # with two patterns that are incompatible with - # Python re module, as it has: - # - # - a recursive pattern: (?1) - # - an atomic grouping: (?>...) - # - # I tried a simpler version: but it didn't work either: - # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; - # - # As it doesn't properly match the end parenthesis on some cas= es. - # - # So, a better solution was crafted: there's now a NestedMatch - # class that ensures that delimiters after a search are proper= ly - # matched. So, the implementation to drop STRUCT_GROUP() will = be - # handled in separate. - - (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP(= '), - (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUC= T_GROUP('), - (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), = r'struct \1 \2; STRUCT_GROUP('), - (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_G= ROUP('), - - # Replace macros - # - # TODO: use NestedMatch for FOO($1, $2, ...) matches - # - # it is better to also move those to the NestedMatch logic, - # to ensure that parenthesis will be properly matched. - - (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^)]+)\)', re.= S), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), - (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^)]+)\)', re.S), r'= DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), - (KernRe(r'DECLARE_BITMAP\s*\(' + args_pattern + r',\s*' + args= _pattern + r'\)', re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), - (KernRe(r'DECLARE_HASHTABLE\s*\(' + args_pattern + r',\s*' + a= rgs_pattern + r'\)', re.S), r'unsigned long \1[1 << ((\2) - 1)]'), - (KernRe(r'DECLARE_KFIFO\s*\(' + args_pattern + r',\s*' + args_= pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), - (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + args_pattern + r',\s*' + a= rgs_pattern + r'\)', re.S), r'\2 *\1'), - (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + args_pattern + r',= \s*' + args_pattern + r'\)', re.S), r'\1 \2[]'), - (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + args_pattern + r'\)', = re.S), r'dma_addr_t \1'), - (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + args_pattern + r'\)', r= e.S), r'__u32 \1'), - ] - - # Regexes here are guaranteed to have the end limiter matching - # the start delimiter. Yet, right now, only one replace group - # is allowed. - - sub_nested_prefixes =3D [ - (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), - ] - + # + # Go through the list of members applying all of our transformatio= ns. + # members =3D trim_private_members(members) - for search, sub in sub_prefixes: + for search, sub in struct_prefixes: members =3D search.sub(sub, members) =20 nested =3D NestedMatch() - - for search, sub in sub_nested_prefixes: + for search, sub in struct_nested_prefixes: members =3D nested.sub(search, sub, members) =20 # Keeps the original declaration as-is --=20 2.50.1