From nobody Sat Apr 4 07:47:31 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 3F0A736074D; Fri, 20 Mar 2026 09:46:46 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1774000006; cv=none; b=pRaOcZTQid0G6P91zl0JGYjDtq/Q7uxypzQ013f5n3vuIrLogELboZtrzJWjXv2gljDO+iWp6MXy2Pg1eK4nZ7nbdItUxovMOpiONIfW1SLQWJa/5T7mtN6sOyFe8iFfrtViv9eRbmmMlPdbP6GJWedCHMP8GCuXsHk8sR2CMDU= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1774000006; c=relaxed/simple; bh=amOEbNCdxScY/BrrxmdqbgqqhNfYJZMIZiArzjmasRg=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=mgFeLKwAYYYBWCSVzpuGdQ4MrgRHpEaS/Kl+fOEHqMdI+SZQtG+gu+WdTBnipRquTJj6u4Re3eSousyeKGzNxIdS6n3SnJbJ3pnK0iWR5z4fPjflU+4cTRW55eigRStsacR0vvsyGrVQoUFb7KY/H3izkUlPd9B2tlUJBeOoMYg= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=JUXnj1KH; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="JUXnj1KH" Received: by smtp.kernel.org (Postfix) with ESMTPSA id E17F6C4CEF7; Fri, 20 Mar 2026 09:46:45 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1774000005; bh=amOEbNCdxScY/BrrxmdqbgqqhNfYJZMIZiArzjmasRg=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=JUXnj1KHzN8u2IVYJIL8rwLY/7CQ/PnSU6dPJSbtb0eYQoKmGiOGPXbjQiC841054 wT6OYzBrzR82xx5LAxPGTy9poPVFm2JoWDEChoVgK+WquBhovVhCquhMO0DQAhH7pj vFxLNpfXX/hyawtmRFbdF0exg2RVkhYMfqnxHNTF2uk1XhF02bMpQIGYUo3PThMxTH 
QF2ZOxTzPtrepSvsfI0pns8yPa4b/eWBCeBxw94Euujab/F2QE3x6efI1uS5VZ4AyA j10wLwsdcLCI4r0VS6QFyfgUf+mKkyqb+mVhfsS2gNrY7uzUqR0Bm1ipBgVMpKMUkI 6kqmQCAnNelvA== Received: from mchehab by mail.kernel.org with local (Exim 4.99.1) (envelope-from ) id 1w3WRH-0000000Cbv8-3VfL; Fri, 20 Mar 2026 10:46:43 +0100 From: Mauro Carvalho Chehab To: Jonathan Corbet , Linux Doc Mailing List Cc: Mauro Carvalho Chehab , linux-kernel@vger.kernel.org, Mauro Carvalho Chehab Subject: [PATCH RFC 1/2] docs: kdoc: add a class to parse data items Date: Fri, 20 Mar 2026 10:46:40 +0100 Message-ID: <202163ad179e3a88b0a2c32e0bbb256a4d7cee8f.1773998596.git.mchehab+huawei@kernel.org> X-Mailer: git-send-email 2.53.0 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Sender: Mauro Carvalho Chehab Instead of using very complex regular expressions and hamming inner structs/unions, use CTokenizer to handle data types. It should be noted that this doesn't handle "typedef". Signed-off-by: Mauro Carvalho Chehab --- tools/lib/python/kdoc/data_parser.py | 211 +++++++++++++++++++++++++++ 1 file changed, 211 insertions(+) create mode 100644 tools/lib/python/kdoc/data_parser.py diff --git a/tools/lib/python/kdoc/data_parser.py b/tools/lib/python/kdoc/d= ata_parser.py new file mode 100644 index 000000000000..f04915b67d6b --- /dev/null +++ b/tools/lib/python/kdoc/data_parser.py @@ -0,0 +1,211 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2025: Mauro Carvalho Chehab . + +""" +C lexical parser for variables. +""" + +import logging +import re + +from .c_lex import CTokenizer, CToken + +class CDataItem: + """ + Represent a data declaration. 
+ """ + def __init__(self): + self.decl_name =3D None + self.decl_type =3D None + self.parameterlist =3D [] + self.parametertypes =3D {} + + def __repr__(self) -> str: + """ + Return contents of the CDataItem. + Useful for debugging purposes. + """ + return (f"CDataItem(decl_type=3D{self.decl_type!r}, " + f"decl_name=3D{self.decl_name!r}, " + f"parameterlist=3D{self.parameterlist!r}, " + f"parametertypes=3D{self.parametertypes!r})") + +class CDataParser: + """ + Handles a C data prototype, converting it into a data element + describing it. + """ + + IGNORE_TOKENS =3D [CToken.SPACE, CToken.COMMENT] + + def __init__(self, source): + self.source =3D source + self.item =3D CDataItem() + + self._parse() + + def _push_struct(self, tokens, stack, prev_kind, i): + """ + Handles Structs and enums, picking the identifier just after + ``struct`` or ``union``. + """ + + if prev_kind: + j =3D prev_kind + 1 + while j < len(tokens) and tokens[j].kind in self.IGNORE_TOKENS: + j +=3D 1 + + if j < len(tokens) and tokens[j].kind =3D=3D CToken.NAME: + stack.append(tokens[j].value) + return + + name =3D "{unnamed " + tokens[prev_kind].value + "}" + stack.append(name) + self.item.parameterlist.append(name) + return + + # + # Empty block. We still need to append for stack levels to match + # + stack.append(None) + + def _parse(self): + """ + Core algorithm it is a lightweight rewrite of the + walk-the-tokens logic we sketched in the previous answer. 
+ """ + tokens =3D CTokenizer(self.source).tokens + + stack=3D [] + current_type =3D [] + parameters =3D [] + types =3D {} + + prev_kind =3D None + get_id =3D False + level =3D 0 + + for i in range(0, len(tokens)): + tok =3D tokens[i] + if tok.kind =3D=3D CToken.COMMENT: + continue + + if tok.kind in [CToken.STRUCT, CToken.UNION, CToken.ENUM]: + prev_kind =3D i + + if tok.kind =3D=3D CToken.BEGIN: + if tok.value =3D=3D "{": + if (prev_kind and + tokens[prev_kind].kind in [CToken.STRUCT, CToken.U= NION]): + + self._push_struct(tokens, stack, prev_kind, i) + if not self.item.decl_name: + self.item.decl_name =3D stack[0] + else: + stack.append(None) + + # + # Add previous tokens + # + if prev_kind: + get_id =3D True + + if not self.item.decl_type: + self.item.decl_type =3D tokens[prev_kind].value + + current_type =3D [] + + continue + + level +=3D 1 + + if tok.kind =3D=3D CToken.END: + if tok.value =3D=3D "}": + if stack: + stack.pop() + + if get_id and prev_kind: + current_type =3D [] + for j in range(prev_kind, i + 1): + current_type.append((level, tokens[j])) + if tok.kind =3D=3D CToken.BEGIN: + break + + while j < len(tokens): + if tokens[j].kind not in self.IGNORE_TOKENS: + break + j +=3D 1 + + name =3D None + + if tokens[j].kind =3D=3D CToken.NAME: + name =3D tokens[j].value + + if not self.item.decl_type and len(stack) =3D=3D = 1: + self.item.decl_name =3D stack[0] + + self.item.parameterlist.append(name) + current_type.append((level, tok)) + + get_id =3D False + prev_kind =3D None + continue + + level -=3D 1 + + if tok.kind !=3D CToken.ENDSTMT: + current_type.append((level, tok)) + continue + + # + # End of an statement. Parse it if tokens are present + # + + if not current_type: + current_type =3D [] + continue + + # + # the last NAME token with level 0 is the field name + # + name_token =3D None + for pos, t in enumerate(reversed(current_type)): + cur_level, cur_tok =3D t + if not cur_level and cur_tok.kind =3D=3D CToken.NAME: + name_token =3D cur_tok. 
value + break + + # + # TODO: we should likely emit a Warning here + # + + if not name_token: + current_type =3D [] + continue + + # + # As we used reversed, we need to adjust pos here + # + pos =3D len(current_type) - pos - 1 + + # + # For the type, pick everything but the name + # + + out =3D "" + for l, t in current_type: + out +=3D t.value + + names =3D [] + for n in stack[1:] + [name_token]: + if n: + if not "{unnamed" in n: + names.append(n) + + full_name =3D ".".join(names) + + self.item.parameterlist.append(full_name) + self.item.parametertypes[full_name] =3D out.strip() + + current_type =3D [] --=20 2.53.0 From nobody Sat Apr 4 07:47:31 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 1B6DE35E956; Fri, 20 Mar 2026 09:46:46 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1774000006; cv=none; b=CY2bne/5405oY9lhGx3bDH5h0fHn8hlaZohNt0Hi3bAHU+wKUDxaGt7nQ+w+8LghI7I8oMikM4by0fbS2p47gj2FQ/NesgVoCRcpKza4w01HE+XraArUnFt5IqF8dobk9igLh6G4sEZMWVuuwivkNeEh0Mm1ZlX3z0HqzWw7RUk= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1774000006; c=relaxed/simple; bh=oGBXFVA6b5hSd8r6F4eDGUdi4rjMPmXLArBN5DXSie4=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=k1BBFynBXhDD5oIDA4jvVpFfLLTz0PYUP7CxYHEwJegH8mhr+KMB1W4qTWjIE/I8mcbWrzHdOyBzOJCGxBSDWWiKL+VrtJ7AvdP7kP3Hp9clLAePU/WjMkU4PeI25xC0XVSRfosWfbw5kzcnt5TuJB3pTT8oCB9rVZPt1XqV5HQ= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=PVSXhx3p; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) 
header.d=kernel.org header.i=@kernel.org header.b="PVSXhx3p" Received: by smtp.kernel.org (Postfix) with ESMTPSA id E63C1C2BCB0; Fri, 20 Mar 2026 09:46:45 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1774000006; bh=oGBXFVA6b5hSd8r6F4eDGUdi4rjMPmXLArBN5DXSie4=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=PVSXhx3pC9IZPX3P8nkd7JZx7x+PSPFda1Aagvkj7mjsUlsrwhG0DeT0qTYNh/h/g 0gxNpWTkD3I3OV5njMXksKqmDDxyioN2vVxWydj19iAA//qTnURb1+yL3PUrTblpb+ 3GeyZjV3G7sk4mW1yiLo+Fv48cupajDvB5yiep/slUZb/fI8AQV+oBNV/xDP2S0by2 k6c7zLXGHDH//zy0cGJZbdv4EttKJ319clTSd6NqVEcjp8ZrutJeMF3tyAT8M//ZCX aFDZq/frRtQpSwQlkASuFGCKrF0ynBd/l9vjrwbCOEE03Pm6kCP04/Qc1j6pqRV0D6 eLPzBaha7QIyg== Received: from mchehab by mail.kernel.org with local (Exim 4.99.1) (envelope-from ) id 1w3WRH-0000000CbvC-3cZJ; Fri, 20 Mar 2026 10:46:43 +0100 From: Mauro Carvalho Chehab To: Jonathan Corbet , Linux Doc Mailing List Cc: Mauro Carvalho Chehab , linux-kernel@vger.kernel.org, Mauro Carvalho Chehab Subject: [PATCH RFC 2/2] HACK: add a parse_c.py file to test CDataParser Date: Fri, 20 Mar 2026 10:46:41 +0100 Message-ID: <99bb5de72b3510f45c6aec7b505a7b48b060dbe3.1773998596.git.mchehab+huawei@kernel.org> X-Mailer: git-send-email 2.53.0 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Sender: Mauro Carvalho Chehab This patch should not be merged. It is a quick tool to test CDataParser. Signed-off-by: Mauro Carvalho Chehab --- parse_c.py | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100755 parse_c.py diff --git a/parse_c.py b/parse_c.py new file mode 100755 index 000000000000..740445998965 --- /dev/null +++ b/parse_c.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python3 +# example.py +""" +Run a quick demo on a real C source file. 
+ +Usage +----- + python -m c_struct_parser.example +""" + +import argparse + +from tools.lib.python.kdoc.data_parser import CDataParser + +TEST =3D """ +struct property_entry { + const char *name; + size_t length; + bool is_inline; /* TEST */ + struct foo { + char *bar[12]; + struct foobar { + enum enum_type my_enum; /* TEST 2 */ + struct { + uint_t test; /* TEST 3 */ + static const int anonymous; + }; + }; + ;; /* This is valid, but should not occur in practice */ + {}; /* Same here */ + }; + enum dev_prop_type type; + enum { + EXPRESSION_LITERAL, + EXPRESSION_BINARY, + EXPRESSION_UNARY, + EXPRESSION_FUNCTION, + EXPRESSION_ARRAY + } literal; + + union { + const void *pointer; + union { + u8 boou8_data[sizeof(u64) / sizeof(u8)]; + u16 u16_data[sizeof(u64) / sizeof(u16)]; + u32 u32_data[sizeof(u64) / sizeof(u32)]; + u64 u64_data[sizeof(u64) / sizeof(u64)]; + const char *str[sizeof(u64) / sizeof(char *)]; + }; + }; + char *prop_name; +}; +""" + + +def main(): + p =3D argparse.ArgumentParser(description=3D"Parse a C struct/union/en= um definition.") + + p.add_argument("fname", nargs=3D"?", help=3D"C source file to parse") + + args =3D p.parse_args() + + if args.fname: + with open(args.fname, "r", encoding=3D"utf-8") as f: + source =3D f.read() + else: + source =3D TEST + + parser =3D CDataParser(source) + + item =3D parser.item + + print(repr(item)) + + print(f"{item.decl_type} {item.decl_name}\n") + + print("parameterlist:") + for p in item.parameterlist: + print(f" - {p}") + + print("\nparametertypes:") + for k, v in item.parametertypes.items(): + print(f" - {k}: {v}") + + +if __name__ =3D=3D "__main__": + main() --=20 2.53.0