From nobody Fri Dec 19 16:05:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id C0BA4266579; Tue, 8 Apr 2025 10:09:59 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1744106999; cv=none; b=muu6dFYggnrbUWzJxmJfk0whiNquK+PEV8juBG0DM9eBNj4mjvyjsYvFgjnJmK7qA+WmQdiTpk6A7qxi8Syu285A7YmYXTNvzC0JBEkP9Zq8c8TYGkzy02/2ePoym5DlLxBIMkoRb72zEbQZ5ucFp9m+QMGXZJcg8mDyiNIrEKU= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1744106999; c=relaxed/simple; bh=F8pKPq7VopAvbcnhMqVHrebIqO/8qwK6q2nGyxBl+cs=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=UhQX3XbnxgcueiOFztSOFwwOH4ZZ+KEsH0V5/bMorG9Iotvrt1f30eXVWfHBX5sS6oLndclG03rRYumnLZzifuLJHaX+Aothc9ND2cBLO1RkFAPGt43gK3gF6LOv4RY2PvgLeGkHzrBo2XUGnizOZtdkTjRfLoniQ7Uf1Su/LVM= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=B/jTytkN; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="B/jTytkN" Received: by smtp.kernel.org (Postfix) with ESMTPSA id D6F82C4AF09; Tue, 8 Apr 2025 10:09:57 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1744106999; bh=F8pKPq7VopAvbcnhMqVHrebIqO/8qwK6q2nGyxBl+cs=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=B/jTytkN0Qnm8btEjNfbMOfM0qiQ1r8f/0y0AHyV6D7DofIQ1T6lyPXHJrN55V2jv tGToDO06vOtkysHB5pLL21YGmjn+zl5xs4vmdYyp4DTq9J0fdDCPbTaeCR0AK/jl0b +B0xVjydXBPYuj5DWmYQOEJelU9f2Jl+Q6L7ziVzCqCZTiuVe65im1RTUEXl2NXuI9 AeRTDmGunXWOeFQNslPEzz1tx/XszLp6xHv9VII+GnvxON4jyQcCFeC3uLQjZ2S/lc 
3X3l9ppUv/u1tnpqci0EUgjiUmomzV1gd3LYOTjhy+eyDAFTHBPl+9ybexqXJtaqqg OjdaRTmZ4OYig== Received: from mchehab by mail.kernel.org with local (Exim 4.98.2) (envelope-from ) id 1u25tt-00000008RVd-0WIz; Tue, 08 Apr 2025 18:09:49 +0800 From: Mauro Carvalho Chehab To: Linux Doc Mailing List , Jonathan Corbet Cc: Mauro Carvalho Chehab , "Gustavo A. R. Silva" , Kees Cook , Sean Anderson , linux-hardening@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [PATCH v3 08/33] scripts/kernel-doc.py: move KernelDoc class to a separate file Date: Tue, 8 Apr 2025 18:09:11 +0800 Message-ID: X-Mailer: git-send-email 2.49.0 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Sender: Mauro Carvalho Chehab Content-Type: text/plain; charset="utf-8" In preparation for letting the kerneldoc Sphinx extension import Python libraries, move the KernelDoc class, along with the regular expressions it uses, to a separate file. Signed-off-by: Mauro Carvalho Chehab --- scripts/kernel-doc.py | 1634 +----------------------------- scripts/lib/kdoc/kdoc_parser.py | 1690 +++++++++++++++++++++++++++++++ 2 files changed, 1692 insertions(+), 1632 deletions(-) create mode 100755 scripts/lib/kdoc/kdoc_parser.py diff --git a/scripts/kernel-doc.py b/scripts/kernel-doc.py index 7f00c8c86a78..f030a36a165b 100755 --- a/scripts/kernel-doc.py +++ b/scripts/kernel-doc.py @@ -117,53 +117,15 @@ SRC_DIR =3D os.path.dirname(os.path.realpath(__file__= )) =20 sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) =20 -from kdoc_re import Re, NestedMatch +from kdoc_parser import KernelDoc, type_param +from kdoc_re import Re =20 - -# -# Regular expressions used to parse kernel-doc markups at KernelDoc class. -# -# Let's declare them in lowercase outside any class to make easier to -# convert from the python script. 
-# -# As those are evaluated at the beginning, no need to cache them -# - - -# Allow whitespace at end of comment start. -doc_start =3D Re(r'^/\*\*\s*$', cache=3DFalse) - -doc_end =3D Re(r'\*/', cache=3DFalse) -doc_com =3D Re(r'\s*\*\s*', cache=3DFalse) -doc_com_body =3D Re(r'\s*\* ?', cache=3DFalse) -doc_decl =3D doc_com + Re(r'(\w+)', cache=3DFalse) - -# @params and a strictly limited set of supported section names -# Specifically: -# Match @word: -# @...: -# @{section-name}: -# while trying to not match literal block starts like "example::" -# -doc_sect =3D doc_com + \ - Re(r'\s*(\@[.\w]+|\@\.\.\.|description|context|returns?|notes?= |examples?)\s*:([^:].*)?$', - flags=3Dre.I, cache=3DFalse) - -doc_content =3D doc_com_body + Re(r'(.*)', cache=3DFalse) -doc_block =3D doc_com + Re(r'DOC:\s*(.*)?', cache=3DFalse) -doc_inline_start =3D Re(r'^\s*/\*\*\s*$', cache=3DFalse) -doc_inline_sect =3D Re(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=3DFalse) -doc_inline_end =3D Re(r'^\s*\*/\s*$', cache=3DFalse) -doc_inline_oneline =3D Re(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cac= he=3DFalse) function_pointer =3D Re(r"([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)", cache=3DFalse) -attribute =3D Re(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", - flags=3Dre.I | re.S, cache=3DFalse) =20 # match expressions used to find embedded type information type_constant =3D Re(r"\b``([^\`]+)``\b", cache=3DFalse) type_constant2 =3D Re(r"\%([-_*\w]+)", cache=3DFalse) type_func =3D Re(r"(\w+)\(\)", cache=3DFalse) -type_param =3D Re(r"\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=3DFalse) type_param_ref =3D Re(r"([\!~\*]?)\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cac= he=3DFalse) =20 # Special RST handling for func ptr params @@ -181,1598 +143,6 @@ type_member =3D Re(r"\&([_\w]+)(\.|->)([_\w]+)", cac= he=3DFalse) type_fallback =3D Re(r"\&([_\w]+)", cache=3DFalse) type_member_func =3D type_member + Re(r"\(\)", cache=3DFalse) =20 -export_symbol =3D Re(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cac= 
he=3DFalse) -export_symbol_ns =3D Re(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"= \S+"\)\s*', cache=3DFalse) - -class KernelDoc: - # Parser states - STATE_NORMAL =3D 0 # normal code - STATE_NAME =3D 1 # looking for function name - STATE_BODY_MAYBE =3D 2 # body - or maybe more description - STATE_BODY =3D 3 # the body of the comment - STATE_BODY_WITH_BLANK_LINE =3D 4 # the body which has a blank line - STATE_PROTO =3D 5 # scanning prototype - STATE_DOCBLOCK =3D 6 # documentation block - STATE_INLINE =3D 7 # gathering doc outside main block - - st_name =3D [ - "NORMAL", - "NAME", - "BODY_MAYBE", - "BODY", - "BODY_WITH_BLANK_LINE", - "PROTO", - "DOCBLOCK", - "INLINE", - ] - - # Inline documentation state - STATE_INLINE_NA =3D 0 # not applicable ($state !=3D STATE_INLINE) - STATE_INLINE_NAME =3D 1 # looking for member name (@foo:) - STATE_INLINE_TEXT =3D 2 # looking for member documentation - STATE_INLINE_END =3D 3 # done - STATE_INLINE_ERROR =3D 4 # error - Comment without header was found. - # Spit a warning as it's not - # proper kernel-doc and ignore the rest. 
- - st_inline_name =3D [ - "", - "_NAME", - "_TEXT", - "_END", - "_ERROR", - ] - - # Section names - - section_default =3D "Description" # default section - section_intro =3D "Introduction" - section_context =3D "Context" - section_return =3D "Return" - - undescribed =3D "-- undescribed --" - - def __init__(self, config, fname): - """Initialize internal variables""" - - self.fname =3D fname - self.config =3D config - - # Initial state for the state machines - self.state =3D self.STATE_NORMAL - self.inline_doc_state =3D self.STATE_INLINE_NA - - # Store entry currently being processed - self.entry =3D None - - # Place all potential outputs into an array - self.entries =3D [] - - def show_warnings(self, dtype, declaration_name): - # TODO: implement it - - return True - - # TODO: rename to emit_message - def emit_warning(self, ln, msg, warning=3DTrue): - """Emit a message""" - - if warning: - self.config.log.warning("%s:%d %s", self.fname, ln, msg) - else: - self.config.log.info("%s:%d %s", self.fname, ln, msg) - - def dump_section(self, start_new=3DTrue): - """ - Dumps section contents to arrays/hashes intended for that purpose. - """ - - name =3D self.entry.section - contents =3D self.entry.contents - - # TODO: we can prevent dumping empty sections here with: - # - # if self.entry.contents.strip("\n"): - # if start_new: - # self.entry.section =3D self.section_default - # self.entry.contents =3D "" - # - # return - # - # But, as we want to be producing the same output of the - # venerable kernel-doc Perl tool, let's just output everything, - # at least for now - - if type_param.match(name): - name =3D type_param.group(1) - - self.entry.parameterdescs[name] =3D contents - self.entry.parameterdesc_start_lines[name] =3D self.entry.new_= start_line - - self.entry.sectcheck +=3D name + " " - self.entry.new_start_line =3D 0 - - elif name =3D=3D "@...": - name =3D "..." 
- self.entry.parameterdescs[name] =3D contents - self.entry.sectcheck +=3D name + " " - self.entry.parameterdesc_start_lines[name] =3D self.entry.new_= start_line - self.entry.new_start_line =3D 0 - - else: - if name in self.entry.sections and self.entry.sections[name] != =3D "": - # Only warn on user-specified duplicate section names - if name !=3D self.section_default: - self.emit_warning(self.entry.new_start_line, - f"duplicate section name '{name}'\n") - self.entry.sections[name] +=3D contents - else: - self.entry.sections[name] =3D contents - self.entry.sectionlist.append(name) - self.entry.section_start_lines[name] =3D self.entry.new_st= art_line - self.entry.new_start_line =3D 0 - -# self.config.log.debug("Section: %s : %s", name, pformat(vars(self= .entry))) - - if start_new: - self.entry.section =3D self.section_default - self.entry.contents =3D "" - - # TODO: rename it to store_declaration - def output_declaration(self, dtype, name, **args): - """ - Stores the entry into an entry array. - - The actual output and output filters will be handled elsewhere - """ - - # The implementation here is different than the original kernel-do= c: - # instead of checking for output filters or actually output anythi= ng, - # it just stores the declaration content at self.entries, as the - # output will happen on a separate class. 
- # - # For now, we're keeping the same name of the function just to make - # easier to compare the source code of both scripts - - if "declaration_start_line" not in args: - args["declaration_start_line"] =3D self.entry.declaration_star= t_line - - args["type"] =3D dtype - - # TODO: use colletions.OrderedDict - - sections =3D args.get('sections', {}) - sectionlist =3D args.get('sectionlist', []) - - # Drop empty sections - # TODO: improve it to emit warnings - for section in [ "Description", "Return" ]: - if section in sectionlist: - if not sections[section].rstrip(): - del sections[section] - sectionlist.remove(section) - - self.entries.append((name, args)) - - self.config.log.debug("Output: %s:%s =3D %s", dtype, name, pformat= (args)) - - def reset_state(self, ln): - """ - Ancillary routine to create a new entry. It initializes all - variables used by the state machine. - """ - - self.entry =3D argparse.Namespace - - self.entry.contents =3D "" - self.entry.function =3D "" - self.entry.sectcheck =3D "" - self.entry.struct_actual =3D "" - self.entry.prototype =3D "" - - self.entry.parameterlist =3D [] - self.entry.parameterdescs =3D {} - self.entry.parametertypes =3D {} - self.entry.parameterdesc_start_lines =3D {} - - self.entry.section_start_lines =3D {} - self.entry.sectionlist =3D [] - self.entry.sections =3D {} - - self.entry.anon_struct_union =3D False - - self.entry.leading_space =3D None - - # State flags - self.state =3D self.STATE_NORMAL - self.inline_doc_state =3D self.STATE_INLINE_NA - self.entry.brcount =3D 0 - - self.entry.in_doc_sect =3D False - self.entry.declaration_start_line =3D ln - - def push_parameter(self, ln, decl_type, param, dtype, - org_arg, declaration_name): - if self.entry.anon_struct_union and dtype =3D=3D "" and param =3D= =3D "}": - return # Ignore the ending }; from anonymous struct/union - - self.entry.anon_struct_union =3D False - - param =3D Re(r'[\[\)].*').sub('', param, count=3D1) - - if dtype =3D=3D "" and 
param.endswith("..."): - if Re(r'\w\.\.\.$').search(param): - # For named variable parameters of the form `x...`, - # remove the dots - param =3D param[:-3] - else: - # Handles unnamed variable parameters - param =3D "..." - - if param not in self.entry.parameterdescs or \ - not self.entry.parameterdescs[param]: - - self.entry.parameterdescs[param] =3D "variable arguments" - - elif dtype =3D=3D "" and (not param or param =3D=3D "void"): - param =3D "void" - self.entry.parameterdescs[param] =3D "no arguments" - - elif dtype =3D=3D "" and param in ["struct", "union"]: - # Handle unnamed (anonymous) union or struct - dtype =3D param - param =3D "{unnamed_" + param + "}" - self.entry.parameterdescs[param] =3D "anonymous\n" - self.entry.anon_struct_union =3D True - - # Handle cache group enforcing variables: they do not need - # to be described in header files - elif "__cacheline_group" in param: - # Ignore __cacheline_group_begin and __cacheline_group_end - return - - # Warn if parameter has no description - # (but ignore ones starting with # as these are not parameters - # but inline preprocessor statements) - if param not in self.entry.parameterdescs and not param.startswith= ("#"): - self.entry.parameterdescs[param] =3D self.undescribed - - if self.show_warnings(dtype, declaration_name) and "." not in = param: - if decl_type =3D=3D 'function': - dname =3D f"{decl_type} parameter" - else: - dname =3D f"{decl_type} member" - - self.emit_warning(ln, - f"{dname} '{param}' not described in '{d= eclaration_name}'") - - # Strip spaces from param so that it is one continuous string on - # parameterlist. This fixes a problem where check_sections() - # cannot find a parameter like "addr[6 + 2]" because it actually - # appears as "addr[6", "+", "2]" on the parameter list. - # However, it's better to maintain the param string unchanged for - # output, so just weaken the string compare in check_sections() - # to ignore "[blah" in a parameter string. 
- - self.entry.parameterlist.append(param) - org_arg =3D Re(r'\s\s+').sub(' ', org_arg) - self.entry.parametertypes[param] =3D org_arg - - def save_struct_actual(self, actual): - """ - Strip all spaces from the actual param so that it looks like - one string item. - """ - - actual =3D Re(r'\s*').sub("", actual, count=3D1) - - self.entry.struct_actual +=3D actual + " " - - def create_parameter_list(self, ln, decl_type, args, splitter, declara= tion_name): - - # temporarily replace all commas inside function pointer definition - arg_expr =3D Re(r'(\([^\),]+),') - while arg_expr.search(args): - args =3D arg_expr.sub(r"\1#", args) - - for arg in args.split(splitter): - # Strip comments - arg =3D Re(r'\/\*.*\*\/').sub('', arg) - - # Ignore argument attributes - arg =3D Re(r'\sPOS0?\s').sub(' ', arg) - - # Strip leading/trailing spaces - arg =3D arg.strip() - arg =3D Re(r'\s+').sub(' ', arg, count=3D1) - - if arg.startswith('#'): - # Treat preprocessor directive as a typeless variable just= to fill - # corresponding data structures "correctly". Catch it late= r in - # output_* subs. 
- - # Treat preprocessor directive as a typeless variable - self.push_parameter(ln, decl_type, arg, "", - "", declaration_name) - - elif Re(r'\(.+\)\s*\(').search(arg): - # Pointer-to-function - - arg =3D arg.replace('#', ',') - - r =3D Re(r'[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)') - if r.match(arg): - param =3D r.group(1) - else: - self.emit_warning(ln, f"Invalid param: {arg}") - param =3D arg - - dtype =3D Re(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r= '\1', arg) - self.save_struct_actual(param) - self.push_parameter(ln, decl_type, param, dtype, - arg, declaration_name) - - elif Re(r'\(.+\)\s*\[').search(arg): - # Array-of-pointers - - arg =3D arg.replace('#', ',') - r =3D Re(r'[^\(]+\(\s*\*\s*([\w\[\]\.]*?)\s*(\s*\[\s*[\w]+= \s*\]\s*)*\)') - if r.match(arg): - param =3D r.group(1) - else: - self.emit_warning(ln, f"Invalid param: {arg}") - param =3D arg - - dtype =3D Re(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r= '\1', arg) - - self.save_struct_actual(param) - self.push_parameter(ln, decl_type, param, dtype, - arg, declaration_name) - - elif arg: - arg =3D Re(r'\s*:\s*').sub(":", arg) - arg =3D Re(r'\s*\[').sub('[', arg) - - args =3D Re(r'\s*,\s*').split(arg) - if args[0] and '*' in args[0]: - args[0] =3D re.sub(r'(\*+)\s*', r' \1', args[0]) - - first_arg =3D [] - r =3D Re(r'^(.*\s+)(.*?\[.*\].*)$') - if args[0] and r.match(args[0]): - args.pop(0) - first_arg.extend(r.group(1)) - first_arg.append(r.group(2)) - else: - first_arg =3D Re(r'\s+').split(args.pop(0)) - - args.insert(0, first_arg.pop()) - dtype =3D ' '.join(first_arg) - - for param in args: - if Re(r'^(\*+)\s*(.*)').match(param): - r =3D Re(r'^(\*+)\s*(.*)') - if not r.match(param): - self.emit_warning(ln, f"Invalid param: {param}= ") - continue - - param =3D r.group(1) - - self.save_struct_actual(r.group(2)) - self.push_parameter(ln, decl_type, r.group(2), - f"{dtype} {r.group(1)}", - arg, declaration_name) - - elif Re(r'(.*?):(\w+)').search(param): - r =3D Re(r'(.*?):(\w+)') - if not r.match(param): - 
self.emit_warning(ln, f"Invalid param: {param}= ") - continue - - if dtype !=3D "": # Skip unnamed bit-fields - self.save_struct_actual(r.group(1)) - self.push_parameter(ln, decl_type, r.group(1), - f"{dtype}:{r.group(2)}", - arg, declaration_name) - else: - self.save_struct_actual(param) - self.push_parameter(ln, decl_type, param, dtype, - arg, declaration_name) - - def check_sections(self, ln, decl_name, decl_type, sectcheck, prmschec= k): - sects =3D sectcheck.split() - prms =3D prmscheck.split() - err =3D False - - for sx in range(len(sects)): # pylint: disable=3D= C0200 - err =3D True - for px in range(len(prms)): # pylint: disable=3D= C0200 - prm_clean =3D prms[px] - prm_clean =3D Re(r'\[.*\]').sub('', prm_clean) - prm_clean =3D attribute.sub('', prm_clean) - - # ignore array size in a parameter string; - # however, the original param string may contain - # spaces, e.g.: addr[6 + 2] - # and this appears in @prms as "addr[6" since the - # parameter list is split at spaces; - # hence just ignore "[..." 
for the sections check; - prm_clean =3D Re(r'\[.*').sub('', prm_clean) - - if prm_clean =3D=3D sects[sx]: - err =3D False - break - - if err: - if decl_type =3D=3D 'function': - dname =3D f"{decl_type} parameter" - else: - dname =3D f"{decl_type} member" - - self.emit_warning(ln, - f"Excess {dname} '{sects[sx]}' descripti= on in '{decl_name}'") - - def check_return_section(self, ln, declaration_name, return_type): - - if not self.config.wreturn: - return - - # Ignore an empty return type (It's a macro) - # Ignore functions with a "void" return type (but not "void *") - if not return_type or Re(r'void\s*\w*\s*$').search(return_type): - return - - if not self.entry.sections.get("Return", None): - self.emit_warning(ln, - f"No description found for return value of '= {declaration_name}'") - - def dump_struct(self, ln, proto): - """ - Store an entry for an struct or union - """ - - type_pattern =3D r'(struct|union)' - - qualifiers =3D [ - "__attribute__", - "__packed", - "__aligned", - "____cacheline_aligned_in_smp", - "____cacheline_aligned", - ] - - definition_body =3D r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) = + ")?" - struct_members =3D Re(type_pattern + r'([^\{\};]+)(\{)([^\{\}]*)(\= })([^\{\}\;]*)(\;)') - - # Extract struct/union definition - members =3D None - declaration_name =3D None - decl_type =3D None - - r =3D Re(type_pattern + r'\s+(\w+)\s*' + definition_body) - if r.search(proto): - decl_type =3D r.group(1) - declaration_name =3D r.group(2) - members =3D r.group(3) - else: - r =3D Re(r'typedef\s+' + type_pattern + r'\s*' + definition_bo= dy + r'\s*(\w+)\s*;') - - if r.search(proto): - decl_type =3D r.group(1) - declaration_name =3D r.group(3) - members =3D r.group(2) - - if not members: - self.emit_warning(ln, f"{proto} error: Cannot parse struct or = union!") - self.config.errors +=3D 1 - return - - if self.entry.identifier !=3D declaration_name: - self.emit_warning(ln, - f"expecting prototype for {decl_type} {self.= entry.identifier}. 
Prototype was for {decl_type} {declaration_name} instead= \n") - return - - args_pattern =3Dr'([^,)]+)' - - sub_prefixes =3D [ - (Re(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', re.S | re.I), = ''), - (Re(r'\/\*\s*private:.*', re.S| re.I), ''), - - # Strip comments - (Re(r'\/\*.*?\*\/', re.S), ''), - - # Strip attributes - (attribute, ' '), - (Re(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), - (Re(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), - (Re(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), - (Re(r'\s*__packed\s*', re.S), ' '), - (Re(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), - (Re(r'\s*____cacheline_aligned_in_smp', re.S), ' '), - (Re(r'\s*____cacheline_aligned', re.S), ' '), - - # Unwrap struct_group macros based on this definition: - # __struct_group(TAG, NAME, ATTRS, MEMBERS...) - # which has variants like: struct_group(NAME, MEMBERS...) - # Only MEMBERS arguments require documentation. - # - # Parsing them happens on two steps: - # - # 1. drop struct group arguments that aren't at MEMBERS, - # storing them as STRUCT_GROUP(MEMBERS) - # - # 2. remove STRUCT_GROUP() ancillary macro. - # - # The original logic used to remove STRUCT_GROUP() using an - # advanced regex: - # - # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; - # - # with two patterns that are incompatible with - # Python re module, as it has: - # - # - a recursive pattern: (?1) - # - an atomic grouping: (?>...) - # - # I tried a simpler version: but it didn't work either: - # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; - # - # As it doesn't properly match the end parenthesis on some cas= es. - # - # So, a better solution was crafted: there's now a NestedMatch - # class that ensures that delimiters after a search are proper= ly - # matched. So, the implementation to drop STRUCT_GROUP() will = be - # handled in separate. 
- - (Re(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), - (Re(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_G= ROUP('), - (Re(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r's= truct \1 \2; STRUCT_GROUP('), - (Re(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROU= P('), - - # Replace macros - # - # TODO: it is better to also move those to the NestedMatch log= ic, - # to ensure that parenthesis will be properly matched. - - (Re(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S),= r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), - (Re(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), r'DE= CLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), - (Re(r'DECLARE_BITMAP\s*\(' + args_pattern + r',\s*' + args_pat= tern + r'\)', re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), - (Re(r'DECLARE_HASHTABLE\s*\(' + args_pattern + r',\s*' + args_= pattern + r'\)', re.S), r'unsigned long \1[1 << ((\2) - 1)]'), - (Re(r'DECLARE_KFIFO\s*\(' + args_pattern + r',\s*' + args_patt= ern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), - (Re(r'DECLARE_KFIFO_PTR\s*\(' + args_pattern + r',\s*' + args_= pattern + r'\)', re.S), r'\2 *\1'), - (Re(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + args_pattern + r',\s*'= + args_pattern + r'\)', re.S), r'\1 \2[]'), - (Re(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + args_pattern + r'\)', re.S= ), r'dma_addr_t \1'), - (Re(r'DEFINE_DMA_UNMAP_LEN\s*\(' + args_pattern + r'\)', re.S)= , r'__u32 \1'), - ] - - # Regexes here are guaranteed to have the end limiter matching - # the start delimiter. Yet, right now, only one replace group - # is allowed. 
- - sub_nested_prefixes =3D [ - (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), - ] - - for search, sub in sub_prefixes: - members =3D search.sub(sub, members) - - nested =3D NestedMatch() - - for search, sub in sub_nested_prefixes: - members =3D nested.sub(search, sub, members) - - # Keeps the original declaration as-is - declaration =3D members - - # Split nested struct/union elements - # - # This loop was simpler at the original kernel-doc perl version, as - # while ($members =3D~ m/$struct_members/) { ... } - # reads 'members' string on each interaction. - # - # Python behavior is different: it parses 'members' only once, - # creating a list of tuples from the first interaction. - # - # On other words, this won't get nested structs. - # - # So, we need to have an extra loop on Python to override such - # re limitation. - - while True: - tuples =3D struct_members.findall(members) - if not tuples: - break - - for t in tuples: - newmember =3D "" - maintype =3D t[0] - s_ids =3D t[5] - content =3D t[3] - - oldmember =3D "".join(t) - - for s_id in s_ids.split(','): - s_id =3D s_id.strip() - - newmember +=3D f"{maintype} {s_id}; " - s_id =3D Re(r'[:\[].*').sub('', s_id) - s_id =3D Re(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) - - for arg in content.split(';'): - arg =3D arg.strip() - - if not arg: - continue - - r =3D Re(r'^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)') - if r.match(arg): - # Pointer-to-function - dtype =3D r.group(1) - name =3D r.group(2) - extra =3D r.group(3) - - if not name: - continue - - if not s_id: - # Anonymous struct/union - newmember +=3D f"{dtype}{name}{extra}; " - else: - newmember +=3D f"{dtype}{s_id}.{name}{extr= a}; " - - else: - arg =3D arg.strip() - # Handle bitmaps - arg =3D Re(r':\s*\d+\s*').sub('', arg) - - # Handle arrays - arg =3D Re(r'\[.*\]').sub('', arg) - - # Handle multiple IDs - arg =3D Re(r'\s*,\s*').sub(',', arg) - - - r =3D Re(r'(.*)\s+([\S+,]+)') - - if r.search(arg): - dtype =3D r.group(1) - names =3D r.group(2) - else: - newmember +=3D 
f"{arg}; " - continue - - for name in names.split(','): - name =3D Re(r'^\s*\**(\S+)\s*').sub(r'\1',= name).strip() - - if not name: - continue - - if not s_id: - # Anonymous struct/union - newmember +=3D f"{dtype} {name}; " - else: - newmember +=3D f"{dtype} {s_id}.{name}= ; " - - members =3D members.replace(oldmember, newmember) - - # Ignore other nested elements, like enums - members =3D re.sub(r'(\{[^\{\}]*\})', '', members) - - self.create_parameter_list(ln, decl_type, members, ';', - declaration_name) - self.check_sections(ln, declaration_name, decl_type, - self.entry.sectcheck, self.entry.struct_actual) - - # Adjust declaration for better display - declaration =3D Re(r'([\{;])').sub(r'\1\n', declaration) - declaration =3D Re(r'\}\s+;').sub('};', declaration) - - # Better handle inlined enums - while True: - r =3D Re(r'(enum\s+\{[^\}]+),([^\n])') - if not r.search(declaration): - break - - declaration =3D r.sub(r'\1,\n\2', declaration) - - def_args =3D declaration.split('\n') - level =3D 1 - declaration =3D "" - for clause in def_args: - - clause =3D clause.strip() - clause =3D Re(r'\s+').sub(' ', clause, count=3D1) - - if not clause: - continue - - if '}' in clause and level > 1: - level -=3D 1 - - if not Re(r'^\s*#').match(clause): - declaration +=3D "\t" * level - - declaration +=3D "\t" + clause + "\n" - if "{" in clause and "}" not in clause: - level +=3D 1 - - self.output_declaration(decl_type, declaration_name, - struct=3Ddeclaration_name, - module=3Dself.entry.modulename, - definition=3Ddeclaration, - parameterlist=3Dself.entry.parameterlist, - parameterdescs=3Dself.entry.parameterdescs, - parametertypes=3Dself.entry.parametertypes, - sectionlist=3Dself.entry.sectionlist, - sections=3Dself.entry.sections, - purpose=3Dself.entry.declaration_purpose) - - def dump_enum(self, ln, proto): - - # Ignore members marked private - proto =3D Re(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', flags=3Dr= e.S).sub('', proto) - proto =3D Re(r'\/\*\s*private:.*}', 
flags=3Dre.S).sub('}', proto) - - # Strip comments - proto =3D Re(r'\/\*.*?\*\/', flags=3Dre.S).sub('', proto) - - # Strip #define macros inside enums - proto =3D Re(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=3Dre= .S).sub('', proto) - - members =3D None - declaration_name =3D None - - r =3D Re(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') - if r.search(proto): - declaration_name =3D r.group(2) - members =3D r.group(1).rstrip() - else: - r =3D Re(r'enum\s+(\w*)\s*\{(.*)\}') - if r.match(proto): - declaration_name =3D r.group(1) - members =3D r.group(2).rstrip() - - if not members: - self.emit_warning(ln, f"{proto}: error: Cannot parse enum!") - self.config.errors +=3D 1 - return - - if self.entry.identifier !=3D declaration_name: - if self.entry.identifier =3D=3D "": - self.emit_warning(ln, - f"{proto}: wrong kernel-doc identifier o= n prototype") - else: - self.emit_warning(ln, - f"expecting prototype for enum {self.ent= ry.identifier}. Prototype was for enum {declaration_name} instead") - return - - if not declaration_name: - declaration_name =3D "(anonymous)" - - member_set =3D set() - - members =3D Re(r'\([^;]*?[\)]').sub('', members) - - for arg in members.split(','): - if not arg: - continue - arg =3D Re(r'^\s*(\w+).*').sub(r'\1', arg) - self.entry.parameterlist.append(arg) - if arg not in self.entry.parameterdescs: - self.entry.parameterdescs[arg] =3D self.undescribed - if self.show_warnings("enum", declaration_name): - self.emit_warning(ln, - f"Enum value '{arg}' not described i= n enum '{declaration_name}'") - member_set.add(arg) - - for k in self.entry.parameterdescs: - if k not in member_set: - if self.show_warnings("enum", declaration_name): - self.emit_warning(ln, - f"Excess enum value '%{k}' descripti= on in '{declaration_name}'") - - self.output_declaration('enum', declaration_name, - enum=3Ddeclaration_name, - module=3Dself.config.modulename, - parameterlist=3Dself.entry.parameterlist, - parameterdescs=3Dself.entry.parameterdescs, - 
sectionlist=3Dself.entry.sectionlist, - sections=3Dself.entry.sections, - purpose=3Dself.entry.declaration_purpose) - - def dump_declaration(self, ln, prototype): - if self.entry.decl_type =3D=3D "enum": - self.dump_enum(ln, prototype) - return - - if self.entry.decl_type =3D=3D "typedef": - self.dump_typedef(ln, prototype) - return - - if self.entry.decl_type in ["union", "struct"]: - self.dump_struct(ln, prototype) - return - - # TODO: handle other types - self.output_declaration(self.entry.decl_type, prototype, - entry=3Dself.entry) - - def dump_function(self, ln, prototype): - - func_macro =3D False - return_type =3D '' - decl_type =3D 'function' - - # Prefixes that would be removed - sub_prefixes =3D [ - (r"^static +", "", 0), - (r"^extern +", "", 0), - (r"^asmlinkage +", "", 0), - (r"^inline +", "", 0), - (r"^__inline__ +", "", 0), - (r"^__inline +", "", 0), - (r"^__always_inline +", "", 0), - (r"^noinline +", "", 0), - (r"^__FORTIFY_INLINE +", "", 0), - (r"__init +", "", 0), - (r"__init_or_module +", "", 0), - (r"__deprecated +", "", 0), - (r"__flatten +", "", 0), - (r"__meminit +", "", 0), - (r"__must_check +", "", 0), - (r"__weak +", "", 0), - (r"__sched +", "", 0), - (r"_noprof", "", 0), - (r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +", "", 0), - (r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +", "", = 0), - (r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +", "", 0), - (r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)", r"\1, \2= ", 0), - (r"__attribute_const__ +", "", 0), - - # It seems that Python support for re.X is broken: - # At least for me (Python 3.13), this didn't work -# (r""" -# __attribute__\s*\(\( -# (?: -# [\w\s]+ # attribute name -# (?:\([^)]*\))? # attribute arguments -# \s*,? 
# optional comma at the end -# )+ -# \)\)\s+ -# """, "", re.X), - - # So, remove whitespaces and comments from it - (r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"= , "", 0), - ] - - for search, sub, flags in sub_prefixes: - prototype =3D Re(search, flags).sub(sub, prototype) - - # Macros are a special case, as they change the prototype format - new_proto =3D Re(r"^#\s*define\s+").sub("", prototype) - if new_proto !=3D prototype: - is_define_proto =3D True - prototype =3D new_proto - else: - is_define_proto =3D False - - # Yes, this truly is vile. We are looking for: - # 1. Return type (may be nothing if we're looking at a macro) - # 2. Function name - # 3. Function parameters. - # - # All the while we have to watch out for function pointer paramete= rs - # (which IIRC is what the two sections are for), C types (these - # regexps don't even start to express all the possibilities), and - # so on. - # - # If you mess with these regexps, it's a good idea to check that - # the following functions' documentation still comes out right: - # - parport_register_device (function pointer parameters) - # - atomic_set (macro) - # - pci_match_device, __copy_to_user (long return type) - - name =3D r'[a-zA-Z0-9_~:]+' - prototype_end1 =3D r'[^\(]*' - prototype_end2 =3D r'[^\{]*' - prototype_end =3D fr'\(({prototype_end1}|{prototype_end2})\)' - - # Besides compiling, Perl qr{[\w\s]+} works as a non-capturing gro= up. - # So, this needs to be mapped in Python with (?:...)? or (?:...)+ - - type1 =3D r'(?:[\w\s]+)?' 
- type2 =3D r'(?:[\w\s]+\*+)+' - - found =3D False - - if is_define_proto: - r =3D Re(r'^()(' + name + r')\s+') - - if r.search(prototype): - return_type =3D '' - declaration_name =3D r.group(2) - func_macro =3D True - - found =3D True - - if not found: - patterns =3D [ - rf'^()({name})\s*{prototype_end}', - rf'^({type1})\s+({name})\s*{prototype_end}', - rf'^({type2})\s*({name})\s*{prototype_end}', - ] - - for p in patterns: - r =3D Re(p) - - if r.match(prototype): - - return_type =3D r.group(1) - declaration_name =3D r.group(2) - args =3D r.group(3) - - self.create_parameter_list(ln, decl_type, args, ',', - declaration_name) - - found =3D True - break - if not found: - self.emit_warning(ln, - f"cannot understand function prototype: '{pr= ototype}'") - return - - if self.entry.identifier !=3D declaration_name: - self.emit_warning(ln, - f"expecting prototype for {self.entry.identi= fier}(). Prototype was for {declaration_name}() instead") - return - - prms =3D " ".join(self.entry.parameterlist) - self.check_sections(ln, declaration_name, "function", - self.entry.sectcheck, prms) - - self.check_return_section(ln, declaration_name, return_type) - - if 'typedef' in return_type: - self.output_declaration(decl_type, declaration_name, - function=3Ddeclaration_name, - typedef=3DTrue, - module=3Dself.config.modulename, - functiontype=3Dreturn_type, - parameterlist=3Dself.entry.parameterlist, - parameterdescs=3Dself.entry.parameterdescs, - parametertypes=3Dself.entry.parametertypes, - sectionlist=3Dself.entry.sectionlist, - sections=3Dself.entry.sections, - purpose=3Dself.entry.declaration_purpose, - func_macro=3Dfunc_macro) - else: - self.output_declaration(decl_type, declaration_name, - function=3Ddeclaration_name, - typedef=3DFalse, - module=3Dself.config.modulename, - functiontype=3Dreturn_type, - parameterlist=3Dself.entry.parameterlist, - parameterdescs=3Dself.entry.parameterdescs, - parametertypes=3Dself.entry.parametertypes, - sectionlist=3Dself.entry.sectionlist, - 
sections=3Dself.entry.sections, - purpose=3Dself.entry.declaration_purpose, - func_macro=3Dfunc_macro) - - def dump_typedef(self, ln, proto): - typedef_type =3D r'((?:\s+[\w\*]+\b){1,8})\s*' - typedef_ident =3D r'\*?\s*(\w\S+)\s*' - typedef_args =3D r'\s*\((.*)\);' - - typedef1 =3D Re(r'typedef' + typedef_type + r'\(' + typedef_ident = + r'\)' + typedef_args) - typedef2 =3D Re(r'typedef' + typedef_type + typedef_ident + typede= f_args) - - # Strip comments - proto =3D Re(r'/\*.*?\*/', flags=3Dre.S).sub('', proto) - - # Parse function typedef prototypes - for r in [typedef1, typedef2]: - if not r.match(proto): - continue - - return_type =3D r.group(1).strip() - declaration_name =3D r.group(2) - args =3D r.group(3) - - if self.entry.identifier !=3D declaration_name: - self.emit_warning(ln, - f"expecting prototype for typedef {self.= entry.identifier}. Prototype was for typedef {declaration_name} instead\n") - return - - decl_type =3D 'function' - self.create_parameter_list(ln, decl_type, args, ',', declarati= on_name) - - self.output_declaration(decl_type, declaration_name, - function=3Ddeclaration_name, - typedef=3DTrue, - module=3Dself.entry.modulename, - functiontype=3Dreturn_type, - parameterlist=3Dself.entry.parameterlist, - parameterdescs=3Dself.entry.parameterdescs, - parametertypes=3Dself.entry.parametertypes, - sectionlist=3Dself.entry.sectionlist, - sections=3Dself.entry.sections, - purpose=3Dself.entry.declaration_purpose) - return - - # Handle nested parentheses or brackets - r =3D Re(r'(\(*.\)\s*|\[*.\]\s*);$') - while r.search(proto): - proto =3D r.sub('', proto) - - # Parse simple typedefs - r =3D Re(r'typedef.*\s+(\w+)\s*;') - if r.match(proto): - declaration_name =3D r.group(1) - - if self.entry.identifier !=3D declaration_name: - self.emit_warning(ln, f"expecting prototype for typedef {s= elf.entry.identifier}. 
Prototype was for typedef {declaration_name} instead= \n") - return - - self.output_declaration('typedef', declaration_name, - typedef=3Ddeclaration_name, - module=3Dself.entry.modulename, - sectionlist=3Dself.entry.sectionlist, - sections=3Dself.entry.sections, - purpose=3Dself.entry.declaration_purpose) - return - - self.emit_warning(ln, "error: Cannot parse typedef!") - self.config.errors +=3D 1 - - @staticmethod - def process_export(function_table, line): - """ - process EXPORT_SYMBOL* tags - - This method is called both internally and externally, so, it - doesn't use self. - """ - - if export_symbol.search(line): - symbol =3D export_symbol.group(2) - function_table.add(symbol) - - if export_symbol_ns.search(line): - symbol =3D export_symbol_ns.group(2) - function_table.add(symbol) - - def process_normal(self, ln, line): - """ - STATE_NORMAL: looking for the /** to begin everything. - """ - - if not doc_start.match(line): - return - - # start a new entry - self.reset_state(ln + 1) - self.entry.in_doc_sect =3D False - - # next line is always the function name - self.state =3D self.STATE_NAME - - def process_name(self, ln, line): - """ - STATE_NAME: Looking for the "name - description" line - """ - - if doc_block.search(line): - self.entry.new_start_line =3D ln - - if not doc_block.group(1): - self.entry.section =3D self.section_intro - else: - self.entry.section =3D doc_block.group(1) - - self.state =3D self.STATE_DOCBLOCK - return - - if doc_decl.search(line): - self.entry.identifier =3D doc_decl.group(1) - self.entry.is_kernel_comment =3D False - - decl_start =3D str(doc_com) # comment block asterisk - fn_type =3D r"(?:\w+\s*\*\s*)?" # type (for non-functions) - parenthesis =3D r"(?:\(\w*\))?" 
# optional parenthesis on fu= nction - decl_end =3D r"(?:[-:].*)" # end of the name part - - # test for pointer declaration type, foo * bar() - desc - r =3D Re(fr"^{decl_start}([\w\s]+?){parenthesis}?\s*{decl_end}= ?$") - if r.search(line): - self.entry.identifier =3D r.group(1) - - # Test for data declaration - r =3D Re(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)") - if r.search(line): - self.entry.decl_type =3D r.group(1) - self.entry.identifier =3D r.group(2) - self.entry.is_kernel_comment =3D True - else: - # Look for foo() or static void foo() - description; - # or misspelt identifier - - r1 =3D Re(fr"^{decl_start}{fn_type}(\w+)\s*{parenthesis}\s= *{decl_end}?$") - r2 =3D Re(fr"^{decl_start}{fn_type}(\w+[^-:]*){parenthesis= }\s*{decl_end}$") - - for r in [r1, r2]: - if r.search(line): - self.entry.identifier =3D r.group(1) - self.entry.decl_type =3D "function" - - r =3D Re(r"define\s+") - self.entry.identifier =3D r.sub("", self.entry.ide= ntifier) - self.entry.is_kernel_comment =3D True - break - - self.entry.identifier =3D self.entry.identifier.strip(" ") - - self.state =3D self.STATE_BODY - - # if there's no @param blocks need to set up default section h= ere - self.entry.section =3D self.section_default - self.entry.new_start_line =3D ln + 1 - - r =3D Re("[-:](.*)") - if r.search(line): - # strip leading/trailing/multiple spaces - self.entry.descr =3D r.group(1).strip(" ") - - r =3D Re(r"\s+") - self.entry.descr =3D r.sub(" ", self.entry.descr) - self.entry.declaration_purpose =3D self.entry.descr - self.state =3D self.STATE_BODY_MAYBE - else: - self.entry.declaration_purpose =3D "" - - if not self.entry.is_kernel_comment: - self.emit_warning(ln, - f"This comment starts with '/**', but is= n't a kernel-doc comment. 
Refer Documentation/doc-guide/kernel-doc.rst\n{li= ne}") - self.state =3D self.STATE_NORMAL - - if not self.entry.declaration_purpose and self.config.wshort_d= esc: - self.emit_warning(ln, - f"missing initial short description on l= ine:\n{line}") - - if not self.entry.identifier and self.entry.decl_type !=3D "en= um": - self.emit_warning(ln, - f"wrong kernel-doc identifier on line:\n= {line}") - self.state =3D self.STATE_NORMAL - - if self.config.verbose: - self.emit_warning(ln, - f"Scanning doc for {self.entry.decl_type= } {self.entry.identifier}", - warning=3DFalse) - - return - - # Failed to find an identifier. Emit a warning - self.emit_warning(ln, f"Cannot find identifier on line:\n{line}") - - def process_body(self, ln, line): - """ - STATE_BODY and STATE_BODY_MAYBE: the bulk of a kerneldoc comment. - """ - - if self.state =3D=3D self.STATE_BODY_WITH_BLANK_LINE: - r =3D Re(r"\s*\*\s?\S") - if r.match(line): - self.dump_section() - self.entry.section =3D self.section_default - self.entry.new_start_line =3D line - self.entry.contents =3D "" - - if doc_sect.search(line): - self.entry.in_doc_sect =3D True - newsection =3D doc_sect.group(1) - - if newsection.lower() in ["description", "context"]: - newsection =3D newsection.title() - - # Special case: @return is a section, not a param description - if newsection.lower() in ["@return", "@returns", - "return", "returns"]: - newsection =3D "Return" - - # Perl kernel-doc has a check here for contents before section= s. - # the logic there is always false, as in_doc_sect variable is - # always true. 
So, just don't implement Wcontents_before_secti= ons - - # .title() - newcontents =3D doc_sect.group(2) - if not newcontents: - newcontents =3D "" - - if self.entry.contents.strip("\n"): - self.dump_section() - - self.entry.new_start_line =3D ln - self.entry.section =3D newsection - self.entry.leading_space =3D None - - self.entry.contents =3D newcontents.lstrip() - if self.entry.contents: - self.entry.contents +=3D "\n" - - self.state =3D self.STATE_BODY - return - - if doc_end.search(line): - self.dump_section() - - # Look for doc_com + + doc_end: - r =3D Re(r'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') - if r.match(line): - self.emit_warning(ln, f"suspicious ending line: {line}") - - self.entry.prototype =3D "" - self.entry.new_start_line =3D ln + 1 - - self.state =3D self.STATE_PROTO - return - - if doc_content.search(line): - cont =3D doc_content.group(1) - - if cont =3D=3D "": - if self.entry.section =3D=3D self.section_context: - self.dump_section() - - self.entry.new_start_line =3D ln - self.state =3D self.STATE_BODY - else: - if self.entry.section !=3D self.section_default: - self.state =3D self.STATE_BODY_WITH_BLANK_LINE - else: - self.state =3D self.STATE_BODY - - self.entry.contents +=3D "\n" - - elif self.state =3D=3D self.STATE_BODY_MAYBE: - - # Continued declaration purpose - self.entry.declaration_purpose =3D self.entry.declaration_= purpose.rstrip() - self.entry.declaration_purpose +=3D " " + cont - - r =3D Re(r"\s+") - self.entry.declaration_purpose =3D r.sub(' ', - self.entry.declarat= ion_purpose) - - else: - if self.entry.section.startswith('@') or \ - self.entry.section =3D=3D self.section_context: - if self.entry.leading_space is None: - r =3D Re(r'^(\s+)') - if r.match(cont): - self.entry.leading_space =3D len(r.group(1)) - else: - self.entry.leading_space =3D 0 - - # Double-check if leading space are realy spaces - pos =3D 0 - for i in range(0, self.entry.leading_space): - if cont[i] !=3D " ": - break - pos +=3D 1 - - cont =3D cont[pos:] - - # NEW LOGIC: 
- # In case it is different, update it - if self.entry.leading_space !=3D pos: - self.entry.leading_space =3D pos - - self.entry.contents +=3D cont + "\n" - return - - # Unknown line, ignore - self.emit_warning(ln, f"bad line: {line}") - - def process_inline(self, ln, line): - """STATE_INLINE: docbook comments within a prototype.""" - - if self.inline_doc_state =3D=3D self.STATE_INLINE_NAME and \ - doc_inline_sect.search(line): - self.entry.section =3D doc_inline_sect.group(1) - self.entry.new_start_line =3D ln - - self.entry.contents =3D doc_inline_sect.group(2).lstrip() - if self.entry.contents !=3D "": - self.entry.contents +=3D "\n" - - self.inline_doc_state =3D self.STATE_INLINE_TEXT - # Documentation block end */ - return - - if doc_inline_end.search(line): - if self.entry.contents not in ["", "\n"]: - self.dump_section() - - self.state =3D self.STATE_PROTO - self.inline_doc_state =3D self.STATE_INLINE_NA - return - - if doc_content.search(line): - if self.inline_doc_state =3D=3D self.STATE_INLINE_TEXT: - self.entry.contents +=3D doc_content.group(1) + "\n" - if not self.entry.contents.strip(" ").rstrip("\n"): - self.entry.contents =3D "" - - elif self.inline_doc_state =3D=3D self.STATE_INLINE_NAME: - self.emit_warning(ln, - f"Incorrect use of kernel-doc format: {l= ine}") - - self.inline_doc_state =3D self.STATE_INLINE_ERROR - - def syscall_munge(self, ln, proto): - """ - Handle syscall definitions - """ - - is_void =3D False - - # Strip newlines/CR's - proto =3D re.sub(r'[\r\n]+', ' ', proto) - - # Check if it's a SYSCALL_DEFINE0 - if 'SYSCALL_DEFINE0' in proto: - is_void =3D True - - # Replace SYSCALL_DEFINE with correct return type & function name - proto =3D Re(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) - - r =3D Re(r'long\s+(sys_.*?),') - if r.search(proto): - proto =3D proto.replace(',', '(', count=3D1) - elif is_void: - proto =3D proto.replace(')', '(void)', count=3D1) - - # Now delete all of the odd-numbered commas in the proto - # so that 
argument types & names don't have a comma between them - count =3D 0 - length =3D len(proto) - - if is_void: - length =3D 0 # skip the loop if is_void - - for ix in range(length): - if proto[ix] =3D=3D ',': - count +=3D 1 - if count % 2 =3D=3D 1: - proto =3D proto[:ix] + ' ' + proto[ix+1:] - - return proto - - def tracepoint_munge(self, ln, proto): - """ - Handle tracepoint definitions - """ - - tracepointname =3D None - tracepointargs =3D None - - # Match tracepoint name based on different patterns - r =3D Re(r'TRACE_EVENT\((.*?),') - if r.search(proto): - tracepointname =3D r.group(1) - - r =3D Re(r'DEFINE_SINGLE_EVENT\((.*?),') - if r.search(proto): - tracepointname =3D r.group(1) - - r =3D Re(r'DEFINE_EVENT\((.*?),(.*?),') - if r.search(proto): - tracepointname =3D r.group(2) - - if tracepointname: - tracepointname =3D tracepointname.lstrip() - - r =3D Re(r'TP_PROTO\((.*?)\)') - if r.search(proto): - tracepointargs =3D r.group(1) - - if not tracepointname or not tracepointargs: - self.emit_warning(ln, - f"Unrecognized tracepoint format:\n{proto}\n= ") - else: - proto =3D f"static inline void trace_{tracepointname}({tracepo= intargs})" - self.entry.identifier =3D f"trace_{self.entry.identifier}" - - return proto - - def process_proto_function(self, ln, line): - """Ancillary routine to process a function prototype""" - - # strip C99-style comments to end of line - r =3D Re(r"\/\/.*$", re.S) - line =3D r.sub('', line) - - if Re(r'\s*#\s*define').match(line): - self.entry.prototype =3D line - elif line.startswith('#'): - # Strip other macros like #ifdef/#ifndef/#endif/... 
- pass - else: - r =3D Re(r'([^\{]*)') - if r.match(line): - self.entry.prototype +=3D r.group(1) + " " - - if '{' in line or ';' in line or Re(r'\s*#\s*define').match(line): - # strip comments - r =3D Re(r'/\*.*?\*/') - self.entry.prototype =3D r.sub('', self.entry.prototype) - - # strip newlines/cr's - r =3D Re(r'[\r\n]+') - self.entry.prototype =3D r.sub(' ', self.entry.prototype) - - # strip leading spaces - r =3D Re(r'^\s+') - self.entry.prototype =3D r.sub('', self.entry.prototype) - - # Handle self.entry.prototypes for function pointers like: - # int (*pcs_config)(struct foo) - - r =3D Re(r'^(\S+\s+)\(\s*\*(\S+)\)') - self.entry.prototype =3D r.sub(r'\1\2', self.entry.prototype) - - if 'SYSCALL_DEFINE' in self.entry.prototype: - self.entry.prototype =3D self.syscall_munge(ln, - self.entry.proto= type) - - r =3D Re(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') - if r.search(self.entry.prototype): - self.entry.prototype =3D self.tracepoint_munge(ln, - self.entry.pr= ototype) - - self.dump_function(ln, self.entry.prototype) - self.reset_state(ln) - - def process_proto_type(self, ln, line): - """Ancillary routine to process a type""" - - # Strip newlines/cr's. - line =3D Re(r'[\r\n]+', re.S).sub(' ', line) - - # Strip leading spaces - line =3D Re(r'^\s+', re.S).sub('', line) - - # Strip trailing spaces - line =3D Re(r'\s+$', re.S).sub('', line) - - # Strip C99-style comments to the end of the line - line =3D Re(r"\/\/.*$", re.S).sub('', line) - - # To distinguish preprocessor directive from regular declaration l= ater. 
- if line.startswith('#'): - line +=3D ";" - - r =3D Re(r'([^\{\};]*)([\{\};])(.*)') - while True: - if r.search(line): - if self.entry.prototype: - self.entry.prototype +=3D " " - self.entry.prototype +=3D r.group(1) + r.group(2) - - self.entry.brcount +=3D r.group(2).count('{') - self.entry.brcount -=3D r.group(2).count('}') - - self.entry.brcount =3D max(self.entry.brcount, 0) - - if r.group(2) =3D=3D ';' and self.entry.brcount =3D=3D 0: - self.dump_declaration(ln, self.entry.prototype) - self.reset_state(ln) - break - - line =3D r.group(3) - else: - self.entry.prototype +=3D line - break - - def process_proto(self, ln, line): - """STATE_PROTO: reading a function/whatever prototype.""" - - if doc_inline_oneline.search(line): - self.entry.section =3D doc_inline_oneline.group(1) - self.entry.contents =3D doc_inline_oneline.group(2) - - if self.entry.contents !=3D "": - self.entry.contents +=3D "\n" - self.dump_section(start_new=3DFalse) - - elif doc_inline_start.search(line): - self.state =3D self.STATE_INLINE - self.inline_doc_state =3D self.STATE_INLINE_NAME - - elif self.entry.decl_type =3D=3D 'function': - self.process_proto_function(ln, line) - - else: - self.process_proto_type(ln, line) - - def process_docblock(self, ln, line): - """STATE_DOCBLOCK: within a DOC: block.""" - - if doc_end.search(line): - self.dump_section() - self.output_declaration("doc", None, - sectionlist=3Dself.entry.sectionlist, - sections=3Dself.entry.sections, = module=3Dself.config.modulename) - self.reset_state(ln) - - elif doc_content.search(line): - self.entry.contents +=3D doc_content.group(1) + "\n" - - def run(self): - """ - Open and process each line of a C source file. - he parsing is controlled via a state machine, and the line is pass= ed - to a different process function depending on the state. The process - function may update the state as needed. 
- """ - - cont =3D False - prev =3D "" - prev_ln =3D None - - try: - with open(self.fname, "r", encoding=3D"utf8", - errors=3D"backslashreplace") as fp: - for ln, line in enumerate(fp): - - line =3D line.expandtabs().strip("\n") - - # Group continuation lines on prototypes - if self.state =3D=3D self.STATE_PROTO: - if line.endswith("\\"): - prev +=3D line.removesuffix("\\") - cont =3D True - - if not prev_ln: - prev_ln =3D ln - - continue - - if cont: - ln =3D prev_ln - line =3D prev + line - prev =3D "" - cont =3D False - prev_ln =3D None - - self.config.log.debug("%d %s%s: %s", - ln, self.st_name[self.state], - self.st_inline_name[self.inline_= doc_state], - line) - - # TODO: not all states allow EXPORT_SYMBOL*, so this - # can be optimized later on to speedup parsing - self.process_export(self.config.function_table, line) - - # Hand this line to the appropriate state handler - if self.state =3D=3D self.STATE_NORMAL: - self.process_normal(ln, line) - elif self.state =3D=3D self.STATE_NAME: - self.process_name(ln, line) - elif self.state in [self.STATE_BODY, self.STATE_BODY_M= AYBE, - self.STATE_BODY_WITH_BLANK_LINE]: - self.process_body(ln, line) - elif self.state =3D=3D self.STATE_INLINE: # scanning = for inline parameters - self.process_inline(ln, line) - elif self.state =3D=3D self.STATE_PROTO: - self.process_proto(ln, line) - elif self.state =3D=3D self.STATE_DOCBLOCK: - self.process_docblock(ln, line) - except OSError: - self.config.log.error(f"Error: Cannot open file {self.fname}") - self.config.errors +=3D 1 - - class GlobSourceFiles: """ Parse C source code file names and directories via an Interactor. diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser= .py new file mode 100755 index 000000000000..3ce116595546 --- /dev/null +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -0,0 +1,1690 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2025: Mauro Carvalho Chehab . 
+# +# pylint: disable=3DC0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702 + +""" +kdoc_parser +=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D + +Read a C language source or header FILE and extract embedded +documentation comments +""" + +import argparse +import re +from pprint import pformat + +from kdoc_re import NestedMatch, Re + + +# +# Regular expressions used to parse kernel-doc markups at KernelDoc class. +# +# Let's declare them in lowercase outside any class to make easier to +# convert from the python script. +# +# As those are evaluated at the beginning, no need to cache them +# + +# Allow whitespace at end of comment start. +doc_start =3D Re(r'^/\*\*\s*$', cache=3DFalse) + +doc_end =3D Re(r'\*/', cache=3DFalse) +doc_com =3D Re(r'\s*\*\s*', cache=3DFalse) +doc_com_body =3D Re(r'\s*\* ?', cache=3DFalse) +doc_decl =3D doc_com + Re(r'(\w+)', cache=3DFalse) + +# @params and a strictly limited set of supported section names +# Specifically: +# Match @word: +# @...: +# @{section-name}: +# while trying to not match literal block starts like "example::" +# +doc_sect =3D doc_com + \ + Re(r'\s*(\@[.\w]+|\@\.\.\.|description|context|returns?|notes?= |examples?)\s*:([^:].*)?$', + flags=3Dre.I, cache=3DFalse) + +doc_content =3D doc_com_body + Re(r'(.*)', cache=3DFalse) +doc_block =3D doc_com + Re(r'DOC:\s*(.*)?', cache=3DFalse) +doc_inline_start =3D Re(r'^\s*/\*\*\s*$', cache=3DFalse) +doc_inline_sect =3D Re(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=3DFalse) +doc_inline_end =3D Re(r'^\s*\*/\s*$', cache=3DFalse) +doc_inline_oneline =3D Re(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cac= he=3DFalse) +attribute =3D Re(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", + flags=3Dre.I | re.S, cache=3DFalse) + +export_symbol =3D Re(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cac= he=3DFalse) +export_symbol_ns =3D Re(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"= \S+"\)\s*', cache=3DFalse) + +type_param =3D Re(r"\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=3DFalse) + + 
+class KernelDoc: + """ + Read a C language source or header FILE and extract embedded + documentation comments. + """ + + # Parser states + STATE_NORMAL =3D 0 # normal code + STATE_NAME =3D 1 # looking for function name + STATE_BODY_MAYBE =3D 2 # body - or maybe more description + STATE_BODY =3D 3 # the body of the comment + STATE_BODY_WITH_BLANK_LINE =3D 4 # the body which has a blank line + STATE_PROTO =3D 5 # scanning prototype + STATE_DOCBLOCK =3D 6 # documentation block + STATE_INLINE =3D 7 # gathering doc outside main block + + st_name =3D [ + "NORMAL", + "NAME", + "BODY_MAYBE", + "BODY", + "BODY_WITH_BLANK_LINE", + "PROTO", + "DOCBLOCK", + "INLINE", + ] + + # Inline documentation state + STATE_INLINE_NA =3D 0 # not applicable ($state !=3D STATE_INLINE) + STATE_INLINE_NAME =3D 1 # looking for member name (@foo:) + STATE_INLINE_TEXT =3D 2 # looking for member documentation + STATE_INLINE_END =3D 3 # done + STATE_INLINE_ERROR =3D 4 # error - Comment without header was found. + # Spit a warning as it's not + # proper kernel-doc and ignore the rest. 
+ + st_inline_name =3D [ + "", + "_NAME", + "_TEXT", + "_END", + "_ERROR", + ] + + # Section names + + section_default =3D "Description" # default section + section_intro =3D "Introduction" + section_context =3D "Context" + section_return =3D "Return" + + undescribed =3D "-- undescribed --" + + def __init__(self, config, fname): + """Initialize internal variables""" + + self.fname =3D fname + self.config =3D config + + # Initial state for the state machines + self.state =3D self.STATE_NORMAL + self.inline_doc_state =3D self.STATE_INLINE_NA + + # Store entry currently being processed + self.entry =3D None + + # Place all potential outputs into an array + self.entries =3D [] + + def show_warnings(self, dtype, declaration_name): # pylint: disable= =3DW0613 + """ + Allow filtering out warnings + """ + + # TODO: implement it + + return True + + # TODO: rename to emit_message + def emit_warning(self, ln, msg, warning=3DTrue): + """Emit a message""" + + if warning: + self.config.log.warning("%s:%d %s", self.fname, ln, msg) + else: + self.config.log.info("%s:%d %s", self.fname, ln, msg) + + def dump_section(self, start_new=3DTrue): + """ + Dumps section contents to arrays/hashes intended for that purpose. + """ + + name =3D self.entry.section + contents =3D self.entry.contents + + # TODO: we can prevent dumping empty sections here with: + # + # if self.entry.contents.strip("\n"): + # if start_new: + # self.entry.section =3D self.section_default + # self.entry.contents =3D "" + # + # return + # + # But, as we want to be producing the same output of the + # venerable kernel-doc Perl tool, let's just output everything, + # at least for now + + if type_param.match(name): + name =3D type_param.group(1) + + self.entry.parameterdescs[name] =3D contents + self.entry.parameterdesc_start_lines[name] =3D self.entry.new_= start_line + + self.entry.sectcheck +=3D name + " " + self.entry.new_start_line =3D 0 + + elif name =3D=3D "@...": + name =3D "..." 
+ self.entry.parameterdescs[name] =3D contents + self.entry.sectcheck +=3D name + " " + self.entry.parameterdesc_start_lines[name] =3D self.entry.new_= start_line + self.entry.new_start_line =3D 0 + + else: + if name in self.entry.sections and self.entry.sections[name] != =3D "": + # Only warn on user-specified duplicate section names + if name !=3D self.section_default: + self.emit_warning(self.entry.new_start_line, + f"duplicate section name '{name}'\n") + self.entry.sections[name] +=3D contents + else: + self.entry.sections[name] =3D contents + self.entry.sectionlist.append(name) + self.entry.section_start_lines[name] =3D self.entry.new_st= art_line + self.entry.new_start_line =3D 0 + +# self.config.log.debug("Section: %s : %s", name, pformat(vars(self= .entry))) + + if start_new: + self.entry.section =3D self.section_default + self.entry.contents =3D "" + + # TODO: rename it to store_declaration + def output_declaration(self, dtype, name, **args): + """ + Stores the entry into an entry array. + + The actual output and output filters will be handled elsewhere + """ + + # The implementation here is different than the original kernel-do= c: + # instead of checking for output filters or actually output anythi= ng, + # it just stores the declaration content at self.entries, as the + # output will happen on a separate class. 
+ # + # For now, we're keeping the same name of the function just to make + # easier to compare the source code of both scripts + + if "declaration_start_line" not in args: + args["declaration_start_line"] =3D self.entry.declaration_star= t_line + + args["type"] =3D dtype + + # TODO: use colletions.OrderedDict + + sections =3D args.get('sections', {}) + sectionlist =3D args.get('sectionlist', []) + + # Drop empty sections + # TODO: improve it to emit warnings + for section in ["Description", "Return"]: + if section in sectionlist: + if not sections[section].rstrip(): + del sections[section] + sectionlist.remove(section) + + self.entries.append((name, args)) + + self.config.log.debug("Output: %s:%s =3D %s", dtype, name, pformat= (args)) + + def reset_state(self, ln): + """ + Ancillary routine to create a new entry. It initializes all + variables used by the state machine. + """ + + self.entry =3D argparse.Namespace + + self.entry.contents =3D "" + self.entry.function =3D "" + self.entry.sectcheck =3D "" + self.entry.struct_actual =3D "" + self.entry.prototype =3D "" + + self.entry.parameterlist =3D [] + self.entry.parameterdescs =3D {} + self.entry.parametertypes =3D {} + self.entry.parameterdesc_start_lines =3D {} + + self.entry.section_start_lines =3D {} + self.entry.sectionlist =3D [] + self.entry.sections =3D {} + + self.entry.anon_struct_union =3D False + + self.entry.leading_space =3D None + + # State flags + self.state =3D self.STATE_NORMAL + self.inline_doc_state =3D self.STATE_INLINE_NA + self.entry.brcount =3D 0 + + self.entry.in_doc_sect =3D False + self.entry.declaration_start_line =3D ln + + def push_parameter(self, ln, decl_type, param, dtype, + org_arg, declaration_name): + """ + Store parameters and their descriptions at self.entry. 
+ """ + + if self.entry.anon_struct_union and dtype =3D=3D "" and param =3D= =3D "}": + return # Ignore the ending }; from anonymous struct/union + + self.entry.anon_struct_union =3D False + + param =3D Re(r'[\[\)].*').sub('', param, count=3D1) + + if dtype =3D=3D "" and param.endswith("..."): + if Re(r'\w\.\.\.$').search(param): + # For named variable parameters of the form `x...`, + # remove the dots + param =3D param[:-3] + else: + # Handles unnamed variable parameters + param =3D "..." + + if param not in self.entry.parameterdescs or \ + not self.entry.parameterdescs[param]: + + self.entry.parameterdescs[param] =3D "variable arguments" + + elif dtype =3D=3D "" and (not param or param =3D=3D "void"): + param =3D "void" + self.entry.parameterdescs[param] =3D "no arguments" + + elif dtype =3D=3D "" and param in ["struct", "union"]: + # Handle unnamed (anonymous) union or struct + dtype =3D param + param =3D "{unnamed_" + param + "}" + self.entry.parameterdescs[param] =3D "anonymous\n" + self.entry.anon_struct_union =3D True + + # Handle cache group enforcing variables: they do not need + # to be described in header files + elif "__cacheline_group" in param: + # Ignore __cacheline_group_begin and __cacheline_group_end + return + + # Warn if parameter has no description + # (but ignore ones starting with # as these are not parameters + # but inline preprocessor statements) + if param not in self.entry.parameterdescs and not param.startswith= ("#"): + self.entry.parameterdescs[param] =3D self.undescribed + + if self.show_warnings(dtype, declaration_name) and "." not in = param: + if decl_type =3D=3D 'function': + dname =3D f"{decl_type} parameter" + else: + dname =3D f"{decl_type} member" + + self.emit_warning(ln, + f"{dname} '{param}' not described in '{d= eclaration_name}'") + + # Strip spaces from param so that it is one continuous string on + # parameterlist. 
This fixes a problem where check_sections() + # cannot find a parameter like "addr[6 + 2]" because it actually + # appears as "addr[6", "+", "2]" on the parameter list. + # However, it's better to maintain the param string unchanged for + # output, so just weaken the string compare in check_sections() + # to ignore "[blah" in a parameter string. + + self.entry.parameterlist.append(param) + org_arg =3D Re(r'\s\s+').sub(' ', org_arg) + self.entry.parametertypes[param] =3D org_arg + + def save_struct_actual(self, actual): + """ + Strip all spaces from the actual param so that it looks like + one string item. + """ + + actual =3D Re(r'\s*').sub("", actual, count=3D1) + + self.entry.struct_actual +=3D actual + " " + + def create_parameter_list(self, ln, decl_type, args, + splitter, declaration_name): + """ + Creates a list of parameters, storing them at self.entry. + """ + + # temporarily replace all commas inside function pointer definition + arg_expr =3D Re(r'(\([^\),]+),') + while arg_expr.search(args): + args =3D arg_expr.sub(r"\1#", args) + + for arg in args.split(splitter): + # Strip comments + arg =3D Re(r'\/\*.*\*\/').sub('', arg) + + # Ignore argument attributes + arg =3D Re(r'\sPOS0?\s').sub(' ', arg) + + # Strip leading/trailing spaces + arg =3D arg.strip() + arg =3D Re(r'\s+').sub(' ', arg, count=3D1) + + if arg.startswith('#'): + # Treat preprocessor directive as a typeless variable just= to fill + # corresponding data structures "correctly". Catch it late= r in + # output_* subs. 
+ + # Treat preprocessor directive as a typeless variable + self.push_parameter(ln, decl_type, arg, "", + "", declaration_name) + + elif Re(r'\(.+\)\s*\(').search(arg): + # Pointer-to-function + + arg =3D arg.replace('#', ',') + + r =3D Re(r'[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)') + if r.match(arg): + param =3D r.group(1) + else: + self.emit_warning(ln, f"Invalid param: {arg}") + param =3D arg + + dtype =3D Re(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r= '\1', arg) + self.save_struct_actual(param) + self.push_parameter(ln, decl_type, param, dtype, + arg, declaration_name) + + elif Re(r'\(.+\)\s*\[').search(arg): + # Array-of-pointers + + arg =3D arg.replace('#', ',') + r =3D Re(r'[^\(]+\(\s*\*\s*([\w\[\]\.]*?)\s*(\s*\[\s*[\w]+= \s*\]\s*)*\)') + if r.match(arg): + param =3D r.group(1) + else: + self.emit_warning(ln, f"Invalid param: {arg}") + param =3D arg + + dtype =3D Re(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r= '\1', arg) + + self.save_struct_actual(param) + self.push_parameter(ln, decl_type, param, dtype, + arg, declaration_name) + + elif arg: + arg =3D Re(r'\s*:\s*').sub(":", arg) + arg =3D Re(r'\s*\[').sub('[', arg) + + args =3D Re(r'\s*,\s*').split(arg) + if args[0] and '*' in args[0]: + args[0] =3D re.sub(r'(\*+)\s*', r' \1', args[0]) + + first_arg =3D [] + r =3D Re(r'^(.*\s+)(.*?\[.*\].*)$') + if args[0] and r.match(args[0]): + args.pop(0) + first_arg.extend(r.group(1)) + first_arg.append(r.group(2)) + else: + first_arg =3D Re(r'\s+').split(args.pop(0)) + + args.insert(0, first_arg.pop()) + dtype =3D ' '.join(first_arg) + + for param in args: + if Re(r'^(\*+)\s*(.*)').match(param): + r =3D Re(r'^(\*+)\s*(.*)') + if not r.match(param): + self.emit_warning(ln, f"Invalid param: {param}= ") + continue + + param =3D r.group(1) + + self.save_struct_actual(r.group(2)) + self.push_parameter(ln, decl_type, r.group(2), + f"{dtype} {r.group(1)}", + arg, declaration_name) + + elif Re(r'(.*?):(\w+)').search(param): + r =3D Re(r'(.*?):(\w+)') + if not r.match(param): + 
self.emit_warning(ln, f"Invalid param: {param}= ") + continue + + if dtype !=3D "": # Skip unnamed bit-fields + self.save_struct_actual(r.group(1)) + self.push_parameter(ln, decl_type, r.group(1), + f"{dtype}:{r.group(2)}", + arg, declaration_name) + else: + self.save_struct_actual(param) + self.push_parameter(ln, decl_type, param, dtype, + arg, declaration_name) + + def check_sections(self, ln, decl_name, decl_type, sectcheck, prmschec= k): + """ + Check for errors inside sections, emitting warnings if not found + parameters are described. + """ + + sects =3D sectcheck.split() + prms =3D prmscheck.split() + err =3D False + + for sx in range(len(sects)): # pylint: disable=3D= C0200 + err =3D True + for px in range(len(prms)): # pylint: disable=3D= C0200 + prm_clean =3D prms[px] + prm_clean =3D Re(r'\[.*\]').sub('', prm_clean) + prm_clean =3D attribute.sub('', prm_clean) + + # ignore array size in a parameter string; + # however, the original param string may contain + # spaces, e.g.: addr[6 + 2] + # and this appears in @prms as "addr[6" since the + # parameter list is split at spaces; + # hence just ignore "[..." for the sections check; + prm_clean =3D Re(r'\[.*').sub('', prm_clean) + + if prm_clean =3D=3D sects[sx]: + err =3D False + break + + if err: + if decl_type =3D=3D 'function': + dname =3D f"{decl_type} parameter" + else: + dname =3D f"{decl_type} member" + + self.emit_warning(ln, + f"Excess {dname} '{sects[sx]}' descripti= on in '{decl_name}'") + + def check_return_section(self, ln, declaration_name, return_type): + """ + If the function doesn't return void, warns about the lack of a + return description. 
+ """ + + if not self.config.wreturn: + return + + # Ignore an empty return type (It's a macro) + # Ignore functions with a "void" return type (but not "void *") + if not return_type or Re(r'void\s*\w*\s*$').search(return_type): + return + + if not self.entry.sections.get("Return", None): + self.emit_warning(ln, + f"No description found for return value of '= {declaration_name}'") + + def dump_struct(self, ln, proto): + """ + Store an entry for an struct or union + """ + + type_pattern =3D r'(struct|union)' + + qualifiers =3D [ + "__attribute__", + "__packed", + "__aligned", + "____cacheline_aligned_in_smp", + "____cacheline_aligned", + ] + + definition_body =3D r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) = + ")?" + struct_members =3D Re(type_pattern + r'([^\{\};]+)(\{)([^\{\}]*)(\= })([^\{\}\;]*)(\;)') + + # Extract struct/union definition + members =3D None + declaration_name =3D None + decl_type =3D None + + r =3D Re(type_pattern + r'\s+(\w+)\s*' + definition_body) + if r.search(proto): + decl_type =3D r.group(1) + declaration_name =3D r.group(2) + members =3D r.group(3) + else: + r =3D Re(r'typedef\s+' + type_pattern + r'\s*' + definition_bo= dy + r'\s*(\w+)\s*;') + + if r.search(proto): + decl_type =3D r.group(1) + declaration_name =3D r.group(3) + members =3D r.group(2) + + if not members: + self.emit_warning(ln, f"{proto} error: Cannot parse struct or = union!") + self.config.errors +=3D 1 + return + + if self.entry.identifier !=3D declaration_name: + self.emit_warning(ln, + f"expecting prototype for {decl_type} {self.= entry.identifier}. 
Prototype was for {decl_type} {declaration_name} instead= \n") + return + + args_pattern =3D r'([^,)]+)' + + sub_prefixes =3D [ + (Re(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', re.S | re.I), = ''), + (Re(r'\/\*\s*private:.*', re.S | re.I), ''), + + # Strip comments + (Re(r'\/\*.*?\*\/', re.S), ''), + + # Strip attributes + (attribute, ' '), + (Re(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), + (Re(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), + (Re(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), + (Re(r'\s*__packed\s*', re.S), ' '), + (Re(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), + (Re(r'\s*____cacheline_aligned_in_smp', re.S), ' '), + (Re(r'\s*____cacheline_aligned', re.S), ' '), + + # Unwrap struct_group macros based on this definition: + # __struct_group(TAG, NAME, ATTRS, MEMBERS...) + # which has variants like: struct_group(NAME, MEMBERS...) + # Only MEMBERS arguments require documentation. + # + # Parsing them happens on two steps: + # + # 1. drop struct group arguments that aren't at MEMBERS, + # storing them as STRUCT_GROUP(MEMBERS) + # + # 2. remove STRUCT_GROUP() ancillary macro. + # + # The original logic used to remove STRUCT_GROUP() using an + # advanced regex: + # + # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; + # + # with two patterns that are incompatible with + # Python re module, as it has: + # + # - a recursive pattern: (?1) + # - an atomic grouping: (?>...) + # + # I tried a simpler version: but it didn't work either: + # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; + # + # As it doesn't properly match the end parenthesis on some cas= es. + # + # So, a better solution was crafted: there's now a NestedMatch + # class that ensures that delimiters after a search are proper= ly + # matched. So, the implementation to drop STRUCT_GROUP() will = be + # handled in separate. 
+ + (Re(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), + (Re(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GR= OUP('), + (Re(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'st= ruct \1 \2; STRUCT_GROUP('), + (Re(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP= ('), + + # Replace macros + # + # TODO: it is better to also move those to the NestedMatch log= ic, + # to ensure that parenthesis will be properly matched. + + (Re(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S),= r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), + (Re(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), r'DEC= LARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), + (Re(r'DECLARE_BITMAP\s*\(' + args_pattern + r',\s*' + args_pat= tern + r'\)', re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), + (Re(r'DECLARE_HASHTABLE\s*\(' + args_pattern + r',\s*' + args_= pattern + r'\)', re.S), r'unsigned long \1[1 << ((\2) - 1)]'), + (Re(r'DECLARE_KFIFO\s*\(' + args_pattern + r',\s*' + args_patt= ern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), + (Re(r'DECLARE_KFIFO_PTR\s*\(' + args_pattern + r',\s*' + args_= pattern + r'\)', re.S), r'\2 *\1'), + (Re(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + args_pattern + r',\s*'= + args_pattern + r'\)', re.S), r'\1 \2[]'), + (Re(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + args_pattern + r'\)', re.S= ), r'dma_addr_t \1'), + (Re(r'DEFINE_DMA_UNMAP_LEN\s*\(' + args_pattern + r'\)', re.S)= , r'__u32 \1'), + ] + + # Regexes here are guaranteed to have the end limiter matching + # the start delimiter. Yet, right now, only one replace group + # is allowed. 
+ + sub_nested_prefixes =3D [ + (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), + ] + + for search, sub in sub_prefixes: + members =3D search.sub(sub, members) + + nested =3D NestedMatch() + + for search, sub in sub_nested_prefixes: + members =3D nested.sub(search, sub, members) + + # Keeps the original declaration as-is + declaration =3D members + + # Split nested struct/union elements + # + # This loop was simpler at the original kernel-doc perl version, as + # while ($members =3D~ m/$struct_members/) { ... } + # reads 'members' string on each interaction. + # + # Python behavior is different: it parses 'members' only once, + # creating a list of tuples from the first interaction. + # + # On other words, this won't get nested structs. + # + # So, we need to have an extra loop on Python to override such + # re limitation. + + while True: + tuples =3D struct_members.findall(members) + if not tuples: + break + + for t in tuples: + newmember =3D "" + maintype =3D t[0] + s_ids =3D t[5] + content =3D t[3] + + oldmember =3D "".join(t) + + for s_id in s_ids.split(','): + s_id =3D s_id.strip() + + newmember +=3D f"{maintype} {s_id}; " + s_id =3D Re(r'[:\[].*').sub('', s_id) + s_id =3D Re(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) + + for arg in content.split(';'): + arg =3D arg.strip() + + if not arg: + continue + + r =3D Re(r'^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)') + if r.match(arg): + # Pointer-to-function + dtype =3D r.group(1) + name =3D r.group(2) + extra =3D r.group(3) + + if not name: + continue + + if not s_id: + # Anonymous struct/union + newmember +=3D f"{dtype}{name}{extra}; " + else: + newmember +=3D f"{dtype}{s_id}.{name}{extr= a}; " + + else: + arg =3D arg.strip() + # Handle bitmaps + arg =3D Re(r':\s*\d+\s*').sub('', arg) + + # Handle arrays + arg =3D Re(r'\[.*\]').sub('', arg) + + # Handle multiple IDs + arg =3D Re(r'\s*,\s*').sub(',', arg) + + r =3D Re(r'(.*)\s+([\S+,]+)') + + if r.search(arg): + dtype =3D r.group(1) + names =3D r.group(2) + else: + newmember +=3D 
f"{arg}; " + continue + + for name in names.split(','): + name =3D Re(r'^\s*\**(\S+)\s*').sub(r'\1',= name).strip() + + if not name: + continue + + if not s_id: + # Anonymous struct/union + newmember +=3D f"{dtype} {name}; " + else: + newmember +=3D f"{dtype} {s_id}.{name}= ; " + + members =3D members.replace(oldmember, newmember) + + # Ignore other nested elements, like enums + members =3D re.sub(r'(\{[^\{\}]*\})', '', members) + + self.create_parameter_list(ln, decl_type, members, ';', + declaration_name) + self.check_sections(ln, declaration_name, decl_type, + self.entry.sectcheck, self.entry.struct_actual) + + # Adjust declaration for better display + declaration =3D Re(r'([\{;])').sub(r'\1\n', declaration) + declaration =3D Re(r'\}\s+;').sub('};', declaration) + + # Better handle inlined enums + while True: + r =3D Re(r'(enum\s+\{[^\}]+),([^\n])') + if not r.search(declaration): + break + + declaration =3D r.sub(r'\1,\n\2', declaration) + + def_args =3D declaration.split('\n') + level =3D 1 + declaration =3D "" + for clause in def_args: + + clause =3D clause.strip() + clause =3D Re(r'\s+').sub(' ', clause, count=3D1) + + if not clause: + continue + + if '}' in clause and level > 1: + level -=3D 1 + + if not Re(r'^\s*#').match(clause): + declaration +=3D "\t" * level + + declaration +=3D "\t" + clause + "\n" + if "{" in clause and "}" not in clause: + level +=3D 1 + + self.output_declaration(decl_type, declaration_name, + struct=3Ddeclaration_name, + module=3Dself.entry.modulename, + definition=3Ddeclaration, + parameterlist=3Dself.entry.parameterlist, + parameterdescs=3Dself.entry.parameterdescs, + parametertypes=3Dself.entry.parametertypes, + sectionlist=3Dself.entry.sectionlist, + sections=3Dself.entry.sections, + purpose=3Dself.entry.declaration_purpose) + + def dump_enum(self, ln, proto): + """ + Stores an enum inside self.entries array. 
+ """ + + # Ignore members marked private + proto =3D Re(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', flags=3Dr= e.S).sub('', proto) + proto =3D Re(r'\/\*\s*private:.*}', flags=3Dre.S).sub('}', proto) + + # Strip comments + proto =3D Re(r'\/\*.*?\*\/', flags=3Dre.S).sub('', proto) + + # Strip #define macros inside enums + proto =3D Re(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=3Dre= .S).sub('', proto) + + members =3D None + declaration_name =3D None + + r =3D Re(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') + if r.search(proto): + declaration_name =3D r.group(2) + members =3D r.group(1).rstrip() + else: + r =3D Re(r'enum\s+(\w*)\s*\{(.*)\}') + if r.match(proto): + declaration_name =3D r.group(1) + members =3D r.group(2).rstrip() + + if not members: + self.emit_warning(ln, f"{proto}: error: Cannot parse enum!") + self.config.errors +=3D 1 + return + + if self.entry.identifier !=3D declaration_name: + if self.entry.identifier =3D=3D "": + self.emit_warning(ln, + f"{proto}: wrong kernel-doc identifier o= n prototype") + else: + self.emit_warning(ln, + f"expecting prototype for enum {self.ent= ry.identifier}. 
Prototype was for enum {declaration_name} instead") + return + + if not declaration_name: + declaration_name =3D "(anonymous)" + + member_set =3D set() + + members =3D Re(r'\([^;]*?[\)]').sub('', members) + + for arg in members.split(','): + if not arg: + continue + arg =3D Re(r'^\s*(\w+).*').sub(r'\1', arg) + self.entry.parameterlist.append(arg) + if arg not in self.entry.parameterdescs: + self.entry.parameterdescs[arg] =3D self.undescribed + if self.show_warnings("enum", declaration_name): + self.emit_warning(ln, + f"Enum value '{arg}' not described i= n enum '{declaration_name}'") + member_set.add(arg) + + for k in self.entry.parameterdescs: + if k not in member_set: + if self.show_warnings("enum", declaration_name): + self.emit_warning(ln, + f"Excess enum value '%{k}' descripti= on in '{declaration_name}'") + + self.output_declaration('enum', declaration_name, + enum=3Ddeclaration_name, + module=3Dself.config.modulename, + parameterlist=3Dself.entry.parameterlist, + parameterdescs=3Dself.entry.parameterdescs, + sectionlist=3Dself.entry.sectionlist, + sections=3Dself.entry.sections, + purpose=3Dself.entry.declaration_purpose) + + def dump_declaration(self, ln, prototype): + """ + Stores a data declaration inside self.entries array. + """ + + if self.entry.decl_type =3D=3D "enum": + self.dump_enum(ln, prototype) + return + + if self.entry.decl_type =3D=3D "typedef": + self.dump_typedef(ln, prototype) + return + + if self.entry.decl_type in ["union", "struct"]: + self.dump_struct(ln, prototype) + return + + # TODO: handle other types + self.output_declaration(self.entry.decl_type, prototype, + entry=3Dself.entry) + + def dump_function(self, ln, prototype): + """ + Stores a function of function macro inside self.entries array. 
+ """ + + func_macro =3D False + return_type =3D '' + decl_type =3D 'function' + + # Prefixes that would be removed + sub_prefixes =3D [ + (r"^static +", "", 0), + (r"^extern +", "", 0), + (r"^asmlinkage +", "", 0), + (r"^inline +", "", 0), + (r"^__inline__ +", "", 0), + (r"^__inline +", "", 0), + (r"^__always_inline +", "", 0), + (r"^noinline +", "", 0), + (r"^__FORTIFY_INLINE +", "", 0), + (r"__init +", "", 0), + (r"__init_or_module +", "", 0), + (r"__deprecated +", "", 0), + (r"__flatten +", "", 0), + (r"__meminit +", "", 0), + (r"__must_check +", "", 0), + (r"__weak +", "", 0), + (r"__sched +", "", 0), + (r"_noprof", "", 0), + (r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +", "", 0), + (r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +", "", = 0), + (r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +", "", 0), + (r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)", r"\1, \2= ", 0), + (r"__attribute_const__ +", "", 0), + + # It seems that Python support for re.X is broken: + # At least for me (Python 3.13), this didn't work +# (r""" +# __attribute__\s*\(\( +# (?: +# [\w\s]+ # attribute name +# (?:\([^)]*\))? # attribute arguments +# \s*,? # optional comma at the end +# )+ +# \)\)\s+ +# """, "", re.X), + + # So, remove whitespaces and comments from it + (r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"= , "", 0), + ] + + for search, sub, flags in sub_prefixes: + prototype =3D Re(search, flags).sub(sub, prototype) + + # Macros are a special case, as they change the prototype format + new_proto =3D Re(r"^#\s*define\s+").sub("", prototype) + if new_proto !=3D prototype: + is_define_proto =3D True + prototype =3D new_proto + else: + is_define_proto =3D False + + # Yes, this truly is vile. We are looking for: + # 1. Return type (may be nothing if we're looking at a macro) + # 2. Function name + # 3. Function parameters. 
+ # + # All the while we have to watch out for function pointer paramete= rs + # (which IIRC is what the two sections are for), C types (these + # regexps don't even start to express all the possibilities), and + # so on. + # + # If you mess with these regexps, it's a good idea to check that + # the following functions' documentation still comes out right: + # - parport_register_device (function pointer parameters) + # - atomic_set (macro) + # - pci_match_device, __copy_to_user (long return type) + + name =3D r'[a-zA-Z0-9_~:]+' + prototype_end1 =3D r'[^\(]*' + prototype_end2 =3D r'[^\{]*' + prototype_end =3D fr'\(({prototype_end1}|{prototype_end2})\)' + + # Besides compiling, Perl qr{[\w\s]+} works as a non-capturing gro= up. + # So, this needs to be mapped in Python with (?:...)? or (?:...)+ + + type1 =3D r'(?:[\w\s]+)?' + type2 =3D r'(?:[\w\s]+\*+)+' + + found =3D False + + if is_define_proto: + r =3D Re(r'^()(' + name + r')\s+') + + if r.search(prototype): + return_type =3D '' + declaration_name =3D r.group(2) + func_macro =3D True + + found =3D True + + if not found: + patterns =3D [ + rf'^()({name})\s*{prototype_end}', + rf'^({type1})\s+({name})\s*{prototype_end}', + rf'^({type2})\s*({name})\s*{prototype_end}', + ] + + for p in patterns: + r =3D Re(p) + + if r.match(prototype): + + return_type =3D r.group(1) + declaration_name =3D r.group(2) + args =3D r.group(3) + + self.create_parameter_list(ln, decl_type, args, ',', + declaration_name) + + found =3D True + break + if not found: + self.emit_warning(ln, + f"cannot understand function prototype: '{pr= ototype}'") + return + + if self.entry.identifier !=3D declaration_name: + self.emit_warning(ln, + f"expecting prototype for {self.entry.identi= fier}(). 
Prototype was for {declaration_name}() instead") + return + + prms =3D " ".join(self.entry.parameterlist) + self.check_sections(ln, declaration_name, "function", + self.entry.sectcheck, prms) + + self.check_return_section(ln, declaration_name, return_type) + + if 'typedef' in return_type: + self.output_declaration(decl_type, declaration_name, + function=3Ddeclaration_name, + typedef=3DTrue, + module=3Dself.config.modulename, + functiontype=3Dreturn_type, + parameterlist=3Dself.entry.parameterli= st, + parameterdescs=3Dself.entry.parameterd= escs, + parametertypes=3Dself.entry.parametert= ypes, + sectionlist=3Dself.entry.sectionlist, + sections=3Dself.entry.sections, + purpose=3Dself.entry.declaration_purpo= se, + func_macro=3Dfunc_macro) + else: + self.output_declaration(decl_type, declaration_name, + function=3Ddeclaration_name, + typedef=3DFalse, + module=3Dself.config.modulename, + functiontype=3Dreturn_type, + parameterlist=3Dself.entry.parameterli= st, + parameterdescs=3Dself.entry.parameterd= escs, + parametertypes=3Dself.entry.parametert= ypes, + sectionlist=3Dself.entry.sectionlist, + sections=3Dself.entry.sections, + purpose=3Dself.entry.declaration_purpo= se, + func_macro=3Dfunc_macro) + + def dump_typedef(self, ln, proto): + """ + Stores a typedef inside self.entries array. 
+ """ + + typedef_type =3D r'((?:\s+[\w\*]+\b){1,8})\s*' + typedef_ident =3D r'\*?\s*(\w\S+)\s*' + typedef_args =3D r'\s*\((.*)\);' + + typedef1 =3D Re(r'typedef' + typedef_type + r'\(' + typedef_ident = + r'\)' + typedef_args) + typedef2 =3D Re(r'typedef' + typedef_type + typedef_ident + typede= f_args) + + # Strip comments + proto =3D Re(r'/\*.*?\*/', flags=3Dre.S).sub('', proto) + + # Parse function typedef prototypes + for r in [typedef1, typedef2]: + if not r.match(proto): + continue + + return_type =3D r.group(1).strip() + declaration_name =3D r.group(2) + args =3D r.group(3) + + if self.entry.identifier !=3D declaration_name: + self.emit_warning(ln, + f"expecting prototype for typedef {self.= entry.identifier}. Prototype was for typedef {declaration_name} instead\n") + return + + decl_type =3D 'function' + self.create_parameter_list(ln, decl_type, args, ',', declarati= on_name) + + self.output_declaration(decl_type, declaration_name, + function=3Ddeclaration_name, + typedef=3DTrue, + module=3Dself.entry.modulename, + functiontype=3Dreturn_type, + parameterlist=3Dself.entry.parameterli= st, + parameterdescs=3Dself.entry.parameterd= escs, + parametertypes=3Dself.entry.parametert= ypes, + sectionlist=3Dself.entry.sectionlist, + sections=3Dself.entry.sections, + purpose=3Dself.entry.declaration_purpo= se) + return + + # Handle nested parentheses or brackets + r =3D Re(r'(\(*.\)\s*|\[*.\]\s*);$') + while r.search(proto): + proto =3D r.sub('', proto) + + # Parse simple typedefs + r =3D Re(r'typedef.*\s+(\w+)\s*;') + if r.match(proto): + declaration_name =3D r.group(1) + + if self.entry.identifier !=3D declaration_name: + self.emit_warning(ln, f"expecting prototype for typedef {s= elf.entry.identifier}. 
Prototype was for typedef {declaration_name} instead= \n") + return + + self.output_declaration('typedef', declaration_name, + typedef=3Ddeclaration_name, + module=3Dself.entry.modulename, + sectionlist=3Dself.entry.sectionlist, + sections=3Dself.entry.sections, + purpose=3Dself.entry.declaration_purpo= se) + return + + self.emit_warning(ln, "error: Cannot parse typedef!") + self.config.errors +=3D 1 + + @staticmethod + def process_export(function_table, line): + """ + process EXPORT_SYMBOL* tags + + This method is called both internally and externally, so, it + doesn't use self. + """ + + if export_symbol.search(line): + symbol =3D export_symbol.group(2) + function_table.add(symbol) + + if export_symbol_ns.search(line): + symbol =3D export_symbol_ns.group(2) + function_table.add(symbol) + + def process_normal(self, ln, line): + """ + STATE_NORMAL: looking for the /** to begin everything. + """ + + if not doc_start.match(line): + return + + # start a new entry + self.reset_state(ln + 1) + self.entry.in_doc_sect =3D False + + # next line is always the function name + self.state =3D self.STATE_NAME + + def process_name(self, ln, line): + """ + STATE_NAME: Looking for the "name - description" line + """ + + if doc_block.search(line): + self.entry.new_start_line =3D ln + + if not doc_block.group(1): + self.entry.section =3D self.section_intro + else: + self.entry.section =3D doc_block.group(1) + + self.state =3D self.STATE_DOCBLOCK + return + + if doc_decl.search(line): + self.entry.identifier =3D doc_decl.group(1) + self.entry.is_kernel_comment =3D False + + decl_start =3D str(doc_com) # comment block asterisk + fn_type =3D r"(?:\w+\s*\*\s*)?" # type (for non-functions) + parenthesis =3D r"(?:\(\w*\))?" 
# optional parenthesis on fu= nction + decl_end =3D r"(?:[-:].*)" # end of the name part + + # test for pointer declaration type, foo * bar() - desc + r =3D Re(fr"^{decl_start}([\w\s]+?){parenthesis}?\s*{decl_end}= ?$") + if r.search(line): + self.entry.identifier =3D r.group(1) + + # Test for data declaration + r =3D Re(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)") + if r.search(line): + self.entry.decl_type =3D r.group(1) + self.entry.identifier =3D r.group(2) + self.entry.is_kernel_comment =3D True + else: + # Look for foo() or static void foo() - description; + # or misspelt identifier + + r1 =3D Re(fr"^{decl_start}{fn_type}(\w+)\s*{parenthesis}\s= *{decl_end}?$") + r2 =3D Re(fr"^{decl_start}{fn_type}(\w+[^-:]*){parenthesis= }\s*{decl_end}$") + + for r in [r1, r2]: + if r.search(line): + self.entry.identifier =3D r.group(1) + self.entry.decl_type =3D "function" + + r =3D Re(r"define\s+") + self.entry.identifier =3D r.sub("", self.entry.ide= ntifier) + self.entry.is_kernel_comment =3D True + break + + self.entry.identifier =3D self.entry.identifier.strip(" ") + + self.state =3D self.STATE_BODY + + # if there's no @param blocks need to set up default section h= ere + self.entry.section =3D self.section_default + self.entry.new_start_line =3D ln + 1 + + r =3D Re("[-:](.*)") + if r.search(line): + # strip leading/trailing/multiple spaces + self.entry.descr =3D r.group(1).strip(" ") + + r =3D Re(r"\s+") + self.entry.descr =3D r.sub(" ", self.entry.descr) + self.entry.declaration_purpose =3D self.entry.descr + self.state =3D self.STATE_BODY_MAYBE + else: + self.entry.declaration_purpose =3D "" + + if not self.entry.is_kernel_comment: + self.emit_warning(ln, + f"This comment starts with '/**', but is= n't a kernel-doc comment. 
Refer Documentation/doc-guide/kernel-doc.rst\n{li= ne}") + self.state =3D self.STATE_NORMAL + + if not self.entry.declaration_purpose and self.config.wshort_d= esc: + self.emit_warning(ln, + f"missing initial short description on l= ine:\n{line}") + + if not self.entry.identifier and self.entry.decl_type !=3D "en= um": + self.emit_warning(ln, + f"wrong kernel-doc identifier on line:\n= {line}") + self.state =3D self.STATE_NORMAL + + if self.config.verbose: + self.emit_warning(ln, + f"Scanning doc for {self.entry.decl_type= } {self.entry.identifier}", + warning=3DFalse) + + return + + # Failed to find an identifier. Emit a warning + self.emit_warning(ln, f"Cannot find identifier on line:\n{line}") + + def process_body(self, ln, line): + """ + STATE_BODY and STATE_BODY_MAYBE: the bulk of a kerneldoc comment. + """ + + if self.state =3D=3D self.STATE_BODY_WITH_BLANK_LINE: + r =3D Re(r"\s*\*\s?\S") + if r.match(line): + self.dump_section() + self.entry.section =3D self.section_default + self.entry.new_start_line =3D line + self.entry.contents =3D "" + + if doc_sect.search(line): + self.entry.in_doc_sect =3D True + newsection =3D doc_sect.group(1) + + if newsection.lower() in ["description", "context"]: + newsection =3D newsection.title() + + # Special case: @return is a section, not a param description + if newsection.lower() in ["@return", "@returns", + "return", "returns"]: + newsection =3D "Return" + + # Perl kernel-doc has a check here for contents before section= s. + # the logic there is always false, as in_doc_sect variable is + # always true. 
So, just don't implement Wcontents_before_secti= ons + + # .title() + newcontents =3D doc_sect.group(2) + if not newcontents: + newcontents =3D "" + + if self.entry.contents.strip("\n"): + self.dump_section() + + self.entry.new_start_line =3D ln + self.entry.section =3D newsection + self.entry.leading_space =3D None + + self.entry.contents =3D newcontents.lstrip() + if self.entry.contents: + self.entry.contents +=3D "\n" + + self.state =3D self.STATE_BODY + return + + if doc_end.search(line): + self.dump_section() + + # Look for doc_com + + doc_end: + r =3D Re(r'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') + if r.match(line): + self.emit_warning(ln, f"suspicious ending line: {line}") + + self.entry.prototype =3D "" + self.entry.new_start_line =3D ln + 1 + + self.state =3D self.STATE_PROTO + return + + if doc_content.search(line): + cont =3D doc_content.group(1) + + if cont =3D=3D "": + if self.entry.section =3D=3D self.section_context: + self.dump_section() + + self.entry.new_start_line =3D ln + self.state =3D self.STATE_BODY + else: + if self.entry.section !=3D self.section_default: + self.state =3D self.STATE_BODY_WITH_BLANK_LINE + else: + self.state =3D self.STATE_BODY + + self.entry.contents +=3D "\n" + + elif self.state =3D=3D self.STATE_BODY_MAYBE: + + # Continued declaration purpose + self.entry.declaration_purpose =3D self.entry.declaration_= purpose.rstrip() + self.entry.declaration_purpose +=3D " " + cont + + r =3D Re(r"\s+") + self.entry.declaration_purpose =3D r.sub(' ', + self.entry.declarat= ion_purpose) + + else: + if self.entry.section.startswith('@') or \ + self.entry.section =3D=3D self.section_context: + if self.entry.leading_space is None: + r =3D Re(r'^(\s+)') + if r.match(cont): + self.entry.leading_space =3D len(r.group(1)) + else: + self.entry.leading_space =3D 0 + + # Double-check if leading space are realy spaces + pos =3D 0 + for i in range(0, self.entry.leading_space): + if cont[i] !=3D " ": + break + pos +=3D 1 + + cont =3D cont[pos:] + + # NEW LOGIC: 
+ # In case it is different, update it + if self.entry.leading_space !=3D pos: + self.entry.leading_space =3D pos + + self.entry.contents +=3D cont + "\n" + return + + # Unknown line, ignore + self.emit_warning(ln, f"bad line: {line}") + + def process_inline(self, ln, line): + """STATE_INLINE: docbook comments within a prototype.""" + + if self.inline_doc_state =3D=3D self.STATE_INLINE_NAME and \ + doc_inline_sect.search(line): + self.entry.section =3D doc_inline_sect.group(1) + self.entry.new_start_line =3D ln + + self.entry.contents =3D doc_inline_sect.group(2).lstrip() + if self.entry.contents !=3D "": + self.entry.contents +=3D "\n" + + self.inline_doc_state =3D self.STATE_INLINE_TEXT + # Documentation block end */ + return + + if doc_inline_end.search(line): + if self.entry.contents not in ["", "\n"]: + self.dump_section() + + self.state =3D self.STATE_PROTO + self.inline_doc_state =3D self.STATE_INLINE_NA + return + + if doc_content.search(line): + if self.inline_doc_state =3D=3D self.STATE_INLINE_TEXT: + self.entry.contents +=3D doc_content.group(1) + "\n" + if not self.entry.contents.strip(" ").rstrip("\n"): + self.entry.contents =3D "" + + elif self.inline_doc_state =3D=3D self.STATE_INLINE_NAME: + self.emit_warning(ln, + f"Incorrect use of kernel-doc format: {l= ine}") + + self.inline_doc_state =3D self.STATE_INLINE_ERROR + + def syscall_munge(self, ln, proto): # pylint: disable=3DW0613 + """ + Handle syscall definitions + """ + + is_void =3D False + + # Strip newlines/CR's + proto =3D re.sub(r'[\r\n]+', ' ', proto) + + # Check if it's a SYSCALL_DEFINE0 + if 'SYSCALL_DEFINE0' in proto: + is_void =3D True + + # Replace SYSCALL_DEFINE with correct return type & function name + proto =3D Re(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) + + r =3D Re(r'long\s+(sys_.*?),') + if r.search(proto): + proto =3D proto.replace(',', '(', count=3D1) + elif is_void: + proto =3D proto.replace(')', '(void)', count=3D1) + + # Now delete all of the odd-numbered commas in 
the proto + # so that argument types & names don't have a comma between them + count =3D 0 + length =3D len(proto) + + if is_void: + length =3D 0 # skip the loop if is_void + + for ix in range(length): + if proto[ix] =3D=3D ',': + count +=3D 1 + if count % 2 =3D=3D 1: + proto =3D proto[:ix] + ' ' + proto[ix + 1:] + + return proto + + def tracepoint_munge(self, ln, proto): + """ + Handle tracepoint definitions + """ + + tracepointname =3D None + tracepointargs =3D None + + # Match tracepoint name based on different patterns + r =3D Re(r'TRACE_EVENT\((.*?),') + if r.search(proto): + tracepointname =3D r.group(1) + + r =3D Re(r'DEFINE_SINGLE_EVENT\((.*?),') + if r.search(proto): + tracepointname =3D r.group(1) + + r =3D Re(r'DEFINE_EVENT\((.*?),(.*?),') + if r.search(proto): + tracepointname =3D r.group(2) + + if tracepointname: + tracepointname =3D tracepointname.lstrip() + + r =3D Re(r'TP_PROTO\((.*?)\)') + if r.search(proto): + tracepointargs =3D r.group(1) + + if not tracepointname or not tracepointargs: + self.emit_warning(ln, + f"Unrecognized tracepoint format:\n{proto}\n= ") + else: + proto =3D f"static inline void trace_{tracepointname}({tracepo= intargs})" + self.entry.identifier =3D f"trace_{self.entry.identifier}" + + return proto + + def process_proto_function(self, ln, line): + """Ancillary routine to process a function prototype""" + + # strip C99-style comments to end of line + r =3D Re(r"\/\/.*$", re.S) + line =3D r.sub('', line) + + if Re(r'\s*#\s*define').match(line): + self.entry.prototype =3D line + elif line.startswith('#'): + # Strip other macros like #ifdef/#ifndef/#endif/... 
+ pass + else: + r =3D Re(r'([^\{]*)') + if r.match(line): + self.entry.prototype +=3D r.group(1) + " " + + if '{' in line or ';' in line or Re(r'\s*#\s*define').match(line): + # strip comments + r =3D Re(r'/\*.*?\*/') + self.entry.prototype =3D r.sub('', self.entry.prototype) + + # strip newlines/cr's + r =3D Re(r'[\r\n]+') + self.entry.prototype =3D r.sub(' ', self.entry.prototype) + + # strip leading spaces + r =3D Re(r'^\s+') + self.entry.prototype =3D r.sub('', self.entry.prototype) + + # Handle self.entry.prototypes for function pointers like: + # int (*pcs_config)(struct foo) + + r =3D Re(r'^(\S+\s+)\(\s*\*(\S+)\)') + self.entry.prototype =3D r.sub(r'\1\2', self.entry.prototype) + + if 'SYSCALL_DEFINE' in self.entry.prototype: + self.entry.prototype =3D self.syscall_munge(ln, + self.entry.proto= type) + + r =3D Re(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') + if r.search(self.entry.prototype): + self.entry.prototype =3D self.tracepoint_munge(ln, + self.entry.pr= ototype) + + self.dump_function(ln, self.entry.prototype) + self.reset_state(ln) + + def process_proto_type(self, ln, line): + """Ancillary routine to process a type""" + + # Strip newlines/cr's. + line =3D Re(r'[\r\n]+', re.S).sub(' ', line) + + # Strip leading spaces + line =3D Re(r'^\s+', re.S).sub('', line) + + # Strip trailing spaces + line =3D Re(r'\s+$', re.S).sub('', line) + + # Strip C99-style comments to the end of the line + line =3D Re(r"\/\/.*$", re.S).sub('', line) + + # To distinguish preprocessor directive from regular declaration l= ater. 
+ if line.startswith('#'): + line +=3D ";" + + r =3D Re(r'([^\{\};]*)([\{\};])(.*)') + while True: + if r.search(line): + if self.entry.prototype: + self.entry.prototype +=3D " " + self.entry.prototype +=3D r.group(1) + r.group(2) + + self.entry.brcount +=3D r.group(2).count('{') + self.entry.brcount -=3D r.group(2).count('}') + + self.entry.brcount =3D max(self.entry.brcount, 0) + + if r.group(2) =3D=3D ';' and self.entry.brcount =3D=3D 0: + self.dump_declaration(ln, self.entry.prototype) + self.reset_state(ln) + break + + line =3D r.group(3) + else: + self.entry.prototype +=3D line + break + + def process_proto(self, ln, line): + """STATE_PROTO: reading a function/whatever prototype.""" + + if doc_inline_oneline.search(line): + self.entry.section =3D doc_inline_oneline.group(1) + self.entry.contents =3D doc_inline_oneline.group(2) + + if self.entry.contents !=3D "": + self.entry.contents +=3D "\n" + self.dump_section(start_new=3DFalse) + + elif doc_inline_start.search(line): + self.state =3D self.STATE_INLINE + self.inline_doc_state =3D self.STATE_INLINE_NAME + + elif self.entry.decl_type =3D=3D 'function': + self.process_proto_function(ln, line) + + else: + self.process_proto_type(ln, line) + + def process_docblock(self, ln, line): + """STATE_DOCBLOCK: within a DOC: block.""" + + if doc_end.search(line): + self.dump_section() + self.output_declaration("doc", None, + sectionlist=3Dself.entry.sectionlist, + sections=3Dself.entry.sections, module= =3Dself.config.modulename) + self.reset_state(ln) + + elif doc_content.search(line): + self.entry.contents +=3D doc_content.group(1) + "\n" + + def run(self): + """ + Open and process each line of a C source file. + he parsing is controlled via a state machine, and the line is pass= ed + to a different process function depending on the state. The process + function may update the state as needed. 
+ """ + + cont =3D False + prev =3D "" + prev_ln =3D None + + try: + with open(self.fname, "r", encoding=3D"utf8", + errors=3D"backslashreplace") as fp: + for ln, line in enumerate(fp): + + line =3D line.expandtabs().strip("\n") + + # Group continuation lines on prototypes + if self.state =3D=3D self.STATE_PROTO: + if line.endswith("\\"): + prev +=3D line.removesuffix("\\") + cont =3D True + + if not prev_ln: + prev_ln =3D ln + + continue + + if cont: + ln =3D prev_ln + line =3D prev + line + prev =3D "" + cont =3D False + prev_ln =3D None + + self.config.log.debug("%d %s%s: %s", + ln, self.st_name[self.state], + self.st_inline_name[self.inline_= doc_state], + line) + + # TODO: not all states allow EXPORT_SYMBOL*, so this + # can be optimized later on to speedup parsing + self.process_export(self.config.function_table, line) + + # Hand this line to the appropriate state handler + if self.state =3D=3D self.STATE_NORMAL: + self.process_normal(ln, line) + elif self.state =3D=3D self.STATE_NAME: + self.process_name(ln, line) + elif self.state in [self.STATE_BODY, self.STATE_BODY_M= AYBE, + self.STATE_BODY_WITH_BLANK_LINE]: + self.process_body(ln, line) + elif self.state =3D=3D self.STATE_INLINE: # scanning = for inline parameters + self.process_inline(ln, line) + elif self.state =3D=3D self.STATE_PROTO: + self.process_proto(ln, line) + elif self.state =3D=3D self.STATE_DOCBLOCK: + self.process_docblock(ln, line) + except OSError: + self.config.log.error(f"Error: Cannot open file {self.fname}") + self.config.errors +=3D 1 --=20 2.49.0