From nobody Thu Apr 2 20:28:05 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id A4FF232720C; Thu, 26 Mar 2026 16:22:07 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1774542127; cv=none; b=A5Sy/JHruhDnkjMTyutVjL9oylS6yM8XM70JxMbvphmghYsKI/rxBNFW6HodRzZ5kkQ4cMP72t2WXlleEvLWfZM5a3VysuHUMfr7TWBbo+nN+SvcYjjgsaoOnfs186RFHWULLfm4m6sNqLqFoBClyhk2P6YGqHW59mAUjc5+WCg= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1774542127; c=relaxed/simple; bh=592QBUZGL/JQcJK5kIpLRy9mh/dTMrd9H3kywePe958=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=ScNZWBVAp3mp5l+Iw15Zoa2GG+3nMCuddH9wetE5ry9IbMABRravJOgJNi70DMmkIUbMJGM9vQ1sF24Sk2xWJFVl/hdimJx9+0ZcwTjRmevh/WccKQxbF4Edooa5EqUK4sY699yrOX/ZzF+KZ+g3LVjm3nZ3Rv0Cen/tOM1u9lo= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=bZCUtxDs; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="bZCUtxDs" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 36E97C19423; Thu, 26 Mar 2026 16:22:07 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1774542127; bh=592QBUZGL/JQcJK5kIpLRy9mh/dTMrd9H3kywePe958=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=bZCUtxDss57fZ4GT9ESBr+28BQVa79qHWetEB8dqOxMybuTFTGP5T7kjpvH/tRQ00 Hz+B7ndUiWagUotxBCnWjBnXzx8PHSvT/nUvglk8IScomNNQD2sR2P9ehbEN623lyP TLupPUIoEoMBUPCoM1jDhmXkcmNNIISglPcEmWtNWiUrP3RHcMSnM4uP3u6OmuOOEq 
EJSLBmBmtXdQpi2BMfS8w/RslKulqmI40mzB86+6j3kMmD6yxTY0z3NRDVriNWEq0H oxsrJ0IyCXkS6ch/2Q7asHsWc6Td0kXO7JuLHCURu5YLPeeiumXAEMhQfzNTQMwK90 N/lCdodykAwyw== Received: from mchehab by mail.kernel.org with local (Exim 4.99.1) (envelope-from ) id 1w5nTB-0000000AtfP-0bMu; Thu, 26 Mar 2026 17:22:05 +0100 From: Mauro Carvalho Chehab To: Jonathan Corbet , Linux Doc Mailing List Cc: Mauro Carvalho Chehab , linux-kernel@vger.kernel.org, Mauro Carvalho Chehab , Shuah Khan Subject: [PATCH 1/1] docs: kdoc_diff: add a helper tool to help checking kdoc regressions Date: Thu, 26 Mar 2026 17:22:00 +0100 Message-ID: <0eac43761d447c18d6686662a2d61ae726b02379.1774541999.git.mchehab+huawei@kernel.org> X-Mailer: git-send-email 2.52.0 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Sender: Mauro Carvalho Chehab Checking for regressions in kernel-doc can be hard. Add a helper tool to make this task easier. Signed-off-by: Mauro Carvalho Chehab --- tools/docs/kdoc_diff | 504 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 504 insertions(+) create mode 100755 tools/docs/kdoc_diff diff --git a/tools/docs/kdoc_diff b/tools/docs/kdoc_diff new file mode 100755 index 000000000000..5edd9b46a825 --- /dev/null +++ b/tools/docs/kdoc_diff @@ -0,0 +1,504 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2026: Mauro Carvalho Chehab . +# +# pylint: disable=3DR0903,R0912,R0913,R0914,R0915,R0917 + +""" +kdoc_diff - Check differences in kernel=E2=80=91doc output between two = different +commits. + +Examples +-------- + +Compare the kernel=E2=80=91doc output between releases v6.18 and v6.19:: + + $ kdoc_diff v6.18..v6.19 + +Both outputs are cached. + +Force a complete documentation scan and clean any previous cache from +6.19 to the current HEAD:: + + $ kdoc_diff 6.19.. 
--full --clean + +Check differences only on a single driver since origin/main:: + + $ kdoc_diff origin/main drivers/media + +Generate a YAML file and use it to check for regressions:: + + $ kdoc_diff HEAD~ drivers/media --regression + + +""" + +import os +import sys +import argparse +import subprocess +import shutil +import re +import signal + +from glob import iglob + + +SRC_DIR =3D os.path.dirname(os.path.realpath(__file__)) +WORK_DIR =3D os.path.abspath(os.path.join(SRC_DIR, "../..")) + +KDOC_BINARY =3D os.path.join(SRC_DIR, "kernel-doc") +KDOC_PARSER_TEST =3D os.path.join(WORK_DIR, "tools/unittests/test_kdoc_par= ser.py") + +CACHE_DIR =3D ".doc_diff_cache" + +DIR_NAME =3D { + "full": os.path.join(CACHE_DIR, "full"), + "partial": os.path.join(CACHE_DIR, "partial"), + "no-cache": os.path.join(CACHE_DIR, "no_cache"), + "tmp": os.path.join(CACHE_DIR, "__tmp__"), +} + +class GitHelper: + """Handles all Git operations""" + + def __init__(self, work_dir=3DNone): + self.work_dir =3D work_dir + + def is_inside_repository(self): + """Check if we're inside a Git repository""" + try: + output =3D subprocess.check_output(["git", "rev-parse", + "--is-inside-work-tree"], + cwd=3Dself.work_dir, + stderr=3Dsubprocess.STDOUT, + universal_newlines=3DTrue) + + return output.strip() =3D=3D "true" + except subprocess.CalledProcessError: + return False + + def is_valid_commit(self, commit_hash): + """ + Validate that a ref (branch, tag, commit hash, etc.) can be + resolved to a commit. 
+ """ + try: + subprocess.check_output(["git", "rev-parse", commit_hash], + cwd=3Dself.work_dir, + stderr=3Dsubprocess.STDOUT) + return True + except subprocess.CalledProcessError: + return False + + def get_short_hash(self, commit_hash): + """Get short commit hash""" + try: + return subprocess.check_output(["git", "rev-parse", "--short", + commit_hash], + cwd=3Dself.work_dir, + stderr=3Dsubprocess.STDOUT, + universal_newlines=3DTrue).stri= p() + except subprocess.CalledProcessError: + return "" + + def has_uncommitted_changes(self): + """Check for uncommitted changes""" + try: + subprocess.check_output(["git", "diff-index", + "--quiet", "HEAD", "--"], + cwd=3Dself.work_dir, + stderr=3Dsubprocess.STDOUT) + return False + except subprocess.CalledProcessError: + return True + + def get_current_branch(self): + """Get current branch name""" + return subprocess.check_output(["git", "branch", "--show-current"], + cwd=3Dself.work_dir, + universal_newlines=3DTrue).strip() + + def checkout_commit(self, commit_hash, quiet=3DTrue): + """Checkout a commit safely""" + args =3D ["git", "checkout", "-f"] + if quiet: + args.append("-q") + args.append(commit_hash) + try: + subprocess.check_output(args, cwd=3Dself.work_dir, + stderr=3Dsubprocess.STDOUT) + + # Double-check if branch actually switched + branch =3D self.get_short_hash("HEAD") + if commit_hash !=3D branch: + raise RuntimeError(f"Branch changed to '{branch}' instead = of '{commit_hash}'") + + return True + except subprocess.CalledProcessError as e: + print(f"ERROR: Failed to checkout {commit_hash}: {e}", + file=3Dsys.stderr) + return False + + +class CacheManager: + """Manages persistent cache directories""" + + def __init__(self, work_dir): + self.work_dir =3D work_dir + + def initialize(self): + """Create cache directories if they don't exist""" + for dir_path in DIR_NAME.values(): + abs_path =3D os.path.join(self.work_dir, dir_path) + if not os.path.exists(abs_path): + os.makedirs(abs_path, exist_ok=3DTrue, 
mode=3D0o755) + + def get_commit_cache(self, commit_hash, path): + """Generate cache path for a commit""" + hash_short =3D GitHelper(self.work_dir).get_short_hash(commit_hash) + if not hash_short: + hash_short =3D commit_hash + + return os.path.join(path, hash_short) + +class KernelDocRunner: + """Runs kernel-doc documentation generator""" + + def __init__(self, work_dir, kdoc_binary): + self.work_dir =3D work_dir + self.kdoc_binary =3D kdoc_binary + self.kdoc_files =3D None + + def find_kdoc_references(self): + """Find all files marked with kernel-doc:: directives""" + if self.kdoc_files: + print("Using cached Kdoc refs") + return self.kdoc_files + + print("Finding kernel-doc entries in Documentation...") + + files =3D os.path.join(self.work_dir, 'Documentation/**/*.rst') + pattern =3D re.compile(r"^\.\.\s+kernel-doc::\s*(\S+)") + kdoc_files =3D set() + + for file_path in iglob(files, recursive=3DTrue): + try: + with open(file_path, 'r', encoding=3D'utf-8') as fp: + for line in fp: + match =3D pattern.match(line.strip()) + if match: + kdoc_files.add(match.group(1)) + + except OSError: + continue + + self.kdoc_files =3D list(kdoc_files) + + return self.kdoc_files + + def gen_yaml(self, yaml_file, kdoc_files): + """Runs kernel-doc to generate a yaml file with man and rst.""" + cmd =3D [self.kdoc_binary, "--man", "--rst", "--yaml", yaml_file] + cmd +=3D kdoc_files + + try: + subprocess.check_call(cmd, cwd=3Dself.work_dir, + stdout=3Dsubprocess.DEVNULL, + stderr=3Dsubprocess.DEVNULL) + except subprocess.CalledProcessError: + return False + + return True + + def run_unittest(self, yaml_file): + """Run unit tests with the generated yaml file""" + try: + subprocess.check_call([KDOC_PARSER_TEST, "--yaml", yaml_file], + cwd=3Dself.work_dir) + except subprocess.CalledProcessError: + return False + + return True + + def normal_run(self, tmp_dir, output_dir, kdoc_files): + """Generate man, rst and errors, storing them at tmp_dir.""" + os.makedirs(tmp_dir, exist_ok=3DTrue) + + 
try: + with open(os.path.join(tmp_dir, "man.log"), "w", encoding=3D"u= tf-8") as out: + subprocess.check_call([self.kdoc_binary, "--man"] + kdoc_f= iles, + cwd=3Dself.work_dir, + stdout=3Dout, stderr=3Dsubprocess.DE= VNULL) + + with open(os.path.join(tmp_dir, "rst.log"), "w", encoding=3D"u= tf-8") as out: + with open(os.path.join(tmp_dir, "err.log"), "w", encoding= =3D"utf-8") as err: + subprocess.check_call([self.kdoc_binary, "--rst"] + kd= oc_files, + cwd=3Dself.work_dir, + stdout=3Dout, stderr=3Derr) + except subprocess.CalledProcessError: + return False + + if output_dir: + os.replace(tmp_dir, output_dir) + + return True + + def run(self, commit_hash, tmp_dir, output_dir, kdoc_files, is_regress= ion, + is_end): + """Run kernel-doc on its several ways""" + if not kdoc_files: + raise RuntimeError("No kernel-doc references found") + + git_helper =3D GitHelper(self.work_dir) + if not git_helper.checkout_commit(commit_hash, quiet=3DTrue): + raise RuntimeError(f"ERROR: can't checkout commit {commit_hash= }") + + print(f"Processing {commit_hash}...") + + if not is_regression: + return self.normal_run(tmp_dir, output_dir, kdoc_files) + + yaml_file =3D os.path.join(tmp_dir, "out.yaml") + + if not is_end: + return self.gen_yaml(yaml_file, kdoc_files) + + return self.run_unittest(yaml_file) + +class DiffManager: + """Compare documentation output directories with an external diff.""" + def __init__(self, diff_tool=3D"diff", diff_args=3DNone): + self.diff_tool =3D diff_tool + # default: unified, no context, ignore whitespace changes + self.diff_args =3D diff_args or ["-u0", "-w"] + + def diff_directories(self, dir1, dir2): + """Compare two directories using an external diff.""" + print(f"\nDiffing {dir1} and {dir2}:") + + dir1_files =3D set() + dir2_files =3D set() + has_diff =3D False + + for root, _, files in os.walk(dir1): + for file in files: + dir1_files.add(os.path.relpath(os.path.join(root, file), d= ir1)) + for root, _, files in os.walk(dir2): + for file in files: + 
dir2_files.add(os.path.relpath(os.path.join(root, file), d= ir2)) + + common_files =3D sorted(dir1_files & dir2_files) + for file in common_files: + f1 =3D os.path.join(dir1, file) + f2 =3D os.path.join(dir2, file) + + cmd =3D [self.diff_tool] + self.diff_args + [f1, f2] + try: + result =3D subprocess.run( + cmd, capture_output=3DTrue, text=3DTrue, check=3DFalse + ) + if result.stdout: + has_diff =3D True + print(f"\n{file}") + print(result.stdout, end=3D"") + except FileNotFoundError: + print(f"ERROR: {self.diff_tool} not found") + sys.exit(1) + + # Show files that exist only in one directory + only_in_dir1 =3D dir1_files - dir2_files + only_in_dir2 =3D dir2_files - dir1_files + if only_in_dir1 or only_in_dir2: + has_diff =3D True + print("\nDifferential files:") + for f in sorted(only_in_dir1): + print(f" - {f} (only in {dir1})") + for f in sorted(only_in_dir2): + print(f" + {f} (only in {dir2})") + + if not has_diff: + print("\nNo differences between those two commits") + + +class SignalHandler(): + """Signal handler class.""" + + def restore(self, force_exit=3DFalse): + """Restore original HEAD state.""" + if self.restored: + return + + print(f"Restoring original branch: {self.original_head}") + try: + subprocess.check_call( + ["git", "checkout", "-f", self.original_head], + cwd=3Dself.git_helper.work_dir, + stderr=3Dsubprocess.STDOUT, + ) + except subprocess.CalledProcessError as e: + print(f"Failed to restore: {e}", file=3Dsys.stderr) + + for sig, handler in self.old_handler.items(): + signal.signal(sig, handler) + + self.restored =3D True + + if force_exit: + sys.exit(1) + + def signal_handler(self, sig, _): + """Handle interrupt signals.""" + print(f"\nSignal {sig} received. 
Restoring original state...") + + self.restore(force_exit=3DTrue) + + def __enter__(self): + """Allow using it via with command.""" + for sig in [signal.SIGINT, signal.SIGTERM]: + self.old_handler[sig] =3D signal.getsignal(sig) + signal.signal(sig, self.signal_handler) + + return self + + def __exit__(self, *args): + """Restore signals at the end of with block.""" + self.restore() + + def __init__(self, git_helper, original_head): + self.git_helper =3D git_helper + self.original_head =3D original_head + self.old_handler =3D {} + self.restored =3D False + +def parse_commit_range(value): + """Handle a commit range.""" + if ".." not in value: + begin =3D value + end =3D "HEAD" + else: + begin, _, end =3D value.partition("..") + if not end: + end =3D "HEAD" + + if not begin: + raise argparse.ArgumentTypeError("Need a commit begginning") + + + print(f"Range: {begin} to {end}") + + return begin, end + + +def main(): + """Main code""" + parser =3D argparse.ArgumentParser(description=3D"Compare kernel docum= entation between commits") + parser.add_argument("commits", type=3Dparse_commit_range, + help=3D"commit range like old..new") + parser.add_argument("files", nargs=3D"*", + help=3D"files to process =E2=80=93 if supplied the= --full flag is ignored") + + parser.add_argument("--full", "-f", action=3D"store_true", + help=3D"Force a full scan of Documentation/*") + + parser.add_argument("--regression", "-r", action=3D"store_true", + help=3D"Use YAML format to check for regressions") + + parser.add_argument("--work-dir", "-w", default=3DWORK_DIR, + help=3D"work dir (default: %(default)s)") + + parser.add_argument("--clean", "-c", action=3D"store_true", + help=3D"Clean caches") + + args =3D parser.parse_args() + + if args.files and args.full: + raise argparse.ArgumentError(args.full, + "cannot combine '--full' with an expl= icit file list") + + work_dir =3D os.path.abspath(args.work_dir) + + # Initialize cache + cache =3D CacheManager(work_dir) + cache.initialize() + + # 
Validate git repository + git_helper =3D GitHelper(work_dir) + if not git_helper.is_inside_repository(): + raise RuntimeError("Must run inside Git repository") + + old_commit, new_commit =3D args.commits + + old_commit =3D git_helper.get_short_hash(old_commit) + new_commit =3D git_helper.get_short_hash(new_commit) + + # Validate commits + for commit in [old_commit, new_commit]: + if not git_helper.is_valid_commit(commit): + raise RuntimeError(f"Commit '{commit}' does not exist") + + # Check for uncommitted changes + if git_helper.has_uncommitted_changes(): + raise RuntimeError("Uncommitted changes present. Commit or stash f= irst.") + + runner =3D KernelDocRunner(git_helper.work_dir, KDOC_BINARY) + + # Get files to be parsed + cache_msg =3D " (results will be cached)" + if args.full: + kdoc_files =3D ["."] + diff_type =3D "full" + print(f"Parsing all files at {work_dir}") + if not args.files: + diff_type =3D "partial" + kdoc_files =3D runner.find_kdoc_references() + print(f"Parsing files with kernel-doc markups at {work_dir}/Docume= ntation") + else: + diff_type =3D "no-cache" + cache_msg =3D "" + kdoc_files =3D args.files + + if args.regression: + cache_msg =3D "" + + out_path =3D DIR_NAME[diff_type] + print(f"Output will be stored at: {out_path}{cache_msg}") + + # Just in case - should never happen in practice + if not kdoc_files: + raise argparse.ArgumentError(args.files, + "No kernel-doc references found") + + original_head =3D git_helper.get_current_branch() + tmp_dir =3D DIR_NAME["tmp"] + + old_cache =3D cache.get_commit_cache(old_commit, out_path) + new_cache =3D cache.get_commit_cache(new_commit, out_path) + + with SignalHandler(git_helper, original_head): + if args.clean or diff_type =3D=3D "no-cache": + for cache_dir in [old_cache, new_cache]: + if cache_dir and os.path.exists(cache_dir): + shutil.rmtree(cache_dir) + + if args.regression or not os.path.exists(old_cache): + old_success =3D runner.run(old_commit, tmp_dir, old_cache, kdo= c_files, + 
args.regression, False) + else: + old_success =3D True + + if args.regression or not os.path.exists(new_cache): + new_success =3D runner.run(new_commit, tmp_dir, new_cache, kdo= c_files, + args.regression, True) + else: + new_success =3D True + + if not (old_success and new_success): + raise RuntimeError("Failed to generate documentation") + + if not args.regression: + diff_manager =3D DiffManager() + diff_manager.diff_directories(old_cache, new_cache) + +if __name__ =3D=3D "__main__": + main() --=20 2.52.0