mirror of
https://github.com/torvalds/linux.git
synced 2026-04-18 06:44:00 -04:00
docs: kdoc_diff: add a helper tool to help checking kdoc regressions
Checking for regressions at kernel-doc can be hard. Add a helper tool to make such task easier. Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org> Signed-off-by: Jonathan Corbet <corbet@lwn.net> Message-ID: <24b3116a78348b13a74d1ff5e141160ef9705dd3.1774551940.git.mchehab+huawei@kernel.org>
This commit is contained in:
committed by
Jonathan Corbet
parent
07f6cb18c5
commit
210a923aab
508
tools/docs/kdoc_diff
Executable file
508
tools/docs/kdoc_diff
Executable file
@@ -0,0 +1,508 @@
|
||||
#!/usr/bin/env python3
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
# Copyright(c) 2026: Mauro Carvalho Chehab <mchehab@kernel.org>.
|
||||
#
|
||||
# pylint: disable=R0903,R0912,R0913,R0914,R0915,R0917
|
||||
|
||||
"""
|
||||
kdoc_diff - Check differences in kernel-doc output between two different
|
||||
commits.
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
Compare the kernel-doc output between two release tags::
|
||||
|
||||
$ kdoc_diff v6.18..v6.19
|
||||
|
||||
Both outputs are cached
|
||||
|
||||
Force a complete documentation scan and clean any previous cache from
|
||||
6.19 to the current HEAD::
|
||||
|
||||
$ kdoc_diff 6.19.. --full --clean
|
||||
|
||||
Check differences only on a single driver since origin/main::
|
||||
|
||||
$ kdoc_diff origin/main drivers/media
|
||||
|
||||
Generate a YAML file and use it to check for regressions::
|
||||
|
||||
$ kdoc_diff HEAD~ drivers/media --regression
|
||||
|
||||
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import subprocess
|
||||
import shutil
|
||||
import re
|
||||
import signal
|
||||
|
||||
from glob import iglob
|
||||
|
||||
|
||||
# Directory containing this script; the kernel tree root is two levels up
SRC_DIR = os.path.dirname(os.path.realpath(__file__))
WORK_DIR = os.path.abspath(os.path.join(SRC_DIR, "../.."))

# kernel-doc executable shipped next to this script
KDOC_BINARY = os.path.join(SRC_DIR, "kernel-doc")
# Unit-test driver used by the --regression mode
KDOC_PARSER_TEST = os.path.join(WORK_DIR, "tools/unittests/test_kdoc_parser.py")

# Cache layout, relative to the work dir
CACHE_DIR = ".doc_diff_cache"
# Name of the YAML file generated for regression runs
YAML_NAME = "out.yaml"

# One cache sub-directory per run mode; "tmp" holds in-progress output
# that is only promoted to its final place on success (see normal_run)
DIR_NAME = {
    "full": os.path.join(CACHE_DIR, "full"),
    "partial": os.path.join(CACHE_DIR, "partial"),
    "no-cache": os.path.join(CACHE_DIR, "no_cache"),
    "tmp": os.path.join(CACHE_DIR, "__tmp__"),
}
|
||||
|
||||
class GitHelper:
    """Handles all Git operations, run inside *work_dir*."""

    def __init__(self, work_dir=None):
        """Store the directory where git commands are executed.

        work_dir: repository path, or None to use the current directory.
        """
        self.work_dir = work_dir

    def is_inside_repository(self):
        """Check if we're inside a Git repository."""
        try:
            output = subprocess.check_output(["git", "rev-parse",
                                              "--is-inside-work-tree"],
                                             cwd=self.work_dir,
                                             stderr=subprocess.STDOUT,
                                             universal_newlines=True)

            return output.strip() == "true"
        except subprocess.CalledProcessError:
            return False

    def is_valid_commit(self, commit_hash):
        """
        Validate that a ref (branch, tag, commit hash, etc.) can be
        resolved to a commit.
        """
        try:
            subprocess.check_output(["git", "rev-parse", commit_hash],
                                    cwd=self.work_dir,
                                    stderr=subprocess.STDOUT)
            return True
        except subprocess.CalledProcessError:
            return False

    def get_short_hash(self, commit_hash):
        """Return the abbreviated hash for a ref, or "" when unresolvable."""
        try:
            return subprocess.check_output(["git", "rev-parse", "--short",
                                            commit_hash],
                                           cwd=self.work_dir,
                                           stderr=subprocess.STDOUT,
                                           universal_newlines=True).strip()
        except subprocess.CalledProcessError:
            return ""

    def has_uncommitted_changes(self):
        """Return True when the work tree differs from HEAD."""
        try:
            subprocess.check_output(["git", "diff-index",
                                     "--quiet", "HEAD", "--"],
                                    cwd=self.work_dir,
                                    stderr=subprocess.STDOUT)
            return False
        except subprocess.CalledProcessError:
            return True

    def get_current_branch(self):
        """Return the current branch name ("" on a detached HEAD)."""
        return subprocess.check_output(["git", "branch", "--show-current"],
                                       cwd=self.work_dir,
                                       universal_newlines=True).strip()

    def checkout_commit(self, commit_hash, quiet=True):
        """Force-checkout *commit_hash*; return True on success.

        Raises RuntimeError when git reports success but HEAD does not
        actually point at the requested commit.
        """
        args = ["git", "checkout", "-f"]
        if quiet:
            args.append("-q")
        args.append(commit_hash)
        try:
            subprocess.check_output(args, cwd=self.work_dir,
                                    stderr=subprocess.STDOUT)

            # Double-check if HEAD actually moved.  Resolve the requested
            # ref to its short hash too, so the comparison also works when
            # the caller passed a branch/tag name instead of a short hash.
            head = self.get_short_hash("HEAD")
            if self.get_short_hash(commit_hash) != head:
                raise RuntimeError(f"Branch changed to '{head}' instead of '{commit_hash}'")

            return True
        except subprocess.CalledProcessError as e:
            print(f"ERROR: Failed to checkout {commit_hash}: {e}",
                  file=sys.stderr)
            return False
|
||||
|
||||
|
||||
class CacheManager:
    """Creates and locates the persistent on-disk cache directories."""

    def __init__(self, work_dir):
        """Remember the base directory under which all caches live."""
        self.work_dir = work_dir

    def initialize(self):
        """Make sure every cache directory from DIR_NAME exists."""
        for rel_path in DIR_NAME.values():
            full_path = os.path.join(self.work_dir, rel_path)
            if not os.path.exists(full_path):
                os.makedirs(full_path, exist_ok=True, mode=0o755)

    def get_commit_cache(self, commit_hash, path):
        """Return the cache directory used for *commit_hash* under *path*.

        Falls back to the raw ref string when it cannot be resolved to
        a short hash.
        """
        short = GitHelper(self.work_dir).get_short_hash(commit_hash)
        return os.path.join(path, short if short else commit_hash)
|
||||
|
||||
class KernelDocRunner:
    """Runs the kernel-doc documentation generator in its several modes.

    Holds the work dir, the path to the kernel-doc binary, and a cached
    list of source files referenced from Documentation/ rst files.
    """

    def __init__(self, work_dir, kdoc_binary):
        # Directory where kernel-doc is executed (the kernel tree root)
        self.work_dir = work_dir
        # Path to the kernel-doc executable
        self.kdoc_binary = kdoc_binary
        # Lazily-filled cache for find_kdoc_references(); None until scanned
        self.kdoc_files = None

    def find_kdoc_references(self):
        """Find all files marked with kernel-doc:: directives.

        Scans Documentation/**/*.rst once and caches the result; later
        calls reuse the cached list.  Returns a list of source paths as
        they appear in the directives.
        """
        # NOTE: an empty previous scan result ([]) is falsy and would be
        # re-scanned rather than reused — harmless in practice.
        if self.kdoc_files:
            print("Using cached Kdoc refs")
            return self.kdoc_files

        print("Finding kernel-doc entries in Documentation...")

        files = os.path.join(self.work_dir, 'Documentation/**/*.rst')
        # Matches lines like ".. kernel-doc:: <path>" and captures <path>
        pattern = re.compile(r"^\.\.\s+kernel-doc::\s*(\S+)")
        kdoc_files = set()

        for file_path in iglob(files, recursive=True):
            try:
                with open(file_path, 'r', encoding='utf-8') as fp:
                    for line in fp:
                        match = pattern.match(line.strip())
                        if match:
                            kdoc_files.add(match.group(1))

            except OSError:
                # Unreadable files are simply skipped
                continue

        self.kdoc_files = list(kdoc_files)

        return self.kdoc_files

    def gen_yaml(self, yaml_file, kdoc_files):
        """Runs kernel-doc to generate a yaml file with man and rst.

        Returns True on success, False when kernel-doc exits non-zero.
        """
        cmd = [self.kdoc_binary, "--man", "--rst", "--yaml", yaml_file]
        cmd += kdoc_files

        print(f"YAML regression test file will be stored at: {yaml_file}")

        try:
            # Only the YAML file matters here; stdout/stderr are discarded
            subprocess.check_call(cmd, cwd=self.work_dir,
                                  stdout=subprocess.DEVNULL,
                                  stderr=subprocess.DEVNULL)
        except subprocess.CalledProcessError:
            return False

        return True

    def run_unittest(self, yaml_file):
        """Run unit tests with the generated yaml file.

        Prints a hint on how to reproduce failures.  Always returns True:
        presumably so a regression report does not abort the overall run —
        TODO confirm this is intentional rather than a dropped returncode.
        """
        cmd = [KDOC_PARSER_TEST, "-q", "--yaml", yaml_file]
        result = subprocess.run(cmd, cwd=self.work_dir)

        if result.returncode:
            print("To check for problems, try to run it again with -v\n")
            print("Use -k <regex> to filter results\n\n\t$", end="")
            print(" ".join(cmd) + "\n")

        return True

    def normal_run(self, tmp_dir, output_dir, kdoc_files):
        """Generate man, rst and errors, storing them at tmp_dir.

        On success the tmp dir is atomically renamed to *output_dir*
        (when given), so only complete outputs ever land in the cache.
        Returns True on success, False when kernel-doc fails.
        """
        os.makedirs(tmp_dir, exist_ok=True)

        try:
            with open(os.path.join(tmp_dir, "man.log"), "w", encoding="utf-8") as out:
                subprocess.check_call([self.kdoc_binary, "--man"] + kdoc_files,
                                      cwd=self.work_dir,
                                      stdout=out, stderr=subprocess.DEVNULL)

            with open(os.path.join(tmp_dir, "rst.log"), "w", encoding="utf-8") as out:
                with open(os.path.join(tmp_dir, "err.log"), "w", encoding="utf-8") as err:
                    # Warnings emitted during the rst pass go to err.log
                    subprocess.check_call([self.kdoc_binary, "--rst"] + kdoc_files,
                                          cwd=self.work_dir,
                                          stdout=out, stderr=err)
        except subprocess.CalledProcessError:
            return False

        if output_dir:
            # Promote the finished output to its cached location
            os.replace(tmp_dir, output_dir)

        return True

    def run(self, commit_hash, tmp_dir, output_dir, kdoc_files, is_regression,
            is_end):
        """Run kernel-doc on its several ways.

        Checks out *commit_hash* first (the caller is responsible for
        restoring HEAD).  Dispatch:
          - not is_regression: plain man/rst generation (normal_run);
          - is_regression and not is_end: generate the reference YAML
            for the first commit of the range;
          - is_regression and is_end: run the unit tests of the second
            commit against that YAML.
        Returns the chosen step's boolean result; raises RuntimeError
        when there is nothing to parse or the checkout fails.
        """
        if not kdoc_files:
            raise RuntimeError("No kernel-doc references found")

        git_helper = GitHelper(self.work_dir)
        if not git_helper.checkout_commit(commit_hash, quiet=True):
            raise RuntimeError(f"ERROR: can't checkout commit {commit_hash}")

        print(f"Processing {commit_hash}...")

        if not is_regression:
            return self.normal_run(tmp_dir, output_dir, kdoc_files)

        yaml_file = os.path.join(tmp_dir, YAML_NAME)

        if not is_end:
            return self.gen_yaml(yaml_file, kdoc_files)

        return self.run_unittest(yaml_file)
|
||||
|
||||
class DiffManager:
    """Compare documentation output directories with an external diff."""

    def __init__(self, diff_tool="diff", diff_args=None):
        """Configure the diff executable and the options passed to it."""
        self.diff_tool = diff_tool
        # default: unified, no context, ignore whitespace changes
        self.diff_args = diff_args if diff_args else ["-u0", "-w"]

    def diff_directories(self, dir1, dir2):
        """Compare two directories using an external diff."""
        print(f"\nDiffing {dir1} and {dir2}:")

        def collect(base):
            # Every file in the tree, keyed by its path relative to base
            found = set()
            for root, _, names in os.walk(base):
                for name in names:
                    found.add(os.path.relpath(os.path.join(root, name), base))
            return found

        dir1_files = collect(dir1)
        dir2_files = collect(dir2)
        has_diff = False

        # Files present on both sides: run the external diff on each pair
        for rel in sorted(dir1_files & dir2_files):
            cmd = [self.diff_tool] + self.diff_args
            cmd += [os.path.join(dir1, rel), os.path.join(dir2, rel)]
            try:
                result = subprocess.run(
                    cmd, capture_output=True, text=True, check=False
                )
            except FileNotFoundError:
                print(f"ERROR: {self.diff_tool} not found")
                sys.exit(1)
            if result.stdout:
                has_diff = True
                print(f"\n{rel}")
                print(result.stdout, end="")

        # Show files that exist only in one directory
        only_in_dir1 = dir1_files - dir2_files
        only_in_dir2 = dir2_files - dir1_files
        if only_in_dir1 or only_in_dir2:
            has_diff = True
            print("\nDifferential files:")
            for rel in sorted(only_in_dir1):
                print(f" - {rel} (only in {dir1})")
            for rel in sorted(only_in_dir2):
                print(f" + {rel} (only in {dir2})")

        if not has_diff:
            print("\nNo differences between those two commits")
|
||||
|
||||
|
||||
class SignalHandler:
    """Context manager restoring HEAD on exit or on SIGINT/SIGTERM."""

    def __init__(self, git_helper, original_head):
        """Remember where to return to and set up handler bookkeeping."""
        self.git_helper = git_helper
        self.original_head = original_head
        self.old_handler = {}
        self.restored = False

    def __enter__(self):
        """Install our interrupt handlers, saving the previous ones."""
        for sig in [signal.SIGINT, signal.SIGTERM]:
            self.old_handler[sig] = signal.getsignal(sig)
            signal.signal(sig, self.signal_handler)

        return self

    def __exit__(self, *args):
        """Restore HEAD and the signal handlers when the block ends."""
        self.restore()

    def signal_handler(self, sig, _):
        """Handle interrupt signals: restore state, then exit."""
        print(f"\nSignal {sig} received. Restoring original state...")

        self.restore(force_exit=True)

    def restore(self, force_exit=False):
        """Check out the original HEAD and reinstate saved handlers.

        Idempotent: the second and later calls are no-ops.
        """
        if self.restored:
            return

        print(f"Restoring original branch: {self.original_head}")
        try:
            subprocess.check_call(
                ["git", "checkout", "-f", self.original_head],
                cwd=self.git_helper.work_dir,
                stderr=subprocess.STDOUT,
            )
        except subprocess.CalledProcessError as exc:
            print(f"Failed to restore: {exc}", file=sys.stderr)

        for sig, old in self.old_handler.items():
            signal.signal(sig, old)

        self.restored = True

        if force_exit:
            sys.exit(1)
|
||||
|
||||
def parse_commit_range(value):
    """argparse type callback: split "old[..new]" into (begin, end).

    A missing "..new" part, or an empty "new", defaults to HEAD.

    Raises argparse.ArgumentTypeError when the begin commit is missing.
    """
    # str.partition already covers the no-".." case: it returns the whole
    # string as "begin" with an empty "end", which then defaults to HEAD.
    begin, _, end = value.partition("..")
    if not end:
        end = "HEAD"

    if not begin:
        # Typo fix: was "begginning"
        raise argparse.ArgumentTypeError("Need a commit beginning")

    print(f"Range: {begin} to {end}")

    return begin, end
|
||||
|
||||
|
||||
def main():
    """Compare kernel-doc output between two commits.

    Parses the command line, validates the Git work tree, decides which
    files to feed to kernel-doc, runs it on both ends of the commit
    range and finally either diffs the generated output or runs the
    YAML-based regression tests.

    Raises RuntimeError for repository problems; exits via parser.error()
    on invalid argument combinations.
    """
    parser = argparse.ArgumentParser(description="Compare kernel documentation between commits")
    parser.add_argument("commits", type=parse_commit_range,
                        help="commit range like old..new")
    parser.add_argument("files", nargs="*",
                        help="files to process – if supplied the --full flag is ignored")

    parser.add_argument("--full", "-f", action="store_true",
                        help="Force a full scan of Documentation/*")

    parser.add_argument("--regression", "-r", action="store_true",
                        help="Use YAML format to check for regressions")

    parser.add_argument("--work-dir", "-w", default=WORK_DIR,
                        help="work dir (default: %(default)s)")

    parser.add_argument("--clean", "-c", action="store_true",
                        help="Clean caches")

    args = parser.parse_args()

    if args.files and args.full:
        # Bug fix: argparse.ArgumentError expects an Action object, not a
        # parsed value, and would raise AttributeError here.  parser.error()
        # prints usage and exits with status 2 instead.
        parser.error("cannot combine '--full' with an explicit file list")

    work_dir = os.path.abspath(args.work_dir)

    # Initialize cache
    cache = CacheManager(work_dir)
    cache.initialize()

    # Validate git repository
    git_helper = GitHelper(work_dir)
    if not git_helper.is_inside_repository():
        raise RuntimeError("Must run inside Git repository")

    old_commit, new_commit = args.commits

    # Normalize both ends of the range to short hashes
    old_commit = git_helper.get_short_hash(old_commit)
    new_commit = git_helper.get_short_hash(new_commit)

    # Validate commits
    for commit in [old_commit, new_commit]:
        if not git_helper.is_valid_commit(commit):
            raise RuntimeError(f"Commit '{commit}' does not exist")

    # Check for uncommitted changes: the tool checks commits out, so a
    # dirty tree would be clobbered
    if git_helper.has_uncommitted_changes():
        raise RuntimeError("Uncommitted changes present. Commit or stash first.")

    runner = KernelDocRunner(git_helper.work_dir, KDOC_BINARY)

    # Get files to be parsed
    cache_msg = " (results will be cached)"
    if args.full:
        kdoc_files = ["."]
        diff_type = "full"
        print(f"Parsing all files at {work_dir}")
    elif not args.files:
        # Bug fix: must be elif — a plain "if" here would always override
        # the --full selection made just above.
        diff_type = "partial"
        kdoc_files = runner.find_kdoc_references()
        print(f"Parsing files with kernel-doc markups at {work_dir}/Documentation")
    else:
        diff_type = "no-cache"
        cache_msg = ""
        kdoc_files = args.files

    tmp_dir = DIR_NAME["tmp"]
    out_path = DIR_NAME[diff_type]

    if not args.regression:
        print(f"Output will be stored at: {out_path}{cache_msg}")

    # Just in case - should never happen in practice
    if not kdoc_files:
        parser.error("No kernel-doc references found")

    # On a detached HEAD --show-current yields "", which could not be
    # checked out again; fall back to the commit hash
    original_head = git_helper.get_current_branch() or git_helper.get_short_hash("HEAD")

    old_cache = cache.get_commit_cache(old_commit, out_path)
    new_cache = cache.get_commit_cache(new_commit, out_path)

    # The SignalHandler context restores the original HEAD on normal
    # exit and on SIGINT/SIGTERM
    with SignalHandler(git_helper, original_head):
        if args.clean or diff_type == "no-cache":
            for cache_dir in [old_cache, new_cache]:
                if cache_dir and os.path.exists(cache_dir):
                    shutil.rmtree(cache_dir)

        # Regression mode never uses cached results; otherwise reuse the
        # cache when present
        if args.regression or not os.path.exists(old_cache):
            old_success = runner.run(old_commit, tmp_dir, old_cache, kdoc_files,
                                     args.regression, False)
        else:
            old_success = True

        if args.regression or not os.path.exists(new_cache):
            new_success = runner.run(new_commit, tmp_dir, new_cache, kdoc_files,
                                     args.regression, True)
        else:
            new_success = True

        if not (old_success and new_success):
            raise RuntimeError("Failed to generate documentation")

        if not args.regression:
            diff_manager = DiffManager()
            diff_manager.diff_directories(old_cache, new_cache)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user