mirror of
https://github.com/torvalds/linux.git
synced 2026-04-18 14:53:58 -04:00
Checking for regressions at kernel-doc can be hard. Add a helper tool to make such task easier. Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org> Signed-off-by: Jonathan Corbet <corbet@lwn.net> Message-ID: <24b3116a78348b13a74d1ff5e141160ef9705dd3.1774551940.git.mchehab+huawei@kernel.org>
509 lines
17 KiB
Python
Executable File
509 lines
17 KiB
Python
Executable File
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
# Copyright(c) 2026: Mauro Carvalho Chehab <mchehab@kernel.org>.
#
# pylint: disable=R0903,R0912,R0913,R0914,R0915,R0917

"""
docdiff - Check differences between kernel-doc output between two different
commits.

Examples
--------

Compare the kernel-doc output between two releases::

    $ kdoc_diff v6.18..v6.19

Both outputs are cached

Force a complete documentation scan and clean any previous cache from
6.19 to the current HEAD::

    $ kdoc_diff 6.19.. --full --clean

Check differences only on a single driver since origin/main::

    $ kdoc_diff origin/main drivers/media

Generate a YAML file and use it to check for regressions::

    $ kdoc_diff HEAD~ drivers/media --regression

"""

import os
import sys
import argparse
import subprocess
import shutil
import re
import signal

from glob import iglob


# Directory where this script lives; WORK_DIR is two levels above it
# (presumably the kernel source tree root -- TODO confirm layout).
SRC_DIR = os.path.dirname(os.path.realpath(__file__))
WORK_DIR = os.path.abspath(os.path.join(SRC_DIR, "../.."))

# External tools: the kernel-doc generator (next to this script) and the
# kernel-doc parser unit-test driver used by --regression mode.
KDOC_BINARY = os.path.join(SRC_DIR, "kernel-doc")
KDOC_PARSER_TEST = os.path.join(WORK_DIR, "tools/unittests/test_kdoc_parser.py")

# Cache layout: one subdirectory per scan type under CACHE_DIR.
CACHE_DIR = ".doc_diff_cache"
YAML_NAME = "out.yaml"

DIR_NAME = {
    "full": os.path.join(CACHE_DIR, "full"),
    "partial": os.path.join(CACHE_DIR, "partial"),
    "no-cache": os.path.join(CACHE_DIR, "no_cache"),
    "tmp": os.path.join(CACHE_DIR, "__tmp__"),
}
|
||
|
||
class GitHelper:
    """Handles all Git operations.

    Every command runs inside ``work_dir`` (or the process CWD when
    ``work_dir`` is None) through subprocess, so the helper works no
    matter where the script itself was started from.
    """

    def __init__(self, work_dir=None):
        self.work_dir = work_dir

    def is_inside_repository(self):
        """Check if we're inside a Git repository"""
        try:
            output = subprocess.check_output(["git", "rev-parse",
                                              "--is-inside-work-tree"],
                                             cwd=self.work_dir,
                                             stderr=subprocess.STDOUT,
                                             universal_newlines=True)

            return output.strip() == "true"
        except subprocess.CalledProcessError:
            return False

    def is_valid_commit(self, commit_hash):
        """
        Validate that a ref (branch, tag, commit hash, etc.) can be
        resolved to a commit.
        """
        try:
            subprocess.check_output(["git", "rev-parse", commit_hash],
                                    cwd=self.work_dir,
                                    stderr=subprocess.STDOUT)
            return True
        except subprocess.CalledProcessError:
            return False

    def get_short_hash(self, commit_hash):
        """Get short commit hash, or "" when the ref can't be resolved."""
        try:
            return subprocess.check_output(["git", "rev-parse", "--short",
                                            commit_hash],
                                           cwd=self.work_dir,
                                           stderr=subprocess.STDOUT,
                                           universal_newlines=True).strip()
        except subprocess.CalledProcessError:
            return ""

    def has_uncommitted_changes(self):
        """Check for uncommitted changes (work tree differs from HEAD)."""
        try:
            subprocess.check_output(["git", "diff-index",
                                     "--quiet", "HEAD", "--"],
                                    cwd=self.work_dir,
                                    stderr=subprocess.STDOUT)
            return False
        except subprocess.CalledProcessError:
            return True

    def get_current_branch(self):
        """Get current branch name ("" on a detached HEAD)."""
        return subprocess.check_output(["git", "branch", "--show-current"],
                                       cwd=self.work_dir,
                                       universal_newlines=True).strip()

    def checkout_commit(self, commit_hash, quiet=True):
        """Checkout a commit safely.

        Returns True on success, False when git itself failed.  Raises
        RuntimeError when git reported success but HEAD does not point
        at the requested commit.
        """
        args = ["git", "checkout", "-f"]
        if quiet:
            args.append("-q")
        args.append(commit_hash)
        try:
            subprocess.check_output(args, cwd=self.work_dir,
                                    stderr=subprocess.STDOUT)

            # Double-check if HEAD actually moved.  Resolve BOTH sides to
            # the abbreviated hash: comparing the raw argument against the
            # short HEAD hash would spuriously fail for branch names, tags
            # or full-length hashes even though the checkout succeeded.
            expected = self.get_short_hash(commit_hash)
            branch = self.get_short_hash("HEAD")
            if expected != branch:
                raise RuntimeError(f"Branch changed to '{branch}' instead of '{commit_hash}'")

            return True
        except subprocess.CalledProcessError as e:
            print(f"ERROR: Failed to checkout {commit_hash}: {e}",
                  file=sys.stderr)
            return False
||
|
||
|
||
class CacheManager:
    """Manages persistent cache directories below the work dir."""

    def __init__(self, work_dir):
        self.work_dir = work_dir

    def initialize(self):
        """Create cache directories if they don't exist.

        ``exist_ok=True`` already makes the creation idempotent, so the
        previous ``os.path.exists()`` pre-check was a redundant, racy
        LBYL test and has been dropped.
        """
        for dir_path in DIR_NAME.values():
            abs_path = os.path.join(self.work_dir, dir_path)
            os.makedirs(abs_path, exist_ok=True, mode=0o755)

    def get_commit_cache(self, commit_hash, path):
        """Generate the cache path for a commit.

        Uses the abbreviated hash as the directory name, falling back to
        the literal ref string when the ref can't be resolved.
        """
        hash_short = GitHelper(self.work_dir).get_short_hash(commit_hash)
        return os.path.join(path, hash_short or commit_hash)
||
|
||
class KernelDocRunner:
    """Drive the kernel-doc generator and collect its output."""

    def __init__(self, work_dir, kdoc_binary):
        self.work_dir = work_dir
        self.kdoc_binary = kdoc_binary
        self.kdoc_files = None          # memoized list of referenced sources

    def find_kdoc_references(self):
        """Find all files marked with kernel-doc:: directives"""
        if self.kdoc_files:
            print("Using cached Kdoc refs")
            return self.kdoc_files

        print("Finding kernel-doc entries in Documentation...")

        glob_expr = os.path.join(self.work_dir, 'Documentation/**/*.rst')
        directive = re.compile(r"^\.\.\s+kernel-doc::\s*(\S+)")
        found = set()

        for rst_path in iglob(glob_expr, recursive=True):
            try:
                with open(rst_path, 'r', encoding='utf-8') as handle:
                    for raw in handle:
                        hit = directive.match(raw.strip())
                        if hit:
                            found.add(hit.group(1))
            except OSError:
                # Unreadable file: skip it and keep scanning.
                continue

        self.kdoc_files = list(found)
        return self.kdoc_files

    def gen_yaml(self, yaml_file, kdoc_files):
        """Runs kernel-doc to generate a yaml file with man and rst."""
        command = [self.kdoc_binary, "--man", "--rst", "--yaml", yaml_file,
                   *kdoc_files]

        print(f"YAML regression test file will be stored at: {yaml_file}")

        try:
            subprocess.check_call(command, cwd=self.work_dir,
                                  stdout=subprocess.DEVNULL,
                                  stderr=subprocess.DEVNULL)
            return True
        except subprocess.CalledProcessError:
            return False

    def run_unittest(self, yaml_file):
        """Run unit tests with the generated yaml file"""
        command = [KDOC_PARSER_TEST, "-q", "--yaml", yaml_file]
        if subprocess.run(command, cwd=self.work_dir).returncode:
            print("To check for problems, try to run it again with -v\n")
            print("Use -k <regex> to filter results\n\n\t$", end="")
            print(" ".join(command) + "\n")

        # Failures were already reported by the test runner itself.
        return True

    def normal_run(self, tmp_dir, output_dir, kdoc_files):
        """Generate man, rst and errors, storing them at tmp_dir."""
        os.makedirs(tmp_dir, exist_ok=True)

        man_log = os.path.join(tmp_dir, "man.log")
        rst_log = os.path.join(tmp_dir, "rst.log")
        err_log = os.path.join(tmp_dir, "err.log")

        try:
            with open(man_log, "w", encoding="utf-8") as out:
                subprocess.check_call([self.kdoc_binary, "--man"] + kdoc_files,
                                      cwd=self.work_dir,
                                      stdout=out, stderr=subprocess.DEVNULL)

            with open(rst_log, "w", encoding="utf-8") as out, \
                 open(err_log, "w", encoding="utf-8") as err:
                subprocess.check_call([self.kdoc_binary, "--rst"] + kdoc_files,
                                      cwd=self.work_dir,
                                      stdout=out, stderr=err)
        except subprocess.CalledProcessError:
            return False

        if output_dir:
            # Promote the finished results into the cache location.
            os.replace(tmp_dir, output_dir)

        return True

    def run(self, commit_hash, tmp_dir, output_dir, kdoc_files, is_regression,
            is_end):
        """Run kernel-doc on its several ways"""
        if not kdoc_files:
            raise RuntimeError("No kernel-doc references found")

        if not GitHelper(self.work_dir).checkout_commit(commit_hash, quiet=True):
            raise RuntimeError(f"ERROR: can't checkout commit {commit_hash}")

        print(f"Processing {commit_hash}...")

        if not is_regression:
            return self.normal_run(tmp_dir, output_dir, kdoc_files)

        yaml_file = os.path.join(tmp_dir, YAML_NAME)

        if is_end:
            return self.run_unittest(yaml_file)
        return self.gen_yaml(yaml_file, kdoc_files)
||
|
||
class DiffManager:
    """Compare documentation output directories with an external diff."""

    def __init__(self, diff_tool="diff", diff_args=None):
        self.diff_tool = diff_tool
        # default: unified, no context, ignore whitespace changes
        self.diff_args = diff_args if diff_args else ["-u0", "-w"]

    def _collect(self, base):
        """Return the set of file paths below *base*, relative to it."""
        names = set()
        for root, _, files in os.walk(base):
            for name in files:
                names.add(os.path.relpath(os.path.join(root, name), base))
        return names

    def diff_directories(self, dir1, dir2):
        """Compare two directories using an external diff."""
        print(f"\nDiffing {dir1} and {dir2}:")

        dir1_files = self._collect(dir1)
        dir2_files = self._collect(dir2)
        has_diff = False

        # Diff each file present on both sides.
        for rel in sorted(dir1_files & dir2_files):
            cmd = [self.diff_tool, *self.diff_args,
                   os.path.join(dir1, rel), os.path.join(dir2, rel)]
            try:
                proc = subprocess.run(
                    cmd, capture_output=True, text=True, check=False
                )
                if proc.stdout:
                    has_diff = True
                    print(f"\n{rel}")
                    print(proc.stdout, end="")
            except FileNotFoundError:
                print(f"ERROR: {self.diff_tool} not found")
                sys.exit(1)

        # Show files that exist only in one directory
        only_in_dir1 = dir1_files - dir2_files
        only_in_dir2 = dir2_files - dir1_files
        if only_in_dir1 or only_in_dir2:
            has_diff = True
            print("\nDifferential files:")
            for f in sorted(only_in_dir1):
                print(f" - {f} (only in {dir1})")
            for f in sorted(only_in_dir2):
                print(f" + {f} (only in {dir2})")

        if not has_diff:
            print("\nNo differences between those two commits")
||
|
||
|
||
class SignalHandler():
    """Context manager that restores the original HEAD on SIGINT/SIGTERM."""

    def __init__(self, git_helper, original_head):
        self.git_helper = git_helper
        self.original_head = original_head
        self.old_handler = {}
        self.restored = False

    def restore(self, force_exit=False):
        """Restore original HEAD state."""
        if self.restored:
            return

        print(f"Restoring original branch: {self.original_head}")
        try:
            subprocess.check_call(
                ["git", "checkout", "-f", self.original_head],
                cwd=self.git_helper.work_dir,
                stderr=subprocess.STDOUT,
            )
        except subprocess.CalledProcessError as e:
            print(f"Failed to restore: {e}", file=sys.stderr)

        # Put the previously-installed signal handlers back in place.
        for signum, previous in self.old_handler.items():
            signal.signal(signum, previous)

        self.restored = True

        if force_exit:
            sys.exit(1)

    def signal_handler(self, sig, _):
        """Handle interrupt signals."""
        print(f"\nSignal {sig} received. Restoring original state...")
        self.restore(force_exit=True)

    def __enter__(self):
        """Install our handlers, remembering the previous ones."""
        for signum in (signal.SIGINT, signal.SIGTERM):
            self.old_handler[signum] = signal.getsignal(signum)
            signal.signal(signum, self.signal_handler)
        return self

    def __exit__(self, *args):
        """Restore signals at the end of with block."""
        self.restore()
||
|
||
def parse_commit_range(value):
    """Parse a "begin..end" commit range argument.

    Accepts a bare ref ("v6.18"), an open-ended range ("v6.18..") or a
    full range ("v6.18..v6.19"); a missing end defaults to "HEAD".

    Returns a (begin, end) tuple.

    Raises argparse.ArgumentTypeError when the beginning ref is empty
    (e.g. "..v6.19").
    """
    # partition() covers all three shapes: no "..", trailing "..", full.
    begin, sep, end = value.partition("..")
    if not sep or not end:
        end = "HEAD"

    if not begin:
        # Fixed typo in the user-visible message ("begginning").
        raise argparse.ArgumentTypeError("Need a commit beginning")

    print(f"Range: {begin} to {end}")

    return begin, end
|
||
|
||
|
||
def main():
    """Parse arguments, build docs for both commits and compare them."""
    parser = argparse.ArgumentParser(description="Compare kernel documentation between commits")
    parser.add_argument("commits", type=parse_commit_range,
                        help="commit range like old..new")
    parser.add_argument("files", nargs="*",
                        help="files to process – if supplied the --full flag is ignored")

    parser.add_argument("--full", "-f", action="store_true",
                        help="Force a full scan of Documentation/*")

    parser.add_argument("--regression", "-r", action="store_true",
                        help="Use YAML format to check for regressions")

    parser.add_argument("--work-dir", "-w", default=WORK_DIR,
                        help="work dir (default: %(default)s)")

    parser.add_argument("--clean", "-c", action="store_true",
                        help="Clean caches")

    args = parser.parse_args()

    if args.files and args.full:
        # BUGFIX: argparse.ArgumentError() requires an argparse Action as
        # its first argument; passing args.full (a bool) crashed with an
        # AttributeError while constructing the exception.  Let the parser
        # report the conflict and exit with usage instead.
        parser.error("cannot combine '--full' with an explicit file list")

    work_dir = os.path.abspath(args.work_dir)

    # Initialize cache
    cache = CacheManager(work_dir)
    cache.initialize()

    # Validate git repository
    git_helper = GitHelper(work_dir)
    if not git_helper.is_inside_repository():
        raise RuntimeError("Must run inside Git repository")

    old_commit, new_commit = args.commits

    old_commit = git_helper.get_short_hash(old_commit)
    new_commit = git_helper.get_short_hash(new_commit)

    # Validate commits
    for commit in [old_commit, new_commit]:
        if not git_helper.is_valid_commit(commit):
            raise RuntimeError(f"Commit '{commit}' does not exist")

    # Check for uncommitted changes: they would be clobbered by the
    # forced checkouts below.
    if git_helper.has_uncommitted_changes():
        raise RuntimeError("Uncommitted changes present. Commit or stash first.")

    runner = KernelDocRunner(git_helper.work_dir, KDOC_BINARY)

    # Get files to be parsed
    cache_msg = " (results will be cached)"
    if args.full:
        kdoc_files = ["."]
        diff_type = "full"
        print(f"Parsing all files at {work_dir}")
    elif not args.files:
        # BUGFIX: this branch was a plain "if"; since --full forbids an
        # explicit file list, it always overrode the full scan with a
        # partial one.  It must only run when --full was NOT given.
        diff_type = "partial"
        kdoc_files = runner.find_kdoc_references()
        print(f"Parsing files with kernel-doc markups at {work_dir}/Documentation")
    else:
        diff_type = "no-cache"
        cache_msg = ""
        kdoc_files = args.files

    tmp_dir = DIR_NAME["tmp"]
    out_path = DIR_NAME[diff_type]

    if not args.regression:
        print(f"Output will be stored at: {out_path}{cache_msg}")

    # Just in case - should never happen in practice.  RuntimeError is
    # used here (not argparse.ArgumentError, which needs an Action).
    if not kdoc_files:
        raise RuntimeError("No kernel-doc references found")

    original_head = git_helper.get_current_branch()

    old_cache = cache.get_commit_cache(old_commit, out_path)
    new_cache = cache.get_commit_cache(new_commit, out_path)

    with SignalHandler(git_helper, original_head):
        # --clean (or an explicit file list) invalidates cached results.
        if args.clean or diff_type == "no-cache":
            for cache_dir in [old_cache, new_cache]:
                if cache_dir and os.path.exists(cache_dir):
                    shutil.rmtree(cache_dir)

        # Regression mode always re-runs; otherwise reuse cached output.
        if args.regression or not os.path.exists(old_cache):
            old_success = runner.run(old_commit, tmp_dir, old_cache, kdoc_files,
                                     args.regression, False)
        else:
            old_success = True

        if args.regression or not os.path.exists(new_cache):
            new_success = runner.run(new_commit, tmp_dir, new_cache, kdoc_files,
                                     args.regression, True)
        else:
            new_success = True

        if not (old_success and new_success):
            raise RuntimeError("Failed to generate documentation")

    if not args.regression:
        diff_manager = DiffManager()
        diff_manager.diff_directories(old_cache, new_cache)


if __name__ == "__main__":
    main()
|