docs: kdoc_parser: move transform lists to a separate file

Over the time, most of the changes at kernel-doc are related
to maintaining a list of transforms to convert macros into pure
C code.

Place such transforms on a separate module, to cleanup the
parser module.

There is an advantage on that: QEMU also uses our own kernel-doc,
but the xforms list there is different. By placing it on a
separate module, we can minimize the differences and make it
easier to keep QEMU in sync with Kernel upstream.

No functional changes.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <ccd74b7589e1fff340a74bf8ed16a974532cb54f.1772469446.git.mchehab+huawei@kernel.org>
This commit is contained in:
Mauro Carvalho Chehab
2026-03-02 17:40:55 +01:00
committed by Jonathan Corbet
parent 95a9429cc6
commit d842057c4a
4 changed files with 168 additions and 141 deletions

View File

@@ -69,89 +69,6 @@ doc_begin_func = KernRe(str(doc_com) + # initial " * '
r'(?:[-:].*)?$', # description (not captured)
cache = False)
#
# Here begins a long set of transformations to turn structure member prefixes
# and macro invocations into something we can parse and generate kdoc for.
#
struct_args_pattern = r'([^,)]+)'
struct_xforms = [
# Strip attributes
(KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '),
(KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '),
(KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '),
(KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '),
(KernRe(r'\s*__packed\s*', re.S), ' '),
(KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '),
(KernRe(r'\s*__private', re.S), ' '),
(KernRe(r'\s*__rcu', re.S), ' '),
(KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '),
(KernRe(r'\s*____cacheline_aligned', re.S), ' '),
(KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''),
#
# Unwrap struct_group macros based on this definition:
# __struct_group(TAG, NAME, ATTRS, MEMBERS...)
# which has variants like: struct_group(NAME, MEMBERS...)
# Only MEMBERS arguments require documentation.
#
# Parsing them happens on two steps:
#
# 1. drop struct group arguments that aren't at MEMBERS,
# storing them as STRUCT_GROUP(MEMBERS)
#
# 2. remove STRUCT_GROUP() ancillary macro.
#
# The original logic used to remove STRUCT_GROUP() using an
# advanced regex:
#
# \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;
#
# with two patterns that are incompatible with
# Python re module, as it has:
#
# - a recursive pattern: (?1)
# - an atomic grouping: (?>...)
#
# I tried a simpler version: but it didn't work either:
# \bSTRUCT_GROUP\(([^\)]+)\)[^;]*;
#
# As it doesn't properly match the end parenthesis on some cases.
#
# So, a better solution was crafted: there's now a NestedMatch
# class that ensures that delimiters after a search are properly
# matched. So, the implementation to drop STRUCT_GROUP() will be
# handled in separate.
#
(KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('),
(KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('),
(KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('),
(KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('),
#
# Replace macros
#
# TODO: use NestedMatch for FOO($1, $2, ...) matches
#
# it is better to also move those to the NestedMatch logic,
# to ensure that parentheses will be properly matched.
#
(KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S),
r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'),
(KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S),
r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'),
(KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'),
(KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
re.S), r'unsigned long \1[1 << ((\2) - 1)]'),
(KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern +
r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'),
(KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' +
struct_args_pattern + r'\)', re.S), r'\2 *\1'),
(KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' +
struct_args_pattern + r'\)', re.S), r'\1 \2[]'),
(KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'),
(KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'),
(KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'),
]
#
# Regexes here are guaranteed to have the end delimiter matching
# the start delimiter. Yet, right now, only one replace group
@@ -161,62 +78,10 @@ struct_nested_prefixes = [
(re.compile(r'\bSTRUCT_GROUP\('), r'\1'),
]
#
# Transforms for function prototypes
#
function_xforms = [
(KernRe(r"^static +"), ""),
(KernRe(r"^extern +"), ""),
(KernRe(r"^asmlinkage +"), ""),
(KernRe(r"^inline +"), ""),
(KernRe(r"^__inline__ +"), ""),
(KernRe(r"^__inline +"), ""),
(KernRe(r"^__always_inline +"), ""),
(KernRe(r"^noinline +"), ""),
(KernRe(r"^__FORTIFY_INLINE +"), ""),
(KernRe(r"__init +"), ""),
(KernRe(r"__init_or_module +"), ""),
(KernRe(r"__exit +"), ""),
(KernRe(r"__deprecated +"), ""),
(KernRe(r"__flatten +"), ""),
(KernRe(r"__meminit +"), ""),
(KernRe(r"__must_check +"), ""),
(KernRe(r"__weak +"), ""),
(KernRe(r"__sched +"), ""),
(KernRe(r"_noprof"), ""),
(KernRe(r"__always_unused *"), ""),
(KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""),
(KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""),
(KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""),
(KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"),
(KernRe(r"__attribute_const__ +"), ""),
(KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""),
]
#
# Transforms for variable prototypes
#
var_xforms = [
(KernRe(r"__read_mostly"), ""),
(KernRe(r"__ro_after_init"), ""),
(KernRe(r"LIST_HEAD\(([\w_]+)\)"), r"struct list_head \1"),
(KernRe(r"(?://.*)$"), ""),
(KernRe(r"(?:/\*.*\*/)"), ""),
(KernRe(r";$"), ""),
]
#
# Ancillary functions
#
def apply_transforms(xforms, text):
"""
Apply a set of transforms to a block of text.
"""
for search, subst in xforms:
text = search.sub(subst, text)
return text
multi_space = KernRe(r'\s\s+')
def trim_whitespace(s):
"""
@@ -395,11 +260,12 @@ class KernelDoc:
#: String to write when a parameter is not described.
undescribed = "-- undescribed --"
def __init__(self, config, fname):
def __init__(self, config, fname, xforms):
"""Initialize internal variables"""
self.fname = fname
self.config = config
self.xforms = xforms
# Initial state for the state machines
self.state = state.NORMAL
@@ -883,7 +749,7 @@ class KernelDoc:
# Go through the list of members applying all of our transformations.
#
members = trim_private_members(members)
members = apply_transforms(struct_xforms, members)
members = self.xforms.apply("struct", members)
nested = NestedMatch()
for search, sub in struct_nested_prefixes:
@@ -1009,8 +875,7 @@ class KernelDoc:
# Drop comments and macros to have a pure C prototype
#
if not declaration_name:
for r, sub in var_xforms:
proto = r.sub(sub, proto)
proto = self.xforms.apply("var", proto)
proto = proto.rstrip()
@@ -1091,7 +956,7 @@ class KernelDoc:
#
# Apply the initial transformations.
#
prototype = apply_transforms(function_xforms, prototype)
prototype = self.xforms.apply("func", prototype)
# Yes, this truly is vile. We are looking for:
# 1. Return type (may be nothing if we're looking at a macro)