From d966dc658ce381c56d85cd477e095944b8470379 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 8 Apr 2025 18:09:11 +0800 Subject: scripts/kernel-doc.py: move KernelDoc class to a separate file In preparation for letting kerneldoc Sphinx extension to import Python libraries, move regex ancillary classes to a separate file. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/c76df228504e711c6b4bcd23d5a0ea1fda678cda.1744106241.git.mchehab+huawei@kernel.org --- scripts/lib/kdoc/kdoc_parser.py | 1690 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 1690 insertions(+) create mode 100755 scripts/lib/kdoc/kdoc_parser.py (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py new file mode 100755 index 000000000000..3ce116595546 --- /dev/null +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -0,0 +1,1690 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2025: Mauro Carvalho Chehab . +# +# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702 + +""" +kdoc_parser +=========== + +Read a C language source or header FILE and extract embedded +documentation comments +""" + +import argparse +import re +from pprint import pformat + +from kdoc_re import NestedMatch, Re + + +# +# Regular expressions used to parse kernel-doc markups at KernelDoc class. +# +# Let's declare them in lowercase outside any class to make easier to +# convert from the python script. +# +# As those are evaluated at the beginning, no need to cache them +# + +# Allow whitespace at end of comment start. +doc_start = Re(r'^/\*\*\s*$', cache=False) + +doc_end = Re(r'\*/', cache=False) +doc_com = Re(r'\s*\*\s*', cache=False) +doc_com_body = Re(r'\s*\* ?', cache=False) +doc_decl = doc_com + Re(r'(\w+)', cache=False) + +# @params and a strictly limited set of supported section names +# Specifically: +# Match @word: +# @...: +# @{section-name}: +# while trying to not match literal block starts like "example::" +# +doc_sect = doc_com + \ + Re(r'\s*(\@[.\w]+|\@\.\.\.|description|context|returns?|notes?|examples?)\s*:([^:].*)?$', + flags=re.I, cache=False) + +doc_content = doc_com_body + Re(r'(.*)', cache=False) +doc_block = doc_com + Re(r'DOC:\s*(.*)?', cache=False) +doc_inline_start = Re(r'^\s*/\*\*\s*$', cache=False) +doc_inline_sect = Re(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False) +doc_inline_end = Re(r'^\s*\*/\s*$', cache=False) +doc_inline_oneline = Re(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False) +attribute = Re(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", + flags=re.I | re.S, cache=False) + +export_symbol = Re(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False) +export_symbol_ns = Re(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False) + +type_param = Re(r"\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) + + +class KernelDoc: + """ + Read a C language source or header FILE and extract embedded + documentation comments. + """ + + # Parser states + STATE_NORMAL = 0 # normal code + STATE_NAME = 1 # looking for function name + STATE_BODY_MAYBE = 2 # body - or maybe more description + STATE_BODY = 3 # the body of the comment + STATE_BODY_WITH_BLANK_LINE = 4 # the body which has a blank line + STATE_PROTO = 5 # scanning prototype + STATE_DOCBLOCK = 6 # documentation block + STATE_INLINE = 7 # gathering doc outside main block + + st_name = [ + "NORMAL", + "NAME", + "BODY_MAYBE", + "BODY", + "BODY_WITH_BLANK_LINE", + "PROTO", + "DOCBLOCK", + "INLINE", + ] + + # Inline documentation state + STATE_INLINE_NA = 0 # not applicable ($state != STATE_INLINE) + STATE_INLINE_NAME = 1 # looking for member name (@foo:) + STATE_INLINE_TEXT = 2 # looking for member documentation + STATE_INLINE_END = 3 # done + STATE_INLINE_ERROR = 4 # error - Comment without header was found. + # Spit a warning as it's not + # proper kernel-doc and ignore the rest. + + st_inline_name = [ + "", + "_NAME", + "_TEXT", + "_END", + "_ERROR", + ] + + # Section names + + section_default = "Description" # default section + section_intro = "Introduction" + section_context = "Context" + section_return = "Return" + + undescribed = "-- undescribed --" + + def __init__(self, config, fname): + """Initialize internal variables""" + + self.fname = fname + self.config = config + + # Initial state for the state machines + self.state = self.STATE_NORMAL + self.inline_doc_state = self.STATE_INLINE_NA + + # Store entry currently being processed + self.entry = None + + # Place all potential outputs into an array + self.entries = [] + + def show_warnings(self, dtype, declaration_name): # pylint: disable=W0613 + """ + Allow filtering out warnings + """ + + # TODO: implement it + + return True + + # TODO: rename to emit_message + def emit_warning(self, ln, msg, warning=True): + """Emit a message""" + + if warning: + self.config.log.warning("%s:%d %s", self.fname, ln, msg) + else: + self.config.log.info("%s:%d %s", self.fname, ln, msg) + + def dump_section(self, start_new=True): + """ + Dumps section contents to arrays/hashes intended for that purpose. + """ + + name = self.entry.section + contents = self.entry.contents + + # TODO: we can prevent dumping empty sections here with: + # + # if self.entry.contents.strip("\n"): + # if start_new: + # self.entry.section = self.section_default + # self.entry.contents = "" + # + # return + # + # But, as we want to be producing the same output of the + # venerable kernel-doc Perl tool, let's just output everything, + # at least for now + + if type_param.match(name): + name = type_param.group(1) + + self.entry.parameterdescs[name] = contents + self.entry.parameterdesc_start_lines[name] = self.entry.new_start_line + + self.entry.sectcheck += name + " " + self.entry.new_start_line = 0 + + elif name == "@...": + name = "..." + self.entry.parameterdescs[name] = contents + self.entry.sectcheck += name + " " + self.entry.parameterdesc_start_lines[name] = self.entry.new_start_line + self.entry.new_start_line = 0 + + else: + if name in self.entry.sections and self.entry.sections[name] != "": + # Only warn on user-specified duplicate section names + if name != self.section_default: + self.emit_warning(self.entry.new_start_line, + f"duplicate section name '{name}'\n") + self.entry.sections[name] += contents + else: + self.entry.sections[name] = contents + self.entry.sectionlist.append(name) + self.entry.section_start_lines[name] = self.entry.new_start_line + self.entry.new_start_line = 0 + +# self.config.log.debug("Section: %s : %s", name, pformat(vars(self.entry))) + + if start_new: + self.entry.section = self.section_default + self.entry.contents = "" + + # TODO: rename it to store_declaration + def output_declaration(self, dtype, name, **args): + """ + Stores the entry into an entry array. + + The actual output and output filters will be handled elsewhere + """ + + # The implementation here is different than the original kernel-doc: + # instead of checking for output filters or actually output anything, + # it just stores the declaration content at self.entries, as the + # output will happen on a separate class. + # + # For now, we're keeping the same name of the function just to make + # easier to compare the source code of both scripts + + if "declaration_start_line" not in args: + args["declaration_start_line"] = self.entry.declaration_start_line + + args["type"] = dtype + + # TODO: use colletions.OrderedDict + + sections = args.get('sections', {}) + sectionlist = args.get('sectionlist', []) + + # Drop empty sections + # TODO: improve it to emit warnings + for section in ["Description", "Return"]: + if section in sectionlist: + if not sections[section].rstrip(): + del sections[section] + sectionlist.remove(section) + + self.entries.append((name, args)) + + self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args)) + + def reset_state(self, ln): + """ + Ancillary routine to create a new entry. It initializes all + variables used by the state machine. + """ + + self.entry = argparse.Namespace + + self.entry.contents = "" + self.entry.function = "" + self.entry.sectcheck = "" + self.entry.struct_actual = "" + self.entry.prototype = "" + + self.entry.parameterlist = [] + self.entry.parameterdescs = {} + self.entry.parametertypes = {} + self.entry.parameterdesc_start_lines = {} + + self.entry.section_start_lines = {} + self.entry.sectionlist = [] + self.entry.sections = {} + + self.entry.anon_struct_union = False + + self.entry.leading_space = None + + # State flags + self.state = self.STATE_NORMAL + self.inline_doc_state = self.STATE_INLINE_NA + self.entry.brcount = 0 + + self.entry.in_doc_sect = False + self.entry.declaration_start_line = ln + + def push_parameter(self, ln, decl_type, param, dtype, + org_arg, declaration_name): + """ + Store parameters and their descriptions at self.entry. + """ + + if self.entry.anon_struct_union and dtype == "" and param == "}": + return # Ignore the ending }; from anonymous struct/union + + self.entry.anon_struct_union = False + + param = Re(r'[\[\)].*').sub('', param, count=1) + + if dtype == "" and param.endswith("..."): + if Re(r'\w\.\.\.$').search(param): + # For named variable parameters of the form `x...`, + # remove the dots + param = param[:-3] + else: + # Handles unnamed variable parameters + param = "..." + + if param not in self.entry.parameterdescs or \ + not self.entry.parameterdescs[param]: + + self.entry.parameterdescs[param] = "variable arguments" + + elif dtype == "" and (not param or param == "void"): + param = "void" + self.entry.parameterdescs[param] = "no arguments" + + elif dtype == "" and param in ["struct", "union"]: + # Handle unnamed (anonymous) union or struct + dtype = param + param = "{unnamed_" + param + "}" + self.entry.parameterdescs[param] = "anonymous\n" + self.entry.anon_struct_union = True + + # Handle cache group enforcing variables: they do not need + # to be described in header files + elif "__cacheline_group" in param: + # Ignore __cacheline_group_begin and __cacheline_group_end + return + + # Warn if parameter has no description + # (but ignore ones starting with # as these are not parameters + # but inline preprocessor statements) + if param not in self.entry.parameterdescs and not param.startswith("#"): + self.entry.parameterdescs[param] = self.undescribed + + if self.show_warnings(dtype, declaration_name) and "." not in param: + if decl_type == 'function': + dname = f"{decl_type} parameter" + else: + dname = f"{decl_type} member" + + self.emit_warning(ln, + f"{dname} '{param}' not described in '{declaration_name}'") + + # Strip spaces from param so that it is one continuous string on + # parameterlist. This fixes a problem where check_sections() + # cannot find a parameter like "addr[6 + 2]" because it actually + # appears as "addr[6", "+", "2]" on the parameter list. + # However, it's better to maintain the param string unchanged for + # output, so just weaken the string compare in check_sections() + # to ignore "[blah" in a parameter string. + + self.entry.parameterlist.append(param) + org_arg = Re(r'\s\s+').sub(' ', org_arg) + self.entry.parametertypes[param] = org_arg + + def save_struct_actual(self, actual): + """ + Strip all spaces from the actual param so that it looks like + one string item. + """ + + actual = Re(r'\s*').sub("", actual, count=1) + + self.entry.struct_actual += actual + " " + + def create_parameter_list(self, ln, decl_type, args, + splitter, declaration_name): + """ + Creates a list of parameters, storing them at self.entry. + """ + + # temporarily replace all commas inside function pointer definition + arg_expr = Re(r'(\([^\),]+),') + while arg_expr.search(args): + args = arg_expr.sub(r"\1#", args) + + for arg in args.split(splitter): + # Strip comments + arg = Re(r'\/\*.*\*\/').sub('', arg) + + # Ignore argument attributes + arg = Re(r'\sPOS0?\s').sub(' ', arg) + + # Strip leading/trailing spaces + arg = arg.strip() + arg = Re(r'\s+').sub(' ', arg, count=1) + + if arg.startswith('#'): + # Treat preprocessor directive as a typeless variable just to fill + # corresponding data structures "correctly". Catch it later in + # output_* subs. + + # Treat preprocessor directive as a typeless variable + self.push_parameter(ln, decl_type, arg, "", + "", declaration_name) + + elif Re(r'\(.+\)\s*\(').search(arg): + # Pointer-to-function + + arg = arg.replace('#', ',') + + r = Re(r'[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)') + if r.match(arg): + param = r.group(1) + else: + self.emit_warning(ln, f"Invalid param: {arg}") + param = arg + + dtype = Re(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) + self.save_struct_actual(param) + self.push_parameter(ln, decl_type, param, dtype, + arg, declaration_name) + + elif Re(r'\(.+\)\s*\[').search(arg): + # Array-of-pointers + + arg = arg.replace('#', ',') + r = Re(r'[^\(]+\(\s*\*\s*([\w\[\]\.]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)') + if r.match(arg): + param = r.group(1) + else: + self.emit_warning(ln, f"Invalid param: {arg}") + param = arg + + dtype = Re(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) + + self.save_struct_actual(param) + self.push_parameter(ln, decl_type, param, dtype, + arg, declaration_name) + + elif arg: + arg = Re(r'\s*:\s*').sub(":", arg) + arg = Re(r'\s*\[').sub('[', arg) + + args = Re(r'\s*,\s*').split(arg) + if args[0] and '*' in args[0]: + args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) + + first_arg = [] + r = Re(r'^(.*\s+)(.*?\[.*\].*)$') + if args[0] and r.match(args[0]): + args.pop(0) + first_arg.extend(r.group(1)) + first_arg.append(r.group(2)) + else: + first_arg = Re(r'\s+').split(args.pop(0)) + + args.insert(0, first_arg.pop()) + dtype = ' '.join(first_arg) + + for param in args: + if Re(r'^(\*+)\s*(.*)').match(param): + r = Re(r'^(\*+)\s*(.*)') + if not r.match(param): + self.emit_warning(ln, f"Invalid param: {param}") + continue + + param = r.group(1) + + self.save_struct_actual(r.group(2)) + self.push_parameter(ln, decl_type, r.group(2), + f"{dtype} {r.group(1)}", + arg, declaration_name) + + elif Re(r'(.*?):(\w+)').search(param): + r = Re(r'(.*?):(\w+)') + if not r.match(param): + self.emit_warning(ln, f"Invalid param: {param}") + continue + + if dtype != "": # Skip unnamed bit-fields + self.save_struct_actual(r.group(1)) + self.push_parameter(ln, decl_type, r.group(1), + f"{dtype}:{r.group(2)}", + arg, declaration_name) + else: + self.save_struct_actual(param) + self.push_parameter(ln, decl_type, param, dtype, + arg, declaration_name) + + def check_sections(self, ln, decl_name, decl_type, sectcheck, prmscheck): + """ + Check for errors inside sections, emitting warnings if not found + parameters are described. + """ + + sects = sectcheck.split() + prms = prmscheck.split() + err = False + + for sx in range(len(sects)): # pylint: disable=C0200 + err = True + for px in range(len(prms)): # pylint: disable=C0200 + prm_clean = prms[px] + prm_clean = Re(r'\[.*\]').sub('', prm_clean) + prm_clean = attribute.sub('', prm_clean) + + # ignore array size in a parameter string; + # however, the original param string may contain + # spaces, e.g.: addr[6 + 2] + # and this appears in @prms as "addr[6" since the + # parameter list is split at spaces; + # hence just ignore "[..." for the sections check; + prm_clean = Re(r'\[.*').sub('', prm_clean) + + if prm_clean == sects[sx]: + err = False + break + + if err: + if decl_type == 'function': + dname = f"{decl_type} parameter" + else: + dname = f"{decl_type} member" + + self.emit_warning(ln, + f"Excess {dname} '{sects[sx]}' description in '{decl_name}'") + + def check_return_section(self, ln, declaration_name, return_type): + """ + If the function doesn't return void, warns about the lack of a + return description. + """ + + if not self.config.wreturn: + return + + # Ignore an empty return type (It's a macro) + # Ignore functions with a "void" return type (but not "void *") + if not return_type or Re(r'void\s*\w*\s*$').search(return_type): + return + + if not self.entry.sections.get("Return", None): + self.emit_warning(ln, + f"No description found for return value of '{declaration_name}'") + + def dump_struct(self, ln, proto): + """ + Store an entry for an struct or union + """ + + type_pattern = r'(struct|union)' + + qualifiers = [ + "__attribute__", + "__packed", + "__aligned", + "____cacheline_aligned_in_smp", + "____cacheline_aligned", + ] + + definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?" + struct_members = Re(type_pattern + r'([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\}\;]*)(\;)') + + # Extract struct/union definition + members = None + declaration_name = None + decl_type = None + + r = Re(type_pattern + r'\s+(\w+)\s*' + definition_body) + if r.search(proto): + decl_type = r.group(1) + declaration_name = r.group(2) + members = r.group(3) + else: + r = Re(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;') + + if r.search(proto): + decl_type = r.group(1) + declaration_name = r.group(3) + members = r.group(2) + + if not members: + self.emit_warning(ln, f"{proto} error: Cannot parse struct or union!") + self.config.errors += 1 + return + + if self.entry.identifier != declaration_name: + self.emit_warning(ln, + f"expecting prototype for {decl_type} {self.entry.identifier}. Prototype was for {decl_type} {declaration_name} instead\n") + return + + args_pattern = r'([^,)]+)' + + sub_prefixes = [ + (Re(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', re.S | re.I), ''), + (Re(r'\/\*\s*private:.*', re.S | re.I), ''), + + # Strip comments + (Re(r'\/\*.*?\*\/', re.S), ''), + + # Strip attributes + (attribute, ' '), + (Re(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), + (Re(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), + (Re(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), + (Re(r'\s*__packed\s*', re.S), ' '), + (Re(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), + (Re(r'\s*____cacheline_aligned_in_smp', re.S), ' '), + (Re(r'\s*____cacheline_aligned', re.S), ' '), + + # Unwrap struct_group macros based on this definition: + # __struct_group(TAG, NAME, ATTRS, MEMBERS...) + # which has variants like: struct_group(NAME, MEMBERS...) + # Only MEMBERS arguments require documentation. + # + # Parsing them happens on two steps: + # + # 1. drop struct group arguments that aren't at MEMBERS, + # storing them as STRUCT_GROUP(MEMBERS) + # + # 2. remove STRUCT_GROUP() ancillary macro. + # + # The original logic used to remove STRUCT_GROUP() using an + # advanced regex: + # + # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; + # + # with two patterns that are incompatible with + # Python re module, as it has: + # + # - a recursive pattern: (?1) + # - an atomic grouping: (?>...) + # + # I tried a simpler version: but it didn't work either: + # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; + # + # As it doesn't properly match the end parenthesis on some cases. + # + # So, a better solution was crafted: there's now a NestedMatch + # class that ensures that delimiters after a search are properly + # matched. So, the implementation to drop STRUCT_GROUP() will be + # handled in separate. + + (Re(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), + (Re(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), + (Re(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), + (Re(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), + + # Replace macros + # + # TODO: it is better to also move those to the NestedMatch logic, + # to ensure that parenthesis will be properly matched. + + (Re(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), + (Re(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), + (Re(r'DECLARE_BITMAP\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), + (Re(r'DECLARE_HASHTABLE\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[1 << ((\2) - 1)]'), + (Re(r'DECLARE_KFIFO\s*\(' + args_pattern + r',\s*' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), + (Re(r'DECLARE_KFIFO_PTR\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), + (Re(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\1 \2[]'), + (Re(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + args_pattern + r'\)', re.S), r'dma_addr_t \1'), + (Re(r'DEFINE_DMA_UNMAP_LEN\s*\(' + args_pattern + r'\)', re.S), r'__u32 \1'), + ] + + # Regexes here are guaranteed to have the end limiter matching + # the start delimiter. Yet, right now, only one replace group + # is allowed. + + sub_nested_prefixes = [ + (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), + ] + + for search, sub in sub_prefixes: + members = search.sub(sub, members) + + nested = NestedMatch() + + for search, sub in sub_nested_prefixes: + members = nested.sub(search, sub, members) + + # Keeps the original declaration as-is + declaration = members + + # Split nested struct/union elements + # + # This loop was simpler at the original kernel-doc perl version, as + # while ($members =~ m/$struct_members/) { ... } + # reads 'members' string on each interaction. + # + # Python behavior is different: it parses 'members' only once, + # creating a list of tuples from the first interaction. + # + # On other words, this won't get nested structs. + # + # So, we need to have an extra loop on Python to override such + # re limitation. + + while True: + tuples = struct_members.findall(members) + if not tuples: + break + + for t in tuples: + newmember = "" + maintype = t[0] + s_ids = t[5] + content = t[3] + + oldmember = "".join(t) + + for s_id in s_ids.split(','): + s_id = s_id.strip() + + newmember += f"{maintype} {s_id}; " + s_id = Re(r'[:\[].*').sub('', s_id) + s_id = Re(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) + + for arg in content.split(';'): + arg = arg.strip() + + if not arg: + continue + + r = Re(r'^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)') + if r.match(arg): + # Pointer-to-function + dtype = r.group(1) + name = r.group(2) + extra = r.group(3) + + if not name: + continue + + if not s_id: + # Anonymous struct/union + newmember += f"{dtype}{name}{extra}; " + else: + newmember += f"{dtype}{s_id}.{name}{extra}; " + + else: + arg = arg.strip() + # Handle bitmaps + arg = Re(r':\s*\d+\s*').sub('', arg) + + # Handle arrays + arg = Re(r'\[.*\]').sub('', arg) + + # Handle multiple IDs + arg = Re(r'\s*,\s*').sub(',', arg) + + r = Re(r'(.*)\s+([\S+,]+)') + + if r.search(arg): + dtype = r.group(1) + names = r.group(2) + else: + newmember += f"{arg}; " + continue + + for name in names.split(','): + name = Re(r'^\s*\**(\S+)\s*').sub(r'\1', name).strip() + + if not name: + continue + + if not s_id: + # Anonymous struct/union + newmember += f"{dtype} {name}; " + else: + newmember += f"{dtype} {s_id}.{name}; " + + members = members.replace(oldmember, newmember) + + # Ignore other nested elements, like enums + members = re.sub(r'(\{[^\{\}]*\})', '', members) + + self.create_parameter_list(ln, decl_type, members, ';', + declaration_name) + self.check_sections(ln, declaration_name, decl_type, + self.entry.sectcheck, self.entry.struct_actual) + + # Adjust declaration for better display + declaration = Re(r'([\{;])').sub(r'\1\n', declaration) + declaration = Re(r'\}\s+;').sub('};', declaration) + + # Better handle inlined enums + while True: + r = Re(r'(enum\s+\{[^\}]+),([^\n])') + if not r.search(declaration): + break + + declaration = r.sub(r'\1,\n\2', declaration) + + def_args = declaration.split('\n') + level = 1 + declaration = "" + for clause in def_args: + + clause = clause.strip() + clause = Re(r'\s+').sub(' ', clause, count=1) + + if not clause: + continue + + if '}' in clause and level > 1: + level -= 1 + + if not Re(r'^\s*#').match(clause): + declaration += "\t" * level + + declaration += "\t" + clause + "\n" + if "{" in clause and "}" not in clause: + level += 1 + + self.output_declaration(decl_type, declaration_name, + struct=declaration_name, + module=self.entry.modulename, + definition=declaration, + parameterlist=self.entry.parameterlist, + parameterdescs=self.entry.parameterdescs, + parametertypes=self.entry.parametertypes, + sectionlist=self.entry.sectionlist, + sections=self.entry.sections, + purpose=self.entry.declaration_purpose) + + def dump_enum(self, ln, proto): + """ + Stores an enum inside self.entries array. + """ + + # Ignore members marked private + proto = Re(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', flags=re.S).sub('', proto) + proto = Re(r'\/\*\s*private:.*}', flags=re.S).sub('}', proto) + + # Strip comments + proto = Re(r'\/\*.*?\*\/', flags=re.S).sub('', proto) + + # Strip #define macros inside enums + proto = Re(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) + + members = None + declaration_name = None + + r = Re(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') + if r.search(proto): + declaration_name = r.group(2) + members = r.group(1).rstrip() + else: + r = Re(r'enum\s+(\w*)\s*\{(.*)\}') + if r.match(proto): + declaration_name = r.group(1) + members = r.group(2).rstrip() + + if not members: + self.emit_warning(ln, f"{proto}: error: Cannot parse enum!") + self.config.errors += 1 + return + + if self.entry.identifier != declaration_name: + if self.entry.identifier == "": + self.emit_warning(ln, + f"{proto}: wrong kernel-doc identifier on prototype") + else: + self.emit_warning(ln, + f"expecting prototype for enum {self.entry.identifier}. Prototype was for enum {declaration_name} instead") + return + + if not declaration_name: + declaration_name = "(anonymous)" + + member_set = set() + + members = Re(r'\([^;]*?[\)]').sub('', members) + + for arg in members.split(','): + if not arg: + continue + arg = Re(r'^\s*(\w+).*').sub(r'\1', arg) + self.entry.parameterlist.append(arg) + if arg not in self.entry.parameterdescs: + self.entry.parameterdescs[arg] = self.undescribed + if self.show_warnings("enum", declaration_name): + self.emit_warning(ln, + f"Enum value '{arg}' not described in enum '{declaration_name}'") + member_set.add(arg) + + for k in self.entry.parameterdescs: + if k not in member_set: + if self.show_warnings("enum", declaration_name): + self.emit_warning(ln, + f"Excess enum value '%{k}' description in '{declaration_name}'") + + self.output_declaration('enum', declaration_name, + enum=declaration_name, + module=self.config.modulename, + parameterlist=self.entry.parameterlist, + parameterdescs=self.entry.parameterdescs, + sectionlist=self.entry.sectionlist, + sections=self.entry.sections, + purpose=self.entry.declaration_purpose) + + def dump_declaration(self, ln, prototype): + """ + Stores a data declaration inside self.entries array. + """ + + if self.entry.decl_type == "enum": + self.dump_enum(ln, prototype) + return + + if self.entry.decl_type == "typedef": + self.dump_typedef(ln, prototype) + return + + if self.entry.decl_type in ["union", "struct"]: + self.dump_struct(ln, prototype) + return + + # TODO: handle other types + self.output_declaration(self.entry.decl_type, prototype, + entry=self.entry) + + def dump_function(self, ln, prototype): + """ + Stores a function of function macro inside self.entries array. + """ + + func_macro = False + return_type = '' + decl_type = 'function' + + # Prefixes that would be removed + sub_prefixes = [ + (r"^static +", "", 0), + (r"^extern +", "", 0), + (r"^asmlinkage +", "", 0), + (r"^inline +", "", 0), + (r"^__inline__ +", "", 0), + (r"^__inline +", "", 0), + (r"^__always_inline +", "", 0), + (r"^noinline +", "", 0), + (r"^__FORTIFY_INLINE +", "", 0), + (r"__init +", "", 0), + (r"__init_or_module +", "", 0), + (r"__deprecated +", "", 0), + (r"__flatten +", "", 0), + (r"__meminit +", "", 0), + (r"__must_check +", "", 0), + (r"__weak +", "", 0), + (r"__sched +", "", 0), + (r"_noprof", "", 0), + (r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +", "", 0), + (r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +", "", 0), + (r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +", "", 0), + (r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)", r"\1, \2", 0), + (r"__attribute_const__ +", "", 0), + + # It seems that Python support for re.X is broken: + # At least for me (Python 3.13), this didn't work +# (r""" +# __attribute__\s*\(\( +# (?: +# [\w\s]+ # attribute name +# (?:\([^)]*\))? # attribute arguments +# \s*,? # optional comma at the end +# )+ +# \)\)\s+ +# """, "", re.X), + + # So, remove whitespaces and comments from it + (r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+", "", 0), + ] + + for search, sub, flags in sub_prefixes: + prototype = Re(search, flags).sub(sub, prototype) + + # Macros are a special case, as they change the prototype format + new_proto = Re(r"^#\s*define\s+").sub("", prototype) + if new_proto != prototype: + is_define_proto = True + prototype = new_proto + else: + is_define_proto = False + + # Yes, this truly is vile. We are looking for: + # 1. Return type (may be nothing if we're looking at a macro) + # 2. Function name + # 3. Function parameters. + # + # All the while we have to watch out for function pointer parameters + # (which IIRC is what the two sections are for), C types (these + # regexps don't even start to express all the possibilities), and + # so on. + # + # If you mess with these regexps, it's a good idea to check that + # the following functions' documentation still comes out right: + # - parport_register_device (function pointer parameters) + # - atomic_set (macro) + # - pci_match_device, __copy_to_user (long return type) + + name = r'[a-zA-Z0-9_~:]+' + prototype_end1 = r'[^\(]*' + prototype_end2 = r'[^\{]*' + prototype_end = fr'\(({prototype_end1}|{prototype_end2})\)' + + # Besides compiling, Perl qr{[\w\s]+} works as a non-capturing group. + # So, this needs to be mapped in Python with (?:...)? or (?:...)+ + + type1 = r'(?:[\w\s]+)?' + type2 = r'(?:[\w\s]+\*+)+' + + found = False + + if is_define_proto: + r = Re(r'^()(' + name + r')\s+') + + if r.search(prototype): + return_type = '' + declaration_name = r.group(2) + func_macro = True + + found = True + + if not found: + patterns = [ + rf'^()({name})\s*{prototype_end}', + rf'^({type1})\s+({name})\s*{prototype_end}', + rf'^({type2})\s*({name})\s*{prototype_end}', + ] + + for p in patterns: + r = Re(p) + + if r.match(prototype): + + return_type = r.group(1) + declaration_name = r.group(2) + args = r.group(3) + + self.create_parameter_list(ln, decl_type, args, ',', + declaration_name) + + found = True + break + if not found: + self.emit_warning(ln, + f"cannot understand function prototype: '{prototype}'") + return + + if self.entry.identifier != declaration_name: + self.emit_warning(ln, + f"expecting prototype for {self.entry.identifier}(). Prototype was for {declaration_name}() instead") + return + + prms = " ".join(self.entry.parameterlist) + self.check_sections(ln, declaration_name, "function", + self.entry.sectcheck, prms) + + self.check_return_section(ln, declaration_name, return_type) + + if 'typedef' in return_type: + self.output_declaration(decl_type, declaration_name, + function=declaration_name, + typedef=True, + module=self.config.modulename, + functiontype=return_type, + parameterlist=self.entry.parameterlist, + parameterdescs=self.entry.parameterdescs, + parametertypes=self.entry.parametertypes, + sectionlist=self.entry.sectionlist, + sections=self.entry.sections, + purpose=self.entry.declaration_purpose, + func_macro=func_macro) + else: + self.output_declaration(decl_type, declaration_name, + function=declaration_name, + typedef=False, + module=self.config.modulename, + functiontype=return_type, + parameterlist=self.entry.parameterlist, + parameterdescs=self.entry.parameterdescs, + parametertypes=self.entry.parametertypes, + sectionlist=self.entry.sectionlist, + sections=self.entry.sections, + purpose=self.entry.declaration_purpose, + func_macro=func_macro) + + def dump_typedef(self, ln, proto): + """ + Stores a typedef inside self.entries array. + """ + + typedef_type = r'((?:\s+[\w\*]+\b){1,8})\s*' + typedef_ident = r'\*?\s*(\w\S+)\s*' + typedef_args = r'\s*\((.*)\);' + + typedef1 = Re(r'typedef' + typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) + typedef2 = Re(r'typedef' + typedef_type + typedef_ident + typedef_args) + + # Strip comments + proto = Re(r'/\*.*?\*/', flags=re.S).sub('', proto) + + # Parse function typedef prototypes + for r in [typedef1, typedef2]: + if not r.match(proto): + continue + + return_type = r.group(1).strip() + declaration_name = r.group(2) + args = r.group(3) + + if self.entry.identifier != declaration_name: + self.emit_warning(ln, + f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") + return + + decl_type = 'function' + self.create_parameter_list(ln, decl_type, args, ',', declaration_name) + + self.output_declaration(decl_type, declaration_name, + function=declaration_name, + typedef=True, + module=self.entry.modulename, + functiontype=return_type, + parameterlist=self.entry.parameterlist, + parameterdescs=self.entry.parameterdescs, + parametertypes=self.entry.parametertypes, + sectionlist=self.entry.sectionlist, + sections=self.entry.sections, + purpose=self.entry.declaration_purpose) + return + + # Handle nested parentheses or brackets + r = Re(r'(\(*.\)\s*|\[*.\]\s*);$') + while r.search(proto): + proto = r.sub('', proto) + + # Parse simple typedefs + r = Re(r'typedef.*\s+(\w+)\s*;') + if r.match(proto): + declaration_name = r.group(1) + + if self.entry.identifier != declaration_name: + self.emit_warning(ln, f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") + return + + self.output_declaration('typedef', declaration_name, + typedef=declaration_name, + module=self.entry.modulename, + sectionlist=self.entry.sectionlist, + sections=self.entry.sections, + purpose=self.entry.declaration_purpose) + return + + self.emit_warning(ln, "error: Cannot parse typedef!") + self.config.errors += 1 + + @staticmethod + def process_export(function_table, line): + """ + process EXPORT_SYMBOL* tags + + This method is called both internally and externally, so, it + doesn't use self. + """ + + if export_symbol.search(line): + symbol = export_symbol.group(2) + function_table.add(symbol) + + if export_symbol_ns.search(line): + symbol = export_symbol_ns.group(2) + function_table.add(symbol) + + def process_normal(self, ln, line): + """ + STATE_NORMAL: looking for the /** to begin everything. + """ + + if not doc_start.match(line): + return + + # start a new entry + self.reset_state(ln + 1) + self.entry.in_doc_sect = False + + # next line is always the function name + self.state = self.STATE_NAME + + def process_name(self, ln, line): + """ + STATE_NAME: Looking for the "name - description" line + """ + + if doc_block.search(line): + self.entry.new_start_line = ln + + if not doc_block.group(1): + self.entry.section = self.section_intro + else: + self.entry.section = doc_block.group(1) + + self.state = self.STATE_DOCBLOCK + return + + if doc_decl.search(line): + self.entry.identifier = doc_decl.group(1) + self.entry.is_kernel_comment = False + + decl_start = str(doc_com) # comment block asterisk + fn_type = r"(?:\w+\s*\*\s*)?" # type (for non-functions) + parenthesis = r"(?:\(\w*\))?" # optional parenthesis on function + decl_end = r"(?:[-:].*)" # end of the name part + + # test for pointer declaration type, foo * bar() - desc + r = Re(fr"^{decl_start}([\w\s]+?){parenthesis}?\s*{decl_end}?$") + if r.search(line): + self.entry.identifier = r.group(1) + + # Test for data declaration + r = Re(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)") + if r.search(line): + self.entry.decl_type = r.group(1) + self.entry.identifier = r.group(2) + self.entry.is_kernel_comment = True + else: + # Look for foo() or static void foo() - description; + # or misspelt identifier + + r1 = Re(fr"^{decl_start}{fn_type}(\w+)\s*{parenthesis}\s*{decl_end}?$") + r2 = Re(fr"^{decl_start}{fn_type}(\w+[^-:]*){parenthesis}\s*{decl_end}$") + + for r in [r1, r2]: + if r.search(line): + self.entry.identifier = r.group(1) + self.entry.decl_type = "function" + + r = Re(r"define\s+") + self.entry.identifier = r.sub("", self.entry.identifier) + self.entry.is_kernel_comment = True + break + + self.entry.identifier = self.entry.identifier.strip(" ") + + self.state = self.STATE_BODY + + # if there's no @param blocks need to set up default section here + self.entry.section = self.section_default + self.entry.new_start_line = ln + 1 + + r = Re("[-:](.*)") + if r.search(line): + # strip leading/trailing/multiple spaces + self.entry.descr = r.group(1).strip(" ") + + r = Re(r"\s+") + self.entry.descr = r.sub(" ", self.entry.descr) + self.entry.declaration_purpose = self.entry.descr + self.state = self.STATE_BODY_MAYBE + else: + self.entry.declaration_purpose = "" + + if not self.entry.is_kernel_comment: + self.emit_warning(ln, + f"This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n{line}") + self.state = self.STATE_NORMAL + + if not self.entry.declaration_purpose and self.config.wshort_desc: + self.emit_warning(ln, + f"missing initial short description on line:\n{line}") + + if not self.entry.identifier and self.entry.decl_type != "enum": + self.emit_warning(ln, + f"wrong kernel-doc identifier on line:\n{line}") + self.state = self.STATE_NORMAL + + if self.config.verbose: + self.emit_warning(ln, + f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", + warning=False) + + return + + # Failed to find an identifier. Emit a warning + self.emit_warning(ln, f"Cannot find identifier on line:\n{line}") + + def process_body(self, ln, line): + """ + STATE_BODY and STATE_BODY_MAYBE: the bulk of a kerneldoc comment. + """ + + if self.state == self.STATE_BODY_WITH_BLANK_LINE: + r = Re(r"\s*\*\s?\S") + if r.match(line): + self.dump_section() + self.entry.section = self.section_default + self.entry.new_start_line = line + self.entry.contents = "" + + if doc_sect.search(line): + self.entry.in_doc_sect = True + newsection = doc_sect.group(1) + + if newsection.lower() in ["description", "context"]: + newsection = newsection.title() + + # Special case: @return is a section, not a param description + if newsection.lower() in ["@return", "@returns", + "return", "returns"]: + newsection = "Return" + + # Perl kernel-doc has a check here for contents before sections. + # the logic there is always false, as in_doc_sect variable is + # always true. So, just don't implement Wcontents_before_sections + + # .title() + newcontents = doc_sect.group(2) + if not newcontents: + newcontents = "" + + if self.entry.contents.strip("\n"): + self.dump_section() + + self.entry.new_start_line = ln + self.entry.section = newsection + self.entry.leading_space = None + + self.entry.contents = newcontents.lstrip() + if self.entry.contents: + self.entry.contents += "\n" + + self.state = self.STATE_BODY + return + + if doc_end.search(line): + self.dump_section() + + # Look for doc_com + + doc_end: + r = Re(r'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') + if r.match(line): + self.emit_warning(ln, f"suspicious ending line: {line}") + + self.entry.prototype = "" + self.entry.new_start_line = ln + 1 + + self.state = self.STATE_PROTO + return + + if doc_content.search(line): + cont = doc_content.group(1) + + if cont == "": + if self.entry.section == self.section_context: + self.dump_section() + + self.entry.new_start_line = ln + self.state = self.STATE_BODY + else: + if self.entry.section != self.section_default: + self.state = self.STATE_BODY_WITH_BLANK_LINE + else: + self.state = self.STATE_BODY + + self.entry.contents += "\n" + + elif self.state == self.STATE_BODY_MAYBE: + + # Continued declaration purpose + self.entry.declaration_purpose = self.entry.declaration_purpose.rstrip() + self.entry.declaration_purpose += " " + cont + + r = Re(r"\s+") + self.entry.declaration_purpose = r.sub(' ', + self.entry.declaration_purpose) + + else: + if self.entry.section.startswith('@') or \ + self.entry.section == self.section_context: + if self.entry.leading_space is None: + r = Re(r'^(\s+)') + if r.match(cont): + self.entry.leading_space = len(r.group(1)) + else: + self.entry.leading_space = 0 + + # Double-check if leading space are realy spaces + pos = 0 + for i in range(0, self.entry.leading_space): + if cont[i] != " ": + break + pos += 1 + + cont = cont[pos:] + + # NEW LOGIC: + # In case it is different, update it + if self.entry.leading_space != pos: + self.entry.leading_space = pos + + self.entry.contents += cont + "\n" + return + + # Unknown line, ignore + self.emit_warning(ln, f"bad line: {line}") + + def process_inline(self, ln, line): + """STATE_INLINE: docbook comments within a prototype.""" + + if self.inline_doc_state == self.STATE_INLINE_NAME and \ + doc_inline_sect.search(line): + self.entry.section = doc_inline_sect.group(1) + self.entry.new_start_line = ln + + self.entry.contents = doc_inline_sect.group(2).lstrip() + if self.entry.contents != "": + self.entry.contents += "\n" + + self.inline_doc_state = self.STATE_INLINE_TEXT + # Documentation block end */ + return + + if doc_inline_end.search(line): + if self.entry.contents not in ["", "\n"]: + self.dump_section() + + self.state = self.STATE_PROTO + self.inline_doc_state = self.STATE_INLINE_NA + return + + if doc_content.search(line): + if self.inline_doc_state == self.STATE_INLINE_TEXT: + self.entry.contents += doc_content.group(1) + "\n" + if not self.entry.contents.strip(" ").rstrip("\n"): + self.entry.contents = "" + + elif self.inline_doc_state == self.STATE_INLINE_NAME: + self.emit_warning(ln, + f"Incorrect use of kernel-doc format: {line}") + + self.inline_doc_state = self.STATE_INLINE_ERROR + + def syscall_munge(self, ln, proto): # pylint: disable=W0613 + """ + Handle syscall definitions + """ + + is_void = False + + # Strip newlines/CR's + proto = re.sub(r'[\r\n]+', ' ', proto) + + # Check if it's a SYSCALL_DEFINE0 + if 'SYSCALL_DEFINE0' in proto: + is_void = True + + # Replace SYSCALL_DEFINE with correct return type & function name + proto = Re(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) + + r = Re(r'long\s+(sys_.*?),') + if r.search(proto): + proto = proto.replace(',', '(', count=1) + elif is_void: + proto = proto.replace(')', '(void)', count=1) + + # Now delete all of the odd-numbered commas in the proto + # so that argument types & names don't have a comma between them + count = 0 + length = len(proto) + + if is_void: + length = 0 # skip the loop if is_void + + for ix in range(length): + if proto[ix] == ',': + count += 1 + if count % 2 == 1: + proto = proto[:ix] + ' ' + proto[ix + 1:] + + return proto + + def tracepoint_munge(self, ln, proto): + """ + Handle tracepoint definitions + """ + + tracepointname = None + tracepointargs = None + + # Match tracepoint name based on different patterns + r = Re(r'TRACE_EVENT\((.*?),') + if r.search(proto): + tracepointname = r.group(1) + + r = Re(r'DEFINE_SINGLE_EVENT\((.*?),') + if r.search(proto): + tracepointname = r.group(1) + + r = Re(r'DEFINE_EVENT\((.*?),(.*?),') + if r.search(proto): + tracepointname = r.group(2) + + if tracepointname: + tracepointname = tracepointname.lstrip() + + r = Re(r'TP_PROTO\((.*?)\)') + if r.search(proto): + tracepointargs = r.group(1) + + if not tracepointname or not tracepointargs: + self.emit_warning(ln, + f"Unrecognized tracepoint format:\n{proto}\n") + else: + proto = f"static inline void trace_{tracepointname}({tracepointargs})" + self.entry.identifier = f"trace_{self.entry.identifier}" + + return proto + + def process_proto_function(self, ln, line): + """Ancillary routine to process a function prototype""" + + # strip C99-style comments to end of line + r = Re(r"\/\/.*$", re.S) + line = r.sub('', line) + + if Re(r'\s*#\s*define').match(line): + self.entry.prototype = line + elif line.startswith('#'): + # Strip other macros like #ifdef/#ifndef/#endif/... + pass + else: + r = Re(r'([^\{]*)') + if r.match(line): + self.entry.prototype += r.group(1) + " " + + if '{' in line or ';' in line or Re(r'\s*#\s*define').match(line): + # strip comments + r = Re(r'/\*.*?\*/') + self.entry.prototype = r.sub('', self.entry.prototype) + + # strip newlines/cr's + r = Re(r'[\r\n]+') + self.entry.prototype = r.sub(' ', self.entry.prototype) + + # strip leading spaces + r = Re(r'^\s+') + self.entry.prototype = r.sub('', self.entry.prototype) + + # Handle self.entry.prototypes for function pointers like: + # int (*pcs_config)(struct foo) + + r = Re(r'^(\S+\s+)\(\s*\*(\S+)\)') + self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) + + if 'SYSCALL_DEFINE' in self.entry.prototype: + self.entry.prototype = self.syscall_munge(ln, + self.entry.prototype) + + r = Re(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') + if r.search(self.entry.prototype): + self.entry.prototype = self.tracepoint_munge(ln, + self.entry.prototype) + + self.dump_function(ln, self.entry.prototype) + self.reset_state(ln) + + def process_proto_type(self, ln, line): + """Ancillary routine to process a type""" + + # Strip newlines/cr's. + line = Re(r'[\r\n]+', re.S).sub(' ', line) + + # Strip leading spaces + line = Re(r'^\s+', re.S).sub('', line) + + # Strip trailing spaces + line = Re(r'\s+$', re.S).sub('', line) + + # Strip C99-style comments to the end of the line + line = Re(r"\/\/.*$", re.S).sub('', line) + + # To distinguish preprocessor directive from regular declaration later. + if line.startswith('#'): + line += ";" + + r = Re(r'([^\{\};]*)([\{\};])(.*)') + while True: + if r.search(line): + if self.entry.prototype: + self.entry.prototype += " " + self.entry.prototype += r.group(1) + r.group(2) + + self.entry.brcount += r.group(2).count('{') + self.entry.brcount -= r.group(2).count('}') + + self.entry.brcount = max(self.entry.brcount, 0) + + if r.group(2) == ';' and self.entry.brcount == 0: + self.dump_declaration(ln, self.entry.prototype) + self.reset_state(ln) + break + + line = r.group(3) + else: + self.entry.prototype += line + break + + def process_proto(self, ln, line): + """STATE_PROTO: reading a function/whatever prototype.""" + + if doc_inline_oneline.search(line): + self.entry.section = doc_inline_oneline.group(1) + self.entry.contents = doc_inline_oneline.group(2) + + if self.entry.contents != "": + self.entry.contents += "\n" + self.dump_section(start_new=False) + + elif doc_inline_start.search(line): + self.state = self.STATE_INLINE + self.inline_doc_state = self.STATE_INLINE_NAME + + elif self.entry.decl_type == 'function': + self.process_proto_function(ln, line) + + else: + self.process_proto_type(ln, line) + + def process_docblock(self, ln, line): + """STATE_DOCBLOCK: within a DOC: block.""" + + if doc_end.search(line): + self.dump_section() + self.output_declaration("doc", None, + sectionlist=self.entry.sectionlist, + sections=self.entry.sections, module=self.config.modulename) + self.reset_state(ln) + + elif doc_content.search(line): + self.entry.contents += doc_content.group(1) + "\n" + + def run(self): + """ + Open and process each line of a C source file. + he parsing is controlled via a state machine, and the line is passed + to a different process function depending on the state. The process + function may update the state as needed. + """ + + cont = False + prev = "" + prev_ln = None + + try: + with open(self.fname, "r", encoding="utf8", + errors="backslashreplace") as fp: + for ln, line in enumerate(fp): + + line = line.expandtabs().strip("\n") + + # Group continuation lines on prototypes + if self.state == self.STATE_PROTO: + if line.endswith("\\"): + prev += line.removesuffix("\\") + cont = True + + if not prev_ln: + prev_ln = ln + + continue + + if cont: + ln = prev_ln + line = prev + line + prev = "" + cont = False + prev_ln = None + + self.config.log.debug("%d %s%s: %s", + ln, self.st_name[self.state], + self.st_inline_name[self.inline_doc_state], + line) + + # TODO: not all states allow EXPORT_SYMBOL*, so this + # can be optimized later on to speedup parsing + self.process_export(self.config.function_table, line) + + # Hand this line to the appropriate state handler + if self.state == self.STATE_NORMAL: + self.process_normal(ln, line) + elif self.state == self.STATE_NAME: + self.process_name(ln, line) + elif self.state in [self.STATE_BODY, self.STATE_BODY_MAYBE, + self.STATE_BODY_WITH_BLANK_LINE]: + self.process_body(ln, line) + elif self.state == self.STATE_INLINE: # scanning for inline parameters + self.process_inline(ln, line) + elif self.state == self.STATE_PROTO: + self.process_proto(ln, line) + elif self.state == self.STATE_DOCBLOCK: + self.process_docblock(ln, line) + except OSError: + self.config.log.error(f"Error: Cannot open file {self.fname}") + self.config.errors += 1 -- cgit v1.2.3-59-g8ed1b From c3597ab27bc0e5eae23c74a76380000a0f8481e1 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 8 Apr 2025 18:09:17 +0800 Subject: scripts/kernel-doc.py: fix line number output With the Pyhton version, the actual output happens after parsing, from records stored at self.entries. Ensure that line numbers will be properly stored there and that they'll produce the desired results at the ReST output. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/5182a531d14b5fe9e1fc5da5f9dae05d66852a60.1744106242.git.mchehab+huawei@kernel.org --- scripts/lib/kdoc/kdoc_output.py | 13 +++++++------ scripts/lib/kdoc/kdoc_parser.py | 21 +++++++++++++++++---- 2 files changed, 24 insertions(+), 10 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_output.py b/scripts/lib/kdoc/kdoc_output.py index a246d213523c..6a7187980bec 100755 --- a/scripts/lib/kdoc/kdoc_output.py +++ b/scripts/lib/kdoc/kdoc_output.py @@ -255,7 +255,8 @@ class RestFormat(OutputFormat): def print_lineno(self, ln): """Outputs a line number""" - if self.enable_lineno and ln: + if self.enable_lineno and ln is not None: + ln += 1 self.data += f".. LINENO {ln}\n" def output_highlight(self, args): @@ -358,7 +359,7 @@ class RestFormat(OutputFormat): parameterdescs = args.get('parameterdescs', {}) parameterdesc_start_lines = args.get('parameterdesc_start_lines', {}) - ln = args.get('ln', 0) + ln = args.get('declaration_start_line', 0) count = 0 for parameter in parameterlist: @@ -375,11 +376,11 @@ class RestFormat(OutputFormat): if not func_macro: signature += ")" + self.print_lineno(ln) if args.get('typedef') or not args.get('functiontype'): self.data += f".. c:macro:: {args['function']}\n\n" if args.get('typedef'): - self.print_lineno(ln) self.data += " **Typedef**: " self.lineprefix = "" self.output_highlight(args.get('purpose', "")) @@ -434,7 +435,7 @@ class RestFormat(OutputFormat): name = args.get('enum', '') parameterlist = args.get('parameterlist', []) parameterdescs = args.get('parameterdescs', {}) - ln = args.get('ln', 0) + ln = args.get('declaration_start_line', 0) self.data += f"\n\n.. c:enum:: {name}\n\n" @@ -464,7 +465,7 @@ class RestFormat(OutputFormat): oldprefix = self.lineprefix name = args.get('typedef', '') - ln = args.get('ln', 0) + ln = args.get('declaration_start_line', 0) self.data += f"\n\n.. c:type:: {name}\n\n" @@ -484,7 +485,7 @@ class RestFormat(OutputFormat): purpose = args.get('purpose', "") declaration = args.get('definition', "") dtype = args.get('type', "struct") - ln = args.get('ln', 0) + ln = args.get('declaration_start_line', 0) parameterlist = args.get('parameterlist', []) parameterdescs = args.get('parameterdescs', {}) diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 3ce116595546..e8c86448d6b5 100755 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -276,7 +276,7 @@ class KernelDoc: self.entry.brcount = 0 self.entry.in_doc_sect = False - self.entry.declaration_start_line = ln + self.entry.declaration_start_line = ln + 1 def push_parameter(self, ln, decl_type, param, dtype, org_arg, declaration_name): @@ -806,8 +806,10 @@ class KernelDoc: parameterlist=self.entry.parameterlist, parameterdescs=self.entry.parameterdescs, parametertypes=self.entry.parametertypes, + parameterdesc_start_lines=self.entry.parameterdesc_start_lines, sectionlist=self.entry.sectionlist, sections=self.entry.sections, + section_start_lines=self.entry.section_start_lines, purpose=self.entry.declaration_purpose) def dump_enum(self, ln, proto): @@ -882,8 +884,10 @@ class KernelDoc: module=self.config.modulename, parameterlist=self.entry.parameterlist, parameterdescs=self.entry.parameterdescs, + parameterdesc_start_lines=self.entry.parameterdesc_start_lines, sectionlist=self.entry.sectionlist, sections=self.entry.sections, + section_start_lines=self.entry.section_start_lines, purpose=self.entry.declaration_purpose) def dump_declaration(self, ln, prototype): @@ -1054,8 +1058,10 @@ class KernelDoc: parameterlist=self.entry.parameterlist, parameterdescs=self.entry.parameterdescs, parametertypes=self.entry.parametertypes, + parameterdesc_start_lines=self.entry.parameterdesc_start_lines, sectionlist=self.entry.sectionlist, sections=self.entry.sections, + section_start_lines=self.entry.section_start_lines, purpose=self.entry.declaration_purpose, func_macro=func_macro) else: @@ -1067,8 +1073,10 @@ class KernelDoc: parameterlist=self.entry.parameterlist, parameterdescs=self.entry.parameterdescs, parametertypes=self.entry.parametertypes, + parameterdesc_start_lines=self.entry.parameterdesc_start_lines, sectionlist=self.entry.sectionlist, sections=self.entry.sections, + section_start_lines=self.entry.section_start_lines, purpose=self.entry.declaration_purpose, func_macro=func_macro) @@ -1112,8 +1120,10 @@ class KernelDoc: parameterlist=self.entry.parameterlist, parameterdescs=self.entry.parameterdescs, parametertypes=self.entry.parametertypes, + parameterdesc_start_lines=self.entry.parameterdesc_start_lines, sectionlist=self.entry.sectionlist, sections=self.entry.sections, + section_start_lines=self.entry.section_start_lines, purpose=self.entry.declaration_purpose) return @@ -1136,6 +1146,7 @@ class KernelDoc: module=self.entry.modulename, sectionlist=self.entry.sectionlist, sections=self.entry.sections, + section_start_lines=self.entry.section_start_lines, purpose=self.entry.declaration_purpose) return @@ -1168,7 +1179,7 @@ class KernelDoc: return # start a new entry - self.reset_state(ln + 1) + self.reset_state(ln) self.entry.in_doc_sect = False # next line is always the function name @@ -1281,7 +1292,7 @@ class KernelDoc: if r.match(line): self.dump_section() self.entry.section = self.section_default - self.entry.new_start_line = line + self.entry.new_start_line = ln self.entry.contents = "" if doc_sect.search(line): @@ -1619,7 +1630,9 @@ class KernelDoc: self.dump_section() self.output_declaration("doc", None, sectionlist=self.entry.sectionlist, - sections=self.entry.sections, module=self.config.modulename) + sections=self.entry.sections, + section_start_lines=self.entry.section_start_lines, + module=self.config.modulename) self.reset_state(ln) elif doc_content.search(line): -- cgit v1.2.3-59-g8ed1b From 408269ae35d6b88d48477af56a2376ea05e619ca Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 8 Apr 2025 18:09:18 +0800 Subject: scripts/kernel-doc.py: fix handling of doc output check The filtering logic was seeking for the DOC name to check for symbols, but such data is stored only inside a section. Add it to the output_declaration, as it is quicker/easier to check the declaration name than to check inside each section. While here, make sure that the output for both ReST and man after filtering will be similar to what kernel-doc Perl version does. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/6d8b77af85295452c0191863ea1041f4195aeaaf.1744106242.git.mchehab+huawei@kernel.org --- scripts/lib/kdoc/kdoc_output.py | 29 ++++++++++++----------------- scripts/lib/kdoc/kdoc_parser.py | 3 ++- 2 files changed, 14 insertions(+), 18 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_output.py b/scripts/lib/kdoc/kdoc_output.py index 6a7187980bec..7a945dd80c9b 100755 --- a/scripts/lib/kdoc/kdoc_output.py +++ b/scripts/lib/kdoc/kdoc_output.py @@ -122,13 +122,13 @@ class OutputFormat: if self.no_doc_sections: return False + if name in self.nosymbol: + return False + if self.out_mode == self.OUTPUT_ALL: return True if self.out_mode == self.OUTPUT_INCLUDE: - if name in self.nosymbol: - return False - if name in self.function_table: return True @@ -154,15 +154,6 @@ class OutputFormat: return False - def check_function(self, fname, name, args): - return True - - def check_enum(self, fname, name, args): - return True - - def check_typedef(self, fname, name, args): - return True - def msg(self, fname, name, args): self.data = "" @@ -306,7 +297,7 @@ class RestFormat(OutputFormat): for line in output.strip("\n").split("\n"): self.data += self.lineprefix + line + "\n" - def out_section(self, args, out_reference=False): + def out_section(self, args, out_docblock=False): """ Outputs a block section. @@ -325,7 +316,7 @@ class RestFormat(OutputFormat): continue if not self.out_mode == self.OUTPUT_INCLUDE: - if out_reference: + if out_docblock: self.data += f".. _{section}:\n\n" if not self.symbol: @@ -339,8 +330,7 @@ class RestFormat(OutputFormat): def out_doc(self, fname, name, args): if not self.check_doc(name): return - - self.out_section(args, out_reference=True) + self.out_section(args, out_docblock=True) def out_function(self, fname, name, args): @@ -583,8 +573,10 @@ class ManFormat(OutputFormat): for line in contents.strip("\n").split("\n"): line = Re(r"^\s*").sub("", line) + if not line: + continue - if line and line[0] == ".": + if line[0] == ".": self.data += "\\&" + line + "\n" else: self.data += line + "\n" @@ -594,6 +586,9 @@ class ManFormat(OutputFormat): sectionlist = args.get('sectionlist', []) sections = args.get('sections', {}) + if not self.check_doc(name): + return + self.data += f'.TH "{module}" 9 "{module}" "{self.man_date}" "API Manual" LINUX' + "\n" for section in sectionlist: diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index e8c86448d6b5..74b311c8184c 100755 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1198,6 +1198,7 @@ class KernelDoc: else: self.entry.section = doc_block.group(1) + self.entry.identifier = self.entry.section self.state = self.STATE_DOCBLOCK return @@ -1628,7 +1629,7 @@ class KernelDoc: if doc_end.search(line): self.dump_section() - self.output_declaration("doc", None, + self.output_declaration("doc", self.entry.identifier, sectionlist=self.entry.sectionlist, sections=self.entry.sections, section_start_lines=self.entry.section_start_lines, -- cgit v1.2.3-59-g8ed1b From 9cbc2d3b137bfdb7937265c46e9e5d7e72952841 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 8 Apr 2025 18:09:20 +0800 Subject: scripts/kernel-doc.py: postpone warnings to the output plugin We don't want to have warnings displayed for symbols that weren't output. So, postpone warnings print to the output plugin, where symbol output is validated. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/e6344711e390cf22af02a56bb5dd51ca67c0afb6.1744106242.git.mchehab+huawei@kernel.org --- scripts/lib/kdoc/kdoc_output.py | 24 +++++++++++++++++++----- scripts/lib/kdoc/kdoc_parser.py | 41 ++++++++++++++++++++--------------------- 2 files changed, 39 insertions(+), 26 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_output.py b/scripts/lib/kdoc/kdoc_output.py index d0c8cedb0ea5..6582d1f64d1e 100755 --- a/scripts/lib/kdoc/kdoc_output.py +++ b/scripts/lib/kdoc/kdoc_output.py @@ -116,7 +116,16 @@ class OutputFormat: return block - def check_doc(self, name): + def out_warnings(self, args): + warnings = args.get('warnings', []) + + for warning, log_msg in warnings: + if warning: + self.config.log.warning(log_msg) + else: + self.config.log.info(log_msg) + + def check_doc(self, name, args): """Check if DOC should be output""" if self.no_doc_sections: @@ -126,19 +135,22 @@ class OutputFormat: return False if self.out_mode == self.OUTPUT_ALL: + self.out_warnings(args) return True if self.out_mode == self.OUTPUT_INCLUDE: if name in self.function_table: + self.out_warnings(args) return True return False - def check_declaration(self, dtype, name): + def check_declaration(self, dtype, name, args): if name in self.nosymbol: return False if self.out_mode == self.OUTPUT_ALL: + self.out_warnings(args) return True if self.out_mode in [self.OUTPUT_INCLUDE, self.OUTPUT_EXPORTED]: @@ -147,9 +159,11 @@ class OutputFormat: if self.out_mode == self.OUTPUT_INTERNAL: if dtype != "function": + self.out_warnings(args) return True if name not in self.function_table: + self.out_warnings(args) return True return False @@ -163,7 +177,7 @@ class OutputFormat: self.out_doc(fname, name, args) return self.data - if not self.check_declaration(dtype, name): + if not self.check_declaration(dtype, name, args): return self.data if dtype == "function": @@ -328,7 +342,7 @@ class RestFormat(OutputFormat): self.data += "\n" def out_doc(self, fname, name, args): - if not self.check_doc(name): + if not self.check_doc(name, args): return self.out_section(args, out_docblock=True) @@ -586,7 +600,7 @@ class ManFormat(OutputFormat): sectionlist = args.get('sectionlist', []) sections = args.get('sections', {}) - if not self.check_doc(name): + if not self.check_doc(name, args): return self.data += f'.TH "{module}" 9 "{module}" "{self.man_date}" "API Manual" LINUX' + "\n" diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 74b311c8184c..3698ef625367 100755 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -131,23 +131,23 @@ class KernelDoc: # Place all potential outputs into an array self.entries = [] - def show_warnings(self, dtype, declaration_name): # pylint: disable=W0613 - """ - Allow filtering out warnings - """ - - # TODO: implement it - - return True - # TODO: rename to emit_message def emit_warning(self, ln, msg, warning=True): """Emit a message""" + log_msg = f"{self.fname}:{ln} {msg}" + + if self.entry: + # Delegate warning output to output logic, as this way it + # will report warnings/info only for symbols that are output + + self.entry.warnings.append((warning, log_msg)) + return + if warning: - self.config.log.warning("%s:%d %s", self.fname, ln, msg) + self.config.log.warning(log_msg) else: - self.config.log.info("%s:%d %s", self.fname, ln, msg) + self.config.log.info(log_msg) def dump_section(self, start_new=True): """ @@ -221,10 +221,9 @@ class KernelDoc: # For now, we're keeping the same name of the function just to make # easier to compare the source code of both scripts - if "declaration_start_line" not in args: - args["declaration_start_line"] = self.entry.declaration_start_line - + args["declaration_start_line"] = self.entry.declaration_start_line args["type"] = dtype + args["warnings"] = self.entry.warnings # TODO: use colletions.OrderedDict @@ -257,6 +256,8 @@ class KernelDoc: self.entry.struct_actual = "" self.entry.prototype = "" + self.entry.warnings = [] + self.entry.parameterlist = [] self.entry.parameterdescs = {} self.entry.parametertypes = {} @@ -328,7 +329,7 @@ class KernelDoc: if param not in self.entry.parameterdescs and not param.startswith("#"): self.entry.parameterdescs[param] = self.undescribed - if self.show_warnings(dtype, declaration_name) and "." not in param: + if "." not in param: if decl_type == 'function': dname = f"{decl_type} parameter" else: @@ -868,16 +869,14 @@ class KernelDoc: self.entry.parameterlist.append(arg) if arg not in self.entry.parameterdescs: self.entry.parameterdescs[arg] = self.undescribed - if self.show_warnings("enum", declaration_name): - self.emit_warning(ln, - f"Enum value '{arg}' not described in enum '{declaration_name}'") + self.emit_warning(ln, + f"Enum value '{arg}' not described in enum '{declaration_name}'") member_set.add(arg) for k in self.entry.parameterdescs: if k not in member_set: - if self.show_warnings("enum", declaration_name): - self.emit_warning(ln, - f"Excess enum value '%{k}' description in '{declaration_name}'") + self.emit_warning(ln, + f"Excess enum value '%{k}' description in '{declaration_name}'") self.output_declaration('enum', declaration_name, enum=declaration_name, -- cgit v1.2.3-59-g8ed1b From 485f6f7960c468d9e27665f61517dc5fc097ea98 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 8 Apr 2025 18:09:26 +0800 Subject: scripts/kernel-doc.py: adjust some coding style issues Make pylint happier by adding some missing documentation and addressing a couple of pylint warnings. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/0f9d5473105e4c09c6c41e3db72cc63f1d4d55f9.1744106242.git.mchehab+huawei@kernel.org --- scripts/kernel-doc.py | 12 +++++----- scripts/lib/kdoc/kdoc_files.py | 4 +--- scripts/lib/kdoc/kdoc_output.py | 50 ++++++++++++++++++++++++++++++++--------- scripts/lib/kdoc/kdoc_parser.py | 30 ++++++------------------- scripts/lib/kdoc/kdoc_re.py | 3 ++- 5 files changed, 57 insertions(+), 42 deletions(-) mode change 100755 => 100644 scripts/lib/kdoc/kdoc_files.py (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/kernel-doc.py b/scripts/kernel-doc.py index 90aacd17499a..eca7e34f9d03 100755 --- a/scripts/kernel-doc.py +++ b/scripts/kernel-doc.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: GPL-2.0 # Copyright(c) 2025: Mauro Carvalho Chehab . # -# pylint: disable=C0103 +# pylint: disable=C0103,R0915 # # Converted from the kernel-doc script originally written in Perl # under GPLv2, copyrighted since 1998 by the following authors: @@ -165,6 +165,8 @@ neither here nor at the original Perl script. class MsgFormatter(logging.Formatter): + """Helper class to format warnings on a similar way to kernel-doc.pl""" + def format(self, record): record.levelname = record.levelname.capitalize() return logging.Formatter.format(self, record) @@ -241,7 +243,7 @@ def main(): # Those are valid for all 3 types of filter parser.add_argument("-n", "-nosymbol", "--nosymbol", action='append', - help=NOSYMBOL_DESC) + help=NOSYMBOL_DESC) parser.add_argument("-D", "-no-doc-sections", "--no-doc-sections", action='store_true', help="Don't outputt DOC sections") @@ -286,9 +288,9 @@ def main(): kfiles.parse(args.files, export_file=args.export_file) for t in kfiles.msg(enable_lineno=args.enable_lineno, export=args.export, - internal=args.internal, symbol=args.symbol, - nosymbol=args.nosymbol, - no_doc_sections=args.no_doc_sections): + internal=args.internal, symbol=args.symbol, + nosymbol=args.nosymbol, + no_doc_sections=args.no_doc_sections): msg = t[1] if msg: print(msg) diff --git a/scripts/lib/kdoc/kdoc_files.py b/scripts/lib/kdoc/kdoc_files.py old mode 100755 new mode 100644 index dd3dbe87520b..e2221db7022a --- a/scripts/lib/kdoc/kdoc_files.py +++ b/scripts/lib/kdoc/kdoc_files.py @@ -4,8 +4,6 @@ # # pylint: disable=R0903,R0913,R0914,R0917 -# TODO: implement warning filtering - """ Parse lernel-doc tags on multiple kernel source files. """ @@ -128,7 +126,7 @@ class KernelFiles(): def __init__(self, verbose=False, out_style=None, werror=False, wreturn=False, wshort_desc=False, wcontents_before_sections=False, - logger=None, modulename=None, export_file=None): + logger=None, modulename=None): """ Initialize startup variables and parse all files """ diff --git a/scripts/lib/kdoc/kdoc_output.py b/scripts/lib/kdoc/kdoc_output.py index 6582d1f64d1e..7f84bf12f1e1 100755 --- a/scripts/lib/kdoc/kdoc_output.py +++ b/scripts/lib/kdoc/kdoc_output.py @@ -2,9 +2,7 @@ # SPDX-License-Identifier: GPL-2.0 # Copyright(c) 2025: Mauro Carvalho Chehab . # -# pylint: disable=C0301,R0911,R0912,R0913,R0914,R0915,R0917 - -# TODO: implement warning filtering +# pylint: disable=C0301,R0902,R0911,R0912,R0913,R0914,R0915,R0917 """ Implement output filters to print kernel-doc documentation. @@ -52,6 +50,11 @@ type_member_func = type_member + Re(r"\(\)", cache=False) class OutputFormat: + """ + Base class for OutputFormat. If used as-is, it means that only + warnings will be displayed. + """ + # output mode. OUTPUT_ALL = 0 # output all symbols and doc sections OUTPUT_INCLUDE = 1 # output only specified symbols @@ -75,6 +78,10 @@ class OutputFormat: self.data = "" def set_config(self, config): + """ + Setup global config variables used by both parser and output. + """ + self.config = config def set_filter(self, export, internal, symbol, nosymbol, function_table, @@ -117,6 +124,10 @@ class OutputFormat: return block def out_warnings(self, args): + """ + Output warnings for identifiers that will be displayed. + """ + warnings = args.get('warnings', []) for warning, log_msg in warnings: @@ -146,6 +157,11 @@ class OutputFormat: return False def check_declaration(self, dtype, name, args): + """ + Checks if a declaration should be output or not based on the + filtering criteria. + """ + if name in self.nosymbol: return False @@ -169,6 +185,10 @@ class OutputFormat: return False def msg(self, fname, name, args): + """ + Handles a single entry from kernel-doc parser + """ + self.data = "" dtype = args.get('type', "") @@ -203,24 +223,25 @@ class OutputFormat: return None # Virtual methods to be overridden by inherited classes + # At the base class, those do nothing. def out_doc(self, fname, name, args): - pass + """Outputs a DOC block""" def out_function(self, fname, name, args): - pass + """Outputs a function""" def out_enum(self, fname, name, args): - pass + """Outputs an enum""" def out_typedef(self, fname, name, args): - pass + """Outputs a typedef""" def out_struct(self, fname, name, args): - pass + """Outputs a struct""" class RestFormat(OutputFormat): - # """Consts and functions used by ReST output""" + """Consts and functions used by ReST output""" highlights = [ (type_constant, r"``\1``"), @@ -265,6 +286,11 @@ class RestFormat(OutputFormat): self.data += f".. LINENO {ln}\n" def output_highlight(self, args): + """ + Outputs a C symbol that may require being converted to ReST using + the self.highlights variable + """ + input_text = args output = "" in_literal = False @@ -579,6 +605,10 @@ class ManFormat(OutputFormat): self.man_date = dt.strftime("%B %Y") def output_highlight(self, block): + """ + Outputs a C symbol that may require being highlighted with + self.highlights variable using troff syntax + """ contents = self.highlight_block(block) @@ -601,7 +631,7 @@ class ManFormat(OutputFormat): sections = args.get('sections', {}) if not self.check_doc(name, args): - return + return self.data += f'.TH "{module}" 9 "{module}" "{self.man_date}" "API Manual" LINUX' + "\n" diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 3698ef625367..dcb9515fc40b 100755 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -131,7 +131,7 @@ class KernelDoc: # Place all potential outputs into an array self.entries = [] - # TODO: rename to emit_message + # TODO: rename to emit_message after removal of kernel-doc.pl def emit_warning(self, ln, msg, warning=True): """Emit a message""" @@ -157,19 +157,6 @@ class KernelDoc: name = self.entry.section contents = self.entry.contents - # TODO: we can prevent dumping empty sections here with: - # - # if self.entry.contents.strip("\n"): - # if start_new: - # self.entry.section = self.section_default - # self.entry.contents = "" - # - # return - # - # But, as we want to be producing the same output of the - # venerable kernel-doc Perl tool, let's just output everything, - # at least for now - if type_param.match(name): name = type_param.group(1) @@ -205,7 +192,7 @@ class KernelDoc: self.entry.section = self.section_default self.entry.contents = "" - # TODO: rename it to store_declaration + # TODO: rename it to store_declaration after removal of kernel-doc.pl def output_declaration(self, dtype, name, **args): """ Stores the entry into an entry array. @@ -225,13 +212,13 @@ class KernelDoc: args["type"] = dtype args["warnings"] = self.entry.warnings - # TODO: use colletions.OrderedDict + # TODO: use colletions.OrderedDict to remove sectionlist sections = args.get('sections', {}) sectionlist = args.get('sectionlist', []) # Drop empty sections - # TODO: improve it to emit warnings + # TODO: improve empty sections logic to emit warnings for section in ["Description", "Return"]: if section in sectionlist: if not sections[section].rstrip(): @@ -636,7 +623,9 @@ class KernelDoc: # Replace macros # - # TODO: it is better to also move those to the NestedMatch logic, + # TODO: use NestedMatch for FOO($1, $2, ...) matches + # + # it is better to also move those to the NestedMatch logic, # to ensure that parenthesis will be properly matched. (Re(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), @@ -906,7 +895,6 @@ class KernelDoc: self.dump_struct(ln, prototype) return - # TODO: handle other types self.output_declaration(self.entry.decl_type, prototype, entry=self.entry) @@ -1680,10 +1668,6 @@ class KernelDoc: self.st_inline_name[self.inline_doc_state], line) - # TODO: not all states allow EXPORT_SYMBOL*, so this - # can be optimized later on to speedup parsing - self.process_export(self.config.function_table, line) - # Hand this line to the appropriate state handler if self.state == self.STATE_NORMAL: self.process_normal(ln, line) diff --git a/scripts/lib/kdoc/kdoc_re.py b/scripts/lib/kdoc/kdoc_re.py index 512b6521e79d..d28485ff94d6 100755 --- a/scripts/lib/kdoc/kdoc_re.py +++ b/scripts/lib/kdoc/kdoc_re.py @@ -131,7 +131,8 @@ class NestedMatch: will ignore the search string. """ - # TODO: + # TODO: make NestedMatch handle multiple match groups + # # Right now, regular expressions to match it are defined only up to # the start delimiter, e.g.: # -- cgit v1.2.3-59-g8ed1b From 78ea748f7978d39a6ee29897d3bd32e6208f74ac Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 8 Apr 2025 18:09:27 +0800 Subject: scripts/lib/kdoc/kdoc_parser.py: fix Python compat with < v3.13 - str.replace count was introduced only in Python 3.13; - before Python 3.13, f-string dict arguments can't use the same delimiter of the main string. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/e2b8e8361294558dae09236e4b8fbea5d86be5a3.1744106242.git.mchehab+huawei@kernel.org --- scripts/lib/kdoc/kdoc_output.py | 8 ++++---- scripts/lib/kdoc/kdoc_parser.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_output.py b/scripts/lib/kdoc/kdoc_output.py index 7f84bf12f1e1..e0ed79e4d985 100755 --- a/scripts/lib/kdoc/kdoc_output.py +++ b/scripts/lib/kdoc/kdoc_output.py @@ -647,16 +647,16 @@ class ManFormat(OutputFormat): sectionlist = args.get('sectionlist', []) sections = args.get('sections', {}) - self.data += f'.TH "{args['function']}" 9 "{args['function']}" "{self.man_date}" "Kernel Hacker\'s Manual" LINUX' + "\n" + self.data += f'.TH "{args["function"]}" 9 "{args["function"]}" "{self.man_date}" "Kernel Hacker\'s Manual" LINUX' + "\n" self.data += ".SH NAME\n" self.data += f"{args['function']} \\- {args['purpose']}\n" self.data += ".SH SYNOPSIS\n" if args.get('functiontype', ''): - self.data += f'.B "{args['functiontype']}" {args['function']}' + "\n" + self.data += f'.B "{args["functiontype"]}" {args["function"]}' + "\n" else: - self.data += f'.B "{args['function']}' + "\n" + self.data += f'.B "{args["function"]}' + "\n" count = 0 parenth = "(" @@ -697,7 +697,7 @@ class ManFormat(OutputFormat): sectionlist = args.get('sectionlist', []) sections = args.get('sections', {}) - self.data += f'.TH "{args['module']}" 9 "enum {args['enum']}" "{self.man_date}" "API Manual" LINUX' + "\n" + self.data += f'.TH "{args["module"]}" 9 "enum {args["enum"]}" "{self.man_date}" "API Manual" LINUX' + "\n" self.data += ".SH NAME\n" self.data += f"enum {args['enum']} \\- {args['purpose']}\n" diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index dcb9515fc40b..e48ed128ca04 100755 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1444,9 +1444,9 @@ class KernelDoc: r = Re(r'long\s+(sys_.*?),') if r.search(proto): - proto = proto.replace(',', '(', count=1) + proto = Re(',').sub('(', proto, count=1) elif is_void: - proto = proto.replace(')', '(void)', count=1) + proto = Re(r'\)').sub('(void)', proto, count=1) # Now delete all of the odd-numbered commas in the proto # so that argument types & names don't have a comma between them -- cgit v1.2.3-59-g8ed1b From 2ab867a4941de2e9d7804e76ab002ad74c73b078 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 8 Apr 2025 18:09:28 +0800 Subject: scripts/kernel-doc.py: move modulename to man class Only man output requires a modulename. Move its definition to the man class. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/583085e3885b0075d16ef9961b4f2ad870f30a55.1744106242.git.mchehab+huawei@kernel.org --- scripts/kernel-doc.py | 6 +++--- scripts/lib/kdoc/kdoc_files.py | 6 +----- scripts/lib/kdoc/kdoc_output.py | 12 ++++++------ scripts/lib/kdoc/kdoc_parser.py | 9 +-------- 4 files changed, 11 insertions(+), 22 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/kernel-doc.py b/scripts/kernel-doc.py index eca7e34f9d03..6a6bc81efd31 100755 --- a/scripts/kernel-doc.py +++ b/scripts/kernel-doc.py @@ -186,6 +186,7 @@ def main(): help="Enable debug messages") parser.add_argument("-M", "-modulename", "--modulename", + default="Kernel API", help="Allow setting a module name at the output.") parser.add_argument("-l", "-enable-lineno", "--enable_lineno", @@ -273,7 +274,7 @@ def main(): logger.addHandler(handler) if args.man: - out_style = ManFormat() + out_style = ManFormat(modulename=args.modulename) elif args.none: out_style = None else: @@ -282,8 +283,7 @@ def main(): kfiles = KernelFiles(verbose=args.verbose, out_style=out_style, werror=args.werror, wreturn=args.wreturn, wshort_desc=args.wshort_desc, - wcontents_before_sections=args.wcontents_before_sections, - modulename=args.modulename) + wcontents_before_sections=args.wcontents_before_sections) kfiles.parse(args.files, export_file=args.export_file) diff --git a/scripts/lib/kdoc/kdoc_files.py b/scripts/lib/kdoc/kdoc_files.py index e2221db7022a..5a6e92e34d05 100644 --- a/scripts/lib/kdoc/kdoc_files.py +++ b/scripts/lib/kdoc/kdoc_files.py @@ -126,7 +126,7 @@ class KernelFiles(): def __init__(self, verbose=False, out_style=None, werror=False, wreturn=False, wshort_desc=False, wcontents_before_sections=False, - logger=None, modulename=None): + logger=None): """ Initialize startup variables and parse all files """ @@ -134,9 +134,6 @@ class KernelFiles(): if not verbose: verbose = bool(os.environ.get("KBUILD_VERBOSE", 0)) - if not modulename: - modulename = "Kernel API" - if out_style is None: out_style = OutputFormat() @@ -168,7 +165,6 @@ class KernelFiles(): self.config.wreturn = wreturn self.config.wshort_desc = wshort_desc self.config.wcontents_before_sections = wcontents_before_sections - self.config.modulename = modulename self.config.function_table = set() self.config.source_map = {} diff --git a/scripts/lib/kdoc/kdoc_output.py b/scripts/lib/kdoc/kdoc_output.py index e0ed79e4d985..8be69245c0d0 100755 --- a/scripts/lib/kdoc/kdoc_output.py +++ b/scripts/lib/kdoc/kdoc_output.py @@ -586,7 +586,7 @@ class ManFormat(OutputFormat): ) blankline = "" - def __init__(self): + def __init__(self, modulename): """ Creates class variables. @@ -595,6 +595,7 @@ class ManFormat(OutputFormat): """ super().__init__() + self.modulename = modulename dt = datetime.now() if os.environ.get("KBUILD_BUILD_TIMESTAMP", None): @@ -626,14 +627,13 @@ class ManFormat(OutputFormat): self.data += line + "\n" def out_doc(self, fname, name, args): - module = args.get('module') sectionlist = args.get('sectionlist', []) sections = args.get('sections', {}) if not self.check_doc(name, args): return - self.data += f'.TH "{module}" 9 "{module}" "{self.man_date}" "API Manual" LINUX' + "\n" + self.data += f'.TH "{self.modulename}" 9 "{self.modulename}" "{self.man_date}" "API Manual" LINUX' + "\n" for section in sectionlist: self.data += f'.SH "{section}"' + "\n" @@ -697,7 +697,7 @@ class ManFormat(OutputFormat): sectionlist = args.get('sectionlist', []) sections = args.get('sections', {}) - self.data += f'.TH "{args["module"]}" 9 "enum {args["enum"]}" "{self.man_date}" "API Manual" LINUX' + "\n" + self.data += f'.TH "{self.modulename}" 9 "enum {args["enum"]}" "{self.man_date}" "API Manual" LINUX' + "\n" self.data += ".SH NAME\n" self.data += f"enum {args['enum']} \\- {args['purpose']}\n" @@ -727,7 +727,7 @@ class ManFormat(OutputFormat): self.output_highlight(sections[section]) def out_typedef(self, fname, name, args): - module = args.get('module') + module = self.modulename typedef = args.get('typedef') purpose = args.get('purpose') sectionlist = args.get('sectionlist', []) @@ -743,7 +743,7 @@ class ManFormat(OutputFormat): self.output_highlight(sections.get(section)) def out_struct(self, fname, name, args): - module = args.get('module') + module = self.modulename struct_type = args.get('type') struct_name = args.get('struct') purpose = args.get('purpose') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index e48ed128ca04..f923600561f8 100755 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -791,7 +791,6 @@ class KernelDoc: self.output_declaration(decl_type, declaration_name, struct=declaration_name, - module=self.entry.modulename, definition=declaration, parameterlist=self.entry.parameterlist, parameterdescs=self.entry.parameterdescs, @@ -869,7 +868,6 @@ class KernelDoc: self.output_declaration('enum', declaration_name, enum=declaration_name, - module=self.config.modulename, parameterlist=self.entry.parameterlist, parameterdescs=self.entry.parameterdescs, parameterdesc_start_lines=self.entry.parameterdesc_start_lines, @@ -1040,7 +1038,6 @@ class KernelDoc: self.output_declaration(decl_type, declaration_name, function=declaration_name, typedef=True, - module=self.config.modulename, functiontype=return_type, parameterlist=self.entry.parameterlist, parameterdescs=self.entry.parameterdescs, @@ -1055,7 +1052,6 @@ class KernelDoc: self.output_declaration(decl_type, declaration_name, function=declaration_name, typedef=False, - module=self.config.modulename, functiontype=return_type, parameterlist=self.entry.parameterlist, parameterdescs=self.entry.parameterdescs, @@ -1102,7 +1098,6 @@ class KernelDoc: self.output_declaration(decl_type, declaration_name, function=declaration_name, typedef=True, - module=self.entry.modulename, functiontype=return_type, parameterlist=self.entry.parameterlist, parameterdescs=self.entry.parameterdescs, @@ -1130,7 +1125,6 @@ class KernelDoc: self.output_declaration('typedef', declaration_name, typedef=declaration_name, - module=self.entry.modulename, sectionlist=self.entry.sectionlist, sections=self.entry.sections, section_start_lines=self.entry.section_start_lines, @@ -1619,8 +1613,7 @@ class KernelDoc: self.output_declaration("doc", self.entry.identifier, sectionlist=self.entry.sectionlist, sections=self.entry.sections, - section_start_lines=self.entry.section_start_lines, - module=self.config.modulename) + section_start_lines=self.entry.section_start_lines) self.reset_state(ln) elif doc_content.search(line): -- cgit v1.2.3-59-g8ed1b From e4b2bd908c3d8f071d4fac6e588fffc6110c1b1f Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 8 Apr 2025 18:09:30 +0800 Subject: scripts/lib/kdoc/kdoc_parser.py: remove a python 3.9 dependency str.removesuffix() was added on Python 3.9, but rstrip() actually does the same thing, as we just want to remove a single character. It is also shorter. So, use it. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/f64cc4adef107ada26da4bfb7e4b7002dd783173.1744106242.git.mchehab+huawei@kernel.org --- scripts/lib/kdoc/kdoc_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index f923600561f8..77e8bfeccc8e 100755 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1641,7 +1641,7 @@ class KernelDoc: # Group continuation lines on prototypes if self.state == self.STATE_PROTO: if line.endswith("\\"): - prev += line.removesuffix("\\") + prev += line.rstrip("\\") cont = True if not prev_ln: -- cgit v1.2.3-59-g8ed1b From 11afeab6d74d1be80420b47113c4893c88dcc04b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 8 Apr 2025 18:09:31 +0800 Subject: scripts/kernel-doc.py: Properly handle Werror and exit codes The original kernel-doc script has a logic to return warnings as errors, and to report the number of warnings found, if in verbose mode. Implement it to be fully compatible with the original script. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/de33b0cebd9fdf82d8b221bcfe41db7269286222.1744106242.git.mchehab+huawei@kernel.org --- scripts/kernel-doc.py | 18 ++++++++++++++++-- scripts/lib/kdoc/kdoc_files.py | 12 ++++++++++-- scripts/lib/kdoc/kdoc_output.py | 8 +++----- scripts/lib/kdoc/kdoc_parser.py | 15 ++++++--------- 4 files changed, 35 insertions(+), 18 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/kernel-doc.py b/scripts/kernel-doc.py index 6a6bc81efd31..2f2fad813024 100755 --- a/scripts/kernel-doc.py +++ b/scripts/kernel-doc.py @@ -78,8 +78,6 @@ # Yacine Belkadi # Yujie Liu -# TODO: implement warning filtering - """ kernel_doc ========== @@ -295,6 +293,22 @@ def main(): if msg: print(msg) + error_count = kfiles.errors + if not error_count: + sys.exit(0) + + if args.werror: + print(f"{error_count} warnings as errors") + sys.exit(error_count) + + if args.verbose: + print(f"{error_count} errors") + + if args.none: + sys.exit(0) + + sys.exit(error_count) + # Call main method if __name__ == "__main__": diff --git a/scripts/lib/kdoc/kdoc_files.py b/scripts/lib/kdoc/kdoc_files.py index e52a6d05237e..182d9ed58a72 100644 --- a/scripts/lib/kdoc/kdoc_files.py +++ b/scripts/lib/kdoc/kdoc_files.py @@ -12,7 +12,6 @@ import argparse import logging import os import re -import sys from kdoc_parser import KernelDoc from kdoc_output import OutputFormat @@ -109,7 +108,7 @@ class KernelFiles(): KernelDoc.process_export(self.config.function_table, line) except IOError: - print(f"Error: Cannot open fname {fname}", fname=sys.stderr) + self.config.log.error("Error: Cannot open fname %s", fname) self.config.errors += 1 def file_not_found_cb(self, fname): @@ -262,3 +261,12 @@ class KernelFiles(): fname, ln, dtype) if msg: yield fname, msg + + @property + def errors(self): + """ + Return a count of the number of warnings found, including + the ones displayed while interacting over self.msg. + """ + + return self.config.errors diff --git a/scripts/lib/kdoc/kdoc_output.py b/scripts/lib/kdoc/kdoc_output.py index eb013075da84..e9b4d0093084 100755 --- a/scripts/lib/kdoc/kdoc_output.py +++ b/scripts/lib/kdoc/kdoc_output.py @@ -128,11 +128,9 @@ class OutputFormat: warnings = args.get('warnings', []) - for warning, log_msg in warnings: - if warning: - self.config.log.warning(log_msg) - else: - self.config.log.info(log_msg) + for log_msg in warnings: + self.config.log.warning(log_msg) + self.config.errors += 1 def check_doc(self, name, args): """Check if DOC should be output""" diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 77e8bfeccc8e..43e6ffbdcc2c 100755 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -137,17 +137,18 @@ class KernelDoc: log_msg = f"{self.fname}:{ln} {msg}" + if not warning: + self.config.log.info(log_msg) + return + if self.entry: # Delegate warning output to output logic, as this way it # will report warnings/info only for symbols that are output - self.entry.warnings.append((warning, log_msg)) + self.entry.warnings.append(log_msg) return - if warning: - self.config.log.warning(log_msg) - else: - self.config.log.info(log_msg) + self.config.log.warning(log_msg) def dump_section(self, start_new=True): """ @@ -556,7 +557,6 @@ class KernelDoc: if not members: self.emit_warning(ln, f"{proto} error: Cannot parse struct or union!") - self.config.errors += 1 return if self.entry.identifier != declaration_name: @@ -831,7 +831,6 @@ class KernelDoc: if not members: self.emit_warning(ln, f"{proto}: error: Cannot parse enum!") - self.config.errors += 1 return if self.entry.identifier != declaration_name: @@ -1132,7 +1131,6 @@ class KernelDoc: return self.emit_warning(ln, "error: Cannot parse typedef!") - self.config.errors += 1 @staticmethod def process_export(function_table, line): @@ -1677,4 +1675,3 @@ class KernelDoc: self.process_docblock(ln, line) except OSError: self.config.log.error(f"Error: Cannot open file {self.fname}") - self.config.errors += 1 -- cgit v1.2.3-59-g8ed1b From 16740c29dbf3275a22691d3d7c63701992872898 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 8 Apr 2025 18:09:34 +0800 Subject: scripts/kernel_doc.py: better handle exported symbols Change the logic which detects internal/external symbols in a way that we can re-use it when calling via Sphinx extension. While here, remove an unused self.config var and let it clearer that self.config variables are read-only. This helps to allow handling multiple times in parallel if ever needed. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/6a69ba8d2b7ee6a6427abb53e60d09bd4d3565ee.1744106242.git.mchehab+huawei@kernel.org --- scripts/kernel-doc.py | 2 +- scripts/lib/kdoc/kdoc_files.py | 140 +++++++++++++++++++++------------------- scripts/lib/kdoc/kdoc_output.py | 9 +-- scripts/lib/kdoc/kdoc_parser.py | 52 +++++++++++++-- 4 files changed, 124 insertions(+), 79 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/kernel-doc.py b/scripts/kernel-doc.py index 2f2fad813024..12ae66f40bd7 100755 --- a/scripts/kernel-doc.py +++ b/scripts/kernel-doc.py @@ -287,7 +287,7 @@ def main(): for t in kfiles.msg(enable_lineno=args.enable_lineno, export=args.export, internal=args.internal, symbol=args.symbol, - nosymbol=args.nosymbol, + nosymbol=args.nosymbol, export_file=args.export_file, no_doc_sections=args.no_doc_sections): msg = t[1] if msg: diff --git a/scripts/lib/kdoc/kdoc_files.py b/scripts/lib/kdoc/kdoc_files.py index 527ab9117268..dd003feefd1b 100644 --- a/scripts/lib/kdoc/kdoc_files.py +++ b/scripts/lib/kdoc/kdoc_files.py @@ -68,6 +68,9 @@ class GlobSourceFiles: handling directories if any """ + if not file_list: + return + for fname in file_list: if self.srctree: f = os.path.join(self.srctree, fname) @@ -84,40 +87,70 @@ class GlobSourceFiles: class KernelFiles(): """ - Parse lernel-doc tags on multiple kernel source files. + Parse kernel-doc tags on multiple kernel source files. + + There are two type of parsers defined here: + - self.parse_file(): parses both kernel-doc markups and + EXPORT_SYMBOL* macros; + - self.process_export_file(): parses only EXPORT_SYMBOL* macros. """ + def warning(self, msg): + """Ancillary routine to output a warning and increment error count""" + + self.config.log.warning(msg) + self.errors += 1 + + def error(self, msg): + """Ancillary routine to output an error and increment error count""" + + self.config.log.error(msg) + self.errors += 1 + def parse_file(self, fname): """ Parse a single Kernel source. """ + # Prevent parsing the same file twice if results are cached + if fname in self.files: + return + doc = KernelDoc(self.config, fname) - doc.run() + export_table, entries = doc.parse_kdoc() + + self.export_table[fname] = export_table + + self.files.add(fname) + self.export_files.add(fname) # parse_kdoc() already check exports - return doc.entries + self.results[fname] = entries def process_export_file(self, fname): """ Parses EXPORT_SYMBOL* macros from a single Kernel source file. """ - try: - with open(fname, "r", encoding="utf8", - errors="backslashreplace") as fp: - for line in fp: - KernelDoc.process_export(self.config.function_table, line) - except IOError: - self.config.log.error("Error: Cannot open fname %s", fname) - self.config.errors += 1 + # Prevent parsing the same file twice if results are cached + if fname in self.export_files: + return + + doc = KernelDoc(self.config, fname) + export_table = doc.parse_export() + + if not export_table: + self.error(f"Error: Cannot check EXPORT_SYMBOL* on {fname}") + export_table = set() + + self.export_table[fname] = export_table + self.export_files.add(fname) def file_not_found_cb(self, fname): """ Callback to warn if a file was not found. """ - self.config.log.error("Cannot find file %s", fname) - self.config.errors += 1 + self.error(f"Cannot find file {fname}") def __init__(self, verbose=False, out_style=None, werror=False, wreturn=False, wshort_desc=False, @@ -147,7 +180,9 @@ class KernelFiles(): if kdoc_werror: werror = kdoc_werror - # Set global config data used on all files + # Some variables are global to the parser logic as a whole as they are + # used to send control configuration to KernelDoc class. As such, + # those variables are read-only inside the KernelDoc. self.config = argparse.Namespace self.config.verbose = verbose @@ -156,27 +191,25 @@ class KernelFiles(): self.config.wshort_desc = wshort_desc self.config.wcontents_before_sections = wcontents_before_sections - self.config.function_table = set() - self.config.source_map = {} - if not logger: self.config.log = logging.getLogger("kernel-doc") else: self.config.log = logger - self.config.kernel_version = os.environ.get("KERNELVERSION", - "unknown kernel version'") + self.config.warning = self.warning + self.config.src_tree = os.environ.get("SRCTREE", None) - self.out_style = out_style + # Initialize variables that are internal to KernelFiles - # Initialize internal variables + self.out_style = out_style - self.config.errors = 0 + self.errors = 0 self.results = {} self.files = set() self.export_files = set() + self.export_table = {} def parse(self, file_list, export_file=None): """ @@ -185,28 +218,11 @@ class KernelFiles(): glob = GlobSourceFiles(srctree=self.config.src_tree) - # Prevent parsing the same file twice to speedup parsing and - # avoid reporting errors multiple times - for fname in glob.parse_files(file_list, self.file_not_found_cb): - if fname not in self.files: - self.results[fname] = self.parse_file(fname) - self.files.add(fname) - - # If a list of export files was provided, parse EXPORT_SYMBOL* - # from files that weren't fully parsed - - if not export_file: - return - - self.export_files |= self.files - - glob = GlobSourceFiles(srctree=self.config.src_tree) + self.parse_file(fname) for fname in glob.parse_files(export_file, self.file_not_found_cb): - if fname not in self.export_files: - self.process_export_file(fname) - self.export_files.add(fname) + self.process_export_file(fname) def out_msg(self, fname, name, arg): """ @@ -223,32 +239,35 @@ class KernelFiles(): def msg(self, enable_lineno=False, export=False, internal=False, symbol=None, nosymbol=None, no_doc_sections=False, - filenames=None): + filenames=None, export_file=None): """ Interacts over the kernel-doc results and output messages, returning kernel-doc markups on each interaction """ - function_table = self.config.function_table - - if symbol: - for s in symbol: - function_table.add(s) - - # Output none mode: only warnings will be shown - if not self.out_style: - return - self.out_style.set_config(self.config) - self.out_style.set_filter(export, internal, symbol, nosymbol, - function_table, enable_lineno, - no_doc_sections) - if not filenames: filenames = sorted(self.results.keys()) for fname in filenames: + function_table = set() + + if internal or export: + if not export_file: + export_file = [fname] + + for f in export_file: + function_table |= self.export_table[f] + + if symbol: + for s in symbol: + function_table.add(s) + + self.out_style.set_filter(export, internal, symbol, nosymbol, + function_table, enable_lineno, + no_doc_sections) + msg = "" for name, arg in self.results[fname]: msg += self.out_msg(fname, name, arg) @@ -261,12 +280,3 @@ class KernelFiles(): fname, ln, dtype) if msg: yield fname, msg - - @property - def errors(self): - """ - Return a count of the number of warnings found, including - the ones displayed while interacting over self.msg. - """ - - return self.config.errors diff --git a/scripts/lib/kdoc/kdoc_output.py b/scripts/lib/kdoc/kdoc_output.py index e9b4d0093084..c352b7f8d3fd 100755 --- a/scripts/lib/kdoc/kdoc_output.py +++ b/scripts/lib/kdoc/kdoc_output.py @@ -69,7 +69,7 @@ class OutputFormat: self.enable_lineno = None self.nosymbol = {} self.symbol = None - self.function_table = set() + self.function_table = None self.config = None self.no_doc_sections = False @@ -94,10 +94,10 @@ class OutputFormat: self.enable_lineno = enable_lineno self.no_doc_sections = no_doc_sections + self.function_table = function_table if symbol: self.out_mode = self.OUTPUT_INCLUDE - function_table = symbol elif export: self.out_mode = self.OUTPUT_EXPORTED elif internal: @@ -108,8 +108,6 @@ class OutputFormat: if nosymbol: self.nosymbol = set(nosymbol) - if function_table: - self.function_table = function_table def highlight_block(self, block): """ @@ -129,8 +127,7 @@ class OutputFormat: warnings = args.get('warnings', []) for log_msg in warnings: - self.config.log.warning(log_msg) - self.config.errors += 1 + self.config.warning(log_msg) def check_doc(self, name, args): """Check if DOC should be output""" diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 43e6ffbdcc2c..33f00c77dd5f 100755 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1133,21 +1133,25 @@ class KernelDoc: self.emit_warning(ln, "error: Cannot parse typedef!") @staticmethod - def process_export(function_table, line): + def process_export(function_set, line): """ process EXPORT_SYMBOL* tags - This method is called both internally and externally, so, it - doesn't use self. + This method doesn't use any variable from the class, so declare it + with a staticmethod decorator. """ + # Note: it accepts only one EXPORT_SYMBOL* per line, as having + # multiple export lines would violate Kernel coding style. + if export_symbol.search(line): symbol = export_symbol.group(2) - function_table.add(symbol) + function_set.add(symbol) + return if export_symbol_ns.search(line): symbol = export_symbol_ns.group(2) - function_table.add(symbol) + function_set.add(symbol) def process_normal(self, ln, line): """ @@ -1617,17 +1621,39 @@ class KernelDoc: elif doc_content.search(line): self.entry.contents += doc_content.group(1) + "\n" - def run(self): + def parse_export(self): + """ + Parses EXPORT_SYMBOL* macros from a single Kernel source file. + """ + + export_table = set() + + try: + with open(self.fname, "r", encoding="utf8", + errors="backslashreplace") as fp: + + for line in fp: + self.process_export(export_table, line) + + except IOError: + return None + + return export_table + + def parse_kdoc(self): """ Open and process each line of a C source file. - he parsing is controlled via a state machine, and the line is passed + The parsing is controlled via a state machine, and the line is passed to a different process function depending on the state. The process function may update the state as needed. + + Besides parsing kernel-doc tags, it also parses export symbols. """ cont = False prev = "" prev_ln = None + export_table = set() try: with open(self.fname, "r", encoding="utf8", @@ -1659,6 +1685,16 @@ class KernelDoc: self.st_inline_name[self.inline_doc_state], line) + # This is an optimization over the original script. + # There, when export_file was used for the same file, + # it was read twice. Here, we use the already-existing + # loop to parse exported symbols as well. + # + # TODO: It should be noticed that not all states are + # needed here. On a future cleanup, process export only + # at the states that aren't handling comment markups. + self.process_export(export_table, line) + # Hand this line to the appropriate state handler if self.state == self.STATE_NORMAL: self.process_normal(ln, line) @@ -1675,3 +1711,5 @@ class KernelDoc: self.process_docblock(ln, line) except OSError: self.config.log.error(f"Error: Cannot open file {self.fname}") + + return export_table, self.entries -- cgit v1.2.3-59-g8ed1b From 04a383ced6965fedc9c1b6c83d841acce076b53c Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 8 Apr 2025 18:09:35 +0800 Subject: scripts/kernel-doc.py: Rename the kernel doc Re class to KernRe Using just "Re" makes it harder to distinguish from the native "re" class. So, let's rename it. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/4e095ecd5235a3e811ddcf5bad4cfb92f1da0a4a.1744106242.git.mchehab+huawei@kernel.org --- scripts/lib/kdoc/kdoc_output.py | 50 ++++---- scripts/lib/kdoc/kdoc_parser.py | 264 ++++++++++++++++++++-------------------- scripts/lib/kdoc/kdoc_re.py | 4 +- 3 files changed, 159 insertions(+), 159 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_output.py b/scripts/lib/kdoc/kdoc_output.py index c352b7f8d3fd..86102e628d91 100755 --- a/scripts/lib/kdoc/kdoc_output.py +++ b/scripts/lib/kdoc/kdoc_output.py @@ -20,31 +20,31 @@ import re from datetime import datetime from kdoc_parser import KernelDoc, type_param -from kdoc_re import Re +from kdoc_re import KernRe -function_pointer = Re(r"([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)", cache=False) +function_pointer = KernRe(r"([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)", cache=False) # match expressions used to find embedded type information -type_constant = Re(r"\b``([^\`]+)``\b", cache=False) -type_constant2 = Re(r"\%([-_*\w]+)", cache=False) -type_func = Re(r"(\w+)\(\)", cache=False) -type_param_ref = Re(r"([\!~\*]?)\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) +type_constant = KernRe(r"\b``([^\`]+)``\b", cache=False) +type_constant2 = KernRe(r"\%([-_*\w]+)", cache=False) +type_func = KernRe(r"(\w+)\(\)", cache=False) +type_param_ref = KernRe(r"([\!~\*]?)\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) # Special RST handling for func ptr params -type_fp_param = Re(r"\@(\w+)\(\)", cache=False) +type_fp_param = KernRe(r"\@(\w+)\(\)", cache=False) # Special RST handling for structs with func ptr params -type_fp_param2 = Re(r"\@(\w+->\S+)\(\)", cache=False) +type_fp_param2 = KernRe(r"\@(\w+->\S+)\(\)", cache=False) -type_env = Re(r"(\$\w+)", cache=False) -type_enum = Re(r"\&(enum\s*([_\w]+))", cache=False) -type_struct = Re(r"\&(struct\s*([_\w]+))", cache=False) -type_typedef = Re(r"\&(typedef\s*([_\w]+))", cache=False) -type_union = Re(r"\&(union\s*([_\w]+))", cache=False) -type_member = Re(r"\&([_\w]+)(\.|->)([_\w]+)", cache=False) -type_fallback = Re(r"\&([_\w]+)", cache=False) -type_member_func = type_member + Re(r"\(\)", cache=False) +type_env = KernRe(r"(\$\w+)", cache=False) +type_enum = KernRe(r"\&(enum\s*([_\w]+))", cache=False) +type_struct = KernRe(r"\&(struct\s*([_\w]+))", cache=False) +type_typedef = KernRe(r"\&(typedef\s*([_\w]+))", cache=False) +type_union = KernRe(r"\&(union\s*([_\w]+))", cache=False) +type_member = KernRe(r"\&([_\w]+)(\.|->)([_\w]+)", cache=False) +type_fallback = KernRe(r"\&([_\w]+)", cache=False) +type_member_func = type_member + KernRe(r"\(\)", cache=False) class OutputFormat: @@ -257,8 +257,8 @@ class RestFormat(OutputFormat): ] blankline = "\n" - sphinx_literal = Re(r'^[^.].*::$', cache=False) - sphinx_cblock = Re(r'^\.\.\ +code-block::', cache=False) + sphinx_literal = KernRe(r'^[^.].*::$', cache=False) + sphinx_cblock = KernRe(r'^\.\.\ +code-block::', cache=False) def __init__(self): """ @@ -299,14 +299,14 @@ class RestFormat(OutputFormat): # If this is the first non-blank line in a literal block, # figure out the proper indent. if not litprefix: - r = Re(r'^(\s*)') + r = KernRe(r'^(\s*)') if r.match(line): litprefix = '^' + r.group(1) else: litprefix = "" output += line + "\n" - elif not Re(litprefix).match(line): + elif not KernRe(litprefix).match(line): in_literal = False else: output += line + "\n" @@ -429,7 +429,7 @@ class RestFormat(OutputFormat): self.data += f"{self.lineprefix}**Parameters**\n\n" for parameter in parameterlist: - parameter_name = Re(r'\[.*').sub('', parameter) + parameter_name = KernRe(r'\[.*').sub('', parameter) dtype = args['parametertypes'].get(parameter, "") if dtype: @@ -626,7 +626,7 @@ class ManFormat(OutputFormat): contents = "\n".join(contents) for line in contents.strip("\n").split("\n"): - line = Re(r"^\s*").sub("", line) + line = KernRe(r"^\s*").sub("", line) if not line: continue @@ -680,7 +680,7 @@ class ManFormat(OutputFormat): # Pointer-to-function self.data += f'".BI "{parenth}{function_pointer.group(1)}" " ") ({function_pointer.group(2)}){post}"' + "\n" else: - dtype = Re(r'([^\*])$').sub(r'\1 ', dtype) + dtype = KernRe(r'([^\*])$').sub(r'\1 ', dtype) self.data += f'.BI "{parenth}{dtype}" "{post}"' + "\n" count += 1 @@ -727,7 +727,7 @@ class ManFormat(OutputFormat): self.data += ".SH Constants\n" for parameter in parameterlist: - parameter_name = Re(r'\[.*').sub('', parameter) + parameter_name = KernRe(r'\[.*').sub('', parameter) self.data += f'.IP "{parameter}" 12' + "\n" self.output_highlight(args['parameterdescs'].get(parameter_name, "")) @@ -769,7 +769,7 @@ class ManFormat(OutputFormat): # Replace tabs with two spaces and handle newlines declaration = definition.replace("\t", " ") - declaration = Re(r"\n").sub('"\n.br\n.BI "', declaration) + declaration = KernRe(r"\n").sub('"\n.br\n.BI "', declaration) self.data += ".SH SYNOPSIS\n" self.data += f"{struct_type} {struct_name} " + "{" + "\n.br\n" diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 33f00c77dd5f..f60722bcc687 100755 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -16,7 +16,7 @@ import argparse import re from pprint import pformat -from kdoc_re import NestedMatch, Re +from kdoc_re import NestedMatch, KernRe # @@ -29,12 +29,12 @@ from kdoc_re import NestedMatch, Re # # Allow whitespace at end of comment start. -doc_start = Re(r'^/\*\*\s*$', cache=False) +doc_start = KernRe(r'^/\*\*\s*$', cache=False) -doc_end = Re(r'\*/', cache=False) -doc_com = Re(r'\s*\*\s*', cache=False) -doc_com_body = Re(r'\s*\* ?', cache=False) -doc_decl = doc_com + Re(r'(\w+)', cache=False) +doc_end = KernRe(r'\*/', cache=False) +doc_com = KernRe(r'\s*\*\s*', cache=False) +doc_com_body = KernRe(r'\s*\* ?', cache=False) +doc_decl = doc_com + KernRe(r'(\w+)', cache=False) # @params and a strictly limited set of supported section names # Specifically: @@ -44,22 +44,22 @@ doc_decl = doc_com + Re(r'(\w+)', cache=False) # while trying to not match literal block starts like "example::" # doc_sect = doc_com + \ - Re(r'\s*(\@[.\w]+|\@\.\.\.|description|context|returns?|notes?|examples?)\s*:([^:].*)?$', + KernRe(r'\s*(\@[.\w]+|\@\.\.\.|description|context|returns?|notes?|examples?)\s*:([^:].*)?$', flags=re.I, cache=False) -doc_content = doc_com_body + Re(r'(.*)', cache=False) -doc_block = doc_com + Re(r'DOC:\s*(.*)?', cache=False) -doc_inline_start = Re(r'^\s*/\*\*\s*$', cache=False) -doc_inline_sect = Re(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False) -doc_inline_end = Re(r'^\s*\*/\s*$', cache=False) -doc_inline_oneline = Re(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False) -attribute = Re(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", +doc_content = doc_com_body + KernRe(r'(.*)', cache=False) +doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False) +doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False) +doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False) +doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False) +doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False) +attribute = KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False) -export_symbol = Re(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False) -export_symbol_ns = Re(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False) +export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False) +export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False) -type_param = Re(r"\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) +type_param = KernRe(r"\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) class KernelDoc: @@ -278,10 +278,10 @@ class KernelDoc: self.entry.anon_struct_union = False - param = Re(r'[\[\)].*').sub('', param, count=1) + param = KernRe(r'[\[\)].*').sub('', param, count=1) if dtype == "" and param.endswith("..."): - if Re(r'\w\.\.\.$').search(param): + if KernRe(r'\w\.\.\.$').search(param): # For named variable parameters of the form `x...`, # remove the dots param = param[:-3] @@ -335,7 +335,7 @@ class KernelDoc: # to ignore "[blah" in a parameter string. self.entry.parameterlist.append(param) - org_arg = Re(r'\s\s+').sub(' ', org_arg) + org_arg = KernRe(r'\s\s+').sub(' ', org_arg) self.entry.parametertypes[param] = org_arg def save_struct_actual(self, actual): @@ -344,7 +344,7 @@ class KernelDoc: one string item. """ - actual = Re(r'\s*').sub("", actual, count=1) + actual = KernRe(r'\s*').sub("", actual, count=1) self.entry.struct_actual += actual + " " @@ -355,20 +355,20 @@ class KernelDoc: """ # temporarily replace all commas inside function pointer definition - arg_expr = Re(r'(\([^\),]+),') + arg_expr = KernRe(r'(\([^\),]+),') while arg_expr.search(args): args = arg_expr.sub(r"\1#", args) for arg in args.split(splitter): # Strip comments - arg = Re(r'\/\*.*\*\/').sub('', arg) + arg = KernRe(r'\/\*.*\*\/').sub('', arg) # Ignore argument attributes - arg = Re(r'\sPOS0?\s').sub(' ', arg) + arg = KernRe(r'\sPOS0?\s').sub(' ', arg) # Strip leading/trailing spaces arg = arg.strip() - arg = Re(r'\s+').sub(' ', arg, count=1) + arg = KernRe(r'\s+').sub(' ', arg, count=1) if arg.startswith('#'): # Treat preprocessor directive as a typeless variable just to fill @@ -379,63 +379,63 @@ class KernelDoc: self.push_parameter(ln, decl_type, arg, "", "", declaration_name) - elif Re(r'\(.+\)\s*\(').search(arg): + elif KernRe(r'\(.+\)\s*\(').search(arg): # Pointer-to-function arg = arg.replace('#', ',') - r = Re(r'[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)') + r = KernRe(r'[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)') if r.match(arg): param = r.group(1) else: self.emit_warning(ln, f"Invalid param: {arg}") param = arg - dtype = Re(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) + dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) self.save_struct_actual(param) self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) - elif Re(r'\(.+\)\s*\[').search(arg): + elif KernRe(r'\(.+\)\s*\[').search(arg): # Array-of-pointers arg = arg.replace('#', ',') - r = Re(r'[^\(]+\(\s*\*\s*([\w\[\]\.]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)') + r = KernRe(r'[^\(]+\(\s*\*\s*([\w\[\]\.]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)') if r.match(arg): param = r.group(1) else: self.emit_warning(ln, f"Invalid param: {arg}") param = arg - dtype = Re(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) + dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) self.save_struct_actual(param) self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) elif arg: - arg = Re(r'\s*:\s*').sub(":", arg) - arg = Re(r'\s*\[').sub('[', arg) + arg = KernRe(r'\s*:\s*').sub(":", arg) + arg = KernRe(r'\s*\[').sub('[', arg) - args = Re(r'\s*,\s*').split(arg) + args = KernRe(r'\s*,\s*').split(arg) if args[0] and '*' in args[0]: args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) first_arg = [] - r = Re(r'^(.*\s+)(.*?\[.*\].*)$') + r = KernRe(r'^(.*\s+)(.*?\[.*\].*)$') if args[0] and r.match(args[0]): args.pop(0) first_arg.extend(r.group(1)) first_arg.append(r.group(2)) else: - first_arg = Re(r'\s+').split(args.pop(0)) + first_arg = KernRe(r'\s+').split(args.pop(0)) args.insert(0, first_arg.pop()) dtype = ' '.join(first_arg) for param in args: - if Re(r'^(\*+)\s*(.*)').match(param): - r = Re(r'^(\*+)\s*(.*)') + if KernRe(r'^(\*+)\s*(.*)').match(param): + r = KernRe(r'^(\*+)\s*(.*)') if not r.match(param): self.emit_warning(ln, f"Invalid param: {param}") continue @@ -447,8 +447,8 @@ class KernelDoc: f"{dtype} {r.group(1)}", arg, declaration_name) - elif Re(r'(.*?):(\w+)').search(param): - r = Re(r'(.*?):(\w+)') + elif KernRe(r'(.*?):(\w+)').search(param): + r = KernRe(r'(.*?):(\w+)') if not r.match(param): self.emit_warning(ln, f"Invalid param: {param}") continue @@ -477,7 +477,7 @@ class KernelDoc: err = True for px in range(len(prms)): # pylint: disable=C0200 prm_clean = prms[px] - prm_clean = Re(r'\[.*\]').sub('', prm_clean) + prm_clean = KernRe(r'\[.*\]').sub('', prm_clean) prm_clean = attribute.sub('', prm_clean) # ignore array size in a parameter string; @@ -486,7 +486,7 @@ class KernelDoc: # and this appears in @prms as "addr[6" since the # parameter list is split at spaces; # hence just ignore "[..." for the sections check; - prm_clean = Re(r'\[.*').sub('', prm_clean) + prm_clean = KernRe(r'\[.*').sub('', prm_clean) if prm_clean == sects[sx]: err = False @@ -512,7 +512,7 @@ class KernelDoc: # Ignore an empty return type (It's a macro) # Ignore functions with a "void" return type (but not "void *") - if not return_type or Re(r'void\s*\w*\s*$').search(return_type): + if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type): return if not self.entry.sections.get("Return", None): @@ -535,20 +535,20 @@ class KernelDoc: ] definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?" - struct_members = Re(type_pattern + r'([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\}\;]*)(\;)') + struct_members = KernRe(type_pattern + r'([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\}\;]*)(\;)') # Extract struct/union definition members = None declaration_name = None decl_type = None - r = Re(type_pattern + r'\s+(\w+)\s*' + definition_body) + r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body) if r.search(proto): decl_type = r.group(1) declaration_name = r.group(2) members = r.group(3) else: - r = Re(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;') + r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;') if r.search(proto): decl_type = r.group(1) @@ -567,21 +567,21 @@ class KernelDoc: args_pattern = r'([^,)]+)' sub_prefixes = [ - (Re(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', re.S | re.I), ''), - (Re(r'\/\*\s*private:.*', re.S | re.I), ''), + (KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', re.S | re.I), ''), + (KernRe(r'\/\*\s*private:.*', re.S | re.I), ''), # Strip comments - (Re(r'\/\*.*?\*\/', re.S), ''), + (KernRe(r'\/\*.*?\*\/', re.S), ''), # Strip attributes (attribute, ' '), - (Re(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), - (Re(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), - (Re(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), - (Re(r'\s*__packed\s*', re.S), ' '), - (Re(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), - (Re(r'\s*____cacheline_aligned_in_smp', re.S), ' '), - (Re(r'\s*____cacheline_aligned', re.S), ' '), + (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__packed\s*', re.S), ' '), + (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), + (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), + (KernRe(r'\s*____cacheline_aligned', re.S), ' '), # Unwrap struct_group macros based on this definition: # __struct_group(TAG, NAME, ATTRS, MEMBERS...) @@ -616,10 +616,10 @@ class KernelDoc: # matched. So, the implementation to drop STRUCT_GROUP() will be # handled in separate. - (Re(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), - (Re(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), - (Re(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), - (Re(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), + (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), + (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), + (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), + (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), # Replace macros # @@ -628,15 +628,15 @@ class KernelDoc: # it is better to also move those to the NestedMatch logic, # to ensure that parenthesis will be properly matched. - (Re(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), - (Re(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), - (Re(r'DECLARE_BITMAP\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), - (Re(r'DECLARE_HASHTABLE\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[1 << ((\2) - 1)]'), - (Re(r'DECLARE_KFIFO\s*\(' + args_pattern + r',\s*' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), - (Re(r'DECLARE_KFIFO_PTR\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), - (Re(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\1 \2[]'), - (Re(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + args_pattern + r'\)', re.S), r'dma_addr_t \1'), - (Re(r'DEFINE_DMA_UNMAP_LEN\s*\(' + args_pattern + r'\)', re.S), r'__u32 \1'), + (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), + (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), + (KernRe(r'DECLARE_BITMAP\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), + (KernRe(r'DECLARE_HASHTABLE\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[1 << ((\2) - 1)]'), + (KernRe(r'DECLARE_KFIFO\s*\(' + args_pattern + r',\s*' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), + (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), + (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\1 \2[]'), + (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + args_pattern + r'\)', re.S), r'dma_addr_t \1'), + (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + args_pattern + r'\)', re.S), r'__u32 \1'), ] # Regexes here are guaranteed to have the end limiter matching @@ -689,8 +689,8 @@ class KernelDoc: s_id = s_id.strip() newmember += f"{maintype} {s_id}; " - s_id = Re(r'[:\[].*').sub('', s_id) - s_id = Re(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) + s_id = KernRe(r'[:\[].*').sub('', s_id) + s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) for arg in content.split(';'): arg = arg.strip() @@ -698,7 +698,7 @@ class KernelDoc: if not arg: continue - r = Re(r'^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)') + r = KernRe(r'^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)') if r.match(arg): # Pointer-to-function dtype = r.group(1) @@ -717,15 +717,15 @@ class KernelDoc: else: arg = arg.strip() # Handle bitmaps - arg = Re(r':\s*\d+\s*').sub('', arg) + arg = KernRe(r':\s*\d+\s*').sub('', arg) # Handle arrays - arg = Re(r'\[.*\]').sub('', arg) + arg = KernRe(r'\[.*\]').sub('', arg) # Handle multiple IDs - arg = Re(r'\s*,\s*').sub(',', arg) + arg = KernRe(r'\s*,\s*').sub(',', arg) - r = Re(r'(.*)\s+([\S+,]+)') + r = KernRe(r'(.*)\s+([\S+,]+)') if r.search(arg): dtype = r.group(1) @@ -735,7 +735,7 @@ class KernelDoc: continue for name in names.split(','): - name = Re(r'^\s*\**(\S+)\s*').sub(r'\1', name).strip() + name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name).strip() if not name: continue @@ -757,12 +757,12 @@ class KernelDoc: self.entry.sectcheck, self.entry.struct_actual) # Adjust declaration for better display - declaration = Re(r'([\{;])').sub(r'\1\n', declaration) - declaration = Re(r'\}\s+;').sub('};', declaration) + declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration) + declaration = KernRe(r'\}\s+;').sub('};', declaration) # Better handle inlined enums while True: - r = Re(r'(enum\s+\{[^\}]+),([^\n])') + r = KernRe(r'(enum\s+\{[^\}]+),([^\n])') if not r.search(declaration): break @@ -774,7 +774,7 @@ class KernelDoc: for clause in def_args: clause = clause.strip() - clause = Re(r'\s+').sub(' ', clause, count=1) + clause = KernRe(r'\s+').sub(' ', clause, count=1) if not clause: continue @@ -782,7 +782,7 @@ class KernelDoc: if '}' in clause and level > 1: level -= 1 - if not Re(r'^\s*#').match(clause): + if not KernRe(r'^\s*#').match(clause): declaration += "\t" * level declaration += "\t" + clause + "\n" @@ -807,24 +807,24 @@ class KernelDoc: """ # Ignore members marked private - proto = Re(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', flags=re.S).sub('', proto) - proto = Re(r'\/\*\s*private:.*}', flags=re.S).sub('}', proto) + proto = KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', flags=re.S).sub('', proto) + proto = KernRe(r'\/\*\s*private:.*}', flags=re.S).sub('}', proto) # Strip comments - proto = Re(r'\/\*.*?\*\/', flags=re.S).sub('', proto) + proto = KernRe(r'\/\*.*?\*\/', flags=re.S).sub('', proto) # Strip #define macros inside enums - proto = Re(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) + proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) members = None declaration_name = None - r = Re(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') + r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') if r.search(proto): declaration_name = r.group(2) members = r.group(1).rstrip() else: - r = Re(r'enum\s+(\w*)\s*\{(.*)\}') + r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}') if r.match(proto): declaration_name = r.group(1) members = r.group(2).rstrip() @@ -847,12 +847,12 @@ class KernelDoc: member_set = set() - members = Re(r'\([^;]*?[\)]').sub('', members) + members = KernRe(r'\([^;]*?[\)]').sub('', members) for arg in members.split(','): if not arg: continue - arg = Re(r'^\s*(\w+).*').sub(r'\1', arg) + arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg) self.entry.parameterlist.append(arg) if arg not in self.entry.parameterdescs: self.entry.parameterdescs[arg] = self.undescribed @@ -947,10 +947,10 @@ class KernelDoc: ] for search, sub, flags in sub_prefixes: - prototype = Re(search, flags).sub(sub, prototype) + prototype = KernRe(search, flags).sub(sub, prototype) # Macros are a special case, as they change the prototype format - new_proto = Re(r"^#\s*define\s+").sub("", prototype) + new_proto = KernRe(r"^#\s*define\s+").sub("", prototype) if new_proto != prototype: is_define_proto = True prototype = new_proto @@ -987,7 +987,7 @@ class KernelDoc: found = False if is_define_proto: - r = Re(r'^()(' + name + r')\s+') + r = KernRe(r'^()(' + name + r')\s+') if r.search(prototype): return_type = '' @@ -1004,7 +1004,7 @@ class KernelDoc: ] for p in patterns: - r = Re(p) + r = KernRe(p) if r.match(prototype): @@ -1071,11 +1071,11 @@ class KernelDoc: typedef_ident = r'\*?\s*(\w\S+)\s*' typedef_args = r'\s*\((.*)\);' - typedef1 = Re(r'typedef' + typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) - typedef2 = Re(r'typedef' + typedef_type + typedef_ident + typedef_args) + typedef1 = KernRe(r'typedef' + typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) + typedef2 = KernRe(r'typedef' + typedef_type + typedef_ident + typedef_args) # Strip comments - proto = Re(r'/\*.*?\*/', flags=re.S).sub('', proto) + proto = KernRe(r'/\*.*?\*/', flags=re.S).sub('', proto) # Parse function typedef prototypes for r in [typedef1, typedef2]: @@ -1109,12 +1109,12 @@ class KernelDoc: return # Handle nested parentheses or brackets - r = Re(r'(\(*.\)\s*|\[*.\]\s*);$') + r = KernRe(r'(\(*.\)\s*|\[*.\]\s*);$') while r.search(proto): proto = r.sub('', proto) # Parse simple typedefs - r = Re(r'typedef.*\s+(\w+)\s*;') + r = KernRe(r'typedef.*\s+(\w+)\s*;') if r.match(proto): declaration_name = r.group(1) @@ -1195,12 +1195,12 @@ class KernelDoc: decl_end = r"(?:[-:].*)" # end of the name part # test for pointer declaration type, foo * bar() - desc - r = Re(fr"^{decl_start}([\w\s]+?){parenthesis}?\s*{decl_end}?$") + r = KernRe(fr"^{decl_start}([\w\s]+?){parenthesis}?\s*{decl_end}?$") if r.search(line): self.entry.identifier = r.group(1) # Test for data declaration - r = Re(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)") + r = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)") if r.search(line): self.entry.decl_type = r.group(1) self.entry.identifier = r.group(2) @@ -1209,15 +1209,15 @@ class KernelDoc: # Look for foo() or static void foo() - description; # or misspelt identifier - r1 = Re(fr"^{decl_start}{fn_type}(\w+)\s*{parenthesis}\s*{decl_end}?$") - r2 = Re(fr"^{decl_start}{fn_type}(\w+[^-:]*){parenthesis}\s*{decl_end}$") + r1 = KernRe(fr"^{decl_start}{fn_type}(\w+)\s*{parenthesis}\s*{decl_end}?$") + r2 = KernRe(fr"^{decl_start}{fn_type}(\w+[^-:]*){parenthesis}\s*{decl_end}$") for r in [r1, r2]: if r.search(line): self.entry.identifier = r.group(1) self.entry.decl_type = "function" - r = Re(r"define\s+") + r = KernRe(r"define\s+") self.entry.identifier = r.sub("", self.entry.identifier) self.entry.is_kernel_comment = True break @@ -1230,12 +1230,12 @@ class KernelDoc: self.entry.section = self.section_default self.entry.new_start_line = ln + 1 - r = Re("[-:](.*)") + r = KernRe("[-:](.*)") if r.search(line): # strip leading/trailing/multiple spaces self.entry.descr = r.group(1).strip(" ") - r = Re(r"\s+") + r = KernRe(r"\s+") self.entry.descr = r.sub(" ", self.entry.descr) self.entry.declaration_purpose = self.entry.descr self.state = self.STATE_BODY_MAYBE @@ -1272,7 +1272,7 @@ class KernelDoc: """ if self.state == self.STATE_BODY_WITH_BLANK_LINE: - r = Re(r"\s*\*\s?\S") + r = KernRe(r"\s*\*\s?\S") if r.match(line): self.dump_section() self.entry.section = self.section_default @@ -1318,7 +1318,7 @@ class KernelDoc: self.dump_section() # Look for doc_com + + doc_end: - r = Re(r'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') + r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') if r.match(line): self.emit_warning(ln, f"suspicious ending line: {line}") @@ -1351,7 +1351,7 @@ class KernelDoc: self.entry.declaration_purpose = self.entry.declaration_purpose.rstrip() self.entry.declaration_purpose += " " + cont - r = Re(r"\s+") + r = KernRe(r"\s+") self.entry.declaration_purpose = r.sub(' ', self.entry.declaration_purpose) @@ -1359,7 +1359,7 @@ class KernelDoc: if self.entry.section.startswith('@') or \ self.entry.section == self.section_context: if self.entry.leading_space is None: - r = Re(r'^(\s+)') + r = KernRe(r'^(\s+)') if r.match(cont): self.entry.leading_space = len(r.group(1)) else: @@ -1436,13 +1436,13 @@ class KernelDoc: is_void = True # Replace SYSCALL_DEFINE with correct return type & function name - proto = Re(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) + proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) - r = Re(r'long\s+(sys_.*?),') + r = KernRe(r'long\s+(sys_.*?),') if r.search(proto): - proto = Re(',').sub('(', proto, count=1) + proto = KernRe(',').sub('(', proto, count=1) elif is_void: - proto = Re(r'\)').sub('(void)', proto, count=1) + proto = KernRe(r'\)').sub('(void)', proto, count=1) # Now delete all of the odd-numbered commas in the proto # so that argument types & names don't have a comma between them @@ -1469,22 +1469,22 @@ class KernelDoc: tracepointargs = None # Match tracepoint name based on different patterns - r = Re(r'TRACE_EVENT\((.*?),') + r = KernRe(r'TRACE_EVENT\((.*?),') if r.search(proto): tracepointname = r.group(1) - r = Re(r'DEFINE_SINGLE_EVENT\((.*?),') + r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),') if r.search(proto): tracepointname = r.group(1) - r = Re(r'DEFINE_EVENT\((.*?),(.*?),') + r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),') if r.search(proto): tracepointname = r.group(2) if tracepointname: tracepointname = tracepointname.lstrip() - r = Re(r'TP_PROTO\((.*?)\)') + r = KernRe(r'TP_PROTO\((.*?)\)') if r.search(proto): tracepointargs = r.group(1) @@ -1501,43 +1501,43 @@ class KernelDoc: """Ancillary routine to process a function prototype""" # strip C99-style comments to end of line - r = Re(r"\/\/.*$", re.S) + r = KernRe(r"\/\/.*$", re.S) line = r.sub('', line) - if Re(r'\s*#\s*define').match(line): + if KernRe(r'\s*#\s*define').match(line): self.entry.prototype = line elif line.startswith('#'): # Strip other macros like #ifdef/#ifndef/#endif/... pass else: - r = Re(r'([^\{]*)') + r = KernRe(r'([^\{]*)') if r.match(line): self.entry.prototype += r.group(1) + " " - if '{' in line or ';' in line or Re(r'\s*#\s*define').match(line): + if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line): # strip comments - r = Re(r'/\*.*?\*/') + r = KernRe(r'/\*.*?\*/') self.entry.prototype = r.sub('', self.entry.prototype) # strip newlines/cr's - r = Re(r'[\r\n]+') + r = KernRe(r'[\r\n]+') self.entry.prototype = r.sub(' ', self.entry.prototype) # strip leading spaces - r = Re(r'^\s+') + r = KernRe(r'^\s+') self.entry.prototype = r.sub('', self.entry.prototype) # Handle self.entry.prototypes for function pointers like: # int (*pcs_config)(struct foo) - r = Re(r'^(\S+\s+)\(\s*\*(\S+)\)') + r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)') self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) if 'SYSCALL_DEFINE' in self.entry.prototype: self.entry.prototype = self.syscall_munge(ln, self.entry.prototype) - r = Re(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') + r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') if r.search(self.entry.prototype): self.entry.prototype = self.tracepoint_munge(ln, self.entry.prototype) @@ -1549,22 +1549,22 @@ class KernelDoc: """Ancillary routine to process a type""" # Strip newlines/cr's. - line = Re(r'[\r\n]+', re.S).sub(' ', line) + line = KernRe(r'[\r\n]+', re.S).sub(' ', line) # Strip leading spaces - line = Re(r'^\s+', re.S).sub('', line) + line = KernRe(r'^\s+', re.S).sub('', line) # Strip trailing spaces - line = Re(r'\s+$', re.S).sub('', line) + line = KernRe(r'\s+$', re.S).sub('', line) # Strip C99-style comments to the end of the line - line = Re(r"\/\/.*$", re.S).sub('', line) + line = KernRe(r"\/\/.*$", re.S).sub('', line) # To distinguish preprocessor directive from regular declaration later. if line.startswith('#'): line += ";" - r = Re(r'([^\{\};]*)([\{\};])(.*)') + r = KernRe(r'([^\{\};]*)([\{\};])(.*)') while True: if r.search(line): if self.entry.prototype: diff --git a/scripts/lib/kdoc/kdoc_re.py b/scripts/lib/kdoc/kdoc_re.py index d28485ff94d6..e81695b273bf 100755 --- a/scripts/lib/kdoc/kdoc_re.py +++ b/scripts/lib/kdoc/kdoc_re.py @@ -14,7 +14,7 @@ import re re_cache = {} -class Re: +class KernRe: """ Helper class to simplify regex declaration and usage, @@ -59,7 +59,7 @@ class Re: Allows adding two regular expressions into one. """ - return Re(str(self) + str(other), cache=self.cache or other.cache, + return KernRe(str(self) + str(other), cache=self.cache or other.cache, flags=self.regex.flags | other.regex.flags) def match(self, string): -- cgit v1.2.3-59-g8ed1b From de258fa8ca8d72ef17f4d71162cfbbd2d9f397e6 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Tue, 8 Apr 2025 18:09:36 +0800 Subject: scripts: kernel-doc: fix parsing function-like typedefs (again) Typedefs like typedef struct phylink_pcs *(*pcs_xlate_t)(const u64 *args); have a typedef_type that ends with a * and therefore has no word boundary. Add an extra clause for the final group of the typedef_type so we only require a word boundary if we match a word. [mchehab: modify also kernel-doc.py, as we're deprecating the perl version] Fixes: 7d2c6b1edf79 ("scripts: kernel-doc: fix parsing function-like typedefs") Signed-off-by: Sean Anderson Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/e0abb103c73a96d76602d909f60ab8fd6e2fd0bd.1744106242.git.mchehab+huawei@kernel.org --- scripts/kernel-doc.pl | 2 +- scripts/lib/kdoc/kdoc_parser.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/kernel-doc.pl b/scripts/kernel-doc.pl index af6cf408b96d..5db23cbf4eb2 100755 --- a/scripts/kernel-doc.pl +++ b/scripts/kernel-doc.pl @@ -1325,7 +1325,7 @@ sub dump_enum($$) { } } -my $typedef_type = qr { ((?:\s+[\w\*]+\b){1,8})\s* }x; +my $typedef_type = qr { ((?:\s+[\w\*]+\b){0,7}\s+(?:\w+\b|\*+))\s* }x; my $typedef_ident = qr { \*?\s*(\w\S+)\s* }x; my $typedef_args = qr { \s*\((.*)\); }x; diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index f60722bcc687..4f036c720b36 100755 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1067,7 +1067,7 @@ class KernelDoc: Stores a typedef inside self.entries array. """ - typedef_type = r'((?:\s+[\w\*]+\b){1,8})\s*' + typedef_type = r'((?:\s+[\w\*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' typedef_ident = r'\*?\s*(\w\S+)\s*' typedef_args = r'\s*\((.*)\);' -- cgit v1.2.3-59-g8ed1b