Path: blob/master/tools/lib/python/kdoc/kdoc_parser.py
122941 views
#!/usr/bin/env python31# SPDX-License-Identifier: GPL-2.02# Copyright(c) 2025: Mauro Carvalho Chehab <[email protected]>.3#4# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R170256"""7Classes and functions related to reading a C language source or header FILE8and extract embedded documentation comments from it.9"""1011import sys12import re13from pprint import pformat1415from kdoc.kdoc_re import NestedMatch, KernRe16from kdoc.kdoc_item import KdocItem1718#19# Regular expressions used to parse kernel-doc markups at KernelDoc class.20#21# Let's declare them in lowercase outside any class to make it easier to22# convert from the Perl script.23#24# As those are evaluated at the beginning, no need to cache them25#2627# Allow whitespace at end of comment start.28doc_start = KernRe(r'^/\*\*\s*$', cache=False)2930doc_end = KernRe(r'\*/', cache=False)31doc_com = KernRe(r'\s*\*\s*', cache=False)32doc_com_body = KernRe(r'\s*\* ?', cache=False)33doc_decl = doc_com + KernRe(r'(\w+)', cache=False)3435# @params and a strictly limited set of supported section names36# Specifically:37# Match @word:38# @...:39# @{section-name}:40# while trying to not match literal block starts like "example::"41#42known_section_names = 'description|context|returns?|notes?|examples?'43known_sections = KernRe(known_section_names, flags = re.I)44doc_sect = doc_com + \45KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$',46flags=re.I, cache=False)4748doc_content = doc_com_body + KernRe(r'(.*)', cache=False)49doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False)50doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False)51doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False)52doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@\s*[\w][\w\.]*\s*):\s*(.*)\s*\*/\s*$', cache=False)5354export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False)55export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', 
cache=False)5657type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False)5859#60# Tests for the beginning of a kerneldoc block in its various forms.61#62doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False)63doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef|var)\b\s*(\w*)", cache = False)64doc_begin_func = KernRe(str(doc_com) + # initial " * '65r"(?:\w+\s*\*\s*)?" + # type (not captured)66r'(?:define\s+)?' + # possible "define" (not captured)67r'(\w+)\s*(?:\(\w*\))?\s*' + # name and optional "(...)"68r'(?:[-:].*)?$', # description (not captured)69cache = False)7071#72# Here begins a long set of transformations to turn structure member prefixes73# and macro invocations into something we can parse and generate kdoc for.74#75struct_args_pattern = r'([^,)]+)'7677struct_xforms = [78# Strip attributes79(KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '),80(KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '),81(KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '),82(KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '),83(KernRe(r'\s*__packed\s*', re.S), ' '),84(KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '),85(KernRe(r'\s*__private', re.S), ' '),86(KernRe(r'\s*__rcu', re.S), ' '),87(KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '),88(KernRe(r'\s*____cacheline_aligned', re.S), ' '),89(KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''),90#91# Unwrap struct_group macros based on this definition:92# __struct_group(TAG, NAME, ATTRS, MEMBERS...)93# which has variants like: struct_group(NAME, MEMBERS...)94# Only MEMBERS arguments require documentation.95#96# Parsing them happens on two steps:97#98# 1. drop struct group arguments that aren't at MEMBERS,99# storing them as STRUCT_GROUP(MEMBERS)100#101# 2. 
remove STRUCT_GROUP() ancillary macro.102#103# The original logic used to remove STRUCT_GROUP() using an104# advanced regex:105#106# \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;107#108# with two patterns that are incompatible with109# Python re module, as it has:110#111# - a recursive pattern: (?1)112# - an atomic grouping: (?>...)113#114# I tried a simpler version: but it didn't work either:115# \bSTRUCT_GROUP\(([^\)]+)\)[^;]*;116#117# As it doesn't properly match the end parenthesis on some cases.118#119# So, a better solution was crafted: there's now a NestedMatch120# class that ensures that delimiters after a search are properly121# matched. So, the implementation to drop STRUCT_GROUP() will be122# handled in separate.123#124(KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('),125(KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('),126(KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('),127(KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('),128#129# Replace macros130#131# TODO: use NestedMatch for FOO($1, $2, ...) 
matches132#133# it is better to also move those to the NestedMatch logic,134# to ensure that parentheses will be properly matched.135#136(KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S),137r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'),138(KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S),139r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'),140(KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',141re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'),142(KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',143re.S), r'unsigned long \1[1 << ((\2) - 1)]'),144(KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern +145r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'),146(KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' +147struct_args_pattern + r'\)', re.S), r'\2 *\1'),148(KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' +149struct_args_pattern + r'\)', re.S), r'\1 \2[]'),150(KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'),151(KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'),152]153#154# Regexes here are guaranteed to have the end delimiter matching155# the start delimiter. 
def apply_transforms(xforms, text):
    """
    Apply a set of transforms to a block of text.

    ``xforms`` is an iterable of ``(search, subst)`` pairs.  Each ``search``
    object only needs a ``sub(subst, text)`` method, so both the regex
    wrappers used by the transform tables in this file and plain compiled
    ``re`` patterns work.

    The pairs are applied strictly in order, each one operating on the
    result of the previous one.  Returns the transformed text; an empty
    ``xforms`` returns ``text`` unchanged.
    """
    for search, subst in xforms:
        text = search.sub(subst, text)
    return text
SECTION_DEFAULT = "Description"  #: Default section.


class KernelEntry:
    """
    Encapsulates a Kernel documentation entry.

    Accumulates the free text, named sections, parameter descriptions and
    warnings gathered for one documentation comment.
    """

    def __init__(self, config, fname, ln):
        """
        Create an empty entry.

        ``config`` is used for informational logging (``config.log.info``),
        ``fname`` prefixes every emitted message and ``ln`` is the line
        number just before the declaration starts.
        """
        self.config = config
        self.fname = fname

        self._contents = []
        self.prototype = ""

        self.warnings = []

        self.parameterlist = []
        self.parameterdescs = {}
        self.parametertypes = {}
        self.parameterdesc_start_lines = {}

        self.section_start_lines = {}
        self.sections = {}

        self.anon_struct_union = False

        self.leading_space = None

        # Section bookkeeping. These used to be created only by
        # begin_section(), so dump_section() (or begin_section(dump=True))
        # raised AttributeError when it happened to run first.
        self.section = SECTION_DEFAULT
        self.new_start_line = 0

        # NOTE(review): identifier, decl_type and declaration_purpose are
        # read from entries elsewhere in this file but are not set here;
        # presumably the parser assigns them - confirm before adding
        # defaults.

        # State flags
        self.brcount = 0
        self.declaration_start_line = ln + 1

    #
    # Management of section contents
    #
    def add_text(self, text):
        """Add a new text to the entry contents list."""
        self._contents.append(text)

    def contents(self):
        """
        Returns a string with all content texts that were added, one per
        line and always terminated by a newline.
        """
        return '\n'.join(self._contents) + '\n'

    # TODO: rename to emit_message after removal of kernel-doc.pl
    def emit_msg(self, ln, msg, *, warning=True):
        """
        Emit a message.

        Informational messages go straight to ``config.log.info``.
        Warnings are only stored in ``self.warnings``.
        """

        log_msg = f"{self.fname}:{ln} {msg}"

        if not warning:
            self.config.log.info(log_msg)
            return

        # Delegate warning output to output logic, as this way it
        # will report warnings/info only for symbols that are output

        self.warnings.append(log_msg)

    def begin_section(self, line_no, title=SECTION_DEFAULT, dump=False):
        """
        Begin a new section named ``title`` starting at ``line_no``.

        When ``dump`` is true, the section being accumulated so far is
        stored first.
        """
        if dump:
            self.dump_section(start_new=True)
        self.section = title
        self.new_start_line = line_no

    def dump_section(self, start_new=True):
        """
        Dumps section contents to arrays/hashes intended for that purpose.

        Names matching ``type_param`` (the ``@name`` forms) are stored as
        parameter descriptions; anything else is stored as a section, with
        repeated sections appended as a new paragraph.  When ``start_new``
        is true the entry is reset to an empty default section afterwards.
        """
        #
        # If we have accumulated no contents in the default ("description")
        # section, don't bother.
        #
        if self.section == SECTION_DEFAULT and not self._contents:
            return
        name = self.section
        contents = self.contents()

        if type_param.match(name):
            name = type_param.group(1)

            self.parameterdescs[name] = contents
            self.parameterdesc_start_lines[name] = self.new_start_line

            self.new_start_line = 0

        else:
            if name in self.sections and self.sections[name] != "":
                # Only warn on user-specified duplicate section names
                if name != SECTION_DEFAULT:
                    self.emit_msg(self.new_start_line,
                                  f"duplicate section name '{name}'")
                # Treat as a new paragraph - add a blank line
                self.sections[name] += '\n' + contents
            else:
                self.sections[name] = contents
                self.section_start_lines[name] = self.new_start_line
                self.new_start_line = 0

        # self.config.log.debug("Section: %s : %s", name, pformat(vars(self)))

        if start_new:
            self.section = SECTION_DEFAULT
            self._contents = []
def emit_msg(self, ln, msg, *, warning=True):
    """
    Emit a message.

    While an entry is being processed the message is handed to that
    entry's own ``emit_msg()``.  Without a current entry it is logged
    right away as ``<fname>:<ln> <msg>`` through ``config.log``, as a
    warning by default or as info when ``warning`` is false.
    """

    if self.entry:
        self.entry.emit_msg(ln, msg, warning=warning)
        return

    log_msg = f"{self.fname}:{ln} {msg}"

    if warning:
        self.config.log.warning(log_msg)
    else:
        self.config.log.info(log_msg)
def emit_unused_warnings(self):
    """
    When the parser fails to produce a valid entry, it places some
    warnings under `entry.warnings` that will be discarded when resetting
    the state.

    Ensure that those warnings are not lost.

    .. note::

          Because we are calling `config.warning()` here, those
          warnings are not filtered by the `-W` parameters: they will all
          be produced even when `-Wreturn`, `-Wshort-desc`, and/or
          `-Wcontents-before-sections` are used.

          Allowing those warnings to be filtered is complex, because it
          would require storing them in a buffer and then filtering them
          during the output step of the code, depending on the
          selected symbols.
    """
    # Nothing to flush when there is no current entry.
    if self.entry and self.entry not in self.entries:
        for log_msg in self.entry.warnings:
            self.config.warning(log_msg)
struct525dtype = param526param = "{unnamed_" + param + "}"527self.entry.parameterdescs[param] = "anonymous\n"528self.entry.anon_struct_union = True529530# Warn if parameter has no description531# (but ignore ones starting with # as these are not parameters532# but inline preprocessor statements)533if param not in self.entry.parameterdescs and not param.startswith("#"):534self.entry.parameterdescs[param] = self.undescribed535536if "." not in param:537if decl_type == 'function':538dname = f"{decl_type} parameter"539else:540dname = f"{decl_type} member"541542self.emit_msg(ln,543f"{dname} '{param}' not described in '{declaration_name}'")544545# Strip spaces from param so that it is one continuous string on546# parameterlist. This fixes a problem where check_sections()547# cannot find a parameter like "addr[6 + 2]" because it actually548# appears as "addr[6", "+", "2]" on the parameter list.549# However, it's better to maintain the param string unchanged for550# output, so just weaken the string compare in check_sections()551# to ignore "[blah" in a parameter string.552553self.entry.parameterlist.append(param)554org_arg = KernRe(r'\s\s+').sub(' ', org_arg)555self.entry.parametertypes[param] = org_arg556557558def create_parameter_list(self, ln, decl_type, args,559splitter, declaration_name):560"""561Creates a list of parameters, storing them at self.entry.562"""563564# temporarily replace all commas inside function pointer definition565arg_expr = KernRe(r'(\([^\),]+),')566while arg_expr.search(args):567args = arg_expr.sub(r"\1#", args)568569for arg in args.split(splitter):570# Ignore argument attributes571arg = KernRe(r'\sPOS0?\s').sub(' ', arg)572573# Strip leading/trailing spaces574arg = arg.strip()575arg = KernRe(r'\s+').sub(' ', arg, count=1)576577if arg.startswith('#'):578# Treat preprocessor directive as a typeless variable just to fill579# corresponding data structures "correctly". 
Catch it later in580# output_* subs.581582# Treat preprocessor directive as a typeless variable583self.push_parameter(ln, decl_type, arg, "",584"", declaration_name)585#586# The pointer-to-function case.587#588elif KernRe(r'\(.+\)\s*\(').search(arg):589arg = arg.replace('#', ',')590r = KernRe(r'[^\(]+\(\*?\s*' # Everything up to "(*"591r'([\w\[\].]*)' # Capture the name and possible [array]592r'\s*\)') # Make sure the trailing ")" is there593if r.match(arg):594param = r.group(1)595else:596self.emit_msg(ln, f"Invalid param: {arg}")597param = arg598dtype = arg.replace(param, '')599self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)600#601# The array-of-pointers case. Dig the parameter name out from the middle602# of the declaration.603#604elif KernRe(r'\(.+\)\s*\[').search(arg):605r = KernRe(r'[^\(]+\(\s*\*\s*' # Up to "(" and maybe "*"606r'([\w.]*?)' # The actual pointer name607r'\s*(\[\s*\w+\s*\]\s*)*\)') # The [array portion]608if r.match(arg):609param = r.group(1)610else:611self.emit_msg(ln, f"Invalid param: {arg}")612param = arg613dtype = arg.replace(param, '')614self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)615elif arg:616#617# Clean up extraneous spaces and split the string at commas; the first618# element of the resulting list will also include the type information.619#620arg = KernRe(r'\s*:\s*').sub(":", arg)621arg = KernRe(r'\s*\[').sub('[', arg)622args = KernRe(r'\s*,\s*').split(arg)623args[0] = re.sub(r'(\*+)\s*', r' \1', args[0])624#625# args[0] has a string of "type a". 
def check_sections(self, ln, decl_name, decl_type):
    """
    Check for errors inside sections, emitting a warning for every
    section that is neither a known section name nor one of the
    parameters/members found in the declaration.

    ``ln`` is the line used in the warning, ``decl_name`` the name of the
    declaration being checked and ``decl_type`` its kind; ``'function'``
    is reported as "parameter", everything else as "member".
    """
    # The label only depends on decl_type, so build it once.
    if decl_type == 'function':
        dname = f"{decl_type} parameter"
    else:
        dname = f"{decl_type} member"

    for section in self.entry.sections:
        if (section not in self.entry.parameterlist and
                not known_sections.search(section)):
            self.emit_msg(ln,
                          f"Excess {dname} '{section}' description in '{decl_name}'")
def split_struct_proto(self, proto):
    """
    Split apart a structure prototype; returns (struct|union, name,
    members) or ``None``.

    Two forms are tried in order: ``struct|union NAME { ... }`` and then
    ``typedef struct|union { ... } NAME;``.
    """

    type_pattern = r'(struct|union)'
    qualifiers = [
        "__attribute__",
        "__packed",
        "__aligned",
        "____cacheline_aligned_in_smp",
        "____cacheline_aligned",
    ]
    definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?"

    # Plain declaration: groups are (type, name, members).
    r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body)
    if r.search(proto):
        return (r.group(1), r.group(2), r.group(3))

    # Typedef form: the name follows the body, so the groups are
    # (type, members, name) and have to be reordered.
    r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;')
    if r.search(proto):
        return (r.group(1), r.group(3), r.group(2))

    return None
= KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id)761#762# Pass through the members of this inner structure/union.763#764for arg in content.split(';'):765arg = arg.strip()766#767# Look for (type)(*name)(args) - pointer to function768#769r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)')770if r.match(arg):771dtype, name, extra = r.group(1), r.group(2), r.group(3)772# Pointer-to-function773if not s_id:774# Anonymous struct/union775newmember += f"{dtype}{name}{extra}; "776else:777newmember += f"{dtype}{s_id}.{name}{extra}; "778#779# Otherwise a non-function member.780#781else:782#783# Remove bitmap and array portions and spaces around commas784#785arg = KernRe(r':\s*\d+\s*').sub('', arg)786arg = KernRe(r'\[.*\]').sub('', arg)787arg = KernRe(r'\s*,\s*').sub(',', arg)788#789# Look for a normal decl - "type name[,name...]"790#791r = KernRe(r'(.*)\s+([\S+,]+)')792if r.search(arg):793for name in r.group(2).split(','):794name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name)795if not s_id:796# Anonymous struct/union797newmember += f"{r.group(1)} {name}; "798else:799newmember += f"{r.group(1)} {s_id}.{name}; "800else:801newmember += f"{arg}; "802#803# At the end of the s_id loop, replace the original declaration with804# the munged version.805#806members = members.replace(oldmember, newmember)807#808# End of the tuple loop - search again and see if there are outer members809# that now turn up.810#811tuples = struct_members.findall(members)812return members813814def format_struct_decl(self, declaration):815"""816Format the ``struct`` declaration into a standard form for inclusion817in the resulting docs.818"""819820#821# Insert newlines, get rid of extra spaces.822#823declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration)824declaration = KernRe(r'\}\s+;').sub('};', declaration)825#826# Format inline enums with each member on its own line.827#828r = KernRe(r'(enum\s+\{[^\}]+),([^\n])')829while r.search(declaration):830declaration = r.sub(r'\1,\n\2', declaration)831#832# Now go 
def dump_struct(self, ln, proto):
    """
    Store an entry for a ``struct`` or ``union``.

    ``ln`` is the line number used for messages and ``proto`` the
    prototype text.  Nothing is stored, and a message is emitted instead,
    when the prototype cannot be split or when its name differs from the
    identifier recorded in the current entry.
    """
    #
    # Do the basic parse to get the pieces of the declaration.
    #
    struct_parts = self.split_struct_proto(proto)
    if not struct_parts:
        self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!")
        return
    decl_type, declaration_name, members = struct_parts

    if self.entry.identifier != declaration_name:
        self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. "
                      f"Prototype was for {decl_type} {declaration_name} instead\n")
        return
    #
    # Go through the list of members applying all of our transformations.
    #
    members = trim_private_members(members)
    members = apply_transforms(struct_xforms, members)

    nested = NestedMatch()
    for search, sub in struct_nested_prefixes:
        members = nested.sub(search, sub, members)
    #
    # Deal with embedded struct and union members, and drop enums entirely.
    #
    # Keep the transformed-but-not-flattened text: it is what gets
    # formatted as the definition shown in the output.
    declaration = members
    members = self.rewrite_struct_members(members)
    members = re.sub(r'(\{[^\{\}]*\})', '', members)
    #
    # Output the result and we are done.
    #
    self.create_parameter_list(ln, decl_type, members, ';',
                               declaration_name)
    self.check_sections(ln, declaration_name, decl_type)
    self.output_declaration(decl_type, declaration_name,
                            definition=self.format_struct_decl(declaration),
                            purpose=self.entry.declaration_purpose)
directives. Note that this depends on the899# trailing semicolon we added in process_proto_type().900#901proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto)902#903# Parse out the name and members of the enum. Typedef form first.904#905r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;')906if r.search(proto):907declaration_name = r.group(2)908members = trim_private_members(r.group(1))909#910# Failing that, look for a straight enum911#912else:913r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}')914if r.match(proto):915declaration_name = r.group(1)916members = trim_private_members(r.group(2))917#918# OK, this isn't going to work.919#920else:921self.emit_msg(ln, f"{proto}: error: Cannot parse enum!")922return923#924# Make sure we found what we were expecting.925#926if self.entry.identifier != declaration_name:927if self.entry.identifier == "":928self.emit_msg(ln,929f"{proto}: wrong kernel-doc identifier on prototype")930else:931self.emit_msg(ln,932f"expecting prototype for enum {self.entry.identifier}. 
"933f"Prototype was for enum {declaration_name} instead")934return935936if not declaration_name:937declaration_name = "(anonymous)"938#939# Parse out the name of each enum member, and verify that we940# have a description for it.941#942member_set = set()943members = KernRe(r'\([^;)]*\)').sub('', members)944for arg in members.split(','):945if not arg:946continue947arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg)948self.entry.parameterlist.append(arg)949if arg not in self.entry.parameterdescs:950self.entry.parameterdescs[arg] = self.undescribed951self.emit_msg(ln,952f"Enum value '{arg}' not described in enum '{declaration_name}'")953member_set.add(arg)954#955# Ensure that every described member actually exists in the enum.956#957for k in self.entry.parameterdescs:958if k not in member_set:959self.emit_msg(ln,960f"Excess enum value '@{k}' description in '{declaration_name}'")961962self.output_declaration('enum', declaration_name,963purpose=self.entry.declaration_purpose)964965def dump_var(self, ln, proto):966"""967Store variables that are part of kAPI.968"""969VAR_ATTRIBS = [970"extern",971]972OPTIONAL_VAR_ATTR = "^(?:" + "|".join(VAR_ATTRIBS) + ")?"973974sub_prefixes = [975(KernRe(r"__read_mostly"), ""),976(KernRe(r"__ro_after_init"), ""),977(KernRe(r"(?://.*)$"), ""),978(KernRe(r"(?:/\*.*\*/)"), ""),979(KernRe(r";$"), ""),980(KernRe(r"=.*"), ""),981]982983#984# Store the full prototype before modifying it985#986full_proto = proto987declaration_name = None988989#990# Handle macro definitions991#992macro_prefixes = [993KernRe(r"DEFINE_[\w_]+\s*\(([\w_]+)\)"),994]995996for r in macro_prefixes:997match = r.search(proto)998if match:999declaration_name = match.group(1)1000break10011002#1003# Drop comments and macros to have a pure C prototype1004#1005if not declaration_name:1006for r, sub in sub_prefixes:1007proto = r.sub(sub, proto)10081009proto = proto.rstrip()10101011#1012# Variable name is at the end of the declaration1013#10141015default_val = None10161017r= 
def dump_declaration(self, ln, prototype):
    """
    Store a data declaration inside self.entries array.

    Dispatches on ``self.entry.decl_type`` to the matching ``dump_*``
    method, passing ``ln`` and ``prototype`` through.  An unrecognised
    type is reported with ``emit_msg()``.
    """

    if self.entry.decl_type == "enum":
        self.dump_enum(ln, prototype)
    elif self.entry.decl_type == "typedef":
        self.dump_typedef(ln, prototype)
    elif self.entry.decl_type in ["union", "struct"]:
        self.dump_struct(ln, prototype)
    elif self.entry.decl_type == "var":
        self.dump_var(ln, prototype)
    else:
        # This would be a bug.  The method is emit_msg(); the previous
        # call to the non-existent emit_message() raised AttributeError
        # instead of reporting the problem.
        self.emit_msg(ln, f'Unknown declaration type: {self.entry.decl_type}')
r.search(prototype):1083return_type = ''1084declaration_name = r.group(1)1085func_macro = True1086found = True10871088# Yes, this truly is vile. We are looking for:1089# 1. Return type (may be nothing if we're looking at a macro)1090# 2. Function name1091# 3. Function parameters.1092#1093# All the while we have to watch out for function pointer parameters1094# (which IIRC is what the two sections are for), C types (these1095# regexps don't even start to express all the possibilities), and1096# so on.1097#1098# If you mess with these regexps, it's a good idea to check that1099# the following functions' documentation still comes out right:1100# - parport_register_device (function pointer parameters)1101# - atomic_set (macro)1102# - pci_match_device, __copy_to_user (long return type)11031104name = r'\w+'1105type1 = r'(?:[\w\s]+)?'1106type2 = r'(?:[\w\s]+\*+)+'1107#1108# Attempt to match first on (args) with no internal parentheses; this1109# lets us easily filter out __acquires() and other post-args stuff. If1110# that fails, just grab the rest of the line to the last closing1111# parenthesis.1112#1113proto_args = r'\(([^\(]*|.*)\)'1114#1115# (Except for the simple macro case) attempt to split up the prototype1116# in the various ways we understand.1117#1118if not found:1119patterns = [1120rf'^()({name})\s*{proto_args}',1121rf'^({type1})\s+({name})\s*{proto_args}',1122rf'^({type2})\s*({name})\s*{proto_args}',1123]11241125for p in patterns:1126r = KernRe(p)1127if r.match(prototype):1128return_type = r.group(1)1129declaration_name = r.group(2)1130args = r.group(3)1131self.create_parameter_list(ln, decl_type, args, ',',1132declaration_name)1133found = True1134break1135#1136# Parsing done; make sure that things are as we expect.1137#1138if not found:1139self.emit_msg(ln,1140f"cannot understand function prototype: '{prototype}'")1141return1142if self.entry.identifier != declaration_name:1143self.emit_msg(ln, f"expecting prototype for {self.entry.identifier}(). 
"1144f"Prototype was for {declaration_name}() instead")1145return1146self.check_sections(ln, declaration_name, "function")1147self.check_return_section(ln, declaration_name, return_type)1148#1149# Store the result.1150#1151self.output_declaration(decl_type, declaration_name,1152typedef=('typedef' in return_type),1153functiontype=return_type,1154purpose=self.entry.declaration_purpose,1155func_macro=func_macro)115611571158def dump_typedef(self, ln, proto):1159"""1160Store a ``typedef`` inside self.entries array.1161"""1162#1163# We start by looking for function typedefs.1164#1165typedef_type = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*'1166typedef_ident = r'\*?\s*(\w\S+)\s*'1167typedef_args = r'\s*\((.*)\);'11681169typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args)1170typedef2 = KernRe(typedef_type + typedef_ident + typedef_args)11711172# Parse function typedef prototypes1173for r in [typedef1, typedef2]:1174if not r.match(proto):1175continue11761177return_type = r.group(1).strip()1178declaration_name = r.group(2)1179args = r.group(3)11801181if self.entry.identifier != declaration_name:1182self.emit_msg(ln,1183f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")1184return11851186self.create_parameter_list(ln, 'function', args, ',', declaration_name)11871188self.output_declaration('function', declaration_name,1189typedef=True,1190functiontype=return_type,1191purpose=self.entry.declaration_purpose)1192return1193#1194# Not a function, try to parse a simple typedef.1195#1196r = KernRe(r'typedef.*\s+(\w+)\s*;')1197if r.match(proto):1198declaration_name = r.group(1)11991200if self.entry.identifier != declaration_name:1201self.emit_msg(ln,1202f"expecting prototype for typedef {self.entry.identifier}. 
Prototype was for typedef {declaration_name} instead\n")1203return12041205self.output_declaration('typedef', declaration_name,1206purpose=self.entry.declaration_purpose)1207return12081209self.emit_msg(ln, "error: Cannot parse typedef!")12101211@staticmethod1212def process_export(function_set, line):1213"""1214process ``EXPORT_SYMBOL*`` tags12151216This method doesn't use any variable from the class, so declare it1217with a staticmethod decorator.1218"""12191220# We support documenting some exported symbols with different1221# names. A horrible hack.1222suffixes = [ '_noprof' ]12231224# Note: it accepts only one EXPORT_SYMBOL* per line, as having1225# multiple export lines would violate Kernel coding style.12261227if export_symbol.search(line):1228symbol = export_symbol.group(2)1229elif export_symbol_ns.search(line):1230symbol = export_symbol_ns.group(2)1231else:1232return False1233#1234# Found an export, trim out any special suffixes1235#1236for suffix in suffixes:1237# Be backward compatible with Python < 3.91238if symbol.endswith(suffix):1239symbol = symbol[:-len(suffix)]1240function_set.add(symbol)1241return True12421243def process_normal(self, ln, line):1244"""1245STATE_NORMAL: looking for the ``/**`` to begin everything.1246"""12471248if not doc_start.match(line):1249return12501251# start a new entry1252self.reset_state(ln)12531254# next line is always the function name1255self.state = state.NAME12561257def process_name(self, ln, line):1258"""1259STATE_NAME: Looking for the "name - description" line1260"""1261#1262# Check for a DOC: block and handle them specially.1263#1264if doc_block.search(line):12651266if not doc_block.group(1):1267self.entry.begin_section(ln, "Introduction")1268else:1269self.entry.begin_section(ln, doc_block.group(1))12701271self.entry.identifier = self.entry.section1272self.state = state.DOCBLOCK1273#1274# Otherwise we're looking for a normal kerneldoc declaration line.1275#1276elif doc_decl.search(line):1277self.entry.identifier = 
doc_decl.group(1)12781279# Test for data declaration1280if doc_begin_data.search(line):1281self.entry.decl_type = doc_begin_data.group(1)1282self.entry.identifier = doc_begin_data.group(2)1283#1284# Look for a function description1285#1286elif doc_begin_func.search(line):1287self.entry.identifier = doc_begin_func.group(1)1288self.entry.decl_type = "function"1289#1290# We struck out.1291#1292else:1293self.emit_msg(ln,1294f"This comment starts with '/**', but isn't a kernel-doc comment. Refer to Documentation/doc-guide/kernel-doc.rst\n{line}")1295self.state = state.NORMAL1296return1297#1298# OK, set up for a new kerneldoc entry.1299#1300self.state = state.BODY1301self.entry.identifier = self.entry.identifier.strip(" ")1302# if there's no @param blocks need to set up default section here1303self.entry.begin_section(ln + 1)1304#1305# Find the description portion, which *should* be there but1306# isn't always.1307# (We should be able to capture this from the previous parsing - someday)1308#1309r = KernRe("[-:](.*)")1310if r.search(line):1311self.entry.declaration_purpose = trim_whitespace(r.group(1))1312self.state = state.DECLARATION1313else:1314self.entry.declaration_purpose = ""13151316if not self.entry.declaration_purpose and self.config.wshort_desc:1317self.emit_msg(ln,1318f"missing initial short description on line:\n{line}")13191320if not self.entry.identifier and self.entry.decl_type != "enum":1321self.emit_msg(ln,1322f"wrong kernel-doc identifier on line:\n{line}")1323self.state = state.NORMAL13241325if self.config.verbose:1326self.emit_msg(ln,1327f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}",1328warning=False)1329#1330# Failed to find an identifier. 
Emit a warning1331#1332else:1333self.emit_msg(ln, f"Cannot find identifier on line:\n{line}")13341335def is_new_section(self, ln, line):1336"""1337Helper function to determine if a new section is being started.1338"""1339if doc_sect.search(line):1340self.state = state.BODY1341#1342# Pick out the name of our new section, tweaking it if need be.1343#1344newsection = doc_sect.group(1)1345if newsection.lower() == 'description':1346newsection = 'Description'1347elif newsection.lower() == 'context':1348newsection = 'Context'1349self.state = state.SPECIAL_SECTION1350elif newsection.lower() in ["@return", "@returns",1351"return", "returns"]:1352newsection = "Return"1353self.state = state.SPECIAL_SECTION1354elif newsection[0] == '@':1355self.state = state.SPECIAL_SECTION1356#1357# Initialize the contents, and get the new section going.1358#1359newcontents = doc_sect.group(2)1360if not newcontents:1361newcontents = ""1362self.dump_section()1363self.entry.begin_section(ln, newsection)1364self.entry.leading_space = None13651366self.entry.add_text(newcontents.lstrip())1367return True1368return False13691370def is_comment_end(self, ln, line):1371"""1372Helper function to detect (and effect) the end of a kerneldoc comment.1373"""1374if doc_end.search(line):1375self.dump_section()13761377# Look for doc_com + <text> + doc_end:1378r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/')1379if r.match(line):1380self.emit_msg(ln, f"suspicious ending line: {line}")13811382self.entry.prototype = ""1383self.entry.new_start_line = ln + 113841385self.state = state.PROTO1386return True1387return False138813891390def process_decl(self, ln, line):1391"""1392STATE_DECLARATION: We've seen the beginning of a declaration.1393"""1394if self.is_new_section(ln, line) or self.is_comment_end(ln, line):1395return1396#1397# Look for anything with the " * " line beginning.1398#1399if doc_content.search(line):1400cont = doc_content.group(1)1401#1402# A blank line means that we have moved out of the declaration1403# part 
of the comment (without any "special section" parameter1404# descriptions).1405#1406if cont == "":1407self.state = state.BODY1408#1409# Otherwise we have more of the declaration section to soak up.1410#1411else:1412self.entry.declaration_purpose = \1413trim_whitespace(self.entry.declaration_purpose + ' ' + cont)1414else:1415# Unknown line, ignore1416self.emit_msg(ln, f"bad line: {line}")141714181419def process_special(self, ln, line):1420"""1421STATE_SPECIAL_SECTION: a section ending with a blank line.1422"""1423#1424# If we have hit a blank line (only the " * " marker), then this1425# section is done.1426#1427if KernRe(r"\s*\*\s*$").match(line):1428self.entry.begin_section(ln, dump = True)1429self.state = state.BODY1430return1431#1432# Not a blank line, look for the other ways to end the section.1433#1434if self.is_new_section(ln, line) or self.is_comment_end(ln, line):1435return1436#1437# OK, we should have a continuation of the text for this section.1438#1439if doc_content.search(line):1440cont = doc_content.group(1)1441#1442# If the lines of text after the first in a special section have1443# leading white space, we need to trim it out or Sphinx will get1444# confused. For the second line (the None case), see what we1445# find there and remember it.1446#1447if self.entry.leading_space is None:1448r = KernRe(r'^(\s+)')1449if r.match(cont):1450self.entry.leading_space = len(r.group(1))1451else:1452self.entry.leading_space = 01453#1454# Otherwise, before trimming any leading chars, be *sure*1455# that they are white space. 
We should maybe warn if this1456# isn't the case.1457#1458for i in range(0, self.entry.leading_space):1459if cont[i] != " ":1460self.entry.leading_space = i1461break1462#1463# Add the trimmed result to the section and we're done.1464#1465self.entry.add_text(cont[self.entry.leading_space:])1466else:1467# Unknown line, ignore1468self.emit_msg(ln, f"bad line: {line}")14691470def process_body(self, ln, line):1471"""1472STATE_BODY: the bulk of a kerneldoc comment.1473"""1474if self.is_new_section(ln, line) or self.is_comment_end(ln, line):1475return14761477if doc_content.search(line):1478cont = doc_content.group(1)1479self.entry.add_text(cont)1480else:1481# Unknown line, ignore1482self.emit_msg(ln, f"bad line: {line}")14831484def process_inline_name(self, ln, line):1485"""STATE_INLINE_NAME: beginning of docbook comments within a prototype."""14861487if doc_inline_sect.search(line):1488self.entry.begin_section(ln, doc_inline_sect.group(1))1489self.entry.add_text(doc_inline_sect.group(2).lstrip())1490self.state = state.INLINE_TEXT1491elif doc_inline_end.search(line):1492self.dump_section()1493self.state = state.PROTO1494elif doc_content.search(line):1495self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}")1496self.state = state.PROTO1497# else ... ??14981499def process_inline_text(self, ln, line):1500"""STATE_INLINE_TEXT: docbook comments within a prototype."""15011502if doc_inline_end.search(line):1503self.dump_section()1504self.state = state.PROTO1505elif doc_content.search(line):1506self.entry.add_text(doc_content.group(1))1507# else ... 
??15081509def syscall_munge(self, ln, proto): # pylint: disable=W06131510"""1511Handle syscall definitions.1512"""15131514is_void = False15151516# Strip newlines/CR's1517proto = re.sub(r'[\r\n]+', ' ', proto)15181519# Check if it's a SYSCALL_DEFINE01520if 'SYSCALL_DEFINE0' in proto:1521is_void = True15221523# Replace SYSCALL_DEFINE with correct return type & function name1524proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto)15251526r = KernRe(r'long\s+(sys_.*?),')1527if r.search(proto):1528proto = KernRe(',').sub('(', proto, count=1)1529elif is_void:1530proto = KernRe(r'\)').sub('(void)', proto, count=1)15311532# Now delete all of the odd-numbered commas in the proto1533# so that argument types & names don't have a comma between them1534count = 01535length = len(proto)15361537if is_void:1538length = 0 # skip the loop if is_void15391540for ix in range(length):1541if proto[ix] == ',':1542count += 11543if count % 2 == 1:1544proto = proto[:ix] + ' ' + proto[ix + 1:]15451546return proto15471548def tracepoint_munge(self, ln, proto):1549"""1550Handle tracepoint definitions.1551"""15521553tracepointname = None1554tracepointargs = None15551556# Match tracepoint name based on different patterns1557r = KernRe(r'TRACE_EVENT\((.*?),')1558if r.search(proto):1559tracepointname = r.group(1)15601561r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),')1562if r.search(proto):1563tracepointname = r.group(1)15641565r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),')1566if r.search(proto):1567tracepointname = r.group(2)15681569if tracepointname:1570tracepointname = tracepointname.lstrip()15711572r = KernRe(r'TP_PROTO\((.*?)\)')1573if r.search(proto):1574tracepointargs = r.group(1)15751576if not tracepointname or not tracepointargs:1577self.emit_msg(ln,1578f"Unrecognized tracepoint format:\n{proto}\n")1579else:1580proto = f"static inline void trace_{tracepointname}({tracepointargs})"1581self.entry.identifier = f"trace_{self.entry.identifier}"15821583return proto15841585def 
process_proto_function(self, ln, line):1586"""Ancillary routine to process a function prototype."""15871588# strip C99-style comments to end of line1589line = KernRe(r"//.*$", re.S).sub('', line)1590#1591# Soak up the line's worth of prototype text, stopping at { or ; if present.1592#1593if KernRe(r'\s*#\s*define').match(line):1594self.entry.prototype = line1595elif not line.startswith('#'): # skip other preprocessor stuff1596r = KernRe(r'([^\{]*)')1597if r.match(line):1598self.entry.prototype += r.group(1) + " "1599#1600# If we now have the whole prototype, clean it up and declare victory.1601#1602if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line):1603# strip comments and surrounding spaces1604self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip()1605#1606# Handle self.entry.prototypes for function pointers like:1607# int (*pcs_config)(struct foo)1608# by turning it into1609# int pcs_config(struct foo)1610#1611r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)')1612self.entry.prototype = r.sub(r'\1\2', self.entry.prototype)1613#1614# Handle special declaration syntaxes1615#1616if 'SYSCALL_DEFINE' in self.entry.prototype:1617self.entry.prototype = self.syscall_munge(ln,1618self.entry.prototype)1619else:1620r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT')1621if r.search(self.entry.prototype):1622self.entry.prototype = self.tracepoint_munge(ln,1623self.entry.prototype)1624#1625# ... 
and we're done1626#1627self.dump_function(ln, self.entry.prototype)1628self.reset_state(ln)16291630def process_proto_type(self, ln, line):1631"""1632Ancillary routine to process a type.1633"""16341635# Strip C99-style comments and surrounding whitespace1636line = KernRe(r"//.*$", re.S).sub('', line).strip()1637if not line:1638return # nothing to see here16391640# To distinguish preprocessor directive from regular declaration later.1641if line.startswith('#'):1642line += ";"1643#1644# Split the declaration on any of { } or ;, and accumulate pieces1645# until we hit a semicolon while not inside {brackets}1646#1647r = KernRe(r'(.*?)([{};])')1648for chunk in r.split(line):1649if chunk: # Ignore empty matches1650self.entry.prototype += chunk1651#1652# This cries out for a match statement ... someday after we can1653# drop Python 3.9 ...1654#1655if chunk == '{':1656self.entry.brcount += 11657elif chunk == '}':1658self.entry.brcount -= 11659elif chunk == ';' and self.entry.brcount <= 0:1660self.dump_declaration(ln, self.entry.prototype)1661self.reset_state(ln)1662return1663#1664# We hit the end of the line while still in the declaration; put1665# in a space to represent the newline.1666#1667self.entry.prototype += ' '16681669def process_proto(self, ln, line):1670"""STATE_PROTO: reading a function/whatever prototype."""16711672if doc_inline_oneline.search(line):1673self.entry.begin_section(ln, doc_inline_oneline.group(1))1674self.entry.add_text(doc_inline_oneline.group(2))1675self.dump_section()16761677elif doc_inline_start.search(line):1678self.state = state.INLINE_NAME16791680elif self.entry.decl_type == 'function':1681self.process_proto_function(ln, line)16821683else:1684self.process_proto_type(ln, line)16851686def process_docblock(self, ln, line):1687"""STATE_DOCBLOCK: within a ``DOC:`` block."""16881689if doc_end.search(line):1690self.dump_section()1691self.output_declaration("doc", self.entry.identifier)1692self.reset_state(ln)16931694elif 
doc_content.search(line):1695self.entry.add_text(doc_content.group(1))16961697def parse_export(self):1698"""1699Parses ``EXPORT_SYMBOL*`` macros from a single Kernel source file.1700"""17011702export_table = set()17031704try:1705with open(self.fname, "r", encoding="utf8",1706errors="backslashreplace") as fp:17071708for line in fp:1709self.process_export(export_table, line)17101711except IOError:1712return None17131714return export_table17151716#: The state/action table telling us which function to invoke in each state.1717state_actions = {1718state.NORMAL: process_normal,1719state.NAME: process_name,1720state.BODY: process_body,1721state.DECLARATION: process_decl,1722state.SPECIAL_SECTION: process_special,1723state.INLINE_NAME: process_inline_name,1724state.INLINE_TEXT: process_inline_text,1725state.PROTO: process_proto,1726state.DOCBLOCK: process_docblock,1727}17281729def parse_kdoc(self):1730"""1731Open and process each line of a C source file.1732The parsing is controlled via a state machine, and the line is passed1733to a different process function depending on the state. The process1734function may update the state as needed.17351736Besides parsing kernel-doc tags, it also parses export symbols.1737"""17381739prev = ""1740prev_ln = None1741export_table = set()17421743try:1744with open(self.fname, "r", encoding="utf8",1745errors="backslashreplace") as fp:1746for ln, line in enumerate(fp):17471748line = line.expandtabs().strip("\n")17491750# Group continuation lines on prototypes1751if self.state == state.PROTO:1752if line.endswith("\\"):1753prev += line.rstrip("\\")1754if not prev_ln:1755prev_ln = ln1756continue17571758if prev:1759ln = prev_ln1760line = prev + line1761prev = ""1762prev_ln = None17631764self.config.log.debug("%d %s: %s",1765ln, state.name[self.state],1766line)17671768# This is an optimization over the original script.1769# There, when export_file was used for the same file,1770# it was read twice. 
Here, we use the already-existing1771# loop to parse exported symbols as well.1772#1773if (self.state != state.NORMAL) or \1774not self.process_export(export_table, line):1775# Hand this line to the appropriate state handler1776self.state_actions[self.state](self, ln, line)17771778self.emit_unused_warnings()17791780except OSError:1781self.config.log.error(f"Error: Cannot open file {self.fname}")17821783return export_table, self.entries178417851786