Path: blob/master/tools/lib/python/kdoc/parse_data_structs.py
122941 views
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2016-2025 by Mauro Carvalho Chehab <[email protected]>.
# pylint: disable=R0912,R0915

"""
Parse a source file or header, creating ReStructured Text cross references.

It accepts an optional file to change the default symbol reference or to
suppress symbols from the output.

It is capable of identifying ``define``, ``struct``, ``typedef`` and
``enum`` declarations, as well as the symbols declared inside enums, and
creates cross-references for all of them. It is also capable of
distinguishing a ``#define`` used for specifying a Linux ioctl.

The optional rules file contains a set of rules like::

    ignore ioctl VIDIOC_ENUM_FMT
    replace ioctl VIDIOC_DQBUF vidioc_qbuf
    replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
"""

import os
import re
import sys


class ParseDataStructs:
    """
    Creates an enriched version of a Kernel header file with cross-links
    to each C data structure type.

    It is meant to allow having a more comprehensive documentation, where
    uAPI headers will create cross-reference links to the code.

    It is capable of identifying ``define``, ``struct``, ``typedef`` and
    ``enum`` declarations, as well as the symbols declared inside enums,
    and creates cross-references for all of them. It is also capable of
    distinguishing a ``#define`` used for specifying a Linux ioctl.

    By default, it creates rules for all symbols and defines, but it also
    allows parsing an exception file. Such file contains a set of rules
    using the syntax below:

    1. Ignore rules::

        ignore <type> <symbol>

       Removes the symbol from reference generation.

    2. Replace rules::

        replace <type> <old_symbol> <new_reference>

       Replaces how old_symbol is referenced. The new_reference can be:

       - A simple symbol name;
       - A full Sphinx reference.

    3. Namespace rules::

        namespace <namespace>

       Sets the C namespace to be used during cross-reference generation.
       Can be overridden by replace rules.

    On ignore and replace rules, ``<type>`` can be:

    - ``ioctl``: for defines whose value starts with ``_IO``, e.g. ioctl
      definitions;
    - ``define``: for other defines;
    - ``symbol``: for symbols defined within enums;
    - ``typedef``: for typedefs;
    - ``enum``: for the name of a non-anonymous enum;
    - ``struct``: for structs.

    Examples::

        ignore define __LINUX_MEDIA_H
        ignore ioctl VIDIOC_ENUM_FMT
        replace ioctl VIDIOC_DQBUF vidioc_qbuf
        replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`

        namespace MC
    """

    #: Parser regex with multiple ways to capture enums.
    RE_ENUMS = [
        re.compile(r"^\s*enum\s+([\w_]+)\s*\{"),
        re.compile(r"^\s*enum\s+([\w_]+)\s*$"),
        re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*\{"),
        re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*$"),
    ]

    #: Parser regex with multiple ways to capture structs.
    RE_STRUCTS = [
        re.compile(r"^\s*struct\s+([_\w][\w\d_]+)\s*\{"),
        re.compile(r"^\s*struct\s+([_\w][\w\d_]+)$"),
        re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)\s*\{"),
        re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)$"),
    ]

    # NOTE: the original code was written a long time before the Sphinx C
    # domain gained support for multiple namespaces. To avoid too much
    # churn at the existing hyperlinks, the code kept using "c:type"
    # instead of the right types. To change that, we need to change the
    # types not only here, but also at the uAPI media documentation.

    #: Dictionary containing C type identifiers to be transformed.
    DEF_SYMBOL_TYPES = {
        "ioctl": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "IOCTL Commands",
        },
        "define": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "Macros and Definitions",
        },
        # We're calling each definition inside an enum as "symbol"
        "symbol": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "Enumeration values",
        },
        "typedef": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Type Definitions",
        },
        # This is the description of the enum itself
        "enum": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Enumerations",
        },
        "struct": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Structures",
        },
    }

    def __init__(self, debug: bool = False):
        """
        Initialize internal vars.

        ``debug`` enables debug_print(); values greater than 1 also make
        parse_file() print every logical line it processes.
        """
        self.debug = debug
        self.data = ""

        # One dictionary per symbol type, mapping symbol -> (reference, line)
        self.symbols = {}

        self.namespace = None
        self.ignore = []
        self.replace = []

        # Basename of the last exceptions file read. It is kept on the
        # instance because apply_exceptions() needs it for its messages.
        self.exceptions_name = None

        for symbol_type in self.DEF_SYMBOL_TYPES:
            self.symbols[symbol_type] = {}

    def read_exceptions(self, fname: str):
        """
        Read an optional exceptions file, used to override defaults.

        Blank lines and lines starting with ``#`` are skipped. Ignore and
        replace rules are queued at self.ignore and self.replace together
        with their line numbers; a namespace rule sets self.namespace.
        Any other line terminates the program via sys.exit().

        Does nothing when ``fname`` is empty or None.
        """

        if not fname:
            return

        name = os.path.basename(fname)
        self.exceptions_name = name

        with open(fname, "r", encoding="utf-8", errors="backslashreplace") as f:
            for ln, line in enumerate(f, start=1):
                line = line.strip()
                if not line or line.startswith("#"):
                    continue

                # ignore rules
                match = re.match(r"^ignore\s+(\w+)\s+(\S+)", line)
                if match:
                    self.ignore.append((ln, match.group(1), match.group(2)))
                    continue

                # replace rules
                match = re.match(r"^replace\s+(\S+)\s+(\S+)\s+(\S+)", line)
                if match:
                    self.replace.append((ln, match.group(1), match.group(2),
                                         match.group(3)))
                    continue

                # namespace rules
                match = re.match(r"^namespace\s+(\S+)", line)
                if match:
                    self.namespace = match.group(1)
                    continue

                sys.exit(f"{name}:{ln}: invalid line: {line}")

    def apply_exceptions(self):
        """
        Process exceptions file with rules to ignore or replace references.

        Ignore rules drop the symbol from self.symbols. Replace rules
        rewrite the stored reference, keeping the line where the symbol
        was found. A rule with an unknown type terminates the program via
        sys.exit(); a replace rule for a symbol which was not found only
        prints a warning.
        """

        # The file name is only known by read_exceptions(). Use a
        # placeholder if rules were queued by some other means.
        name = self.exceptions_name or "<exceptions>"

        # Handle ignore rules
        for ln, c_type, symbol in self.ignore:
            if c_type not in self.DEF_SYMBOL_TYPES:
                sys.exit(f"{name}:{ln}: {c_type} is invalid")

            # Ignoring a symbol which was not parsed is not an error
            self.symbols[c_type].pop(symbol, None)

        # Handle replace rules
        for ln, c_type, old, new in self.replace:
            if c_type not in self.DEF_SYMBOL_TYPES:
                sys.exit(f"{name}:{ln}: {c_type} is invalid")

            reftype = None

            # Parse reference type when the type is specified

            match = re.match(r"^\:c\:(\w+)\:\`(.+)\`", new)
            if match:
                reftype = f":c:{match.group(1)}"
                new = match.group(2)
            else:
                match = re.search(r"(\:ref)\:\`(.+)\`", new)
                if match:
                    reftype = match.group(1)
                    new = match.group(2)

            # If the replacement rule doesn't have a type, get default
            if not reftype:
                reftype = self.DEF_SYMBOL_TYPES[c_type].get("ref_type")
                if not reftype:
                    reftype = self.DEF_SYMBOL_TYPES[c_type].get("real_type")

            new_ref = f"{reftype}:`{old} <{new}>`"

            # Change self.symbols to use the replacement rule
            if old in self.symbols[c_type]:
                (_, symbol_ln) = self.symbols[c_type][old]
                self.symbols[c_type][old] = (new_ref, symbol_ln)
            else:
                print(f"{name}:{ln}: Warning: can't find {old} {c_type}")

    def store_type(self, ln, symbol_type: str, symbol: str,
                   ref_name: str = None, replace_underscores: bool = True):
        """
        Store a new symbol at self.symbols under symbol_type.

        ``ln`` is the line number stored together with the reference.
        ``ref_name`` is the reference target; when not given, the
        lowercase symbol name is used.

        By default, underscores are replaced by ``-`` on ``:ref``
        targets. Targets of C domain references are prefixed with the
        namespace, when one is set.
        """
        defs = self.DEF_SYMBOL_TYPES[symbol_type]

        prefix = defs.get("prefix", "")
        suffix = defs.get("suffix", "")
        ref_type = defs.get("ref_type")

        # Determine ref_link based on symbol type
        if ref_type or self.namespace:
            if not ref_name:
                ref_name = symbol.lower()

            # :ref targets are written with "-" instead of "_"
            if ref_type == ":ref" and replace_underscores:
                ref_name = ref_name.replace("_", "-")

            # C domain references may have namespaces. ref_type is None
            # when we got here only because a namespace is set.
            if ref_type and ref_type.startswith(":c:"):
                if self.namespace:
                    ref_name = f"{self.namespace}.{ref_name}"

            if ref_type:
                ref_link = f"{ref_type}:`{symbol} <{ref_name}>`"
            else:
                ref_link = f"`{symbol} <{ref_name}>`"
        else:
            ref_link = symbol

        self.symbols[symbol_type][symbol] = (f"{prefix}{ref_link}{suffix}", ln)

    def store_line(self, line):
        """
        Store a line at self.data, properly indented.

        Tabs are expanded and trailing spaces are removed.
        """
        line = " " + line.expandtabs()
        self.data += line.rstrip(" ")

    def parse_file(self, file_in: str, exceptions: str = None):
        """
        Read a C source file and get identifiers.

        Every line is stored at self.data via store_line(). Lines ending
        with a backslash and multi-line comments are grouped into a single
        logical line before being matched against the symbol regexes.
        The rules from the optional ``exceptions`` file are applied after
        the whole file is parsed.
        """
        self.data = ""
        is_enum = False
        is_comment = False
        multiline = ""

        # Line where a backslash-continued statement started, so that the
        # symbol is reported at the line which declares it.
        first_ln = None

        self.read_exceptions(exceptions)

        with open(file_in, "r",
                  encoding="utf-8", errors="backslashreplace") as f:
            for line_no, line in enumerate(f):
                self.store_line(line)
                line = line.strip("\n")

                # Handle continuation lines: drop the trailing backslash
                # and keep accumulating until the statement is complete
                if line.endswith("\\"):
                    if first_ln is None:
                        first_ln = line_no
                    multiline += line[:-1]
                    continue

                if multiline:
                    line = multiline + line
                    multiline = ""

                # Handle comments. They can be multilined
                if not is_comment:
                    if re.search(r"/\*.*", line):
                        is_comment = True
                    else:
                        # Strip C99-style comments
                        line = re.sub(r"(//.*)", "", line)

                if is_comment:
                    if re.search(r".*\*/", line):
                        is_comment = False
                    else:
                        multiline = line
                        continue

                # The logical line is complete
                ln = line_no if first_ln is None else first_ln
                first_ln = None

                # At this point, line variable may be a multilined statement,
                # if lines end with \ or if they have multi-line comments
                # With that, it can safely remove the entire comments,
                # and there's no need to use re.DOTALL for the logic below

                line = re.sub(r"(/\*.*\*/)", "", line)
                if not line.strip():
                    continue

                # It can be useful for debug purposes to print the file after
                # having comments stripped and multi-lines grouped.
                if self.debug > 1:
                    print(f"line {ln + 1}: {line}")

                # Now the fun begins: parse each type and store it.

                # We opted for a two parsing logic here due to:
                # 1. it makes easier to debug issues not-parsed symbols;
                # 2. we want symbol replacement at the entire content, not
                #    just when the symbol is detected.

                if is_enum:
                    match = re.match(r"^\s*([_\w][\w\d_]+)\s*[\,=]?", line)
                    if match:
                        self.store_type(ln, "symbol", match.group(1))
                    if "}" in line:
                        is_enum = False
                    continue

                match = re.match(r"^\s*#\s*define\s+([\w_]+)\s+_IO", line)
                if match:
                    self.store_type(ln, "ioctl", match.group(1),
                                    replace_underscores=False)
                    continue

                match = re.match(r"^\s*#\s*define\s+([\w_]+)(\s+|$)", line)
                if match:
                    self.store_type(ln, "define", match.group(1))
                    continue

                match = re.match(r"^\s*typedef\s+([_\w][\w\d_]+)\s+(.*)\s+([_\w][\w\d_]+);",
                                 line)
                if match:
                    target = match.group(2).strip()
                    symbol = match.group(3)
                    self.store_type(ln, "typedef", symbol, ref_name=target)
                    continue

                for re_enum in self.RE_ENUMS:
                    match = re_enum.match(line)
                    if match:
                        self.store_type(ln, "enum", match.group(1))
                        is_enum = True
                        break

                for re_struct in self.RE_STRUCTS:
                    match = re_struct.match(line)
                    if match:
                        self.store_type(ln, "struct", match.group(1))
                        break

        self.apply_exceptions()

    def debug_print(self):
        """
        Print debug information containing the replacement rules per symbol.
        To make easier to check, group them per type.

        Does nothing unless debug is enabled.
        """
        if not self.debug:
            return

        for c_type, refs in self.symbols.items():
            if not refs:  # Skip empty dictionaries
                continue

            print(f"{c_type}:")

            for symbol, (ref, ln) in sorted(refs.items()):
                print(f" #{ln:<5d} {symbol} -> {ref}")

            print()

    def gen_output(self):
        """
        Return the stored file content with Sphinx special characters
        escaped and with each known symbol replaced by its reference.
        """

        # Avoid extra blank lines
        text = re.sub(r"\s+$", "", self.data) + "\n"
        text = re.sub(r"\n\s+\n", "\n\n", text)

        # Escape Sphinx special characters
        text = re.sub(r"([\_\`\*\<\>\&\\\\:\/\|\%\$\#\{\}\~\^])", r"\\\1", text)

        # Source uAPI files may have special notes. Use bold font for them
        text = re.sub(r"DEPRECATED", "**DEPRECATED**", text)

        # Delimiters to catch the entire symbol after escaped
        start_delim = r"([ \n\t\(=\*\@])"
        end_delim = r"(\s|,|\\=|\\:|\;|\)|\}|\{)"

        # Process all reference types
        for ref_dict in self.symbols.values():
            for symbol, (replacement, _) in ref_dict.items():
                # The text was already escaped, so the symbol has to be
                # escaped the same way before being used as a pattern
                symbol = re.escape(re.sub(r"([\_\`\*\<\>\&\\\\:\/])", r"\\\1", symbol))
                text = re.sub(fr'{start_delim}{symbol}{end_delim}',
                              fr'\1{replacement}\2', text)

        # Remove "\ " where not needed: before spaces and at the end of lines
        text = re.sub(r"\\ ([\n ])", r"\1", text)
        text = re.sub(r" \\ ", " ", text)

        return text

    def gen_toc(self):
        """
        Create a list of symbols to be part of a TOC contents table.

        Categories are sorted by description and symbols are sorted by
        name. Categories without symbols are skipped.
        """
        text = []

        # Sort symbol types per description
        symbol_descriptions = sorted(
            (v['description'], k) for k, v in self.DEF_SYMBOL_TYPES.items()
        )

        # Process each category
        for description, c_type in symbol_descriptions:

            refs = self.symbols[c_type]
            if not refs:  # Skip empty categories
                continue

            text.append(f"{description}")
            text.append("-" * len(description))
            text.append("")

            # Sort symbols alphabetically
            for symbol, (ref, ln) in sorted(refs.items()):
                text.append(f"- LINENO_{ln}: {ref}")

            text.append("")  # Add empty line between categories

        return "\n".join(text)

    def write_output(self, file_in: str, file_out: str, toc: bool):
        """
        Write a ReST output file.

        The basename of ``file_in`` is used as the document title. The
        body is the symbols TOC when ``toc`` is true, or the
        cross-referenced file content, as a parsed-literal block,
        otherwise.
        """

        title = os.path.basename(file_in)

        if toc:
            text = self.gen_toc()
        else:
            text = self.gen_output()

        with open(file_out, "w", encoding="utf-8", errors="backslashreplace") as f:
            f.write(".. -*- coding: utf-8; mode: rst -*-\n\n")
            f.write(f"{title}\n")
            f.write("=" * len(title) + "\n\n")

            if not toc:
                f.write(".. parsed-literal::\n\n")

            f.write(text)