Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/tools/lib/python/kdoc/parse_data_structs.py
122941 views
1
#!/usr/bin/env python3
2
# SPDX-License-Identifier: GPL-2.0
3
# Copyright (c) 2016-2025 by Mauro Carvalho Chehab <[email protected]>.
4
# pylint: disable=R0912,R0915
5
6
"""
7
Parse a source file or header, creating ReStructured Text cross references.
8
9
It accepts an optional file to change the default symbol reference or to
10
suppress symbols from the output.
11
12
It is capable of identifying ``define``, function, ``struct``, ``typedef``,
13
``enum`` and ``enum`` symbols and create cross-references for all of them.
14
It is also capable of distinguishing #define used for specifying a Linux
15
ioctl.
16
17
The optional rules file contains a set of rules like::
18
19
ignore ioctl VIDIOC_ENUM_FMT
20
replace ioctl VIDIOC_DQBUF vidioc_qbuf
21
replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
22
"""
23
24
import os
25
import re
26
import sys
27
28
29
class ParseDataStructs:
    """
    Creates an enriched version of a Kernel header file with cross-links
    to each C data structure type.

    It is meant to allow having a more comprehensive documentation, where
    uAPI headers will create cross-reference links to the code.

    It is capable of identifying ``define``, function, ``struct``,
    ``typedef``, ``enum`` and enum value symbols and create cross-references
    for all of them. It is also capable of distinguishing ``#define`` used
    for specifying a Linux ioctl.

    By default, it creates rules for all symbols and defines, but it also
    allows parsing an exception file. Such file contains a set of rules
    using the syntax below:

    1. Ignore rules::

        ignore <type> <symbol>

       Removes the symbol from reference generation.

    2. Replace rules::

        replace <type> <old_symbol> <new_reference>

       Replaces old_symbol with a new reference. The new_reference can be:

       - A simple symbol name;
       - A full Sphinx reference.

    3. Namespace rules::

        namespace <namespace>

       Sets C namespace to be used during cross-reference generation. Can
       be overridden by replace rules.

    On ignore and replace rules, ``<type>`` can be:
        - ``ioctl``: for defines whose value starts with ``_IO*``,
          e.g. ioctl definitions
        - ``define``: for other defines
        - ``symbol``: for symbols defined within enums;
        - ``typedef``: for typedefs;
        - ``enum``: for the name of a non-anonymous enum;
        - ``struct``: for structs.

    Examples::

        ignore define __LINUX_MEDIA_H
        ignore ioctl VIDIOC_ENUM_FMT
        replace ioctl VIDIOC_DQBUF vidioc_qbuf
        replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`

        namespace MC
    """

    #: Parser regex with multiple ways to capture enums.
    RE_ENUMS = [
        re.compile(r"^\s*enum\s+([\w_]+)\s*\{"),
        re.compile(r"^\s*enum\s+([\w_]+)\s*$"),
        re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*\{"),
        re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*$"),
    ]

    #: Parser regex with multiple ways to capture structs.
    RE_STRUCTS = [
        re.compile(r"^\s*struct\s+([_\w][\w\d_]+)\s*\{"),
        re.compile(r"^\s*struct\s+([_\w][\w\d_]+)$"),
        re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)\s*\{"),
        re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)$"),
    ]

    # NOTE: the original code was written a long time before Sphinx C
    # domain gained support for multiple namespaces. To avoid too much
    # churn at the existing hyperlinks, the code kept using "c:type"
    # instead of the right types. To change that, we need to change the
    # types not only here, but also at the uAPI media documentation.

    #: Dictionary containing C type identifiers to be transformed.
    DEF_SYMBOL_TYPES = {
        "ioctl": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "IOCTL Commands",
        },
        "define": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "Macros and Definitions",
        },
        # We're calling each definition inside an enum as "symbol"
        "symbol": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "Enumeration values",
        },
        "typedef": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Type Definitions",
        },
        # This is the description of the enum itself
        "enum": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Enumerations",
        },
        "struct": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Structures",
        },
    }

    def __init__(self, debug: bool = False):
        """
        Initialize internal vars.

        ``debug`` enables debug_print(); values above 1 also make
        parse_file() print every logical line after comment stripping.
        """
        self.debug = debug
        self.data = ""

        # One dictionary per symbol type: symbol -> (reference, line number)
        self.symbols = {}

        self.namespace = None
        self.ignore = []
        self.replace = []

        # Base name of the exceptions file, used on diagnostic messages
        # issued by apply_exceptions(). Updated by read_exceptions().
        self._exceptions_name = "<exceptions>"

        for symbol_type in self.DEF_SYMBOL_TYPES:
            self.symbols[symbol_type] = {}

    def read_exceptions(self, fname: str):
        """
        Read an optional exceptions file, used to override defaults.

        Rules are appended to self.ignore and self.replace as tuples
        starting with the (1-based) line number where they were found.
        A ``namespace`` rule sets self.namespace. Blank lines and lines
        starting with ``#`` are skipped.

        Exits via sys.exit() if a line doesn't match any known rule.
        """

        if not fname:
            return

        name = os.path.basename(fname)

        # Remember it: apply_exceptions() reports errors against this file
        self._exceptions_name = name

        with open(fname, "r", encoding="utf-8", errors="backslashreplace") as f:
            for ln, line in enumerate(f, start=1):
                line = line.strip()
                if not line or line.startswith("#"):
                    continue

                # ignore rules
                match = re.match(r"^ignore\s+(\w+)\s+(\S+)", line)
                if match:
                    self.ignore.append((ln, match.group(1), match.group(2)))
                    continue

                # replace rules
                match = re.match(r"^replace\s+(\S+)\s+(\S+)\s+(\S+)", line)
                if match:
                    self.replace.append((ln, match.group(1), match.group(2),
                                         match.group(3)))
                    continue

                # namespace rules
                match = re.match(r"^namespace\s+(\S+)", line)
                if match:
                    self.namespace = match.group(1)
                    continue

                sys.exit(f"{name}:{ln}: invalid line: {line}")

    def apply_exceptions(self):
        """
        Process exceptions file with rules to ignore or replace references.

        Ignore rules drop the symbol from self.symbols. Replace rules
        overwrite the stored reference, keeping the line number where the
        symbol was found.

        Exits via sys.exit() if a rule uses a type that is not a key of
        DEF_SYMBOL_TYPES. A replace rule for a symbol that was not parsed
        only prints a warning.
        """

        name = self._exceptions_name

        # Handle ignore rules
        for ln, c_type, symbol in self.ignore:
            if c_type not in self.DEF_SYMBOL_TYPES:
                sys.exit(f"{name}:{ln}: {c_type} is invalid")

            self.symbols[c_type].pop(symbol, None)

        # Handle replace rules
        for ln, c_type, old, new in self.replace:
            if c_type not in self.DEF_SYMBOL_TYPES:
                sys.exit(f"{name}:{ln}: {c_type} is invalid")

            reftype = None

            # Parse reference type when the type is specified

            match = re.match(r"^\:c\:(\w+)\:\`(.+)\`", new)
            if match:
                reftype = f":c:{match.group(1)}"
                new = match.group(2)
            else:
                match = re.search(r"(\:ref)\:\`(.+)\`", new)
                if match:
                    reftype = match.group(1)
                    new = match.group(2)

            # If the replacement rule doesn't have a type, get default
            if not reftype:
                reftype = self.DEF_SYMBOL_TYPES[c_type].get("ref_type")
                if not reftype:
                    reftype = self.DEF_SYMBOL_TYPES[c_type].get("real_type")

            new_ref = f"{reftype}:`{old} <{new}>`"

            # Change self.symbols to use the replacement rule
            if old in self.symbols[c_type]:
                (_, symbol_ln) = self.symbols[c_type][old]
                self.symbols[c_type][old] = (new_ref, symbol_ln)
            else:
                print(f"{name}:{ln}: Warning: can't find {old} {c_type}")

    def store_type(self, ln, symbol_type: str, symbol: str,
                   ref_name: str = None, replace_underscores: bool = True):
        """
        Store a new symbol at self.symbols under symbol_type.

        The stored value is a ``(reference, ln)`` tuple, where the reference
        is built from the ``prefix``, ``suffix`` and ``ref_type`` defined
        for symbol_type at DEF_SYMBOL_TYPES.

        If ref_name is not given, the lower-cased symbol is used as the
        reference target. By default, underscores are replaced by ``-``
        on ``:ref`` targets.
        """
        defs = self.DEF_SYMBOL_TYPES[symbol_type]

        prefix = defs.get("prefix", "")
        suffix = defs.get("suffix", "")
        ref_type = defs.get("ref_type")

        # Determine ref_link based on symbol type
        if ref_type or self.namespace:
            if not ref_name:
                ref_name = symbol.lower()

            # Only :ref: targets get dashes; other targets keep underscores
            if ref_type == ":ref" and replace_underscores:
                ref_name = ref_name.replace("_", "-")

            # C domain references may have namespaces. ref_type can be None
            # here when only the namespace is set, so check it first.
            if ref_type and ref_type.startswith(":c:"):
                if self.namespace:
                    ref_name = f"{self.namespace}.{ref_name}"

            if ref_type:
                ref_link = f"{ref_type}:`{symbol} <{ref_name}>`"
            else:
                ref_link = f"`{symbol} <{ref_name}>`"
        else:
            ref_link = symbol

        self.symbols[symbol_type][symbol] = (f"{prefix}{ref_link}{suffix}", ln)

    def store_line(self, line):
        """
        Store a line at self.data, properly indented.

        Tabs are expanded and trailing spaces are dropped.
        """
        line = " " + line.expandtabs()
        self.data += line.rstrip(" ")

    def parse_file(self, file_in: str, exceptions: str = None):
        """
        Read a C source file and get identifiers.

        Every line is stored verbatim (see store_line()) for later output.
        Lines ending with a backslash and multi-line comments are grouped
        into a single logical line before symbols are detected. Detected
        symbols are stored with the 0-based number of the line where the
        statement starts; for a statement that follows a multi-line comment,
        that is the line where the comment ends.

        If ``exceptions`` is given, its rules are read before parsing and
        applied at the end.
        """
        self.data = ""
        is_enum = False
        is_comment = False
        multiline = ""

        # Line where a backslash-continued statement has started
        first_ln = None

        self.read_exceptions(exceptions)

        with open(file_in, "r",
                  encoding="utf-8", errors="backslashreplace") as f:
            for line_no, line in enumerate(f):
                self.store_line(line)
                line = line.strip("\n")

                # Handle continuation lines: drop the trailing backslash
                # and keep accumulating until the statement ends
                if line.endswith("\\"):
                    if first_ln is None:
                        first_ln = line_no
                    multiline += line[:-1]
                    continue

                if multiline:
                    line = multiline + line
                    multiline = ""

                # Handle comments. They can be multilined
                if not is_comment:
                    if re.search(r"/\*.*", line):
                        is_comment = True
                    else:
                        # Strip C99-style comments
                        line = re.sub(r"(//.*)", "", line)

                if is_comment:
                    if re.search(r".*\*/", line):
                        is_comment = False
                    else:
                        multiline = line
                        continue

                # The logical line is complete: pick the line number that
                # will be associated with symbols found on it
                symbol_ln = line_no if first_ln is None else first_ln
                first_ln = None

                # At this point, line variable may be a multilined statement,
                # if lines end with \ or if they have multi-line comments.
                # With that, it can safely remove the entire comments,
                # and there's no need to use re.DOTALL for the logic below.
                # The match is non-greedy to preserve code placed between
                # two comments.

                line = re.sub(r"(/\*.*?\*/)", "", line)
                if not line.strip():
                    continue

                # It can be useful for debug purposes to print the file after
                # having comments stripped and multi-lines grouped.
                if self.debug > 1:
                    print(f"line {line_no + 1}: {line}")

                # Now the fun begins: parse each type and store it.

                # We opted for a two parsing logic here due to:
                # 1. it makes easier to debug issues not-parsed symbols;
                # 2. we want symbol replacement at the entire content, not
                #    just when the symbol is detected.

                if is_enum:
                    match = re.match(r"^\s*([_\w][\w\d_]+)\s*[\,=]?", line)
                    if match:
                        self.store_type(symbol_ln, "symbol", match.group(1))
                    if "}" in line:
                        is_enum = False
                    continue

                match = re.match(r"^\s*#\s*define\s+([\w_]+)\s+_IO", line)
                if match:
                    self.store_type(symbol_ln, "ioctl", match.group(1),
                                    replace_underscores=False)
                    continue

                match = re.match(r"^\s*#\s*define\s+([\w_]+)(\s+|$)", line)
                if match:
                    self.store_type(symbol_ln, "define", match.group(1))
                    continue

                match = re.match(r"^\s*typedef\s+([_\w][\w\d_]+)\s+(.*)\s+([_\w][\w\d_]+);",
                                 line)
                if match:
                    name = match.group(2).strip()
                    symbol = match.group(3)
                    self.store_type(symbol_ln, "typedef", symbol, ref_name=name)
                    continue

                for re_enum in self.RE_ENUMS:
                    match = re_enum.match(line)
                    if match:
                        self.store_type(symbol_ln, "enum", match.group(1))
                        is_enum = True
                        break

                for re_struct in self.RE_STRUCTS:
                    match = re_struct.match(line)
                    if match:
                        self.store_type(symbol_ln, "struct", match.group(1))
                        break

        self.apply_exceptions()

    def debug_print(self):
        """
        Print debug information containing the replacement rules per symbol.
        To make easier to check, group them per type.

        Does nothing unless self.debug is set.
        """
        if not self.debug:
            return

        for c_type, refs in self.symbols.items():
            if not refs:  # Skip empty dictionaries
                continue

            print(f"{c_type}:")

            for symbol, (ref, ln) in sorted(refs.items()):
                print(f" #{ln:<5d} {symbol} -> {ref}")

            print()

    def gen_output(self):
        """
        Return the stored source as ReST text, with Sphinx special
        characters escaped and each known symbol replaced by its
        cross-reference.
        """

        # Avoid extra blank lines
        text = re.sub(r"\s+$", "", self.data) + "\n"
        text = re.sub(r"\n\s+\n", "\n\n", text)

        # Escape Sphinx special characters
        text = re.sub(r"([\_\`\*\<\>\&\\\\:\/\|\%\$\#\{\}\~\^])", r"\\\1", text)

        # Source uAPI files may have special notes. Use bold font for them
        text = re.sub(r"DEPRECATED", "**DEPRECATED**", text)

        # Delimiters to catch the entire symbol after escaped
        start_delim = r"([ \n\t\(=\*\@])"
        end_delim = r"(\s|,|\\=|\\:|\;|\)|\}|\{)"

        # Process all reference types
        for ref_dict in self.symbols.values():
            for symbol, (replacement, _) in ref_dict.items():
                # The text was already escaped, so escape the symbol the
                # same way before turning it into a regex
                symbol = re.escape(re.sub(r"([\_\`\*\<\>\&\\\\:\/])", r"\\\1", symbol))
                text = re.sub(fr'{start_delim}{symbol}{end_delim}',
                              fr'\1{replacement}\2', text)

        # Remove "\ " where not needed: before spaces and at the end of lines
        text = re.sub(r"\\ ([\n ])", r"\1", text)
        text = re.sub(r" \\ ", " ", text)

        return text

    def gen_toc(self):
        """
        Create a list of symbols to be part of a TOC contents table.

        Returns a string with one section per non-empty symbol type,
        sorted by the type description, each listing its symbols in
        alphabetical order as ``- LINENO_<ln>: <reference>``.
        """
        text = []

        # Sort symbol types per description
        symbol_descriptions = []
        for k, v in self.DEF_SYMBOL_TYPES.items():
            symbol_descriptions.append((v['description'], k))

        symbol_descriptions.sort()

        # Process each category
        for description, c_type in symbol_descriptions:

            refs = self.symbols[c_type]
            if not refs:  # Skip empty categories
                continue

            text.append(f"{description}")
            text.append("-" * len(description))
            text.append("")

            # Sort symbols alphabetically
            for _, (ref, ln) in sorted(refs.items()):
                text.append(f"- LINENO_{ln}: {ref}")

            text.append("")  # Add empty line between categories

        return "\n".join(text)

    def write_output(self, file_in: str, file_out: str, toc: bool):
        """
        Write a ReST output file.

        The base name of ``file_in`` is used as the document title. When
        ``toc`` is true, the body is the output of gen_toc(); otherwise it
        is the output of gen_output() inside a ``parsed-literal`` block.
        """

        title = os.path.basename(file_in)

        if toc:
            text = self.gen_toc()
        else:
            text = self.gen_output()

        with open(file_out, "w", encoding="utf-8", errors="backslashreplace") as f:
            f.write(".. -*- coding: utf-8; mode: rst -*-\n\n")
            f.write(f"{title}\n")
            f.write("=" * len(title) + "\n\n")

            if not toc:
                f.write(".. parsed-literal::\n\n")

            f.write(text)
499
500