Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/tools/docs/lib/parse_data_structs.py
29520 views
1
#!/usr/bin/env python3
2
# SPDX-License-Identifier: GPL-2.0
3
# Copyright (c) 2016-2025 by Mauro Carvalho Chehab <[email protected]>.
4
# pylint: disable=R0912,R0915
5
6
"""
7
Parse a source file or header, creating ReStructured Text cross references.
8
9
It accepts an optional file to change the default symbol reference or to
10
suppress symbols from the output.
11
12
It is capable of identifying defines, functions, structs, typedefs,
13
enums and enum symbols and create cross-references for all of them.
14
It is also capable of distinguishing #defines used for specifying a Linux
15
ioctl.
16
17
The optional rules file contains a set of rules like:
18
19
ignore ioctl VIDIOC_ENUM_FMT
20
replace ioctl VIDIOC_DQBUF vidioc_qbuf
21
replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
22
"""
23
24
import os
25
import re
26
import sys
27
28
29
class ParseDataStructs:
    """
    Creates an enriched version of a Kernel header file with cross-links
    to each C data structure type.

    It is meant to allow having a more comprehensive documentation, where
    uAPI headers will create cross-reference links to the code.

    It is capable of identifying defines, functions, structs, typedefs,
    enums and enum symbols and of creating cross-references for all of them.
    It is also capable of distinguishing #defines used for specifying a
    Linux ioctl.

    By default, it creates rules for all symbols and defines, but it also
    allows parsing an exception file. Such file contains a set of rules
    using the syntax below:

    1. Ignore rules:

        ignore <type> <symbol>

    Removes the symbol from reference generation.

    2. Replace rules:

        replace <type> <old_symbol> <new_reference>

    Replaces how old_symbol is referenced. The new_reference can be:
        - A simple symbol name;
        - A full Sphinx reference.

    On both cases, <type> can be:
        - ioctl: for defines whose value starts with _IO*, e.g. ioctl
          definitions;
        - define: for other defines;
        - symbol: for symbols defined within enums;
        - typedef: for typedefs;
        - enum: for the name of a non-anonymous enum;
        - struct: for structs.

    Examples:

        ignore define __LINUX_MEDIA_H
        ignore ioctl VIDIOC_ENUM_FMT
        replace ioctl VIDIOC_DQBUF vidioc_qbuf
        replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
    """

    # Parser regexes with multiple ways to capture enums and structs
    RE_ENUMS = [
        re.compile(r"^\s*enum\s+([\w_]+)\s*\{"),
        re.compile(r"^\s*enum\s+([\w_]+)\s*$"),
        re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*\{"),
        re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*$"),
    ]
    RE_STRUCTS = [
        re.compile(r"^\s*struct\s+([_\w][\w\d_]+)\s*\{"),
        re.compile(r"^\s*struct\s+([_\w][\w\d_]+)$"),
        re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)\s*\{"),
        re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)$"),
    ]

    # FIXME: the original code was written a long time before Sphinx C
    # domain had multiple namespaces. To avoid too much churn at the
    # existing hyperlinks, the code kept using "c:type" instead of the
    # right types. To change that, we need to change the types not only
    # here, but also at the uAPI media documentation.
    DEF_SYMBOL_TYPES = {
        "ioctl": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "IOCTL Commands",
        },
        "define": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "Macros and Definitions",
        },
        # We're calling each definition inside an enum as "symbol"
        "symbol": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "Enumeration values",
        },
        "typedef": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Type Definitions",
        },
        # This is the description of the enum itself
        "enum": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Enumerations",
        },
        "struct": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Structures",
        },
    }

    def __init__(self, debug: bool = False):
        """
        Initialize internal vars.

        ``debug`` enables debug_print(); a value greater than 1 also makes
        parse_file() print every statement after comment stripping.
        """
        self.debug = debug
        self.data = ""

        # One dictionary per symbol type, mapping symbol -> reference markup
        self.symbols = {}

        for symbol_type in self.DEF_SYMBOL_TYPES:
            self.symbols[symbol_type] = {}

    def store_type(self, symbol_type: str, symbol: str,
                   ref_name: str = None, replace_underscores: bool = True):
        """
        Stores a new symbol at self.symbols under symbol_type.

        ``ref_name`` is the link target; when not given, the lower-cased
        symbol is used. By default, underscores at the target of ":ref"
        references are replaced by "-".

        Raises KeyError if symbol_type is not a key of DEF_SYMBOL_TYPES.
        """
        defs = self.DEF_SYMBOL_TYPES[symbol_type]

        prefix = defs.get("prefix", "")
        suffix = defs.get("suffix", "")
        ref_type = defs.get("ref_type")

        # Determine ref_link based on symbol type
        if ref_type:
            if symbol_type == "enum":
                # Enums link to themselves: no explicit target is emitted
                ref_link = f"{ref_type}:`{symbol}`"
            else:
                if not ref_name:
                    ref_name = symbol.lower()

                # c-type references don't support hash
                if ref_type == ":ref" and replace_underscores:
                    ref_name = ref_name.replace("_", "-")

                ref_link = f"{ref_type}:`{symbol} <{ref_name}>`"
        else:
            ref_link = symbol

        self.symbols[symbol_type][symbol] = f"{prefix}{ref_link}{suffix}"

    def store_line(self, line):
        """Stores a line at self.data, properly indented"""
        line = " " + line.expandtabs()
        self.data += line.rstrip(" ")

    def parse_file(self, file_in: str):
        """
        Reads a C source file and gets its identifiers.

        Every raw line is appended to self.data via store_line(), while
        the identifiers found (ioctls, defines, typedefs, enums, enum
        symbols and structs) are registered via store_type().
        """
        self.data = ""
        is_enum = False
        is_comment = False
        multiline = ""

        with open(file_in, "r",
                  encoding="utf-8", errors="backslashreplace") as f:
            for line_no, line in enumerate(f):
                self.store_line(line)
                line = line.strip("\n")

                # Handle continuation lines: a C continuation ends with a
                # single backslash. Drop it and keep the line contents, so
                # the next line gets appended to the same statement.
                if line.endswith("\\"):
                    multiline += line[:-1]
                    continue

                if multiline:
                    line = multiline + line
                    multiline = ""

                # Handle comments. They can be multilined
                if not is_comment:
                    if re.search(r"/\*.*", line):
                        is_comment = True
                    else:
                        # Strip C99-style comments
                        line = re.sub(r"(//.*)", "", line)

                if is_comment:
                    if re.search(r".*\*/", line):
                        is_comment = False
                    else:
                        # Comment still open: keep accumulating lines
                        multiline = line
                        continue

                # At this point, line variable may be a multilined statement,
                # if lines end with \ or if they have multi-line comments
                # With that, it can safely remove the entire comments,
                # and there's no need to use re.DOTALL for the logic below

                line = re.sub(r"(/\*.*\*/)", "", line)
                if not line.strip():
                    continue

                # It can be useful for debug purposes to print the file after
                # having comments stripped and multi-lines grouped.
                if self.debug > 1:
                    print(f"line {line_no + 1}: {line}")

                # Now the fun begins: parse each type and store it.

                # We opted for a two parsing logic here due to:
                # 1. it makes easier to debug issues not-parsed symbols;
                # 2. we want symbol replacement at the entire content, not
                #    just when the symbol is detected.

                if is_enum:
                    match = re.match(r"^\s*([_\w][\w\d_]+)\s*[\,=]?", line)
                    if match:
                        self.store_type("symbol", match.group(1))
                    if "}" in line:
                        is_enum = False
                    continue

                # ioctls must be tested before generic defines, as they
                # would also match the generic regex
                match = re.match(r"^\s*#\s*define\s+([\w_]+)\s+_IO", line)
                if match:
                    self.store_type("ioctl", match.group(1),
                                    replace_underscores=False)
                    continue

                match = re.match(r"^\s*#\s*define\s+([\w_]+)(\s+|$)", line)
                if match:
                    self.store_type("define", match.group(1))
                    continue

                match = re.match(r"^\s*typedef\s+([_\w][\w\d_]+)\s+(.*)\s+([_\w][\w\d_]+);",
                                 line)
                if match:
                    name = match.group(2).strip()
                    symbol = match.group(3)
                    self.store_type("typedef", symbol, ref_name=name)
                    continue

                for re_enum in self.RE_ENUMS:
                    match = re_enum.match(line)
                    if match:
                        self.store_type("enum", match.group(1))
                        is_enum = True
                        break

                for re_struct in self.RE_STRUCTS:
                    match = re_struct.match(line)
                    if match:
                        self.store_type("struct", match.group(1))
                        break

    def process_exceptions(self, fname: str):
        """
        Process exceptions file with rules to ignore or replace references.

        Does nothing when fname is empty. Blank lines and lines starting
        with "#" are skipped. Calls sys.exit() with a "file:line" message
        on a malformed line or on an unknown symbol type; prints a warning
        when a replace rule names a symbol that was not parsed.
        """
        if not fname:
            return

        name = os.path.basename(fname)

        with open(fname, "r", encoding="utf-8", errors="backslashreplace") as f:
            for ln, line in enumerate(f, start=1):
                line = line.strip()
                if not line or line.startswith("#"):
                    continue

                # Handle ignore rules
                match = re.match(r"^ignore\s+(\w+)\s+(\S+)", line)
                if match:
                    c_type = match.group(1)
                    symbol = match.group(2)

                    if c_type not in self.DEF_SYMBOL_TYPES:
                        sys.exit(f"{name}:{ln}: {c_type} is invalid")

                    # Ignoring a symbol that was never parsed is not an error
                    self.symbols[c_type].pop(symbol, None)

                    continue

                # Handle replace rules
                match = re.match(r"^replace\s+(\S+)\s+(\S+)\s+(\S+)", line)
                if not match:
                    sys.exit(f"{name}:{ln}: invalid line: {line}")

                c_type, old, new = match.groups()

                if c_type not in self.DEF_SYMBOL_TYPES:
                    sys.exit(f"{name}:{ln}: {c_type} is invalid")

                reftype = None

                # Parse reference type when the type is specified

                match = re.match(r"^\:c\:(data|func|macro|type)\:\`(.+)\`", new)
                if match:
                    reftype = f":c:{match.group(1)}"
                    new = match.group(2)
                else:
                    match = re.search(r"(\:ref)\:\`(.+)\`", new)
                    if match:
                        reftype = match.group(1)
                        new = match.group(2)

                # If the replacement rule doesn't have a type, get default
                if not reftype:
                    reftype = self.DEF_SYMBOL_TYPES[c_type].get("ref_type")
                    if not reftype:
                        # "real_type" is not set by the DEF_SYMBOL_TYPES
                        # defined in this class; this fallback only takes
                        # effect if the table is overridden to provide it.
                        reftype = self.DEF_SYMBOL_TYPES[c_type].get("real_type")

                new_ref = f"{reftype}:`{old} <{new}>`"

                # Change self.symbols to use the replacement rule
                if old in self.symbols[c_type]:
                    self.symbols[c_type][old] = new_ref
                else:
                    print(f"{name}:{ln}: Warning: can't find {old} {c_type}")

    def debug_print(self):
        """
        Print debug information containing the replacement rules per symbol.
        To make easier to check, group them per type.
        """
        if not self.debug:
            return

        for c_type, refs in self.symbols.items():
            if not refs:  # Skip empty dictionaries
                continue

            print(f"{c_type}:")

            for symbol, ref in sorted(refs.items()):
                print(f" {symbol} -> {ref}")

            print()

    def gen_output(self):
        """
        Return the stored file content as ReST text, with Sphinx special
        characters escaped and each known symbol replaced by its
        cross-reference.
        """

        # Avoid extra blank lines
        text = re.sub(r"\s+$", "", self.data) + "\n"
        text = re.sub(r"\n\s+\n", "\n\n", text)

        # Escape Sphinx special characters
        text = re.sub(r"([\_\`\*\<\>\&\\\\:\/\|\%\$\#\{\}\~\^])", r"\\\1", text)

        # Source uAPI files may have special notes. Use bold font for them
        text = re.sub(r"DEPRECATED", "**DEPRECATED**", text)

        # Delimiters to catch the entire symbol after escaped
        start_delim = r"([ \n\t\(=\*\@])"
        end_delim = r"(\s|,|\\=|\\:|\;|\)|\}|\{)"

        # Process all reference types
        for ref_dict in self.symbols.values():
            for symbol, replacement in ref_dict.items():
                # The text was already escaped, so the symbol needs the
                # same escaping before being turned into a regex
                escaped = re.escape(re.sub(r"([\_\`\*\<\>\&\\\\:\/])", r"\\\1", symbol))
                pattern = re.compile(f"{start_delim}{escaped}{end_delim}")

                # Use a callable so the replacement is inserted literally:
                # a template string would interpret backslash sequences
                # that may be present on user-supplied references.
                text = pattern.sub(
                    lambda m, ref=replacement: m.group(1) + ref + m.group(2),
                    text)

        # Remove "\ " where not needed: before spaces and at the end of lines
        text = re.sub(r"\\ ([\n ])", r"\1", text)
        text = re.sub(r" \\ ", " ", text)

        return text

    def gen_toc(self):
        """
        Create a TOC table pointing to each symbol from the header.

        Returns the ReST text, with one section per non-empty symbol type,
        sorted by the type description.
        """
        text = []

        # Add header
        text.append(".. contents:: Table of Contents")
        text.append(" :depth: 2")
        text.append(" :local:")
        text.append("")

        # Sort symbol types per description
        symbol_descriptions = sorted(
            (v['description'], k) for k, v in self.DEF_SYMBOL_TYPES.items()
        )

        # Process each category
        for description, c_type in symbol_descriptions:

            refs = self.symbols[c_type]
            if not refs:  # Skip empty categories
                continue

            text.append(f"{description}")
            text.append("-" * len(description))
            text.append("")

            # Sort symbols alphabetically
            # NOTE(review): ref already carries the role markup plus the
            # prefix/suffix added by store_type(); the extra surrounding
            # colons look odd - confirm this is the intended output.
            for _, ref in sorted(refs.items()):
                text.append(f"* :{ref}:")

            text.append("")  # Add empty line between categories

        return "\n".join(text)

    def write_output(self, file_in: str, file_out: str, toc: bool):
        """
        Write the ReST output to file_out.

        The title is the basename of file_in. When toc is true, the body
        is the table produced by gen_toc(); otherwise it is the text from
        gen_output(), placed under a parsed-literal directive.
        """
        title = os.path.basename(file_in)

        if toc:
            text = self.gen_toc()
        else:
            text = self.gen_output()

        with open(file_out, "w", encoding="utf-8", errors="backslashreplace") as f:
            f.write(".. -*- coding: utf-8; mode: rst -*-\n\n")
            f.write(f"{title}\n")
            f.write("=" * len(title) + "\n\n")

            if not toc:
                f.write(".. parsed-literal::\n\n")

            f.write(text)
453
454