Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/tools/lib/python/abi/abi_regex.py
122941 views
1
#!/usr/bin/env python3
2
# xxpylint: disable=R0903
3
# Copyright(c) 2025: Mauro Carvalho Chehab <[email protected]>.
4
# SPDX-License-Identifier: GPL-2.0
5
6
"""
7
Convert ABI what into regular expressions
8
"""
9
10
import re
11
import sys
12
13
from pprint import pformat
14
15
from abi.abi_parser import AbiParser
16
from abi.helpers import AbiDebug
17
18
class AbiRegex(AbiParser):
    """
    Extends AbiParser to search ABI nodes with regular expressions.

    There are some optimizations here to allow a quick symbol search:
    instead of placing all symbols altogether and doing a linear search,
    which is very time consuming, create a tree with one depth, grouping
    similar symbols altogether.

    Yet, sometimes a full search will be needed, so we have a special branch
    on such group tree where other symbols are placed.
    """

    #: Escape only ASCII visible characters.
    escape_symbols = r"([\x21-\x29\x2b-\x2d\x3a-\x40\x5c\x60\x7b-\x7e])"

    #: Special group for other nodes.
    leave_others = "others"

    # Tuples with regular expressions to be compiled and replacement data.
    # They are applied in order by parse_abi(); several rules park characters
    # in placeholders (\xf4 \xf5 \xf6 \xf7 \xff) so that the generic escaping
    # step near the end does not touch them.
    re_whats = [
        # Drop escape characters that might exist
        (re.compile("\\\\"), ""),

        # Temporarily escape dot characters
        (re.compile(r"\."), "\xf6"),

        # Temporarily change [0-9]+ type of patterns
        (re.compile(r"\[0\-9\]\+"), "\xff"),

        # Temporarily change [\d+-\d+] type of patterns
        (re.compile(r"\[0\-\d+\]"), "\xff"),
        (re.compile(r"\[0:\d+\]"), "\xff"),
        (re.compile(r"\[(\d+)\]"), "\xf4\\\\d+\xf5"),

        # Temporarily change [0-9] type of patterns.
        # The backreferences must be written as "\\1"/"\\2": in a non-raw
        # string a bare "\1" is the octal escape for chr(1), not a group
        # reference.
        (re.compile(r"\[(\d)\-(\d)\]"), "\xf4\\1-\\2\xf5"),

        # Handle multiple option patterns
        # NOTE(review): a repeated group only keeps its last iteration, so
        # with three or more alternatives only the first and last survive.
        (re.compile(r"[\{\<\[]([\w_]+)(?:[,|]+([\w_]+)){1,}[\}\>\]]"), r"(\1|\2)"),

        # Handle wildcards
        (re.compile(r"([^\/])\*"), "\\1\\\\w\xf7"),
        (re.compile(r"/\*/"), "/.*/"),
        (re.compile(r"/\xf6\xf6\xf6"), "/.*"),
        (re.compile(r"\<[^\>]+\>"), "\\\\w\xf7"),
        (re.compile(r"\{[^\}]+\}"), "\\\\w\xf7"),
        (re.compile(r"\[[^\]]+\]"), "\\\\w\xf7"),

        (re.compile(r"XX+"), "\\\\w\xf7"),
        (re.compile(r"([^A-Z])[XYZ]([^A-Z])"), "\\1\\\\w\xf7\\2"),
        (re.compile(r"([^A-Z])[XYZ]$"), "\\1\\\\w\xf7"),
        (re.compile(r"_[AB]_"), "_\\\\w\xf7_"),

        # Recover [0-9] type of patterns
        (re.compile(r"\xf4"), "["),
        (re.compile(r"\xf5"), "]"),

        # Remove duplicated spaces
        (re.compile(r"\s+"), r" "),

        # Special case: drop comparison as in:
        #       What: foo = <something>
        # (this happens on a few IIO definitions)
        (re.compile(r"\s*\=.*$"), ""),

        # Escape all other symbols
        (re.compile(escape_symbols), r"\\\1"),
        (re.compile(r"\\\\"), r"\\"),
        (re.compile(r"\\([\[\]\(\)\|])"), r"\1"),
        (re.compile(r"(\d+)\\(-\d+)"), r"\1\2"),

        (re.compile(r"\xff"), r"\\d+"),

        # Special case: IIO ABI which has a parenthesis.
        # NOTE(review): "(.*)" in the pattern is a capture group, so this
        # replaces everything after "sqrt" with the literal text "(.*)".
        (re.compile(r"sqrt(.*)"), r"sqrt(.*)"),

        # Simplify regexes with multiple .*
        (re.compile(r"(?:\.\*){2,}"), ""),

        # Recover dot characters
        (re.compile(r"\xf6"), "\\."),
        # Recover plus characters
        (re.compile(r"\xf7"), "+"),
    ]

    #: Regex to check if the symbol name has a number on it.
    re_has_num = re.compile(r"\\d")

    #: Symbol name after escape_chars that are considered a devnode basename.
    re_symbol_name = re.compile(r"(\w|\\[\.\-\:])+$")

    #: List of popular group names to be skipped to minimize regex group size
    #: Use AbiDebug.SUBGROUP_SIZE to detect those.
    skip_names = {"devices", "hwmon"}

    def regex_append(self, what, new):
        """
        Store a regular expression inside a search group.

        As ABI may have thousands of symbols, using a for to search all
        regular expressions is at least O(n^2). When there are wildcards,
        the complexity increases substantially, eventually becoming
        exponential.

        To avoid spending too much time on them, use a logic to split
        them into groups. The smaller the group, the better, as it would
        mean that searches will be confined to a small number of regular
        expressions.

        The conversion to a regex subset is tricky, as we need something
        that can be easily obtained from the sysfs symbol and from the
        regular expression. So, we need to discard nodes that have
        wildcards.

        The group is the right-most path component of ``new`` that is not
        empty, is not in ``skip_names`` and fully matches
        ``re_symbol_name``. If there is no such component, the regular
        expression goes to the special group ``self.leave_others``.

        ``what`` is the original What string (used for logging and for the
        search_string report); ``new`` is the regular expression produced
        from it. An expression that fails to compile is logged and ignored.
        """

        # Explicit default, so that the fallback does not depend on the
        # path starting with "/"
        search_group = self.leave_others

        for name in reversed(new.split("/")):
            if not name or name in self.skip_names:
                continue
            if self.re_symbol_name.match(name):
                search_group = name
                break

        if self.debug & AbiDebug.SUBGROUP_MAP:
            self.log.debug("%s: mapped as %s", what, search_group)

        # re.error is available on every Python version; since 3.13 it is
        # an alias of re.PatternError
        try:
            compiled = re.compile(new)
        except re.error:
            self.log.warning("Ignoring '%s' as it produced an invalid regex:\n"
                             " '%s'", what, new)
            return

        self.regex_group.setdefault(search_group, []).append(compiled)

        if self.search_string and what.find(self.search_string) >= 0:
            print(f"What: {what}")

    def get_regexes(self, what):
        """
        Given an ABI devnode, return a list of all regular expressions that
        may match it, based on the sub-groups created by regex_append().

        Groups are looked up for each path component, from the last to the
        first one, followed by the ``self.leave_others`` group. An empty
        list is returned when no group was created yet.
        """

        groups = self.regex_group
        if not groups:
            return []

        names = what.split("/")
        names.reverse()
        names.append(self.leave_others)

        re_list = []
        for name in names:
            re_list += groups.get(name, [])

        return re_list

    def __init__(self, *args, **kwargs):
        """
        Override init method to get the optional ``search_string`` keyword
        argument. All other arguments are passed to the parent class.

        Raises ValueError if ``search_string`` is not a valid regular
        expression.
        """

        self.regex_group = None
        self.re_string = None

        # Remove the argument, as it is not forwarded to the parent class
        self.search_string = kwargs.pop("search_string", None)

        if self.search_string:
            try:
                self.re_string = re.compile(self.search_string)
            except re.error as e:
                msg = f"{self.search_string} is not a valid regular expression"
                raise ValueError(msg) from e

        super().__init__(*args, **kwargs)

    def parse_abi(self, *args, **kwargs):
        """
        Run the parent parse_abi() and then convert each What field that
        starts with "/sys" into a regular expression.

        The expressions are stored, per ABI entry, at the "regex" key of
        ``self.data`` and are also added to the search groups via
        regex_append(). Entries whose "type" is "File" are skipped.
        """

        super().parse_abi(*args, **kwargs)

        self.regex_group = {}

        print("Converting ABI What fields into regexes...", file=sys.stderr)

        for _, v in sorted(self.data.items(), key=lambda x: x[0]):
            if v.get("type") == "File":
                continue

            v["regex"] = []

            for what in v.get("what", []):
                if not what.startswith("/sys"):
                    continue

                new = what
                for r, s in self.re_whats:
                    try:
                        new = r.sub(s, new)
                    except re.error as e:
                        # Help debugging troubles with new regexes
                        raise re.error(f"{e}\nwhile re.sub('{r.pattern}', {s}, str)") from e

                v["regex"].append(new)

                if self.debug & AbiDebug.REGEX:
                    self.log.debug("%-90s <== %s", new, what)

                # Store regex into a subgroup to speedup searches
                self.regex_append(what, new)

        if self.debug & AbiDebug.SUBGROUP_DICT:
            self.log.debug("%s", pformat(self.regex_group))

        if self.debug & AbiDebug.SUBGROUP_SIZE:
            biggest_keys = sorted(self.regex_group,
                                  key=lambda k: len(self.regex_group[k]),
                                  reverse=True)

            print("Top regex subgroups:", file=sys.stderr)
            for k in biggest_keys[:10]:
                print(f"{k} has {len(self.regex_group[k])} elements", file=sys.stderr)
249
250