GitHub Repository: torvalds/linux
Path: blob/master/drivers/android/binder/range_alloc/tree.rs
// SPDX-License-Identifier: GPL-2.0

// Copyright (C) 2025 Google LLC.

use kernel::{
    page::PAGE_SIZE,
    prelude::*,
    rbtree::{RBTree, RBTreeNode, RBTreeNodeReservation},
    seq_file::SeqFile,
    seq_print,
    task::Pid,
};

use crate::range_alloc::{DescriptorState, FreedRange, Range};

/// Keeps track of allocations in a process' mmap.
///
/// Each process has an mmap where the data for incoming transactions will be placed. This struct
/// keeps track of allocations made in the mmap. For each allocation, we store a descriptor that
/// has metadata related to the allocation. We also keep track of available free space.
pub(super) struct TreeRangeAllocator<T> {
    /// This collection contains descriptors for *both* ranges containing an allocation, *and* free
    /// ranges between allocations. The free ranges get merged, so there are never two free ranges
    /// next to each other.
    tree: RBTree<usize, Descriptor<T>>,
    /// Contains an entry for every free range in `self.tree`. This tree sorts the ranges by size,
    /// letting us look up the smallest range whose size is at least some lower bound.
    free_tree: RBTree<FreeKey, ()>,
    size: usize,
    free_oneway_space: usize,
}
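
// Illustrative layout, assuming 4 KiB pages: for a 4-page (0x4000-byte) mmap holding a
// single 0x80-byte allocation at offset 0x100, `tree` would contain three descriptors --
// a free range at offset 0x0 of size 0x100, the allocation at 0x100 of size 0x80, and a
// free range at 0x180 of size 0x3e80 -- while `free_tree` would hold the keys
// (0x100, 0x0) and (0x3e80, 0x180). Since neighbouring free ranges are always merged,
// each gap contributes exactly one candidate to a best-fit search.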

impl<T> TreeRangeAllocator<T> {
    pub(crate) fn from_array(
        size: usize,
        ranges: &mut KVec<Range<T>>,
        alloc: &mut FromArrayAllocs<T>,
    ) -> Self {
        let mut tree = TreeRangeAllocator {
            tree: RBTree::new(),
            free_tree: RBTree::new(),
            size,
            free_oneway_space: size / 2,
        };

        let mut free_offset = 0;
        for range in ranges.drain_all() {
            let free_size = range.offset - free_offset;
            if free_size > 0 {
                let free_node = alloc.free_tree.pop().unwrap();
                tree.free_tree
                    .insert(free_node.into_node((free_size, free_offset), ()));
                let tree_node = alloc.tree.pop().unwrap();
                tree.tree.insert(
                    tree_node.into_node(free_offset, Descriptor::new(free_offset, free_size)),
                );
            }
            free_offset = range.endpoint();

            if range.state.is_oneway() {
                tree.free_oneway_space = tree.free_oneway_space.saturating_sub(range.size);
            }

            let free_res = alloc.free_tree.pop().unwrap();
            let tree_node = alloc.tree.pop().unwrap();
            let mut desc = Descriptor::new(range.offset, range.size);
            desc.state = Some((range.state, free_res));
            tree.tree.insert(tree_node.into_node(range.offset, desc));
        }

        // After the last range, we may need a free range.
        if free_offset < size {
            let free_size = size - free_offset;
            let free_node = alloc.free_tree.pop().unwrap();
            tree.free_tree
                .insert(free_node.into_node((free_size, free_offset), ()));
            let tree_node = alloc.tree.pop().unwrap();
            tree.tree
                .insert(tree_node.into_node(free_offset, Descriptor::new(free_offset, free_size)));
        }

        tree
    }

    pub(crate) fn is_empty(&self) -> bool {
        let mut tree_iter = self.tree.values();
        // There's always at least one range, because index zero is either the start of a free or
        // allocated range.
        let first_value = tree_iter.next().unwrap();
        if tree_iter.next().is_some() {
            // There are never two free ranges next to each other, so if there is more than one
            // descriptor, then at least one of them must hold an allocated range.
            return false;
        }
        // There is only one descriptor. Return true if it is for a free range.
        first_value.state.is_none()
    }

    pub(crate) fn total_size(&self) -> usize {
        self.size
    }

    pub(crate) fn free_oneway_space(&self) -> usize {
        self.free_oneway_space
    }

    pub(crate) fn count_buffers(&self) -> usize {
        self.tree
            .values()
            .filter(|desc| desc.state.is_some())
            .count()
    }

    pub(crate) fn debug_print(&self, m: &SeqFile) -> Result<()> {
        for desc in self.tree.values() {
            let state = match &desc.state {
                Some(state) => &state.0,
                None => continue,
            };
            seq_print!(
                m,
                " buffer: {} size {} pid {}",
                desc.offset,
                desc.size,
                state.pid(),
            );
            if state.is_oneway() {
                seq_print!(m, " oneway");
            }
            match state {
                DescriptorState::Reserved(_res) => {
                    seq_print!(m, " reserved\n");
                }
                DescriptorState::Allocated(_alloc) => {
                    seq_print!(m, " allocated\n");
                }
            }
        }
        Ok(())
    }

    fn find_best_match(&mut self, size: usize) -> Option<&mut Descriptor<T>> {
        let free_cursor = self.free_tree.cursor_lower_bound(&(size, 0))?;
        let ((_, offset), ()) = free_cursor.current();
        self.tree.get_mut(offset)
    }
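
    // Note on the lookup above: `FreeKey` pairs are ordered lexicographically, so the lower
    // bound of `(size, 0)` is the entry with the smallest free size that is still `>= size`,
    // with ties broken by lowest offset. For example, given free keys (0x80, 0x300) and
    // (0x200, 0x100), a request for 0x100 bytes skips the 0x80 entry and selects the
    // 0x200-byte range at offset 0x100.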

    /// Try to reserve a new buffer, using the provided allocation if necessary.
    pub(crate) fn reserve_new(
        &mut self,
        debug_id: usize,
        size: usize,
        is_oneway: bool,
        pid: Pid,
        alloc: ReserveNewTreeAlloc<T>,
    ) -> Result<(usize, bool)> {
        // Compute new value of free_oneway_space, which is set only on success.
        let new_oneway_space = if is_oneway {
            match self.free_oneway_space.checked_sub(size) {
                Some(new_oneway_space) => new_oneway_space,
                None => return Err(ENOSPC),
            }
        } else {
            self.free_oneway_space
        };

        // Start detecting spammers once we have less than 20%
        // of async space left (which is less than 10% of total
        // buffer size).
        //
        // (This will short-circuit, so `low_oneway_space` is
        // only called when necessary.)
        let oneway_spam_detected =
            is_oneway && new_oneway_space < self.size / 10 && self.low_oneway_space(pid);

        let (found_size, found_off, tree_node, free_tree_node) = match self.find_best_match(size) {
            None => {
                pr_warn!("ENOSPC from range_alloc.reserve_new - size: {}", size);
                return Err(ENOSPC);
            }
            Some(desc) => {
                let found_size = desc.size;
                let found_offset = desc.offset;

                // In case we need to break up the descriptor
                let new_desc = Descriptor::new(found_offset + size, found_size - size);
                let (tree_node, free_tree_node, desc_node_res) = alloc.initialize(new_desc);

                desc.state = Some((
                    DescriptorState::new(is_oneway, debug_id, pid),
                    desc_node_res,
                ));
                desc.size = size;

                (found_size, found_offset, tree_node, free_tree_node)
            }
        };
        self.free_oneway_space = new_oneway_space;
        self.free_tree.remove(&(found_size, found_off));

        if found_size != size {
            self.tree.insert(tree_node);
            self.free_tree.insert(free_tree_node);
        }

        Ok((found_off, oneway_spam_detected))
    }
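
    // Reservation sketch (hypothetical values): if the best match is a 0x300-byte free range
    // at offset 0x200 and 0x100 bytes are requested, the matched descriptor is shrunk to
    // size 0x100 and marked reserved, its old key (0x300, 0x200) is removed from `free_tree`,
    // and the pre-allocated nodes from `ReserveNewTreeAlloc` insert a new free descriptor at
    // offset 0x300 of size 0x200 together with its `free_tree` entry. On an exact-size match
    // those spare nodes are simply dropped.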

    pub(crate) fn reservation_abort(&mut self, offset: usize) -> Result<FreedRange> {
        let mut cursor = self.tree.cursor_lower_bound(&offset).ok_or_else(|| {
            pr_warn!(
                "EINVAL from range_alloc.reservation_abort - offset: {}",
                offset
            );
            EINVAL
        })?;

        let (_, desc) = cursor.current_mut();

        if desc.offset != offset {
            pr_warn!(
                "EINVAL from range_alloc.reservation_abort - offset: {}",
                offset
            );
            return Err(EINVAL);
        }

        let (reservation, free_node_res) = desc.try_change_state(|state| match state {
            Some((DescriptorState::Reserved(reservation), free_node_res)) => {
                (None, Ok((reservation, free_node_res)))
            }
            None => {
                pr_warn!(
                    "EINVAL from range_alloc.reservation_abort - offset: {}",
                    offset
                );
                (None, Err(EINVAL))
            }
            allocated => {
                pr_warn!(
                    "EPERM from range_alloc.reservation_abort - offset: {}",
                    offset
                );
                (allocated, Err(EPERM))
            }
        })?;

        let mut size = desc.size;
        let mut offset = desc.offset;
        let free_oneway_space_add = if reservation.is_oneway { size } else { 0 };

        self.free_oneway_space += free_oneway_space_add;

        let mut freed_range = FreedRange::interior_pages(offset, size);
        // Compute how large the next free region needs to be to include one more page in
        // the newly freed range.
        let add_next_page_needed = match (offset + size) % PAGE_SIZE {
            0 => usize::MAX,
            unalign => PAGE_SIZE - unalign,
        };
        // Compute how large the previous free region needs to be to include one more page
        // in the newly freed range.
        let add_prev_page_needed = match offset % PAGE_SIZE {
            0 => usize::MAX,
            unalign => unalign,
        };

        // Merge next into current if next is free
        let remove_next = match cursor.peek_next() {
            Some((_, next)) if next.state.is_none() => {
                if next.size >= add_next_page_needed {
                    freed_range.end_page_idx += 1;
                }
                self.free_tree.remove(&(next.size, next.offset));
                size += next.size;
                true
            }
            _ => false,
        };

        if remove_next {
            let (_, desc) = cursor.current_mut();
            desc.size = size;
            cursor.remove_next();
        }

        // Merge current into prev if prev is free
        match cursor.peek_prev_mut() {
            Some((_, prev)) if prev.state.is_none() => {
                if prev.size >= add_prev_page_needed {
                    freed_range.start_page_idx -= 1;
                }
                // merge previous with current, remove current
                self.free_tree.remove(&(prev.size, prev.offset));
                offset = prev.offset;
                size += prev.size;
                prev.size = size;
                cursor.remove_current();
            }
            _ => {}
        };

        self.free_tree
            .insert(free_node_res.into_node((size, offset), ()));

        Ok(freed_range)
    }
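
    // Worked example for the page arithmetic above, assuming 4 KiB (0x1000) pages: aborting a
    // reservation at offset 0x1800 of size 0x1000 covers [0x1800, 0x2800), which contains no
    // whole page on its own. The range ends 0x800 short of a page boundary, so
    // `add_next_page_needed` is 0x800; if the following free range is at least that large,
    // the page at [0x2000, 0x3000) becomes entirely free and `end_page_idx` grows by one.
    // Likewise the range starts 0x800 past a boundary, so a preceding free range of at least
    // 0x800 bytes pulls the page at [0x1000, 0x2000) into the freed range.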

    pub(crate) fn reservation_commit(&mut self, offset: usize, data: &mut Option<T>) -> Result {
        let desc = self.tree.get_mut(&offset).ok_or(ENOENT)?;

        desc.try_change_state(|state| match state {
            Some((DescriptorState::Reserved(reservation), free_node_res)) => (
                Some((
                    DescriptorState::Allocated(reservation.allocate(data.take())),
                    free_node_res,
                )),
                Ok(()),
            ),
            other => (other, Err(ENOENT)),
        })
    }

    /// Takes an entry at the given offset from [`DescriptorState::Allocated`] to
    /// [`DescriptorState::Reserved`].
    ///
    /// Returns the size of the existing entry and the data associated with it.
    pub(crate) fn reserve_existing(&mut self, offset: usize) -> Result<(usize, usize, Option<T>)> {
        let desc = self.tree.get_mut(&offset).ok_or_else(|| {
            pr_warn!(
                "ENOENT from range_alloc.reserve_existing - offset: {}",
                offset
            );
            ENOENT
        })?;

        let (debug_id, data) = desc.try_change_state(|state| match state {
            Some((DescriptorState::Allocated(allocation), free_node_res)) => {
                let (reservation, data) = allocation.deallocate();
                let debug_id = reservation.debug_id;
                (
                    Some((DescriptorState::Reserved(reservation), free_node_res)),
                    Ok((debug_id, data)),
                )
            }
            other => {
                pr_warn!(
                    "ENOENT from range_alloc.reserve_existing - offset: {}",
                    offset
                );
                (other, Err(ENOENT))
            }
        })?;

        Ok((desc.size, debug_id, data))
    }

    /// Call the provided callback at every allocated region.
    ///
    /// This destroys the range allocator. Used only during shutdown.
    pub(crate) fn take_for_each<F: Fn(usize, usize, usize, Option<T>)>(&mut self, callback: F) {
        for (_, desc) in self.tree.iter_mut() {
            if let Some((DescriptorState::Allocated(allocation), _)) = &mut desc.state {
                callback(
                    desc.offset,
                    desc.size,
                    allocation.debug_id(),
                    allocation.take(),
                );
            }
        }
    }

    /// Find the amount and size of buffers allocated by the current caller.
    ///
    /// The idea is that once we cross the threshold, whoever is responsible
    /// for the low async space is likely to try to send another async transaction,
    /// and at some point we'll catch them in the act. This is more efficient
    /// than keeping a map per pid.
    fn low_oneway_space(&self, calling_pid: Pid) -> bool {
        let mut total_alloc_size = 0;
        let mut num_buffers = 0;
        for (_, desc) in self.tree.iter() {
            if let Some((state, _)) = &desc.state {
                if state.is_oneway() && state.pid() == calling_pid {
                    total_alloc_size += desc.size;
                    num_buffers += 1;
                }
            }
        }

        // Warn if this pid has more than 50 transactions, or more than 50% of
        // async space (which is 25% of total buffer size). Oneway spam is only
        // detected when the threshold is exceeded.
        num_buffers > 50 || total_alloc_size > self.size / 4
    }
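
    // Putting the two thresholds together with concrete numbers: for a 1 MiB mmap the async
    // space is 512 KiB, so spam detection only runs once a oneway reservation would leave
    // less than ~102 KiB (size / 10) free, and the calling pid trips it by holding more than
    // 50 oneway buffers or more than 256 KiB (size / 4) of allocations.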
}

type TreeDescriptorState<T> = (DescriptorState<T>, FreeNodeRes);
struct Descriptor<T> {
    size: usize,
    offset: usize,
    state: Option<TreeDescriptorState<T>>,
}

impl<T> Descriptor<T> {
    fn new(offset: usize, size: usize) -> Self {
        Self {
            size,
            offset,
            state: None,
        }
    }

    fn try_change_state<F, Data>(&mut self, f: F) -> Result<Data>
    where
        F: FnOnce(Option<TreeDescriptorState<T>>) -> (Option<TreeDescriptorState<T>>, Result<Data>),
    {
        let (new_state, result) = f(self.state.take());
        self.state = new_state;
        result
    }
}

// (Descriptor.size, Descriptor.offset)
type FreeKey = (usize, usize);
type FreeNodeRes = RBTreeNodeReservation<FreeKey, ()>;

/// An allocation for use by `reserve_new`.
pub(crate) struct ReserveNewTreeAlloc<T> {
    tree_node_res: RBTreeNodeReservation<usize, Descriptor<T>>,
    free_tree_node_res: FreeNodeRes,
    desc_node_res: FreeNodeRes,
}

impl<T> ReserveNewTreeAlloc<T> {
    pub(crate) fn try_new() -> Result<Self> {
        let tree_node_res = RBTreeNodeReservation::new(GFP_KERNEL)?;
        let free_tree_node_res = RBTreeNodeReservation::new(GFP_KERNEL)?;
        let desc_node_res = RBTreeNodeReservation::new(GFP_KERNEL)?;
        Ok(Self {
            tree_node_res,
            free_tree_node_res,
            desc_node_res,
        })
    }

    fn initialize(
        self,
        desc: Descriptor<T>,
    ) -> (
        RBTreeNode<usize, Descriptor<T>>,
        RBTreeNode<FreeKey, ()>,
        FreeNodeRes,
    ) {
        let size = desc.size;
        let offset = desc.offset;
        (
            self.tree_node_res.into_node(offset, desc),
            self.free_tree_node_res.into_node((size, offset), ()),
            self.desc_node_res,
        )
    }
}
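
// `ReserveNewTreeAlloc::try_new` performs all three `GFP_KERNEL` allocations up front, so
// `reserve_new` only consumes already-reserved nodes and cannot fail on memory while it is
// mutating the allocator. The third reservation (`desc_node_res`) travels inside the
// descriptor's state and is what later lets `reservation_abort` re-insert the freed range
// into `free_tree` without allocating.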

/// An allocation for creating a tree from an `ArrayRangeAllocator`.
pub(crate) struct FromArrayAllocs<T> {
    tree: KVec<RBTreeNodeReservation<usize, Descriptor<T>>>,
    free_tree: KVec<RBTreeNodeReservation<FreeKey, ()>>,
}

impl<T> FromArrayAllocs<T> {
    pub(crate) fn try_new(len: usize) -> Result<Self> {
        let num_descriptors = 2 * len + 1;
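        // `len` allocated ranges are separated (and bounded) by at most `len + 1` free
        // ranges, so the descriptor tree needs up to `2 * len + 1` nodes. The same count
        // covers `free_tree`: one reservation per free range plus one stored in each
        // allocated descriptor's state.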

        let mut tree = KVec::with_capacity(num_descriptors, GFP_KERNEL)?;
        for _ in 0..num_descriptors {
            tree.push(RBTreeNodeReservation::new(GFP_KERNEL)?, GFP_KERNEL)?;
        }

        let mut free_tree = KVec::with_capacity(num_descriptors, GFP_KERNEL)?;
        for _ in 0..num_descriptors {
            free_tree.push(RBTreeNodeReservation::new(GFP_KERNEL)?, GFP_KERNEL)?;
        }

        Ok(Self { tree, free_tree })
    }
}