Path: blob/master/drivers/android/binder/range_alloc/tree.rs
// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2025 Google LLC.

use kernel::{
    page::PAGE_SIZE,
    prelude::*,
    rbtree::{RBTree, RBTreeNode, RBTreeNodeReservation},
    seq_file::SeqFile,
    seq_print,
    task::Pid,
};

use crate::range_alloc::{DescriptorState, FreedRange, Range};

/// Keeps track of allocations in a process' mmap.
///
/// Each process has an mmap where the data for incoming transactions will be placed. This struct
/// keeps track of allocations made in the mmap. For each allocation, we store a descriptor that
/// has metadata related to the allocation. We also keep track of available free space.
pub(super) struct TreeRangeAllocator<T> {
    /// This collection contains descriptors for *both* ranges containing an allocation, *and* free
    /// ranges between allocations. The free ranges get merged, so there are never two free ranges
    /// next to each other.
    tree: RBTree<usize, Descriptor<T>>,
    /// Contains an entry for every free range in `self.tree`. This tree sorts the ranges by size,
    /// letting us look up the smallest range whose size is at least some lower bound.
    free_tree: RBTree<FreeKey, ()>,
    size: usize,
    free_oneway_space: usize,
}

impl<T> TreeRangeAllocator<T> {
    pub(crate) fn from_array(
        size: usize,
        ranges: &mut KVec<Range<T>>,
        alloc: &mut FromArrayAllocs<T>,
    ) -> Self {
        let mut tree = TreeRangeAllocator {
            tree: RBTree::new(),
            free_tree: RBTree::new(),
            size,
            free_oneway_space: size / 2,
        };

        let mut free_offset = 0;
        for range in ranges.drain_all() {
            let free_size = range.offset - free_offset;
            if free_size > 0 {
                let free_node = alloc.free_tree.pop().unwrap();
                tree.free_tree
                    .insert(free_node.into_node((free_size, free_offset), ()));
                let tree_node = alloc.tree.pop().unwrap();
                tree.tree.insert(
                    tree_node.into_node(free_offset, Descriptor::new(free_offset, free_size)),
                );
            }
            free_offset = range.endpoint();

            if range.state.is_oneway() {
                tree.free_oneway_space = tree.free_oneway_space.saturating_sub(range.size);
            }

            let free_res = alloc.free_tree.pop().unwrap();
            let tree_node = alloc.tree.pop().unwrap();
            let mut desc = Descriptor::new(range.offset, range.size);
            desc.state = Some((range.state, free_res));
            tree.tree.insert(tree_node.into_node(range.offset, desc));
        }

        // After the last range, we may need a free range.
        if free_offset < size {
            let free_size = size - free_offset;
            let free_node = alloc.free_tree.pop().unwrap();
            tree.free_tree
                .insert(free_node.into_node((free_size, free_offset), ()));
            let tree_node = alloc.tree.pop().unwrap();
            tree.tree
                .insert(tree_node.into_node(free_offset, Descriptor::new(free_offset, free_size)));
        }

        tree
    }

    pub(crate) fn is_empty(&self) -> bool {
        let mut tree_iter = self.tree.values();
        // There's always at least one range, because index zero is either the start of a free or
        // allocated range.
        let first_value = tree_iter.next().unwrap();
        if tree_iter.next().is_some() {
            // There are never two free ranges next to each other, so if there is more than one
            // descriptor, then at least one of them must hold an allocated range.
            return false;
        }
        // There is only one descriptor. Return true if it is for a free range.
        first_value.state.is_none()
    }

    pub(crate) fn total_size(&self) -> usize {
        self.size
    }

    pub(crate) fn free_oneway_space(&self) -> usize {
        self.free_oneway_space
    }

    pub(crate) fn count_buffers(&self) -> usize {
        self.tree
            .values()
            .filter(|desc| desc.state.is_some())
            .count()
    }

    pub(crate) fn debug_print(&self, m: &SeqFile) -> Result<()> {
        for desc in self.tree.values() {
            let state = match &desc.state {
                Some(state) => &state.0,
                None => continue,
            };
            seq_print!(
                m,
                " buffer: {} size {} pid {}",
                desc.offset,
                desc.size,
                state.pid(),
            );
            if state.is_oneway() {
                seq_print!(m, " oneway");
            }
            match state {
                DescriptorState::Reserved(_res) => {
                    seq_print!(m, " reserved\n");
                }
                DescriptorState::Allocated(_alloc) => {
                    seq_print!(m, " allocated\n");
                }
            }
        }
        Ok(())
    }

    fn find_best_match(&mut self, size: usize) -> Option<&mut Descriptor<T>> {
        let free_cursor = self.free_tree.cursor_lower_bound(&(size, 0))?;
        let ((_, offset), ()) = free_cursor.current();
        self.tree.get_mut(offset)
    }

    /// Try to reserve a new buffer, using the provided allocation if necessary.
    pub(crate) fn reserve_new(
        &mut self,
        debug_id: usize,
        size: usize,
        is_oneway: bool,
        pid: Pid,
        alloc: ReserveNewTreeAlloc<T>,
    ) -> Result<(usize, bool)> {
        // Compute new value of free_oneway_space, which is set only on success.
        let new_oneway_space = if is_oneway {
            match self.free_oneway_space.checked_sub(size) {
                Some(new_oneway_space) => new_oneway_space,
                None => return Err(ENOSPC),
            }
        } else {
            self.free_oneway_space
        };

        // Start detecting spammers once we have less than 20%
        // of async space left (which is less than 10% of total
        // buffer size).
        //
        // (This will short-circuit, so `low_oneway_space` is
        // only called when necessary.)
        let oneway_spam_detected =
            is_oneway && new_oneway_space < self.size / 10 && self.low_oneway_space(pid);

        let (found_size, found_off, tree_node, free_tree_node) = match self.find_best_match(size) {
            None => {
                pr_warn!("ENOSPC from range_alloc.reserve_new - size: {}", size);
                return Err(ENOSPC);
            }
            Some(desc) => {
                let found_size = desc.size;
                let found_offset = desc.offset;

                // In case we need to break up the descriptor
                let new_desc = Descriptor::new(found_offset + size, found_size - size);
                let (tree_node, free_tree_node, desc_node_res) = alloc.initialize(new_desc);

                desc.state = Some((
                    DescriptorState::new(is_oneway, debug_id, pid),
                    desc_node_res,
                ));
                desc.size = size;

                (found_size, found_offset, tree_node, free_tree_node)
            }
        };
        self.free_oneway_space = new_oneway_space;
        self.free_tree.remove(&(found_size, found_off));

        if found_size != size {
            self.tree.insert(tree_node);
            self.free_tree.insert(free_tree_node);
        }

        Ok((found_off, oneway_spam_detected))
    }

    pub(crate) fn reservation_abort(&mut self, offset: usize) -> Result<FreedRange> {
        let mut cursor = self.tree.cursor_lower_bound(&offset).ok_or_else(|| {
            pr_warn!(
                "EINVAL from range_alloc.reservation_abort - offset: {}",
                offset
            );
            EINVAL
        })?;

        let (_, desc) = cursor.current_mut();

        if desc.offset != offset {
            pr_warn!(
                "EINVAL from range_alloc.reservation_abort - offset: {}",
                offset
            );
            return Err(EINVAL);
        }

        let (reservation, free_node_res) = desc.try_change_state(|state| match state {
            Some((DescriptorState::Reserved(reservation), free_node_res)) => {
                (None, Ok((reservation, free_node_res)))
            }
            None => {
                pr_warn!(
                    "EINVAL from range_alloc.reservation_abort - offset: {}",
                    offset
                );
                (None, Err(EINVAL))
            }
            allocated => {
                pr_warn!(
                    "EPERM from range_alloc.reservation_abort - offset: {}",
                    offset
                );
                (allocated, Err(EPERM))
            }
        })?;

        let mut size = desc.size;
        let mut offset = desc.offset;
        let free_oneway_space_add = if reservation.is_oneway { size } else { 0 };

        self.free_oneway_space += free_oneway_space_add;

        let mut freed_range = FreedRange::interior_pages(offset, size);
        // Compute how large the next free region needs to be to include one more page in
        // the newly freed range.
        let add_next_page_needed = match (offset + size) % PAGE_SIZE {
            0 => usize::MAX,
            unalign => PAGE_SIZE - unalign,
        };
        // Compute how large the previous free region needs to be to include one more page
        // in the newly freed range.
        let add_prev_page_needed = match offset % PAGE_SIZE {
            0 => usize::MAX,
            unalign => unalign,
        };

        // Merge next into current if next is free
        let remove_next = match cursor.peek_next() {
            Some((_, next)) if next.state.is_none() => {
                if next.size >= add_next_page_needed {
                    freed_range.end_page_idx += 1;
                }
                self.free_tree.remove(&(next.size, next.offset));
                size += next.size;
                true
            }
            _ => false,
        };

        if remove_next {
            let (_, desc) = cursor.current_mut();
            desc.size = size;
            cursor.remove_next();
        }

        // Merge current into prev if prev is free
        match cursor.peek_prev_mut() {
            Some((_, prev)) if prev.state.is_none() => {
                if prev.size >= add_prev_page_needed {
                    freed_range.start_page_idx -= 1;
                }
                // merge previous with current, remove current
                self.free_tree.remove(&(prev.size, prev.offset));
                offset = prev.offset;
                size += prev.size;
                prev.size = size;
                cursor.remove_current();
            }
            _ => {}
        };

        self.free_tree
            .insert(free_node_res.into_node((size, offset), ()));

        Ok(freed_range)
    }

    pub(crate) fn reservation_commit(&mut self, offset: usize, data: &mut Option<T>) -> Result {
        let desc = self.tree.get_mut(&offset).ok_or(ENOENT)?;

        desc.try_change_state(|state| match state {
            Some((DescriptorState::Reserved(reservation), free_node_res)) => (
                Some((
                    DescriptorState::Allocated(reservation.allocate(data.take())),
                    free_node_res,
                )),
                Ok(()),
            ),
            other => (other, Err(ENOENT)),
        })
    }

    /// Takes an entry at the given offset from [`DescriptorState::Allocated`] to
    /// [`DescriptorState::Reserved`].
    ///
    /// Returns the size of the existing entry, its debug id, and the data associated with it.
    pub(crate) fn reserve_existing(&mut self, offset: usize) -> Result<(usize, usize, Option<T>)> {
        let desc = self.tree.get_mut(&offset).ok_or_else(|| {
            pr_warn!(
                "ENOENT from range_alloc.reserve_existing - offset: {}",
                offset
            );
            ENOENT
        })?;

        let (debug_id, data) = desc.try_change_state(|state| match state {
            Some((DescriptorState::Allocated(allocation), free_node_res)) => {
                let (reservation, data) = allocation.deallocate();
                let debug_id = reservation.debug_id;
                (
                    Some((DescriptorState::Reserved(reservation), free_node_res)),
                    Ok((debug_id, data)),
                )
            }
            other => {
                pr_warn!(
                    "ENOENT from range_alloc.reserve_existing - offset: {}",
                    offset
                );
                (other, Err(ENOENT))
            }
        })?;

        Ok((desc.size, debug_id, data))
    }

    /// Call the provided callback at every allocated region.
    ///
    /// This destroys the range allocator. Used only during shutdown.
    pub(crate) fn take_for_each<F: Fn(usize, usize, usize, Option<T>)>(&mut self, callback: F) {
        for (_, desc) in self.tree.iter_mut() {
            if let Some((DescriptorState::Allocated(allocation), _)) = &mut desc.state {
                callback(
                    desc.offset,
                    desc.size,
                    allocation.debug_id(),
                    allocation.take(),
                );
            }
        }
    }

    /// Find the number and total size of buffers allocated by the current caller.
    ///
    /// The idea is that once we cross the threshold, whoever is responsible
    /// for the low async space is likely to try to send another async transaction,
    /// and at some point we'll catch them in the act. This is more efficient
    /// than keeping a map per pid.
    fn low_oneway_space(&self, calling_pid: Pid) -> bool {
        let mut total_alloc_size = 0;
        let mut num_buffers = 0;
        for (_, desc) in self.tree.iter() {
            if let Some((state, _)) = &desc.state {
                if state.is_oneway() && state.pid() == calling_pid {
                    total_alloc_size += desc.size;
                    num_buffers += 1;
                }
            }
        }

        // Warn if this pid has more than 50 transactions, or more than 50% of
        // async space (which is 25% of total buffer size). Oneway spam is only
        // detected when the threshold is exceeded.
        num_buffers > 50 || total_alloc_size > self.size / 4
    }
}

type TreeDescriptorState<T> = (DescriptorState<T>, FreeNodeRes);
struct Descriptor<T> {
    size: usize,
    offset: usize,
    state: Option<TreeDescriptorState<T>>,
}

impl<T> Descriptor<T> {
    fn new(offset: usize, size: usize) -> Self {
        Self {
            size,
            offset,
            state: None,
        }
    }

    fn try_change_state<F, Data>(&mut self, f: F) -> Result<Data>
    where
        F: FnOnce(Option<TreeDescriptorState<T>>) -> (Option<TreeDescriptorState<T>>, Result<Data>),
    {
        let (new_state, result) = f(self.state.take());
        self.state = new_state;
        result
    }
}

// (Descriptor.size, Descriptor.offset)
type FreeKey = (usize, usize);
type FreeNodeRes = RBTreeNodeReservation<FreeKey, ()>;

/// An allocation for use by `reserve_new`.
pub(crate) struct ReserveNewTreeAlloc<T> {
    tree_node_res: RBTreeNodeReservation<usize, Descriptor<T>>,
    free_tree_node_res: FreeNodeRes,
    desc_node_res: FreeNodeRes,
}

impl<T> ReserveNewTreeAlloc<T> {
    pub(crate) fn try_new() -> Result<Self> {
        let tree_node_res = RBTreeNodeReservation::new(GFP_KERNEL)?;
        let free_tree_node_res = RBTreeNodeReservation::new(GFP_KERNEL)?;
        let desc_node_res = RBTreeNodeReservation::new(GFP_KERNEL)?;
        Ok(Self {
            tree_node_res,
            free_tree_node_res,
            desc_node_res,
        })
    }

    fn initialize(
        self,
        desc: Descriptor<T>,
    ) -> (
        RBTreeNode<usize, Descriptor<T>>,
        RBTreeNode<FreeKey, ()>,
        FreeNodeRes,
    ) {
        let size = desc.size;
        let offset = desc.offset;
        (
            self.tree_node_res.into_node(offset, desc),
            self.free_tree_node_res.into_node((size, offset), ()),
            self.desc_node_res,
        )
    }
}

/// An allocation for creating a tree from an `ArrayRangeAllocator`.
pub(crate) struct FromArrayAllocs<T> {
    tree: KVec<RBTreeNodeReservation<usize, Descriptor<T>>>,
    free_tree: KVec<RBTreeNodeReservation<FreeKey, ()>>,
}

impl<T> FromArrayAllocs<T> {
    pub(crate) fn try_new(len: usize) -> Result<Self> {
        let num_descriptors = 2 * len + 1;

        let mut tree = KVec::with_capacity(num_descriptors, GFP_KERNEL)?;
        for _ in 0..num_descriptors {
            tree.push(RBTreeNodeReservation::new(GFP_KERNEL)?, GFP_KERNEL)?;
        }

        let mut free_tree = KVec::with_capacity(num_descriptors, GFP_KERNEL)?;
        for _ in 0..num_descriptors {
            free_tree.push(RBTreeNodeReservation::new(GFP_KERNEL)?, GFP_KERNEL)?;
        }

        Ok(Self { tree, free_tree })
    }
}
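
For readers outside the kernel tree, the core idea above is a best-fit allocator built from two ordered maps: `tree`, keyed by offset, holds every range (free or allocated), while `free_tree`, keyed by `(size, offset)`, holds only the free ones, so `cursor_lower_bound(&(size, 0))` lands on the smallest free range that can hold the request, with the lowest offset breaking ties. The sketch below is a minimal userspace illustration of just that lookup-and-split step, assuming `std::collections::BTreeMap` as a stand-in for the kernel `RBTree`; the names (`Sketch`, `reserve`, `by_offset`, `free_by_size`) are invented for the example, and it deliberately omits the preallocated node reservations, free-range merging, oneway-space accounting, and page bookkeeping that the real driver needs.

// Illustrative sketch only, not part of the driver.
use std::collections::BTreeMap;

struct Sketch {
    /// offset -> (size, allocated?), one entry per range, like `tree` above.
    by_offset: BTreeMap<usize, (usize, bool)>,
    /// (size, offset) -> (), ordered so the first entry >= (size, 0) is the
    /// smallest free range that fits, lowest offset breaking ties.
    free_by_size: BTreeMap<(usize, usize), ()>,
}

impl Sketch {
    fn new(total: usize) -> Self {
        let mut s = Sketch {
            by_offset: BTreeMap::new(),
            free_by_size: BTreeMap::new(),
        };
        // Initially the whole buffer is a single free range at offset zero.
        s.by_offset.insert(0, (total, false));
        s.free_by_size.insert((total, 0), ());
        s
    }

    /// Best-fit reservation: take the smallest free range of at least `size`
    /// bytes, mark its prefix as allocated, and keep the remainder free.
    fn reserve(&mut self, size: usize) -> Option<usize> {
        // Counterpart of `find_best_match`: lower bound on the (size, offset) key.
        let (&(found_size, offset), _) = self.free_by_size.range((size, 0)..).next()?;
        self.free_by_size.remove(&(found_size, offset));
        self.by_offset.insert(offset, (size, true));
        if found_size > size {
            // Split off the unused tail as a new free range, as `reserve_new`
            // does when `found_size != size`.
            self.by_offset.insert(offset + size, (found_size - size, false));
            self.free_by_size.insert((found_size - size, offset + size), ());
        }
        Some(offset)
    }
}

fn main() {
    let mut s = Sketch::new(128);
    assert_eq!(s.reserve(32), Some(0)); // splits the single 128-byte free range
    assert_eq!(s.reserve(64), Some(32)); // takes the 96-byte remainder, splits again
    assert_eq!(s.reserve(64), None); // only 32 free bytes left
    assert_eq!(s.by_offset.len(), 3); // two allocated ranges plus one free range
}

One design point visible in the real code but skipped in the sketch: `ReserveNewTreeAlloc::try_new` and `FromArrayAllocs::try_new` allocate all `RBTree` nodes up front, which separates memory allocation from tree insertion, so the insertions performed inside `reserve_new`, `reservation_abort`, and `from_array` cannot fail; the sketch simply lets `BTreeMap` allocate on insert.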