linux/rust/kernel/gpu/buddy.rs

// SPDX-License-Identifier: GPL-2.0

//! GPU buddy allocator bindings.
//!
//! C header: [`include/linux/gpu_buddy.h`](srctree/include/linux/gpu_buddy.h)
//!
//! This module provides Rust abstractions over the Linux kernel's GPU buddy
//! allocator, which implements a binary buddy memory allocator.
//!
//! The buddy allocator manages a contiguous address space and allocates blocks
//! in power-of-two sizes, useful for GPU physical memory management.
//!
//! # Examples
//!
//! Create a buddy allocator and perform a basic range allocation:
//!
//! ```
//! use kernel::{
//!     gpu::buddy::{
//!         GpuBuddy,
//!         GpuBuddyAllocFlags,
//!         GpuBuddyAllocMode,
//!         GpuBuddyParams, //
//!     },
//!     prelude::*,
//!     ptr::Alignment,
//!     sizes::*, //
//! };
//!
//! // Create a 1GB buddy allocator with 4KB minimum chunk size.
//! let buddy = GpuBuddy::new(GpuBuddyParams {
//!     base_offset: 0,
//!     size: SZ_1G as u64,
//!     chunk_size: Alignment::new::<SZ_4K>(),
//! })?;
//!
//! assert_eq!(buddy.size(), SZ_1G as u64);
//! assert_eq!(buddy.chunk_size(), Alignment::new::<SZ_4K>());
//! let initial_free = buddy.avail();
//!
//! // Allocate 16MB. Block lands at the top of the address range.
//! let allocated = KBox::pin_init(
//!     buddy.alloc_blocks(
//!         GpuBuddyAllocMode::Simple,
//!         SZ_16M as u64,
//!         Alignment::new::<SZ_16M>(),
//!         GpuBuddyAllocFlags::default(),
//!     ),
//!     GFP_KERNEL,
//! )?;
//! assert_eq!(buddy.avail(), initial_free - SZ_16M as u64);
//!
//! let block = allocated.iter().next().expect("expected one block");
//! assert_eq!(block.offset(), (SZ_1G - SZ_16M) as u64);
//! assert_eq!(block.order(), 12); // 2^12 chunks of 4KB = 16MB
//! assert_eq!(block.size(), SZ_16M as u64);
//! assert_eq!(allocated.iter().count(), 1);
//!
//! // Dropping the allocation returns the range to the buddy allocator.
//! drop(allocated);
//! assert_eq!(buddy.avail(), initial_free);
//! # Ok::<(), Error>(())
//! ```
//!
//! Top-down allocation allocates from the highest addresses:
//!
//! ```
//! # use kernel::{
//! #     gpu::buddy::{GpuBuddy, GpuBuddyAllocMode, GpuBuddyAllocFlags, GpuBuddyParams},
//! #     prelude::*,
//! #     ptr::Alignment,
//! #     sizes::*, //
//! # };
//! # let buddy = GpuBuddy::new(GpuBuddyParams {
//! #     base_offset: 0,
//! #     size: SZ_1G as u64,
//! #     chunk_size: Alignment::new::<SZ_4K>(),
//! # })?;
//! # let initial_free = buddy.avail();
//! let topdown = KBox::pin_init(
//!     buddy.alloc_blocks(
//!         GpuBuddyAllocMode::TopDown,
//!         SZ_16M as u64,
//!         Alignment::new::<SZ_16M>(),
//!         GpuBuddyAllocFlags::default(),
//!     ),
//!     GFP_KERNEL,
//! )?;
//! assert_eq!(buddy.avail(), initial_free - SZ_16M as u64);
//!
//! let block = topdown.iter().next().expect("expected one block");
//! assert_eq!(block.offset(), (SZ_1G - SZ_16M) as u64);
//! assert_eq!(block.order(), 12);
//! assert_eq!(block.size(), SZ_16M as u64);
//!
//! // Dropping the allocation returns the range to the buddy allocator.
//! drop(topdown);
//! assert_eq!(buddy.avail(), initial_free);
//! # Ok::<(), Error>(())
//! ```
//!
//! Non-contiguous allocation can fill fragmented memory by returning multiple
//! blocks:
//!
//! ```
//! # use kernel::{
//! #     gpu::buddy::{
//! #         GpuBuddy, GpuBuddyAllocFlags, GpuBuddyAllocMode, GpuBuddyParams,
//! #     },
//! #     prelude::*,
//! #     ptr::Alignment,
//! #     sizes::*, //
//! # };
//! # let buddy = GpuBuddy::new(GpuBuddyParams {
//! #     base_offset: 0,
//! #     size: SZ_1G as u64,
//! #     chunk_size: Alignment::new::<SZ_4K>(),
//! # })?;
//! # let initial_free = buddy.avail();
//! // Create fragmentation by allocating 4MB blocks at [0,4M) and [8M,12M).
//! let frag1 = KBox::pin_init(
//!     buddy.alloc_blocks(
//!         GpuBuddyAllocMode::Range(0..SZ_4M as u64),
//!         SZ_4M as u64,
//!         Alignment::new::<SZ_4M>(),
//!         GpuBuddyAllocFlags::default(),
//!     ),
//!     GFP_KERNEL,
//! )?;
//! assert_eq!(buddy.avail(), initial_free - SZ_4M as u64);
//!
//! let frag2 = KBox::pin_init(
//!     buddy.alloc_blocks(
//!         GpuBuddyAllocMode::Range(SZ_8M as u64..(SZ_8M + SZ_4M) as u64),
//!         SZ_4M as u64,
//!         Alignment::new::<SZ_4M>(),
//!         GpuBuddyAllocFlags::default(),
//!     ),
//!     GFP_KERNEL,
//! )?;
//! assert_eq!(buddy.avail(), initial_free - SZ_8M as u64);
//!
//! // Allocate 8MB, this returns 2 blocks from the holes.
//! let fragmented = KBox::pin_init(
//!     buddy.alloc_blocks(
//!         GpuBuddyAllocMode::Range(0..SZ_16M as u64),
//!         SZ_8M as u64,
//!         Alignment::new::<SZ_4M>(),
//!         GpuBuddyAllocFlags::default(),
//!     ),
//!     GFP_KERNEL,
//! )?;
//! assert_eq!(buddy.avail(), initial_free - SZ_16M as u64);
//!
//! let (mut count, mut total) = (0u32, 0u64);
//! for block in fragmented.iter() {
//!     assert_eq!(block.size(), SZ_4M as u64);
//!     total += block.size();
//!     count += 1;
//! }
//! assert_eq!(total, SZ_8M as u64);
//! assert_eq!(count, 2);
//! # Ok::<(), Error>(())
//! ```
//!
//! Contiguous allocation fails when only fragmented space is available:
//!
//! ```
//! # use kernel::{
//! #     gpu::buddy::{
//! #         GpuBuddy, GpuBuddyAllocFlag, GpuBuddyAllocFlags, GpuBuddyAllocMode, GpuBuddyParams,
//! #     },
//! #     prelude::*,
//! #     ptr::Alignment,
//! #     sizes::*, //
//! # };
//! // Create a small 16MB buddy allocator with fragmented memory.
//! let small = GpuBuddy::new(GpuBuddyParams {
//!     base_offset: 0,
//!     size: SZ_16M as u64,
//!     chunk_size: Alignment::new::<SZ_4K>(),
//! })?;
//!
//! let _hole1 = KBox::pin_init(
//!     small.alloc_blocks(
//!         GpuBuddyAllocMode::Range(0..SZ_4M as u64),
//!         SZ_4M as u64,
//!         Alignment::new::<SZ_4M>(),
//!         GpuBuddyAllocFlags::default(),
//!     ),
//!     GFP_KERNEL,
//! )?;
//!
//! let _hole2 = KBox::pin_init(
//!     small.alloc_blocks(
//!         GpuBuddyAllocMode::Range(SZ_8M as u64..(SZ_8M + SZ_4M) as u64),
//!         SZ_4M as u64,
//!         Alignment::new::<SZ_4M>(),
//!         GpuBuddyAllocFlags::default(),
//!     ),
//!     GFP_KERNEL,
//! )?;
//!
//! // 8MB contiguous should fail, only two non-contiguous 4MB holes exist.
//! let result = KBox::pin_init(
//!     small.alloc_blocks(
//!         GpuBuddyAllocMode::Simple,
//!         SZ_8M as u64,
//!         Alignment::new::<SZ_4M>(),
//!         GpuBuddyAllocFlag::Contiguous,
//!     ),
//!     GFP_KERNEL,
//! );
//! assert!(result.is_err());
//! # Ok::<(), Error>(())
//! ```

use core::ops::Range;

use crate::{
    bindings,
    clist_create,
    error::to_result,
    interop::list::CListHead,
    new_mutex,
    prelude::*,
    ptr::Alignment,
    sync::{
        lock::mutex::MutexGuard,
        Arc,
        Mutex, //
    },
    types::Opaque, //
};

/// Primary allocation strategy requested from the GPU buddy allocator.
///
/// Exactly one strategy applies to an allocation: unconstrained, restricted to
/// an address range, or top-down.
///
/// Independent modifiers (e.g., contiguous, clear) are not part of the mode;
/// they are passed separately as [`GpuBuddyAllocFlags`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum GpuBuddyAllocMode {
    /// Unconstrained allocation.
    Simple,
    /// Allocation restricted to the given address range.
    Range(Range<u64>),
    /// Allocation starting at the top of the address space, going downward.
    TopDown,
}

impl GpuBuddyAllocMode {
    /// Maps the mode to the C flag bits that select it.
    ///
    /// [`Self::Simple`] has no dedicated bit and therefore maps to `0`.
    fn as_flags(&self) -> usize {
        match self {
            Self::TopDown => bindings::GPU_BUDDY_TOPDOWN_ALLOCATION,
            Self::Range(_) => bindings::GPU_BUDDY_RANGE_ALLOCATION,
            Self::Simple => 0,
        }
    }

    /// Returns the `(start, end)` pair to hand to the C allocator.
    ///
    /// Only [`Self::Range`] carries a range; every other mode yields `(0, 0)`.
    fn range(&self) -> (u64, u64) {
        match self {
            Self::Simple | Self::TopDown => (0, 0),
            Self::Range(Range { start, end }) => (*start, *end),
        }
    }
}

crate::impl_flags!(
    /// Modifier flags for GPU buddy allocation.
    ///
    /// These flags can be combined with any [`GpuBuddyAllocMode`] to control
    /// additional allocation behavior. The derived [`Default`] value has no
    /// flag set.
    #[derive(Clone, Copy, Default, PartialEq, Eq)]
    pub struct GpuBuddyAllocFlags(usize);

    /// Individual modifier flag for GPU buddy allocation.
    ///
    /// Each variant takes the value of the matching C `GPU_BUDDY_*` constant.
    #[derive(Clone, Copy, PartialEq, Eq)]
    pub enum GpuBuddyAllocFlag {
        /// Allocate physically contiguous blocks.
        Contiguous = bindings::GPU_BUDDY_CONTIGUOUS_ALLOCATION,

        /// Request allocation from cleared (zeroed) memory.
        Clear = bindings::GPU_BUDDY_CLEAR_ALLOCATION,

        /// Disable trimming of partially used blocks.
        TrimDisable = bindings::GPU_BUDDY_TRIM_DISABLE,
    }
);

/// Parameters for creating a GPU buddy allocator.
///
/// The values are cached by the allocator after creation and can be read back
/// through [`GpuBuddy::base_offset`], [`GpuBuddy::size`] and
/// [`GpuBuddy::chunk_size`].
pub struct GpuBuddyParams {
    /// Base offset (in bytes) where the managed memory region starts.
    /// Allocations will be offset by this value.
    ///
    /// This value is not passed to the C allocator; it is only added to block
    /// offsets when they are reported by [`AllocatedBlock::offset`].
    pub base_offset: u64,
    /// Total size (in bytes) of the address space managed by the allocator.
    pub size: u64,
    /// Minimum allocation unit / chunk size; must be >= 4KB.
    ///
    /// Block sizes are reported as `chunk_size << order`.
    pub chunk_size: Alignment,
}

/// Inner structure holding the actual buddy allocator.
///
/// This is the reference-counted state shared between [`GpuBuddy`] and every
/// [`AllocatedBlocks`] created from it.
///
/// # Synchronization
///
/// The C `gpu_buddy` API requires synchronization (see `include/linux/gpu_buddy.h`).
/// Internal locking ensures all allocator and free operations are properly
/// synchronized, preventing races between concurrent allocations and the
/// freeing that occurs when [`AllocatedBlocks`] is dropped.
///
/// # Invariants
///
/// The inner [`Opaque`] contains an initialized buddy allocator.
#[pin_data(PinnedDrop)]
struct GpuBuddyInner {
    // The C allocator state. It is initialized in place by `gpu_buddy_init` and
    // torn down by `gpu_buddy_fini` when this structure is dropped.
    #[pin]
    inner: Opaque<bindings::gpu_buddy>,

    // The mutex carries no data of its own: it protects `inner` by convention,
    // and every access to `inner` goes through `GpuBuddyInner::lock`.
    //
    // TODO: Replace `Mutex<()>` with `Mutex<Opaque<..>>` once `Mutex::new()`
    // accepts `impl PinInit<T>`.
    #[pin]
    lock: Mutex<()>,
    /// Cached creation parameters (do not change after init).
    params: GpuBuddyParams,
}

impl GpuBuddyInner {
    /// Build the pin-initializer that sets up the C allocator, its lock and the
    /// cached creation parameters.
    ///
    /// Initialization fails with the error reported by `gpu_buddy_init`.
    fn new(params: GpuBuddyParams) -> impl PinInit<Self, Error> {
        // Copy out what the FFI closure needs up front; `params` itself is moved
        // into the structure as its last field.
        let total_size = params.size;
        let chunk_bytes = params.chunk_size.as_usize() as u64;

        // INVARIANT: `gpu_buddy_init` returns 0 on success, at which point the
        // `gpu_buddy` structure is initialized and ready for use with all
        // `gpu_buddy_*` APIs. `try_pin_init!` only completes if all fields succeed,
        // so the invariant holds when construction finishes.
        try_pin_init!(Self {
            inner <- Opaque::try_ffi_init(|raw| {
                // SAFETY: `raw` points to valid uninitialized memory from the pin-init
                // infrastructure. `gpu_buddy_init` will initialize the structure.
                let ret = unsafe { bindings::gpu_buddy_init(raw, total_size, chunk_bytes) };

                to_result(ret)
            }),
            lock <- new_mutex!(()),
            params,
        })
    }

    /// Take the allocator lock and wrap it in a guard that grants access to the
    /// C allocator for as long as the guard lives.
    fn lock(&self) -> GpuBuddyGuard<'_> {
        let _guard = self.lock.lock();

        GpuBuddyGuard { inner: self, _guard }
    }
}

#[pinned_drop]
impl PinnedDrop for GpuBuddyInner {
    fn drop(self: Pin<&mut Self>) {
        // Keep the lock held across the teardown call; it is released when
        // `locked` goes out of scope at the end of this function.
        let locked = self.lock();
        let raw = locked.as_raw();

        // SAFETY: Per the type invariant, `inner` contains an initialized
        // allocator. `locked` provides exclusive access.
        unsafe { bindings::gpu_buddy_fini(raw) };
    }
}

// SAFETY: `GpuBuddyInner` can be sent between threads.
//
// NOTE(review): this presumes the C `gpu_buddy` state is not tied to the thread
// that initialized it. Nothing in this file establishes that; confirm against
// the C implementation.
unsafe impl Send for GpuBuddyInner {}

// SAFETY: `GpuBuddyInner` is `Sync` because `GpuBuddyInner::lock`
// serializes all access to the C allocator, preventing data races.
// The only state read without the lock is `params`, which is never written
// after initialization.
unsafe impl Sync for GpuBuddyInner {}

/// Proof that the allocator lock is held, granting access to the C allocator.
///
/// The lock stays held for as long as this value is alive, because the mutex
/// guard stored in `_guard` is only released when this guard is dropped.
struct GpuBuddyGuard<'a> {
    inner: &'a GpuBuddyInner,
    _guard: MutexGuard<'a, ()>,
}

impl GpuBuddyGuard<'_> {
    /// Raw pointer to the C `gpu_buddy` structure protected by the held lock.
    fn as_raw(&self) -> *mut bindings::gpu_buddy {
        let allocator = &self.inner.inner;

        allocator.get()
    }
}

/// GPU buddy allocator instance.
///
/// This structure wraps the C `gpu_buddy` allocator using reference counting.
/// The allocator is automatically cleaned up when all references are dropped.
///
/// Every [`AllocatedBlocks`] obtained from this allocator holds its own
/// reference, so the underlying allocator outlives this handle for as long as
/// any allocation is still alive.
///
/// Refer to the module-level documentation for usage examples.
pub struct GpuBuddy(Arc<GpuBuddyInner>);

impl GpuBuddy {
    /// Create a new buddy allocator.
    ///
    /// The allocator manages a contiguous address space of the given size, with the
    /// specified minimum allocation unit (chunk_size must be at least 4KB).
    pub fn new(params: GpuBuddyParams) -> Result<Self> {
        Arc::pin_init(GpuBuddyInner::new(params), GFP_KERNEL).map(Self)
    }

    /// Get the base offset for allocations.
    pub fn base_offset(&self) -> u64 {
        self.0.params.base_offset
    }

    /// Get the chunk size (minimum allocation unit).
    pub fn chunk_size(&self) -> Alignment {
        self.0.params.chunk_size
    }

    /// Get the total managed size.
    pub fn size(&self) -> u64 {
        self.0.params.size
    }

    /// Get the available (free) memory in bytes.
    pub fn avail(&self) -> u64 {
        let guard = self.0.lock();

        // SAFETY: Per the type invariant, `inner` contains an initialized allocator.
        // `guard` provides exclusive access.
        unsafe { (*guard.as_raw()).avail }
    }

    /// Allocate blocks from the buddy allocator.
    ///
    /// Returns a pin-initializer for [`AllocatedBlocks`].
    pub fn alloc_blocks(
        &self,
        mode: GpuBuddyAllocMode,
        size: u64,
        min_block_size: Alignment,
        flags: impl Into<GpuBuddyAllocFlags>,
    ) -> impl PinInit<AllocatedBlocks, Error> {
        let buddy_arc = Arc::clone(&self.0);
        let (start, end) = mode.range();
        let mode_flags = mode.as_flags();
        let modifier_flags = flags.into();

        // Create pin-initializer that initializes list and allocates blocks.
        try_pin_init!(AllocatedBlocks {
            buddy: buddy_arc,
            list <- CListHead::new(),
            _: {
                // Reject zero-sized or inverted ranges.
                if let GpuBuddyAllocMode::Range(range) = &mode {
                    if range.is_empty() {
                        Err::<(), Error>(EINVAL)?;
                    }
                }

                // Lock while allocating to serialize with concurrent frees.
                let guard = buddy.lock();

                // SAFETY: Per the type invariant, `inner` contains an initialized
                // allocator. `guard` provides exclusive access.
                to_result(unsafe {
                    bindings::gpu_buddy_alloc_blocks(
                        guard.as_raw(),
                        start,
                        end,
                        size,
                        min_block_size.as_usize() as u64,
                        list.as_raw(),
                        mode_flags | usize::from(modifier_flags),
                    )
                })?
            }
        })
    }
}

/// Allocated blocks from the buddy allocator with automatic cleanup.
///
/// This structure owns a list of allocated blocks and ensures they are
/// automatically freed when dropped. Use `iter()` to iterate over all
/// allocated blocks.
///
/// Instances are created through the pin-initializer returned by
/// [`GpuBuddy::alloc_blocks`].
///
/// # Invariants
///
/// - `list` is an initialized, valid list head containing allocated blocks.
#[pin_data(PinnedDrop)]
pub struct AllocatedBlocks {
    // Sentinel list head that the C allocator links the allocated
    // `gpu_buddy_block`s onto.
    #[pin]
    list: CListHead,
    // Reference to the allocator the blocks came from. It keeps the allocator
    // alive and supplies the lock taken when the blocks are freed on drop.
    buddy: Arc<GpuBuddyInner>,
}

impl AllocatedBlocks {
    /// Returns `true` if this allocation holds no blocks.
    pub fn is_empty(&self) -> bool {
        // A list head that is not linked points to itself, i.e. the list is empty.
        let has_blocks = self.list.is_linked();

        !has_blocks
    }

    /// Iterate over allocated blocks.
    ///
    /// The iterator yields one [`AllocatedBlock`] per block on the list. Every
    /// yielded value borrows `self` and cannot outlive that borrow.
    pub fn iter(&self) -> impl Iterator<Item = AllocatedBlock<'_>> + '_ {
        let sentinel = self.list.as_raw();
        // SAFETY: Per the type invariant, `list` is an initialized sentinel `list_head`
        // and is not concurrently modified (we hold a `&self` borrow). The list contains
        // `gpu_buddy_block` items linked via `__bindgen_anon_1.link`. `Block` is
        // `#[repr(transparent)]` over `gpu_buddy_block`.
        let entries = unsafe {
            clist_create!(
                sentinel,
                Block,
                bindings::gpu_buddy_block,
                __bindgen_anon_1.link
            )
        };

        // Pair every raw block with `self` so that it can reach the allocator's
        // cached parameters.
        entries.iter().map(|block| AllocatedBlock {
            this: block,
            blocks: self,
        })
    }
}

#[pinned_drop]
impl PinnedDrop for AllocatedBlocks {
    fn drop(self: Pin<&mut Self>) {
        // Freeing must be serialized with concurrent allocations, so the
        // allocator lock is held for the whole call.
        let locked = self.buddy.lock();
        let allocator = locked.as_raw();
        let blocks = self.list.as_raw();

        // SAFETY:
        // - `blocks` is valid per the type's invariants.
        // - `locked` provides exclusive access to the allocator.
        unsafe { bindings::gpu_buddy_free_list(allocator, blocks, 0) };
    }
}

/// A single block handed out by the GPU buddy allocator.
///
/// This is a transparent wrapper over the C `gpu_buddy_block` structure.
/// References to it are produced while iterating over [`AllocatedBlocks`].
///
/// # Invariants
///
/// The inner [`Opaque`] contains a valid, allocated `gpu_buddy_block`.
#[repr(transparent)]
struct Block(Opaque<bindings::gpu_buddy_block>);

impl Block {
    /// Raw pointer to the wrapped C block.
    fn as_raw(&self) -> *mut bindings::gpu_buddy_block {
        let Self(opaque) = self;

        opaque.get()
    }

    /// Offset of the block inside the buddy address space, i.e. without the
    /// allocator's base offset applied.
    fn offset(&self) -> u64 {
        let raw = self.as_raw();

        // SAFETY: `raw` is valid per the type's invariants.
        unsafe { bindings::gpu_buddy_block_offset(raw) }
    }

    /// Order of the block.
    fn order(&self) -> u32 {
        let raw = self.as_raw();

        // SAFETY: `raw` is valid per the type's invariants.
        unsafe { bindings::gpu_buddy_block_order(raw) }
    }
}

// SAFETY: `Block` is a wrapper around `gpu_buddy_block` which can be
// sent across threads safely.
//
// NOTE(review): nothing in this file shows why `gpu_buddy_block` has no thread
// affinity; confirm against the C implementation.
unsafe impl Send for Block {}

// SAFETY: `Block` is only accessed through shared references after
// allocation, and thus safe to access concurrently across threads.
// The methods on `Block` in this file only call the C offset and order
// accessors.
unsafe impl Sync for Block {}

/// A buddy block paired with its owning [`AllocatedBlocks`] context.
///
/// Unlike a raw block, which only knows its offset within the buddy address
/// space, an [`AllocatedBlock`] also has access to the allocator's `base_offset`
/// and `chunk_size`, enabling it to compute absolute offsets and byte sizes.
///
/// Returned by [`AllocatedBlocks::iter()`].
pub struct AllocatedBlock<'a> {
    this: &'a Block,
    blocks: &'a AllocatedBlocks,
}

impl AllocatedBlock<'_> {
    /// Get the block's offset in the address space.
    ///
    /// Returns the absolute offset including the allocator's base offset.
    /// This is the actual address to use for accessing the allocated memory.
    pub fn offset(&self) -> u64 {
        self.blocks.buddy.params.base_offset + self.this.offset()
    }

    /// Get the block order (size = chunk_size << order).
    pub fn order(&self) -> u32 {
        self.this.order()
    }

    /// Get the block's size in bytes.
    pub fn size(&self) -> u64 {
        (self.blocks.buddy.params.chunk_size.as_usize() as u64) << self.this.order()
    }
}