diff --git a/Documentation/gpu/nova/core/todo.rst b/Documentation/gpu/nova/core/todo.rst index d1964eb645e2..d5130b2b08fb 100644 --- a/Documentation/gpu/nova/core/todo.rst +++ b/Documentation/gpu/nova/core/todo.rst @@ -51,82 +51,6 @@ There also have been considerations of ToPrimitive [2]. | Link: https://lore.kernel.org/all/cover.1750689857.git.y.j3ms.n@gmail.com/ [1] | Link: https://rust-for-linux.zulipchat.com/#narrow/channel/288089-General/topic/Implement.20.60FromPrimitive.60.20trait.20.2B.20derive.20macro.20for.20nova-core/with/541971854 [2] -Generic register abstraction [REGA] ------------------------------------ - -Work out how register constants and structures can be automatically generated -through generalized macros. - -Example: - -.. code-block:: rust - - register!(BOOT0, 0x0, u32, pci::Bar, Fields [ - MINOR_REVISION(3:0, RO), - MAJOR_REVISION(7:4, RO), - REVISION(7:0, RO), // Virtual register combining major and minor rev. - ]) - -This could expand to something like: - -.. code-block:: rust - - const BOOT0_OFFSET: usize = 0x00000000; - const BOOT0_MINOR_REVISION_SHIFT: u8 = 0; - const BOOT0_MINOR_REVISION_MASK: u32 = 0x0000000f; - const BOOT0_MAJOR_REVISION_SHIFT: u8 = 4; - const BOOT0_MAJOR_REVISION_MASK: u32 = 0x000000f0; - const BOOT0_REVISION_SHIFT: u8 = BOOT0_MINOR_REVISION_SHIFT; - const BOOT0_REVISION_MASK: u32 = BOOT0_MINOR_REVISION_MASK | BOOT0_MAJOR_REVISION_MASK; - - struct Boot0(u32); - - impl Boot0 { - #[inline] - fn read(bar: &RevocableGuard<'_, pci::Bar>) -> Self { - Self(bar.readl(BOOT0_OFFSET)) - } - - #[inline] - fn minor_revision(&self) -> u32 { - (self.0 & BOOT0_MINOR_REVISION_MASK) >> BOOT0_MINOR_REVISION_SHIFT - } - - #[inline] - fn major_revision(&self) -> u32 { - (self.0 & BOOT0_MAJOR_REVISION_MASK) >> BOOT0_MAJOR_REVISION_SHIFT - } - - #[inline] - fn revision(&self) -> u32 { - (self.0 & BOOT0_REVISION_MASK) >> BOOT0_REVISION_SHIFT - } - } - -Usage: - -.. 
code-block:: rust - - let bar = bar.try_access().ok_or(ENXIO)?; - - let boot0 = Boot0::read(&bar); - pr_info!("Revision: {}\n", boot0.revision()); - -A work-in-progress implementation currently resides in -`drivers/gpu/nova-core/regs/macros.rs` and is used in nova-core. It would be -nice to improve it (possibly using proc macros) and move it to the `kernel` -crate so it can be used by other components as well. - -Features desired before this happens: - -* Make I/O optional I/O (for field values that are not registers), -* Support other sizes than `u32`, -* Allow visibility control for registers and individual fields, -* Use Rust slice syntax to express fields ranges. - -| Complexity: Advanced -| Contact: Alexandre Courbot - Numerical operations [NUMM] --------------------------- diff --git a/MAINTAINERS b/MAINTAINERS index ae8a01f30ff4..608a5c69df4f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7534,6 +7534,7 @@ F: include/linux/*fence.h F: include/linux/dma-buf.h F: include/linux/dma-buf/ F: include/linux/dma-resv.h +F: rust/helpers/dma-resv.c K: \bdma_(?:buf|fence|resv)\b DMA GENERIC OFFLOAD ENGINE SUBSYSTEM @@ -8513,7 +8514,10 @@ T: git https://gitlab.freedesktop.org/drm/rust/kernel.git F: drivers/gpu/drm/nova/ F: drivers/gpu/drm/tyr/ F: drivers/gpu/nova-core/ +F: rust/helpers/gpu.c F: rust/kernel/drm/ +F: rust/kernel/gpu.rs +F: rust/kernel/gpu/ DRM DRIVERS FOR ALLWINNER A10 M: Chen-Yu Tsai @@ -8931,7 +8935,7 @@ F: include/drm/ttm/ GPU BUDDY ALLOCATOR M: Matthew Auld M: Arun Pravin -R: Christian Koenig +R: Joel Fernandes L: dri-devel@lists.freedesktop.org S: Maintained T: git https://gitlab.freedesktop.org/drm/misc/kernel.git @@ -8940,6 +8944,9 @@ F: drivers/gpu/drm/drm_buddy.c F: drivers/gpu/tests/gpu_buddy_test.c F: include/drm/drm_buddy.h F: include/linux/gpu_buddy.h +F: rust/helpers/gpu.c +F: rust/kernel/gpu.rs +F: rust/kernel/gpu/ DRM AUTOMATED TESTING M: Helen Koike @@ -23208,6 +23215,15 @@ T: git https://github.com/Rust-for-Linux/linux.git alloc-next F: 
rust/kernel/alloc.rs F: rust/kernel/alloc/ +RUST [INTEROP] +M: Joel Fernandes +M: Alexandre Courbot +L: rust-for-linux@vger.kernel.org +S: Maintained +T: git https://github.com/Rust-for-Linux/linux.git interop-next +F: rust/kernel/interop.rs +F: rust/kernel/interop/ + RUST [NUM] M: Alexandre Courbot R: Yury Norov diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 5386248e75b6..8f5a8d3012e4 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -268,6 +268,13 @@ config DRM_GEM_SHMEM_HELPER help Choose this if you need the GEM shmem helper functions +config RUST_DRM_GEM_SHMEM_HELPER + bool + depends on DRM && MMU + select DRM_GEM_SHMEM_HELPER + help + Choose this if you need the GEM shmem helper functions in Rust + config DRM_SUBALLOC_HELPER tristate depends on DRM diff --git a/drivers/gpu/drm/nova/gem.rs b/drivers/gpu/drm/nova/gem.rs index 6ccfa5da5761..e073e174e257 100644 --- a/drivers/gpu/drm/nova/gem.rs +++ b/drivers/gpu/drm/nova/gem.rs @@ -19,8 +19,9 @@ pub(crate) struct NovaObject {} impl gem::DriverObject for NovaObject { type Driver = NovaDriver; + type Args = (); - fn new(_dev: &NovaDevice, _size: usize) -> impl PinInit { + fn new(_dev: &NovaDevice, _size: usize, _args: Self::Args) -> impl PinInit { try_pin_init!(NovaObject {}) } } @@ -33,7 +34,7 @@ impl NovaObject { } let aligned_size = page::page_align(size).ok_or(EINVAL)?; - gem::Object::new(dev, aligned_size) + gem::Object::new(dev, aligned_size, ()) } /// Look up a GEM object handle for a `File` and return an `ObjectRef` for it. 
diff --git a/drivers/gpu/drm/tyr/driver.rs b/drivers/gpu/drm/tyr/driver.rs index beeffe36b6cb..611434641580 100644 --- a/drivers/gpu/drm/tyr/driver.rs +++ b/drivers/gpu/drm/tyr/driver.rs @@ -1,44 +1,56 @@ // SPDX-License-Identifier: GPL-2.0 or MIT -use kernel::clk::Clk; -use kernel::clk::OptionalClk; -use kernel::device::Bound; -use kernel::device::Core; -use kernel::device::Device; -use kernel::devres::Devres; -use kernel::drm; -use kernel::drm::ioctl; -use kernel::io::poll; -use kernel::new_mutex; -use kernel::of; -use kernel::platform; -use kernel::prelude::*; -use kernel::regulator; -use kernel::regulator::Regulator; -use kernel::sizes::SZ_2M; -use kernel::sync::aref::ARef; -use kernel::sync::Arc; -use kernel::sync::Mutex; -use kernel::time; +use kernel::{ + clk::{ + Clk, + OptionalClk, // + }, + device::{ + Bound, + Core, + Device, // + }, + devres::Devres, + drm, + drm::ioctl, + io::poll, + new_mutex, + of, + platform, + prelude::*, + regulator, + regulator::Regulator, + sizes::SZ_2M, + sync::{ + aref::ARef, + Arc, + Mutex, // + }, + time, // +}; -use crate::file::File; -use crate::gem::TyrObject; -use crate::gpu; -use crate::gpu::GpuInfo; -use crate::regs; +use crate::{ + file::TyrDrmFileData, + gem::TyrObject, + gpu, + gpu::GpuInfo, + regs, // +}; pub(crate) type IoMem = kernel::io::mem::IoMem; +pub(crate) struct TyrDrmDriver; + /// Convenience type alias for the DRM device type for this driver. -pub(crate) type TyrDevice = drm::Device; +pub(crate) type TyrDrmDevice = drm::Device; #[pin_data(PinnedDrop)] -pub(crate) struct TyrDriver { - _device: ARef, +pub(crate) struct TyrPlatformDriverData { + _device: ARef, } #[pin_data(PinnedDrop)] -pub(crate) struct TyrData { +pub(crate) struct TyrDrmDeviceData { pub(crate) pdev: ARef, #[pin] @@ -61,9 +73,9 @@ pub(crate) struct TyrData { // that it will be removed in a future patch. // // SAFETY: This will be removed in a future patch. 
-unsafe impl Send for TyrData {} +unsafe impl Send for TyrDrmDeviceData {} // SAFETY: This will be removed in a future patch. -unsafe impl Sync for TyrData {} +unsafe impl Sync for TyrDrmDeviceData {} fn issue_soft_reset(dev: &Device, iomem: &Devres) -> Result { regs::GPU_CMD.write(dev, iomem, regs::GPU_CMD_SOFT_RESET)?; @@ -82,14 +94,14 @@ fn issue_soft_reset(dev: &Device, iomem: &Devres) -> Result { kernel::of_device_table!( OF_TABLE, MODULE_OF_TABLE, - ::IdInfo, + ::IdInfo, [ (of::DeviceId::new(c"rockchip,rk3588-mali"), ()), (of::DeviceId::new(c"arm,mali-valhall-csf"), ()) ] ); -impl platform::Driver for TyrDriver { +impl platform::Driver for TyrPlatformDriverData { type IdInfo = (); const OF_ID_TABLE: Option> = Some(&OF_TABLE); @@ -119,7 +131,7 @@ impl platform::Driver for TyrDriver { let platform: ARef = pdev.into(); - let data = try_pin_init!(TyrData { + let data = try_pin_init!(TyrDrmDeviceData { pdev: platform.clone(), clks <- new_mutex!(Clocks { core: core_clk, @@ -133,10 +145,10 @@ impl platform::Driver for TyrDriver { gpu_info, }); - let tdev: ARef = drm::Device::new(pdev.as_ref(), data)?; - drm::driver::Registration::new_foreign_owned(&tdev, pdev.as_ref(), 0)?; + let ddev: ARef = drm::Device::new(pdev.as_ref(), data)?; + drm::driver::Registration::new_foreign_owned(&ddev, pdev.as_ref(), 0)?; - let driver = TyrDriver { _device: tdev }; + let driver = TyrPlatformDriverData { _device: ddev }; // We need this to be dev_info!() because dev_dbg!() does not work at // all in Rust for now, and we need to see whether probe succeeded. @@ -146,12 +158,12 @@ impl platform::Driver for TyrDriver { } #[pinned_drop] -impl PinnedDrop for TyrDriver { +impl PinnedDrop for TyrPlatformDriverData { fn drop(self: Pin<&mut Self>) {} } #[pinned_drop] -impl PinnedDrop for TyrData { +impl PinnedDrop for TyrDrmDeviceData { fn drop(self: Pin<&mut Self>) { // TODO: the type-state pattern for Clks will fix this. 
let clks = self.clks.lock(); @@ -172,15 +184,15 @@ const INFO: drm::DriverInfo = drm::DriverInfo { }; #[vtable] -impl drm::Driver for TyrDriver { - type Data = TyrData; - type File = File; +impl drm::Driver for TyrDrmDriver { + type Data = TyrDrmDeviceData; + type File = TyrDrmFileData; type Object = drm::gem::Object; const INFO: drm::DriverInfo = INFO; kernel::declare_drm_ioctls! { - (PANTHOR_DEV_QUERY, drm_panthor_dev_query, ioctl::RENDER_ALLOW, File::dev_query), + (PANTHOR_DEV_QUERY, drm_panthor_dev_query, ioctl::RENDER_ALLOW, TyrDrmFileData::dev_query), } } diff --git a/drivers/gpu/drm/tyr/file.rs b/drivers/gpu/drm/tyr/file.rs index 0ef432947b73..31411da203c5 100644 --- a/drivers/gpu/drm/tyr/file.rs +++ b/drivers/gpu/drm/tyr/file.rs @@ -1,37 +1,41 @@ // SPDX-License-Identifier: GPL-2.0 or MIT -use kernel::drm; -use kernel::prelude::*; -use kernel::uaccess::UserSlice; -use kernel::uapi; +use kernel::{ + drm, + prelude::*, + uaccess::UserSlice, + uapi, // +}; -use crate::driver::TyrDevice; -use crate::TyrDriver; +use crate::driver::{ + TyrDrmDevice, + TyrDrmDriver, // +}; #[pin_data] -pub(crate) struct File {} +pub(crate) struct TyrDrmFileData {} /// Convenience type alias for our DRM `File` type -pub(crate) type DrmFile = drm::file::File; +pub(crate) type TyrDrmFile = drm::file::File; -impl drm::file::DriverFile for File { - type Driver = TyrDriver; +impl drm::file::DriverFile for TyrDrmFileData { + type Driver = TyrDrmDriver; fn open(_dev: &drm::Device) -> Result>> { KBox::try_pin_init(try_pin_init!(Self {}), GFP_KERNEL) } } -impl File { +impl TyrDrmFileData { pub(crate) fn dev_query( - tdev: &TyrDevice, + ddev: &TyrDrmDevice, devquery: &mut uapi::drm_panthor_dev_query, - _file: &DrmFile, + _file: &TyrDrmFile, ) -> Result { if devquery.pointer == 0 { match devquery.type_ { uapi::drm_panthor_dev_query_type_DRM_PANTHOR_DEV_QUERY_GPU_INFO => { - devquery.size = core::mem::size_of_val(&tdev.gpu_info) as u32; + devquery.size = core::mem::size_of_val(&ddev.gpu_info) 
as u32; Ok(0) } _ => Err(EINVAL), @@ -45,7 +49,7 @@ impl File { ) .writer(); - writer.write(&tdev.gpu_info)?; + writer.write(&ddev.gpu_info)?; Ok(0) } diff --git a/drivers/gpu/drm/tyr/gem.rs b/drivers/gpu/drm/tyr/gem.rs index 1273bf89dbd5..5cc6eb0b5d3f 100644 --- a/drivers/gpu/drm/tyr/gem.rs +++ b/drivers/gpu/drm/tyr/gem.rs @@ -1,18 +1,24 @@ // SPDX-License-Identifier: GPL-2.0 or MIT -use crate::driver::TyrDevice; -use crate::driver::TyrDriver; -use kernel::drm::gem; -use kernel::prelude::*; +use kernel::{ + drm::gem, + prelude::*, // +}; + +use crate::driver::{ + TyrDrmDevice, + TyrDrmDriver, // +}; /// GEM Object inner driver data #[pin_data] pub(crate) struct TyrObject {} impl gem::DriverObject for TyrObject { - type Driver = TyrDriver; + type Driver = TyrDrmDriver; + type Args = (); - fn new(_dev: &TyrDevice, _size: usize) -> impl PinInit { + fn new(_dev: &TyrDrmDevice, _size: usize, _args: ()) -> impl PinInit { try_pin_init!(TyrObject {}) } } diff --git a/drivers/gpu/drm/tyr/gpu.rs b/drivers/gpu/drm/tyr/gpu.rs index 64ca8311d4e8..a88775160f98 100644 --- a/drivers/gpu/drm/tyr/gpu.rs +++ b/drivers/gpu/drm/tyr/gpu.rs @@ -1,20 +1,28 @@ // SPDX-License-Identifier: GPL-2.0 or MIT -use core::ops::Deref; -use core::ops::DerefMut; -use kernel::bits::genmask_u32; -use kernel::device::Bound; -use kernel::device::Device; -use kernel::devres::Devres; -use kernel::io::poll; -use kernel::platform; -use kernel::prelude::*; -use kernel::time::Delta; -use kernel::transmute::AsBytes; -use kernel::uapi; +use core::ops::{ + Deref, + DerefMut, // +}; +use kernel::{ + bits::genmask_u32, + device::{ + Bound, + Device, // + }, + devres::Devres, + io::poll, + platform, + prelude::*, + time::Delta, + transmute::AsBytes, + uapi, // +}; -use crate::driver::IoMem; -use crate::regs; +use crate::{ + driver::IoMem, + regs, // +}; /// Struct containing information that can be queried by userspace. This is read from /// the GPU's registers. 
@@ -84,13 +92,11 @@ impl GpuInfo { } pub(crate) fn log(&self, pdev: &platform::Device) { - let major = (self.gpu_id >> 16) & 0xff; - let minor = (self.gpu_id >> 8) & 0xff; - let status = self.gpu_id & 0xff; + let gpu_id = GpuId::from(self.gpu_id); let model_name = if let Some(model) = GPU_MODELS .iter() - .find(|&f| f.major == major && f.minor == minor) + .find(|&f| f.arch_major == gpu_id.arch_major && f.prod_major == gpu_id.prod_major) { model.name } else { @@ -102,9 +108,9 @@ impl GpuInfo { "mali-{} id 0x{:x} major 0x{:x} minor 0x{:x} status 0x{:x}", model_name, self.gpu_id >> 16, - major, - minor, - status + gpu_id.ver_major, + gpu_id.ver_minor, + gpu_id.ver_status ); dev_info!( @@ -166,14 +172,14 @@ unsafe impl AsBytes for GpuInfo {} struct GpuModels { name: &'static str, - major: u32, - minor: u32, + arch_major: u32, + prod_major: u32, } const GPU_MODELS: [GpuModels; 1] = [GpuModels { name: "g610", - major: 10, - minor: 7, + arch_major: 10, + prod_major: 7, }]; #[allow(dead_code)] diff --git a/drivers/gpu/drm/tyr/regs.rs b/drivers/gpu/drm/tyr/regs.rs index d3a541cb37c6..611870c2e6af 100644 --- a/drivers/gpu/drm/tyr/regs.rs +++ b/drivers/gpu/drm/tyr/regs.rs @@ -7,12 +7,16 @@ // does. #![allow(dead_code)] -use kernel::bits::bit_u32; -use kernel::device::Bound; -use kernel::device::Device; -use kernel::devres::Devres; -use kernel::io::Io; -use kernel::prelude::*; +use kernel::{ + bits::bit_u32, + device::{ + Bound, + Device, // + }, + devres::Devres, + io::Io, + prelude::*, // +}; use crate::driver::IoMem; diff --git a/drivers/gpu/drm/tyr/tyr.rs b/drivers/gpu/drm/tyr/tyr.rs index 861d1db43072..9432ddd6b5b8 100644 --- a/drivers/gpu/drm/tyr/tyr.rs +++ b/drivers/gpu/drm/tyr/tyr.rs @@ -5,7 +5,7 @@ //! The name "Tyr" is inspired by Norse mythology, reflecting Arm's tradition of //! naming their GPUs after Nordic mythological figures and places. 
-use crate::driver::TyrDriver; +use crate::driver::TyrPlatformDriverData; mod driver; mod file; @@ -14,7 +14,7 @@ mod gpu; mod regs; kernel::module_platform_driver! { - type: TyrDriver, + type: TyrPlatformDriverData, name: "tyr", authors: ["The Tyr driver authors"], description: "Arm Mali Tyr DRM driver", diff --git a/drivers/gpu/nova-core/Kconfig b/drivers/gpu/nova-core/Kconfig index 527920f9c4d3..a4f2380654e2 100644 --- a/drivers/gpu/nova-core/Kconfig +++ b/drivers/gpu/nova-core/Kconfig @@ -3,8 +3,8 @@ config NOVA_CORE depends on 64BIT depends on PCI depends on RUST - select RUST_FW_LOADER_ABSTRACTIONS select AUXILIARY_BUS + select RUST_FW_LOADER_ABSTRACTIONS default n help Choose this if you want to build the Nova Core driver for Nvidia diff --git a/drivers/gpu/nova-core/dma.rs b/drivers/gpu/nova-core/dma.rs deleted file mode 100644 index 7215398969da..000000000000 --- a/drivers/gpu/nova-core/dma.rs +++ /dev/null @@ -1,54 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -//! Simple DMA object wrapper. - -use core::ops::{ - Deref, - DerefMut, // -}; - -use kernel::{ - device, - dma::CoherentAllocation, - page::PAGE_SIZE, - prelude::*, // -}; - -pub(crate) struct DmaObject { - dma: CoherentAllocation, -} - -impl DmaObject { - pub(crate) fn new(dev: &device::Device, len: usize) -> Result { - let len = core::alloc::Layout::from_size_align(len, PAGE_SIZE) - .map_err(|_| EINVAL)? - .pad_to_align() - .size(); - let dma = CoherentAllocation::alloc_coherent(dev, len, GFP_KERNEL | __GFP_ZERO)?; - - Ok(Self { dma }) - } - - pub(crate) fn from_data(dev: &device::Device, data: &[u8]) -> Result { - Self::new(dev, data.len()).and_then(|mut dma_obj| { - // SAFETY: We have just allocated the DMA memory, we are the only users and - // we haven't made the device aware of the handle yet. - unsafe { dma_obj.write(data, 0)? 
} - Ok(dma_obj) - }) - } -} - -impl Deref for DmaObject { - type Target = CoherentAllocation; - - fn deref(&self) -> &Self::Target { - &self.dma - } -} - -impl DerefMut for DmaObject { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.dma - } -} diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs index 5a4cc047bcfc..84b0e1703150 100644 --- a/drivers/gpu/nova-core/driver.rs +++ b/drivers/gpu/nova-core/driver.rs @@ -14,11 +14,20 @@ use kernel::{ }, prelude::*, sizes::SZ_16M, - sync::Arc, // + sync::{ + atomic::{ + Atomic, + Relaxed, // + }, + Arc, + }, }; use crate::gpu::Gpu; +/// Counter for generating unique auxiliary device IDs. +static AUXILIARY_ID_COUNTER: Atomic = Atomic::new(0); + #[pin_data] pub(crate) struct NovaCore { #[pin] @@ -70,7 +79,7 @@ impl pci::Driver for NovaCore { fn probe(pdev: &pci::Device, _info: &Self::IdInfo) -> impl PinInit { pin_init::pin_init_scope(move || { - dev_dbg!(pdev.as_ref(), "Probe Nova Core GPU driver.\n"); + dev_dbg!(pdev, "Probe Nova Core GPU driver.\n"); pdev.enable_device_mem()?; pdev.set_master(); @@ -90,7 +99,9 @@ impl pci::Driver for NovaCore { _reg <- auxiliary::Registration::new( pdev.as_ref(), c"nova-drm", - 0, // TODO[XARR]: Once it lands, use XArray; for now we don't use the ID. + // TODO[XARR]: Use XArray or perhaps IDA for proper ID allocation/recycling. For + // now, use a simple atomic counter that never recycles IDs. + AUXILIARY_ID_COUNTER.fetch_add(1, Relaxed), crate::MODULE_NAME ), })) diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index 37bfee1d0949..e0315fda576b 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -2,243 +2,135 @@ //! 
Falcon microprocessor base support -use core::ops::Deref; - use hal::FalconHal; use kernel::{ - device, + device::{ + self, + Device, // + }, dma::{ + Coherent, DmaAddress, DmaMask, // }, - io::poll::read_poll_timeout, + io::{ + poll::read_poll_timeout, + register::{ + RegisterBase, + WithBase, // + }, + Io, + }, prelude::*, sync::aref::ARef, - time::{ - Delta, // - }, + time::Delta, }; use crate::{ - dma::DmaObject, + bounded_enum, driver::Bar0, falcon::hal::LoadMethod, gpu::Chipset, num::{ - FromSafeCast, - IntoSafeCast, // + self, + FromSafeCast, // }, regs, - regs::macros::RegisterBase, // }; pub(crate) mod gsp; mod hal; pub(crate) mod sec2; -// TODO[FPRI]: Replace with `ToPrimitive`. -macro_rules! impl_from_enum_to_u8 { - ($enum_type:ty) => { - impl From<$enum_type> for u8 { - fn from(value: $enum_type) -> Self { - value as u8 - } - } - }; -} +/// Alignment (in bytes) of falcon memory blocks. +pub(crate) const MEM_BLOCK_ALIGNMENT: usize = 256; -/// Revision number of a falcon core, used in the [`crate::regs::NV_PFALCON_FALCON_HWCFG1`] -/// register. -#[repr(u8)] -#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] -pub(crate) enum FalconCoreRev { - #[default] - Rev1 = 1, - Rev2 = 2, - Rev3 = 3, - Rev4 = 4, - Rev5 = 5, - Rev6 = 6, - Rev7 = 7, -} -impl_from_enum_to_u8!(FalconCoreRev); - -// TODO[FPRI]: replace with `FromPrimitive`. -impl TryFrom for FalconCoreRev { - type Error = Error; - - fn try_from(value: u8) -> Result { - use FalconCoreRev::*; - - let rev = match value { - 1 => Rev1, - 2 => Rev2, - 3 => Rev3, - 4 => Rev4, - 5 => Rev5, - 6 => Rev6, - 7 => Rev7, - _ => return Err(EINVAL), - }; - - Ok(rev) +bounded_enum! { + /// Revision number of a falcon core, used in the [`crate::regs::NV_PFALCON_FALCON_HWCFG1`] + /// register. 
+ #[derive(Debug, Copy, Clone)] + pub(crate) enum FalconCoreRev with TryFrom> { + Rev1 = 1, + Rev2 = 2, + Rev3 = 3, + Rev4 = 4, + Rev5 = 5, + Rev6 = 6, + Rev7 = 7, } } -/// Revision subversion number of a falcon core, used in the -/// [`crate::regs::NV_PFALCON_FALCON_HWCFG1`] register. -#[repr(u8)] -#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] -pub(crate) enum FalconCoreRevSubversion { - #[default] - Subversion0 = 0, - Subversion1 = 1, - Subversion2 = 2, - Subversion3 = 3, -} -impl_from_enum_to_u8!(FalconCoreRevSubversion); - -// TODO[FPRI]: replace with `FromPrimitive`. -impl TryFrom for FalconCoreRevSubversion { - type Error = Error; - - fn try_from(value: u8) -> Result { - use FalconCoreRevSubversion::*; - - let sub_version = match value & 0b11 { - 0 => Subversion0, - 1 => Subversion1, - 2 => Subversion2, - 3 => Subversion3, - _ => return Err(EINVAL), - }; - - Ok(sub_version) +bounded_enum! { + /// Revision subversion number of a falcon core, used in the + /// [`crate::regs::NV_PFALCON_FALCON_HWCFG1`] register. + #[derive(Debug, Copy, Clone)] + pub(crate) enum FalconCoreRevSubversion with From> { + Subversion0 = 0, + Subversion1 = 1, + Subversion2 = 2, + Subversion3 = 3, } } -/// Security model of a falcon core, used in the [`crate::regs::NV_PFALCON_FALCON_HWCFG1`] -/// register. -#[repr(u8)] -#[derive(Debug, Default, Copy, Clone)] -/// Security mode of the Falcon microprocessor. -/// -/// See `falcon.rst` for more details. -pub(crate) enum FalconSecurityModel { - /// Non-Secure: runs unsigned code without privileges. - #[default] - None = 0, - /// Light-Secured (LS): Runs signed code with some privileges. - /// Entry into this mode is only possible from 'Heavy-secure' mode, which verifies the code's - /// signature. +bounded_enum! { + /// Security mode of the Falcon microprocessor. /// - /// Also known as Low-Secure, Privilege Level 2 or PL2. - Light = 2, - /// Heavy-Secured (HS): Runs signed code with full privileges. 
- /// The code's signature is verified by the Falcon Boot ROM (BROM). - /// - /// Also known as High-Secure, Privilege Level 3 or PL3. - Heavy = 3, -} -impl_from_enum_to_u8!(FalconSecurityModel); - -// TODO[FPRI]: replace with `FromPrimitive`. -impl TryFrom for FalconSecurityModel { - type Error = Error; - - fn try_from(value: u8) -> Result { - use FalconSecurityModel::*; - - let sec_model = match value { - 0 => None, - 2 => Light, - 3 => Heavy, - _ => return Err(EINVAL), - }; - - Ok(sec_model) + /// See `falcon.rst` for more details. + #[derive(Debug, Copy, Clone)] + pub(crate) enum FalconSecurityModel with TryFrom> { + /// Non-Secure: runs unsigned code without privileges. + None = 0, + /// Light-Secured (LS): Runs signed code with some privileges. + /// Entry into this mode is only possible from 'Heavy-secure' mode, which verifies the + /// code's signature. + /// + /// Also known as Low-Secure, Privilege Level 2 or PL2. + Light = 2, + /// Heavy-Secured (HS): Runs signed code with full privileges. + /// The code's signature is verified by the Falcon Boot ROM (BROM). + /// + /// Also known as High-Secure, Privilege Level 3 or PL3. + Heavy = 3, } } -/// Signing algorithm for a given firmware, used in the [`crate::regs::NV_PFALCON2_FALCON_MOD_SEL`] -/// register. It is passed to the Falcon Boot ROM (BROM) as a parameter. -#[repr(u8)] -#[derive(Debug, Default, Copy, Clone, PartialEq, Eq)] -pub(crate) enum FalconModSelAlgo { - /// AES. - #[expect(dead_code)] - Aes = 0, - /// RSA3K. - #[default] - Rsa3k = 1, -} -impl_from_enum_to_u8!(FalconModSelAlgo); - -// TODO[FPRI]: replace with `FromPrimitive`. -impl TryFrom for FalconModSelAlgo { - type Error = Error; - - fn try_from(value: u8) -> Result { - match value { - 1 => Ok(FalconModSelAlgo::Rsa3k), - _ => Err(EINVAL), - } +bounded_enum! { + /// Signing algorithm for a given firmware, used in the + /// [`crate::regs::NV_PFALCON2_FALCON_MOD_SEL`] register. It is passed to the Falcon Boot ROM + /// (BROM) as a parameter. 
+ #[derive(Debug, Copy, Clone)] + pub(crate) enum FalconModSelAlgo with TryFrom> { + /// AES. + Aes = 0, + /// RSA3K. + Rsa3k = 1, } } -/// Valid values for the `size` field of the [`crate::regs::NV_PFALCON_FALCON_DMATRFCMD`] register. -#[repr(u8)] -#[derive(Debug, Default, Copy, Clone, PartialEq, Eq)] -pub(crate) enum DmaTrfCmdSize { - /// 256 bytes transfer. - #[default] - Size256B = 0x6, -} -impl_from_enum_to_u8!(DmaTrfCmdSize); - -// TODO[FPRI]: replace with `FromPrimitive`. -impl TryFrom for DmaTrfCmdSize { - type Error = Error; - - fn try_from(value: u8) -> Result { - match value { - 0x6 => Ok(Self::Size256B), - _ => Err(EINVAL), - } +bounded_enum! { + /// Valid values for the `size` field of the [`crate::regs::NV_PFALCON_FALCON_DMATRFCMD`] + /// register. + #[derive(Debug, Copy, Clone)] + pub(crate) enum DmaTrfCmdSize with TryFrom> { + /// 256 bytes transfer. + Size256B = 0x6, } } -/// Currently active core on a dual falcon/riscv (Peregrine) controller. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] -pub(crate) enum PeregrineCoreSelect { - /// Falcon core is active. - #[default] - Falcon = 0, - /// RISC-V core is active. - Riscv = 1, -} - -impl From for PeregrineCoreSelect { - fn from(value: bool) -> Self { - match value { - false => PeregrineCoreSelect::Falcon, - true => PeregrineCoreSelect::Riscv, - } - } -} - -impl From for bool { - fn from(value: PeregrineCoreSelect) -> Self { - match value { - PeregrineCoreSelect::Falcon => false, - PeregrineCoreSelect::Riscv => true, - } +bounded_enum! { + /// Currently active core on a dual falcon/riscv (Peregrine) controller. + #[derive(Debug, Copy, Clone, PartialEq, Eq)] + pub(crate) enum PeregrineCoreSelect with From> { + /// Falcon core is active. + Falcon = 0, + /// RISC-V core is active. + Riscv = 1, } } /// Different types of memory present in a falcon core. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Copy, Clone, PartialEq, Eq)] pub(crate) enum FalconMem { /// Secure Instruction Memory. ImemSecure, @@ -249,64 +141,29 @@ pub(crate) enum FalconMem { Dmem, } -/// Defines the Framebuffer Interface (FBIF) aperture type. -/// This determines the memory type for external memory access during a DMA transfer, which is -/// performed by the Falcon's Framebuffer DMA (FBDMA) engine. See falcon.rst for more details. -#[derive(Debug, Clone, Default)] -pub(crate) enum FalconFbifTarget { - /// VRAM. - #[default] - /// Local Framebuffer (GPU's VRAM memory). - LocalFb = 0, - /// Coherent system memory (System DRAM). - CoherentSysmem = 1, - /// Non-coherent system memory (System DRAM). - NoncoherentSysmem = 2, -} -impl_from_enum_to_u8!(FalconFbifTarget); - -// TODO[FPRI]: replace with `FromPrimitive`. -impl TryFrom for FalconFbifTarget { - type Error = Error; - - fn try_from(value: u8) -> Result { - let res = match value { - 0 => Self::LocalFb, - 1 => Self::CoherentSysmem, - 2 => Self::NoncoherentSysmem, - _ => return Err(EINVAL), - }; - - Ok(res) +bounded_enum! { + /// Defines the Framebuffer Interface (FBIF) aperture type. + /// This determines the memory type for external memory access during a DMA transfer, which is + /// performed by the Falcon's Framebuffer DMA (FBDMA) engine. See falcon.rst for more details. + #[derive(Debug, Copy, Clone)] + pub(crate) enum FalconFbifTarget with TryFrom> { + /// Local Framebuffer (GPU's VRAM memory). + LocalFb = 0, + /// Coherent system memory (System DRAM). + CoherentSysmem = 1, + /// Non-coherent system memory (System DRAM). + NoncoherentSysmem = 2, } } -/// Type of memory addresses to use. -#[derive(Debug, Clone, Default)] -pub(crate) enum FalconFbifMemType { - /// Virtual memory addresses. - #[default] - Virtual = 0, - /// Physical memory addresses. - Physical = 1, -} - -/// Conversion from a single-bit register field. 
-impl From for FalconFbifMemType { - fn from(value: bool) -> Self { - match value { - false => Self::Virtual, - true => Self::Physical, - } - } -} - -impl From for bool { - fn from(value: FalconFbifMemType) -> Self { - match value { - FalconFbifMemType::Virtual => false, - FalconFbifMemType::Physical => true, - } +bounded_enum! { + /// Type of memory addresses to use. + #[derive(Debug, Copy, Clone)] + pub(crate) enum FalconFbifMemType with From> { + /// Virtual memory addresses. + Virtual = 0, + /// Physical memory addresses. + Physical = 1, } } @@ -318,18 +175,16 @@ pub(crate) struct PFalcon2Base(()); /// Trait defining the parameters of a given Falcon engine. /// -/// Each engine provides one base for `PFALCON` and `PFALCON2` registers. The `ID` constant is used -/// to identify a given Falcon instance with register I/O methods. +/// Each engine provides one base for `PFALCON` and `PFALCON2` registers. pub(crate) trait FalconEngine: Send + Sync + RegisterBase + RegisterBase + Sized { - /// Singleton of the engine, used to identify it with register I/O methods. - const ID: Self; } -/// Represents a portion of the firmware to be loaded into a particular memory (e.g. IMEM or DMEM). +/// Represents a portion of the firmware to be loaded into a particular memory (e.g. IMEM or DMEM) +/// using DMA. #[derive(Debug, Clone)] -pub(crate) struct FalconLoadTarget { +pub(crate) struct FalconDmaLoadTarget { /// Offset from the start of the source object to copy from. pub(crate) src_start: u32, /// Offset from the start of the destination memory to copy into. @@ -349,17 +204,149 @@ pub(crate) struct FalconBromParams { pub(crate) ucode_id: u8, } -/// Trait for providing load parameters of falcon firmwares. -pub(crate) trait FalconLoadParams { +/// Trait implemented by falcon firmwares that can be loaded using DMA. +pub(crate) trait FalconDmaLoadable { + /// Returns the firmware data as a slice of bytes. 
+ fn as_slice(&self) -> &[u8]; + /// Returns the load parameters for Secure `IMEM`. - fn imem_sec_load_params(&self) -> FalconLoadTarget; + fn imem_sec_load_params(&self) -> FalconDmaLoadTarget; /// Returns the load parameters for Non-Secure `IMEM`, /// used only on Turing and GA100. - fn imem_ns_load_params(&self) -> Option; + fn imem_ns_load_params(&self) -> Option; /// Returns the load parameters for `DMEM`. - fn dmem_load_params(&self) -> FalconLoadTarget; + fn dmem_load_params(&self) -> FalconDmaLoadTarget; + + /// Returns an adapter that provides the required parameter to load this firmware using PIO. + /// + /// This can only fail if some `u32` fields cannot be converted to `u16`, or if the indices in + /// the headers are invalid. + fn try_as_pio_loadable(&self) -> Result> { + let new_pio_imem = |params: FalconDmaLoadTarget, secure| { + let start = usize::from_safe_cast(params.src_start); + let end = start + usize::from_safe_cast(params.len); + let data = self.as_slice().get(start..end).ok_or(EINVAL)?; + + let dst_start = u16::try_from(params.dst_start).map_err(|_| EINVAL)?; + + Ok::<_, Error>(FalconPioImemLoadTarget { + data, + dst_start, + secure, + start_tag: dst_start >> 8, + }) + }; + + let imem_sec = new_pio_imem(self.imem_sec_load_params(), true)?; + + let imem_ns = if let Some(params) = self.imem_ns_load_params() { + Some(new_pio_imem(params, false)?) + } else { + None + }; + + let dmem = { + let params = self.dmem_load_params(); + let start = usize::from_safe_cast(params.src_start); + let end = start + usize::from_safe_cast(params.len); + let data = self.as_slice().get(start..end).ok_or(EINVAL)?; + + let dst_start = u16::try_from(params.dst_start).map_err(|_| EINVAL)?; + + FalconPioDmemLoadTarget { data, dst_start } + }; + + Ok(FalconDmaFirmwarePioAdapter { + fw: self, + imem_sec, + imem_ns, + dmem, + }) + } +} + +/// Represents a portion of the firmware to be loaded into IMEM using PIO. 
+#[derive(Clone)] +pub(crate) struct FalconPioImemLoadTarget<'a> { + pub(crate) data: &'a [u8], + pub(crate) dst_start: u16, + pub(crate) secure: bool, + pub(crate) start_tag: u16, +} + +/// Represents a portion of the firmware to be loaded into DMEM using PIO. +#[derive(Clone)] +pub(crate) struct FalconPioDmemLoadTarget<'a> { + pub(crate) data: &'a [u8], + pub(crate) dst_start: u16, +} + +/// Trait for providing PIO load parameters of falcon firmwares. +pub(crate) trait FalconPioLoadable { + /// Returns the load parameters for Secure `IMEM`, if any. + fn imem_sec_load_params(&self) -> Option>; + + /// Returns the load parameters for Non-Secure `IMEM`, if any. + fn imem_ns_load_params(&self) -> Option>; + + /// Returns the load parameters for `DMEM`. + fn dmem_load_params(&self) -> FalconPioDmemLoadTarget<'_>; +} + +/// Adapter type that makes any DMA-loadable firmware also loadable via PIO. +/// +/// Created using [`FalconDmaLoadable::try_as_pio_loadable`]. +pub(crate) struct FalconDmaFirmwarePioAdapter<'a, T: FalconDmaLoadable + ?Sized> { + /// Reference to the DMA firmware. + fw: &'a T, + /// Validated secure IMEM parameters. + imem_sec: FalconPioImemLoadTarget<'a>, + /// Validated non-secure IMEM parameters. + imem_ns: Option>, + /// Validated DMEM parameters. 
+ dmem: FalconPioDmemLoadTarget<'a>, +} + +impl<'a, T> FalconPioLoadable for FalconDmaFirmwarePioAdapter<'a, T> +where + T: FalconDmaLoadable + ?Sized, +{ + fn imem_sec_load_params(&self) -> Option> { + Some(self.imem_sec.clone()) + } + + fn imem_ns_load_params(&self) -> Option> { + self.imem_ns.clone() + } + + fn dmem_load_params(&self) -> FalconPioDmemLoadTarget<'_> { + self.dmem.clone() + } +} + +impl<'a, T> FalconFirmware for FalconDmaFirmwarePioAdapter<'a, T> +where + T: FalconDmaLoadable + FalconFirmware + ?Sized, +{ + type Target = ::Target; + + fn brom_params(&self) -> FalconBromParams { + self.fw.brom_params() + } + + fn boot_addr(&self) -> u32 { + self.fw.boot_addr() + } +} + +/// Trait for a falcon firmware. +/// +/// A falcon firmware can be loaded on a given engine. +pub(crate) trait FalconFirmware { + /// Engine on which this firmware is to be loaded. + type Target: FalconEngine; /// Returns the parameters to write into the BROM registers. fn brom_params(&self) -> FalconBromParams; @@ -368,15 +355,6 @@ pub(crate) trait FalconLoadParams { fn boot_addr(&self) -> u32; } -/// Trait for a falcon firmware. -/// -/// A falcon firmware can be loaded on a given engine, and is presented in the form of a DMA -/// object. -pub(crate) trait FalconFirmware: FalconLoadParams + Deref { - /// Engine on which this firmware is to be loaded. - type Target: FalconEngine; -} - /// Contains the base parameters common to all Falcon instances. pub(crate) struct Falcon { hal: KBox>, @@ -394,8 +372,14 @@ impl Falcon { /// Resets DMA-related registers. 
pub(crate) fn dma_reset(&self, bar: &Bar0) { - regs::NV_PFALCON_FBIF_CTL::update(bar, &E::ID, |v| v.set_allow_phys_no_ctx(true)); - regs::NV_PFALCON_FALCON_DMACTL::default().write(bar, &E::ID); + bar.update(regs::NV_PFALCON_FBIF_CTL::of::(), |v| { + v.with_allow_phys_no_ctx(true) + }); + + bar.write( + WithBase::of::(), + regs::NV_PFALCON_FALCON_DMACTL::zeroed(), + ); } /// Reset the controller, select the falcon core, and wait for memory scrubbing to complete. @@ -404,9 +388,111 @@ impl Falcon { self.hal.select_core(self, bar)?; self.hal.reset_wait_mem_scrubbing(bar)?; - regs::NV_PFALCON_FALCON_RM::default() - .set_value(regs::NV_PMC_BOOT_0::read(bar).into()) - .write(bar, &E::ID); + bar.write( + WithBase::of::(), + regs::NV_PFALCON_FALCON_RM::from(bar.read(regs::NV_PMC_BOOT_0).into_raw()), + ); + + Ok(()) + } + + /// Falcons supports up to four ports, but we only ever use one, so just hard-code it. + const PIO_PORT: usize = 0; + + /// Write a slice to Falcon IMEM memory using programmed I/O (PIO). + /// + /// Returns `EINVAL` if `img.len()` is not a multiple of 4. + fn pio_wr_imem_slice(&self, bar: &Bar0, load_offsets: FalconPioImemLoadTarget<'_>) -> Result { + // Rejecting misaligned images here allows us to avoid checking + // inside the loops. 
+ if load_offsets.data.len() % 4 != 0 { + return Err(EINVAL); + } + + bar.write( + WithBase::of::().at(Self::PIO_PORT), + regs::NV_PFALCON_FALCON_IMEMC::zeroed() + .with_secure(load_offsets.secure) + .with_aincw(true) + .with_offs(load_offsets.dst_start), + ); + + for (n, block) in load_offsets.data.chunks(MEM_BLOCK_ALIGNMENT).enumerate() { + let n = u16::try_from(n)?; + let tag: u16 = load_offsets.start_tag.checked_add(n).ok_or(ERANGE)?; + bar.write( + WithBase::of::().at(Self::PIO_PORT), + regs::NV_PFALCON_FALCON_IMEMT::zeroed().with_tag(tag), + ); + for word in block.chunks_exact(4) { + let w = [word[0], word[1], word[2], word[3]]; + bar.write( + WithBase::of::().at(Self::PIO_PORT), + regs::NV_PFALCON_FALCON_IMEMD::zeroed().with_data(u32::from_le_bytes(w)), + ); + } + } + + Ok(()) + } + + /// Write a slice to Falcon DMEM memory using programmed I/O (PIO). + /// + /// Returns `EINVAL` if `img.len()` is not a multiple of 4. + fn pio_wr_dmem_slice(&self, bar: &Bar0, load_offsets: FalconPioDmemLoadTarget<'_>) -> Result { + // Rejecting misaligned images here allows us to avoid checking + // inside the loops. + if load_offsets.data.len() % 4 != 0 { + return Err(EINVAL); + } + + bar.write( + WithBase::of::().at(Self::PIO_PORT), + regs::NV_PFALCON_FALCON_DMEMC::zeroed() + .with_aincw(true) + .with_offs(load_offsets.dst_start), + ); + + for word in load_offsets.data.chunks_exact(4) { + let w = [word[0], word[1], word[2], word[3]]; + bar.write( + WithBase::of::().at(Self::PIO_PORT), + regs::NV_PFALCON_FALCON_DMEMD::zeroed().with_data(u32::from_le_bytes(w)), + ); + } + + Ok(()) + } + + /// Perform a PIO copy into `IMEM` and `DMEM` of `fw`, and prepare the falcon to run it. 
+ pub(crate) fn pio_load + FalconPioLoadable>( + &self, + bar: &Bar0, + fw: &F, + ) -> Result { + bar.update(regs::NV_PFALCON_FBIF_CTL::of::(), |v| { + v.with_allow_phys_no_ctx(true) + }); + + bar.write( + WithBase::of::(), + regs::NV_PFALCON_FALCON_DMACTL::zeroed(), + ); + + if let Some(imem_ns) = fw.imem_ns_load_params() { + self.pio_wr_imem_slice(bar, imem_ns)?; + } + if let Some(imem_sec) = fw.imem_sec_load_params() { + self.pio_wr_imem_slice(bar, imem_sec)?; + } + self.pio_wr_dmem_slice(bar, fw.dmem_load_params())?; + + self.hal.program_brom(self, bar, &fw.brom_params())?; + + bar.write( + WithBase::of::(), + regs::NV_PFALCON_FALCON_BOOTVEC::zeroed().with_value(fw.boot_addr()), + ); Ok(()) } @@ -415,14 +501,14 @@ impl Falcon { /// `target_mem`. /// /// `sec` is set if the loaded firmware is expected to run in secure mode. - fn dma_wr>( + fn dma_wr( &self, bar: &Bar0, - fw: &F, + dma_obj: &Coherent<[u8]>, target_mem: FalconMem, - load_offsets: FalconLoadTarget, + load_offsets: FalconDmaLoadTarget, ) -> Result { - const DMA_LEN: u32 = 256; + const DMA_LEN: u32 = num::usize_into_u32::<{ MEM_BLOCK_ALIGNMENT }>(); // For IMEM, we want to use the start offset as a virtual address tag for each page, since // code addresses in the firmware (and the boot vector) are virtual. @@ -430,11 +516,11 @@ impl Falcon { // For DMEM we can fold the start offset into the DMA handle. 
let (src_start, dma_start) = match target_mem { FalconMem::ImemSecure | FalconMem::ImemNonSecure => { - (load_offsets.src_start, fw.dma_handle()) + (load_offsets.src_start, dma_obj.dma_handle()) } FalconMem::Dmem => ( 0, - fw.dma_handle_with_offset(load_offsets.src_start.into_safe_cast())?, + dma_obj.dma_handle() + DmaAddress::from(load_offsets.src_start), ), }; if dma_start % DmaAddress::from(DMA_LEN) > 0 { @@ -466,7 +552,7 @@ impl Falcon { dev_err!(self.dev, "DMA transfer length overflow\n"); return Err(EOVERFLOW); } - Some(upper_bound) if usize::from_safe_cast(upper_bound) > fw.size() => { + Some(upper_bound) if usize::from_safe_cast(upper_bound) > dma_obj.size() => { dev_err!(self.dev, "DMA transfer goes beyond range of DMA object\n"); return Err(EINVAL); } @@ -475,36 +561,42 @@ impl Falcon { // Set up the base source DMA address. - regs::NV_PFALCON_FALCON_DMATRFBASE::default() - // CAST: `as u32` is used on purpose since we do want to strip the upper bits, which - // will be written to `NV_PFALCON_FALCON_DMATRFBASE1`. - .set_base((dma_start >> 8) as u32) - .write(bar, &E::ID); - regs::NV_PFALCON_FALCON_DMATRFBASE1::default() - // CAST: `as u16` is used on purpose since the remaining bits are guaranteed to fit - // within a `u16`. - .set_base((dma_start >> 40) as u16) - .write(bar, &E::ID); + bar.write( + WithBase::of::(), + regs::NV_PFALCON_FALCON_DMATRFBASE::zeroed().with_base( + // CAST: `as u32` is used on purpose since we do want to strip the upper bits, + // which will be written to `NV_PFALCON_FALCON_DMATRFBASE1`. 
+ (dma_start >> 8) as u32, + ), + ); + bar.write( + WithBase::of::(), + regs::NV_PFALCON_FALCON_DMATRFBASE1::zeroed().try_with_base(dma_start >> 40)?, + ); - let cmd = regs::NV_PFALCON_FALCON_DMATRFCMD::default() - .set_size(DmaTrfCmdSize::Size256B) + let cmd = regs::NV_PFALCON_FALCON_DMATRFCMD::zeroed() + .with_size(DmaTrfCmdSize::Size256B) .with_falcon_mem(target_mem); for pos in (0..num_transfers).map(|i| i * DMA_LEN) { // Perform a transfer of size `DMA_LEN`. - regs::NV_PFALCON_FALCON_DMATRFMOFFS::default() - .set_offs(load_offsets.dst_start + pos) - .write(bar, &E::ID); - regs::NV_PFALCON_FALCON_DMATRFFBOFFS::default() - .set_offs(src_start + pos) - .write(bar, &E::ID); - cmd.write(bar, &E::ID); + bar.write( + WithBase::of::(), + regs::NV_PFALCON_FALCON_DMATRFMOFFS::zeroed() + .try_with_offs(load_offsets.dst_start + pos)?, + ); + bar.write( + WithBase::of::(), + regs::NV_PFALCON_FALCON_DMATRFFBOFFS::zeroed().with_offs(src_start + pos), + ); + + bar.write(WithBase::of::(), cmd); // Wait for the transfer to complete. // TIMEOUT: arbitrarily large value, no DMA transfer to the falcon's small memories // should ever take that long. read_poll_timeout( - || Ok(regs::NV_PFALCON_FALCON_DMATRFCMD::read(bar, &E::ID)), + || Ok(bar.read(regs::NV_PFALCON_FALCON_DMATRFCMD::of::())), |r| r.idle(), Delta::ZERO, Delta::from_secs(2), @@ -515,29 +607,36 @@ impl Falcon { } /// Perform a DMA load into `IMEM` and `DMEM` of `fw`, and prepare the falcon to run it. - fn dma_load>(&self, bar: &Bar0, fw: &F) -> Result { - // The Non-Secure section only exists on firmware used by Turing and GA100, and - // those platforms do not use DMA. - if fw.imem_ns_load_params().is_some() { - debug_assert!(false); - return Err(EINVAL); - } + fn dma_load + FalconDmaLoadable>( + &self, + dev: &Device, + bar: &Bar0, + fw: &F, + ) -> Result { + // Create DMA object with firmware content as the source of the DMA engine. 
+ let dma_obj = Coherent::from_slice(dev, fw.as_slice(), GFP_KERNEL)?; self.dma_reset(bar); - regs::NV_PFALCON_FBIF_TRANSCFG::update(bar, &E::ID, 0, |v| { - v.set_target(FalconFbifTarget::CoherentSysmem) - .set_mem_type(FalconFbifMemType::Physical) + bar.update(regs::NV_PFALCON_FBIF_TRANSCFG::of::().at(0), |v| { + v.with_target(FalconFbifTarget::CoherentSysmem) + .with_mem_type(FalconFbifMemType::Physical) }); - self.dma_wr(bar, fw, FalconMem::ImemSecure, fw.imem_sec_load_params())?; - self.dma_wr(bar, fw, FalconMem::Dmem, fw.dmem_load_params())?; + self.dma_wr( + bar, + &dma_obj, + FalconMem::ImemSecure, + fw.imem_sec_load_params(), + )?; + self.dma_wr(bar, &dma_obj, FalconMem::Dmem, fw.dmem_load_params())?; self.hal.program_brom(self, bar, &fw.brom_params())?; // Set `BootVec` to start of non-secure code. - regs::NV_PFALCON_FALCON_BOOTVEC::default() - .set_value(fw.boot_addr()) - .write(bar, &E::ID); + bar.write( + WithBase::of::(), + regs::NV_PFALCON_FALCON_BOOTVEC::zeroed().with_value(fw.boot_addr()), + ); Ok(()) } @@ -546,7 +645,7 @@ impl Falcon { pub(crate) fn wait_till_halted(&self, bar: &Bar0) -> Result<()> { // TIMEOUT: arbitrarily large value, firmwares should complete in less than 2 seconds. read_poll_timeout( - || Ok(regs::NV_PFALCON_FALCON_CPUCTL::read(bar, &E::ID)), + || Ok(bar.read(regs::NV_PFALCON_FALCON_CPUCTL::of::())), |r| r.halted(), Delta::ZERO, Delta::from_secs(2), @@ -557,13 +656,18 @@ impl Falcon { /// Start the falcon CPU. 
pub(crate) fn start(&self, bar: &Bar0) -> Result<()> { - match regs::NV_PFALCON_FALCON_CPUCTL::read(bar, &E::ID).alias_en() { - true => regs::NV_PFALCON_FALCON_CPUCTL_ALIAS::default() - .set_startcpu(true) - .write(bar, &E::ID), - false => regs::NV_PFALCON_FALCON_CPUCTL::default() - .set_startcpu(true) - .write(bar, &E::ID), + match bar + .read(regs::NV_PFALCON_FALCON_CPUCTL::of::()) + .alias_en() + { + true => bar.write( + WithBase::of::(), + regs::NV_PFALCON_FALCON_CPUCTL_ALIAS::zeroed().with_startcpu(true), + ), + false => bar.write( + WithBase::of::(), + regs::NV_PFALCON_FALCON_CPUCTL::zeroed().with_startcpu(true), + ), } Ok(()) @@ -572,26 +676,30 @@ impl Falcon { /// Writes values to the mailbox registers if provided. pub(crate) fn write_mailboxes(&self, bar: &Bar0, mbox0: Option, mbox1: Option) { if let Some(mbox0) = mbox0 { - regs::NV_PFALCON_FALCON_MAILBOX0::default() - .set_value(mbox0) - .write(bar, &E::ID); + bar.write( + WithBase::of::(), + regs::NV_PFALCON_FALCON_MAILBOX0::zeroed().with_value(mbox0), + ); } if let Some(mbox1) = mbox1 { - regs::NV_PFALCON_FALCON_MAILBOX1::default() - .set_value(mbox1) - .write(bar, &E::ID); + bar.write( + WithBase::of::(), + regs::NV_PFALCON_FALCON_MAILBOX1::zeroed().with_value(mbox1), + ); } } /// Reads the value from `mbox0` register. pub(crate) fn read_mailbox0(&self, bar: &Bar0) -> u32 { - regs::NV_PFALCON_FALCON_MAILBOX0::read(bar, &E::ID).value() + bar.read(regs::NV_PFALCON_FALCON_MAILBOX0::of::()) + .value() } /// Reads the value from `mbox1` register. pub(crate) fn read_mailbox1(&self, bar: &Bar0) -> u32 { - regs::NV_PFALCON_FALCON_MAILBOX1::read(bar, &E::ID).value() + bar.read(regs::NV_PFALCON_FALCON_MAILBOX1::of::()) + .value() } /// Reads values from both mailbox registers. 
@@ -640,18 +748,25 @@ impl Falcon { self.hal.is_riscv_active(bar) } - // Load a firmware image into Falcon memory - pub(crate) fn load>(&self, bar: &Bar0, fw: &F) -> Result { + /// Load a firmware image into Falcon memory, using the preferred method for the current + /// chipset. + pub(crate) fn load + FalconDmaLoadable>( + &self, + dev: &Device, + bar: &Bar0, + fw: &F, + ) -> Result { match self.hal.load_method() { - LoadMethod::Dma => self.dma_load(bar, fw), - LoadMethod::Pio => Err(ENOTSUPP), + LoadMethod::Dma => self.dma_load(dev, bar, fw), + LoadMethod::Pio => self.pio_load(bar, &fw.try_as_pio_loadable()?), } } /// Write the application version to the OS register. pub(crate) fn write_os_version(&self, bar: &Bar0, app_version: u32) { - regs::NV_PFALCON_FALCON_OS::default() - .set_value(app_version) - .write(bar, &E::ID); + bar.write( + WithBase::of::(), + regs::NV_PFALCON_FALCON_OS::zeroed().with_value(app_version), + ); } } diff --git a/drivers/gpu/nova-core/falcon/gsp.rs b/drivers/gpu/nova-core/falcon/gsp.rs index 67edef3636c1..df6d5a382c7a 100644 --- a/drivers/gpu/nova-core/falcon/gsp.rs +++ b/drivers/gpu/nova-core/falcon/gsp.rs @@ -1,7 +1,14 @@ // SPDX-License-Identifier: GPL-2.0 use kernel::{ - io::poll::read_poll_timeout, + io::{ + poll::read_poll_timeout, + register::{ + RegisterBase, + WithBase, // + }, + Io, + }, prelude::*, time::Delta, // }; @@ -14,10 +21,7 @@ use crate::{ PFalcon2Base, PFalconBase, // }, - regs::{ - self, - macros::RegisterBase, // - }, + regs, }; /// Type specifying the `Gsp` falcon engine. Cannot be instantiated. @@ -31,23 +35,22 @@ impl RegisterBase for Gsp { const BASE: usize = 0x00111000; } -impl FalconEngine for Gsp { - const ID: Self = Gsp(()); -} +impl FalconEngine for Gsp {} impl Falcon { /// Clears the SWGEN0 bit in the Falcon's IRQ status clear register to /// allow GSP to signal CPU for processing new messages in message queue. 
pub(crate) fn clear_swgen0_intr(&self, bar: &Bar0) { - regs::NV_PFALCON_FALCON_IRQSCLR::default() - .set_swgen0(true) - .write(bar, &Gsp::ID); + bar.write( + WithBase::of::(), + regs::NV_PFALCON_FALCON_IRQSCLR::zeroed().with_swgen0(true), + ); } /// Checks if GSP reload/resume has completed during the boot process. pub(crate) fn check_reload_completed(&self, bar: &Bar0, timeout: Delta) -> Result { read_poll_timeout( - || Ok(regs::NV_PGC6_BSI_SECURE_SCRATCH_14::read(bar)), + || Ok(bar.read(regs::NV_PGC6_BSI_SECURE_SCRATCH_14)), |val| val.boot_stage_3_handoff(), Delta::ZERO, timeout, diff --git a/drivers/gpu/nova-core/falcon/hal.rs b/drivers/gpu/nova-core/falcon/hal.rs index 89babd5f9325..a7e5ea8d0272 100644 --- a/drivers/gpu/nova-core/falcon/hal.rs +++ b/drivers/gpu/nova-core/falcon/hal.rs @@ -58,7 +58,11 @@ pub(crate) trait FalconHal: Send + Sync { /// Reset the falcon engine. fn reset_eng(&self, bar: &Bar0) -> Result; - /// returns the method needed to load data into Falcon memory + /// Returns the method used to load data into the falcon's memory. + /// + /// The only chipsets supporting PIO are those < GA102, and PIO is the preferred method for + /// these. For anything above, the PIO registers appear to be masked to the CPU, so DMA is the + /// only usable method. 
fn load_method(&self) -> LoadMethod; } diff --git a/drivers/gpu/nova-core/falcon/hal/ga102.rs b/drivers/gpu/nova-core/falcon/hal/ga102.rs index 8f62df10da0a..8368a61ddeef 100644 --- a/drivers/gpu/nova-core/falcon/hal/ga102.rs +++ b/drivers/gpu/nova-core/falcon/hal/ga102.rs @@ -4,7 +4,14 @@ use core::marker::PhantomData; use kernel::{ device, - io::poll::read_poll_timeout, + io::{ + poll::read_poll_timeout, + register::{ + Array, + WithBase, // + }, + Io, // + }, prelude::*, time::Delta, // }; @@ -25,15 +32,16 @@ use crate::{ use super::FalconHal; fn select_core_ga102(bar: &Bar0) -> Result { - let bcr_ctrl = regs::NV_PRISCV_RISCV_BCR_CTRL::read(bar, &E::ID); + let bcr_ctrl = bar.read(regs::NV_PRISCV_RISCV_BCR_CTRL::of::()); if bcr_ctrl.core_select() != PeregrineCoreSelect::Falcon { - regs::NV_PRISCV_RISCV_BCR_CTRL::default() - .set_core_select(PeregrineCoreSelect::Falcon) - .write(bar, &E::ID); + bar.write( + WithBase::of::(), + regs::NV_PRISCV_RISCV_BCR_CTRL::zeroed().with_core_select(PeregrineCoreSelect::Falcon), + ); // TIMEOUT: falcon core should take less than 10ms to report being enabled. read_poll_timeout( - || Ok(regs::NV_PRISCV_RISCV_BCR_CTRL::read(bar, &E::ID)), + || Ok(bar.read(regs::NV_PRISCV_RISCV_BCR_CTRL::of::())), |r| r.valid(), Delta::ZERO, Delta::from_millis(10), @@ -60,12 +68,15 @@ fn signature_reg_fuse_version_ga102( // `ucode_idx` is guaranteed to be in the range [0..15], making the `read` calls provable valid // at build-time. 
- let reg_fuse_version = if engine_id_mask & 0x0001 != 0 { - regs::NV_FUSE_OPT_FPF_SEC2_UCODE1_VERSION::read(bar, ucode_idx).data() + let reg_fuse_version: u16 = if engine_id_mask & 0x0001 != 0 { + bar.read(regs::NV_FUSE_OPT_FPF_SEC2_UCODE1_VERSION::at(ucode_idx)) + .data() } else if engine_id_mask & 0x0004 != 0 { - regs::NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION::read(bar, ucode_idx).data() + bar.read(regs::NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION::at(ucode_idx)) + .data() } else if engine_id_mask & 0x0400 != 0 { - regs::NV_FUSE_OPT_FPF_GSP_UCODE1_VERSION::read(bar, ucode_idx).data() + bar.read(regs::NV_FUSE_OPT_FPF_GSP_UCODE1_VERSION::at(ucode_idx)) + .data() } else { dev_err!(dev, "unexpected engine_id_mask {:#x}\n", engine_id_mask); return Err(EINVAL); @@ -76,18 +87,23 @@ fn signature_reg_fuse_version_ga102( } fn program_brom_ga102(bar: &Bar0, params: &FalconBromParams) -> Result { - regs::NV_PFALCON2_FALCON_BROM_PARAADDR::default() - .set_value(params.pkc_data_offset) - .write(bar, &E::ID, 0); - regs::NV_PFALCON2_FALCON_BROM_ENGIDMASK::default() - .set_value(u32::from(params.engine_id_mask)) - .write(bar, &E::ID); - regs::NV_PFALCON2_FALCON_BROM_CURR_UCODE_ID::default() - .set_ucode_id(params.ucode_id) - .write(bar, &E::ID); - regs::NV_PFALCON2_FALCON_MOD_SEL::default() - .set_algo(FalconModSelAlgo::Rsa3k) - .write(bar, &E::ID); + bar.write( + WithBase::of::().at(0), + regs::NV_PFALCON2_FALCON_BROM_PARAADDR::zeroed().with_value(params.pkc_data_offset), + ); + bar.write( + WithBase::of::(), + regs::NV_PFALCON2_FALCON_BROM_ENGIDMASK::zeroed() + .with_value(u32::from(params.engine_id_mask)), + ); + bar.write( + WithBase::of::(), + regs::NV_PFALCON2_FALCON_BROM_CURR_UCODE_ID::zeroed().with_ucode_id(params.ucode_id), + ); + bar.write( + WithBase::of::(), + regs::NV_PFALCON2_FALCON_MOD_SEL::zeroed().with_algo(FalconModSelAlgo::Rsa3k), + ); Ok(()) } @@ -120,14 +136,14 @@ impl FalconHal for Ga102 { } fn is_riscv_active(&self, bar: &Bar0) -> bool { - let cpuctl = 
regs::NV_PRISCV_RISCV_CPUCTL::read(bar, &E::ID); - cpuctl.active_stat() + bar.read(regs::NV_PRISCV_RISCV_CPUCTL::of::()) + .active_stat() } fn reset_wait_mem_scrubbing(&self, bar: &Bar0) -> Result { // TIMEOUT: memory scrubbing should complete in less than 20ms. read_poll_timeout( - || Ok(regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID)), + || Ok(bar.read(regs::NV_PFALCON_FALCON_HWCFG2::of::())), |r| r.mem_scrubbing_done(), Delta::ZERO, Delta::from_millis(20), @@ -136,12 +152,12 @@ impl FalconHal for Ga102 { } fn reset_eng(&self, bar: &Bar0) -> Result { - let _ = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID); + let _ = bar.read(regs::NV_PFALCON_FALCON_HWCFG2::of::()); // According to OpenRM's `kflcnPreResetWait_GA102` documentation, HW sometimes does not set // RESET_READY so a non-failing timeout is used. let _ = read_poll_timeout( - || Ok(regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID)), + || Ok(bar.read(regs::NV_PFALCON_FALCON_HWCFG2::of::())), |r| r.reset_ready(), Delta::ZERO, Delta::from_micros(150), diff --git a/drivers/gpu/nova-core/falcon/hal/tu102.rs b/drivers/gpu/nova-core/falcon/hal/tu102.rs index 7de6f24cc0a0..c7a90266cb44 100644 --- a/drivers/gpu/nova-core/falcon/hal/tu102.rs +++ b/drivers/gpu/nova-core/falcon/hal/tu102.rs @@ -3,7 +3,11 @@ use core::marker::PhantomData; use kernel::{ - io::poll::read_poll_timeout, + io::{ + poll::read_poll_timeout, + register::WithBase, + Io, // + }, prelude::*, time::Delta, // }; @@ -49,14 +53,14 @@ impl FalconHal for Tu102 { } fn is_riscv_active(&self, bar: &Bar0) -> bool { - let cpuctl = regs::NV_PRISCV_RISCV_CORE_SWITCH_RISCV_STATUS::read(bar, &E::ID); - cpuctl.active_stat() + bar.read(regs::NV_PRISCV_RISCV_CORE_SWITCH_RISCV_STATUS::of::()) + .active_stat() } fn reset_wait_mem_scrubbing(&self, bar: &Bar0) -> Result { // TIMEOUT: memory scrubbing should complete in less than 10ms. 
read_poll_timeout( - || Ok(regs::NV_PFALCON_FALCON_DMACTL::read(bar, &E::ID)), + || Ok(bar.read(regs::NV_PFALCON_FALCON_DMACTL::of::())), |r| r.mem_scrubbing_done(), Delta::ZERO, Delta::from_millis(10), diff --git a/drivers/gpu/nova-core/falcon/sec2.rs b/drivers/gpu/nova-core/falcon/sec2.rs index b57d362e576a..91ec7d49c1f5 100644 --- a/drivers/gpu/nova-core/falcon/sec2.rs +++ b/drivers/gpu/nova-core/falcon/sec2.rs @@ -1,12 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 -use crate::{ - falcon::{ - FalconEngine, - PFalcon2Base, - PFalconBase, // - }, - regs::macros::RegisterBase, +use kernel::io::register::RegisterBase; + +use crate::falcon::{ + FalconEngine, + PFalcon2Base, + PFalconBase, // }; /// Type specifying the `Sec2` falcon engine. Cannot be instantiated. @@ -20,6 +19,4 @@ impl RegisterBase for Sec2 { const BASE: usize = 0x00841000; } -impl FalconEngine for Sec2 { - const ID: Self = Sec2(()); -} +impl FalconEngine for Sec2 {} diff --git a/drivers/gpu/nova-core/fb.rs b/drivers/gpu/nova-core/fb.rs index c62abcaed547..bdd5eed760e1 100644 --- a/drivers/gpu/nova-core/fb.rs +++ b/drivers/gpu/nova-core/fb.rs @@ -1,9 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 -use core::ops::Range; +use core::ops::{ + Deref, + Range, // +}; use kernel::{ device, + dma::CoherentHandle, + fmt, + io::Io, prelude::*, ptr::{ Alignable, @@ -14,7 +20,6 @@ use kernel::{ }; use crate::{ - dma::DmaObject, driver::Bar0, firmware::gsp::GspFirmware, gpu::Chipset, @@ -48,7 +53,7 @@ pub(crate) struct SysmemFlush { chipset: Chipset, device: ARef, /// Keep the page alive as long as we need it. 
- page: DmaObject, + page: CoherentHandle, } impl SysmemFlush { @@ -58,7 +63,7 @@ impl SysmemFlush { bar: &Bar0, chipset: Chipset, ) -> Result { - let page = DmaObject::new(dev, kernel::page::PAGE_SIZE)?; + let page = CoherentHandle::alloc(dev, kernel::page::PAGE_SIZE, GFP_KERNEL)?; hal::fb_hal(chipset).write_sysmem_flush_page(bar, page.dma_handle())?; @@ -94,26 +99,77 @@ impl SysmemFlush { } } +pub(crate) struct FbRange(Range); + +impl FbRange { + pub(crate) fn len(&self) -> u64 { + self.0.end - self.0.start + } +} + +impl From> for FbRange { + fn from(range: Range) -> Self { + Self(range) + } +} + +impl Deref for FbRange { + type Target = Range; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl fmt::Debug for FbRange { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Use alternate format ({:#?}) to include size, compact format ({:?}) for just the range. + if f.alternate() { + let size = self.len(); + + if size < usize_as_u64(SZ_1M) { + let size_kib = size / usize_as_u64(SZ_1K); + f.write_fmt(fmt!( + "{:#x}..{:#x} ({} KiB)", + self.0.start, + self.0.end, + size_kib + )) + } else { + let size_mib = size / usize_as_u64(SZ_1M); + f.write_fmt(fmt!( + "{:#x}..{:#x} ({} MiB)", + self.0.start, + self.0.end, + size_mib + )) + } + } else { + f.write_fmt(fmt!("{:#x}..{:#x}", self.0.start, self.0.end)) + } + } +} + /// Layout of the GPU framebuffer memory. /// /// Contains ranges of GPU memory reserved for a given purpose during the GSP boot process. #[derive(Debug)] pub(crate) struct FbLayout { /// Range of the framebuffer. Starts at `0`. - pub(crate) fb: Range, + pub(crate) fb: FbRange, /// VGA workspace, small area of reserved memory at the end of the framebuffer. - pub(crate) vga_workspace: Range, + pub(crate) vga_workspace: FbRange, /// FRTS range. - pub(crate) frts: Range, + pub(crate) frts: FbRange, /// Memory area containing the GSP bootloader image. 
- pub(crate) boot: Range, + pub(crate) boot: FbRange, /// Memory area containing the GSP firmware image. - pub(crate) elf: Range, + pub(crate) elf: FbRange, /// WPR2 heap. - pub(crate) wpr2_heap: Range, + pub(crate) wpr2_heap: FbRange, /// WPR2 region range, starting with an instance of `GspFwWprMeta`. - pub(crate) wpr2: Range, - pub(crate) heap: Range, + pub(crate) wpr2: FbRange, + pub(crate) heap: FbRange, pub(crate) vf_partition_count: u8, } @@ -125,7 +181,7 @@ impl FbLayout { let fb = { let fb_size = hal.vidmem_size(bar); - 0..fb_size + FbRange(0..fb_size) }; let vga_workspace = { @@ -134,7 +190,10 @@ impl FbLayout { let base = fb.end - NV_PRAMIN_SIZE; if hal.supports_display(bar) { - match regs::NV_PDISP_VGA_WORKSPACE_BASE::read(bar).vga_workspace_addr() { + match bar + .read(regs::NV_PDISP_VGA_WORKSPACE_BASE) + .vga_workspace_addr() + { Some(addr) => { if addr < base { const VBIOS_WORKSPACE_SIZE: u64 = usize_as_u64(SZ_128K); @@ -152,7 +211,7 @@ impl FbLayout { } }; - vga_base..fb.end + FbRange(vga_base..fb.end) }; let frts = { @@ -160,7 +219,7 @@ impl FbLayout { const FRTS_SIZE: u64 = usize_as_u64(SZ_1M); let frts_base = vga_workspace.start.align_down(FRTS_DOWN_ALIGN) - FRTS_SIZE; - frts_base..frts_base + FRTS_SIZE + FbRange(frts_base..frts_base + FRTS_SIZE) }; let boot = { @@ -168,7 +227,7 @@ impl FbLayout { let bootloader_size = u64::from_safe_cast(gsp_fw.bootloader.ucode.size()); let bootloader_base = (frts.start - bootloader_size).align_down(BOOTLOADER_DOWN_ALIGN); - bootloader_base..bootloader_base + bootloader_size + FbRange(bootloader_base..bootloader_base + bootloader_size) }; let elf = { @@ -176,7 +235,7 @@ impl FbLayout { let elf_size = u64::from_safe_cast(gsp_fw.size); let elf_addr = (boot.start - elf_size).align_down(ELF_DOWN_ALIGN); - elf_addr..elf_addr + elf_size + FbRange(elf_addr..elf_addr + elf_size) }; let wpr2_heap = { @@ -185,7 +244,7 @@ impl FbLayout { gsp::LibosParams::from_chipset(chipset).wpr_heap_size(chipset, fb.end); let 
wpr2_heap_addr = (elf.start - wpr2_heap_size).align_down(WPR2_HEAP_DOWN_ALIGN); - wpr2_heap_addr..(elf.start).align_down(WPR2_HEAP_DOWN_ALIGN) + FbRange(wpr2_heap_addr..(elf.start).align_down(WPR2_HEAP_DOWN_ALIGN)) }; let wpr2 = { @@ -193,13 +252,13 @@ impl FbLayout { let wpr2_addr = (wpr2_heap.start - u64::from_safe_cast(size_of::())) .align_down(WPR2_DOWN_ALIGN); - wpr2_addr..frts.end + FbRange(wpr2_addr..frts.end) }; let heap = { const HEAP_SIZE: u64 = usize_as_u64(SZ_1M); - wpr2.start - HEAP_SIZE..wpr2.start + FbRange(wpr2.start - HEAP_SIZE..wpr2.start) }; Ok(Self { diff --git a/drivers/gpu/nova-core/fb/hal/ga100.rs b/drivers/gpu/nova-core/fb/hal/ga100.rs index e0acc41aa7cd..1c03783cddef 100644 --- a/drivers/gpu/nova-core/fb/hal/ga100.rs +++ b/drivers/gpu/nova-core/fb/hal/ga100.rs @@ -1,6 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 -use kernel::prelude::*; +use kernel::{ + io::Io, + num::Bounded, + prelude::*, // +}; use crate::{ driver::Bar0, @@ -13,26 +17,31 @@ use super::tu102::FLUSH_SYSMEM_ADDR_SHIFT; struct Ga100; pub(super) fn read_sysmem_flush_page_ga100(bar: &Bar0) -> u64 { - u64::from(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::read(bar).adr_39_08()) << FLUSH_SYSMEM_ADDR_SHIFT - | u64::from(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI::read(bar).adr_63_40()) + u64::from(bar.read(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR).adr_39_08()) << FLUSH_SYSMEM_ADDR_SHIFT + | u64::from(bar.read(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI).adr_63_40()) << FLUSH_SYSMEM_ADDR_SHIFT_HI } pub(super) fn write_sysmem_flush_page_ga100(bar: &Bar0, addr: u64) { - regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI::default() - // CAST: `as u32` is used on purpose since the remaining bits are guaranteed to fit within - // a `u32`. - .set_adr_63_40((addr >> FLUSH_SYSMEM_ADDR_SHIFT_HI) as u32) - .write(bar); - regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::default() - // CAST: `as u32` is used on purpose since we want to strip the upper bits that have been - // written to `NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI`. 
- .set_adr_39_08((addr >> FLUSH_SYSMEM_ADDR_SHIFT) as u32) - .write(bar); + bar.write_reg( + regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI::zeroed().with_adr_63_40( + Bounded::::from(addr) + .shr::() + .cast(), + ), + ); + + bar.write_reg( + regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::zeroed() + // CAST: `as u32` is used on purpose since we want to strip the upper bits that have + // been written to `NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI`. + .with_adr_39_08((addr >> FLUSH_SYSMEM_ADDR_SHIFT) as u32), + ); } pub(super) fn display_enabled_ga100(bar: &Bar0) -> bool { - !regs::ga100::NV_FUSE_STATUS_OPT_DISPLAY::read(bar).display_disabled() + !bar.read(regs::ga100::NV_FUSE_STATUS_OPT_DISPLAY) + .display_disabled() } /// Shift applied to the sysmem address before it is written into diff --git a/drivers/gpu/nova-core/fb/hal/ga102.rs b/drivers/gpu/nova-core/fb/hal/ga102.rs index 734605905031..4b9f0f74d0e7 100644 --- a/drivers/gpu/nova-core/fb/hal/ga102.rs +++ b/drivers/gpu/nova-core/fb/hal/ga102.rs @@ -1,6 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 -use kernel::prelude::*; +use kernel::{ + io::Io, + prelude::*, // +}; use crate::{ driver::Bar0, @@ -9,7 +12,7 @@ use crate::{ }; fn vidmem_size_ga102(bar: &Bar0) -> u64 { - regs::NV_USABLE_FB_SIZE_IN_MB::read(bar).usable_fb_size() + bar.read(regs::NV_USABLE_FB_SIZE_IN_MB).usable_fb_size() } struct Ga102; diff --git a/drivers/gpu/nova-core/fb/hal/tu102.rs b/drivers/gpu/nova-core/fb/hal/tu102.rs index eec984f4e816..281bb796e198 100644 --- a/drivers/gpu/nova-core/fb/hal/tu102.rs +++ b/drivers/gpu/nova-core/fb/hal/tu102.rs @@ -1,6 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 -use kernel::prelude::*; +use kernel::{ + io::Io, + prelude::*, // +}; use crate::{ driver::Bar0, @@ -13,7 +16,7 @@ use crate::{ pub(super) const FLUSH_SYSMEM_ADDR_SHIFT: u32 = 8; pub(super) fn read_sysmem_flush_page_gm107(bar: &Bar0) -> u64 { - u64::from(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::read(bar).adr_39_08()) << FLUSH_SYSMEM_ADDR_SHIFT + 
u64::from(bar.read(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR).adr_39_08()) << FLUSH_SYSMEM_ADDR_SHIFT } pub(super) fn write_sysmem_flush_page_gm107(bar: &Bar0, addr: u64) -> Result { @@ -21,18 +24,18 @@ pub(super) fn write_sysmem_flush_page_gm107(bar: &Bar0, addr: u64) -> Result { u32::try_from(addr >> FLUSH_SYSMEM_ADDR_SHIFT) .map_err(|_| EINVAL) .map(|addr| { - regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::default() - .set_adr_39_08(addr) - .write(bar) + bar.write_reg(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::zeroed().with_adr_39_08(addr)) }) } pub(super) fn display_enabled_gm107(bar: &Bar0) -> bool { - !regs::gm107::NV_FUSE_STATUS_OPT_DISPLAY::read(bar).display_disabled() + !bar.read(regs::gm107::NV_FUSE_STATUS_OPT_DISPLAY) + .display_disabled() } pub(super) fn vidmem_size_gp102(bar: &Bar0) -> u64 { - regs::NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE::read(bar).usable_fb_size() + bar.read(regs::NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE) + .usable_fb_size() } struct Tu102; diff --git a/drivers/gpu/nova-core/firmware.rs b/drivers/gpu/nova-core/firmware.rs index 68779540aa28..6c2ab69cb605 100644 --- a/drivers/gpu/nova-core/firmware.rs +++ b/drivers/gpu/nova-core/firmware.rs @@ -15,10 +15,9 @@ use kernel::{ }; use crate::{ - dma::DmaObject, falcon::{ - FalconFirmware, - FalconLoadTarget, // + FalconDmaLoadTarget, + FalconFirmware, // }, gpu, num::{ @@ -64,7 +63,8 @@ pub(crate) struct FalconUCodeDescV2 { pub(crate) interface_offset: u32, /// Base address at which to load the code segment into 'IMEM'. pub(crate) imem_phys_base: u32, - /// Size in bytes of the code to copy into 'IMEM'. + /// Size in bytes of the code to copy into 'IMEM' (includes both secure and non-secure + /// segments). pub(crate) imem_load_size: u32, /// Virtual 'IMEM' address (i.e. 'tag') at which the code should start. 
pub(crate) imem_virt_base: u32, @@ -171,9 +171,9 @@ pub(crate) trait FalconUCodeDescriptor { ((hdr & HDR_SIZE_MASK) >> HDR_SIZE_SHIFT).into_safe_cast() } - fn imem_sec_load_params(&self) -> FalconLoadTarget; - fn imem_ns_load_params(&self) -> Option; - fn dmem_load_params(&self) -> FalconLoadTarget; + fn imem_sec_load_params(&self) -> FalconDmaLoadTarget; + fn imem_ns_load_params(&self) -> Option; + fn dmem_load_params(&self) -> FalconDmaLoadTarget; } impl FalconUCodeDescriptor for FalconUCodeDescV2 { @@ -205,24 +205,31 @@ impl FalconUCodeDescriptor for FalconUCodeDescV2 { 0 } - fn imem_sec_load_params(&self) -> FalconLoadTarget { - FalconLoadTarget { - src_start: 0, - dst_start: self.imem_sec_base, + fn imem_sec_load_params(&self) -> FalconDmaLoadTarget { + // `imem_sec_base` is the *virtual* start address of the secure IMEM segment, so subtract + // `imem_virt_base` to get its physical offset. + let imem_sec_start = self.imem_sec_base.saturating_sub(self.imem_virt_base); + + FalconDmaLoadTarget { + src_start: imem_sec_start, + dst_start: self.imem_phys_base.saturating_add(imem_sec_start), len: self.imem_sec_size, } } - fn imem_ns_load_params(&self) -> Option { - Some(FalconLoadTarget { + fn imem_ns_load_params(&self) -> Option { + Some(FalconDmaLoadTarget { + // Non-secure code always starts at offset 0. src_start: 0, dst_start: self.imem_phys_base, - len: self.imem_load_size.checked_sub(self.imem_sec_size)?, + // `imem_load_size` includes the size of the secure segment, so subtract it to + // get the correct amount of data to copy. 
+ len: self.imem_load_size.saturating_sub(self.imem_sec_size), }) } - fn dmem_load_params(&self) -> FalconLoadTarget { - FalconLoadTarget { + fn dmem_load_params(&self) -> FalconDmaLoadTarget { + FalconDmaLoadTarget { src_start: self.dmem_offset, dst_start: self.dmem_phys_base, len: self.dmem_load_size, @@ -259,21 +266,23 @@ impl FalconUCodeDescriptor for FalconUCodeDescV3 { self.signature_versions } - fn imem_sec_load_params(&self) -> FalconLoadTarget { - FalconLoadTarget { + fn imem_sec_load_params(&self) -> FalconDmaLoadTarget { + FalconDmaLoadTarget { + // IMEM segment always starts at offset 0. src_start: 0, dst_start: self.imem_phys_base, len: self.imem_load_size, } } - fn imem_ns_load_params(&self) -> Option { + fn imem_ns_load_params(&self) -> Option { // Not used on V3 platforms None } - fn dmem_load_params(&self) -> FalconLoadTarget { - FalconLoadTarget { + fn dmem_load_params(&self) -> FalconDmaLoadTarget { + FalconDmaLoadTarget { + // DMEM segment starts right after the IMEM one. src_start: self.imem_load_size, dst_start: self.dmem_phys_base, len: self.dmem_load_size, @@ -292,7 +301,7 @@ impl SignedState for Unsigned {} struct Signed; impl SignedState for Signed {} -/// A [`DmaObject`] containing a specific microcode ready to be loaded into a falcon. +/// Microcode to be loaded into a specific falcon. /// /// This is module-local and meant for sub-modules to use internally. /// @@ -300,34 +309,35 @@ impl SignedState for Signed {} /// before it can be loaded (with an exception for development hardware). The /// [`Self::patch_signature`] and [`Self::no_patch_signature`] methods are used to transition the /// firmware to its [`Signed`] state. -struct FirmwareDmaObject(DmaObject, PhantomData<(F, S)>); +// TODO: Consider replacing this with a coherent memory object once `CoherentAllocation` supports +// temporary CPU-exclusive access to the object without unsafe methods. 
+struct FirmwareObject(KVVec, PhantomData<(F, S)>); /// Trait for signatures to be patched directly into a given firmware. /// /// This is module-local and meant for sub-modules to use internally. trait FirmwareSignature: AsRef<[u8]> {} -impl FirmwareDmaObject { - /// Patches the firmware at offset `sig_base_img` with `signature`. +impl FirmwareObject { + /// Patches the firmware at offset `signature_start` with `signature`. fn patch_signature>( mut self, signature: &S, - sig_base_img: usize, - ) -> Result> { + signature_start: usize, + ) -> Result> { let signature_bytes = signature.as_ref(); - if sig_base_img + signature_bytes.len() > self.0.size() { - return Err(EINVAL); - } + let signature_end = signature_start + .checked_add(signature_bytes.len()) + .ok_or(EOVERFLOW)?; + let dst = self + .0 + .get_mut(signature_start..signature_end) + .ok_or(EINVAL)?; - // SAFETY: We are the only user of this object, so there cannot be any race. - let dst = unsafe { self.0.start_ptr_mut().add(sig_base_img) }; + // PANIC: `dst` and `signature_bytes` have the same length. + dst.copy_from_slice(signature_bytes); - // SAFETY: `signature` and `dst` are valid, properly aligned, and do not overlap. - unsafe { - core::ptr::copy_nonoverlapping(signature_bytes.as_ptr(), dst, signature_bytes.len()) - }; - - Ok(FirmwareDmaObject(self.0, PhantomData)) + Ok(FirmwareObject(self.0, PhantomData)) } /// Mark the firmware as signed without patching it. @@ -335,8 +345,8 @@ impl FirmwareDmaObject { /// This method is used to explicitly confirm that we do not need to sign the firmware, while /// allowing us to continue as if it was. This is typically only needed for development /// hardware. 
- fn no_patch_signature(self) -> FirmwareDmaObject { - FirmwareDmaObject(self.0, PhantomData) + fn no_patch_signature(self) -> FirmwareObject { + FirmwareObject(self.0, PhantomData) } } @@ -394,8 +404,9 @@ impl<'a> BinFirmware<'a> { fn data(&self) -> Option<&[u8]> { let fw_start = usize::from_safe_cast(self.hdr.data_offset); let fw_size = usize::from_safe_cast(self.hdr.data_size); + let fw_end = fw_start.checked_add(fw_size)?; - self.fw.get(fw_start..fw_start + fw_size) + self.fw.get(fw_start..fw_end) } } @@ -416,24 +427,111 @@ impl ModInfoBuilder { ) } - const fn make_entry_chipset(self, chipset: &str) -> Self { - self.make_entry_file(chipset, "booter_load") - .make_entry_file(chipset, "booter_unload") - .make_entry_file(chipset, "bootloader") - .make_entry_file(chipset, "gsp") + const fn make_entry_chipset(self, chipset: gpu::Chipset) -> Self { + let name = chipset.name(); + + let this = self + .make_entry_file(name, "booter_load") + .make_entry_file(name, "booter_unload") + .make_entry_file(name, "bootloader") + .make_entry_file(name, "gsp"); + + if chipset.needs_fwsec_bootloader() { + this.make_entry_file(name, "gen_bootloader") + } else { + this + } } pub(crate) const fn create( - module_name: &'static kernel::str::CStr, + module_name: &'static core::ffi::CStr, ) -> firmware::ModInfoBuilder { let mut this = Self(firmware::ModInfoBuilder::new(module_name)); let mut i = 0; while i < gpu::Chipset::ALL.len() { - this = this.make_entry_chipset(gpu::Chipset::ALL[i].name()); + this = this.make_entry_chipset(gpu::Chipset::ALL[i]); i += 1; } this.0 } } + +/// Ad-hoc and temporary module to extract sections from ELF images. +/// +/// Some firmware images are currently packaged as ELF files, where sections names are used as keys +/// to specific and related bits of data. Future firmware versions are scheduled to move away from +/// that scheme before nova-core becomes stable, which means this module will eventually be +/// removed. 
+mod elf { + use core::mem::size_of; + + use kernel::{ + bindings, + str::CStr, + transmute::FromBytes, // + }; + + /// Newtype to provide a [`FromBytes`] implementation. + #[repr(transparent)] + struct Elf64Hdr(bindings::elf64_hdr); + // SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. + unsafe impl FromBytes for Elf64Hdr {} + + #[repr(transparent)] + struct Elf64SHdr(bindings::elf64_shdr); + // SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. + unsafe impl FromBytes for Elf64SHdr {} + + /// Returns a NULL-terminated string from the ELF image at `offset`. + fn elf_str(elf: &[u8], offset: u64) -> Option<&str> { + let idx = usize::try_from(offset).ok()?; + let bytes = elf.get(idx..)?; + CStr::from_bytes_until_nul(bytes).ok()?.to_str().ok() + } + + /// Tries to extract section with name `name` from the ELF64 image `elf`, and returns it. + pub(super) fn elf64_section<'a, 'b>(elf: &'a [u8], name: &'b str) -> Option<&'a [u8]> { + let hdr = &elf + .get(0..size_of::()) + .and_then(Elf64Hdr::from_bytes)? + .0; + + // Get all the section headers. + let mut shdr = { + let shdr_num = usize::from(hdr.e_shnum); + let shdr_start = usize::try_from(hdr.e_shoff).ok()?; + let shdr_end = shdr_num + .checked_mul(size_of::()) + .and_then(|v| v.checked_add(shdr_start))?; + + elf.get(shdr_start..shdr_end) + .map(|slice| slice.chunks_exact(size_of::()))? + }; + + // Get the strings table. + let strhdr = shdr + .clone() + .nth(usize::from(hdr.e_shstrndx)) + .and_then(Elf64SHdr::from_bytes)?; + + // Find the section which name matches `name` and return it. 
+ shdr.find_map(|sh| { + let hdr = Elf64SHdr::from_bytes(sh)?; + let name_offset = strhdr.0.sh_offset.checked_add(u64::from(hdr.0.sh_name))?; + let section_name = elf_str(elf, name_offset)?; + + if section_name != name { + return None; + } + + let start = usize::try_from(hdr.0.sh_offset).ok()?; + let end = usize::try_from(hdr.0.sh_size) + .ok() + .and_then(|sh_size| start.checked_add(sh_size))?; + + elf.get(start..end) + }) + } +} diff --git a/drivers/gpu/nova-core/firmware/booter.rs b/drivers/gpu/nova-core/firmware/booter.rs index 86556cee8e67..de2a4536b532 100644 --- a/drivers/gpu/nova-core/firmware/booter.rs +++ b/drivers/gpu/nova-core/firmware/booter.rs @@ -4,10 +4,7 @@ //! running on [`Sec2`], that is used on Turing/Ampere to load the GSP firmware into the GSP falcon //! (and optionally unload it through a separate firmware image). -use core::{ - marker::PhantomData, - ops::Deref, // -}; +use core::marker::PhantomData; use kernel::{ device, @@ -16,19 +13,18 @@ use kernel::{ }; use crate::{ - dma::DmaObject, driver::Bar0, falcon::{ sec2::Sec2, Falcon, FalconBromParams, - FalconFirmware, - FalconLoadParams, - FalconLoadTarget, // + FalconDmaLoadTarget, + FalconDmaLoadable, + FalconFirmware, // }, firmware::{ BinFirmware, - FirmwareDmaObject, + FirmwareObject, FirmwareSignature, Signed, Unsigned, // @@ -43,8 +39,9 @@ use crate::{ /// Local convenience function to return a copy of `S` by reinterpreting the bytes starting at /// `offset` in `slice`. 
fn frombytes_at(slice: &[u8], offset: usize) -> Result { + let end = offset.checked_add(size_of::()).ok_or(EINVAL)?; slice - .get(offset..offset + size_of::()) + .get(offset..end) .and_then(S::from_bytes_copy) .ok_or(EINVAL) } @@ -119,14 +116,21 @@ impl<'a> HsFirmwareV2<'a> { Some(sig_size) => { let patch_sig = frombytes_at::(self.fw, self.hdr.patch_sig_offset.into_safe_cast())?; - let signatures_start = usize::from_safe_cast(self.hdr.sig_prod_offset + patch_sig); + + let signatures_start = self + .hdr + .sig_prod_offset + .checked_add(patch_sig) + .map(usize::from_safe_cast) + .ok_or(EINVAL)?; + + let signatures_end = signatures_start + .checked_add(usize::from_safe_cast(self.hdr.sig_prod_size)) + .ok_or(EINVAL)?; self.fw // Get signatures range. - .get( - signatures_start - ..signatures_start + usize::from_safe_cast(self.hdr.sig_prod_size), - ) + .get(signatures_start..signatures_end) .ok_or(EINVAL)? .chunks_exact(sig_size.into_safe_cast()) } @@ -252,21 +256,24 @@ impl<'a> FirmwareSignature for BooterSignature<'a> {} /// The `Booter` loader firmware, responsible for loading the GSP. pub(crate) struct BooterFirmware { // Load parameters for Secure `IMEM` falcon memory. - imem_sec_load_target: FalconLoadTarget, + imem_sec_load_target: FalconDmaLoadTarget, // Load parameters for Non-Secure `IMEM` falcon memory, // used only on Turing and GA100 - imem_ns_load_target: Option, + imem_ns_load_target: Option, // Load parameters for `DMEM` falcon memory. - dmem_load_target: FalconLoadTarget, + dmem_load_target: FalconDmaLoadTarget, // BROM falcon parameters. brom_params: FalconBromParams, // Device-mapped firmware image. 
- ucode: FirmwareDmaObject, + ucode: FirmwareObject, } -impl FirmwareDmaObject { - fn new_booter(dev: &device::Device, data: &[u8]) -> Result { - DmaObject::from_data(dev, data).map(|ucode| Self(ucode, PhantomData)) +impl FirmwareObject { + fn new_booter(data: &[u8]) -> Result { + let mut ucode = KVVec::new(); + ucode.extend_from_slice(data, GFP_KERNEL)?; + + Ok(Self(ucode, PhantomData)) } } @@ -320,7 +327,7 @@ impl BooterFirmware { let ucode = bin_fw .data() .ok_or(EINVAL) - .and_then(|data| FirmwareDmaObject::::new_booter(dev, data))?; + .and_then(FirmwareObject::::new_booter)?; let ucode_signed = { let mut signatures = hs_fw.signatures_iter()?.peekable(); @@ -363,7 +370,7 @@ impl BooterFirmware { let (imem_sec_dst_start, imem_ns_load_target) = if chipset <= Chipset::GA100 { ( app0.offset, - Some(FalconLoadTarget { + Some(FalconDmaLoadTarget { src_start: 0, dst_start: load_hdr.os_code_offset, len: load_hdr.os_code_size, @@ -374,13 +381,13 @@ impl BooterFirmware { }; Ok(Self { - imem_sec_load_target: FalconLoadTarget { + imem_sec_load_target: FalconDmaLoadTarget { src_start: app0.offset, dst_start: imem_sec_dst_start, len: app0.len, }, imem_ns_load_target, - dmem_load_target: FalconLoadTarget { + dmem_load_target: FalconDmaLoadTarget { src_start: load_hdr.os_data_offset, dst_start: 0, len: load_hdr.os_data_size, @@ -391,18 +398,26 @@ impl BooterFirmware { } } -impl FalconLoadParams for BooterFirmware { - fn imem_sec_load_params(&self) -> FalconLoadTarget { +impl FalconDmaLoadable for BooterFirmware { + fn as_slice(&self) -> &[u8] { + self.ucode.0.as_slice() + } + + fn imem_sec_load_params(&self) -> FalconDmaLoadTarget { self.imem_sec_load_target.clone() } - fn imem_ns_load_params(&self) -> Option { + fn imem_ns_load_params(&self) -> Option { self.imem_ns_load_target.clone() } - fn dmem_load_params(&self) -> FalconLoadTarget { + fn dmem_load_params(&self) -> FalconDmaLoadTarget { self.dmem_load_target.clone() } +} + +impl FalconFirmware for BooterFirmware { + type 
Target = Sec2; fn brom_params(&self) -> FalconBromParams { self.brom_params.clone() @@ -416,15 +431,3 @@ impl FalconLoadParams for BooterFirmware { } } } - -impl Deref for BooterFirmware { - type Target = DmaObject; - - fn deref(&self) -> &Self::Target { - &self.ucode.0 - } -} - -impl FalconFirmware for BooterFirmware { - type Target = Sec2; -} diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs b/drivers/gpu/nova-core/firmware/fwsec.rs index bfb7b06b13d1..8810cb49db67 100644 --- a/drivers/gpu/nova-core/firmware/fwsec.rs +++ b/drivers/gpu/nova-core/firmware/fwsec.rs @@ -10,10 +10,9 @@ //! - The command to be run, as this firmware can perform several tasks ; //! - The ucode signature, so the GSP falcon can run FWSEC in HS mode. -use core::{ - marker::PhantomData, - ops::Deref, // -}; +pub(crate) mod bootloader; + +use core::marker::PhantomData; use kernel::{ device::{ @@ -28,27 +27,23 @@ use kernel::{ }; use crate::{ - dma::DmaObject, driver::Bar0, falcon::{ gsp::Gsp, Falcon, FalconBromParams, - FalconFirmware, - FalconLoadParams, - FalconLoadTarget, // + FalconDmaLoadTarget, + FalconDmaLoadable, + FalconFirmware, // }, firmware::{ FalconUCodeDesc, - FirmwareDmaObject, + FirmwareObject, FirmwareSignature, Signed, Unsigned, // }, - num::{ - FromSafeCast, - IntoSafeCast, // - }, + num::FromSafeCast, vbios::Vbios, }; @@ -177,63 +172,36 @@ impl AsRef<[u8]> for Bcrt30Rsa3kSignature { impl FirmwareSignature for Bcrt30Rsa3kSignature {} -/// Reinterpret the area starting from `offset` in `fw` as an instance of `T` (which must implement -/// [`FromBytes`]) and return a reference to it. -/// -/// # Safety -/// -/// * Callers must ensure that the device does not read/write to/from memory while the returned -/// reference is live. -/// * Callers must ensure that this call does not race with a write to the same region while -/// the returned reference is live. 
-unsafe fn transmute(fw: &DmaObject, offset: usize) -> Result<&T> { - // SAFETY: The safety requirements of the function guarantee the device won't read - // or write to memory while the reference is alive and that this call won't race - // with writes to the same memory region. - T::from_bytes(unsafe { fw.as_slice(offset, size_of::())? }).ok_or(EINVAL) -} - -/// Reinterpret the area starting from `offset` in `fw` as a mutable instance of `T` (which must -/// implement [`FromBytes`]) and return a reference to it. -/// -/// # Safety -/// -/// * Callers must ensure that the device does not read/write to/from memory while the returned -/// slice is live. -/// * Callers must ensure that this call does not race with a read or write to the same region -/// while the returned slice is live. -unsafe fn transmute_mut( - fw: &mut DmaObject, - offset: usize, -) -> Result<&mut T> { - // SAFETY: The safety requirements of the function guarantee the device won't read - // or write to memory while the reference is alive and that this call won't race - // with writes or reads to the same memory region. - T::from_bytes_mut(unsafe { fw.as_slice_mut(offset, size_of::())? }).ok_or(EINVAL) -} - /// The FWSEC microcode, extracted from the BIOS and to be run on the GSP falcon. /// /// It is responsible for e.g. carving out the WPR2 region as the first step of the GSP bootflow. pub(crate) struct FwsecFirmware { /// Descriptor of the firmware. desc: FalconUCodeDesc, - /// GPU-accessible DMA object containing the firmware. - ucode: FirmwareDmaObject, + /// Object containing the firmware binary. 
+ ucode: FirmwareObject, } -impl FalconLoadParams for FwsecFirmware { - fn imem_sec_load_params(&self) -> FalconLoadTarget { +impl FalconDmaLoadable for FwsecFirmware { + fn as_slice(&self) -> &[u8] { + self.ucode.0.as_slice() + } + + fn imem_sec_load_params(&self) -> FalconDmaLoadTarget { self.desc.imem_sec_load_params() } - fn imem_ns_load_params(&self) -> Option { + fn imem_ns_load_params(&self) -> Option { self.desc.imem_ns_load_params() } - fn dmem_load_params(&self) -> FalconLoadTarget { + fn dmem_load_params(&self) -> FalconDmaLoadTarget { self.desc.dmem_load_params() } +} + +impl FalconFirmware for FwsecFirmware { + type Target = Gsp; fn brom_params(&self) -> FalconBromParams { FalconBromParams { @@ -248,27 +216,23 @@ impl FalconLoadParams for FwsecFirmware { } } -impl Deref for FwsecFirmware { - type Target = DmaObject; - - fn deref(&self) -> &Self::Target { - &self.ucode.0 - } -} - -impl FalconFirmware for FwsecFirmware { - type Target = Gsp; -} - -impl FirmwareDmaObject { - fn new_fwsec(dev: &Device, bios: &Vbios, cmd: FwsecCommand) -> Result { +impl FirmwareObject { + fn new_fwsec(bios: &Vbios, cmd: FwsecCommand) -> Result { let desc = bios.fwsec_image().header()?; - let ucode = bios.fwsec_image().ucode(&desc)?; - let mut dma_object = DmaObject::from_data(dev, ucode)?; + let mut ucode = KVVec::new(); + ucode.extend_from_slice(bios.fwsec_image().ucode(&desc)?, GFP_KERNEL)?; - let hdr_offset = usize::from_safe_cast(desc.imem_load_size() + desc.interface_offset()); - // SAFETY: we have exclusive access to `dma_object`. - let hdr: &FalconAppifHdrV1 = unsafe { transmute(&dma_object, hdr_offset) }?; + let hdr_offset = desc + .imem_load_size() + .checked_add(desc.interface_offset()) + .map(usize::from_safe_cast) + .ok_or(EINVAL)?; + + let hdr = ucode + .get(hdr_offset..) + .and_then(FalconAppifHdrV1::from_bytes_prefix) + .ok_or(EINVAL)? 
+ .0; if hdr.version != 1 { return Err(EINVAL); @@ -276,26 +240,34 @@ impl FirmwareDmaObject { // Find the DMEM mapper section in the firmware. for i in 0..usize::from(hdr.entry_count) { - // SAFETY: we have exclusive access to `dma_object`. - let app: &FalconAppifV1 = unsafe { - transmute( - &dma_object, - hdr_offset + usize::from(hdr.header_size) + i * usize::from(hdr.entry_size), - ) - }?; + // CALC: hdr_offset + header_size + i * entry_size. + let entry_offset = hdr_offset + .checked_add(usize::from(hdr.header_size)) + .and_then(|o| o.checked_add(i.checked_mul(usize::from(hdr.entry_size))?)) + .ok_or(EINVAL)?; + + let app = ucode + .get(entry_offset..) + .and_then(FalconAppifV1::from_bytes_prefix) + .ok_or(EINVAL)? + .0; if app.id != NVFW_FALCON_APPIF_ID_DMEMMAPPER { continue; } let dmem_base = app.dmem_base; - // SAFETY: we have exclusive access to `dma_object`. - let dmem_mapper: &mut FalconAppifDmemmapperV3 = unsafe { - transmute_mut( - &mut dma_object, - (desc.imem_load_size() + dmem_base).into_safe_cast(), - ) - }?; + let dmem_mapper_offset = desc + .imem_load_size() + .checked_add(dmem_base) + .map(usize::from_safe_cast) + .ok_or(EINVAL)?; + + let dmem_mapper = ucode + .get_mut(dmem_mapper_offset..) + .and_then(FalconAppifDmemmapperV3::from_bytes_mut_prefix) + .ok_or(EINVAL)? + .0; dmem_mapper.init_cmd = match cmd { FwsecCommand::Frts { .. } => NVFW_FALCON_APPIF_DMEMMAPPER_CMD_FRTS, @@ -303,13 +275,17 @@ impl FirmwareDmaObject { }; let cmd_in_buffer_offset = dmem_mapper.cmd_in_buffer_offset; - // SAFETY: we have exclusive access to `dma_object`. - let frts_cmd: &mut FrtsCmd = unsafe { - transmute_mut( - &mut dma_object, - (desc.imem_load_size() + cmd_in_buffer_offset).into_safe_cast(), - ) - }?; + let frts_cmd_offset = desc + .imem_load_size() + .checked_add(cmd_in_buffer_offset) + .map(usize::from_safe_cast) + .ok_or(EINVAL)?; + + let frts_cmd = ucode + .get_mut(frts_cmd_offset..) + .and_then(FrtsCmd::from_bytes_mut_prefix) + .ok_or(EINVAL)? 
+ .0; frts_cmd.read_vbios = ReadVbios { ver: 1, @@ -333,7 +309,7 @@ impl FirmwareDmaObject { } // Return early as we found and patched the DMEMMAPPER region. - return Ok(Self(dma_object, PhantomData)); + return Ok(Self(ucode, PhantomData)); } Err(ENOTSUPP) @@ -350,13 +326,16 @@ impl FwsecFirmware { bios: &Vbios, cmd: FwsecCommand, ) -> Result { - let ucode_dma = FirmwareDmaObject::::new_fwsec(dev, bios, cmd)?; + let ucode_dma = FirmwareObject::::new_fwsec(bios, cmd)?; // Patch signature if needed. let desc = bios.fwsec_image().header()?; let ucode_signed = if desc.signature_count() != 0 { - let sig_base_img = - usize::from_safe_cast(desc.imem_load_size() + desc.pkc_data_offset()); + let sig_base_img = desc + .imem_load_size() + .checked_add(desc.pkc_data_offset()) + .map(usize::from_safe_cast) + .ok_or(EINVAL)?; let desc_sig_versions = u32::from(desc.signature_versions()); let reg_fuse_version = falcon.signature_reg_fuse_version(bar, desc.engine_id_mask(), desc.ucode_id())?; @@ -408,6 +387,10 @@ impl FwsecFirmware { } /// Loads the FWSEC firmware into `falcon` and execute it. + /// + /// This must only be called on chipsets that do not need the FWSEC bootloader (i.e., where + /// [`Chipset::needs_fwsec_bootloader()`](crate::gpu::Chipset::needs_fwsec_bootloader) returns + /// `false`). On chipsets that do, use [`bootloader::FwsecFirmwareWithBl`] instead. 
pub(crate) fn run( &self, dev: &Device, @@ -419,7 +402,7 @@ impl FwsecFirmware { .reset(bar) .inspect_err(|e| dev_err!(dev, "Failed to reset GSP falcon: {:?}\n", e))?; falcon - .load(bar, self) + .load(dev, bar, self) .inspect_err(|e| dev_err!(dev, "Failed to load FWSEC firmware: {:?}\n", e))?; let (mbox0, _) = falcon .boot(bar, Some(0), None) diff --git a/drivers/gpu/nova-core/firmware/fwsec/bootloader.rs b/drivers/gpu/nova-core/firmware/fwsec/bootloader.rs new file mode 100644 index 000000000000..bcb713a868e2 --- /dev/null +++ b/drivers/gpu/nova-core/firmware/fwsec/bootloader.rs @@ -0,0 +1,350 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Bootloader support for the FWSEC firmware. +//! +//! On Turing, the FWSEC firmware is not loaded directly, but is instead loaded through a small +//! bootloader program that performs the required DMA operations. This bootloader itself needs to +//! be loaded using PIO. + +use kernel::{ + alloc::KVec, + device::{ + self, + Device, // + }, + dma::Coherent, + io::{ + register::WithBase, // + Io, + }, + prelude::*, + ptr::{ + Alignable, + Alignment, // + }, + sizes, + transmute::{ + AsBytes, + FromBytes, // + }, +}; + +use crate::{ + driver::Bar0, + falcon::{ + self, + gsp::Gsp, + Falcon, + FalconBromParams, + FalconDmaLoadable, + FalconFbifMemType, + FalconFbifTarget, + FalconFirmware, + FalconPioDmemLoadTarget, + FalconPioImemLoadTarget, + FalconPioLoadable, // + }, + firmware::{ + fwsec::FwsecFirmware, + request_firmware, + BinHdr, + FIRMWARE_VERSION, // + }, + gpu::Chipset, + num::FromSafeCast, + regs, +}; + +/// Descriptor used by RM to figure out the requirements of the boot loader. +/// +/// Most of its fields appear to be legacy and carry incorrect values, so they are left unused. +#[repr(C)] +#[derive(Debug, Clone)] +struct BootloaderDesc { + /// Starting tag of bootloader. + start_tag: u32, + /// DMEM load offset - unused here as we always load at offset `0`. 
+ _dmem_load_off: u32, + /// Offset of code section in the image. Unused as there is only one section in the bootloader + /// binary. + _code_off: u32, + /// Size of code section in the image. + code_size: u32, + /// Offset of data section in the image. Unused as we build the data section ourselves. + _data_off: u32, + /// Size of data section in the image. Unused as we build the data section ourselves. + _data_size: u32, +} +// SAFETY: any byte sequence is valid for this struct. +unsafe impl FromBytes for BootloaderDesc {} + +/// Structure used by the boot-loader to load the rest of the code. +/// +/// This has to be filled by the GPU driver and copied into DMEM at offset +/// [`BootloaderDesc.dmem_load_off`]. +#[repr(C, packed)] +#[derive(Debug, Clone)] +struct BootloaderDmemDescV2 { + /// Reserved, should always be first element. + reserved: [u32; 4], + /// 16B signature for secure code, 0s if no secure code. + signature: [u32; 4], + /// DMA context used by the bootloader while loading code/data. + ctx_dma: u32, + /// 256B-aligned physical FB address where code is located. + code_dma_base: u64, + /// Offset from `code_dma_base` where the non-secure code is located. + /// + /// Also used as destination IMEM offset of non-secure code as the DMA firmware object is + /// expected to be a mirror image of its loaded state. + /// + /// Must be multiple of 256. + non_sec_code_off: u32, + /// Size of the non-secure code part. + non_sec_code_size: u32, + /// Offset from `code_dma_base` where the secure code is located (must be multiple of 256). + /// + /// Also used as destination IMEM offset of secure code as the DMA firmware object is expected + /// to be a mirror image of its loaded state. + /// + /// Must be multiple of 256. + sec_code_off: u32, + /// Size of the secure code part. + sec_code_size: u32, + /// Code entry point invoked by the bootloader after code is loaded. + code_entry_point: u32, + /// 256B-aligned physical FB address where data is located. 
+ data_dma_base: u64, + /// Size of data block (should be multiple of 256B). + data_size: u32, + /// Number of arguments to be passed to the target firmware being loaded. + argc: u32, + /// Arguments to be passed to the target firmware being loaded. + argv: u32, +} +// SAFETY: This struct doesn't contain uninitialized bytes and doesn't have interior mutability. +unsafe impl AsBytes for BootloaderDmemDescV2 {} + +/// Wrapper for [`FwsecFirmware`] that includes the bootloader performing the actual load +/// operation. +pub(crate) struct FwsecFirmwareWithBl { + /// DMA object the bootloader will copy the firmware from. + _firmware_dma: Coherent<[u8]>, + /// Code of the bootloader to be loaded into non-secure IMEM. + ucode: KVec, + /// Descriptor to be loaded into DMEM for the bootloader to read. + dmem_desc: BootloaderDmemDescV2, + /// Range-validated start offset of the firmware code in IMEM. + imem_dst_start: u16, + /// BROM parameters of the loaded firmware. + brom_params: FalconBromParams, + /// Range-validated `desc.start_tag`. + start_tag: u16, +} + +impl FwsecFirmwareWithBl { + /// Loads the bootloader firmware for `dev` and `chipset`, and wrap `firmware` so it can be + /// loaded using it. + pub(crate) fn new( + firmware: FwsecFirmware, + dev: &Device, + chipset: Chipset, + ) -> Result { + let fw = request_firmware(dev, chipset, "gen_bootloader", FIRMWARE_VERSION)?; + let hdr = fw + .data() + .get(0..size_of::()) + .and_then(BinHdr::from_bytes_copy) + .ok_or(EINVAL)?; + + let desc = { + let desc_offset = usize::from_safe_cast(hdr.header_offset); + + fw.data() + .get(desc_offset..) + .and_then(BootloaderDesc::from_bytes_copy_prefix) + .ok_or(EINVAL)? + .0 + }; + + let ucode = { + let ucode_start = usize::from_safe_cast(hdr.data_offset); + let code_size = usize::from_safe_cast(desc.code_size); + // Align to falcon block size (256 bytes). 
+ let aligned_code_size = code_size + .align_up(Alignment::new::<{ falcon::MEM_BLOCK_ALIGNMENT }>()) + .ok_or(EINVAL)?; + + let mut ucode = KVec::with_capacity(aligned_code_size, GFP_KERNEL)?; + ucode.extend_from_slice( + fw.data() + .get(ucode_start..ucode_start + code_size) + .ok_or(EINVAL)?, + GFP_KERNEL, + )?; + ucode.resize(aligned_code_size, 0, GFP_KERNEL)?; + + ucode + }; + + // `BootloaderDmemDescV2` expects the source to be a mirror image of the destination and + // uses the same offset parameter for both. + // + // Thus, the start of the source object needs to be padded with the difference between the + // destination and source offsets. + // + // In practice, this is expected to always be zero but is required for code correctness. + let (align_padding, firmware_dma) = { + let align_padding = { + let imem_sec = firmware.imem_sec_load_params(); + + imem_sec + .dst_start + .checked_sub(imem_sec.src_start) + .map(usize::from_safe_cast) + .ok_or(EOVERFLOW)? + }; + + let mut firmware_obj = KVVec::new(); + firmware_obj.extend_with(align_padding, 0u8, GFP_KERNEL)?; + firmware_obj.extend_from_slice(firmware.ucode.0.as_slice(), GFP_KERNEL)?; + + ( + align_padding, + Coherent::from_slice(dev, firmware_obj.as_slice(), GFP_KERNEL)?, + ) + }; + + let dmem_desc = { + // Bootloader payload is in non-coherent system memory. + const FALCON_DMAIDX_PHYS_SYS_NCOH: u32 = 4; + + let imem_sec = firmware.imem_sec_load_params(); + let imem_ns = firmware.imem_ns_load_params().ok_or(EINVAL)?; + let dmem = firmware.dmem_load_params(); + + // The bootloader does not have a data destination offset field and copies the data at + // the start of DMEM, so it can only be used if the destination offset of the firmware + // is 0. 
+ if dmem.dst_start != 0 { + return Err(EINVAL); + } + + BootloaderDmemDescV2 { + reserved: [0; 4], + signature: [0; 4], + ctx_dma: FALCON_DMAIDX_PHYS_SYS_NCOH, + code_dma_base: firmware_dma.dma_handle(), + // `dst_start` is also valid as the source offset since the firmware DMA object is + // a mirror image of the target IMEM layout. + non_sec_code_off: imem_ns.dst_start, + non_sec_code_size: imem_ns.len, + // `dst_start` is also valid as the source offset since the firmware DMA object is + // a mirror image of the target IMEM layout. + sec_code_off: imem_sec.dst_start, + sec_code_size: imem_sec.len, + code_entry_point: 0, + // Start of data section is the added padding + the DMEM `src_start` field. + data_dma_base: firmware_dma + .dma_handle() + .checked_add(u64::from_safe_cast(align_padding)) + .and_then(|offset| offset.checked_add(dmem.src_start.into())) + .ok_or(EOVERFLOW)?, + data_size: dmem.len, + argc: 0, + argv: 0, + } + }; + + // The bootloader's code must be loaded in the area right below the first 64K of IMEM. + const BOOTLOADER_LOAD_CEILING: usize = sizes::SZ_64K; + let imem_dst_start = BOOTLOADER_LOAD_CEILING + .checked_sub(ucode.len()) + .ok_or(EOVERFLOW)?; + + Ok(Self { + _firmware_dma: firmware_dma, + ucode, + dmem_desc, + brom_params: firmware.brom_params(), + imem_dst_start: u16::try_from(imem_dst_start)?, + start_tag: u16::try_from(desc.start_tag)?, + }) + } + + /// Loads the bootloader into `falcon` and execute it. + /// + /// The bootloader will load the FWSEC firmware and then execute it. This function returns + /// after FWSEC has reached completion. + pub(crate) fn run( + &self, + dev: &Device, + falcon: &Falcon, + bar: &Bar0, + ) -> Result<()> { + // Reset falcon, load the firmware, and run it. 
+ falcon + .reset(bar) + .inspect_err(|e| dev_err!(dev, "Failed to reset GSP falcon: {:?}\n", e))?; + falcon + .pio_load(bar, self) + .inspect_err(|e| dev_err!(dev, "Failed to load FWSEC firmware: {:?}\n", e))?; + + // Configure DMA index for the bootloader to fetch the FWSEC firmware from system memory. + bar.update( + regs::NV_PFALCON_FBIF_TRANSCFG::of::() + .try_at(usize::from_safe_cast(self.dmem_desc.ctx_dma)) + .ok_or(EINVAL)?, + |v| { + v.with_target(FalconFbifTarget::CoherentSysmem) + .with_mem_type(FalconFbifMemType::Physical) + }, + ); + + let (mbox0, _) = falcon + .boot(bar, Some(0), None) + .inspect_err(|e| dev_err!(dev, "Failed to boot FWSEC firmware: {:?}\n", e))?; + if mbox0 != 0 { + dev_err!(dev, "FWSEC firmware returned error {}\n", mbox0); + Err(EIO) + } else { + Ok(()) + } + } +} + +impl FalconFirmware for FwsecFirmwareWithBl { + type Target = Gsp; + + fn brom_params(&self) -> FalconBromParams { + self.brom_params.clone() + } + + fn boot_addr(&self) -> u32 { + // On V2 platforms, the boot address is extracted from the generic bootloader, because the + // gbl is what actually copies FWSEC into memory, so that is what needs to be booted. 
+        u32::from(self.start_tag) << 8
+    }
+}
+
+impl FalconPioLoadable for FwsecFirmwareWithBl {
+    fn imem_sec_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>> {
+        None
+    }
+
+    fn imem_ns_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>> {
+        Some(FalconPioImemLoadTarget {
+            data: self.ucode.as_ref(),
+            dst_start: self.imem_dst_start,
+            secure: false,
+            start_tag: self.start_tag,
+        })
+    }
+
+    fn dmem_load_params(&self) -> FalconPioDmemLoadTarget<'_> {
+        FalconPioDmemLoadTarget {
+            data: self.dmem_desc.as_bytes(),
+            dst_start: 0,
+        }
+    }
+}
diff --git a/drivers/gpu/nova-core/firmware/gsp.rs b/drivers/gpu/nova-core/firmware/gsp.rs
index 9488a626352f..2fcc255c3bc8 100644
--- a/drivers/gpu/nova-core/firmware/gsp.rs
+++ b/drivers/gpu/nova-core/firmware/gsp.rs
@@ -3,10 +3,11 @@
 use kernel::{
     device,
     dma::{
+        Coherent,
+        CoherentBox,
         DataDirection,
         DmaAddress, //
     },
-    kvec,
     prelude::*,
     scatterlist::{
         Owned,
@@ -15,8 +16,10 @@ use kernel::{
 };
 
 use crate::{
-    dma::DmaObject,
-    firmware::riscv::RiscvFirmware,
+    firmware::{
+        elf,
+        riscv::RiscvFirmware, //
+    },
     gpu::{
         Architecture,
         Chipset, //
@@ -25,92 +28,6 @@ use crate::{
     num::FromSafeCast,
 };
 
-/// Ad-hoc and temporary module to extract sections from ELF images.
-///
-/// Some firmware images are currently packaged as ELF files, where sections names are used as keys
-/// to specific and related bits of data. Future firmware versions are scheduled to move away from
-/// that scheme before nova-core becomes stable, which means this module will eventually be
-/// removed.
-mod elf {
-    use kernel::{
-        bindings,
-        prelude::*,
-        transmute::FromBytes, //
-    };
-
-    /// Newtype to provide a [`FromBytes`] implementation.
-    #[repr(transparent)]
-    struct Elf64Hdr(bindings::elf64_hdr);
-    // SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability.
- unsafe impl FromBytes for Elf64SHdr {} - - /// Tries to extract section with name `name` from the ELF64 image `elf`, and returns it. - pub(super) fn elf64_section<'a, 'b>(elf: &'a [u8], name: &'b str) -> Option<&'a [u8]> { - let hdr = &elf - .get(0..size_of::()) - .and_then(Elf64Hdr::from_bytes)? - .0; - - // Get all the section headers. - let mut shdr = { - let shdr_num = usize::from(hdr.e_shnum); - let shdr_start = usize::try_from(hdr.e_shoff).ok()?; - let shdr_end = shdr_num - .checked_mul(size_of::()) - .and_then(|v| v.checked_add(shdr_start))?; - - elf.get(shdr_start..shdr_end) - .map(|slice| slice.chunks_exact(size_of::()))? - }; - - // Get the strings table. - let strhdr = shdr - .clone() - .nth(usize::from(hdr.e_shstrndx)) - .and_then(Elf64SHdr::from_bytes)?; - - // Find the section which name matches `name` and return it. - shdr.find(|&sh| { - let Some(hdr) = Elf64SHdr::from_bytes(sh) else { - return false; - }; - - let Some(name_idx) = strhdr - .0 - .sh_offset - .checked_add(u64::from(hdr.0.sh_name)) - .and_then(|idx| usize::try_from(idx).ok()) - else { - return false; - }; - - // Get the start of the name. - elf.get(name_idx..) - .and_then(|nstr| CStr::from_bytes_until_nul(nstr).ok()) - // Convert into str. - .and_then(|c_str| c_str.to_str().ok()) - // Check that the name matches. - .map(|str| str == name) - .unwrap_or(false) - }) - // Return the slice containing the section. - .and_then(|sh| { - let hdr = Elf64SHdr::from_bytes(sh)?; - let start = usize::try_from(hdr.0.sh_offset).ok()?; - let end = usize::try_from(hdr.0.sh_size) - .ok() - .and_then(|sh_size| start.checked_add(sh_size))?; - - elf.get(start..end) - }) - } -} - /// GSP firmware with 3-level radix page tables for the GSP bootloader. 
 ///
 /// The bootloader expects firmware to be mapped starting at address 0 in GSP's virtual address
@@ -136,11 +53,11 @@ pub(crate) struct GspFirmware {
     #[pin]
     level1: SGTable>>,
     /// Level 0 page table (single 4KB page) with one entry: DMA address of first level 1 page.
-    level0: DmaObject,
+    level0: Coherent<[u64]>,
     /// Size in bytes of the firmware contained in [`Self::fw`].
     pub(crate) size: usize,
     /// Device-mapped GSP signatures matching the GPU's [`Chipset`].
-    pub(crate) signatures: DmaObject,
+    pub(crate) signatures: Coherent<[u8]>,
     /// GSP bootloader, verifies the GSP firmware before loading and running it.
     pub(crate) bootloader: RiscvFirmware,
 }
@@ -197,17 +114,20 @@ impl GspFirmware {
                 // Allocate the level 0 page table as a device-visible DMA object, and map the
                 // level 1 page table onto it.
-                // Level 0 page table data.
-                let mut level0_data = kvec![0u8; GSP_PAGE_SIZE]?;
-
                 // Fill level 1 page entry.
                 let level1_entry = level1.iter().next().ok_or(EINVAL)?;
                 let level1_entry_addr = level1_entry.dma_address();
-                let dst = &mut level0_data[..size_of_val(&level1_entry_addr)];
-                dst.copy_from_slice(&level1_entry_addr.to_le_bytes());
-
-                // Turn the level0 page table into a [`DmaObject`].
-                DmaObject::from_data(dev, &level0_data)?
+                // Create level 0 page table data and fill its first entry with the level 1
+                // table.
+                let mut level0 = CoherentBox::<[u64]>::zeroed_slice(
+                    dev,
+                    GSP_PAGE_SIZE / size_of::<u64>(),
+                    GFP_KERNEL
+                )?;
+                level0[0] = level1_entry_addr.to_le();
+
+                level0.into()
             },
             size,
             signatures: {
@@ -226,7 +146,7 @@ impl GspFirmware {
                 elf::elf64_section(firmware.data(), sigs_section)
                     .ok_or(EINVAL)
-                    .and_then(|data| DmaObject::from_data(dev, data))?
+                    .and_then(|data| Coherent::from_slice(dev, data, GFP_KERNEL))?
}, bootloader: { let bl = super::request_firmware(dev, chipset, "bootloader", ver)?; diff --git a/drivers/gpu/nova-core/firmware/riscv.rs b/drivers/gpu/nova-core/firmware/riscv.rs index 4bdd89bd0757..2afa7f36404e 100644 --- a/drivers/gpu/nova-core/firmware/riscv.rs +++ b/drivers/gpu/nova-core/firmware/riscv.rs @@ -5,13 +5,13 @@ use kernel::{ device, + dma::Coherent, firmware::Firmware, prelude::*, transmute::FromBytes, // }; use crate::{ - dma::DmaObject, firmware::BinFirmware, num::FromSafeCast, // }; @@ -45,10 +45,11 @@ impl RmRiscvUCodeDesc { /// Fails if the header pointed at by `bin_fw` is not within the bounds of the firmware image. fn new(bin_fw: &BinFirmware<'_>) -> Result { let offset = usize::from_safe_cast(bin_fw.hdr.header_offset); + let end = offset.checked_add(size_of::()).ok_or(EINVAL)?; bin_fw .fw - .get(offset..offset + size_of::()) + .get(offset..end) .and_then(Self::from_bytes_copy) .ok_or(EINVAL) } @@ -65,7 +66,7 @@ pub(crate) struct RiscvFirmware { /// Application version. pub(crate) app_version: u32, /// Device-mapped firmware image. - pub(crate) ucode: DmaObject, + pub(crate) ucode: Coherent<[u8]>, } impl RiscvFirmware { @@ -78,8 +79,9 @@ impl RiscvFirmware { let ucode = { let start = usize::from_safe_cast(bin_fw.hdr.data_offset); let len = usize::from_safe_cast(bin_fw.hdr.data_size); + let end = start.checked_add(len).ok_or(EINVAL)?; - DmaObject::from_data(dev, fw.data().get(start..start + len).ok_or(EINVAL)?)? + Coherent::from_slice(dev, fw.data().get(start..end).ok_or(EINVAL)?, GFP_KERNEL)? }; Ok(Self { diff --git a/drivers/gpu/nova-core/gfw.rs b/drivers/gpu/nova-core/gfw.rs index 9121f400046d..fb75dd10a172 100644 --- a/drivers/gpu/nova-core/gfw.rs +++ b/drivers/gpu/nova-core/gfw.rs @@ -19,7 +19,10 @@ //! Note that the devinit sequence also needs to run during suspend/resume. 
use kernel::{ - io::poll::read_poll_timeout, + io::{ + poll::read_poll_timeout, + Io, // + }, prelude::*, time::Delta, // }; @@ -58,9 +61,11 @@ pub(crate) fn wait_gfw_boot_completion(bar: &Bar0) -> Result { Ok( // Check that FWSEC has lowered its protection level before reading the GFW_BOOT // status. - regs::NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK::read(bar) + bar.read(regs::NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK) .read_protection_level0() - && regs::NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT::read(bar).completed(), + && bar + .read(regs::NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT) + .completed(), ) }, |&gfw_booted| gfw_booted, diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index 9b042ef1a308..0f6fe9a1b955 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -4,12 +4,15 @@ use kernel::{ device, devres::Devres, fmt, + io::Io, + num::Bounded, pci, prelude::*, sync::Arc, // }; use crate::{ + bounded_enum, driver::Bar0, falcon::{ gsp::Gsp as GspFalcon, @@ -92,7 +95,7 @@ define_chipset!({ }); impl Chipset { - pub(crate) fn arch(&self) -> Architecture { + pub(crate) const fn arch(self) -> Architecture { match self { Self::TU102 | Self::TU104 | Self::TU106 | Self::TU117 | Self::TU116 => { Architecture::Turing @@ -105,6 +108,13 @@ impl Chipset { } } } + + /// Returns `true` if this chipset requires the PIO-loaded bootloader in order to boot FWSEC. + /// + /// This includes all chipsets < GA102. + pub(crate) const fn needs_fwsec_bootloader(self) -> bool { + matches!(self.arch(), Architecture::Turing) || matches!(self, Self::GA100) + } } // TODO @@ -121,50 +131,26 @@ impl fmt::Display for Chipset { } } -/// Enum representation of the GPU generation. -/// -/// TODO: remove the `Default` trait implementation, and the `#[default]` -/// attribute, once the register!() macro (which creates Architecture items) no -/// longer requires it for read-only fields. 
-#[derive(fmt::Debug, Default, Copy, Clone)] -#[repr(u8)] -pub(crate) enum Architecture { - #[default] - Turing = 0x16, - Ampere = 0x17, - Ada = 0x19, -} - -impl TryFrom for Architecture { - type Error = Error; - - fn try_from(value: u8) -> Result { - match value { - 0x16 => Ok(Self::Turing), - 0x17 => Ok(Self::Ampere), - 0x19 => Ok(Self::Ada), - _ => Err(ENODEV), - } - } -} - -impl From for u8 { - fn from(value: Architecture) -> Self { - // CAST: `Architecture` is `repr(u8)`, so this cast is always lossless. - value as u8 +bounded_enum! { + /// Enum representation of the GPU generation. + #[derive(fmt::Debug, Copy, Clone)] + pub(crate) enum Architecture with TryFrom> { + Turing = 0x16, + Ampere = 0x17, + Ada = 0x19, } } pub(crate) struct Revision { - major: u8, - minor: u8, + major: Bounded, + minor: Bounded, } impl From for Revision { fn from(boot0: regs::NV_PMC_BOOT_42) -> Self { Self { - major: boot0.major_revision(), - minor: boot0.minor_revision(), + major: boot0.major_revision().cast(), + minor: boot0.minor_revision().cast(), } } } @@ -201,13 +187,13 @@ impl Spec { // from an earlier (pre-Fermi) era, and then using boot42 to precisely identify the GPU. // Somewhere in the Rubin timeframe, boot0 will no longer have space to add new GPU IDs. - let boot0 = regs::NV_PMC_BOOT_0::read(bar); + let boot0 = bar.read(regs::NV_PMC_BOOT_0); if boot0.is_older_than_fermi() { return Err(ENODEV); } - let boot42 = regs::NV_PMC_BOOT_42::read(bar); + let boot42 = bar.read(regs::NV_PMC_BOOT_42); Spec::try_from(boot42).inspect_err(|_| { dev_err!(dev, "Unsupported chipset: {}\n", boot42); }) @@ -262,13 +248,13 @@ impl Gpu { ) -> impl PinInit + 'a { try_pin_init!(Self { spec: Spec::new(pdev.as_ref(), bar).inspect(|spec| { - dev_info!(pdev.as_ref(),"NVIDIA ({})\n", spec); + dev_info!(pdev,"NVIDIA ({})\n", spec); })?, // We must wait for GFW_BOOT completion before doing any significant setup on the GPU. 
_: { gfw::wait_gfw_boot_completion(bar) - .inspect_err(|_| dev_err!(pdev.as_ref(), "GFW boot did not complete\n"))?; + .inspect_err(|_| dev_err!(pdev, "GFW boot did not complete\n"))?; }, sysmem_flush: SysmemFlush::register(pdev.as_ref(), bar, spec.chipset)?, diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs index c69adaa92bbe..ba5b7f990031 100644 --- a/drivers/gpu/nova-core/gsp.rs +++ b/drivers/gpu/nova-core/gsp.rs @@ -3,15 +3,19 @@ mod boot; use kernel::{ + debugfs, device, dma::{ - CoherentAllocation, + Coherent, + CoherentBox, DmaAddress, // }, - dma_write, pci, prelude::*, - transmute::AsBytes, // + transmute::{ + AsBytes, + FromBytes, // + }, // }; pub(crate) mod cmdq; @@ -38,11 +42,15 @@ pub(crate) const GSP_PAGE_SIZE: usize = 1 << GSP_PAGE_SHIFT; /// Number of GSP pages to use in a RM log buffer. const RM_LOG_BUFFER_NUM_PAGES: usize = 0x10; +const LOG_BUFFER_SIZE: usize = RM_LOG_BUFFER_NUM_PAGES * GSP_PAGE_SIZE; /// Array of page table entries, as understood by the GSP bootloader. #[repr(C)] struct PteArray([u64; NUM_ENTRIES]); +/// SAFETY: arrays of `u64` implement `FromBytes` and we are but a wrapper around one. +unsafe impl FromBytes for PteArray {} + /// SAFETY: arrays of `u64` implement `AsBytes` and we are but a wrapper around one. unsafe impl AsBytes for PteArray {} @@ -70,25 +78,18 @@ impl PteArray { /// then pp points to index into the buffer where the next logging entry will /// be written. Therefore, the logging data is valid if: /// 1 <= pp < sizeof(buffer)/sizeof(u64) -struct LogBuffer(CoherentAllocation); +struct LogBuffer(Coherent<[u8; LOG_BUFFER_SIZE]>); impl LogBuffer { /// Creates a new `LogBuffer` mapped on `dev`. 
fn new(dev: &device::Device) -> Result { - const NUM_PAGES: usize = RM_LOG_BUFFER_NUM_PAGES; - - let mut obj = Self(CoherentAllocation::::alloc_coherent( - dev, - NUM_PAGES * GSP_PAGE_SIZE, - GFP_KERNEL | __GFP_ZERO, - )?); + let obj = Self(Coherent::zeroed(dev, GFP_KERNEL)?); let start_addr = obj.0.dma_handle(); // SAFETY: `obj` has just been created and we are its sole user. let pte_region = unsafe { - obj.0 - .as_slice_mut(size_of::(), NUM_PAGES * size_of::())? + &mut obj.0.as_mut()[size_of::()..][..RM_LOG_BUFFER_NUM_PAGES * size_of::()] }; // Write values one by one to avoid an on-stack instance of `PteArray`. @@ -102,21 +103,28 @@ impl LogBuffer { } } -/// GSP runtime data. -#[pin_data] -pub(crate) struct Gsp { - /// Libos arguments. - pub(crate) libos: CoherentAllocation, +struct LogBuffers { /// Init log buffer. loginit: LogBuffer, /// Interrupts log buffer. logintr: LogBuffer, /// RM log buffer. logrm: LogBuffer, +} + +/// GSP runtime data. +#[pin_data] +pub(crate) struct Gsp { + /// Libos arguments. + pub(crate) libos: Coherent<[LibosMemoryRegionInitArgument]>, + /// Log buffers, optionally exposed via debugfs. + #[pin] + logs: debugfs::Scope, /// Command queue. + #[pin] pub(crate) cmdq: Cmdq, /// RM arguments. - rmargs: CoherentAllocation, + rmargs: Coherent, } impl Gsp { @@ -125,34 +133,52 @@ impl Gsp { pin_init::pin_init_scope(move || { let dev = pdev.as_ref(); + let loginit = LogBuffer::new(dev)?; + let logintr = LogBuffer::new(dev)?; + let logrm = LogBuffer::new(dev)?; + + // Initialise the logging structures. 
The OpenRM equivalents are in: + // _kgspInitLibosLoggingStructures (allocates memory for buffers) + // kgspSetupLibosInitArgs_IMPL (creates pLibosInitArgs[] array) Ok(try_pin_init!(Self { - libos: CoherentAllocation::::alloc_coherent( - dev, - GSP_PAGE_SIZE / size_of::(), - GFP_KERNEL | __GFP_ZERO, - )?, - loginit: LogBuffer::new(dev)?, - logintr: LogBuffer::new(dev)?, - logrm: LogBuffer::new(dev)?, - cmdq: Cmdq::new(dev)?, - rmargs: CoherentAllocation::::alloc_coherent( - dev, - 1, - GFP_KERNEL | __GFP_ZERO, - )?, - _: { - // Initialise the logging structures. The OpenRM equivalents are in: - // _kgspInitLibosLoggingStructures (allocates memory for buffers) - // kgspSetupLibosInitArgs_IMPL (creates pLibosInitArgs[] array) - dma_write!( - libos, [0]?, LibosMemoryRegionInitArgument::new("LOGINIT", &loginit.0) - ); - dma_write!( - libos, [1]?, LibosMemoryRegionInitArgument::new("LOGINTR", &logintr.0) - ); - dma_write!(libos, [2]?, LibosMemoryRegionInitArgument::new("LOGRM", &logrm.0)); - dma_write!(rmargs, [0]?.inner, fw::GspArgumentsCached::new(cmdq)); - dma_write!(libos, [3]?, LibosMemoryRegionInitArgument::new("RMARGS", rmargs)); + cmdq <- Cmdq::new(dev), + rmargs: Coherent::init(dev, GFP_KERNEL, GspArgumentsPadded::new(&cmdq))?, + libos: { + let mut libos = CoherentBox::zeroed_slice( + dev, + GSP_PAGE_SIZE / size_of::(), + GFP_KERNEL, + )?; + + libos.init_at(0, LibosMemoryRegionInitArgument::new("LOGINIT", &loginit.0))?; + libos.init_at(1, LibosMemoryRegionInitArgument::new("LOGINTR", &logintr.0))?; + libos.init_at(2, LibosMemoryRegionInitArgument::new("LOGRM", &logrm.0))?; + libos.init_at(3, LibosMemoryRegionInitArgument::new("RMARGS", rmargs))?; + + libos.into() + }, + logs <- { + let log_buffers = LogBuffers { + loginit, + logintr, + logrm, + }; + + #[allow(static_mut_refs)] + // SAFETY: `DEBUGFS_ROOT` is created before driver registration and cleared + // after driver unregistration, so no probe() can race with its modification. 
+ // + // PANIC: `DEBUGFS_ROOT` cannot be `None` here. It is set before driver + // registration and cleared after driver unregistration, so it is always + // `Some` for the entire lifetime that probe() can be called. + let log_parent: &debugfs::Dir = unsafe { crate::DEBUGFS_ROOT.as_ref() } + .expect("DEBUGFS_ROOT not initialized"); + + log_parent.scope(log_buffers, dev.name(), |logs, dir| { + dir.read_binary_file(c"loginit", &logs.loginit.0); + dir.read_binary_file(c"logintr", &logs.logintr.0); + dir.read_binary_file(c"logrm", &logs.logrm.0); + }) }, })) }) diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs index 94833f7996e8..6f707b3d1a54 100644 --- a/drivers/gpu/nova-core/gsp/boot.rs +++ b/drivers/gpu/nova-core/gsp/boot.rs @@ -2,9 +2,9 @@ use kernel::{ device, - dma::CoherentAllocation, - dma_write, + dma::Coherent, io::poll::read_poll_timeout, + io::Io, pci, prelude::*, time::Delta, // @@ -24,6 +24,7 @@ use crate::{ BooterKind, // }, fwsec::{ + bootloader::FwsecFirmwareWithBl, FwsecCommand, FwsecFirmware, // }, @@ -48,6 +49,7 @@ impl super::Gsp { /// created the WPR2 region. fn run_fwsec_frts( dev: &device::Device, + chipset: Chipset, falcon: &Falcon, bar: &Bar0, bios: &Vbios, @@ -55,7 +57,7 @@ impl super::Gsp { ) -> Result<()> { // Check that the WPR2 region does not already exists - if it does, we cannot run // FWSEC-FRTS until the GPU is reset. - if regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI::read(bar).higher_bound() != 0 { + if bar.read(regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI).higher_bound() != 0 { dev_err!( dev, "WPR2 region already exists - GPU needs to be reset to proceed\n" @@ -63,6 +65,7 @@ impl super::Gsp { return Err(EBUSY); } + // FWSEC-FRTS will create the WPR2 region. 
let fwsec_frts = FwsecFirmware::new( dev, falcon, @@ -70,15 +73,23 @@ impl super::Gsp { bios, FwsecCommand::Frts { frts_addr: fb_layout.frts.start, - frts_size: fb_layout.frts.end - fb_layout.frts.start, + frts_size: fb_layout.frts.len(), }, )?; - // Run FWSEC-FRTS to create the WPR2 region. - fwsec_frts.run(dev, falcon, bar)?; + if chipset.needs_fwsec_bootloader() { + let fwsec_frts_bl = FwsecFirmwareWithBl::new(fwsec_frts, dev, chipset)?; + // Load and run the bootloader, which will load FWSEC-FRTS and run it. + fwsec_frts_bl.run(dev, falcon, bar)?; + } else { + // Load and run FWSEC-FRTS directly. + fwsec_frts.run(dev, falcon, bar)?; + } // SCRATCH_E contains the error code for FWSEC-FRTS. - let frts_status = regs::NV_PBUS_SW_SCRATCH_0E_FRTS_ERR::read(bar).frts_err_code(); + let frts_status = bar + .read(regs::NV_PBUS_SW_SCRATCH_0E_FRTS_ERR) + .frts_err_code(); if frts_status != 0 { dev_err!( dev, @@ -91,8 +102,8 @@ impl super::Gsp { // Check that the WPR2 region has been created as we requested. let (wpr2_lo, wpr2_hi) = ( - regs::NV_PFB_PRI_MMU_WPR2_ADDR_LO::read(bar).lower_bound(), - regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI::read(bar).higher_bound(), + bar.read(regs::NV_PFB_PRI_MMU_WPR2_ADDR_LO).lower_bound(), + bar.read(regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI).higher_bound(), ); match (wpr2_lo, wpr2_hi) { @@ -128,7 +139,7 @@ impl super::Gsp { /// /// Upon return, the GSP is up and running, and its runtime object given as return value. 
pub(crate) fn boot( - mut self: Pin<&mut Self>, + self: Pin<&mut Self>, pdev: &pci::Device, bar: &Bar0, chipset: Chipset, @@ -144,7 +155,7 @@ impl super::Gsp { let fb_layout = FbLayout::new(chipset, bar, &gsp_fw)?; dev_dbg!(dev, "{:#x?}\n", fb_layout); - Self::run_fwsec_frts(dev, gsp_falcon, bar, &bios, &fb_layout)?; + Self::run_fwsec_frts(dev, chipset, gsp_falcon, bar, &bios, &fb_layout)?; let booter_loader = BooterFirmware::new( dev, @@ -155,13 +166,12 @@ impl super::Gsp { bar, )?; - let wpr_meta = - CoherentAllocation::::alloc_coherent(dev, 1, GFP_KERNEL | __GFP_ZERO)?; - dma_write!(wpr_meta, [0]?, GspFwWprMeta::new(&gsp_fw, &fb_layout)); + let wpr_meta = Coherent::init(dev, GFP_KERNEL, GspFwWprMeta::new(&gsp_fw, &fb_layout))?; self.cmdq - .send_command(bar, commands::SetSystemInfo::new(pdev))?; - self.cmdq.send_command(bar, commands::SetRegistry::new())?; + .send_command_no_wait(bar, commands::SetSystemInfo::new(pdev))?; + self.cmdq + .send_command_no_wait(bar, commands::SetRegistry::new())?; gsp_falcon.reset(bar)?; let libos_handle = self.libos.dma_handle(); @@ -170,39 +180,25 @@ impl super::Gsp { Some(libos_handle as u32), Some((libos_handle >> 32) as u32), )?; - dev_dbg!( - pdev.as_ref(), - "GSP MBOX0: {:#x}, MBOX1: {:#x}\n", - mbox0, - mbox1 - ); + dev_dbg!(pdev, "GSP MBOX0: {:#x}, MBOX1: {:#x}\n", mbox0, mbox1); dev_dbg!( - pdev.as_ref(), + pdev, "Using SEC2 to load and run the booter_load firmware...\n" ); sec2_falcon.reset(bar)?; - sec2_falcon.load(bar, &booter_loader)?; + sec2_falcon.load(dev, bar, &booter_loader)?; let wpr_handle = wpr_meta.dma_handle(); let (mbox0, mbox1) = sec2_falcon.boot( bar, Some(wpr_handle as u32), Some((wpr_handle >> 32) as u32), )?; - dev_dbg!( - pdev.as_ref(), - "SEC2 MBOX0: {:#x}, MBOX1{:#x}\n", - mbox0, - mbox1 - ); + dev_dbg!(pdev, "SEC2 MBOX0: {:#x}, MBOX1{:#x}\n", mbox0, mbox1); if mbox0 != 0 { - dev_err!( - pdev.as_ref(), - "Booter-load failed with error {:#x}\n", - mbox0 - ); + dev_err!(pdev, "Booter-load failed with 
error {:#x}\n", mbox0); return Err(ENODEV); } @@ -216,11 +212,7 @@ impl super::Gsp { Delta::from_secs(5), )?; - dev_dbg!( - pdev.as_ref(), - "RISC-V active? {}\n", - gsp_falcon.is_riscv_active(bar), - ); + dev_dbg!(pdev, "RISC-V active? {}\n", gsp_falcon.is_riscv_active(bar),); // Create and run the GSP sequencer. let seq_params = GspSequencerParams { @@ -231,16 +223,16 @@ impl super::Gsp { dev: pdev.as_ref().into(), bar, }; - GspSequencer::run(&mut self.cmdq, seq_params)?; + GspSequencer::run(&self.cmdq, seq_params)?; // Wait until GSP is fully initialized. - commands::wait_gsp_init_done(&mut self.cmdq)?; + commands::wait_gsp_init_done(&self.cmdq)?; // Obtain and display basic GPU information. - let info = commands::get_gsp_info(&mut self.cmdq, bar)?; + let info = commands::get_gsp_info(&self.cmdq, bar)?; match info.gpu_name() { - Ok(name) => dev_info!(pdev.as_ref(), "GPU name: {}\n", name), - Err(e) => dev_warn!(pdev.as_ref(), "GPU name unavailable: {:?}\n", e), + Ok(name) => dev_info!(pdev, "GPU name: {}\n", name), + Err(e) => dev_warn!(pdev, "GPU name unavailable: {:?}\n", e), } Ok(()) diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs index 03a4f3599849..2224896ccc89 100644 --- a/drivers/gpu/nova-core/gsp/cmdq.rs +++ b/drivers/gpu/nova-core/gsp/cmdq.rs @@ -1,20 +1,26 @@ // SPDX-License-Identifier: GPL-2.0 -use core::{ - cmp, - mem, // -}; +mod continuation; + +use core::mem; use kernel::{ device, dma::{ - CoherentAllocation, + Coherent, DmaAddress, // }, dma_write, - io::poll::read_poll_timeout, + io::{ + poll::read_poll_timeout, + Io, // + }, + new_mutex, prelude::*, - sync::aref::ARef, + sync::{ + aref::ARef, + Mutex, // + }, time::Delta, transmute::{ AsBytes, @@ -22,6 +28,13 @@ use kernel::{ }, }; +use continuation::{ + ContinuationRecord, + SplitState, // +}; + +use pin_init::pin_init_scope; + use crate::{ driver::Bar0, gsp::{ @@ -29,7 +42,8 @@ use crate::{ GspMsgElement, MsgFunction, MsgqRxHeader, - MsgqTxHeader, // + 
MsgqTxHeader, + GSP_MSG_QUEUE_ELEMENT_SIZE_MAX, // }, PteArray, GSP_PAGE_SHIFT, @@ -40,10 +54,14 @@ use crate::{ sbuffer::SBufferIter, // }; +/// Marker type representing the absence of a reply for a command. Commands using this as their +/// reply type are sent using [`Cmdq::send_command_no_wait`]. +pub(crate) struct NoReply; + /// Trait implemented by types representing a command to send to the GSP. /// -/// The main purpose of this trait is to provide [`Cmdq::send_command`] with the information it -/// needs to send a given command. +/// The main purpose of this trait is to provide [`Cmdq`] with the information it needs to send +/// a given command. /// /// [`CommandToGsp::init`] in particular is responsible for initializing the command directly /// into the space reserved for it in the command queue buffer. @@ -58,6 +76,10 @@ pub(crate) trait CommandToGsp { /// Type generated by [`CommandToGsp::init`], to be written into the command queue buffer. type Command: FromBytes + AsBytes; + /// Type of the reply expected from the GSP, or [`NoReply`] for commands that don't + /// have a reply. + type Reply; + /// Error type returned by [`CommandToGsp::init`]. type InitError; @@ -90,6 +112,12 @@ pub(crate) trait CommandToGsp { ) -> Result { Ok(()) } + + /// Total size of the command (including its variable-length payload) without the + /// [`GspMsgElement`] header. + fn size(&self) -> usize { + size_of::() + self.variable_payload_len() + } } /// Trait representing messages received from the GSP. @@ -159,12 +187,14 @@ pub(super) struct GspMem { /// Self-mapping page table entries. ptes: PteArray<{ Self::PTE_ARRAY_SIZE }>, /// CPU queue: the driver writes commands here, and the GSP reads them. It also contains the - /// write and read pointers that the CPU updates. + /// write and read pointers that the CPU updates. This means that the read pointer here is an + /// index into the GSP queue. /// /// This member is read-only for the GSP. 
pub(super) cpuq: Msgq, /// GSP queue: the GSP writes messages here, and the driver reads them. It also contains the - /// write and read pointers that the GSP updates. + /// write and read pointers that the GSP updates. This means that the read pointer here is an + /// index into the CPU queue. /// /// This member is read-only for the driver. pub(super) gspq: Msgq, @@ -182,7 +212,7 @@ unsafe impl AsBytes for GspMem {} // that is not a problem because they are not used outside the kernel. unsafe impl FromBytes for GspMem {} -/// Wrapper around [`GspMem`] to share it with the GPU using a [`CoherentAllocation`]. +/// Wrapper around [`GspMem`] to share it with the GPU using a [`Coherent`]. /// /// This provides the low-level functionality to communicate with the GSP, including allocation of /// queue space to write messages to and management of read/write pointers. @@ -193,7 +223,7 @@ unsafe impl FromBytes for GspMem {} /// pointer and the GSP read pointer. This region is returned by [`Self::driver_write_area`]. /// * The driver owns (i.e. can read from) the part of the GSP message queue between the CPU read /// pointer and the GSP write pointer. This region is returned by [`Self::driver_read_area`]. -struct DmaGspMem(CoherentAllocation); +struct DmaGspMem(Coherent); impl DmaGspMem { /// Allocate a new instance and map it for `dev`. @@ -201,21 +231,20 @@ impl DmaGspMem { const MSGQ_SIZE: u32 = num::usize_into_u32::<{ size_of::() }>(); const RX_HDR_OFF: u32 = num::usize_into_u32::<{ mem::offset_of!(Msgq, rx) }>(); - let gsp_mem = - CoherentAllocation::::alloc_coherent(dev, 1, GFP_KERNEL | __GFP_ZERO)?; + let gsp_mem = Coherent::::zeroed(dev, GFP_KERNEL)?; let start = gsp_mem.dma_handle(); // Write values one by one to avoid an on-stack instance of `PteArray`. 
for i in 0..GspMem::PTE_ARRAY_SIZE { - dma_write!(gsp_mem, [0]?.ptes.0[i], PteArray::<0>::entry(start, i)?); + dma_write!(gsp_mem, .ptes.0[i], PteArray::<0>::entry(start, i)?); } dma_write!( gsp_mem, - [0]?.cpuq.tx, + .cpuq.tx, MsgqTxHeader::new(MSGQ_SIZE, RX_HDR_OFF, MSGQ_NUM_PAGES) ); - dma_write!(gsp_mem, [0]?.cpuq.rx, MsgqRxHeader::new()); + dma_write!(gsp_mem, .cpuq.rx, MsgqRxHeader::new()); Ok(Self(gsp_mem)) } @@ -230,31 +259,49 @@ impl DmaGspMem { let rx = self.gsp_read_ptr() as usize; // SAFETY: - // - The `CoherentAllocation` contains exactly one object. // - We will only access the driver-owned part of the shared memory. // - Per the safety statement of the function, no concurrent access will be performed. - let gsp_mem = &mut unsafe { self.0.as_slice_mut(0, 1) }.unwrap()[0]; - // PANIC: per the invariant of `cpu_write_ptr`, `tx` is `<= MSGQ_NUM_PAGES`. + let gsp_mem = unsafe { &mut *self.0.as_mut() }; + // PANIC: per the invariant of `cpu_write_ptr`, `tx` is `< MSGQ_NUM_PAGES`. let (before_tx, after_tx) = gsp_mem.cpuq.msgq.data.split_at_mut(tx); - if rx <= tx { - // The area from `tx` up to the end of the ring, and from the beginning of the ring up - // to `rx`, minus one unit, belongs to the driver. - if rx == 0 { - let last = after_tx.len() - 1; - (&mut after_tx[..last], &mut before_tx[0..0]) - } else { - (after_tx, &mut before_tx[..rx]) - } + // The area starting at `tx` and ending at `rx - 2` modulo MSGQ_NUM_PAGES, inclusive, + // belongs to the driver for writing. + + if rx == 0 { + // Since `rx` is zero, leave an empty slot at end of the buffer. + let last = after_tx.len() - 1; + (&mut after_tx[..last], &mut []) + } else if rx <= tx { + // The area is discontiguous and we leave an empty slot before `rx`. + // PANIC: + // - The index `rx - 1` is non-negative because `rx != 0` in this branch. + // - The index does not exceed `before_tx.len()` (which equals `tx`) because + // `rx <= tx` in this branch. 
+ (after_tx, &mut before_tx[..(rx - 1)]) } else { - // The area from `tx` to `rx`, minus one unit, belongs to the driver. - // - // PANIC: per the invariants of `cpu_write_ptr` and `gsp_read_ptr`, `rx` and `tx` are - // `<= MSGQ_NUM_PAGES`, and the test above ensured that `rx > tx`. - (after_tx.split_at_mut(rx - tx).0, &mut before_tx[0..0]) + // The area is contiguous and we leave an empty slot before `rx`. + // PANIC: + // - The index `rx - tx - 1` is non-negative because `rx > tx` in this branch. + // - The index does not exceed `after_tx.len()` (which is `MSGQ_NUM_PAGES - tx`) + // because `rx < MSGQ_NUM_PAGES` by the `gsp_read_ptr` invariant. + (&mut after_tx[..(rx - tx - 1)], &mut []) } } + /// Returns the size of the region of the CPU message queue that the driver is currently allowed + /// to write to, in bytes. + fn driver_write_area_size(&self) -> usize { + let tx = self.cpu_write_ptr(); + let rx = self.gsp_read_ptr(); + + // `rx` and `tx` are both in `0..MSGQ_NUM_PAGES` per the invariants of `gsp_read_ptr` and + // `cpu_write_ptr`. The minimum value case is where `rx == 0` and `tx == MSGQ_NUM_PAGES - + // 1`, which gives `0 + MSGQ_NUM_PAGES - (MSGQ_NUM_PAGES - 1) - 1 == 0`. + let slots = (rx + MSGQ_NUM_PAGES - tx - 1) % MSGQ_NUM_PAGES; + num::u32_as_usize(slots) * GSP_PAGE_SIZE + } + /// Returns the region of the GSP message queue that the driver is currently allowed to read /// from. /// @@ -265,30 +312,46 @@ impl DmaGspMem { let rx = self.cpu_read_ptr() as usize; // SAFETY: - // - The `CoherentAllocation` contains exactly one object. // - We will only access the driver-owned part of the shared memory. // - Per the safety statement of the function, no concurrent access will be performed. - let gsp_mem = &unsafe { self.0.as_slice(0, 1) }.unwrap()[0]; - // PANIC: per the invariant of `cpu_read_ptr`, `xx` is `<= MSGQ_NUM_PAGES`. 
- let (before_rx, after_rx) = gsp_mem.gspq.msgq.data.split_at(rx); + let gsp_mem = unsafe { &*self.0.as_ptr() }; + let data = &gsp_mem.gspq.msgq.data; - match tx.cmp(&rx) { - cmp::Ordering::Equal => (&after_rx[0..0], &after_rx[0..0]), - cmp::Ordering::Greater => (&after_rx[..tx], &before_rx[0..0]), - cmp::Ordering::Less => (after_rx, &before_rx[..tx]), + // The area starting at `rx` and ending at `tx - 1` modulo MSGQ_NUM_PAGES, inclusive, + // belongs to the driver for reading. + // PANIC: + // - per the invariant of `cpu_read_ptr`, `rx < MSGQ_NUM_PAGES` + // - per the invariant of `gsp_write_ptr`, `tx < MSGQ_NUM_PAGES` + if rx <= tx { + // The area is contiguous. + (&data[rx..tx], &[]) + } else { + // The area is discontiguous. + (&data[rx..], &data[..tx]) } } /// Allocates a region on the command queue that is large enough to send a command of `size` - /// bytes. + /// bytes, waiting for space to become available based on the provided timeout. /// /// This returns a [`GspCommand`] ready to be written to by the caller. /// /// # Errors /// - /// - `EAGAIN` if the driver area is too small to hold the requested command. + /// - `EMSGSIZE` if the command is larger than [`GSP_MSG_QUEUE_ELEMENT_SIZE_MAX`]. + /// - `ETIMEDOUT` if space does not become available within the timeout. /// - `EIO` if the command header is not properly aligned. - fn allocate_command(&mut self, size: usize) -> Result> { + fn allocate_command(&mut self, size: usize, timeout: Delta) -> Result> { + if size_of::() + size > GSP_MSG_QUEUE_ELEMENT_SIZE_MAX { + return Err(EMSGSIZE); + } + read_poll_timeout( + || Ok(self.driver_write_area_size()), + |available_bytes| *available_bytes >= size_of::() + size, + Delta::from_micros(1), + timeout, + )?; + // Get the current writable area as an array of bytes. 
let (slice_1, slice_2) = { let (slice_1, slice_2) = self.driver_write_area(); @@ -297,13 +360,6 @@ impl DmaGspMem { (slice_1.as_flattened_mut(), slice_2.as_flattened_mut()) }; - // If the GSP is still processing previous messages the shared region - // may be full in which case we will have to retry once the GSP has - // processed the existing commands. - if size_of::() + size > slice_1.len() + slice_2.len() { - return Err(EAGAIN); - } - // Extract area for the `GspMsgElement`. let (header, slice_1) = GspMsgElement::from_bytes_mut_prefix(slice_1).ok_or(EIO)?; @@ -327,7 +383,7 @@ impl DmaGspMem { // // # Invariants // - // - The returned value is between `0` and `MSGQ_NUM_PAGES`. + // - The returned value is within `0..MSGQ_NUM_PAGES`. fn gsp_write_ptr(&self) -> u32 { super::fw::gsp_mem::gsp_write_ptr(&self.0) } @@ -336,7 +392,7 @@ impl DmaGspMem { // // # Invariants // - // - The returned value is between `0` and `MSGQ_NUM_PAGES`. + // - The returned value is within `0..MSGQ_NUM_PAGES`. fn gsp_read_ptr(&self) -> u32 { super::fw::gsp_mem::gsp_read_ptr(&self.0) } @@ -345,7 +401,7 @@ impl DmaGspMem { // // # Invariants // - // - The returned value is between `0` and `MSGQ_NUM_PAGES`. + // - The returned value is within `0..MSGQ_NUM_PAGES`. fn cpu_read_ptr(&self) -> u32 { super::fw::gsp_mem::cpu_read_ptr(&self.0) } @@ -359,7 +415,7 @@ impl DmaGspMem { // // # Invariants // - // - The returned value is between `0` and `MSGQ_NUM_PAGES`. + // - The returned value is within `0..MSGQ_NUM_PAGES`. fn cpu_write_ptr(&self) -> u32 { super::fw::gsp_mem::cpu_write_ptr(&self.0) } @@ -396,13 +452,13 @@ struct GspMessage<'a> { /// /// Provides the ability to send commands and receive messages from the GSP using a shared memory /// area. +#[pin_data] pub(crate) struct Cmdq { - /// Device this command queue belongs to. - dev: ARef, - /// Current command sequence number. - seq: u32, - /// Memory area shared with the GSP for communicating commands and messages. 
- gsp_mem: DmaGspMem, + /// Inner mutex-protected state. + #[pin] + inner: Mutex, + /// DMA handle of the command queue's shared memory region. + pub(super) dma_handle: DmaAddress, } impl Cmdq { @@ -422,14 +478,22 @@ impl Cmdq { /// Number of page table entries for the GSP shared region. pub(crate) const NUM_PTES: usize = size_of::() >> GSP_PAGE_SHIFT; - /// Creates a new command queue for `dev`. - pub(crate) fn new(dev: &device::Device) -> Result { - let gsp_mem = DmaGspMem::new(dev)?; + /// Default timeout for receiving a message from the GSP. + pub(super) const RECEIVE_TIMEOUT: Delta = Delta::from_secs(5); - Ok(Cmdq { - dev: dev.into(), - seq: 0, - gsp_mem, + /// Creates a new command queue for `dev`. + pub(crate) fn new(dev: &device::Device) -> impl PinInit + '_ { + pin_init_scope(move || { + let gsp_mem = DmaGspMem::new(dev)?; + + Ok(try_pin_init!(Self { + dma_handle: gsp_mem.0.dma_handle(), + inner <- new_mutex!(CmdqInner { + dev: dev.into(), + gsp_mem, + seq: 0, + }), + })) }) } @@ -448,34 +512,115 @@ impl Cmdq { /// Notifies the GSP that we have updated the command queue pointers. fn notify_gsp(bar: &Bar0) { - regs::NV_PGSP_QUEUE_HEAD::default() - .set_address(0) - .write(bar); + bar.write_reg(regs::NV_PGSP_QUEUE_HEAD::zeroed().with_address(0u32)); } - /// Sends `command` to the GSP. + /// Sends `command` to the GSP and waits for the reply. + /// + /// Messages with non-matching function codes are silently consumed until the expected reply + /// arrives. + /// + /// The queue is locked for the entire send+receive cycle to ensure that no other command can + /// be interleaved. /// /// # Errors /// - /// - `EAGAIN` if there was not enough space in the command queue to send the command. + /// - `ETIMEDOUT` if space does not become available to send the command, or if the reply is + /// not received within the timeout. 
+ /// - `EIO` if the variable payload requested by the command has not been entirely + /// written to by its [`CommandToGsp::init_variable_payload`] method. + /// + /// Error codes returned by the command and reply initializers are propagated as-is. + pub(crate) fn send_command(&self, bar: &Bar0, command: M) -> Result + where + M: CommandToGsp, + M::Reply: MessageFromGsp, + Error: From, + Error: From<::InitError>, + { + let mut inner = self.inner.lock(); + inner.send_command(bar, command)?; + + loop { + match inner.receive_msg::(Self::RECEIVE_TIMEOUT) { + Ok(reply) => break Ok(reply), + Err(ERANGE) => continue, + Err(e) => break Err(e), + } + } + } + + /// Sends `command` to the GSP without waiting for a reply. + /// + /// # Errors + /// + /// - `ETIMEDOUT` if space does not become available within the timeout. /// - `EIO` if the variable payload requested by the command has not been entirely /// written to by its [`CommandToGsp::init_variable_payload`] method. /// /// Error codes returned by the command initializers are propagated as-is. - pub(crate) fn send_command(&mut self, bar: &Bar0, command: M) -> Result + pub(crate) fn send_command_no_wait(&self, bar: &Bar0, command: M) -> Result + where + M: CommandToGsp, + Error: From, + { + self.inner.lock().send_command(bar, command) + } + + /// Receive a message from the GSP. + /// + /// See [`CmdqInner::receive_msg`] for details. + pub(crate) fn receive_msg(&self, timeout: Delta) -> Result + where + // This allows all error types, including `Infallible`, to be used for `M::InitError`. + Error: From, + { + self.inner.lock().receive_msg(timeout) + } +} + +/// Inner mutex protected state of [`Cmdq`]. +struct CmdqInner { + /// Device this command queue belongs to. + dev: ARef, + /// Current command sequence number. + seq: u32, + /// Memory area shared with the GSP for communicating commands and messages. + gsp_mem: DmaGspMem, +} + +impl CmdqInner { + /// Timeout for waiting for space on the command queue. 
+ const ALLOCATE_TIMEOUT: Delta = Delta::from_secs(1); + + /// Sends `command` to the GSP, without splitting it. + /// + /// # Errors + /// + /// - `EMSGSIZE` if the command exceeds the maximum queue element size. + /// - `ETIMEDOUT` if space does not become available within the timeout. + /// - `EIO` if the variable payload requested by the command has not been entirely + /// written to by its [`CommandToGsp::init_variable_payload`] method. + /// + /// Error codes returned by the command initializers are propagated as-is. + fn send_single_command(&mut self, bar: &Bar0, command: M) -> Result where M: CommandToGsp, // This allows all error types, including `Infallible`, to be used for `M::InitError`. Error: From, { - let command_size = size_of::() + command.variable_payload_len(); - let dst = self.gsp_mem.allocate_command(command_size)?; + let size_in_bytes = command.size(); + let dst = self + .gsp_mem + .allocate_command(size_in_bytes, Self::ALLOCATE_TIMEOUT)?; - // Extract area for the command itself. + // Extract area for the command itself. The GSP message header and the command header + // together are guaranteed to fit entirely into a single page, so it's ok to only look + // at `dst.contents.0` here. let (cmd, payload_1) = M::Command::from_bytes_mut_prefix(dst.contents.0).ok_or(EIO)?; // Fill the header and command in-place. - let msg_element = GspMsgElement::init(self.seq, command_size, M::FUNCTION); + let msg_element = GspMsgElement::init(self.seq, size_in_bytes, M::FUNCTION); // SAFETY: `msg_header` and `cmd` are valid references, and not touched if the initializer // fails. unsafe { @@ -483,16 +628,14 @@ impl Cmdq { command.init().__init(core::ptr::from_mut(cmd))?; } - // Fill the variable-length payload. - if command_size > size_of::() { - let mut sbuffer = - SBufferIter::new_writer([&mut payload_1[..], &mut dst.contents.1[..]]); - command.init_variable_payload(&mut sbuffer)?; + // Fill the variable-length payload, which may be empty. 
+ let mut sbuffer = SBufferIter::new_writer([&mut payload_1[..], &mut dst.contents.1[..]]); + command.init_variable_payload(&mut sbuffer)?; - if !sbuffer.is_empty() { - return Err(EIO); - } + if !sbuffer.is_empty() { + return Err(EIO); } + drop(sbuffer); // Compute checksum now that the whole message is ready. dst.header @@ -504,7 +647,7 @@ impl Cmdq { dev_dbg!( &self.dev, - "GSP RPC: send: seq# {}, function={}, length=0x{:x}\n", + "GSP RPC: send: seq# {}, function={:?}, length=0x{:x}\n", self.seq, M::FUNCTION, dst.header.length(), @@ -519,6 +662,37 @@ impl Cmdq { Ok(()) } + /// Sends `command` to the GSP. + /// + /// The command may be split into multiple messages if it is large. + /// + /// # Errors + /// + /// - `ETIMEDOUT` if space does not become available within the timeout. + /// - `EIO` if the variable payload requested by the command has not been entirely + /// written to by its [`CommandToGsp::init_variable_payload`] method. + /// + /// Error codes returned by the command initializers are propagated as-is. + fn send_command(&mut self, bar: &Bar0, command: M) -> Result + where + M: CommandToGsp, + Error: From, + { + match SplitState::new(command)? { + SplitState::Single(command) => self.send_single_command(bar, command), + SplitState::Split(command, mut continuations) => { + self.send_single_command(bar, command)?; + + while let Some(continuation) = continuations.next() { + // Turbofish needed because the compiler cannot infer M here. + self.send_single_command::>(bar, continuation)?; + } + + Ok(()) + } + } + } + /// Wait for a message to become available on the message queue. 
/// /// This works purely at the transport layer and does not interpret or validate the message @@ -554,7 +728,7 @@ impl Cmdq { let (header, slice_1) = GspMsgElement::from_bytes_prefix(slice_1).ok_or(EIO)?; dev_dbg!( - self.dev, + &self.dev, "GSP RPC: receive: seq# {}, function={:?}, length=0x{:x}\n", header.sequence(), header.function(), @@ -589,7 +763,7 @@ impl Cmdq { ])) != 0 { dev_err!( - self.dev, + &self.dev, "GSP RPC: receive: Call {} - bad checksum\n", header.sequence() ); @@ -604,23 +778,21 @@ impl Cmdq { /// Receive a message from the GSP. /// - /// `init` is a closure tasked with processing the message. It receives a reference to the - /// message in the message queue, and a [`SBufferIter`] pointing to its variable-length - /// payload, if any. + /// The expected message type is specified using the `M` generic parameter. If the pending + /// message has a different function code, `ERANGE` is returned and the message is consumed. /// - /// The expected message is specified using the `M` generic parameter. If the pending message - /// is different, `EAGAIN` is returned and the unexpected message is dropped. - /// - /// This design is by no means final, but it is simple and will let us go through GSP - /// initialization. + /// The read pointer is always advanced past the message, regardless of whether it matched. /// /// # Errors /// /// - `ETIMEDOUT` if `timeout` has elapsed before any message becomes available. /// - `EIO` if there was some inconsistency (e.g. message shorter than advertised) on the /// message queue. - /// - `EINVAL` if the function of the message was unrecognized. - pub(crate) fn receive_msg(&mut self, timeout: Delta) -> Result + /// - `EINVAL` if the function code of the message was not recognized. + /// - `ERANGE` if the message had a recognized but non-matching function code. + /// + /// Error codes returned by [`MessageFromGsp::read`] are propagated as-is. 
+ fn receive_msg(&mut self, timeout: Delta) -> Result where // This allows all error types, including `Infallible`, to be used for `M::InitError`. Error: From, @@ -634,7 +806,17 @@ impl Cmdq { let (cmd, contents_1) = M::Message::from_bytes_prefix(message.contents.0).ok_or(EIO)?; let mut sbuffer = SBufferIter::new_reader([contents_1, message.contents.1]); - M::read(cmd, &mut sbuffer).map_err(|e| e.into()) + M::read(cmd, &mut sbuffer) + .map_err(|e| e.into()) + .inspect(|_| { + if !sbuffer.is_empty() { + dev_warn!( + &self.dev, + "GSP message {:?} has unprocessed data\n", + function + ); + } + }) } else { Err(ERANGE) }; @@ -646,9 +828,4 @@ impl Cmdq { result } - - /// Returns the DMA handle of the command queue's shared memory region. - pub(crate) fn dma_handle(&self) -> DmaAddress { - self.gsp_mem.0.dma_handle() - } } diff --git a/drivers/gpu/nova-core/gsp/cmdq/continuation.rs b/drivers/gpu/nova-core/gsp/cmdq/continuation.rs new file mode 100644 index 000000000000..05e904f18097 --- /dev/null +++ b/drivers/gpu/nova-core/gsp/cmdq/continuation.rs @@ -0,0 +1,307 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Support for splitting large GSP commands across continuation records. + +use core::convert::Infallible; + +use kernel::prelude::*; + +use super::{ + CommandToGsp, + NoReply, // +}; + +use crate::{ + gsp::fw::{ + GspMsgElement, + MsgFunction, + GSP_MSG_QUEUE_ELEMENT_SIZE_MAX, // + }, + sbuffer::SBufferIter, +}; + +/// Maximum command size that fits in a single queue element. +const MAX_CMD_SIZE: usize = GSP_MSG_QUEUE_ELEMENT_SIZE_MAX - size_of::(); + +/// Acts as an iterator over the continuation records for a split command. +pub(super) struct ContinuationRecords { + payload: KVVec, + offset: usize, +} + +impl ContinuationRecords { + /// Creates a new iterator over continuation records for the given payload. + fn new(payload: KVVec) -> Self { + Self { payload, offset: 0 } + } + + /// Returns the next continuation record, or [`None`] if there are no more. 
+ pub(super) fn next(&mut self) -> Option> { + let remaining = self.payload.len() - self.offset; + + if remaining > 0 { + let chunk_size = remaining.min(MAX_CMD_SIZE); + let record = + ContinuationRecord::new(&self.payload[self.offset..(self.offset + chunk_size)]); + self.offset += chunk_size; + Some(record) + } else { + None + } + } +} + +/// The [`ContinuationRecord`] command. +pub(super) struct ContinuationRecord<'a> { + data: &'a [u8], +} + +impl<'a> ContinuationRecord<'a> { + /// Creates a new [`ContinuationRecord`] command with the given data. + fn new(data: &'a [u8]) -> Self { + Self { data } + } +} + +impl<'a> CommandToGsp for ContinuationRecord<'a> { + const FUNCTION: MsgFunction = MsgFunction::ContinuationRecord; + type Command = (); + type Reply = NoReply; + type InitError = Infallible; + + fn init(&self) -> impl Init { + <()>::init_zeroed() + } + + fn variable_payload_len(&self) -> usize { + self.data.len() + } + + fn init_variable_payload( + &self, + dst: &mut SBufferIter>, + ) -> Result { + dst.write_all(self.data) + } +} + +/// Whether a command needs to be split across continuation records or not. +pub(super) enum SplitState { + /// A command that fits in a single queue element. + Single(C), + /// A command split across continuation records. + Split(SplitCommand, ContinuationRecords), +} + +impl SplitState { + /// Maximum variable payload size that fits in the first command alongside the command header. + const MAX_FIRST_PAYLOAD: usize = MAX_CMD_SIZE - size_of::(); + + /// Creates a new [`SplitState`] for the given command. + /// + /// If the command is too large, it will be split into a main command and some number of + /// continuation records. 
+ pub(super) fn new(command: C) -> Result { + let payload_len = command.variable_payload_len(); + + if command.size() > MAX_CMD_SIZE { + let mut command_payload = + KVVec::::from_elem(0u8, payload_len.min(Self::MAX_FIRST_PAYLOAD), GFP_KERNEL)?; + let mut continuation_payload = + KVVec::::from_elem(0u8, payload_len - command_payload.len(), GFP_KERNEL)?; + let mut sbuffer = SBufferIter::new_writer([ + command_payload.as_mut_slice(), + continuation_payload.as_mut_slice(), + ]); + + command.init_variable_payload(&mut sbuffer)?; + if !sbuffer.is_empty() { + return Err(EIO); + } + drop(sbuffer); + + Ok(Self::Split( + SplitCommand::new(command, command_payload), + ContinuationRecords::new(continuation_payload), + )) + } else { + Ok(Self::Single(command)) + } + } +} + +/// A command that has been truncated to maximum accepted length of the command queue. +/// +/// The remainder of its payload is expected to be sent using [`ContinuationRecords`]. +pub(super) struct SplitCommand { + command: C, + payload: KVVec, +} + +impl SplitCommand { + /// Creates a new [`SplitCommand`] wrapping `command` with the given truncated payload. + fn new(command: C, payload: KVVec) -> Self { + Self { command, payload } + } +} + +impl CommandToGsp for SplitCommand { + const FUNCTION: MsgFunction = C::FUNCTION; + type Command = C::Command; + type Reply = C::Reply; + type InitError = C::InitError; + + fn init(&self) -> impl Init { + self.command.init() + } + + fn variable_payload_len(&self) -> usize { + self.payload.len() + } + + fn init_variable_payload( + &self, + dst: &mut SBufferIter>, + ) -> Result { + dst.write_all(&self.payload) + } +} + +#[kunit_tests(nova_core_gsp_continuation)] +mod tests { + use super::*; + + use kernel::transmute::{ + AsBytes, + FromBytes, // + }; + + /// Non-zero-sized command header for testing. + #[repr(C)] + #[derive(Clone, Copy, Zeroable)] + struct TestHeader([u8; 64]); + + // SAFETY: `TestHeader` is a plain array of bytes for which all bit patterns are valid. 
+ unsafe impl FromBytes for TestHeader {} + + // SAFETY: `TestHeader` is a plain array of bytes for which all bit patterns are valid. + unsafe impl AsBytes for TestHeader {} + + struct TestPayload { + data: KVVec, + } + + impl TestPayload { + fn generate_pattern(len: usize) -> Result> { + let mut data = KVVec::with_capacity(len, GFP_KERNEL)?; + for i in 0..len { + // Mix in higher bits so the pattern does not repeat every 256 bytes. + data.push((i ^ (i >> 8)) as u8, GFP_KERNEL)?; + } + Ok(data) + } + + fn new(len: usize) -> Result { + Ok(Self { + data: Self::generate_pattern(len)?, + }) + } + } + + impl CommandToGsp for TestPayload { + const FUNCTION: MsgFunction = MsgFunction::Nop; + type Command = TestHeader; + type Reply = NoReply; + type InitError = Infallible; + + fn init(&self) -> impl Init { + TestHeader::init_zeroed() + } + + fn variable_payload_len(&self) -> usize { + self.data.len() + } + + fn init_variable_payload( + &self, + dst: &mut SBufferIter>, + ) -> Result { + dst.write_all(self.data.as_slice()) + } + } + + /// Maximum variable payload size that fits in the first command alongside the header. + const MAX_FIRST_PAYLOAD: usize = SplitState::::MAX_FIRST_PAYLOAD; + + fn read_payload(cmd: impl CommandToGsp) -> Result> { + let len = cmd.variable_payload_len(); + let mut buf = KVVec::from_elem(0u8, len, GFP_KERNEL)?; + let mut sbuf = SBufferIter::new_writer([buf.as_mut_slice(), &mut []]); + cmd.init_variable_payload(&mut sbuf)?; + drop(sbuf); + Ok(buf) + } + + struct SplitTest { + payload_size: usize, + num_continuations: usize, + } + + fn check_split(t: SplitTest) -> Result { + let payload = TestPayload::new(t.payload_size)?; + let mut num_continuations = 0; + + let buf = match SplitState::new(payload)? 
{ + SplitState::Single(cmd) => read_payload(cmd)?, + SplitState::Split(cmd, mut continuations) => { + let mut buf = read_payload(cmd)?; + assert!(size_of::() + buf.len() <= MAX_CMD_SIZE); + + while let Some(cont) = continuations.next() { + let payload = read_payload(cont)?; + assert!(payload.len() <= MAX_CMD_SIZE); + buf.extend_from_slice(&payload, GFP_KERNEL)?; + num_continuations += 1; + } + + buf + } + }; + + assert_eq!(num_continuations, t.num_continuations); + assert_eq!( + buf.as_slice(), + TestPayload::generate_pattern(t.payload_size)?.as_slice() + ); + Ok(()) + } + + #[test] + fn split_command() -> Result { + check_split(SplitTest { + payload_size: 0, + num_continuations: 0, + })?; + check_split(SplitTest { + payload_size: MAX_FIRST_PAYLOAD, + num_continuations: 0, + })?; + check_split(SplitTest { + payload_size: MAX_FIRST_PAYLOAD + 1, + num_continuations: 1, + })?; + check_split(SplitTest { + payload_size: MAX_FIRST_PAYLOAD + MAX_CMD_SIZE, + num_continuations: 1, + })?; + check_split(SplitTest { + payload_size: MAX_FIRST_PAYLOAD + MAX_CMD_SIZE + 1, + num_continuations: 2, + })?; + check_split(SplitTest { + payload_size: MAX_FIRST_PAYLOAD + MAX_CMD_SIZE * 3 + MAX_CMD_SIZE / 2, + num_continuations: 4, + })?; + Ok(()) + } +} diff --git a/drivers/gpu/nova-core/gsp/commands.rs b/drivers/gpu/nova-core/gsp/commands.rs index 8f270eca33be..c89c7b57a751 100644 --- a/drivers/gpu/nova-core/gsp/commands.rs +++ b/drivers/gpu/nova-core/gsp/commands.rs @@ -11,7 +11,6 @@ use kernel::{ device, pci, prelude::*, - time::Delta, transmute::{ AsBytes, FromBytes, // @@ -24,7 +23,8 @@ use crate::{ cmdq::{ Cmdq, CommandToGsp, - MessageFromGsp, // + MessageFromGsp, + NoReply, // }, fw::{ commands::*, @@ -49,6 +49,7 @@ impl<'a> SetSystemInfo<'a> { impl<'a> CommandToGsp for SetSystemInfo<'a> { const FUNCTION: MsgFunction = MsgFunction::GspSetSystemInfo; type Command = GspSetSystemInfo; + type Reply = NoReply; type InitError = Error; fn init(&self) -> impl Init { @@ -100,6 +101,7 @@ 
impl SetRegistry { impl CommandToGsp for SetRegistry { const FUNCTION: MsgFunction = MsgFunction::SetRegistry; type Command = PackedRegistryTable; + type Reply = NoReply; type InitError = Infallible; fn init(&self) -> impl Init { @@ -163,9 +165,9 @@ impl MessageFromGsp for GspInitDone { } /// Waits for GSP initialization to complete. -pub(crate) fn wait_gsp_init_done(cmdq: &mut Cmdq) -> Result { +pub(crate) fn wait_gsp_init_done(cmdq: &Cmdq) -> Result { loop { - match cmdq.receive_msg::(Delta::from_secs(10)) { + match cmdq.receive_msg::(Cmdq::RECEIVE_TIMEOUT) { Ok(_) => break Ok(()), Err(ERANGE) => continue, Err(e) => break Err(e), @@ -179,6 +181,7 @@ struct GetGspStaticInfo; impl CommandToGsp for GetGspStaticInfo { const FUNCTION: MsgFunction = MsgFunction::GetGspStaticInfo; type Command = GspStaticConfigInfo; + type Reply = GetGspStaticInfoReply; type InitError = Infallible; fn init(&self) -> impl Init { @@ -231,14 +234,6 @@ impl GetGspStaticInfoReply { } /// Send the [`GetGspInfo`] command and awaits for its reply. 
-pub(crate) fn get_gsp_info(cmdq: &mut Cmdq, bar: &Bar0) -> Result { - cmdq.send_command(bar, GetGspStaticInfo)?; - - loop { - match cmdq.receive_msg::(Delta::from_secs(5)) { - Ok(info) => return Ok(info), - Err(ERANGE) => continue, - Err(e) => return Err(e), - } - } +pub(crate) fn get_gsp_info(cmdq: &Cmdq, bar: &Bar0) -> Result { + cmdq.send_command(bar, GetGspStaticInfo) } diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs index 040b30ec3089..0c8a74f0e8ac 100644 --- a/drivers/gpu/nova-core/gsp/fw.rs +++ b/drivers/gpu/nova-core/gsp/fw.rs @@ -9,12 +9,12 @@ use r570_144 as bindings; use core::ops::Range; use kernel::{ - dma::CoherentAllocation, - fmt, + dma::Coherent, prelude::*, ptr::{ Alignable, - Alignment, // + Alignment, + KnownSize, // }, sizes::{ SZ_128K, @@ -40,8 +40,7 @@ use crate::{ }, }; -// TODO: Replace with `IoView` projections once available; the `unwrap()` calls go away once we -// switch to the new `dma::Coherent` API. +// TODO: Replace with `IoView` projections once available. pub(super) mod gsp_mem { use core::sync::atomic::{ fence, @@ -49,10 +48,9 @@ pub(super) mod gsp_mem { }; use kernel::{ - dma::CoherentAllocation, + dma::Coherent, dma_read, - dma_write, - prelude::*, // + dma_write, // }; use crate::gsp::cmdq::{ @@ -60,55 +58,45 @@ pub(super) mod gsp_mem { MSGQ_NUM_PAGES, // }; - pub(in crate::gsp) fn gsp_write_ptr(qs: &CoherentAllocation) -> u32 { - // PANIC: A `dma::CoherentAllocation` always contains at least one element. - || -> Result { Ok(dma_read!(qs, [0]?.gspq.tx.0.writePtr) % MSGQ_NUM_PAGES) }().unwrap() + pub(in crate::gsp) fn gsp_write_ptr(qs: &Coherent) -> u32 { + dma_read!(qs, .gspq.tx.0.writePtr) % MSGQ_NUM_PAGES } - pub(in crate::gsp) fn gsp_read_ptr(qs: &CoherentAllocation) -> u32 { - // PANIC: A `dma::CoherentAllocation` always contains at least one element. 
- || -> Result { Ok(dma_read!(qs, [0]?.gspq.rx.0.readPtr) % MSGQ_NUM_PAGES) }().unwrap() + pub(in crate::gsp) fn gsp_read_ptr(qs: &Coherent) -> u32 { + dma_read!(qs, .gspq.rx.0.readPtr) % MSGQ_NUM_PAGES } - pub(in crate::gsp) fn cpu_read_ptr(qs: &CoherentAllocation) -> u32 { - // PANIC: A `dma::CoherentAllocation` always contains at least one element. - || -> Result { Ok(dma_read!(qs, [0]?.cpuq.rx.0.readPtr) % MSGQ_NUM_PAGES) }().unwrap() + pub(in crate::gsp) fn cpu_read_ptr(qs: &Coherent) -> u32 { + dma_read!(qs, .cpuq.rx.0.readPtr) % MSGQ_NUM_PAGES } - pub(in crate::gsp) fn advance_cpu_read_ptr(qs: &CoherentAllocation, count: u32) { + pub(in crate::gsp) fn advance_cpu_read_ptr(qs: &Coherent, count: u32) { let rptr = cpu_read_ptr(qs).wrapping_add(count) % MSGQ_NUM_PAGES; // Ensure read pointer is properly ordered. fence(Ordering::SeqCst); - // PANIC: A `dma::CoherentAllocation` always contains at least one element. - || -> Result { - dma_write!(qs, [0]?.cpuq.rx.0.readPtr, rptr); - Ok(()) - }() - .unwrap() + dma_write!(qs, .cpuq.rx.0.readPtr, rptr); } - pub(in crate::gsp) fn cpu_write_ptr(qs: &CoherentAllocation) -> u32 { - // PANIC: A `dma::CoherentAllocation` always contains at least one element. - || -> Result { Ok(dma_read!(qs, [0]?.cpuq.tx.0.writePtr) % MSGQ_NUM_PAGES) }().unwrap() + pub(in crate::gsp) fn cpu_write_ptr(qs: &Coherent) -> u32 { + dma_read!(qs, .cpuq.tx.0.writePtr) % MSGQ_NUM_PAGES } - pub(in crate::gsp) fn advance_cpu_write_ptr(qs: &CoherentAllocation, count: u32) { + pub(in crate::gsp) fn advance_cpu_write_ptr(qs: &Coherent, count: u32) { let wptr = cpu_write_ptr(qs).wrapping_add(count) % MSGQ_NUM_PAGES; - // PANIC: A `dma::CoherentAllocation` always contains at least one element. - || -> Result { - dma_write!(qs, [0]?.cpuq.tx.0.writePtr, wptr); - Ok(()) - }() - .unwrap(); + dma_write!(qs, .cpuq.tx.0.writePtr, wptr); // Ensure all command data is visible before triggering the GSP read. 
fence(Ordering::SeqCst); } } +/// Maximum size of a single GSP message queue element in bytes. +pub(crate) const GSP_MSG_QUEUE_ELEMENT_SIZE_MAX: usize = + num::u32_as_usize(bindings::GSP_MSG_QUEUE_ELEMENT_SIZE_MAX); + /// Empty type to group methods related to heap parameters for running the GSP firmware. enum GspFwHeapParams {} @@ -201,7 +189,9 @@ impl LibosParams { /// Structure passed to the GSP bootloader, containing the framebuffer layout as well as the DMA /// addresses of the GSP bootloader and firmware. #[repr(transparent)] -pub(crate) struct GspFwWprMeta(bindings::GspFwWprMeta); +pub(crate) struct GspFwWprMeta { + inner: bindings::GspFwWprMeta, +} // SAFETY: Padding is explicit and does not contain uninitialized data. unsafe impl AsBytes for GspFwWprMeta {} @@ -214,10 +204,14 @@ type GspFwWprMetaBootResumeInfo = bindings::GspFwWprMeta__bindgen_ty_1; type GspFwWprMetaBootInfo = bindings::GspFwWprMeta__bindgen_ty_1__bindgen_ty_1; impl GspFwWprMeta { - /// Fill in and return a `GspFwWprMeta` suitable for booting `gsp_firmware` using the + /// Returns an initializer for a `GspFwWprMeta` suitable for booting `gsp_firmware` using the /// `fb_layout` layout. - pub(crate) fn new(gsp_firmware: &GspFirmware, fb_layout: &FbLayout) -> Self { - Self(bindings::GspFwWprMeta { + pub(crate) fn new<'a>( + gsp_firmware: &'a GspFirmware, + fb_layout: &'a FbLayout, + ) -> impl Init + 'a { + #[allow(non_snake_case)] + let init_inner = init!(bindings::GspFwWprMeta { // CAST: we want to store the bits of `GSP_FW_WPR_META_MAGIC` unmodified. 
magic: bindings::GSP_FW_WPR_META_MAGIC as u64, revision: u64::from(bindings::GSP_FW_WPR_META_REVISION), @@ -252,7 +246,11 @@ impl GspFwWprMeta { fbSize: fb_layout.fb.end - fb_layout.fb.start, vgaWorkspaceOffset: fb_layout.vga_workspace.start, vgaWorkspaceSize: fb_layout.vga_workspace.end - fb_layout.vga_workspace.start, - ..Default::default() + ..Zeroable::init_zeroed() + }); + + init!(GspFwWprMeta { + inner <- init_inner, }) } } @@ -261,111 +259,81 @@ impl GspFwWprMeta { #[repr(u32)] pub(crate) enum MsgFunction { // Common function codes - Nop = bindings::NV_VGPU_MSG_FUNCTION_NOP, - SetGuestSystemInfo = bindings::NV_VGPU_MSG_FUNCTION_SET_GUEST_SYSTEM_INFO, - AllocRoot = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_ROOT, + AllocChannelDma = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_CHANNEL_DMA, + AllocCtxDma = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_CTX_DMA, AllocDevice = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_DEVICE, AllocMemory = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_MEMORY, - AllocCtxDma = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_CTX_DMA, - AllocChannelDma = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_CHANNEL_DMA, - MapMemory = bindings::NV_VGPU_MSG_FUNCTION_MAP_MEMORY, - BindCtxDma = bindings::NV_VGPU_MSG_FUNCTION_BIND_CTX_DMA, AllocObject = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_OBJECT, + AllocRoot = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_ROOT, + BindCtxDma = bindings::NV_VGPU_MSG_FUNCTION_BIND_CTX_DMA, + ContinuationRecord = bindings::NV_VGPU_MSG_FUNCTION_CONTINUATION_RECORD, Free = bindings::NV_VGPU_MSG_FUNCTION_FREE, - Log = bindings::NV_VGPU_MSG_FUNCTION_LOG, GetGspStaticInfo = bindings::NV_VGPU_MSG_FUNCTION_GET_GSP_STATIC_INFO, - SetRegistry = bindings::NV_VGPU_MSG_FUNCTION_SET_REGISTRY, - GspSetSystemInfo = bindings::NV_VGPU_MSG_FUNCTION_GSP_SET_SYSTEM_INFO, + GetStaticInfo = bindings::NV_VGPU_MSG_FUNCTION_GET_STATIC_INFO, GspInitPostObjGpu = bindings::NV_VGPU_MSG_FUNCTION_GSP_INIT_POST_OBJGPU, GspRmControl = bindings::NV_VGPU_MSG_FUNCTION_GSP_RM_CONTROL, - GetStaticInfo = 
bindings::NV_VGPU_MSG_FUNCTION_GET_STATIC_INFO, + GspSetSystemInfo = bindings::NV_VGPU_MSG_FUNCTION_GSP_SET_SYSTEM_INFO, + Log = bindings::NV_VGPU_MSG_FUNCTION_LOG, + MapMemory = bindings::NV_VGPU_MSG_FUNCTION_MAP_MEMORY, + Nop = bindings::NV_VGPU_MSG_FUNCTION_NOP, + SetGuestSystemInfo = bindings::NV_VGPU_MSG_FUNCTION_SET_GUEST_SYSTEM_INFO, + SetRegistry = bindings::NV_VGPU_MSG_FUNCTION_SET_REGISTRY, // Event codes GspInitDone = bindings::NV_VGPU_MSG_EVENT_GSP_INIT_DONE, + GspLockdownNotice = bindings::NV_VGPU_MSG_EVENT_GSP_LOCKDOWN_NOTICE, + GspPostNoCat = bindings::NV_VGPU_MSG_EVENT_GSP_POST_NOCAT_RECORD, GspRunCpuSequencer = bindings::NV_VGPU_MSG_EVENT_GSP_RUN_CPU_SEQUENCER, - PostEvent = bindings::NV_VGPU_MSG_EVENT_POST_EVENT, - RcTriggered = bindings::NV_VGPU_MSG_EVENT_RC_TRIGGERED, MmuFaultQueued = bindings::NV_VGPU_MSG_EVENT_MMU_FAULT_QUEUED, OsErrorLog = bindings::NV_VGPU_MSG_EVENT_OS_ERROR_LOG, - GspPostNoCat = bindings::NV_VGPU_MSG_EVENT_GSP_POST_NOCAT_RECORD, - GspLockdownNotice = bindings::NV_VGPU_MSG_EVENT_GSP_LOCKDOWN_NOTICE, + PostEvent = bindings::NV_VGPU_MSG_EVENT_POST_EVENT, + RcTriggered = bindings::NV_VGPU_MSG_EVENT_RC_TRIGGERED, UcodeLibOsPrint = bindings::NV_VGPU_MSG_EVENT_UCODE_LIBOS_PRINT, } -impl fmt::Display for MsgFunction { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - // Common function codes - MsgFunction::Nop => write!(f, "NOP"), - MsgFunction::SetGuestSystemInfo => write!(f, "SET_GUEST_SYSTEM_INFO"), - MsgFunction::AllocRoot => write!(f, "ALLOC_ROOT"), - MsgFunction::AllocDevice => write!(f, "ALLOC_DEVICE"), - MsgFunction::AllocMemory => write!(f, "ALLOC_MEMORY"), - MsgFunction::AllocCtxDma => write!(f, "ALLOC_CTX_DMA"), - MsgFunction::AllocChannelDma => write!(f, "ALLOC_CHANNEL_DMA"), - MsgFunction::MapMemory => write!(f, "MAP_MEMORY"), - MsgFunction::BindCtxDma => write!(f, "BIND_CTX_DMA"), - MsgFunction::AllocObject => write!(f, "ALLOC_OBJECT"), - MsgFunction::Free => write!(f, "FREE"), - 
MsgFunction::Log => write!(f, "LOG"), - MsgFunction::GetGspStaticInfo => write!(f, "GET_GSP_STATIC_INFO"), - MsgFunction::SetRegistry => write!(f, "SET_REGISTRY"), - MsgFunction::GspSetSystemInfo => write!(f, "GSP_SET_SYSTEM_INFO"), - MsgFunction::GspInitPostObjGpu => write!(f, "GSP_INIT_POST_OBJGPU"), - MsgFunction::GspRmControl => write!(f, "GSP_RM_CONTROL"), - MsgFunction::GetStaticInfo => write!(f, "GET_STATIC_INFO"), - - // Event codes - MsgFunction::GspInitDone => write!(f, "INIT_DONE"), - MsgFunction::GspRunCpuSequencer => write!(f, "RUN_CPU_SEQUENCER"), - MsgFunction::PostEvent => write!(f, "POST_EVENT"), - MsgFunction::RcTriggered => write!(f, "RC_TRIGGERED"), - MsgFunction::MmuFaultQueued => write!(f, "MMU_FAULT_QUEUED"), - MsgFunction::OsErrorLog => write!(f, "OS_ERROR_LOG"), - MsgFunction::GspPostNoCat => write!(f, "NOCAT"), - MsgFunction::GspLockdownNotice => write!(f, "LOCKDOWN_NOTICE"), - MsgFunction::UcodeLibOsPrint => write!(f, "LIBOS_PRINT"), - } - } -} - impl TryFrom for MsgFunction { type Error = kernel::error::Error; fn try_from(value: u32) -> Result { match value { - bindings::NV_VGPU_MSG_FUNCTION_NOP => Ok(MsgFunction::Nop), - bindings::NV_VGPU_MSG_FUNCTION_SET_GUEST_SYSTEM_INFO => { - Ok(MsgFunction::SetGuestSystemInfo) - } - bindings::NV_VGPU_MSG_FUNCTION_ALLOC_ROOT => Ok(MsgFunction::AllocRoot), + // Common function codes + bindings::NV_VGPU_MSG_FUNCTION_ALLOC_CHANNEL_DMA => Ok(MsgFunction::AllocChannelDma), + bindings::NV_VGPU_MSG_FUNCTION_ALLOC_CTX_DMA => Ok(MsgFunction::AllocCtxDma), bindings::NV_VGPU_MSG_FUNCTION_ALLOC_DEVICE => Ok(MsgFunction::AllocDevice), bindings::NV_VGPU_MSG_FUNCTION_ALLOC_MEMORY => Ok(MsgFunction::AllocMemory), - bindings::NV_VGPU_MSG_FUNCTION_ALLOC_CTX_DMA => Ok(MsgFunction::AllocCtxDma), - bindings::NV_VGPU_MSG_FUNCTION_ALLOC_CHANNEL_DMA => Ok(MsgFunction::AllocChannelDma), - bindings::NV_VGPU_MSG_FUNCTION_MAP_MEMORY => Ok(MsgFunction::MapMemory), - bindings::NV_VGPU_MSG_FUNCTION_BIND_CTX_DMA => 
Ok(MsgFunction::BindCtxDma), bindings::NV_VGPU_MSG_FUNCTION_ALLOC_OBJECT => Ok(MsgFunction::AllocObject), + bindings::NV_VGPU_MSG_FUNCTION_ALLOC_ROOT => Ok(MsgFunction::AllocRoot), + bindings::NV_VGPU_MSG_FUNCTION_BIND_CTX_DMA => Ok(MsgFunction::BindCtxDma), + bindings::NV_VGPU_MSG_FUNCTION_CONTINUATION_RECORD => { + Ok(MsgFunction::ContinuationRecord) + } bindings::NV_VGPU_MSG_FUNCTION_FREE => Ok(MsgFunction::Free), - bindings::NV_VGPU_MSG_FUNCTION_LOG => Ok(MsgFunction::Log), bindings::NV_VGPU_MSG_FUNCTION_GET_GSP_STATIC_INFO => Ok(MsgFunction::GetGspStaticInfo), - bindings::NV_VGPU_MSG_FUNCTION_SET_REGISTRY => Ok(MsgFunction::SetRegistry), - bindings::NV_VGPU_MSG_FUNCTION_GSP_SET_SYSTEM_INFO => Ok(MsgFunction::GspSetSystemInfo), + bindings::NV_VGPU_MSG_FUNCTION_GET_STATIC_INFO => Ok(MsgFunction::GetStaticInfo), bindings::NV_VGPU_MSG_FUNCTION_GSP_INIT_POST_OBJGPU => { Ok(MsgFunction::GspInitPostObjGpu) } bindings::NV_VGPU_MSG_FUNCTION_GSP_RM_CONTROL => Ok(MsgFunction::GspRmControl), - bindings::NV_VGPU_MSG_FUNCTION_GET_STATIC_INFO => Ok(MsgFunction::GetStaticInfo), + bindings::NV_VGPU_MSG_FUNCTION_GSP_SET_SYSTEM_INFO => Ok(MsgFunction::GspSetSystemInfo), + bindings::NV_VGPU_MSG_FUNCTION_LOG => Ok(MsgFunction::Log), + bindings::NV_VGPU_MSG_FUNCTION_MAP_MEMORY => Ok(MsgFunction::MapMemory), + bindings::NV_VGPU_MSG_FUNCTION_NOP => Ok(MsgFunction::Nop), + bindings::NV_VGPU_MSG_FUNCTION_SET_GUEST_SYSTEM_INFO => { + Ok(MsgFunction::SetGuestSystemInfo) + } + bindings::NV_VGPU_MSG_FUNCTION_SET_REGISTRY => Ok(MsgFunction::SetRegistry), + + // Event codes bindings::NV_VGPU_MSG_EVENT_GSP_INIT_DONE => Ok(MsgFunction::GspInitDone), + bindings::NV_VGPU_MSG_EVENT_GSP_LOCKDOWN_NOTICE => Ok(MsgFunction::GspLockdownNotice), + bindings::NV_VGPU_MSG_EVENT_GSP_POST_NOCAT_RECORD => Ok(MsgFunction::GspPostNoCat), bindings::NV_VGPU_MSG_EVENT_GSP_RUN_CPU_SEQUENCER => { Ok(MsgFunction::GspRunCpuSequencer) } - bindings::NV_VGPU_MSG_EVENT_POST_EVENT => Ok(MsgFunction::PostEvent), - 
bindings::NV_VGPU_MSG_EVENT_RC_TRIGGERED => Ok(MsgFunction::RcTriggered), bindings::NV_VGPU_MSG_EVENT_MMU_FAULT_QUEUED => Ok(MsgFunction::MmuFaultQueued), bindings::NV_VGPU_MSG_EVENT_OS_ERROR_LOG => Ok(MsgFunction::OsErrorLog), - bindings::NV_VGPU_MSG_EVENT_GSP_POST_NOCAT_RECORD => Ok(MsgFunction::GspPostNoCat), - bindings::NV_VGPU_MSG_EVENT_GSP_LOCKDOWN_NOTICE => Ok(MsgFunction::GspLockdownNotice), + bindings::NV_VGPU_MSG_EVENT_POST_EVENT => Ok(MsgFunction::PostEvent), + bindings::NV_VGPU_MSG_EVENT_RC_TRIGGERED => Ok(MsgFunction::RcTriggered), bindings::NV_VGPU_MSG_EVENT_UCODE_LIBOS_PRINT => Ok(MsgFunction::UcodeLibOsPrint), _ => Err(EINVAL), } @@ -399,22 +367,6 @@ pub(crate) enum SeqBufOpcode { RegWrite = bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_WRITE, } -impl fmt::Display for SeqBufOpcode { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - SeqBufOpcode::CoreReset => write!(f, "CORE_RESET"), - SeqBufOpcode::CoreResume => write!(f, "CORE_RESUME"), - SeqBufOpcode::CoreStart => write!(f, "CORE_START"), - SeqBufOpcode::CoreWaitForHalt => write!(f, "CORE_WAIT_FOR_HALT"), - SeqBufOpcode::DelayUs => write!(f, "DELAY_US"), - SeqBufOpcode::RegModify => write!(f, "REG_MODIFY"), - SeqBufOpcode::RegPoll => write!(f, "REG_POLL"), - SeqBufOpcode::RegStore => write!(f, "REG_STORE"), - SeqBufOpcode::RegWrite => write!(f, "REG_WRITE"), - } - } -} - impl TryFrom for SeqBufOpcode { type Error = kernel::error::Error; @@ -453,7 +405,7 @@ impl From for u32 { /// Wrapper for GSP sequencer register write payload. #[repr(transparent)] -#[derive(Copy, Clone)] +#[derive(Copy, Clone, Debug)] pub(crate) struct RegWritePayload(bindings::GSP_SEQ_BUF_PAYLOAD_REG_WRITE); impl RegWritePayload { @@ -476,7 +428,7 @@ unsafe impl AsBytes for RegWritePayload {} /// Wrapper for GSP sequencer register modify payload. 
#[repr(transparent)] -#[derive(Copy, Clone)] +#[derive(Copy, Clone, Debug)] pub(crate) struct RegModifyPayload(bindings::GSP_SEQ_BUF_PAYLOAD_REG_MODIFY); impl RegModifyPayload { @@ -504,7 +456,7 @@ unsafe impl AsBytes for RegModifyPayload {} /// Wrapper for GSP sequencer register poll payload. #[repr(transparent)] -#[derive(Copy, Clone)] +#[derive(Copy, Clone, Debug)] pub(crate) struct RegPollPayload(bindings::GSP_SEQ_BUF_PAYLOAD_REG_POLL); impl RegPollPayload { @@ -537,7 +489,7 @@ unsafe impl AsBytes for RegPollPayload {} /// Wrapper for GSP sequencer delay payload. #[repr(transparent)] -#[derive(Copy, Clone)] +#[derive(Copy, Clone, Debug)] pub(crate) struct DelayUsPayload(bindings::GSP_SEQ_BUF_PAYLOAD_DELAY_US); impl DelayUsPayload { @@ -555,7 +507,7 @@ unsafe impl AsBytes for DelayUsPayload {} /// Wrapper for GSP sequencer register store payload. #[repr(transparent)] -#[derive(Copy, Clone)] +#[derive(Copy, Clone, Debug)] pub(crate) struct RegStorePayload(bindings::GSP_SEQ_BUF_PAYLOAD_REG_STORE); impl RegStorePayload { @@ -595,13 +547,7 @@ impl SequencerBufferCmd { return Err(EINVAL); } // SAFETY: Opcode is verified to be `RegWrite`, so union contains valid `RegWritePayload`. - let payload_bytes = unsafe { - core::slice::from_raw_parts( - core::ptr::addr_of!(self.0.payload.regWrite).cast::(), - core::mem::size_of::(), - ) - }; - Ok(*RegWritePayload::from_bytes(payload_bytes).ok_or(EINVAL)?) + Ok(RegWritePayload(unsafe { self.0.payload.regWrite })) } /// Returns the register modify payload by value. @@ -612,13 +558,7 @@ impl SequencerBufferCmd { return Err(EINVAL); } // SAFETY: Opcode is verified to be `RegModify`, so union contains valid `RegModifyPayload`. - let payload_bytes = unsafe { - core::slice::from_raw_parts( - core::ptr::addr_of!(self.0.payload.regModify).cast::(), - core::mem::size_of::(), - ) - }; - Ok(*RegModifyPayload::from_bytes(payload_bytes).ok_or(EINVAL)?) 
+ Ok(RegModifyPayload(unsafe { self.0.payload.regModify })) } /// Returns the register poll payload by value. @@ -629,13 +569,7 @@ impl SequencerBufferCmd { return Err(EINVAL); } // SAFETY: Opcode is verified to be `RegPoll`, so union contains valid `RegPollPayload`. - let payload_bytes = unsafe { - core::slice::from_raw_parts( - core::ptr::addr_of!(self.0.payload.regPoll).cast::(), - core::mem::size_of::(), - ) - }; - Ok(*RegPollPayload::from_bytes(payload_bytes).ok_or(EINVAL)?) + Ok(RegPollPayload(unsafe { self.0.payload.regPoll })) } /// Returns the delay payload by value. @@ -646,13 +580,7 @@ impl SequencerBufferCmd { return Err(EINVAL); } // SAFETY: Opcode is verified to be `DelayUs`, so union contains valid `DelayUsPayload`. - let payload_bytes = unsafe { - core::slice::from_raw_parts( - core::ptr::addr_of!(self.0.payload.delayUs).cast::(), - core::mem::size_of::(), - ) - }; - Ok(*DelayUsPayload::from_bytes(payload_bytes).ok_or(EINVAL)?) + Ok(DelayUsPayload(unsafe { self.0.payload.delayUs })) } /// Returns the register store payload by value. @@ -663,13 +591,7 @@ impl SequencerBufferCmd { return Err(EINVAL); } // SAFETY: Opcode is verified to be `RegStore`, so union contains valid `RegStorePayload`. - let payload_bytes = unsafe { - core::slice::from_raw_parts( - core::ptr::addr_of!(self.0.payload.regStore).cast::(), - core::mem::size_of::(), - ) - }; - Ok(*RegStorePayload::from_bytes(payload_bytes).ok_or(EINVAL)?) + Ok(RegStorePayload(unsafe { self.0.payload.regStore })) } } @@ -711,7 +633,9 @@ unsafe impl AsBytes for RunCpuSequencer {} /// The memory allocated for the arguments must remain until the GSP sends the /// init_done RPC. #[repr(transparent)] -pub(crate) struct LibosMemoryRegionInitArgument(bindings::LibosMemoryRegionInitArgument); +pub(crate) struct LibosMemoryRegionInitArgument { + inner: bindings::LibosMemoryRegionInitArgument, +} // SAFETY: Padding is explicit and does not contain uninitialized data. 
unsafe impl AsBytes for LibosMemoryRegionInitArgument {} @@ -721,10 +645,10 @@ unsafe impl AsBytes for LibosMemoryRegionInitArgument {} unsafe impl FromBytes for LibosMemoryRegionInitArgument {} impl LibosMemoryRegionInitArgument { - pub(crate) fn new( + pub(crate) fn new<'a, A: AsBytes + FromBytes + KnownSize + ?Sized>( name: &'static str, - obj: &CoherentAllocation, - ) -> Self { + obj: &'a Coherent, + ) -> impl Init + 'a { /// Generates the `ID8` identifier required for some GSP objects. fn id8(name: &str) -> u64 { let mut bytes = [0u8; core::mem::size_of::()]; @@ -736,7 +660,8 @@ impl LibosMemoryRegionInitArgument { u64::from_ne_bytes(bytes) } - Self(bindings::LibosMemoryRegionInitArgument { + #[allow(non_snake_case)] + let init_inner = init!(bindings::LibosMemoryRegionInitArgument { id8: id8(name), pa: obj.dma_handle(), size: num::usize_as_u64(obj.size()), @@ -746,7 +671,11 @@ impl LibosMemoryRegionInitArgument { loc: num::u32_into_u8::< { bindings::LibosMemoryRegionLoc_LIBOS_MEMORY_REGION_LOC_SYSMEM }, >(), - ..Default::default() + ..Zeroable::init_zeroed() + }); + + init!(LibosMemoryRegionInitArgument { + inner <- init_inner, }) } } @@ -925,15 +854,23 @@ unsafe impl FromBytes for GspMsgElement {} /// Arguments for GSP startup. #[repr(transparent)] -pub(crate) struct GspArgumentsCached(bindings::GSP_ARGUMENTS_CACHED); +#[derive(Zeroable)] +pub(crate) struct GspArgumentsCached { + inner: bindings::GSP_ARGUMENTS_CACHED, +} impl GspArgumentsCached { /// Creates the arguments for starting the GSP up using `cmdq` as its command queue. 
- pub(crate) fn new(cmdq: &Cmdq) -> Self { - Self(bindings::GSP_ARGUMENTS_CACHED { - messageQueueInitArguments: MessageQueueInitArguments::new(cmdq).0, + pub(crate) fn new(cmdq: &Cmdq) -> impl Init + '_ { + #[allow(non_snake_case)] + let init_inner = init!(bindings::GSP_ARGUMENTS_CACHED { + messageQueueInitArguments <- MessageQueueInitArguments::new(cmdq), bDmemStack: 1, - ..Default::default() + ..Zeroable::init_zeroed() + }); + + init!(GspArgumentsCached { + inner <- init_inner, }) } } @@ -945,11 +882,21 @@ unsafe impl AsBytes for GspArgumentsCached {} /// must all be a multiple of GSP_PAGE_SIZE in size, so add padding to force it /// to that size. #[repr(C)] +#[derive(Zeroable)] pub(crate) struct GspArgumentsPadded { pub(crate) inner: GspArgumentsCached, _padding: [u8; GSP_PAGE_SIZE - core::mem::size_of::()], } +impl GspArgumentsPadded { + pub(crate) fn new(cmdq: &Cmdq) -> impl Init + '_ { + init!(GspArgumentsPadded { + inner <- GspArgumentsCached::new(cmdq), + ..Zeroable::init_zeroed() + }) + } +} + // SAFETY: Padding is explicit and will not contain uninitialized data. unsafe impl AsBytes for GspArgumentsPadded {} @@ -958,18 +905,18 @@ unsafe impl AsBytes for GspArgumentsPadded {} unsafe impl FromBytes for GspArgumentsPadded {} /// Init arguments for the message queue. -#[repr(transparent)] -struct MessageQueueInitArguments(bindings::MESSAGE_QUEUE_INIT_ARGUMENTS); +type MessageQueueInitArguments = bindings::MESSAGE_QUEUE_INIT_ARGUMENTS; impl MessageQueueInitArguments { /// Creates a new init arguments structure for `cmdq`. 
- fn new(cmdq: &Cmdq) -> Self { - Self(bindings::MESSAGE_QUEUE_INIT_ARGUMENTS { - sharedMemPhysAddr: cmdq.dma_handle(), + #[allow(non_snake_case)] + fn new(cmdq: &Cmdq) -> impl Init + '_ { + init!(MessageQueueInitArguments { + sharedMemPhysAddr: cmdq.dma_handle, pageTableEntryCount: num::usize_into_u32::<{ Cmdq::NUM_PTES }>(), cmdQueueOffset: num::usize_as_u64(Cmdq::CMDQ_OFFSET), statQueueOffset: num::usize_as_u64(Cmdq::STATQ_OFFSET), - ..Default::default() + ..Zeroable::init_zeroed() }) } } diff --git a/drivers/gpu/nova-core/gsp/fw/commands.rs b/drivers/gpu/nova-core/gsp/fw/commands.rs index 21be44199693..db46276430be 100644 --- a/drivers/gpu/nova-core/gsp/fw/commands.rs +++ b/drivers/gpu/nova-core/gsp/fw/commands.rs @@ -1,8 +1,14 @@ // SPDX-License-Identifier: GPL-2.0 -use kernel::prelude::*; -use kernel::transmute::{AsBytes, FromBytes}; -use kernel::{device, pci}; +use kernel::{ + device, + pci, + prelude::*, + transmute::{ + AsBytes, + FromBytes, // + }, // +}; use crate::gsp::GSP_PAGE_SIZE; @@ -107,6 +113,7 @@ unsafe impl FromBytes for PackedRegistryTable {} /// Payload of the `GetGspStaticInfo` command and message. #[repr(transparent)] +#[derive(Zeroable)] pub(crate) struct GspStaticConfigInfo(bindings::GspStaticConfigInfo_t); impl GspStaticConfigInfo { @@ -122,7 +129,3 @@ unsafe impl AsBytes for GspStaticConfigInfo {} // SAFETY: This struct only contains integer types for which all bit patterns // are valid. unsafe impl FromBytes for GspStaticConfigInfo {} - -// SAFETY: This struct only contains integer types and fixed-size arrays for which -// all bit patterns are valid. 
-unsafe impl Zeroable for GspStaticConfigInfo {} diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs index 6d25fe0bffa9..334e8be5fde8 100644 --- a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs +++ b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs @@ -43,6 +43,7 @@ pub const GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS3_BAREMETAL_MAX_MB: u32 = 280; pub const GSP_FW_WPR_META_REVISION: u32 = 1; pub const GSP_FW_WPR_META_MAGIC: i64 = -2577556379034558285; pub const REGISTRY_TABLE_ENTRY_TYPE_DWORD: u32 = 1; +pub const GSP_MSG_QUEUE_ELEMENT_SIZE_MAX: u32 = 65536; pub type __u8 = ffi::c_uchar; pub type __u16 = ffi::c_ushort; pub type __u32 = ffi::c_uint; diff --git a/drivers/gpu/nova-core/gsp/sequencer.rs b/drivers/gpu/nova-core/gsp/sequencer.rs index e415a2aa3203..474e4c8021db 100644 --- a/drivers/gpu/nova-core/gsp/sequencer.rs +++ b/drivers/gpu/nova-core/gsp/sequencer.rs @@ -67,6 +67,7 @@ const CMD_SIZE: usize = size_of::(); /// GSP Sequencer Command types with payload data. /// Commands have an opcode and an opcode-dependent struct. #[allow(clippy::enum_variant_names)] +#[derive(Debug)] pub(crate) enum GspSeqCmd { RegWrite(fw::RegWritePayload), RegModify(fw::RegModifyPayload), @@ -144,12 +145,7 @@ pub(crate) struct GspSequencer<'a> { dev: ARef, } -/// Trait for running sequencer commands. 
-pub(crate) trait GspSeqCmdRunner { - fn run(&self, sequencer: &GspSequencer<'_>) -> Result; -} - -impl GspSeqCmdRunner for fw::RegWritePayload { +impl fw::RegWritePayload { fn run(&self, sequencer: &GspSequencer<'_>) -> Result { let addr = usize::from_safe_cast(self.addr()); @@ -157,7 +153,7 @@ impl GspSeqCmdRunner for fw::RegWritePayload { } } -impl GspSeqCmdRunner for fw::RegModifyPayload { +impl fw::RegModifyPayload { fn run(&self, sequencer: &GspSequencer<'_>) -> Result { let addr = usize::from_safe_cast(self.addr()); @@ -169,7 +165,7 @@ impl GspSeqCmdRunner for fw::RegModifyPayload { } } -impl GspSeqCmdRunner for fw::RegPollPayload { +impl fw::RegPollPayload { fn run(&self, sequencer: &GspSequencer<'_>) -> Result { let addr = usize::from_safe_cast(self.addr()); @@ -194,14 +190,14 @@ impl GspSeqCmdRunner for fw::RegPollPayload { } } -impl GspSeqCmdRunner for fw::DelayUsPayload { +impl fw::DelayUsPayload { fn run(&self, _sequencer: &GspSequencer<'_>) -> Result { fsleep(Delta::from_micros(i64::from(self.val()))); Ok(()) } } -impl GspSeqCmdRunner for fw::RegStorePayload { +impl fw::RegStorePayload { fn run(&self, sequencer: &GspSequencer<'_>) -> Result { let addr = usize::from_safe_cast(self.addr()); @@ -209,7 +205,7 @@ impl GspSeqCmdRunner for fw::RegStorePayload { } } -impl GspSeqCmdRunner for GspSeqCmd { +impl GspSeqCmd { fn run(&self, seq: &GspSequencer<'_>) -> Result { match self { GspSeqCmd::RegWrite(cmd) => cmd.run(seq), @@ -360,9 +356,9 @@ pub(crate) struct GspSequencerParams<'a> { } impl<'a> GspSequencer<'a> { - pub(crate) fn run(cmdq: &mut Cmdq, params: GspSequencerParams<'a>) -> Result { + pub(crate) fn run(cmdq: &Cmdq, params: GspSequencerParams<'a>) -> Result { let seq_info = loop { - match cmdq.receive_msg::(Delta::from_secs(10)) { + match cmdq.receive_msg::(Cmdq::RECEIVE_TIMEOUT) { Ok(seq_info) => break seq_info, Err(ERANGE) => continue, Err(e) => return Err(e), diff --git a/drivers/gpu/nova-core/nova_core.rs b/drivers/gpu/nova-core/nova_core.rs 
index c1121e7c64c5..04a1fa6b25f8 100644 --- a/drivers/gpu/nova-core/nova_core.rs +++ b/drivers/gpu/nova-core/nova_core.rs @@ -2,10 +2,17 @@ //! Nova Core GPU Driver +use kernel::{ + debugfs, + driver::Registration, + pci, + prelude::*, + InPlaceModule, // +}; + #[macro_use] mod bitfield; -mod dma; mod driver; mod falcon; mod fb; @@ -13,15 +20,54 @@ mod firmware; mod gfw; mod gpu; mod gsp; +#[macro_use] mod num; mod regs; mod sbuffer; mod vbios; -pub(crate) const MODULE_NAME: &kernel::str::CStr = ::NAME; +pub(crate) const MODULE_NAME: &core::ffi::CStr = ::NAME; -kernel::module_pci_driver! { - type: driver::NovaCore, +// TODO: Move this into per-module data once that exists. +static mut DEBUGFS_ROOT: Option = None; + +/// Guard that clears `DEBUGFS_ROOT` when dropped. +struct DebugfsRootGuard; + +impl Drop for DebugfsRootGuard { + fn drop(&mut self) { + // SAFETY: This guard is dropped after `_driver` (due to field order), + // so the driver is unregistered and no probe() can be running. + unsafe { DEBUGFS_ROOT = None }; + } +} + +#[pin_data] +struct NovaCoreModule { + // Fields are dropped in declaration order, so `_driver` is dropped first, + // then `_debugfs_guard` clears `DEBUGFS_ROOT`. + #[pin] + _driver: Registration>, + _debugfs_guard: DebugfsRootGuard, +} + +impl InPlaceModule for NovaCoreModule { + fn init(module: &'static kernel::ThisModule) -> impl PinInit { + let dir = debugfs::Dir::new(kernel::c_str!("nova_core")); + + // SAFETY: We are the only driver code running during init, so there + // cannot be any concurrent access to `DEBUGFS_ROOT`. + unsafe { DEBUGFS_ROOT = Some(dir) }; + + try_pin_init!(Self { + _driver <- Registration::new(MODULE_NAME, module), + _debugfs_guard: DebugfsRootGuard, + }) + } +} + +module! 
{ + type: NovaCoreModule, name: "NovaCore", authors: ["Danilo Krummrich"], description: "Nova Core GPU driver", diff --git a/drivers/gpu/nova-core/num.rs b/drivers/gpu/nova-core/num.rs index c952a834e662..6c824b8d7b97 100644 --- a/drivers/gpu/nova-core/num.rs +++ b/drivers/gpu/nova-core/num.rs @@ -215,3 +215,83 @@ impl_const_into!(usize => { u8, u16, u32 }); impl_const_into!(u64 => { u8, u16, u32 }); impl_const_into!(u32 => { u8, u16 }); impl_const_into!(u16 => { u8 }); + +/// Creates an enum type associated to a [`Bounded`](kernel::num::Bounded), with a [`From`] +/// conversion to the associated `Bounded` and either a [`TryFrom`] or `From` conversion from the +/// associated `Bounded`. +// TODO[FPRI]: This is a temporary solution to be replaced with the corresponding derive macros +// once they land. +#[macro_export] +macro_rules! bounded_enum { + ( + $(#[$enum_meta:meta])* + $vis:vis enum $enum_type:ident with $from_impl:ident> { + $( $(#[doc = $variant_doc:expr])* $variant:ident = $value:expr),* $(,)* + } + ) => { + $(#[$enum_meta])* + $vis enum $enum_type { + $( + $(#[doc = $variant_doc])* + $variant = $value + ),* + } + + impl core::convert::From<$enum_type> for kernel::num::Bounded<$width, $length> { + fn from(value: $enum_type) -> Self { + match value { + $($enum_type::$variant => + kernel::num::Bounded::<$width, _>::new::<{ $value }>()),* + } + } + } + + bounded_enum!(@impl_from $enum_type with $from_impl> { + $($variant = $value),* + }); + }; + + // `TryFrom` implementation from associated `Bounded` to enum type. 
+ (@impl_from $enum_type:ident with TryFrom> { + $($variant:ident = $value:expr),* $(,)* + }) => { + impl core::convert::TryFrom> for $enum_type { + type Error = kernel::error::Error; + + fn try_from( + value: kernel::num::Bounded<$width, $length> + ) -> kernel::error::Result { + match value.get() { + $( + $value => Ok($enum_type::$variant), + )* + _ => Err(kernel::error::code::EINVAL), + } + } + } + }; + + // `From` implementation from associated `Bounded` to enum type. Triggers a build-time error if + // all possible values of the `Bounded` are not covered by the enum type. + (@impl_from $enum_type:ident with From> { + $($variant:ident = $value:expr),* $(,)* + }) => { + impl core::convert::From> for $enum_type { + fn from(value: kernel::num::Bounded<$width, $length>) -> Self { + const MAX: $width = 1 << $length; + + // Makes the compiler optimizer aware of the possible range of values. + let value = value.get() & ((1 << $length) - 1); + match value { + $( + $value => $enum_type::$variant, + )* + // PANIC: we cannot reach this arm as all possible variants are handled by the + // match arms above. It is here to make the compiler complain if `$enum_type` + // does not cover all values of the `0..MAX` range. + MAX.. => unreachable!(), + } + } + } + } +} diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index ea0d32f5396c..2f171a4ff9ba 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -1,13 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 -// Required to retain the original register names used by OpenRM, which are all capital snake case -// but are mapped to types. 
-#![allow(non_camel_case_types)] - -#[macro_use] -pub(crate) mod macros; - use kernel::{ + io::{ + register, + register::WithBase, + Io, // + }, prelude::*, time, // }; @@ -37,18 +35,38 @@ use crate::{ // PMC -register!(NV_PMC_BOOT_0 @ 0x00000000, "Basic revision information about the GPU" { - 3:0 minor_revision as u8, "Minor revision of the chip"; - 7:4 major_revision as u8, "Major revision of the chip"; - 8:8 architecture_1 as u8, "MSB of the architecture"; - 23:20 implementation as u8, "Implementation version of the architecture"; - 28:24 architecture_0 as u8, "Lower bits of the architecture"; -}); +register! { + /// Basic revision information about the GPU. + pub(crate) NV_PMC_BOOT_0(u32) @ 0x00000000 { + /// Lower bits of the architecture. + 28:24 architecture_0; + /// Implementation version of the architecture. + 23:20 implementation; + /// MSB of the architecture. + 8:8 architecture_1; + /// Major revision of the chip. + 7:4 major_revision; + /// Minor revision of the chip. + 3:0 minor_revision; + } + + /// Extended architecture information. + pub(crate) NV_PMC_BOOT_42(u32) @ 0x00000a00 { + /// Architecture value. + 29:24 architecture ?=> Architecture; + /// Implementation version of the architecture. + 23:20 implementation; + /// Major revision of the chip. + 19:16 major_revision; + /// Minor revision of the chip. + 15:12 minor_revision; + } +} impl NV_PMC_BOOT_0 { pub(crate) fn is_older_than_fermi(self) -> bool { // From https://github.com/NVIDIA/open-gpu-doc/tree/master/manuals : - const NV_PMC_BOOT_0_ARCHITECTURE_GF100: u8 = 0xc; + const NV_PMC_BOOT_0_ARCHITECTURE_GF100: u32 = 0xc; // Older chips left arch1 zeroed out. That, combined with an arch0 value that is less than // GF100, means "older than Fermi". 
@@ -56,13 +74,6 @@ impl NV_PMC_BOOT_0 { } } -register!(NV_PMC_BOOT_42 @ 0x00000a00, "Extended architecture information" { - 15:12 minor_revision as u8, "Minor revision of the chip"; - 19:16 major_revision as u8, "Major revision of the chip"; - 23:20 implementation as u8, "Implementation version of the architecture"; - 29:24 architecture as u8 ?=> Architecture, "Architecture value"; -}); - impl NV_PMC_BOOT_42 { /// Combines `architecture` and `implementation` to obtain a code unique to the chipset. pub(crate) fn chipset(self) -> Result { @@ -76,8 +87,8 @@ impl NV_PMC_BOOT_42 { /// Returns the raw architecture value from the register. fn architecture_raw(self) -> u8 { - ((self.0 >> Self::ARCHITECTURE_RANGE.start()) & ((1 << Self::ARCHITECTURE_RANGE.len()) - 1)) - as u8 + ((self.into_raw() >> Self::ARCHITECTURE_RANGE.start()) + & ((1 << Self::ARCHITECTURE_RANGE.len()) - 1)) as u8 } } @@ -86,7 +97,7 @@ impl kernel::fmt::Display for NV_PMC_BOOT_42 { write!( f, "boot42 = 0x{:08x} (architecture 0x{:x}, implementation 0x{:x})", - self.0, + self.inner, self.architecture_raw(), self.implementation() ) @@ -95,35 +106,46 @@ impl kernel::fmt::Display for NV_PMC_BOOT_42 { // PBUS -register!(NV_PBUS_SW_SCRATCH @ 0x00001400[64] {}); +register! { + pub(crate) NV_PBUS_SW_SCRATCH(u32)[64] @ 0x00001400 {} -register!(NV_PBUS_SW_SCRATCH_0E_FRTS_ERR => NV_PBUS_SW_SCRATCH[0xe], - "scratch register 0xe used as FRTS firmware error code" { - 31:16 frts_err_code as u16; -}); + /// Scratch register 0xe used as FRTS firmware error code. + pub(crate) NV_PBUS_SW_SCRATCH_0E_FRTS_ERR(u32) => NV_PBUS_SW_SCRATCH[0xe] { + 31:16 frts_err_code; + } +} // PFB -// The following two registers together hold the physical system memory address that is used by the -// GPU to perform sysmembar operations (see `fb::SysmemFlush`). +register! { + /// Low bits of the physical system memory address used by the GPU to perform sysmembar + /// operations (see [`crate::fb::SysmemFlush`]). 
+ pub(crate) NV_PFB_NISO_FLUSH_SYSMEM_ADDR(u32) @ 0x00100c10 { + 31:0 adr_39_08; + } -register!(NV_PFB_NISO_FLUSH_SYSMEM_ADDR @ 0x00100c10 { - 31:0 adr_39_08 as u32; -}); + /// High bits of the physical system memory address used by the GPU to perform sysmembar + /// operations (see [`crate::fb::SysmemFlush`]). + pub(crate) NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI(u32) @ 0x00100c40 { + 23:0 adr_63_40; + } -register!(NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI @ 0x00100c40 { - 23:0 adr_63_40 as u32; -}); + pub(crate) NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE(u32) @ 0x00100ce0 { + 30:30 ecc_mode_enabled => bool; + 9:4 lower_mag; + 3:0 lower_scale; + } -register!(NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE @ 0x00100ce0 { - 3:0 lower_scale as u8; - 9:4 lower_mag as u8; - 30:30 ecc_mode_enabled as bool; -}); + pub(crate) NV_PFB_PRI_MMU_WPR2_ADDR_LO(u32) @ 0x001fa824 { + /// Bits 12..40 of the lower (inclusive) bound of the WPR2 region. + 31:4 lo_val; + } -register!(NV_PGSP_QUEUE_HEAD @ 0x00110c00 { - 31:0 address as u32; -}); + pub(crate) NV_PFB_PRI_MMU_WPR2_ADDR_HI(u32) @ 0x001fa828 { + /// Bits 12..40 of the higher (exclusive) bound of the WPR2 region. + 31:4 hi_val; + } +} impl NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE { /// Returns the usable framebuffer size, in bytes. @@ -140,10 +162,6 @@ impl NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE { } } -register!(NV_PFB_PRI_MMU_WPR2_ADDR_LO@0x001fa824 { - 31:4 lo_val as u32, "Bits 12..40 of the lower (inclusive) bound of the WPR2 region"; -}); - impl NV_PFB_PRI_MMU_WPR2_ADDR_LO { /// Returns the lower (inclusive) bound of the WPR2 region. pub(crate) fn lower_bound(self) -> u64 { @@ -151,10 +169,6 @@ impl NV_PFB_PRI_MMU_WPR2_ADDR_LO { } } -register!(NV_PFB_PRI_MMU_WPR2_ADDR_HI@0x001fa828 { - 31:4 hi_val as u32, "Bits 12..40 of the higher (exclusive) bound of the WPR2 region"; -}); - impl NV_PFB_PRI_MMU_WPR2_ADDR_HI { /// Returns the higher (exclusive) bound of the WPR2 region. /// @@ -164,6 +178,14 @@ impl NV_PFB_PRI_MMU_WPR2_ADDR_HI { } } +// PGSP + +register! 
{ + pub(crate) NV_PGSP_QUEUE_HEAD(u32) @ 0x00110c00 { + 31:0 address; + } +} + // PGC6 register space. // // `GC6` is a GPU low-power state where VRAM is in self-refresh and the GPU is powered down (except @@ -173,29 +195,41 @@ impl NV_PFB_PRI_MMU_WPR2_ADDR_HI { // These scratch registers remain powered on even in a low-power state and have a designated group // number. -// Boot Sequence Interface (BSI) register used to determine -// if GSP reload/resume has completed during the boot process. -register!(NV_PGC6_BSI_SECURE_SCRATCH_14 @ 0x001180f8 { - 26:26 boot_stage_3_handoff as bool; -}); - -// Privilege level mask register. It dictates whether the host CPU has privilege to access the -// `PGC6_AON_SECURE_SCRATCH_GROUP_05` register (which it needs to read GFW_BOOT). -register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK @ 0x00118128, - "Privilege level mask register" { - 0:0 read_protection_level0 as bool, "Set after FWSEC lowers its protection level"; -}); - -// OpenRM defines this as a register array, but doesn't specify its size and only uses its first -// element. Be conservative until we know the actual size or need to use more registers. -register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_05 @ 0x00118234[1] {}); - -register!( - NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT => NV_PGC6_AON_SECURE_SCRATCH_GROUP_05[0], - "Scratch group 05 register 0 used as GFW boot progress indicator" { - 7:0 progress as u8, "Progress of GFW boot (0xff means completed)"; +register! { + /// Boot Sequence Interface (BSI) register used to determine + /// if GSP reload/resume has completed during the boot process. + pub(crate) NV_PGC6_BSI_SECURE_SCRATCH_14(u32) @ 0x001180f8 { + 26:26 boot_stage_3_handoff => bool; } -); + + /// Privilege level mask register. It dictates whether the host CPU has privilege to access the + /// `PGC6_AON_SECURE_SCRATCH_GROUP_05` register (which it needs to read GFW_BOOT). 
+ pub(crate) NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK(u32) @ 0x00118128 { + /// Set after FWSEC lowers its protection level. + 0:0 read_protection_level0 => bool; + } + + /// OpenRM defines this as a register array, but doesn't specify its size and only uses its + /// first element. Be conservative until we know the actual size or need to use more registers. + pub(crate) NV_PGC6_AON_SECURE_SCRATCH_GROUP_05(u32)[1] @ 0x00118234 {} + + /// Scratch group 05 register 0 used as GFW boot progress indicator. + pub(crate) NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT(u32) + => NV_PGC6_AON_SECURE_SCRATCH_GROUP_05[0] { + /// Progress of GFW boot (0xff means completed). + 7:0 progress; + } + + pub(crate) NV_PGC6_AON_SECURE_SCRATCH_GROUP_42(u32) @ 0x001183a4 { + 31:0 value; + } + + /// Scratch group 42 register used as framebuffer size. + pub(crate) NV_USABLE_FB_SIZE_IN_MB(u32) => NV_PGC6_AON_SECURE_SCRATCH_GROUP_42 { + /// Usable framebuffer size, in megabytes. + 31:0 value; + } +} impl NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT { /// Returns `true` if GFW boot is completed. @@ -204,17 +238,6 @@ impl NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT { } } -register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_42 @ 0x001183a4 { - 31:0 value as u32; -}); - -register!( - NV_USABLE_FB_SIZE_IN_MB => NV_PGC6_AON_SECURE_SCRATCH_GROUP_42, - "Scratch group 42 register used as framebuffer size" { - 31:0 value as u32, "Usable framebuffer size, in megabytes"; - } -); - impl NV_USABLE_FB_SIZE_IN_MB { /// Returns the usable framebuffer size, in bytes. pub(crate) fn usable_fb_size(self) -> u64 { @@ -224,10 +247,14 @@ impl NV_USABLE_FB_SIZE_IN_MB { // PDISP -register!(NV_PDISP_VGA_WORKSPACE_BASE @ 0x00625f04 { - 3:3 status_valid as bool, "Set if the `addr` field is valid"; - 31:8 addr as u32, "VGA workspace base address divided by 0x10000"; -}); +register! { + pub(crate) NV_PDISP_VGA_WORKSPACE_BASE(u32) @ 0x00625f04 { + /// VGA workspace base address divided by 0x10000. 
+ 31:8 addr; + /// Set if the `addr` field is valid. + 3:3 status_valid => bool; + } +} impl NV_PDISP_VGA_WORKSPACE_BASE { /// Returns the base address of the VGA workspace, or `None` if none exists. @@ -244,73 +271,162 @@ impl NV_PDISP_VGA_WORKSPACE_BASE { pub(crate) const NV_FUSE_OPT_FPF_SIZE: usize = 16; -register!(NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION @ 0x00824100[NV_FUSE_OPT_FPF_SIZE] { - 15:0 data as u16; -}); +register! { + pub(crate) NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION(u32)[NV_FUSE_OPT_FPF_SIZE] @ 0x00824100 { + 15:0 data => u16; + } -register!(NV_FUSE_OPT_FPF_SEC2_UCODE1_VERSION @ 0x00824140[NV_FUSE_OPT_FPF_SIZE] { - 15:0 data as u16; -}); + pub(crate) NV_FUSE_OPT_FPF_SEC2_UCODE1_VERSION(u32)[NV_FUSE_OPT_FPF_SIZE] @ 0x00824140 { + 15:0 data => u16; + } -register!(NV_FUSE_OPT_FPF_GSP_UCODE1_VERSION @ 0x008241c0[NV_FUSE_OPT_FPF_SIZE] { - 15:0 data as u16; -}); - -// PFALCON - -register!(NV_PFALCON_FALCON_IRQSCLR @ PFalconBase[0x00000004] { - 4:4 halt as bool; - 6:6 swgen0 as bool; -}); - -register!(NV_PFALCON_FALCON_MAILBOX0 @ PFalconBase[0x00000040] { - 31:0 value as u32; -}); - -register!(NV_PFALCON_FALCON_MAILBOX1 @ PFalconBase[0x00000044] { - 31:0 value as u32; -}); - -// Used to store version information about the firmware running -// on the Falcon processor. -register!(NV_PFALCON_FALCON_OS @ PFalconBase[0x00000080] { - 31:0 value as u32; -}); - -register!(NV_PFALCON_FALCON_RM @ PFalconBase[0x00000084] { - 31:0 value as u32; -}); - -register!(NV_PFALCON_FALCON_HWCFG2 @ PFalconBase[0x000000f4] { - 10:10 riscv as bool; - 12:12 mem_scrubbing as bool, "Set to 0 after memory scrubbing is completed"; - 31:31 reset_ready as bool, "Signal indicating that reset is completed (GA102+)"; -}); - -impl NV_PFALCON_FALCON_HWCFG2 { - /// Returns `true` if memory scrubbing is completed. 
- pub(crate) fn mem_scrubbing_done(self) -> bool { - !self.mem_scrubbing() + pub(crate) NV_FUSE_OPT_FPF_GSP_UCODE1_VERSION(u32)[NV_FUSE_OPT_FPF_SIZE] @ 0x008241c0 { + 15:0 data => u16; } } -register!(NV_PFALCON_FALCON_CPUCTL @ PFalconBase[0x00000100] { - 1:1 startcpu as bool; - 4:4 halted as bool; - 6:6 alias_en as bool; -}); +// PFALCON -register!(NV_PFALCON_FALCON_BOOTVEC @ PFalconBase[0x00000104] { - 31:0 value as u32; -}); +register! { + pub(crate) NV_PFALCON_FALCON_IRQSCLR(u32) @ PFalconBase + 0x00000004 { + 6:6 swgen0 => bool; + 4:4 halt => bool; + } -register!(NV_PFALCON_FALCON_DMACTL @ PFalconBase[0x0000010c] { - 0:0 require_ctx as bool; - 1:1 dmem_scrubbing as bool; - 2:2 imem_scrubbing as bool; - 6:3 dmaq_num as u8; - 7:7 secure_stat as bool; -}); + pub(crate) NV_PFALCON_FALCON_MAILBOX0(u32) @ PFalconBase + 0x00000040 { + 31:0 value => u32; + } + + pub(crate) NV_PFALCON_FALCON_MAILBOX1(u32) @ PFalconBase + 0x00000044 { + 31:0 value => u32; + } + + /// Used to store version information about the firmware running + /// on the Falcon processor. + pub(crate) NV_PFALCON_FALCON_OS(u32) @ PFalconBase + 0x00000080 { + 31:0 value => u32; + } + + pub(crate) NV_PFALCON_FALCON_RM(u32) @ PFalconBase + 0x00000084 { + 31:0 value => u32; + } + + pub(crate) NV_PFALCON_FALCON_HWCFG2(u32) @ PFalconBase + 0x000000f4 { + /// Signal indicating that reset is completed (GA102+). + 31:31 reset_ready => bool; + /// Set to 0 after memory scrubbing is completed. 
+ 12:12 mem_scrubbing => bool; + 10:10 riscv => bool; + } + + pub(crate) NV_PFALCON_FALCON_CPUCTL(u32) @ PFalconBase + 0x00000100 { + 6:6 alias_en => bool; + 4:4 halted => bool; + 1:1 startcpu => bool; + } + + pub(crate) NV_PFALCON_FALCON_BOOTVEC(u32) @ PFalconBase + 0x00000104 { + 31:0 value => u32; + } + + pub(crate) NV_PFALCON_FALCON_DMACTL(u32) @ PFalconBase + 0x0000010c { + 7:7 secure_stat => bool; + 6:3 dmaq_num; + 2:2 imem_scrubbing => bool; + 1:1 dmem_scrubbing => bool; + 0:0 require_ctx => bool; + } + + pub(crate) NV_PFALCON_FALCON_DMATRFBASE(u32) @ PFalconBase + 0x00000110 { + 31:0 base => u32; + } + + pub(crate) NV_PFALCON_FALCON_DMATRFMOFFS(u32) @ PFalconBase + 0x00000114 { + 23:0 offs; + } + + pub(crate) NV_PFALCON_FALCON_DMATRFCMD(u32) @ PFalconBase + 0x00000118 { + 16:16 set_dmtag; + 14:12 ctxdma; + 10:8 size ?=> DmaTrfCmdSize; + 5:5 is_write => bool; + 4:4 imem => bool; + 3:2 sec; + 1:1 idle => bool; + 0:0 full => bool; + } + + pub(crate) NV_PFALCON_FALCON_DMATRFFBOFFS(u32) @ PFalconBase + 0x0000011c { + 31:0 offs => u32; + } + + pub(crate) NV_PFALCON_FALCON_DMATRFBASE1(u32) @ PFalconBase + 0x00000128 { + 8:0 base; + } + + pub(crate) NV_PFALCON_FALCON_HWCFG1(u32) @ PFalconBase + 0x0000012c { + /// Core revision subversion. + 7:6 core_rev_subversion => FalconCoreRevSubversion; + /// Security model. + 5:4 security_model ?=> FalconSecurityModel; + /// Core revision. + 3:0 core_rev ?=> FalconCoreRev; + } + + pub(crate) NV_PFALCON_FALCON_CPUCTL_ALIAS(u32) @ PFalconBase + 0x00000130 { + 1:1 startcpu => bool; + } + + /// IMEM access control register. Up to 4 ports are available for IMEM access. + pub(crate) NV_PFALCON_FALCON_IMEMC(u32)[4, stride = 16] @ PFalconBase + 0x00000180 { + /// Access secure IMEM. + 28:28 secure => bool; + /// Auto-increment on write. + 24:24 aincw => bool; + /// IMEM block and word offset. + 15:0 offs; + } + + /// IMEM data register. 
Reading/writing this register accesses IMEM at the address + /// specified by the corresponding IMEMC register. + pub(crate) NV_PFALCON_FALCON_IMEMD(u32)[4, stride = 16] @ PFalconBase + 0x00000184 { + 31:0 data; + } + + /// IMEM tag register. Used to set the tag for the current IMEM block. + pub(crate) NV_PFALCON_FALCON_IMEMT(u32)[4, stride = 16] @ PFalconBase + 0x00000188 { + 15:0 tag; + } + + /// DMEM access control register. Up to 8 ports are available for DMEM access. + pub(crate) NV_PFALCON_FALCON_DMEMC(u32)[8, stride = 8] @ PFalconBase + 0x000001c0 { + /// Auto-increment on write. + 24:24 aincw => bool; + /// DMEM block and word offset. + 15:0 offs; + } + + /// DMEM data register. Reading/writing this register accesses DMEM at the address + /// specified by the corresponding DMEMC register. + pub(crate) NV_PFALCON_FALCON_DMEMD(u32)[8, stride = 8] @ PFalconBase + 0x000001c4 { + 31:0 data; + } + + /// Actually known as `NV_PSEC_FALCON_ENGINE` and `NV_PGSP_FALCON_ENGINE` depending on the + /// falcon instance. + pub(crate) NV_PFALCON_FALCON_ENGINE(u32) @ PFalconBase + 0x000003c0 { + 0:0 reset => bool; + } + + pub(crate) NV_PFALCON_FBIF_TRANSCFG(u32)[8] @ PFalconBase + 0x00000600 { + 2:2 mem_type => FalconFbifMemType; + 1:0 target ?=> FalconFbifTarget; + } + + pub(crate) NV_PFALCON_FBIF_CTL(u32) @ PFalconBase + 0x00000624 { + 7:7 allow_phys_no_ctx => bool; + } +} impl NV_PFALCON_FALCON_DMACTL { /// Returns `true` if memory scrubbing is completed. 
@@ -319,133 +435,106 @@ impl NV_PFALCON_FALCON_DMACTL { } } -register!(NV_PFALCON_FALCON_DMATRFBASE @ PFalconBase[0x00000110] { - 31:0 base as u32; -}); - -register!(NV_PFALCON_FALCON_DMATRFMOFFS @ PFalconBase[0x00000114] { - 23:0 offs as u32; -}); - -register!(NV_PFALCON_FALCON_DMATRFCMD @ PFalconBase[0x00000118] { - 0:0 full as bool; - 1:1 idle as bool; - 3:2 sec as u8; - 4:4 imem as bool; - 5:5 is_write as bool; - 10:8 size as u8 ?=> DmaTrfCmdSize; - 14:12 ctxdma as u8; - 16:16 set_dmtag as u8; -}); - impl NV_PFALCON_FALCON_DMATRFCMD { /// Programs the `imem` and `sec` fields for the given FalconMem pub(crate) fn with_falcon_mem(self, mem: FalconMem) -> Self { - self.set_imem(mem != FalconMem::Dmem) - .set_sec(if mem == FalconMem::ImemSecure { 1 } else { 0 }) + let this = self.with_imem(mem != FalconMem::Dmem); + + match mem { + FalconMem::ImemSecure => this.with_const_sec::<1>(), + _ => this.with_const_sec::<0>(), + } } } -register!(NV_PFALCON_FALCON_DMATRFFBOFFS @ PFalconBase[0x0000011c] { - 31:0 offs as u32; -}); - -register!(NV_PFALCON_FALCON_DMATRFBASE1 @ PFalconBase[0x00000128] { - 8:0 base as u16; -}); - -register!(NV_PFALCON_FALCON_HWCFG1 @ PFalconBase[0x0000012c] { - 3:0 core_rev as u8 ?=> FalconCoreRev, "Core revision"; - 5:4 security_model as u8 ?=> FalconSecurityModel, "Security model"; - 7:6 core_rev_subversion as u8 ?=> FalconCoreRevSubversion, "Core revision subversion"; -}); - -register!(NV_PFALCON_FALCON_CPUCTL_ALIAS @ PFalconBase[0x00000130] { - 1:1 startcpu as bool; -}); - -// Actually known as `NV_PSEC_FALCON_ENGINE` and `NV_PGSP_FALCON_ENGINE` depending on the falcon -// instance. -register!(NV_PFALCON_FALCON_ENGINE @ PFalconBase[0x000003c0] { - 0:0 reset as bool; -}); - impl NV_PFALCON_FALCON_ENGINE { /// Resets the falcon pub(crate) fn reset_engine(bar: &Bar0) { - Self::read(bar, &E::ID).set_reset(true).write(bar, &E::ID); + bar.update(Self::of::(), |r| r.with_reset(true)); // TIMEOUT: falcon engine should not take more than 10us to reset. 
time::delay::fsleep(time::Delta::from_micros(10)); - Self::read(bar, &E::ID).set_reset(false).write(bar, &E::ID); + bar.update(Self::of::(), |r| r.with_reset(false)); } } -register!(NV_PFALCON_FBIF_TRANSCFG @ PFalconBase[0x00000600[8]] { - 1:0 target as u8 ?=> FalconFbifTarget; - 2:2 mem_type as bool => FalconFbifMemType; -}); - -register!(NV_PFALCON_FBIF_CTL @ PFalconBase[0x00000624] { - 7:7 allow_phys_no_ctx as bool; -}); +impl NV_PFALCON_FALCON_HWCFG2 { + /// Returns `true` if memory scrubbing is completed. + pub(crate) fn mem_scrubbing_done(self) -> bool { + !self.mem_scrubbing() + } +} /* PFALCON2 */ -register!(NV_PFALCON2_FALCON_MOD_SEL @ PFalcon2Base[0x00000180] { - 7:0 algo as u8 ?=> FalconModSelAlgo; -}); +register! { + pub(crate) NV_PFALCON2_FALCON_MOD_SEL(u32) @ PFalcon2Base + 0x00000180 { + 7:0 algo ?=> FalconModSelAlgo; + } -register!(NV_PFALCON2_FALCON_BROM_CURR_UCODE_ID @ PFalcon2Base[0x00000198] { - 7:0 ucode_id as u8; -}); + pub(crate) NV_PFALCON2_FALCON_BROM_CURR_UCODE_ID(u32) @ PFalcon2Base + 0x00000198 { + 7:0 ucode_id => u8; + } -register!(NV_PFALCON2_FALCON_BROM_ENGIDMASK @ PFalcon2Base[0x0000019c] { - 31:0 value as u32; -}); + pub(crate) NV_PFALCON2_FALCON_BROM_ENGIDMASK(u32) @ PFalcon2Base + 0x0000019c { + 31:0 value => u32; + } -// OpenRM defines this as a register array, but doesn't specify its size and only uses its first -// element. Be conservative until we know the actual size or need to use more registers. -register!(NV_PFALCON2_FALCON_BROM_PARAADDR @ PFalcon2Base[0x00000210[1]] { - 31:0 value as u32; -}); + /// OpenRM defines this as a register array, but doesn't specify its size and only uses its + /// first element. Be conservative until we know the actual size or need to use more registers. + pub(crate) NV_PFALCON2_FALCON_BROM_PARAADDR(u32)[1] @ PFalcon2Base + 0x00000210 { + 31:0 value => u32; + } +} // PRISCV -// RISC-V status register for debug (Turing and GA100 only). -// Reflects current RISC-V core status. 
-register!(NV_PRISCV_RISCV_CORE_SWITCH_RISCV_STATUS @ PFalcon2Base[0x00000240] { - 0:0 active_stat as bool, "RISC-V core active/inactive status"; -}); +register! { + /// RISC-V status register for debug (Turing and GA100 only). + /// Reflects current RISC-V core status. + pub(crate) NV_PRISCV_RISCV_CORE_SWITCH_RISCV_STATUS(u32) @ PFalcon2Base + 0x00000240 { + /// RISC-V core active/inactive status. + 0:0 active_stat => bool; + } -// GA102 and later -register!(NV_PRISCV_RISCV_CPUCTL @ PFalcon2Base[0x00000388] { - 0:0 halted as bool; - 7:7 active_stat as bool; -}); + /// GA102 and later. + pub(crate) NV_PRISCV_RISCV_CPUCTL(u32) @ PFalcon2Base + 0x00000388 { + 7:7 active_stat => bool; + 0:0 halted => bool; + } -register!(NV_PRISCV_RISCV_BCR_CTRL @ PFalcon2Base[0x00000668] { - 0:0 valid as bool; - 4:4 core_select as bool => PeregrineCoreSelect; - 8:8 br_fetch as bool; -}); + /// GA102 and later. + pub(crate) NV_PRISCV_RISCV_BCR_CTRL(u32) @ PFalcon2Base + 0x00000668 { + 8:8 br_fetch => bool; + 4:4 core_select => PeregrineCoreSelect; + 0:0 valid => bool; + } +} // The modules below provide registers that are not identical on all supported chips. They should // only be used in HAL modules. pub(crate) mod gm107 { + use kernel::io::register; + // FUSE - register!(NV_FUSE_STATUS_OPT_DISPLAY @ 0x00021c04 { - 0:0 display_disabled as bool; - }); + register! { + pub(crate) NV_FUSE_STATUS_OPT_DISPLAY(u32) @ 0x00021c04 { + 0:0 display_disabled => bool; + } + } } pub(crate) mod ga100 { + use kernel::io::register; + // FUSE - register!(NV_FUSE_STATUS_OPT_DISPLAY @ 0x00820c04 { - 0:0 display_disabled as bool; - }); + register! 
{ + pub(crate) NV_FUSE_STATUS_OPT_DISPLAY(u32) @ 0x00820c04 { + 0:0 display_disabled => bool; + } + } } diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs deleted file mode 100644 index ed624be1f39b..000000000000 --- a/drivers/gpu/nova-core/regs/macros.rs +++ /dev/null @@ -1,739 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -//! `register!` macro to define register layout and accessors. -//! -//! A single register typically includes several fields, which are accessed through a combination -//! of bit-shift and mask operations that introduce a class of potential mistakes, notably because -//! not all possible field values are necessarily valid. -//! -//! The `register!` macro in this module provides an intuitive and readable syntax for defining a -//! dedicated type for each register. Each such type comes with its own field accessors that can -//! return an error if a field's value is invalid. Please look at the [`bitfield`] macro for the -//! complete syntax of fields definitions. - -/// Trait providing a base address to be added to the offset of a relative register to obtain -/// its actual offset. -/// -/// The `T` generic argument is used to distinguish which base to use, in case a type provides -/// several bases. It is given to the `register!` macro to restrict the use of the register to -/// implementors of this particular variant. -pub(crate) trait RegisterBase { - const BASE: usize; -} - -/// Defines a dedicated type for a register with an absolute offset, including getter and setter -/// methods for its fields and methods to read and write it from an `Io` region. 
-/// -/// Example: -/// -/// ```no_run -/// register!(BOOT_0 @ 0x00000100, "Basic revision information about the GPU" { -/// 3:0 minor_revision as u8, "Minor revision of the chip"; -/// 7:4 major_revision as u8, "Major revision of the chip"; -/// 28:20 chipset as u32 ?=> Chipset, "Chipset model"; -/// }); -/// ``` -/// -/// This defines a `BOOT_0` type which can be read or written from offset `0x100` of an `Io` -/// region. It is composed of 3 fields, for instance `minor_revision` is made of the 4 least -/// significant bits of the register. Each field can be accessed and modified using accessor -/// methods: -/// -/// ```no_run -/// // Read from the register's defined offset (0x100). -/// let boot0 = BOOT_0::read(&bar); -/// pr_info!("chip revision: {}.{}", boot0.major_revision(), boot0.minor_revision()); -/// -/// // `Chipset::try_from` is called with the value of the `chipset` field and returns an -/// // error if it is invalid. -/// let chipset = boot0.chipset()?; -/// -/// // Update some fields and write the value back. -/// boot0.set_major_revision(3).set_minor_revision(10).write(&bar); -/// -/// // Or, just read and update the register in a single step: -/// BOOT_0::update(&bar, |r| r.set_major_revision(3).set_minor_revision(10)); -/// ``` -/// -/// The documentation strings are optional. If present, they will be added to the type's -/// definition, or the field getter and setter methods they are attached to. -/// -/// It is also possible to create a alias register by using the `=> ALIAS` syntax. 
This is useful -/// for cases where a register's interpretation depends on the context: -/// -/// ```no_run -/// register!(SCRATCH @ 0x00000200, "Scratch register" { -/// 31:0 value as u32, "Raw value"; -/// }); -/// -/// register!(SCRATCH_BOOT_STATUS => SCRATCH, "Boot status of the firmware" { -/// 0:0 completed as bool, "Whether the firmware has completed booting"; -/// }); -/// ``` -/// -/// In this example, `SCRATCH_0_BOOT_STATUS` uses the same I/O address as `SCRATCH`, while also -/// providing its own `completed` field. -/// -/// ## Relative registers -/// -/// A register can be defined as being accessible from a fixed offset of a provided base. For -/// instance, imagine the following I/O space: -/// -/// ```text -/// +-----------------------------+ -/// | ... | -/// | | -/// 0x100--->+------------CPU0-------------+ -/// | | -/// 0x110--->+-----------------------------+ -/// | CPU_CTL | -/// +-----------------------------+ -/// | ... | -/// | | -/// | | -/// 0x200--->+------------CPU1-------------+ -/// | | -/// 0x210--->+-----------------------------+ -/// | CPU_CTL | -/// +-----------------------------+ -/// | ... | -/// +-----------------------------+ -/// ``` -/// -/// `CPU0` and `CPU1` both have a `CPU_CTL` register that starts at offset `0x10` of their I/O -/// space segment. Since both instances of `CPU_CTL` share the same layout, we don't want to define -/// them twice and would prefer a way to select which one to use from a single definition -/// -/// This can be done using the `Base[Offset]` syntax when specifying the register's address. -/// -/// `Base` is an arbitrary type (typically a ZST) to be used as a generic parameter of the -/// [`RegisterBase`] trait to provide the base as a constant, i.e. each type providing a base for -/// this register needs to implement `RegisterBase`. Here is the above example translated -/// into code: -/// -/// ```no_run -/// // Type used to identify the base. 
-/// pub(crate) struct CpuCtlBase; -/// -/// // ZST describing `CPU0`. -/// struct Cpu0; -/// impl RegisterBase for Cpu0 { -/// const BASE: usize = 0x100; -/// } -/// // Singleton of `CPU0` used to identify it. -/// const CPU0: Cpu0 = Cpu0; -/// -/// // ZST describing `CPU1`. -/// struct Cpu1; -/// impl RegisterBase for Cpu1 { -/// const BASE: usize = 0x200; -/// } -/// // Singleton of `CPU1` used to identify it. -/// const CPU1: Cpu1 = Cpu1; -/// -/// // This makes `CPU_CTL` accessible from all implementors of `RegisterBase`. -/// register!(CPU_CTL @ CpuCtlBase[0x10], "CPU core control" { -/// 0:0 start as bool, "Start the CPU core"; -/// }); -/// -/// // The `read`, `write` and `update` methods of relative registers take an extra `base` argument -/// // that is used to resolve its final address by adding its `BASE` to the offset of the -/// // register. -/// -/// // Start `CPU0`. -/// CPU_CTL::update(bar, &CPU0, |r| r.set_start(true)); -/// -/// // Start `CPU1`. -/// CPU_CTL::update(bar, &CPU1, |r| r.set_start(true)); -/// -/// // Aliases can also be defined for relative register. -/// register!(CPU_CTL_ALIAS => CpuCtlBase[CPU_CTL], "Alias to CPU core control" { -/// 1:1 alias_start as bool, "Start the aliased CPU core"; -/// }); -/// -/// // Start the aliased `CPU0`. -/// CPU_CTL_ALIAS::update(bar, &CPU0, |r| r.set_alias_start(true)); -/// ``` -/// -/// ## Arrays of registers -/// -/// Some I/O areas contain consecutive values that can be interpreted in the same way. These areas -/// can be defined as an array of identical registers, allowing them to be accessed by index with -/// compile-time or runtime bound checking. Simply define their address as `Address[Size]`, and add -/// an `idx` parameter to their `read`, `write` and `update` methods: -/// -/// ```no_run -/// # fn no_run() -> Result<(), Error> { -/// # fn get_scratch_idx() -> usize { -/// # 0x15 -/// # } -/// // Array of 64 consecutive registers with the same layout starting at offset `0x80`. 
-/// register!(SCRATCH @ 0x00000080[64], "Scratch registers" { -/// 31:0 value as u32; -/// }); -/// -/// // Read scratch register 0, i.e. I/O address `0x80`. -/// let scratch_0 = SCRATCH::read(bar, 0).value(); -/// // Read scratch register 15, i.e. I/O address `0x80 + (15 * 4)`. -/// let scratch_15 = SCRATCH::read(bar, 15).value(); -/// -/// // This is out of bounds and won't build. -/// // let scratch_128 = SCRATCH::read(bar, 128).value(); -/// -/// // Runtime-obtained array index. -/// let scratch_idx = get_scratch_idx(); -/// // Access on a runtime index returns an error if it is out-of-bounds. -/// let some_scratch = SCRATCH::try_read(bar, scratch_idx)?.value(); -/// -/// // Alias to a particular register in an array. -/// // Here `SCRATCH[8]` is used to convey the firmware exit code. -/// register!(FIRMWARE_STATUS => SCRATCH[8], "Firmware exit status code" { -/// 7:0 status as u8; -/// }); -/// -/// let status = FIRMWARE_STATUS::read(bar).status(); -/// -/// // Non-contiguous register arrays can be defined by adding a stride parameter. -/// // Here, each of the 16 registers of the array are separated by 8 bytes, meaning that the -/// // registers of the two declarations below are interleaved. -/// register!(SCRATCH_INTERLEAVED_0 @ 0x000000c0[16 ; 8], "Scratch registers bank 0" { -/// 31:0 value as u32; -/// }); -/// register!(SCRATCH_INTERLEAVED_1 @ 0x000000c4[16 ; 8], "Scratch registers bank 1" { -/// 31:0 value as u32; -/// }); -/// # Ok(()) -/// # } -/// ``` -/// -/// ## Relative arrays of registers -/// -/// Combining the two features described in the sections above, arrays of registers accessible from -/// a base can also be defined: -/// -/// ```no_run -/// # fn no_run() -> Result<(), Error> { -/// # fn get_scratch_idx() -> usize { -/// # 0x15 -/// # } -/// // Type used as parameter of `RegisterBase` to specify the base. -/// pub(crate) struct CpuCtlBase; -/// -/// // ZST describing `CPU0`. 
-/// struct Cpu0; -/// impl RegisterBase for Cpu0 { -/// const BASE: usize = 0x100; -/// } -/// // Singleton of `CPU0` used to identify it. -/// const CPU0: Cpu0 = Cpu0; -/// -/// // ZST describing `CPU1`. -/// struct Cpu1; -/// impl RegisterBase for Cpu1 { -/// const BASE: usize = 0x200; -/// } -/// // Singleton of `CPU1` used to identify it. -/// const CPU1: Cpu1 = Cpu1; -/// -/// // 64 per-cpu scratch registers, arranged as an contiguous array. -/// register!(CPU_SCRATCH @ CpuCtlBase[0x00000080[64]], "Per-CPU scratch registers" { -/// 31:0 value as u32; -/// }); -/// -/// let cpu0_scratch_0 = CPU_SCRATCH::read(bar, &Cpu0, 0).value(); -/// let cpu1_scratch_15 = CPU_SCRATCH::read(bar, &Cpu1, 15).value(); -/// -/// // This won't build. -/// // let cpu0_scratch_128 = CPU_SCRATCH::read(bar, &Cpu0, 128).value(); -/// -/// // Runtime-obtained array index. -/// let scratch_idx = get_scratch_idx(); -/// // Access on a runtime value returns an error if it is out-of-bounds. -/// let cpu0_some_scratch = CPU_SCRATCH::try_read(bar, &Cpu0, scratch_idx)?.value(); -/// -/// // `SCRATCH[8]` is used to convey the firmware exit code. -/// register!(CPU_FIRMWARE_STATUS => CpuCtlBase[CPU_SCRATCH[8]], -/// "Per-CPU firmware exit status code" { -/// 7:0 status as u8; -/// }); -/// -/// let cpu0_status = CPU_FIRMWARE_STATUS::read(bar, &Cpu0).status(); -/// -/// // Non-contiguous register arrays can be defined by adding a stride parameter. -/// // Here, each of the 16 registers of the array are separated by 8 bytes, meaning that the -/// // registers of the two declarations below are interleaved. -/// register!(CPU_SCRATCH_INTERLEAVED_0 @ CpuCtlBase[0x00000d00[16 ; 8]], -/// "Scratch registers bank 0" { -/// 31:0 value as u32; -/// }); -/// register!(CPU_SCRATCH_INTERLEAVED_1 @ CpuCtlBase[0x00000d04[16 ; 8]], -/// "Scratch registers bank 1" { -/// 31:0 value as u32; -/// }); -/// # Ok(()) -/// # } -/// ``` -macro_rules! 
register { - // Creates a register at a fixed offset of the MMIO space. - ($name:ident @ $offset:literal $(, $comment:literal)? { $($fields:tt)* } ) => { - bitfield!(pub(crate) struct $name(u32) $(, $comment)? { $($fields)* } ); - register!(@io_fixed $name @ $offset); - }; - - // Creates an alias register of fixed offset register `alias` with its own fields. - ($name:ident => $alias:ident $(, $comment:literal)? { $($fields:tt)* } ) => { - bitfield!(pub(crate) struct $name(u32) $(, $comment)? { $($fields)* } ); - register!(@io_fixed $name @ $alias::OFFSET); - }; - - // Creates a register at a relative offset from a base address provider. - ($name:ident @ $base:ty [ $offset:literal ] $(, $comment:literal)? { $($fields:tt)* } ) => { - bitfield!(pub(crate) struct $name(u32) $(, $comment)? { $($fields)* } ); - register!(@io_relative $name @ $base [ $offset ]); - }; - - // Creates an alias register of relative offset register `alias` with its own fields. - ($name:ident => $base:ty [ $alias:ident ] $(, $comment:literal)? { $($fields:tt)* }) => { - bitfield!(pub(crate) struct $name(u32) $(, $comment)? { $($fields)* } ); - register!(@io_relative $name @ $base [ $alias::OFFSET ]); - }; - - // Creates an array of registers at a fixed offset of the MMIO space. - ( - $name:ident @ $offset:literal [ $size:expr ; $stride:expr ] $(, $comment:literal)? { - $($fields:tt)* - } - ) => { - static_assert!(::core::mem::size_of::() <= $stride); - bitfield!(pub(crate) struct $name(u32) $(, $comment)? { $($fields)* } ); - register!(@io_array $name @ $offset [ $size ; $stride ]); - }; - - // Shortcut for contiguous array of registers (stride == size of element). - ( - $name:ident @ $offset:literal [ $size:expr ] $(, $comment:literal)? { - $($fields:tt)* - } - ) => { - register!($name @ $offset [ $size ; ::core::mem::size_of::() ] $(, $comment)? { - $($fields)* - } ); - }; - - // Creates an array of registers at a relative offset from a base address provider. 
- ( - $name:ident @ $base:ty [ $offset:literal [ $size:expr ; $stride:expr ] ] - $(, $comment:literal)? { $($fields:tt)* } - ) => { - static_assert!(::core::mem::size_of::() <= $stride); - bitfield!(pub(crate) struct $name(u32) $(, $comment)? { $($fields)* } ); - register!(@io_relative_array $name @ $base [ $offset [ $size ; $stride ] ]); - }; - - // Shortcut for contiguous array of relative registers (stride == size of element). - ( - $name:ident @ $base:ty [ $offset:literal [ $size:expr ] ] $(, $comment:literal)? { - $($fields:tt)* - } - ) => { - register!($name @ $base [ $offset [ $size ; ::core::mem::size_of::() ] ] - $(, $comment)? { $($fields)* } ); - }; - - // Creates an alias of register `idx` of relative array of registers `alias` with its own - // fields. - ( - $name:ident => $base:ty [ $alias:ident [ $idx:expr ] ] $(, $comment:literal)? { - $($fields:tt)* - } - ) => { - static_assert!($idx < $alias::SIZE); - bitfield!(pub(crate) struct $name(u32) $(, $comment)? { $($fields)* } ); - register!(@io_relative $name @ $base [ $alias::OFFSET + $idx * $alias::STRIDE ] ); - }; - - // Creates an alias of register `idx` of array of registers `alias` with its own fields. - // This rule belongs to the (non-relative) register arrays set, but needs to be put last - // to avoid it being interpreted in place of the relative register array alias rule. - ($name:ident => $alias:ident [ $idx:expr ] $(, $comment:literal)? { $($fields:tt)* }) => { - static_assert!($idx < $alias::SIZE); - bitfield!(pub(crate) struct $name(u32) $(, $comment)? { $($fields)* } ); - register!(@io_fixed $name @ $alias::OFFSET + $idx * $alias::STRIDE ); - }; - - // Generates the IO accessors for a fixed offset register. - (@io_fixed $name:ident @ $offset:expr) => { - #[allow(dead_code)] - impl $name { - pub(crate) const OFFSET: usize = $offset; - - /// Read the register from its address in `io`. 
- #[inline(always)] - pub(crate) fn read(io: &T) -> Self where - T: ::core::ops::Deref, - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable, - { - Self(io.read32($offset)) - } - - /// Write the value contained in `self` to the register address in `io`. - #[inline(always)] - pub(crate) fn write(self, io: &T) where - T: ::core::ops::Deref, - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable, - { - io.write32(self.0, $offset) - } - - /// Read the register from its address in `io` and run `f` on its value to obtain a new - /// value to write back. - #[inline(always)] - pub(crate) fn update( - io: &T, - f: F, - ) where - T: ::core::ops::Deref, - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable, - F: ::core::ops::FnOnce(Self) -> Self, - { - let reg = f(Self::read(io)); - reg.write(io); - } - } - }; - - // Generates the IO accessors for a relative offset register. - (@io_relative $name:ident @ $base:ty [ $offset:expr ]) => { - #[allow(dead_code)] - impl $name { - pub(crate) const OFFSET: usize = $offset; - - /// Read the register from `io`, using the base address provided by `base` and adding - /// the register's offset to it. - #[inline(always)] - pub(crate) fn read( - io: &T, - #[allow(unused_variables)] - base: &B, - ) -> Self where - T: ::core::ops::Deref, - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable, - B: crate::regs::macros::RegisterBase<$base>, - { - const OFFSET: usize = $name::OFFSET; - - let value = io.read32( - >::BASE + OFFSET - ); - - Self(value) - } - - /// Write the value contained in `self` to `io`, using the base address provided by - /// `base` and adding the register's offset to it. 
- #[inline(always)] - pub(crate) fn write( - self, - io: &T, - #[allow(unused_variables)] - base: &B, - ) where - T: ::core::ops::Deref, - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable, - B: crate::regs::macros::RegisterBase<$base>, - { - const OFFSET: usize = $name::OFFSET; - - io.write32( - self.0, - >::BASE + OFFSET - ); - } - - /// Read the register from `io`, using the base address provided by `base` and adding - /// the register's offset to it, then run `f` on its value to obtain a new value to - /// write back. - #[inline(always)] - pub(crate) fn update( - io: &T, - base: &B, - f: F, - ) where - T: ::core::ops::Deref, - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable, - B: crate::regs::macros::RegisterBase<$base>, - F: ::core::ops::FnOnce(Self) -> Self, - { - let reg = f(Self::read(io, base)); - reg.write(io, base); - } - } - }; - - // Generates the IO accessors for an array of registers. - (@io_array $name:ident @ $offset:literal [ $size:expr ; $stride:expr ]) => { - #[allow(dead_code)] - impl $name { - pub(crate) const OFFSET: usize = $offset; - pub(crate) const SIZE: usize = $size; - pub(crate) const STRIDE: usize = $stride; - - /// Read the array register at index `idx` from its address in `io`. - #[inline(always)] - pub(crate) fn read( - io: &T, - idx: usize, - ) -> Self where - T: ::core::ops::Deref, - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable, - { - build_assert!(idx < Self::SIZE); - - let offset = Self::OFFSET + (idx * Self::STRIDE); - let value = io.read32(offset); - - Self(value) - } - - /// Write the value contained in `self` to the array register with index `idx` in `io`. 
- #[inline(always)] - pub(crate) fn write( - self, - io: &T, - idx: usize - ) where - T: ::core::ops::Deref, - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable, - { - build_assert!(idx < Self::SIZE); - - let offset = Self::OFFSET + (idx * Self::STRIDE); - - io.write32(self.0, offset); - } - - /// Read the array register at index `idx` in `io` and run `f` on its value to obtain a - /// new value to write back. - #[inline(always)] - pub(crate) fn update( - io: &T, - idx: usize, - f: F, - ) where - T: ::core::ops::Deref, - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable, - F: ::core::ops::FnOnce(Self) -> Self, - { - let reg = f(Self::read(io, idx)); - reg.write(io, idx); - } - - /// Read the array register at index `idx` from its address in `io`. - /// - /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the - /// access was out-of-bounds. - #[inline(always)] - pub(crate) fn try_read( - io: &T, - idx: usize, - ) -> ::kernel::error::Result where - T: ::core::ops::Deref, - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable, - { - if idx < Self::SIZE { - Ok(Self::read(io, idx)) - } else { - Err(EINVAL) - } - } - - /// Write the value contained in `self` to the array register with index `idx` in `io`. - /// - /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the - /// access was out-of-bounds. - #[inline(always)] - pub(crate) fn try_write( - self, - io: &T, - idx: usize, - ) -> ::kernel::error::Result where - T: ::core::ops::Deref, - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable, - { - if idx < Self::SIZE { - Ok(self.write(io, idx)) - } else { - Err(EINVAL) - } - } - - /// Read the array register at index `idx` in `io` and run `f` on its value to obtain a - /// new value to write back. - /// - /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the - /// access was out-of-bounds. 
- #[inline(always)] - pub(crate) fn try_update( - io: &T, - idx: usize, - f: F, - ) -> ::kernel::error::Result where - T: ::core::ops::Deref, - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable, - F: ::core::ops::FnOnce(Self) -> Self, - { - if idx < Self::SIZE { - Ok(Self::update(io, idx, f)) - } else { - Err(EINVAL) - } - } - } - }; - - // Generates the IO accessors for an array of relative registers. - ( - @io_relative_array $name:ident @ $base:ty - [ $offset:literal [ $size:expr ; $stride:expr ] ] - ) => { - #[allow(dead_code)] - impl $name { - pub(crate) const OFFSET: usize = $offset; - pub(crate) const SIZE: usize = $size; - pub(crate) const STRIDE: usize = $stride; - - /// Read the array register at index `idx` from `io`, using the base address provided - /// by `base` and adding the register's offset to it. - #[inline(always)] - pub(crate) fn read( - io: &T, - #[allow(unused_variables)] - base: &B, - idx: usize, - ) -> Self where - T: ::core::ops::Deref, - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable, - B: crate::regs::macros::RegisterBase<$base>, - { - build_assert!(idx < Self::SIZE); - - let offset = >::BASE + - Self::OFFSET + (idx * Self::STRIDE); - let value = io.read32(offset); - - Self(value) - } - - /// Write the value contained in `self` to `io`, using the base address provided by - /// `base` and adding the offset of array register `idx` to it. 
- #[inline(always)] - pub(crate) fn write( - self, - io: &T, - #[allow(unused_variables)] - base: &B, - idx: usize - ) where - T: ::core::ops::Deref, - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable, - B: crate::regs::macros::RegisterBase<$base>, - { - build_assert!(idx < Self::SIZE); - - let offset = >::BASE + - Self::OFFSET + (idx * Self::STRIDE); - - io.write32(self.0, offset); - } - - /// Read the array register at index `idx` from `io`, using the base address provided - /// by `base` and adding the register's offset to it, then run `f` on its value to - /// obtain a new value to write back. - #[inline(always)] - pub(crate) fn update( - io: &T, - base: &B, - idx: usize, - f: F, - ) where - T: ::core::ops::Deref, - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable, - B: crate::regs::macros::RegisterBase<$base>, - F: ::core::ops::FnOnce(Self) -> Self, - { - let reg = f(Self::read(io, base, idx)); - reg.write(io, base, idx); - } - - /// Read the array register at index `idx` from `io`, using the base address provided - /// by `base` and adding the register's offset to it. - /// - /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the - /// access was out-of-bounds. - #[inline(always)] - pub(crate) fn try_read( - io: &T, - base: &B, - idx: usize, - ) -> ::kernel::error::Result where - T: ::core::ops::Deref, - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable, - B: crate::regs::macros::RegisterBase<$base>, - { - if idx < Self::SIZE { - Ok(Self::read(io, base, idx)) - } else { - Err(EINVAL) - } - } - - /// Write the value contained in `self` to `io`, using the base address provided by - /// `base` and adding the offset of array register `idx` to it. - /// - /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the - /// access was out-of-bounds. 
- #[inline(always)] - pub(crate) fn try_write( - self, - io: &T, - base: &B, - idx: usize, - ) -> ::kernel::error::Result where - T: ::core::ops::Deref, - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable, - B: crate::regs::macros::RegisterBase<$base>, - { - if idx < Self::SIZE { - Ok(self.write(io, base, idx)) - } else { - Err(EINVAL) - } - } - - /// Read the array register at index `idx` from `io`, using the base address provided - /// by `base` and adding the register's offset to it, then run `f` on its value to - /// obtain a new value to write back. - /// - /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the - /// access was out-of-bounds. - #[inline(always)] - pub(crate) fn try_update( - io: &T, - base: &B, - idx: usize, - f: F, - ) -> ::kernel::error::Result where - T: ::core::ops::Deref, - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable, - B: crate::regs::macros::RegisterBase<$base>, - F: ::core::ops::FnOnce(Self) -> Self, - { - if idx < Self::SIZE { - Ok(Self::update(io, base, idx, f)) - } else { - Err(EINVAL) - } - } - } - }; -} diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index 083cc44aa952..eda8f50d3a3c 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -29,10 +29,12 @@ #include #include +#include #include #include #include #include +#include #include #include #include @@ -51,6 +53,7 @@ #include #include #include +#include #include #include #include @@ -61,6 +64,7 @@ #include #include #include +#include #include #include #include @@ -146,6 +150,16 @@ const vm_flags_t RUST_CONST_HELPER_VM_MIXEDMAP = VM_MIXEDMAP; const vm_flags_t RUST_CONST_HELPER_VM_HUGEPAGE = VM_HUGEPAGE; const vm_flags_t RUST_CONST_HELPER_VM_NOHUGEPAGE = VM_NOHUGEPAGE; +#if IS_ENABLED(CONFIG_GPU_BUDDY) +const unsigned long RUST_CONST_HELPER_GPU_BUDDY_RANGE_ALLOCATION = GPU_BUDDY_RANGE_ALLOCATION; +const unsigned long RUST_CONST_HELPER_GPU_BUDDY_TOPDOWN_ALLOCATION = 
GPU_BUDDY_TOPDOWN_ALLOCATION; +const unsigned long RUST_CONST_HELPER_GPU_BUDDY_CONTIGUOUS_ALLOCATION = + GPU_BUDDY_CONTIGUOUS_ALLOCATION; +const unsigned long RUST_CONST_HELPER_GPU_BUDDY_CLEAR_ALLOCATION = GPU_BUDDY_CLEAR_ALLOCATION; +const unsigned long RUST_CONST_HELPER_GPU_BUDDY_CLEARED = GPU_BUDDY_CLEARED; +const unsigned long RUST_CONST_HELPER_GPU_BUDDY_TRIM_DISABLE = GPU_BUDDY_TRIM_DISABLE; +#endif + #if IS_ENABLED(CONFIG_ANDROID_BINDER_IPC_RUST) #include "../../drivers/android/binder/rust_binder.h" #include "../../drivers/android/binder/rust_binder_events.h" diff --git a/rust/helpers/device.c b/rust/helpers/device.c index a8ab931a9bd1..3be4ee590784 100644 --- a/rust/helpers/device.c +++ b/rust/helpers/device.c @@ -25,3 +25,8 @@ __rust_helper void rust_helper_dev_set_drvdata(struct device *dev, void *data) { dev_set_drvdata(dev, data); } + +__rust_helper const char *rust_helper_dev_name(const struct device *dev) +{ + return dev_name(dev); +} diff --git a/rust/helpers/dma-resv.c b/rust/helpers/dma-resv.c new file mode 100644 index 000000000000..71914d8241e2 --- /dev/null +++ b/rust/helpers/dma-resv.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +__rust_helper +int rust_helper_dma_resv_lock(struct dma_resv *obj, struct ww_acquire_ctx *ctx) +{ + return dma_resv_lock(obj, ctx); +} + +__rust_helper void rust_helper_dma_resv_unlock(struct dma_resv *obj) +{ + dma_resv_unlock(obj); +} diff --git a/rust/helpers/drm.c b/rust/helpers/drm.c index fe226f7b53ef..65f3f22b0e1d 100644 --- a/rust/helpers/drm.c +++ b/rust/helpers/drm.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include #ifdef CONFIG_DRM @@ -21,4 +22,57 @@ rust_helper_drm_vma_node_offset_addr(struct drm_vma_offset_node *node) return drm_vma_node_offset_addr(node); } -#endif +#ifdef CONFIG_DRM_GEM_SHMEM_HELPER +__rust_helper void +rust_helper_drm_gem_shmem_object_free(struct drm_gem_object *obj) +{ + return drm_gem_shmem_object_free(obj); +} + +__rust_helper 
void +rust_helper_drm_gem_shmem_object_print_info(struct drm_printer *p, unsigned int indent, + const struct drm_gem_object *obj) +{ + drm_gem_shmem_object_print_info(p, indent, obj); +} + +__rust_helper int +rust_helper_drm_gem_shmem_object_pin(struct drm_gem_object *obj) +{ + return drm_gem_shmem_object_pin(obj); +} + +__rust_helper void +rust_helper_drm_gem_shmem_object_unpin(struct drm_gem_object *obj) +{ + drm_gem_shmem_object_unpin(obj); +} + +__rust_helper struct sg_table * +rust_helper_drm_gem_shmem_object_get_sg_table(struct drm_gem_object *obj) +{ + return drm_gem_shmem_object_get_sg_table(obj); +} + +__rust_helper int +rust_helper_drm_gem_shmem_object_vmap(struct drm_gem_object *obj, + struct iosys_map *map) +{ + return drm_gem_shmem_object_vmap(obj, map); +} + +__rust_helper void +rust_helper_drm_gem_shmem_object_vunmap(struct drm_gem_object *obj, + struct iosys_map *map) +{ + drm_gem_shmem_object_vunmap(obj, map); +} + +__rust_helper int +rust_helper_drm_gem_shmem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) +{ + return drm_gem_shmem_object_mmap(obj, vma); +} + +#endif /* CONFIG_DRM_GEM_SHMEM_HELPER */ +#endif /* CONFIG_DRM */ diff --git a/rust/helpers/gpu.c b/rust/helpers/gpu.c new file mode 100644 index 000000000000..a25448d54d72 --- /dev/null +++ b/rust/helpers/gpu.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +#ifdef CONFIG_GPU_BUDDY + +__rust_helper u64 rust_helper_gpu_buddy_block_offset(const struct gpu_buddy_block *block) +{ + return gpu_buddy_block_offset(block); +} + +__rust_helper unsigned int rust_helper_gpu_buddy_block_order(struct gpu_buddy_block *block) +{ + return gpu_buddy_block_order(block); +} + +#endif /* CONFIG_GPU_BUDDY */ diff --git a/rust/helpers/helpers.c b/rust/helpers/helpers.c index a3c42e51f00a..b6b20ad2e0e6 100644 --- a/rust/helpers/helpers.c +++ b/rust/helpers/helpers.c @@ -28,13 +28,16 @@ #include "cred.c" #include "device.c" #include "dma.c" +#include "dma-resv.c" #include 
"drm.c" #include "err.c" #include "irq.c" #include "fs.c" +#include "gpu.c" #include "io.c" #include "jump_label.c" #include "kunit.c" +#include "list.c" #include "maple_tree.c" #include "mm.c" #include "mutex.c" diff --git a/rust/helpers/list.c b/rust/helpers/list.c new file mode 100644 index 000000000000..18095a5593c5 --- /dev/null +++ b/rust/helpers/list.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Helpers for C circular doubly linked list implementation. + */ + +#include + +__rust_helper void rust_helper_INIT_LIST_HEAD(struct list_head *list) +{ + INIT_LIST_HEAD(list); +} + +__rust_helper void rust_helper_list_add_tail(struct list_head *new, struct list_head *head) +{ + list_add_tail(new, head); +} diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs index 94e0548e7687..6d5396a43ebe 100644 --- a/rust/kernel/device.rs +++ b/rust/kernel/device.rs @@ -489,6 +489,17 @@ impl Device { // defined as a `#[repr(transparent)]` wrapper around `fwnode_handle`. Some(unsafe { &*fwnode_handle.cast() }) } + + /// Returns the name of the device. + /// + /// This is the kobject name of the device, or its initial name if the kobject is not yet + /// available. + #[inline] + pub fn name(&self) -> &CStr { + // SAFETY: By its type invariant `self.as_raw()` is a valid pointer to a `struct device`. + // The returned string is valid for the lifetime of the device. + unsafe { CStr::from_char_ptr(bindings::dev_name(self.as_raw())) } + } } // SAFETY: `Device` is a transparent wrapper of a type that doesn't depend on `Device`'s generic @@ -575,7 +586,7 @@ pub struct CoreInternal; /// The bound context indicates that for the entire duration of the lifetime of a [`Device`] /// reference, the [`Device`] is guaranteed to be bound to a driver. 
/// -/// Some APIs, such as [`dma::CoherentAllocation`] or [`Devres`] rely on the [`Device`] to be bound, +/// Some APIs, such as [`dma::Coherent`] or [`Devres`] rely on the [`Device`] to be bound, /// which can be proven with the [`Bound`] device context. /// /// Any abstraction that can guarantee a scope where the corresponding bus device is bound, should @@ -584,7 +595,7 @@ pub struct CoreInternal; /// /// [`Devres`]: kernel::devres::Devres /// [`Devres::access`]: kernel::devres::Devres::access -/// [`dma::CoherentAllocation`]: kernel::dma::CoherentAllocation +/// [`dma::Coherent`]: kernel::dma::Coherent pub struct Bound; mod private { diff --git a/rust/kernel/dma.rs b/rust/kernel/dma.rs index a396f8435739..4995ee5dc689 100644 --- a/rust/kernel/dma.rs +++ b/rust/kernel/dma.rs @@ -5,14 +5,31 @@ //! C header: [`include/linux/dma-mapping.h`](srctree/include/linux/dma-mapping.h) use crate::{ - bindings, build_assert, device, - device::{Bound, Core}, - error::{to_result, Result}, + bindings, + debugfs, + device::{ + self, + Bound, + Core, // + }, + error::to_result, + fs::file, prelude::*, + ptr::KnownSize, sync::aref::ARef, - transmute::{AsBytes, FromBytes}, + transmute::{ + AsBytes, + FromBytes, // + }, // + uaccess::UserSliceWriter, +}; +use core::{ + ops::{ + Deref, + DerefMut, // + }, + ptr::NonNull, // }; -use core::ptr::NonNull; /// DMA address type. /// @@ -39,7 +56,7 @@ pub trait Device: AsRef> { /// # Safety /// /// This method must not be called concurrently with any DMA allocation or mapping primitives, - /// such as [`CoherentAllocation::alloc_attrs`]. + /// such as [`Coherent::zeroed`]. unsafe fn dma_set_mask(&self, mask: DmaMask) -> Result { // SAFETY: // - By the type invariant of `device::Device`, `self.as_ref().as_raw()` is valid. @@ -56,7 +73,7 @@ pub trait Device: AsRef> { /// # Safety /// /// This method must not be called concurrently with any DMA allocation or mapping primitives, - /// such as [`CoherentAllocation::alloc_attrs`]. 
+ /// such as [`Coherent::zeroed`]. unsafe fn dma_set_coherent_mask(&self, mask: DmaMask) -> Result { // SAFETY: // - By the type invariant of `device::Device`, `self.as_ref().as_raw()` is valid. @@ -75,7 +92,7 @@ pub trait Device: AsRef> { /// # Safety /// /// This method must not be called concurrently with any DMA allocation or mapping primitives, - /// such as [`CoherentAllocation::alloc_attrs`]. + /// such as [`Coherent::zeroed`]. unsafe fn dma_set_mask_and_coherent(&self, mask: DmaMask) -> Result { // SAFETY: // - By the type invariant of `device::Device`, `self.as_ref().as_raw()` is valid. @@ -94,7 +111,7 @@ pub trait Device: AsRef> { /// # Safety /// /// This method must not be called concurrently with any DMA allocation or mapping primitives, - /// such as [`CoherentAllocation::alloc_attrs`]. + /// such as [`Coherent::zeroed`]. unsafe fn dma_set_max_seg_size(&self, size: u32) { // SAFETY: // - By the type invariant of `device::Device`, `self.as_ref().as_raw()` is valid. @@ -194,12 +211,12 @@ impl DmaMask { /// /// ``` /// # use kernel::device::{Bound, Device}; -/// use kernel::dma::{attrs::*, CoherentAllocation}; +/// use kernel::dma::{attrs::*, Coherent}; /// /// # fn test(dev: &Device) -> Result { /// let attribs = DMA_ATTR_FORCE_CONTIGUOUS | DMA_ATTR_NO_WARN; -/// let c: CoherentAllocation = -/// CoherentAllocation::alloc_attrs(dev, 4, GFP_KERNEL, attribs)?; +/// let c: Coherent<[u64]> = +/// Coherent::zeroed_slice_with_attrs(dev, 4, GFP_KERNEL, attribs)?; /// # Ok::<(), Error>(()) } /// ``` #[derive(Clone, Copy, PartialEq)] @@ -250,9 +267,6 @@ pub mod attrs { /// Specifies that writes to the mapping may be buffered to improve performance. pub const DMA_ATTR_WRITE_COMBINE: Attrs = Attrs(bindings::DMA_ATTR_WRITE_COMBINE); - /// Lets the platform to avoid creating a kernel virtual mapping for the allocated buffer. 
- pub const DMA_ATTR_NO_KERNEL_MAPPING: Attrs = Attrs(bindings::DMA_ATTR_NO_KERNEL_MAPPING); - /// Allows platform code to skip synchronization of the CPU cache for the given buffer assuming /// that it has been already transferred to 'device' domain. pub const DMA_ATTR_SKIP_CPU_SYNC: Attrs = Attrs(bindings::DMA_ATTR_SKIP_CPU_SYNC); @@ -344,23 +358,228 @@ impl From for bindings::dma_data_direction { } } +/// CPU-owned DMA allocation that can be converted into a device-shared [`Coherent`] object. +/// +/// Unlike [`Coherent`], a [`CoherentBox`] is guaranteed to be fully owned by the CPU -- its DMA +/// address is not exposed and it cannot be accessed by a device. This means it can safely be used +/// like a normal boxed allocation (e.g. direct reads, writes, and mutable slices are all safe). +/// +/// A typical use is to allocate a [`CoherentBox`], populate it with normal CPU access, and then +/// convert it into a [`Coherent`] object to share it with the device. +/// +/// # Examples +/// +/// `CoherentBox`: +/// +/// ``` +/// # use kernel::device::{ +/// # Bound, +/// # Device, +/// # }; +/// use kernel::dma::{attrs::*, +/// Coherent, +/// CoherentBox, +/// }; +/// +/// # fn test(dev: &Device) -> Result { +/// let mut dmem: CoherentBox = CoherentBox::zeroed(dev, GFP_KERNEL)?; +/// *dmem = 42; +/// let dmem: Coherent = dmem.into(); +/// # Ok::<(), Error>(()) } +/// ``` +/// +/// `CoherentBox<[T]>`: +/// +/// +/// ``` +/// # use kernel::device::{ +/// # Bound, +/// # Device, +/// # }; +/// use kernel::dma::{attrs::*, +/// Coherent, +/// CoherentBox, +/// }; +/// +/// # fn test(dev: &Device) -> Result { +/// let mut dmem: CoherentBox<[u64]> = CoherentBox::zeroed_slice(dev, 4, GFP_KERNEL)?; +/// dmem.fill(42); +/// let dmem: Coherent<[u64]> = dmem.into(); +/// # Ok::<(), Error>(()) } +/// ``` +pub struct CoherentBox(Coherent); + +impl CoherentBox<[T]> { + /// [`CoherentBox`] variant of [`Coherent::zeroed_slice_with_attrs`]. 
+ #[inline] + pub fn zeroed_slice_with_attrs( + dev: &device::Device, + count: usize, + gfp_flags: kernel::alloc::Flags, + dma_attrs: Attrs, + ) -> Result { + Coherent::zeroed_slice_with_attrs(dev, count, gfp_flags, dma_attrs).map(Self) + } + + /// Same as [CoherentBox::zeroed_slice_with_attrs], but with `dma::Attrs(0)`. + #[inline] + pub fn zeroed_slice( + dev: &device::Device, + count: usize, + gfp_flags: kernel::alloc::Flags, + ) -> Result { + Self::zeroed_slice_with_attrs(dev, count, gfp_flags, Attrs(0)) + } + + /// Initializes the element at `i` using the given initializer. + /// + /// Returns `EINVAL` if `i` is out of bounds. + pub fn init_at(&mut self, i: usize, init: impl Init) -> Result + where + Error: From, + { + if i >= self.0.len() { + return Err(EINVAL); + } + + let ptr = &raw mut self[i]; + + // SAFETY: + // - `ptr` is valid, properly aligned, and within this allocation. + // - `T: AsBytes + FromBytes` guarantees all bit patterns are valid, so partial writes on + // error cannot leave the element in an invalid state. + // - The DMA address has not been exposed yet, so there is no concurrent device access. + unsafe { init.__init(ptr)? }; + + Ok(()) + } + + /// Allocates a region of coherent memory of the same size as `data` and initializes it with a + /// copy of its contents. + /// + /// This is the [`CoherentBox`] variant of [`Coherent::from_slice_with_attrs`]. 
+ /// + /// # Examples + /// + /// ``` + /// use core::ops::Deref; + /// + /// # use kernel::device::{Bound, Device}; + /// use kernel::dma::{ + /// attrs::*, + /// CoherentBox + /// }; + /// + /// # fn test(dev: &Device) -> Result { + /// let data = [0u8, 1u8, 2u8, 3u8]; + /// let c: CoherentBox<[u8]> = + /// CoherentBox::from_slice_with_attrs(dev, &data, GFP_KERNEL, DMA_ATTR_NO_WARN)?; + /// + /// assert_eq!(c.deref(), &data); + /// # Ok::<(), Error>(()) } + /// ``` + pub fn from_slice_with_attrs( + dev: &device::Device, + data: &[T], + gfp_flags: kernel::alloc::Flags, + dma_attrs: Attrs, + ) -> Result + where + T: Copy, + { + let mut slice = Self(Coherent::::alloc_slice_with_attrs( + dev, + data.len(), + gfp_flags, + dma_attrs, + )?); + + // PANIC: `slice` was created with length `data.len()`. + slice.copy_from_slice(data); + + Ok(slice) + } + + /// Performs the same functionality as [`CoherentBox::from_slice_with_attrs`], except the + /// `dma_attrs` is 0 by default. + #[inline] + pub fn from_slice( + dev: &device::Device, + data: &[T], + gfp_flags: kernel::alloc::Flags, + ) -> Result + where + T: Copy, + { + Self::from_slice_with_attrs(dev, data, gfp_flags, Attrs(0)) + } +} + +impl CoherentBox { + /// Same as [`CoherentBox::zeroed_slice_with_attrs`], but for a single element. + #[inline] + pub fn zeroed_with_attrs( + dev: &device::Device, + gfp_flags: kernel::alloc::Flags, + dma_attrs: Attrs, + ) -> Result { + Coherent::zeroed_with_attrs(dev, gfp_flags, dma_attrs).map(Self) + } + + /// Same as [`CoherentBox::zeroed_slice`], but for a single element. + #[inline] + pub fn zeroed(dev: &device::Device, gfp_flags: kernel::alloc::Flags) -> Result { + Self::zeroed_with_attrs(dev, gfp_flags, Attrs(0)) + } +} + +impl Deref for CoherentBox { + type Target = T; + + #[inline] + fn deref(&self) -> &Self::Target { + // SAFETY: + // - We have not exposed the DMA address yet, so there can't be any concurrent access by a + // device. 
+ // - We have exclusive access to `self.0`. + unsafe { self.0.as_ref() } + } +} + +impl DerefMut for CoherentBox { + #[inline] + fn deref_mut(&mut self) -> &mut Self::Target { + // SAFETY: + // - We have not exposed the DMA address yet, so there can't be any concurrent access by a + // device. + // - We have exclusive access to `self.0`. + unsafe { self.0.as_mut() } + } +} + +impl From> for Coherent { + #[inline] + fn from(value: CoherentBox) -> Self { + value.0 + } +} + /// An abstraction of the `dma_alloc_coherent` API. /// /// This is an abstraction around the `dma_alloc_coherent` API which is used to allocate and map /// large coherent DMA regions. /// -/// A [`CoherentAllocation`] instance contains a pointer to the allocated region (in the +/// A [`Coherent`] instance contains a pointer to the allocated region (in the /// processor's virtual address space) and the device address which can be given to the device -/// as the DMA address base of the region. The region is released once [`CoherentAllocation`] +/// as the DMA address base of the region. The region is released once [`Coherent`] /// is dropped. /// /// # Invariants /// -/// - For the lifetime of an instance of [`CoherentAllocation`], the `cpu_addr` is a valid pointer +/// - For the lifetime of an instance of [`Coherent`], the `cpu_addr` is a valid pointer /// to an allocated region of coherent memory and `dma_handle` is the DMA address base of the /// region. -/// - The size in bytes of the allocation is equal to `size_of:: * count`. -/// - `size_of:: * count` fits into a `usize`. +/// - The size in bytes of the allocation is equal to size information via pointer. // TODO // // DMA allocations potentially carry device resources (e.g.IOMMU mappings), hence for soundness @@ -371,155 +590,43 @@ impl From for bindings::dma_data_direction { // allocation from surviving device unbind; it would require RCU read side critical sections to // access the memory, which may require subsequent unnecessary copies. 
// -// Hence, find a way to revoke the device resources of a `CoherentAllocation`, but not the -// entire `CoherentAllocation` including the allocated memory itself. -pub struct CoherentAllocation { +// Hence, find a way to revoke the device resources of a `Coherent`, but not the +// entire `Coherent` including the allocated memory itself. +pub struct Coherent { dev: ARef, dma_handle: DmaAddress, - count: usize, cpu_addr: NonNull, dma_attrs: Attrs, } -impl CoherentAllocation { - /// Allocates a region of `size_of:: * count` of coherent memory. - /// - /// # Examples - /// - /// ``` - /// # use kernel::device::{Bound, Device}; - /// use kernel::dma::{attrs::*, CoherentAllocation}; - /// - /// # fn test(dev: &Device) -> Result { - /// let c: CoherentAllocation = - /// CoherentAllocation::alloc_attrs(dev, 4, GFP_KERNEL, DMA_ATTR_NO_WARN)?; - /// # Ok::<(), Error>(()) } - /// ``` - pub fn alloc_attrs( - dev: &device::Device, - count: usize, - gfp_flags: kernel::alloc::Flags, - dma_attrs: Attrs, - ) -> Result> { - build_assert!( - core::mem::size_of::() > 0, - "It doesn't make sense for the allocated type to be a ZST" - ); - - let size = count - .checked_mul(core::mem::size_of::()) - .ok_or(EOVERFLOW)?; - let mut dma_handle = 0; - // SAFETY: Device pointer is guaranteed as valid by the type invariant on `Device`. - let addr = unsafe { - bindings::dma_alloc_attrs( - dev.as_raw(), - size, - &mut dma_handle, - gfp_flags.as_raw(), - dma_attrs.as_raw(), - ) - }; - let addr = NonNull::new(addr).ok_or(ENOMEM)?; - // INVARIANT: - // - We just successfully allocated a coherent region which is accessible for - // `count` elements, hence the cpu address is valid. We also hold a refcounted reference - // to the device. - // - The allocated `size` is equal to `size_of:: * count`. - // - The allocated `size` fits into a `usize`. 
- Ok(Self { - dev: dev.into(), - dma_handle, - count, - cpu_addr: addr.cast(), - dma_attrs, - }) - } - - /// Performs the same functionality as [`CoherentAllocation::alloc_attrs`], except the - /// `dma_attrs` is 0 by default. - pub fn alloc_coherent( - dev: &device::Device, - count: usize, - gfp_flags: kernel::alloc::Flags, - ) -> Result> { - CoherentAllocation::alloc_attrs(dev, count, gfp_flags, Attrs(0)) - } - - /// Returns the number of elements `T` in this allocation. - /// - /// Note that this is not the size of the allocation in bytes, which is provided by - /// [`Self::size`]. - pub fn count(&self) -> usize { - self.count - } - +impl Coherent { /// Returns the size in bytes of this allocation. + #[inline] pub fn size(&self) -> usize { - // INVARIANT: The type invariant of `Self` guarantees that `size_of:: * count` fits into - // a `usize`. - self.count * core::mem::size_of::() + T::size(self.cpu_addr.as_ptr()) } /// Returns the raw pointer to the allocated region in the CPU's virtual address space. #[inline] - pub fn as_ptr(&self) -> *const [T] { - core::ptr::slice_from_raw_parts(self.cpu_addr.as_ptr(), self.count) + pub fn as_ptr(&self) -> *const T { + self.cpu_addr.as_ptr() } /// Returns the raw pointer to the allocated region in the CPU's virtual address space as /// a mutable pointer. #[inline] - pub fn as_mut_ptr(&self) -> *mut [T] { - core::ptr::slice_from_raw_parts_mut(self.cpu_addr.as_ptr(), self.count) - } - - /// Returns the base address to the allocated region in the CPU's virtual address space. - pub fn start_ptr(&self) -> *const T { - self.cpu_addr.as_ptr() - } - - /// Returns the base address to the allocated region in the CPU's virtual address space as - /// a mutable pointer. - pub fn start_ptr_mut(&mut self) -> *mut T { + pub fn as_mut_ptr(&self) -> *mut T { self.cpu_addr.as_ptr() } /// Returns a DMA handle which may be given to the device as the DMA address base of /// the region. 
+ #[inline] pub fn dma_handle(&self) -> DmaAddress { self.dma_handle } - /// Returns a DMA handle starting at `offset` (in units of `T`) which may be given to the - /// device as the DMA address base of the region. - /// - /// Returns `EINVAL` if `offset` is not within the bounds of the allocation. - pub fn dma_handle_with_offset(&self, offset: usize) -> Result { - if offset >= self.count { - Err(EINVAL) - } else { - // INVARIANT: The type invariant of `Self` guarantees that `size_of:: * count` fits - // into a `usize`, and `offset` is inferior to `count`. - Ok(self.dma_handle + (offset * core::mem::size_of::()) as DmaAddress) - } - } - - /// Common helper to validate a range applied from the allocated region in the CPU's virtual - /// address space. - fn validate_range(&self, offset: usize, count: usize) -> Result { - if offset.checked_add(count).ok_or(EOVERFLOW)? > self.count { - return Err(EINVAL); - } - Ok(()) - } - - /// Returns the data from the region starting from `offset` as a slice. - /// `offset` and `count` are in units of `T`, not the number of bytes. - /// - /// For ringbuffer type of r/w access or use-cases where the pointer to the live data is needed, - /// [`CoherentAllocation::start_ptr`] or [`CoherentAllocation::start_ptr_mut`] could be used - /// instead. + /// Returns a reference to the data in the region. /// /// # Safety /// @@ -527,19 +634,13 @@ impl CoherentAllocation { /// slice is live. /// * Callers must ensure that this call does not race with a write to the same region while /// the returned slice is live. - pub unsafe fn as_slice(&self, offset: usize, count: usize) -> Result<&[T]> { - self.validate_range(offset, count)?; - // SAFETY: - // - The pointer is valid due to type invariant on `CoherentAllocation`, - // we've just checked that the range and index is within bounds. The immutability of the - // data is also guaranteed by the safety requirements of the function. 
- // - `offset + count` can't overflow since it is smaller than `self.count` and we've checked - // that `self.count` won't overflow early in the constructor. - Ok(unsafe { core::slice::from_raw_parts(self.start_ptr().add(offset), count) }) + #[inline] + pub unsafe fn as_ref(&self) -> &T { + // SAFETY: per safety requirement. + unsafe { &*self.as_ptr() } } - /// Performs the same functionality as [`CoherentAllocation::as_slice`], except that a mutable - /// slice is returned. + /// Returns a mutable reference to the data in the region. /// /// # Safety /// @@ -547,51 +648,11 @@ impl CoherentAllocation { /// slice is live. /// * Callers must ensure that this call does not race with a read or write to the same region /// while the returned slice is live. - pub unsafe fn as_slice_mut(&mut self, offset: usize, count: usize) -> Result<&mut [T]> { - self.validate_range(offset, count)?; - // SAFETY: - // - The pointer is valid due to type invariant on `CoherentAllocation`, - // we've just checked that the range and index is within bounds. The immutability of the - // data is also guaranteed by the safety requirements of the function. - // - `offset + count` can't overflow since it is smaller than `self.count` and we've checked - // that `self.count` won't overflow early in the constructor. - Ok(unsafe { core::slice::from_raw_parts_mut(self.start_ptr_mut().add(offset), count) }) - } - - /// Writes data to the region starting from `offset`. `offset` is in units of `T`, not the - /// number of bytes. - /// - /// # Safety - /// - /// * Callers must ensure that this call does not race with a read or write to the same region - /// that overlaps with this write. - /// - /// # Examples - /// - /// ``` - /// # fn test(alloc: &mut kernel::dma::CoherentAllocation) -> Result { - /// let somedata: [u8; 4] = [0xf; 4]; - /// let buf: &[u8] = &somedata; - /// // SAFETY: There is no concurrent HW operation on the device and no other R/W access to the - /// // region. 
- /// unsafe { alloc.write(buf, 0)?; } - /// # Ok::<(), Error>(()) } - /// ``` - pub unsafe fn write(&mut self, src: &[T], offset: usize) -> Result { - self.validate_range(offset, src.len())?; - // SAFETY: - // - The pointer is valid due to type invariant on `CoherentAllocation` - // and we've just checked that the range and index is within bounds. - // - `offset + count` can't overflow since it is smaller than `self.count` and we've checked - // that `self.count` won't overflow early in the constructor. - unsafe { - core::ptr::copy_nonoverlapping( - src.as_ptr(), - self.start_ptr_mut().add(offset), - src.len(), - ) - }; - Ok(()) + #[expect(clippy::mut_from_ref, reason = "unsafe to use API")] + #[inline] + pub unsafe fn as_mut(&self) -> &mut T { + // SAFETY: per safety requirement. + unsafe { &mut *self.as_mut_ptr() } } /// Reads the value of `field` and ensures that its type is [`FromBytes`]. @@ -641,18 +702,276 @@ impl CoherentAllocation { } } +impl Coherent { + /// Allocates a region of `T` of coherent memory. + fn alloc_with_attrs( + dev: &device::Device, + gfp_flags: kernel::alloc::Flags, + dma_attrs: Attrs, + ) -> Result { + const { + assert!( + core::mem::size_of::() > 0, + "It doesn't make sense for the allocated type to be a ZST" + ); + } + + let mut dma_handle = 0; + // SAFETY: Device pointer is guaranteed as valid by the type invariant on `Device`. + let addr = unsafe { + bindings::dma_alloc_attrs( + dev.as_raw(), + core::mem::size_of::(), + &mut dma_handle, + gfp_flags.as_raw(), + dma_attrs.as_raw(), + ) + }; + let cpu_addr = NonNull::new(addr.cast()).ok_or(ENOMEM)?; + // INVARIANT: + // - We just successfully allocated a coherent region which is adequately sized for `T`, + // hence the cpu address is valid. + // - We also hold a refcounted reference to the device. + Ok(Self { + dev: dev.into(), + dma_handle, + cpu_addr, + dma_attrs, + }) + } + + /// Allocates a region of type `T` of coherent memory. 
+ /// + /// # Examples + /// + /// ``` + /// # use kernel::device::{ + /// # Bound, + /// # Device, + /// # }; + /// use kernel::dma::{ + /// attrs::*, + /// Coherent, + /// }; + /// + /// # fn test(dev: &Device) -> Result { + /// let c: Coherent<[u64; 4]> = + /// Coherent::zeroed_with_attrs(dev, GFP_KERNEL, DMA_ATTR_NO_WARN)?; + /// # Ok::<(), Error>(()) } + /// ``` + #[inline] + pub fn zeroed_with_attrs( + dev: &device::Device, + gfp_flags: kernel::alloc::Flags, + dma_attrs: Attrs, + ) -> Result { + Self::alloc_with_attrs(dev, gfp_flags | __GFP_ZERO, dma_attrs) + } + + /// Performs the same functionality as [`Coherent::zeroed_with_attrs`], except the + /// `dma_attrs` is 0 by default. + #[inline] + pub fn zeroed(dev: &device::Device, gfp_flags: kernel::alloc::Flags) -> Result { + Self::zeroed_with_attrs(dev, gfp_flags, Attrs(0)) + } + + /// Same as [`Coherent::zeroed_with_attrs`], but instead of a zero-initialization the memory is + /// initialized with `init`. + pub fn init_with_attrs( + dev: &device::Device, + gfp_flags: kernel::alloc::Flags, + dma_attrs: Attrs, + init: impl Init, + ) -> Result + where + Error: From, + { + let dmem = Self::alloc_with_attrs(dev, gfp_flags, dma_attrs)?; + let ptr = dmem.as_mut_ptr(); + + // SAFETY: + // - `ptr` is valid, properly aligned, and points to exclusively owned memory. + // - If `__init` fails, `self` is dropped, which safely frees the underlying `Coherent`'s + // DMA memory. `T: AsBytes + FromBytes` ensures there are no complex `Drop` requirements + // we are bypassing. + unsafe { init.__init(ptr)? }; + + Ok(dmem) + } + + /// Same as [`Coherent::zeroed`], but instead of a zero-initialization the memory is initialized + /// with `init`. + #[inline] + pub fn init( + dev: &device::Device, + gfp_flags: kernel::alloc::Flags, + init: impl Init, + ) -> Result + where + Error: From, + { + Self::init_with_attrs(dev, gfp_flags, Attrs(0), init) + } + + /// Allocates a region of `[T; len]` of coherent memory. 
+ fn alloc_slice_with_attrs( + dev: &device::Device, + len: usize, + gfp_flags: kernel::alloc::Flags, + dma_attrs: Attrs, + ) -> Result> { + const { + assert!( + core::mem::size_of::() > 0, + "It doesn't make sense for the allocated type to be a ZST" + ); + } + + // `dma_alloc_attrs` cannot handle zero-length allocation, bail early. + if len == 0 { + Err(EINVAL)?; + } + + let size = core::mem::size_of::().checked_mul(len).ok_or(ENOMEM)?; + let mut dma_handle = 0; + // SAFETY: Device pointer is guaranteed as valid by the type invariant on `Device`. + let addr = unsafe { + bindings::dma_alloc_attrs( + dev.as_raw(), + size, + &mut dma_handle, + gfp_flags.as_raw(), + dma_attrs.as_raw(), + ) + }; + let cpu_addr = NonNull::slice_from_raw_parts(NonNull::new(addr.cast()).ok_or(ENOMEM)?, len); + // INVARIANT: + // - We just successfully allocated a coherent region which is adequately sized for + // `[T; len]`, hence the cpu address is valid. + // - We also hold a refcounted reference to the device. + Ok(Coherent { + dev: dev.into(), + dma_handle, + cpu_addr, + dma_attrs, + }) + } + + /// Allocates a zeroed region of type `T` of coherent memory. + /// + /// Unlike `Coherent::<[T; N]>::zeroed_with_attrs`, `Coherent::::zeroed_slices` support + /// a runtime length. 
+ /// + /// # Examples + /// + /// ``` + /// # use kernel::device::{ + /// # Bound, + /// # Device, + /// # }; + /// use kernel::dma::{ + /// attrs::*, + /// Coherent, + /// }; + /// + /// # fn test(dev: &Device) -> Result { + /// let c: Coherent<[u64]> = + /// Coherent::zeroed_slice_with_attrs(dev, 4, GFP_KERNEL, DMA_ATTR_NO_WARN)?; + /// # Ok::<(), Error>(()) } + /// ``` + #[inline] + pub fn zeroed_slice_with_attrs( + dev: &device::Device, + len: usize, + gfp_flags: kernel::alloc::Flags, + dma_attrs: Attrs, + ) -> Result> { + Coherent::alloc_slice_with_attrs(dev, len, gfp_flags | __GFP_ZERO, dma_attrs) + } + + /// Performs the same functionality as [`Coherent::zeroed_slice_with_attrs`], except the + /// `dma_attrs` is 0 by default. + #[inline] + pub fn zeroed_slice( + dev: &device::Device, + len: usize, + gfp_flags: kernel::alloc::Flags, + ) -> Result> { + Self::zeroed_slice_with_attrs(dev, len, gfp_flags, Attrs(0)) + } + + /// Allocates a region of coherent memory of the same size as `data` and initializes it with a + /// copy of its contents. + /// + /// # Examples + /// + /// ``` + /// # use kernel::device::{Bound, Device}; + /// use kernel::dma::{ + /// attrs::*, + /// Coherent + /// }; + /// + /// # fn test(dev: &Device) -> Result { + /// let data = [0u8, 1u8, 2u8, 3u8]; + /// // `c` has the same content as `data`. + /// let c: Coherent<[u8]> = + /// Coherent::from_slice_with_attrs(dev, &data, GFP_KERNEL, DMA_ATTR_NO_WARN)?; + /// + /// # Ok::<(), Error>(()) } + /// ``` + #[inline] + pub fn from_slice_with_attrs( + dev: &device::Device, + data: &[T], + gfp_flags: kernel::alloc::Flags, + dma_attrs: Attrs, + ) -> Result> + where + T: Copy, + { + CoherentBox::from_slice_with_attrs(dev, data, gfp_flags, dma_attrs).map(Into::into) + } + + /// Performs the same functionality as [`Coherent::from_slice_with_attrs`], except the + /// `dma_attrs` is 0 by default. 
+ #[inline] + pub fn from_slice( + dev: &device::Device, + data: &[T], + gfp_flags: kernel::alloc::Flags, + ) -> Result> + where + T: Copy, + { + Self::from_slice_with_attrs(dev, data, gfp_flags, Attrs(0)) + } +} + +impl Coherent<[T]> { + /// Returns the number of elements `T` in this allocation. + /// + /// Note that this is not the size of the allocation in bytes, which is provided by + /// [`Self::size`]. + #[inline] + #[expect(clippy::len_without_is_empty, reason = "Coherent slice is never empty")] + pub fn len(&self) -> usize { + self.cpu_addr.len() + } +} + /// Note that the device configured to do DMA must be halted before this object is dropped. -impl Drop for CoherentAllocation { +impl Drop for Coherent { fn drop(&mut self) { - let size = self.count * core::mem::size_of::(); + let size = T::size(self.cpu_addr.as_ptr()); // SAFETY: Device pointer is guaranteed as valid by the type invariant on `Device`. // The cpu address, and the dma handle are valid due to the type invariants on - // `CoherentAllocation`. + // `Coherent`. unsafe { bindings::dma_free_attrs( self.dev.as_raw(), size, - self.start_ptr_mut().cast(), + self.cpu_addr.as_ptr().cast(), self.dma_handle, self.dma_attrs.as_raw(), ) @@ -660,20 +979,170 @@ impl Drop for CoherentAllocation { } } -// SAFETY: It is safe to send a `CoherentAllocation` to another thread if `T` +// SAFETY: It is safe to send a `Coherent` to another thread if `T` // can be sent to another thread. -unsafe impl Send for CoherentAllocation {} +unsafe impl Send for Coherent {} + +// SAFETY: Sharing `&Coherent` across threads is safe if `T` is `Sync`, because all +// methods that access the buffer contents (`field_read`, `field_write`, `as_slice`, +// `as_slice_mut`) are `unsafe`, and callers are responsible for ensuring no data races occur. +// The safe methods only return metadata or raw pointers whose use requires `unsafe`. 
+unsafe impl Sync for Coherent {} + +impl debugfs::BinaryWriter for Coherent { + fn write_to_slice( + &self, + writer: &mut UserSliceWriter, + offset: &mut file::Offset, + ) -> Result { + if offset.is_negative() { + return Err(EINVAL); + } + + // If the offset is too large for a usize (e.g. on 32-bit platforms), + // then consider that as past EOF and just return 0 bytes. + let Ok(offset_val) = usize::try_from(*offset) else { + return Ok(0); + }; + + let count = self.size().saturating_sub(offset_val).min(writer.len()); + + writer.write_dma(self, offset_val, count)?; + + *offset += count as i64; + Ok(count) + } +} + +/// An opaque DMA allocation without a kernel virtual mapping. +/// +/// Unlike [`Coherent`], a `CoherentHandle` does not provide CPU access to the allocated memory. +/// The allocation is always performed with `DMA_ATTR_NO_KERNEL_MAPPING`, meaning no kernel +/// virtual mapping is created for the buffer. The value returned by the C API as the CPU +/// address is an opaque handle used only to free the allocation. +/// +/// This is useful for buffers that are only ever accessed by hardware. +/// +/// # Invariants +/// +/// - `cpu_handle` holds the opaque handle returned by `dma_alloc_attrs` with +/// `DMA_ATTR_NO_KERNEL_MAPPING` set, and is only valid for passing back to `dma_free_attrs`. +/// - `dma_handle` is the corresponding bus address for device DMA. +/// - `size` is the allocation size in bytes as passed to `dma_alloc_attrs`. +/// - `dma_attrs` contains the attributes used for the allocation, always including +/// `DMA_ATTR_NO_KERNEL_MAPPING`. +pub struct CoherentHandle { + dev: ARef, + dma_handle: DmaAddress, + cpu_handle: NonNull, + size: usize, + dma_attrs: Attrs, +} + +impl CoherentHandle { + /// Allocates `size` bytes of coherent DMA memory without creating a kernel virtual mapping. + /// + /// Additional DMA attributes may be passed via `dma_attrs`; `DMA_ATTR_NO_KERNEL_MAPPING` is + /// always set implicitly. 
+ /// + /// Returns `EINVAL` if `size` is zero, `ENOMEM` if the allocation fails. + pub fn alloc_with_attrs( + dev: &device::Device, + size: usize, + gfp_flags: kernel::alloc::Flags, + dma_attrs: Attrs, + ) -> Result { + if size == 0 { + return Err(EINVAL); + } + + let dma_attrs = dma_attrs | Attrs(bindings::DMA_ATTR_NO_KERNEL_MAPPING); + let mut dma_handle = 0; + // SAFETY: `dev.as_raw()` is valid by the type invariant on `device::Device`. + let cpu_handle = unsafe { + bindings::dma_alloc_attrs( + dev.as_raw(), + size, + &mut dma_handle, + gfp_flags.as_raw(), + dma_attrs.as_raw(), + ) + }; + + let cpu_handle = NonNull::new(cpu_handle).ok_or(ENOMEM)?; + + // INVARIANT: `cpu_handle` is the opaque handle from a successful `dma_alloc_attrs` call + // with `DMA_ATTR_NO_KERNEL_MAPPING`, `dma_handle` is the corresponding DMA address, + // and we hold a refcounted reference to the device. + Ok(Self { + dev: dev.into(), + dma_handle, + cpu_handle, + size, + dma_attrs, + }) + } + + /// Allocates `size` bytes of coherent DMA memory without creating a kernel virtual mapping. + #[inline] + pub fn alloc( + dev: &device::Device, + size: usize, + gfp_flags: kernel::alloc::Flags, + ) -> Result { + Self::alloc_with_attrs(dev, size, gfp_flags, Attrs(0)) + } + + /// Returns the DMA handle for this allocation. + /// + /// This address can be programmed into device hardware for DMA access. + #[inline] + pub fn dma_handle(&self) -> DmaAddress { + self.dma_handle + } + + /// Returns the size in bytes of this allocation. + #[inline] + pub fn size(&self) -> usize { + self.size + } +} + +impl Drop for CoherentHandle { + fn drop(&mut self) { + // SAFETY: All values are valid by the type invariants on `CoherentHandle`. + // `cpu_handle` is the opaque handle from `dma_alloc_attrs` and is passed back unchanged. 
+ unsafe { + bindings::dma_free_attrs( + self.dev.as_raw(), + self.size, + self.cpu_handle.as_ptr(), + self.dma_handle, + self.dma_attrs.as_raw(), + ) + } + } +} + +// SAFETY: `CoherentHandle` only holds a device reference, a DMA handle, an opaque CPU handle, +// and a size. None of these are tied to a specific thread. +unsafe impl Send for CoherentHandle {} + +// SAFETY: `CoherentHandle` provides no CPU access to the underlying allocation. The only +// operations on `&CoherentHandle` are reading the DMA handle and size, both of which are +// plain `Copy` values. +unsafe impl Sync for CoherentHandle {} /// Reads a field of an item from an allocated region of structs. /// /// The syntax is of the form `kernel::dma_read!(dma, proj)` where `dma` is an expression evaluating -/// to a [`CoherentAllocation`] and `proj` is a [projection specification](kernel::ptr::project!). +/// to a [`Coherent`] and `proj` is a [projection specification](kernel::ptr::project!). /// /// # Examples /// /// ``` /// use kernel::device::Device; -/// use kernel::dma::{attrs::*, CoherentAllocation}; +/// use kernel::dma::{attrs::*, Coherent}; /// /// struct MyStruct { field: u32, } /// @@ -682,7 +1151,7 @@ unsafe impl Send for CoherentAllocation {} /// // SAFETY: Instances of `MyStruct` have no uninitialized portions. /// unsafe impl kernel::transmute::AsBytes for MyStruct{}; /// -/// # fn test(alloc: &kernel::dma::CoherentAllocation) -> Result { +/// # fn test(alloc: &kernel::dma::Coherent<[MyStruct]>) -> Result { /// let whole = kernel::dma_read!(alloc, [2]?); /// let field = kernel::dma_read!(alloc, [1]?.field); /// # Ok::<(), Error>(()) } @@ -692,17 +1161,17 @@ macro_rules! dma_read { ($dma:expr, $($proj:tt)*) => {{ let dma = &$dma; let ptr = $crate::ptr::project!( - $crate::dma::CoherentAllocation::as_ptr(dma), $($proj)* + $crate::dma::Coherent::as_ptr(dma), $($proj)* ); // SAFETY: The pointer created by the projection is within the DMA region. 
- unsafe { $crate::dma::CoherentAllocation::field_read(dma, ptr) } + unsafe { $crate::dma::Coherent::field_read(dma, ptr) } }}; } /// Writes to a field of an item from an allocated region of structs. /// /// The syntax is of the form `kernel::dma_write!(dma, proj, val)` where `dma` is an expression -/// evaluating to a [`CoherentAllocation`], `proj` is a +/// evaluating to a [`Coherent`], `proj` is a /// [projection specification](kernel::ptr::project!), and `val` is the value to be written to the /// projected location. /// @@ -710,7 +1179,7 @@ macro_rules! dma_read { /// /// ``` /// use kernel::device::Device; -/// use kernel::dma::{attrs::*, CoherentAllocation}; +/// use kernel::dma::{attrs::*, Coherent}; /// /// struct MyStruct { member: u32, } /// @@ -719,7 +1188,7 @@ macro_rules! dma_read { /// // SAFETY: Instances of `MyStruct` have no uninitialized portions. /// unsafe impl kernel::transmute::AsBytes for MyStruct{}; /// -/// # fn test(alloc: &kernel::dma::CoherentAllocation) -> Result { +/// # fn test(alloc: &kernel::dma::Coherent<[MyStruct]>) -> Result { /// kernel::dma_write!(alloc, [2]?.member, 0xf); /// kernel::dma_write!(alloc, [1]?, MyStruct { member: 0xf }); /// # Ok::<(), Error>(()) } @@ -729,11 +1198,11 @@ macro_rules! dma_write { (@parse [$dma:expr] [$($proj:tt)*] [, $val:expr]) => {{ let dma = &$dma; let ptr = $crate::ptr::project!( - mut $crate::dma::CoherentAllocation::as_mut_ptr(dma), $($proj)* + mut $crate::dma::Coherent::as_mut_ptr(dma), $($proj)* ); let val = $val; // SAFETY: The pointer created by the projection is within the DMA region. 
- unsafe { $crate::dma::CoherentAllocation::field_write(dma, ptr, val) } + unsafe { $crate::dma::Coherent::field_write(dma, ptr, val) } }}; (@parse [$dma:expr] [$($proj:tt)*] [.$field:tt $($rest:tt)*]) => { $crate::dma_write!(@parse [$dma] [$($proj)* .$field] [$($rest)*]) diff --git a/rust/kernel/drm/device.rs b/rust/kernel/drm/device.rs index 3ce8f62a0056..adbafe8db54d 100644 --- a/rust/kernel/drm/device.rs +++ b/rust/kernel/drm/device.rs @@ -6,15 +6,34 @@ use crate::{ alloc::allocator::Kmalloc, - bindings, device, drm, - drm::driver::AllocImpl, + bindings, device, + drm::{ + self, + driver::AllocImpl, // + }, error::from_err_ptr, - error::Result, prelude::*, - sync::aref::{ARef, AlwaysRefCounted}, + sync::aref::{ + ARef, + AlwaysRefCounted, // + }, types::Opaque, + workqueue::{ + HasDelayedWork, + HasWork, + Work, + WorkItem, // + }, +}; +use core::{ + alloc::Layout, + mem, + ops::Deref, + ptr::{ + self, + NonNull, // + }, }; -use core::{alloc::Layout, mem, ops::Deref, ptr, ptr::NonNull}; #[cfg(CONFIG_DRM_LEGACY)] macro_rules! drm_legacy_fields { @@ -227,3 +246,61 @@ unsafe impl Send for Device {} // SAFETY: A `drm::Device` can be shared among threads because all immutable methods are protected // by the synchronization in `struct drm_device`. unsafe impl Sync for Device {} + +impl WorkItem for Device +where + T: drm::Driver, + T::Data: WorkItem>>, + T::Data: HasWork, ID>, +{ + type Pointer = ARef>; + + fn run(ptr: ARef>) { + T::Data::run(ptr); + } +} + +// SAFETY: +// +// - `raw_get_work` and `work_container_of` return valid pointers by relying on +// `T::Data::raw_get_work` and `container_of`. In particular, `T::Data` is +// stored inline in `drm::Device`, so the `container_of` call is valid. 
+// +// - The two methods are true inverses of each other: given `ptr: *mut +// Device`, `raw_get_work` will return a `*mut Work, ID>` through +// `T::Data::raw_get_work` and given a `ptr: *mut Work, ID>`, +// `work_container_of` will return a `*mut Device` through `container_of`. +unsafe impl HasWork, ID> for Device +where + T: drm::Driver, + T::Data: HasWork, ID>, +{ + unsafe fn raw_get_work(ptr: *mut Self) -> *mut Work, ID> { + // SAFETY: The caller promises that `ptr` points to a valid `Device`. + let data_ptr = unsafe { &raw mut (*ptr).data }; + + // SAFETY: `data_ptr` is a valid pointer to `T::Data`. + unsafe { T::Data::raw_get_work(data_ptr) } + } + + unsafe fn work_container_of(ptr: *mut Work, ID>) -> *mut Self { + // SAFETY: The caller promises that `ptr` points at a `Work` field in + // `T::Data`. + let data_ptr = unsafe { T::Data::work_container_of(ptr) }; + + // SAFETY: `T::Data` is stored as the `data` field in `Device`. + unsafe { crate::container_of!(data_ptr, Self, data) } + } +} + +// SAFETY: Our `HasWork` implementation returns a `work_struct` that is +// stored in the `work` field of a `delayed_work` with the same access rules as +// the `work_struct` owing to the bound on `T::Data: HasDelayedWork, +// ID>`, which requires that `T::Data::raw_get_work` return a `work_struct` that +// is inside a `delayed_work`. +unsafe impl HasDelayedWork, ID> for Device +where + T: drm::Driver, + T::Data: HasDelayedWork, ID>, +{ +} diff --git a/rust/kernel/drm/driver.rs b/rust/kernel/drm/driver.rs index e09f977b5b51..5233bdebc9fc 100644 --- a/rust/kernel/drm/driver.rs +++ b/rust/kernel/drm/driver.rs @@ -5,12 +5,14 @@ //! C header: [`include/drm/drm_drv.h`](srctree/include/drm/drm_drv.h) use crate::{ - bindings, device, devres, drm, - error::{to_result, Result}, + bindings, + device, + devres, + drm, + error::to_result, prelude::*, - sync::aref::ARef, + sync::aref::ARef, // }; -use macros::vtable; /// Driver use the GEM memory manager. 
This should be set for all modern drivers. pub(crate) const FEAT_GEM: u32 = bindings::drm_driver_feature_DRIVER_GEM; diff --git a/rust/kernel/drm/file.rs b/rust/kernel/drm/file.rs index 8c46f8d51951..10160601ce5a 100644 --- a/rust/kernel/drm/file.rs +++ b/rust/kernel/drm/file.rs @@ -4,9 +4,13 @@ //! //! C header: [`include/drm/drm_file.h`](srctree/include/drm/drm_file.h) -use crate::{bindings, drm, error::Result, prelude::*, types::Opaque}; +use crate::{ + bindings, + drm, + prelude::*, + types::Opaque, // +}; use core::marker::PhantomData; -use core::pin::Pin; /// Trait that must be implemented by DRM drivers to represent a DRM File (a client instance). pub trait DriverFile { diff --git a/rust/kernel/drm/gem/mod.rs b/rust/kernel/drm/gem/mod.rs index d49a9ba02635..75acda7ba500 100644 --- a/rust/kernel/drm/gem/mod.rs +++ b/rust/kernel/drm/gem/mod.rs @@ -5,15 +5,66 @@ //! C header: [`include/drm/drm_gem.h`](srctree/include/drm/drm_gem.h) use crate::{ - alloc::flags::*, - bindings, drm, - drm::driver::{AllocImpl, AllocOps}, - error::{to_result, Result}, + bindings, + drm::{ + self, + driver::{ + AllocImpl, + AllocOps, // + }, + }, + error::to_result, prelude::*, - sync::aref::{ARef, AlwaysRefCounted}, + sync::aref::{ + ARef, + AlwaysRefCounted, // + }, types::Opaque, }; -use core::{ops::Deref, ptr::NonNull}; +use core::{ + ops::Deref, + ptr::NonNull, // +}; + +#[cfg(CONFIG_RUST_DRM_GEM_SHMEM_HELPER)] +pub mod shmem; + +/// A macro for implementing [`AlwaysRefCounted`] for any GEM object type. +/// +/// Since all GEM objects use the same refcounting scheme. +#[macro_export] +macro_rules! impl_aref_for_gem_obj { + ( + impl $( <$( $tparam_id:ident ),+> )? for $type:ty + $( + where + $( $bind_param:path : $bind_trait:path ),+ + )? + ) => { + // SAFETY: All GEM objects are refcounted. + unsafe impl $( <$( $tparam_id ),+> )? $crate::sync::aref::AlwaysRefCounted for $type + where + Self: IntoGEMObject, + $( $( $bind_param : $bind_trait ),+ )? 
+ { + fn inc_ref(&self) { + // SAFETY: The existence of a shared reference guarantees that the refcount is + // non-zero. + unsafe { bindings::drm_gem_object_get(self.as_raw()) }; + } + + unsafe fn dec_ref(obj: core::ptr::NonNull) { + // SAFETY: `obj` is a valid pointer to an `Object`. + let obj = unsafe { obj.as_ref() }.as_raw(); + + // SAFETY: The safety requirements guarantee that the refcount is non-zero. + unsafe { bindings::drm_gem_object_put(obj) }; + } + } + }; +} +#[cfg_attr(not(CONFIG_RUST_DRM_GEM_SHMEM_HELPER), allow(unused))] +pub(crate) use impl_aref_for_gem_obj; /// A type alias for retrieving a [`Driver`]s [`DriverFile`] implementation from its /// [`DriverObject`] implementation. @@ -27,8 +78,15 @@ pub trait DriverObject: Sync + Send + Sized { /// Parent `Driver` for this object. type Driver: drm::Driver; + /// The data type to use for passing arguments to [`DriverObject::new`]. + type Args; + /// Create a new driver data object for a GEM object of a given size. - fn new(dev: &drm::Device, size: usize) -> impl PinInit; + fn new( + dev: &drm::Device, + size: usize, + args: Self::Args, + ) -> impl PinInit; /// Open a new handle to an existing object, associated with a File. fn open(_obj: &::Object, _file: &DriverFile) -> Result { @@ -162,6 +220,18 @@ pub trait BaseObject: IntoGEMObject { impl BaseObject for T {} +/// Crate-private base operations shared by all GEM object classes. +#[cfg_attr(not(CONFIG_RUST_DRM_GEM_SHMEM_HELPER), expect(unused))] +pub(crate) trait BaseObjectPrivate: IntoGEMObject { + /// Return a pointer to this object's dma_resv. + fn raw_dma_resv(&self) -> *mut bindings::dma_resv { + // SAFETY: `self.as_raw()` always returns a valid pointer to the base DRM GEM object. + unsafe { (*self.as_raw()).resv } + } +} + +impl BaseObjectPrivate for T {} + /// A base GEM object. /// /// # Invariants @@ -195,11 +265,11 @@ impl Object { }; /// Create a new GEM object. 
- pub fn new(dev: &drm::Device, size: usize) -> Result> { + pub fn new(dev: &drm::Device, size: usize, args: T::Args) -> Result> { let obj: Pin> = KBox::pin_init( try_pin_init!(Self { obj: Opaque::new(bindings::drm_gem_object::default()), - data <- T::new(dev, size), + data <- T::new(dev, size, args), }), GFP_KERNEL, )?; @@ -252,21 +322,7 @@ impl Object { } } -// SAFETY: Instances of `Object` are always reference-counted. -unsafe impl crate::sync::aref::AlwaysRefCounted for Object { - fn inc_ref(&self) { - // SAFETY: The existence of a shared reference guarantees that the refcount is non-zero. - unsafe { bindings::drm_gem_object_get(self.as_raw()) }; - } - - unsafe fn dec_ref(obj: NonNull) { - // SAFETY: `obj` is a valid pointer to an `Object`. - let obj = unsafe { obj.as_ref() }; - - // SAFETY: The safety requirements guarantee that the refcount is non-zero. - unsafe { bindings::drm_gem_object_put(obj.as_raw()) } - } -} +impl_aref_for_gem_obj!(impl for Object where T: DriverObject); impl super::private::Sealed for Object {} diff --git a/rust/kernel/drm/gem/shmem.rs b/rust/kernel/drm/gem/shmem.rs new file mode 100644 index 000000000000..d025fb035195 --- /dev/null +++ b/rust/kernel/drm/gem/shmem.rs @@ -0,0 +1,228 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! DRM GEM shmem helper objects +//! +//! C header: [`include/linux/drm/drm_gem_shmem_helper.h`](srctree/include/drm/drm_gem_shmem_helper.h) + +// TODO: +// - There are a number of spots here that manually acquire/release the DMA reservation lock using +// dma_resv_(un)lock(). In the future we should add support for ww mutex, expose a method to +// acquire a reference to the WwMutex, and then use that directly instead of the C functions here. 
+ +use crate::{ + container_of, + drm::{ + device, + driver, + gem, + private::Sealed, // + }, + error::to_result, + prelude::*, + types::{ + ARef, + Opaque, // + }, // +}; +use core::{ + ops::{ + Deref, + DerefMut, // + }, + ptr::NonNull, +}; +use gem::{ + BaseObjectPrivate, + DriverObject, + IntoGEMObject, // +}; + +/// A struct for controlling the creation of shmem-backed GEM objects. +/// +/// This is used with [`Object::new()`] to control various properties that can only be set when +/// initially creating a shmem-backed GEM object. +#[derive(Default)] +pub struct ObjectConfig<'a, T: DriverObject> { + /// Whether to set the write-combine map flag. + pub map_wc: bool, + + /// Reuse the DMA reservation from another GEM object. + /// + /// The newly created [`Object`] will hold an owned refcount to `parent_resv_obj` if specified. + pub parent_resv_obj: Option<&'a Object>, +} + +/// A shmem-backed GEM object. +/// +/// # Invariants +/// +/// `obj` contains a valid initialized `struct drm_gem_shmem_object` for the lifetime of this +/// object. +#[repr(C)] +#[pin_data] +pub struct Object { + #[pin] + obj: Opaque, + /// Parent object that owns this object's DMA reservation object. + parent_resv_obj: Option>>, + #[pin] + inner: T, +} + +super::impl_aref_for_gem_obj!(impl for Object where T: DriverObject); + +// SAFETY: All GEM objects are thread-safe. +unsafe impl Send for Object {} + +// SAFETY: All GEM objects are thread-safe. +unsafe impl Sync for Object {} + +impl Object { + /// `drm_gem_object_funcs` vtable suitable for GEM shmem objects. 
+ const VTABLE: bindings::drm_gem_object_funcs = bindings::drm_gem_object_funcs { + free: Some(Self::free_callback), + open: Some(super::open_callback::), + close: Some(super::close_callback::), + print_info: Some(bindings::drm_gem_shmem_object_print_info), + export: None, + pin: Some(bindings::drm_gem_shmem_object_pin), + unpin: Some(bindings::drm_gem_shmem_object_unpin), + get_sg_table: Some(bindings::drm_gem_shmem_object_get_sg_table), + vmap: Some(bindings::drm_gem_shmem_object_vmap), + vunmap: Some(bindings::drm_gem_shmem_object_vunmap), + mmap: Some(bindings::drm_gem_shmem_object_mmap), + status: None, + rss: None, + #[allow(unused_unsafe, reason = "Safe since Rust 1.82.0")] + // SAFETY: `drm_gem_shmem_vm_ops` is a valid, static const on the C side. + vm_ops: unsafe { &raw const bindings::drm_gem_shmem_vm_ops }, + evict: None, + }; + + /// Return a raw pointer to the embedded drm_gem_shmem_object. + fn as_raw_shmem(&self) -> *mut bindings::drm_gem_shmem_object { + self.obj.get() + } + + /// Create a new shmem-backed DRM object of the given size. + /// + /// Additional config options can be specified using `config`. + pub fn new( + dev: &device::Device, + size: usize, + config: ObjectConfig<'_, T>, + args: T::Args, + ) -> Result> { + let new: Pin> = KBox::try_pin_init( + try_pin_init!(Self { + obj <- Opaque::init_zeroed(), + parent_resv_obj: config.parent_resv_obj.map(|p| p.into()), + inner <- T::new(dev, size, args), + }), + GFP_KERNEL, + )?; + + // SAFETY: `obj.as_raw()` is guaranteed to be valid by the initialization above. + unsafe { (*new.as_raw()).funcs = &Self::VTABLE }; + + // SAFETY: The arguments are all valid via the type invariants. + to_result(unsafe { bindings::drm_gem_shmem_init(dev.as_raw(), new.as_raw_shmem(), size) })?; + + // SAFETY: We never move out of `self`. + let new = KBox::into_raw(unsafe { Pin::into_inner_unchecked(new) }); + + // SAFETY: We're taking over the owned refcount from `drm_gem_shmem_init`. 
+ let obj = unsafe { ARef::from_raw(NonNull::new_unchecked(new)) }; + + // Start filling out values from `config` + if let Some(parent_resv) = config.parent_resv_obj { + // SAFETY: We have yet to expose the new gem object outside of this function, so it is + // safe to modify this field. + unsafe { (*obj.obj.get()).base.resv = parent_resv.raw_dma_resv() }; + } + + // SAFETY: We have yet to expose this object outside of this function, so we're guaranteed + // to have exclusive access - thus making this safe to hold a mutable reference to. + let shmem = unsafe { &mut *obj.as_raw_shmem() }; + shmem.set_map_wc(config.map_wc); + + Ok(obj) + } + + /// Returns the `Device` that owns this GEM object. + pub fn dev(&self) -> &device::Device { + // SAFETY: `dev` will have been initialized in `Self::new()` by `drm_gem_shmem_init()`. + unsafe { device::Device::from_raw((*self.as_raw()).dev) } + } + + extern "C" fn free_callback(obj: *mut bindings::drm_gem_object) { + // SAFETY: + // - DRM always passes a valid gem object here + // - We used drm_gem_shmem_create() in our create_gem_object callback, so we know that + // `obj` is contained within a drm_gem_shmem_object + let this = unsafe { container_of!(obj, bindings::drm_gem_shmem_object, base) }; + + // SAFETY: + // - We're in free_callback - so this function is safe to call. + // - We won't be using the gem resources on `this` after this call. 
+ unsafe { bindings::drm_gem_shmem_release(this) }; + + // SAFETY: + // - We verified above that `obj` is valid, which makes `this` valid + // - This function is set in AllocOps, so we know that `this` is contained within a + // `Object` + let this = unsafe { container_of!(Opaque::cast_from(this), Self, obj) }.cast_mut(); + + // SAFETY: We're recovering the Kbox<> we created in gem_create_object() + let _ = unsafe { KBox::from_raw(this) }; + } +} + +impl Deref for Object { + type Target = T; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +impl DerefMut for Object { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.inner + } +} + +impl Sealed for Object {} + +impl gem::IntoGEMObject for Object { + fn as_raw(&self) -> *mut bindings::drm_gem_object { + // SAFETY: + // - Our immutable reference is proof that this is safe to dereference. + // - `obj` is always a valid drm_gem_shmem_object via our type invariants. + unsafe { &raw mut (*self.obj.get()).base } + } + + unsafe fn from_raw<'a>(obj: *mut bindings::drm_gem_object) -> &'a Object { + // SAFETY: The safety contract of from_gem_obj() guarantees that `obj` is contained within + // `Self` + unsafe { + let obj = Opaque::cast_from(container_of!(obj, bindings::drm_gem_shmem_object, base)); + + &*container_of!(obj, Object, obj) + } + } +} + +impl driver::AllocImpl for Object { + type Driver = T::Driver; + + const ALLOC_OPS: driver::AllocOps = driver::AllocOps { + gem_create_object: None, + prime_handle_to_fd: None, + prime_fd_to_handle: None, + gem_prime_import: None, + gem_prime_import_sg_table: Some(bindings::drm_gem_shmem_prime_import_sg_table), + dumb_create: Some(bindings::drm_gem_shmem_dumb_create), + dumb_map_offset: None, + }; +} diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs index 258b12afdcba..10fcf1f0404d 100644 --- a/rust/kernel/error.rs +++ b/rust/kernel/error.rs @@ -67,6 +67,7 @@ pub mod code { declare_err!(EDOM, "Math argument out of domain of func."); 
declare_err!(ERANGE, "Math result not representable."); declare_err!(EOVERFLOW, "Value too large for defined data type."); + declare_err!(EMSGSIZE, "Message too long."); declare_err!(ETIMEDOUT, "Connection timed out."); declare_err!(ERESTARTSYS, "Restart the system call."); declare_err!(ERESTARTNOINTR, "System call was interrupted by a signal and will be restarted."); diff --git a/rust/kernel/gpu.rs b/rust/kernel/gpu.rs new file mode 100644 index 000000000000..1dc5d0c8c09d --- /dev/null +++ b/rust/kernel/gpu.rs @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! GPU subsystem abstractions. + +#[cfg(CONFIG_GPU_BUDDY = "y")] +pub mod buddy; diff --git a/rust/kernel/gpu/buddy.rs b/rust/kernel/gpu/buddy.rs new file mode 100644 index 000000000000..d502ada6ebbd --- /dev/null +++ b/rust/kernel/gpu/buddy.rs @@ -0,0 +1,614 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! GPU buddy allocator bindings. +//! +//! C header: [`include/linux/gpu_buddy.h`](srctree/include/linux/gpu_buddy.h) +//! +//! This module provides Rust abstractions over the Linux kernel's GPU buddy +//! allocator, which implements a binary buddy memory allocator. +//! +//! The buddy allocator manages a contiguous address space and allocates blocks +//! in power-of-two sizes, useful for GPU physical memory management. +//! +//! # Examples +//! +//! Create a buddy allocator and perform a basic range allocation: +//! +//! ``` +//! use kernel::{ +//! gpu::buddy::{ +//! GpuBuddy, +//! GpuBuddyAllocFlags, +//! GpuBuddyAllocMode, +//! GpuBuddyParams, // +//! }, +//! prelude::*, +//! ptr::Alignment, +//! sizes::*, // +//! }; +//! +//! // Create a 1GB buddy allocator with 4KB minimum chunk size. +//! let buddy = GpuBuddy::new(GpuBuddyParams { +//! base_offset: 0, +//! size: SZ_1G as u64, +//! chunk_size: Alignment::new::(), +//! })?; +//! +//! assert_eq!(buddy.size(), SZ_1G as u64); +//! assert_eq!(buddy.chunk_size(), Alignment::new::()); +//! let initial_free = buddy.avail(); +//! +//! // Allocate 16MB. 
Block lands at the top of the address range. +//! let allocated = KBox::pin_init( +//! buddy.alloc_blocks( +//! GpuBuddyAllocMode::Simple, +//! SZ_16M as u64, +//! Alignment::new::(), +//! GpuBuddyAllocFlags::default(), +//! ), +//! GFP_KERNEL, +//! )?; +//! assert_eq!(buddy.avail(), initial_free - SZ_16M as u64); +//! +//! let block = allocated.iter().next().expect("expected one block"); +//! assert_eq!(block.offset(), (SZ_1G - SZ_16M) as u64); +//! assert_eq!(block.order(), 12); // 2^12 pages = 16MB +//! assert_eq!(block.size(), SZ_16M as u64); +//! assert_eq!(allocated.iter().count(), 1); +//! +//! // Dropping the allocation returns the range to the buddy allocator. +//! drop(allocated); +//! assert_eq!(buddy.avail(), initial_free); +//! # Ok::<(), Error>(()) +//! ``` +//! +//! Top-down allocation allocates from the highest addresses: +//! +//! ``` +//! # use kernel::{ +//! # gpu::buddy::{GpuBuddy, GpuBuddyAllocMode, GpuBuddyAllocFlags, GpuBuddyParams}, +//! # prelude::*, +//! # ptr::Alignment, +//! # sizes::*, // +//! # }; +//! # let buddy = GpuBuddy::new(GpuBuddyParams { +//! # base_offset: 0, +//! # size: SZ_1G as u64, +//! # chunk_size: Alignment::new::(), +//! # })?; +//! # let initial_free = buddy.avail(); +//! let topdown = KBox::pin_init( +//! buddy.alloc_blocks( +//! GpuBuddyAllocMode::TopDown, +//! SZ_16M as u64, +//! Alignment::new::(), +//! GpuBuddyAllocFlags::default(), +//! ), +//! GFP_KERNEL, +//! )?; +//! assert_eq!(buddy.avail(), initial_free - SZ_16M as u64); +//! +//! let block = topdown.iter().next().expect("expected one block"); +//! assert_eq!(block.offset(), (SZ_1G - SZ_16M) as u64); +//! assert_eq!(block.order(), 12); +//! assert_eq!(block.size(), SZ_16M as u64); +//! +//! // Dropping the allocation returns the range to the buddy allocator. +//! drop(topdown); +//! assert_eq!(buddy.avail(), initial_free); +//! # Ok::<(), Error>(()) +//! ``` +//! +//! Non-contiguous allocation can fill fragmented memory by returning multiple +//! 
blocks: +//! +//! ``` +//! # use kernel::{ +//! # gpu::buddy::{ +//! # GpuBuddy, GpuBuddyAllocFlags, GpuBuddyAllocMode, GpuBuddyParams, +//! # }, +//! # prelude::*, +//! # ptr::Alignment, +//! # sizes::*, // +//! # }; +//! # let buddy = GpuBuddy::new(GpuBuddyParams { +//! # base_offset: 0, +//! # size: SZ_1G as u64, +//! # chunk_size: Alignment::new::(), +//! # })?; +//! # let initial_free = buddy.avail(); +//! // Create fragmentation by allocating 4MB blocks at [0,4M) and [8M,12M). +//! let frag1 = KBox::pin_init( +//! buddy.alloc_blocks( +//! GpuBuddyAllocMode::Range(0..SZ_4M as u64), +//! SZ_4M as u64, +//! Alignment::new::(), +//! GpuBuddyAllocFlags::default(), +//! ), +//! GFP_KERNEL, +//! )?; +//! assert_eq!(buddy.avail(), initial_free - SZ_4M as u64); +//! +//! let frag2 = KBox::pin_init( +//! buddy.alloc_blocks( +//! GpuBuddyAllocMode::Range(SZ_8M as u64..(SZ_8M + SZ_4M) as u64), +//! SZ_4M as u64, +//! Alignment::new::(), +//! GpuBuddyAllocFlags::default(), +//! ), +//! GFP_KERNEL, +//! )?; +//! assert_eq!(buddy.avail(), initial_free - SZ_8M as u64); +//! +//! // Allocate 8MB, this returns 2 blocks from the holes. +//! let fragmented = KBox::pin_init( +//! buddy.alloc_blocks( +//! GpuBuddyAllocMode::Range(0..SZ_16M as u64), +//! SZ_8M as u64, +//! Alignment::new::(), +//! GpuBuddyAllocFlags::default(), +//! ), +//! GFP_KERNEL, +//! )?; +//! assert_eq!(buddy.avail(), initial_free - SZ_16M as u64); +//! +//! let (mut count, mut total) = (0u32, 0u64); +//! for block in fragmented.iter() { +//! assert_eq!(block.size(), SZ_4M as u64); +//! total += block.size(); +//! count += 1; +//! } +//! assert_eq!(total, SZ_8M as u64); +//! assert_eq!(count, 2); +//! # Ok::<(), Error>(()) +//! ``` +//! +//! Contiguous allocation fails when only fragmented space is available: +//! +//! ``` +//! # use kernel::{ +//! # gpu::buddy::{ +//! # GpuBuddy, GpuBuddyAllocFlag, GpuBuddyAllocFlags, GpuBuddyAllocMode, GpuBuddyParams, +//! # }, +//! # prelude::*, +//! 
# ptr::Alignment, +//! # sizes::*, // +//! # }; +//! // Create a small 16MB buddy allocator with fragmented memory. +//! let small = GpuBuddy::new(GpuBuddyParams { +//! base_offset: 0, +//! size: SZ_16M as u64, +//! chunk_size: Alignment::new::(), +//! })?; +//! +//! let _hole1 = KBox::pin_init( +//! small.alloc_blocks( +//! GpuBuddyAllocMode::Range(0..SZ_4M as u64), +//! SZ_4M as u64, +//! Alignment::new::(), +//! GpuBuddyAllocFlags::default(), +//! ), +//! GFP_KERNEL, +//! )?; +//! +//! let _hole2 = KBox::pin_init( +//! small.alloc_blocks( +//! GpuBuddyAllocMode::Range(SZ_8M as u64..(SZ_8M + SZ_4M) as u64), +//! SZ_4M as u64, +//! Alignment::new::(), +//! GpuBuddyAllocFlags::default(), +//! ), +//! GFP_KERNEL, +//! )?; +//! +//! // 8MB contiguous should fail, only two non-contiguous 4MB holes exist. +//! let result = KBox::pin_init( +//! small.alloc_blocks( +//! GpuBuddyAllocMode::Simple, +//! SZ_8M as u64, +//! Alignment::new::(), +//! GpuBuddyAllocFlag::Contiguous, +//! ), +//! GFP_KERNEL, +//! ); +//! assert!(result.is_err()); +//! # Ok::<(), Error>(()) +//! ``` + +use core::ops::Range; + +use crate::{ + bindings, + clist_create, + error::to_result, + interop::list::CListHead, + new_mutex, + prelude::*, + ptr::Alignment, + sync::{ + lock::mutex::MutexGuard, + Arc, + Mutex, // + }, + types::Opaque, // +}; + +/// Allocation mode for the GPU buddy allocator. +/// +/// The mode determines the primary allocation strategy. Modes are mutually +/// exclusive: an allocation is either simple, range-constrained, or top-down. +/// +/// Orthogonal modifier flags (e.g., contiguous, clear) are specified separately +/// via [`GpuBuddyAllocFlags`]. +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum GpuBuddyAllocMode { + /// Simple allocation without constraints. + Simple, + /// Range-based allocation within the given address range. + Range(Range), + /// Allocate from top of address space downward. 
+ TopDown, +} + +impl GpuBuddyAllocMode { + /// Returns the C flags corresponding to the allocation mode. + fn as_flags(&self) -> usize { + match self { + Self::Simple => 0, + Self::Range(_) => bindings::GPU_BUDDY_RANGE_ALLOCATION, + Self::TopDown => bindings::GPU_BUDDY_TOPDOWN_ALLOCATION, + } + } + + /// Extracts the range start/end, defaulting to `(0, 0)` for non-range modes. + fn range(&self) -> (u64, u64) { + match self { + Self::Range(range) => (range.start, range.end), + _ => (0, 0), + } + } +} + +crate::impl_flags!( + /// Modifier flags for GPU buddy allocation. + /// + /// These flags can be combined with any [`GpuBuddyAllocMode`] to control + /// additional allocation behavior. + #[derive(Clone, Copy, Default, PartialEq, Eq)] + pub struct GpuBuddyAllocFlags(usize); + + /// Individual modifier flag for GPU buddy allocation. + #[derive(Clone, Copy, PartialEq, Eq)] + pub enum GpuBuddyAllocFlag { + /// Allocate physically contiguous blocks. + Contiguous = bindings::GPU_BUDDY_CONTIGUOUS_ALLOCATION, + + /// Request allocation from cleared (zeroed) memory. + Clear = bindings::GPU_BUDDY_CLEAR_ALLOCATION, + + /// Disable trimming of partially used blocks. + TrimDisable = bindings::GPU_BUDDY_TRIM_DISABLE, + } +); + +/// Parameters for creating a GPU buddy allocator. +pub struct GpuBuddyParams { + /// Base offset (in bytes) where the managed memory region starts. + /// Allocations will be offset by this value. + pub base_offset: u64, + /// Total size (in bytes) of the address space managed by the allocator. + pub size: u64, + /// Minimum allocation unit / chunk size; must be >= 4KB. + pub chunk_size: Alignment, +} + +/// Inner structure holding the actual buddy allocator. +/// +/// # Synchronization +/// +/// The C `gpu_buddy` API requires synchronization (see `include/linux/gpu_buddy.h`). 
+/// Internal locking ensures all allocator and free operations are properly +/// synchronized, preventing races between concurrent allocations and the +/// freeing that occurs when [`AllocatedBlocks`] is dropped. +/// +/// # Invariants +/// +/// The inner [`Opaque`] contains an initialized buddy allocator. +#[pin_data(PinnedDrop)] +struct GpuBuddyInner { + #[pin] + inner: Opaque, + + // TODO: Replace `Mutex<()>` with `Mutex>` once `Mutex::new()` + // accepts `impl PinInit`. + #[pin] + lock: Mutex<()>, + /// Cached creation parameters (do not change after init). + params: GpuBuddyParams, +} + +impl GpuBuddyInner { + /// Create a pin-initializer for the buddy allocator. + fn new(params: GpuBuddyParams) -> impl PinInit { + let size = params.size; + let chunk_size = params.chunk_size; + + // INVARIANT: `gpu_buddy_init` returns 0 on success, at which point the + // `gpu_buddy` structure is initialized and ready for use with all + // `gpu_buddy_*` APIs. `try_pin_init!` only completes if all fields succeed, + // so the invariant holds when construction finishes. + try_pin_init!(Self { + inner <- Opaque::try_ffi_init(|ptr| { + // SAFETY: `ptr` points to valid uninitialized memory from the pin-init + // infrastructure. `gpu_buddy_init` will initialize the structure. + to_result(unsafe { + bindings::gpu_buddy_init(ptr, size, chunk_size.as_usize() as u64) + }) + }), + lock <- new_mutex!(()), + params, + }) + } + + /// Lock the mutex and return a guard for accessing the allocator. + fn lock(&self) -> GpuBuddyGuard<'_> { + GpuBuddyGuard { + inner: self, + _guard: self.lock.lock(), + } + } +} + +#[pinned_drop] +impl PinnedDrop for GpuBuddyInner { + fn drop(self: Pin<&mut Self>) { + let guard = self.lock(); + + // SAFETY: Per the type invariant, `inner` contains an initialized + // allocator. `guard` provides exclusive access. + unsafe { bindings::gpu_buddy_fini(guard.as_raw()) }; + } +} + +// SAFETY: `GpuBuddyInner` can be sent between threads. 
+unsafe impl Send for GpuBuddyInner {} + +// SAFETY: `GpuBuddyInner` is `Sync` because `GpuBuddyInner::lock` +// serializes all access to the C allocator, preventing data races. +unsafe impl Sync for GpuBuddyInner {} + +/// Guard that proves the lock is held, enabling access to the allocator. +/// +/// The `_guard` holds the lock for the duration of this guard's lifetime. +struct GpuBuddyGuard<'a> { + inner: &'a GpuBuddyInner, + _guard: MutexGuard<'a, ()>, +} + +impl GpuBuddyGuard<'_> { + /// Get a raw pointer to the underlying C `gpu_buddy` structure. + fn as_raw(&self) -> *mut bindings::gpu_buddy { + self.inner.inner.get() + } +} + +/// GPU buddy allocator instance. +/// +/// This structure wraps the C `gpu_buddy` allocator using reference counting. +/// The allocator is automatically cleaned up when all references are dropped. +/// +/// Refer to the module-level documentation for usage examples. +pub struct GpuBuddy(Arc); + +impl GpuBuddy { + /// Create a new buddy allocator. + /// + /// The allocator manages a contiguous address space of the given size, with the + /// specified minimum allocation unit (chunk_size must be at least 4KB). + pub fn new(params: GpuBuddyParams) -> Result { + Arc::pin_init(GpuBuddyInner::new(params), GFP_KERNEL).map(Self) + } + + /// Get the base offset for allocations. + pub fn base_offset(&self) -> u64 { + self.0.params.base_offset + } + + /// Get the chunk size (minimum allocation unit). + pub fn chunk_size(&self) -> Alignment { + self.0.params.chunk_size + } + + /// Get the total managed size. + pub fn size(&self) -> u64 { + self.0.params.size + } + + /// Get the available (free) memory in bytes. + pub fn avail(&self) -> u64 { + let guard = self.0.lock(); + + // SAFETY: Per the type invariant, `inner` contains an initialized allocator. + // `guard` provides exclusive access. + unsafe { (*guard.as_raw()).avail } + } + + /// Allocate blocks from the buddy allocator. + /// + /// Returns a pin-initializer for [`AllocatedBlocks`]. 
+ pub fn alloc_blocks( + &self, + mode: GpuBuddyAllocMode, + size: u64, + min_block_size: Alignment, + flags: impl Into, + ) -> impl PinInit { + let buddy_arc = Arc::clone(&self.0); + let (start, end) = mode.range(); + let mode_flags = mode.as_flags(); + let modifier_flags = flags.into(); + + // Create pin-initializer that initializes list and allocates blocks. + try_pin_init!(AllocatedBlocks { + buddy: buddy_arc, + list <- CListHead::new(), + _: { + // Reject zero-sized or inverted ranges. + if let GpuBuddyAllocMode::Range(range) = &mode { + if range.is_empty() { + Err::<(), Error>(EINVAL)?; + } + } + + // Lock while allocating to serialize with concurrent frees. + let guard = buddy.lock(); + + // SAFETY: Per the type invariant, `inner` contains an initialized + // allocator. `guard` provides exclusive access. + to_result(unsafe { + bindings::gpu_buddy_alloc_blocks( + guard.as_raw(), + start, + end, + size, + min_block_size.as_usize() as u64, + list.as_raw(), + mode_flags | usize::from(modifier_flags), + ) + })? + } + }) + } +} + +/// Allocated blocks from the buddy allocator with automatic cleanup. +/// +/// This structure owns a list of allocated blocks and ensures they are +/// automatically freed when dropped. Use `iter()` to iterate over all +/// allocated blocks. +/// +/// # Invariants +/// +/// - `list` is an initialized, valid list head containing allocated blocks. +#[pin_data(PinnedDrop)] +pub struct AllocatedBlocks { + #[pin] + list: CListHead, + buddy: Arc, +} + +impl AllocatedBlocks { + /// Check if the block list is empty. + pub fn is_empty(&self) -> bool { + // An empty list head points to itself. + !self.list.is_linked() + } + + /// Iterate over allocated blocks. + /// + /// Returns an iterator yielding [`AllocatedBlock`] values. Each [`AllocatedBlock`] + /// borrows `self` and is only valid for the duration of that borrow. 
+ pub fn iter(&self) -> impl Iterator> + '_ { + let head = self.list.as_raw(); + // SAFETY: Per the type invariant, `list` is an initialized sentinel `list_head` + // and is not concurrently modified (we hold a `&self` borrow). The list contains + // `gpu_buddy_block` items linked via `__bindgen_anon_1.link`. `Block` is + // `#[repr(transparent)]` over `gpu_buddy_block`. + let clist = unsafe { + clist_create!( + head, + Block, + bindings::gpu_buddy_block, + __bindgen_anon_1.link + ) + }; + + clist + .iter() + .map(|this| AllocatedBlock { this, blocks: self }) + } +} + +#[pinned_drop] +impl PinnedDrop for AllocatedBlocks { + fn drop(self: Pin<&mut Self>) { + let guard = self.buddy.lock(); + + // SAFETY: + // - list is valid per the type's invariants. + // - guard provides exclusive access to the allocator. + unsafe { + bindings::gpu_buddy_free_list(guard.as_raw(), self.list.as_raw(), 0); + } + } +} + +/// A GPU buddy block. +/// +/// Transparent wrapper over C `gpu_buddy_block` structure. This type is returned +/// as references during iteration over [`AllocatedBlocks`]. +/// +/// # Invariants +/// +/// The inner [`Opaque`] contains a valid, allocated `gpu_buddy_block`. +#[repr(transparent)] +struct Block(Opaque); + +impl Block { + /// Get a raw pointer to the underlying C block. + fn as_raw(&self) -> *mut bindings::gpu_buddy_block { + self.0.get() + } + + /// Get the block's raw offset in the buddy address space (without base offset). + fn offset(&self) -> u64 { + // SAFETY: `self.as_raw()` is valid per the type's invariants. + unsafe { bindings::gpu_buddy_block_offset(self.as_raw()) } + } + + /// Get the block order. + fn order(&self) -> u32 { + // SAFETY: `self.as_raw()` is valid per the type's invariants. + unsafe { bindings::gpu_buddy_block_order(self.as_raw()) } + } +} + +// SAFETY: `Block` is a wrapper around `gpu_buddy_block` which can be +// sent across threads safely. 
+unsafe impl Send for Block {} + +// SAFETY: `Block` is only accessed through shared references after +// allocation, and thus safe to access concurrently across threads. +unsafe impl Sync for Block {} + +/// A buddy block paired with its owning [`AllocatedBlocks`] context. +/// +/// Unlike a raw block, which only knows its offset within the buddy address +/// space, an [`AllocatedBlock`] also has access to the allocator's `base_offset` +/// and `chunk_size`, enabling it to compute absolute offsets and byte sizes. +/// +/// Returned by [`AllocatedBlocks::iter()`]. +pub struct AllocatedBlock<'a> { + this: &'a Block, + blocks: &'a AllocatedBlocks, +} + +impl AllocatedBlock<'_> { + /// Get the block's offset in the address space. + /// + /// Returns the absolute offset including the allocator's base offset. + /// This is the actual address to use for accessing the allocated memory. + pub fn offset(&self) -> u64 { + self.blocks.buddy.params.base_offset + self.this.offset() + } + + /// Get the block order (size = chunk_size << order). + pub fn order(&self) -> u32 { + self.this.order() + } + + /// Get the block's size in bytes. + pub fn size(&self) -> u64 { + (self.blocks.buddy.params.chunk_size.as_usize() as u64) << self.this.order() + } +} diff --git a/rust/kernel/interop.rs b/rust/kernel/interop.rs new file mode 100644 index 000000000000..3b371d782a59 --- /dev/null +++ b/rust/kernel/interop.rs @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Infrastructure for interfacing Rust code with C kernel subsystems. +//! +//! This module is intended for low-level, unsafe Rust infrastructure code +//! that interoperates between Rust and C. It is *not* for use directly in +//! Rust drivers. + +pub mod list; diff --git a/rust/kernel/interop/list.rs b/rust/kernel/interop/list.rs new file mode 100644 index 000000000000..54265ea036bb --- /dev/null +++ b/rust/kernel/interop/list.rs @@ -0,0 +1,339 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! 
Rust interface for C doubly circular intrusive linked lists. +//! +//! This module provides Rust abstractions for iterating over C `list_head`-based +//! linked lists. It should only be used for cases where C and Rust code share +//! direct access to the same linked list through a C interop interface. +//! +//! Note: This *must not* be used by Rust components that just need a linked list +//! primitive. Use [`kernel::list::List`] instead. +//! +//! # Examples +//! +//! ``` +//! use kernel::{ +//! bindings, +//! interop::list::clist_create, +//! types::Opaque, +//! }; +//! # // Create test list with values (0, 10, 20) - normally done by C code but it is +//! # // emulated here for doctests using the C bindings. +//! # use core::mem::MaybeUninit; +//! # +//! # /// C struct with embedded `list_head` (typically will be allocated by C code). +//! # #[repr(C)] +//! # pub struct SampleItemC { +//! # pub value: i32, +//! # pub link: bindings::list_head, +//! # } +//! # +//! # let mut head = MaybeUninit::::uninit(); +//! # +//! # let head = head.as_mut_ptr(); +//! # // SAFETY: `head` and all the items are test objects allocated in this scope. +//! # unsafe { bindings::INIT_LIST_HEAD(head) }; +//! # +//! # let mut items = [ +//! # MaybeUninit::::uninit(), +//! # MaybeUninit::::uninit(), +//! # MaybeUninit::::uninit(), +//! # ]; +//! # +//! # for (i, item) in items.iter_mut().enumerate() { +//! # let ptr = item.as_mut_ptr(); +//! # // SAFETY: `ptr` points to a valid `MaybeUninit`. +//! # unsafe { (*ptr).value = i as i32 * 10 }; +//! # // SAFETY: `&raw mut` creates a pointer valid for `INIT_LIST_HEAD`. +//! # unsafe { bindings::INIT_LIST_HEAD(&raw mut (*ptr).link) }; +//! # // SAFETY: `link` was just initialized and `head` is a valid list head. +//! # unsafe { bindings::list_add_tail(&mut (*ptr).link, head) }; +//! # } +//! +//! /// Rust wrapper for the C struct. +//! /// +//! /// The list item struct in this example is defined in C code as: +//! /// +//! /// ```c +//! 
/// struct SampleItemC { +//! /// int value; +//! /// struct list_head link; +//! /// }; +//! /// ``` +//! #[repr(transparent)] +//! pub struct Item(Opaque); +//! +//! impl Item { +//! pub fn value(&self) -> i32 { +//! // SAFETY: `Item` has the same layout as `SampleItemC`. +//! unsafe { (*self.0.get()).value } +//! } +//! } +//! +//! // Create typed [`CList`] from sentinel head. +//! // SAFETY: `head` is valid and initialized, items are `SampleItemC` with +//! // embedded `link` field, and `Item` is `#[repr(transparent)]` over `SampleItemC`. +//! let list = unsafe { clist_create!(head, Item, SampleItemC, link) }; +//! +//! // Iterate directly over typed items. +//! let mut found_0 = false; +//! let mut found_10 = false; +//! let mut found_20 = false; +//! +//! for item in list.iter() { +//! let val = item.value(); +//! if val == 0 { found_0 = true; } +//! if val == 10 { found_10 = true; } +//! if val == 20 { found_20 = true; } +//! } +//! +//! assert!(found_0 && found_10 && found_20); +//! ``` + +use core::{ + iter::FusedIterator, + marker::PhantomData, // +}; + +use crate::{ + bindings, + types::Opaque, // +}; + +use pin_init::{ + pin_data, + pin_init, + PinInit, // +}; + +/// FFI wrapper for a C `list_head` object used in intrusive linked lists. +/// +/// # Invariants +/// +/// - The underlying `list_head` is initialized with valid non-`NULL` `next`/`prev` pointers. +#[pin_data] +#[repr(transparent)] +pub struct CListHead { + #[pin] + inner: Opaque, +} + +impl CListHead { + /// Create a `&CListHead` reference from a raw `list_head` pointer. + /// + /// # Safety + /// + /// - `ptr` must be a valid pointer to an initialized `list_head` (e.g. via + /// `INIT_LIST_HEAD()`), with valid non-`NULL` `next`/`prev` pointers. + /// - `ptr` must remain valid for the lifetime `'a`. + /// - The list and all linked `list_head` nodes must not be modified from + /// anywhere for the lifetime `'a`, unless done so via any [`CListHead`] APIs. 
+ #[inline] + pub unsafe fn from_raw<'a>(ptr: *mut bindings::list_head) -> &'a Self { + // SAFETY: + // - `CListHead` has the same layout as `list_head`. + // - `ptr` is valid and unmodified for `'a` per caller guarantees. + unsafe { &*ptr.cast() } + } + + /// Get the raw `list_head` pointer. + #[inline] + pub fn as_raw(&self) -> *mut bindings::list_head { + self.inner.get() + } + + /// Get the next [`CListHead`] in the list. + #[inline] + pub fn next(&self) -> &Self { + let raw = self.as_raw(); + // SAFETY: + // - `self.as_raw()` is valid and initialized per type invariants. + // - The `next` pointer is valid and non-`NULL` per type invariants + // (initialized via `INIT_LIST_HEAD()` or equivalent). + unsafe { Self::from_raw((*raw).next) } + } + + /// Check if this node is linked in a list (not isolated). + #[inline] + pub fn is_linked(&self) -> bool { + let raw = self.as_raw(); + // SAFETY: `self.as_raw()` is valid per type invariants. + unsafe { (*raw).next != raw && (*raw).prev != raw } + } + + /// Returns a pin-initializer for the list head. + pub fn new() -> impl PinInit { + pin_init!(Self { + // SAFETY: `INIT_LIST_HEAD` initializes `slot` to a valid empty list. + inner <- Opaque::ffi_init(|slot| unsafe { bindings::INIT_LIST_HEAD(slot) }), + }) + } +} + +// SAFETY: `list_head` contains no thread-bound state; it only holds +// `next`/`prev` pointers. +unsafe impl Send for CListHead {} + +// SAFETY: `CListHead` can be shared among threads as modifications are +// not allowed at the moment. +unsafe impl Sync for CListHead {} + +impl PartialEq for CListHead { + #[inline] + fn eq(&self, other: &Self) -> bool { + core::ptr::eq(self, other) + } +} + +impl Eq for CListHead {} + +/// Low-level iterator over `list_head` nodes. +/// +/// An iterator used to iterate over a C intrusive linked list (`list_head`). The caller has to +/// perform conversion of returned [`CListHead`] to an item (using [`container_of`] or similar). 
+/// +/// # Invariants +/// +/// `current` and `sentinel` are valid references into an initialized linked list. +struct CListHeadIter<'a> { + /// Current position in the list. + current: &'a CListHead, + /// The sentinel head (used to detect end of iteration). + sentinel: &'a CListHead, +} + +impl<'a> Iterator for CListHeadIter<'a> { + type Item = &'a CListHead; + + #[inline] + fn next(&mut self) -> Option { + // Check if we've reached the sentinel (end of list). + if self.current == self.sentinel { + return None; + } + + let item = self.current; + self.current = item.next(); + Some(item) + } +} + +impl<'a> FusedIterator for CListHeadIter<'a> {} + +/// A typed C linked list with a sentinel head intended for FFI use-cases where +/// a C subsystem manages a linked list that Rust code needs to read. Generally +/// required only for special cases. +/// +/// A sentinel head [`CListHead`] represents the entire linked list and can be used +/// for iteration over items of type `T`; it is not associated with a specific item. +/// +/// The const generic `OFFSET` specifies the byte offset of the `list_head` field within +/// the struct that `T` wraps. +/// +/// # Invariants +/// +/// - The sentinel [`CListHead`] has valid non-`NULL` `next`/`prev` pointers. +/// - `OFFSET` is the byte offset of the `list_head` field within the struct that `T` wraps. +/// - All the list's `list_head` nodes have valid non-`NULL` `next`/`prev` pointers. +#[repr(transparent)] +pub struct CList(CListHead, PhantomData); + +impl CList { + /// Create a typed [`CList`] reference from a raw sentinel `list_head` pointer. + /// + /// # Safety + /// + /// - `ptr` must be a valid pointer to an initialized sentinel `list_head` (e.g. via + /// `INIT_LIST_HEAD()`), with valid non-`NULL` `next`/`prev` pointers. + /// - `ptr` must remain valid for the lifetime `'a`. + /// - The list and all linked nodes must not be concurrently modified for the lifetime `'a`. 
+ /// - The list must contain items where the `list_head` field is at byte offset `OFFSET`. + /// - `T` must be `#[repr(transparent)]` over the C struct. + #[inline] + pub unsafe fn from_raw<'a>(ptr: *mut bindings::list_head) -> &'a Self { + // SAFETY: + // - `CList` has the same layout as `CListHead` due to `#[repr(transparent)]`. + // - Caller guarantees `ptr` is a valid, sentinel `list_head` object. + unsafe { &*ptr.cast() } + } + + /// Check if the list is empty. + #[inline] + pub fn is_empty(&self) -> bool { + !self.0.is_linked() + } + + /// Create an iterator over typed items. + #[inline] + pub fn iter(&self) -> CListIter<'_, T, OFFSET> { + let head = &self.0; + CListIter { + head_iter: CListHeadIter { + current: head.next(), + sentinel: head, + }, + _phantom: PhantomData, + } + } +} + +/// High-level iterator over typed list items. +pub struct CListIter<'a, T, const OFFSET: usize> { + head_iter: CListHeadIter<'a>, + _phantom: PhantomData<&'a T>, +} + +impl<'a, T, const OFFSET: usize> Iterator for CListIter<'a, T, OFFSET> { + type Item = &'a T; + + #[inline] + fn next(&mut self) -> Option { + let head = self.head_iter.next()?; + + // Convert to item using `OFFSET`. + // + // SAFETY: The pointer calculation is valid because `OFFSET` is derived + // from `offset_of!` per type invariants. + Some(unsafe { &*head.as_raw().byte_sub(OFFSET).cast::() }) + } +} + +impl<'a, T, const OFFSET: usize> FusedIterator for CListIter<'a, T, OFFSET> {} + +/// Create a C doubly-circular linked list interface [`CList`] from a raw `list_head` pointer. +/// +/// This macro creates a `CList` that can iterate over items of type `$rust_type` +/// linked via the `$field` field in the underlying C struct `$c_type`. +/// +/// # Arguments +/// +/// - `$head`: Raw pointer to the sentinel `list_head` object (`*mut bindings::list_head`). +/// - `$rust_type`: Each item's Rust wrapper type. +/// - `$c_type`: Each item's C struct type that contains the embedded `list_head`. 
+/// - `$field`: The name of the `list_head` field within the C struct. +/// +/// # Safety +/// +/// The caller must ensure: +/// +/// - `$head` is a valid, initialized sentinel `list_head` (e.g. via `INIT_LIST_HEAD()`) +/// pointing to a list that is not concurrently modified for the lifetime of the [`CList`]. +/// - The list contains items of type `$c_type` linked via an embedded `$field`. +/// - `$rust_type` is `#[repr(transparent)]` over `$c_type` or has compatible layout. +/// +/// # Examples +/// +/// Refer to the examples in the [`crate::interop::list`] module documentation. +#[macro_export] +macro_rules! clist_create { + ($head:expr, $rust_type:ty, $c_type:ty, $($field:tt).+) => {{ + // Compile-time check that field path is a `list_head`. + let _: fn(*const $c_type) -> *const $crate::bindings::list_head = + |p| &raw const (*p).$($field).+; + + // Calculate offset and create `CList`. + const OFFSET: usize = ::core::mem::offset_of!($c_type, $($field).+); + $crate::interop::list::CList::<$rust_type, OFFSET>::from_raw($head) + }}; +} +pub use clist_create; diff --git a/rust/kernel/io.rs b/rust/kernel/io.rs index e5fba6bf6db0..fcc7678fd9e3 100644 --- a/rust/kernel/io.rs +++ b/rust/kernel/io.rs @@ -11,10 +11,14 @@ use crate::{ pub mod mem; pub mod poll; +pub mod register; pub mod resource; +pub use crate::register; pub use resource::Resource; +use register::LocatedRegister; + /// Physical address type. /// /// This is a type alias to either `u32` or `u64` depending on the config option @@ -137,177 +141,6 @@ impl MmioRaw { #[repr(transparent)] pub struct Mmio(MmioRaw); -/// Internal helper macros used to invoke C MMIO read functions. -/// -/// This macro is intended to be used by higher-level MMIO access macros (io_define_read) and -/// provides a unified expansion for infallible vs. fallible read semantics. It emits a direct call -/// into the corresponding C helper and performs the required cast to the Rust return type. 
-/// -/// # Parameters -/// -/// * `$c_fn` – The C function performing the MMIO read. -/// * `$self` – The I/O backend object. -/// * `$ty` – The type of the value to be read. -/// * `$addr` – The MMIO address to read. -/// -/// This macro does not perform any validation; all invariants must be upheld by the higher-level -/// abstraction invoking it. -macro_rules! call_mmio_read { - (infallible, $c_fn:ident, $self:ident, $type:ty, $addr:expr) => { - // SAFETY: By the type invariant `addr` is a valid address for MMIO operations. - unsafe { bindings::$c_fn($addr as *const c_void) as $type } - }; - - (fallible, $c_fn:ident, $self:ident, $type:ty, $addr:expr) => {{ - // SAFETY: By the type invariant `addr` is a valid address for MMIO operations. - Ok(unsafe { bindings::$c_fn($addr as *const c_void) as $type }) - }}; -} - -/// Internal helper macros used to invoke C MMIO write functions. -/// -/// This macro is intended to be used by higher-level MMIO access macros (io_define_write) and -/// provides a unified expansion for infallible vs. fallible write semantics. It emits a direct call -/// into the corresponding C helper and performs the required cast to the Rust return type. -/// -/// # Parameters -/// -/// * `$c_fn` – The C function performing the MMIO write. -/// * `$self` – The I/O backend object. -/// * `$ty` – The type of the written value. -/// * `$addr` – The MMIO address to write. -/// * `$value` – The value to write. -/// -/// This macro does not perform any validation; all invariants must be upheld by the higher-level -/// abstraction invoking it. -macro_rules! call_mmio_write { - (infallible, $c_fn:ident, $self:ident, $ty:ty, $addr:expr, $value:expr) => { - // SAFETY: By the type invariant `addr` is a valid address for MMIO operations. 
- unsafe { bindings::$c_fn($value, $addr as *mut c_void) } - }; - - (fallible, $c_fn:ident, $self:ident, $ty:ty, $addr:expr, $value:expr) => {{ - // SAFETY: By the type invariant `addr` is a valid address for MMIO operations. - unsafe { bindings::$c_fn($value, $addr as *mut c_void) }; - Ok(()) - }}; -} - -/// Generates an accessor method for reading from an I/O backend. -/// -/// This macro reduces boilerplate by automatically generating either compile-time bounds-checked -/// (infallible) or runtime bounds-checked (fallible) read methods. It abstracts the address -/// calculation and bounds checking, and delegates the actual I/O read operation to a specified -/// helper macro, making it generic over different I/O backends. -/// -/// # Parameters -/// -/// * `infallible` / `fallible` - Determines the bounds-checking strategy. `infallible` relies on -/// `IoKnownSize` for compile-time checks and returns the value directly. `fallible` performs -/// runtime checks against `maxsize()` and returns a `Result`. -/// * `$(#[$attr:meta])*` - Optional attributes to apply to the generated method (e.g., -/// `#[cfg(CONFIG_64BIT)]` or inline directives). -/// * `$vis:vis` - The visibility of the generated method (e.g., `pub`). -/// * `$name:ident` / `$try_name:ident` - The name of the generated method (e.g., `read32`, -/// `try_read8`). -/// * `$call_macro:ident` - The backend-specific helper macro used to emit the actual I/O call -/// (e.g., `call_mmio_read`). -/// * `$c_fn:ident` - The backend-specific C function or identifier to be passed into the -/// `$call_macro`. -/// * `$type_name:ty` - The Rust type of the value being read (e.g., `u8`, `u32`). -#[macro_export] -macro_rules! io_define_read { - (infallible, $(#[$attr:meta])* $vis:vis $name:ident, $call_macro:ident($c_fn:ident) -> - $type_name:ty) => { - /// Read IO data from a given offset known at compile time. 
- /// - /// Bound checks are performed on compile time, hence if the offset is not known at compile - /// time, the build will fail. - $(#[$attr])* - // Always inline to optimize out error path of `io_addr_assert`. - #[inline(always)] - $vis fn $name(&self, offset: usize) -> $type_name { - let addr = self.io_addr_assert::<$type_name>(offset); - - // SAFETY: By the type invariant `addr` is a valid address for IO operations. - $call_macro!(infallible, $c_fn, self, $type_name, addr) - } - }; - - (fallible, $(#[$attr:meta])* $vis:vis $try_name:ident, $call_macro:ident($c_fn:ident) -> - $type_name:ty) => { - /// Read IO data from a given offset. - /// - /// Bound checks are performed on runtime, it fails if the offset (plus the type size) is - /// out of bounds. - $(#[$attr])* - $vis fn $try_name(&self, offset: usize) -> Result<$type_name> { - let addr = self.io_addr::<$type_name>(offset)?; - - // SAFETY: By the type invariant `addr` is a valid address for IO operations. - $call_macro!(fallible, $c_fn, self, $type_name, addr) - } - }; -} -pub use io_define_read; - -/// Generates an accessor method for writing to an I/O backend. -/// -/// This macro reduces boilerplate by automatically generating either compile-time bounds-checked -/// (infallible) or runtime bounds-checked (fallible) write methods. It abstracts the address -/// calculation and bounds checking, and delegates the actual I/O write operation to a specified -/// helper macro, making it generic over different I/O backends. -/// -/// # Parameters -/// -/// * `infallible` / `fallible` - Determines the bounds-checking strategy. `infallible` relies on -/// `IoKnownSize` for compile-time checks and returns `()`. `fallible` performs runtime checks -/// against `maxsize()` and returns a `Result`. -/// * `$(#[$attr:meta])*` - Optional attributes to apply to the generated method (e.g., -/// `#[cfg(CONFIG_64BIT)]` or inline directives). -/// * `$vis:vis` - The visibility of the generated method (e.g., `pub`). 
-/// * `$name:ident` / `$try_name:ident` - The name of the generated method (e.g., `write32`, -/// `try_write8`). -/// * `$call_macro:ident` - The backend-specific helper macro used to emit the actual I/O call -/// (e.g., `call_mmio_write`). -/// * `$c_fn:ident` - The backend-specific C function or identifier to be passed into the -/// `$call_macro`. -/// * `$type_name:ty` - The Rust type of the value being written (e.g., `u8`, `u32`). Note the use -/// of `<-` before the type to denote a write operation. -#[macro_export] -macro_rules! io_define_write { - (infallible, $(#[$attr:meta])* $vis:vis $name:ident, $call_macro:ident($c_fn:ident) <- - $type_name:ty) => { - /// Write IO data from a given offset known at compile time. - /// - /// Bound checks are performed on compile time, hence if the offset is not known at compile - /// time, the build will fail. - $(#[$attr])* - // Always inline to optimize out error path of `io_addr_assert`. - #[inline(always)] - $vis fn $name(&self, value: $type_name, offset: usize) { - let addr = self.io_addr_assert::<$type_name>(offset); - - $call_macro!(infallible, $c_fn, self, $type_name, addr, value); - } - }; - - (fallible, $(#[$attr:meta])* $vis:vis $try_name:ident, $call_macro:ident($c_fn:ident) <- - $type_name:ty) => { - /// Write IO data from a given offset. - /// - /// Bound checks are performed on runtime, it fails if the offset (plus the type size) is - /// out of bounds. - $(#[$attr])* - $vis fn $try_name(&self, value: $type_name, offset: usize) -> Result { - let addr = self.io_addr::<$type_name>(offset)?; - - $call_macro!(fallible, $c_fn, self, $type_name, addr, value) - } - }; -} -pub use io_define_write; - /// Checks whether an access of type `U` at the given `offset` /// is valid within this region. #[inline] @@ -320,14 +153,74 @@ const fn offset_valid(offset: usize, size: usize) -> bool { } } -/// Marker trait indicating that an I/O backend supports operations of a certain type. 
+/// Trait indicating that an I/O backend supports operations of a certain type and providing an +/// implementation for these operations. /// /// Different I/O backends can implement this trait to expose only the operations they support. /// /// For example, a PCI configuration space may implement `IoCapable`, `IoCapable`, /// and `IoCapable`, but not `IoCapable`, while an MMIO region on a 64-bit /// system might implement all four. -pub trait IoCapable {} +pub trait IoCapable { + /// Performs an I/O read of type `T` at `address` and returns the result. + /// + /// # Safety + /// + /// The range `[address..address + size_of::()]` must be within the bounds of `Self`. + unsafe fn io_read(&self, address: usize) -> T; + + /// Performs an I/O write of `value` at `address`. + /// + /// # Safety + /// + /// The range `[address..address + size_of::()]` must be within the bounds of `Self`. + unsafe fn io_write(&self, value: T, address: usize); +} + +/// Describes a given I/O location: its offset, width, and type to convert the raw value from and +/// into. +/// +/// This trait is the key abstraction allowing [`Io::read`], [`Io::write`], and [`Io::update`] (and +/// their fallible [`try_read`](Io::try_read), [`try_write`](Io::try_write) and +/// [`try_update`](Io::try_update) counterparts) to work uniformly with both raw [`usize`] offsets +/// (for primitive types like [`u32`]) and typed ones (like those generated by the [`register!`] +/// macro). +/// +/// An `IoLoc` carries three pieces of information: +/// +/// - The offset to access (returned by [`IoLoc::offset`]), +/// - The width of the access (determined by [`IoLoc::IoType`]), +/// - The type `T` in which the raw data is returned or provided. +/// +/// `T` and `IoLoc::IoType` may differ: for instance, a typed register has `T` = the register type +/// with its bitfields, and `IoType` = its backing primitive (e.g. `u32`). 
+pub trait IoLoc { + /// Size ([`u8`], [`u16`], etc) of the I/O performed on the returned [`offset`](IoLoc::offset). + type IoType: Into + From; + + /// Consumes `self` and returns the offset of this location. + fn offset(self) -> usize; +} + +/// Implements [`IoLoc<$ty>`] for [`usize`], allowing [`usize`] to be used as a parameter of +/// [`Io::read`] and [`Io::write`]. +macro_rules! impl_usize_ioloc { + ($($ty:ty),*) => { + $( + impl IoLoc<$ty> for usize { + type IoType = $ty; + + #[inline(always)] + fn offset(self) -> usize { + self + } + } + )* + } +} + +// Provide the ability to read any primitive type from a [`usize`]. +impl_usize_ioloc!(u8, u16, u32, u64); /// Types implementing this trait (e.g. MMIO BARs or PCI config regions) /// can perform I/O operations on regions of memory. @@ -369,146 +262,445 @@ pub trait Io { /// Fallible 8-bit read with runtime bounds check. #[inline(always)] - fn try_read8(&self, _offset: usize) -> Result + fn try_read8(&self, offset: usize) -> Result where Self: IoCapable, { - build_error!("Backend does not support fallible 8-bit read") + self.try_read(offset) } /// Fallible 16-bit read with runtime bounds check. #[inline(always)] - fn try_read16(&self, _offset: usize) -> Result + fn try_read16(&self, offset: usize) -> Result where Self: IoCapable, { - build_error!("Backend does not support fallible 16-bit read") + self.try_read(offset) } /// Fallible 32-bit read with runtime bounds check. #[inline(always)] - fn try_read32(&self, _offset: usize) -> Result + fn try_read32(&self, offset: usize) -> Result where Self: IoCapable, { - build_error!("Backend does not support fallible 32-bit read") + self.try_read(offset) } /// Fallible 64-bit read with runtime bounds check. 
#[inline(always)] - fn try_read64(&self, _offset: usize) -> Result + fn try_read64(&self, offset: usize) -> Result where Self: IoCapable, { - build_error!("Backend does not support fallible 64-bit read") + self.try_read(offset) } /// Fallible 8-bit write with runtime bounds check. #[inline(always)] - fn try_write8(&self, _value: u8, _offset: usize) -> Result + fn try_write8(&self, value: u8, offset: usize) -> Result where Self: IoCapable, { - build_error!("Backend does not support fallible 8-bit write") + self.try_write(offset, value) } /// Fallible 16-bit write with runtime bounds check. #[inline(always)] - fn try_write16(&self, _value: u16, _offset: usize) -> Result + fn try_write16(&self, value: u16, offset: usize) -> Result where Self: IoCapable, { - build_error!("Backend does not support fallible 16-bit write") + self.try_write(offset, value) } /// Fallible 32-bit write with runtime bounds check. #[inline(always)] - fn try_write32(&self, _value: u32, _offset: usize) -> Result + fn try_write32(&self, value: u32, offset: usize) -> Result where Self: IoCapable, { - build_error!("Backend does not support fallible 32-bit write") + self.try_write(offset, value) } /// Fallible 64-bit write with runtime bounds check. #[inline(always)] - fn try_write64(&self, _value: u64, _offset: usize) -> Result + fn try_write64(&self, value: u64, offset: usize) -> Result where Self: IoCapable, { - build_error!("Backend does not support fallible 64-bit write") + self.try_write(offset, value) } /// Infallible 8-bit read with compile-time bounds check. #[inline(always)] - fn read8(&self, _offset: usize) -> u8 + fn read8(&self, offset: usize) -> u8 where Self: IoKnownSize + IoCapable, { - build_error!("Backend does not support infallible 8-bit read") + self.read(offset) } /// Infallible 16-bit read with compile-time bounds check. 
#[inline(always)] - fn read16(&self, _offset: usize) -> u16 + fn read16(&self, offset: usize) -> u16 where Self: IoKnownSize + IoCapable, { - build_error!("Backend does not support infallible 16-bit read") + self.read(offset) } /// Infallible 32-bit read with compile-time bounds check. #[inline(always)] - fn read32(&self, _offset: usize) -> u32 + fn read32(&self, offset: usize) -> u32 where Self: IoKnownSize + IoCapable, { - build_error!("Backend does not support infallible 32-bit read") + self.read(offset) } /// Infallible 64-bit read with compile-time bounds check. #[inline(always)] - fn read64(&self, _offset: usize) -> u64 + fn read64(&self, offset: usize) -> u64 where Self: IoKnownSize + IoCapable, { - build_error!("Backend does not support infallible 64-bit read") + self.read(offset) } /// Infallible 8-bit write with compile-time bounds check. #[inline(always)] - fn write8(&self, _value: u8, _offset: usize) + fn write8(&self, value: u8, offset: usize) where Self: IoKnownSize + IoCapable, { - build_error!("Backend does not support infallible 8-bit write") + self.write(offset, value) } /// Infallible 16-bit write with compile-time bounds check. #[inline(always)] - fn write16(&self, _value: u16, _offset: usize) + fn write16(&self, value: u16, offset: usize) where Self: IoKnownSize + IoCapable, { - build_error!("Backend does not support infallible 16-bit write") + self.write(offset, value) } /// Infallible 32-bit write with compile-time bounds check. #[inline(always)] - fn write32(&self, _value: u32, _offset: usize) + fn write32(&self, value: u32, offset: usize) where Self: IoKnownSize + IoCapable, { - build_error!("Backend does not support infallible 32-bit write") + self.write(offset, value) } /// Infallible 64-bit write with compile-time bounds check. 
#[inline(always)] - fn write64(&self, _value: u64, _offset: usize) + fn write64(&self, value: u64, offset: usize) where Self: IoKnownSize + IoCapable, { - build_error!("Backend does not support infallible 64-bit write") + self.write(offset, value) + } + + /// Generic fallible read with runtime bounds check. + /// + /// # Examples + /// + /// Read a primitive type from an I/O address: + /// + /// ```no_run + /// use kernel::io::{ + /// Io, + /// Mmio, + /// }; + /// + /// fn do_reads(io: &Mmio) -> Result { + /// // 32-bit read from address `0x10`. + /// let v: u32 = io.try_read(0x10)?; + /// + /// // 8-bit read from address `0xfff`. + /// let v: u8 = io.try_read(0xfff)?; + /// + /// Ok(()) + /// } + /// ``` + #[inline(always)] + fn try_read(&self, location: L) -> Result + where + L: IoLoc, + Self: IoCapable, + { + let address = self.io_addr::(location.offset())?; + + // SAFETY: `address` has been validated by `io_addr`. + Ok(unsafe { self.io_read(address) }.into()) + } + + /// Generic fallible write with runtime bounds check. + /// + /// # Examples + /// + /// Write a primitive type to an I/O address: + /// + /// ```no_run + /// use kernel::io::{ + /// Io, + /// Mmio, + /// }; + /// + /// fn do_writes(io: &Mmio) -> Result { + /// // 32-bit write of value `1` at address `0x10`. + /// io.try_write(0x10, 1u32)?; + /// + /// // 8-bit write of value `0xff` at address `0xfff`. + /// io.try_write(0xfff, 0xffu8)?; + /// + /// Ok(()) + /// } + /// ``` + #[inline(always)] + fn try_write(&self, location: L, value: T) -> Result + where + L: IoLoc, + Self: IoCapable, + { + let address = self.io_addr::(location.offset())?; + let io_value = value.into(); + + // SAFETY: `address` has been validated by `io_addr`. + unsafe { self.io_write(io_value, address) } + + Ok(()) + } + + /// Generic fallible write of a fully-located register value. 
+ /// + /// # Examples + /// + /// Tuples carrying a location and a value can be used with this method: + /// + /// ```no_run + /// use kernel::io::{ + /// register, + /// Io, + /// Mmio, + /// }; + /// + /// register! { + /// VERSION(u32) @ 0x100 { + /// 15:8 major; + /// 7:0 minor; + /// } + /// } + /// + /// impl VERSION { + /// fn new(major: u8, minor: u8) -> Self { + /// VERSION::zeroed().with_major(major).with_minor(minor) + /// } + /// } + /// + /// fn do_write_reg(io: &Mmio) -> Result { + /// + /// io.try_write_reg(VERSION::new(1, 0)) + /// } + /// ``` + #[inline(always)] + fn try_write_reg(&self, value: V) -> Result + where + L: IoLoc, + V: LocatedRegister, + Self: IoCapable, + { + let (location, value) = value.into_io_op(); + + self.try_write(location, value) + } + + /// Generic fallible update with runtime bounds check. + /// + /// Note: this does not perform any synchronization. The caller is responsible for ensuring + /// exclusive access if required. + /// + /// # Examples + /// + /// Read the u32 value at address `0x10`, increment it, and store the updated value back: + /// + /// ```no_run + /// use kernel::io::{ + /// Io, + /// Mmio, + /// }; + /// + /// fn do_update(io: &Mmio<0x1000>) -> Result { + /// io.try_update(0x10, |v: u32| { + /// v + 1 + /// }) + /// } + /// ``` + #[inline(always)] + fn try_update(&self, location: L, f: F) -> Result + where + L: IoLoc, + Self: IoCapable, + F: FnOnce(T) -> T, + { + let address = self.io_addr::(location.offset())?; + + // SAFETY: `address` has been validated by `io_addr`. + let value: T = unsafe { self.io_read(address) }.into(); + let io_value = f(value).into(); + + // SAFETY: `address` has been validated by `io_addr`. + unsafe { self.io_write(io_value, address) } + + Ok(()) + } + + /// Generic infallible read with compile-time bounds check. 
+ /// + /// # Examples + /// + /// Read a primitive type from an I/O address: + /// + /// ```no_run + /// use kernel::io::{ + /// Io, + /// Mmio, + /// }; + /// + /// fn do_reads(io: &Mmio<0x1000>) { + /// // 32-bit read from address `0x10`. + /// let v: u32 = io.read(0x10); + /// + /// // 8-bit read from the top of the I/O space. + /// let v: u8 = io.read(0xfff); + /// } + /// ``` + #[inline(always)] + fn read(&self, location: L) -> T + where + L: IoLoc, + Self: IoKnownSize + IoCapable, + { + let address = self.io_addr_assert::(location.offset()); + + // SAFETY: `address` has been validated by `io_addr_assert`. + unsafe { self.io_read(address) }.into() + } + + /// Generic infallible write with compile-time bounds check. + /// + /// # Examples + /// + /// Write a primitive type to an I/O address: + /// + /// ```no_run + /// use kernel::io::{ + /// Io, + /// Mmio, + /// }; + /// + /// fn do_writes(io: &Mmio<0x1000>) { + /// // 32-bit write of value `1` at address `0x10`. + /// io.write(0x10, 1u32); + /// + /// // 8-bit write of value `0xff` at the top of the I/O space. + /// io.write(0xfff, 0xffu8); + /// } + /// ``` + #[inline(always)] + fn write(&self, location: L, value: T) + where + L: IoLoc, + Self: IoKnownSize + IoCapable, + { + let address = self.io_addr_assert::(location.offset()); + let io_value = value.into(); + + // SAFETY: `address` has been validated by `io_addr_assert`. + unsafe { self.io_write(io_value, address) } + } + + /// Generic infallible write of a fully-located register value. + /// + /// # Examples + /// + /// Tuples carrying a location and a value can be used with this method: + /// + /// ```no_run + /// use kernel::io::{ + /// register, + /// Io, + /// Mmio, + /// }; + /// + /// register! 
{ + /// VERSION(u32) @ 0x100 { + /// 15:8 major; + /// 7:0 minor; + /// } + /// } + /// + /// impl VERSION { + /// fn new(major: u8, minor: u8) -> Self { + /// VERSION::zeroed().with_major(major).with_minor(minor) + /// } + /// } + /// + /// fn do_write_reg(io: &Mmio<0x1000>) { + /// io.write_reg(VERSION::new(1, 0)); + /// } + /// ``` + #[inline(always)] + fn write_reg(&self, value: V) + where + L: IoLoc, + V: LocatedRegister, + Self: IoKnownSize + IoCapable, + { + let (location, value) = value.into_io_op(); + + self.write(location, value) + } + + /// Generic infallible update with compile-time bounds check. + /// + /// Note: this does not perform any synchronization. The caller is responsible for ensuring + /// exclusive access if required. + /// + /// # Examples + /// + /// Read the u32 value at address `0x10`, increment it, and store the updated value back: + /// + /// ```no_run + /// use kernel::io::{ + /// Io, + /// Mmio, + /// }; + /// + /// fn do_update(io: &Mmio<0x1000>) { + /// io.update(0x10, |v: u32| { + /// v + 1 + /// }) + /// } + /// ``` + #[inline(always)] + fn update(&self, location: L, f: F) + where + L: IoLoc, + Self: IoKnownSize + IoCapable + Sized, + F: FnOnce(T) -> T, + { + let address = self.io_addr_assert::(location.offset()); + + // SAFETY: `address` has been validated by `io_addr_assert`. + let value: T = unsafe { self.io_read(address) }.into(); + let io_value = f(value).into(); + + // SAFETY: `address` has been validated by `io_addr_assert`. + unsafe { self.io_write(io_value, address) } } } @@ -534,14 +726,36 @@ pub trait IoKnownSize: Io { } } -// MMIO regions support 8, 16, and 32-bit accesses. -impl IoCapable for Mmio {} -impl IoCapable for Mmio {} -impl IoCapable for Mmio {} +/// Implements [`IoCapable`] on `$mmio` for `$ty` using `$read_fn` and `$write_fn`. +macro_rules! 
impl_mmio_io_capable { + ($mmio:ident, $(#[$attr:meta])* $ty:ty, $read_fn:ident, $write_fn:ident) => { + $(#[$attr])* + impl IoCapable<$ty> for $mmio { + unsafe fn io_read(&self, address: usize) -> $ty { + // SAFETY: By the trait invariant `address` is a valid address for MMIO operations. + unsafe { bindings::$read_fn(address as *const c_void) } + } + unsafe fn io_write(&self, value: $ty, address: usize) { + // SAFETY: By the trait invariant `address` is a valid address for MMIO operations. + unsafe { bindings::$write_fn(value, address as *mut c_void) } + } + } + }; +} + +// MMIO regions support 8, 16, and 32-bit accesses. +impl_mmio_io_capable!(Mmio, u8, readb, writeb); +impl_mmio_io_capable!(Mmio, u16, readw, writew); +impl_mmio_io_capable!(Mmio, u32, readl, writel); // MMIO regions on 64-bit systems also support 64-bit accesses. -#[cfg(CONFIG_64BIT)] -impl IoCapable for Mmio {} +impl_mmio_io_capable!( + Mmio, + #[cfg(CONFIG_64BIT)] + u64, + readq, + writeq +); impl Io for Mmio { /// Returns the base address of this mapping. 
@@ -555,46 +769,6 @@ impl Io for Mmio { fn maxsize(&self) -> usize { self.0.maxsize() } - - io_define_read!(fallible, try_read8, call_mmio_read(readb) -> u8); - io_define_read!(fallible, try_read16, call_mmio_read(readw) -> u16); - io_define_read!(fallible, try_read32, call_mmio_read(readl) -> u32); - io_define_read!( - fallible, - #[cfg(CONFIG_64BIT)] - try_read64, - call_mmio_read(readq) -> u64 - ); - - io_define_write!(fallible, try_write8, call_mmio_write(writeb) <- u8); - io_define_write!(fallible, try_write16, call_mmio_write(writew) <- u16); - io_define_write!(fallible, try_write32, call_mmio_write(writel) <- u32); - io_define_write!( - fallible, - #[cfg(CONFIG_64BIT)] - try_write64, - call_mmio_write(writeq) <- u64 - ); - - io_define_read!(infallible, read8, call_mmio_read(readb) -> u8); - io_define_read!(infallible, read16, call_mmio_read(readw) -> u16); - io_define_read!(infallible, read32, call_mmio_read(readl) -> u32); - io_define_read!( - infallible, - #[cfg(CONFIG_64BIT)] - read64, - call_mmio_read(readq) -> u64 - ); - - io_define_write!(infallible, write8, call_mmio_write(writeb) <- u8); - io_define_write!(infallible, write16, call_mmio_write(writew) <- u16); - io_define_write!(infallible, write32, call_mmio_write(writel) <- u32); - io_define_write!( - infallible, - #[cfg(CONFIG_64BIT)] - write64, - call_mmio_write(writeq) <- u64 - ); } impl IoKnownSize for Mmio { @@ -612,44 +786,70 @@ impl Mmio { // SAFETY: `Mmio` is a transparent wrapper around `MmioRaw`. 
unsafe { &*core::ptr::from_ref(raw).cast() } } - - io_define_read!(infallible, pub read8_relaxed, call_mmio_read(readb_relaxed) -> u8); - io_define_read!(infallible, pub read16_relaxed, call_mmio_read(readw_relaxed) -> u16); - io_define_read!(infallible, pub read32_relaxed, call_mmio_read(readl_relaxed) -> u32); - io_define_read!( - infallible, - #[cfg(CONFIG_64BIT)] - pub read64_relaxed, - call_mmio_read(readq_relaxed) -> u64 - ); - - io_define_read!(fallible, pub try_read8_relaxed, call_mmio_read(readb_relaxed) -> u8); - io_define_read!(fallible, pub try_read16_relaxed, call_mmio_read(readw_relaxed) -> u16); - io_define_read!(fallible, pub try_read32_relaxed, call_mmio_read(readl_relaxed) -> u32); - io_define_read!( - fallible, - #[cfg(CONFIG_64BIT)] - pub try_read64_relaxed, - call_mmio_read(readq_relaxed) -> u64 - ); - - io_define_write!(infallible, pub write8_relaxed, call_mmio_write(writeb_relaxed) <- u8); - io_define_write!(infallible, pub write16_relaxed, call_mmio_write(writew_relaxed) <- u16); - io_define_write!(infallible, pub write32_relaxed, call_mmio_write(writel_relaxed) <- u32); - io_define_write!( - infallible, - #[cfg(CONFIG_64BIT)] - pub write64_relaxed, - call_mmio_write(writeq_relaxed) <- u64 - ); - - io_define_write!(fallible, pub try_write8_relaxed, call_mmio_write(writeb_relaxed) <- u8); - io_define_write!(fallible, pub try_write16_relaxed, call_mmio_write(writew_relaxed) <- u16); - io_define_write!(fallible, pub try_write32_relaxed, call_mmio_write(writel_relaxed) <- u32); - io_define_write!( - fallible, - #[cfg(CONFIG_64BIT)] - pub try_write64_relaxed, - call_mmio_write(writeq_relaxed) <- u64 - ); } + +/// [`Mmio`] wrapper using relaxed accessors. +/// +/// This type provides an implementation of [`Io`] that uses relaxed I/O MMIO operands instead of +/// the regular ones. +/// +/// See [`Mmio::relaxed`] for a usage example. 
+#[repr(transparent)] +pub struct RelaxedMmio(Mmio); + +impl Io for RelaxedMmio { + #[inline] + fn addr(&self) -> usize { + self.0.addr() + } + + #[inline] + fn maxsize(&self) -> usize { + self.0.maxsize() + } +} + +impl IoKnownSize for RelaxedMmio { + const MIN_SIZE: usize = SIZE; +} + +impl Mmio { + /// Returns a [`RelaxedMmio`] reference that performs relaxed I/O operations. + /// + /// Relaxed accessors do not provide ordering guarantees with respect to DMA or memory accesses + /// and can be used when such ordering is not required. + /// + /// # Examples + /// + /// ```no_run + /// use kernel::io::{ + /// Io, + /// Mmio, + /// RelaxedMmio, + /// }; + /// + /// fn do_io(io: &Mmio<0x100>) { + /// // The access is performed using `readl_relaxed` instead of `readl`. + /// let v = io.relaxed().read32(0x10); + /// } + /// + /// ``` + pub fn relaxed(&self) -> &RelaxedMmio { + // SAFETY: `RelaxedMmio` is `#[repr(transparent)]` over `Mmio`, so `Mmio` and + // `RelaxedMmio` have identical layout. + unsafe { core::mem::transmute(self) } + } +} + +// MMIO regions support 8, 16, and 32-bit accesses. +impl_mmio_io_capable!(RelaxedMmio, u8, readb_relaxed, writeb_relaxed); +impl_mmio_io_capable!(RelaxedMmio, u16, readw_relaxed, writew_relaxed); +impl_mmio_io_capable!(RelaxedMmio, u32, readl_relaxed, writel_relaxed); +// MMIO regions on 64-bit systems also support 64-bit accesses. +impl_mmio_io_capable!( + RelaxedMmio, + #[cfg(CONFIG_64BIT)] + u64, + readq_relaxed, + writeq_relaxed +); diff --git a/rust/kernel/io/mem.rs b/rust/kernel/io/mem.rs index 620022cff401..7dc78d547f7a 100644 --- a/rust/kernel/io/mem.rs +++ b/rust/kernel/io/mem.rs @@ -54,6 +54,7 @@ impl<'a> IoRequest<'a> { /// use kernel::{ /// bindings, /// device::Core, + /// io::Io, /// of, /// platform, /// }; @@ -78,9 +79,9 @@ impl<'a> IoRequest<'a> { /// let io = iomem.access(pdev.as_ref())?; /// /// // Read and write a 32-bit value at `offset`. 
- /// let data = io.read32_relaxed(offset); + /// let data = io.read32(offset); /// - /// io.write32_relaxed(data, offset); + /// io.write32(data, offset); /// /// # Ok(SampleDriver) /// } @@ -117,6 +118,7 @@ impl<'a> IoRequest<'a> { /// use kernel::{ /// bindings, /// device::Core, + /// io::Io, /// of, /// platform, /// }; @@ -141,9 +143,9 @@ impl<'a> IoRequest<'a> { /// /// let io = iomem.access(pdev.as_ref())?; /// - /// let data = io.try_read32_relaxed(offset)?; + /// let data = io.try_read32(offset)?; /// - /// io.try_write32_relaxed(data, offset)?; + /// io.try_write32(data, offset)?; /// /// # Ok(SampleDriver) /// } diff --git a/rust/kernel/io/register.rs b/rust/kernel/io/register.rs new file mode 100644 index 000000000000..abc49926abfe --- /dev/null +++ b/rust/kernel/io/register.rs @@ -0,0 +1,1260 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Macro to define register layout and accessors. +//! +//! The [`register!`](kernel::io::register!) macro provides an intuitive and readable syntax for +//! defining a dedicated type for each register and accessing it using [`Io`](super::Io). Each such +//! type comes with its own field accessors that can return an error if a field's value is invalid. +//! +//! Note: most of the items in this module are public so they can be referenced by the macro, but +//! most are not to be used directly by users. Outside of the `register!` macro itself, the only +//! items you might want to import from this module are [`WithBase`] and [`Array`]. +//! +//! # Simple example +//! +//! ```no_run +//! use kernel::io::register; +//! +//! register! { +//! /// Basic information about the chip. +//! pub BOOT_0(u32) @ 0x00000100 { +//! /// Vendor ID. +//! 15:8 vendor_id; +//! /// Major revision of the chip. +//! 7:4 major_revision; +//! /// Minor revision of the chip. +//! 3:0 minor_revision; +//! } +//! } +//! ``` +//! +//! This defines a 32-bit `BOOT_0` type which can be read from or written to offset `0x100` of an +//! 
`Io` region, with the described bitfields. For instance, `minor_revision` consists of the 4 +//! least significant bits of the type. +//! +//! Fields are instances of [`Bounded`](kernel::num::Bounded) and can be read by calling their +//! getter method, which is named after them. They also have setter methods prefixed with `with_` +//! for runtime values and `with_const_` for constant values. All setters return the updated +//! register value. +//! +//! Fields can also be transparently converted from/to an arbitrary type by using the `=>` and +//! `?=>` syntaxes. +//! +//! If present, doc comments above register or fields definitions are added to the relevant item +//! they document (the register type itself, or the field's setter and getter methods). +//! +//! Note that multiple registers can be defined in a single `register!` invocation. This can be +//! useful to group related registers together. +//! +//! Here is how the register defined above can be used in code: +//! +//! +//! ```no_run +//! use kernel::{ +//! io::{ +//! register, +//! Io, +//! IoLoc, +//! }, +//! num::Bounded, +//! }; +//! # use kernel::io::Mmio; +//! # register! { +//! # pub BOOT_0(u32) @ 0x00000100 { +//! # 15:8 vendor_id; +//! # 7:4 major_revision; +//! # 3:0 minor_revision; +//! # } +//! # } +//! # fn test(io: &Mmio<0x1000>) { +//! # fn obtain_vendor_id() -> u8 { 0xff } +//! +//! // Read from the register's defined offset (0x100). +//! let boot0 = io.read(BOOT_0); +//! pr_info!("chip revision: {}.{}", boot0.major_revision().get(), boot0.minor_revision().get()); +//! +//! // Update some fields and write the new value back. +//! let new_boot0 = boot0 +//! // Constant values. +//! .with_const_major_revision::<3>() +//! .with_const_minor_revision::<10>() +//! // Runtime value. +//! .with_vendor_id(obtain_vendor_id()); +//! io.write_reg(new_boot0); +//! +//! // Or, build a new value from zero and write it: +//! io.write_reg(BOOT_0::zeroed() +//! .with_const_major_revision::<3>() +//! 
.with_const_minor_revision::<10>() +//! .with_vendor_id(obtain_vendor_id()) +//! ); +//! +//! // Or, read and update the register in a single step. +//! io.update(BOOT_0, |r| r +//! .with_const_major_revision::<3>() +//! .with_const_minor_revision::<10>() +//! .with_vendor_id(obtain_vendor_id()) +//! ); +//! +//! // Constant values can also be built using the const setters. +//! const V: BOOT_0 = pin_init::zeroed::() +//! .with_const_major_revision::<3>() +//! .with_const_minor_revision::<10>(); +//! # } +//! ``` +//! +//! For more extensive documentation about how to define registers, see the +//! [`register!`](kernel::io::register!) macro. + +use core::marker::PhantomData; + +use crate::io::IoLoc; + +use kernel::build_assert; + +/// Trait implemented by all registers. +pub trait Register: Sized { + /// Backing primitive type of the register. + type Storage: Into + From; + + /// Start offset of the register. + /// + /// The interpretation of this offset depends on the type of the register. + const OFFSET: usize; +} + +/// Trait implemented by registers with a fixed offset. +pub trait FixedRegister: Register {} + +/// Allows `()` to be used as the `location` parameter of [`Io::write`](super::Io::write) when +/// passing a [`FixedRegister`] value. +impl IoLoc for () +where + T: FixedRegister, +{ + type IoType = T::Storage; + + #[inline(always)] + fn offset(self) -> usize { + T::OFFSET + } +} + +/// A [`FixedRegister`] carries its location in its type. Thus `FixedRegister` values can be used +/// as an [`IoLoc`]. +impl IoLoc for T +where + T: FixedRegister, +{ + type IoType = T::Storage; + + #[inline(always)] + fn offset(self) -> usize { + T::OFFSET + } +} + +/// Location of a fixed register. +pub struct FixedRegisterLoc(PhantomData); + +impl FixedRegisterLoc { + /// Returns the location of `T`. + #[inline(always)] + // We do not implement `Default` so we can be const. 
+ #[expect(clippy::new_without_default)] + pub const fn new() -> Self { + Self(PhantomData) + } +} + +impl IoLoc for FixedRegisterLoc +where + T: FixedRegister, +{ + type IoType = T::Storage; + + #[inline(always)] + fn offset(self) -> usize { + T::OFFSET + } +} + +/// Trait providing a base address to be added to the offset of a relative register to obtain +/// its actual offset. +/// +/// The `T` generic argument is used to distinguish which base to use, in case a type provides +/// several bases. It is given to the `register!` macro to restrict the use of the register to +/// implementors of this particular variant. +pub trait RegisterBase { + /// Base address to which register offsets are added. + const BASE: usize; +} + +/// Trait implemented by all registers that are relative to a base. +pub trait WithBase { + /// Family of bases applicable to this register. + type BaseFamily; + + /// Returns the absolute location of this type when using `B` as its base. + #[inline(always)] + fn of>() -> RelativeRegisterLoc + where + Self: Register, + { + RelativeRegisterLoc::new() + } +} + +/// Trait implemented by relative registers. +pub trait RelativeRegister: Register + WithBase {} + +/// Location of a relative register. +/// +/// This can either be an immediately accessible regular [`RelativeRegister`], or a +/// [`RelativeRegisterArray`] that needs one additional resolution through +/// [`RelativeRegisterLoc::at`]. +pub struct RelativeRegisterLoc(PhantomData, PhantomData); + +impl RelativeRegisterLoc +where + T: Register + WithBase, + B: RegisterBase + ?Sized, +{ + /// Returns the location of a relative register or register array. + #[inline(always)] + // We do not implement `Default` so we can be const. + #[expect(clippy::new_without_default)] + pub const fn new() -> Self { + Self(PhantomData, PhantomData) + } + + // Returns the absolute offset of the relative register using base `B`. 
+ // + // This is implemented as a private const method so it can be reused by the [`IoLoc`] + // implementations of both [`RelativeRegisterLoc`] and [`RelativeRegisterArrayLoc`]. + #[inline] + const fn offset(self) -> usize { + B::BASE + T::OFFSET + } +} + +impl IoLoc for RelativeRegisterLoc +where + T: RelativeRegister, + B: RegisterBase + ?Sized, +{ + type IoType = T::Storage; + + #[inline(always)] + fn offset(self) -> usize { + RelativeRegisterLoc::offset(self) + } +} + +/// Trait implemented by arrays of registers. +pub trait RegisterArray: Register { + /// Number of elements in the registers array. + const SIZE: usize; + /// Number of bytes between the start of elements in the registers array. + const STRIDE: usize; +} + +/// Location of an array register. +pub struct RegisterArrayLoc(usize, PhantomData); + +impl RegisterArrayLoc { + /// Returns the location of register `T` at position `idx`, with build-time validation. + #[inline(always)] + pub fn new(idx: usize) -> Self { + build_assert!(idx < T::SIZE); + + Self(idx, PhantomData) + } + + /// Attempts to return the location of register `T` at position `idx`, with runtime validation. + #[inline(always)] + pub fn try_new(idx: usize) -> Option { + if idx < T::SIZE { + Some(Self(idx, PhantomData)) + } else { + None + } + } +} + +impl IoLoc for RegisterArrayLoc +where + T: RegisterArray, +{ + type IoType = T::Storage; + + #[inline(always)] + fn offset(self) -> usize { + T::OFFSET + self.0 * T::STRIDE + } +} + +/// Trait providing location builders for [`RegisterArray`]s. +pub trait Array { + /// Returns the location of the register at position `idx`, with build-time validation. + #[inline(always)] + fn at(idx: usize) -> RegisterArrayLoc + where + Self: RegisterArray, + { + RegisterArrayLoc::new(idx) + } + + /// Returns the location of the register at position `idx`, with runtime validation. 
+ #[inline(always)] + fn try_at(idx: usize) -> Option> + where + Self: RegisterArray, + { + RegisterArrayLoc::try_new(idx) + } +} + +/// Trait implemented by arrays of relative registers. +pub trait RelativeRegisterArray: RegisterArray + WithBase {} + +/// Location of a relative array register. +pub struct RelativeRegisterArrayLoc< + T: RelativeRegisterArray, + B: RegisterBase + ?Sized, +>(RelativeRegisterLoc, usize); + +impl RelativeRegisterArrayLoc +where + T: RelativeRegisterArray, + B: RegisterBase + ?Sized, +{ + /// Returns the location of register `T` from the base `B` at index `idx`, with build-time + /// validation. + #[inline(always)] + pub fn new(idx: usize) -> Self { + build_assert!(idx < T::SIZE); + + Self(RelativeRegisterLoc::new(), idx) + } + + /// Attempts to return the location of register `T` from the base `B` at index `idx`, with + /// runtime validation. + #[inline(always)] + pub fn try_new(idx: usize) -> Option { + if idx < T::SIZE { + Some(Self(RelativeRegisterLoc::new(), idx)) + } else { + None + } + } +} + +/// Methods exclusive to [`RelativeRegisterLoc`]s created with a [`RelativeRegisterArray`]. +impl RelativeRegisterLoc +where + T: RelativeRegisterArray, + B: RegisterBase + ?Sized, +{ + /// Returns the location of the register at position `idx`, with build-time validation. + #[inline(always)] + pub fn at(self, idx: usize) -> RelativeRegisterArrayLoc { + RelativeRegisterArrayLoc::new(idx) + } + + /// Returns the location of the register at position `idx`, with runtime validation. 
+ #[inline(always)] + pub fn try_at(self, idx: usize) -> Option> { + RelativeRegisterArrayLoc::try_new(idx) + } +} + +impl IoLoc for RelativeRegisterArrayLoc +where + T: RelativeRegisterArray, + B: RegisterBase + ?Sized, +{ + type IoType = T::Storage; + + #[inline(always)] + fn offset(self) -> usize { + self.0.offset() + self.1 * T::STRIDE + } +} + +/// Trait implemented by items that contain both a register value and the absolute I/O location at +/// which to write it. +/// +/// Implementors can be used with [`Io::write_reg`](super::Io::write_reg). +pub trait LocatedRegister { + /// Register value to write. + type Value: Register; + /// Full location information at which to write the value. + type Location: IoLoc; + + /// Consumes `self` and returns a `(location, value)` tuple describing a valid I/O write + /// operation. + fn into_io_op(self) -> (Self::Location, Self::Value); +} + +impl LocatedRegister for T +where + T: FixedRegister, +{ + type Location = FixedRegisterLoc; + type Value = T; + + #[inline(always)] + fn into_io_op(self) -> (FixedRegisterLoc, T) { + (FixedRegisterLoc::new(), self) + } +} + +/// Defines a dedicated type for a register, including getter and setter methods for its fields and +/// methods to read and write it from an [`Io`](kernel::io::Io) region. +/// +/// This documentation focuses on how to declare registers. See the [module-level +/// documentation](mod@kernel::io::register) for examples of how to access them. +/// +/// There are 4 possible kinds of registers: fixed offset registers, relative registers, arrays of +/// registers, and relative arrays of registers. +/// +/// ## Fixed offset registers +/// +/// These are the simplest kind of registers. Their location is simply an offset inside the I/O +/// region. For instance: +/// +/// ```ignore +/// register! { +/// pub FIXED_REG(u16) @ 0x80 { +/// ... +/// } +/// } +/// ``` +/// +/// This creates a 16-bit register named `FIXED_REG` located at offset `0x80` of an I/O region. 
+/// +/// These registers' location can be built simply by referencing their name: +/// +/// ```no_run +/// use kernel::{ +/// io::{ +/// register, +/// Io, +/// }, +/// }; +/// # use kernel::io::Mmio; +/// +/// register! { +/// FIXED_REG(u32) @ 0x100 { +/// 16:8 high_byte; +/// 7:0 low_byte; +/// } +/// } +/// +/// # fn test(io: &Mmio<0x1000>) { +/// let val = io.read(FIXED_REG); +/// +/// // Write from an already-existing value. +/// io.write(FIXED_REG, val.with_low_byte(0xff)); +/// +/// // Create a register value from scratch. +/// let val2 = FIXED_REG::zeroed().with_high_byte(0x80); +/// +/// // The location of fixed offset registers is already contained in their type. Thus, the +/// // `location` argument of `Io::write` is technically redundant and can be replaced by `()`. +/// io.write((), val2); +/// +/// // Or, the single-argument `Io::write_reg` can be used. +/// io.write_reg(val2); +/// # } +/// +/// ``` +/// +/// It is possible to create an alias of an existing register with new field definitions by using +/// the `=> ALIAS` syntax. This is useful for cases where a register's interpretation depends on +/// the context: +/// +/// ```no_run +/// use kernel::io::register; +/// +/// register! { +/// /// Scratch register. +/// pub SCRATCH(u32) @ 0x00000200 { +/// 31:0 value; +/// } +/// +/// /// Boot status of the firmware. +/// pub SCRATCH_BOOT_STATUS(u32) => SCRATCH { +/// 0:0 completed; +/// } +/// } +/// ``` +/// +/// In this example, `SCRATCH_BOOT_STATUS` uses the same I/O address as `SCRATCH`, while providing +/// its own `completed` field. +/// +/// ## Relative registers +/// +/// Relative registers can be instantiated several times at a relative offset of a group of bases. +/// For instance, imagine the following I/O space: +/// +/// ```text +/// +-----------------------------+ +/// | ... 
| +/// | | +/// 0x100--->+------------CPU0-------------+ +/// | | +/// 0x110--->+-----------------------------+ +/// | CPU_CTL | +/// +-----------------------------+ +/// | ... | +/// | | +/// | | +/// 0x200--->+------------CPU1-------------+ +/// | | +/// 0x210--->+-----------------------------+ +/// | CPU_CTL | +/// +-----------------------------+ +/// | ... | +/// +-----------------------------+ +/// ``` +/// +/// `CPU0` and `CPU1` both have a `CPU_CTL` register that starts at offset `0x10` of their I/O +/// space segment. Since both instances of `CPU_CTL` share the same layout, we don't want to define +/// them twice and would prefer a way to select which one to use from a single definition. +/// +/// This can be done using the `Base + Offset` syntax when specifying the register's address: +/// +/// ```ignore +/// register! { +/// pub RELATIVE_REG(u32) @ Base + 0x80 { +/// ... +/// } +/// } +/// ``` +/// +/// This creates a register with an offset of `0x80` from a given base. +/// +/// `Base` is an arbitrary type (typically a ZST) to be used as a generic parameter of the +/// [`RegisterBase`] trait to provide the base as a constant, i.e. each type providing a base for +/// this register needs to implement `RegisterBase`. +/// +/// The location of relative registers can be built using the [`WithBase::of`] method to specify +/// its base. All relative registers implement [`WithBase`]. +/// +/// Here is the above layout translated into code: +/// +/// ```no_run +/// use kernel::{ +/// io::{ +/// register, +/// register::{ +/// RegisterBase, +/// WithBase, +/// }, +/// Io, +/// }, +/// }; +/// # use kernel::io::Mmio; +/// +/// // Type used to identify the base. +/// pub struct CpuCtlBase; +/// +/// // ZST describing `CPU0`. +/// struct Cpu0; +/// impl RegisterBase for Cpu0 { +/// const BASE: usize = 0x100; +/// } +/// +/// // ZST describing `CPU1`. 
+/// struct Cpu1; +/// impl RegisterBase for Cpu1 { +/// const BASE: usize = 0x200; +/// } +/// +/// // This makes `CPU_CTL` accessible from all implementors of `RegisterBase`. +/// register! { +/// /// CPU core control. +/// pub CPU_CTL(u32) @ CpuCtlBase + 0x10 { +/// 0:0 start; +/// } +/// } +/// +/// # fn test(io: Mmio<0x1000>) { +/// // Read the status of `Cpu0`. +/// let cpu0_started = io.read(CPU_CTL::of::()); +/// +/// // Stop `Cpu0`. +/// io.write(WithBase::of::(), CPU_CTL::zeroed()); +/// # } +/// +/// // Aliases can also be defined for relative register. +/// register! { +/// /// Alias to CPU core control. +/// pub CPU_CTL_ALIAS(u32) => CpuCtlBase + CPU_CTL { +/// /// Start the aliased CPU core. +/// 1:1 alias_start; +/// } +/// } +/// +/// # fn test2(io: Mmio<0x1000>) { +/// // Start the aliased `CPU0`, leaving its other fields untouched. +/// io.update(CPU_CTL_ALIAS::of::(), |r| r.with_alias_start(true)); +/// # } +/// ``` +/// +/// ## Arrays of registers +/// +/// Some I/O areas contain consecutive registers that share the same field layout. These areas can +/// be defined as an array of identical registers, allowing them to be accessed by index with +/// compile-time or runtime bound checking: +/// +/// ```ignore +/// register! { +/// pub REGISTER_ARRAY(u8)[10, stride = 4] @ 0x100 { +/// ... +/// } +/// } +/// ``` +/// +/// This defines `REGISTER_ARRAY`, an array of 10 byte registers starting at offset `0x100`. Each +/// register is separated from its neighbor by 4 bytes. +/// +/// The `stride` parameter is optional; if unspecified, the registers are placed consecutively from +/// each other. +/// +/// A location for a register in a register array is built using the [`Array::at`] trait method. +/// All arrays of registers implement [`Array`]. 
+/// +/// ```no_run +/// use kernel::{ +/// io::{ +/// register, +/// register::Array, +/// Io, +/// }, +/// }; +/// # use kernel::io::Mmio; +/// # fn get_scratch_idx() -> usize { +/// # 0x15 +/// # } +/// +/// // Array of 64 consecutive registers with the same layout starting at offset `0x80`. +/// register! { +/// /// Scratch registers. +/// pub SCRATCH(u32)[64] @ 0x00000080 { +/// 31:0 value; +/// } +/// } +/// +/// # fn test(io: &Mmio<0x1000>) +/// # -> Result<(), Error>{ +/// // Read scratch register 0, i.e. I/O address `0x80`. +/// let scratch_0 = io.read(SCRATCH::at(0)).value(); +/// +/// // Write scratch register 15, i.e. I/O address `0x80 + (15 * 4)`. +/// io.write(Array::at(15), SCRATCH::from(0xffeeaabb)); +/// +/// // This is out of bounds and won't build. +/// // let scratch_128 = io.read(SCRATCH::at(128)).value(); +/// +/// // Runtime-obtained array index. +/// let idx = get_scratch_idx(); +/// // Access on a runtime index returns an error if it is out-of-bounds. +/// let some_scratch = io.read(SCRATCH::try_at(idx).ok_or(EINVAL)?).value(); +/// +/// // Alias to a specific register in an array. +/// // Here `SCRATCH[8]` is used to convey the firmware exit code. +/// register! { +/// /// Firmware exit status code. +/// pub FIRMWARE_STATUS(u32) => SCRATCH[8] { +/// 7:0 status; +/// } +/// } +/// +/// let status = io.read(FIRMWARE_STATUS).status(); +/// +/// // Non-contiguous register arrays can be defined by adding a stride parameter. +/// // Here, each of the 16 registers of the array is separated by 8 bytes, meaning that the +/// // registers of the two declarations below are interleaved. +/// register! { +/// /// Scratch registers bank 0. +/// pub SCRATCH_INTERLEAVED_0(u32)[16, stride = 8] @ 0x000000c0 { +/// 31:0 value; +/// } +/// +/// /// Scratch registers bank 1. 
+/// pub SCRATCH_INTERLEAVED_1(u32)[16, stride = 8] @ 0x000000c4 { +/// 31:0 value; +/// } +/// } +/// # Ok(()) +/// # } +/// ``` +/// +/// ## Relative arrays of registers +/// +/// Combining the two features described in the sections above, arrays of registers accessible from +/// a base can also be defined: +/// +/// ```ignore +/// register! { +/// pub RELATIVE_REGISTER_ARRAY(u8)[10, stride = 4] @ Base + 0x100 { +/// ... +/// } +/// } +/// ``` +/// +/// Like relative registers, they implement the [`WithBase`] trait. However the return value of +/// [`WithBase::of`] cannot be used directly as a location and must be further specified using the +/// [`at`](RelativeRegisterLoc::at) method. +/// +/// ```no_run +/// use kernel::{ +/// io::{ +/// register, +/// register::{ +/// RegisterBase, +/// WithBase, +/// }, +/// Io, +/// }, +/// }; +/// # use kernel::io::Mmio; +/// # fn get_scratch_idx() -> usize { +/// # 0x15 +/// # } +/// +/// // Type used as parameter of `RegisterBase` to specify the base. +/// pub struct CpuCtlBase; +/// +/// // ZST describing `CPU0`. +/// struct Cpu0; +/// impl RegisterBase for Cpu0 { +/// const BASE: usize = 0x100; +/// } +/// +/// // ZST describing `CPU1`. +/// struct Cpu1; +/// impl RegisterBase for Cpu1 { +/// const BASE: usize = 0x200; +/// } +/// +/// // 64 per-cpu scratch registers, arranged as a contiguous array. +/// register! { +/// /// Per-CPU scratch registers. +/// pub CPU_SCRATCH(u32)[64] @ CpuCtlBase + 0x00000080 { +/// 31:0 value; +/// } +/// } +/// +/// # fn test(io: &Mmio<0x1000>) -> Result<(), Error> { +/// // Read scratch register 0 of CPU0. +/// let scratch = io.read(CPU_SCRATCH::of::().at(0)); +/// +/// // Write the retrieved value into scratch register 15 of CPU1. +/// io.write(WithBase::of::().at(15), scratch); +/// +/// // This won't build. +/// // let cpu0_scratch_128 = io.read(CPU_SCRATCH::of::().at(128)).value(); +/// +/// // Runtime-obtained array index. 
+/// let scratch_idx = get_scratch_idx(); +/// // Access on a runtime index returns an error if it is out-of-bounds. +/// let cpu0_scratch = io.read( +/// CPU_SCRATCH::of::().try_at(scratch_idx).ok_or(EINVAL)? +/// ).value(); +/// # Ok(()) +/// # } +/// +/// // Alias to `SCRATCH[8]` used to convey the firmware exit code. +/// register! { +/// /// Per-CPU firmware exit status code. +/// pub CPU_FIRMWARE_STATUS(u32) => CpuCtlBase + CPU_SCRATCH[8] { +/// 7:0 status; +/// } +/// } +/// +/// // Non-contiguous relative register arrays can be defined by adding a stride parameter. +/// // Here, each of the 16 registers of the array is separated by 8 bytes, meaning that the +/// // registers of the two declarations below are interleaved. +/// register! { +/// /// Scratch registers bank 0. +/// pub CPU_SCRATCH_INTERLEAVED_0(u32)[16, stride = 8] @ CpuCtlBase + 0x00000d00 { +/// 31:0 value; +/// } +/// +/// /// Scratch registers bank 1. +/// pub CPU_SCRATCH_INTERLEAVED_1(u32)[16, stride = 8] @ CpuCtlBase + 0x00000d04 { +/// 31:0 value; +/// } +/// } +/// +/// # fn test2(io: &Mmio<0x1000>) -> Result<(), Error> { +/// let cpu0_status = io.read(CPU_FIRMWARE_STATUS::of::()).status(); +/// # Ok(()) +/// # } +/// ``` +#[macro_export] +macro_rules! register { + // Entry point for the macro, allowing multiple registers to be defined in one call. + // It matches all possible register declaration patterns to dispatch them to corresponding + // `@reg` rule that defines a single register. + ( + $( + $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) + $([ $size:expr $(, stride = $stride:expr)? ])? + $(@ $($base:ident +)? $offset:literal)? + $(=> $alias:ident $(+ $alias_offset:ident)? $([$alias_idx:expr])? )? + { $($fields:tt)* } + )* + ) => { + $( + $crate::register!( + @reg $(#[$attr])* $vis $name ($storage) $([$size $(, stride = $stride)?])? + $(@ $($base +)? $offset)? + $(=> $alias $(+ $alias_offset)? $([$alias_idx])? )? 
+ { $($fields)* } + ); + )* + }; + + // All the rules below are private helpers. + + // Creates a register at a fixed offset of the MMIO space. + ( + @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) @ $offset:literal + { $($fields:tt)* } + ) => { + $crate::register!(@bitfield $(#[$attr])* $vis struct $name($storage) { $($fields)* }); + $crate::register!(@io_base $name($storage) @ $offset); + $crate::register!(@io_fixed $(#[$attr])* $vis $name($storage)); + }; + + // Creates an alias register of fixed offset register `alias` with its own fields. + ( + @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) => $alias:ident + { $($fields:tt)* } + ) => { + $crate::register!(@bitfield $(#[$attr])* $vis struct $name($storage) { $($fields)* }); + $crate::register!( + @io_base $name($storage) @ + <$alias as $crate::io::register::Register>::OFFSET + ); + $crate::register!(@io_fixed $(#[$attr])* $vis $name($storage)); + }; + + // Creates a register at a relative offset from a base address provider. + ( + @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) @ $base:ident + $offset:literal + { $($fields:tt)* } + ) => { + $crate::register!(@bitfield $(#[$attr])* $vis struct $name($storage) { $($fields)* }); + $crate::register!(@io_base $name($storage) @ $offset); + $crate::register!(@io_relative $vis $name($storage) @ $base); + }; + + // Creates an alias register of relative offset register `alias` with its own fields. + ( + @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) => $base:ident + $alias:ident + { $($fields:tt)* } + ) => { + $crate::register!(@bitfield $(#[$attr])* $vis struct $name($storage) { $($fields)* }); + $crate::register!( + @io_base $name($storage) @ <$alias as $crate::io::register::Register>::OFFSET + ); + $crate::register!(@io_relative $vis $name($storage) @ $base); + }; + + // Creates an array of registers at a fixed offset of the MMIO space. 
+ ( + @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) + [ $size:expr, stride = $stride:expr ] @ $offset:literal { $($fields:tt)* } + ) => { + ::kernel::static_assert!(::core::mem::size_of::<$storage>() <= $stride); + + $crate::register!(@bitfield $(#[$attr])* $vis struct $name($storage) { $($fields)* }); + $crate::register!(@io_base $name($storage) @ $offset); + $crate::register!(@io_array $vis $name($storage) [ $size, stride = $stride ]); + }; + + // Shortcut for contiguous array of registers (stride == size of element). + ( + @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) [ $size:expr ] @ $offset:literal + { $($fields:tt)* } + ) => { + $crate::register!( + $(#[$attr])* $vis $name($storage) [ $size, stride = ::core::mem::size_of::<$storage>() ] + @ $offset { $($fields)* } + ); + }; + + // Creates an alias of register `idx` of array of registers `alias` with its own fields. + ( + @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) => $alias:ident [ $idx:expr ] + { $($fields:tt)* } + ) => { + ::kernel::static_assert!($idx < <$alias as $crate::io::register::RegisterArray>::SIZE); + + $crate::register!(@bitfield $(#[$attr])* $vis struct $name($storage) { $($fields)* }); + $crate::register!( + @io_base $name($storage) @ + <$alias as $crate::io::register::Register>::OFFSET + + $idx * <$alias as $crate::io::register::RegisterArray>::STRIDE + ); + $crate::register!(@io_fixed $(#[$attr])* $vis $name($storage)); + }; + + // Creates an array of registers at a relative offset from a base address provider. 
+ ( + @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) + [ $size:expr, stride = $stride:expr ] + @ $base:ident + $offset:literal { $($fields:tt)* } + ) => { + ::kernel::static_assert!(::core::mem::size_of::<$storage>() <= $stride); + + $crate::register!(@bitfield $(#[$attr])* $vis struct $name($storage) { $($fields)* }); + $crate::register!(@io_base $name($storage) @ $offset); + $crate::register!( + @io_relative_array $vis $name($storage) [ $size, stride = $stride ] @ $base + $offset + ); + }; + + // Shortcut for contiguous array of relative registers (stride == size of element). + ( + @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) [ $size:expr ] + @ $base:ident + $offset:literal { $($fields:tt)* } + ) => { + $crate::register!( + $(#[$attr])* $vis $name($storage) [ $size, stride = ::core::mem::size_of::<$storage>() ] + @ $base + $offset { $($fields)* } + ); + }; + + // Creates an alias of register `idx` of relative array of registers `alias` with its own + // fields. + ( + @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) + => $base:ident + $alias:ident [ $idx:expr ] { $($fields:tt)* } + ) => { + ::kernel::static_assert!($idx < <$alias as $crate::io::register::RegisterArray>::SIZE); + + $crate::register!(@bitfield $(#[$attr])* $vis struct $name($storage) { $($fields)* }); + $crate::register!( + @io_base $name($storage) @ + <$alias as $crate::io::register::Register>::OFFSET + + $idx * <$alias as $crate::io::register::RegisterArray>::STRIDE + ); + $crate::register!(@io_relative $vis $name($storage) @ $base); + }; + + // Generates the bitfield for the register. + // + // `#[allow(non_camel_case_types)]` is added since register names typically use + // `SCREAMING_CASE`. 
+ ( + @bitfield $(#[$attr:meta])* $vis:vis struct $name:ident($storage:ty) { $($fields:tt)* } + ) => { + $crate::register!(@bitfield_core + #[allow(non_camel_case_types)] + $(#[$attr])* $vis $name $storage + ); + $crate::register!(@bitfield_fields $vis $name $storage { $($fields)* }); + }; + + // Implementations shared by all registers types. + (@io_base $name:ident($storage:ty) @ $offset:expr) => { + impl $crate::io::register::Register for $name { + type Storage = $storage; + + const OFFSET: usize = $offset; + } + }; + + // Implementations of fixed registers. + (@io_fixed $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty)) => { + impl $crate::io::register::FixedRegister for $name {} + + $(#[$attr])* + $vis const $name: $crate::io::register::FixedRegisterLoc<$name> = + $crate::io::register::FixedRegisterLoc::<$name>::new(); + }; + + // Implementations of relative registers. + (@io_relative $vis:vis $name:ident ($storage:ty) @ $base:ident) => { + impl $crate::io::register::WithBase for $name { + type BaseFamily = $base; + } + + impl $crate::io::register::RelativeRegister for $name {} + }; + + // Implementations of register arrays. + (@io_array $vis:vis $name:ident ($storage:ty) [ $size:expr, stride = $stride:expr ]) => { + impl $crate::io::register::Array for $name {} + + impl $crate::io::register::RegisterArray for $name { + const SIZE: usize = $size; + const STRIDE: usize = $stride; + } + }; + + // Implementations of relative array registers. + ( + @io_relative_array $vis:vis $name:ident ($storage:ty) [ $size:expr, stride = $stride:expr ] + @ $base:ident + $offset:literal + ) => { + impl $crate::io::register::WithBase for $name { + type BaseFamily = $base; + } + + impl $crate::io::register::RegisterArray for $name { + const SIZE: usize = $size; + const STRIDE: usize = $stride; + } + + impl $crate::io::register::RelativeRegisterArray for $name {} + }; + + // Defines the wrapper `$name` type and its conversions from/to the storage type. 
+ (@bitfield_core $(#[$attr:meta])* $vis:vis $name:ident $storage:ty) => { + $(#[$attr])* + #[repr(transparent)] + #[derive(Clone, Copy, PartialEq, Eq)] + $vis struct $name { + inner: $storage, + } + + #[allow(dead_code)] + impl $name { + /// Creates a bitfield from a raw value. + #[inline(always)] + $vis const fn from_raw(value: $storage) -> Self { + Self{ inner: value } + } + + /// Turns this bitfield into its raw value. + /// + /// This is similar to the [`From`] implementation, but is shorter to invoke in + /// most cases. + #[inline(always)] + $vis const fn into_raw(self) -> $storage { + self.inner + } + } + + // SAFETY: `$storage` is `Zeroable` and `$name` is transparent. + unsafe impl ::pin_init::Zeroable for $name {} + + impl ::core::convert::From<$name> for $storage { + #[inline(always)] + fn from(val: $name) -> $storage { + val.into_raw() + } + } + + impl ::core::convert::From<$storage> for $name { + #[inline(always)] + fn from(val: $storage) -> $name { + Self::from_raw(val) + } + } + }; + + // Definitions requiring knowledge of individual fields: private and public field accessors, + // and `Debug` implementation. + (@bitfield_fields $vis:vis $name:ident $storage:ty { + $($(#[doc = $doc:expr])* $hi:literal:$lo:literal $field:ident + $(?=> $try_into_type:ty)? + $(=> $into_type:ty)? + ; + )* + } + ) => { + #[allow(dead_code)] + impl $name { + $( + $crate::register!(@private_field_accessors $vis $name $storage : $hi:$lo $field); + $crate::register!( + @public_field_accessors $(#[doc = $doc])* $vis $name $storage : $hi:$lo $field + $(?=> $try_into_type)? + $(=> $into_type)? + ); + )* + } + + $crate::register!(@debug $name { $($field;)* }); + }; + + // Private field accessors working with the exact `Bounded` type for the field. 
+ ( + @private_field_accessors $vis:vis $name:ident $storage:ty : $hi:tt:$lo:tt $field:ident + ) => { + ::kernel::macros::paste!( + $vis const [<$field:upper _RANGE>]: ::core::ops::RangeInclusive = $lo..=$hi; + $vis const [<$field:upper _MASK>]: $storage = + ((((1 << $hi) - 1) << 1) + 1) - ((1 << $lo) - 1); + $vis const [<$field:upper _SHIFT>]: u32 = $lo; + ); + + ::kernel::macros::paste!( + fn [<__ $field>](self) -> + ::kernel::num::Bounded<$storage, { $hi + 1 - $lo }> { + // Left shift to align the field's MSB with the storage MSB. + const ALIGN_TOP: u32 = $storage::BITS - ($hi + 1); + // Right shift to move the top-aligned field to bit 0 of the storage. + const ALIGN_BOTTOM: u32 = ALIGN_TOP + $lo; + + // Extract the field using two shifts. `Bounded::shr` produces the correctly-sized + // output type. + let val = ::kernel::num::Bounded::<$storage, { $storage::BITS }>::from( + self.inner << ALIGN_TOP + ); + val.shr::() + } + + const fn [<__with_ $field>]( + mut self, + value: ::kernel::num::Bounded<$storage, { $hi + 1 - $lo }>, + ) -> Self + { + const MASK: $storage = <$name>::[<$field:upper _MASK>]; + const SHIFT: u32 = <$name>::[<$field:upper _SHIFT>]; + + let value = value.get() << SHIFT; + self.inner = (self.inner & !MASK) | value; + + self + } + ); + }; + + // Public accessors for fields infallibly (`=>`) converted to a type. + ( + @public_field_accessors $(#[doc = $doc:expr])* $vis:vis $name:ident $storage:ty : + $hi:literal:$lo:literal $field:ident => $into_type:ty + ) => { + ::kernel::macros::paste!( + + $(#[doc = $doc])* + #[doc = "Returns the value of this field."] + #[inline(always)] + $vis fn $field(self) -> $into_type + { + self.[<__ $field>]().into() + } + + $(#[doc = $doc])* + #[doc = "Sets this field to the given `value`."] + #[inline(always)] + $vis fn [](self, value: $into_type) -> Self + { + self.[<__with_ $field>](value.into()) + } + + ); + }; + + // Public accessors for fields fallibly (`?=>`) converted to a type. 
+ ( + @public_field_accessors $(#[doc = $doc:expr])* $vis:vis $name:ident $storage:ty : + $hi:tt:$lo:tt $field:ident ?=> $try_into_type:ty + ) => { + ::kernel::macros::paste!( + + $(#[doc = $doc])* + #[doc = "Returns the value of this field."] + #[inline(always)] + $vis fn $field(self) -> + Result< + $try_into_type, + <$try_into_type as ::core::convert::TryFrom< + ::kernel::num::Bounded<$storage, { $hi + 1 - $lo }> + >>::Error + > + { + self.[<__ $field>]().try_into() + } + + $(#[doc = $doc])* + #[doc = "Sets this field to the given `value`."] + #[inline(always)] + $vis fn [](self, value: $try_into_type) -> Self + { + self.[<__with_ $field>](value.into()) + } + + ); + }; + + // Public accessors for fields not converted to a type. + ( + @public_field_accessors $(#[doc = $doc:expr])* $vis:vis $name:ident $storage:ty : + $hi:tt:$lo:tt $field:ident + ) => { + ::kernel::macros::paste!( + + $(#[doc = $doc])* + #[doc = "Returns the value of this field."] + #[inline(always)] + $vis fn $field(self) -> + ::kernel::num::Bounded<$storage, { $hi + 1 - $lo }> + { + self.[<__ $field>]() + } + + $(#[doc = $doc])* + #[doc = "Sets this field to the compile-time constant `VALUE`."] + #[inline(always)] + $vis const fn [](self) -> Self { + self.[<__with_ $field>]( + ::kernel::num::Bounded::<$storage, { $hi + 1 - $lo }>::new::() + ) + } + + $(#[doc = $doc])* + #[doc = "Sets this field to the given `value`."] + #[inline(always)] + $vis fn []( + self, + value: T, + ) -> Self + where T: Into<::kernel::num::Bounded<$storage, { $hi + 1 - $lo }>>, + { + self.[<__with_ $field>](value.into()) + } + + $(#[doc = $doc])* + #[doc = "Tries to set this field to `value`, returning an error if it is out of range."] + #[inline(always)] + $vis fn []( + self, + value: T, + ) -> ::kernel::error::Result + where T: ::kernel::num::TryIntoBounded<$storage, { $hi + 1 - $lo }>, + { + Ok( + self.[<__with_ $field>]( + value.try_into_bounded().ok_or(::kernel::error::code::EOVERFLOW)? 
+ ) + ) + } + + ); + }; + + // `Debug` implementation. + (@debug $name:ident { $($field:ident;)* }) => { + impl ::kernel::fmt::Debug for $name { + fn fmt(&self, f: &mut ::kernel::fmt::Formatter<'_>) -> ::kernel::fmt::Result { + f.debug_struct(stringify!($name)) + .field("", &::kernel::prelude::fmt!("{:#x}", self.inner)) + $( + .field(stringify!($field), &self.$field()) + )* + .finish() + } + } + }; +} diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index d93292d47420..40de00ce4f97 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -29,6 +29,7 @@ #![feature(lint_reasons)] // // Stable since Rust 1.82.0. +#![feature(offset_of_nested)] #![feature(raw_ref_op)] // // Stable since Rust 1.83.0. @@ -37,10 +38,14 @@ #![feature(const_option)] #![feature(const_ptr_write)] #![feature(const_refs_to_cell)] +#![feature(const_refs_to_static)] // // Stable since Rust 1.84.0. #![feature(strict_provenance)] // +// Stable since Rust 1.89.0. +#![feature(generic_arg_infer)] +// // Expected to become stable. #![feature(arbitrary_self_types)] // @@ -101,12 +106,15 @@ pub mod faux; pub mod firmware; pub mod fmt; pub mod fs; +#[cfg(CONFIG_GPU_BUDDY = "y")] +pub mod gpu; #[cfg(CONFIG_I2C = "y")] pub mod i2c; pub mod id_pool; #[doc(hidden)] pub mod impl_flags; pub mod init; +pub mod interop; pub mod io; pub mod ioctl; pub mod iommu; diff --git a/rust/kernel/num/bounded.rs b/rust/kernel/num/bounded.rs index fa81acbdc8c2..bbab6bbcb315 100644 --- a/rust/kernel/num/bounded.rs +++ b/rust/kernel/num/bounded.rs @@ -379,6 +379,9 @@ where /// Returns the wrapped value as the backing type. /// + /// This is similar to the [`Deref`] implementation, but doesn't enforce the size invariant of + /// the [`Bounded`], which might produce slightly less optimal code. 
+ /// /// # Examples /// /// ``` @@ -387,8 +390,8 @@ where /// let v = Bounded::::new::<7>(); /// assert_eq!(v.get(), 7u32); /// ``` - pub fn get(self) -> T { - *self.deref() + pub const fn get(self) -> T { + self.0 } /// Increases the number of bits usable for `self`. @@ -473,6 +476,48 @@ where // `N` bits, and with the same signedness. unsafe { Bounded::__new(value) } } + + /// Right-shifts `self` by `SHIFT` and returns the result as a `Bounded<_, RES>`, where `RES >= + /// N - SHIFT`. + /// + /// # Examples + /// + /// ``` + /// use kernel::num::Bounded; + /// + /// let v = Bounded::::new::<0xff00>(); + /// let v_shifted: Bounded:: = v.shr::<8, _>(); + /// + /// assert_eq!(v_shifted.get(), 0xff); + /// ``` + pub fn shr(self) -> Bounded { + const { assert!(RES + SHIFT >= N) } + + // SAFETY: We shift the value right by `SHIFT`, reducing the number of bits needed to + // represent the shifted value by as much, and just asserted that `RES >= N - SHIFT`. + unsafe { Bounded::__new(self.0 >> SHIFT) } + } + + /// Left-shifts `self` by `SHIFT` and returns the result as a `Bounded<_, RES>`, where `RES >= + /// N + SHIFT`. + /// + /// # Examples + /// + /// ``` + /// use kernel::num::Bounded; + /// + /// let v = Bounded::::new::<0xff>(); + /// let v_shifted: Bounded:: = v.shl::<8, _>(); + /// + /// assert_eq!(v_shifted.get(), 0xff00); + /// ``` + pub fn shl(self) -> Bounded { + const { assert!(RES >= N + SHIFT) } + + // SAFETY: We shift the value left by `SHIFT`, augmenting the number of bits needed to + // represent the shifted value by as much, and just asserted that `RES >= N + SHIFT`. + unsafe { Bounded::__new(self.0 << SHIFT) } + } } impl Deref for Bounded @@ -1059,3 +1104,24 @@ where unsafe { Self::__new(T::from(value)) } } } + +impl Bounded +where + T: Integer + Zeroable, +{ + /// Converts this [`Bounded`] into a [`bool`]. + /// + /// This is a shorter way of writing `bool::from(self)`. 
+ /// + /// # Examples + /// + /// ``` + /// use kernel::num::Bounded; + /// + /// assert_eq!(Bounded::::new::<0>().into_bool(), false); + /// assert_eq!(Bounded::::new::<1>().into_bool(), true); + /// ``` + pub fn into_bool(self) -> bool { + self.into() + } +} diff --git a/rust/kernel/pci/io.rs b/rust/kernel/pci/io.rs index fb6edab2aea7..ae78676c927f 100644 --- a/rust/kernel/pci/io.rs +++ b/rust/kernel/pci/io.rs @@ -8,8 +8,6 @@ use crate::{ device, devres::Devres, io::{ - io_define_read, - io_define_write, Io, IoCapable, IoKnownSize, @@ -85,67 +83,41 @@ pub struct ConfigSpace<'a, S: ConfigSpaceKind = Extended> { _marker: PhantomData, } -/// Internal helper macros used to invoke C PCI configuration space read functions. -/// -/// This macro is intended to be used by higher-level PCI configuration space access macros -/// (io_define_read) and provides a unified expansion for infallible vs. fallible read semantics. It -/// emits a direct call into the corresponding C helper and performs the required cast to the Rust -/// return type. -/// -/// # Parameters -/// -/// * `$c_fn` – The C function performing the PCI configuration space write. -/// * `$self` – The I/O backend object. -/// * `$ty` – The type of the value to read. -/// * `$addr` – The PCI configuration space offset to read. -/// -/// This macro does not perform any validation; all invariants must be upheld by the higher-level -/// abstraction invoking it. -macro_rules! call_config_read { - (infallible, $c_fn:ident, $self:ident, $ty:ty, $addr:expr) => {{ - let mut val: $ty = 0; - // SAFETY: By the type invariant `$self.pdev` is a valid address. - // CAST: The offset is cast to `i32` because the C functions expect a 32-bit signed offset - // parameter. PCI configuration space size is at most 4096 bytes, so the value always fits - // within `i32` without truncation or sign change. - // Return value from C function is ignored in infallible accessors. 
- let _ret = unsafe { bindings::$c_fn($self.pdev.as_raw(), $addr as i32, &mut val) }; - val - }}; -} +/// Implements [`IoCapable`] on [`ConfigSpace`] for `$ty` using `$read_fn` and `$write_fn`. +macro_rules! impl_config_space_io_capable { + ($ty:ty, $read_fn:ident, $write_fn:ident) => { + impl<'a, S: ConfigSpaceKind> IoCapable<$ty> for ConfigSpace<'a, S> { + unsafe fn io_read(&self, address: usize) -> $ty { + let mut val: $ty = 0; -/// Internal helper macros used to invoke C PCI configuration space write functions. -/// -/// This macro is intended to be used by higher-level PCI configuration space access macros -/// (io_define_write) and provides a unified expansion for infallible vs. fallible read semantics. -/// It emits a direct call into the corresponding C helper and performs the required cast to the -/// Rust return type. -/// -/// # Parameters -/// -/// * `$c_fn` – The C function performing the PCI configuration space write. -/// * `$self` – The I/O backend object. -/// * `$ty` – The type of the written value. -/// * `$addr` – The configuration space offset to write. -/// * `$value` – The value to write. -/// -/// This macro does not perform any validation; all invariants must be upheld by the higher-level -/// abstraction invoking it. -macro_rules! call_config_write { - (infallible, $c_fn:ident, $self:ident, $ty:ty, $addr:expr, $value:expr) => { - // SAFETY: By the type invariant `$self.pdev` is a valid address. - // CAST: The offset is cast to `i32` because the C functions expect a 32-bit signed offset - // parameter. PCI configuration space size is at most 4096 bytes, so the value always fits - // within `i32` without truncation or sign change. - // Return value from C function is ignored in infallible accessors. - let _ret = unsafe { bindings::$c_fn($self.pdev.as_raw(), $addr as i32, $value) }; + // Return value from C function is ignored in infallible accessors. + let _ret = + // SAFETY: By the type invariant `self.pdev` is a valid address. 
+ // CAST: The offset is cast to `i32` because the C functions expect a 32-bit + // signed offset parameter. PCI configuration space size is at most 4096 bytes, + // so the value always fits within `i32` without truncation or sign change. + unsafe { bindings::$read_fn(self.pdev.as_raw(), address as i32, &mut val) }; + + val + } + + unsafe fn io_write(&self, value: $ty, address: usize) { + // Return value from C function is ignored in infallible accessors. + let _ret = + // SAFETY: By the type invariant `self.pdev` is a valid address. + // CAST: The offset is cast to `i32` because the C functions expect a 32-bit + // signed offset parameter. PCI configuration space size is at most 4096 bytes, + // so the value always fits within `i32` without truncation or sign change. + unsafe { bindings::$write_fn(self.pdev.as_raw(), address as i32, value) }; + } + } }; } // PCI configuration space supports 8, 16, and 32-bit accesses. -impl<'a, S: ConfigSpaceKind> IoCapable for ConfigSpace<'a, S> {} -impl<'a, S: ConfigSpaceKind> IoCapable for ConfigSpace<'a, S> {} -impl<'a, S: ConfigSpaceKind> IoCapable for ConfigSpace<'a, S> {} +impl_config_space_io_capable!(u8, pci_read_config_byte, pci_write_config_byte); +impl_config_space_io_capable!(u16, pci_read_config_word, pci_write_config_word); +impl_config_space_io_capable!(u32, pci_read_config_dword, pci_write_config_dword); impl<'a, S: ConfigSpaceKind> Io for ConfigSpace<'a, S> { /// Returns the base address of the I/O region. It is always 0 for configuration space. @@ -159,17 +131,6 @@ impl<'a, S: ConfigSpaceKind> Io for ConfigSpace<'a, S> { fn maxsize(&self) -> usize { self.pdev.cfg_size().into_raw() } - - // PCI configuration space does not support fallible operations. - // The default implementations from the Io trait are not used. 
-
-    io_define_read!(infallible, read8, call_config_read(pci_read_config_byte) -> u8);
-    io_define_read!(infallible, read16, call_config_read(pci_read_config_word) -> u16);
-    io_define_read!(infallible, read32, call_config_read(pci_read_config_dword) -> u32);
-
-    io_define_write!(infallible, write8, call_config_write(pci_write_config_byte) <- u8);
-    io_define_write!(infallible, write16, call_config_write(pci_write_config_word) <- u16);
-    io_define_write!(infallible, write32, call_config_write(pci_write_config_dword) <- u32);
 }
 
 impl<'a, S: ConfigSpaceKind> IoKnownSize for ConfigSpace<'a, S> {
diff --git a/rust/kernel/uaccess.rs b/rust/kernel/uaccess.rs
index f989539a31b4..6c9c1cce3c63 100644
--- a/rust/kernel/uaccess.rs
+++ b/rust/kernel/uaccess.rs
@@ -7,10 +7,12 @@
 use crate::{
     alloc::{Allocator, Flags},
     bindings,
+    dma::Coherent,
     error::Result,
     ffi::{c_char, c_void},
     fs::file,
     prelude::*,
+    ptr::KnownSize,
     transmute::{AsBytes, FromBytes},
 };
 use core::mem::{size_of, MaybeUninit};
@@ -459,20 +461,19 @@ impl UserSliceWriter {
         self.length == 0
     }
 
-    /// Writes raw data to this user pointer from a kernel buffer.
+    /// Low-level write from a raw pointer.
     ///
-    /// Fails with [`EFAULT`] if the write happens on a bad address, or if the write goes out of
-    /// bounds of this [`UserSliceWriter`]. This call may modify the associated userspace slice even
-    /// if it returns an error.
-    pub fn write_slice(&mut self, data: &[u8]) -> Result {
-        let len = data.len();
-        let data_ptr = data.as_ptr().cast::<c_void>();
+    /// # Safety
+    ///
+    /// The caller must ensure that `from` is valid for reads of `len` bytes.
+    unsafe fn write_raw(&mut self, from: *const u8, len: usize) -> Result {
         if len > self.length {
             return Err(EFAULT);
         }
-        // SAFETY: `data_ptr` points into an immutable slice of length `len`, so we may read
-        // that many bytes from it.
-        let res = unsafe { bindings::copy_to_user(self.ptr.as_mut_ptr(), data_ptr, len) };
+
+        // SAFETY: Caller guarantees `from` is valid for `len` bytes (see this function's
+        // safety contract).
+        let res = unsafe { bindings::copy_to_user(self.ptr.as_mut_ptr(), from.cast(), len) };
         if res != 0 {
             return Err(EFAULT);
         }
@@ -481,6 +482,76 @@
         Ok(())
     }
 
+    /// Writes raw data to this user pointer from a kernel buffer.
+    ///
+    /// Fails with [`EFAULT`] if the write happens on a bad address, or if the write goes out of
+    /// bounds of this [`UserSliceWriter`]. This call may modify the associated userspace slice even
+    /// if it returns an error.
+    pub fn write_slice(&mut self, data: &[u8]) -> Result {
+        // SAFETY: `data` is a valid slice, so `data.as_ptr()` is valid for
+        // reading `data.len()` bytes.
+        unsafe { self.write_raw(data.as_ptr(), data.len()) }
+    }
+
+    /// Writes raw data to this user pointer from a DMA coherent allocation.
+    ///
+    /// Copies `count` bytes from `alloc` starting from `offset` into this userspace slice.
+    ///
+    /// # Errors
+    ///
+    /// - [`EOVERFLOW`]: `offset + count` overflows.
+    /// - [`ERANGE`]: `offset + count` exceeds the size of `alloc`, or `count` exceeds the
+    ///   size of the user-space buffer.
+    /// - [`EFAULT`]: the write hits a bad address or goes out of bounds of this
+    ///   [`UserSliceWriter`].
+    ///
+    /// This call may modify the associated userspace slice even if it returns an error.
+    ///
+    /// Note: The memory may be concurrently modified by hardware (e.g., DMA). In such cases,
+    /// the copied data may be inconsistent, but this does not cause undefined behavior.
+ /// + /// # Example + /// + /// Copy the first 256 bytes of a DMA coherent allocation into a userspace buffer: + /// + /// ```no_run + /// use kernel::uaccess::UserSliceWriter; + /// use kernel::dma::Coherent; + /// + /// fn copy_dma_to_user( + /// mut writer: UserSliceWriter, + /// alloc: &Coherent<[u8]>, + /// ) -> Result { + /// writer.write_dma(alloc, 0, 256) + /// } + /// ``` + pub fn write_dma( + &mut self, + alloc: &Coherent, + offset: usize, + count: usize, + ) -> Result { + let len = alloc.size(); + if offset.checked_add(count).ok_or(EOVERFLOW)? > len { + return Err(ERANGE); + } + + if count > self.len() { + return Err(ERANGE); + } + + // SAFETY: `as_ptr()` returns a valid pointer to a memory region of `count()` bytes, as + // guaranteed by the `Coherent` invariants. The check above ensures `offset + count <= len`. + let src_ptr = unsafe { alloc.as_ptr().cast::().add(offset) }; + + // Note: Use `write_raw` instead of `write_slice` because the allocation is coherent + // memory that hardware may modify (e.g., DMA); we cannot form a `&[u8]` slice over + // such volatile memory. + // + // SAFETY: `src_ptr` points into the allocation and is valid for `count` bytes (see above). + unsafe { self.write_raw(src_ptr, count) } + } + /// Writes raw data to this user pointer from a kernel buffer partially. 
     ///
     /// This is the same as [`Self::write_slice`] but considers the given `offset` into `data` and
diff --git a/rust/kernel/workqueue.rs b/rust/kernel/workqueue.rs
index 706e833e9702..7e253b6f299c 100644
--- a/rust/kernel/workqueue.rs
+++ b/rust/kernel/workqueue.rs
@@ -189,12 +189,18 @@ use crate::{
     alloc::{AllocError, Flags},
     container_of,
     prelude::*,
-    sync::Arc,
-    sync::LockClassKey,
+    sync::{
+        aref::{
+            ARef,
+            AlwaysRefCounted, //
+        },
+        Arc,
+        LockClassKey, //
+    },
     time::Jiffies,
     types::Opaque,
 };
-use core::marker::PhantomData;
+use core::{marker::PhantomData, ptr::NonNull};
 
 /// Creates a [`Work`] initialiser with the given name and a newly-created lock class.
 #[macro_export]
@@ -425,10 +431,11 @@ pub unsafe trait RawDelayedWorkItem<const ID: u64>: RawWorkItem<ID> {}
 
 /// Defines the method that should be called directly when a work item is executed.
 ///
-/// This trait is implemented by `Pin<KBox<T>>` and [`Arc<T>`], and is mainly intended to be
-/// implemented for smart pointer types. For your own structs, you would implement [`WorkItem`]
-/// instead. The [`run`] method on this trait will usually just perform the appropriate
-/// `container_of` translation and then call into the [`run`][WorkItem::run] method from the
+/// This trait is implemented by `Pin<KBox<T>>`, [`Arc<T>`] and [`ARef<T>`], and
+/// is mainly intended to be implemented for smart pointer types. For your own
+/// structs, you would implement [`WorkItem`] instead. The [`run`] method on
+/// this trait will usually just perform the appropriate `container_of`
+/// translation and then call into the [`run`][WorkItem::run] method from the
 /// [`WorkItem`] trait.
 ///
 /// This trait is used when the `work_struct` field is defined using the [`Work`] helper.
@@ -934,6 +941,89 @@ where
 {
 }
 
+// SAFETY: Like the `Arc` implementation, the `__enqueue` implementation for
+// `ARef<T>` obtains a `work_struct` from the `Work<T, ID>` field using
+// `T::raw_get_work`, so the same safety reasoning applies:
+//
+// - `__enqueue` gets the `work_struct` from the `Work<T, ID>` field, using `T::raw_get_work`.
+// - The only safe way to create a `Work<T, ID>` object is through `Work::new`.
+// - `Work::new` makes sure that `T::Pointer::run` is passed to `init_work_with_key`.
+// - Finally `Work<T, ID>` and `RawWorkItem<ID>` guarantee that the correct `Work<T, ID>` field
+//   will be used because of the ID const generic bound. This makes sure that `T::raw_get_work`
+//   uses the correct offset for the `Work<T, ID>` field, and `Work::new` picks the correct
+//   implementation of `WorkItemPointer` for `ARef<T>`.
+unsafe impl<T, const ID: u64> WorkItemPointer<ID> for ARef<T>
+where
+    T: AlwaysRefCounted,
+    T: WorkItem<ID, Pointer = Self>,
+    T: HasWork<T, ID>,
+{
+    unsafe extern "C" fn run(ptr: *mut bindings::work_struct) {
+        // The `__enqueue` method always uses a `work_struct` stored in a `Work<T, ID>`.
+        let ptr = ptr.cast::<Work<T, ID>>();
+
+        // SAFETY: This computes the pointer that `__enqueue` got from
+        // `ARef::into_raw`.
+        let ptr = unsafe { T::work_container_of(ptr) };
+
+        // SAFETY: The safety contract of `work_container_of` ensures that it
+        // returns a valid non-null pointer.
+        let ptr = unsafe { NonNull::new_unchecked(ptr) };
+
+        // SAFETY: This pointer comes from `ARef::into_raw` and we've been given
+        // back ownership.
+        let aref = unsafe { ARef::from_raw(ptr) };
+
+        T::run(aref)
+    }
+}
+
+// SAFETY: The `work_struct` raw pointer is guaranteed to be valid for the duration of the call to
+// the closure because we get it from an `ARef`, which means that the ref count will be at least 1,
+// and we don't drop the `ARef` ourselves. If `queue_work_on` returns true, it is further guaranteed
+// to be valid until a call to the function pointer in `work_struct` because we leak the memory it
+// points to, and only reclaim it if the closure returns false, or in `WorkItemPointer::run`, which
+// is what the function pointer in the `work_struct` must be pointing to, according to the safety
+// requirements of `WorkItemPointer`.
+unsafe impl<T, const ID: u64> RawWorkItem<ID> for ARef<T>
+where
+    T: AlwaysRefCounted,
+    T: WorkItem<ID, Pointer = Self>,
+    T: HasWork<T, ID>,
+{
+    type EnqueueOutput = Result<(), Self>;
+
+    unsafe fn __enqueue<F>(self, queue_work_on: F) -> Self::EnqueueOutput
+    where
+        F: FnOnce(*mut bindings::work_struct) -> bool,
+    {
+        let ptr = ARef::into_raw(self);
+
+        // SAFETY: Pointers from ARef::into_raw are valid and non-null.
+        let work_ptr = unsafe { T::raw_get_work(ptr.as_ptr()) };
+        // SAFETY: `raw_get_work` returns a pointer to a valid value.
+        let work_ptr = unsafe { Work::raw_get(work_ptr) };
+
+        if queue_work_on(work_ptr) {
+            Ok(())
+        } else {
+            // SAFETY: The work queue has not taken ownership of the pointer.
+            Err(unsafe { ARef::from_raw(ptr) })
+        }
+    }
+}
+
+// SAFETY: By the safety requirements of `HasDelayedWork`, the `work_struct` returned by methods in
+// `HasWork` provides a `work_struct` that is the `work` field of a `delayed_work`, and the rest of
+// the `delayed_work` has the same access rules as its `work` field.
+unsafe impl<T, const ID: u64> RawDelayedWorkItem<ID> for ARef<T>
+where
+    T: WorkItem<ID, Pointer = Self>,
+    T: HasDelayedWork<T, ID>,
+    T: AlwaysRefCounted,
+{
+}
+
 /// Returns the system work queue (`system_wq`).
 ///
 /// It is the one used by `schedule[_delayed]_work[_on]()`. Multi-CPU multi-threaded.
There are diff --git a/samples/rust/rust_dma.rs b/samples/rust/rust_dma.rs index ce39b5545097..129bb4b39c04 100644 --- a/samples/rust/rust_dma.rs +++ b/samples/rust/rust_dma.rs @@ -6,7 +6,12 @@ use kernel::{ device::Core, - dma::{CoherentAllocation, DataDirection, Device, DmaMask}, + dma::{ + Coherent, + DataDirection, + Device, + DmaMask, // + }, page, pci, prelude::*, scatterlist::{Owned, SGTable}, @@ -16,7 +21,7 @@ use kernel::{ #[pin_data(PinnedDrop)] struct DmaSampleDriver { pdev: ARef, - ca: CoherentAllocation, + ca: Coherent<[MyStruct]>, #[pin] sgt: SGTable>>, } @@ -64,8 +69,8 @@ impl pci::Driver for DmaSampleDriver { // SAFETY: There are no concurrent calls to DMA allocation and mapping primitives. unsafe { pdev.dma_set_mask_and_coherent(mask)? }; - let ca: CoherentAllocation = - CoherentAllocation::alloc_coherent(pdev.as_ref(), TEST_VALUES.len(), GFP_KERNEL)?; + let ca: Coherent<[MyStruct]> = + Coherent::zeroed_slice(pdev.as_ref(), TEST_VALUES.len(), GFP_KERNEL)?; for (i, value) in TEST_VALUES.into_iter().enumerate() { kernel::dma_write!(ca, [i]?, MyStruct::new(value.0, value.1)); diff --git a/samples/rust/rust_driver_pci.rs b/samples/rust/rust_driver_pci.rs index d3d4a7931deb..47d3e84fab63 100644 --- a/samples/rust/rust_driver_pci.rs +++ b/samples/rust/rust_driver_pci.rs @@ -5,30 +5,63 @@ //! To make this driver probe, QEMU must be run with `-device pci-testdev`. use kernel::{ - device::Bound, - device::Core, + device::{ + Bound, + Core, // + }, devres::Devres, - io::Io, + io::{ + register, + register::Array, + Io, // + }, + num::Bounded, pci, prelude::*, sync::aref::ARef, // }; -struct Regs; +mod regs { + use super::*; -impl Regs { - const TEST: usize = 0x0; - const OFFSET: usize = 0x4; - const DATA: usize = 0x8; - const COUNT: usize = 0xC; - const END: usize = 0x10; + register! 
{ + pub(super) TEST(u8) @ 0x0 { + 7:0 index => TestIndex; + } + + pub(super) OFFSET(u32) @ 0x4 { + 31:0 offset; + } + + pub(super) DATA(u8) @ 0x8 { + 7:0 data; + } + + pub(super) COUNT(u32) @ 0xC { + 31:0 count; + } + } + + pub(super) const END: usize = 0x10; } -type Bar0 = pci::Bar<{ Regs::END }>; +type Bar0 = pci::Bar<{ regs::END }>; #[derive(Copy, Clone, Debug)] struct TestIndex(u8); +impl From> for TestIndex { + fn from(value: Bounded) -> Self { + Self(value.into()) + } +} + +impl From for Bounded { + fn from(value: TestIndex) -> Self { + value.0.into() + } +} + impl TestIndex { const NO_EVENTFD: Self = Self(0); } @@ -54,40 +87,53 @@ kernel::pci_device_table!( impl SampleDriver { fn testdev(index: &TestIndex, bar: &Bar0) -> Result { // Select the test. - bar.write8(index.0, Regs::TEST); + bar.write_reg(regs::TEST::zeroed().with_index(*index)); - let offset = bar.read32(Regs::OFFSET) as usize; - let data = bar.read8(Regs::DATA); + let offset = bar.read(regs::OFFSET).into_raw() as usize; + let data = bar.read(regs::DATA).into(); // Write `data` to `offset` to increase `count` by one. // // Note that we need `try_write8`, since `offset` can't be checked at compile-time. bar.try_write8(data, offset)?; - Ok(bar.read32(Regs::COUNT)) + Ok(bar.read(regs::COUNT).into()) } fn config_space(pdev: &pci::Device) { let config = pdev.config_space(); - // TODO: use the register!() macro for defining PCI configuration space registers once it - // has been move out of nova-core. + // Some PCI configuration space registers. + register! 
{ + VENDOR_ID(u16) @ 0x0 { + 15:0 vendor_id; + } + + REVISION_ID(u8) @ 0x8 { + 7:0 revision_id; + } + + BAR(u32)[6] @ 0x10 { + 31:0 value; + } + } + dev_info!( pdev, "pci-testdev config space read8 rev ID: {:x}\n", - config.read8(0x8) + config.read(REVISION_ID).revision_id() ); dev_info!( pdev, "pci-testdev config space read16 vendor ID: {:x}\n", - config.read16(0) + config.read(VENDOR_ID).vendor_id() ); dev_info!( pdev, "pci-testdev config space read32 BAR 0: {:x}\n", - config.read32(0x10) + config.read(BAR::at(0)).value() ); } } @@ -111,7 +157,7 @@ impl pci::Driver for SampleDriver { pdev.set_master(); Ok(try_pin_init!(Self { - bar <- pdev.iomap_region_sized::<{ Regs::END }>(0, c"rust_driver_pci"), + bar <- pdev.iomap_region_sized::<{ regs::END }>(0, c"rust_driver_pci"), index: *info, _: { let bar = bar.access(pdev.as_ref())?; @@ -131,7 +177,7 @@ impl pci::Driver for SampleDriver { fn unbind(pdev: &pci::Device, this: Pin<&Self>) { if let Ok(bar) = this.bar.access(pdev.as_ref()) { // Reset pci-testdev by writing a new test index. - bar.write8(this.index.0, Regs::TEST); + bar.write_reg(regs::TEST::zeroed().with_index(this.index)); } } } diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 3652b85be545..010d08472fb2 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -316,12 +316,13 @@ $(obj)/%.lst: $(obj)/%.c FORCE # `feature(offset_of_nested)`, `feature(raw_ref_op)`. # - Stable since Rust 1.84.0: `feature(strict_provenance)`. # - Stable since Rust 1.87.0: `feature(asm_goto)`. +# - Stable since Rust 1.89.0: `feature(generic_arg_infer)`. # - Expected to become stable: `feature(arbitrary_self_types)`. # - To be determined: `feature(used_with_arg)`. # # Please see https://github.com/Rust-for-Linux/linux/issues/2 for details on # the unstable features in use. 
-rust_allowed_features := asm_const,asm_goto,arbitrary_self_types,lint_reasons,offset_of_nested,raw_ref_op,slice_ptr_len,strict_provenance,used_with_arg +rust_allowed_features := asm_const,asm_goto,arbitrary_self_types,generic_arg_infer,lint_reasons,offset_of_nested,raw_ref_op,slice_ptr_len,strict_provenance,used_with_arg # `--out-dir` is required to avoid temporaries being created by `rustc` in the # current working directory, which may be not accessible in the out-of-tree