mirror of
https://github.com/torvalds/linux.git
synced 2026-04-18 06:44:00 -04:00
In a PowerNV environment, for devices that support a DMA mask smaller than
64 bits but larger than 32 bits, the iommu code incorrectly bypasses the DMA
APIs while allocating and mapping buffers for DMA operations.
Devices are failing with ENOMEM during probe with the following messages:
amdgpu 0000:01:00.0: [drm] Detected VRAM RAM=4096M, BAR=4096M
amdgpu 0000:01:00.0: [drm] RAM width 128bits GDDR5
amdgpu 0000:01:00.0: iommu: 64-bit OK but direct DMA is limited by 0
amdgpu 0000:01:00.0: dma_iommu_get_required_mask: returning bypass mask 0xfffffffffffffff
amdgpu 0000:01:00.0: 4096M of VRAM memory ready
amdgpu 0000:01:00.0: 32570M of GTT memory ready.
amdgpu 0000:01:00.0: (-12) failed to allocate kernel bo
amdgpu 0000:01:00.0: [drm] Debug VRAM access will use slowpath MM access
amdgpu 0000:01:00.0: [drm] GART: num cpu pages 4096, num gpu pages 65536
amdgpu 0000:01:00.0: [drm] PCIE GART of 256M enabled (table at 0x000000F4FFF80000).
amdgpu 0000:01:00.0: (-12) failed to allocate kernel bo
amdgpu 0000:01:00.0: (-12) create WB bo failed
amdgpu 0000:01:00.0: amdgpu_device_wb_init failed -12
amdgpu 0000:01:00.0: amdgpu_device_ip_init failed
amdgpu 0000:01:00.0: Fatal error during GPU init
amdgpu 0000:01:00.0: finishing device.
amdgpu 0000:01:00.0: probe with driver amdgpu failed with error -12
amdgpu 0000:01:00.0: ttm finalized
Fixes: 1471c517cf ("powerpc/iommu: bypass DMA APIs for coherent allocations for pre-mapped memory")
Suggested-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Reported-by: Dan Horák <dan@danny.cz>
Closes: https://gitlab.freedesktop.org/drm/amd/-/work_items/5039
Tested-by: Dan Horak <dan@danny.cz>
Closes: https://lore.kernel.org/linuxppc-dev/20260313142351.609bc4c3efe1184f64ca5f44@danny.cz/
Signed-off-by: Gaurav Batra <gbatra@linux.ibm.com>
Closes: https://lore.kernel.org/linuxppc-dev/20260313142351.609bc4c3efe1184f64ca5f44@danny.cz/
[Maddy: Fixed tags]
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/20260331223022.47488-1-gbatra@linux.ibm.com
230 lines
6.0 KiB
C
230 lines
6.0 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corporation
|
|
*
|
|
* Provide default implementations of the DMA mapping callbacks for
|
|
* busses using the iommu infrastructure
|
|
*/
|
|
|
|
#include <linux/dma-direct.h>
|
|
#include <linux/pci.h>
|
|
#include <asm/iommu.h>
|
|
|
|
#ifdef CONFIG_ARCH_HAS_DMA_MAP_DIRECT
|
|
/*
 * True when the bus address that phys_to_dma() yields for @addr does not
 * exceed the device's bus_dma_limit.
 */
#define can_map_direct(dev, addr) \
	(phys_to_dma((dev), (addr)) <= (dev)->bus_dma_limit)
|
|
|
|
/*
 * Decide whether the physical address @addr can be mapped directly for @dev.
 *
 * Returns false whenever dev->bus_dma_limit is zero (the expected common
 * case); otherwise returns the result of can_map_direct() for @addr.
 */
bool arch_dma_map_phys_direct(struct device *dev, phys_addr_t addr)
{
	/* A non-zero bus_dma_limit is the unlikely case. */
	return unlikely(dev->bus_dma_limit) && can_map_direct(dev, addr);
}
|
|
|
|
/* True when DMA handle @h is at or above the device's archdata.dma_offset. */
#define is_direct_handle(dev, h)	((dev)->archdata.dma_offset <= (h))
|
|
|
|
/*
 * Decide whether @dma_handle should be treated as a direct handle on unmap.
 *
 * Returns false whenever dev->bus_dma_limit is zero (the expected common
 * case); otherwise returns true when @dma_handle is at or above
 * dev->archdata.dma_offset.
 */
bool arch_dma_unmap_phys_direct(struct device *dev, dma_addr_t dma_handle)
{
	/* A non-zero bus_dma_limit is the unlikely case. */
	return unlikely(dev->bus_dma_limit) && is_direct_handle(dev, dma_handle);
}
|
|
|
|
bool arch_dma_map_sg_direct(struct device *dev, struct scatterlist *sg,
|
|
int nents)
|
|
{
|
|
struct scatterlist *s;
|
|
int i;
|
|
|
|
if (likely(!dev->bus_dma_limit))
|
|
return false;
|
|
|
|
for_each_sg(sg, s, nents, i) {
|
|
if (!can_map_direct(dev, sg_phys(s) + s->offset + s->length))
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool arch_dma_unmap_sg_direct(struct device *dev, struct scatterlist *sg,
|
|
int nents)
|
|
{
|
|
struct scatterlist *s;
|
|
int i;
|
|
|
|
if (likely(!dev->bus_dma_limit))
|
|
return false;
|
|
|
|
for_each_sg(sg, s, nents, i) {
|
|
if (!is_direct_handle(dev, s->dma_address + s->length))
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
bool arch_dma_alloc_direct(struct device *dev)
|
|
{
|
|
if (dev->dma_ops_bypass && dev->bus_dma_limit)
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
/*
 * Report whether @dma_handle should be freed via the direct path.
 *
 * Requires dev->dma_ops_bypass to be set and dev->bus_dma_limit to be
 * non-zero; when both hold, the answer is whether @dma_handle is at or
 * above dev->archdata.dma_offset.  Otherwise returns false.
 */
bool arch_dma_free_direct(struct device *dev, dma_addr_t dma_handle)
{
	if (dev->dma_ops_bypass && dev->bus_dma_limit)
		return is_direct_handle(dev, dma_handle);

	return false;
}
|
|
#endif /* CONFIG_ARCH_HAS_DMA_MAP_DIRECT */
|
|
|
|
/*
|
|
* Generic iommu implementation
|
|
*/
|
|
|
|
/* Allocates a contiguous real buffer and creates mappings over it.
|
|
* Returns the virtual address of the buffer and sets dma_handle
|
|
* to the dma address (mapping) of the first page.
|
|
*/
|
|
static void *dma_iommu_alloc_coherent(struct device *dev, size_t size,
|
|
dma_addr_t *dma_handle, gfp_t flag,
|
|
unsigned long attrs)
|
|
{
|
|
return iommu_alloc_coherent(dev, get_iommu_table_base(dev), size,
|
|
dma_handle, dev->coherent_dma_mask, flag,
|
|
dev_to_node(dev));
|
|
}
|
|
|
|
/*
 * Release a coherent buffer by handing it to iommu_free_coherent() together
 * with the device's iommu table.  @attrs is not used.
 */
static void dma_iommu_free_coherent(struct device *dev, size_t size,
				    void *vaddr, dma_addr_t dma_handle,
				    unsigned long attrs)
{
	struct iommu_table *tbl = get_iommu_table_base(dev);

	iommu_free_coherent(tbl, size, vaddr, dma_handle);
}
|
|
|
|
/* Creates TCEs for a user provided buffer. The user buffer must be
|
|
* contiguous real kernel storage (not vmalloc). The address passed here
|
|
* is a physical address to that page. The dma_addr_t returned will point
|
|
* to the same byte within the page as was passed in.
|
|
*/
|
|
static dma_addr_t dma_iommu_map_phys(struct device *dev, phys_addr_t phys,
|
|
size_t size,
|
|
enum dma_data_direction direction,
|
|
unsigned long attrs)
|
|
{
|
|
return iommu_map_phys(dev, get_iommu_table_base(dev), phys, size,
|
|
dma_get_mask(dev), direction, attrs);
|
|
}
|
|
|
|
static void dma_iommu_unmap_phys(struct device *dev, dma_addr_t dma_handle,
|
|
size_t size, enum dma_data_direction direction,
|
|
unsigned long attrs)
|
|
{
|
|
iommu_unmap_phys(get_iommu_table_base(dev), dma_handle, size, direction,
|
|
attrs);
|
|
}
|
|
|
|
static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
|
|
int nelems, enum dma_data_direction direction,
|
|
unsigned long attrs)
|
|
{
|
|
return ppc_iommu_map_sg(dev, get_iommu_table_base(dev), sglist, nelems,
|
|
dma_get_mask(dev), direction, attrs);
|
|
}
|
|
|
|
static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist,
|
|
int nelems, enum dma_data_direction direction,
|
|
unsigned long attrs)
|
|
{
|
|
ppc_iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems,
|
|
direction, attrs);
|
|
}
|
|
|
|
/*
 * Ask the PCI host bridge whether iommu bypass is supported for @mask.
 *
 * @dev is converted with to_pci_dev(), so the caller must only pass PCI
 * devices.  Returns false when the controller provides no
 * iommu_bypass_supported callback; otherwise returns the callback's answer.
 */
static bool dma_iommu_bypass_supported(struct device *dev, u64 mask)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct pci_controller *hose = pci_bus_to_host(pdev->bus);

	if (hose->controller_ops.iommu_bypass_supported)
		return hose->controller_ops.iommu_bypass_supported(pdev, mask);

	return false;
}
|
|
|
|
/* We support DMA to/from any memory page via the iommu */
|
|
int dma_iommu_dma_supported(struct device *dev, u64 mask)
|
|
{
|
|
struct iommu_table *tbl;
|
|
|
|
if (dev_is_pci(dev) && dma_iommu_bypass_supported(dev, mask)) {
|
|
/*
|
|
* fixed ops will be used for RAM. This is limited by
|
|
* bus_dma_limit which is set when RAM is pre-mapped.
|
|
*/
|
|
dev->dma_ops_bypass = true;
|
|
dev_info(dev, "iommu: 64-bit OK but direct DMA is limited by %llx\n",
|
|
dev->bus_dma_limit);
|
|
return 1;
|
|
}
|
|
|
|
tbl = get_iommu_table_base(dev);
|
|
|
|
if (!tbl) {
|
|
dev_err(dev, "Warning: IOMMU dma not supported: mask 0x%08llx, table unavailable\n", mask);
|
|
return 0;
|
|
}
|
|
|
|
if (tbl->it_offset > (mask >> tbl->it_page_shift)) {
|
|
dev_info(dev, "Warning: IOMMU offset too big for device mask\n");
|
|
dev_info(dev, "mask: 0x%08llx, table offset: 0x%08lx\n",
|
|
mask, tbl->it_offset << tbl->it_page_shift);
|
|
return 0;
|
|
}
|
|
|
|
dev_dbg(dev, "iommu: not 64-bit, using default ops\n");
|
|
dev->dma_ops_bypass = false;
|
|
return 1;
|
|
}
|
|
|
|
u64 dma_iommu_get_required_mask(struct device *dev)
|
|
{
|
|
struct iommu_table *tbl = get_iommu_table_base(dev);
|
|
u64 mask;
|
|
|
|
if (dev_is_pci(dev)) {
|
|
u64 bypass_mask = dma_direct_get_required_mask(dev);
|
|
|
|
if (dma_iommu_dma_supported(dev, bypass_mask)) {
|
|
dev_info(dev, "%s: returning bypass mask 0x%llx\n", __func__, bypass_mask);
|
|
return bypass_mask;
|
|
}
|
|
}
|
|
|
|
if (!tbl)
|
|
return 0;
|
|
|
|
mask = 1ULL << (fls_long(tbl->it_offset + tbl->it_size) +
|
|
tbl->it_page_shift - 1);
|
|
mask += mask - 1;
|
|
|
|
return mask;
|
|
}
|
|
|
|
const struct dma_map_ops dma_iommu_ops = {
|
|
.alloc = dma_iommu_alloc_coherent,
|
|
.free = dma_iommu_free_coherent,
|
|
.map_sg = dma_iommu_map_sg,
|
|
.unmap_sg = dma_iommu_unmap_sg,
|
|
.dma_supported = dma_iommu_dma_supported,
|
|
.map_phys = dma_iommu_map_phys,
|
|
.unmap_phys = dma_iommu_unmap_phys,
|
|
.get_required_mask = dma_iommu_get_required_mask,
|
|
.mmap = dma_common_mmap,
|
|
.get_sgtable = dma_common_get_sgtable,
|
|
.alloc_pages_op = dma_common_alloc_pages,
|
|
.free_pages = dma_common_free_pages,
|
|
};
|