mirror of
https://github.com/torvalds/linux.git
synced 2026-04-18 06:44:00 -04:00
Merge tag 'dma-mapping-7.1-2026-04-16' of git://git.kernel.org/pub/scm/linux/kernel/git/mszyprowski/linux
Pull dma-mapping updates from Marek Szyprowski:
- added support for batched cache sync, which improves performance of
dma_map/unmap_sg() operations on ARM64 architecture (Barry Song)
- introduced DMA_ATTR_CC_SHARED attribute for explicitly shared memory
used in confidential computing (Jiri Pirko)
- refactored spaghetti-like code in drivers/of/of_reserved_mem.c and
its clients (Marek Szyprowski, shared branch with device-tree updates
to avoid merge conflicts)
- prepared Contiguous Memory Allocator related code for making dma-buf
drivers modularized (Maxime Ripard)
- added support for benchmarking dma_map_sg() calls to tools/dma
utility (Qinxin Xia)
* tag 'dma-mapping-7.1-2026-04-16' of git://git.kernel.org/pub/scm/linux/kernel/git/mszyprowski/linux: (24 commits)
dma-buf: heaps: system: document system_cc_shared heap
dma-buf: heaps: system: add system_cc_shared heap for explicitly shared memory
dma-mapping: introduce DMA_ATTR_CC_SHARED for shared memory
mm: cma: Export cma_alloc(), cma_release() and cma_get_name()
dma: contiguous: Export dev_get_cma_area()
dma: contiguous: Make dma_contiguous_default_area static
dma: contiguous: Make dev_get_cma_area() a proper function
dma: contiguous: Turn heap registration logic around
of: reserved_mem: rework fdt_init_reserved_mem_node()
of: reserved_mem: clarify fdt_scan_reserved_mem*() functions
of: reserved_mem: rearrange code a bit
of: reserved_mem: replace CMA quirks by generic methods
of: reserved_mem: switch to ops based OF_DECLARE()
of: reserved_mem: use -ENODEV instead of -ENOENT
of: reserved_mem: remove fdt node from the structure
dma-mapping: fix false kernel-doc comment marker
dma-mapping: Support batch mode for dma_direct_{map,unmap}_sg
dma-mapping: Separate DMA sync issuing and completion waiting
arm64: Provide dcache_inval_poc_nosync helper
arm64: Provide dcache_clean_poc_nosync helper
...
This commit is contained in:
@@ -16,6 +16,13 @@ following heaps:
|
||||
|
||||
- The ``system`` heap allocates virtually contiguous, cacheable, buffers.
|
||||
|
||||
- The ``system_cc_shared`` heap allocates virtually contiguous, cacheable,
|
||||
buffers using shared (decrypted) memory. It is only present on
|
||||
confidential computing (CoCo) VMs where memory encryption is active
|
||||
(e.g., AMD SEV, Intel TDX). The allocated pages have the encryption
|
||||
bit cleared, making them accessible for device DMA without TDISP
|
||||
support. On non-CoCo VM configurations, this heap is not registered.
|
||||
|
||||
- The ``default_cma_region`` heap allocates physically contiguous,
|
||||
cacheable, buffers. Only present if a CMA region is present. Such a
|
||||
region is usually created either through the kernel commandline
|
||||
|
||||
@@ -54,6 +54,7 @@ config ARM64
|
||||
select ARCH_HAS_STRICT_MODULE_RWX
|
||||
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
|
||||
select ARCH_HAS_SYNC_DMA_FOR_CPU
|
||||
select ARCH_HAS_BATCHED_DMA_SYNC
|
||||
select ARCH_HAS_SYSCALL_WRAPPER
|
||||
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
|
||||
select ARCH_HAS_ZONE_DMA_SET if EXPERT
|
||||
|
||||
@@ -371,14 +371,13 @@ alternative_endif
|
||||
* [start, end) with dcache line size explicitly provided.
|
||||
*
|
||||
* op: operation passed to dc instruction
|
||||
* domain: domain used in dsb instruction
|
||||
* start: starting virtual address of the region
|
||||
* end: end virtual address of the region
|
||||
* linesz: dcache line size
|
||||
* fixup: optional label to branch to on user fault
|
||||
* Corrupts: start, end, tmp
|
||||
*/
|
||||
.macro dcache_by_myline_op op, domain, start, end, linesz, tmp, fixup
|
||||
.macro dcache_by_myline_op_nosync op, start, end, linesz, tmp, fixup
|
||||
sub \tmp, \linesz, #1
|
||||
bic \start, \start, \tmp
|
||||
alternative_if ARM64_WORKAROUND_4311569
|
||||
@@ -412,14 +411,28 @@ alternative_if ARM64_WORKAROUND_4311569
|
||||
cbnz \start, .Ldcache_op\@
|
||||
.endif
|
||||
alternative_else_nop_endif
|
||||
dsb \domain
|
||||
|
||||
_cond_uaccess_extable .Ldcache_op\@, \fixup
|
||||
.endm
|
||||
|
||||
/*
|
||||
* Macro to perform a data cache maintenance for the interval
|
||||
* [start, end)
|
||||
* [start, end) without waiting for completion
|
||||
*
|
||||
* op: operation passed to dc instruction
|
||||
* start: starting virtual address of the region
|
||||
* end: end virtual address of the region
|
||||
* fixup: optional label to branch to on user fault
|
||||
* Corrupts: start, end, tmp1, tmp2
|
||||
*/
|
||||
.macro dcache_by_line_op_nosync op, start, end, tmp1, tmp2, fixup
|
||||
dcache_line_size \tmp1, \tmp2
|
||||
dcache_by_myline_op_nosync \op, \start, \end, \tmp1, \tmp2, \fixup
|
||||
.endm
|
||||
|
||||
/*
|
||||
* Macro to perform a data cache maintenance for the interval
|
||||
* [start, end) and wait for completion
|
||||
*
|
||||
* op: operation passed to dc instruction
|
||||
* domain: domain used in dsb instruction
|
||||
@@ -429,8 +442,8 @@ alternative_else_nop_endif
|
||||
* Corrupts: start, end, tmp1, tmp2
|
||||
*/
|
||||
.macro dcache_by_line_op op, domain, start, end, tmp1, tmp2, fixup
|
||||
dcache_line_size \tmp1, \tmp2
|
||||
dcache_by_myline_op \op, \domain, \start, \end, \tmp1, \tmp2, \fixup
|
||||
dcache_by_line_op_nosync \op, \start, \end, \tmp1, \tmp2, \fixup
|
||||
dsb \domain
|
||||
.endm
|
||||
|
||||
/*
|
||||
|
||||
@@ -87,6 +87,11 @@ int cache_line_size(void);
|
||||
|
||||
#define dma_get_cache_alignment cache_line_size
|
||||
|
||||
static inline void arch_sync_dma_flush(void)
|
||||
{
|
||||
dsb(sy);
|
||||
}
|
||||
|
||||
/* Compress a u64 MPIDR value into 32 bits. */
|
||||
static inline u64 arch_compact_of_hwid(u64 id)
|
||||
{
|
||||
|
||||
@@ -74,6 +74,8 @@ extern void icache_inval_pou(unsigned long start, unsigned long end);
|
||||
extern void dcache_clean_inval_poc(unsigned long start, unsigned long end);
|
||||
extern void dcache_inval_poc(unsigned long start, unsigned long end);
|
||||
extern void dcache_clean_poc(unsigned long start, unsigned long end);
|
||||
extern void dcache_inval_poc_nosync(unsigned long start, unsigned long end);
|
||||
extern void dcache_clean_poc_nosync(unsigned long start, unsigned long end);
|
||||
extern void dcache_clean_pop(unsigned long start, unsigned long end);
|
||||
extern void dcache_clean_pou(unsigned long start, unsigned long end);
|
||||
extern long caches_clean_inval_user_pou(unsigned long start, unsigned long end);
|
||||
|
||||
@@ -64,7 +64,8 @@ SYM_CODE_START(arm64_relocate_new_kernel)
|
||||
mov x19, x13
|
||||
copy_page x13, x12, x1, x2, x3, x4, x5, x6, x7, x8
|
||||
add x1, x19, #PAGE_SIZE
|
||||
dcache_by_myline_op civac, sy, x19, x1, x15, x20
|
||||
dcache_by_myline_op_nosync civac, x19, x1, x15, x20
|
||||
dsb sy
|
||||
b .Lnext
|
||||
.Ltest_indirection:
|
||||
tbz x16, IND_INDIRECTION_BIT, .Ltest_destination
|
||||
|
||||
@@ -132,17 +132,7 @@ alternative_else_nop_endif
|
||||
ret
|
||||
SYM_FUNC_END(dcache_clean_pou)
|
||||
|
||||
/*
|
||||
* dcache_inval_poc(start, end)
|
||||
*
|
||||
* Ensure that any D-cache lines for the interval [start, end)
|
||||
* are invalidated. Any partial lines at the ends of the interval are
|
||||
* also cleaned to PoC to prevent data loss.
|
||||
*
|
||||
* - start - kernel start address of region
|
||||
* - end - kernel end address of region
|
||||
*/
|
||||
SYM_FUNC_START(__pi_dcache_inval_poc)
|
||||
.macro __dcache_inval_poc_nosync
|
||||
dcache_line_size x2, x3
|
||||
sub x3, x2, #1
|
||||
tst x1, x3 // end cache line aligned?
|
||||
@@ -158,11 +148,41 @@ SYM_FUNC_START(__pi_dcache_inval_poc)
|
||||
3: add x0, x0, x2
|
||||
cmp x0, x1
|
||||
b.lo 2b
|
||||
.endm
|
||||
|
||||
/*
|
||||
* dcache_inval_poc(start, end)
|
||||
*
|
||||
* Ensure that any D-cache lines for the interval [start, end)
|
||||
* are invalidated. Any partial lines at the ends of the interval are
|
||||
* also cleaned to PoC to prevent data loss.
|
||||
*
|
||||
* - start - kernel start address of region
|
||||
* - end - kernel end address of region
|
||||
*/
|
||||
SYM_FUNC_START(__pi_dcache_inval_poc)
|
||||
__dcache_inval_poc_nosync
|
||||
dsb sy
|
||||
ret
|
||||
SYM_FUNC_END(__pi_dcache_inval_poc)
|
||||
SYM_FUNC_ALIAS(dcache_inval_poc, __pi_dcache_inval_poc)
|
||||
|
||||
/*
|
||||
* dcache_inval_poc_nosync(start, end)
|
||||
*
|
||||
* Issue the instructions of D-cache lines for the interval [start, end)
|
||||
* for invalidation. Not necessarily cleaned to PoC till an explicit dsb
|
||||
* sy is issued later
|
||||
*
|
||||
* - start - kernel start address of region
|
||||
* - end - kernel end address of region
|
||||
*/
|
||||
SYM_FUNC_START(__pi_dcache_inval_poc_nosync)
|
||||
__dcache_inval_poc_nosync
|
||||
ret
|
||||
SYM_FUNC_END(__pi_dcache_inval_poc_nosync)
|
||||
SYM_FUNC_ALIAS(dcache_inval_poc_nosync, __pi_dcache_inval_poc_nosync)
|
||||
|
||||
/*
|
||||
* dcache_clean_poc(start, end)
|
||||
*
|
||||
@@ -178,6 +198,21 @@ SYM_FUNC_START(__pi_dcache_clean_poc)
|
||||
SYM_FUNC_END(__pi_dcache_clean_poc)
|
||||
SYM_FUNC_ALIAS(dcache_clean_poc, __pi_dcache_clean_poc)
|
||||
|
||||
/*
|
||||
* dcache_clean_poc_nosync(start, end)
|
||||
*
|
||||
* Issue the instructions of D-cache lines for the interval [start, end).
|
||||
* not necessarily cleaned to the PoC till an explicit dsb sy afterward.
|
||||
*
|
||||
* - start - virtual start address of region
|
||||
* - end - virtual end address of region
|
||||
*/
|
||||
SYM_FUNC_START(__pi_dcache_clean_poc_nosync)
|
||||
dcache_by_line_op_nosync cvac, x0, x1, x2, x3
|
||||
ret
|
||||
SYM_FUNC_END(__pi_dcache_clean_poc_nosync)
|
||||
SYM_FUNC_ALIAS(dcache_clean_poc_nosync, __pi_dcache_clean_poc_nosync)
|
||||
|
||||
/*
|
||||
* dcache_clean_pop(start, end)
|
||||
*
|
||||
|
||||
@@ -17,7 +17,7 @@ void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
|
||||
{
|
||||
unsigned long start = (unsigned long)phys_to_virt(paddr);
|
||||
|
||||
dcache_clean_poc(start, start + size);
|
||||
dcache_clean_poc_nosync(start, start + size);
|
||||
}
|
||||
|
||||
void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
|
||||
@@ -28,7 +28,7 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
|
||||
if (dir == DMA_TO_DEVICE)
|
||||
return;
|
||||
|
||||
dcache_inval_poc(start, start + size);
|
||||
dcache_inval_poc_nosync(start, start + size);
|
||||
}
|
||||
|
||||
void arch_dma_prep_coherent(struct page *page, size_t size)
|
||||
|
||||
@@ -14,7 +14,6 @@
|
||||
|
||||
#include <linux/cma.h>
|
||||
#include <linux/dma-buf.h>
|
||||
#include <linux/dma-buf/heaps/cma.h>
|
||||
#include <linux/dma-heap.h>
|
||||
#include <linux/dma-map-ops.h>
|
||||
#include <linux/err.h>
|
||||
@@ -30,19 +29,6 @@
|
||||
|
||||
#define DEFAULT_CMA_NAME "default_cma_region"
|
||||
|
||||
static struct cma *dma_areas[MAX_CMA_AREAS] __initdata;
|
||||
static unsigned int dma_areas_num __initdata;
|
||||
|
||||
int __init dma_heap_cma_register_heap(struct cma *cma)
|
||||
{
|
||||
if (dma_areas_num >= ARRAY_SIZE(dma_areas))
|
||||
return -EINVAL;
|
||||
|
||||
dma_areas[dma_areas_num++] = cma;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct cma_heap {
|
||||
struct dma_heap *heap;
|
||||
struct cma *cma;
|
||||
@@ -411,6 +397,7 @@ static int __init __add_cma_heap(struct cma *cma, const char *name)
|
||||
static int __init add_cma_heaps(void)
|
||||
{
|
||||
struct cma *default_cma = dev_get_cma_area(NULL);
|
||||
struct cma *cma;
|
||||
unsigned int i;
|
||||
int ret;
|
||||
|
||||
@@ -420,9 +407,7 @@ static int __init add_cma_heaps(void)
|
||||
return ret;
|
||||
}
|
||||
|
||||
for (i = 0; i < dma_areas_num; i++) {
|
||||
struct cma *cma = dma_areas[i];
|
||||
|
||||
for (i = 0; (cma = dma_contiguous_get_area_by_idx(i)) != NULL; i++) {
|
||||
ret = __add_cma_heap(cma, cma_get_name(cma));
|
||||
if (ret) {
|
||||
pr_warn("Failed to add CMA heap %s", cma_get_name(cma));
|
||||
|
||||
@@ -10,17 +10,25 @@
|
||||
* Andrew F. Davis <afd@ti.com>
|
||||
*/
|
||||
|
||||
#include <linux/cc_platform.h>
|
||||
#include <linux/dma-buf.h>
|
||||
#include <linux/dma-mapping.h>
|
||||
#include <linux/dma-heap.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/mem_encrypt.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/set_memory.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/pgtable.h>
|
||||
#include <linux/scatterlist.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/vmalloc.h>
|
||||
|
||||
struct system_heap_priv {
|
||||
bool cc_shared;
|
||||
};
|
||||
|
||||
struct system_heap_buffer {
|
||||
struct dma_heap *heap;
|
||||
struct list_head attachments;
|
||||
@@ -29,6 +37,7 @@ struct system_heap_buffer {
|
||||
struct sg_table sg_table;
|
||||
int vmap_cnt;
|
||||
void *vaddr;
|
||||
bool cc_shared;
|
||||
};
|
||||
|
||||
struct dma_heap_attachment {
|
||||
@@ -36,6 +45,7 @@ struct dma_heap_attachment {
|
||||
struct sg_table table;
|
||||
struct list_head list;
|
||||
bool mapped;
|
||||
bool cc_shared;
|
||||
};
|
||||
|
||||
#define LOW_ORDER_GFP (GFP_HIGHUSER | __GFP_ZERO)
|
||||
@@ -52,6 +62,34 @@ static gfp_t order_flags[] = {HIGH_ORDER_GFP, HIGH_ORDER_GFP, LOW_ORDER_GFP};
|
||||
static const unsigned int orders[] = {8, 4, 0};
|
||||
#define NUM_ORDERS ARRAY_SIZE(orders)
|
||||
|
||||
static int system_heap_set_page_decrypted(struct page *page)
|
||||
{
|
||||
unsigned long addr = (unsigned long)page_address(page);
|
||||
unsigned int nr_pages = 1 << compound_order(page);
|
||||
int ret;
|
||||
|
||||
ret = set_memory_decrypted(addr, nr_pages);
|
||||
if (ret)
|
||||
pr_warn_ratelimited("dma-buf system heap: failed to decrypt page at %p\n",
|
||||
page_address(page));
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int system_heap_set_page_encrypted(struct page *page)
|
||||
{
|
||||
unsigned long addr = (unsigned long)page_address(page);
|
||||
unsigned int nr_pages = 1 << compound_order(page);
|
||||
int ret;
|
||||
|
||||
ret = set_memory_encrypted(addr, nr_pages);
|
||||
if (ret)
|
||||
pr_warn_ratelimited("dma-buf system heap: failed to re-encrypt page at %p, leaking memory\n",
|
||||
page_address(page));
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int dup_sg_table(struct sg_table *from, struct sg_table *to)
|
||||
{
|
||||
struct scatterlist *sg, *new_sg;
|
||||
@@ -90,6 +128,7 @@ static int system_heap_attach(struct dma_buf *dmabuf,
|
||||
a->dev = attachment->dev;
|
||||
INIT_LIST_HEAD(&a->list);
|
||||
a->mapped = false;
|
||||
a->cc_shared = buffer->cc_shared;
|
||||
|
||||
attachment->priv = a;
|
||||
|
||||
@@ -119,9 +158,11 @@ static struct sg_table *system_heap_map_dma_buf(struct dma_buf_attachment *attac
|
||||
{
|
||||
struct dma_heap_attachment *a = attachment->priv;
|
||||
struct sg_table *table = &a->table;
|
||||
unsigned long attrs;
|
||||
int ret;
|
||||
|
||||
ret = dma_map_sgtable(attachment->dev, table, direction, 0);
|
||||
attrs = a->cc_shared ? DMA_ATTR_CC_SHARED : 0;
|
||||
ret = dma_map_sgtable(attachment->dev, table, direction, attrs);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
@@ -188,8 +229,13 @@ static int system_heap_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
|
||||
unsigned long addr = vma->vm_start;
|
||||
unsigned long pgoff = vma->vm_pgoff;
|
||||
struct scatterlist *sg;
|
||||
pgprot_t prot;
|
||||
int i, ret;
|
||||
|
||||
prot = vma->vm_page_prot;
|
||||
if (buffer->cc_shared)
|
||||
prot = pgprot_decrypted(prot);
|
||||
|
||||
for_each_sgtable_sg(table, sg, i) {
|
||||
unsigned long n = sg->length >> PAGE_SHIFT;
|
||||
|
||||
@@ -206,8 +252,7 @@ static int system_heap_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
|
||||
if (addr + size > vma->vm_end)
|
||||
size = vma->vm_end - addr;
|
||||
|
||||
ret = remap_pfn_range(vma, addr, page_to_pfn(page),
|
||||
size, vma->vm_page_prot);
|
||||
ret = remap_pfn_range(vma, addr, page_to_pfn(page), size, prot);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@@ -225,6 +270,7 @@ static void *system_heap_do_vmap(struct system_heap_buffer *buffer)
|
||||
struct page **pages = vmalloc(sizeof(struct page *) * npages);
|
||||
struct page **tmp = pages;
|
||||
struct sg_page_iter piter;
|
||||
pgprot_t prot;
|
||||
void *vaddr;
|
||||
|
||||
if (!pages)
|
||||
@@ -235,7 +281,10 @@ static void *system_heap_do_vmap(struct system_heap_buffer *buffer)
|
||||
*tmp++ = sg_page_iter_page(&piter);
|
||||
}
|
||||
|
||||
vaddr = vmap(pages, npages, VM_MAP, PAGE_KERNEL);
|
||||
prot = PAGE_KERNEL;
|
||||
if (buffer->cc_shared)
|
||||
prot = pgprot_decrypted(prot);
|
||||
vaddr = vmap(pages, npages, VM_MAP, prot);
|
||||
vfree(pages);
|
||||
|
||||
if (!vaddr)
|
||||
@@ -296,6 +345,14 @@ static void system_heap_dma_buf_release(struct dma_buf *dmabuf)
|
||||
for_each_sgtable_sg(table, sg, i) {
|
||||
struct page *page = sg_page(sg);
|
||||
|
||||
/*
|
||||
* Intentionally leak pages that cannot be re-encrypted
|
||||
* to prevent shared memory from being reused.
|
||||
*/
|
||||
if (buffer->cc_shared &&
|
||||
system_heap_set_page_encrypted(page))
|
||||
continue;
|
||||
|
||||
__free_pages(page, compound_order(page));
|
||||
}
|
||||
sg_free_table(table);
|
||||
@@ -347,6 +404,8 @@ static struct dma_buf *system_heap_allocate(struct dma_heap *heap,
|
||||
DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
|
||||
unsigned long size_remaining = len;
|
||||
unsigned int max_order = orders[0];
|
||||
struct system_heap_priv *priv = dma_heap_get_drvdata(heap);
|
||||
bool cc_shared = priv->cc_shared;
|
||||
struct dma_buf *dmabuf;
|
||||
struct sg_table *table;
|
||||
struct scatterlist *sg;
|
||||
@@ -362,6 +421,7 @@ static struct dma_buf *system_heap_allocate(struct dma_heap *heap,
|
||||
mutex_init(&buffer->lock);
|
||||
buffer->heap = heap;
|
||||
buffer->len = len;
|
||||
buffer->cc_shared = cc_shared;
|
||||
|
||||
INIT_LIST_HEAD(&pages);
|
||||
i = 0;
|
||||
@@ -396,6 +456,14 @@ static struct dma_buf *system_heap_allocate(struct dma_heap *heap,
|
||||
list_del(&page->lru);
|
||||
}
|
||||
|
||||
if (cc_shared) {
|
||||
for_each_sgtable_sg(table, sg, i) {
|
||||
ret = system_heap_set_page_decrypted(sg_page(sg));
|
||||
if (ret)
|
||||
goto free_pages;
|
||||
}
|
||||
}
|
||||
|
||||
/* create the dmabuf */
|
||||
exp_info.exp_name = dma_heap_get_name(heap);
|
||||
exp_info.ops = &system_heap_buf_ops;
|
||||
@@ -413,6 +481,13 @@ free_pages:
|
||||
for_each_sgtable_sg(table, sg, i) {
|
||||
struct page *p = sg_page(sg);
|
||||
|
||||
/*
|
||||
* Intentionally leak pages that cannot be re-encrypted
|
||||
* to prevent shared memory from being reused.
|
||||
*/
|
||||
if (buffer->cc_shared &&
|
||||
system_heap_set_page_encrypted(p))
|
||||
continue;
|
||||
__free_pages(p, compound_order(p));
|
||||
}
|
||||
sg_free_table(table);
|
||||
@@ -428,6 +503,14 @@ static const struct dma_heap_ops system_heap_ops = {
|
||||
.allocate = system_heap_allocate,
|
||||
};
|
||||
|
||||
static struct system_heap_priv system_heap_priv = {
|
||||
.cc_shared = false,
|
||||
};
|
||||
|
||||
static struct system_heap_priv system_heap_cc_shared_priv = {
|
||||
.cc_shared = true,
|
||||
};
|
||||
|
||||
static int __init system_heap_create(void)
|
||||
{
|
||||
struct dma_heap_export_info exp_info;
|
||||
@@ -435,8 +518,18 @@ static int __init system_heap_create(void)
|
||||
|
||||
exp_info.name = "system";
|
||||
exp_info.ops = &system_heap_ops;
|
||||
exp_info.priv = NULL;
|
||||
exp_info.priv = &system_heap_priv;
|
||||
|
||||
sys_heap = dma_heap_add(&exp_info);
|
||||
if (IS_ERR(sys_heap))
|
||||
return PTR_ERR(sys_heap);
|
||||
|
||||
if (IS_ENABLED(CONFIG_HIGHMEM) ||
|
||||
!cc_platform_has(CC_ATTR_MEM_ENCRYPT))
|
||||
return 0;
|
||||
|
||||
exp_info.name = "system_cc_shared";
|
||||
exp_info.priv = &system_heap_cc_shared_priv;
|
||||
sys_heap = dma_heap_add(&exp_info);
|
||||
if (IS_ERR(sys_heap))
|
||||
return PTR_ERR(sys_heap);
|
||||
|
||||
@@ -1106,8 +1106,10 @@ void iommu_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
|
||||
return;
|
||||
|
||||
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
|
||||
if (!dev_is_dma_coherent(dev))
|
||||
if (!dev_is_dma_coherent(dev)) {
|
||||
arch_sync_dma_for_cpu(phys, size, dir);
|
||||
arch_sync_dma_flush();
|
||||
}
|
||||
|
||||
swiotlb_sync_single_for_cpu(dev, phys, size, dir);
|
||||
}
|
||||
@@ -1123,8 +1125,10 @@ void iommu_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
|
||||
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
|
||||
swiotlb_sync_single_for_device(dev, phys, size, dir);
|
||||
|
||||
if (!dev_is_dma_coherent(dev))
|
||||
if (!dev_is_dma_coherent(dev)) {
|
||||
arch_sync_dma_for_device(phys, size, dir);
|
||||
arch_sync_dma_flush();
|
||||
}
|
||||
}
|
||||
|
||||
void iommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
|
||||
@@ -1133,13 +1137,15 @@ void iommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
|
||||
struct scatterlist *sg;
|
||||
int i;
|
||||
|
||||
if (sg_dma_is_swiotlb(sgl))
|
||||
if (sg_dma_is_swiotlb(sgl)) {
|
||||
for_each_sg(sgl, sg, nelems, i)
|
||||
iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
|
||||
sg->length, dir);
|
||||
else if (!dev_is_dma_coherent(dev))
|
||||
} else if (!dev_is_dma_coherent(dev)) {
|
||||
for_each_sg(sgl, sg, nelems, i)
|
||||
arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
|
||||
arch_sync_dma_flush();
|
||||
}
|
||||
}
|
||||
|
||||
void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
|
||||
@@ -1148,14 +1154,16 @@ void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
|
||||
struct scatterlist *sg;
|
||||
int i;
|
||||
|
||||
if (sg_dma_is_swiotlb(sgl))
|
||||
if (sg_dma_is_swiotlb(sgl)) {
|
||||
for_each_sg(sgl, sg, nelems, i)
|
||||
iommu_dma_sync_single_for_device(dev,
|
||||
sg_dma_address(sg),
|
||||
sg->length, dir);
|
||||
else if (!dev_is_dma_coherent(dev))
|
||||
} else if (!dev_is_dma_coherent(dev)) {
|
||||
for_each_sg(sgl, sg, nelems, i)
|
||||
arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
|
||||
arch_sync_dma_flush();
|
||||
}
|
||||
}
|
||||
|
||||
static phys_addr_t iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
|
||||
@@ -1230,8 +1238,10 @@ dma_addr_t iommu_dma_map_phys(struct device *dev, phys_addr_t phys, size_t size,
|
||||
return DMA_MAPPING_ERROR;
|
||||
}
|
||||
|
||||
if (!coherent && !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO)))
|
||||
if (!coherent && !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO))) {
|
||||
arch_sync_dma_for_device(phys, size, dir);
|
||||
arch_sync_dma_flush();
|
||||
}
|
||||
|
||||
iova = __iommu_dma_map(dev, phys, size, prot, dma_mask);
|
||||
if (iova == DMA_MAPPING_ERROR &&
|
||||
@@ -1254,8 +1264,10 @@ void iommu_dma_unmap_phys(struct device *dev, dma_addr_t dma_handle,
|
||||
if (WARN_ON(!phys))
|
||||
return;
|
||||
|
||||
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev))
|
||||
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev)) {
|
||||
arch_sync_dma_for_cpu(phys, size, dir);
|
||||
arch_sync_dma_flush();
|
||||
}
|
||||
|
||||
__iommu_dma_unmap(dev, dma_handle, size);
|
||||
|
||||
@@ -2004,6 +2016,8 @@ int dma_iova_sync(struct device *dev, struct dma_iova_state *state,
|
||||
dma_addr_t addr = state->addr + offset;
|
||||
size_t iova_start_pad = iova_offset(iovad, addr);
|
||||
|
||||
if (!dev_is_dma_coherent(dev))
|
||||
arch_sync_dma_flush();
|
||||
return iommu_sync_map(domain, addr - iova_start_pad,
|
||||
iova_align(iovad, size + iova_start_pad));
|
||||
}
|
||||
@@ -2017,6 +2031,8 @@ static void iommu_dma_iova_unlink_range_slow(struct device *dev,
|
||||
struct iommu_dma_cookie *cookie = domain->iova_cookie;
|
||||
struct iova_domain *iovad = &cookie->iovad;
|
||||
size_t iova_start_pad = iova_offset(iovad, addr);
|
||||
bool need_sync_dma = !dev_is_dma_coherent(dev) &&
|
||||
!(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO));
|
||||
dma_addr_t end = addr + size;
|
||||
|
||||
do {
|
||||
@@ -2040,6 +2056,9 @@ static void iommu_dma_iova_unlink_range_slow(struct device *dev,
|
||||
addr += len;
|
||||
iova_start_pad = 0;
|
||||
} while (addr < end);
|
||||
|
||||
if (need_sync_dma)
|
||||
arch_sync_dma_flush();
|
||||
}
|
||||
|
||||
static void __iommu_dma_iova_unlink(struct device *dev,
|
||||
|
||||
@@ -70,19 +70,20 @@ static void tegra210_emc_table_device_release(struct reserved_mem *rmem,
|
||||
memunmap(timings);
|
||||
}
|
||||
|
||||
static const struct reserved_mem_ops tegra210_emc_table_ops = {
|
||||
.device_init = tegra210_emc_table_device_init,
|
||||
.device_release = tegra210_emc_table_device_release,
|
||||
};
|
||||
|
||||
static int tegra210_emc_table_init(struct reserved_mem *rmem)
|
||||
static int tegra210_emc_table_init(unsigned long node,
|
||||
struct reserved_mem *rmem)
|
||||
{
|
||||
pr_debug("Tegra210 EMC table at %pa, size %lu bytes\n", &rmem->base,
|
||||
(unsigned long)rmem->size);
|
||||
|
||||
rmem->ops = &tegra210_emc_table_ops;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct reserved_mem_ops tegra210_emc_table_ops = {
|
||||
.node_init = tegra210_emc_table_init,
|
||||
.device_init = tegra210_emc_table_device_init,
|
||||
.device_release = tegra210_emc_table_device_release,
|
||||
};
|
||||
|
||||
RESERVEDMEM_OF_DECLARE(tegra210_emc_table, "nvidia,tegra210-emc-table",
|
||||
tegra210_emc_table_init);
|
||||
&tegra210_emc_table_ops);
|
||||
|
||||
@@ -1295,7 +1295,7 @@ void __init unflatten_device_tree(void)
|
||||
void *fdt = initial_boot_params;
|
||||
|
||||
/* Save the statically-placed regions in the reserved_mem array */
|
||||
fdt_scan_reserved_mem_reg_nodes();
|
||||
fdt_scan_reserved_mem_late();
|
||||
|
||||
/* Populate an empty root node when bootloader doesn't provide one */
|
||||
if (!fdt) {
|
||||
|
||||
@@ -186,7 +186,7 @@ static inline struct device_node *__of_get_dma_parent(const struct device_node *
|
||||
#endif
|
||||
|
||||
int fdt_scan_reserved_mem(void);
|
||||
void __init fdt_scan_reserved_mem_reg_nodes(void);
|
||||
void __init fdt_scan_reserved_mem_late(void);
|
||||
|
||||
bool of_fdt_device_is_available(const void *blob, unsigned long node);
|
||||
|
||||
|
||||
@@ -24,8 +24,6 @@
|
||||
#include <linux/slab.h>
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/kmemleak.h>
|
||||
#include <linux/cma.h>
|
||||
#include <linux/dma-map-ops.h>
|
||||
|
||||
#include "of_private.h"
|
||||
|
||||
@@ -104,30 +102,12 @@ static void __init alloc_reserved_mem_array(void)
|
||||
reserved_mem = new_array;
|
||||
}
|
||||
|
||||
static void __init fdt_init_reserved_mem_node(struct reserved_mem *rmem);
|
||||
/*
|
||||
* fdt_reserved_mem_save_node() - save fdt node for second pass initialization
|
||||
*/
|
||||
static void __init fdt_reserved_mem_save_node(unsigned long node, const char *uname,
|
||||
phys_addr_t base, phys_addr_t size)
|
||||
{
|
||||
struct reserved_mem *rmem = &reserved_mem[reserved_mem_count];
|
||||
|
||||
if (reserved_mem_count == total_reserved_mem_cnt) {
|
||||
pr_err("not enough space for all defined regions.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
rmem->fdt_node = node;
|
||||
rmem->name = uname;
|
||||
rmem->base = base;
|
||||
rmem->size = size;
|
||||
|
||||
/* Call the region specific initialization function */
|
||||
fdt_init_reserved_mem_node(rmem);
|
||||
|
||||
reserved_mem_count++;
|
||||
}
|
||||
static void fdt_init_reserved_mem_node(unsigned long node, const char *uname,
|
||||
phys_addr_t base, phys_addr_t size);
|
||||
static int fdt_validate_reserved_mem_node(unsigned long node,
|
||||
phys_addr_t *align);
|
||||
static int fdt_fixup_reserved_mem_node(unsigned long node,
|
||||
phys_addr_t base, phys_addr_t size);
|
||||
|
||||
static int __init early_init_dt_reserve_memory(phys_addr_t base,
|
||||
phys_addr_t size, bool nomap)
|
||||
@@ -154,21 +134,19 @@ static int __init __reserved_mem_reserve_reg(unsigned long node,
|
||||
const char *uname)
|
||||
{
|
||||
phys_addr_t base, size;
|
||||
int i, len;
|
||||
int i, len, err;
|
||||
const __be32 *prop;
|
||||
bool nomap, default_cma;
|
||||
bool nomap;
|
||||
|
||||
prop = of_flat_dt_get_addr_size_prop(node, "reg", &len);
|
||||
if (!prop)
|
||||
return -ENOENT;
|
||||
|
||||
nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL;
|
||||
default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL);
|
||||
|
||||
if (default_cma && cma_skip_dt_default_reserved_mem()) {
|
||||
pr_err("Skipping dt linux,cma-default for \"cma=\" kernel param.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
err = fdt_validate_reserved_mem_node(node, NULL);
|
||||
if (err && err != -ENODEV)
|
||||
return err;
|
||||
|
||||
for (i = 0; i < len; i++) {
|
||||
u64 b, s;
|
||||
@@ -179,10 +157,7 @@ static int __init __reserved_mem_reserve_reg(unsigned long node,
|
||||
size = s;
|
||||
|
||||
if (size && early_init_dt_reserve_memory(base, size, nomap) == 0) {
|
||||
/* Architecture specific contiguous memory fixup. */
|
||||
if (of_flat_dt_is_compatible(node, "shared-dma-pool") &&
|
||||
of_get_flat_dt_prop(node, "reusable", NULL))
|
||||
dma_contiguous_early_fixup(base, size);
|
||||
fdt_fixup_reserved_mem_node(node, base, size);
|
||||
pr_debug("Reserved memory: reserved region for node '%s': base %pa, size %lu MiB\n",
|
||||
uname, &base, (unsigned long)(size / SZ_1M));
|
||||
} else {
|
||||
@@ -216,19 +191,66 @@ static int __init __reserved_mem_check_root(unsigned long node)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __init __rmem_check_for_overlap(void);
|
||||
static int __init __rmem_cmp(const void *a, const void *b)
|
||||
{
|
||||
const struct reserved_mem *ra = a, *rb = b;
|
||||
|
||||
if (ra->base < rb->base)
|
||||
return -1;
|
||||
|
||||
if (ra->base > rb->base)
|
||||
return 1;
|
||||
|
||||
/*
|
||||
* Put the dynamic allocations (address == 0, size == 0) before static
|
||||
* allocations at address 0x0 so that overlap detection works
|
||||
* correctly.
|
||||
*/
|
||||
if (ra->size < rb->size)
|
||||
return -1;
|
||||
if (ra->size > rb->size)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __init __rmem_check_for_overlap(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (reserved_mem_count < 2)
|
||||
return;
|
||||
|
||||
sort(reserved_mem, reserved_mem_count, sizeof(reserved_mem[0]),
|
||||
__rmem_cmp, NULL);
|
||||
for (i = 0; i < reserved_mem_count - 1; i++) {
|
||||
struct reserved_mem *this, *next;
|
||||
|
||||
this = &reserved_mem[i];
|
||||
next = &reserved_mem[i + 1];
|
||||
|
||||
if (this->base + this->size > next->base) {
|
||||
phys_addr_t this_end, next_end;
|
||||
|
||||
this_end = this->base + this->size;
|
||||
next_end = next->base + next->size;
|
||||
pr_err("OVERLAP DETECTED!\n%s (%pa--%pa) overlaps with %s (%pa--%pa)\n",
|
||||
this->name, &this->base, &this_end,
|
||||
next->name, &next->base, &next_end);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* fdt_scan_reserved_mem_reg_nodes() - Store info for the "reg" defined
|
||||
* reserved memory regions.
|
||||
* fdt_scan_reserved_mem_late() - Scan FDT and initialize remaining reserved
|
||||
* memory regions.
|
||||
*
|
||||
* This function is used to scan through the DT and store the
|
||||
* information for the reserved memory regions that are defined using
|
||||
* the "reg" property. The region node number, name, base address, and
|
||||
* size are all stored in the reserved_mem array by calling the
|
||||
* fdt_reserved_mem_save_node() function.
|
||||
* This function is used to scan again through the DT and initialize the
|
||||
* "static" reserved memory regions, that are defined using the "reg"
|
||||
* property. Each such region is then initialized with its specific init
|
||||
* function and stored in the global reserved_mem array.
|
||||
*/
|
||||
void __init fdt_scan_reserved_mem_reg_nodes(void)
|
||||
void __init fdt_scan_reserved_mem_late(void)
|
||||
{
|
||||
const void *fdt = initial_boot_params;
|
||||
phys_addr_t base, size;
|
||||
@@ -253,23 +275,25 @@ void __init fdt_scan_reserved_mem_reg_nodes(void)
|
||||
|
||||
fdt_for_each_subnode(child, fdt, node) {
|
||||
const char *uname;
|
||||
bool default_cma = of_get_flat_dt_prop(child, "linux,cma-default", NULL);
|
||||
u64 b, s;
|
||||
int ret;
|
||||
|
||||
if (!of_fdt_device_is_available(fdt, child))
|
||||
continue;
|
||||
if (default_cma && cma_skip_dt_default_reserved_mem())
|
||||
continue;
|
||||
|
||||
if (!of_flat_dt_get_addr_size(child, "reg", &b, &s))
|
||||
continue;
|
||||
|
||||
ret = fdt_validate_reserved_mem_node(child, NULL);
|
||||
if (ret && ret != -ENODEV)
|
||||
continue;
|
||||
|
||||
base = b;
|
||||
size = s;
|
||||
|
||||
if (size) {
|
||||
uname = fdt_get_name(fdt, child, NULL);
|
||||
fdt_reserved_mem_save_node(child, uname, base, size);
|
||||
fdt_init_reserved_mem_node(child, uname, base, size);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -280,7 +304,14 @@ void __init fdt_scan_reserved_mem_reg_nodes(void)
|
||||
static int __init __reserved_mem_alloc_size(unsigned long node, const char *uname);
|
||||
|
||||
/*
|
||||
* fdt_scan_reserved_mem() - scan a single FDT node for reserved memory
|
||||
* fdt_scan_reserved_mem() - reserve and allocate memory occupied by
|
||||
* reserved memory regions.
|
||||
*
|
||||
* This function is used to scan through the FDT and mark memory occupied
|
||||
* by all static (defined by the "reg" property) reserved memory regions.
|
||||
* Then memory for all dynamic regions (defined by size & alignment) is
|
||||
* allocated, a region specific init function is called and region information
|
||||
* is stored in the reserved_mem array.
|
||||
*/
|
||||
int __init fdt_scan_reserved_mem(void)
|
||||
{
|
||||
@@ -397,7 +428,7 @@ static int __init __reserved_mem_alloc_size(unsigned long node, const char *unam
|
||||
phys_addr_t base = 0, align = 0, size;
|
||||
int i, len;
|
||||
const __be32 *prop;
|
||||
bool nomap, default_cma;
|
||||
bool nomap;
|
||||
int ret;
|
||||
|
||||
prop = of_get_flat_dt_prop(node, "size", &len);
|
||||
@@ -421,19 +452,10 @@ static int __init __reserved_mem_alloc_size(unsigned long node, const char *unam
|
||||
}
|
||||
|
||||
nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL;
|
||||
default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL);
|
||||
|
||||
if (default_cma && cma_skip_dt_default_reserved_mem()) {
|
||||
pr_err("Skipping dt linux,cma-default for \"cma=\" kernel param.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Need adjust the alignment to satisfy the CMA requirement */
|
||||
if (IS_ENABLED(CONFIG_CMA)
|
||||
&& of_flat_dt_is_compatible(node, "shared-dma-pool")
|
||||
&& of_get_flat_dt_prop(node, "reusable", NULL)
|
||||
&& !nomap)
|
||||
align = max_t(phys_addr_t, align, CMA_MIN_ALIGNMENT_BYTES);
|
||||
ret = fdt_validate_reserved_mem_node(node, &align);
|
||||
if (ret && ret != -ENODEV)
|
||||
return ret;
|
||||
|
||||
prop = of_flat_dt_get_addr_size_prop(node, "alloc-ranges", &len);
|
||||
if (prop) {
|
||||
@@ -468,121 +490,151 @@ static int __init __reserved_mem_alloc_size(unsigned long node, const char *unam
|
||||
uname, (unsigned long)(size / SZ_1M));
|
||||
return -ENOMEM;
|
||||
}
|
||||
/* Architecture specific contiguous memory fixup. */
|
||||
if (of_flat_dt_is_compatible(node, "shared-dma-pool") &&
|
||||
of_get_flat_dt_prop(node, "reusable", NULL))
|
||||
dma_contiguous_early_fixup(base, size);
|
||||
/* Save region in the reserved_mem array */
|
||||
fdt_reserved_mem_save_node(node, uname, base, size);
|
||||
|
||||
fdt_fixup_reserved_mem_node(node, base, size);
|
||||
fdt_init_reserved_mem_node(node, uname, base, size);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern const struct of_device_id __reservedmem_of_table[];
|
||||
static const struct of_device_id __rmem_of_table_sentinel
|
||||
__used __section("__reservedmem_of_table_end");
|
||||
|
||||
/*
|
||||
* __reserved_mem_init_node() - call region specific reserved memory init code
|
||||
/**
|
||||
* fdt_fixup_reserved_mem_node() - call fixup function for a reserved memory node
|
||||
* @node: FDT node to fixup
|
||||
* @base: base address of the reserved memory region
|
||||
* @size: size of the reserved memory region
|
||||
*
|
||||
* This function iterates through the reserved memory drivers and calls
|
||||
* the node_fixup callback for the compatible entry matching the node.
|
||||
*
|
||||
* Return: 0 on success, -ENODEV if no compatible match found
|
||||
*/
|
||||
static int __init __reserved_mem_init_node(struct reserved_mem *rmem)
|
||||
static int __init fdt_fixup_reserved_mem_node(unsigned long node,
|
||||
phys_addr_t base, phys_addr_t size)
|
||||
{
|
||||
extern const struct of_device_id __reservedmem_of_table[];
|
||||
const struct of_device_id *i;
|
||||
int ret = -ENOENT;
|
||||
int ret = -ENODEV;
|
||||
|
||||
for (i = __reservedmem_of_table; i < &__rmem_of_table_sentinel; i++) {
|
||||
reservedmem_of_init_fn initfn = i->data;
|
||||
const char *compat = i->compatible;
|
||||
for (i = __reservedmem_of_table; ret == -ENODEV &&
|
||||
i < &__rmem_of_table_sentinel; i++) {
|
||||
const struct reserved_mem_ops *ops = i->data;
|
||||
|
||||
if (!of_flat_dt_is_compatible(rmem->fdt_node, compat))
|
||||
if (!of_flat_dt_is_compatible(node, i->compatible))
|
||||
continue;
|
||||
|
||||
ret = initfn(rmem);
|
||||
if (ops->node_fixup)
|
||||
ret = ops->node_fixup(node, base, size);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* fdt_validate_reserved_mem_node() - validate a reserved memory node
|
||||
* @node: FDT node to validate
|
||||
* @align: pointer to store the validated alignment (may be modified by callback)
|
||||
*
|
||||
* This function iterates through the reserved memory drivers and calls
|
||||
* the node_validate callback for the compatible entry matching the node.
|
||||
*
|
||||
* Return: 0 on success, -ENODEV if no compatible match found
|
||||
*/
|
||||
static int __init fdt_validate_reserved_mem_node(unsigned long node, phys_addr_t *align)
|
||||
{
|
||||
const struct of_device_id *i;
|
||||
int ret = -ENODEV;
|
||||
|
||||
for (i = __reservedmem_of_table; ret == -ENODEV &&
|
||||
i < &__rmem_of_table_sentinel; i++) {
|
||||
const struct reserved_mem_ops *ops = i->data;
|
||||
|
||||
if (!of_flat_dt_is_compatible(node, i->compatible))
|
||||
continue;
|
||||
|
||||
if (ops->node_validate)
|
||||
ret = ops->node_validate(node, align);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* __reserved_mem_init_node() - initialize a reserved memory region
|
||||
* @rmem: reserved_mem structure to initialize
|
||||
* @node: FDT node describing the reserved memory region
|
||||
*
|
||||
* This function iterates through the reserved memory drivers and calls the
|
||||
* node_init callback for the compatible entry matching the node. On success,
|
||||
* the operations pointer is stored in the reserved_mem structure.
|
||||
*
|
||||
* Return: 0 on success, -ENODEV if no compatible match found
|
||||
*/
|
||||
static int __init __reserved_mem_init_node(struct reserved_mem *rmem,
|
||||
unsigned long node)
|
||||
{
|
||||
const struct of_device_id *i;
|
||||
int ret = -ENODEV;
|
||||
|
||||
for (i = __reservedmem_of_table; ret == -ENODEV &&
|
||||
i < &__rmem_of_table_sentinel; i++) {
|
||||
const struct reserved_mem_ops *ops = i->data;
|
||||
const char *compat = i->compatible;
|
||||
|
||||
if (!of_flat_dt_is_compatible(node, compat))
|
||||
continue;
|
||||
|
||||
ret = ops->node_init(node, rmem);
|
||||
if (ret == 0) {
|
||||
rmem->ops = ops;
|
||||
pr_info("initialized node %s, compatible id %s\n",
|
||||
rmem->name, compat);
|
||||
break;
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __init __rmem_cmp(const void *a, const void *b)
|
||||
{
|
||||
const struct reserved_mem *ra = a, *rb = b;
|
||||
|
||||
if (ra->base < rb->base)
|
||||
return -1;
|
||||
|
||||
if (ra->base > rb->base)
|
||||
return 1;
|
||||
|
||||
/*
|
||||
* Put the dynamic allocations (address == 0, size == 0) before static
|
||||
* allocations at address 0x0 so that overlap detection works
|
||||
* correctly.
|
||||
*/
|
||||
if (ra->size < rb->size)
|
||||
return -1;
|
||||
if (ra->size > rb->size)
|
||||
return 1;
|
||||
|
||||
if (ra->fdt_node < rb->fdt_node)
|
||||
return -1;
|
||||
if (ra->fdt_node > rb->fdt_node)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __init __rmem_check_for_overlap(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (reserved_mem_count < 2)
|
||||
return;
|
||||
|
||||
sort(reserved_mem, reserved_mem_count, sizeof(reserved_mem[0]),
|
||||
__rmem_cmp, NULL);
|
||||
for (i = 0; i < reserved_mem_count - 1; i++) {
|
||||
struct reserved_mem *this, *next;
|
||||
|
||||
this = &reserved_mem[i];
|
||||
next = &reserved_mem[i + 1];
|
||||
|
||||
if (this->base + this->size > next->base) {
|
||||
phys_addr_t this_end, next_end;
|
||||
|
||||
this_end = this->base + this->size;
|
||||
next_end = next->base + next->size;
|
||||
pr_err("OVERLAP DETECTED!\n%s (%pa--%pa) overlaps with %s (%pa--%pa)\n",
|
||||
this->name, &this->base, &this_end,
|
||||
next->name, &next->base, &next_end);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* fdt_init_reserved_mem_node() - Initialize a reserved memory region
|
||||
* @rmem: reserved_mem struct of the memory region to be initialized.
|
||||
* @node: fdt node of the initialized region
|
||||
* @uname: name of the reserved memory node
|
||||
* @base: base address of the reserved memory region
|
||||
* @size: size of the reserved memory region
|
||||
*
|
||||
* This function is used to call the region specific initialization
|
||||
* function for a reserved memory region.
|
||||
* This function calls the region-specific initialization function for a
|
||||
* reserved memory region and saves all region-specific data to the
|
||||
* reserved_mem array to allow of_reserved_mem_lookup() to find it.
|
||||
*/
|
||||
static void __init fdt_init_reserved_mem_node(struct reserved_mem *rmem)
|
||||
static void __init fdt_init_reserved_mem_node(unsigned long node, const char *uname,
|
||||
phys_addr_t base, phys_addr_t size)
|
||||
{
|
||||
unsigned long node = rmem->fdt_node;
|
||||
int err = 0;
|
||||
bool nomap;
|
||||
|
||||
struct reserved_mem *rmem = &reserved_mem[reserved_mem_count];
|
||||
|
||||
if (reserved_mem_count == total_reserved_mem_cnt) {
|
||||
pr_err("not enough space for all defined regions.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
rmem->name = uname;
|
||||
rmem->base = base;
|
||||
rmem->size = size;
|
||||
|
||||
nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL;
|
||||
|
||||
err = __reserved_mem_init_node(rmem);
|
||||
if (err != 0 && err != -ENOENT) {
|
||||
err = __reserved_mem_init_node(rmem, node);
|
||||
if (err != 0 && err != -ENODEV) {
|
||||
pr_info("node %s compatible matching fail\n", rmem->name);
|
||||
rmem->name = NULL;
|
||||
|
||||
if (nomap)
|
||||
memblock_clear_nomap(rmem->base, rmem->size);
|
||||
else
|
||||
memblock_phys_free(rmem->base, rmem->size);
|
||||
return;
|
||||
} else {
|
||||
phys_addr_t end = rmem->base + rmem->size - 1;
|
||||
bool reusable =
|
||||
@@ -594,6 +646,8 @@ static void __init fdt_init_reserved_mem_node(struct reserved_mem *rmem)
|
||||
reusable ? "reusable" : "non-reusable",
|
||||
rmem->name ? rmem->name : "unknown");
|
||||
}
|
||||
|
||||
reserved_mem_count++;
|
||||
}
|
||||
|
||||
struct rmem_assigned_device {
|
||||
|
||||
@@ -262,10 +262,12 @@ static dma_addr_t xen_swiotlb_map_phys(struct device *dev, phys_addr_t phys,
|
||||
|
||||
done:
|
||||
if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
|
||||
if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dev_addr))))
|
||||
if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dev_addr)))) {
|
||||
arch_sync_dma_for_device(phys, size, dir);
|
||||
else
|
||||
arch_sync_dma_flush();
|
||||
} else {
|
||||
xen_dma_sync_for_device(dev, dev_addr, size, dir);
|
||||
}
|
||||
}
|
||||
return dev_addr;
|
||||
}
|
||||
@@ -287,10 +289,12 @@ static void xen_swiotlb_unmap_phys(struct device *hwdev, dma_addr_t dev_addr,
|
||||
BUG_ON(dir == DMA_NONE);
|
||||
|
||||
if (!dev_is_dma_coherent(hwdev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
|
||||
if (pfn_valid(PFN_DOWN(dma_to_phys(hwdev, dev_addr))))
|
||||
if (pfn_valid(PFN_DOWN(dma_to_phys(hwdev, dev_addr)))) {
|
||||
arch_sync_dma_for_cpu(paddr, size, dir);
|
||||
else
|
||||
arch_sync_dma_flush();
|
||||
} else {
|
||||
xen_dma_sync_for_cpu(hwdev, dev_addr, size, dir);
|
||||
}
|
||||
}
|
||||
|
||||
/* NOTE: We use dev_addr here, not paddr! */
|
||||
@@ -308,10 +312,12 @@ xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr,
|
||||
struct io_tlb_pool *pool;
|
||||
|
||||
if (!dev_is_dma_coherent(dev)) {
|
||||
if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))
|
||||
if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr)))) {
|
||||
arch_sync_dma_for_cpu(paddr, size, dir);
|
||||
else
|
||||
arch_sync_dma_flush();
|
||||
} else {
|
||||
xen_dma_sync_for_cpu(dev, dma_addr, size, dir);
|
||||
}
|
||||
}
|
||||
|
||||
pool = xen_swiotlb_find_pool(dev, dma_addr);
|
||||
@@ -331,10 +337,12 @@ xen_swiotlb_sync_single_for_device(struct device *dev, dma_addr_t dma_addr,
|
||||
__swiotlb_sync_single_for_device(dev, paddr, size, dir, pool);
|
||||
|
||||
if (!dev_is_dma_coherent(dev)) {
|
||||
if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))
|
||||
if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr)))) {
|
||||
arch_sync_dma_for_device(paddr, size, dir);
|
||||
else
|
||||
arch_sync_dma_flush();
|
||||
} else {
|
||||
xen_dma_sync_for_device(dev, dma_addr, size, dir);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -61,14 +61,4 @@ extern int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data)
|
||||
extern bool cma_intersects(struct cma *cma, unsigned long start, unsigned long end);
|
||||
|
||||
extern void cma_reserve_pages_on_error(struct cma *cma);
|
||||
|
||||
#ifdef CONFIG_DMA_CMA
|
||||
extern bool cma_skip_dt_default_reserved_mem(void);
|
||||
#else
|
||||
static inline bool cma_skip_dt_default_reserved_mem(void)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,16 +0,0 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef DMA_BUF_HEAP_CMA_H_
|
||||
#define DMA_BUF_HEAP_CMA_H_
|
||||
|
||||
struct cma;
|
||||
|
||||
#ifdef CONFIG_DMABUF_HEAPS_CMA
|
||||
int dma_heap_cma_register_heap(struct cma *cma);
|
||||
#else
|
||||
static inline int dma_heap_cma_register_heap(struct cma *cma)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif // CONFIG_DMABUF_HEAPS_CMA
|
||||
|
||||
#endif // DMA_BUF_HEAP_CMA_H_
|
||||
@@ -91,14 +91,8 @@ static inline void set_dma_ops(struct device *dev,
|
||||
#endif /* CONFIG_ARCH_HAS_DMA_OPS */
|
||||
|
||||
#ifdef CONFIG_DMA_CMA
|
||||
extern struct cma *dma_contiguous_default_area;
|
||||
|
||||
static inline struct cma *dev_get_cma_area(struct device *dev)
|
||||
{
|
||||
if (dev && dev->cma_area)
|
||||
return dev->cma_area;
|
||||
return dma_contiguous_default_area;
|
||||
}
|
||||
struct cma *dev_get_cma_area(struct device *dev);
|
||||
struct cma *dma_contiguous_get_area_by_idx(unsigned int idx);
|
||||
|
||||
void dma_contiguous_reserve(phys_addr_t addr_limit);
|
||||
int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
|
||||
@@ -117,6 +111,10 @@ static inline struct cma *dev_get_cma_area(struct device *dev)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
static inline struct cma *dma_contiguous_get_area_by_idx(unsigned int idx)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
static inline void dma_contiguous_reserve(phys_addr_t limit)
|
||||
{
|
||||
}
|
||||
@@ -147,9 +145,6 @@ static inline void dma_free_contiguous(struct device *dev, struct page *page,
|
||||
{
|
||||
__free_pages(page, get_order(size));
|
||||
}
|
||||
static inline void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_DMA_CMA*/
|
||||
|
||||
#ifdef CONFIG_DMA_DECLARE_COHERENT
|
||||
@@ -361,6 +356,12 @@ static inline void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
|
||||
}
|
||||
#endif /* ARCH_HAS_SYNC_DMA_FOR_CPU */
|
||||
|
||||
#ifndef CONFIG_ARCH_HAS_BATCHED_DMA_SYNC
|
||||
static inline void arch_sync_dma_flush(void)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL
|
||||
void arch_sync_dma_for_cpu_all(void);
|
||||
#else
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
#include <linux/bug.h>
|
||||
#include <linux/cache.h>
|
||||
|
||||
/**
|
||||
/*
|
||||
* List of possible attributes associated with a DMA mapping. The semantics
|
||||
* of each attribute should be defined in Documentation/core-api/dma-attributes.rst.
|
||||
*/
|
||||
@@ -92,6 +92,16 @@
|
||||
* flushing.
|
||||
*/
|
||||
#define DMA_ATTR_REQUIRE_COHERENT (1UL << 12)
|
||||
/*
|
||||
* DMA_ATTR_CC_SHARED: Indicates the DMA mapping is shared (decrypted) for
|
||||
* confidential computing guests. For normal system memory the caller must have
|
||||
* called set_memory_decrypted(), and pgprot_decrypted must be used when
|
||||
* creating CPU PTEs for the mapping. The same shared semantic may be passed
|
||||
* to the vIOMMU when it sets up the IOPTE. For MMIO use together with
|
||||
* DMA_ATTR_MMIO to indicate shared MMIO. Unless DMA_ATTR_MMIO is provided
|
||||
* a struct page is required.
|
||||
*/
|
||||
#define DMA_ATTR_CC_SHARED (1UL << 13)
|
||||
|
||||
/*
|
||||
* A dma_addr_t can hold any valid DMA or bus address for the platform. It can
|
||||
|
||||
@@ -11,7 +11,6 @@ struct resource;
|
||||
|
||||
struct reserved_mem {
|
||||
const char *name;
|
||||
unsigned long fdt_node;
|
||||
const struct reserved_mem_ops *ops;
|
||||
phys_addr_t base;
|
||||
phys_addr_t size;
|
||||
@@ -19,18 +18,20 @@ struct reserved_mem {
|
||||
};
|
||||
|
||||
struct reserved_mem_ops {
|
||||
int (*node_validate)(unsigned long fdt_node, phys_addr_t *align);
|
||||
int (*node_fixup)(unsigned long fdt_node, phys_addr_t base,
|
||||
phys_addr_t size);
|
||||
int (*node_init)(unsigned long fdt_node, struct reserved_mem *rmem);
|
||||
int (*device_init)(struct reserved_mem *rmem,
|
||||
struct device *dev);
|
||||
void (*device_release)(struct reserved_mem *rmem,
|
||||
struct device *dev);
|
||||
};
|
||||
|
||||
typedef int (*reservedmem_of_init_fn)(struct reserved_mem *rmem);
|
||||
|
||||
#ifdef CONFIG_OF_RESERVED_MEM
|
||||
|
||||
#define RESERVEDMEM_OF_DECLARE(name, compat, init) \
|
||||
_OF_DECLARE(reservedmem, name, compat, init, reservedmem_of_init_fn)
|
||||
#define RESERVEDMEM_OF_DECLARE(name, compat, ops) \
|
||||
_OF_DECLARE(reservedmem, name, compat, ops, struct reserved_mem_ops *)
|
||||
|
||||
int of_reserved_mem_device_init_by_idx(struct device *dev,
|
||||
struct device_node *np, int idx);
|
||||
@@ -48,8 +49,9 @@ int of_reserved_mem_region_count(const struct device_node *np);
|
||||
|
||||
#else
|
||||
|
||||
#define RESERVEDMEM_OF_DECLARE(name, compat, init) \
|
||||
_OF_DECLARE_STUB(reservedmem, name, compat, init, reservedmem_of_init_fn)
|
||||
#define RESERVEDMEM_OF_DECLARE(name, compat, ops) \
|
||||
_OF_DECLARE_STUB(reservedmem, name, compat, ops, \
|
||||
struct reserved_mem_ops *)
|
||||
|
||||
static inline int of_reserved_mem_device_init_by_idx(struct device *dev,
|
||||
struct device_node *np, int idx)
|
||||
|
||||
@@ -34,7 +34,8 @@ TRACE_DEFINE_ENUM(DMA_NONE);
|
||||
{ DMA_ATTR_PRIVILEGED, "PRIVILEGED" }, \
|
||||
{ DMA_ATTR_MMIO, "MMIO" }, \
|
||||
{ DMA_ATTR_DEBUGGING_IGNORE_CACHELINES, "CACHELINES_OVERLAP" }, \
|
||||
{ DMA_ATTR_REQUIRE_COHERENT, "REQUIRE_COHERENT" })
|
||||
{ DMA_ATTR_REQUIRE_COHERENT, "REQUIRE_COHERENT" }, \
|
||||
{ DMA_ATTR_CC_SHARED, "CC_SHARED" })
|
||||
|
||||
DECLARE_EVENT_CLASS(dma_map,
|
||||
TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr,
|
||||
|
||||
@@ -17,6 +17,12 @@
|
||||
#define DMA_MAP_TO_DEVICE 1
|
||||
#define DMA_MAP_FROM_DEVICE 2
|
||||
|
||||
enum {
|
||||
DMA_MAP_BENCH_SINGLE_MODE,
|
||||
DMA_MAP_BENCH_SG_MODE,
|
||||
DMA_MAP_BENCH_MODE_MAX
|
||||
};
|
||||
|
||||
struct map_benchmark {
|
||||
__u64 avg_map_100ns; /* average map latency in 100ns */
|
||||
__u64 map_stddev; /* standard deviation of map latency */
|
||||
@@ -28,8 +34,11 @@ struct map_benchmark {
|
||||
__u32 dma_bits; /* DMA addressing capability */
|
||||
__u32 dma_dir; /* DMA data direction */
|
||||
__u32 dma_trans_ns; /* time for DMA transmission in ns */
|
||||
__u32 granule; /* how many PAGE_SIZE will do map/unmap once a time */
|
||||
__u8 expansion[76]; /* For future use */
|
||||
__u32 granule; /* - SINGLE_MODE: number of pages mapped/unmapped per operation
|
||||
* - SG_MODE: number of scatterlist entries (each maps one page)
|
||||
*/
|
||||
__u8 map_mode; /* the mode of dma map */
|
||||
__u8 expansion[75]; /* For future use */
|
||||
};
|
||||
|
||||
#endif /* _UAPI_DMA_BENCHMARK_H */
|
||||
|
||||
@@ -72,6 +72,9 @@ config ARCH_HAS_DMA_PREP_COHERENT
|
||||
config ARCH_HAS_FORCE_DMA_UNENCRYPTED
|
||||
bool
|
||||
|
||||
config ARCH_HAS_BATCHED_DMA_SYNC
|
||||
bool
|
||||
|
||||
#
|
||||
# Select this option if the architecture assumes DMA devices are coherent
|
||||
# by default.
|
||||
|
||||
@@ -362,17 +362,11 @@ static void rmem_dma_device_release(struct reserved_mem *rmem,
|
||||
dev->dma_mem = NULL;
|
||||
}
|
||||
|
||||
static const struct reserved_mem_ops rmem_dma_ops = {
|
||||
.device_init = rmem_dma_device_init,
|
||||
.device_release = rmem_dma_device_release,
|
||||
};
|
||||
|
||||
static int __init rmem_dma_setup(struct reserved_mem *rmem)
|
||||
static int __init rmem_dma_setup(unsigned long node, struct reserved_mem *rmem)
|
||||
{
|
||||
unsigned long node = rmem->fdt_node;
|
||||
|
||||
if (of_get_flat_dt_prop(node, "reusable", NULL))
|
||||
return -EINVAL;
|
||||
return -ENODEV;
|
||||
|
||||
#ifdef CONFIG_ARM
|
||||
if (!of_get_flat_dt_prop(node, "no-map", NULL)) {
|
||||
@@ -390,7 +384,6 @@ static int __init rmem_dma_setup(struct reserved_mem *rmem)
|
||||
}
|
||||
#endif
|
||||
|
||||
rmem->ops = &rmem_dma_ops;
|
||||
pr_info("Reserved memory: created DMA memory pool at %pa, size %ld MiB\n",
|
||||
&rmem->base, (unsigned long)rmem->size / SZ_1M);
|
||||
return 0;
|
||||
@@ -407,5 +400,11 @@ static int __init dma_init_reserved_memory(void)
|
||||
core_initcall(dma_init_reserved_memory);
|
||||
#endif /* CONFIG_DMA_GLOBAL_POOL */
|
||||
|
||||
RESERVEDMEM_OF_DECLARE(dma, "shared-dma-pool", rmem_dma_setup);
|
||||
static const struct reserved_mem_ops rmem_dma_ops = {
|
||||
.node_init = rmem_dma_setup,
|
||||
.device_init = rmem_dma_device_init,
|
||||
.device_release = rmem_dma_device_release,
|
||||
};
|
||||
|
||||
RESERVEDMEM_OF_DECLARE(dma, "shared-dma-pool", &rmem_dma_ops);
|
||||
#endif
|
||||
|
||||
@@ -42,7 +42,6 @@
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/sizes.h>
|
||||
#include <linux/dma-buf/heaps/cma.h>
|
||||
#include <linux/dma-map-ops.h>
|
||||
#include <linux/cma.h>
|
||||
#include <linux/nospec.h>
|
||||
@@ -53,7 +52,38 @@
|
||||
#define CMA_SIZE_MBYTES 0
|
||||
#endif
|
||||
|
||||
struct cma *dma_contiguous_default_area;
|
||||
static struct cma *dma_contiguous_areas[MAX_CMA_AREAS];
|
||||
static unsigned int dma_contiguous_areas_num;
|
||||
|
||||
static int dma_contiguous_insert_area(struct cma *cma)
|
||||
{
|
||||
if (dma_contiguous_areas_num >= ARRAY_SIZE(dma_contiguous_areas))
|
||||
return -EINVAL;
|
||||
|
||||
dma_contiguous_areas[dma_contiguous_areas_num++] = cma;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* dma_contiguous_get_area_by_idx() - Get contiguous area at given index
|
||||
* @idx: index of the area we query
|
||||
*
|
||||
* Queries for the contiguous area located at index @idx.
|
||||
*
|
||||
* Returns:
|
||||
* A pointer to the requested contiguous area, or NULL otherwise.
|
||||
*/
|
||||
struct cma *dma_contiguous_get_area_by_idx(unsigned int idx)
|
||||
{
|
||||
if (idx >= dma_contiguous_areas_num)
|
||||
return NULL;
|
||||
|
||||
return dma_contiguous_areas[idx];
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dma_contiguous_get_area_by_idx);
|
||||
|
||||
static struct cma *dma_contiguous_default_area;
|
||||
|
||||
/*
|
||||
* Default global CMA area size can be defined in kernel's .config.
|
||||
@@ -91,15 +121,14 @@ static int __init early_cma(char *p)
|
||||
}
|
||||
early_param("cma", early_cma);
|
||||
|
||||
/*
|
||||
* cma_skip_dt_default_reserved_mem - This is called from the
|
||||
* reserved_mem framework to detect if the default cma region is being
|
||||
* set by the "cma=" kernel parameter.
|
||||
*/
|
||||
bool __init cma_skip_dt_default_reserved_mem(void)
|
||||
struct cma *dev_get_cma_area(struct device *dev)
|
||||
{
|
||||
return size_cmdline != -1;
|
||||
if (dev && dev->cma_area)
|
||||
return dev->cma_area;
|
||||
|
||||
return dma_contiguous_default_area;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dev_get_cma_area);
|
||||
|
||||
#ifdef CONFIG_DMA_NUMA_CMA
|
||||
|
||||
@@ -264,9 +293,24 @@ void __init dma_contiguous_reserve(phys_addr_t limit)
|
||||
if (ret)
|
||||
return;
|
||||
|
||||
ret = dma_heap_cma_register_heap(dma_contiguous_default_area);
|
||||
/*
|
||||
* We need to insert the new area in our list to avoid
|
||||
* any inconsistencies between having the default area
|
||||
* listed in the DT or not.
|
||||
*
|
||||
* The DT case is handled by rmem_cma_setup() and will
|
||||
* always insert all its areas in our list. However, if
|
||||
* it didn't run (because OF_RESERVED_MEM isn't set, or
|
||||
* there's no DT region specified), then we don't have a
|
||||
* default area yet, and no area in our list.
|
||||
*
|
||||
* This block creates the default area in such a case,
|
||||
* but we also need to insert it in our list to avoid
|
||||
* having a default area but an empty list.
|
||||
*/
|
||||
ret = dma_contiguous_insert_area(dma_contiguous_default_area);
|
||||
if (ret)
|
||||
pr_warn("Couldn't register default CMA heap.");
|
||||
pr_warn("Couldn't queue default CMA region for heap creation.");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -470,47 +514,89 @@ static void rmem_cma_device_release(struct reserved_mem *rmem,
|
||||
dev->cma_area = NULL;
|
||||
}
|
||||
|
||||
static const struct reserved_mem_ops rmem_cma_ops = {
|
||||
.device_init = rmem_cma_device_init,
|
||||
.device_release = rmem_cma_device_release,
|
||||
};
|
||||
|
||||
static int __init rmem_cma_setup(struct reserved_mem *rmem)
|
||||
static int __init __rmem_cma_verify_node(unsigned long node)
|
||||
{
|
||||
unsigned long node = rmem->fdt_node;
|
||||
bool default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL);
|
||||
struct cma *cma;
|
||||
int err;
|
||||
|
||||
if (!of_get_flat_dt_prop(node, "reusable", NULL) ||
|
||||
of_get_flat_dt_prop(node, "no-map", NULL))
|
||||
return -EINVAL;
|
||||
return -ENODEV;
|
||||
|
||||
if (size_cmdline != -1 &&
|
||||
of_get_flat_dt_prop(node, "linux,cma-default", NULL)) {
|
||||
pr_err("Skipping dt linux,cma-default node in favor for \"cma=\" kernel param.\n");
|
||||
return -EBUSY;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __init rmem_cma_validate(unsigned long node, phys_addr_t *align)
|
||||
{
|
||||
int ret = __rmem_cma_verify_node(node);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (align)
|
||||
*align = max_t(phys_addr_t, *align, CMA_MIN_ALIGNMENT_BYTES);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __init rmem_cma_fixup(unsigned long node, phys_addr_t base,
|
||||
phys_addr_t size)
|
||||
{
|
||||
int ret = __rmem_cma_verify_node(node);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Architecture specific contiguous memory fixup. */
|
||||
dma_contiguous_early_fixup(base, size);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __init rmem_cma_setup(unsigned long node, struct reserved_mem *rmem)
|
||||
{
|
||||
bool default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL);
|
||||
struct cma *cma;
|
||||
int ret;
|
||||
|
||||
ret = __rmem_cma_verify_node(node);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (!IS_ALIGNED(rmem->base | rmem->size, CMA_MIN_ALIGNMENT_BYTES)) {
|
||||
pr_err("Reserved memory: incorrect alignment of CMA region\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
err = cma_init_reserved_mem(rmem->base, rmem->size, 0, rmem->name, &cma);
|
||||
if (err) {
|
||||
ret = cma_init_reserved_mem(rmem->base, rmem->size, 0, rmem->name, &cma);
|
||||
if (ret) {
|
||||
pr_err("Reserved memory: unable to setup CMA region\n");
|
||||
return err;
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (default_cma)
|
||||
dma_contiguous_default_area = cma;
|
||||
|
||||
rmem->ops = &rmem_cma_ops;
|
||||
rmem->priv = cma;
|
||||
|
||||
pr_info("Reserved memory: created CMA memory pool at %pa, size %ld MiB\n",
|
||||
&rmem->base, (unsigned long)rmem->size / SZ_1M);
|
||||
|
||||
err = dma_heap_cma_register_heap(cma);
|
||||
if (err)
|
||||
pr_warn("Couldn't register CMA heap.");
|
||||
ret = dma_contiguous_insert_area(cma);
|
||||
if (ret)
|
||||
pr_warn("Couldn't store CMA reserved area.");
|
||||
|
||||
return 0;
|
||||
}
|
||||
RESERVEDMEM_OF_DECLARE(cma, "shared-dma-pool", rmem_cma_setup);
|
||||
|
||||
static const struct reserved_mem_ops rmem_cma_ops = {
|
||||
.node_validate = rmem_cma_validate,
|
||||
.node_fixup = rmem_cma_fixup,
|
||||
.node_init = rmem_cma_setup,
|
||||
.device_init = rmem_cma_device_init,
|
||||
.device_release = rmem_cma_device_release,
|
||||
};
|
||||
|
||||
RESERVEDMEM_OF_DECLARE(cma, "shared-dma-pool", &rmem_cma_ops);
|
||||
#endif
|
||||
|
||||
@@ -406,6 +406,8 @@ void dma_direct_sync_sg_for_device(struct device *dev,
|
||||
arch_sync_dma_for_device(paddr, sg->length,
|
||||
dir);
|
||||
}
|
||||
if (!dev_is_dma_coherent(dev))
|
||||
arch_sync_dma_flush();
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -427,8 +429,10 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
|
||||
swiotlb_sync_single_for_cpu(dev, paddr, sg->length, dir);
|
||||
}
|
||||
|
||||
if (!dev_is_dma_coherent(dev))
|
||||
if (!dev_is_dma_coherent(dev)) {
|
||||
arch_sync_dma_flush();
|
||||
arch_sync_dma_for_cpu_all();
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -440,14 +444,19 @@ void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl,
|
||||
{
|
||||
struct scatterlist *sg;
|
||||
int i;
|
||||
bool need_sync = false;
|
||||
|
||||
for_each_sg(sgl, sg, nents, i) {
|
||||
if (sg_dma_is_bus_address(sg))
|
||||
if (sg_dma_is_bus_address(sg)) {
|
||||
sg_dma_unmark_bus_address(sg);
|
||||
else
|
||||
} else {
|
||||
need_sync = true;
|
||||
dma_direct_unmap_phys(dev, sg->dma_address,
|
||||
sg_dma_len(sg), dir, attrs);
|
||||
sg_dma_len(sg), dir, attrs, false);
|
||||
}
|
||||
}
|
||||
if (need_sync && !dev_is_dma_coherent(dev))
|
||||
arch_sync_dma_flush();
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -457,6 +466,7 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
|
||||
struct pci_p2pdma_map_state p2pdma_state = {};
|
||||
struct scatterlist *sg;
|
||||
int i, ret;
|
||||
bool need_sync = false;
|
||||
|
||||
for_each_sg(sgl, sg, nents, i) {
|
||||
switch (pci_p2pdma_state(&p2pdma_state, dev, sg_page(sg))) {
|
||||
@@ -468,8 +478,9 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
|
||||
*/
|
||||
break;
|
||||
case PCI_P2PDMA_MAP_NONE:
|
||||
need_sync = true;
|
||||
sg->dma_address = dma_direct_map_phys(dev, sg_phys(sg),
|
||||
sg->length, dir, attrs);
|
||||
sg->length, dir, attrs, false);
|
||||
if (sg->dma_address == DMA_MAPPING_ERROR) {
|
||||
ret = -EIO;
|
||||
goto out_unmap;
|
||||
@@ -488,6 +499,8 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
|
||||
sg_dma_len(sg) = sg->length;
|
||||
}
|
||||
|
||||
if (need_sync && !dev_is_dma_coherent(dev))
|
||||
arch_sync_dma_flush();
|
||||
return nents;
|
||||
|
||||
out_unmap:
|
||||
|
||||
@@ -60,17 +60,22 @@ static inline void dma_direct_sync_single_for_device(struct device *dev,
|
||||
|
||||
swiotlb_sync_single_for_device(dev, paddr, size, dir);
|
||||
|
||||
if (!dev_is_dma_coherent(dev))
|
||||
if (!dev_is_dma_coherent(dev)) {
|
||||
arch_sync_dma_for_device(paddr, size, dir);
|
||||
arch_sync_dma_flush();
|
||||
}
|
||||
}
|
||||
|
||||
static inline void dma_direct_sync_single_for_cpu(struct device *dev,
|
||||
dma_addr_t addr, size_t size, enum dma_data_direction dir)
|
||||
dma_addr_t addr, size_t size, enum dma_data_direction dir,
|
||||
bool flush)
|
||||
{
|
||||
phys_addr_t paddr = dma_to_phys(dev, addr);
|
||||
|
||||
if (!dev_is_dma_coherent(dev)) {
|
||||
arch_sync_dma_for_cpu(paddr, size, dir);
|
||||
if (flush)
|
||||
arch_sync_dma_flush();
|
||||
arch_sync_dma_for_cpu_all();
|
||||
}
|
||||
|
||||
@@ -79,21 +84,29 @@ static inline void dma_direct_sync_single_for_cpu(struct device *dev,
|
||||
|
||||
static inline dma_addr_t dma_direct_map_phys(struct device *dev,
|
||||
phys_addr_t phys, size_t size, enum dma_data_direction dir,
|
||||
unsigned long attrs)
|
||||
unsigned long attrs, bool flush)
|
||||
{
|
||||
dma_addr_t dma_addr;
|
||||
|
||||
if (is_swiotlb_force_bounce(dev)) {
|
||||
if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT))
|
||||
return DMA_MAPPING_ERROR;
|
||||
if (!(attrs & DMA_ATTR_CC_SHARED)) {
|
||||
if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT))
|
||||
return DMA_MAPPING_ERROR;
|
||||
|
||||
return swiotlb_map(dev, phys, size, dir, attrs);
|
||||
return swiotlb_map(dev, phys, size, dir, attrs);
|
||||
}
|
||||
} else if (attrs & DMA_ATTR_CC_SHARED) {
|
||||
return DMA_MAPPING_ERROR;
|
||||
}
|
||||
|
||||
if (attrs & DMA_ATTR_MMIO) {
|
||||
dma_addr = phys;
|
||||
if (unlikely(!dma_capable(dev, dma_addr, size, false)))
|
||||
goto err_overflow;
|
||||
} else if (attrs & DMA_ATTR_CC_SHARED) {
|
||||
dma_addr = phys_to_dma_unencrypted(dev, phys);
|
||||
if (unlikely(!dma_capable(dev, dma_addr, size, false)))
|
||||
goto err_overflow;
|
||||
} else {
|
||||
dma_addr = phys_to_dma(dev, phys);
|
||||
if (unlikely(!dma_capable(dev, dma_addr, size, true)) ||
|
||||
@@ -107,8 +120,11 @@ static inline dma_addr_t dma_direct_map_phys(struct device *dev,
|
||||
}
|
||||
|
||||
if (!dev_is_dma_coherent(dev) &&
|
||||
!(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO)))
|
||||
!(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO))) {
|
||||
arch_sync_dma_for_device(phys, size, dir);
|
||||
if (flush)
|
||||
arch_sync_dma_flush();
|
||||
}
|
||||
return dma_addr;
|
||||
|
||||
err_overflow:
|
||||
@@ -120,7 +136,8 @@ err_overflow:
|
||||
}
|
||||
|
||||
static inline void dma_direct_unmap_phys(struct device *dev, dma_addr_t addr,
|
||||
size_t size, enum dma_data_direction dir, unsigned long attrs)
|
||||
size_t size, enum dma_data_direction dir, unsigned long attrs,
|
||||
bool flush)
|
||||
{
|
||||
phys_addr_t phys;
|
||||
|
||||
@@ -130,7 +147,7 @@ static inline void dma_direct_unmap_phys(struct device *dev, dma_addr_t addr,
|
||||
|
||||
phys = dma_to_phys(dev, addr);
|
||||
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
|
||||
dma_direct_sync_single_for_cpu(dev, addr, size, dir);
|
||||
dma_direct_sync_single_for_cpu(dev, addr, size, dir, flush);
|
||||
|
||||
swiotlb_tbl_unmap_single(dev, phys, size, dir,
|
||||
attrs | DMA_ATTR_SKIP_CPU_SYNC);
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
|
||||
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||
|
||||
#include <linux/cleanup.h>
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/device.h>
|
||||
@@ -15,6 +16,7 @@
|
||||
#include <linux/module.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/platform_device.h>
|
||||
#include <linux/scatterlist.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/timekeeping.h>
|
||||
#include <uapi/linux/map_benchmark.h>
|
||||
@@ -31,17 +33,219 @@ struct map_benchmark_data {
|
||||
atomic64_t loops;
|
||||
};
|
||||
|
||||
static int map_benchmark_thread(void *data)
|
||||
struct map_benchmark_ops {
|
||||
void *(*prepare)(struct map_benchmark_data *map);
|
||||
void (*unprepare)(void *mparam);
|
||||
void (*initialize_data)(void *mparam);
|
||||
int (*do_map)(void *mparam);
|
||||
void (*do_unmap)(void *mparam);
|
||||
};
|
||||
|
||||
struct dma_single_map_param {
|
||||
struct device *dev;
|
||||
dma_addr_t addr;
|
||||
void *xbuf;
|
||||
u32 npages;
|
||||
u32 dma_dir;
|
||||
};
|
||||
|
||||
static void *dma_single_map_benchmark_prepare(struct map_benchmark_data *map)
|
||||
{
|
||||
void *buf;
|
||||
dma_addr_t dma_addr;
|
||||
struct map_benchmark_data *map = data;
|
||||
int npages = map->bparam.granule;
|
||||
u64 size = npages * PAGE_SIZE;
|
||||
struct dma_single_map_param *params __free(kfree) = kzalloc(sizeof(*params),
|
||||
GFP_KERNEL);
|
||||
if (!params)
|
||||
return NULL;
|
||||
|
||||
params->npages = map->bparam.granule;
|
||||
params->dma_dir = map->bparam.dma_dir;
|
||||
params->dev = map->dev;
|
||||
params->xbuf = alloc_pages_exact(params->npages * PAGE_SIZE, GFP_KERNEL);
|
||||
if (!params->xbuf)
|
||||
return NULL;
|
||||
|
||||
return_ptr(params);
|
||||
}
|
||||
|
||||
static void dma_single_map_benchmark_unprepare(void *mparam)
|
||||
{
|
||||
struct dma_single_map_param *params = mparam;
|
||||
|
||||
free_pages_exact(params->xbuf, params->npages * PAGE_SIZE);
|
||||
kfree(params);
|
||||
}
|
||||
|
||||
static void dma_single_map_benchmark_initialize_data(void *mparam)
|
||||
{
|
||||
struct dma_single_map_param *params = mparam;
|
||||
|
||||
/*
|
||||
* for a non-coherent device, if we don't stain them in the
|
||||
* cache, this will give an underestimate of the real-world
|
||||
* overhead of BIDIRECTIONAL or TO_DEVICE mappings;
|
||||
* 66 means everything goes well! 66 is lucky.
|
||||
*/
|
||||
if (params->dma_dir != DMA_FROM_DEVICE)
|
||||
memset(params->xbuf, 0x66, params->npages * PAGE_SIZE);
|
||||
}
|
||||
|
||||
static int dma_single_map_benchmark_do_map(void *mparam)
|
||||
{
|
||||
struct dma_single_map_param *params = mparam;
|
||||
|
||||
params->addr = dma_map_single(params->dev, params->xbuf,
|
||||
params->npages * PAGE_SIZE, params->dma_dir);
|
||||
if (unlikely(dma_mapping_error(params->dev, params->addr))) {
|
||||
pr_err("dma_map_single failed on %s\n", dev_name(params->dev));
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void dma_single_map_benchmark_do_unmap(void *mparam)
|
||||
{
|
||||
struct dma_single_map_param *params = mparam;
|
||||
|
||||
dma_unmap_single(params->dev, params->addr,
|
||||
params->npages * PAGE_SIZE, params->dma_dir);
|
||||
}
|
||||
|
||||
static struct map_benchmark_ops dma_single_map_benchmark_ops = {
|
||||
.prepare = dma_single_map_benchmark_prepare,
|
||||
.unprepare = dma_single_map_benchmark_unprepare,
|
||||
.initialize_data = dma_single_map_benchmark_initialize_data,
|
||||
.do_map = dma_single_map_benchmark_do_map,
|
||||
.do_unmap = dma_single_map_benchmark_do_unmap,
|
||||
};
|
||||
|
||||
struct dma_sg_map_param {
|
||||
struct sg_table sgt;
|
||||
struct device *dev;
|
||||
void **buf;
|
||||
u32 npages;
|
||||
u32 dma_dir;
|
||||
};
|
||||
|
||||
static void *dma_sg_map_benchmark_prepare(struct map_benchmark_data *map)
|
||||
{
|
||||
struct scatterlist *sg;
|
||||
int i;
|
||||
|
||||
struct dma_sg_map_param *params = kzalloc(sizeof(*params), GFP_KERNEL);
|
||||
|
||||
if (!params)
|
||||
return NULL;
|
||||
/*
|
||||
* Set the number of scatterlist entries based on the granule.
|
||||
* In SG mode, 'granule' represents the number of scatterlist entries.
|
||||
* Each scatterlist entry corresponds to a single page.
|
||||
*/
|
||||
params->npages = map->bparam.granule;
|
||||
params->dma_dir = map->bparam.dma_dir;
|
||||
params->dev = map->dev;
|
||||
params->buf = kmalloc_array(params->npages, sizeof(*params->buf),
|
||||
GFP_KERNEL);
|
||||
if (!params->buf)
|
||||
goto out;
|
||||
|
||||
if (sg_alloc_table(¶ms->sgt, params->npages, GFP_KERNEL))
|
||||
goto free_buf;
|
||||
|
||||
for_each_sgtable_sg(¶ms->sgt, sg, i) {
|
||||
params->buf[i] = (void *)__get_free_page(GFP_KERNEL);
|
||||
if (!params->buf[i])
|
||||
goto free_page;
|
||||
|
||||
sg_set_buf(sg, params->buf[i], PAGE_SIZE);
|
||||
}
|
||||
|
||||
return params;
|
||||
|
||||
free_page:
|
||||
while (i-- > 0)
|
||||
free_page((unsigned long)params->buf[i]);
|
||||
|
||||
sg_free_table(¶ms->sgt);
|
||||
free_buf:
|
||||
kfree(params->buf);
|
||||
out:
|
||||
kfree(params);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void dma_sg_map_benchmark_unprepare(void *mparam)
|
||||
{
|
||||
struct dma_sg_map_param *params = mparam;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < params->npages; i++)
|
||||
free_page((unsigned long)params->buf[i]);
|
||||
|
||||
sg_free_table(¶ms->sgt);
|
||||
|
||||
kfree(params->buf);
|
||||
kfree(params);
|
||||
}
|
||||
|
||||
static void dma_sg_map_benchmark_initialize_data(void *mparam)
|
||||
{
|
||||
struct dma_sg_map_param *params = mparam;
|
||||
struct scatterlist *sg;
|
||||
int i = 0;
|
||||
|
||||
if (params->dma_dir == DMA_FROM_DEVICE)
|
||||
return;
|
||||
|
||||
for_each_sgtable_sg(¶ms->sgt, sg, i)
|
||||
memset(params->buf[i], 0x66, PAGE_SIZE);
|
||||
}
|
||||
|
||||
static int dma_sg_map_benchmark_do_map(void *mparam)
|
||||
{
|
||||
struct dma_sg_map_param *params = mparam;
|
||||
int ret = 0;
|
||||
|
||||
buf = alloc_pages_exact(size, GFP_KERNEL);
|
||||
if (!buf)
|
||||
int sg_mapped = dma_map_sg(params->dev, params->sgt.sgl,
|
||||
params->npages, params->dma_dir);
|
||||
if (!sg_mapped) {
|
||||
pr_err("dma_map_sg failed on %s\n", dev_name(params->dev));
|
||||
ret = -ENOMEM;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void dma_sg_map_benchmark_do_unmap(void *mparam)
|
||||
{
|
||||
struct dma_sg_map_param *params = mparam;
|
||||
|
||||
dma_unmap_sg(params->dev, params->sgt.sgl, params->npages,
|
||||
params->dma_dir);
|
||||
}
|
||||
|
||||
static struct map_benchmark_ops dma_sg_map_benchmark_ops = {
|
||||
.prepare = dma_sg_map_benchmark_prepare,
|
||||
.unprepare = dma_sg_map_benchmark_unprepare,
|
||||
.initialize_data = dma_sg_map_benchmark_initialize_data,
|
||||
.do_map = dma_sg_map_benchmark_do_map,
|
||||
.do_unmap = dma_sg_map_benchmark_do_unmap,
|
||||
};
|
||||
|
||||
static struct map_benchmark_ops *dma_map_benchmark_ops[DMA_MAP_BENCH_MODE_MAX] = {
|
||||
[DMA_MAP_BENCH_SINGLE_MODE] = &dma_single_map_benchmark_ops,
|
||||
[DMA_MAP_BENCH_SG_MODE] = &dma_sg_map_benchmark_ops,
|
||||
};
|
||||
|
||||
static int map_benchmark_thread(void *data)
|
||||
{
|
||||
struct map_benchmark_data *map = data;
|
||||
__u8 map_mode = map->bparam.map_mode;
|
||||
int ret = 0;
|
||||
|
||||
struct map_benchmark_ops *mb_ops = dma_map_benchmark_ops[map_mode];
|
||||
void *mparam = mb_ops->prepare(map);
|
||||
|
||||
if (!mparam)
|
||||
return -ENOMEM;
|
||||
|
||||
while (!kthread_should_stop()) {
|
||||
@@ -49,23 +253,12 @@ static int map_benchmark_thread(void *data)
|
||||
ktime_t map_stime, map_etime, unmap_stime, unmap_etime;
|
||||
ktime_t map_delta, unmap_delta;
|
||||
|
||||
/*
|
||||
* for a non-coherent device, if we don't stain them in the
|
||||
* cache, this will give an underestimate of the real-world
|
||||
* overhead of BIDIRECTIONAL or TO_DEVICE mappings;
|
||||
* 66 means evertything goes well! 66 is lucky.
|
||||
*/
|
||||
if (map->dir != DMA_FROM_DEVICE)
|
||||
memset(buf, 0x66, size);
|
||||
|
||||
mb_ops->initialize_data(mparam);
|
||||
map_stime = ktime_get();
|
||||
dma_addr = dma_map_single(map->dev, buf, size, map->dir);
|
||||
if (unlikely(dma_mapping_error(map->dev, dma_addr))) {
|
||||
pr_err("dma_map_single failed on %s\n",
|
||||
dev_name(map->dev));
|
||||
ret = -ENOMEM;
|
||||
ret = mb_ops->do_map(mparam);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
|
||||
map_etime = ktime_get();
|
||||
map_delta = ktime_sub(map_etime, map_stime);
|
||||
|
||||
@@ -73,7 +266,8 @@ static int map_benchmark_thread(void *data)
|
||||
ndelay(map->bparam.dma_trans_ns);
|
||||
|
||||
unmap_stime = ktime_get();
|
||||
dma_unmap_single(map->dev, dma_addr, size, map->dir);
|
||||
mb_ops->do_unmap(mparam);
|
||||
|
||||
unmap_etime = ktime_get();
|
||||
unmap_delta = ktime_sub(unmap_etime, unmap_stime);
|
||||
|
||||
@@ -108,7 +302,7 @@ static int map_benchmark_thread(void *data)
|
||||
}
|
||||
|
||||
out:
|
||||
free_pages_exact(buf, size);
|
||||
mb_ops->unprepare(mparam);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -209,6 +403,12 @@ static long map_benchmark_ioctl(struct file *file, unsigned int cmd,
|
||||
|
||||
switch (cmd) {
|
||||
case DMA_MAP_BENCHMARK:
|
||||
if (map->bparam.map_mode < 0 ||
|
||||
map->bparam.map_mode >= DMA_MAP_BENCH_MODE_MAX) {
|
||||
pr_err("invalid map mode\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (map->bparam.threads == 0 ||
|
||||
map->bparam.threads > DMA_MAP_MAX_THREADS) {
|
||||
pr_err("invalid thread number\n");
|
||||
|
||||
@@ -157,6 +157,7 @@ dma_addr_t dma_map_phys(struct device *dev, phys_addr_t phys, size_t size,
|
||||
{
|
||||
const struct dma_map_ops *ops = get_dma_ops(dev);
|
||||
bool is_mmio = attrs & DMA_ATTR_MMIO;
|
||||
bool is_cc_shared = attrs & DMA_ATTR_CC_SHARED;
|
||||
dma_addr_t addr = DMA_MAPPING_ERROR;
|
||||
|
||||
BUG_ON(!valid_dma_direction(dir));
|
||||
@@ -168,8 +169,11 @@ dma_addr_t dma_map_phys(struct device *dev, phys_addr_t phys, size_t size,
|
||||
return DMA_MAPPING_ERROR;
|
||||
|
||||
if (dma_map_direct(dev, ops) ||
|
||||
(!is_mmio && arch_dma_map_phys_direct(dev, phys + size)))
|
||||
addr = dma_direct_map_phys(dev, phys, size, dir, attrs);
|
||||
(!is_mmio && !is_cc_shared &&
|
||||
arch_dma_map_phys_direct(dev, phys + size)))
|
||||
addr = dma_direct_map_phys(dev, phys, size, dir, attrs, true);
|
||||
else if (is_cc_shared)
|
||||
return DMA_MAPPING_ERROR;
|
||||
else if (use_dma_iommu(dev))
|
||||
addr = iommu_dma_map_phys(dev, phys, size, dir, attrs);
|
||||
else if (ops->map_phys)
|
||||
@@ -206,11 +210,16 @@ void dma_unmap_phys(struct device *dev, dma_addr_t addr, size_t size,
|
||||
{
|
||||
const struct dma_map_ops *ops = get_dma_ops(dev);
|
||||
bool is_mmio = attrs & DMA_ATTR_MMIO;
|
||||
bool is_cc_shared = attrs & DMA_ATTR_CC_SHARED;
|
||||
|
||||
BUG_ON(!valid_dma_direction(dir));
|
||||
|
||||
if (dma_map_direct(dev, ops) ||
|
||||
(!is_mmio && arch_dma_unmap_phys_direct(dev, addr + size)))
|
||||
dma_direct_unmap_phys(dev, addr, size, dir, attrs);
|
||||
(!is_mmio && !is_cc_shared &&
|
||||
arch_dma_unmap_phys_direct(dev, addr + size)))
|
||||
dma_direct_unmap_phys(dev, addr, size, dir, attrs, true);
|
||||
else if (is_cc_shared)
|
||||
return;
|
||||
else if (use_dma_iommu(dev))
|
||||
iommu_dma_unmap_phys(dev, addr, size, dir, attrs);
|
||||
else if (ops->unmap_phys)
|
||||
@@ -379,7 +388,7 @@ void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
|
||||
|
||||
BUG_ON(!valid_dma_direction(dir));
|
||||
if (dma_map_direct(dev, ops))
|
||||
dma_direct_sync_single_for_cpu(dev, addr, size, dir);
|
||||
dma_direct_sync_single_for_cpu(dev, addr, size, dir, true);
|
||||
else if (use_dma_iommu(dev))
|
||||
iommu_dma_sync_single_for_cpu(dev, addr, size, dir);
|
||||
else if (ops->sync_single_for_cpu)
|
||||
|
||||
@@ -868,6 +868,9 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
|
||||
if (orig_addr == INVALID_PHYS_ADDR)
|
||||
return;
|
||||
|
||||
if (dir == DMA_FROM_DEVICE && !dev_is_dma_coherent(dev))
|
||||
arch_sync_dma_flush();
|
||||
|
||||
/*
|
||||
* It's valid for tlb_offset to be negative. This can happen when the
|
||||
* "offset" returned by swiotlb_align_offset() is non-zero, and the
|
||||
@@ -1612,8 +1615,10 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
|
||||
return DMA_MAPPING_ERROR;
|
||||
}
|
||||
|
||||
if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
|
||||
if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
|
||||
arch_sync_dma_for_device(swiotlb_addr, size, dir);
|
||||
arch_sync_dma_flush();
|
||||
}
|
||||
return dma_addr;
|
||||
}
|
||||
|
||||
@@ -1872,26 +1877,25 @@ static void rmem_swiotlb_device_release(struct reserved_mem *rmem,
|
||||
dev->dma_io_tlb_mem = &io_tlb_default_mem;
|
||||
}
|
||||
|
||||
static const struct reserved_mem_ops rmem_swiotlb_ops = {
|
||||
.device_init = rmem_swiotlb_device_init,
|
||||
.device_release = rmem_swiotlb_device_release,
|
||||
};
|
||||
|
||||
static int __init rmem_swiotlb_setup(struct reserved_mem *rmem)
|
||||
static int __init rmem_swiotlb_setup(unsigned long node,
|
||||
struct reserved_mem *rmem)
|
||||
{
|
||||
unsigned long node = rmem->fdt_node;
|
||||
|
||||
if (of_get_flat_dt_prop(node, "reusable", NULL) ||
|
||||
of_get_flat_dt_prop(node, "linux,cma-default", NULL) ||
|
||||
of_get_flat_dt_prop(node, "linux,dma-default", NULL) ||
|
||||
of_get_flat_dt_prop(node, "no-map", NULL))
|
||||
return -EINVAL;
|
||||
|
||||
rmem->ops = &rmem_swiotlb_ops;
|
||||
pr_info("Reserved memory: created restricted DMA pool at %pa, size %ld MiB\n",
|
||||
&rmem->base, (unsigned long)rmem->size / SZ_1M);
|
||||
return 0;
|
||||
}
|
||||
|
||||
RESERVEDMEM_OF_DECLARE(dma, "restricted-dma-pool", rmem_swiotlb_setup);
|
||||
static const struct reserved_mem_ops rmem_swiotlb_ops = {
|
||||
.node_init = rmem_swiotlb_setup,
|
||||
.device_init = rmem_swiotlb_device_init,
|
||||
.device_release = rmem_swiotlb_device_release,
|
||||
};
|
||||
|
||||
RESERVEDMEM_OF_DECLARE(dma, "restricted-dma-pool", &rmem_swiotlb_ops);
|
||||
#endif /* CONFIG_DMA_RESTRICTED_POOL */
|
||||
|
||||
3
mm/cma.c
3
mm/cma.c
@@ -52,6 +52,7 @@ const char *cma_get_name(const struct cma *cma)
|
||||
{
|
||||
return cma->name;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cma_get_name);
|
||||
|
||||
static unsigned long cma_bitmap_aligned_mask(const struct cma *cma,
|
||||
unsigned int align_order)
|
||||
@@ -951,6 +952,7 @@ struct page *cma_alloc(struct cma *cma, unsigned long count,
|
||||
|
||||
return page;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cma_alloc);
|
||||
|
||||
static struct cma_memrange *find_cma_memrange(struct cma *cma,
|
||||
const struct page *pages, unsigned long count)
|
||||
@@ -1030,6 +1032,7 @@ bool cma_release(struct cma *cma, const struct page *pages,
|
||||
|
||||
return true;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cma_release);
|
||||
|
||||
bool cma_release_frozen(struct cma *cma, const struct page *pages,
|
||||
unsigned long count)
|
||||
|
||||
@@ -20,12 +20,19 @@ static char *directions[] = {
|
||||
"FROM_DEVICE",
|
||||
};
|
||||
|
||||
static char *mode[] = {
|
||||
"SINGLE_MODE",
|
||||
"SG_MODE",
|
||||
};
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
struct map_benchmark map;
|
||||
int fd, opt;
|
||||
/* default single thread, run 20 seconds on NUMA_NO_NODE */
|
||||
int threads = 1, seconds = 20, node = -1;
|
||||
/* default single map mode */
|
||||
int map_mode = DMA_MAP_BENCH_SINGLE_MODE;
|
||||
/* default dma mask 32bit, bidirectional DMA */
|
||||
int bits = 32, xdelay = 0, dir = DMA_MAP_BIDIRECTIONAL;
|
||||
/* default granule 1 PAGESIZE */
|
||||
@@ -33,7 +40,7 @@ int main(int argc, char **argv)
|
||||
|
||||
int cmd = DMA_MAP_BENCHMARK;
|
||||
|
||||
while ((opt = getopt(argc, argv, "t:s:n:b:d:x:g:")) != -1) {
|
||||
while ((opt = getopt(argc, argv, "t:s:n:b:d:x:g:m:")) != -1) {
|
||||
switch (opt) {
|
||||
case 't':
|
||||
threads = atoi(optarg);
|
||||
@@ -56,11 +63,20 @@ int main(int argc, char **argv)
|
||||
case 'g':
|
||||
granule = atoi(optarg);
|
||||
break;
|
||||
case 'm':
|
||||
map_mode = atoi(optarg);
|
||||
break;
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (map_mode < 0 || map_mode >= DMA_MAP_BENCH_MODE_MAX) {
|
||||
fprintf(stderr, "invalid map mode, SINGLE_MODE:%d, SG_MODE: %d\n",
|
||||
DMA_MAP_BENCH_SINGLE_MODE, DMA_MAP_BENCH_SG_MODE);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (threads <= 0 || threads > DMA_MAP_MAX_THREADS) {
|
||||
fprintf(stderr, "invalid number of threads, must be in 1-%d\n",
|
||||
DMA_MAP_MAX_THREADS);
|
||||
@@ -110,14 +126,15 @@ int main(int argc, char **argv)
|
||||
map.dma_dir = dir;
|
||||
map.dma_trans_ns = xdelay;
|
||||
map.granule = granule;
|
||||
map.map_mode = map_mode;
|
||||
|
||||
if (ioctl(fd, cmd, &map)) {
|
||||
perror("ioctl");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
printf("dma mapping benchmark: threads:%d seconds:%d node:%d dir:%s granule: %d\n",
|
||||
threads, seconds, node, directions[dir], granule);
|
||||
printf("dma mapping benchmark(%s): threads:%d seconds:%d node:%d dir:%s granule:%d\n",
|
||||
mode[map_mode], threads, seconds, node, directions[dir], granule);
|
||||
printf("average map latency(us):%.1f standard deviation:%.1f\n",
|
||||
map.avg_map_100ns/10.0, map.map_stddev/10.0);
|
||||
printf("average unmap latency(us):%.1f standard deviation:%.1f\n",
|
||||
|
||||
Reference in New Issue
Block a user