Merge tag 'dma-mapping-7.1-2026-04-16' of git://git.kernel.org/pub/scm/linux/kernel/git/mszyprowski/linux

Pull dma-mapping updates from Marek Szyprowski:

 - added support for batched cache sync, which improves the performance
   of dma_map/unmap_sg() operations on the ARM64 architecture (Barry Song)

 - introduced DMA_ATTR_CC_SHARED attribute for explicitly shared memory
   used in confidential computing (Jiri Pirko)

 - refactored spaghetti-like code in drivers/of/of_reserved_mem.c and
   its clients (Marek Szyprowski, shared branch with device-tree updates
   to avoid merge conflicts)

 - prepared Contiguous Memory Allocator related code for making dma-buf
   drivers modularized (Maxime Ripard)

 - added support for benchmarking dma_map_sg() calls to tools/dma
   utility (Qinxin Xia)

* tag 'dma-mapping-7.1-2026-04-16' of git://git.kernel.org/pub/scm/linux/kernel/git/mszyprowski/linux: (24 commits)
  dma-buf: heaps: system: document system_cc_shared heap
  dma-buf: heaps: system: add system_cc_shared heap for explicitly shared memory
  dma-mapping: introduce DMA_ATTR_CC_SHARED for shared memory
  mm: cma: Export cma_alloc(), cma_release() and cma_get_name()
  dma: contiguous: Export dev_get_cma_area()
  dma: contiguous: Make dma_contiguous_default_area static
  dma: contiguous: Make dev_get_cma_area() a proper function
  dma: contiguous: Turn heap registration logic around
  of: reserved_mem: rework fdt_init_reserved_mem_node()
  of: reserved_mem: clarify fdt_scan_reserved_mem*() functions
  of: reserved_mem: rearrange code a bit
  of: reserved_mem: replace CMA quirks by generic methods
  of: reserved_mem: switch to ops based OF_DECLARE()
  of: reserved_mem: use -ENODEV instead of -ENOENT
  of: reserved_mem: remove fdt node from the structure
  dma-mapping: fix false kernel-doc comment marker
  dma-mapping: Support batch mode for dma_direct_{map,unmap}_sg
  dma-mapping: Separate DMA sync issuing and completion waiting
  arm64: Provide dcache_inval_poc_nosync helper
  arm64: Provide dcache_clean_poc_nosync helper
  ...
This commit is contained in:
Linus Torvalds
2026-04-17 11:12:42 -07:00
33 changed files with 931 additions and 359 deletions

View File

@@ -16,6 +16,13 @@ following heaps:
- The ``system`` heap allocates virtually contiguous, cacheable, buffers. - The ``system`` heap allocates virtually contiguous, cacheable, buffers.
- The ``system_cc_shared`` heap allocates virtually contiguous, cacheable,
buffers using shared (decrypted) memory. It is only present on
confidential computing (CoCo) VMs where memory encryption is active
(e.g., AMD SEV, Intel TDX). The allocated pages have the encryption
bit cleared, making them accessible for device DMA without TDISP
support. On non-CoCo VM configurations, this heap is not registered.
- The ``default_cma_region`` heap allocates physically contiguous, - The ``default_cma_region`` heap allocates physically contiguous,
cacheable, buffers. Only present if a CMA region is present. Such a cacheable, buffers. Only present if a CMA region is present. Such a
region is usually created either through the kernel commandline region is usually created either through the kernel commandline

View File

@@ -54,6 +54,7 @@ config ARM64
select ARCH_HAS_STRICT_MODULE_RWX select ARCH_HAS_STRICT_MODULE_RWX
select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_CPU
select ARCH_HAS_BATCHED_DMA_SYNC
select ARCH_HAS_SYSCALL_WRAPPER select ARCH_HAS_SYSCALL_WRAPPER
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_ZONE_DMA_SET if EXPERT select ARCH_HAS_ZONE_DMA_SET if EXPERT

View File

@@ -371,14 +371,13 @@ alternative_endif
* [start, end) with dcache line size explicitly provided. * [start, end) with dcache line size explicitly provided.
* *
* op: operation passed to dc instruction * op: operation passed to dc instruction
* domain: domain used in dsb instruction
* start: starting virtual address of the region * start: starting virtual address of the region
* end: end virtual address of the region * end: end virtual address of the region
* linesz: dcache line size * linesz: dcache line size
* fixup: optional label to branch to on user fault * fixup: optional label to branch to on user fault
* Corrupts: start, end, tmp * Corrupts: start, end, tmp
*/ */
.macro dcache_by_myline_op op, domain, start, end, linesz, tmp, fixup .macro dcache_by_myline_op_nosync op, start, end, linesz, tmp, fixup
sub \tmp, \linesz, #1 sub \tmp, \linesz, #1
bic \start, \start, \tmp bic \start, \start, \tmp
alternative_if ARM64_WORKAROUND_4311569 alternative_if ARM64_WORKAROUND_4311569
@@ -412,14 +411,28 @@ alternative_if ARM64_WORKAROUND_4311569
cbnz \start, .Ldcache_op\@ cbnz \start, .Ldcache_op\@
.endif .endif
alternative_else_nop_endif alternative_else_nop_endif
dsb \domain
_cond_uaccess_extable .Ldcache_op\@, \fixup _cond_uaccess_extable .Ldcache_op\@, \fixup
.endm .endm
/* /*
* Macro to perform a data cache maintenance for the interval * Macro to perform a data cache maintenance for the interval
* [start, end) * [start, end) without waiting for completion
*
* op: operation passed to dc instruction
* start: starting virtual address of the region
* end: end virtual address of the region
* fixup: optional label to branch to on user fault
* Corrupts: start, end, tmp1, tmp2
*/
.macro dcache_by_line_op_nosync op, start, end, tmp1, tmp2, fixup
dcache_line_size \tmp1, \tmp2
dcache_by_myline_op_nosync \op, \start, \end, \tmp1, \tmp2, \fixup
.endm
/*
* Macro to perform a data cache maintenance for the interval
* [start, end) and wait for completion
* *
* op: operation passed to dc instruction * op: operation passed to dc instruction
* domain: domain used in dsb instruction * domain: domain used in dsb instruction
@@ -429,8 +442,8 @@ alternative_else_nop_endif
* Corrupts: start, end, tmp1, tmp2 * Corrupts: start, end, tmp1, tmp2
*/ */
.macro dcache_by_line_op op, domain, start, end, tmp1, tmp2, fixup .macro dcache_by_line_op op, domain, start, end, tmp1, tmp2, fixup
dcache_line_size \tmp1, \tmp2 dcache_by_line_op_nosync \op, \start, \end, \tmp1, \tmp2, \fixup
dcache_by_myline_op \op, \domain, \start, \end, \tmp1, \tmp2, \fixup dsb \domain
.endm .endm
/* /*

View File

@@ -87,6 +87,11 @@ int cache_line_size(void);
#define dma_get_cache_alignment cache_line_size #define dma_get_cache_alignment cache_line_size
static inline void arch_sync_dma_flush(void)
{
dsb(sy);
}
/* Compress a u64 MPIDR value into 32 bits. */ /* Compress a u64 MPIDR value into 32 bits. */
static inline u64 arch_compact_of_hwid(u64 id) static inline u64 arch_compact_of_hwid(u64 id)
{ {

View File

@@ -74,6 +74,8 @@ extern void icache_inval_pou(unsigned long start, unsigned long end);
extern void dcache_clean_inval_poc(unsigned long start, unsigned long end); extern void dcache_clean_inval_poc(unsigned long start, unsigned long end);
extern void dcache_inval_poc(unsigned long start, unsigned long end); extern void dcache_inval_poc(unsigned long start, unsigned long end);
extern void dcache_clean_poc(unsigned long start, unsigned long end); extern void dcache_clean_poc(unsigned long start, unsigned long end);
extern void dcache_inval_poc_nosync(unsigned long start, unsigned long end);
extern void dcache_clean_poc_nosync(unsigned long start, unsigned long end);
extern void dcache_clean_pop(unsigned long start, unsigned long end); extern void dcache_clean_pop(unsigned long start, unsigned long end);
extern void dcache_clean_pou(unsigned long start, unsigned long end); extern void dcache_clean_pou(unsigned long start, unsigned long end);
extern long caches_clean_inval_user_pou(unsigned long start, unsigned long end); extern long caches_clean_inval_user_pou(unsigned long start, unsigned long end);

View File

@@ -64,7 +64,8 @@ SYM_CODE_START(arm64_relocate_new_kernel)
mov x19, x13 mov x19, x13
copy_page x13, x12, x1, x2, x3, x4, x5, x6, x7, x8 copy_page x13, x12, x1, x2, x3, x4, x5, x6, x7, x8
add x1, x19, #PAGE_SIZE add x1, x19, #PAGE_SIZE
dcache_by_myline_op civac, sy, x19, x1, x15, x20 dcache_by_myline_op_nosync civac, x19, x1, x15, x20
dsb sy
b .Lnext b .Lnext
.Ltest_indirection: .Ltest_indirection:
tbz x16, IND_INDIRECTION_BIT, .Ltest_destination tbz x16, IND_INDIRECTION_BIT, .Ltest_destination

View File

@@ -132,17 +132,7 @@ alternative_else_nop_endif
ret ret
SYM_FUNC_END(dcache_clean_pou) SYM_FUNC_END(dcache_clean_pou)
/* .macro __dcache_inval_poc_nosync
* dcache_inval_poc(start, end)
*
* Ensure that any D-cache lines for the interval [start, end)
* are invalidated. Any partial lines at the ends of the interval are
* also cleaned to PoC to prevent data loss.
*
* - start - kernel start address of region
* - end - kernel end address of region
*/
SYM_FUNC_START(__pi_dcache_inval_poc)
dcache_line_size x2, x3 dcache_line_size x2, x3
sub x3, x2, #1 sub x3, x2, #1
tst x1, x3 // end cache line aligned? tst x1, x3 // end cache line aligned?
@@ -158,11 +148,41 @@ SYM_FUNC_START(__pi_dcache_inval_poc)
3: add x0, x0, x2 3: add x0, x0, x2
cmp x0, x1 cmp x0, x1
b.lo 2b b.lo 2b
.endm
/*
* dcache_inval_poc(start, end)
*
* Ensure that any D-cache lines for the interval [start, end)
* are invalidated. Any partial lines at the ends of the interval are
* also cleaned to PoC to prevent data loss.
*
* - start - kernel start address of region
* - end - kernel end address of region
*/
SYM_FUNC_START(__pi_dcache_inval_poc)
__dcache_inval_poc_nosync
dsb sy dsb sy
ret ret
SYM_FUNC_END(__pi_dcache_inval_poc) SYM_FUNC_END(__pi_dcache_inval_poc)
SYM_FUNC_ALIAS(dcache_inval_poc, __pi_dcache_inval_poc) SYM_FUNC_ALIAS(dcache_inval_poc, __pi_dcache_inval_poc)
/*
* dcache_inval_poc_nosync(start, end)
*
* Issue the instructions of D-cache lines for the interval [start, end)
* for invalidation. Not necessarily cleaned to PoC till an explicit dsb
* sy is issued later
*
* - start - kernel start address of region
* - end - kernel end address of region
*/
SYM_FUNC_START(__pi_dcache_inval_poc_nosync)
__dcache_inval_poc_nosync
ret
SYM_FUNC_END(__pi_dcache_inval_poc_nosync)
SYM_FUNC_ALIAS(dcache_inval_poc_nosync, __pi_dcache_inval_poc_nosync)
/* /*
* dcache_clean_poc(start, end) * dcache_clean_poc(start, end)
* *
@@ -178,6 +198,21 @@ SYM_FUNC_START(__pi_dcache_clean_poc)
SYM_FUNC_END(__pi_dcache_clean_poc) SYM_FUNC_END(__pi_dcache_clean_poc)
SYM_FUNC_ALIAS(dcache_clean_poc, __pi_dcache_clean_poc) SYM_FUNC_ALIAS(dcache_clean_poc, __pi_dcache_clean_poc)
/*
* dcache_clean_poc_nosync(start, end)
*
* Issue the instructions of D-cache lines for the interval [start, end).
* not necessarily cleaned to the PoC till an explicit dsb sy afterward.
*
* - start - virtual start address of region
* - end - virtual end address of region
*/
SYM_FUNC_START(__pi_dcache_clean_poc_nosync)
dcache_by_line_op_nosync cvac, x0, x1, x2, x3
ret
SYM_FUNC_END(__pi_dcache_clean_poc_nosync)
SYM_FUNC_ALIAS(dcache_clean_poc_nosync, __pi_dcache_clean_poc_nosync)
/* /*
* dcache_clean_pop(start, end) * dcache_clean_pop(start, end)
* *

View File

@@ -17,7 +17,7 @@ void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
{ {
unsigned long start = (unsigned long)phys_to_virt(paddr); unsigned long start = (unsigned long)phys_to_virt(paddr);
dcache_clean_poc(start, start + size); dcache_clean_poc_nosync(start, start + size);
} }
void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
@@ -28,7 +28,7 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
if (dir == DMA_TO_DEVICE) if (dir == DMA_TO_DEVICE)
return; return;
dcache_inval_poc(start, start + size); dcache_inval_poc_nosync(start, start + size);
} }
void arch_dma_prep_coherent(struct page *page, size_t size) void arch_dma_prep_coherent(struct page *page, size_t size)

View File

@@ -14,7 +14,6 @@
#include <linux/cma.h> #include <linux/cma.h>
#include <linux/dma-buf.h> #include <linux/dma-buf.h>
#include <linux/dma-buf/heaps/cma.h>
#include <linux/dma-heap.h> #include <linux/dma-heap.h>
#include <linux/dma-map-ops.h> #include <linux/dma-map-ops.h>
#include <linux/err.h> #include <linux/err.h>
@@ -30,19 +29,6 @@
#define DEFAULT_CMA_NAME "default_cma_region" #define DEFAULT_CMA_NAME "default_cma_region"
static struct cma *dma_areas[MAX_CMA_AREAS] __initdata;
static unsigned int dma_areas_num __initdata;
int __init dma_heap_cma_register_heap(struct cma *cma)
{
if (dma_areas_num >= ARRAY_SIZE(dma_areas))
return -EINVAL;
dma_areas[dma_areas_num++] = cma;
return 0;
}
struct cma_heap { struct cma_heap {
struct dma_heap *heap; struct dma_heap *heap;
struct cma *cma; struct cma *cma;
@@ -411,6 +397,7 @@ static int __init __add_cma_heap(struct cma *cma, const char *name)
static int __init add_cma_heaps(void) static int __init add_cma_heaps(void)
{ {
struct cma *default_cma = dev_get_cma_area(NULL); struct cma *default_cma = dev_get_cma_area(NULL);
struct cma *cma;
unsigned int i; unsigned int i;
int ret; int ret;
@@ -420,9 +407,7 @@ static int __init add_cma_heaps(void)
return ret; return ret;
} }
for (i = 0; i < dma_areas_num; i++) { for (i = 0; (cma = dma_contiguous_get_area_by_idx(i)) != NULL; i++) {
struct cma *cma = dma_areas[i];
ret = __add_cma_heap(cma, cma_get_name(cma)); ret = __add_cma_heap(cma, cma_get_name(cma));
if (ret) { if (ret) {
pr_warn("Failed to add CMA heap %s", cma_get_name(cma)); pr_warn("Failed to add CMA heap %s", cma_get_name(cma));

View File

@@ -10,17 +10,25 @@
* Andrew F. Davis <afd@ti.com> * Andrew F. Davis <afd@ti.com>
*/ */
#include <linux/cc_platform.h>
#include <linux/dma-buf.h> #include <linux/dma-buf.h>
#include <linux/dma-mapping.h> #include <linux/dma-mapping.h>
#include <linux/dma-heap.h> #include <linux/dma-heap.h>
#include <linux/err.h> #include <linux/err.h>
#include <linux/highmem.h> #include <linux/highmem.h>
#include <linux/mem_encrypt.h>
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/set_memory.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/pgtable.h>
#include <linux/scatterlist.h> #include <linux/scatterlist.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
struct system_heap_priv {
bool cc_shared;
};
struct system_heap_buffer { struct system_heap_buffer {
struct dma_heap *heap; struct dma_heap *heap;
struct list_head attachments; struct list_head attachments;
@@ -29,6 +37,7 @@ struct system_heap_buffer {
struct sg_table sg_table; struct sg_table sg_table;
int vmap_cnt; int vmap_cnt;
void *vaddr; void *vaddr;
bool cc_shared;
}; };
struct dma_heap_attachment { struct dma_heap_attachment {
@@ -36,6 +45,7 @@ struct dma_heap_attachment {
struct sg_table table; struct sg_table table;
struct list_head list; struct list_head list;
bool mapped; bool mapped;
bool cc_shared;
}; };
#define LOW_ORDER_GFP (GFP_HIGHUSER | __GFP_ZERO) #define LOW_ORDER_GFP (GFP_HIGHUSER | __GFP_ZERO)
@@ -52,6 +62,34 @@ static gfp_t order_flags[] = {HIGH_ORDER_GFP, HIGH_ORDER_GFP, LOW_ORDER_GFP};
static const unsigned int orders[] = {8, 4, 0}; static const unsigned int orders[] = {8, 4, 0};
#define NUM_ORDERS ARRAY_SIZE(orders) #define NUM_ORDERS ARRAY_SIZE(orders)
static int system_heap_set_page_decrypted(struct page *page)
{
unsigned long addr = (unsigned long)page_address(page);
unsigned int nr_pages = 1 << compound_order(page);
int ret;
ret = set_memory_decrypted(addr, nr_pages);
if (ret)
pr_warn_ratelimited("dma-buf system heap: failed to decrypt page at %p\n",
page_address(page));
return ret;
}
static int system_heap_set_page_encrypted(struct page *page)
{
unsigned long addr = (unsigned long)page_address(page);
unsigned int nr_pages = 1 << compound_order(page);
int ret;
ret = set_memory_encrypted(addr, nr_pages);
if (ret)
pr_warn_ratelimited("dma-buf system heap: failed to re-encrypt page at %p, leaking memory\n",
page_address(page));
return ret;
}
static int dup_sg_table(struct sg_table *from, struct sg_table *to) static int dup_sg_table(struct sg_table *from, struct sg_table *to)
{ {
struct scatterlist *sg, *new_sg; struct scatterlist *sg, *new_sg;
@@ -90,6 +128,7 @@ static int system_heap_attach(struct dma_buf *dmabuf,
a->dev = attachment->dev; a->dev = attachment->dev;
INIT_LIST_HEAD(&a->list); INIT_LIST_HEAD(&a->list);
a->mapped = false; a->mapped = false;
a->cc_shared = buffer->cc_shared;
attachment->priv = a; attachment->priv = a;
@@ -119,9 +158,11 @@ static struct sg_table *system_heap_map_dma_buf(struct dma_buf_attachment *attac
{ {
struct dma_heap_attachment *a = attachment->priv; struct dma_heap_attachment *a = attachment->priv;
struct sg_table *table = &a->table; struct sg_table *table = &a->table;
unsigned long attrs;
int ret; int ret;
ret = dma_map_sgtable(attachment->dev, table, direction, 0); attrs = a->cc_shared ? DMA_ATTR_CC_SHARED : 0;
ret = dma_map_sgtable(attachment->dev, table, direction, attrs);
if (ret) if (ret)
return ERR_PTR(ret); return ERR_PTR(ret);
@@ -188,8 +229,13 @@ static int system_heap_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
unsigned long addr = vma->vm_start; unsigned long addr = vma->vm_start;
unsigned long pgoff = vma->vm_pgoff; unsigned long pgoff = vma->vm_pgoff;
struct scatterlist *sg; struct scatterlist *sg;
pgprot_t prot;
int i, ret; int i, ret;
prot = vma->vm_page_prot;
if (buffer->cc_shared)
prot = pgprot_decrypted(prot);
for_each_sgtable_sg(table, sg, i) { for_each_sgtable_sg(table, sg, i) {
unsigned long n = sg->length >> PAGE_SHIFT; unsigned long n = sg->length >> PAGE_SHIFT;
@@ -206,8 +252,7 @@ static int system_heap_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
if (addr + size > vma->vm_end) if (addr + size > vma->vm_end)
size = vma->vm_end - addr; size = vma->vm_end - addr;
ret = remap_pfn_range(vma, addr, page_to_pfn(page), ret = remap_pfn_range(vma, addr, page_to_pfn(page), size, prot);
size, vma->vm_page_prot);
if (ret) if (ret)
return ret; return ret;
@@ -225,6 +270,7 @@ static void *system_heap_do_vmap(struct system_heap_buffer *buffer)
struct page **pages = vmalloc(sizeof(struct page *) * npages); struct page **pages = vmalloc(sizeof(struct page *) * npages);
struct page **tmp = pages; struct page **tmp = pages;
struct sg_page_iter piter; struct sg_page_iter piter;
pgprot_t prot;
void *vaddr; void *vaddr;
if (!pages) if (!pages)
@@ -235,7 +281,10 @@ static void *system_heap_do_vmap(struct system_heap_buffer *buffer)
*tmp++ = sg_page_iter_page(&piter); *tmp++ = sg_page_iter_page(&piter);
} }
vaddr = vmap(pages, npages, VM_MAP, PAGE_KERNEL); prot = PAGE_KERNEL;
if (buffer->cc_shared)
prot = pgprot_decrypted(prot);
vaddr = vmap(pages, npages, VM_MAP, prot);
vfree(pages); vfree(pages);
if (!vaddr) if (!vaddr)
@@ -296,6 +345,14 @@ static void system_heap_dma_buf_release(struct dma_buf *dmabuf)
for_each_sgtable_sg(table, sg, i) { for_each_sgtable_sg(table, sg, i) {
struct page *page = sg_page(sg); struct page *page = sg_page(sg);
/*
* Intentionally leak pages that cannot be re-encrypted
* to prevent shared memory from being reused.
*/
if (buffer->cc_shared &&
system_heap_set_page_encrypted(page))
continue;
__free_pages(page, compound_order(page)); __free_pages(page, compound_order(page));
} }
sg_free_table(table); sg_free_table(table);
@@ -347,6 +404,8 @@ static struct dma_buf *system_heap_allocate(struct dma_heap *heap,
DEFINE_DMA_BUF_EXPORT_INFO(exp_info); DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
unsigned long size_remaining = len; unsigned long size_remaining = len;
unsigned int max_order = orders[0]; unsigned int max_order = orders[0];
struct system_heap_priv *priv = dma_heap_get_drvdata(heap);
bool cc_shared = priv->cc_shared;
struct dma_buf *dmabuf; struct dma_buf *dmabuf;
struct sg_table *table; struct sg_table *table;
struct scatterlist *sg; struct scatterlist *sg;
@@ -362,6 +421,7 @@ static struct dma_buf *system_heap_allocate(struct dma_heap *heap,
mutex_init(&buffer->lock); mutex_init(&buffer->lock);
buffer->heap = heap; buffer->heap = heap;
buffer->len = len; buffer->len = len;
buffer->cc_shared = cc_shared;
INIT_LIST_HEAD(&pages); INIT_LIST_HEAD(&pages);
i = 0; i = 0;
@@ -396,6 +456,14 @@ static struct dma_buf *system_heap_allocate(struct dma_heap *heap,
list_del(&page->lru); list_del(&page->lru);
} }
if (cc_shared) {
for_each_sgtable_sg(table, sg, i) {
ret = system_heap_set_page_decrypted(sg_page(sg));
if (ret)
goto free_pages;
}
}
/* create the dmabuf */ /* create the dmabuf */
exp_info.exp_name = dma_heap_get_name(heap); exp_info.exp_name = dma_heap_get_name(heap);
exp_info.ops = &system_heap_buf_ops; exp_info.ops = &system_heap_buf_ops;
@@ -413,6 +481,13 @@ free_pages:
for_each_sgtable_sg(table, sg, i) { for_each_sgtable_sg(table, sg, i) {
struct page *p = sg_page(sg); struct page *p = sg_page(sg);
/*
* Intentionally leak pages that cannot be re-encrypted
* to prevent shared memory from being reused.
*/
if (buffer->cc_shared &&
system_heap_set_page_encrypted(p))
continue;
__free_pages(p, compound_order(p)); __free_pages(p, compound_order(p));
} }
sg_free_table(table); sg_free_table(table);
@@ -428,6 +503,14 @@ static const struct dma_heap_ops system_heap_ops = {
.allocate = system_heap_allocate, .allocate = system_heap_allocate,
}; };
static struct system_heap_priv system_heap_priv = {
.cc_shared = false,
};
static struct system_heap_priv system_heap_cc_shared_priv = {
.cc_shared = true,
};
static int __init system_heap_create(void) static int __init system_heap_create(void)
{ {
struct dma_heap_export_info exp_info; struct dma_heap_export_info exp_info;
@@ -435,8 +518,18 @@ static int __init system_heap_create(void)
exp_info.name = "system"; exp_info.name = "system";
exp_info.ops = &system_heap_ops; exp_info.ops = &system_heap_ops;
exp_info.priv = NULL; exp_info.priv = &system_heap_priv;
sys_heap = dma_heap_add(&exp_info);
if (IS_ERR(sys_heap))
return PTR_ERR(sys_heap);
if (IS_ENABLED(CONFIG_HIGHMEM) ||
!cc_platform_has(CC_ATTR_MEM_ENCRYPT))
return 0;
exp_info.name = "system_cc_shared";
exp_info.priv = &system_heap_cc_shared_priv;
sys_heap = dma_heap_add(&exp_info); sys_heap = dma_heap_add(&exp_info);
if (IS_ERR(sys_heap)) if (IS_ERR(sys_heap))
return PTR_ERR(sys_heap); return PTR_ERR(sys_heap);

View File

@@ -1106,8 +1106,10 @@ void iommu_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
return; return;
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle); phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
if (!dev_is_dma_coherent(dev)) if (!dev_is_dma_coherent(dev)) {
arch_sync_dma_for_cpu(phys, size, dir); arch_sync_dma_for_cpu(phys, size, dir);
arch_sync_dma_flush();
}
swiotlb_sync_single_for_cpu(dev, phys, size, dir); swiotlb_sync_single_for_cpu(dev, phys, size, dir);
} }
@@ -1123,8 +1125,10 @@ void iommu_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle); phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
swiotlb_sync_single_for_device(dev, phys, size, dir); swiotlb_sync_single_for_device(dev, phys, size, dir);
if (!dev_is_dma_coherent(dev)) if (!dev_is_dma_coherent(dev)) {
arch_sync_dma_for_device(phys, size, dir); arch_sync_dma_for_device(phys, size, dir);
arch_sync_dma_flush();
}
} }
void iommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, void iommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
@@ -1133,13 +1137,15 @@ void iommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
struct scatterlist *sg; struct scatterlist *sg;
int i; int i;
if (sg_dma_is_swiotlb(sgl)) if (sg_dma_is_swiotlb(sgl)) {
for_each_sg(sgl, sg, nelems, i) for_each_sg(sgl, sg, nelems, i)
iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg), iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
sg->length, dir); sg->length, dir);
else if (!dev_is_dma_coherent(dev)) } else if (!dev_is_dma_coherent(dev)) {
for_each_sg(sgl, sg, nelems, i) for_each_sg(sgl, sg, nelems, i)
arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir); arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
arch_sync_dma_flush();
}
} }
void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
@@ -1148,14 +1154,16 @@ void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
struct scatterlist *sg; struct scatterlist *sg;
int i; int i;
if (sg_dma_is_swiotlb(sgl)) if (sg_dma_is_swiotlb(sgl)) {
for_each_sg(sgl, sg, nelems, i) for_each_sg(sgl, sg, nelems, i)
iommu_dma_sync_single_for_device(dev, iommu_dma_sync_single_for_device(dev,
sg_dma_address(sg), sg_dma_address(sg),
sg->length, dir); sg->length, dir);
else if (!dev_is_dma_coherent(dev)) } else if (!dev_is_dma_coherent(dev)) {
for_each_sg(sgl, sg, nelems, i) for_each_sg(sgl, sg, nelems, i)
arch_sync_dma_for_device(sg_phys(sg), sg->length, dir); arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
arch_sync_dma_flush();
}
} }
static phys_addr_t iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys, static phys_addr_t iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
@@ -1230,8 +1238,10 @@ dma_addr_t iommu_dma_map_phys(struct device *dev, phys_addr_t phys, size_t size,
return DMA_MAPPING_ERROR; return DMA_MAPPING_ERROR;
} }
if (!coherent && !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO))) if (!coherent && !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO))) {
arch_sync_dma_for_device(phys, size, dir); arch_sync_dma_for_device(phys, size, dir);
arch_sync_dma_flush();
}
iova = __iommu_dma_map(dev, phys, size, prot, dma_mask); iova = __iommu_dma_map(dev, phys, size, prot, dma_mask);
if (iova == DMA_MAPPING_ERROR && if (iova == DMA_MAPPING_ERROR &&
@@ -1254,8 +1264,10 @@ void iommu_dma_unmap_phys(struct device *dev, dma_addr_t dma_handle,
if (WARN_ON(!phys)) if (WARN_ON(!phys))
return; return;
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev)) if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev)) {
arch_sync_dma_for_cpu(phys, size, dir); arch_sync_dma_for_cpu(phys, size, dir);
arch_sync_dma_flush();
}
__iommu_dma_unmap(dev, dma_handle, size); __iommu_dma_unmap(dev, dma_handle, size);
@@ -2004,6 +2016,8 @@ int dma_iova_sync(struct device *dev, struct dma_iova_state *state,
dma_addr_t addr = state->addr + offset; dma_addr_t addr = state->addr + offset;
size_t iova_start_pad = iova_offset(iovad, addr); size_t iova_start_pad = iova_offset(iovad, addr);
if (!dev_is_dma_coherent(dev))
arch_sync_dma_flush();
return iommu_sync_map(domain, addr - iova_start_pad, return iommu_sync_map(domain, addr - iova_start_pad,
iova_align(iovad, size + iova_start_pad)); iova_align(iovad, size + iova_start_pad));
} }
@@ -2017,6 +2031,8 @@ static void iommu_dma_iova_unlink_range_slow(struct device *dev,
struct iommu_dma_cookie *cookie = domain->iova_cookie; struct iommu_dma_cookie *cookie = domain->iova_cookie;
struct iova_domain *iovad = &cookie->iovad; struct iova_domain *iovad = &cookie->iovad;
size_t iova_start_pad = iova_offset(iovad, addr); size_t iova_start_pad = iova_offset(iovad, addr);
bool need_sync_dma = !dev_is_dma_coherent(dev) &&
!(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO));
dma_addr_t end = addr + size; dma_addr_t end = addr + size;
do { do {
@@ -2040,6 +2056,9 @@ static void iommu_dma_iova_unlink_range_slow(struct device *dev,
addr += len; addr += len;
iova_start_pad = 0; iova_start_pad = 0;
} while (addr < end); } while (addr < end);
if (need_sync_dma)
arch_sync_dma_flush();
} }
static void __iommu_dma_iova_unlink(struct device *dev, static void __iommu_dma_iova_unlink(struct device *dev,

View File

@@ -70,19 +70,20 @@ static void tegra210_emc_table_device_release(struct reserved_mem *rmem,
memunmap(timings); memunmap(timings);
} }
static const struct reserved_mem_ops tegra210_emc_table_ops = { static int tegra210_emc_table_init(unsigned long node,
.device_init = tegra210_emc_table_device_init, struct reserved_mem *rmem)
.device_release = tegra210_emc_table_device_release,
};
static int tegra210_emc_table_init(struct reserved_mem *rmem)
{ {
pr_debug("Tegra210 EMC table at %pa, size %lu bytes\n", &rmem->base, pr_debug("Tegra210 EMC table at %pa, size %lu bytes\n", &rmem->base,
(unsigned long)rmem->size); (unsigned long)rmem->size);
rmem->ops = &tegra210_emc_table_ops;
return 0; return 0;
} }
static const struct reserved_mem_ops tegra210_emc_table_ops = {
.node_init = tegra210_emc_table_init,
.device_init = tegra210_emc_table_device_init,
.device_release = tegra210_emc_table_device_release,
};
RESERVEDMEM_OF_DECLARE(tegra210_emc_table, "nvidia,tegra210-emc-table", RESERVEDMEM_OF_DECLARE(tegra210_emc_table, "nvidia,tegra210-emc-table",
tegra210_emc_table_init); &tegra210_emc_table_ops);

View File

@@ -1295,7 +1295,7 @@ void __init unflatten_device_tree(void)
void *fdt = initial_boot_params; void *fdt = initial_boot_params;
/* Save the statically-placed regions in the reserved_mem array */ /* Save the statically-placed regions in the reserved_mem array */
fdt_scan_reserved_mem_reg_nodes(); fdt_scan_reserved_mem_late();
/* Populate an empty root node when bootloader doesn't provide one */ /* Populate an empty root node when bootloader doesn't provide one */
if (!fdt) { if (!fdt) {

View File

@@ -186,7 +186,7 @@ static inline struct device_node *__of_get_dma_parent(const struct device_node *
#endif #endif
int fdt_scan_reserved_mem(void); int fdt_scan_reserved_mem(void);
void __init fdt_scan_reserved_mem_reg_nodes(void); void __init fdt_scan_reserved_mem_late(void);
bool of_fdt_device_is_available(const void *blob, unsigned long node); bool of_fdt_device_is_available(const void *blob, unsigned long node);

View File

@@ -24,8 +24,6 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/memblock.h> #include <linux/memblock.h>
#include <linux/kmemleak.h> #include <linux/kmemleak.h>
#include <linux/cma.h>
#include <linux/dma-map-ops.h>
#include "of_private.h" #include "of_private.h"
@@ -104,30 +102,12 @@ static void __init alloc_reserved_mem_array(void)
reserved_mem = new_array; reserved_mem = new_array;
} }
static void __init fdt_init_reserved_mem_node(struct reserved_mem *rmem); static void fdt_init_reserved_mem_node(unsigned long node, const char *uname,
/* phys_addr_t base, phys_addr_t size);
* fdt_reserved_mem_save_node() - save fdt node for second pass initialization static int fdt_validate_reserved_mem_node(unsigned long node,
*/ phys_addr_t *align);
static void __init fdt_reserved_mem_save_node(unsigned long node, const char *uname, static int fdt_fixup_reserved_mem_node(unsigned long node,
phys_addr_t base, phys_addr_t size) phys_addr_t base, phys_addr_t size);
{
struct reserved_mem *rmem = &reserved_mem[reserved_mem_count];
if (reserved_mem_count == total_reserved_mem_cnt) {
pr_err("not enough space for all defined regions.\n");
return;
}
rmem->fdt_node = node;
rmem->name = uname;
rmem->base = base;
rmem->size = size;
/* Call the region specific initialization function */
fdt_init_reserved_mem_node(rmem);
reserved_mem_count++;
}
static int __init early_init_dt_reserve_memory(phys_addr_t base, static int __init early_init_dt_reserve_memory(phys_addr_t base,
phys_addr_t size, bool nomap) phys_addr_t size, bool nomap)
@@ -154,21 +134,19 @@ static int __init __reserved_mem_reserve_reg(unsigned long node,
const char *uname) const char *uname)
{ {
phys_addr_t base, size; phys_addr_t base, size;
int i, len; int i, len, err;
const __be32 *prop; const __be32 *prop;
bool nomap, default_cma; bool nomap;
prop = of_flat_dt_get_addr_size_prop(node, "reg", &len); prop = of_flat_dt_get_addr_size_prop(node, "reg", &len);
if (!prop) if (!prop)
return -ENOENT; return -ENOENT;
nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL; nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL;
default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL);
if (default_cma && cma_skip_dt_default_reserved_mem()) { err = fdt_validate_reserved_mem_node(node, NULL);
pr_err("Skipping dt linux,cma-default for \"cma=\" kernel param.\n"); if (err && err != -ENODEV)
return -EINVAL; return err;
}
for (i = 0; i < len; i++) { for (i = 0; i < len; i++) {
u64 b, s; u64 b, s;
@@ -179,10 +157,7 @@ static int __init __reserved_mem_reserve_reg(unsigned long node,
size = s; size = s;
if (size && early_init_dt_reserve_memory(base, size, nomap) == 0) { if (size && early_init_dt_reserve_memory(base, size, nomap) == 0) {
/* Architecture specific contiguous memory fixup. */ fdt_fixup_reserved_mem_node(node, base, size);
if (of_flat_dt_is_compatible(node, "shared-dma-pool") &&
of_get_flat_dt_prop(node, "reusable", NULL))
dma_contiguous_early_fixup(base, size);
pr_debug("Reserved memory: reserved region for node '%s': base %pa, size %lu MiB\n", pr_debug("Reserved memory: reserved region for node '%s': base %pa, size %lu MiB\n",
uname, &base, (unsigned long)(size / SZ_1M)); uname, &base, (unsigned long)(size / SZ_1M));
} else { } else {
@@ -216,19 +191,66 @@ static int __init __reserved_mem_check_root(unsigned long node)
return 0; return 0;
} }
static void __init __rmem_check_for_overlap(void); static int __init __rmem_cmp(const void *a, const void *b)
{
const struct reserved_mem *ra = a, *rb = b;
if (ra->base < rb->base)
return -1;
if (ra->base > rb->base)
return 1;
/*
* Put the dynamic allocations (address == 0, size == 0) before static
* allocations at address 0x0 so that overlap detection works
* correctly.
*/
if (ra->size < rb->size)
return -1;
if (ra->size > rb->size)
return 1;
return 0;
}
static void __init __rmem_check_for_overlap(void)
{
int i;
if (reserved_mem_count < 2)
return;
sort(reserved_mem, reserved_mem_count, sizeof(reserved_mem[0]),
__rmem_cmp, NULL);
for (i = 0; i < reserved_mem_count - 1; i++) {
struct reserved_mem *this, *next;
this = &reserved_mem[i];
next = &reserved_mem[i + 1];
if (this->base + this->size > next->base) {
phys_addr_t this_end, next_end;
this_end = this->base + this->size;
next_end = next->base + next->size;
pr_err("OVERLAP DETECTED!\n%s (%pa--%pa) overlaps with %s (%pa--%pa)\n",
this->name, &this->base, &this_end,
next->name, &next->base, &next_end);
}
}
}
/** /**
* fdt_scan_reserved_mem_reg_nodes() - Store info for the "reg" defined * fdt_scan_reserved_mem_late() - Scan FDT and initialize remaining reserved
* reserved memory regions. * memory regions.
* *
* This function is used to scan through the DT and store the * This function is used to scan again through the DT and initialize the
* information for the reserved memory regions that are defined using * "static" reserved memory regions, that are defined using the "reg"
* the "reg" property. The region node number, name, base address, and * property. Each such region is then initialized with its specific init
* size are all stored in the reserved_mem array by calling the * function and stored in the global reserved_mem array.
* fdt_reserved_mem_save_node() function.
*/ */
void __init fdt_scan_reserved_mem_reg_nodes(void) void __init fdt_scan_reserved_mem_late(void)
{ {
const void *fdt = initial_boot_params; const void *fdt = initial_boot_params;
phys_addr_t base, size; phys_addr_t base, size;
@@ -253,23 +275,25 @@ void __init fdt_scan_reserved_mem_reg_nodes(void)
fdt_for_each_subnode(child, fdt, node) { fdt_for_each_subnode(child, fdt, node) {
const char *uname; const char *uname;
bool default_cma = of_get_flat_dt_prop(child, "linux,cma-default", NULL);
u64 b, s; u64 b, s;
int ret;
if (!of_fdt_device_is_available(fdt, child)) if (!of_fdt_device_is_available(fdt, child))
continue; continue;
if (default_cma && cma_skip_dt_default_reserved_mem())
continue;
if (!of_flat_dt_get_addr_size(child, "reg", &b, &s)) if (!of_flat_dt_get_addr_size(child, "reg", &b, &s))
continue; continue;
ret = fdt_validate_reserved_mem_node(child, NULL);
if (ret && ret != -ENODEV)
continue;
base = b; base = b;
size = s; size = s;
if (size) { if (size) {
uname = fdt_get_name(fdt, child, NULL); uname = fdt_get_name(fdt, child, NULL);
fdt_reserved_mem_save_node(child, uname, base, size); fdt_init_reserved_mem_node(child, uname, base, size);
} }
} }
@@ -280,7 +304,14 @@ void __init fdt_scan_reserved_mem_reg_nodes(void)
static int __init __reserved_mem_alloc_size(unsigned long node, const char *uname); static int __init __reserved_mem_alloc_size(unsigned long node, const char *uname);
/* /*
* fdt_scan_reserved_mem() - scan a single FDT node for reserved memory * fdt_scan_reserved_mem() - reserve and allocate memory occupied by
* reserved memory regions.
*
* This function is used to scan through the FDT and mark memory occupied
* by all static (defined by the "reg" property) reserved memory regions.
* Then memory for all dynamic regions (defined by size & alignment) is
* allocated, a region specific init function is called and region information
* is stored in the reserved_mem array.
*/ */
int __init fdt_scan_reserved_mem(void) int __init fdt_scan_reserved_mem(void)
{ {
@@ -397,7 +428,7 @@ static int __init __reserved_mem_alloc_size(unsigned long node, const char *unam
phys_addr_t base = 0, align = 0, size; phys_addr_t base = 0, align = 0, size;
int i, len; int i, len;
const __be32 *prop; const __be32 *prop;
bool nomap, default_cma; bool nomap;
int ret; int ret;
prop = of_get_flat_dt_prop(node, "size", &len); prop = of_get_flat_dt_prop(node, "size", &len);
@@ -421,19 +452,10 @@ static int __init __reserved_mem_alloc_size(unsigned long node, const char *unam
} }
nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL; nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL;
default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL);
if (default_cma && cma_skip_dt_default_reserved_mem()) { ret = fdt_validate_reserved_mem_node(node, &align);
pr_err("Skipping dt linux,cma-default for \"cma=\" kernel param.\n"); if (ret && ret != -ENODEV)
return -EINVAL; return ret;
}
/* Need adjust the alignment to satisfy the CMA requirement */
if (IS_ENABLED(CONFIG_CMA)
&& of_flat_dt_is_compatible(node, "shared-dma-pool")
&& of_get_flat_dt_prop(node, "reusable", NULL)
&& !nomap)
align = max_t(phys_addr_t, align, CMA_MIN_ALIGNMENT_BYTES);
prop = of_flat_dt_get_addr_size_prop(node, "alloc-ranges", &len); prop = of_flat_dt_get_addr_size_prop(node, "alloc-ranges", &len);
if (prop) { if (prop) {
@@ -468,121 +490,151 @@ static int __init __reserved_mem_alloc_size(unsigned long node, const char *unam
uname, (unsigned long)(size / SZ_1M)); uname, (unsigned long)(size / SZ_1M));
return -ENOMEM; return -ENOMEM;
} }
/* Architecture specific contiguous memory fixup. */
if (of_flat_dt_is_compatible(node, "shared-dma-pool") && fdt_fixup_reserved_mem_node(node, base, size);
of_get_flat_dt_prop(node, "reusable", NULL)) fdt_init_reserved_mem_node(node, uname, base, size);
dma_contiguous_early_fixup(base, size);
/* Save region in the reserved_mem array */
fdt_reserved_mem_save_node(node, uname, base, size);
return 0; return 0;
} }
extern const struct of_device_id __reservedmem_of_table[];
static const struct of_device_id __rmem_of_table_sentinel static const struct of_device_id __rmem_of_table_sentinel
__used __section("__reservedmem_of_table_end"); __used __section("__reservedmem_of_table_end");
/* /**
* __reserved_mem_init_node() - call region specific reserved memory init code * fdt_fixup_reserved_mem_node() - call fixup function for a reserved memory node
* @node: FDT node to fixup
* @base: base address of the reserved memory region
* @size: size of the reserved memory region
*
* This function iterates through the reserved memory drivers and calls
* the node_fixup callback for the compatible entry matching the node.
*
* Return: 0 on success, -ENODEV if no compatible match found
*/ */
static int __init __reserved_mem_init_node(struct reserved_mem *rmem) static int __init fdt_fixup_reserved_mem_node(unsigned long node,
phys_addr_t base, phys_addr_t size)
{ {
extern const struct of_device_id __reservedmem_of_table[];
const struct of_device_id *i; const struct of_device_id *i;
int ret = -ENOENT; int ret = -ENODEV;
for (i = __reservedmem_of_table; i < &__rmem_of_table_sentinel; i++) { for (i = __reservedmem_of_table; ret == -ENODEV &&
reservedmem_of_init_fn initfn = i->data; i < &__rmem_of_table_sentinel; i++) {
const char *compat = i->compatible; const struct reserved_mem_ops *ops = i->data;
if (!of_flat_dt_is_compatible(rmem->fdt_node, compat)) if (!of_flat_dt_is_compatible(node, i->compatible))
continue; continue;
ret = initfn(rmem); if (ops->node_fixup)
ret = ops->node_fixup(node, base, size);
}
return ret;
}
/**
* fdt_validate_reserved_mem_node() - validate a reserved memory node
* @node: FDT node to validate
* @align: pointer to store the validated alignment (may be modified by callback)
*
* This function iterates through the reserved memory drivers and calls
* the node_validate callback for the compatible entry matching the node.
*
* Return: 0 on success, -ENODEV if no compatible match found
*/
static int __init fdt_validate_reserved_mem_node(unsigned long node, phys_addr_t *align)
{
const struct of_device_id *i;
int ret = -ENODEV;
for (i = __reservedmem_of_table; ret == -ENODEV &&
i < &__rmem_of_table_sentinel; i++) {
const struct reserved_mem_ops *ops = i->data;
if (!of_flat_dt_is_compatible(node, i->compatible))
continue;
if (ops->node_validate)
ret = ops->node_validate(node, align);
}
return ret;
}
/**
* __reserved_mem_init_node() - initialize a reserved memory region
* @rmem: reserved_mem structure to initialize
* @node: FDT node describing the reserved memory region
*
* This function iterates through the reserved memory drivers and calls the
* node_init callback for the compatible entry matching the node. On success,
* the operations pointer is stored in the reserved_mem structure.
*
* Return: 0 on success, -ENODEV if no compatible match found
*/
static int __init __reserved_mem_init_node(struct reserved_mem *rmem,
unsigned long node)
{
const struct of_device_id *i;
int ret = -ENODEV;
for (i = __reservedmem_of_table; ret == -ENODEV &&
i < &__rmem_of_table_sentinel; i++) {
const struct reserved_mem_ops *ops = i->data;
const char *compat = i->compatible;
if (!of_flat_dt_is_compatible(node, compat))
continue;
ret = ops->node_init(node, rmem);
if (ret == 0) { if (ret == 0) {
rmem->ops = ops;
pr_info("initialized node %s, compatible id %s\n", pr_info("initialized node %s, compatible id %s\n",
rmem->name, compat); rmem->name, compat);
break; return ret;
} }
} }
return ret; return ret;
} }
static int __init __rmem_cmp(const void *a, const void *b)
{
const struct reserved_mem *ra = a, *rb = b;
if (ra->base < rb->base)
return -1;
if (ra->base > rb->base)
return 1;
/*
* Put the dynamic allocations (address == 0, size == 0) before static
* allocations at address 0x0 so that overlap detection works
* correctly.
*/
if (ra->size < rb->size)
return -1;
if (ra->size > rb->size)
return 1;
if (ra->fdt_node < rb->fdt_node)
return -1;
if (ra->fdt_node > rb->fdt_node)
return 1;
return 0;
}
static void __init __rmem_check_for_overlap(void)
{
int i;
if (reserved_mem_count < 2)
return;
sort(reserved_mem, reserved_mem_count, sizeof(reserved_mem[0]),
__rmem_cmp, NULL);
for (i = 0; i < reserved_mem_count - 1; i++) {
struct reserved_mem *this, *next;
this = &reserved_mem[i];
next = &reserved_mem[i + 1];
if (this->base + this->size > next->base) {
phys_addr_t this_end, next_end;
this_end = this->base + this->size;
next_end = next->base + next->size;
pr_err("OVERLAP DETECTED!\n%s (%pa--%pa) overlaps with %s (%pa--%pa)\n",
this->name, &this->base, &this_end,
next->name, &next->base, &next_end);
}
}
}
/** /**
* fdt_init_reserved_mem_node() - Initialize a reserved memory region * fdt_init_reserved_mem_node() - Initialize a reserved memory region
* @rmem: reserved_mem struct of the memory region to be initialized. * @node: fdt node of the initialized region
* @uname: name of the reserved memory node
* @base: base address of the reserved memory region
* @size: size of the reserved memory region
* *
* This function is used to call the region specific initialization * This function calls the region-specific initialization function for a
* function for a reserved memory region. * reserved memory region and saves all region-specific data to the
* reserved_mem array to allow of_reserved_mem_lookup() to find it.
*/ */
static void __init fdt_init_reserved_mem_node(struct reserved_mem *rmem) static void __init fdt_init_reserved_mem_node(unsigned long node, const char *uname,
phys_addr_t base, phys_addr_t size)
{ {
unsigned long node = rmem->fdt_node;
int err = 0; int err = 0;
bool nomap; bool nomap;
struct reserved_mem *rmem = &reserved_mem[reserved_mem_count];
if (reserved_mem_count == total_reserved_mem_cnt) {
pr_err("not enough space for all defined regions.\n");
return;
}
rmem->name = uname;
rmem->base = base;
rmem->size = size;
nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL; nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL;
err = __reserved_mem_init_node(rmem); err = __reserved_mem_init_node(rmem, node);
if (err != 0 && err != -ENOENT) { if (err != 0 && err != -ENODEV) {
pr_info("node %s compatible matching fail\n", rmem->name); pr_info("node %s compatible matching fail\n", rmem->name);
rmem->name = NULL;
if (nomap) if (nomap)
memblock_clear_nomap(rmem->base, rmem->size); memblock_clear_nomap(rmem->base, rmem->size);
else else
memblock_phys_free(rmem->base, rmem->size); memblock_phys_free(rmem->base, rmem->size);
return;
} else { } else {
phys_addr_t end = rmem->base + rmem->size - 1; phys_addr_t end = rmem->base + rmem->size - 1;
bool reusable = bool reusable =
@@ -594,6 +646,8 @@ static void __init fdt_init_reserved_mem_node(struct reserved_mem *rmem)
reusable ? "reusable" : "non-reusable", reusable ? "reusable" : "non-reusable",
rmem->name ? rmem->name : "unknown"); rmem->name ? rmem->name : "unknown");
} }
reserved_mem_count++;
} }
struct rmem_assigned_device { struct rmem_assigned_device {

View File

@@ -262,11 +262,13 @@ static dma_addr_t xen_swiotlb_map_phys(struct device *dev, phys_addr_t phys,
done: done:
if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) { if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dev_addr)))) if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dev_addr)))) {
arch_sync_dma_for_device(phys, size, dir); arch_sync_dma_for_device(phys, size, dir);
else arch_sync_dma_flush();
} else {
xen_dma_sync_for_device(dev, dev_addr, size, dir); xen_dma_sync_for_device(dev, dev_addr, size, dir);
} }
}
return dev_addr; return dev_addr;
} }
@@ -287,11 +289,13 @@ static void xen_swiotlb_unmap_phys(struct device *hwdev, dma_addr_t dev_addr,
BUG_ON(dir == DMA_NONE); BUG_ON(dir == DMA_NONE);
if (!dev_is_dma_coherent(hwdev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) { if (!dev_is_dma_coherent(hwdev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
if (pfn_valid(PFN_DOWN(dma_to_phys(hwdev, dev_addr)))) if (pfn_valid(PFN_DOWN(dma_to_phys(hwdev, dev_addr)))) {
arch_sync_dma_for_cpu(paddr, size, dir); arch_sync_dma_for_cpu(paddr, size, dir);
else arch_sync_dma_flush();
} else {
xen_dma_sync_for_cpu(hwdev, dev_addr, size, dir); xen_dma_sync_for_cpu(hwdev, dev_addr, size, dir);
} }
}
/* NOTE: We use dev_addr here, not paddr! */ /* NOTE: We use dev_addr here, not paddr! */
pool = xen_swiotlb_find_pool(hwdev, dev_addr); pool = xen_swiotlb_find_pool(hwdev, dev_addr);
@@ -308,11 +312,13 @@ xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr,
struct io_tlb_pool *pool; struct io_tlb_pool *pool;
if (!dev_is_dma_coherent(dev)) { if (!dev_is_dma_coherent(dev)) {
if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr)))) if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr)))) {
arch_sync_dma_for_cpu(paddr, size, dir); arch_sync_dma_for_cpu(paddr, size, dir);
else arch_sync_dma_flush();
} else {
xen_dma_sync_for_cpu(dev, dma_addr, size, dir); xen_dma_sync_for_cpu(dev, dma_addr, size, dir);
} }
}
pool = xen_swiotlb_find_pool(dev, dma_addr); pool = xen_swiotlb_find_pool(dev, dma_addr);
if (pool) if (pool)
@@ -331,12 +337,14 @@ xen_swiotlb_sync_single_for_device(struct device *dev, dma_addr_t dma_addr,
__swiotlb_sync_single_for_device(dev, paddr, size, dir, pool); __swiotlb_sync_single_for_device(dev, paddr, size, dir, pool);
if (!dev_is_dma_coherent(dev)) { if (!dev_is_dma_coherent(dev)) {
if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr)))) if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr)))) {
arch_sync_dma_for_device(paddr, size, dir); arch_sync_dma_for_device(paddr, size, dir);
else arch_sync_dma_flush();
} else {
xen_dma_sync_for_device(dev, dma_addr, size, dir); xen_dma_sync_for_device(dev, dma_addr, size, dir);
} }
} }
}
/* /*
* Unmap a set of streaming mode DMA translations. Again, cpu read rules * Unmap a set of streaming mode DMA translations. Again, cpu read rules

View File

@@ -61,14 +61,4 @@ extern int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data)
extern bool cma_intersects(struct cma *cma, unsigned long start, unsigned long end); extern bool cma_intersects(struct cma *cma, unsigned long start, unsigned long end);
extern void cma_reserve_pages_on_error(struct cma *cma); extern void cma_reserve_pages_on_error(struct cma *cma);
#ifdef CONFIG_DMA_CMA
extern bool cma_skip_dt_default_reserved_mem(void);
#else
static inline bool cma_skip_dt_default_reserved_mem(void)
{
return false;
}
#endif
#endif #endif

View File

@@ -1,16 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef DMA_BUF_HEAP_CMA_H_
#define DMA_BUF_HEAP_CMA_H_
struct cma;
#ifdef CONFIG_DMABUF_HEAPS_CMA
int dma_heap_cma_register_heap(struct cma *cma);
#else
static inline int dma_heap_cma_register_heap(struct cma *cma)
{
return 0;
}
#endif // CONFIG_DMABUF_HEAPS_CMA
#endif // DMA_BUF_HEAP_CMA_H_

View File

@@ -91,14 +91,8 @@ static inline void set_dma_ops(struct device *dev,
#endif /* CONFIG_ARCH_HAS_DMA_OPS */ #endif /* CONFIG_ARCH_HAS_DMA_OPS */
#ifdef CONFIG_DMA_CMA #ifdef CONFIG_DMA_CMA
extern struct cma *dma_contiguous_default_area; struct cma *dev_get_cma_area(struct device *dev);
struct cma *dma_contiguous_get_area_by_idx(unsigned int idx);
static inline struct cma *dev_get_cma_area(struct device *dev)
{
if (dev && dev->cma_area)
return dev->cma_area;
return dma_contiguous_default_area;
}
void dma_contiguous_reserve(phys_addr_t addr_limit); void dma_contiguous_reserve(phys_addr_t addr_limit);
int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
@@ -117,6 +111,10 @@ static inline struct cma *dev_get_cma_area(struct device *dev)
{ {
return NULL; return NULL;
} }
static inline struct cma *dma_contiguous_get_area_by_idx(unsigned int idx)
{
return NULL;
}
static inline void dma_contiguous_reserve(phys_addr_t limit) static inline void dma_contiguous_reserve(phys_addr_t limit)
{ {
} }
@@ -147,9 +145,6 @@ static inline void dma_free_contiguous(struct device *dev, struct page *page,
{ {
__free_pages(page, get_order(size)); __free_pages(page, get_order(size));
} }
static inline void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size)
{
}
#endif /* CONFIG_DMA_CMA*/ #endif /* CONFIG_DMA_CMA*/
#ifdef CONFIG_DMA_DECLARE_COHERENT #ifdef CONFIG_DMA_DECLARE_COHERENT
@@ -361,6 +356,12 @@ static inline void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
} }
#endif /* ARCH_HAS_SYNC_DMA_FOR_CPU */ #endif /* ARCH_HAS_SYNC_DMA_FOR_CPU */
#ifndef CONFIG_ARCH_HAS_BATCHED_DMA_SYNC
static inline void arch_sync_dma_flush(void)
{
}
#endif
#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL #ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL
void arch_sync_dma_for_cpu_all(void); void arch_sync_dma_for_cpu_all(void);
#else #else

View File

@@ -9,7 +9,7 @@
#include <linux/bug.h> #include <linux/bug.h>
#include <linux/cache.h> #include <linux/cache.h>
/** /*
* List of possible attributes associated with a DMA mapping. The semantics * List of possible attributes associated with a DMA mapping. The semantics
* of each attribute should be defined in Documentation/core-api/dma-attributes.rst. * of each attribute should be defined in Documentation/core-api/dma-attributes.rst.
*/ */
@@ -92,6 +92,16 @@
* flushing. * flushing.
*/ */
#define DMA_ATTR_REQUIRE_COHERENT (1UL << 12) #define DMA_ATTR_REQUIRE_COHERENT (1UL << 12)
/*
* DMA_ATTR_CC_SHARED: Indicates the DMA mapping is shared (decrypted) for
* confidential computing guests. For normal system memory the caller must have
* called set_memory_decrypted(), and pgprot_decrypted must be used when
* creating CPU PTEs for the mapping. The same shared semantic may be passed
* to the vIOMMU when it sets up the IOPTE. For MMIO use together with
* DMA_ATTR_MMIO to indicate shared MMIO. Unless DMA_ATTR_MMIO is provided
* a struct page is required.
*/
#define DMA_ATTR_CC_SHARED (1UL << 13)
/* /*
* A dma_addr_t can hold any valid DMA or bus address for the platform. It can * A dma_addr_t can hold any valid DMA or bus address for the platform. It can

View File

@@ -11,7 +11,6 @@ struct resource;
struct reserved_mem { struct reserved_mem {
const char *name; const char *name;
unsigned long fdt_node;
const struct reserved_mem_ops *ops; const struct reserved_mem_ops *ops;
phys_addr_t base; phys_addr_t base;
phys_addr_t size; phys_addr_t size;
@@ -19,18 +18,20 @@ struct reserved_mem {
}; };
struct reserved_mem_ops { struct reserved_mem_ops {
int (*node_validate)(unsigned long fdt_node, phys_addr_t *align);
int (*node_fixup)(unsigned long fdt_node, phys_addr_t base,
phys_addr_t size);
int (*node_init)(unsigned long fdt_node, struct reserved_mem *rmem);
int (*device_init)(struct reserved_mem *rmem, int (*device_init)(struct reserved_mem *rmem,
struct device *dev); struct device *dev);
void (*device_release)(struct reserved_mem *rmem, void (*device_release)(struct reserved_mem *rmem,
struct device *dev); struct device *dev);
}; };
typedef int (*reservedmem_of_init_fn)(struct reserved_mem *rmem);
#ifdef CONFIG_OF_RESERVED_MEM #ifdef CONFIG_OF_RESERVED_MEM
#define RESERVEDMEM_OF_DECLARE(name, compat, init) \ #define RESERVEDMEM_OF_DECLARE(name, compat, ops) \
_OF_DECLARE(reservedmem, name, compat, init, reservedmem_of_init_fn) _OF_DECLARE(reservedmem, name, compat, ops, struct reserved_mem_ops *)
int of_reserved_mem_device_init_by_idx(struct device *dev, int of_reserved_mem_device_init_by_idx(struct device *dev,
struct device_node *np, int idx); struct device_node *np, int idx);
@@ -48,8 +49,9 @@ int of_reserved_mem_region_count(const struct device_node *np);
#else #else
#define RESERVEDMEM_OF_DECLARE(name, compat, init) \ #define RESERVEDMEM_OF_DECLARE(name, compat, ops) \
_OF_DECLARE_STUB(reservedmem, name, compat, init, reservedmem_of_init_fn) _OF_DECLARE_STUB(reservedmem, name, compat, ops, \
struct reserved_mem_ops *)
static inline int of_reserved_mem_device_init_by_idx(struct device *dev, static inline int of_reserved_mem_device_init_by_idx(struct device *dev,
struct device_node *np, int idx) struct device_node *np, int idx)

View File

@@ -34,7 +34,8 @@ TRACE_DEFINE_ENUM(DMA_NONE);
{ DMA_ATTR_PRIVILEGED, "PRIVILEGED" }, \ { DMA_ATTR_PRIVILEGED, "PRIVILEGED" }, \
{ DMA_ATTR_MMIO, "MMIO" }, \ { DMA_ATTR_MMIO, "MMIO" }, \
{ DMA_ATTR_DEBUGGING_IGNORE_CACHELINES, "CACHELINES_OVERLAP" }, \ { DMA_ATTR_DEBUGGING_IGNORE_CACHELINES, "CACHELINES_OVERLAP" }, \
{ DMA_ATTR_REQUIRE_COHERENT, "REQUIRE_COHERENT" }) { DMA_ATTR_REQUIRE_COHERENT, "REQUIRE_COHERENT" }, \
{ DMA_ATTR_CC_SHARED, "CC_SHARED" })
DECLARE_EVENT_CLASS(dma_map, DECLARE_EVENT_CLASS(dma_map,
TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr, TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr,

View File

@@ -17,6 +17,12 @@
#define DMA_MAP_TO_DEVICE 1 #define DMA_MAP_TO_DEVICE 1
#define DMA_MAP_FROM_DEVICE 2 #define DMA_MAP_FROM_DEVICE 2
enum {
DMA_MAP_BENCH_SINGLE_MODE,
DMA_MAP_BENCH_SG_MODE,
DMA_MAP_BENCH_MODE_MAX
};
struct map_benchmark { struct map_benchmark {
__u64 avg_map_100ns; /* average map latency in 100ns */ __u64 avg_map_100ns; /* average map latency in 100ns */
__u64 map_stddev; /* standard deviation of map latency */ __u64 map_stddev; /* standard deviation of map latency */
@@ -28,8 +34,11 @@ struct map_benchmark {
__u32 dma_bits; /* DMA addressing capability */ __u32 dma_bits; /* DMA addressing capability */
__u32 dma_dir; /* DMA data direction */ __u32 dma_dir; /* DMA data direction */
__u32 dma_trans_ns; /* time for DMA transmission in ns */ __u32 dma_trans_ns; /* time for DMA transmission in ns */
__u32 granule; /* how many PAGE_SIZE will do map/unmap once a time */ __u32 granule; /* - SINGLE_MODE: number of pages mapped/unmapped per operation
__u8 expansion[76]; /* For future use */ * - SG_MODE: number of scatterlist entries (each maps one page)
*/
__u8 map_mode; /* the mode of dma map */
__u8 expansion[75]; /* For future use */
}; };
#endif /* _UAPI_DMA_BENCHMARK_H */ #endif /* _UAPI_DMA_BENCHMARK_H */

View File

@@ -72,6 +72,9 @@ config ARCH_HAS_DMA_PREP_COHERENT
config ARCH_HAS_FORCE_DMA_UNENCRYPTED config ARCH_HAS_FORCE_DMA_UNENCRYPTED
bool bool
config ARCH_HAS_BATCHED_DMA_SYNC
bool
# #
# Select this option if the architecture assumes DMA devices are coherent # Select this option if the architecture assumes DMA devices are coherent
# by default. # by default.

View File

@@ -362,17 +362,11 @@ static void rmem_dma_device_release(struct reserved_mem *rmem,
dev->dma_mem = NULL; dev->dma_mem = NULL;
} }
static const struct reserved_mem_ops rmem_dma_ops = {
.device_init = rmem_dma_device_init,
.device_release = rmem_dma_device_release,
};
static int __init rmem_dma_setup(struct reserved_mem *rmem) static int __init rmem_dma_setup(unsigned long node, struct reserved_mem *rmem)
{ {
unsigned long node = rmem->fdt_node;
if (of_get_flat_dt_prop(node, "reusable", NULL)) if (of_get_flat_dt_prop(node, "reusable", NULL))
return -EINVAL; return -ENODEV;
#ifdef CONFIG_ARM #ifdef CONFIG_ARM
if (!of_get_flat_dt_prop(node, "no-map", NULL)) { if (!of_get_flat_dt_prop(node, "no-map", NULL)) {
@@ -390,7 +384,6 @@ static int __init rmem_dma_setup(struct reserved_mem *rmem)
} }
#endif #endif
rmem->ops = &rmem_dma_ops;
pr_info("Reserved memory: created DMA memory pool at %pa, size %ld MiB\n", pr_info("Reserved memory: created DMA memory pool at %pa, size %ld MiB\n",
&rmem->base, (unsigned long)rmem->size / SZ_1M); &rmem->base, (unsigned long)rmem->size / SZ_1M);
return 0; return 0;
@@ -407,5 +400,11 @@ static int __init dma_init_reserved_memory(void)
core_initcall(dma_init_reserved_memory); core_initcall(dma_init_reserved_memory);
#endif /* CONFIG_DMA_GLOBAL_POOL */ #endif /* CONFIG_DMA_GLOBAL_POOL */
RESERVEDMEM_OF_DECLARE(dma, "shared-dma-pool", rmem_dma_setup); static const struct reserved_mem_ops rmem_dma_ops = {
.node_init = rmem_dma_setup,
.device_init = rmem_dma_device_init,
.device_release = rmem_dma_device_release,
};
RESERVEDMEM_OF_DECLARE(dma, "shared-dma-pool", &rmem_dma_ops);
#endif #endif

View File

@@ -42,7 +42,6 @@
#include <linux/memblock.h> #include <linux/memblock.h>
#include <linux/err.h> #include <linux/err.h>
#include <linux/sizes.h> #include <linux/sizes.h>
#include <linux/dma-buf/heaps/cma.h>
#include <linux/dma-map-ops.h> #include <linux/dma-map-ops.h>
#include <linux/cma.h> #include <linux/cma.h>
#include <linux/nospec.h> #include <linux/nospec.h>
@@ -53,7 +52,38 @@
#define CMA_SIZE_MBYTES 0 #define CMA_SIZE_MBYTES 0
#endif #endif
struct cma *dma_contiguous_default_area; static struct cma *dma_contiguous_areas[MAX_CMA_AREAS];
static unsigned int dma_contiguous_areas_num;
static int dma_contiguous_insert_area(struct cma *cma)
{
if (dma_contiguous_areas_num >= ARRAY_SIZE(dma_contiguous_areas))
return -EINVAL;
dma_contiguous_areas[dma_contiguous_areas_num++] = cma;
return 0;
}
/**
* dma_contiguous_get_area_by_idx() - Get contiguous area at given index
* @idx: index of the area we query
*
* Queries for the contiguous area located at index @idx.
*
* Returns:
* A pointer to the requested contiguous area, or NULL otherwise.
*/
struct cma *dma_contiguous_get_area_by_idx(unsigned int idx)
{
if (idx >= dma_contiguous_areas_num)
return NULL;
return dma_contiguous_areas[idx];
}
EXPORT_SYMBOL_GPL(dma_contiguous_get_area_by_idx);
static struct cma *dma_contiguous_default_area;
/* /*
* Default global CMA area size can be defined in kernel's .config. * Default global CMA area size can be defined in kernel's .config.
@@ -91,15 +121,14 @@ static int __init early_cma(char *p)
} }
early_param("cma", early_cma); early_param("cma", early_cma);
/* struct cma *dev_get_cma_area(struct device *dev)
* cma_skip_dt_default_reserved_mem - This is called from the
* reserved_mem framework to detect if the default cma region is being
* set by the "cma=" kernel parameter.
*/
bool __init cma_skip_dt_default_reserved_mem(void)
{ {
return size_cmdline != -1; if (dev && dev->cma_area)
return dev->cma_area;
return dma_contiguous_default_area;
} }
EXPORT_SYMBOL_GPL(dev_get_cma_area);
#ifdef CONFIG_DMA_NUMA_CMA #ifdef CONFIG_DMA_NUMA_CMA
@@ -264,9 +293,24 @@ void __init dma_contiguous_reserve(phys_addr_t limit)
if (ret) if (ret)
return; return;
ret = dma_heap_cma_register_heap(dma_contiguous_default_area); /*
* We need to insert the new area in our list to avoid
* any inconsistencies between having the default area
* listed in the DT or not.
*
* The DT case is handled by rmem_cma_setup() and will
* always insert all its areas in our list. However, if
* it didn't run (because OF_RESERVED_MEM isn't set, or
* there's no DT region specified), then we don't have a
* default area yet, and no area in our list.
*
* This block creates the default area in such a case,
* but we also need to insert it in our list to avoid
* having a default area but an empty list.
*/
ret = dma_contiguous_insert_area(dma_contiguous_default_area);
if (ret) if (ret)
pr_warn("Couldn't register default CMA heap."); pr_warn("Couldn't queue default CMA region for heap creation.");
} }
} }
@@ -470,47 +514,89 @@ static void rmem_cma_device_release(struct reserved_mem *rmem,
dev->cma_area = NULL; dev->cma_area = NULL;
} }
static const struct reserved_mem_ops rmem_cma_ops = { static int __init __rmem_cma_verify_node(unsigned long node)
.device_init = rmem_cma_device_init,
.device_release = rmem_cma_device_release,
};
static int __init rmem_cma_setup(struct reserved_mem *rmem)
{ {
unsigned long node = rmem->fdt_node;
bool default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL);
struct cma *cma;
int err;
if (!of_get_flat_dt_prop(node, "reusable", NULL) || if (!of_get_flat_dt_prop(node, "reusable", NULL) ||
of_get_flat_dt_prop(node, "no-map", NULL)) of_get_flat_dt_prop(node, "no-map", NULL))
return -EINVAL; return -ENODEV;
if (size_cmdline != -1 &&
of_get_flat_dt_prop(node, "linux,cma-default", NULL)) {
pr_err("Skipping dt linux,cma-default node in favor for \"cma=\" kernel param.\n");
return -EBUSY;
}
return 0;
}
static int __init rmem_cma_validate(unsigned long node, phys_addr_t *align)
{
int ret = __rmem_cma_verify_node(node);
if (ret)
return ret;
if (align)
*align = max_t(phys_addr_t, *align, CMA_MIN_ALIGNMENT_BYTES);
return 0;
}
static int __init rmem_cma_fixup(unsigned long node, phys_addr_t base,
phys_addr_t size)
{
int ret = __rmem_cma_verify_node(node);
if (ret)
return ret;
/* Architecture specific contiguous memory fixup. */
dma_contiguous_early_fixup(base, size);
return 0;
}
static int __init rmem_cma_setup(unsigned long node, struct reserved_mem *rmem)
{
bool default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL);
struct cma *cma;
int ret;
ret = __rmem_cma_verify_node(node);
if (ret)
return ret;
if (!IS_ALIGNED(rmem->base | rmem->size, CMA_MIN_ALIGNMENT_BYTES)) { if (!IS_ALIGNED(rmem->base | rmem->size, CMA_MIN_ALIGNMENT_BYTES)) {
pr_err("Reserved memory: incorrect alignment of CMA region\n"); pr_err("Reserved memory: incorrect alignment of CMA region\n");
return -EINVAL; return -EINVAL;
} }
err = cma_init_reserved_mem(rmem->base, rmem->size, 0, rmem->name, &cma); ret = cma_init_reserved_mem(rmem->base, rmem->size, 0, rmem->name, &cma);
if (err) { if (ret) {
pr_err("Reserved memory: unable to setup CMA region\n"); pr_err("Reserved memory: unable to setup CMA region\n");
return err; return ret;
} }
if (default_cma) if (default_cma)
dma_contiguous_default_area = cma; dma_contiguous_default_area = cma;
rmem->ops = &rmem_cma_ops;
rmem->priv = cma; rmem->priv = cma;
pr_info("Reserved memory: created CMA memory pool at %pa, size %ld MiB\n", pr_info("Reserved memory: created CMA memory pool at %pa, size %ld MiB\n",
&rmem->base, (unsigned long)rmem->size / SZ_1M); &rmem->base, (unsigned long)rmem->size / SZ_1M);
err = dma_heap_cma_register_heap(cma); ret = dma_contiguous_insert_area(cma);
if (err) if (ret)
pr_warn("Couldn't register CMA heap."); pr_warn("Couldn't store CMA reserved area.");
return 0; return 0;
} }
RESERVEDMEM_OF_DECLARE(cma, "shared-dma-pool", rmem_cma_setup);
static const struct reserved_mem_ops rmem_cma_ops = {
.node_validate = rmem_cma_validate,
.node_fixup = rmem_cma_fixup,
.node_init = rmem_cma_setup,
.device_init = rmem_cma_device_init,
.device_release = rmem_cma_device_release,
};
RESERVEDMEM_OF_DECLARE(cma, "shared-dma-pool", &rmem_cma_ops);
#endif #endif

View File

@@ -406,6 +406,8 @@ void dma_direct_sync_sg_for_device(struct device *dev,
arch_sync_dma_for_device(paddr, sg->length, arch_sync_dma_for_device(paddr, sg->length,
dir); dir);
} }
if (!dev_is_dma_coherent(dev))
arch_sync_dma_flush();
} }
#endif #endif
@@ -427,9 +429,11 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
swiotlb_sync_single_for_cpu(dev, paddr, sg->length, dir); swiotlb_sync_single_for_cpu(dev, paddr, sg->length, dir);
} }
if (!dev_is_dma_coherent(dev)) if (!dev_is_dma_coherent(dev)) {
arch_sync_dma_flush();
arch_sync_dma_for_cpu_all(); arch_sync_dma_for_cpu_all();
} }
}
/* /*
* Unmaps segments, except for ones marked as pci_p2pdma which do not * Unmaps segments, except for ones marked as pci_p2pdma which do not
@@ -440,15 +444,20 @@ void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl,
{ {
struct scatterlist *sg; struct scatterlist *sg;
int i; int i;
bool need_sync = false;
for_each_sg(sgl, sg, nents, i) { for_each_sg(sgl, sg, nents, i) {
if (sg_dma_is_bus_address(sg)) if (sg_dma_is_bus_address(sg)) {
sg_dma_unmark_bus_address(sg); sg_dma_unmark_bus_address(sg);
else } else {
need_sync = true;
dma_direct_unmap_phys(dev, sg->dma_address, dma_direct_unmap_phys(dev, sg->dma_address,
sg_dma_len(sg), dir, attrs); sg_dma_len(sg), dir, attrs, false);
} }
} }
if (need_sync && !dev_is_dma_coherent(dev))
arch_sync_dma_flush();
}
#endif #endif
int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
@@ -457,6 +466,7 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
struct pci_p2pdma_map_state p2pdma_state = {}; struct pci_p2pdma_map_state p2pdma_state = {};
struct scatterlist *sg; struct scatterlist *sg;
int i, ret; int i, ret;
bool need_sync = false;
for_each_sg(sgl, sg, nents, i) { for_each_sg(sgl, sg, nents, i) {
switch (pci_p2pdma_state(&p2pdma_state, dev, sg_page(sg))) { switch (pci_p2pdma_state(&p2pdma_state, dev, sg_page(sg))) {
@@ -468,8 +478,9 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
*/ */
break; break;
case PCI_P2PDMA_MAP_NONE: case PCI_P2PDMA_MAP_NONE:
need_sync = true;
sg->dma_address = dma_direct_map_phys(dev, sg_phys(sg), sg->dma_address = dma_direct_map_phys(dev, sg_phys(sg),
sg->length, dir, attrs); sg->length, dir, attrs, false);
if (sg->dma_address == DMA_MAPPING_ERROR) { if (sg->dma_address == DMA_MAPPING_ERROR) {
ret = -EIO; ret = -EIO;
goto out_unmap; goto out_unmap;
@@ -488,6 +499,8 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
sg_dma_len(sg) = sg->length; sg_dma_len(sg) = sg->length;
} }
if (need_sync && !dev_is_dma_coherent(dev))
arch_sync_dma_flush();
return nents; return nents;
out_unmap: out_unmap:

View File

@@ -60,17 +60,22 @@ static inline void dma_direct_sync_single_for_device(struct device *dev,
swiotlb_sync_single_for_device(dev, paddr, size, dir); swiotlb_sync_single_for_device(dev, paddr, size, dir);
if (!dev_is_dma_coherent(dev)) if (!dev_is_dma_coherent(dev)) {
arch_sync_dma_for_device(paddr, size, dir); arch_sync_dma_for_device(paddr, size, dir);
arch_sync_dma_flush();
}
} }
static inline void dma_direct_sync_single_for_cpu(struct device *dev, static inline void dma_direct_sync_single_for_cpu(struct device *dev,
dma_addr_t addr, size_t size, enum dma_data_direction dir) dma_addr_t addr, size_t size, enum dma_data_direction dir,
bool flush)
{ {
phys_addr_t paddr = dma_to_phys(dev, addr); phys_addr_t paddr = dma_to_phys(dev, addr);
if (!dev_is_dma_coherent(dev)) { if (!dev_is_dma_coherent(dev)) {
arch_sync_dma_for_cpu(paddr, size, dir); arch_sync_dma_for_cpu(paddr, size, dir);
if (flush)
arch_sync_dma_flush();
arch_sync_dma_for_cpu_all(); arch_sync_dma_for_cpu_all();
} }
@@ -79,21 +84,29 @@ static inline void dma_direct_sync_single_for_cpu(struct device *dev,
static inline dma_addr_t dma_direct_map_phys(struct device *dev, static inline dma_addr_t dma_direct_map_phys(struct device *dev,
phys_addr_t phys, size_t size, enum dma_data_direction dir, phys_addr_t phys, size_t size, enum dma_data_direction dir,
unsigned long attrs) unsigned long attrs, bool flush)
{ {
dma_addr_t dma_addr; dma_addr_t dma_addr;
if (is_swiotlb_force_bounce(dev)) { if (is_swiotlb_force_bounce(dev)) {
if (!(attrs & DMA_ATTR_CC_SHARED)) {
if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT)) if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT))
return DMA_MAPPING_ERROR; return DMA_MAPPING_ERROR;
return swiotlb_map(dev, phys, size, dir, attrs); return swiotlb_map(dev, phys, size, dir, attrs);
} }
} else if (attrs & DMA_ATTR_CC_SHARED) {
return DMA_MAPPING_ERROR;
}
if (attrs & DMA_ATTR_MMIO) { if (attrs & DMA_ATTR_MMIO) {
dma_addr = phys; dma_addr = phys;
if (unlikely(!dma_capable(dev, dma_addr, size, false))) if (unlikely(!dma_capable(dev, dma_addr, size, false)))
goto err_overflow; goto err_overflow;
} else if (attrs & DMA_ATTR_CC_SHARED) {
dma_addr = phys_to_dma_unencrypted(dev, phys);
if (unlikely(!dma_capable(dev, dma_addr, size, false)))
goto err_overflow;
} else { } else {
dma_addr = phys_to_dma(dev, phys); dma_addr = phys_to_dma(dev, phys);
if (unlikely(!dma_capable(dev, dma_addr, size, true)) || if (unlikely(!dma_capable(dev, dma_addr, size, true)) ||
@@ -107,8 +120,11 @@ static inline dma_addr_t dma_direct_map_phys(struct device *dev,
} }
if (!dev_is_dma_coherent(dev) && if (!dev_is_dma_coherent(dev) &&
!(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO))) !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO))) {
arch_sync_dma_for_device(phys, size, dir); arch_sync_dma_for_device(phys, size, dir);
if (flush)
arch_sync_dma_flush();
}
return dma_addr; return dma_addr;
err_overflow: err_overflow:
@@ -120,7 +136,8 @@ err_overflow:
} }
static inline void dma_direct_unmap_phys(struct device *dev, dma_addr_t addr, static inline void dma_direct_unmap_phys(struct device *dev, dma_addr_t addr,
size_t size, enum dma_data_direction dir, unsigned long attrs) size_t size, enum dma_data_direction dir, unsigned long attrs,
bool flush)
{ {
phys_addr_t phys; phys_addr_t phys;
@@ -130,7 +147,7 @@ static inline void dma_direct_unmap_phys(struct device *dev, dma_addr_t addr,
phys = dma_to_phys(dev, addr); phys = dma_to_phys(dev, addr);
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
dma_direct_sync_single_for_cpu(dev, addr, size, dir); dma_direct_sync_single_for_cpu(dev, addr, size, dir, flush);
swiotlb_tbl_unmap_single(dev, phys, size, dir, swiotlb_tbl_unmap_single(dev, phys, size, dir,
attrs | DMA_ATTR_SKIP_CPU_SYNC); attrs | DMA_ATTR_SKIP_CPU_SYNC);

View File

@@ -5,6 +5,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/cleanup.h>
#include <linux/debugfs.h> #include <linux/debugfs.h>
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/device.h> #include <linux/device.h>
@@ -15,6 +16,7 @@
#include <linux/module.h> #include <linux/module.h>
#include <linux/pci.h> #include <linux/pci.h>
#include <linux/platform_device.h> #include <linux/platform_device.h>
#include <linux/scatterlist.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/timekeeping.h> #include <linux/timekeeping.h>
#include <uapi/linux/map_benchmark.h> #include <uapi/linux/map_benchmark.h>
@@ -31,17 +33,219 @@ struct map_benchmark_data {
atomic64_t loops; atomic64_t loops;
}; };
static int map_benchmark_thread(void *data) struct map_benchmark_ops {
void *(*prepare)(struct map_benchmark_data *map);
void (*unprepare)(void *mparam);
void (*initialize_data)(void *mparam);
int (*do_map)(void *mparam);
void (*do_unmap)(void *mparam);
};
struct dma_single_map_param {
struct device *dev;
dma_addr_t addr;
void *xbuf;
u32 npages;
u32 dma_dir;
};
static void *dma_single_map_benchmark_prepare(struct map_benchmark_data *map)
{ {
void *buf; struct dma_single_map_param *params __free(kfree) = kzalloc(sizeof(*params),
dma_addr_t dma_addr; GFP_KERNEL);
struct map_benchmark_data *map = data; if (!params)
int npages = map->bparam.granule; return NULL;
u64 size = npages * PAGE_SIZE;
params->npages = map->bparam.granule;
params->dma_dir = map->bparam.dma_dir;
params->dev = map->dev;
params->xbuf = alloc_pages_exact(params->npages * PAGE_SIZE, GFP_KERNEL);
if (!params->xbuf)
return NULL;
return_ptr(params);
}
static void dma_single_map_benchmark_unprepare(void *mparam)
{
struct dma_single_map_param *params = mparam;
free_pages_exact(params->xbuf, params->npages * PAGE_SIZE);
kfree(params);
}
static void dma_single_map_benchmark_initialize_data(void *mparam)
{
struct dma_single_map_param *params = mparam;
/*
* for a non-coherent device, if we don't stain them in the
* cache, this will give an underestimate of the real-world
* overhead of BIDIRECTIONAL or TO_DEVICE mappings;
* 66 means everything goes well! 66 is lucky.
*/
if (params->dma_dir != DMA_FROM_DEVICE)
memset(params->xbuf, 0x66, params->npages * PAGE_SIZE);
}
static int dma_single_map_benchmark_do_map(void *mparam)
{
struct dma_single_map_param *params = mparam;
params->addr = dma_map_single(params->dev, params->xbuf,
params->npages * PAGE_SIZE, params->dma_dir);
if (unlikely(dma_mapping_error(params->dev, params->addr))) {
pr_err("dma_map_single failed on %s\n", dev_name(params->dev));
return -ENOMEM;
}
return 0;
}
static void dma_single_map_benchmark_do_unmap(void *mparam)
{
struct dma_single_map_param *params = mparam;
dma_unmap_single(params->dev, params->addr,
params->npages * PAGE_SIZE, params->dma_dir);
}
static struct map_benchmark_ops dma_single_map_benchmark_ops = {
.prepare = dma_single_map_benchmark_prepare,
.unprepare = dma_single_map_benchmark_unprepare,
.initialize_data = dma_single_map_benchmark_initialize_data,
.do_map = dma_single_map_benchmark_do_map,
.do_unmap = dma_single_map_benchmark_do_unmap,
};
struct dma_sg_map_param {
struct sg_table sgt;
struct device *dev;
void **buf;
u32 npages;
u32 dma_dir;
};
static void *dma_sg_map_benchmark_prepare(struct map_benchmark_data *map)
{
struct scatterlist *sg;
int i;
struct dma_sg_map_param *params = kzalloc(sizeof(*params), GFP_KERNEL);
if (!params)
return NULL;
/*
* Set the number of scatterlist entries based on the granule.
* In SG mode, 'granule' represents the number of scatterlist entries.
* Each scatterlist entry corresponds to a single page.
*/
params->npages = map->bparam.granule;
params->dma_dir = map->bparam.dma_dir;
params->dev = map->dev;
params->buf = kmalloc_array(params->npages, sizeof(*params->buf),
GFP_KERNEL);
if (!params->buf)
goto out;
if (sg_alloc_table(&params->sgt, params->npages, GFP_KERNEL))
goto free_buf;
for_each_sgtable_sg(&params->sgt, sg, i) {
params->buf[i] = (void *)__get_free_page(GFP_KERNEL);
if (!params->buf[i])
goto free_page;
sg_set_buf(sg, params->buf[i], PAGE_SIZE);
}
return params;
free_page:
while (i-- > 0)
free_page((unsigned long)params->buf[i]);
sg_free_table(&params->sgt);
free_buf:
kfree(params->buf);
out:
kfree(params);
return NULL;
}
static void dma_sg_map_benchmark_unprepare(void *mparam)
{
struct dma_sg_map_param *params = mparam;
int i;
for (i = 0; i < params->npages; i++)
free_page((unsigned long)params->buf[i]);
sg_free_table(&params->sgt);
kfree(params->buf);
kfree(params);
}
static void dma_sg_map_benchmark_initialize_data(void *mparam)
{
struct dma_sg_map_param *params = mparam;
struct scatterlist *sg;
int i = 0;
if (params->dma_dir == DMA_FROM_DEVICE)
return;
for_each_sgtable_sg(&params->sgt, sg, i)
memset(params->buf[i], 0x66, PAGE_SIZE);
}
static int dma_sg_map_benchmark_do_map(void *mparam)
{
struct dma_sg_map_param *params = mparam;
int ret = 0; int ret = 0;
buf = alloc_pages_exact(size, GFP_KERNEL); int sg_mapped = dma_map_sg(params->dev, params->sgt.sgl,
if (!buf) params->npages, params->dma_dir);
if (!sg_mapped) {
pr_err("dma_map_sg failed on %s\n", dev_name(params->dev));
ret = -ENOMEM;
}
return ret;
}
static void dma_sg_map_benchmark_do_unmap(void *mparam)
{
struct dma_sg_map_param *params = mparam;
dma_unmap_sg(params->dev, params->sgt.sgl, params->npages,
params->dma_dir);
}
static struct map_benchmark_ops dma_sg_map_benchmark_ops = {
.prepare = dma_sg_map_benchmark_prepare,
.unprepare = dma_sg_map_benchmark_unprepare,
.initialize_data = dma_sg_map_benchmark_initialize_data,
.do_map = dma_sg_map_benchmark_do_map,
.do_unmap = dma_sg_map_benchmark_do_unmap,
};
static struct map_benchmark_ops *dma_map_benchmark_ops[DMA_MAP_BENCH_MODE_MAX] = {
[DMA_MAP_BENCH_SINGLE_MODE] = &dma_single_map_benchmark_ops,
[DMA_MAP_BENCH_SG_MODE] = &dma_sg_map_benchmark_ops,
};
static int map_benchmark_thread(void *data)
{
struct map_benchmark_data *map = data;
__u8 map_mode = map->bparam.map_mode;
int ret = 0;
struct map_benchmark_ops *mb_ops = dma_map_benchmark_ops[map_mode];
void *mparam = mb_ops->prepare(map);
if (!mparam)
return -ENOMEM; return -ENOMEM;
while (!kthread_should_stop()) { while (!kthread_should_stop()) {
@@ -49,23 +253,12 @@ static int map_benchmark_thread(void *data)
ktime_t map_stime, map_etime, unmap_stime, unmap_etime; ktime_t map_stime, map_etime, unmap_stime, unmap_etime;
ktime_t map_delta, unmap_delta; ktime_t map_delta, unmap_delta;
/* mb_ops->initialize_data(mparam);
* for a non-coherent device, if we don't stain them in the
* cache, this will give an underestimate of the real-world
* overhead of BIDIRECTIONAL or TO_DEVICE mappings;
* 66 means evertything goes well! 66 is lucky.
*/
if (map->dir != DMA_FROM_DEVICE)
memset(buf, 0x66, size);
map_stime = ktime_get(); map_stime = ktime_get();
dma_addr = dma_map_single(map->dev, buf, size, map->dir); ret = mb_ops->do_map(mparam);
if (unlikely(dma_mapping_error(map->dev, dma_addr))) { if (ret)
pr_err("dma_map_single failed on %s\n",
dev_name(map->dev));
ret = -ENOMEM;
goto out; goto out;
}
map_etime = ktime_get(); map_etime = ktime_get();
map_delta = ktime_sub(map_etime, map_stime); map_delta = ktime_sub(map_etime, map_stime);
@@ -73,7 +266,8 @@ static int map_benchmark_thread(void *data)
ndelay(map->bparam.dma_trans_ns); ndelay(map->bparam.dma_trans_ns);
unmap_stime = ktime_get(); unmap_stime = ktime_get();
dma_unmap_single(map->dev, dma_addr, size, map->dir); mb_ops->do_unmap(mparam);
unmap_etime = ktime_get(); unmap_etime = ktime_get();
unmap_delta = ktime_sub(unmap_etime, unmap_stime); unmap_delta = ktime_sub(unmap_etime, unmap_stime);
@@ -108,7 +302,7 @@ static int map_benchmark_thread(void *data)
} }
out: out:
free_pages_exact(buf, size); mb_ops->unprepare(mparam);
return ret; return ret;
} }
@@ -209,6 +403,12 @@ static long map_benchmark_ioctl(struct file *file, unsigned int cmd,
switch (cmd) { switch (cmd) {
case DMA_MAP_BENCHMARK: case DMA_MAP_BENCHMARK:
if (map->bparam.map_mode < 0 ||
map->bparam.map_mode >= DMA_MAP_BENCH_MODE_MAX) {
pr_err("invalid map mode\n");
return -EINVAL;
}
if (map->bparam.threads == 0 || if (map->bparam.threads == 0 ||
map->bparam.threads > DMA_MAP_MAX_THREADS) { map->bparam.threads > DMA_MAP_MAX_THREADS) {
pr_err("invalid thread number\n"); pr_err("invalid thread number\n");

View File

@@ -157,6 +157,7 @@ dma_addr_t dma_map_phys(struct device *dev, phys_addr_t phys, size_t size,
{ {
const struct dma_map_ops *ops = get_dma_ops(dev); const struct dma_map_ops *ops = get_dma_ops(dev);
bool is_mmio = attrs & DMA_ATTR_MMIO; bool is_mmio = attrs & DMA_ATTR_MMIO;
bool is_cc_shared = attrs & DMA_ATTR_CC_SHARED;
dma_addr_t addr = DMA_MAPPING_ERROR; dma_addr_t addr = DMA_MAPPING_ERROR;
BUG_ON(!valid_dma_direction(dir)); BUG_ON(!valid_dma_direction(dir));
@@ -168,8 +169,11 @@ dma_addr_t dma_map_phys(struct device *dev, phys_addr_t phys, size_t size,
return DMA_MAPPING_ERROR; return DMA_MAPPING_ERROR;
if (dma_map_direct(dev, ops) || if (dma_map_direct(dev, ops) ||
(!is_mmio && arch_dma_map_phys_direct(dev, phys + size))) (!is_mmio && !is_cc_shared &&
addr = dma_direct_map_phys(dev, phys, size, dir, attrs); arch_dma_map_phys_direct(dev, phys + size)))
addr = dma_direct_map_phys(dev, phys, size, dir, attrs, true);
else if (is_cc_shared)
return DMA_MAPPING_ERROR;
else if (use_dma_iommu(dev)) else if (use_dma_iommu(dev))
addr = iommu_dma_map_phys(dev, phys, size, dir, attrs); addr = iommu_dma_map_phys(dev, phys, size, dir, attrs);
else if (ops->map_phys) else if (ops->map_phys)
@@ -206,11 +210,16 @@ void dma_unmap_phys(struct device *dev, dma_addr_t addr, size_t size,
{ {
const struct dma_map_ops *ops = get_dma_ops(dev); const struct dma_map_ops *ops = get_dma_ops(dev);
bool is_mmio = attrs & DMA_ATTR_MMIO; bool is_mmio = attrs & DMA_ATTR_MMIO;
bool is_cc_shared = attrs & DMA_ATTR_CC_SHARED;
BUG_ON(!valid_dma_direction(dir)); BUG_ON(!valid_dma_direction(dir));
if (dma_map_direct(dev, ops) || if (dma_map_direct(dev, ops) ||
(!is_mmio && arch_dma_unmap_phys_direct(dev, addr + size))) (!is_mmio && !is_cc_shared &&
dma_direct_unmap_phys(dev, addr, size, dir, attrs); arch_dma_unmap_phys_direct(dev, addr + size)))
dma_direct_unmap_phys(dev, addr, size, dir, attrs, true);
else if (is_cc_shared)
return;
else if (use_dma_iommu(dev)) else if (use_dma_iommu(dev))
iommu_dma_unmap_phys(dev, addr, size, dir, attrs); iommu_dma_unmap_phys(dev, addr, size, dir, attrs);
else if (ops->unmap_phys) else if (ops->unmap_phys)
@@ -379,7 +388,7 @@ void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
BUG_ON(!valid_dma_direction(dir)); BUG_ON(!valid_dma_direction(dir));
if (dma_map_direct(dev, ops)) if (dma_map_direct(dev, ops))
dma_direct_sync_single_for_cpu(dev, addr, size, dir); dma_direct_sync_single_for_cpu(dev, addr, size, dir, true);
else if (use_dma_iommu(dev)) else if (use_dma_iommu(dev))
iommu_dma_sync_single_for_cpu(dev, addr, size, dir); iommu_dma_sync_single_for_cpu(dev, addr, size, dir);
else if (ops->sync_single_for_cpu) else if (ops->sync_single_for_cpu)

View File

@@ -868,6 +868,9 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
if (orig_addr == INVALID_PHYS_ADDR) if (orig_addr == INVALID_PHYS_ADDR)
return; return;
if (dir == DMA_FROM_DEVICE && !dev_is_dma_coherent(dev))
arch_sync_dma_flush();
/* /*
* It's valid for tlb_offset to be negative. This can happen when the * It's valid for tlb_offset to be negative. This can happen when the
* "offset" returned by swiotlb_align_offset() is non-zero, and the * "offset" returned by swiotlb_align_offset() is non-zero, and the
@@ -1612,8 +1615,10 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
return DMA_MAPPING_ERROR; return DMA_MAPPING_ERROR;
} }
if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
arch_sync_dma_for_device(swiotlb_addr, size, dir); arch_sync_dma_for_device(swiotlb_addr, size, dir);
arch_sync_dma_flush();
}
return dma_addr; return dma_addr;
} }
@@ -1872,26 +1877,25 @@ static void rmem_swiotlb_device_release(struct reserved_mem *rmem,
dev->dma_io_tlb_mem = &io_tlb_default_mem; dev->dma_io_tlb_mem = &io_tlb_default_mem;
} }
static const struct reserved_mem_ops rmem_swiotlb_ops = { static int __init rmem_swiotlb_setup(unsigned long node,
.device_init = rmem_swiotlb_device_init, struct reserved_mem *rmem)
.device_release = rmem_swiotlb_device_release,
};
static int __init rmem_swiotlb_setup(struct reserved_mem *rmem)
{ {
unsigned long node = rmem->fdt_node;
if (of_get_flat_dt_prop(node, "reusable", NULL) || if (of_get_flat_dt_prop(node, "reusable", NULL) ||
of_get_flat_dt_prop(node, "linux,cma-default", NULL) || of_get_flat_dt_prop(node, "linux,cma-default", NULL) ||
of_get_flat_dt_prop(node, "linux,dma-default", NULL) || of_get_flat_dt_prop(node, "linux,dma-default", NULL) ||
of_get_flat_dt_prop(node, "no-map", NULL)) of_get_flat_dt_prop(node, "no-map", NULL))
return -EINVAL; return -EINVAL;
rmem->ops = &rmem_swiotlb_ops;
pr_info("Reserved memory: created restricted DMA pool at %pa, size %ld MiB\n", pr_info("Reserved memory: created restricted DMA pool at %pa, size %ld MiB\n",
&rmem->base, (unsigned long)rmem->size / SZ_1M); &rmem->base, (unsigned long)rmem->size / SZ_1M);
return 0; return 0;
} }
RESERVEDMEM_OF_DECLARE(dma, "restricted-dma-pool", rmem_swiotlb_setup); static const struct reserved_mem_ops rmem_swiotlb_ops = {
.node_init = rmem_swiotlb_setup,
.device_init = rmem_swiotlb_device_init,
.device_release = rmem_swiotlb_device_release,
};
RESERVEDMEM_OF_DECLARE(dma, "restricted-dma-pool", &rmem_swiotlb_ops);
#endif /* CONFIG_DMA_RESTRICTED_POOL */ #endif /* CONFIG_DMA_RESTRICTED_POOL */

View File

@@ -52,6 +52,7 @@ const char *cma_get_name(const struct cma *cma)
{ {
return cma->name; return cma->name;
} }
EXPORT_SYMBOL_GPL(cma_get_name);
static unsigned long cma_bitmap_aligned_mask(const struct cma *cma, static unsigned long cma_bitmap_aligned_mask(const struct cma *cma,
unsigned int align_order) unsigned int align_order)
@@ -951,6 +952,7 @@ struct page *cma_alloc(struct cma *cma, unsigned long count,
return page; return page;
} }
EXPORT_SYMBOL_GPL(cma_alloc);
static struct cma_memrange *find_cma_memrange(struct cma *cma, static struct cma_memrange *find_cma_memrange(struct cma *cma,
const struct page *pages, unsigned long count) const struct page *pages, unsigned long count)
@@ -1030,6 +1032,7 @@ bool cma_release(struct cma *cma, const struct page *pages,
return true; return true;
} }
EXPORT_SYMBOL_GPL(cma_release);
bool cma_release_frozen(struct cma *cma, const struct page *pages, bool cma_release_frozen(struct cma *cma, const struct page *pages,
unsigned long count) unsigned long count)

View File

@@ -20,12 +20,19 @@ static char *directions[] = {
"FROM_DEVICE", "FROM_DEVICE",
}; };
static char *mode[] = {
"SINGLE_MODE",
"SG_MODE",
};
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
struct map_benchmark map; struct map_benchmark map;
int fd, opt; int fd, opt;
/* default single thread, run 20 seconds on NUMA_NO_NODE */ /* default single thread, run 20 seconds on NUMA_NO_NODE */
int threads = 1, seconds = 20, node = -1; int threads = 1, seconds = 20, node = -1;
/* default single map mode */
int map_mode = DMA_MAP_BENCH_SINGLE_MODE;
/* default dma mask 32bit, bidirectional DMA */ /* default dma mask 32bit, bidirectional DMA */
int bits = 32, xdelay = 0, dir = DMA_MAP_BIDIRECTIONAL; int bits = 32, xdelay = 0, dir = DMA_MAP_BIDIRECTIONAL;
/* default granule 1 PAGESIZE */ /* default granule 1 PAGESIZE */
@@ -33,7 +40,7 @@ int main(int argc, char **argv)
int cmd = DMA_MAP_BENCHMARK; int cmd = DMA_MAP_BENCHMARK;
while ((opt = getopt(argc, argv, "t:s:n:b:d:x:g:")) != -1) { while ((opt = getopt(argc, argv, "t:s:n:b:d:x:g:m:")) != -1) {
switch (opt) { switch (opt) {
case 't': case 't':
threads = atoi(optarg); threads = atoi(optarg);
@@ -56,11 +63,20 @@ int main(int argc, char **argv)
case 'g': case 'g':
granule = atoi(optarg); granule = atoi(optarg);
break; break;
case 'm':
map_mode = atoi(optarg);
break;
default: default:
return -1; return -1;
} }
} }
if (map_mode < 0 || map_mode >= DMA_MAP_BENCH_MODE_MAX) {
fprintf(stderr, "invalid map mode, SINGLE_MODE:%d, SG_MODE: %d\n",
DMA_MAP_BENCH_SINGLE_MODE, DMA_MAP_BENCH_SG_MODE);
exit(1);
}
if (threads <= 0 || threads > DMA_MAP_MAX_THREADS) { if (threads <= 0 || threads > DMA_MAP_MAX_THREADS) {
fprintf(stderr, "invalid number of threads, must be in 1-%d\n", fprintf(stderr, "invalid number of threads, must be in 1-%d\n",
DMA_MAP_MAX_THREADS); DMA_MAP_MAX_THREADS);
@@ -110,14 +126,15 @@ int main(int argc, char **argv)
map.dma_dir = dir; map.dma_dir = dir;
map.dma_trans_ns = xdelay; map.dma_trans_ns = xdelay;
map.granule = granule; map.granule = granule;
map.map_mode = map_mode;
if (ioctl(fd, cmd, &map)) { if (ioctl(fd, cmd, &map)) {
perror("ioctl"); perror("ioctl");
exit(1); exit(1);
} }
printf("dma mapping benchmark: threads:%d seconds:%d node:%d dir:%s granule: %d\n", printf("dma mapping benchmark(%s): threads:%d seconds:%d node:%d dir:%s granule:%d\n",
threads, seconds, node, directions[dir], granule); mode[map_mode], threads, seconds, node, directions[dir], granule);
printf("average map latency(us):%.1f standard deviation:%.1f\n", printf("average map latency(us):%.1f standard deviation:%.1f\n",
map.avg_map_100ns/10.0, map.map_stddev/10.0); map.avg_map_100ns/10.0, map.map_stddev/10.0);
printf("average unmap latency(us):%.1f standard deviation:%.1f\n", printf("average unmap latency(us):%.1f standard deviation:%.1f\n",