diff --git a/Documentation/userspace-api/dma-buf-heaps.rst b/Documentation/userspace-api/dma-buf-heaps.rst index 05445c83b79a..f56b743cdb36 100644 --- a/Documentation/userspace-api/dma-buf-heaps.rst +++ b/Documentation/userspace-api/dma-buf-heaps.rst @@ -16,6 +16,13 @@ following heaps: - The ``system`` heap allocates virtually contiguous, cacheable, buffers. + - The ``system_cc_shared`` heap allocates virtually contiguous, cacheable, + buffers using shared (decrypted) memory. It is only present on + confidential computing (CoCo) VMs where memory encryption is active + (e.g., AMD SEV, Intel TDX). The allocated pages have the encryption + bit cleared, making them accessible for device DMA without TDISP + support. On non-CoCo VM configurations, this heap is not registered. + - The ``default_cma_region`` heap allocates physically contiguous, cacheable, buffers. Only present if a CMA region is present. Such a region is usually created either through the kernel commandline diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 94bf7cc43063..a0af483c0488 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -54,6 +54,7 @@ config ARM64 select ARCH_HAS_STRICT_MODULE_RWX select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_HAS_SYNC_DMA_FOR_CPU + select ARCH_HAS_BATCHED_DMA_SYNC select ARCH_HAS_SYSCALL_WRAPPER select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_ZONE_DMA_SET if EXPERT diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 9d7c9ae5ac96..effae53e9739 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -371,14 +371,13 @@ alternative_endif * [start, end) with dcache line size explicitly provided. 
* * op: operation passed to dc instruction - * domain: domain used in dsb instruction * start: starting virtual address of the region * end: end virtual address of the region * linesz: dcache line size * fixup: optional label to branch to on user fault * Corrupts: start, end, tmp */ - .macro dcache_by_myline_op op, domain, start, end, linesz, tmp, fixup + .macro dcache_by_myline_op_nosync op, start, end, linesz, tmp, fixup sub \tmp, \linesz, #1 bic \start, \start, \tmp alternative_if ARM64_WORKAROUND_4311569 @@ -412,14 +411,28 @@ alternative_if ARM64_WORKAROUND_4311569 cbnz \start, .Ldcache_op\@ .endif alternative_else_nop_endif - dsb \domain _cond_uaccess_extable .Ldcache_op\@, \fixup .endm /* * Macro to perform a data cache maintenance for the interval - * [start, end) + * [start, end) without waiting for completion + * + * op: operation passed to dc instruction + * start: starting virtual address of the region + * end: end virtual address of the region + * fixup: optional label to branch to on user fault + * Corrupts: start, end, tmp1, tmp2 + */ + .macro dcache_by_line_op_nosync op, start, end, tmp1, tmp2, fixup + dcache_line_size \tmp1, \tmp2 + dcache_by_myline_op_nosync \op, \start, \end, \tmp1, \tmp2, \fixup + .endm + +/* + * Macro to perform a data cache maintenance for the interval + * [start, end) and wait for completion * * op: operation passed to dc instruction * domain: domain used in dsb instruction @@ -429,8 +442,8 @@ alternative_else_nop_endif * Corrupts: start, end, tmp1, tmp2 */ .macro dcache_by_line_op op, domain, start, end, tmp1, tmp2, fixup - dcache_line_size \tmp1, \tmp2 - dcache_by_myline_op \op, \domain, \start, \end, \tmp1, \tmp2, \fixup + dcache_by_line_op_nosync \op, \start, \end, \tmp1, \tmp2, \fixup + dsb \domain .endm /* diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index dd2c8586a725..10a7ffadee3d 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -87,6 +87,11 @@ int 
cache_line_size(void); #define dma_get_cache_alignment cache_line_size +static inline void arch_sync_dma_flush(void) +{ + dsb(sy); +} + /* Compress a u64 MPIDR value into 32 bits. */ static inline u64 arch_compact_of_hwid(u64 id) { diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 28ab96e808ef..382b4ac3734d 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -74,6 +74,8 @@ extern void icache_inval_pou(unsigned long start, unsigned long end); extern void dcache_clean_inval_poc(unsigned long start, unsigned long end); extern void dcache_inval_poc(unsigned long start, unsigned long end); extern void dcache_clean_poc(unsigned long start, unsigned long end); +extern void dcache_inval_poc_nosync(unsigned long start, unsigned long end); +extern void dcache_clean_poc_nosync(unsigned long start, unsigned long end); extern void dcache_clean_pop(unsigned long start, unsigned long end); extern void dcache_clean_pou(unsigned long start, unsigned long end); extern long caches_clean_inval_user_pou(unsigned long start, unsigned long end); diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S index 413f899e4ac6..6cb4209f5dab 100644 --- a/arch/arm64/kernel/relocate_kernel.S +++ b/arch/arm64/kernel/relocate_kernel.S @@ -64,7 +64,8 @@ SYM_CODE_START(arm64_relocate_new_kernel) mov x19, x13 copy_page x13, x12, x1, x2, x3, x4, x5, x6, x7, x8 add x1, x19, #PAGE_SIZE - dcache_by_myline_op civac, sy, x19, x1, x15, x20 + dcache_by_myline_op_nosync civac, x19, x1, x15, x20 + dsb sy b .Lnext .Ltest_indirection: tbz x16, IND_INDIRECTION_BIT, .Ltest_destination diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 503567c864fd..ab75c050f559 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -132,17 +132,7 @@ alternative_else_nop_endif ret SYM_FUNC_END(dcache_clean_pou) -/* - * dcache_inval_poc(start, end) - * - * Ensure that any D-cache lines for the 
interval [start, end) - * are invalidated. Any partial lines at the ends of the interval are - * also cleaned to PoC to prevent data loss. - * - * - start - kernel start address of region - * - end - kernel end address of region - */ -SYM_FUNC_START(__pi_dcache_inval_poc) +.macro __dcache_inval_poc_nosync dcache_line_size x2, x3 sub x3, x2, #1 tst x1, x3 // end cache line aligned? @@ -158,11 +148,41 @@ SYM_FUNC_START(__pi_dcache_inval_poc) 3: add x0, x0, x2 cmp x0, x1 b.lo 2b +.endm + +/* + * dcache_inval_poc(start, end) + * + * Ensure that any D-cache lines for the interval [start, end) + * are invalidated. Any partial lines at the ends of the interval are + * also cleaned to PoC to prevent data loss. + * + * - start - kernel start address of region + * - end - kernel end address of region + */ +SYM_FUNC_START(__pi_dcache_inval_poc) + __dcache_inval_poc_nosync dsb sy ret SYM_FUNC_END(__pi_dcache_inval_poc) SYM_FUNC_ALIAS(dcache_inval_poc, __pi_dcache_inval_poc) +/* + * dcache_inval_poc_nosync(start, end) + * + * Issue the instructions of D-cache lines for the interval [start, end) + * for invalidation. Not necessarily cleaned to PoC till an explicit dsb + * sy is issued later + * + * - start - kernel start address of region + * - end - kernel end address of region + */ +SYM_FUNC_START(__pi_dcache_inval_poc_nosync) + __dcache_inval_poc_nosync + ret +SYM_FUNC_END(__pi_dcache_inval_poc_nosync) +SYM_FUNC_ALIAS(dcache_inval_poc_nosync, __pi_dcache_inval_poc_nosync) + /* * dcache_clean_poc(start, end) * @@ -178,6 +198,21 @@ SYM_FUNC_START(__pi_dcache_clean_poc) SYM_FUNC_END(__pi_dcache_clean_poc) SYM_FUNC_ALIAS(dcache_clean_poc, __pi_dcache_clean_poc) +/* + * dcache_clean_poc_nosync(start, end) + * + * Issue the instructions of D-cache lines for the interval [start, end). + * not necessarily cleaned to the PoC till an explicit dsb sy afterward. 
+ * + * - start - virtual start address of region + * - end - virtual end address of region + */ +SYM_FUNC_START(__pi_dcache_clean_poc_nosync) + dcache_by_line_op_nosync cvac, x0, x1, x2, x3 + ret +SYM_FUNC_END(__pi_dcache_clean_poc_nosync) +SYM_FUNC_ALIAS(dcache_clean_poc_nosync, __pi_dcache_clean_poc_nosync) + /* * dcache_clean_pop(start, end) * diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index b2b5792b2caa..ae1ae0280eef 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -17,7 +17,7 @@ void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, { unsigned long start = (unsigned long)phys_to_virt(paddr); - dcache_clean_poc(start, start + size); + dcache_clean_poc_nosync(start, start + size); } void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, @@ -28,7 +28,7 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, if (dir == DMA_TO_DEVICE) return; - dcache_inval_poc(start, start + size); + dcache_inval_poc_nosync(start, start + size); } void arch_dma_prep_coherent(struct page *page, size_t size) diff --git a/drivers/dma-buf/heaps/cma_heap.c b/drivers/dma-buf/heaps/cma_heap.c index 92865786cfc9..a359aac45579 100644 --- a/drivers/dma-buf/heaps/cma_heap.c +++ b/drivers/dma-buf/heaps/cma_heap.c @@ -14,7 +14,6 @@ #include #include -#include #include #include #include @@ -30,19 +29,6 @@ #define DEFAULT_CMA_NAME "default_cma_region" -static struct cma *dma_areas[MAX_CMA_AREAS] __initdata; -static unsigned int dma_areas_num __initdata; - -int __init dma_heap_cma_register_heap(struct cma *cma) -{ - if (dma_areas_num >= ARRAY_SIZE(dma_areas)) - return -EINVAL; - - dma_areas[dma_areas_num++] = cma; - - return 0; -} - struct cma_heap { struct dma_heap *heap; struct cma *cma; @@ -411,6 +397,7 @@ static int __init __add_cma_heap(struct cma *cma, const char *name) static int __init add_cma_heaps(void) { struct cma *default_cma = dev_get_cma_area(NULL); + struct cma *cma; unsigned int i; int ret; @@ -420,9 
+407,7 @@ static int __init add_cma_heaps(void) return ret; } - for (i = 0; i < dma_areas_num; i++) { - struct cma *cma = dma_areas[i]; - + for (i = 0; (cma = dma_contiguous_get_area_by_idx(i)) != NULL; i++) { ret = __add_cma_heap(cma, cma_get_name(cma)); if (ret) { pr_warn("Failed to add CMA heap %s", cma_get_name(cma)); diff --git a/drivers/dma-buf/heaps/system_heap.c b/drivers/dma-buf/heaps/system_heap.c index b3650d8fd651..03c2b87cb111 100644 --- a/drivers/dma-buf/heaps/system_heap.c +++ b/drivers/dma-buf/heaps/system_heap.c @@ -10,17 +10,25 @@ * Andrew F. Davis */ +#include #include #include #include #include #include +#include #include +#include #include +#include #include #include #include +struct system_heap_priv { + bool cc_shared; +}; + struct system_heap_buffer { struct dma_heap *heap; struct list_head attachments; @@ -29,6 +37,7 @@ struct system_heap_buffer { struct sg_table sg_table; int vmap_cnt; void *vaddr; + bool cc_shared; }; struct dma_heap_attachment { @@ -36,6 +45,7 @@ struct dma_heap_attachment { struct sg_table table; struct list_head list; bool mapped; + bool cc_shared; }; #define LOW_ORDER_GFP (GFP_HIGHUSER | __GFP_ZERO) @@ -52,6 +62,34 @@ static gfp_t order_flags[] = {HIGH_ORDER_GFP, HIGH_ORDER_GFP, LOW_ORDER_GFP}; static const unsigned int orders[] = {8, 4, 0}; #define NUM_ORDERS ARRAY_SIZE(orders) +static int system_heap_set_page_decrypted(struct page *page) +{ + unsigned long addr = (unsigned long)page_address(page); + unsigned int nr_pages = 1 << compound_order(page); + int ret; + + ret = set_memory_decrypted(addr, nr_pages); + if (ret) + pr_warn_ratelimited("dma-buf system heap: failed to decrypt page at %p\n", + page_address(page)); + + return ret; +} + +static int system_heap_set_page_encrypted(struct page *page) +{ + unsigned long addr = (unsigned long)page_address(page); + unsigned int nr_pages = 1 << compound_order(page); + int ret; + + ret = set_memory_encrypted(addr, nr_pages); + if (ret) + pr_warn_ratelimited("dma-buf system 
heap: failed to re-encrypt page at %p, leaking memory\n", + page_address(page)); + + return ret; +} + static int dup_sg_table(struct sg_table *from, struct sg_table *to) { struct scatterlist *sg, *new_sg; @@ -90,6 +128,7 @@ static int system_heap_attach(struct dma_buf *dmabuf, a->dev = attachment->dev; INIT_LIST_HEAD(&a->list); a->mapped = false; + a->cc_shared = buffer->cc_shared; attachment->priv = a; @@ -119,9 +158,11 @@ static struct sg_table *system_heap_map_dma_buf(struct dma_buf_attachment *attac { struct dma_heap_attachment *a = attachment->priv; struct sg_table *table = &a->table; + unsigned long attrs; int ret; - ret = dma_map_sgtable(attachment->dev, table, direction, 0); + attrs = a->cc_shared ? DMA_ATTR_CC_SHARED : 0; + ret = dma_map_sgtable(attachment->dev, table, direction, attrs); if (ret) return ERR_PTR(ret); @@ -188,8 +229,13 @@ static int system_heap_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) unsigned long addr = vma->vm_start; unsigned long pgoff = vma->vm_pgoff; struct scatterlist *sg; + pgprot_t prot; int i, ret; + prot = vma->vm_page_prot; + if (buffer->cc_shared) + prot = pgprot_decrypted(prot); + for_each_sgtable_sg(table, sg, i) { unsigned long n = sg->length >> PAGE_SHIFT; @@ -206,8 +252,7 @@ static int system_heap_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) if (addr + size > vma->vm_end) size = vma->vm_end - addr; - ret = remap_pfn_range(vma, addr, page_to_pfn(page), - size, vma->vm_page_prot); + ret = remap_pfn_range(vma, addr, page_to_pfn(page), size, prot); if (ret) return ret; @@ -225,6 +270,7 @@ static void *system_heap_do_vmap(struct system_heap_buffer *buffer) struct page **pages = vmalloc(sizeof(struct page *) * npages); struct page **tmp = pages; struct sg_page_iter piter; + pgprot_t prot; void *vaddr; if (!pages) @@ -235,7 +281,10 @@ static void *system_heap_do_vmap(struct system_heap_buffer *buffer) *tmp++ = sg_page_iter_page(&piter); } - vaddr = vmap(pages, npages, VM_MAP, PAGE_KERNEL); + prot = 
PAGE_KERNEL; + if (buffer->cc_shared) + prot = pgprot_decrypted(prot); + vaddr = vmap(pages, npages, VM_MAP, prot); vfree(pages); if (!vaddr) @@ -296,6 +345,14 @@ static void system_heap_dma_buf_release(struct dma_buf *dmabuf) for_each_sgtable_sg(table, sg, i) { struct page *page = sg_page(sg); + /* + * Intentionally leak pages that cannot be re-encrypted + * to prevent shared memory from being reused. + */ + if (buffer->cc_shared && + system_heap_set_page_encrypted(page)) + continue; + __free_pages(page, compound_order(page)); } sg_free_table(table); @@ -347,6 +404,8 @@ static struct dma_buf *system_heap_allocate(struct dma_heap *heap, DEFINE_DMA_BUF_EXPORT_INFO(exp_info); unsigned long size_remaining = len; unsigned int max_order = orders[0]; + struct system_heap_priv *priv = dma_heap_get_drvdata(heap); + bool cc_shared = priv->cc_shared; struct dma_buf *dmabuf; struct sg_table *table; struct scatterlist *sg; @@ -362,6 +421,7 @@ static struct dma_buf *system_heap_allocate(struct dma_heap *heap, mutex_init(&buffer->lock); buffer->heap = heap; buffer->len = len; + buffer->cc_shared = cc_shared; INIT_LIST_HEAD(&pages); i = 0; @@ -396,6 +456,14 @@ static struct dma_buf *system_heap_allocate(struct dma_heap *heap, list_del(&page->lru); } + if (cc_shared) { + for_each_sgtable_sg(table, sg, i) { + ret = system_heap_set_page_decrypted(sg_page(sg)); + if (ret) + goto free_pages; + } + } + /* create the dmabuf */ exp_info.exp_name = dma_heap_get_name(heap); exp_info.ops = &system_heap_buf_ops; @@ -413,6 +481,13 @@ free_pages: for_each_sgtable_sg(table, sg, i) { struct page *p = sg_page(sg); + /* + * Intentionally leak pages that cannot be re-encrypted + * to prevent shared memory from being reused. 
+ */ + if (buffer->cc_shared && + system_heap_set_page_encrypted(p)) + continue; __free_pages(p, compound_order(p)); } sg_free_table(table); @@ -428,6 +503,14 @@ static const struct dma_heap_ops system_heap_ops = { .allocate = system_heap_allocate, }; +static struct system_heap_priv system_heap_priv = { + .cc_shared = false, +}; + +static struct system_heap_priv system_heap_cc_shared_priv = { + .cc_shared = true, +}; + static int __init system_heap_create(void) { struct dma_heap_export_info exp_info; @@ -435,8 +518,18 @@ static int __init system_heap_create(void) exp_info.name = "system"; exp_info.ops = &system_heap_ops; - exp_info.priv = NULL; + exp_info.priv = &system_heap_priv; + sys_heap = dma_heap_add(&exp_info); + if (IS_ERR(sys_heap)) + return PTR_ERR(sys_heap); + + if (IS_ENABLED(CONFIG_HIGHMEM) || + !cc_platform_has(CC_ATTR_MEM_ENCRYPT)) + return 0; + + exp_info.name = "system_cc_shared"; + exp_info.priv = &system_heap_cc_shared_priv; sys_heap = dma_heap_add(&exp_info); if (IS_ERR(sys_heap)) return PTR_ERR(sys_heap); diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 095235334eaf..54d96e847f16 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -1106,8 +1106,10 @@ void iommu_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, return; phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle); - if (!dev_is_dma_coherent(dev)) + if (!dev_is_dma_coherent(dev)) { arch_sync_dma_for_cpu(phys, size, dir); + arch_sync_dma_flush(); + } swiotlb_sync_single_for_cpu(dev, phys, size, dir); } @@ -1123,8 +1125,10 @@ void iommu_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle); swiotlb_sync_single_for_device(dev, phys, size, dir); - if (!dev_is_dma_coherent(dev)) + if (!dev_is_dma_coherent(dev)) { arch_sync_dma_for_device(phys, size, dir); + arch_sync_dma_flush(); + } } void iommu_dma_sync_sg_for_cpu(struct device *dev, 
struct scatterlist *sgl, @@ -1133,13 +1137,15 @@ void iommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, struct scatterlist *sg; int i; - if (sg_dma_is_swiotlb(sgl)) + if (sg_dma_is_swiotlb(sgl)) { for_each_sg(sgl, sg, nelems, i) iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg), sg->length, dir); - else if (!dev_is_dma_coherent(dev)) + } else if (!dev_is_dma_coherent(dev)) { for_each_sg(sgl, sg, nelems, i) arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir); + arch_sync_dma_flush(); + } } void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, @@ -1148,14 +1154,16 @@ void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, struct scatterlist *sg; int i; - if (sg_dma_is_swiotlb(sgl)) + if (sg_dma_is_swiotlb(sgl)) { for_each_sg(sgl, sg, nelems, i) iommu_dma_sync_single_for_device(dev, sg_dma_address(sg), sg->length, dir); - else if (!dev_is_dma_coherent(dev)) + } else if (!dev_is_dma_coherent(dev)) { for_each_sg(sgl, sg, nelems, i) arch_sync_dma_for_device(sg_phys(sg), sg->length, dir); + arch_sync_dma_flush(); + } } static phys_addr_t iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys, @@ -1230,8 +1238,10 @@ dma_addr_t iommu_dma_map_phys(struct device *dev, phys_addr_t phys, size_t size, return DMA_MAPPING_ERROR; } - if (!coherent && !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO))) + if (!coherent && !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO))) { arch_sync_dma_for_device(phys, size, dir); + arch_sync_dma_flush(); + } iova = __iommu_dma_map(dev, phys, size, prot, dma_mask); if (iova == DMA_MAPPING_ERROR && @@ -1254,8 +1264,10 @@ void iommu_dma_unmap_phys(struct device *dev, dma_addr_t dma_handle, if (WARN_ON(!phys)) return; - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev)) + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev)) { arch_sync_dma_for_cpu(phys, size, dir); + arch_sync_dma_flush(); + } __iommu_dma_unmap(dev, dma_handle, size); @@ 
-2004,6 +2016,8 @@ int dma_iova_sync(struct device *dev, struct dma_iova_state *state, dma_addr_t addr = state->addr + offset; size_t iova_start_pad = iova_offset(iovad, addr); + if (!dev_is_dma_coherent(dev)) + arch_sync_dma_flush(); return iommu_sync_map(domain, addr - iova_start_pad, iova_align(iovad, size + iova_start_pad)); } @@ -2017,6 +2031,8 @@ static void iommu_dma_iova_unlink_range_slow(struct device *dev, struct iommu_dma_cookie *cookie = domain->iova_cookie; struct iova_domain *iovad = &cookie->iovad; size_t iova_start_pad = iova_offset(iovad, addr); + bool need_sync_dma = !dev_is_dma_coherent(dev) && + !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO)); dma_addr_t end = addr + size; do { @@ -2040,6 +2056,9 @@ static void iommu_dma_iova_unlink_range_slow(struct device *dev, addr += len; iova_start_pad = 0; } while (addr < end); + + if (need_sync_dma) + arch_sync_dma_flush(); } static void __iommu_dma_iova_unlink(struct device *dev, diff --git a/drivers/memory/tegra/tegra210-emc-table.c b/drivers/memory/tegra/tegra210-emc-table.c index 34a8785d2861..4b3c478b2743 100644 --- a/drivers/memory/tegra/tegra210-emc-table.c +++ b/drivers/memory/tegra/tegra210-emc-table.c @@ -70,19 +70,20 @@ static void tegra210_emc_table_device_release(struct reserved_mem *rmem, memunmap(timings); } -static const struct reserved_mem_ops tegra210_emc_table_ops = { - .device_init = tegra210_emc_table_device_init, - .device_release = tegra210_emc_table_device_release, -}; - -static int tegra210_emc_table_init(struct reserved_mem *rmem) +static int tegra210_emc_table_init(unsigned long node, + struct reserved_mem *rmem) { pr_debug("Tegra210 EMC table at %pa, size %lu bytes\n", &rmem->base, (unsigned long)rmem->size); - rmem->ops = &tegra210_emc_table_ops; - return 0; } + +static const struct reserved_mem_ops tegra210_emc_table_ops = { + .node_init = tegra210_emc_table_init, + .device_init = tegra210_emc_table_device_init, + .device_release = tegra210_emc_table_device_release, +}; + 
RESERVEDMEM_OF_DECLARE(tegra210_emc_table, "nvidia,tegra210-emc-table", - tegra210_emc_table_init); + &tegra210_emc_table_ops); diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 2967e4aff807..104e697bee7b 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -1295,7 +1295,7 @@ void __init unflatten_device_tree(void) void *fdt = initial_boot_params; /* Save the statically-placed regions in the reserved_mem array */ - fdt_scan_reserved_mem_reg_nodes(); + fdt_scan_reserved_mem_late(); /* Populate an empty root node when bootloader doesn't provide one */ if (!fdt) { diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h index df0bb00349e0..0ae16da066e2 100644 --- a/drivers/of/of_private.h +++ b/drivers/of/of_private.h @@ -186,7 +186,7 @@ static inline struct device_node *__of_get_dma_parent(const struct device_node * #endif int fdt_scan_reserved_mem(void); -void __init fdt_scan_reserved_mem_reg_nodes(void); +void __init fdt_scan_reserved_mem_late(void); bool of_fdt_device_is_available(const void *blob, unsigned long node); diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index 1fd28f805610..8d5777cb5d1b 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -24,8 +24,6 @@ #include #include #include -#include -#include #include "of_private.h" @@ -104,30 +102,12 @@ static void __init alloc_reserved_mem_array(void) reserved_mem = new_array; } -static void __init fdt_init_reserved_mem_node(struct reserved_mem *rmem); -/* - * fdt_reserved_mem_save_node() - save fdt node for second pass initialization - */ -static void __init fdt_reserved_mem_save_node(unsigned long node, const char *uname, - phys_addr_t base, phys_addr_t size) -{ - struct reserved_mem *rmem = &reserved_mem[reserved_mem_count]; - - if (reserved_mem_count == total_reserved_mem_cnt) { - pr_err("not enough space for all defined regions.\n"); - return; - } - - rmem->fdt_node = node; - rmem->name = uname; - rmem->base = base; - rmem->size = size; - - 
/* Call the region specific initialization function */ - fdt_init_reserved_mem_node(rmem); - - reserved_mem_count++; -} +static void fdt_init_reserved_mem_node(unsigned long node, const char *uname, + phys_addr_t base, phys_addr_t size); +static int fdt_validate_reserved_mem_node(unsigned long node, + phys_addr_t *align); +static int fdt_fixup_reserved_mem_node(unsigned long node, + phys_addr_t base, phys_addr_t size); static int __init early_init_dt_reserve_memory(phys_addr_t base, phys_addr_t size, bool nomap) @@ -154,21 +134,19 @@ static int __init __reserved_mem_reserve_reg(unsigned long node, const char *uname) { phys_addr_t base, size; - int i, len; + int i, len, err; const __be32 *prop; - bool nomap, default_cma; + bool nomap; prop = of_flat_dt_get_addr_size_prop(node, "reg", &len); if (!prop) return -ENOENT; nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL; - default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL); - if (default_cma && cma_skip_dt_default_reserved_mem()) { - pr_err("Skipping dt linux,cma-default for \"cma=\" kernel param.\n"); - return -EINVAL; - } + err = fdt_validate_reserved_mem_node(node, NULL); + if (err && err != -ENODEV) + return err; for (i = 0; i < len; i++) { u64 b, s; @@ -179,10 +157,7 @@ static int __init __reserved_mem_reserve_reg(unsigned long node, size = s; if (size && early_init_dt_reserve_memory(base, size, nomap) == 0) { - /* Architecture specific contiguous memory fixup. 
*/ - if (of_flat_dt_is_compatible(node, "shared-dma-pool") && - of_get_flat_dt_prop(node, "reusable", NULL)) - dma_contiguous_early_fixup(base, size); + fdt_fixup_reserved_mem_node(node, base, size); pr_debug("Reserved memory: reserved region for node '%s': base %pa, size %lu MiB\n", uname, &base, (unsigned long)(size / SZ_1M)); } else { @@ -216,19 +191,66 @@ static int __init __reserved_mem_check_root(unsigned long node) return 0; } -static void __init __rmem_check_for_overlap(void); +static int __init __rmem_cmp(const void *a, const void *b) +{ + const struct reserved_mem *ra = a, *rb = b; + + if (ra->base < rb->base) + return -1; + + if (ra->base > rb->base) + return 1; + + /* + * Put the dynamic allocations (address == 0, size == 0) before static + * allocations at address 0x0 so that overlap detection works + * correctly. + */ + if (ra->size < rb->size) + return -1; + if (ra->size > rb->size) + return 1; + + return 0; +} + +static void __init __rmem_check_for_overlap(void) +{ + int i; + + if (reserved_mem_count < 2) + return; + + sort(reserved_mem, reserved_mem_count, sizeof(reserved_mem[0]), + __rmem_cmp, NULL); + for (i = 0; i < reserved_mem_count - 1; i++) { + struct reserved_mem *this, *next; + + this = &reserved_mem[i]; + next = &reserved_mem[i + 1]; + + if (this->base + this->size > next->base) { + phys_addr_t this_end, next_end; + + this_end = this->base + this->size; + next_end = next->base + next->size; + pr_err("OVERLAP DETECTED!\n%s (%pa--%pa) overlaps with %s (%pa--%pa)\n", + this->name, &this->base, &this_end, + next->name, &next->base, &next_end); + } + } +} /** - * fdt_scan_reserved_mem_reg_nodes() - Store info for the "reg" defined - * reserved memory regions. + * fdt_scan_reserved_mem_late() - Scan FDT and initialize remaining reserved + * memory regions. * - * This function is used to scan through the DT and store the - * information for the reserved memory regions that are defined using - * the "reg" property. 
The region node number, name, base address, and - * size are all stored in the reserved_mem array by calling the - * fdt_reserved_mem_save_node() function. + * This function is used to scan again through the DT and initialize the + * "static" reserved memory regions, that are defined using the "reg" + * property. Each such region is then initialized with its specific init + * function and stored in the global reserved_mem array. */ -void __init fdt_scan_reserved_mem_reg_nodes(void) +void __init fdt_scan_reserved_mem_late(void) { const void *fdt = initial_boot_params; phys_addr_t base, size; @@ -253,23 +275,25 @@ void __init fdt_scan_reserved_mem_reg_nodes(void) fdt_for_each_subnode(child, fdt, node) { const char *uname; - bool default_cma = of_get_flat_dt_prop(child, "linux,cma-default", NULL); u64 b, s; + int ret; if (!of_fdt_device_is_available(fdt, child)) continue; - if (default_cma && cma_skip_dt_default_reserved_mem()) - continue; if (!of_flat_dt_get_addr_size(child, "reg", &b, &s)) continue; + ret = fdt_validate_reserved_mem_node(child, NULL); + if (ret && ret != -ENODEV) + continue; + base = b; size = s; if (size) { uname = fdt_get_name(fdt, child, NULL); - fdt_reserved_mem_save_node(child, uname, base, size); + fdt_init_reserved_mem_node(child, uname, base, size); } } @@ -280,7 +304,14 @@ void __init fdt_scan_reserved_mem_reg_nodes(void) static int __init __reserved_mem_alloc_size(unsigned long node, const char *uname); /* - * fdt_scan_reserved_mem() - scan a single FDT node for reserved memory + * fdt_scan_reserved_mem() - reserve and allocate memory occupied by + * reserved memory regions. + * + * This function is used to scan through the FDT and mark memory occupied + * by all static (defined by the "reg" property) reserved memory regions. + * Then memory for all dynamic regions (defined by size & alignment) is + * allocated, a region specific init function is called and region information + * is stored in the reserved_mem array. 
*/ int __init fdt_scan_reserved_mem(void) { @@ -397,7 +428,7 @@ static int __init __reserved_mem_alloc_size(unsigned long node, const char *unam phys_addr_t base = 0, align = 0, size; int i, len; const __be32 *prop; - bool nomap, default_cma; + bool nomap; int ret; prop = of_get_flat_dt_prop(node, "size", &len); @@ -421,19 +452,10 @@ static int __init __reserved_mem_alloc_size(unsigned long node, const char *unam } nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL; - default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL); - if (default_cma && cma_skip_dt_default_reserved_mem()) { - pr_err("Skipping dt linux,cma-default for \"cma=\" kernel param.\n"); - return -EINVAL; - } - - /* Need adjust the alignment to satisfy the CMA requirement */ - if (IS_ENABLED(CONFIG_CMA) - && of_flat_dt_is_compatible(node, "shared-dma-pool") - && of_get_flat_dt_prop(node, "reusable", NULL) - && !nomap) - align = max_t(phys_addr_t, align, CMA_MIN_ALIGNMENT_BYTES); + ret = fdt_validate_reserved_mem_node(node, &align); + if (ret && ret != -ENODEV) + return ret; prop = of_flat_dt_get_addr_size_prop(node, "alloc-ranges", &len); if (prop) { @@ -468,121 +490,151 @@ static int __init __reserved_mem_alloc_size(unsigned long node, const char *unam uname, (unsigned long)(size / SZ_1M)); return -ENOMEM; } - /* Architecture specific contiguous memory fixup. 
*/ - if (of_flat_dt_is_compatible(node, "shared-dma-pool") && - of_get_flat_dt_prop(node, "reusable", NULL)) - dma_contiguous_early_fixup(base, size); - /* Save region in the reserved_mem array */ - fdt_reserved_mem_save_node(node, uname, base, size); + + fdt_fixup_reserved_mem_node(node, base, size); + fdt_init_reserved_mem_node(node, uname, base, size); + return 0; } +extern const struct of_device_id __reservedmem_of_table[]; static const struct of_device_id __rmem_of_table_sentinel __used __section("__reservedmem_of_table_end"); -/* - * __reserved_mem_init_node() - call region specific reserved memory init code +/** + * fdt_fixup_reserved_mem_node() - call fixup function for a reserved memory node + * @node: FDT node to fixup + * @base: base address of the reserved memory region + * @size: size of the reserved memory region + * + * This function iterates through the reserved memory drivers and calls + * the node_fixup callback for the compatible entry matching the node. + * + * Return: 0 on success, -ENODEV if no compatible match found */ -static int __init __reserved_mem_init_node(struct reserved_mem *rmem) +static int __init fdt_fixup_reserved_mem_node(unsigned long node, + phys_addr_t base, phys_addr_t size) { - extern const struct of_device_id __reservedmem_of_table[]; const struct of_device_id *i; - int ret = -ENOENT; + int ret = -ENODEV; - for (i = __reservedmem_of_table; i < &__rmem_of_table_sentinel; i++) { - reservedmem_of_init_fn initfn = i->data; - const char *compat = i->compatible; + for (i = __reservedmem_of_table; ret == -ENODEV && + i < &__rmem_of_table_sentinel; i++) { + const struct reserved_mem_ops *ops = i->data; - if (!of_flat_dt_is_compatible(rmem->fdt_node, compat)) + if (!of_flat_dt_is_compatible(node, i->compatible)) continue; - ret = initfn(rmem); + if (ops->node_fixup) + ret = ops->node_fixup(node, base, size); + } + return ret; +} + +/** + * fdt_validate_reserved_mem_node() - validate a reserved memory node + * @node: FDT node to 
validate + * @align: pointer to store the validated alignment (may be modified by callback) + * + * This function iterates through the reserved memory drivers and calls + * the node_validate callback for the compatible entry matching the node. + * + * Return: 0 on success, -ENODEV if no compatible match found + */ +static int __init fdt_validate_reserved_mem_node(unsigned long node, phys_addr_t *align) +{ + const struct of_device_id *i; + int ret = -ENODEV; + + for (i = __reservedmem_of_table; ret == -ENODEV && + i < &__rmem_of_table_sentinel; i++) { + const struct reserved_mem_ops *ops = i->data; + + if (!of_flat_dt_is_compatible(node, i->compatible)) + continue; + + if (ops->node_validate) + ret = ops->node_validate(node, align); + } + return ret; +} + +/** + * __reserved_mem_init_node() - initialize a reserved memory region + * @rmem: reserved_mem structure to initialize + * @node: FDT node describing the reserved memory region + * + * This function iterates through the reserved memory drivers and calls the + * node_init callback for the compatible entry matching the node. On success, + * the operations pointer is stored in the reserved_mem structure. 
+ * + * Return: 0 on success, -ENODEV if no compatible match found + */ +static int __init __reserved_mem_init_node(struct reserved_mem *rmem, + unsigned long node) +{ + const struct of_device_id *i; + int ret = -ENODEV; + + for (i = __reservedmem_of_table; ret == -ENODEV && + i < &__rmem_of_table_sentinel; i++) { + const struct reserved_mem_ops *ops = i->data; + const char *compat = i->compatible; + + if (!of_flat_dt_is_compatible(node, compat)) + continue; + + ret = ops->node_init(node, rmem); if (ret == 0) { + rmem->ops = ops; pr_info("initialized node %s, compatible id %s\n", rmem->name, compat); - break; + return ret; } } return ret; } -static int __init __rmem_cmp(const void *a, const void *b) -{ - const struct reserved_mem *ra = a, *rb = b; - - if (ra->base < rb->base) - return -1; - - if (ra->base > rb->base) - return 1; - - /* - * Put the dynamic allocations (address == 0, size == 0) before static - * allocations at address 0x0 so that overlap detection works - * correctly. - */ - if (ra->size < rb->size) - return -1; - if (ra->size > rb->size) - return 1; - - if (ra->fdt_node < rb->fdt_node) - return -1; - if (ra->fdt_node > rb->fdt_node) - return 1; - - return 0; -} - -static void __init __rmem_check_for_overlap(void) -{ - int i; - - if (reserved_mem_count < 2) - return; - - sort(reserved_mem, reserved_mem_count, sizeof(reserved_mem[0]), - __rmem_cmp, NULL); - for (i = 0; i < reserved_mem_count - 1; i++) { - struct reserved_mem *this, *next; - - this = &reserved_mem[i]; - next = &reserved_mem[i + 1]; - - if (this->base + this->size > next->base) { - phys_addr_t this_end, next_end; - - this_end = this->base + this->size; - next_end = next->base + next->size; - pr_err("OVERLAP DETECTED!\n%s (%pa--%pa) overlaps with %s (%pa--%pa)\n", - this->name, &this->base, &this_end, - next->name, &next->base, &next_end); - } - } -} - /** * fdt_init_reserved_mem_node() - Initialize a reserved memory region - * @rmem: reserved_mem struct of the memory region to be 
initialized. + * @node: fdt node of the initialized region + * @uname: name of the reserved memory node + * @base: base address of the reserved memory region + * @size: size of the reserved memory region * - * This function is used to call the region specific initialization - * function for a reserved memory region. + * This function calls the region-specific initialization function for a + * reserved memory region and saves all region-specific data to the + * reserved_mem array to allow of_reserved_mem_lookup() to find it. */ -static void __init fdt_init_reserved_mem_node(struct reserved_mem *rmem) +static void __init fdt_init_reserved_mem_node(unsigned long node, const char *uname, + phys_addr_t base, phys_addr_t size) { - unsigned long node = rmem->fdt_node; int err = 0; bool nomap; + struct reserved_mem *rmem = &reserved_mem[reserved_mem_count]; + + if (reserved_mem_count == total_reserved_mem_cnt) { + pr_err("not enough space for all defined regions.\n"); + return; + } + + rmem->name = uname; + rmem->base = base; + rmem->size = size; + nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL; - err = __reserved_mem_init_node(rmem); - if (err != 0 && err != -ENOENT) { + err = __reserved_mem_init_node(rmem, node); + if (err != 0 && err != -ENODEV) { pr_info("node %s compatible matching fail\n", rmem->name); + rmem->name = NULL; + if (nomap) memblock_clear_nomap(rmem->base, rmem->size); else memblock_phys_free(rmem->base, rmem->size); + return; } else { phys_addr_t end = rmem->base + rmem->size - 1; bool reusable = @@ -594,6 +646,8 @@ static void __init fdt_init_reserved_mem_node(struct reserved_mem *rmem) reusable ? "reusable" : "non-reusable", rmem->name ? 
rmem->name : "unknown"); } + + reserved_mem_count++; } struct rmem_assigned_device { diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 4a734ee38994..2cbf2b588f5b 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -262,10 +262,12 @@ static dma_addr_t xen_swiotlb_map_phys(struct device *dev, phys_addr_t phys, done: if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) { - if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dev_addr)))) + if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dev_addr)))) { arch_sync_dma_for_device(phys, size, dir); - else + arch_sync_dma_flush(); + } else { xen_dma_sync_for_device(dev, dev_addr, size, dir); + } } return dev_addr; } @@ -287,10 +289,12 @@ static void xen_swiotlb_unmap_phys(struct device *hwdev, dma_addr_t dev_addr, BUG_ON(dir == DMA_NONE); if (!dev_is_dma_coherent(hwdev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) { - if (pfn_valid(PFN_DOWN(dma_to_phys(hwdev, dev_addr)))) + if (pfn_valid(PFN_DOWN(dma_to_phys(hwdev, dev_addr)))) { arch_sync_dma_for_cpu(paddr, size, dir); - else + arch_sync_dma_flush(); + } else { xen_dma_sync_for_cpu(hwdev, dev_addr, size, dir); + } } /* NOTE: We use dev_addr here, not paddr! 
*/ @@ -308,10 +312,12 @@ xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr, struct io_tlb_pool *pool; if (!dev_is_dma_coherent(dev)) { - if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr)))) + if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr)))) { arch_sync_dma_for_cpu(paddr, size, dir); - else + arch_sync_dma_flush(); + } else { xen_dma_sync_for_cpu(dev, dma_addr, size, dir); + } } pool = xen_swiotlb_find_pool(dev, dma_addr); @@ -331,10 +337,12 @@ xen_swiotlb_sync_single_for_device(struct device *dev, dma_addr_t dma_addr, __swiotlb_sync_single_for_device(dev, paddr, size, dir, pool); if (!dev_is_dma_coherent(dev)) { - if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr)))) + if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr)))) { arch_sync_dma_for_device(paddr, size, dir); - else + arch_sync_dma_flush(); + } else { xen_dma_sync_for_device(dev, dma_addr, size, dir); + } } } diff --git a/include/linux/cma.h b/include/linux/cma.h index d0793eaaadaa..8555d38a97b1 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -61,14 +61,4 @@ extern int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data) extern bool cma_intersects(struct cma *cma, unsigned long start, unsigned long end); extern void cma_reserve_pages_on_error(struct cma *cma); - -#ifdef CONFIG_DMA_CMA -extern bool cma_skip_dt_default_reserved_mem(void); -#else -static inline bool cma_skip_dt_default_reserved_mem(void) -{ - return false; -} -#endif - #endif diff --git a/include/linux/dma-buf/heaps/cma.h b/include/linux/dma-buf/heaps/cma.h deleted file mode 100644 index e751479e21e7..000000000000 --- a/include/linux/dma-buf/heaps/cma.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef DMA_BUF_HEAP_CMA_H_ -#define DMA_BUF_HEAP_CMA_H_ - -struct cma; - -#ifdef CONFIG_DMABUF_HEAPS_CMA -int dma_heap_cma_register_heap(struct cma *cma); -#else -static inline int dma_heap_cma_register_heap(struct cma *cma) -{ - return 0; -} -#endif // 
CONFIG_DMABUF_HEAPS_CMA - -#endif // DMA_BUF_HEAP_CMA_H_ diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 60b63756df82..6a1832a73cad 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -91,14 +91,8 @@ static inline void set_dma_ops(struct device *dev, #endif /* CONFIG_ARCH_HAS_DMA_OPS */ #ifdef CONFIG_DMA_CMA -extern struct cma *dma_contiguous_default_area; - -static inline struct cma *dev_get_cma_area(struct device *dev) -{ - if (dev && dev->cma_area) - return dev->cma_area; - return dma_contiguous_default_area; -} +struct cma *dev_get_cma_area(struct device *dev); +struct cma *dma_contiguous_get_area_by_idx(unsigned int idx); void dma_contiguous_reserve(phys_addr_t addr_limit); int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, @@ -117,6 +111,10 @@ static inline struct cma *dev_get_cma_area(struct device *dev) { return NULL; } +static inline struct cma *dma_contiguous_get_area_by_idx(unsigned int idx) +{ + return NULL; +} static inline void dma_contiguous_reserve(phys_addr_t limit) { } @@ -147,9 +145,6 @@ static inline void dma_free_contiguous(struct device *dev, struct page *page, { __free_pages(page, get_order(size)); } -static inline void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) -{ -} #endif /* CONFIG_DMA_CMA*/ #ifdef CONFIG_DMA_DECLARE_COHERENT @@ -361,6 +356,12 @@ static inline void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, } #endif /* ARCH_HAS_SYNC_DMA_FOR_CPU */ +#ifndef CONFIG_ARCH_HAS_BATCHED_DMA_SYNC +static inline void arch_sync_dma_flush(void) +{ +} +#endif + #ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL void arch_sync_dma_for_cpu_all(void); #else diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 99ef042ecdb4..db8ab24a54f4 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -9,7 +9,7 @@ #include #include -/** +/* * List of possible attributes associated with a DMA mapping. 
The semantics * of each attribute should be defined in Documentation/core-api/dma-attributes.rst. */ @@ -92,6 +92,16 @@ * flushing. */ #define DMA_ATTR_REQUIRE_COHERENT (1UL << 12) +/* + * DMA_ATTR_CC_SHARED: Indicates the DMA mapping is shared (decrypted) for + * confidential computing guests. For normal system memory the caller must have + * called set_memory_decrypted(), and pgprot_decrypted must be used when + * creating CPU PTEs for the mapping. The same shared semantic may be passed + * to the vIOMMU when it sets up the IOPTE. For MMIO use together with + * DMA_ATTR_MMIO to indicate shared MMIO. Unless DMA_ATTR_MMIO is provided + * a struct page is required. + */ +#define DMA_ATTR_CC_SHARED (1UL << 13) /* * A dma_addr_t can hold any valid DMA or bus address for the platform. It can diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h index f573423359f4..e8b20b29fa68 100644 --- a/include/linux/of_reserved_mem.h +++ b/include/linux/of_reserved_mem.h @@ -11,7 +11,6 @@ struct resource; struct reserved_mem { const char *name; - unsigned long fdt_node; const struct reserved_mem_ops *ops; phys_addr_t base; phys_addr_t size; @@ -19,18 +18,20 @@ struct reserved_mem { }; struct reserved_mem_ops { + int (*node_validate)(unsigned long fdt_node, phys_addr_t *align); + int (*node_fixup)(unsigned long fdt_node, phys_addr_t base, + phys_addr_t size); + int (*node_init)(unsigned long fdt_node, struct reserved_mem *rmem); int (*device_init)(struct reserved_mem *rmem, struct device *dev); void (*device_release)(struct reserved_mem *rmem, struct device *dev); }; -typedef int (*reservedmem_of_init_fn)(struct reserved_mem *rmem); - #ifdef CONFIG_OF_RESERVED_MEM -#define RESERVEDMEM_OF_DECLARE(name, compat, init) \ - _OF_DECLARE(reservedmem, name, compat, init, reservedmem_of_init_fn) +#define RESERVEDMEM_OF_DECLARE(name, compat, ops) \ + _OF_DECLARE(reservedmem, name, compat, ops, struct reserved_mem_ops *) int of_reserved_mem_device_init_by_idx(struct 
device *dev, struct device_node *np, int idx); @@ -48,8 +49,9 @@ int of_reserved_mem_region_count(const struct device_node *np); #else -#define RESERVEDMEM_OF_DECLARE(name, compat, init) \ - _OF_DECLARE_STUB(reservedmem, name, compat, init, reservedmem_of_init_fn) +#define RESERVEDMEM_OF_DECLARE(name, compat, ops) \ + _OF_DECLARE_STUB(reservedmem, name, compat, ops, \ + struct reserved_mem_ops *) static inline int of_reserved_mem_device_init_by_idx(struct device *dev, struct device_node *np, int idx) diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h index 63597b004424..31c9ddf72c9d 100644 --- a/include/trace/events/dma.h +++ b/include/trace/events/dma.h @@ -34,7 +34,8 @@ TRACE_DEFINE_ENUM(DMA_NONE); { DMA_ATTR_PRIVILEGED, "PRIVILEGED" }, \ { DMA_ATTR_MMIO, "MMIO" }, \ { DMA_ATTR_DEBUGGING_IGNORE_CACHELINES, "CACHELINES_OVERLAP" }, \ - { DMA_ATTR_REQUIRE_COHERENT, "REQUIRE_COHERENT" }) + { DMA_ATTR_REQUIRE_COHERENT, "REQUIRE_COHERENT" }, \ + { DMA_ATTR_CC_SHARED, "CC_SHARED" }) DECLARE_EVENT_CLASS(dma_map, TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr, diff --git a/include/uapi/linux/map_benchmark.h b/include/uapi/linux/map_benchmark.h index c2d91088a40d..4b17829a9f17 100644 --- a/include/uapi/linux/map_benchmark.h +++ b/include/uapi/linux/map_benchmark.h @@ -17,6 +17,12 @@ #define DMA_MAP_TO_DEVICE 1 #define DMA_MAP_FROM_DEVICE 2 +enum { + DMA_MAP_BENCH_SINGLE_MODE, + DMA_MAP_BENCH_SG_MODE, + DMA_MAP_BENCH_MODE_MAX +}; + struct map_benchmark { __u64 avg_map_100ns; /* average map latency in 100ns */ __u64 map_stddev; /* standard deviation of map latency */ @@ -28,8 +34,11 @@ struct map_benchmark { __u32 dma_bits; /* DMA addressing capability */ __u32 dma_dir; /* DMA data direction */ __u32 dma_trans_ns; /* time for DMA transmission in ns */ - __u32 granule; /* how many PAGE_SIZE will do map/unmap once a time */ - __u8 expansion[76]; /* For future use */ + __u32 granule; /* - SINGLE_MODE: number of pages mapped/unmapped 
per operation + * - SG_MODE: number of scatterlist entries (each maps one page) + */ + __u8 map_mode; /* the mode of dma map */ + __u8 expansion[75]; /* For future use */ }; #endif /* _UAPI_DMA_BENCHMARK_H */ diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index 159900736f25..bfef21b4a9ae 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -72,6 +72,9 @@ config ARCH_HAS_DMA_PREP_COHERENT config ARCH_HAS_FORCE_DMA_UNENCRYPTED bool +config ARCH_HAS_BATCHED_DMA_SYNC + bool + # # Select this option if the architecture assumes DMA devices are coherent # by default. diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c index 1147497bc512..bcdc0f76d2e8 100644 --- a/kernel/dma/coherent.c +++ b/kernel/dma/coherent.c @@ -362,17 +362,11 @@ static void rmem_dma_device_release(struct reserved_mem *rmem, dev->dma_mem = NULL; } -static const struct reserved_mem_ops rmem_dma_ops = { - .device_init = rmem_dma_device_init, - .device_release = rmem_dma_device_release, -}; -static int __init rmem_dma_setup(struct reserved_mem *rmem) +static int __init rmem_dma_setup(unsigned long node, struct reserved_mem *rmem) { - unsigned long node = rmem->fdt_node; - if (of_get_flat_dt_prop(node, "reusable", NULL)) - return -EINVAL; + return -ENODEV; #ifdef CONFIG_ARM if (!of_get_flat_dt_prop(node, "no-map", NULL)) { @@ -390,7 +384,6 @@ static int __init rmem_dma_setup(struct reserved_mem *rmem) } #endif - rmem->ops = &rmem_dma_ops; pr_info("Reserved memory: created DMA memory pool at %pa, size %ld MiB\n", &rmem->base, (unsigned long)rmem->size / SZ_1M); return 0; @@ -407,5 +400,11 @@ static int __init dma_init_reserved_memory(void) core_initcall(dma_init_reserved_memory); #endif /* CONFIG_DMA_GLOBAL_POOL */ -RESERVEDMEM_OF_DECLARE(dma, "shared-dma-pool", rmem_dma_setup); +static const struct reserved_mem_ops rmem_dma_ops = { + .node_init = rmem_dma_setup, + .device_init = rmem_dma_device_init, + .device_release = rmem_dma_device_release, +}; + +RESERVEDMEM_OF_DECLARE(dma, 
"shared-dma-pool", &rmem_dma_ops); #endif diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index c56004d314dc..03f52bd17120 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -42,7 +42,6 @@ #include #include #include -#include #include #include #include @@ -53,7 +52,38 @@ #define CMA_SIZE_MBYTES 0 #endif -struct cma *dma_contiguous_default_area; +static struct cma *dma_contiguous_areas[MAX_CMA_AREAS]; +static unsigned int dma_contiguous_areas_num; + +static int dma_contiguous_insert_area(struct cma *cma) +{ + if (dma_contiguous_areas_num >= ARRAY_SIZE(dma_contiguous_areas)) + return -EINVAL; + + dma_contiguous_areas[dma_contiguous_areas_num++] = cma; + + return 0; +} + +/** + * dma_contiguous_get_area_by_idx() - Get contiguous area at given index + * @idx: index of the area we query + * + * Queries for the contiguous area located at index @idx. + * + * Returns: + * A pointer to the requested contiguous area, or NULL otherwise. + */ +struct cma *dma_contiguous_get_area_by_idx(unsigned int idx) +{ + if (idx >= dma_contiguous_areas_num) + return NULL; + + return dma_contiguous_areas[idx]; +} +EXPORT_SYMBOL_GPL(dma_contiguous_get_area_by_idx); + +static struct cma *dma_contiguous_default_area; /* * Default global CMA area size can be defined in kernel's .config. @@ -91,15 +121,14 @@ static int __init early_cma(char *p) } early_param("cma", early_cma); -/* - * cma_skip_dt_default_reserved_mem - This is called from the - * reserved_mem framework to detect if the default cma region is being - * set by the "cma=" kernel parameter. 
- */ -bool __init cma_skip_dt_default_reserved_mem(void) +struct cma *dev_get_cma_area(struct device *dev) { - return size_cmdline != -1; + if (dev && dev->cma_area) + return dev->cma_area; + + return dma_contiguous_default_area; } +EXPORT_SYMBOL_GPL(dev_get_cma_area); #ifdef CONFIG_DMA_NUMA_CMA @@ -264,9 +293,24 @@ void __init dma_contiguous_reserve(phys_addr_t limit) if (ret) return; - ret = dma_heap_cma_register_heap(dma_contiguous_default_area); + /* + * We need to insert the new area in our list to avoid + * any inconsistencies between having the default area + * listed in the DT or not. + * + * The DT case is handled by rmem_cma_setup() and will + * always insert all its areas in our list. However, if + * it didn't run (because OF_RESERVED_MEM isn't set, or + * there's no DT region specified), then we don't have a + * default area yet, and no area in our list. + * + * This block creates the default area in such a case, + * but we also need to insert it in our list to avoid + * having a default area but an empty list. 
+ */ + ret = dma_contiguous_insert_area(dma_contiguous_default_area); if (ret) - pr_warn("Couldn't register default CMA heap."); + pr_warn("Couldn't queue default CMA region for heap creation."); } } @@ -470,47 +514,89 @@ static void rmem_cma_device_release(struct reserved_mem *rmem, dev->cma_area = NULL; } -static const struct reserved_mem_ops rmem_cma_ops = { - .device_init = rmem_cma_device_init, - .device_release = rmem_cma_device_release, -}; - -static int __init rmem_cma_setup(struct reserved_mem *rmem) +static int __init __rmem_cma_verify_node(unsigned long node) { - unsigned long node = rmem->fdt_node; - bool default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL); - struct cma *cma; - int err; - if (!of_get_flat_dt_prop(node, "reusable", NULL) || of_get_flat_dt_prop(node, "no-map", NULL)) - return -EINVAL; + return -ENODEV; + + if (size_cmdline != -1 && + of_get_flat_dt_prop(node, "linux,cma-default", NULL)) { + pr_err("Skipping dt linux,cma-default node in favor of \"cma=\" kernel param.\n"); + return -EBUSY; + } + return 0; +} + +static int __init rmem_cma_validate(unsigned long node, phys_addr_t *align) +{ + int ret = __rmem_cma_verify_node(node); + + if (ret) + return ret; + + if (align) + *align = max_t(phys_addr_t, *align, CMA_MIN_ALIGNMENT_BYTES); + + return 0; +} + +static int __init rmem_cma_fixup(unsigned long node, phys_addr_t base, + phys_addr_t size) +{ + int ret = __rmem_cma_verify_node(node); + + if (ret) + return ret; + + /* Architecture specific contiguous memory fixup. 
*/ + dma_contiguous_early_fixup(base, size); + return 0; +} + +static int __init rmem_cma_setup(unsigned long node, struct reserved_mem *rmem) +{ + bool default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL); + struct cma *cma; + int ret; + + ret = __rmem_cma_verify_node(node); + if (ret) + return ret; if (!IS_ALIGNED(rmem->base | rmem->size, CMA_MIN_ALIGNMENT_BYTES)) { pr_err("Reserved memory: incorrect alignment of CMA region\n"); return -EINVAL; } - err = cma_init_reserved_mem(rmem->base, rmem->size, 0, rmem->name, &cma); - if (err) { + ret = cma_init_reserved_mem(rmem->base, rmem->size, 0, rmem->name, &cma); + if (ret) { pr_err("Reserved memory: unable to setup CMA region\n"); - return err; + return ret; } if (default_cma) dma_contiguous_default_area = cma; - rmem->ops = &rmem_cma_ops; rmem->priv = cma; pr_info("Reserved memory: created CMA memory pool at %pa, size %ld MiB\n", &rmem->base, (unsigned long)rmem->size / SZ_1M); - err = dma_heap_cma_register_heap(cma); - if (err) - pr_warn("Couldn't register CMA heap."); + ret = dma_contiguous_insert_area(cma); + if (ret) + pr_warn("Couldn't store CMA reserved area."); return 0; } -RESERVEDMEM_OF_DECLARE(cma, "shared-dma-pool", rmem_cma_setup); + +static const struct reserved_mem_ops rmem_cma_ops = { + .node_validate = rmem_cma_validate, + .node_fixup = rmem_cma_fixup, + .node_init = rmem_cma_setup, + .device_init = rmem_cma_device_init, + .device_release = rmem_cma_device_release, +}; + +RESERVEDMEM_OF_DECLARE(cma, "shared-dma-pool", &rmem_cma_ops); #endif diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 8f43a930716d..ec887f443741 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -406,6 +406,8 @@ void dma_direct_sync_sg_for_device(struct device *dev, arch_sync_dma_for_device(paddr, sg->length, dir); } + if (!dev_is_dma_coherent(dev)) + arch_sync_dma_flush(); } #endif @@ -427,8 +429,10 @@ void dma_direct_sync_sg_for_cpu(struct device *dev, swiotlb_sync_single_for_cpu(dev, paddr, 
sg->length, dir); } - if (!dev_is_dma_coherent(dev)) + if (!dev_is_dma_coherent(dev)) { + arch_sync_dma_flush(); arch_sync_dma_for_cpu_all(); + } } /* @@ -440,14 +444,19 @@ void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, { struct scatterlist *sg; int i; + bool need_sync = false; for_each_sg(sgl, sg, nents, i) { - if (sg_dma_is_bus_address(sg)) + if (sg_dma_is_bus_address(sg)) { sg_dma_unmark_bus_address(sg); - else + } else { + need_sync = true; dma_direct_unmap_phys(dev, sg->dma_address, - sg_dma_len(sg), dir, attrs); + sg_dma_len(sg), dir, attrs, false); + } } + if (need_sync && !dev_is_dma_coherent(dev)) + arch_sync_dma_flush(); } #endif @@ -457,6 +466,7 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, struct pci_p2pdma_map_state p2pdma_state = {}; struct scatterlist *sg; int i, ret; + bool need_sync = false; for_each_sg(sgl, sg, nents, i) { switch (pci_p2pdma_state(&p2pdma_state, dev, sg_page(sg))) { @@ -468,8 +478,9 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, */ break; case PCI_P2PDMA_MAP_NONE: + need_sync = true; sg->dma_address = dma_direct_map_phys(dev, sg_phys(sg), - sg->length, dir, attrs); + sg->length, dir, attrs, false); if (sg->dma_address == DMA_MAPPING_ERROR) { ret = -EIO; goto out_unmap; @@ -488,6 +499,8 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, sg_dma_len(sg) = sg->length; } + if (need_sync && !dev_is_dma_coherent(dev)) + arch_sync_dma_flush(); return nents; out_unmap: diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h index 6184ff303f08..7140c208c123 100644 --- a/kernel/dma/direct.h +++ b/kernel/dma/direct.h @@ -60,17 +60,22 @@ static inline void dma_direct_sync_single_for_device(struct device *dev, swiotlb_sync_single_for_device(dev, paddr, size, dir); - if (!dev_is_dma_coherent(dev)) + if (!dev_is_dma_coherent(dev)) { arch_sync_dma_for_device(paddr, size, dir); + arch_sync_dma_flush(); + } } static inline void 
dma_direct_sync_single_for_cpu(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) + dma_addr_t addr, size_t size, enum dma_data_direction dir, + bool flush) { phys_addr_t paddr = dma_to_phys(dev, addr); if (!dev_is_dma_coherent(dev)) { arch_sync_dma_for_cpu(paddr, size, dir); + if (flush) + arch_sync_dma_flush(); arch_sync_dma_for_cpu_all(); } @@ -79,21 +84,29 @@ static inline void dma_direct_sync_single_for_cpu(struct device *dev, static inline dma_addr_t dma_direct_map_phys(struct device *dev, phys_addr_t phys, size_t size, enum dma_data_direction dir, - unsigned long attrs) + unsigned long attrs, bool flush) { dma_addr_t dma_addr; if (is_swiotlb_force_bounce(dev)) { - if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT)) - return DMA_MAPPING_ERROR; + if (!(attrs & DMA_ATTR_CC_SHARED)) { + if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT)) + return DMA_MAPPING_ERROR; - return swiotlb_map(dev, phys, size, dir, attrs); + return swiotlb_map(dev, phys, size, dir, attrs); + } + } else if (attrs & DMA_ATTR_CC_SHARED) { + return DMA_MAPPING_ERROR; } if (attrs & DMA_ATTR_MMIO) { dma_addr = phys; if (unlikely(!dma_capable(dev, dma_addr, size, false))) goto err_overflow; + } else if (attrs & DMA_ATTR_CC_SHARED) { + dma_addr = phys_to_dma_unencrypted(dev, phys); + if (unlikely(!dma_capable(dev, dma_addr, size, false))) + goto err_overflow; } else { dma_addr = phys_to_dma(dev, phys); if (unlikely(!dma_capable(dev, dma_addr, size, true)) || @@ -107,8 +120,11 @@ static inline dma_addr_t dma_direct_map_phys(struct device *dev, } if (!dev_is_dma_coherent(dev) && - !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO))) + !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO))) { arch_sync_dma_for_device(phys, size, dir); + if (flush) + arch_sync_dma_flush(); + } return dma_addr; err_overflow: @@ -120,7 +136,8 @@ err_overflow: } static inline void dma_direct_unmap_phys(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir, 
unsigned long attrs) + size_t size, enum dma_data_direction dir, unsigned long attrs, + bool flush) { phys_addr_t phys; @@ -130,7 +147,7 @@ static inline void dma_direct_unmap_phys(struct device *dev, dma_addr_t addr, phys = dma_to_phys(dev, addr); if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - dma_direct_sync_single_for_cpu(dev, addr, size, dir); + dma_direct_sync_single_for_cpu(dev, addr, size, dir, flush); swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c index 0f33b3ea7daf..29eeb5fdf199 100644 --- a/kernel/dma/map_benchmark.c +++ b/kernel/dma/map_benchmark.c @@ -5,6 +5,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -15,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -31,17 +33,219 @@ struct map_benchmark_data { atomic64_t loops; }; -static int map_benchmark_thread(void *data) +struct map_benchmark_ops { + void *(*prepare)(struct map_benchmark_data *map); + void (*unprepare)(void *mparam); + void (*initialize_data)(void *mparam); + int (*do_map)(void *mparam); + void (*do_unmap)(void *mparam); +}; + +struct dma_single_map_param { + struct device *dev; + dma_addr_t addr; + void *xbuf; + u32 npages; + u32 dma_dir; +}; + +static void *dma_single_map_benchmark_prepare(struct map_benchmark_data *map) { - void *buf; - dma_addr_t dma_addr; - struct map_benchmark_data *map = data; - int npages = map->bparam.granule; - u64 size = npages * PAGE_SIZE; + struct dma_single_map_param *params __free(kfree) = kzalloc(sizeof(*params), + GFP_KERNEL); + if (!params) + return NULL; + + params->npages = map->bparam.granule; + params->dma_dir = map->bparam.dma_dir; + params->dev = map->dev; + params->xbuf = alloc_pages_exact(params->npages * PAGE_SIZE, GFP_KERNEL); + if (!params->xbuf) + return NULL; + + return_ptr(params); +} + +static void dma_single_map_benchmark_unprepare(void *mparam) +{ + struct dma_single_map_param 
*params = mparam; + + free_pages_exact(params->xbuf, params->npages * PAGE_SIZE); + kfree(params); +} + +static void dma_single_map_benchmark_initialize_data(void *mparam) +{ + struct dma_single_map_param *params = mparam; + + /* + * for a non-coherent device, if we don't stain them in the + * cache, this will give an underestimate of the real-world + * overhead of BIDIRECTIONAL or TO_DEVICE mappings; + * 66 means everything goes well! 66 is lucky. + */ + if (params->dma_dir != DMA_FROM_DEVICE) + memset(params->xbuf, 0x66, params->npages * PAGE_SIZE); +} + +static int dma_single_map_benchmark_do_map(void *mparam) +{ + struct dma_single_map_param *params = mparam; + + params->addr = dma_map_single(params->dev, params->xbuf, + params->npages * PAGE_SIZE, params->dma_dir); + if (unlikely(dma_mapping_error(params->dev, params->addr))) { + pr_err("dma_map_single failed on %s\n", dev_name(params->dev)); + return -ENOMEM; + } + + return 0; +} + +static void dma_single_map_benchmark_do_unmap(void *mparam) +{ + struct dma_single_map_param *params = mparam; + + dma_unmap_single(params->dev, params->addr, + params->npages * PAGE_SIZE, params->dma_dir); +} + +static struct map_benchmark_ops dma_single_map_benchmark_ops = { + .prepare = dma_single_map_benchmark_prepare, + .unprepare = dma_single_map_benchmark_unprepare, + .initialize_data = dma_single_map_benchmark_initialize_data, + .do_map = dma_single_map_benchmark_do_map, + .do_unmap = dma_single_map_benchmark_do_unmap, +}; + +struct dma_sg_map_param { + struct sg_table sgt; + struct device *dev; + void **buf; + u32 npages; + u32 dma_dir; +}; + +static void *dma_sg_map_benchmark_prepare(struct map_benchmark_data *map) +{ + struct scatterlist *sg; + int i; + + struct dma_sg_map_param *params = kzalloc(sizeof(*params), GFP_KERNEL); + + if (!params) + return NULL; + /* + * Set the number of scatterlist entries based on the granule. + * In SG mode, 'granule' represents the number of scatterlist entries. 
+ * Each scatterlist entry corresponds to a single page. + */ + params->npages = map->bparam.granule; + params->dma_dir = map->bparam.dma_dir; + params->dev = map->dev; + params->buf = kmalloc_array(params->npages, sizeof(*params->buf), + GFP_KERNEL); + if (!params->buf) + goto out; + + if (sg_alloc_table(¶ms->sgt, params->npages, GFP_KERNEL)) + goto free_buf; + + for_each_sgtable_sg(¶ms->sgt, sg, i) { + params->buf[i] = (void *)__get_free_page(GFP_KERNEL); + if (!params->buf[i]) + goto free_page; + + sg_set_buf(sg, params->buf[i], PAGE_SIZE); + } + + return params; + +free_page: + while (i-- > 0) + free_page((unsigned long)params->buf[i]); + + sg_free_table(¶ms->sgt); +free_buf: + kfree(params->buf); +out: + kfree(params); + return NULL; +} + +static void dma_sg_map_benchmark_unprepare(void *mparam) +{ + struct dma_sg_map_param *params = mparam; + int i; + + for (i = 0; i < params->npages; i++) + free_page((unsigned long)params->buf[i]); + + sg_free_table(¶ms->sgt); + + kfree(params->buf); + kfree(params); +} + +static void dma_sg_map_benchmark_initialize_data(void *mparam) +{ + struct dma_sg_map_param *params = mparam; + struct scatterlist *sg; + int i = 0; + + if (params->dma_dir == DMA_FROM_DEVICE) + return; + + for_each_sgtable_sg(¶ms->sgt, sg, i) + memset(params->buf[i], 0x66, PAGE_SIZE); +} + +static int dma_sg_map_benchmark_do_map(void *mparam) +{ + struct dma_sg_map_param *params = mparam; int ret = 0; - buf = alloc_pages_exact(size, GFP_KERNEL); - if (!buf) + int sg_mapped = dma_map_sg(params->dev, params->sgt.sgl, + params->npages, params->dma_dir); + if (!sg_mapped) { + pr_err("dma_map_sg failed on %s\n", dev_name(params->dev)); + ret = -ENOMEM; + } + + return ret; +} + +static void dma_sg_map_benchmark_do_unmap(void *mparam) +{ + struct dma_sg_map_param *params = mparam; + + dma_unmap_sg(params->dev, params->sgt.sgl, params->npages, + params->dma_dir); +} + +static struct map_benchmark_ops dma_sg_map_benchmark_ops = { + .prepare = 
dma_sg_map_benchmark_prepare, + .unprepare = dma_sg_map_benchmark_unprepare, + .initialize_data = dma_sg_map_benchmark_initialize_data, + .do_map = dma_sg_map_benchmark_do_map, + .do_unmap = dma_sg_map_benchmark_do_unmap, +}; + +static struct map_benchmark_ops *dma_map_benchmark_ops[DMA_MAP_BENCH_MODE_MAX] = { + [DMA_MAP_BENCH_SINGLE_MODE] = &dma_single_map_benchmark_ops, + [DMA_MAP_BENCH_SG_MODE] = &dma_sg_map_benchmark_ops, +}; + +static int map_benchmark_thread(void *data) +{ + struct map_benchmark_data *map = data; + __u8 map_mode = map->bparam.map_mode; + int ret = 0; + + struct map_benchmark_ops *mb_ops = dma_map_benchmark_ops[map_mode]; + void *mparam = mb_ops->prepare(map); + + if (!mparam) return -ENOMEM; while (!kthread_should_stop()) { @@ -49,23 +253,12 @@ static int map_benchmark_thread(void *data) ktime_t map_stime, map_etime, unmap_stime, unmap_etime; ktime_t map_delta, unmap_delta; - /* - * for a non-coherent device, if we don't stain them in the - * cache, this will give an underestimate of the real-world - * overhead of BIDIRECTIONAL or TO_DEVICE mappings; - * 66 means evertything goes well! 66 is lucky. 
- */ - if (map->dir != DMA_FROM_DEVICE) - memset(buf, 0x66, size); - + mb_ops->initialize_data(mparam); map_stime = ktime_get(); - dma_addr = dma_map_single(map->dev, buf, size, map->dir); - if (unlikely(dma_mapping_error(map->dev, dma_addr))) { - pr_err("dma_map_single failed on %s\n", - dev_name(map->dev)); - ret = -ENOMEM; + ret = mb_ops->do_map(mparam); + if (ret) goto out; - } + map_etime = ktime_get(); map_delta = ktime_sub(map_etime, map_stime); @@ -73,7 +266,8 @@ static int map_benchmark_thread(void *data) ndelay(map->bparam.dma_trans_ns); unmap_stime = ktime_get(); - dma_unmap_single(map->dev, dma_addr, size, map->dir); + mb_ops->do_unmap(mparam); + unmap_etime = ktime_get(); unmap_delta = ktime_sub(unmap_etime, unmap_stime); @@ -108,7 +302,7 @@ static int map_benchmark_thread(void *data) } out: - free_pages_exact(buf, size); + mb_ops->unprepare(mparam); return ret; } @@ -209,6 +403,12 @@ static long map_benchmark_ioctl(struct file *file, unsigned int cmd, switch (cmd) { case DMA_MAP_BENCHMARK: + if (map->bparam.map_mode < 0 || + map->bparam.map_mode >= DMA_MAP_BENCH_MODE_MAX) { + pr_err("invalid map mode\n"); + return -EINVAL; + } + if (map->bparam.threads == 0 || map->bparam.threads > DMA_MAP_MAX_THREADS) { pr_err("invalid thread number\n"); diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 6d3dd0bd3a88..23ed8eb9233e 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -157,6 +157,7 @@ dma_addr_t dma_map_phys(struct device *dev, phys_addr_t phys, size_t size, { const struct dma_map_ops *ops = get_dma_ops(dev); bool is_mmio = attrs & DMA_ATTR_MMIO; + bool is_cc_shared = attrs & DMA_ATTR_CC_SHARED; dma_addr_t addr = DMA_MAPPING_ERROR; BUG_ON(!valid_dma_direction(dir)); @@ -168,8 +169,11 @@ dma_addr_t dma_map_phys(struct device *dev, phys_addr_t phys, size_t size, return DMA_MAPPING_ERROR; if (dma_map_direct(dev, ops) || - (!is_mmio && arch_dma_map_phys_direct(dev, phys + size))) - addr = dma_direct_map_phys(dev, phys, size, dir, 
attrs); + (!is_mmio && !is_cc_shared && + arch_dma_map_phys_direct(dev, phys + size))) + addr = dma_direct_map_phys(dev, phys, size, dir, attrs, true); + else if (is_cc_shared) + return DMA_MAPPING_ERROR; else if (use_dma_iommu(dev)) addr = iommu_dma_map_phys(dev, phys, size, dir, attrs); else if (ops->map_phys) @@ -206,11 +210,16 @@ void dma_unmap_phys(struct device *dev, dma_addr_t addr, size_t size, { const struct dma_map_ops *ops = get_dma_ops(dev); bool is_mmio = attrs & DMA_ATTR_MMIO; + bool is_cc_shared = attrs & DMA_ATTR_CC_SHARED; BUG_ON(!valid_dma_direction(dir)); + if (dma_map_direct(dev, ops) || - (!is_mmio && arch_dma_unmap_phys_direct(dev, addr + size))) - dma_direct_unmap_phys(dev, addr, size, dir, attrs); + (!is_mmio && !is_cc_shared && + arch_dma_unmap_phys_direct(dev, addr + size))) + dma_direct_unmap_phys(dev, addr, size, dir, attrs, true); + else if (is_cc_shared) + return; else if (use_dma_iommu(dev)) iommu_dma_unmap_phys(dev, addr, size, dir, attrs); else if (ops->unmap_phys) @@ -379,7 +388,7 @@ void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, BUG_ON(!valid_dma_direction(dir)); if (dma_map_direct(dev, ops)) - dma_direct_sync_single_for_cpu(dev, addr, size, dir); + dma_direct_sync_single_for_cpu(dev, addr, size, dir, true); else if (use_dma_iommu(dev)) iommu_dma_sync_single_for_cpu(dev, addr, size, dir); else if (ops->sync_single_for_cpu) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 9fd73700ddcf..9a15e7231e39 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -868,6 +868,9 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size if (orig_addr == INVALID_PHYS_ADDR) return; + if (dir == DMA_FROM_DEVICE && !dev_is_dma_coherent(dev)) + arch_sync_dma_flush(); + /* * It's valid for tlb_offset to be negative. 
This can happen when the * "offset" returned by swiotlb_align_offset() is non-zero, and the @@ -1612,8 +1615,10 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size, return DMA_MAPPING_ERROR; } - if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) { arch_sync_dma_for_device(swiotlb_addr, size, dir); + arch_sync_dma_flush(); + } return dma_addr; } @@ -1872,26 +1877,25 @@ static void rmem_swiotlb_device_release(struct reserved_mem *rmem, dev->dma_io_tlb_mem = &io_tlb_default_mem; } -static const struct reserved_mem_ops rmem_swiotlb_ops = { - .device_init = rmem_swiotlb_device_init, - .device_release = rmem_swiotlb_device_release, -}; - -static int __init rmem_swiotlb_setup(struct reserved_mem *rmem) +static int __init rmem_swiotlb_setup(unsigned long node, + struct reserved_mem *rmem) { - unsigned long node = rmem->fdt_node; - if (of_get_flat_dt_prop(node, "reusable", NULL) || of_get_flat_dt_prop(node, "linux,cma-default", NULL) || of_get_flat_dt_prop(node, "linux,dma-default", NULL) || of_get_flat_dt_prop(node, "no-map", NULL)) return -EINVAL; - rmem->ops = &rmem_swiotlb_ops; pr_info("Reserved memory: created restricted DMA pool at %pa, size %ld MiB\n", &rmem->base, (unsigned long)rmem->size / SZ_1M); return 0; } -RESERVEDMEM_OF_DECLARE(dma, "restricted-dma-pool", rmem_swiotlb_setup); +static const struct reserved_mem_ops rmem_swiotlb_ops = { + .node_init = rmem_swiotlb_setup, + .device_init = rmem_swiotlb_device_init, + .device_release = rmem_swiotlb_device_release, +}; + +RESERVEDMEM_OF_DECLARE(dma, "restricted-dma-pool", &rmem_swiotlb_ops); #endif /* CONFIG_DMA_RESTRICTED_POOL */ diff --git a/mm/cma.c b/mm/cma.c index 15cc0ae76c8e..c7ca567f4c5c 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -52,6 +52,7 @@ const char *cma_get_name(const struct cma *cma) { return cma->name; } +EXPORT_SYMBOL_GPL(cma_get_name); static unsigned long cma_bitmap_aligned_mask(const 
struct cma *cma, unsigned int align_order) @@ -951,6 +952,7 @@ struct page *cma_alloc(struct cma *cma, unsigned long count, return page; } +EXPORT_SYMBOL_GPL(cma_alloc); static struct cma_memrange *find_cma_memrange(struct cma *cma, const struct page *pages, unsigned long count) @@ -1030,6 +1032,7 @@ bool cma_release(struct cma *cma, const struct page *pages, return true; } +EXPORT_SYMBOL_GPL(cma_release); bool cma_release_frozen(struct cma *cma, const struct page *pages, unsigned long count) diff --git a/tools/dma/dma_map_benchmark.c b/tools/dma/dma_map_benchmark.c index dd0ed528e6df..eab0ac611a23 100644 --- a/tools/dma/dma_map_benchmark.c +++ b/tools/dma/dma_map_benchmark.c @@ -20,12 +20,19 @@ static char *directions[] = { "FROM_DEVICE", }; +static char *mode[] = { + "SINGLE_MODE", + "SG_MODE", +}; + int main(int argc, char **argv) { struct map_benchmark map; int fd, opt; /* default single thread, run 20 seconds on NUMA_NO_NODE */ int threads = 1, seconds = 20, node = -1; + /* default single map mode */ + int map_mode = DMA_MAP_BENCH_SINGLE_MODE; /* default dma mask 32bit, bidirectional DMA */ int bits = 32, xdelay = 0, dir = DMA_MAP_BIDIRECTIONAL; /* default granule 1 PAGESIZE */ @@ -33,7 +40,7 @@ int main(int argc, char **argv) int cmd = DMA_MAP_BENCHMARK; - while ((opt = getopt(argc, argv, "t:s:n:b:d:x:g:")) != -1) { + while ((opt = getopt(argc, argv, "t:s:n:b:d:x:g:m:")) != -1) { switch (opt) { case 't': threads = atoi(optarg); @@ -56,11 +63,20 @@ int main(int argc, char **argv) case 'g': granule = atoi(optarg); break; + case 'm': + map_mode = atoi(optarg); + break; default: return -1; } } + if (map_mode < 0 || map_mode >= DMA_MAP_BENCH_MODE_MAX) { + fprintf(stderr, "invalid map mode, SINGLE_MODE:%d, SG_MODE: %d\n", + DMA_MAP_BENCH_SINGLE_MODE, DMA_MAP_BENCH_SG_MODE); + exit(1); + } + if (threads <= 0 || threads > DMA_MAP_MAX_THREADS) { fprintf(stderr, "invalid number of threads, must be in 1-%d\n", DMA_MAP_MAX_THREADS); @@ -110,14 +126,15 @@ int main(int 
argc, char **argv) map.dma_dir = dir; map.dma_trans_ns = xdelay; map.granule = granule; + map.map_mode = map_mode; if (ioctl(fd, cmd, &map)) { perror("ioctl"); exit(1); } - printf("dma mapping benchmark: threads:%d seconds:%d node:%d dir:%s granule: %d\n", - threads, seconds, node, directions[dir], granule); + printf("dma mapping benchmark(%s): threads:%d seconds:%d node:%d dir:%s granule:%d\n", + mode[map_mode], threads, seconds, node, directions[dir], granule); printf("average map latency(us):%.1f standard deviation:%.1f\n", map.avg_map_100ns/10.0, map.map_stddev/10.0); printf("average unmap latency(us):%.1f standard deviation:%.1f\n",