Files
linux/arch/arm64/mm/dma-mapping.c
Barry Song d7eafe655b dma-mapping: Separate DMA sync issuing and completion waiting
Currently, arch_sync_dma_for_cpu and arch_sync_dma_for_device
always wait for the completion of each DMA buffer. That is,
issuing the DMA sync and waiting for completion is done in a
single API call.

For scatter-gather lists with multiple entries, this means
issuing and waiting is repeated for each entry, which can hurt
performance. Architectures like ARM64 may be able to issue all
DMA sync operations for all entries first and then wait for
completion together.

To address this, arch_sync_dma_for_* now only issue the DMA sync
operations, and completion is waited for afterward by a separate
flush. On ARM64, the flush is implemented with a dsb instruction in
arch_sync_dma_flush(). On other architectures, arch_sync_dma_flush()
is currently a nop.

Cc: Leon Romanovsky <leon@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Ada Couprie Diaz <ada.coupriediaz@arm.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
Cc: Tangquan Zheng <zhengtangquan@oppo.com>
Reviewed-by: Juergen Gross <jgross@suse.com> # drivers/xen/swiotlb-xen.c
Tested-by: Xueyuan Chen <xueyuan.chen21@gmail.com>
Signed-off-by: Barry Song <baohua@kernel.org>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://lore.kernel.org/r/20260228221316.59934-1-21cnbao@gmail.com
2026-03-13 23:47:31 +01:00

55 lines
1.3 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2012 ARM Ltd.
* Author: Catalin Marinas <catalin.marinas@arm.com>
*/
#include <linux/gfp.h>
#include <linux/cache.h>
#include <linux/dma-map-ops.h>
#include <xen/xen.h>
#include <asm/cacheflush.h>
#include <asm/xen/xen-ops.h>
/*
 * arch_sync_dma_for_device - issue a D-cache clean for a DMA buffer
 * @paddr: physical address of the start of the buffer
 * @size:  length of the buffer in bytes
 * @dir:   DMA direction; not consulted here, the clean is issued for
 *         every direction
 *
 * The buffer is addressed through the mapping returned by phys_to_virt()
 * and the range [start, start + size) is handed to
 * dcache_clean_poc_nosync().
 *
 * NOTE(review): the _nosync suffix suggests the maintenance is only issued
 * here and that the caller is expected to wait for completion separately
 * (presumably via arch_sync_dma_flush()) - confirm against the helper.
 */
void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
			      enum dma_data_direction dir)
{
	unsigned long vaddr = (unsigned long)phys_to_virt(paddr);
	unsigned long vend = vaddr + size;

	dcache_clean_poc_nosync(vaddr, vend);
}
/*
 * arch_sync_dma_for_cpu - issue a D-cache invalidate for a DMA buffer
 * @paddr: physical address of the start of the buffer
 * @size:  length of the buffer in bytes
 * @dir:   DMA direction of the transfer
 *
 * For every direction except DMA_TO_DEVICE, the range
 * [start, start + size) of the phys_to_virt() mapping of @paddr is passed
 * to dcache_inval_poc_nosync(). DMA_TO_DEVICE buffers are left untouched
 * (presumably because the device did not write to them).
 *
 * NOTE(review): the _nosync suffix suggests completion is not waited for
 * here; the caller presumably follows up with arch_sync_dma_flush() -
 * confirm against the helper.
 */
void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
			   enum dma_data_direction dir)
{
	unsigned long vaddr = (unsigned long)phys_to_virt(paddr);

	if (dir != DMA_TO_DEVICE)
		dcache_inval_poc_nosync(vaddr, vaddr + size);
}
/*
 * arch_dma_prep_coherent - clean the D-cache for a page range
 * @page: first page of the region
 * @size: length of the region in bytes
 *
 * Looks up the kernel address of @page with page_address() and calls
 * dcache_clean_poc() on [start, start + size).
 *
 * NOTE(review): assumes @page has a valid kernel mapping so that
 * page_address() returns a usable address - confirm against callers.
 */
void arch_dma_prep_coherent(struct page *page, size_t size)
{
	unsigned long vaddr = (unsigned long)page_address(page);
	unsigned long vend = vaddr + size;

	dcache_clean_poc(vaddr, vend);
}
/*
 * arch_setup_dma_ops - record DMA coherency for a device
 * @dev:      device being configured
 * @coherent: whether the device is DMA-coherent
 *
 * Emits a warning and sets TAINT_CPU_OUT_OF_SPEC when the device is
 * non-coherent and the value reported by cache_line_size_of_cpu() is
 * larger than ARCH_DMA_MINALIGN. Afterwards the coherency flag is stored
 * in @dev and xen_setup_dma_ops() is called for the device.
 */
void arch_setup_dma_ops(struct device *dev, bool coherent)
{
	int cwg = cache_line_size_of_cpu();
	/* Only non-coherent devices are checked against ARCH_DMA_MINALIGN. */
	bool minalign_too_small = !coherent && cwg > ARCH_DMA_MINALIGN;

	WARN_TAINT(minalign_too_small, TAINT_CPU_OUT_OF_SPEC,
		   "%s %s: ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
		   dev_driver_string(dev), dev_name(dev),
		   ARCH_DMA_MINALIGN, cwg);

	dev->dma_coherent = coherent;

	xen_setup_dma_ops(dev);
}