whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit cff229491af5df946781edfbeafd43e9cf66a3b4
parent 13775dacca5c158a257320f4b47e1220b82e3b21
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Mon, 22 Oct 2018 18:16:03 +0100

Merge tag 'dma-mapping-4.20' of git://git.infradead.org/users/hch/dma-mapping

Pull dma mapping updates from Christoph Hellwig:
 "First batch of dma-mapping changes for 4.20.

  There will be a second PR as some big changes were only applied just
  before the end of the merge window, and I want to give them a few more
  days in linux-next.

  Summary:

   - mostly more consolidation of the direct mapping code, including
     converting over hexagon, and merging the coherent and non-coherent
     code into a single dma_map_ops instance (me)

   - cleanups for the dma_configure/dma_unconfigure callchains (me)

   - better handling of dma_masks in odd setups (me, Alexander Duyck)

   - better debugging of passing vmalloc address to the DMA API (Stephen
     Boyd)

   - CMA command line parsing fix (He Zhe)"

* tag 'dma-mapping-4.20' of git://git.infradead.org/users/hch/dma-mapping: (27 commits)
  dma-direct: respect DMA_ATTR_NO_WARN
  dma-mapping: translate __GFP_NOFAIL to DMA_ATTR_NO_WARN
  dma-direct: document the zone selection logic
  dma-debug: Check for drivers mapping invalid addresses in dma_map_single()
  dma-direct: fix return value of dma_direct_supported
  dma-mapping: move dma_default_get_required_mask under ifdef
  dma-direct: always allow dma mask <= physical memory size
  dma-direct: implement complete bus_dma_mask handling
  dma-direct: refine dma_direct_alloc zone selection
  dma-direct: add an explicit dma_direct_get_required_mask
  dma-mapping: make the get_required_mask method available unconditionally
  unicore32: remove swiotlb support
  Revert "dma-mapping: clear dev->dma_ops in arch_teardown_dma_ops"
  dma-mapping: support non-coherent devices in dma_common_get_sgtable
  dma-mapping: consolidate the dma mmap implementations
  dma-mapping: merge direct and noncoherent ops
  dma-mapping: move the dma_coherent flag to struct device
  MIPS: don't select DMA_MAYBE_COHERENT from DMA_PERDEV_COHERENT
  dma-mapping: add the missing ARCH_HAS_SYNC_DMA_FOR_CPU_ALL declaration
  dma-mapping: fix panic caused by passing empty cma command line argument
  ...

Diffstat:
M arch/arc/Kconfig | 4 ++--
M arch/arc/mm/dma.c | 41 ++++++++++-------------------------------
M arch/arm/include/asm/dma-mapping.h | 2 ++
M arch/arm/mm/dma-mapping-nommu.c | 11 ++++-------
M arch/c6x/Kconfig | 2 +-
M arch/hexagon/Kconfig | 2 ++
M arch/hexagon/include/asm/Kbuild | 1 +
D arch/hexagon/include/asm/dma-mapping.h | 40 ----------------------------------------
M arch/hexagon/kernel/dma.c | 143 +++++--------------------------------------------------------------------------
M arch/ia64/include/asm/dma-mapping.h | 2 --
M arch/ia64/include/asm/machvec.h | 7 -------
M arch/ia64/include/asm/machvec_init.h | 1 -
M arch/ia64/include/asm/machvec_sn2.h | 2 --
M arch/ia64/pci/pci.c | 26 --------------------------
M arch/ia64/sn/pci/pci_dma.c | 4 ++--
M arch/m68k/Kconfig | 2 +-
M arch/microblaze/Kconfig | 4 ++--
M arch/microblaze/include/asm/pgtable.h | 2 --
M arch/microblaze/kernel/dma.c | 22 ----------------------
M arch/microblaze/mm/consistent.c | 3 ++-
M arch/mips/Kconfig | 7 ++++---
M arch/mips/include/asm/Kbuild | 1 +
D arch/mips/include/asm/device.h | 19 -------------------
M arch/mips/include/asm/dma-coherence.h | 6 ++++++
M arch/mips/include/asm/dma-mapping.h | 4 +---
M arch/mips/jazz/jazzdma.c | 7 +++----
M arch/mips/kernel/setup.c | 2 +-
M arch/mips/mm/c-r4k.c | 17 ++++++++---------
M arch/mips/mm/dma-noncoherent.c | 79 ++++++++++++++++---------------------------------------------------------------
M arch/nds32/Kconfig | 2 +-
M arch/nios2/Kconfig | 2 +-
M arch/openrisc/Kconfig | 2 +-
M arch/parisc/Kconfig | 2 +-
M arch/parisc/kernel/setup.c | 2 +-
M arch/sh/Kconfig | 3 +--
M arch/sparc/Kconfig | 2 +-
M arch/sparc/include/asm/dma-mapping.h | 4 ++--
M arch/unicore32/Kconfig | 2 +-
M arch/unicore32/include/asm/Kbuild | 1 +
D arch/unicore32/include/asm/dma-mapping.h | 22 ----------------------
M arch/unicore32/mm/init.c | 3 ---
M arch/x86/kernel/amd_gart_64.c | 6 +++---
M arch/xtensa/Kconfig | 2 +-
M drivers/acpi/arm64/iort.c | 2 +-
M drivers/acpi/scan.c | 10 ----------
M drivers/base/dd.c | 12 +++++++-----
M drivers/base/platform.c | 11 ++++++++++-
M drivers/of/device.c | 12 ------------
M drivers/pci/controller/vmd.c | 4 ----
M drivers/xen/swiotlb-xen.c | 4 ++--
M include/acpi/acpi_bus.h | 1 -
M include/asm-generic/dma-mapping.h | 9 ---------
M include/linux/acpi.h | 2 --
M include/linux/device.h | 7 +++++++
M include/linux/dma-debug.h | 8 ++++++++
M include/linux/dma-direct.h | 8 +++++++-
M include/linux/dma-mapping.h | 42 ++++++++++++++++++------------------------
M include/linux/dma-noncoherent.h | 27 ++++++++++++++++++++++-----
M include/linux/of_device.h | 3 ---
M kernel/dma/Kconfig | 16 ++++++++--------
M kernel/dma/Makefile | 1 -
M kernel/dma/contiguous.c | 6 +++++-
M kernel/dma/debug.c | 16 ++++++++++++++++
M kernel/dma/direct.c | 222 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------
M kernel/dma/mapping.c | 71 +++++++++++++++++++++++++++++++++++++----------------------------------
D kernel/dma/noncoherent.c | 106 -------------------------------------------------------------------------------
66 files changed, 420 insertions(+), 698 deletions(-)

diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig @@ -9,6 +9,7 @@ config ARC def_bool y select ARC_TIMERS + select ARCH_HAS_DMA_COHERENT_TO_PFN select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE @@ -17,8 +18,7 @@ config ARC select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS select COMMON_CLK - select DMA_NONCOHERENT_OPS - select DMA_NONCOHERENT_MMAP + select DMA_DIRECT_OPS select GENERIC_ATOMIC64 if !ISA_ARCV2 || !(ARC_HAS_LL64 && ARC_HAS_LLSC) select GENERIC_CLOCKEVENTS select GENERIC_FIND_FIRST_BIT diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c @@ -84,29 +84,10 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr, __free_pages(page, get_order(size)); } -int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs) +long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr, + dma_addr_t dma_addr) { - unsigned long user_count = vma_pages(vma); - unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; - unsigned long pfn = __phys_to_pfn(dma_addr); - unsigned long off = vma->vm_pgoff; - int ret = -ENXIO; - - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - - if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) - return ret; - - if (off < count && user_count <= (count - off)) { - ret = remap_pfn_range(vma, vma->vm_start, - pfn + off, - user_count << PAGE_SHIFT, - vma->vm_page_prot); - } - - return ret; + return __phys_to_pfn(dma_addr); } /* @@ -167,7 +148,7 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, } /* - * Plug in coherent or noncoherent dma ops + * Plug in direct dma map ops. 
*/ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent) @@ -175,13 +156,11 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, /* * IOC hardware snoops all DMA traffic keeping the caches consistent * with memory - eliding need for any explicit cache maintenance of - * DMA buffers - so we can use dma_direct cache ops. + * DMA buffers. */ - if (is_isa_arcv2() && ioc_enable && coherent) { - set_dma_ops(dev, &dma_direct_ops); - dev_info(dev, "use dma_direct_ops cache ops\n"); - } else { - set_dma_ops(dev, &dma_noncoherent_ops); - dev_info(dev, "use dma_noncoherent_ops cache ops\n"); - } + if (is_isa_arcv2() && ioc_enable && coherent) + dev->dma_coherent = true; + + dev_info(dev, "use %sncoherent DMA ops\n", + dev->dma_coherent ? "" : "non"); } diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h @@ -100,8 +100,10 @@ static inline unsigned long dma_max_pfn(struct device *dev) extern void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent); +#ifdef CONFIG_MMU #define arch_teardown_dma_ops arch_teardown_dma_ops extern void arch_teardown_dma_ops(struct device *dev); +#endif /* do not use this function in a driver */ static inline bool is_device_dma_coherent(struct device *dev) diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c @@ -47,7 +47,8 @@ static void *arm_nommu_dma_alloc(struct device *dev, size_t size, */ if (attrs & DMA_ATTR_NON_CONSISTENT) - return dma_direct_alloc(dev, size, dma_handle, gfp, attrs); + return dma_direct_alloc_pages(dev, size, dma_handle, gfp, + attrs); ret = dma_alloc_from_global_coherent(size, dma_handle); @@ -70,7 +71,7 @@ static void arm_nommu_dma_free(struct device *dev, size_t size, unsigned long attrs) { if (attrs & DMA_ATTR_NON_CONSISTENT) { - dma_direct_free(dev, size, cpu_addr, dma_addr, attrs); + dma_direct_free_pages(dev, size, cpu_addr, 
dma_addr, attrs); } else { int ret = dma_release_from_global_coherent(get_order(size), cpu_addr); @@ -90,7 +91,7 @@ static int arm_nommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, if (dma_mmap_from_global_coherent(vma, cpu_addr, size, &ret)) return ret; - return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size); + return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); } @@ -237,7 +238,3 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, set_dma_ops(dev, dma_ops); } - -void arch_teardown_dma_ops(struct device *dev) -{ -} diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig @@ -9,7 +9,7 @@ config C6X select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE select CLKDEV_LOOKUP - select DMA_NONCOHERENT_OPS + select DMA_DIRECT_OPS select GENERIC_ATOMIC64 select GENERIC_IRQ_SHOW select HAVE_ARCH_TRACEHOOK diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig @@ -4,6 +4,7 @@ comment "Linux Kernel Configuration for Hexagon" config HEXAGON def_bool y + select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_NO_PREEMPT select HAVE_OPROFILE # Other pending projects/to-do items. @@ -29,6 +30,7 @@ config HEXAGON select GENERIC_CLOCKEVENTS_BROADCAST select MODULES_USE_ELF_RELA select GENERIC_CPU_DEVICES + select DMA_DIRECT_OPS ---help--- Qualcomm Hexagon is a processor architecture designed for high performance and low power across a wide variety of applications. diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild @@ -6,6 +6,7 @@ generic-y += compat.h generic-y += current.h generic-y += device.h generic-y += div64.h +generic-y += dma-mapping.h generic-y += emergency-restart.h generic-y += extable.h generic-y += fb.h diff --git a/arch/hexagon/include/asm/dma-mapping.h b/arch/hexagon/include/asm/dma-mapping.h @@ -1,40 +0,0 @@ -/* - * DMA operations for the Hexagon architecture - * - * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 and - * only version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#ifndef _ASM_DMA_MAPPING_H -#define _ASM_DMA_MAPPING_H - -#include <linux/types.h> -#include <linux/cache.h> -#include <linux/mm.h> -#include <linux/scatterlist.h> -#include <linux/dma-debug.h> -#include <asm/io.h> - -struct device; - -extern const struct dma_map_ops *dma_ops; - -static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) -{ - return dma_ops; -} - -#endif diff --git a/arch/hexagon/kernel/dma.c b/arch/hexagon/kernel/dma.c @@ -18,32 +18,19 @@ * 02110-1301, USA. 
*/ -#include <linux/dma-mapping.h> -#include <linux/dma-direct.h> +#include <linux/dma-noncoherent.h> #include <linux/bootmem.h> #include <linux/genalloc.h> -#include <asm/dma-mapping.h> #include <linux/module.h> #include <asm/page.h> -#define HEXAGON_MAPPING_ERROR 0 - -const struct dma_map_ops *dma_ops; -EXPORT_SYMBOL(dma_ops); - -static inline void *dma_addr_to_virt(dma_addr_t dma_addr) -{ - return phys_to_virt((unsigned long) dma_addr); -} - static struct gen_pool *coherent_pool; /* Allocates from a pool of uncached memory that was reserved at boot time */ -static void *hexagon_dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_addr, gfp_t flag, - unsigned long attrs) +void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_addr, + gfp_t flag, unsigned long attrs) { void *ret; @@ -75,58 +62,17 @@ static void *hexagon_dma_alloc_coherent(struct device *dev, size_t size, return ret; } -static void hexagon_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_addr, unsigned long attrs) +void arch_dma_free(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_addr, unsigned long attrs) { gen_pool_free(coherent_pool, (unsigned long) vaddr, size); } -static int check_addr(const char *name, struct device *hwdev, - dma_addr_t bus, size_t size) -{ - if (hwdev && hwdev->dma_mask && !dma_capable(hwdev, bus, size)) { - if (*hwdev->dma_mask >= DMA_BIT_MASK(32)) - printk(KERN_ERR - "%s: overflow %Lx+%zu of device mask %Lx\n", - name, (long long)bus, size, - (long long)*hwdev->dma_mask); - return 0; - } - return 1; -} - -static int hexagon_map_sg(struct device *hwdev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, - unsigned long attrs) +void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir) { - struct scatterlist *s; - int i; - - WARN_ON(nents == 0 || sg[0].length == 0); - - for_each_sg(sg, s, nents, i) { - s->dma_address = sg_phys(s); - 
if (!check_addr("map_sg", hwdev, s->dma_address, s->length)) - return 0; - - s->dma_length = s->length; + void *addr = phys_to_virt(paddr); - if (attrs & DMA_ATTR_SKIP_CPU_SYNC) - continue; - - flush_dcache_range(dma_addr_to_virt(s->dma_address), - dma_addr_to_virt(s->dma_address + s->length)); - } - - return nents; -} - -/* - * address is virtual - */ -static inline void dma_sync(void *addr, size_t size, - enum dma_data_direction dir) -{ switch (dir) { case DMA_TO_DEVICE: hexagon_clean_dcache_range((unsigned long) addr, @@ -144,76 +90,3 @@ static inline void dma_sync(void *addr, size_t size, BUG(); } } - -/** - * hexagon_map_page() - maps an address for device DMA - * @dev: pointer to DMA device - * @page: pointer to page struct of DMA memory - * @offset: offset within page - * @size: size of memory to map - * @dir: transfer direction - * @attrs: pointer to DMA attrs (not used) - * - * Called to map a memory address to a DMA address prior - * to accesses to/from device. - * - * We don't particularly have many hoops to jump through - * so far. Straight translation between phys and virtual. - * - * DMA is not cache coherent so sync is necessary; this - * seems to be a convenient place to do it. 
- * - */ -static dma_addr_t hexagon_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - dma_addr_t bus = page_to_phys(page) + offset; - WARN_ON(size == 0); - - if (!check_addr("map_single", dev, bus, size)) - return HEXAGON_MAPPING_ERROR; - - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - dma_sync(dma_addr_to_virt(bus), size, dir); - - return bus; -} - -static void hexagon_sync_single_for_cpu(struct device *dev, - dma_addr_t dma_handle, size_t size, - enum dma_data_direction dir) -{ - dma_sync(dma_addr_to_virt(dma_handle), size, dir); -} - -static void hexagon_sync_single_for_device(struct device *dev, - dma_addr_t dma_handle, size_t size, - enum dma_data_direction dir) -{ - dma_sync(dma_addr_to_virt(dma_handle), size, dir); -} - -static int hexagon_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return dma_addr == HEXAGON_MAPPING_ERROR; -} - -const struct dma_map_ops hexagon_dma_ops = { - .alloc = hexagon_dma_alloc_coherent, - .free = hexagon_free_coherent, - .map_sg = hexagon_map_sg, - .map_page = hexagon_map_page, - .sync_single_for_cpu = hexagon_sync_single_for_cpu, - .sync_single_for_device = hexagon_sync_single_for_device, - .mapping_error = hexagon_mapping_error, -}; - -void __init hexagon_dma_init(void) -{ - if (dma_ops) - return; - - dma_ops = &hexagon_dma_ops; -} diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h @@ -10,8 +10,6 @@ #include <linux/scatterlist.h> #include <linux/dma-debug.h> -#define ARCH_HAS_DMA_GET_REQUIRED_MASK - extern const struct dma_map_ops *dma_ops; extern struct ia64_machine_vector ia64_mv; extern void set_iommu_machvec(void); diff --git a/arch/ia64/include/asm/machvec.h b/arch/ia64/include/asm/machvec.h @@ -44,7 +44,6 @@ typedef void ia64_mv_kernel_launch_event_t(void); /* DMA-mapping interface: */ typedef void ia64_mv_dma_init (void); -typedef u64 ia64_mv_dma_get_required_mask (struct device *); 
typedef const struct dma_map_ops *ia64_mv_dma_get_ops(struct device *); /* @@ -127,7 +126,6 @@ extern void machvec_tlb_migrate_finish (struct mm_struct *); # define platform_global_tlb_purge ia64_mv.global_tlb_purge # define platform_tlb_migrate_finish ia64_mv.tlb_migrate_finish # define platform_dma_init ia64_mv.dma_init -# define platform_dma_get_required_mask ia64_mv.dma_get_required_mask # define platform_dma_get_ops ia64_mv.dma_get_ops # define platform_irq_to_vector ia64_mv.irq_to_vector # define platform_local_vector_to_irq ia64_mv.local_vector_to_irq @@ -171,7 +169,6 @@ struct ia64_machine_vector { ia64_mv_global_tlb_purge_t *global_tlb_purge; ia64_mv_tlb_migrate_finish_t *tlb_migrate_finish; ia64_mv_dma_init *dma_init; - ia64_mv_dma_get_required_mask *dma_get_required_mask; ia64_mv_dma_get_ops *dma_get_ops; ia64_mv_irq_to_vector *irq_to_vector; ia64_mv_local_vector_to_irq *local_vector_to_irq; @@ -211,7 +208,6 @@ struct ia64_machine_vector { platform_global_tlb_purge, \ platform_tlb_migrate_finish, \ platform_dma_init, \ - platform_dma_get_required_mask, \ platform_dma_get_ops, \ platform_irq_to_vector, \ platform_local_vector_to_irq, \ @@ -286,9 +282,6 @@ extern const struct dma_map_ops *dma_get_ops(struct device *); #ifndef platform_dma_get_ops # define platform_dma_get_ops dma_get_ops #endif -#ifndef platform_dma_get_required_mask -# define platform_dma_get_required_mask ia64_dma_get_required_mask -#endif #ifndef platform_irq_to_vector # define platform_irq_to_vector __ia64_irq_to_vector #endif diff --git a/arch/ia64/include/asm/machvec_init.h b/arch/ia64/include/asm/machvec_init.h @@ -4,7 +4,6 @@ extern ia64_mv_send_ipi_t ia64_send_ipi; extern ia64_mv_global_tlb_purge_t ia64_global_tlb_purge; -extern ia64_mv_dma_get_required_mask ia64_dma_get_required_mask; extern ia64_mv_irq_to_vector __ia64_irq_to_vector; extern ia64_mv_local_vector_to_irq __ia64_local_vector_to_irq; extern ia64_mv_pci_get_legacy_mem_t ia64_pci_get_legacy_mem; diff --git 
a/arch/ia64/include/asm/machvec_sn2.h b/arch/ia64/include/asm/machvec_sn2.h @@ -55,7 +55,6 @@ extern ia64_mv_readb_t __sn_readb_relaxed; extern ia64_mv_readw_t __sn_readw_relaxed; extern ia64_mv_readl_t __sn_readl_relaxed; extern ia64_mv_readq_t __sn_readq_relaxed; -extern ia64_mv_dma_get_required_mask sn_dma_get_required_mask; extern ia64_mv_dma_init sn_dma_init; extern ia64_mv_migrate_t sn_migrate; extern ia64_mv_kernel_launch_event_t sn_kernel_launch_event; @@ -100,7 +99,6 @@ extern ia64_mv_pci_fixup_bus_t sn_pci_fixup_bus; #define platform_pci_get_legacy_mem sn_pci_get_legacy_mem #define platform_pci_legacy_read sn_pci_legacy_read #define platform_pci_legacy_write sn_pci_legacy_write -#define platform_dma_get_required_mask sn_dma_get_required_mask #define platform_dma_init sn_dma_init #define platform_migrate sn_migrate #define platform_kernel_launch_event sn_kernel_launch_event diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c @@ -568,32 +568,6 @@ static void __init set_pci_dfl_cacheline_size(void) pci_dfl_cache_line_size = (1 << cci.pcci_line_size) / 4; } -u64 ia64_dma_get_required_mask(struct device *dev) -{ - u32 low_totalram = ((max_pfn - 1) << PAGE_SHIFT); - u32 high_totalram = ((max_pfn - 1) >> (32 - PAGE_SHIFT)); - u64 mask; - - if (!high_totalram) { - /* convert to mask just covering totalram */ - low_totalram = (1 << (fls(low_totalram) - 1)); - low_totalram += low_totalram - 1; - mask = low_totalram; - } else { - high_totalram = (1 << (fls(high_totalram) - 1)); - high_totalram += high_totalram - 1; - mask = (((u64)high_totalram) << 32) + 0xffffffff; - } - return mask; -} -EXPORT_SYMBOL_GPL(ia64_dma_get_required_mask); - -u64 dma_get_required_mask(struct device *dev) -{ - return platform_dma_get_required_mask(dev); -} -EXPORT_SYMBOL_GPL(dma_get_required_mask); - static int __init pcibios_init(void) { set_pci_dfl_cacheline_size(); diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c @@ -344,11 +344,10 @@ static int 
sn_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) return 0; } -u64 sn_dma_get_required_mask(struct device *dev) +static u64 sn_dma_get_required_mask(struct device *dev) { return DMA_BIT_MASK(64); } -EXPORT_SYMBOL_GPL(sn_dma_get_required_mask); char *sn_pci_get_legacy_mem(struct pci_bus *bus) { @@ -473,6 +472,7 @@ static struct dma_map_ops sn_dma_ops = { .sync_sg_for_device = sn_dma_sync_sg_for_device, .mapping_error = sn_dma_mapping_error, .dma_supported = sn_dma_supported, + .get_required_mask = sn_dma_get_required_mask, }; void sn_dma_init(void) diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig @@ -26,7 +26,7 @@ config M68K select MODULES_USE_ELF_RELA select OLD_SIGSUSPEND3 select OLD_SIGACTION - select DMA_NONCOHERENT_OPS if HAS_DMA + select DMA_DIRECT_OPS if HAS_DMA select HAVE_MEMBLOCK select ARCH_DISCARD_MEMBLOCK select NO_BOOTMEM diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig @@ -1,6 +1,7 @@ config MICROBLAZE def_bool y select ARCH_NO_SWAP + select ARCH_HAS_DMA_COHERENT_TO_PFN if MMU select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE @@ -11,8 +12,7 @@ config MICROBLAZE select TIMER_OF select CLONE_BACKWARDS3 select COMMON_CLK - select DMA_NONCOHERENT_OPS - select DMA_NONCOHERENT_MMAP + select DMA_DIRECT_OPS select GENERIC_ATOMIC64 select GENERIC_CLOCKEVENTS select GENERIC_CPU_DEVICES diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h @@ -553,8 +553,6 @@ void __init *early_get_page(void); extern unsigned long ioremap_bot, ioremap_base; -unsigned long consistent_virt_to_pfn(void *vaddr); - void setup_memory(void); #endif /* __ASSEMBLY__ */ diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c @@ -42,25 +42,3 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, { __dma_sync(dev, paddr, size, dir); } - -int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t handle, 
size_t size, - unsigned long attrs) -{ -#ifdef CONFIG_MMU - unsigned long user_count = vma_pages(vma); - unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; - unsigned long off = vma->vm_pgoff; - unsigned long pfn; - - if (off >= count || user_count > (count - off)) - return -ENXIO; - - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - pfn = consistent_virt_to_pfn(cpu_addr); - return remap_pfn_range(vma, vma->vm_start, pfn + off, - vma->vm_end - vma->vm_start, vma->vm_page_prot); -#else - return -ENXIO; -#endif -} diff --git a/arch/microblaze/mm/consistent.c b/arch/microblaze/mm/consistent.c @@ -165,7 +165,8 @@ static pte_t *consistent_virt_to_pte(void *vaddr) return pte_offset_kernel(pmd_offset(pgd_offset_k(addr), addr), addr); } -unsigned long consistent_virt_to_pfn(void *vaddr) +long arch_dma_coherent_to_pfn(struct device *dev, void *vaddr, + dma_addr_t dma_addr) { pte_t *ptep = consistent_virt_to_pte(vaddr); diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig @@ -1106,21 +1106,22 @@ config ARCH_SUPPORTS_UPROBES bool config DMA_MAYBE_COHERENT + select ARCH_HAS_DMA_COHERENCE_H select DMA_NONCOHERENT bool config DMA_PERDEV_COHERENT bool - select DMA_MAYBE_COHERENT + select DMA_NONCOHERENT config DMA_NONCOHERENT bool + select ARCH_HAS_DMA_MMAP_PGPROT select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_HAS_SYNC_DMA_FOR_CPU select NEED_DMA_MAP_STATE - select DMA_NONCOHERENT_MMAP + select ARCH_HAS_DMA_COHERENT_TO_PFN select DMA_NONCOHERENT_CACHE_SYNC - select DMA_NONCOHERENT_OPS config SYS_HAS_EARLY_PRINTK bool diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild @@ -1,6 +1,7 @@ # MIPS headers generic-(CONFIG_GENERIC_CSUM) += checksum.h generic-y += current.h +generic-y += device.h generic-y += dma-contiguous.h generic-y += emergency-restart.h generic-y += export.h diff --git a/arch/mips/include/asm/device.h b/arch/mips/include/asm/device.h @@ -1,19 +0,0 @@ -/* - * Arch specific extensions to struct device - * - * This file is released under 
the GPLv2 - */ -#ifndef _ASM_MIPS_DEVICE_H -#define _ASM_MIPS_DEVICE_H - -struct dev_archdata { -#ifdef CONFIG_DMA_PERDEV_COHERENT - /* Non-zero if DMA is coherent with CPU caches */ - bool dma_coherent; -#endif -}; - -struct pdev_archdata { -}; - -#endif /* _ASM_MIPS_DEVICE_H*/ diff --git a/arch/mips/include/asm/dma-coherence.h b/arch/mips/include/asm/dma-coherence.h @@ -20,6 +20,12 @@ enum coherent_io_user_state { #elif defined(CONFIG_DMA_MAYBE_COHERENT) extern enum coherent_io_user_state coherentio; extern int hw_coherentio; + +static inline bool dev_is_dma_coherent(struct device *dev) +{ + return coherentio == IO_COHERENCE_ENABLED || + (coherentio == IO_COHERENCE_DEFAULT && hw_coherentio); +} #else #ifdef CONFIG_DMA_NONCOHERENT #define coherentio IO_COHERENCE_DISABLED diff --git a/arch/mips/include/asm/dma-mapping.h b/arch/mips/include/asm/dma-mapping.h @@ -12,8 +12,6 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) return &jazz_dma_ops; #elif defined(CONFIG_SWIOTLB) return &swiotlb_dma_ops; -#elif defined(CONFIG_DMA_NONCOHERENT_OPS) - return &dma_noncoherent_ops; #else return &dma_direct_ops; #endif @@ -25,7 +23,7 @@ static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base, bool coherent) { #ifdef CONFIG_DMA_PERDEV_COHERENT - dev->archdata.dma_coherent = coherent; + dev->dma_coherent = coherent; #endif } diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c @@ -564,13 +564,13 @@ static void *jazz_dma_alloc(struct device *dev, size_t size, { void *ret; - ret = dma_direct_alloc(dev, size, dma_handle, gfp, attrs); + ret = dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs); if (!ret) return NULL; *dma_handle = vdma_alloc(virt_to_phys(ret), size); if (*dma_handle == VDMA_ERROR) { - dma_direct_free(dev, size, ret, *dma_handle, attrs); + dma_direct_free_pages(dev, size, ret, *dma_handle, attrs); return NULL; } @@ -587,7 +587,7 @@ static void jazz_dma_free(struct device *dev, size_t size, void *vaddr, 
vdma_free(dma_handle); if (!(attrs & DMA_ATTR_NON_CONSISTENT)) vaddr = (void *)CAC_ADDR((unsigned long)vaddr); - return dma_direct_free(dev, size, vaddr, dma_handle, attrs); + dma_direct_free_pages(dev, size, vaddr, dma_handle, attrs); } static dma_addr_t jazz_dma_map_page(struct device *dev, struct page *page, @@ -682,7 +682,6 @@ static int jazz_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) const struct dma_map_ops jazz_dma_ops = { .alloc = jazz_dma_alloc, .free = jazz_dma_free, - .mmap = arch_dma_mmap, .map_page = jazz_dma_map_page, .unmap_page = jazz_dma_unmap_page, .map_sg = jazz_dma_map_sg, diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c @@ -1075,7 +1075,7 @@ static int __init debugfs_mips(void) arch_initcall(debugfs_mips); #endif -#if defined(CONFIG_DMA_MAYBE_COHERENT) && !defined(CONFIG_DMA_PERDEV_COHERENT) +#ifdef CONFIG_DMA_MAYBE_COHERENT /* User defined DMA coherency from command line. */ enum coherent_io_user_state coherentio = IO_COHERENCE_DEFAULT; EXPORT_SYMBOL_GPL(coherentio); diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c @@ -1955,22 +1955,21 @@ void r4k_cache_init(void) __flush_icache_user_range = r4k_flush_icache_user_range; __local_flush_icache_user_range = local_r4k_flush_icache_user_range; -#if defined(CONFIG_DMA_NONCOHERENT) || defined(CONFIG_DMA_MAYBE_COHERENT) -# if defined(CONFIG_DMA_PERDEV_COHERENT) - if (0) { -# else - if ((coherentio == IO_COHERENCE_ENABLED) || - ((coherentio == IO_COHERENCE_DEFAULT) && hw_coherentio)) { -# endif +#ifdef CONFIG_DMA_NONCOHERENT +#ifdef CONFIG_DMA_MAYBE_COHERENT + if (coherentio == IO_COHERENCE_ENABLED || + (coherentio == IO_COHERENCE_DEFAULT && hw_coherentio)) { _dma_cache_wback_inv = (void *)cache_noop; _dma_cache_wback = (void *)cache_noop; _dma_cache_inv = (void *)cache_noop; - } else { + } else +#endif /* CONFIG_DMA_MAYBE_COHERENT */ + { _dma_cache_wback_inv = r4k_dma_cache_wback_inv; _dma_cache_wback = r4k_dma_cache_wback_inv; _dma_cache_inv = r4k_dma_cache_inv; 
} -#endif +#endif /* CONFIG_DMA_NONCOHERENT */ build_clear_page(); build_copy_page(); diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c @@ -14,26 +14,6 @@ #include <asm/dma-coherence.h> #include <asm/io.h> -#ifdef CONFIG_DMA_PERDEV_COHERENT -static inline int dev_is_coherent(struct device *dev) -{ - return dev->archdata.dma_coherent; -} -#else -static inline int dev_is_coherent(struct device *dev) -{ - switch (coherentio) { - default: - case IO_COHERENCE_DEFAULT: - return hw_coherentio; - case IO_COHERENCE_ENABLED: - return 1; - case IO_COHERENCE_DISABLED: - return 0; - } -} -#endif /* CONFIG_DMA_PERDEV_COHERENT */ - /* * The affected CPUs below in 'cpu_needs_post_dma_flush()' can speculatively * fill random cachelines with stale data at any time, requiring an extra @@ -49,9 +29,6 @@ static inline int dev_is_coherent(struct device *dev) */ static inline bool cpu_needs_post_dma_flush(struct device *dev) { - if (dev_is_coherent(dev)) - return false; - switch (boot_cpu_type()) { case CPU_R10000: case CPU_R12000: @@ -72,11 +49,8 @@ void *arch_dma_alloc(struct device *dev, size_t size, { void *ret; - ret = dma_direct_alloc(dev, size, dma_handle, gfp, attrs); - if (!ret) - return NULL; - - if (!dev_is_coherent(dev) && !(attrs & DMA_ATTR_NON_CONSISTENT)) { + ret = dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs); + if (!ret && !(attrs & DMA_ATTR_NON_CONSISTENT)) { dma_cache_wback_inv((unsigned long) ret, size); ret = (void *)UNCAC_ADDR(ret); } @@ -87,43 +61,24 @@ void *arch_dma_alloc(struct device *dev, size_t size, void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) { - if (!(attrs & DMA_ATTR_NON_CONSISTENT) && !dev_is_coherent(dev)) + if (!(attrs & DMA_ATTR_NON_CONSISTENT)) cpu_addr = (void *)CAC_ADDR((unsigned long)cpu_addr); - dma_direct_free(dev, size, cpu_addr, dma_addr, attrs); + dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs); } -int arch_dma_mmap(struct 
device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs) +long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr, + dma_addr_t dma_addr) { - unsigned long user_count = vma_pages(vma); - unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; - unsigned long addr = (unsigned long)cpu_addr; - unsigned long off = vma->vm_pgoff; - unsigned long pfn; - int ret = -ENXIO; - - if (!dev_is_coherent(dev)) - addr = CAC_ADDR(addr); - - pfn = page_to_pfn(virt_to_page((void *)addr)); + unsigned long addr = CAC_ADDR((unsigned long)cpu_addr); + return page_to_pfn(virt_to_page((void *)addr)); +} +pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, + unsigned long attrs) +{ if (attrs & DMA_ATTR_WRITE_COMBINE) - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); - else - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - - if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) - return ret; - - if (off < count && user_count <= (count - off)) { - ret = remap_pfn_range(vma, vma->vm_start, - pfn + off, - user_count << PAGE_SHIFT, - vma->vm_page_prot); - } - - return ret; + return pgprot_writecombine(prot); + return pgprot_noncached(prot); } static inline void dma_sync_virt(void *addr, size_t size, @@ -187,8 +142,7 @@ static inline void dma_sync_phys(phys_addr_t paddr, size_t size, void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, size_t size, enum dma_data_direction dir) { - if (!dev_is_coherent(dev)) - dma_sync_phys(paddr, size, dir); + dma_sync_phys(paddr, size, dir); } void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, @@ -203,6 +157,5 @@ void arch_dma_cache_sync(struct device *dev, void *vaddr, size_t size, { BUG_ON(direction == DMA_NONE); - if (!dev_is_coherent(dev)) - dma_sync_virt(vaddr, size, direction); + dma_sync_virt(vaddr, size, direction); } diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig @@ -11,7 +11,7 @@ config NDS32 select 
CLKSRC_MMIO select CLONE_BACKWARDS select COMMON_CLK - select DMA_NONCOHERENT_OPS + select DMA_DIRECT_OPS select GENERIC_ATOMIC64 select GENERIC_CPU_DEVICES select GENERIC_CLOCKEVENTS diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig @@ -4,7 +4,7 @@ config NIOS2 select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_NO_SWAP - select DMA_NONCOHERENT_OPS + select DMA_DIRECT_OPS select TIMER_OF select GENERIC_ATOMIC64 select GENERIC_CLOCKEVENTS diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig @@ -7,7 +7,7 @@ config OPENRISC def_bool y select ARCH_HAS_SYNC_DMA_FOR_DEVICE - select DMA_NONCOHERENT_OPS + select DMA_DIRECT_OPS select OF select OF_EARLY_FLATTREE select IRQ_DOMAIN diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig @@ -186,7 +186,7 @@ config PA11 depends on PA7000 || PA7100LC || PA7200 || PA7300LC select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE - select DMA_NONCOHERENT_OPS + select DMA_DIRECT_OPS select DMA_NONCOHERENT_CACHE_SYNC config PREFETCH diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c @@ -102,7 +102,7 @@ void __init dma_ops_init(void) case pcxl: /* falls through */ case pcxs: case pcxt: - hppa_dma_ops = &dma_noncoherent_ops; + hppa_dma_ops = &dma_direct_ops; break; default: break; diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig @@ -7,6 +7,7 @@ config SUPERH select ARCH_NO_COHERENT_DMA_MMAP if !MMU select HAVE_PATA_PLATFORM select CLKDEV_LOOKUP + select DMA_DIRECT_OPS select HAVE_IDE if HAS_IOPORT_MAP select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP @@ -158,13 +159,11 @@ config SWAP_IO_SPACE bool config DMA_COHERENT - select DMA_DIRECT_OPS bool config DMA_NONCOHERENT def_bool !DMA_COHERENT select ARCH_HAS_SYNC_DMA_FOR_DEVICE - select DMA_NONCOHERENT_OPS config PGTABLE_LEVELS default 3 if X2TLB diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig @@ -51,7 +51,7 @@ config SPARC config SPARC32 def_bool !64BIT select ARCH_HAS_SYNC_DMA_FOR_CPU - select DMA_NONCOHERENT_OPS 
+ select DMA_DIRECT_OPS select GENERIC_ATOMIC64 select CLZ_TAB select HAVE_UID16 diff --git a/arch/sparc/include/asm/dma-mapping.h b/arch/sparc/include/asm/dma-mapping.h @@ -14,11 +14,11 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { #ifdef CONFIG_SPARC_LEON if (sparc_cpu_model == sparc_leon) - return &dma_noncoherent_ops; + return &dma_direct_ops; #endif #if defined(CONFIG_SPARC32) && defined(CONFIG_PCI) if (bus == &pci_bus_type) - return &dma_noncoherent_ops; + return &dma_direct_ops; #endif return dma_ops; } diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig @@ -4,6 +4,7 @@ config UNICORE32 select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO + select DMA_DIRECT_OPS select HAVE_MEMBLOCK select HAVE_GENERIC_DMA_COHERENT select HAVE_KERNEL_GZIP @@ -20,7 +21,6 @@ config UNICORE32 select GENERIC_IOMAP select MODULES_USE_ELF_REL select NEED_DMA_MAP_STATE - select SWIOTLB help UniCore-32 is 32-bit Instruction Set Architecture, including a series of low-power-consumption RISC chip diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild @@ -4,6 +4,7 @@ generic-y += compat.h generic-y += current.h generic-y += device.h generic-y += div64.h +generic-y += dma-mapping.h generic-y += emergency-restart.h generic-y += exec.h generic-y += extable.h diff --git a/arch/unicore32/include/asm/dma-mapping.h b/arch/unicore32/include/asm/dma-mapping.h @@ -1,22 +0,0 @@ -/* - * linux/arch/unicore32/include/asm/dma-mapping.h - * - * Code specific to PKUnity SoC and UniCore ISA - * - * Copyright (C) 2001-2010 GUAN Xue-tao - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ -#ifndef __UNICORE_DMA_MAPPING_H__ -#define __UNICORE_DMA_MAPPING_H__ - -#include <linux/swiotlb.h> - -static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) -{ - return &swiotlb_dma_ops; -} - -#endif diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c @@ -234,9 +234,6 @@ void __init bootmem_init(void) uc32_bootmem_init(min, max_low); -#ifdef CONFIG_SWIOTLB - swiotlb_init(1); -#endif /* * Sparsemem tries to allocate bootmem in memory_present(), * so must be done after the fixed reservations diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c @@ -482,7 +482,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, { void *vaddr; - vaddr = dma_direct_alloc(dev, size, dma_addr, flag, attrs); + vaddr = dma_direct_alloc_pages(dev, size, dma_addr, flag, attrs); if (!vaddr || !force_iommu || dev->coherent_dma_mask <= DMA_BIT_MASK(24)) return vaddr; @@ -494,7 +494,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, goto out_free; return vaddr; out_free: - dma_direct_free(dev, size, vaddr, *dma_addr, attrs); + dma_direct_free_pages(dev, size, vaddr, *dma_addr, attrs); return NULL; } @@ -504,7 +504,7 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_addr, unsigned long attrs) { gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, 0); - dma_direct_free(dev, size, vaddr, dma_addr, attrs); + dma_direct_free_pages(dev, size, vaddr, dma_addr, attrs); } static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr) diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig @@ -13,7 +13,7 @@ config XTENSA select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS select COMMON_CLK - select DMA_NONCOHERENT_OPS + select DMA_DIRECT_OPS select GENERIC_ATOMIC64 select GENERIC_CLOCKEVENTS select GENERIC_IRQ_SHOW diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c @@ -1428,7 +1428,7 @@ static int __init 
iort_add_platform_device(struct acpi_iort_node *node, return 0; dma_deconfigure: - acpi_dma_deconfigure(&pdev->dev); + arch_teardown_dma_ops(&pdev->dev); dev_put: platform_device_put(pdev); diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c @@ -1469,16 +1469,6 @@ int acpi_dma_configure(struct device *dev, enum dev_dma_attr attr) } EXPORT_SYMBOL_GPL(acpi_dma_configure); -/** - * acpi_dma_deconfigure - Tear-down DMA configuration for the device. - * @dev: The pointer to the device - */ -void acpi_dma_deconfigure(struct device *dev) -{ - arch_teardown_dma_ops(dev); -} -EXPORT_SYMBOL_GPL(acpi_dma_deconfigure); - static void acpi_init_coherency(struct acpi_device *adev) { unsigned long long cca = 0; diff --git a/drivers/base/dd.c b/drivers/base/dd.c @@ -480,9 +480,11 @@ re_probe: if (ret) goto pinctrl_bind_failed; - ret = dma_configure(dev); - if (ret) - goto dma_failed; + if (dev->bus->dma_configure) { + ret = dev->bus->dma_configure(dev); + if (ret) + goto dma_failed; + } if (driver_sysfs_add(dev)) { printk(KERN_ERR "%s: driver_sysfs_add(%s) failed\n", @@ -537,7 +539,7 @@ re_probe: goto done; probe_failed: - dma_deconfigure(dev); + arch_teardown_dma_ops(dev); dma_failed: if (dev->bus) blocking_notifier_call_chain(&dev->bus->p->bus_notifier, @@ -966,7 +968,7 @@ static void __device_release_driver(struct device *dev, struct device *parent) drv->remove(dev); device_links_driver_cleanup(dev); - dma_deconfigure(dev); + arch_teardown_dma_ops(dev); devres_release_all(dev); dev->driver = NULL; diff --git a/drivers/base/platform.c b/drivers/base/platform.c @@ -1180,7 +1180,7 @@ int __init platform_bus_init(void) } #ifndef ARCH_HAS_DMA_GET_REQUIRED_MASK -u64 dma_get_required_mask(struct device *dev) +static u64 dma_default_get_required_mask(struct device *dev) { u32 low_totalram = ((max_pfn - 1) << PAGE_SHIFT); u32 high_totalram = ((max_pfn - 1) >> (32 - PAGE_SHIFT)); @@ -1198,6 +1198,15 @@ u64 dma_get_required_mask(struct device *dev) } return mask; } + +u64 
dma_get_required_mask(struct device *dev) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (ops->get_required_mask) + return ops->get_required_mask(dev); + return dma_default_get_required_mask(dev); +} EXPORT_SYMBOL_GPL(dma_get_required_mask); #endif diff --git a/drivers/of/device.c b/drivers/of/device.c @@ -170,18 +170,6 @@ int of_dma_configure(struct device *dev, struct device_node *np, bool force_dma) } EXPORT_SYMBOL_GPL(of_dma_configure); -/** - * of_dma_deconfigure - Clean up DMA configuration - * @dev: Device for which to clean up DMA configuration - * - * Clean up all configuration performed by of_dma_configure_ops() and free all - * resources that have been allocated. - */ -void of_dma_deconfigure(struct device *dev) -{ - arch_teardown_dma_ops(dev); -} - int of_device_register(struct platform_device *pdev) { device_initialize(&pdev->dev); diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c @@ -404,12 +404,10 @@ static int vmd_dma_supported(struct device *dev, u64 mask) return vmd_dma_ops(dev)->dma_supported(to_vmd_dev(dev), mask); } -#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK static u64 vmd_get_required_mask(struct device *dev) { return vmd_dma_ops(dev)->get_required_mask(to_vmd_dev(dev)); } -#endif static void vmd_teardown_dma_ops(struct vmd_dev *vmd) { @@ -450,9 +448,7 @@ static void vmd_setup_dma_ops(struct vmd_dev *vmd) ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_device); ASSIGN_VMD_DMA_OPS(source, dest, mapping_error); ASSIGN_VMD_DMA_OPS(source, dest, dma_supported); -#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK ASSIGN_VMD_DMA_OPS(source, dest, get_required_mask); -#endif add_dma_domain(domain); } #undef ASSIGN_VMD_DMA_OPS diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c @@ -662,7 +662,7 @@ xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma, return xen_get_dma_ops(dev)->mmap(dev, vma, cpu_addr, dma_addr, size, attrs); #endif - return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size); + return 
dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); } /* @@ -689,7 +689,7 @@ xen_swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt, handle, size, attrs); } #endif - return dma_common_get_sgtable(dev, sgt, cpu_addr, handle, size); + return dma_common_get_sgtable(dev, sgt, cpu_addr, handle, size, attrs); } static int xen_swiotlb_mapping_error(struct device *dev, dma_addr_t dma_addr) diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h @@ -595,7 +595,6 @@ enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev); int acpi_dma_get_range(struct device *dev, u64 *dma_addr, u64 *offset, u64 *size); int acpi_dma_configure(struct device *dev, enum dev_dma_attr attr); -void acpi_dma_deconfigure(struct device *dev); struct acpi_device *acpi_find_child_device(struct acpi_device *parent, u64 address, bool check_children); diff --git a/include/asm-generic/dma-mapping.h b/include/asm-generic/dma-mapping.h @@ -4,16 +4,7 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { - /* - * Use the non-coherent ops if available. If an architecture wants a - * more fine-grained selection of operations it will have to implement - * get_arch_dma_ops itself or use the per-device dma_ops. - */ -#ifdef CONFIG_DMA_NONCOHERENT_OPS - return &dma_noncoherent_ops; -#else return &dma_direct_ops; -#endif } #endif /* _ASM_GENERIC_DMA_MAPPING_H */ diff --git a/include/linux/acpi.h b/include/linux/acpi.h @@ -831,8 +831,6 @@ static inline int acpi_dma_configure(struct device *dev, return 0; } -static inline void acpi_dma_deconfigure(struct device *dev) { } - #define ACPI_PTR(_ptr) (NULL) static inline void acpi_device_set_enumerated(struct acpi_device *adev) diff --git a/include/linux/device.h b/include/linux/device.h @@ -927,6 +927,8 @@ struct dev_links_info { * @offline: Set after successful invocation of bus type's .offline(). * @of_node_reused: Set if the device-tree node is shared with an ancestor * device. 
+ * @dma_coherent: this particular device is dma coherent, even if the + * architecture supports non-coherent devices. * * At the lowest level, every device in a Linux system is represented by an * instance of struct device. The device structure contains the information @@ -1016,6 +1018,11 @@ struct device { bool offline_disabled:1; bool offline:1; bool of_node_reused:1; +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) + bool dma_coherent:1; +#endif }; static inline struct device *kobj_to_dev(struct kobject *kobj) diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h @@ -32,6 +32,9 @@ extern void dma_debug_add_bus(struct bus_type *bus); extern int dma_debug_resize_entries(u32 num_entries); +extern void debug_dma_map_single(struct device *dev, const void *addr, + unsigned long len); + extern void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, size_t size, int direction, dma_addr_t dma_addr, @@ -103,6 +106,11 @@ static inline int dma_debug_resize_entries(u32 num_entries) return 0; } +static inline void debug_dma_map_single(struct device *dev, const void *addr, + unsigned long len) +{ +} + static inline void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, size_t size, int direction, dma_addr_t dma_addr, diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h @@ -27,7 +27,8 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) if (!dev->dma_mask) return false; - return addr + size - 1 <= *dev->dma_mask; + return addr + size - 1 <= + min_not_zero(*dev->dma_mask, dev->bus_dma_mask); } #endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */ @@ -55,10 +56,15 @@ static inline void dma_mark_clean(void *addr, size_t size) } #endif /* CONFIG_ARCH_HAS_DMA_MARK_CLEAN */ +u64 dma_direct_get_required_mask(struct device *dev); void *dma_direct_alloc(struct device *dev, size_t size, 
dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs); +void *dma_direct_alloc_pages(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); +void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_addr, unsigned long attrs); dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs); diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h @@ -130,13 +130,10 @@ struct dma_map_ops { enum dma_data_direction direction); int (*mapping_error)(struct device *dev, dma_addr_t dma_addr); int (*dma_supported)(struct device *dev, u64 mask); -#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK u64 (*get_required_mask)(struct device *dev); -#endif }; extern const struct dma_map_ops dma_direct_ops; -extern const struct dma_map_ops dma_noncoherent_ops; extern const struct dma_map_ops dma_virt_ops; #define DMA_BIT_MASK(n) (((n) == 64) ? 
~0ULL : ((1ULL<<(n))-1)) @@ -232,6 +229,7 @@ static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr, dma_addr_t addr; BUG_ON(!valid_dma_direction(dir)); + debug_dma_map_single(dev, ptr, size); addr = ops->map_page(dev, virt_to_page(ptr), offset_in_page(ptr), size, dir, attrs); @@ -445,7 +443,8 @@ dma_cache_sync(struct device *dev, void *vaddr, size_t size, } extern int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size); + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); void *dma_common_contiguous_remap(struct page *page, size_t size, unsigned long vm_flags, @@ -477,14 +476,14 @@ dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, BUG_ON(!ops); if (ops->mmap) return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs); - return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size); + return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); } #define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, 0) int -dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, - void *cpu_addr, dma_addr_t dma_addr, size_t size); +dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, + dma_addr_t dma_addr, size_t size, unsigned long attrs); static inline int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, void *cpu_addr, @@ -496,7 +495,8 @@ dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, void *cpu_addr, if (ops->get_sgtable) return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size, attrs); - return dma_common_get_sgtable(dev, sgt, cpu_addr, dma_addr, size); + return dma_common_get_sgtable(dev, sgt, cpu_addr, dma_addr, size, + attrs); } #define dma_get_sgtable(d, t, v, h, s) dma_get_sgtable_attrs(d, t, v, h, s, 0) @@ -558,9 +558,11 @@ static inline void dma_free_attrs(struct device *dev, size_t size, } static inline void *dma_alloc_coherent(struct device 
*dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag) + dma_addr_t *dma_handle, gfp_t gfp) { - return dma_alloc_attrs(dev, size, dma_handle, flag, 0); + + return dma_alloc_attrs(dev, size, dma_handle, gfp, + (gfp & __GFP_NOWARN) ? DMA_ATTR_NO_WARN : 0); } static inline void dma_free_coherent(struct device *dev, size_t size, @@ -753,18 +755,6 @@ dma_mark_declared_memory_occupied(struct device *dev, } #endif /* CONFIG_HAVE_GENERIC_DMA_COHERENT */ -#ifdef CONFIG_HAS_DMA -int dma_configure(struct device *dev); -void dma_deconfigure(struct device *dev); -#else -static inline int dma_configure(struct device *dev) -{ - return 0; -} - -static inline void dma_deconfigure(struct device *dev) {} -#endif - /* * Managed DMA API */ @@ -806,8 +796,12 @@ static inline void dmam_release_declared_memory(struct device *dev) static inline void *dma_alloc_wc(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t gfp) { - return dma_alloc_attrs(dev, size, dma_addr, gfp, - DMA_ATTR_WRITE_COMBINE); + unsigned long attrs = DMA_ATTR_NO_WARN; + + if (gfp & __GFP_NOWARN) + attrs |= DMA_ATTR_NO_WARN; + + return dma_alloc_attrs(dev, size, dma_addr, gfp, attrs); } #ifndef dma_alloc_writecombine #define dma_alloc_writecombine dma_alloc_wc diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h @@ -4,18 +4,35 @@ #include <linux/dma-mapping.h> +#ifdef CONFIG_ARCH_HAS_DMA_COHERENCE_H +#include <asm/dma-coherence.h> +#elif defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) +static inline bool dev_is_dma_coherent(struct device *dev) +{ + return dev->dma_coherent; +} +#else +static inline bool dev_is_dma_coherent(struct device *dev) +{ + return true; +} +#endif /* CONFIG_ARCH_HAS_DMA_COHERENCE_H */ + void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, 
dma_addr_t dma_addr, unsigned long attrs); +long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr, + dma_addr_t dma_addr); -#ifdef CONFIG_DMA_NONCOHERENT_MMAP -int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, +#ifdef CONFIG_ARCH_HAS_DMA_MMAP_PGPROT +pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs); #else -#define arch_dma_mmap NULL -#endif /* CONFIG_DMA_NONCOHERENT_MMAP */ +# define arch_dma_mmap_pgprot(dev, prot, attrs) pgprot_noncached(prot) +#endif #ifdef CONFIG_DMA_NONCOHERENT_CACHE_SYNC void arch_dma_cache_sync(struct device *dev, void *vaddr, size_t size, diff --git a/include/linux/of_device.h b/include/linux/of_device.h @@ -58,7 +58,6 @@ static inline struct device_node *of_cpu_device_node_get(int cpu) int of_dma_configure(struct device *dev, struct device_node *np, bool force_dma); -void of_dma_deconfigure(struct device *dev); #else /* CONFIG_OF */ static inline int of_driver_match_device(struct device *dev, @@ -113,8 +112,6 @@ static inline int of_dma_configure(struct device *dev, { return 0; } -static inline void of_dma_deconfigure(struct device *dev) -{} #endif /* CONFIG_OF */ #endif /* _LINUX_OF_DEVICE_H */ diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig @@ -13,6 +13,9 @@ config NEED_DMA_MAP_STATE config ARCH_DMA_ADDR_T_64BIT def_bool 64BIT || PHYS_ADDR_T_64BIT +config ARCH_HAS_DMA_COHERENCE_H + bool + config HAVE_GENERIC_DMA_COHERENT bool @@ -26,22 +29,19 @@ config ARCH_HAS_SYNC_DMA_FOR_CPU config ARCH_HAS_SYNC_DMA_FOR_CPU_ALL bool -config DMA_DIRECT_OPS +config ARCH_HAS_DMA_COHERENT_TO_PFN bool - depends on HAS_DMA -config DMA_NONCOHERENT_OPS +config ARCH_HAS_DMA_MMAP_PGPROT bool - depends on HAS_DMA - select DMA_DIRECT_OPS -config DMA_NONCOHERENT_MMAP +config DMA_DIRECT_OPS bool - depends on DMA_NONCOHERENT_OPS + depends on HAS_DMA config DMA_NONCOHERENT_CACHE_SYNC bool - depends on DMA_NONCOHERENT_OPS + depends on DMA_DIRECT_OPS 
config DMA_VIRT_OPS bool diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile @@ -4,7 +4,6 @@ obj-$(CONFIG_HAS_DMA) += mapping.o obj-$(CONFIG_DMA_CMA) += contiguous.o obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += coherent.o obj-$(CONFIG_DMA_DIRECT_OPS) += direct.o -obj-$(CONFIG_DMA_NONCOHERENT_OPS) += noncoherent.o obj-$(CONFIG_DMA_VIRT_OPS) += virt.o obj-$(CONFIG_DMA_API_DEBUG) += debug.o obj-$(CONFIG_SWIOTLB) += swiotlb.o diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c @@ -49,7 +49,11 @@ static phys_addr_t limit_cmdline; static int __init early_cma(char *p) { - pr_debug("%s(%s)\n", __func__, p); + if (!p) { + pr_err("Config string not provided\n"); + return -EINVAL; + } + size_cmdline = memparse(p, &p); if (*p != '@') return 0; diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c @@ -1312,6 +1312,22 @@ static void check_sg_segment(struct device *dev, struct scatterlist *sg) #endif } +void debug_dma_map_single(struct device *dev, const void *addr, + unsigned long len) +{ + if (unlikely(dma_debug_disabled())) + return; + + if (!virt_addr_valid(addr)) + err_printk(dev, NULL, "DMA-API: device driver maps memory from invalid area [addr=%p] [len=%lu]\n", + addr, len); + + if (is_vmalloc_addr(addr)) + err_printk(dev, NULL, "DMA-API: device driver maps memory from vmalloc area [addr=%p] [len=%lu]\n", + addr, len); +} +EXPORT_SYMBOL(debug_dma_map_single); + void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, size_t size, int direction, dma_addr_t dma_addr, bool map_single) diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c @@ -1,13 +1,16 @@ // SPDX-License-Identifier: GPL-2.0 /* - * DMA operations that map physical memory directly without using an IOMMU or - * flushing caches. + * Copyright (C) 2018 Christoph Hellwig. + * + * DMA operations that map physical memory directly without using an IOMMU. 
*/ +#include <linux/bootmem.h> /* for max_pfn */ #include <linux/export.h> #include <linux/mm.h> #include <linux/dma-direct.h> #include <linux/scatterlist.h> #include <linux/dma-contiguous.h> +#include <linux/dma-noncoherent.h> #include <linux/pfn.h> #include <linux/set_memory.h> @@ -41,40 +44,83 @@ check_addr(struct device *dev, dma_addr_t dma_addr, size_t size, return false; } - if (*dev->dma_mask >= DMA_BIT_MASK(32)) { + if (*dev->dma_mask >= DMA_BIT_MASK(32) || dev->bus_dma_mask) { dev_err(dev, - "%s: overflow %pad+%zu of device mask %llx\n", - caller, &dma_addr, size, *dev->dma_mask); + "%s: overflow %pad+%zu of device mask %llx bus mask %llx\n", + caller, &dma_addr, size, + *dev->dma_mask, dev->bus_dma_mask); } return false; } return true; } +static inline dma_addr_t phys_to_dma_direct(struct device *dev, + phys_addr_t phys) +{ + if (force_dma_unencrypted()) + return __phys_to_dma(dev, phys); + return phys_to_dma(dev, phys); +} + +u64 dma_direct_get_required_mask(struct device *dev) +{ + u64 max_dma = phys_to_dma_direct(dev, (max_pfn - 1) << PAGE_SHIFT); + + if (dev->bus_dma_mask && dev->bus_dma_mask < max_dma) + max_dma = dev->bus_dma_mask; + + return (1ULL << (fls64(max_dma) - 1)) * 2 - 1; +} + +static gfp_t __dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, + u64 *phys_mask) +{ + if (dev->bus_dma_mask && dev->bus_dma_mask < dma_mask) + dma_mask = dev->bus_dma_mask; + + if (force_dma_unencrypted()) + *phys_mask = __dma_to_phys(dev, dma_mask); + else + *phys_mask = dma_to_phys(dev, dma_mask); + + /* + * Optimistically try the zone that the physical address mask falls + * into first. If that returns memory that isn't actually addressable + * we will fallback to the next lower zone and try again. + * + * Note that GFP_DMA32 and GFP_DMA are no ops without the corresponding + * zones. 
+ */ + if (*phys_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS)) + return GFP_DMA; + if (*phys_mask <= DMA_BIT_MASK(32)) + return GFP_DMA32; + return 0; +} + static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) { - dma_addr_t addr = force_dma_unencrypted() ? - __phys_to_dma(dev, phys) : phys_to_dma(dev, phys); - return addr + size - 1 <= dev->coherent_dma_mask; + return phys_to_dma_direct(dev, phys) + size - 1 <= + min_not_zero(dev->coherent_dma_mask, dev->bus_dma_mask); } -void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp, unsigned long attrs) +void *dma_direct_alloc_pages(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; int page_order = get_order(size); struct page *page = NULL; + u64 phys_mask; void *ret; + if (attrs & DMA_ATTR_NO_WARN) + gfp |= __GFP_NOWARN; + /* we always manually zero the memory once we are done: */ gfp &= ~__GFP_ZERO; - - /* GFP_DMA32 and GFP_DMA are no ops without the corresponding zones: */ - if (dev->coherent_dma_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS)) - gfp |= GFP_DMA; - if (dev->coherent_dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA)) - gfp |= GFP_DMA32; - + gfp |= __dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, + &phys_mask); again: /* CMA can be used only in the context which permits sleeping */ if (gfpflags_allow_blocking(gfp)) { @@ -93,15 +139,14 @@ again: page = NULL; if (IS_ENABLED(CONFIG_ZONE_DMA32) && - dev->coherent_dma_mask < DMA_BIT_MASK(64) && + phys_mask < DMA_BIT_MASK(64) && !(gfp & (GFP_DMA32 | GFP_DMA))) { gfp |= GFP_DMA32; goto again; } if (IS_ENABLED(CONFIG_ZONE_DMA) && - dev->coherent_dma_mask < DMA_BIT_MASK(32) && - !(gfp & GFP_DMA)) { + phys_mask < DMA_BIT_MASK(32) && !(gfp & GFP_DMA)) { gfp = (gfp & ~GFP_DMA32) | GFP_DMA; goto again; } @@ -124,7 +169,7 @@ again: * NOTE: this function must never look at the dma_addr argument, because we want * 
to be able to use it as a helper for iommu implementations as well. */ -void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, +void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) { unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; @@ -136,14 +181,96 @@ void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, free_pages((unsigned long)cpu_addr, page_order); } +void *dma_direct_alloc(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) +{ + if (!dev_is_dma_coherent(dev)) + return arch_dma_alloc(dev, size, dma_handle, gfp, attrs); + return dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs); +} + +void dma_direct_free(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) +{ + if (!dev_is_dma_coherent(dev)) + arch_dma_free(dev, size, cpu_addr, dma_addr, attrs); + else + dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs); +} + +static void dma_direct_sync_single_for_device(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ + if (dev_is_dma_coherent(dev)) + return; + arch_sync_dma_for_device(dev, dma_to_phys(dev, addr), size, dir); +} + +static void dma_direct_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + if (dev_is_dma_coherent(dev)) + return; + + for_each_sg(sgl, sg, nents, i) + arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir); +} + +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) +static void dma_direct_sync_single_for_cpu(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ + if (dev_is_dma_coherent(dev)) + return; + arch_sync_dma_for_cpu(dev, dma_to_phys(dev, addr), size, dir); + arch_sync_dma_for_cpu_all(dev); +} + +static void 
dma_direct_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + if (dev_is_dma_coherent(dev)) + return; + + for_each_sg(sgl, sg, nents, i) + arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir); + arch_sync_dma_for_cpu_all(dev); +} + +static void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + dma_direct_sync_single_for_cpu(dev, addr, size, dir); +} + +static void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + dma_direct_sync_sg_for_cpu(dev, sgl, nents, dir); +} +#endif + dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs) { - dma_addr_t dma_addr = phys_to_dma(dev, page_to_phys(page)) + offset; + phys_addr_t phys = page_to_phys(page) + offset; + dma_addr_t dma_addr = phys_to_dma(dev, phys); if (!check_addr(dev, dma_addr, size, __func__)) return DIRECT_MAPPING_ERROR; + + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + dma_direct_sync_single_for_device(dev, dma_addr, size, dir); return dma_addr; } @@ -162,31 +289,29 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, sg_dma_len(sg) = sg->length; } + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + dma_direct_sync_sg_for_device(dev, sgl, nents, dir); return nents; } +/* + * Because 32-bit DMA masks are so common we expect every architecture to be + * able to satisfy them - either by not supporting more physical memory, or by + * providing a ZONE_DMA32. If neither is the case, the architecture needs to + * use an IOMMU instead of the direct mapping. 
+ */ int dma_direct_supported(struct device *dev, u64 mask) { -#ifdef CONFIG_ZONE_DMA - if (mask < phys_to_dma(dev, DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))) - return 0; -#else - /* - * Because 32-bit DMA masks are so common we expect every architecture - * to be able to satisfy them - either by not supporting more physical - * memory, or by providing a ZONE_DMA32. If neither is the case, the - * architecture needs to use an IOMMU instead of the direct mapping. - */ - if (mask < phys_to_dma(dev, DMA_BIT_MASK(32))) - return 0; -#endif - /* - * Upstream PCI/PCIe bridges or SoC interconnects may not carry - * as many DMA address bits as the device itself supports. - */ - if (dev->bus_dma_mask && mask > dev->bus_dma_mask) - return 0; - return 1; + u64 min_mask; + + if (IS_ENABLED(CONFIG_ZONE_DMA)) + min_mask = DMA_BIT_MASK(ARCH_ZONE_DMA_BITS); + else + min_mask = DMA_BIT_MASK(32); + + min_mask = min_t(u64, min_mask, (max_pfn - 1) << PAGE_SHIFT); + + return mask >= phys_to_dma(dev, min_mask); } int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr) @@ -199,7 +324,20 @@ const struct dma_map_ops dma_direct_ops = { .free = dma_direct_free, .map_page = dma_direct_map_page, .map_sg = dma_direct_map_sg, +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) + .sync_single_for_device = dma_direct_sync_single_for_device, + .sync_sg_for_device = dma_direct_sync_sg_for_device, +#endif +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) + .sync_single_for_cpu = dma_direct_sync_single_for_cpu, + .sync_sg_for_cpu = dma_direct_sync_sg_for_cpu, + .unmap_page = dma_direct_unmap_page, + .unmap_sg = dma_direct_unmap_sg, +#endif + .get_required_mask = dma_direct_get_required_mask, .dma_supported = dma_direct_supported, .mapping_error = dma_direct_mapping_error, + .cache_sync = arch_dma_cache_sync, }; EXPORT_SYMBOL(dma_direct_ops); diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c @@ -7,7 +7,7 @@ */ #include <linux/acpi.h> -#include 
<linux/dma-mapping.h> +#include <linux/dma-noncoherent.h> #include <linux/export.h> #include <linux/gfp.h> #include <linux/of_device.h> @@ -202,17 +202,26 @@ EXPORT_SYMBOL(dmam_release_declared_memory); * Create scatter-list for the already allocated DMA buffer. */ int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, - void *cpu_addr, dma_addr_t handle, size_t size) + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) { - struct page *page = virt_to_page(cpu_addr); + struct page *page; int ret; - ret = sg_alloc_table(sgt, 1, GFP_KERNEL); - if (unlikely(ret)) - return ret; + if (!dev_is_dma_coherent(dev)) { + if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_COHERENT_TO_PFN)) + return -ENXIO; - sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); - return 0; + page = pfn_to_page(arch_dma_coherent_to_pfn(dev, cpu_addr, + dma_addr)); + } else { + page = virt_to_page(cpu_addr); + } + + ret = sg_alloc_table(sgt, 1, GFP_KERNEL); + if (!ret) + sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); + return ret; } EXPORT_SYMBOL(dma_common_get_sgtable); @@ -220,27 +229,37 @@ EXPORT_SYMBOL(dma_common_get_sgtable); * Create userspace mapping for the DMA-coherent memory. 
*/ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size) + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) { - int ret = -ENXIO; #ifndef CONFIG_ARCH_NO_COHERENT_DMA_MMAP unsigned long user_count = vma_pages(vma); unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; unsigned long off = vma->vm_pgoff; + unsigned long pfn; + int ret = -ENXIO; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_page_prot = arch_dma_mmap_pgprot(dev, vma->vm_page_prot, attrs); if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; - if (off < count && user_count <= (count - off)) - ret = remap_pfn_range(vma, vma->vm_start, - page_to_pfn(virt_to_page(cpu_addr)) + off, - user_count << PAGE_SHIFT, - vma->vm_page_prot); -#endif /* !CONFIG_ARCH_NO_COHERENT_DMA_MMAP */ + if (off >= count || user_count > count - off) + return -ENXIO; - return ret; + if (!dev_is_dma_coherent(dev)) { + if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_COHERENT_TO_PFN)) + return -ENXIO; + pfn = arch_dma_coherent_to_pfn(dev, cpu_addr, dma_addr); + } else { + pfn = page_to_pfn(virt_to_page(cpu_addr)); + } + + return remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff, + user_count << PAGE_SHIFT, vma->vm_page_prot); +#else + return -ENXIO; +#endif /* !CONFIG_ARCH_NO_COHERENT_DMA_MMAP */ } EXPORT_SYMBOL(dma_common_mmap); @@ -327,19 +346,3 @@ void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags) vunmap(cpu_addr); } #endif - -/* - * enables DMA API use for a device - */ -int dma_configure(struct device *dev) -{ - if (dev->bus->dma_configure) - return dev->bus->dma_configure(dev); - return 0; -} - -void dma_deconfigure(struct device *dev) -{ - of_dma_deconfigure(dev); - acpi_dma_deconfigure(dev); -} diff --git a/kernel/dma/noncoherent.c b/kernel/dma/noncoherent.c @@ -1,106 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2018 Christoph Hellwig. 
- * - * DMA operations that map physical memory directly without providing cache - * coherence. - */ -#include <linux/export.h> -#include <linux/mm.h> -#include <linux/dma-direct.h> -#include <linux/dma-noncoherent.h> -#include <linux/scatterlist.h> - -static void dma_noncoherent_sync_single_for_device(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) -{ - arch_sync_dma_for_device(dev, dma_to_phys(dev, addr), size, dir); -} - -static void dma_noncoherent_sync_sg_for_device(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nents, i) - arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir); -} - -static dma_addr_t dma_noncoherent_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - dma_addr_t addr; - - addr = dma_direct_map_page(dev, page, offset, size, dir, attrs); - if (!dma_mapping_error(dev, addr) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - arch_sync_dma_for_device(dev, page_to_phys(page) + offset, - size, dir); - return addr; -} - -static int dma_noncoherent_map_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - nents = dma_direct_map_sg(dev, sgl, nents, dir, attrs); - if (nents > 0 && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - dma_noncoherent_sync_sg_for_device(dev, sgl, nents, dir); - return nents; -} - -#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ - defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) -static void dma_noncoherent_sync_single_for_cpu(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) -{ - arch_sync_dma_for_cpu(dev, dma_to_phys(dev, addr), size, dir); - arch_sync_dma_for_cpu_all(dev); -} - -static void dma_noncoherent_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir) -{ - struct 
scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nents, i) - arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir); - arch_sync_dma_for_cpu_all(dev); -} - -static void dma_noncoherent_unmap_page(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir, unsigned long attrs) -{ - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - dma_noncoherent_sync_single_for_cpu(dev, addr, size, dir); -} - -static void dma_noncoherent_unmap_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - dma_noncoherent_sync_sg_for_cpu(dev, sgl, nents, dir); -} -#endif - -const struct dma_map_ops dma_noncoherent_ops = { - .alloc = arch_dma_alloc, - .free = arch_dma_free, - .mmap = arch_dma_mmap, - .sync_single_for_device = dma_noncoherent_sync_single_for_device, - .sync_sg_for_device = dma_noncoherent_sync_sg_for_device, - .map_page = dma_noncoherent_map_page, - .map_sg = dma_noncoherent_map_sg, -#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ - defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) - .sync_single_for_cpu = dma_noncoherent_sync_single_for_cpu, - .sync_sg_for_cpu = dma_noncoherent_sync_sg_for_cpu, - .unmap_page = dma_noncoherent_unmap_page, - .unmap_sg = dma_noncoherent_unmap_sg, -#endif - .dma_supported = dma_direct_supported, - .mapping_error = dma_direct_mapping_error, - .cache_sync = arch_dma_cache_sync, -}; -EXPORT_SYMBOL(dma_noncoherent_ops);