whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit d1f2b1710d92a80d60351503bbf41cdac95ed7a8
parent 18d0eae30e6a4f8644d589243d7ac1d70d29203d
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Fri, 26 Oct 2018 10:50:10 -0700

Merge tag 'iommu-updates-v4.20' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu

Pull IOMMU updates from Joerg Roedel:

 - Debugfs support for the Intel VT-d driver.

   When enabled, it now also exposes some of its internal data
   structures to user-space for debugging purposes.

 - ARM-SMMU driver now uses the generic deferred flushing and fast-path
   iova allocation code.

   This is expected to be a major performance improvement, as this
   allocation path scales a lot better.

 - Support for r8a7744 in the Renesas iommu driver

 - Couple of minor fixes and improvements all over the place

* tag 'iommu-updates-v4.20' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (39 commits)
  iommu/arm-smmu-v3: Remove unnecessary wrapper function
  iommu/arm-smmu-v3: Add SPDX header
  iommu/amd: Add default branch in amd_iommu_capable()
  dt-bindings: iommu: ipmmu-vmsa: Add r8a7744 support
  iommu/amd: Move iommu_init_pci() to .init section
  iommu/arm-smmu: Support non-strict mode
  iommu/io-pgtable-arm-v7s: Add support for non-strict mode
  iommu/arm-smmu-v3: Add support for non-strict mode
  iommu/io-pgtable-arm: Add support for non-strict mode
  iommu: Add "iommu.strict" command line option
  iommu/dma: Add support for non-strict mode
  iommu/arm-smmu: Ensure that page-table updates are visible before TLBI
  iommu/arm-smmu-v3: Implement flush_iotlb_all hook
  iommu/arm-smmu-v3: Avoid back-to-back CMD_SYNC operations
  iommu/arm-smmu-v3: Fix unexpected CMD_SYNC timeout
  iommu/io-pgtable-arm: Fix race handling in split_blk_unmap()
  iommu/arm-smmu-v3: Fix a couple of minor comment typos
  iommu: Fix a typo
  iommu: Remove .domain_{get,set}_windows
  iommu: Tidy up window attributes
  ...

Diffstat:
MDocumentation/admin-guide/kernel-parameters.txt | 12++++++++++++
MDocumentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt | 1+
MDocumentation/devicetree/bindings/misc/fsl,qoriq-mc.txt | 39+++++++++++++++++++++++++++++++++++++++
March/arm64/boot/dts/freescale/fsl-ls208xa.dtsi | 7++++++-
March/arm64/mm/dma-mapping.c | 10+++++-----
March/x86/include/asm/irq_remapping.h | 2++
Mdrivers/bus/fsl-mc/fsl-mc-bus.c | 16++++++++++++----
Mdrivers/iommu/Kconfig | 13+++++++++++++
Mdrivers/iommu/Makefile | 1+
Mdrivers/iommu/amd_iommu.c | 2++
Mdrivers/iommu/amd_iommu_init.c | 2+-
Mdrivers/iommu/arm-smmu-v3.c | 140+++++++++++++++++++++++++++++++++++++++++++++++++------------------------------
Mdrivers/iommu/arm-smmu.c | 106+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------
Mdrivers/iommu/dma-iommu.c | 55+++++++++++++++++++++++++++++++++++++++++++------------
Mdrivers/iommu/fsl_pamu_domain.c | 119+++++++++++++++++++++++++++++++++++++++----------------------------------------
Adrivers/iommu/intel-iommu-debugfs.c | 314+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mdrivers/iommu/intel-iommu.c | 32++++----------------------------
Mdrivers/iommu/intel_irq_remapping.c | 2+-
Mdrivers/iommu/io-pgtable-arm-v7s.c | 11++++++++++-
Mdrivers/iommu/io-pgtable-arm.c | 23++++++++++++++++-------
Mdrivers/iommu/io-pgtable.h | 5+++++
Mdrivers/iommu/iommu.c | 58+++++++++++++++++++++++++++++++++++++---------------------
Mdrivers/iommu/iova.c | 22+++++++++++++++-------
Mdrivers/iommu/ipmmu-vmsa.c | 5+----
Mdrivers/iommu/of_iommu.c | 25++++++++++++++++++++++---
Mdrivers/of/base.c | 102+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mdrivers/of/irq.c | 5++---
Mdrivers/pci/of.c | 101-------------------------------------------------------------------------------
Minclude/linux/fsl/mc.h | 8++++++++
Minclude/linux/intel-iommu.h | 72++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Minclude/linux/iommu.h | 10++++------
Minclude/linux/iova.h | 1+
Minclude/linux/of.h | 11+++++++++++
Minclude/linux/of_pci.h | 10----------
34 files changed, 986 insertions(+), 356 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt @@ -1759,6 +1759,18 @@ nobypass [PPC/POWERNV] Disable IOMMU bypass, using IOMMU for PCI devices. + iommu.strict= [ARM64] Configure TLB invalidation behaviour + Format: { "0" | "1" } + 0 - Lazy mode. + Request that DMA unmap operations use deferred + invalidation of hardware TLBs, for increased + throughput at the cost of reduced device isolation. + Will fall back to strict mode if not supported by + the relevant IOMMU driver. + 1 - Strict mode (default). + DMA unmap operations invalidate IOMMU hardware TLBs + synchronously. + iommu.passthrough= [ARM64] Configure DMA to bypass the IOMMU by default. Format: { "0" | "1" } diff --git a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt @@ -12,6 +12,7 @@ Required Properties: - "renesas,ipmmu-r8a73a4" for the R8A73A4 (R-Mobile APE6) IPMMU. - "renesas,ipmmu-r8a7743" for the R8A7743 (RZ/G1M) IPMMU. + - "renesas,ipmmu-r8a7744" for the R8A7744 (RZ/G1N) IPMMU. - "renesas,ipmmu-r8a7745" for the R8A7745 (RZ/G1E) IPMMU. - "renesas,ipmmu-r8a7790" for the R8A7790 (R-Car H2) IPMMU. - "renesas,ipmmu-r8a7791" for the R8A7791 (R-Car M2-W) IPMMU. diff --git a/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt b/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt @@ -9,6 +9,25 @@ blocks that can be used to create functional hardware objects/devices such as network interfaces, crypto accelerator instances, L2 switches, etc. +For an overview of the DPAA2 architecture and fsl-mc bus see: +Documentation/networking/dpaa2/overview.rst + +As described in the above overview, all DPAA2 objects in a DPRC share the +same hardware "isolation context" and a 10-bit value called an ICID +(isolation context id) is expressed by the hardware to identify +the requester. + +The generic 'iommus' property is insufficient to describe the relationship +between ICIDs and IOMMUs, so an iommu-map property is used to define +the set of possible ICIDs under a root DPRC and how they map to +an IOMMU. + +For generic IOMMU bindings, see +Documentation/devicetree/bindings/iommu/iommu.txt. + +For arm-smmu binding, see: +Documentation/devicetree/bindings/iommu/arm,smmu.txt. + Required properties: - compatible @@ -88,14 +107,34 @@ Sub-nodes: Value type: <phandle> Definition: Specifies the phandle to the PHY device node associated with the this dpmac. +Optional properties: + +- iommu-map: Maps an ICID to an IOMMU and associated iommu-specifier + data. + + The property is an arbitrary number of tuples of + (icid-base,iommu,iommu-base,length). + + Any ICID i in the interval [icid-base, icid-base + length) is + associated with the listed IOMMU, with the iommu-specifier + (i - icid-base + iommu-base). Example: + smmu: iommu@5000000 { + compatible = "arm,mmu-500"; + #iommu-cells = <1>; + stream-match-mask = <0x7C00>; + ... + }; + fsl_mc: fsl-mc@80c000000 { compatible = "fsl,qoriq-mc"; reg = <0x00000008 0x0c000000 0 0x40>, /* MC portal base */ <0x00000000 0x08340000 0 0x40000>; /* MC control reg */ msi-parent = <&its>; + /* define map for ICIDs 23-64 */ + iommu-map = <23 &smmu 23 41>; #address-cells = <3>; #size-cells = <1>; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi @@ -148,6 +148,7 @@ #address-cells = <2>; #size-cells = <2>; ranges; + dma-ranges = <0x0 0x0 0x0 0x0 0x10000 0x00000000>; clockgen: clocking@1300000 { compatible = "fsl,ls2080a-clockgen"; @@ -321,6 +322,8 @@ reg = <0x00000008 0x0c000000 0 0x40>, /* MC portal base */ <0x00000000 0x08340000 0 0x40000>; /* MC control reg */ msi-parent = <&its>; + iommu-map = <0 &smmu 0 0>; /* This is fixed-up by u-boot */ + dma-coherent; #address-cells = <3>; #size-cells = <1>; @@ -424,6 +427,9 @@ compatible = "arm,mmu-500"; reg = <0 0x5000000 0 0x800000>; #global-interrupts = <12>; + #iommu-cells = <1>; + stream-match-mask = <0x7C00>; + dma-coherent; interrupts = <0 13 4>, /* global secure fault */ <0 14 4>, /* combined secure interrupt */ <0 15 4>, /* global non-secure fault */ @@ -466,7 +472,6 @@ <0 204 4>, <0 205 4>, <0 206 4>, <0 207 4>, <0 208 4>, <0 209 4>; - mmu-masters = <&fsl_mc 0x300 0>; }; dspi: dspi@2100000 { diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c @@ -712,7 +712,7 @@ static void __iommu_sync_single_for_cpu(struct device *dev, if (is_device_dma_coherent(dev)) return; - phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr); + phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dev_addr); __dma_unmap_area(phys_to_virt(phys), size, dir); } @@ -725,7 +725,7 @@ static void __iommu_sync_single_for_device(struct device *dev, if (is_device_dma_coherent(dev)) return; - phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr); + phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dev_addr); __dma_map_area(phys_to_virt(phys), size, dir); } @@ -738,9 +738,9 @@ static dma_addr_t __iommu_map_page(struct device *dev, struct page *page, int prot = dma_info_to_prot(dir, coherent, attrs); dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot); - if (!iommu_dma_mapping_error(dev, dev_addr) && - (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) - __iommu_sync_single_for_device(dev, dev_addr, size, dir); + if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) && + !iommu_dma_mapping_error(dev, dev_addr)) + __dma_map_area(page_address(page) + offset, size, dir); return dev_addr; } diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h @@ -45,6 +45,8 @@ struct vcpu_data { #ifdef CONFIG_IRQ_REMAP +extern raw_spinlock_t irq_2_ir_lock; + extern bool irq_remapping_cap(enum irq_remap_cap cap); extern void set_irq_remapping_broken(void); extern int irq_remapping_prepare(void); diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c @@ -127,6 +127,16 @@ static int fsl_mc_bus_uevent(struct device *dev, struct kobj_uevent_env *env) return 0; } +static int fsl_mc_dma_configure(struct device *dev) +{ + struct device *dma_dev = dev; + + while (dev_is_fsl_mc(dma_dev)) + dma_dev = dma_dev->parent; + + return of_dma_configure(dev, dma_dev->of_node, 0); +} + static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -148,6 +158,7 @@ struct bus_type fsl_mc_bus_type = { .name = "fsl-mc", .match = fsl_mc_bus_match, .uevent = fsl_mc_bus_uevent, + .dma_configure = fsl_mc_dma_configure, .dev_groups = fsl_mc_dev_groups, }; EXPORT_SYMBOL_GPL(fsl_mc_bus_type); @@ -621,6 +632,7 @@ int fsl_mc_device_add(struct fsl_mc_obj_desc *obj_desc, mc_dev->icid = parent_mc_dev->icid; mc_dev->dma_mask = FSL_MC_DEFAULT_DMA_MASK; mc_dev->dev.dma_mask = &mc_dev->dma_mask; + mc_dev->dev.coherent_dma_mask = mc_dev->dma_mask; dev_set_msi_domain(&mc_dev->dev, dev_get_msi_domain(&parent_mc_dev->dev)); } @@ -638,10 +650,6 @@ int fsl_mc_device_add(struct fsl_mc_obj_desc *obj_desc, goto error_cleanup_dev; } - /* Objects are coherent, unless 'no shareability' flag set. */ - if (!(obj_desc->flags & FSL_MC_OBJ_FLAG_NO_MEM_SHAREABILITY)) - arch_setup_dma_ops(&mc_dev->dev, 0, 0, NULL, true); - /* * The device-specific probe callback will get invoked by device_add() */ diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig @@ -186,6 +186,19 @@ config INTEL_IOMMU and include PCI device scope covered by these DMA remapping devices. +config INTEL_IOMMU_DEBUGFS + bool "Export Intel IOMMU internals in Debugfs" + depends on INTEL_IOMMU && IOMMU_DEBUGFS + help + !!!WARNING!!! + + DO NOT ENABLE THIS OPTION UNLESS YOU REALLY KNOW WHAT YOU ARE DOING!!! + + Expose Intel IOMMU internals in Debugfs. + + This option is -NOT- intended for production environments, and should + only be enabled for debugging Intel IOMMU. + config INTEL_IOMMU_SVM bool "Support for Shared Virtual Memory with Intel IOMMU" depends on INTEL_IOMMU && X86 diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile @@ -17,6 +17,7 @@ obj-$(CONFIG_ARM_SMMU) += arm-smmu.o obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o obj-$(CONFIG_DMAR_TABLE) += dmar.o obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o intel-pasid.o +obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += intel-iommu-debugfs.o obj-$(CONFIG_INTEL_IOMMU_SVM) += intel-svm.o obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c @@ -3083,6 +3083,8 @@ static bool amd_iommu_capable(enum iommu_cap cap) return (irq_remapping_enabled == 1); case IOMMU_CAP_NOEXEC: return false; + default: + break; } return false; diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c @@ -1719,7 +1719,7 @@ static const struct attribute_group *amd_iommu_groups[] = { NULL, }; -static int iommu_init_pci(struct amd_iommu *iommu) +static int __init iommu_init_pci(struct amd_iommu *iommu) { int cap_ptr = iommu->cap_ptr; u32 range, misc, low, high; diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c @@ -1,18 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* * IOMMU API for ARM architected SMMUv3 implementations. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * * Copyright (C) 2015 ARM Limited * * Author: Will Deacon <will.deacon@arm.com> @@ -567,7 +556,8 @@ struct arm_smmu_device { int gerr_irq; int combined_irq; - atomic_t sync_nr; + u32 sync_nr; + u8 prev_cmd_opcode; unsigned long ias; /* IPA */ unsigned long oas; /* PA */ @@ -611,6 +601,7 @@ struct arm_smmu_domain { struct mutex init_mutex; /* Protects smmu pointer */ struct io_pgtable_ops *pgtbl_ops; + bool non_strict; enum arm_smmu_domain_stage stage; union { @@ -708,7 +699,7 @@ static void queue_inc_prod(struct arm_smmu_queue *q) } /* - * Wait for the SMMU to consume items. If drain is true, wait until the queue + * Wait for the SMMU to consume items. If sync is true, wait until the queue * is empty. Otherwise, wait until there is at least one free slot. */ static int queue_poll_cons(struct arm_smmu_queue *q, bool sync, bool wfe) @@ -901,6 +892,8 @@ static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd) struct arm_smmu_queue *q = &smmu->cmdq.q; bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV); + smmu->prev_cmd_opcode = FIELD_GET(CMDQ_0_OP, cmd[0]); + while (queue_insert_raw(q, cmd) == -ENOSPC) { if (queue_poll_cons(q, false, wfe)) dev_err_ratelimited(smmu->dev, "CMDQ timeout\n"); @@ -948,15 +941,21 @@ static int __arm_smmu_cmdq_issue_sync_msi(struct arm_smmu_device *smmu) struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC, .sync = { - .msidata = atomic_inc_return_relaxed(&smmu->sync_nr), .msiaddr = virt_to_phys(&smmu->sync_count), }, }; - arm_smmu_cmdq_build_cmd(cmd, &ent); - spin_lock_irqsave(&smmu->cmdq.lock, flags); - arm_smmu_cmdq_insert_cmd(smmu, cmd); + + /* Piggy-back on the previous command if it's a SYNC */ + if (smmu->prev_cmd_opcode == CMDQ_OP_CMD_SYNC) { + ent.sync.msidata = smmu->sync_nr; + } else { + ent.sync.msidata = ++smmu->sync_nr; + arm_smmu_cmdq_build_cmd(cmd, &ent); + arm_smmu_cmdq_insert_cmd(smmu, cmd); + } + spin_unlock_irqrestore(&smmu->cmdq.lock, flags); return __arm_smmu_sync_poll_msi(smmu, ent.sync.msidata); @@ -1372,15 +1371,11 @@ static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev) } /* IO_PGTABLE API */ -static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu) -{ - arm_smmu_cmdq_issue_sync(smmu); -} - static void arm_smmu_tlb_sync(void *cookie) { struct arm_smmu_domain *smmu_domain = cookie; - __arm_smmu_tlb_sync(smmu_domain->smmu); + + arm_smmu_cmdq_issue_sync(smmu_domain->smmu); } static void arm_smmu_tlb_inv_context(void *cookie) @@ -1398,8 +1393,14 @@ static void arm_smmu_tlb_inv_context(void *cookie) cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; } + /* + * NOTE: when io-pgtable is in non-strict mode, we may get here with + * PTEs previously cleared by unmaps on the current CPU not yet visible + * to the SMMU. We are relying on the DSB implicit in queue_inc_prod() + * to guarantee those are observed before the TLBI. Do be careful, 007. + */ arm_smmu_cmdq_issue_cmd(smmu, &cmd); - __arm_smmu_tlb_sync(smmu); + arm_smmu_cmdq_issue_sync(smmu); } static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, @@ -1624,6 +1625,9 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain) if (smmu->features & ARM_SMMU_FEAT_COHERENCY) pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA; + if (smmu_domain->non_strict) + pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT; + pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain); if (!pgtbl_ops) return -ENOMEM; @@ -1772,12 +1776,20 @@ arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) return ops->unmap(ops, iova, size); } +static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain) +{ + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + + if (smmu_domain->smmu) + arm_smmu_tlb_inv_context(smmu_domain); +} + static void arm_smmu_iotlb_sync(struct iommu_domain *domain) { struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu; if (smmu) - __arm_smmu_tlb_sync(smmu); + arm_smmu_cmdq_issue_sync(smmu); } static phys_addr_t @@ -1917,15 +1929,27 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain, { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - if (domain->type != IOMMU_DOMAIN_UNMANAGED) - return -EINVAL; - - switch (attr) { - case DOMAIN_ATTR_NESTING: - *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED); - return 0; + switch (domain->type) { + case IOMMU_DOMAIN_UNMANAGED: + switch (attr) { + case DOMAIN_ATTR_NESTING: + *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED); + return 0; + default: + return -ENODEV; + } + break; + case IOMMU_DOMAIN_DMA: + switch (attr) { + case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE: + *(int *)data = smmu_domain->non_strict; + return 0; + default: + return -ENODEV; + } + break; default: - return -ENODEV; + return -EINVAL; } } @@ -1935,26 +1959,37 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain, int ret = 0; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - if (domain->type != IOMMU_DOMAIN_UNMANAGED) - return -EINVAL; - mutex_lock(&smmu_domain->init_mutex); - switch (attr) { - case DOMAIN_ATTR_NESTING: - if (smmu_domain->smmu) { - ret = -EPERM; - goto out_unlock; + switch (domain->type) { + case IOMMU_DOMAIN_UNMANAGED: + switch (attr) { + case DOMAIN_ATTR_NESTING: + if (smmu_domain->smmu) { + ret = -EPERM; + goto out_unlock; + } + + if (*(int *)data) + smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED; + else + smmu_domain->stage = ARM_SMMU_DOMAIN_S1; + break; + default: + ret = -ENODEV; + } + break; + case IOMMU_DOMAIN_DMA: + switch(attr) { + case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE: + smmu_domain->non_strict = *(int *)data; + break; + default: + ret = -ENODEV; } - - if (*(int *)data) - smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED; - else - smmu_domain->stage = ARM_SMMU_DOMAIN_S1; - break; default: - ret = -ENODEV; + ret = -EINVAL; } out_unlock: @@ -1999,7 +2034,7 @@ static struct iommu_ops arm_smmu_ops = { .attach_dev = arm_smmu_attach_dev, .map = arm_smmu_map, .unmap = arm_smmu_unmap, - .flush_iotlb_all = arm_smmu_iotlb_sync, + .flush_iotlb_all = arm_smmu_flush_iotlb_all, .iotlb_sync = arm_smmu_iotlb_sync, .iova_to_phys = arm_smmu_iova_to_phys, .add_device = arm_smmu_add_device, @@ -2180,7 +2215,6 @@ static int arm_smmu_init_structures(struct arm_smmu_device *smmu) { int ret; - atomic_set(&smmu->sync_nr, 0); ret = arm_smmu_init_queues(smmu); if (ret) return ret; @@ -2353,8 +2387,8 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) irq = smmu->combined_irq; if (irq) { /* - * Cavium ThunderX2 implementation doesn't not support unique - * irq lines. Use single irq line for all the SMMUv3 interrupts. + * Cavium ThunderX2 implementation doesn't support unique irq + * lines. Use a single irq line for all the SMMUv3 interrupts. */ ret = devm_request_threaded_irq(smmu->dev, irq, arm_smmu_combined_irq_handler, diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c @@ -52,6 +52,7 @@ #include <linux/spinlock.h> #include <linux/amba/bus.h> +#include <linux/fsl/mc.h> #include "io-pgtable.h" #include "arm-smmu-regs.h" @@ -246,6 +247,7 @@ struct arm_smmu_domain { const struct iommu_gather_ops *tlb_ops; struct arm_smmu_cfg cfg; enum arm_smmu_domain_stage stage; + bool non_strict; struct mutex init_mutex; /* Protects smmu pointer */ spinlock_t cb_lock; /* Serialises ATS1* ops and TLB syncs */ struct iommu_domain domain; @@ -447,7 +449,11 @@ static void arm_smmu_tlb_inv_context_s1(void *cookie) struct arm_smmu_cfg *cfg = &smmu_domain->cfg; void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx); - writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID); + /* + * NOTE: this is not a relaxed write; it needs to guarantee that PTEs + * cleared by the current CPU are visible to the SMMU before the TLBI. + */ + writel(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID); arm_smmu_tlb_sync_context(cookie); } @@ -457,7 +463,8 @@ static void arm_smmu_tlb_inv_context_s2(void *cookie) struct arm_smmu_device *smmu = smmu_domain->smmu; void __iomem *base = ARM_SMMU_GR0(smmu); - writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID); + /* NOTE: see above */ + writel(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID); arm_smmu_tlb_sync_global(smmu); } @@ -469,6 +476,9 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS; void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx); + if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) + wmb(); + if (stage1) { reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA; @@ -510,6 +520,9 @@ static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size, struct arm_smmu_domain *smmu_domain = cookie; void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu); + if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) + wmb(); + writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID); } @@ -863,6 +876,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA; + if (smmu_domain->non_strict) + pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT; + smmu_domain->smmu = smmu; pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain); if (!pgtbl_ops) { @@ -1252,6 +1268,14 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, return ops->unmap(ops, iova, size); } +static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain) +{ + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + + if (smmu_domain->tlb_ops) + smmu_domain->tlb_ops->tlb_flush_all(smmu_domain); +} + static void arm_smmu_iotlb_sync(struct iommu_domain *domain) { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); @@ -1459,6 +1483,8 @@ static struct iommu_group *arm_smmu_device_group(struct device *dev) if (dev_is_pci(dev)) group = pci_device_group(dev); + else if (dev_is_fsl_mc(dev)) + group = fsl_mc_device_group(dev); else group = generic_device_group(dev); @@ -1470,15 +1496,27 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain, { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - if (domain->type != IOMMU_DOMAIN_UNMANAGED) - return -EINVAL; - - switch (attr) { - case DOMAIN_ATTR_NESTING: - *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED); - return 0; + switch(domain->type) { + case IOMMU_DOMAIN_UNMANAGED: + switch (attr) { + case DOMAIN_ATTR_NESTING: + *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED); + return 0; + default: + return -ENODEV; + } + break; + case IOMMU_DOMAIN_DMA: + switch (attr) { + case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE: + *(int *)data = smmu_domain->non_strict; + return 0; + default: + return -ENODEV; + } + break; default: - return -ENODEV; + return -EINVAL; } } @@ -1488,28 +1526,38 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain, int ret = 0; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - if (domain->type != IOMMU_DOMAIN_UNMANAGED) - return -EINVAL; - mutex_lock(&smmu_domain->init_mutex); - switch (attr) { - case DOMAIN_ATTR_NESTING: - if (smmu_domain->smmu) { - ret = -EPERM; - goto out_unlock; + switch(domain->type) { + case IOMMU_DOMAIN_UNMANAGED: + switch (attr) { + case DOMAIN_ATTR_NESTING: + if (smmu_domain->smmu) { + ret = -EPERM; + goto out_unlock; + } + + if (*(int *)data) + smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED; + else + smmu_domain->stage = ARM_SMMU_DOMAIN_S1; + break; + default: + ret = -ENODEV; + } + break; + case IOMMU_DOMAIN_DMA: + switch (attr) { + case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE: + smmu_domain->non_strict = *(int *)data; + break; + default: + ret = -ENODEV; } - - if (*(int *)data) - smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED; - else - smmu_domain->stage = ARM_SMMU_DOMAIN_S1; - break; default: - ret = -ENODEV; + ret = -EINVAL; } - out_unlock: mutex_unlock(&smmu_domain->init_mutex); return ret; @@ -1562,7 +1610,7 @@ static struct iommu_ops arm_smmu_ops = { .attach_dev = arm_smmu_attach_dev, .map = arm_smmu_map, .unmap = arm_smmu_unmap, - .flush_iotlb_all = arm_smmu_iotlb_sync, + .flush_iotlb_all = arm_smmu_flush_iotlb_all, .iotlb_sync = arm_smmu_iotlb_sync, .iova_to_phys = arm_smmu_iova_to_phys, .add_device = arm_smmu_add_device, @@ -2036,6 +2084,10 @@ static void arm_smmu_bus_init(void) bus_set_iommu(&pci_bus_type, &arm_smmu_ops); } #endif +#ifdef CONFIG_FSL_MC_BUS + if (!iommu_present(&fsl_mc_bus_type)) + bus_set_iommu(&fsl_mc_bus_type, &arm_smmu_ops); +#endif } static int arm_smmu_device_probe(struct platform_device *pdev) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c @@ -55,6 +55,9 @@ struct iommu_dma_cookie { }; struct list_head msi_page_list; spinlock_t msi_lock; + + /* Domain for flush queue callback; NULL if flush queue not in use */ + struct iommu_domain *fq_domain; }; static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie) @@ -257,6 +260,20 @@ static int iova_reserve_iommu_regions(struct device *dev, return ret; } +static void iommu_dma_flush_iotlb_all(struct iova_domain *iovad) +{ + struct iommu_dma_cookie *cookie; + struct iommu_domain *domain; + + cookie = container_of(iovad, struct iommu_dma_cookie, iovad); + domain = cookie->fq_domain; + /* + * The IOMMU driver supporting DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE + * implies that ops->flush_iotlb_all must be non-NULL. + */ + domain->ops->flush_iotlb_all(domain); +} + /** * iommu_dma_init_domain - Initialise a DMA mapping domain * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() @@ -275,6 +292,7 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, struct iommu_dma_cookie *cookie = domain->iova_cookie; struct iova_domain *iovad = &cookie->iovad; unsigned long order, base_pfn, end_pfn; + int attr; if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE) return -EINVAL; @@ -308,6 +326,13 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, } init_iova_domain(iovad, 1UL << order, base_pfn); + + if (!cookie->fq_domain && !iommu_domain_get_attr(domain, + DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, &attr) && attr) { + cookie->fq_domain = domain; + init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all, NULL); + } + if (!dev) return 0; @@ -393,6 +418,9 @@ static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie, /* The MSI case is only ever cleaning up its most recent allocation */ if (cookie->type == IOMMU_DMA_MSI_COOKIE) cookie->msi_iova -= size; + else if (cookie->fq_domain) /* non-strict mode */ + queue_iova(iovad, iova_pfn(iovad, iova), + size >> iova_shift(iovad), 0); else free_iova_fast(iovad, iova_pfn(iovad, iova), size >> iova_shift(iovad)); @@ -408,7 +436,9 @@ static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr, dma_addr -= iova_off; size = iova_align(iovad, size + iova_off); - WARN_ON(iommu_unmap(domain, dma_addr, size) != size); + WARN_ON(iommu_unmap_fast(domain, dma_addr, size) != size); + if (!cookie->fq_domain) + iommu_tlb_sync(domain); iommu_dma_free_iova(cookie, dma_addr, size); } @@ -491,7 +521,7 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count, void iommu_dma_free(struct device *dev, struct page **pages, size_t size, dma_addr_t *handle) { - __iommu_dma_unmap(iommu_get_domain_for_dev(dev), *handle, size); + __iommu_dma_unmap(iommu_get_dma_domain(dev), *handle, size); __iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT); *handle = IOMMU_MAPPING_ERROR; } @@ -518,7 +548,7 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp, unsigned long attrs, int prot, dma_addr_t *handle, void (*flush_page)(struct device *, const void *, phys_addr_t)) { - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + struct iommu_domain *domain = iommu_get_dma_domain(dev); struct iommu_dma_cookie *cookie = domain->iova_cookie; struct iova_domain *iovad = &cookie->iovad; struct page **pages; @@ -606,9 +636,8 @@ int iommu_dma_mmap(struct page **pages, size_t size, struct vm_area_struct *vma) } static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, - size_t size, int prot) + size_t size, int prot, struct iommu_domain *domain) { - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); struct iommu_dma_cookie *cookie = domain->iova_cookie; size_t iova_off = 0; dma_addr_t iova; @@ -632,13 +661,14 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, int prot) { - return __iommu_dma_map(dev, page_to_phys(page) + offset, size, prot); + return __iommu_dma_map(dev, page_to_phys(page) + offset, size, prot, + iommu_get_dma_domain(dev)); } void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir, unsigned long attrs) { - __iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle, size); + __iommu_dma_unmap(iommu_get_dma_domain(dev), handle, size); } /* @@ -726,7 +756,7 @@ static void __invalidate_sg(struct scatterlist *sg, int nents) int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, int prot) { - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + struct iommu_domain *domain = iommu_get_dma_domain(dev); struct iommu_dma_cookie *cookie = domain->iova_cookie; struct iova_domain *iovad = &cookie->iovad; struct scatterlist *s, *prev = NULL; @@ -811,20 +841,21 @@ void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, sg = tmp; } end = sg_dma_address(sg) + sg_dma_len(sg); - __iommu_dma_unmap(iommu_get_domain_for_dev(dev), start, end - start); + __iommu_dma_unmap(iommu_get_dma_domain(dev), start, end - start); } dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, size_t size, enum dma_data_direction dir, unsigned long attrs) { return __iommu_dma_map(dev, phys, size, - dma_info_to_prot(dir, false, attrs) | IOMMU_MMIO); + dma_info_to_prot(dir, false, attrs) | IOMMU_MMIO, + iommu_get_dma_domain(dev)); } void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir, unsigned long attrs) { - __iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle, size); + __iommu_dma_unmap(iommu_get_dma_domain(dev), handle, size); } int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) @@ -850,7 +881,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, if (!msi_page) return NULL; - iova = __iommu_dma_map(dev, msi_addr, size, prot); + iova = __iommu_dma_map(dev, msi_addr, size, prot, domain); if (iommu_dma_mapping_error(dev, iova)) goto out_free_page; diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c @@ -814,6 +814,55 @@ static int configure_domain_dma_state(struct fsl_dma_domain *dma_domain, bool en return 0; } +static int fsl_pamu_set_windows(struct iommu_domain *domain, u32 w_count) +{ + struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain); + unsigned long flags; + int ret; + + spin_lock_irqsave(&dma_domain->domain_lock, flags); + /* Ensure domain is inactive i.e. DMA should be disabled for the domain */ + if (dma_domain->enabled) { + pr_debug("Can't set geometry attributes as domain is active\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EBUSY; + } + + /* Ensure that the geometry has been set for the domain */ + if (!dma_domain->geom_size) { + pr_debug("Please configure geometry before setting the number of windows\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EINVAL; + } + + /* + * Ensure we have valid window count i.e. it should be less than + * maximum permissible limit and should be a power of two. + */ + if (w_count > pamu_get_max_subwin_cnt() || !is_power_of_2(w_count)) { + pr_debug("Invalid window count\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EINVAL; + } + + ret = pamu_set_domain_geometry(dma_domain, &domain->geometry, + w_count > 1 ? w_count : 0); + if (!ret) { + kfree(dma_domain->win_arr); + dma_domain->win_arr = kcalloc(w_count, + sizeof(*dma_domain->win_arr), + GFP_ATOMIC); + if (!dma_domain->win_arr) { + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -ENOMEM; + } + dma_domain->win_cnt = w_count; + } + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + + return ret; +} + static int fsl_pamu_set_domain_attr(struct iommu_domain *domain, enum iommu_attr attr_type, void *data) { @@ -830,6 +879,9 @@ static int fsl_pamu_set_domain_attr(struct iommu_domain *domain, case DOMAIN_ATTR_FSL_PAMU_ENABLE: ret = configure_domain_dma_state(dma_domain, *(int *)data); break; + case DOMAIN_ATTR_WINDOWS: + ret = fsl_pamu_set_windows(domain, *(u32 *)data); + break; default: pr_debug("Unsupported attribute type\n"); ret = -EINVAL; @@ -856,6 +908,9 @@ static int fsl_pamu_get_domain_attr(struct iommu_domain *domain, case DOMAIN_ATTR_FSL_PAMUV1: *(int *)data = DOMAIN_ATTR_FSL_PAMUV1; break; + case DOMAIN_ATTR_WINDOWS: + *(u32 *)data = dma_domain->win_cnt; + break; default: pr_debug("Unsupported attribute type\n"); ret = -EINVAL; @@ -916,13 +971,13 @@ static struct iommu_group *get_shared_pci_device_group(struct pci_dev *pdev) static struct iommu_group *get_pci_device_group(struct pci_dev *pdev) { struct pci_controller *pci_ctl; - bool pci_endpt_partioning; + bool pci_endpt_partitioning; struct iommu_group *group = NULL; pci_ctl = pci_bus_to_host(pdev->bus); - pci_endpt_partioning = check_pci_ctl_endpt_part(pci_ctl); + pci_endpt_partitioning = check_pci_ctl_endpt_part(pci_ctl); /* We can partition PCIe devices so assign device group to the device */ - if (pci_endpt_partioning) { + if (pci_endpt_partitioning) { group = pci_device_group(&pdev->dev); /* @@ -994,62 +1049,6 @@ static void fsl_pamu_remove_device(struct device *dev) iommu_group_remove_device(dev); } -static int fsl_pamu_set_windows(struct iommu_domain *domain, u32 w_count) -{ - struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain); - unsigned long flags; - int ret; - - spin_lock_irqsave(&dma_domain->domain_lock, flags); - /* Ensure domain is inactive i.e. DMA should be disabled for the domain */ - if (dma_domain->enabled) { - pr_debug("Can't set geometry attributes as domain is active\n"); - spin_unlock_irqrestore(&dma_domain->domain_lock, flags); - return -EBUSY; - } - - /* Ensure that the geometry has been set for the domain */ - if (!dma_domain->geom_size) { - pr_debug("Please configure geometry before setting the number of windows\n"); - spin_unlock_irqrestore(&dma_domain->domain_lock, flags); - return -EINVAL; - } - - /* - * Ensure we have valid window count i.e. it should be less than - * maximum permissible limit and should be a power of two. - */ - if (w_count > pamu_get_max_subwin_cnt() || !is_power_of_2(w_count)) { - pr_debug("Invalid window count\n"); - spin_unlock_irqrestore(&dma_domain->domain_lock, flags); - return -EINVAL; - } - - ret = pamu_set_domain_geometry(dma_domain, &domain->geometry, - w_count > 1 ? w_count : 0); - if (!ret) { - kfree(dma_domain->win_arr); - dma_domain->win_arr = kcalloc(w_count, - sizeof(*dma_domain->win_arr), - GFP_ATOMIC); - if (!dma_domain->win_arr) { - spin_unlock_irqrestore(&dma_domain->domain_lock, flags); - return -ENOMEM; - } - dma_domain->win_cnt = w_count; - } - spin_unlock_irqrestore(&dma_domain->domain_lock, flags); - - return ret; -} - -static u32 fsl_pamu_get_windows(struct iommu_domain *domain) -{ - struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain); - - return dma_domain->win_cnt; -} - static const struct iommu_ops fsl_pamu_ops = { .capable = fsl_pamu_capable, .domain_alloc = fsl_pamu_domain_alloc, @@ -1058,8 +1057,6 @@ static const struct iommu_ops fsl_pamu_ops = { .detach_dev = fsl_pamu_detach_device, .domain_window_enable = fsl_pamu_window_enable, .domain_window_disable = fsl_pamu_window_disable, - .domain_get_windows = fsl_pamu_get_windows, - .domain_set_windows = fsl_pamu_set_windows, .iova_to_phys = fsl_pamu_iova_to_phys, .domain_set_attr = fsl_pamu_set_domain_attr, .domain_get_attr = fsl_pamu_get_domain_attr, diff --git a/drivers/iommu/intel-iommu-debugfs.c b/drivers/iommu/intel-iommu-debugfs.c @@ -0,0 +1,314 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2018 Intel Corporation. + * + * Authors: Gayatri Kammela <gayatri.kammela@intel.com> + * Sohil Mehta <sohil.mehta@intel.com> + * Jacob Pan <jacob.jun.pan@linux.intel.com> + */ + +#include <linux/debugfs.h> +#include <linux/dmar.h> +#include <linux/intel-iommu.h> +#include <linux/pci.h> + +#include <asm/irq_remapping.h> + +struct iommu_regset { + int offset; + const char *regs; +}; + +#define IOMMU_REGSET_ENTRY(_reg_) \ + { DMAR_##_reg_##_REG, __stringify(_reg_) } +static const struct iommu_regset iommu_regs[] = { + IOMMU_REGSET_ENTRY(VER), + IOMMU_REGSET_ENTRY(CAP), + IOMMU_REGSET_ENTRY(ECAP), + IOMMU_REGSET_ENTRY(GCMD), + IOMMU_REGSET_ENTRY(GSTS), + IOMMU_REGSET_ENTRY(RTADDR), + IOMMU_REGSET_ENTRY(CCMD), + IOMMU_REGSET_ENTRY(FSTS), + IOMMU_REGSET_ENTRY(FECTL), + IOMMU_REGSET_ENTRY(FEDATA), + IOMMU_REGSET_ENTRY(FEADDR), + IOMMU_REGSET_ENTRY(FEUADDR), + IOMMU_REGSET_ENTRY(AFLOG), + IOMMU_REGSET_ENTRY(PMEN), + IOMMU_REGSET_ENTRY(PLMBASE), + IOMMU_REGSET_ENTRY(PLMLIMIT), + IOMMU_REGSET_ENTRY(PHMBASE), + IOMMU_REGSET_ENTRY(PHMLIMIT), + IOMMU_REGSET_ENTRY(IQH), + IOMMU_REGSET_ENTRY(IQT), + IOMMU_REGSET_ENTRY(IQA), + IOMMU_REGSET_ENTRY(ICS), + IOMMU_REGSET_ENTRY(IRTA), + IOMMU_REGSET_ENTRY(PQH), + IOMMU_REGSET_ENTRY(PQT), + IOMMU_REGSET_ENTRY(PQA), + IOMMU_REGSET_ENTRY(PRS), + IOMMU_REGSET_ENTRY(PECTL), + IOMMU_REGSET_ENTRY(PEDATA), + IOMMU_REGSET_ENTRY(PEADDR), + IOMMU_REGSET_ENTRY(PEUADDR), + IOMMU_REGSET_ENTRY(MTRRCAP), + IOMMU_REGSET_ENTRY(MTRRDEF), + IOMMU_REGSET_ENTRY(MTRR_FIX64K_00000), + IOMMU_REGSET_ENTRY(MTRR_FIX16K_80000), + IOMMU_REGSET_ENTRY(MTRR_FIX16K_A0000), + IOMMU_REGSET_ENTRY(MTRR_FIX4K_C0000), + IOMMU_REGSET_ENTRY(MTRR_FIX4K_C8000), + IOMMU_REGSET_ENTRY(MTRR_FIX4K_D0000), + IOMMU_REGSET_ENTRY(MTRR_FIX4K_D8000), + IOMMU_REGSET_ENTRY(MTRR_FIX4K_E0000), + IOMMU_REGSET_ENTRY(MTRR_FIX4K_E8000), + IOMMU_REGSET_ENTRY(MTRR_FIX4K_F0000), + IOMMU_REGSET_ENTRY(MTRR_FIX4K_F8000), + IOMMU_REGSET_ENTRY(MTRR_PHYSBASE0), + IOMMU_REGSET_ENTRY(MTRR_PHYSMASK0), + IOMMU_REGSET_ENTRY(MTRR_PHYSBASE1), + IOMMU_REGSET_ENTRY(MTRR_PHYSMASK1), + IOMMU_REGSET_ENTRY(MTRR_PHYSBASE2), + IOMMU_REGSET_ENTRY(MTRR_PHYSMASK2), + IOMMU_REGSET_ENTRY(MTRR_PHYSBASE3), + IOMMU_REGSET_ENTRY(MTRR_PHYSMASK3), + IOMMU_REGSET_ENTRY(MTRR_PHYSBASE4), + IOMMU_REGSET_ENTRY(MTRR_PHYSMASK4), + IOMMU_REGSET_ENTRY(MTRR_PHYSBASE5), + IOMMU_REGSET_ENTRY(MTRR_PHYSMASK5), + IOMMU_REGSET_ENTRY(MTRR_PHYSBASE6), + IOMMU_REGSET_ENTRY(MTRR_PHYSMASK6), + IOMMU_REGSET_ENTRY(MTRR_PHYSBASE7), + IOMMU_REGSET_ENTRY(MTRR_PHYSMASK7), + IOMMU_REGSET_ENTRY(MTRR_PHYSBASE8), + IOMMU_REGSET_ENTRY(MTRR_PHYSMASK8), + IOMMU_REGSET_ENTRY(MTRR_PHYSBASE9), + IOMMU_REGSET_ENTRY(MTRR_PHYSMASK9), + IOMMU_REGSET_ENTRY(VCCAP), + IOMMU_REGSET_ENTRY(VCMD), + IOMMU_REGSET_ENTRY(VCRSP), +}; + +static int iommu_regset_show(struct seq_file *m, void *unused) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + unsigned long flag; + int i, ret = 0; + u64 value; + + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) { + if (!drhd->reg_base_addr) { + seq_puts(m, "IOMMU: Invalid base address\n"); + ret = -EINVAL; + goto out; + } + + seq_printf(m, "IOMMU: %s Register Base Address: %llx\n", + iommu->name, drhd->reg_base_addr); + seq_puts(m, "Name\t\t\tOffset\t\tContents\n"); + /* + * Publish the contents of the 64-bit hardware registers + * by adding the offset to the pointer (virtual address). + */ + raw_spin_lock_irqsave(&iommu->register_lock, flag); + for (i = 0 ; i < ARRAY_SIZE(iommu_regs); i++) { + value = dmar_readq(iommu->reg + iommu_regs[i].offset); + seq_printf(m, "%-16s\t0x%02x\t\t0x%016llx\n", + iommu_regs[i].regs, iommu_regs[i].offset, + value); + } + raw_spin_unlock_irqrestore(&iommu->register_lock, flag); + seq_putc(m, '\n'); + } +out: + rcu_read_unlock(); + + return ret; +} +DEFINE_SHOW_ATTRIBUTE(iommu_regset); + +static void ctx_tbl_entry_show(struct seq_file *m, struct intel_iommu *iommu, + int bus) +{ + struct context_entry *context; + int devfn; + + seq_printf(m, " Context Table Entries for Bus: %d\n", bus); + seq_puts(m, " Entry\tB:D.F\tHigh\tLow\n"); + + for (devfn = 0; devfn < 256; devfn++) { + context = iommu_context_addr(iommu, bus, devfn, 0); + if (!context) + return; + + if (!context_present(context)) + continue; + + seq_printf(m, " %-5d\t%02x:%02x.%x\t%-6llx\t%llx\n", devfn, + bus, PCI_SLOT(devfn), PCI_FUNC(devfn), + context[0].hi, context[0].lo); + } +} + +static void root_tbl_entry_show(struct seq_file *m, struct intel_iommu *iommu) +{ + unsigned long flags; + int bus; + + spin_lock_irqsave(&iommu->lock, flags); + seq_printf(m, "IOMMU %s: Root Table Address:%llx\n", iommu->name, + (u64)virt_to_phys(iommu->root_entry)); + seq_puts(m, "Root Table Entries:\n"); + + for (bus = 0; bus < 256; bus++) { + if (!(iommu->root_entry[bus].lo & 1)) + continue; + + seq_printf(m, " Bus: %d H: %llx L: %llx\n", bus, + iommu->root_entry[bus].hi, + iommu->root_entry[bus].lo); + + ctx_tbl_entry_show(m, iommu, bus); + seq_putc(m, '\n'); + } + spin_unlock_irqrestore(&iommu->lock, flags); +} + +static int dmar_translation_struct_show(struct seq_file *m, void *unused) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) { + root_tbl_entry_show(m, iommu); + seq_putc(m, '\n'); + } + rcu_read_unlock(); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(dmar_translation_struct); + +#ifdef CONFIG_IRQ_REMAP +static void ir_tbl_remap_entry_show(struct seq_file *m, + struct intel_iommu *iommu) +{ + struct irte *ri_entry; + unsigned long flags; + int idx; + + seq_puts(m, " Entry SrcID DstID Vct IRTE_high\t\tIRTE_low\n"); + + raw_spin_lock_irqsave(&irq_2_ir_lock, flags); + for (idx = 0; idx < INTR_REMAP_TABLE_ENTRIES; idx++) { + ri_entry = &iommu->ir_table->base[idx]; + if (!ri_entry->present || ri_entry->p_pst) + continue; + + seq_printf(m, " %-5d %02x:%02x.%01x %08x %02x %016llx\t%016llx\n", + idx, PCI_BUS_NUM(ri_entry->sid), + PCI_SLOT(ri_entry->sid), PCI_FUNC(ri_entry->sid), + ri_entry->dest_id, ri_entry->vector, + ri_entry->high, ri_entry->low); + } + raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); +} + +static void ir_tbl_posted_entry_show(struct seq_file *m, + struct intel_iommu *iommu) +{ + struct irte *pi_entry; + unsigned long flags; + int idx; + + seq_puts(m, " Entry SrcID PDA_high PDA_low Vct IRTE_high\t\tIRTE_low\n"); + + raw_spin_lock_irqsave(&irq_2_ir_lock, flags); + for (idx = 0; idx < INTR_REMAP_TABLE_ENTRIES; idx++) { + pi_entry = &iommu->ir_table->base[idx]; + if (!pi_entry->present || !pi_entry->p_pst) + continue; + + seq_printf(m, " %-5d %02x:%02x.%01x %08x %08x %02x %016llx\t%016llx\n", + idx, PCI_BUS_NUM(pi_entry->sid), + PCI_SLOT(pi_entry->sid), PCI_FUNC(pi_entry->sid), + pi_entry->pda_h, pi_entry->pda_l << 6, + pi_entry->vector, pi_entry->high, + pi_entry->low); + } + raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); +} + +/* + * For active IOMMUs go through the Interrupt remapping + * table and print valid entries in a table format for + * Remapped and Posted Interrupts. + */ +static int ir_translation_struct_show(struct seq_file *m, void *unused) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + u64 irta; + + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) { + if (!ecap_ir_support(iommu->ecap)) + continue; + + seq_printf(m, "Remapped Interrupt supported on IOMMU: %s\n", + iommu->name); + + if (iommu->ir_table) { + irta = virt_to_phys(iommu->ir_table->base); + seq_printf(m, " IR table address:%llx\n", irta); + ir_tbl_remap_entry_show(m, iommu); + } else { + seq_puts(m, "Interrupt Remapping is not enabled\n"); + } + seq_putc(m, '\n'); + } + + seq_puts(m, "****\n\n"); + + for_each_active_iommu(iommu, drhd) { + if (!cap_pi_support(iommu->cap)) + continue; + + seq_printf(m, "Posted Interrupt supported on IOMMU: %s\n", + iommu->name); + + if (iommu->ir_table) { + irta = virt_to_phys(iommu->ir_table->base); + seq_printf(m, " IR table address:%llx\n", irta); + ir_tbl_posted_entry_show(m, iommu); + } else { + seq_puts(m, "Interrupt Remapping is not enabled\n"); + } + seq_putc(m, '\n'); + } + rcu_read_unlock(); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(ir_translation_struct); +#endif + +void __init intel_iommu_debugfs_init(void) +{ + struct dentry *intel_iommu_debug = debugfs_create_dir("intel", + iommu_debugfs_dir); + + debugfs_create_file("iommu_regset", 0444, intel_iommu_debug, NULL, + &iommu_regset_fops); + debugfs_create_file("dmar_translation_struct", 0444, intel_iommu_debug, + NULL, &dmar_translation_struct_fops); +#ifdef CONFIG_IRQ_REMAP + debugfs_create_file("ir_translation_struct", 0444, intel_iommu_debug, + NULL, &ir_translation_struct_fops); +#endif +} diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c @@ -185,16 +185,6 @@ static int rwbf_quirk; static int force_on = 0; int intel_iommu_tboot_noforce; -/* - * 0: Present - * 1-11: Reserved - * 12-63: Context Ptr (12 - (haw-1)) - * 64-127: Reserved - */ -struct root_entry { - u64 lo; - u64 hi; -}; #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry)) /* @@ -220,21 +210,6 @@ static phys_addr_t root_entry_uctp(struct root_entry *re) return re->hi & VTD_PAGE_MASK; } -/* - * low 64 bits: - * 0: present - * 1: fault processing disable - * 2-3: translation type - * 12-63: address space root - * high 64 bits: - * 0-2: address width - * 3-6: aval - * 8-23: domain id - */ -struct context_entry { - u64 lo; - u64 hi; -}; static inline void context_clear_pasid_enable(struct context_entry *context) { @@ -261,7 +236,7 @@ static inline bool __context_present(struct context_entry *context) return (context->lo & 1); } -static inline bool context_present(struct context_entry *context) +bool context_present(struct context_entry *context) { return context_pasid_enabled(context) ? __context_present(context) : @@ -788,8 +763,8 @@ static void domain_update_iommu_cap(struct dmar_domain *domain) domain->iommu_superpage = domain_update_iommu_superpage(NULL); } -static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu, - u8 bus, u8 devfn, int alloc) +struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, + u8 devfn, int alloc) { struct root_entry *root = &iommu->root_entry[bus]; struct context_entry *context; @@ -4860,6 +4835,7 @@ int __init intel_iommu_init(void) cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL, intel_iommu_cpu_dead); intel_iommu_enabled = 1; + intel_iommu_debugfs_init(); return 0; diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c @@ -76,7 +76,7 @@ static struct hpet_scope ir_hpet[MAX_HPET_TBS]; * in single-threaded environment with interrupt disabled, so no need to tabke * the dmar_global_lock. */ -static DEFINE_RAW_SPINLOCK(irq_2_ir_lock); +DEFINE_RAW_SPINLOCK(irq_2_ir_lock); static const struct irq_domain_ops intel_ir_domain_ops; static void iommu_disable_irq_remapping(struct intel_iommu *iommu); diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c @@ -587,6 +587,7 @@ static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data, } io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true); + io_pgtable_tlb_sync(&data->iop); return size; } @@ -642,6 +643,13 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data, io_pgtable_tlb_sync(iop); ptep = iopte_deref(pte[i], lvl); __arm_v7s_free_table(ptep, lvl + 1, data); + } else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) { + /* + * Order the PTE update against queueing the IOVA, to + * guarantee that a flush callback from a different CPU + * has observed it before the TLBIALL can be issued. + */ + smp_wmb(); } else { io_pgtable_tlb_add_flush(iop, iova, blk_size, blk_size, true); @@ -712,7 +720,8 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, IO_PGTABLE_QUIRK_NO_PERMS | IO_PGTABLE_QUIRK_TLBI_ON_MAP | IO_PGTABLE_QUIRK_ARM_MTK_4GB | - IO_PGTABLE_QUIRK_NO_DMA)) + IO_PGTABLE_QUIRK_NO_DMA | + IO_PGTABLE_QUIRK_NON_STRICT)) return NULL; /* If ARM_MTK_4GB is enabled, the NO_PERMS is also expected. */ diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c @@ -574,13 +574,13 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data, return 0; tablep = iopte_deref(pte, data); + } else if (unmap_idx >= 0) { + io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true); + io_pgtable_tlb_sync(&data->iop); + return size; } - if (unmap_idx < 0) - return __arm_lpae_unmap(data, iova, size, lvl, tablep); - - io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true); - return size; + return __arm_lpae_unmap(data, iova, size, lvl, tablep); } static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, @@ -610,6 +610,13 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, io_pgtable_tlb_sync(iop); ptep = iopte_deref(pte, data); __arm_lpae_free_pgtable(data, lvl + 1, ptep); + } else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) { + /* + * Order the PTE update against queueing the IOVA, to + * guarantee that a flush callback from a different CPU + * has observed it before the TLBIALL can be issued. + */ + smp_wmb(); } else { io_pgtable_tlb_add_flush(iop, iova, size, size, true); } @@ -772,7 +779,8 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) u64 reg; struct arm_lpae_io_pgtable *data; - if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA)) + if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA | + IO_PGTABLE_QUIRK_NON_STRICT)) return NULL; data = arm_lpae_alloc_pgtable(cfg); @@ -864,7 +872,8 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) struct arm_lpae_io_pgtable *data; /* The NS quirk doesn't apply at stage 2 */ - if (cfg->quirks & ~IO_PGTABLE_QUIRK_NO_DMA) + if (cfg->quirks & ~(IO_PGTABLE_QUIRK_NO_DMA | + IO_PGTABLE_QUIRK_NON_STRICT)) return NULL; data = arm_lpae_alloc_pgtable(cfg); diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h @@ -71,12 +71,17 @@ struct io_pgtable_cfg { * be accessed by a fully cache-coherent IOMMU or CPU (e.g. for a * software-emulated IOMMU), such that pagetable updates need not * be treated as explicit DMA data. + * + * IO_PGTABLE_QUIRK_NON_STRICT: Skip issuing synchronous leaf TLBIs + * on unmap, for DMA domains using the flush queue mechanism for + * delayed invalidation. */ #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) #define IO_PGTABLE_QUIRK_TLBI_ON_MAP BIT(2) #define IO_PGTABLE_QUIRK_ARM_MTK_4GB BIT(3) #define IO_PGTABLE_QUIRK_NO_DMA BIT(4) + #define IO_PGTABLE_QUIRK_NON_STRICT BIT(5) unsigned long quirks; unsigned long pgsize_bitmap; unsigned int ias; diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c @@ -32,6 +32,7 @@ #include <linux/pci.h> #include <linux/bitops.h> #include <linux/property.h> +#include <linux/fsl/mc.h> #include <trace/events/iommu.h> static struct kset *iommu_group_kset; @@ -41,6 +42,7 @@ static unsigned int iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY; #else static unsigned int iommu_def_domain_type = IOMMU_DOMAIN_DMA; #endif +static bool iommu_dma_strict __read_mostly = true; struct iommu_callback_data { const struct iommu_ops *ops; @@ -131,6 +133,12 @@ static int __init iommu_set_def_domain_type(char *str) } early_param("iommu.passthrough", iommu_set_def_domain_type); +static int __init iommu_dma_setup(char *str) +{ + return kstrtobool(str, &iommu_dma_strict); +} +early_param("iommu.strict", iommu_dma_setup); + static ssize_t iommu_group_attr_show(struct kobject *kobj, struct attribute *__attr, char *buf) { @@ -1024,6 +1032,18 @@ struct iommu_group *pci_device_group(struct device *dev) return iommu_group_alloc(); } +/* Get the IOMMU group for device on fsl-mc bus */ +struct iommu_group *fsl_mc_device_group(struct device *dev) +{ + struct device *cont_dev = fsl_mc_cont_dev(dev); + struct iommu_group *group; + + group = iommu_group_get(cont_dev); + if (!group) + group = iommu_group_alloc(); + return group; +} + /** * iommu_group_get_for_dev - Find or create the IOMMU group for a device * @dev: target device @@ -1072,6 +1092,13 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev) group->default_domain = dom; if (!group->domain) group->domain = dom; + + if (dom && !iommu_dma_strict) { + int attr = 1; + iommu_domain_set_attr(dom, + DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, + &attr); + } } ret = iommu_group_add_device(group, dev); @@ -1416,7 +1443,16 @@ struct iommu_domain *iommu_get_domain_for_dev(struct device *dev) EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev); /* - * IOMMU groups are really the natrual working unit of the IOMMU, but + * For IOMMU_DOMAIN_DMA implementations which already provide their own + * guarantees that the group and its default domain are valid and correct. + */ +struct iommu_domain *iommu_get_dma_domain(struct device *dev) +{ + return dev->iommu_group->default_domain; +} + +/* + * IOMMU groups are really the natural working unit of the IOMMU, but * the IOMMU API works on domains and devices. Bridge that gap by * iterating over the devices in a group. Ideally we'd have a single * device which represents the requestor ID of the group, but we also @@ -1796,7 +1832,6 @@ int iommu_domain_get_attr(struct iommu_domain *domain, struct iommu_domain_geometry *geometry; bool *paging; int ret = 0; - u32 *count; switch (attr) { case DOMAIN_ATTR_GEOMETRY: @@ -1808,15 +1843,6 @@ int iommu_domain_get_attr(struct iommu_domain *domain, paging = data; *paging = (domain->pgsize_bitmap != 0UL); break; - case DOMAIN_ATTR_WINDOWS: - count = data; - - if (domain->ops->domain_get_windows != NULL) - *count = domain->ops->domain_get_windows(domain); - else - ret = -ENODEV; - - break; default: if (!domain->ops->domain_get_attr) return -EINVAL; @@ -1832,18 +1858,8 @@ int iommu_domain_set_attr(struct iommu_domain *domain, enum iommu_attr attr, void *data) { int ret = 0; - u32 *count; switch (attr) { - case DOMAIN_ATTR_WINDOWS: - count = data; - - if (domain->ops->domain_set_windows != NULL) - ret = domain->ops->domain_set_windows(domain, *count); - else - ret = -ENODEV; - - break; default: if (domain->ops->domain_set_attr == NULL) return -EINVAL; diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c @@ -56,6 +56,7 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, iovad->granule = granule; iovad->start_pfn = start_pfn; iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad)); + iovad->max32_alloc_size = iovad->dma_32bit_pfn; iovad->flush_cb = NULL; iovad->fq = NULL; iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR; @@ -139,8 +140,10 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) cached_iova = rb_entry(iovad->cached32_node, struct iova, node); if (free->pfn_hi < iovad->dma_32bit_pfn && - free->pfn_lo >= cached_iova->pfn_lo) + free->pfn_lo >= cached_iova->pfn_lo) { iovad->cached32_node = rb_next(&free->node); + iovad->max32_alloc_size = iovad->dma_32bit_pfn; + } cached_iova = rb_entry(iovad->cached_node, struct iova, node); if (free->pfn_lo >= cached_iova->pfn_lo) @@ -190,6 +193,10 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, /* Walk the tree backwards */ spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); + if (limit_pfn <= iovad->dma_32bit_pfn && + size >= iovad->max32_alloc_size) + goto iova32_full; + curr = __get_cached_rbnode(iovad, limit_pfn); curr_iova = rb_entry(curr, struct iova, node); do { @@ -200,10 +207,8 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, curr_iova = rb_entry(curr, struct iova, node); } while (curr && new_pfn <= curr_iova->pfn_hi); - if (limit_pfn < size || new_pfn < iovad->start_pfn) { - spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); - return -ENOMEM; - } + if (limit_pfn < size || new_pfn < iovad->start_pfn) + goto iova32_full; /* pfn_lo will point to size aligned address if size_aligned is set */ new->pfn_lo = new_pfn; @@ -214,9 +219,12 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, __cached_rbnode_insert_update(iovad, new); spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); - - return 0; + +iova32_full: + iovad->max32_alloc_size = size; + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + return -ENOMEM; } static struct kmem_cache *iova_cache; diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c @@ -1,11 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 /* * IPMMU VMSA * * Copyright (C) 2014 Renesas Electronics Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. */ #include <linux/bitmap.h> diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c @@ -24,6 +24,7 @@ #include <linux/of_iommu.h> #include <linux/of_pci.h> #include <linux/slab.h> +#include <linux/fsl/mc.h> #define NO_IOMMU 1 @@ -132,9 +133,8 @@ static int of_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data) struct of_phandle_args iommu_spec = { .args_count = 1 }; int err; - err = of_pci_map_rid(info->np, alias, "iommu-map", - "iommu-map-mask", &iommu_spec.np, - iommu_spec.args); + err = of_map_rid(info->np, alias, "iommu-map", "iommu-map-mask", + &iommu_spec.np, iommu_spec.args); if (err) return err == -ENODEV ? NO_IOMMU : err; @@ -143,6 +143,23 @@ static int of_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data) return err; } +static int of_fsl_mc_iommu_init(struct fsl_mc_device *mc_dev, + struct device_node *master_np) +{ + struct of_phandle_args iommu_spec = { .args_count = 1 }; + int err; + + err = of_map_rid(master_np, mc_dev->icid, "iommu-map", + "iommu-map-mask", &iommu_spec.np, + iommu_spec.args); + if (err) + return err == -ENODEV ? NO_IOMMU : err; + + err = of_iommu_xlate(&mc_dev->dev, &iommu_spec); + of_node_put(iommu_spec.np); + return err; +} + const struct iommu_ops *of_iommu_configure(struct device *dev, struct device_node *master_np) { @@ -174,6 +191,8 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, err = pci_for_each_dma_alias(to_pci_dev(dev), of_pci_iommu_init, &info); + } else if (dev_is_fsl_mc(dev)) { + err = of_fsl_mc_iommu_init(to_fsl_mc_device(dev), master_np); } else { struct of_phandle_args iommu_spec; int idx = 0; diff --git a/drivers/of/base.c b/drivers/of/base.c @@ -2045,3 +2045,105 @@ int of_find_last_cache_level(unsigned int cpu) return cache_level; } + +/** + * of_map_rid - Translate a requester ID through a downstream mapping. + * @np: root complex device node. + * @rid: device requester ID to map. + * @map_name: property name of the map to use. + * @map_mask_name: optional property name of the mask to use. + * @target: optional pointer to a target device node. + * @id_out: optional pointer to receive the translated ID. + * + * Given a device requester ID, look up the appropriate implementation-defined + * platform ID and/or the target device which receives transactions on that + * ID, as per the "iommu-map" and "msi-map" bindings. Either of @target or + * @id_out may be NULL if only the other is required. If @target points to + * a non-NULL device node pointer, only entries targeting that node will be + * matched; if it points to a NULL value, it will receive the device node of + * the first matching target phandle, with a reference held. + * + * Return: 0 on success or a standard error code on failure. + */ +int of_map_rid(struct device_node *np, u32 rid, + const char *map_name, const char *map_mask_name, + struct device_node **target, u32 *id_out) +{ + u32 map_mask, masked_rid; + int map_len; + const __be32 *map = NULL; + + if (!np || !map_name || (!target && !id_out)) + return -EINVAL; + + map = of_get_property(np, map_name, &map_len); + if (!map) { + if (target) + return -ENODEV; + /* Otherwise, no map implies no translation */ + *id_out = rid; + return 0; + } + + if (!map_len || map_len % (4 * sizeof(*map))) { + pr_err("%pOF: Error: Bad %s length: %d\n", np, + map_name, map_len); + return -EINVAL; + } + + /* The default is to select all bits. */ + map_mask = 0xffffffff; + + /* + * Can be overridden by "{iommu,msi}-map-mask" property. + * If of_property_read_u32() fails, the default is used. + */ + if (map_mask_name) + of_property_read_u32(np, map_mask_name, &map_mask); + + masked_rid = map_mask & rid; + for ( ; map_len > 0; map_len -= 4 * sizeof(*map), map += 4) { + struct device_node *phandle_node; + u32 rid_base = be32_to_cpup(map + 0); + u32 phandle = be32_to_cpup(map + 1); + u32 out_base = be32_to_cpup(map + 2); + u32 rid_len = be32_to_cpup(map + 3); + + if (rid_base & ~map_mask) { + pr_err("%pOF: Invalid %s translation - %s-mask (0x%x) ignores rid-base (0x%x)\n", + np, map_name, map_name, + map_mask, rid_base); + return -EFAULT; + } + + if (masked_rid < rid_base || masked_rid >= rid_base + rid_len) + continue; + + phandle_node = of_find_node_by_phandle(phandle); + if (!phandle_node) + return -ENODEV; + + if (target) { + if (*target) + of_node_put(phandle_node); + else + *target = phandle_node; + + if (*target != phandle_node) + continue; + } + + if (id_out) + *id_out = masked_rid - rid_base + out_base; + + pr_debug("%pOF: %s, using mask %08x, rid-base: %08x, out-base: %08x, length: %08x, rid: %08x -> %08x\n", + np, map_name, map_mask, rid_base, out_base, + rid_len, rid, masked_rid - rid_base + out_base); + return 0; + } + + pr_err("%pOF: Invalid %s translation - no match for rid 0x%x on %pOF\n", + np, map_name, rid, target && *target ? *target : NULL); + return -EFAULT; +} +EXPORT_SYMBOL_GPL(of_map_rid); diff --git a/drivers/of/irq.c b/drivers/of/irq.c @@ -22,7 +22,6 @@ #include <linux/module.h> #include <linux/of.h> #include <linux/of_irq.h> -#include <linux/of_pci.h> #include <linux/string.h> #include <linux/slab.h> @@ -588,8 +587,8 @@ static u32 __of_msi_map_rid(struct device *dev, struct device_node **np, * "msi-map" property. */ for (parent_dev = dev; parent_dev; parent_dev = parent_dev->parent) - if (!of_pci_map_rid(parent_dev->of_node, rid_in, "msi-map", - "msi-map-mask", np, &rid_out)) + if (!of_map_rid(parent_dev->of_node, rid_in, "msi-map", + "msi-map-mask", np, &rid_out)) break; return rid_out; } diff --git a/drivers/pci/of.c b/drivers/pci/of.c @@ -355,107 +355,6 @@ failed: EXPORT_SYMBOL_GPL(devm_of_pci_get_host_bridge_resources); #endif /* CONFIG_OF_ADDRESS */ -/** - * of_pci_map_rid - Translate a requester ID through a downstream mapping. - * @np: root complex device node. - * @rid: PCI requester ID to map. - * @map_name: property name of the map to use. - * @map_mask_name: optional property name of the mask to use. - * @target: optional pointer to a target device node. - * @id_out: optional pointer to receive the translated ID. - * - * Given a PCI requester ID, look up the appropriate implementation-defined - * platform ID and/or the target device which receives transactions on that - * ID, as per the "iommu-map" and "msi-map" bindings. Either of @target or - * @id_out may be NULL if only the other is required. If @target points to - * a non-NULL device node pointer, only entries targeting that node will be - * matched; if it points to a NULL value, it will receive the device node of - * the first matching target phandle, with a reference held. - * - * Return: 0 on success or a standard error code on failure. - */ -int of_pci_map_rid(struct device_node *np, u32 rid, - const char *map_name, const char *map_mask_name, - struct device_node **target, u32 *id_out) -{ - u32 map_mask, masked_rid; - int map_len; - const __be32 *map = NULL; - - if (!np || !map_name || (!target && !id_out)) - return -EINVAL; - - map = of_get_property(np, map_name, &map_len); - if (!map) { - if (target) - return -ENODEV; - /* Otherwise, no map implies no translation */ - *id_out = rid; - return 0; - } - - if (!map_len || map_len % (4 * sizeof(*map))) { - pr_err("%pOF: Error: Bad %s length: %d\n", np, - map_name, map_len); - return -EINVAL; - } - - /* The default is to select all bits. */ - map_mask = 0xffffffff; - - /* - * Can be overridden by "{iommu,msi}-map-mask" property. - * If of_property_read_u32() fails, the default is used. - */ - if (map_mask_name) - of_property_read_u32(np, map_mask_name, &map_mask); - - masked_rid = map_mask & rid; - for ( ; map_len > 0; map_len -= 4 * sizeof(*map), map += 4) { - struct device_node *phandle_node; - u32 rid_base = be32_to_cpup(map + 0); - u32 phandle = be32_to_cpup(map + 1); - u32 out_base = be32_to_cpup(map + 2); - u32 rid_len = be32_to_cpup(map + 3); - - if (rid_base & ~map_mask) { - pr_err("%pOF: Invalid %s translation - %s-mask (0x%x) ignores rid-base (0x%x)\n", - np, map_name, map_name, - map_mask, rid_base); - return -EFAULT; - } - - if (masked_rid < rid_base || masked_rid >= rid_base + rid_len) - continue; - - phandle_node = of_find_node_by_phandle(phandle); - if (!phandle_node) - return -ENODEV; - - if (target) { - if (*target) - of_node_put(phandle_node); - else - *target = phandle_node; - - if (*target != phandle_node) - continue; - } - - if (id_out) - *id_out = masked_rid - rid_base + out_base; - - pr_debug("%pOF: %s, using mask %08x, rid-base: %08x, out-base: %08x, length: %08x, rid: %08x -> %08x\n", - np, map_name, map_mask, rid_base, out_base, - rid_len, rid, masked_rid - rid_base + out_base); - return 0; - } - - pr_err("%pOF: Invalid %s translation - no match for rid 0x%x on %pOF\n", - np, map_name, rid, target && *target ? *target : NULL); - return -EFAULT; -} - #if IS_ENABLED(CONFIG_OF_IRQ) /** * of_irq_parse_pci - Resolve the interrupt for a PCI device diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h @@ -351,6 +351,14 @@ int mc_send_command(struct fsl_mc_io *mc_io, struct fsl_mc_command *cmd); #define dev_is_fsl_mc(_dev) (0) #endif +/* Macro to check if a device is a container device */ +#define fsl_mc_is_cont_dev(_dev) (to_fsl_mc_device(_dev)->flags & \ + FSL_MC_IS_DPRC) + +/* Macro to get the container device of a MC device */ +#define fsl_mc_cont_dev(_dev) (fsl_mc_is_cont_dev(_dev) ? \ + (_dev) : (_dev)->parent) + /* * module_fsl_mc_driver() - Helper macro for drivers that don't do * anything special in module init/exit. This eliminates a lot of diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h @@ -72,6 +72,42 @@ #define DMAR_PEDATA_REG 0xe4 /* Page request event interrupt data register */ #define DMAR_PEADDR_REG 0xe8 /* Page request event interrupt addr register */ #define DMAR_PEUADDR_REG 0xec /* Page request event Upper address register */ +#define DMAR_MTRRCAP_REG 0x100 /* MTRR capability register */ +#define DMAR_MTRRDEF_REG 0x108 /* MTRR default type register */ +#define DMAR_MTRR_FIX64K_00000_REG 0x120 /* MTRR Fixed range registers */ +#define DMAR_MTRR_FIX16K_80000_REG 0x128 +#define DMAR_MTRR_FIX16K_A0000_REG 0x130 +#define DMAR_MTRR_FIX4K_C0000_REG 0x138 +#define DMAR_MTRR_FIX4K_C8000_REG 0x140 +#define DMAR_MTRR_FIX4K_D0000_REG 0x148 +#define DMAR_MTRR_FIX4K_D8000_REG 0x150 +#define DMAR_MTRR_FIX4K_E0000_REG 0x158 +#define DMAR_MTRR_FIX4K_E8000_REG 0x160 +#define DMAR_MTRR_FIX4K_F0000_REG 0x168 +#define DMAR_MTRR_FIX4K_F8000_REG 0x170 +#define DMAR_MTRR_PHYSBASE0_REG 0x180 /* MTRR Variable range registers */ +#define DMAR_MTRR_PHYSMASK0_REG 0x188 +#define DMAR_MTRR_PHYSBASE1_REG 0x190 +#define DMAR_MTRR_PHYSMASK1_REG 0x198 +#define DMAR_MTRR_PHYSBASE2_REG 0x1a0 +#define DMAR_MTRR_PHYSMASK2_REG 0x1a8 +#define DMAR_MTRR_PHYSBASE3_REG 0x1b0 +#define DMAR_MTRR_PHYSMASK3_REG 0x1b8 +#define DMAR_MTRR_PHYSBASE4_REG 0x1c0 +#define DMAR_MTRR_PHYSMASK4_REG 0x1c8 +#define DMAR_MTRR_PHYSBASE5_REG 0x1d0 +#define DMAR_MTRR_PHYSMASK5_REG 0x1d8 +#define DMAR_MTRR_PHYSBASE6_REG 0x1e0 +#define DMAR_MTRR_PHYSMASK6_REG 0x1e8 +#define DMAR_MTRR_PHYSBASE7_REG 0x1f0 +#define DMAR_MTRR_PHYSMASK7_REG 0x1f8 +#define DMAR_MTRR_PHYSBASE8_REG 0x200 +#define DMAR_MTRR_PHYSMASK8_REG 0x208 +#define DMAR_MTRR_PHYSBASE9_REG 0x210 +#define DMAR_MTRR_PHYSMASK9_REG 0x218 +#define DMAR_VCCAP_REG 0xe00 /* Virtual command capability register */ +#define DMAR_VCMD_REG 0xe10 /* Virtual command register */ +#define DMAR_VCRSP_REG 0xe20 /* Virtual command response register */ #define OFFSET_STRIDE (9) @@ -389,6 +425,33 @@ struct pasid_entry; struct pasid_state_entry; struct page_req_dsc; +/* + * 0: Present + * 1-11: Reserved + * 12-63: Context Ptr (12 - (haw-1)) + * 64-127: Reserved + */ +struct root_entry { + u64 lo; + u64 hi; +}; + +/* + * low 64 bits: + * 0: present + * 1: fault processing disable + * 2-3: translation type + * 12-63: address space root + * high 64 bits: + * 0-2: address width + * 3-6: aval + * 8-23: domain id + */ +struct context_entry { + u64 lo; + u64 hi; +}; + struct dmar_domain { int nid; /* node id */ @@ -558,6 +621,15 @@ extern int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_ extern struct intel_iommu *intel_svm_device_to_iommu(struct device *dev); #endif +#ifdef CONFIG_INTEL_IOMMU_DEBUGFS +void intel_iommu_debugfs_init(void); +#else +static inline void intel_iommu_debugfs_init(void) {} +#endif /* CONFIG_INTEL_IOMMU_DEBUGFS */ + extern const struct attribute_group *intel_iommu_groups[]; +bool context_present(struct context_entry *context); +struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, + u8 devfn, int alloc); #endif diff --git a/include/linux/iommu.h b/include/linux/iommu.h @@ -124,6 +124,7 @@ enum iommu_attr { DOMAIN_ATTR_FSL_PAMU_ENABLE, DOMAIN_ATTR_FSL_PAMUV1, DOMAIN_ATTR_NESTING, /* two stages of translation */ + DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, DOMAIN_ATTR_MAX, }; @@ -181,8 +182,6 @@ struct iommu_resv_region { * @apply_resv_region: Temporary helper call-back for iova reserved ranges * @domain_window_enable: Configure and enable a particular window for a domain * @domain_window_disable: Disable a particular window for a domain - * @domain_set_windows: Set the number of windows for a domain - * @domain_get_windows: Return the number of windows for a domain * @of_xlate: add OF master IDs to iommu grouping * @pgsize_bitmap: bitmap of all possible supported page sizes */ @@ -223,10 +222,6 @@ struct iommu_ops { int (*domain_window_enable)(struct iommu_domain *domain, u32 wnd_nr, phys_addr_t paddr, u64 size, int prot); void (*domain_window_disable)(struct iommu_domain *domain, u32 wnd_nr); - /* Set the number of windows per domain */ - int (*domain_set_windows)(struct iommu_domain *domain, u32 w_count); - /* Get the number of windows per domain */ - u32 (*domain_get_windows)(struct iommu_domain *domain); int (*of_xlate)(struct device *dev, struct of_phandle_args *args); bool (*is_attach_deferred)(struct iommu_domain *domain, struct device *dev); @@ -293,6 +288,7 @@ extern int iommu_attach_device(struct iommu_domain *domain, extern void iommu_detach_device(struct iommu_domain *domain, struct device *dev); extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); +extern struct iommu_domain *iommu_get_dma_domain(struct device *dev); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot); extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, @@ -377,6 +373,8 @@ static inline void iommu_tlb_sync(struct iommu_domain *domain) extern struct iommu_group *pci_device_group(struct device *dev); /* Generic device grouping function */ extern struct iommu_group *generic_device_group(struct device *dev); +/* FSL-MC device grouping function */ +struct iommu_group *fsl_mc_device_group(struct device *dev); /** * struct iommu_fwspec - per-device IOMMU instance data diff --git a/include/linux/iova.h b/include/linux/iova.h @@ -75,6 +75,7 @@ struct iova_domain { unsigned long granule; /* pfn granularity for this domain */ unsigned long start_pfn; /* Lower limit for this domain */ unsigned long dma_32bit_pfn; + unsigned long max32_alloc_size; /* Size of last failed allocation */ struct iova anchor; /* rbtree lookup anchor */ struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE]; /* IOVA range caches */ diff --git a/include/linux/of.h b/include/linux/of.h @@ -550,6 +550,10 @@ bool of_console_check(struct device_node *dn, char *name, int index); extern int of_cpu_node_to_id(struct device_node *np); +int of_map_rid(struct device_node *np, u32 rid, + const char *map_name, const char *map_mask_name, + struct device_node **target, u32 *id_out); + #else /* CONFIG_OF */ static inline void of_core_init(void) @@ -952,6 +956,13 @@ static inline int of_cpu_node_to_id(struct device_node *np) return -ENODEV; } +static inline int of_map_rid(struct device_node *np, u32 rid, + const char *map_name, const char *map_mask_name, + struct device_node **target, u32 *id_out) +{ + return -EINVAL; +} + #define of_match_ptr(_ptr) NULL #define of_match_node(_matches, _node) NULL #endif /* CONFIG_OF */ diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h @@ -14,9 +14,6 @@ struct device_node *of_pci_find_child_device(struct device_node *parent, unsigned int devfn); int of_pci_get_devfn(struct device_node *np); void of_pci_check_probe_only(void); -int of_pci_map_rid(struct device_node *np, u32 rid, - const char *map_name, const char *map_mask_name, - struct device_node **target, u32 *id_out); #else static inline struct device_node *of_pci_find_child_device(struct device_node *parent, unsigned int devfn) @@ -29,13 +26,6 @@ static inline int of_pci_get_devfn(struct device_node *np) return -EINVAL; } -static inline int of_pci_map_rid(struct device_node *np, u32 rid, - const char *map_name, const char *map_mask_name, - struct device_node **target, u32 *id_out) -{ - return -EINVAL; -} - static inline void of_pci_check_probe_only(void) { } #endif