whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit 18b8bfdfbae5821a7df691bc1e542bbab6c31e9c
parent d972604f6f87478212f012af5560c4fd4bb2b01d
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Fri, 24 Aug 2018 13:10:38 -0700

Merge tag 'iommu-updates-v4.19' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu

Pull IOMMU updates from Joerg Roedel:

 - PASID table handling updates for the Intel VT-d driver. It implements
   a global PASID space now so that applications usings multiple devices
   will just have one PASID.

 - A new config option to make iommu passthroug mode the default.

 - New sysfs attribute for iommu groups to export the type of the
   default domain.

 - A debugfs interface (for debug only) usable by IOMMU drivers to
   export internals to user-space.

 - R-Car Gen3 SoCs support for the ipmmu-vmsa driver

 - The ARM-SMMU now aborts transactions from unknown devices and devices
   not attached to any domain.

 - Various cleanups and smaller fixes all over the place.

* tag 'iommu-updates-v4.19' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (42 commits)
  iommu/omap: Fix cache flushes on L2 table entries
  iommu: Remove the ->map_sg indirection
  iommu/arm-smmu-v3: Abort all transactions if SMMU is enabled in kdump kernel
  iommu/arm-smmu-v3: Prevent any devices access to memory without registration
  iommu/ipmmu-vmsa: Don't register as BUS IOMMU if machine doesn't have IPMMU-VMSA
  iommu/ipmmu-vmsa: Clarify supported platforms
  iommu/ipmmu-vmsa: Fix allocation in atomic context
  iommu: Add config option to set passthrough as default
  iommu: Add sysfs attribyte for domain type
  iommu/arm-smmu-v3: sync the OVACKFLG to PRIQ consumer register
  iommu/arm-smmu: Error out only if not enough context interrupts
  iommu/io-pgtable-arm-v7s: Abort allocation when table address overflows the PTE
  iommu/io-pgtable-arm: Fix pgtable allocation in selftest
  iommu/vt-d: Remove the obsolete per iommu pasid tables
  iommu/vt-d: Apply per pci device pasid table in SVA
  iommu/vt-d: Allocate and free pasid table
  iommu/vt-d: Per PCI device pasid table interfaces
  iommu/vt-d: Add for_each_device_domain() helper
  iommu/vt-d: Move device_domain_info to header
  iommu/vt-d: Apply global PASID in SVA
  ...

Diffstat:
MDocumentation/admin-guide/kernel-parameters.txt | 3++-
MDocumentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt | 3+++
March/x86/include/asm/irq_remapping.h | 5+++++
March/x86/kernel/pci-dma.c | 8++++++++
Mdrivers/iommu/Kconfig | 37+++++++++++++++++++++++++++++++++++--
Mdrivers/iommu/Makefile | 4+++-
Mdrivers/iommu/amd_iommu.c | 38+++++++++++++++++++-------------------
Adrivers/iommu/amd_iommu_debugfs.c | 33+++++++++++++++++++++++++++++++++
Mdrivers/iommu/amd_iommu_init.c | 57++++++++++++++++++++++++++++++++++++++++-----------------
Mdrivers/iommu/amd_iommu_proto.h | 6++++++
Mdrivers/iommu/amd_iommu_types.h | 22++++++++++++++++------
Mdrivers/iommu/arm-smmu-v3.c | 26++++++++++++++++++--------
Mdrivers/iommu/arm-smmu.c | 17++++++++++-------
Mdrivers/iommu/dmar.c | 6+++---
Mdrivers/iommu/exynos-iommu.c | 1-
Mdrivers/iommu/intel-iommu.c | 160++++++++++++++++++++++++++++++++++++++-----------------------------------------
Adrivers/iommu/intel-pasid.c | 239+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Adrivers/iommu/intel-pasid.h | 39+++++++++++++++++++++++++++++++++++++++
Mdrivers/iommu/intel-svm.c | 79+++++++++++++++++++++++++++++++++----------------------------------------------
Mdrivers/iommu/io-pgtable-arm-v7s.c | 7++++++-
Mdrivers/iommu/io-pgtable-arm.c | 3++-
Adrivers/iommu/iommu-debugfs.c | 66++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mdrivers/iommu/iommu.c | 44+++++++++++++++++++++++++++++++++++++++++---
Mdrivers/iommu/ipmmu-vmsa.c | 59++++++++++++++++++++++++++++++++++++++++++++++-------------
Mdrivers/iommu/msm_iommu.c | 17+++++------------
Mdrivers/iommu/mtk_iommu.c | 1-
Mdrivers/iommu/mtk_iommu_v1.c | 1-
Mdrivers/iommu/omap-iommu.c | 5++---
Mdrivers/iommu/qcom_iommu.c | 1-
Mdrivers/iommu/rockchip-iommu.c | 1-
Mdrivers/iommu/tegra-gart.c | 1-
Mdrivers/iommu/tegra-smmu.c | 1-
Minclude/linux/dmar.h | 5-----
Minclude/linux/intel-iommu.h | 80++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
Minclude/linux/iommu.h | 23+++++++++--------------
Dinclude/linux/platform_data/sh_ipmmu.h | 18------------------
36 files changed, 839 insertions(+), 277 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt @@ -1744,7 +1744,8 @@ merge nomerge soft - pt [x86, IA-64] + pt [x86] + nopt [x86] nobypass [PPC/POWERNV] Disable IOMMU bypass, using IOMMU for PCI devices. diff --git a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt @@ -19,7 +19,10 @@ Required Properties: - "renesas,ipmmu-r8a7794" for the R8A7794 (R-Car E2) IPMMU. - "renesas,ipmmu-r8a7795" for the R8A7795 (R-Car H3) IPMMU. - "renesas,ipmmu-r8a7796" for the R8A7796 (R-Car M3-W) IPMMU. + - "renesas,ipmmu-r8a77965" for the R8A77965 (R-Car M3-N) IPMMU. - "renesas,ipmmu-r8a77970" for the R8A77970 (R-Car V3M) IPMMU. + - "renesas,ipmmu-r8a77980" for the R8A77980 (R-Car V3H) IPMMU. + - "renesas,ipmmu-r8a77990" for the R8A77990 (R-Car E3) IPMMU. - "renesas,ipmmu-r8a77995" for the R8A77995 (R-Car D3) IPMMU. - "renesas,ipmmu-vmsa" for generic R-Car Gen2 or RZ/G1 VMSA-compatible IPMMU. diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h @@ -33,6 +33,11 @@ enum irq_remap_cap { IRQ_POSTING_CAP = 0, }; +enum { + IRQ_REMAP_XAPIC_MODE, + IRQ_REMAP_X2APIC_MODE, +}; + struct vcpu_data { u64 pi_desc_addr; /* Physical address of PI Descriptor */ u32 vector; /* Guest vector of the interrupt */ diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c @@ -40,8 +40,14 @@ int iommu_detected __read_mostly = 0; * devices and allow every device to access to whole physical memory. This is * useful if a user wants to use an IOMMU only for KVM device assignment to * guests and not for driver dma translation. + * It is also possible to disable by default in kernel config, and enable with + * iommu=nopt at boot time. */ +#ifdef CONFIG_IOMMU_DEFAULT_PASSTHROUGH +int iommu_pass_through __read_mostly = 1; +#else int iommu_pass_through __read_mostly; +#endif extern struct iommu_table_entry __iommu_table[], __iommu_table_end[]; @@ -135,6 +141,8 @@ static __init int iommu_setup(char *p) #endif if (!strncmp(p, "pt", 2)) iommu_pass_through = 1; + if (!strncmp(p, "nopt", 4)) + iommu_pass_through = 0; gart_parse_options(p); diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig @@ -60,6 +60,27 @@ config IOMMU_IO_PGTABLE_ARMV7S_SELFTEST endmenu +config IOMMU_DEBUGFS + bool "Export IOMMU internals in DebugFS" + depends on DEBUG_FS + help + Allows exposure of IOMMU device internals. This option enables + the use of debugfs by IOMMU drivers as required. Devices can, + at initialization time, cause the IOMMU code to create a top-level + debug/iommu directory, and then populate a subdirectory with + entries as required. + +config IOMMU_DEFAULT_PASSTHROUGH + bool "IOMMU passthrough by default" + depends on IOMMU_API + help + Enable passthrough by default, removing the need to pass in + iommu.passthrough=on or iommu=pt through command line. If this + is enabled, you can still disable with iommu.passthrough=off + or iommu=nopt depending on the architecture. + + If unsure, say N here. + config IOMMU_IOVA tristate @@ -135,6 +156,18 @@ config AMD_IOMMU_V2 hardware. Select this option if you want to use devices that support the PCI PRI and PASID interface. +config AMD_IOMMU_DEBUGFS + bool "Enable AMD IOMMU internals in DebugFS" + depends on AMD_IOMMU && IOMMU_DEBUGFS + ---help--- + !!!WARNING!!! !!!WARNING!!! !!!WARNING!!! !!!WARNING!!! + + DO NOT ENABLE THIS OPTION UNLESS YOU REALLY, -REALLY- KNOW WHAT YOU ARE DOING!!! + Exposes AMD IOMMU device internals in DebugFS. + + This option is -NOT- intended for production environments, and should + not generally be enabled. + # Intel IOMMU support config DMAR_TABLE bool @@ -284,8 +317,8 @@ config IPMMU_VMSA select IOMMU_IO_PGTABLE_LPAE select ARM_DMA_USE_IOMMU help - Support for the Renesas VMSA-compatible IPMMU Renesas found in the - R-Mobile APE6 and R-Car H2/M2 SoCs. + Support for the Renesas VMSA-compatible IPMMU found in the R-Mobile + APE6, R-Car Gen2, and R-Car Gen3 SoCs. If unsure, say N. diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile @@ -2,6 +2,7 @@ obj-$(CONFIG_IOMMU_API) += iommu.o obj-$(CONFIG_IOMMU_API) += iommu-traces.o obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o +obj-$(CONFIG_IOMMU_DEBUGFS) += iommu-debugfs.o obj-$(CONFIG_IOMMU_DMA) += dma-iommu.o obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o obj-$(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) += io-pgtable-arm-v7s.o @@ -10,11 +11,12 @@ obj-$(CONFIG_IOMMU_IOVA) += iova.o obj-$(CONFIG_OF_IOMMU) += of_iommu.o obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o +obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += amd_iommu_debugfs.o obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o obj-$(CONFIG_ARM_SMMU) += arm-smmu.o obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o obj-$(CONFIG_DMAR_TABLE) += dmar.o -obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o +obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o intel-pasid.o obj-$(CONFIG_INTEL_IOMMU_SVM) += intel-svm.o obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c @@ -1404,6 +1404,8 @@ static u64 *fetch_pte(struct protection_domain *domain, int level; u64 *pte; + *page_size = 0; + if (address > PM_LEVEL_SIZE(domain->mode)) return NULL; @@ -1944,12 +1946,6 @@ static int __attach_device(struct iommu_dev_data *dev_data, { int ret; - /* - * Must be called with IRQs disabled. Warn here to detect early - * when its not. - */ - WARN_ON(!irqs_disabled()); - /* lock domain */ spin_lock(&domain->lock); @@ -2115,12 +2111,6 @@ static void __detach_device(struct iommu_dev_data *dev_data) { struct protection_domain *domain; - /* - * Must be called with IRQs disabled. Warn here to detect early - * when its not. - */ - WARN_ON(!irqs_disabled()); - domain = dev_data->domain; spin_lock(&domain->lock); @@ -2405,9 +2395,9 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, } if (amd_iommu_unmap_flush) { - dma_ops_free_iova(dma_dom, dma_addr, pages); domain_flush_tlb(&dma_dom->domain); domain_flush_complete(&dma_dom->domain); + dma_ops_free_iova(dma_dom, dma_addr, pages); } else { pages = __roundup_pow_of_two(pages); queue_iova(&dma_dom->iovad, dma_addr >> PAGE_SHIFT, pages, 0); @@ -3192,7 +3182,6 @@ const struct iommu_ops amd_iommu_ops = { .detach_dev = amd_iommu_detach_device, .map = amd_iommu_map, .unmap = amd_iommu_unmap, - .map_sg = default_iommu_map_sg, .iova_to_phys = amd_iommu_iova_to_phys, .add_device = amd_iommu_add_device, .remove_device = amd_iommu_remove_device, @@ -3874,7 +3863,8 @@ static void irte_ga_prepare(void *entry, irte->lo.fields_remap.int_type = delivery_mode; irte->lo.fields_remap.dm = dest_mode; irte->hi.fields.vector = vector; - irte->lo.fields_remap.destination = dest_apicid; + irte->lo.fields_remap.destination = APICID_TO_IRTE_DEST_LO(dest_apicid); + irte->hi.fields.destination = APICID_TO_IRTE_DEST_HI(dest_apicid); irte->lo.fields_remap.valid = 1; } @@ -3927,7 +3917,10 @@ static void irte_ga_set_affinity(void *entry, u16 devid, u16 index, if (!irte->lo.fields_remap.guest_mode) { irte->hi.fields.vector = vector; - irte->lo.fields_remap.destination = dest_apicid; + irte->lo.fields_remap.destination = + APICID_TO_IRTE_DEST_LO(dest_apicid); + irte->hi.fields.destination = + APICID_TO_IRTE_DEST_HI(dest_apicid); modify_irte_ga(devid, index, irte, NULL); } } @@ -4344,7 +4337,10 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) irte->lo.val = 0; irte->hi.fields.vector = cfg->vector; irte->lo.fields_remap.guest_mode = 0; - irte->lo.fields_remap.destination = cfg->dest_apicid; + irte->lo.fields_remap.destination = + APICID_TO_IRTE_DEST_LO(cfg->dest_apicid); + irte->hi.fields.destination = + APICID_TO_IRTE_DEST_HI(cfg->dest_apicid); irte->lo.fields_remap.int_type = apic->irq_delivery_mode; irte->lo.fields_remap.dm = apic->irq_dest_mode; @@ -4461,8 +4457,12 @@ int amd_iommu_update_ga(int cpu, bool is_run, void *data) raw_spin_lock_irqsave(&table->lock, flags); if (ref->lo.fields_vapic.guest_mode) { - if (cpu >= 0) - ref->lo.fields_vapic.destination = cpu; + if (cpu >= 0) { + ref->lo.fields_vapic.destination = + APICID_TO_IRTE_DEST_LO(cpu); + ref->hi.fields.destination = + APICID_TO_IRTE_DEST_HI(cpu); + } ref->lo.fields_vapic.is_run = is_run; barrier(); } diff --git a/drivers/iommu/amd_iommu_debugfs.c b/drivers/iommu/amd_iommu_debugfs.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * AMD IOMMU driver + * + * Copyright (C) 2018 Advanced Micro Devices, Inc. + * + * Author: Gary R Hook <gary.hook@amd.com> + */ + +#include <linux/debugfs.h> +#include <linux/iommu.h> +#include <linux/pci.h> +#include "amd_iommu_proto.h" +#include "amd_iommu_types.h" + +static struct dentry *amd_iommu_debugfs; +static DEFINE_MUTEX(amd_iommu_debugfs_lock); + +#define MAX_NAME_LEN 20 + +void amd_iommu_debugfs_setup(struct amd_iommu *iommu) +{ + char name[MAX_NAME_LEN + 1]; + + mutex_lock(&amd_iommu_debugfs_lock); + if (!amd_iommu_debugfs) + amd_iommu_debugfs = debugfs_create_dir("amd", + iommu_debugfs_dir); + mutex_unlock(&amd_iommu_debugfs_lock); + + snprintf(name, MAX_NAME_LEN, "iommu%02d", iommu->index); + iommu->debugfs = debugfs_create_dir(name, amd_iommu_debugfs); +} diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c @@ -153,6 +153,7 @@ bool amd_iommu_dump; bool amd_iommu_irq_remap __read_mostly; int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC; +static int amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE; static bool amd_iommu_detected; static bool __initdata amd_iommu_disabled; @@ -280,9 +281,9 @@ static void clear_translation_pre_enabled(struct amd_iommu *iommu) static void init_translation_status(struct amd_iommu *iommu) { - u32 ctrl; + u64 ctrl; - ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); + ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); if (ctrl & (1<<CONTROL_IOMMU_EN)) iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED; } @@ -386,30 +387,30 @@ static void iommu_set_device_table(struct amd_iommu *iommu) /* Generic functions to enable/disable certain features of the IOMMU. */ static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit) { - u32 ctrl; + u64 ctrl; - ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); - ctrl |= (1 << bit); - writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); + ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); + ctrl |= (1ULL << bit); + writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); } static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit) { - u32 ctrl; + u64 ctrl; - ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); - ctrl &= ~(1 << bit); - writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); + ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); + ctrl &= ~(1ULL << bit); + writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); } static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout) { - u32 ctrl; + u64 ctrl; - ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); + ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); ctrl &= ~CTRL_INV_TO_MASK; ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK; - writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); + writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); } /* Function to enable the hardware */ @@ -827,6 +828,19 @@ static int iommu_init_ga(struct amd_iommu *iommu) return ret; } +static void iommu_enable_xt(struct amd_iommu *iommu) +{ +#ifdef CONFIG_IRQ_REMAP + /* + * XT mode (32-bit APIC destination ID) requires + * GA mode (128-bit IRTE support) as a prerequisite. + */ + if (AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir) && + amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) + iommu_feature_enable(iommu, CONTROL_XT_EN); +#endif /* CONFIG_IRQ_REMAP */ +} + static void iommu_enable_gt(struct amd_iommu *iommu) { if (!iommu_feature(iommu, FEATURE_GT)) @@ -1507,6 +1521,8 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; if (((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0)) amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; + if (((h->efr_attr & (0x1 << IOMMU_FEAT_XTSUP_SHIFT)) == 0)) + amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE; break; case 0x11: case 0x40: @@ -1516,6 +1532,8 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; if (((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0)) amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; + if (((h->efr_reg & (0x1 << IOMMU_EFR_XTSUP_SHIFT)) == 0)) + amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE; break; default: return -EINVAL; @@ -1832,6 +1850,8 @@ static void print_iommu_info(void) pr_info("AMD-Vi: Interrupt remapping enabled\n"); if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) pr_info("AMD-Vi: virtual APIC enabled\n"); + if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) + pr_info("AMD-Vi: X2APIC enabled\n"); } } @@ -2168,6 +2188,7 @@ static void early_enable_iommu(struct amd_iommu *iommu) iommu_enable_event_buffer(iommu); iommu_set_exclusion_range(iommu); iommu_enable_ga(iommu); + iommu_enable_xt(iommu); iommu_enable(iommu); iommu_flush_all_caches(iommu); } @@ -2212,6 +2233,7 @@ static void early_enable_iommus(void) iommu_enable_command_buffer(iommu); iommu_enable_event_buffer(iommu); iommu_enable_ga(iommu); + iommu_enable_xt(iommu); iommu_set_device_table(iommu); iommu_flush_all_caches(iommu); } @@ -2691,8 +2713,7 @@ int __init amd_iommu_enable(void) return ret; irq_remapping_enabled = 1; - - return 0; + return amd_iommu_xt_mode; } void amd_iommu_disable(void) @@ -2721,6 +2742,7 @@ int __init amd_iommu_enable_faulting(void) */ static int __init amd_iommu_init(void) { + struct amd_iommu *iommu; int ret; ret = iommu_go_to_state(IOMMU_INITIALIZED); @@ -2730,14 +2752,15 @@ static int __init amd_iommu_init(void) disable_iommus(); free_iommu_resources(); } else { - struct amd_iommu *iommu; - uninit_device_table_dma(); for_each_iommu(iommu) iommu_flush_all_caches(iommu); } } + for_each_iommu(iommu) + amd_iommu_debugfs_setup(iommu); + return ret; } diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h @@ -33,6 +33,12 @@ extern void amd_iommu_uninit_devices(void); extern void amd_iommu_init_notifier(void); extern int amd_iommu_init_api(void); +#ifdef CONFIG_AMD_IOMMU_DEBUGFS +void amd_iommu_debugfs_setup(struct amd_iommu *iommu); +#else +static inline void amd_iommu_debugfs_setup(struct amd_iommu *iommu) {} +#endif + /* Needed for interrupt remapping */ extern int amd_iommu_prepare(void); extern int amd_iommu_enable(void); diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h @@ -161,6 +161,7 @@ #define CONTROL_GAM_EN 0x19ULL #define CONTROL_GALOG_EN 0x1CULL #define CONTROL_GAINT_EN 0x1DULL +#define CONTROL_XT_EN 0x32ULL #define CTRL_INV_TO_MASK (7 << CONTROL_INV_TIMEOUT) #define CTRL_INV_TO_NONE 0 @@ -378,9 +379,11 @@ #define IOMMU_CAP_EFR 27 /* IOMMU Feature Reporting Field (for IVHD type 10h */ +#define IOMMU_FEAT_XTSUP_SHIFT 0 #define IOMMU_FEAT_GASUP_SHIFT 6 /* IOMMU Extended Feature Register (EFR) */ +#define IOMMU_EFR_XTSUP_SHIFT 2 #define IOMMU_EFR_GASUP_SHIFT 7 #define MAX_DOMAIN_ID 65536 @@ -437,7 +440,6 @@ extern struct kmem_cache *amd_iommu_irq_cache; #define APERTURE_RANGE_INDEX(a) ((a) >> APERTURE_RANGE_SHIFT) #define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL) - /* * This struct is used to pass information about * incoming PPR faults around. @@ -594,6 +596,11 @@ struct amd_iommu { u32 flags; volatile u64 __aligned(8) cmd_sem; + +#ifdef CONFIG_AMD_IOMMU_DEBUGFS + /* DebugFS Info */ + struct dentry *debugfs; +#endif }; static inline struct amd_iommu *dev_to_amd_iommu(struct device *dev) @@ -810,6 +817,9 @@ union irte { } fields; }; +#define APICID_TO_IRTE_DEST_LO(x) (x & 0xffffff) +#define APICID_TO_IRTE_DEST_HI(x) ((x >> 24) & 0xff) + union irte_ga_lo { u64 val; @@ -823,8 +833,8 @@ union irte_ga_lo { dm : 1, /* ------ */ guest_mode : 1, - destination : 8, - rsvd : 48; + destination : 24, + ga_tag : 32; } fields_remap; /* For guest vAPIC */ @@ -837,8 +847,7 @@ union irte_ga_lo { is_run : 1, /* ------ */ guest_mode : 1, - destination : 8, - rsvd2 : 16, + destination : 24, ga_tag : 32; } fields_vapic; }; @@ -849,7 +858,8 @@ union irte_ga_hi { u64 vector : 8, rsvd_1 : 4, ga_root_ptr : 40, - rsvd_2 : 12; + rsvd_2 : 4, + destination : 8; } fields; }; diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c @@ -24,6 +24,7 @@ #include <linux/acpi_iort.h> #include <linux/bitfield.h> #include <linux/bitops.h> +#include <linux/crash_dump.h> #include <linux/delay.h> #include <linux/dma-iommu.h> #include <linux/err.h> @@ -366,7 +367,7 @@ #define MSI_IOVA_BASE 0x8000000 #define MSI_IOVA_LENGTH 0x100000 -static bool disable_bypass; +static bool disable_bypass = 1; module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO); MODULE_PARM_DESC(disable_bypass, "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU."); @@ -1301,6 +1302,7 @@ static irqreturn_t arm_smmu_priq_thread(int irq, void *dev) /* Sync our overflow flag, as we believe we're up to speed */ q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons); + writel(q->cons, q->cons_reg); return IRQ_HANDLED; } @@ -1997,7 +1999,6 @@ static struct iommu_ops arm_smmu_ops = { .attach_dev = arm_smmu_attach_dev, .map = arm_smmu_map, .unmap = arm_smmu_unmap, - .map_sg = default_iommu_map_sg, .flush_iotlb_all = arm_smmu_iotlb_sync, .iotlb_sync = arm_smmu_iotlb_sync, .iova_to_phys = arm_smmu_iova_to_phys, @@ -2211,8 +2212,12 @@ static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr) reg &= ~clr; reg |= set; writel_relaxed(reg | GBPA_UPDATE, gbpa); - return readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE), - 1, ARM_SMMU_POLL_TIMEOUT_US); + ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE), + 1, ARM_SMMU_POLL_TIMEOUT_US); + + if (ret) + dev_err(smmu->dev, "GBPA not responding to update\n"); + return ret; } static void arm_smmu_free_msis(void *data) @@ -2392,8 +2397,15 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) /* Clear CR0 and sync (disables SMMU and queue processing) */ reg = readl_relaxed(smmu->base + ARM_SMMU_CR0); - if (reg & CR0_SMMUEN) + if (reg & CR0_SMMUEN) { + if (is_kdump_kernel()) { + arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0); + arm_smmu_device_disable(smmu); + return -EBUSY; + } + dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n"); + } ret = arm_smmu_device_disable(smmu); if (ret) @@ -2491,10 +2503,8 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) enables |= CR0_SMMUEN; } else { ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT); - if (ret) { - dev_err(smmu->dev, "GBPA not responding to update\n"); + if (ret) return ret; - } } ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, ARM_SMMU_CR0ACK); diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c @@ -1562,7 +1562,6 @@ static struct iommu_ops arm_smmu_ops = { .attach_dev = arm_smmu_attach_dev, .map = arm_smmu_map, .unmap = arm_smmu_unmap, - .map_sg = default_iommu_map_sg, .flush_iotlb_all = arm_smmu_iotlb_sync, .iotlb_sync = arm_smmu_iotlb_sync, .iova_to_phys = arm_smmu_iova_to_phys, @@ -2103,12 +2102,16 @@ static int arm_smmu_device_probe(struct platform_device *pdev) if (err) return err; - if (smmu->version == ARM_SMMU_V2 && - smmu->num_context_banks != smmu->num_context_irqs) { - dev_err(dev, - "found only %d context interrupt(s) but %d required\n", - smmu->num_context_irqs, smmu->num_context_banks); - return -ENODEV; + if (smmu->version == ARM_SMMU_V2) { + if (smmu->num_context_banks > smmu->num_context_irqs) { + dev_err(dev, + "found only %d context irq(s) but %d required\n", + smmu->num_context_irqs, smmu->num_context_banks); + return -ENODEV; + } + + /* Ignore superfluous interrupts */ + smmu->num_context_irqs = smmu->num_context_banks; } for (i = 0; i < smmu->num_global_irqs; ++i) { diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c @@ -1339,8 +1339,8 @@ void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, qi_submit_sync(&desc, iommu); } -void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep, - u64 addr, unsigned mask) +void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, + u16 qdep, u64 addr, unsigned mask) { struct qi_desc desc; @@ -1355,7 +1355,7 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep, qdep = 0; desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) | - QI_DIOTLB_TYPE; + QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid); qi_submit_sync(&desc, iommu); } diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c @@ -1332,7 +1332,6 @@ static const struct iommu_ops exynos_iommu_ops = { .detach_dev = exynos_iommu_detach_device, .map = exynos_iommu_map, .unmap = exynos_iommu_unmap, - .map_sg = default_iommu_map_sg, .iova_to_phys = exynos_iommu_iova_to_phys, .device_group = generic_device_group, .add_device = exynos_iommu_add_device, diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c @@ -52,6 +52,7 @@ #include <asm/iommu.h> #include "irq_remapping.h" +#include "intel-pasid.h" #define ROOT_SIZE VTD_PAGE_SIZE #define CONTEXT_SIZE VTD_PAGE_SIZE @@ -379,60 +380,6 @@ static int hw_pass_through = 1; for (idx = 0; idx < g_num_of_iommus; idx++) \ if (domain->iommu_refcnt[idx]) -struct dmar_domain { - int nid; /* node id */ - - unsigned iommu_refcnt[DMAR_UNITS_SUPPORTED]; - /* Refcount of devices per iommu */ - - - u16 iommu_did[DMAR_UNITS_SUPPORTED]; - /* Domain ids per IOMMU. Use u16 since - * domain ids are 16 bit wide according - * to VT-d spec, section 9.3 */ - - bool has_iotlb_device; - struct list_head devices; /* all devices' list */ - struct iova_domain iovad; /* iova's that belong to this domain */ - - struct dma_pte *pgd; /* virtual address */ - int gaw; /* max guest address width */ - - /* adjusted guest address width, 0 is level 2 30-bit */ - int agaw; - - int flags; /* flags to find out type of domain */ - - int iommu_coherency;/* indicate coherency of iommu access */ - int iommu_snooping; /* indicate snooping control feature*/ - int iommu_count; /* reference count of iommu */ - int iommu_superpage;/* Level of superpages supported: - 0 == 4KiB (no superpages), 1 == 2MiB, - 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */ - u64 max_addr; /* maximum mapped address */ - - struct iommu_domain domain; /* generic domain data structure for - iommu core */ -}; - -/* PCI domain-device relationship */ -struct device_domain_info { - struct list_head link; /* link to domain siblings */ - struct list_head global; /* link to global list */ - u8 bus; /* PCI bus number */ - u8 devfn; /* PCI devfn number */ - u8 pasid_supported:3; - u8 pasid_enabled:1; - u8 pri_supported:1; - u8 pri_enabled:1; - u8 ats_supported:1; - u8 ats_enabled:1; - u8 ats_qdep; - struct device *dev; /* it's NULL for PCIe-to-PCI bridge */ - struct intel_iommu *iommu; /* IOMMU used by this device */ - struct dmar_domain *domain; /* pointer to domain */ -}; - struct dmar_rmrr_unit { struct list_head list; /* list of rmrr units */ struct acpi_dmar_header *hdr; /* ACPI header */ @@ -523,6 +470,27 @@ EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); static DEFINE_SPINLOCK(device_domain_lock); static LIST_HEAD(device_domain_list); +/* + * Iterate over elements in device_domain_list and call the specified + * callback @fn against each element. This helper should only be used + * in the context where the device_domain_lock has already been holden. + */ +int for_each_device_domain(int (*fn)(struct device_domain_info *info, + void *data), void *data) +{ + int ret = 0; + struct device_domain_info *info; + + assert_spin_locked(&device_domain_lock); + list_for_each_entry(info, &device_domain_list, global) { + ret = fn(info, data); + if (ret) + return ret; + } + + return 0; +} + const struct iommu_ops intel_iommu_ops; static bool translation_pre_enabled(struct intel_iommu *iommu) @@ -629,7 +597,7 @@ static void set_iommu_domain(struct intel_iommu *iommu, u16 did, domains[did & 0xff] = domain; } -static inline void *alloc_pgtable_page(int node) +void *alloc_pgtable_page(int node) { struct page *page; void *vaddr = NULL; @@ -640,7 +608,7 @@ static inline void *alloc_pgtable_page(int node) return vaddr; } -static inline void free_pgtable_page(void *vaddr) +void free_pgtable_page(void *vaddr) { free_page((unsigned long)vaddr); } @@ -723,7 +691,7 @@ int iommu_calculate_agaw(struct intel_iommu *iommu) } /* This functionin only returns single iommu in a domain */ -static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) +struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) { int iommu_id; @@ -1501,6 +1469,20 @@ static void iommu_enable_dev_iotlb(struct device_domain_info *info) return; pdev = to_pci_dev(info->dev); + /* For IOMMU that supports device IOTLB throttling (DIT), we assign + * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge + * queue depth at PF level. If DIT is not set, PFSID will be treated as + * reserved, which should be set to 0. + */ + if (!ecap_dit(info->iommu->ecap)) + info->pfsid = 0; + else { + struct pci_dev *pf_pdev; + + /* pdev will be returned if device is not a vf */ + pf_pdev = pci_physfn(pdev); + info->pfsid = PCI_DEVID(pf_pdev->bus->number, pf_pdev->devfn); + } #ifdef CONFIG_INTEL_IOMMU_SVM /* The PCIe spec, in its wisdom, declares that the behaviour of @@ -1566,7 +1548,8 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain, sid = info->bus << 8 | info->devfn; qdep = info->ats_qdep; - qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask); + qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, + qdep, addr, mask); } spin_unlock_irqrestore(&device_domain_lock, flags); } @@ -1800,7 +1783,7 @@ static void free_dmar_iommu(struct intel_iommu *iommu) if (pasid_enabled(iommu)) { if (ecap_prs(iommu->ecap)) intel_svm_finish_prq(iommu); - intel_svm_free_pasid_tables(iommu); + intel_svm_exit(iommu); } #endif } @@ -2495,6 +2478,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, info->dev = dev; info->domain = domain; info->iommu = iommu; + info->pasid_table = NULL; if (dev && dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(info->dev); @@ -2552,6 +2536,15 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, list_add(&info->global, &device_domain_list); if (dev) dev->archdata.iommu = info; + + if (dev && dev_is_pci(dev) && info->pasid_supported) { + ret = intel_pasid_alloc_table(dev); + if (ret) { + __dmar_remove_one_dev_info(info); + spin_unlock_irqrestore(&device_domain_lock, flags); + return NULL; + } + } spin_unlock_irqrestore(&device_domain_lock, flags); if (dev && domain_context_mapping(domain, dev)) { @@ -3304,6 +3297,18 @@ static int __init init_dmars(void) } for_each_active_iommu(iommu, drhd) { + /* + * Find the max pasid size of all IOMMU's in the system. + * We need to ensure the system pasid table is no bigger + * than the smallest supported. + */ + if (pasid_enabled(iommu)) { + u32 temp = 2 << ecap_pss(iommu->ecap); + + intel_pasid_max_id = min_t(u32, temp, + intel_pasid_max_id); + } + g_iommus[iommu->seq_id] = iommu; intel_iommu_init_qi(iommu); @@ -3359,7 +3364,7 @@ static int __init init_dmars(void) hw_pass_through = 0; #ifdef CONFIG_INTEL_IOMMU_SVM if (pasid_enabled(iommu)) - intel_svm_alloc_pasid_tables(iommu); + intel_svm_init(iommu); #endif } @@ -3526,7 +3531,7 @@ static unsigned long intel_alloc_iova(struct device *dev, return iova_pfn; } -static struct dmar_domain *get_valid_domain_for_dev(struct device *dev) +struct dmar_domain *get_valid_domain_for_dev(struct device *dev) { struct dmar_domain *domain, *tmp; struct dmar_rmrr_unit *rmrr; @@ -4354,7 +4359,7 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) #ifdef CONFIG_INTEL_IOMMU_SVM if (pasid_enabled(iommu)) - intel_svm_alloc_pasid_tables(iommu); + intel_svm_init(iommu); #endif if (dmaru->ignored) { @@ -4906,6 +4911,7 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info) if (info->dev) { iommu_disable_dev_iotlb(info); domain_context_clear(iommu, info->dev); + intel_pasid_free_table(info->dev); } unlink_domain_info(info); @@ -5231,22 +5237,16 @@ static void intel_iommu_put_resv_regions(struct device *dev, #ifdef CONFIG_INTEL_IOMMU_SVM #define MAX_NR_PASID_BITS (20) -static inline unsigned long intel_iommu_get_pts(struct intel_iommu *iommu) +static inline unsigned long intel_iommu_get_pts(struct device *dev) { - /* - * Convert ecap_pss to extend context entry pts encoding, also - * respect the soft pasid_max value set by the iommu. - * - number of PASID bits = ecap_pss + 1 - * - number of PASID table entries = 2^(pts + 5) - * Therefore, pts = ecap_pss - 4 - * e.g. KBL ecap_pss = 0x13, PASID has 20 bits, pts = 15 - */ - if (ecap_pss(iommu->ecap) < 5) + int pts, max_pasid; + + max_pasid = intel_pasid_get_dev_max_id(dev); + pts = find_first_bit((unsigned long *)&max_pasid, MAX_NR_PASID_BITS); + if (pts < 5) return 0; - /* pasid_max is encoded as actual number of entries not the bits */ - return find_first_bit((unsigned long *)&iommu->pasid_max, - MAX_NR_PASID_BITS) - 5; + return pts - 5; } int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev) @@ -5282,8 +5282,8 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd if (!(ctx_lo & CONTEXT_PASIDE)) { if (iommu->pasid_state_table) context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table); - context[1].lo = (u64)virt_to_phys(iommu->pasid_table) | - intel_iommu_get_pts(iommu); + context[1].lo = (u64)virt_to_phys(info->pasid_table->table) | + intel_iommu_get_pts(sdev->dev); wmb(); /* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both @@ -5350,11 +5350,6 @@ struct intel_iommu *intel_svm_device_to_iommu(struct device *dev) return NULL; } - if (!iommu->pasid_table) { - dev_err(dev, "PASID not enabled on IOMMU; cannot enable SVM\n"); - return NULL; - } - return iommu; } #endif /* CONFIG_INTEL_IOMMU_SVM */ @@ -5367,7 +5362,6 @@ const struct iommu_ops intel_iommu_ops = { .detach_dev = intel_iommu_detach_device, .map = intel_iommu_map, .unmap = intel_iommu_unmap, - .map_sg = default_iommu_map_sg, .iova_to_phys = intel_iommu_iova_to_phys, .add_device = intel_iommu_add_device, .remove_device = intel_iommu_remove_device, diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c @@ -0,0 +1,239 @@ +// SPDX-License-Identifier: GPL-2.0 +/** + * intel-pasid.c - PASID idr, table and entry manipulation + * + * Copyright (C) 2018 Intel Corporation + * + * Author: Lu Baolu <baolu.lu@linux.intel.com> + */ + +#define pr_fmt(fmt) "DMAR: " fmt + +#include <linux/dmar.h> +#include <linux/intel-iommu.h> +#include <linux/iommu.h> +#include <linux/memory.h> +#include <linux/pci.h> +#include <linux/pci-ats.h> +#include <linux/spinlock.h> + +#include "intel-pasid.h" + +/* + * Intel IOMMU system wide PASID name space: + */ +static DEFINE_SPINLOCK(pasid_lock); +u32 intel_pasid_max_id = PASID_MAX; +static DEFINE_IDR(pasid_idr); + +int intel_pasid_alloc_id(void *ptr, int start, int end, gfp_t gfp) +{ + int ret, min, max; + + min = max_t(int, start, PASID_MIN); + max = min_t(int, end, intel_pasid_max_id); + + WARN_ON(in_interrupt()); + idr_preload(gfp); + spin_lock(&pasid_lock); + ret = idr_alloc(&pasid_idr, ptr, min, max, GFP_ATOMIC); + spin_unlock(&pasid_lock); + idr_preload_end(); + + return ret; +} + +void intel_pasid_free_id(int pasid) +{ + spin_lock(&pasid_lock); + idr_remove(&pasid_idr, pasid); + spin_unlock(&pasid_lock); +} + +void *intel_pasid_lookup_id(int pasid) +{ + void *p; + + spin_lock(&pasid_lock); + p = idr_find(&pasid_idr, pasid); + spin_unlock(&pasid_lock); + + return p; +} + +/* + * Per device pasid table management: + */ +static inline void +device_attach_pasid_table(struct device_domain_info *info, + struct pasid_table *pasid_table) +{ + info->pasid_table = pasid_table; + list_add(&info->table, &pasid_table->dev); +} + +static inline void +device_detach_pasid_table(struct device_domain_info *info, + struct pasid_table *pasid_table) +{ + info->pasid_table = NULL; + list_del(&info->table); +} + +struct pasid_table_opaque { + struct pasid_table **pasid_table; + int segment; + int bus; + int devfn; +}; + +static int search_pasid_table(struct device_domain_info *info, void *opaque) +{ + struct pasid_table_opaque *data = opaque; + + if (info->iommu->segment == data->segment && + info->bus == data->bus && + info->devfn == data->devfn && + info->pasid_table) { + *data->pasid_table = info->pasid_table; + return 1; + } + + return 0; +} + +static int get_alias_pasid_table(struct pci_dev *pdev, u16 alias, void *opaque) +{ + struct pasid_table_opaque *data = opaque; + + data->segment = pci_domain_nr(pdev->bus); + data->bus = PCI_BUS_NUM(alias); + data->devfn = alias & 0xff; + + return for_each_device_domain(&search_pasid_table, data); +} + +/* + * Allocate a pasid table for @dev. It should be called in a + * single-thread context. + */ +int intel_pasid_alloc_table(struct device *dev) +{ + struct device_domain_info *info; + struct pasid_table *pasid_table; + struct pasid_table_opaque data; + struct page *pages; + size_t size, count; + int ret, order; + + info = dev->archdata.iommu; + if (WARN_ON(!info || !dev_is_pci(dev) || + !info->pasid_supported || info->pasid_table)) + return -EINVAL; + + /* DMA alias device already has a pasid table, use it: */ + data.pasid_table = &pasid_table; + ret = pci_for_each_dma_alias(to_pci_dev(dev), + &get_alias_pasid_table, &data); + if (ret) + goto attach_out; + + pasid_table = kzalloc(sizeof(*pasid_table), GFP_ATOMIC); + if (!pasid_table) + return -ENOMEM; + INIT_LIST_HEAD(&pasid_table->dev); + + size = sizeof(struct pasid_entry); + count = min_t(int, pci_max_pasids(to_pci_dev(dev)), intel_pasid_max_id); + order = get_order(size * count); + pages = alloc_pages_node(info->iommu->node, + GFP_ATOMIC | __GFP_ZERO, + order); + if (!pages) + return -ENOMEM; + + pasid_table->table = page_address(pages); + pasid_table->order = order; + pasid_table->max_pasid = count; + +attach_out: + device_attach_pasid_table(info, pasid_table); + + return 0; +} + +void intel_pasid_free_table(struct device *dev) +{ + struct device_domain_info *info; + struct pasid_table *pasid_table; + + info = dev->archdata.iommu; + if (!info || !dev_is_pci(dev) || + !info->pasid_supported || !info->pasid_table) + return; + + pasid_table = info->pasid_table; + device_detach_pasid_table(info, pasid_table); + + if (!list_empty(&pasid_table->dev)) + return; + + free_pages((unsigned long)pasid_table->table, pasid_table->order); + kfree(pasid_table); +} + +struct pasid_table *intel_pasid_get_table(struct device *dev) +{ + struct device_domain_info *info; + + info = dev->archdata.iommu; + if (!info) + return NULL; + + return info->pasid_table; +} + +int intel_pasid_get_dev_max_id(struct device *dev) +{ + struct device_domain_info *info; + + info = dev->archdata.iommu; + if (!info || !info->pasid_table) + return 0; + + return info->pasid_table->max_pasid; +} + +struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid) +{ + struct pasid_table *pasid_table; + struct pasid_entry *entries; + + pasid_table = intel_pasid_get_table(dev); + if (WARN_ON(!pasid_table || pasid < 0 || + pasid >= intel_pasid_get_dev_max_id(dev))) + return NULL; + + entries = pasid_table->table; + + return &entries[pasid]; +} + +/* + * Interfaces for PASID table entry manipulation: + */ +static inline void pasid_clear_entry(struct pasid_entry *pe) +{ + WRITE_ONCE(pe->val, 0); +} + +void intel_pasid_clear_entry(struct device *dev, int pasid) +{ + struct pasid_entry *pe; + + pe = intel_pasid_get_entry(dev, pasid); + if (WARN_ON(!pe)) + return; + + pasid_clear_entry(pe); +} diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * intel-pasid.h - PASID idr, table and entry header + * + * Copyright (C) 2018 Intel Corporation + * + * Author: Lu Baolu <baolu.lu@linux.intel.com> + */ + +#ifndef __INTEL_PASID_H +#define __INTEL_PASID_H + +#define PASID_MIN 0x1 +#define PASID_MAX 0x100000 + +struct pasid_entry { + u64 val; +}; + +/* The representative of a PASID table */ +struct pasid_table { + void *table; /* pasid table pointer */ + int order; /* page order of pasid table */ + int max_pasid; /* max pasid */ + struct list_head dev; /* device list */ +}; + +extern u32 intel_pasid_max_id; +int intel_pasid_alloc_id(void *ptr, int start, int end, gfp_t gfp); +void intel_pasid_free_id(int pasid); +void *intel_pasid_lookup_id(int pasid); +int intel_pasid_alloc_table(struct device *dev); +void intel_pasid_free_table(struct device *dev); +struct pasid_table *intel_pasid_get_table(struct device *dev); +int intel_pasid_get_dev_max_id(struct device *dev); +struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid); +void intel_pasid_clear_entry(struct device *dev, int pasid); + +#endif /* __INTEL_PASID_H */ diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c @@ -27,21 +27,19 @@ #include <linux/mm_types.h> #include <asm/page.h> +#include "intel-pasid.h" + #define PASID_ENTRY_P BIT_ULL(0) #define PASID_ENTRY_FLPM_5LP BIT_ULL(9) #define PASID_ENTRY_SRE BIT_ULL(11) static irqreturn_t prq_event_thread(int irq, void *d); -struct pasid_entry { - u64 val; -}; - struct pasid_state_entry { u64 val; }; -int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu) +int intel_svm_init(struct intel_iommu *iommu) { struct page *pages; int order; @@ -66,15 +64,6 @@ int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu) iommu->pasid_max = 0x20000; order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max); - pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order); - if (!pages) { - pr_warn("IOMMU: %s: Failed to allocate PASID table\n", - iommu->name); - return -ENOMEM; - } - iommu->pasid_table = page_address(pages); - pr_info("%s: Allocated order %d PASID table.\n", iommu->name, order); - if (ecap_dis(iommu->ecap)) { /* Just making it explicit... */ BUILD_BUG_ON(sizeof(struct pasid_entry) != sizeof(struct pasid_state_entry)); @@ -86,24 +75,18 @@ int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu) iommu->name); } - idr_init(&iommu->pasid_idr); - return 0; } -int intel_svm_free_pasid_tables(struct intel_iommu *iommu) +int intel_svm_exit(struct intel_iommu *iommu) { int order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max); - if (iommu->pasid_table) { - free_pages((unsigned long)iommu->pasid_table, order); - iommu->pasid_table = NULL; - } if (iommu->pasid_state_table) { free_pages((unsigned long)iommu->pasid_state_table, order); iommu->pasid_state_table = NULL; } - idr_destroy(&iommu->pasid_idr); + return 0; } @@ -279,11 +262,9 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) * page) so that we end up taking a fault that the hardware really * *has* to handle gracefully without affecting other processes. */ - svm->iommu->pasid_table[svm->pasid].val = 0; - wmb(); - rcu_read_lock(); list_for_each_entry_rcu(sdev, &svm->devs, list) { + intel_pasid_clear_entry(sdev->dev, svm->pasid); intel_flush_pasid_dev(svm, sdev, svm->pasid); intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm); } @@ -299,10 +280,12 @@ static const struct mmu_notifier_ops intel_mmuops = { }; static DEFINE_MUTEX(pasid_mutex); +static LIST_HEAD(global_svm_list); int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops) { struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); + struct pasid_entry *entry; struct intel_svm_dev *sdev; struct intel_svm *svm = NULL; struct mm_struct *mm = NULL; @@ -310,7 +293,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ int pasid_max; int ret; - if (WARN_ON(!iommu || !iommu->pasid_table)) + if (!iommu) return -EINVAL; if (dev_is_pci(dev)) { @@ -330,13 +313,13 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ mutex_lock(&pasid_mutex); if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) { - int i; + struct intel_svm *t; - idr_for_each_entry(&iommu->pasid_idr, svm, i) { - if (svm->mm != mm || - (svm->flags & SVM_FLAG_PRIVATE_PASID)) + list_for_each_entry(t, &global_svm_list, list) { + if (t->mm != mm || (t->flags & SVM_FLAG_PRIVATE_PASID)) continue; + svm = t; if (svm->pasid >= pasid_max) { dev_warn(dev, "Limited PASID width. Cannot use existing PASID %d\n", @@ -388,13 +371,13 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ } svm->iommu = iommu; - if (pasid_max > iommu->pasid_max) - pasid_max = iommu->pasid_max; + if (pasid_max > intel_pasid_max_id) + pasid_max = intel_pasid_max_id; /* Do not use PASID 0 in caching mode (virtualised IOMMU) */ - ret = idr_alloc(&iommu->pasid_idr, svm, - !!cap_caching_mode(iommu->cap), - pasid_max - 1, GFP_KERNEL); + ret = intel_pasid_alloc_id(svm, + !!cap_caching_mode(iommu->cap), + pasid_max - 1, GFP_KERNEL); if (ret < 0) { kfree(svm); kfree(sdev); @@ -405,11 +388,12 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ svm->mm = mm; svm->flags = flags; INIT_LIST_HEAD_RCU(&svm->devs); + INIT_LIST_HEAD(&svm->list); ret = -ENOMEM; if (mm) { ret = mmu_notifier_register(&svm->notifier, mm); if (ret) { - idr_remove(&svm->iommu->pasid_idr, svm->pasid); + intel_pasid_free_id(svm->pasid); kfree(svm); kfree(sdev); goto out; @@ -421,7 +405,8 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ if (cpu_feature_enabled(X86_FEATURE_LA57)) pasid_entry_val |= PASID_ENTRY_FLPM_5LP; - iommu->pasid_table[svm->pasid].val = pasid_entry_val; + entry = intel_pasid_get_entry(dev, svm->pasid); + entry->val = pasid_entry_val; wmb(); @@ -431,6 +416,8 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ */ if (cap_caching_mode(iommu->cap)) intel_flush_pasid_dev(svm, sdev, svm->pasid); + + list_add_tail(&svm->list, &global_svm_list); } list_add_rcu(&sdev->list, &svm->devs); @@ -454,10 +441,10 @@ int intel_svm_unbind_mm(struct device *dev, int pasid) mutex_lock(&pasid_mutex); iommu = intel_svm_device_to_iommu(dev); - if (!iommu || !iommu->pasid_table) + if (!iommu) goto out; - svm = idr_find(&iommu->pasid_idr, pasid); + svm = intel_pasid_lookup_id(pasid); if (!svm) goto out; @@ -477,15 +464,15 @@ int intel_svm_unbind_mm(struct device *dev, int pasid) intel_flush_pasid_dev(svm, sdev, svm->pasid); intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm); kfree_rcu(sdev, rcu); + intel_pasid_clear_entry(dev, svm->pasid); if (list_empty(&svm->devs)) { - svm->iommu->pasid_table[svm->pasid].val = 0; - wmb(); - - idr_remove(&svm->iommu->pasid_idr, svm->pasid); + intel_pasid_free_id(svm->pasid); if (svm->mm) mmu_notifier_unregister(&svm->notifier, svm->mm); + list_del(&svm->list); + /* We mandate that no page faults may be outstanding * for the PASID when intel_svm_unbind_mm() is called. * If that is not obeyed, subtle errors will happen. @@ -512,10 +499,10 @@ int intel_svm_is_pasid_valid(struct device *dev, int pasid) mutex_lock(&pasid_mutex); iommu = intel_svm_device_to_iommu(dev); - if (!iommu || !iommu->pasid_table) + if (!iommu) goto out; - svm = idr_find(&iommu->pasid_idr, pasid); + svm = intel_pasid_lookup_id(pasid); if (!svm) goto out; @@ -614,7 +601,7 @@ static irqreturn_t prq_event_thread(int irq, void *d) if (!svm || svm->pasid != req->pasid) { rcu_read_lock(); - svm = idr_find(&iommu->pasid_idr, req->pasid); + svm = intel_pasid_lookup_id(req->pasid); /* It *can't* go away, because the driver is not permitted * to unbind the mm while any page faults are outstanding. * So we only need RCU to protect the internal idr code. */ diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c @@ -192,6 +192,7 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp, { struct io_pgtable_cfg *cfg = &data->iop.cfg; struct device *dev = cfg->iommu_dev; + phys_addr_t phys; dma_addr_t dma; size_t size = ARM_V7S_TABLE_SIZE(lvl); void *table = NULL; @@ -200,6 +201,10 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp, table = (void *)__get_dma_pages(__GFP_ZERO, get_order(size)); else if (lvl == 2) table = kmem_cache_zalloc(data->l2_tables, gfp | GFP_DMA); + phys = virt_to_phys(table); + if (phys != (arm_v7s_iopte)phys) + /* Doesn't fit in PTE */ + goto out_free; if (table && !(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) { dma = dma_map_single(dev, table, size, DMA_TO_DEVICE); if (dma_mapping_error(dev, dma)) @@ -209,7 +214,7 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp, * address directly, so if the DMA layer suggests otherwise by * translating or truncating them, that bodes very badly... */ - if (dma != virt_to_phys(table)) + if (dma != phys) goto out_unmap; } kmemleak_ignore(table); diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c @@ -237,7 +237,8 @@ static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp, void *pages; VM_BUG_ON((gfp & __GFP_HIGHMEM)); - p = alloc_pages_node(dev_to_node(dev), gfp | __GFP_ZERO, order); + p = alloc_pages_node(dev ? dev_to_node(dev) : NUMA_NO_NODE, + gfp | __GFP_ZERO, order); if (!p) return NULL; diff --git a/drivers/iommu/iommu-debugfs.c b/drivers/iommu/iommu-debugfs.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * IOMMU debugfs core infrastructure + * + * Copyright (C) 2018 Advanced Micro Devices, Inc. + * + * Author: Gary R Hook <gary.hook@amd.com> + */ + +#include <linux/pci.h> +#include <linux/iommu.h> +#include <linux/debugfs.h> + +struct dentry *iommu_debugfs_dir; + +/** + * iommu_debugfs_setup - create the top-level iommu directory in debugfs + * + * Provide base enablement for using debugfs to expose internal data of an + * IOMMU driver. When called, this function creates the + * /sys/kernel/debug/iommu directory. + * + * Emit a strong warning at boot time to indicate that this feature is + * enabled. + * + * This function is called from iommu_init; drivers may then call + * iommu_debugfs_new_driver_dir() to instantiate a vendor-specific + * directory to be used to expose internal data. + */ +void iommu_debugfs_setup(void) +{ + if (!iommu_debugfs_dir) { + iommu_debugfs_dir = debugfs_create_dir("iommu", NULL); + pr_warn("\n"); + pr_warn("*************************************************************\n"); + pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); + pr_warn("** **\n"); + pr_warn("** IOMMU DebugFS SUPPORT HAS BEEN ENABLED IN THIS KERNEL **\n"); + pr_warn("** **\n"); + pr_warn("** This means that this kernel is built to expose internal **\n"); + pr_warn("** IOMMU data structures, which may compromise security on **\n"); + pr_warn("** your system. **\n"); + pr_warn("** **\n"); + pr_warn("** If you see this message and you are not debugging the **\n"); + pr_warn("** kernel, report this immediately to your vendor! **\n"); + pr_warn("** **\n"); + pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); + pr_warn("*************************************************************\n"); + } +} + +/** + * iommu_debugfs_new_driver_dir - create a vendor directory under debugfs/iommu + * @vendor: name of the vendor-specific subdirectory to create + * + * This function is called by an IOMMU driver to create the top-level debugfs + * directory for that driver. + * + * Return: upon success, a pointer to the dentry for the new directory. + * NULL in case of failure. + */ +struct dentry *iommu_debugfs_new_driver_dir(const char *vendor) +{ + return debugfs_create_dir(vendor, iommu_debugfs_dir); +} +EXPORT_SYMBOL_GPL(iommu_debugfs_new_driver_dir); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c @@ -36,7 +36,11 @@ static struct kset *iommu_group_kset; static DEFINE_IDA(iommu_group_ida); +#ifdef CONFIG_IOMMU_DEFAULT_PASSTHROUGH +static unsigned int iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY; +#else static unsigned int iommu_def_domain_type = IOMMU_DOMAIN_DMA; +#endif struct iommu_callback_data { const struct iommu_ops *ops; @@ -294,11 +298,39 @@ static ssize_t iommu_group_show_resv_regions(struct iommu_group *group, return (str - buf); } +static ssize_t iommu_group_show_type(struct iommu_group *group, + char *buf) +{ + char *type = "unknown\n"; + + if (group->default_domain) { + switch (group->default_domain->type) { + case IOMMU_DOMAIN_BLOCKED: + type = "blocked\n"; + break; + case IOMMU_DOMAIN_IDENTITY: + type = "identity\n"; + break; + case IOMMU_DOMAIN_UNMANAGED: + type = "unmanaged\n"; + break; + case IOMMU_DOMAIN_DMA: + type = "DMA"; + break; + } + } + strcpy(buf, type); + + return strlen(type); +} + static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL); static IOMMU_GROUP_ATTR(reserved_regions, 0444, iommu_group_show_resv_regions, NULL); +static IOMMU_GROUP_ATTR(type, 0444, iommu_group_show_type, NULL); + static void iommu_group_release(struct kobject *kobj) { struct iommu_group *group = to_iommu_group(kobj); @@ -380,6 +412,10 @@ struct iommu_group *iommu_group_alloc(void) if (ret) return ERR_PTR(ret); + ret = iommu_group_create_file(group, &iommu_group_attr_type); + if (ret) + return ERR_PTR(ret); + pr_debug("Allocated group %d\n", group->id); return group; @@ -1637,8 +1673,8 @@ size_t iommu_unmap_fast(struct iommu_domain *domain, } EXPORT_SYMBOL_GPL(iommu_unmap_fast); -size_t default_iommu_map_sg(struct iommu_domain *domain, unsigned long iova, - struct scatterlist *sg, unsigned int nents, int prot) +size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, + struct scatterlist *sg, unsigned int nents, int prot) { struct scatterlist *s; size_t mapped = 0; @@ -1678,7 +1714,7 @@ out_err: return 0; } -EXPORT_SYMBOL_GPL(default_iommu_map_sg); +EXPORT_SYMBOL_GPL(iommu_map_sg); int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, phys_addr_t paddr, u64 size, int prot) @@ -1748,6 +1784,8 @@ static int __init iommu_init(void) NULL, kernel_kobj); BUG_ON(!iommu_group_kset); + iommu_debugfs_setup(); + return 0; } core_initcall(iommu_init); diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c @@ -47,6 +47,7 @@ struct ipmmu_features { unsigned int number_of_contexts; bool setup_imbuscr; bool twobit_imttbcr_sl0; + bool reserved_context; }; struct ipmmu_vmsa_device { @@ -73,7 +74,7 @@ struct ipmmu_vmsa_domain { struct io_pgtable_ops *iop; unsigned int context_id; - spinlock_t lock; /* Protects mappings */ + struct mutex mutex; /* Protects mappings */ }; static struct ipmmu_vmsa_domain *to_vmsa_domain(struct iommu_domain *dom) @@ -194,7 +195,9 @@ static struct ipmmu_vmsa_device *to_ipmmu(struct device *dev) #define IMPMBA(n) (0x0280 + ((n) * 4)) #define IMPMBD(n) (0x02c0 + ((n) * 4)) -#define IMUCTR(n) (0x0300 + ((n) * 16)) +#define IMUCTR(n) ((n) < 32 ? IMUCTR0(n) : IMUCTR32(n)) +#define IMUCTR0(n) (0x0300 + ((n) * 16)) +#define IMUCTR32(n) (0x0600 + (((n) - 32) * 16)) #define IMUCTR_FIXADDEN (1 << 31) #define IMUCTR_FIXADD_MASK (0xff << 16) #define IMUCTR_FIXADD_SHIFT 16 @@ -204,7 +207,9 @@ static struct ipmmu_vmsa_device *to_ipmmu(struct device *dev) #define IMUCTR_FLUSH (1 << 1) #define IMUCTR_MMUEN (1 << 0) -#define IMUASID(n) (0x0308 + ((n) * 16)) +#define IMUASID(n) ((n) < 32 ? IMUASID0(n) : IMUASID32(n)) +#define IMUASID0(n) (0x0308 + ((n) * 16)) +#define IMUASID32(n) (0x0608 + (((n) - 32) * 16)) #define IMUASID_ASID8_MASK (0xff << 8) #define IMUASID_ASID8_SHIFT 8 #define IMUASID_ASID0_MASK (0xff << 0) @@ -595,7 +600,7 @@ static struct iommu_domain *__ipmmu_domain_alloc(unsigned type) if (!domain) return NULL; - spin_lock_init(&domain->lock); + mutex_init(&domain->mutex); return &domain->io_domain; } @@ -641,7 +646,6 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain, struct iommu_fwspec *fwspec = dev->iommu_fwspec; struct ipmmu_vmsa_device *mmu = to_ipmmu(dev); struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); - unsigned long flags; unsigned int i; int ret = 0; @@ -650,7 +654,7 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain, return -ENXIO; } - spin_lock_irqsave(&domain->lock, flags); + mutex_lock(&domain->mutex); if (!domain->mmu) { /* The domain hasn't been used yet, initialize it. */ @@ -674,7 +678,7 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain, } else dev_info(dev, "Reusing IPMMU context %u\n", domain->context_id); - spin_unlock_irqrestore(&domain->lock, flags); + mutex_unlock(&domain->mutex); if (ret < 0) return ret; @@ -756,8 +760,12 @@ static bool ipmmu_slave_whitelist(struct device *dev) return false; } -static const struct soc_device_attribute soc_r8a7795[] = { +static const struct soc_device_attribute soc_rcar_gen3[] = { { .soc_id = "r8a7795", }, + { .soc_id = "r8a7796", }, + { .soc_id = "r8a77965", }, + { .soc_id = "r8a77970", }, + { .soc_id = "r8a77995", }, { /* sentinel */ } }; @@ -765,7 +773,7 @@ static int ipmmu_of_xlate(struct device *dev, struct of_phandle_args *spec) { /* For R-Car Gen3 use a white list to opt-in slave devices */ - if (soc_device_match(soc_r8a7795) && !ipmmu_slave_whitelist(dev)) + if (soc_device_match(soc_rcar_gen3) && !ipmmu_slave_whitelist(dev)) return -ENODEV; iommu_fwspec_add_ids(dev, spec->args, 1); @@ -889,7 +897,6 @@ static const struct iommu_ops ipmmu_ops = { .unmap = ipmmu_unmap, .flush_iotlb_all = ipmmu_iotlb_sync, .iotlb_sync = ipmmu_iotlb_sync, - .map_sg = default_iommu_map_sg, .iova_to_phys = ipmmu_iova_to_phys, .add_device = ipmmu_add_device, .remove_device = ipmmu_remove_device, @@ -917,14 +924,16 @@ static const struct ipmmu_features ipmmu_features_default = { .number_of_contexts = 1, /* software only tested with one context */ .setup_imbuscr = true, .twobit_imttbcr_sl0 = false, + .reserved_context = false, }; -static const struct ipmmu_features ipmmu_features_r8a7795 = { +static const struct ipmmu_features ipmmu_features_rcar_gen3 = { .use_ns_alias_offset = false, .has_cache_leaf_nodes = true, .number_of_contexts = 8, .setup_imbuscr = false, .twobit_imttbcr_sl0 = true, + .reserved_context = true, }; static const struct of_device_id ipmmu_of_ids[] = { @@ -933,7 +942,19 @@ static const struct of_device_id ipmmu_of_ids[] = { .data = &ipmmu_features_default, }, { .compatible = "renesas,ipmmu-r8a7795", - .data = &ipmmu_features_r8a7795, + .data = &ipmmu_features_rcar_gen3, + }, { + .compatible = "renesas,ipmmu-r8a7796", + .data = &ipmmu_features_rcar_gen3, + }, { + .compatible = "renesas,ipmmu-r8a77965", + .data = &ipmmu_features_rcar_gen3, + }, { + .compatible = "renesas,ipmmu-r8a77970", + .data = &ipmmu_features_rcar_gen3, + }, { + .compatible = "renesas,ipmmu-r8a77995", + .data = &ipmmu_features_rcar_gen3, }, { /* Terminator */ }, @@ -955,7 +976,7 @@ static int ipmmu_probe(struct platform_device *pdev) } mmu->dev = &pdev->dev; - mmu->num_utlbs = 32; + mmu->num_utlbs = 48; spin_lock_init(&mmu->lock); bitmap_zero(mmu->ctx, IPMMU_CTX_MAX); mmu->features = of_device_get_match_data(&pdev->dev); @@ -1018,6 +1039,11 @@ static int ipmmu_probe(struct platform_device *pdev) } ipmmu_device_reset(mmu); + + if (mmu->features->reserved_context) { + dev_info(&pdev->dev, "IPMMU context 0 is reserved\n"); + set_bit(0, mmu->ctx); + } } /* @@ -1081,12 +1107,19 @@ static struct platform_driver ipmmu_driver = { static int __init ipmmu_init(void) { + struct device_node *np; static bool setup_done; int ret; if (setup_done) return 0; + np = of_find_matching_node(NULL, ipmmu_of_ids); + if (!np) + return 0; + + of_node_put(np); + ret = platform_driver_register(&ipmmu_driver); if (ret < 0) return ret; diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c @@ -395,20 +395,15 @@ static int msm_iommu_add_device(struct device *dev) struct msm_iommu_dev *iommu; struct iommu_group *group; unsigned long flags; - int ret = 0; spin_lock_irqsave(&msm_iommu_lock, flags); - iommu = find_iommu_for_dev(dev); + spin_unlock_irqrestore(&msm_iommu_lock, flags); + if (iommu) iommu_device_link(&iommu->iommu, dev); else - ret = -ENODEV; - - spin_unlock_irqrestore(&msm_iommu_lock, flags); - - if (ret) - return ret; + return -ENODEV; group = iommu_group_get_for_dev(dev); if (IS_ERR(group)) @@ -425,13 +420,12 @@ static void msm_iommu_remove_device(struct device *dev) unsigned long flags; spin_lock_irqsave(&msm_iommu_lock, flags); - iommu = find_iommu_for_dev(dev); + spin_unlock_irqrestore(&msm_iommu_lock, flags); + if (iommu) iommu_device_unlink(&iommu->iommu, dev); - spin_unlock_irqrestore(&msm_iommu_lock, flags); - iommu_group_remove_device(dev); } @@ -708,7 +702,6 @@ static struct iommu_ops msm_iommu_ops = { .detach_dev = msm_iommu_detach_dev, .map = msm_iommu_map, .unmap = msm_iommu_unmap, - .map_sg = default_iommu_map_sg, .iova_to_phys = msm_iommu_iova_to_phys, .add_device = msm_iommu_add_device, .remove_device = msm_iommu_remove_device, diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c @@ -495,7 +495,6 @@ static struct iommu_ops mtk_iommu_ops = { .detach_dev = mtk_iommu_detach_device, .map = mtk_iommu_map, .unmap = mtk_iommu_unmap, - .map_sg = default_iommu_map_sg, .flush_iotlb_all = mtk_iommu_iotlb_sync, .iotlb_sync = mtk_iommu_iotlb_sync, .iova_to_phys = mtk_iommu_iova_to_phys, diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c @@ -531,7 +531,6 @@ static struct iommu_ops mtk_iommu_ops = { .detach_dev = mtk_iommu_detach_device, .map = mtk_iommu_map, .unmap = mtk_iommu_unmap, - .map_sg = default_iommu_map_sg, .iova_to_phys = mtk_iommu_iova_to_phys, .add_device = mtk_iommu_add_device, .remove_device = mtk_iommu_remove_device, diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c @@ -550,7 +550,7 @@ static u32 *iopte_alloc(struct omap_iommu *obj, u32 *iopgd, pte_ready: iopte = iopte_offset(iopgd, da); - *pt_dma = virt_to_phys(iopte); + *pt_dma = iopgd_page_paddr(iopgd); dev_vdbg(obj->dev, "%s: da:%08x pgd:%p *pgd:%08x pte:%p *pte:%08x\n", __func__, da, iopgd, *iopgd, iopte, *iopte); @@ -738,7 +738,7 @@ static size_t iopgtable_clear_entry_core(struct omap_iommu *obj, u32 da) } bytes *= nent; memset(iopte, 0, nent * sizeof(*iopte)); - pt_dma = virt_to_phys(iopte); + pt_dma = iopgd_page_paddr(iopgd); flush_iopte_range(obj->dev, pt_dma, pt_offset, nent); /* @@ -1548,7 +1548,6 @@ static const struct iommu_ops omap_iommu_ops = { .detach_dev = omap_iommu_detach_dev, .map = omap_iommu_map, .unmap = omap_iommu_unmap, - .map_sg = default_iommu_map_sg, .iova_to_phys = omap_iommu_iova_to_phys, .add_device = omap_iommu_add_device, .remove_device = omap_iommu_remove_device, diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c @@ -590,7 +590,6 @@ static const struct iommu_ops qcom_iommu_ops = { .detach_dev = qcom_iommu_detach_dev, .map = qcom_iommu_map, .unmap = qcom_iommu_unmap, - .map_sg = default_iommu_map_sg, .flush_iotlb_all = qcom_iommu_iotlb_sync, .iotlb_sync = qcom_iommu_iotlb_sync, .iova_to_phys = qcom_iommu_iova_to_phys, diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c @@ -1110,7 +1110,6 @@ static const struct iommu_ops rk_iommu_ops = { .detach_dev = rk_iommu_detach_device, .map = rk_iommu_map, .unmap = rk_iommu_unmap, - .map_sg = default_iommu_map_sg, .add_device = rk_iommu_add_device, .remove_device = rk_iommu_remove_device, .iova_to_phys = rk_iommu_iova_to_phys, diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c @@ -377,7 +377,6 @@ static const struct iommu_ops gart_iommu_ops = { .remove_device = gart_iommu_remove_device, .device_group = generic_device_group, .map = gart_iommu_map, - .map_sg = default_iommu_map_sg, .unmap = gart_iommu_unmap, .iova_to_phys = gart_iommu_iova_to_phys, .pgsize_bitmap = GART_IOMMU_PGSIZES, diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c @@ -876,7 +876,6 @@ static const struct iommu_ops tegra_smmu_ops = { .device_group = tegra_smmu_device_group, .map = tegra_smmu_map, .unmap = tegra_smmu_unmap, - .map_sg = default_iommu_map_sg, .iova_to_phys = tegra_smmu_iova_to_phys, .of_xlate = tegra_smmu_of_xlate, .pgsize_bitmap = SZ_4K, diff --git a/include/linux/dmar.h b/include/linux/dmar.h @@ -265,11 +265,6 @@ static inline void dmar_copy_shared_irte(struct irte *dst, struct irte *src) #define PDA_LOW_BIT 26 #define PDA_HIGH_BIT 32 -enum { - IRQ_REMAP_XAPIC_MODE, - IRQ_REMAP_X2APIC_MODE, -}; - /* Can't use the common MSI interrupt functions * since DMAR is not a pci device */ diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h @@ -31,6 +31,7 @@ #include <linux/list.h> #include <linux/iommu.h> #include <linux/io-64-nonatomic-lo-hi.h> +#include <linux/dmar.h> #include <asm/cacheflush.h> #include <asm/iommu.h> @@ -114,6 +115,7 @@ * Extended Capability Register */ +#define ecap_dit(e) ((e >> 41) & 0x1) #define ecap_pasid(e) ((e >> 40) & 0x1) #define ecap_pss(e) ((e >> 35) & 0x1f) #define ecap_eafs(e) ((e >> 34) & 0x1) @@ -284,6 +286,7 @@ enum { #define QI_DEV_IOTLB_SID(sid) ((u64)((sid) & 0xffff) << 32) #define QI_DEV_IOTLB_QDEP(qdep) (((qdep) & 0x1f) << 16) #define QI_DEV_IOTLB_ADDR(addr) ((u64)(addr) & VTD_PAGE_MASK) +#define QI_DEV_IOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | ((u64)(pfsid & 0xfff) << 52)) #define QI_DEV_IOTLB_SIZE 1 #define QI_DEV_IOTLB_MAX_INVS 32 @@ -308,6 +311,7 @@ enum { #define QI_DEV_EIOTLB_PASID(p) (((u64)p) << 32) #define QI_DEV_EIOTLB_SID(sid) ((u64)((sid) & 0xffff) << 16) #define QI_DEV_EIOTLB_QDEP(qd) ((u64)((qd) & 0x1f) << 4) +#define QI_DEV_EIOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | ((u64)(pfsid & 0xfff) << 52)) #define QI_DEV_EIOTLB_MAX_INVS 32 #define QI_PGRP_IDX(idx) (((u64)(idx)) << 55) @@ -385,6 +389,42 @@ struct pasid_entry; struct pasid_state_entry; struct page_req_dsc; +struct dmar_domain { + int nid; /* node id */ + + unsigned iommu_refcnt[DMAR_UNITS_SUPPORTED]; + /* Refcount of devices per iommu */ + + + u16 iommu_did[DMAR_UNITS_SUPPORTED]; + /* Domain ids per IOMMU. Use u16 since + * domain ids are 16 bit wide according + * to VT-d spec, section 9.3 */ + + bool has_iotlb_device; + struct list_head devices; /* all devices' list */ + struct iova_domain iovad; /* iova's that belong to this domain */ + + struct dma_pte *pgd; /* virtual address */ + int gaw; /* max guest address width */ + + /* adjusted guest address width, 0 is level 2 30-bit */ + int agaw; + + int flags; /* flags to find out type of domain */ + + int iommu_coherency;/* indicate coherency of iommu access */ + int iommu_snooping; /* indicate snooping control feature*/ + int iommu_count; /* reference count of iommu */ + int iommu_superpage;/* Level of superpages supported: + 0 == 4KiB (no superpages), 1 == 2MiB, + 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */ + u64 max_addr; /* maximum mapped address */ + + struct iommu_domain domain; /* generic domain data structure for + iommu core */ +}; + struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ u64 reg_phys; /* physical address of hw register set */ @@ -414,11 +454,9 @@ struct intel_iommu { * devices away to userspace processes (e.g. for DPDK) and don't * want to trust that userspace will use *only* the PASID it was * told to. But while it's all driver-arbitrated, we're fine. */ - struct pasid_entry *pasid_table; struct pasid_state_entry *pasid_state_table; struct page_req_dsc *prq; unsigned char prq_name[16]; /* Name for PRQ interrupt */ - struct idr pasid_idr; u32 pasid_max; #endif struct q_inval *qi; /* Queued invalidation info */ @@ -434,6 +472,27 @@ struct intel_iommu { u32 flags; /* Software defined flags */ }; +/* PCI domain-device relationship */ +struct device_domain_info { + struct list_head link; /* link to domain siblings */ + struct list_head global; /* link to global list */ + struct list_head table; /* link to pasid table */ + u8 bus; /* PCI bus number */ + u8 devfn; /* PCI devfn number */ + u16 pfsid; /* SRIOV physical function source ID */ + u8 pasid_supported:3; + u8 pasid_enabled:1; + u8 pri_supported:1; + u8 pri_enabled:1; + u8 ats_supported:1; + u8 ats_enabled:1; + u8 ats_qdep; + struct device *dev; /* it's NULL for PCIe-to-PCI bridge */ + struct intel_iommu *iommu; /* IOMMU used by this device */ + struct dmar_domain *domain; /* pointer to domain */ + struct pasid_table *pasid_table; /* pasid table */ +}; + static inline void __iommu_flush_cache( struct intel_iommu *iommu, void *addr, int size) { @@ -453,16 +512,22 @@ extern void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, u64 type); extern void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, unsigned int size_order, u64 type); -extern void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep, - u64 addr, unsigned mask); - +extern void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, + u16 qdep, u64 addr, unsigned mask); extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); extern int dmar_ir_support(void); +struct dmar_domain *get_valid_domain_for_dev(struct device *dev); +void *alloc_pgtable_page(int node); +void free_pgtable_page(void *vaddr); +struct intel_iommu *domain_get_iommu(struct dmar_domain *domain); +int for_each_device_domain(int (*fn)(struct device_domain_info *info, + void *data), void *data); + #ifdef CONFIG_INTEL_IOMMU_SVM -extern int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu); -extern int intel_svm_free_pasid_tables(struct intel_iommu *iommu); +int intel_svm_init(struct intel_iommu *iommu); +int intel_svm_exit(struct intel_iommu *iommu); extern int intel_svm_enable_prq(struct intel_iommu *iommu); extern int intel_svm_finish_prq(struct intel_iommu *iommu); @@ -486,6 +551,7 @@ struct intel_svm { int flags; int pasid; struct list_head devs; + struct list_head list; }; extern int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev); diff --git a/include/linux/iommu.h b/include/linux/iommu.h @@ -166,8 +166,6 @@ struct iommu_resv_region { * @detach_dev: detach device from an iommu domain * @map: map a physically contiguous memory region to an iommu domain * @unmap: unmap a physically contiguous memory region from an iommu domain - * @map_sg: map a scatter-gather list of physically contiguous memory chunks - * to an iommu domain * @flush_tlb_all: Synchronously flush all hardware TLBs for this domain * @tlb_range_add: Add a given iova range to the flush queue for this domain * @tlb_sync: Flush all queued ranges from the hardware TLBs and empty flush @@ -201,8 +199,6 @@ struct iommu_ops { phys_addr_t paddr, size_t size, int prot); size_t (*unmap)(struct iommu_domain *domain, unsigned long iova, size_t size); - size_t (*map_sg)(struct iommu_domain *domain, unsigned long iova, - struct scatterlist *sg, unsigned int nents, int prot); void (*flush_iotlb_all)(struct iommu_domain *domain); void (*iotlb_range_add)(struct iommu_domain *domain, unsigned long iova, size_t size); @@ -303,9 +299,8 @@ extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size); extern size_t iommu_unmap_fast(struct iommu_domain *domain, unsigned long iova, size_t size); -extern size_t default_iommu_map_sg(struct iommu_domain *domain, unsigned long iova, - struct scatterlist *sg,unsigned int nents, - int prot); +extern size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, + struct scatterlist *sg,unsigned int nents, int prot); extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova); extern void iommu_set_fault_handler(struct iommu_domain *domain, iommu_fault_handler_t handler, void *token); @@ -378,13 +373,6 @@ static inline void iommu_tlb_sync(struct iommu_domain *domain) domain->ops->iotlb_sync(domain); } -static inline size_t iommu_map_sg(struct iommu_domain *domain, - unsigned long iova, struct scatterlist *sg, - unsigned int nents, int prot) -{ - return domain->ops->map_sg(domain, iova, sg, nents, prot); -} - /* PCI device grouping function */ extern struct iommu_group *pci_device_group(struct device *dev); /* Generic device grouping function */ @@ -698,4 +686,11 @@ const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode) #endif /* CONFIG_IOMMU_API */ +#ifdef CONFIG_IOMMU_DEBUGFS +extern struct dentry *iommu_debugfs_dir; +void iommu_debugfs_setup(void); +#else +static inline void iommu_debugfs_setup(void) {} +#endif + #endif /* __LINUX_IOMMU_H */ diff --git a/include/linux/platform_data/sh_ipmmu.h b/include/linux/platform_data/sh_ipmmu.h @@ -1,18 +0,0 @@ -/* sh_ipmmu.h - * - * Copyright (C) 2012 Hideki EIRAKU - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - */ - -#ifndef __SH_IPMMU_H__ -#define __SH_IPMMU_H__ - -struct shmobile_ipmmu_platform_data { - const char * const *dev_names; - unsigned int num_dev_names; -}; - -#endif /* __SH_IPMMU_H__ */