whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit bb97be23db2a296c5f8b8b4c40feb0435b068c5e
parent b7a7d1c1ec688104fdc922568c26395a756f616d
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Sun, 10 Mar 2019 12:29:52 -0700

Merge tag 'iommu-updates-v5.1' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu

Pull IOMMU updates from Joerg Roedel:

 - A big cleanup and optimization patch-set for the Tegra GART driver

 - Documentation updates and fixes for the IOMMU-API

 - Support for page request in Intel VT-d scalable mode

 - Intel VT-d dma_[un]map_resource() support

 - Updates to the ATS enabling code for PCI (acked by Bjorn) and Intel
   VT-d to align with the latest version of the ATS spec

 - Relaxed IRQ source checking in the Intel VT-d driver for some aliased
   devices, needed for future devices which send IRQ messages from more
   than on request-ID

 - IRQ remapping driver for Hyper-V

 - Patches to make generic IOVA and IO-Page-Table code usable outside of
   the IOMMU code

 - Various other small fixes and cleanups

* tag 'iommu-updates-v5.1' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (60 commits)
  iommu/vt-d: Get domain ID before clear pasid entry
  iommu/vt-d: Fix NULL pointer reference in intel_svm_bind_mm()
  iommu/vt-d: Set context field after value initialized
  iommu/vt-d: Disable ATS support on untrusted devices
  iommu/mediatek: Fix semicolon code style issue
  MAINTAINERS: Add Hyper-V IOMMU driver into Hyper-V CORE AND DRIVERS scope
  iommu/hyper-v: Add Hyper-V stub IOMMU driver
  x86/Hyper-V: Set x2apic destination mode to physical when x2apic is available
  PCI/ATS: Add inline to pci_prg_resp_pasid_required()
  iommu/vt-d: Check identity map for hot-added devices
  iommu: Fix IOMMU debugfs fallout
  iommu: Document iommu_ops.is_attach_deferred()
  iommu: Document iommu_ops.iotlb_sync_map()
  iommu/vt-d: Enable ATS only if the device uses page aligned address.
  PCI/ATS: Add pci_ats_page_aligned() interface
  iommu/vt-d: Fix PRI/PASID dependency issue.
  PCI/ATS: Add pci_prg_resp_pasid_required() interface.
  iommu/vt-d: Allow interrupts from the entire bus for aliased devices
  iommu/vt-d: Add helper to set an IRTE to verify only the bus number
  iommu: Fix flush_tlb_all typo
  ...

Diffstat:
DDocumentation/devicetree/bindings/iommu/nvidia,tegra20-gart.txt | 14--------------
MDocumentation/devicetree/bindings/memory-controllers/nvidia,tegra20-mc.txt | 27+++++++++++++++++++--------
MMAINTAINERS | 1+
March/arm/boot/dts/tegra20.dtsi | 15++++++---------
March/x86/kernel/cpu/mshyperv.c | 12++++++++++++
Mdrivers/Makefile | 2+-
Mdrivers/iommu/Kconfig | 17++++++++++++++---
Mdrivers/iommu/Makefile | 1+
Mdrivers/iommu/amd_iommu.c | 28+++++++++++++---------------
Mdrivers/iommu/amd_iommu_init.c | 20++++++++++----------
Mdrivers/iommu/amd_iommu_v2.c | 24------------------------
Mdrivers/iommu/arm-smmu-v3.c | 3+--
Mdrivers/iommu/arm-smmu.c | 2+-
Mdrivers/iommu/dma-iommu.c | 3+--
Adrivers/iommu/hyperv-iommu.c | 196+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mdrivers/iommu/intel-iommu.c | 158++++++++++++++++++++++++++++++++++++++++---------------------------------------
Mdrivers/iommu/intel-pasid.c | 2+-
Mdrivers/iommu/intel-svm.c | 88+++++++++++++++++++++++++++++++++++++++++--------------------------------------
Mdrivers/iommu/intel_irq_remapping.c | 32+++++++++++++++++++++++++++++---
Mdrivers/iommu/io-pgtable-arm-v7s.c | 6+++---
Mdrivers/iommu/io-pgtable-arm.c | 3+--
Mdrivers/iommu/io-pgtable.c | 5+++--
Mdrivers/iommu/iommu-debugfs.c | 23++++-------------------
Mdrivers/iommu/iommu.c | 16++++++++++------
Mdrivers/iommu/ipmmu-vmsa.c | 3+--
Mdrivers/iommu/irq_remapping.c | 3+++
Mdrivers/iommu/irq_remapping.h | 1+
Mdrivers/iommu/msm_iommu.c | 10+++++-----
Mdrivers/iommu/mtk_iommu.h | 3+--
Mdrivers/iommu/mtk_iommu_v1.c | 2+-
Mdrivers/iommu/qcom_iommu.c | 2+-
Mdrivers/iommu/tegra-gart.c | 473++++++++++++++++++++++++++++++-------------------------------------------------
Mdrivers/iommu/tegra-smmu.c | 4----
Mdrivers/memory/tegra/mc.c | 118+++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------
Mdrivers/memory/tegra/mc.h | 10++--------
Mdrivers/pci/ats.c | 57+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Minclude/linux/intel-iommu.h | 21+++++++++------------
Minclude/linux/intel-svm.h | 2+-
Rdrivers/iommu/io-pgtable.h -> include/linux/io-pgtable.h | 0
Minclude/linux/iommu.h | 6+++++-
Minclude/linux/pci-ats.h | 5+++++
Minclude/linux/pci.h | 2++
Minclude/soc/tegra/mc.h | 27++++++++++++++++++++++++++-
Minclude/uapi/linux/pci_regs.h | 2++
44 files changed, 838 insertions(+), 611 deletions(-)

diff --git a/Documentation/devicetree/bindings/iommu/nvidia,tegra20-gart.txt b/Documentation/devicetree/bindings/iommu/nvidia,tegra20-gart.txt @@ -1,14 +0,0 @@ -NVIDIA Tegra 20 GART - -Required properties: -- compatible: "nvidia,tegra20-gart" -- reg: Two pairs of cells specifying the physical address and size of - the memory controller registers and the GART aperture respectively. - -Example: - - gart { - compatible = "nvidia,tegra20-gart"; - reg = <0x7000f024 0x00000018 /* controller registers */ - 0x58000000 0x02000000>; /* GART aperture */ - }; diff --git a/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra20-mc.txt b/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra20-mc.txt @@ -1,26 +1,37 @@ NVIDIA Tegra20 MC(Memory Controller) Required properties: -- compatible : "nvidia,tegra20-mc" -- reg : Should contain 2 register ranges(address and length); see the - example below. Note that the MC registers are interleaved with the - GART registers, and hence must be represented as multiple ranges. +- compatible : "nvidia,tegra20-mc-gart" +- reg : Should contain 2 register ranges: physical base address and length of + the controller's registers and the GART aperture respectively. +- clocks: Must contain an entry for each entry in clock-names. + See ../clocks/clock-bindings.txt for details. +- clock-names: Must include the following entries: + - mc: the module's clock input - interrupts : Should contain MC General interrupt. - #reset-cells : Should be 1. This cell represents memory client module ID. The assignments may be found in header file <dt-bindings/memory/tegra20-mc.h> or in the TRM documentation. +- #iommu-cells: Should be 0. This cell represents the number of cells in an + IOMMU specifier needed to encode an address. GART supports only a single + address space that is shared by all devices, therefore no additional + information needed for the address encoding. Example: mc: memory-controller@7000f000 { - compatible = "nvidia,tegra20-mc"; - reg = <0x7000f000 0x024 - 0x7000f03c 0x3c4>; - interrupts = <0 77 0x04>; + compatible = "nvidia,tegra20-mc-gart"; + reg = <0x7000f000 0x400 /* controller registers */ + 0x58000000 0x02000000>; /* GART aperture */ + clocks = <&tegra_car TEGRA20_CLK_MC>; + clock-names = "mc"; + interrupts = <GIC_SPI 77 0x04>; #reset-cells = <1>; + #iommu-cells = <0>; }; video-codec@6001a000 { compatible = "nvidia,tegra20-vde"; ... resets = <&mc TEGRA20_MC_RESET_VDE>; + iommus = <&mc>; }; diff --git a/MAINTAINERS b/MAINTAINERS @@ -7170,6 +7170,7 @@ F: drivers/net/hyperv/ F: drivers/scsi/storvsc_drv.c F: drivers/uio/uio_hv_generic.c F: drivers/video/fbdev/hyperv_fb.c +F: drivers/iommu/hyperv_iommu.c F: net/vmw_vsock/hyperv_transport.c F: include/linux/hyperv.h F: include/uapi/linux/hyperv.h diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi @@ -616,17 +616,14 @@ }; mc: memory-controller@7000f000 { - compatible = "nvidia,tegra20-mc"; - reg = <0x7000f000 0x024 - 0x7000f03c 0x3c4>; + compatible = "nvidia,tegra20-mc-gart"; + reg = <0x7000f000 0x400 /* controller registers */ + 0x58000000 0x02000000>; /* GART aperture */ + clocks = <&tegra_car TEGRA20_CLK_MC>; + clock-names = "mc"; interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>; #reset-cells = <1>; - }; - - iommu@7000f024 { - compatible = "nvidia,tegra20-gart"; - reg = <0x7000f024 0x00000018 /* controller registers */ - 0x58000000 0x02000000>; /* GART aperture */ + #iommu-cells = <0>; }; memory-controller@7000f400 { diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c @@ -328,6 +328,18 @@ static void __init ms_hyperv_init_platform(void) # ifdef CONFIG_SMP smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu; # endif + + /* + * Hyper-V doesn't provide irq remapping for IO-APIC. To enable x2apic, + * set x2apic destination mode to physcial mode when x2apic is available + * and Hyper-V IOMMU driver makes sure cpus assigned with IO-APIC irqs + * have 8-bit APIC id. + */ +# ifdef CONFIG_X86_X2APIC + if (x2apic_supported()) + x2apic_phys = 1; +# endif + #endif } diff --git a/drivers/Makefile b/drivers/Makefile @@ -56,7 +56,7 @@ obj-y += tty/ obj-y += char/ # iommu/ comes before gpu as gpu are using iommu controllers -obj-$(CONFIG_IOMMU_SUPPORT) += iommu/ +obj-y += iommu/ # gpu/ comes after char for AGP vs DRM startup and after iommu obj-y += gpu/ diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig @@ -1,3 +1,7 @@ +# The IOVA library may also be used by non-IOMMU_API users +config IOMMU_IOVA + tristate + # IOMMU_API always gets selected by whoever wants it. config IOMMU_API bool @@ -81,9 +85,6 @@ config IOMMU_DEFAULT_PASSTHROUGH If unsure, say N here. -config IOMMU_IOVA - tristate - config OF_IOMMU def_bool y depends on OF && IOMMU_API @@ -282,6 +283,7 @@ config ROCKCHIP_IOMMU config TEGRA_IOMMU_GART bool "Tegra GART IOMMU Support" depends on ARCH_TEGRA_2x_SOC + depends on TEGRA_MC select IOMMU_API help Enables support for remapping discontiguous physical memory @@ -435,4 +437,13 @@ config QCOM_IOMMU help Support for IOMMU on certain Qualcomm SoCs. +config HYPERV_IOMMU + bool "Hyper-V x2APIC IRQ Handling" + depends on HYPERV + select IOMMU_API + default HYPERV + help + Stub IOMMU driver to handle IRQs as to allow Hyper-V Linux + guests to run with x2APIC mode enabled. + endif # IOMMU_SUPPORT diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile @@ -32,3 +32,4 @@ obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o obj-$(CONFIG_S390_IOMMU) += s390-iommu.o obj-$(CONFIG_QCOM_IOMMU) += qcom_iommu.o +obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c @@ -18,6 +18,7 @@ */ #define pr_fmt(fmt) "AMD-Vi: " fmt +#define dev_fmt(fmt) pr_fmt(fmt) #include <linux/ratelimit.h> #include <linux/pci.h> @@ -279,10 +280,10 @@ static u16 get_alias(struct device *dev) return pci_alias; } - pr_info("Using IVRS reported alias %02x:%02x.%d " - "for device %s[%04x:%04x], kernel reported alias " + pci_info(pdev, "Using IVRS reported alias %02x:%02x.%d " + "for device [%04x:%04x], kernel reported alias " "%02x:%02x.%d\n", PCI_BUS_NUM(ivrs_alias), PCI_SLOT(ivrs_alias), - PCI_FUNC(ivrs_alias), dev_name(dev), pdev->vendor, pdev->device, + PCI_FUNC(ivrs_alias), pdev->vendor, pdev->device, PCI_BUS_NUM(pci_alias), PCI_SLOT(pci_alias), PCI_FUNC(pci_alias)); @@ -293,9 +294,8 @@ static u16 get_alias(struct device *dev) if (pci_alias == devid && PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) { pci_add_dma_alias(pdev, ivrs_alias & 0xff); - pr_info("Added PCI DMA alias %02x.%d for %s\n", - PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias), - dev_name(dev)); + pci_info(pdev, "Added PCI DMA alias %02x.%d\n", + PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias)); } return ivrs_alias; @@ -545,7 +545,7 @@ static void amd_iommu_report_page_fault(u16 devid, u16 domain_id, dev_data = get_dev_data(&pdev->dev); if (dev_data && __ratelimit(&dev_data->rs)) { - dev_err(&pdev->dev, "Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%llx flags=0x%04x]\n", + pci_err(pdev, "Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%llx flags=0x%04x]\n", domain_id, address, flags); } else if (printk_ratelimit()) { pr_err("Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n", @@ -2258,8 +2258,7 @@ static int amd_iommu_add_device(struct device *dev) ret = iommu_init_device(dev); if (ret) { if (ret != -ENOTSUPP) - pr_err("Failed to initialize device %s - trying to proceed anyway\n", - dev_name(dev)); + dev_err(dev, "Failed to initialize - trying to proceed anyway\n"); iommu_ignore_device(dev); dev->dma_ops = NULL; @@ -2569,6 +2568,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, struct scatterlist *s; unsigned long address; u64 dma_mask; + int ret; domain = get_domain(dev); if (IS_ERR(domain)) @@ -2591,7 +2591,6 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, for (j = 0; j < pages; ++j) { unsigned long bus_addr, phys_addr; - int ret; bus_addr = address + s->dma_address + (j << PAGE_SHIFT); phys_addr = (sg_phys(s) & PAGE_MASK) + (j << PAGE_SHIFT); @@ -2612,8 +2611,8 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, return nelems; out_unmap: - pr_err("%s: IOMMU mapping error in map_sg (io-pages: %d)\n", - dev_name(dev), npages); + dev_err(dev, "IOMMU mapping error in map_sg (io-pages: %d reason: %d)\n", + npages, ret); for_each_sg(sglist, s, nelems, i) { int j, pages = iommu_num_pages(sg_phys(s), s->length, PAGE_SIZE); @@ -2807,7 +2806,7 @@ static int init_reserved_iova_ranges(void) IOVA_PFN(r->start), IOVA_PFN(r->end)); if (!val) { - pr_err("Reserve pci-resource range failed\n"); + pci_err(pdev, "Reserve pci-resource range %pR failed\n", r); return -ENOMEM; } } @@ -3177,8 +3176,7 @@ static void amd_iommu_get_resv_regions(struct device *dev, length, prot, IOMMU_RESV_DIRECT); if (!region) { - pr_err("Out of memory allocating dm-regions for %s\n", - dev_name(dev)); + dev_err(dev, "Out of memory allocating dm-regions\n"); return; } list_add_tail(&region->list, head); diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c @@ -18,6 +18,7 @@ */ #define pr_fmt(fmt) "AMD-Vi: " fmt +#define dev_fmt(fmt) pr_fmt(fmt) #include <linux/pci.h> #include <linux/acpi.h> @@ -1457,8 +1458,7 @@ static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu) pci_write_config_dword(iommu->dev, 0xf0, 0x90 | (1 << 8)); pci_write_config_dword(iommu->dev, 0xf4, value | 0x4); - pr_info("Applying erratum 746 workaround for IOMMU at %s\n", - dev_name(&iommu->dev->dev)); + pci_info(iommu->dev, "Applying erratum 746 workaround\n"); /* Clear the enable writing bit */ pci_write_config_dword(iommu->dev, 0xf0, 0x90); @@ -1488,8 +1488,7 @@ static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu) /* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */ iommu_write_l2(iommu, 0x47, value | BIT(0)); - pr_info("Applying ATS write check workaround for IOMMU at %s\n", - dev_name(&iommu->dev->dev)); + pci_info(iommu->dev, "Applying ATS write check workaround\n"); } /* @@ -1665,6 +1664,7 @@ static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, static void init_iommu_perf_ctr(struct amd_iommu *iommu) { + struct pci_dev *pdev = iommu->dev; u64 val = 0xabcd, val2 = 0; if (!iommu_feature(iommu, FEATURE_PC)) @@ -1676,12 +1676,12 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu) if ((iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true)) || (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false)) || (val != val2)) { - pr_err("Unable to write to IOMMU perf counter.\n"); + pci_err(pdev, "Unable to write to IOMMU perf counter.\n"); amd_iommu_pc_present = false; return; } - pr_info("IOMMU performance counters supported\n"); + pci_info(pdev, "IOMMU performance counters supported\n"); val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET); iommu->max_banks = (u8) ((val >> 12) & 0x3f); @@ -1840,14 +1840,14 @@ static void print_iommu_info(void) struct amd_iommu *iommu; for_each_iommu(iommu) { + struct pci_dev *pdev = iommu->dev; int i; - pr_info("Found IOMMU at %s cap 0x%hx\n", - dev_name(&iommu->dev->dev), iommu->cap_ptr); + pci_info(pdev, "Found IOMMU cap 0x%hx\n", iommu->cap_ptr); if (iommu->cap & (1 << IOMMU_CAP_EFR)) { - pr_info("Extended features (%#llx):\n", - iommu->features); + pci_info(pdev, "Extended features (%#llx):\n", + iommu->features); for (i = 0; i < ARRAY_SIZE(feat_str); ++i) { if (iommu_feature(iommu, (1ULL << i))) pr_cont(" %s", feat_str[i]); diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c @@ -370,29 +370,6 @@ static struct pasid_state *mn_to_state(struct mmu_notifier *mn) return container_of(mn, struct pasid_state, mn); } -static void __mn_flush_page(struct mmu_notifier *mn, - unsigned long address) -{ - struct pasid_state *pasid_state; - struct device_state *dev_state; - - pasid_state = mn_to_state(mn); - dev_state = pasid_state->device_state; - - amd_iommu_flush_page(dev_state->domain, pasid_state->pasid, address); -} - -static int mn_clear_flush_young(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long start, - unsigned long end) -{ - for (; start < end; start += PAGE_SIZE) - __mn_flush_page(mn, start); - - return 0; -} - static void mn_invalidate_range(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long start, unsigned long end) @@ -430,7 +407,6 @@ static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm) static const struct mmu_notifier_ops iommu_mn = { .release = mn_release, - .clear_flush_young = mn_clear_flush_young, .invalidate_range = mn_invalidate_range, }; diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c @@ -18,6 +18,7 @@ #include <linux/dma-iommu.h> #include <linux/err.h> #include <linux/interrupt.h> +#include <linux/io-pgtable.h> #include <linux/iommu.h> #include <linux/iopoll.h> #include <linux/init.h> @@ -32,8 +33,6 @@ #include <linux/amba/bus.h> -#include "io-pgtable.h" - /* MMIO registers */ #define ARM_SMMU_IDR0 0x0 #define IDR0_ST_LVL GENMASK(28, 27) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c @@ -39,6 +39,7 @@ #include <linux/interrupt.h> #include <linux/io.h> #include <linux/io-64-nonatomic-hi-lo.h> +#include <linux/io-pgtable.h> #include <linux/iommu.h> #include <linux/iopoll.h> #include <linux/init.h> @@ -56,7 +57,6 @@ #include <linux/amba/bus.h> #include <linux/fsl/mc.h> -#include "io-pgtable.h" #include "arm-smmu-regs.h" #define ARM_MMU500_ACTLR_CPRE (1 << 1) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c @@ -289,7 +289,7 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, { struct iommu_dma_cookie *cookie = domain->iova_cookie; struct iova_domain *iovad = &cookie->iovad; - unsigned long order, base_pfn, end_pfn; + unsigned long order, base_pfn; int attr; if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE) @@ -298,7 +298,6 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, /* Use the smallest supported page size for IOVA granularity */ order = __ffs(domain->pgsize_bitmap); base_pfn = max_t(unsigned long, 1, base >> order); - end_pfn = (base + size - 1) >> order; /* Check the domain allows at least some access to the device... */ if (domain->geometry.force_aperture) { diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Hyper-V stub IOMMU driver. + * + * Copyright (C) 2019, Microsoft, Inc. + * + * Author : Lan Tianyu <Tianyu.Lan@microsoft.com> + */ + +#include <linux/types.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/iommu.h> +#include <linux/module.h> + +#include <asm/apic.h> +#include <asm/cpu.h> +#include <asm/hw_irq.h> +#include <asm/io_apic.h> +#include <asm/irq_remapping.h> +#include <asm/hypervisor.h> + +#include "irq_remapping.h" + +#ifdef CONFIG_IRQ_REMAP + +/* + * According 82093AA IO-APIC spec , IO APIC has a 24-entry Interrupt + * Redirection Table. Hyper-V exposes one single IO-APIC and so define + * 24 IO APIC remmapping entries. + */ +#define IOAPIC_REMAPPING_ENTRY 24 + +static cpumask_t ioapic_max_cpumask = { CPU_BITS_NONE }; +static struct irq_domain *ioapic_ir_domain; + +static int hyperv_ir_set_affinity(struct irq_data *data, + const struct cpumask *mask, bool force) +{ + struct irq_data *parent = data->parent_data; + struct irq_cfg *cfg = irqd_cfg(data); + struct IO_APIC_route_entry *entry; + int ret; + + /* Return error If new irq affinity is out of ioapic_max_cpumask. */ + if (!cpumask_subset(mask, &ioapic_max_cpumask)) + return -EINVAL; + + ret = parent->chip->irq_set_affinity(parent, mask, force); + if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) + return ret; + + entry = data->chip_data; + entry->dest = cfg->dest_apicid; + entry->vector = cfg->vector; + send_cleanup_vector(cfg); + + return 0; +} + +static struct irq_chip hyperv_ir_chip = { + .name = "HYPERV-IR", + .irq_ack = apic_ack_irq, + .irq_set_affinity = hyperv_ir_set_affinity, +}; + +static int hyperv_irq_remapping_alloc(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs, + void *arg) +{ + struct irq_alloc_info *info = arg; + struct irq_data *irq_data; + struct irq_desc *desc; + int ret = 0; + + if (!info || info->type != X86_IRQ_ALLOC_TYPE_IOAPIC || nr_irqs > 1) + return -EINVAL; + + ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); + if (ret < 0) + return ret; + + irq_data = irq_domain_get_irq_data(domain, virq); + if (!irq_data) { + irq_domain_free_irqs_common(domain, virq, nr_irqs); + return -EINVAL; + } + + irq_data->chip = &hyperv_ir_chip; + + /* + * If there is interrupt remapping function of IOMMU, setting irq + * affinity only needs to change IRTE of IOMMU. But Hyper-V doesn't + * support interrupt remapping function, setting irq affinity of IO-APIC + * interrupts still needs to change IO-APIC registers. But ioapic_ + * configure_entry() will ignore value of cfg->vector and cfg-> + * dest_apicid when IO-APIC's parent irq domain is not the vector + * domain.(See ioapic_configure_entry()) In order to setting vector + * and dest_apicid to IO-APIC register, IO-APIC entry pointer is saved + * in the chip_data and hyperv_irq_remapping_activate()/hyperv_ir_set_ + * affinity() set vector and dest_apicid directly into IO-APIC entry. + */ + irq_data->chip_data = info->ioapic_entry; + + /* + * Hypver-V IO APIC irq affinity should be in the scope of + * ioapic_max_cpumask because no irq remapping support. + */ + desc = irq_data_to_desc(irq_data); + cpumask_copy(desc->irq_common_data.affinity, &ioapic_max_cpumask); + + return 0; +} + +static void hyperv_irq_remapping_free(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs) +{ + irq_domain_free_irqs_common(domain, virq, nr_irqs); +} + +static int hyperv_irq_remapping_activate(struct irq_domain *domain, + struct irq_data *irq_data, bool reserve) +{ + struct irq_cfg *cfg = irqd_cfg(irq_data); + struct IO_APIC_route_entry *entry = irq_data->chip_data; + + entry->dest = cfg->dest_apicid; + entry->vector = cfg->vector; + + return 0; +} + +static struct irq_domain_ops hyperv_ir_domain_ops = { + .alloc = hyperv_irq_remapping_alloc, + .free = hyperv_irq_remapping_free, + .activate = hyperv_irq_remapping_activate, +}; + +static int __init hyperv_prepare_irq_remapping(void) +{ + struct fwnode_handle *fn; + int i; + + if (!hypervisor_is_type(X86_HYPER_MS_HYPERV) || + !x2apic_supported()) + return -ENODEV; + + fn = irq_domain_alloc_named_id_fwnode("HYPERV-IR", 0); + if (!fn) + return -ENOMEM; + + ioapic_ir_domain = + irq_domain_create_hierarchy(arch_get_ir_parent_domain(), + 0, IOAPIC_REMAPPING_ENTRY, fn, + &hyperv_ir_domain_ops, NULL); + + irq_domain_free_fwnode(fn); + + /* + * Hyper-V doesn't provide irq remapping function for + * IO-APIC and so IO-APIC only accepts 8-bit APIC ID. + * Cpu's APIC ID is read from ACPI MADT table and APIC IDs + * in the MADT table on Hyper-v are sorted monotonic increasingly. + * APIC ID reflects cpu topology. There maybe some APIC ID + * gaps when cpu number in a socket is not power of two. Prepare + * max cpu affinity for IOAPIC irqs. Scan cpu 0-255 and set cpu + * into ioapic_max_cpumask if its APIC ID is less than 256. + */ + for (i = min_t(unsigned int, num_possible_cpus() - 1, 255); i >= 0; i--) + if (cpu_physical_id(i) < 256) + cpumask_set_cpu(i, &ioapic_max_cpumask); + + return 0; +} + +static int __init hyperv_enable_irq_remapping(void) +{ + return IRQ_REMAP_X2APIC_MODE; +} + +static struct irq_domain *hyperv_get_ir_irq_domain(struct irq_alloc_info *info) +{ + if (info->type == X86_IRQ_ALLOC_TYPE_IOAPIC) + return ioapic_ir_domain; + else + return NULL; +} + +struct irq_remap_ops hyperv_irq_remap_ops = { + .prepare = hyperv_prepare_irq_remapping, + .enable = hyperv_enable_irq_remapping, + .get_ir_irq_domain = hyperv_get_ir_irq_domain, +}; + +#endif diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c @@ -19,6 +19,7 @@ */ #define pr_fmt(fmt) "DMAR: " fmt +#define dev_fmt(fmt) pr_fmt(fmt) #include <linux/init.h> #include <linux/bitmap.h> @@ -343,8 +344,7 @@ static int g_num_of_iommus; static void domain_exit(struct dmar_domain *domain); static void domain_remove_dev_info(struct dmar_domain *domain); -static void dmar_remove_one_dev_info(struct dmar_domain *domain, - struct device *dev); +static void dmar_remove_one_dev_info(struct device *dev); static void __dmar_remove_one_dev_info(struct device_domain_info *info); static void domain_context_clear(struct intel_iommu *iommu, struct device *dev); @@ -865,7 +865,7 @@ out: static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, unsigned long pfn, int *target_level) { - struct dma_pte *parent, *pte = NULL; + struct dma_pte *parent, *pte; int level = agaw_to_level(domain->agaw); int offset; @@ -922,7 +922,7 @@ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, unsigned long pfn, int level, int *large_page) { - struct dma_pte *parent, *pte = NULL; + struct dma_pte *parent, *pte; int total = agaw_to_level(domain->agaw); int offset; @@ -954,7 +954,7 @@ static void dma_pte_clear_range(struct dmar_domain *domain, unsigned long start_pfn, unsigned long last_pfn) { - unsigned int large_page = 1; + unsigned int large_page; struct dma_pte *first_pte, *pte; BUG_ON(!domain_pfn_supported(domain, start_pfn)); @@ -1132,7 +1132,7 @@ static struct page *domain_unmap(struct dmar_domain *domain, unsigned long start_pfn, unsigned long last_pfn) { - struct page *freelist = NULL; + struct page *freelist; BUG_ON(!domain_pfn_supported(domain, start_pfn)); BUG_ON(!domain_pfn_supported(domain, last_pfn)); @@ -1403,10 +1403,13 @@ static void iommu_enable_dev_iotlb(struct device_domain_info *info) if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1)) info->pasid_enabled = 1; - if (info->pri_supported && !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32)) + if (info->pri_supported && + (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1) && + !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32)) info->pri_enabled = 1; #endif if (!pdev->untrusted && info->ats_supported && + pci_ats_page_aligned(pdev) && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) { info->ats_enabled = 1; domain_update_iotlb(info->domain); @@ -1763,7 +1766,7 @@ static int domain_attach_iommu(struct dmar_domain *domain, static int domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu) { - int num, count = INT_MAX; + int num, count; assert_spin_locked(&device_domain_lock); assert_spin_locked(&iommu->lock); @@ -1816,7 +1819,7 @@ static int dmar_init_reserved_ranges(void) IOVA_PFN(r->start), IOVA_PFN(r->end)); if (!iova) { - pr_err("Reserve iova failed\n"); + pci_err(pdev, "Reserve iova for %pR failed\n", r); return -ENODEV; } } @@ -1902,11 +1905,7 @@ static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu, static void domain_exit(struct dmar_domain *domain) { - struct page *freelist = NULL; - - /* Domain 0 is reserved, so dont process it */ - if (!domain) - return; + struct page *freelist; /* Remove associated devices and clear attached or cached domains */ rcu_read_lock(); @@ -2058,7 +2057,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int agaw; context_set_domain_id(context, did); - context_set_translation_type(context, translation); if (translation != CONTEXT_TT_PASS_THROUGH) { /* @@ -2088,6 +2086,8 @@ static int domain_context_mapping_one(struct dmar_domain *domain, */ context_set_address_width(context, iommu->msagaw); } + + context_set_translation_type(context, translation); } context_set_fault_enable(context); @@ -2486,7 +2486,8 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, if (dev && dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(info->dev); - if (!pci_ats_disabled() && + if (!pdev->untrusted && + !pci_ats_disabled() && ecap_dev_iotlb_support(iommu->ecap) && pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) && dmar_find_matched_atsr_unit(pdev)) @@ -2545,9 +2546,8 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, if (dev && dev_is_pci(dev) && sm_supported(iommu)) { ret = intel_pasid_alloc_table(dev); if (ret) { - pr_err("PASID table allocation for %s failed\n", - dev_name(dev)); - dmar_remove_one_dev_info(domain, dev); + dev_err(dev, "PASID table allocation failed\n"); + dmar_remove_one_dev_info(dev); return NULL; } @@ -2561,16 +2561,15 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, dev, PASID_RID2PASID); spin_unlock(&iommu->lock); if (ret) { - pr_err("Setup RID2PASID for %s failed\n", - dev_name(dev)); - dmar_remove_one_dev_info(domain, dev); + dev_err(dev, "Setup RID2PASID failed\n"); + dmar_remove_one_dev_info(dev); return NULL; } } if (dev && domain_context_mapping(domain, dev)) { - pr_err("Domain context map for %s failed\n", dev_name(dev)); - dmar_remove_one_dev_info(domain, dev); + dev_err(dev, "Domain context map failed\n"); + dmar_remove_one_dev_info(dev); return NULL; } @@ -2585,7 +2584,7 @@ static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque) static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw) { - struct device_domain_info *info = NULL; + struct device_domain_info *info; struct dmar_domain *domain = NULL; struct intel_iommu *iommu; u16 dma_alias; @@ -2724,13 +2723,12 @@ static int domain_prepare_identity_map(struct device *dev, range which is reserved in E820, so which didn't get set up to start with in si_domain */ if (domain == si_domain && hw_pass_through) { - pr_warn("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n", - dev_name(dev), start, end); + dev_warn(dev, "Ignoring identity map for HW passthrough [0x%Lx - 0x%Lx]\n", + start, end); return 0; } - pr_info("Setting identity map for device %s [0x%Lx - 0x%Lx]\n", - dev_name(dev), start, end); + dev_info(dev, "Setting identity map [0x%Lx - 0x%Lx]\n", start, end); if (end < start) { WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n" @@ -2810,7 +2808,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width); static int __init si_domain_init(int hw) { - int nid, ret = 0; + int nid, ret; si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY); if (!si_domain) @@ -2934,7 +2932,6 @@ static bool device_is_rmrr_locked(struct device *dev) static int iommu_should_identity_map(struct device *dev, int startup) { - if (dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(dev); @@ -3017,8 +3014,8 @@ static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw ret = domain_add_dev_info(si_domain, dev); if (!ret) - pr_info("%s identity mapping for device %s\n", - hw ? "Hardware" : "Software", dev_name(dev)); + dev_info(dev, "%s identity mapping\n", + hw ? "Hardware" : "Software"); else if (ret == -ENODEV) /* device not associated with an iommu */ ret = 0; @@ -3530,7 +3527,7 @@ static unsigned long intel_alloc_iova(struct device *dev, struct dmar_domain *domain, unsigned long nrpages, uint64_t dma_mask) { - unsigned long iova_pfn = 0; + unsigned long iova_pfn; /* Restrict dma_mask to the width that the iommu can handle */ dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask); @@ -3551,8 +3548,7 @@ static unsigned long intel_alloc_iova(struct device *dev, iova_pfn = alloc_iova_fast(&domain->iovad, nrpages, IOVA_PFN(dma_mask), true); if (unlikely(!iova_pfn)) { - pr_err("Allocating %ld-page iova for %s failed", - nrpages, dev_name(dev)); + dev_err(dev, "Allocating %ld-page iova failed", nrpages); return 0; } @@ -3600,7 +3596,7 @@ struct dmar_domain *get_valid_domain_for_dev(struct device *dev) out: if (!domain) - pr_err("Allocating domain for %s failed\n", dev_name(dev)); + dev_err(dev, "Allocating domain failed\n"); return domain; @@ -3626,9 +3622,8 @@ static int iommu_no_mapping(struct device *dev) * 32 bit DMA is removed from si_domain and fall back * to non-identity mapping. */ - dmar_remove_one_dev_info(si_domain, dev); - pr_info("32bit %s uses non-identity mapping\n", - dev_name(dev)); + dmar_remove_one_dev_info(dev); + dev_info(dev, "32bit DMA uses non-identity mapping\n"); return 0; } } else { @@ -3640,8 +3635,7 @@ static int iommu_no_mapping(struct device *dev) int ret; ret = domain_add_dev_info(si_domain, dev); if (!ret) { - pr_info("64bit %s uses identity mapping\n", - dev_name(dev)); + dev_info(dev, "64bit DMA uses identity mapping\n"); return 1; } } @@ -3650,11 +3644,9 @@ static int iommu_no_mapping(struct device *dev) return 0; } -static dma_addr_t __intel_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, int dir, - u64 dma_mask) +static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, + size_t size, int dir, u64 dma_mask) { - phys_addr_t paddr = page_to_phys(page) + offset; struct dmar_domain *domain; phys_addr_t start_paddr; unsigned long iova_pfn; @@ -3706,8 +3698,8 @@ static dma_addr_t __intel_map_page(struct device *dev, struct page *page, error: if (iova_pfn) free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size)); - pr_err("Device %s request: %zx@%llx dir %d --- failed\n", - dev_name(dev), size, (unsigned long long)paddr, dir); + dev_err(dev, "Device request: %zx@%llx dir %d --- failed\n", + size, (unsigned long long)paddr, dir); return DMA_MAPPING_ERROR; } @@ -3716,7 +3708,15 @@ static dma_addr_t intel_map_page(struct device *dev, struct page *page, enum dma_data_direction dir, unsigned long attrs) { - return __intel_map_page(dev, page, offset, size, dir, *dev->dma_mask); + return __intel_map_single(dev, page_to_phys(page) + offset, size, + dir, *dev->dma_mask); +} + +static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + return __intel_map_single(dev, phys_addr, size, dir, *dev->dma_mask); } static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) @@ -3742,8 +3742,7 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) start_pfn = mm_to_dma_pfn(iova_pfn); last_pfn = start_pfn + nrpages - 1; - pr_debug("Device %s unmapping: pfn %lx-%lx\n", - dev_name(dev), start_pfn, last_pfn); + dev_dbg(dev, "Device unmapping: pfn %lx-%lx\n", start_pfn, last_pfn); freelist = domain_unmap(domain, start_pfn, last_pfn); @@ -3807,8 +3806,9 @@ static void *intel_alloc_coherent(struct device *dev, size_t size, return NULL; memset(page_address(page), 0, size); - *dma_handle = __intel_map_page(dev, page, 0, size, DMA_BIDIRECTIONAL, - dev->coherent_dma_mask); + *dma_handle = __intel_map_single(dev, page_to_phys(page), size, + DMA_BIDIRECTIONAL, + dev->coherent_dma_mask); if (*dma_handle != DMA_MAPPING_ERROR) return page_address(page); if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) @@ -3925,6 +3925,8 @@ static const struct dma_map_ops intel_dma_ops = { .unmap_sg = intel_unmap_sg, .map_page = intel_map_page, .unmap_page = intel_unmap_page, + .map_resource = intel_map_resource, + .unmap_resource = intel_unmap_page, .dma_supported = dma_direct_supported, }; @@ -4340,7 +4342,7 @@ int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg) static int intel_iommu_add(struct dmar_drhd_unit *dmaru) { - int sp, ret = 0; + int sp, ret; struct intel_iommu *iommu = dmaru->iommu; if (g_iommus[iommu->seq_id]) @@ -4504,7 +4506,7 @@ out: int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) { - int ret = 0; + int ret; struct dmar_rmrr_unit *rmrru; struct dmar_atsr_unit *atsru; struct acpi_dmar_atsr *atsr; @@ -4521,7 +4523,7 @@ int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) ((void *)rmrr) + rmrr->header.length, rmrr->segment, rmrru->devices, rmrru->devices_cnt); - if(ret < 0) + if (ret < 0) return ret; } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) { dmar_remove_dev_scope(info, rmrr->segment, @@ -4541,7 +4543,7 @@ int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) atsru->devices_cnt); if (ret > 0) break; - else if(ret < 0) + else if (ret < 0) return ret; } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) { if (dmar_remove_dev_scope(info, atsr->segment, @@ -4568,16 +4570,19 @@ static int device_notifier(struct notifier_block *nb, if (iommu_dummy(dev)) return 0; - if (action != BUS_NOTIFY_REMOVED_DEVICE) - return 0; - - domain = find_domain(dev); - if (!domain) - return 0; + if (action == BUS_NOTIFY_REMOVED_DEVICE) { + domain = find_domain(dev); + if (!domain) + return 0; - dmar_remove_one_dev_info(domain, dev); - if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices)) - domain_exit(domain); + dmar_remove_one_dev_info(dev); + if (!domain_type_is_vm_or_si(domain) && + list_empty(&domain->devices)) + domain_exit(domain); + } else if (action == BUS_NOTIFY_ADD_DEVICE) { + if (iommu_should_identity_map(dev, 1)) + domain_add_dev_info(si_domain, dev); + } return 0; } @@ -4988,8 +4993,7 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info) free_devinfo_mem(info); } -static void dmar_remove_one_dev_info(struct dmar_domain *domain, - struct device *dev) +static void dmar_remove_one_dev_info(struct device *dev) { struct device_domain_info *info; unsigned long flags; @@ -5078,7 +5082,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, old_domain = find_domain(dev); if (old_domain) { rcu_read_lock(); - dmar_remove_one_dev_info(old_domain, dev); + dmar_remove_one_dev_info(dev); rcu_read_unlock(); if (!domain_type_is_vm_or_si(old_domain) && @@ -5097,9 +5101,9 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, addr_width = cap_mgaw(iommu->cap); if (dmar_domain->max_addr > (1LL << addr_width)) { - pr_err("%s: iommu width (%d) is not " - "sufficient for the mapped address (%llx)\n", - __func__, addr_width, dmar_domain->max_addr); + dev_err(dev, "%s: iommu width (%d) is not " + "sufficient for the mapped address (%llx)\n", + __func__, addr_width, dmar_domain->max_addr); return -EFAULT; } dmar_domain->gaw = addr_width; @@ -5125,7 +5129,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, static void intel_iommu_detach_device(struct iommu_domain *domain, struct device *dev) { - dmar_remove_one_dev_info(to_dmar_domain(domain), dev); + dmar_remove_one_dev_info(dev); } static int intel_iommu_map(struct iommu_domain *domain, @@ -5400,7 +5404,7 @@ const struct iommu_ops intel_iommu_ops = { static void quirk_iommu_g4x_gfx(struct pci_dev *dev) { /* G4x/GM45 integrated gfx dmar support is totally busted. */ - pr_info("Disabling IOMMU for graphics on this chipset\n"); + pci_info(dev, "Disabling IOMMU for graphics on this chipset\n"); dmar_map_gfx = 0; } @@ -5418,7 +5422,7 @@ static void quirk_iommu_rwbf(struct pci_dev *dev) * Mobile 4 Series Chipset neglects to set RWBF capability, * but needs it. Same seems to hold for the desktop versions. */ - pr_info("Forcing write-buffer flush capability\n"); + pci_info(dev, "Forcing write-buffer flush capability\n"); rwbf_quirk = 1; } @@ -5448,11 +5452,11 @@ static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev) return; if (!(ggc & GGC_MEMORY_VT_ENABLED)) { - pr_info("BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n"); + pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n"); dmar_map_gfx = 0; } else if (dmar_map_gfx) { /* we have to ensure the gfx device is idle before we flush */ - pr_info("Disabling batched IOTLB flush on Ironlake\n"); + pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n"); intel_iommu_strict = 1; } } diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c @@ -466,8 +466,8 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, if (WARN_ON(!pte)) return; - intel_pasid_clear_entry(dev, pasid); did = pasid_get_domain_id(pte); + intel_pasid_clear_entry(dev, pasid); if (!ecap_coherent(iommu->ecap)) clflush_cache_range(pte, sizeof(*pte)); diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c @@ -180,14 +180,6 @@ static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address, rcu_read_unlock(); } -static void intel_change_pte(struct mmu_notifier *mn, struct mm_struct *mm, - unsigned long address, pte_t pte) -{ - struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); - - intel_flush_svm_range(svm, address, 1, 1, 0); -} - /* Pages have been freed at this point */ static void intel_invalidate_range(struct mmu_notifier *mn, struct mm_struct *mm, @@ -227,7 +219,6 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) static const struct mmu_notifier_ops intel_mmuops = { .release = intel_mm_release, - .change_pte = intel_change_pte, .invalidate_range = intel_invalidate_range, }; @@ -243,7 +234,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ int pasid_max; int ret; - if (!iommu) + if (!iommu || dmar_disabled) return -EINVAL; if (dev_is_pci(dev)) { @@ -470,20 +461,31 @@ EXPORT_SYMBOL_GPL(intel_svm_is_pasid_valid); /* Page request queue descriptor */ struct page_req_dsc { - u64 srr:1; - u64 bof:1; - u64 pasid_present:1; - u64 lpig:1; - u64 pasid:20; - u64 bus:8; - u64 private:23; - u64 prg_index:9; - u64 rd_req:1; - u64 wr_req:1; - u64 exe_req:1; - u64 priv_req:1; - u64 devfn:8; - u64 addr:52; + union { + struct { + u64 type:8; + u64 pasid_present:1; + u64 priv_data_present:1; + u64 rsvd:6; + u64 rid:16; + u64 pasid:20; + u64 exe_req:1; + u64 pm_req:1; + u64 rsvd2:10; + }; + u64 qw_0; + }; + union { + struct { + u64 rd_req:1; + u64 wr_req:1; + u64 lpig:1; + u64 prg_index:9; + u64 addr:52; + }; + u64 qw_1; + }; + u64 priv_data[2]; }; #define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x10) @@ -596,7 +598,7 @@ static irqreturn_t prq_event_thread(int irq, void *d) /* Accounting for major/minor faults? */ rcu_read_lock(); list_for_each_entry_rcu(sdev, &svm->devs, list) { - if (sdev->sid == PCI_DEVID(req->bus, req->devfn)) + if (sdev->sid == req->rid) break; } /* Other devices can go away, but the drivers are not permitted @@ -609,33 +611,35 @@ static irqreturn_t prq_event_thread(int irq, void *d) if (sdev && sdev->ops && sdev->ops->fault_cb) { int rwxp = (req->rd_req << 3) | (req->wr_req << 2) | - (req->exe_req << 1) | (req->priv_req); - sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr, req->private, rwxp, result); + (req->exe_req << 1) | (req->pm_req); + sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr, + req->priv_data, rwxp, result); } /* We get here in the error case where the PASID lookup failed, and these can be NULL. Do not use them below this point! */ sdev = NULL; svm = NULL; no_pasid: - if (req->lpig) { - /* Page Group Response */ + if (req->lpig || req->priv_data_present) { + /* + * Per VT-d spec. v3.0 ch7.7, system software must + * respond with page group response if private data + * is present (PDP) or last page in group (LPIG) bit + * is set. This is an additional VT-d feature beyond + * PCI ATS spec. + */ resp.qw0 = QI_PGRP_PASID(req->pasid) | - QI_PGRP_DID((req->bus << 8) | req->devfn) | + QI_PGRP_DID(req->rid) | QI_PGRP_PASID_P(req->pasid_present) | + QI_PGRP_PDP(req->pasid_present) | + QI_PGRP_RESP_CODE(result) | QI_PGRP_RESP_TYPE; resp.qw1 = QI_PGRP_IDX(req->prg_index) | - QI_PGRP_PRIV(req->private) | - QI_PGRP_RESP_CODE(result); - } else if (req->srr) { - /* Page Stream Response */ - resp.qw0 = QI_PSTRM_IDX(req->prg_index) | - QI_PSTRM_PRIV(req->private) | - QI_PSTRM_BUS(req->bus) | - QI_PSTRM_PASID(req->pasid) | - QI_PSTRM_RESP_TYPE; - resp.qw1 = QI_PSTRM_ADDR(address) | - QI_PSTRM_DEVFN(req->devfn) | - QI_PSTRM_RESP_CODE(result); + QI_PGRP_LPIG(req->lpig); + + if (req->priv_data_present) + memcpy(&resp.qw2, req->priv_data, + sizeof(req->priv_data)); } resp.qw2 = 0; resp.qw3 = 0; diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c @@ -294,6 +294,18 @@ static void set_irte_sid(struct irte *irte, unsigned int svt, irte->sid = sid; } +/* + * Set an IRTE to match only the bus number. Interrupt requests that reference + * this IRTE must have a requester-id whose bus number is between or equal + * to the start_bus and end_bus arguments. + */ +static void set_irte_verify_bus(struct irte *irte, unsigned int start_bus, + unsigned int end_bus) +{ + set_irte_sid(irte, SVT_VERIFY_BUS, SQ_ALL_16, + (start_bus << 8) | end_bus); +} + static int set_ioapic_sid(struct irte *irte, int apic) { int i; @@ -356,6 +368,8 @@ static int set_hpet_sid(struct irte *irte, u8 id) struct set_msi_sid_data { struct pci_dev *pdev; u16 alias; + int count; + int busmatch_count; }; static int set_msi_sid_cb(struct pci_dev *pdev, u16 alias, void *opaque) @@ -364,6 +378,10 @@ static int set_msi_sid_cb(struct pci_dev *pdev, u16 alias, void *opaque) data->pdev = pdev; data->alias = alias; + data->count++; + + if (PCI_BUS_NUM(alias) == pdev->bus->number) + data->busmatch_count++; return 0; } @@ -375,6 +393,8 @@ static int set_msi_sid(struct irte *irte, struct pci_dev *dev) if (!irte || !dev) return -1; + data.count = 0; + data.busmatch_count = 0; pci_for_each_dma_alias(dev, set_msi_sid_cb, &data); /* @@ -383,6 +403,11 @@ static int set_msi_sid(struct irte *irte, struct pci_dev *dev) * device is the case of a PCIe-to-PCI bridge, where the alias is for * the subordinate bus. In this case we can only verify the bus. * + * If there are multiple aliases, all with the same bus number, + * then all we can do is verify the bus. This is typical in NTB + * hardware which use proxy IDs where the device will generate traffic + * from multiple devfn numbers on the same bus. + * * If the alias device is on a different bus than our source device * then we have a topology based alias, use it. * @@ -391,9 +416,10 @@ static int set_msi_sid(struct irte *irte, struct pci_dev *dev) * original device. */ if (PCI_BUS_NUM(data.alias) != data.pdev->bus->number) - set_irte_sid(irte, SVT_VERIFY_BUS, SQ_ALL_16, - PCI_DEVID(PCI_BUS_NUM(data.alias), - dev->bus->number)); + set_irte_verify_bus(irte, PCI_BUS_NUM(data.alias), + dev->bus->number); + else if (data.count >= 2 && data.busmatch_count == data.count) + set_irte_verify_bus(irte, dev->bus->number, dev->bus->number); else if (data.pdev->bus->number != dev->bus->number) set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, data.alias); else diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c @@ -35,6 +35,7 @@ #include <linux/atomic.h> #include <linux/dma-mapping.h> #include <linux/gfp.h> +#include <linux/io-pgtable.h> #include <linux/iommu.h> #include <linux/kernel.h> #include <linux/kmemleak.h> @@ -45,8 +46,6 @@ #include <asm/barrier.h> -#include "io-pgtable.h" - /* Struct accessors */ #define io_pgtable_to_data(x) \ container_of((x), struct arm_v7s_io_pgtable, iop) @@ -217,7 +216,8 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp, if (dma != phys) goto out_unmap; } - kmemleak_ignore(table); + if (lvl == 2) + kmemleak_ignore(table); return table; out_unmap: diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c @@ -22,6 +22,7 @@ #include <linux/atomic.h> #include <linux/bitops.h> +#include <linux/io-pgtable.h> #include <linux/iommu.h> #include <linux/kernel.h> #include <linux/sizes.h> @@ -31,8 +32,6 @@ #include <asm/barrier.h> -#include "io-pgtable.h" - #define ARM_LPAE_MAX_ADDR_BITS 52 #define ARM_LPAE_S2_MAX_CONCAT_PAGES 16 #define ARM_LPAE_MAX_LEVELS 4 diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c @@ -19,11 +19,10 @@ */ #include <linux/bug.h> +#include <linux/io-pgtable.h> #include <linux/kernel.h> #include <linux/types.h> -#include "io-pgtable.h" - static const struct io_pgtable_init_fns * io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = { #ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE @@ -61,6 +60,7 @@ struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt, return &iop->ops; } +EXPORT_SYMBOL_GPL(alloc_io_pgtable_ops); /* * It is the IOMMU driver's responsibility to ensure that the page table @@ -77,3 +77,4 @@ void free_io_pgtable_ops(struct io_pgtable_ops *ops) io_pgtable_tlb_flush_all(iop); io_pgtable_init_table[iop->fmt]->free(iop); } +EXPORT_SYMBOL_GPL(free_io_pgtable_ops); diff --git a/drivers/iommu/iommu-debugfs.c b/drivers/iommu/iommu-debugfs.c @@ -12,6 +12,7 @@ #include <linux/debugfs.h> struct dentry *iommu_debugfs_dir; +EXPORT_SYMBOL_GPL(iommu_debugfs_dir); /** * iommu_debugfs_setup - create the top-level iommu directory in debugfs @@ -23,9 +24,9 @@ struct dentry *iommu_debugfs_dir; * Emit a strong warning at boot time to indicate that this feature is * enabled. * - * This function is called from iommu_init; drivers may then call - * iommu_debugfs_new_driver_dir() to instantiate a vendor-specific - * directory to be used to expose internal data. + * This function is called from iommu_init; drivers may then use + * iommu_debugfs_dir to instantiate a vendor-specific directory to be used + * to expose internal data. */ void iommu_debugfs_setup(void) { @@ -48,19 +49,3 @@ void iommu_debugfs_setup(void) pr_warn("*************************************************************\n"); } } - -/** - * iommu_debugfs_new_driver_dir - create a vendor directory under debugfs/iommu - * @vendor: name of the vendor-specific subdirectory to create - * - * This function is called by an IOMMU driver to create the top-level debugfs - * directory for that driver. - * - * Return: upon success, a pointer to the dentry for the new directory. - * NULL in case of failure. - */ -struct dentry *iommu_debugfs_new_driver_dir(const char *vendor) -{ - return debugfs_create_dir(vendor, iommu_debugfs_dir); -} -EXPORT_SYMBOL_GPL(iommu_debugfs_new_driver_dir); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c @@ -668,7 +668,7 @@ rename: trace_add_device_to_group(group->id, dev); - pr_info("Adding device %s to group %d\n", dev_name(dev), group->id); + dev_info(dev, "Adding to iommu group %d\n", group->id); return 0; @@ -684,7 +684,7 @@ err_remove_link: sysfs_remove_link(&dev->kobj, "iommu_group"); err_free_device: kfree(device); - pr_err("Failed to add device %s to group %d: %d\n", dev_name(dev), group->id, ret); + dev_err(dev, "Failed to add to iommu group %d: %d\n", group->id, ret); return ret; } EXPORT_SYMBOL_GPL(iommu_group_add_device); @@ -701,7 +701,7 @@ void iommu_group_remove_device(struct device *dev) struct iommu_group *group = dev->iommu_group; struct group_device *tmp_device, *device = NULL; - pr_info("Removing device %s from group %d\n", dev_name(dev), group->id); + dev_info(dev, "Removing from iommu group %d\n", group->id); /* Pre-notify listeners that a device is being removed. */ blocking_notifier_call_chain(&group->notifier, @@ -1585,13 +1585,14 @@ static size_t iommu_pgsize(struct iommu_domain *domain, int iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { + const struct iommu_ops *ops = domain->ops; unsigned long orig_iova = iova; unsigned int min_pagesz; size_t orig_size = size; phys_addr_t orig_paddr = paddr; int ret = 0; - if (unlikely(domain->ops->map == NULL || + if (unlikely(ops->map == NULL || domain->pgsize_bitmap == 0UL)) return -ENODEV; @@ -1620,7 +1621,7 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova, pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n", iova, &paddr, pgsize); - ret = domain->ops->map(domain, iova, paddr, pgsize, prot); + ret = ops->map(domain, iova, paddr, pgsize, prot); if (ret) break; @@ -1629,6 +1630,9 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova, size -= pgsize; } + if (ops->iotlb_sync_map) + ops->iotlb_sync_map(domain); + /* unroll mapping in case something went wrong */ if (ret) iommu_unmap(domain, orig_iova, orig_size - size); @@ -1951,7 +1955,7 @@ int iommu_request_dm_for_dev(struct device *dev) iommu_domain_free(group->default_domain); group->default_domain = dm_domain; - pr_info("Using direct mapping for device %s\n", dev_name(dev)); + dev_info(dev, "Using iommu direct mapping\n"); ret = 0; out: diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c @@ -15,6 +15,7 @@ #include <linux/init.h> #include <linux/interrupt.h> #include <linux/io.h> +#include <linux/io-pgtable.h> #include <linux/iommu.h> #include <linux/of.h> #include <linux/of_device.h> @@ -35,8 +36,6 @@ #define arm_iommu_detach_device(...) do {} while (0) #endif -#include "io-pgtable.h" - #define IPMMU_CTX_MAX 8 struct ipmmu_features { diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c @@ -103,6 +103,9 @@ int __init irq_remapping_prepare(void) else if (IS_ENABLED(CONFIG_AMD_IOMMU) && amd_iommu_irq_ops.prepare() == 0) remap_ops = &amd_iommu_irq_ops; + else if (IS_ENABLED(CONFIG_HYPERV_IOMMU) && + hyperv_irq_remap_ops.prepare() == 0) + remap_ops = &hyperv_irq_remap_ops; else return -ENOSYS; diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h @@ -64,6 +64,7 @@ struct irq_remap_ops { extern struct irq_remap_ops intel_irq_remap_ops; extern struct irq_remap_ops amd_iommu_irq_ops; +extern struct irq_remap_ops hyperv_irq_remap_ops; #else /* CONFIG_IRQ_REMAP */ diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c @@ -23,6 +23,7 @@ #include <linux/platform_device.h> #include <linux/errno.h> #include <linux/io.h> +#include <linux/io-pgtable.h> #include <linux/interrupt.h> #include <linux/list.h> #include <linux/spinlock.h> @@ -37,7 +38,6 @@ #include "msm_iommu_hw-8xxx.h" #include "msm_iommu.h" -#include "io-pgtable.h" #define MRC(reg, processor, op1, crn, crm, op2) \ __asm__ __volatile__ ( \ @@ -461,10 +461,10 @@ static int msm_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) master->num = msm_iommu_alloc_ctx(iommu->context_map, 0, iommu->ncb); - if (IS_ERR_VALUE(master->num)) { - ret = -ENODEV; - goto fail; - } + if (IS_ERR_VALUE(master->num)) { + ret = -ENODEV; + goto fail; + } config_mids(iommu, master); __program_context(iommu->base, master->num, priv); diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h @@ -19,13 +19,12 @@ #include <linux/component.h> #include <linux/device.h> #include <linux/io.h> +#include <linux/io-pgtable.h> #include <linux/iommu.h> #include <linux/list.h> #include <linux/spinlock.h> #include <soc/mediatek/smi.h> -#include "io-pgtable.h" - struct mtk_iommu_suspend_reg { u32 standard_axi_mode; u32 dcm_dis; diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c @@ -474,7 +474,7 @@ static int mtk_iommu_add_device(struct device *dev) return err; } - return iommu_device_link(&data->iommu, dev);; + return iommu_device_link(&data->iommu, dev); } static void mtk_iommu_remove_device(struct device *dev) diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c @@ -26,6 +26,7 @@ #include <linux/interrupt.h> #include <linux/io.h> #include <linux/io-64-nonatomic-hi-lo.h> +#include <linux/io-pgtable.h> #include <linux/iommu.h> #include <linux/iopoll.h> #include <linux/kconfig.h> @@ -42,7 +43,6 @@ #include <linux/slab.h> #include <linux/spinlock.h> -#include "io-pgtable.h" #include "arm-smmu-regs.h" #define SMMU_INTR_SEL_NS 0x2000 diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c @@ -1,5 +1,5 @@ /* - * IOMMU API for GART in Tegra20 + * IOMMU API for Graphics Address Relocation Table on Tegra20 * * Copyright (c) 2010-2012, NVIDIA CORPORATION. All rights reserved. * @@ -19,101 +19,75 @@ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. */ -#define pr_fmt(fmt) "%s(): " fmt, __func__ +#define dev_fmt(fmt) "gart: " fmt -#include <linux/init.h> +#include <linux/io.h> +#include <linux/iommu.h> #include <linux/moduleparam.h> #include <linux/platform_device.h> -#include <linux/spinlock.h> #include <linux/slab.h> +#include <linux/spinlock.h> #include <linux/vmalloc.h> -#include <linux/mm.h> -#include <linux/list.h> -#include <linux/device.h> -#include <linux/io.h> -#include <linux/iommu.h> -#include <linux/of.h> - -#include <asm/cacheflush.h> -/* bitmap of the page sizes currently supported */ -#define GART_IOMMU_PGSIZES (SZ_4K) +#include <soc/tegra/mc.h> #define GART_REG_BASE 0x24 #define GART_CONFIG (0x24 - GART_REG_BASE) #define GART_ENTRY_ADDR (0x28 - GART_REG_BASE) #define GART_ENTRY_DATA (0x2c - GART_REG_BASE) -#define GART_ENTRY_PHYS_ADDR_VALID (1 << 31) + +#define GART_ENTRY_PHYS_ADDR_VALID BIT(31) #define GART_PAGE_SHIFT 12 #define GART_PAGE_SIZE (1 << GART_PAGE_SHIFT) -#define GART_PAGE_MASK \ - (~(GART_PAGE_SIZE - 1) & ~GART_ENTRY_PHYS_ADDR_VALID) +#define GART_PAGE_MASK GENMASK(30, GART_PAGE_SHIFT) -struct gart_client { - struct device *dev; - struct list_head list; -}; +/* bitmap of the page sizes currently supported */ +#define GART_IOMMU_PGSIZES (GART_PAGE_SIZE) struct gart_device { void __iomem *regs; u32 *savedata; - u32 page_count; /* total remappable size */ - dma_addr_t iovmm_base; /* offset to vmm_area */ + unsigned long iovmm_base; /* offset to vmm_area start */ + unsigned long iovmm_end; /* offset to vmm_area end */ spinlock_t pte_lock; /* for pagetable */ - struct list_head client; - spinlock_t client_lock; /* for client list */ - struct device *dev; - + spinlock_t dom_lock; /* for active domain */ + unsigned int active_devices; /* number of active devices */ + struct iommu_domain *active_domain; /* current active domain */ struct iommu_device iommu; /* IOMMU Core handle */ -}; - -struct gart_domain { - struct iommu_domain domain; /* generic domain handle */ - struct gart_device *gart; /* link to gart device */ + struct device *dev; }; static struct gart_device *gart_handle; /* unique for a system */ static bool gart_debug; -#define GART_PTE(_pfn) \ - (GART_ENTRY_PHYS_ADDR_VALID | ((_pfn) << PAGE_SHIFT)) - -static struct gart_domain *to_gart_domain(struct iommu_domain *dom) -{ - return container_of(dom, struct gart_domain, domain); -} - /* * Any interaction between any block on PPSB and a block on APB or AHB * must have these read-back to ensure the APB/AHB bus transaction is * complete before initiating activity on the PPSB block. */ -#define FLUSH_GART_REGS(gart) ((void)readl((gart)->regs + GART_CONFIG)) +#define FLUSH_GART_REGS(gart) readl_relaxed((gart)->regs + GART_CONFIG) #define for_each_gart_pte(gart, iova) \ for (iova = gart->iovmm_base; \ - iova < gart->iovmm_base + GART_PAGE_SIZE * gart->page_count; \ + iova < gart->iovmm_end; \ iova += GART_PAGE_SIZE) static inline void gart_set_pte(struct gart_device *gart, - unsigned long offs, u32 pte) + unsigned long iova, unsigned long pte) { - writel(offs, gart->regs + GART_ENTRY_ADDR); - writel(pte, gart->regs + GART_ENTRY_DATA); - - dev_dbg(gart->dev, "%s %08lx:%08x\n", - pte ? "map" : "unmap", offs, pte & GART_PAGE_MASK); + writel_relaxed(iova, gart->regs + GART_ENTRY_ADDR); + writel_relaxed(pte, gart->regs + GART_ENTRY_DATA); } static inline unsigned long gart_read_pte(struct gart_device *gart, - unsigned long offs) + unsigned long iova) { unsigned long pte; - writel(offs, gart->regs + GART_ENTRY_ADDR); - pte = readl(gart->regs + GART_ENTRY_DATA); + writel_relaxed(iova, gart->regs + GART_ENTRY_ADDR); + pte = readl_relaxed(gart->regs + GART_ENTRY_DATA); return pte; } @@ -125,224 +99,155 @@ static void do_gart_setup(struct gart_device *gart, const u32 *data) for_each_gart_pte(gart, iova) gart_set_pte(gart, iova, data ? *(data++) : 0); - writel(1, gart->regs + GART_CONFIG); + writel_relaxed(1, gart->regs + GART_CONFIG); FLUSH_GART_REGS(gart); } -#ifdef DEBUG -static void gart_dump_table(struct gart_device *gart) -{ - unsigned long iova; - unsigned long flags; - - spin_lock_irqsave(&gart->pte_lock, flags); - for_each_gart_pte(gart, iova) { - unsigned long pte; - - pte = gart_read_pte(gart, iova); - - dev_dbg(gart->dev, "%s %08lx:%08lx\n", - (GART_ENTRY_PHYS_ADDR_VALID & pte) ? "v" : " ", - iova, pte & GART_PAGE_MASK); - } - spin_unlock_irqrestore(&gart->pte_lock, flags); -} -#else -static inline void gart_dump_table(struct gart_device *gart) +static inline bool gart_iova_range_invalid(struct gart_device *gart, + unsigned long iova, size_t bytes) { + return unlikely(iova < gart->iovmm_base || bytes != GART_PAGE_SIZE || + iova + bytes > gart->iovmm_end); } -#endif -static inline bool gart_iova_range_valid(struct gart_device *gart, - unsigned long iova, size_t bytes) +static inline bool gart_pte_valid(struct gart_device *gart, unsigned long iova) { - unsigned long iova_start, iova_end, gart_start, gart_end; - - iova_start = iova; - iova_end = iova_start + bytes - 1; - gart_start = gart->iovmm_base; - gart_end = gart_start + gart->page_count * GART_PAGE_SIZE - 1; - - if (iova_start < gart_start) - return false; - if (iova_end > gart_end) - return false; - return true; + return !!(gart_read_pte(gart, iova) & GART_ENTRY_PHYS_ADDR_VALID); } static int gart_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) { - struct gart_domain *gart_domain = to_gart_domain(domain); - struct gart_device *gart = gart_domain->gart; - struct gart_client *client, *c; - int err = 0; - - client = devm_kzalloc(gart->dev, sizeof(*c), GFP_KERNEL); - if (!client) - return -ENOMEM; - client->dev = dev; - - spin_lock(&gart->client_lock); - list_for_each_entry(c, &gart->client, list) { - if (c->dev == dev) { - dev_err(gart->dev, - "%s is already attached\n", dev_name(dev)); - err = -EINVAL; - goto fail; - } + struct gart_device *gart = gart_handle; + int ret = 0; + + spin_lock(&gart->dom_lock); + + if (gart->active_domain && gart->active_domain != domain) { + ret = -EBUSY; + } else if (dev->archdata.iommu != domain) { + dev->archdata.iommu = domain; + gart->active_domain = domain; + gart->active_devices++; } - list_add(&client->list, &gart->client); - spin_unlock(&gart->client_lock); - dev_dbg(gart->dev, "Attached %s\n", dev_name(dev)); - return 0; -fail: - devm_kfree(gart->dev, client); - spin_unlock(&gart->client_lock); - return err; + spin_unlock(&gart->dom_lock); + + return ret; } static void gart_iommu_detach_dev(struct iommu_domain *domain, struct device *dev) { - struct gart_domain *gart_domain = to_gart_domain(domain); - struct gart_device *gart = gart_domain->gart; - struct gart_client *c; - - spin_lock(&gart->client_lock); - - list_for_each_entry(c, &gart->client, list) { - if (c->dev == dev) { - list_del(&c->list); - devm_kfree(gart->dev, c); - dev_dbg(gart->dev, "Detached %s\n", dev_name(dev)); - goto out; - } + struct gart_device *gart = gart_handle; + + spin_lock(&gart->dom_lock); + + if (dev->archdata.iommu == domain) { + dev->archdata.iommu = NULL; + + if (--gart->active_devices == 0) + gart->active_domain = NULL; } - dev_err(gart->dev, "Couldn't find\n"); -out: - spin_unlock(&gart->client_lock); + + spin_unlock(&gart->dom_lock); } static struct iommu_domain *gart_iommu_domain_alloc(unsigned type) { - struct gart_domain *gart_domain; - struct gart_device *gart; + struct iommu_domain *domain; if (type != IOMMU_DOMAIN_UNMANAGED) return NULL; - gart = gart_handle; - if (!gart) - return NULL; - - gart_domain = kzalloc(sizeof(*gart_domain), GFP_KERNEL); - if (!gart_domain) - return NULL; - - gart_domain->gart = gart; - gart_domain->domain.geometry.aperture_start = gart->iovmm_base; - gart_domain->domain.geometry.aperture_end = gart->iovmm_base + - gart->page_count * GART_PAGE_SIZE - 1; - gart_domain->domain.geometry.force_aperture = true; + domain = kzalloc(sizeof(*domain), GFP_KERNEL); + if (domain) { + domain->geometry.aperture_start = gart_handle->iovmm_base; + domain->geometry.aperture_end = gart_handle->iovmm_end - 1; + domain->geometry.force_aperture = true; + } - return &gart_domain->domain; + return domain; } static void gart_iommu_domain_free(struct iommu_domain *domain) { - struct gart_domain *gart_domain = to_gart_domain(domain); - struct gart_device *gart = gart_domain->gart; - - if (gart) { - spin_lock(&gart->client_lock); - if (!list_empty(&gart->client)) { - struct gart_client *c; - - list_for_each_entry(c, &gart->client, list) - gart_iommu_detach_dev(domain, c->dev); - } - spin_unlock(&gart->client_lock); + WARN_ON(gart_handle->active_domain == domain); + kfree(domain); +} + +static inline int __gart_iommu_map(struct gart_device *gart, unsigned long iova, + unsigned long pa) +{ + if (unlikely(gart_debug && gart_pte_valid(gart, iova))) { + dev_err(gart->dev, "Page entry is in-use\n"); + return -EINVAL; } - kfree(gart_domain); + gart_set_pte(gart, iova, GART_ENTRY_PHYS_ADDR_VALID | pa); + + return 0; } static int gart_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t pa, size_t bytes, int prot) { - struct gart_domain *gart_domain = to_gart_domain(domain); - struct gart_device *gart = gart_domain->gart; - unsigned long flags; - unsigned long pfn; - unsigned long pte; + struct gart_device *gart = gart_handle; + int ret; - if (!gart_iova_range_valid(gart, iova, bytes)) + if (gart_iova_range_invalid(gart, iova, bytes)) return -EINVAL; - spin_lock_irqsave(&gart->pte_lock, flags); - pfn = __phys_to_pfn(pa); - if (!pfn_valid(pfn)) { - dev_err(gart->dev, "Invalid page: %pa\n", &pa); - spin_unlock_irqrestore(&gart->pte_lock, flags); + spin_lock(&gart->pte_lock); + ret = __gart_iommu_map(gart, iova, (unsigned long)pa); + spin_unlock(&gart->pte_lock); + + return ret; +} + +static inline int __gart_iommu_unmap(struct gart_device *gart, + unsigned long iova) +{ + if (unlikely(gart_debug && !gart_pte_valid(gart, iova))) { + dev_err(gart->dev, "Page entry is invalid\n"); return -EINVAL; } - if (gart_debug) { - pte = gart_read_pte(gart, iova); - if (pte & GART_ENTRY_PHYS_ADDR_VALID) { - spin_unlock_irqrestore(&gart->pte_lock, flags); - dev_err(gart->dev, "Page entry is in-use\n"); - return -EBUSY; - } - } - gart_set_pte(gart, iova, GART_PTE(pfn)); - FLUSH_GART_REGS(gart); - spin_unlock_irqrestore(&gart->pte_lock, flags); + + gart_set_pte(gart, iova, 0); + return 0; } static size_t gart_iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t bytes) { - struct gart_domain *gart_domain = to_gart_domain(domain); - struct gart_device *gart = gart_domain->gart; - unsigned long flags; + struct gart_device *gart = gart_handle; + int err; - if (!gart_iova_range_valid(gart, iova, bytes)) + if (gart_iova_range_invalid(gart, iova, bytes)) return 0; - spin_lock_irqsave(&gart->pte_lock, flags); - gart_set_pte(gart, iova, 0); - FLUSH_GART_REGS(gart); - spin_unlock_irqrestore(&gart->pte_lock, flags); - return bytes; + spin_lock(&gart->pte_lock); + err = __gart_iommu_unmap(gart, iova); + spin_unlock(&gart->pte_lock); + + return err ? 0 : bytes; } static phys_addr_t gart_iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { - struct gart_domain *gart_domain = to_gart_domain(domain); - struct gart_device *gart = gart_domain->gart; + struct gart_device *gart = gart_handle; unsigned long pte; - phys_addr_t pa; - unsigned long flags; - if (!gart_iova_range_valid(gart, iova, 0)) + if (gart_iova_range_invalid(gart, iova, GART_PAGE_SIZE)) return -EINVAL; - spin_lock_irqsave(&gart->pte_lock, flags); + spin_lock(&gart->pte_lock); pte = gart_read_pte(gart, iova); - spin_unlock_irqrestore(&gart->pte_lock, flags); + spin_unlock(&gart->pte_lock); - pa = (pte & GART_PAGE_MASK); - if (!pfn_valid(__phys_to_pfn(pa))) { - dev_err(gart->dev, "No entry for %08llx:%pa\n", - (unsigned long long)iova, &pa); - gart_dump_table(gart); - return -EINVAL; - } - return pa; + return pte & GART_PAGE_MASK; } static bool gart_iommu_capable(enum iommu_cap cap) @@ -352,8 +257,12 @@ static bool gart_iommu_capable(enum iommu_cap cap) static int gart_iommu_add_device(struct device *dev) { - struct iommu_group *group = iommu_group_get_for_dev(dev); + struct iommu_group *group; + + if (!dev->iommu_fwspec) + return -ENODEV; + group = iommu_group_get_for_dev(dev); if (IS_ERR(group)) return PTR_ERR(group); @@ -370,6 +279,17 @@ static void gart_iommu_remove_device(struct device *dev) iommu_device_unlink(&gart_handle->iommu, dev); } +static int gart_iommu_of_xlate(struct device *dev, + struct of_phandle_args *args) +{ + return 0; +} + +static void gart_iommu_sync(struct iommu_domain *domain) +{ + FLUSH_GART_REGS(gart_handle); +} + static const struct iommu_ops gart_iommu_ops = { .capable = gart_iommu_capable, .domain_alloc = gart_iommu_domain_alloc, @@ -383,129 +303,96 @@ static const struct iommu_ops gart_iommu_ops = { .unmap = gart_iommu_unmap, .iova_to_phys = gart_iommu_iova_to_phys, .pgsize_bitmap = GART_IOMMU_PGSIZES, + .of_xlate = gart_iommu_of_xlate, + .iotlb_sync_map = gart_iommu_sync, + .iotlb_sync = gart_iommu_sync, }; -static int tegra_gart_suspend(struct device *dev) +int tegra_gart_suspend(struct gart_device *gart) { - struct gart_device *gart = dev_get_drvdata(dev); - unsigned long iova; u32 *data = gart->savedata; - unsigned long flags; + unsigned long iova; + + /* + * All GART users shall be suspended at this point. Disable + * address translation to trap all GART accesses as invalid + * memory accesses. + */ + writel_relaxed(0, gart->regs + GART_CONFIG); + FLUSH_GART_REGS(gart); - spin_lock_irqsave(&gart->pte_lock, flags); for_each_gart_pte(gart, iova) *(data++) = gart_read_pte(gart, iova); - spin_unlock_irqrestore(&gart->pte_lock, flags); + return 0; } -static int tegra_gart_resume(struct device *dev) +int tegra_gart_resume(struct gart_device *gart) { - struct gart_device *gart = dev_get_drvdata(dev); - unsigned long flags; - - spin_lock_irqsave(&gart->pte_lock, flags); do_gart_setup(gart, gart->savedata); - spin_unlock_irqrestore(&gart->pte_lock, flags); + return 0; } -static int tegra_gart_probe(struct platform_device *pdev) +struct gart_device *tegra_gart_probe(struct device *dev, struct tegra_mc *mc) { struct gart_device *gart; - struct resource *res, *res_remap; - void __iomem *gart_regs; - struct device *dev = &pdev->dev; - int ret; - - if (gart_handle) - return -EIO; + struct resource *res; + int err; BUILD_BUG_ON(PAGE_SHIFT != GART_PAGE_SHIFT); /* the GART memory aperture is required */ - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - res_remap = platform_get_resource(pdev, IORESOURCE_MEM, 1); - if (!res || !res_remap) { - dev_err(dev, "GART memory aperture expected\n"); - return -ENXIO; - } - - gart = devm_kzalloc(dev, sizeof(*gart), GFP_KERNEL); - if (!gart) { - dev_err(dev, "failed to allocate gart_device\n"); - return -ENOMEM; + res = platform_get_resource(to_platform_device(dev), IORESOURCE_MEM, 1); + if (!res) { + dev_err(dev, "Memory aperture resource unavailable\n"); + return ERR_PTR(-ENXIO); } - gart_regs = devm_ioremap(dev, res->start, resource_size(res)); - if (!gart_regs) { - dev_err(dev, "failed to remap GART registers\n"); - return -ENXIO; - } - - ret = iommu_device_sysfs_add(&gart->iommu, &pdev->dev, NULL, - dev_name(&pdev->dev)); - if (ret) { - dev_err(dev, "Failed to register IOMMU in sysfs\n"); - return ret; - } - - iommu_device_set_ops(&gart->iommu, &gart_iommu_ops); + gart = kzalloc(sizeof(*gart), GFP_KERNEL); + if (!gart) + return ERR_PTR(-ENOMEM); - ret = iommu_device_register(&gart->iommu); - if (ret) { - dev_err(dev, "Failed to register IOMMU\n"); - iommu_device_sysfs_remove(&gart->iommu); - return ret; - } + gart_handle = gart; - gart->dev = &pdev->dev; + gart->dev = dev; + gart->regs = mc->regs + GART_REG_BASE; + gart->iovmm_base = res->start; + gart->iovmm_end = res->end + 1; spin_lock_init(&gart->pte_lock); - spin_lock_init(&gart->client_lock); - INIT_LIST_HEAD(&gart->client); - gart->regs = gart_regs; - gart->iovmm_base = (dma_addr_t)res_remap->start; - gart->page_count = (resource_size(res_remap) >> GART_PAGE_SHIFT); - - gart->savedata = vmalloc(array_size(sizeof(u32), gart->page_count)); - if (!gart->savedata) { - dev_err(dev, "failed to allocate context save area\n"); - return -ENOMEM; - } + spin_lock_init(&gart->dom_lock); - platform_set_drvdata(pdev, gart); do_gart_setup(gart, NULL); - gart_handle = gart; + err = iommu_device_sysfs_add(&gart->iommu, dev, NULL, "gart"); + if (err) + goto free_gart; - return 0; -} + iommu_device_set_ops(&gart->iommu, &gart_iommu_ops); + iommu_device_set_fwnode(&gart->iommu, dev->fwnode); -static const struct dev_pm_ops tegra_gart_pm_ops = { - .suspend = tegra_gart_suspend, - .resume = tegra_gart_resume, -}; + err = iommu_device_register(&gart->iommu); + if (err) + goto remove_sysfs; -static const struct of_device_id tegra_gart_of_match[] = { - { .compatible = "nvidia,tegra20-gart", }, - { }, -}; + gart->savedata = vmalloc(resource_size(res) / GART_PAGE_SIZE * + sizeof(u32)); + if (!gart->savedata) { + err = -ENOMEM; + goto unregister_iommu; + } -static struct platform_driver tegra_gart_driver = { - .probe = tegra_gart_probe, - .driver = { - .name = "tegra-gart", - .pm = &tegra_gart_pm_ops, - .of_match_table = tegra_gart_of_match, - .suppress_bind_attrs = true, - }, -}; + return gart; -static int __init tegra_gart_init(void) -{ - return platform_driver_register(&tegra_gart_driver); +unregister_iommu: + iommu_device_unregister(&gart->iommu); +remove_sysfs: + iommu_device_sysfs_remove(&gart->iommu); +free_gart: + kfree(gart); + + return ERR_PTR(err); } -subsys_initcall(tegra_gart_init); module_param(gart_debug, bool, 0644); MODULE_PARM_DESC(gart_debug, "Enable GART debugging"); diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c @@ -982,10 +982,6 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev, u32 value; int err; - /* This can happen on Tegra20 which doesn't have an SMMU */ - if (!soc) - return NULL; - smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL); if (!smmu) return ERR_PTR(-ENOMEM); diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c @@ -12,6 +12,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/of.h> +#include <linux/of_device.h> #include <linux/platform_device.h> #include <linux/slab.h> #include <linux/sort.h> @@ -38,6 +39,7 @@ #define MC_ERR_ADR 0x0c +#define MC_GART_ERROR_REQ 0x30 #define MC_DECERR_EMEM_OTHERS_STATUS 0x58 #define MC_SECURITY_VIOLATION_STATUS 0x74 @@ -51,7 +53,7 @@ static const struct of_device_id tegra_mc_of_match[] = { #ifdef CONFIG_ARCH_TEGRA_2x_SOC - { .compatible = "nvidia,tegra20-mc", .data = &tegra20_mc_soc }, + { .compatible = "nvidia,tegra20-mc-gart", .data = &tegra20_mc_soc }, #endif #ifdef CONFIG_ARCH_TEGRA_3x_SOC { .compatible = "nvidia,tegra30-mc", .data = &tegra30_mc_soc }, @@ -161,7 +163,7 @@ static int tegra_mc_hotreset_assert(struct reset_controller_dev *rcdev, /* block clients DMA requests */ err = rst_ops->block_dma(mc, rst); if (err) { - dev_err(mc->dev, "Failed to block %s DMA: %d\n", + dev_err(mc->dev, "failed to block %s DMA: %d\n", rst->name, err); return err; } @@ -171,7 +173,7 @@ static int tegra_mc_hotreset_assert(struct reset_controller_dev *rcdev, /* wait for completion of the outstanding DMA requests */ while (!rst_ops->dma_idling(mc, rst)) { if (!retries--) { - dev_err(mc->dev, "Failed to flush %s DMA\n", + dev_err(mc->dev, "failed to flush %s DMA\n", rst->name); return -EBUSY; } @@ -184,7 +186,7 @@ static int tegra_mc_hotreset_assert(struct reset_controller_dev *rcdev, /* clear clients DMA requests sitting before arbitration */ err = rst_ops->hotreset_assert(mc, rst); if (err) { - dev_err(mc->dev, "Failed to hot reset %s: %d\n", + dev_err(mc->dev, "failed to hot reset %s: %d\n", rst->name, err); return err; } @@ -213,7 +215,7 @@ static int tegra_mc_hotreset_deassert(struct reset_controller_dev *rcdev, /* take out client from hot reset */ err = rst_ops->hotreset_deassert(mc, rst); if (err) { - dev_err(mc->dev, "Failed to deassert hot reset %s: %d\n", + dev_err(mc->dev, "failed to deassert hot reset %s: %d\n", rst->name, err); return err; } @@ -223,7 +225,7 @@ static int tegra_mc_hotreset_deassert(struct reset_controller_dev *rcdev, /* allow new DMA requests to proceed to arbitration */ err = rst_ops->unblock_dma(mc, rst); if (err) { - dev_err(mc->dev, "Failed to unblock %s DMA : %d\n", + dev_err(mc->dev, "failed to unblock %s DMA : %d\n", rst->name, err); return err; } @@ -575,8 +577,15 @@ static __maybe_unused irqreturn_t tegra20_mc_irq(int irq, void *data) break; case MC_INT_INVALID_GART_PAGE: - dev_err_ratelimited(mc->dev, "%s\n", error); - continue; + reg = MC_GART_ERROR_REQ; + value = mc_readl(mc, reg); + + id = (value >> 1) & mc->soc->client_id_mask; + desc = error_names[2]; + + if (value & BIT(0)) + direction = "write"; + break; case MC_INT_SECURITY_VIOLATION: reg = MC_SECURITY_VIOLATION_STATUS; @@ -611,23 +620,18 @@ static __maybe_unused irqreturn_t tegra20_mc_irq(int irq, void *data) static int tegra_mc_probe(struct platform_device *pdev) { - const struct of_device_id *match; struct resource *res; struct tegra_mc *mc; void *isr; int err; - match = of_match_node(tegra_mc_of_match, pdev->dev.of_node); - if (!match) - return -ENODEV; - mc = devm_kzalloc(&pdev->dev, sizeof(*mc), GFP_KERNEL); if (!mc) return -ENOMEM; platform_set_drvdata(pdev, mc); spin_lock_init(&mc->lock); - mc->soc = match->data; + mc->soc = of_device_get_match_data(&pdev->dev); mc->dev = &pdev->dev; /* length of MC tick in nanoseconds */ @@ -638,38 +642,35 @@ static int tegra_mc_probe(struct platform_device *pdev) if (IS_ERR(mc->regs)) return PTR_ERR(mc->regs); + mc->clk = devm_clk_get(&pdev->dev, "mc"); + if (IS_ERR(mc->clk)) { + dev_err(&pdev->dev, "failed to get MC clock: %ld\n", + PTR_ERR(mc->clk)); + return PTR_ERR(mc->clk); + } + #ifdef CONFIG_ARCH_TEGRA_2x_SOC if (mc->soc == &tegra20_mc_soc) { - res = platform_get_resource(pdev, IORESOURCE_MEM, 1); - mc->regs2 = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(mc->regs2)) - return PTR_ERR(mc->regs2); - isr = tegra20_mc_irq; } else #endif { - mc->clk = devm_clk_get(&pdev->dev, "mc"); - if (IS_ERR(mc->clk)) { - dev_err(&pdev->dev, "failed to get MC clock: %ld\n", - PTR_ERR(mc->clk)); - return PTR_ERR(mc->clk); - } - err = tegra_mc_setup_latency_allowance(mc); if (err < 0) { - dev_err(&pdev->dev, "failed to setup latency allowance: %d\n", + dev_err(&pdev->dev, + "failed to setup latency allowance: %d\n", err); return err; } isr = tegra_mc_irq; - } - err = tegra_mc_setup_timings(mc); - if (err < 0) { - dev_err(&pdev->dev, "failed to setup timings: %d\n", err); - return err; + err = tegra_mc_setup_timings(mc); + if (err < 0) { + dev_err(&pdev->dev, "failed to setup timings: %d\n", + err); + return err; + } } mc->irq = platform_get_irq(pdev, 0); @@ -678,11 +679,11 @@ static int tegra_mc_probe(struct platform_device *pdev) return mc->irq; } - WARN(!mc->soc->client_id_mask, "Missing client ID mask for this SoC\n"); + WARN(!mc->soc->client_id_mask, "missing client ID mask for this SoC\n"); mc_writel(mc, mc->soc->intmask, MC_INTMASK); - err = devm_request_irq(&pdev->dev, mc->irq, isr, IRQF_SHARED, + err = devm_request_irq(&pdev->dev, mc->irq, isr, 0, dev_name(&pdev->dev), mc); if (err < 0) { dev_err(&pdev->dev, "failed to request IRQ#%u: %d\n", mc->irq, @@ -695,20 +696,65 @@ static int tegra_mc_probe(struct platform_device *pdev) dev_err(&pdev->dev, "failed to register reset controller: %d\n", err); - if (IS_ENABLED(CONFIG_TEGRA_IOMMU_SMMU)) { + if (IS_ENABLED(CONFIG_TEGRA_IOMMU_SMMU) && mc->soc->smmu) { mc->smmu = tegra_smmu_probe(&pdev->dev, mc->soc->smmu, mc); - if (IS_ERR(mc->smmu)) + if (IS_ERR(mc->smmu)) { dev_err(&pdev->dev, "failed to probe SMMU: %ld\n", PTR_ERR(mc->smmu)); + mc->smmu = NULL; + } + } + + if (IS_ENABLED(CONFIG_TEGRA_IOMMU_GART) && !mc->soc->smmu) { + mc->gart = tegra_gart_probe(&pdev->dev, mc); + if (IS_ERR(mc->gart)) { + dev_err(&pdev->dev, "failed to probe GART: %ld\n", + PTR_ERR(mc->gart)); + mc->gart = NULL; + } + } + + return 0; +} + +static int tegra_mc_suspend(struct device *dev) +{ + struct tegra_mc *mc = dev_get_drvdata(dev); + int err; + + if (IS_ENABLED(CONFIG_TEGRA_IOMMU_GART) && mc->gart) { + err = tegra_gart_suspend(mc->gart); + if (err) + return err; } return 0; } +static int tegra_mc_resume(struct device *dev) +{ + struct tegra_mc *mc = dev_get_drvdata(dev); + int err; + + if (IS_ENABLED(CONFIG_TEGRA_IOMMU_GART) && mc->gart) { + err = tegra_gart_resume(mc->gart); + if (err) + return err; + } + + return 0; +} + +static const struct dev_pm_ops tegra_mc_pm_ops = { + .suspend = tegra_mc_suspend, + .resume = tegra_mc_resume, +}; + static struct platform_driver tegra_mc_driver = { .driver = { .name = "tegra-mc", .of_match_table = tegra_mc_of_match, + .pm = &tegra_mc_pm_ops, .suppress_bind_attrs = true, }, .prevent_deferred_probe = true, diff --git a/drivers/memory/tegra/mc.h b/drivers/memory/tegra/mc.h @@ -26,19 +26,13 @@ static inline u32 mc_readl(struct tegra_mc *mc, unsigned long offset) { - if (mc->regs2 && offset >= 0x24) - return readl(mc->regs2 + offset - 0x3c); - - return readl(mc->regs + offset); + return readl_relaxed(mc->regs + offset); } static inline void mc_writel(struct tegra_mc *mc, u32 value, unsigned long offset) { - if (mc->regs2 && offset >= 0x24) - return writel(value, mc->regs2 + offset - 0x3c); - - writel(value, mc->regs + offset); + writel_relaxed(value, mc->regs + offset); } extern const struct tegra_mc_reset_ops terga_mc_reset_ops_common; diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c @@ -142,6 +142,33 @@ int pci_ats_queue_depth(struct pci_dev *dev) } EXPORT_SYMBOL_GPL(pci_ats_queue_depth); +/** + * pci_ats_page_aligned - Return Page Aligned Request bit status. + * @pdev: the PCI device + * + * Returns 1, if the Untranslated Addresses generated by the device + * are always aligned or 0 otherwise. + * + * Per PCIe spec r4.0, sec 10.5.1.2, if the Page Aligned Request bit + * is set, it indicates the Untranslated Addresses generated by the + * device are always aligned to a 4096 byte boundary. + */ +int pci_ats_page_aligned(struct pci_dev *pdev) +{ + u16 cap; + + if (!pdev->ats_cap) + return 0; + + pci_read_config_word(pdev, pdev->ats_cap + PCI_ATS_CAP, &cap); + + if (cap & PCI_ATS_CAP_PAGE_ALIGNED) + return 1; + + return 0; +} +EXPORT_SYMBOL_GPL(pci_ats_page_aligned); + #ifdef CONFIG_PCI_PRI /** * pci_enable_pri - Enable PRI capability @@ -368,6 +395,36 @@ int pci_pasid_features(struct pci_dev *pdev) } EXPORT_SYMBOL_GPL(pci_pasid_features); +/** + * pci_prg_resp_pasid_required - Return PRG Response PASID Required bit + * status. + * @pdev: PCI device structure + * + * Returns 1 if PASID is required in PRG Response Message, 0 otherwise. + * + * Even though the PRG response PASID status is read from PRI Status + * Register, since this API will mainly be used by PASID users, this + * function is defined within #ifdef CONFIG_PCI_PASID instead of + * CONFIG_PCI_PRI. + */ +int pci_prg_resp_pasid_required(struct pci_dev *pdev) +{ + u16 status; + int pos; + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); + if (!pos) + return 0; + + pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status); + + if (status & PCI_PRI_STATUS_PASID) + return 1; + + return 0; +} +EXPORT_SYMBOL_GPL(pci_prg_resp_pasid_required); + #define PASID_NUMBER_SHIFT 8 #define PASID_NUMBER_MASK (0x1f << PASID_NUMBER_SHIFT) /** diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h @@ -374,20 +374,17 @@ enum { #define QI_DEV_EIOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | ((u64)(pfsid & 0xfff) << 52)) #define QI_DEV_EIOTLB_MAX_INVS 32 -#define QI_PGRP_IDX(idx) (((u64)(idx)) << 55) -#define QI_PGRP_PRIV(priv) (((u64)(priv)) << 32) -#define QI_PGRP_RESP_CODE(res) ((u64)(res)) -#define QI_PGRP_PASID(pasid) (((u64)(pasid)) << 32) -#define QI_PGRP_DID(did) (((u64)(did)) << 16) +/* Page group response descriptor QW0 */ #define QI_PGRP_PASID_P(p) (((u64)(p)) << 4) +#define QI_PGRP_PDP(p) (((u64)(p)) << 5) +#define QI_PGRP_RESP_CODE(res) (((u64)(res)) << 12) +#define QI_PGRP_DID(rid) (((u64)(rid)) << 16) +#define QI_PGRP_PASID(pasid) (((u64)(pasid)) << 32) + +/* Page group response descriptor QW1 */ +#define QI_PGRP_LPIG(x) (((u64)(x)) << 2) +#define QI_PGRP_IDX(idx) (((u64)(idx)) << 3) -#define QI_PSTRM_ADDR(addr) (((u64)(addr)) & VTD_PAGE_MASK) -#define QI_PSTRM_DEVFN(devfn) (((u64)(devfn)) << 4) -#define QI_PSTRM_RESP_CODE(res) ((u64)(res)) -#define QI_PSTRM_IDX(idx) (((u64)(idx)) << 55) -#define QI_PSTRM_PRIV(priv) (((u64)(priv)) << 32) -#define QI_PSTRM_BUS(bus) (((u64)(bus)) << 24) -#define QI_PSTRM_PASID(pasid) (((u64)(pasid)) << 4) #define QI_RESP_SUCCESS 0x0 #define QI_RESP_INVALID 0x1 diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h @@ -20,7 +20,7 @@ struct device; struct svm_dev_ops { void (*fault_cb)(struct device *dev, int pasid, u64 address, - u32 private, int rwxp, int response); + void *private, int rwxp, int response); }; /* Values for rxwp in fault_cb callback */ diff --git a/drivers/iommu/io-pgtable.h b/include/linux/io-pgtable.h diff --git a/include/linux/iommu.h b/include/linux/iommu.h @@ -167,8 +167,9 @@ struct iommu_resv_region { * @detach_dev: detach device from an iommu domain * @map: map a physically contiguous memory region to an iommu domain * @unmap: unmap a physically contiguous memory region from an iommu domain - * @flush_tlb_all: Synchronously flush all hardware TLBs for this domain + * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain * @iotlb_range_add: Add a given iova range to the flush queue for this domain + * @iotlb_sync_map: Sync mappings created recently using @map to the hardware * @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush * queue * @iova_to_phys: translate iova to physical address @@ -183,6 +184,8 @@ struct iommu_resv_region { * @domain_window_enable: Configure and enable a particular window for a domain * @domain_window_disable: Disable a particular window for a domain * @of_xlate: add OF master IDs to iommu grouping + * @is_attach_deferred: Check if domain attach should be deferred from iommu + * driver init to device driver init (default no) * @pgsize_bitmap: bitmap of all possible supported page sizes */ struct iommu_ops { @@ -201,6 +204,7 @@ struct iommu_ops { void (*flush_iotlb_all)(struct iommu_domain *domain); void (*iotlb_range_add)(struct iommu_domain *domain, unsigned long iova, size_t size); + void (*iotlb_sync_map)(struct iommu_domain *domain); void (*iotlb_sync)(struct iommu_domain *domain); phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova); int (*add_device)(struct device *dev); diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h @@ -40,6 +40,7 @@ void pci_disable_pasid(struct pci_dev *pdev); void pci_restore_pasid_state(struct pci_dev *pdev); int pci_pasid_features(struct pci_dev *pdev); int pci_max_pasids(struct pci_dev *pdev); +int pci_prg_resp_pasid_required(struct pci_dev *pdev); #else /* CONFIG_PCI_PASID */ @@ -66,6 +67,10 @@ static inline int pci_max_pasids(struct pci_dev *pdev) return -EINVAL; } +static inline int pci_prg_resp_pasid_required(struct pci_dev *pdev) +{ + return 0; +} #endif /* CONFIG_PCI_PASID */ diff --git a/include/linux/pci.h b/include/linux/pci.h @@ -1527,11 +1527,13 @@ void pci_ats_init(struct pci_dev *dev); int pci_enable_ats(struct pci_dev *dev, int ps); void pci_disable_ats(struct pci_dev *dev); int pci_ats_queue_depth(struct pci_dev *dev); +int pci_ats_page_aligned(struct pci_dev *dev); #else static inline void pci_ats_init(struct pci_dev *d) { } static inline int pci_enable_ats(struct pci_dev *d, int ps) { return -ENODEV; } static inline void pci_disable_ats(struct pci_dev *d) { } static inline int pci_ats_queue_depth(struct pci_dev *d) { return -ENODEV; } +static inline int pci_ats_page_aligned(struct pci_dev *dev) { return 0; } #endif #ifdef CONFIG_PCIE_PTM diff --git a/include/soc/tegra/mc.h b/include/soc/tegra/mc.h @@ -9,6 +9,7 @@ #ifndef __SOC_TEGRA_MC_H__ #define __SOC_TEGRA_MC_H__ +#include <linux/err.h> #include <linux/reset-controller.h> #include <linux/types.h> @@ -77,6 +78,7 @@ struct tegra_smmu_soc { struct tegra_mc; struct tegra_smmu; +struct gart_device; #ifdef CONFIG_TEGRA_IOMMU_SMMU struct tegra_smmu *tegra_smmu_probe(struct device *dev, @@ -96,6 +98,28 @@ static inline void tegra_smmu_remove(struct tegra_smmu *smmu) } #endif +#ifdef CONFIG_TEGRA_IOMMU_GART +struct gart_device *tegra_gart_probe(struct device *dev, struct tegra_mc *mc); +int tegra_gart_suspend(struct gart_device *gart); +int tegra_gart_resume(struct gart_device *gart); +#else +static inline struct gart_device * +tegra_gart_probe(struct device *dev, struct tegra_mc *mc) +{ + return ERR_PTR(-ENODEV); +} + +static inline int tegra_gart_suspend(struct gart_device *gart) +{ + return -ENODEV; +} + +static inline int tegra_gart_resume(struct gart_device *gart) +{ + return -ENODEV; +} +#endif + struct tegra_mc_reset { const char *name; unsigned long id; @@ -144,7 +168,8 @@ struct tegra_mc_soc { struct tegra_mc { struct device *dev; struct tegra_smmu *smmu; - void __iomem *regs, *regs2; + struct gart_device *gart; + void __iomem *regs; struct clk *clk; int irq; diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h @@ -866,6 +866,7 @@ #define PCI_ATS_CAP 0x04 /* ATS Capability Register */ #define PCI_ATS_CAP_QDEP(x) ((x) & 0x1f) /* Invalidate Queue Depth */ #define PCI_ATS_MAX_QDEP 32 /* Max Invalidate Queue Depth */ +#define PCI_ATS_CAP_PAGE_ALIGNED 0x0020 /* Page Aligned Request */ #define PCI_ATS_CTRL 0x06 /* ATS Control Register */ #define PCI_ATS_CTRL_ENABLE 0x8000 /* ATS Enable */ #define PCI_ATS_CTRL_STU(x) ((x) & 0x1f) /* Smallest Translation Unit */ @@ -880,6 +881,7 @@ #define PCI_PRI_STATUS_RF 0x001 /* Response Failure */ #define PCI_PRI_STATUS_UPRGI 0x002 /* Unexpected PRG index */ #define PCI_PRI_STATUS_STOPPED 0x100 /* PRI Stopped */ +#define PCI_PRI_STATUS_PASID 0x8000 /* PRG Response PASID Required */ #define PCI_PRI_MAX_REQ 0x08 /* PRI max reqs supported */ #define PCI_PRI_ALLOC_REQ 0x0c /* PRI max reqs allowed */ #define PCI_EXT_CAP_PRI_SIZEOF 16