whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit 8e143b90e4d45cca3dc53760d3cfab988bc74571
parent 78e8696c234ab637c4dd516cabeac344d84ec10b
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Tue,  1 Jan 2019 15:55:29 -0800

Merge tag 'iommu-updates-v4.21' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu

Pull IOMMU updates from Joerg Roedel:

 - Page table code for AMD IOMMU now supports large pages where smaller
   page-sizes were mapped before. VFIO had to work around that in the
   past and I included a patch to remove it (acked by Alex Williamson)

 - Patches to unmodularize a couple of IOMMU drivers that would never
   work as modules anyway.

 - Work to unify the the iommu-related pointers in 'struct device' into
   one pointer. This work is not finished yet, but will probably be in
   the next cycle.

 - NUMA aware allocation in iommu-dma code

 - Support for r8a774a1 and r8a774c0 in the Renesas IOMMU driver

 - Scalable mode support for the Intel VT-d driver

 - PM runtime improvements for the ARM-SMMU driver

 - Support for the QCOM-SMMUv2 IOMMU hardware from Qualcom

 - Various smaller fixes and improvements

* tag 'iommu-updates-v4.21' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (78 commits)
  iommu: Check for iommu_ops == NULL in iommu_probe_device()
  ACPI/IORT: Don't call iommu_ops->add_device directly
  iommu/of: Don't call iommu_ops->add_device directly
  iommu: Consolitate ->add/remove_device() calls
  iommu/sysfs: Rename iommu_release_device()
  dmaengine: sh: rcar-dmac: Use device_iommu_mapped()
  xhci: Use device_iommu_mapped()
  powerpc/iommu: Use device_iommu_mapped()
  ACPI/IORT: Use device_iommu_mapped()
  iommu/of: Use device_iommu_mapped()
  driver core: Introduce device_iommu_mapped() function
  iommu/tegra: Use helper functions to access dev->iommu_fwspec
  iommu/qcom: Use helper functions to access dev->iommu_fwspec
  iommu/of: Use helper functions to access dev->iommu_fwspec
  iommu/mediatek: Use helper functions to access dev->iommu_fwspec
  iommu/ipmmu-vmsa: Use helper functions to access dev->iommu_fwspec
  iommu/dma: Use helper functions to access dev->iommu_fwspec
  iommu/arm-smmu: Use helper functions to access dev->iommu_fwspec
  ACPI/IORT: Use helper functions to access dev->iommu_fwspec
  iommu: Introduce wrappers around dev->iommu_fwspec
  ...

Diffstat:
MDocumentation/admin-guide/kernel-parameters.txt | 12++++++------
MDocumentation/devicetree/bindings/iommu/arm,smmu.txt | 43+++++++++++++++++++++++++++++++++++++++++++
MDocumentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt | 2++
March/powerpc/kernel/eeh.c | 2+-
March/powerpc/kernel/iommu.c | 4++--
March/x86/kernel/tboot.c | 2+-
Mdrivers/acpi/arm64/iort.c | 23++++++++++++-----------
Mdrivers/dma/sh/rcar-dmac.c | 2+-
Mdrivers/gpu/drm/i915/i915_gem_execbuffer.c | 2+-
Mdrivers/gpu/drm/i915/intel_display.c | 2+-
Mdrivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2+-
Mdrivers/iommu/amd_iommu.c | 275++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------------
Mdrivers/iommu/amd_iommu_init.c | 64+++++++++++++++++++++++++++++++++-------------------------------
Mdrivers/iommu/amd_iommu_types.h | 1+
Mdrivers/iommu/amd_iommu_v2.c | 2++
Mdrivers/iommu/arm-smmu-v3.c | 63+++++++++++++++++++++++++++++++++++++--------------------------
Mdrivers/iommu/arm-smmu.c | 209+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------
Mdrivers/iommu/dma-iommu.c | 22+++++++++++-----------
Mdrivers/iommu/dmar.c | 91+++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------
Mdrivers/iommu/intel-iommu.c | 351++++++++++++++++++++++++++++++++++++++++---------------------------------------
Mdrivers/iommu/intel-pasid.c | 449++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
Mdrivers/iommu/intel-pasid.h | 40+++++++++++++++++++++++++++++++++++++---
Mdrivers/iommu/intel-svm.c | 171++++++++++++++++++++++++++++---------------------------------------------------
Mdrivers/iommu/intel_irq_remapping.c | 6++++--
Mdrivers/iommu/io-pgtable-arm-v7s.c | 4----
Mdrivers/iommu/iommu-sysfs.c | 14+++++++-------
Mdrivers/iommu/iommu.c | 113+++++++++++++++++++++++++++++++++++++++++--------------------------------------
Mdrivers/iommu/ipmmu-vmsa.c | 88+++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------
Mdrivers/iommu/irq_remapping.c | 1-
Mdrivers/iommu/msm_iommu.c | 13+++----------
Mdrivers/iommu/mtk_iommu.c | 25++++++++++++++-----------
Mdrivers/iommu/mtk_iommu_v1.c | 47+++++++++++++++++++++--------------------------
Mdrivers/iommu/of_iommu.c | 16++++++++++------
Mdrivers/iommu/omap-iommu-debug.c | 25++++++-------------------
Mdrivers/iommu/qcom_iommu.c | 34++++++++++++----------------------
Mdrivers/iommu/rockchip-iommu.c | 13++++++-------
Mdrivers/iommu/tegra-gart.c | 37+++++++------------------------------
Mdrivers/iommu/tegra-smmu.c | 26+++-----------------------
Mdrivers/misc/mic/scif/scif_rma.c | 2+-
Mdrivers/misc/mic/scif/scif_rma.h | 2+-
Mdrivers/usb/host/xhci.c | 2+-
Mdrivers/vfio/vfio_iommu_type1.c | 33++-------------------------------
Minclude/linux/device.h | 10++++++++++
Dinclude/linux/dma_remapping.h | 58----------------------------------------------------------
Minclude/linux/intel-iommu.h | 108++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------
Minclude/linux/iommu.h | 18++++++++++++++++--
46 files changed, 1612 insertions(+), 917 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt @@ -1690,12 +1690,12 @@ By default, super page will be supported if Intel IOMMU has the capability. With this option, super page will not be supported. - ecs_off [Default Off] - By default, extended context tables will be supported if - the hardware advertises that it has support both for the - extended tables themselves, and also PASID support. With - this option set, extended tables will not be used even - on hardware which claims to support them. + sm_off [Default Off] + By default, scalable mode will be supported if the + hardware advertises that it has support for the scalable + mode translation. With this option set, scalable mode + will not be used even on hardware which claims to support + it. tboot_noforce [Default Off] Do not force the Intel IOMMU enabled under tboot. By default, tboot will force Intel IOMMU on, which diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt @@ -17,10 +17,20 @@ conditions. "arm,mmu-401" "arm,mmu-500" "cavium,smmu-v2" + "qcom,smmu-v2" depending on the particular implementation and/or the version of the architecture implemented. + Qcom SoCs must contain, as below, SoC-specific compatibles + along with "qcom,smmu-v2": + "qcom,msm8996-smmu-v2", "qcom,smmu-v2", + "qcom,sdm845-smmu-v2", "qcom,smmu-v2". + + Qcom SoCs implementing "arm,mmu-500" must also include, + as below, SoC-specific compatibles: + "qcom,sdm845-smmu-500", "arm,mmu-500" + - reg : Base address and size of the SMMU. - #global-interrupts : The number of global interrupts exposed by the @@ -71,6 +81,22 @@ conditions. or using stream matching with #iommu-cells = <2>, and may be ignored if present in such cases. +- clock-names: List of the names of clocks input to the device. The + required list depends on particular implementation and + is as follows: + - for "qcom,smmu-v2": + - "bus": clock required for downstream bus access and + for the smmu ptw, + - "iface": clock required to access smmu's registers + through the TCU's programming interface. + - unspecified for other implementations. + +- clocks: Specifiers for all clocks listed in the clock-names property, + as per generic clock bindings. + +- power-domains: Specifiers for power domains required to be powered on for + the SMMU to operate, as per generic power domain bindings. + ** Deprecated properties: - mmu-masters (deprecated in favour of the generic "iommus" binding) : @@ -137,3 +163,20 @@ conditions. iommu-map = <0 &smmu3 0 0x400>; ... }; + + /* Qcom's arm,smmu-v2 implementation */ + smmu4: iommu@d00000 { + compatible = "qcom,msm8996-smmu-v2", "qcom,smmu-v2"; + reg = <0xd00000 0x10000>; + + #global-interrupts = <1>; + interrupts = <GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 320 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 321 IRQ_TYPE_LEVEL_HIGH>; + #iommu-cells = <1>; + power-domains = <&mmcc MDSS_GDSC>; + + clocks = <&mmcc SMMU_MDP_AXI_CLK>, + <&mmcc SMMU_MDP_AHB_CLK>; + clock-names = "bus", "iface"; + }; diff --git a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt @@ -14,6 +14,8 @@ Required Properties: - "renesas,ipmmu-r8a7743" for the R8A7743 (RZ/G1M) IPMMU. - "renesas,ipmmu-r8a7744" for the R8A7744 (RZ/G1N) IPMMU. - "renesas,ipmmu-r8a7745" for the R8A7745 (RZ/G1E) IPMMU. + - "renesas,ipmmu-r8a774a1" for the R8A774A1 (RZ/G2M) IPMMU. + - "renesas,ipmmu-r8a774c0" for the R8A774C0 (RZ/G2E) IPMMU. - "renesas,ipmmu-r8a7790" for the R8A7790 (R-Car H2) IPMMU. - "renesas,ipmmu-r8a7791" for the R8A7791 (R-Car M2-W) IPMMU. - "renesas,ipmmu-r8a7793" for the R8A7793 (R-Car M2-N) IPMMU. diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c @@ -1472,7 +1472,7 @@ static int dev_has_iommu_table(struct device *dev, void *data) if (!dev) return 0; - if (dev->iommu_group) { + if (device_iommu_mapped(dev)) { *ppdev = pdev; return 1; } diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c @@ -1088,7 +1088,7 @@ int iommu_add_device(struct iommu_table_group *table_group, struct device *dev) if (!device_is_registered(dev)) return -ENOENT; - if (dev->iommu_group) { + if (device_iommu_mapped(dev)) { pr_debug("%s: Skipping device %s with iommu group %d\n", __func__, dev_name(dev), iommu_group_id(dev->iommu_group)); @@ -1109,7 +1109,7 @@ void iommu_del_device(struct device *dev) * and we needn't detach them from the associated * IOMMU groups */ - if (!dev->iommu_group) { + if (!device_iommu_mapped(dev)) { pr_debug("iommu_tce: skipping device %s with no tbl\n", dev_name(dev)); return; diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c @@ -19,7 +19,7 @@ * */ -#include <linux/dma_remapping.h> +#include <linux/intel-iommu.h> #include <linux/init_task.h> #include <linux/spinlock.h> #include <linux/export.h> diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c @@ -779,7 +779,7 @@ static inline bool iort_iommu_driver_enabled(u8 type) static struct acpi_iort_node *iort_get_msi_resv_iommu(struct device *dev) { struct acpi_iort_node *iommu; - struct iommu_fwspec *fwspec = dev->iommu_fwspec; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); iommu = iort_get_iort_node(fwspec->iommu_fwnode); @@ -794,9 +794,10 @@ static struct acpi_iort_node *iort_get_msi_resv_iommu(struct device *dev) return NULL; } -static inline const struct iommu_ops *iort_fwspec_iommu_ops( - struct iommu_fwspec *fwspec) +static inline const struct iommu_ops *iort_fwspec_iommu_ops(struct device *dev) { + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + return (fwspec && fwspec->ops) ? fwspec->ops : NULL; } @@ -805,8 +806,8 @@ static inline int iort_add_device_replay(const struct iommu_ops *ops, { int err = 0; - if (ops->add_device && dev->bus && !dev->iommu_group) - err = ops->add_device(dev); + if (dev->bus && !device_iommu_mapped(dev)) + err = iommu_probe_device(dev); return err; } @@ -824,6 +825,7 @@ static inline int iort_add_device_replay(const struct iommu_ops *ops, */ int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head) { + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct acpi_iort_its_group *its; struct acpi_iort_node *iommu_node, *its_node = NULL; int i, resv = 0; @@ -841,9 +843,9 @@ int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head) * a given PCI or named component may map IDs to. */ - for (i = 0; i < dev->iommu_fwspec->num_ids; i++) { + for (i = 0; i < fwspec->num_ids; i++) { its_node = iort_node_map_id(iommu_node, - dev->iommu_fwspec->ids[i], + fwspec->ids[i], NULL, IORT_MSI_TYPE); if (its_node) break; @@ -874,8 +876,7 @@ int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head) return (resv == its->its_count) ? resv : -ENODEV; } #else -static inline const struct iommu_ops *iort_fwspec_iommu_ops( - struct iommu_fwspec *fwspec) +static inline const struct iommu_ops *iort_fwspec_iommu_ops(struct device *dev); { return NULL; } static inline int iort_add_device_replay(const struct iommu_ops *ops, struct device *dev) @@ -1045,7 +1046,7 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev) * If we already translated the fwspec there * is nothing left to do, return the iommu_ops. */ - ops = iort_fwspec_iommu_ops(dev->iommu_fwspec); + ops = iort_fwspec_iommu_ops(dev); if (ops) return ops; @@ -1084,7 +1085,7 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev) * add_device callback for dev, replay it to get things in order. */ if (!err) { - ops = iort_fwspec_iommu_ops(dev->iommu_fwspec); + ops = iort_fwspec_iommu_ops(dev); err = iort_add_device_replay(ops, dev); } diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c @@ -1809,7 +1809,7 @@ static int rcar_dmac_probe(struct platform_device *pdev) * level we can't disable it selectively, so ignore channel 0 for now if * the device is part of an IOMMU group. */ - if (pdev->dev.iommu_group) { + if (device_iommu_mapped(&pdev->dev)) { dmac->n_channels--; channels_offset = 1; } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -26,7 +26,7 @@ * */ -#include <linux/dma_remapping.h> +#include <linux/intel-iommu.h> #include <linux/reservation.h> #include <linux/sync_file.h> #include <linux/uaccess.h> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c @@ -46,7 +46,7 @@ #include <drm/drm_plane_helper.h> #include <drm/drm_rect.h> #include <drm/drm_atomic_uapi.h> -#include <linux/dma_remapping.h> +#include <linux/intel-iommu.h> #include <linux/reservation.h> /* Primary plane formats for gen <= 3 */ diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -34,7 +34,7 @@ #include <drm/ttm/ttm_placement.h> #include <drm/ttm/ttm_bo_driver.h> #include <drm/ttm/ttm_module.h> -#include <linux/dma_remapping.h> +#include <linux/intel-iommu.h> #define VMWGFX_DRIVER_DESC "Linux drm driver for VMware graphics devices" #define VMWGFX_CHIP_SVGAII 0 diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c @@ -17,6 +17,8 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#define pr_fmt(fmt) "AMD-Vi: " fmt + #include <linux/ratelimit.h> #include <linux/pci.h> #include <linux/acpi.h> @@ -277,7 +279,7 @@ static u16 get_alias(struct device *dev) return pci_alias; } - pr_info("AMD-Vi: Using IVRS reported alias %02x:%02x.%d " + pr_info("Using IVRS reported alias %02x:%02x.%d " "for device %s[%04x:%04x], kernel reported alias " "%02x:%02x.%d\n", PCI_BUS_NUM(ivrs_alias), PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias), dev_name(dev), pdev->vendor, pdev->device, @@ -291,7 +293,7 @@ static u16 get_alias(struct device *dev) if (pci_alias == devid && PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) { pci_add_dma_alias(pdev, ivrs_alias & 0xff); - pr_info("AMD-Vi: Added PCI DMA alias %02x.%d for %s\n", + pr_info("Added PCI DMA alias %02x.%d for %s\n", PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias), dev_name(dev)); } @@ -436,7 +438,14 @@ static int iommu_init_device(struct device *dev) dev_data->alias = get_alias(dev); - if (dev_is_pci(dev) && pci_iommuv2_capable(to_pci_dev(dev))) { + /* + * By default we use passthrough mode for IOMMUv2 capable device. + * But if amd_iommu=force_isolation is set (e.g. to debug DMA to + * invalid address), we ignore the capability for the device so + * it'll be forced to go into translation mode. + */ + if ((iommu_pass_through || !amd_iommu_force_isolation) && + dev_is_pci(dev) && pci_iommuv2_capable(to_pci_dev(dev))) { struct amd_iommu *iommu; iommu = amd_iommu_rlookup_table[dev_data->devid]; @@ -511,7 +520,7 @@ static void dump_dte_entry(u16 devid) int i; for (i = 0; i < 4; ++i) - pr_err("AMD-Vi: DTE[%d]: %016llx\n", i, + pr_err("DTE[%d]: %016llx\n", i, amd_iommu_dev_table[devid].data[i]); } @@ -521,7 +530,7 @@ static void dump_command(unsigned long phys_addr) int i; for (i = 0; i < 4; ++i) - pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]); + pr_err("CMD[%d]: %08x\n", i, cmd->data[i]); } static void amd_iommu_report_page_fault(u16 devid, u16 domain_id, @@ -536,10 +545,10 @@ static void amd_iommu_report_page_fault(u16 devid, u16 domain_id, dev_data = get_dev_data(&pdev->dev); if (dev_data && __ratelimit(&dev_data->rs)) { - dev_err(&pdev->dev, "AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%016llx flags=0x%04x]\n", + dev_err(&pdev->dev, "Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%llx flags=0x%04x]\n", domain_id, address, flags); } else if (printk_ratelimit()) { - pr_err("AMD-Vi: Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%016llx flags=0x%04x]\n", + pr_err("Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n", PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), domain_id, address, flags); } @@ -566,7 +575,7 @@ retry: if (type == 0) { /* Did we hit the erratum? */ if (++count == LOOP_TIMEOUT) { - pr_err("AMD-Vi: No event written to event log\n"); + pr_err("No event written to event log\n"); return; } udelay(1); @@ -576,43 +585,41 @@ retry: if (type == EVENT_TYPE_IO_FAULT) { amd_iommu_report_page_fault(devid, pasid, address, flags); return; - } else { - dev_err(dev, "AMD-Vi: Event logged ["); } switch (type) { case EVENT_TYPE_ILL_DEV: - dev_err(dev, "ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x pasid=0x%05x address=0x%016llx flags=0x%04x]\n", + dev_err(dev, "Event logged [ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n", PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), pasid, address, flags); dump_dte_entry(devid); break; case EVENT_TYPE_DEV_TAB_ERR: - dev_err(dev, "DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x " - "address=0x%016llx flags=0x%04x]\n", + dev_err(dev, "Event logged [DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x " + "address=0x%llx flags=0x%04x]\n", PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), address, flags); break; case EVENT_TYPE_PAGE_TAB_ERR: - dev_err(dev, "PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x domain=0x%04x address=0x%016llx flags=0x%04x]\n", + dev_err(dev, "Event logged [PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n", PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), pasid, address, flags); break; case EVENT_TYPE_ILL_CMD: - dev_err(dev, "ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); + dev_err(dev, "Event logged [ILLEGAL_COMMAND_ERROR address=0x%llx]\n", address); dump_command(address); break; case EVENT_TYPE_CMD_HARD_ERR: - dev_err(dev, "COMMAND_HARDWARE_ERROR address=0x%016llx flags=0x%04x]\n", + dev_err(dev, "Event logged [COMMAND_HARDWARE_ERROR address=0x%llx flags=0x%04x]\n", address, flags); break; case EVENT_TYPE_IOTLB_INV_TO: - dev_err(dev, "IOTLB_INV_TIMEOUT device=%02x:%02x.%x address=0x%016llx]\n", + dev_err(dev, "Event logged [IOTLB_INV_TIMEOUT device=%02x:%02x.%x address=0x%llx]\n", PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), address); break; case EVENT_TYPE_INV_DEV_REQ: - dev_err(dev, "INVALID_DEVICE_REQUEST device=%02x:%02x.%x pasid=0x%05x address=0x%016llx flags=0x%04x]\n", + dev_err(dev, "Event logged [INVALID_DEVICE_REQUEST device=%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n", PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), pasid, address, flags); break; @@ -620,12 +627,12 @@ retry: pasid = ((event[0] >> 16) & 0xFFFF) | ((event[1] << 6) & 0xF0000); tag = event[1] & 0x03FF; - dev_err(dev, "INVALID_PPR_REQUEST device=%02x:%02x.%x pasid=0x%05x address=0x%016llx flags=0x%04x]\n", + dev_err(dev, "Event logged [INVALID_PPR_REQUEST device=%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n", PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), pasid, address, flags); break; default: - dev_err(dev, "UNKNOWN event[0]=0x%08x event[1]=0x%08x event[2]=0x%08x event[3]=0x%08x\n", + dev_err(dev, "Event logged [UNKNOWN event[0]=0x%08x event[1]=0x%08x event[2]=0x%08x event[3]=0x%08x\n", event[0], event[1], event[2], event[3]); } @@ -652,7 +659,7 @@ static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u64 *raw) struct amd_iommu_fault fault; if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) { - pr_err_ratelimited("AMD-Vi: Unknown PPR request received\n"); + pr_err_ratelimited("Unknown PPR request received\n"); return; } @@ -757,12 +764,12 @@ static void iommu_poll_ga_log(struct amd_iommu *iommu) if (!iommu_ga_log_notifier) break; - pr_debug("AMD-Vi: %s: devid=%#x, ga_tag=%#x\n", + pr_debug("%s: devid=%#x, ga_tag=%#x\n", __func__, GA_DEVID(log_entry), GA_TAG(log_entry)); if (iommu_ga_log_notifier(GA_TAG(log_entry)) != 0) - pr_err("AMD-Vi: GA log notifier failed.\n"); + pr_err("GA log notifier failed.\n"); break; default: break; @@ -787,18 +794,18 @@ irqreturn_t amd_iommu_int_thread(int irq, void *data) iommu->mmio_base + MMIO_STATUS_OFFSET); if (status & MMIO_STATUS_EVT_INT_MASK) { - pr_devel("AMD-Vi: Processing IOMMU Event Log\n"); + pr_devel("Processing IOMMU Event Log\n"); iommu_poll_events(iommu); } if (status & MMIO_STATUS_PPR_INT_MASK) { - pr_devel("AMD-Vi: Processing IOMMU PPR Log\n"); + pr_devel("Processing IOMMU PPR Log\n"); iommu_poll_ppr_log(iommu); } #ifdef CONFIG_IRQ_REMAP if (status & MMIO_STATUS_GALOG_INT_MASK) { - pr_devel("AMD-Vi: Processing IOMMU GA Log\n"); + pr_devel("Processing IOMMU GA Log\n"); iommu_poll_ga_log(iommu); } #endif @@ -842,7 +849,7 @@ static int wait_on_sem(volatile u64 *sem) } if (i == LOOP_TIMEOUT) { - pr_alert("AMD-Vi: Completion-Wait loop timed out\n"); + pr_alert("Completion-Wait loop timed out\n"); return -EIO; } @@ -1034,7 +1041,7 @@ again: /* Skip udelay() the first time around */ if (count++) { if (count == LOOP_TIMEOUT) { - pr_err("AMD-Vi: Command buffer timeout\n"); + pr_err("Command buffer timeout\n"); return -EIO; } @@ -1315,6 +1322,101 @@ static void domain_flush_devices(struct protection_domain *domain) * ****************************************************************************/ +static void free_page_list(struct page *freelist) +{ + while (freelist != NULL) { + unsigned long p = (unsigned long)page_address(freelist); + freelist = freelist->freelist; + free_page(p); + } +} + +static struct page *free_pt_page(unsigned long pt, struct page *freelist) +{ + struct page *p = virt_to_page((void *)pt); + + p->freelist = freelist; + + return p; +} + +#define DEFINE_FREE_PT_FN(LVL, FN) \ +static struct page *free_pt_##LVL (unsigned long __pt, struct page *freelist) \ +{ \ + unsigned long p; \ + u64 *pt; \ + int i; \ + \ + pt = (u64 *)__pt; \ + \ + for (i = 0; i < 512; ++i) { \ + /* PTE present? */ \ + if (!IOMMU_PTE_PRESENT(pt[i])) \ + continue; \ + \ + /* Large PTE? */ \ + if (PM_PTE_LEVEL(pt[i]) == 0 || \ + PM_PTE_LEVEL(pt[i]) == 7) \ + continue; \ + \ + p = (unsigned long)IOMMU_PTE_PAGE(pt[i]); \ + freelist = FN(p, freelist); \ + } \ + \ + return free_pt_page((unsigned long)pt, freelist); \ +} + +DEFINE_FREE_PT_FN(l2, free_pt_page) +DEFINE_FREE_PT_FN(l3, free_pt_l2) +DEFINE_FREE_PT_FN(l4, free_pt_l3) +DEFINE_FREE_PT_FN(l5, free_pt_l4) +DEFINE_FREE_PT_FN(l6, free_pt_l5) + +static struct page *free_sub_pt(unsigned long root, int mode, + struct page *freelist) +{ + switch (mode) { + case PAGE_MODE_NONE: + case PAGE_MODE_7_LEVEL: + break; + case PAGE_MODE_1_LEVEL: + freelist = free_pt_page(root, freelist); + break; + case PAGE_MODE_2_LEVEL: + freelist = free_pt_l2(root, freelist); + break; + case PAGE_MODE_3_LEVEL: + freelist = free_pt_l3(root, freelist); + break; + case PAGE_MODE_4_LEVEL: + freelist = free_pt_l4(root, freelist); + break; + case PAGE_MODE_5_LEVEL: + freelist = free_pt_l5(root, freelist); + break; + case PAGE_MODE_6_LEVEL: + freelist = free_pt_l6(root, freelist); + break; + default: + BUG(); + } + + return freelist; +} + +static void free_pagetable(struct protection_domain *domain) +{ + unsigned long root = (unsigned long)domain->pt_root; + struct page *freelist = NULL; + + BUG_ON(domain->mode < PAGE_MODE_NONE || + domain->mode > PAGE_MODE_6_LEVEL); + + free_sub_pt(root, domain->mode, freelist); + + free_page_list(freelist); +} + /* * This function is used to add another level to an IO page table. Adding * another level increases the size of the address space by 9 bits to a size up @@ -1363,10 +1465,13 @@ static u64 *alloc_pte(struct protection_domain *domain, while (level > end_lvl) { u64 __pte, __npte; + int pte_level; - __pte = *pte; + __pte = *pte; + pte_level = PM_PTE_LEVEL(__pte); - if (!IOMMU_PTE_PRESENT(__pte)) { + if (!IOMMU_PTE_PRESENT(__pte) || + pte_level == PAGE_MODE_7_LEVEL) { page = (u64 *)get_zeroed_page(gfp); if (!page) return NULL; @@ -1374,19 +1479,21 @@ static u64 *alloc_pte(struct protection_domain *domain, __npte = PM_LEVEL_PDE(level, iommu_virt_to_phys(page)); /* pte could have been changed somewhere. */ - if (cmpxchg64(pte, __pte, __npte) != __pte) { + if (cmpxchg64(pte, __pte, __npte) != __pte) free_page((unsigned long)page); - continue; - } + else if (pte_level == PAGE_MODE_7_LEVEL) + domain->updated = true; + + continue; } /* No level skipping support yet */ - if (PM_PTE_LEVEL(*pte) != level) + if (pte_level != level) return NULL; level -= 1; - pte = IOMMU_PTE_PAGE(*pte); + pte = IOMMU_PTE_PAGE(__pte); if (pte_page && level == end_lvl) *pte_page = pte; @@ -1455,6 +1562,25 @@ static u64 *fetch_pte(struct protection_domain *domain, return pte; } +static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist) +{ + unsigned long pt; + int mode; + + while (cmpxchg64(pte, pteval, 0) != pteval) { + pr_warn("AMD-Vi: IOMMU pte changed since we read it\n"); + pteval = *pte; + } + + if (!IOMMU_PTE_PRESENT(pteval)) + return freelist; + + pt = (unsigned long)IOMMU_PTE_PAGE(pteval); + mode = IOMMU_PTE_MODE(pteval); + + return free_sub_pt(pt, mode, freelist); +} + /* * Generic mapping functions. It maps a physical address into a DMA * address space. It allocates the page table pages if necessary. @@ -1469,6 +1595,7 @@ static int iommu_map_page(struct protection_domain *dom, int prot, gfp_t gfp) { + struct page *freelist = NULL; u64 __pte, *pte; int i, count; @@ -1485,8 +1612,10 @@ static int iommu_map_page(struct protection_domain *dom, return -ENOMEM; for (i = 0; i < count; ++i) - if (IOMMU_PTE_PRESENT(pte[i])) - return -EBUSY; + freelist = free_clear_pte(&pte[i], pte[i], freelist); + + if (freelist != NULL) + dom->updated = true; if (count > 1) { __pte = PAGE_SIZE_PTE(__sme_set(phys_addr), page_size); @@ -1504,6 +1633,9 @@ static int iommu_map_page(struct protection_domain *dom, update_domain(dom); + /* Everything flushed out, free pages now */ + free_page_list(freelist); + return 0; } @@ -1636,67 +1768,6 @@ static void domain_id_free(int id) spin_unlock(&pd_bitmap_lock); } -#define DEFINE_FREE_PT_FN(LVL, FN) \ -static void free_pt_##LVL (unsigned long __pt) \ -{ \ - unsigned long p; \ - u64 *pt; \ - int i; \ - \ - pt = (u64 *)__pt; \ - \ - for (i = 0; i < 512; ++i) { \ - /* PTE present? */ \ - if (!IOMMU_PTE_PRESENT(pt[i])) \ - continue; \ - \ - /* Large PTE? */ \ - if (PM_PTE_LEVEL(pt[i]) == 0 || \ - PM_PTE_LEVEL(pt[i]) == 7) \ - continue; \ - \ - p = (unsigned long)IOMMU_PTE_PAGE(pt[i]); \ - FN(p); \ - } \ - free_page((unsigned long)pt); \ -} - -DEFINE_FREE_PT_FN(l2, free_page) -DEFINE_FREE_PT_FN(l3, free_pt_l2) -DEFINE_FREE_PT_FN(l4, free_pt_l3) -DEFINE_FREE_PT_FN(l5, free_pt_l4) -DEFINE_FREE_PT_FN(l6, free_pt_l5) - -static void free_pagetable(struct protection_domain *domain) -{ - unsigned long root = (unsigned long)domain->pt_root; - - switch (domain->mode) { - case PAGE_MODE_NONE: - break; - case PAGE_MODE_1_LEVEL: - free_page(root); - break; - case PAGE_MODE_2_LEVEL: - free_pt_l2(root); - break; - case PAGE_MODE_3_LEVEL: - free_pt_l3(root); - break; - case PAGE_MODE_4_LEVEL: - free_pt_l4(root); - break; - case PAGE_MODE_5_LEVEL: - free_pt_l5(root); - break; - case PAGE_MODE_6_LEVEL: - free_pt_l6(root); - break; - default: - BUG(); - } -} - static void free_gcr3_tbl_level1(u64 *tbl) { u64 *ptr; @@ -2771,9 +2842,9 @@ int __init amd_iommu_init_dma_ops(void) iommu_detected = 1; if (amd_iommu_unmap_flush) - pr_info("AMD-Vi: IO/TLB flush on unmap enabled\n"); + pr_info("IO/TLB flush on unmap enabled\n"); else - pr_info("AMD-Vi: Lazy IO/TLB flushing enabled\n"); + pr_info("Lazy IO/TLB flushing enabled\n"); return 0; @@ -2878,7 +2949,7 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type) case IOMMU_DOMAIN_DMA: dma_domain = dma_ops_domain_alloc(); if (!dma_domain) { - pr_err("AMD-Vi: Failed to allocate\n"); + pr_err("Failed to allocate\n"); return NULL; } pdomain = &dma_domain->domain; @@ -4299,7 +4370,7 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) * legacy mode. So, we force legacy mode instead. */ if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) { - pr_debug("AMD-Vi: %s: Fall back to using intr legacy remap\n", + pr_debug("%s: Fall back to using intr legacy remap\n", __func__); pi_data->is_guest_mode = false; } diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c @@ -17,6 +17,8 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#define pr_fmt(fmt) "AMD-Vi: " fmt + #include <linux/pci.h> #include <linux/acpi.h> #include <linux/list.h> @@ -443,9 +445,9 @@ static void iommu_disable(struct amd_iommu *iommu) static u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end) { if (!request_mem_region(address, end, "amd_iommu")) { - pr_err("AMD-Vi: Can not reserve memory region %llx-%llx for mmio\n", + pr_err("Can not reserve memory region %llx-%llx for mmio\n", address, end); - pr_err("AMD-Vi: This is a BIOS bug. Please contact your hardware vendor\n"); + pr_err("This is a BIOS bug. Please contact your hardware vendor\n"); return NULL; } @@ -512,7 +514,7 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) u32 ivhd_size = get_ivhd_header_size(h); if (!ivhd_size) { - pr_err("AMD-Vi: Unsupported IVHD type %#x\n", h->type); + pr_err("Unsupported IVHD type %#x\n", h->type); return -EINVAL; } @@ -553,7 +555,7 @@ static int __init check_ivrs_checksum(struct acpi_table_header *table) checksum += p[i]; if (checksum != 0) { /* ACPI table corrupt */ - pr_err(FW_BUG "AMD-Vi: IVRS invalid checksum\n"); + pr_err(FW_BUG "IVRS invalid checksum\n"); return -ENODEV; } @@ -1028,7 +1030,7 @@ static int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line) if (!(entry->id == id && entry->cmd_line)) continue; - pr_info("AMD-Vi: Command-line override present for %s id %d - ignoring\n", + pr_info("Command-line override present for %s id %d - ignoring\n", type == IVHD_SPECIAL_IOAPIC ? "IOAPIC" : "HPET", id); *devid = entry->devid; @@ -1061,7 +1063,7 @@ static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u16 *devid, !entry->cmd_line) continue; - pr_info("AMD-Vi: Command-line override for hid:%s uid:%s\n", + pr_info("Command-line override for hid:%s uid:%s\n", hid, uid); *devid = entry->devid; return 0; @@ -1077,7 +1079,7 @@ static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u16 *devid, entry->cmd_line = cmd_line; entry->root_devid = (entry->devid & (~0x7)); - pr_info("AMD-Vi:%s, add hid:%s, uid:%s, rdevid:%d\n", + pr_info("%s, add hid:%s, uid:%s, rdevid:%d\n", entry->cmd_line ? "cmd" : "ivrs", entry->hid, entry->uid, entry->root_devid); @@ -1173,7 +1175,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, */ ivhd_size = get_ivhd_header_size(h); if (!ivhd_size) { - pr_err("AMD-Vi: Unsupported IVHD type %#x\n", h->type); + pr_err("Unsupported IVHD type %#x\n", h->type); return -EINVAL; } @@ -1455,7 +1457,7 @@ static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu) pci_write_config_dword(iommu->dev, 0xf0, 0x90 | (1 << 8)); pci_write_config_dword(iommu->dev, 0xf4, value | 0x4); - pr_info("AMD-Vi: Applying erratum 746 workaround for IOMMU at %s\n", + pr_info("Applying erratum 746 workaround for IOMMU at %s\n", dev_name(&iommu->dev->dev)); /* Clear the enable writing bit */ @@ -1486,7 +1488,7 @@ static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu) /* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */ iommu_write_l2(iommu, 0x47, value | BIT(0)); - pr_info("AMD-Vi: Applying ATS write check workaround for IOMMU at %s\n", + pr_info("Applying ATS write check workaround for IOMMU at %s\n", dev_name(&iommu->dev->dev)); } @@ -1506,7 +1508,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) iommu->index = amd_iommus_present++; if (unlikely(iommu->index >= MAX_IOMMUS)) { - WARN(1, "AMD-Vi: System has more IOMMUs than supported by this driver\n"); + WARN(1, "System has more IOMMUs than supported by this driver\n"); return -ENOSYS; } @@ -1674,12 +1676,12 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu) if ((iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true)) || (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false)) || (val != val2)) { - pr_err("AMD-Vi: Unable to write to IOMMU perf counter.\n"); + pr_err("Unable to write to IOMMU perf counter.\n"); amd_iommu_pc_present = false; return; } - pr_info("AMD-Vi: IOMMU performance counters supported\n"); + pr_info("IOMMU performance counters supported\n"); val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET); iommu->max_banks = (u8) ((val >> 12) & 0x3f); @@ -1840,11 +1842,11 @@ static void print_iommu_info(void) for_each_iommu(iommu) { int i; - pr_info("AMD-Vi: Found IOMMU at %s cap 0x%hx\n", + pr_info("Found IOMMU at %s cap 0x%hx\n", dev_name(&iommu->dev->dev), iommu->cap_ptr); if (iommu->cap & (1 << IOMMU_CAP_EFR)) { - pr_info("AMD-Vi: Extended features (%#llx):\n", + pr_info("Extended features (%#llx):\n", iommu->features); for (i = 0; i < ARRAY_SIZE(feat_str); ++i) { if (iommu_feature(iommu, (1ULL << i))) @@ -1858,11 +1860,11 @@ static void print_iommu_info(void) } } if (irq_remapping_enabled) { - pr_info("AMD-Vi: Interrupt remapping enabled\n"); + pr_info("Interrupt remapping enabled\n"); if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) - pr_info("AMD-Vi: virtual APIC enabled\n"); + pr_info("Virtual APIC enabled\n"); if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) - pr_info("AMD-Vi: X2APIC enabled\n"); + pr_info("X2APIC enabled\n"); } } @@ -2376,7 +2378,7 @@ static bool __init check_ioapic_information(void) devid = get_ioapic_devid(id); if (devid < 0) { - pr_err("%sAMD-Vi: IOAPIC[%d] not in IVRS table\n", + pr_err("%s: IOAPIC[%d] not in IVRS table\n", fw_bug, id); ret = false; } else if (devid == IOAPIC_SB_DEVID) { @@ -2394,11 +2396,11 @@ static bool __init check_ioapic_information(void) * when the BIOS is buggy and provides us the wrong * device id for the IOAPIC in the system. */ - pr_err("%sAMD-Vi: No southbridge IOAPIC found\n", fw_bug); + pr_err("%s: No southbridge IOAPIC found\n", fw_bug); } if (!ret) - pr_err("AMD-Vi: Disabling interrupt remapping\n"); + pr_err("Disabling interrupt remapping\n"); return ret; } @@ -2453,7 +2455,7 @@ static int __init early_amd_iommu_init(void) return -ENODEV; else if (ACPI_FAILURE(status)) { const char *err = acpi_format_exception(status); - pr_err("AMD-Vi: IVRS table error: %s\n", err); + pr_err("IVRS table error: %s\n", err); return -EINVAL; } @@ -2606,7 +2608,7 @@ static bool detect_ivrs(void) return false; else if (ACPI_FAILURE(status)) { const char *err = acpi_format_exception(status); - pr_err("AMD-Vi: IVRS table error: %s\n", err); + pr_err("IVRS table error: %s\n", err); return false; } @@ -2641,7 +2643,7 @@ static int __init state_next(void) ret = early_amd_iommu_init(); init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED; if (init_state == IOMMU_ACPI_FINISHED && amd_iommu_disabled) { - pr_info("AMD-Vi: AMD IOMMU disabled on kernel command-line\n"); + pr_info("AMD IOMMU disabled on kernel command-line\n"); free_dma_resources(); free_iommu_resources(); init_state = IOMMU_CMDLINE_DISABLED; @@ -2788,7 +2790,7 @@ static bool amd_iommu_sme_check(void) (boot_cpu_data.microcode <= 0x080011ff)) return true; - pr_notice("AMD-Vi: IOMMU not currently supported when SME is active\n"); + pr_notice("IOMMU not currently supported when SME is active\n"); return false; } @@ -2873,12 +2875,12 @@ static int __init parse_ivrs_ioapic(char *str) ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn); if (ret != 4) { - pr_err("AMD-Vi: Invalid command line: ivrs_ioapic%s\n", str); + pr_err("Invalid command line: ivrs_ioapic%s\n", str); return 1; } if (early_ioapic_map_size == EARLY_MAP_SIZE) { - pr_err("AMD-Vi: Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n", + pr_err("Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n", str); return 1; } @@ -2903,12 +2905,12 @@ static int __init parse_ivrs_hpet(char *str) ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn); if (ret != 4) { - pr_err("AMD-Vi: Invalid command line: ivrs_hpet%s\n", str); + pr_err("Invalid command line: ivrs_hpet%s\n", str); return 1; } if (early_hpet_map_size == EARLY_MAP_SIZE) { - pr_err("AMD-Vi: Early HPET map overflow - ignoring ivrs_hpet%s\n", + pr_err("Early HPET map overflow - ignoring ivrs_hpet%s\n", str); return 1; } @@ -2933,7 +2935,7 @@ static int __init parse_ivrs_acpihid(char *str) ret = sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid); if (ret != 4) { - pr_err("AMD-Vi: Invalid command line: ivrs_acpihid(%s)\n", str); + pr_err("Invalid command line: ivrs_acpihid(%s)\n", str); return 1; } @@ -2942,7 +2944,7 @@ static int __init parse_ivrs_acpihid(char *str) uid = p; if (!hid || !(*hid) || !uid) { - pr_err("AMD-Vi: Invalid command line: hid or uid\n"); + pr_err("Invalid command line: hid or uid\n"); return 1; } diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h @@ -269,6 +269,7 @@ #define PAGE_MODE_4_LEVEL 0x04 #define PAGE_MODE_5_LEVEL 0x05 #define PAGE_MODE_6_LEVEL 0x06 +#define PAGE_MODE_7_LEVEL 0x07 #define PM_LEVEL_SHIFT(x) (12 + ((x) * 9)) #define PM_LEVEL_SIZE(x) (((x) < 6) ? \ diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c @@ -16,6 +16,8 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#define pr_fmt(fmt) "AMD-Vi: " fmt + #include <linux/mmu_notifier.h> #include <linux/amd-iommu.h> #include <linux/mm_types.h> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c @@ -20,7 +20,8 @@ #include <linux/interrupt.h> #include <linux/iommu.h> #include <linux/iopoll.h> -#include <linux/module.h> +#include <linux/init.h> +#include <linux/moduleparam.h> #include <linux/msi.h> #include <linux/of.h> #include <linux/of_address.h> @@ -356,6 +357,10 @@ #define MSI_IOVA_BASE 0x8000000 #define MSI_IOVA_LENGTH 0x100000 +/* + * not really modular, but the easiest way to keep compat with existing + * bootargs behaviour is to continue using module_param_named here. + */ static bool disable_bypass = 1; module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO); MODULE_PARM_DESC(disable_bypass, @@ -576,7 +581,11 @@ struct arm_smmu_device { struct arm_smmu_strtab_cfg strtab_cfg; - u32 sync_count; + /* Hi16xx adds an extra 32 bits of goodness to its MSI payload */ + union { + u32 sync_count; + u64 padding; + }; /* IOMMU core code handle */ struct iommu_device iommu; @@ -675,7 +684,13 @@ static void queue_inc_cons(struct arm_smmu_queue *q) u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1; q->cons = Q_OVF(q, q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons); - writel(q->cons, q->cons_reg); + + /* + * Ensure that all CPU accesses (reads and writes) to the queue + * are complete before we update the cons pointer. + */ + mb(); + writel_relaxed(q->cons, q->cons_reg); } static int queue_sync_prod(struct arm_smmu_queue *q) @@ -828,7 +843,13 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV); cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH); cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB); - cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIDATA, ent->sync.msidata); + /* + * Commands are written little-endian, but we want the SMMU to + * receive MSIData, and thus write it back to memory, in CPU + * byte order, so big-endian needs an extra byteswap here. + */ + cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIDATA, + cpu_to_le32(ent->sync.msidata)); cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK; break; default: @@ -1691,24 +1712,26 @@ static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec) static void arm_smmu_detach_dev(struct device *dev) { - struct arm_smmu_master_data *master = dev->iommu_fwspec->iommu_priv; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + struct arm_smmu_master_data *master = fwspec->iommu_priv; master->ste.assigned = false; - arm_smmu_install_ste_for_dev(dev->iommu_fwspec); + arm_smmu_install_ste_for_dev(fwspec); } static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) { int ret = 0; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct arm_smmu_device *smmu; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_master_data *master; struct arm_smmu_strtab_ent *ste; - if (!dev->iommu_fwspec) + if (!fwspec) return -ENOENT; - master = dev->iommu_fwspec->iommu_priv; + master = fwspec->iommu_priv; smmu = master->smmu; ste = &master->ste; @@ -1748,7 +1771,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) ste->s2_cfg = &smmu_domain->s2_cfg; } - arm_smmu_install_ste_for_dev(dev->iommu_fwspec); + arm_smmu_install_ste_for_dev(fwspec); out_unlock: mutex_unlock(&smmu_domain->init_mutex); return ret; @@ -1839,7 +1862,7 @@ static int arm_smmu_add_device(struct device *dev) int i, ret; struct arm_smmu_device *smmu; struct arm_smmu_master_data *master; - struct iommu_fwspec *fwspec = dev->iommu_fwspec; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct iommu_group *group; if (!fwspec || fwspec->ops != &arm_smmu_ops) @@ -1890,7 +1913,7 @@ static int arm_smmu_add_device(struct device *dev) static void arm_smmu_remove_device(struct device *dev) { - struct iommu_fwspec *fwspec = dev->iommu_fwspec; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct arm_smmu_master_data *master; struct arm_smmu_device *smmu; @@ -2928,37 +2951,25 @@ static int arm_smmu_device_probe(struct platform_device *pdev) return 0; } -static int arm_smmu_device_remove(struct platform_device *pdev) +static void arm_smmu_device_shutdown(struct platform_device *pdev) { struct arm_smmu_device *smmu = platform_get_drvdata(pdev); arm_smmu_device_disable(smmu); - - return 0; -} - -static void arm_smmu_device_shutdown(struct platform_device *pdev) -{ - arm_smmu_device_remove(pdev); } static const struct of_device_id arm_smmu_of_match[] = { { .compatible = "arm,smmu-v3", }, { }, }; -MODULE_DEVICE_TABLE(of, arm_smmu_of_match); static struct platform_driver arm_smmu_driver = { .driver = { .name = "arm-smmu-v3", .of_match_table = of_match_ptr(arm_smmu_of_match), + .suppress_bind_attrs = true, }, .probe = arm_smmu_device_probe, - .remove = arm_smmu_device_remove, .shutdown = arm_smmu_device_shutdown, }; -module_platform_driver(arm_smmu_driver); - -MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations"); -MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>"); -MODULE_LICENSE("GPL v2"); +builtin_platform_driver(arm_smmu_driver); diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c @@ -41,13 +41,15 @@ #include <linux/io-64-nonatomic-hi-lo.h> #include <linux/iommu.h> #include <linux/iopoll.h> -#include <linux/module.h> +#include <linux/init.h> +#include <linux/moduleparam.h> #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_device.h> #include <linux/of_iommu.h> #include <linux/pci.h> #include <linux/platform_device.h> +#include <linux/pm_runtime.h> #include <linux/slab.h> #include <linux/spinlock.h> @@ -101,6 +103,10 @@ #define MSI_IOVA_LENGTH 0x100000 static int force_stage; +/* + * not really modular, but the easiest way to keep compat with existing + * bootargs behaviour is to continue using module_param() here. + */ module_param(force_stage, int, S_IRUGO); MODULE_PARM_DESC(force_stage, "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation."); @@ -119,6 +125,7 @@ enum arm_smmu_implementation { GENERIC_SMMU, ARM_MMU500, CAVIUM_SMMUV2, + QCOM_SMMUV2, }; struct arm_smmu_s2cr { @@ -206,6 +213,8 @@ struct arm_smmu_device { u32 num_global_irqs; u32 num_context_irqs; unsigned int *irqs; + struct clk_bulk_data *clks; + int num_clks; u32 cavium_id_base; /* Specific to Cavium */ @@ -267,6 +276,20 @@ static struct arm_smmu_option_prop arm_smmu_options[] = { { 0, NULL}, }; +static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu) +{ + if (pm_runtime_enabled(smmu->dev)) + return pm_runtime_get_sync(smmu->dev); + + return 0; +} + +static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu) +{ + if (pm_runtime_enabled(smmu->dev)) + pm_runtime_put(smmu->dev); +} + static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) { return container_of(dom, struct arm_smmu_domain, domain); @@ -926,11 +949,15 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_cfg *cfg = &smmu_domain->cfg; - int irq; + int ret, irq; if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY) return; + ret = arm_smmu_rpm_get(smmu); + if (ret < 0) + return; + /* * Disable the context bank and free the page tables before freeing * it. @@ -945,6 +972,8 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) free_io_pgtable_ops(smmu_domain->pgtbl_ops); __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx); + + arm_smmu_rpm_put(smmu); } static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) @@ -1103,7 +1132,7 @@ static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx) static int arm_smmu_master_alloc_smes(struct device *dev) { - struct iommu_fwspec *fwspec = dev->iommu_fwspec; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv; struct arm_smmu_device *smmu = cfg->smmu; struct arm_smmu_smr *smrs = smmu->smrs; @@ -1206,7 +1235,7 @@ static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain, static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) { int ret; - struct iommu_fwspec *fwspec = dev->iommu_fwspec; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct arm_smmu_device *smmu; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); @@ -1226,10 +1255,15 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) return -ENODEV; smmu = fwspec_smmu(fwspec); + + ret = arm_smmu_rpm_get(smmu); + if (ret < 0) + return ret; + /* Ensure that the domain is finalised */ ret = arm_smmu_init_domain_context(domain, smmu); if (ret < 0) - return ret; + goto rpm_put; /* * Sanity check the domain. We don't support domains across @@ -1239,49 +1273,74 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) dev_err(dev, "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n", dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev)); - return -EINVAL; + ret = -EINVAL; + goto rpm_put; } /* Looks ok, so add the device to the domain */ - return arm_smmu_domain_add_master(smmu_domain, fwspec); + ret = arm_smmu_domain_add_master(smmu_domain, fwspec); + +rpm_put: + arm_smmu_rpm_put(smmu); + return ret; } static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; + struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu; + int ret; if (!ops) return -ENODEV; - return ops->map(ops, iova, paddr, size, prot); + arm_smmu_rpm_get(smmu); + ret = ops->map(ops, iova, paddr, size, prot); + arm_smmu_rpm_put(smmu); + + return ret; } static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) { struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; + struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu; + size_t ret; if (!ops) return 0; - return ops->unmap(ops, iova, size); + arm_smmu_rpm_get(smmu); + ret = ops->unmap(ops, iova, size); + arm_smmu_rpm_put(smmu); + + return ret; } static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain) { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct arm_smmu_device *smmu = smmu_domain->smmu; - if (smmu_domain->tlb_ops) + if (smmu_domain->tlb_ops) { + arm_smmu_rpm_get(smmu); smmu_domain->tlb_ops->tlb_flush_all(smmu_domain); + arm_smmu_rpm_put(smmu); + } } static void arm_smmu_iotlb_sync(struct iommu_domain *domain) { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct arm_smmu_device *smmu = smmu_domain->smmu; - if (smmu_domain->tlb_ops) + if (smmu_domain->tlb_ops) { + arm_smmu_rpm_get(smmu); smmu_domain->tlb_ops->tlb_sync(smmu_domain); + arm_smmu_rpm_put(smmu); + } } static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, @@ -1296,6 +1355,11 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, u32 tmp; u64 phys; unsigned long va, flags; + int ret; + + ret = arm_smmu_rpm_get(smmu); + if (ret < 0) + return 0; cb_base = ARM_SMMU_CB(smmu, cfg->cbndx); @@ -1324,6 +1388,8 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, return 0; } + arm_smmu_rpm_put(smmu); + return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff); } @@ -1380,7 +1446,7 @@ static int arm_smmu_add_device(struct device *dev) { struct arm_smmu_device *smmu; struct arm_smmu_master_cfg *cfg; - struct iommu_fwspec *fwspec = dev->iommu_fwspec; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); int i, ret; if (using_legacy_binding) { @@ -1391,7 +1457,7 @@ static int arm_smmu_add_device(struct device *dev) * will allocate/initialise a new one. Thus we need to update fwspec for * later use. */ - fwspec = dev->iommu_fwspec; + fwspec = dev_iommu_fwspec_get(dev); if (ret) goto out_free; } else if (fwspec && fwspec->ops == &arm_smmu_ops) { @@ -1428,12 +1494,21 @@ static int arm_smmu_add_device(struct device *dev) while (i--) cfg->smendx[i] = INVALID_SMENDX; + ret = arm_smmu_rpm_get(smmu); + if (ret < 0) + goto out_cfg_free; + ret = arm_smmu_master_alloc_smes(dev); + arm_smmu_rpm_put(smmu); + if (ret) goto out_cfg_free; iommu_device_link(&smmu->iommu, dev); + device_link_add(dev, smmu->dev, + DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER); + return 0; out_cfg_free: @@ -1445,10 +1520,10 @@ out_free: static void arm_smmu_remove_device(struct device *dev) { - struct iommu_fwspec *fwspec = dev->iommu_fwspec; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct arm_smmu_master_cfg *cfg; struct arm_smmu_device *smmu; - + int ret; if (!fwspec || fwspec->ops != &arm_smmu_ops) return; @@ -1456,8 +1531,15 @@ static void arm_smmu_remove_device(struct device *dev) cfg = fwspec->iommu_priv; smmu = cfg->smmu; + ret = arm_smmu_rpm_get(smmu); + if (ret < 0) + return; + iommu_device_unlink(&smmu->iommu, dev); arm_smmu_master_free_smes(fwspec); + + arm_smmu_rpm_put(smmu); + iommu_group_remove_device(dev); kfree(fwspec->iommu_priv); iommu_fwspec_free(dev); @@ -1465,7 +1547,7 @@ static void arm_smmu_remove_device(struct device *dev) static struct iommu_group *arm_smmu_device_group(struct device *dev) { - struct iommu_fwspec *fwspec = dev->iommu_fwspec; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct arm_smmu_device *smmu = fwspec_smmu(fwspec); struct iommu_group *group = NULL; int i, idx; @@ -1947,13 +2029,14 @@ struct arm_smmu_match_data { }; #define ARM_SMMU_MATCH_DATA(name, ver, imp) \ -static struct arm_smmu_match_data name = { .version = ver, .model = imp } +static const struct arm_smmu_match_data name = { .version = ver, .model = imp } ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU); ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU); ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU); ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500); ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2); +ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2); static const struct of_device_id arm_smmu_of_match[] = { { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 }, @@ -1962,9 +2045,9 @@ static const struct of_device_id arm_smmu_of_match[] = { { .compatible = "arm,mmu-401", .data = &arm_mmu401 }, { .compatible = "arm,mmu-500", .data = &arm_mmu500 }, { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 }, + { .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 }, { }, }; -MODULE_DEVICE_TABLE(of, arm_smmu_of_match); #ifdef CONFIG_ACPI static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu) @@ -2150,6 +2233,17 @@ static int arm_smmu_device_probe(struct platform_device *pdev) smmu->irqs[i] = irq; } + err = devm_clk_bulk_get_all(dev, &smmu->clks); + if (err < 0) { + dev_err(dev, "failed to get clocks %d\n", err); + return err; + } + smmu->num_clks = err; + + err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks); + if (err) + return err; + err = arm_smmu_device_cfg_probe(smmu); if (err) return err; @@ -2200,6 +2294,17 @@ static int arm_smmu_device_probe(struct platform_device *pdev) arm_smmu_test_smr_masks(smmu); /* + * We want to avoid touching dev->power.lock in fastpaths unless + * it's really going to do something useful - pm_runtime_enabled() + * can serve as an ideal proxy for that decision. So, conditionally + * enable pm_runtime. + */ + if (dev->pm_domain) { + pm_runtime_set_active(dev); + pm_runtime_enable(dev); + } + + /* * For ACPI and generic DT bindings, an SMMU will be probed before * any device which might need it, so we want the bus ops in place * ready to handle default domain setup as soon as any SMMU exists. @@ -2224,48 +2329,82 @@ static int arm_smmu_legacy_bus_init(void) } device_initcall_sync(arm_smmu_legacy_bus_init); -static int arm_smmu_device_remove(struct platform_device *pdev) +static void arm_smmu_device_shutdown(struct platform_device *pdev) { struct arm_smmu_device *smmu = platform_get_drvdata(pdev); if (!smmu) - return -ENODEV; + return; if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS)) dev_err(&pdev->dev, "removing device with active domains!\n"); + arm_smmu_rpm_get(smmu); /* Turn the thing off */ writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0); - return 0; + arm_smmu_rpm_put(smmu); + + if (pm_runtime_enabled(smmu->dev)) + pm_runtime_force_suspend(smmu->dev); + else + clk_bulk_disable(smmu->num_clks, smmu->clks); + + clk_bulk_unprepare(smmu->num_clks, smmu->clks); } -static void arm_smmu_device_shutdown(struct platform_device *pdev) +static int __maybe_unused arm_smmu_runtime_resume(struct device *dev) { - arm_smmu_device_remove(pdev); + struct arm_smmu_device *smmu = dev_get_drvdata(dev); + int ret; + + ret = clk_bulk_enable(smmu->num_clks, smmu->clks); + if (ret) + return ret; + + arm_smmu_device_reset(smmu); + + return 0; } -static int __maybe_unused arm_smmu_pm_resume(struct device *dev) +static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev) { struct arm_smmu_device *smmu = dev_get_drvdata(dev); - arm_smmu_device_reset(smmu); + clk_bulk_disable(smmu->num_clks, smmu->clks); + return 0; } -static SIMPLE_DEV_PM_OPS(arm_smmu_pm_ops, NULL, arm_smmu_pm_resume); +static int __maybe_unused arm_smmu_pm_resume(struct device *dev) +{ + if (pm_runtime_suspended(dev)) + return 0; + + return arm_smmu_runtime_resume(dev); +} + +static int __maybe_unused arm_smmu_pm_suspend(struct device *dev) +{ + if (pm_runtime_suspended(dev)) + return 0; + + return arm_smmu_runtime_suspend(dev); +} + +static const struct dev_pm_ops arm_smmu_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume) + SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend, + arm_smmu_runtime_resume, NULL) +}; static struct platform_driver arm_smmu_driver = { .driver = { - .name = "arm-smmu", - .of_match_table = of_match_ptr(arm_smmu_of_match), - .pm = &arm_smmu_pm_ops, + .name = "arm-smmu", + .of_match_table = of_match_ptr(arm_smmu_of_match), + .pm = &arm_smmu_pm_ops, + .suppress_bind_attrs = true, }, .probe = arm_smmu_device_probe, - .remove = arm_smmu_device_remove, .shutdown = arm_smmu_device_shutdown, }; -module_platform_driver(arm_smmu_driver); - -MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations"); -MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>"); -MODULE_LICENSE("GPL v2"); +builtin_platform_driver(arm_smmu_driver); diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c @@ -175,7 +175,7 @@ EXPORT_SYMBOL(iommu_put_dma_cookie); void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list) { - if (!is_of_node(dev->iommu_fwspec->iommu_fwnode)) + if (!is_of_node(dev_iommu_fwspec_get(dev)->iommu_fwnode)) iort_iommu_msi_get_resv_regions(dev, list); } @@ -447,20 +447,17 @@ static void __iommu_dma_free_pages(struct page **pages, int count) kvfree(pages); } -static struct page **__iommu_dma_alloc_pages(unsigned int count, - unsigned long order_mask, gfp_t gfp) +static struct page **__iommu_dma_alloc_pages(struct device *dev, + unsigned int count, unsigned long order_mask, gfp_t gfp) { struct page **pages; - unsigned int i = 0, array_size = count * sizeof(*pages); + unsigned int i = 0, nid = dev_to_node(dev); order_mask &= (2U << MAX_ORDER) - 1; if (!order_mask) return NULL; - if (array_size <= PAGE_SIZE) - pages = kzalloc(array_size, GFP_KERNEL); - else - pages = vzalloc(array_size); + pages = kvzalloc(count * sizeof(*pages), GFP_KERNEL); if (!pages) return NULL; @@ -479,10 +476,12 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count, for (order_mask &= (2U << __fls(count)) - 1; order_mask; order_mask &= ~order_size) { unsigned int order = __fls(order_mask); + gfp_t alloc_flags = gfp; order_size = 1U << order; - page = alloc_pages((order_mask - order_size) ? - gfp | __GFP_NORETRY : gfp, order); + if (order_mask > order_size) + alloc_flags |= __GFP_NORETRY; + page = alloc_pages_node(nid, alloc_flags, order); if (!page) continue; if (!order) @@ -567,7 +566,8 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp, alloc_sizes = min_size; count = PAGE_ALIGN(size) >> PAGE_SHIFT; - pages = __iommu_dma_alloc_pages(count, alloc_sizes >> PAGE_SHIFT, gfp); + pages = __iommu_dma_alloc_pages(dev, count, alloc_sizes >> PAGE_SHIFT, + gfp); if (!pages) return NULL; diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c @@ -1160,6 +1160,7 @@ static int qi_check_fault(struct intel_iommu *iommu, int index) int head, tail; struct q_inval *qi = iommu->qi; int wait_index = (index + 1) % QI_LENGTH; + int shift = qi_shift(iommu); if (qi->desc_status[wait_index] == QI_ABORT) return -EAGAIN; @@ -1173,13 +1174,19 @@ static int qi_check_fault(struct intel_iommu *iommu, int index) */ if (fault & DMA_FSTS_IQE) { head = readl(iommu->reg + DMAR_IQH_REG); - if ((head >> DMAR_IQ_SHIFT) == index) { - pr_err("VT-d detected invalid descriptor: " - "low=%llx, high=%llx\n", - (unsigned long long)qi->desc[index].low, - (unsigned long long)qi->desc[index].high); - memcpy(&qi->desc[index], &qi->desc[wait_index], - sizeof(struct qi_desc)); + if ((head >> shift) == index) { + struct qi_desc *desc = qi->desc + head; + + /* + * desc->qw2 and desc->qw3 are either reserved or + * used by software as private data. We won't print + * out these two qw's for security consideration. + */ + pr_err("VT-d detected invalid descriptor: qw0 = %llx, qw1 = %llx\n", + (unsigned long long)desc->qw0, + (unsigned long long)desc->qw1); + memcpy(desc, qi->desc + (wait_index << shift), + 1 << shift); writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG); return -EINVAL; } @@ -1191,10 +1198,10 @@ static int qi_check_fault(struct intel_iommu *iommu, int index) */ if (fault & DMA_FSTS_ITE) { head = readl(iommu->reg + DMAR_IQH_REG); - head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH; + head = ((head >> shift) - 1 + QI_LENGTH) % QI_LENGTH; head |= 1; tail = readl(iommu->reg + DMAR_IQT_REG); - tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH; + tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH; writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG); @@ -1222,15 +1229,14 @@ int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) { int rc; struct q_inval *qi = iommu->qi; - struct qi_desc *hw, wait_desc; + int offset, shift, length; + struct qi_desc wait_desc; int wait_index, index; unsigned long flags; if (!qi) return 0; - hw = qi->desc; - restart: rc = 0; @@ -1243,16 +1249,21 @@ restart: index = qi->free_head; wait_index = (index + 1) % QI_LENGTH; + shift = qi_shift(iommu); + length = 1 << shift; qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE; - hw[index] = *desc; - - wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) | + offset = index << shift; + memcpy(qi->desc + offset, desc, length); + wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) | QI_IWD_STATUS_WRITE | QI_IWD_TYPE; - wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]); + wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]); + wait_desc.qw2 = 0; + wait_desc.qw3 = 0; - hw[wait_index] = wait_desc; + offset = wait_index << shift; + memcpy(qi->desc + offset, &wait_desc, length); qi->free_head = (qi->free_head + 2) % QI_LENGTH; qi->free_cnt -= 2; @@ -1261,7 +1272,7 @@ restart: * update the HW tail register indicating the presence of * new descriptors. */ - writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG); + writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG); while (qi->desc_status[wait_index] != QI_DONE) { /* @@ -1298,8 +1309,10 @@ void qi_global_iec(struct intel_iommu *iommu) { struct qi_desc desc; - desc.low = QI_IEC_TYPE; - desc.high = 0; + desc.qw0 = QI_IEC_TYPE; + desc.qw1 = 0; + desc.qw2 = 0; + desc.qw3 = 0; /* should never fail */ qi_submit_sync(&desc, iommu); @@ -1310,9 +1323,11 @@ void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, { struct qi_desc desc; - desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did) + desc.qw0 = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did) | QI_CC_GRAN(type) | QI_CC_TYPE; - desc.high = 0; + desc.qw1 = 0; + desc.qw2 = 0; + desc.qw3 = 0; qi_submit_sync(&desc, iommu); } @@ -1331,10 +1346,12 @@ void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, if (cap_read_drain(iommu->cap)) dr = 1; - desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw) + desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw) | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE; - desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih) + desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih) | QI_IOTLB_AM(size_order); + desc.qw2 = 0; + desc.qw3 = 0; qi_submit_sync(&desc, iommu); } @@ -1347,15 +1364,17 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, if (mask) { WARN_ON_ONCE(addr & ((1ULL << (VTD_PAGE_SHIFT + mask)) - 1)); addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1; - desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE; + desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE; } else - desc.high = QI_DEV_IOTLB_ADDR(addr); + desc.qw1 = QI_DEV_IOTLB_ADDR(addr); if (qdep >= QI_DEV_IOTLB_MAX_INVS) qdep = 0; - desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) | + desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) | QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid); + desc.qw2 = 0; + desc.qw3 = 0; qi_submit_sync(&desc, iommu); } @@ -1403,16 +1422,24 @@ static void __dmar_enable_qi(struct intel_iommu *iommu) u32 sts; unsigned long flags; struct q_inval *qi = iommu->qi; + u64 val = virt_to_phys(qi->desc); qi->free_head = qi->free_tail = 0; qi->free_cnt = QI_LENGTH; + /* + * Set DW=1 and QS=1 in IQA_REG when Scalable Mode capability + * is present. + */ + if (ecap_smts(iommu->ecap)) + val |= (1 << 11) | 1; + raw_spin_lock_irqsave(&iommu->register_lock, flags); /* write zero to the tail reg */ writel(0, iommu->reg + DMAR_IQT_REG); - dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc)); + dmar_writeq(iommu->reg + DMAR_IQA_REG, val); iommu->gcmd |= DMA_GCMD_QIE; writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); @@ -1448,8 +1475,12 @@ int dmar_enable_qi(struct intel_iommu *iommu) qi = iommu->qi; - - desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, 0); + /* + * Need two pages to accommodate 256 descriptors of 256 bits each + * if the remapping hardware supports scalable mode translation. + */ + desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, + !!ecap_smts(iommu->ecap)); if (!desc_page) { kfree(qi); iommu->qi = NULL; diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c @@ -292,49 +292,6 @@ static inline void context_clear_entry(struct context_entry *context) } /* - * 0: readable - * 1: writable - * 2-6: reserved - * 7: super page - * 8-10: available - * 11: snoop behavior - * 12-63: Host physcial address - */ -struct dma_pte { - u64 val; -}; - -static inline void dma_clear_pte(struct dma_pte *pte) -{ - pte->val = 0; -} - -static inline u64 dma_pte_addr(struct dma_pte *pte) -{ -#ifdef CONFIG_64BIT - return pte->val & VTD_PAGE_MASK; -#else - /* Must have a full atomic 64-bit read */ - return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK; -#endif -} - -static inline bool dma_pte_present(struct dma_pte *pte) -{ - return (pte->val & 3) != 0; -} - -static inline bool dma_pte_superpage(struct dma_pte *pte) -{ - return (pte->val & DMA_PTE_LARGE_PAGE); -} - -static inline int first_pte_in_page(struct dma_pte *pte) -{ - return !((unsigned long)pte & ~VTD_PAGE_MASK); -} - -/* * This domain is a statically identity mapping domain. * 1. This domain creats a static 1:1 mapping to all usable memory. * 2. It maps to each iommu if successful. @@ -406,38 +363,16 @@ static int dmar_map_gfx = 1; static int dmar_forcedac; static int intel_iommu_strict; static int intel_iommu_superpage = 1; -static int intel_iommu_ecs = 1; -static int intel_iommu_pasid28; +static int intel_iommu_sm = 1; static int iommu_identity_mapping; #define IDENTMAP_ALL 1 #define IDENTMAP_GFX 2 #define IDENTMAP_AZALIA 4 -/* Broadwell and Skylake have broken ECS support — normal so-called "second - * level" translation of DMA requests-without-PASID doesn't actually happen - * unless you also set the NESTE bit in an extended context-entry. Which of - * course means that SVM doesn't work because it's trying to do nested - * translation of the physical addresses it finds in the process page tables, - * through the IOVA->phys mapping found in the "second level" page tables. - * - * The VT-d specification was retroactively changed to change the definition - * of the capability bits and pretend that Broadwell/Skylake never happened... - * but unfortunately the wrong bit was changed. It's ECS which is broken, but - * for some reason it was the PASID capability bit which was redefined (from - * bit 28 on BDW/SKL to bit 40 in future). - * - * So our test for ECS needs to eschew those implementations which set the old - * PASID capabiity bit 28, since those are the ones on which ECS is broken. - * Unless we are working around the 'pasid28' limitations, that is, by putting - * the device into passthrough mode for normal DMA and thus masking the bug. - */ -#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \ - (intel_iommu_pasid28 || !ecap_broken_pasid(iommu->ecap))) -/* PASID support is thus enabled if ECS is enabled and *either* of the old - * or new capability bits are set. */ -#define pasid_enabled(iommu) (ecs_enabled(iommu) && \ - (ecap_pasid(iommu->ecap) || ecap_broken_pasid(iommu->ecap))) +#define sm_supported(iommu) (intel_iommu_sm && ecap_smts((iommu)->ecap)) +#define pasid_supported(iommu) (sm_supported(iommu) && \ + ecap_pasid((iommu)->ecap)) int intel_iommu_gfx_mapped; EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); @@ -448,21 +383,24 @@ static LIST_HEAD(device_domain_list); /* * Iterate over elements in device_domain_list and call the specified - * callback @fn against each element. This helper should only be used - * in the context where the device_domain_lock has already been holden. + * callback @fn against each element. */ int for_each_device_domain(int (*fn)(struct device_domain_info *info, void *data), void *data) { int ret = 0; + unsigned long flags; struct device_domain_info *info; - assert_spin_locked(&device_domain_lock); + spin_lock_irqsave(&device_domain_lock, flags); list_for_each_entry(info, &device_domain_list, global) { ret = fn(info, data); - if (ret) + if (ret) { + spin_unlock_irqrestore(&device_domain_lock, flags); return ret; + } } + spin_unlock_irqrestore(&device_domain_lock, flags); return 0; } @@ -518,15 +456,9 @@ static int __init intel_iommu_setup(char *str) } else if (!strncmp(str, "sp_off", 6)) { pr_info("Disable supported super page\n"); intel_iommu_superpage = 0; - } else if (!strncmp(str, "ecs_off", 7)) { - printk(KERN_INFO - "Intel-IOMMU: disable extended context table support\n"); - intel_iommu_ecs = 0; - } else if (!strncmp(str, "pasid28", 7)) { - printk(KERN_INFO - "Intel-IOMMU: enable pre-production PASID support\n"); - intel_iommu_pasid28 = 1; - iommu_identity_mapping |= IDENTMAP_GFX; + } else if (!strncmp(str, "sm_off", 6)) { + pr_info("Intel-IOMMU: disable scalable mode support\n"); + intel_iommu_sm = 0; } else if (!strncmp(str, "tboot_noforce", 13)) { printk(KERN_INFO "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n"); @@ -773,7 +705,7 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, u64 *entry; entry = &root->lo; - if (ecs_enabled(iommu)) { + if (sm_supported(iommu)) { if (devfn >= 0x80) { devfn -= 0x80; entry = &root->hi; @@ -915,7 +847,7 @@ static void free_context_table(struct intel_iommu *iommu) if (context) free_pgtable_page(context); - if (!ecs_enabled(iommu)) + if (!sm_supported(iommu)) continue; context = iommu_context_addr(iommu, i, 0x80, 0); @@ -1267,8 +1199,8 @@ static void iommu_set_root_entry(struct intel_iommu *iommu) unsigned long flag; addr = virt_to_phys(iommu->root_entry); - if (ecs_enabled(iommu)) - addr |= DMA_RTADDR_RTT; + if (sm_supported(iommu)) + addr |= DMA_RTADDR_SMT; raw_spin_lock_irqsave(&iommu->register_lock, flag); dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr); @@ -1282,7 +1214,7 @@ static void iommu_set_root_entry(struct intel_iommu *iommu) raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } -static void iommu_flush_write_buffer(struct intel_iommu *iommu) +void iommu_flush_write_buffer(struct intel_iommu *iommu) { u32 val; unsigned long flag; @@ -1694,6 +1626,16 @@ static int iommu_init_domains(struct intel_iommu *iommu) */ set_bit(0, iommu->domain_ids); + /* + * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid + * entry for first-level or pass-through translation modes should + * be programmed with a domain id different from those used for + * second-level or nested translation. We reserve a domain id for + * this purpose. + */ + if (sm_supported(iommu)) + set_bit(FLPT_DEFAULT_DID, iommu->domain_ids); + return 0; } @@ -1758,10 +1700,9 @@ static void free_dmar_iommu(struct intel_iommu *iommu) free_context_table(iommu); #ifdef CONFIG_INTEL_IOMMU_SVM - if (pasid_enabled(iommu)) { + if (pasid_supported(iommu)) { if (ecap_prs(iommu->ecap)) intel_svm_finish_prq(iommu); - intel_svm_exit(iommu); } #endif } @@ -1981,8 +1922,59 @@ static void domain_exit(struct dmar_domain *domain) free_domain_mem(domain); } +/* + * Get the PASID directory size for scalable mode context entry. + * Value of X in the PDTS field of a scalable mode context entry + * indicates PASID directory with 2^(X + 7) entries. + */ +static inline unsigned long context_get_sm_pds(struct pasid_table *table) +{ + int pds, max_pde; + + max_pde = table->max_pasid >> PASID_PDE_SHIFT; + pds = find_first_bit((unsigned long *)&max_pde, MAX_NR_PASID_BITS); + if (pds < 7) + return 0; + + return pds - 7; +} + +/* + * Set the RID_PASID field of a scalable mode context entry. The + * IOMMU hardware will use the PASID value set in this field for + * DMA translations of DMA requests without PASID. + */ +static inline void +context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid) +{ + context->hi |= pasid & ((1 << 20) - 1); + context->hi |= (1 << 20); +} + +/* + * Set the DTE(Device-TLB Enable) field of a scalable mode context + * entry. + */ +static inline void context_set_sm_dte(struct context_entry *context) +{ + context->lo |= (1 << 2); +} + +/* + * Set the PRE(Page Request Enable) field of a scalable mode context + * entry. + */ +static inline void context_set_sm_pre(struct context_entry *context) +{ + context->lo |= (1 << 4); +} + +/* Convert value to context PASID directory size field coding. */ +#define context_pdts(pds) (((pds) & 0x7) << 9) + static int domain_context_mapping_one(struct dmar_domain *domain, struct intel_iommu *iommu, + struct pasid_table *table, u8 bus, u8 devfn) { u16 did = domain->iommu_did[iommu->seq_id]; @@ -1990,8 +1982,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, struct device_domain_info *info = NULL; struct context_entry *context; unsigned long flags; - struct dma_pte *pgd; - int ret, agaw; + int ret; WARN_ON(did == 0); @@ -2037,41 +2028,67 @@ static int domain_context_mapping_one(struct dmar_domain *domain, } } - pgd = domain->pgd; - context_clear_entry(context); - context_set_domain_id(context, did); - /* - * Skip top levels of page tables for iommu which has less agaw - * than default. Unnecessary for PT mode. - */ - if (translation != CONTEXT_TT_PASS_THROUGH) { - for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) { - ret = -ENOMEM; - pgd = phys_to_virt(dma_pte_addr(pgd)); - if (!dma_pte_present(pgd)) - goto out_unlock; - } + if (sm_supported(iommu)) { + unsigned long pds; - info = iommu_support_dev_iotlb(domain, iommu, bus, devfn); - if (info && info->ats_supported) - translation = CONTEXT_TT_DEV_IOTLB; - else - translation = CONTEXT_TT_MULTI_LEVEL; + WARN_ON(!table); + + /* Setup the PASID DIR pointer: */ + pds = context_get_sm_pds(table); + context->lo = (u64)virt_to_phys(table->table) | + context_pdts(pds); + + /* Setup the RID_PASID field: */ + context_set_sm_rid2pasid(context, PASID_RID2PASID); - context_set_address_root(context, virt_to_phys(pgd)); - context_set_address_width(context, iommu->agaw); - } else { /* - * In pass through mode, AW must be programmed to - * indicate the largest AGAW value supported by - * hardware. And ASR is ignored by hardware. + * Setup the Device-TLB enable bit and Page request + * Enable bit: */ - context_set_address_width(context, iommu->msagaw); + info = iommu_support_dev_iotlb(domain, iommu, bus, devfn); + if (info && info->ats_supported) + context_set_sm_dte(context); + if (info && info->pri_supported) + context_set_sm_pre(context); + } else { + struct dma_pte *pgd = domain->pgd; + int agaw; + + context_set_domain_id(context, did); + context_set_translation_type(context, translation); + + if (translation != CONTEXT_TT_PASS_THROUGH) { + /* + * Skip top levels of page tables for iommu which has + * less agaw than default. Unnecessary for PT mode. + */ + for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) { + ret = -ENOMEM; + pgd = phys_to_virt(dma_pte_addr(pgd)); + if (!dma_pte_present(pgd)) + goto out_unlock; + } + + info = iommu_support_dev_iotlb(domain, iommu, bus, devfn); + if (info && info->ats_supported) + translation = CONTEXT_TT_DEV_IOTLB; + else + translation = CONTEXT_TT_MULTI_LEVEL; + + context_set_address_root(context, virt_to_phys(pgd)); + context_set_address_width(context, agaw); + } else { + /* + * In pass through mode, AW must be programmed to + * indicate the largest AGAW value supported by + * hardware. And ASR is ignored by hardware. + */ + context_set_address_width(context, iommu->msagaw); + } } - context_set_translation_type(context, translation); context_set_fault_enable(context); context_set_present(context); domain_flush_cache(domain, context, sizeof(*context)); @@ -2105,6 +2122,7 @@ out_unlock: struct domain_context_mapping_data { struct dmar_domain *domain; struct intel_iommu *iommu; + struct pasid_table *table; }; static int domain_context_mapping_cb(struct pci_dev *pdev, @@ -2113,25 +2131,31 @@ static int domain_context_mapping_cb(struct pci_dev *pdev, struct domain_context_mapping_data *data = opaque; return domain_context_mapping_one(data->domain, data->iommu, - PCI_BUS_NUM(alias), alias & 0xff); + data->table, PCI_BUS_NUM(alias), + alias & 0xff); } static int domain_context_mapping(struct dmar_domain *domain, struct device *dev) { + struct domain_context_mapping_data data; + struct pasid_table *table; struct intel_iommu *iommu; u8 bus, devfn; - struct domain_context_mapping_data data; iommu = device_to_iommu(dev, &bus, &devfn); if (!iommu) return -ENODEV; + table = intel_pasid_get_table(dev); + if (!dev_is_pci(dev)) - return domain_context_mapping_one(domain, iommu, bus, devfn); + return domain_context_mapping_one(domain, iommu, table, + bus, devfn); data.domain = domain; data.iommu = iommu; + data.table = table; return pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_mapping_cb, &data); @@ -2467,8 +2491,8 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, dmar_find_matched_atsr_unit(pdev)) info->ats_supported = 1; - if (ecs_enabled(iommu)) { - if (pasid_enabled(iommu)) { + if (sm_supported(iommu)) { + if (pasid_supported(iommu)) { int features = pci_pasid_features(pdev); if (features >= 0) info->pasid_supported = features | 1; @@ -2514,16 +2538,34 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, list_add(&info->global, &device_domain_list); if (dev) dev->archdata.iommu = info; + spin_unlock_irqrestore(&device_domain_lock, flags); - if (dev && dev_is_pci(dev) && info->pasid_supported) { + /* PASID table is mandatory for a PCI device in scalable mode. */ + if (dev && dev_is_pci(dev) && sm_supported(iommu)) { ret = intel_pasid_alloc_table(dev); if (ret) { - pr_warn("No pasid table for %s, pasid disabled\n", - dev_name(dev)); - info->pasid_supported = 0; + pr_err("PASID table allocation for %s failed\n", + dev_name(dev)); + dmar_remove_one_dev_info(domain, dev); + return NULL; + } + + /* Setup the PASID entry for requests without PASID: */ + spin_lock(&iommu->lock); + if (hw_pass_through && domain_type_is_si(domain)) + ret = intel_pasid_setup_pass_through(iommu, domain, + dev, PASID_RID2PASID); + else + ret = intel_pasid_setup_second_level(iommu, domain, + dev, PASID_RID2PASID); + spin_unlock(&iommu->lock); + if (ret) { + pr_err("Setup RID2PASID for %s failed\n", + dev_name(dev)); + dmar_remove_one_dev_info(domain, dev); + return NULL; } } - spin_unlock_irqrestore(&device_domain_lock, flags); if (dev && domain_context_mapping(domain, dev)) { pr_err("Domain context map for %s failed\n", dev_name(dev)); @@ -3287,7 +3329,7 @@ static int __init init_dmars(void) * We need to ensure the system pasid table is no bigger * than the smallest supported. */ - if (pasid_enabled(iommu)) { + if (pasid_supported(iommu)) { u32 temp = 2 << ecap_pss(iommu->ecap); intel_pasid_max_id = min_t(u32, temp, @@ -3348,7 +3390,7 @@ static int __init init_dmars(void) if (!ecap_pass_through(iommu->ecap)) hw_pass_through = 0; #ifdef CONFIG_INTEL_IOMMU_SVM - if (pasid_enabled(iommu)) + if (pasid_supported(iommu)) intel_svm_init(iommu); #endif } @@ -3452,7 +3494,7 @@ domains_done: iommu_flush_write_buffer(iommu); #ifdef CONFIG_INTEL_IOMMU_SVM - if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) { + if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) { ret = intel_svm_enable_prq(iommu); if (ret) goto free_iommu; @@ -4335,7 +4377,7 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) goto out; #ifdef CONFIG_INTEL_IOMMU_SVM - if (pasid_enabled(iommu)) + if (pasid_supported(iommu)) intel_svm_init(iommu); #endif @@ -4352,7 +4394,7 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) iommu_flush_write_buffer(iommu); #ifdef CONFIG_INTEL_IOMMU_SVM - if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) { + if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) { ret = intel_svm_enable_prq(iommu); if (ret) goto disable_iommu; @@ -4927,6 +4969,10 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info) iommu = info->iommu; if (info->dev) { + if (dev_is_pci(info->dev) && sm_supported(iommu)) + intel_pasid_tear_down_entry(iommu, info->dev, + PASID_RID2PASID); + iommu_disable_dev_iotlb(info); domain_context_clear(iommu, info->dev); intel_pasid_free_table(info->dev); @@ -5254,19 +5300,6 @@ static void intel_iommu_put_resv_regions(struct device *dev, } #ifdef CONFIG_INTEL_IOMMU_SVM -#define MAX_NR_PASID_BITS (20) -static inline unsigned long intel_iommu_get_pts(struct device *dev) -{ - int pts, max_pasid; - - max_pasid = intel_pasid_get_dev_max_id(dev); - pts = find_first_bit((unsigned long *)&max_pasid, MAX_NR_PASID_BITS); - if (pts < 5) - return 0; - - return pts - 5; -} - int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev) { struct device_domain_info *info; @@ -5298,33 +5331,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd sdev->sid = PCI_DEVID(info->bus, info->devfn); if (!(ctx_lo & CONTEXT_PASIDE)) { - if (iommu->pasid_state_table) - context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table); - context[1].lo = (u64)virt_to_phys(info->pasid_table->table) | - intel_iommu_get_pts(sdev->dev); - - wmb(); - /* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both - * extended to permit requests-with-PASID if the PASIDE bit - * is set. which makes sense. For CONTEXT_TT_PASS_THROUGH, - * however, the PASIDE bit is ignored and requests-with-PASID - * are unconditionally blocked. Which makes less sense. - * So convert from CONTEXT_TT_PASS_THROUGH to one of the new - * "guest mode" translation types depending on whether ATS - * is available or not. Annoyingly, we can't use the new - * modes *unless* PASIDE is set. */ - if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) { - ctx_lo &= ~CONTEXT_TT_MASK; - if (info->ats_supported) - ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2; - else - ctx_lo |= CONTEXT_TT_PT_PASID << 2; - } ctx_lo |= CONTEXT_PASIDE; - if (iommu->pasid_state_table) - ctx_lo |= CONTEXT_DINVE; - if (info->pri_supported) - ctx_lo |= CONTEXT_PRS; context[0].lo = ctx_lo; wmb(); iommu->flush.flush_context(iommu, sdev->did, sdev->sid, diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c @@ -9,6 +9,8 @@ #define pr_fmt(fmt) "DMAR: " fmt +#include <linux/bitops.h> +#include <linux/cpufeature.h> #include <linux/dmar.h> #include <linux/intel-iommu.h> #include <linux/iommu.h> @@ -123,12 +125,13 @@ int intel_pasid_alloc_table(struct device *dev) struct pasid_table *pasid_table; struct pasid_table_opaque data; struct page *pages; - size_t size, count; + int max_pasid = 0; int ret, order; + int size; + might_sleep(); info = dev->archdata.iommu; - if (WARN_ON(!info || !dev_is_pci(dev) || - !info->pasid_supported || info->pasid_table)) + if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table)) return -EINVAL; /* DMA alias device already has a pasid table, use it: */ @@ -138,23 +141,25 @@ int intel_pasid_alloc_table(struct device *dev) if (ret) goto attach_out; - pasid_table = kzalloc(sizeof(*pasid_table), GFP_ATOMIC); + pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL); if (!pasid_table) return -ENOMEM; INIT_LIST_HEAD(&pasid_table->dev); - size = sizeof(struct pasid_entry); - count = min_t(int, pci_max_pasids(to_pci_dev(dev)), intel_pasid_max_id); - order = get_order(size * count); + if (info->pasid_supported) + max_pasid = min_t(int, pci_max_pasids(to_pci_dev(dev)), + intel_pasid_max_id); + + size = max_pasid >> (PASID_PDE_SHIFT - 3); + order = size ? get_order(size) : 0; pages = alloc_pages_node(info->iommu->node, - GFP_ATOMIC | __GFP_ZERO, - order); + GFP_KERNEL | __GFP_ZERO, order); if (!pages) return -ENOMEM; pasid_table->table = page_address(pages); pasid_table->order = order; - pasid_table->max_pasid = count; + pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3); attach_out: device_attach_pasid_table(info, pasid_table); @@ -162,14 +167,33 @@ attach_out: return 0; } +/* Get PRESENT bit of a PASID directory entry. */ +static inline bool +pasid_pde_is_present(struct pasid_dir_entry *pde) +{ + return READ_ONCE(pde->val) & PASID_PTE_PRESENT; +} + +/* Get PASID table from a PASID directory entry. */ +static inline struct pasid_entry * +get_pasid_table_from_pde(struct pasid_dir_entry *pde) +{ + if (!pasid_pde_is_present(pde)) + return NULL; + + return phys_to_virt(READ_ONCE(pde->val) & PDE_PFN_MASK); +} + void intel_pasid_free_table(struct device *dev) { struct device_domain_info *info; struct pasid_table *pasid_table; + struct pasid_dir_entry *dir; + struct pasid_entry *table; + int i, max_pde; info = dev->archdata.iommu; - if (!info || !dev_is_pci(dev) || - !info->pasid_supported || !info->pasid_table) + if (!info || !dev_is_pci(dev) || !info->pasid_table) return; pasid_table = info->pasid_table; @@ -178,6 +202,14 @@ void intel_pasid_free_table(struct device *dev) if (!list_empty(&pasid_table->dev)) return; + /* Free scalable mode PASID directory tables: */ + dir = pasid_table->table; + max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT; + for (i = 0; i < max_pde; i++) { + table = get_pasid_table_from_pde(&dir[i]); + free_pgtable_page(table); + } + free_pages((unsigned long)pasid_table->table, pasid_table->order); kfree(pasid_table); } @@ -206,17 +238,37 @@ int intel_pasid_get_dev_max_id(struct device *dev) struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid) { + struct device_domain_info *info; struct pasid_table *pasid_table; + struct pasid_dir_entry *dir; struct pasid_entry *entries; + int dir_index, index; pasid_table = intel_pasid_get_table(dev); if (WARN_ON(!pasid_table || pasid < 0 || pasid >= intel_pasid_get_dev_max_id(dev))) return NULL; - entries = pasid_table->table; + dir = pasid_table->table; + info = dev->archdata.iommu; + dir_index = pasid >> PASID_PDE_SHIFT; + index = pasid & PASID_PTE_MASK; + + spin_lock(&pasid_lock); + entries = get_pasid_table_from_pde(&dir[dir_index]); + if (!entries) { + entries = alloc_pgtable_page(info->iommu->node); + if (!entries) { + spin_unlock(&pasid_lock); + return NULL; + } + + WRITE_ONCE(dir[dir_index].val, + (u64)virt_to_phys(entries) | PASID_PTE_PRESENT); + } + spin_unlock(&pasid_lock); - return &entries[pasid]; + return &entries[index]; } /* @@ -224,10 +276,17 @@ struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid) */ static inline void pasid_clear_entry(struct pasid_entry *pe) { - WRITE_ONCE(pe->val, 0); + WRITE_ONCE(pe->val[0], 0); + WRITE_ONCE(pe->val[1], 0); + WRITE_ONCE(pe->val[2], 0); + WRITE_ONCE(pe->val[3], 0); + WRITE_ONCE(pe->val[4], 0); + WRITE_ONCE(pe->val[5], 0); + WRITE_ONCE(pe->val[6], 0); + WRITE_ONCE(pe->val[7], 0); } -void intel_pasid_clear_entry(struct device *dev, int pasid) +static void intel_pasid_clear_entry(struct device *dev, int pasid) { struct pasid_entry *pe; @@ -237,3 +296,361 @@ void intel_pasid_clear_entry(struct device *dev, int pasid) pasid_clear_entry(pe); } + +static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits) +{ + u64 old; + + old = READ_ONCE(*ptr); + WRITE_ONCE(*ptr, (old & ~mask) | bits); +} + +/* + * Setup the DID(Domain Identifier) field (Bit 64~79) of scalable mode + * PASID entry. + */ +static inline void +pasid_set_domain_id(struct pasid_entry *pe, u64 value) +{ + pasid_set_bits(&pe->val[1], GENMASK_ULL(15, 0), value); +} + +/* + * Get domain ID value of a scalable mode PASID entry. + */ +static inline u16 +pasid_get_domain_id(struct pasid_entry *pe) +{ + return (u16)(READ_ONCE(pe->val[1]) & GENMASK_ULL(15, 0)); +} + +/* + * Setup the SLPTPTR(Second Level Page Table Pointer) field (Bit 12~63) + * of a scalable mode PASID entry. + */ +static inline void +pasid_set_slptr(struct pasid_entry *pe, u64 value) +{ + pasid_set_bits(&pe->val[0], VTD_PAGE_MASK, value); +} + +/* + * Setup the AW(Address Width) field (Bit 2~4) of a scalable mode PASID + * entry. + */ +static inline void +pasid_set_address_width(struct pasid_entry *pe, u64 value) +{ + pasid_set_bits(&pe->val[0], GENMASK_ULL(4, 2), value << 2); +} + +/* + * Setup the PGTT(PASID Granular Translation Type) field (Bit 6~8) + * of a scalable mode PASID entry. + */ +static inline void +pasid_set_translation_type(struct pasid_entry *pe, u64 value) +{ + pasid_set_bits(&pe->val[0], GENMASK_ULL(8, 6), value << 6); +} + +/* + * Enable fault processing by clearing the FPD(Fault Processing + * Disable) field (Bit 1) of a scalable mode PASID entry. + */ +static inline void pasid_set_fault_enable(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[0], 1 << 1, 0); +} + +/* + * Setup the SRE(Supervisor Request Enable) field (Bit 128) of a + * scalable mode PASID entry. + */ +static inline void pasid_set_sre(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[2], 1 << 0, 1); +} + +/* + * Setup the P(Present) field (Bit 0) of a scalable mode PASID + * entry. + */ +static inline void pasid_set_present(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[0], 1 << 0, 1); +} + +/* + * Setup Page Walk Snoop bit (Bit 87) of a scalable mode PASID + * entry. + */ +static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value) +{ + pasid_set_bits(&pe->val[1], 1 << 23, value); +} + +/* + * Setup the First Level Page table Pointer field (Bit 140~191) + * of a scalable mode PASID entry. + */ +static inline void +pasid_set_flptr(struct pasid_entry *pe, u64 value) +{ + pasid_set_bits(&pe->val[2], VTD_PAGE_MASK, value); +} + +/* + * Setup the First Level Paging Mode field (Bit 130~131) of a + * scalable mode PASID entry. + */ +static inline void +pasid_set_flpm(struct pasid_entry *pe, u64 value) +{ + pasid_set_bits(&pe->val[2], GENMASK_ULL(3, 2), value << 2); +} + +static void +pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu, + u16 did, int pasid) +{ + struct qi_desc desc; + + desc.qw0 = QI_PC_DID(did) | QI_PC_PASID_SEL | QI_PC_PASID(pasid); + desc.qw1 = 0; + desc.qw2 = 0; + desc.qw3 = 0; + + qi_submit_sync(&desc, iommu); +} + +static void +iotlb_invalidation_with_pasid(struct intel_iommu *iommu, u16 did, u32 pasid) +{ + struct qi_desc desc; + + desc.qw0 = QI_EIOTLB_PASID(pasid) | QI_EIOTLB_DID(did) | + QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE; + desc.qw1 = 0; + desc.qw2 = 0; + desc.qw3 = 0; + + qi_submit_sync(&desc, iommu); +} + +static void +devtlb_invalidation_with_pasid(struct intel_iommu *iommu, + struct device *dev, int pasid) +{ + struct device_domain_info *info; + u16 sid, qdep, pfsid; + + info = dev->archdata.iommu; + if (!info || !info->ats_enabled) + return; + + sid = info->bus << 8 | info->devfn; + qdep = info->ats_qdep; + pfsid = info->pfsid; + + qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT); +} + +void intel_pasid_tear_down_entry(struct intel_iommu *iommu, + struct device *dev, int pasid) +{ + struct pasid_entry *pte; + u16 did; + + pte = intel_pasid_get_entry(dev, pasid); + if (WARN_ON(!pte)) + return; + + intel_pasid_clear_entry(dev, pasid); + did = pasid_get_domain_id(pte); + + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(pte, sizeof(*pte)); + + pasid_cache_invalidation_with_pasid(iommu, did, pasid); + iotlb_invalidation_with_pasid(iommu, did, pasid); + + /* Device IOTLB doesn't need to be flushed in caching mode. */ + if (!cap_caching_mode(iommu->cap)) + devtlb_invalidation_with_pasid(iommu, dev, pasid); +} + +/* + * Set up the scalable mode pasid table entry for first only + * translation type. + */ +int intel_pasid_setup_first_level(struct intel_iommu *iommu, + struct device *dev, pgd_t *pgd, + int pasid, u16 did, int flags) +{ + struct pasid_entry *pte; + + if (!ecap_flts(iommu->ecap)) { + pr_err("No first level translation support on %s\n", + iommu->name); + return -EINVAL; + } + + pte = intel_pasid_get_entry(dev, pasid); + if (WARN_ON(!pte)) + return -EINVAL; + + pasid_clear_entry(pte); + + /* Setup the first level page table pointer: */ + pasid_set_flptr(pte, (u64)__pa(pgd)); + if (flags & PASID_FLAG_SUPERVISOR_MODE) { + if (!ecap_srs(iommu->ecap)) { + pr_err("No supervisor request support on %s\n", + iommu->name); + return -EINVAL; + } + pasid_set_sre(pte); + } + +#ifdef CONFIG_X86 + if (cpu_feature_enabled(X86_FEATURE_LA57)) + pasid_set_flpm(pte, 1); +#endif /* CONFIG_X86 */ + + pasid_set_domain_id(pte, did); + pasid_set_address_width(pte, iommu->agaw); + pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); + + /* Setup Present and PASID Granular Transfer Type: */ + pasid_set_translation_type(pte, 1); + pasid_set_present(pte); + + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(pte, sizeof(*pte)); + + if (cap_caching_mode(iommu->cap)) { + pasid_cache_invalidation_with_pasid(iommu, did, pasid); + iotlb_invalidation_with_pasid(iommu, did, pasid); + } else { + iommu_flush_write_buffer(iommu); + } + + return 0; +} + +/* + * Set up the scalable mode pasid entry for second only translation type. + */ +int intel_pasid_setup_second_level(struct intel_iommu *iommu, + struct dmar_domain *domain, + struct device *dev, int pasid) +{ + struct pasid_entry *pte; + struct dma_pte *pgd; + u64 pgd_val; + int agaw; + u16 did; + + /* + * If hardware advertises no support for second level + * translation, return directly. + */ + if (!ecap_slts(iommu->ecap)) { + pr_err("No second level translation support on %s\n", + iommu->name); + return -EINVAL; + } + + /* + * Skip top levels of page tables for iommu which has less agaw + * than default. Unnecessary for PT mode. + */ + pgd = domain->pgd; + for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) { + pgd = phys_to_virt(dma_pte_addr(pgd)); + if (!dma_pte_present(pgd)) { + dev_err(dev, "Invalid domain page table\n"); + return -EINVAL; + } + } + + pgd_val = virt_to_phys(pgd); + did = domain->iommu_did[iommu->seq_id]; + + pte = intel_pasid_get_entry(dev, pasid); + if (!pte) { + dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid); + return -ENODEV; + } + + pasid_clear_entry(pte); + pasid_set_domain_id(pte, did); + pasid_set_slptr(pte, pgd_val); + pasid_set_address_width(pte, agaw); + pasid_set_translation_type(pte, 2); + pasid_set_fault_enable(pte); + pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); + + /* + * Since it is a second level only translation setup, we should + * set SRE bit as well (addresses are expected to be GPAs). + */ + pasid_set_sre(pte); + pasid_set_present(pte); + + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(pte, sizeof(*pte)); + + if (cap_caching_mode(iommu->cap)) { + pasid_cache_invalidation_with_pasid(iommu, did, pasid); + iotlb_invalidation_with_pasid(iommu, did, pasid); + } else { + iommu_flush_write_buffer(iommu); + } + + return 0; +} + +/* + * Set up the scalable mode pasid entry for passthrough translation type. + */ +int intel_pasid_setup_pass_through(struct intel_iommu *iommu, + struct dmar_domain *domain, + struct device *dev, int pasid) +{ + u16 did = FLPT_DEFAULT_DID; + struct pasid_entry *pte; + + pte = intel_pasid_get_entry(dev, pasid); + if (!pte) { + dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid); + return -ENODEV; + } + + pasid_clear_entry(pte); + pasid_set_domain_id(pte, did); + pasid_set_address_width(pte, iommu->agaw); + pasid_set_translation_type(pte, 4); + pasid_set_fault_enable(pte); + pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); + + /* + * We should set SRE bit as well since the addresses are expected + * to be GPAs. + */ + pasid_set_sre(pte); + pasid_set_present(pte); + + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(pte, sizeof(*pte)); + + if (cap_caching_mode(iommu->cap)) { + pasid_cache_invalidation_with_pasid(iommu, did, pasid); + iotlb_invalidation_with_pasid(iommu, did, pasid); + } else { + iommu_flush_write_buffer(iommu); + } + + return 0; +} diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h @@ -10,13 +10,37 @@ #ifndef __INTEL_PASID_H #define __INTEL_PASID_H +#define PASID_RID2PASID 0x0 #define PASID_MIN 0x1 -#define PASID_MAX 0x20000 +#define PASID_MAX 0x100000 +#define PASID_PTE_MASK 0x3F +#define PASID_PTE_PRESENT 1 +#define PDE_PFN_MASK PAGE_MASK +#define PASID_PDE_SHIFT 6 +#define MAX_NR_PASID_BITS 20 -struct pasid_entry { +/* + * Domain ID reserved for pasid entries programmed for first-level + * only and pass-through transfer modes. + */ +#define FLPT_DEFAULT_DID 1 + +/* + * The SUPERVISOR_MODE flag indicates a first level translation which + * can be used for access to kernel addresses. It is valid only for + * access to the kernel's static 1:1 mapping of physical memory — not + * to vmalloc or even module mappings. + */ +#define PASID_FLAG_SUPERVISOR_MODE BIT(0) + +struct pasid_dir_entry { u64 val; }; +struct pasid_entry { + u64 val[8]; +}; + /* The representative of a PASID table */ struct pasid_table { void *table; /* pasid table pointer */ @@ -34,6 +58,16 @@ void intel_pasid_free_table(struct device *dev); struct pasid_table *intel_pasid_get_table(struct device *dev); int intel_pasid_get_dev_max_id(struct device *dev); struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid); -void intel_pasid_clear_entry(struct device *dev, int pasid); +int intel_pasid_setup_first_level(struct intel_iommu *iommu, + struct device *dev, pgd_t *pgd, + int pasid, u16 did, int flags); +int intel_pasid_setup_second_level(struct intel_iommu *iommu, + struct dmar_domain *domain, + struct device *dev, int pasid); +int intel_pasid_setup_pass_through(struct intel_iommu *iommu, + struct dmar_domain *domain, + struct device *dev, int pasid); +void intel_pasid_tear_down_entry(struct intel_iommu *iommu, + struct device *dev, int pasid); #endif /* __INTEL_PASID_H */ diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c @@ -29,21 +29,10 @@ #include "intel-pasid.h" -#define PASID_ENTRY_P BIT_ULL(0) -#define PASID_ENTRY_FLPM_5LP BIT_ULL(9) -#define PASID_ENTRY_SRE BIT_ULL(11) - static irqreturn_t prq_event_thread(int irq, void *d); -struct pasid_state_entry { - u64 val; -}; - int intel_svm_init(struct intel_iommu *iommu) { - struct page *pages; - int order; - if (cpu_feature_enabled(X86_FEATURE_GBPAGES) && !cap_fl1gp_support(iommu->cap)) return -EINVAL; @@ -52,41 +41,6 @@ int intel_svm_init(struct intel_iommu *iommu) !cap_5lp_support(iommu->cap)) return -EINVAL; - /* Start at 2 because it's defined as 2^(1+PSS) */ - iommu->pasid_max = 2 << ecap_pss(iommu->ecap); - - /* Eventually I'm promised we will get a multi-level PASID table - * and it won't have to be physically contiguous. Until then, - * limit the size because 8MiB contiguous allocations can be hard - * to come by. The limit of 0x20000, which is 1MiB for each of - * the PASID and PASID-state tables, is somewhat arbitrary. */ - if (iommu->pasid_max > 0x20000) - iommu->pasid_max = 0x20000; - - order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max); - if (ecap_dis(iommu->ecap)) { - /* Just making it explicit... */ - BUILD_BUG_ON(sizeof(struct pasid_entry) != sizeof(struct pasid_state_entry)); - pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order); - if (pages) - iommu->pasid_state_table = page_address(pages); - else - pr_warn("IOMMU: %s: Failed to allocate PASID state table\n", - iommu->name); - } - - return 0; -} - -int intel_svm_exit(struct intel_iommu *iommu) -{ - int order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max); - - if (iommu->pasid_state_table) { - free_pages((unsigned long)iommu->pasid_state_table, order); - iommu->pasid_state_table = NULL; - } - return 0; } @@ -163,27 +117,40 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d * because that's the only option the hardware gives us. Despite * the fact that they are actually only accessible through one. */ if (gl) - desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) | - QI_EIOTLB_GRAN(QI_GRAN_ALL_ALL) | QI_EIOTLB_TYPE; + desc.qw0 = QI_EIOTLB_PASID(svm->pasid) | + QI_EIOTLB_DID(sdev->did) | + QI_EIOTLB_GRAN(QI_GRAN_ALL_ALL) | + QI_EIOTLB_TYPE; else - desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) | - QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE; - desc.high = 0; + desc.qw0 = QI_EIOTLB_PASID(svm->pasid) | + QI_EIOTLB_DID(sdev->did) | + QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | + QI_EIOTLB_TYPE; + desc.qw1 = 0; } else { int mask = ilog2(__roundup_pow_of_two(pages)); - desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) | - QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | QI_EIOTLB_TYPE; - desc.high = QI_EIOTLB_ADDR(address) | QI_EIOTLB_GL(gl) | - QI_EIOTLB_IH(ih) | QI_EIOTLB_AM(mask); + desc.qw0 = QI_EIOTLB_PASID(svm->pasid) | + QI_EIOTLB_DID(sdev->did) | + QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | + QI_EIOTLB_TYPE; + desc.qw1 = QI_EIOTLB_ADDR(address) | + QI_EIOTLB_GL(gl) | + QI_EIOTLB_IH(ih) | + QI_EIOTLB_AM(mask); } + desc.qw2 = 0; + desc.qw3 = 0; qi_submit_sync(&desc, svm->iommu); if (sdev->dev_iotlb) { - desc.low = QI_DEV_EIOTLB_PASID(svm->pasid) | QI_DEV_EIOTLB_SID(sdev->sid) | - QI_DEV_EIOTLB_QDEP(sdev->qdep) | QI_DEIOTLB_TYPE; + desc.qw0 = QI_DEV_EIOTLB_PASID(svm->pasid) | + QI_DEV_EIOTLB_SID(sdev->sid) | + QI_DEV_EIOTLB_QDEP(sdev->qdep) | + QI_DEIOTLB_TYPE; if (pages == -1) { - desc.high = QI_DEV_EIOTLB_ADDR(-1ULL >> 1) | QI_DEV_EIOTLB_SIZE; + desc.qw1 = QI_DEV_EIOTLB_ADDR(-1ULL >> 1) | + QI_DEV_EIOTLB_SIZE; } else if (pages > 1) { /* The least significant zero bit indicates the size. So, * for example, an "address" value of 0x12345f000 will @@ -191,10 +158,13 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d unsigned long last = address + ((unsigned long)(pages - 1) << VTD_PAGE_SHIFT); unsigned long mask = __rounddown_pow_of_two(address ^ last); - desc.high = QI_DEV_EIOTLB_ADDR((address & ~mask) | (mask - 1)) | QI_DEV_EIOTLB_SIZE; + desc.qw1 = QI_DEV_EIOTLB_ADDR((address & ~mask) | + (mask - 1)) | QI_DEV_EIOTLB_SIZE; } else { - desc.high = QI_DEV_EIOTLB_ADDR(address); + desc.qw1 = QI_DEV_EIOTLB_ADDR(address); } + desc.qw2 = 0; + desc.qw3 = 0; qi_submit_sync(&desc, svm->iommu); } } @@ -204,11 +174,6 @@ static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address, { struct intel_svm_dev *sdev; - /* Try deferred invalidate if available */ - if (svm->iommu->pasid_state_table && - !cmpxchg64(&svm->iommu->pasid_state_table[svm->pasid].val, 0, 1ULL << 63)) - return; - rcu_read_lock(); list_for_each_entry_rcu(sdev, &svm->devs, list) intel_flush_svm_range_dev(svm, sdev, address, pages, ih, gl); @@ -234,17 +199,6 @@ static void intel_invalidate_range(struct mmu_notifier *mn, (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0, 0); } - -static void intel_flush_pasid_dev(struct intel_svm *svm, struct intel_svm_dev *sdev, int pasid) -{ - struct qi_desc desc; - - desc.high = 0; - desc.low = QI_PC_TYPE | QI_PC_DID(sdev->did) | QI_PC_PASID_SEL | QI_PC_PASID(pasid); - - qi_submit_sync(&desc, svm->iommu); -} - static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) { struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); @@ -264,8 +218,7 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) */ rcu_read_lock(); list_for_each_entry_rcu(sdev, &svm->devs, list) { - intel_pasid_clear_entry(sdev->dev, svm->pasid); - intel_flush_pasid_dev(svm, sdev, svm->pasid); + intel_pasid_tear_down_entry(svm->iommu, sdev->dev, svm->pasid); intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm); } rcu_read_unlock(); @@ -284,11 +237,9 @@ static LIST_HEAD(global_svm_list); int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops) { struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); - struct pasid_entry *entry; struct intel_svm_dev *sdev; struct intel_svm *svm = NULL; struct mm_struct *mm = NULL; - u64 pasid_entry_val; int pasid_max; int ret; @@ -397,24 +348,22 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ kfree(sdev); goto out; } - pasid_entry_val = (u64)__pa(mm->pgd) | PASID_ENTRY_P; - } else - pasid_entry_val = (u64)__pa(init_mm.pgd) | - PASID_ENTRY_P | PASID_ENTRY_SRE; - if (cpu_feature_enabled(X86_FEATURE_LA57)) - pasid_entry_val |= PASID_ENTRY_FLPM_5LP; - - entry = intel_pasid_get_entry(dev, svm->pasid); - entry->val = pasid_entry_val; - - wmb(); + } - /* - * Flush PASID cache when a PASID table entry becomes - * present. - */ - if (cap_caching_mode(iommu->cap)) - intel_flush_pasid_dev(svm, sdev, svm->pasid); + spin_lock(&iommu->lock); + ret = intel_pasid_setup_first_level(iommu, dev, + mm ? mm->pgd : init_mm.pgd, + svm->pasid, FLPT_DEFAULT_DID, + mm ? 0 : PASID_FLAG_SUPERVISOR_MODE); + spin_unlock(&iommu->lock); + if (ret) { + if (mm) + mmu_notifier_unregister(&svm->notifier, mm); + intel_pasid_free_id(svm->pasid); + kfree(svm); + kfree(sdev); + goto out; + } list_add_tail(&svm->list, &global_svm_list); } @@ -460,10 +409,9 @@ int intel_svm_unbind_mm(struct device *dev, int pasid) * to use. We have a *shared* PASID table, because it's * large and has to be physically contiguous. So it's * hard to be as defensive as we might like. */ - intel_flush_pasid_dev(svm, sdev, svm->pasid); + intel_pasid_tear_down_entry(iommu, dev, svm->pasid); intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm); kfree_rcu(sdev, rcu); - intel_pasid_clear_entry(dev, svm->pasid); if (list_empty(&svm->devs)) { intel_pasid_free_id(svm->pasid); @@ -671,24 +619,27 @@ static irqreturn_t prq_event_thread(int irq, void *d) no_pasid: if (req->lpig) { /* Page Group Response */ - resp.low = QI_PGRP_PASID(req->pasid) | + resp.qw0 = QI_PGRP_PASID(req->pasid) | QI_PGRP_DID((req->bus << 8) | req->devfn) | QI_PGRP_PASID_P(req->pasid_present) | QI_PGRP_RESP_TYPE; - resp.high = QI_PGRP_IDX(req->prg_index) | - QI_PGRP_PRIV(req->private) | QI_PGRP_RESP_CODE(result); - - qi_submit_sync(&resp, iommu); + resp.qw1 = QI_PGRP_IDX(req->prg_index) | + QI_PGRP_PRIV(req->private) | + QI_PGRP_RESP_CODE(result); } else if (req->srr) { /* Page Stream Response */ - resp.low = QI_PSTRM_IDX(req->prg_index) | - QI_PSTRM_PRIV(req->private) | QI_PSTRM_BUS(req->bus) | - QI_PSTRM_PASID(req->pasid) | QI_PSTRM_RESP_TYPE; - resp.high = QI_PSTRM_ADDR(address) | QI_PSTRM_DEVFN(req->devfn) | + resp.qw0 = QI_PSTRM_IDX(req->prg_index) | + QI_PSTRM_PRIV(req->private) | + QI_PSTRM_BUS(req->bus) | + QI_PSTRM_PASID(req->pasid) | + QI_PSTRM_RESP_TYPE; + resp.qw1 = QI_PSTRM_ADDR(address) | + QI_PSTRM_DEVFN(req->devfn) | QI_PSTRM_RESP_CODE(result); - - qi_submit_sync(&resp, iommu); } + resp.qw2 = 0; + resp.qw3 = 0; + qi_submit_sync(&resp, iommu); head = (head + sizeof(*req)) & PRQ_RING_MASK; } diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c @@ -145,9 +145,11 @@ static int qi_flush_iec(struct intel_iommu *iommu, int index, int mask) { struct qi_desc desc; - desc.low = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask) + desc.qw0 = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask) | QI_IEC_SELECTIVE; - desc.high = 0; + desc.qw1 = 0; + desc.qw2 = 0; + desc.qw3 = 0; return qi_submit_sync(&desc, iommu); } diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c @@ -709,10 +709,6 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, { struct arm_v7s_io_pgtable *data; -#ifdef PHYS_OFFSET - if (upper_32_bits(PHYS_OFFSET)) - return NULL; -#endif if (cfg->ias > ARM_V7S_ADDR_BITS || cfg->oas > ARM_V7S_ADDR_BITS) return NULL; diff --git a/drivers/iommu/iommu-sysfs.c b/drivers/iommu/iommu-sysfs.c @@ -11,7 +11,7 @@ #include <linux/device.h> #include <linux/iommu.h> -#include <linux/module.h> +#include <linux/init.h> #include <linux/slab.h> /* @@ -22,25 +22,25 @@ static struct attribute *devices_attr[] = { NULL, }; -static const struct attribute_group iommu_devices_attr_group = { +static const struct attribute_group devices_attr_group = { .name = "devices", .attrs = devices_attr, }; -static const struct attribute_group *iommu_dev_groups[] = { - &iommu_devices_attr_group, +static const struct attribute_group *dev_groups[] = { + &devices_attr_group, NULL, }; -static void iommu_release_device(struct device *dev) +static void release_device(struct device *dev) { kfree(dev); } static struct class iommu_class = { .name = "iommu", - .dev_release = iommu_release_device, - .dev_groups = iommu_dev_groups, + .dev_release = release_device, + .dev_groups = dev_groups, }; static int __init iommu_dev_init(void) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c @@ -22,7 +22,8 @@ #include <linux/kernel.h> #include <linux/bug.h> #include <linux/types.h> -#include <linux/module.h> +#include <linux/init.h> +#include <linux/export.h> #include <linux/slab.h> #include <linux/errno.h> #include <linux/iommu.h> @@ -110,6 +111,27 @@ void iommu_device_unregister(struct iommu_device *iommu) spin_unlock(&iommu_device_lock); } +int iommu_probe_device(struct device *dev) +{ + const struct iommu_ops *ops = dev->bus->iommu_ops; + int ret = -EINVAL; + + WARN_ON(dev->iommu_group); + + if (ops) + ret = ops->add_device(dev); + + return ret; +} + +void iommu_release_device(struct device *dev) +{ + const struct iommu_ops *ops = dev->bus->iommu_ops; + + if (dev->iommu_group) + ops->remove_device(dev); +} + static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus, unsigned type); static int __iommu_attach_device(struct iommu_domain *domain, @@ -1117,16 +1139,7 @@ struct iommu_domain *iommu_group_default_domain(struct iommu_group *group) static int add_iommu_group(struct device *dev, void *data) { - struct iommu_callback_data *cb = data; - const struct iommu_ops *ops = cb->ops; - int ret; - - if (!ops->add_device) - return 0; - - WARN_ON(dev->iommu_group); - - ret = ops->add_device(dev); + int ret = iommu_probe_device(dev); /* * We ignore -ENODEV errors for now, as they just mean that the @@ -1141,11 +1154,7 @@ static int add_iommu_group(struct device *dev, void *data) static int remove_iommu_group(struct device *dev, void *data) { - struct iommu_callback_data *cb = data; - const struct iommu_ops *ops = cb->ops; - - if (ops->remove_device && dev->iommu_group) - ops->remove_device(dev); + iommu_release_device(dev); return 0; } @@ -1153,27 +1162,22 @@ static int remove_iommu_group(struct device *dev, void *data) static int iommu_bus_notifier(struct notifier_block *nb, unsigned long action, void *data) { + unsigned long group_action = 0; struct device *dev = data; - const struct iommu_ops *ops = dev->bus->iommu_ops; struct iommu_group *group; - unsigned long group_action = 0; /* * ADD/DEL call into iommu driver ops if provided, which may * result in ADD/DEL notifiers to group->notifier */ if (action == BUS_NOTIFY_ADD_DEVICE) { - if (ops->add_device) { - int ret; + int ret; - ret = ops->add_device(dev); - return (ret) ? NOTIFY_DONE : NOTIFY_OK; - } + ret = iommu_probe_device(dev); + return (ret) ? NOTIFY_DONE : NOTIFY_OK; } else if (action == BUS_NOTIFY_REMOVED_DEVICE) { - if (ops->remove_device && dev->iommu_group) { - ops->remove_device(dev); - return 0; - } + iommu_release_device(dev); + return NOTIFY_OK; } /* @@ -1712,33 +1716,32 @@ EXPORT_SYMBOL_GPL(iommu_unmap_fast); size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg, unsigned int nents, int prot) { - struct scatterlist *s; - size_t mapped = 0; - unsigned int i, min_pagesz; + size_t len = 0, mapped = 0; + phys_addr_t start; + unsigned int i = 0; int ret; - if (unlikely(domain->pgsize_bitmap == 0UL)) - return 0; + while (i <= nents) { + phys_addr_t s_phys = sg_phys(sg); - min_pagesz = 1 << __ffs(domain->pgsize_bitmap); - - for_each_sg(sg, s, nents, i) { - phys_addr_t phys = page_to_phys(sg_page(s)) + s->offset; + if (len && s_phys != start + len) { + ret = iommu_map(domain, iova + mapped, start, len, prot); + if (ret) + goto out_err; - /* - * We are mapping on IOMMU page boundaries, so offset within - * the page must be 0. However, the IOMMU may support pages - * smaller than PAGE_SIZE, so s->offset may still represent - * an offset of that boundary within the CPU page. - */ - if (!IS_ALIGNED(s->offset, min_pagesz)) - goto out_err; + mapped += len; + len = 0; + } - ret = iommu_map(domain, iova + mapped, phys, s->length, prot); - if (ret) - goto out_err; + if (len) { + len += sg->length; + } else { + len = sg->length; + start = s_phys; + } - mapped += s->length; + if (++i < nents) + sg = sg_next(sg); } return mapped; @@ -1976,7 +1979,7 @@ const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode) int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, const struct iommu_ops *ops) { - struct iommu_fwspec *fwspec = dev->iommu_fwspec; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); if (fwspec) return ops == fwspec->ops ? 0 : -EINVAL; @@ -1988,26 +1991,26 @@ int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, of_node_get(to_of_node(iommu_fwnode)); fwspec->iommu_fwnode = iommu_fwnode; fwspec->ops = ops; - dev->iommu_fwspec = fwspec; + dev_iommu_fwspec_set(dev, fwspec); return 0; } EXPORT_SYMBOL_GPL(iommu_fwspec_init); void iommu_fwspec_free(struct device *dev) { - struct iommu_fwspec *fwspec = dev->iommu_fwspec; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); if (fwspec) { fwnode_handle_put(fwspec->iommu_fwnode); kfree(fwspec); - dev->iommu_fwspec = NULL; + dev_iommu_fwspec_set(dev, NULL); } } EXPORT_SYMBOL_GPL(iommu_fwspec_free); int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids) { - struct iommu_fwspec *fwspec = dev->iommu_fwspec; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); size_t size; int i; @@ -2016,11 +2019,11 @@ int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids) size = offsetof(struct iommu_fwspec, ids[fwspec->num_ids + num_ids]); if (size > sizeof(*fwspec)) { - fwspec = krealloc(dev->iommu_fwspec, size, GFP_KERNEL); + fwspec = krealloc(fwspec, size, GFP_KERNEL); if (!fwspec) return -ENOMEM; - dev->iommu_fwspec = fwspec; + dev_iommu_fwspec_set(dev, fwspec); } for (i = 0; i < num_ids; i++) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* - * IPMMU VMSA + * IOMMU API for Renesas VMSA-compatible IPMMU + * Author: Laurent Pinchart <laurent.pinchart@ideasonboard.com> * * Copyright (C) 2014 Renesas Electronics Corporation */ @@ -11,10 +12,10 @@ #include <linux/dma-mapping.h> #include <linux/err.h> #include <linux/export.h> +#include <linux/init.h> #include <linux/interrupt.h> #include <linux/io.h> #include <linux/iommu.h> -#include <linux/module.h> #include <linux/of.h> #include <linux/of_device.h> #include <linux/of_iommu.h> @@ -81,7 +82,9 @@ static struct ipmmu_vmsa_domain *to_vmsa_domain(struct iommu_domain *dom) static struct ipmmu_vmsa_device *to_ipmmu(struct device *dev) { - return dev->iommu_fwspec ? dev->iommu_fwspec->iommu_priv : NULL; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + + return fwspec ? fwspec->iommu_priv : NULL; } #define TLB_LOOP_TIMEOUT 100 /* 100us */ @@ -643,7 +646,7 @@ static void ipmmu_domain_free(struct iommu_domain *io_domain) static int ipmmu_attach_device(struct iommu_domain *io_domain, struct device *dev) { - struct iommu_fwspec *fwspec = dev->iommu_fwspec; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct ipmmu_vmsa_device *mmu = to_ipmmu(dev); struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); unsigned int i; @@ -692,7 +695,7 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain, static void ipmmu_detach_device(struct iommu_domain *io_domain, struct device *dev) { - struct iommu_fwspec *fwspec = dev->iommu_fwspec; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); unsigned int i; @@ -744,36 +747,71 @@ static phys_addr_t ipmmu_iova_to_phys(struct iommu_domain *io_domain, static int ipmmu_init_platform_device(struct device *dev, struct of_phandle_args *args) { + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct platform_device *ipmmu_pdev; ipmmu_pdev = of_find_device_by_node(args->np); if (!ipmmu_pdev) return -ENODEV; - dev->iommu_fwspec->iommu_priv = platform_get_drvdata(ipmmu_pdev); - return 0; -} + fwspec->iommu_priv = platform_get_drvdata(ipmmu_pdev); -static bool ipmmu_slave_whitelist(struct device *dev) -{ - /* By default, do not allow use of IPMMU */ - return false; + return 0; } static const struct soc_device_attribute soc_rcar_gen3[] = { + { .soc_id = "r8a774a1", }, + { .soc_id = "r8a774c0", }, { .soc_id = "r8a7795", }, { .soc_id = "r8a7796", }, { .soc_id = "r8a77965", }, { .soc_id = "r8a77970", }, + { .soc_id = "r8a77990", }, + { .soc_id = "r8a77995", }, + { /* sentinel */ } +}; + +static const struct soc_device_attribute soc_rcar_gen3_whitelist[] = { + { .soc_id = "r8a774c0", }, + { .soc_id = "r8a7795", .revision = "ES3.*" }, + { .soc_id = "r8a77965", }, + { .soc_id = "r8a77990", }, { .soc_id = "r8a77995", }, { /* sentinel */ } }; +static const char * const rcar_gen3_slave_whitelist[] = { +}; + +static bool ipmmu_slave_whitelist(struct device *dev) +{ + unsigned int i; + + /* + * For R-Car Gen3 use a white list to opt-in slave devices. + * For Other SoCs, this returns true anyway. + */ + if (!soc_device_match(soc_rcar_gen3)) + return true; + + /* Check whether this R-Car Gen3 can use the IPMMU correctly or not */ + if (!soc_device_match(soc_rcar_gen3_whitelist)) + return false; + + /* Check whether this slave device can work with the IPMMU */ + for (i = 0; i < ARRAY_SIZE(rcar_gen3_slave_whitelist); i++) { + if (!strcmp(dev_name(dev), rcar_gen3_slave_whitelist[i])) + return true; + } + + /* Otherwise, do not allow use of IPMMU */ + return false; +} + static int ipmmu_of_xlate(struct device *dev, struct of_phandle_args *spec) { - /* For R-Car Gen3 use a white list to opt-in slave devices */ - if (soc_device_match(soc_rcar_gen3) && !ipmmu_slave_whitelist(dev)) + if (!ipmmu_slave_whitelist(dev)) return -ENODEV; iommu_fwspec_add_ids(dev, spec->args, 1); @@ -941,6 +979,12 @@ static const struct of_device_id ipmmu_of_ids[] = { .compatible = "renesas,ipmmu-vmsa", .data = &ipmmu_features_default, }, { + .compatible = "renesas,ipmmu-r8a774a1", + .data = &ipmmu_features_rcar_gen3, + }, { + .compatible = "renesas,ipmmu-r8a774c0", + .data = &ipmmu_features_rcar_gen3, + }, { .compatible = "renesas,ipmmu-r8a7795", .data = &ipmmu_features_rcar_gen3, }, { @@ -953,6 +997,9 @@ static const struct of_device_id ipmmu_of_ids[] = { .compatible = "renesas,ipmmu-r8a77970", .data = &ipmmu_features_rcar_gen3, }, { + .compatible = "renesas,ipmmu-r8a77990", + .data = &ipmmu_features_rcar_gen3, + }, { .compatible = "renesas,ipmmu-r8a77995", .data = &ipmmu_features_rcar_gen3, }, { @@ -960,8 +1007,6 @@ static const struct of_device_id ipmmu_of_ids[] = { }, }; -MODULE_DEVICE_TABLE(of, ipmmu_of_ids); - static int ipmmu_probe(struct platform_device *pdev) { struct ipmmu_vmsa_device *mmu; @@ -1132,15 +1177,4 @@ static int __init ipmmu_init(void) setup_done = true; return 0; } - -static void __exit ipmmu_exit(void) -{ - return platform_driver_unregister(&ipmmu_driver); -} - subsys_initcall(ipmmu_init); -module_exit(ipmmu_exit); - -MODULE_DESCRIPTION("IOMMU API for Renesas VMSA-compatible IPMMU"); -MODULE_AUTHOR("Laurent Pinchart <laurent.pinchart@ideasonboard.com>"); -MODULE_LICENSE("GPL v2"); diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c @@ -1,4 +1,3 @@ -#include <linux/seq_file.h> #include <linux/cpumask.h> #include <linux/kernel.h> #include <linux/string.h> diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c @@ -1,5 +1,7 @@ /* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved. * + * Author: Stepan Moskovchenko <stepanm@codeaurora.org> + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and * only version 2 as published by the Free Software Foundation. @@ -17,7 +19,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/init.h> #include <linux/platform_device.h> #include <linux/errno.h> #include <linux/io.h> @@ -861,14 +863,5 @@ static int __init msm_iommu_driver_init(void) return ret; } - -static void __exit msm_iommu_driver_exit(void) -{ - platform_driver_unregister(&msm_iommu_driver); -} - subsys_initcall(msm_iommu_driver_init); -module_exit(msm_iommu_driver_exit); -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Stepan Moskovchenko <stepanm@codeaurora.org>"); diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c @@ -113,7 +113,7 @@ struct mtk_iommu_domain { struct iommu_domain domain; }; -static struct iommu_ops mtk_iommu_ops; +static const struct iommu_ops mtk_iommu_ops; static LIST_HEAD(m4ulist); /* List all the M4U HWs */ @@ -244,7 +244,7 @@ static void mtk_iommu_config(struct mtk_iommu_data *data, { struct mtk_smi_larb_iommu *larb_mmu; unsigned int larbid, portid; - struct iommu_fwspec *fwspec = dev->iommu_fwspec; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); int i; for (i = 0; i < fwspec->num_ids; ++i) { @@ -336,7 +336,7 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, struct device *dev) { struct mtk_iommu_domain *dom = to_mtk_domain(domain); - struct mtk_iommu_data *data = dev->iommu_fwspec->iommu_priv; + struct mtk_iommu_data *data = dev_iommu_fwspec_get(dev)->iommu_priv; if (!data) return -ENODEV; @@ -355,7 +355,7 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, static void mtk_iommu_detach_device(struct iommu_domain *domain, struct device *dev) { - struct mtk_iommu_data *data = dev->iommu_fwspec->iommu_priv; + struct mtk_iommu_data *data = dev_iommu_fwspec_get(dev)->iommu_priv; if (!data) return; @@ -417,13 +417,14 @@ static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain, static int mtk_iommu_add_device(struct device *dev) { + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct mtk_iommu_data *data; struct iommu_group *group; - if (!dev->iommu_fwspec || dev->iommu_fwspec->ops != &mtk_iommu_ops) + if (!fwspec || fwspec->ops != &mtk_iommu_ops) return -ENODEV; /* Not a iommu client device */ - data = dev->iommu_fwspec->iommu_priv; + data = fwspec->iommu_priv; iommu_device_link(&data->iommu, dev); group = iommu_group_get_for_dev(dev); @@ -436,12 +437,13 @@ static int mtk_iommu_add_device(struct device *dev) static void mtk_iommu_remove_device(struct device *dev) { + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct mtk_iommu_data *data; - if (!dev->iommu_fwspec || dev->iommu_fwspec->ops != &mtk_iommu_ops) + if (!fwspec || fwspec->ops != &mtk_iommu_ops) return; - data = dev->iommu_fwspec->iommu_priv; + data = fwspec->iommu_priv; iommu_device_unlink(&data->iommu, dev); iommu_group_remove_device(dev); @@ -468,6 +470,7 @@ static struct iommu_group *mtk_iommu_device_group(struct device *dev) static int mtk_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) { + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct platform_device *m4updev; if (args->args_count != 1) { @@ -476,19 +479,19 @@ static int mtk_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) return -EINVAL; } - if (!dev->iommu_fwspec->iommu_priv) { + if (!fwspec->iommu_priv) { /* Get the m4u device */ m4updev = of_find_device_by_node(args->np); if (WARN_ON(!m4updev)) return -EINVAL; - dev->iommu_fwspec->iommu_priv = platform_get_drvdata(m4updev); + fwspec->iommu_priv = platform_get_drvdata(m4updev); } return iommu_fwspec_add_ids(dev, args->args, 1); } -static struct iommu_ops mtk_iommu_ops = { +static const struct iommu_ops mtk_iommu_ops = { .domain_alloc = mtk_iommu_domain_alloc, .domain_free = mtk_iommu_domain_free, .attach_dev = mtk_iommu_attach_device, diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c @@ -1,4 +1,6 @@ /* + * IOMMU API for MTK architected m4u v1 implementations + * * Copyright (c) 2015-2016 MediaTek Inc. * Author: Honghui Zhang <honghui.zhang@mediatek.com> * @@ -35,7 +37,7 @@ #include <linux/spinlock.h> #include <asm/barrier.h> #include <asm/dma-iommu.h> -#include <linux/module.h> +#include <linux/init.h> #include <dt-bindings/memory/mt2701-larb-port.h> #include <soc/mediatek/smi.h> #include "mtk_iommu.h" @@ -206,7 +208,7 @@ static void mtk_iommu_config(struct mtk_iommu_data *data, { struct mtk_smi_larb_iommu *larb_mmu; unsigned int larbid, portid; - struct iommu_fwspec *fwspec = dev->iommu_fwspec; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); int i; for (i = 0; i < fwspec->num_ids; ++i) { @@ -271,7 +273,7 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, struct device *dev) { struct mtk_iommu_domain *dom = to_mtk_domain(domain); - struct mtk_iommu_data *data = dev->iommu_fwspec->iommu_priv; + struct mtk_iommu_data *data = dev_iommu_fwspec_get(dev)->iommu_priv; int ret; if (!data) @@ -293,7 +295,7 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, static void mtk_iommu_detach_device(struct iommu_domain *domain, struct device *dev) { - struct mtk_iommu_data *data = dev->iommu_fwspec->iommu_priv; + struct mtk_iommu_data *data = dev_iommu_fwspec_get(dev)->iommu_priv; if (!data) return; @@ -362,7 +364,7 @@ static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain, return pa; } -static struct iommu_ops mtk_iommu_ops; +static const struct iommu_ops mtk_iommu_ops; /* * MTK generation one iommu HW only support one iommu domain, and all the client @@ -371,6 +373,7 @@ static struct iommu_ops mtk_iommu_ops; static int mtk_iommu_create_mapping(struct device *dev, struct of_phandle_args *args) { + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct mtk_iommu_data *data; struct platform_device *m4updev; struct dma_iommu_mapping *mtk_mapping; @@ -383,28 +386,29 @@ static int mtk_iommu_create_mapping(struct device *dev, return -EINVAL; } - if (!dev->iommu_fwspec) { + if (!fwspec) { ret = iommu_fwspec_init(dev, &args->np->fwnode, &mtk_iommu_ops); if (ret) return ret; - } else if (dev->iommu_fwspec->ops != &mtk_iommu_ops) { + fwspec = dev_iommu_fwspec_get(dev); + } else if (dev_iommu_fwspec_get(dev)->ops != &mtk_iommu_ops) { return -EINVAL; } - if (!dev->iommu_fwspec->iommu_priv) { + if (!fwspec->iommu_priv) { /* Get the m4u device */ m4updev = of_find_device_by_node(args->np); if (WARN_ON(!m4updev)) return -EINVAL; - dev->iommu_fwspec->iommu_priv = platform_get_drvdata(m4updev); + fwspec->iommu_priv = platform_get_drvdata(m4updev); } ret = iommu_fwspec_add_ids(dev, args->args, 1); if (ret) return ret; - data = dev->iommu_fwspec->iommu_priv; + data = fwspec->iommu_priv; m4udev = data->dev; mtk_mapping = m4udev->archdata.iommu; if (!mtk_mapping) { @@ -422,6 +426,7 @@ static int mtk_iommu_create_mapping(struct device *dev, static int mtk_iommu_add_device(struct device *dev) { + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct dma_iommu_mapping *mtk_mapping; struct of_phandle_args iommu_spec; struct of_phandle_iterator it; @@ -440,7 +445,7 @@ static int mtk_iommu_add_device(struct device *dev) of_node_put(iommu_spec.np); } - if (!dev->iommu_fwspec || dev->iommu_fwspec->ops != &mtk_iommu_ops) + if (!fwspec || fwspec->ops != &mtk_iommu_ops) return -ENODEV; /* Not a iommu client device */ /* @@ -458,7 +463,7 @@ static int mtk_iommu_add_device(struct device *dev) if (err) return err; - data = dev->iommu_fwspec->iommu_priv; + data = fwspec->iommu_priv; mtk_mapping = data->dev->archdata.iommu; err = arm_iommu_attach_device(dev, mtk_mapping); if (err) { @@ -471,12 +476,13 @@ static int mtk_iommu_add_device(struct device *dev) static void mtk_iommu_remove_device(struct device *dev) { + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct mtk_iommu_data *data; - if (!dev->iommu_fwspec || dev->iommu_fwspec->ops != &mtk_iommu_ops) + if (!fwspec || fwspec->ops != &mtk_iommu_ops) return; - data = dev->iommu_fwspec->iommu_priv; + data = fwspec->iommu_priv; iommu_device_unlink(&data->iommu, dev); iommu_group_remove_device(dev); @@ -524,7 +530,7 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) return 0; } -static struct iommu_ops mtk_iommu_ops = { +static const struct iommu_ops mtk_iommu_ops = { .domain_alloc = mtk_iommu_domain_alloc, .domain_free = mtk_iommu_domain_free, .attach_dev = mtk_iommu_attach_device, @@ -704,15 +710,4 @@ static int __init m4u_init(void) { return platform_driver_register(&mtk_iommu_driver); } - -static void __exit m4u_exit(void) -{ - return platform_driver_unregister(&mtk_iommu_driver); -} - subsys_initcall(m4u_init); -module_exit(m4u_exit); - -MODULE_DESCRIPTION("IOMMU API for MTK architected m4u v1 implementations"); -MODULE_AUTHOR("Honghui Zhang <honghui.zhang@mediatek.com>"); -MODULE_LICENSE("GPL v2"); diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c @@ -164,7 +164,7 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, struct device_node *master_np) { const struct iommu_ops *ops = NULL; - struct iommu_fwspec *fwspec = dev->iommu_fwspec; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); int err = NO_IOMMU; if (!master_np) @@ -208,20 +208,24 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, } } + /* * Two success conditions can be represented by non-negative err here: * >0 : there is no IOMMU, or one was unavailable for non-fatal reasons * 0 : we found an IOMMU, and dev->fwspec is initialised appropriately * <0 : any actual error */ - if (!err) - ops = dev->iommu_fwspec->ops; + if (!err) { + /* The fwspec pointer changed, read it again */ + fwspec = dev_iommu_fwspec_get(dev); + ops = fwspec->ops; + } /* * If we have reason to believe the IOMMU driver missed the initial - * add_device callback for dev, replay it to get things in order. + * probe for dev, replay it to get things in order. */ - if (ops && ops->add_device && dev->bus && !dev->iommu_group) - err = ops->add_device(dev); + if (dev->bus && !device_iommu_mapped(dev)) + err = iommu_probe_device(dev); /* Ignore all other errors apart from EPROBE_DEFER */ if (err == -EPROBE_DEFER) { diff --git a/drivers/iommu/omap-iommu-debug.c b/drivers/iommu/omap-iommu-debug.c @@ -159,7 +159,7 @@ static size_t omap_dump_tlb_entries(struct omap_iommu *obj, struct seq_file *s) return 0; } -static int debug_read_tlb(struct seq_file *s, void *data) +static int tlb_show(struct seq_file *s, void *data) { struct omap_iommu *obj = s->private; @@ -210,7 +210,7 @@ static void dump_ioptable(struct seq_file *s) spin_unlock(&obj->page_table_lock); } -static int debug_read_pagetable(struct seq_file *s, void *data) +static int pagetable_show(struct seq_file *s, void *data) { struct omap_iommu *obj = s->private; @@ -228,35 +228,22 @@ static int debug_read_pagetable(struct seq_file *s, void *data) return 0; } -#define DEBUG_SEQ_FOPS_RO(name) \ - static int debug_open_##name(struct inode *inode, struct file *file) \ - { \ - return single_open(file, debug_read_##name, inode->i_private); \ - } \ - \ - static const struct file_operations debug_##name##_fops = { \ - .open = debug_open_##name, \ - .read = seq_read, \ - .llseek = seq_lseek, \ - .release = single_release, \ - } - #define DEBUG_FOPS_RO(name) \ - static const struct file_operations debug_##name##_fops = { \ + static const struct file_operations name##_fops = { \ .open = simple_open, \ .read = debug_read_##name, \ .llseek = generic_file_llseek, \ } DEBUG_FOPS_RO(regs); -DEBUG_SEQ_FOPS_RO(tlb); -DEBUG_SEQ_FOPS_RO(pagetable); +DEFINE_SHOW_ATTRIBUTE(tlb); +DEFINE_SHOW_ATTRIBUTE(pagetable); #define __DEBUG_ADD_FILE(attr, mode) \ { \ struct dentry *dent; \ dent = debugfs_create_file(#attr, mode, obj->debug_dir, \ - obj, &debug_##attr##_fops); \ + obj, &attr##_fops); \ if (!dent) \ goto err; \ } diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c @@ -29,7 +29,7 @@ #include <linux/iommu.h> #include <linux/iopoll.h> #include <linux/kconfig.h> -#include <linux/module.h> +#include <linux/init.h> #include <linux/mutex.h> #include <linux/of.h> #include <linux/of_address.h> @@ -354,7 +354,8 @@ static void qcom_iommu_domain_free(struct iommu_domain *domain) static int qcom_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) { - struct qcom_iommu_dev *qcom_iommu = to_iommu(dev->iommu_fwspec); + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + struct qcom_iommu_dev *qcom_iommu = to_iommu(fwspec); struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); int ret; @@ -365,7 +366,7 @@ static int qcom_iommu_attach_dev(struct iommu_domain *domain, struct device *dev /* Ensure that the domain is finalized */ pm_runtime_get_sync(qcom_iommu->dev); - ret = qcom_iommu_init_domain(domain, qcom_iommu, dev->iommu_fwspec); + ret = qcom_iommu_init_domain(domain, qcom_iommu, fwspec); pm_runtime_put_sync(qcom_iommu->dev); if (ret < 0) return ret; @@ -387,7 +388,7 @@ static int qcom_iommu_attach_dev(struct iommu_domain *domain, struct device *dev static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *dev) { - struct iommu_fwspec *fwspec = dev->iommu_fwspec; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct qcom_iommu_dev *qcom_iommu = to_iommu(fwspec); struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); unsigned i; @@ -500,7 +501,7 @@ static bool qcom_iommu_capable(enum iommu_cap cap) static int qcom_iommu_add_device(struct device *dev) { - struct qcom_iommu_dev *qcom_iommu = to_iommu(dev->iommu_fwspec); + struct qcom_iommu_dev *qcom_iommu = to_iommu(dev_iommu_fwspec_get(dev)); struct iommu_group *group; struct device_link *link; @@ -531,7 +532,7 @@ static int qcom_iommu_add_device(struct device *dev) static void qcom_iommu_remove_device(struct device *dev) { - struct qcom_iommu_dev *qcom_iommu = to_iommu(dev->iommu_fwspec); + struct qcom_iommu_dev *qcom_iommu = to_iommu(dev_iommu_fwspec_get(dev)); if (!qcom_iommu) return; @@ -543,6 +544,7 @@ static void qcom_iommu_remove_device(struct device *dev) static int qcom_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) { + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct qcom_iommu_dev *qcom_iommu; struct platform_device *iommu_pdev; unsigned asid = args->args[0]; @@ -568,14 +570,14 @@ static int qcom_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) WARN_ON(asid > qcom_iommu->num_ctxs)) return -EINVAL; - if (!dev->iommu_fwspec->iommu_priv) { - dev->iommu_fwspec->iommu_priv = qcom_iommu; + if (!fwspec->iommu_priv) { + fwspec->iommu_priv = qcom_iommu; } else { /* make sure devices iommus dt node isn't referring to * multiple different iommu devices. Multiple context * banks are ok, but multiple devices are not: */ - if (WARN_ON(qcom_iommu != dev->iommu_fwspec->iommu_priv)) + if (WARN_ON(qcom_iommu != fwspec->iommu_priv)) return -EINVAL; } @@ -908,7 +910,6 @@ static const struct of_device_id qcom_iommu_of_match[] = { { .compatible = "qcom,msm-iommu-v1" }, { /* sentinel */ } }; -MODULE_DEVICE_TABLE(of, qcom_iommu_of_match); static struct platform_driver qcom_iommu_driver = { .driver = { @@ -934,15 +935,4 @@ static int __init qcom_iommu_init(void) return ret; } - -static void __exit qcom_iommu_exit(void) -{ - platform_driver_unregister(&qcom_iommu_driver); - platform_driver_unregister(&qcom_iommu_ctx_driver); -} - -module_init(qcom_iommu_init); -module_exit(qcom_iommu_exit); - -MODULE_DESCRIPTION("IOMMU API for QCOM IOMMU v1 implementations"); -MODULE_LICENSE("GPL v2"); +device_initcall(qcom_iommu_init); diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c @@ -1,4 +1,9 @@ /* + * IOMMU API for Rockchip + * + * Module Authors: Simon Xue <xxm@rock-chips.com> + * Daniel Kurtz <djkurtz@chromium.org> + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. @@ -17,7 +22,7 @@ #include <linux/iopoll.h> #include <linux/list.h> #include <linux/mm.h> -#include <linux/module.h> +#include <linux/init.h> #include <linux/of.h> #include <linux/of_iommu.h> #include <linux/of_platform.h> @@ -1281,7 +1286,6 @@ static const struct of_device_id rk_iommu_dt_ids[] = { { .compatible = "rockchip,iommu" }, { /* sentinel */ } }; -MODULE_DEVICE_TABLE(of, rk_iommu_dt_ids); static struct platform_driver rk_iommu_driver = { .probe = rk_iommu_probe, @@ -1299,8 +1303,3 @@ static int __init rk_iommu_init(void) return platform_driver_register(&rk_iommu_driver); } subsys_initcall(rk_iommu_init); - -MODULE_DESCRIPTION("IOMMU API for Rockchip"); -MODULE_AUTHOR("Simon Xue <xxm@rock-chips.com> and Daniel Kurtz <djkurtz@chromium.org>"); -MODULE_ALIAS("platform:rockchip-iommu"); -MODULE_LICENSE("GPL v2"); diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c @@ -3,6 +3,8 @@ * * Copyright (c) 2010-2012, NVIDIA CORPORATION. All rights reserved. * + * Author: Hiroshi DOYU <hdoyu@nvidia.com> + * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, * version 2, as published by the Free Software Foundation. @@ -19,7 +21,8 @@ #define pr_fmt(fmt) "%s(): " fmt, __func__ -#include <linux/module.h> +#include <linux/init.h> +#include <linux/moduleparam.h> #include <linux/platform_device.h> #include <linux/spinlock.h> #include <linux/slab.h> @@ -478,20 +481,6 @@ static int tegra_gart_probe(struct platform_device *pdev) return 0; } -static int tegra_gart_remove(struct platform_device *pdev) -{ - struct gart_device *gart = platform_get_drvdata(pdev); - - iommu_device_unregister(&gart->iommu); - iommu_device_sysfs_remove(&gart->iommu); - - writel(0, gart->regs + GART_CONFIG); - if (gart->savedata) - vfree(gart->savedata); - gart_handle = NULL; - return 0; -} - static const struct dev_pm_ops tegra_gart_pm_ops = { .suspend = tegra_gart_suspend, .resume = tegra_gart_resume, @@ -501,34 +490,22 @@ static const struct of_device_id tegra_gart_of_match[] = { { .compatible = "nvidia,tegra20-gart", }, { }, }; -MODULE_DEVICE_TABLE(of, tegra_gart_of_match); static struct platform_driver tegra_gart_driver = { .probe = tegra_gart_probe, - .remove = tegra_gart_remove, .driver = { .name = "tegra-gart", .pm = &tegra_gart_pm_ops, .of_match_table = tegra_gart_of_match, + .suppress_bind_attrs = true, }, }; -static int tegra_gart_init(void) +static int __init tegra_gart_init(void) { return platform_driver_register(&tegra_gart_driver); } - -static void __exit tegra_gart_exit(void) -{ - platform_driver_unregister(&tegra_gart_driver); -} - subsys_initcall(tegra_gart_init); -module_exit(tegra_gart_exit); -module_param(gart_debug, bool, 0644); +module_param(gart_debug, bool, 0644); MODULE_PARM_DESC(gart_debug, "Enable GART debugging"); -MODULE_DESCRIPTION("IOMMU API for GART in Tegra20"); -MODULE_AUTHOR("Hiroshi DOYU <hdoyu@nvidia.com>"); -MODULE_ALIAS("platform:tegra-gart"); -MODULE_LICENSE("GPL v2"); diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c @@ -846,7 +846,7 @@ static struct iommu_group *tegra_smmu_group_get(struct tegra_smmu *smmu, static struct iommu_group *tegra_smmu_device_group(struct device *dev) { - struct iommu_fwspec *fwspec = dev->iommu_fwspec; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct tegra_smmu *smmu = dev->archdata.iommu; struct iommu_group *group; @@ -926,17 +926,7 @@ static int tegra_smmu_swgroups_show(struct seq_file *s, void *data) return 0; } -static int tegra_smmu_swgroups_open(struct inode *inode, struct file *file) -{ - return single_open(file, tegra_smmu_swgroups_show, inode->i_private); -} - -static const struct file_operations tegra_smmu_swgroups_fops = { - .open = tegra_smmu_swgroups_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(tegra_smmu_swgroups); static int tegra_smmu_clients_show(struct seq_file *s, void *data) { @@ -964,17 +954,7 @@ static int tegra_smmu_clients_show(struct seq_file *s, void *data) return 0; } -static int tegra_smmu_clients_open(struct inode *inode, struct file *file) -{ - return single_open(file, tegra_smmu_clients_show, inode->i_private); -} - -static const struct file_operations tegra_smmu_clients_fops = { - .open = tegra_smmu_clients_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(tegra_smmu_clients); static void tegra_smmu_debugfs_init(struct tegra_smmu *smmu) { diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c @@ -15,7 +15,7 @@ * Intel SCIF driver. * */ -#include <linux/dma_remapping.h> +#include <linux/intel-iommu.h> #include <linux/pagemap.h> #include <linux/sched/mm.h> #include <linux/sched/signal.h> diff --git a/drivers/misc/mic/scif/scif_rma.h b/drivers/misc/mic/scif/scif_rma.h @@ -53,7 +53,7 @@ #ifndef SCIF_RMA_H #define SCIF_RMA_H -#include <linux/dma_remapping.h> +#include <linux/intel-iommu.h> #include <linux/mmu_notifier.h> #include "../bus/scif_bus.h" diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c @@ -245,7 +245,7 @@ static void xhci_zero_64b_regs(struct xhci_hcd *xhci) * an iommu. Doing anything when there is no iommu is definitely * unsafe... */ - if (!(xhci->quirks & XHCI_ZERO_64B_REGS) || !dev->iommu_group) + if (!(xhci->quirks & XHCI_ZERO_64B_REGS) || !device_iommu_mapped(dev)) return; xhci_info(xhci, "Zeroing 64bit base registers, expecting fault\n"); diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c @@ -978,32 +978,6 @@ unlock: return ret; } -/* - * Turns out AMD IOMMU has a page table bug where it won't map large pages - * to a region that previously mapped smaller pages. This should be fixed - * soon, so this is just a temporary workaround to break mappings down into - * PAGE_SIZE. Better to map smaller pages than nothing. - */ -static int map_try_harder(struct vfio_domain *domain, dma_addr_t iova, - unsigned long pfn, long npage, int prot) -{ - long i; - int ret = 0; - - for (i = 0; i < npage; i++, pfn++, iova += PAGE_SIZE) { - ret = iommu_map(domain->domain, iova, - (phys_addr_t)pfn << PAGE_SHIFT, - PAGE_SIZE, prot | domain->prot); - if (ret) - break; - } - - for (; i < npage && i > 0; i--, iova -= PAGE_SIZE) - iommu_unmap(domain->domain, iova, PAGE_SIZE); - - return ret; -} - static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova, unsigned long pfn, long npage, int prot) { @@ -1013,11 +987,8 @@ static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova, list_for_each_entry(d, &iommu->domain_list, next) { ret = iommu_map(d->domain, iova, (phys_addr_t)pfn << PAGE_SHIFT, npage << PAGE_SHIFT, prot | d->prot); - if (ret) { - if (ret != -EBUSY || - map_try_harder(d, iova, pfn, npage, prot)) - goto unwind; - } + if (ret) + goto unwind; cond_resched(); } diff --git a/include/linux/device.h b/include/linux/device.h @@ -1058,6 +1058,16 @@ static inline struct device *kobj_to_dev(struct kobject *kobj) return container_of(kobj, struct device, kobj); } +/** + * device_iommu_mapped - Returns true when the device DMA is translated + * by an IOMMU + * @dev: Device to perform the check on + */ +static inline bool device_iommu_mapped(struct device *dev) +{ + return (dev->iommu_group != NULL); +} + /* Get the wakeup routines, which depend on struct device */ #include <linux/pm_wakeup.h> diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h @@ -1,58 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _DMA_REMAPPING_H -#define _DMA_REMAPPING_H - -/* - * VT-d hardware uses 4KiB page size regardless of host page size. - */ -#define VTD_PAGE_SHIFT (12) -#define VTD_PAGE_SIZE (1UL << VTD_PAGE_SHIFT) -#define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT) -#define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK) - -#define VTD_STRIDE_SHIFT (9) -#define VTD_STRIDE_MASK (((u64)-1) << VTD_STRIDE_SHIFT) - -#define DMA_PTE_READ (1) -#define DMA_PTE_WRITE (2) -#define DMA_PTE_LARGE_PAGE (1 << 7) -#define DMA_PTE_SNP (1 << 11) - -#define CONTEXT_TT_MULTI_LEVEL 0 -#define CONTEXT_TT_DEV_IOTLB 1 -#define CONTEXT_TT_PASS_THROUGH 2 -/* Extended context entry types */ -#define CONTEXT_TT_PT_PASID 4 -#define CONTEXT_TT_PT_PASID_DEV_IOTLB 5 -#define CONTEXT_TT_MASK (7ULL << 2) - -#define CONTEXT_DINVE (1ULL << 8) -#define CONTEXT_PRS (1ULL << 9) -#define CONTEXT_PASIDE (1ULL << 11) - -struct intel_iommu; -struct dmar_domain; -struct root_entry; - - -#ifdef CONFIG_INTEL_IOMMU -extern int iommu_calculate_agaw(struct intel_iommu *iommu); -extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); -extern int dmar_disabled; -extern int intel_iommu_enabled; -extern int intel_iommu_tboot_noforce; -#else -static inline int iommu_calculate_agaw(struct intel_iommu *iommu) -{ - return 0; -} -static inline int iommu_calculate_max_sagaw(struct intel_iommu *iommu) -{ - return 0; -} -#define dmar_disabled (1) -#define intel_iommu_enabled (0) -#endif - - -#endif diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h @@ -26,7 +26,6 @@ #include <linux/iova.h> #include <linux/io.h> #include <linux/idr.h> -#include <linux/dma_remapping.h> #include <linux/mmu_notifier.h> #include <linux/list.h> #include <linux/iommu.h> @@ -37,9 +36,29 @@ #include <asm/iommu.h> /* - * Intel IOMMU register specification per version 1.0 public spec. + * VT-d hardware uses 4KiB page size regardless of host page size. */ +#define VTD_PAGE_SHIFT (12) +#define VTD_PAGE_SIZE (1UL << VTD_PAGE_SHIFT) +#define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT) +#define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK) + +#define VTD_STRIDE_SHIFT (9) +#define VTD_STRIDE_MASK (((u64)-1) << VTD_STRIDE_SHIFT) + +#define DMA_PTE_READ (1) +#define DMA_PTE_WRITE (2) +#define DMA_PTE_LARGE_PAGE (1 << 7) +#define DMA_PTE_SNP (1 << 11) +#define CONTEXT_TT_MULTI_LEVEL 0 +#define CONTEXT_TT_DEV_IOTLB 1 +#define CONTEXT_TT_PASS_THROUGH 2 +#define CONTEXT_PASIDE BIT_ULL(3) + +/* + * Intel IOMMU register specification per version 1.0 public spec. + */ #define DMAR_VER_REG 0x0 /* Arch version supported by this IOMMU */ #define DMAR_CAP_REG 0x8 /* Hardware supported capabilities */ #define DMAR_ECAP_REG 0x10 /* Extended capabilities supported */ @@ -151,6 +170,10 @@ * Extended Capability Register */ +#define ecap_smpwc(e) (((e) >> 48) & 0x1) +#define ecap_flts(e) (((e) >> 47) & 0x1) +#define ecap_slts(e) (((e) >> 46) & 0x1) +#define ecap_smts(e) (((e) >> 43) & 0x1) #define ecap_dit(e) ((e >> 41) & 0x1) #define ecap_pasid(e) ((e >> 40) & 0x1) #define ecap_pss(e) ((e >> 35) & 0x1f) @@ -229,6 +252,7 @@ /* DMA_RTADDR_REG */ #define DMA_RTADDR_RTT (((u64)1) << 11) +#define DMA_RTADDR_SMT (((u64)1) << 10) /* CCMD_REG */ #define DMA_CCMD_ICC (((u64)1) << 63) @@ -374,13 +398,18 @@ enum { #define QI_GRAN_NONG_PASID 2 #define QI_GRAN_PSI_PASID 3 +#define qi_shift(iommu) (DMAR_IQ_SHIFT + !!ecap_smts((iommu)->ecap)) + struct qi_desc { - u64 low, high; + u64 qw0; + u64 qw1; + u64 qw2; + u64 qw3; }; struct q_inval { raw_spinlock_t q_lock; - struct qi_desc *desc; /* invalidation queue */ + void *desc; /* invalidation queue */ int *desc_status; /* desc status */ int free_head; /* first free entry */ int free_tail; /* last free entry */ @@ -512,15 +541,8 @@ struct intel_iommu { struct iommu_flush flush; #endif #ifdef CONFIG_INTEL_IOMMU_SVM - /* These are large and need to be contiguous, so we allocate just - * one for now. We'll maybe want to rethink that if we truly give - * devices away to userspace processes (e.g. for DPDK) and don't - * want to trust that userspace will use *only* the PASID it was - * told to. But while it's all driver-arbitrated, we're fine. */ - struct pasid_state_entry *pasid_state_table; struct page_req_dsc *prq; unsigned char prq_name[16]; /* Name for PRQ interrupt */ - u32 pasid_max; #endif struct q_inval *qi; /* Queued invalidation info */ u32 *iommu_state; /* Store iommu states between suspend and resume.*/ @@ -563,6 +585,49 @@ static inline void __iommu_flush_cache( clflush_cache_range(addr, size); } +/* + * 0: readable + * 1: writable + * 2-6: reserved + * 7: super page + * 8-10: available + * 11: snoop behavior + * 12-63: Host physcial address + */ +struct dma_pte { + u64 val; +}; + +static inline void dma_clear_pte(struct dma_pte *pte) +{ + pte->val = 0; +} + +static inline u64 dma_pte_addr(struct dma_pte *pte) +{ +#ifdef CONFIG_64BIT + return pte->val & VTD_PAGE_MASK; +#else + /* Must have a full atomic 64-bit read */ + return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK; +#endif +} + +static inline bool dma_pte_present(struct dma_pte *pte) +{ + return (pte->val & 3) != 0; +} + +static inline bool dma_pte_superpage(struct dma_pte *pte) +{ + return (pte->val & DMA_PTE_LARGE_PAGE); +} + +static inline int first_pte_in_page(struct dma_pte *pte) +{ + return !((unsigned long)pte & ~VTD_PAGE_MASK); +} + extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); extern int dmar_find_matched_atsr_unit(struct pci_dev *dev); @@ -587,10 +652,10 @@ void free_pgtable_page(void *vaddr); struct intel_iommu *domain_get_iommu(struct dmar_domain *domain); int for_each_device_domain(int (*fn)(struct device_domain_info *info, void *data), void *data); +void iommu_flush_write_buffer(struct intel_iommu *iommu); #ifdef CONFIG_INTEL_IOMMU_SVM int intel_svm_init(struct intel_iommu *iommu); -int intel_svm_exit(struct intel_iommu *iommu); extern int intel_svm_enable_prq(struct intel_iommu *iommu); extern int intel_svm_finish_prq(struct intel_iommu *iommu); @@ -632,4 +697,23 @@ bool context_present(struct context_entry *context); struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, u8 devfn, int alloc); +#ifdef CONFIG_INTEL_IOMMU +extern int iommu_calculate_agaw(struct intel_iommu *iommu); +extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); +extern int dmar_disabled; +extern int intel_iommu_enabled; +extern int intel_iommu_tboot_noforce; +#else +static inline int iommu_calculate_agaw(struct intel_iommu *iommu) +{ + return 0; +} +static inline int iommu_calculate_max_sagaw(struct intel_iommu *iommu) +{ + return 0; +} +#define dmar_disabled (1) +#define intel_iommu_enabled (0) +#endif + #endif diff --git a/include/linux/iommu.h b/include/linux/iommu.h @@ -168,8 +168,8 @@ struct iommu_resv_region { * @map: map a physically contiguous memory region to an iommu domain * @unmap: unmap a physically contiguous memory region from an iommu domain * @flush_tlb_all: Synchronously flush all hardware TLBs for this domain - * @tlb_range_add: Add a given iova range to the flush queue for this domain - * @tlb_sync: Flush all queued ranges from the hardware TLBs and empty flush + * @iotlb_range_add: Add a given iova range to the flush queue for this domain + * @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush * queue * @iova_to_phys: translate iova to physical address * @add_device: add device to iommu grouping @@ -398,6 +398,20 @@ void iommu_fwspec_free(struct device *dev); int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids); const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode); +static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev) +{ + return dev->iommu_fwspec; +} + +static inline void dev_iommu_fwspec_set(struct device *dev, + struct iommu_fwspec *fwspec) +{ + dev->iommu_fwspec = fwspec; +} + +int iommu_probe_device(struct device *dev); +void iommu_release_device(struct device *dev); + #else /* CONFIG_IOMMU_API */ struct iommu_ops {};