whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit 528985117126f11beea339cf39120ee99da04cd2
parent 84df9525b0c27f3ebc2ebb1864fa62a97fdedb7d
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Mon, 22 Oct 2018 17:30:06 +0100

Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull arm64 updates from Catalin Marinas:
 "Apart from some new arm64 features and clean-ups, this also contains
  the core mmu_gather changes for tracking the levels of the page table
  being cleared and a minor update to the generic
  compat_sys_sigaltstack() introducing COMPAT_SIGMINSKSZ.

  Summary:

   - Core mmu_gather changes which allow tracking the levels of
     page-table being cleared together with the arm64 low-level flushing
     routines

   - Support for the new ARMv8.5 PSTATE.SSBS bit which can be used to
     mitigate Spectre-v4 dynamically without trapping to EL3 firmware

   - Introduce COMPAT_SIGMINSTKSZ for use in compat_sys_sigaltstack

   - Optimise emulation of MRS instructions to ID_* registers on ARMv8.4

   - Support for Common Not Private (CnP) translations allowing threads
     of the same CPU to share the TLB entries

   - Accelerated crc32 routines

   - Move swapper_pg_dir to the rodata section

   - Trap WFI instruction executed in user space

   - ARM erratum 1188874 workaround (arch_timer)

   - Miscellaneous fixes and clean-ups"

* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (78 commits)
  arm64: KVM: Guests can skip __install_bp_hardening_cb()s HYP work
  arm64: cpufeature: Trap CTR_EL0 access only where it is necessary
  arm64: cpufeature: Fix handling of CTR_EL0.IDC field
  arm64: cpufeature: ctr: Fix cpu capability check for late CPUs
  Documentation/arm64: HugeTLB page implementation
  arm64: mm: Use __pa_symbol() for set_swapper_pgd()
  arm64: Add silicon-errata.txt entry for ARM erratum 1188873
  Revert "arm64: uaccess: implement unsafe accessors"
  arm64: mm: Drop the unused cpu parameter
  MAINTAINERS: fix bad sdei paths
  arm64: mm: Use #ifdef for the __PAGETABLE_P?D_FOLDED defines
  arm64: Fix typo in a comment in arch/arm64/mm/kasan_init.c
  arm64: xen: Use existing helper to check interrupt status
  arm64: Use daifflag_restore after bp_hardening
  arm64: daifflags: Use irqflags functions for daifflags
  arm64: arch_timer: avoid unused function warning
  arm64: Trap WFI executed in userspace
  arm64: docs: Document SSBS HWCAP
  arm64: docs: Fix typos in ELF hwcaps
  arm64/kprobes: remove an extra semicolon in arch_prepare_kprobe
  ...

Diffstat:
MDocumentation/arm64/elf_hwcaps.txt | 12++++++++----
ADocumentation/arm64/hugetlbpage.txt | 38++++++++++++++++++++++++++++++++++++++
MDocumentation/arm64/silicon-errata.txt | 1+
MMAINTAINERS | 17+++++++++++++++--
March/arm/include/asm/kvm_arm.h | 1+
March/arm/include/asm/kvm_mmu.h | 5+++++
March/arm64/Kconfig | 34++++++++++++++++++++++++++++++----
March/arm64/include/asm/assembler.h | 7+++----
March/arm64/include/asm/cache.h | 40++++++++++++++++++++++++++++++++++++++++
March/arm64/include/asm/compat.h | 1+
Darch/arm64/include/asm/compiler.h | 30------------------------------
March/arm64/include/asm/cpucaps.h | 7+++++--
March/arm64/include/asm/cpufeature.h | 9++++++++-
March/arm64/include/asm/cputype.h | 2++
March/arm64/include/asm/daifflags.h | 15+++++----------
March/arm64/include/asm/esr.h | 77+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
March/arm64/include/asm/kernel-pgtable.h | 2+-
March/arm64/include/asm/kvm_arm.h | 1+
March/arm64/include/asm/kvm_emulate.h | 2+-
March/arm64/include/asm/kvm_host.h | 11+++++++++++
March/arm64/include/asm/kvm_mmu.h | 5+++++
March/arm64/include/asm/mmu.h | 3+++
March/arm64/include/asm/mmu_context.h | 17+++++++++++++++--
March/arm64/include/asm/page.h | 2--
March/arm64/include/asm/pgtable-hwdef.h | 2++
March/arm64/include/asm/pgtable.h | 48+++++++++++++++++++++++++++++++++++++++++-------
March/arm64/include/asm/processor.h | 11+++++++----
March/arm64/include/asm/ptrace.h | 1+
March/arm64/include/asm/sysreg.h | 44++++++++++++++++++++++++++++++++------------
March/arm64/include/asm/tlb.h | 34+++++++++++++---------------------
March/arm64/include/asm/tlbflush.h | 112+++++++++++++++++++++++++++++++++++++++++++++++++------------------------------
March/arm64/include/asm/uaccess.h | 1-
March/arm64/include/asm/xen/events.h | 2+-
March/arm64/include/uapi/asm/hwcap.h | 1+
March/arm64/include/uapi/asm/ptrace.h | 1+
March/arm64/kernel/cpu_errata.c | 96++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------
March/arm64/kernel/cpufeature.c | 195++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------
March/arm64/kernel/cpuinfo.c | 11++++++++++-
March/arm64/kernel/entry.S | 18+++++++++++++++---
March/arm64/kernel/head.S | 40++++++++++++++++++++++------------------
March/arm64/kernel/probes/kprobes.c | 2+-
March/arm64/kernel/process.c | 4++++
March/arm64/kernel/psci.c | 1-
March/arm64/kernel/setup.c | 4----
March/arm64/kernel/sleep.S | 1+
March/arm64/kernel/ssbd.c | 24+++++++++++++++++++++---
March/arm64/kernel/suspend.c | 4++++
March/arm64/kernel/traps.c | 211+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
March/arm64/kernel/vmlinux.lds.S | 35++++++++++++++++++++---------------
March/arm64/kvm/hyp-init.S | 3+++
March/arm64/kvm/hyp/sysreg-sr.c | 11+++++++++++
March/arm64/lib/Makefile | 4+++-
Aarch/arm64/lib/crc32.S | 60++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
March/arm64/mm/context.c | 11+++++++----
March/arm64/mm/dump.c | 6+++---
March/arm64/mm/fault.c | 38+++++++++++++++-----------------------
March/arm64/mm/init.c | 2--
March/arm64/mm/kasan_init.c | 2+-
March/arm64/mm/mmu.c | 46++++++++++++++++++++++++----------------------
March/arm64/mm/numa.c | 13++++++++-----
March/arm64/mm/proc.S | 11++++++++---
Mdrivers/clocksource/arm_arch_timer.c | 15+++++++++++++++
Mdrivers/perf/arm_pmu_platform.c | 6+++---
Minclude/asm-generic/tlb.h | 86++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
Minclude/linux/compat.h | 3+++
Mkernel/signal.c | 14+++++++++-----
Mlib/crc32.c | 11+++++++----
Mmm/Makefile | 6+++---
Mmm/memory.c | 247-------------------------------------------------------------------------------
Amm/mmu_gather.c | 261+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mvirt/kvm/arm/arm.c | 4++--
71 files changed, 1511 insertions(+), 591 deletions(-)

diff --git a/Documentation/arm64/elf_hwcaps.txt b/Documentation/arm64/elf_hwcaps.txt @@ -78,11 +78,11 @@ HWCAP_EVTSTRM HWCAP_AES - Functionality implied by ID_AA64ISAR1_EL1.AES == 0b0001. + Functionality implied by ID_AA64ISAR0_EL1.AES == 0b0001. HWCAP_PMULL - Functionality implied by ID_AA64ISAR1_EL1.AES == 0b0010. + Functionality implied by ID_AA64ISAR0_EL1.AES == 0b0010. HWCAP_SHA1 @@ -153,7 +153,7 @@ HWCAP_ASIMDDP HWCAP_SHA512 - Functionality implied by ID_AA64ISAR0_EL1.SHA2 == 0b0002. + Functionality implied by ID_AA64ISAR0_EL1.SHA2 == 0b0010. HWCAP_SVE @@ -173,8 +173,12 @@ HWCAP_USCAT HWCAP_ILRCPC - Functionality implied by ID_AA64ISR1_EL1.LRCPC == 0b0002. + Functionality implied by ID_AA64ISAR1_EL1.LRCPC == 0b0010. HWCAP_FLAGM Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0001. + +HWCAP_SSBS + + Functionality implied by ID_AA64PFR1_EL1.SSBS == 0b0010. diff --git a/Documentation/arm64/hugetlbpage.txt b/Documentation/arm64/hugetlbpage.txt @@ -0,0 +1,38 @@ +HugeTLBpage on ARM64 +==================== + +Hugepage relies on making efficient use of TLBs to improve performance of +address translations. The benefit depends on both - + + - the size of hugepages + - size of entries supported by the TLBs + +The ARM64 port supports two flavours of hugepages. + +1) Block mappings at the pud/pmd level +-------------------------------------- + +These are regular hugepages where a pmd or a pud page table entry points to a +block of memory. Regardless of the supported size of entries in TLB, block +mappings reduce the depth of page table walk needed to translate hugepage +addresses. + +2) Using the Contiguous bit +--------------------------- + +The architecture provides a contiguous bit in the translation table entries +(D4.5.3, ARM DDI 0487C.a) that hints to the MMU to indicate that it is one of a +contiguous set of entries that can be cached in a single TLB entry. + +The contiguous bit is used in Linux to increase the mapping size at the pmd and +pte (last) level. The number of supported contiguous entries varies by page size +and level of the page table. + + +The following hugepage sizes are supported - + + CONT PTE PMD CONT PMD PUD + -------- --- -------- --- + 4K: 64K 2M 32M 1G + 16K: 2M 32M 1G + 64K: 2M 512M 16G diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt @@ -56,6 +56,7 @@ stable kernels. | ARM | Cortex-A72 | #853709 | N/A | | ARM | Cortex-A73 | #858921 | ARM64_ERRATUM_858921 | | ARM | Cortex-A55 | #1024718 | ARM64_ERRATUM_1024718 | +| ARM | Cortex-A76 | #1188873 | ARM64_ERRATUM_1188873 | | ARM | MMU-500 | #841119,#826419 | N/A | | | | | | | Cavium | ThunderX ITS | #22375, #24313 | CAVIUM_ERRATUM_22375 | diff --git a/MAINTAINERS b/MAINTAINERS @@ -9712,6 +9712,19 @@ S: Maintained F: arch/arm/boot/dts/mmp* F: arch/arm/mach-mmp/ +MMU GATHER AND TLB INVALIDATION +M: Will Deacon <will.deacon@arm.com> +M: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> +M: Andrew Morton <akpm@linux-foundation.org> +M: Nick Piggin <npiggin@gmail.com> +M: Peter Zijlstra <peterz@infradead.org> +L: linux-arch@vger.kernel.org +L: linux-mm@kvack.org +S: Maintained +F: arch/*/include/asm/tlb.h +F: include/asm-generic/tlb.h +F: mm/mmu_gather.c + MN88472 MEDIA DRIVER M: Antti Palosaari <crope@iki.fi> L: linux-media@vger.kernel.org @@ -13502,8 +13515,8 @@ L: linux-arm-kernel@lists.infradead.org S: Maintained F: Documentation/devicetree/bindings/arm/firmware/sdei.txt F: drivers/firmware/arm_sdei.c -F: include/linux/sdei.h -F: include/uapi/linux/sdei.h +F: include/linux/arm_sdei.h +F: include/uapi/linux/arm_sdei.h SOFTWARE RAID (Multiple Disks) SUPPORT M: Shaohua Li <shli@kernel.org> diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h @@ -161,6 +161,7 @@ #else #define VTTBR_X (5 - KVM_T0SZ) #endif +#define VTTBR_CNP_BIT _AC(1, UL) #define VTTBR_BADDR_MASK (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_X) #define VTTBR_VMID_SHIFT _AC(48, ULL) #define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h @@ -355,6 +355,11 @@ static inline int hyp_map_aux_data(void) #define kvm_phys_to_vttbr(addr) (addr) +static inline bool kvm_cpu_has_cnp(void) +{ + return false; +} + #endif /* !__ASSEMBLY__ */ #endif /* __ARM_KVM_MMU_H__ */ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig @@ -75,6 +75,7 @@ config ARM64 select CLONE_BACKWARDS select COMMON_CLK select CPU_PM if (SUSPEND || CPU_IDLE) + select CRC32 select DCACHE_WORD_ACCESS select DMA_DIRECT_OPS select EDAC_SUPPORT @@ -142,6 +143,7 @@ config ARM64 select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RCU_TABLE_FREE + select HAVE_RCU_TABLE_INVALIDATE select HAVE_RSEQ select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS @@ -479,6 +481,19 @@ config ARM64_ERRATUM_1024718 If unsure, say Y. +config ARM64_ERRATUM_1188873 + bool "Cortex-A76: MRC read following MRRC read of specific Generic Timer in AArch32 might give incorrect result" + default y + select ARM_ARCH_TIMER_OOL_WORKAROUND + help + This option adds work arounds for ARM Cortex-A76 erratum 1188873 + + Affected Cortex-A76 cores (r0p0, r1p0, r2p0) could cause + register corruption when accessing the timer registers from + AArch32 userspace. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y @@ -769,9 +784,6 @@ source kernel/Kconfig.hz config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y -config ARCH_HAS_HOLES_MEMORYMODEL - def_bool y if SPARSEMEM - config ARCH_SPARSEMEM_ENABLE def_bool y select SPARSEMEM_VMEMMAP_ENABLE @@ -786,7 +798,7 @@ config ARCH_FLATMEM_ENABLE def_bool !NUMA config HAVE_ARCH_PFN_VALID - def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM + def_bool y config HW_PERF_EVENTS def_bool y @@ -1132,6 +1144,20 @@ config ARM64_RAS_EXTN and access the new registers if the system supports the extension. Platform RAS features may additionally depend on firmware support. +config ARM64_CNP + bool "Enable support for Common Not Private (CNP) translations" + default y + depends on ARM64_PAN || !ARM64_SW_TTBR0_PAN + help + Common Not Private (CNP) allows translation table entries to + be shared between different PEs in the same inner shareable + domain, so the hardware can use this fact to optimise the + caching of such entries in the TLB. + + Selecting this option allows the CNP feature to be detected + at runtime, and does not affect PEs that do not implement + this feature. + endmenu config ARM64_SVE diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h @@ -286,12 +286,11 @@ alternative_endif ldr \rd, [\rn, #MM_CONTEXT_ID] .endm /* - * read_ctr - read CTR_EL0. If the system has mismatched - * cache line sizes, provide the system wide safe value - * from arm64_ftr_reg_ctrel0.sys_val + * read_ctr - read CTR_EL0. If the system has mismatched register fields, + * provide the system wide safe value from arm64_ftr_reg_ctrel0.sys_val */ .macro read_ctr, reg -alternative_if_not ARM64_MISMATCHED_CACHE_LINE_SIZE +alternative_if_not ARM64_MISMATCHED_CACHE_TYPE mrs \reg, ctr_el0 // read CTR nop alternative_else diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h @@ -40,6 +40,15 @@ #define L1_CACHE_SHIFT (6) #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) + +#define CLIDR_LOUU_SHIFT 27 +#define CLIDR_LOC_SHIFT 24 +#define CLIDR_LOUIS_SHIFT 21 + +#define CLIDR_LOUU(clidr) (((clidr) >> CLIDR_LOUU_SHIFT) & 0x7) +#define CLIDR_LOC(clidr) (((clidr) >> CLIDR_LOC_SHIFT) & 0x7) +#define CLIDR_LOUIS(clidr) (((clidr) >> CLIDR_LOUIS_SHIFT) & 0x7) + /* * Memory returned by kmalloc() may be used for DMA, so we must make * sure that all such allocations are cache aligned. Otherwise, @@ -84,6 +93,37 @@ static inline int cache_line_size(void) return cwg ? 4 << cwg : ARCH_DMA_MINALIGN; } +/* + * Read the effective value of CTR_EL0. + * + * According to ARM ARM for ARMv8-A (ARM DDI 0487C.a), + * section D10.2.33 "CTR_EL0, Cache Type Register" : + * + * CTR_EL0.IDC reports the data cache clean requirements for + * instruction to data coherence. + * + * 0 - dcache clean to PoU is required unless : + * (CLIDR_EL1.LoC == 0) || (CLIDR_EL1.LoUIS == 0 && CLIDR_EL1.LoUU == 0) + * 1 - dcache clean to PoU is not required for i-to-d coherence. + * + * This routine provides the CTR_EL0 with the IDC field updated to the + * effective state. + */ +static inline u32 __attribute_const__ read_cpuid_effective_cachetype(void) +{ + u32 ctr = read_cpuid_cachetype(); + + if (!(ctr & BIT(CTR_IDC_SHIFT))) { + u64 clidr = read_sysreg(clidr_el1); + + if (CLIDR_LOC(clidr) == 0 || + (CLIDR_LOUIS(clidr) == 0 && CLIDR_LOUU(clidr) == 0)) + ctr |= BIT(CTR_IDC_SHIFT); + } + + return ctr; +} + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h @@ -159,6 +159,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) } #define compat_user_stack_pointer() (user_stack_pointer(task_pt_regs(current))) +#define COMPAT_MINSIGSTKSZ 2048 static inline void __user *arch_compat_alloc_user_space(long len) { diff --git a/arch/arm64/include/asm/compiler.h b/arch/arm64/include/asm/compiler.h @@ -1,30 +0,0 @@ -/* - * Based on arch/arm/include/asm/compiler.h - * - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ -#ifndef __ASM_COMPILER_H -#define __ASM_COMPILER_H - -/* - * This is used to ensure the compiler did actually allocate the register we - * asked it for some inline assembly sequences. Apparently we can't trust the - * compiler from one version to another so a bit of paranoia won't hurt. This - * string is meant to be concatenated with the inline asm string and will - * cause compilation to stop on mismatch. (for details, see gcc PR 15089) - */ -#define __asmeq(x, y) ".ifnc " x "," y " ; .err ; .endif\n\t" - -#endif /* __ASM_COMPILER_H */ diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h @@ -33,7 +33,7 @@ #define ARM64_WORKAROUND_CAVIUM_27456 12 #define ARM64_HAS_32BIT_EL0 13 #define ARM64_HARDEN_EL2_VECTORS 14 -#define ARM64_MISMATCHED_CACHE_LINE_SIZE 15 +#define ARM64_HAS_CNP 15 #define ARM64_HAS_NO_FPSIMD 16 #define ARM64_WORKAROUND_REPEAT_TLBI 17 #define ARM64_WORKAROUND_QCOM_FALKOR_E1003 18 @@ -51,7 +51,10 @@ #define ARM64_SSBD 30 #define ARM64_MISMATCHED_CACHE_TYPE 31 #define ARM64_HAS_STAGE2_FWB 32 +#define ARM64_HAS_CRC32 33 +#define ARM64_SSBS 34 +#define ARM64_WORKAROUND_1188873 35 -#define ARM64_NCAPS 33 +#define ARM64_NCAPS 36 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h @@ -262,7 +262,7 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; /* * CPU feature detected at boot time based on system-wide value of a * feature. It is safe for a late CPU to have this feature even though - * the system hasn't enabled it, although the featuer will not be used + * the system hasn't enabled it, although the feature will not be used * by Linux in this case. If the system has enabled this feature already, * then every late CPU must have it. */ @@ -508,6 +508,12 @@ static inline bool system_supports_sve(void) cpus_have_const_cap(ARM64_SVE); } +static inline bool system_supports_cnp(void) +{ + return IS_ENABLED(CONFIG_ARM64_CNP) && + cpus_have_const_cap(ARM64_HAS_CNP); +} + #define ARM64_SSBD_UNKNOWN -1 #define ARM64_SSBD_FORCE_DISABLE 0 #define ARM64_SSBD_KERNEL 1 @@ -530,6 +536,7 @@ void arm64_set_ssbd_mitigation(bool state); static inline void arm64_set_ssbd_mitigation(bool state) {} #endif +extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt); #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h @@ -86,6 +86,7 @@ #define ARM_CPU_PART_CORTEX_A75 0xD0A #define ARM_CPU_PART_CORTEX_A35 0xD04 #define ARM_CPU_PART_CORTEX_A55 0xD05 +#define ARM_CPU_PART_CORTEX_A76 0xD0B #define APM_CPU_PART_POTENZA 0x000 @@ -110,6 +111,7 @@ #define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A75) #define MIDR_CORTEX_A35 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A35) #define MIDR_CORTEX_A55 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A55) +#define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h @@ -36,11 +36,8 @@ static inline unsigned long local_daif_save(void) { unsigned long flags; - asm volatile( - "mrs %0, daif // local_daif_save\n" - : "=r" (flags) - : - : "memory"); + flags = arch_local_save_flags(); + local_daif_mask(); return flags; @@ -60,11 +57,9 @@ static inline void local_daif_restore(unsigned long flags) { if (!arch_irqs_disabled_flags(flags)) trace_hardirqs_on(); - asm volatile( - "msr daif, %0 // local_daif_restore" - : - : "r" (flags) - : "memory"); + + arch_local_irq_restore(flags); + if (arch_irqs_disabled_flags(flags)) trace_hardirqs_off(); } diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h @@ -137,6 +137,8 @@ #define ESR_ELx_CV (UL(1) << 24) #define ESR_ELx_COND_SHIFT (20) #define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT) +#define ESR_ELx_WFx_ISS_TI (UL(1) << 0) +#define ESR_ELx_WFx_ISS_WFI (UL(0) << 0) #define ESR_ELx_WFx_ISS_WFE (UL(1) << 0) #define ESR_ELx_xVC_IMM_MASK ((1UL << 16) - 1) @@ -148,6 +150,9 @@ #define DISR_EL1_ESR_MASK (ESR_ELx_AET | ESR_ELx_EA | ESR_ELx_FSC) /* ESR value templates for specific events */ +#define ESR_ELx_WFx_MASK (ESR_ELx_EC_MASK | ESR_ELx_WFx_ISS_TI) +#define ESR_ELx_WFx_WFI_VAL ((ESR_ELx_EC_WFx << ESR_ELx_EC_SHIFT) | \ + ESR_ELx_WFx_ISS_WFI) /* BRK instruction trap from AArch64 state */ #define ESR_ELx_VAL_BRK64(imm) \ @@ -187,6 +192,8 @@ #define ESR_ELx_SYS64_ISS_SYS_OP_MASK (ESR_ELx_SYS64_ISS_SYS_MASK | \ ESR_ELx_SYS64_ISS_DIR_MASK) +#define ESR_ELx_SYS64_ISS_RT(esr) \ + (((esr) & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT) /* * User space cache operations have the following sysreg encoding * in System instructions. @@ -206,6 +213,18 @@ #define ESR_ELx_SYS64_ISS_EL0_CACHE_OP_VAL \ (ESR_ELx_SYS64_ISS_SYS_VAL(1, 3, 1, 7, 0) | \ ESR_ELx_SYS64_ISS_DIR_WRITE) +/* + * User space MRS operations which are supported for emulation + * have the following sysreg encoding in System instructions. + * op0 = 3, op1= 0, crn = 0, {crm = 0, 4-7}, READ (L = 1) + */ +#define ESR_ELx_SYS64_ISS_SYS_MRS_OP_MASK (ESR_ELx_SYS64_ISS_OP0_MASK | \ + ESR_ELx_SYS64_ISS_OP1_MASK | \ + ESR_ELx_SYS64_ISS_CRN_MASK | \ + ESR_ELx_SYS64_ISS_DIR_MASK) +#define ESR_ELx_SYS64_ISS_SYS_MRS_OP_VAL \ + (ESR_ELx_SYS64_ISS_SYS_VAL(3, 0, 0, 0, 0) | \ + ESR_ELx_SYS64_ISS_DIR_READ) #define ESR_ELx_SYS64_ISS_SYS_CTR ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 1, 0, 0) #define ESR_ELx_SYS64_ISS_SYS_CTR_READ (ESR_ELx_SYS64_ISS_SYS_CTR | \ @@ -249,6 +268,64 @@ #define ESR_ELx_FP_EXC_TFV (UL(1) << 23) +/* + * ISS field definitions for CP15 accesses + */ +#define ESR_ELx_CP15_32_ISS_DIR_MASK 0x1 +#define ESR_ELx_CP15_32_ISS_DIR_READ 0x1 +#define ESR_ELx_CP15_32_ISS_DIR_WRITE 0x0 + +#define ESR_ELx_CP15_32_ISS_RT_SHIFT 5 +#define ESR_ELx_CP15_32_ISS_RT_MASK (UL(0x1f) << ESR_ELx_CP15_32_ISS_RT_SHIFT) +#define ESR_ELx_CP15_32_ISS_CRM_SHIFT 1 +#define ESR_ELx_CP15_32_ISS_CRM_MASK (UL(0xf) << ESR_ELx_CP15_32_ISS_CRM_SHIFT) +#define ESR_ELx_CP15_32_ISS_CRN_SHIFT 10 +#define ESR_ELx_CP15_32_ISS_CRN_MASK (UL(0xf) << ESR_ELx_CP15_32_ISS_CRN_SHIFT) +#define ESR_ELx_CP15_32_ISS_OP1_SHIFT 14 +#define ESR_ELx_CP15_32_ISS_OP1_MASK (UL(0x7) << ESR_ELx_CP15_32_ISS_OP1_SHIFT) +#define ESR_ELx_CP15_32_ISS_OP2_SHIFT 17 +#define ESR_ELx_CP15_32_ISS_OP2_MASK (UL(0x7) << ESR_ELx_CP15_32_ISS_OP2_SHIFT) + +#define ESR_ELx_CP15_32_ISS_SYS_MASK (ESR_ELx_CP15_32_ISS_OP1_MASK | \ + ESR_ELx_CP15_32_ISS_OP2_MASK | \ + ESR_ELx_CP15_32_ISS_CRN_MASK | \ + ESR_ELx_CP15_32_ISS_CRM_MASK | \ + ESR_ELx_CP15_32_ISS_DIR_MASK) +#define ESR_ELx_CP15_32_ISS_SYS_VAL(op1, op2, crn, crm) \ + (((op1) << ESR_ELx_CP15_32_ISS_OP1_SHIFT) | \ + ((op2) << ESR_ELx_CP15_32_ISS_OP2_SHIFT) | \ + ((crn) << ESR_ELx_CP15_32_ISS_CRN_SHIFT) | \ + ((crm) << ESR_ELx_CP15_32_ISS_CRM_SHIFT)) + +#define ESR_ELx_CP15_64_ISS_DIR_MASK 0x1 +#define ESR_ELx_CP15_64_ISS_DIR_READ 0x1 +#define ESR_ELx_CP15_64_ISS_DIR_WRITE 0x0 + +#define ESR_ELx_CP15_64_ISS_RT_SHIFT 5 +#define ESR_ELx_CP15_64_ISS_RT_MASK (UL(0x1f) << ESR_ELx_CP15_64_ISS_RT_SHIFT) + +#define ESR_ELx_CP15_64_ISS_RT2_SHIFT 10 +#define ESR_ELx_CP15_64_ISS_RT2_MASK (UL(0x1f) << ESR_ELx_CP15_64_ISS_RT2_SHIFT) + +#define ESR_ELx_CP15_64_ISS_OP1_SHIFT 16 +#define ESR_ELx_CP15_64_ISS_OP1_MASK (UL(0xf) << ESR_ELx_CP15_64_ISS_OP1_SHIFT) +#define ESR_ELx_CP15_64_ISS_CRM_SHIFT 1 +#define ESR_ELx_CP15_64_ISS_CRM_MASK (UL(0xf) << ESR_ELx_CP15_64_ISS_CRM_SHIFT) + +#define ESR_ELx_CP15_64_ISS_SYS_VAL(op1, crm) \ + (((op1) << ESR_ELx_CP15_64_ISS_OP1_SHIFT) | \ + ((crm) << ESR_ELx_CP15_64_ISS_CRM_SHIFT)) + +#define ESR_ELx_CP15_64_ISS_SYS_MASK (ESR_ELx_CP15_64_ISS_OP1_MASK | \ + ESR_ELx_CP15_64_ISS_CRM_MASK | \ + ESR_ELx_CP15_64_ISS_DIR_MASK) + +#define ESR_ELx_CP15_64_ISS_SYS_CNTVCT (ESR_ELx_CP15_64_ISS_SYS_VAL(1, 14) | \ + ESR_ELx_CP15_64_ISS_DIR_READ) + +#define ESR_ELx_CP15_32_ISS_SYS_CNTFRQ (ESR_ELx_CP15_32_ISS_SYS_VAL(0, 0, 14, 0) |\ + ESR_ELx_CP15_32_ISS_DIR_READ) + #ifndef __ASSEMBLY__ #include <asm/types.h> diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h @@ -97,7 +97,7 @@ + EARLY_PGDS((vstart), (vend)) /* each PGDIR needs a next level page table */ \ + EARLY_PUDS((vstart), (vend)) /* each PUD needs a next level page table */ \ + EARLY_PMDS((vstart), (vend))) /* each PMD needs a next level page table */ -#define SWAPPER_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR + TEXT_OFFSET, _end)) +#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR + TEXT_OFFSET, _end)) #define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE) #ifdef CONFIG_ARM64_SW_TTBR0_PAN diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h @@ -175,6 +175,7 @@ #define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN_FLAGS) #define VTTBR_X (VTTBR_X_TGRAN_MAGIC - VTCR_EL2_T0SZ_IPA) +#define VTTBR_CNP_BIT (UL(1)) #define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_X) #define VTTBR_VMID_SHIFT (UL(48)) #define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h @@ -335,7 +335,7 @@ static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu) static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) { u32 esr = kvm_vcpu_get_hsr(vcpu); - return (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + return ESR_ELx_SYS64_ISS_RT(esr); } static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h @@ -387,6 +387,8 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state); +void __kvm_enable_ssbs(void); + static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, unsigned long hyp_stack_ptr, unsigned long vector_ptr) @@ -407,6 +409,15 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, */ BUG_ON(!static_branch_likely(&arm64_const_caps_ready)); __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2); + + /* + * Disabling SSBD on a non-VHE system requires us to enable SSBS + * at EL2. + */ + if (!has_vhe() && this_cpu_has_cap(ARM64_SSBS) && + arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) { + kvm_call_hyp(__kvm_enable_ssbs); + } } static inline bool kvm_arch_check_sve_has_vhe(void) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h @@ -517,5 +517,10 @@ static inline int hyp_map_aux_data(void) #define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr) +static inline bool kvm_cpu_has_cnp(void) +{ + return system_supports_cnp(); +} + #endif /* __ASSEMBLY__ */ #endif /* __ARM64_KVM_MMU_H__ */ diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h @@ -95,5 +95,8 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, extern void *fixmap_remap_fdt(phys_addr_t dt_phys); extern void mark_linear_text_alias_ro(void); +#define INIT_MM_CONTEXT(name) \ + .pgd = init_pg_dir, + #endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h @@ -147,12 +147,25 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp) extern ttbr_replace_func idmap_cpu_replace_ttbr1; ttbr_replace_func *replace_phys; - phys_addr_t pgd_phys = virt_to_phys(pgdp); + /* phys_to_ttbr() zeros lower 2 bits of ttbr with 52-bit PA */ + phys_addr_t ttbr1 = phys_to_ttbr(virt_to_phys(pgdp)); + + if (system_supports_cnp() && !WARN_ON(pgdp != lm_alias(swapper_pg_dir))) { + /* + * cpu_replace_ttbr1() is used when there's a boot CPU + * up (i.e. cpufeature framework is not up yet) and + * latter only when we enable CNP via cpufeature's + * enable() callback. + * Also we rely on the cpu_hwcap bit being set before + * calling the enable() function. + */ + ttbr1 |= TTBR_CNP_BIT; + } replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1); cpu_install_idmap(); - replace_phys(pgd_phys); + replace_phys(ttbr1); cpu_uninstall_idmap(); } diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h @@ -37,9 +37,7 @@ extern void clear_page(void *to); typedef struct page *pgtable_t; -#ifdef CONFIG_HAVE_ARCH_PFN_VALID extern int pfn_valid(unsigned long); -#endif #include <asm/memory.h> diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h @@ -211,6 +211,8 @@ #define PHYS_MASK_SHIFT (CONFIG_ARM64_PA_BITS) #define PHYS_MASK ((UL(1) << PHYS_MASK_SHIFT) - 1) +#define TTBR_CNP_BIT (UL(1) << 0) + /* * TCR flags. */ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h @@ -360,6 +360,7 @@ static inline int pmd_protnone(pmd_t pmd) #define pmd_present(pmd) pte_present(pmd_pte(pmd)) #define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) #define pmd_young(pmd) pte_young(pmd_pte(pmd)) +#define pmd_valid(pmd) pte_valid(pmd_pte(pmd)) #define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd))) #define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd))) #define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd))) @@ -428,10 +429,33 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, PUD_TYPE_TABLE) #endif +extern pgd_t init_pg_dir[PTRS_PER_PGD]; +extern pgd_t init_pg_end[]; +extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; +extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; +extern pgd_t tramp_pg_dir[PTRS_PER_PGD]; + +extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd); + +static inline bool in_swapper_pgdir(void *addr) +{ + return ((unsigned long)addr & PAGE_MASK) == + ((unsigned long)swapper_pg_dir & PAGE_MASK); +} + static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { +#ifdef __PAGETABLE_PMD_FOLDED + if (in_swapper_pgdir(pmdp)) { + set_swapper_pgd((pgd_t *)pmdp, __pgd(pmd_val(pmd))); + return; + } +#endif /* __PAGETABLE_PMD_FOLDED */ + WRITE_ONCE(*pmdp, pmd); - dsb(ishst); + + if (pmd_valid(pmd)) + dsb(ishst); } static inline void pmd_clear(pmd_t *pmdp) @@ -477,11 +501,21 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd) #define pud_none(pud) (!pud_val(pud)) #define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT)) #define pud_present(pud) pte_present(pud_pte(pud)) +#define pud_valid(pud) pte_valid(pud_pte(pud)) static inline void set_pud(pud_t *pudp, pud_t pud) { +#ifdef __PAGETABLE_PUD_FOLDED + if (in_swapper_pgdir(pudp)) { + set_swapper_pgd((pgd_t *)pudp, __pgd(pud_val(pud))); + return; + } +#endif /* __PAGETABLE_PUD_FOLDED */ + WRITE_ONCE(*pudp, pud); - dsb(ishst); + + if (pud_valid(pud)) + dsb(ishst); } static inline void pud_clear(pud_t *pudp) @@ -532,6 +566,11 @@ static inline phys_addr_t pud_page_paddr(pud_t pud) static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) { + if (in_swapper_pgdir(pgdp)) { + set_swapper_pgd(pgdp, pgd); + return; + } + WRITE_ONCE(*pgdp, pgd); dsb(ishst); } @@ -712,11 +751,6 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, } #endif -extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; -extern pgd_t swapper_pg_end[]; -extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; -extern pgd_t tramp_pg_dir[PTRS_PER_PGD]; - /* * Encode and decode a swap entry: * bits 0-1: present (must be zero) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h @@ -174,6 +174,10 @@ static inline void start_thread(struct pt_regs *regs, unsigned long pc, { start_thread_common(regs, pc); regs->pstate = PSR_MODE_EL0t; + + if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE) + regs->pstate |= PSR_SSBS_BIT; + regs->sp = sp; } @@ -190,6 +194,9 @@ static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc, regs->pstate |= PSR_AA32_E_BIT; #endif + if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE) + regs->pstate |= PSR_AA32_SSBS_BIT; + regs->compat_sp = sp; } #endif @@ -244,10 +251,6 @@ static inline void spin_lock_prefetch(const void *ptr) #endif -void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused); -void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused); -void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused); - extern unsigned long __ro_after_init signal_minsigstksz; /* sigframe size */ extern void __init minsigstksz_setup(void); diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h @@ -50,6 +50,7 @@ #define PSR_AA32_I_BIT 0x00000080 #define PSR_AA32_A_BIT 0x00000100 #define PSR_AA32_E_BIT 0x00000200 +#define PSR_AA32_SSBS_BIT 0x00800000 #define PSR_AA32_DIT_BIT 0x01000000 #define PSR_AA32_Q_BIT 0x08000000 #define PSR_AA32_V_BIT 0x10000000 diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h @@ -20,7 +20,6 @@ #ifndef __ASM_SYSREG_H #define __ASM_SYSREG_H -#include <asm/compiler.h> #include <linux/stringify.h> /* @@ -84,13 +83,26 @@ #endif /* CONFIG_BROKEN_GAS_INST */ -#define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4) -#define REG_PSTATE_UAO_IMM sys_reg(0, 0, 4, 0, 3) +/* + * Instructions for modifying PSTATE fields. + * As per Arm ARM for v8-A, Section "C.5.1.3 op0 == 0b00, architectural hints, + * barriers and CLREX, and PSTATE access", ARM DDI 0487 C.a, system instructions + * for accessing PSTATE fields have the following encoding: + * Op0 = 0, CRn = 4 + * Op1, Op2 encodes the PSTATE field modified and defines the constraints. + * CRm = Imm4 for the instruction. + * Rt = 0x1f + */ +#define pstate_field(op1, op2) ((op1) << Op1_shift | (op2) << Op2_shift) +#define PSTATE_Imm_shift CRm_shift + +#define PSTATE_PAN pstate_field(0, 4) +#define PSTATE_UAO pstate_field(0, 3) +#define PSTATE_SSBS pstate_field(3, 1) -#define SET_PSTATE_PAN(x) __emit_inst(0xd5000000 | REG_PSTATE_PAN_IMM | \ - (!!x)<<8 | 0x1f) -#define SET_PSTATE_UAO(x) __emit_inst(0xd5000000 | REG_PSTATE_UAO_IMM | \ - (!!x)<<8 | 0x1f) +#define SET_PSTATE_PAN(x) __emit_inst(0xd500401f | PSTATE_PAN | ((!!x) << PSTATE_Imm_shift)) +#define SET_PSTATE_UAO(x) __emit_inst(0xd500401f | PSTATE_UAO | ((!!x) << PSTATE_Imm_shift)) +#define SET_PSTATE_SSBS(x) __emit_inst(0xd500401f | PSTATE_SSBS | ((!!x) << PSTATE_Imm_shift)) #define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2) #define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2) @@ -419,6 +431,7 @@ #define SYS_ICH_LR15_EL2 __SYS__LR8_EL2(7) /* Common SCTLR_ELx flags. */ +#define SCTLR_ELx_DSSBS (1UL << 44) #define SCTLR_ELx_EE (1 << 25) #define SCTLR_ELx_IESB (1 << 21) #define SCTLR_ELx_WXN (1 << 19) @@ -439,7 +452,7 @@ (1 << 10) | (1 << 13) | (1 << 14) | (1 << 15) | \ (1 << 17) | (1 << 20) | (1 << 24) | (1 << 26) | \ (1 << 27) | (1 << 30) | (1 << 31) | \ - (0xffffffffUL << 32)) + (0xffffefffUL << 32)) #ifdef CONFIG_CPU_BIG_ENDIAN #define ENDIAN_SET_EL2 SCTLR_ELx_EE @@ -453,7 +466,7 @@ #define SCTLR_EL2_SET (SCTLR_ELx_IESB | ENDIAN_SET_EL2 | SCTLR_EL2_RES1) #define SCTLR_EL2_CLEAR (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \ SCTLR_ELx_SA | SCTLR_ELx_I | SCTLR_ELx_WXN | \ - ENDIAN_CLEAR_EL2 | SCTLR_EL2_RES0) + SCTLR_ELx_DSSBS | ENDIAN_CLEAR_EL2 | SCTLR_EL2_RES0) #if (SCTLR_EL2_SET ^ SCTLR_EL2_CLEAR) != 0xffffffffffffffff #error "Inconsistent SCTLR_EL2 set/clear bits" @@ -477,7 +490,7 @@ (1 << 29)) #define SCTLR_EL1_RES0 ((1 << 6) | (1 << 10) | (1 << 13) | (1 << 17) | \ (1 << 27) | (1 << 30) | (1 << 31) | \ - (0xffffffffUL << 32)) + (0xffffefffUL << 32)) #ifdef CONFIG_CPU_BIG_ENDIAN #define ENDIAN_SET_EL1 (SCTLR_EL1_E0E | SCTLR_ELx_EE) @@ -489,12 +502,12 @@ #define SCTLR_EL1_SET (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA |\ SCTLR_EL1_SA0 | SCTLR_EL1_SED | SCTLR_ELx_I |\ - SCTLR_EL1_DZE | SCTLR_EL1_UCT | SCTLR_EL1_NTWI |\ + SCTLR_EL1_DZE | SCTLR_EL1_UCT |\ SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN |\ ENDIAN_SET_EL1 | SCTLR_EL1_UCI | SCTLR_EL1_RES1) #define SCTLR_EL1_CLEAR (SCTLR_ELx_A | SCTLR_EL1_CP15BEN | SCTLR_EL1_ITD |\ SCTLR_EL1_UMA | SCTLR_ELx_WXN | ENDIAN_CLEAR_EL1 |\ - SCTLR_EL1_RES0) + SCTLR_ELx_DSSBS | SCTLR_EL1_NTWI | SCTLR_EL1_RES0) #if (SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != 0xffffffffffffffff #error "Inconsistent SCTLR_EL1 set/clear bits" @@ -544,6 +557,13 @@ #define ID_AA64PFR0_EL0_64BIT_ONLY 0x1 #define ID_AA64PFR0_EL0_32BIT_64BIT 0x2 +/* id_aa64pfr1 */ +#define ID_AA64PFR1_SSBS_SHIFT 4 + +#define ID_AA64PFR1_SSBS_PSTATE_NI 0 +#define ID_AA64PFR1_SSBS_PSTATE_ONLY 1 +#define ID_AA64PFR1_SSBS_PSTATE_INSNS 2 + /* id_aa64mmfr0 */ #define ID_AA64MMFR0_TGRAN4_SHIFT 28 #define ID_AA64MMFR0_TGRAN64_SHIFT 24 diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h @@ -22,16 +22,10 @@ #include <linux/pagemap.h> #include <linux/swap.h> -#ifdef CONFIG_HAVE_RCU_TABLE_FREE - -#define tlb_remove_entry(tlb, entry) tlb_remove_table(tlb, entry) static inline void __tlb_remove_table(void *_table) { free_page_and_swap_cache((struct page *)_table); } -#else -#define tlb_remove_entry(tlb, entry) tlb_remove_page(tlb, entry) -#endif /* CONFIG_HAVE_RCU_TABLE_FREE */ static void tlb_flush(struct mmu_gather *tlb); @@ -40,36 +34,35 @@ static void tlb_flush(struct mmu_gather *tlb); static inline void tlb_flush(struct mmu_gather *tlb) { struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0); + bool last_level = !tlb->freed_tables; + unsigned long stride = tlb_get_unmap_size(tlb); /* - * The ASID allocator will either invalidate the ASID or mark - * it as used. + * If we're tearing down the address space then we only care about + * invalidating the walk-cache, since the ASID allocator won't + * reallocate our ASID without invalidating the entire TLB. */ - if (tlb->fullmm) + if (tlb->fullmm) { + if (!last_level) + flush_tlb_mm(tlb->mm); return; + } - /* - * The intermediate page table levels are already handled by - * the __(pte|pmd|pud)_free_tlb() functions, so last level - * TLBI is sufficient here. - */ - __flush_tlb_range(&vma, tlb->start, tlb->end, true); + __flush_tlb_range(&vma, tlb->start, tlb->end, stride, last_level); } static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long addr) { - __flush_tlb_pgtable(tlb->mm, addr); pgtable_page_dtor(pte); - tlb_remove_entry(tlb, pte); + tlb_remove_table(tlb, pte); } #if CONFIG_PGTABLE_LEVELS > 2 static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr) { - __flush_tlb_pgtable(tlb->mm, addr); - tlb_remove_entry(tlb, virt_to_page(pmdp)); + tlb_remove_table(tlb, virt_to_page(pmdp)); } #endif @@ -77,8 +70,7 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp, unsigned long addr) { - __flush_tlb_pgtable(tlb->mm, addr); - tlb_remove_entry(tlb, virt_to_page(pudp)); + tlb_remove_table(tlb, virt_to_page(pudp)); } #endif diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h @@ -70,43 +70,73 @@ }) /* - * TLB Management - * ============== + * TLB Invalidation + * ================ * - * The TLB specific code is expected to perform whatever tests it needs - * to determine if it should invalidate the TLB for each call. Start - * addresses are inclusive and end addresses are exclusive; it is safe to - * round these addresses down. + * This header file implements the low-level TLB invalidation routines + * (sometimes referred to as "flushing" in the kernel) for arm64. * - * flush_tlb_all() + * Every invalidation operation uses the following template: + * + * DSB ISHST // Ensure prior page-table updates have completed + * TLBI ... // Invalidate the TLB + * DSB ISH // Ensure the TLB invalidation has completed + * if (invalidated kernel mappings) + * ISB // Discard any instructions fetched from the old mapping + * + * + * The following functions form part of the "core" TLB invalidation API, + * as documented in Documentation/core-api/cachetlb.rst: * - * Invalidate the entire TLB. + * flush_tlb_all() + * Invalidate the entire TLB (kernel + user) on all CPUs * * flush_tlb_mm(mm) + * Invalidate an entire user address space on all CPUs. + * The 'mm' argument identifies the ASID to invalidate. + * + * flush_tlb_range(vma, start, end) + * Invalidate the virtual-address range '[start, end)' on all + * CPUs for the user address space corresponding to 'vma->mm'. + * Note that this operation also invalidates any walk-cache + * entries associated with translations for the specified address + * range. + * + * flush_tlb_kernel_range(start, end) + * Same as flush_tlb_range(..., start, end), but applies to + * kernel mappings rather than a particular user address space. + * Whilst not explicitly documented, this function is used when + * unmapping pages from vmalloc/io space. + * + * flush_tlb_page(vma, addr) + * Invalidate a single user mapping for address 'addr' in the + * address space corresponding to 'vma->mm'. Note that this + * operation only invalidates a single, last-level page-table + * entry and therefore does not affect any walk-caches. * - * Invalidate all TLB entries in a particular address space. - * - mm - mm_struct describing address space * - * flush_tlb_range(mm,start,end) + * Next, we have some undocumented invalidation routines that you probably + * don't want to call unless you know what you're doing: * - * Invalidate a range of TLB entries in the specified address - * space. - * - mm - mm_struct describing address space - * - start - start address (may not be aligned) - * - end - end address (exclusive, may not be aligned) + * local_flush_tlb_all() + * Same as flush_tlb_all(), but only applies to the calling CPU. * - * flush_tlb_page(vaddr,vma) + * __flush_tlb_kernel_pgtable(addr) + * Invalidate a single kernel mapping for address 'addr' on all + * CPUs, ensuring that any walk-cache entries associated with the + * translation are also invalidated. * - * Invalidate the specified page in the specified address range. - * - vaddr - virtual address (may not be aligned) - * - vma - vma_struct describing address range + * __flush_tlb_range(vma, start, end, stride, last_level) + * Invalidate the virtual-address range '[start, end)' on all + * CPUs for the user address space corresponding to 'vma->mm'. + * The invalidation operations are issued at a granularity + * determined by 'stride' and only affect any walk-cache entries + * if 'last_level' is equal to false. * - * flush_kern_tlb_page(kaddr) * - * Invalidate the TLB entry for the specified page. The address - * will be in the kernels virtual memory space. Current uses - * only require the D-TLB to be invalidated. - * - kaddr - Kernel virtual memory address + * Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented + * on top of these routines, since that is our interface to the mmu_gather + * API as used by munmap() and friends. */ static inline void local_flush_tlb_all(void) { @@ -149,25 +179,28 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, * This is meant to avoid soft lock-ups on large TLB flushing ranges and not * necessarily a performance improvement. */ -#define MAX_TLB_RANGE (1024UL << PAGE_SHIFT) +#define MAX_TLBI_OPS 1024UL static inline void __flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, - bool last_level) + unsigned long stride, bool last_level) { unsigned long asid = ASID(vma->vm_mm); unsigned long addr; - if ((end - start) > MAX_TLB_RANGE) { + if ((end - start) > (MAX_TLBI_OPS * stride)) { flush_tlb_mm(vma->vm_mm); return; } + /* Convert the stride into units of 4k */ + stride >>= 12; + start = __TLBI_VADDR(start, asid); end = __TLBI_VADDR(end, asid); dsb(ishst); - for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) { + for (addr = start; addr < end; addr += stride) { if (last_level) { __tlbi(vale1is, addr); __tlbi_user(vale1is, addr); @@ -182,14 +215,18 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, static inline void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - __flush_tlb_range(vma, start, end, false); + /* + * We cannot use leaf-only invalidation here, since we may be invalidating + * table entries as part of collapsing hugepages or moving page tables. + */ + __flush_tlb_range(vma, start, end, PAGE_SIZE, false); } static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) { unsigned long addr; - if ((end - start) > MAX_TLB_RANGE) { + if ((end - start) > (MAX_TLBI_OPS * PAGE_SIZE)) { flush_tlb_all(); return; } @@ -199,7 +236,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end dsb(ishst); for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) - __tlbi(vaae1is, addr); + __tlbi(vaale1is, addr); dsb(ish); isb(); } @@ -208,20 +245,11 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end * Used to invalidate the TLB (walk caches) corresponding to intermediate page * table levels (pgd/pud/pmd). */ -static inline void __flush_tlb_pgtable(struct mm_struct *mm, - unsigned long uaddr) -{ - unsigned long addr = __TLBI_VADDR(uaddr, ASID(mm)); - - __tlbi(vae1is, addr); - __tlbi_user(vae1is, addr); - dsb(ish); -} - static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr) { unsigned long addr = __TLBI_VADDR(kaddr, 0); + dsb(ishst); __tlbi(vaae1is, addr); dsb(ish); } diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h @@ -32,7 +32,6 @@ #include <asm/cpufeature.h> #include <asm/ptrace.h> #include <asm/memory.h> -#include <asm/compiler.h> #include <asm/extable.h> #define get_ds() (KERNEL_DS) diff --git a/arch/arm64/include/asm/xen/events.h b/arch/arm64/include/asm/xen/events.h @@ -14,7 +14,7 @@ enum ipi_vector { static inline int xen_irqs_disabled(struct pt_regs *regs) { - return raw_irqs_disabled_flags((unsigned long) regs->pstate); + return !interrupts_enabled(regs); } #define xchg_xen_ulong(ptr, val) xchg((ptr), (val)) diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h @@ -48,5 +48,6 @@ #define HWCAP_USCAT (1 << 25) #define HWCAP_ILRCPC (1 << 26) #define HWCAP_FLAGM (1 << 27) +#define HWCAP_SSBS (1 << 28) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h @@ -46,6 +46,7 @@ #define PSR_I_BIT 0x00000080 #define PSR_A_BIT 0x00000100 #define PSR_D_BIT 0x00000200 +#define PSR_SSBS_BIT 0x00001000 #define PSR_PAN_BIT 0x00400000 #define PSR_UAO_BIT 0x00800000 #define PSR_V_BIT 0x10000000 diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c @@ -68,21 +68,43 @@ static bool has_mismatched_cache_type(const struct arm64_cpu_capabilities *entry, int scope) { - u64 mask = CTR_CACHE_MINLINE_MASK; - - /* Skip matching the min line sizes for cache type check */ - if (entry->capability == ARM64_MISMATCHED_CACHE_TYPE) - mask ^= arm64_ftr_reg_ctrel0.strict_mask; + u64 mask = arm64_ftr_reg_ctrel0.strict_mask; + u64 sys = arm64_ftr_reg_ctrel0.sys_val & mask; + u64 ctr_raw, ctr_real; WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); - return (read_cpuid_cachetype() & mask) != - (arm64_ftr_reg_ctrel0.sys_val & mask); + + /* + * We want to make sure that all the CPUs in the system expose + * a consistent CTR_EL0 to make sure that applications behaves + * correctly with migration. + * + * If a CPU has CTR_EL0.IDC but does not advertise it via CTR_EL0 : + * + * 1) It is safe if the system doesn't support IDC, as CPU anyway + * reports IDC = 0, consistent with the rest. + * + * 2) If the system has IDC, it is still safe as we trap CTR_EL0 + * access on this CPU via the ARM64_HAS_CACHE_IDC capability. + * + * So, we need to make sure either the raw CTR_EL0 or the effective + * CTR_EL0 matches the system's copy to allow a secondary CPU to boot. + */ + ctr_raw = read_cpuid_cachetype() & mask; + ctr_real = read_cpuid_effective_cachetype() & mask; + + return (ctr_real != sys) && (ctr_raw != sys); } static void cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *__unused) { - sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); + u64 mask = arm64_ftr_reg_ctrel0.strict_mask; + + /* Trap CTR_EL0 access on this CPU, only if it has a mismatch */ + if ((read_cpuid_cachetype() & mask) != + (arm64_ftr_reg_ctrel0.sys_val & mask)) + sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); } atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1); @@ -116,6 +138,15 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn, static DEFINE_SPINLOCK(bp_lock); int cpu, slot = -1; + /* + * enable_smccc_arch_workaround_1() passes NULL for the hyp_vecs + * start/end if we're a guest. Skip the hyp-vectors work. + */ + if (!hyp_vecs_start) { + __this_cpu_write(bp_hardening_data.fn, fn); + return; + } + spin_lock(&bp_lock); for_each_possible_cpu(cpu) { if (per_cpu(bp_hardening_data.fn, cpu) == fn) { @@ -312,6 +343,14 @@ void __init arm64_enable_wa2_handling(struct alt_instr *alt, void arm64_set_ssbd_mitigation(bool state) { + if (this_cpu_has_cap(ARM64_SSBS)) { + if (state) + asm volatile(SET_PSTATE_SSBS(0)); + else + asm volatile(SET_PSTATE_SSBS(1)); + return; + } + switch (psci_ops.conduit) { case PSCI_CONDUIT_HVC: arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL); @@ -336,6 +375,11 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + if (this_cpu_has_cap(ARM64_SSBS)) { + required = false; + goto out_printmsg; + } + if (psci_ops.smccc_version == SMCCC_VERSION_1_0) { ssbd_state = ARM64_SSBD_UNKNOWN; return false; @@ -384,7 +428,6 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, switch (ssbd_state) { case ARM64_SSBD_FORCE_DISABLE: - pr_info_once("%s disabled from command-line\n", entry->desc); arm64_set_ssbd_mitigation(false); required = false; break; @@ -397,7 +440,6 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, break; case ARM64_SSBD_FORCE_ENABLE: - pr_info_once("%s forced from command-line\n", entry->desc); arm64_set_ssbd_mitigation(true); required = true; break; @@ -407,10 +449,27 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, break; } +out_printmsg: + switch (ssbd_state) { + case ARM64_SSBD_FORCE_DISABLE: + pr_info_once("%s disabled from command-line\n", entry->desc); + break; + + case ARM64_SSBD_FORCE_ENABLE: + pr_info_once("%s forced from command-line\n", entry->desc); + break; + } + return required; } #endif /* CONFIG_ARM64_SSBD */ +static void __maybe_unused +cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) +{ + sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCI, 0); +} + #define CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max) \ .matches = is_affected_midr_range, \ .midr_range = MIDR_RANGE(model, v_min, r_min, v_max, r_max) @@ -616,14 +675,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { }, #endif { - .desc = "Mismatched cache line size", - .capability = ARM64_MISMATCHED_CACHE_LINE_SIZE, - .matches = has_mismatched_cache_type, - .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, - .cpu_enable = cpu_enable_trap_ctr_access, - }, - { - .desc = "Mismatched cache type", + .desc = "Mismatched cache type (CTR_EL0)", .capability = ARM64_MISMATCHED_CACHE_TYPE, .matches = has_mismatched_cache_type, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, @@ -680,6 +732,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .matches = has_ssbd_mitigation, }, #endif +#ifdef CONFIG_ARM64_ERRATUM_1188873 + { + /* Cortex-A76 r0p0 to r2p0 */ + .desc = "ARM erratum 1188873", + .capability = ARM64_WORKAROUND_1188873, + ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 2, 0), + }, +#endif { } }; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c @@ -20,6 +20,7 @@ #include <linux/bsearch.h> #include <linux/cpumask.h> +#include <linux/crash_dump.h> #include <linux/sort.h> #include <linux/stop_machine.h> #include <linux/types.h> @@ -117,6 +118,7 @@ EXPORT_SYMBOL(cpu_hwcap_keys); static bool __maybe_unused cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused); +static void cpu_enable_cnp(struct arm64_cpu_capabilities const *cap); /* * NOTE: Any changes to the visibility of features should be kept in @@ -164,6 +166,11 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SSBS_SHIFT, 4, ID_AA64PFR1_SSBS_PSTATE_NI), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI), S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI), @@ -371,7 +378,7 @@ static const struct __ftr_reg_entry { /* Op1 = 0, CRn = 0, CRm = 4 */ ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0), - ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_raz), + ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1), ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_raz), /* Op1 = 0, CRn = 0, CRm = 5 */ @@ -657,7 +664,6 @@ void update_cpu_features(int cpu, /* * EL3 is not our concern. - * ID_AA64PFR1 is currently RES0. */ taint |= check_update_ftr_reg(SYS_ID_AA64PFR0_EL1, cpu, info->reg_id_aa64pfr0, boot->reg_id_aa64pfr0); @@ -848,15 +854,55 @@ static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unus } static bool has_cache_idc(const struct arm64_cpu_capabilities *entry, - int __unused) + int scope) { - return read_sanitised_ftr_reg(SYS_CTR_EL0) & BIT(CTR_IDC_SHIFT); + u64 ctr; + + if (scope == SCOPE_SYSTEM) + ctr = arm64_ftr_reg_ctrel0.sys_val; + else + ctr = read_cpuid_effective_cachetype(); + + return ctr & BIT(CTR_IDC_SHIFT); +} + +static void cpu_emulate_effective_ctr(const struct arm64_cpu_capabilities *__unused) +{ + /* + * If the CPU exposes raw CTR_EL0.IDC = 0, while effectively + * CTR_EL0.IDC = 1 (from CLIDR values), we need to trap accesses + * to the CTR_EL0 on this CPU and emulate it with the real/safe + * value. + */ + if (!(read_cpuid_cachetype() & BIT(CTR_IDC_SHIFT))) + sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); } static bool has_cache_dic(const struct arm64_cpu_capabilities *entry, - int __unused) + int scope) { - return read_sanitised_ftr_reg(SYS_CTR_EL0) & BIT(CTR_DIC_SHIFT); + u64 ctr; + + if (scope == SCOPE_SYSTEM) + ctr = arm64_ftr_reg_ctrel0.sys_val; + else + ctr = read_cpuid_cachetype(); + + return ctr & BIT(CTR_DIC_SHIFT); +} + +static bool __maybe_unused +has_useable_cnp(const struct arm64_cpu_capabilities *entry, int scope) +{ + /* + * Kdump isn't guaranteed to power-off all secondary CPUs, CNP + * may share TLB entries with a CPU stuck in the crashed + * kernel. + */ + if (is_kdump_kernel()) + return false; + + return has_cpuid_feature(entry, scope); } #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 @@ -1035,6 +1081,70 @@ static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused) WARN_ON(val & (7 << 27 | 7 << 21)); } +#ifdef CONFIG_ARM64_SSBD +static int ssbs_emulation_handler(struct pt_regs *regs, u32 instr) +{ + if (user_mode(regs)) + return 1; + + if (instr & BIT(PSTATE_Imm_shift)) + regs->pstate |= PSR_SSBS_BIT; + else + regs->pstate &= ~PSR_SSBS_BIT; + + arm64_skip_faulting_instruction(regs, 4); + return 0; +} + +static struct undef_hook ssbs_emulation_hook = { + .instr_mask = ~(1U << PSTATE_Imm_shift), + .instr_val = 0xd500401f | PSTATE_SSBS, + .fn = ssbs_emulation_handler, +}; + +static void cpu_enable_ssbs(const struct arm64_cpu_capabilities *__unused) +{ + static bool undef_hook_registered = false; + static DEFINE_SPINLOCK(hook_lock); + + spin_lock(&hook_lock); + if (!undef_hook_registered) { + register_undef_hook(&ssbs_emulation_hook); + undef_hook_registered = true; + } + spin_unlock(&hook_lock); + + if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) { + sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS); + arm64_set_ssbd_mitigation(false); + } else { + arm64_set_ssbd_mitigation(true); + } +} +#endif /* CONFIG_ARM64_SSBD */ + +#ifdef CONFIG_ARM64_PAN +static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) +{ + /* + * We modify PSTATE. This won't work from irq context as the PSTATE + * is discarded once we return from the exception. + */ + WARN_ON_ONCE(in_interrupt()); + + sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0); + asm(SET_PSTATE_PAN(1)); +} +#endif /* CONFIG_ARM64_PAN */ + +#ifdef CONFIG_ARM64_RAS_EXTN +static void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused) +{ + /* Firmware may have left a deferred SError in this register. */ + write_sysreg_s(0, SYS_DISR_EL1); +} +#endif /* CONFIG_ARM64_RAS_EXTN */ + static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", @@ -1184,6 +1294,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_HAS_CACHE_IDC, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cache_idc, + .cpu_enable = cpu_emulate_effective_ctr, }, { .desc = "Instruction cache invalidation not required for I/D coherence", @@ -1222,6 +1333,41 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = cpu_enable_hw_dbm, }, #endif +#ifdef CONFIG_ARM64_SSBD + { + .desc = "CRC32 instructions", + .capability = ARM64_HAS_CRC32, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64ISAR0_EL1, + .field_pos = ID_AA64ISAR0_CRC32_SHIFT, + .min_field_value = 1, + }, + { + .desc = "Speculative Store Bypassing Safe (SSBS)", + .capability = ARM64_SSBS, + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64PFR1_EL1, + .field_pos = ID_AA64PFR1_SSBS_SHIFT, + .sign = FTR_UNSIGNED, + .min_field_value = ID_AA64PFR1_SSBS_PSTATE_ONLY, + .cpu_enable = cpu_enable_ssbs, + }, +#endif +#ifdef CONFIG_ARM64_CNP + { + .desc = "Common not Private translations", + .capability = ARM64_HAS_CNP, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_useable_cnp, + .sys_reg = SYS_ID_AA64MMFR2_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64MMFR2_CNP_SHIFT, + .min_field_value = 1, + .cpu_enable = cpu_enable_cnp, + }, +#endif {}, }; @@ -1267,6 +1413,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { #ifdef CONFIG_ARM64_SVE HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, HWCAP_SVE), #endif + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, HWCAP_SSBS), {}, }; @@ -1658,6 +1805,11 @@ cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused) return (cpus_have_const_cap(ARM64_HAS_PAN) && !cpus_have_const_cap(ARM64_HAS_UAO)); } +static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *cap) +{ + cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); +} + /* * We emulate only the following system register space. * Op0 = 0x3, CRn = 0x0, Op1 = 0x0, CRm = [0, 4 - 7] @@ -1719,27 +1871,32 @@ static int emulate_sys_reg(u32 id, u64 *valp) return 0; } -static int emulate_mrs(struct pt_regs *regs, u32 insn) +int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt) { int rc; - u32 sys_reg, dst; u64 val; - /* - * sys_reg values are defined as used in mrs/msr instruction. - * shift the imm value to get the encoding. - */ - sys_reg = (u32)aarch64_insn_decode_immediate(AARCH64_INSN_IMM_16, insn) << 5; rc = emulate_sys_reg(sys_reg, &val); if (!rc) { - dst = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn); - pt_regs_write_reg(regs, dst, val); + pt_regs_write_reg(regs, rt, val); arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); } - return rc; } +static int emulate_mrs(struct pt_regs *regs, u32 insn) +{ + u32 sys_reg, rt; + + /* + * sys_reg values are defined as used in mrs/msr instruction. + * shift the imm value to get the encoding. + */ + sys_reg = (u32)aarch64_insn_decode_immediate(AARCH64_INSN_IMM_16, insn) << 5; + rt = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn); + return do_emulate_mrs(regs, sys_reg, rt); +} + static struct undef_hook mrs_hook = { .instr_mask = 0xfff00000, .instr_val = 0xd5300000, @@ -1755,9 +1912,3 @@ static int __init enable_mrs_emulation(void) } core_initcall(enable_mrs_emulation); - -void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused) -{ - /* Firmware may have left a deferred SError in this register. */ - write_sysreg_s(0, SYS_DISR_EL1); -} diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c @@ -81,6 +81,7 @@ static const char *const hwcap_str[] = { "uscat", "ilrcpc", "flagm", + "ssbs", NULL }; @@ -324,7 +325,15 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) { info->reg_cntfrq = arch_timer_get_cntfrq(); - info->reg_ctr = read_cpuid_cachetype(); + /* + * Use the effective value of the CTR_EL0 than the raw value + * exposed by the CPU. CTR_E0.IDC field value must be interpreted + * with the CLIDR_EL1 fields to avoid triggering false warnings + * when there is a mismatch across the CPUs. Keep track of the + * effective value of the CTR_EL0 in our internal records for + * acurate sanity check and feature enablement. + */ + info->reg_ctr = read_cpuid_effective_cachetype(); info->reg_dczid = read_cpuid(DCZID_EL0); info->reg_midr = read_cpuid_id(); info->reg_revidr = read_cpuid(REVIDR_EL1); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S @@ -589,7 +589,7 @@ el1_undef: inherit_daif pstate=x23, tmp=x2 mov x0, sp bl do_undefinstr - ASM_BUG() + kernel_exit 1 el1_dbg: /* * Debug exception handling @@ -665,6 +665,7 @@ el0_sync: cmp x24, #ESR_ELx_EC_FP_EXC64 // FP/ASIMD exception b.eq el0_fpsimd_exc cmp x24, #ESR_ELx_EC_SYS64 // configurable trap + ccmp x24, #ESR_ELx_EC_WFx, #4, ne b.eq el0_sys cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception b.eq el0_sp_pc @@ -697,9 +698,9 @@ el0_sync_compat: cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0 b.eq el0_undef cmp x24, #ESR_ELx_EC_CP15_32 // CP15 MRC/MCR trap - b.eq el0_undef + b.eq el0_cp15 cmp x24, #ESR_ELx_EC_CP15_64 // CP15 MRRC/MCRR trap - b.eq el0_undef + b.eq el0_cp15 cmp x24, #ESR_ELx_EC_CP14_MR // CP14 MRC/MCR trap b.eq el0_undef cmp x24, #ESR_ELx_EC_CP14_LS // CP14 LDC/STC trap @@ -722,6 +723,17 @@ el0_irq_compat: el0_error_compat: kernel_entry 0, 32 b el0_error_naked + +el0_cp15: + /* + * Trapped CP15 (MRC, MCR, MRRC, MCRR) instructions + */ + enable_daif + ct_user_exit + mov x0, x25 + mov x1, sp + bl do_cp15instr + b ret_to_user #endif el0_da: diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S @@ -287,19 +287,21 @@ __create_page_tables: mov x28, lr /* - * Invalidate the idmap and swapper page tables to avoid potential - * dirty cache lines being evicted. + * Invalidate the init page tables to avoid potential dirty cache lines + * being evicted. Other page tables are allocated in rodata as part of + * the kernel image, and thus are clean to the PoC per the boot + * protocol. */ - adrp x0, idmap_pg_dir - adrp x1, swapper_pg_end + adrp x0, init_pg_dir + adrp x1, init_pg_end sub x1, x1, x0 bl __inval_dcache_area /* - * Clear the idmap and swapper page tables. + * Clear the init page tables. */ - adrp x0, idmap_pg_dir - adrp x1, swapper_pg_end + adrp x0, init_pg_dir + adrp x1, init_pg_end sub x1, x1, x0 1: stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 @@ -373,7 +375,7 @@ __create_page_tables: /* * Map the kernel image (starting with PHYS_OFFSET). */ - adrp x0, swapper_pg_dir + adrp x0, init_pg_dir mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text) add x5, x5, x23 // add KASLR displacement mov x4, PTRS_PER_PGD @@ -390,7 +392,7 @@ __create_page_tables: * tables again to remove any speculatively loaded cache lines. */ adrp x0, idmap_pg_dir - adrp x1, swapper_pg_end + adrp x1, init_pg_end sub x1, x1, x0 dmb sy bl __inval_dcache_area @@ -706,6 +708,7 @@ secondary_startup: * Common entry point for secondary CPUs. */ bl __cpu_setup // initialise processor + adrp x1, swapper_pg_dir bl __enable_mmu ldr x8, =__secondary_switched br x8 @@ -748,6 +751,7 @@ ENDPROC(__secondary_switched) * Enable the MMU. * * x0 = SCTLR_EL1 value for turning on the MMU. + * x1 = TTBR1_EL1 value * * Returns to the caller via x30/lr. This requires the caller to be covered * by the .idmap.text section. @@ -756,17 +760,16 @@ ENDPROC(__secondary_switched) * If it isn't, park the CPU */ ENTRY(__enable_mmu) - mrs x1, ID_AA64MMFR0_EL1 - ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4 + mrs x2, ID_AA64MMFR0_EL1 + ubfx x2, x2, #ID_AA64MMFR0_TGRAN_SHIFT, 4 cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED b.ne __no_granule_support - update_early_cpu_boot_status 0, x1, x2 - adrp x1, idmap_pg_dir - adrp x2, swapper_pg_dir - phys_to_ttbr x3, x1 - phys_to_ttbr x4, x2 - msr ttbr0_el1, x3 // load TTBR0 - msr ttbr1_el1, x4 // load TTBR1 + update_early_cpu_boot_status 0, x2, x3 + adrp x2, idmap_pg_dir + phys_to_ttbr x1, x1 + phys_to_ttbr x2, x2 + msr ttbr0_el1, x2 // load TTBR0 + msr ttbr1_el1, x1 // load TTBR1 isb msr sctlr_el1, x0 isb @@ -823,6 +826,7 @@ __primary_switch: mrs x20, sctlr_el1 // preserve old SCTLR_EL1 value #endif + adrp x1, init_pg_dir bl __enable_mmu #ifdef CONFIG_RELOCATABLE bl __relocate_kernel diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c @@ -107,7 +107,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) if (!p->ainsn.api.insn) return -ENOMEM; break; - }; + } /* prepare the instruction */ if (p->ainsn.api.insn) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c @@ -358,6 +358,10 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, if (IS_ENABLED(CONFIG_ARM64_UAO) && cpus_have_const_cap(ARM64_HAS_UAO)) childregs->pstate |= PSR_UAO_BIT; + + if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) + childregs->pstate |= PSR_SSBS_BIT; + p->thread.cpu_context.x19 = stack_start; p->thread.cpu_context.x20 = stk_sz; } diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c @@ -24,7 +24,6 @@ #include <uapi/linux/psci.h> -#include <asm/compiler.h> #include <asm/cpu_ops.h> #include <asm/errno.h> #include <asm/smp_plat.h> diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c @@ -349,12 +349,8 @@ void __init setup_arch(char **cmdline_p) #endif #ifdef CONFIG_VT -#if defined(CONFIG_VGA_CONSOLE) - conswitchp = &vga_con; -#elif defined(CONFIG_DUMMY_CONSOLE) conswitchp = &dummy_con; #endif -#endif if (boot_args[1] || boot_args[2] || boot_args[3]) { pr_err("WARNING: x1-x3 nonzero in violation of boot protocol:\n" "\tx1: %016llx\n\tx2: %016llx\n\tx3: %016llx\n" diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S @@ -101,6 +101,7 @@ ENTRY(cpu_resume) bl el2_setup // if in EL2 drop to EL1 cleanly bl __cpu_setup /* enable the MMU early - so we can access sleep_save_stash by va */ + adrp x1, swapper_pg_dir bl __enable_mmu ldr x8, =_cpu_resume br x8 diff --git a/arch/arm64/kernel/ssbd.c b/arch/arm64/kernel/ssbd.c @@ -3,17 +3,33 @@ * Copyright (C) 2018 ARM Ltd, All Rights Reserved. */ +#include <linux/compat.h> #include <linux/errno.h> #include <linux/sched.h> +#include <linux/sched/task_stack.h> #include <linux/thread_info.h> #include <asm/cpufeature.h> +static void ssbd_ssbs_enable(struct task_struct *task) +{ + u64 val = is_compat_thread(task_thread_info(task)) ? + PSR_AA32_SSBS_BIT : PSR_SSBS_BIT; + + task_pt_regs(task)->pstate |= val; +} + +static void ssbd_ssbs_disable(struct task_struct *task) +{ + u64 val = is_compat_thread(task_thread_info(task)) ? + PSR_AA32_SSBS_BIT : PSR_SSBS_BIT; + + task_pt_regs(task)->pstate &= ~val; +} + /* * prctl interface for SSBD - * FIXME: Drop the below ifdefery once merged in 4.18. */ -#ifdef PR_SPEC_STORE_BYPASS static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl) { int state = arm64_get_ssbd_state(); @@ -46,12 +62,14 @@ static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl) return -EPERM; task_clear_spec_ssb_disable(task); clear_tsk_thread_flag(task, TIF_SSBD); + ssbd_ssbs_enable(task); break; case PR_SPEC_DISABLE: if (state == ARM64_SSBD_FORCE_DISABLE) return -EPERM; task_set_spec_ssb_disable(task); set_tsk_thread_flag(task, TIF_SSBD); + ssbd_ssbs_disable(task); break; case PR_SPEC_FORCE_DISABLE: if (state == ARM64_SSBD_FORCE_DISABLE) @@ -59,6 +77,7 @@ static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl) task_set_spec_ssb_disable(task); task_set_spec_ssb_force_disable(task); set_tsk_thread_flag(task, TIF_SSBD); + ssbd_ssbs_disable(task); break; default: return -ERANGE; @@ -107,4 +126,3 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) return -ENODEV; } } -#endif /* PR_SPEC_STORE_BYPASS */ diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c @@ -48,6 +48,10 @@ void notrace __cpu_suspend_exit(void) */ cpu_uninstall_idmap(); + /* Restore CnP bit in TTBR1_EL1 */ + if (system_supports_cnp()) + cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); + /* * PSTATE was not saved over suspend/resume, re-enable any detected * features that might not have been set correctly. diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c @@ -310,10 +310,12 @@ static int call_undef_hook(struct pt_regs *regs) int (*fn)(struct pt_regs *regs, u32 instr) = NULL; void __user *pc = (void __user *)instruction_pointer(regs); - if (!user_mode(regs)) - return 1; - - if (compat_thumb_mode(regs)) { + if (!user_mode(regs)) { + __le32 instr_le; + if (probe_kernel_address((__force __le32 *)pc, instr_le)) + goto exit; + instr = le32_to_cpu(instr_le); + } else if (compat_thumb_mode(regs)) { /* 16-bit Thumb instruction */ __le16 instr_le; if (get_user(instr_le, (__le16 __user *)pc)) @@ -352,6 +354,9 @@ void force_signal_inject(int signal, int code, unsigned long address) const char *desc; struct pt_regs *regs = current_pt_regs(); + if (WARN_ON(!user_mode(regs))) + return; + clear_siginfo(&info); switch (signal) { @@ -406,14 +411,10 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) if (call_undef_hook(regs) == 0) return; + BUG_ON(!user_mode(regs)); force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); } -void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) -{ - sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCI, 0); -} - #define __user_cache_maint(insn, address, res) \ if (address >= user_addr_max()) { \ res = -EFAULT; \ @@ -437,7 +438,7 @@ void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) { unsigned long address; - int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + int rt = ESR_ELx_SYS64_ISS_RT(esr); int crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT; int ret = 0; @@ -472,7 +473,7 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) static void ctr_read_handler(unsigned int esr, struct pt_regs *regs) { - int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + int rt = ESR_ELx_SYS64_ISS_RT(esr); unsigned long val = arm64_ftr_reg_user_value(&arm64_ftr_reg_ctrel0); pt_regs_write_reg(regs, rt, val); @@ -482,7 +483,7 @@ static void ctr_read_handler(unsigned int esr, struct pt_regs *regs) static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs) { - int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + int rt = ESR_ELx_SYS64_ISS_RT(esr); pt_regs_write_reg(regs, rt, arch_counter_get_cntvct()); arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); @@ -490,12 +491,28 @@ static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs) static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs) { - int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + int rt = ESR_ELx_SYS64_ISS_RT(esr); pt_regs_write_reg(regs, rt, arch_timer_get_rate()); arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); } +static void mrs_handler(unsigned int esr, struct pt_regs *regs) +{ + u32 sysreg, rt; + + rt = ESR_ELx_SYS64_ISS_RT(esr); + sysreg = esr_sys64_to_sysreg(esr); + + if (do_emulate_mrs(regs, sysreg, rt) != 0) + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); +} + +static void wfi_handler(unsigned int esr, struct pt_regs *regs) +{ + arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); +} + struct sys64_hook { unsigned int esr_mask; unsigned int esr_val; @@ -526,9 +543,176 @@ static struct sys64_hook sys64_hooks[] = { .esr_val = ESR_ELx_SYS64_ISS_SYS_CNTFRQ, .handler = cntfrq_read_handler, }, + { + /* Trap read access to CPUID registers */ + .esr_mask = ESR_ELx_SYS64_ISS_SYS_MRS_OP_MASK, + .esr_val = ESR_ELx_SYS64_ISS_SYS_MRS_OP_VAL, + .handler = mrs_handler, + }, + { + /* Trap WFI instructions executed in userspace */ + .esr_mask = ESR_ELx_WFx_MASK, + .esr_val = ESR_ELx_WFx_WFI_VAL, + .handler = wfi_handler, + }, {}, }; + +#ifdef CONFIG_COMPAT +#define PSTATE_IT_1_0_SHIFT 25 +#define PSTATE_IT_1_0_MASK (0x3 << PSTATE_IT_1_0_SHIFT) +#define PSTATE_IT_7_2_SHIFT 10 +#define PSTATE_IT_7_2_MASK (0x3f << PSTATE_IT_7_2_SHIFT) + +static u32 compat_get_it_state(struct pt_regs *regs) +{ + u32 it, pstate = regs->pstate; + + it = (pstate & PSTATE_IT_1_0_MASK) >> PSTATE_IT_1_0_SHIFT; + it |= ((pstate & PSTATE_IT_7_2_MASK) >> PSTATE_IT_7_2_SHIFT) << 2; + + return it; +} + +static void compat_set_it_state(struct pt_regs *regs, u32 it) +{ + u32 pstate_it; + + pstate_it = (it << PSTATE_IT_1_0_SHIFT) & PSTATE_IT_1_0_MASK; + pstate_it |= ((it >> 2) << PSTATE_IT_7_2_SHIFT) & PSTATE_IT_7_2_MASK; + + regs->pstate &= ~PSR_AA32_IT_MASK; + regs->pstate |= pstate_it; +} + +static bool cp15_cond_valid(unsigned int esr, struct pt_regs *regs) +{ + int cond; + + /* Only a T32 instruction can trap without CV being set */ + if (!(esr & ESR_ELx_CV)) { + u32 it; + + it = compat_get_it_state(regs); + if (!it) + return true; + + cond = it >> 4; + } else { + cond = (esr & ESR_ELx_COND_MASK) >> ESR_ELx_COND_SHIFT; + } + + return aarch32_opcode_cond_checks[cond](regs->pstate); +} + +static void advance_itstate(struct pt_regs *regs) +{ + u32 it; + + /* ARM mode */ + if (!(regs->pstate & PSR_AA32_T_BIT) || + !(regs->pstate & PSR_AA32_IT_MASK)) + return; + + it = compat_get_it_state(regs); + + /* + * If this is the last instruction of the block, wipe the IT + * state. Otherwise advance it. + */ + if (!(it & 7)) + it = 0; + else + it = (it & 0xe0) | ((it << 1) & 0x1f); + + compat_set_it_state(regs, it); +} + +static void arm64_compat_skip_faulting_instruction(struct pt_regs *regs, + unsigned int sz) +{ + advance_itstate(regs); + arm64_skip_faulting_instruction(regs, sz); +} + +static void compat_cntfrq_read_handler(unsigned int esr, struct pt_regs *regs) +{ + int reg = (esr & ESR_ELx_CP15_32_ISS_RT_MASK) >> ESR_ELx_CP15_32_ISS_RT_SHIFT; + + pt_regs_write_reg(regs, reg, arch_timer_get_rate()); + arm64_compat_skip_faulting_instruction(regs, 4); +} + +static struct sys64_hook cp15_32_hooks[] = { + { + .esr_mask = ESR_ELx_CP15_32_ISS_SYS_MASK, + .esr_val = ESR_ELx_CP15_32_ISS_SYS_CNTFRQ, + .handler = compat_cntfrq_read_handler, + }, + {}, +}; + +static void compat_cntvct_read_handler(unsigned int esr, struct pt_regs *regs) +{ + int rt = (esr & ESR_ELx_CP15_64_ISS_RT_MASK) >> ESR_ELx_CP15_64_ISS_RT_SHIFT; + int rt2 = (esr & ESR_ELx_CP15_64_ISS_RT2_MASK) >> ESR_ELx_CP15_64_ISS_RT2_SHIFT; + u64 val = arch_counter_get_cntvct(); + + pt_regs_write_reg(regs, rt, lower_32_bits(val)); + pt_regs_write_reg(regs, rt2, upper_32_bits(val)); + arm64_compat_skip_faulting_instruction(regs, 4); +} + +static struct sys64_hook cp15_64_hooks[] = { + { + .esr_mask = ESR_ELx_CP15_64_ISS_SYS_MASK, + .esr_val = ESR_ELx_CP15_64_ISS_SYS_CNTVCT, + .handler = compat_cntvct_read_handler, + }, + {}, +}; + +asmlinkage void __exception do_cp15instr(unsigned int esr, struct pt_regs *regs) +{ + struct sys64_hook *hook, *hook_base; + + if (!cp15_cond_valid(esr, regs)) { + /* + * There is no T16 variant of a CP access, so we + * always advance PC by 4 bytes. + */ + arm64_compat_skip_faulting_instruction(regs, 4); + return; + } + + switch (ESR_ELx_EC(esr)) { + case ESR_ELx_EC_CP15_32: + hook_base = cp15_32_hooks; + break; + case ESR_ELx_EC_CP15_64: + hook_base = cp15_64_hooks; + break; + default: + do_undefinstr(regs); + return; + } + + for (hook = hook_base; hook->handler; hook++) + if ((hook->esr_mask & esr) == hook->esr_val) { + hook->handler(esr, regs); + return; + } + + /* + * New cp15 instructions may previously have been undefined at + * EL0. Fall back to our usual undefined instruction handler + * so that we handle these consistently. + */ + do_undefinstr(regs); +} +#endif + asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs) { struct sys64_hook *hook; @@ -605,7 +789,6 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) handler[reason], smp_processor_id(), esr, esr_get_class_string(esr)); - die("Oops - bad mode", regs, 0); local_daif_mask(); panic("bad mode"); } diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S @@ -138,6 +138,23 @@ SECTIONS EXCEPTION_TABLE(8) /* __init_begin will be marked RO NX */ NOTES + . = ALIGN(PAGE_SIZE); + idmap_pg_dir = .; + . += IDMAP_DIR_SIZE; + +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + tramp_pg_dir = .; + . += PAGE_SIZE; +#endif + +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + reserved_ttbr0 = .; + . += RESERVED_TTBR0_SIZE; +#endif + swapper_pg_dir = .; + . += PAGE_SIZE; + swapper_pg_end = .; + . = ALIGN(SEGMENT_ALIGN); __init_begin = .; __inittext_begin = .; @@ -216,21 +233,9 @@ SECTIONS BSS_SECTION(0, 0, 0) . = ALIGN(PAGE_SIZE); - idmap_pg_dir = .; - . += IDMAP_DIR_SIZE; - -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 - tramp_pg_dir = .; - . += PAGE_SIZE; -#endif - -#ifdef CONFIG_ARM64_SW_TTBR0_PAN - reserved_ttbr0 = .; - . += RESERVED_TTBR0_SIZE; -#endif - swapper_pg_dir = .; - . += SWAPPER_DIR_SIZE; - swapper_pg_end = .; + init_pg_dir = .; + . += INIT_DIR_SIZE; + init_pg_end = .; __pecoff_data_size = ABSOLUTE(. - __initdata_begin); _end = .; diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S @@ -65,6 +65,9 @@ __do_hyp_init: b.lo __kvm_handle_stub_hvc phys_to_ttbr x4, x0 +alternative_if ARM64_HAS_CNP + orr x4, x4, #TTBR_CNP_BIT +alternative_else_nop_endif msr ttbr0_el2, x4 mrs x4, tcr_el1 diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -288,3 +288,14 @@ void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) vcpu->arch.sysregs_loaded_on_cpu = false; } + +void __hyp_text __kvm_enable_ssbs(void) +{ + u64 tmp; + + asm volatile( + "mrs %0, sctlr_el2\n" + "orr %0, %0, %1\n" + "msr sctlr_el2, %0" + : "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS)); +} diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile @@ -12,7 +12,7 @@ lib-y := clear_user.o delay.o copy_from_user.o \ # when supported by the CPU. Result and argument registers are handled # correctly, based on the function prototype. lib-$(CONFIG_ARM64_LSE_ATOMICS) += atomic_ll_sc.o -CFLAGS_atomic_ll_sc.o := -fcall-used-x0 -ffixed-x1 -ffixed-x2 \ +CFLAGS_atomic_ll_sc.o := -ffixed-x1 -ffixed-x2 \ -ffixed-x3 -ffixed-x4 -ffixed-x5 -ffixed-x6 \ -ffixed-x7 -fcall-saved-x8 -fcall-saved-x9 \ -fcall-saved-x10 -fcall-saved-x11 -fcall-saved-x12 \ @@ -25,3 +25,5 @@ KCOV_INSTRUMENT_atomic_ll_sc.o := n UBSAN_SANITIZE_atomic_ll_sc.o := n lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o + +obj-$(CONFIG_CRC32) += crc32.o diff --git a/arch/arm64/lib/crc32.S b/arch/arm64/lib/crc32.S @@ -0,0 +1,60 @@ +/* + * Accelerated CRC32(C) using AArch64 CRC instructions + * + * Copyright (C) 2016 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/alternative.h> +#include <asm/assembler.h> + + .cpu generic+crc + + .macro __crc32, c +0: subs x2, x2, #16 + b.mi 8f + ldp x3, x4, [x1], #16 +CPU_BE( rev x3, x3 ) +CPU_BE( rev x4, x4 ) + crc32\c\()x w0, w0, x3 + crc32\c\()x w0, w0, x4 + b.ne 0b + ret + +8: tbz x2, #3, 4f + ldr x3, [x1], #8 +CPU_BE( rev x3, x3 ) + crc32\c\()x w0, w0, x3 +4: tbz x2, #2, 2f + ldr w3, [x1], #4 +CPU_BE( rev w3, w3 ) + crc32\c\()w w0, w0, w3 +2: tbz x2, #1, 1f + ldrh w3, [x1], #2 +CPU_BE( rev16 w3, w3 ) + crc32\c\()h w0, w0, w3 +1: tbz x2, #0, 0f + ldrb w3, [x1] + crc32\c\()b w0, w0, w3 +0: ret + .endm + + .align 5 +ENTRY(crc32_le) +alternative_if_not ARM64_HAS_CRC32 + b crc32_le_base +alternative_else_nop_endif + __crc32 +ENDPROC(crc32_le) + + .align 5 +ENTRY(__crc32c_le) +alternative_if_not ARM64_HAS_CRC32 + b __crc32c_le_base +alternative_else_nop_endif + __crc32 c +ENDPROC(__crc32c_le) diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c @@ -88,7 +88,7 @@ void verify_cpu_asid_bits(void) } } -static void flush_context(unsigned int cpu) +static void flush_context(void) { int i; u64 asid; @@ -142,7 +142,7 @@ static bool check_update_reserved_asid(u64 asid, u64 newasid) return hit; } -static u64 new_context(struct mm_struct *mm, unsigned int cpu) +static u64 new_context(struct mm_struct *mm) { static u32 cur_idx = 1; u64 asid = atomic64_read(&mm->context.id); @@ -180,7 +180,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) /* We're out of ASIDs, so increment the global generation count */ generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION, &asid_generation); - flush_context(cpu); + flush_context(); /* We have more ASIDs than CPUs, so this will always succeed */ asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); @@ -196,6 +196,9 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) unsigned long flags; u64 asid, old_active_asid; + if (system_supports_cnp()) + cpu_set_reserved_ttbr0(); + asid = atomic64_read(&mm->context.id); /* @@ -223,7 +226,7 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) /* Check that our ASID belongs to the current generation. */ asid = atomic64_read(&mm->context.id); if ((asid ^ atomic64_read(&asid_generation)) >> asid_bits) { - asid = new_context(mm, cpu); + asid = new_context(mm); atomic64_set(&mm->context.id, asid); } diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c @@ -36,8 +36,8 @@ static const struct addr_marker address_markers[] = { #endif { MODULES_VADDR, "Modules start" }, { MODULES_END, "Modules end" }, - { VMALLOC_START, "vmalloc() Area" }, - { VMALLOC_END, "vmalloc() End" }, + { VMALLOC_START, "vmalloc() area" }, + { VMALLOC_END, "vmalloc() end" }, { FIXADDR_START, "Fixmap start" }, { FIXADDR_TOP, "Fixmap end" }, { PCI_IO_START, "PCI I/O start" }, @@ -46,7 +46,7 @@ static const struct addr_marker address_markers[] = { { VMEMMAP_START, "vmemmap start" }, { VMEMMAP_START + VMEMMAP_SIZE, "vmemmap end" }, #endif - { PAGE_OFFSET, "Linear Mapping" }, + { PAGE_OFFSET, "Linear mapping" }, { -1, NULL }, }; diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c @@ -37,6 +37,7 @@ #include <asm/cmpxchg.h> #include <asm/cpufeature.h> #include <asm/exception.h> +#include <asm/daifflags.h> #include <asm/debug-monitors.h> #include <asm/esr.h> #include <asm/sysreg.h> @@ -56,10 +57,16 @@ struct fault_info { }; static const struct fault_info fault_info[]; +static struct fault_info debug_fault_info[]; static inline const struct fault_info *esr_to_fault_info(unsigned int esr) { - return fault_info + (esr & 63); + return fault_info + (esr & ESR_ELx_FSC); +} + +static inline const struct fault_info *esr_to_debug_fault_info(unsigned int esr) +{ + return debug_fault_info + DBG_ESR_EVT(esr); } #ifdef CONFIG_KPROBES @@ -235,9 +242,8 @@ static bool is_el1_instruction_abort(unsigned int esr) return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR; } -static inline bool is_el1_permission_fault(unsigned int esr, - struct pt_regs *regs, - unsigned long addr) +static inline bool is_el1_permission_fault(unsigned long addr, unsigned int esr, + struct pt_regs *regs) { unsigned int ec = ESR_ELx_EC(esr); unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE; @@ -283,7 +289,7 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr, if (!is_el1_instruction_abort(esr) && fixup_exception(regs)) return; - if (is_el1_permission_fault(esr, regs, addr)) { + if (is_el1_permission_fault(addr, esr, regs)) { if (esr & ESR_ELx_WNR) msg = "write to read-only memory"; else @@ -454,7 +460,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, mm_flags |= FAULT_FLAG_WRITE; } - if (addr < TASK_SIZE && is_el1_permission_fault(esr, regs, addr)) { + if (addr < TASK_SIZE && is_el1_permission_fault(addr, esr, regs)) { /* regs->orig_addr_limit may be 0 if we entered from EL0 */ if (regs->orig_addr_limit == KERNEL_DS) die_kernel_fault("access to user memory with fs=KERNEL_DS", @@ -771,7 +777,7 @@ asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr, if (addr > TASK_SIZE) arm64_apply_bp_hardening(); - local_irq_enable(); + local_daif_restore(DAIF_PROCCTX); do_mem_abort(addr, esr, regs); } @@ -785,7 +791,7 @@ asmlinkage void __exception do_sp_pc_abort(unsigned long addr, if (user_mode(regs)) { if (instruction_pointer(regs) > TASK_SIZE) arm64_apply_bp_hardening(); - local_irq_enable(); + local_daif_restore(DAIF_PROCCTX); } clear_siginfo(&info); @@ -831,7 +837,7 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, unsigned int esr, struct pt_regs *regs) { - const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr); + const struct fault_info *inf = esr_to_debug_fault_info(esr); int rv; /* @@ -864,17 +870,3 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, return rv; } NOKPROBE_SYMBOL(do_debug_exception); - -#ifdef CONFIG_ARM64_PAN -void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) -{ - /* - * We modify PSTATE. This won't work from irq context as the PSTATE - * is discarded once we return from the exception. - */ - WARN_ON_ONCE(in_interrupt()); - - sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0); - asm(SET_PSTATE_PAN(1)); -} -#endif /* CONFIG_ARM64_PAN */ diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c @@ -284,7 +284,6 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) #endif /* CONFIG_NUMA */ -#ifdef CONFIG_HAVE_ARCH_PFN_VALID int pfn_valid(unsigned long pfn) { phys_addr_t addr = pfn << PAGE_SHIFT; @@ -294,7 +293,6 @@ int pfn_valid(unsigned long pfn) return memblock_is_map_memory(addr); } EXPORT_SYMBOL(pfn_valid); -#endif #ifndef CONFIG_SPARSEMEM static void __init arm64_memory_present(void) diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c @@ -192,7 +192,7 @@ void __init kasan_init(void) /* * We are going to perform proper setup of shadow memory. - * At first we should unmap early shadow (clear_pgds() call bellow). + * At first we should unmap early shadow (clear_pgds() call below). * However, instrumented code couldn't execute without shadow memory. * tmp_pg_dir used to keep early shadow mapped until full shadow * setup will be finished. diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c @@ -67,6 +67,24 @@ static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused; static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused; +static DEFINE_SPINLOCK(swapper_pgdir_lock); + +void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd) +{ + pgd_t *fixmap_pgdp; + + spin_lock(&swapper_pgdir_lock); + fixmap_pgdp = pgd_set_fixmap(__pa_symbol(pgdp)); + WRITE_ONCE(*fixmap_pgdp, pgd); + /* + * We need dsb(ishst) here to ensure the page-table-walker sees + * our new entry before set_p?d() returns. The fixmap's + * flush_tlb_kernel_range() via clear_fixmap() does this for us. + */ + pgd_clear_fixmap(); + spin_unlock(&swapper_pgdir_lock); +} + pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { @@ -629,34 +647,18 @@ static void __init map_kernel(pgd_t *pgdp) */ void __init paging_init(void) { - phys_addr_t pgd_phys = early_pgtable_alloc(); - pgd_t *pgdp = pgd_set_fixmap(pgd_phys); + pgd_t *pgdp = pgd_set_fixmap(__pa_symbol(swapper_pg_dir)); map_kernel(pgdp); map_mem(pgdp); - /* - * We want to reuse the original swapper_pg_dir so we don't have to - * communicate the new address to non-coherent secondaries in - * secondary_entry, and so cpu_switch_mm can generate the address with - * adrp+add rather than a load from some global variable. - * - * To do this we need to go via a temporary pgd. - */ - cpu_replace_ttbr1(__va(pgd_phys)); - memcpy(swapper_pg_dir, pgdp, PGD_SIZE); - cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); - pgd_clear_fixmap(); - memblock_free(pgd_phys, PAGE_SIZE); - /* - * We only reuse the PGD from the swapper_pg_dir, not the pud + pmd - * allocated with it. - */ - memblock_free(__pa_symbol(swapper_pg_dir) + PAGE_SIZE, - __pa_symbol(swapper_pg_end) - __pa_symbol(swapper_pg_dir) - - PAGE_SIZE); + cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); + init_mm.pgd = swapper_pg_dir; + + memblock_free(__pa_symbol(init_pg_dir), + __pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir)); } /* diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c @@ -391,7 +391,6 @@ static int __init numa_init(int (*init_func)(void)) nodes_clear(numa_nodes_parsed); nodes_clear(node_possible_map); nodes_clear(node_online_map); - numa_free_distance(); ret = numa_alloc_distance(); if (ret < 0) @@ -399,20 +398,24 @@ static int __init numa_init(int (*init_func)(void)) ret = init_func(); if (ret < 0) - return ret; + goto out_free_distance; if (nodes_empty(numa_nodes_parsed)) { pr_info("No NUMA configuration found\n"); - return -EINVAL; + ret = -EINVAL; + goto out_free_distance; } ret = numa_register_nodes(); if (ret < 0) - return ret; + goto out_free_distance; setup_node_to_cpumask_map(); return 0; +out_free_distance: + numa_free_distance(); + return ret; } /** @@ -432,7 +435,7 @@ static int __init dummy_numa_init(void) if (numa_off) pr_info("NUMA disabled\n"); /* Forced off on command line. */ pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n", - 0LLU, PFN_PHYS(max_pfn) - 1); + memblock_start_of_DRAM(), memblock_end_of_DRAM() - 1); for_each_memblock(memory, mblk) { ret = numa_add_memblk(0, mblk->base, mblk->base + mblk->size); diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S @@ -160,6 +160,12 @@ ENTRY(cpu_do_switch_mm) mrs x2, ttbr1_el1 mmid x1, x1 // get mm->context.id phys_to_ttbr x3, x0 + +alternative_if ARM64_HAS_CNP + cbz x1, 1f // skip CNP for reserved ASID + orr x3, x3, #TTBR_CNP_BIT +1: +alternative_else_nop_endif #ifdef CONFIG_ARM64_SW_TTBR0_PAN bfi x3, x1, #48, #16 // set the ASID field in TTBR0 #endif @@ -184,7 +190,7 @@ ENDPROC(cpu_do_switch_mm) .endm /* - * void idmap_cpu_replace_ttbr1(phys_addr_t new_pgd) + * void idmap_cpu_replace_ttbr1(phys_addr_t ttbr1) * * This is the low-level counterpart to cpu_replace_ttbr1, and should not be * called by anything else. It can only be executed from a TTBR0 mapping. @@ -194,8 +200,7 @@ ENTRY(idmap_cpu_replace_ttbr1) __idmap_cpu_set_reserved_ttbr1 x1, x3 - phys_to_ttbr x3, x0 - msr ttbr1_el1, x3 + msr ttbr1_el1, x0 isb restore_daif x2 diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c @@ -319,6 +319,13 @@ static u64 notrace arm64_858921_read_cntvct_el0(void) } #endif +#ifdef CONFIG_ARM64_ERRATUM_1188873 +static u64 notrace arm64_1188873_read_cntvct_el0(void) +{ + return read_sysreg(cntvct_el0); +} +#endif + #ifdef CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND DEFINE_PER_CPU(const struct arch_timer_erratum_workaround *, timer_unstable_counter_workaround); EXPORT_SYMBOL_GPL(timer_unstable_counter_workaround); @@ -408,6 +415,14 @@ static const struct arch_timer_erratum_workaround ool_workarounds[] = { .read_cntvct_el0 = arm64_858921_read_cntvct_el0, }, #endif +#ifdef CONFIG_ARM64_ERRATUM_1188873 + { + .match_type = ate_match_local_cap_id, + .id = (void *)ARM64_WORKAROUND_1188873, + .desc = "ARM erratum 1188873", + .read_cntvct_el0 = arm64_1188873_read_cntvct_el0, + }, +#endif }; typedef bool (*ate_match_fn_t)(const struct arch_timer_erratum_workaround *, diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c @@ -77,14 +77,14 @@ static int pmu_parse_irq_affinity(struct device_node *node, int i) dn = of_parse_phandle(node, "interrupt-affinity", i); if (!dn) { - pr_warn("failed to parse interrupt-affinity[%d] for %s\n", - i, node->name); + pr_warn("failed to parse interrupt-affinity[%d] for %pOFn\n", + i, node); return -EINVAL; } cpu = of_cpu_node_to_id(dn); if (cpu < 0) { - pr_warn("failed to find logical CPU for %s\n", dn->name); + pr_warn("failed to find logical CPU for %pOFn\n", dn); cpu = nr_cpu_ids; } diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h @@ -20,6 +20,8 @@ #include <asm/pgalloc.h> #include <asm/tlbflush.h> +#ifdef CONFIG_MMU + #ifdef CONFIG_HAVE_RCU_TABLE_FREE /* * Semi RCU freeing of the page directories. @@ -97,12 +99,30 @@ struct mmu_gather { #endif unsigned long start; unsigned long end; - /* we are in the middle of an operation to clear - * a full mm and can make some optimizations */ - unsigned int fullmm : 1, - /* we have performed an operation which - * requires a complete flush of the tlb */ - need_flush_all : 1; + /* + * we are in the middle of an operation to clear + * a full mm and can make some optimizations + */ + unsigned int fullmm : 1; + + /* + * we have performed an operation which + * requires a complete flush of the tlb + */ + unsigned int need_flush_all : 1; + + /* + * we have removed page directories + */ + unsigned int freed_tables : 1; + + /* + * at which levels have we cleared entries? + */ + unsigned int cleared_ptes : 1; + unsigned int cleared_pmds : 1; + unsigned int cleared_puds : 1; + unsigned int cleared_p4ds : 1; struct mmu_gather_batch *active; struct mmu_gather_batch local; @@ -118,6 +138,7 @@ void arch_tlb_gather_mmu(struct mmu_gather *tlb, void tlb_flush_mmu(struct mmu_gather *tlb); void arch_tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end, bool force); +void tlb_flush_mmu_free(struct mmu_gather *tlb); extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size); @@ -137,6 +158,11 @@ static inline void __tlb_reset_range(struct mmu_gather *tlb) tlb->start = TASK_SIZE; tlb->end = 0; } + tlb->freed_tables = 0; + tlb->cleared_ptes = 0; + tlb->cleared_pmds = 0; + tlb->cleared_puds = 0; + tlb->cleared_p4ds = 0; } static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) @@ -186,6 +212,25 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, } #endif +static inline unsigned long tlb_get_unmap_shift(struct mmu_gather *tlb) +{ + if (tlb->cleared_ptes) + return PAGE_SHIFT; + if (tlb->cleared_pmds) + return PMD_SHIFT; + if (tlb->cleared_puds) + return PUD_SHIFT; + if (tlb->cleared_p4ds) + return P4D_SHIFT; + + return PAGE_SHIFT; +} + +static inline unsigned long tlb_get_unmap_size(struct mmu_gather *tlb) +{ + return 1UL << tlb_get_unmap_shift(tlb); +} + /* * In the case of tlb vma handling, we can optimise these away in the * case where we're doing a full MM flush. When we're doing a munmap, @@ -219,13 +264,19 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #define tlb_remove_tlb_entry(tlb, ptep, address) \ do { \ __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + tlb->cleared_ptes = 1; \ __tlb_remove_tlb_entry(tlb, ptep, address); \ } while (0) -#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \ - do { \ - __tlb_adjust_range(tlb, address, huge_page_size(h)); \ - __tlb_remove_tlb_entry(tlb, ptep, address); \ +#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \ + do { \ + unsigned long _sz = huge_page_size(h); \ + __tlb_adjust_range(tlb, address, _sz); \ + if (_sz == PMD_SIZE) \ + tlb->cleared_pmds = 1; \ + else if (_sz == PUD_SIZE) \ + tlb->cleared_puds = 1; \ + __tlb_remove_tlb_entry(tlb, ptep, address); \ } while (0) /** @@ -239,6 +290,7 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #define tlb_remove_pmd_tlb_entry(tlb, pmdp, address) \ do { \ __tlb_adjust_range(tlb, address, HPAGE_PMD_SIZE); \ + tlb->cleared_pmds = 1; \ __tlb_remove_pmd_tlb_entry(tlb, pmdp, address); \ } while (0) @@ -253,6 +305,7 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #define tlb_remove_pud_tlb_entry(tlb, pudp, address) \ do { \ __tlb_adjust_range(tlb, address, HPAGE_PUD_SIZE); \ + tlb->cleared_puds = 1; \ __tlb_remove_pud_tlb_entry(tlb, pudp, address); \ } while (0) @@ -278,6 +331,8 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #define pte_free_tlb(tlb, ptep, address) \ do { \ __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + tlb->freed_tables = 1; \ + tlb->cleared_pmds = 1; \ __pte_free_tlb(tlb, ptep, address); \ } while (0) #endif @@ -285,7 +340,9 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #ifndef pmd_free_tlb #define pmd_free_tlb(tlb, pmdp, address) \ do { \ - __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + tlb->freed_tables = 1; \ + tlb->cleared_puds = 1; \ __pmd_free_tlb(tlb, pmdp, address); \ } while (0) #endif @@ -295,6 +352,8 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #define pud_free_tlb(tlb, pudp, address) \ do { \ __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + tlb->freed_tables = 1; \ + tlb->cleared_p4ds = 1; \ __pud_free_tlb(tlb, pudp, address); \ } while (0) #endif @@ -304,12 +363,15 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #ifndef p4d_free_tlb #define p4d_free_tlb(tlb, pudp, address) \ do { \ - __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + tlb->freed_tables = 1; \ __p4d_free_tlb(tlb, pudp, address); \ } while (0) #endif #endif +#endif /* CONFIG_MMU */ + #define tlb_migrate_finish(mm) do {} while (0) #endif /* _ASM_GENERIC__TLB_H */ diff --git a/include/linux/compat.h b/include/linux/compat.h @@ -103,6 +103,9 @@ typedef struct compat_sigaltstack { compat_size_t ss_size; } compat_stack_t; #endif +#ifndef COMPAT_MINSIGSTKSZ +#define COMPAT_MINSIGSTKSZ MINSIGSTKSZ +#endif #define compat_jiffies_to_clock_t(x) \ (((unsigned long)(x) * COMPAT_USER_HZ) / HZ) diff --git a/kernel/signal.c b/kernel/signal.c @@ -3460,7 +3460,8 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) } static int -do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp) +do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp, + size_t min_ss_size) { struct task_struct *t = current; @@ -3490,7 +3491,7 @@ do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp) ss_size = 0; ss_sp = NULL; } else { - if (unlikely(ss_size < MINSIGSTKSZ)) + if (unlikely(ss_size < min_ss_size)) return -ENOMEM; } @@ -3508,7 +3509,8 @@ SYSCALL_DEFINE2(sigaltstack,const stack_t __user *,uss, stack_t __user *,uoss) if (uss && copy_from_user(&new, uss, sizeof(stack_t))) return -EFAULT; err = do_sigaltstack(uss ? &new : NULL, uoss ? &old : NULL, - current_user_stack_pointer()); + current_user_stack_pointer(), + MINSIGSTKSZ); if (!err && uoss && copy_to_user(uoss, &old, sizeof(stack_t))) err = -EFAULT; return err; @@ -3519,7 +3521,8 @@ int restore_altstack(const stack_t __user *uss) stack_t new; if (copy_from_user(&new, uss, sizeof(stack_t))) return -EFAULT; - (void)do_sigaltstack(&new, NULL, current_user_stack_pointer()); + (void)do_sigaltstack(&new, NULL, current_user_stack_pointer(), + MINSIGSTKSZ); /* squash all but EFAULT for now */ return 0; } @@ -3553,7 +3556,8 @@ static int do_compat_sigaltstack(const compat_stack_t __user *uss_ptr, uss.ss_size = uss32.ss_size; } ret = do_sigaltstack(uss_ptr ? &uss : NULL, &uoss, - compat_user_stack_pointer()); + compat_user_stack_pointer(), + COMPAT_MINSIGSTKSZ); if (ret >= 0 && uoss_ptr) { compat_stack_t old; memset(&old, 0, sizeof(old)); diff --git a/lib/crc32.c b/lib/crc32.c @@ -183,21 +183,21 @@ static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p, } #if CRC_LE_BITS == 1 -u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len) { return crc32_le_generic(crc, p, len, NULL, CRC32_POLY_LE); } -u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len) { return crc32_le_generic(crc, p, len, NULL, CRC32C_POLY_LE); } #else -u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len) { return crc32_le_generic(crc, p, len, (const u32 (*)[256])crc32table_le, CRC32_POLY_LE); } -u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len) { return crc32_le_generic(crc, p, len, (const u32 (*)[256])crc32ctable_le, CRC32C_POLY_LE); @@ -206,6 +206,9 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) EXPORT_SYMBOL(crc32_le); EXPORT_SYMBOL(__crc32c_le); +u32 crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32_le); +u32 __crc32c_le_base(u32, unsigned char const *, size_t) __alias(__crc32c_le); + /* * This multiplies the polynomials x and y modulo the given modulus. * This follows the "little-endian" CRC convention that the lsbit diff --git a/mm/Makefile b/mm/Makefile @@ -23,9 +23,9 @@ KCOV_INSTRUMENT_vmstat.o := n mmu-y := nommu.o mmu-$(CONFIG_MMU) := gup.o highmem.o memory.o mincore.o \ - mlock.o mmap.o mprotect.o mremap.o msync.o \ - page_vma_mapped.o pagewalk.o pgtable-generic.o \ - rmap.o vmalloc.o + mlock.o mmap.o mmu_gather.o mprotect.o mremap.o \ + msync.o page_vma_mapped.o pagewalk.o \ + pgtable-generic.o rmap.o vmalloc.o ifdef CONFIG_CROSS_MEMORY_ATTACH diff --git a/mm/memory.c b/mm/memory.c @@ -186,253 +186,6 @@ static void check_sync_rss_stat(struct task_struct *task) #endif /* SPLIT_RSS_COUNTING */ -#ifdef HAVE_GENERIC_MMU_GATHER - -static bool tlb_next_batch(struct mmu_gather *tlb) -{ - struct mmu_gather_batch *batch; - - batch = tlb->active; - if (batch->next) { - tlb->active = batch->next; - return true; - } - - if (tlb->batch_count == MAX_GATHER_BATCH_COUNT) - return false; - - batch = (void *)__get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0); - if (!batch) - return false; - - tlb->batch_count++; - batch->next = NULL; - batch->nr = 0; - batch->max = MAX_GATHER_BATCH; - - tlb->active->next = batch; - tlb->active = batch; - - return true; -} - -void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - tlb->mm = mm; - - /* Is it from 0 to ~0? */ - tlb->fullmm = !(start | (end+1)); - tlb->need_flush_all = 0; - tlb->local.next = NULL; - tlb->local.nr = 0; - tlb->local.max = ARRAY_SIZE(tlb->__pages); - tlb->active = &tlb->local; - tlb->batch_count = 0; - -#ifdef CONFIG_HAVE_RCU_TABLE_FREE - tlb->batch = NULL; -#endif - tlb->page_size = 0; - - __tlb_reset_range(tlb); -} - -static void tlb_flush_mmu_free(struct mmu_gather *tlb) -{ - struct mmu_gather_batch *batch; - -#ifdef CONFIG_HAVE_RCU_TABLE_FREE - tlb_table_flush(tlb); -#endif - for (batch = &tlb->local; batch && batch->nr; batch = batch->next) { - free_pages_and_swap_cache(batch->pages, batch->nr); - batch->nr = 0; - } - tlb->active = &tlb->local; -} - -void tlb_flush_mmu(struct mmu_gather *tlb) -{ - tlb_flush_mmu_tlbonly(tlb); - tlb_flush_mmu_free(tlb); -} - -/* tlb_finish_mmu - * Called at the end of the shootdown operation to free up any resources - * that were required. - */ -void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end, bool force) -{ - struct mmu_gather_batch *batch, *next; - - if (force) - __tlb_adjust_range(tlb, start, end - start); - - tlb_flush_mmu(tlb); - - /* keep the page table cache within bounds */ - check_pgt_cache(); - - for (batch = tlb->local.next; batch; batch = next) { - next = batch->next; - free_pages((unsigned long)batch, 0); - } - tlb->local.next = NULL; -} - -/* __tlb_remove_page - * Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)), while - * handling the additional races in SMP caused by other CPUs caching valid - * mappings in their TLBs. Returns the number of free page slots left. - * When out of page slots we must call tlb_flush_mmu(). - *returns true if the caller should flush. - */ -bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size) -{ - struct mmu_gather_batch *batch; - - VM_BUG_ON(!tlb->end); - VM_WARN_ON(tlb->page_size != page_size); - - batch = tlb->active; - /* - * Add the page and check if we are full. If so - * force a flush. - */ - batch->pages[batch->nr++] = page; - if (batch->nr == batch->max) { - if (!tlb_next_batch(tlb)) - return true; - batch = tlb->active; - } - VM_BUG_ON_PAGE(batch->nr > batch->max, page); - - return false; -} - -#endif /* HAVE_GENERIC_MMU_GATHER */ - -#ifdef CONFIG_HAVE_RCU_TABLE_FREE - -/* - * See the comment near struct mmu_table_batch. - */ - -/* - * If we want tlb_remove_table() to imply TLB invalidates. - */ -static inline void tlb_table_invalidate(struct mmu_gather *tlb) -{ -#ifdef CONFIG_HAVE_RCU_TABLE_INVALIDATE - /* - * Invalidate page-table caches used by hardware walkers. Then we still - * need to RCU-sched wait while freeing the pages because software - * walkers can still be in-flight. - */ - tlb_flush_mmu_tlbonly(tlb); -#endif -} - -static void tlb_remove_table_smp_sync(void *arg) -{ - /* Simply deliver the interrupt */ -} - -static void tlb_remove_table_one(void *table) -{ - /* - * This isn't an RCU grace period and hence the page-tables cannot be - * assumed to be actually RCU-freed. - * - * It is however sufficient for software page-table walkers that rely on - * IRQ disabling. See the comment near struct mmu_table_batch. - */ - smp_call_function(tlb_remove_table_smp_sync, NULL, 1); - __tlb_remove_table(table); -} - -static void tlb_remove_table_rcu(struct rcu_head *head) -{ - struct mmu_table_batch *batch; - int i; - - batch = container_of(head, struct mmu_table_batch, rcu); - - for (i = 0; i < batch->nr; i++) - __tlb_remove_table(batch->tables[i]); - - free_page((unsigned long)batch); -} - -void tlb_table_flush(struct mmu_gather *tlb) -{ - struct mmu_table_batch **batch = &tlb->batch; - - if (*batch) { - tlb_table_invalidate(tlb); - call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); - *batch = NULL; - } -} - -void tlb_remove_table(struct mmu_gather *tlb, void *table) -{ - struct mmu_table_batch **batch = &tlb->batch; - - if (*batch == NULL) { - *batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN); - if (*batch == NULL) { - tlb_table_invalidate(tlb); - tlb_remove_table_one(table); - return; - } - (*batch)->nr = 0; - } - - (*batch)->tables[(*batch)->nr++] = table; - if ((*batch)->nr == MAX_TABLE_BATCH) - tlb_table_flush(tlb); -} - -#endif /* CONFIG_HAVE_RCU_TABLE_FREE */ - -/** - * tlb_gather_mmu - initialize an mmu_gather structure for page-table tear-down - * @tlb: the mmu_gather structure to initialize - * @mm: the mm_struct of the target address space - * @start: start of the region that will be removed from the page-table - * @end: end of the region that will be removed from the page-table - * - * Called to initialize an (on-stack) mmu_gather structure for page-table - * tear-down from @mm. The @start and @end are set to 0 and -1 - * respectively when @mm is without users and we're going to destroy - * the full address space (exit/execve). - */ -void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - arch_tlb_gather_mmu(tlb, mm, start, end); - inc_tlb_flush_pending(tlb->mm); -} - -void tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) -{ - /* - * If there are parallel threads are doing PTE changes on same range - * under non-exclusive lock(e.g., mmap_sem read-side) but defer TLB - * flush by batching, a thread has stable TLB entry can fail to flush - * the TLB by observing pte_none|!pte_dirty, for example so flush TLB - * forcefully if we detect parallel PTE batching threads. - */ - bool force = mm_tlb_flush_nested(tlb->mm); - - arch_tlb_finish_mmu(tlb, start, end, force); - dec_tlb_flush_pending(tlb->mm); -} - /* * Note: this doesn't free the actual pages themselves. That * has been handled earlier when unmapping all the memory regions. diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c @@ -0,0 +1,261 @@ +#include <linux/gfp.h> +#include <linux/highmem.h> +#include <linux/kernel.h> +#include <linux/mmdebug.h> +#include <linux/mm_types.h> +#include <linux/pagemap.h> +#include <linux/rcupdate.h> +#include <linux/smp.h> +#include <linux/swap.h> + +#include <asm/pgalloc.h> +#include <asm/tlb.h> + +#ifdef HAVE_GENERIC_MMU_GATHER + +static bool tlb_next_batch(struct mmu_gather *tlb) +{ + struct mmu_gather_batch *batch; + + batch = tlb->active; + if (batch->next) { + tlb->active = batch->next; + return true; + } + + if (tlb->batch_count == MAX_GATHER_BATCH_COUNT) + return false; + + batch = (void *)__get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0); + if (!batch) + return false; + + tlb->batch_count++; + batch->next = NULL; + batch->nr = 0; + batch->max = MAX_GATHER_BATCH; + + tlb->active->next = batch; + tlb->active = batch; + + return true; +} + +void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + tlb->mm = mm; + + /* Is it from 0 to ~0? */ + tlb->fullmm = !(start | (end+1)); + tlb->need_flush_all = 0; + tlb->local.next = NULL; + tlb->local.nr = 0; + tlb->local.max = ARRAY_SIZE(tlb->__pages); + tlb->active = &tlb->local; + tlb->batch_count = 0; + +#ifdef CONFIG_HAVE_RCU_TABLE_FREE + tlb->batch = NULL; +#endif + tlb->page_size = 0; + + __tlb_reset_range(tlb); +} + +void tlb_flush_mmu_free(struct mmu_gather *tlb) +{ + struct mmu_gather_batch *batch; + +#ifdef CONFIG_HAVE_RCU_TABLE_FREE + tlb_table_flush(tlb); +#endif + for (batch = &tlb->local; batch && batch->nr; batch = batch->next) { + free_pages_and_swap_cache(batch->pages, batch->nr); + batch->nr = 0; + } + tlb->active = &tlb->local; +} + +void tlb_flush_mmu(struct mmu_gather *tlb) +{ + tlb_flush_mmu_tlbonly(tlb); + tlb_flush_mmu_free(tlb); +} + +/* tlb_finish_mmu + * Called at the end of the shootdown operation to free up any resources + * that were required. + */ +void arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end, bool force) +{ + struct mmu_gather_batch *batch, *next; + + if (force) { + __tlb_reset_range(tlb); + __tlb_adjust_range(tlb, start, end - start); + } + + tlb_flush_mmu(tlb); + + /* keep the page table cache within bounds */ + check_pgt_cache(); + + for (batch = tlb->local.next; batch; batch = next) { + next = batch->next; + free_pages((unsigned long)batch, 0); + } + tlb->local.next = NULL; +} + +/* __tlb_remove_page + * Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)), while + * handling the additional races in SMP caused by other CPUs caching valid + * mappings in their TLBs. Returns the number of free page slots left. + * When out of page slots we must call tlb_flush_mmu(). + *returns true if the caller should flush. + */ +bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size) +{ + struct mmu_gather_batch *batch; + + VM_BUG_ON(!tlb->end); + VM_WARN_ON(tlb->page_size != page_size); + + batch = tlb->active; + /* + * Add the page and check if we are full. If so + * force a flush. + */ + batch->pages[batch->nr++] = page; + if (batch->nr == batch->max) { + if (!tlb_next_batch(tlb)) + return true; + batch = tlb->active; + } + VM_BUG_ON_PAGE(batch->nr > batch->max, page); + + return false; +} + +#endif /* HAVE_GENERIC_MMU_GATHER */ + +#ifdef CONFIG_HAVE_RCU_TABLE_FREE + +/* + * See the comment near struct mmu_table_batch. + */ + +/* + * If we want tlb_remove_table() to imply TLB invalidates. + */ +static inline void tlb_table_invalidate(struct mmu_gather *tlb) +{ +#ifdef CONFIG_HAVE_RCU_TABLE_INVALIDATE + /* + * Invalidate page-table caches used by hardware walkers. Then we still + * need to RCU-sched wait while freeing the pages because software + * walkers can still be in-flight. + */ + tlb_flush_mmu_tlbonly(tlb); +#endif +} + +static void tlb_remove_table_smp_sync(void *arg) +{ + /* Simply deliver the interrupt */ +} + +static void tlb_remove_table_one(void *table) +{ + /* + * This isn't an RCU grace period and hence the page-tables cannot be + * assumed to be actually RCU-freed. + * + * It is however sufficient for software page-table walkers that rely on + * IRQ disabling. See the comment near struct mmu_table_batch. + */ + smp_call_function(tlb_remove_table_smp_sync, NULL, 1); + __tlb_remove_table(table); +} + +static void tlb_remove_table_rcu(struct rcu_head *head) +{ + struct mmu_table_batch *batch; + int i; + + batch = container_of(head, struct mmu_table_batch, rcu); + + for (i = 0; i < batch->nr; i++) + __tlb_remove_table(batch->tables[i]); + + free_page((unsigned long)batch); +} + +void tlb_table_flush(struct mmu_gather *tlb) +{ + struct mmu_table_batch **batch = &tlb->batch; + + if (*batch) { + tlb_table_invalidate(tlb); + call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); + *batch = NULL; + } +} + +void tlb_remove_table(struct mmu_gather *tlb, void *table) +{ + struct mmu_table_batch **batch = &tlb->batch; + + if (*batch == NULL) { + *batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN); + if (*batch == NULL) { + tlb_table_invalidate(tlb); + tlb_remove_table_one(table); + return; + } + (*batch)->nr = 0; + } + + (*batch)->tables[(*batch)->nr++] = table; + if ((*batch)->nr == MAX_TABLE_BATCH) + tlb_table_flush(tlb); +} + +#endif /* CONFIG_HAVE_RCU_TABLE_FREE */ + +/** + * tlb_gather_mmu - initialize an mmu_gather structure for page-table tear-down + * @tlb: the mmu_gather structure to initialize + * @mm: the mm_struct of the target address space + * @start: start of the region that will be removed from the page-table + * @end: end of the region that will be removed from the page-table + * + * Called to initialize an (on-stack) mmu_gather structure for page-table + * tear-down from @mm. The @start and @end are set to 0 and -1 + * respectively when @mm is without users and we're going to destroy + * the full address space (exit/execve). + */ +void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + arch_tlb_gather_mmu(tlb, mm, start, end); + inc_tlb_flush_pending(tlb->mm); +} + +void tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) +{ + /* + * If there are parallel threads are doing PTE changes on same range + * under non-exclusive lock(e.g., mmap_sem read-side) but defer TLB + * flush by batching, a thread has stable TLB entry can fail to flush + * the TLB by observing pte_none|!pte_dirty, for example so flush TLB + * forcefully if we detect parallel PTE batching threads. + */ + bool force = mm_tlb_flush_nested(tlb->mm); + + arch_tlb_finish_mmu(tlb, start, end, force); + dec_tlb_flush_pending(tlb->mm); +} diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c @@ -496,7 +496,7 @@ static bool need_new_vmid_gen(struct kvm *kvm) static void update_vttbr(struct kvm *kvm) { phys_addr_t pgd_phys; - u64 vmid; + u64 vmid, cnp = kvm_cpu_has_cnp() ? VTTBR_CNP_BIT : 0; bool new_gen; read_lock(&kvm_vmid_lock); @@ -546,7 +546,7 @@ static void update_vttbr(struct kvm *kvm) pgd_phys = virt_to_phys(kvm->arch.pgd); BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK); vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits); - kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid; + kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid | cnp; write_unlock(&kvm_vmid_lock); }