whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit 3d8dfe75ef69f4dd4ba35c09b20a5aa58b4a5078
parent d6075262969321bcb5d795de25595fc2a141ac02
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Sun, 10 Mar 2019 10:17:23 -0700

Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull arm64 updates from Catalin Marinas:

 - Pseudo NMI support for arm64 using GICv3 interrupt priorities

 - uaccess macros clean-up (unsafe user accessors also merged but
   reverted, waiting for objtool support on arm64)

 - ptrace regsets for Pointer Authentication (ARMv8.3) key management

 - inX() ordering w.r.t. delay() on arm64 and riscv (acks in place by
   the riscv maintainers)

 - arm64/perf updates: PMU bindings converted to json-schema, unused
   variable and misleading comment removed

 - arm64/debug fixes to ensure checking of the triggering exception
   level and to avoid the propagation of the UNKNOWN FAR value into the
   si_code for debug signals

 - Workaround for Fujitsu A64FX erratum 010001

 - lib/raid6 ARM NEON optimisations

 - NR_CPUS now defaults to 256 on arm64

 - Minor clean-ups (documentation/comments, Kconfig warning, unused
   asm-offsets, clang warnings)

 - MAINTAINERS update for list information to the ARM64 ACPI entry

* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (54 commits)
  arm64: mmu: drop paging_init comments
  arm64: debug: Ensure debug handlers check triggering exception level
  arm64: debug: Don't propagate UNKNOWN FAR into si_code for debug signals
  Revert "arm64: uaccess: Implement unsafe accessors"
  arm64: avoid clang warning about self-assignment
  arm64: Kconfig.platforms: fix warning unmet direct dependencies
  lib/raid6: arm: optimize away a mask operation in NEON recovery routine
  lib/raid6: use vdupq_n_u8 to avoid endianness warnings
  arm64: io: Hook up __io_par() for inX() ordering
  riscv: io: Update __io_[p]ar() macros to take an argument
  asm-generic/io: Pass result of I/O accessor to __io_[p]ar()
  arm64: Add workaround for Fujitsu A64FX erratum 010001
  arm64: Rename get_thread_info()
  arm64: Remove documentation about TIF_USEDFPU
  arm64: irqflags: Fix clang build warnings
  arm64: Enable the support of pseudo-NMIs
  arm64: Skip irqflags tracing for NMI in IRQs disabled context
  arm64: Skip preemption when exiting an NMI
  arm64: Handle serror in NMI context
  irqchip/gic-v3: Allow interrupts to be set as pseudo-NMI
  ...

Diffstat:
MDocumentation/admin-guide/kernel-parameters.txt | 5+++++
MDocumentation/arm64/booting.txt | 5+++++
MDocumentation/arm64/pointer-authentication.txt | 5+++++
MDocumentation/arm64/silicon-errata.txt | 1+
DDocumentation/devicetree/bindings/arm/pmu.txt | 70----------------------------------------------------------------------
ADocumentation/devicetree/bindings/arm/pmu.yaml | 87+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
MMAINTAINERS | 3++-
March/arm/include/asm/arch_gicv3.h | 33+++++++++++++++++++++++++++++++++
March/arm64/Kconfig | 36++++++++++++++++++++++++++++++++++--
March/arm64/Kconfig.platforms | 2+-
March/arm64/include/asm/alternative.h | 4++--
March/arm64/include/asm/arch_gicv3.h | 32++++++++++++++++++++++++++++++++
March/arm64/include/asm/asm-uaccess.h | 2+-
March/arm64/include/asm/assembler.h | 36++++++++++++++++++++++++------------
March/arm64/include/asm/cpucaps.h | 3++-
March/arm64/include/asm/cpufeature.h | 10++++++++++
March/arm64/include/asm/cputype.h | 9+++++++++
March/arm64/include/asm/daifflags.h | 60++++++++++++++++++++++++++++++++++++++++++++++--------------
March/arm64/include/asm/efi.h | 11+++++++++++
March/arm64/include/asm/hardirq.h | 31+++++++++++++++++++++++++++++++
March/arm64/include/asm/io.h | 1+
March/arm64/include/asm/irqflags.h | 100+++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------
March/arm64/include/asm/kvm_host.h | 16++++++++++++++++
Darch/arm64/include/asm/memblock.h | 21---------------------
March/arm64/include/asm/memory.h | 5+++--
March/arm64/include/asm/mmu.h | 1+
March/arm64/include/asm/pgtable-hwdef.h | 1+
March/arm64/include/asm/processor.h | 3+++
March/arm64/include/asm/ptdump.h | 9+++------
March/arm64/include/asm/ptrace.h | 26+++++++++++++++++++++++---
March/arm64/include/asm/thread_info.h | 1-
March/arm64/include/asm/uaccess.h | 36++++++++++++------------------------
March/arm64/include/uapi/asm/ptrace.h | 13+++++++++++++
March/arm64/kernel/alternative.c | 60+++++++++++++++++++++++++++++++++++++++++++++++++++++-------
March/arm64/kernel/asm-offsets.c | 21+--------------------
March/arm64/kernel/cpufeature.c | 41+++++++++++++++++++++++++++++++++++++++--
March/arm64/kernel/entry.S | 60+++++++++++++++++++++++++++++++++++++++++++++++-------------
March/arm64/kernel/irq.c | 3+++
March/arm64/kernel/kgdb.c | 14++++++++++----
March/arm64/kernel/perf_event.c | 2+-
March/arm64/kernel/probes/kprobes.c | 6++++++
March/arm64/kernel/process.c | 51+++++++++++++++++++++++++++++++++++++++++++++++++++
March/arm64/kernel/ptrace.c | 147+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
March/arm64/kernel/setup.c | 1-
March/arm64/kernel/smp.c | 33+++++++++++++++++++++++++++++++++
March/arm64/kernel/traps.c | 8++++++--
March/arm64/kvm/hyp/switch.c | 16++++++++++++++++
March/arm64/mm/dump.c | 4++--
March/arm64/mm/fault.c | 9+++++----
March/arm64/mm/init.c | 20+-------------------
March/arm64/mm/mmu.c | 5-----
March/arm64/mm/proc.S | 12+-----------
March/arm64/mm/ptdump_debugfs.c | 7++-----
March/riscv/include/asm/io.h | 36++++++++++++++++++------------------
Mdrivers/firmware/efi/arm-runtime.c | 6+++---
Mdrivers/firmware/efi/runtime-wrappers.c | 17+++++++++++++++--
Mdrivers/irqchip/irq-gic-v3.c | 265+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------
Mdrivers/perf/xgene_pmu.c | 2--
Minclude/asm-generic/io.h | 20++++++++++----------
Minclude/linux/efi.h | 5+++--
Minclude/linux/hardirq.h | 7+++++++
Minclude/uapi/linux/elf.h | 2++
Mlib/raid6/neon.uc | 5++---
Mlib/raid6/recov_neon_inner.c | 19++++++++-----------
64 files changed, 1214 insertions(+), 368 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt @@ -1846,6 +1846,11 @@ to let secondary kernels in charge of setting up LPIs. + irqchip.gicv3_pseudo_nmi= [ARM64] + Enables support for pseudo-NMIs in the kernel. This + requires the kernel to be built with + CONFIG_ARM64_PSEUDO_NMI. + irqfixup [HW] When an interrupt is not handled search all handlers for it. Intended to get systems with badly broken diff --git a/Documentation/arm64/booting.txt b/Documentation/arm64/booting.txt @@ -188,6 +188,11 @@ Before jumping into the kernel, the following conditions must be met: the kernel image will be entered must be initialised by software at a higher exception level to prevent execution in an UNKNOWN state. + - SCR_EL3.FIQ must have the same value across all CPUs the kernel is + executing on. + - The value of SCR_EL3.FIQ must be the same as the one present at boot + time whenever the kernel is executing. + For systems with a GICv3 interrupt controller to be used in v3 mode: - If EL3 is present: ICC_SRE_EL3.Enable (bit 3) must be initialiased to 0b1. diff --git a/Documentation/arm64/pointer-authentication.txt b/Documentation/arm64/pointer-authentication.txt @@ -78,6 +78,11 @@ bits can vary between the two. Note that the masks apply to TTBR0 addresses, and are not valid to apply to TTBR1 addresses (e.g. kernel pointers). +Additionally, when CONFIG_CHECKPOINT_RESTORE is also set, the kernel +will expose the NT_ARM_PACA_KEYS and NT_ARM_PACG_KEYS regsets (struct +user_pac_address_keys and struct user_pac_generic_keys). These can be +used to get and set the keys for a thread. + Virtualization -------------- diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt @@ -82,3 +82,4 @@ stable kernels. | Qualcomm Tech. | Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 | | Qualcomm Tech. | QDF2400 ITS | E0065 | QCOM_QDF2400_ERRATUM_0065 | | Qualcomm Tech. | Falkor v{1,2} | E1041 | QCOM_FALKOR_ERRATUM_1041 | +| Fujitsu | A64FX | E#010001 | FUJITSU_ERRATUM_010001 | diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt @@ -1,70 +0,0 @@ -* ARM Performance Monitor Units - -ARM cores often have a PMU for counting cpu and cache events like cache misses -and hits. The interface to the PMU is part of the ARM ARM. The ARM PMU -representation in the device tree should be done as under:- - -Required properties: - -- compatible : should be one of - "apm,potenza-pmu" - "arm,armv8-pmuv3" - "arm,cortex-a73-pmu" - "arm,cortex-a72-pmu" - "arm,cortex-a57-pmu" - "arm,cortex-a53-pmu" - "arm,cortex-a35-pmu" - "arm,cortex-a17-pmu" - "arm,cortex-a15-pmu" - "arm,cortex-a12-pmu" - "arm,cortex-a9-pmu" - "arm,cortex-a8-pmu" - "arm,cortex-a7-pmu" - "arm,cortex-a5-pmu" - "arm,arm11mpcore-pmu" - "arm,arm1176-pmu" - "arm,arm1136-pmu" - "brcm,vulcan-pmu" - "cavium,thunder-pmu" - "qcom,scorpion-pmu" - "qcom,scorpion-mp-pmu" - "qcom,krait-pmu" -- interrupts : 1 combined interrupt or 1 per core. If the interrupt is a per-cpu - interrupt (PPI) then 1 interrupt should be specified. - -Optional properties: - -- interrupt-affinity : When using SPIs, specifies a list of phandles to CPU - nodes corresponding directly to the affinity of - the SPIs listed in the interrupts property. - - When using a PPI, specifies a list of phandles to CPU - nodes corresponding to the set of CPUs which have - a PMU of this type signalling the PPI listed in the - interrupts property, unless this is already specified - by the PPI interrupt specifier itself (in which case - the interrupt-affinity property shouldn't be present). - - This property should be present when there is more than - a single SPI. - - -- qcom,no-pc-write : Indicates that this PMU doesn't support the 0xc and 0xd - events. - -- secure-reg-access : Indicates that the ARMv7 Secure Debug Enable Register - (SDER) is accessible. This will cause the driver to do - any setup required that is only possible in ARMv7 secure - state. If not present the ARMv7 SDER will not be touched, - which means the PMU may fail to operate unless external - code (bootloader or security monitor) has performed the - appropriate initialisation. Note that this property is - not valid for non-ARMv7 CPUs or ARMv7 CPUs booting Linux - in Non-secure state. - -Example: - -pmu { - compatible = "arm,cortex-a9-pmu"; - interrupts = <100 101>; -}; diff --git a/Documentation/devicetree/bindings/arm/pmu.yaml b/Documentation/devicetree/bindings/arm/pmu.yaml @@ -0,0 +1,87 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/arm/pmu.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: ARM Performance Monitor Units + +maintainers: + - Mark Rutland <mark.rutland@arm.com> + - Will Deacon <will.deacon@arm.com> + +description: |+ + ARM cores often have a PMU for counting cpu and cache events like cache misses + and hits. The interface to the PMU is part of the ARM ARM. The ARM PMU + representation in the device tree should be done as under:- + +properties: + compatible: + items: + - enum: + - apm,potenza-pmu + - arm,armv8-pmuv3 + - arm,cortex-a73-pmu + - arm,cortex-a72-pmu + - arm,cortex-a57-pmu + - arm,cortex-a53-pmu + - arm,cortex-a35-pmu + - arm,cortex-a17-pmu + - arm,cortex-a15-pmu + - arm,cortex-a12-pmu + - arm,cortex-a9-pmu + - arm,cortex-a8-pmu + - arm,cortex-a7-pmu + - arm,cortex-a5-pmu + - arm,arm11mpcore-pmu + - arm,arm1176-pmu + - arm,arm1136-pmu + - brcm,vulcan-pmu + - cavium,thunder-pmu + - qcom,scorpion-pmu + - qcom,scorpion-mp-pmu + - qcom,krait-pmu + + interrupts: + # Don't know how many CPUs, so no constraints to specify + description: 1 per-cpu interrupt (PPI) or 1 interrupt per core. + + interrupt-affinity: + $ref: /schemas/types.yaml#/definitions/phandle-array + description: + When using SPIs, specifies a list of phandles to CPU + nodes corresponding directly to the affinity of + the SPIs listed in the interrupts property. + + When using a PPI, specifies a list of phandles to CPU + nodes corresponding to the set of CPUs which have + a PMU of this type signalling the PPI listed in the + interrupts property, unless this is already specified + by the PPI interrupt specifier itself (in which case + the interrupt-affinity property shouldn't be present). + + This property should be present when there is more than + a single SPI. + + qcom,no-pc-write: + type: boolean + description: + Indicates that this PMU doesn't support the 0xc and 0xd events. + + secure-reg-access: + type: boolean + description: + Indicates that the ARMv7 Secure Debug Enable Register + (SDER) is accessible. This will cause the driver to do + any setup required that is only possible in ARMv7 secure + state. If not present the ARMv7 SDER will not be touched, + which means the PMU may fail to operate unless external + code (bootloader or security monitor) has performed the + appropriate initialisation. Note that this property is + not valid for non-ARMv7 CPUs or ARMv7 CPUs booting Linux + in Non-secure state. + +required: + - compatible + +... diff --git a/MAINTAINERS b/MAINTAINERS @@ -366,6 +366,7 @@ M: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> M: Hanjun Guo <hanjun.guo@linaro.org> M: Sudeep Holla <sudeep.holla@arm.com> L: linux-acpi@vger.kernel.org +L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: drivers/acpi/arm64 @@ -1195,7 +1196,7 @@ F: arch/arm*/include/asm/hw_breakpoint.h F: arch/arm*/include/asm/perf_event.h F: drivers/perf/* F: include/linux/perf/arm_pmu.h -F: Documentation/devicetree/bindings/arm/pmu.txt +F: Documentation/devicetree/bindings/arm/pmu.yaml F: Documentation/devicetree/bindings/perf/ ARM PORT diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h @@ -34,6 +34,7 @@ #define ICC_SRE __ACCESS_CP15(c12, 0, c12, 5) #define ICC_IGRPEN1 __ACCESS_CP15(c12, 0, c12, 7) #define ICC_BPR1 __ACCESS_CP15(c12, 0, c12, 3) +#define ICC_RPR __ACCESS_CP15(c12, 0, c11, 3) #define __ICC_AP0Rx(x) __ACCESS_CP15(c12, 0, c8, 4 | x) #define ICC_AP0R0 __ICC_AP0Rx(0) @@ -245,6 +246,21 @@ static inline void gic_write_bpr1(u32 val) write_sysreg(val, ICC_BPR1); } +static inline u32 gic_read_pmr(void) +{ + return read_sysreg(ICC_PMR); +} + +static inline void gic_write_pmr(u32 val) +{ + write_sysreg(val, ICC_PMR); +} + +static inline u32 gic_read_rpr(void) +{ + return read_sysreg(ICC_RPR); +} + /* * Even in 32bit systems that use LPAE, there is no guarantee that the I/O * interface provides true 64bit atomic accesses, so using strd/ldrd doesn't @@ -347,5 +363,22 @@ static inline void gits_write_vpendbaser(u64 val, void * __iomem addr) #define gits_read_vpendbaser(c) __gic_readq_nonatomic(c) +static inline bool gic_prio_masking_enabled(void) +{ + return false; +} + +static inline void gic_pmr_mask_irqs(void) +{ + /* Should not get called. */ + WARN_ON_ONCE(true); +} + +static inline void gic_arch_enable_irqs(void) +{ + /* Should not get called. */ + WARN_ON_ONCE(true); +} + #endif /* !__ASSEMBLY__ */ #endif /* !__ASM_ARCH_GICV3_H */ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig @@ -643,6 +643,25 @@ config QCOM_FALKOR_ERRATUM_E1041 If unsure, say Y. +config FUJITSU_ERRATUM_010001 + bool "Fujitsu-A64FX erratum E#010001: Undefined fault may occur wrongly" + default y + help + This option adds workaround for Fujitsu-A64FX erratum E#010001. + On some variants of the Fujitsu-A64FX cores ver(1.0, 1.1), memory + accesses may cause undefined fault (Data abort, DFSC=0b111111). + This fault occurs under a specific hardware condition when a + load/store instruction performs an address translation using: + case-1 TTBR0_EL1 with TCR_EL1.NFD0 == 1. + case-2 TTBR0_EL2 with TCR_EL2.NFD0 == 1. + case-3 TTBR1_EL1 with TCR_EL1.NFD1 == 1. + case-4 TTBR1_EL2 with TCR_EL2.NFD1 == 1. + + The workaround is to ensure these bits are clear in TCR_ELx. + The workaround only affect the Fujitsu-A64FX. + + If unsure, say Y. + endmenu @@ -792,8 +811,7 @@ config SCHED_SMT config NR_CPUS int "Maximum number of CPUs (2-4096)" range 2 4096 - # These have to remain sorted largest to smallest - default "64" + default "256" config HOTPLUG_CPU bool "Support for hot-pluggable CPUs" @@ -1328,6 +1346,20 @@ config ARM64_MODULE_PLTS bool select HAVE_MOD_ARCH_SPECIFIC +config ARM64_PSEUDO_NMI + bool "Support for NMI-like interrupts" + select CONFIG_ARM_GIC_V3 + help + Adds support for mimicking Non-Maskable Interrupts through the use of + GIC interrupt priority. This support requires version 3 or later of + Arm GIC. + + This high priority configuration for interrupts needs to be + explicitly enabled by setting the kernel parameter + "irqchip.gicv3_pseudo_nmi" to 1. + + If unsure, say N + config RELOCATABLE bool help diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms @@ -151,7 +151,7 @@ config ARCH_MVEBU config ARCH_MXC bool "ARMv8 based NXP i.MX SoC family" select ARM64_ERRATUM_843419 - select ARM64_ERRATUM_845719 + select ARM64_ERRATUM_845719 if COMPAT select IMX_GPCV2 select IMX_GPCV2_PM_DOMAINS select PM diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h @@ -14,8 +14,6 @@ #include <linux/stddef.h> #include <linux/stringify.h> -extern int alternatives_applied; - struct alt_instr { s32 orig_offset; /* offset to original instruction */ s32 alt_offset; /* offset to replacement instruction */ @@ -27,7 +25,9 @@ struct alt_instr { typedef void (*alternative_cb_t)(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst); +void __init apply_boot_alternatives(void); void __init apply_alternatives_all(void); +bool alternative_is_applied(u16 cpufeature); #ifdef CONFIG_MODULES void apply_alternatives_module(void *start, size_t length); diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h @@ -22,6 +22,7 @@ #ifndef __ASSEMBLY__ +#include <linux/irqchip/arm-gic-common.h> #include <linux/stringify.h> #include <asm/barrier.h> #include <asm/cacheflush.h> @@ -114,6 +115,21 @@ static inline void gic_write_bpr1(u32 val) write_sysreg_s(val, SYS_ICC_BPR1_EL1); } +static inline u32 gic_read_pmr(void) +{ + return read_sysreg_s(SYS_ICC_PMR_EL1); +} + +static inline void gic_write_pmr(u32 val) +{ + write_sysreg_s(val, SYS_ICC_PMR_EL1); +} + +static inline u32 gic_read_rpr(void) +{ + return read_sysreg_s(SYS_ICC_RPR_EL1); +} + #define gic_read_typer(c) readq_relaxed(c) #define gic_write_irouter(v, c) writeq_relaxed(v, c) #define gic_read_lpir(c) readq_relaxed(c) @@ -140,5 +156,21 @@ static inline void gic_write_bpr1(u32 val) #define gits_write_vpendbaser(v, c) writeq_relaxed(v, c) #define gits_read_vpendbaser(c) readq_relaxed(c) +static inline bool gic_prio_masking_enabled(void) +{ + return system_uses_irq_prio_masking(); +} + +static inline void gic_pmr_mask_irqs(void) +{ + BUILD_BUG_ON(GICD_INT_DEF_PRI <= GIC_PRIO_IRQOFF); + gic_write_pmr(GIC_PRIO_IRQOFF); +} + +static inline void gic_arch_enable_irqs(void) +{ + asm volatile ("msr daifclr, #2" : : : "memory"); +} + #endif /* __ASSEMBLY__ */ #endif /* __ASM_ARCH_GICV3_H */ diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h @@ -24,7 +24,7 @@ .endm .macro __uaccess_ttbr0_enable, tmp1, tmp2 - get_thread_info \tmp1 + get_current_task \tmp1 ldr \tmp1, [\tmp1, #TSK_TI_TTBR0] // load saved TTBR0_EL1 mrs \tmp2, ttbr1_el1 extr \tmp2, \tmp2, \tmp1, #48 diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h @@ -27,6 +27,7 @@ #include <asm/asm-offsets.h> #include <asm/cpufeature.h> +#include <asm/cputype.h> #include <asm/debug-monitors.h> #include <asm/page.h> #include <asm/pgtable-hwdef.h> @@ -62,16 +63,8 @@ .endm /* - * Enable and disable interrupts. + * Save/restore interrupts. */ - .macro disable_irq - msr daifset, #2 - .endm - - .macro enable_irq - msr daifclr, #2 - .endm - .macro save_and_disable_irq, flags mrs \flags, daif msr daifset, #2 @@ -536,9 +529,9 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU .endm /* - * Return the current thread_info. + * Return the current task_struct. */ - .macro get_thread_info, rd + .macro get_current_task, rd mrs \rd, sp_el0 .endm @@ -604,6 +597,25 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU #endif .endm +/* + * tcr_clear_errata_bits - Clear TCR bits that trigger an errata on this CPU. + */ + .macro tcr_clear_errata_bits, tcr, tmp1, tmp2 +#ifdef CONFIG_FUJITSU_ERRATUM_010001 + mrs \tmp1, midr_el1 + + mov_q \tmp2, MIDR_FUJITSU_ERRATUM_010001_MASK + and \tmp1, \tmp1, \tmp2 + mov_q \tmp2, MIDR_FUJITSU_ERRATUM_010001 + cmp \tmp1, \tmp2 + b.ne 10f + + mov_q \tmp2, TCR_CLEAR_FUJITSU_ERRATUM_010001 + bic \tcr, \tcr, \tmp2 +10: +#endif /* CONFIG_FUJITSU_ERRATUM_010001 */ + .endm + /** * Errata workaround prior to disable MMU. Insert an ISB immediately prior * to executing the MSR that will change SCTLR_ELn[M] from a value of 1 to 0. @@ -721,7 +733,7 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU .macro if_will_cond_yield_neon #ifdef CONFIG_PREEMPT - get_thread_info x0 + get_current_task x0 ldr x0, [x0, #TSK_TI_PREEMPT] sub x0, x0, #PREEMPT_DISABLE_OFFSET cbz x0, .Lyield_\@ diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h @@ -60,7 +60,8 @@ #define ARM64_HAS_ADDRESS_AUTH_IMP_DEF 39 #define ARM64_HAS_GENERIC_AUTH_ARCH 40 #define ARM64_HAS_GENERIC_AUTH_IMP_DEF 41 +#define ARM64_HAS_IRQ_PRIO_MASKING 42 -#define ARM64_NCAPS 42 +#define ARM64_NCAPS 43 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h @@ -391,6 +391,10 @@ extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS]; extern struct static_key_false arm64_const_caps_ready; +/* ARM64 CAPS + alternative_cb */ +#define ARM64_NPATCHABLE (ARM64_NCAPS + 1) +extern DECLARE_BITMAP(boot_capabilities, ARM64_NPATCHABLE); + #define for_each_available_cap(cap) \ for_each_set_bit(cap, cpu_hwcaps, ARM64_NCAPS) @@ -612,6 +616,12 @@ static inline bool system_supports_generic_auth(void) cpus_have_const_cap(ARM64_HAS_GENERIC_AUTH_IMP_DEF)); } +static inline bool system_uses_irq_prio_masking(void) +{ + return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && + cpus_have_const_cap(ARM64_HAS_IRQ_PRIO_MASKING); +} + #define ARM64_SSBD_UNKNOWN -1 #define ARM64_SSBD_FORCE_DISABLE 0 #define ARM64_SSBD_KERNEL 1 diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h @@ -76,6 +76,7 @@ #define ARM_CPU_IMP_BRCM 0x42 #define ARM_CPU_IMP_QCOM 0x51 #define ARM_CPU_IMP_NVIDIA 0x4E +#define ARM_CPU_IMP_FUJITSU 0x46 #define ARM_CPU_PART_AEM_V8 0xD0F #define ARM_CPU_PART_FOUNDATION 0xD00 @@ -104,6 +105,8 @@ #define NVIDIA_CPU_PART_DENVER 0x003 #define NVIDIA_CPU_PART_CARMEL 0x004 +#define FUJITSU_CPU_PART_A64FX 0x001 + #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) @@ -122,6 +125,12 @@ #define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO) #define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER) #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL) +#define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX) + +/* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ +#define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX +#define MIDR_FUJITSU_ERRATUM_010001_MASK (~MIDR_VARIANT(1)) +#define TCR_CLEAR_FUJITSU_ERRATUM_010001 (TCR_NFD1 | TCR_NFD0) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h @@ -18,6 +18,8 @@ #include <linux/irqflags.h> +#include <asm/cpufeature.h> + #define DAIF_PROCCTX 0 #define DAIF_PROCCTX_NOIRQ PSR_I_BIT #define DAIF_ERRCTX (PSR_I_BIT | PSR_A_BIT) @@ -37,31 +39,61 @@ static inline unsigned long local_daif_save(void) { unsigned long flags; - flags = arch_local_save_flags(); + flags = read_sysreg(daif); + + if (system_uses_irq_prio_masking()) { + /* If IRQs are masked with PMR, reflect it in the flags */ + if (read_sysreg_s(SYS_ICC_PMR_EL1) <= GIC_PRIO_IRQOFF) + flags |= PSR_I_BIT; + } local_daif_mask(); return flags; } -static inline void local_daif_unmask(void) -{ - trace_hardirqs_on(); - asm volatile( - "msr daifclr, #0xf // local_daif_unmask" - : - : - : "memory"); -} - static inline void local_daif_restore(unsigned long flags) { - if (!arch_irqs_disabled_flags(flags)) + bool irq_disabled = flags & PSR_I_BIT; + + if (!irq_disabled) { trace_hardirqs_on(); - arch_local_irq_restore(flags); + if (system_uses_irq_prio_masking()) + arch_local_irq_enable(); + } else if (!(flags & PSR_A_BIT)) { + /* + * If interrupts are disabled but we can take + * asynchronous errors, we can take NMIs + */ + if (system_uses_irq_prio_masking()) { + flags &= ~PSR_I_BIT; + /* + * There has been concern that the write to daif + * might be reordered before this write to PMR. + * From the ARM ARM DDI 0487D.a, section D1.7.1 + * "Accessing PSTATE fields": + * Writes to the PSTATE fields have side-effects on + * various aspects of the PE operation. All of these + * side-effects are guaranteed: + * - Not to be visible to earlier instructions in + * the execution stream. + * - To be visible to later instructions in the + * execution stream + * + * Also, writes to PMR are self-synchronizing, so no + * interrupts with a lower priority than PMR is signaled + * to the PE after the write. + * + * So we don't need additional synchronization here. + */ + arch_local_irq_disable(); + } + } + + write_sysreg(flags, daif); - if (arch_irqs_disabled_flags(flags)) + if (irq_disabled) trace_hardirqs_off(); } diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h @@ -44,6 +44,17 @@ efi_status_t __efi_rt_asm_wrapper(void *, const char *, ...); #define ARCH_EFI_IRQ_FLAGS_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT) +/* + * Even when Linux uses IRQ priorities for IRQ disabling, EFI does not. + * And EFI shouldn't really play around with priority masking as it is not aware + * which priorities the OS has assigned to its interrupts. + */ +#define arch_efi_save_flags(state_flags) \ + ((void)((state_flags) = read_sysreg(daif))) + +#define arch_efi_restore_flags(state_flags) write_sysreg(state_flags, daif) + + /* arch specific definitions used by the stub code */ /* diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h @@ -17,8 +17,12 @@ #define __ASM_HARDIRQ_H #include <linux/cache.h> +#include <linux/percpu.h> #include <linux/threads.h> +#include <asm/barrier.h> #include <asm/irq.h> +#include <asm/kvm_arm.h> +#include <asm/sysreg.h> #define NR_IPI 7 @@ -37,6 +41,33 @@ u64 smp_irq_stat_cpu(unsigned int cpu); #define __ARCH_IRQ_EXIT_IRQS_DISABLED 1 +struct nmi_ctx { + u64 hcr; +}; + +DECLARE_PER_CPU(struct nmi_ctx, nmi_contexts); + +#define arch_nmi_enter() \ + do { \ + if (is_kernel_in_hyp_mode()) { \ + struct nmi_ctx *nmi_ctx = this_cpu_ptr(&nmi_contexts); \ + nmi_ctx->hcr = read_sysreg(hcr_el2); \ + if (!(nmi_ctx->hcr & HCR_TGE)) { \ + write_sysreg(nmi_ctx->hcr | HCR_TGE, hcr_el2); \ + isb(); \ + } \ + } \ + } while (0) + +#define arch_nmi_exit() \ + do { \ + if (is_kernel_in_hyp_mode()) { \ + struct nmi_ctx *nmi_ctx = this_cpu_ptr(&nmi_contexts); \ + if (!(nmi_ctx->hcr & HCR_TGE)) \ + write_sysreg(nmi_ctx->hcr, hcr_el2); \ + } \ + } while (0) + static inline void ack_bad_irq(unsigned int irq) { extern unsigned long irq_err_count; diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h @@ -121,6 +121,7 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) : "memory"); \ }) +#define __io_par(v) __iormb(v) #define __iowmb() wmb() #define mmiowb() do { } while (0) diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h @@ -18,7 +18,9 @@ #ifdef __KERNEL__ +#include <asm/alternative.h> #include <asm/ptrace.h> +#include <asm/sysreg.h> /* * Aarch64 has flags for masking: Debug, Asynchronous (serror), Interrupts and @@ -36,33 +38,27 @@ /* * CPU interrupt mask handling. */ -static inline unsigned long arch_local_irq_save(void) -{ - unsigned long flags; - asm volatile( - "mrs %0, daif // arch_local_irq_save\n" - "msr daifset, #2" - : "=r" (flags) - : - : "memory"); - return flags; -} - static inline void arch_local_irq_enable(void) { - asm volatile( - "msr daifclr, #2 // arch_local_irq_enable" - : + asm volatile(ALTERNATIVE( + "msr daifclr, #2 // arch_local_irq_enable\n" + "nop", + "msr_s " __stringify(SYS_ICC_PMR_EL1) ",%0\n" + "dsb sy", + ARM64_HAS_IRQ_PRIO_MASKING) : + : "r" ((unsigned long) GIC_PRIO_IRQON) : "memory"); } static inline void arch_local_irq_disable(void) { - asm volatile( - "msr daifset, #2 // arch_local_irq_disable" - : + asm volatile(ALTERNATIVE( + "msr daifset, #2 // arch_local_irq_disable", + "msr_s " __stringify(SYS_ICC_PMR_EL1) ", %0", + ARM64_HAS_IRQ_PRIO_MASKING) : + : "r" ((unsigned long) GIC_PRIO_IRQOFF) : "memory"); } @@ -71,12 +67,44 @@ static inline void arch_local_irq_disable(void) */ static inline unsigned long arch_local_save_flags(void) { + unsigned long daif_bits; unsigned long flags; - asm volatile( - "mrs %0, daif // arch_local_save_flags" - : "=r" (flags) - : + + daif_bits = read_sysreg(daif); + + /* + * The asm is logically equivalent to: + * + * if (system_uses_irq_prio_masking()) + * flags = (daif_bits & PSR_I_BIT) ? + * GIC_PRIO_IRQOFF : + * read_sysreg_s(SYS_ICC_PMR_EL1); + * else + * flags = daif_bits; + */ + asm volatile(ALTERNATIVE( + "mov %0, %1\n" + "nop\n" + "nop", + "mrs_s %0, " __stringify(SYS_ICC_PMR_EL1) "\n" + "ands %1, %1, " __stringify(PSR_I_BIT) "\n" + "csel %0, %0, %2, eq", + ARM64_HAS_IRQ_PRIO_MASKING) + : "=&r" (flags), "+r" (daif_bits) + : "r" ((unsigned long) GIC_PRIO_IRQOFF) : "memory"); + + return flags; +} + +static inline unsigned long arch_local_irq_save(void) +{ + unsigned long flags; + + flags = arch_local_save_flags(); + + arch_local_irq_disable(); + return flags; } @@ -85,16 +113,32 @@ static inline unsigned long arch_local_save_flags(void) */ static inline void arch_local_irq_restore(unsigned long flags) { - asm volatile( - "msr daif, %0 // arch_local_irq_restore" - : - : "r" (flags) - : "memory"); + asm volatile(ALTERNATIVE( + "msr daif, %0\n" + "nop", + "msr_s " __stringify(SYS_ICC_PMR_EL1) ", %0\n" + "dsb sy", + ARM64_HAS_IRQ_PRIO_MASKING) + : "+r" (flags) + : + : "memory"); } static inline int arch_irqs_disabled_flags(unsigned long flags) { - return flags & PSR_I_BIT; + int res; + + asm volatile(ALTERNATIVE( + "and %w0, %w1, #" __stringify(PSR_I_BIT) "\n" + "nop", + "cmp %w1, #" __stringify(GIC_PRIO_IRQOFF) "\n" + "cset %w0, ls", + ARM64_HAS_IRQ_PRIO_MASKING) + : "=&r" (res) + : "r" ((int) flags) + : "memory"); + + return res; } #endif #endif diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h @@ -24,6 +24,7 @@ #include <linux/types.h> #include <linux/kvm_types.h> +#include <asm/arch_gicv3.h> #include <asm/cpufeature.h> #include <asm/daifflags.h> #include <asm/fpsimd.h> @@ -485,10 +486,25 @@ static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) static inline void kvm_arm_vhe_guest_enter(void) { local_daif_mask(); + + /* + * Having IRQs masked via PMR when entering the guest means the GIC + * will not signal the CPU of interrupts of lower priority, and the + * only way to get out will be via guest exceptions. + * Naturally, we want to avoid this. + */ + if (system_uses_irq_prio_masking()) { + gic_write_pmr(GIC_PRIO_IRQON); + dsb(sy); + } } static inline void kvm_arm_vhe_guest_exit(void) { + /* + * local_daif_restore() takes care to properly restore PSTATE.DAIF + * and the GIC PMR if the host is using IRQ priorities. + */ local_daif_restore(DAIF_PROCCTX_NOIRQ); /* diff --git a/arch/arm64/include/asm/memblock.h b/arch/arm64/include/asm/memblock.h @@ -1,21 +0,0 @@ -/* - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ -#ifndef __ASM_MEMBLOCK_H -#define __ASM_MEMBLOCK_H - -extern void arm64_memblock_init(void); - -#endif diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h @@ -312,8 +312,9 @@ static inline void *phys_to_virt(phys_addr_t x) #define page_to_virt(page) ({ \ unsigned long __addr = \ ((__page_to_voff(page)) | PAGE_OFFSET); \ - __addr = __tag_set(__addr, page_kasan_tag(page)); \ - ((void *)__addr); \ + unsigned long __addr_tag = \ + __tag_set(__addr, page_kasan_tag(page)); \ + ((void *)__addr_tag); \ }) #define virt_to_page(vaddr) ((struct page *)((__virt_to_pgoff(vaddr)) | VMEMMAP_START)) diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h @@ -129,6 +129,7 @@ static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void) static inline void arm64_apply_bp_hardening(void) { } #endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ +extern void arm64_memblock_init(void); extern void paging_init(void); extern void bootmem_init(void); extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h @@ -302,6 +302,7 @@ #define TCR_TBI1 (UL(1) << 38) #define TCR_HA (UL(1) << 39) #define TCR_HD (UL(1) << 40) +#define TCR_NFD0 (UL(1) << 53) #define TCR_NFD1 (UL(1) << 54) /* diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h @@ -191,6 +191,9 @@ static inline void start_thread_common(struct pt_regs *regs, unsigned long pc) memset(regs, 0, sizeof(*regs)); forget_syscall(regs); regs->pc = pc; + + if (system_uses_irq_prio_masking()) + regs->pmr_save = GIC_PRIO_IRQON; } static inline void start_thread(struct pt_regs *regs, unsigned long pc, diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h @@ -34,13 +34,10 @@ struct ptdump_info { void ptdump_walk_pgd(struct seq_file *s, struct ptdump_info *info); #ifdef CONFIG_ARM64_PTDUMP_DEBUGFS -int ptdump_debugfs_register(struct ptdump_info *info, const char *name); +void ptdump_debugfs_register(struct ptdump_info *info, const char *name); #else -static inline int ptdump_debugfs_register(struct ptdump_info *info, - const char *name) -{ - return 0; -} +static inline void ptdump_debugfs_register(struct ptdump_info *info, + const char *name) { } #endif void ptdump_check_wx(void); #endif /* CONFIG_ARM64_PTDUMP_CORE */ diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h @@ -19,12 +19,26 @@ #ifndef __ASM_PTRACE_H #define __ASM_PTRACE_H +#include <asm/cpufeature.h> + #include <uapi/asm/ptrace.h> /* Current Exception Level values, as contained in CurrentEL */ #define CurrentEL_EL1 (1 << 2) #define CurrentEL_EL2 (2 << 2) +/* + * PMR values used to mask/unmask interrupts. + * + * GIC priority masking works as follows: if an IRQ's priority is a higher value + * than the value held in PMR, that IRQ is masked. Lowering the value of PMR + * means masking more IRQs (or at least that the same IRQs remain masked). + * + * To mask interrupts, we clear the most significant bit of PMR. + */ +#define GIC_PRIO_IRQON 0xf0 +#define GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80) + /* Additional SPSR bits not exposed in the UABI */ #define PSR_IL_BIT (1 << 20) @@ -167,7 +181,8 @@ struct pt_regs { #endif u64 orig_addr_limit; - u64 unused; // maintain 16 byte alignment + /* Only valid when ARM64_HAS_IRQ_PRIO_MASKING is enabled. */ + u64 pmr_save; u64 stackframe[2]; }; @@ -202,8 +217,13 @@ static inline void forget_syscall(struct pt_regs *regs) #define processor_mode(regs) \ ((regs)->pstate & PSR_MODE_MASK) -#define interrupts_enabled(regs) \ - (!((regs)->pstate & PSR_I_BIT)) +#define irqs_priority_unmasked(regs) \ + (system_uses_irq_prio_masking() ? \ + (regs)->pmr_save == GIC_PRIO_IRQON : \ + true) + +#define interrupts_enabled(regs) \ + (!((regs)->pstate & PSR_I_BIT) && irqs_priority_unmasked(regs)) #define fast_interrupts_enabled(regs) \ (!((regs)->pstate & PSR_F_BIT)) diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h @@ -79,7 +79,6 @@ void arch_release_task_struct(struct task_struct *tsk); * TIF_SIGPENDING - signal pending * TIF_NEED_RESCHED - rescheduling necessary * TIF_NOTIFY_RESUME - callback before returning to user - * TIF_USEDFPU - FPU was used by this task this quantum (SMP) */ #define TIF_SIGPENDING 0 #define TIF_NEED_RESCHED 1 diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h @@ -267,7 +267,7 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr) : "+r" (err), "=&r" (x) \ : "r" (addr), "i" (-EFAULT)) -#define __get_user_err(x, ptr, err) \ +#define __raw_get_user(x, ptr, err) \ do { \ unsigned long __gu_val; \ __chk_user_ptr(ptr); \ @@ -296,28 +296,22 @@ do { \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ } while (0) -#define __get_user_check(x, ptr, err) \ -({ \ +#define __get_user_error(x, ptr, err) \ +do { \ __typeof__(*(ptr)) __user *__p = (ptr); \ might_fault(); \ if (access_ok(__p, sizeof(*__p))) { \ __p = uaccess_mask_ptr(__p); \ - __get_user_err((x), __p, (err)); \ + __raw_get_user((x), __p, (err)); \ } else { \ (x) = 0; (err) = -EFAULT; \ } \ -}) - -#define __get_user_error(x, ptr, err) \ -({ \ - __get_user_check((x), (ptr), (err)); \ - (void)0; \ -}) +} while (0) #define __get_user(x, ptr) \ ({ \ int __gu_err = 0; \ - __get_user_check((x), (ptr), __gu_err); \ + __get_user_error((x), (ptr), __gu_err); \ __gu_err; \ }) @@ -337,7 +331,7 @@ do { \ : "+r" (err) \ : "r" (x), "r" (addr), "i" (-EFAULT)) -#define __put_user_err(x, ptr, err) \ +#define __raw_put_user(x, ptr, err) \ do { \ __typeof__(*(ptr)) __pu_val = (x); \ __chk_user_ptr(ptr); \ @@ -365,28 +359,22 @@ do { \ uaccess_disable_not_uao(); \ } while (0) -#define __put_user_check(x, ptr, err) \ -({ \ +#define __put_user_error(x, ptr, err) \ +do { \ __typeof__(*(ptr)) __user *__p = (ptr); \ might_fault(); \ if (access_ok(__p, sizeof(*__p))) { \ __p = uaccess_mask_ptr(__p); \ - __put_user_err((x), __p, (err)); \ + __raw_put_user((x), __p, (err)); \ } else { \ (err) = -EFAULT; \ } \ -}) - -#define __put_user_error(x, ptr, err) \ -({ \ - __put_user_check((x), (ptr), (err)); \ - (void)0; \ -}) +} while (0) #define __put_user(x, ptr) \ ({ \ int __pu_err = 0; \ - __put_user_check((x), (ptr), __pu_err); \ + __put_user_error((x), (ptr), __pu_err); \ __pu_err; \ }) diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h @@ -233,6 +233,19 @@ struct user_pac_mask { __u64 insn_mask; }; +/* pointer authentication keys (NT_ARM_PACA_KEYS, NT_ARM_PACG_KEYS) */ + +struct user_pac_address_keys { + __uint128_t apiakey; + __uint128_t apibkey; + __uint128_t apdakey; + __uint128_t apdbkey; +}; + +struct user_pac_generic_keys { + __uint128_t apgakey; +}; + #endif /* __ASSEMBLY__ */ #endif /* _UAPI__ASM_PTRACE_H */ diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c @@ -32,13 +32,23 @@ #define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset) #define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset) -int alternatives_applied; +static int all_alternatives_applied; + +static DECLARE_BITMAP(applied_alternatives, ARM64_NCAPS); struct alt_region { struct alt_instr *begin; struct alt_instr *end; }; +bool alternative_is_applied(u16 cpufeature) +{ + if (WARN_ON(cpufeature >= ARM64_NCAPS)) + return false; + + return test_bit(cpufeature, applied_alternatives); +} + /* * Check if the target PC is within an alternative block. */ @@ -145,7 +155,8 @@ static void clean_dcache_range_nopatch(u64 start, u64 end) } while (cur += d_size, cur < end); } -static void __apply_alternatives(void *alt_region, bool is_module) +static void __apply_alternatives(void *alt_region, bool is_module, + unsigned long *feature_mask) { struct alt_instr *alt; struct alt_region *region = alt_region; @@ -155,6 +166,9 @@ static void __apply_alternatives(void *alt_region, bool is_module) for (alt = region->begin; alt < region->end; alt++) { int nr_inst; + if (!test_bit(alt->cpufeature, feature_mask)) + continue; + /* Use ARM64_CB_PATCH as an unconditional patch */ if (alt->cpufeature < ARM64_CB_PATCH && !cpus_have_cap(alt->cpufeature)) @@ -192,6 +206,12 @@ static void __apply_alternatives(void *alt_region, bool is_module) dsb(ish); __flush_icache_all(); isb(); + + /* Ignore ARM64_CB bit from feature mask */ + bitmap_or(applied_alternatives, applied_alternatives, + feature_mask, ARM64_NCAPS); + bitmap_and(applied_alternatives, applied_alternatives, + cpu_hwcaps, ARM64_NCAPS); } } @@ -208,14 +228,19 @@ static int __apply_alternatives_multi_stop(void *unused) /* We always have a CPU 0 at this point (__init) */ if (smp_processor_id()) { - while (!READ_ONCE(alternatives_applied)) + while (!READ_ONCE(all_alternatives_applied)) cpu_relax(); isb(); } else { - BUG_ON(alternatives_applied); - __apply_alternatives(&region, false); + DECLARE_BITMAP(remaining_capabilities, ARM64_NPATCHABLE); + + bitmap_complement(remaining_capabilities, boot_capabilities, + ARM64_NPATCHABLE); + + BUG_ON(all_alternatives_applied); + __apply_alternatives(&region, false, remaining_capabilities); /* Barriers provided by the cache flushing */ - WRITE_ONCE(alternatives_applied, 1); + WRITE_ONCE(all_alternatives_applied, 1); } return 0; @@ -227,6 +252,24 @@ void __init apply_alternatives_all(void) stop_machine(__apply_alternatives_multi_stop, NULL, cpu_online_mask); } +/* + * This is called very early in the boot process (directly after we run + * a feature detect on the boot CPU). No need to worry about other CPUs + * here. + */ +void __init apply_boot_alternatives(void) +{ + struct alt_region region = { + .begin = (struct alt_instr *)__alt_instructions, + .end = (struct alt_instr *)__alt_instructions_end, + }; + + /* If called on non-boot cpu things could go wrong */ + WARN_ON(smp_processor_id() != 0); + + __apply_alternatives(&region, false, &boot_capabilities[0]); +} + #ifdef CONFIG_MODULES void apply_alternatives_module(void *start, size_t length) { @@ -234,7 +277,10 @@ void apply_alternatives_module(void *start, size_t length) .begin = start, .end = start + length, }; + DECLARE_BITMAP(all_capabilities, ARM64_NPATCHABLE); + + bitmap_fill(all_capabilities, ARM64_NPATCHABLE); - __apply_alternatives(&region, true); + __apply_alternatives(&region, true, &all_capabilities[0]); } #endif diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c @@ -53,13 +53,9 @@ int main(void) DEFINE(THREAD_CPU_CONTEXT, offsetof(struct task_struct, thread.cpu_context)); BLANK(); DEFINE(S_X0, offsetof(struct pt_regs, regs[0])); - DEFINE(S_X1, offsetof(struct pt_regs, regs[1])); DEFINE(S_X2, offsetof(struct pt_regs, regs[2])); - DEFINE(S_X3, offsetof(struct pt_regs, regs[3])); DEFINE(S_X4, offsetof(struct pt_regs, regs[4])); - DEFINE(S_X5, offsetof(struct pt_regs, regs[5])); DEFINE(S_X6, offsetof(struct pt_regs, regs[6])); - DEFINE(S_X7, offsetof(struct pt_regs, regs[7])); DEFINE(S_X8, offsetof(struct pt_regs, regs[8])); DEFINE(S_X10, offsetof(struct pt_regs, regs[10])); DEFINE(S_X12, offsetof(struct pt_regs, regs[12])); @@ -73,14 +69,11 @@ int main(void) DEFINE(S_X28, offsetof(struct pt_regs, regs[28])); DEFINE(S_LR, offsetof(struct pt_regs, regs[30])); DEFINE(S_SP, offsetof(struct pt_regs, sp)); -#ifdef CONFIG_COMPAT - DEFINE(S_COMPAT_SP, offsetof(struct pt_regs, compat_sp)); -#endif DEFINE(S_PSTATE, offsetof(struct pt_regs, pstate)); DEFINE(S_PC, offsetof(struct pt_regs, pc)); - DEFINE(S_ORIG_X0, offsetof(struct pt_regs, orig_x0)); DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno)); DEFINE(S_ORIG_ADDR_LIMIT, offsetof(struct pt_regs, orig_addr_limit)); + DEFINE(S_PMR_SAVE, offsetof(struct pt_regs, pmr_save)); DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe)); DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); BLANK(); @@ -93,7 +86,6 @@ int main(void) BLANK(); DEFINE(PAGE_SZ, PAGE_SIZE); BLANK(); - DEFINE(DMA_BIDIRECTIONAL, DMA_BIDIRECTIONAL); DEFINE(DMA_TO_DEVICE, DMA_TO_DEVICE); DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE); BLANK(); @@ -110,25 +102,18 @@ int main(void) BLANK(); DEFINE(VDSO_CS_CYCLE_LAST, offsetof(struct vdso_data, cs_cycle_last)); DEFINE(VDSO_RAW_TIME_SEC, offsetof(struct vdso_data, raw_time_sec)); - DEFINE(VDSO_RAW_TIME_NSEC, offsetof(struct vdso_data, raw_time_nsec)); DEFINE(VDSO_XTIME_CLK_SEC, offsetof(struct vdso_data, xtime_clock_sec)); - DEFINE(VDSO_XTIME_CLK_NSEC, offsetof(struct vdso_data, xtime_clock_nsec)); DEFINE(VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec)); DEFINE(VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec)); DEFINE(VDSO_WTM_CLK_SEC, offsetof(struct vdso_data, wtm_clock_sec)); - DEFINE(VDSO_WTM_CLK_NSEC, offsetof(struct vdso_data, wtm_clock_nsec)); DEFINE(VDSO_TB_SEQ_COUNT, offsetof(struct vdso_data, tb_seq_count)); DEFINE(VDSO_CS_MONO_MULT, offsetof(struct vdso_data, cs_mono_mult)); - DEFINE(VDSO_CS_RAW_MULT, offsetof(struct vdso_data, cs_raw_mult)); DEFINE(VDSO_CS_SHIFT, offsetof(struct vdso_data, cs_shift)); DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest)); - DEFINE(VDSO_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime)); DEFINE(VDSO_USE_SYSCALL, offsetof(struct vdso_data, use_syscall)); BLANK(); DEFINE(TVAL_TV_SEC, offsetof(struct timeval, tv_sec)); - DEFINE(TVAL_TV_USEC, offsetof(struct timeval, tv_usec)); DEFINE(TSPEC_TV_SEC, offsetof(struct timespec, tv_sec)); - DEFINE(TSPEC_TV_NSEC, offsetof(struct timespec, tv_nsec)); BLANK(); DEFINE(TZ_MINWEST, offsetof(struct timezone, tz_minuteswest)); DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime)); @@ -142,13 +127,9 @@ int main(void) DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags)); DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs)); DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs)); - DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs)); - DEFINE(VCPU_FPEXC32_EL2, offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2])); - DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); DEFINE(HOST_CONTEXT_VCPU, offsetof(struct kvm_cpu_context, __hyp_running_vcpu)); #endif #ifdef CONFIG_CPU_PM - DEFINE(CPU_SUSPEND_SZ, sizeof(struct cpu_suspend_ctx)); DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp)); DEFINE(MPIDR_HASH_MASK, offsetof(struct mpidr_hash, mask)); DEFINE(MPIDR_HASH_SHIFTS, offsetof(struct mpidr_hash, shift_aff)); diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c @@ -54,6 +54,9 @@ DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); EXPORT_SYMBOL(cpu_hwcaps); static struct arm64_cpu_capabilities const __ro_after_init *cpu_hwcaps_ptrs[ARM64_NCAPS]; +/* Need also bit for ARM64_CB_PATCH */ +DECLARE_BITMAP(boot_capabilities, ARM64_NPATCHABLE); + /* * Flag to indicate if we have computed the system wide * capabilities based on the boot time active CPUs. This @@ -1118,7 +1121,7 @@ static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused) * that, freshly-onlined CPUs will set tpidr_el2, so we don't need to * do anything here. */ - if (!alternatives_applied) + if (!alternative_is_applied(ARM64_HAS_VIRT_HOST_EXTN)) write_sysreg(read_sysreg(tpidr_el1), tpidr_el2); } #endif @@ -1203,11 +1206,27 @@ static void cpu_enable_address_auth(struct arm64_cpu_capabilities const *cap) } #endif /* CONFIG_ARM64_PTR_AUTH */ +#ifdef CONFIG_ARM64_PSEUDO_NMI +static bool enable_pseudo_nmi; + +static int __init early_enable_pseudo_nmi(char *p) +{ + return strtobool(p, &enable_pseudo_nmi); +} +early_param("irqchip.gicv3_pseudo_nmi", early_enable_pseudo_nmi); + +static bool can_use_gic_priorities(const struct arm64_cpu_capabilities *entry, + int scope) +{ + return enable_pseudo_nmi && has_useable_gicv3_cpuif(entry, scope); +} +#endif + static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", .capability = ARM64_HAS_SYSREG_GIC_CPUIF, - .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE, .matches = has_useable_gicv3_cpuif, .sys_reg = SYS_ID_AA64PFR0_EL1, .field_pos = ID_AA64PFR0_GIC_SHIFT, @@ -1480,6 +1499,21 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, }, #endif /* CONFIG_ARM64_PTR_AUTH */ +#ifdef CONFIG_ARM64_PSEUDO_NMI + { + /* + * Depends on having GICv3 + */ + .desc = "IRQ priority masking", + .capability = ARM64_HAS_IRQ_PRIO_MASKING, + .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE, + .matches = can_use_gic_priorities, + .sys_reg = SYS_ID_AA64PFR0_EL1, + .field_pos = ID_AA64PFR0_GIC_SHIFT, + .sign = FTR_UNSIGNED, + .min_field_value = 1, + }, +#endif {}, }; @@ -1654,6 +1688,9 @@ static void update_cpu_capabilities(u16 scope_mask) if (caps->desc) pr_info("detected: %s\n", caps->desc); cpus_set_cap(caps->capability); + + if ((scope_mask & SCOPE_BOOT_CPU) && (caps->type & SCOPE_BOOT_CPU)) + set_bit(caps->capability, boot_capabilities); } } diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S @@ -185,7 +185,7 @@ alternative_cb_end .else add x21, sp, #S_FRAME_SIZE - get_thread_info tsk + get_current_task tsk /* Save the task's original addr_limit and set USER_DS */ ldr x20, [tsk, #TSK_TI_ADDR_LIMIT] str x20, [sp, #S_ORIG_ADDR_LIMIT] @@ -249,6 +249,12 @@ alternative_else_nop_endif msr sp_el0, tsk .endif + /* Save pmr */ +alternative_if ARM64_HAS_IRQ_PRIO_MASKING + mrs_s x20, SYS_ICC_PMR_EL1 + str x20, [sp, #S_PMR_SAVE] +alternative_else_nop_endif + /* * Registers that may be useful after this macro is invoked: * @@ -269,6 +275,14 @@ alternative_else_nop_endif /* No need to restore UAO, it will be restored from SPSR_EL1 */ .endif + /* Restore pmr */ +alternative_if ARM64_HAS_IRQ_PRIO_MASKING + ldr x20, [sp, #S_PMR_SAVE] + msr_s SYS_ICC_PMR_EL1, x20 + /* Ensure priority change is seen by redistributor */ + dsb sy +alternative_else_nop_endif + ldp x21, x22, [sp, #S_PC] // load ELR, SPSR .if \el == 0 ct_user_enter @@ -603,32 +617,52 @@ el1_irq: kernel_entry 1 enable_da_f #ifdef CONFIG_TRACE_IRQFLAGS +#ifdef CONFIG_ARM64_PSEUDO_NMI +alternative_if ARM64_HAS_IRQ_PRIO_MASKING + ldr x20, [sp, #S_PMR_SAVE] +alternative_else + mov x20, #GIC_PRIO_IRQON +alternative_endif + cmp x20, #GIC_PRIO_IRQOFF + /* Irqs were disabled, don't trace */ + b.ls 1f +#endif bl trace_hardirqs_off +1: #endif irq_handler #ifdef CONFIG_PREEMPT ldr x24, [tsk, #TSK_TI_PREEMPT] // get preempt count - cbnz x24, 1f // preempt count != 0 - bl el1_preempt +alternative_if ARM64_HAS_IRQ_PRIO_MASKING + /* + * DA_F were cleared at start of handling. If anything is set in DAIF, + * we come back from an NMI, so skip preemption + */ + mrs x0, daif + orr x24, x24, x0 +alternative_else_nop_endif + cbnz x24, 1f // preempt count != 0 || NMI return path + bl preempt_schedule_irq // irq en/disable is done inside 1: #endif #ifdef CONFIG_TRACE_IRQFLAGS +#ifdef CONFIG_ARM64_PSEUDO_NMI + /* + * if IRQs were disabled when we received the interrupt, we have an NMI + * and we are not re-enabling interrupt upon eret. Skip tracing. + */ + cmp x20, #GIC_PRIO_IRQOFF + b.ls 1f +#endif bl trace_hardirqs_on +1: #endif + kernel_exit 1 ENDPROC(el1_irq) -#ifdef CONFIG_PREEMPT -el1_preempt: - mov x24, lr -1: bl preempt_schedule_irq // irq en/disable is done inside - ldr x0, [tsk, #TSK_TI_FLAGS] // get new tasks TI_FLAGS - tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling? - ret x24 -#endif - /* * EL0 mode handlers. */ @@ -1070,7 +1104,7 @@ ENTRY(ret_from_fork) cbz x19, 1f // not a kernel thread mov x0, x20 blr x19 -1: get_thread_info tsk +1: get_current_task tsk b ret_to_user ENDPROC(ret_from_fork) NOKPROBE(ret_from_fork) diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c @@ -33,6 +33,9 @@ unsigned long irq_err_count; +/* Only access this in an NMI enter/exit */ +DEFINE_PER_CPU(struct nmi_ctx, nmi_contexts); + DEFINE_PER_CPU(unsigned long *, irq_stack_ptr); int arch_show_interrupts(struct seq_file *p, int prec) diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c @@ -244,27 +244,33 @@ int kgdb_arch_handle_exception(int exception_vector, int signo, static int kgdb_brk_fn(struct pt_regs *regs, unsigned int esr) { + if (user_mode(regs)) + return DBG_HOOK_ERROR; + kgdb_handle_exception(1, SIGTRAP, 0, regs); - return 0; + return DBG_HOOK_HANDLED; } NOKPROBE_SYMBOL(kgdb_brk_fn) static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr) { + if (user_mode(regs)) + return DBG_HOOK_ERROR; + compiled_break = 1; kgdb_handle_exception(1, SIGTRAP, 0, regs); - return 0; + return DBG_HOOK_HANDLED; } NOKPROBE_SYMBOL(kgdb_compiled_brk_fn); static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr) { - if (!kgdb_single_step) + if (user_mode(regs) || !kgdb_single_step) return DBG_HOOK_ERROR; kgdb_handle_exception(1, SIGTRAP, 0, regs); - return 0; + return DBG_HOOK_HANDLED; } NOKPROBE_SYMBOL(kgdb_step_brk_fn); diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c @@ -810,7 +810,7 @@ static void armv8pmu_clear_event_idx(struct pmu_hw_events *cpuc, } /* - * Add an event filter to a given event. This will only work for PMUv2 PMUs. + * Add an event filter to a given event. */ static int armv8pmu_set_event_filter(struct hw_perf_event *event, struct perf_event_attr *attr) diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c @@ -450,6 +450,9 @@ kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr) struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); int retval; + if (user_mode(regs)) + return DBG_HOOK_ERROR; + /* return error if this is not our step */ retval = kprobe_ss_hit(kcb, instruction_pointer(regs)); @@ -466,6 +469,9 @@ kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr) int __kprobes kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr) { + if (user_mode(regs)) + return DBG_HOOK_ERROR; + kprobe_handler(regs); return DBG_HOOK_HANDLED; } diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c @@ -51,6 +51,7 @@ #include <linux/thread_info.h> #include <asm/alternative.h> +#include <asm/arch_gicv3.h> #include <asm/compat.h> #include <asm/cacheflush.h> #include <asm/exec.h> @@ -74,6 +75,50 @@ EXPORT_SYMBOL_GPL(pm_power_off); void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); +static void __cpu_do_idle(void) +{ + dsb(sy); + wfi(); +} + +static void __cpu_do_idle_irqprio(void) +{ + unsigned long pmr; + unsigned long daif_bits; + + daif_bits = read_sysreg(daif); + write_sysreg(daif_bits | PSR_I_BIT, daif); + + /* + * Unmask PMR before going idle to make sure interrupts can + * be raised. + */ + pmr = gic_read_pmr(); + gic_write_pmr(GIC_PRIO_IRQON); + + __cpu_do_idle(); + + gic_write_pmr(pmr); + write_sysreg(daif_bits, daif); +} + +/* + * cpu_do_idle() + * + * Idle the processor (wait for interrupt). + * + * If the CPU supports priority masking we must do additional work to + * ensure that interrupts are not masked at the PMR (because the core will + * not wake up if we block the wake up signal in the interrupt controller). + */ +void cpu_do_idle(void) +{ + if (system_uses_irq_prio_masking()) + __cpu_do_idle_irqprio(); + else + __cpu_do_idle(); +} + /* * This is our default idle handler. */ @@ -232,6 +277,9 @@ void __show_regs(struct pt_regs *regs) printk("sp : %016llx\n", sp); + if (system_uses_irq_prio_masking()) + printk("pmr_save: %08llx\n", regs->pmr_save); + i = top_reg; while (i >= 0) { @@ -363,6 +411,9 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) childregs->pstate |= PSR_SSBS_BIT; + if (system_uses_irq_prio_masking()) + childregs->pmr_save = GIC_PRIO_IRQON; + p->thread.cpu_context.x19 = stack_start; p->thread.cpu_context.x20 = stk_sz; } diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c @@ -979,6 +979,131 @@ static int pac_mask_get(struct task_struct *target, return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &uregs, 0, -1); } + +#ifdef CONFIG_CHECKPOINT_RESTORE +static __uint128_t pac_key_to_user(const struct ptrauth_key *key) +{ + return (__uint128_t)key->hi << 64 | key->lo; +} + +static struct ptrauth_key pac_key_from_user(__uint128_t ukey) +{ + struct ptrauth_key key = { + .lo = (unsigned long)ukey, + .hi = (unsigned long)(ukey >> 64), + }; + + return key; +} + +static void pac_address_keys_to_user(struct user_pac_address_keys *ukeys, + const struct ptrauth_keys *keys) +{ + ukeys->apiakey = pac_key_to_user(&keys->apia); + ukeys->apibkey = pac_key_to_user(&keys->apib); + ukeys->apdakey = pac_key_to_user(&keys->apda); + ukeys->apdbkey = pac_key_to_user(&keys->apdb); +} + +static void pac_address_keys_from_user(struct ptrauth_keys *keys, + const struct user_pac_address_keys *ukeys) +{ + keys->apia = pac_key_from_user(ukeys->apiakey); + keys->apib = pac_key_from_user(ukeys->apibkey); + keys->apda = pac_key_from_user(ukeys->apdakey); + keys->apdb = pac_key_from_user(ukeys->apdbkey); +} + +static int pac_address_keys_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct ptrauth_keys *keys = &target->thread.keys_user; + struct user_pac_address_keys user_keys; + + if (!system_supports_address_auth()) + return -EINVAL; + + pac_address_keys_to_user(&user_keys, keys); + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &user_keys, 0, -1); +} + +static int pac_address_keys_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct ptrauth_keys *keys = &target->thread.keys_user; + struct user_pac_address_keys user_keys; + int ret; + + if (!system_supports_address_auth()) + return -EINVAL; + + pac_address_keys_to_user(&user_keys, keys); + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &user_keys, 0, -1); + if (ret) + return ret; + pac_address_keys_from_user(keys, &user_keys); + + return 0; +} + +static void pac_generic_keys_to_user(struct user_pac_generic_keys *ukeys, + const struct ptrauth_keys *keys) +{ + ukeys->apgakey = pac_key_to_user(&keys->apga); +} + +static void pac_generic_keys_from_user(struct ptrauth_keys *keys, + const struct user_pac_generic_keys *ukeys) +{ + keys->apga = pac_key_from_user(ukeys->apgakey); +} + +static int pac_generic_keys_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct ptrauth_keys *keys = &target->thread.keys_user; + struct user_pac_generic_keys user_keys; + + if (!system_supports_generic_auth()) + return -EINVAL; + + pac_generic_keys_to_user(&user_keys, keys); + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &user_keys, 0, -1); +} + +static int pac_generic_keys_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct ptrauth_keys *keys = &target->thread.keys_user; + struct user_pac_generic_keys user_keys; + int ret; + + if (!system_supports_generic_auth()) + return -EINVAL; + + pac_generic_keys_to_user(&user_keys, keys); + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &user_keys, 0, -1); + if (ret) + return ret; + pac_generic_keys_from_user(keys, &user_keys); + + return 0; +} +#endif /* CONFIG_CHECKPOINT_RESTORE */ #endif /* CONFIG_ARM64_PTR_AUTH */ enum aarch64_regset { @@ -995,6 +1120,10 @@ enum aarch64_regset { #endif #ifdef CONFIG_ARM64_PTR_AUTH REGSET_PAC_MASK, +#ifdef CONFIG_CHECKPOINT_RESTORE + REGSET_PACA_KEYS, + REGSET_PACG_KEYS, +#endif #endif }; @@ -1074,6 +1203,24 @@ static const struct user_regset aarch64_regsets[] = { .get = pac_mask_get, /* this cannot be set dynamically */ }, +#ifdef CONFIG_CHECKPOINT_RESTORE + [REGSET_PACA_KEYS] = { + .core_note_type = NT_ARM_PACA_KEYS, + .n = sizeof(struct user_pac_address_keys) / sizeof(__uint128_t), + .size = sizeof(__uint128_t), + .align = sizeof(__uint128_t), + .get = pac_address_keys_get, + .set = pac_address_keys_set, + }, + [REGSET_PACG_KEYS] = { + .core_note_type = NT_ARM_PACG_KEYS, + .n = sizeof(struct user_pac_generic_keys) / sizeof(__uint128_t), + .size = sizeof(__uint128_t), + .align = sizeof(__uint128_t), + .get = pac_generic_keys_get, + .set = pac_generic_keys_set, + }, +#endif #endif }; diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c @@ -58,7 +58,6 @@ #include <asm/cacheflush.h> #include <asm/tlbflush.h> #include <asm/traps.h> -#include <asm/memblock.h> #include <asm/efi.h> #include <asm/xen/hypervisor.h> #include <asm/mmu_context.h> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c @@ -35,6 +35,7 @@ #include <linux/smp.h> #include <linux/seq_file.h> #include <linux/irq.h> +#include <linux/irqchip/arm-gic-v3.h> #include <linux/percpu.h> #include <linux/clockchips.h> #include <linux/completion.h> @@ -180,6 +181,24 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) return ret; } +static void init_gic_priority_masking(void) +{ + u32 cpuflags; + + if (WARN_ON(!gic_enable_sre())) + return; + + cpuflags = read_sysreg(daif); + + WARN_ON(!(cpuflags & PSR_I_BIT)); + + gic_write_pmr(GIC_PRIO_IRQOFF); + + /* We can only unmask PSR.I if we can take aborts */ + if (!(cpuflags & PSR_A_BIT)) + write_sysreg(cpuflags & ~PSR_I_BIT, daif); +} + /* * This is the secondary CPU boot entry. We're using this CPUs * idle thread stack, but a set of temporary page tables. @@ -206,6 +225,9 @@ asmlinkage notrace void secondary_start_kernel(void) */ cpu_uninstall_idmap(); + if (system_uses_irq_prio_masking()) + init_gic_priority_masking(); + preempt_disable(); trace_hardirqs_off(); @@ -419,6 +441,17 @@ void __init smp_prepare_boot_cpu(void) */ jump_label_init(); cpuinfo_store_boot_cpu(); + + /* + * We now know enough about the boot CPU to apply the + * alternatives that cannot wait until interrupt handling + * and/or scheduling is enabled. + */ + apply_boot_alternatives(); + + /* Conditionally switch to GIC PMR for interrupt masking */ + if (system_uses_irq_prio_masking()) + init_gic_priority_masking(); } static u64 __init of_get_cpu_mpidr(struct device_node *dn) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c @@ -898,13 +898,17 @@ bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr) asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr) { - nmi_enter(); + const bool was_in_nmi = in_nmi(); + + if (!was_in_nmi) + nmi_enter(); /* non-RAS errors are not containable */ if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(regs, esr)) arm64_serror_panic(regs, esr); - nmi_exit(); + if (!was_in_nmi) + nmi_exit(); } void __pte_error(const char *file, int line, unsigned long val) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c @@ -22,6 +22,7 @@ #include <kvm/arm_psci.h> +#include <asm/arch_gicv3.h> #include <asm/cpufeature.h> #include <asm/kprobes.h> #include <asm/kvm_asm.h> @@ -525,6 +526,17 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) struct kvm_cpu_context *guest_ctxt; u64 exit_code; + /* + * Having IRQs masked via PMR when entering the guest means the GIC + * will not signal the CPU of interrupts of lower priority, and the + * only way to get out will be via guest exceptions. + * Naturally, we want to avoid this. + */ + if (system_uses_irq_prio_masking()) { + gic_write_pmr(GIC_PRIO_IRQON); + dsb(sy); + } + vcpu = kern_hyp_va(vcpu); host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); @@ -577,6 +589,10 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) */ __debug_switch_to_host(vcpu); + /* Returning to host will clear PSR.I, remask PMR if needed */ + if (system_uses_irq_prio_masking()) + gic_write_pmr(GIC_PRIO_IRQOFF); + return exit_code; } diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c @@ -406,7 +406,7 @@ void ptdump_check_wx(void) static int ptdump_init(void) { ptdump_initialize(); - return ptdump_debugfs_register(&kernel_ptdump_info, - "kernel_page_tables"); + ptdump_debugfs_register(&kernel_ptdump_info, "kernel_page_tables"); + return 0; } device_initcall(ptdump_init); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c @@ -810,11 +810,12 @@ void __init hook_debug_fault_code(int nr, debug_fault_info[nr].name = name; } -asmlinkage int __exception do_debug_exception(unsigned long addr, +asmlinkage int __exception do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr, struct pt_regs *regs) { const struct fault_info *inf = esr_to_debug_fault_info(esr); + unsigned long pc = instruction_pointer(regs); int rv; /* @@ -824,14 +825,14 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, if (interrupts_enabled(regs)) trace_hardirqs_off(); - if (user_mode(regs) && !is_ttbr0_addr(instruction_pointer(regs))) + if (user_mode(regs) && !is_ttbr0_addr(pc)) arm64_apply_bp_hardening(); - if (!inf->fn(addr, esr, regs)) { + if (!inf->fn(addr_if_watchpoint, esr, regs)) { rv = 1; } else { arm64_notify_die(inf->name, regs, - inf->sig, inf->code, (void __user *)addr, esr); + inf->sig, inf->code, (void __user *)pc, esr); rv = 0; } diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c @@ -260,24 +260,6 @@ int pfn_valid(unsigned long pfn) } EXPORT_SYMBOL(pfn_valid); -#ifndef CONFIG_SPARSEMEM -static void __init arm64_memory_present(void) -{ -} -#else -static void __init arm64_memory_present(void) -{ - struct memblock_region *reg; - - for_each_memblock(memory, reg) { - int nid = memblock_get_region_node(reg); - - memory_present(nid, memblock_region_memory_base_pfn(reg), - memblock_region_memory_end_pfn(reg)); - } -} -#endif - static phys_addr_t memory_limit = PHYS_ADDR_MAX; /* @@ -464,7 +446,7 @@ void __init bootmem_init(void) * Sparsemem tries to allocate bootmem in memory_present(), so must be * done after the fixed reservations. */ - arm64_memory_present(); + memblocks_present(); sparse_init(); zone_sizes_init(min, max); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c @@ -42,7 +42,6 @@ #include <asm/setup.h> #include <asm/sizes.h> #include <asm/tlb.h> -#include <asm/memblock.h> #include <asm/mmu_context.h> #include <asm/ptdump.h> #include <asm/tlbflush.h> @@ -655,10 +654,6 @@ static void __init map_kernel(pgd_t *pgdp) kasan_copy_shadow(pgdp); } -/* - * paging_init() sets up the page tables, initialises the zone memory - * maps and sets up the zero page. - */ void __init paging_init(void) { pgd_t *pgdp = pgd_set_fixmap(__pa_symbol(swapper_pg_dir)); diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S @@ -55,17 +55,6 @@ #define MAIR(attr, mt) ((attr) << ((mt) * 8)) -/* - * cpu_do_idle() - * - * Idle the processor (wait for interrupt). - */ -ENTRY(cpu_do_idle) - dsb sy // WFI may enter a low-power mode - wfi - ret -ENDPROC(cpu_do_idle) - #ifdef CONFIG_CPU_PM /** * cpu_do_suspend - save CPU registers context @@ -456,6 +445,7 @@ ENTRY(__cpu_setup) ldr x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \ TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \ TCR_TBI0 | TCR_A1 | TCR_KASAN_FLAGS + tcr_clear_errata_bits x10, x9, x5 #ifdef CONFIG_ARM64_USER_VA_BITS_52 ldr_l x9, vabits_user diff --git a/arch/arm64/mm/ptdump_debugfs.c b/arch/arm64/mm/ptdump_debugfs.c @@ -12,10 +12,7 @@ static int ptdump_show(struct seq_file *m, void *v) } DEFINE_SHOW_ATTRIBUTE(ptdump); -int ptdump_debugfs_register(struct ptdump_info *info, const char *name) +void ptdump_debugfs_register(struct ptdump_info *info, const char *name) { - struct dentry *pe; - pe = debugfs_create_file(name, 0400, NULL, info, &ptdump_fops); - return pe ? 0 : -ENOMEM; - + debugfs_create_file(name, 0400, NULL, info, &ptdump_fops); } diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h @@ -163,20 +163,20 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) * doesn't define any ordering between the memory space and the I/O space. */ #define __io_br() do {} while (0) -#define __io_ar() __asm__ __volatile__ ("fence i,r" : : : "memory"); +#define __io_ar(v) __asm__ __volatile__ ("fence i,r" : : : "memory"); #define __io_bw() __asm__ __volatile__ ("fence w,o" : : : "memory"); #define __io_aw() do {} while (0) -#define readb(c) ({ u8 __v; __io_br(); __v = readb_cpu(c); __io_ar(); __v; }) -#define readw(c) ({ u16 __v; __io_br(); __v = readw_cpu(c); __io_ar(); __v; }) -#define readl(c) ({ u32 __v; __io_br(); __v = readl_cpu(c); __io_ar(); __v; }) +#define readb(c) ({ u8 __v; __io_br(); __v = readb_cpu(c); __io_ar(__v); __v; }) +#define readw(c) ({ u16 __v; __io_br(); __v = readw_cpu(c); __io_ar(__v); __v; }) +#define readl(c) ({ u32 __v; __io_br(); __v = readl_cpu(c); __io_ar(__v); __v; }) #define writeb(v,c) ({ __io_bw(); writeb_cpu((v),(c)); __io_aw(); }) #define writew(v,c) ({ __io_bw(); writew_cpu((v),(c)); __io_aw(); }) #define writel(v,c) ({ __io_bw(); writel_cpu((v),(c)); __io_aw(); }) #ifdef CONFIG_64BIT -#define readq(c) ({ u64 __v; __io_br(); __v = readq_cpu(c); __io_ar(); __v; }) +#define readq(c) ({ u64 __v; __io_br(); __v = readq_cpu(c); __io_ar(__v); __v; }) #define writeq(v,c) ({ __io_bw(); writeq_cpu((v),(c)); __io_aw(); }) #endif @@ -198,20 +198,20 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) * writes. */ #define __io_pbr() __asm__ __volatile__ ("fence io,i" : : : "memory"); -#define __io_par() __asm__ __volatile__ ("fence i,ior" : : : "memory"); +#define __io_par(v) __asm__ __volatile__ ("fence i,ior" : : : "memory"); #define __io_pbw() __asm__ __volatile__ ("fence iow,o" : : : "memory"); #define __io_paw() __asm__ __volatile__ ("fence o,io" : : : "memory"); -#define inb(c) ({ u8 __v; __io_pbr(); __v = readb_cpu((void*)(PCI_IOBASE + (c))); __io_par(); __v; }) -#define inw(c) ({ u16 __v; __io_pbr(); __v = readw_cpu((void*)(PCI_IOBASE + (c))); __io_par(); __v; }) -#define inl(c) ({ u32 __v; __io_pbr(); __v = readl_cpu((void*)(PCI_IOBASE + (c))); __io_par(); __v; }) +#define inb(c) ({ u8 __v; __io_pbr(); __v = readb_cpu((void*)(PCI_IOBASE + (c))); __io_par(__v); __v; }) +#define inw(c) ({ u16 __v; __io_pbr(); __v = readw_cpu((void*)(PCI_IOBASE + (c))); __io_par(__v); __v; }) +#define inl(c) ({ u32 __v; __io_pbr(); __v = readl_cpu((void*)(PCI_IOBASE + (c))); __io_par(__v); __v; }) #define outb(v,c) ({ __io_pbw(); writeb_cpu((v),(void*)(PCI_IOBASE + (c))); __io_paw(); }) #define outw(v,c) ({ __io_pbw(); writew_cpu((v),(void*)(PCI_IOBASE + (c))); __io_paw(); }) #define outl(v,c) ({ __io_pbw(); writel_cpu((v),(void*)(PCI_IOBASE + (c))); __io_paw(); }) #ifdef CONFIG_64BIT -#define inq(c) ({ u64 __v; __io_pbr(); __v = readq_cpu((void*)(c)); __io_par(); __v; }) +#define inq(c) ({ u64 __v; __io_pbr(); __v = readq_cpu((void*)(c)); __io_par(__v); __v; }) #define outq(v,c) ({ __io_pbw(); writeq_cpu((v),(void*)(c)); __io_paw(); }) #endif @@ -254,16 +254,16 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) afence; \ } -__io_reads_ins(reads, u8, b, __io_br(), __io_ar()) -__io_reads_ins(reads, u16, w, __io_br(), __io_ar()) -__io_reads_ins(reads, u32, l, __io_br(), __io_ar()) +__io_reads_ins(reads, u8, b, __io_br(), __io_ar(addr)) +__io_reads_ins(reads, u16, w, __io_br(), __io_ar(addr)) +__io_reads_ins(reads, u32, l, __io_br(), __io_ar(addr)) #define readsb(addr, buffer, count) __readsb(addr, buffer, count) #define readsw(addr, buffer, count) __readsw(addr, buffer, count) #define readsl(addr, buffer, count) __readsl(addr, buffer, count) -__io_reads_ins(ins, u8, b, __io_pbr(), __io_par()) -__io_reads_ins(ins, u16, w, __io_pbr(), __io_par()) -__io_reads_ins(ins, u32, l, __io_pbr(), __io_par()) +__io_reads_ins(ins, u8, b, __io_pbr(), __io_par(addr)) +__io_reads_ins(ins, u16, w, __io_pbr(), __io_par(addr)) +__io_reads_ins(ins, u32, l, __io_pbr(), __io_par(addr)) #define insb(addr, buffer, count) __insb((void __iomem *)(long)addr, buffer, count) #define insw(addr, buffer, count) __insw((void __iomem *)(long)addr, buffer, count) #define insl(addr, buffer, count) __insl((void __iomem *)(long)addr, buffer, count) @@ -283,10 +283,10 @@ __io_writes_outs(outs, u32, l, __io_pbw(), __io_paw()) #define outsl(addr, buffer, count) __outsl((void __iomem *)(long)addr, buffer, count) #ifdef CONFIG_64BIT -__io_reads_ins(reads, u64, q, __io_br(), __io_ar()) +__io_reads_ins(reads, u64, q, __io_br(), __io_ar(addr)) #define readsq(addr, buffer, count) __readsq(addr, buffer, count) -__io_reads_ins(ins, u64, q, __io_pbr(), __io_par()) +__io_reads_ins(ins, u64, q, __io_pbr(), __io_par(addr)) #define insq(addr, buffer, count) __insq((void __iomem *)addr, buffer, count) __io_writes_outs(writes, u64, q, __io_bw(), __io_aw()) diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c @@ -42,10 +42,10 @@ static struct ptdump_info efi_ptdump_info = { static int __init ptdump_init(void) { - if (!efi_enabled(EFI_RUNTIME_SERVICES)) - return 0; + if (efi_enabled(EFI_RUNTIME_SERVICES)) + ptdump_debugfs_register(&efi_ptdump_info, "efi_page_tables"); - return ptdump_debugfs_register(&efi_ptdump_info, "efi_page_tables"); + return 0; } device_initcall(ptdump_init); diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c @@ -89,11 +89,24 @@ exit: \ efi_rts_work.status; \ }) +#ifndef arch_efi_save_flags +#define arch_efi_save_flags(state_flags) local_save_flags(state_flags) +#define arch_efi_restore_flags(state_flags) local_irq_restore(state_flags) +#endif + +unsigned long efi_call_virt_save_flags(void) +{ + unsigned long flags; + + arch_efi_save_flags(flags); + return flags; +} + void efi_call_virt_check_flags(unsigned long flags, const char *call) { unsigned long cur_flags, mismatch; - local_save_flags(cur_flags); + cur_flags = efi_call_virt_save_flags(); mismatch = flags ^ cur_flags; if (!WARN_ON_ONCE(mismatch & ARCH_EFI_IRQ_FLAGS_MASK)) @@ -102,7 +115,7 @@ void efi_call_virt_check_flags(unsigned long flags, const char *call) add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_NOW_UNRELIABLE); pr_err_ratelimited(FW_BUG "IRQ flags corrupted (0x%08lx=>0x%08lx) by EFI %s\n", flags, cur_flags, call); - local_irq_restore(flags); + arch_efi_restore_flags(flags); } /* diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c @@ -27,6 +27,7 @@ #include <linux/of_address.h> #include <linux/of_irq.h> #include <linux/percpu.h> +#include <linux/refcount.h> #include <linux/slab.h> #include <linux/irqchip.h> @@ -41,6 +42,8 @@ #include "irq-gic-common.h" +#define GICD_INT_NMI_PRI (GICD_INT_DEF_PRI & ~0x80) + #define FLAGS_WORKAROUND_GICR_WAKER_MSM8996 (1ULL << 0) struct redist_region { @@ -66,6 +69,34 @@ struct gic_chip_data { static struct gic_chip_data gic_data __read_mostly; static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key); +/* + * The behaviours of RPR and PMR registers differ depending on the value of + * SCR_EL3.FIQ, and the behaviour of non-secure priority registers of the + * distributor and redistributors depends on whether security is enabled in the + * GIC. + * + * When security is enabled, non-secure priority values from the (re)distributor + * are presented to the GIC CPUIF as follow: + * (GIC_(R)DIST_PRI[irq] >> 1) | 0x80; + * + * If SCR_EL3.FIQ == 1, the values writen to/read from PMR and RPR at non-secure + * EL1 are subject to a similar operation thus matching the priorities presented + * from the (re)distributor when security is enabled. + * + * see GICv3/GICv4 Architecture Specification (IHI0069D): + * - section 4.8.1 Non-secure accesses to register fields for Secure interrupt + * priorities. + * - Figure 4-7 Secure read of the priority field for a Non-secure Group 1 + * interrupt. + * + * For now, we only support pseudo-NMIs if we have non-secure view of + * priorities. + */ +static DEFINE_STATIC_KEY_FALSE(supports_pseudo_nmis); + +/* ppi_nmi_refs[n] == number of cpus having ppi[n + 16] set as NMI */ +static refcount_t ppi_nmi_refs[16]; + static struct gic_kvm_info gic_v3_kvm_info; static DEFINE_PER_CPU(bool, has_rss); @@ -232,6 +263,12 @@ static void gic_unmask_irq(struct irq_data *d) gic_poke_irq(d, GICD_ISENABLER); } +static inline bool gic_supports_nmi(void) +{ + return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && + static_branch_likely(&supports_pseudo_nmis); +} + static int gic_irq_set_irqchip_state(struct irq_data *d, enum irqchip_irq_state which, bool val) { @@ -287,6 +324,79 @@ static int gic_irq_get_irqchip_state(struct irq_data *d, return 0; } +static void gic_irq_set_prio(struct irq_data *d, u8 prio) +{ + void __iomem *base = gic_dist_base(d); + + writeb_relaxed(prio, base + GICD_IPRIORITYR + gic_irq(d)); +} + +static int gic_irq_nmi_setup(struct irq_data *d) +{ + struct irq_desc *desc = irq_to_desc(d->irq); + + if (!gic_supports_nmi()) + return -EINVAL; + + if (gic_peek_irq(d, GICD_ISENABLER)) { + pr_err("Cannot set NMI property of enabled IRQ %u\n", d->irq); + return -EINVAL; + } + + /* + * A secondary irq_chip should be in charge of LPI request, + * it should not be possible to get there + */ + if (WARN_ON(gic_irq(d) >= 8192)) + return -EINVAL; + + /* desc lock should already be held */ + if (gic_irq(d) < 32) { + /* Setting up PPI as NMI, only switch handler for first NMI */ + if (!refcount_inc_not_zero(&ppi_nmi_refs[gic_irq(d) - 16])) { + refcount_set(&ppi_nmi_refs[gic_irq(d) - 16], 1); + desc->handle_irq = handle_percpu_devid_fasteoi_nmi; + } + } else { + desc->handle_irq = handle_fasteoi_nmi; + } + + gic_irq_set_prio(d, GICD_INT_NMI_PRI); + + return 0; +} + +static void gic_irq_nmi_teardown(struct irq_data *d) +{ + struct irq_desc *desc = irq_to_desc(d->irq); + + if (WARN_ON(!gic_supports_nmi())) + return; + + if (gic_peek_irq(d, GICD_ISENABLER)) { + pr_err("Cannot set NMI property of enabled IRQ %u\n", d->irq); + return; + } + + /* + * A secondary irq_chip should be in charge of LPI request, + * it should not be possible to get there + */ + if (WARN_ON(gic_irq(d) >= 8192)) + return; + + /* desc lock should already be held */ + if (gic_irq(d) < 32) { + /* Tearing down NMI, only switch handler for last NMI */ + if (refcount_dec_and_test(&ppi_nmi_refs[gic_irq(d) - 16])) + desc->handle_irq = handle_percpu_devid_irq; + } else { + desc->handle_irq = handle_fasteoi_irq; + } + + gic_irq_set_prio(d, GICD_INT_DEF_PRI); +} + static void gic_eoi_irq(struct irq_data *d) { gic_write_eoir(gic_irq(d)); @@ -350,12 +460,50 @@ static u64 gic_mpidr_to_affinity(unsigned long mpidr) return aff; } +static void gic_deactivate_unhandled(u32 irqnr) +{ + if (static_branch_likely(&supports_deactivate_key)) { + if (irqnr < 8192) + gic_write_dir(irqnr); + } else { + gic_write_eoir(irqnr); + } +} + +static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs) +{ + int err; + + if (static_branch_likely(&supports_deactivate_key)) + gic_write_eoir(irqnr); + /* + * Leave the PSR.I bit set to prevent other NMIs to be + * received while handling this one. + * PSR.I will be restored when we ERET to the + * interrupted context. + */ + err = handle_domain_nmi(gic_data.domain, irqnr, regs); + if (err) + gic_deactivate_unhandled(irqnr); +} + static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs) { u32 irqnr; irqnr = gic_read_iar(); + if (gic_supports_nmi() && + unlikely(gic_read_rpr() == GICD_INT_NMI_PRI)) { + gic_handle_nmi(irqnr, regs); + return; + } + + if (gic_prio_masking_enabled()) { + gic_pmr_mask_irqs(); + gic_arch_enable_irqs(); + } + if (likely(irqnr > 15 && irqnr < 1020) || irqnr >= 8192) { int err; @@ -367,12 +515,7 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs err = handle_domain_irq(gic_data.domain, irqnr, regs); if (err) { WARN_ONCE(true, "Unexpected interrupt received!\n"); - if (static_branch_likely(&supports_deactivate_key)) { - if (irqnr < 8192) - gic_write_dir(irqnr); - } else { - gic_write_eoir(irqnr); - } + gic_deactivate_unhandled(irqnr); } return; } @@ -395,6 +538,44 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs } } +static u32 gic_get_pribits(void) +{ + u32 pribits; + + pribits = gic_read_ctlr(); + pribits &= ICC_CTLR_EL1_PRI_BITS_MASK; + pribits >>= ICC_CTLR_EL1_PRI_BITS_SHIFT; + pribits++; + + return pribits; +} + +static bool gic_has_group0(void) +{ + u32 val; + u32 old_pmr; + + old_pmr = gic_read_pmr(); + + /* + * Let's find out if Group0 is under control of EL3 or not by + * setting the highest possible, non-zero priority in PMR. + * + * If SCR_EL3.FIQ is set, the priority gets shifted down in + * order for the CPU interface to set bit 7, and keep the + * actual priority in the non-secure range. In the process, it + * looses the least significant bit and the actual priority + * becomes 0x80. Reading it back returns 0, indicating that + * we're don't have access to Group0. + */ + gic_write_pmr(BIT(8 - gic_get_pribits())); + val = gic_read_pmr(); + + gic_write_pmr(old_pmr); + + return val != 0; +} + static void __init gic_dist_init(void) { unsigned int i; @@ -530,13 +711,19 @@ static void gic_update_vlpi_properties(void) !gic_data.rdists.has_direct_lpi ? "no " : ""); } +/* Check whether it's single security state view */ +static inline bool gic_dist_security_disabled(void) +{ + return readl_relaxed(gic_data.dist_base + GICD_CTLR) & GICD_CTLR_DS; +} + static void gic_cpu_sys_reg_init(void) { int i, cpu = smp_processor_id(); u64 mpidr = cpu_logical_map(cpu); u64 need_rss = MPIDR_RS(mpidr); bool group0; - u32 val, pribits; + u32 pribits; /* * Need to check that the SRE bit has actually been set. If @@ -548,28 +735,22 @@ static void gic_cpu_sys_reg_init(void) if (!gic_enable_sre()) pr_err("GIC: unable to set SRE (disabled at EL2), panic ahead\n"); - pribits = gic_read_ctlr(); - pribits &= ICC_CTLR_EL1_PRI_BITS_MASK; - pribits >>= ICC_CTLR_EL1_PRI_BITS_SHIFT; - pribits++; + pribits = gic_get_pribits(); - /* - * Let's find out if Group0 is under control of EL3 or not by - * setting the highest possible, non-zero priority in PMR. - * - * If SCR_EL3.FIQ is set, the priority gets shifted down in - * order for the CPU interface to set bit 7, and keep the - * actual priority in the non-secure range. In the process, it - * looses the least significant bit and the actual priority - * becomes 0x80. Reading it back returns 0, indicating that - * we're don't have access to Group0. - */ - write_gicreg(BIT(8 - pribits), ICC_PMR_EL1); - val = read_gicreg(ICC_PMR_EL1); - group0 = val != 0; + group0 = gic_has_group0(); /* Set priority mask register */ - write_gicreg(DEFAULT_PMR_VALUE, ICC_PMR_EL1); + if (!gic_prio_masking_enabled()) { + write_gicreg(DEFAULT_PMR_VALUE, ICC_PMR_EL1); + } else { + /* + * Mismatch configuration with boot CPU, the system is likely + * to die as interrupt masking will not work properly on all + * CPUs + */ + WARN_ON(gic_supports_nmi() && group0 && + !gic_dist_security_disabled()); + } /* * Some firmwares hand over to the kernel with the BPR changed from @@ -824,12 +1005,6 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, #endif #ifdef CONFIG_CPU_PM -/* Check whether it's single security state view */ -static bool gic_dist_security_disabled(void) -{ - return readl_relaxed(gic_data.dist_base + GICD_CTLR) & GICD_CTLR_DS; -} - static int gic_cpu_pm_notifier(struct notifier_block *self, unsigned long cmd, void *v) { @@ -866,6 +1041,8 @@ static struct irq_chip gic_chip = { .irq_set_affinity = gic_set_affinity, .irq_get_irqchip_state = gic_irq_get_irqchip_state, .irq_set_irqchip_state = gic_irq_set_irqchip_state, + .irq_nmi_setup = gic_irq_nmi_setup, + .irq_nmi_teardown = gic_irq_nmi_teardown, .flags = IRQCHIP_SET_TYPE_MASKED | IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND, @@ -881,6 +1058,8 @@ static struct irq_chip gic_eoimode1_chip = { .irq_get_irqchip_state = gic_irq_get_irqchip_state, .irq_set_irqchip_state = gic_irq_set_irqchip_state, .irq_set_vcpu_affinity = gic_irq_set_vcpu_affinity, + .irq_nmi_setup = gic_irq_nmi_setup, + .irq_nmi_teardown = gic_irq_nmi_teardown, .flags = IRQCHIP_SET_TYPE_MASKED | IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND, @@ -1082,6 +1261,21 @@ static bool gic_enable_quirk_msm8996(void *data) return true; } +static void gic_enable_nmi_support(void) +{ + int i; + + for (i = 0; i < 16; i++) + refcount_set(&ppi_nmi_refs[i], 0); + + static_branch_enable(&supports_pseudo_nmis); + + if (static_branch_likely(&supports_deactivate_key)) + gic_eoimode1_chip.flags |= IRQCHIP_SUPPORTS_NMI; + else + gic_chip.flags |= IRQCHIP_SUPPORTS_NMI; +} + static int __init gic_init_bases(void __iomem *dist_base, struct redist_region *rdist_regs, u32 nr_redist_regions, @@ -1151,6 +1345,13 @@ static int __init gic_init_bases(void __iomem *dist_base, its_cpu_init(); } + if (gic_prio_masking_enabled()) { + if (!gic_has_group0() || gic_dist_security_disabled()) + gic_enable_nmi_support(); + else + pr_warn("SCR_EL3.FIQ is cleared, cannot enable use of pseudo-NMIs\n"); + } + return 0; out_free: diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c @@ -1052,7 +1052,6 @@ static void xgene_perf_start(struct perf_event *event, int flags) static void xgene_perf_stop(struct perf_event *event, int flags) { struct hw_perf_event *hw = &event->hw; - u64 config; if (hw->state & PERF_HES_UPTODATE) return; @@ -1064,7 +1063,6 @@ static void xgene_perf_stop(struct perf_event *event, int flags) if (hw->state & PERF_HES_UPTODATE) return; - config = hw->config; xgene_perf_read(event); hw->state |= PERF_HES_UPTODATE; } diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h @@ -32,9 +32,9 @@ /* prevent prefetching of coherent DMA data ahead of a dma-complete */ #ifndef __io_ar #ifdef rmb -#define __io_ar() rmb() +#define __io_ar(v) rmb() #else -#define __io_ar() barrier() +#define __io_ar(v) barrier() #endif #endif @@ -65,7 +65,7 @@ #endif #ifndef __io_par -#define __io_par() __io_ar() +#define __io_par(v) __io_ar(v) #endif @@ -158,7 +158,7 @@ static inline u8 readb(const volatile void __iomem *addr) __io_br(); val = __raw_readb(addr); - __io_ar(); + __io_ar(val); return val; } #endif @@ -171,7 +171,7 @@ static inline u16 readw(const volatile void __iomem *addr) __io_br(); val = __le16_to_cpu(__raw_readw(addr)); - __io_ar(); + __io_ar(val); return val; } #endif @@ -184,7 +184,7 @@ static inline u32 readl(const volatile void __iomem *addr) __io_br(); val = __le32_to_cpu(__raw_readl(addr)); - __io_ar(); + __io_ar(val); return val; } #endif @@ -198,7 +198,7 @@ static inline u64 readq(const volatile void __iomem *addr) __io_br(); val = __le64_to_cpu(__raw_readq(addr)); - __io_ar(); + __io_ar(val); return val; } #endif @@ -471,7 +471,7 @@ static inline u8 inb(unsigned long addr) __io_pbr(); val = __raw_readb(PCI_IOBASE + addr); - __io_par(); + __io_par(val); return val; } #endif @@ -484,7 +484,7 @@ static inline u16 inw(unsigned long addr) __io_pbr(); val = __le16_to_cpu(__raw_readw(PCI_IOBASE + addr)); - __io_par(); + __io_par(val); return val; } #endif @@ -497,7 +497,7 @@ static inline u32 inl(unsigned long addr) __io_pbr(); val = __le32_to_cpu(__raw_readl(PCI_IOBASE + addr)); - __io_par(); + __io_par(val); return val; } #endif diff --git a/include/linux/efi.h b/include/linux/efi.h @@ -1613,6 +1613,7 @@ efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg, bool efi_runtime_disabled(void); extern void efi_call_virt_check_flags(unsigned long flags, const char *call); +extern unsigned long efi_call_virt_save_flags(void); enum efi_secureboot_mode { efi_secureboot_mode_unset, @@ -1658,7 +1659,7 @@ void efi_retrieve_tpm2_eventlog(efi_system_table_t *sys_table); \ arch_efi_call_virt_setup(); \ \ - local_save_flags(__flags); \ + __flags = efi_call_virt_save_flags(); \ __s = arch_efi_call_virt(p, f, args); \ efi_call_virt_check_flags(__flags, __stringify(f)); \ \ @@ -1673,7 +1674,7 @@ void efi_retrieve_tpm2_eventlog(efi_system_table_t *sys_table); \ arch_efi_call_virt_setup(); \ \ - local_save_flags(__flags); \ + __flags = efi_call_virt_save_flags(); \ arch_efi_call_virt(p, f, args); \ efi_call_virt_check_flags(__flags, __stringify(f)); \ \ diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h @@ -60,8 +60,14 @@ extern void irq_enter(void); */ extern void irq_exit(void); +#ifndef arch_nmi_enter +#define arch_nmi_enter() do { } while (0) +#define arch_nmi_exit() do { } while (0) +#endif + #define nmi_enter() \ do { \ + arch_nmi_enter(); \ printk_nmi_enter(); \ lockdep_off(); \ ftrace_nmi_enter(); \ @@ -80,6 +86,7 @@ extern void irq_exit(void); ftrace_nmi_exit(); \ lockdep_on(); \ printk_nmi_exit(); \ + arch_nmi_exit(); \ } while (0) #endif /* LINUX_HARDIRQ_H */ diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h @@ -421,6 +421,8 @@ typedef struct elf64_shdr { #define NT_ARM_SYSTEM_CALL 0x404 /* ARM system call number */ #define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension registers */ #define NT_ARM_PAC_MASK 0x406 /* ARM pointer authentication code masks */ +#define NT_ARM_PACA_KEYS 0x407 /* ARM pointer authentication address keys */ +#define NT_ARM_PACG_KEYS 0x408 /* ARM pointer authentication generic key */ #define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */ #define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */ #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */ diff --git a/lib/raid6/neon.uc b/lib/raid6/neon.uc @@ -28,7 +28,6 @@ typedef uint8x16_t unative_t; -#define NBYTES(x) ((unative_t){x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x}) #define NSIZE sizeof(unative_t) /* @@ -61,7 +60,7 @@ void raid6_neon$#_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs) int d, z, z0; register unative_t wd$$, wq$$, wp$$, w1$$, w2$$; - const unative_t x1d = NBYTES(0x1d); + const unative_t x1d = vdupq_n_u8(0x1d); z0 = disks - 3; /* Highest data disk */ p = dptr[z0+1]; /* XOR parity */ @@ -92,7 +91,7 @@ void raid6_neon$#_xor_syndrome_real(int disks, int start, int stop, int d, z, z0; register unative_t wd$$, wq$$, wp$$, w1$$, w2$$; - const unative_t x1d = NBYTES(0x1d); + const unative_t x1d = vdupq_n_u8(0x1d); z0 = stop; /* P/Q right side optimization */ p = dptr[disks-2]; /* XOR parity */ diff --git a/lib/raid6/recov_neon_inner.c b/lib/raid6/recov_neon_inner.c @@ -10,11 +10,6 @@ #include <arm_neon.h> -static const uint8x16_t x0f = { - 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, - 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, -}; - #ifdef CONFIG_ARM /* * AArch32 does not provide this intrinsic natively because it does not @@ -41,6 +36,7 @@ void __raid6_2data_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dp, uint8x16_t pm1 = vld1q_u8(pbmul + 16); uint8x16_t qm0 = vld1q_u8(qmul); uint8x16_t qm1 = vld1q_u8(qmul + 16); + uint8x16_t x0f = vdupq_n_u8(0x0f); /* * while ( bytes-- ) { @@ -60,14 +56,14 @@ void __raid6_2data_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dp, px = veorq_u8(vld1q_u8(p), vld1q_u8(dp)); vx = veorq_u8(vld1q_u8(q), vld1q_u8(dq)); - vy = (uint8x16_t)vshrq_n_s16((int16x8_t)vx, 4); + vy = vshrq_n_u8(vx, 4); vx = vqtbl1q_u8(qm0, vandq_u8(vx, x0f)); - vy = vqtbl1q_u8(qm1, vandq_u8(vy, x0f)); + vy = vqtbl1q_u8(qm1, vy); qx = veorq_u8(vx, vy); - vy = (uint8x16_t)vshrq_n_s16((int16x8_t)px, 4); + vy = vshrq_n_u8(px, 4); vx = vqtbl1q_u8(pm0, vandq_u8(px, x0f)); - vy = vqtbl1q_u8(pm1, vandq_u8(vy, x0f)); + vy = vqtbl1q_u8(pm1, vy); vx = veorq_u8(vx, vy); db = veorq_u8(vx, qx); @@ -87,6 +83,7 @@ void __raid6_datap_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dq, { uint8x16_t qm0 = vld1q_u8(qmul); uint8x16_t qm1 = vld1q_u8(qmul + 16); + uint8x16_t x0f = vdupq_n_u8(0x0f); /* * while (bytes--) { @@ -100,9 +97,9 @@ void __raid6_datap_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dq, vx = veorq_u8(vld1q_u8(q), vld1q_u8(dq)); - vy = (uint8x16_t)vshrq_n_s16((int16x8_t)vx, 4); + vy = vshrq_n_u8(vx, 4); vx = vqtbl1q_u8(qm0, vandq_u8(vx, x0f)); - vy = vqtbl1q_u8(qm1, vandq_u8(vy, x0f)); + vy = vqtbl1q_u8(qm1, vy); vx = veorq_u8(vx, vy); vy = veorq_u8(vx, vld1q_u8(p));