whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit 35a738fb5fd0fdd1cc7e749e3a45f1876ecd1db8
parent bcd49c3dd172c38e14faf151adca63c8db4c9557
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Thu,  7 Mar 2019 17:09:28 -0800

Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fpu updates from Ingo Molnar:
 "Three changes:

   - preparatory patch for AVX state tracking that computing-cluster
     folks would like to use for user-space batching - but we are not
     happy about the related ABI yet so this is only the kernel tracking
     side

   - a cleanup for CR0 handling in do_device_not_available()

   - plus we removed a workaround for an ancient binutils version"

* 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/fpu: Track AVX-512 usage of tasks
  x86/fpu: Get rid of CONFIG_AS_FXSAVEQ
  x86/traps: Have read_cr0() only once in the #NM handler

Diffstat:
March/x86/Makefile | 1-
March/x86/include/asm/fpu/internal.h | 57+++++++++++++--------------------------------------------
March/x86/include/asm/fpu/types.h | 7+++++++
March/x86/kernel/traps.c | 5++---
4 files changed, 22 insertions(+), 48 deletions(-)

diff --git a/arch/x86/Makefile b/arch/x86/Makefile @@ -187,7 +187,6 @@ cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc, cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1) # does binutils support specific instructions? -asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1) asinstr += $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1) avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1) avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h @@ -137,37 +137,25 @@ static inline int copy_fxregs_to_user(struct fxregs_state __user *fx) { if (IS_ENABLED(CONFIG_X86_32)) return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx)); - else if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) + else return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx)); - /* See comment in copy_fxregs_to_kernel() below. */ - return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx)); } static inline void copy_kernel_to_fxregs(struct fxregs_state *fx) { - if (IS_ENABLED(CONFIG_X86_32)) { + if (IS_ENABLED(CONFIG_X86_32)) kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); - } else { - if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) { - kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); - } else { - /* See comment in copy_fxregs_to_kernel() below. */ - kernel_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx)); - } - } + else + kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); } static inline int copy_user_to_fxregs(struct fxregs_state __user *fx) { if (IS_ENABLED(CONFIG_X86_32)) return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); - else if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) + else return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); - - /* See comment in copy_fxregs_to_kernel() below. */ - return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), - "m" (*fx)); } static inline void copy_kernel_to_fregs(struct fregs_state *fx) @@ -184,34 +172,8 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) { if (IS_ENABLED(CONFIG_X86_32)) asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state.fxsave)); - else if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) + else asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave)); - else { - /* Using "rex64; fxsave %0" is broken because, if the memory - * operand uses any extended registers for addressing, a second - * REX prefix will be generated (to the assembler, rex64 - * followed by semicolon is a separate instruction), and hence - * the 64-bitness is lost. - * - * Using "fxsaveq %0" would be the ideal choice, but is only - * supported starting with gas 2.16. - * - * Using, as a workaround, the properly prefixed form below - * isn't accepted by any binutils version so far released, - * complaining that the same type of prefix is used twice if - * an extended register is needed for addressing (fix submitted - * to mainline 2005-11-21). - * - * asm volatile("rex64/fxsave %0" : "=m" (fpu->state.fxsave)); - * - * This, however, we can work around by forcing the compiler to - * select an addressing mode that doesn't require extended - * registers. - */ - asm volatile( "rex64/fxsave (%[fx])" - : "=m" (fpu->state.fxsave) - : [fx] "R" (&fpu->state.fxsave)); - } } /* These macros all use (%edi)/(%rdi) as the single memory argument. */ @@ -414,6 +376,13 @@ static inline int copy_fpregs_to_fpstate(struct fpu *fpu) { if (likely(use_xsave())) { copy_xregs_to_kernel(&fpu->state.xsave); + + /* + * AVX512 state is tracked here because its use is + * known to slow the max clock speed of the core. + */ + if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512) + fpu->avx512_timestamp = jiffies; return 1; } diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h @@ -303,6 +303,13 @@ struct fpu { unsigned char initialized; /* + * @avx512_timestamp: + * + * Records the timestamp of AVX512 use during last context switch. + */ + unsigned long avx512_timestamp; + + /* * @state: * * In-memory copy of all FPU registers that we save/restore diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c @@ -881,12 +881,12 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code) { - unsigned long cr0; + unsigned long cr0 = read_cr0(); RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); #ifdef CONFIG_MATH_EMULATION - if (!boot_cpu_has(X86_FEATURE_FPU) && (read_cr0() & X86_CR0_EM)) { + if (!boot_cpu_has(X86_FEATURE_FPU) && (cr0 & X86_CR0_EM)) { struct math_emu_info info = { }; cond_local_irq_enable(regs); @@ -898,7 +898,6 @@ do_device_not_available(struct pt_regs *regs, long error_code) #endif /* This should not happen. */ - cr0 = read_cr0(); if (WARN(cr0 & X86_CR0_TS, "CR0.TS was set")) { /* Try to fix it up and carry on. */ write_cr0(cr0 & ~X86_CR0_TS);