whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit 262d6a9a63a387c8dfa9eb4f7713e159c941e52c
parent b6e3cb4e8679dd971eed33f6a08d62c66a4230c9
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Sun, 10 Mar 2019 14:46:56 -0700

Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Thomas Gleixner:
 "A set of fixes for x86:

   - Make the unwinder more robust when it encounters a NULL pointer
     call, so the backtrace becomes more useful

   - Fix the bogus ORC unwind table alignment

   - Prevent kernel panic during kexec on HyperV caused by a cleared but
     not disabled hypercall page.

   - Remove the now pointless stacksize increase for KASAN_EXTRA, as
     KASAN_EXTRA is gone.

   - Remove unused variables from the x86 memory management code"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/hyperv: Fix kernel panic when kexec on HyperV
  x86/mm: Remove unused variable 'old_pte'
  x86/mm: Remove unused variable 'cpu'
  Revert "x86_64: Increase stack size for KASAN_EXTRA"
  x86/unwind: Add hardcoded ORC entry for NULL
  x86/unwind: Handle NULL pointer calls better in frame unwinder
  x86/unwind/orc: Fix ORC unwind table alignment

Diffstat:
M arch/x86/hyperv/hv_init.c | 7 +++++++
M arch/x86/include/asm/page_64_types.h | 4 ----
M arch/x86/include/asm/unwind.h | 6 ++++++
M arch/x86/kernel/unwind_frame.c | 25 ++++++++++++++++++++++---
M arch/x86/kernel/unwind_orc.c | 17 +++++++++++++++++
M arch/x86/mm/pageattr.c | 4 ++--
M include/asm-generic/vmlinux.lds.h | 2 +-
7 files changed, 55 insertions(+), 10 deletions(-)

diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
@@ -407,6 +407,13 @@ void hyperv_cleanup(void)
 	/* Reset our OS id */
 	wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
 
+	/*
+	 * Reset hypercall page reference before reset the page,
+	 * let hypercall operations fail safely rather than
+	 * panic the kernel for using invalid hypercall page
+	 */
+	hv_hypercall_pg = NULL;
+
 	/* Reset the hypercall page */
 	hypercall_msr.as_uint64 = 0;
 	wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
@@ -7,11 +7,7 @@
 #endif
 
 #ifdef CONFIG_KASAN
-#ifdef CONFIG_KASAN_EXTRA
-#define KASAN_STACK_ORDER 2
-#else
 #define KASAN_STACK_ORDER 1
-#endif
 #else
 #define KASAN_STACK_ORDER 0
 #endif
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
@@ -23,6 +23,12 @@ struct unwind_state {
 #elif defined(CONFIG_UNWINDER_FRAME_POINTER)
 	bool got_irq;
 	unsigned long *bp, *orig_sp, ip;
+	/*
+	 * If non-NULL: The current frame is incomplete and doesn't contain a
+	 * valid BP. When looking for the next frame, use this instead of the
+	 * non-existent saved BP.
+	 */
+	unsigned long *next_bp;
 	struct pt_regs *regs;
 #else
 	unsigned long *sp;
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
@@ -320,10 +320,14 @@ bool unwind_next_frame(struct unwind_state *state)
 	}
 
 	/* Get the next frame pointer: */
-	if (state->regs)
+	if (state->next_bp) {
+		next_bp = state->next_bp;
+		state->next_bp = NULL;
+	} else if (state->regs) {
 		next_bp = (unsigned long *)state->regs->bp;
-	else
+	} else {
 		next_bp = (unsigned long *)READ_ONCE_TASK_STACK(state->task, *state->bp);
+	}
 
 	/* Move to the next frame if it's safe: */
 	if (!update_stack_state(state, next_bp))
@@ -398,6 +402,21 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
 
 	bp = get_frame_pointer(task, regs);
 
+	/*
+	 * If we crash with IP==0, the last successfully executed instruction
+	 * was probably an indirect function call with a NULL function pointer.
+	 * That means that SP points into the middle of an incomplete frame:
+	 * *SP is a return pointer, and *(SP-sizeof(unsigned long)) is where we
+	 * would have written a frame pointer if we hadn't crashed.
+	 * Pretend that the frame is complete and that BP points to it, but save
+	 * the real BP so that we can use it when looking for the next frame.
+	 */
+	if (regs && regs->ip == 0 &&
+	    (unsigned long *)kernel_stack_pointer(regs) >= first_frame) {
+		state->next_bp = bp;
+		bp = ((unsigned long *)kernel_stack_pointer(regs)) - 1;
+	}
+
 	/* Initialize stack info and make sure the frame data is accessible: */
 	get_stack_info(bp, state->task, &state->stack_info,
 		       &state->stack_mask);
@@ -410,7 +429,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
 	 */
 	while (!unwind_done(state) &&
 	       (!on_stack(&state->stack_info, first_frame, sizeof(long)) ||
-			state->bp < first_frame))
+			(state->next_bp == NULL && state->bp < first_frame)))
 		unwind_next_frame(state);
 }
 EXPORT_SYMBOL_GPL(__unwind_start);
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
@@ -113,6 +113,20 @@ static struct orc_entry *orc_ftrace_find(unsigned long ip)
 }
 #endif
 
+/*
+ * If we crash with IP==0, the last successfully executed instruction
+ * was probably an indirect function call with a NULL function pointer,
+ * and we don't have unwind information for NULL.
+ * This hardcoded ORC entry for IP==0 allows us to unwind from a NULL function
+ * pointer into its parent and then continue normally from there.
+ */
+static struct orc_entry null_orc_entry = {
+	.sp_offset = sizeof(long),
+	.sp_reg = ORC_REG_SP,
+	.bp_reg = ORC_REG_UNDEFINED,
+	.type = ORC_TYPE_CALL
+};
+
 static struct orc_entry *orc_find(unsigned long ip)
 {
 	static struct orc_entry *orc;
@@ -120,6 +134,9 @@ static struct orc_entry *orc_find(unsigned long ip)
 	if (!orc_init)
 		return NULL;
 
+	if (ip == 0)
+		return &null_orc_entry;
+
 	/* For non-init vmlinux addresses, use the fast lookup table: */
 	if (ip >= LOOKUP_START_IP && ip < LOOKUP_STOP_IP) {
 		unsigned int idx, start, stop;
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
@@ -738,7 +738,7 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address,
 {
 	unsigned long numpages, pmask, psize, lpaddr, pfn, old_pfn;
 	pgprot_t old_prot, new_prot, req_prot, chk_prot;
-	pte_t new_pte, old_pte, *tmp;
+	pte_t new_pte, *tmp;
 	enum pg_level level;
 
 	/*
@@ -781,7 +781,7 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address,
 	 * Convert protection attributes to 4k-format, as cpa->mask* are set
 	 * up accordingly.
 	 */
-	old_pte = *kpte;
+
 	/* Clear PSE (aka _PAGE_PAT) and move PAT bit to correct position */
 	req_prot = pgprot_large_2_4k(old_prot);
 
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
@@ -733,7 +733,7 @@
 		KEEP(*(.orc_unwind_ip)) \
 		__stop_orc_unwind_ip = .; \
 	} \
-	. = ALIGN(6); \
+	. = ALIGN(2); \
 	.orc_unwind : AT(ADDR(.orc_unwind) - LOAD_OFFSET) { \
 		__start_orc_unwind = .; \
 		KEEP(*(.orc_unwind)) \