whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit a67eefad996f007afc7c38efe8f45257fb1e3994
parent 62606c224d72a98c35d21a849f95cccf95b0a252
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Thu, 25 Oct 2018 17:11:52 -0700

Merge tag 'printk-for-4.20' of git://git.kernel.org/pub/scm/linux/kernel/git/pmladek/printk

Pull printk updates from Petr Mladek:

 - Fix two more locations where printf formatting leaked pointers

 - Better log_buf_len parameter handling

 - Add prefix to messages from printk code

 - Do not miss messages on other consoles when the log is replayed on a
   new one

 - Reduce race between console registration and panic() when the log
   might get replayed on all consoles

 - Some cont buffer code clean up

 - Call console only when there is something to do (log vs cont buffer)

* tag 'printk-for-4.20' of git://git.kernel.org/pub/scm/linux/kernel/git/pmladek/printk:
  lib/vsprintf: Hash printed address for netdev bits fallback
  lib/vsprintf: Hash legacy clock addresses
  lib/vsprintf: Prepare for more general use of ptr_to_id()
  lib/vsprintf: Make ptr argument conts in ptr_to_id()
  printk: fix integer overflow in setup_log_buf()
  printk: do not preliminary split up cont buffer
  printk: lock/unlock console only for new logbuf entries
  printk: keep kernel cont support always enabled
  printk: Give error on attempt to set log buffer length to over 2G
  printk: Add KBUILD_MODNAME and remove a redundant print prefix
  printk: Correct wrong casting
  printk: Fix panic caused by passing log_buf_len to command line
  printk: CON_PRINTBUFFER console registration is a bit racy
  printk: Do not miss new messages when replaying the log

Diffstat:
MDocumentation/core-api/printk-formats.rst | 5++---
Mkernel/printk/printk.c | 86++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
Mlib/vsprintf.c | 216++++++++++++++++++++++++++++++++++++++++----------------------------------------
3 files changed, 158 insertions(+), 149 deletions(-)

diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst @@ -420,9 +420,8 @@ struct clk %pC pll1 %pCn pll1 -For printing struct clk structures. %pC and %pCn print the name -(Common Clock Framework) or address (legacy clock framework) of the -structure. +For printing struct clk structures. %pC and %pCn print the name of the clock +(Common Clock Framework) or a unique 32-bit ID (legacy clock framework). Passed by reference. diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c @@ -16,6 +16,8 @@ * 01Mar01 Andrew Morton */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/mm.h> #include <linux/tty.h> @@ -192,16 +194,7 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, return 0; } -/* - * Number of registered extended console drivers. - * - * If extended consoles are present, in-kernel cont reassembly is disabled - * and each fragment is stored as a separate log entry with proper - * continuation flag so that every emitted message has full metadata. This - * doesn't change the result for regular consoles or /proc/kmsg. For - * /dev/kmsg, as long as the reader concatenates messages according to - * consecutive continuation flags, the end result should be the same too. - */ +/* Number of registered extended console drivers. */ static int nr_ext_console_drivers; /* @@ -423,6 +416,7 @@ static u32 log_next_idx; /* the next printk record to write to the console */ static u64 console_seq; static u32 console_idx; +static u64 exclusive_console_stop_seq; /* the next printk record to read after the last 'clear' command */ static u64 clear_seq; @@ -437,6 +431,7 @@ static u32 clear_idx; /* record buffer */ #define LOG_ALIGN __alignof__(struct printk_log) #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) +#define LOG_BUF_LEN_MAX (u32)(1 << 31) static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); static char *log_buf = __log_buf; static u32 log_buf_len = __LOG_BUF_LEN; @@ -1037,18 +1032,28 @@ void log_buf_vmcoreinfo_setup(void) static unsigned long __initdata new_log_buf_len; /* we practice scaling the ring buffer by powers of 2 */ -static void __init log_buf_len_update(unsigned size) +static void __init log_buf_len_update(u64 size) { + if (size > (u64)LOG_BUF_LEN_MAX) { + size = (u64)LOG_BUF_LEN_MAX; + pr_err("log_buf over 2G is not supported.\n"); + } + if (size) size = roundup_pow_of_two(size); if (size > log_buf_len) - new_log_buf_len = size; + new_log_buf_len = (unsigned long)size; } /* save requested log_buf_len since it's too early to process it */ static int __init log_buf_len_setup(char *str) { - unsigned size = memparse(str, &str); + u64 size; + + if (!str) + return -EINVAL; + + size = memparse(str, &str); log_buf_len_update(size); @@ -1093,7 +1098,7 @@ void __init setup_log_buf(int early) { unsigned long flags; char *new_log_buf; - int free; + unsigned int free; if (log_buf != __log_buf) return; @@ -1113,7 +1118,7 @@ void __init setup_log_buf(int early) } if (unlikely(!new_log_buf)) { - pr_err("log_buf_len: %ld bytes not available\n", + pr_err("log_buf_len: %lu bytes not available\n", new_log_buf_len); return; } @@ -1126,8 +1131,8 @@ void __init setup_log_buf(int early) memcpy(log_buf, __log_buf, __LOG_BUF_LEN); logbuf_unlock_irqrestore(flags); - pr_info("log_buf_len: %d bytes\n", log_buf_len); - pr_info("early log buf free: %d(%d%%)\n", + pr_info("log_buf_len: %u bytes\n", log_buf_len); + pr_info("early log buf free: %u(%u%%)\n", free, (free * 100) / __LOG_BUF_LEN); } @@ -1767,12 +1772,8 @@ static void cont_flush(void) static bool cont_add(int facility, int level, enum log_flags flags, const char *text, size_t len) { - /* - * If ext consoles are present, flush and skip in-kernel - * continuation. See nr_ext_console_drivers definition. Also, if - * the line gets too long, split it up in separate records. - */ - if (nr_ext_console_drivers || cont.len + len > sizeof(cont.buf)) { + /* If the line gets too long, split it up in separate records. */ + if (cont.len + len > sizeof(cont.buf)) { cont_flush(); return false; } @@ -1795,9 +1796,6 @@ static bool cont_add(int facility, int level, enum log_flags flags, const char * cont_flush(); } - if (cont.len > (sizeof(cont.buf) * 80) / 100) - cont_flush(); - return true; } @@ -1889,8 +1887,9 @@ asmlinkage int vprintk_emit(int facility, int level, const char *fmt, va_list args) { int printed_len; - bool in_sched = false; + bool in_sched = false, pending_output; unsigned long flags; + u64 curr_log_seq; if (level == LOGLEVEL_SCHED) { level = LOGLEVEL_DEFAULT; @@ -1902,11 +1901,13 @@ asmlinkage int vprintk_emit(int facility, int level, /* This stops the holder of console_sem just where we want him */ logbuf_lock_irqsave(flags); + curr_log_seq = log_next_seq; printed_len = vprintk_store(facility, level, dict, dictlen, fmt, args); + pending_output = (curr_log_seq != log_next_seq); logbuf_unlock_irqrestore(flags); /* If called from the scheduler, we can not call up(). */ - if (!in_sched) { + if (!in_sched && pending_output) { /* * Disable preemption to avoid being preempted while holding * console_sem which would prevent anyone from printing to @@ -1923,7 +1924,8 @@ asmlinkage int vprintk_emit(int facility, int level, preempt_enable(); } - wake_up_klogd(); + if (pending_output) + wake_up_klogd(); return printed_len; } EXPORT_SYMBOL(vprintk_emit); @@ -2009,6 +2011,7 @@ static u64 syslog_seq; static u32 syslog_idx; static u64 console_seq; static u32 console_idx; +static u64 exclusive_console_stop_seq; static u64 log_first_seq; static u32 log_first_idx; static u64 log_next_seq; @@ -2351,8 +2354,9 @@ again: printk_safe_enter_irqsave(flags); raw_spin_lock(&logbuf_lock); if (console_seq < log_first_seq) { - len = sprintf(text, "** %u printk messages dropped **\n", - (unsigned)(log_first_seq - console_seq)); + len = sprintf(text, + "** %llu printk messages dropped **\n", + log_first_seq - console_seq); /* messages are gone, move to first one */ console_seq = log_first_seq; @@ -2376,6 +2380,12 @@ skip: goto skip; } + /* Output to all consoles once old messages replayed. */ + if (unlikely(exclusive_console && + console_seq >= exclusive_console_stop_seq)) { + exclusive_console = NULL; + } + len += msg_print_text(msg, console_msg_format & MSG_FORMAT_SYSLOG, text + len, @@ -2418,10 +2428,6 @@ skip: console_locked = 0; - /* Release the exclusive_console once it is used */ - if (unlikely(exclusive_console)) - exclusive_console = NULL; - raw_spin_unlock(&logbuf_lock); up_console_sem(); @@ -2688,8 +2694,7 @@ void register_console(struct console *newcon) } if (newcon->flags & CON_EXTENDED) - if (!nr_ext_console_drivers++) - pr_info("printk: continuation disabled due to ext consoles, expect more fragments in /dev/kmsg\n"); + nr_ext_console_drivers++; if (newcon->flags & CON_PRINTBUFFER) { /* @@ -2699,13 +2704,18 @@ void register_console(struct console *newcon) logbuf_lock_irqsave(flags); console_seq = syslog_seq; console_idx = syslog_idx; - logbuf_unlock_irqrestore(flags); /* * We're about to replay the log buffer. Only do this to the * just-registered console to avoid excessive message spam to * the already-registered consoles. + * + * Set exclusive_console with disabled interrupts to reduce + * race window with eventual console_flush_on_panic() that + * ignores console_lock. */ exclusive_console = newcon; + exclusive_console_stop_seq = console_seq; + logbuf_unlock_irqrestore(flags); } console_unlock(); console_sysfs_notify(); diff --git a/lib/vsprintf.c b/lib/vsprintf.c @@ -613,6 +613,109 @@ char *string(char *buf, char *end, const char *s, struct printf_spec spec) } static noinline_for_stack +char *pointer_string(char *buf, char *end, const void *ptr, + struct printf_spec spec) +{ + spec.base = 16; + spec.flags |= SMALL; + if (spec.field_width == -1) { + spec.field_width = 2 * sizeof(ptr); + spec.flags |= ZEROPAD; + } + + return number(buf, end, (unsigned long int)ptr, spec); +} + +/* Make pointers available for printing early in the boot sequence. */ +static int debug_boot_weak_hash __ro_after_init; + +static int __init debug_boot_weak_hash_enable(char *str) +{ + debug_boot_weak_hash = 1; + pr_info("debug_boot_weak_hash enabled\n"); + return 0; +} +early_param("debug_boot_weak_hash", debug_boot_weak_hash_enable); + +static DEFINE_STATIC_KEY_TRUE(not_filled_random_ptr_key); +static siphash_key_t ptr_key __read_mostly; + +static void enable_ptr_key_workfn(struct work_struct *work) +{ + get_random_bytes(&ptr_key, sizeof(ptr_key)); + /* Needs to run from preemptible context */ + static_branch_disable(&not_filled_random_ptr_key); +} + +static DECLARE_WORK(enable_ptr_key_work, enable_ptr_key_workfn); + +static void fill_random_ptr_key(struct random_ready_callback *unused) +{ + /* This may be in an interrupt handler. */ + queue_work(system_unbound_wq, &enable_ptr_key_work); +} + +static struct random_ready_callback random_ready = { + .func = fill_random_ptr_key +}; + +static int __init initialize_ptr_random(void) +{ + int key_size = sizeof(ptr_key); + int ret; + + /* Use hw RNG if available. */ + if (get_random_bytes_arch(&ptr_key, key_size) == key_size) { + static_branch_disable(&not_filled_random_ptr_key); + return 0; + } + + ret = add_random_ready_callback(&random_ready); + if (!ret) { + return 0; + } else if (ret == -EALREADY) { + /* This is in preemptible context */ + enable_ptr_key_workfn(&enable_ptr_key_work); + return 0; + } + + return ret; +} +early_initcall(initialize_ptr_random); + +/* Maps a pointer to a 32 bit unique identifier. */ +static char *ptr_to_id(char *buf, char *end, const void *ptr, + struct printf_spec spec) +{ + const char *str = sizeof(ptr) == 8 ? "(____ptrval____)" : "(ptrval)"; + unsigned long hashval; + + /* When debugging early boot use non-cryptographically secure hash. */ + if (unlikely(debug_boot_weak_hash)) { + hashval = hash_long((unsigned long)ptr, 32); + return pointer_string(buf, end, (const void *)hashval, spec); + } + + if (static_branch_unlikely(&not_filled_random_ptr_key)) { + spec.field_width = 2 * sizeof(ptr); + /* string length must be less than default_width */ + return string(buf, end, str, spec); + } + +#ifdef CONFIG_64BIT + hashval = (unsigned long)siphash_1u64((u64)ptr, &ptr_key); + /* + * Mask off the first 32 bits, this makes explicit that we have + * modified the address (and 32 bits is plenty for a unique ID). + */ + hashval = hashval & 0xffffffff; +#else + hashval = (unsigned long)siphash_1u32((u32)ptr, &ptr_key); +#endif + return pointer_string(buf, end, (const void *)hashval, spec); +} + +static noinline_for_stack char *dentry_name(char *buf, char *end, const struct dentry *d, struct printf_spec spec, const char *fmt) { @@ -1357,20 +1460,6 @@ char *uuid_string(char *buf, char *end, const u8 *addr, return string(buf, end, uuid, spec); } -static noinline_for_stack -char *pointer_string(char *buf, char *end, const void *ptr, - struct printf_spec spec) -{ - spec.base = 16; - spec.flags |= SMALL; - if (spec.field_width == -1) { - spec.field_width = 2 * sizeof(ptr); - spec.flags |= ZEROPAD; - } - - return number(buf, end, (unsigned long int)ptr, spec); -} - int kptr_restrict __read_mostly; static noinline_for_stack @@ -1421,7 +1510,8 @@ char *restricted_pointer(char *buf, char *end, const void *ptr, } static noinline_for_stack -char *netdev_bits(char *buf, char *end, const void *addr, const char *fmt) +char *netdev_bits(char *buf, char *end, const void *addr, + struct printf_spec spec, const char *fmt) { unsigned long long num; int size; @@ -1432,9 +1522,7 @@ char *netdev_bits(char *buf, char *end, const void *addr, const char *fmt) size = sizeof(netdev_features_t); break; default: - num = (unsigned long)addr; - size = sizeof(unsigned long); - break; + return ptr_to_id(buf, end, addr, spec); } return special_hex_number(buf, end, num, size); @@ -1474,7 +1562,7 @@ char *clock(char *buf, char *end, struct clk *clk, struct printf_spec spec, #ifdef CONFIG_COMMON_CLK return string(buf, end, __clk_get_name(clk), spec); #else - return special_hex_number(buf, end, (unsigned long)clk, sizeof(unsigned long)); + return ptr_to_id(buf, end, clk, spec); #endif } } @@ -1651,94 +1739,6 @@ char *device_node_string(char *buf, char *end, struct device_node *dn, return widen_string(buf, buf - buf_start, end, spec); } -/* Make pointers available for printing early in the boot sequence. */ -static int debug_boot_weak_hash __ro_after_init; - -static int __init debug_boot_weak_hash_enable(char *str) -{ - debug_boot_weak_hash = 1; - pr_info("debug_boot_weak_hash enabled\n"); - return 0; -} -early_param("debug_boot_weak_hash", debug_boot_weak_hash_enable); - -static DEFINE_STATIC_KEY_TRUE(not_filled_random_ptr_key); -static siphash_key_t ptr_key __read_mostly; - -static void enable_ptr_key_workfn(struct work_struct *work) -{ - get_random_bytes(&ptr_key, sizeof(ptr_key)); - /* Needs to run from preemptible context */ - static_branch_disable(&not_filled_random_ptr_key); -} - -static DECLARE_WORK(enable_ptr_key_work, enable_ptr_key_workfn); - -static void fill_random_ptr_key(struct random_ready_callback *unused) -{ - /* This may be in an interrupt handler. */ - queue_work(system_unbound_wq, &enable_ptr_key_work); -} - -static struct random_ready_callback random_ready = { - .func = fill_random_ptr_key -}; - -static int __init initialize_ptr_random(void) -{ - int key_size = sizeof(ptr_key); - int ret; - - /* Use hw RNG if available. */ - if (get_random_bytes_arch(&ptr_key, key_size) == key_size) { - static_branch_disable(&not_filled_random_ptr_key); - return 0; - } - - ret = add_random_ready_callback(&random_ready); - if (!ret) { - return 0; - } else if (ret == -EALREADY) { - /* This is in preemptible context */ - enable_ptr_key_workfn(&enable_ptr_key_work); - return 0; - } - - return ret; -} -early_initcall(initialize_ptr_random); - -/* Maps a pointer to a 32 bit unique identifier. */ -static char *ptr_to_id(char *buf, char *end, void *ptr, struct printf_spec spec) -{ - const char *str = sizeof(ptr) == 8 ? "(____ptrval____)" : "(ptrval)"; - unsigned long hashval; - - /* When debugging early boot use non-cryptographically secure hash. */ - if (unlikely(debug_boot_weak_hash)) { - hashval = hash_long((unsigned long)ptr, 32); - return pointer_string(buf, end, (const void *)hashval, spec); - } - - if (static_branch_unlikely(&not_filled_random_ptr_key)) { - spec.field_width = 2 * sizeof(ptr); - /* string length must be less than default_width */ - return string(buf, end, str, spec); - } - -#ifdef CONFIG_64BIT - hashval = (unsigned long)siphash_1u64((u64)ptr, &ptr_key); - /* - * Mask off the first 32 bits, this makes explicit that we have - * modified the address (and 32 bits is plenty for a unique ID). - */ - hashval = hashval & 0xffffffff; -#else - hashval = (unsigned long)siphash_1u32((u32)ptr, &ptr_key); -#endif - return pointer_string(buf, end, (const void *)hashval, spec); -} - /* * Show a '%p' thing. A kernel extension is that the '%p' is followed * by an extra set of alphanumeric characters that are extended format @@ -1942,7 +1942,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, break; return restricted_pointer(buf, end, ptr, spec); case 'N': - return netdev_bits(buf, end, ptr, fmt); + return netdev_bits(buf, end, ptr, spec, fmt); case 'a': return address_val(buf, end, ptr, fmt); case 'd':