whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit 6ef746769ef5cfef84cdfdf61ecbab5a6aa4651a
parent 85b5d4bcab8b46664f8e1993bd5919cb0f24a3ca
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Tue, 30 Oct 2018 09:08:07 -0700

Merge tag 'pm-4.20-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm

Pull more power management updates from Rafael Wysocki:
 "These remove a questionable heuristic from the menu cpuidle governor,
  fix a recent build regression in the intel_pstate driver, clean up ARM
  big.LITTLE support in cpufreq and fix up the hung task watchdog's
  interaction with system-wide power management transitions.

  Specifics:

   - Fix build regression in the intel_pstate driver that doesn't build
     without CONFIG_ACPI after recent changes (Dominik Brodowski).

   - One of the heuristics in the menu cpuidle governor is based on a
     function returning 0 most of the time, so drop it and clean up the
     scheduler code related to it (Daniel Lezcano).

   - Prevent the arm_big_little cpufreq driver from being used on ARM64
     which is not suitable for it and drop the arm_big_little_dt driver
     that is not used any more (Sudeep Holla).

   - Prevent the hung task watchdog from triggering during resume from
     system-wide sleep states by disabling it before freezing tasks and
     enabling it again after they have been thawed (Vitaly Kuznetsov)"

* tag 'pm-4.20-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  kernel: hung_task.c: disable on suspend
  cpufreq: remove unused arm_big_little_dt driver
  cpufreq: drop ARM_BIG_LITTLE_CPUFREQ support for ARM64
  cpufreq: intel_pstate: Fix compilation for !CONFIG_ACPI
  cpuidle: menu: Remove get_loadavg() from the performance multiplier
  sched: Factor out nr_iowait and nr_iowait_cpu

Diffstat:
M MAINTAINERS | 1 -
M drivers/cpufreq/Kconfig.arm | 9 +--------
M drivers/cpufreq/Makefile | 3 ---
D drivers/cpufreq/arm_big_little_dt.c | 100 -------------------------------------------------------------------------------
M drivers/cpufreq/intel_pstate.c | 20 +++++++++++---------
M drivers/cpuidle/governors/menu.c | 25 ++++++-------------------
M include/linux/sched/stat.h | 1 -
M kernel/hung_task.c | 30 +++++++++++++++++++++++++++++-
M kernel/sched/core.c | 34 +++++++++++++---------------------
9 files changed, 60 insertions(+), 163 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS @@ -3823,7 +3823,6 @@ W: http://www.arm.com/products/processors/technologies/biglittleprocessing.php S: Maintained F: drivers/cpufreq/arm_big_little.h F: drivers/cpufreq/arm_big_little.c -F: drivers/cpufreq/arm_big_little_dt.c CPU POWER MONITORING SUBSYSTEM M: Thomas Renninger <trenn@suse.com> diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm @@ -28,20 +28,13 @@ config ARM_ARMADA_37XX_CPUFREQ # big LITTLE core layer and glue drivers config ARM_BIG_LITTLE_CPUFREQ tristate "Generic ARM big LITTLE CPUfreq driver" - depends on (ARM_CPU_TOPOLOGY || ARM64) && HAVE_CLK + depends on ARM_CPU_TOPOLOGY && HAVE_CLK # if CPU_THERMAL is on and THERMAL=m, ARM_BIT_LITTLE_CPUFREQ cannot be =y depends on !CPU_THERMAL || THERMAL select PM_OPP help This enables the Generic CPUfreq driver for ARM big.LITTLE platforms. -config ARM_DT_BL_CPUFREQ - tristate "Generic probing via DT for ARM big LITTLE CPUfreq driver" - depends on ARM_BIG_LITTLE_CPUFREQ && OF - help - This enables probing via DT for Generic CPUfreq driver for ARM - big.LITTLE platform. This gets frequency tables from DT. - config ARM_SCPI_CPUFREQ tristate "SCPI based CPUfreq driver" depends on ARM_SCPI_PROTOCOL && COMMON_CLK_SCPI diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile @@ -48,9 +48,6 @@ obj-$(CONFIG_X86_SFI_CPUFREQ) += sfi-cpufreq.o ################################################################################## # ARM SoC drivers obj-$(CONFIG_ARM_BIG_LITTLE_CPUFREQ) += arm_big_little.o -# big LITTLE per platform glues. Keep DT_BL_CPUFREQ as the last entry in all big -# LITTLE drivers, so that it is probed last. 
-obj-$(CONFIG_ARM_DT_BL_CPUFREQ) += arm_big_little_dt.o obj-$(CONFIG_ARM_ARMADA_37XX_CPUFREQ) += armada-37xx-cpufreq.o obj-$(CONFIG_ARM_BRCMSTB_AVS_CPUFREQ) += brcmstb-avs-cpufreq.o diff --git a/drivers/cpufreq/arm_big_little_dt.c b/drivers/cpufreq/arm_big_little_dt.c @@ -1,100 +0,0 @@ -/* - * Generic big.LITTLE CPUFreq Interface driver - * - * It provides necessary ops to arm_big_little cpufreq driver and gets - * Frequency information from Device Tree. Freq table in DT must be in KHz. - * - * Copyright (C) 2013 Linaro. - * Viresh Kumar <viresh.kumar@linaro.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/cpufreq.h> -#include <linux/device.h> -#include <linux/export.h> -#include <linux/module.h> -#include <linux/of_device.h> -#include <linux/pm_opp.h> -#include <linux/platform_device.h> -#include <linux/slab.h> -#include <linux/types.h> -#include "arm_big_little.h" - -/* get cpu node with valid operating-points */ -static struct device_node *get_cpu_node_with_valid_op(int cpu) -{ - struct device_node *np = of_cpu_device_node_get(cpu); - - if (!of_get_property(np, "operating-points", NULL)) { - of_node_put(np); - np = NULL; - } - - return np; -} - -static int dt_get_transition_latency(struct device *cpu_dev) -{ - struct device_node *np; - u32 transition_latency = CPUFREQ_ETERNAL; - - np = of_node_get(cpu_dev->of_node); - if (!np) { - pr_info("Failed to find cpu node. 
Use CPUFREQ_ETERNAL transition latency\n"); - return CPUFREQ_ETERNAL; - } - - of_property_read_u32(np, "clock-latency", &transition_latency); - of_node_put(np); - - pr_debug("%s: clock-latency: %d\n", __func__, transition_latency); - return transition_latency; -} - -static const struct cpufreq_arm_bL_ops dt_bL_ops = { - .name = "dt-bl", - .get_transition_latency = dt_get_transition_latency, - .init_opp_table = dev_pm_opp_of_cpumask_add_table, - .free_opp_table = dev_pm_opp_of_cpumask_remove_table, -}; - -static int generic_bL_probe(struct platform_device *pdev) -{ - struct device_node *np; - - np = get_cpu_node_with_valid_op(0); - if (!np) - return -ENODEV; - - of_node_put(np); - return bL_cpufreq_register(&dt_bL_ops); -} - -static int generic_bL_remove(struct platform_device *pdev) -{ - bL_cpufreq_unregister(&dt_bL_ops); - return 0; -} - -static struct platform_driver generic_bL_platdrv = { - .driver = { - .name = "arm-bL-cpufreq-dt", - }, - .probe = generic_bL_probe, - .remove = generic_bL_remove, -}; -module_platform_driver(generic_bL_platdrv); - -MODULE_AUTHOR("Viresh Kumar <viresh.kumar@linaro.org>"); -MODULE_DESCRIPTION("Generic ARM big LITTLE cpufreq driver via DT"); -MODULE_LICENSE("GPL v2"); diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c @@ -386,16 +386,11 @@ static int intel_pstate_get_cppc_guranteed(int cpu) return cppc_perf.guaranteed_perf; } -#else +#else /* CONFIG_ACPI_CPPC_LIB */ static void intel_pstate_set_itmt_prio(int cpu) { } - -static int intel_pstate_get_cppc_guranteed(int cpu) -{ - return -ENOTSUPP; -} -#endif +#endif /* CONFIG_ACPI_CPPC_LIB */ static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy) { @@ -477,7 +472,7 @@ static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy) acpi_processor_unregister_performance(policy->cpu); } -#else +#else /* CONFIG_ACPI */ static inline void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy) { } @@ -490,7 +485,14 @@ static 
inline bool intel_pstate_acpi_pm_profile_server(void) { return false; } -#endif +#endif /* CONFIG_ACPI */ + +#ifndef CONFIG_ACPI_CPPC_LIB +static int intel_pstate_get_cppc_guranteed(int cpu) +{ + return -ENOTSUPP; +} +#endif /* CONFIG_ACPI_CPPC_LIB */ static inline void update_turbo_state(void) { diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c @@ -130,11 +130,6 @@ struct menu_device { int interval_ptr; }; -static inline int get_loadavg(unsigned long load) -{ - return LOAD_INT(load) * 10 + LOAD_FRAC(load) / 10; -} - static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters) { int bucket = 0; @@ -168,18 +163,10 @@ static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters * to be, the higher this multiplier, and thus the higher * the barrier to go to an expensive C state. */ -static inline int performance_multiplier(unsigned long nr_iowaiters, unsigned long load) +static inline int performance_multiplier(unsigned long nr_iowaiters) { - int mult = 1; - - /* for higher loadavg, we are more reluctant */ - - mult += 2 * get_loadavg(load); - - /* for IO wait tasks (per cpu!) we add 5x each */ - mult += 10 * nr_iowaiters; - - return mult; + /* for IO wait tasks (per cpu!) 
we add 10x each */ + return 1 + 10 * nr_iowaiters; } static DEFINE_PER_CPU(struct menu_device, menu_devices); @@ -297,7 +284,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, int idx; unsigned int interactivity_req; unsigned int predicted_us; - unsigned long nr_iowaiters, cpu_load; + unsigned long nr_iowaiters; ktime_t delta_next; if (data->needs_update) { @@ -308,7 +295,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, /* determine the expected residency time, round up */ data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length(&delta_next)); - get_iowait_load(&nr_iowaiters, &cpu_load); + nr_iowaiters = nr_iowait_cpu(dev->cpu); data->bucket = which_bucket(data->next_timer_us, nr_iowaiters); if (unlikely(drv->state_count <= 1 || latency_req == 0) || @@ -352,7 +339,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * Use the performance multiplier and the user-configurable * latency_req to determine the maximum exit latency. 
*/ - interactivity_req = predicted_us / performance_multiplier(nr_iowaiters, cpu_load); + interactivity_req = predicted_us / performance_multiplier(nr_iowaiters); if (latency_req > interactivity_req) latency_req = interactivity_req; } diff --git a/include/linux/sched/stat.h b/include/linux/sched/stat.h @@ -20,7 +20,6 @@ extern unsigned long nr_running(void); extern bool single_task_running(void); extern unsigned long nr_iowait(void); extern unsigned long nr_iowait_cpu(int cpu); -extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); static inline int sched_info_on(void) { diff --git a/kernel/hung_task.c b/kernel/hung_task.c @@ -15,6 +15,7 @@ #include <linux/lockdep.h> #include <linux/export.h> #include <linux/sysctl.h> +#include <linux/suspend.h> #include <linux/utsname.h> #include <linux/sched/signal.h> #include <linux/sched/debug.h> @@ -242,6 +243,28 @@ void reset_hung_task_detector(void) } EXPORT_SYMBOL_GPL(reset_hung_task_detector); +static bool hung_detector_suspended; + +static int hungtask_pm_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + switch (action) { + case PM_SUSPEND_PREPARE: + case PM_HIBERNATION_PREPARE: + case PM_RESTORE_PREPARE: + hung_detector_suspended = true; + break; + case PM_POST_SUSPEND: + case PM_POST_HIBERNATION: + case PM_POST_RESTORE: + hung_detector_suspended = false; + break; + default: + break; + } + return NOTIFY_OK; +} + /* * kthread which checks for tasks stuck in D state */ @@ -261,7 +284,8 @@ static int watchdog(void *dummy) interval = min_t(unsigned long, interval, timeout); t = hung_timeout_jiffies(hung_last_checked, interval); if (t <= 0) { - if (!atomic_xchg(&reset_hung_task, 0)) + if (!atomic_xchg(&reset_hung_task, 0) && + !hung_detector_suspended) check_hung_uninterruptible_tasks(timeout); hung_last_checked = jiffies; continue; @@ -275,6 +299,10 @@ static int watchdog(void *dummy) static int __init hung_task_init(void) { 
atomic_notifier_chain_register(&panic_notifier_list, &panic_block); + + /* Disable hung task detector on suspend */ + pm_notifier(hungtask_pm_notify, 0); + watchdog_task = kthread_run(watchdog, NULL, "khungtaskd"); return 0; diff --git a/kernel/sched/core.c b/kernel/sched/core.c @@ -2881,6 +2881,18 @@ unsigned long long nr_context_switches(void) } /* + * Consumers of these two interfaces, like for example the cpuidle menu + * governor, are using nonsensical data. Preferring shallow idle state selection + * for a CPU that has IO-wait which might not even end up running the task when + * it does become runnable. + */ + +unsigned long nr_iowait_cpu(int cpu) +{ + return atomic_read(&cpu_rq(cpu)->nr_iowait); +} + +/* * IO-wait accounting, and how its mostly bollocks (on SMP). * * The idea behind IO-wait account is to account the idle time that we could @@ -2915,31 +2927,11 @@ unsigned long nr_iowait(void) unsigned long i, sum = 0; for_each_possible_cpu(i) - sum += atomic_read(&cpu_rq(i)->nr_iowait); + sum += nr_iowait_cpu(i); return sum; } -/* - * Consumers of these two interfaces, like for example the cpuidle menu - * governor, are using nonsensical data. Preferring shallow idle state selection - * for a CPU that has IO-wait which might not even end up running the task when - * it does become runnable. - */ - -unsigned long nr_iowait_cpu(int cpu) -{ - struct rq *this = cpu_rq(cpu); - return atomic_read(&this->nr_iowait); -} - -void get_iowait_load(unsigned long *nr_waiters, unsigned long *load) -{ - struct rq *rq = this_rq(); - *nr_waiters = atomic_read(&rq->nr_iowait); - *load = rq->load.weight; -} - #ifdef CONFIG_SMP /*