From a831881be220358a1d28c5d95d69449fb6d623ca Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 18 Dec 2012 17:32:19 +0100 Subject: nohz: Basic full dynticks interface For extreme usecases such as Real Time or HPC, having the ability to shutdown the tick when a single task runs on a CPU is a desired feature: * Reducing the amount of interrupts improves throughput for CPU-bound tasks. The CPU is less distracted from its real job, from an execution time and from the cache point of views. * This also improve latency response as we have less critical sections. Start with introducing a very simple interface to define full dynticks CPU: use a boot time option defined cpumask through the "nohz_extended=" kernel parameter. CPUs that are part of this range will have their tick shutdown whenever possible: provided they run a single task and they don't do kernel activity that require the periodic tick. These details will be later documented in Documentation/* An online CPU must be kept outside this range to handle the timekeeping. Suggested-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Namhyung Kim Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- kernel/time/Kconfig | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'kernel/time/Kconfig') diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 24510d84efd7..5a87c03e45ad 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -79,6 +79,25 @@ config NO_HZ only trigger on an as-needed basis both when the system is busy and when the system is idle. +config NO_HZ_EXTENDED + bool "Full dynticks system" + depends on NO_HZ && RCU_USER_QS && VIRT_CPU_ACCOUNTING_GEN && RCU_NOCB_CPU && SMP + select CONTEXT_TRACKING_FORCE + help + Adaptively try to shutdown the tick whenever possible, even when + the CPU is running tasks. Typically this requires running a single + task on the CPU. Chances for running tickless are maximized when + the task mostly runs in userspace and has few kernel activity. + + You need to fill up the nohz_extended boot parameter with the + desired range of dynticks CPUs. + + This is implemented at the expense of some overhead in user <-> kernel + transitions: syscalls, exceptions and interrupts. Even when it's + dynamically off. + + Say N. + config HIGH_RES_TIMERS bool "High Resolution Timer Support" depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS -- cgit v1.2.3 From ab71d36ddb9e60d4ddb28a187718815d38c3c666 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 10 Aug 2011 23:21:01 +0200 Subject: nohz: Unhide full dynticks feature from its dependencies The full dynticks feature only shows up when all its Kconfig dependencies are met (RCU nocbs, RCU user mode, ...) This is far from being user friendly as those who want to activate this feature need to look into the Kconfig files and iterate through each dependency then activate these by hand in order to show and select the full dynticks Kconfig option. So process the other way around: show up the Kconfig option if the minimal low level dependencies are met and activate the high level ones when we enable the feature. Note there is one exception in the picture: CONFIG_VIRT_CPU_ACCOUNTING_GEN is part of a Kconfig choice menu and it appears we can't select it from another Kconfig selection when it's under such layout. So for now this particular item stays as a passive dependency. Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Namhyung Kim Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- kernel/time/Kconfig | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'kernel/time/Kconfig') diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 5a87c03e45ad..726c33e00da2 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -80,11 +80,20 @@ config NO_HZ busy and when the system is idle. config NO_HZ_EXTENDED - bool "Full dynticks system" - depends on NO_HZ && RCU_USER_QS && VIRT_CPU_ACCOUNTING_GEN && RCU_NOCB_CPU && SMP - select CONTEXT_TRACKING_FORCE - help - Adaptively try to shutdown the tick whenever possible, even when + bool "Full dynticks system" + # NO_HZ dependency + depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS + # RCU_USER_QS + depends on HAVE_CONTEXT_TRACKING && SMP + # RCU_NOCB_CPU dependency + depends on TREE_RCU || TREE_PREEMPT_RCU + depends on VIRT_CPU_ACCOUNTING_GEN + select NO_HZ + select RCU_USER_QS + select RCU_NOCB_CPU + select CONTEXT_TRACKING_FORCE + help + Adaptively try to shutdown the tick whenever possible, even when the CPU is running tasks. Typically this requires running a single task on the CPU. Chances for running tickless are maximized when the task mostly runs in userspace and has few kernel activity. -- cgit v1.2.3 From 3451d0243c3cdfd729b36f9684a14659d4895ca3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 10 Aug 2011 23:21:01 +0200 Subject: nohz: Rename CONFIG_NO_HZ to CONFIG_NO_HZ_COMMON We are planning to convert the dynticks Kconfig options layout into a choice menu. The user must be able to easily pick any of the following implementations: constant periodic tick, idle dynticks, full dynticks. As this implies a mutual exclusion, the two dynticks implementions need to converge on the selection of a common Kconfig option in order to ease the sharing of a common infrastructure. It would thus seem pretty natural to reuse CONFIG_NO_HZ to that end. It already implements all the idle dynticks code and the full dynticks depends on all that code for now. So ideally the choice menu would propose CONFIG_NO_HZ_IDLE and CONFIG_NO_HZ_EXTENDED then both would select CONFIG_NO_HZ. On the other hand we want to stay backward compatible: if CONFIG_NO_HZ is set in an older config file, we want to enable CONFIG_NO_HZ_IDLE by default. But we can't afford both at the same time or we run into a circular dependency: 1) CONFIG_NO_HZ_IDLE and CONFIG_NO_HZ_EXTENDED both select CONFIG_NO_HZ 2) If CONFIG_NO_HZ is set, we default to CONFIG_NO_HZ_IDLE We might be able to support that from Kconfig/Kbuild but it may not be wise to introduce such a confusing behaviour. So to solve this, create a new CONFIG_NO_HZ_COMMON option which gathers the common code between idle and full dynticks (that common code for now is simply the idle dynticks code) and select it from their referring Kconfig. Then we'll later create CONFIG_NO_HZ_IDLE and map CONFIG_NO_HZ to it for backward compatibility. Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Namhyung Kim Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- Documentation/RCU/stallwarn.txt | 2 +- Documentation/cpu-freq/governors.txt | 4 ++-- arch/um/include/shared/common-offsets.h | 4 ++-- arch/um/os-Linux/time.c | 2 +- include/linux/sched.h | 8 ++++---- include/linux/tick.h | 8 ++++---- init/Kconfig | 2 +- kernel/hrtimer.c | 4 ++-- kernel/sched/core.c | 18 +++++++++--------- kernel/sched/fair.c | 10 +++++----- kernel/sched/sched.h | 4 ++-- kernel/softirq.c | 2 +- kernel/time/Kconfig | 13 +++++++++---- kernel/time/tick-sched.c | 12 ++++++------ kernel/timer.c | 4 ++-- 15 files changed, 51 insertions(+), 46 deletions(-) (limited to 'kernel/time/Kconfig') diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt index 1927151b386b..b336755b71ed 100644 --- a/Documentation/RCU/stallwarn.txt +++ b/Documentation/RCU/stallwarn.txt @@ -176,7 +176,7 @@ o A CPU-bound real-time task in a CONFIG_PREEMPT_RT kernel that o A hardware or software issue shuts off the scheduler-clock interrupt on a CPU that is not in dyntick-idle mode. This problem really has happened, and seems to be most likely to - result in RCU CPU stall warnings for CONFIG_NO_HZ=n kernels. + result in RCU CPU stall warnings for CONFIG_NO_HZ_COMMON=n kernels. o A bug in the RCU implementation. diff --git a/Documentation/cpu-freq/governors.txt b/Documentation/cpu-freq/governors.txt index c7a2eb8450c2..e3e5d9ae50cd 100644 --- a/Documentation/cpu-freq/governors.txt +++ b/Documentation/cpu-freq/governors.txt @@ -131,8 +131,8 @@ sampling_rate_min: The sampling rate is limited by the HW transition latency: transition_latency * 100 Or by kernel restrictions: -If CONFIG_NO_HZ is set, the limit is 10ms fixed. -If CONFIG_NO_HZ is not set or nohz=off boot parameter is used, the +If CONFIG_NO_HZ_COMMON is set, the limit is 10ms fixed. +If CONFIG_NO_HZ_COMMON is not set or nohz=off boot parameter is used, the limits depend on the CONFIG_HZ option: HZ=1000: min=20000us (20ms) HZ=250: min=80000us (80ms) diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h index 2df313b6a586..c92306809029 100644 --- a/arch/um/include/shared/common-offsets.h +++ b/arch/um/include/shared/common-offsets.h @@ -30,8 +30,8 @@ DEFINE(UM_NSEC_PER_USEC, NSEC_PER_USEC); #ifdef CONFIG_PRINTK DEFINE(UML_CONFIG_PRINTK, CONFIG_PRINTK); #endif -#ifdef CONFIG_NO_HZ -DEFINE(UML_CONFIG_NO_HZ, CONFIG_NO_HZ); +#ifdef CONFIG_NO_HZ_COMMON +DEFINE(UML_CONFIG_NO_HZ_COMMON, CONFIG_NO_HZ_COMMON); #endif #ifdef CONFIG_UML_X86 DEFINE(UML_CONFIG_UML_X86, CONFIG_UML_X86); diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c index fac388cb464f..e9824d5dd7d5 100644 --- a/arch/um/os-Linux/time.c +++ b/arch/um/os-Linux/time.c @@ -79,7 +79,7 @@ long long os_nsecs(void) return timeval_to_ns(&tv); } -#ifdef UML_CONFIG_NO_HZ +#ifdef UML_CONFIG_NO_HZ_COMMON static int after_sleep_interval(struct timespec *ts) { return 0; diff --git a/include/linux/sched.h b/include/linux/sched.h index 10626e2ee688..1ff9e0a5de27 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -230,7 +230,7 @@ extern void init_idle_bootup_task(struct task_struct *idle); extern int runqueue_is_locked(int cpu); -#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) +#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) extern void nohz_balance_enter_idle(int cpu); extern void set_cpu_sd_state_idle(void); extern int get_nohz_timer_target(void); @@ -1758,13 +1758,13 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, } #endif -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON void calc_load_enter_idle(void); void calc_load_exit_idle(void); #else static inline void calc_load_enter_idle(void) { } static inline void calc_load_exit_idle(void) { } -#endif /* CONFIG_NO_HZ */ +#endif /* CONFIG_NO_HZ_COMMON */ #ifndef CONFIG_CPUMASK_OFFSTACK static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) @@ -1850,7 +1850,7 @@ extern void idle_task_exit(void); static inline void idle_task_exit(void) {} #endif -#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) +#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP) extern void wake_up_nohz_cpu(int cpu); #else static inline void wake_up_nohz_cpu(int cpu) { } diff --git a/include/linux/tick.h b/include/linux/tick.h index 44bfa8aa439f..5e403339ee14 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -82,7 +82,7 @@ extern int tick_program_event(ktime_t expires, int force); extern void tick_setup_sched_timer(void); # endif -# if defined CONFIG_NO_HZ || defined CONFIG_HIGH_RES_TIMERS +# if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS extern void tick_cancel_sched_timer(int cpu); # else static inline void tick_cancel_sched_timer(int cpu) { } @@ -123,7 +123,7 @@ static inline void tick_check_idle(int cpu) { } static inline int tick_oneshot_mode_active(void) { return 0; } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ -# ifdef CONFIG_NO_HZ +# ifdef CONFIG_NO_HZ_COMMON DECLARE_PER_CPU(struct tick_sched, tick_cpu_sched); static inline int tick_nohz_tick_stopped(void) @@ -138,7 +138,7 @@ extern ktime_t tick_nohz_get_sleep_length(void); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); -# else /* !CONFIG_NO_HZ */ +# else /* !CONFIG_NO_HZ_COMMON */ static inline int tick_nohz_tick_stopped(void) { return 0; @@ -155,7 +155,7 @@ static inline ktime_t tick_nohz_get_sleep_length(void) } static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } -# endif /* !NO_HZ */ +# endif /* !CONFIG_NO_HZ_COMMON */ #ifdef CONFIG_NO_HZ_EXTENDED extern int tick_nohz_extended_cpu(int cpu); diff --git a/init/Kconfig b/init/Kconfig index 8a1dac2f80a9..edc8132584f1 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -580,7 +580,7 @@ config RCU_FANOUT_EXACT config RCU_FAST_NO_HZ bool "Accelerate last non-dyntick-idle CPU's grace periods" - depends on NO_HZ && SMP + depends on NO_HZ_COMMON && SMP default n help This option causes RCU to attempt to accelerate grace periods in diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index cc47812d3feb..ec60482d8b03 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -160,7 +160,7 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, */ static int hrtimer_get_target(int this_cpu, int pinned) { -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu)) return get_nohz_timer_target(); #endif @@ -1106,7 +1106,7 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer) } EXPORT_SYMBOL_GPL(hrtimer_get_remaining); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /** * hrtimer_get_next_event - get the time until next expiry event * diff --git a/kernel/sched/core.c b/kernel/sched/core.c index e91ee589f793..9bb397da63d6 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -549,7 +549,7 @@ void resched_cpu(int cpu) raw_spin_unlock_irqrestore(&rq->lock, flags); } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * In the semi idle case, use the nearest busy cpu for migrating timers * from an idle cpu. This is good for power-savings. @@ -641,14 +641,14 @@ static inline bool got_nohz_idle_kick(void) return idle_cpu(cpu) && test_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu)); } -#else /* CONFIG_NO_HZ */ +#else /* CONFIG_NO_HZ_COMMON */ static inline bool got_nohz_idle_kick(void) { return false; } -#endif /* CONFIG_NO_HZ */ +#endif /* CONFIG_NO_HZ_COMMON */ void sched_avg_update(struct rq *rq) { @@ -2139,7 +2139,7 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active) return load >> FSHIFT; } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * Handle NO_HZ for the global load-average. * @@ -2365,12 +2365,12 @@ static void calc_global_nohz(void) smp_wmb(); calc_load_idx++; } -#else /* !CONFIG_NO_HZ */ +#else /* !CONFIG_NO_HZ_COMMON */ static inline long calc_load_fold_idle(void) { return 0; } static inline void calc_global_nohz(void) { } -#endif /* CONFIG_NO_HZ */ +#endif /* CONFIG_NO_HZ_COMMON */ /* * calc_load - update the avenrun load estimates 10 ticks after the @@ -2530,7 +2530,7 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load, sched_avg_update(this_rq); } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * There is no sane way to deal with nohz on smp when using jiffies because the * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading @@ -2590,7 +2590,7 @@ void update_cpu_load_nohz(void) } raw_spin_unlock(&this_rq->lock); } -#endif /* CONFIG_NO_HZ */ +#endif /* CONFIG_NO_HZ_COMMON */ /* * Called from scheduler_tick() @@ -7023,7 +7023,7 @@ void __init sched_init(void) INIT_LIST_HEAD(&rq->cfs_tasks); rq_attach_root(rq, &def_root_domain); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON rq->nohz_flags = 0; #endif #endif diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 539760ef00c4..5c97fca091a7 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5331,7 +5331,7 @@ out_unlock: return 0; } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * idle load balancing details * - When one of the busy CPUs notice that there may be an idle rebalancing @@ -5541,9 +5541,9 @@ out: rq->next_balance = next_balance; } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* - * In CONFIG_NO_HZ case, the idle balance kickee will do the + * In CONFIG_NO_HZ_COMMON case, the idle balance kickee will do the * rebalancing for all the cpus for whom scheduler ticks are stopped. */ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) @@ -5686,7 +5686,7 @@ void trigger_load_balance(struct rq *rq, int cpu) if (time_after_eq(jiffies, rq->next_balance) && likely(!on_null_domain(cpu))) raise_softirq(SCHED_SOFTIRQ); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON if (nohz_kick_needed(rq, cpu) && likely(!on_null_domain(cpu))) nohz_balancer_kick(cpu); #endif @@ -6156,7 +6156,7 @@ __init void init_sched_fair_class(void) #ifdef CONFIG_SMP open_softirq(SCHED_SOFTIRQ, run_rebalance_domains); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON nohz.next_balance = jiffies; zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT); cpu_notifier(sched_ilb_notifier, 0); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 3bd15a43eebc..889904dd6d77 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -404,7 +404,7 @@ struct rq { #define CPU_LOAD_IDX_MAX 5 unsigned long cpu_load[CPU_LOAD_IDX_MAX]; unsigned long last_load_update_tick; -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON u64 nohz_stamp; unsigned long nohz_flags; #endif @@ -1333,7 +1333,7 @@ extern void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq); extern void account_cfs_bandwidth_used(int enabled, int was_enabled); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON enum rq_nohz_flag_bits { NOHZ_TICK_STOPPED, NOHZ_BALANCE_KICK, diff --git a/kernel/softirq.c b/kernel/softirq.c index b4d252fd195b..de15813f2a66 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -348,7 +348,7 @@ void irq_exit(void) if (!in_interrupt() && local_softirq_pending()) invoke_softirq(); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* Make sure that timer wheel updates are propagated */ if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched()) tick_nohz_irq_exit(); diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 726c33e00da2..c88fc43494c9 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -64,16 +64,21 @@ config GENERIC_CMOS_UPDATE if GENERIC_CLOCKEVENTS menu "Timers subsystem" -# Core internal switch. Selected by NO_HZ / HIGH_RES_TIMERS. This is +# Core internal switch. Selected by NO_HZ_COMMON / HIGH_RES_TIMERS. This is # only related to the tick functionality. Oneshot clockevent devices # are supported independ of this. config TICK_ONESHOT bool +config NO_HZ_COMMON + bool + depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS + select TICK_ONESHOT + config NO_HZ bool "Tickless System (Dynamic Ticks)" depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS - select TICK_ONESHOT + select NO_HZ_COMMON help This option enables a tickless system: timer interrupts will only trigger on an as-needed basis both when the system is @@ -81,14 +86,14 @@ config NO_HZ config NO_HZ_EXTENDED bool "Full dynticks system" - # NO_HZ dependency + # NO_HZ_COMMON dependency depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS # RCU_USER_QS depends on HAVE_CONTEXT_TRACKING && SMP # RCU_NOCB_CPU dependency depends on TREE_RCU || TREE_PREEMPT_RCU depends on VIRT_CPU_ACCOUNTING_GEN - select NO_HZ + select NO_HZ_COMMON select RCU_USER_QS select RCU_NOCB_CPU select CONTEXT_TRACKING_FORCE diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 57bb3fe5aaa3..ccfc2086cd4b 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -104,7 +104,7 @@ static void tick_sched_do_timer(ktime_t now) { int cpu = smp_processor_id(); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * Check if the do_timer duty was dropped. We don't care about * concurrency: This happens only when the cpu in charge went @@ -124,7 +124,7 @@ static void tick_sched_do_timer(ktime_t now) static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) { -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * When we are idle and the tick is stopped, we have to touch * the watchdog as we might not schedule for a really long @@ -235,7 +235,7 @@ core_initcall(init_tick_nohz_extended); /* * NOHZ - aka dynamic tick functionality */ -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * NO HZ enabled ? */ @@ -907,7 +907,7 @@ static inline void tick_check_nohz(int cpu) static inline void tick_nohz_switch_to_nohz(void) { } static inline void tick_check_nohz(int cpu) { } -#endif /* NO_HZ */ +#endif /* CONFIG_NO_HZ_COMMON */ /* * Called from irq_enter to notify about the possible interruption of idle() @@ -992,14 +992,14 @@ void tick_setup_sched_timer(void) now = ktime_get(); } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON if (tick_nohz_enabled) ts->nohz_mode = NOHZ_MODE_HIGHRES; #endif } #endif /* HIGH_RES_TIMERS */ -#if defined CONFIG_NO_HZ || defined CONFIG_HIGH_RES_TIMERS +#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS void tick_cancel_sched_timer(int cpu) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); diff --git a/kernel/timer.c b/kernel/timer.c index 4e3040b40d16..1b7489fdea41 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -738,7 +738,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, cpu = smp_processor_id(); -#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) +#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP) if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) cpu = get_nohz_timer_target(); #endif @@ -1188,7 +1188,7 @@ static inline void __run_timers(struct tvec_base *base) spin_unlock_irq(&base->lock); } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * Find out when the next timer event is due to happen. This * is used on S/390 to stop all activity when a CPU is idle. -- cgit v1.2.3 From 3ca277e41914ab344214ed50a41c14c48ae973f3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 10 Aug 2011 23:21:01 +0200 Subject: nohz: Pack nohz Kconfig option in a menu of choices Now the user has the choice between three implementations of the timer tick: * Static periodic tick * Idle dynticks * Full dynticks At least for now, these are mutually exclusive choices, so let's rely on the proper Kconfig feature to display these to the user. A new entry CONFIG_NO_HZ_IDLE is created and the old CONFIG_NO_HZ maps to it for config file backward compatibility. The old name was too general now that we have more granular dynticks implementations. While at it, add some explanation to help the user on his decision between the 3 entries. Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Namhyung Kim Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- kernel/time/Kconfig | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) (limited to 'kernel/time/Kconfig') diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index c88fc43494c9..27cc404ea187 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -75,17 +75,33 @@ config NO_HZ_COMMON depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS select TICK_ONESHOT +# Kept around for compatibility, maps to NO_HZ_IDLE config NO_HZ - bool "Tickless System (Dynamic Ticks)" + bool + +choice + prompt "Timer tick handling" + default NO_HZ_IDLE if NO_HZ + +config PERIODIC_HZ + bool "Periodic timer ticks (constant rate, no dynticks)" + help + This option keeps the tick running periodically at a constant + rate, even when the CPU doesn't need it. + +config NO_HZ_IDLE + bool "Idle dynticks system (tickless idle)" depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS select NO_HZ_COMMON help - This option enables a tickless system: timer interrupts will - only trigger on an as-needed basis both when the system is - busy and when the system is idle. + This option enables a tickless idle system: timer interrupts + will only trigger on an as-needed basis when the system is idle. + This is usually interesting for energy saving. + + Most of the time you want to say Y here. config NO_HZ_EXTENDED - bool "Full dynticks system" + bool "Full dynticks system (tickless single task)" # NO_HZ_COMMON dependency depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS # RCU_USER_QS @@ -112,6 +128,8 @@ config NO_HZ_EXTENDED Say N. +endchoice + config HIGH_RES_TIMERS bool "High Resolution Timer Support" depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS -- cgit v1.2.3 From 0644ca5c774f1a7033bfc83c7ed0660f74f28b56 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 12 Apr 2013 16:40:12 +0200 Subject: nohz: Fix old dynticks idle Kconfig backward compatibility In order to enforce backward compatibility with older config files, we want the new dynticks-idle Kconfig entry to default its value to the one of the old CONFIG_NO_HZ symbol if present. Namely we want: config NO_HZ # old obsolete dynticks idle symbol bool config NO_HZ_IDLE # new dynticks idle symbol default NO_HZ However Kconfig prevents this to work if the old symbol is not visible. And this is currently the case because NO_HZ lacks a title in order to show it in make oldconfig and alike. To fix this, bring a minimal title and help text to the obsolete Kconfig entry that explains its purpose. This makes the "defaulting" to work. Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- kernel/time/Kconfig | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'kernel/time/Kconfig') diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 27cc404ea187..cbe64be17d1f 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -75,10 +75,6 @@ config NO_HZ_COMMON depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS select TICK_ONESHOT -# Kept around for compatibility, maps to NO_HZ_IDLE -config NO_HZ - bool - choice prompt "Timer tick handling" default NO_HZ_IDLE if NO_HZ @@ -130,6 +126,14 @@ config NO_HZ_EXTENDED endchoice +config NO_HZ + bool "Old Idle dynticks config" + depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS + help + This is the old config entry that enables dynticks idle. + We keep it around for a little while to enforce backward + compatibility with older config files. + config HIGH_RES_TIMERS bool "High Resolution Timer Support" depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS -- cgit v1.2.3 From c5bfece2d6129131b4ade985e63bc35ddf5868d4 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 12 Apr 2013 16:45:34 +0200 Subject: nohz: Switch from "extended nohz" to "full nohz" based naming "Extended nohz" was used as a naming base for the full dynticks API and Kconfig symbols. It reflects the fact the system tries to stop the tick in more places than just idle. But that "extended" name is a bit opaque and vague. Rename it to "full" makes it clearer what the system tries to do under this config: try to shutdown the tick anytime it can. The various constraints that prevent that to happen shouldn't be considered as fundamental properties of this feature but rather technical issues that may be solved in the future. Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- Documentation/kernel-parameters.txt | 4 +-- include/linux/tick.h | 6 ++--- kernel/sched/core.c | 6 ++--- kernel/time/Kconfig | 4 +-- kernel/time/tick-broadcast.c | 2 +- kernel/time/tick-common.c | 2 +- kernel/time/tick-sched.c | 54 ++++++++++++++++++------------------- 7 files changed, 39 insertions(+), 39 deletions(-) (limited to 'kernel/time/Kconfig') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 231698feaddc..82365dde00a8 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1913,8 +1913,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Valid arguments: on, off Default: on - nohz_extended= [KNL,BOOT] - In kernels built with CONFIG_NO_HZ_EXTENDED=y, set + nohz_full= [KNL,BOOT] + In kernels built with CONFIG_NO_HZ_FULL=y, set the specified list of CPUs whose tick will be stopped whenever possible. You need to keep at least one online CPU outside the range to maintain the timekeeping. diff --git a/include/linux/tick.h b/include/linux/tick.h index 5e403339ee14..b4e3b0c9639e 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -157,10 +157,10 @@ static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } # endif /* !CONFIG_NO_HZ_COMMON */ -#ifdef CONFIG_NO_HZ_EXTENDED -extern int tick_nohz_extended_cpu(int cpu); +#ifdef CONFIG_NO_HZ_FULL +extern int tick_nohz_full_cpu(int cpu); #else -static inline int tick_nohz_extended_cpu(int cpu) { return 0; } +static inline int tick_nohz_full_cpu(int cpu) { return 0; } #endif diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9bb397da63d6..0f0a5b3fd62c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -617,9 +617,9 @@ static void wake_up_idle_cpu(int cpu) smp_send_reschedule(cpu); } -static bool wake_up_extended_nohz_cpu(int cpu) +static bool wake_up_full_nohz_cpu(int cpu) { - if (tick_nohz_extended_cpu(cpu)) { + if (tick_nohz_full_cpu(cpu)) { if (cpu != smp_processor_id() || tick_nohz_tick_stopped()) smp_send_reschedule(cpu); @@ -631,7 +631,7 @@ static bool wake_up_extended_nohz_cpu(int cpu) void wake_up_nohz_cpu(int cpu) { - if (!wake_up_extended_nohz_cpu(cpu)) + if (!wake_up_full_nohz_cpu(cpu)) wake_up_idle_cpu(cpu); } diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index cbe64be17d1f..4a17b5069466 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -96,7 +96,7 @@ config NO_HZ_IDLE Most of the time you want to say Y here. -config NO_HZ_EXTENDED +config NO_HZ_FULL bool "Full dynticks system (tickless single task)" # NO_HZ_COMMON dependency depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS @@ -115,7 +115,7 @@ config NO_HZ_EXTENDED task on the CPU. Chances for running tickless are maximized when the task mostly runs in userspace and has few kernel activity. - You need to fill up the nohz_extended boot parameter with the + You need to fill up the nohz_full boot parameter with the desired range of dynticks CPUs. This is implemented at the expense of some overhead in user <-> kernel diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 8a6875cc1879..a3a3123f6272 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -573,7 +573,7 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc) bc->event_handler = tick_handle_oneshot_broadcast; /* Take the do_timer update */ - if (!tick_nohz_extended_cpu(cpu)) + if (!tick_nohz_full_cpu(cpu)) tick_do_timer_cpu = cpu; /* diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index b7dc0cbdb59b..83f2bd967161 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -163,7 +163,7 @@ static void tick_setup_device(struct tick_device *td, * this cpu: */ if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) { - if (!tick_nohz_extended_cpu(cpu)) + if (!tick_nohz_full_cpu(cpu)) tick_do_timer_cpu = cpu; else tick_do_timer_cpu = TICK_DO_TIMER_NONE; diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index e057d338daa4..369b5769fc97 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -113,7 +113,7 @@ static void tick_sched_do_timer(ktime_t now) * jiffies_lock. */ if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE) - && !tick_nohz_extended_cpu(cpu)) + && !tick_nohz_full_cpu(cpu)) tick_do_timer_cpu = cpu; #endif @@ -143,29 +143,29 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) profile_tick(CPU_PROFILING); } -#ifdef CONFIG_NO_HZ_EXTENDED -static cpumask_var_t nohz_extended_mask; -bool have_nohz_extended_mask; +#ifdef CONFIG_NO_HZ_FULL +static cpumask_var_t nohz_full_mask; +bool have_nohz_full_mask; -int tick_nohz_extended_cpu(int cpu) +int tick_nohz_full_cpu(int cpu) { - if (!have_nohz_extended_mask) + if (!have_nohz_full_mask) return 0; - return cpumask_test_cpu(cpu, nohz_extended_mask); + return cpumask_test_cpu(cpu, nohz_full_mask); } /* Parse the boot-time nohz CPU list from the kernel parameters. */ -static int __init tick_nohz_extended_setup(char *str) +static int __init tick_nohz_full_setup(char *str) { - alloc_bootmem_cpumask_var(&nohz_extended_mask); - if (cpulist_parse(str, nohz_extended_mask) < 0) - pr_warning("NOHZ: Incorrect nohz_extended cpumask\n"); + alloc_bootmem_cpumask_var(&nohz_full_mask); + if (cpulist_parse(str, nohz_full_mask) < 0) + pr_warning("NOHZ: Incorrect nohz_full cpumask\n"); else - have_nohz_extended_mask = true; + have_nohz_full_mask = true; return 1; } -__setup("nohz_extended=", tick_nohz_extended_setup); +__setup("nohz_full=", tick_nohz_full_setup); static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb, unsigned long action, @@ -179,7 +179,7 @@ static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb, * If we handle the timekeeping duty for full dynticks CPUs, * we can't safely shutdown that CPU. */ - if (have_nohz_extended_mask && tick_do_timer_cpu == cpu) + if (have_nohz_full_mask && tick_do_timer_cpu == cpu) return -EINVAL; break; } @@ -191,20 +191,20 @@ static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb, * separations: 0,2,4,6,... * This is NR_CPUS + sizeof('\0') */ -static char __initdata nohz_ext_buf[NR_CPUS + 1]; +static char __initdata nohz_full_buf[NR_CPUS + 1]; -static int __init init_tick_nohz_extended(void) +static int __init init_tick_nohz_full(void) { cpumask_var_t online_nohz; int cpu; - if (!have_nohz_extended_mask) + if (!have_nohz_full_mask) return 0; cpu_notifier(tick_nohz_cpu_down_callback, 0); if (!zalloc_cpumask_var(&online_nohz, GFP_KERNEL)) { - pr_warning("NO_HZ: Not enough memory to check extended nohz mask\n"); + pr_warning("NO_HZ: Not enough memory to check full nohz mask\n"); return -ENOMEM; } @@ -215,31 +215,31 @@ static int __init init_tick_nohz_extended(void) get_online_cpus(); /* Ensure we keep a CPU outside the dynticks range for timekeeping */ - cpumask_and(online_nohz, cpu_online_mask, nohz_extended_mask); + cpumask_and(online_nohz, cpu_online_mask, nohz_full_mask); if (cpumask_equal(online_nohz, cpu_online_mask)) { pr_warning("NO_HZ: Must keep at least one online CPU " - "out of nohz_extended range\n"); + "out of nohz_full range\n"); /* * We know the current CPU doesn't have its tick stopped. * Let's use it for the timekeeping duty. */ preempt_disable(); cpu = smp_processor_id(); - pr_warning("NO_HZ: Clearing %d from nohz_extended range\n", cpu); - cpumask_clear_cpu(cpu, nohz_extended_mask); + pr_warning("NO_HZ: Clearing %d from nohz_full range\n", cpu); + cpumask_clear_cpu(cpu, nohz_full_mask); preempt_enable(); } put_online_cpus(); free_cpumask_var(online_nohz); - cpulist_scnprintf(nohz_ext_buf, sizeof(nohz_ext_buf), nohz_extended_mask); - pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_ext_buf); + cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask); + pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); return 0; } -core_initcall(init_tick_nohz_extended); +core_initcall(init_tick_nohz_full); #else -#define have_nohz_extended_mask (0) +#define have_nohz_full_mask (0) #endif /* @@ -589,7 +589,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) return false; } - if (have_nohz_extended_mask) { + if (have_nohz_full_mask) { /* * Keep the tick alive to guarantee timekeeping progression * if there are full dynticks CPUs around -- cgit v1.2.3 From 5b533f4ff5b6d8f2442f2ac82ca1530ff3f55267 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 13 Apr 2013 16:53:35 +0200 Subject: nohz: Align periodic tick Kconfig with other choices' naming convention Rename CONFIG_PERIODIC_HZ to CONFIG_HZ_PERIODIC in order to stay consistent with other tick implementation entries: CONFIG_HZ_PERIODIC CONFIG_NO_HZ_IDLE CONFIG_NO_HZ_FULL Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- kernel/time/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/time/Kconfig') diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 4a17b5069466..3b683227a10d 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -79,7 +79,7 @@ choice prompt "Timer tick handling" default NO_HZ_IDLE if NO_HZ -config PERIODIC_HZ +config HZ_PERIODIC bool "Periodic timer ticks (constant rate, no dynticks)" help This option keeps the tick running periodically at a constant -- cgit v1.2.3 From fae30dd6698ecaa93cb50213266c224bc550c32c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 13 Apr 2013 17:04:04 +0200 Subject: nohz: Improve a bit the full dynticks Kconfig documentation Remove the "single task" statement from CONFIG_NO_HZ_FULL title. The constraint can be invalidated when tasks from other sched classes than SCHED_FAIR are running. Moreover it's possible that hrtick join the party in the future. Also add a line about the dependency on SMP. Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- kernel/time/Kconfig | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'kernel/time/Kconfig') diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 3b683227a10d..358d601a4fec 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -97,11 +97,13 @@ config NO_HZ_IDLE Most of the time you want to say Y here. config NO_HZ_FULL - bool "Full dynticks system (tickless single task)" + bool "Full dynticks system (tickless)" # NO_HZ_COMMON dependency depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS - # RCU_USER_QS - depends on HAVE_CONTEXT_TRACKING && SMP + # We need at least one periodic CPU for timekeeping + depends on SMP + # RCU_USER_QS dependency + depends on HAVE_CONTEXT_TRACKING # RCU_NOCB_CPU dependency depends on TREE_RCU || TREE_PREEMPT_RCU depends on VIRT_CPU_ACCOUNTING_GEN -- cgit v1.2.3 From 76c24fb054b52b34af4dcde589cbb9e2b98fc74c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 18 Apr 2013 00:15:40 +0200 Subject: nohz: New APIs to re-evaluate the tick on full dynticks CPUs Provide two new helpers in order to notify the full dynticks CPUs about some internal system changes against which they may reconsider the state of their tick. Some practical examples include: posix cpu timers, perf tick and sched clock tick. For now the notifying handler, implemented through IPIs, is a stub that will be implemented when we get the tick stop/restart infrastructure in. Signed-off-by: Frederic Weisbecker Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Oleg Nesterov Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- include/linux/tick.h | 4 ++++ kernel/time/Kconfig | 1 + kernel/time/tick-sched.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+) (limited to 'kernel/time/Kconfig') diff --git a/include/linux/tick.h b/include/linux/tick.h index b4e3b0c9639e..c2dcfb18f65b 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -159,8 +159,12 @@ static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } #ifdef CONFIG_NO_HZ_FULL extern int tick_nohz_full_cpu(int cpu); +extern void tick_nohz_full_kick(void); +extern void tick_nohz_full_kick_all(void); #else static inline int tick_nohz_full_cpu(int cpu) { return 0; } +static inline void tick_nohz_full_kick(void) { } +static inline void tick_nohz_full_kick_all(void) { } #endif diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 358d601a4fec..fbb4c7eb92a0 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -111,6 +111,7 @@ config NO_HZ_FULL select RCU_USER_QS select RCU_NOCB_CPU select CONTEXT_TRACKING_FORCE + select IRQ_WORK help Adaptively try to shutdown the tick whenever possible, even when the CPU is running tasks. Typically this requires running a single diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 369b5769fc97..2bcad5b904d8 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -147,6 +147,57 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) static cpumask_var_t nohz_full_mask; bool have_nohz_full_mask; +/* + * Re-evaluate the need for the tick on the current CPU + * and restart it if necessary. + */ +static void tick_nohz_full_check(void) +{ + /* + * STUB for now, will be filled with the full tick stop/restart + * infrastructure patches + */ +} + +static void nohz_full_kick_work_func(struct irq_work *work) +{ + tick_nohz_full_check(); +} + +static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { + .func = nohz_full_kick_work_func, +}; + +/* + * Kick the current CPU if it's full dynticks in order to force it to + * re-evaluate its dependency on the tick and restart it if necessary. + */ +void tick_nohz_full_kick(void) +{ + if (tick_nohz_full_cpu(smp_processor_id())) + irq_work_queue(&__get_cpu_var(nohz_full_kick_work)); +} + +static void nohz_full_kick_ipi(void *info) +{ + tick_nohz_full_check(); +} + +/* + * Kick all full dynticks CPUs in order to force these to re-evaluate + * their dependency on the tick and restart it if necessary. + */ +void tick_nohz_full_kick_all(void) +{ + if (!have_nohz_full_mask) + return; + + preempt_disable(); + smp_call_function_many(nohz_full_mask, + nohz_full_kick_ipi, NULL, false); + preempt_enable(); +} + int tick_nohz_full_cpu(int cpu) { if (!have_nohz_full_mask) -- cgit v1.2.3 From f98823ac758ba1aa77c6e3f8ad4ef3ad84ee0a7c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 27 Mar 2013 01:17:22 +0100 Subject: nohz: New option to default all CPUs in full dynticks range Provide a new kernel config that defaults all CPUs to be part of the full dynticks range, except the boot one for timekeeping. This default setting is overriden by the nohz_full= boot option if passed by the user. This is helpful for those who don't need a finegrained range of full dynticks CPU and also for automated testing. Suggested-by: Ingo Molnar Reviewed-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- kernel/time/Kconfig | 10 ++++++++++ kernel/time/tick-sched.c | 23 +++++++++++++++++++++-- 2 files changed, 31 insertions(+), 2 deletions(-) (limited to 'kernel/time/Kconfig') diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 358d601a4fec..99c3f13dd478 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -128,6 +128,16 @@ config NO_HZ_FULL endchoice +config NO_HZ_FULL_ALL + bool "Full dynticks system on all CPUs by default" + depends on NO_HZ_FULL + help + If the user doesn't pass the nohz_full boot option to + define the range of full dynticks CPUs, consider that all + CPUs in the system are full dynticks by default. + Note the boot CPU will still be kept outside the range to + handle the timekeeping duty. + config NO_HZ bool "Old Idle dynticks config" depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index d71a5f2bd7b2..a76e09044f9f 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -203,12 +203,31 @@ static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb, */ static char __initdata nohz_full_buf[NR_CPUS + 1]; +static int tick_nohz_init_all(void) +{ + int err = -1; + +#ifdef CONFIG_NO_HZ_FULL_ALL + if (!alloc_cpumask_var(&nohz_full_mask, GFP_KERNEL)) { + pr_err("NO_HZ: Can't allocate full dynticks cpumask\n"); + return err; + } + err = 0; + cpumask_setall(nohz_full_mask); + cpumask_clear_cpu(smp_processor_id(), nohz_full_mask); + have_nohz_full_mask = true; +#endif + return err; +} + void __init tick_nohz_init(void) { int cpu; - if (!have_nohz_full_mask) - return; + if (!have_nohz_full_mask) { + if (tick_nohz_init_all() < 0) + return; + } cpu_notifier(tick_nohz_cpu_down_callback, 0); -- cgit v1.2.3 From 0637e029392386e6996f5d6574aadccee8315efa Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 22 Apr 2013 21:15:59 +0200 Subject: nohz: Select wide RCU nocb for full dynticks It makes testing and implementation much easier as we know in advance that all CPUs are RCU nocbs. Also this prepares to remove the dynamic check for nohz_full= boot mask to be a subset of rcu_nocbs= Eventually this should also help removing the requirement for the boot CPU to be outside the full dynticks range. Suggested-by: Christoph Lameter Suggested-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Oleg Nesterov Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- kernel/time/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/time/Kconfig') diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index f6a792ab4983..e1ac129b01c9 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -110,6 +110,7 @@ config NO_HZ_FULL select NO_HZ_COMMON select RCU_USER_QS select RCU_NOCB_CPU + select RCU_NOCB_CPU_ALL select CONTEXT_TRACKING_FORCE select IRQ_WORK help -- cgit v1.2.3 From 65e709dc0c25dbd563861924815e9a3a93878b75 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 20 Apr 2013 17:35:50 +0200 Subject: nohz: Remove full dynticks' superfluous dependency on RCU tree Remove the dependency on (TREE_RCU || TREE_PREEMPT_RCU). The full dynticks option already depends on SMP which implies (whatever flavour of) RCU tree config anyway. Signed-off-by: Frederic Weisbecker Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Oleg Nesterov Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- kernel/time/Kconfig | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel/time/Kconfig') diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index e1ac129b01c9..1ea2bba4a686 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -104,8 +104,6 @@ config NO_HZ_FULL depends on SMP # RCU_USER_QS dependency depends on HAVE_CONTEXT_TRACKING - # RCU_NOCB_CPU dependency - depends on TREE_RCU || TREE_PREEMPT_RCU depends on VIRT_CPU_ACCOUNTING_GEN select NO_HZ_COMMON select RCU_USER_QS -- cgit v1.2.3 From c58b0df12a6b5c497637db0676effd00e1fbab13 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 26 Apr 2013 15:16:31 +0200 Subject: nohz: Select VIRT_CPU_ACCOUNTING_GEN from full dynticks config Turn the full dynticks passive dependency on VIRT_CPU_ACCOUNTING_GEN to an active one. The full dynticks Kconfig is currently hidden behind the full dynticks cputime accounting, which is an awkward and counter-intuitive layout: the user first has to select the dynticks cputime accounting in order to make the full dynticks feature to be visible. We definetly want it the other way around. The usual way to perform this kind of active dependency is use "select" on the depended target. Now we can't use the Kconfig "select" instruction when the target is a "choice". So this patch inspires on how the RCU subsystem Kconfig interact with its dependencies on SMP and PREEMPT: we make sure that cputime accounting can't propose another option than VIRT_CPU_ACCOUNTING_GEN when NO_HZ_FULL is selected by using the right "depends on" instruction for each cputime accounting choices. v2: Keep full dynticks cputime accounting available even without full dynticks, as per Paul McKenney's suggestion. Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Christoph Lameter Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- init/Kconfig | 6 +++--- kernel/time/Kconfig | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'kernel/time/Kconfig') diff --git a/init/Kconfig b/init/Kconfig index edc8132584f1..8f97a7407714 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -306,7 +306,7 @@ choice # Kind of a stub config for the pure tick based cputime accounting config TICK_CPU_ACCOUNTING bool "Simple tick based cputime accounting" - depends on !S390 + depends on !S390 && !NO_HZ_FULL help This is the basic tick based cputime accounting that maintains statistics about user, system and idle time spent on per jiffies @@ -316,7 +316,7 @@ config TICK_CPU_ACCOUNTING config VIRT_CPU_ACCOUNTING_NATIVE bool "Deterministic task and CPU time accounting" - depends on HAVE_VIRT_CPU_ACCOUNTING + depends on HAVE_VIRT_CPU_ACCOUNTING && !NO_HZ_FULL select VIRT_CPU_ACCOUNTING help Select this option to enable more accurate task and CPU time @@ -346,7 +346,7 @@ config VIRT_CPU_ACCOUNTING_GEN config IRQ_TIME_ACCOUNTING bool "Fine granularity task level IRQ time accounting" - depends on HAVE_IRQ_TIME_ACCOUNTING + depends on HAVE_IRQ_TIME_ACCOUNTING && !NO_HZ_FULL help Select this option to enable fine granularity task irq time accounting. This is done by reading a timestamp on each diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 1ea2bba4a686..a2ddd650cb92 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -104,11 +104,13 @@ config NO_HZ_FULL depends on SMP # RCU_USER_QS dependency depends on HAVE_CONTEXT_TRACKING - depends on VIRT_CPU_ACCOUNTING_GEN + # VIRT_CPU_ACCOUNTING_GEN dependency + depends on 64BIT select NO_HZ_COMMON select RCU_USER_QS select RCU_NOCB_CPU select RCU_NOCB_CPU_ALL + select VIRT_CPU_ACCOUNTING_GEN select CONTEXT_TRACKING_FORCE select IRQ_WORK help -- cgit v1.2.3 From 73c30828771acafb0a5e3a1c4cf75e6c5dc5f98a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 3 May 2013 01:28:12 +0200 Subject: rcu: Fix full dynticks' dependency on wide RCU nocb mode Commit 0637e029392386e6996f5d6574aadccee8315efa ("nohz: Select wide RCU nocb for full dynticks") intended to force CONFIG_RCU_NOCB_CPU_ALL=y when full dynticks is enabled. However this option is part of a choice menu and Kconfig's "select" instruction has no effect on such targets. Fix this by using reverse dependencies on the targets we don't want instead. Reviewed-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Cc: Christoph Lameter Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- init/Kconfig | 4 ++-- kernel/time/Kconfig | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'kernel/time/Kconfig') diff --git a/init/Kconfig b/init/Kconfig index 66f67afad4fa..b3fdf9aee3da 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -687,7 +687,7 @@ choice config RCU_NOCB_CPU_NONE bool "No build_forced no-CBs CPUs" - depends on RCU_NOCB_CPU + depends on RCU_NOCB_CPU && !NO_HZ_FULL help This option does not force any of the CPUs to be no-CBs CPUs. Only CPUs designated by the rcu_nocbs= boot parameter will be @@ -695,7 +695,7 @@ config RCU_NOCB_CPU_NONE config RCU_NOCB_CPU_ZERO bool "CPU 0 is a build_forced no-CBs CPU" - depends on RCU_NOCB_CPU + depends on RCU_NOCB_CPU && !NO_HZ_FULL help This option forces CPU 0 to be a no-CBs CPU. Additional CPUs may be designated as no-CBs CPUs using the rcu_nocbs= boot diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index a2ddd650cb92..e4c07b0692bb 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -109,7 +109,6 @@ config NO_HZ_FULL select NO_HZ_COMMON select RCU_USER_QS select RCU_NOCB_CPU - select RCU_NOCB_CPU_ALL select VIRT_CPU_ACCOUNTING_GEN select CONTEXT_TRACKING_FORCE select IRQ_WORK -- cgit v1.2.3