From ab840f7a06df780c4db01f34a5660b1e472d9ca6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 23 Jul 2012 12:30:22 -0700 Subject: rcu: Update rcutorture defaults A number of new features have been added to rcutorture over the years, but the defaults have not been updated to include them. This commit therefore turns on a couple of them that have proven helpful and trustworthy, namely periodic progress reports and testing of NO_HZ. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutorture.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 25b15033c61f..53c548c39265 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -53,10 +53,11 @@ MODULE_AUTHOR("Paul E. McKenney and Josh Triplett Date: Mon, 23 Jul 2012 12:05:55 -0700 Subject: rcu: Track CPU-hotplug duration statistics Many rcutorture runs include CPU-hotplug operations in their stress testing. This commit accumulates statistics on the durations of these operations in deference to the recent concern about the overhead and latency of these operations. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutorture.c | 42 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 53c548c39265..2736cc878ba2 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -177,8 +177,14 @@ static long n_rcu_torture_boosts; static long n_rcu_torture_timers; static long n_offline_attempts; static long n_offline_successes; +static unsigned long sum_offline; +static int min_offline = -1; +static int max_offline; static long n_online_attempts; static long n_online_successes; +static unsigned long sum_online; +static int min_online = -1; +static int max_online; static long n_barrier_attempts; static long n_barrier_successes; static struct list_head rcu_torture_removed; @@ -1215,11 +1221,13 @@ rcu_torture_printk(char *page) n_rcu_torture_boost_failure, n_rcu_torture_boosts, n_rcu_torture_timers); - cnt += sprintf(&page[cnt], "onoff: %ld/%ld:%ld/%ld ", - n_online_successes, - n_online_attempts, - n_offline_successes, - n_offline_attempts); + cnt += sprintf(&page[cnt], + "onoff: %ld/%ld:%ld/%ld %d,%d:%d,%d %lu:%lu (HZ=%d) ", + n_online_successes, n_online_attempts, + n_offline_successes, n_offline_attempts, + min_online, max_online, + min_offline, max_offline, + sum_online, sum_offline, HZ); cnt += sprintf(&page[cnt], "barrier: %ld/%ld:%ld", n_barrier_successes, n_barrier_attempts, @@ -1491,8 +1499,10 @@ static int __cpuinit rcu_torture_onoff(void *arg) { int cpu; + unsigned long delta; int maxcpu = -1; DEFINE_RCU_RANDOM(rand); + unsigned long starttime; VERBOSE_PRINTK_STRING("rcu_torture_onoff task started"); for_each_online_cpu(cpu) @@ -1510,6 +1520,7 @@ rcu_torture_onoff(void *arg) printk(KERN_ALERT "%s" TORTURE_FLAG "rcu_torture_onoff task: offlining %d\n", torture_type, cpu); + starttime = jiffies; n_offline_attempts++; if (cpu_down(cpu) == 0) { if (verbose) @@ -1517,12 +1528,23 @@ rcu_torture_onoff(void *arg) "rcu_torture_onoff task: offlined %d\n", torture_type, cpu); n_offline_successes++; + delta = jiffies - starttime; + sum_offline += delta; + if (min_offline < 0) { + min_offline = delta; + max_offline = delta; + } + if (min_offline > delta) + min_offline = delta; + if (max_offline < delta) + max_offline = delta; } } else if (cpu_is_hotpluggable(cpu)) { if (verbose) printk(KERN_ALERT "%s" TORTURE_FLAG "rcu_torture_onoff task: onlining %d\n", torture_type, cpu); + starttime = jiffies; n_online_attempts++; if (cpu_up(cpu) == 0) { if (verbose) @@ -1530,6 +1552,16 @@ rcu_torture_onoff(void *arg) "rcu_torture_onoff task: onlined %d\n", torture_type, cpu); n_online_successes++; + delta = jiffies - starttime; + sum_online += delta; + if (min_online < 0) { + min_online = delta; + max_online = delta; + } + if (min_online > delta) + min_online = delta; + if (max_online < delta) + max_online = delta; } } schedule_timeout_interruptible(onoff_interval * HZ); -- cgit v1.2.3 From 2caa1e4432be7260dca60c3de6949b77eb007515 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 9 Aug 2012 16:30:45 -0700 Subject: rcu: Switch rcutorture to pr_alert() and friends Drop a few characters by switching kernel/rcutorture.c from "printk(KERN_ALERT" to "pr_alert(". Signed-off-by: Paul E. McKenney --- kernel/rcutorture.c | 100 ++++++++++++++++++++++++++-------------------------- 1 file changed, 50 insertions(+), 50 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 2736cc878ba2..61be03ba598d 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -120,11 +120,11 @@ MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)"); #define TORTURE_FLAG "-torture:" #define PRINTK_STRING(s) \ - do { printk(KERN_ALERT "%s" TORTURE_FLAG s "\n", torture_type); } while (0) + do { pr_alert("%s" TORTURE_FLAG s "\n", torture_type); } while (0) #define VERBOSE_PRINTK_STRING(s) \ - do { if (verbose) printk(KERN_ALERT "%s" TORTURE_FLAG s "\n", torture_type); } while (0) + do { if (verbose) pr_alert("%s" TORTURE_FLAG s "\n", torture_type); } while (0) #define VERBOSE_PRINTK_ERRSTRING(s) \ - do { if (verbose) printk(KERN_ALERT "%s" TORTURE_FLAG "!!! " s "\n", torture_type); } while (0) + do { if (verbose) pr_alert("%s" TORTURE_FLAG "!!! " s "\n", torture_type); } while (0) static char printk_buf[4096]; @@ -242,7 +242,7 @@ rcutorture_shutdown_notify(struct notifier_block *unused1, if (fullstop == FULLSTOP_DONTSTOP) fullstop = FULLSTOP_SHUTDOWN; else - printk(KERN_WARNING /* but going down anyway, so... */ + pr_warn(/* but going down anyway, so... */ "Concurrent 'rmmod rcutorture' and shutdown illegal!\n"); mutex_unlock(&fullstop_mutex); return NOTIFY_DONE; @@ -255,7 +255,7 @@ rcutorture_shutdown_notify(struct notifier_block *unused1, static void rcutorture_shutdown_absorb(char *title) { if (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) { - printk(KERN_NOTICE + pr_notice( "rcutorture thread %s parking due to system shutdown\n", title); schedule_timeout_uninterruptible(MAX_SCHEDULE_TIMEOUT); @@ -1276,7 +1276,7 @@ rcu_torture_stats_print(void) int cnt; cnt = rcu_torture_printk(printk_buf); - printk(KERN_ALERT "%s", printk_buf); + pr_alert("%s", printk_buf); } /* @@ -1389,20 +1389,20 @@ rcu_torture_stutter(void *arg) static inline void rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, char *tag) { - printk(KERN_ALERT "%s" TORTURE_FLAG - "--- %s: nreaders=%d nfakewriters=%d " - "stat_interval=%d verbose=%d test_no_idle_hz=%d " - "shuffle_interval=%d stutter=%d irqreader=%d " - "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d " - "test_boost=%d/%d test_boost_interval=%d " - "test_boost_duration=%d shutdown_secs=%d " - "onoff_interval=%d onoff_holdoff=%d\n", - torture_type, tag, nrealreaders, nfakewriters, - stat_interval, verbose, test_no_idle_hz, shuffle_interval, - stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter, - test_boost, cur_ops->can_boost, - test_boost_interval, test_boost_duration, shutdown_secs, - onoff_interval, onoff_holdoff); + pr_alert("%s" TORTURE_FLAG + "--- %s: nreaders=%d nfakewriters=%d " + "stat_interval=%d verbose=%d test_no_idle_hz=%d " + "shuffle_interval=%d stutter=%d irqreader=%d " + "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d " + "test_boost=%d/%d test_boost_interval=%d " + "test_boost_duration=%d shutdown_secs=%d " + "onoff_interval=%d onoff_holdoff=%d\n", + torture_type, tag, nrealreaders, nfakewriters, + stat_interval, verbose, test_no_idle_hz, shuffle_interval, + stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter, + test_boost, cur_ops->can_boost, + test_boost_interval, test_boost_duration, shutdown_secs, + onoff_interval, onoff_holdoff); } static struct notifier_block rcutorture_shutdown_nb = { @@ -1469,9 +1469,9 @@ rcu_torture_shutdown(void *arg) !kthread_should_stop()) { delta = shutdown_time - jiffies_snap; if (verbose) - printk(KERN_ALERT "%s" TORTURE_FLAG - "rcu_torture_shutdown task: %lu jiffies remaining\n", - torture_type, delta); + pr_alert("%s" TORTURE_FLAG + "rcu_torture_shutdown task: %lu jiffies remaining\n", + torture_type, delta); schedule_timeout_interruptible(delta); jiffies_snap = ACCESS_ONCE(jiffies); } @@ -1517,16 +1517,16 @@ rcu_torture_onoff(void *arg) cpu = (rcu_random(&rand) >> 4) % (maxcpu + 1); if (cpu_online(cpu) && cpu_is_hotpluggable(cpu)) { if (verbose) - printk(KERN_ALERT "%s" TORTURE_FLAG - "rcu_torture_onoff task: offlining %d\n", - torture_type, cpu); + pr_alert("%s" TORTURE_FLAG + "rcu_torture_onoff task: offlining %d\n", + torture_type, cpu); starttime = jiffies; n_offline_attempts++; if (cpu_down(cpu) == 0) { if (verbose) - printk(KERN_ALERT "%s" TORTURE_FLAG - "rcu_torture_onoff task: offlined %d\n", - torture_type, cpu); + pr_alert("%s" TORTURE_FLAG + "rcu_torture_onoff task: offlined %d\n", + torture_type, cpu); n_offline_successes++; delta = jiffies - starttime; sum_offline += delta; @@ -1541,16 +1541,16 @@ rcu_torture_onoff(void *arg) } } else if (cpu_is_hotpluggable(cpu)) { if (verbose) - printk(KERN_ALERT "%s" TORTURE_FLAG - "rcu_torture_onoff task: onlining %d\n", - torture_type, cpu); + pr_alert("%s" TORTURE_FLAG + "rcu_torture_onoff task: onlining %d\n", + torture_type, cpu); starttime = jiffies; n_online_attempts++; if (cpu_up(cpu) == 0) { if (verbose) - printk(KERN_ALERT "%s" TORTURE_FLAG - "rcu_torture_onoff task: onlined %d\n", - torture_type, cpu); + pr_alert("%s" TORTURE_FLAG + "rcu_torture_onoff task: onlined %d\n", + torture_type, cpu); n_online_successes++; delta = jiffies - starttime; sum_online += delta; @@ -1626,14 +1626,14 @@ static int __cpuinit rcu_torture_stall(void *args) if (!kthread_should_stop()) { stop_at = get_seconds() + stall_cpu; /* RCU CPU stall is expected behavior in following code. */ - printk(KERN_ALERT "rcu_torture_stall start.\n"); + pr_alert("rcu_torture_stall start.\n"); rcu_read_lock(); preempt_disable(); while (ULONG_CMP_LT(get_seconds(), stop_at)) continue; /* Induce RCU CPU stall warning. */ preempt_enable(); rcu_read_unlock(); - printk(KERN_ALERT "rcu_torture_stall end.\n"); + pr_alert("rcu_torture_stall end.\n"); } rcutorture_shutdown_absorb("rcu_torture_stall"); while (!kthread_should_stop()) @@ -1749,12 +1749,12 @@ static int rcu_torture_barrier_init(void) if (n_barrier_cbs == 0) return 0; if (cur_ops->call == NULL || cur_ops->cb_barrier == NULL) { - printk(KERN_ALERT "%s" TORTURE_FLAG - " Call or barrier ops missing for %s,\n", - torture_type, cur_ops->name); - printk(KERN_ALERT "%s" TORTURE_FLAG - " RCU barrier testing omitted from run.\n", - torture_type); + pr_alert("%s" TORTURE_FLAG + " Call or barrier ops missing for %s,\n", + torture_type, cur_ops->name); + pr_alert("%s" TORTURE_FLAG + " RCU barrier testing omitted from run.\n", + torture_type); return 0; } atomic_set(&barrier_cbs_count, 0); @@ -1847,7 +1847,7 @@ rcu_torture_cleanup(void) mutex_lock(&fullstop_mutex); rcutorture_record_test_transition(); if (fullstop == FULLSTOP_SHUTDOWN) { - printk(KERN_WARNING /* but going down anyway, so... */ + pr_warn(/* but going down anyway, so... */ "Concurrent 'rmmod rcutorture' and shutdown illegal!\n"); mutex_unlock(&fullstop_mutex); schedule_timeout_uninterruptible(10); @@ -1971,17 +1971,17 @@ rcu_torture_init(void) break; } if (i == ARRAY_SIZE(torture_ops)) { - printk(KERN_ALERT "rcu-torture: invalid torture type: \"%s\"\n", - torture_type); - printk(KERN_ALERT "rcu-torture types:"); + pr_alert("rcu-torture: invalid torture type: \"%s\"\n", + torture_type); + pr_alert("rcu-torture types:"); for (i = 0; i < ARRAY_SIZE(torture_ops); i++) - printk(KERN_ALERT " %s", torture_ops[i]->name); - printk(KERN_ALERT "\n"); + pr_alert(" %s", torture_ops[i]->name); + pr_alert("\n"); mutex_unlock(&fullstop_mutex); return -EINVAL; } if (cur_ops->fqs == NULL && fqs_duration != 0) { - printk(KERN_ALERT "rcu-torture: ->fqs NULL and non-zero fqs_duration, fqs disabled.\n"); + pr_alert("rcu-torture: ->fqs NULL and non-zero fqs_duration, fqs disabled.\n"); fqs_duration = 0; } if (cur_ops->init) -- cgit v1.2.3 From 60f53782c51f27c695840ce90c6c432284319eef Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 25 Aug 2012 15:27:40 -0700 Subject: rcu: Prevent initialization race in rcutorture kthreads When you do something like "t = kthread_run(...)", it is possible that the kthread will start running before the assignment to "t" happens. If the child kthread expects to find a pointer to its task_struct in "t", it will then be fatally disappointed. This commit therefore switches such cases to kthread_create() followed by wake_up_process(), guaranteeing that the assignment happens before the child kthread starts running. Reported-by: Fengguang Wu Signed-off-by: Paul E. McKenney --- kernel/rcutorture.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 61be03ba598d..aaa7b9f3532a 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -2029,14 +2029,15 @@ rcu_torture_init(void) /* Start up the kthreads. */ VERBOSE_PRINTK_STRING("Creating rcu_torture_writer task"); - writer_task = kthread_run(rcu_torture_writer, NULL, - "rcu_torture_writer"); + writer_task = kthread_create(rcu_torture_writer, NULL, + "rcu_torture_writer"); if (IS_ERR(writer_task)) { firsterr = PTR_ERR(writer_task); VERBOSE_PRINTK_ERRSTRING("Failed to create writer"); writer_task = NULL; goto unwind; } + wake_up_process(writer_task); fakewriter_tasks = kzalloc(nfakewriters * sizeof(fakewriter_tasks[0]), GFP_KERNEL); if (fakewriter_tasks == NULL) { @@ -2151,14 +2152,15 @@ rcu_torture_init(void) } if (shutdown_secs > 0) { shutdown_time = jiffies + shutdown_secs * HZ; - shutdown_task = kthread_run(rcu_torture_shutdown, NULL, - "rcu_torture_shutdown"); + shutdown_task = kthread_create(rcu_torture_shutdown, NULL, + "rcu_torture_shutdown"); if (IS_ERR(shutdown_task)) { firsterr = PTR_ERR(shutdown_task); VERBOSE_PRINTK_ERRSTRING("Failed to create shutdown"); shutdown_task = NULL; goto unwind; } + wake_up_process(shutdown_task); } i = rcu_torture_onoff_init(); if (i != 0) { -- cgit v1.2.3 From e3ebfb96f396731ca2d0b108785d5da31b53ab00 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 2 Jul 2012 14:42:01 -0700 Subject: rcu: Add PROVE_RCU_DELAY to provoke difficult races There have been some recent bugs that were triggered only when preemptible RCU's __rcu_read_unlock() was preempted just after setting ->rcu_read_lock_nesting to INT_MIN, which is a low-probability event. Therefore, reproducing those bugs (to say nothing of gaining confidence in alleged fixes) was quite difficult. This commit therefore creates a new debug-only RCU kernel config option that forces a short delay in __rcu_read_unlock() to increase the probability of those sorts of bugs occurring. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcupdate.c | 4 ++++ lib/Kconfig.debug | 14 ++++++++++++++ 2 files changed, 18 insertions(+) (limited to 'kernel') diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 4e6a61b15e86..29ca1c6da594 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -45,6 +45,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -81,6 +82,9 @@ void __rcu_read_unlock(void) } else { barrier(); /* critical section before exit code. */ t->rcu_read_lock_nesting = INT_MIN; +#ifdef CONFIG_PROVE_RCU_DELAY + udelay(10); /* Make preemption more probable. */ +#endif /* #ifdef CONFIG_PROVE_RCU_DELAY */ barrier(); /* assign before ->rcu_read_unlock_special load */ if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) rcu_read_unlock_special(t); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 2403a63b5da5..dacbbe4d7a80 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -629,6 +629,20 @@ config PROVE_RCU_REPEATEDLY Say N if you are unsure. +config PROVE_RCU_DELAY + bool "RCU debugging: preemptible RCU race provocation" + depends on DEBUG_KERNEL && PREEMPT_RCU + default n + help + There is a class of races that involve an unlikely preemption + of __rcu_read_unlock() just after ->rcu_read_lock_nesting has + been set to INT_MIN. This feature inserts a delay at that + point to increase the probability of these races. + + Say Y to increase probability of preemption of __rcu_read_unlock(). + + Say N if you are unsure. + config SPARSE_RCU_POINTER bool "RCU debugging: sparse-based checks for pointer usage" default n -- cgit v1.2.3 From 818615c4cde2a71a5857007b134cce89d506cc3f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 11 Jul 2012 00:24:57 -0700 Subject: rcu: Pull TINY_RCU dyntick-idle tracing into non-idle region Because TINY_RCU's idle detection keys directly off of the nesting level, rather than from a separate variable as in TREE_RCU, the TINY_RCU dyntick-idle tracing on transition to idle must happen before the change to the nesting level. This commit therefore makes this change by passing the desired new value (rather than the old value) of the nesting level in to rcu_idle_enter_common(). [ paulmck: Add fix for wrong-variable bug spotted by Michael Wang . ] Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutiny.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 547b1fe5b052..e4163c5af1de 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -56,24 +56,27 @@ static void __call_rcu(struct rcu_head *head, static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; /* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */ -static void rcu_idle_enter_common(long long oldval) +static void rcu_idle_enter_common(long long newval) { - if (rcu_dynticks_nesting) { + if (newval) { RCU_TRACE(trace_rcu_dyntick("--=", - oldval, rcu_dynticks_nesting)); + rcu_dynticks_nesting, newval)); + rcu_dynticks_nesting = newval; return; } - RCU_TRACE(trace_rcu_dyntick("Start", oldval, rcu_dynticks_nesting)); + RCU_TRACE(trace_rcu_dyntick("Start", rcu_dynticks_nesting, newval)); if (!is_idle_task(current)) { struct task_struct *idle = idle_task(smp_processor_id()); RCU_TRACE(trace_rcu_dyntick("Error on entry: not idle task", - oldval, rcu_dynticks_nesting)); + rcu_dynticks_nesting, newval)); ftrace_dump(DUMP_ALL); WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", current->pid, current->comm, idle->pid, idle->comm); /* must be idle task! */ } + barrier(); + rcu_dynticks_nesting = newval; rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */ } @@ -84,17 +87,16 @@ static void rcu_idle_enter_common(long long oldval) void rcu_idle_enter(void) { unsigned long flags; - long long oldval; + long long newval; local_irq_save(flags); - oldval = rcu_dynticks_nesting; WARN_ON_ONCE((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) == 0); if ((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) - rcu_dynticks_nesting = 0; + newval = 0; else - rcu_dynticks_nesting -= DYNTICK_TASK_NEST_VALUE; - rcu_idle_enter_common(oldval); + newval = rcu_dynticks_nesting - DYNTICK_TASK_NEST_VALUE; + rcu_idle_enter_common(newval); local_irq_restore(flags); } EXPORT_SYMBOL_GPL(rcu_idle_enter); @@ -105,13 +107,12 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter); void rcu_irq_exit(void) { unsigned long flags; - long long oldval; + long long newval; local_irq_save(flags); - oldval = rcu_dynticks_nesting; - rcu_dynticks_nesting--; - WARN_ON_ONCE(rcu_dynticks_nesting < 0); - rcu_idle_enter_common(oldval); + newval = rcu_dynticks_nesting - 1; + WARN_ON_ONCE(newval < 0); + rcu_idle_enter_common(newval); local_irq_restore(flags); } -- cgit v1.2.3 From 1e3fd2b38cea41f5386bf23440f2cbdd74cf13d0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 27 Jul 2012 13:41:47 -0700 Subject: rcu: Properly initialize ->boost_tasks on CPU offline When rcu_preempt_offline_tasks() clears tasks from a leaf rcu_node structure, it does not NULL out the structure's ->boost_tasks field. This commit therefore fixes this issue. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree_plugin.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 7f3244c0df01..b1b485111321 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -584,8 +584,11 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */ } + rnp->gp_tasks = NULL; + rnp->exp_tasks = NULL; #ifdef CONFIG_RCU_BOOST - /* In case root is being boosted and leaf is not. */ + rnp->boost_tasks = NULL; + /* In case root is being boosted and leaf was not. */ raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ if (rnp_root->boost_tasks != NULL && rnp_root->boost_tasks != rnp_root->gp_tasks) @@ -593,8 +596,6 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */ #endif /* #ifdef CONFIG_RCU_BOOST */ - rnp->gp_tasks = NULL; - rnp->exp_tasks = NULL; return retval; } -- cgit v1.2.3 From b4270ee356e5ecef5394ab80c0a0301c1676b7f0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 31 Jul 2012 10:12:48 -0700 Subject: rcu: Permit RCU_NONIDLE() to be used from interrupt context There is a need to use RCU from interrupt context, but either before rcu_irq_enter() is called or after rcu_irq_exit() is called. If the interrupt occurs from idle, then lockdep-RCU will complain about such uses, as they appear to be illegal uses of RCU from the idle loop. In other environments, RCU_NONIDLE() could be used to properly protect the use of RCU, but RCU_NONIDLE() currently cannot be invoked except from process context. This commit therefore modifies RCU_NONIDLE() to permit its use more globally. Reported-by: Steven Rostedt Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 6 ++---- kernel/rcutiny.c | 2 ++ 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 115ead2b5155..0fbbd52e01f9 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -210,14 +210,12 @@ extern void exit_rcu(void); * to nest RCU_NONIDLE() wrappers, but the nesting level is currently * quite limited. If deeper nesting is required, it will be necessary * to adjust DYNTICK_TASK_NESTING_VALUE accordingly. - * - * This macro may be used from process-level code only. */ #define RCU_NONIDLE(a) \ do { \ - rcu_idle_exit(); \ + rcu_irq_enter(); \ do { a; } while (0); \ - rcu_idle_enter(); \ + rcu_irq_exit(); \ } while (0) /* diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index e4163c5af1de..2e073a24d250 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -115,6 +115,7 @@ void rcu_irq_exit(void) rcu_idle_enter_common(newval); local_irq_restore(flags); } +EXPORT_SYMBOL_GPL(rcu_irq_exit); /* Common code for rcu_idle_exit() and rcu_irq_enter(), see kernel/rcutree.c. */ static void rcu_idle_exit_common(long long oldval) @@ -172,6 +173,7 @@ void rcu_irq_enter(void) rcu_idle_exit_common(oldval); local_irq_restore(flags); } +EXPORT_SYMBOL_GPL(rcu_irq_enter); #ifdef CONFIG_DEBUG_LOCK_ALLOC -- cgit v1.2.3 From 5cc900cf55fe58aaad93767c5a526e2a69cbcbc6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 31 Jul 2012 14:09:49 -0700 Subject: rcu: Improve boost selection when moving tasks to root rcu_node The rcu_preempt_offline_tasks() moves all tasks queued on a given leaf rcu_node structure to the root rcu_node, which is done when the last CPU corresponding the the leaf rcu_node structure goes offline. Now that RCU-preempt's synchronize_rcu_expedited() implementation blocks CPU-hotplug operations during the initialization of each rcu_node structure's ->boost_tasks pointer, rcu_preempt_offline_tasks() can do a better job of setting the root rcu_node's ->boost_tasks pointer. The key point is that rcu_preempt_offline_tasks() runs as part of the CPU-hotplug process, so that a concurrent synchronize_rcu_expedited() is guaranteed to either have not started on the one hand (in which case there is no boosting on behalf of the expedited grace period) or to be completely initialized on the other (in which case, in the absence of other priority boosting, all ->boost_tasks pointers will be initialized). Therefore, if rcu_preempt_offline_tasks() finds that the ->boost_tasks pointer is equal to the ->exp_tasks pointer, it can be sure that it is correctly placed. In the case where there was boosting ongoing at the time that the synchronize_rcu_expedited() function started, different nodes might start boosting the tasks blocking the expedited grace period at different times. In this mixed case, the root node will either be boosting tasks for the expedited grace period already, or it will start as soon as it gets done boosting for the normal grace period -- but in this latter case, the root node's tasks needed to be boosted in any case. This commit therefore adds a check of the ->boost_tasks pointer against the ->exp_tasks pointer to the list that prevents updating ->boost_tasks. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree_plugin.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index b1b485111321..15d28febbbd4 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -588,10 +588,15 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, rnp->exp_tasks = NULL; #ifdef CONFIG_RCU_BOOST rnp->boost_tasks = NULL; - /* In case root is being boosted and leaf was not. */ + /* + * In case root is being boosted and leaf was not. Make sure + * that we boost the tasks blocking the current grace period + * in this case. + */ raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ if (rnp_root->boost_tasks != NULL && - rnp_root->boost_tasks != rnp_root->gp_tasks) + rnp_root->boost_tasks != rnp_root->gp_tasks && + rnp_root->boost_tasks != rnp_root->exp_tasks) rnp_root->boost_tasks = rnp_root->gp_tasks; raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */ #endif /* #ifdef CONFIG_RCU_BOOST */ -- cgit v1.2.3 From a82dcc76021e22c174ba85d90b7a8c750b7362d0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 1 Aug 2012 14:29:20 -0700 Subject: rcu: Make offline-CPU checking allow for indefinite delays The rcu_implicit_offline_qs() function implicitly assumed that execution would progress predictably when interrupts are disabled, which is of course not guaranteed when running on a hypervisor. Furthermore, this function is short, and is called from one place only in a short function. This commit therefore ensures that the timing is checked before checking the condition, which guarantees correct behavior even given indefinite delays. It also inlines rcu_implicit_offline_qs() into rcu_implicit_dynticks_qs(). Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree.c | 53 +++++++++++++++++++++-------------------------------- 1 file changed, 21 insertions(+), 32 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index f7bcd9e6c054..2c4ee4cdbc0e 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -318,35 +318,6 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp) return &rsp->node[0]; } -/* - * If the specified CPU is offline, tell the caller that it is in - * a quiescent state. Otherwise, whack it with a reschedule IPI. - * Grace periods can end up waiting on an offline CPU when that - * CPU is in the process of coming online -- it will be added to the - * rcu_node bitmasks before it actually makes it online. The same thing - * can happen while a CPU is in the process of coming online. Because this - * race is quite rare, we check for it after detecting that the grace - * period has been delayed rather than checking each and every CPU - * each and every time we start a new grace period. - */ -static int rcu_implicit_offline_qs(struct rcu_data *rdp) -{ - /* - * If the CPU is offline for more than a jiffy, it is in a quiescent - * state. We can trust its state not to change because interrupts - * are disabled. The reason for the jiffy's worth of slack is to - * handle CPUs initializing on the way up and finding their way - * to the idle loop on the way down. - */ - if (cpu_is_offline(rdp->cpu) && - ULONG_CMP_LT(rdp->rsp->gp_start + 2, jiffies)) { - trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl"); - rdp->offline_fqs++; - return 1; - } - return 0; -} - /* * rcu_idle_enter_common - inform RCU that current CPU is moving towards idle * @@ -675,7 +646,7 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp) * Return true if the specified CPU has passed through a quiescent * state by virtue of being in or having passed through an dynticks * idle state since the last call to dyntick_save_progress_counter() - * for this same CPU. + * for this same CPU, or by virtue of having been offline. */ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) { @@ -699,8 +670,26 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) return 1; } - /* Go check for the CPU being offline. */ - return rcu_implicit_offline_qs(rdp); + /* + * Check for the CPU being offline, but only if the grace period + * is old enough. We don't need to worry about the CPU changing + * state: If we see it offline even once, it has been through a + * quiescent state. + * + * The reason for insisting that the grace period be at least + * one jiffy old is that CPUs that are not quite online and that + * have just gone offline can still execute RCU read-side critical + * sections. + */ + if (ULONG_CMP_GE(rdp->rsp->gp_start + 2, jiffies)) + return 0; /* Grace period is not old enough. */ + barrier(); + if (cpu_is_offline(rdp->cpu)) { + trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl"); + rdp->offline_fqs++; + return 1; + } + return 0; } static int jiffies_till_stall_check(void) -- cgit v1.2.3 From b065a85354239cc96295f696eeace67ad3a55e5c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 1 Aug 2012 15:57:54 -0700 Subject: rcu: Fix obsolete rcu_initiate_boost() header comment Commit 1217ed1b (rcu: permit rcu_read_unlock() to be called while holding runqueue locks) made rcu_initiate_boost() restore irq state when releasing the rcu_node structure's ->lock, but failed to update the header comment accordingly. This commit therefore brings the header comment up to date. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree_plugin.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 15d28febbbd4..c47b28bf18ae 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1197,9 +1197,9 @@ static int rcu_boost_kthread(void *arg) * kthread to start boosting them. If there is an expedited grace * period in progress, it is always time to boost. * - * The caller must hold rnp->lock, which this function releases, - * but irqs remain disabled. The ->boost_kthread_task is immortal, - * so we don't need to worry about it going away. + * The caller must hold rnp->lock, which this function releases. + * The ->boost_kthread_task is immortal, so we don't need to worry + * about it going away. */ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) { -- cgit v1.2.3 From 115f7a7ca0d412aab81acaaaa95eb1ab1c622e2f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 10 Aug 2012 13:55:03 -0700 Subject: rcu: Apply for_each_rcu_flavor() to increment_cpu_stall_ticks() The increment_cpu_stall_ticks() function listed each RCU flavor explicitly, with an ifdef to handle preemptible RCU. This commit therefore applies for_each_rcu_flavor() to save a line of code. Because this commit switches from a code-based enumeration of the flavors of RCU to an rcu_state-list-based enumeration, it is no longer possible to apply __get_cpu_var() to the per-CPU rcu_data structures. We instead use __this_cpu_var() on the rcu_state structure's ->rda field that references the corresponding rcu_data structures. Signed-off-by: Paul E. McKenney --- kernel/rcutree_plugin.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index c47b28bf18ae..70b33bf780f0 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -2200,11 +2200,10 @@ static void zero_cpu_stall_ticks(struct rcu_data *rdp) /* Increment ->ticks_this_gp for all flavors of RCU. */ static void increment_cpu_stall_ticks(void) { - __get_cpu_var(rcu_sched_data).ticks_this_gp++; - __get_cpu_var(rcu_bh_data).ticks_this_gp++; -#ifdef CONFIG_TREE_PREEMPT_RCU - __get_cpu_var(rcu_preempt_data).ticks_this_gp++; -#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ + struct rcu_state *rsp; + + for_each_rcu_flavor(rsp) + __this_cpu_ptr(rsp->rda)->ticks_this_gp++; } #else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */ -- cgit v1.2.3 From 5fd4dc068c4ded1339180dbcd1a99e15b1c0a728 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 10 Aug 2012 16:00:11 -0700 Subject: rcu: Avoid rcu_print_detail_task_stall_rnp() segfault The rcu_print_detail_task_stall_rnp() function invokes rcu_preempt_blocked_readers_cgp() to verify that there are some preempted RCU readers blocking the current grace period outside of the protection of the rcu_node structure's ->lock. This means that the last blocked reader might exit its RCU read-side critical section and remove itself from the ->blkd_tasks list before the ->lock is acquired, resulting in a segmentation fault when the subsequent code attempts to dereference the now-NULL gp_tasks pointer. This commit therefore moves the test under the lock. This will not have measurable effect on lock contention because this code is invoked only when printing RCU CPU stall warnings, in other words, in the common case, never. Signed-off-by: Paul E. McKenney --- kernel/rcutree_plugin.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 70b33bf780f0..df47014e129d 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -422,9 +422,11 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) unsigned long flags; struct task_struct *t; - if (!rcu_preempt_blocked_readers_cgp(rnp)) - return; raw_spin_lock_irqsave(&rnp->lock, flags); + if (!rcu_preempt_blocked_readers_cgp(rnp)) { + raw_spin_unlock_irqrestore(&rnp->lock, flags); + return; + } t = list_entry(rnp->gp_tasks, struct task_struct, rcu_node_entry); list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) -- cgit v1.2.3 From c8020a67e625c714c4dbedc8ae2944b461e204ec Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 10 Aug 2012 16:55:59 -0700 Subject: rcu: Protect rcu_node accesses during CPU stall warnings The print_other_cpu_stall() function accesses a number of rcu_node fields without protection from the ->lock. In theory, this is not a problem because the fields accessed are all integers, but in practice the compiler can get nasty. Therefore, the commit extends the existing critical section to cover the entire loop body. Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 2c4ee4cdbc0e..2cf8eb3e2d43 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -746,14 +746,15 @@ static void print_other_cpu_stall(struct rcu_state *rsp) rcu_for_each_leaf_node(rsp, rnp) { raw_spin_lock_irqsave(&rnp->lock, flags); ndetected += rcu_print_task_stall(rnp); + if (rnp->qsmask != 0) { + for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) + if (rnp->qsmask & (1UL << cpu)) { + print_cpu_stall_info(rsp, + rnp->grplo + cpu); + ndetected++; + } + } raw_spin_unlock_irqrestore(&rnp->lock, flags); - if (rnp->qsmask == 0) - continue; - for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) - if (rnp->qsmask & (1UL << cpu)) { - print_cpu_stall_info(rsp, rnp->grplo + cpu); - ndetected++; - } } /* -- cgit v1.2.3 From c96ea7cfdd88d0a67c970502bc5313fede34b86b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 13 Aug 2012 11:17:06 -0700 Subject: rcu: Avoid spurious RCU CPU stall warnings If a given CPU avoids the idle loop but also avoids starting a new RCU grace period for a full minute, RCU can issue spurious RCU CPU stall warnings. This commit fixes this issue by adding a check for ongoing grace period to avoid these spurious stall warnings. Reported-by: Becky Bruce Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 2cf8eb3e2d43..98f275296c6d 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -819,7 +819,8 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) j = ACCESS_ONCE(jiffies); js = ACCESS_ONCE(rsp->jiffies_stall); rnp = rdp->mynode; - if ((ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && ULONG_CMP_GE(j, js)) { + if (rcu_gp_in_progress(rsp) && + (ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && ULONG_CMP_GE(j, js)) { /* We haven't checked in, so go dump stack. */ print_cpu_stall(rsp); -- cgit v1.2.3 From fdab649b1aa732cd6e79654349088465cdff49af Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 13 Aug 2012 16:34:12 -0700 Subject: rcu: Remove redundant memory barrier from __call_rcu() The first memory barrier in __call_rcu() is supposed to order any updates done beforehand by the caller against the actual queuing of the callback. However, the second memory barrier (which is intended to order incrementing the queue lengths before queuing the callback) is also between the caller's updates and the queuing of the callback. The second memory barrier can therefore serve both purposes. This commit therefore removes the first memory barrier. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 98f275296c6d..ba4f4b4c2382 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1922,8 +1922,6 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), head->func = func; head->next = NULL; - smp_mb(); /* Ensure RCU update seen before callback registry. */ - /* * Opportunistically note grace-period endings and beginnings. * Note that we might see a beginning right after we see an -- cgit v1.2.3 From 7a11e2058f02feb6884efb067f328012c318a13f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 21 Aug 2012 12:14:19 -0700 Subject: rcu: Move TINY_PREEMPT_RCU away from raw_local_irq_save() The use of raw_local_irq_save() is unnecessary, given that local_irq_save() really does disable interrupts. Also, it appears to interfere with lockdep. Therefore, this commit moves to local_irq_save(). Reported-by: Fengguang Wu Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Tested-by: Fengguang Wu --- kernel/rcutiny_plugin.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index 918fd1e8509c..3d0190282204 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h @@ -278,7 +278,7 @@ static int rcu_boost(void) rcu_preempt_ctrlblk.exp_tasks == NULL) return 0; /* Nothing to boost. */ - raw_local_irq_save(flags); + local_irq_save(flags); /* * Recheck with irqs disabled: all tasks in need of boosting @@ -287,7 +287,7 @@ static int rcu_boost(void) */ if (rcu_preempt_ctrlblk.boost_tasks == NULL && rcu_preempt_ctrlblk.exp_tasks == NULL) { - raw_local_irq_restore(flags); + local_irq_restore(flags); return 0; } @@ -317,7 +317,7 @@ static int rcu_boost(void) t = container_of(tb, struct task_struct, rcu_node_entry); rt_mutex_init_proxy_locked(&mtx, t); t->rcu_boost_mutex = &mtx; - raw_local_irq_restore(flags); + local_irq_restore(flags); rt_mutex_lock(&mtx); rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ @@ -991,9 +991,9 @@ static void rcu_trace_sub_qlen(struct rcu_ctrlblk *rcp, int n) { unsigned long flags; - raw_local_irq_save(flags); + local_irq_save(flags); rcp->qlen -= n; - raw_local_irq_restore(flags); + local_irq_restore(flags); } /* -- cgit v1.2.3 From 803b0ebae921714d1c36f0996db8125eda5fae53 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 23 Aug 2012 08:34:07 -0700 Subject: time: RCU permitted to stop idle entry via softirq The can_stop_idle_tick() function complains if a softirq vector is raised too late in the idle-entry process, presumably in order to prevent dangling softirq invocations from being delayed across the full idle period, which might be indefinitely long -- and if softirq was asserted any later than the call to this function, such a delay might well happen. However, RCU needs to be able to use softirq to stop idle entry in order to be able to drain RCU callbacks from the current CPU, which in turn enables faster entry into dyntick-idle mode, which in turn reduces power consumption. Because RCU takes this action at a well-defined point in the idle-entry path, it is safe for RCU to take this approach. This commit therefore silences the error message that is sometimes produced when the going-idle CPU suddenly finds that it has an RCU_SOFTIRQ to process. The error message will continue to be issued for other softirq vectors. Reported-by: Sedat Dilek Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Tested-by: Sedat Dilek Reviewed-by: Josh Triplett --- include/linux/interrupt.h | 2 ++ kernel/time/tick-sched.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index c5f856a040b9..5e4e6170f43a 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -430,6 +430,8 @@ enum NR_SOFTIRQS }; +#define SOFTIRQ_STOP_IDLE_MASK (~(1 << RCU_SOFTIRQ)) + /* map softirq index to softirq name. update 'softirq_to_name' in * kernel/softirq.c when adding a new softirq. */ diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 024540f97f74..4b1785a7bb83 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -436,7 +436,8 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) if (unlikely(local_softirq_pending() && cpu_online(cpu))) { static int ratelimit; - if (ratelimit < 10) { + if (ratelimit < 10 && + (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", (unsigned int) local_softirq_pending()); ratelimit++; -- cgit v1.2.3 From 22a767269a767b3ee91e4aaea353ac6bec6a912d Mon Sep 17 00:00:00 2001 From: Li Zhong Date: Wed, 19 Sep 2012 08:52:32 -0700 Subject: rcu: Move TINY_RCU quiescent state out of extended quiescent state TINY_RCU's rcu_idle_enter_common() invokes rcu_sched_qs() in order to inform the RCU core of the quiescent state implied by idle entry. Of course, idle is also an extended quiescent state, so that the call to rcu_sched_qs() speeds up RCU's invoking of any callbacks that might be queued. This speed-up is important when entering into dyntick-idle mode -- if there are no further scheduling-clock interrupts, the callbacks might never be invoked, which could result in a system hang. However, processing callbacks does event tracing, which in turn implies RCU read-side critical sections, which are illegal in extended quiescent states. This patch therefore moves the call to rcu_sched_qs() so that it precedes the point at which we inform lockdep that RCU has entered an extended quiescent state. Signed-off-by: Li Zhong Signed-off-by: Paul E. McKenney --- kernel/rcutiny.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 2e073a24d250..e4c6a598d6f7 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -75,9 +75,9 @@ static void rcu_idle_enter_common(long long newval) current->pid, current->comm, idle->pid, idle->comm); /* must be idle task! */ } + rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */ barrier(); rcu_dynticks_nesting = newval; - rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */ } /* -- cgit v1.2.3 From 86f343b50bb9f56cce60fade22da9defff28934c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 21 Sep 2012 10:41:50 -0700 Subject: rcu: Fix CONFIG_RCU_FAST_NO_HZ stall warning message The print_cpu_stall_fast_no_hz() function attempts to print -1 when the ->idle_gp_timer is not pending, but unsigned arithmetic causes it to instead print ULONG_MAX, which is 4294967295 on 32-bit systems and 18446744073709551615 on 64-bit systems. Neither of these are the most reader-friendly values, so this commit instead causes "timer not pending" to be printed when ->idle_gp_timer is not pending. Reported-by: Paul Walmsley Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- Documentation/RCU/stallwarn.txt | 16 ++++++++-------- kernel/rcutree_plugin.h | 12 ++++++++---- 2 files changed, 16 insertions(+), 12 deletions(-) (limited to 'kernel') diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt index 523364e4e1f1..1927151b386b 100644 --- a/Documentation/RCU/stallwarn.txt +++ b/Documentation/RCU/stallwarn.txt @@ -99,7 +99,7 @@ In kernels with CONFIG_RCU_FAST_NO_HZ, even more information is printed: INFO: rcu_preempt detected stall on CPU - 0: (64628 ticks this GP) idle=dd5/3fffffffffffffff/0 drain=0 . timer=-1 + 0: (64628 ticks this GP) idle=dd5/3fffffffffffffff/0 drain=0 . timer not pending (t=65000 jiffies) The "(64628 ticks this GP)" indicates that this CPU has taken more @@ -116,13 +116,13 @@ number between the two "/"s is the value of the nesting, which will be a small positive number if in the idle loop and a very large positive number (as shown above) otherwise. -For CONFIG_RCU_FAST_NO_HZ kernels, the "drain=0" indicates that the -CPU is not in the process of trying to force itself into dyntick-idle -state, the "." indicates that the CPU has not given up forcing RCU -into dyntick-idle mode (it would be "H" otherwise), and the "timer=-1" -indicates that the CPU has not recented forced RCU into dyntick-idle -mode (it would otherwise indicate the number of microseconds remaining -in this forced state). +For CONFIG_RCU_FAST_NO_HZ kernels, the "drain=0" indicates that the CPU is +not in the process of trying to force itself into dyntick-idle state, the +"." indicates that the CPU has not given up forcing RCU into dyntick-idle +mode (it would be "H" otherwise), and the "timer not pending" indicates +that the CPU has not recently forced RCU into dyntick-idle mode (it +would otherwise indicate the number of microseconds remaining in this +forced state). Multiple Warnings From One Stall diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index df47014e129d..e12d07ba601a 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -2130,11 +2130,15 @@ static void print_cpu_stall_fast_no_hz(char *cp, int cpu) { struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); struct timer_list *tltp = &rdtp->idle_gp_timer; + char c; - sprintf(cp, "drain=%d %c timer=%lu", - rdtp->dyntick_drain, - rdtp->dyntick_holdoff == jiffies ? 'H' : '.', - timer_pending(tltp) ? tltp->expires - jiffies : -1); + c = rdtp->dyntick_holdoff == jiffies ? 'H' : '.'; + if (timer_pending(tltp)) + sprintf(cp, "drain=%d %c timer=%lu", + rdtp->dyntick_drain, c, tltp->expires - jiffies); + else + sprintf(cp, "drain=%d %c timer not pending", + rdtp->dyntick_drain, c); } #else /* #ifdef CONFIG_RCU_FAST_NO_HZ */ -- cgit v1.2.3 From 1331e7a1bbe1f11b19c4327ba0853bee2a606543 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 2 Aug 2012 17:43:50 -0700 Subject: rcu: Remove _rcu_barrier() dependency on __stop_machine() Currently, _rcu_barrier() relies on preempt_disable() to prevent any CPU from going offline, which in turn depends on CPU hotplug's use of __stop_machine(). This patch therefore makes _rcu_barrier() use get_online_cpus() to block CPU-hotplug operations. This has the added benefit of removing the need for _rcu_barrier() to adopt callbacks: Because CPU-hotplug operations are excluded, there can be no callbacks to adopt. This commit simplifies the code accordingly. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree.c | 83 +++++++------------------------------------------- kernel/rcutree.h | 3 -- kernel/rcutree_trace.c | 4 +-- 3 files changed, 13 insertions(+), 77 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index f7bcd9e6c054..c45d3f745302 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1392,17 +1392,6 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) int i; struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); - /* - * If there is an rcu_barrier() operation in progress, then - * only the task doing that operation is permitted to adopt - * callbacks. To do otherwise breaks rcu_barrier() and friends - * by causing them to fail to wait for the callbacks in the - * orphanage. - */ - if (rsp->rcu_barrier_in_progress && - rsp->rcu_barrier_in_progress != current) - return; - /* Do the accounting first. */ rdp->qlen_lazy += rsp->qlen_lazy; rdp->qlen += rsp->qlen; @@ -1457,9 +1446,8 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) * The CPU has been completely removed, and some other CPU is reporting * this fact from process context. Do the remainder of the cleanup, * including orphaning the outgoing CPU's RCU callbacks, and also - * adopting them, if there is no _rcu_barrier() instance running. - * There can only be one CPU hotplug operation at a time, so no other - * CPU can be attempting to update rcu_cpu_kthread_task. + * adopting them. There can only be one CPU hotplug operation at a time, + * so no other CPU can be attempting to update rcu_cpu_kthread_task. */ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) { @@ -1521,10 +1509,6 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) #else /* #ifdef CONFIG_HOTPLUG_CPU */ -static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) -{ -} - static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) { } @@ -2328,13 +2312,10 @@ static void rcu_barrier_func(void *type) static void _rcu_barrier(struct rcu_state *rsp) { int cpu; - unsigned long flags; struct rcu_data *rdp; - struct rcu_data rd; unsigned long snap = ACCESS_ONCE(rsp->n_barrier_done); unsigned long snap_done; - init_rcu_head_on_stack(&rd.barrier_head); _rcu_barrier_trace(rsp, "Begin", -1, snap); /* Take mutex to serialize concurrent rcu_barrier() requests. */ @@ -2374,70 +2355,30 @@ static void _rcu_barrier(struct rcu_state *rsp) /* * Initialize the count to one rather than to zero in order to * avoid a too-soon return to zero in case of a short grace period - * (or preemption of this task). Also flag this task as doing - * an rcu_barrier(). This will prevent anyone else from adopting - * orphaned callbacks, which could cause otherwise failure if a - * CPU went offline and quickly came back online. To see this, - * consider the following sequence of events: - * - * 1. We cause CPU 0 to post an rcu_barrier_callback() callback. - * 2. CPU 1 goes offline, orphaning its callbacks. - * 3. CPU 0 adopts CPU 1's orphaned callbacks. - * 4. CPU 1 comes back online. - * 5. We cause CPU 1 to post an rcu_barrier_callback() callback. - * 6. Both rcu_barrier_callback() callbacks are invoked, awakening - * us -- but before CPU 1's orphaned callbacks are invoked!!! + * (or preemption of this task). Exclude CPU-hotplug operations + * to ensure that no offline CPU has callbacks queued. */ init_completion(&rsp->barrier_completion); atomic_set(&rsp->barrier_cpu_count, 1); - raw_spin_lock_irqsave(&rsp->onofflock, flags); - rsp->rcu_barrier_in_progress = current; - raw_spin_unlock_irqrestore(&rsp->onofflock, flags); + get_online_cpus(); /* - * Force every CPU with callbacks to register a new callback - * that will tell us when all the preceding callbacks have - * been invoked. If an offline CPU has callbacks, wait for - * it to either come back online or to finish orphaning those - * callbacks. + * Force each CPU with callbacks to register a new callback. + * When that callback is invoked, we will know that all of the + * corresponding CPU's preceding callbacks have been invoked. */ - for_each_possible_cpu(cpu) { - preempt_disable(); + for_each_online_cpu(cpu) { rdp = per_cpu_ptr(rsp->rda, cpu); - if (cpu_is_offline(cpu)) { - _rcu_barrier_trace(rsp, "Offline", cpu, - rsp->n_barrier_done); - preempt_enable(); - while (cpu_is_offline(cpu) && ACCESS_ONCE(rdp->qlen)) - schedule_timeout_interruptible(1); - } else if (ACCESS_ONCE(rdp->qlen)) { + if (ACCESS_ONCE(rdp->qlen)) { _rcu_barrier_trace(rsp, "OnlineQ", cpu, rsp->n_barrier_done); smp_call_function_single(cpu, rcu_barrier_func, rsp, 1); - preempt_enable(); } else { _rcu_barrier_trace(rsp, "OnlineNQ", cpu, rsp->n_barrier_done); - preempt_enable(); } } - - /* - * Now that all online CPUs have rcu_barrier_callback() callbacks - * posted, we can adopt all of the orphaned callbacks and place - * an rcu_barrier_callback() callback after them. When that is done, - * we are guaranteed to have an rcu_barrier_callback() callback - * following every callback that could possibly have been - * registered before _rcu_barrier() was called. - */ - raw_spin_lock_irqsave(&rsp->onofflock, flags); - rcu_adopt_orphan_cbs(rsp); - rsp->rcu_barrier_in_progress = NULL; - raw_spin_unlock_irqrestore(&rsp->onofflock, flags); - atomic_inc(&rsp->barrier_cpu_count); - smp_mb__after_atomic_inc(); /* Ensure atomic_inc() before callback. */ - rd.rsp = rsp; - rsp->call(&rd.barrier_head, rcu_barrier_callback); + put_online_cpus(); /* * Now that we have an rcu_barrier_callback() callback on each @@ -2458,8 +2399,6 @@ static void _rcu_barrier(struct rcu_state *rsp) /* Other rcu_barrier() invocations can now safely proceed. */ mutex_unlock(&rsp->barrier_mutex); - - destroy_rcu_head_on_stack(&rd.barrier_head); } /** diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 4d29169f2124..94dfdf1f31f5 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -398,9 +398,6 @@ struct rcu_state { struct rcu_head **orphan_donetail; /* Tail of above. */ long qlen_lazy; /* Number of lazy callbacks. */ long qlen; /* Total number of callbacks. */ - struct task_struct *rcu_barrier_in_progress; - /* Task doing rcu_barrier(), */ - /* or NULL if no barrier. */ struct mutex barrier_mutex; /* Guards barrier fields. */ atomic_t barrier_cpu_count; /* # CPUs waiting on. */ struct completion barrier_completion; /* Wake at barrier end. */ diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index abffb486e94e..6a2e52a85d77 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -51,8 +51,8 @@ static int show_rcubarrier(struct seq_file *m, void *unused) struct rcu_state *rsp; for_each_rcu_flavor(rsp) - seq_printf(m, "%s: %c bcc: %d nbd: %lu\n", - rsp->name, rsp->rcu_barrier_in_progress ? 'B' : '.', + seq_printf(m, "%s: bcc: %d nbd: %lu\n", + rsp->name, atomic_read(&rsp->barrier_cpu_count), rsp->n_barrier_done); return 0; -- cgit v1.2.3 From 0d8ee37e2fcb7b77b9c5dee784beca5a215cad4c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 3 Aug 2012 13:16:15 -0700 Subject: rcu: Disallow callback registry on offline CPUs Posting a callback after the CPU_DEAD notifier effectively leaks that callback unless/until that CPU comes back online. Silence is unhelpful when attempting to track down such leaks, so this commit emits a WARN_ON_ONCE() and unconditionally leaks the callback when an offline CPU attempts to register a callback. The rdp->nxttail[RCU_NEXT_TAIL] is set to NULL in the CPU_DEAD notifier and restored in the CPU_UP_PREPARE notifier, allowing _call_rcu() to determine exactly when posting callbacks is illegal. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index c45d3f745302..be76c80a14d1 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1505,6 +1505,9 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL, "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n", cpu, rdp->qlen, rdp->nxtlist); + init_callback_list(rdp); + /* Disallow further callbacks on this CPU. */ + rdp->nxttail[RCU_NEXT_TAIL] = NULL; } #else /* #ifdef CONFIG_HOTPLUG_CPU */ @@ -1927,6 +1930,12 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), rdp = this_cpu_ptr(rsp->rda); /* Add the callback to our list. */ + if (unlikely(rdp->nxttail[RCU_NEXT_TAIL] == NULL)) { + /* _call_rcu() is illegal on offline CPU; leak the callback. */ + WARN_ON_ONCE(1); + local_irq_restore(flags); + return; + } ACCESS_ONCE(rdp->qlen)++; if (lazy) rdp->qlen_lazy++; @@ -2464,6 +2473,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) rdp->qlen_last_fqs_check = 0; rdp->n_force_qs_snap = rsp->n_force_qs; rdp->blimit = blimit; + init_callback_list(rdp); /* Re-enable callbacks on this CPU. */ rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; atomic_set(&rdp->dynticks->dynticks, (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1); -- cgit v1.2.3 From 5d18023294abc22984886bd7185344e0c2be0daf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 20 Aug 2012 11:26:57 +0200 Subject: sched: Fix load avg vs cpu-hotplug Rabik and Paul reported two different issues related to the same few lines of code. Rabik's issue is that the nr_uninterruptible migration code is wrong in that he sees artifacts due to this (Rabik please do expand in more detail). Paul's issue is that this code as it stands relies on us using stop_machine() for unplug, we all would like to remove this assumption so that eventually we can remove this stop_machine() usage altogether. The only reason we'd have to migrate nr_uninterruptible is so that we could use for_each_online_cpu() loops in favour of for_each_possible_cpu() loops, however since nr_uninterruptible() is the only such loop and its using possible lets not bother at all. The problem Rabik sees is (probably) caused by the fact that by migrating nr_uninterruptible we screw rq->calc_load_active for both rqs involved. So don't bother with fancy migration schemes (meaning we now have to keep using for_each_possible_cpu()) and instead fold any nr_active delta after we migrate all tasks away to make sure we don't have any skewed nr_active accounting. [ paulmck: Move call to calc_load_migration to CPU_DEAD to avoid miscounting noted by Rakib. ] Reported-by: Rakib Mullick Reported-by: Paul E. McKenney Signed-off-by: Peter Zijlstra Signed-off-by: Paul E. McKenney --- kernel/sched/core.c | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index fbf1fd098dc6..8c38b5e7ce47 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5304,27 +5304,17 @@ void idle_task_exit(void) } /* - * While a dead CPU has no uninterruptible tasks queued at this point, - * it might still have a nonzero ->nr_uninterruptible counter, because - * for performance reasons the counter is not stricly tracking tasks to - * their home CPUs. So we just add the counter to another CPU's counter, - * to keep the global sum constant after CPU-down: - */ -static void migrate_nr_uninterruptible(struct rq *rq_src) -{ - struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask)); - - rq_dest->nr_uninterruptible += rq_src->nr_uninterruptible; - rq_src->nr_uninterruptible = 0; -} - -/* - * remove the tasks which were accounted by rq from calc_load_tasks. + * Since this CPU is going 'away' for a while, fold any nr_active delta + * we might have. Assumes we're called after migrate_tasks() so that the + * nr_active count is stable. + * + * Also see the comment "Global load-average calculations". */ -static void calc_global_load_remove(struct rq *rq) +static void calc_load_migrate(struct rq *rq) { - atomic_long_sub(rq->calc_load_active, &calc_load_tasks); - rq->calc_load_active = 0; + long delta = calc_load_fold_active(rq); + if (delta) + atomic_long_add(delta, &calc_load_tasks); } /* @@ -5617,9 +5607,18 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) migrate_tasks(cpu); BUG_ON(rq->nr_running != 1); /* the migration thread */ raw_spin_unlock_irqrestore(&rq->lock, flags); + break; - migrate_nr_uninterruptible(rq); - calc_global_load_remove(rq); + case CPU_DEAD: + { + struct rq *dest_rq; + + local_irq_save(flags); + dest_rq = cpu_rq(smp_processor_id()); + raw_spin_lock(&dest_rq->lock); + calc_load_migrate(rq); + raw_spin_unlock_irqrestore(&dest_rq->lock, flags); + } break; #endif } -- cgit v1.2.3