From 11ed7f934cb807f26da09547b5946c2e534d1dac Mon Sep 17 00:00:00 2001 From: Pranith Kumar Date: Wed, 27 Aug 2014 16:43:40 -0400 Subject: rcu: Make nocb leader kthreads process pending callbacks after spawning The nocb callbacks generated before the nocb kthreads are spawned are enqueued in the nocb queue for later processing. Commit fbce7497ee5af ("rcu: Parallelize and economize NOCB kthread wakeups") introduced nocb leader kthreads which checked the nocb_leader_wake flag to see if there were any such pending callbacks. A case was reported in which newly spawned leader kthreads were not processing the pending callbacks as this flag was not set, which led to a boot hang. The following commit ensures that the newly spawned nocb kthreads process the pending callbacks by allowing the kthreads to run immediately after spawning instead of waiting. This is done by inverting the logic of nocb_leader_wake tests to nocb_leader_sleep which allows us to use the default initialization of this flag to 0 to let the kthreads run. Reported-by: Amit Shah Signed-off-by: Pranith Kumar Link: http://www.spinics.net/lists/kernel/msg1802899.html [ paulmck: Backported to v3.17-rc2. ] Signed-off-by: Paul E. McKenney Tested-by: Amit Shah --- kernel/rcu/tree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/rcu/tree.h') diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 71e64c718f75..6a86eb7bac45 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -358,7 +358,7 @@ struct rcu_data { struct rcu_head **nocb_gp_tail; long nocb_gp_count; long nocb_gp_count_lazy; - bool nocb_leader_wake; /* Is the nocb leader thread awake? */ + bool nocb_leader_sleep; /* Is the nocb leader thread asleep? */ struct rcu_data *nocb_next_follower; /* Next follower in wakeup chain. */ -- cgit v1.2.3 From 9fdd3bc9005824704f9802bec7b3e06f5edae434 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 29 Jul 2014 14:50:47 -0700 Subject: rcu: Break more call_rcu() deadlock involving scheduler and perf Commit 96d3fd0d315a9 (rcu: Break call_rcu() deadlock involving scheduler and perf) covered the case where __call_rcu_nocb_enqueue() needs to wake the rcuo kthread due to the queue being initially empty, but did not do anything for the case where the queue was overflowing. This commit therefore also defers wakeup for the overflow case. Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 3 +++ kernel/rcu/tree.h | 9 +++++++-- kernel/rcu/tree_plugin.h | 26 ++++++++++++++++++-------- 3 files changed, 28 insertions(+), 10 deletions(-) (limited to 'kernel/rcu/tree.h') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index aca382266411..9b56f37148cf 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -180,9 +180,12 @@ TRACE_EVENT(rcu_grace_period_init, * argument is a string as follows: * * "WakeEmpty": Wake rcuo kthread, first CB to empty list. + * "WakeEmptyIsDeferred": Wake rcuo kthread later, first CB to empty list. * "WakeOvf": Wake rcuo kthread, CB list is huge. + * "WakeOvfIsDeferred": Wake rcuo kthread later, CB list is huge. * "WakeNot": Don't wake rcuo kthread. * "WakeNotPoll": Don't wake rcuo kthread because it is polling. + * "DeferredWake": Carried out the "IsDeferred" wakeup. * "Poll": Start of new polling cycle for rcu_nocb_poll. * "Sleep": Sleep waiting for CBs for !rcu_nocb_poll. * "WokeEmpty": rcuo kthread woke to find empty list. diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 6a86eb7bac45..e33562f2a655 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -350,7 +350,7 @@ struct rcu_data { int nocb_p_count_lazy; /* (approximate). */ wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */ struct task_struct *nocb_kthread; - bool nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */ + int nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */ /* The following fields are used by the leader, hence own cacheline. */ struct rcu_head *nocb_gp_head ____cacheline_internodealigned_in_smp; @@ -383,6 +383,11 @@ struct rcu_data { #define RCU_FORCE_QS 3 /* Need to force quiescent state. */ #define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK +/* Values for nocb_defer_wakeup field in struct rcu_data. */ +#define RCU_NOGP_WAKE_NOT 0 +#define RCU_NOGP_WAKE 1 +#define RCU_NOGP_WAKE_FORCE 2 + #define RCU_JIFFIES_TILL_FORCE_QS (1 + (HZ > 250) + (HZ > 500)) /* For jiffies_till_first_fqs and */ /* and jiffies_till_next_fqs. */ @@ -589,7 +594,7 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, struct rcu_data *rdp, unsigned long flags); -static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp); +static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp); static void do_nocb_deferred_wakeup(struct rcu_data *rdp); static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp); static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index bb564560aeb8..d67cc5c375c5 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -2121,16 +2121,23 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp, trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeEmpty")); } else { - rdp->nocb_defer_wakeup = true; + rdp->nocb_defer_wakeup = RCU_NOGP_WAKE; trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeEmptyIsDeferred")); } rdp->qlen_last_fqs_check = 0; } else if (len > rdp->qlen_last_fqs_check + qhimark) { /* ... or if many callbacks queued. */ - wake_nocb_leader(rdp, true); + if (!irqs_disabled_flags(flags)) { + wake_nocb_leader(rdp, true); + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, + TPS("WakeOvf")); + } else { + rdp->nocb_defer_wakeup = RCU_NOGP_WAKE_FORCE; + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, + TPS("WakeOvfIsDeferred")); + } rdp->qlen_last_fqs_check = LONG_MAX / 2; - trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeOvf")); } else { trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeNot")); } @@ -2438,7 +2445,7 @@ static int rcu_nocb_kthread(void *arg) } /* Is a deferred wakeup of rcu_nocb_kthread() required? */ -static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp) +static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp) { return ACCESS_ONCE(rdp->nocb_defer_wakeup); } @@ -2446,11 +2453,14 @@ static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp) /* Do a deferred wakeup of rcu_nocb_kthread(). */ static void do_nocb_deferred_wakeup(struct rcu_data *rdp) { + int ndw; + if (!rcu_nocb_need_deferred_wakeup(rdp)) return; - ACCESS_ONCE(rdp->nocb_defer_wakeup) = false; - wake_nocb_leader(rdp, false); - trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWakeEmpty")); + ndw = ACCESS_ONCE(rdp->nocb_defer_wakeup); + ACCESS_ONCE(rdp->nocb_defer_wakeup) = RCU_NOGP_WAKE_NOT; + wake_nocb_leader(rdp, ndw == RCU_NOGP_WAKE_FORCE); + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWake")); } /* Initialize per-rcu_data variables for no-CBs CPUs. */ @@ -2557,7 +2567,7 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) { } -static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp) +static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp) { return false; } -- cgit v1.2.3 From 176f8f7a52cc6d09d686f0d900abda6942a52fbb Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Aug 2014 17:43:50 -0700 Subject: rcu: Make TASKS_RCU handle nohz_full= CPUs Currently TASKS_RCU would ignore a CPU running a task in nohz_full= usermode execution. There would be neither a context switch nor a scheduling-clock interrupt to tell TASKS_RCU that the task in question had passed through a quiescent state. The grace period would therefore extend indefinitely. This commit therefore makes RCU's dyntick-idle subsystem record the task_struct structure of the task that is running in dyntick-idle mode on each CPU. The TASKS_RCU grace period can then access this information and record a quiescent state on behalf of any CPU running in dyntick-idle usermode. Signed-off-by: Paul E. McKenney --- include/linux/init_task.h | 3 ++- include/linux/sched.h | 2 ++ kernel/rcu/tree.c | 2 ++ kernel/rcu/tree.h | 2 ++ kernel/rcu/tree_plugin.h | 16 ++++++++++++++++ kernel/rcu/update.c | 4 +++- 6 files changed, 27 insertions(+), 2 deletions(-) (limited to 'kernel/rcu/tree.h') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index dffd9258ee60..03b274873b06 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -121,7 +121,8 @@ extern struct group_info init_groups; #define INIT_TASK_RCU_TASKS(tsk) \ .rcu_tasks_holdout = false, \ .rcu_tasks_holdout_list = \ - LIST_HEAD_INIT(tsk.rcu_tasks_holdout_list), + LIST_HEAD_INIT(tsk.rcu_tasks_holdout_list), \ + .rcu_tasks_idle_cpu = -1, #else #define INIT_TASK_RCU_TASKS(tsk) #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index eaacac4ae77d..ec8b34722bcc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1274,6 +1274,7 @@ struct task_struct { unsigned long rcu_tasks_nvcsw; bool rcu_tasks_holdout; struct list_head rcu_tasks_holdout_list; + int rcu_tasks_idle_cpu; #endif /* #ifdef CONFIG_TASKS_RCU */ #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) @@ -2020,6 +2021,7 @@ static inline void rcu_copy_process(struct task_struct *p) #ifdef CONFIG_TASKS_RCU p->rcu_tasks_holdout = false; INIT_LIST_HEAD(&p->rcu_tasks_holdout_list); + p->rcu_tasks_idle_cpu = -1; #endif /* #ifdef CONFIG_TASKS_RCU */ } diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index e23dad0661e2..c880f5387b1f 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -526,6 +526,7 @@ static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval, atomic_inc(&rdtp->dynticks); smp_mb__after_atomic(); /* Force ordering with next sojourn. */ WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); + rcu_dynticks_task_enter(); /* * It is illegal to enter an extended quiescent state while @@ -642,6 +643,7 @@ void rcu_irq_exit(void) static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval, int user) { + rcu_dynticks_task_exit(); smp_mb__before_atomic(); /* Force ordering w/previous sojourn. */ atomic_inc(&rdtp->dynticks); /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 6a86eb7bac45..3a92000c354f 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -605,6 +605,8 @@ static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle, static void rcu_bind_gp_kthread(void); static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp); static bool rcu_nohz_full_cpu(struct rcu_state *rsp); +static void rcu_dynticks_task_enter(void); +static void rcu_dynticks_task_exit(void); #endif /* #ifndef RCU_TREE_NONCORE */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 7672586d3920..e466b40052a7 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -3036,3 +3036,19 @@ static void rcu_bind_gp_kthread(void) housekeeping_affine(current); #endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ } + +/* Record the current task on dyntick-idle entry. */ +static void rcu_dynticks_task_enter(void) +{ +#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) + ACCESS_ONCE(current->rcu_tasks_idle_cpu) = smp_processor_id(); +#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */ +} + +/* Record no current task on dyntick-idle exit. */ +static void rcu_dynticks_task_exit(void) +{ +#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) + ACCESS_ONCE(current->rcu_tasks_idle_cpu) = -1; +#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */ +} diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index e1d71741958f..2658de4a5975 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -463,7 +463,9 @@ static void check_holdout_task(struct task_struct *t, { if (!ACCESS_ONCE(t->rcu_tasks_holdout) || t->rcu_tasks_nvcsw != ACCESS_ONCE(t->nvcsw) || - !ACCESS_ONCE(t->on_rq)) { + !ACCESS_ONCE(t->on_rq) || + (IS_ENABLED(CONFIG_NO_HZ_FULL) && + !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) { ACCESS_ONCE(t->rcu_tasks_holdout) = false; list_del_rcu(&t->rcu_tasks_holdout_list); put_task_struct(t); -- cgit v1.2.3 From 9386c0b75dda05f535a10ea1abf1817fe292c81c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 13 Jul 2014 12:00:53 -0700 Subject: rcu: Rationalize kthread spawning Currently, RCU spawns kthreads from several different early_initcall() functions. Although this has served RCU well for quite some time, as more kthreads are added a more deterministic approach is required. This commit therefore causes all of RCU's early-boot kthreads to be spawned from a single early_initcall() function. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett Tested-by: Paul Gortmaker --- kernel/rcu/tree.c | 4 +++- kernel/rcu/tree.h | 1 + kernel/rcu/tree_plugin.h | 12 +++--------- 3 files changed, 7 insertions(+), 10 deletions(-) (limited to 'kernel/rcu/tree.h') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 1b70cb6fbe3c..9be47f43903b 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3489,7 +3489,7 @@ static int rcu_pm_notify(struct notifier_block *self, } /* - * Spawn the kthread that handles this RCU flavor's grace periods. + * Spawn the kthreads that handle each RCU flavor's grace periods. */ static int __init rcu_spawn_gp_kthread(void) { @@ -3498,6 +3498,7 @@ static int __init rcu_spawn_gp_kthread(void) struct rcu_state *rsp; struct task_struct *t; + rcu_scheduler_fully_active = 1; for_each_rcu_flavor(rsp) { t = kthread_run(rcu_gp_kthread, rsp, "%s", rsp->name); BUG_ON(IS_ERR(t)); @@ -3507,6 +3508,7 @@ static int __init rcu_spawn_gp_kthread(void) raw_spin_unlock_irqrestore(&rnp->lock, flags); rcu_spawn_nocb_kthreads(rsp); } + rcu_spawn_boost_kthreads(); return 0; } early_initcall(rcu_spawn_gp_kthread); diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 6a86eb7bac45..a966092fdfd7 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -572,6 +572,7 @@ static void rcu_preempt_do_callbacks(void); static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, struct rcu_node *rnp); #endif /* #ifdef CONFIG_RCU_BOOST */ +static void __init rcu_spawn_boost_kthreads(void); static void rcu_prepare_kthreads(int cpu); static void rcu_cleanup_after_idle(int cpu); static void rcu_prepare_for_idle(int cpu); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 4c1af96836f6..410c74424d96 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1435,14 +1435,13 @@ static struct smp_hotplug_thread rcu_cpu_thread_spec = { }; /* - * Spawn all kthreads -- called as soon as the scheduler is running. + * Spawn boost kthreads -- called as soon as the scheduler is running. */ -static int __init rcu_spawn_kthreads(void) +static void __init rcu_spawn_boost_kthreads(void) { struct rcu_node *rnp; int cpu; - rcu_scheduler_fully_active = 1; for_each_possible_cpu(cpu) per_cpu(rcu_cpu_has_work, cpu) = 0; BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); @@ -1452,9 +1451,7 @@ static int __init rcu_spawn_kthreads(void) rcu_for_each_leaf_node(rcu_state_p, rnp) (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp); } - return 0; } -early_initcall(rcu_spawn_kthreads); static void rcu_prepare_kthreads(int cpu) { @@ -1492,12 +1489,9 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) { } -static int __init rcu_scheduler_really_started(void) +static void __init rcu_spawn_boost_kthreads(void) { - rcu_scheduler_fully_active = 1; - return 0; } -early_initcall(rcu_scheduler_really_started); static void rcu_prepare_kthreads(int cpu) { -- cgit v1.2.3 From 35ce7f29a44a888c45c0a9f202f69e10613c5306 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 11 Jul 2014 11:30:24 -0700 Subject: rcu: Create rcuo kthreads only for onlined CPUs RCU currently uses for_each_possible_cpu() to spawn rcuo kthreads, which can result in more rcuo kthreads than one would expect, for example, derRichard reported 64 CPUs worth of rcuo kthreads on an 8-CPU image. This commit therefore creates rcuo kthreads only for those CPUs that actually come online. This was reported by derRichard on the OFTC IRC network. Reported-by: Richard Weinberger Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett Tested-by: Paul Gortmaker --- kernel/rcu/tree.c | 3 +- kernel/rcu/tree.h | 6 +++- kernel/rcu/tree_plugin.h | 90 ++++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 86 insertions(+), 13 deletions(-) (limited to 'kernel/rcu/tree.h') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 9be47f43903b..b49c8433f834 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3442,6 +3442,7 @@ static int rcu_cpu_notify(struct notifier_block *self, case CPU_UP_PREPARE_FROZEN: rcu_prepare_cpu(cpu); rcu_prepare_kthreads(cpu); + rcu_spawn_all_nocb_kthreads(cpu); break; case CPU_ONLINE: case CPU_DOWN_FAILED: @@ -3506,8 +3507,8 @@ static int __init rcu_spawn_gp_kthread(void) raw_spin_lock_irqsave(&rnp->lock, flags); rsp->gp_kthread = t; raw_spin_unlock_irqrestore(&rnp->lock, flags); - rcu_spawn_nocb_kthreads(rsp); } + rcu_spawn_nocb_kthreads(); rcu_spawn_boost_kthreads(); return 0; } diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index a966092fdfd7..a9a226d2e80a 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -593,7 +593,11 @@ static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp); static void do_nocb_deferred_wakeup(struct rcu_data *rdp); static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp); -static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp); +static void rcu_spawn_all_nocb_kthreads(int cpu); +static void __init rcu_spawn_nocb_kthreads(void); +#ifdef CONFIG_RCU_NOCB_CPU +static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp); +#endif /* #ifdef CONFIG_RCU_NOCB_CPU */ static void __maybe_unused rcu_kick_nohz_cpu(int cpu); static bool init_nocb_callback_list(struct rcu_data *rdp); static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 410c74424d96..31c7afb611fd 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -2479,6 +2479,7 @@ void __init rcu_init_nohz(void) rdp->nxttail[RCU_NEXT_TAIL] != NULL); init_nocb_callback_list(rdp); } + rcu_organize_nocb_kthreads(rsp); } } @@ -2490,15 +2491,85 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) rdp->nocb_follower_tail = &rdp->nocb_follower_head; } +/* + * If the specified CPU is a no-CBs CPU that does not already have its + * rcuo kthread for the specified RCU flavor, spawn it. If the CPUs are + * brought online out of order, this can require re-organizing the + * leader-follower relationships. + */ +static void rcu_spawn_one_nocb_kthread(struct rcu_state *rsp, int cpu) +{ + struct rcu_data *rdp; + struct rcu_data *rdp_last; + struct rcu_data *rdp_old_leader; + struct rcu_data *rdp_spawn = per_cpu_ptr(rsp->rda, cpu); + struct task_struct *t; + + /* + * If this isn't a no-CBs CPU or if it already has an rcuo kthread, + * then nothing to do. + */ + if (!rcu_is_nocb_cpu(cpu) || rdp_spawn->nocb_kthread) + return; + + /* If we didn't spawn the leader first, reorganize! */ + rdp_old_leader = rdp_spawn->nocb_leader; + if (rdp_old_leader != rdp_spawn && !rdp_old_leader->nocb_kthread) { + rdp_last = NULL; + rdp = rdp_old_leader; + do { + rdp->nocb_leader = rdp_spawn; + if (rdp_last && rdp != rdp_spawn) + rdp_last->nocb_next_follower = rdp; + rdp_last = rdp; + rdp = rdp->nocb_next_follower; + rdp_last->nocb_next_follower = NULL; + } while (rdp); + rdp_spawn->nocb_next_follower = rdp_old_leader; + } + + /* Spawn the kthread for this CPU and RCU flavor. */ + t = kthread_run(rcu_nocb_kthread, rdp_spawn, + "rcuo%c/%d", rsp->abbr, cpu); + BUG_ON(IS_ERR(t)); + ACCESS_ONCE(rdp_spawn->nocb_kthread) = t; +} + +/* + * If the specified CPU is a no-CBs CPU that does not already have its + * rcuo kthreads, spawn them. + */ +static void rcu_spawn_all_nocb_kthreads(int cpu) +{ + struct rcu_state *rsp; + + if (rcu_scheduler_fully_active) + for_each_rcu_flavor(rsp) + rcu_spawn_one_nocb_kthread(rsp, cpu); +} + +/* + * Once the scheduler is running, spawn rcuo kthreads for all online + * no-CBs CPUs. This assumes that the early_initcall()s happen before + * non-boot CPUs come online -- if this changes, we will need to add + * some mutual exclusion. + */ +static void __init rcu_spawn_nocb_kthreads(void) +{ + int cpu; + + for_each_online_cpu(cpu) + rcu_spawn_all_nocb_kthreads(cpu); +} + /* How many follower CPU IDs per leader? Default of -1 for sqrt(nr_cpu_ids). */ static int rcu_nocb_leader_stride = -1; module_param(rcu_nocb_leader_stride, int, 0444); /* - * Create a kthread for each RCU flavor for each no-CBs CPU. - * Also initialize leader-follower relationships. + * Initialize leader-follower relationships for all no-CBs CPU. */ -static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp) +static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp) { int cpu; int ls = rcu_nocb_leader_stride; @@ -2506,7 +2577,6 @@ static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp) struct rcu_data *rdp; struct rcu_data *rdp_leader = NULL; /* Suppress misguided gcc warn. */ struct rcu_data *rdp_prev = NULL; - struct task_struct *t; if (rcu_nocb_mask == NULL) return; @@ -2532,12 +2602,6 @@ static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp) rdp_prev->nocb_next_follower = rdp; } rdp_prev = rdp; - - /* Spawn the kthread for this CPU. */ - t = kthread_run(rcu_nocb_kthread, rdp, - "rcuo%c/%d", rsp->abbr, cpu); - BUG_ON(IS_ERR(t)); - ACCESS_ONCE(rdp->nocb_kthread) = t; } } @@ -2591,7 +2655,11 @@ static void do_nocb_deferred_wakeup(struct rcu_data *rdp) { } -static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp) +static void rcu_spawn_all_nocb_kthreads(int cpu) +{ +} + +static void __init rcu_spawn_nocb_kthreads(void) { } -- cgit v1.2.3 From d7e29933969e5ca7c112ce1368a07911f4485dc2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 27 Oct 2014 09:15:54 -0700 Subject: rcu: Make rcu_barrier() understand about missing rcuo kthreads Commit 35ce7f29a44a (rcu: Create rcuo kthreads only for onlined CPUs) avoids creating rcuo kthreads for CPUs that never come online. This fixes a bug in many instances of firmware: Instead of lying about their age, these systems instead lie about the number of CPUs that they have. Before commit 35ce7f29a44a, this could result in huge numbers of useless rcuo kthreads being created. It appears that experience indicates that I should have told the people suffering from this problem to fix their broken firmware, but I instead produced what turned out to be a partial fix. The missing piece supplied by this commit makes sure that rcu_barrier() knows not to post callbacks for no-CBs CPUs that have not yet come online, because otherwise rcu_barrier() will hang on systems having firmware that lies about the number of CPUs. It is tempting to simply have rcu_barrier() refuse to post a callback on any no-CBs CPU that does not have an rcuo kthread. This unfortunately does not work because rcu_barrier() is required to wait for all pending callbacks. It is therefore required to wait even for those callbacks that cannot possibly be invoked. Even if doing so hangs the system. Given that posting a callback to a no-CBs CPU that does not yet have an rcuo kthread can hang rcu_barrier(), It is tempting to report an error in this case. Unfortunately, this will result in false positives at boot time, when it is perfectly legal to post callbacks to the boot CPU before the scheduler has started, in other words, before it is legal to invoke rcu_barrier(). So this commit instead has rcu_barrier() avoid posting callbacks to CPUs having neither rcuo kthread nor pending callbacks, and has it complain bitterly if it finds CPUs having no rcuo kthread but some pending callbacks. And when rcu_barrier() does find CPUs having no rcuo kthread but pending callbacks, as noted earlier, it has no choice but to hang indefinitely. Reported-by: Yanko Kaneti Reported-by: Jay Vosburgh Reported-by: Meelis Roos Reported-by: Eric B Munson Signed-off-by: Paul E. McKenney Tested-by: Eric B Munson Tested-by: Jay Vosburgh Tested-by: Yanko Kaneti Tested-by: Kevin Fenzi Tested-by: Meelis Roos --- include/trace/events/rcu.h | 18 +++++++++--------- kernel/rcu/tree.c | 15 ++++++++++----- kernel/rcu/tree.h | 1 + kernel/rcu/tree_plugin.h | 33 +++++++++++++++++++++++++++++++++ 4 files changed, 53 insertions(+), 14 deletions(-) (limited to 'kernel/rcu/tree.h') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 9b56f37148cf..e335e7d8c6c2 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -660,18 +660,18 @@ TRACE_EVENT(rcu_torture_read, /* * Tracepoint for _rcu_barrier() execution. The string "s" describes * the _rcu_barrier phase: - * "Begin": rcu_barrier_callback() started. - * "Check": rcu_barrier_callback() checking for piggybacking. - * "EarlyExit": rcu_barrier_callback() piggybacked, thus early exit. - * "Inc1": rcu_barrier_callback() piggyback check counter incremented. - * "Offline": rcu_barrier_callback() found offline CPU - * "OnlineNoCB": rcu_barrier_callback() found online no-CBs CPU. - * "OnlineQ": rcu_barrier_callback() found online CPU with callbacks. - * "OnlineNQ": rcu_barrier_callback() found online CPU, no callbacks. + * "Begin": _rcu_barrier() started. + * "Check": _rcu_barrier() checking for piggybacking. + * "EarlyExit": _rcu_barrier() piggybacked, thus early exit. + * "Inc1": _rcu_barrier() piggyback check counter incremented. + * "OfflineNoCB": _rcu_barrier() found callback on never-online CPU + * "OnlineNoCB": _rcu_barrier() found online no-CBs CPU. + * "OnlineQ": _rcu_barrier() found online CPU with callbacks. + * "OnlineNQ": _rcu_barrier() found online CPU, no callbacks. * "IRQ": An rcu_barrier_callback() callback posted on remote CPU. * "CB": An rcu_barrier_callback() invoked a callback, not the last. * "LastCB": An rcu_barrier_callback() invoked the last callback. - * "Inc2": rcu_barrier_callback() piggyback check counter incremented. + * "Inc2": _rcu_barrier() piggyback check counter incremented. * The "cpu" argument is the CPU or -1 if meaningless, the "cnt" argument * is the count of remaining callbacks, and "done" is the piggybacking count. */ diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 133e47223095..9815447d22e0 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3299,11 +3299,16 @@ static void _rcu_barrier(struct rcu_state *rsp) continue; rdp = per_cpu_ptr(rsp->rda, cpu); if (rcu_is_nocb_cpu(cpu)) { - _rcu_barrier_trace(rsp, "OnlineNoCB", cpu, - rsp->n_barrier_done); - atomic_inc(&rsp->barrier_cpu_count); - __call_rcu(&rdp->barrier_head, rcu_barrier_callback, - rsp, cpu, 0); + if (!rcu_nocb_cpu_needs_barrier(rsp, cpu)) { + _rcu_barrier_trace(rsp, "OfflineNoCB", cpu, + rsp->n_barrier_done); + } else { + _rcu_barrier_trace(rsp, "OnlineNoCB", cpu, + rsp->n_barrier_done); + atomic_inc(&rsp->barrier_cpu_count); + __call_rcu(&rdp->barrier_head, + rcu_barrier_callback, rsp, cpu, 0); + } } else if (ACCESS_ONCE(rdp->qlen)) { _rcu_barrier_trace(rsp, "OnlineQ", cpu, rsp->n_barrier_done); diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index d03764652d91..bbdc45d8d74f 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -587,6 +587,7 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu); static void print_cpu_stall_info_end(void); static void zero_cpu_stall_ticks(struct rcu_data *rdp); static void increment_cpu_stall_ticks(void); +static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu); static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq); static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp); static void rcu_init_one_nocb(struct rcu_node *rnp); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 387dd4599344..c1d7f27bd38f 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -2049,6 +2049,33 @@ static void wake_nocb_leader(struct rcu_data *rdp, bool force) } } +/* + * Does the specified CPU need an RCU callback for the specified flavor + * of rcu_barrier()? + */ +static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu) +{ + struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); + struct rcu_head *rhp; + + /* No-CBs CPUs might have callbacks on any of three lists. */ + rhp = ACCESS_ONCE(rdp->nocb_head); + if (!rhp) + rhp = ACCESS_ONCE(rdp->nocb_gp_head); + if (!rhp) + rhp = ACCESS_ONCE(rdp->nocb_follower_head); + + /* Having no rcuo kthread but CBs after scheduler starts is bad! */ + if (!ACCESS_ONCE(rdp->nocb_kthread) && rhp) { + /* RCU callback enqueued before CPU first came online??? */ + pr_err("RCU: Never-onlined no-CBs CPU %d has CB %p\n", + cpu, rhp->func); + WARN_ON_ONCE(1); + } + + return !!rhp; +} + /* * Enqueue the specified string of rcu_head structures onto the specified * CPU's no-CBs lists. The CPU is specified by rdp, the head of the @@ -2642,6 +2669,12 @@ static bool init_nocb_callback_list(struct rcu_data *rdp) #else /* #ifdef CONFIG_RCU_NOCB_CPU */ +static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu) +{ + WARN_ON_ONCE(1); /* Should be dead code. */ + return false; +} + static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) { } -- cgit v1.2.3