diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-04-28 13:33:57 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-04-28 13:33:57 -0700 |
commit | 16b3d0cf5bad844daaf436ad2e9061de0fe36e5c (patch) | |
tree | d553a51e6d95fb166df7fa62264e9a27e4c438a4 /kernel/cpu.c | |
parent | 42dec9a936e7696bea1f27d3c5a0068cd9aa95fd (diff) | |
parent | 2ea46c6fc9452ac100ad907b051d797225847e33 (diff) |
Merge tag 'sched-core-2021-04-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
- Clean up SCHED_DEBUG: move the decades old mess of sysctl, procfs and
debugfs interfaces to a unified debugfs interface.
- Signals: Allow caching one sigqueue object per task, to improve
performance & latencies.
- Improve newidle_balance() irq-off latencies on systems with a large
number of CPU cgroups.
- Improve energy-aware scheduling
- Improve the PELT metrics for certain workloads
- Reintroduce select_idle_smt() to improve load-balancing locality -
but without the previous regressions
- Add 'scheduler latency debugging': warn after long periods of pending
need_resched. This is an opt-in feature that requires the enabling of
the LATENCY_WARN scheduler feature, or the use of the
resched_latency_warn_ms=xx boot parameter.
- CPU hotplug fixes for HP-rollback, and for the 'fail' interface. Fix
remaining balance_push() vs. hotplug holes/races
- PSI fixes, plus allow /proc/pressure/ files to be written by
CAP_SYS_RESOURCE tasks as well
- Fix/improve various load-balancing corner cases vs. capacity margins
- Fix sched topology on systems with NUMA diameter of 3 or above
- Fix PF_KTHREAD vs to_kthread() race
- Minor rseq optimizations
- Misc cleanups, optimizations, fixes and smaller updates
* tag 'sched-core-2021-04-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (61 commits)
cpumask/hotplug: Fix cpu_dying() state tracking
kthread: Fix PF_KTHREAD vs to_kthread() race
sched/debug: Fix cgroup_path[] serialization
sched,psi: Handle potential task count underflow bugs more gracefully
sched: Warn on long periods of pending need_resched
sched/fair: Move update_nohz_stats() to the CONFIG_NO_HZ_COMMON block to simplify the code & fix an unused function warning
sched/debug: Rename the sched_debug parameter to sched_verbose
sched,fair: Alternative sched_slice()
sched: Move /proc/sched_debug to debugfs
sched,debug: Convert sysctl sched_domains to debugfs
debugfs: Implement debugfs_create_str()
sched,preempt: Move preempt_dynamic to debug.c
sched: Move SCHED_DEBUG sysctl to debugfs
sched: Don't make LATENCYTOP select SCHED_DEBUG
sched: Remove sched_schedstats sysctl out from under SCHED_DEBUG
sched/numa: Allow runtime enabling/disabling of NUMA balance without SCHED_DEBUG
sched: Use cpu_dying() to fix balance_push vs hotplug-rollback
cpumask: Introduce DYING mask
cpumask: Make cpu_{online,possible,present,active}() inline
rseq: Optimise rseq_get_rseq_cs() and clear_rseq_cs()
...
Diffstat (limited to 'kernel/cpu.c')
-rw-r--r-- | kernel/cpu.c | 210 |
1 files changed, 137 insertions, 73 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c index 1b6302ecbabe..e538518556f4 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -63,6 +63,7 @@ struct cpuhp_cpu_state { bool rollback; bool single; bool bringup; + int cpu; struct hlist_node *node; struct hlist_node *last; enum cpuhp_state cb_state; @@ -135,6 +136,11 @@ static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state) return cpuhp_hp_states + state; } +static bool cpuhp_step_empty(bool bringup, struct cpuhp_step *step) +{ + return bringup ? !step->startup.single : !step->teardown.single; +} + /** * cpuhp_invoke_callback _ Invoke the callbacks for a given state * @cpu: The cpu for which the callback should be invoked @@ -157,26 +163,24 @@ static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state, if (st->fail == state) { st->fail = CPUHP_INVALID; - - if (!(bringup ? step->startup.single : step->teardown.single)) - return 0; - return -EAGAIN; } + if (cpuhp_step_empty(bringup, step)) { + WARN_ON_ONCE(1); + return 0; + } + if (!step->multi_instance) { WARN_ON_ONCE(lastp && *lastp); cb = bringup ? step->startup.single : step->teardown.single; - if (!cb) - return 0; + trace_cpuhp_enter(cpu, st->target, state, cb); ret = cb(cpu); trace_cpuhp_exit(cpu, st->state, state, ret); return ret; } cbm = bringup ? step->startup.multi : step->teardown.multi; - if (!cbm) - return 0; /* Single invocation for instance add/remove */ if (node) { @@ -461,13 +465,16 @@ static inline enum cpuhp_state cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target) { enum cpuhp_state prev_state = st->state; + bool bringup = st->state < target; st->rollback = false; st->last = NULL; st->target = target; st->single = false; - st->bringup = st->state < target; + st->bringup = bringup; + if (cpu_dying(st->cpu) != !bringup) + set_cpu_dying(st->cpu, !bringup); return prev_state; } @@ -475,6 +482,17 @@ cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target) static inline void cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state) { + bool bringup = !st->bringup; + + st->target = prev_state; + + /* + * Already rolling back. No need invert the bringup value or to change + * the current state. + */ + if (st->rollback) + return; + st->rollback = true; /* @@ -488,8 +506,9 @@ cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state) st->state++; } - st->target = prev_state; - st->bringup = !st->bringup; + st->bringup = bringup; + if (cpu_dying(st->cpu) != !bringup) + set_cpu_dying(st->cpu, !bringup); } /* Regular hotplug invocation of the AP hotplug thread */ @@ -591,10 +610,53 @@ static int finish_cpu(unsigned int cpu) * Hotplug state machine related functions */ -static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st) +/* + * Get the next state to run. Empty ones will be skipped. Returns true if a + * state must be run. + * + * st->state will be modified ahead of time, to match state_to_run, as if it + * has already ran. + */ +static bool cpuhp_next_state(bool bringup, + enum cpuhp_state *state_to_run, + struct cpuhp_cpu_state *st, + enum cpuhp_state target) +{ + do { + if (bringup) { + if (st->state >= target) + return false; + + *state_to_run = ++st->state; + } else { + if (st->state <= target) + return false; + + *state_to_run = st->state--; + } + + if (!cpuhp_step_empty(bringup, cpuhp_get_step(*state_to_run))) + break; + } while (true); + + return true; +} + +static int cpuhp_invoke_callback_range(bool bringup, + unsigned int cpu, + struct cpuhp_cpu_state *st, + enum cpuhp_state target) { - for (st->state--; st->state > st->target; st->state--) - cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL); + enum cpuhp_state state; + int err = 0; + + while (cpuhp_next_state(bringup, &state, st, target)) { + err = cpuhp_invoke_callback(cpu, state, bringup, NULL, NULL); + if (err) + break; + } + + return err; } static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st) @@ -617,16 +679,12 @@ static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state prev_state = st->state; int ret = 0; - while (st->state < target) { - st->state++; - ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL); - if (ret) { - if (can_rollback_cpu(st)) { - st->target = prev_state; - undo_cpu_up(cpu, st); - } - break; - } + ret = cpuhp_invoke_callback_range(true, cpu, st, target); + if (ret) { + cpuhp_reset_state(st, prev_state); + if (can_rollback_cpu(st)) + WARN_ON(cpuhp_invoke_callback_range(false, cpu, st, + prev_state)); } return ret; } @@ -640,6 +698,7 @@ static void cpuhp_create(unsigned int cpu) init_completion(&st->done_up); init_completion(&st->done_down); + st->cpu = cpu; } static int cpuhp_should_run(unsigned int cpu) @@ -690,17 +749,9 @@ static void cpuhp_thread_fun(unsigned int cpu) state = st->cb_state; st->should_run = false; } else { - if (bringup) { - st->state++; - state = st->state; - st->should_run = (st->state < st->target); - WARN_ON_ONCE(st->state > st->target); - } else { - state = st->state; - st->state--; - st->should_run = (st->state > st->target); - WARN_ON_ONCE(st->state < st->target); - } + st->should_run = cpuhp_next_state(bringup, &state, st, st->target); + if (!st->should_run) + goto end; } WARN_ON_ONCE(!cpuhp_is_ap_state(state)); @@ -728,6 +779,7 @@ static void cpuhp_thread_fun(unsigned int cpu) st->should_run = false; } +end: cpuhp_lock_release(bringup); lockdep_release_cpus_lock(); @@ -881,19 +933,18 @@ static int take_cpu_down(void *_param) return err; /* - * We get here while we are in CPUHP_TEARDOWN_CPU state and we must not - * do this step again. + * Must be called from CPUHP_TEARDOWN_CPU, which means, as we are going + * down, that the current state is CPUHP_TEARDOWN_CPU - 1. */ - WARN_ON(st->state != CPUHP_TEARDOWN_CPU); - st->state--; + WARN_ON(st->state != (CPUHP_TEARDOWN_CPU - 1)); + /* Invoke the former CPU_DYING callbacks */ - for (; st->state > target; st->state--) { - ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL); - /* - * DYING must not fail! - */ - WARN_ON_ONCE(ret); - } + ret = cpuhp_invoke_callback_range(false, cpu, st, target); + + /* + * DYING must not fail! + */ + WARN_ON_ONCE(ret); /* Give up timekeeping duties */ tick_handover_do_timer(); @@ -975,27 +1026,22 @@ void cpuhp_report_idle_dead(void) cpuhp_complete_idle_dead, st, 0); } -static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st) -{ - for (st->state++; st->state < st->target; st->state++) - cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL); -} - static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target) { enum cpuhp_state prev_state = st->state; int ret = 0; - for (; st->state > target; st->state--) { - ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL); - if (ret) { - st->target = prev_state; - if (st->state < prev_state) - undo_cpu_down(cpu, st); - break; - } + ret = cpuhp_invoke_callback_range(false, cpu, st, target); + if (ret) { + + cpuhp_reset_state(st, prev_state); + + if (st->state < prev_state) + WARN_ON(cpuhp_invoke_callback_range(true, cpu, st, + prev_state)); } + return ret; } @@ -1045,9 +1091,13 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, * to do the further cleanups. */ ret = cpuhp_down_callbacks(cpu, st, target); - if (ret && st->state == CPUHP_TEARDOWN_CPU && st->state < prev_state) { - cpuhp_reset_state(st, prev_state); - __cpuhp_kick_ap(st); + if (ret && st->state < prev_state) { + if (st->state == CPUHP_TEARDOWN_CPU) { + cpuhp_reset_state(st, prev_state); + __cpuhp_kick_ap(st); + } else { + WARN(1, "DEAD callback error for CPU%d", cpu); + } } out: @@ -1164,14 +1214,12 @@ void notify_cpu_starting(unsigned int cpu) rcu_cpu_starting(cpu); /* Enables RCU usage on this CPU. */ cpumask_set_cpu(cpu, &cpus_booted_once_mask); - while (st->state < target) { - st->state++; - ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL); - /* - * STARTING must not fail! - */ - WARN_ON_ONCE(ret); - } + ret = cpuhp_invoke_callback_range(true, cpu, st, target); + + /* + * STARTING must not fail! + */ + WARN_ON_ONCE(ret); } /* @@ -1777,8 +1825,7 @@ static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup, * If there's nothing to do, we done. * Relies on the union for multi_instance. */ - if ((bringup && !sp->startup.single) || - (!bringup && !sp->teardown.single)) + if (cpuhp_step_empty(bringup, sp)) return 0; /* * The non AP bound callbacks can fail on bringup. On teardown @@ -2207,6 +2254,11 @@ static ssize_t write_cpuhp_fail(struct device *dev, if (ret) return ret; + if (fail == CPUHP_INVALID) { + st->fail = fail; + return count; + } + if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE) return -EINVAL; @@ -2217,6 +2269,15 @@ static ssize_t write_cpuhp_fail(struct device *dev, return -EINVAL; /* + * DEAD callbacks cannot fail... + * ... neither can CPUHP_BRINGUP_CPU during hotunplug. The latter + * triggering STARTING callbacks, a failure in this state would + * hinder rollback. + */ + if (fail <= CPUHP_BRINGUP_CPU && st->state > CPUHP_BRINGUP_CPU) + return -EINVAL; + + /* * Cannot fail anything that doesn't have callbacks. */ mutex_lock(&cpuhp_state_mutex); @@ -2460,6 +2521,9 @@ EXPORT_SYMBOL(__cpu_present_mask); struct cpumask __cpu_active_mask __read_mostly; EXPORT_SYMBOL(__cpu_active_mask); +struct cpumask __cpu_dying_mask __read_mostly; +EXPORT_SYMBOL(__cpu_dying_mask); + atomic_t __num_online_cpus __read_mostly; EXPORT_SYMBOL(__num_online_cpus); |