diff options
Diffstat (limited to 'kernel/softirq.c')
-rw-r--r-- | kernel/softirq.c | 357 |
1 files changed, 297 insertions, 60 deletions
diff --git a/kernel/softirq.c b/kernel/softirq.c index 9908ec4a9bfe..4992853ef53d 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -13,6 +13,7 @@ #include <linux/kernel_stat.h> #include <linux/interrupt.h> #include <linux/init.h> +#include <linux/local_lock.h> #include <linux/mm.h> #include <linux/notifier.h> #include <linux/percpu.h> @@ -25,6 +26,7 @@ #include <linux/smpboot.h> #include <linux/tick.h> #include <linux/irq.h> +#include <linux/wait_bit.h> #include <asm/softirq_stack.h> @@ -102,20 +104,204 @@ EXPORT_PER_CPU_SYMBOL_GPL(hardirq_context); #endif /* - * preempt_count and SOFTIRQ_OFFSET usage: - * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving - * softirq processing. - * - preempt_count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET) + * SOFTIRQ_OFFSET usage: + * + * On !RT kernels 'count' is the preempt counter, on RT kernels this applies + * to a per CPU counter and to task::softirqs_disabled_cnt. + * + * - count is changed by SOFTIRQ_OFFSET on entering or leaving softirq + * processing. + * + * - count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET) * on local_bh_disable or local_bh_enable. + * * This lets us distinguish between whether we are currently processing * softirq and whether we just have bh disabled. */ +#ifdef CONFIG_PREEMPT_RT + +/* + * RT accounts for BH disabled sections in task::softirqs_disabled_cnt and + * also in per CPU softirq_ctrl::cnt. This is necessary to allow tasks in a + * softirq disabled section to be preempted. + * + * The per task counter is used for softirq_count(), in_softirq() and + * in_serving_softirqs() because these counts are only valid when the task + * holding softirq_ctrl::lock is running. + * + * The per CPU counter prevents pointless wakeups of ksoftirqd in case that + * the task which is in a softirq disabled section is preempted or blocks. + */ +struct softirq_ctrl { + local_lock_t lock; + int cnt; +}; + +static DEFINE_PER_CPU(struct softirq_ctrl, softirq_ctrl) = { + .lock = INIT_LOCAL_LOCK(softirq_ctrl.lock), +}; + +/** + * local_bh_blocked() - Check for idle whether BH processing is blocked + * + * Returns false if the per CPU softirq::cnt is 0 otherwise true. + * + * This is invoked from the idle task to guard against false positive + * softirq pending warnings, which would happen when the task which holds + * softirq_ctrl::lock was the only running task on the CPU and blocks on + * some other lock. + */ +bool local_bh_blocked(void) +{ + return __this_cpu_read(softirq_ctrl.cnt) != 0; +} + +void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) +{ + unsigned long flags; + int newcnt; + + WARN_ON_ONCE(in_hardirq()); + + /* First entry of a task into a BH disabled section? */ + if (!current->softirq_disable_cnt) { + if (preemptible()) { + local_lock(&softirq_ctrl.lock); + /* Required to meet the RCU bottomhalf requirements. */ + rcu_read_lock(); + } else { + DEBUG_LOCKS_WARN_ON(this_cpu_read(softirq_ctrl.cnt)); + } + } + + /* + * Track the per CPU softirq disabled state. On RT this is per CPU + * state to allow preemption of bottom half disabled sections. + */ + newcnt = __this_cpu_add_return(softirq_ctrl.cnt, cnt); + /* + * Reflect the result in the task state to prevent recursion on the + * local lock and to make softirq_count() & al work. + */ + current->softirq_disable_cnt = newcnt; + + if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && newcnt == cnt) { + raw_local_irq_save(flags); + lockdep_softirqs_off(ip); + raw_local_irq_restore(flags); + } +} +EXPORT_SYMBOL(__local_bh_disable_ip); + +static void __local_bh_enable(unsigned int cnt, bool unlock) +{ + unsigned long flags; + int newcnt; + + DEBUG_LOCKS_WARN_ON(current->softirq_disable_cnt != + this_cpu_read(softirq_ctrl.cnt)); + + if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && softirq_count() == cnt) { + raw_local_irq_save(flags); + lockdep_softirqs_on(_RET_IP_); + raw_local_irq_restore(flags); + } + + newcnt = __this_cpu_sub_return(softirq_ctrl.cnt, cnt); + current->softirq_disable_cnt = newcnt; + + if (!newcnt && unlock) { + rcu_read_unlock(); + local_unlock(&softirq_ctrl.lock); + } +} + +void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) +{ + bool preempt_on = preemptible(); + unsigned long flags; + u32 pending; + int curcnt; + + WARN_ON_ONCE(in_irq()); + lockdep_assert_irqs_enabled(); + + local_irq_save(flags); + curcnt = __this_cpu_read(softirq_ctrl.cnt); + + /* + * If this is not reenabling soft interrupts, no point in trying to + * run pending ones. + */ + if (curcnt != cnt) + goto out; + + pending = local_softirq_pending(); + if (!pending || ksoftirqd_running(pending)) + goto out; + + /* + * If this was called from non preemptible context, wake up the + * softirq daemon. + */ + if (!preempt_on) { + wakeup_softirqd(); + goto out; + } + + /* + * Adjust softirq count to SOFTIRQ_OFFSET which makes + * in_serving_softirq() become true. + */ + cnt = SOFTIRQ_OFFSET; + __local_bh_enable(cnt, false); + __do_softirq(); + +out: + __local_bh_enable(cnt, preempt_on); + local_irq_restore(flags); +} +EXPORT_SYMBOL(__local_bh_enable_ip); + +/* + * Invoked from ksoftirqd_run() outside of the interrupt disabled section + * to acquire the per CPU local lock for reentrancy protection. + */ +static inline void ksoftirqd_run_begin(void) +{ + __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET); + local_irq_disable(); +} + +/* Counterpart to ksoftirqd_run_begin() */ +static inline void ksoftirqd_run_end(void) +{ + __local_bh_enable(SOFTIRQ_OFFSET, true); + WARN_ON_ONCE(in_interrupt()); + local_irq_enable(); +} + +static inline void softirq_handle_begin(void) { } +static inline void softirq_handle_end(void) { } + +static inline bool should_wake_ksoftirqd(void) +{ + return !this_cpu_read(softirq_ctrl.cnt); +} + +static inline void invoke_softirq(void) +{ + if (should_wake_ksoftirqd()) + wakeup_softirqd(); +} + +#else /* CONFIG_PREEMPT_RT */ -#ifdef CONFIG_TRACE_IRQFLAGS /* - * This is for softirq.c-internal use, where hardirqs are disabled + * This one is for softirq.c-internal use, where hardirqs are disabled * legitimately: */ +#ifdef CONFIG_TRACE_IRQFLAGS void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) { unsigned long flags; @@ -206,12 +392,38 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) } EXPORT_SYMBOL(__local_bh_enable_ip); +static inline void softirq_handle_begin(void) +{ + __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET); +} + +static inline void softirq_handle_end(void) +{ + __local_bh_enable(SOFTIRQ_OFFSET); + WARN_ON_ONCE(in_interrupt()); +} + +static inline void ksoftirqd_run_begin(void) +{ + local_irq_disable(); +} + +static inline void ksoftirqd_run_end(void) +{ + local_irq_enable(); +} + +static inline bool should_wake_ksoftirqd(void) +{ + return true; +} + static inline void invoke_softirq(void) { if (ksoftirqd_running(local_softirq_pending())) return; - if (!force_irqthreads) { + if (!force_irqthreads || !__this_cpu_read(ksoftirqd)) { #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK /* * We can safely execute softirq on the current stack if @@ -250,6 +462,8 @@ asmlinkage __visible void do_softirq(void) local_irq_restore(flags); } +#endif /* !CONFIG_PREEMPT_RT */ + /* * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times, * but break the loop if need_resched() is set or after 2 ms. @@ -318,7 +532,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) pending = local_softirq_pending(); - __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET); + softirq_handle_begin(); in_hardirq = lockdep_softirq_start(); account_softirq_enter(current); @@ -354,8 +568,10 @@ restart: pending >>= softirq_bit; } - if (__this_cpu_read(ksoftirqd) == current) + if (!IS_ENABLED(CONFIG_PREEMPT_RT) && + __this_cpu_read(ksoftirqd) == current) rcu_softirq_qs(); + local_irq_disable(); pending = local_softirq_pending(); @@ -369,8 +585,7 @@ restart: account_softirq_exit(current); lockdep_softirq_end(in_hardirq); - __local_bh_enable(SOFTIRQ_OFFSET); - WARN_ON_ONCE(in_interrupt()); + softirq_handle_end(); current_restore_flags(old_flags, PF_MEMALLOC); } @@ -465,7 +680,7 @@ inline void raise_softirq_irqoff(unsigned int nr) * Otherwise we wake up ksoftirqd to make sure we * schedule the softirq soon. */ - if (!in_interrupt()) + if (!in_interrupt() && should_wake_ksoftirqd()) wakeup_softirqd(); } @@ -531,6 +746,20 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) } EXPORT_SYMBOL(__tasklet_hi_schedule); +static bool tasklet_clear_sched(struct tasklet_struct *t) +{ + if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) { + wake_up_var(&t->state); + return true; + } + + WARN_ONCE(1, "tasklet SCHED state not set: %s %pS\n", + t->use_callback ? "callback" : "func", + t->use_callback ? (void *)t->callback : (void *)t->func); + + return false; +} + static void tasklet_action_common(struct softirq_action *a, struct tasklet_head *tl_head, unsigned int softirq_nr) @@ -550,13 +779,12 @@ static void tasklet_action_common(struct softirq_action *a, if (tasklet_trylock(t)) { if (!atomic_read(&t->count)) { - if (!test_and_clear_bit(TASKLET_STATE_SCHED, - &t->state)) - BUG(); - if (t->use_callback) - t->callback(t); - else - t->func(t->data); + if (tasklet_clear_sched(t)) { + if (t->use_callback) + t->callback(t); + else + t->func(t->data); + } tasklet_unlock(t); continue; } @@ -606,21 +834,62 @@ void tasklet_init(struct tasklet_struct *t, } EXPORT_SYMBOL(tasklet_init); +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) +/* + * Do not use in new code. Waiting for tasklets from atomic contexts is + * error prone and should be avoided. + */ +void tasklet_unlock_spin_wait(struct tasklet_struct *t) +{ + while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { + if (IS_ENABLED(CONFIG_PREEMPT_RT)) { + /* + * Prevent a live lock when current preempted soft + * interrupt processing or prevents ksoftirqd from + * running. If the tasklet runs on a different CPU + * then this has no effect other than doing the BH + * disable/enable dance for nothing. + */ + local_bh_disable(); + local_bh_enable(); + } else { + cpu_relax(); + } + } +} +EXPORT_SYMBOL(tasklet_unlock_spin_wait); +#endif + void tasklet_kill(struct tasklet_struct *t) { if (in_interrupt()) pr_notice("Attempt to kill tasklet from interrupt\n"); - while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { - do { - yield(); - } while (test_bit(TASKLET_STATE_SCHED, &t->state)); - } + while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) + wait_var_event(&t->state, !test_bit(TASKLET_STATE_SCHED, &t->state)); + tasklet_unlock_wait(t); - clear_bit(TASKLET_STATE_SCHED, &t->state); + tasklet_clear_sched(t); } EXPORT_SYMBOL(tasklet_kill); +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) +void tasklet_unlock(struct tasklet_struct *t) +{ + smp_mb__before_atomic(); + clear_bit(TASKLET_STATE_RUN, &t->state); + smp_mb__after_atomic(); + wake_up_var(&t->state); +} +EXPORT_SYMBOL_GPL(tasklet_unlock); + +void tasklet_unlock_wait(struct tasklet_struct *t) +{ + wait_var_event(&t->state, !test_bit(TASKLET_STATE_RUN, &t->state)); +} +EXPORT_SYMBOL_GPL(tasklet_unlock_wait); +#endif + void __init softirq_init(void) { int cpu; @@ -643,53 +912,21 @@ static int ksoftirqd_should_run(unsigned int cpu) static void run_ksoftirqd(unsigned int cpu) { - local_irq_disable(); + ksoftirqd_run_begin(); if (local_softirq_pending()) { /* * We can safely run softirq on inline stack, as we are not deep * in the task stack here. */ __do_softirq(); - local_irq_enable(); + ksoftirqd_run_end(); cond_resched(); return; } - local_irq_enable(); + ksoftirqd_run_end(); } #ifdef CONFIG_HOTPLUG_CPU -/* - * tasklet_kill_immediate is called to remove a tasklet which can already be - * scheduled for execution on @cpu. - * - * Unlike tasklet_kill, this function removes the tasklet - * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state. - * - * When this function is called, @cpu must be in the CPU_DEAD state. - */ -void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu) -{ - struct tasklet_struct **i; - - BUG_ON(cpu_online(cpu)); - BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state)); - - if (!test_bit(TASKLET_STATE_SCHED, &t->state)) - return; - - /* CPU is dead, so no lock needed. */ - for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) { - if (*i == t) { - *i = t->next; - /* If this was the tail element, move the tail ptr */ - if (*i == NULL) - per_cpu(tasklet_vec, cpu).tail = i; - return; - } - } - BUG(); -} - static int takeover_tasklets(unsigned int cpu) { /* CPU is dead, so no lock needed. */ |