Diffstat (limited to 'kernel/time')
-rw-r--r--  kernel/time/tick-sched.c  | 105
-rw-r--r--  kernel/time/timekeeping.c |  10
2 files changed, 73 insertions(+), 42 deletions(-)
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 40420644d0b..7656642e4b8 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -275,42 +275,17 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
}
EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
-/**
- * tick_nohz_stop_sched_tick - stop the idle tick from the idle task
- *
- * When the next event is more than a tick into the future, stop the idle tick
- * Called either from the idle loop or from irq_exit() when an idle period was
- * just interrupted by an interrupt which did not cause a reschedule.
- */
-void tick_nohz_stop_sched_tick(int inidle)
+static void tick_nohz_stop_sched_tick(struct tick_sched *ts)
{
- unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
- struct tick_sched *ts;
+ unsigned long seq, last_jiffies, next_jiffies, delta_jiffies;
ktime_t last_update, expires, now;
struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
u64 time_delta;
int cpu;
- local_irq_save(flags);
-
cpu = smp_processor_id();
ts = &per_cpu(tick_cpu_sched, cpu);
- /*
- * Call to tick_nohz_start_idle stops the last_update_time from being
- * updated. Thus, it must not be called in the event we are called from
- * irq_exit() with the prior state different than idle.
- */
- if (!inidle && !ts->inidle)
- goto end;
-
- /*
- * Set ts->inidle unconditionally. Even if the system did not
- * switch to NOHZ mode the cpu frequency governers rely on the
- * update of the idle time accounting in tick_nohz_start_idle().
- */
- ts->inidle = 1;
-
now = tick_nohz_start_idle(cpu, ts);
/*
@@ -326,10 +301,10 @@ void tick_nohz_stop_sched_tick(int inidle)
}
if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
- goto end;
+ return;
if (need_resched())
- goto end;
+ return;
if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
static int ratelimit;
@@ -339,7 +314,7 @@ void tick_nohz_stop_sched_tick(int inidle)
(unsigned int) local_softirq_pending());
ratelimit++;
}
- goto end;
+ return;
}
ts->idle_calls++;
@@ -434,7 +409,6 @@ void tick_nohz_stop_sched_tick(int inidle)
ts->idle_tick = hrtimer_get_expires(&ts->sched_timer);
ts->tick_stopped = 1;
ts->idle_jiffies = last_jiffies;
- rcu_enter_nohz();
}
ts->idle_sleeps++;
@@ -472,8 +446,64 @@ out:
ts->next_jiffies = next_jiffies;
ts->last_jiffies = last_jiffies;
ts->sleep_length = ktime_sub(dev->next_event, now);
-end:
- local_irq_restore(flags);
+}
+
+/**
+ * tick_nohz_idle_enter - stop the idle tick from the idle task
+ *
+ * When the next event is more than a tick into the future, stop the idle tick.
+ * Called when we start the idle loop.
+ *
+ * The arch is responsible for calling:
+ *
+ * - rcu_idle_enter() after its last use of RCU before the CPU is put
+ * to sleep.
+ * - rcu_idle_exit() before the first use of RCU after the CPU is woken up.
+ */
+void tick_nohz_idle_enter(void)
+{
+ struct tick_sched *ts;
+
+ WARN_ON_ONCE(irqs_disabled());
+
+ /*
+ * Update the idle state in the scheduler domain hierarchy
+ * when tick_nohz_stop_sched_tick() is called from the idle loop.
+ * State will be updated to busy during the first busy tick after
+ * exiting idle.
+ */
+ set_cpu_sd_state_idle();
+
+ local_irq_disable();
+
+ ts = &__get_cpu_var(tick_cpu_sched);
+ /*
+	 * Set ts->inidle unconditionally. Even if the system did not
+	 * switch to NOHZ mode the cpu frequency governors rely on the
+ * update of the idle time accounting in tick_nohz_start_idle().
+ */
+ ts->inidle = 1;
+ tick_nohz_stop_sched_tick(ts);
+
+ local_irq_enable();
+}
+
+/**
+ * tick_nohz_irq_exit - update next tick event from interrupt exit
+ *
+ * When an interrupt fires while we are idle and it doesn't cause
+ * a reschedule, it may still add, modify or delete a timer, enqueue
+ * an RCU callback, etc...
+ * So we need to re-calculate and reprogram the next tick event.
+ */
+void tick_nohz_irq_exit(void)
+{
+ struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
+
+ if (!ts->inidle)
+ return;
+
+ tick_nohz_stop_sched_tick(ts);
}
/**
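The tick_nohz_irq_exit() helper added above is meant to be called from the interrupt-exit path when the CPU is otherwise idle. As a hedged sketch, a call site could look roughly like the following; the guard conditions and the helper name are illustrative assumptions, not part of this diff:

#include <linux/hardirq.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/tick.h>

/*
 * Illustration only: re-evaluate the next tick event when an interrupt
 * returns to an idle CPU without forcing a reschedule.  The exact
 * conditions used by the real irq_exit() may differ.
 */
static void irq_exit_reprogram_tick(void)
{
	if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
		tick_nohz_irq_exit();	/* recompute and reprogram the next event */
}
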
@@ -515,11 +545,13 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
}
/**
- * tick_nohz_restart_sched_tick - restart the idle tick from the idle task
+ * tick_nohz_idle_exit - restart the idle tick from the idle task
*
* Restart the idle tick when the CPU is woken up from idle
+ * This also exits the RCU extended quiescent state. The CPU
+ * can use RCU again after this function is called.
*/
-void tick_nohz_restart_sched_tick(void)
+void tick_nohz_idle_exit(void)
{
int cpu = smp_processor_id();
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
@@ -529,6 +561,7 @@ void tick_nohz_restart_sched_tick(void)
ktime_t now;
local_irq_disable();
+
if (ts->idle_active || (ts->inidle && ts->tick_stopped))
now = ktime_get();
@@ -543,8 +576,6 @@ void tick_nohz_restart_sched_tick(void)
ts->inidle = 0;
- rcu_exit_nohz();
-
/* Update jiffies first */
select_nohz_load_balancer(0);
tick_do_update_jiffies64(now);
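
Taken together, the new entry/exit pair replaces the old tick_nohz_stop_sched_tick(inidle)/tick_nohz_restart_sched_tick() calls, and the RCU extended quiescent state becomes the architecture's responsibility. A minimal sketch of the calling convention the comments above describe; the inner wait loop and the exact placement of the RCU calls are illustrative assumptions, not code from this patch:

#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/tick.h>

/* Sketch of an arch idle loop using the new hooks. */
static void example_cpu_idle(void)
{
	while (1) {
		tick_nohz_idle_enter();		/* may stop the periodic tick */
		while (!need_resched()) {
			rcu_idle_enter();	/* last RCU use before sleeping */
			/* arch-specific low-power wait would go here */
			rcu_idle_exit();	/* RCU usable again after wakeup */
		}
		tick_nohz_idle_exit();		/* restart the tick, fold in idle time */
		schedule();
	}
}
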
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 237841378c0..0c635818640 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -131,7 +131,7 @@ static inline s64 timekeeping_get_ns_raw(void)
/* calculate the delta since the last update_wall_time: */
cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
- /* return delta convert to nanoseconds using ntp adjusted mult. */
+	/* return delta converted to nanoseconds. */
return clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift);
}
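
The value returned here comes from clocksource_cyc2ns(), which is a plain fixed-point multiply-and-shift. A small stand-alone illustration of that arithmetic; the mult/shift values are made up for the example:

#include <stdint.h>
#include <stdio.h>

/* Same arithmetic as clocksource_cyc2ns(): ns = (cycles * mult) >> shift. */
static uint64_t cyc2ns(uint64_t cycles, uint32_t mult, uint32_t shift)
{
	return (cycles * mult) >> shift;
}

int main(void)
{
	/* Example values only: mult/shift chosen so that one cycle is ~100 ns. */
	uint32_t mult = 100u << 20, shift = 20;

	printf("%llu ns\n", (unsigned long long)cyc2ns(12345, mult, shift));
	return 0;
}
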
@@ -813,11 +813,11 @@ static void timekeeping_adjust(s64 offset)
* First we shift it down from NTP_SHIFT to clocksource->shifted nsecs.
*
* Note we subtract one in the shift, so that error is really error*2.
- * This "saves" dividing(shifting) intererval twice, but keeps the
- * (error > interval) comparision as still measuring if error is
+ * This "saves" dividing(shifting) interval twice, but keeps the
+ * (error > interval) comparison as still measuring if error is
 * larger than half an interval.
*
- * Note: It does not "save" on aggrivation when reading the code.
+ * Note: It does not "save" on aggravation when reading the code.
*/
error = timekeeper.ntp_error >> (timekeeper.ntp_error_shift - 1);
if (error > interval) {
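
The comment above is easiest to see with numbers: shifting ntp_error down by one bit less than ntp_error_shift doubles the value, so comparing it against a full interval is equivalent to comparing the true error against half an interval. A throwaway illustration with arbitrary values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned int ntp_error_shift = 8;
	int64_t ntp_error = 900LL << ntp_error_shift;	/* true error: 900 units */
	int64_t interval  = 1000;			/* one clocksource interval */

	/* Shift by one bit less, so "error" is really error * 2 ... */
	int64_t error = ntp_error >> (ntp_error_shift - 1);	/* 1800 */

	/* ... and (error > interval) asks "is the error larger than half an
	 * interval?" without having to halve the interval explicitly. */
	printf("%d %d\n",
	       error > interval,				/* 1800 > 1000 -> 1 */
	       (ntp_error >> ntp_error_shift) > interval / 2);	/* 900 > 500 -> 1 */
	return 0;
}
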
@@ -833,7 +833,7 @@ static void timekeeping_adjust(s64 offset)
* nanosecond, and store the amount rounded up into
* the error. This causes the likely below to be unlikely.
*
- * The properfix is to avoid rounding up by using
+ * The proper fix is to avoid rounding up by using
* the high precision timekeeper.xtime_nsec instead of
* xtime.tv_nsec everywhere. Fixing this will take some
* time.
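
The rounding the comment complains about can be sketched in isolation: nanoseconds are carried in a shifted fixed-point form, the visible tv_nsec is rounded up to the next whole nanosecond, and the amount over-reported is kept as a negative remainder to be paid back by later adjustments. This is a simplified illustration under those assumptions, not the actual update path:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned int shift = 10;			/* fixed-point shift, example only */
	int64_t xtime_nsec = (999LL << shift) + 123;	/* 999 ns plus a fraction */

	int64_t tv_nsec  = (xtime_nsec >> shift) + 1;		/* rounded up: 1000 ns */
	int64_t leftover = xtime_nsec - (tv_nsec << shift);	/* negative remainder */

	printf("tv_nsec=%lld leftover=%lld\n",
	       (long long)tv_nsec, (long long)leftover);
	return 0;
}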