From 203cbf77de59fc8f13502dcfd11350c6d4a5c95f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 14 Jan 2016 16:54:46 +0000 Subject: hrtimer: Handle remaining time proper for TIME_LOW_RES If CONFIG_TIME_LOW_RES is enabled we add a jiffie to the relative timeout to prevent short sleeps, but we do not account for that in interfaces which retrieve the remaining time. Helge observed that timerfd can return a remaining time larger than the relative timeout. That's not expected and breaks userland test programs. Store the information that the timer was armed relative and provide functions to adjust the remaining time. To avoid bloating the hrtimer struct make state a u8, which as a bonus results in better code on x86 at least. Reported-and-tested-by: Helge Deller Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: John Stultz Cc: linux-m68k@lists.linux-m68k.org Cc: dhowells@redhat.com Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20160114164159.273328486@linutronix.de Signed-off-by: Thomas Gleixner --- kernel/time/hrtimer.c | 55 ++++++++++++++++++++++++++++++++---------------- kernel/time/timer_list.c | 2 +- 2 files changed, 38 insertions(+), 19 deletions(-) (limited to 'kernel') diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 435b8850dd80..fa909f9fd559 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -897,10 +897,10 @@ static int enqueue_hrtimer(struct hrtimer *timer, */ static void __remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, - unsigned long newstate, int reprogram) + u8 newstate, int reprogram) { struct hrtimer_cpu_base *cpu_base = base->cpu_base; - unsigned int state = timer->state; + u8 state = timer->state; timer->state = newstate; if (!(state & HRTIMER_STATE_ENQUEUED)) @@ -930,7 +930,7 @@ static inline int remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool restart) { if (hrtimer_is_queued(timer)) { - unsigned long state = timer->state; + u8 state = 
timer->state; int reprogram; /* @@ -954,6 +954,22 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool rest return 0; } +static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim, + const enum hrtimer_mode mode) +{ +#ifdef CONFIG_TIME_LOW_RES + /* + * CONFIG_TIME_LOW_RES indicates that the system has no way to return + * granular time values. For relative timers we add hrtimer_resolution + * (i.e. one jiffie) to prevent short timeouts. + */ + timer->is_rel = mode & HRTIMER_MODE_REL; + if (timer->is_rel) + tim = ktime_add_safe(tim, ktime_set(0, hrtimer_resolution)); +#endif + return tim; +} + /** * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU * @timer: the timer to be added @@ -974,19 +990,10 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, /* Remove an active timer from the queue: */ remove_hrtimer(timer, base, true); - if (mode & HRTIMER_MODE_REL) { + if (mode & HRTIMER_MODE_REL) tim = ktime_add_safe(tim, base->get_time()); - /* - * CONFIG_TIME_LOW_RES is a temporary way for architectures - * to signal that they simply return xtime in - * do_gettimeoffset(). In this case we want to round up by - * resolution when starting a relative timer, to avoid short - * timeouts. This will go away with the GTOD framework. 
- */ -#ifdef CONFIG_TIME_LOW_RES - tim = ktime_add_safe(tim, ktime_set(0, hrtimer_resolution)); -#endif - } + + tim = hrtimer_update_lowres(timer, tim, mode); hrtimer_set_expires_range_ns(timer, tim, delta_ns); @@ -1074,19 +1081,23 @@ EXPORT_SYMBOL_GPL(hrtimer_cancel); /** * hrtimer_get_remaining - get remaining time for the timer * @timer: the timer to read + * @adjust: adjust relative timers when CONFIG_TIME_LOW_RES=y */ -ktime_t hrtimer_get_remaining(const struct hrtimer *timer) +ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust) { unsigned long flags; ktime_t rem; lock_hrtimer_base(timer, &flags); - rem = hrtimer_expires_remaining(timer); + if (IS_ENABLED(CONFIG_TIME_LOW_RES) && adjust) + rem = hrtimer_expires_remaining_adjusted(timer); + else + rem = hrtimer_expires_remaining(timer); unlock_hrtimer_base(timer, &flags); return rem; } -EXPORT_SYMBOL_GPL(hrtimer_get_remaining); +EXPORT_SYMBOL_GPL(__hrtimer_get_remaining); #ifdef CONFIG_NO_HZ_COMMON /** @@ -1219,6 +1230,14 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, timer_stats_account_hrtimer(timer); fn = timer->function; + /* + * Clear the 'is relative' flag for the TIME_LOW_RES case. If the + * timer is restarted with a period then it becomes an absolute + * timer. If its not restarted it does not matter. 
+ */ + if (IS_ENABLED(CONFIG_TIME_LOW_RES)) + timer->is_rel = false; + /* * Because we run timers from hardirq context, there is no chance * they get migrated to another cpu, therefore its safe to unlock diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index f75e35b60149..ba7d8b288bb3 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -69,7 +69,7 @@ print_timer(struct seq_file *m, struct hrtimer *taddr, struct hrtimer *timer, print_name_offset(m, taddr); SEQ_printf(m, ", "); print_name_offset(m, timer->function); - SEQ_printf(m, ", S:%02lx", timer->state); + SEQ_printf(m, ", S:%02x", timer->state); #ifdef CONFIG_TIMER_STATS SEQ_printf(m, ", "); print_name_offset(m, timer->start_site); -- cgit v1.2.3 From 572c39172684c3711e4a03c9a7380067e2b0661c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 14 Jan 2016 16:54:47 +0000 Subject: posix-timers: Handle relative timers with CONFIG_TIME_LOW_RES proper As Helge reported for timerfd we have the same issue in posix timers. We return remaining time larger than the programmed relative time to user space in case of CONFIG_TIME_LOW_RES=y. Use the proper function to adjust the extra time added in hrtimer_start_range_ns(). 
Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Helge Deller Cc: John Stultz Cc: linux-m68k@lists.linux-m68k.org Cc: dhowells@redhat.com Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20160114164159.450510905@linutronix.de Signed-off-by: Thomas Gleixner --- kernel/time/posix-timers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 31d11ac9fa47..f2826c35e918 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -760,7 +760,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv); - remaining = ktime_sub(hrtimer_get_expires(timer), now); + remaining = __hrtimer_expires_remaining_adjusted(timer, now); /* Return 0 only, when the timer is expired and not pending */ if (remaining.tv64 <= 0) { /* -- cgit v1.2.3 From 51cbb5242a41700a3f250ecfb48dcfb7e4375ea4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 14 Jan 2016 16:54:48 +0000 Subject: itimers: Handle relative timers with CONFIG_TIME_LOW_RES proper As Helge reported for timerfd we have the same issue in itimers. We return remaining time larger than the programmed relative time to user space in case of CONFIG_TIME_LOW_RES=y. Use the proper function to adjust the extra time added in hrtimer_start_range_ns(). 
Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Helge Deller Cc: John Stultz Cc: linux-m68k@lists.linux-m68k.org Cc: dhowells@redhat.com Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20160114164159.528222587@linutronix.de Signed-off-by: Thomas Gleixner --- kernel/time/itimer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c index 8d262b467573..1d5c7204ddc9 100644 --- a/kernel/time/itimer.c +++ b/kernel/time/itimer.c @@ -26,7 +26,7 @@ */ static struct timeval itimer_get_remtime(struct hrtimer *timer) { - ktime_t rem = hrtimer_get_remaining(timer); + ktime_t rem = __hrtimer_get_remaining(timer, true); /* * Racy but safe: if the itimer expires after the above -- cgit v1.2.3 From dd4e17ab704269bce71402285f5e8b9ac24b1eff Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 21 Jan 2016 15:03:34 -0800 Subject: ntp: Fix ADJ_SETOFFSET being used w/ ADJ_NANO Recently, in commit 37cf4dc3370f I forgot to check if the timeval being passed was actually a timespec (as is signaled with ADJ_NANO). This resulted in that patch breaking ADJ_SETOFFSET users who set ADJ_NANO, by rejecting valid timespecs that were compared with valid timeval ranges. This patch addresses this by checking for the ADJ_NANO flag and using the timespec check instead in that case.
Reported-by: Harald Hoyer Reported-by: Kay Sievers Fixes: 37cf4dc3370f "time: Verify time values in adjtimex ADJ_SETOFFSET to avoid overflow" Signed-off-by: John Stultz Cc: Sasha Levin Cc: Richard Cochran Cc: Prarit Bhargava Cc: David Herrmann Link: http://lkml.kernel.org/r/1453417415-19110-2-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- kernel/time/ntp.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 36f2ca09aa5e..6df8927c58a5 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -685,8 +685,18 @@ int ntp_validate_timex(struct timex *txc) if (!capable(CAP_SYS_TIME)) return -EPERM; - if (!timeval_inject_offset_valid(&txc->time)) - return -EINVAL; + if (txc->modes & ADJ_NANO) { + struct timespec ts; + + ts.tv_sec = txc->time.tv_sec; + ts.tv_nsec = txc->time.tv_usec; + if (!timespec_inject_offset_valid(&ts)) + return -EINVAL; + + } else { + if (!timeval_inject_offset_valid(&txc->time)) + return -EINVAL; + } } /* -- cgit v1.2.3 From 7809998ab1af22602a8463845108edc49dfb9ef0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 25 Jan 2016 16:41:49 +0100 Subject: tick/sched: Hide unused oneshot timer code A couple of functions in kernel/time/tick-sched.c are only relevant for oneshot timer mode, i.e. when hires-timers or nohz mode are enabled. 
If both are disabled, we get gcc warnings about them: kernel/time/tick-sched.c:98:16: warning: 'tick_init_jiffy_update' defined but not used [-Wunused-function] static ktime_t tick_init_jiffy_update(void) ^ kernel/time/tick-sched.c:112:13: warning: 'tick_sched_do_timer' defined but not used [-Wunused-function] static void tick_sched_do_timer(ktime_t now) ^ kernel/time/tick-sched.c:134:13: warning: 'tick_sched_handle' defined but not used [-Wunused-function] static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) ^ This encloses the whole set of functions in an appropriate ifdef to avoid the warning and to make it clearer when they are used. Signed-off-by: Arnd Bergmann Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1453736525-1959191-1-git-send-email-arnd@arndb.de Signed-off-by: Thomas Gleixner --- kernel/time/tick-sched.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 7ea28ed3109d..cbe5d8dcf15a 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -36,16 +36,17 @@ */ static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); -/* - * The time, when the last jiffy update happened. Protected by jiffies_lock. - */ -static ktime_t last_jiffies_update; - struct tick_sched *tick_get_tick_sched(int cpu) { return &per_cpu(tick_cpu_sched, cpu); } +#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS) +/* + * The time, when the last jiffy update happened. Protected by jiffies_lock. + */ +static ktime_t last_jiffies_update; + /* * Must be called with interrupts disabled ! 
*/ @@ -151,6 +152,7 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) update_process_times(user_mode(regs)); profile_tick(CPU_PROFILING); } +#endif #ifdef CONFIG_NO_HZ_FULL cpumask_var_t tick_nohz_full_mask; -- cgit v1.2.3 From 1ca8ec532fc2d986f1f4a319857bb18e0c9739b4 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 27 Jan 2016 19:26:07 +0800 Subject: tick/nohz: Set the correct expiry when switching to nohz/lowres mode commit 0ff53d096422 sets the next tick interrupt to the last jiffies update, i.e. in the past, because the forward operation is invoked before the set operation. There is no resulting damage (yet), but we get an extra pointless tick interrupt. Revert the order so we get the next tick interrupt in the future. Fixes: commit 0ff53d096422 "tick: sched: Force tick interrupt and get rid of softirq magic" Signed-off-by: Wanpeng Li Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1453893967-3458-1-git-send-email-wanpeng.li@hotmail.com Signed-off-by: Thomas Gleixner --- kernel/time/tick-sched.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index cbe5d8dcf15a..de2d9fef6ea6 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -995,9 +995,9 @@ static void tick_nohz_switch_to_nohz(void) /* Get the next period */ next = tick_init_jiffy_update(); - hrtimer_forward_now(&ts->sched_timer, tick_period); hrtimer_set_expires(&ts->sched_timer, next); - tick_program_event(next, 1); + hrtimer_forward_now(&ts->sched_timer, tick_period); + tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1); tick_nohz_activate(ts, NOHZ_MODE_LOWRES); } -- cgit v1.2.3