From 4b41308d2d0398409620613c7eaaaf52c738b042 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 10 Aug 2011 10:37:59 -0700 Subject: alarmtimers: Change alarmtimer functions to return alarmtimer_restart values In order to properly fix the denial of service issue with high freq periodic alarm timers, we need to push the re-arming logic into the alarm timer handler, much as the hrtimer code does. This patch introduces alarmtimer_restart enum and changes the alarmtimer handler declarations to use it as a return value. Further, to ease following changes, it extends the alarmtimer handler functions to also take the time at expiration. No logic is yet modified. CC: Thomas Gleixner Signed-off-by: John Stultz --- kernel/time/alarmtimer.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index ea5e1a928d5..9e786053ffa 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -196,7 +196,7 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) } spin_unlock_irqrestore(&base->lock, flags); if (alarm->function) - alarm->function(alarm); + alarm->function(alarm, now); spin_lock_irqsave(&base->lock, flags); } @@ -299,7 +299,7 @@ static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type) * @function: callback that is run when the alarm fires */ void alarm_init(struct alarm *alarm, enum alarmtimer_type type, - void (*function)(struct alarm *)) + enum alarmtimer_restart (*function)(struct alarm *, ktime_t)) { timerqueue_init(&alarm->node); alarm->period = ktime_set(0, 0); @@ -365,12 +365,15 @@ static enum alarmtimer_type clock2alarm(clockid_t clockid) * * Posix timer callback for expired alarm timers. */ -static void alarm_handle_timer(struct alarm *alarm) +static enum alarmtimer_restart alarm_handle_timer(struct alarm *alarm, + ktime_t now) { struct k_itimer *ptr = container_of(alarm, struct k_itimer, it.alarmtimer); if (posix_timer_event(ptr, 0) != 0) ptr->it_overrun++; + + return ALARMTIMER_NORESTART; } /** @@ -509,13 +512,15 @@ static int alarm_timer_set(struct k_itimer *timr, int flags, * * Wakes up the task that set the alarmtimer */ -static void alarmtimer_nsleep_wakeup(struct alarm *alarm) +static enum alarmtimer_restart alarmtimer_nsleep_wakeup(struct alarm *alarm, + ktime_t now) { struct task_struct *task = (struct task_struct *)alarm->data; alarm->data = NULL; if (task) wake_up_process(task); + return ALARMTIMER_NORESTART; } /** -- cgit v1.2.3 From 54da23b720d5d612f8f1669f9ed3744008fb7382 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 10 Aug 2011 11:08:07 -0700 Subject: alarmtimers: Push rearming peroidic timers down into alamrtimer handler This patch pushes the periodic alarmtimer re-arming down into the alarmtimer handler, mimicking how hrtimers handle this. CC: Thomas Gleixner Signed-off-by: John Stultz --- kernel/time/alarmtimer.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 9e786053ffa..55e634ea605 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -174,6 +174,7 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) unsigned long flags; ktime_t now; int ret = HRTIMER_NORESTART; + int restart = ALARMTIMER_NORESTART; spin_lock_irqsave(&base->lock, flags); now = base->gettime(); @@ -188,16 +189,16 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) timerqueue_del(&base->timerqueue, &alarm->node); alarm->enabled = 0; - /* Re-add periodic timers */ - if (alarm->period.tv64) { - alarm->node.expires = ktime_add(expired, alarm->period); - timerqueue_add(&base->timerqueue, &alarm->node); - alarm->enabled = 1; - } + spin_unlock_irqrestore(&base->lock, flags); if (alarm->function) - alarm->function(alarm, now); + restart = alarm->function(alarm, now); spin_lock_irqsave(&base->lock, flags); + + if (restart != ALARMTIMER_NORESTART) { + timerqueue_add(&base->timerqueue, &alarm->node); + alarm->enabled = 1; + } } if (next) { @@ -373,6 +374,11 @@ static enum alarmtimer_restart alarm_handle_timer(struct alarm *alarm, if (posix_timer_event(ptr, 0) != 0) ptr->it_overrun++; + /* Re-add periodic timers */ + if (alarm->period.tv64) { + alarm->node.expires = ktime_add(now, alarm->period); + return ALARMTIMER_RESTART; + } return ALARMTIMER_NORESTART; } -- cgit v1.2.3 From dce75a8c71819ed4c7efdcd53c9b6f6356dc8cb5 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 10 Aug 2011 11:31:03 -0700 Subject: alarmtimers: Add alarm_forward functionality In order to avoid wasting time expiring and re-adding very high freq periodic alarmtimers, introduce alarm_forward() which is similar to hrtimer_forward and moves the timer to the next future expiration time and returns the number of overruns. CC: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/alarmtimer.h | 2 ++ kernel/time/alarmtimer.c | 37 ++++++++++++++++++++++++++++++++++++- 2 files changed, 38 insertions(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/include/linux/alarmtimer.h b/include/linux/alarmtimer.h index 0289eb29e79..17535963b27 100644 --- a/include/linux/alarmtimer.h +++ b/include/linux/alarmtimer.h @@ -42,4 +42,6 @@ void alarm_init(struct alarm *alarm, enum alarmtimer_type type, void alarm_start(struct alarm *alarm, ktime_t start, ktime_t period); void alarm_cancel(struct alarm *alarm); +u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval); + #endif diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 55e634ea605..f03b04291b6 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -347,6 +347,41 @@ void alarm_cancel(struct alarm *alarm) } + +u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval) +{ + u64 overrun = 1; + ktime_t delta; + + delta = ktime_sub(now, alarm->node.expires); + + if (delta.tv64 < 0) + return 0; + + if (unlikely(delta.tv64 >= interval.tv64)) { + s64 incr = ktime_to_ns(interval); + + overrun = ktime_divns(delta, incr); + + alarm->node.expires = ktime_add_ns(alarm->node.expires, + incr*overrun); + + if (alarm->node.expires.tv64 > now.tv64) + return overrun; + /* + * This (and the ktime_add() below) is the + * correction for exact: + */ + overrun++; + } + + alarm->node.expires = ktime_add(alarm->node.expires, interval); + return overrun; +} + + + + /** * clock2alarm - helper that converts from clockid to alarmtypes * @clockid: clockid. @@ -376,7 +411,7 @@ static enum alarmtimer_restart alarm_handle_timer(struct alarm *alarm, /* Re-add periodic timers */ if (alarm->period.tv64) { - alarm->node.expires = ktime_add(now, alarm->period); + ptr->it_overrun += alarm_forward(alarm, now, alarm->period); return ALARMTIMER_RESTART; } return ALARMTIMER_NORESTART; -- cgit v1.2.3 From d77e23accec56bf2ba12187fe77a2f500a511282 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 10 Aug 2011 11:40:23 -0700 Subject: alarmtimers: Remove interval cap limit hack Now that the alarmtimers code has been refactored, the interval cap limit can be removed. CC: Thomas Gleixner Signed-off-by: John Stultz --- kernel/time/alarmtimer.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index f03b04291b6..a522c007e6f 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -525,15 +525,6 @@ static int alarm_timer_set(struct k_itimer *timr, int flags, if (!rtcdev) return -ENOTSUPP; - /* - * XXX HACK! Currently we can DOS a system if the interval - * period on alarmtimers is too small. Cap the interval here - * to 100us and solve this properly in a future patch! -jstultz - */ - if ((new_setting->it_interval.tv_sec == 0) && - (new_setting->it_interval.tv_nsec < 100000)) - new_setting->it_interval.tv_nsec = 100000; - if (old_setting) alarm_timer_get(timr, old_setting); -- cgit v1.2.3 From 9e26476243e438f4534a562660c1296a15a9e202 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 10 Aug 2011 12:09:24 -0700 Subject: alarmtimers: Remove period from alarm structure Now that periodic alarmtimers are managed by the handler function, remove the period value from the alarm structure and let the handlers manage the interval on their own. CC: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/alarmtimer.h | 3 +-- include/linux/posix-timers.h | 5 ++++- kernel/time/alarmtimer.c | 30 ++++++++++++++---------------- 3 files changed, 19 insertions(+), 19 deletions(-) (limited to 'kernel/time') diff --git a/include/linux/alarmtimer.h b/include/linux/alarmtimer.h index 17535963b27..c854a8efa86 100644 --- a/include/linux/alarmtimer.h +++ b/include/linux/alarmtimer.h @@ -30,7 +30,6 @@ enum alarmtimer_restart { */ struct alarm { struct timerqueue_node node; - ktime_t period; enum alarmtimer_restart (*function)(struct alarm *, ktime_t now); enum alarmtimer_type type; bool enabled; @@ -39,7 +38,7 @@ struct alarm { void alarm_init(struct alarm *alarm, enum alarmtimer_type type, enum alarmtimer_restart (*function)(struct alarm *, ktime_t)); -void alarm_start(struct alarm *alarm, ktime_t start, ktime_t period); +void alarm_start(struct alarm *alarm, ktime_t start); void alarm_cancel(struct alarm *alarm); u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval); diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 959c14132f4..042058fdb0a 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -81,7 +81,10 @@ struct k_itimer { unsigned long incr; unsigned long expires; } mmtimer; - struct alarm alarmtimer; + struct { + struct alarm alarmtimer; + ktime_t interval; + } alarm; struct rcu_head rcu; } it; }; diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index a522c007e6f..90935591dd4 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -303,7 +303,6 @@ void alarm_init(struct alarm *alarm, enum alarmtimer_type type, enum alarmtimer_restart (*function)(struct alarm *, ktime_t)) { timerqueue_init(&alarm->node); - alarm->period = ktime_set(0, 0); alarm->function = function; alarm->type = type; alarm->enabled = 0; @@ -313,9 +312,8 @@ void alarm_init(struct alarm *alarm, enum alarmtimer_type type, * alarm_start - Sets an alarm to fire * @alarm: ptr to alarm to set * @start: time to run the alarm - * @period: period at which the alarm will recur */ -void alarm_start(struct alarm *alarm, ktime_t start, ktime_t period) +void alarm_start(struct alarm *alarm, ktime_t start) { struct alarm_base *base = &alarm_bases[alarm->type]; unsigned long flags; @@ -324,7 +322,6 @@ void alarm_start(struct alarm *alarm, ktime_t start, ktime_t period) if (alarm->enabled) alarmtimer_remove(base, alarm); alarm->node.expires = start; - alarm->period = period; alarmtimer_enqueue(base, alarm); alarm->enabled = 1; spin_unlock_irqrestore(&base->lock, flags); @@ -405,13 +402,14 @@ static enum alarmtimer_restart alarm_handle_timer(struct alarm *alarm, ktime_t now) { struct k_itimer *ptr = container_of(alarm, struct k_itimer, - it.alarmtimer); + it.alarm.alarmtimer); if (posix_timer_event(ptr, 0) != 0) ptr->it_overrun++; /* Re-add periodic timers */ - if (alarm->period.tv64) { - ptr->it_overrun += alarm_forward(alarm, now, alarm->period); + if (ptr->it.alarm.interval.tv64) { + ptr->it_overrun += alarm_forward(alarm, now, + ptr->it.alarm.interval); return ALARMTIMER_RESTART; } return ALARMTIMER_NORESTART; @@ -471,7 +469,7 @@ static int alarm_timer_create(struct k_itimer *new_timer) type = clock2alarm(new_timer->it_clock); base = &alarm_bases[type]; - alarm_init(&new_timer->it.alarmtimer, type, alarm_handle_timer); + alarm_init(&new_timer->it.alarm.alarmtimer, type, alarm_handle_timer); return 0; } @@ -488,9 +486,9 @@ static void alarm_timer_get(struct k_itimer *timr, memset(cur_setting, 0, sizeof(struct itimerspec)); cur_setting->it_interval = - ktime_to_timespec(timr->it.alarmtimer.period); + ktime_to_timespec(timr->it.alarm.interval); cur_setting->it_value = - ktime_to_timespec(timr->it.alarmtimer.node.expires); + ktime_to_timespec(timr->it.alarm.alarmtimer.node.expires); return; } @@ -505,7 +503,7 @@ static int alarm_timer_del(struct k_itimer *timr) if (!rtcdev) return -ENOTSUPP; - alarm_cancel(&timr->it.alarmtimer); + alarm_cancel(&timr->it.alarm.alarmtimer); return 0; } @@ -529,12 +527,12 @@ static int alarm_timer_set(struct k_itimer *timr, int flags, alarm_timer_get(timr, old_setting); /* If the timer was already set, cancel it */ - alarm_cancel(&timr->it.alarmtimer); + alarm_cancel(&timr->it.alarm.alarmtimer); /* start the timer */ - alarm_start(&timr->it.alarmtimer, - timespec_to_ktime(new_setting->it_value), - timespec_to_ktime(new_setting->it_interval)); + timr->it.alarm.interval = timespec_to_ktime(new_setting->it_interval); + alarm_start(&timr->it.alarm.alarmtimer, + timespec_to_ktime(new_setting->it_value)); return 0; } @@ -567,7 +565,7 @@ static int alarmtimer_do_nsleep(struct alarm *alarm, ktime_t absexp) alarm->data = (void *)current; do { set_current_state(TASK_INTERRUPTIBLE); - alarm_start(alarm, absexp, ktime_set(0, 0)); + alarm_start(alarm, absexp); if (likely(alarm->data)) schedule(); -- cgit v1.2.3 From a28cde81ab13cc251748a4c4ef06883dd09a10ea Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 10 Aug 2011 12:30:21 -0700 Subject: alarmtimers: Add more refined alarm state tracking In order to allow for functionality like try_to_cancel, add more refined state tracking (similar to hrtimers). CC: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/alarmtimer.h | 34 +++++++++++++++++++++++++++++++++- kernel/time/alarmtimer.c | 21 ++++++++++++++------- 2 files changed, 47 insertions(+), 8 deletions(-) (limited to 'kernel/time') diff --git a/include/linux/alarmtimer.h b/include/linux/alarmtimer.h index c854a8efa86..304124a6c98 100644 --- a/include/linux/alarmtimer.h +++ b/include/linux/alarmtimer.h @@ -18,6 +18,11 @@ enum alarmtimer_restart { ALARMTIMER_RESTART, }; + +#define ALARMTIMER_STATE_INACTIVE 0x00 +#define ALARMTIMER_STATE_ENQUEUED 0x01 +#define ALARMTIMER_STATE_CALLBACK 0x02 + /** * struct alarm - Alarm timer structure * @node: timerqueue node for adding to the event list this value @@ -32,7 +37,7 @@ struct alarm { struct timerqueue_node node; enum alarmtimer_restart (*function)(struct alarm *, ktime_t now); enum alarmtimer_type type; - bool enabled; + int state; void *data; }; @@ -43,4 +48,31 @@ void alarm_cancel(struct alarm *alarm); u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval); +/* + * A alarmtimer is active, when it is enqueued into timerqueue or the + * callback function is running. + */ +static inline int alarmtimer_active(const struct alarm *timer) +{ + return timer->state != ALARMTIMER_STATE_INACTIVE; +} + +/* + * Helper function to check, whether the timer is on one of the queues + */ +static inline int alarmtimer_is_queued(struct alarm *timer) +{ + return timer->state & ALARMTIMER_STATE_ENQUEUED; +} + +/* + * Helper function to check, whether the timer is running the callback + * function + */ +static inline int alarmtimer_callback_running(struct alarm *timer) +{ + return timer->state & ALARMTIMER_STATE_CALLBACK; +} + + #endif diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 90935591dd4..5b14cc29b6a 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -126,6 +126,8 @@ static struct rtc_device *alarmtimer_get_rtcdev(void) static void alarmtimer_enqueue(struct alarm_base *base, struct alarm *alarm) { timerqueue_add(&base->timerqueue, &alarm->node); + alarm->state |= ALARMTIMER_STATE_ENQUEUED; + if (&alarm->node == timerqueue_getnext(&base->timerqueue)) { hrtimer_try_to_cancel(&base->timer); hrtimer_start(&base->timer, alarm->node.expires, @@ -147,7 +149,12 @@ static void alarmtimer_remove(struct alarm_base *base, struct alarm *alarm) { struct timerqueue_node *next = timerqueue_getnext(&base->timerqueue); + if (!(alarm->state & ALARMTIMER_STATE_ENQUEUED)) + return; + timerqueue_del(&base->timerqueue, &alarm->node); + alarm->state &= ~ALARMTIMER_STATE_ENQUEUED; + if (next == &alarm->node) { hrtimer_try_to_cancel(&base->timer); next = timerqueue_getnext(&base->timerqueue); @@ -188,16 +195,18 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) alarm = container_of(next, struct alarm, node); timerqueue_del(&base->timerqueue, &alarm->node); - alarm->enabled = 0; + alarm->state &= ~ALARMTIMER_STATE_ENQUEUED; + alarm->state |= ALARMTIMER_STATE_CALLBACK; spin_unlock_irqrestore(&base->lock, flags); if (alarm->function) restart = alarm->function(alarm, now); spin_lock_irqsave(&base->lock, flags); + alarm->state &= ~ALARMTIMER_STATE_CALLBACK; if (restart != ALARMTIMER_NORESTART) { timerqueue_add(&base->timerqueue, &alarm->node); - alarm->enabled = 1; + alarm->state |= ALARMTIMER_STATE_ENQUEUED; } } @@ -305,7 +314,7 @@ void alarm_init(struct alarm *alarm, enum alarmtimer_type type, timerqueue_init(&alarm->node); alarm->function = function; alarm->type = type; - alarm->enabled = 0; + alarm->state = ALARMTIMER_STATE_INACTIVE; } /** @@ -319,11 +328,10 @@ void alarm_start(struct alarm *alarm, ktime_t start) unsigned long flags; spin_lock_irqsave(&base->lock, flags); - if (alarm->enabled) + if (alarmtimer_active(alarm)) alarmtimer_remove(base, alarm); alarm->node.expires = start; alarmtimer_enqueue(base, alarm); - alarm->enabled = 1; spin_unlock_irqrestore(&base->lock, flags); } @@ -337,9 +345,8 @@ void alarm_cancel(struct alarm *alarm) unsigned long flags; spin_lock_irqsave(&base->lock, flags); - if (alarm->enabled) + if (alarmtimer_is_queued(alarm)) alarmtimer_remove(base, alarm); - alarm->enabled = 0; spin_unlock_irqrestore(&base->lock, flags); } -- cgit v1.2.3 From 9082c465a5403f4a98734193e078552991a2e283 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 10 Aug 2011 12:41:36 -0700 Subject: alarmtimers: Add try_to_cancel functionality There's a number of edge cases when cancelling a alarm, so to be sure we accurately do so, introduce try_to_cancel, which returns proper failure errors if it cannot. Also modify cancel to spin until the alarm is properly disabled. CC: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/alarmtimer.h | 3 ++- kernel/time/alarmtimer.c | 43 +++++++++++++++++++++++++++++++++++++------ 2 files changed, 39 insertions(+), 7 deletions(-) (limited to 'kernel/time') diff --git a/include/linux/alarmtimer.h b/include/linux/alarmtimer.h index 304124a6c98..975009e1cbe 100644 --- a/include/linux/alarmtimer.h +++ b/include/linux/alarmtimer.h @@ -44,7 +44,8 @@ struct alarm { void alarm_init(struct alarm *alarm, enum alarmtimer_type type, enum alarmtimer_restart (*function)(struct alarm *, ktime_t)); void alarm_start(struct alarm *alarm, ktime_t start); -void alarm_cancel(struct alarm *alarm); +int alarm_try_to_cancel(struct alarm *alarm); +int alarm_cancel(struct alarm *alarm); u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval); diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 5b14cc29b6a..bdb7342b689 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -336,21 +336,49 @@ void alarm_start(struct alarm *alarm, ktime_t start) } /** - * alarm_cancel - Tries to cancel an alarm timer + * alarm_try_to_cancel - Tries to cancel an alarm timer * @alarm: ptr to alarm to be canceled + * + * Returns 1 if the timer was canceled, 0 if it was not running, + * and -1 if the callback was running */ -void alarm_cancel(struct alarm *alarm) +int alarm_try_to_cancel(struct alarm *alarm) { struct alarm_base *base = &alarm_bases[alarm->type]; unsigned long flags; - + int ret = -1; spin_lock_irqsave(&base->lock, flags); - if (alarmtimer_is_queued(alarm)) + + if (alarmtimer_callback_running(alarm)) + goto out; + + if (alarmtimer_is_queued(alarm)) { alarmtimer_remove(base, alarm); + ret = 1; + } else + ret = 0; +out: spin_unlock_irqrestore(&base->lock, flags); + return ret; } +/** + * alarm_cancel - Spins trying to cancel an alarm timer until it is done + * @alarm: ptr to alarm to be canceled + * + * Returns 1 if the timer was canceled, 0 if it was not active. + */ +int alarm_cancel(struct alarm *alarm) +{ + for (;;) { + int ret = alarm_try_to_cancel(alarm); + if (ret >= 0) + return ret; + cpu_relax(); + } +} + u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval) { @@ -510,7 +538,9 @@ static int alarm_timer_del(struct k_itimer *timr) if (!rtcdev) return -ENOTSUPP; - alarm_cancel(&timr->it.alarm.alarmtimer); + if (alarm_try_to_cancel(&timr->it.alarm.alarmtimer) < 0) + return TIMER_RETRY; + return 0; } @@ -534,7 +564,8 @@ static int alarm_timer_set(struct k_itimer *timr, int flags, alarm_timer_get(timr, old_setting); /* If the timer was already set, cancel it */ - alarm_cancel(&timr->it.alarm.alarmtimer); + if (alarm_try_to_cancel(&timr->it.alarm.alarmtimer) < 0) + return TIMER_RETRY; /* start the timer */ timr->it.alarm.interval = timespec_to_ktime(new_setting->it_interval); -- cgit v1.2.3 From 8bc0dafb5cf38a19484dfb16e2c6d29e85820046 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 14 Jul 2011 18:35:13 -0700 Subject: alarmtimers: Rework RTC device selection using class interface This allows cleaner detection of the RTC device being registered, rather then probing any time someone calls alarmtimer_get_rtcdev. CC: Thomas Gleixner Signed-off-by: John Stultz --- kernel/time/alarmtimer.c | 78 +++++++++++++++++++++++++----------------------- 1 file changed, 40 insertions(+), 38 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index bdb7342b689..154d5563ab1 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -52,27 +52,6 @@ static struct rtc_timer rtctimer; static struct rtc_device *rtcdev; static DEFINE_SPINLOCK(rtcdev_lock); -/** - * has_wakealarm - check rtc device has wakealarm ability - * @dev: current device - * @name_ptr: name to be returned - * - * This helper function checks to see if the rtc device can wake - * from suspend. - */ -static int has_wakealarm(struct device *dev, void *name_ptr) -{ - struct rtc_device *candidate = to_rtc_device(dev); - - if (!candidate->ops->set_alarm) - return 0; - if (!device_may_wakeup(candidate->dev.parent)) - return 0; - - *(const char **)name_ptr = dev_name(dev); - return 1; -} - /** * alarmtimer_get_rtcdev - Return selected rtcdevice * @@ -82,37 +61,58 @@ static int has_wakealarm(struct device *dev, void *name_ptr) */ static struct rtc_device *alarmtimer_get_rtcdev(void) { - struct device *dev; - char *str; unsigned long flags; struct rtc_device *ret; spin_lock_irqsave(&rtcdev_lock, flags); - if (!rtcdev) { - /* Find an rtc device and init the rtc_timer */ - dev = class_find_device(rtc_class, NULL, &str, has_wakealarm); - /* If we have a device then str is valid. See has_wakealarm() */ - if (dev) { - rtcdev = rtc_class_open(str); - /* - * Drop the reference we got in class_find_device, - * rtc_open takes its own. - */ - put_device(dev); - rtc_timer_init(&rtctimer, NULL, NULL); - } - } ret = rtcdev; spin_unlock_irqrestore(&rtcdev_lock, flags); return ret; } + + +static int alarmtimer_rtc_add_device(struct device *dev, + struct class_interface *class_intf) +{ + unsigned long flags; + struct rtc_device *rtc = to_rtc_device(dev); + + if (rtcdev) + return -EBUSY; + + if (!rtc->ops->set_alarm) + return -1; + if (!device_may_wakeup(rtc->dev.parent)) + return -1; + + spin_lock_irqsave(&rtcdev_lock, flags); + if (!rtcdev) { + rtcdev = rtc; + /* hold a reference so it doesn't go away */ + get_device(dev); + } + spin_unlock_irqrestore(&rtcdev_lock, flags); + return 0; +} + +static struct class_interface alarmtimer_rtc_interface = { + .add_dev = &alarmtimer_rtc_add_device, +}; + +static void alarmtimer_rtc_interface_setup(void) +{ + alarmtimer_rtc_interface.class = rtc_class; + class_interface_register(&alarmtimer_rtc_interface); +} #else #define alarmtimer_get_rtcdev() (0) #define rtcdev (0) +#define alarmtimer_rtc_interface_setup() #endif + /** * alarmtimer_enqueue - Adds an alarm timer to an alarm_base timerqueue * @base: pointer to the base where the timer is being run @@ -244,7 +244,7 @@ static int alarmtimer_suspend(struct device *dev) freezer_delta = ktime_set(0, 0); spin_unlock_irqrestore(&freezer_delta_lock, flags); - rtc = rtcdev; + rtc = alarmtimer_get_rtcdev(); /* If we have no rtcdev, just return */ if (!rtc) return 0; @@ -792,6 +792,8 @@ static int __init alarmtimer_init(void) HRTIMER_MODE_ABS); alarm_bases[i].timer.function = alarmtimer_fired; } + + alarmtimer_rtc_interface_setup(); error = platform_driver_register(&alarmtimer_driver); platform_device_register_simple("alarmtimer", -1, NULL, 0); -- cgit v1.2.3 From 6beea0cda8ce71c01354e688e5735c47e331e84f Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 24 Aug 2011 09:37:48 +0200 Subject: nohz: Fix update_ts_time_stat idle accounting update_ts_time_stat currently updates idle time even if we are in iowait loop at the moment. The only real users of the idle counter (via get_cpu_idle_time_us) are CPU governors and they expect to get cumulative time for both idle and iowait times. The value (idle_sleeptime) is also printed to userspace by print_cpu but it prints both idle and iowait times so the idle part is misleading. Let's clean this up and fix update_ts_time_stat to account both counters properly and update consumers of idle to consider iowait time as well. If we do this we might use get_cpu_{idle,iowait}_time_us from other contexts as well and we will get expected values. Signed-off-by: Michal Hocko Cc: Dave Jones Cc: Arnd Bergmann Cc: Alexey Dobriyan Link: http://lkml.kernel.org/r/e9c909c221a8da402c4da07e4cd968c3218f8eb1.1314172057.git.mhocko@suse.cz Signed-off-by: Thomas Gleixner --- drivers/cpufreq/cpufreq_conservative.c | 4 +++- drivers/cpufreq/cpufreq_ondemand.c | 4 +++- kernel/time/tick-sched.c | 8 ++++---- 3 files changed, 10 insertions(+), 6 deletions(-) (limited to 'kernel/time') diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 33b56e5c5c1..c97b468ee9f 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -120,10 +120,12 @@ static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu, static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) { - u64 idle_time = get_cpu_idle_time_us(cpu, wall); + u64 idle_time = get_cpu_idle_time_us(cpu, NULL); if (idle_time == -1ULL) return get_cpu_idle_time_jiffy(cpu, wall); + else + idle_time += get_cpu_iowait_time_us(cpu, wall); return idle_time; } diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 891360edecd..07756bddede 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -144,10 +144,12 @@ static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu, static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) { - u64 idle_time = get_cpu_idle_time_us(cpu, wall); + u64 idle_time = get_cpu_idle_time_us(cpu, NULL); if (idle_time == -1ULL) return get_cpu_idle_time_jiffy(cpu, wall); + else + idle_time += get_cpu_iowait_time_us(cpu, wall); return idle_time; } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index d5097c44b40..7ab44bca654 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -159,9 +159,10 @@ update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_upda if (ts->idle_active) { delta = ktime_sub(now, ts->idle_entrytime); - ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); if (nr_iowait_cpu(cpu) > 0) ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta); + else + ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); ts->idle_entrytime = now; } @@ -200,8 +201,7 @@ static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts) * @last_update_time: variable to store update time in * * Return the cummulative idle time (since boot) for a given - * CPU, in microseconds. The idle time returned includes - * the iowait time (unlike what "top" and co report). + * CPU, in microseconds. * * This time is measured via accounting rather than sampling, * and is as accurate as ktime_get() is. @@ -221,7 +221,7 @@ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) } EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); -/* +/** * get_cpu_iowait_time_us - get the total iowait time of a cpu * @cpu: CPU number to query * @last_update_time: variable to store update time in -- cgit v1.2.3 From 09a1d34f8535ecf9a347ea76f7597730c2bc0c8d Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 24 Aug 2011 09:39:30 +0200 Subject: nohz: Make idle/iowait counter update conditional get_cpu_{idle,iowait}_time_us update idle/iowait counters unconditionally if the given CPU is in the idle loop. This doesn't work well outside of CPU governors which are singletons so nobody (except for IRQ) can race with them. We will need to use both functions from /proc/stat handler to properly handle nohz idle/iowait times. Make the update depend on a non NULL last_update_time argument. Signed-off-by: Michal Hocko Cc: Dave Jones Cc: Arnd Bergmann Cc: Alexey Dobriyan Link: http://lkml.kernel.org/r/11f23179472635ce52e78921d47a20216b872f23.1314172057.git.mhocko@suse.cz Signed-off-by: Thomas Gleixner --- kernel/time/tick-sched.c | 41 +++++++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 7ab44bca654..664c4a36543 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -198,7 +198,8 @@ static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts) /** * get_cpu_idle_time_us - get the total idle time of a cpu * @cpu: CPU number to query - * @last_update_time: variable to store update time in + * @last_update_time: variable to store update time in. Do not update + * counters if NULL. * * Return the cummulative idle time (since boot) for a given * CPU, in microseconds. @@ -211,20 +212,35 @@ static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts) u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + ktime_t now, idle; if (!tick_nohz_enabled) return -1; - update_ts_time_stats(cpu, ts, ktime_get(), last_update_time); + now = ktime_get(); + if (last_update_time) { + update_ts_time_stats(cpu, ts, now, last_update_time); + idle = ts->idle_sleeptime; + } else { + if (ts->idle_active && !nr_iowait_cpu(cpu)) { + ktime_t delta = ktime_sub(now, ts->idle_entrytime); + + idle = ktime_add(ts->idle_sleeptime, delta); + } else { + idle = ts->idle_sleeptime; + } + } + + return ktime_to_us(idle); - return ktime_to_us(ts->idle_sleeptime); } EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); /** * get_cpu_iowait_time_us - get the total iowait time of a cpu * @cpu: CPU number to query - * @last_update_time: variable to store update time in + * @last_update_time: variable to store update time in. Do not update + * counters if NULL. * * Return the cummulative iowait time (since boot) for a given * CPU, in microseconds. @@ -237,13 +253,26 @@ EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + ktime_t now, iowait; if (!tick_nohz_enabled) return -1; - update_ts_time_stats(cpu, ts, ktime_get(), last_update_time); + now = ktime_get(); + if (last_update_time) { + update_ts_time_stats(cpu, ts, now, last_update_time); + iowait = ts->iowait_sleeptime; + } else { + if (ts->idle_active && nr_iowait_cpu(cpu) > 0) { + ktime_t delta = ktime_sub(now, ts->idle_entrytime); + + iowait = ktime_add(ts->iowait_sleeptime, delta); + } else { + iowait = ts->iowait_sleeptime; + } + } - return ktime_to_us(ts->iowait_sleeptime); + return ktime_to_us(iowait); } EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); -- cgit v1.2.3 From 29c158e81c733ac7d6a75c5ee929f34fb9f92983 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 23 Aug 2011 13:20:46 +0200 Subject: nohz: Remove "Switched to NOHz mode" debugging messages When performing cpu hotplug tests the kernel printk log buffer gets flooded with pointless "Switched to NOHz mode..." messages. Especially when afterwards analyzing a dump this might have removed more interesting stuff out of the buffer. Assuming that switching to NOHz mode simply works just remove the printk. Signed-off-by: Heiko Carstens Link: http://lkml.kernel.org/r/20110823112046.GB2540@osiris.boeblingen.de.ibm.com Signed-off-by: Thomas Gleixner --- kernel/time/tick-sched.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 664c4a36543..7e2e0817cbf 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -669,8 +669,6 @@ static void tick_nohz_switch_to_nohz(void) next = ktime_add(next, tick_period); } local_irq_enable(); - - printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id()); } /* @@ -822,10 +820,8 @@ void tick_setup_sched_timer(void) } #ifdef CONFIG_NO_HZ - if (tick_nohz_enabled) { + if (tick_nohz_enabled) ts->nohz_mode = NOHZ_MODE_HIGHRES; - printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id()); - } #endif } #endif /* HIGH_RES_TIMERS */ -- cgit v1.2.3 From d1748302f70be7469809809283fe164156a34231 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 23 Aug 2011 15:29:42 +0200 Subject: clockevents: Make minimum delay adjustments configurable The automatic increase of the min_delta_ns of a clockevents device should be done in the clockevents code as the minimum delay is an attribute of the clockevents device. In addition not all architectures want the automatic adjustment, on a massively virtualized system it can happen that the programming of a clock event fails several times in a row because the virtual cpu has been rescheduled quickly enough. In that case the minimum delay will erroneously be increased with no way back. The new config symbol GENERIC_CLOCKEVENTS_MIN_ADJUST is used to enable the automatic adjustment. The config option is selected only for x86. Signed-off-by: Martin Schwidefsky Cc: john stultz Link: http://lkml.kernel.org/r/20110823133142.494157493@de.ibm.com Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig | 1 + include/linux/clockchips.h | 2 +- kernel/time/Kconfig | 2 + kernel/time/clockevents.c | 125 ++++++++++++++++++++++++++++++++++++++----- kernel/time/tick-broadcast.c | 4 +- kernel/time/tick-common.c | 4 +- kernel/time/tick-internal.h | 2 - kernel/time/tick-oneshot.c | 77 ++------------------------ 8 files changed, 123 insertions(+), 94 deletions(-) (limited to 'kernel/time') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6a47bb22657..a1609cd7e4c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -68,6 +68,7 @@ config X86 select GENERIC_IRQ_PROBE select GENERIC_PENDING_IRQ if SMP select GENERIC_IRQ_SHOW + select GENERIC_CLOCKEVENTS_MIN_ADJUST select IRQ_FORCED_THREADING select USE_GENERIC_SMP_HELPERS if SMP select HAVE_BPF_JIT if (X86_64 && NET) diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index d6733e27af3..39bb050bdbb 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -140,7 +140,7 @@ extern void clockevents_set_mode(struct clock_event_device *dev, enum clock_event_mode mode); extern int clockevents_register_notifier(struct notifier_block *nb); extern int clockevents_program_event(struct clock_event_device *dev, - ktime_t expires, ktime_t now); + ktime_t expires, bool force); extern void clockevents_handle_noop(struct clock_event_device *dev); diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index f06a8a36564..b26c2228fe9 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -27,3 +27,5 @@ config GENERIC_CLOCKEVENTS_BUILD default y depends on GENERIC_CLOCKEVENTS || GENERIC_CLOCKEVENTS_MIGR +config GENERIC_CLOCKEVENTS_MIN_ADJUST + bool diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index e4c699dfa4e..713ef94ecee 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -94,42 +94,139 @@ void clockevents_shutdown(struct clock_event_device *dev) dev->next_event.tv64 = KTIME_MAX; } +#ifdef CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST + +/* Limit min_delta to a jiffie */ +#define MIN_DELTA_LIMIT (NSEC_PER_SEC / HZ) + +/** + * clockevents_increase_min_delta - raise minimum delta of a clock event device + * @dev: device to increase the minimum delta + * + * Returns 0 on success, -ETIME when the minimum delta reached the limit. + */ +static int clockevents_increase_min_delta(struct clock_event_device *dev) +{ + /* Nothing to do if we already reached the limit */ + if (dev->min_delta_ns >= MIN_DELTA_LIMIT) { + printk(KERN_WARNING "CE: Reprogramming failure. Giving up\n"); + dev->next_event.tv64 = KTIME_MAX; + return -ETIME; + } + + if (dev->min_delta_ns < 5000) + dev->min_delta_ns = 5000; + else + dev->min_delta_ns += dev->min_delta_ns >> 1; + + if (dev->min_delta_ns > MIN_DELTA_LIMIT) + dev->min_delta_ns = MIN_DELTA_LIMIT; + + printk(KERN_WARNING "CE: %s increased min_delta_ns to %llu nsec\n", + dev->name ? dev->name : "?", + (unsigned long long) dev->min_delta_ns); + return 0; +} + +/** + * clockevents_program_min_delta - Set clock event device to the minimum delay. + * @dev: device to program + * + * Returns 0 on success, -ETIME when the retry loop failed. + */ +static int clockevents_program_min_delta(struct clock_event_device *dev) +{ + unsigned long long clc; + int64_t delta; + int i; + + for (i = 0;;) { + delta = dev->min_delta_ns; + dev->next_event = ktime_add_ns(ktime_get(), delta); + + if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN) + return 0; + + dev->retries++; + clc = ((unsigned long long) delta * dev->mult) >> dev->shift; + if (dev->set_next_event((unsigned long) clc, dev) == 0) + return 0; + + if (++i > 2) { + /* + * We tried 3 times to program the device with the + * given min_delta_ns. Try to increase the minimum + * delta, if that fails as well get out of here. + */ + if (clockevents_increase_min_delta(dev)) + return -ETIME; + i = 0; + } + } +} + +#else /* CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST */ + +/** + * clockevents_program_min_delta - Set clock event device to the minimum delay. + * @dev: device to program + * + * Returns 0 on success, -ETIME when the retry loop failed. + */ +static int clockevents_program_min_delta(struct clock_event_device *dev) +{ + unsigned long long clc; + int64_t delta; + + delta = dev->min_delta_ns; + dev->next_event = ktime_add_ns(ktime_get(), delta); + + if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN) + return 0; + + dev->retries++; + clc = ((unsigned long long) delta * dev->mult) >> dev->shift; + return dev->set_next_event((unsigned long) clc, dev); +} + +#endif /* CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST */ + /** * clockevents_program_event - Reprogram the clock event device. + * @dev: device to program * @expires: absolute expiry time (monotonic clock) + * @force: program minimum delay if expires can not be set * * Returns 0 on success, -ETIME when the event is in the past. */ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, - ktime_t now) + bool force) { unsigned long long clc; int64_t delta; + int rc; if (unlikely(expires.tv64 < 0)) { WARN_ON_ONCE(1); return -ETIME; } - delta = ktime_to_ns(ktime_sub(expires, now)); - - if (delta <= 0) - return -ETIME; - dev->next_event = expires; if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN) return 0; - if (delta > dev->max_delta_ns) - delta = dev->max_delta_ns; - if (delta < dev->min_delta_ns) - delta = dev->min_delta_ns; + delta = ktime_to_ns(ktime_sub(expires, ktime_get())); + if (delta <= 0) + return force ? clockevents_program_min_delta(dev) : -ETIME; - clc = delta * dev->mult; - clc >>= dev->shift; + delta = min(delta, (int64_t) dev->max_delta_ns); + delta = max(delta, (int64_t) dev->min_delta_ns); - return dev->set_next_event((unsigned long) clc, dev); + clc = ((unsigned long long) delta * dev->mult) >> dev->shift; + rc = dev->set_next_event((unsigned long) clc, dev); + + return (rc && force) ? clockevents_program_min_delta(dev) : rc; } /** @@ -258,7 +355,7 @@ int clockevents_update_freq(struct clock_event_device *dev, u32 freq) if (dev->mode != CLOCK_EVT_MODE_ONESHOT) return 0; - return clockevents_program_event(dev, dev->next_event, ktime_get()); + return clockevents_program_event(dev, dev->next_event, false); } /* diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index c7218d13273..f954282d9a8 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -194,7 +194,7 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev) for (next = dev->next_event; ;) { next = ktime_add(next, tick_period); - if (!clockevents_program_event(dev, next, ktime_get())) + if (!clockevents_program_event(dev, next, false)) return; tick_do_periodic_broadcast(); } @@ -373,7 +373,7 @@ static int tick_broadcast_set_event(ktime_t expires, int force) { struct clock_event_device *bc = tick_broadcast_device.evtdev; - return tick_dev_program_event(bc, expires, force); + return clockevents_program_event(bc, expires, force); } int tick_resume_broadcast_oneshot(struct clock_event_device *bc) diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 119528de823..da6c9ecad4e 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -94,7 +94,7 @@ void tick_handle_periodic(struct clock_event_device *dev) */ next = ktime_add(dev->next_event, tick_period); for (;;) { - if (!clockevents_program_event(dev, next, ktime_get())) + if (!clockevents_program_event(dev, next, false)) return; /* * Have to be careful here. If we're in oneshot mode, @@ -137,7 +137,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast) clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); for (;;) { - if (!clockevents_program_event(dev, next, ktime_get())) + if (!clockevents_program_event(dev, next, false)) return; next = ktime_add(next, tick_period); } diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 1009b06d6f8..4e265b901fe 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -26,8 +26,6 @@ extern void clockevents_shutdown(struct clock_event_device *dev); extern void tick_setup_oneshot(struct clock_event_device *newdev, void (*handler)(struct clock_event_device *), ktime_t nextevt); -extern int tick_dev_program_event(struct clock_event_device *dev, - ktime_t expires, int force); extern int tick_program_event(ktime_t expires, int force); extern void tick_oneshot_notify(void); extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)); diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index 2d04411a5f0..824109060a3 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c @@ -21,74 +21,6 @@ #include "tick-internal.h" -/* Limit min_delta to a jiffie */ -#define MIN_DELTA_LIMIT (NSEC_PER_SEC / HZ) - -static int tick_increase_min_delta(struct clock_event_device *dev) -{ - /* Nothing to do if we already reached the limit */ - if (dev->min_delta_ns >= MIN_DELTA_LIMIT) - return -ETIME; - - if (dev->min_delta_ns < 5000) - dev->min_delta_ns = 5000; - else - dev->min_delta_ns += dev->min_delta_ns >> 1; - - if (dev->min_delta_ns > MIN_DELTA_LIMIT) - dev->min_delta_ns = MIN_DELTA_LIMIT; - - printk(KERN_WARNING "CE: %s increased min_delta_ns to %llu nsec\n", - dev->name ? dev->name : "?", - (unsigned long long) dev->min_delta_ns); - return 0; -} - -/** - * tick_program_event internal worker function - */ -int tick_dev_program_event(struct clock_event_device *dev, ktime_t expires, - int force) -{ - ktime_t now = ktime_get(); - int i; - - for (i = 0;;) { - int ret = clockevents_program_event(dev, expires, now); - - if (!ret || !force) - return ret; - - dev->retries++; - /* - * We tried 3 times to program the device with the given - * min_delta_ns. If that's not working then we increase it - * and emit a warning. - */ - if (++i > 2) { - /* Increase the min. delta and try again */ - if (tick_increase_min_delta(dev)) { - /* - * Get out of the loop if min_delta_ns - * hit the limit already. That's - * better than staying here forever. - * - * We clear next_event so we have a - * chance that the box survives. - */ - printk(KERN_WARNING - "CE: Reprogramming failure. Giving up\n"); - dev->next_event.tv64 = KTIME_MAX; - return -ETIME; - } - i = 0; - } - - now = ktime_get(); - expires = ktime_add_ns(now, dev->min_delta_ns); - } -} - /** * tick_program_event */ @@ -96,7 +28,7 @@ int tick_program_event(ktime_t expires, int force) { struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); - return tick_dev_program_event(dev, expires, force); + return clockevents_program_event(dev, expires, force); } /** @@ -104,11 +36,10 @@ int tick_program_event(ktime_t expires, int force) */ void tick_resume_oneshot(void) { - struct tick_device *td = &__get_cpu_var(tick_cpu_device); - struct clock_event_device *dev = td->evtdev; + struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); - tick_program_event(ktime_get(), 1); + clockevents_program_event(dev, ktime_get(), true); } /** @@ -120,7 +51,7 @@ void tick_setup_oneshot(struct clock_event_device *newdev, { newdev->event_handler = handler; clockevents_set_mode(newdev, CLOCK_EVT_MODE_ONESHOT); - tick_dev_program_event(newdev, next_event, 1); + clockevents_program_event(newdev, next_event, true); } /** -- cgit v1.2.3 From 65516f8a7c2028381f0dae4c16ddb621c96158cc Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 23 Aug 2011 15:29:43 +0200 Subject: clockevents: Add direct ktime programming function There is at least one architecture (s390) with a sane clockevent device that can be programmed with the equivalent of a ktime. No need to create a delta against the current time, the ktime can be used directly. A new clock device function 'set_next_ktime' is introduced that is called with the unmodified ktime for the timer if the clock event device has the CLOCK_EVT_FEAT_KTIME bit set. Signed-off-by: Martin Schwidefsky Cc: john stultz Link: http://lkml.kernel.org/r/20110823133142.815350967@de.ibm.com Signed-off-by: Thomas Gleixner --- include/linux/clockchips.h | 10 +++++++--- kernel/time/clockevents.c | 4 ++++ 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'kernel/time') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 39bb050bdbb..81e803e90aa 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -45,20 +45,22 @@ enum clock_event_nofitiers { */ #define CLOCK_EVT_FEAT_PERIODIC 0x000001 #define CLOCK_EVT_FEAT_ONESHOT 0x000002 +#define CLOCK_EVT_FEAT_KTIME 0x000004 /* * x86(64) specific misfeatures: * * - Clockevent source stops in C3 State and needs broadcast support. * - Local APIC timer is used as a dummy device. */ -#define CLOCK_EVT_FEAT_C3STOP 0x000004 -#define CLOCK_EVT_FEAT_DUMMY 0x000008 +#define CLOCK_EVT_FEAT_C3STOP 0x000008 +#define CLOCK_EVT_FEAT_DUMMY 0x000010 /** * struct clock_event_device - clock event device descriptor * @event_handler: Assigned by the framework to be called by the low * level handler of the event source - * @set_next_event: set next event function + * @set_next_event: set next event function using a clocksource delta + * @set_next_ktime: set next event function using a direct ktime value * @next_event: local storage for the next event in oneshot mode * @max_delta_ns: maximum delta value in ns * @min_delta_ns: minimum delta value in ns @@ -81,6 +83,8 @@ struct clock_event_device { void (*event_handler)(struct clock_event_device *); int (*set_next_event)(unsigned long evt, struct clock_event_device *); + int (*set_next_ktime)(ktime_t expires, + struct clock_event_device *); ktime_t next_event; u64 max_delta_ns; u64 min_delta_ns; diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 713ef94ecee..1ecd6ba36d6 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -216,6 +216,10 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN) return 0; + /* Shortcut for clockevent devices that can deal with ktime. */ + if (dev->features & CLOCK_EVT_FEAT_KTIME) + return dev->set_next_ktime(expires, dev); + delta = ktime_to_ns(ktime_sub(expires, ktime_get())); if (delta <= 0) return force ? clockevents_program_min_delta(dev) : -ETIME; -- cgit v1.2.3 From 9fb60336253edf73dedc527b2aa2bf32eae0d6da Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 12 Sep 2011 13:32:23 +0200 Subject: clocksource: Make watchdog reset lockless KGDB needs to trylock watchdog_lock when trying to reset the clocksource watchdog after the system has been stopped to avoid a potential deadlock. When the trylock fails TSC usually becomes unstable. We can be more clever by using an atomic counter and checking it in the clocksource_watchdog callback. We restart the watchdog whenever the counter is > 0 and only decrement the counter when we ran through a full update cycle. Signed-off-by: Thomas Gleixner Cc: John Stultz Acked-by: Jason Wessel Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/alpine.LFD.2.02.1109121326280.2723@ionos Signed-off-by: Thomas Gleixner --- kernel/time/clocksource.c | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index e0980f0d9a0..cf52fda2e09 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -186,6 +186,7 @@ static struct timer_list watchdog_timer; static DECLARE_WORK(watchdog_work, clocksource_watchdog_work); static DEFINE_SPINLOCK(watchdog_lock); static int watchdog_running; +static atomic_t watchdog_reset_pending; static int clocksource_watchdog_kthread(void *data); static void __clocksource_change_rating(struct clocksource *cs, int rating); @@ -247,12 +248,14 @@ static void clocksource_watchdog(unsigned long data) struct clocksource *cs; cycle_t csnow, wdnow; int64_t wd_nsec, cs_nsec; - int next_cpu; + int next_cpu, reset_pending; spin_lock(&watchdog_lock); if (!watchdog_running) goto out; + reset_pending = atomic_read(&watchdog_reset_pending); + list_for_each_entry(cs, &watchdog_list, wd_list) { /* Clocksource already marked unstable? */ @@ -268,7 +271,8 @@ static void clocksource_watchdog(unsigned long data) local_irq_enable(); /* Clocksource initialized ? */ - if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) { + if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) || + atomic_read(&watchdog_reset_pending)) { cs->flags |= CLOCK_SOURCE_WATCHDOG; cs->wd_last = wdnow; cs->cs_last = csnow; @@ -283,8 +287,11 @@ static void clocksource_watchdog(unsigned long data) cs->cs_last = csnow; cs->wd_last = wdnow; + if (atomic_read(&watchdog_reset_pending)) + continue; + /* Check the deviation from the watchdog clocksource. */ - if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) { + if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) { clocksource_unstable(cs, cs_nsec - wd_nsec); continue; } @@ -302,6 +309,13 @@ static void clocksource_watchdog(unsigned long data) } } + /* + * We only clear the watchdog_reset_pending, when we did a + * full cycle through all clocksources. + */ + if (reset_pending) + atomic_dec(&watchdog_reset_pending); + /* * Cycle through CPUs to check if the CPUs stay synchronized * to each other. @@ -344,23 +358,7 @@ static inline void clocksource_reset_watchdog(void) static void clocksource_resume_watchdog(void) { - unsigned long flags; - - /* - * We use trylock here to avoid a potential dead lock when - * kgdb calls this code after the kernel has been stopped with - * watchdog_lock held. When watchdog_lock is held we just - * return and accept, that the watchdog might trigger and mark - * the monitored clock source (usually TSC) unstable. - * - * This does not affect the other caller clocksource_resume() - * because at this point the kernel is UP, interrupts are - * disabled and nothing can hold watchdog_lock. - */ - if (!spin_trylock_irqsave(&watchdog_lock, flags)) - return; - clocksource_reset_watchdog(); - spin_unlock_irqrestore(&watchdog_lock, flags); + atomic_inc(&watchdog_reset_pending); } static void clocksource_enqueue_watchdog(struct clocksource *cs) -- cgit v1.2.3 From 4523f6ada86853750565c68e17126af2e3df9b8a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Sep 2011 10:54:29 +0200 Subject: alarmtimers: Fix error handling commit 8bc0daf (alarmtimers: Rework RTC device selection using class interface) did not implement required error checks. Add them. Signed-off-by: Thomas Gleixner --- kernel/time/alarmtimer.c | 41 +++++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 10 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 154d5563ab1..c436e790b21 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -100,19 +100,25 @@ static struct class_interface alarmtimer_rtc_interface = { .add_dev = &alarmtimer_rtc_add_device, }; -static void alarmtimer_rtc_interface_setup(void) +static int alarmtimer_rtc_interface_setup(void) { alarmtimer_rtc_interface.class = rtc_class; - class_interface_register(&alarmtimer_rtc_interface); + return class_interface_register(&alarmtimer_rtc_interface); +} +static void alarmtimer_rtc_interface_remove(void) +{ + class_interface_unregister(&alarmtimer_rtc_interface); } #else -#define alarmtimer_get_rtcdev() (0) -#define rtcdev (0) -#define alarmtimer_rtc_interface_setup() +static inline struct rtc_device *alarmtimer_get_rtcdev(void) +{ + return NULL; +} +#define rtcdev (NULL) +static inline int alarmtimer_rtc_interface_setup(void) { return 0; } +static inline void alarmtimer_rtc_interface_remove(void) { } #endif - - /** * alarmtimer_enqueue - Adds an alarm timer to an alarm_base timerqueue * @base: pointer to the base where the timer is being run @@ -764,6 +770,7 @@ static struct platform_driver alarmtimer_driver = { */ static int __init alarmtimer_init(void) { + struct platform_device *pdev; int error = 0; int i; struct k_clock alarm_clock = { @@ -793,11 +800,25 @@ static int __init alarmtimer_init(void) alarm_bases[i].timer.function = alarmtimer_fired; } - alarmtimer_rtc_interface_setup(); + error = alarmtimer_rtc_interface_setup(); + if (error) + return error; + error = platform_driver_register(&alarmtimer_driver); - platform_device_register_simple("alarmtimer", -1, NULL, 0); + if (error) + goto out_if; + pdev = platform_device_register_simple("alarmtimer", -1, NULL, 0); + if (IS_ERR(pdev)) { + error = PTR_ERR(pdev); + goto out_drv; + } + return 0; + +out_drv: + platform_driver_unregister(&alarmtimer_driver); +out_if: + alarmtimer_rtc_interface_remove(); return error; } device_initcall(alarmtimer_init); - -- cgit v1.2.3