diff options
Diffstat (limited to 'kernel/sched/cputime.c')
| -rw-r--r-- | kernel/sched/cputime.c | 211 |
1 files changed, 87 insertions, 124 deletions
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index a846cf89eb96..7700a9cba335 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -23,10 +23,8 @@ * task when irq is in progress while we read rq->clock. That is a worthy * compromise in place of having locks on each irq in account_system_time. */ -DEFINE_PER_CPU(u64, cpu_hardirq_time); -DEFINE_PER_CPU(u64, cpu_softirq_time); +DEFINE_PER_CPU(struct irqtime, cpu_irqtime); -static DEFINE_PER_CPU(u64, irq_start_time); static int sched_clock_irqtime; void enable_sched_clock_irqtime(void) @@ -39,16 +37,13 @@ void disable_sched_clock_irqtime(void) sched_clock_irqtime = 0; } -#ifndef CONFIG_64BIT -DEFINE_PER_CPU(seqcount_t, irq_time_seq); -#endif /* CONFIG_64BIT */ - /* * Called before incrementing preempt_count on {soft,}irq_enter * and before decrementing preempt_count on {soft,}irq_exit. */ void irqtime_account_irq(struct task_struct *curr) { + struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime); s64 delta; int cpu; @@ -56,10 +51,10 @@ void irqtime_account_irq(struct task_struct *curr) return; cpu = smp_processor_id(); - delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time); - __this_cpu_add(irq_start_time, delta); + delta = sched_clock_cpu(cpu) - irqtime->irq_start_time; + irqtime->irq_start_time += delta; - irq_time_write_begin(); + u64_stats_update_begin(&irqtime->sync); /* * We do not account for softirq time from ksoftirqd here. * We want to continue accounting softirq time to ksoftirqd thread @@ -67,42 +62,36 @@ void irqtime_account_irq(struct task_struct *curr) * that do not consume any time, but still wants to run. */ if (hardirq_count()) - __this_cpu_add(cpu_hardirq_time, delta); + irqtime->hardirq_time += delta; else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) - __this_cpu_add(cpu_softirq_time, delta); + irqtime->softirq_time += delta; - irq_time_write_end(); + u64_stats_update_end(&irqtime->sync); } EXPORT_SYMBOL_GPL(irqtime_account_irq); -static cputime_t irqtime_account_hi_update(cputime_t maxtime) +static cputime_t irqtime_account_update(u64 irqtime, int idx, cputime_t maxtime) { u64 *cpustat = kcpustat_this_cpu->cpustat; - unsigned long flags; cputime_t irq_cputime; - local_irq_save(flags); - irq_cputime = nsecs_to_cputime64(this_cpu_read(cpu_hardirq_time)) - - cpustat[CPUTIME_IRQ]; + irq_cputime = nsecs_to_cputime64(irqtime) - cpustat[idx]; irq_cputime = min(irq_cputime, maxtime); - cpustat[CPUTIME_IRQ] += irq_cputime; - local_irq_restore(flags); + cpustat[idx] += irq_cputime; + return irq_cputime; } -static cputime_t irqtime_account_si_update(cputime_t maxtime) +static cputime_t irqtime_account_hi_update(cputime_t maxtime) { - u64 *cpustat = kcpustat_this_cpu->cpustat; - unsigned long flags; - cputime_t softirq_cputime; + return irqtime_account_update(__this_cpu_read(cpu_irqtime.hardirq_time), + CPUTIME_IRQ, maxtime); +} - local_irq_save(flags); - softirq_cputime = nsecs_to_cputime64(this_cpu_read(cpu_softirq_time)) - - cpustat[CPUTIME_SOFTIRQ]; - softirq_cputime = min(softirq_cputime, maxtime); - cpustat[CPUTIME_SOFTIRQ] += softirq_cputime; - local_irq_restore(flags); - return softirq_cputime; +static cputime_t irqtime_account_si_update(cputime_t maxtime) +{ + return irqtime_account_update(__this_cpu_read(cpu_irqtime.softirq_time), + CPUTIME_SOFTIRQ, maxtime); } #else /* CONFIG_IRQ_TIME_ACCOUNTING */ @@ -139,16 +128,13 @@ static inline void task_group_account_field(struct task_struct *p, int index, * Account user cpu time to a process. * @p: the process that the cpu time gets accounted to * @cputime: the cpu time spent in user space since the last update - * @cputime_scaled: cputime scaled by cpu frequency */ -void account_user_time(struct task_struct *p, cputime_t cputime, - cputime_t cputime_scaled) +void account_user_time(struct task_struct *p, cputime_t cputime) { int index; /* Add user time to process. */ p->utime += cputime; - p->utimescaled += cputime_scaled; account_group_user_time(p, cputime); index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; @@ -164,16 +150,13 @@ void account_user_time(struct task_struct *p, cputime_t cputime, * Account guest cpu time to a process. * @p: the process that the cpu time gets accounted to * @cputime: the cpu time spent in virtual machine since the last update - * @cputime_scaled: cputime scaled by cpu frequency */ -static void account_guest_time(struct task_struct *p, cputime_t cputime, - cputime_t cputime_scaled) +static void account_guest_time(struct task_struct *p, cputime_t cputime) { u64 *cpustat = kcpustat_this_cpu->cpustat; /* Add guest time to process. */ p->utime += cputime; - p->utimescaled += cputime_scaled; account_group_user_time(p, cputime); p->gtime += cputime; @@ -191,16 +174,13 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime, * Account system cpu time to a process and desired cpustat field * @p: the process that the cpu time gets accounted to * @cputime: the cpu time spent in kernel space since the last update - * @cputime_scaled: cputime scaled by cpu frequency - * @target_cputime64: pointer to cpustat field that has to be updated + * @index: pointer to cpustat field that has to be updated */ static inline -void __account_system_time(struct task_struct *p, cputime_t cputime, - cputime_t cputime_scaled, int index) +void __account_system_time(struct task_struct *p, cputime_t cputime, int index) { /* Add system time to process. */ p->stime += cputime; - p->stimescaled += cputime_scaled; account_group_system_time(p, cputime); /* Add system time to cpustat. */ @@ -215,15 +195,14 @@ void __account_system_time(struct task_struct *p, cputime_t cputime, * @p: the process that the cpu time gets accounted to * @hardirq_offset: the offset to subtract from hardirq_count() * @cputime: the cpu time spent in kernel space since the last update - * @cputime_scaled: cputime scaled by cpu frequency */ void account_system_time(struct task_struct *p, int hardirq_offset, - cputime_t cputime, cputime_t cputime_scaled) + cputime_t cputime) { int index; if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) { - account_guest_time(p, cputime, cputime_scaled); + account_guest_time(p, cputime); return; } @@ -234,7 +213,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset, else index = CPUTIME_SYSTEM; - __account_system_time(p, cputime, cputime_scaled, index); + __account_system_time(p, cputime, index); } /* @@ -295,6 +274,9 @@ static inline cputime_t account_other_time(cputime_t max) { cputime_t accounted; + /* Shall be converted to a lockdep-enabled lightweight check */ + WARN_ON_ONCE(!irqs_disabled()); + accounted = steal_account_process_time(max); if (accounted < max) @@ -306,6 +288,26 @@ static inline cputime_t account_other_time(cputime_t max) return accounted; } +#ifdef CONFIG_64BIT +static inline u64 read_sum_exec_runtime(struct task_struct *t) +{ + return t->se.sum_exec_runtime; +} +#else +static u64 read_sum_exec_runtime(struct task_struct *t) +{ + u64 ns; + struct rq_flags rf; + struct rq *rq; + + rq = task_rq_lock(t, &rf); + ns = t->se.sum_exec_runtime; + task_rq_unlock(rq, t, &rf); + + return ns; +} +#endif + /* * Accumulate raw cputime values of dead tasks (sig->[us]time) and live * tasks (sum on group iteration) belonging to @tsk's group. @@ -318,6 +320,17 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) unsigned int seq, nextseq; unsigned long flags; + /* + * Update current task runtime to account pending time since last + * scheduler action or thread_group_cputime() call. This thread group + * might have other running tasks on different CPUs, but updating + * their runtime can affect syscall performance, so we skip account + * those pending times and rely only on values updated on tick or + * other scheduler action. + */ + if (same_thread_group(current, tsk)) + (void) task_sched_runtime(current); + rcu_read_lock(); /* Attempt a lockless read on the first round. */ nextseq = 0; @@ -332,7 +345,7 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) task_cputime(t, &utime, &stime); times->utime += utime; times->stime += stime; - times->sum_exec_runtime += task_sched_runtime(t); + times->sum_exec_runtime += read_sum_exec_runtime(t); } /* If lockless access failed, take the lock. */ nextseq = 1; @@ -367,7 +380,7 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick, struct rq *rq, int ticks) { u64 cputime = (__force u64) cputime_one_jiffy * ticks; - cputime_t scaled, other; + cputime_t other; /* * When returning from idle, many ticks can get accounted at @@ -380,7 +393,6 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick, if (other >= cputime) return; cputime -= other; - scaled = cputime_to_scaled(cputime); if (this_cpu_ksoftirqd() == p) { /* @@ -388,15 +400,15 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick, * So, we have to handle it separately here. * Also, p->stime needs to be updated for ksoftirqd. */ - __account_system_time(p, cputime, scaled, CPUTIME_SOFTIRQ); + __account_system_time(p, cputime, CPUTIME_SOFTIRQ); } else if (user_tick) { - account_user_time(p, cputime, scaled); + account_user_time(p, cputime); } else if (p == rq->idle) { account_idle_time(cputime); } else if (p->flags & PF_VCPU) { /* System time or guest time */ - account_guest_time(p, cputime, scaled); + account_guest_time(p, cputime); } else { - __account_system_time(p, cputime, scaled, CPUTIME_SYSTEM); + __account_system_time(p, cputime, CPUTIME_SYSTEM); } } @@ -479,7 +491,7 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime */ void account_process_tick(struct task_struct *p, int user_tick) { - cputime_t cputime, scaled, steal; + cputime_t cputime, steal; struct rq *rq = this_rq(); if (vtime_accounting_cpu_enabled()) @@ -497,12 +509,11 @@ void account_process_tick(struct task_struct *p, int user_tick) return; cputime -= steal; - scaled = cputime_to_scaled(cputime); if (user_tick) - account_user_time(p, cputime, scaled); + account_user_time(p, cputime); else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) - account_system_time(p, HARDIRQ_OFFSET, cputime, scaled); + account_system_time(p, HARDIRQ_OFFSET, cputime); else account_idle_time(cputime); } @@ -723,7 +734,7 @@ static void __vtime_account_system(struct task_struct *tsk) { cputime_t delta_cpu = get_vtime_delta(tsk); - account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu)); + account_system_time(tsk, irq_count(), delta_cpu); } void vtime_account_system(struct task_struct *tsk) @@ -744,7 +755,7 @@ void vtime_account_user(struct task_struct *tsk) tsk->vtime_snap_whence = VTIME_SYS; if (vtime_delta(tsk)) { delta_cpu = get_vtime_delta(tsk); - account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu)); + account_user_time(tsk, delta_cpu); } write_seqcount_end(&tsk->vtime_seqcount); } @@ -840,29 +851,25 @@ cputime_t task_gtime(struct task_struct *t) * add up the pending nohz execution time since the last * cputime snapshot. */ -static void -fetch_task_cputime(struct task_struct *t, - cputime_t *u_dst, cputime_t *s_dst, - cputime_t *u_src, cputime_t *s_src, - cputime_t *udelta, cputime_t *sdelta) +void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime) { + cputime_t delta; unsigned int seq; - unsigned long long delta; - do { - *udelta = 0; - *sdelta = 0; + if (!vtime_accounting_enabled()) { + *utime = t->utime; + *stime = t->stime; + return; + } + do { seq = read_seqcount_begin(&t->vtime_seqcount); - if (u_dst) - *u_dst = *u_src; - if (s_dst) - *s_dst = *s_src; + *utime = t->utime; + *stime = t->stime; /* Task is sleeping, nothing to add */ - if (t->vtime_snap_whence == VTIME_INACTIVE || - is_idle_task(t)) + if (t->vtime_snap_whence == VTIME_INACTIVE || is_idle_task(t)) continue; delta = vtime_delta(t); @@ -871,54 +878,10 @@ fetch_task_cputime(struct task_struct *t, * Task runs either in user or kernel space, add pending nohz time to * the right place. */ - if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU) { - *udelta = delta; - } else { - if (t->vtime_snap_whence == VTIME_SYS) - *sdelta = delta; - } + if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU) + *utime += delta; + else if (t->vtime_snap_whence == VTIME_SYS) + *stime += delta; } while (read_seqcount_retry(&t->vtime_seqcount, seq)); } - - -void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime) -{ - cputime_t udelta, sdelta; - - if (!vtime_accounting_enabled()) { - if (utime) - *utime = t->utime; - if (stime) - *stime = t->stime; - return; - } - - fetch_task_cputime(t, utime, stime, &t->utime, - &t->stime, &udelta, &sdelta); - if (utime) - *utime += udelta; - if (stime) - *stime += sdelta; -} - -void task_cputime_scaled(struct task_struct *t, - cputime_t *utimescaled, cputime_t *stimescaled) -{ - cputime_t udelta, sdelta; - - if (!vtime_accounting_enabled()) { - if (utimescaled) - *utimescaled = t->utimescaled; - if (stimescaled) - *stimescaled = t->stimescaled; - return; - } - - fetch_task_cputime(t, utimescaled, stimescaled, - &t->utimescaled, &t->stimescaled, &udelta, &sdelta); - if (utimescaled) - *utimescaled += cputime_to_scaled(udelta); - if (stimescaled) - *stimescaled += cputime_to_scaled(sdelta); -} #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */ |
