From 87be28aaf1458445d5f648688c2eec0f13b8f3b9 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 25 Jun 2015 18:43:58 +0200 Subject: x86/asm/tsc: Replace rdtscll() with native_read_tsc() Now that the ->read_tsc() paravirt hook is gone, rdtscll() is just a wrapper around native_read_tsc(). Unwrap it. Signed-off-by: Andy Lutomirski Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Huang Rui Cc: John Stultz Cc: Len Brown Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Thomas Gleixner Cc: kvm ML Link: http://lkml.kernel.org/r/d2449ae62c1b1fb90195bcfb19ef4a35883a04dc.1434501121.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/lib/delay.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index 39d6a3db0b96..9a52ad0c0758 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -100,7 +100,7 @@ void use_tsc_delay(void) int read_current_timer(unsigned long *timer_val) { if (delay_fn == delay_tsc) { - rdtscll(*timer_val); + *timer_val = native_read_tsc(); return 0; } return -1; -- cgit v1.2.3 From 9cfa1a0279e22063a727fd204a75cf3672860d83 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 25 Jun 2015 18:44:00 +0200 Subject: x86/asm/tsc: Use the full 64-bit TSC in delay_tsc() As a very minor optimization, delay_tsc() was only using the low 32 bits of the TSC. It's a delay function, so just use the whole thing. Signed-off-by: Andy Lutomirski Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Huang Rui Cc: John Stultz Cc: Len Brown Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Thomas Gleixner Cc: kvm ML Link: http://lkml.kernel.org/r/bd1a277c71321b67c4794970cb5ace05efe21ab6.1434501121.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/lib/delay.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index 9a52ad0c0758..35115f3786a9 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -49,16 +49,16 @@ static void delay_loop(unsigned long loops) /* TSC based delay: */ static void delay_tsc(unsigned long __loops) { - u32 bclock, now, loops = __loops; + u64 bclock, now, loops = __loops; int cpu; preempt_disable(); cpu = smp_processor_id(); rdtsc_barrier(); - rdtscl(bclock); + bclock = native_read_tsc(); for (;;) { rdtsc_barrier(); - rdtscl(now); + now = native_read_tsc(); if ((now - bclock) >= loops) break; @@ -80,7 +80,7 @@ static void delay_tsc(unsigned long __loops) loops -= (now - bclock); cpu = smp_processor_id(); rdtsc_barrier(); - rdtscl(bclock); + bclock = native_read_tsc(); } } preempt_enable(); -- cgit v1.2.3 From 4ea1636b04dbd66536fa387bae2eea463efc705b Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 25 Jun 2015 18:44:07 +0200 Subject: x86/asm/tsc: Rename native_read_tsc() to rdtsc() Now that there is no paravirt TSC, the "native" is inappropriate. The function does RDTSC, so give it the obvious name: rdtsc(). Suggested-by: Borislav Petkov Signed-off-by: Andy Lutomirski Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Huang Rui Cc: John Stultz Cc: Len Brown Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Thomas Gleixner Cc: kvm ML Link: http://lkml.kernel.org/r/fd43e16281991f096c1e4d21574d9e1402c62d39.1434501121.git.luto@kernel.org [ Ported it to v4.2-rc1. ] Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/aslr.c | 2 +- arch/x86/entry/vdso/vclock_gettime.c | 2 +- arch/x86/include/asm/msr.h | 11 ++++++++++- arch/x86/include/asm/pvclock.h | 2 +- arch/x86/include/asm/stackprotector.h | 2 +- arch/x86/include/asm/tsc.h | 2 +- arch/x86/kernel/apb_timer.c | 8 ++++---- arch/x86/kernel/apic/apic.c | 8 ++++---- arch/x86/kernel/cpu/amd.c | 4 ++-- arch/x86/kernel/cpu/mcheck/mce.c | 4 ++-- arch/x86/kernel/espfix_64.c | 2 +- arch/x86/kernel/hpet.c | 4 ++-- arch/x86/kernel/trace_clock.c | 2 +- arch/x86/kernel/tsc.c | 4 ++-- arch/x86/kvm/lapic.c | 4 ++-- arch/x86/kvm/svm.c | 4 ++-- arch/x86/kvm/vmx.c | 4 ++-- arch/x86/kvm/x86.c | 12 ++++++------ arch/x86/lib/delay.c | 8 ++++---- drivers/cpufreq/intel_pstate.c | 2 +- drivers/input/gameport/gameport.c | 4 ++-- drivers/input/joystick/analog.c | 4 ++-- drivers/net/hamradio/baycom_epp.c | 2 +- drivers/thermal/intel_powerclamp.c | 4 ++-- tools/power/cpupower/debug/kernel/cpufreq-test_tsc.c | 4 ++-- 25 files changed, 59 insertions(+), 50 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c index ea33236190b1..6a9b96b4624d 100644 --- a/arch/x86/boot/compressed/aslr.c +++ b/arch/x86/boot/compressed/aslr.c @@ -82,7 +82,7 @@ static unsigned long get_random_long(void) if (has_cpuflag(X86_FEATURE_TSC)) { debug_putstr(" RDTSC"); - raw = native_read_tsc(); + raw = rdtsc(); random ^= raw; use_i8254 = false; diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 972b488ac16a..0340d93c18ca 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -186,7 +186,7 @@ notrace static cycle_t vread_tsc(void) * but no one has ever seen it happen. */ rdtsc_barrier(); - ret = (cycle_t)native_read_tsc(); + ret = (cycle_t)rdtsc(); last = gtod->cycle_last; diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index c89ed6ceed02..ff0c120dafe5 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -109,7 +109,16 @@ notrace static inline int native_write_msr_safe(unsigned int msr, extern int rdmsr_safe_regs(u32 regs[8]); extern int wrmsr_safe_regs(u32 regs[8]); -static __always_inline unsigned long long native_read_tsc(void) +/** + * rdtsc() - returns the current TSC without ordering constraints + * + * rdtsc() returns the result of RDTSC as a 64-bit integer. The + * only ordering constraint it supplies is the ordering implied by + * "asm volatile": it will put the RDTSC in the place you expect. The + * CPU can and will speculatively execute that RDTSC, though, so the + * results can be non-monotonic if compared on different CPUs. + */ +static __always_inline unsigned long long rdtsc(void) { DECLARE_ARGS(val, low, high); diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 2bd69d62c623..5c490db62e32 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -62,7 +62,7 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift) static __always_inline u64 pvclock_get_nsec_offset(const struct pvclock_vcpu_time_info *src) { - u64 delta = native_read_tsc() - src->tsc_timestamp; + u64 delta = rdtsc() - src->tsc_timestamp; return pvclock_scale_delta(delta, src->tsc_to_system_mul, src->tsc_shift); } diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index bc5fa2af112e..58505f01962f 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -72,7 +72,7 @@ static __always_inline void boot_init_stack_canary(void) * on during the bootup the random pool has true entropy too. */ get_random_bytes(&canary, sizeof(canary)); - tsc = native_read_tsc(); + tsc = rdtsc(); canary += tsc + (tsc << 32UL); current->stack_canary = canary; diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index b4883902948b..3df7675debcf 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -26,7 +26,7 @@ static inline cycles_t get_cycles(void) return 0; #endif - return native_read_tsc(); + return rdtsc(); } extern void tsc_init(void); diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index 25efa534c4e4..222a57076039 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c @@ -263,7 +263,7 @@ static int apbt_clocksource_register(void) /* Verify whether apbt counter works */ t1 = dw_apb_clocksource_read(clocksource_apbt); - start = native_read_tsc(); + start = rdtsc(); /* * We don't know the TSC frequency yet, but waiting for @@ -273,7 +273,7 @@ static int apbt_clocksource_register(void) */ do { rep_nop(); - now = native_read_tsc(); + now = rdtsc(); } while ((now - start) < 200000UL); /* APBT is the only always on clocksource, it has to work! */ @@ -390,13 +390,13 @@ unsigned long apbt_quick_calibrate(void) old = dw_apb_clocksource_read(clocksource_apbt); old += loop; - t1 = native_read_tsc(); + t1 = rdtsc(); do { new = dw_apb_clocksource_read(clocksource_apbt); } while (new < old); - t2 = native_read_tsc(); + t2 = rdtsc(); shift = 5; if (unlikely(loop >> shift == 0)) { diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 51af1ed1ae2e..0d71cd9b4a50 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -457,7 +457,7 @@ static int lapic_next_deadline(unsigned long delta, { u64 tsc; - tsc = native_read_tsc(); + tsc = rdtsc(); wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR)); return 0; } @@ -592,7 +592,7 @@ static void __init lapic_cal_handler(struct clock_event_device *dev) unsigned long pm = acpi_pm_read_early(); if (cpu_has_tsc) - tsc = native_read_tsc(); + tsc = rdtsc(); switch (lapic_cal_loops++) { case 0: @@ -1209,7 +1209,7 @@ void setup_local_APIC(void) long long max_loops = cpu_khz ? cpu_khz : 1000000; if (cpu_has_tsc) - tsc = native_read_tsc(); + tsc = rdtsc(); if (disable_apic) { disable_ioapic_support(); @@ -1293,7 +1293,7 @@ void setup_local_APIC(void) } if (queued) { if (cpu_has_tsc && cpu_khz) { - ntsc = native_read_tsc(); + ntsc = rdtsc(); max_loops = (cpu_khz << 10) - (ntsc - tsc); } else max_loops--; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index a69710db6112..51ad2af84a72 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -125,10 +125,10 @@ static void init_amd_k6(struct cpuinfo_x86 *c) n = K6_BUG_LOOP; f_vide = vide; - d = native_read_tsc(); + d = rdtsc(); while (n--) f_vide(); - d2 = native_read_tsc(); + d2 = rdtsc(); d = d2-d; if (d > 20*K6_BUG_LOOP) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index a5283d2d0094..96cceccd11b4 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -125,7 +125,7 @@ void mce_setup(struct mce *m) { memset(m, 0, sizeof(struct mce)); m->cpu = m->extcpu = smp_processor_id(); - m->tsc = native_read_tsc(); + m->tsc = rdtsc(); /* We hope get_seconds stays lockless */ m->time = get_seconds(); m->cpuvendor = boot_cpu_data.x86_vendor; @@ -1784,7 +1784,7 @@ static void collect_tscs(void *data) { unsigned long *cpu_tsc = (unsigned long *)data; - cpu_tsc[smp_processor_id()] = native_read_tsc(); + cpu_tsc[smp_processor_id()] = rdtsc(); } static int mce_apei_read_done; diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index 334a2a9c034d..67315cd0132c 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -110,7 +110,7 @@ static void init_espfix_random(void) */ if (!arch_get_random_long(&rand)) { /* The constant is an arbitrary large prime */ - rand = native_read_tsc(); + rand = rdtsc(); rand *= 0xc345c6b72fd16123UL; } diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index cc390fe69b71..f75c5908c7a6 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -735,7 +735,7 @@ static int hpet_clocksource_register(void) /* Verify whether hpet counter works */ t1 = hpet_readl(HPET_COUNTER); - start = native_read_tsc(); + start = rdtsc(); /* * We don't know the TSC frequency yet, but waiting for @@ -745,7 +745,7 @@ static int hpet_clocksource_register(void) */ do { rep_nop(); - now = native_read_tsc(); + now = rdtsc(); } while ((now - start) < 200000UL); if (t1 == hpet_readl(HPET_COUNTER)) { diff --git a/arch/x86/kernel/trace_clock.c b/arch/x86/kernel/trace_clock.c index bd8f4d41bd56..67efb8c96fc4 100644 --- a/arch/x86/kernel/trace_clock.c +++ b/arch/x86/kernel/trace_clock.c @@ -15,7 +15,7 @@ u64 notrace trace_clock_x86_tsc(void) u64 ret; rdtsc_barrier(); - ret = native_read_tsc(); + ret = rdtsc(); return ret; } diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index e66f5dcaeb63..21d6e04e3e82 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -248,7 +248,7 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) data = cyc2ns_write_begin(cpu); - tsc_now = native_read_tsc(); + tsc_now = rdtsc(); ns_now = cycles_2_ns(tsc_now); /* @@ -290,7 +290,7 @@ u64 native_sched_clock(void) } /* read the Time Stamp Counter: */ - tsc_now = native_read_tsc(); + tsc_now = rdtsc(); /* return the value in ns */ return cycles_2_ns(tsc_now); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 954e98a8c2e3..2f0ade48614f 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1172,7 +1172,7 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu) tsc_deadline = apic->lapic_timer.expired_tscdeadline; apic->lapic_timer.expired_tscdeadline = 0; - guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc()); + guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, rdtsc()); trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline); /* __delay is delay_tsc whenever the hardware has TSC, thus always. */ @@ -1240,7 +1240,7 @@ static void start_apic_timer(struct kvm_lapic *apic) local_irq_save(flags); now = apic->lapic_timer.timer.base->get_time(); - guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc()); + guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, rdtsc()); if (likely(tscdeadline > guest_tsc)) { ns = (tscdeadline - guest_tsc) * 1000000ULL; do_div(ns, this_tsc_khz); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 602b974a60a6..8dfbad7a2c44 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1080,7 +1080,7 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) { u64 tsc; - tsc = svm_scale_tsc(vcpu, native_read_tsc()); + tsc = svm_scale_tsc(vcpu, rdtsc()); return target_tsc - tsc; } @@ -3079,7 +3079,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) switch (msr_info->index) { case MSR_IA32_TSC: { msr_info->data = svm->vmcb->control.tsc_offset + - svm_scale_tsc(vcpu, native_read_tsc()); + svm_scale_tsc(vcpu, rdtsc()); break; } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4fa1ccad7beb..10d69a6df14f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2236,7 +2236,7 @@ static u64 guest_read_tsc(void) { u64 host_tsc, tsc_offset; - host_tsc = native_read_tsc(); + host_tsc = rdtsc(); tsc_offset = vmcs_read64(TSC_OFFSET); return host_tsc + tsc_offset; } @@ -2317,7 +2317,7 @@ static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool ho static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) { - return target_tsc - native_read_tsc(); + return target_tsc - rdtsc(); } static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f771058cfb5c..dfa97139282d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1455,7 +1455,7 @@ static cycle_t read_tsc(void) * but no one has ever seen it happen. */ rdtsc_barrier(); - ret = (cycle_t)native_read_tsc(); + ret = (cycle_t)rdtsc(); last = pvclock_gtod_data.clock.cycle_last; @@ -1646,7 +1646,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) return 1; } if (!use_master_clock) { - host_tsc = native_read_tsc(); + host_tsc = rdtsc(); kernel_ns = get_kernel_ns(); } @@ -2810,7 +2810,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) { s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 : - native_read_tsc() - vcpu->arch.last_host_tsc; + rdtsc() - vcpu->arch.last_host_tsc; if (tsc_delta < 0) mark_tsc_unstable("KVM discovered backwards TSC"); if (check_tsc_unstable()) { @@ -2838,7 +2838,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { kvm_x86_ops->vcpu_put(vcpu); kvm_put_guest_fpu(vcpu); - vcpu->arch.last_host_tsc = native_read_tsc(); + vcpu->arch.last_host_tsc = rdtsc(); } static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, @@ -6623,7 +6623,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) hw_breakpoint_restore(); vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, - native_read_tsc()); + rdtsc()); vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); @@ -7437,7 +7437,7 @@ int kvm_arch_hardware_enable(void) if (ret != 0) return ret; - local_tsc = native_read_tsc(); + local_tsc = rdtsc(); stable = !check_tsc_unstable(); list_for_each_entry(kvm, &vm_list, vm_list) { kvm_for_each_vcpu(i, vcpu, kvm) { diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index 35115f3786a9..f24bc59ab0a0 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -55,10 +55,10 @@ static void delay_tsc(unsigned long __loops) preempt_disable(); cpu = smp_processor_id(); rdtsc_barrier(); - bclock = native_read_tsc(); + bclock = rdtsc(); for (;;) { rdtsc_barrier(); - now = native_read_tsc(); + now = rdtsc(); if ((now - bclock) >= loops) break; @@ -80,7 +80,7 @@ static void delay_tsc(unsigned long __loops) loops -= (now - bclock); cpu = smp_processor_id(); rdtsc_barrier(); - bclock = native_read_tsc(); + bclock = rdtsc(); } } preempt_enable(); @@ -100,7 +100,7 @@ void use_tsc_delay(void) int read_current_timer(unsigned long *timer_val) { if (delay_fn == delay_tsc) { - *timer_val = native_read_tsc(); + *timer_val = rdtsc(); return 0; } return -1; diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 15ada47bb720..7c56d7eaa671 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -765,7 +765,7 @@ static inline void intel_pstate_sample(struct cpudata *cpu) local_irq_save(flags); rdmsrl(MSR_IA32_APERF, aperf); rdmsrl(MSR_IA32_MPERF, mperf); - tsc = native_read_tsc(); + tsc = rdtsc(); local_irq_restore(flags); cpu->last_sample_time = cpu->sample.time; diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c index abc0cb22e750..4a2a9e370be7 100644 --- a/drivers/input/gameport/gameport.c +++ b/drivers/input/gameport/gameport.c @@ -149,9 +149,9 @@ static int old_gameport_measure_speed(struct gameport *gameport) for(i = 0; i < 50; i++) { local_irq_save(flags); - t1 = native_read_tsc(); + t1 = rdtsc(); for (t = 0; t < 50; t++) gameport_read(gameport); - t2 = native_read_tsc(); + t2 = rdtsc(); local_irq_restore(flags); udelay(i * 10); if (t2 - t1 < tx) tx = t2 - t1; diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c index f871b4f00056..6f8b084e13d0 100644 --- a/drivers/input/joystick/analog.c +++ b/drivers/input/joystick/analog.c @@ -143,7 +143,7 @@ struct analog_port { #include -#define GET_TIME(x) do { if (cpu_has_tsc) x = (unsigned int)native_read_tsc(); else x = get_time_pit(); } while (0) +#define GET_TIME(x) do { if (cpu_has_tsc) x = (unsigned int)rdtsc(); else x = get_time_pit(); } while (0) #define DELTA(x,y) (cpu_has_tsc ? ((y) - (x)) : ((x) - (y) + ((x) < (y) ? PIT_TICK_RATE / HZ : 0))) #define TIME_NAME (cpu_has_tsc?"TSC":"PIT") static unsigned int get_time_pit(void) @@ -160,7 +160,7 @@ static unsigned int get_time_pit(void) return count; } #elif defined(__x86_64__) -#define GET_TIME(x) do { x = (unsigned int)native_read_tsc(); } while (0) +#define GET_TIME(x) do { x = (unsigned int)rdtsc(); } while (0) #define DELTA(x,y) ((y)-(x)) #define TIME_NAME "TSC" #elif defined(__alpha__) || defined(CONFIG_MN10300) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_TILE) diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c index 44e5c3b5e0af..72c9f1f352b4 100644 --- a/drivers/net/hamradio/baycom_epp.c +++ b/drivers/net/hamradio/baycom_epp.c @@ -638,7 +638,7 @@ static int receive(struct net_device *dev, int cnt) #define GETTICK(x) \ ({ \ if (cpu_has_tsc) \ - x = (unsigned int)native_read_tsc(); \ + x = (unsigned int)rdtsc(); \ }) #else /* __i386__ */ #define GETTICK(x) diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c index ab13448defcf..2ac0c704bcb8 100644 --- a/drivers/thermal/intel_powerclamp.c +++ b/drivers/thermal/intel_powerclamp.c @@ -340,7 +340,7 @@ static bool powerclamp_adjust_controls(unsigned int target_ratio, /* check result for the last window */ msr_now = pkg_state_counter(); - tsc_now = native_read_tsc(); + tsc_now = rdtsc(); /* calculate pkg cstate vs tsc ratio */ if (!msr_last || !tsc_last) @@ -482,7 +482,7 @@ static void poll_pkg_cstate(struct work_struct *dummy) u64 val64; msr_now = pkg_state_counter(); - tsc_now = native_read_tsc(); + tsc_now = rdtsc(); jiffies_now = jiffies; /* calculate pkg cstate vs tsc ratio */ diff --git a/tools/power/cpupower/debug/kernel/cpufreq-test_tsc.c b/tools/power/cpupower/debug/kernel/cpufreq-test_tsc.c index f02b0c0bff9b..6ff8383f2941 100644 --- a/tools/power/cpupower/debug/kernel/cpufreq-test_tsc.c +++ b/tools/power/cpupower/debug/kernel/cpufreq-test_tsc.c @@ -81,11 +81,11 @@ static int __init cpufreq_test_tsc(void) printk(KERN_DEBUG "start--> \n"); then = read_pmtmr(); - then_tsc = native_read_tsc(); + then_tsc = rdtsc(); for (i=0;i<20;i++) { mdelay(100); now = read_pmtmr(); - now_tsc = native_read_tsc(); + now_tsc = rdtsc(); diff = (now - then) & 0xFFFFFF; diff_tsc = now_tsc - then_tsc; printk(KERN_DEBUG "t1: %08u t2: %08u diff_pmtmr: %08u diff_tsc: %016llu\n", then, now, diff, diff_tsc); -- cgit v1.2.3 From 03b9730b769fc4d87e40f6104f4c5b2e43889f19 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 25 Jun 2015 18:44:08 +0200 Subject: x86/asm/tsc: Add rdtsc_ordered() and use it in trivial call sites rdtsc_barrier(); rdtsc() is an unnecessary mouthful and requires more thought than should be necessary. Add an rdtsc_ordered() helper and replace the trivial call sites with it. This should not change generated code. The duplication of the fence asm is temporary. Signed-off-by: Andy Lutomirski Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Huang Rui Cc: John Stultz Cc: Len Brown Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Thomas Gleixner Cc: kvm ML Link: http://lkml.kernel.org/r/dddbf98a2af53312e9aa73a5a2b1622fe5d6f52b.1434501121.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/vdso/vclock_gettime.c | 16 ++-------------- arch/x86/include/asm/msr.h | 26 ++++++++++++++++++++++++++ arch/x86/kernel/trace_clock.c | 7 +------ arch/x86/kvm/x86.c | 16 ++-------------- arch/x86/lib/delay.c | 9 +++------ 5 files changed, 34 insertions(+), 40 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 0340d93c18ca..ca94fa649251 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -175,20 +175,8 @@ static notrace cycle_t vread_pvclock(int *mode) notrace static cycle_t vread_tsc(void) { - cycle_t ret; - u64 last; - - /* - * Empirically, a fence (of type that depends on the CPU) - * before rdtsc is enough to ensure that rdtsc is ordered - * with respect to loads. The various CPU manuals are unclear - * as to whether rdtsc can be reordered with later loads, - * but no one has ever seen it happen. - */ - rdtsc_barrier(); - ret = (cycle_t)rdtsc(); - - last = gtod->cycle_last; + cycle_t ret = (cycle_t)rdtsc_ordered(); + u64 last = gtod->cycle_last; if (likely(ret >= last)) return ret; diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index ff0c120dafe5..02bdd6c65017 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -127,6 +127,32 @@ static __always_inline unsigned long long rdtsc(void) return EAX_EDX_VAL(val, low, high); } +/** + * rdtsc_ordered() - read the current TSC in program order + * + * rdtsc_ordered() returns the result of RDTSC as a 64-bit integer. + * It is ordered like a load to a global in-memory counter. It should + * be impossible to observe non-monotonic rdtsc_unordered() behavior + * across multiple CPUs as long as the TSC is synced. + */ +static __always_inline unsigned long long rdtsc_ordered(void) +{ + /* + * The RDTSC instruction is not ordered relative to memory + * access. The Intel SDM and the AMD APM are both vague on this + * point, but empirically an RDTSC instruction can be + * speculatively executed before prior loads. An RDTSC + * immediately after an appropriate barrier appears to be + * ordered as a normal load, that is, it provides the same + * ordering guarantees as reading from a global memory location + * that some other imaginary CPU is updating continuously with a + * time stamp. + */ + alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, + "lfence", X86_FEATURE_LFENCE_RDTSC); + return rdtsc(); +} + static inline unsigned long long native_read_pmc(int counter) { DECLARE_ARGS(val, low, high); diff --git a/arch/x86/kernel/trace_clock.c b/arch/x86/kernel/trace_clock.c index 67efb8c96fc4..80bb24d9b880 100644 --- a/arch/x86/kernel/trace_clock.c +++ b/arch/x86/kernel/trace_clock.c @@ -12,10 +12,5 @@ */ u64 notrace trace_clock_x86_tsc(void) { - u64 ret; - - rdtsc_barrier(); - ret = rdtsc(); - - return ret; + return rdtsc_ordered(); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index dfa97139282d..8d73ec8a2364 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1444,20 +1444,8 @@ EXPORT_SYMBOL_GPL(kvm_write_tsc); static cycle_t read_tsc(void) { - cycle_t ret; - u64 last; - - /* - * Empirically, a fence (of type that depends on the CPU) - * before rdtsc is enough to ensure that rdtsc is ordered - * with respect to loads. The various CPU manuals are unclear - * as to whether rdtsc can be reordered with later loads, - * but no one has ever seen it happen. - */ - rdtsc_barrier(); - ret = (cycle_t)rdtsc(); - - last = pvclock_gtod_data.clock.cycle_last; + cycle_t ret = (cycle_t)rdtsc_ordered(); + u64 last = pvclock_gtod_data.clock.cycle_last; if (likely(ret >= last)) return ret; diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index f24bc59ab0a0..4453d52a143d 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -54,11 +54,9 @@ static void delay_tsc(unsigned long __loops) preempt_disable(); cpu = smp_processor_id(); - rdtsc_barrier(); - bclock = rdtsc(); + bclock = rdtsc_ordered(); for (;;) { - rdtsc_barrier(); - now = rdtsc(); + now = rdtsc_ordered(); if ((now - bclock) >= loops) break; @@ -79,8 +77,7 @@ static void delay_tsc(unsigned long __loops) if (unlikely(cpu != smp_processor_id())) { loops -= (now - bclock); cpu = smp_processor_id(); - rdtsc_barrier(); - bclock = rdtsc(); + bclock = rdtsc_ordered(); } } preempt_enable(); -- cgit v1.2.3 From b466bdb614823aaaa7188e85516177d2850f4782 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Mon, 10 Aug 2015 12:19:54 +0200 Subject: x86/asm/delay: Introduce an MWAITX-based delay with a configurable timer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MWAITX can enable a timer and a corresponding timer value specified in SW P0 clocks. The SW P0 frequency is the same as TSC. The timer provides an upper bound on how long the instruction waits before exiting. This way, a delay function in the kernel can leverage that MWAITX timer of MWAITX. When a CPU core executes MWAITX, it will be quiesced in a waiting phase, diminishing its power consumption. This way, we can save power in comparison to our default TSC-based delays. A simple test shows that: $ cat /sys/bus/pci/devices/0000\:00\:18.4/hwmon/hwmon0/power1_acc $ sleep 10000s $ cat /sys/bus/pci/devices/0000\:00\:18.4/hwmon/hwmon0/power1_acc Results: * TSC-based default delay: 485115 uWatts average power * MWAITX-based delay: 252738 uWatts average power Thus, that's about 240 milliWatts less power consumption. The test method relies on the support of AMD CPU accumulated power algorithm in fam15h_power for which patches are forthcoming. Suggested-by: Andy Lutomirski Suggested-by: Borislav Petkov Suggested-by: Peter Zijlstra Signed-off-by: Huang Rui [ Fix delay truncation. ] Signed-off-by: Borislav Petkov Cc: Aaron Lu Cc: Andreas Herrmann Cc: Aravind Gopalakrishnan Cc: Fengguang Wu Cc: Frédéric Weisbecker Cc: H. Peter Anvin Cc: Hector Marco-Gisbert Cc: Jacob Shin Cc: Jiri Olsa Cc: John Stultz Cc: Len Brown Cc: Linus Torvalds Cc: Paolo Bonzini Cc: Rafael J. Wysocki Cc: Thomas Gleixner Cc: Tony Li Link: http://lkml.kernel.org/r/1438744732-1459-3-git-send-email-ray.huang@amd.com Link: http://lkml.kernel.org/r/1439201994-28067-4-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/delay.h | 1 + arch/x86/kernel/cpu/amd.c | 4 ++++ arch/x86/lib/delay.c | 47 +++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 51 insertions(+), 1 deletion(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/include/asm/delay.h b/arch/x86/include/asm/delay.h index 9b3b4f2754c7..36a760bda462 100644 --- a/arch/x86/include/asm/delay.h +++ b/arch/x86/include/asm/delay.h @@ -4,5 +4,6 @@ #include void use_tsc_delay(void); +void use_mwaitx_delay(void); #endif /* _ASM_X86_DELAY_H */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 51ad2af84a72..4a70fc6d400a 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -11,6 +11,7 @@ #include #include #include +#include #ifdef CONFIG_X86_64 # include @@ -506,6 +507,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) /* A random value per boot for bit slice [12:upper_bit) */ va_align.bits = get_random_int() & va_align.mask; } + + if (cpu_has(c, X86_FEATURE_MWAITX)) + use_mwaitx_delay(); } static void early_init_amd(struct cpuinfo_x86 *c) diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index 4453d52a143d..e912b2f6d36e 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -20,6 +20,7 @@ #include #include #include +#include #ifdef CONFIG_SMP # include @@ -83,6 +84,44 @@ static void delay_tsc(unsigned long __loops) preempt_enable(); } +/* + * On some AMD platforms, MWAITX has a configurable 32-bit timer, that + * counts with TSC frequency. The input value is the loop of the + * counter, it will exit when the timer expires. + */ +static void delay_mwaitx(unsigned long __loops) +{ + u64 start, end, delay, loops = __loops; + + start = rdtsc_ordered(); + + for (;;) { + delay = min_t(u64, MWAITX_MAX_LOOPS, loops); + + /* + * Use cpu_tss as a cacheline-aligned, seldomly + * accessed per-cpu variable as the monitor target. + */ + __monitorx(this_cpu_ptr(&cpu_tss), 0, 0); + + /* + * AMD, like Intel, supports the EAX hint and EAX=0xf + * means, do not enter any deep C-state and we use it + * here in delay() to minimize wakeup latency. + */ + __mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE); + + end = rdtsc_ordered(); + + if (loops <= end - start) + break; + + loops -= end - start; + + start = end; + } +} + /* * Since we calibrate only once at boot, this * function should be set once at boot and not changed @@ -91,7 +130,13 @@ static void (*delay_fn)(unsigned long) = delay_loop; void use_tsc_delay(void) { - delay_fn = delay_tsc; + if (delay_fn == delay_loop) + delay_fn = delay_tsc; +} + +void use_mwaitx_delay(void) +{ + delay_fn = delay_mwaitx; } int read_current_timer(unsigned long *timer_val) -- cgit v1.2.3