diff options
Diffstat (limited to 'arch/x86/kernel')
25 files changed, 785 insertions, 379 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 047f9ff2e36c..bde3993624f1 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -106,6 +106,7 @@ obj-$(CONFIG_EFI) += sysfb_efi.o obj-$(CONFIG_PERF_EVENTS) += perf_regs.o obj-$(CONFIG_TRACING) += tracepoint.o obj-$(CONFIG_IOSF_MBI) += iosf_mbi.o +obj-$(CONFIG_PMC_ATOM) += pmc_atom.o ### # 64 bit specific files diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile index 163b22581472..3242e591fa82 100644 --- a/arch/x86/kernel/acpi/Makefile +++ b/arch/x86/kernel/acpi/Makefile @@ -1,5 +1,6 @@ obj-$(CONFIG_ACPI) += boot.o obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_$(BITS).o +obj-$(CONFIG_ACPI_APEI) += apei.o ifneq ($(CONFIG_ACPI_PROCESSOR),) obj-y += cstate.o diff --git a/arch/x86/kernel/acpi/apei.c b/arch/x86/kernel/acpi/apei.c new file mode 100644 index 000000000000..c280df6b2aa2 --- /dev/null +++ b/arch/x86/kernel/acpi/apei.c @@ -0,0 +1,62 @@ +/* + * Arch-specific APEI-related functions. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <acpi/apei.h> + +#include <asm/mce.h> +#include <asm/tlbflush.h> + +int arch_apei_enable_cmcff(struct acpi_hest_header *hest_hdr, void *data) +{ +#ifdef CONFIG_X86_MCE + int i; + struct acpi_hest_ia_corrected *cmc; + struct acpi_hest_ia_error_bank *mc_bank; + + if (hest_hdr->type != ACPI_HEST_TYPE_IA32_CORRECTED_CHECK) + return 0; + + cmc = (struct acpi_hest_ia_corrected *)hest_hdr; + if (!cmc->enabled) + return 0; + + /* + * We expect HEST to provide a list of MC banks that report errors + * in firmware first mode. Otherwise, return non-zero value to + * indicate that we are done parsing HEST. + */ + if (!(cmc->flags & ACPI_HEST_FIRMWARE_FIRST) || + !cmc->num_hardware_banks) + return 1; + + pr_info("HEST: Enabling Firmware First mode for corrected errors.\n"); + + mc_bank = (struct acpi_hest_ia_error_bank *)(cmc + 1); + for (i = 0; i < cmc->num_hardware_banks; i++, mc_bank++) + mce_disable_bank(mc_bank->bank_number); +#endif + return 1; +} + +void arch_apei_report_mem_error(int sev, struct cper_sec_mem_err *mem_err) +{ +#ifdef CONFIG_X86_MCE + apei_mce_report_mem_error(sev, mem_err); +#endif +} + +void arch_apei_flush_tlb_one(unsigned long addr) +{ + __flush_tlb_one(addr); +} diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 86281ffb96d6..a531f6564ed0 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -74,10 +74,6 @@ int acpi_fix_pin2_polarity __initdata; static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; #endif -#ifndef __HAVE_ARCH_CMPXCHG -#warning ACPI uses CMPXCHG, i486 and later hardware -#endif - /* -------------------------------------------------------------------------- Boot-time Configuration -------------------------------------------------------------------------- */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index ce8b8ff0e0ef..60e5497681f5 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -8,6 +8,7 @@ #include <asm/processor.h> #include <asm/apic.h> #include <asm/cpu.h> +#include <asm/smp.h> #include <asm/pci-direct.h> #ifdef CONFIG_X86_64 @@ -50,7 +51,6 @@ static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val) return wrmsr_safe_regs(gprs); } -#ifdef CONFIG_X86_32 /* * B step AMD K6 before B 9730xxxx have hardware bugs that can cause * misexecution of code under Linux. Owners of such processors should @@ -70,6 +70,7 @@ __asm__(".globl vide\n\t.align 4\nvide: ret"); static void init_amd_k5(struct cpuinfo_x86 *c) { +#ifdef CONFIG_X86_32 /* * General Systems BIOSen alias the cpu frequency registers * of the Elan at 0x000df000. Unfortuantly, one of the Linux @@ -83,11 +84,12 @@ static void init_amd_k5(struct cpuinfo_x86 *c) if (inl(CBAR) & CBAR_ENB) outl(0 | CBAR_KEY, CBAR); } +#endif } - static void init_amd_k6(struct cpuinfo_x86 *c) { +#ifdef CONFIG_X86_32 u32 l, h; int mbytes = get_num_physpages() >> (20-PAGE_SHIFT); @@ -176,10 +178,44 @@ static void init_amd_k6(struct cpuinfo_x86 *c) /* placeholder for any needed mods */ return; } +#endif } -static void amd_k7_smp_check(struct cpuinfo_x86 *c) +static void init_amd_k7(struct cpuinfo_x86 *c) { +#ifdef CONFIG_X86_32 + u32 l, h; + + /* + * Bit 15 of Athlon specific MSR 15, needs to be 0 + * to enable SSE on Palomino/Morgan/Barton CPU's. + * If the BIOS didn't enable it already, enable it here. + */ + if (c->x86_model >= 6 && c->x86_model <= 10) { + if (!cpu_has(c, X86_FEATURE_XMM)) { + printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); + msr_clear_bit(MSR_K7_HWCR, 15); + set_cpu_cap(c, X86_FEATURE_XMM); + } + } + + /* + * It's been determined by AMD that Athlons since model 8 stepping 1 + * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx + * As per AMD technical note 27212 0.2 + */ + if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { + rdmsr(MSR_K7_CLK_CTL, l, h); + if ((l & 0xfff00000) != 0x20000000) { + printk(KERN_INFO + "CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", + l, ((l & 0x000fffff)|0x20000000)); + wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); + } + } + + set_cpu_cap(c, X86_FEATURE_K7); + /* calling is from identify_secondary_cpu() ? */ if (!c->cpu_index) return; @@ -207,7 +243,7 @@ static void amd_k7_smp_check(struct cpuinfo_x86 *c) if (((c->x86_model == 6) && (c->x86_mask >= 2)) || ((c->x86_model == 7) && (c->x86_mask >= 1)) || (c->x86_model > 7)) - if (cpu_has_mp) + if (cpu_has(c, X86_FEATURE_MP)) return; /* If we get here, not a certified SMP capable AMD system. */ @@ -219,45 +255,8 @@ static void amd_k7_smp_check(struct cpuinfo_x86 *c) WARN_ONCE(1, "WARNING: This combination of AMD" " processors is not suitable for SMP.\n"); add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_NOW_UNRELIABLE); -} - -static void init_amd_k7(struct cpuinfo_x86 *c) -{ - u32 l, h; - - /* - * Bit 15 of Athlon specific MSR 15, needs to be 0 - * to enable SSE on Palomino/Morgan/Barton CPU's. - * If the BIOS didn't enable it already, enable it here. - */ - if (c->x86_model >= 6 && c->x86_model <= 10) { - if (!cpu_has(c, X86_FEATURE_XMM)) { - printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); - msr_clear_bit(MSR_K7_HWCR, 15); - set_cpu_cap(c, X86_FEATURE_XMM); - } - } - - /* - * It's been determined by AMD that Athlons since model 8 stepping 1 - * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx - * As per AMD technical note 27212 0.2 - */ - if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { - rdmsr(MSR_K7_CLK_CTL, l, h); - if ((l & 0xfff00000) != 0x20000000) { - printk(KERN_INFO - "CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", - l, ((l & 0x000fffff)|0x20000000)); - wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); - } - } - - set_cpu_cap(c, X86_FEATURE_K7); - - amd_k7_smp_check(c); -} #endif +} #ifdef CONFIG_NUMA /* @@ -446,6 +445,26 @@ static void early_init_amd_mc(struct cpuinfo_x86 *c) static void bsp_init_amd(struct cpuinfo_x86 *c) { + +#ifdef CONFIG_X86_64 + if (c->x86 >= 0xf) { + unsigned long long tseg; + + /* + * Split up direct mapping around the TSEG SMM area. + * Don't do it for gbpages because there seems very little + * benefit in doing so. + */ + if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { + unsigned long pfn = tseg >> PAGE_SHIFT; + + printk(KERN_DEBUG "tseg: %010llx\n", tseg); + if (pfn_range_is_mapped(pfn, pfn + 1)) + set_memory_4k((unsigned long)__va(tseg), 1); + } + } +#endif + if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { if (c->x86 > 0x10 || @@ -515,101 +534,74 @@ static const int amd_erratum_383[]; static const int amd_erratum_400[]; static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum); -static void init_amd(struct cpuinfo_x86 *c) +static void init_amd_k8(struct cpuinfo_x86 *c) { - u32 dummy; - unsigned long long value; + u32 level; + u64 value; -#ifdef CONFIG_SMP - /* - * Disable TLB flush filter by setting HWCR.FFDIS on K8 - * bit 6 of msr C001_0015 - * - * Errata 63 for SH-B3 steppings - * Errata 122 for all steppings (F+ have it disabled by default) - */ - if (c->x86 == 0xf) - msr_set_bit(MSR_K7_HWCR, 6); -#endif - - early_init_amd(c); + /* On C+ stepping K8 rep microcode works well for copy/memset */ + level = cpuid_eax(1); + if ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); /* - * Bit 31 in normal CPUID used for nonstandard 3DNow ID; - * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway + * Some BIOSes incorrectly force this feature, but only K8 revision D + * (model = 0x14) and later actually support it. + * (AMD Erratum #110, docId: 25759). */ - clear_cpu_cap(c, 0*32+31); - -#ifdef CONFIG_X86_64 - /* On C+ stepping K8 rep microcode works well for copy/memset */ - if (c->x86 == 0xf) { - u32 level; - - level = cpuid_eax(1); - if ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); - - /* - * Some BIOSes incorrectly force this feature, but only K8 - * revision D (model = 0x14) and later actually support it. - * (AMD Erratum #110, docId: 25759). - */ - if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) { - clear_cpu_cap(c, X86_FEATURE_LAHF_LM); - if (!rdmsrl_amd_safe(0xc001100d, &value)) { - value &= ~(1ULL << 32); - wrmsrl_amd_safe(0xc001100d, value); - } + if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) { + clear_cpu_cap(c, X86_FEATURE_LAHF_LM); + if (!rdmsrl_amd_safe(0xc001100d, &value)) { + value &= ~BIT_64(32); + wrmsrl_amd_safe(0xc001100d, value); } - } - if (c->x86 >= 0x10) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); - /* get apicid instead of initial apic id from cpuid */ - c->apicid = hard_smp_processor_id(); -#else + if (!c->x86_model_id[0]) + strcpy(c->x86_model_id, "Hammer"); +} + +static void init_amd_gh(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_X86_64 + /* do this for boot cpu */ + if (c == &boot_cpu_data) + check_enable_amd_mmconf_dmi(); + + fam10h_check_enable_mmcfg(); +#endif /* - * FIXME: We should handle the K5 here. Set up the write - * range and also turn on MSR 83 bits 4 and 31 (write alloc, - * no bus pipeline) + * Disable GART TLB Walk Errors on Fam10h. We do this here because this + * is always needed when GART is enabled, even in a kernel which has no + * MCE support built in. BIOS should disable GartTlbWlk Errors already. + * If it doesn't, we do it here as suggested by the BKDG. + * + * Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=33012 */ + msr_set_bit(MSR_AMD64_MCx_MASK(4), 10); - switch (c->x86) { - case 4: - init_amd_k5(c); - break; - case 5: - init_amd_k6(c); - break; - case 6: /* An Athlon/Duron */ - init_amd_k7(c); - break; - } + /* + * On family 10h BIOS may not have properly enabled WC+ support, causing + * it to be converted to CD memtype. This may result in performance + * degradation for certain nested-paging guests. Prevent this conversion + * by clearing bit 24 in MSR_AMD64_BU_CFG2. + * + * NOTE: we want to use the _safe accessors so as not to #GP kvm + * guests on older kvm hosts. + */ + msr_clear_bit(MSR_AMD64_BU_CFG2, 24); - /* K6s reports MCEs but don't actually have all the MSRs */ - if (c->x86 < 6) - clear_cpu_cap(c, X86_FEATURE_MCE); -#endif + if (cpu_has_amd_erratum(c, amd_erratum_383)) + set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH); +} - /* Enable workaround for FXSAVE leak */ - if (c->x86 >= 6) - set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); - - if (!c->x86_model_id[0]) { - switch (c->x86) { - case 0xf: - /* Should distinguish Models here, but this is only - a fallback anyways. */ - strcpy(c->x86_model_id, "Hammer"); - break; - } - } +static void init_amd_bd(struct cpuinfo_x86 *c) +{ + u64 value; /* re-enable TopologyExtensions if switched off by BIOS */ - if ((c->x86 == 0x15) && - (c->x86_model >= 0x10) && (c->x86_model <= 0x1f) && + if ((c->x86_model >= 0x10) && (c->x86_model <= 0x1f) && !cpu_has(c, X86_FEATURE_TOPOEXT)) { if (msr_set_bit(0xc0011005, 54) > 0) { @@ -625,14 +617,60 @@ static void init_amd(struct cpuinfo_x86 *c) * The way access filter has a performance penalty on some workloads. * Disable it on the affected CPUs. */ - if ((c->x86 == 0x15) && - (c->x86_model >= 0x02) && (c->x86_model < 0x20)) { - + if ((c->x86_model >= 0x02) && (c->x86_model < 0x20)) { if (!rdmsrl_safe(0xc0011021, &value) && !(value & 0x1E)) { value |= 0x1E; wrmsrl_safe(0xc0011021, value); } } +} + +static void init_amd(struct cpuinfo_x86 *c) +{ + u32 dummy; + +#ifdef CONFIG_SMP + /* + * Disable TLB flush filter by setting HWCR.FFDIS on K8 + * bit 6 of msr C001_0015 + * + * Errata 63 for SH-B3 steppings + * Errata 122 for all steppings (F+ have it disabled by default) + */ + if (c->x86 == 0xf) + msr_set_bit(MSR_K7_HWCR, 6); +#endif + + early_init_amd(c); + + /* + * Bit 31 in normal CPUID used for nonstandard 3DNow ID; + * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway + */ + clear_cpu_cap(c, 0*32+31); + + if (c->x86 >= 0x10) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + + /* get apicid instead of initial apic id from cpuid */ + c->apicid = hard_smp_processor_id(); + + /* K6s reports MCEs but don't actually have all the MSRs */ + if (c->x86 < 6) + clear_cpu_cap(c, X86_FEATURE_MCE); + + switch (c->x86) { + case 4: init_amd_k5(c); break; + case 5: init_amd_k6(c); break; + case 6: init_amd_k7(c); break; + case 0xf: init_amd_k8(c); break; + case 0x10: init_amd_gh(c); break; + case 0x15: init_amd_bd(c); break; + } + + /* Enable workaround for FXSAVE leak */ + if (c->x86 >= 6) + set_cpu_bug(c, X86_BUG_FXSAVE_LEAK); cpu_detect_cache_sizes(c); @@ -656,33 +694,6 @@ static void init_amd(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); } -#ifdef CONFIG_X86_64 - if (c->x86 == 0x10) { - /* do this for boot cpu */ - if (c == &boot_cpu_data) - check_enable_amd_mmconf_dmi(); - - fam10h_check_enable_mmcfg(); - } - - if (c == &boot_cpu_data && c->x86 >= 0xf) { - unsigned long long tseg; - - /* - * Split up direct mapping around the TSEG SMM area. - * Don't do it for gbpages because there seems very little - * benefit in doing so. - */ - if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { - unsigned long pfn = tseg >> PAGE_SHIFT; - - printk(KERN_DEBUG "tseg: %010llx\n", tseg); - if (pfn_range_is_mapped(pfn, pfn + 1)) - set_memory_4k((unsigned long)__va(tseg), 1); - } - } -#endif - /* * Family 0x12 and above processors have APIC timer * running in deep C states. @@ -690,34 +701,6 @@ static void init_amd(struct cpuinfo_x86 *c) if (c->x86 > 0x11) set_cpu_cap(c, X86_FEATURE_ARAT); - if (c->x86 == 0x10) { - /* - * Disable GART TLB Walk Errors on Fam10h. We do this here - * because this is always needed when GART is enabled, even in a - * kernel which has no MCE support built in. - * BIOS should disable GartTlbWlk Errors already. If - * it doesn't, do it here as suggested by the BKDG. - * - * Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=33012 - */ - msr_set_bit(MSR_AMD64_MCx_MASK(4), 10); - - /* - * On family 10h BIOS may not have properly enabled WC+ support, - * causing it to be converted to CD memtype. This may result in - * performance degradation for certain nested-paging guests. - * Prevent this conversion by clearing bit 24 in - * MSR_AMD64_BU_CFG2. - * - * NOTE: we want to use the _safe accessors so as not to #GP kvm - * guests on older kvm hosts. - */ - msr_clear_bit(MSR_AMD64_BU_CFG2, 24); - - if (cpu_has_amd_erratum(c, amd_erratum_383)) - set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH); - } - if (cpu_has_amd_erratum(c, amd_erratum_400)) set_cpu_bug(c, X86_BUG_AMD_APIC_C1E); @@ -741,11 +724,6 @@ static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) } #endif -static void cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c) -{ - tlb_flushall_shift = 6; -} - static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c) { u32 ebx, eax, ecx, edx; @@ -793,8 +771,6 @@ static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c) tlb_lli_2m[ENTRIES] = eax & mask; tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1; - - cpu_set_tlb_flushall_shift(c); } static const struct cpu_dev amd_cpu_dev = { diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ef1b93f18ed1..333fd5209336 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -481,26 +481,17 @@ u16 __read_mostly tlb_lld_2m[NR_INFO]; u16 __read_mostly tlb_lld_4m[NR_INFO]; u16 __read_mostly tlb_lld_1g[NR_INFO]; -/* - * tlb_flushall_shift shows the balance point in replacing cr3 write - * with multiple 'invlpg'. It will do this replacement when - * flush_tlb_lines <= active_lines/2^tlb_flushall_shift. - * If tlb_flushall_shift is -1, means the replacement will be disabled. - */ -s8 __read_mostly tlb_flushall_shift = -1; - void cpu_detect_tlb(struct cpuinfo_x86 *c) { if (this_cpu->c_detect_tlb) this_cpu->c_detect_tlb(c); printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" - "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n" - "tlb_flushall_shift: %d\n", + "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n", tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES], tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES], - tlb_lld_1g[ENTRIES], tlb_flushall_shift); + tlb_lld_1g[ENTRIES]); } void detect_ht(struct cpuinfo_x86 *c) @@ -634,6 +625,15 @@ void get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[9] = ebx; } + /* Extended state features: level 0x0000000d */ + if (c->cpuid_level >= 0x0000000d) { + u32 eax, ebx, ecx, edx; + + cpuid_count(0x0000000d, 1, &eax, &ebx, &ecx, &edx); + + c->x86_capability[10] = eax; + } + /* AMD-defined flags: level 0x80000001 */ xlvl = cpuid_eax(0x80000000); c->extended_cpuid_level = xlvl; diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index f9e4fdd3b877..74e804ddc5c7 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -253,7 +253,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) */ if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 && (c->x86_mask < 0x6 || c->x86_mask == 0xb)) - set_cpu_cap(c, X86_FEATURE_11AP); + set_cpu_bug(c, X86_BUG_11AP); #ifdef CONFIG_X86_INTEL_USERCOPY @@ -402,7 +402,7 @@ static void init_intel(struct cpuinfo_x86 *c) if (c->x86 == 6 && cpu_has_clflush && (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47)) - set_cpu_cap(c, X86_FEATURE_CLFLUSH_MONITOR); + set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR); #ifdef CONFIG_X86_64 if (c->x86 == 15) @@ -634,31 +634,6 @@ static void intel_tlb_lookup(const unsigned char desc) } } -static void intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c) -{ - switch ((c->x86 << 8) + c->x86_model) { - case 0x60f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ - case 0x616: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ - case 0x617: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ - case 0x61d: /* six-core 45 nm xeon "Dunnington" */ - tlb_flushall_shift = -1; - break; - case 0x63a: /* Ivybridge */ - tlb_flushall_shift = 2; - break; - case 0x61a: /* 45 nm nehalem, "Bloomfield" */ - case 0x61e: /* 45 nm nehalem, "Lynnfield" */ - case 0x625: /* 32 nm nehalem, "Clarkdale" */ - case 0x62c: /* 32 nm nehalem, "Gulftown" */ - case 0x62e: /* 45 nm nehalem-ex, "Beckton" */ - case 0x62f: /* 32 nm Xeon E7 */ - case 0x62a: /* SandyBridge */ - case 0x62d: /* SandyBridge, "Romely-EP" */ - default: - tlb_flushall_shift = 6; - } -} - static void intel_detect_tlb(struct cpuinfo_x86 *c) { int i, j, n; @@ -683,7 +658,6 @@ static void intel_detect_tlb(struct cpuinfo_x86 *c) for (j = 1 ; j < 16 ; j++) intel_tlb_lookup(desc[j]); } - intel_tlb_flushall_shift_set(c); } static const struct cpu_dev intel_cpu_dev = { diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 9a79c8dbd8e8..4fc57975acc1 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -2385,6 +2385,10 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) threshold_cpu_callback(action, cpu); mce_device_remove(cpu); mce_intel_hcpu_update(cpu); + + /* intentionally ignoring frozen here */ + if (!(action & CPU_TASKS_FROZEN)) + cmci_rediscover(); break; case CPU_DOWN_PREPARE: smp_call_function_single(cpu, mce_disable_cpu, &action, 1); @@ -2396,11 +2400,6 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) break; } - if (action == CPU_POST_DEAD) { - /* intentionally ignoring frozen here */ - cmci_rediscover(); - } - return NOTIFY_OK; } diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 9a316b21df8b..3bdb95ae8c43 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -42,7 +42,7 @@ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned); * cmci_discover_lock protects against parallel discovery attempts * which could race against each other. */ -static DEFINE_SPINLOCK(cmci_discover_lock); +static DEFINE_RAW_SPINLOCK(cmci_discover_lock); #define CMCI_THRESHOLD 1 #define CMCI_POLL_INTERVAL (30 * HZ) @@ -144,14 +144,14 @@ static void cmci_storm_disable_banks(void) int bank; u64 val; - spin_lock_irqsave(&cmci_discover_lock, flags); + raw_spin_lock_irqsave(&cmci_discover_lock, flags); owned = __get_cpu_var(mce_banks_owned); for_each_set_bit(bank, owned, MAX_NR_BANKS) { rdmsrl(MSR_IA32_MCx_CTL2(bank), val); val &= ~MCI_CTL2_CMCI_EN; wrmsrl(MSR_IA32_MCx_CTL2(bank), val); } - spin_unlock_irqrestore(&cmci_discover_lock, flags); + raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); } static bool cmci_storm_detect(void) @@ -211,7 +211,7 @@ static void cmci_discover(int banks) int i; int bios_wrong_thresh = 0; - spin_lock_irqsave(&cmci_discover_lock, flags); + raw_spin_lock_irqsave(&cmci_discover_lock, flags); for (i = 0; i < banks; i++) { u64 val; int bios_zero_thresh = 0; @@ -266,7 +266,7 @@ static void cmci_discover(int banks) WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); } } - spin_unlock_irqrestore(&cmci_discover_lock, flags); + raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) { pr_info_once( "bios_cmci_threshold: Some banks do not have valid thresholds set\n"); @@ -316,10 +316,10 @@ void cmci_clear(void) if (!cmci_supported(&banks)) return; - spin_lock_irqsave(&cmci_discover_lock, flags); + raw_spin_lock_irqsave(&cmci_discover_lock, flags); for (i = 0; i < banks; i++) __cmci_disable_bank(i); - spin_unlock_irqrestore(&cmci_discover_lock, flags); + raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); } static void cmci_rediscover_work_func(void *arg) @@ -360,9 +360,9 @@ void cmci_disable_bank(int bank) if (!cmci_supported(&banks)) return; - spin_lock_irqsave(&cmci_discover_lock, flags); + raw_spin_lock_irqsave(&cmci_discover_lock, flags); __cmci_disable_bank(bank); - spin_unlock_irqrestore(&cmci_discover_lock, flags); + raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); } static void intel_init_cmci(void) diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh index 2bf616505499..e2b22df964cd 100644 --- a/arch/x86/kernel/cpu/mkcapflags.sh +++ b/arch/x86/kernel/cpu/mkcapflags.sh @@ -1,23 +1,25 @@ #!/bin/sh # -# Generate the x86_cap_flags[] array from include/asm/cpufeature.h +# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeature.h # IN=$1 OUT=$2 -TABS="$(printf '\t\t\t\t\t')" -trap 'rm "$OUT"' EXIT +function dump_array() +{ + ARRAY=$1 + SIZE=$2 + PFX=$3 + POSTFIX=$4 -( - echo "#ifndef _ASM_X86_CPUFEATURE_H" - echo "#include <asm/cpufeature.h>" - echo "#endif" - echo "" - echo "const char * const x86_cap_flags[NCAPINTS*32] = {" + PFX_SZ=$(echo $PFX | wc -c) + TABS="$(printf '\t\t\t\t\t')" + + echo "const char * const $ARRAY[$SIZE] = {" - # Iterate through any input lines starting with #define X86_FEATURE_ - sed -n -e 's/\t/ /g' -e 's/^ *# *define *X86_FEATURE_//p' $IN | + # Iterate through any input lines starting with #define $PFX + sed -n -e 's/\t/ /g' -e "s/^ *# *define *$PFX//p" $IN | while read i do # Name is everything up to the first whitespace @@ -31,11 +33,32 @@ trap 'rm "$OUT"' EXIT # Name is uppercase, VALUE is all lowercase VALUE="$(echo "$VALUE" | tr A-Z a-z)" - TABCOUNT=$(( ( 5*8 - 14 - $(echo "$NAME" | wc -c) ) / 8 )) - printf "\t[%s]%.*s = %s,\n" \ - "X86_FEATURE_$NAME" "$TABCOUNT" "$TABS" "$VALUE" + if [ -n "$POSTFIX" ]; then + T=$(( $PFX_SZ + $(echo $POSTFIX | wc -c) + 2 )) + TABS="$(printf '\t\t\t\t\t\t')" + TABCOUNT=$(( ( 6*8 - ($T + 1) - $(echo "$NAME" | wc -c) ) / 8 )) + printf "\t[%s - %s]%.*s = %s,\n" "$PFX$NAME" "$POSTFIX" "$TABCOUNT" "$TABS" "$VALUE" + else + TABCOUNT=$(( ( 5*8 - ($PFX_SZ + 1) - $(echo "$NAME" | wc -c) ) / 8 )) + printf "\t[%s]%.*s = %s,\n" "$PFX$NAME" "$TABCOUNT" "$TABS" "$VALUE" + fi done echo "};" +} + +trap 'rm "$OUT"' EXIT + +( + echo "#ifndef _ASM_X86_CPUFEATURE_H" + echo "#include <asm/cpufeature.h>" + echo "#endif" + echo "" + + dump_array "x86_cap_flags" "NCAPINTS*32" "X86_FEATURE_" "" + echo "" + + dump_array "x86_bug_flags" "NBUGINTS*32" "X86_BUG_" "NCAPINTS*32" + ) > $OUT trap - EXIT diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/kernel/cpu/perf_event_amd_uncore.c index 3bbdf4cd38b9..30790d798e6b 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_amd_uncore.c @@ -294,31 +294,41 @@ static struct amd_uncore *amd_uncore_alloc(unsigned int cpu) cpu_to_node(cpu)); } -static void amd_uncore_cpu_up_prepare(unsigned int cpu) +static int amd_uncore_cpu_up_prepare(unsigned int cpu) { - struct amd_uncore *uncore; + struct amd_uncore *uncore_nb = NULL, *uncore_l2; if (amd_uncore_nb) { - uncore = amd_uncore_alloc(cpu); - uncore->cpu = cpu; - uncore->num_counters = NUM_COUNTERS_NB; - uncore->rdpmc_base = RDPMC_BASE_NB; - uncore->msr_base = MSR_F15H_NB_PERF_CTL; - uncore->active_mask = &amd_nb_active_mask; - uncore->pmu = &amd_nb_pmu; - *per_cpu_ptr(amd_uncore_nb, cpu) = uncore; + uncore_nb = amd_uncore_alloc(cpu); + if (!uncore_nb) + goto fail; + uncore_nb->cpu = cpu; + uncore_nb->num_counters = NUM_COUNTERS_NB; + uncore_nb->rdpmc_base = RDPMC_BASE_NB; + uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL; + uncore_nb->active_mask = &amd_nb_active_mask; + uncore_nb->pmu = &amd_nb_pmu; + *per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb; } if (amd_uncore_l2) { - uncore = amd_uncore_alloc(cpu); - uncore->cpu = cpu; - uncore->num_counters = NUM_COUNTERS_L2; - uncore->rdpmc_base = RDPMC_BASE_L2; - uncore->msr_base = MSR_F16H_L2I_PERF_CTL; - uncore->active_mask = &amd_l2_active_mask; - uncore->pmu = &amd_l2_pmu; - *per_cpu_ptr(amd_uncore_l2, cpu) = uncore; + uncore_l2 = amd_uncore_alloc(cpu); + if (!uncore_l2) + goto fail; + uncore_l2->cpu = cpu; + uncore_l2->num_counters = NUM_COUNTERS_L2; + uncore_l2->rdpmc_base = RDPMC_BASE_L2; + uncore_l2->msr_base = MSR_F16H_L2I_PERF_CTL; + uncore_l2->active_mask = &amd_l2_active_mask; + uncore_l2->pmu = &amd_l2_pmu; + *per_cpu_ptr(amd_uncore_l2, cpu) = uncore_l2; } + + return 0; + +fail: + kfree(uncore_nb); + return -ENOMEM; } static struct amd_uncore * @@ -441,7 +451,7 @@ static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores) if (!--uncore->refcnt) kfree(uncore); - *per_cpu_ptr(amd_uncore_nb, cpu) = NULL; + *per_cpu_ptr(uncores, cpu) = NULL; } static void amd_uncore_cpu_dead(unsigned int cpu) @@ -461,7 +471,8 @@ amd_uncore_cpu_notifier(struct notifier_block *self, unsigned long action, switch (action & ~CPU_TASKS_FROZEN) { case CPU_UP_PREPARE: - amd_uncore_cpu_up_prepare(cpu); + if (amd_uncore_cpu_up_prepare(cpu)) + return notifier_from_errno(-ENOMEM); break; case CPU_STARTING: @@ -501,20 +512,33 @@ static void __init init_cpu_already_online(void *dummy) amd_uncore_cpu_online(cpu); } +static void cleanup_cpu_online(void *dummy) +{ + unsigned int cpu = smp_processor_id(); + + amd_uncore_cpu_dead(cpu); +} + static int __init amd_uncore_init(void) { - unsigned int cpu; + unsigned int cpu, cpu2; int ret = -ENODEV; if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) - return -ENODEV; + goto fail_nodev; if (!cpu_has_topoext) - return -ENODEV; + goto fail_nodev; if (cpu_has_perfctr_nb) { amd_uncore_nb = alloc_percpu(struct amd_uncore *); - perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1); + if (!amd_uncore_nb) { + ret = -ENOMEM; + goto fail_nb; + } + ret = perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1); + if (ret) + goto fail_nb; printk(KERN_INFO "perf: AMD NB counters detected\n"); ret = 0; @@ -522,20 +546,28 @@ static int __init amd_uncore_init(void) if (cpu_has_perfctr_l2) { amd_uncore_l2 = alloc_percpu(struct amd_uncore *); - perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1); + if (!amd_uncore_l2) { + ret = -ENOMEM; + goto fail_l2; + } + ret = perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1); + if (ret) + goto fail_l2; printk(KERN_INFO "perf: AMD L2I counters detected\n"); ret = 0; } if (ret) - return -ENODEV; + goto fail_nodev; cpu_notifier_register_begin(); /* init cpus already online before registering for hotplug notifier */ for_each_online_cpu(cpu) { - amd_uncore_cpu_up_prepare(cpu); + ret = amd_uncore_cpu_up_prepare(cpu); + if (ret) + goto fail_online; smp_call_function_single(cpu, init_cpu_already_online, NULL, 1); } @@ -543,5 +575,30 @@ static int __init amd_uncore_init(void) cpu_notifier_register_done(); return 0; + + +fail_online: + for_each_online_cpu(cpu2) { + if (cpu2 == cpu) + break; + smp_call_function_single(cpu, cleanup_cpu_online, NULL, 1); + } + cpu_notifier_register_done(); + + /* amd_uncore_nb/l2 should have been freed by cleanup_cpu_online */ + amd_uncore_nb = amd_uncore_l2 = NULL; + if (cpu_has_perfctr_l2) + perf_pmu_unregister(&amd_l2_pmu); +fail_l2: + if (cpu_has_perfctr_nb) + perf_pmu_unregister(&amd_nb_pmu); + if (amd_uncore_l2) + free_percpu(amd_uncore_l2); +fail_nb: + if (amd_uncore_nb) + free_percpu(amd_uncore_nb); + +fail_nodev: + return ret; } device_initcall(amd_uncore_init); diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index ae6552a0701f..cfc6f9dfcd90 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -2947,10 +2947,7 @@ again: * extra registers. If we failed to take an extra * register, try the alternative. */ - if (idx % 2) - idx--; - else - idx++; + idx ^= 1; if (idx != reg1->idx % 6) { if (idx == 2) config1 >>= 8; diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 06fe3ed8b851..5433658e598d 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -97,6 +97,14 @@ static int show_cpuinfo(struct seq_file *m, void *v) if (cpu_has(c, i) && x86_cap_flags[i] != NULL) seq_printf(m, " %s", x86_cap_flags[i]); + seq_printf(m, "\nbugs\t\t:"); + for (i = 0; i < 32*NBUGINTS; i++) { + unsigned int bug_bit = 32*NCAPINTS + i; + + if (cpu_has_bug(c, bug_bit) && x86_bug_flags[i]) + seq_printf(m, " %s", x86_bug_flags[i]); + } + seq_printf(m, "\nbogomips\t: %lu.%02lu\n", c->loops_per_jiffy/(500000/HZ), (c->loops_per_jiffy/(5000/HZ)) % 100); diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index b6f794aa1693..4a8013d55947 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -38,7 +38,6 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) { X86_FEATURE_PTS, CR_EAX, 6, 0x00000006, 0 }, { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 }, { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 }, - { X86_FEATURE_XSAVEOPT, CR_EAX, 0, 0x0000000d, 1 }, { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 }, { X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 }, { X86_FEATURE_PROC_FEEDBACK, CR_EDX,11, 0x80000007, 0 }, diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 0d0c9d4ab6d5..47c410d99f5d 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1059,9 +1059,6 @@ ENTRY(mcount) END(mcount) ENTRY(ftrace_caller) - cmpl $0, function_trace_stop - jne ftrace_stub - pushl %eax pushl %ecx pushl %edx @@ -1093,8 +1090,6 @@ END(ftrace_caller) ENTRY(ftrace_regs_caller) pushf /* push flags before compare (in cs location) */ - cmpl $0, function_trace_stop - jne ftrace_restore_flags /* * i386 does not save SS and ESP when coming from kernel. @@ -1153,7 +1148,6 @@ GLOBAL(ftrace_regs_call) popf /* Pop flags at end (no addl to corrupt flags) */ jmp ftrace_ret -ftrace_restore_flags: popf jmp ftrace_stub #else /* ! CONFIG_DYNAMIC_FTRACE */ @@ -1162,9 +1156,6 @@ ENTRY(mcount) cmpl $__PAGE_OFFSET, %esp jb ftrace_stub /* Paging not enabled yet? */ - cmpl $0, function_trace_stop - jne ftrace_stub - cmpl $ftrace_stub, ftrace_trace_function jnz trace #ifdef CONFIG_FUNCTION_GRAPH_TRACER diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index c844f0816ab8..2fac1343a90b 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -207,7 +207,6 @@ ENDPROC(native_usergs_sysret64) */ .macro XCPT_FRAME start=1 offset=0 INTR_FRAME \start, RIP+\offset-ORIG_RAX - /*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/ .endm /* @@ -287,21 +286,21 @@ ENDPROC(native_usergs_sysret64) ENTRY(save_paranoid) XCPT_FRAME 1 RDI+8 cld - movq_cfi rdi, RDI+8 - movq_cfi rsi, RSI+8 + movq %rdi, RDI+8(%rsp) + movq %rsi, RSI+8(%rsp) movq_cfi rdx, RDX+8 movq_cfi rcx, RCX+8 movq_cfi rax, RAX+8 - movq_cfi r8, R8+8 - movq_cfi r9, R9+8 - movq_cfi r10, R10+8 - movq_cfi r11, R11+8 + movq %r8, R8+8(%rsp) + movq %r9, R9+8(%rsp) + movq %r10, R10+8(%rsp) + movq %r11, R11+8(%rsp) movq_cfi rbx, RBX+8 - movq_cfi rbp, RBP+8 - movq_cfi r12, R12+8 - movq_cfi r13, R13+8 - movq_cfi r14, R14+8 - movq_cfi r15, R15+8 + movq %rbp, RBP+8(%rsp) + movq %r12, R12+8(%rsp) + movq %r13, R13+8(%rsp) + movq %r14, R14+8(%rsp) + movq %r15, R15+8(%rsp) movl $1,%ebx movl $MSR_GS_BASE,%ecx rdmsr @@ -1387,21 +1386,21 @@ ENTRY(error_entry) CFI_ADJUST_CFA_OFFSET 15*8 /* oldrax contains error code */ cld - movq_cfi rdi, RDI+8 - movq_cfi rsi, RSI+8 - movq_cfi rdx, RDX+8 - movq_cfi rcx, RCX+8 - movq_cfi rax, RAX+8 - movq_cfi r8, R8+8 - movq_cfi r9, R9+8 - movq_cfi r10, R10+8 - movq_cfi r11, R11+8 + movq %rdi, RDI+8(%rsp) + movq %rsi, RSI+8(%rsp) + movq %rdx, RDX+8(%rsp) + movq %rcx, RCX+8(%rsp) + movq %rax, RAX+8(%rsp) + movq %r8, R8+8(%rsp) + movq %r9, R9+8(%rsp) + movq %r10, R10+8(%rsp) + movq %r11, R11+8(%rsp) movq_cfi rbx, RBX+8 - movq_cfi rbp, RBP+8 - movq_cfi r12, R12+8 - movq_cfi r13, R13+8 - movq_cfi r14, R14+8 - movq_cfi r15, R15+8 + movq %rbp, RBP+8(%rsp) + movq %r12, R12+8(%rsp) + movq %r13, R13+8(%rsp) + movq %r14, R14+8(%rsp) + movq %r15, R15+8(%rsp) xorl %ebx,%ebx testl $3,CS+8(%rsp) je error_kernelspace @@ -1419,6 +1418,7 @@ error_sti: * compat mode. Check for these here too. */ error_kernelspace: + CFI_REL_OFFSET rcx, RCX+8 incl %ebx leaq native_irq_return_iret(%rip),%rcx cmpq %rcx,RIP+8(%rsp) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index cbc4a91b131e..3386dc9aa333 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -703,6 +703,9 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, unsigned long return_hooker = (unsigned long) &return_to_handler; + if (unlikely(ftrace_graph_is_dead())) + return; + if (unlikely(atomic_read(¤t->tracing_graph_pause))) return; diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index c050a0153168..c73aecf10d34 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -46,10 +46,6 @@ END(function_hook) .endm ENTRY(ftrace_caller) - /* Check if tracing was disabled (quick check) */ - cmpl $0, function_trace_stop - jne ftrace_stub - ftrace_caller_setup /* regs go into 4th parameter (but make it NULL) */ movq $0, %rcx @@ -73,10 +69,6 @@ ENTRY(ftrace_regs_caller) /* Save the current flags before compare (in SS location)*/ pushfq - /* Check if tracing was disabled (quick check) */ - cmpl $0, function_trace_stop - jne ftrace_restore_flags - /* skip=8 to skip flags saved in SS */ ftrace_caller_setup 8 @@ -131,7 +123,7 @@ GLOBAL(ftrace_regs_call) popfq jmp ftrace_return -ftrace_restore_flags: + popfq jmp ftrace_stub @@ -141,9 +133,6 @@ END(ftrace_regs_caller) #else /* ! CONFIG_DYNAMIC_FTRACE */ ENTRY(function_hook) - cmpl $0, function_trace_stop - jne ftrace_stub - cmpq $ftrace_stub, ftrace_trace_function jnz trace diff --git a/arch/x86/kernel/pmc_atom.c b/arch/x86/kernel/pmc_atom.c new file mode 100644 index 000000000000..0c424a67985d --- /dev/null +++ b/arch/x86/kernel/pmc_atom.c @@ -0,0 +1,321 @@ +/* + * Intel Atom SOC Power Management Controller Driver + * Copyright (c) 2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/device.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/io.h> + +#include <asm/pmc_atom.h> + +#define DRIVER_NAME KBUILD_MODNAME + +struct pmc_dev { + u32 base_addr; + void __iomem *regmap; +#ifdef CONFIG_DEBUG_FS + struct dentry *dbgfs_dir; +#endif /* CONFIG_DEBUG_FS */ +}; + +static struct pmc_dev pmc_device; +static u32 acpi_base_addr; + +struct pmc_dev_map { + const char *name; + u32 bit_mask; +}; + +static const struct pmc_dev_map dev_map[] = { + {"0 - LPSS1_F0_DMA", BIT_LPSS1_F0_DMA}, + {"1 - LPSS1_F1_PWM1", BIT_LPSS1_F1_PWM1}, + {"2 - LPSS1_F2_PWM2", BIT_LPSS1_F2_PWM2}, + {"3 - LPSS1_F3_HSUART1", BIT_LPSS1_F3_HSUART1}, + {"4 - LPSS1_F4_HSUART2", BIT_LPSS1_F4_HSUART2}, + {"5 - LPSS1_F5_SPI", BIT_LPSS1_F5_SPI}, + {"6 - LPSS1_F6_Reserved", BIT_LPSS1_F6_XXX}, + {"7 - LPSS1_F7_Reserved", BIT_LPSS1_F7_XXX}, + {"8 - SCC_EMMC", BIT_SCC_EMMC}, + {"9 - SCC_SDIO", BIT_SCC_SDIO}, + {"10 - SCC_SDCARD", BIT_SCC_SDCARD}, + {"11 - SCC_MIPI", BIT_SCC_MIPI}, + {"12 - HDA", BIT_HDA}, + {"13 - LPE", BIT_LPE}, + {"14 - OTG", BIT_OTG}, + {"15 - USH", BIT_USH}, + {"16 - GBE", BIT_GBE}, + {"17 - SATA", BIT_SATA}, + {"18 - USB_EHCI", BIT_USB_EHCI}, + {"19 - SEC", BIT_SEC}, + {"20 - PCIE_PORT0", BIT_PCIE_PORT0}, + {"21 - PCIE_PORT1", BIT_PCIE_PORT1}, + {"22 - PCIE_PORT2", BIT_PCIE_PORT2}, + {"23 - PCIE_PORT3", BIT_PCIE_PORT3}, + {"24 - LPSS2_F0_DMA", BIT_LPSS2_F0_DMA}, + {"25 - LPSS2_F1_I2C1", BIT_LPSS2_F1_I2C1}, + {"26 - LPSS2_F2_I2C2", BIT_LPSS2_F2_I2C2}, + {"27 - LPSS2_F3_I2C3", BIT_LPSS2_F3_I2C3}, + {"28 - LPSS2_F3_I2C4", BIT_LPSS2_F4_I2C4}, + {"29 - LPSS2_F5_I2C5", BIT_LPSS2_F5_I2C5}, + {"30 - LPSS2_F6_I2C6", BIT_LPSS2_F6_I2C6}, + {"31 - LPSS2_F7_I2C7", BIT_LPSS2_F7_I2C7}, + {"32 - SMB", BIT_SMB}, + {"33 - OTG_SS_PHY", BIT_OTG_SS_PHY}, + {"34 - USH_SS_PHY", BIT_USH_SS_PHY}, + {"35 - DFX", BIT_DFX}, +}; + +static inline u32 pmc_reg_read(struct pmc_dev *pmc, int reg_offset) +{ + return readl(pmc->regmap + reg_offset); +} + +static inline void pmc_reg_write(struct pmc_dev *pmc, int reg_offset, u32 val) +{ + writel(val, pmc->regmap + reg_offset); +} + +static void pmc_power_off(void) +{ + u16 pm1_cnt_port; + u32 pm1_cnt_value; + + pr_info("Preparing to enter system sleep state S5\n"); + + pm1_cnt_port = acpi_base_addr + PM1_CNT; + + pm1_cnt_value = inl(pm1_cnt_port); + pm1_cnt_value &= SLEEP_TYPE_MASK; + pm1_cnt_value |= SLEEP_TYPE_S5; + pm1_cnt_value |= SLEEP_ENABLE; + + outl(pm1_cnt_value, pm1_cnt_port); +} + +static void pmc_hw_reg_setup(struct pmc_dev *pmc) +{ + /* + * Disable PMC S0IX_WAKE_EN events coming from: + * - LPC clock run + * - GPIO_SUS ored dedicated IRQs + * - GPIO_SCORE ored dedicated IRQs + * - GPIO_SUS shared IRQ + * - GPIO_SCORE shared IRQ + */ + pmc_reg_write(pmc, PMC_S0IX_WAKE_EN, (u32)PMC_WAKE_EN_SETTING); +} + +#ifdef CONFIG_DEBUG_FS +static int pmc_dev_state_show(struct seq_file *s, void *unused) +{ + struct pmc_dev *pmc = s->private; + u32 func_dis, func_dis_2, func_dis_index; + u32 d3_sts_0, d3_sts_1, d3_sts_index; + int dev_num, dev_index, reg_index; + + func_dis = pmc_reg_read(pmc, PMC_FUNC_DIS); + func_dis_2 = pmc_reg_read(pmc, PMC_FUNC_DIS_2); + d3_sts_0 = pmc_reg_read(pmc, PMC_D3_STS_0); + d3_sts_1 = pmc_reg_read(pmc, PMC_D3_STS_1); + + dev_num = ARRAY_SIZE(dev_map); + + for (dev_index = 0; dev_index < dev_num; dev_index++) { + reg_index = dev_index / PMC_REG_BIT_WIDTH; + if (reg_index) { + func_dis_index = func_dis_2; + d3_sts_index = d3_sts_1; + } else { + func_dis_index = func_dis; + d3_sts_index = d3_sts_0; + } + + seq_printf(s, "Dev: %-32s\tState: %s [%s]\n", + dev_map[dev_index].name, + dev_map[dev_index].bit_mask & func_dis_index ? + "Disabled" : "Enabled ", + dev_map[dev_index].bit_mask & d3_sts_index ? + "D3" : "D0"); + } + return 0; +} + +static int pmc_dev_state_open(struct inode *inode, struct file *file) +{ + return single_open(file, pmc_dev_state_show, inode->i_private); +} + +static const struct file_operations pmc_dev_state_ops = { + .open = pmc_dev_state_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int pmc_sleep_tmr_show(struct seq_file *s, void *unused) +{ + struct pmc_dev *pmc = s->private; + u64 s0ir_tmr, s0i1_tmr, s0i2_tmr, s0i3_tmr, s0_tmr; + + s0ir_tmr = (u64)pmc_reg_read(pmc, PMC_S0IR_TMR) << PMC_TMR_SHIFT; + s0i1_tmr = (u64)pmc_reg_read(pmc, PMC_S0I1_TMR) << PMC_TMR_SHIFT; + s0i2_tmr = (u64)pmc_reg_read(pmc, PMC_S0I2_TMR) << PMC_TMR_SHIFT; + s0i3_tmr = (u64)pmc_reg_read(pmc, PMC_S0I3_TMR) << PMC_TMR_SHIFT; + s0_tmr = (u64)pmc_reg_read(pmc, PMC_S0_TMR) << PMC_TMR_SHIFT; + + seq_printf(s, "S0IR Residency:\t%lldus\n", s0ir_tmr); + seq_printf(s, "S0I1 Residency:\t%lldus\n", s0i1_tmr); + seq_printf(s, "S0I2 Residency:\t%lldus\n", s0i2_tmr); + seq_printf(s, "S0I3 Residency:\t%lldus\n", s0i3_tmr); + seq_printf(s, "S0 Residency:\t%lldus\n", s0_tmr); + return 0; +} + +static int pmc_sleep_tmr_open(struct inode *inode, struct file *file) +{ + return single_open(file, pmc_sleep_tmr_show, inode->i_private); +} + +static const struct file_operations pmc_sleep_tmr_ops = { + .open = pmc_sleep_tmr_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void pmc_dbgfs_unregister(struct pmc_dev *pmc) +{ + if (!pmc->dbgfs_dir) + return; + + debugfs_remove_recursive(pmc->dbgfs_dir); + pmc->dbgfs_dir = NULL; +} + +static int pmc_dbgfs_register(struct pmc_dev *pmc, struct pci_dev *pdev) +{ + struct dentry *dir, *f; + + dir = debugfs_create_dir("pmc_atom", NULL); + if (!dir) + return -ENOMEM; + + f = debugfs_create_file("dev_state", S_IFREG | S_IRUGO, + dir, pmc, &pmc_dev_state_ops); + if (!f) { + dev_err(&pdev->dev, "dev_states register failed\n"); + goto err; + } + f = debugfs_create_file("sleep_state", S_IFREG | S_IRUGO, + dir, pmc, &pmc_sleep_tmr_ops); + if (!f) { + dev_err(&pdev->dev, "sleep_state register failed\n"); + goto err; + } + pmc->dbgfs_dir = dir; + return 0; +err: + pmc_dbgfs_unregister(pmc); + return -ENODEV; +} +#endif /* CONFIG_DEBUG_FS */ + +static int pmc_setup_dev(struct pci_dev *pdev) +{ + struct pmc_dev *pmc = &pmc_device; + int ret; + + /* Obtain ACPI base address */ + pci_read_config_dword(pdev, ACPI_BASE_ADDR_OFFSET, &acpi_base_addr); + acpi_base_addr &= ACPI_BASE_ADDR_MASK; + + /* Install power off function */ + if (acpi_base_addr != 0 && pm_power_off == NULL) + pm_power_off = pmc_power_off; + + pci_read_config_dword(pdev, PMC_BASE_ADDR_OFFSET, &pmc->base_addr); + pmc->base_addr &= PMC_BASE_ADDR_MASK; + + pmc->regmap = ioremap_nocache(pmc->base_addr, PMC_MMIO_REG_LEN); + if (!pmc->regmap) { + dev_err(&pdev->dev, "error: ioremap failed\n"); + return -ENOMEM; + } + + /* PMC hardware registers setup */ + pmc_hw_reg_setup(pmc); + +#ifdef CONFIG_DEBUG_FS + ret = pmc_dbgfs_register(pmc, pdev); + if (ret) { + iounmap(pmc->regmap); + return ret; + } +#endif /* CONFIG_DEBUG_FS */ + return 0; +} + +/* + * Data for PCI driver interface + * + * This data only exists for exporting the supported + * PCI ids via MODULE_DEVICE_TABLE. We do not actually + * register a pci_driver, because lpc_ich will register + * a driver on the same PCI id. + */ +static const struct pci_device_id pmc_pci_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_VLV_PMC) }, + { 0, }, +}; + +MODULE_DEVICE_TABLE(pci, pmc_pci_ids); + +static int __init pmc_atom_init(void) +{ + int err = -ENODEV; + struct pci_dev *pdev = NULL; + const struct pci_device_id *ent; + + /* We look for our device - PCU PMC + * we assume that there is max. one device. + * + * We can't use plain pci_driver mechanism, + * as the device is really a multiple function device, + * main driver that binds to the pci_device is lpc_ich + * and have to find & bind to the device this way. + */ + for_each_pci_dev(pdev) { + ent = pci_match_id(pmc_pci_ids, pdev); + if (ent) { + err = pmc_setup_dev(pdev); + goto out; + } + } + /* Device not found. */ +out: + return err; +} + +module_init(pmc_atom_init); +/* no module_exit, this driver shouldn't be unloaded */ + +MODULE_AUTHOR("Aubrey Li <aubrey.li@linux.intel.com>"); +MODULE_DESCRIPTION("Intel Atom SOC Power Management Controller Interface"); +MODULE_LICENSE("GPL v2"); diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 52b1157c53eb..17962e667a91 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -28,6 +28,7 @@ #include <linux/mc146818rtc.h> #include <asm/realmode.h> #include <asm/x86_init.h> +#include <asm/efi.h> /* * Power off function, if any @@ -401,12 +402,25 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { static int __init reboot_init(void) { + int rv; + /* * Only do the DMI check if reboot_type hasn't been overridden * on the command line */ - if (reboot_default) - dmi_check_system(reboot_dmi_table); + if (!reboot_default) + return 0; + + /* + * The DMI quirks table takes precedence. If no quirks entry + * matches and the ACPI Hardware Reduced bit is set, force EFI + * reboot. + */ + rv = dmi_check_system(reboot_dmi_table); + + if (!rv && efi_reboot_required()) + reboot_type = BOOT_EFI; + return 0; } core_initcall(reboot_init); @@ -528,11 +542,7 @@ static void native_machine_emergency_restart(void) break; case BOOT_EFI: - if (efi_enabled(EFI_RUNTIME_SERVICES)) - efi.reset_system(reboot_mode == REBOOT_WARM ? - EFI_RESET_WARM : - EFI_RESET_COLD, - EFI_SUCCESS, 0, NULL); + efi_reboot(reboot_mode, NULL); reboot_type = BOOT_BIOS; break; diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c index 2a26819bb6a8..80eab01c1a68 100644 --- a/arch/x86/kernel/resource.c +++ b/arch/x86/kernel/resource.c @@ -37,10 +37,12 @@ static void remove_e820_regions(struct resource *avail) void arch_remove_reservations(struct resource *avail) { - /* Trim out BIOS areas (low 1MB and high 2MB) and E820 regions */ + /* + * Trim out BIOS area (high 2MB) and E820 regions. We do not remove + * the low 1MB unconditionally, as this area is needed for some ISA + * cards requiring a memory range, e.g. the i82365 PCMCIA controller. + */ if (avail->flags & IORESOURCE_MEM) { - if (avail->start < BIOS_END) - avail->start = BIOS_END; resource_clip(avail, BIOS_ROM_BASE, BIOS_ROM_END); remove_e820_regions(avail); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 78a0e6298922..41ead8d3bc0b 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -924,10 +924,10 @@ void __init setup_arch(char **cmdline_p) #endif #ifdef CONFIG_EFI if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, - "EL32", 4)) { + EFI32_LOADER_SIGNATURE, 4)) { set_bit(EFI_BOOT, &efi.flags); } else if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, - "EL64", 4)) { + EFI64_LOADER_SIGNATURE, 4)) { set_bit(EFI_BOOT, &efi.flags); set_bit(EFI_64BIT, &efi.flags); } diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index ea030319b321..b6025f9e36c6 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -234,9 +234,6 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc) return ns; } -/* XXX surely we already have this someplace in the kernel?! */ -#define DIV_ROUND(n, d) (((n) + ((d) / 2)) / (d)) - static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) { unsigned long long tsc_now, ns_now; @@ -259,7 +256,9 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) * time function is continuous; see the comment near struct * cyc2ns_data. */ - data->cyc2ns_mul = DIV_ROUND(NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR, cpu_khz); + data->cyc2ns_mul = + DIV_ROUND_CLOSEST(NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR, + cpu_khz); data->cyc2ns_shift = CYC2NS_SCALE_FACTOR; data->cyc2ns_offset = ns_now - mul_u64_u32_shr(tsc_now, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR); @@ -951,7 +950,7 @@ core_initcall(cpufreq_tsc); static struct clocksource clocksource_tsc; /* - * We compare the TSC to the cycle_last value in the clocksource + * We used to compare the TSC to the cycle_last value in the clocksource * structure to avoid a nasty time-warp. This can be observed in a * very small window right after one CPU updated cycle_last under * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which @@ -961,26 +960,23 @@ static struct clocksource clocksource_tsc; * due to the unsigned delta calculation of the time keeping core * code, which is necessary to support wrapping clocksources like pm * timer. + * + * This sanity check is now done in the core timekeeping code. + * checking the result of read_tsc() - cycle_last for being negative. + * That works because CLOCKSOURCE_MASK(64) does not mask out any bit. */ static cycle_t read_tsc(struct clocksource *cs) { - cycle_t ret = (cycle_t)get_cycles(); - - return ret >= clocksource_tsc.cycle_last ? - ret : clocksource_tsc.cycle_last; -} - -static void resume_tsc(struct clocksource *cs) -{ - if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3)) - clocksource_tsc.cycle_last = 0; + return (cycle_t)get_cycles(); } +/* + * .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc() + */ static struct clocksource clocksource_tsc = { .name = "tsc", .rating = 300, .read = read_tsc, - .resume = resume_tsc, .mask = CLOCKSOURCE_MASK(64), .flags = CLOCK_SOURCE_IS_CONTINUOUS | CLOCK_SOURCE_MUST_VERIFY, diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index ea5b5709aa76..e1e1e80fc6a6 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -81,10 +81,10 @@ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, if (!show_unhandled_signals) return; - pr_notice_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", - level, current->comm, task_pid_nr(current), - message, regs->ip, regs->cs, - regs->sp, regs->ax, regs->si, regs->di); + printk_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", + level, current->comm, task_pid_nr(current), + message, regs->ip, regs->cs, + regs->sp, regs->ax, regs->si, regs->di); } static int addr_to_vsyscall_nr(unsigned long addr) diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c index 9531fbb123ba..c7d791f32b98 100644 --- a/arch/x86/kernel/vsyscall_gtod.c +++ b/arch/x86/kernel/vsyscall_gtod.c @@ -31,29 +31,30 @@ void update_vsyscall(struct timekeeper *tk) gtod_write_begin(vdata); /* copy vsyscall data */ - vdata->vclock_mode = tk->clock->archdata.vclock_mode; - vdata->cycle_last = tk->clock->cycle_last; - vdata->mask = tk->clock->mask; - vdata->mult = tk->mult; - vdata->shift = tk->shift; + vdata->vclock_mode = tk->tkr.clock->archdata.vclock_mode; + vdata->cycle_last = tk->tkr.cycle_last; + vdata->mask = tk->tkr.mask; + vdata->mult = tk->tkr.mult; + vdata->shift = tk->tkr.shift; vdata->wall_time_sec = tk->xtime_sec; - vdata->wall_time_snsec = tk->xtime_nsec; + vdata->wall_time_snsec = tk->tkr.xtime_nsec; vdata->monotonic_time_sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; - vdata->monotonic_time_snsec = tk->xtime_nsec + vdata->monotonic_time_snsec = tk->tkr.xtime_nsec + ((u64)tk->wall_to_monotonic.tv_nsec - << tk->shift); + << tk->tkr.shift); while (vdata->monotonic_time_snsec >= - (((u64)NSEC_PER_SEC) << tk->shift)) { + (((u64)NSEC_PER_SEC) << tk->tkr.shift)) { vdata->monotonic_time_snsec -= - ((u64)NSEC_PER_SEC) << tk->shift; + ((u64)NSEC_PER_SEC) << tk->tkr.shift; vdata->monotonic_time_sec++; } vdata->wall_time_coarse_sec = tk->xtime_sec; - vdata->wall_time_coarse_nsec = (long)(tk->xtime_nsec >> tk->shift); + vdata->wall_time_coarse_nsec = (long)(tk->tkr.xtime_nsec >> + tk->tkr.shift); vdata->monotonic_time_coarse_sec = vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec; |
