diff options
Diffstat (limited to 'arch/x86')
74 files changed, 1009 insertions, 1036 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 43f6efae73cf..ddf6d788470e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1071,7 +1071,7 @@ config X86_MCE_THRESHOLD def_bool y config X86_MCE_INJECT - depends on X86_MCE + depends on X86_MCE && X86_LOCAL_APIC tristate "Machine check injector support" ---help--- Provide support for injecting machine checks for testing purposes. diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index e5612f3e3b57..9b42b6d1e902 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -333,6 +333,7 @@ size_t strnlen(const char *s, size_t maxlen); unsigned int atou(const char *s); unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base); size_t strlen(const char *s); +char *strchr(const char *s, int c); /* tty.c */ void puts(const char *); diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index a66854d99ee1..8b7c9e75edcb 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -11,6 +11,7 @@ */ #include "misc.h" #include "error.h" +#include "../boot.h" #include <generated/compile.h> #include <linux/module.h> @@ -52,15 +53,22 @@ static unsigned long get_boot_seed(void) #include "../../lib/kaslr.c" struct mem_vector { - unsigned long start; - unsigned long size; + unsigned long long start; + unsigned long long size; }; +/* Only supporting at most 4 unusable memmap regions with kaslr */ +#define MAX_MEMMAP_REGIONS 4 + +static bool memmap_too_large; + enum mem_avoid_index { MEM_AVOID_ZO_RANGE = 0, MEM_AVOID_INITRD, MEM_AVOID_CMDLINE, MEM_AVOID_BOOTPARAMS, + MEM_AVOID_MEMMAP_BEGIN, + MEM_AVOID_MEMMAP_END = MEM_AVOID_MEMMAP_BEGIN + MAX_MEMMAP_REGIONS - 1, MEM_AVOID_MAX, }; @@ -77,6 +85,123 @@ static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two) return true; } +/** + * _memparse - Parse a string with mem suffixes into a number + * @ptr: Where parse begins + * @retptr: (output) Optional pointer to next char after parse completes + * + * Parses a string into a number. The number stored at @ptr is + * potentially suffixed with K, M, G, T, P, E. + */ +static unsigned long long _memparse(const char *ptr, char **retptr) +{ + char *endptr; /* Local pointer to end of parsed string */ + + unsigned long long ret = simple_strtoull(ptr, &endptr, 0); + + switch (*endptr) { + case 'E': + case 'e': + ret <<= 10; + case 'P': + case 'p': + ret <<= 10; + case 'T': + case 't': + ret <<= 10; + case 'G': + case 'g': + ret <<= 10; + case 'M': + case 'm': + ret <<= 10; + case 'K': + case 'k': + ret <<= 10; + endptr++; + default: + break; + } + + if (retptr) + *retptr = endptr; + + return ret; +} + +static int +parse_memmap(char *p, unsigned long long *start, unsigned long long *size) +{ + char *oldp; + + if (!p) + return -EINVAL; + + /* We don't care about this option here */ + if (!strncmp(p, "exactmap", 8)) + return -EINVAL; + + oldp = p; + *size = _memparse(p, &p); + if (p == oldp) + return -EINVAL; + + switch (*p) { + case '@': + /* Skip this region, usable */ + *start = 0; + *size = 0; + return 0; + case '#': + case '$': + case '!': + *start = _memparse(p + 1, &p); + return 0; + } + + return -EINVAL; +} + +static void mem_avoid_memmap(void) +{ + char arg[128]; + int rc; + int i; + char *str; + + /* See if we have any memmap areas */ + rc = cmdline_find_option("memmap", arg, sizeof(arg)); + if (rc <= 0) + return; + + i = 0; + str = arg; + while (str && (i < MAX_MEMMAP_REGIONS)) { + int rc; + unsigned long long start, size; + char *k = strchr(str, ','); + + if (k) + *k++ = 0; + + rc = parse_memmap(str, &start, &size); + if (rc < 0) + break; + str = k; + /* A usable region that should not be skipped */ + if (size == 0) + continue; + + mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].start = start; + mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].size = size; + i++; + } + + /* More than 4 memmaps, fail kaslr */ + if ((i >= MAX_MEMMAP_REGIONS) && str) + memmap_too_large = true; +} + /* * In theory, KASLR can put the kernel anywhere in the range of [16M, 64T). * The mem_avoid array is used to store the ranges that need to be avoided @@ -197,6 +322,9 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size, /* We don't need to set a mapping for setup_data. */ + /* Mark the memmap regions we need to avoid */ + mem_avoid_memmap(); + #ifdef CONFIG_X86_VERBOSE_BOOTUP /* Make sure video RAM can be used. */ add_identity_map(0, PMD_SIZE); @@ -379,6 +507,12 @@ static unsigned long find_random_phys_addr(unsigned long minimum, int i; unsigned long addr; + /* Check if we had too many memmaps. */ + if (memmap_too_large) { + debug_putstr("Aborted e820 scan (more than 4 memmap= args)!\n"); + return 0; + } + /* Make sure minimum is aligned. */ minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN); @@ -456,7 +590,7 @@ void choose_random_location(unsigned long input, /* Walk e820 and find a random address. */ random_addr = find_random_phys_addr(min_addr, output_size); if (!random_addr) { - warn("KASLR disabled: could not find suitable E820 region!"); + warn("Physical KASLR disabled: no suitable memory region!"); } else { /* Update the new physical address location. */ if (*output != random_addr) { diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index 9e240fcba784..5457b02fc050 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c @@ -156,3 +156,16 @@ char *strstr(const char *s1, const char *s2) } return NULL; } + +/** + * strchr - Find the first occurrence of the character c in the string s. + * @s: the string to be searched + * @c: the character to search for + */ +char *strchr(const char *s, int c) +{ + while (*s != (char)c) + if (*s++ == '\0') + return NULL; + return (char *)s; +} diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile index 1d392c39fe56..b8ccdb5c9244 100644 --- a/arch/x86/events/Makefile +++ b/arch/x86/events/Makefile @@ -1,11 +1,4 @@ -obj-y += core.o - -obj-$(CONFIG_CPU_SUP_AMD) += amd/core.o amd/uncore.o -obj-$(CONFIG_PERF_EVENTS_AMD_POWER) += amd/power.o -obj-$(CONFIG_X86_LOCAL_APIC) += amd/ibs.o msr.o -ifdef CONFIG_AMD_IOMMU -obj-$(CONFIG_CPU_SUP_AMD) += amd/iommu.o -endif - -obj-$(CONFIG_CPU_SUP_INTEL) += msr.o +obj-y += core.o +obj-y += amd/ +obj-$(CONFIG_X86_LOCAL_APIC) += msr.o obj-$(CONFIG_CPU_SUP_INTEL) += intel/ diff --git a/arch/x86/events/amd/Makefile b/arch/x86/events/amd/Makefile new file mode 100644 index 000000000000..b1da46f396e0 --- /dev/null +++ b/arch/x86/events/amd/Makefile @@ -0,0 +1,7 @@ +obj-$(CONFIG_CPU_SUP_AMD) += core.o uncore.o +obj-$(CONFIG_PERF_EVENTS_AMD_POWER) += power.o +obj-$(CONFIG_X86_LOCAL_APIC) += ibs.o +ifdef CONFIG_AMD_IOMMU +obj-$(CONFIG_CPU_SUP_AMD) += iommu.o +endif + diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index a0b1bdb3ad42..4d1f7f2d9aff 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -22,13 +22,17 @@ #define NUM_COUNTERS_NB 4 #define NUM_COUNTERS_L2 4 -#define MAX_COUNTERS NUM_COUNTERS_NB +#define NUM_COUNTERS_L3 6 +#define MAX_COUNTERS 6 #define RDPMC_BASE_NB 6 -#define RDPMC_BASE_L2 10 +#define RDPMC_BASE_LLC 10 #define COUNTER_SHIFT 16 +static int num_counters_llc; +static int num_counters_nb; + static HLIST_HEAD(uncore_unused_list); struct amd_uncore { @@ -45,30 +49,30 @@ struct amd_uncore { }; static struct amd_uncore * __percpu *amd_uncore_nb; -static struct amd_uncore * __percpu *amd_uncore_l2; +static struct amd_uncore * __percpu *amd_uncore_llc; static struct pmu amd_nb_pmu; -static struct pmu amd_l2_pmu; +static struct pmu amd_llc_pmu; static cpumask_t amd_nb_active_mask; -static cpumask_t amd_l2_active_mask; +static cpumask_t amd_llc_active_mask; static bool is_nb_event(struct perf_event *event) { return event->pmu->type == amd_nb_pmu.type; } -static bool is_l2_event(struct perf_event *event) +static bool is_llc_event(struct perf_event *event) { - return event->pmu->type == amd_l2_pmu.type; + return event->pmu->type == amd_llc_pmu.type; } static struct amd_uncore *event_to_amd_uncore(struct perf_event *event) { if (is_nb_event(event) && amd_uncore_nb) return *per_cpu_ptr(amd_uncore_nb, event->cpu); - else if (is_l2_event(event) && amd_uncore_l2) - return *per_cpu_ptr(amd_uncore_l2, event->cpu); + else if (is_llc_event(event) && amd_uncore_llc) + return *per_cpu_ptr(amd_uncore_llc, event->cpu); return NULL; } @@ -183,16 +187,16 @@ static int amd_uncore_event_init(struct perf_event *event) return -ENOENT; /* - * NB and L2 counters (MSRs) are shared across all cores that share the - * same NB / L2 cache. Interrupts can be directed to a single target - * core, however, event counts generated by processes running on other - * cores cannot be masked out. So we do not support sampling and - * per-thread events. + * NB and Last level cache counters (MSRs) are shared across all cores + * that share the same NB / Last level cache. Interrupts can be directed + * to a single target core, however, event counts generated by processes + * running on other cores cannot be masked out. So we do not support + * sampling and per-thread events. */ if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) return -EINVAL; - /* NB and L2 counters do not have usr/os/guest/host bits */ + /* NB and Last level cache counters do not have usr/os/guest/host bits */ if (event->attr.exclude_user || event->attr.exclude_kernel || event->attr.exclude_host || event->attr.exclude_guest) return -EINVAL; @@ -226,8 +230,8 @@ static ssize_t amd_uncore_attr_show_cpumask(struct device *dev, if (pmu->type == amd_nb_pmu.type) active_mask = &amd_nb_active_mask; - else if (pmu->type == amd_l2_pmu.type) - active_mask = &amd_l2_active_mask; + else if (pmu->type == amd_llc_pmu.type) + active_mask = &amd_llc_active_mask; else return 0; @@ -244,30 +248,47 @@ static struct attribute_group amd_uncore_attr_group = { .attrs = amd_uncore_attrs, }; -PMU_FORMAT_ATTR(event, "config:0-7,32-35"); -PMU_FORMAT_ATTR(umask, "config:8-15"); - -static struct attribute *amd_uncore_format_attr[] = { - &format_attr_event.attr, - &format_attr_umask.attr, - NULL, -}; - -static struct attribute_group amd_uncore_format_group = { - .name = "format", - .attrs = amd_uncore_format_attr, +/* + * Similar to PMU_FORMAT_ATTR but allowing for format_attr to be assigned based + * on family + */ +#define AMD_FORMAT_ATTR(_dev, _name, _format) \ +static ssize_t \ +_dev##_show##_name(struct device *dev, \ + struct device_attribute *attr, \ + char *page) \ +{ \ + BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ + return sprintf(page, _format "\n"); \ +} \ +static struct device_attribute format_attr_##_dev##_name = __ATTR_RO(_dev); + +/* Used for each uncore counter type */ +#define AMD_ATTRIBUTE(_name) \ +static struct attribute *amd_uncore_format_attr_##_name[] = { \ + &format_attr_event_##_name.attr, \ + &format_attr_umask.attr, \ + NULL, \ +}; \ +static struct attribute_group amd_uncore_format_group_##_name = { \ + .name = "format", \ + .attrs = amd_uncore_format_attr_##_name, \ +}; \ +static const struct attribute_group *amd_uncore_attr_groups_##_name[] = { \ + &amd_uncore_attr_group, \ + &amd_uncore_format_group_##_name, \ + NULL, \ }; -static const struct attribute_group *amd_uncore_attr_groups[] = { - &amd_uncore_attr_group, - &amd_uncore_format_group, - NULL, -}; +AMD_FORMAT_ATTR(event, , "config:0-7,32-35"); +AMD_FORMAT_ATTR(umask, , "config:8-15"); +AMD_FORMAT_ATTR(event, _df, "config:0-7,32-35,59-60"); +AMD_FORMAT_ATTR(event, _l3, "config:0-7"); +AMD_ATTRIBUTE(df); +AMD_ATTRIBUTE(l3); static struct pmu amd_nb_pmu = { .task_ctx_nr = perf_invalid_context, - .attr_groups = amd_uncore_attr_groups, - .name = "amd_nb", .event_init = amd_uncore_event_init, .add = amd_uncore_add, .del = amd_uncore_del, @@ -276,10 +297,8 @@ static struct pmu amd_nb_pmu = { .read = amd_uncore_read, }; -static struct pmu amd_l2_pmu = { +static struct pmu amd_llc_pmu = { .task_ctx_nr = perf_invalid_context, - .attr_groups = amd_uncore_attr_groups, - .name = "amd_l2", .event_init = amd_uncore_event_init, .add = amd_uncore_add, .del = amd_uncore_del, @@ -296,14 +315,14 @@ static struct amd_uncore *amd_uncore_alloc(unsigned int cpu) static int amd_uncore_cpu_up_prepare(unsigned int cpu) { - struct amd_uncore *uncore_nb = NULL, *uncore_l2; + struct amd_uncore *uncore_nb = NULL, *uncore_llc; if (amd_uncore_nb) { uncore_nb = amd_uncore_alloc(cpu); if (!uncore_nb) goto fail; uncore_nb->cpu = cpu; - uncore_nb->num_counters = NUM_COUNTERS_NB; + uncore_nb->num_counters = num_counters_nb; uncore_nb->rdpmc_base = RDPMC_BASE_NB; uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL; uncore_nb->active_mask = &amd_nb_active_mask; @@ -312,18 +331,18 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu) *per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb; } - if (amd_uncore_l2) { - uncore_l2 = amd_uncore_alloc(cpu); - if (!uncore_l2) + if (amd_uncore_llc) { + uncore_llc = amd_uncore_alloc(cpu); + if (!uncore_llc) goto fail; - uncore_l2->cpu = cpu; - uncore_l2->num_counters = NUM_COUNTERS_L2; - uncore_l2->rdpmc_base = RDPMC_BASE_L2; - uncore_l2->msr_base = MSR_F16H_L2I_PERF_CTL; - uncore_l2->active_mask = &amd_l2_active_mask; - uncore_l2->pmu = &amd_l2_pmu; - uncore_l2->id = -1; - *per_cpu_ptr(amd_uncore_l2, cpu) = uncore_l2; + uncore_llc->cpu = cpu; + uncore_llc->num_counters = num_counters_llc; + uncore_llc->rdpmc_base = RDPMC_BASE_LLC; + uncore_llc->msr_base = MSR_F16H_L2I_PERF_CTL; + uncore_llc->active_mask = &amd_llc_active_mask; + uncore_llc->pmu = &amd_llc_pmu; + uncore_llc->id = -1; + *per_cpu_ptr(amd_uncore_llc, cpu) = uncore_llc; } return 0; @@ -376,17 +395,17 @@ static int amd_uncore_cpu_starting(unsigned int cpu) *per_cpu_ptr(amd_uncore_nb, cpu) = uncore; } - if (amd_uncore_l2) { + if (amd_uncore_llc) { unsigned int apicid = cpu_data(cpu).apicid; unsigned int nshared; - uncore = *per_cpu_ptr(amd_uncore_l2, cpu); + uncore = *per_cpu_ptr(amd_uncore_llc, cpu); cpuid_count(0x8000001d, 2, &eax, &ebx, &ecx, &edx); nshared = ((eax >> 14) & 0xfff) + 1; uncore->id = apicid - (apicid % nshared); - uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_l2); - *per_cpu_ptr(amd_uncore_l2, cpu) = uncore; + uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_llc); + *per_cpu_ptr(amd_uncore_llc, cpu) = uncore; } return 0; @@ -419,8 +438,8 @@ static int amd_uncore_cpu_online(unsigned int cpu) if (amd_uncore_nb) uncore_online(cpu, amd_uncore_nb); - if (amd_uncore_l2) - uncore_online(cpu, amd_uncore_l2); + if (amd_uncore_llc) + uncore_online(cpu, amd_uncore_llc); return 0; } @@ -456,8 +475,8 @@ static int amd_uncore_cpu_down_prepare(unsigned int cpu) if (amd_uncore_nb) uncore_down_prepare(cpu, amd_uncore_nb); - if (amd_uncore_l2) - uncore_down_prepare(cpu, amd_uncore_l2); + if (amd_uncore_llc) + uncore_down_prepare(cpu, amd_uncore_llc); return 0; } @@ -479,8 +498,8 @@ static int amd_uncore_cpu_dead(unsigned int cpu) if (amd_uncore_nb) uncore_dead(cpu, amd_uncore_nb); - if (amd_uncore_l2) - uncore_dead(cpu, amd_uncore_l2); + if (amd_uncore_llc) + uncore_dead(cpu, amd_uncore_llc); return 0; } @@ -492,6 +511,47 @@ static int __init amd_uncore_init(void) if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) goto fail_nodev; + switch(boot_cpu_data.x86) { + case 23: + /* Family 17h: */ + num_counters_nb = NUM_COUNTERS_NB; + num_counters_llc = NUM_COUNTERS_L3; + /* + * For Family17h, the NorthBridge counters are + * re-purposed as Data Fabric counters. Also, support is + * added for L3 counters. The pmus are exported based on + * family as either L2 or L3 and NB or DF. + */ + amd_nb_pmu.name = "amd_df"; + amd_llc_pmu.name = "amd_l3"; + format_attr_event_df.show = &event_show_df; + format_attr_event_l3.show = &event_show_l3; + break; + case 22: + /* Family 16h - may change: */ + num_counters_nb = NUM_COUNTERS_NB; + num_counters_llc = NUM_COUNTERS_L2; + amd_nb_pmu.name = "amd_nb"; + amd_llc_pmu.name = "amd_l2"; + format_attr_event_df = format_attr_event; + format_attr_event_l3 = format_attr_event; + break; + default: + /* + * All prior families have the same number of + * NorthBridge and Last Level Cache counters + */ + num_counters_nb = NUM_COUNTERS_NB; + num_counters_llc = NUM_COUNTERS_L2; + amd_nb_pmu.name = "amd_nb"; + amd_llc_pmu.name = "amd_l2"; + format_attr_event_df = format_attr_event; + format_attr_event_l3 = format_attr_event; + break; + } + amd_nb_pmu.attr_groups = amd_uncore_attr_groups_df; + amd_llc_pmu.attr_groups = amd_uncore_attr_groups_l3; + if (!boot_cpu_has(X86_FEATURE_TOPOEXT)) goto fail_nodev; @@ -510,16 +570,16 @@ static int __init amd_uncore_init(void) } if (boot_cpu_has(X86_FEATURE_PERFCTR_L2)) { - amd_uncore_l2 = alloc_percpu(struct amd_uncore *); - if (!amd_uncore_l2) { + amd_uncore_llc = alloc_percpu(struct amd_uncore *); + if (!amd_uncore_llc) { ret = -ENOMEM; - goto fail_l2; + goto fail_llc; } - ret = perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1); + ret = perf_pmu_register(&amd_llc_pmu, amd_llc_pmu.name, -1); if (ret) - goto fail_l2; + goto fail_llc; - pr_info("perf: AMD L2I counters detected\n"); + pr_info("perf: AMD LLC counters detected\n"); ret = 0; } @@ -529,7 +589,7 @@ static int __init amd_uncore_init(void) if (cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP, "perf/x86/amd/uncore:prepare", amd_uncore_cpu_up_prepare, amd_uncore_cpu_dead)) - goto fail_l2; + goto fail_llc; if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING, "perf/x86/amd/uncore:starting", @@ -546,11 +606,11 @@ fail_start: cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING); fail_prep: cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP); -fail_l2: +fail_llc: if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) perf_pmu_unregister(&amd_nb_pmu); - if (amd_uncore_l2) - free_percpu(amd_uncore_l2); + if (amd_uncore_llc) + free_percpu(amd_uncore_llc); fail_nb: if (amd_uncore_nb) free_percpu(amd_uncore_nb); diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 0c5fbc68e82d..eff8e36aaf72 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -195,7 +195,7 @@ static inline void native_apic_msr_write(u32 reg, u32 v) static inline void native_apic_msr_eoi_write(u32 reg, u32 v) { - wrmsr_notrace(APIC_BASE_MSR + (APIC_EOI >> 4), APIC_EOI_ACK, 0); + __wrmsr(APIC_BASE_MSR + (APIC_EOI >> 4), APIC_EOI_ACK, 0); } static inline u32 native_apic_msr_read(u32 reg) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index eafee3161d1c..d45ab4b53396 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -100,7 +100,7 @@ #define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */ #define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ #define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ -/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */ +#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */ #define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ #define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ #define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ @@ -288,6 +288,7 @@ #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ +#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ #define X86_FEATURE_RDPID (16*32+ 22) /* RDPID instruction */ /* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */ @@ -320,5 +321,4 @@ #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ - #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h index ced283ac79df..af95c47d5c9e 100644 --- a/arch/x86/include/asm/div64.h +++ b/arch/x86/include/asm/div64.h @@ -59,6 +59,17 @@ static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) } #define div_u64_rem div_u64_rem +static inline u64 mul_u32_u32(u32 a, u32 b) +{ + u32 high, low; + + asm ("mull %[b]" : "=a" (low), "=d" (high) + : [a] "a" (a), [b] "rm" (b) ); + + return low | ((u64)high) << 32; +} +#define mul_u32_u32 mul_u32_u32 + #else # include <asm-generic/div64.h> #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index ec23d8e1297c..67313f3a9874 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h @@ -30,8 +30,6 @@ extern u64 e820_remove_range(u64 start, u64 size, unsigned old_type, int checktype); extern void update_e820(void); extern void e820_setup_gap(void); -extern int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize, - unsigned long start_addr, unsigned long long end_addr); struct setup_data; extern void parse_e820_ext(u64 phys_addr, u32 data_len); diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index d4a684997497..255645f60ca2 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -87,6 +87,16 @@ extern void fpstate_init_soft(struct swregs_state *soft); #else static inline void fpstate_init_soft(struct swregs_state *soft) {} #endif + +static inline void fpstate_init_xstate(struct xregs_state *xsave) +{ + /* + * XRSTORS requires these bits set in xcomp_bv, or it will + * trigger #GP: + */ + xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask; +} + static inline void fpstate_init_fxstate(struct fxregs_state *fx) { fx->cwd = 0x37f; diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h index 49da9f497b90..fe04491130ae 100644 --- a/arch/x86/include/asm/intel-mid.h +++ b/arch/x86/include/asm/intel-mid.h @@ -27,7 +27,6 @@ extern void intel_mid_pwr_power_off(void); extern int intel_mid_pwr_get_lss_id(struct pci_dev *pdev); extern int get_gpio_by_name(const char *name); -extern void intel_scu_device_register(struct platform_device *pdev); extern int __init sfi_parse_mrtc(struct sfi_table_header *table); extern int __init sfi_parse_mtmr(struct sfi_table_header *table); extern int sfi_mrtc_num; @@ -42,10 +41,8 @@ struct devs_id { char name[SFI_NAME_LEN + 1]; u8 type; u8 delay; + u8 msic; void *(*get_platform_data)(void *info); - /* Custom handler for devices */ - void (*device_handler)(struct sfi_device_table_entry *pentry, - struct devs_id *dev); }; #define sfi_device(i) \ diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index d34bd370074b..7afb0e2f07f4 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -164,6 +164,17 @@ static inline unsigned int isa_virt_to_bus(volatile void *address) #define virt_to_bus virt_to_phys #define bus_to_virt phys_to_virt +/* + * The default ioremap() behavior is non-cached; if you need something + * else, you probably want one of the following. + */ +extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); +extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size); +#define ioremap_uc ioremap_uc + +extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); +extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, unsigned long prot_val); + /** * ioremap - map bus memory into CPU space * @offset: bus address of the memory @@ -178,17 +189,6 @@ static inline unsigned int isa_virt_to_bus(volatile void *address) * If the area you are trying to map is a PCI BAR you should have a * look at pci_iomap(). */ -extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); -extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size); -#define ioremap_uc ioremap_uc - -extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); -extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, - unsigned long prot_val); - -/* - * The default ioremap() behavior is non-cached: - */ static inline void __iomem *ioremap(resource_size_t offset, unsigned long size) { return ioremap_nocache(offset, size); @@ -207,18 +207,42 @@ extern void set_iounmap_nonlazy(void); */ #define xlate_dev_kmem_ptr(p) p +/** + * memset_io Set a range of I/O memory to a constant value + * @addr: The beginning of the I/O-memory range to set + * @val: The value to set the memory to + * @count: The number of bytes to set + * + * Set a range of I/O memory to a given value. + */ static inline void memset_io(volatile void __iomem *addr, unsigned char val, size_t count) { memset((void __force *)addr, val, count); } +/** + * memcpy_fromio Copy a block of data from I/O memory + * @dst: The (RAM) destination for the copy + * @src: The (I/O memory) source for the data + * @count: The number of bytes to copy + * + * Copy a block of data from I/O memory. + */ static inline void memcpy_fromio(void *dst, const volatile void __iomem *src, size_t count) { memcpy(dst, (const void __force *)src, count); } +/** + * memcpy_toio Copy a block of data into I/O memory + * @dst: The (I/O memory) destination for the copy + * @src: The (RAM) source for the data + * @count: The number of bytes to copy + * + * Copy a block of data to I/O memory. + */ static inline void memcpy_toio(volatile void __iomem *dst, const void *src, size_t count) { diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 5132f2a6c0a2..e63873683d4a 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -97,10 +97,6 @@ #define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */ -/* Software defined banks */ -#define MCE_EXTENDED_BANK 128 -#define MCE_THERMAL_BANK (MCE_EXTENDED_BANK + 0) - #define MCE_LOG_LEN 32 #define MCE_LOG_SIGNATURE "MACHINECHECK" @@ -193,6 +189,15 @@ extern struct mce_vendor_flags mce_flags; extern struct mca_config mca_cfg; extern struct mca_msr_regs msr_ops; + +enum mce_notifier_prios { + MCE_PRIO_SRAO = INT_MAX, + MCE_PRIO_EXTLOG = INT_MAX - 1, + MCE_PRIO_NFIT = INT_MAX - 2, + MCE_PRIO_EDAC = INT_MAX - 3, + MCE_PRIO_LOWEST = 0, +}; + extern void mce_register_decode_chain(struct notifier_block *nb); extern void mce_unregister_decode_chain(struct notifier_block *nb); @@ -306,8 +311,6 @@ extern void (*deferred_error_int_vector)(void); void intel_init_thermal(struct cpuinfo_x86 *c); -void mce_log_therm_throt_event(__u64 status); - /* Interrupt Handler for core thermal thresholds */ extern int (*platform_thermal_notify)(__u64 msr_val); @@ -362,12 +365,13 @@ struct smca_hwid { unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */ u32 hwid_mcatype; /* (hwid,mcatype) tuple */ u32 xec_bitmap; /* Bitmap of valid ExtErrorCodes; current max is 21. */ + u8 count; /* Number of instances. */ }; struct smca_bank { struct smca_hwid *hwid; - /* Instance ID */ - u32 id; + u32 id; /* Value of MCA_IPID[InstanceId]. */ + u8 sysfs_id; /* Value used for sysfs name. */ }; extern struct smca_bank smca_banks[MAX_NR_BANKS]; diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 2266f864b747..daadeeea00b1 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -7,18 +7,17 @@ #define native_rdmsr(msr, val1, val2) \ do { \ - u64 __val = native_read_msr((msr)); \ + u64 __val = __rdmsr((msr)); \ (void)((val1) = (u32)__val); \ (void)((val2) = (u32)(__val >> 32)); \ } while (0) #define native_wrmsr(msr, low, high) \ - native_write_msr(msr, low, high) + __wrmsr(msr, low, high) #define native_wrmsrl(msr, val) \ - native_write_msr((msr), \ - (u32)((u64)(val)), \ - (u32)((u64)(val) >> 32)) + __wrmsr((msr), (u32)((u64)(val)), \ + (u32)((u64)(val) >> 32)) struct ucode_patch { struct list_head plist; diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h index 3e3e20be829a..3d57009e168b 100644 --- a/arch/x86/include/asm/microcode_amd.h +++ b/arch/x86/include/asm/microcode_amd.h @@ -54,6 +54,4 @@ static inline int __init save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; } void reload_ucode_amd(void) {} #endif - -extern bool check_current_patch_level(u32 *rev, bool early); #endif /* _ASM_X86_MICROCODE_AMD_H */ diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index db0b90c3b03e..898dba2e2e2c 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -80,7 +80,14 @@ static inline void do_trace_read_msr(unsigned int msr, u64 val, int failed) {} static inline void do_trace_rdpmc(unsigned int msr, u64 val, int failed) {} #endif -static inline unsigned long long native_read_msr(unsigned int msr) +/* + * __rdmsr() and __wrmsr() are the two primitives which are the bare minimum MSR + * accessors and should not have any tracing or other functionality piggybacking + * on them - those are *purely* for accessing MSRs and nothing more. So don't even + * think of extending them - you will be slapped with a stinking trout or a frozen + * shark will reach you, wherever you are! You've been warned. + */ +static inline unsigned long long notrace __rdmsr(unsigned int msr) { DECLARE_ARGS(val, low, high); @@ -88,11 +95,30 @@ static inline unsigned long long native_read_msr(unsigned int msr) "2:\n" _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_unsafe) : EAX_EDX_RET(val, low, high) : "c" (msr)); - if (msr_tracepoint_active(__tracepoint_read_msr)) - do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), 0); + return EAX_EDX_VAL(val, low, high); } +static inline void notrace __wrmsr(unsigned int msr, u32 low, u32 high) +{ + asm volatile("1: wrmsr\n" + "2:\n" + _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe) + : : "c" (msr), "a"(low), "d" (high) : "memory"); +} + +static inline unsigned long long native_read_msr(unsigned int msr) +{ + unsigned long long val; + + val = __rdmsr(msr); + + if (msr_tracepoint_active(__tracepoint_read_msr)) + do_trace_read_msr(msr, val, 0); + + return val; +} + static inline unsigned long long native_read_msr_safe(unsigned int msr, int *err) { @@ -116,29 +142,14 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr, /* Can be uninlined because referenced by paravirt */ static inline void notrace -__native_write_msr_notrace(unsigned int msr, u32 low, u32 high) -{ - asm volatile("1: wrmsr\n" - "2:\n" - _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe) - : : "c" (msr), "a"(low), "d" (high) : "memory"); -} - -/* Can be uninlined because referenced by paravirt */ -static inline void notrace native_write_msr(unsigned int msr, u32 low, u32 high) { - __native_write_msr_notrace(msr, low, high); + __wrmsr(msr, low, high); + if (msr_tracepoint_active(__tracepoint_write_msr)) do_trace_write_msr(msr, ((u64)high << 32 | low), 0); } -static inline void -wrmsr_notrace(unsigned int msr, u32 low, u32 high) -{ - __native_write_msr_notrace(msr, low, high); -} - /* Can be uninlined because referenced by paravirt */ static inline int notrace native_write_msr_safe(unsigned int msr, u32 low, u32 high) diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index b6c0b404898a..fbc73360aea0 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -27,6 +27,7 @@ struct vm_area_struct; extern pgd_t swapper_pg_dir[1024]; extern pgd_t initial_page_table[1024]; +extern pmd_t initial_pg_pmd[]; static inline void pgtable_cache_init(void) { } static inline void check_pgt_cache(void) { } @@ -75,4 +76,35 @@ do { \ #define kern_addr_valid(kaddr) (0) #endif +/* + * This is how much memory in addition to the memory covered up to + * and including _end we need mapped initially. + * We need: + * (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE) + * (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE) + * + * Modulo rounding, each megabyte assigned here requires a kilobyte of + * memory, which is currently unreclaimed. + * + * This should be a multiple of a page. + * + * KERNEL_IMAGE_SIZE should be greater than pa(_end) + * and small than max_low_pfn, otherwise will waste some page table entries + */ +#if PTRS_PER_PMD > 1 +#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD) +#else +#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD) +#endif + +/* + * Number of possible pages in the lowmem region. + * + * We shift 2 by 31 instead of 1 by 32 to the left in order to avoid a + * gas warning about overflowing shift count when gas has been compiled + * with only a host target support using a 32-bit type for internal + * representation. + */ +#define LOWMEM_PAGES ((((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT)) + #endif /* _ASM_X86_PGTABLE_32_H */ diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index 921bea7a2708..6d391909e864 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -23,9 +23,6 @@ /* How long a lock should spin before we consider blocking */ #define SPIN_THRESHOLD (1 << 15) -extern struct static_key paravirt_ticketlocks_enabled; -static __always_inline bool static_key_false(struct static_key *key); - #include <asm/qspinlock.h> /* diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 5b7e43eff139..04eb4c1c0a90 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -529,18 +529,19 @@ static void lapic_timer_broadcast(const struct cpumask *mask) * The local apic timer can be used for any function which is CPU local. */ static struct clock_event_device lapic_clockevent = { - .name = "lapic", - .features = CLOCK_EVT_FEAT_PERIODIC | - CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP - | CLOCK_EVT_FEAT_DUMMY, - .shift = 32, - .set_state_shutdown = lapic_timer_shutdown, - .set_state_periodic = lapic_timer_set_periodic, - .set_state_oneshot = lapic_timer_set_oneshot, - .set_next_event = lapic_next_event, - .broadcast = lapic_timer_broadcast, - .rating = 100, - .irq = -1, + .name = "lapic", + .features = CLOCK_EVT_FEAT_PERIODIC | + CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP + | CLOCK_EVT_FEAT_DUMMY, + .shift = 32, + .set_state_shutdown = lapic_timer_shutdown, + .set_state_periodic = lapic_timer_set_periodic, + .set_state_oneshot = lapic_timer_set_oneshot, + .set_state_oneshot_stopped = lapic_timer_shutdown, + .set_next_event = lapic_next_event, + .broadcast = lapic_timer_broadcast, + .rating = 100, + .irq = -1, }; static DEFINE_PER_CPU(struct clock_event_device, lapic_events); @@ -1864,14 +1865,14 @@ static void __smp_spurious_interrupt(u8 vector) "should never happen.\n", vector, smp_processor_id()); } -__visible void smp_spurious_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs) { entering_irq(); __smp_spurious_interrupt(~regs->orig_ax); exiting_irq(); } -__visible void smp_trace_spurious_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_trace_spurious_interrupt(struct pt_regs *regs) { u8 vector = ~regs->orig_ax; @@ -1922,14 +1923,14 @@ static void __smp_error_interrupt(struct pt_regs *regs) } -__visible void smp_error_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_error_interrupt(struct pt_regs *regs) { entering_irq(); __smp_error_interrupt(regs); exiting_irq(); } -__visible void smp_trace_error_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_trace_error_interrupt(struct pt_regs *regs) { entering_irq(); trace_error_apic_entry(ERROR_APIC_VECTOR); @@ -2028,8 +2029,8 @@ void disconnect_bsp_APIC(int virt_wire_setup) /* * The number of allocated logical CPU IDs. Since logical CPU IDs are allocated * contiguously, it equals to current allocated max logical CPU ID plus 1. - * All allocated CPU ID should be in [0, nr_logical_cpuidi), so the maximum of - * nr_logical_cpuids is nr_cpu_ids. + * All allocated CPU IDs should be in the [0, nr_logical_cpuids) range, + * so the maximum of nr_logical_cpuids is nr_cpu_ids. * * NOTE: Reserve 0 for BSP. */ @@ -2094,7 +2095,7 @@ int __generic_processor_info(int apicid, int version, bool enabled) * Since fixing handling of boot_cpu_physical_apicid requires * another discussion and tests on each platform, we leave it * for now and here we use read_apic_id() directly in this - * function, generic_processor_info(). + * function, __generic_processor_info(). */ if (disabled_cpu_apicid != BAD_APICID && disabled_cpu_apicid != read_apic_id() && diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 52f352b063fd..347bb9f65737 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1107,12 +1107,12 @@ int mp_map_gsi_to_irq(u32 gsi, unsigned int flags, struct irq_alloc_info *info) ioapic = mp_find_ioapic(gsi); if (ioapic < 0) - return -1; + return -ENODEV; pin = mp_find_ioapic_pin(ioapic, gsi); idx = find_irq_entry(ioapic, pin, mp_INT); if ((flags & IOAPIC_MAP_CHECK) && idx < 0) - return -1; + return -ENODEV; return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags, info); } diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 5d30c5e42bb1..f3557a1eb562 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -559,7 +559,7 @@ void send_cleanup_vector(struct irq_cfg *cfg) __send_cleanup_vector(data); } -asmlinkage __visible void smp_irq_move_cleanup_interrupt(void) +asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void) { unsigned vector, me; diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 35690a168cf7..97ea712fc72f 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -56,6 +56,7 @@ static struct { unsigned int socketid_shift; /* aka pnode_shift for UV1/2/3 */ unsigned int pnode_mask; unsigned int gpa_shift; + unsigned int gnode_shift; } uv_cpuid; int uv_min_hub_revision_id; @@ -133,6 +134,7 @@ static int __init early_get_pnodeid(void) break; case UV4_HUB_PART_NUMBER: uv_min_hub_revision_id += UV4_HUB_REVISION_BASE - 1; + uv_cpuid.gnode_shift = 2; /* min partition is 4 sockets */ break; } @@ -1074,8 +1076,10 @@ void __init uv_init_hub_info(struct uv_hub_info_s *hub_info) (1UL << uv_cpuid.gpa_shift) - 1; node_id.v = uv_read_local_mmr(UVH_NODE_ID); + uv_cpuid.gnode_shift = max_t(unsigned int, + uv_cpuid.gnode_shift, mn.n_val); hub_info->gnode_extra = - (node_id.s.node_id & ~((1 << mn.n_val) - 1)) >> 1; + (node_id.s.node_id & ~((1 << uv_cpuid.gnode_shift) - 1)) >> 1; hub_info->gnode_upper = ((unsigned long)hub_info->gnode_extra << mn.m_val); @@ -1172,19 +1176,25 @@ static void __init decode_gam_rng_tbl(unsigned long ptr) index, _min_socket, _max_socket, _min_pnode, _max_pnode); } -static void __init decode_uv_systab(void) +static int __init decode_uv_systab(void) { struct uv_systab *st; int i; + if (uv_hub_info->hub_revision < UV4_HUB_REVISION_BASE) + return 0; /* No extended UVsystab required */ + st = uv_systab; - if ((!st || st->revision < UV_SYSTAB_VERSION_UV4) && !is_uv4_hub()) - return; - if (st->revision != UV_SYSTAB_VERSION_UV4_LATEST) { - pr_crit( + if ((!st) || (st->revision < UV_SYSTAB_VERSION_UV4_LATEST)) { + int rev = st ? st->revision : 0; + + pr_err( "UV: BIOS UVsystab version(%x) mismatch, expecting(%x)\n", - st->revision, UV_SYSTAB_VERSION_UV4_LATEST); - BUG(); + rev, UV_SYSTAB_VERSION_UV4_LATEST); + pr_err( + "UV: Cannot support UV operations, switching to generic PC\n"); + uv_system_type = UV_NONE; + return -EINVAL; } for (i = 0; st->entry[i].type != UV_SYSTAB_TYPE_UNUSED; i++) { @@ -1205,6 +1215,7 @@ static void __init decode_uv_systab(void) break; } } + return 0; } /* @@ -1373,7 +1384,8 @@ void __init uv_system_init(void) map_low_mmrs(); uv_bios_init(); /* get uv_systab for decoding */ - decode_uv_systab(); + if (decode_uv_systab() < 0) + return; /* UVsystab problem, abort UV init */ build_socket_tables(); build_uv_gr_table(); uv_init_hub_info(&hub_info); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 1d3167269a67..80e657e89eed 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -541,8 +541,10 @@ static void early_init_amd(struct cpuinfo_x86 *c) if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); - if (!check_tsc_unstable()) - set_sched_clock_stable(); + if (check_tsc_unstable()) + clear_sched_clock_stable(); + } else { + clear_sched_clock_stable(); } /* Bit 12 of 8000_0007 edx is accumulated power mechanism. */ diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 1661d8ec9280..2c234a6d94c4 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -1,5 +1,5 @@ -#include <linux/bitops.h> -#include <linux/kernel.h> + +#include <linux/sched.h> #include <asm/cpufeature.h> #include <asm/e820.h> @@ -104,6 +104,8 @@ static void early_init_centaur(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_64 set_cpu_cap(c, X86_FEATURE_SYSENTER32); #endif + + clear_sched_clock_stable(); } static void init_centaur(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9bab7a8a4293..f74e84ea8557 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -83,6 +83,7 @@ static void default_init(struct cpuinfo_x86 *c) strcpy(c->x86_model_id, "386"); } #endif + clear_sched_clock_stable(); } static const struct cpu_dev default_cpu = { @@ -655,6 +656,16 @@ void cpu_detect(struct cpuinfo_x86 *c) } } +static void apply_forced_caps(struct cpuinfo_x86 *c) +{ + int i; + + for (i = 0; i < NCAPINTS; i++) { + c->x86_capability[i] &= ~cpu_caps_cleared[i]; + c->x86_capability[i] |= cpu_caps_set[i]; + } +} + void get_cpu_cap(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; @@ -748,6 +759,13 @@ void get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); init_scattered_cpuid_features(c); + + /* + * Clear/Set all flags overridden by options, after probe. + * This needs to happen each time we re-probe, which may happen + * several times during CPU initialization. + */ + apply_forced_caps(c); } static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) @@ -801,14 +819,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) memset(&c->x86_capability, 0, sizeof c->x86_capability); c->extended_cpuid_level = 0; - if (!have_cpuid_p()) - identify_cpu_without_cpuid(c); - /* cyrix could have cpuid enabled via c_identify()*/ if (have_cpuid_p()) { cpu_detect(c); get_cpu_vendor(c); get_cpu_cap(c); + setup_force_cpu_cap(X86_FEATURE_CPUID); if (this_cpu->c_early_init) this_cpu->c_early_init(c); @@ -818,6 +834,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) if (this_cpu->c_bsp_init) this_cpu->c_bsp_init(c); + } else { + identify_cpu_without_cpuid(c); + setup_clear_cpu_cap(X86_FEATURE_CPUID); } setup_force_cpu_cap(X86_FEATURE_ALWAYS); @@ -1034,10 +1053,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) this_cpu->c_identify(c); /* Clear/Set all flags overridden by options, after probe */ - for (i = 0; i < NCAPINTS; i++) { - c->x86_capability[i] &= ~cpu_caps_cleared[i]; - c->x86_capability[i] |= cpu_caps_set[i]; - } + apply_forced_caps(c); #ifdef CONFIG_X86_64 c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); @@ -1055,6 +1071,8 @@ static void identify_cpu(struct cpuinfo_x86 *c) */ if (this_cpu->c_init) this_cpu->c_init(c); + else + clear_sched_clock_stable(); /* Disable the PN if appropriate */ squash_the_stupid_serial_number(c); @@ -1096,10 +1114,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) * Clear/Set all flags overridden by options, need do it * before following smp all cpus cap AND. */ - for (i = 0; i < NCAPINTS; i++) { - c->x86_capability[i] &= ~cpu_caps_cleared[i]; - c->x86_capability[i] |= cpu_caps_set[i]; - } + apply_forced_caps(c); /* * On SMP, boot_cpu_data holds the common feature set between diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index bd9dcd6b712d..47416f959a48 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -9,6 +9,7 @@ #include <asm/pci-direct.h> #include <asm/tsc.h> #include <asm/cpufeature.h> +#include <linux/sched.h> #include "cpu.h" @@ -183,6 +184,7 @@ static void early_init_cyrix(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); break; } + clear_sched_clock_stable(); } static void init_cyrix(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 203f860d2ab3..026c728d6ba7 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -119,8 +119,10 @@ static void early_init_intel(struct cpuinfo_x86 *c) if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); - if (!check_tsc_unstable()) - set_sched_clock_stable(); + if (check_tsc_unstable()) + clear_sched_clock_stable(); + } else { + clear_sched_clock_stable(); } /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */ diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c index 83f1a98d37db..2eee85379689 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-apei.c +++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c @@ -52,8 +52,11 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err) if (severity >= GHES_SEV_RECOVERABLE) m.status |= MCI_STATUS_UC; - if (severity >= GHES_SEV_PANIC) + + if (severity >= GHES_SEV_PANIC) { m.status |= MCI_STATUS_PCC; + m.tsc = rdtsc(); + } m.addr = mem_err->physical_addr; mce_log(&m); diff --git a/arch/x86/kernel/cpu/mcheck/mce-genpool.c b/arch/x86/kernel/cpu/mcheck/mce-genpool.c index 93d824ec3120..1e5a50c11d3c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-genpool.c +++ b/arch/x86/kernel/cpu/mcheck/mce-genpool.c @@ -72,7 +72,7 @@ struct llist_node *mce_gen_pool_prepare_records(void) return new_head.first; } -void mce_gen_pool_process(void) +void mce_gen_pool_process(struct work_struct *__unused) { struct llist_node *head; struct mce_evt_llist *node, *tmp; diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 517619ea6498..99165b206df3 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -152,7 +152,6 @@ static void raise_mce(struct mce *m) if (context == MCJ_CTX_RANDOM) return; -#ifdef CONFIG_X86_LOCAL_APIC if (m->inject_flags & (MCJ_IRQ_BROADCAST | MCJ_NMI_BROADCAST)) { unsigned long start; int cpu; @@ -192,9 +191,7 @@ static void raise_mce(struct mce *m) raise_local(); put_cpu(); put_online_cpus(); - } else -#endif - { + } else { preempt_disable(); raise_local(); preempt_enable(); diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index cd74a3f00aea..903043e6a62b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h @@ -31,7 +31,7 @@ struct mce_evt_llist { struct mce mce; }; -void mce_gen_pool_process(void); +void mce_gen_pool_process(struct work_struct *__unused); bool mce_gen_pool_empty(void); int mce_gen_pool_add(struct mce *mce); int mce_gen_pool_init(void); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 537c6647d84c..8e9725c607ea 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -128,7 +128,6 @@ void mce_setup(struct mce *m) { memset(m, 0, sizeof(struct mce)); m->cpu = m->extcpu = smp_processor_id(); - m->tsc = rdtsc(); /* We hope get_seconds stays lockless */ m->time = get_seconds(); m->cpuvendor = boot_cpu_data.x86_vendor; @@ -217,9 +216,7 @@ void mce_register_decode_chain(struct notifier_block *nb) { atomic_inc(&num_notifiers); - /* Ensure SRAO notifier has the highest priority in the decode chain. */ - if (nb != &mce_srao_nb && nb->priority == INT_MAX) - nb->priority -= 1; + WARN_ON(nb->priority > MCE_PRIO_LOWEST && nb->priority < MCE_PRIO_EDAC); atomic_notifier_chain_register(&x86_mce_decoder_chain, nb); } @@ -583,7 +580,7 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val, } static struct notifier_block mce_srao_nb = { .notifier_call = srao_decode_notifier, - .priority = INT_MAX, + .priority = MCE_PRIO_SRAO, }; static int mce_default_notifier(struct notifier_block *nb, unsigned long val, @@ -609,7 +606,7 @@ static int mce_default_notifier(struct notifier_block *nb, unsigned long val, static struct notifier_block mce_default_nb = { .notifier_call = mce_default_notifier, /* lowest prio, we want it to run last. */ - .priority = 0, + .priority = MCE_PRIO_LOWEST, }; /* @@ -710,14 +707,8 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) mce_gather_info(&m, NULL); - /* - * m.tsc was set in mce_setup(). Clear it if not requested. - * - * FIXME: Propagate @flags to mce_gather_info/mce_setup() to avoid - * that dance. - */ - if (!(flags & MCP_TIMESTAMP)) - m.tsc = 0; + if (flags & MCP_TIMESTAMP) + m.tsc = rdtsc(); for (i = 0; i < mca_cfg.banks; i++) { if (!mce_banks[i].ctl || !test_bit(i, *b)) @@ -1156,6 +1147,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) goto out; mce_gather_info(&m, regs); + m.tsc = rdtsc(); final = this_cpu_ptr(&mces_seen); *final = m; @@ -1322,41 +1314,6 @@ int memory_failure(unsigned long pfn, int vector, int flags) #endif /* - * Action optional processing happens here (picking up - * from the list of faulting pages that do_machine_check() - * placed into the genpool). - */ -static void mce_process_work(struct work_struct *dummy) -{ - mce_gen_pool_process(); -} - -#ifdef CONFIG_X86_MCE_INTEL -/*** - * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog - * @cpu: The CPU on which the event occurred. - * @status: Event status information - * - * This function should be called by the thermal interrupt after the - * event has been processed and the decision was made to log the event - * further. - * - * The status parameter will be saved to the 'status' field of 'struct mce' - * and historically has been the register value of the - * MSR_IA32_THERMAL_STATUS (Intel) msr. - */ -void mce_log_therm_throt_event(__u64 status) -{ - struct mce m; - - mce_setup(&m); - m.bank = MCE_THERMAL_BANK; - m.status = status; - mce_log(&m); -} -#endif /* CONFIG_X86_MCE_INTEL */ - -/* * Periodic polling timer for "silent" machine check errors. If the * poller finds an MCE, poll 2x faster. When the poller finds no more * errors, poll 2x slower (up to check_interval seconds). @@ -2189,7 +2146,7 @@ int __init mcheck_init(void) mce_register_decode_chain(&mce_default_nb); mcheck_vendor_init_severity(); - INIT_WORK(&mce_work, mce_process_work); + INIT_WORK(&mce_work, mce_gen_pool_process); init_irq_work(&mce_irq_work, mce_irq_work_cb); return 0; diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index a5fd137417a2..524cc5780a77 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -192,6 +192,7 @@ static void get_smca_bank_info(unsigned int bank) smca_banks[bank].hwid = s_hwid; smca_banks[bank].id = instance_id; + smca_banks[bank].sysfs_id = s_hwid->count++; break; } } @@ -777,7 +778,8 @@ __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc) mce_setup(&m); m.status = status; - m.bank = bank; + m.bank = bank; + m.tsc = rdtsc(); if (threshold_err) m.misc = misc; @@ -814,14 +816,14 @@ static inline void __smp_deferred_error_interrupt(void) deferred_error_int_vector(); } -asmlinkage __visible void smp_deferred_error_interrupt(void) +asmlinkage __visible void __irq_entry smp_deferred_error_interrupt(void) { entering_irq(); __smp_deferred_error_interrupt(); exiting_ack_irq(); } -asmlinkage __visible void smp_trace_deferred_error_interrupt(void) +asmlinkage __visible void __irq_entry smp_trace_deferred_error_interrupt(void) { entering_irq(); trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR); @@ -1064,9 +1066,12 @@ static const char *get_name(unsigned int bank, struct threshold_block *b) return NULL; } + if (smca_banks[bank].hwid->count == 1) + return smca_get_name(bank_type); + snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "%s_%x", smca_get_name(bank_type), - smca_banks[bank].id); + smca_banks[bank].sysfs_id); return buf_mcatype; } diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 465aca8be009..d7cc190ae457 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -6,7 +6,7 @@ * * Maintains a counter in /sys that keeps track of the number of thermal * events, such that the user knows how bad the thermal problem might be - * (since the logging to syslog and mcelog is rate limited). + * (since the logging to syslog is rate limited). * * Author: Dmitriy Zavin (dmitriyz@google.com) * @@ -141,13 +141,8 @@ static struct attribute_group thermal_attr_group = { * IRQ has been acknowledged. * * It will take care of rate limiting and printing messages to the syslog. - * - * Returns: 0 : Event should NOT be further logged, i.e. still in - * "timeout" from previous log message. - * 1 : Event should be logged further, and a message has been - * printed to the syslog. */ -static int therm_throt_process(bool new_event, int event, int level) +static void therm_throt_process(bool new_event, int event, int level) { struct _thermal_state *state; unsigned int this_cpu = smp_processor_id(); @@ -162,16 +157,16 @@ static int therm_throt_process(bool new_event, int event, int level) else if (event == POWER_LIMIT_EVENT) state = &pstate->core_power_limit; else - return 0; + return; } else if (level == PACKAGE_LEVEL) { if (event == THERMAL_THROTTLING_EVENT) state = &pstate->package_throttle; else if (event == POWER_LIMIT_EVENT) state = &pstate->package_power_limit; else - return 0; + return; } else - return 0; + return; old_event = state->new_event; state->new_event = new_event; @@ -181,7 +176,7 @@ static int therm_throt_process(bool new_event, int event, int level) if (time_before64(now, state->next_check) && state->count != state->last_count) - return 0; + return; state->next_check = now + CHECK_INTERVAL; state->last_count = state->count; @@ -193,16 +188,14 @@ static int therm_throt_process(bool new_event, int event, int level) this_cpu, level == CORE_LEVEL ? "Core" : "Package", state->count); - return 1; + return; } if (old_event) { if (event == THERMAL_THROTTLING_EVENT) pr_info("CPU%d: %s temperature/speed normal\n", this_cpu, level == CORE_LEVEL ? "Core" : "Package"); - return 1; + return; } - - return 0; } static int thresh_event_valid(int level, int event) @@ -365,10 +358,9 @@ static void intel_thermal_interrupt(void) /* Check for violation of core thermal thresholds*/ notify_thresholds(msr_val); - if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT, - THERMAL_THROTTLING_EVENT, - CORE_LEVEL) != 0) - mce_log_therm_throt_event(msr_val); + therm_throt_process(msr_val & THERM_STATUS_PROCHOT, + THERMAL_THROTTLING_EVENT, + CORE_LEVEL); if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable) therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, @@ -404,14 +396,16 @@ static inline void __smp_thermal_interrupt(void) smp_thermal_vector(); } -asmlinkage __visible void smp_thermal_interrupt(struct pt_regs *regs) +asmlinkage __visible void __irq_entry +smp_thermal_interrupt(struct pt_regs *regs) { entering_irq(); __smp_thermal_interrupt(); exiting_ack_irq(); } -asmlinkage __visible void smp_trace_thermal_interrupt(struct pt_regs *regs) +asmlinkage __visible void __irq_entry +smp_trace_thermal_interrupt(struct pt_regs *regs) { entering_irq(); trace_thermal_apic_entry(THERMAL_APIC_VECTOR); diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c index 9beb092d68a5..bb0e75eed10a 100644 --- a/arch/x86/kernel/cpu/mcheck/threshold.c +++ b/arch/x86/kernel/cpu/mcheck/threshold.c @@ -23,14 +23,14 @@ static inline void __smp_threshold_interrupt(void) mce_threshold_vector(); } -asmlinkage __visible void smp_threshold_interrupt(void) +asmlinkage __visible void __irq_entry smp_threshold_interrupt(void) { entering_irq(); __smp_threshold_interrupt(); exiting_ack_irq(); } -asmlinkage __visible void smp_trace_threshold_interrupt(void) +asmlinkage __visible void __irq_entry smp_trace_threshold_interrupt(void) { entering_irq(); trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR); diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 079e81733a58..73082365ed1c 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -42,16 +42,19 @@ static struct equiv_cpu_entry *equiv_cpu_table; /* * This points to the current valid container of microcode patches which we will - * save from the initrd/builtin before jettisoning its contents. + * save from the initrd/builtin before jettisoning its contents. @mc is the + * microcode patch we found to match. */ -struct container { - u8 *data; - size_t size; -} cont; +struct cont_desc { + struct microcode_amd *mc; + u32 cpuid_1_eax; + u32 psize; + u8 *data; + size_t size; +}; static u32 ucode_new_rev; static u8 amd_ucode_patch[PATCH_MAX_SIZE]; -static u16 this_equiv_id; /* * Microcode patch container file is prepended to the initrd in cpio @@ -60,57 +63,13 @@ static u16 this_equiv_id; static const char ucode_path[] __maybe_unused = "kernel/x86/microcode/AuthenticAMD.bin"; -static size_t compute_container_size(u8 *data, u32 total_size) +static u16 find_equiv_id(struct equiv_cpu_entry *equiv_table, u32 sig) { - size_t size = 0; - u32 *header = (u32 *)data; - - if (header[0] != UCODE_MAGIC || - header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */ - header[2] == 0) /* size */ - return size; - - size = header[2] + CONTAINER_HDR_SZ; - total_size -= size; - data += size; - - while (total_size) { - u16 patch_size; - - header = (u32 *)data; - - if (header[0] != UCODE_UCODE_TYPE) - break; - - /* - * Sanity-check patch size. - */ - patch_size = header[1]; - if (patch_size > PATCH_MAX_SIZE) - break; - - size += patch_size + SECTION_HDR_SIZE; - data += patch_size + SECTION_HDR_SIZE; - total_size -= patch_size + SECTION_HDR_SIZE; + for (; equiv_table && equiv_table->installed_cpu; equiv_table++) { + if (sig == equiv_table->installed_cpu) + return equiv_table->equiv_cpu; } - return size; -} - -static inline u16 find_equiv_id(struct equiv_cpu_entry *equiv_cpu_table, - unsigned int sig) -{ - int i = 0; - - if (!equiv_cpu_table) - return 0; - - while (equiv_cpu_table[i].installed_cpu != 0) { - if (sig == equiv_cpu_table[i].installed_cpu) - return equiv_cpu_table[i].equiv_cpu; - - i++; - } return 0; } @@ -118,91 +77,109 @@ static inline u16 find_equiv_id(struct equiv_cpu_entry *equiv_cpu_table, * This scans the ucode blob for the proper container as we can have multiple * containers glued together. Returns the equivalence ID from the equivalence * table or 0 if none found. + * Returns the amount of bytes consumed while scanning. @desc contains all the + * data we're going to use in later stages of the application. */ -static u16 -find_proper_container(u8 *ucode, size_t size, struct container *ret_cont) +static ssize_t parse_container(u8 *ucode, ssize_t size, struct cont_desc *desc) { - struct container ret = { NULL, 0 }; - u32 eax, ebx, ecx, edx; struct equiv_cpu_entry *eq; - int offset, left; - u16 eq_id = 0; - u32 *header; - u8 *data; + ssize_t orig_size = size; + u32 *hdr = (u32 *)ucode; + u16 eq_id; + u8 *buf; - data = ucode; - left = size; - header = (u32 *)data; + /* Am I looking at an equivalence table header? */ + if (hdr[0] != UCODE_MAGIC || + hdr[1] != UCODE_EQUIV_CPU_TABLE_TYPE || + hdr[2] == 0) + return CONTAINER_HDR_SZ; + buf = ucode; - /* find equiv cpu table */ - if (header[0] != UCODE_MAGIC || - header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */ - header[2] == 0) /* size */ - return eq_id; + eq = (struct equiv_cpu_entry *)(buf + CONTAINER_HDR_SZ); - eax = 0x00000001; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); + /* Find the equivalence ID of our CPU in this table: */ + eq_id = find_equiv_id(eq, desc->cpuid_1_eax); - while (left > 0) { - eq = (struct equiv_cpu_entry *)(data + CONTAINER_HDR_SZ); + buf += hdr[2] + CONTAINER_HDR_SZ; + size -= hdr[2] + CONTAINER_HDR_SZ; + + /* + * Scan through the rest of the container to find where it ends. We do + * some basic sanity-checking too. + */ + while (size > 0) { + struct microcode_amd *mc; + u32 patch_size; - ret.data = data; + hdr = (u32 *)buf; - /* Advance past the container header */ - offset = header[2] + CONTAINER_HDR_SZ; - data += offset; - left -= offset; + if (hdr[0] != UCODE_UCODE_TYPE) + break; - eq_id = find_equiv_id(eq, eax); - if (eq_id) { - ret.size = compute_container_size(ret.data, left + offset); + /* Sanity-check patch size. */ + patch_size = hdr[1]; + if (patch_size > PATCH_MAX_SIZE) + break; - /* - * truncate how much we need to iterate over in the - * ucode update loop below - */ - left = ret.size - offset; + /* Skip patch section header: */ + buf += SECTION_HDR_SIZE; + size -= SECTION_HDR_SIZE; - *ret_cont = ret; - return eq_id; + mc = (struct microcode_amd *)buf; + if (eq_id == mc->hdr.processor_rev_id) { + desc->psize = patch_size; + desc->mc = mc; } - /* - * support multiple container files appended together. if this - * one does not have a matching equivalent cpu entry, we fast - * forward to the next container file. - */ - while (left > 0) { - header = (u32 *)data; - - if (header[0] == UCODE_MAGIC && - header[1] == UCODE_EQUIV_CPU_TABLE_TYPE) - break; - - offset = header[1] + SECTION_HDR_SIZE; - data += offset; - left -= offset; - } + buf += patch_size; + size -= patch_size; + } - /* mark where the next microcode container file starts */ - offset = data - (u8 *)ucode; - ucode = data; + /* + * If we have found a patch (desc->mc), it means we're looking at the + * container which has a patch for this CPU so return 0 to mean, @ucode + * already points to the proper container. Otherwise, we return the size + * we scanned so that we can advance to the next container in the + * buffer. + */ + if (desc->mc) { + desc->data = ucode; + desc->size = orig_size - size; + + return 0; } - return eq_id; + return orig_size - size; } -static int __apply_microcode_amd(struct microcode_amd *mc_amd) +/* + * Scan the ucode blob for the proper container as we can have multiple + * containers glued together. + */ +static void scan_containers(u8 *ucode, size_t size, struct cont_desc *desc) +{ + ssize_t rem = size; + + while (rem >= 0) { + ssize_t s = parse_container(ucode, rem, desc); + if (!s) + return; + + ucode += s; + rem -= s; + } +} + +static int __apply_microcode_amd(struct microcode_amd *mc) { u32 rev, dummy; - native_wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code); + native_wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc->hdr.data_code); /* verify patch application was successful */ native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - if (rev != mc_amd->hdr.patch_id) + if (rev != mc->hdr.patch_id) return -1; return 0; @@ -217,17 +194,16 @@ static int __apply_microcode_amd(struct microcode_amd *mc_amd) * load_microcode_amd() to save equivalent cpu table and microcode patches in * kernel heap memory. * - * Returns true if container found (sets @ret_cont), false otherwise. + * Returns true if container found (sets @desc), false otherwise. */ -static bool apply_microcode_early_amd(void *ucode, size_t size, bool save_patch, - struct container *ret_cont) +static bool +apply_microcode_early_amd(u32 cpuid_1_eax, void *ucode, size_t size, bool save_patch) { + struct cont_desc desc = { 0 }; u8 (*patch)[PATCH_MAX_SIZE]; - u32 rev, *header, *new_rev; - struct container ret; - int offset, left; - u16 eq_id = 0; - u8 *data; + struct microcode_amd *mc; + u32 rev, dummy, *new_rev; + bool ret = false; #ifdef CONFIG_X86_32 new_rev = (u32 *)__pa_nodebug(&ucode_new_rev); @@ -237,50 +213,27 @@ static bool apply_microcode_early_amd(void *ucode, size_t size, bool save_patch, patch = &amd_ucode_patch; #endif - if (check_current_patch_level(&rev, true)) - return false; - - eq_id = find_proper_container(ucode, size, &ret); - if (!eq_id) - return false; - - this_equiv_id = eq_id; - header = (u32 *)ret.data; - - /* We're pointing to an equiv table, skip over it. */ - data = ret.data + header[2] + CONTAINER_HDR_SZ; - left = ret.size - (header[2] + CONTAINER_HDR_SZ); - - while (left > 0) { - struct microcode_amd *mc; - - header = (u32 *)data; - if (header[0] != UCODE_UCODE_TYPE || /* type */ - header[1] == 0) /* size */ - break; + desc.cpuid_1_eax = cpuid_1_eax; - mc = (struct microcode_amd *)(data + SECTION_HDR_SIZE); + scan_containers(ucode, size, &desc); - if (eq_id == mc->hdr.processor_rev_id && rev < mc->hdr.patch_id) { + mc = desc.mc; + if (!mc) + return ret; - if (!__apply_microcode_amd(mc)) { - rev = mc->hdr.patch_id; - *new_rev = rev; + native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); + if (rev >= mc->hdr.patch_id) + return ret; - if (save_patch) - memcpy(patch, mc, min_t(u32, header[1], PATCH_MAX_SIZE)); - } - } + if (!__apply_microcode_amd(mc)) { + *new_rev = mc->hdr.patch_id; + ret = true; - offset = header[1] + SECTION_HDR_SIZE; - data += offset; - left -= offset; + if (save_patch) + memcpy(patch, mc, min_t(u32, desc.psize, PATCH_MAX_SIZE)); } - if (ret_cont) - *ret_cont = ret; - - return true; + return ret; } static bool get_builtin_microcode(struct cpio_data *cp, unsigned int family) @@ -298,10 +251,9 @@ static bool get_builtin_microcode(struct cpio_data *cp, unsigned int family) #endif } -void __init load_ucode_amd_bsp(unsigned int family) +void __load_ucode_amd(unsigned int cpuid_1_eax, struct cpio_data *ret) { struct ucode_cpu_info *uci; - u32 eax, ebx, ecx, edx; struct cpio_data cp; const char *path; bool use_pa; @@ -316,184 +268,95 @@ void __init load_ucode_amd_bsp(unsigned int family) use_pa = false; } - if (!get_builtin_microcode(&cp, family)) + if (!get_builtin_microcode(&cp, x86_family(cpuid_1_eax)) && !initrd_gone) cp = find_microcode_in_initrd(path, use_pa); - if (!(cp.data && cp.size)) - return; - - /* Get BSP's CPUID.EAX(1), needed in load_microcode_amd() */ - eax = 1; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - uci->cpu_sig.sig = eax; + /* Needed in load_microcode_amd() */ + uci->cpu_sig.sig = cpuid_1_eax; - apply_microcode_early_amd(cp.data, cp.size, true, NULL); + *ret = cp; } -#ifdef CONFIG_X86_32 -/* - * On 32-bit, since AP's early load occurs before paging is turned on, we - * cannot traverse cpu_equiv_table and microcode_cache in kernel heap memory. - * So during cold boot, AP will apply_ucode_in_initrd() just like the BSP. - * In save_microcode_in_initrd_amd() BSP's patch is copied to amd_ucode_patch, - * which is used upon resume from suspend. - */ -void load_ucode_amd_ap(unsigned int family) +void __init load_ucode_amd_bsp(unsigned int cpuid_1_eax) { - struct microcode_amd *mc; - struct cpio_data cp; - - mc = (struct microcode_amd *)__pa_nodebug(amd_ucode_patch); - if (mc->hdr.patch_id && mc->hdr.processor_rev_id) { - __apply_microcode_amd(mc); - return; - } - - if (!get_builtin_microcode(&cp, family)) - cp = find_microcode_in_initrd((const char *)__pa_nodebug(ucode_path), true); + struct cpio_data cp = { }; + __load_ucode_amd(cpuid_1_eax, &cp); if (!(cp.data && cp.size)) return; - /* - * This would set amd_ucode_patch above so that the following APs can - * use it directly instead of going down this path again. - */ - apply_microcode_early_amd(cp.data, cp.size, true, NULL); + apply_microcode_early_amd(cpuid_1_eax, cp.data, cp.size, true); } -#else -void load_ucode_amd_ap(unsigned int family) + +void load_ucode_amd_ap(unsigned int cpuid_1_eax) { - struct equiv_cpu_entry *eq; struct microcode_amd *mc; - u32 rev, eax; - u16 eq_id; - - /* 64-bit runs with paging enabled, thus early==false. */ - if (check_current_patch_level(&rev, false)) - return; - - /* First AP hasn't cached it yet, go through the blob. */ - if (!cont.data) { - struct cpio_data cp = { NULL, 0, "" }; + struct cpio_data cp; + u32 *new_rev, rev, dummy; - if (cont.size == -1) - return; + if (IS_ENABLED(CONFIG_X86_32)) { + mc = (struct microcode_amd *)__pa_nodebug(amd_ucode_patch); + new_rev = (u32 *)__pa_nodebug(&ucode_new_rev); + } else { + mc = (struct microcode_amd *)amd_ucode_patch; + new_rev = &ucode_new_rev; + } -reget: - if (!get_builtin_microcode(&cp, family)) { -#ifdef CONFIG_BLK_DEV_INITRD - if (!initrd_gone) - cp = find_cpio_data(ucode_path, (void *)initrd_start, - initrd_end - initrd_start, NULL); -#endif - if (!(cp.data && cp.size)) { - /* - * Mark it so that other APs do not scan again - * for no real reason and slow down boot - * needlessly. - */ - cont.size = -1; - return; - } - } + native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - if (!apply_microcode_early_amd(cp.data, cp.size, false, &cont)) { - cont.size = -1; + /* Check whether we have saved a new patch already: */ + if (*new_rev && rev < mc->hdr.patch_id) { + if (!__apply_microcode_amd(mc)) { + *new_rev = mc->hdr.patch_id; return; } } - eax = cpuid_eax(0x00000001); - eq = (struct equiv_cpu_entry *)(cont.data + CONTAINER_HDR_SZ); - - eq_id = find_equiv_id(eq, eax); - if (!eq_id) + __load_ucode_amd(cpuid_1_eax, &cp); + if (!(cp.data && cp.size)) return; - if (eq_id == this_equiv_id) { - mc = (struct microcode_amd *)amd_ucode_patch; - - if (mc && rev < mc->hdr.patch_id) { - if (!__apply_microcode_amd(mc)) - ucode_new_rev = mc->hdr.patch_id; - } - - } else { - - /* - * AP has a different equivalence ID than BSP, looks like - * mixed-steppings silicon so go through the ucode blob anew. - */ - goto reget; - } + apply_microcode_early_amd(cpuid_1_eax, cp.data, cp.size, false); } -#endif /* CONFIG_X86_32 */ static enum ucode_state load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size); -int __init save_microcode_in_initrd_amd(unsigned int fam) +int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax) { + struct cont_desc desc = { 0 }; enum ucode_state ret; - int retval = 0; - u16 eq_id; - - if (!cont.data) { - if (IS_ENABLED(CONFIG_X86_32) && (cont.size != -1)) { - struct cpio_data cp = { NULL, 0, "" }; - -#ifdef CONFIG_BLK_DEV_INITRD - cp = find_cpio_data(ucode_path, (void *)initrd_start, - initrd_end - initrd_start, NULL); -#endif + struct cpio_data cp; - if (!(cp.data && cp.size)) { - cont.size = -1; - return -EINVAL; - } + cp = find_microcode_in_initrd(ucode_path, false); + if (!(cp.data && cp.size)) + return -EINVAL; - eq_id = find_proper_container(cp.data, cp.size, &cont); - if (!eq_id) { - cont.size = -1; - return -EINVAL; - } + desc.cpuid_1_eax = cpuid_1_eax; - } else - return -EINVAL; - } + scan_containers(cp.data, cp.size, &desc); + if (!desc.mc) + return -EINVAL; - ret = load_microcode_amd(smp_processor_id(), fam, cont.data, cont.size); + ret = load_microcode_amd(smp_processor_id(), x86_family(cpuid_1_eax), + desc.data, desc.size); if (ret != UCODE_OK) - retval = -EINVAL; - - /* - * This will be freed any msec now, stash patches for the current - * family and switch to patch cache for cpu hotplug, etc later. - */ - cont.data = NULL; - cont.size = 0; + return -EINVAL; - return retval; + return 0; } void reload_ucode_amd(void) { struct microcode_amd *mc; - u32 rev; - - /* - * early==false because this is a syscore ->resume path and by - * that time paging is long enabled. - */ - if (check_current_patch_level(&rev, false)) - return; + u32 rev, dummy; mc = (struct microcode_amd *)amd_ucode_patch; if (!mc) return; + rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); + if (rev < mc->hdr.patch_id) { if (!__apply_microcode_amd(mc)) { ucode_new_rev = mc->hdr.patch_id; @@ -631,60 +494,13 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size, return patch_size; } -/* - * Those patch levels cannot be updated to newer ones and thus should be final. - */ -static u32 final_levels[] = { - 0x01000098, - 0x0100009f, - 0x010000af, - 0, /* T-101 terminator */ -}; - -/* - * Check the current patch level on this CPU. - * - * @rev: Use it to return the patch level. It is set to 0 in the case of - * error. - * - * Returns: - * - true: if update should stop - * - false: otherwise - */ -bool check_current_patch_level(u32 *rev, bool early) -{ - u32 lvl, dummy, i; - bool ret = false; - u32 *levels; - - native_rdmsr(MSR_AMD64_PATCH_LEVEL, lvl, dummy); - - if (IS_ENABLED(CONFIG_X86_32) && early) - levels = (u32 *)__pa_nodebug(&final_levels); - else - levels = final_levels; - - for (i = 0; levels[i]; i++) { - if (lvl == levels[i]) { - lvl = 0; - ret = true; - break; - } - } - - if (rev) - *rev = lvl; - - return ret; -} - static int apply_microcode_amd(int cpu) { struct cpuinfo_x86 *c = &cpu_data(cpu); struct microcode_amd *mc_amd; struct ucode_cpu_info *uci; struct ucode_patch *p; - u32 rev; + u32 rev, dummy; BUG_ON(raw_smp_processor_id() != cpu); @@ -697,8 +513,7 @@ static int apply_microcode_amd(int cpu) mc_amd = p->data; uci->mc = p->data; - if (check_current_patch_level(&rev, false)) - return -1; + rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); /* need to apply patch? */ if (rev >= mc_amd->hdr.patch_id) { diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 73102d932760..e51eeaed8016 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -66,19 +66,50 @@ static DEFINE_MUTEX(microcode_mutex); struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; -/* - * Operations that are run on a target cpu: - */ - struct cpu_info_ctx { struct cpu_signature *cpu_sig; int err; }; +/* + * Those patch levels cannot be updated to newer ones and thus should be final. + */ +static u32 final_levels[] = { + 0x01000098, + 0x0100009f, + 0x010000af, + 0, /* T-101 terminator */ +}; + +/* + * Check the current patch level on this CPU. + * + * Returns: + * - true: if update should stop + * - false: otherwise + */ +static bool amd_check_current_patch_level(void) +{ + u32 lvl, dummy, i; + u32 *levels; + + native_rdmsr(MSR_AMD64_PATCH_LEVEL, lvl, dummy); + + if (IS_ENABLED(CONFIG_X86_32)) + levels = (u32 *)__pa_nodebug(&final_levels); + else + levels = final_levels; + + for (i = 0; levels[i]; i++) { + if (lvl == levels[i]) + return true; + } + return false; +} + static bool __init check_loader_disabled_bsp(void) { static const char *__dis_opt_str = "dis_ucode_ldr"; - u32 a, b, c, d; #ifdef CONFIG_X86_32 const char *cmdline = (const char *)__pa_nodebug(boot_command_line); @@ -94,18 +125,19 @@ static bool __init check_loader_disabled_bsp(void) if (!have_cpuid_p()) return *res; - a = 1; - c = 0; - native_cpuid(&a, &b, &c, &d); - /* * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not * completely accurate as xen pv guests don't see that CPUID bit set but * that's good enough as they don't land on the BSP path anyway. */ - if (c & BIT(31)) + if (native_cpuid_ecx(1) & BIT(31)) return *res; + if (x86_cpuid_vendor() == X86_VENDOR_AMD) { + if (amd_check_current_patch_level()) + return *res; + } + if (cmdline_find_option_bool(cmdline, option) <= 0) *res = false; @@ -133,23 +165,21 @@ bool get_builtin_firmware(struct cpio_data *cd, const char *name) void __init load_ucode_bsp(void) { - int vendor; - unsigned int family; + unsigned int cpuid_1_eax; if (check_loader_disabled_bsp()) return; - vendor = x86_cpuid_vendor(); - family = x86_cpuid_family(); + cpuid_1_eax = native_cpuid_eax(1); - switch (vendor) { + switch (x86_cpuid_vendor()) { case X86_VENDOR_INTEL: - if (family >= 6) + if (x86_family(cpuid_1_eax) >= 6) load_ucode_intel_bsp(); break; case X86_VENDOR_AMD: - if (family >= 0x10) - load_ucode_amd_bsp(family); + if (x86_family(cpuid_1_eax) >= 0x10) + load_ucode_amd_bsp(cpuid_1_eax); break; default: break; @@ -167,22 +197,21 @@ static bool check_loader_disabled_ap(void) void load_ucode_ap(void) { - int vendor, family; + unsigned int cpuid_1_eax; if (check_loader_disabled_ap()) return; - vendor = x86_cpuid_vendor(); - family = x86_cpuid_family(); + cpuid_1_eax = native_cpuid_eax(1); - switch (vendor) { + switch (x86_cpuid_vendor()) { case X86_VENDOR_INTEL: - if (family >= 6) + if (x86_family(cpuid_1_eax) >= 6) load_ucode_intel_ap(); break; case X86_VENDOR_AMD: - if (family >= 0x10) - load_ucode_amd_ap(family); + if (x86_family(cpuid_1_eax) >= 0x10) + load_ucode_amd_ap(cpuid_1_eax); break; default: break; @@ -201,7 +230,7 @@ static int __init save_microcode_in_initrd(void) break; case X86_VENDOR_AMD: if (c->x86 >= 0x10) - ret = save_microcode_in_initrd_amd(c->x86); + ret = save_microcode_in_initrd_amd(cpuid_eax(1)); break; default: break; diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index 34178564be2a..c1ea5b999839 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -1,4 +1,5 @@ #include <linux/kernel.h> +#include <linux/sched.h> #include <linux/mm.h> #include <asm/cpufeature.h> #include <asm/msr.h> @@ -14,6 +15,8 @@ static void early_init_transmeta(struct cpuinfo_x86 *c) if (xlvl >= 0x80860001) c->x86_capability[CPUID_8086_0001_EDX] = cpuid_edx(0x80860001); } + + clear_sched_clock_stable(); } static void init_transmeta(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 90e8dde3ec26..b2bbad6ebe4d 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -580,24 +580,19 @@ static void __init update_e820_saved(void) } #define MAX_GAP_END 0x100000000ull /* - * Search for a gap in the e820 memory space from start_addr to end_addr. + * Search for a gap in the e820 memory space from 0 to MAX_GAP_END. */ -__init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize, - unsigned long start_addr, unsigned long long end_addr) +static int __init e820_search_gap(unsigned long *gapstart, + unsigned long *gapsize) { - unsigned long long last; + unsigned long long last = MAX_GAP_END; int i = e820->nr_map; int found = 0; - last = (end_addr && end_addr < MAX_GAP_END) ? end_addr : MAX_GAP_END; - while (--i >= 0) { unsigned long long start = e820->map[i].addr; unsigned long long end = start + e820->map[i].size; - if (end < start_addr) - continue; - /* * Since "last" is at most 4GB, we know we'll * fit in 32 bits if this condition is true @@ -628,18 +623,19 @@ __init void e820_setup_gap(void) unsigned long gapstart, gapsize; int found; - gapstart = 0x10000000; gapsize = 0x400000; - found = e820_search_gap(&gapstart, &gapsize, 0, MAX_GAP_END); + found = e820_search_gap(&gapstart, &gapsize); -#ifdef CONFIG_X86_64 if (!found) { +#ifdef CONFIG_X86_64 gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024; printk(KERN_ERR "e820: cannot find a gap in the 32bit address range\n" "e820: PCI devices with unassigned 32bit BARs may break!\n"); - } +#else + gapstart = 0x10000000; #endif + } /* * e820_reserve_resources_late protect stolen RAM already diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index de7234401275..e1114f070c2d 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -9,7 +9,6 @@ #include <asm/fpu/regset.h> #include <asm/fpu/signal.h> #include <asm/fpu/types.h> -#include <asm/fpu/xstate.h> #include <asm/traps.h> #include <linux/hardirq.h> @@ -179,14 +178,8 @@ void fpstate_init(union fpregs_state *state) memset(state, 0, fpu_kernel_xstate_size); - /* - * XRSTORS requires that this bit is set in xcomp_bv, or - * it will #GP. Make sure it is replaced after the memset(). - */ if (static_cpu_has(X86_FEATURE_XSAVES)) - state->xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | - xfeatures_mask; - + fpstate_init_xstate(&state->xsave); if (static_cpu_has(X86_FEATURE_FXSR)) fpstate_init_fxstate(&state->fxsave); else diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 60dece392b3a..19bdd1bf8160 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -48,13 +48,7 @@ void fpu__init_cpu(void) fpu__init_cpu_xstate(); } -/* - * The earliest FPU detection code. - * - * Set the X86_FEATURE_FPU CPU-capability bit based on - * trying to execute an actual sequence of FPU instructions: - */ -static void fpu__init_system_early_generic(struct cpuinfo_x86 *c) +static bool fpu__probe_without_cpuid(void) { unsigned long cr0; u16 fsw, fcw; @@ -65,18 +59,25 @@ static void fpu__init_system_early_generic(struct cpuinfo_x86 *c) cr0 &= ~(X86_CR0_TS | X86_CR0_EM); write_cr0(cr0); - if (!test_bit(X86_FEATURE_FPU, (unsigned long *)cpu_caps_cleared)) { - asm volatile("fninit ; fnstsw %0 ; fnstcw %1" - : "+m" (fsw), "+m" (fcw)); + asm volatile("fninit ; fnstsw %0 ; fnstcw %1" : "+m" (fsw), "+m" (fcw)); + + pr_info("x86/fpu: Probing for FPU: FSW=0x%04hx FCW=0x%04hx\n", fsw, fcw); - if (fsw == 0 && (fcw & 0x103f) == 0x003f) - set_cpu_cap(c, X86_FEATURE_FPU); + return fsw == 0 && (fcw & 0x103f) == 0x003f; +} + +static void fpu__init_system_early_generic(struct cpuinfo_x86 *c) +{ + if (!boot_cpu_has(X86_FEATURE_CPUID) && + !test_bit(X86_FEATURE_FPU, (unsigned long *)cpu_caps_cleared)) { + if (fpu__probe_without_cpuid()) + setup_force_cpu_cap(X86_FEATURE_FPU); else - clear_cpu_cap(c, X86_FEATURE_FPU); + setup_clear_cpu_cap(X86_FEATURE_FPU); } #ifndef CONFIG_MATH_EMULATION - if (!boot_cpu_has(X86_FEATURE_FPU)) { + if (!test_cpu_cap(&boot_cpu_data, X86_FEATURE_FPU)) { pr_emerg("x86/fpu: Giving up, no FPU found and no math emulation present\n"); for (;;) asm volatile("hlt"); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 1d7770447b3e..c24ac1efb12d 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -78,6 +78,7 @@ void fpu__xstate_clear_all_cpu_caps(void) setup_clear_cpu_cap(X86_FEATURE_PKU); setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW); setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS); + setup_clear_cpu_cap(X86_FEATURE_AVX512_VPOPCNTDQ); } /* @@ -705,8 +706,14 @@ void __init fpu__init_system_xstate(void) WARN_ON_FPU(!on_boot_cpu); on_boot_cpu = 0; + if (!boot_cpu_has(X86_FEATURE_FPU)) { + pr_info("x86/fpu: No FPU detected\n"); + return; + } + if (!boot_cpu_has(X86_FEATURE_XSAVE)) { - pr_info("x86/fpu: Legacy x87 FPU detected.\n"); + pr_info("x86/fpu: x87 FPU will use %s\n", + boot_cpu_has(X86_FEATURE_FXSR) ? "FXSAVE" : "FSAVE"); return; } diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index f16c55bfc090..e5fb436a6548 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -49,3 +49,65 @@ asmlinkage __visible void __init i386_start_kernel(void) start_kernel(); } + +/* + * Initialize page tables. This creates a PDE and a set of page + * tables, which are located immediately beyond __brk_base. The variable + * _brk_end is set up to point to the first "safe" location. + * Mappings are created both at virtual address 0 (identity mapping) + * and PAGE_OFFSET for up to _end. + * + * In PAE mode initial_page_table is statically defined to contain + * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3 + * entries). The identity mapping is handled by pointing two PGD entries + * to the first kernel PMD. Note the upper half of each PMD or PTE are + * always zero at this stage. + */ +void __init mk_early_pgtbl_32(void) +{ +#ifdef __pa +#undef __pa +#endif +#define __pa(x) ((unsigned long)(x) - PAGE_OFFSET) + pte_t pte, *ptep; + int i; + unsigned long *ptr; + /* Enough space to fit pagetables for the low memory linear map */ + const unsigned long limit = __pa(_end) + + (PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT); +#ifdef CONFIG_X86_PAE + pmd_t pl2, *pl2p = (pmd_t *)__pa(initial_pg_pmd); +#define SET_PL2(pl2, val) { (pl2).pmd = (val); } +#else + pgd_t pl2, *pl2p = (pgd_t *)__pa(initial_page_table); +#define SET_PL2(pl2, val) { (pl2).pgd = (val); } +#endif + + ptep = (pte_t *)__pa(__brk_base); + pte.pte = PTE_IDENT_ATTR; + + while ((pte.pte & PTE_PFN_MASK) < limit) { + + SET_PL2(pl2, (unsigned long)ptep | PDE_IDENT_ATTR); + *pl2p = pl2; +#ifndef CONFIG_X86_PAE + /* Kernel PDE entry */ + *(pl2p + ((PAGE_OFFSET >> PGDIR_SHIFT))) = pl2; +#endif + for (i = 0; i < PTRS_PER_PTE; i++) { + *ptep = pte; + pte.pte += PAGE_SIZE; + ptep++; + } + + pl2p++; + } + + ptr = (unsigned long *)__pa(&max_pfn_mapped); + /* Can't use pte_pfn() since it's a call with CONFIG_PARAVIRT */ + *ptr = (pte.pte & PTE_PFN_MASK) >> PAGE_SHIFT; + + ptr = (unsigned long *)__pa(&_brk_end); + *ptr = (unsigned long)ptep + PAGE_OFFSET; +} + diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 4e8577d03372..1f85ee8f9439 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -24,6 +24,7 @@ #include <asm/nops.h> #include <asm/bootparam.h> #include <asm/export.h> +#include <asm/pgtable_32.h> /* Physical address */ #define pa(X) ((X) - __PAGE_OFFSET) @@ -41,44 +42,10 @@ #define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability #define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id -/* - * This is how much memory in addition to the memory covered up to - * and including _end we need mapped initially. - * We need: - * (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE) - * (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE) - * - * Modulo rounding, each megabyte assigned here requires a kilobyte of - * memory, which is currently unreclaimed. - * - * This should be a multiple of a page. - * - * KERNEL_IMAGE_SIZE should be greater than pa(_end) - * and small than max_low_pfn, otherwise will waste some page table entries - */ - -#if PTRS_PER_PMD > 1 -#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD) -#else -#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD) -#endif #define SIZEOF_PTREGS 17*4 /* - * Number of possible pages in the lowmem region. - * - * We shift 2 by 31 instead of 1 by 32 to the left in order to avoid a - * gas warning about overflowing shift count when gas has been compiled - * with only a host target support using a 32-bit type for internal - * representation. - */ -LOWMEM_PAGES = (((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT) - -/* Enough space to fit pagetables for the low memory linear map */ -MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT - -/* * Worst-case size of the kernel mapping we need to make: * a relocatable kernel can live anywhere in lowmem, so we need to be able * to map all of lowmem. @@ -160,90 +127,15 @@ ENTRY(startup_32) call load_ucode_bsp #endif -/* - * Initialize page tables. This creates a PDE and a set of page - * tables, which are located immediately beyond __brk_base. The variable - * _brk_end is set up to point to the first "safe" location. - * Mappings are created both at virtual address 0 (identity mapping) - * and PAGE_OFFSET for up to _end. - */ -#ifdef CONFIG_X86_PAE - - /* - * In PAE mode initial_page_table is statically defined to contain - * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3 - * entries). The identity mapping is handled by pointing two PGD entries - * to the first kernel PMD. - * - * Note the upper half of each PMD or PTE are always zero at this stage. - */ - -#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */ - - xorl %ebx,%ebx /* %ebx is kept at zero */ - - movl $pa(__brk_base), %edi - movl $pa(initial_pg_pmd), %edx - movl $PTE_IDENT_ATTR, %eax -10: - leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */ - movl %ecx,(%edx) /* Store PMD entry */ - /* Upper half already zero */ - addl $8,%edx - movl $512,%ecx -11: - stosl - xchgl %eax,%ebx - stosl - xchgl %eax,%ebx - addl $0x1000,%eax - loop 11b - - /* - * End condition: we must map up to the end + MAPPING_BEYOND_END. - */ - movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp - cmpl %ebp,%eax - jb 10b -1: - addl $__PAGE_OFFSET, %edi - movl %edi, pa(_brk_end) - shrl $12, %eax - movl %eax, pa(max_pfn_mapped) + /* Create early pagetables. */ + call mk_early_pgtbl_32 /* Do early initialization of the fixmap area */ movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax +#ifdef CONFIG_X86_PAE +#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */ movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8) -#else /* Not PAE */ - -page_pde_offset = (__PAGE_OFFSET >> 20); - - movl $pa(__brk_base), %edi - movl $pa(initial_page_table), %edx - movl $PTE_IDENT_ATTR, %eax -10: - leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */ - movl %ecx,(%edx) /* Store identity PDE entry */ - movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */ - addl $4,%edx - movl $1024, %ecx -11: - stosl - addl $0x1000,%eax - loop 11b - /* - * End condition: we must map up to the end + MAPPING_BEYOND_END. - */ - movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp - cmpl %ebp,%eax - jb 10b - addl $__PAGE_OFFSET, %edi - movl %edi, pa(_brk_end) - shrl $12, %eax - movl %eax, pa(max_pfn_mapped) - - /* Do early initialization of the fixmap area */ - movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax +#else movl %eax,pa(initial_page_table+0xffc) #endif @@ -666,6 +558,7 @@ ENTRY(setup_once_ref) __PAGE_ALIGNED_BSS .align PAGE_SIZE #ifdef CONFIG_X86_PAE +.globl initial_pg_pmd initial_pg_pmd: .fill 1024*KPMDS,4,0 #else diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 7c6e9ffe4424..4d8183b5f113 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -264,7 +264,7 @@ void __smp_x86_platform_ipi(void) x86_platform_ipi_callback(); } -__visible void smp_x86_platform_ipi(struct pt_regs *regs) +__visible void __irq_entry smp_x86_platform_ipi(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); @@ -315,7 +315,7 @@ __visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs) } #endif -__visible void smp_trace_x86_platform_ipi(struct pt_regs *regs) +__visible void __irq_entry smp_trace_x86_platform_ipi(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c index 3512ba607361..275487872be2 100644 --- a/arch/x86/kernel/irq_work.c +++ b/arch/x86/kernel/irq_work.c @@ -9,6 +9,7 @@ #include <linux/hardirq.h> #include <asm/apic.h> #include <asm/trace/irq_vectors.h> +#include <linux/interrupt.h> static inline void __smp_irq_work_interrupt(void) { @@ -16,14 +17,14 @@ static inline void __smp_irq_work_interrupt(void) irq_work_run(); } -__visible void smp_irq_work_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_irq_work_interrupt(struct pt_regs *regs) { ipi_entering_ack_irq(); __smp_irq_work_interrupt(); exiting_irq(); } -__visible void smp_trace_irq_work_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_trace_irq_work_interrupt(struct pt_regs *regs) { ipi_entering_ack_irq(); trace_irq_work_entry(IRQ_WORK_VECTOR); diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c index cb9c1ed1d391..f73f475d0573 100644 --- a/arch/x86/kernel/itmt.c +++ b/arch/x86/kernel/itmt.c @@ -132,10 +132,8 @@ int sched_set_itmt_support(void) sysctl_sched_itmt_enabled = 1; - if (sysctl_sched_itmt_enabled) { - x86_topology_update = true; - rebuild_sched_domains(); - } + x86_topology_update = true; + rebuild_sched_domains(); mutex_unlock(&itmt_update_mutex); diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index fc25f698d792..c37bd0f39c70 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -32,8 +32,7 @@ static void bug_at(unsigned char *ip, int line) * Something went wrong. Crash the box, as something could be * corrupting the kernel. */ - pr_warning("Unexpected op at %pS [%p] (%02x %02x %02x %02x %02x) %s:%d\n", - ip, ip, ip[0], ip[1], ip[2], ip[3], ip[4], __FILE__, line); + pr_crit("jump_label: Fatal kernel bug, unexpected op at %pS [%p] (%5ph) %d\n", ip, ip, ip, line); BUG(); } diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 36bc66416021..099fcba4981d 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -620,18 +620,4 @@ void __init kvm_spinlock_init(void) } } -static __init int kvm_spinlock_init_jump(void) -{ - if (!kvm_para_available()) - return 0; - if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) - return 0; - - static_key_slow_inc(¶virt_ticketlocks_enabled); - printk(KERN_INFO "KVM setup paravirtual spinlock\n"); - - return 0; -} -early_initcall(kvm_spinlock_init_jump); - #endif /* CONFIG_PARAVIRT_SPINLOCKS */ diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 2a5cafdf8808..542710b99f52 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -107,12 +107,12 @@ static inline void kvm_sched_clock_init(bool stable) { if (!stable) { pv_time_ops.sched_clock = kvm_clock_read; + clear_sched_clock_stable(); return; } kvm_sched_clock_offset = kvm_clock_read(); pv_time_ops.sched_clock = kvm_sched_clock_read; - set_sched_clock_stable(); printk(KERN_INFO "kvm-clock: using sched offset of %llu cycles\n", kvm_sched_clock_offset); diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 6d4bf812af45..6259327f3454 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -42,6 +42,3 @@ struct pv_lock_ops pv_lock_ops = { #endif /* SMP */ }; EXPORT_SYMBOL(pv_lock_ops); - -struct static_key paravirt_ticketlocks_enabled = STATIC_KEY_INIT_FALSE; -EXPORT_SYMBOL(paravirt_ticketlocks_enabled); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 4cfba947d774..eb69b14dbfc8 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -575,7 +575,9 @@ static void __init reserve_crashkernel(void) /* 0 means: find the address automatically */ if (crash_base <= 0) { /* - * kexec want bzImage is below CRASH_KERNEL_ADDR_MAX + * Set CRASH_ADDR_LOW_MAX upper bound for crash memory, + * as old kexec-tools loads bzImage below that, unless + * "crashkernel=size[KMG],high" is specified. */ crash_base = memblock_find_in_range(CRASH_ALIGN, high ? CRASH_ADDR_HIGH_MAX diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 68f8cc222f25..d3c66a15bbde 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -259,7 +259,7 @@ static inline void __smp_reschedule_interrupt(void) scheduler_ipi(); } -__visible void smp_reschedule_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_reschedule_interrupt(struct pt_regs *regs) { ack_APIC_irq(); __smp_reschedule_interrupt(); @@ -268,7 +268,7 @@ __visible void smp_reschedule_interrupt(struct pt_regs *regs) */ } -__visible void smp_trace_reschedule_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_trace_reschedule_interrupt(struct pt_regs *regs) { /* * Need to call irq_enter() before calling the trace point. @@ -292,14 +292,15 @@ static inline void __smp_call_function_interrupt(void) inc_irq_stat(irq_call_count); } -__visible void smp_call_function_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_call_function_interrupt(struct pt_regs *regs) { ipi_entering_ack_irq(); __smp_call_function_interrupt(); exiting_irq(); } -__visible void smp_trace_call_function_interrupt(struct pt_regs *regs) +__visible void __irq_entry +smp_trace_call_function_interrupt(struct pt_regs *regs) { ipi_entering_ack_irq(); trace_call_function_entry(CALL_FUNCTION_VECTOR); @@ -314,14 +315,16 @@ static inline void __smp_call_function_single_interrupt(void) inc_irq_stat(irq_call_count); } -__visible void smp_call_function_single_interrupt(struct pt_regs *regs) +__visible void __irq_entry +smp_call_function_single_interrupt(struct pt_regs *regs) { ipi_entering_ack_irq(); __smp_call_function_single_interrupt(); exiting_irq(); } -__visible void smp_trace_call_function_single_interrupt(struct pt_regs *regs) +__visible void __irq_entry +smp_trace_call_function_single_interrupt(struct pt_regs *regs) { ipi_entering_ack_irq(); trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index e41af597aed8..cb60bbe093b2 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1107,6 +1107,16 @@ static u64 read_tsc(struct clocksource *cs) return (u64)rdtsc_ordered(); } +static void tsc_cs_mark_unstable(struct clocksource *cs) +{ + if (tsc_unstable) + return; + tsc_unstable = 1; + clear_sched_clock_stable(); + disable_sched_clock_irqtime(); + pr_info("Marking TSC unstable due to clocksource watchdog\n"); +} + /* * .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc() */ @@ -1119,6 +1129,7 @@ static struct clocksource clocksource_tsc = { CLOCK_SOURCE_MUST_VERIFY, .archdata = { .vclock_mode = VCLOCK_TSC }, .resume = tsc_resume, + .mark_unstable = tsc_cs_mark_unstable, }; void mark_tsc_unstable(char *reason) diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index 073d1f1a620b..a8e91ae89fb3 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -156,13 +156,13 @@ EXPORT_SYMBOL(__delay); inline void __const_udelay(unsigned long xloops) { + unsigned long lpj = this_cpu_read(cpu_info.loops_per_jiffy) ? : loops_per_jiffy; int d0; xloops *= 4; asm("mull %%edx" :"=d" (xloops), "=&a" (d0) - :"1" (xloops), "0" - (this_cpu_read(cpu_info.loops_per_jiffy) * (HZ/4))); + :"1" (xloops), "0" (lpj * (HZ / 4))); __delay(++xloops); } diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile index 90e4f2a6625b..a7dbec4dce27 100644 --- a/arch/x86/platform/intel-mid/device_libs/Makefile +++ b/arch/x86/platform/intel-mid/device_libs/Makefile @@ -5,14 +5,12 @@ obj-$(subst m,y,$(CONFIG_MMC_SDHCI_PCI)) += platform_mrfld_sd.o # WiFi obj-$(subst m,y,$(CONFIG_BRCMFMAC_SDIO)) += platform_bcm43xx.o # IPC Devices -obj-y += platform_ipc.o obj-$(subst m,y,$(CONFIG_MFD_INTEL_MSIC)) += platform_msic.o obj-$(subst m,y,$(CONFIG_SND_MFLD_MACHINE)) += platform_msic_audio.o obj-$(subst m,y,$(CONFIG_GPIO_MSIC)) += platform_msic_gpio.o obj-$(subst m,y,$(CONFIG_MFD_INTEL_MSIC)) += platform_msic_ocd.o obj-$(subst m,y,$(CONFIG_MFD_INTEL_MSIC)) += platform_msic_battery.o obj-$(subst m,y,$(CONFIG_INTEL_MID_POWER_BUTTON)) += platform_msic_power_btn.o -obj-$(subst m,y,$(CONFIG_GPIO_INTEL_PMIC)) += platform_pmic_gpio.o obj-$(subst m,y,$(CONFIG_INTEL_MFLD_THERMAL)) += platform_msic_thermal.o # SPI Devices obj-$(subst m,y,$(CONFIG_SPI_SPIDEV)) += platform_mrfld_spidev.o @@ -28,4 +26,5 @@ obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_pcal9555a.o obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_tca6416.o # MISC Devices obj-$(subst m,y,$(CONFIG_KEYBOARD_GPIO)) += platform_gpio_keys.o +obj-$(subst m,y,$(CONFIG_RTC_DRV_CMOS)) += platform_mrfld_rtc.o obj-$(subst m,y,$(CONFIG_INTEL_MID_WATCHDOG)) += platform_mrfld_wdt.o diff --git a/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c b/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c index 52534ec29765..74283875c7e8 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c @@ -32,6 +32,9 @@ static struct gpio_keys_button gpio_button[] = { {SW_LID, -1, 1, "lid_switch", EV_SW, 0, 20}, {KEY_VOLUMEUP, -1, 1, "vol_up", EV_KEY, 0, 20}, {KEY_VOLUMEDOWN, -1, 1, "vol_down", EV_KEY, 0, 20}, + {KEY_MUTE, -1, 1, "mute_enable", EV_KEY, 0, 20}, + {KEY_VOLUMEUP, -1, 1, "volume_up", EV_KEY, 0, 20}, + {KEY_VOLUMEDOWN, -1, 1, "volume_down", EV_KEY, 0, 20}, {KEY_CAMERA, -1, 1, "camera_full", EV_KEY, 0, 20}, {KEY_CAMERA_FOCUS, -1, 1, "camera_half", EV_KEY, 0, 20}, {SW_KEYPAD_SLIDE, -1, 1, "MagSw1", EV_SW, 0, 20}, diff --git a/arch/x86/platform/intel-mid/device_libs/platform_ipc.c b/arch/x86/platform/intel-mid/device_libs/platform_ipc.c deleted file mode 100644 index a84b73d6c4a0..000000000000 --- a/arch/x86/platform/intel-mid/device_libs/platform_ipc.c +++ /dev/null @@ -1,68 +0,0 @@ -/* - * platform_ipc.c: IPC platform library file - * - * (C) Copyright 2013 Intel Corporation - * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ - -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/interrupt.h> -#include <linux/sfi.h> -#include <linux/gpio.h> -#include <asm/intel-mid.h> -#include "platform_ipc.h" - -void __init ipc_device_handler(struct sfi_device_table_entry *pentry, - struct devs_id *dev) -{ - struct platform_device *pdev; - void *pdata = NULL; - static struct resource res __initdata = { - .name = "IRQ", - .flags = IORESOURCE_IRQ, - }; - - pr_debug("IPC bus, name = %16.16s, irq = 0x%2x\n", - pentry->name, pentry->irq); - - /* - * We need to call platform init of IPC devices to fill misc_pdata - * structure. It will be used in msic_init for initialization. - */ - if (dev != NULL) - pdata = dev->get_platform_data(pentry); - - /* - * On Medfield the platform device creation is handled by the MSIC - * MFD driver so we don't need to do it here. - */ - if (intel_mid_has_msic()) - return; - - pdev = platform_device_alloc(pentry->name, 0); - if (pdev == NULL) { - pr_err("out of memory for SFI platform device '%s'.\n", - pentry->name); - return; - } - res.start = pentry->irq; - platform_device_add_resources(pdev, &res, 1); - - pdev->dev.platform_data = pdata; - intel_scu_device_register(pdev); -} - -static const struct devs_id pmic_audio_dev_id __initconst = { - .name = "pmic_audio", - .type = SFI_DEV_TYPE_IPC, - .delay = 1, - .device_handler = &ipc_device_handler, -}; - -sfi_device(pmic_audio_dev_id); diff --git a/arch/x86/platform/intel-mid/device_libs/platform_ipc.h b/arch/x86/platform/intel-mid/device_libs/platform_ipc.h deleted file mode 100644 index 79bb09d4f718..000000000000 --- a/arch/x86/platform/intel-mid/device_libs/platform_ipc.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - * platform_ipc.h: IPC platform library header file - * - * (C) Copyright 2013 Intel Corporation - * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ -#ifndef _PLATFORM_IPC_H_ -#define _PLATFORM_IPC_H_ - -void __init -ipc_device_handler(struct sfi_device_table_entry *pentry, struct devs_id *dev); - -#endif diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_rtc.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_rtc.c new file mode 100644 index 000000000000..3135416df037 --- /dev/null +++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_rtc.c @@ -0,0 +1,48 @@ +/* + * Intel Merrifield legacy RTC initialization file + * + * (C) Copyright 2017 Intel Corporation + * + * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include <linux/init.h> + +#include <asm/hw_irq.h> +#include <asm/intel-mid.h> +#include <asm/io_apic.h> +#include <asm/time.h> +#include <asm/x86_init.h> + +static int __init mrfld_legacy_rtc_alloc_irq(void) +{ + struct irq_alloc_info info; + int ret; + + if (!x86_platform.legacy.rtc) + return -ENODEV; + + ioapic_set_alloc_attr(&info, NUMA_NO_NODE, 1, 0); + ret = mp_map_gsi_to_irq(RTC_IRQ, IOAPIC_MAP_ALLOC, &info); + if (ret < 0) { + pr_info("Failed to allocate RTC interrupt. Disabling RTC\n"); + x86_platform.legacy.rtc = 0; + return ret; + } + + return 0; +} + +static int __init mrfld_legacy_rtc_init(void) +{ + if (intel_mid_identify_cpu() != INTEL_MID_CPU_CHIP_TANGIER) + return -ENODEV; + + return mrfld_legacy_rtc_alloc_irq(); +} +arch_initcall(mrfld_legacy_rtc_init); diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c index 3f1f1c77d090..86edd1e941eb 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c @@ -28,9 +28,9 @@ static struct platform_device wdt_dev = { static int tangier_probe(struct platform_device *pdev) { - int gsi; struct irq_alloc_info info; struct intel_mid_wdt_pdata *pdata = pdev->dev.platform_data; + int gsi, irq; if (!pdata) return -EINVAL; @@ -38,10 +38,10 @@ static int tangier_probe(struct platform_device *pdev) /* IOAPIC builds identity mapping between GSI and IRQ on MID */ gsi = pdata->irq; ioapic_set_alloc_attr(&info, cpu_to_node(0), 1, 0); - if (mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC, &info) <= 0) { - dev_warn(&pdev->dev, "cannot find interrupt %d in ioapic\n", - gsi); - return -EINVAL; + irq = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC, &info); + if (irq < 0) { + dev_warn(&pdev->dev, "cannot find interrupt %d in ioapic\n", gsi); + return irq; } return 0; @@ -82,4 +82,4 @@ static int __init register_mid_wdt(void) return 0; } -rootfs_initcall(register_mid_wdt); +arch_initcall(register_mid_wdt); diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c index cb3490ecb341..d4dc744dd5a5 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c @@ -20,7 +20,6 @@ #include <asm/intel-mid.h> #include "platform_msic.h" -#include "platform_ipc.h" static void *msic_audio_platform_data(void *info) { @@ -40,8 +39,8 @@ static const struct devs_id msic_audio_dev_id __initconst = { .name = "msic_audio", .type = SFI_DEV_TYPE_IPC, .delay = 1, + .msic = 1, .get_platform_data = &msic_audio_platform_data, - .device_handler = &ipc_device_handler, }; sfi_device(msic_audio_dev_id); diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c index 4f72193939a6..5c3e9919633f 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c @@ -19,7 +19,6 @@ #include <asm/intel-mid.h> #include "platform_msic.h" -#include "platform_ipc.h" static void __init *msic_battery_platform_data(void *info) { @@ -30,8 +29,8 @@ static const struct devs_id msic_battery_dev_id __initconst = { .name = "msic_battery", .type = SFI_DEV_TYPE_IPC, .delay = 1, + .msic = 1, .get_platform_data = &msic_battery_platform_data, - .device_handler = &ipc_device_handler, }; sfi_device(msic_battery_dev_id); diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c index 70de5b531ba0..9fdb88d460d7 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c @@ -20,7 +20,6 @@ #include <asm/intel-mid.h> #include "platform_msic.h" -#include "platform_ipc.h" static void __init *msic_gpio_platform_data(void *info) { @@ -41,8 +40,8 @@ static const struct devs_id msic_gpio_dev_id __initconst = { .name = "msic_gpio", .type = SFI_DEV_TYPE_IPC, .delay = 1, + .msic = 1, .get_platform_data = &msic_gpio_platform_data, - .device_handler = &ipc_device_handler, }; sfi_device(msic_gpio_dev_id); diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c index 3d7c2011b6cf..7ae37cdbf256 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c @@ -20,7 +20,6 @@ #include <asm/intel-mid.h> #include "platform_msic.h" -#include "platform_ipc.h" static void __init *msic_ocd_platform_data(void *info) { @@ -42,8 +41,8 @@ static const struct devs_id msic_ocd_dev_id __initconst = { .name = "msic_ocd", .type = SFI_DEV_TYPE_IPC, .delay = 1, + .msic = 1, .get_platform_data = &msic_ocd_platform_data, - .device_handler = &ipc_device_handler, }; sfi_device(msic_ocd_dev_id); diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c index 038f618fbc52..96809b98cf69 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c @@ -18,7 +18,6 @@ #include <asm/intel-mid.h> #include "platform_msic.h" -#include "platform_ipc.h" static void __init *msic_power_btn_platform_data(void *info) { @@ -29,8 +28,8 @@ static const struct devs_id msic_power_btn_dev_id __initconst = { .name = "msic_power_btn", .type = SFI_DEV_TYPE_IPC, .delay = 1, + .msic = 1, .get_platform_data = &msic_power_btn_platform_data, - .device_handler = &ipc_device_handler, }; sfi_device(msic_power_btn_dev_id); diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c index 114a5755b1e4..3e4167d246cd 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c @@ -19,7 +19,6 @@ #include <asm/intel-mid.h> #include "platform_msic.h" -#include "platform_ipc.h" static void __init *msic_thermal_platform_data(void *info) { @@ -30,8 +29,8 @@ static const struct devs_id msic_thermal_dev_id __initconst = { .name = "msic_thermal", .type = SFI_DEV_TYPE_IPC, .delay = 1, + .msic = 1, .get_platform_data = &msic_thermal_platform_data, - .device_handler = &ipc_device_handler, }; sfi_device(msic_thermal_dev_id); diff --git a/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c b/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c deleted file mode 100644 index e30cb62e3300..000000000000 --- a/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c +++ /dev/null @@ -1,54 +0,0 @@ -/* - * platform_pmic_gpio.c: PMIC GPIO platform data initialization file - * - * (C) Copyright 2013 Intel Corporation - * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ - -#include <linux/kernel.h> -#include <linux/interrupt.h> -#include <linux/scatterlist.h> -#include <linux/gpio.h> -#include <linux/init.h> -#include <linux/sfi.h> -#include <linux/intel_pmic_gpio.h> -#include <asm/intel-mid.h> - -#include "platform_ipc.h" - -static void __init *pmic_gpio_platform_data(void *info) -{ - static struct intel_pmic_gpio_platform_data pmic_gpio_pdata; - int gpio_base = get_gpio_by_name("pmic_gpio_base"); - - if (gpio_base < 0) - gpio_base = 64; - pmic_gpio_pdata.gpio_base = gpio_base; - pmic_gpio_pdata.irq_base = gpio_base + INTEL_MID_IRQ_OFFSET; - pmic_gpio_pdata.gpiointr = 0xffffeff8; - - return &pmic_gpio_pdata; -} - -static const struct devs_id pmic_gpio_spi_dev_id __initconst = { - .name = "pmic_gpio", - .type = SFI_DEV_TYPE_SPI, - .delay = 1, - .get_platform_data = &pmic_gpio_platform_data, -}; - -static const struct devs_id pmic_gpio_ipc_dev_id __initconst = { - .name = "pmic_gpio", - .type = SFI_DEV_TYPE_IPC, - .delay = 1, - .get_platform_data = &pmic_gpio_platform_data, - .device_handler = &ipc_device_handler -}; - -sfi_device(pmic_gpio_spi_dev_id); -sfi_device(pmic_gpio_ipc_dev_id); diff --git a/arch/x86/platform/intel-mid/mrfld.c b/arch/x86/platform/intel-mid/mrfld.c index e0607c77a1bd..ae7bdeb0e507 100644 --- a/arch/x86/platform/intel-mid/mrfld.c +++ b/arch/x86/platform/intel-mid/mrfld.c @@ -91,6 +91,7 @@ static unsigned long __init tangier_calibrate_tsc(void) static void __init tangier_arch_setup(void) { x86_platform.calibrate_tsc = tangier_calibrate_tsc; + x86_platform.legacy.rtc = 1; } /* tangier arch ops */ diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c index 051d264fce2e..19b43e3a9f0f 100644 --- a/arch/x86/platform/intel-mid/sfi.c +++ b/arch/x86/platform/intel-mid/sfi.c @@ -15,7 +15,6 @@ #include <linux/interrupt.h> #include <linux/scatterlist.h> #include <linux/sfi.h> -#include <linux/intel_pmic_gpio.h> #include <linux/spi/spi.h> #include <linux/i2c.h> #include <linux/skbuff.h> @@ -226,7 +225,7 @@ int get_gpio_by_name(const char *name) return -EINVAL; } -void __init intel_scu_device_register(struct platform_device *pdev) +static void __init intel_scu_ipc_device_register(struct platform_device *pdev) { if (ipc_next_dev == MAX_IPCDEVS) pr_err("too many SCU IPC devices"); @@ -335,10 +334,22 @@ static void __init sfi_handle_ipc_dev(struct sfi_device_table_entry *pentry, pr_debug("IPC bus, name = %16.16s, irq = 0x%2x\n", pentry->name, pentry->irq); + + /* + * We need to call platform init of IPC devices to fill misc_pdata + * structure. It will be used in msic_init for initialization. + */ pdata = intel_mid_sfi_get_pdata(dev, pentry); if (IS_ERR(pdata)) return; + /* + * On Medfield the platform device creation is handled by the MSIC + * MFD driver so we don't need to do it here. + */ + if (dev->msic && intel_mid_has_msic()) + return; + pdev = platform_device_alloc(pentry->name, 0); if (pdev == NULL) { pr_err("out of memory for SFI platform device '%s'.\n", @@ -348,7 +359,10 @@ static void __init sfi_handle_ipc_dev(struct sfi_device_table_entry *pentry, install_irq_resource(pdev, pentry->irq); pdev->dev.platform_data = pdata; - platform_device_add(pdev); + if (dev->delay) + intel_scu_ipc_device_register(pdev); + else + platform_device_add(pdev); } static void __init sfi_handle_spi_dev(struct sfi_device_table_entry *pentry, @@ -503,27 +517,23 @@ static int __init sfi_parse_devs(struct sfi_table_header *table) if (!dev) continue; - if (dev->device_handler) { - dev->device_handler(pentry, dev); - } else { - switch (pentry->type) { - case SFI_DEV_TYPE_IPC: - sfi_handle_ipc_dev(pentry, dev); - break; - case SFI_DEV_TYPE_SPI: - sfi_handle_spi_dev(pentry, dev); - break; - case SFI_DEV_TYPE_I2C: - sfi_handle_i2c_dev(pentry, dev); - break; - case SFI_DEV_TYPE_SD: - sfi_handle_sd_dev(pentry, dev); - break; - case SFI_DEV_TYPE_UART: - case SFI_DEV_TYPE_HSI: - default: - break; - } + switch (pentry->type) { + case SFI_DEV_TYPE_IPC: + sfi_handle_ipc_dev(pentry, dev); + break; + case SFI_DEV_TYPE_SPI: + sfi_handle_spi_dev(pentry, dev); + break; + case SFI_DEV_TYPE_I2C: + sfi_handle_i2c_dev(pentry, dev); + break; + case SFI_DEV_TYPE_SD: + sfi_handle_sd_dev(pentry, dev); + break; + case SFI_DEV_TYPE_UART: + case SFI_DEV_TYPE_HSI: + default: + break; } } return 0; diff --git a/arch/x86/ras/Kconfig b/arch/x86/ras/Kconfig index d957d5f21a86..0bc60a308730 100644 --- a/arch/x86/ras/Kconfig +++ b/arch/x86/ras/Kconfig @@ -1,6 +1,6 @@ config MCE_AMD_INJ tristate "Simple MCE injection interface for AMD processors" - depends on RAS && EDAC_DECODE_MCE && DEBUG_FS && AMD_NB + depends on RAS && X86_MCE && DEBUG_FS && AMD_NB default n help This is a simple debugfs interface to inject MCEs and test different diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index e8a9ea7d7a21..25a7c4302ce7 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -141,25 +141,6 @@ void __init xen_init_spinlocks(void) pv_lock_ops.vcpu_is_preempted = PV_CALLEE_SAVE(xen_vcpu_stolen); } -/* - * While the jump_label init code needs to happend _after_ the jump labels are - * enabled and before SMP is started. Hence we use pre-SMP initcall level - * init. We cannot do it in xen_init_spinlocks as that is done before - * jump labels are activated. - */ -static __init int xen_init_spinlocks_jump(void) -{ - if (!xen_pvspin) - return 0; - - if (!xen_domain()) - return 0; - - static_key_slow_inc(¶virt_ticketlocks_enabled); - return 0; -} -early_initcall(xen_init_spinlocks_jump); - static __init int xen_parse_nopvspin(char *arg) { xen_pvspin = false; |