summaryrefslogtreecommitdiff
path: root/arch/x86/kernel/cpu
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r--arch/x86/kernel/cpu/acrn.c16
-rw-r--r--arch/x86/kernel/cpu/common.c7
-rw-r--r--arch/x86/kernel/cpu/intel.c4
-rw-r--r--arch/x86/kernel/cpu/mce/Makefile2
-rw-r--r--arch/x86/kernel/cpu/mce/core.c16
-rw-r--r--arch/x86/kernel/cpu/mce/intel.c1
-rw-r--r--arch/x86/kernel/cpu/mce/therm_throt.c739
-rw-r--r--arch/x86/kernel/cpu/microcode/core.c2
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c58
-rw-r--r--arch/x86/kernel/cpu/mtrr/cleanup.c4
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c1
-rw-r--r--arch/x86/kernel/cpu/mtrr/mtrr.c2
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c11
-rw-r--r--arch/x86/kernel/cpu/resctrl/internal.h1
-rw-r--r--arch/x86/kernel/cpu/resctrl/rdtgroup.c24
-rw-r--r--arch/x86/kernel/cpu/scattered.c5
-rw-r--r--arch/x86/kernel/cpu/sgx/driver.c3
-rw-r--r--arch/x86/kernel/cpu/sgx/encl.c13
-rw-r--r--arch/x86/kernel/cpu/sgx/main.c14
19 files changed, 128 insertions, 795 deletions
diff --git a/arch/x86/kernel/cpu/acrn.c b/arch/x86/kernel/cpu/acrn.c
index 0b2c03943ac6..23f5f27b5a02 100644
--- a/arch/x86/kernel/cpu/acrn.c
+++ b/arch/x86/kernel/cpu/acrn.c
@@ -10,6 +10,8 @@
*/
#include <linux/interrupt.h>
+
+#include <asm/acrn.h>
#include <asm/apic.h>
#include <asm/cpufeatures.h>
#include <asm/desc.h>
@@ -19,7 +21,7 @@
static u32 __init acrn_detect(void)
{
- return hypervisor_cpuid_base("ACRNACRNACRN", 0);
+ return acrn_cpuid_base();
}
static void __init acrn_init_platform(void)
@@ -55,6 +57,18 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_acrn_hv_callback)
set_irq_regs(old_regs);
}
+void acrn_setup_intr_handler(void (*handler)(void))
+{
+ acrn_intr_handler = handler;
+}
+EXPORT_SYMBOL_GPL(acrn_setup_intr_handler);
+
+void acrn_remove_intr_handler(void)
+{
+ acrn_intr_handler = NULL;
+}
+EXPORT_SYMBOL_GPL(acrn_remove_intr_handler);
+
const __initconst struct hypervisor_x86 x86_hyper_acrn = {
.name = "ACRN",
.detect = acrn_detect,
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 35ad8480c464..ab640abe26b6 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -960,6 +960,9 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
if (c->extended_cpuid_level >= 0x8000000a)
c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
+ if (c->extended_cpuid_level >= 0x8000001f)
+ c->x86_capability[CPUID_8000_001F_EAX] = cpuid_eax(0x8000001f);
+
init_scattered_cpuid_features(c);
init_speculation_control(c);
@@ -1739,8 +1742,8 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
&init_task;
EXPORT_PER_CPU_SYMBOL(current_task);
-DEFINE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
-DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
+DEFINE_PER_CPU(void *, hardirq_stack_ptr);
+DEFINE_PER_CPU(bool, hardirq_stack_inuse);
DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
EXPORT_PER_CPU_SYMBOL(__preempt_count);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 59a1e3ce3f14..0e422a544835 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -24,6 +24,7 @@
#include <asm/traps.h>
#include <asm/resctrl.h>
#include <asm/numa.h>
+#include <asm/thermal.h>
#ifdef CONFIG_X86_64
#include <linux/topology.h>
@@ -719,6 +720,8 @@ static void init_intel(struct cpuinfo_x86 *c)
tsx_disable();
split_lock_init();
+
+ intel_init_thermal(c);
}
#ifdef CONFIG_X86_32
@@ -1159,6 +1162,7 @@ static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, 1),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, 1),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, 1),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, 1),
{}
};
diff --git a/arch/x86/kernel/cpu/mce/Makefile b/arch/x86/kernel/cpu/mce/Makefile
index 9f020c994154..015856abdbb1 100644
--- a/arch/x86/kernel/cpu/mce/Makefile
+++ b/arch/x86/kernel/cpu/mce/Makefile
@@ -9,8 +9,6 @@ obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
mce-inject-y := inject.o
obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o
-obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o
-
obj-$(CONFIG_ACPI_APEI) += apei.o
obj-$(CONFIG_X86_MCELOG_LEGACY) += dev-mcelog.o
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index e133ce1e562b..7962355436da 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -878,6 +878,12 @@ static atomic_t mce_executing;
static atomic_t mce_callin;
/*
+ * Track which CPUs entered the MCA broadcast synchronization and which not in
+ * order to print holdouts.
+ */
+static cpumask_t mce_missing_cpus = CPU_MASK_ALL;
+
+/*
* Check if a timeout waiting for other CPUs happened.
*/
static int mce_timed_out(u64 *t, const char *msg)
@@ -894,8 +900,12 @@ static int mce_timed_out(u64 *t, const char *msg)
if (!mca_cfg.monarch_timeout)
goto out;
if ((s64)*t < SPINUNIT) {
- if (mca_cfg.tolerant <= 1)
+ if (mca_cfg.tolerant <= 1) {
+ if (cpumask_and(&mce_missing_cpus, cpu_online_mask, &mce_missing_cpus))
+ pr_emerg("CPUs not responding to MCE broadcast (may include false positives): %*pbl\n",
+ cpumask_pr_args(&mce_missing_cpus));
mce_panic(msg, NULL, NULL);
+ }
cpu_missing = 1;
return 1;
}
@@ -1006,6 +1016,7 @@ static int mce_start(int *no_way_out)
* is updated before mce_callin.
*/
order = atomic_inc_return(&mce_callin);
+ cpumask_clear_cpu(smp_processor_id(), &mce_missing_cpus);
/*
* Wait for everyone.
@@ -1114,6 +1125,7 @@ static int mce_end(int order)
reset:
atomic_set(&global_nwo, 0);
atomic_set(&mce_callin, 0);
+ cpumask_setall(&mce_missing_cpus);
barrier();
/*
@@ -2178,7 +2190,6 @@ __setup("mce", mcheck_enable);
int __init mcheck_init(void)
{
- mcheck_intel_therm_init();
mce_register_decode_chain(&early_nb);
mce_register_decode_chain(&mce_uc_nb);
mce_register_decode_chain(&mce_default_nb);
@@ -2713,6 +2724,7 @@ static void mce_reset(void)
atomic_set(&mce_executing, 0);
atomic_set(&mce_callin, 0);
atomic_set(&global_nwo, 0);
+ cpumask_setall(&mce_missing_cpus);
}
static int fake_panic_get(void *data, u64 *val)
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index c2476fe0682e..e309476743b7 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -531,7 +531,6 @@ static void intel_imc_init(struct cpuinfo_x86 *c)
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
- intel_init_thermal(c);
intel_init_cmci();
intel_init_lmce();
intel_ppin_init(c);
diff --git a/arch/x86/kernel/cpu/mce/therm_throt.c b/arch/x86/kernel/cpu/mce/therm_throt.c
deleted file mode 100644
index a7cd2d203ced..000000000000
--- a/arch/x86/kernel/cpu/mce/therm_throt.c
+++ /dev/null
@@ -1,739 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Thermal throttle event support code (such as syslog messaging and rate
- * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
- *
- * This allows consistent reporting of CPU thermal throttle events.
- *
- * Maintains a counter in /sys that keeps track of the number of thermal
- * events, such that the user knows how bad the thermal problem might be
- * (since the logging to syslog is rate limited).
- *
- * Author: Dmitriy Zavin (dmitriyz@google.com)
- *
- * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
- * Inspired by Ross Biro's and Al Borchers' counter code.
- */
-#include <linux/interrupt.h>
-#include <linux/notifier.h>
-#include <linux/jiffies.h>
-#include <linux/kernel.h>
-#include <linux/percpu.h>
-#include <linux/export.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-#include <linux/cpu.h>
-
-#include <asm/processor.h>
-#include <asm/traps.h>
-#include <asm/apic.h>
-#include <asm/mce.h>
-#include <asm/msr.h>
-#include <asm/trace/irq_vectors.h>
-
-#include "internal.h"
-
-/* How long to wait between reporting thermal events */
-#define CHECK_INTERVAL (300 * HZ)
-
-#define THERMAL_THROTTLING_EVENT 0
-#define POWER_LIMIT_EVENT 1
-
-/**
- * struct _thermal_state - Represent the current thermal event state
- * @next_check: Stores the next timestamp, when it is allowed
- * to log the next warning message.
- * @last_interrupt_time: Stores the timestamp for the last threshold
- * high event.
- * @therm_work: Delayed workqueue structure
- * @count: Stores the current running count for thermal
- * or power threshold interrupts.
- * @last_count: Stores the previous running count for thermal
- * or power threshold interrupts.
- * @max_time_ms: This shows the maximum amount of time CPU was
- * in throttled state for a single thermal
- * threshold high to low state.
- * @total_time_ms: This is a cumulative time during which CPU was
- * in the throttled state.
- * @rate_control_active: Set when a throttling message is logged.
- * This is used for the purpose of rate-control.
- * @new_event: Stores the last high/low status of the
- * THERM_STATUS_PROCHOT or
- * THERM_STATUS_POWER_LIMIT.
- * @level: Stores whether this _thermal_state instance is
- * for a CORE level or for PACKAGE level.
- * @sample_index: Index for storing the next sample in the buffer
- * temp_samples[].
- * @sample_count: Total number of samples collected in the buffer
- * temp_samples[].
- * @average: The last moving average of temperature samples
- * @baseline_temp: Temperature at which thermal threshold high
- * interrupt was generated.
- * @temp_samples: Storage for temperature samples to calculate
- * moving average.
- *
- * This structure is used to represent data related to thermal state for a CPU.
- * There is a separate storage for core and package level for each CPU.
- */
-struct _thermal_state {
- u64 next_check;
- u64 last_interrupt_time;
- struct delayed_work therm_work;
- unsigned long count;
- unsigned long last_count;
- unsigned long max_time_ms;
- unsigned long total_time_ms;
- bool rate_control_active;
- bool new_event;
- u8 level;
- u8 sample_index;
- u8 sample_count;
- u8 average;
- u8 baseline_temp;
- u8 temp_samples[3];
-};
-
-struct thermal_state {
- struct _thermal_state core_throttle;
- struct _thermal_state core_power_limit;
- struct _thermal_state package_throttle;
- struct _thermal_state package_power_limit;
- struct _thermal_state core_thresh0;
- struct _thermal_state core_thresh1;
- struct _thermal_state pkg_thresh0;
- struct _thermal_state pkg_thresh1;
-};
-
-/* Callback to handle core threshold interrupts */
-int (*platform_thermal_notify)(__u64 msr_val);
-EXPORT_SYMBOL(platform_thermal_notify);
-
-/* Callback to handle core package threshold_interrupts */
-int (*platform_thermal_package_notify)(__u64 msr_val);
-EXPORT_SYMBOL_GPL(platform_thermal_package_notify);
-
-/* Callback support of rate control, return true, if
- * callback has rate control */
-bool (*platform_thermal_package_rate_control)(void);
-EXPORT_SYMBOL_GPL(platform_thermal_package_rate_control);
-
-
-static DEFINE_PER_CPU(struct thermal_state, thermal_state);
-
-static atomic_t therm_throt_en = ATOMIC_INIT(0);
-
-static u32 lvtthmr_init __read_mostly;
-
-#ifdef CONFIG_SYSFS
-#define define_therm_throt_device_one_ro(_name) \
- static DEVICE_ATTR(_name, 0444, \
- therm_throt_device_show_##_name, \
- NULL) \
-
-#define define_therm_throt_device_show_func(event, name) \
- \
-static ssize_t therm_throt_device_show_##event##_##name( \
- struct device *dev, \
- struct device_attribute *attr, \
- char *buf) \
-{ \
- unsigned int cpu = dev->id; \
- ssize_t ret; \
- \
- preempt_disable(); /* CPU hotplug */ \
- if (cpu_online(cpu)) { \
- ret = sprintf(buf, "%lu\n", \
- per_cpu(thermal_state, cpu).event.name); \
- } else \
- ret = 0; \
- preempt_enable(); \
- \
- return ret; \
-}
-
-define_therm_throt_device_show_func(core_throttle, count);
-define_therm_throt_device_one_ro(core_throttle_count);
-
-define_therm_throt_device_show_func(core_power_limit, count);
-define_therm_throt_device_one_ro(core_power_limit_count);
-
-define_therm_throt_device_show_func(package_throttle, count);
-define_therm_throt_device_one_ro(package_throttle_count);
-
-define_therm_throt_device_show_func(package_power_limit, count);
-define_therm_throt_device_one_ro(package_power_limit_count);
-
-define_therm_throt_device_show_func(core_throttle, max_time_ms);
-define_therm_throt_device_one_ro(core_throttle_max_time_ms);
-
-define_therm_throt_device_show_func(package_throttle, max_time_ms);
-define_therm_throt_device_one_ro(package_throttle_max_time_ms);
-
-define_therm_throt_device_show_func(core_throttle, total_time_ms);
-define_therm_throt_device_one_ro(core_throttle_total_time_ms);
-
-define_therm_throt_device_show_func(package_throttle, total_time_ms);
-define_therm_throt_device_one_ro(package_throttle_total_time_ms);
-
-static struct attribute *thermal_throttle_attrs[] = {
- &dev_attr_core_throttle_count.attr,
- &dev_attr_core_throttle_max_time_ms.attr,
- &dev_attr_core_throttle_total_time_ms.attr,
- NULL
-};
-
-static const struct attribute_group thermal_attr_group = {
- .attrs = thermal_throttle_attrs,
- .name = "thermal_throttle"
-};
-#endif /* CONFIG_SYSFS */
-
-#define CORE_LEVEL 0
-#define PACKAGE_LEVEL 1
-
-#define THERM_THROT_POLL_INTERVAL HZ
-#define THERM_STATUS_PROCHOT_LOG BIT(1)
-
-#define THERM_STATUS_CLEAR_CORE_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11) | BIT(13) | BIT(15))
-#define THERM_STATUS_CLEAR_PKG_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11))
-
-static void clear_therm_status_log(int level)
-{
- int msr;
- u64 mask, msr_val;
-
- if (level == CORE_LEVEL) {
- msr = MSR_IA32_THERM_STATUS;
- mask = THERM_STATUS_CLEAR_CORE_MASK;
- } else {
- msr = MSR_IA32_PACKAGE_THERM_STATUS;
- mask = THERM_STATUS_CLEAR_PKG_MASK;
- }
-
- rdmsrl(msr, msr_val);
- msr_val &= mask;
- wrmsrl(msr, msr_val & ~THERM_STATUS_PROCHOT_LOG);
-}
-
-static void get_therm_status(int level, bool *proc_hot, u8 *temp)
-{
- int msr;
- u64 msr_val;
-
- if (level == CORE_LEVEL)
- msr = MSR_IA32_THERM_STATUS;
- else
- msr = MSR_IA32_PACKAGE_THERM_STATUS;
-
- rdmsrl(msr, msr_val);
- if (msr_val & THERM_STATUS_PROCHOT_LOG)
- *proc_hot = true;
- else
- *proc_hot = false;
-
- *temp = (msr_val >> 16) & 0x7F;
-}
-
-static void __maybe_unused throttle_active_work(struct work_struct *work)
-{
- struct _thermal_state *state = container_of(to_delayed_work(work),
- struct _thermal_state, therm_work);
- unsigned int i, avg, this_cpu = smp_processor_id();
- u64 now = get_jiffies_64();
- bool hot;
- u8 temp;
-
- get_therm_status(state->level, &hot, &temp);
- /* temperature value is offset from the max so lesser means hotter */
- if (!hot && temp > state->baseline_temp) {
- if (state->rate_control_active)
- pr_info("CPU%d: %s temperature/speed normal (total events = %lu)\n",
- this_cpu,
- state->level == CORE_LEVEL ? "Core" : "Package",
- state->count);
-
- state->rate_control_active = false;
- return;
- }
-
- if (time_before64(now, state->next_check) &&
- state->rate_control_active)
- goto re_arm;
-
- state->next_check = now + CHECK_INTERVAL;
-
- if (state->count != state->last_count) {
- /* There was one new thermal interrupt */
- state->last_count = state->count;
- state->average = 0;
- state->sample_count = 0;
- state->sample_index = 0;
- }
-
- state->temp_samples[state->sample_index] = temp;
- state->sample_count++;
- state->sample_index = (state->sample_index + 1) % ARRAY_SIZE(state->temp_samples);
- if (state->sample_count < ARRAY_SIZE(state->temp_samples))
- goto re_arm;
-
- avg = 0;
- for (i = 0; i < ARRAY_SIZE(state->temp_samples); ++i)
- avg += state->temp_samples[i];
-
- avg /= ARRAY_SIZE(state->temp_samples);
-
- if (state->average > avg) {
- pr_warn("CPU%d: %s temperature is above threshold, cpu clock is throttled (total events = %lu)\n",
- this_cpu,
- state->level == CORE_LEVEL ? "Core" : "Package",
- state->count);
- state->rate_control_active = true;
- }
-
- state->average = avg;
-
-re_arm:
- clear_therm_status_log(state->level);
- schedule_delayed_work_on(this_cpu, &state->therm_work, THERM_THROT_POLL_INTERVAL);
-}
-
-/***
- * therm_throt_process - Process thermal throttling event from interrupt
- * @curr: Whether the condition is current or not (boolean), since the
- * thermal interrupt normally gets called both when the thermal
- * event begins and once the event has ended.
- *
- * This function is called by the thermal interrupt after the
- * IRQ has been acknowledged.
- *
- * It will take care of rate limiting and printing messages to the syslog.
- */
-static void therm_throt_process(bool new_event, int event, int level)
-{
- struct _thermal_state *state;
- unsigned int this_cpu = smp_processor_id();
- bool old_event;
- u64 now;
- struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
-
- now = get_jiffies_64();
- if (level == CORE_LEVEL) {
- if (event == THERMAL_THROTTLING_EVENT)
- state = &pstate->core_throttle;
- else if (event == POWER_LIMIT_EVENT)
- state = &pstate->core_power_limit;
- else
- return;
- } else if (level == PACKAGE_LEVEL) {
- if (event == THERMAL_THROTTLING_EVENT)
- state = &pstate->package_throttle;
- else if (event == POWER_LIMIT_EVENT)
- state = &pstate->package_power_limit;
- else
- return;
- } else
- return;
-
- old_event = state->new_event;
- state->new_event = new_event;
-
- if (new_event)
- state->count++;
-
- if (event != THERMAL_THROTTLING_EVENT)
- return;
-
- if (new_event && !state->last_interrupt_time) {
- bool hot;
- u8 temp;
-
- get_therm_status(state->level, &hot, &temp);
- /*
- * Ignore short temperature spike as the system is not close
- * to PROCHOT. 10C offset is large enough to ignore. It is
- * already dropped from the high threshold temperature.
- */
- if (temp > 10)
- return;
-
- state->baseline_temp = temp;
- state->last_interrupt_time = now;
- schedule_delayed_work_on(this_cpu, &state->therm_work, THERM_THROT_POLL_INTERVAL);
- } else if (old_event && state->last_interrupt_time) {
- unsigned long throttle_time;
-
- throttle_time = jiffies_delta_to_msecs(now - state->last_interrupt_time);
- if (throttle_time > state->max_time_ms)
- state->max_time_ms = throttle_time;
- state->total_time_ms += throttle_time;
- state->last_interrupt_time = 0;
- }
-}
-
-static int thresh_event_valid(int level, int event)
-{
- struct _thermal_state *state;
- unsigned int this_cpu = smp_processor_id();
- struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
- u64 now = get_jiffies_64();
-
- if (level == PACKAGE_LEVEL)
- state = (event == 0) ? &pstate->pkg_thresh0 :
- &pstate->pkg_thresh1;
- else
- state = (event == 0) ? &pstate->core_thresh0 :
- &pstate->core_thresh1;
-
- if (time_before64(now, state->next_check))
- return 0;
-
- state->next_check = now + CHECK_INTERVAL;
-
- return 1;
-}
-
-static bool int_pln_enable;
-static int __init int_pln_enable_setup(char *s)
-{
- int_pln_enable = true;
-
- return 1;
-}
-__setup("int_pln_enable", int_pln_enable_setup);
-
-#ifdef CONFIG_SYSFS
-/* Add/Remove thermal_throttle interface for CPU device: */
-static int thermal_throttle_add_dev(struct device *dev, unsigned int cpu)
-{
- int err;
- struct cpuinfo_x86 *c = &cpu_data(cpu);
-
- err = sysfs_create_group(&dev->kobj, &thermal_attr_group);
- if (err)
- return err;
-
- if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) {
- err = sysfs_add_file_to_group(&dev->kobj,
- &dev_attr_core_power_limit_count.attr,
- thermal_attr_group.name);
- if (err)
- goto del_group;
- }
-
- if (cpu_has(c, X86_FEATURE_PTS)) {
- err = sysfs_add_file_to_group(&dev->kobj,
- &dev_attr_package_throttle_count.attr,
- thermal_attr_group.name);
- if (err)
- goto del_group;
-
- err = sysfs_add_file_to_group(&dev->kobj,
- &dev_attr_package_throttle_max_time_ms.attr,
- thermal_attr_group.name);
- if (err)
- goto del_group;
-
- err = sysfs_add_file_to_group(&dev->kobj,
- &dev_attr_package_throttle_total_time_ms.attr,
- thermal_attr_group.name);
- if (err)
- goto del_group;
-
- if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) {
- err = sysfs_add_file_to_group(&dev->kobj,
- &dev_attr_package_power_limit_count.attr,
- thermal_attr_group.name);
- if (err)
- goto del_group;
- }
- }
-
- return 0;
-
-del_group:
- sysfs_remove_group(&dev->kobj, &thermal_attr_group);
-
- return err;
-}
-
-static void thermal_throttle_remove_dev(struct device *dev)
-{
- sysfs_remove_group(&dev->kobj, &thermal_attr_group);
-}
-
-/* Get notified when a cpu comes on/off. Be hotplug friendly. */
-static int thermal_throttle_online(unsigned int cpu)
-{
- struct thermal_state *state = &per_cpu(thermal_state, cpu);
- struct device *dev = get_cpu_device(cpu);
- u32 l;
-
- state->package_throttle.level = PACKAGE_LEVEL;
- state->core_throttle.level = CORE_LEVEL;
-
- INIT_DELAYED_WORK(&state->package_throttle.therm_work, throttle_active_work);
- INIT_DELAYED_WORK(&state->core_throttle.therm_work, throttle_active_work);
-
- /* Unmask the thermal vector after the above workqueues are initialized. */
- l = apic_read(APIC_LVTTHMR);
- apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
-
- return thermal_throttle_add_dev(dev, cpu);
-}
-
-static int thermal_throttle_offline(unsigned int cpu)
-{
- struct thermal_state *state = &per_cpu(thermal_state, cpu);
- struct device *dev = get_cpu_device(cpu);
- u32 l;
-
- /* Mask the thermal vector before draining evtl. pending work */
- l = apic_read(APIC_LVTTHMR);
- apic_write(APIC_LVTTHMR, l | APIC_LVT_MASKED);
-
- cancel_delayed_work_sync(&state->package_throttle.therm_work);
- cancel_delayed_work_sync(&state->core_throttle.therm_work);
-
- state->package_throttle.rate_control_active = false;
- state->core_throttle.rate_control_active = false;
-
- thermal_throttle_remove_dev(dev);
- return 0;
-}
-
-static __init int thermal_throttle_init_device(void)
-{
- int ret;
-
- if (!atomic_read(&therm_throt_en))
- return 0;
-
- ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/therm:online",
- thermal_throttle_online,
- thermal_throttle_offline);
- return ret < 0 ? ret : 0;
-}
-device_initcall(thermal_throttle_init_device);
-
-#endif /* CONFIG_SYSFS */
-
-static void notify_package_thresholds(__u64 msr_val)
-{
- bool notify_thres_0 = false;
- bool notify_thres_1 = false;
-
- if (!platform_thermal_package_notify)
- return;
-
- /* lower threshold check */
- if (msr_val & THERM_LOG_THRESHOLD0)
- notify_thres_0 = true;
- /* higher threshold check */
- if (msr_val & THERM_LOG_THRESHOLD1)
- notify_thres_1 = true;
-
- if (!notify_thres_0 && !notify_thres_1)
- return;
-
- if (platform_thermal_package_rate_control &&
- platform_thermal_package_rate_control()) {
- /* Rate control is implemented in callback */
- platform_thermal_package_notify(msr_val);
- return;
- }
-
- /* lower threshold reached */
- if (notify_thres_0 && thresh_event_valid(PACKAGE_LEVEL, 0))
- platform_thermal_package_notify(msr_val);
- /* higher threshold reached */
- if (notify_thres_1 && thresh_event_valid(PACKAGE_LEVEL, 1))
- platform_thermal_package_notify(msr_val);
-}
-
-static void notify_thresholds(__u64 msr_val)
-{
- /* check whether the interrupt handler is defined;
- * otherwise simply return
- */
- if (!platform_thermal_notify)
- return;
-
- /* lower threshold reached */
- if ((msr_val & THERM_LOG_THRESHOLD0) &&
- thresh_event_valid(CORE_LEVEL, 0))
- platform_thermal_notify(msr_val);
- /* higher threshold reached */
- if ((msr_val & THERM_LOG_THRESHOLD1) &&
- thresh_event_valid(CORE_LEVEL, 1))
- platform_thermal_notify(msr_val);
-}
-
-/* Thermal transition interrupt handler */
-static void intel_thermal_interrupt(void)
-{
- __u64 msr_val;
-
- if (static_cpu_has(X86_FEATURE_HWP))
- wrmsrl_safe(MSR_HWP_STATUS, 0);
-
- rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
-
- /* Check for violation of core thermal thresholds*/
- notify_thresholds(msr_val);
-
- therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
- THERMAL_THROTTLING_EVENT,
- CORE_LEVEL);
-
- if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
- therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
- POWER_LIMIT_EVENT,
- CORE_LEVEL);
-
- if (this_cpu_has(X86_FEATURE_PTS)) {
- rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
- /* check violations of package thermal thresholds */
- notify_package_thresholds(msr_val);
- therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
- THERMAL_THROTTLING_EVENT,
- PACKAGE_LEVEL);
- if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
- therm_throt_process(msr_val &
- PACKAGE_THERM_STATUS_POWER_LIMIT,
- POWER_LIMIT_EVENT,
- PACKAGE_LEVEL);
- }
-}
-
-static void unexpected_thermal_interrupt(void)
-{
- pr_err("CPU%d: Unexpected LVT thermal interrupt!\n",
- smp_processor_id());
-}
-
-static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
-
-DEFINE_IDTENTRY_SYSVEC(sysvec_thermal)
-{
- trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
- inc_irq_stat(irq_thermal_count);
- smp_thermal_vector();
- trace_thermal_apic_exit(THERMAL_APIC_VECTOR);
- ack_APIC_irq();
-}
-
-/* Thermal monitoring depends on APIC, ACPI and clock modulation */
-static int intel_thermal_supported(struct cpuinfo_x86 *c)
-{
- if (!boot_cpu_has(X86_FEATURE_APIC))
- return 0;
- if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
- return 0;
- return 1;
-}
-
-void __init mcheck_intel_therm_init(void)
-{
- /*
- * This function is only called on boot CPU. Save the init thermal
- * LVT value on BSP and use that value to restore APs' thermal LVT
- * entry BIOS programmed later
- */
- if (intel_thermal_supported(&boot_cpu_data))
- lvtthmr_init = apic_read(APIC_LVTTHMR);
-}
-
-void intel_init_thermal(struct cpuinfo_x86 *c)
-{
- unsigned int cpu = smp_processor_id();
- int tm2 = 0;
- u32 l, h;
-
- if (!intel_thermal_supported(c))
- return;
-
- /*
- * First check if its enabled already, in which case there might
- * be some SMM goo which handles it, so we can't even put a handler
- * since it might be delivered via SMI already:
- */
- rdmsr(MSR_IA32_MISC_ENABLE, l, h);
-
- h = lvtthmr_init;
- /*
- * The initial value of thermal LVT entries on all APs always reads
- * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
- * sequence to them and LVT registers are reset to 0s except for
- * the mask bits which are set to 1s when APs receive INIT IPI.
- * If BIOS takes over the thermal interrupt and sets its interrupt
- * delivery mode to SMI (not fixed), it restores the value that the
- * BIOS has programmed on AP based on BSP's info we saved since BIOS
- * is always setting the same value for all threads/cores.
- */
- if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED)
- apic_write(APIC_LVTTHMR, lvtthmr_init);
-
-
- if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
- if (system_state == SYSTEM_BOOTING)
- pr_debug("CPU%d: Thermal monitoring handled by SMI\n", cpu);
- return;
- }
-
- /* early Pentium M models use different method for enabling TM2 */
- if (cpu_has(c, X86_FEATURE_TM2)) {
- if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) {
- rdmsr(MSR_THERM2_CTL, l, h);
- if (l & MSR_THERM2_CTL_TM_SELECT)
- tm2 = 1;
- } else if (l & MSR_IA32_MISC_ENABLE_TM2)
- tm2 = 1;
- }
-
- /* We'll mask the thermal vector in the lapic till we're ready: */
- h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
- apic_write(APIC_LVTTHMR, h);
-
- rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
- if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
- wrmsr(MSR_IA32_THERM_INTERRUPT,
- (l | (THERM_INT_LOW_ENABLE
- | THERM_INT_HIGH_ENABLE)) & ~THERM_INT_PLN_ENABLE, h);
- else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
- wrmsr(MSR_IA32_THERM_INTERRUPT,
- l | (THERM_INT_LOW_ENABLE
- | THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h);
- else
- wrmsr(MSR_IA32_THERM_INTERRUPT,
- l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);
-
- if (cpu_has(c, X86_FEATURE_PTS)) {
- rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
- if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
- wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
- (l | (PACKAGE_THERM_INT_LOW_ENABLE
- | PACKAGE_THERM_INT_HIGH_ENABLE))
- & ~PACKAGE_THERM_INT_PLN_ENABLE, h);
- else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
- wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
- l | (PACKAGE_THERM_INT_LOW_ENABLE
- | PACKAGE_THERM_INT_HIGH_ENABLE
- | PACKAGE_THERM_INT_PLN_ENABLE), h);
- else
- wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
- l | (PACKAGE_THERM_INT_LOW_ENABLE
- | PACKAGE_THERM_INT_HIGH_ENABLE), h);
- }
-
- smp_thermal_vector = intel_thermal_interrupt;
-
- rdmsr(MSR_IA32_MISC_ENABLE, l, h);
- wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
-
- pr_info_once("CPU0: Thermal monitoring enabled (%s)\n",
- tm2 ? "TM2" : "TM1");
-
- /* enable thermal throttle processing */
- atomic_set(&therm_throt_en, 1);
-}
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index ec6f0415bc6d..b935e1b5f115 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -830,7 +830,7 @@ static const struct attribute_group cpu_root_microcode_group = {
.attrs = cpu_root_microcode_attrs,
};
-int __init microcode_init(void)
+static int __init microcode_init(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
int error;
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 43b54bef5448..e88bc296afca 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -31,6 +31,11 @@
#include <asm/reboot.h>
#include <asm/nmi.h>
#include <clocksource/hyperv_timer.h>
+#include <asm/numa.h>
+
+/* Is Linux running as the root partition? */
+bool hv_root_partition;
+EXPORT_SYMBOL_GPL(hv_root_partition);
struct ms_hyperv_info ms_hyperv;
EXPORT_SYMBOL_GPL(ms_hyperv);
@@ -226,6 +231,32 @@ static void __init hv_smp_prepare_boot_cpu(void)
hv_init_spinlocks();
#endif
}
+
+static void __init hv_smp_prepare_cpus(unsigned int max_cpus)
+{
+#ifdef CONFIG_X86_64
+ int i;
+ int ret;
+#endif
+
+ native_smp_prepare_cpus(max_cpus);
+
+#ifdef CONFIG_X86_64
+ for_each_present_cpu(i) {
+ if (i == 0)
+ continue;
+ ret = hv_call_add_logical_proc(numa_cpu_node(i), i, cpu_physical_id(i));
+ BUG_ON(ret);
+ }
+
+ for_each_present_cpu(i) {
+ if (i == 0)
+ continue;
+ ret = hv_call_create_vp(numa_cpu_node(i), hv_current_partition_id, i, i);
+ BUG_ON(ret);
+ }
+#endif
+}
#endif
static void __init ms_hyperv_init_platform(void)
@@ -243,6 +274,7 @@ static void __init ms_hyperv_init_platform(void)
* Extract the features and hints
*/
ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES);
+ ms_hyperv.features_b = cpuid_ebx(HYPERV_CPUID_FEATURES);
ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES);
ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);
@@ -256,6 +288,22 @@ static void __init ms_hyperv_init_platform(void)
ms_hyperv.max_vp_index, ms_hyperv.max_lp_index);
/*
+ * Check CPU management privilege.
+ *
+ * To mirror what Windows does we should extract CPU management
+ * features and use the ReservedIdentityBit to detect if Linux is the
+ * root partition. But that requires negotiating CPU management
+ * interface (a process to be finalized).
+ *
+ * For now, use the privilege flag as the indicator for running as
+ * root.
+ */
+ if (cpuid_ebx(HYPERV_CPUID_FEATURES) & HV_CPU_MANAGEMENT) {
+ hv_root_partition = true;
+ pr_info("Hyper-V: running as root partition\n");
+ }
+
+ /*
* Extract host information.
*/
if (cpuid_eax(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS) >=
@@ -277,6 +325,14 @@ static void __init ms_hyperv_init_platform(void)
x86_platform.calibrate_cpu = hv_get_tsc_khz;
}
+ if (ms_hyperv.features_b & HV_ISOLATION) {
+ ms_hyperv.isolation_config_a = cpuid_eax(HYPERV_CPUID_ISOLATION_CONFIG);
+ ms_hyperv.isolation_config_b = cpuid_ebx(HYPERV_CPUID_ISOLATION_CONFIG);
+
+ pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n",
+ ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b);
+ }
+
if (ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED) {
ms_hyperv.nested_features =
cpuid_eax(HYPERV_CPUID_NESTED_FEATURES);
@@ -366,6 +422,8 @@ static void __init ms_hyperv_init_platform(void)
# ifdef CONFIG_SMP
smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu;
+ if (hv_root_partition)
+ smp_ops.smp_prepare_cpus = hv_smp_prepare_cpus;
# endif
/*
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 5bd011737272..9231640782fa 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -537,9 +537,9 @@ static void __init print_out_mtrr_range_state(void)
if (!size_base)
continue;
- size_base = to_size_factor(size_base, &size_factor),
+ size_base = to_size_factor(size_base, &size_factor);
start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
- start_base = to_size_factor(start_base, &start_factor),
+ start_base = to_size_factor(start_base, &start_factor);
type = range_state[i].type;
pr_debug("reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index a29997e6cf9e..b90f3f437765 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -3,7 +3,6 @@
* This only handles 32bit MTRR on 32bit hosts. This is strictly wrong
* because MTRRs can span up to 40 bits (36bits on most modern x86)
*/
-#define DEBUG
#include <linux/export.h>
#include <linux/init.h>
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c
index 61eb26edc6d2..28c8a23aa42e 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.c
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.c
@@ -31,8 +31,6 @@
System Programming Guide; Section 9.11. (1997 edition - PPro).
*/
-#define DEBUG
-
#include <linux/types.h> /* FIXME: kvm_para.h needs this */
#include <linux/stop_machine.h>
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index a5ee607a3b89..3ef5868ac588 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -3,7 +3,7 @@
* local apic based NMI watchdog for various CPUs.
*
* This file also handles reservation of performance counters for coordination
- * with other users (like oprofile).
+ * with other users.
*
* Note that these events normally don't tick when the CPU idles. This means
* the frequency varies with CPU load.
@@ -105,15 +105,6 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
}
-/* checks for a bit availability (hack for oprofile) */
-int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
-{
- BUG_ON(counter > NMI_MAX_COUNTER_BITS);
-
- return !test_bit(counter, perfctr_nmi_owner);
-}
-EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
-
int reserve_perfctr_nmi(unsigned int msr)
{
unsigned int counter;
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index ee71c47844cb..c4d320d02fd5 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -572,6 +572,7 @@ union cpuid_0x10_x_edx {
void rdt_last_cmd_clear(void);
void rdt_last_cmd_puts(const char *s);
+__printf(1, 2)
void rdt_last_cmd_printf(const char *fmt, ...);
void rdt_ctrl_update(void *arg);
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 460f3e0df106..f9190adc52cb 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -563,11 +563,11 @@ static int __rdtgroup_move_task(struct task_struct *tsk,
*/
if (rdtgrp->type == RDTCTRL_GROUP) {
- tsk->closid = rdtgrp->closid;
- tsk->rmid = rdtgrp->mon.rmid;
+ WRITE_ONCE(tsk->closid, rdtgrp->closid);
+ WRITE_ONCE(tsk->rmid, rdtgrp->mon.rmid);
} else if (rdtgrp->type == RDTMON_GROUP) {
if (rdtgrp->mon.parent->closid == tsk->closid) {
- tsk->rmid = rdtgrp->mon.rmid;
+ WRITE_ONCE(tsk->rmid, rdtgrp->mon.rmid);
} else {
rdt_last_cmd_puts("Can't move task to different control group\n");
return -EINVAL;
@@ -2310,22 +2310,18 @@ static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
for_each_process_thread(p, t) {
if (!from || is_closid_match(t, from) ||
is_rmid_match(t, from)) {
- t->closid = to->closid;
- t->rmid = to->mon.rmid;
+ WRITE_ONCE(t->closid, to->closid);
+ WRITE_ONCE(t->rmid, to->mon.rmid);
-#ifdef CONFIG_SMP
/*
- * This is safe on x86 w/o barriers as the ordering
- * of writing to task_cpu() and t->on_cpu is
- * reverse to the reading here. The detection is
- * inaccurate as tasks might move or schedule
- * before the smp function call takes place. In
- * such a case the function call is pointless, but
+ * If the task is on a CPU, set the CPU in the mask.
+ * The detection is inaccurate as tasks might move or
+ * schedule before the smp function call takes place.
+ * In such a case the function call is pointless, but
* there is no other side effect.
*/
- if (mask && t->on_cpu)
+ if (IS_ENABLED(CONFIG_SMP) && mask && task_curr(t))
cpumask_set_cpu(task_cpu(t), mask);
-#endif
}
}
read_unlock(&tasklist_lock);
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 236924930bf0..972ec3bfa9c0 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -40,11 +40,6 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },
{ X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
{ X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 },
- { X86_FEATURE_SME, CPUID_EAX, 0, 0x8000001f, 0 },
- { X86_FEATURE_SEV, CPUID_EAX, 1, 0x8000001f, 0 },
- { X86_FEATURE_SEV_ES, CPUID_EAX, 3, 0x8000001f, 0 },
- { X86_FEATURE_SME_COHERENT, CPUID_EAX, 10, 0x8000001f, 0 },
- { X86_FEATURE_VM_PAGE_FLUSH, CPUID_EAX, 2, 0x8000001f, 0 },
{ 0, 0, 0, 0, 0 }
};
diff --git a/arch/x86/kernel/cpu/sgx/driver.c b/arch/x86/kernel/cpu/sgx/driver.c
index f2eac41bb4ff..8ce6d8371cfb 100644
--- a/arch/x86/kernel/cpu/sgx/driver.c
+++ b/arch/x86/kernel/cpu/sgx/driver.c
@@ -72,6 +72,9 @@ static int sgx_release(struct inode *inode, struct file *file)
synchronize_srcu(&encl->srcu);
mmu_notifier_unregister(&encl_mm->mmu_notifier, encl_mm->mm);
kfree(encl_mm);
+
+ /* 'encl_mm' is gone, put encl_mm->encl reference: */
+ kref_put(&encl->refcount, sgx_encl_release);
}
kref_put(&encl->refcount, sgx_encl_release);
diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index ee50a5010277..7449ef33f081 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -141,7 +141,6 @@ static vm_fault_t sgx_vma_fault(struct vm_fault *vmf)
struct sgx_encl_page *entry;
unsigned long phys_addr;
struct sgx_encl *encl;
- unsigned long pfn;
vm_fault_t ret;
encl = vma->vm_private_data;
@@ -168,13 +167,6 @@ static vm_fault_t sgx_vma_fault(struct vm_fault *vmf)
phys_addr = sgx_get_epc_phys_addr(entry->epc_page);
- /* Check if another thread got here first to insert the PTE. */
- if (!follow_pfn(vma, addr, &pfn)) {
- mutex_unlock(&encl->lock);
-
- return VM_FAULT_NOPAGE;
- }
-
ret = vmf_insert_pfn(vma, addr, PFN_DOWN(phys_addr));
if (ret != VM_FAULT_NOPAGE) {
mutex_unlock(&encl->lock);
@@ -481,6 +473,9 @@ static void sgx_mmu_notifier_free(struct mmu_notifier *mn)
{
struct sgx_encl_mm *encl_mm = container_of(mn, struct sgx_encl_mm, mmu_notifier);
+ /* 'encl_mm' is going away, put encl_mm->encl reference: */
+ kref_put(&encl_mm->encl->refcount, sgx_encl_release);
+
kfree(encl_mm);
}
@@ -534,6 +529,8 @@ int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm)
if (!encl_mm)
return -ENOMEM;
+ /* Grab a refcount for the encl_mm->encl reference: */
+ kref_get(&encl->refcount);
encl_mm->encl = encl;
encl_mm->mm = mm;
encl_mm->mmu_notifier.ops = &sgx_mmu_notifier_ops;
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index c519fc5f6948..8df81a3ed945 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -700,25 +700,27 @@ static bool __init sgx_page_cache_init(void)
return true;
}
-static void __init sgx_init(void)
+static int __init sgx_init(void)
{
int ret;
int i;
if (!cpu_feature_enabled(X86_FEATURE_SGX))
- return;
+ return -ENODEV;
if (!sgx_page_cache_init())
- return;
+ return -ENOMEM;
- if (!sgx_page_reclaimer_init())
+ if (!sgx_page_reclaimer_init()) {
+ ret = -ENOMEM;
goto err_page_cache;
+ }
ret = sgx_drv_init();
if (ret)
goto err_kthread;
- return;
+ return 0;
err_kthread:
kthread_stop(ksgxd_tsk);
@@ -728,6 +730,8 @@ err_page_cache:
vfree(sgx_epc_sections[i].pages);
memunmap(sgx_epc_sections[i].virt_addr);
}
+
+ return ret;
}
device_initcall(sgx_init);