From 7f34b935e8bf2f5304fce273a8fa98c63886d686 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Thu, 10 Nov 2016 18:44:43 +0100
Subject: x86/mcheck: Be prepared for a rollback to the ONLINE state

If we try a CPU down and fail in the middle then we roll back to the
online state. This means we would perform CPU_ONLINE / mce_device_create()
without invoking CPU_DEAD / mce_device_remove() for the cleanup of what
was allocated in CPU_ONLINE.

Be prepared for this and don't allocate the struct if we have it already.

Signed-off-by: Sebastian Andrzej Siewior
Acked-by: Borislav Petkov
Cc: Tony Luck
Cc: rt@linutronix.de
Cc: linux-edac@vger.kernel.org
Link: http://lkml.kernel.org/r/20161110174447.11848-4-bigeasy@linutronix.de
Signed-off-by: Thomas Gleixner
---
 arch/x86/kernel/cpu/mcheck/mce.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/x86/kernel/cpu/mcheck/mce.c')

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index a7fdf453d895..e9ffd6d9e32d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -2409,6 +2409,10 @@ static int mce_device_create(unsigned int cpu)
         if (!mce_available(&boot_cpu_data))
                 return -EIO;
 
+        dev = per_cpu(mce_device, cpu);
+        if (dev)
+                return 0;
+
         dev = kzalloc(sizeof *dev, GFP_KERNEL);
         if (!dev)
                 return -ENOMEM;
--
cgit v1.2.3


From 4d7b02d58c4000597d08930193d7aed81fba6b7c Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Thu, 10 Nov 2016 18:44:44 +0100
Subject: x86/mcheck: Split threshold_cpu_callback into two callbacks

The threshold_cpu_callback callback looks like a notifier callback and its
arguments are almost the same. Split it out and have one ONLINE and one
DEAD callback. This will come in handy later once the main code gets
changed to use the callback mechanism.

Also, handle the threshold_cpu_callback_online() return value so we don't
continue if the function fails. Boris Petkov removed the callback pointer
and replaced it with proper functions.
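For reference, the calling convention before and after this split, reduced
to a short sketch (the declarations match the diff below; everything else
is elided and the pairing comments are editorial):

/* Before: a single function pointer, multiplexed on the notifier action. */
extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);

/* After: one plain function per hotplug transition. The create side returns
 * an error code, so a failed CPU_ONLINE can be rolled back by the caller.
 */
extern int mce_threshold_create_device(unsigned int cpu);      /* CPU_ONLINE */
extern int mce_threshold_remove_device(unsigned int cpu);      /* CPU_DEAD   */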
Signed-off-by: Sebastian Andrzej Siewior
Acked-by: Borislav Petkov
Cc: Tony Luck
Cc: rt@linutronix.de
Cc: linux-edac@vger.kernel.org
Link: http://lkml.kernel.org/r/20161110174447.11848-5-bigeasy@linutronix.de
Signed-off-by: Thomas Gleixner
---
 arch/x86/include/asm/mce.h           |  7 +++++--
 arch/x86/kernel/cpu/mcheck/mce.c     | 14 +++++++------
 arch/x86/kernel/cpu/mcheck/mce_amd.c | 38 ++++++++++++++----------------------
 3 files changed, 28 insertions(+), 31 deletions(-)

(limited to 'arch/x86/kernel/cpu/mcheck/mce.c')

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 9bd7ff5ffbcc..caafad4a211e 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -293,9 +293,7 @@ void do_machine_check(struct pt_regs *, long);
 /*
  * Threshold handler
  */
-
 extern void (*mce_threshold_vector)(void);
-extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
 
 /* Deferred error interrupt handler */
 extern void (*deferred_error_int_vector)(void);
@@ -377,7 +375,12 @@ struct smca_bank_info {
 };
 
 extern struct smca_bank_info smca_banks[MAX_NR_BANKS];
 
+extern int mce_threshold_create_device(unsigned int cpu);
+extern int mce_threshold_remove_device(unsigned int cpu);
+#else
+static inline int mce_threshold_create_device(unsigned int cpu) { return 0; };
+static inline int mce_threshold_remove_device(unsigned int cpu) { return 0; };
 #endif
 
 #endif /* _ASM_X86_MCE_H */
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index e9ffd6d9e32d..052b5e05c3c4 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -2255,8 +2255,6 @@ static struct bus_type mce_subsys = {
 
 DEFINE_PER_CPU(struct device *, mce_device);
 
-void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
-
 static inline struct mce_bank *attr_to_bank(struct device_attribute *attr)
 {
         return container_of(attr, struct mce_bank, attr);
@@ -2512,13 +2510,17 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 
         switch (action & ~CPU_TASKS_FROZEN) {
         case CPU_ONLINE:
+
                 mce_device_create(cpu);
-                if (threshold_cpu_callback)
-                        threshold_cpu_callback(action, cpu);
+
+                if (mce_threshold_create_device(cpu)) {
+                        mce_device_remove(cpu);
+                        return NOTIFY_BAD;
+                }
+
                 break;
         case CPU_DEAD:
-                if (threshold_cpu_callback)
-                        threshold_cpu_callback(action, cpu);
+                mce_threshold_remove_device(cpu);
                 mce_device_remove(cpu);
                 mce_intel_hcpu_update(cpu);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index e93580c82ef0..c33a3ee2e383 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -55,6 +55,8 @@
 /* Threshold LVT offset is at MSR0xC0000410[15:12] */
 #define SMCA_THR_LVT_OFF        0xF000
 
+static bool thresholding_en;
+
 static const char * const th_names[] = {
         "load_store",
         "insn_fetch",
@@ -1077,10 +1079,13 @@ free_out:
         per_cpu(threshold_banks, cpu)[bank] = NULL;
 }
 
-static void threshold_remove_device(unsigned int cpu)
+int mce_threshold_remove_device(unsigned int cpu)
 {
         unsigned int bank;
 
+        if (!thresholding_en)
+                return 0;
+
         for (bank = 0; bank < mca_cfg.banks; ++bank) {
                 if (!(per_cpu(bank_map, cpu) & (1 << bank)))
                         continue;
@@ -1088,15 +1093,19 @@ static void threshold_remove_device(unsigned int cpu)
                 threshold_remove_bank(cpu, bank);
         }
         kfree(per_cpu(threshold_banks, cpu));
         per_cpu(threshold_banks, cpu) = NULL;
+        return 0;
 }
 
 /* create dir/files for all valid threshold banks */
-static int threshold_create_device(unsigned int cpu)
+int mce_threshold_create_device(unsigned int cpu)
 {
         unsigned int bank;
         struct threshold_bank **bp;
         int err = 0;
 
+        if (!thresholding_en)
+                return 0;
+
         bp = per_cpu(threshold_banks, cpu);
         if (bp)
                 return 0;
@@ -1117,40 +1126,23 @@ static int threshold_create_device(unsigned int cpu)
         }
         return err;
 err:
-        threshold_remove_device(cpu);
+        mce_threshold_remove_device(cpu);
         return err;
 }
 
-/* get notified when a cpu comes on/off */
-static void
-amd_64_threshold_cpu_callback(unsigned long action, unsigned int cpu)
-{
-        switch (action) {
-        case CPU_ONLINE:
-        case CPU_ONLINE_FROZEN:
-                threshold_create_device(cpu);
-                break;
-        case CPU_DEAD:
-        case CPU_DEAD_FROZEN:
-                threshold_remove_device(cpu);
-                break;
-        default:
-                break;
-        }
-}
-
 static __init int threshold_init_device(void)
 {
         unsigned lcpu = 0;
 
         /* to hit CPUs online before the notifier is up */
         for_each_online_cpu(lcpu) {
-                int err = threshold_create_device(lcpu);
+                int err = mce_threshold_create_device(lcpu);
 
                 if (err)
                         return err;
         }
-        threshold_cpu_callback = amd_64_threshold_cpu_callback;
+
+        thresholding_en = true;
         return 0;
 }
--
cgit v1.2.3


From 39f152ffbfedb42b57b6e0c896eeae51dbe83b7a Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Thu, 10 Nov 2016 18:44:45 +0100
Subject: x86/mcheck: Reorganize the hotplug callbacks

Initially I wanted to remove mcheck_cpu_init() from identify_cpu() and let
it become an independent early hotplug callback. The main problem here was
that the init on the boot CPU may happen too late
(device_initcall_sync(mcheck_init_device)) and nobody wanted to risk
receiving an MCE event at boot time leading to a shutdown (if the MCE
feature is not yet enabled).

Here is attempt two: the timing stays as-is but the ordering of the
functions is changed:

- mcheck_cpu_init() (which is run from identify_cpu()) will set up the
  timer struct but won't fire the timer. This is moved to CPU_ONLINE since
  its cleanup part is in CPU_DOWN_PREPARE. So if it is okay to stop the
  timer early in the shutdown phase, it should be okay to start it late in
  the bring-up phase.

- CPU_DOWN_PREPARE disables the MCE feature flags for !INTEL CPUs in
  mce_disable_cpu(). If a failure occurs it would be re-enabled on all
  vendor CPUs (including Intel, where it was not disabled during shutdown).
  To keep this working I am moving it to CPU_ONLINE.
  smp_call_function_single() is dropped because the notifier nowadays runs
  on the target CPU.

- CPU_ONLINE is invoking mce_device_create() + mce_threshold_create_device()
  but its cleanup part is in CPU_DEAD (mce_threshold_remove_device() and
  mce_device_remove()). In order to keep this symmetrical I am moving the
  cleanup from CPU_DEAD to CPU_DOWN_PREPARE.
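The symmetry this reordering aims for, summarized as a sketch (derived from
the bullet points above and the diff below, not a literal excerpt; the
arrows only mark which bring-up step is undone by which teardown step):

/*
 * CPU_ONLINE (bring-up)                 CPU_DOWN_PREPARE (teardown)
 *
 * mce_device_create(cpu)          <-->  mce_device_remove(cpu)
 * mce_threshold_create_device()   <-->  mce_threshold_remove_device()
 * mce_reenable_cpu()              <-->  mce_disable_cpu()
 * mce_start_timer(cpu, t)         <-->  del_timer_sync(t)
 */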
Signed-off-by: Sebastian Andrzej Siewior
Acked-by: Borislav Petkov
Cc: Tony Luck
Cc: rt@linutronix.de
Cc: linux-edac@vger.kernel.org
Link: http://lkml.kernel.org/r/20161110174447.11848-6-bigeasy@linutronix.de
Signed-off-by: Thomas Gleixner
---
 arch/x86/kernel/cpu/mcheck/mce.c | 36 ++++++++++++++++++++----------------
 1 file changed, 20 insertions(+), 16 deletions(-)

(limited to 'arch/x86/kernel/cpu/mcheck/mce.c')

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 052b5e05c3c4..a524faa51400 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1745,6 +1745,14 @@ static void mce_start_timer(unsigned int cpu, struct timer_list *t)
         add_timer_on(t, cpu);
 }
 
+static void __mcheck_cpu_setup_timer(void)
+{
+        struct timer_list *t = this_cpu_ptr(&mce_timer);
+        unsigned int cpu = smp_processor_id();
+
+        setup_pinned_timer(t, mce_timer_fn, cpu);
+}
+
 static void __mcheck_cpu_init_timer(void)
 {
         struct timer_list *t = this_cpu_ptr(&mce_timer);
@@ -1796,7 +1804,7 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
         __mcheck_cpu_init_generic();
         __mcheck_cpu_init_vendor(c);
         __mcheck_cpu_init_clear_banks();
-        __mcheck_cpu_init_timer();
+        __mcheck_cpu_setup_timer();
 }
 
 /*
@@ -2470,28 +2478,25 @@ static void mce_device_remove(unsigned int cpu)
 }
 
 /* Make sure there are no machine checks on offlined CPUs. */
-static void mce_disable_cpu(void *h)
+static void mce_disable_cpu(void)
 {
-        unsigned long action = *(unsigned long *)h;
-
         if (!mce_available(raw_cpu_ptr(&cpu_info)))
                 return;
 
-        if (!(action & CPU_TASKS_FROZEN))
+        if (!cpuhp_tasks_frozen)
                 cmci_clear();
 
         vendor_disable_error_reporting();
 }
 
-static void mce_reenable_cpu(void *h)
+static void mce_reenable_cpu(void)
 {
-        unsigned long action = *(unsigned long *)h;
         int i;
 
         if (!mce_available(raw_cpu_ptr(&cpu_info)))
                 return;
 
-        if (!(action & CPU_TASKS_FROZEN))
+        if (!cpuhp_tasks_frozen)
                 cmci_reenable();
         for (i = 0; i < mca_cfg.banks; i++) {
                 struct mce_bank *b = &mce_banks[i];
@@ -2510,6 +2515,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 
         switch (action & ~CPU_TASKS_FROZEN) {
         case CPU_ONLINE:
+        case CPU_DOWN_FAILED:
 
                 mce_device_create(cpu);
 
@@ -2517,11 +2523,10 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
                         mce_device_remove(cpu);
                         return NOTIFY_BAD;
                 }
-
+                mce_reenable_cpu();
+                mce_start_timer(cpu, t);
                 break;
         case CPU_DEAD:
-                mce_threshold_remove_device(cpu);
-                mce_device_remove(cpu);
                 mce_intel_hcpu_update(cpu);
 
                 /* intentionally ignoring frozen here */
@@ -2529,12 +2534,11 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
                         cmci_rediscover();
                 break;
         case CPU_DOWN_PREPARE:
-                smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
+                mce_disable_cpu();
                 del_timer_sync(t);
-                break;
-        case CPU_DOWN_FAILED:
-                smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
-                mce_start_timer(cpu, t);
+
+                mce_threshold_remove_device(cpu);
+                mce_device_remove(cpu);
                 break;
         }
--
cgit v1.2.3


From 8c0eeac819c85e4c1143f7a874d87b4594739208 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Thu, 10 Nov 2016 18:44:46 +0100
Subject: x86/mcheck: Move CPU_ONLINE and CPU_DOWN_PREPARE to hotplug state machine

The CPU_ONLINE and CPU_DOWN_PREPARE handlers look fully symmetrical and
could be moved to the hotplug state machine.
On a failure during registration the tear-down callback
(mce_cpu_pre_down()) is invoked, so there should be no timer around and
thus no need to keep the notifier installed (according to the comment, this
was the reason why the notifier was registered despite errors).

Signed-off-by: Sebastian Andrzej Siewior
Acked-by: Borislav Petkov
Cc: Tony Luck
Cc: rt@linutronix.de
Cc: linux-edac@vger.kernel.org
Link: http://lkml.kernel.org/r/20161110174447.11848-7-bigeasy@linutronix.de
Signed-off-by: Thomas Gleixner
---
 arch/x86/kernel/cpu/mcheck/mce.c | 78 +++++++++++++++++++---------------------
 1 file changed, 36 insertions(+), 42 deletions(-)

(limited to 'arch/x86/kernel/cpu/mcheck/mce.c')

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index a524faa51400..78955f501ff2 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -2511,21 +2511,8 @@ static int
 mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 {
         unsigned int cpu = (unsigned long)hcpu;
-        struct timer_list *t = &per_cpu(mce_timer, cpu);
 
         switch (action & ~CPU_TASKS_FROZEN) {
-        case CPU_ONLINE:
-        case CPU_DOWN_FAILED:
-
-                mce_device_create(cpu);
-
-                if (mce_threshold_create_device(cpu)) {
-                        mce_device_remove(cpu);
-                        return NOTIFY_BAD;
-                }
-                mce_reenable_cpu();
-                mce_start_timer(cpu, t);
-                break;
         case CPU_DEAD:
                 mce_intel_hcpu_update(cpu);
 
@@ -2534,17 +2521,41 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
                         cmci_rediscover();
                 break;
         case CPU_DOWN_PREPARE:
-                mce_disable_cpu();
-                del_timer_sync(t);
 
-                mce_threshold_remove_device(cpu);
-                mce_device_remove(cpu);
                 break;
         }
 
         return NOTIFY_OK;
 }
 
+static int mce_cpu_online(unsigned int cpu)
+{
+        struct timer_list *t = &per_cpu(mce_timer, cpu);
+        int ret;
+
+        mce_device_create(cpu);
+
+        ret = mce_threshold_create_device(cpu);
+        if (ret) {
+                mce_device_remove(cpu);
+                return ret;
+        }
+        mce_reenable_cpu();
+        mce_start_timer(cpu, t);
+        return 0;
+}
+
+static int mce_cpu_pre_down(unsigned int cpu)
+{
+        struct timer_list *t = &per_cpu(mce_timer, cpu);
+
+        mce_disable_cpu();
+        del_timer_sync(t);
+        mce_threshold_remove_device(cpu);
+        mce_device_remove(cpu);
+        return 0;
+}
+
 static struct notifier_block mce_cpu_notifier = {
         .notifier_call = mce_cpu_callback,
 };
@@ -2569,8 +2580,8 @@ static __init void mce_init_banks(void)
 
 static __init int mcheck_init_device(void)
 {
+        enum cpuhp_state hp_online;
         int err;
-        int i = 0;
 
         if (!mce_available(&boot_cpu_data)) {
                 err = -EIO;
@@ -2588,21 +2599,13 @@ static __init int mcheck_init_device(void)
         if (err)
                 goto err_out_mem;
 
-        cpu_notifier_register_begin();
-        for_each_online_cpu(i) {
-                err = mce_device_create(i);
-                if (err) {
-                        /*
-                         * Register notifier anyway (and do not unreg it) so
-                         * that we don't leave undeleted timers, see notifier
-                         * callback above.
-                         */
-                        __register_hotcpu_notifier(&mce_cpu_notifier);
-                        cpu_notifier_register_done();
-                        goto err_device_create;
-                }
-        }
+        err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/mce:online",
+                                mce_cpu_online, mce_cpu_pre_down);
+        if (err < 0)
+                goto err_out_mem;
+        hp_online = err;
+
         cpu_notifier_register_begin();
         __register_hotcpu_notifier(&mce_cpu_notifier);
         cpu_notifier_register_done();
@@ -2617,16 +2620,7 @@ static __init int mcheck_init_device(void)
 
 err_register:
         unregister_syscore_ops(&mce_syscore_ops);
-
-err_device_create:
-        /*
-         * We didn't keep track of which devices were created above, but
-         * even if we had, the set of online cpus might have changed.
-         * Play safe and remove for every possible cpu, since
-         * mce_device_remove() will do the right thing.
-         */
-        for_each_possible_cpu(i)
-                mce_device_remove(i);
+        cpuhp_remove_state(hp_online);
 
 err_out_mem:
         free_cpumask_var(mce_device_initialized);
--
cgit v1.2.3


From 0e285d36bd2bfee0b95433ccc9065c878164f5b2 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Thu, 10 Nov 2016 18:44:47 +0100
Subject: x86/mcheck: Move CPU_DEAD to hotplug state machine

This moves the last piece of the old hotplug notifier code in MCE to the
new hotplug state machine.

Signed-off-by: Sebastian Andrzej Siewior
Acked-by: Borislav Petkov
Cc: Tony Luck
Cc: rt@linutronix.de
Cc: linux-edac@vger.kernel.org
Link: http://lkml.kernel.org/r/20161110174447.11848-8-bigeasy@linutronix.de
Signed-off-by: Thomas Gleixner
---
 arch/x86/kernel/cpu/mcheck/mce.c | 42 ++++++++++++++--------------------------
 include/linux/cpuhotplug.h       |  1 +
 2 files changed, 16 insertions(+), 27 deletions(-)

(limited to 'arch/x86/kernel/cpu/mcheck/mce.c')

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 78955f501ff2..b888e2f6af41 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -2506,26 +2506,14 @@ static void mce_reenable_cpu(void)
         }
 }
 
-/* Get notified when a cpu comes on/off. Be hotplug friendly. */
-static int
-mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
+static int mce_cpu_dead(unsigned int cpu)
 {
-        unsigned int cpu = (unsigned long)hcpu;
+        mce_intel_hcpu_update(cpu);
 
-        switch (action & ~CPU_TASKS_FROZEN) {
-        case CPU_DEAD:
-                mce_intel_hcpu_update(cpu);
-
-                /* intentionally ignoring frozen here */
-                if (!(action & CPU_TASKS_FROZEN))
-                        cmci_rediscover();
-                break;
-        case CPU_DOWN_PREPARE:
-
-                break;
-        }
-
-        return NOTIFY_OK;
+        /* intentionally ignoring frozen here */
+        if (!cpuhp_tasks_frozen)
+                cmci_rediscover();
+        return 0;
 }
@@ -2556,10 +2544,6 @@ static int mce_cpu_pre_down(unsigned int cpu)
         return 0;
 }
 
-static struct notifier_block mce_cpu_notifier = {
-        .notifier_call = mce_cpu_callback,
-};
-
 static __init void mce_init_banks(void)
 {
         int i;
@@ -2599,16 +2583,17 @@ static __init int mcheck_init_device(void)
         if (err)
                 goto err_out_mem;
 
+        err = cpuhp_setup_state(CPUHP_X86_MCE_DEAD, "x86/mce:dead", NULL,
+                                mce_cpu_dead);
+        if (err)
+                goto err_out_mem;
+
         err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/mce:online",
                                 mce_cpu_online, mce_cpu_pre_down);
         if (err < 0)
-                goto err_out_mem;
+                goto err_out_online;
         hp_online = err;
 
-        cpu_notifier_register_begin();
-        __register_hotcpu_notifier(&mce_cpu_notifier);
-        cpu_notifier_register_done();
-
         register_syscore_ops(&mce_syscore_ops);
 
         /* register character device /dev/mcelog */
@@ -2622,6 +2607,9 @@ err_register:
         unregister_syscore_ops(&mce_syscore_ops);
         cpuhp_remove_state(hp_online);
 
+err_out_online:
+        cpuhp_remove_state(CPUHP_X86_MCE_DEAD);
+
 err_out_mem:
         free_cpumask_var(mce_device_initialized);
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 3410d83cc2e2..79b96f647d64 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -16,6 +16,7 @@ enum cpuhp_state {
         CPUHP_PERF_SUPERH,
         CPUHP_X86_HPET_DEAD,
         CPUHP_X86_APB_DEAD,
+        CPUHP_X86_MCE_DEAD,
         CPUHP_VIRT_NET_DEAD,
         CPUHP_SLUB_DEAD,
         CPUHP_MM_WRITEBACK_DEAD,
--
cgit v1.2.3
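Taken together, the series replaces the notifier with two hotplug-state
registrations. Below is a condensed, illustrative C sketch of the resulting
init path, assembled from the hunks above; the function name
mcheck_init_device_sketch() is invented for illustration, and the unchanged
syscore / /dev/mcelog registration and its error label are elided.

#include <linux/cpuhotplug.h>
#include <linux/init.h>

static __init int mcheck_init_device_sketch(void)
{
        int err;

        /* Fixed state in the PREPARE section: the teardown callback,
         * mce_cpu_dead(), runs on a control CPU after the dead CPU is gone.
         */
        err = cpuhp_setup_state(CPUHP_X86_MCE_DEAD, "x86/mce:dead",
                                NULL, mce_cpu_dead);
        if (err)
                return err;

        /* Dynamic AP state: on success cpuhp_setup_state() returns the
         * allocated state number (stored in hp_online in the real code for
         * the later cpuhp_remove_state() call), and mce_cpu_online() is
         * invoked right away on every CPU that is already online.
         */
        err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/mce:online",
                                mce_cpu_online, mce_cpu_pre_down);
        if (err < 0)
                goto out_dead;

        /* ... register_syscore_ops(), /dev/mcelog and the remaining error
         * unwinding continue as in the hunks above ...
         */
        return 0;

out_dead:
        cpuhp_remove_state(CPUHP_X86_MCE_DEAD);
        return err;
}

Compared with the old notifier, the explicit for_each_online_cpu() bring-up
loop disappears because cpuhp_setup_state() already runs the startup
callback on every currently online CPU.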