From be2cf20a5ad31ebb13562c1c866ecc626fbd721e Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 10 Jul 2012 18:36:40 +1000 Subject: powerpc: More fixes for lazy IRQ vs. idle Looks like we still have issues with pSeries and Cell idle code vs. the lazy irq state. In fact, the reset fixes that went upstream are exposing the problem more by causing BUG_ON() to trigger (which this patch turns into a WARN_ON instead). We need to be careful when using a variant of low power state that has the side effect of turning interrupts back on, to properly set all the SW & lazy state to look as if everything is enabled before we enter the low power state with MSR:EE off as we will return with MSR:EE on. If not, we have a discrepancy of state which can cause things to go very wrong later on. This patch moves the logic into a helper and uses it from the pseries and cell idle code. The power4/970 idle code already got things right (in assembly even !) so I'm not touching it. The power7 "bare metal" idle code is subtly different and correct. Remains PA6T and some hypervisor based Cell platforms which have questionable code in there, but they are mostly dead platforms so I'll fix them when I manage to get final answers from the respective maintainers about how the low power state actually works on them. Signed-off-by: Benjamin Herrenschmidt CC: stable@vger.kernel.org [v3.4] --- arch/powerpc/include/asm/hw_irq.h | 2 ++ arch/powerpc/kernel/irq.c | 46 +++++++++++++++++++++++++ arch/powerpc/platforms/cell/pervasive.c | 11 +++--- arch/powerpc/platforms/pseries/processor_idle.c | 17 +++++---- 4 files changed, 64 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 6eb75b80488c..92224b7fd702 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -125,6 +125,8 @@ static inline bool arch_irq_disabled_regs(struct pt_regs *regs) return !regs->softe; } +extern bool prep_irq_for_idle(void); + #else /* CONFIG_PPC64 */ #define SET_MSR_EE(x) mtmsr(x) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 1b415027ec0e..9270a399c9d6 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -286,6 +286,52 @@ void notrace restore_interrupts(void) __hard_irq_enable(); } +/* + * This is a helper to use when about to go into idle low-power + * when the latter has the side effect of re-enabling interrupts + * (such as calling H_CEDE under pHyp). + * + * You call this function with interrupts soft-disabled (this is + * already the case when ppc_md.power_save is called). The function + * will return whether to enter power save or just return. + * + * In the former case, it will have notified lockdep of interrupts + * being re-enabled and generally sanitized the lazy irq state, + * and in the latter case it will leave with interrupts hard + * disabled and marked as such, so the local_irq_enable() call + * in cpu_idle() will properly re-enable everything. + */ +bool prep_irq_for_idle(void) +{ + /* + * First we need to hard disable to ensure no interrupt + * occurs before we effectively enter the low power state + */ + hard_irq_disable(); + + /* + * If anything happened while we were soft-disabled, + * we return now and do not enter the low power state. + */ + if (lazy_irq_pending()) + return false; + + /* Tell lockdep we are about to re-enable */ + trace_hardirqs_on(); + + /* + * Mark interrupts as soft-enabled and clear the + * PACA_IRQ_HARD_DIS from the pending mask since we + * are about to hard enable as well as a side effect + * of entering the low power state. + */ + local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; + local_paca->soft_enabled = 1; + + /* Tell the caller to enter the low power state */ + return true; +} + #endif /* CONFIG_PPC64 */ int arch_show_interrupts(struct seq_file *p, int prec) diff --git a/arch/powerpc/platforms/cell/pervasive.c b/arch/powerpc/platforms/cell/pervasive.c index efdacc829576..d17e98bc0c10 100644 --- a/arch/powerpc/platforms/cell/pervasive.c +++ b/arch/powerpc/platforms/cell/pervasive.c @@ -42,11 +42,9 @@ static void cbe_power_save(void) { unsigned long ctrl, thread_switch_control; - /* - * We need to hard disable interrupts, the local_irq_enable() done by - * our caller upon return will hard re-enable. - */ - hard_irq_disable(); + /* Ensure our interrupt state is properly tracked */ + if (!prep_irq_for_idle()) + return; ctrl = mfspr(SPRN_CTRLF); @@ -81,6 +79,9 @@ static void cbe_power_save(void) */ ctrl &= ~(CTRL_RUNLATCH | CTRL_TE); mtspr(SPRN_CTRLT, ctrl); + + /* Re-enable interrupts in MSR */ + __hard_irq_enable(); } static int cbe_system_reset_exception(struct pt_regs *regs) diff --git a/arch/powerpc/platforms/pseries/processor_idle.c b/arch/powerpc/platforms/pseries/processor_idle.c index e61483e8e960..c71be66bd5dc 100644 --- a/arch/powerpc/platforms/pseries/processor_idle.c +++ b/arch/powerpc/platforms/pseries/processor_idle.c @@ -99,15 +99,18 @@ out: static void check_and_cede_processor(void) { /* - * Interrupts are soft-disabled at this point, - * but not hard disabled. So an interrupt might have - * occurred before entering NAP, and would be potentially - * lost (edge events, decrementer events, etc...) unless - * we first hard disable then check. + * Ensure our interrupt state is properly tracked, + * also checks if no interrupt has occurred while we + * were soft-disabled */ - hard_irq_disable(); - if (!lazy_irq_pending()) + if (prep_irq_for_idle()) { cede_processor(); +#ifdef CONFIG_TRACE_IRQFLAGS + /* Ensure that H_CEDE returns with IRQs on */ + if (WARN_ON(!(mfmsr() & MSR_EE))) + __hard_irq_enable(); +#endif + } } static int dedicated_cede_loop(struct cpuidle_device *dev, -- cgit v1.2.3 From 21b2de341270bd7bb7a811027ffe63276d9b3b75 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 10 Jul 2012 18:37:56 +1000 Subject: powerpc: Fix build of some debug irq code There was a typo, checking for CONFIG_TRACE_IRQFLAG instead of CONFIG_TRACE_IRQFLAGS causing some useful debug code to not be built This in turns causes a build error on BookE 64-bit due to incorrect semicolons at the end of a couple of macros, so let's fix that too Signed-off-by: Benjamin Herrenschmidt CC: stable@vger.kernel.org [v3.4] --- arch/powerpc/include/asm/hw_irq.h | 4 ++-- arch/powerpc/kernel/irq.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 92224b7fd702..0554ab062bdc 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -86,8 +86,8 @@ static inline bool arch_irqs_disabled(void) } #ifdef CONFIG_PPC_BOOK3E -#define __hard_irq_enable() asm volatile("wrteei 1" : : : "memory"); -#define __hard_irq_disable() asm volatile("wrteei 0" : : : "memory"); +#define __hard_irq_enable() asm volatile("wrteei 1" : : : "memory") +#define __hard_irq_disable() asm volatile("wrteei 0" : : : "memory") #else #define __hard_irq_enable() __mtmsrd(local_paca->kernel_msr | MSR_EE, 1) #define __hard_irq_disable() __mtmsrd(local_paca->kernel_msr, 1) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 9270a399c9d6..1f017bb7a7ce 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -229,7 +229,7 @@ notrace void arch_local_irq_restore(unsigned long en) */ if (unlikely(irq_happened != PACA_IRQ_HARD_DIS)) __hard_irq_disable(); -#ifdef CONFIG_TRACE_IRQFLAG +#ifdef CONFIG_TRACE_IRQFLAGS else { /* * We should already be hard disabled here. We had bugs -- cgit v1.2.3 From aa709f3bc92c6daaf177cd7e3446da2ef64426c6 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 5 Jul 2012 16:30:33 +0000 Subject: powerpc/numa: Avoid stupid uninitialized warning from gcc Newer gcc are being a bit blind here (it's pretty obvious we don't reach the code path using the array if we haven't initialized the pointer) but none of that is performance critical so let's just silence it. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 6e8f677f5646..1e95556dc692 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -639,7 +639,7 @@ static void __init parse_drconf_memory(struct device_node *memory) unsigned int n, rc, ranges, is_kexec_kdump = 0; unsigned long lmb_size, base, size, sz; int nid; - struct assoc_arrays aa; + struct assoc_arrays aa = { .arrays = NULL }; n = of_get_drconf_memory(memory, &dm); if (!n) -- cgit v1.2.3 From 50fb31cfed9218b439360caf7c0399b00042da15 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 4 Jul 2012 15:51:10 +1000 Subject: tty/hvc_opal: Fix debug function name udbg_init_debug_opal() should be udbg_init_debug_opal_raw() as the caller in arch/powerpc/kernel/udbg.c expects Signed-off-by: Benjamin Herrenschmidt --- drivers/tty/hvc/hvc_opal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/hvc/hvc_opal.c b/drivers/tty/hvc/hvc_opal.c index ced26c8ccd57..0d2ea0c224c3 100644 --- a/drivers/tty/hvc/hvc_opal.c +++ b/drivers/tty/hvc/hvc_opal.c @@ -401,7 +401,7 @@ out: } #ifdef CONFIG_PPC_EARLY_DEBUG_OPAL_RAW -void __init udbg_init_debug_opal(void) +void __init udbg_init_debug_opal_raw(void) { u32 index = CONFIG_PPC_EARLY_DEBUG_OPAL_VTERMNO; hvc_opal_privs[index] = &hvc_opal_boot_priv; -- cgit v1.2.3