From 99ac3a5e408ddb63a658b4668da8f8761ca1bfc0 Mon Sep 17 00:00:00 2001 From: Kyungmin Park Date: Fri, 19 Aug 2011 20:25:05 +0900 Subject: ARM: EXYNOS4: Add restart hook for proper reboot This is required to use SWRESET. Signed-off-by: Kyungmin Park Signed-off-by: Kukjin Kim (cherry picked from commit d2edddf2b25863ec0893635662b0832f9965b543) --- arch/arm/mach-exynos4/cpu.c | 10 ++++++++++ arch/arm/mach-exynos4/include/mach/regs-pmu.h | 2 ++ 2 files changed, 12 insertions(+) diff --git a/arch/arm/mach-exynos4/cpu.c b/arch/arm/mach-exynos4/cpu.c index bfd621460ab..1139608fa11 100644 --- a/arch/arm/mach-exynos4/cpu.c +++ b/arch/arm/mach-exynos4/cpu.c @@ -24,8 +24,10 @@ #include #include #include +#include #include +#include extern int combiner_init(unsigned int combiner_nr, void __iomem *base, unsigned int irq_start); @@ -114,6 +116,11 @@ static void exynos4_idle(void) local_irq_enable(); } +static void exynos4_sw_reset(void) +{ + __raw_writel(0x1, S5P_SWRESET); +} + /* * exynos4_map_io * @@ -222,5 +229,8 @@ int __init exynos4_init(void) /* set idle function */ pm_idle = exynos4_idle; + /* set sw_reset function */ + s5p_reset_hook = exynos4_sw_reset; + return sysdev_register(&exynos4_sysdev); } diff --git a/arch/arm/mach-exynos4/include/mach/regs-pmu.h b/arch/arm/mach-exynos4/include/mach/regs-pmu.h index a9643371f8e..fbb79ccf00b 100644 --- a/arch/arm/mach-exynos4/include/mach/regs-pmu.h +++ b/arch/arm/mach-exynos4/include/mach/regs-pmu.h @@ -29,6 +29,8 @@ #define S5P_USE_STANDBY_WFE1 (1 << 25) #define S5P_USE_MASK ((0x3 << 16) | (0x3 << 24)) +#define S5P_SWRESET S5P_PMUREG(0x0400) + #define S5P_WAKEUP_STAT S5P_PMUREG(0x0600) #define S5P_EINT_WAKEUP_MASK S5P_PMUREG(0x0604) #define S5P_WAKEUP_MASK S5P_PMUREG(0x0608) -- cgit v1.2.3 From 2988d6f59bdecf10e3a6f0e13861dc8108cd2faf Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Fri, 19 Aug 2011 17:58:35 +0100 Subject: ARM: 7029/1: Make cpu_architecture into a global variable The CPU architecture really should not be changing at runtime, so make it a global variable instead of a function. The cpu_architecture() function declared in remains the correct way to read this variable from C code. Signed-off-by: Dave Martin Reviewed-by: Jon Medhurst Signed-off-by: Russell King (cherry picked from commit db9c84ccacb0e7a7903117929cf1dc914f82fc90) --- arch/arm/include/asm/system.h | 3 ++- arch/arm/kernel/setup.c | 20 +++++++++++++++++++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h index 832888d0c20..4adf71b2b54 100644 --- a/arch/arm/include/asm/system.h +++ b/arch/arm/include/asm/system.h @@ -57,6 +57,7 @@ #ifndef __ASSEMBLY__ +#include #include #include @@ -104,7 +105,7 @@ struct mm_struct; extern void show_pte(struct mm_struct *mm, unsigned long addr); extern void __show_regs(struct pt_regs *); -extern int cpu_architecture(void); +extern int __pure cpu_architecture(void); extern void cpu_init(void); void arm_machine_restart(char mode, const char *cmd); diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index e514c76043b..10fce616e99 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -29,6 +29,8 @@ #include #include #include +#include +#include #include #include @@ -42,6 +44,7 @@ #include #include #include +#include #include #include @@ -115,6 +118,13 @@ struct outer_cache_fns outer_cache __read_mostly; EXPORT_SYMBOL(outer_cache); #endif +/* + * Cached cpu_architecture() result for use by assembler code. + * C code should use the cpu_architecture() function instead of accessing this + * variable directly. + */ +int __cpu_architecture __read_mostly = CPU_ARCH_UNKNOWN; + struct stack { u32 irq[3]; u32 abt[3]; @@ -210,7 +220,7 @@ static const char *proc_arch[] = { "?(17)", }; -int cpu_architecture(void) +static int __get_cpu_architecture(void) { int cpu_arch; @@ -243,6 +253,13 @@ int cpu_architecture(void) return cpu_arch; } +int __pure cpu_architecture(void) +{ + BUG_ON(__cpu_architecture == CPU_ARCH_UNKNOWN); + + return __cpu_architecture; +} + static int cpu_has_aliasing_icache(unsigned int arch) { int aliasing_icache; @@ -414,6 +431,7 @@ static void __init setup_processor(void) } cpu_name = list->cpu_name; + __cpu_architecture = __get_cpu_architecture(); #ifdef MULTI_CPU processor = *list->proc; -- cgit v1.2.3 From 9b45bd9beb2bc6ab6aade3f1fac4885c2c16839e Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Fri, 19 Aug 2011 17:59:27 +0100 Subject: ARM: 7030/1: entry: Remove unnecessary masking when decoding Thumb-2 instructions When testing whether a Thumb-2 instruction is 32 bits long or not, the masking done in order to test bits 11-15 of the first instruction halfword won't affect the result of the comparison, so remove it. Signed-off-by: Dave Martin Reviewed-by: Jon Medhurst Acked-by: Nicolas Pitre Signed-off-by: Russell King (cherry picked from commit 948e32025349faaa1cd0a55dc669cc7da095b21d) --- arch/arm/kernel/entry-armv.S | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index a87cbf889ff..b7236d400ae 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -262,8 +262,7 @@ __und_svc: ldr r0, [r4, #-4] #else ldrh r0, [r4, #-2] @ Thumb instruction at LR - 2 - and r9, r0, #0xf800 - cmp r9, #0xe800 @ 32-bit instruction if xx >= 0 + cmp r0, #0xe800 @ 32-bit instruction if xx >= 0 ldrhhs r9, [r4] @ bottom 16 bits orrhs r0, r9, r0, lsl #16 #endif @@ -445,8 +444,7 @@ __und_usr: ARM( ldrht r5, [r4], #2 ) THUMB( ldrht r5, [r4] ) THUMB( add r4, r4, #2 ) - and r0, r5, #0xf800 @ mask bits 111x x... .... .... - cmp r0, #0xe800 @ 32bit instruction if xx != 0 + cmp r5, #0xe800 @ 32bit instruction if xx != 0 blo __und_usr_unknown 3: ldrht r0, [r4] add r2, r2, #2 @ r2 is PC + 2, make it PC + 4 -- cgit v1.2.3 From ee49866febeac921a878088c58da1fdc0c0073f6 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Fri, 19 Aug 2011 18:00:08 +0100 Subject: ARM: 7031/1: entry: Fix Thumb-2 undef handling for multi-CPU kernels When v6 and >=v7 boards are supported in the same kernel, the __und_usr code currently makes a build-time assumption that Thumb-2 instructions occurring in userspace don't need to be supported. Strictly speaking this is incorrect. This patch fixes the above case by doing a run-time check on the CPU architecture in these cases. This only affects kernels which support v6 and >=v7 CPUs together: plain v6 and plain v7 kernels are unaffected. Signed-off-by: Dave Martin Reviewed-by: Jon Medhurst Signed-off-by: Russell King (cherry picked from commit 07ed43553f9ceefe5e3cbd25da0f6ff912b4fedb) --- arch/arm/kernel/entry-armv.S | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index b7236d400ae..9ad50c4208a 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -24,6 +24,7 @@ #include #include #include +#include #include "entry-header.S" #include @@ -439,7 +440,27 @@ __und_usr: #endif beq call_fpe @ Thumb instruction -#if __LINUX_ARM_ARCH__ >= 7 +#if CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7 +/* + * Thumb-2 instruction handling. Note that because pre-v6 and >= v6 platforms + * can never be supported in a single kernel, this code is not applicable at + * all when __LINUX_ARM_ARCH__ < 6. This allows simplifying assumptions to be + * made about .arch directives. + */ +#if __LINUX_ARM_ARCH__ < 7 +/* If the target CPU may not be Thumb-2-capable, a run-time check is needed: */ +#define NEED_CPU_ARCHITECTURE + ldr r5, .LCcpu_architecture + ldr r5, [r5] + cmp r5, #CPU_ARCH_ARMv7 + blo __und_usr_unknown +/* + * The following code won't get run unless the running CPU really is v7, so + * coding round the lack of ldrht on older arches is pointless. Temporarily + * override the assembler target arch with the minimum required instead: + */ + .arch armv6t2 +#endif 2: ARM( ldrht r5, [r4], #2 ) THUMB( ldrht r5, [r4] ) @@ -449,7 +470,16 @@ __und_usr: 3: ldrht r0, [r4] add r2, r2, #2 @ r2 is PC + 2, make it PC + 4 orr r0, r0, r5, lsl #16 + +#if __LINUX_ARM_ARCH__ < 7 +/* If the target arch was overridden, change it back: */ +#ifdef CONFIG_CPU_32v6K + .arch armv6k #else + .arch armv6 +#endif +#endif /* __LINUX_ARM_ARCH__ < 7 */ +#else /* !(CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7) */ b __und_usr_unknown #endif UNWIND(.fnend ) @@ -576,6 +606,12 @@ call_fpe: movw_pc lr @ CP#14 (Debug) movw_pc lr @ CP#15 (Control) +#ifdef NEED_CPU_ARCHITECTURE + .align 2 +.LCcpu_architecture: + .word __cpu_architecture +#endif + #ifdef CONFIG_NEON .align 6 -- cgit v1.2.3 From f392cbf75615e9d8cb90ef5ffb8d4e752b7be3c7 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 31 Aug 2011 00:03:13 +0800 Subject: usb: ehci: make HC see up-to-date qh/qtd descriptor ASAP This patch introduces the helper of ehci_sync_mem to flush qtd/qh into memory immediately on some ARM, so that HC can see the up-to-date qtd/qh descriptor asap. This patch fixs one performance bug on ARM Cortex A9 dual core platform, which has been reported on quite a few ARM machines (OMAP4, Tegra 2, snowball...), see details from link of https://bugs.launchpad.net/bugs/709245. The patch has been tested ok on OMAP4 panda A1 board, and the performance of 'dd' over usb mass storage can be increased from 4~5MB/sec to 14~16MB/sec after applying this patch. Cc: Russell King Signed-off-by: Ming Lei Signed-off-by: Alan Stern --- drivers/usb/host/ehci-q.c | 18 ++++++++++++++++++ drivers/usb/host/ehci.h | 17 +++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c index 0917e3a3246..b7f9496e4f0 100644 --- a/drivers/usb/host/ehci-q.c +++ b/drivers/usb/host/ehci-q.c @@ -995,6 +995,12 @@ static void qh_link_async (struct ehci_hcd *ehci, struct ehci_qh *qh) head->qh_next.qh = qh; head->hw->hw_next = dma; + /* + * flush qh descriptor into memory immediately, + * see comments in qh_append_tds. + */ + ehci_sync_mem(); + qh_get(qh); qh->xacterrs = 0; qh->qh_state = QH_STATE_LINKED; @@ -1082,6 +1088,18 @@ static struct ehci_qh *qh_append_tds ( wmb (); dummy->hw_token = token; + /* + * Writing to dma coherent buffer on ARM may + * be delayed to reach memory, so HC may not see + * hw_token of dummy qtd in time, which can cause + * the qtd transaction to be executed very late, + * and degrade performance a lot. ehci_sync_mem + * is added to flush 'token' immediatelly into + * memory, so that ehci can execute the transaction + * ASAP. + */ + ehci_sync_mem(); + urb->hcpriv = qh_get (qh); } } diff --git a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h index 989e0a8e01f..6fb0de8d4e8 100644 --- a/drivers/usb/host/ehci.h +++ b/drivers/usb/host/ehci.h @@ -736,6 +736,23 @@ static inline u32 hc32_to_cpup (const struct ehci_hcd *ehci, const __hc32 *x) #endif +/* + * Writing to dma coherent memory on ARM may be delayed via L2 + * writing buffer, so introduce the helper which can flush L2 writing + * buffer into memory immediately, especially used to flush ehci + * descriptor to memory. + */ +#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE +static inline void ehci_sync_mem() +{ + mb(); +} +#else +static inline void ehci_sync_mem() +{ +} +#endif + /*-------------------------------------------------------------------------*/ #ifndef DEBUG -- cgit v1.2.3