From 15ca68a993d10767c37793e6a0a780b0a7e395dd Mon Sep 17 00:00:00 2001 From: Noam Camus Date: Sun, 7 Sep 2014 22:52:33 +0300 Subject: ARC: Make vmalloc size configurable On ARC, lower 2G of address space is translated and used for - user vaddr space (region 0 to 5) - unused kernel-user gutter (region 6) - kernel vaddr space (region 7) where each region simply represents 256MB of address space. The kernel vaddr space of 256MB is used to implement vmalloc, modules So far this was enough, but not on EZChip system with 4K CPUs (given that per cpu mechanism uses vmalloc for allocating chunks) So allow VMALLOC_SIZE to be configurable by expanding down into the unused kernel-user gutter region which at default 256M was excessive anyways. Also use _BITUL() to fix a build error since PGDIR_SIZE cannot use "1UL" as called from assembly code in mm/tlbex.S Signed-off-by: Noam Camus [vgupta: rewrote changelog, debugged bootup crash due to int vs. hex] Acked-by: Vineet Gupta --- arch/arc/include/asm/processor.h | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'arch/arc/include/asm/processor.h') diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index 1d694c1ef6d6..d0a9211ec769 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -97,7 +97,7 @@ extern unsigned int get_wchan(struct task_struct *p); #endif /* !__ASSEMBLY__ */ /* - * System Memory Map on ARC + * Default System Memory Map on ARC * * ---------------------------- (lower 2G, Translated) ------------------------- * 0x0000_0000 0x5FFF_FFFF (user vaddr: TASK_SIZE) @@ -109,18 +109,17 @@ extern unsigned int get_wchan(struct task_struct *p); * 0xC000_0000 0xFFFF_FFFF (peripheral uncached space) * ----------------------------------------------------------------------------- */ -#define VMALLOC_START 0x70000000 -/* - * 1 PGDIR_SIZE each for fixmap/pkmap, 2 PGDIR_SIZE gutter - * See asm/highmem.h for details - */ -#define VMALLOC_SIZE (PAGE_OFFSET - VMALLOC_START - PGDIR_SIZE * 4) -#define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE) +#define TASK_SIZE 0x60000000 -#define USER_KERNEL_GUTTER 0x10000000 +#define VMALLOC_START (PAGE_OFFSET - (CONFIG_ARC_KVADDR_SIZE << 20)) + +/* 1 PGDIR_SIZE each for fixmap/pkmap, 2 PGDIR_SIZE gutter (see asm/highmem.h) */ +#define VMALLOC_SIZE ((CONFIG_ARC_KVADDR_SIZE << 20) - PGDIR_SIZE * 4) + +#define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE) -#define TASK_SIZE (VMALLOC_START - USER_KERNEL_GUTTER) +#define USER_KERNEL_GUTTER (VMALLOC_START - TASK_SIZE) #define STACK_TOP TASK_SIZE #define STACK_TOP_MAX STACK_TOP -- cgit v1.2.3 From 8bcf2c48f32e22f923b69f779c95b1348308d5b1 Mon Sep 17 00:00:00 2001 From: Noam Camus Date: Sun, 6 Dec 2015 15:40:55 +0200 Subject: ARC: [plat-eznps] Use dedicated user stack top NPS use special mapping right below TASK_SIZE. Hence we need to lower STACK_TOP so that user stack won't overlap NPS special mapping. Signed-off-by: Noam Camus Acked-by: Vineet Gupta --- arch/arc/include/asm/processor.h | 18 ++++++++++++++++++ arch/arc/mm/tlb.c | 6 ++++++ 2 files changed, 24 insertions(+) (limited to 'arch/arc/include/asm/processor.h') diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index d0a9211ec769..194a09fce198 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -121,7 +121,25 @@ extern unsigned int get_wchan(struct task_struct *p); #define USER_KERNEL_GUTTER (VMALLOC_START - TASK_SIZE) +#ifdef CONFIG_ARC_PLAT_EZNPS +/* NPS architecture defines special window of 129M in user address space for + * special memory areas, when accessing this window the MMU do not use TLB. + * Instead MMU direct the access to: + * 0x57f00000:0x57ffffff -- 1M of closely coupled memory (aka CMEM) + * 0x58000000:0x5fffffff -- 16 huge pages, 8M each, with fixed map (aka FMTs) + * + * CMEM - is the fastest memory we got and its size is 16K. + * FMT - is used to map either to internal/external memory. + * Internal memory is the second fast memory and its size is 16M + * External memory is the biggest memory (16G) and also the slowest. + * + * STACK_TOP need to be PMD align (21bit) that is why we supply 0x57e00000. + */ +#define STACK_TOP 0x57e00000 +#else #define STACK_TOP TASK_SIZE +#endif + #define STACK_TOP_MAX STACK_TOP /* This decides where the kernel will search for a free chunk of vm diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 745a9aeb2d96..ec868a9081a1 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -819,6 +819,12 @@ void arc_mmu_init(void) */ BUILD_BUG_ON(!IS_ALIGNED((CONFIG_ARC_KVADDR_SIZE << 20), PMD_SIZE)); + /* + * stack top size sanity check, + * Can't be done in processor.h due to header include depenedencies + */ + BUILD_BUG_ON(!IS_ALIGNED(STACK_TOP, PMD_SIZE)); + /* For efficiency sake, kernel is compile time built for a MMU ver * This must match the hardware it is running on. * Linux built for MMU V2, if run on MMU V1 will break down because V1 -- cgit v1.2.3 From 46c3e6b8768643d9bc7325324d17e37781b7bbf8 Mon Sep 17 00:00:00 2001 From: Tal Zilcer Date: Mon, 9 Mar 2015 16:58:39 +0200 Subject: ARC: [plat-eznps] Use dedicated cpu_relax() Since the CTOP is SMT hardware multi-threaded, we need to hint the HW that now will be a very good time to do a hardware thread context switching. This is done by issuing the schd.rw instruction (binary coded here so as to not require specific revision of GCC to build the kernel). sched.rw means that Thread becomes eligible for execution by the threads scheduler after all pending read/write transactions were completed. Implementing cpu_relax_lowlatency() with barrier() Since with current semantics of cpu_relax() it may take a while till yielded CPU will get back. Signed-off-by: Noam Camus Cc: Peter Zijlstra Acked-by: Vineet Gupta --- arch/arc/include/asm/processor.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'arch/arc/include/asm/processor.h') diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index 194a09fce198..f9048994b22f 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -57,9 +57,19 @@ struct task_struct; * A lot of busy-wait loops in SMP are based off of non-volatile data otherwise * get optimised away by gcc */ -#define cpu_relax() __asm__ __volatile__ ("" : : : "memory") +#ifndef CONFIG_EZNPS_MTM_EXT -#define cpu_relax_lowlatency() cpu_relax() +#define cpu_relax() barrier() +#define cpu_relax_lowlatency() cpu_relax() + +#else + +#define cpu_relax() \ + __asm__ __volatile__ (".word %0" : : "i"(CTOP_INST_SCHD_RW) : "memory") + +#define cpu_relax_lowlatency() barrier() + +#endif #define copy_segments(tsk, mm) do { } while (0) #define release_segments(mm) do { } while (0) -- cgit v1.2.3