author     Philippe Langlais <philippe.langlais@linaro.org>    2012-03-19 09:23:33 +0100
committer  Philippe Langlais <philippe.langlais@linaro.org>    2012-03-19 09:23:33 +0100
commit     b16692467e08abab7d7971ec902cd66a9226d43b (patch)
tree       6bff77016c616a50b534f557b34bed3db57afff9 /arch
parent     55f4551fa7903dfd2ed508fbc33f55e999d13fde (diff)
parent     aa14c10bbcd4449a6dbdc671a334bef1b8431d21 (diff)
Merge topic branch 'hwmem' into integration-linux-ux500-3.3
Diffstat (limited to 'arch')
-rw-r--r--   arch/arm/mach-ux500/dcache.c                 254
-rw-r--r--   arch/arm/mach-ux500/hwmem-int.c              199
-rw-r--r--   arch/arm/mach-ux500/include/mach/dcache.h     26
3 files changed, 479 insertions, 0 deletions
diff --git a/arch/arm/mach-ux500/dcache.c b/arch/arm/mach-ux500/dcache.c
new file mode 100644
index 00000000000..b117d4e8283
--- /dev/null
+++ b/arch/arm/mach-ux500/dcache.c
@@ -0,0 +1,254 @@
+/*
+ * Copyright (C) ST-Ericsson SA 2011
+ *
+ * Cache handler integration and data cache helpers.
+ *
+ * Author: Johan Mossberg <johan.xx.mossberg@stericsson.com>
+ * for ST-Ericsson.
+ *
+ * License terms: GNU General Public License (GPL), version 2.
+ */
+
+#include <linux/dma-mapping.h>
+
+#include <asm/pgtable.h>
+#include <asm/cacheflush.h>
+#include <asm/outercache.h>
+#include <asm/system.h>
+
+/*
+ * Values are derived from measurements on HREFP_1.1_V32_OM_S10 running
+ * u8500-android-2.2_r1.1_v0.21.
+ *
+ * A lot of time can be spent trying to figure out the perfect breakpoints but
+ * for now I've chosen the following simple way.
+ *
+ * breakpoint = best_case + (worst_case - best_case) * 0.666
+ * The breakpoint is moved slightly towards the worst case because a full
+ * clean/flush affects the entire system so we should be a bit careful.
+ *
+ * BEST CASE:
+ * Best case is that the cache is empty and the system is idling. The case
+ * where the cache contains only targeted data could be better in some cases
+ * but it's hard to do measurements and calculate on that case so I choose the
+ * easier alternative.
+ *
+ * inner_clean_breakpoint = time_2_range_clean_on_empty_cache(
+ *				complete_clean_on_empty_cache_time)
+ * inner_flush_breakpoint = time_2_range_flush_on_empty_cache(
+ *				complete_flush_on_empty_cache_time)
+ *
+ * outer_clean_breakpoint = time_2_range_clean_on_empty_cache(
+ *				complete_clean_on_empty_cache_time)
+ * outer_flush_breakpoint = time_2_range_flush_on_empty_cache(
+ *				complete_flush_on_empty_cache_time)
+ *
+ * WORST CASE:
+ * Worst case is that the cache is filled with dirty non targeted data that
+ * will be used after the synchronization and the system is under heavy load.
+ *
+ * inner_clean_breakpoint = time_2_range_clean_on_empty_cache(
+ *				complete_clean_on_full_cache_time * 1.5)
+ * Times 1.5 because it runs on both cores half the time.
+ * inner_flush_breakpoint = time_2_range_flush_on_empty_cache(
+ *				complete_flush_on_full_cache_time * 1.5 +
+ *				complete_flush_on_full_cache_time / 2)
+ * Plus "complete_flush_on_full_cache_time / 2" because all data has to be read
+ * back. Here we assume that both cores can fill their cache simultaneously
+ * (seems to be the case, as operations on a full and an empty inner cache take
+ * roughly the same amount of time, i.e. the bus to outer is not the
+ * bottleneck).
+ *
+ * outer_clean_breakpoint = time_2_range_clean_on_empty_cache(
+ *				complete_clean_on_full_cache_time +
+ *				(complete_clean_on_full_cache_time -
+ *				complete_clean_on_empty_cache_time))
+ * Plus "(complete_flush_on_full_cache_time -
+ * complete_flush_on_empty_cache_time)" because no one else can work when we
+ * hog the bus with our unnecessary transfer.
+ * outer_flush_breakpoint = time_2_range_flush_on_empty_cache(
+ *				complete_flush_on_full_cache_time * 2 +
+ *				(complete_flush_on_full_cache_time -
+ *				complete_flush_on_empty_cache_time) * 2)
+ *
+ * These values might have to be updated if changes are made to the CPU, L2$,
+ * memory bus or memory.
+ */
+/* 28930 */
+static const u32 inner_clean_breakpoint = 21324 + (32744 - 21324) * 0.666;
+/* 36224 */
+static const u32 inner_flush_breakpoint = 21324 + (43697 - 21324) * 0.666;
+/* 254069 */
+static const u32 outer_clean_breakpoint = 68041 + (347363 - 68041) * 0.666;
+/* 485414 */
+static const u32 outer_flush_breakpoint = 68041 + (694727 - 68041) * 0.666;
+
+static void __clean_inner_dcache_all(void *param);
+static void clean_inner_dcache_all(void);
+
+static void __flush_inner_dcache_all(void *param);
+static void flush_inner_dcache_all(void);
+
+static bool is_cache_exclusive(void);
+
+void drain_cpu_write_buf(void)
+{
+	dsb();
+	outer_cache.sync();
+}
+
+void clean_cpu_dcache(void *vaddr, u32 paddr, u32 length, bool inner_only,
+						bool *cleaned_everything)
+{
+	/*
+	 * There is no problem with exclusive caches here as the Cortex-A9
+	 * documentation (8.1.4. Exclusive L2 cache) says that when a dirty
+	 * line is moved from L2 to L1 it is first written to mem. Because
+	 * of this there is no way a line can avoid the clean by jumping
+	 * between the cache levels.
+	 */
+	*cleaned_everything = true;
+
+	if (length < inner_clean_breakpoint) {
+		/* Inner clean range */
+		dmac_map_area(vaddr, length, DMA_TO_DEVICE);
+		*cleaned_everything = false;
+	} else {
+		clean_inner_dcache_all();
+	}
+
+	if (!inner_only) {
+		/*
+		 * There is currently no outer_cache.clean_all() so we use
+		 * flush instead, which is ok as clean is a subset of flush.
+		 * Clean range and flush range take the same amount of time
+		 * so we can use outer_flush_breakpoint here.
+		 */
+		if (length < outer_flush_breakpoint) {
+			outer_cache.clean_range(paddr, paddr + length);
+			*cleaned_everything = false;
+		} else {
+			outer_cache.flush_all();
+		}
+	}
+}
+
+void flush_cpu_dcache(void *vaddr, u32 paddr, u32 length, bool inner_only,
+						bool *flushed_everything)
+{
+	/*
+	 * There might still be stale data in the caches after this call if the
+	 * cache levels are exclusive. The following can happen.
+	 * 1. Clean L1 moves the data to L2.
+	 * 2. Speculative prefetch, preemption or loads on the other core move
+	 *    all the data back to L1; any dirty data will be written to mem as
+	 *    a result of this.
+	 * 3. Flush L2 does nothing as there is no targeted data in L2.
+	 * 4. Flush L1 moves the data to L2. Notice that this does not happen
+	 *    when the cache levels are non-exclusive as clean pages are not
+	 *    written to L2 in that case.
+	 * 5. Stale data is still present in L2!
+	 * I see two possible solutions: don't use exclusive caches, or
+	 * (temporarily) disable prefetching to L1, preemption and the other
+	 * core.
+	 *
+	 * A situation can occur where the operation does not seem atomic from
+	 * the other core's point of view, even on a non-exclusive cache setup.
+	 * Replace step 2 in the previous scenario with a write from the other
+	 * core. The other core will write on top of the old data but the
+	 * result will not be written to memory. One would expect either that
+	 * the write was performed on top of the old data and was written to
+	 * memory (the write occurred before the flush) or that the write was
+	 * performed on top of the new data and was not written to memory (the
+	 * write occurred after the flush). The same problem can occur with one
+	 * core if kernel preemption is enabled. The solution is to
+	 * (temporarily) disable the other core and preemption. I can't think
+	 * of any situation where this would be a problem and disabling the
+	 * other core for the duration of this call is mighty expensive, so for
+	 * now I just ignore the problem.
+	 */
+
+	*flushed_everything = true;
+
+	if (!inner_only) {
+		/*
+		 * Beautiful solution for the exclusive problems :)
+		 */
+		if (is_cache_exclusive())
+			panic("%s can't handle exclusive CPU caches\n",
+								__func__);
+
+		if (length < inner_clean_breakpoint) {
+			/* Inner clean range */
+			dmac_map_area(vaddr, length, DMA_TO_DEVICE);
+			*flushed_everything = false;
+		} else {
+			clean_inner_dcache_all();
+		}
+
+		if (length < outer_flush_breakpoint) {
+			outer_cache.flush_range(paddr, paddr + length);
+			*flushed_everything = false;
+		} else {
+			outer_cache.flush_all();
+		}
+	}
+
+	if (length < inner_flush_breakpoint) {
+		/* Inner flush range */
+		dmac_flush_range(vaddr, (void *)((u32)vaddr + length));
+		*flushed_everything = false;
+	} else {
+		flush_inner_dcache_all();
+	}
+}
+
+bool speculative_data_prefetch(void)
+{
+	return true;
+}
+
+u32 get_dcache_granularity(void)
+{
+	return 32;
+}
+
+/*
+ * Local functions
+ */
+
+static void __clean_inner_dcache_all(void *param)
+{
+	__cpuc_clean_dcache_all();
+}
+
+static void clean_inner_dcache_all(void)
+{
+	on_each_cpu(__clean_inner_dcache_all, NULL, 1);
+}
+
+static void __flush_inner_dcache_all(void *param)
+{
+	__cpuc_flush_dcache_all();
+}
+
+static void flush_inner_dcache_all(void)
+{
+	on_each_cpu(__flush_inner_dcache_all, NULL, 1);
+}
+
+static bool is_cache_exclusive(void)
+{
+	static const u32 CA9_ACTLR_EXCL = 0x80;
+
+	u32 armv7_actlr;
+
+	asm (
+		"mrc p15, 0, %0, c1, c0, 1"
+		: "=r" (armv7_actlr)
+	);
+
+	if (armv7_actlr & CA9_ACTLR_EXCL)
+		return true;
+	else
+		return false;
+}
diff --git a/arch/arm/mach-ux500/hwmem-int.c b/arch/arm/mach-ux500/hwmem-int.c
new file mode 100644
index 00000000000..e3fecb8c354
--- /dev/null
+++ b/arch/arm/mach-ux500/hwmem-int.c
@@ -0,0 +1,199 @@
+/*
+ * Copyright (C) ST-Ericsson SA 2011
+ *
+ * Hardware memory driver integration
+ *
+ * Author: Johan Mossberg <johan.xx.mossberg@stericsson.com> for ST-Ericsson.
+ *
+ * License terms: GNU General Public License (GPL), version 2.
+ */
+
+#include <linux/hwmem.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+
+/* CONA API */
+void *cona_create(const char *name, phys_addr_t region_paddr,
+					size_t region_size);
+void *cona_alloc(void *instance, size_t size);
+void cona_free(void *instance, void *alloc);
+phys_addr_t cona_get_alloc_paddr(void *alloc);
+void *cona_get_alloc_kaddr(void *instance, void *alloc);
+size_t cona_get_alloc_size(void *alloc);
+
+struct hwmem_mem_type_struct *hwmem_mem_types;
+unsigned int hwmem_num_mem_types;
+
+static phys_addr_t hwmem_paddr;
+static size_t hwmem_size;
+
+static phys_addr_t hwmem_prot_paddr;
+static size_t hwmem_prot_size;
+
+static int __init parse_hwmem_prot_param(char *p)
+{
+	hwmem_prot_size = memparse(p, &p);
+
+	if (*p != '@')
+		goto no_at;
+
+	hwmem_prot_paddr = memparse(p + 1, &p);
+
+	return 0;
+
+no_at:
+	hwmem_prot_size = 0;
+
+	return -EINVAL;
+}
+early_param("hwmem_prot", parse_hwmem_prot_param);
+
+static int __init parse_hwmem_param(char *p)
+{
+	hwmem_size = memparse(p, &p);
+
+	if (*p != '@')
+		goto no_at;
+
+	hwmem_paddr = memparse(p + 1, &p);
+
+	return 0;
+
+no_at:
+	hwmem_size = 0;
+
+	return -EINVAL;
+}
+early_param("hwmem", parse_hwmem_param);
+
+static int __init setup_hwmem(void)
+{
+	static const unsigned int NUM_MEM_TYPES = 3;
+
+	int ret;
+
+	if (hwmem_paddr != PAGE_ALIGN(hwmem_paddr) ||
+		hwmem_size != PAGE_ALIGN(hwmem_size) || hwmem_size == 0) {
+		printk(KERN_WARNING "HWMEM: hwmem_paddr !="
+			" PAGE_ALIGN(hwmem_paddr) || hwmem_size !="
+			" PAGE_ALIGN(hwmem_size) || hwmem_size == 0\n");
+		return -ENOMSG;
+	}
+
+	hwmem_mem_types = kzalloc(sizeof(struct hwmem_mem_type_struct) *
+					NUM_MEM_TYPES, GFP_KERNEL);
+	if (hwmem_mem_types == NULL)
+		return -ENOMEM;
+
+	hwmem_mem_types[0].id = HWMEM_MEM_SCATTERED_SYS;
+	hwmem_mem_types[0].allocator_api.alloc = cona_alloc;
+	hwmem_mem_types[0].allocator_api.free = cona_free;
+	hwmem_mem_types[0].allocator_api.get_alloc_paddr =
+							cona_get_alloc_paddr;
+	hwmem_mem_types[0].allocator_api.get_alloc_kaddr =
+							cona_get_alloc_kaddr;
+	hwmem_mem_types[0].allocator_api.get_alloc_size = cona_get_alloc_size;
+	hwmem_mem_types[0].allocator_instance = cona_create("hwmem",
+						hwmem_paddr, hwmem_size);
+	if (IS_ERR(hwmem_mem_types[0].allocator_instance)) {
+		ret = PTR_ERR(hwmem_mem_types[0].allocator_instance);
+		goto hwmem_ima_init_failed;
+	}
+
+	hwmem_mem_types[1] = hwmem_mem_types[0];
+	hwmem_mem_types[1].id = HWMEM_MEM_CONTIGUOUS_SYS;
+
+	hwmem_mem_types[2] = hwmem_mem_types[1];
+	hwmem_mem_types[2].id = HWMEM_MEM_PROTECTED_SYS;
+
+	if (hwmem_prot_size > 0) {
+		hwmem_mem_types[2].allocator_instance = cona_create("hwmem_prot",
+					hwmem_prot_paddr, hwmem_prot_size);
+		if (IS_ERR(hwmem_mem_types[2].allocator_instance)) {
+			ret = PTR_ERR(hwmem_mem_types[2].allocator_instance);
+			goto hwmem_ima_init_failed;
+		}
+	}
+
+	hwmem_num_mem_types = NUM_MEM_TYPES;
+
+	return 0;
+
+hwmem_ima_init_failed:
+	kfree(hwmem_mem_types);
+
+	return ret;
+}
+arch_initcall_sync(setup_hwmem);
+
+enum hwmem_alloc_flags cachi_get_cache_settings(
+			enum hwmem_alloc_flags requested_cache_settings)
+{
+	static const u32 CACHE_ON_FLAGS_MASK = HWMEM_ALLOC_HINT_CACHED |
+		HWMEM_ALLOC_HINT_CACHE_WB | HWMEM_ALLOC_HINT_CACHE_WT |
+		HWMEM_ALLOC_HINT_CACHE_NAOW | HWMEM_ALLOC_HINT_CACHE_AOW |
+		HWMEM_ALLOC_HINT_INNER_AND_OUTER_CACHE |
+		HWMEM_ALLOC_HINT_INNER_CACHE_ONLY;
+
+	enum hwmem_alloc_flags cache_settings;
+
+	if (!(requested_cache_settings & CACHE_ON_FLAGS_MASK) &&
+		requested_cache_settings & (HWMEM_ALLOC_HINT_NO_WRITE_COMBINE |
+		HWMEM_ALLOC_HINT_UNCACHED | HWMEM_ALLOC_HINT_WRITE_COMBINE))
+		/*
+		 * We never use uncached as it's extremely slow and there is
+		 * no scenario where it would be better than buffered memory.
+		 */
+		return HWMEM_ALLOC_HINT_WRITE_COMBINE;
+
+	/*
+	 * The user has specified cached or nothing at all; both are treated as
+	 * cached.
+	 */
+	cache_settings = (requested_cache_settings &
+		~(HWMEM_ALLOC_HINT_UNCACHED |
+		HWMEM_ALLOC_HINT_NO_WRITE_COMBINE |
+		HWMEM_ALLOC_HINT_INNER_CACHE_ONLY |
+		HWMEM_ALLOC_HINT_CACHE_NAOW)) |
+		HWMEM_ALLOC_HINT_WRITE_COMBINE | HWMEM_ALLOC_HINT_CACHED |
+		HWMEM_ALLOC_HINT_CACHE_AOW |
+		HWMEM_ALLOC_HINT_INNER_AND_OUTER_CACHE;
+	if (!(cache_settings & (HWMEM_ALLOC_HINT_CACHE_WB |
+					HWMEM_ALLOC_HINT_CACHE_WT)))
+		cache_settings |= HWMEM_ALLOC_HINT_CACHE_WB;
+	/*
+	 * On ARMv7 "alloc on write" is just a hint so we need to assume the
+	 * worst case, i.e. "alloc on write". We would however like to remember
+	 * the requested "alloc on write" setting so that we can pass it on to
+	 * the hardware; we use the reserved bit in the alloc flags to do that.
+	 */
+	if (requested_cache_settings & HWMEM_ALLOC_HINT_CACHE_AOW)
+		cache_settings |= HWMEM_ALLOC_RESERVED_CHI;
+	else
+		cache_settings &= ~HWMEM_ALLOC_RESERVED_CHI;
+
+	return cache_settings;
+}
+
+void cachi_set_pgprot_cache_options(enum hwmem_alloc_flags cache_settings,
+							pgprot_t *pgprot)
+{
+	if (cache_settings & HWMEM_ALLOC_HINT_CACHED) {
+		if (cache_settings & HWMEM_ALLOC_HINT_CACHE_WT)
+			*pgprot = __pgprot_modify(*pgprot, L_PTE_MT_MASK,
+						L_PTE_MT_WRITETHROUGH);
+		else {
+			if (cache_settings & HWMEM_ALLOC_RESERVED_CHI)
+				*pgprot = __pgprot_modify(*pgprot,
+					L_PTE_MT_MASK, L_PTE_MT_WRITEALLOC);
+			else
+				*pgprot = __pgprot_modify(*pgprot,
+					L_PTE_MT_MASK, L_PTE_MT_WRITEBACK);
+		}
+	} else {
+		*pgprot = pgprot_writecombine(*pgprot);
+	}
+}
diff --git a/arch/arm/mach-ux500/include/mach/dcache.h b/arch/arm/mach-ux500/include/mach/dcache.h
new file mode 100644
index 00000000000..83fe618b04f
--- /dev/null
+++ b/arch/arm/mach-ux500/include/mach/dcache.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) ST-Ericsson SA 2011
+ *
+ * Data cache helpers
+ *
+ * Author: Johan Mossberg <johan.xx.mossberg@stericsson.com>
+ * for ST-Ericsson.
+ *
+ * License terms: GNU General Public License (GPL), version 2.
+ */
+
+#ifndef _MACH_UX500_DCACHE_H_
+#define _MACH_UX500_DCACHE_H_
+
+#include <linux/types.h>
+
+void drain_cpu_write_buf(void);
+void clean_cpu_dcache(void *vaddr, u32 paddr, u32 length, bool inner_only,
+						bool *cleaned_everything);
+void flush_cpu_dcache(void *vaddr, u32 paddr, u32 length, bool inner_only,
+						bool *flushed_everything);
+bool speculative_data_prefetch(void);
+/* Returns 1 if no cache is present */
+u32 get_dcache_granularity(void);
+
+#endif /* _MACH_UX500_DCACHE_H_ */
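
The breakpoint constants in dcache.c follow directly from the formula in the comment block; a minimal user-space sketch (values copied from the comments in the patch, not part of the patch itself) reproduces them:

    #include <stdio.h>

    /* breakpoint = best_case + (worst_case - best_case) * 0.666, as in dcache.c */
    static double breakpoint(double best_case, double worst_case)
    {
            return best_case + (worst_case - best_case) * 0.666;
    }

    int main(void)
    {
            printf("inner_clean_breakpoint ~ %.0f\n", breakpoint(21324, 32744));  /* ~28930 */
            printf("inner_flush_breakpoint ~ %.0f\n", breakpoint(21324, 43697));  /* ~36224 */
            printf("outer_clean_breakpoint ~ %.0f\n", breakpoint(68041, 347363)); /* ~254069 */
            printf("outer_flush_breakpoint ~ %.0f\n", breakpoint(68041, 694727)); /* ~485414 */
            return 0;
    }

The printed figures agree with the /* ... */ annotations above each constant (the u32 initializers in the patch truncate rather than round, so the stored values may land one lower).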
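The helpers exported through mach/dcache.h are meant to be driven by the hwmem cache handler. The fragment below is only an illustrative sketch of a caller; the buffer, its addresses and the surrounding driver context are hypothetical and not part of this patch:

    #include <linux/types.h>
    #include <mach/dcache.h>

    /* Hypothetical helper: make a CPU-written buffer visible to a device.
     * vaddr/paddr/len describe an existing kernel mapping of the buffer.
     */
    static void example_sync_for_device(void *vaddr, u32 paddr, u32 len)
    {
            bool cleaned_everything;

            /* Clean both inner (L1) and outer (L2) caches; above the
             * breakpoints the implementation falls back to a full clean/flush.
             */
            clean_cpu_dcache(vaddr, paddr, len, false, &cleaned_everything);

            /* Drain the write buffers before the device starts reading. */
            drain_cpu_write_buf();
    }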
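setup_hwmem() depends on the hwmem= and hwmem_prot= early parameters, which are read with memparse() in size@physical-address form. The sizes and addresses below are made-up placeholders, not values taken from the patch:

    hwmem=32M@0x1F000000 hwmem_prot=16M@0x21000000

The base address and size of the main pool must be page aligned and non-zero or setup_hwmem() bails out with -ENOMSG; the protected pool is optional and is only created when hwmem_prot= gives a non-zero size.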
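cachi_get_cache_settings() deliberately never hands out a truly uncached mapping. A hedged fragment illustrating that policy (kernel context assumed, cachi_get_cache_settings() taken to be declared via linux/hwmem.h; not part of the patch):

    #include <linux/hwmem.h>

    /* Illustration only: a request for uncached memory carries no cache-on
     * hints, so cachi_get_cache_settings() takes its first branch and returns
     * write-combined memory instead.
     */
    static enum hwmem_alloc_flags example_uncached_request(void)
    {
            enum hwmem_alloc_flags requested = HWMEM_ALLOC_HINT_UNCACHED;

            /* Expected result: HWMEM_ALLOC_HINT_WRITE_COMBINE. */
            return cachi_get_cache_settings(requested);
    }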