author     Philippe Langlais <philippe.langlais@linaro.org>    2012-03-19 09:23:33 +0100
committer  Philippe Langlais <philippe.langlais@linaro.org>    2012-03-19 09:23:33 +0100
commit     b16692467e08abab7d7971ec902cd66a9226d43b (patch)
tree       6bff77016c616a50b534f557b34bed3db57afff9 /arch
parent     55f4551fa7903dfd2ed508fbc33f55e999d13fde (diff)
parent     aa14c10bbcd4449a6dbdc671a334bef1b8431d21 (diff)
Merge topic branch 'hwmem' into integration-linux-ux500-3.3
Diffstat (limited to 'arch')
-rw-r--r--   arch/arm/mach-ux500/dcache.c                 254
-rw-r--r--   arch/arm/mach-ux500/hwmem-int.c              199
-rw-r--r--   arch/arm/mach-ux500/include/mach/dcache.h     26
3 files changed, 479 insertions, 0 deletions
diff --git a/arch/arm/mach-ux500/dcache.c b/arch/arm/mach-ux500/dcache.c
new file mode 100644
index 00000000000..b117d4e8283
--- /dev/null
+++ b/arch/arm/mach-ux500/dcache.c
@@ -0,0 +1,254 @@
+/*
+ * Copyright (C) ST-Ericsson SA 2011
+ *
+ * Cache handler integration and data cache helpers.
+ *
+ * Author: Johan Mossberg <johan.xx.mossberg@stericsson.com>
+ * for ST-Ericsson.
+ *
+ * License terms: GNU General Public License (GPL), version 2.
+ */
+
+#include <linux/dma-mapping.h>
+
+#include <asm/pgtable.h>
+#include <asm/cacheflush.h>
+#include <asm/outercache.h>
+#include <asm/system.h>
+
+/*
+ * Values are derived from measurements on HREFP_1.1_V32_OM_S10 running
+ * u8500-android-2.2_r1.1_v0.21.
+ *
+ * A lot of time can be spent trying to figure out the perfect breakpoints but
+ * for now I've chosen the following simple way.
+ *
+ * breakpoint = best_case + (worst_case - best_case) * 0.666
+ * The breakpoint is moved slightly towards the worst case because a full
+ * clean/flush affects the entire system so we should be a bit careful.
+ *
+ * BEST CASE:
+ * Best case is that the cache is empty and the system is idling. The case
+ * where the cache contains only targeted data could be better in some cases
+ * but it's hard to do measurements and calculate on that case so I choose the
+ * easier alternative.
+ *
+ * inner_clean_breakpoint = time_2_range_clean_on_empty_cache(
+ *				complete_clean_on_empty_cache_time)
+ * inner_flush_breakpoint = time_2_range_flush_on_empty_cache(
+ *				complete_flush_on_empty_cache_time)
+ *
+ * outer_clean_breakpoint = time_2_range_clean_on_empty_cache(
+ *				complete_clean_on_empty_cache_time)
+ * outer_flush_breakpoint = time_2_range_flush_on_empty_cache(
+ *				complete_flush_on_empty_cache_time)
+ *
+ * WORST CASE:
+ * Worst case is that the cache is filled with dirty non targeted data that
+ * will be used after the synchronization and the system is under heavy load.
+ *
+ * inner_clean_breakpoint = time_2_range_clean_on_empty_cache(
+ *				complete_clean_on_full_cache_time * 1.5)
+ * Times 1.5 because it runs on both cores half the time.
+ * inner_flush_breakpoint = time_2_range_flush_on_empty_cache(
+ *				complete_flush_on_full_cache_time * 1.5 +
+ *				complete_flush_on_full_cache_time / 2)
+ * Plus "complete_flush_on_full_cache_time / 2" because all data has to be read
+ * back. Here we assume that both cores can fill their cache simultaneously
+ * (seems to be the case, as operations on a full and an empty inner cache take
+ * roughly the same amount of time, i.e. the bus to outer is not the
+ * bottleneck).
+ *
+ * outer_clean_breakpoint = time_2_range_clean_on_empty_cache(
+ *				complete_clean_on_full_cache_time +
+ *				(complete_clean_on_full_cache_time -
+ *				complete_clean_on_empty_cache_time))
+ * Plus "(complete_flush_on_full_cache_time -
+ * complete_flush_on_empty_cache_time)" because no one else can work when we
+ * hog the bus with our unnecessary transfer.
+ * outer_flush_breakpoint = time_2_range_flush_on_empty_cache(
+ *				complete_flush_on_full_cache_time * 2 +
+ *				(complete_flush_on_full_cache_time -
+ *				complete_flush_on_empty_cache_time) * 2)
+ *
+ * These values might have to be updated if changes are made to the CPU, L2$,
+ * memory bus or memory.
+ */
+/* 28930 */
+static const u32 inner_clean_breakpoint = 21324 + (32744 - 21324) * 0.666;
+/* 36224 */
+static const u32 inner_flush_breakpoint = 21324 + (43697 - 21324) * 0.666;
+/* 254069 */
+static const u32 outer_clean_breakpoint = 68041 + (347363 - 68041) * 0.666;
+/* 485414 */
+static const u32 outer_flush_breakpoint = 68041 + (694727 - 68041) * 0.666;
+
+static void __clean_inner_dcache_all(void *param);
+static void clean_inner_dcache_all(void);
+
+static void __flush_inner_dcache_all(void *param);
+static void flush_inner_dcache_all(void);
+
+static bool is_cache_exclusive(void);
+
+void drain_cpu_write_buf(void)
+{
+	dsb();
+	outer_cache.sync();
+}
+
+void clean_cpu_dcache(void *vaddr, u32 paddr, u32 length, bool inner_only,
+						bool *cleaned_everything)
+{
+	/*
+	 * There is no problem with exclusive caches here as the Cortex-A9
+	 * documentation (8.1.4. Exclusive L2 cache) says that when a dirty
+	 * line is moved from L2 to L1 it is first written to mem. Because
+	 * of this there is no way a line can avoid the clean by jumping
+	 * between the cache levels.
+	 */
+	*cleaned_everything = true;
+
+	if (length < inner_clean_breakpoint) {
+		/* Inner clean range */
+		dmac_map_area(vaddr, length, DMA_TO_DEVICE);
+		*cleaned_everything = false;
+	} else {
+		clean_inner_dcache_all();
+	}
+
+	if (!inner_only) {
+		/*
+		 * There is currently no outer_cache.clean_all() so we use
+		 * flush instead, which is ok as clean is a subset of flush.
+		 * Clean range and flush range take the same amount of time
+		 * so we can use outer_flush_breakpoint here.
+		 */
+		if (length < outer_flush_breakpoint) {
+			outer_cache.clean_range(paddr, paddr + length);
+			*cleaned_everything = false;
+		} else {
+			outer_cache.flush_all();
+		}
+	}
+}
+
+void flush_cpu_dcache(void *vaddr, u32 paddr, u32 length, bool inner_only,
+						bool *flushed_everything)
+{
+	/*
+	 * There might still be stale data in the caches after this call if the
+	 * cache levels are exclusive. The following can happen.
+	 * 1. Clean L1 moves the data to L2.
+	 * 2. Speculative prefetch, preemption or loads on the other core move
+	 *    all the data back to L1; any dirty data will be written to mem as
+	 *    a result of this.
+	 * 3. Flush L2 does nothing as there is no targeted data in L2.
+	 * 4. Flush L1 moves the data to L2. Notice that this does not happen
+	 *    when the cache levels are non-exclusive as clean pages are not
+	 *    written to L2 in that case.
+	 * 5. Stale data is still present in L2!
+	 * I see two possible solutions: don't use exclusive caches, or
+	 * (temporarily) disable prefetching to L1, preemption and the other
+	 * core.
+	 *
+	 * A situation can occur where the operation does not seem atomic from
+	 * the other core's point of view, even on a non-exclusive cache setup.
+	 * Replace step 2 in the previous scenario with a write from the other
+	 * core. The other core will write on top of the old data but the
+	 * result will not be written to memory. One would expect either that
+	 * the write was performed on top of the old data and was written to
+	 * memory (the write occurred before the flush) or that the write was
+	 * performed on top of the new data and was not written to memory (the
+	 * write occurred after the flush). The same problem can occur with one
+	 * core if kernel preemption is enabled. The solution is to
+	 * (temporarily) disable the other core and preemption. I can't think
+	 * of any situation where this would be a problem and disabling the
+	 * other core for the duration of this call is mighty expensive, so for
+	 * now I just ignore the problem.
+	 */
+
+	*flushed_everything = true;
+
+	if (!inner_only) {
+		/*
+		 * Beautiful solution for the exclusive problems :)
+		 */
+		if (is_cache_exclusive())
+			panic("%s can't handle exclusive CPU caches\n",
+								__func__);
+
+		if (length < inner_clean_breakpoint) {
+			/* Inner clean range */
+			dmac_map_area(vaddr, length, DMA_TO_DEVICE);
+			*flushed_everything = false;
+		} else {
+			clean_inner_dcache_all();
+		}
+
+		if (length < outer_flush_breakpoint) {
+			outer_cache.flush_range(paddr, paddr + length);
+			*flushed_everything = false;
+		} else {
+			outer_cache.flush_all();
+		}
+	}
+
+	if (length < inner_flush_breakpoint) {
+		/* Inner flush range */
+		dmac_flush_range(vaddr, (void *)((u32)vaddr + length));
+		*flushed_everything = false;
+	} else {
+		flush_inner_dcache_all();
+	}
+}
+
+bool speculative_data_prefetch(void)
+{
+	return true;
+}
+
+u32 get_dcache_granularity(void)
+{
+	return 32;
+}
+
+/*
+ * Local functions
+ */
+
+static void __clean_inner_dcache_all(void *param)
+{
+	__cpuc_clean_dcache_all();
+}
+
+static void clean_inner_dcache_all(void)
+{
+	on_each_cpu(__clean_inner_dcache_all, NULL, 1);
+}
+
+static void __flush_inner_dcache_all(void *param)
+{
+	__cpuc_flush_dcache_all();
+}
+
+static void flush_inner_dcache_all(void)
+{
+	on_each_cpu(__flush_inner_dcache_all, NULL, 1);
+}
+
+static bool is_cache_exclusive(void)
+{
+	static const u32 CA9_ACTLR_EXCL = 0x80;
+
+	u32 armv7_actlr;
+
+	asm (
+		"mrc p15, 0, %0, c1, c0, 1"
+		: "=r" (armv7_actlr)
+	);
+
+	if (armv7_actlr & CA9_ACTLR_EXCL)
+		return true;
+	else
+		return false;
+}
diff --git a/arch/arm/mach-ux500/hwmem-int.c b/arch/arm/mach-ux500/hwmem-int.c
new file mode 100644
index 00000000000..e3fecb8c354
--- /dev/null
+++ b/arch/arm/mach-ux500/hwmem-int.c
@@ -0,0 +1,199 @@
+/*
+ * Copyright (C) ST-Ericsson SA 2011
+ *
+ * Hardware memory driver integration
+ *
+ * Author: Johan Mossberg <johan.xx.mossberg@stericsson.com> for ST-Ericsson.
+ *
+ * License terms: GNU General Public License (GPL), version 2.
+ */
+
+#include <linux/hwmem.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+
+/* CONA API */
+void *cona_create(const char *name, phys_addr_t region_paddr,
+					size_t region_size);
+void *cona_alloc(void *instance, size_t size);
+void cona_free(void *instance, void *alloc);
+phys_addr_t cona_get_alloc_paddr(void *alloc);
+void *cona_get_alloc_kaddr(void *instance, void *alloc);
+size_t cona_get_alloc_size(void *alloc);
+
+struct hwmem_mem_type_struct *hwmem_mem_types;
+unsigned int hwmem_num_mem_types;
+
+static phys_addr_t hwmem_paddr;
+static size_t hwmem_size;
+
+static phys_addr_t hwmem_prot_paddr;
+static size_t hwmem_prot_size;
+
+static int __init parse_hwmem_prot_param(char *p)
+{
+	hwmem_prot_size = memparse(p, &p);
+
+	if (*p != '@')
+		goto no_at;
+
+	hwmem_prot_paddr = memparse(p + 1, &p);
+
+	return 0;
+
+no_at:
+	hwmem_prot_size = 0;
+
+	return -EINVAL;
+}
+early_param("hwmem_prot", parse_hwmem_prot_param);
+
+static int __init parse_hwmem_param(char *p)
+{
+	hwmem_size = memparse(p, &p);
+
+	if (*p != '@')
+		goto no_at;
+
+	hwmem_paddr = memparse(p + 1, &p);
+
+	return 0;
+
+no_at:
+	hwmem_size = 0;
+
+	return -EINVAL;
+}
+early_param("hwmem", parse_hwmem_param);
+
+static int __init setup_hwmem(void)
+{
+	static const unsigned int NUM_MEM_TYPES = 3;
+
+	int ret;
+
+	if (hwmem_paddr != PAGE_ALIGN(hwmem_paddr) ||
+		hwmem_size != PAGE_ALIGN(hwmem_size) || hwmem_size == 0) {
+		printk(KERN_WARNING "HWMEM: hwmem_paddr !="
+			" PAGE_ALIGN(hwmem_paddr) || hwmem_size !="
+			" PAGE_ALIGN(hwmem_size) || hwmem_size == 0\n");
+		return -ENOMSG;
+	}
+
+	hwmem_mem_types = kzalloc(sizeof(struct hwmem_mem_type_struct) *
+					NUM_MEM_TYPES, GFP_KERNEL);
+	if (hwmem_mem_types == NULL)
+		return -ENOMEM;
+
+	hwmem_mem_types[0].id = HWMEM_MEM_SCATTERED_SYS;
+	hwmem_mem_types[0].allocator_api.alloc = cona_alloc;
+	hwmem_mem_types[0].allocator_api.free = cona_free;
+	hwmem_mem_types[0].allocator_api.get_alloc_paddr =
+							cona_get_alloc_paddr;
+	hwmem_mem_types[0].allocator_api.get_alloc_kaddr =
+							cona_get_alloc_kaddr;
+	hwmem_mem_types[0].allocator_api.get_alloc_size = cona_get_alloc_size;
+	hwmem_mem_types[0].allocator_instance = cona_create("hwmem",
+						hwmem_paddr, hwmem_size);
+	if (IS_ERR(hwmem_mem_types[0].allocator_instance)) {
+		ret = PTR_ERR(hwmem_mem_types[0].allocator_instance);
+		goto hwmem_ima_init_failed;
+	}
+
+	hwmem_mem_types[1] = hwmem_mem_types[0];
+	hwmem_mem_types[1].id = HWMEM_MEM_CONTIGUOUS_SYS;
+
+	hwmem_mem_types[2] = hwmem_mem_types[1];
+	hwmem_mem_types[2].id = HWMEM_MEM_PROTECTED_SYS;
+
+	if (hwmem_prot_size > 0) {
+		hwmem_mem_types[2].allocator_instance = cona_create("hwmem_prot",
+					hwmem_prot_paddr, hwmem_prot_size);
+		if (IS_ERR(hwmem_mem_types[2].allocator_instance)) {
+			ret = PTR_ERR(hwmem_mem_types[2].allocator_instance);
+			goto hwmem_ima_init_failed;
+		}
+	}
+
+	hwmem_num_mem_types = NUM_MEM_TYPES;
+
+	return 0;
+
+hwmem_ima_init_failed:
+	kfree(hwmem_mem_types);
+
+	return ret;
+}
+arch_initcall_sync(setup_hwmem);
+
+enum hwmem_alloc_flags cachi_get_cache_settings(
+			enum hwmem_alloc_flags requested_cache_settings)
+{
+	static const u32 CACHE_ON_FLAGS_MASK = HWMEM_ALLOC_HINT_CACHED |
+		HWMEM_ALLOC_HINT_CACHE_WB | HWMEM_ALLOC_HINT_CACHE_WT |
+		HWMEM_ALLOC_HINT_CACHE_NAOW | HWMEM_ALLOC_HINT_CACHE_AOW |
+		HWMEM_ALLOC_HINT_INNER_AND_OUTER_CACHE |
+		HWMEM_ALLOC_HINT_INNER_CACHE_ONLY;
+
+	enum hwmem_alloc_flags cache_settings;
+
+	if (!(requested_cache_settings & CACHE_ON_FLAGS_MASK) &&
+		requested_cache_settings & (HWMEM_ALLOC_HINT_NO_WRITE_COMBINE |
+		HWMEM_ALLOC_HINT_UNCACHED | HWMEM_ALLOC_HINT_WRITE_COMBINE))
+		/*
+		 * We never use uncached as it's extremely slow and there is
+		 * no scenario where it would be better than buffered memory.
+		 */
+		return HWMEM_ALLOC_HINT_WRITE_COMBINE;
+
+	/*
+	 * The user has specified cached or nothing at all; both are treated as
+	 * cached.
+	 */
+	cache_settings = (requested_cache_settings &
+		~(HWMEM_ALLOC_HINT_UNCACHED |
+		HWMEM_ALLOC_HINT_NO_WRITE_COMBINE |
+		HWMEM_ALLOC_HINT_INNER_CACHE_ONLY |
+		HWMEM_ALLOC_HINT_CACHE_NAOW)) |
+		HWMEM_ALLOC_HINT_WRITE_COMBINE | HWMEM_ALLOC_HINT_CACHED |
+		HWMEM_ALLOC_HINT_CACHE_AOW |
+		HWMEM_ALLOC_HINT_INNER_AND_OUTER_CACHE;
+	if (!(cache_settings & (HWMEM_ALLOC_HINT_CACHE_WB |
+					HWMEM_ALLOC_HINT_CACHE_WT)))
+		cache_settings |= HWMEM_ALLOC_HINT_CACHE_WB;
+	/*
+	 * On ARMv7 "alloc on write" is just a hint so we need to assume the
+	 * worst case, i.e. "alloc on write". We would however like to remember
+	 * the requested "alloc on write" setting so that we can pass it on to
+	 * the hardware; we use the reserved bit in the alloc flags to do that.
+	 */
+	if (requested_cache_settings & HWMEM_ALLOC_HINT_CACHE_AOW)
+		cache_settings |= HWMEM_ALLOC_RESERVED_CHI;
+	else
+		cache_settings &= ~HWMEM_ALLOC_RESERVED_CHI;
+
+	return cache_settings;
+}
+
+void cachi_set_pgprot_cache_options(enum hwmem_alloc_flags cache_settings,
+							pgprot_t *pgprot)
+{
+	if (cache_settings & HWMEM_ALLOC_HINT_CACHED) {
+		if (cache_settings & HWMEM_ALLOC_HINT_CACHE_WT)
+			*pgprot = __pgprot_modify(*pgprot, L_PTE_MT_MASK,
+						L_PTE_MT_WRITETHROUGH);
+		else {
+			if (cache_settings & HWMEM_ALLOC_RESERVED_CHI)
+				*pgprot = __pgprot_modify(*pgprot,
+					L_PTE_MT_MASK, L_PTE_MT_WRITEALLOC);
+			else
+				*pgprot = __pgprot_modify(*pgprot,
+					L_PTE_MT_MASK, L_PTE_MT_WRITEBACK);
+		}
+	} else {
+		*pgprot = pgprot_writecombine(*pgprot);
+	}
+}
diff --git a/arch/arm/mach-ux500/include/mach/dcache.h b/arch/arm/mach-ux500/include/mach/dcache.h
new file mode 100644
index 00000000000..83fe618b04f
--- /dev/null
+++ b/arch/arm/mach-ux500/include/mach/dcache.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) ST-Ericsson SA 2011
+ *
+ * Data cache helpers
+ *
+ * Author: Johan Mossberg <johan.xx.mossberg@stericsson.com>
+ * for ST-Ericsson.
+ *
+ * License terms: GNU General Public License (GPL), version 2.
+ */
+
+#ifndef _MACH_UX500_DCACHE_H_
+#define _MACH_UX500_DCACHE_H_
+
+#include <linux/types.h>
+
+void drain_cpu_write_buf(void);
+void clean_cpu_dcache(void *vaddr, u32 paddr, u32 length, bool inner_only,
+						bool *cleaned_everything);
+void flush_cpu_dcache(void *vaddr, u32 paddr, u32 length, bool inner_only,
+						bool *flushed_everything);
+bool speculative_data_prefetch(void);
+/* Returns 1 if no cache is present */
+u32 get_dcache_granularity(void);
+
+#endif /* _MACH_UX500_DCACHE_H_ */
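
The breakpoint constants in dcache.c follow directly from the formula in the comment block; a minimal user-space sketch (values copied from the comments in the patch, not part of the patch itself) reproduces them:

    #include <stdio.h>

    /* breakpoint = best_case + (worst_case - best_case) * 0.666, as in dcache.c */
    static double breakpoint(double best_case, double worst_case)
    {
            return best_case + (worst_case - best_case) * 0.666;
    }

    int main(void)
    {
            printf("inner_clean_breakpoint ~ %.0f\n", breakpoint(21324, 32744));  /* ~28930 */
            printf("inner_flush_breakpoint ~ %.0f\n", breakpoint(21324, 43697));  /* ~36224 */
            printf("outer_clean_breakpoint ~ %.0f\n", breakpoint(68041, 347363)); /* ~254069 */
            printf("outer_flush_breakpoint ~ %.0f\n", breakpoint(68041, 694727)); /* ~485414 */
            return 0;
    }

The printed figures agree with the /* ... */ annotations above each constant (the u32 initializers in the patch truncate rather than round, so the stored values may land one lower).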
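The helpers exported through mach/dcache.h are meant to be driven by the hwmem cache handler. The fragment below is only an illustrative sketch of a caller; the buffer, its addresses and the surrounding driver context are hypothetical and not part of this patch:

    #include <linux/types.h>
    #include <mach/dcache.h>

    /* Hypothetical helper: make a CPU-written buffer visible to a device.
     * vaddr/paddr/len describe an existing kernel mapping of the buffer.
     */
    static void example_sync_for_device(void *vaddr, u32 paddr, u32 len)
    {
            bool cleaned_everything;

            /* Clean both inner (L1) and outer (L2) caches; above the
             * breakpoints the implementation falls back to a full clean/flush.
             */
            clean_cpu_dcache(vaddr, paddr, len, false, &cleaned_everything);

            /* Drain the write buffers before the device starts reading. */
            drain_cpu_write_buf();
    }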
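setup_hwmem() depends on the hwmem= and hwmem_prot= early parameters, which are read with memparse() in size@physical-address form. The sizes and addresses below are made-up placeholders, not values taken from the patch:

    hwmem=32M@0x1F000000 hwmem_prot=16M@0x21000000

The base address and size of the main pool must be page aligned and non-zero or setup_hwmem() bails out with -ENOMSG; the protected pool is optional and is only created when hwmem_prot= gives a non-zero size.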
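cachi_get_cache_settings() deliberately never hands out a truly uncached mapping. A hedged fragment illustrating that policy (kernel context assumed, cachi_get_cache_settings() taken to be declared via linux/hwmem.h; not part of the patch):

    #include <linux/hwmem.h>

    /* Illustration only: a request for uncached memory carries no cache-on
     * hints, so cachi_get_cache_settings() takes its first branch and returns
     * write-combined memory instead.
     */
    static enum hwmem_alloc_flags example_uncached_request(void)
    {
            enum hwmem_alloc_flags requested = HWMEM_ALLOC_HINT_UNCACHED;

            /* Expected result: HWMEM_ALLOC_HINT_WRITE_COMBINE. */
            return cachi_get_cache_settings(requested);
    }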