From fdce83c108846d6f0d5b1774e1cc29f2573a6ad3 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 4 Nov 2008 00:04:03 -0500 Subject: Blackfin: rewrite cache handling functions Take the cache flush functions from the kernel as they use hardware loops in order to get optimal performance. Signed-off-by: Mike Frysinger --- cpu/blackfin/cache.S | 118 +++++++++++++++++++++------------- include/asm-blackfin/blackfin_local.h | 4 ++ lib_blackfin/cache.c | 18 ++++-- 3 files changed, 90 insertions(+), 50 deletions(-) diff --git a/cpu/blackfin/cache.S b/cpu/blackfin/cache.S index 9facadfd1..6ed655a67 100644 --- a/cpu/blackfin/cache.S +++ b/cpu/blackfin/cache.S @@ -1,5 +1,10 @@ -/* cache.S - low level cache handling routines - * Copyright (C) 2003-2007 Analog Devices Inc. +/* + * Blackfin cache control code + * + * Copyright 2003-2008 Analog Devices Inc. + * + * Enter bugs at http://blackfin.uclinux.org/ + * * Licensed under the GPL-2 or later. */ @@ -8,54 +13,75 @@ #include .text -.align 2 -ENTRY(_blackfin_icache_flush_range) - R2 = -32; - R2 = R0 & R2; - P0 = R2; - P1 = R1; - CSYNC; +/* Since all L1 caches work the same way, we use the same method for flushing + * them. Only the actual flush instruction differs. We write this in asm as + * GCC can be hard to coax into writing nice hardware loops. + * + * Also, we assume the following register setup: + * R0 = start address + * R1 = end address + */ +.macro do_flush flushins:req optflushins optnopins label + + R2 = -L1_CACHE_BYTES; + + /* start = (start & -L1_CACHE_BYTES) */ + R0 = R0 & R2; + + /* end = ((end - 1) & -L1_CACHE_BYTES) + L1_CACHE_BYTES; */ + R1 += -1; + R1 = R1 & R2; + R1 += L1_CACHE_BYTES; + + /* count = (end - start) >> L1_CACHE_SHIFT */ + R2 = R1 - R0; + R2 >>= L1_CACHE_SHIFT; + P1 = R2; + +.ifnb \label +\label : +.endif + P0 = R0; + LSETUP (1f, 2f) LC1 = P1; 1: - IFLUSH[P0++]; - CC = P0 < P1(iu); - IF CC JUMP 1b(bp); - IFLUSH[P0]; - SSYNC; +.ifnb \optflushins + \optflushins [P0]; +.endif +#if ANOMALY_05000443 +.ifb \optnopins +2: +.endif + \flushins [P0++]; +.ifnb \optnopins +2: \optnopins; +.endif +#else +2: \flushins [P0++]; +#endif + RTS; +.endm + +/* Invalidate all instruction cache lines assocoiated with this memory area */ +ENTRY(_blackfin_icache_flush_range) + do_flush IFLUSH, , nop ENDPROC(_blackfin_icache_flush_range) -ENTRY(_blackfin_dcache_flush_range) - R2 = -32; - R2 = R0 & R2; - P0 = R2; - P1 = R1; - CSYNC; -1: - FLUSH[P0++]; - CC = P0 < P1(iu); - IF CC JUMP 1b(bp); - FLUSH[P0]; - SSYNC; - RTS; -ENDPROC(_blackfin_dcache_flush_range) +/* Flush all cache lines assocoiated with this area of memory. */ +ENTRY(_blackfin_icache_dcache_flush_range) + do_flush FLUSH, IFLUSH +ENDPROC(_blackfin_icache_dcache_flush_range) +/* Throw away all D-cached data in specified region without any obligation to + * write them back. Since the Blackfin ISA does not have an "invalidate" + * instruction, we use flush/invalidate. Perhaps as a speed optimization we + * could bang on the DTEST MMRs ... + */ ENTRY(_blackfin_dcache_flush_invalidate_range) - R2 = -32; - R2 = R0 & R2; - P0 = R2; - P1 = R1; - CSYNC; -1: - FLUSHINV[P0++]; - CC = P0 < P1(iu); - IF CC JUMP 1b(bp); - - /* - * If the data crosses a cache line, then we'll be pointing to - * the last cache line, but won't have flushed/invalidated it yet, so do - * one more. - */ - FLUSHINV[P0]; - SSYNC; - RTS; + do_flush FLUSHINV ENDPROC(_blackfin_dcache_flush_invalidate_range) + +/* Flush all data cache lines assocoiated with this memory area */ +ENTRY(_blackfin_dcache_flush_range) + do_flush FLUSH, , , .Ldfr +ENDPROC(_blackfin_dcache_flush_range) diff --git a/include/asm-blackfin/blackfin_local.h b/include/asm-blackfin/blackfin_local.h index 90259ba7d..4e1583443 100644 --- a/include/asm-blackfin/blackfin_local.h +++ b/include/asm-blackfin/blackfin_local.h @@ -43,6 +43,9 @@ #define SCLK_TO_MSEC(sclk) ((MSEC_PER_SEC * ((sclk) / USEC_PER_MSEC)) / (BFIN_SCLK / USEC_PER_MSEC)) #define MSEC_TO_SCLK(msec) ((((BFIN_SCLK / USEC_PER_MSEC) * (msec)) / MSEC_PER_SEC) * USEC_PER_MSEC) +#define L1_CACHE_SHIFT 5 +#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) + #include #ifndef __ASSEMBLY__ @@ -60,6 +63,7 @@ extern u_long get_sclk(void); extern void blackfin_icache_flush_range(const void *, const void *); extern void blackfin_dcache_flush_range(const void *, const void *); +extern void blackfin_icache_dcache_flush_range(const void *, const void *); extern void blackfin_dcache_flush_invalidate_range(const void *, const void *); /* Use DMA to move data from on chip to external memory. While this is diff --git a/lib_blackfin/cache.c b/lib_blackfin/cache.c index 870c5bfba..1557864f9 100644 --- a/lib_blackfin/cache.c +++ b/lib_blackfin/cache.c @@ -15,15 +15,25 @@ void flush_cache(unsigned long addr, unsigned long size) { + void *start_addr, *end_addr; + int istatus, dstatus; + /* no need to flush stuff in on chip memory (L1/L2/etc...) */ if (addr >= 0xE0000000) return; - if (icache_status()) - blackfin_icache_flush_range((void *)addr, (void *)(addr + size)); + start_addr = (void *)addr; + end_addr = (void *)(addr + size); + istatus = icache_status(); + dstatus = dcache_status(); - if (dcache_status()) - blackfin_dcache_flush_range((void *)addr, (void *)(addr + size)); + if (istatus) { + if (dstatus) + blackfin_icache_dcache_flush_range(start_addr, end_addr); + else + blackfin_icache_flush_range(start_addr, end_addr); + } else if (dstatus) + blackfin_dcache_flush_range(start_addr, end_addr); } void icache_enable(void) -- cgit v1.2.3