mutex: speed up generic mutex implementations

- atomic operations which both modify the variable and return something imply full smp memory barriers before and after the memory operations involved (failing atomic_cmpxchg, atomic_add_unless, etc don't imply a barrier because they don't modify the target). See Documentation/atomic_ops.txt. So remove extra barriers and branches. - All architectures support atomic_cmpxchg. This has no relation to __HAVE_ARCH_CMPXCHG. We can just take the atomic_cmpxchg path unconditionally This reduces a simple single threaded fastpath lock+unlock test from 590 cycles to 203 cycles on a ppc970 system. Signed-off-by: Nick Piggin <npiggin@suse.de> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Nick Piggin <npiggin@suse.de> 2008-10-21 10:59:15 +0200
committer: Linus Torvalds <torvalds@linux-foundation.org> 2008-10-23 09:18:20 -0700
commit: a8ddac7e53e89cb877965097d05adfeb1c91def3 (patch)
tree: db4ee686e50f7fb57b0cef20e0a8e7f06151e317 /include
parent: 5a439c565799cb8d290d71ce375e86be64d43a4b (diff)
2 files changed, 3 insertions, 32 deletions
diff --git a/include/asm-generic/mutex-dec.h b/include/asm-generic/mutex-dec.h
index ed108be6743..f104af7cf43 100644
--- a/include/asm-generic/mutex-dec.h
+++ b/include/asm-generic/mutex-dec.h
@@ -22,8 +22,6 @@ __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
 {
 	if (unlikely(atomic_dec_return(count) < 0))
 		fail_fn(count);
-	else
-		smp_mb();
 }
 
 /**
@@ -41,10 +39,7 @@ __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
 {
 	if (unlikely(atomic_dec_return(count) < 0))
 		return fail_fn(count);
-	else {
-		smp_mb();
-		return 0;
-	}
+	return 0;
 }
 
 /**
@@ -63,7 +58,6 @@ __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
 static inline void
 __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
 {
-	smp_mb();
 	if (unlikely(atomic_inc_return(count) <= 0))
 		fail_fn(count);
 }
@@ -88,25 +82,9 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
 static inline int
 __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
 {
-	/*
-	 * We have two variants here. The cmpxchg based one is the best one
-	 * because it never induce a false contention state.  It is included
-	 * here because architectures using the inc/dec algorithms over the
-	 * xchg ones are much more likely to support cmpxchg natively.
-	 *
-	 * If not we fall back to the spinlock based variant - that is
-	 * just as efficient (and simpler) as a 'destructive' probing of
-	 * the mutex state would be.
-	 */
-#ifdef __HAVE_ARCH_CMPXCHG
-	if (likely(atomic_cmpxchg(count, 1, 0) == 1)) {
-		smp_mb();
+	if (likely(atomic_cmpxchg(count, 1, 0) == 1))
 		return 1;
-	}
 	return 0;
-#else
-	return fail_fn(count);
-#endif
 }
 
 #endif
diff --git a/include/asm-generic/mutex-xchg.h b/include/asm-generic/mutex-xchg.h
index 7b9cd2cbfeb..580a6d35c70 100644
--- a/include/asm-generic/mutex-xchg.h
+++ b/include/asm-generic/mutex-xchg.h
@@ -27,8 +27,6 @@ __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
 {
 	if (unlikely(atomic_xchg(count, 0) != 1))
 		fail_fn(count);
-	else
-		smp_mb();
 }
 
 /**
@@ -46,10 +44,7 @@ __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
 {
 	if (unlikely(atomic_xchg(count, 0) != 1))
 		return fail_fn(count);
-	else {
-		smp_mb();
-		return 0;
-	}
+	return 0;
 }
 
 /**
@@ -67,7 +62,6 @@ __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
 static inline void
 __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
 {
-	smp_mb();
 	if (unlikely(atomic_xchg(count, 1) != 0))
 		fail_fn(count);
 }
@@ -110,7 +104,6 @@ __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
 		if (prev < 0)
 			prev = 0;
 	}
-	smp_mb();
 
 	return prev;
 }
author	Nick Piggin <npiggin@suse.de>	2008-10-21 10:59:15 +0200
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-10-23 09:18:20 -0700
commit	a8ddac7e53e89cb877965097d05adfeb1c91def3 (patch)
tree	db4ee686e50f7fb57b0cef20e0a8e7f06151e317 /include
parent	5a439c565799cb8d290d71ce375e86be64d43a4b (diff)