From 52400ba946759af28442dee6265c5c0180ac7122 Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Fri, 3 Apr 2009 13:40:49 -0700 Subject: futex: add requeue_pi functionality PI Futexes and their underlying rt_mutex cannot be left ownerless if there are pending waiters as this will break the PI boosting logic, so the standard requeue commands aren't sufficient. The new commands properly manage pi futex ownership by ensuring a futex with waiters has an owner at all times. This will allow glibc to properly handle pi mutexes with pthread_condvars. The approach taken here is to create two new futex op codes: FUTEX_WAIT_REQUEUE_PI: Tasks will use this op code to wait on a futex (such as a non-pi waitqueue) and wake after they have been requeued to a pi futex. Prior to returning to userspace, they will acquire this pi futex (and the underlying rt_mutex). futex_wait_requeue_pi() is the result of a high speed collision between futex_wait() and futex_lock_pi() (with the first part of futex_lock_pi() being done by futex_proxy_trylock_atomic() on behalf of the top_waiter). FUTEX_REQUEUE_PI (and FUTEX_CMP_REQUEUE_PI): This call must be used to wake tasks waiting with FUTEX_WAIT_REQUEUE_PI, regardless of how many tasks the caller intends to wake or requeue. pthread_cond_broadcast() should call this with nr_wake=1 and nr_requeue=INT_MAX. pthread_cond_signal() should call this with nr_wake=1 and nr_requeue=0. The reason being we need both callers to get the benefit of the futex_proxy_trylock_atomic() routine. futex_requeue() also enqueues the top_waiter on the rt_mutex via rt_mutex_start_proxy_lock(). Signed-off-by: Darren Hart Reviewed-by: Thomas Gleixner Signed-off-by: Thomas Gleixner --- include/linux/futex.h | 8 ++++++++ include/linux/thread_info.h | 3 ++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/futex.h b/include/linux/futex.h index 3bf5bb5a34f..b05519ca9e5 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -23,6 +23,9 @@ union ktime; #define FUTEX_TRYLOCK_PI 8 #define FUTEX_WAIT_BITSET 9 #define FUTEX_WAKE_BITSET 10 +#define FUTEX_WAIT_REQUEUE_PI 11 +#define FUTEX_REQUEUE_PI 12 +#define FUTEX_CMP_REQUEUE_PI 13 #define FUTEX_PRIVATE_FLAG 128 #define FUTEX_CLOCK_REALTIME 256 @@ -38,6 +41,11 @@ union ktime; #define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG) #define FUTEX_WAIT_BITSET_PRIVATE (FUTEX_WAIT_BITS | FUTEX_PRIVATE_FLAG) #define FUTEX_WAKE_BITSET_PRIVATE (FUTEX_WAKE_BITS | FUTEX_PRIVATE_FLAG) +#define FUTEX_WAIT_REQUEUE_PI_PRIVATE (FUTEX_WAIT_REQUEUE_PI | \ + FUTEX_PRIVATE_FLAG) +#define FUTEX_REQUEUE_PI_PRIVATE (FUTEX_REQUEUE_PI | FUTEX_PRIVATE_FLAG) +#define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \ + FUTEX_PRIVATE_FLAG) /* * Support for robust futexes: the kernel cleans up held futexes at diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index e6b820f8b56..a8cc4e13434 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -21,13 +21,14 @@ struct restart_block { struct { unsigned long arg0, arg1, arg2, arg3; }; - /* For futex_wait */ + /* For futex_wait and futex_wait_requeue_pi */ struct { u32 *uaddr; u32 val; u32 flags; u32 bitset; u64 time; + u32 *uaddr2; } futex; /* For nanosleep */ struct { -- cgit v1.2.3 From ba9c22f2c01cf5c88beed5a6b9e07d42e10bd358 Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Mon, 20 Apr 2009 22:22:22 -0700 Subject: futex: remove FUTEX_REQUEUE_PI (non CMP) The new requeue PI futex op codes were modeled after the existing FUTEX_REQUEUE and FUTEX_CMP_REQUEUE calls. I was unaware at the time that FUTEX_REQUEUE was only around for compatibility reasons and shouldn't be used in new code. Ulrich Drepper elaborates on this in his Futexes are Tricky paper: http://people.redhat.com/drepper/futex.pdf. The deprecated call doesn't catch changes to the futex corresponding to the destination futex which can lead to deadlock. Therefor, I feel it best to remove FUTEX_REQUEUE_PI and leave only FUTEX_CMP_REQUEUE_PI as there are not yet any existing users of the API. This patch does change the OP code value of FUTEX_CMP_REQUEUE_PI to 12 from 13. Since my test case is the only known user of this API, I felt this was the right thing to do, rather than leave a hole in the enumeration. I chose to continue using the _CMP_ modifier in the OP code to make it explicit to the user that the test is being done. Builds, boots, and ran several hundred iterations requeue_pi.c. Signed-off-by: Darren Hart LKML-Reference: <49ED580E.1050502@us.ibm.com> Signed-off-by: Thomas Gleixner --- include/linux/futex.h | 4 +--- kernel/futex.c | 6 +----- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/futex.h b/include/linux/futex.h index b05519ca9e5..34956c8fdeb 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -24,8 +24,7 @@ union ktime; #define FUTEX_WAIT_BITSET 9 #define FUTEX_WAKE_BITSET 10 #define FUTEX_WAIT_REQUEUE_PI 11 -#define FUTEX_REQUEUE_PI 12 -#define FUTEX_CMP_REQUEUE_PI 13 +#define FUTEX_CMP_REQUEUE_PI 12 #define FUTEX_PRIVATE_FLAG 128 #define FUTEX_CLOCK_REALTIME 256 @@ -43,7 +42,6 @@ union ktime; #define FUTEX_WAKE_BITSET_PRIVATE (FUTEX_WAKE_BITS | FUTEX_PRIVATE_FLAG) #define FUTEX_WAIT_REQUEUE_PI_PRIVATE (FUTEX_WAIT_REQUEUE_PI | \ FUTEX_PRIVATE_FLAG) -#define FUTEX_REQUEUE_PI_PRIVATE (FUTEX_REQUEUE_PI | FUTEX_PRIVATE_FLAG) #define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \ FUTEX_PRIVATE_FLAG) diff --git a/kernel/futex.c b/kernel/futex.c index 6d2daa46f9f..aec8bf89bf4 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2555,9 +2555,6 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, ret = futex_wait_requeue_pi(uaddr, fshared, val, timeout, val3, clockrt, uaddr2); break; - case FUTEX_REQUEUE_PI: - ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 1); - break; case FUTEX_CMP_REQUEUE_PI: ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, 1); @@ -2596,8 +2593,7 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP. */ if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE || - cmd == FUTEX_REQUEUE_PI || cmd == FUTEX_CMP_REQUEUE_PI || - cmd == FUTEX_WAKE_OP) + cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) val2 = (u32) (unsigned long) utime; return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); -- cgit v1.2.3