From bd6d29c25bb1a24a4c160ec5de43e0004e01f72b Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Tue, 6 Apr 2010 00:10:17 +0200
Subject: lockstat: Make lockstat counting per cpu

Locking statistics are implemented using global atomic variables.
This is usually fine unless some path writes them very often.

This is the case for the function and function graph tracers
that disable irqs for each entry saved (except if the function
tracer is in preempt disabled only mode).
And calls to local_irq_save/restore() increment hardirqs_on_events
and hardirqs_off_events stats (or similar stats for redundant
versions).

Incrementing these global vars for each function ends up causing
too much cache bouncing if lockstats are enabled.

To solve this, implement the debug_atomic_*() operations using
per cpu vars.

-v2: Use per_cpu() instead of get_cpu_var() to fetch the desired
     cpu vars on debug_atomic_read()

-v3: Store the stats in a structure. No need for local_t as
     we are NMI/irq safe.

-v4: Fix tons of build errors. I thought I had tested it but
     I probably forgot to select the relevant config.

Suggested-by: Steven Rostedt
Signed-off-by: Frederic Weisbecker
Cc: Peter Zijlstra
Cc: Steven Rostedt
LKML-Reference: <1270505417-8144-1-git-send-regression-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar
Cc: Peter Zijlstra
Cc: Steven Rostedt
---
 kernel/lockdep.c | 47 +++++++++++++++++------------------------------
 1 file changed, 17 insertions(+), 30 deletions(-)

(limited to 'kernel/lockdep.c')

diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 0c30d0455de..069af0276bf 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -430,20 +430,7 @@ static struct stack_trace lockdep_init_trace = {
 /*
  * Various lockdep statistics:
  */
-atomic_t chain_lookup_hits;
-atomic_t chain_lookup_misses;
-atomic_t hardirqs_on_events;
-atomic_t hardirqs_off_events;
-atomic_t redundant_hardirqs_on;
-atomic_t redundant_hardirqs_off;
-atomic_t softirqs_on_events;
-atomic_t softirqs_off_events;
-atomic_t redundant_softirqs_on;
-atomic_t redundant_softirqs_off;
-atomic_t nr_unused_locks;
-atomic_t nr_cyclic_checks;
-atomic_t nr_find_usage_forwards_checks;
-atomic_t nr_find_usage_backwards_checks;
+DEFINE_PER_CPU(struct lockdep_stats, lockdep_stats);
 #endif

 /*
@@ -758,7 +745,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
 		return NULL;
 	}
 	class = lock_classes + nr_lock_classes++;
-	debug_atomic_inc(&nr_unused_locks);
+	debug_atomic_inc(nr_unused_locks);
 	class->key = key;
 	class->name = lock->name;
 	class->subclass = subclass;
@@ -1215,7 +1202,7 @@ check_noncircular(struct lock_list *root, struct lock_class *target,
 {
 	int result;

-	debug_atomic_inc(&nr_cyclic_checks);
+	debug_atomic_inc(nr_cyclic_checks);

 	result = __bfs_forwards(root, target, class_equal, target_entry);

@@ -1252,7 +1239,7 @@ find_usage_forwards(struct lock_list *root, enum lock_usage_bit bit,
 {
 	int result;

-	debug_atomic_inc(&nr_find_usage_forwards_checks);
+	debug_atomic_inc(nr_find_usage_forwards_checks);

 	result = __bfs_forwards(root, (void *)bit, usage_match, target_entry);

@@ -1275,7 +1262,7 @@ find_usage_backwards(struct lock_list *root, enum lock_usage_bit bit,
 {
 	int result;

-	debug_atomic_inc(&nr_find_usage_backwards_checks);
+	debug_atomic_inc(nr_find_usage_backwards_checks);

 	result = __bfs_backwards(root, (void *)bit, usage_match, target_entry);

@@ -1835,7 +1822,7 @@ static inline int lookup_chain_cache(struct task_struct *curr,
 	list_for_each_entry(chain, hash_head, entry) {
 		if (chain->chain_key == chain_key) {
 cache_hit:
-			debug_atomic_inc(&chain_lookup_hits);
+			debug_atomic_inc(chain_lookup_hits);
 			if (very_verbose(class))
 				printk("\nhash chain already cached, key: "
 					"%016Lx tail class: [%p] %s\n",
@@ -1900,7 +1887,7 @@ cache_hit:
 		chain_hlocks[chain->base + j] = class - lock_classes;
 	}
 	list_add_tail_rcu(&chain->entry, hash_head);
-	debug_atomic_inc(&chain_lookup_misses);
+	debug_atomic_inc(chain_lookup_misses);
 	inc_chains();

 	return 1;
@@ -2321,7 +2308,7 @@ void trace_hardirqs_on_caller(unsigned long ip)
 		return;

 	if (unlikely(curr->hardirqs_enabled)) {
-		debug_atomic_inc(&redundant_hardirqs_on);
+		debug_atomic_inc(redundant_hardirqs_on);
 		return;
 	}
 	/* we'll do an OFF -> ON transition: */
@@ -2348,7 +2335,7 @@ void trace_hardirqs_on_caller(unsigned long ip)

 	curr->hardirq_enable_ip = ip;
 	curr->hardirq_enable_event = ++curr->irq_events;
-	debug_atomic_inc(&hardirqs_on_events);
+	debug_atomic_inc(hardirqs_on_events);
 }
 EXPORT_SYMBOL(trace_hardirqs_on_caller);

@@ -2380,9 +2367,9 @@ void trace_hardirqs_off_caller(unsigned long ip)
 		curr->hardirqs_enabled = 0;
 		curr->hardirq_disable_ip = ip;
 		curr->hardirq_disable_event = ++curr->irq_events;
-		debug_atomic_inc(&hardirqs_off_events);
+		debug_atomic_inc(hardirqs_off_events);
 	} else
-		debug_atomic_inc(&redundant_hardirqs_off);
+		debug_atomic_inc(redundant_hardirqs_off);
 }
 EXPORT_SYMBOL(trace_hardirqs_off_caller);

@@ -2406,7 +2393,7 @@ void trace_softirqs_on(unsigned long ip)
 		return;

 	if (curr->softirqs_enabled) {
-		debug_atomic_inc(&redundant_softirqs_on);
+		debug_atomic_inc(redundant_softirqs_on);
 		return;
 	}

@@ -2416,7 +2403,7 @@ void trace_softirqs_on(unsigned long ip)
 	curr->softirqs_enabled = 1;
 	curr->softirq_enable_ip = ip;
 	curr->softirq_enable_event = ++curr->irq_events;
-	debug_atomic_inc(&softirqs_on_events);
+	debug_atomic_inc(softirqs_on_events);
 	/*
 	 * We are going to turn softirqs on, so set the
 	 * usage bit for all held locks, if hardirqs are
@@ -2446,10 +2433,10 @@ void trace_softirqs_off(unsigned long ip)
 		curr->softirqs_enabled = 0;
 		curr->softirq_disable_ip = ip;
 		curr->softirq_disable_event = ++curr->irq_events;
-		debug_atomic_inc(&softirqs_off_events);
+		debug_atomic_inc(softirqs_off_events);
 		DEBUG_LOCKS_WARN_ON(!softirq_count());
 	} else
-		debug_atomic_inc(&redundant_softirqs_off);
+		debug_atomic_inc(redundant_softirqs_off);
 }

 static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags)
@@ -2654,7 +2641,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
 			return 0;
 		break;
 	case LOCK_USED:
-		debug_atomic_dec(&nr_unused_locks);
+		debug_atomic_dec(nr_unused_locks);
 		break;
 	default:
 		if (!debug_locks_off_graph_unlock())
@@ -2760,7 +2747,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 		if (!class)
 			return 0;
 	}
-	debug_atomic_inc((atomic_t *)&class->ops);
+	atomic_inc((atomic_t *)&class->ops);
 	if (very_verbose(class)) {
 		printk("\nacquire class [%p] %s", class->key, class->name);
 		if (class->name_version > 1)
--
cgit v1.2.3
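For context, the per cpu counterparts that replace the removed global atomics live in
kernel/lockdep_internals.h, which only appears in fragments in a later patch below. A sketch of
their shape (the field list simply mirrors the deleted atomic_t variables; details may differ
from the actual header): writers only touch the local CPU's copy, while debug_atomic_read()
is no longer an atomic snapshot but a sum over all CPUs, which is acceptable for statistics.

struct lockdep_stats {
	int	chain_lookup_hits;
	int	chain_lookup_misses;
	int	hardirqs_on_events;
	int	hardirqs_off_events;
	int	redundant_hardirqs_on;
	int	redundant_hardirqs_off;
	int	softirqs_on_events;
	int	softirqs_off_events;
	int	redundant_softirqs_on;
	int	redundant_softirqs_off;
	int	nr_unused_locks;
	int	nr_cyclic_checks;
	int	nr_find_usage_forwards_checks;
	int	nr_find_usage_backwards_checks;
};

DECLARE_PER_CPU(struct lockdep_stats, lockdep_stats);

/* Increment only touches this CPU's cache line, so there is no bouncing: */
#define debug_atomic_inc(ptr)			{		\
	WARN_ON_ONCE(!irqs_disabled());				\
	this_cpu_inc(lockdep_stats.ptr);			\
}

/* Readers (the stats output path) sum the field over all possible CPUs: */
#define debug_atomic_read(ptr)		({			\
	struct lockdep_stats *__cpu_lockdep_stats;		\
	unsigned long long __total = 0;				\
	int __cpu;						\
	for_each_possible_cpu(__cpu) {				\
		__cpu_lockdep_stats = &per_cpu(lockdep_stats, __cpu);	\
		__total += __cpu_lockdep_stats->ptr;		\
	}							\
	__total;						\
})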
From 8795d7717c467bea7b0a0649d44a258e09f34db2 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Thu, 15 Apr 2010 23:10:42 +0200
Subject: lockdep: Fix redundant_hardirqs_on incremented with irqs enabled

When a path restores the flags while irqs are already enabled, we
update the per cpu var redundant_hardirqs_on in a racy fashion and
debug_atomic_inc() warns about this situation.

In this particular case, losing a few hits in a stat is not a big
deal, so increment it without protection.

v2: Don't bother with disabling irqs, we can afford to miss a count
    in rare situations

Reported-by: Stephen Rothwell
Signed-off-by: Frederic Weisbecker
Cc: Peter Zijlstra
Cc: David Miller
Cc: Benjamin Herrenschmidt
---
 kernel/lockdep.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'kernel/lockdep.c')

diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 78325f8f113..1b58a1bbcc8 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -2298,7 +2298,12 @@ void trace_hardirqs_on_caller(unsigned long ip)
 		return;

 	if (unlikely(curr->hardirqs_enabled)) {
-		debug_atomic_inc(redundant_hardirqs_on);
+		/*
+		 * Neither irq nor preemption are disabled here
+		 * so this is racy by nature but loosing one hit
+		 * in a stat is not a big deal.
+		 */
+		this_cpu_inc(lockdep_stats.redundant_hardirqs_on);
 		return;
 	}
 	/* we'll do an OFF -> ON transition: */
--
cgit v1.2.3


From ba697f40dbb704956a4cf67a7845b538015a01ea Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Tue, 4 May 2010 04:47:25 +0200
Subject: lockdep: Provide off case for redundant_hardirqs_on increment

We forgot to provide a !CONFIG_DEBUG_LOCKDEP case for the
redundant_hardirqs_on stat handling.

Manage that in the headers with a new __debug_atomic_inc() helper.

Fixes:
	kernel/lockdep.c:2306: error: 'lockdep_stats' undeclared (first use in this function)
	kernel/lockdep.c:2306: error: (Each undeclared identifier is reported only once
	kernel/lockdep.c:2306: error: for each function it appears in.)

Reported-by: Ingo Molnar
Signed-off-by: Frederic Weisbecker
Cc: Peter Zijlstra
---
 kernel/lockdep.c           | 2 +-
 kernel/lockdep_internals.h | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'kernel/lockdep.c')

diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 1b58a1bbcc8..9cf79858fd8 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -2303,7 +2303,7 @@ void trace_hardirqs_on_caller(unsigned long ip)
 		 * so this is racy by nature but loosing one hit
 		 * in a stat is not a big deal.
 		 */
-		this_cpu_inc(lockdep_stats.redundant_hardirqs_on);
+		__debug_atomic_inc(redundant_hardirqs_on);
 		return;
 	}
 	/* we'll do an OFF -> ON transition: */
diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h
index 2b174762fa0..7de27a80f80 100644
--- a/kernel/lockdep_internals.h
+++ b/kernel/lockdep_internals.h
@@ -139,6 +139,9 @@ struct lockdep_stats {

 DECLARE_PER_CPU(struct lockdep_stats, lockdep_stats);

+#define __debug_atomic_inc(ptr)					\
+	this_cpu_inc(lockdep_stats.ptr);
+
 #define debug_atomic_inc(ptr)			{		\
 	WARN_ON_ONCE(!irqs_disabled());				\
 	this_cpu_inc(lockdep_stats.ptr);			\
@@ -160,6 +163,7 @@ DECLARE_PER_CPU(struct lockdep_stats, lockdep_stats);
 	__total;						\
 })
 #else
+# define __debug_atomic_inc(ptr)	do { } while (0)
 # define debug_atomic_inc(ptr)		do { } while (0)
 # define debug_atomic_dec(ptr)		do { } while (0)
 # define debug_atomic_read(ptr)		0
--
cgit v1.2.3
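A minimal userspace illustration (hypothetical code, not part of these patches) of the trade-off
the two patches above accept: an unsynchronized read-modify-write on a shared counter can drop
increments, which is harmless for a statistic but is exactly what the WARN_ON_ONCE(!irqs_disabled())
in debug_atomic_inc() is meant to catch. The kernel's real per cpu primitives differ in detail;
this only shows why an unprotected counter may undercount. Build with: cc -pthread race.c

#include <pthread.h>
#include <stdio.h>

static long stat_hits;			/* stands in for one lockdep_stats field */

static void *hammer(void *arg)
{
	for (int i = 0; i < 1000000; i++)
		stat_hits++;		/* deliberately racy read-modify-write */
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, hammer, NULL);
	pthread_create(&b, NULL, hammer, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);

	/* Usually prints less than 2000000: some hits were lost. */
	printf("stat_hits = %ld (2000000 expected)\n", stat_hits);
	return 0;
}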
From 4726f2a617ebd868a4fdeb5679613b897e5f1676 Mon Sep 17 00:00:00 2001
From: Yong Zhang
Date: Tue, 4 May 2010 14:16:48 +0800
Subject: lockdep: Reduce stack_trace usage

When calling check_prevs_add(), if all validations pass,
add_lock_to_list() will add the new lock to the dependency tree and
allocate a stack_trace for each list_entry.

But at this time we are always on the same stack, so the stack_trace
for each list_entry has the same value. This is redundant and eats up
lots of memory, which can trigger the "MAX_STACK_TRACE_ENTRIES too low"
warning.

Use one copy of stack_trace instead.

V2: As suggested by Peter Zijlstra, move save_trace() from
    check_prevs_add() to check_prev_add().
    Add tracking for trylock dependencies, which are also redundant.

Signed-off-by: Yong Zhang
Cc: David S. Miller
Signed-off-by: Peter Zijlstra
LKML-Reference: <20100504065711.GC10784@windriver.com>
Signed-off-by: Ingo Molnar
---
 kernel/lockdep.c | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

(limited to 'kernel/lockdep.c')

diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 9cf79858fd8..51080807dc8 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -805,7 +805,8 @@ static struct lock_list *alloc_list_entry(void)
  * Add a new dependency to the head of the list:
  */
 static int add_lock_to_list(struct lock_class *class, struct lock_class *this,
-			    struct list_head *head, unsigned long ip, int distance)
+			    struct list_head *head, unsigned long ip,
+			    int distance, struct stack_trace *trace)
 {
 	struct lock_list *entry;
 	/*
@@ -816,11 +817,9 @@ static int add_lock_to_list(struct lock_class *class, struct lock_class *this,
 	if (!entry)
 		return 0;

-	if (!save_trace(&entry->trace))
-		return 0;
-
 	entry->class = this;
 	entry->distance = distance;
+	entry->trace = *trace;
 	/*
 	 * Since we never remove from the dependency list, the list can
 	 * be walked lockless by other CPUs, it's only allocation
@@ -1622,12 +1621,20 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
  */
 static int
 check_prev_add(struct task_struct *curr, struct held_lock *prev,
-	       struct held_lock *next, int distance)
+	       struct held_lock *next, int distance, int trylock_loop)
 {
 	struct lock_list *entry;
 	int ret;
 	struct lock_list this;
 	struct lock_list *uninitialized_var(target_entry);
+	/*
+	 * Static variable, serialized by the graph_lock().
+	 *
+	 * We use this static variable to save the stack trace in case
+	 * we call into this function multiple times due to encountering
+	 * trylocks in the held lock stack.
+	 */
+	static struct stack_trace trace;

 	/*
 	 * Prove that the new <prev> -> <next> dependency would not
@@ -1675,20 +1682,23 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
 		}
 	}

+	if (!trylock_loop && !save_trace(&trace))
+		return 0;
+
 	/*
 	 * Ok, all validations passed, add the new lock
 	 * to the previous lock's dependency list:
 	 */
 	ret = add_lock_to_list(hlock_class(prev), hlock_class(next),
			       &hlock_class(prev)->locks_after,
-			       next->acquire_ip, distance);
+			       next->acquire_ip, distance, &trace);

 	if (!ret)
 		return 0;

 	ret = add_lock_to_list(hlock_class(next), hlock_class(prev),
			       &hlock_class(next)->locks_before,
-			       next->acquire_ip, distance);
+			       next->acquire_ip, distance, &trace);

 	if (!ret)
 		return 0;
@@ -1718,6 +1728,7 @@ static int
 check_prevs_add(struct task_struct *curr, struct held_lock *next)
 {
 	int depth = curr->lockdep_depth;
+	int trylock_loop = 0;
 	struct held_lock *hlock;

 	/*
@@ -1743,7 +1754,8 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
 		 * added:
 		 */
 		if (hlock->read != 2) {
-			if (!check_prev_add(curr, hlock, next, distance))
+			if (!check_prev_add(curr, hlock, next,
+						distance, trylock_loop))
 				return 0;
 			/*
 			 * Stop after the first non-trylock entry,
@@ -1766,6 +1778,7 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
 		if (curr->held_locks[depth].irq_context !=
 				curr->held_locks[depth-1].irq_context)
 			break;
+		trylock_loop = 1;
 	}
 	return 1;
 out_bug:
--
cgit v1.2.3


From 883a2a3189dae9d2912c417e47152f51cb922a3f Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Sat, 8 May 2010 06:16:11 +0200
Subject: tracing: Drop lock_acquired waittime field

Drop the waittime field from the lock_acquired event, we can
calculate it by subtracting the timestamp of the matching
lock_acquire event from that of the lock_acquired event. It is
not needed and only wastes space in the traces.

Signed-off-by: Frederic Weisbecker
Cc: Ingo Molnar
Cc: Peter Zijlstra
Cc: Arnaldo Carvalho de Melo
Cc: Paul Mackerras
Cc: Hitoshi Mitake
Cc: Steven Rostedt
---
 include/trace/events/lock.h | 11 ++++-------
 kernel/lockdep.c            |  2 +-
 2 files changed, 5 insertions(+), 8 deletions(-)

(limited to 'kernel/lockdep.c')

diff --git a/include/trace/events/lock.h b/include/trace/events/lock.h
index 5c1dcfc16c6..17ca287ae17 100644
--- a/include/trace/events/lock.h
+++ b/include/trace/events/lock.h
@@ -78,24 +78,21 @@ TRACE_EVENT(lock_contended,
 );

 TRACE_EVENT(lock_acquired,

-	TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime),
+	TP_PROTO(struct lockdep_map *lock, unsigned long ip),

-	TP_ARGS(lock, ip, waittime),
+	TP_ARGS(lock, ip),

 	TP_STRUCT__entry(
 		__string(name, lock->name)
-		__field(s64, wait_nsec)
 		__field(void *, lockdep_addr)
 	),

 	TP_fast_assign(
 		__assign_str(name, lock->name);
-		__entry->wait_nsec = waittime;
 		__entry->lockdep_addr = lock;
 	),

-	TP_printk("%p %s (%llu ns)", __entry->lockdep_addr,
-		  __get_str(name),
-		  __entry->wait_nsec)
+	TP_printk("%p %s", __entry->lockdep_addr,
+		  __get_str(name))
 );

 #endif
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 2594e1ce41c..31e22e74236 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3380,7 +3380,7 @@ found_it:
 		hlock->holdtime_stamp = now;
 	}

-	trace_lock_acquired(lock, ip, waittime);
+	trace_lock_acquired(lock, ip);

 	stats = get_lock_stats(hlock_class(hlock));
 	if (waittime) {
--
cgit v1.2.3
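The post-processing this changelog relies on can be made concrete with a small sketch
(hypothetical record layout and helper names, not an actual tool): pair each lock_acquired
record with the closest preceding lock_acquire record for the same lockdep_addr and subtract
the timestamps to recover the dropped wait_nsec value. A real tool would also have to match
records per task and CPU; this only illustrates the subtraction.

#include <stdio.h>
#include <string.h>

struct record {
	const char		*event;		/* "lock_acquire" or "lock_acquired" */
	void			*lockdep_addr;	/* the lock instance, as traced */
	unsigned long long	ts_nsec;	/* trace timestamp in nanoseconds */
};

/* Recompute the wait time the event payload no longer carries. */
static void report_waittimes(const struct record *log, int nr)
{
	for (int i = 0; i < nr; i++) {
		if (strcmp(log[i].event, "lock_acquired"))
			continue;
		/* walk backwards to the matching acquire for this lock */
		for (int j = i - 1; j >= 0; j--) {
			if (log[j].lockdep_addr == log[i].lockdep_addr &&
			    !strcmp(log[j].event, "lock_acquire")) {
				printf("%p waited %llu ns\n",
				       log[i].lockdep_addr,
				       log[i].ts_nsec - log[j].ts_nsec);
				break;
			}
		}
	}
}

int main(void)
{
	struct record log[] = {
		{ "lock_acquire",  (void *)0x1000, 100 },
		{ "lock_acquire",  (void *)0x2000, 120 },
		{ "lock_acquired", (void *)0x1000, 250 },	/* waited 150 ns */
		{ "lock_acquired", (void *)0x2000, 300 },	/* waited 180 ns */
	};

	report_waittimes(log, 4);
	return 0;
}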
From 93135439459920c4d856f4ab8f068c030085c8df Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Sat, 8 May 2010 06:24:25 +0200
Subject: tracing: Drop the nested field from lock_release event

Drop the nested field as we don't use it. Every nesting state can
be recomputed by a state machine during post-processing already.

Signed-off-by: Frederic Weisbecker
Cc: Ingo Molnar
Cc: Peter Zijlstra
Cc: Hitoshi Mitake
Cc: Steven Rostedt
---
 include/trace/events/lock.h | 4 ++--
 kernel/lockdep.c            | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel/lockdep.c')

diff --git a/include/trace/events/lock.h b/include/trace/events/lock.h
index 17ca287ae17..fde4c385339 100644
--- a/include/trace/events/lock.h
+++ b/include/trace/events/lock.h
@@ -37,9 +37,9 @@ TRACE_EVENT(lock_acquire,

 TRACE_EVENT(lock_release,

-	TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip),
+	TP_PROTO(struct lockdep_map *lock, unsigned long ip),

-	TP_ARGS(lock, nested, ip),
+	TP_ARGS(lock, ip),

 	TP_STRUCT__entry(
 		__string(name, lock->name)
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 31e22e74236..e9c759f06c1 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3227,7 +3227,7 @@ void lock_release(struct lockdep_map *lock, int nested,
 	raw_local_irq_save(flags);
 	check_flags(flags);
 	current->lockdep_recursion = 1;
-	trace_lock_release(lock, nested, ip);
+	trace_lock_release(lock, ip);
 	__lock_release(lock, nested, ip);
 	current->lockdep_recursion = 0;
 	raw_local_irq_restore(flags);
--
cgit v1.2.3


From 2b3fc35f6919344e3cf722dde8308f47235c0b70 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan
Date: Tue, 20 Apr 2010 16:23:07 +0800
Subject: rcu: optionally leave lockdep enabled after RCU lockdep splat

There is no need to disable lockdep after an RCU lockdep splat, so
remove the debug_locks_off() from lockdep_rcu_dereference().

To avoid repeated lockdep splats, use a static variable in the inlined
rcu_dereference_check() and rcu_dereference_protected() macros so that
a given instance splats only once, but so that multiple instances can
be detected per boot.

This is controlled by a new config variable CONFIG_PROVE_RCU_REPEATEDLY,
which is disabled by default. This provides the normal lockdep behavior
by default, but permits people who want to find multiple RCU-lockdep
splats per boot to easily do so.

Requested-by: Eric Paris
Signed-off-by: Lai Jiangshan
Tested-by: Eric Paris
Signed-off-by: Paul E. McKenney
---
 include/linux/rcupdate.h | 15 +++++++++++----
 kernel/lockdep.c         |  3 +++
 lib/Kconfig.debug        | 12 ++++++++++++
 3 files changed, 26 insertions(+), 4 deletions(-)

(limited to 'kernel/lockdep.c')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index db266bbed23..4dca2752cfd 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -192,6 +192,15 @@ static inline int rcu_read_lock_sched_held(void)

 extern int rcu_my_thread_group_empty(void);

+#define __do_rcu_dereference_check(c)					\
+	do {								\
+		static bool __warned;					\
+		if (debug_lockdep_rcu_enabled() && !__warned && !(c)) {	\
+			__warned = true;				\
+			lockdep_rcu_dereference(__FILE__, __LINE__);	\
+		}							\
+	} while (0)
+
 /**
  * rcu_dereference_check - rcu_dereference with debug checking
  * @p: The pointer to read, prior to dereferencing
@@ -221,8 +230,7 @@ extern int rcu_my_thread_group_empty(void);
  */
 #define rcu_dereference_check(p, c) \
 	({ \
-		if (debug_lockdep_rcu_enabled() && !(c)) \
-			lockdep_rcu_dereference(__FILE__, __LINE__); \
+		__do_rcu_dereference_check(c); \
 		rcu_dereference_raw(p); \
 	})

@@ -239,8 +247,7 @@ extern int rcu_my_thread_group_empty(void);
  */
 #define rcu_dereference_protected(p, c) \
 	({ \
-		if (debug_lockdep_rcu_enabled() && !(c)) \
-			lockdep_rcu_dereference(__FILE__, __LINE__); \
+		__do_rcu_dereference_check(c); \
 		(p); \
 	})

diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 2594e1ce41c..3a756ba8d5d 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3801,8 +3801,11 @@ void lockdep_rcu_dereference(const char *file, const int line)
 {
 	struct task_struct *curr = current;

+#ifndef CONFIG_PROVE_RCU_REPEATEDLY
 	if (!debug_locks_off())
 		return;
+#endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */
+	/* Note: the following can be executed concurrently, so be careful. */
 	printk("\n===================================================\n");
 	printk(  "[ INFO: suspicious rcu_dereference_check() usage. ]\n");
 	printk(  "---------------------------------------------------\n");
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 935248bdbc4..94090b4bb7d 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -512,6 +512,18 @@ config PROVE_RCU

 	  Say N if you are unsure.

+config PROVE_RCU_REPEATEDLY
+	bool "RCU debugging: don't disable PROVE_RCU on first splat"
+	depends on PROVE_RCU
+	default n
+	help
+	  By itself, PROVE_RCU will disable checking upon issuing the
+	  first warning (or "splat"). This feature prevents such
+	  disabling, allowing multiple RCU-lockdep warnings to be printed
+	  on a single reboot.
+
+	  Say N if you are unsure.
+
 config LOCKDEP
 	bool
 	depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
--
cgit v1.2.3
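The once-per-callsite behaviour introduced above comes entirely from the static __warned
variable living inside the macro body: every expansion site gets its own copy, so each
offending callsite splats once while other callsites still get reported. A standalone C
sketch of the same trick (hypothetical names, ordinary userspace code rather than the
kernel macro):

#include <stdbool.h>
#include <stdio.h>

#define warn_once_per_callsite(cond, msg)				\
	do {								\
		static bool __warned;	/* one copy per expansion site */ \
		if (!__warned && !(cond)) {				\
			__warned = true;				\
			fprintf(stderr, "%s:%d: %s\n",			\
				__FILE__, __LINE__, msg);		\
		}							\
	} while (0)

static void check_a(void)
{
	warn_once_per_callsite(0, "suspicious usage at callsite A");
}

static void check_b(void)
{
	warn_once_per_callsite(0, "suspicious usage at callsite B");
}

int main(void)
{
	for (int i = 0; i < 3; i++) {
		check_a();	/* warns on the first call only */
		check_b();	/* has its own __warned, also warns exactly once */
	}
	return 0;
}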