From b4d623f67460595ba2c37eefceec33a3cea97422 Mon Sep 17 00:00:00 2001
From: Waiman Long
Date: Wed, 8 Feb 2017 10:44:14 +1100
Subject: debugobjects: track number of kmem_cache_alloc/kmem_cache_free done

Patch series "debugobjects: Reduce global pool_lock contention", v2.

This patchset aims to reduce contention on the global pool_lock while
improving performance at the same time.  It was done to resolve the
following soft lockup problem with a debug kernel on some of the large
SMP systems:

 NMI watchdog: BUG: soft lockup - CPU#35 stuck for 22s! [rcuos/1:21]
 ...
 RIP: 0010:[]  [] _raw_spin_unlock_irqrestore+0x3b/0x60
 ...
 Call Trace:
  [] free_object+0x81/0xb0
  [] debug_check_no_obj_freed+0x193/0x220
  [] ? trace_hardirqs_on_caller+0xf9/0x1c0
  [] ? file_free_rcu+0x36/0x60
  [] kmem_cache_free+0xd2/0x380
  [] ? fput+0x90/0x90
  [] file_free_rcu+0x36/0x60
  [] rcu_nocb_kthread+0x1b3/0x550
  [] ? rcu_nocb_kthread+0x101/0x550
  [] ? sync_exp_work_done.constprop.63+0x50/0x50
  [] kthread+0x101/0x120
  [] ? trace_hardirqs_on_caller+0xf9/0x1c0
  [] ret_from_fork+0x22/0x50

On an 8-socket IvyBridge-EX system (120 cores, 240 threads), the elapsed
time of a parallel kernel build (make -j 240) was reduced from 7m57s with
an unpatched 4.9-rc7 kernel to 7m19s with a patched one.  There was also
about a 10X reduction in the number of debug objects being allocated from
or freed back to the kmem_cache during the kernel build.

This patch (of 3):

New debugfs stat counters are added to track the number of
kmem_cache_alloc() and kmem_cache_free() calls, to give a sense of how
well the internal debug objects cache management is performing.

Link: http://lkml.kernel.org/r/1483647425-4135-2-git-send-email-longman@redhat.com
Signed-off-by: Waiman Long
Cc: Thomas Gleixner
Cc: "Du Changbin"
Cc: Christian Borntraeger
Cc: Jan Stancek
Signed-off-by: Andrew Morton
---
 lib/debugobjects.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 04c1ef717fe0..d78673e7dc56 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -55,6 +55,12 @@ static int debug_objects_enabled __read_mostly
 
 static struct debug_obj_descr *descr_test __read_mostly;
 
+/*
+ * Track numbers of kmem_cache_alloc() and kmem_cache_free() done.
+ */
+static int debug_objects_alloc;
+static int debug_objects_freed;
+
 static void free_obj_work(struct work_struct *work);
 static DECLARE_WORK(debug_obj_work, free_obj_work);
 
@@ -102,6 +108,7 @@ static void fill_pool(void)
 
                 raw_spin_lock_irqsave(&pool_lock, flags);
                 hlist_add_head(&new->node, &obj_pool);
+                debug_objects_alloc++;
                 obj_pool_free++;
                 raw_spin_unlock_irqrestore(&pool_lock, flags);
         }
@@ -173,6 +180,7 @@ static void free_obj_work(struct work_struct *work)
                 obj = hlist_entry(obj_pool.first, typeof(*obj), node);
                 hlist_del(&obj->node);
                 obj_pool_free--;
+                debug_objects_freed++;
                 /*
                  * We release pool_lock across kmem_cache_free() to
                  * avoid contention on pool_lock.
@@ -758,6 +766,8 @@ static int debug_stats_show(struct seq_file *m, void *v)
         seq_printf(m, "pool_min_free :%d\n", obj_pool_min_free);
         seq_printf(m, "pool_used     :%d\n", obj_pool_used);
         seq_printf(m, "pool_max_used :%d\n", obj_pool_max_used);
+        seq_printf(m, "objects_alloc :%d\n", debug_objects_alloc);
+        seq_printf(m, "objects_freed :%d\n", debug_objects_freed);
         return 0;
 }
-- 
cgit v1.2.3
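
For illustration only (not part of the patch): assuming CONFIG_DEBUG_OBJECTS
is enabled and debugfs is mounted at the usual /sys/kernel/debug, the two new
counters appear alongside the existing pool statistics when reading the stats
file exported by lib/debugobjects.c. The numbers below are made-up sample
values, shown only to indicate the output format:

  # cat /sys/kernel/debug/debug_objects/stats
  ...
  pool_min_free :254
  pool_used     :2431
  pool_max_used :4098
  objects_alloc :14956
  objects_freed :12784

Comparing objects_alloc and objects_freed over time gives a rough measure of
how often the debug object pool has to fall back to the kmem_cache rather
than recycling objects from its own free list.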