From d602dabaeba79df90cc67c32d5fe4ee0d5e2b73a Mon Sep 17 00:00:00 2001
From: Bob Liu
Date: Sat, 10 Jul 2010 18:05:33 +0800
Subject: SLOB: Free objects to their own list

SLOB already allocates smaller objects from their own lists to reduce
overall external fragmentation and increase repeatability; free objects
back to their own lists as well.

This is the /proc/meminfo result on my test machine:

without this patch:
===
MemTotal:       1030720 kB
MemFree:         750012 kB
Buffers:          15496 kB
Cached:          160396 kB
SwapCached:           0 kB
Active:          105024 kB
Inactive:        145604 kB
Active(anon):     74816 kB
Inactive(anon):    2180 kB
Active(file):     30208 kB
Inactive(file):  143424 kB
Unevictable:         16 kB
....

with this patch:
===
MemTotal:       1030720 kB
MemFree:         751908 kB
Buffers:          15492 kB
Cached:          160280 kB
SwapCached:           0 kB
Active:          102720 kB
Inactive:        146140 kB
Active(anon):     73168 kB
Inactive(anon):    2180 kB
Active(file):     29552 kB
Inactive(file):  143960 kB
Unevictable:         16 kB
...

The result shows an improvement of 1 MB! And when I tested it on an
embedded system with 64 MB, I found this path is never called during
kernel bootup.

Acked-by: Matt Mackall
Signed-off-by: Bob Liu
Signed-off-by: Pekka Enberg
---
 mm/slob.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'mm')

diff --git a/mm/slob.c b/mm/slob.c
index 23631e2bb57..6a208f81888 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -394,6 +394,7 @@ static void slob_free(void *block, int size)
         slob_t *prev, *next, *b = (slob_t *)block;
         slobidx_t units;
         unsigned long flags;
+        struct list_head *slob_list;

         if (unlikely(ZERO_OR_NULL_PTR(block)))
                 return;
@@ -422,7 +423,13 @@ static void slob_free(void *block, int size)
                 set_slob(b, units,
                         (void *)((unsigned long)(b +
                                         SLOB_UNITS(PAGE_SIZE)) & PAGE_MASK));
-                set_slob_page_free(sp, &free_slob_small);
+                if (size < SLOB_BREAK1)
+                        slob_list = &free_slob_small;
+                else if (size < SLOB_BREAK2)
+                        slob_list = &free_slob_medium;
+                else
+                        slob_list = &free_slob_large;
+                set_slob_page_free(sp, slob_list);
                 goto out;
         }

--
cgit v1.2.3
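The size-class split referred to above can be made concrete with a small,
purely illustrative sketch (not part of the patch). It mirrors the three-way
mapping that slob_alloc() and, after this patch, slob_free() agree on; the
256 and 1024 byte break points are assumed here to match the SLOB_BREAK1 and
SLOB_BREAK2 values carried by mm/slob.c of this period.

#include <stdio.h>
#include <stddef.h>

/* Assumed values, mirroring SLOB_BREAK1/SLOB_BREAK2 in mm/slob.c. */
#define SLOB_BREAK1 256
#define SLOB_BREAK2 1024

/* Name of the SLOB free list a request of 'size' bytes belongs to. */
static const char *slob_list_for(size_t size)
{
        if (size < SLOB_BREAK1)
                return "free_slob_small";
        else if (size < SLOB_BREAK2)
                return "free_slob_medium";
        else
                return "free_slob_large";
}

int main(void)
{
        size_t sizes[] = { 32, 200, 512, 2048 };
        size_t i;

        for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
                printf("%4zu bytes -> %s\n", sizes[i], slob_list_for(sizes[i]));
        return 0;
}

With the allocation and free paths agreeing on this mapping, a page that was
dedicated to one size class stays on the matching list instead of always
being parked on free_slob_small when it becomes partially free.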
From 2154a336381f85f5390d9a84c6cf4a7d2847b6ed Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Fri, 9 Jul 2010 14:07:10 -0500
Subject: slub: Use a constant for an unspecified node.

kmalloc_node() and friends can be passed a constant -1 to indicate that no
choice was made for the node from which the object needs to come.

Use NUMA_NO_NODE instead of -1.

CC: KAMEZAWA Hiroyuki
Signed-off-by: David Rientjes
Signed-off-by: Christoph Lameter
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'mm')

diff --git a/mm/slub.c b/mm/slub.c
index 578f68f3c51..cc0a3c72e51 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1073,7 +1073,7 @@ static inline struct page *alloc_slab_page(gfp_t flags, int node,

         flags |= __GFP_NOTRACK;

-        if (node == -1)
+        if (node == NUMA_NO_NODE)
                 return alloc_pages(flags, order);
         else
                 return alloc_pages_exact_node(node, flags, order);
@@ -1387,7 +1387,7 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
 static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
 {
         struct page *page;
-        int searchnode = (node == -1) ? numa_node_id() : node;
+        int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node;

         page = get_partial_node(get_node(s, searchnode));
         if (page || (flags & __GFP_THISNODE))
@@ -1515,7 +1515,7 @@ static void flush_all(struct kmem_cache *s)
 static inline int node_match(struct kmem_cache_cpu *c, int node)
 {
 #ifdef CONFIG_NUMA
-        if (node != -1 && c->node != node)
+        if (node != NUMA_NO_NODE && c->node != node)
                 return 0;
 #endif
         return 1;
@@ -1727,7 +1727,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,

 void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
 {
-        void *ret = slab_alloc(s, gfpflags, -1, _RET_IP_);
+        void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_);

         trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags);

@@ -1738,7 +1738,7 @@ EXPORT_SYMBOL(kmem_cache_alloc);
 #ifdef CONFIG_TRACING
 void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags)
 {
-        return slab_alloc(s, gfpflags, -1, _RET_IP_);
+        return slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_);
 }
 EXPORT_SYMBOL(kmem_cache_alloc_notrace);
 #endif
@@ -2728,7 +2728,7 @@ void *__kmalloc(size_t size, gfp_t flags)
         if (unlikely(ZERO_OR_NULL_PTR(s)))
                 return s;

-        ret = slab_alloc(s, flags, -1, _RET_IP_);
+        ret = slab_alloc(s, flags, NUMA_NO_NODE, _RET_IP_);

         trace_kmalloc(_RET_IP_, ret, size, s->size, flags);

@@ -3312,7 +3312,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
         if (unlikely(ZERO_OR_NULL_PTR(s)))
                 return s;

-        ret = slab_alloc(s, gfpflags, -1, caller);
+        ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, caller);

         /* Honor the call site pointer we recieved. */
         trace_kmalloc(caller, ret, size, s->size, gfpflags);
--
cgit v1.2.3
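For callers, NUMA_NO_NODE is simply the named form of "no node preference".
The helper below is a hypothetical sketch, not part of the patch; the
function name and parameters are made up for illustration.

#include <linux/slab.h>
#include <linux/numa.h>

/* Hypothetical helper: allocate 'len' bytes, preferring node 'nid' if given. */
static void *alloc_pref_node(size_t len, int nid)
{
        if (nid == NUMA_NO_NODE)                        /* no preference */
                return kmalloc(len, GFP_KERNEL);

        return kmalloc_node(len, GFP_KERNEL, nid);      /* prefer node 'nid' */
}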
From f90ec390148fdbc0db38c477bc6dc94db721e7f1 Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Fri, 9 Jul 2010 14:07:11 -0500
Subject: SLUB: Constants need UL

The UL suffix is missing from some constants. Conform to how slab.h uses
constants.

Acked-by: David Rientjes
Signed-off-by: Christoph Lameter
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'mm')

diff --git a/mm/slub.c b/mm/slub.c
index cc0a3c72e51..2c119035172 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -162,8 +162,8 @@
 #define MAX_OBJS_PER_PAGE       65535 /* since page.objects is u16 */

 /* Internal SLUB flags */
-#define __OBJECT_POISON         0x80000000 /* Poison object */
-#define __SYSFS_ADD_DEFERRED    0x40000000 /* Not yet visible via sysfs */
+#define __OBJECT_POISON         0x80000000UL /* Poison object */
+#define __SYSFS_ADD_DEFERRED    0x40000000UL /* Not yet visible via sysfs */

 static int kmem_size = sizeof(struct kmem_cache);

--
cgit v1.2.3

From d7278bd7d1aab5c6d35fd271eeb860548f0bd0bb Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Fri, 9 Jul 2010 14:07:12 -0500
Subject: slub: Check kasprintf results in kmem_cache_init()

Small allocations may fail during slab bringup, which is fatal. Add a
BUG_ON() so that we fail immediately rather than failing later during
sysfs processing.

Acked-by: David Rientjes
Signed-off-by: Christoph Lameter
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'mm')

diff --git a/mm/slub.c b/mm/slub.c
index 2c119035172..8655be5b740 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3118,9 +3118,12 @@ void __init kmem_cache_init(void)
         slab_state = UP;

         /* Provide the correct kmalloc names now that the caches are up */
-        for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++)
-                kmalloc_caches[i].name =
-                        kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i);
+        for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
+                char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i);
+
+                BUG_ON(!s);
+                kmalloc_caches[i].name = s;
+        }

 #ifdef CONFIG_SMP
         register_cpu_notifier(&slab_notifier);
--
cgit v1.2.3

From f5b801ac38a9612b380ee9a75ab1861f0594e79f Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Fri, 9 Jul 2010 14:07:13 -0500
Subject: slub: Allow removal of slab caches during boot

If a slab cache is removed before we have set up sysfs, then simply skip
over the sysfs handling.

Cc: Benjamin Herrenschmidt
Cc: Roland Dreier
Acked-by: David Rientjes
Signed-off-by: Christoph Lameter
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'mm')

diff --git a/mm/slub.c b/mm/slub.c
index 8655be5b740..b89a7c99b2f 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4507,6 +4507,13 @@ static int sysfs_slab_add(struct kmem_cache *s)

 static void sysfs_slab_remove(struct kmem_cache *s)
 {
+        if (slab_state < SYSFS)
+                /*
+                 * Sysfs has not been setup yet so no need to remove the
+                 * cache from sysfs.
+                 */
+                return;
+
         kobject_uevent(&s->kobj, KOBJ_REMOVE);
         kobject_del(&s->kobj);
         kobject_put(&s->kobj);
--
cgit v1.2.3

From af537b0a6c650ab6ff7104d8163e96866b31c835 Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Fri, 9 Jul 2010 14:07:14 -0500
Subject: slub: Use kmem_cache flags to detect if slab is in debugging mode.

The cacheline with the flags is reachable from the hot paths after the
percpu allocator changes went in, so there is no longer any need to put a
flag into each slab page. Get rid of the SlubDebug page flag and use the
flags in kmem_cache instead.

Acked-by: David Rientjes
Signed-off-by: Christoph Lameter
Signed-off-by: Pekka Enberg
---
 include/linux/page-flags.h |  2 --
 mm/slub.c                  | 33 ++++++++++++---------------------
 2 files changed, 12 insertions(+), 23 deletions(-)

(limited to 'mm')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 5b59f35dcb8..6fa317801e1 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -128,7 +128,6 @@ enum pageflags {

         /* SLUB */
         PG_slub_frozen = PG_active,
-        PG_slub_debug = PG_error,
 };

 #ifndef __GENERATING_BOUNDS_H
@@ -215,7 +214,6 @@ PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked)
 __PAGEFLAG(SlobFree, slob_free)

 __PAGEFLAG(SlubFrozen, slub_frozen)
-__PAGEFLAG(SlubDebug, slub_debug)

 /*
  * Private page markings that may be used by the filesystem that owns the page
diff --git a/mm/slub.c b/mm/slub.c
index b89a7c99b2f..9cf5dae7815 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -107,11 +107,17 @@
  *                      the fast path and disables lockless freelists.
  */

+#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
+                SLAB_TRACE | SLAB_DEBUG_FREE)
+
+static inline int kmem_cache_debug(struct kmem_cache *s)
+{
 #ifdef CONFIG_SLUB_DEBUG
-#define SLABDEBUG 1
+        return unlikely(s->flags & SLAB_DEBUG_FLAGS);
 #else
-#define SLABDEBUG 0
+        return 0;
 #endif
+}

 /*
  * Issues still to be resolved:
@@ -1157,9 +1163,6 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
         inc_slabs_node(s, page_to_nid(page), page->objects);
         page->slab = s;
         page->flags |= 1 << PG_slab;
-        if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON |
-                        SLAB_STORE_USER | SLAB_TRACE))
-                __SetPageSlubDebug(page);

         start = page_address(page);
@@ -1186,14 +1189,13 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
         int order = compound_order(page);
         int pages = 1 << order;

-        if (unlikely(SLABDEBUG && PageSlubDebug(page))) {
+        if (kmem_cache_debug(s)) {
                 void *p;

                 slab_pad_check(s, page);
                 for_each_object(p, s, page_address(page),
                                                 page->objects)
                         check_object(s, page, p, 0);
-                __ClearPageSlubDebug(page);
         }

         kmemcheck_free_shadow(page, compound_order(page));
@@ -1415,8 +1417,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
                         stat(s, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
                 } else {
                         stat(s, DEACTIVATE_FULL);
-                        if (SLABDEBUG && PageSlubDebug(page) &&
-                                (s->flags & SLAB_STORE_USER))
+                        if (kmem_cache_debug(s) && (s->flags & SLAB_STORE_USER))
                                 add_full(n, page);
                 }
                 slab_unlock(page);
@@ -1624,7 +1625,7 @@ load_freelist:
         object = c->page->freelist;
         if (unlikely(!object))
                 goto another_slab;
-        if (unlikely(SLABDEBUG && PageSlubDebug(c->page)))
+        if (kmem_cache_debug(s))
                 goto debug;

         c->freelist = get_freepointer(s, object);
@@ -1783,7 +1784,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
         stat(s, FREE_SLOWPATH);
         slab_lock(page);

-        if (unlikely(SLABDEBUG && PageSlubDebug(page)))
+        if (kmem_cache_debug(s))
                 goto debug;

 checks_ok:
@@ -3398,16 +3399,6 @@ static void validate_slab_slab(struct kmem_cache *s, struct page *page,
         } else
                 printk(KERN_INFO "SLUB %s: Skipped busy slab 0x%p\n",
                         s->name, page);
-
-        if (s->flags & DEBUG_DEFAULT_FLAGS) {
-                if (!PageSlubDebug(page))
-                        printk(KERN_ERR "SLUB %s: SlubDebug not set "
-                                "on slab 0x%p\n", s->name, page);
-        } else {
-                if (PageSlubDebug(page))
-                        printk(KERN_ERR "SLUB %s: SlubDebug set on "
-                                "slab 0x%p\n", s->name, page);
-        }
 }

 static int validate_slab_node(struct kmem_cache *s,
--
cgit v1.2.3
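With the per-page flag gone, whether kmem_cache_debug() returns true for a
cache depends only on the debug bits in s->flags, which are typically set
per cache at creation time or from the slub_debug boot parameter. As a
reminder of that convention (option letters as documented in
Documentation/vm/slub.txt of this period; the cache name below is just an
example):

slub_debug=FZPU            sanity checks, red zoning, poisoning and user
                           tracking for all slab caches
slub_debug=P,kmalloc-64    poisoning only, and only for the kmalloc-64 cache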
From 78b435368fcd615e695a06012cd963a556284e00 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven
Date: Mon, 19 Jul 2010 10:59:42 -0700
Subject: slab: use deferrable timers for its periodic housekeeping

slab has a "once every 2 seconds" timer for its housekeeping. As the number
of logical processors grows, it is more and more common for this 2-second
timer to become the primary wakeup source.

This patch turns this housekeeping timer into a deferrable timer, which
means that the timer does not interrupt idle, but just runs at the next
event that wakes the cpu up.

The impact is that the timer likely runs a bit later, but during the delay
no code is running, so there is not much reason for the housekeeping to
behave differently because of this delay.
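The deferrable pattern generalizes beyond slab. The module below is a
minimal sketch, not taken from the kernel: the work function, interval and
module name are illustrative, and it uses the 2010-era macro
INIT_DELAYED_WORK_DEFERRABLE (later kernels renamed it INIT_DEFERRABLE_WORK).

#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/timer.h>

static struct delayed_work housekeeping;

static void housekeeping_fn(struct work_struct *w)
{
        /* Periodic cleanup would go here; then re-arm for ~2 seconds. */
        schedule_delayed_work(&housekeeping, round_jiffies_relative(2 * HZ));
}

static int __init demo_init(void)
{
        /*
         * Deferrable: if the CPU is idle when the delay expires, the work
         * waits for the next wakeup instead of forcing one.
         */
        INIT_DELAYED_WORK_DEFERRABLE(&housekeeping, housekeeping_fn);
        schedule_delayed_work(&housekeeping, round_jiffies_relative(2 * HZ));
        return 0;
}

static void __exit demo_exit(void)
{
        cancel_delayed_work_sync(&housekeeping);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");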
Signed-off-by: Arjan van de Ven
Signed-off-by: Pekka Enberg
---
 mm/slab.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'mm')

diff --git a/mm/slab.c b/mm/slab.c
index e49f8f46f46..29aad44a55c 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -861,7 +861,7 @@ static void __cpuinit start_cpu_timer(int cpu)
          */
         if (keventd_up() && reap_work->work.func == NULL) {
                 init_reap_node(cpu);
-                INIT_DELAYED_WORK(reap_work, cache_reap);
+                INIT_DELAYED_WORK_DEFERRABLE(reap_work, cache_reap);
                 schedule_delayed_work_on(cpu, reap_work,
                         __round_jiffies_relative(HZ, cpu));
         }
--
cgit v1.2.3

From bc6488e91078af0b42ee0d8335e0587f64550d7d Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Mon, 26 Jul 2010 10:41:14 -0500
Subject: slub numa: Fix rare allocation from unexpected node

The network developers have seen sporadic allocations resulting in objects
coming from unexpected NUMA nodes despite asking for objects from a
specific node.

This is due to get_partial() calling get_any_partial() if partial slabs are
exhausted for a node, even if a node was specified and therefore one would
expect allocations only from the specified node.

get_any_partial() may sporadically return a slab from a foreign node to
gradually reduce the size of partial lists on remote nodes and thereby
reduce total memory use for a slab cache. The behavior is controlled by the
remote_defrag_ratio of each cache.

Strictly speaking this is permitted behavior, since __GFP_THISNODE was not
specified for the allocation, but it is certainly surprising.

This patch makes sure that the remote defrag behavior only occurs if no
node was specified.

Signed-off-by: Christoph Lameter
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'mm')

diff --git a/mm/slub.c b/mm/slub.c
index 578f68f3c51..39d39653239 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1390,7 +1390,7 @@ static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
         int searchnode = (node == -1) ? numa_node_id() : node;

         page = get_partial_node(get_node(s, searchnode));
-        if (page || (flags & __GFP_THISNODE))
+        if (page || node != -1)
                 return page;

         return get_any_partial(s, flags);
--
cgit v1.2.3
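The caller-visible expectation that the fix restores can be sketched as
follows. This is illustrative only: the helper name is made up, and the
comment summarizes the behavior described in the changelog above rather
than quoting the implementation.

#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/kernel.h>

/* Illustrative only: ask for an object backed by memory on node 'nid'. */
static void *alloc_on_node(struct kmem_cache *cache, int nid)
{
        void *obj = kmem_cache_alloc_node(cache, GFP_KERNEL, nid);

        /*
         * After this fix, exhausting the partial slabs of 'nid' makes SLUB
         * allocate a fresh slab for 'nid' rather than silently borrowing a
         * partial slab from another node; the page allocator itself may
         * still fall back under memory pressure unless __GFP_THISNODE is
         * also passed.
         */
        if (obj)
                pr_debug("object %p is on node %d\n",
                         obj, page_to_nid(virt_to_page(obj)));
        return obj;
}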
From e438444de82f354563d46ee5d991b5916dd19b01 Mon Sep 17 00:00:00 2001
From: Pekka Enberg
Date: Tue, 3 Aug 2010 07:28:21 +0300
Subject: Revert "slub: Allow removal of slab caches during boot"

This reverts commit f5b801ac38a9612b380ee9a75ab1861f0594e79f.
---
 mm/slub.c | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'mm')

diff --git a/mm/slub.c b/mm/slub.c
index 9cf5dae7815..f0f403693bd 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4498,13 +4498,6 @@ static int sysfs_slab_add(struct kmem_cache *s)

 static void sysfs_slab_remove(struct kmem_cache *s)
 {
-        if (slab_state < SYSFS)
-                /*
-                 * Sysfs has not been setup yet so no need to remove the
-                 * cache from sysfs.
-                 */
-                return;
-
         kobject_uevent(&s->kobj, KOBJ_REMOVE);
         kobject_del(&s->kobj);
         kobject_put(&s->kobj);
--
cgit v1.2.3

From 2bce64858442149784f6c8803c9095a8556320a2 Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Mon, 19 Jul 2010 11:39:11 -0500
Subject: slub: Allow removal of slab caches during boot

Serialize kmem_cache_create() and kmem_cache_destroy() using the slub_lock.
This is only possible now that the use of the slub_lock during dynamic DMA
cache creation has been removed.

Then make sure that the setup of the slab sysfs entries does not race with
kmem_cache_create() and kmem_cache_destroy().

If a slab cache is removed before we have set up sysfs, then simply skip
over the sysfs handling.

Cc: Benjamin Herrenschmidt
Cc: Roland Dreier
Signed-off-by: Christoph Lameter
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

(limited to 'mm')

diff --git a/mm/slub.c b/mm/slub.c
index f0f403693bd..fb6518efe1e 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2491,7 +2491,6 @@ void kmem_cache_destroy(struct kmem_cache *s)
         s->refcount--;
         if (!s->refcount) {
                 list_del(&s->list);
-                up_write(&slub_lock);
                 if (kmem_cache_close(s)) {
                         printk(KERN_ERR "SLUB %s: %s called for cache that "
                                 "still has objects.\n", s->name, __func__);
@@ -2500,8 +2499,8 @@
                 if (s->flags & SLAB_DESTROY_BY_RCU)
                         rcu_barrier();
                 sysfs_slab_remove(s);
-        } else
-                up_write(&slub_lock);
+        }
+        up_write(&slub_lock);
 }
 EXPORT_SYMBOL(kmem_cache_destroy);

@@ -3227,14 +3226,12 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
                  */
                 s->objsize = max(s->objsize, (int)size);
                 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
-                up_write(&slub_lock);

                 if (sysfs_slab_alias(s, name)) {
-                        down_write(&slub_lock);
                         s->refcount--;
-                        up_write(&slub_lock);
                         goto err;
                 }
+                up_write(&slub_lock);
                 return s;
         }

@@ -3243,14 +3240,12 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
                 if (kmem_cache_open(s, GFP_KERNEL, name,
                                 size, align, flags, ctor)) {
                         list_add(&s->list, &slab_caches);
-                        up_write(&slub_lock);
                         if (sysfs_slab_add(s)) {
-                                down_write(&slub_lock);
                                 list_del(&s->list);
-                                up_write(&slub_lock);
                                 kfree(s);
                                 goto err;
                         }
+                        up_write(&slub_lock);
                         return s;
                 }
                 kfree(s);
@@ -4498,6 +4493,13 @@ static int sysfs_slab_add(struct kmem_cache *s)

 static void sysfs_slab_remove(struct kmem_cache *s)
 {
+        if (slab_state < SYSFS)
+                /*
+                 * Sysfs has not been setup yet so no need to remove the
+                 * cache from sysfs.
+                 */
+                return;
+
         kobject_uevent(&s->kobj, KOBJ_REMOVE);
         kobject_del(&s->kobj);
         kobject_put(&s->kobj);
@@ -4543,8 +4545,11 @@ static int __init slab_sysfs_init(void)
         struct kmem_cache *s;
         int err;

+        down_write(&slub_lock);
+
         slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
         if (!slab_kset) {
+                up_write(&slub_lock);
                 printk(KERN_ERR "Cannot register slab subsystem.\n");
                 return -ENOSYS;
         }
@@ -4569,6 +4574,7 @@ static int __init slab_sysfs_init(void)
                 kfree(al);
         }

+        up_write(&slub_lock);
         resiliency_test();
         return 0;
 }
--
cgit v1.2.3