diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-28 14:08:56 -0700 | 
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-28 14:08:56 -0700 | 
| commit | e97e386b126c2d60b8da61ce1e4964b41b3d1514 (patch) | |
| tree | 7e04b7f735004330777200c6742568fc130ff893 /mm | |
| parent | d9dedc13851f9cbd568fbc631a17b0be83404957 (diff) | |
| parent | c124f5b54f879e5870befcc076addbd5d614663f (diff) | |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6:
  slub: pack objects denser
  slub: Calculate min_objects based on number of processors.
  slub: Drop DEFAULT_MAX_ORDER / DEFAULT_MIN_OBJECTS
  slub: Simplify any_slab_object checks
  slub: Make the order configurable for each slab cache
  slub: Drop fallback to page allocator method
  slub: Fallback to minimal order during slab page allocation
  slub: Update statistics handling for variable order slabs
  slub: Add kmem_cache_order_objects struct
  slub: for_each_object must be passed the number of objects in a slab
  slub: Store max number of objects in the page struct.
  slub: Dump list of objects not freed on kmem_cache_close()
  slub: free_list() cleanup
  slub: improve kmem_cache_destroy() error message
  slob: fix bug - when slob allocates "struct kmem_cache", it does not force alignment.
Diffstat (limited to 'mm')
| -rw-r--r-- | mm/slob.c | 3 | ||||
| -rw-r--r-- | mm/slub.c | 481 | 
2 files changed, 289 insertions, 195 deletions
| diff --git a/mm/slob.c b/mm/slob.c index e2c3c0ec546..6038cbadf79 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -533,7 +533,8 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,  {  	struct kmem_cache *c; -	c = slob_alloc(sizeof(struct kmem_cache), flags, 0, -1); +	c = slob_alloc(sizeof(struct kmem_cache), +		flags, ARCH_KMALLOC_MINALIGN, -1);  	if (c) {  		c->name = name; diff --git a/mm/slub.c b/mm/slub.c index 38914bc64ac..992ecd4f0d3 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -149,25 +149,6 @@ static inline void ClearSlabDebug(struct page *page)  /* Enable to test recovery from slab corruption on boot */  #undef SLUB_RESILIENCY_TEST -#if PAGE_SHIFT <= 12 - -/* - * Small page size. Make sure that we do not fragment memory - */ -#define DEFAULT_MAX_ORDER 1 -#define DEFAULT_MIN_OBJECTS 4 - -#else - -/* - * Large page machines are customarily able to handle larger - * page orders. - */ -#define DEFAULT_MAX_ORDER 2 -#define DEFAULT_MIN_OBJECTS 8 - -#endif -  /*   * Mininum number of partial slabs. These will be left on the partial   * lists even if they are empty. kmem_cache_shrink may reclaim them. @@ -204,8 +185,6 @@ static inline void ClearSlabDebug(struct page *page)  /* Internal SLUB flags */  #define __OBJECT_POISON		0x80000000 /* Poison object */  #define __SYSFS_ADD_DEFERRED	0x40000000 /* Not yet visible via sysfs */ -#define __KMALLOC_CACHE		0x20000000 /* objects freed using kfree */ -#define __PAGE_ALLOC_FALLBACK	0x10000000 /* Allow fallback to page alloc */  static int kmem_size = sizeof(struct kmem_cache); @@ -296,7 +275,7 @@ static inline int check_valid_pointer(struct kmem_cache *s,  		return 1;  	base = page_address(page); -	if (object < base || object >= base + s->objects * s->size || +	if (object < base || object >= base + page->objects * s->size ||  		(object - base) % s->size) {  		return 0;  	} @@ -322,8 +301,8 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)  }  /* Loop over all objects in a slab */ -#define for_each_object(__p, __s, __addr) \ -	for (__p = (__addr); __p < (__addr) + (__s)->objects * (__s)->size;\ +#define for_each_object(__p, __s, __addr, __objects) \ +	for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\  			__p += (__s)->size)  /* Scan freelist */ @@ -336,6 +315,26 @@ static inline int slab_index(void *p, struct kmem_cache *s, void *addr)  	return (p - addr) / s->size;  } +static inline struct kmem_cache_order_objects oo_make(int order, +						unsigned long size) +{ +	struct kmem_cache_order_objects x = { +		(order << 16) + (PAGE_SIZE << order) / size +	}; + +	return x; +} + +static inline int oo_order(struct kmem_cache_order_objects x) +{ +	return x.x >> 16; +} + +static inline int oo_objects(struct kmem_cache_order_objects x) +{ +	return x.x & ((1 << 16) - 1); +} +  #ifdef CONFIG_SLUB_DEBUG  /*   * Debug settings: @@ -446,8 +445,8 @@ static void print_tracking(struct kmem_cache *s, void *object)  static void print_page_info(struct page *page)  { -	printk(KERN_ERR "INFO: Slab 0x%p used=%u fp=0x%p flags=0x%04lx\n", -		page, page->inuse, page->freelist, page->flags); +	printk(KERN_ERR "INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n", +		page, page->objects, page->inuse, page->freelist, page->flags);  } @@ -647,6 +646,7 @@ static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)  				p + off, POISON_INUSE, s->size - off);  } +/* Check the pad bytes at the end of a slab page */  static int slab_pad_check(struct kmem_cache *s, struct page *page)  {  	u8 *start; @@ -659,20 +659,20 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)  		return 1;  	start = page_address(page); -	end = start + (PAGE_SIZE << s->order); -	length = s->objects * s->size; -	remainder = end - (start + length); +	length = (PAGE_SIZE << compound_order(page)); +	end = start + length; +	remainder = length % s->size;  	if (!remainder)  		return 1; -	fault = check_bytes(start + length, POISON_INUSE, remainder); +	fault = check_bytes(end - remainder, POISON_INUSE, remainder);  	if (!fault)  		return 1;  	while (end > fault && end[-1] == POISON_INUSE)  		end--;  	slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1); -	print_section("Padding", start, length); +	print_section("Padding", end - remainder, remainder);  	restore_bytes(s, "slab padding", POISON_INUSE, start, end);  	return 0; @@ -734,15 +734,24 @@ static int check_object(struct kmem_cache *s, struct page *page,  static int check_slab(struct kmem_cache *s, struct page *page)  { +	int maxobj; +  	VM_BUG_ON(!irqs_disabled());  	if (!PageSlab(page)) {  		slab_err(s, page, "Not a valid slab page");  		return 0;  	} -	if (page->inuse > s->objects) { + +	maxobj = (PAGE_SIZE << compound_order(page)) / s->size; +	if (page->objects > maxobj) { +		slab_err(s, page, "objects %u > max %u", +			s->name, page->objects, maxobj); +		return 0; +	} +	if (page->inuse > page->objects) {  		slab_err(s, page, "inuse %u > max %u", -			s->name, page->inuse, s->objects); +			s->name, page->inuse, page->objects);  		return 0;  	}  	/* Slab_pad_check fixes things up after itself */ @@ -759,8 +768,9 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)  	int nr = 0;  	void *fp = page->freelist;  	void *object = NULL; +	unsigned long max_objects; -	while (fp && nr <= s->objects) { +	while (fp && nr <= page->objects) {  		if (fp == search)  			return 1;  		if (!check_valid_pointer(s, page, fp)) { @@ -772,7 +782,7 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)  			} else {  				slab_err(s, page, "Freepointer corrupt");  				page->freelist = NULL; -				page->inuse = s->objects; +				page->inuse = page->objects;  				slab_fix(s, "Freelist cleared");  				return 0;  			} @@ -783,10 +793,20 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)  		nr++;  	} -	if (page->inuse != s->objects - nr) { +	max_objects = (PAGE_SIZE << compound_order(page)) / s->size; +	if (max_objects > 65535) +		max_objects = 65535; + +	if (page->objects != max_objects) { +		slab_err(s, page, "Wrong number of objects. Found %d but " +			"should be %d", page->objects, max_objects); +		page->objects = max_objects; +		slab_fix(s, "Number of objects adjusted."); +	} +	if (page->inuse != page->objects - nr) {  		slab_err(s, page, "Wrong object count. Counter is %d but " -			"counted were %d", page->inuse, s->objects - nr); -		page->inuse = s->objects - nr; +			"counted were %d", page->inuse, page->objects - nr); +		page->inuse = page->objects - nr;  		slab_fix(s, "Object count adjusted.");  	}  	return search == NULL; @@ -840,7 +860,7 @@ static inline unsigned long slabs_node(struct kmem_cache *s, int node)  	return atomic_long_read(&n->nr_slabs);  } -static inline void inc_slabs_node(struct kmem_cache *s, int node) +static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)  {  	struct kmem_cache_node *n = get_node(s, node); @@ -850,14 +870,17 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node)  	 * dilemma by deferring the increment of the count during  	 * bootstrap (see early_kmem_cache_node_alloc).  	 */ -	if (!NUMA_BUILD || n) +	if (!NUMA_BUILD || n) {  		atomic_long_inc(&n->nr_slabs); +		atomic_long_add(objects, &n->total_objects); +	}  } -static inline void dec_slabs_node(struct kmem_cache *s, int node) +static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)  {  	struct kmem_cache_node *n = get_node(s, node);  	atomic_long_dec(&n->nr_slabs); +	atomic_long_sub(objects, &n->total_objects);  }  /* Object debug checks for alloc/free paths */ @@ -905,7 +928,7 @@ bad:  		 * as used avoids touching the remaining objects.  		 */  		slab_fix(s, "Marking all objects used"); -		page->inuse = s->objects; +		page->inuse = page->objects;  		page->freelist = NULL;  	}  	return 0; @@ -1055,31 +1078,52 @@ static inline unsigned long kmem_cache_flags(unsigned long objsize,  static inline unsigned long slabs_node(struct kmem_cache *s, int node)  							{ return 0; } -static inline void inc_slabs_node(struct kmem_cache *s, int node) {} -static inline void dec_slabs_node(struct kmem_cache *s, int node) {} +static inline void inc_slabs_node(struct kmem_cache *s, int node, +							int objects) {} +static inline void dec_slabs_node(struct kmem_cache *s, int node, +							int objects) {}  #endif +  /*   * Slab allocation and freeing   */ +static inline struct page *alloc_slab_page(gfp_t flags, int node, +					struct kmem_cache_order_objects oo) +{ +	int order = oo_order(oo); + +	if (node == -1) +		return alloc_pages(flags, order); +	else +		return alloc_pages_node(node, flags, order); +} +  static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)  {  	struct page *page; -	int pages = 1 << s->order; +	struct kmem_cache_order_objects oo = s->oo;  	flags |= s->allocflags; -	if (node == -1) -		page = alloc_pages(flags, s->order); -	else -		page = alloc_pages_node(node, flags, s->order); - -	if (!page) -		return NULL; +	page = alloc_slab_page(flags | __GFP_NOWARN | __GFP_NORETRY, node, +									oo); +	if (unlikely(!page)) { +		oo = s->min; +		/* +		 * Allocation may have failed due to fragmentation. +		 * Try a lower order alloc if possible +		 */ +		page = alloc_slab_page(flags, node, oo); +		if (!page) +			return NULL; +		stat(get_cpu_slab(s, raw_smp_processor_id()), ORDER_FALLBACK); +	} +	page->objects = oo_objects(oo);  	mod_zone_page_state(page_zone(page),  		(s->flags & SLAB_RECLAIM_ACCOUNT) ?  		NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, -		pages); +		1 << oo_order(oo));  	return page;  } @@ -1106,7 +1150,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)  	if (!page)  		goto out; -	inc_slabs_node(s, page_to_nid(page)); +	inc_slabs_node(s, page_to_nid(page), page->objects);  	page->slab = s;  	page->flags |= 1 << PG_slab;  	if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON | @@ -1116,10 +1160,10 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)  	start = page_address(page);  	if (unlikely(s->flags & SLAB_POISON)) -		memset(start, POISON_INUSE, PAGE_SIZE << s->order); +		memset(start, POISON_INUSE, PAGE_SIZE << compound_order(page));  	last = start; -	for_each_object(p, s, start) { +	for_each_object(p, s, start, page->objects) {  		setup_object(s, page, last);  		set_freepointer(s, last, p);  		last = p; @@ -1135,13 +1179,15 @@ out:  static void __free_slab(struct kmem_cache *s, struct page *page)  { -	int pages = 1 << s->order; +	int order = compound_order(page); +	int pages = 1 << order;  	if (unlikely(SlabDebug(page))) {  		void *p;  		slab_pad_check(s, page); -		for_each_object(p, s, page_address(page)) +		for_each_object(p, s, page_address(page), +						page->objects)  			check_object(s, page, p, 0);  		ClearSlabDebug(page);  	} @@ -1153,7 +1199,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page)  	__ClearPageSlab(page);  	reset_page_mapcount(page); -	__free_pages(page, s->order); +	__free_pages(page, order);  }  static void rcu_free_slab(struct rcu_head *h) @@ -1179,7 +1225,7 @@ static void free_slab(struct kmem_cache *s, struct page *page)  static void discard_slab(struct kmem_cache *s, struct page *page)  { -	dec_slabs_node(s, page_to_nid(page)); +	dec_slabs_node(s, page_to_nid(page), page->objects);  	free_slab(s, page);  } @@ -1515,7 +1561,7 @@ load_freelist:  		goto debug;  	c->freelist = object[c->offset]; -	c->page->inuse = s->objects; +	c->page->inuse = c->page->objects;  	c->page->freelist = NULL;  	c->node = page_to_nid(c->page);  unlock_out: @@ -1552,27 +1598,6 @@ new_slab:  		c->page = new;  		goto load_freelist;  	} - -	/* -	 * No memory available. -	 * -	 * If the slab uses higher order allocs but the object is -	 * smaller than a page size then we can fallback in emergencies -	 * to the page allocator via kmalloc_large. The page allocator may -	 * have failed to obtain a higher order page and we can try to -	 * allocate a single page if the object fits into a single page. -	 * That is only possible if certain conditions are met that are being -	 * checked when a slab is created. -	 */ -	if (!(gfpflags & __GFP_NORETRY) && -				(s->flags & __PAGE_ALLOC_FALLBACK)) { -		if (gfpflags & __GFP_WAIT) -			local_irq_enable(); -		object = kmalloc_large(s->objsize, gfpflags); -		if (gfpflags & __GFP_WAIT) -			local_irq_disable(); -		return object; -	}  	return NULL;  debug:  	if (!alloc_debug_processing(s, c->page, object, addr)) @@ -1773,8 +1798,8 @@ static struct page *get_object_page(const void *x)   * take the list_lock.   */  static int slub_min_order; -static int slub_max_order = DEFAULT_MAX_ORDER; -static int slub_min_objects = DEFAULT_MIN_OBJECTS; +static int slub_max_order = PAGE_ALLOC_COSTLY_ORDER; +static int slub_min_objects;  /*   * Merge control. If this is set then no merging of slab caches will occur. @@ -1789,7 +1814,7 @@ static int slub_nomerge;   * system components. Generally order 0 allocations should be preferred since   * order 0 does not cause fragmentation in the page allocator. Larger objects   * be problematic to put into order 0 slabs because there may be too much - * unused space left. We go to a higher order if more than 1/8th of the slab + * unused space left. We go to a higher order if more than 1/16th of the slab   * would be wasted.   *   * In order to reach satisfactory performance we must ensure that a minimum @@ -1814,6 +1839,9 @@ static inline int slab_order(int size, int min_objects,  	int rem;  	int min_order = slub_min_order; +	if ((PAGE_SIZE << min_order) / size > 65535) +		return get_order(size * 65535) - 1; +  	for (order = max(min_order,  				fls(min_objects * size - 1) - PAGE_SHIFT);  			order <= max_order; order++) { @@ -1848,8 +1876,10 @@ static inline int calculate_order(int size)  	 * we reduce the minimum objects required in a slab.  	 */  	min_objects = slub_min_objects; +	if (!min_objects) +		min_objects = 4 * (fls(nr_cpu_ids) + 1);  	while (min_objects > 1) { -		fraction = 8; +		fraction = 16;  		while (fraction >= 4) {  			order = slab_order(size, min_objects,  						slub_max_order, fraction); @@ -2091,7 +2121,7 @@ static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags,  	init_tracking(kmalloc_caches, n);  #endif  	init_kmem_cache_node(n); -	inc_slabs_node(kmalloc_caches, node); +	inc_slabs_node(kmalloc_caches, node, page->objects);  	/*  	 * lockdep requires consistent irq usage for each lock @@ -2167,11 +2197,12 @@ static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)   * calculate_sizes() determines the order and the distribution of data within   * a slab object.   */ -static int calculate_sizes(struct kmem_cache *s) +static int calculate_sizes(struct kmem_cache *s, int forced_order)  {  	unsigned long flags = s->flags;  	unsigned long size = s->objsize;  	unsigned long align = s->align; +	int order;  	/*  	 * Round up object size to the next word boundary. We can only @@ -2255,26 +2286,16 @@ static int calculate_sizes(struct kmem_cache *s)  	 */  	size = ALIGN(size, align);  	s->size = size; +	if (forced_order >= 0) +		order = forced_order; +	else +		order = calculate_order(size); -	if ((flags & __KMALLOC_CACHE) && -			PAGE_SIZE / size < slub_min_objects) { -		/* -		 * Kmalloc cache that would not have enough objects in -		 * an order 0 page. Kmalloc slabs can fallback to -		 * page allocator order 0 allocs so take a reasonably large -		 * order that will allows us a good number of objects. -		 */ -		s->order = max(slub_max_order, PAGE_ALLOC_COSTLY_ORDER); -		s->flags |= __PAGE_ALLOC_FALLBACK; -		s->allocflags |= __GFP_NOWARN; -	} else -		s->order = calculate_order(size); - -	if (s->order < 0) +	if (order < 0)  		return 0;  	s->allocflags = 0; -	if (s->order) +	if (order)  		s->allocflags |= __GFP_COMP;  	if (s->flags & SLAB_CACHE_DMA) @@ -2286,9 +2307,12 @@ static int calculate_sizes(struct kmem_cache *s)  	/*  	 * Determine the number of objects per slab  	 */ -	s->objects = (PAGE_SIZE << s->order) / size; +	s->oo = oo_make(order, size); +	s->min = oo_make(get_order(size), size); +	if (oo_objects(s->oo) > oo_objects(s->max)) +		s->max = s->oo; -	return !!s->objects; +	return !!oo_objects(s->oo);  } @@ -2304,7 +2328,7 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags,  	s->align = align;  	s->flags = kmem_cache_flags(size, flags, name, ctor); -	if (!calculate_sizes(s)) +	if (!calculate_sizes(s, -1))  		goto error;  	s->refcount = 1; @@ -2321,7 +2345,7 @@ error:  	if (flags & SLAB_PANIC)  		panic("Cannot create slab %s size=%lu realsize=%u "  			"order=%u offset=%u flags=%lx\n", -			s->name, (unsigned long)size, s->size, s->order, +			s->name, (unsigned long)size, s->size, oo_order(s->oo),  			s->offset, flags);  	return 0;  } @@ -2367,26 +2391,52 @@ const char *kmem_cache_name(struct kmem_cache *s)  }  EXPORT_SYMBOL(kmem_cache_name); +static void list_slab_objects(struct kmem_cache *s, struct page *page, +							const char *text) +{ +#ifdef CONFIG_SLUB_DEBUG +	void *addr = page_address(page); +	void *p; +	DECLARE_BITMAP(map, page->objects); + +	bitmap_zero(map, page->objects); +	slab_err(s, page, "%s", text); +	slab_lock(page); +	for_each_free_object(p, s, page->freelist) +		set_bit(slab_index(p, s, addr), map); + +	for_each_object(p, s, addr, page->objects) { + +		if (!test_bit(slab_index(p, s, addr), map)) { +			printk(KERN_ERR "INFO: Object 0x%p @offset=%tu\n", +							p, p - addr); +			print_tracking(s, p); +		} +	} +	slab_unlock(page); +#endif +} +  /* - * Attempt to free all slabs on a node. Return the number of slabs we - * were unable to free. + * Attempt to free all partial slabs on a node.   */ -static int free_list(struct kmem_cache *s, struct kmem_cache_node *n, -			struct list_head *list) +static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)  { -	int slabs_inuse = 0;  	unsigned long flags;  	struct page *page, *h;  	spin_lock_irqsave(&n->list_lock, flags); -	list_for_each_entry_safe(page, h, list, lru) +	list_for_each_entry_safe(page, h, &n->partial, lru) {  		if (!page->inuse) {  			list_del(&page->lru);  			discard_slab(s, page); -		} else -			slabs_inuse++; +			n->nr_partial--; +		} else { +			list_slab_objects(s, page, +				"Objects remaining on kmem_cache_close()"); +		} +	}  	spin_unlock_irqrestore(&n->list_lock, flags); -	return slabs_inuse;  }  /* @@ -2403,8 +2453,8 @@ static inline int kmem_cache_close(struct kmem_cache *s)  	for_each_node_state(node, N_NORMAL_MEMORY) {  		struct kmem_cache_node *n = get_node(s, node); -		n->nr_partial -= free_list(s, n, &n->partial); -		if (slabs_node(s, node)) +		free_partial(s, n); +		if (n->nr_partial || slabs_node(s, node))  			return 1;  	}  	free_kmem_cache_nodes(s); @@ -2422,8 +2472,11 @@ void kmem_cache_destroy(struct kmem_cache *s)  	if (!s->refcount) {  		list_del(&s->list);  		up_write(&slub_lock); -		if (kmem_cache_close(s)) -			WARN_ON(1); +		if (kmem_cache_close(s)) { +			printk(KERN_ERR "SLUB %s: %s called for cache that " +				"still has objects.\n", s->name, __func__); +			dump_stack(); +		}  		sysfs_slab_remove(s);  	} else  		up_write(&slub_lock); @@ -2482,7 +2535,7 @@ static struct kmem_cache *create_kmalloc_cache(struct kmem_cache *s,  	down_write(&slub_lock);  	if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN, -			flags | __KMALLOC_CACHE, NULL)) +								flags, NULL))  		goto panic;  	list_add(&s->list, &slab_caches); @@ -2730,8 +2783,9 @@ int kmem_cache_shrink(struct kmem_cache *s)  	struct kmem_cache_node *n;  	struct page *page;  	struct page *t; +	int objects = oo_objects(s->max);  	struct list_head *slabs_by_inuse = -		kmalloc(sizeof(struct list_head) * s->objects, GFP_KERNEL); +		kmalloc(sizeof(struct list_head) * objects, GFP_KERNEL);  	unsigned long flags;  	if (!slabs_by_inuse) @@ -2744,7 +2798,7 @@ int kmem_cache_shrink(struct kmem_cache *s)  		if (!n->nr_partial)  			continue; -		for (i = 0; i < s->objects; i++) +		for (i = 0; i < objects; i++)  			INIT_LIST_HEAD(slabs_by_inuse + i);  		spin_lock_irqsave(&n->list_lock, flags); @@ -2776,7 +2830,7 @@ int kmem_cache_shrink(struct kmem_cache *s)  		 * Rebuild the partial list with the slabs filled up most  		 * first and the least used slabs at the end.  		 */ -		for (i = s->objects - 1; i >= 0; i--) +		for (i = objects - 1; i >= 0; i--)  			list_splice(slabs_by_inuse + i, n->partial.prev);  		spin_unlock_irqrestore(&n->list_lock, flags); @@ -2997,9 +3051,6 @@ static int slab_unmergeable(struct kmem_cache *s)  	if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE))  		return 1; -	if ((s->flags & __PAGE_ALLOC_FALLBACK)) -		return 1; -  	if (s->ctor)  		return 1; @@ -3192,7 +3243,8 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,  }  #if (defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG)) || defined(CONFIG_SLABINFO) -static unsigned long count_partial(struct kmem_cache_node *n) +static unsigned long count_partial(struct kmem_cache_node *n, +					int (*get_count)(struct page *))  {  	unsigned long flags;  	unsigned long x = 0; @@ -3200,10 +3252,25 @@ static unsigned long count_partial(struct kmem_cache_node *n)  	spin_lock_irqsave(&n->list_lock, flags);  	list_for_each_entry(page, &n->partial, lru) -		x += page->inuse; +		x += get_count(page);  	spin_unlock_irqrestore(&n->list_lock, flags);  	return x;  } + +static int count_inuse(struct page *page) +{ +	return page->inuse; +} + +static int count_total(struct page *page) +{ +	return page->objects; +} + +static int count_free(struct page *page) +{ +	return page->objects - page->inuse; +}  #endif  #if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG) @@ -3218,7 +3285,7 @@ static int validate_slab(struct kmem_cache *s, struct page *page,  		return 0;  	/* Now we know that a valid freelist exists */ -	bitmap_zero(map, s->objects); +	bitmap_zero(map, page->objects);  	for_each_free_object(p, s, page->freelist) {  		set_bit(slab_index(p, s, addr), map); @@ -3226,7 +3293,7 @@ static int validate_slab(struct kmem_cache *s, struct page *page,  			return 0;  	} -	for_each_object(p, s, addr) +	for_each_object(p, s, addr, page->objects)  		if (!test_bit(slab_index(p, s, addr), map))  			if (!check_object(s, page, p, 1))  				return 0; @@ -3292,7 +3359,7 @@ static long validate_slab_cache(struct kmem_cache *s)  {  	int node;  	unsigned long count = 0; -	unsigned long *map = kmalloc(BITS_TO_LONGS(s->objects) * +	unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *  				sizeof(unsigned long), GFP_KERNEL);  	if (!map) @@ -3495,14 +3562,14 @@ static void process_slab(struct loc_track *t, struct kmem_cache *s,  		struct page *page, enum track_item alloc)  {  	void *addr = page_address(page); -	DECLARE_BITMAP(map, s->objects); +	DECLARE_BITMAP(map, page->objects);  	void *p; -	bitmap_zero(map, s->objects); +	bitmap_zero(map, page->objects);  	for_each_free_object(p, s, page->freelist)  		set_bit(slab_index(p, s, addr), map); -	for_each_object(p, s, addr) +	for_each_object(p, s, addr, page->objects)  		if (!test_bit(slab_index(p, s, addr), map))  			add_location(t, s, get_track(s, p, alloc));  } @@ -3592,22 +3659,23 @@ static int list_locations(struct kmem_cache *s, char *buf,  }  enum slab_stat_type { -	SL_FULL, -	SL_PARTIAL, -	SL_CPU, -	SL_OBJECTS +	SL_ALL,			/* All slabs */ +	SL_PARTIAL,		/* Only partially allocated slabs */ +	SL_CPU,			/* Only slabs used for cpu caches */ +	SL_OBJECTS,		/* Determine allocated objects not slabs */ +	SL_TOTAL		/* Determine object capacity not slabs */  }; -#define SO_FULL		(1 << SL_FULL) +#define SO_ALL		(1 << SL_ALL)  #define SO_PARTIAL	(1 << SL_PARTIAL)  #define SO_CPU		(1 << SL_CPU)  #define SO_OBJECTS	(1 << SL_OBJECTS) +#define SO_TOTAL	(1 << SL_TOTAL)  static ssize_t show_slab_objects(struct kmem_cache *s,  			    char *buf, unsigned long flags)  {  	unsigned long total = 0; -	int cpu;  	int node;  	int x;  	unsigned long *nodes; @@ -3618,56 +3686,60 @@ static ssize_t show_slab_objects(struct kmem_cache *s,  		return -ENOMEM;  	per_cpu = nodes + nr_node_ids; -	for_each_possible_cpu(cpu) { -		struct page *page; -		struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); +	if (flags & SO_CPU) { +		int cpu; -		if (!c) -			continue; +		for_each_possible_cpu(cpu) { +			struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); -		page = c->page; -		node = c->node; -		if (node < 0) -			continue; -		if (page) { -			if (flags & SO_CPU) { -				if (flags & SO_OBJECTS) -					x = page->inuse; +			if (!c || c->node < 0) +				continue; + +			if (c->page) { +					if (flags & SO_TOTAL) +						x = c->page->objects; +				else if (flags & SO_OBJECTS) +					x = c->page->inuse;  				else  					x = 1; +  				total += x; -				nodes[node] += x; +				nodes[c->node] += x;  			} -			per_cpu[node]++; +			per_cpu[c->node]++;  		}  	} -	for_each_node_state(node, N_NORMAL_MEMORY) { -		struct kmem_cache_node *n = get_node(s, node); +	if (flags & SO_ALL) { +		for_each_node_state(node, N_NORMAL_MEMORY) { +			struct kmem_cache_node *n = get_node(s, node); + +		if (flags & SO_TOTAL) +			x = atomic_long_read(&n->total_objects); +		else if (flags & SO_OBJECTS) +			x = atomic_long_read(&n->total_objects) - +				count_partial(n, count_free); -		if (flags & SO_PARTIAL) { -			if (flags & SO_OBJECTS) -				x = count_partial(n);  			else -				x = n->nr_partial; +				x = atomic_long_read(&n->nr_slabs);  			total += x;  			nodes[node] += x;  		} -		if (flags & SO_FULL) { -			int full_slabs = atomic_long_read(&n->nr_slabs) -					- per_cpu[node] -					- n->nr_partial; +	} else if (flags & SO_PARTIAL) { +		for_each_node_state(node, N_NORMAL_MEMORY) { +			struct kmem_cache_node *n = get_node(s, node); -			if (flags & SO_OBJECTS) -				x = full_slabs * s->objects; +			if (flags & SO_TOTAL) +				x = count_partial(n, count_total); +			else if (flags & SO_OBJECTS) +				x = count_partial(n, count_inuse);  			else -				x = full_slabs; +				x = n->nr_partial;  			total += x;  			nodes[node] += x;  		}  	} -  	x = sprintf(buf, "%lu", total);  #ifdef CONFIG_NUMA  	for_each_node_state(node, N_NORMAL_MEMORY) @@ -3682,14 +3754,6 @@ static ssize_t show_slab_objects(struct kmem_cache *s,  static int any_slab_objects(struct kmem_cache *s)  {  	int node; -	int cpu; - -	for_each_possible_cpu(cpu) { -		struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); - -		if (c && c->page) -			return 1; -	}  	for_each_online_node(node) {  		struct kmem_cache_node *n = get_node(s, node); @@ -3697,7 +3761,7 @@ static int any_slab_objects(struct kmem_cache *s)  		if (!n)  			continue; -		if (n->nr_partial || atomic_long_read(&n->nr_slabs)) +		if (atomic_read(&n->total_objects))  			return 1;  	}  	return 0; @@ -3739,15 +3803,27 @@ SLAB_ATTR_RO(object_size);  static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)  { -	return sprintf(buf, "%d\n", s->objects); +	return sprintf(buf, "%d\n", oo_objects(s->oo));  }  SLAB_ATTR_RO(objs_per_slab); +static ssize_t order_store(struct kmem_cache *s, +				const char *buf, size_t length) +{ +	int order = simple_strtoul(buf, NULL, 10); + +	if (order > slub_max_order || order < slub_min_order) +		return -EINVAL; + +	calculate_sizes(s, order); +	return length; +} +  static ssize_t order_show(struct kmem_cache *s, char *buf)  { -	return sprintf(buf, "%d\n", s->order); +	return sprintf(buf, "%d\n", oo_order(s->oo));  } -SLAB_ATTR_RO(order); +SLAB_ATTR(order);  static ssize_t ctor_show(struct kmem_cache *s, char *buf)  { @@ -3768,7 +3844,7 @@ SLAB_ATTR_RO(aliases);  static ssize_t slabs_show(struct kmem_cache *s, char *buf)  { -	return show_slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU); +	return show_slab_objects(s, buf, SO_ALL);  }  SLAB_ATTR_RO(slabs); @@ -3786,10 +3862,22 @@ SLAB_ATTR_RO(cpu_slabs);  static ssize_t objects_show(struct kmem_cache *s, char *buf)  { -	return show_slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU|SO_OBJECTS); +	return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);  }  SLAB_ATTR_RO(objects); +static ssize_t objects_partial_show(struct kmem_cache *s, char *buf) +{ +	return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS); +} +SLAB_ATTR_RO(objects_partial); + +static ssize_t total_objects_show(struct kmem_cache *s, char *buf) +{ +	return show_slab_objects(s, buf, SO_ALL|SO_TOTAL); +} +SLAB_ATTR_RO(total_objects); +  static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)  {  	return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE)); @@ -3869,7 +3957,7 @@ static ssize_t red_zone_store(struct kmem_cache *s,  	s->flags &= ~SLAB_RED_ZONE;  	if (buf[0] == '1')  		s->flags |= SLAB_RED_ZONE; -	calculate_sizes(s); +	calculate_sizes(s, -1);  	return length;  }  SLAB_ATTR(red_zone); @@ -3888,7 +3976,7 @@ static ssize_t poison_store(struct kmem_cache *s,  	s->flags &= ~SLAB_POISON;  	if (buf[0] == '1')  		s->flags |= SLAB_POISON; -	calculate_sizes(s); +	calculate_sizes(s, -1);  	return length;  }  SLAB_ATTR(poison); @@ -3907,7 +3995,7 @@ static ssize_t store_user_store(struct kmem_cache *s,  	s->flags &= ~SLAB_STORE_USER;  	if (buf[0] == '1')  		s->flags |= SLAB_STORE_USER; -	calculate_sizes(s); +	calculate_sizes(s, -1);  	return length;  }  SLAB_ATTR(store_user); @@ -4038,7 +4126,7 @@ STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);  STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);  STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);  STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees); - +STAT_ATTR(ORDER_FALLBACK, order_fallback);  #endif  static struct attribute *slab_attrs[] = { @@ -4047,6 +4135,8 @@ static struct attribute *slab_attrs[] = {  	&objs_per_slab_attr.attr,  	&order_attr.attr,  	&objects_attr.attr, +	&objects_partial_attr.attr, +	&total_objects_attr.attr,  	&slabs_attr.attr,  	&partial_attr.attr,  	&cpu_slabs_attr.attr, @@ -4089,6 +4179,7 @@ static struct attribute *slab_attrs[] = {  	&deactivate_to_head_attr.attr,  	&deactivate_to_tail_attr.attr,  	&deactivate_remote_frees_attr.attr, +	&order_fallback_attr.attr,  #endif  	NULL  }; @@ -4375,7 +4466,8 @@ static int s_show(struct seq_file *m, void *p)  	unsigned long nr_partials = 0;  	unsigned long nr_slabs = 0;  	unsigned long nr_inuse = 0; -	unsigned long nr_objs; +	unsigned long nr_objs = 0; +	unsigned long nr_free = 0;  	struct kmem_cache *s;  	int node; @@ -4389,14 +4481,15 @@ static int s_show(struct seq_file *m, void *p)  		nr_partials += n->nr_partial;  		nr_slabs += atomic_long_read(&n->nr_slabs); -		nr_inuse += count_partial(n); +		nr_objs += atomic_long_read(&n->total_objects); +		nr_free += count_partial(n, count_free);  	} -	nr_objs = nr_slabs * s->objects; -	nr_inuse += (nr_slabs - nr_partials) * s->objects; +	nr_inuse = nr_objs - nr_free;  	seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name, nr_inuse, -		   nr_objs, s->size, s->objects, (1 << s->order)); +		   nr_objs, s->size, oo_objects(s->oo), +		   (1 << oo_order(s->oo)));  	seq_printf(m, " : tunables %4u %4u %4u", 0, 0, 0);  	seq_printf(m, " : slabdata %6lu %6lu %6lu", nr_slabs, nr_slabs,  		   0UL); | 
