diff options
Diffstat (limited to 'mm/slub.c')
-rw-r--r-- | mm/slub.c | 117 |
1 files changed, 69 insertions, 48 deletions
diff --git a/mm/slub.c b/mm/slub.c index 8b8508adf9c2..06cdb1829dc9 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2007,6 +2007,7 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) int pages; int pobjects; + preempt_disable(); do { pages = 0; pobjects = 0; @@ -2040,6 +2041,14 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage); + if (unlikely(!s->cpu_partial)) { + unsigned long flags; + + local_irq_save(flags); + unfreeze_partials(s, this_cpu_ptr(s->cpu_slab)); + local_irq_restore(flags); + } + preempt_enable(); #endif } @@ -3358,69 +3367,92 @@ void kfree(const void *x) } EXPORT_SYMBOL(kfree); +#define SHRINK_PROMOTE_MAX 32 + /* - * kmem_cache_shrink removes empty slabs from the partial lists and sorts - * the remaining slabs by the number of items in use. The slabs with the - * most items in use come first. New allocations will then fill those up - * and thus they can be removed from the partial lists. + * kmem_cache_shrink discards empty slabs and promotes the slabs filled + * up most to the head of the partial lists. New allocations will then + * fill those up and thus they can be removed from the partial lists. * * The slabs with the least items are placed last. This results in them * being allocated from last increasing the chance that the last objects * are freed in them. */ -int __kmem_cache_shrink(struct kmem_cache *s) +int __kmem_cache_shrink(struct kmem_cache *s, bool deactivate) { int node; int i; struct kmem_cache_node *n; struct page *page; struct page *t; - int objects = oo_objects(s->max); - struct list_head *slabs_by_inuse = - kmalloc(sizeof(struct list_head) * objects, GFP_KERNEL); + struct list_head discard; + struct list_head promote[SHRINK_PROMOTE_MAX]; unsigned long flags; + int ret = 0; - if (!slabs_by_inuse) - return -ENOMEM; + if (deactivate) { + /* + * Disable empty slabs caching. Used to avoid pinning offline + * memory cgroups by kmem pages that can be freed. + */ + s->cpu_partial = 0; + s->min_partial = 0; + + /* + * s->cpu_partial is checked locklessly (see put_cpu_partial), + * so we have to make sure the change is visible. + */ + kick_all_cpus_sync(); + } flush_all(s); for_each_kmem_cache_node(s, node, n) { - if (!n->nr_partial) - continue; - - for (i = 0; i < objects; i++) - INIT_LIST_HEAD(slabs_by_inuse + i); + INIT_LIST_HEAD(&discard); + for (i = 0; i < SHRINK_PROMOTE_MAX; i++) + INIT_LIST_HEAD(promote + i); spin_lock_irqsave(&n->list_lock, flags); /* - * Build lists indexed by the items in use in each slab. + * Build lists of slabs to discard or promote. * * Note that concurrent frees may occur while we hold the * list_lock. page->inuse here is the upper limit. */ list_for_each_entry_safe(page, t, &n->partial, lru) { - list_move(&page->lru, slabs_by_inuse + page->inuse); - if (!page->inuse) + int free = page->objects - page->inuse; + + /* Do not reread page->inuse */ + barrier(); + + /* We do not keep full slabs on the list */ + BUG_ON(free <= 0); + + if (free == page->objects) { + list_move(&page->lru, &discard); n->nr_partial--; + } else if (free <= SHRINK_PROMOTE_MAX) + list_move(&page->lru, promote + free - 1); } /* - * Rebuild the partial list with the slabs filled up most - * first and the least used slabs at the end. + * Promote the slabs filled up most to the head of the + * partial list. */ - for (i = objects - 1; i > 0; i--) - list_splice(slabs_by_inuse + i, n->partial.prev); + for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--) + list_splice(promote + i, &n->partial); spin_unlock_irqrestore(&n->list_lock, flags); /* Release empty slabs */ - list_for_each_entry_safe(page, t, slabs_by_inuse, lru) + list_for_each_entry_safe(page, t, &discard, lru) discard_slab(s, page); + + if (slabs_node(s, node)) + ret = 1; } - kfree(slabs_by_inuse); - return 0; + return ret; } static int slab_mem_going_offline_callback(void *arg) @@ -3429,7 +3461,7 @@ static int slab_mem_going_offline_callback(void *arg) mutex_lock(&slab_mutex); list_for_each_entry(s, &slab_caches, list) - __kmem_cache_shrink(s); + __kmem_cache_shrink(s, false); mutex_unlock(&slab_mutex); return 0; @@ -3577,6 +3609,7 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache) p->slab_cache = s; #endif } + slab_init_memcg_params(s); list_add(&s->list, &slab_caches); return s; } @@ -3635,13 +3668,10 @@ struct kmem_cache * __kmem_cache_alias(const char *name, size_t size, size_t align, unsigned long flags, void (*ctor)(void *)) { - struct kmem_cache *s; + struct kmem_cache *s, *c; s = find_mergeable(size, align, flags, name, ctor); if (s) { - int i; - struct kmem_cache *c; - s->refcount++; /* @@ -3651,10 +3681,7 @@ __kmem_cache_alias(const char *name, size_t size, size_t align, s->object_size = max(s->object_size, (int)size); s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); - for_each_memcg_cache_index(i) { - c = cache_from_memcg_idx(s, i); - if (!c) - continue; + for_each_memcg_cache(c, s) { c->object_size = s->object_size; c->inuse = max_t(int, c->inuse, ALIGN(size, sizeof(void *))); @@ -4691,12 +4718,9 @@ static ssize_t shrink_show(struct kmem_cache *s, char *buf) static ssize_t shrink_store(struct kmem_cache *s, const char *buf, size_t length) { - if (buf[0] == '1') { - int rc = kmem_cache_shrink(s); - - if (rc) - return rc; - } else + if (buf[0] == '1') + kmem_cache_shrink(s); + else return -EINVAL; return length; } @@ -4920,7 +4944,7 @@ static ssize_t slab_attr_store(struct kobject *kobj, err = attribute->store(s, buf, len); #ifdef CONFIG_MEMCG_KMEM if (slab_state >= FULL && err >= 0 && is_root_cache(s)) { - int i; + struct kmem_cache *c; mutex_lock(&slab_mutex); if (s->max_attr_size < len) @@ -4943,11 +4967,8 @@ static ssize_t slab_attr_store(struct kobject *kobj, * directly either failed or succeeded, in which case we loop * through the descendants with best-effort propagation. */ - for_each_memcg_cache_index(i) { - struct kmem_cache *c = cache_from_memcg_idx(s, i); - if (c) - attribute->store(c, buf, len); - } + for_each_memcg_cache(c, s) + attribute->store(c, buf, len); mutex_unlock(&slab_mutex); } #endif @@ -4964,7 +4985,7 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s) if (is_root_cache(s)) return; - root_cache = s->memcg_params->root_cache; + root_cache = s->memcg_params.root_cache; /* * This mean this cache had no attribute written. Therefore, no point @@ -5044,7 +5065,7 @@ static inline struct kset *cache_kset(struct kmem_cache *s) { #ifdef CONFIG_MEMCG_KMEM if (!is_root_cache(s)) - return s->memcg_params->root_cache->memcg_kset; + return s->memcg_params.root_cache->memcg_kset; #endif return slab_kset; } |