Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig           |   9
-rw-r--r--  mm/Kconfig.debug     |   2
-rw-r--r--  mm/debug.c           |   6
-rw-r--r--  mm/filemap.c         | 161
-rw-r--r--  mm/huge_memory.c     |  16
-rw-r--r--  mm/khugepaged.c      |  25
-rw-r--r--  mm/ksm.c             |   3
-rw-r--r--  mm/memcontrol.c      |  67
-rw-r--r--  mm/memory.c          |  14
-rw-r--r--  mm/memory_hotplug.c  |   8
-rw-r--r--  mm/mempolicy.c       |  17
-rw-r--r--  mm/mmap.c            |  13
-rw-r--r--  mm/page-writeback.c  |  26
-rw-r--r--  mm/page_alloc.c      |  55
-rw-r--r--  mm/page_io.c         |   3
-rw-r--r--  mm/readahead.c       |   9
-rw-r--r--  mm/shmem.c           |   5
-rw-r--r--  mm/slab.c            | 114
-rw-r--r--  mm/slub.c            |  65
-rw-r--r--  mm/swapfile.c        |   1
-rw-r--r--  mm/usercopy.c        |  68
-rw-r--r--  mm/vmscan.c          |  41
-rw-r--r--  mm/workingset.c      |  10
23 files changed, 359 insertions, 379 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 78a23c5c302d..be0ee11fa0d9 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -262,7 +262,14 @@ config COMPACTION
select MIGRATION
depends on MMU
help
- Allows the compaction of memory for the allocation of huge pages.
+ Compaction is the only memory management component to form
+ high order (larger physically contiguous) memory blocks
+ reliably. The page allocator relies on compaction heavily and
+ the lack of the feature can lead to unexpected OOM killer
+ invocations for high order memory requests. You shouldn't
+ disable this option unless there really is a strong reason for
+ it and then we would be really interested to hear about that at
+ linux-mm@kvack.org.
#
# support for page migration
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index 22f4cd96acb0..afcc550877ff 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -76,8 +76,6 @@ config PAGE_POISONING_ZERO
no longer necessary to write zeros when GFP_ZERO is used on
allocation.
- Enabling page poisoning with this option will disable hibernation
-
If unsure, say N
bool
diff --git a/mm/debug.c b/mm/debug.c
index 8865bfb41b0b..74c7cae4f683 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -42,9 +42,11 @@ const struct trace_print_flags vmaflag_names[] = {
void __dump_page(struct page *page, const char *reason)
{
+ int mapcount = PageSlab(page) ? 0 : page_mapcount(page);
+
pr_emerg("page:%p count:%d mapcount:%d mapping:%p index:%#lx",
- page, page_ref_count(page), page_mapcount(page),
- page->mapping, page->index);
+ page, page_ref_count(page), mapcount,
+ page->mapping, page_to_pgoff(page));
if (PageCompound(page))
pr_cont(" compound_mapcount: %d", compound_mapcount(page));
pr_cont("\n");
diff --git a/mm/filemap.c b/mm/filemap.c
index 2f1175e86c77..68f1813fbdc3 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -110,36 +110,94 @@
* ->tasklist_lock (memory_failure, collect_procs_ao)
*/
+static int page_cache_tree_insert(struct address_space *mapping,
+ struct page *page, void **shadowp)
+{
+ struct radix_tree_node *node;
+ void **slot;
+ int error;
+
+ error = __radix_tree_create(&mapping->page_tree, page->index, 0,
+ &node, &slot);
+ if (error)
+ return error;
+ if (*slot) {
+ void *p;
+
+ p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
+ if (!radix_tree_exceptional_entry(p))
+ return -EEXIST;
+
+ mapping->nrexceptional--;
+ if (!dax_mapping(mapping)) {
+ if (shadowp)
+ *shadowp = p;
+ if (node)
+ workingset_node_shadows_dec(node);
+ } else {
+ /* DAX can replace empty locked entry with a hole */
+ WARN_ON_ONCE(p !=
+ (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY |
+ RADIX_DAX_ENTRY_LOCK));
+ /* DAX accounts exceptional entries as normal pages */
+ if (node)
+ workingset_node_pages_dec(node);
+ /* Wakeup waiters for exceptional entry lock */
+ dax_wake_mapping_entry_waiter(mapping, page->index,
+ false);
+ }
+ }
+ radix_tree_replace_slot(slot, page);
+ mapping->nrpages++;
+ if (node) {
+ workingset_node_pages_inc(node);
+ /*
+ * Don't track node that contains actual pages.
+ *
+ * Avoid acquiring the list_lru lock if already
+ * untracked. The list_empty() test is safe as
+ * node->private_list is protected by
+ * mapping->tree_lock.
+ */
+ if (!list_empty(&node->private_list))
+ list_lru_del(&workingset_shadow_nodes,
+ &node->private_list);
+ }
+ return 0;
+}
+
static void page_cache_tree_delete(struct address_space *mapping,
struct page *page, void *shadow)
{
- struct radix_tree_node *node;
int i, nr = PageHuge(page) ? 1 : hpage_nr_pages(page);
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(PageTail(page), page);
VM_BUG_ON_PAGE(nr != 1 && shadow, page);
- if (shadow) {
- mapping->nrexceptional += nr;
- /*
- * Make sure the nrexceptional update is committed before
- * the nrpages update so that final truncate racing
- * with reclaim does not see both counters 0 at the
- * same time and miss a shadow entry.
- */
- smp_wmb();
- }
- mapping->nrpages -= nr;
-
for (i = 0; i < nr; i++) {
- node = radix_tree_replace_clear_tags(&mapping->page_tree,
- page->index + i, shadow);
+ struct radix_tree_node *node;
+ void **slot;
+
+ __radix_tree_lookup(&mapping->page_tree, page->index + i,
+ &node, &slot);
+
+ radix_tree_clear_tags(&mapping->page_tree, node, slot);
+
if (!node) {
VM_BUG_ON_PAGE(nr != 1, page);
- return;
+ /*
+ * We need a node to properly account shadow
+ * entries. Don't plant any without. XXX
+ */
+ shadow = NULL;
}
+ radix_tree_replace_slot(slot, shadow);
+
+ if (!node)
+ break;
+
workingset_node_pages_dec(node);
if (shadow)
workingset_node_shadows_inc(node);
@@ -163,6 +221,18 @@ static void page_cache_tree_delete(struct address_space *mapping,
&node->private_list);
}
}
+
+ if (shadow) {
+ mapping->nrexceptional += nr;
+ /*
+ * Make sure the nrexceptional update is committed before
+ * the nrpages update so that final truncate racing
+ * with reclaim does not see both counters 0 at the
+ * same time and miss a shadow entry.
+ */
+ smp_wmb();
+ }
+ mapping->nrpages -= nr;
}
/*
@@ -561,9 +631,8 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
spin_lock_irqsave(&mapping->tree_lock, flags);
__delete_from_page_cache(old, NULL);
- error = radix_tree_insert(&mapping->page_tree, offset, new);
+ error = page_cache_tree_insert(mapping, new, NULL);
BUG_ON(error);
- mapping->nrpages++;
/*
* hugetlb pages do not participate in page cache accounting.
@@ -584,62 +653,6 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
}
EXPORT_SYMBOL_GPL(replace_page_cache_page);
-static int page_cache_tree_insert(struct address_space *mapping,
- struct page *page, void **shadowp)
-{
- struct radix_tree_node *node;
- void **slot;
- int error;
-
- error = __radix_tree_create(&mapping->page_tree, page->index, 0,
- &node, &slot);
- if (error)
- return error;
- if (*slot) {
- void *p;
-
- p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
- if (!radix_tree_exceptional_entry(p))
- return -EEXIST;
-
- mapping->nrexceptional--;
- if (!dax_mapping(mapping)) {
- if (shadowp)
- *shadowp = p;
- if (node)
- workingset_node_shadows_dec(node);
- } else {
- /* DAX can replace empty locked entry with a hole */
- WARN_ON_ONCE(p !=
- (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY |
- RADIX_DAX_ENTRY_LOCK));
- /* DAX accounts exceptional entries as normal pages */
- if (node)
- workingset_node_pages_dec(node);
- /* Wakeup waiters for exceptional entry lock */
- dax_wake_mapping_entry_waiter(mapping, page->index,
- false);
- }
- }
- radix_tree_replace_slot(slot, page);
- mapping->nrpages++;
- if (node) {
- workingset_node_pages_inc(node);
- /*
- * Don't track node that contains actual pages.
- *
- * Avoid acquiring the list_lru lock if already
- * untracked. The list_empty() test is safe as
- * node->private_list is protected by
- * mapping->tree_lock.
- */
- if (!list_empty(&node->private_list))
- list_lru_del(&workingset_shadow_nodes,
- &node->private_list);
- }
- return 0;
-}
-
static int __add_to_page_cache_locked(struct page *page,
struct address_space *mapping,
pgoff_t offset, gfp_t gfp_mask,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2373f0a7d340..283583fcb1e7 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1078,7 +1078,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
goto out;
page = pmd_page(*pmd);
- VM_BUG_ON_PAGE(!PageHead(page), page);
+ VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page);
if (flags & FOLL_TOUCH)
touch_pmd(vma, addr, pmd);
if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
@@ -1116,7 +1116,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
}
skip_mlock:
page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT;
- VM_BUG_ON_PAGE(!PageCompound(page), page);
+ VM_BUG_ON_PAGE(!PageCompound(page) && !is_zone_device_page(page), page);
if (flags & FOLL_GET)
get_page(page);
@@ -1138,9 +1138,6 @@ int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd)
bool was_writable;
int flags = 0;
- /* A PROT_NONE fault should not end up here */
- BUG_ON(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)));
-
fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
if (unlikely(!pmd_same(pmd, *fe->pmd)))
goto out_unlock;
@@ -1168,7 +1165,7 @@ int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd)
}
/* See similar comment in do_numa_page for explanation */
- if (!(vma->vm_flags & VM_WRITE))
+ if (!pmd_write(pmd))
flags |= TNF_NO_GROUP;
/*
@@ -1512,7 +1509,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
struct page *page;
pgtable_t pgtable;
pmd_t _pmd;
- bool young, write, dirty;
+ bool young, write, dirty, soft_dirty;
unsigned long addr;
int i;
@@ -1546,6 +1543,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
write = pmd_write(*pmd);
young = pmd_young(*pmd);
dirty = pmd_dirty(*pmd);
+ soft_dirty = pmd_soft_dirty(*pmd);
pmdp_huge_split_prepare(vma, haddr, pmd);
pgtable = pgtable_trans_huge_withdraw(mm, pmd);
@@ -1562,6 +1560,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
swp_entry_t swp_entry;
swp_entry = make_migration_entry(page + i, write);
entry = swp_entry_to_pte(swp_entry);
+ if (soft_dirty)
+ entry = pte_swp_mksoft_dirty(entry);
} else {
entry = mk_pte(page + i, vma->vm_page_prot);
entry = maybe_mkwrite(entry, vma);
@@ -1569,6 +1569,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
entry = pte_wrprotect(entry);
if (!young)
entry = pte_mkold(entry);
+ if (soft_dirty)
+ entry = pte_mksoft_dirty(entry);
}
if (dirty)
SetPageDirty(page + i);
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 79c52d0061af..728d7790dc2d 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -838,7 +838,8 @@ static bool hugepage_vma_check(struct vm_area_struct *vma)
* value (scan code).
*/
-static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address)
+static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
+ struct vm_area_struct **vmap)
{
struct vm_area_struct *vma;
unsigned long hstart, hend;
@@ -846,7 +847,7 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address)
if (unlikely(khugepaged_test_exit(mm)))
return SCAN_ANY_PROCESS;
- vma = find_vma(mm, address);
+ *vmap = vma = find_vma(mm, address);
if (!vma)
return SCAN_VMA_NULL;
@@ -881,6 +882,11 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
.pmd = pmd,
};
+ /* we only decide to swapin, if there is enough young ptes */
+ if (referenced < HPAGE_PMD_NR/2) {
+ trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
+ return false;
+ }
fe.pte = pte_offset_map(pmd, address);
for (; fe.address < address + HPAGE_PMD_NR*PAGE_SIZE;
fe.pte++, fe.address += PAGE_SIZE) {
@@ -888,17 +894,12 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
if (!is_swap_pte(pteval))
continue;
swapped_in++;
- /* we only decide to swapin, if there is enough young ptes */
- if (referenced < HPAGE_PMD_NR/2) {
- trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
- return false;
- }
ret = do_swap_page(&fe, pteval);
/* do_swap_page returns VM_FAULT_RETRY with released mmap_sem */
if (ret & VM_FAULT_RETRY) {
down_read(&mm->mmap_sem);
- if (hugepage_vma_revalidate(mm, address)) {
+ if (hugepage_vma_revalidate(mm, address, &fe.vma)) {
/* vma is no longer available, don't continue to swapin */
trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
return false;
@@ -923,7 +924,6 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
static void collapse_huge_page(struct mm_struct *mm,
unsigned long address,
struct page **hpage,
- struct vm_area_struct *vma,
int node, int referenced)
{
pmd_t *pmd, _pmd;
@@ -933,6 +933,7 @@ static void collapse_huge_page(struct mm_struct *mm,
spinlock_t *pmd_ptl, *pte_ptl;
int isolated = 0, result = 0;
struct mem_cgroup *memcg;
+ struct vm_area_struct *vma;
unsigned long mmun_start; /* For mmu_notifiers */
unsigned long mmun_end; /* For mmu_notifiers */
gfp_t gfp;
@@ -961,7 +962,7 @@ static void collapse_huge_page(struct mm_struct *mm,
}
down_read(&mm->mmap_sem);
- result = hugepage_vma_revalidate(mm, address);
+ result = hugepage_vma_revalidate(mm, address, &vma);
if (result) {
mem_cgroup_cancel_charge(new_page, memcg, true);
up_read(&mm->mmap_sem);
@@ -994,7 +995,7 @@ static void collapse_huge_page(struct mm_struct *mm,
* handled by the anon_vma lock + PG_lock.
*/
down_write(&mm->mmap_sem);
- result = hugepage_vma_revalidate(mm, address);
+ result = hugepage_vma_revalidate(mm, address, &vma);
if (result)
goto out;
/* check if the pmd is still valid */
@@ -1202,7 +1203,7 @@ out_unmap:
if (ret) {
node = khugepaged_find_target_node();
/* collapse_huge_page will return with the mmap_sem released */
- collapse_huge_page(mm, address, hpage, vma, node, referenced);
+ collapse_huge_page(mm, address, hpage, node, referenced);
}
out:
trace_mm_khugepaged_scan_pmd(mm, page, writable, referenced,
diff --git a/mm/ksm.c b/mm/ksm.c
index 73d43bafd9fb..5048083b60f2 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -283,7 +283,8 @@ static inline struct rmap_item *alloc_rmap_item(void)
{
struct rmap_item *rmap_item;
- rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL);
+ rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL |
+ __GFP_NORETRY | __GFP_NOWARN);
if (rmap_item)
ksm_rmap_items++;
return rmap_item;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2ff0289ad061..4be518d4e68a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1740,17 +1740,22 @@ static DEFINE_MUTEX(percpu_charge_mutex);
static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
{
struct memcg_stock_pcp *stock;
+ unsigned long flags;
bool ret = false;
if (nr_pages > CHARGE_BATCH)
return ret;
- stock = &get_cpu_var(memcg_stock);
+ local_irq_save(flags);
+
+ stock = this_cpu_ptr(&memcg_stock);
if (memcg == stock->cached && stock->nr_pages >= nr_pages) {
stock->nr_pages -= nr_pages;
ret = true;
}
- put_cpu_var(memcg_stock);
+
+ local_irq_restore(flags);
+
return ret;
}
@@ -1771,15 +1776,18 @@ static void drain_stock(struct memcg_stock_pcp *stock)
stock->cached = NULL;
}
-/*
- * This must be called under preempt disabled or must be called by
- * a thread which is pinned to local cpu.
- */
static void drain_local_stock(struct work_struct *dummy)
{
- struct memcg_stock_pcp *stock = this_cpu_ptr(&memcg_stock);
+ struct memcg_stock_pcp *stock;
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ stock = this_cpu_ptr(&memcg_stock);
drain_stock(stock);
clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
+
+ local_irq_restore(flags);
}
/*
@@ -1788,14 +1796,19 @@ static void drain_local_stock(struct work_struct *dummy)
*/
static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
{
- struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock);
+ struct memcg_stock_pcp *stock;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ stock = this_cpu_ptr(&memcg_stock);
if (stock->cached != memcg) { /* reset if necessary */
drain_stock(stock);
stock->cached = memcg;
}
stock->nr_pages += nr_pages;
- put_cpu_var(memcg_stock);
+
+ local_irq_restore(flags);
}
/*
@@ -4082,24 +4095,6 @@ static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n)
atomic_add(n, &memcg->id.ref);
}
-static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
-{
- while (!atomic_inc_not_zero(&memcg->id.ref)) {
- /*
- * The root cgroup cannot be destroyed, so it's refcount must
- * always be >= 1.
- */
- if (WARN_ON_ONCE(memcg == root_mem_cgroup)) {
- VM_BUG_ON(1);
- break;
- }
- memcg = parent_mem_cgroup(memcg);
- if (!memcg)
- memcg = root_mem_cgroup;
- }
- return memcg;
-}
-
static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
{
if (atomic_sub_and_test(n, &memcg->id.ref)) {
@@ -5821,6 +5816,24 @@ static int __init mem_cgroup_init(void)
subsys_initcall(mem_cgroup_init);
#ifdef CONFIG_MEMCG_SWAP
+static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
+{
+ while (!atomic_inc_not_zero(&memcg->id.ref)) {
+ /*
+ * The root cgroup cannot be destroyed, so it's refcount must
+ * always be >= 1.
+ */
+ if (WARN_ON_ONCE(memcg == root_mem_cgroup)) {
+ VM_BUG_ON(1);
+ break;
+ }
+ memcg = parent_mem_cgroup(memcg);
+ if (!memcg)
+ memcg = root_mem_cgroup;
+ }
+ return memcg;
+}
+
/**
* mem_cgroup_swapout - transfer a memsw charge to swap
* @page: page whose memsw charge to transfer
diff --git a/mm/memory.c b/mm/memory.c
index 83be99d9d8a1..f1a68049edff 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3351,9 +3351,6 @@ static int do_numa_page(struct fault_env *fe, pte_t pte)
bool was_writable = pte_write(pte);
int flags = 0;
- /* A PROT_NONE fault should not end up here */
- BUG_ON(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)));
-
/*
* The "pte" at this point cannot be used safely without
* validation through pte_unmap_same(). It's of NUMA type but
@@ -3398,7 +3395,7 @@ static int do_numa_page(struct fault_env *fe, pte_t pte)
* pte_dirty has unpredictable behaviour between PTE scan updates,
* background writeback, dirty balancing and application behaviour.
*/
- if (!(vma->vm_flags & VM_WRITE))
+ if (!pte_write(pte))
flags |= TNF_NO_GROUP;
/*
@@ -3458,6 +3455,11 @@ static int wp_huge_pmd(struct fault_env *fe, pmd_t orig_pmd)
return VM_FAULT_FALLBACK;
}
+static inline bool vma_is_accessible(struct vm_area_struct *vma)
+{
+ return vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE);
+}
+
/*
* These routines also need to handle stuff like marking pages dirty
* and/or accessed for architectures that don't do it in hardware (most
@@ -3524,7 +3526,7 @@ static int handle_pte_fault(struct fault_env *fe)
if (!pte_present(entry))
return do_swap_page(fe, entry);
- if (pte_protnone(entry))
+ if (pte_protnone(entry) && vma_is_accessible(fe->vma))
return do_numa_page(fe, entry);
fe->ptl = pte_lockptr(fe->vma->vm_mm, fe->pmd);
@@ -3590,7 +3592,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
barrier();
if (pmd_trans_huge(orig_pmd) || pmd_devmap(orig_pmd)) {
- if (pmd_protnone(orig_pmd))
+ if (pmd_protnone(orig_pmd) && vma_is_accessible(vma))
return do_huge_pmd_numa_page(&fe, orig_pmd);
if ((fe.flags & FAULT_FLAG_WRITE) &&
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 41266dc29f33..9d29ba0f7192 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1555,8 +1555,8 @@ static struct page *new_node_page(struct page *page, unsigned long private,
{
gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;
int nid = page_to_nid(page);
- nodemask_t nmask = node_online_map;
- struct page *new_page;
+ nodemask_t nmask = node_states[N_MEMORY];
+ struct page *new_page = NULL;
/*
* TODO: allocate a destination hugepage from a nearest neighbor node,
@@ -1568,11 +1568,13 @@ static struct page *new_node_page(struct page *page, unsigned long private,
next_node_in(nid, nmask));
node_clear(nid, nmask);
+
if (PageHighMem(page)
|| (zone_idx(page_zone(page)) == ZONE_MOVABLE))
gfp_mask |= __GFP_HIGHMEM;
- new_page = __alloc_pages_nodemask(gfp_mask, 0,
+ if (!nodes_empty(nmask))
+ new_page = __alloc_pages_nodemask(gfp_mask, 0,
node_zonelist(nid, gfp_mask), &nmask);
if (!new_page)
new_page = __alloc_pages(gfp_mask, 0,
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d8c4e38fb5f4..2da72a5b6ecc 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2336,6 +2336,23 @@ out:
return ret;
}
+/*
+ * Drop the (possibly final) reference to task->mempolicy. It needs to be
+ * dropped after task->mempolicy is set to NULL so that any allocation done as
+ * part of its kmem_cache_free(), such as by KASAN, doesn't reference a freed
+ * policy.
+ */
+void mpol_put_task_policy(struct task_struct *task)
+{
+ struct mempolicy *pol;
+
+ task_lock(task);
+ pol = task->mempolicy;
+ task->mempolicy = NULL;
+ task_unlock(task);
+ mpol_put(pol);
+}
+
static void sp_delete(struct shared_policy *sp, struct sp_node *n)
{
pr_debug("deleting %lx-l%lx\n", n->start, n->end);
diff --git a/mm/mmap.c b/mm/mmap.c
index ca9d91bca0d6..7a0707a48047 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -88,6 +88,11 @@ static void unmap_region(struct mm_struct *mm,
* w: (no) no w: (no) no w: (copy) copy w: (no) no
* x: (no) no x: (no) yes x: (no) yes x: (yes) yes
*
+ * On arm64, PROT_EXEC has the following behaviour for both MAP_SHARED and
+ * MAP_PRIVATE:
+ * r: (no) no
+ * w: (no) no
+ * x: (yes) yes
*/
pgprot_t protection_map[16] = {
__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
@@ -3063,6 +3068,14 @@ out:
return ERR_PTR(ret);
}
+bool vma_is_special_mapping(const struct vm_area_struct *vma,
+ const struct vm_special_mapping *sm)
+{
+ return vma->vm_private_data == sm &&
+ (vma->vm_ops == &special_mapping_vmops ||
+ vma->vm_ops == &legacy_special_mapping_vmops);
+}
+
/*
* Called with mm->mmap_sem held for writing.
* Insert a new vma covering the given region, with the given flags.
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index f4cd7d8005c9..28d6f36a2d79 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2080,26 +2080,12 @@ void writeback_set_ratelimit(void)
ratelimit_pages = 16;
}
-static int
-ratelimit_handler(struct notifier_block *self, unsigned long action,
- void *hcpu)
+static int page_writeback_cpu_online(unsigned int cpu)
{
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_ONLINE:
- case CPU_DEAD:
- writeback_set_ratelimit();
- return NOTIFY_OK;
- default:
- return NOTIFY_DONE;
- }
+ writeback_set_ratelimit();
+ return 0;
}
-static struct notifier_block ratelimit_nb = {
- .notifier_call = ratelimit_handler,
- .next = NULL,
-};
-
/*
* Called early on to tune the page writeback dirty limits.
*
@@ -2122,8 +2108,10 @@ void __init page_writeback_init(void)
{
BUG_ON(wb_domain_init(&global_wb_domain, GFP_KERNEL));
- writeback_set_ratelimit();
- register_cpu_notifier(&ratelimit_nb);
+ cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mm/writeback:online",
+ page_writeback_cpu_online, NULL);
+ cpuhp_setup_state(CPUHP_MM_WRITEBACK_DEAD, "mm/writeback:dead", NULL,
+ page_writeback_cpu_online);
}
/**
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3fbe73a6fe4b..a2214c64ed3c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3137,54 +3137,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
return NULL;
}
-static inline bool
-should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
- enum compact_result compact_result,
- enum compact_priority *compact_priority,
- int compaction_retries)
-{
- int max_retries = MAX_COMPACT_RETRIES;
-
- if (!order)
- return false;
-
- /*
- * compaction considers all the zone as desperately out of memory
- * so it doesn't really make much sense to retry except when the
- * failure could be caused by insufficient priority
- */
- if (compaction_failed(compact_result)) {
- if (*compact_priority > MIN_COMPACT_PRIORITY) {
- (*compact_priority)--;
- return true;
- }
- return false;
- }
-
- /*
- * make sure the compaction wasn't deferred or didn't bail out early
- * due to locks contention before we declare that we should give up.
- * But do not retry if the given zonelist is not suitable for
- * compaction.
- */
- if (compaction_withdrawn(compact_result))
- return compaction_zonelist_suitable(ac, order, alloc_flags);
-
- /*
- * !costly requests are much more important than __GFP_REPEAT
- * costly ones because they are de facto nofail and invoke OOM
- * killer to move on while costly can fail and users are ready
- * to cope with that. 1/4 retries is rather arbitrary but we
- * would need much more detailed feedback from compaction to
- * make a better decision.
- */
- if (order > PAGE_ALLOC_COSTLY_ORDER)
- max_retries /= 4;
- if (compaction_retries <= max_retries)
- return true;
-
- return false;
-}
#else
static inline struct page *
__alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
@@ -3195,6 +3147,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
return NULL;
}
+#endif /* CONFIG_COMPACTION */
+
static inline bool
should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_flags,
enum compact_result compact_result,
@@ -3221,7 +3175,6 @@ should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_fla
}
return false;
}
-#endif /* CONFIG_COMPACTION */
/* Perform direct synchronous page reclaim */
static int
@@ -4407,7 +4360,7 @@ static int build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist,
do {
zone_type--;
zone = pgdat->node_zones + zone_type;
- if (populated_zone(zone)) {
+ if (managed_zone(zone)) {
zoneref_set_zone(zone,
&zonelist->_zonerefs[nr_zones++]);
check_highest_zone(zone_type);
@@ -4645,7 +4598,7 @@ static void build_zonelists_in_zone_order(pg_data_t *pgdat, int nr_nodes)
for (j = 0; j < nr_nodes; j++) {
node = node_order[j];
z = &NODE_DATA(node)->node_zones[zone_type];
- if (populated_zone(z)) {
+ if (managed_zone(z)) {
zoneref_set_zone(z,
&zonelist->_zonerefs[pos++]);
check_highest_zone(zone_type);
diff --git a/mm/page_io.c b/mm/page_io.c
index 16bd82fad38c..eafe5ddc2b54 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -264,6 +264,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
int ret;
struct swap_info_struct *sis = page_swap_info(page);
+ BUG_ON(!PageSwapCache(page));
if (sis->flags & SWP_FILE) {
struct kiocb kiocb;
struct file *swap_file = sis->swap_file;
@@ -337,6 +338,7 @@ int swap_readpage(struct page *page)
int ret = 0;
struct swap_info_struct *sis = page_swap_info(page);
+ BUG_ON(!PageSwapCache(page));
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(PageUptodate(page), page);
if (frontswap_load(page) == 0) {
@@ -386,6 +388,7 @@ int swap_set_page_dirty(struct page *page)
if (sis->flags & SWP_FILE) {
struct address_space *mapping = sis->swap_file->f_mapping;
+ BUG_ON(!PageSwapCache(page));
return mapping->a_ops->set_page_dirty(page);
} else {
return __set_page_dirty_no_writeback(page);
diff --git a/mm/readahead.c b/mm/readahead.c
index 65ec288dc057..c8a955b1297e 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -8,6 +8,7 @@
*/
#include <linux/kernel.h>
+#include <linux/dax.h>
#include <linux/gfp.h>
#include <linux/export.h>
#include <linux/blkdev.h>
@@ -544,6 +545,14 @@ do_readahead(struct address_space *mapping, struct file *filp,
if (!mapping || !mapping->a_ops)
return -EINVAL;
+ /*
+ * Readahead doesn't make sense for DAX inodes, but we don't want it
+ * to report a failure either. Instead, we just return success and
+ * don't do any work.
+ */
+ if (dax_mapping(mapping))
+ return 0;
+
return force_page_cache_readahead(mapping, filp, index, nr);
}
diff --git a/mm/shmem.c b/mm/shmem.c
index fd8b2b5741b1..971fc83e6402 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -270,7 +270,7 @@ bool shmem_charge(struct inode *inode, long pages)
info->alloced -= pages;
shmem_recalc_inode(inode);
spin_unlock_irqrestore(&info->lock, flags);
-
+ shmem_unacct_blocks(info->flags, pages);
return false;
}
percpu_counter_add(&sbinfo->used_blocks, pages);
@@ -291,6 +291,7 @@ void shmem_uncharge(struct inode *inode, long pages)
if (sbinfo->max_blocks)
percpu_counter_sub(&sbinfo->used_blocks, pages);
+ shmem_unacct_blocks(info->flags, pages);
}
/*
@@ -1980,7 +1981,7 @@ unsigned long shmem_get_unmapped_area(struct file *file,
return addr;
sb = shm_mnt->mnt_sb;
}
- if (SHMEM_SB(sb)->huge != SHMEM_HUGE_NEVER)
+ if (SHMEM_SB(sb)->huge == SHMEM_HUGE_NEVER)
return addr;
}
diff --git a/mm/slab.c b/mm/slab.c
index b67271024135..090fb26b3a39 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -886,6 +886,7 @@ static int init_cache_node(struct kmem_cache *cachep, int node, gfp_t gfp)
return 0;
}
+#if (defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)) || defined(CONFIG_SMP)
/*
* Allocates and initializes node for a node on each slab cache, used for
* either memory or cpu hotplug. If memory is being hot-added, the kmem_cache_node
@@ -908,6 +909,7 @@ static int init_cache_node_node(int node)
return 0;
}
+#endif
static int setup_kmem_cache_node(struct kmem_cache *cachep,
int node, gfp_t gfp, bool force_change)
@@ -975,6 +977,8 @@ fail:
return ret;
}
+#ifdef CONFIG_SMP
+
static void cpuup_canceled(long cpu)
{
struct kmem_cache *cachep;
@@ -1075,65 +1079,54 @@ bad:
return -ENOMEM;
}
-static int cpuup_callback(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
+int slab_prepare_cpu(unsigned int cpu)
{
- long cpu = (long)hcpu;
- int err = 0;
+ int err;
- switch (action) {
- case CPU_UP_PREPARE:
- case CPU_UP_PREPARE_FROZEN:
- mutex_lock(&slab_mutex);
- err = cpuup_prepare(cpu);
- mutex_unlock(&slab_mutex);
- break;
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- start_cpu_timer(cpu);
- break;
-#ifdef CONFIG_HOTPLUG_CPU
- case CPU_DOWN_PREPARE:
- case CPU_DOWN_PREPARE_FROZEN:
- /*
- * Shutdown cache reaper. Note that the slab_mutex is
- * held so that if cache_reap() is invoked it cannot do
- * anything expensive but will only modify reap_work
- * and reschedule the timer.
- */
- cancel_delayed_work_sync(&per_cpu(slab_reap_work, cpu));
- /* Now the cache_reaper is guaranteed to be not running. */
- per_cpu(slab_reap_work, cpu).work.func = NULL;
- break;
- case CPU_DOWN_FAILED:
- case CPU_DOWN_FAILED_FROZEN:
- start_cpu_timer(cpu);
- break;
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- /*
- * Even if all the cpus of a node are down, we don't free the
- * kmem_cache_node of any cache. This to avoid a race between
- * cpu_down, and a kmalloc allocation from another cpu for
- * memory from the node of the cpu going down. The node
- * structure is usually allocated from kmem_cache_create() and
- * gets destroyed at kmem_cache_destroy().
- */
- /* fall through */
+ mutex_lock(&slab_mutex);
+ err = cpuup_prepare(cpu);
+ mutex_unlock(&slab_mutex);
+ return err;
+}
+
+/*
+ * This is called for a failed online attempt and for a successful
+ * offline.
+ *
+ * Even if all the cpus of a node are down, we don't free the
+ * kmem_list3 of any cache. This to avoid a race between cpu_down, and
+ * a kmalloc allocation from another cpu for memory from the node of
+ * the cpu going down. The list3 structure is usually allocated from
+ * kmem_cache_create() and gets destroyed at kmem_cache_destroy().
+ */
+int slab_dead_cpu(unsigned int cpu)
+{
+ mutex_lock(&slab_mutex);
+ cpuup_canceled(cpu);
+ mutex_unlock(&slab_mutex);
+ return 0;
+}
#endif
- case CPU_UP_CANCELED:
- case CPU_UP_CANCELED_FROZEN:
- mutex_lock(&slab_mutex);
- cpuup_canceled(cpu);
- mutex_unlock(&slab_mutex);
- break;
- }
- return notifier_from_errno(err);
+
+static int slab_online_cpu(unsigned int cpu)
+{
+ start_cpu_timer(cpu);
+ return 0;
}
-static struct notifier_block cpucache_notifier = {
- &cpuup_callback, NULL, 0
-};
+static int slab_offline_cpu(unsigned int cpu)
+{
+ /*
+ * Shutdown cache reaper. Note that the slab_mutex is held so
+ * that if cache_reap() is invoked it cannot do anything
+ * expensive but will only modify reap_work and reschedule the
+ * timer.
+ */
+ cancel_delayed_work_sync(&per_cpu(slab_reap_work, cpu));
+ /* Now the cache_reaper is guaranteed to be not running. */
+ per_cpu(slab_reap_work, cpu).work.func = NULL;
+ return 0;
+}
#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
/*
@@ -1336,12 +1329,6 @@ void __init kmem_cache_init_late(void)
/* Done! */
slab_state = FULL;
- /*
- * Register a cpu startup notifier callback that initializes
- * cpu_cache_get for all new cpus
- */
- register_cpu_notifier(&cpucache_notifier);
-
#ifdef CONFIG_NUMA
/*
* Register a memory hotplug callback that initializes and frees
@@ -1358,13 +1345,14 @@ void __init kmem_cache_init_late(void)
static int __init cpucache_init(void)
{
- int cpu;
+ int ret;
/*
* Register the timers that return unneeded pages to the page allocator
*/
- for_each_online_cpu(cpu)
- start_cpu_timer(cpu);
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "SLAB online",
+ slab_online_cpu, slab_offline_cpu);
+ WARN_ON(ret < 0);
/* Done! */
slab_state = FULL;
diff --git a/mm/slub.c b/mm/slub.c
index 9adae58462f8..2b3e740609e9 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -194,10 +194,6 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
#define __OBJECT_POISON 0x80000000UL /* Poison object */
#define __CMPXCHG_DOUBLE 0x40000000UL /* Use cmpxchg_double */
-#ifdef CONFIG_SMP
-static struct notifier_block slab_notifier;
-#endif
-
/*
* Tracking user of a slab.
*/
@@ -2305,6 +2301,25 @@ static void flush_all(struct kmem_cache *s)
}
/*
+ * Use the cpu notifier to insure that the cpu slabs are flushed when
+ * necessary.
+ */
+static int slub_cpu_dead(unsigned int cpu)
+{
+ struct kmem_cache *s;
+ unsigned long flags;
+
+ mutex_lock(&slab_mutex);
+ list_for_each_entry(s, &slab_caches, list) {
+ local_irq_save(flags);
+ __flush_cpu_slab(s, cpu);
+ local_irq_restore(flags);
+ }
+ mutex_unlock(&slab_mutex);
+ return 0;
+}
+
+/*
* Check if the objects in a per cpu structure fit numa
* locality expectations.
*/
@@ -4144,9 +4159,8 @@ void __init kmem_cache_init(void)
/* Setup random freelists for each cache */
init_freelist_randomization();
-#ifdef CONFIG_SMP
- register_cpu_notifier(&slab_notifier);
-#endif
+ cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", NULL,
+ slub_cpu_dead);
pr_info("SLUB: HWalign=%d, Order=%d-%d, MinObjects=%d, CPUs=%d, Nodes=%d\n",
cache_line_size(),
@@ -4210,43 +4224,6 @@ int __kmem_cache_create(struct kmem_cache *s, unsigned long flags)
return err;
}
-#ifdef CONFIG_SMP
-/*
- * Use the cpu notifier to insure that the cpu slabs are flushed when
- * necessary.
- */
-static int slab_cpuup_callback(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
-{
- long cpu = (long)hcpu;
- struct kmem_cache *s;
- unsigned long flags;
-
- switch (action) {
- case CPU_UP_CANCELED:
- case CPU_UP_CANCELED_FROZEN:
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- mutex_lock(&slab_mutex);
- list_for_each_entry(s, &slab_caches, list) {
- local_irq_save(flags);
- __flush_cpu_slab(s, cpu);
- local_irq_restore(flags);
- }
- mutex_unlock(&slab_mutex);
- break;
- default:
- break;
- }
- return NOTIFY_OK;
-}
-
-static struct notifier_block slab_notifier = {
- .notifier_call = slab_cpuup_callback
-};
-
-#endif
-
void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
{
struct kmem_cache *s;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 78cfa292a29a..2657accc6e2b 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2724,7 +2724,6 @@ int swapcache_prepare(swp_entry_t entry)
struct swap_info_struct *page_swap_info(struct page *page)
{
swp_entry_t swap = { .val = page_private(page) };
- BUG_ON(!PageSwapCache(page));
return swap_info[swp_type(swap)];
}
diff --git a/mm/usercopy.c b/mm/usercopy.c
index 8ebae91a6b55..3c8da0af9695 100644
--- a/mm/usercopy.c
+++ b/mm/usercopy.c
@@ -83,7 +83,7 @@ static bool overlaps(const void *ptr, unsigned long n, unsigned long low,
unsigned long check_high = check_low + n;
/* Does not overlap if entirely above or entirely below. */
- if (check_low >= high || check_high < low)
+ if (check_low >= high || check_high <= low)
return false;
return true;
@@ -124,7 +124,7 @@ static inline const char *check_kernel_text_object(const void *ptr,
static inline const char *check_bogus_address(const void *ptr, unsigned long n)
{
/* Reject if object wraps past end of memory. */
- if (ptr + n < ptr)
+ if ((unsigned long)ptr + n < (unsigned long)ptr)
return "<wrapped address>";
/* Reject if NULL or ZERO-allocation. */
@@ -134,31 +134,16 @@ static inline const char *check_bogus_address(const void *ptr, unsigned long n)
return NULL;
}
-static inline const char *check_heap_object(const void *ptr, unsigned long n,
- bool to_user)
+/* Checks for allocs that are marked in some way as spanning multiple pages. */
+static inline const char *check_page_span(const void *ptr, unsigned long n,
+ struct page *page, bool to_user)
{
- struct page *page, *endpage;
+#ifdef CONFIG_HARDENED_USERCOPY_PAGESPAN
const void *end = ptr + n - 1;
+ struct page *endpage;
bool is_reserved, is_cma;
/*
- * Some architectures (arm64) return true for virt_addr_valid() on
- * vmalloced addresses. Work around this by checking for vmalloc
- * first.
- */
- if (is_vmalloc_addr(ptr))
- return NULL;
-
- if (!virt_addr_valid(ptr))
- return NULL;
-
- page = virt_to_head_page(ptr);
-
- /* Check slab allocator for flags and size. */
- if (PageSlab(page))
- return __check_heap_object(ptr, n, page);
-
- /*
* Sometimes the kernel data regions are not marked Reserved (see
* check below). And sometimes [_sdata,_edata) does not cover
* rodata and/or bss, so check each range explicitly.
@@ -186,7 +171,7 @@ static inline const char *check_heap_object(const void *ptr, unsigned long n,
((unsigned long)end & (unsigned long)PAGE_MASK)))
return NULL;
- /* Allow if start and end are inside the same compound page. */
+ /* Allow if fully inside the same compound (__GFP_COMP) page. */
endpage = virt_to_head_page(end);
if (likely(endpage == page))
return NULL;
@@ -199,20 +184,47 @@ static inline const char *check_heap_object(const void *ptr, unsigned long n,
is_reserved = PageReserved(page);
is_cma = is_migrate_cma_page(page);
if (!is_reserved && !is_cma)
- goto reject;
+ return "<spans multiple pages>";
for (ptr += PAGE_SIZE; ptr <= end; ptr += PAGE_SIZE) {
page = virt_to_head_page(ptr);
if (is_reserved && !PageReserved(page))
- goto reject;
+ return "<spans Reserved and non-Reserved pages>";
if (is_cma && !is_migrate_cma_page(page))
- goto reject;
+ return "<spans CMA and non-CMA pages>";
}
+#endif
return NULL;
+}
+
+static inline const char *check_heap_object(const void *ptr, unsigned long n,
+ bool to_user)
+{
+ struct page *page;
+
+ /*
+ * Some architectures (arm64) return true for virt_addr_valid() on
+ * vmalloced addresses. Work around this by checking for vmalloc
+ * first.
+ *
+ * We also need to check for module addresses explicitly since we
+ * may copy static data from modules to userspace
+ */
+ if (is_vmalloc_or_module_addr(ptr))
+ return NULL;
+
+ if (!virt_addr_valid(ptr))
+ return NULL;
+
+ page = virt_to_head_page(ptr);
+
+ /* Check slab allocator for flags and size. */
+ if (PageSlab(page))
+ return __check_heap_object(ptr, n, page);
-reject:
- return "<spans multiple pages>";
+ /* Verify object does not incorrectly span multiple pages. */
+ return check_page_span(ptr, n, page, to_user);
}
/*
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 374d95d04178..0fe8b7113868 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1665,7 +1665,7 @@ static bool inactive_reclaimable_pages(struct lruvec *lruvec,
for (zid = sc->reclaim_idx; zid >= 0; zid--) {
zone = &pgdat->node_zones[zid];
- if (!populated_zone(zone))
+ if (!managed_zone(zone))
continue;
if (zone_page_state_snapshot(zone, NR_ZONE_LRU_BASE +
@@ -2036,7 +2036,7 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
struct zone *zone = &pgdat->node_zones[zid];
unsigned long inactive_zone, active_zone;
- if (!populated_zone(zone))
+ if (!managed_zone(zone))
continue;
inactive_zone = zone_page_state(zone,
@@ -2171,7 +2171,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
for (z = 0; z < MAX_NR_ZONES; z++) {
struct zone *zone = &pgdat->node_zones[z];
- if (!populated_zone(zone))
+ if (!managed_zone(zone))
continue;
total_high_wmark += high_wmark_pages(zone);
@@ -2303,23 +2303,6 @@ out:
}
}
-#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
-static void init_tlb_ubc(void)
-{
- /*
- * This deliberately does not clear the cpumask as it's expensive
- * and unnecessary. If there happens to be data in there then the
- * first SWAP_CLUSTER_MAX pages will send an unnecessary IPI and
- * then will be cleared.
- */
- current->tlb_ubc.flush_required = false;
-}
-#else
-static inline void init_tlb_ubc(void)
-{
-}
-#endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
-
/*
* This is a basic per-node page freer. Used by both kswapd and direct reclaim.
*/
@@ -2355,8 +2338,6 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc
scan_adjusted = (global_reclaim(sc) && !current_is_kswapd() &&
sc->priority == DEF_PRIORITY);
- init_tlb_ubc();
-
blk_start_plug(&plug);
while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
nr[LRU_INACTIVE_FILE]) {
@@ -2510,7 +2491,7 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
/* If compaction would go ahead or the allocation would succeed, stop */
for (z = 0; z <= sc->reclaim_idx; z++) {
struct zone *zone = &pgdat->node_zones[z];
- if (!populated_zone(zone))
+ if (!managed_zone(zone))
continue;
switch (compaction_suitable(zone, sc->order, 0, sc->reclaim_idx)) {
@@ -2840,7 +2821,7 @@ static bool pfmemalloc_watermark_ok(pg_data_t *pgdat)
for (i = 0; i <= ZONE_NORMAL; i++) {
zone = &pgdat->node_zones[i];
- if (!populated_zone(zone) ||
+ if (!managed_zone(zone) ||
pgdat_reclaimable_pages(pgdat) == 0)
continue;
@@ -3141,7 +3122,7 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, int classzone_idx)
for (i = 0; i <= classzone_idx; i++) {
struct zone *zone = pgdat->node_zones + i;
- if (!populated_zone(zone))
+ if (!managed_zone(zone))
continue;
if (!zone_balanced(zone, order, classzone_idx))
@@ -3169,7 +3150,7 @@ static bool kswapd_shrink_node(pg_data_t *pgdat,
sc->nr_to_reclaim = 0;
for (z = 0; z <= sc->reclaim_idx; z++) {
zone = pgdat->node_zones + z;
- if (!populated_zone(zone))
+ if (!managed_zone(zone))
continue;
sc->nr_to_reclaim += max(high_wmark_pages(zone), SWAP_CLUSTER_MAX);
@@ -3242,7 +3223,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
if (buffer_heads_over_limit) {
for (i = MAX_NR_ZONES - 1; i >= 0; i--) {
zone = pgdat->node_zones + i;
- if (!populated_zone(zone))
+ if (!managed_zone(zone))
continue;
sc.reclaim_idx = i;
@@ -3262,7 +3243,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
*/
for (i = classzone_idx; i >= 0; i--) {
zone = pgdat->node_zones + i;
- if (!populated_zone(zone))
+ if (!managed_zone(zone))
continue;
if (zone_balanced(zone, sc.order, classzone_idx))
@@ -3508,7 +3489,7 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
pg_data_t *pgdat;
int z;
- if (!populated_zone(zone))
+ if (!managed_zone(zone))
return;
if (!cpuset_zone_allowed(zone, GFP_KERNEL | __GFP_HARDWALL))
@@ -3522,7 +3503,7 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
/* Only wake kswapd if all zones are unbalanced */
for (z = 0; z <= classzone_idx; z++) {
zone = pgdat->node_zones + z;
- if (!populated_zone(zone))
+ if (!managed_zone(zone))
continue;
if (zone_balanced(zone, order, classzone_idx))
diff --git a/mm/workingset.c b/mm/workingset.c
index 69551cfae97b..617475f529f4 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -418,21 +418,19 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
* no pages, so we expect to be able to remove them all and
* delete and free the empty node afterwards.
*/
-
- BUG_ON(!node->count);
- BUG_ON(node->count & RADIX_TREE_COUNT_MASK);
+ BUG_ON(!workingset_node_shadows(node));
+ BUG_ON(workingset_node_pages(node));
for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
if (node->slots[i]) {
BUG_ON(!radix_tree_exceptional_entry(node->slots[i]));
node->slots[i] = NULL;
- BUG_ON(node->count < (1U << RADIX_TREE_COUNT_SHIFT));
- node->count -= 1U << RADIX_TREE_COUNT_SHIFT;
+ workingset_node_shadows_dec(node);
BUG_ON(!mapping->nrexceptional);
mapping->nrexceptional--;
}
}
- BUG_ON(node->count);
+ BUG_ON(workingset_node_shadows(node));
inc_node_state(page_pgdat(virt_to_page(node)), WORKINGSET_NODERECLAIM);
if (!__radix_tree_delete_node(&mapping->page_tree, node))
BUG();