From 7f39dda9d86fb4f4f17af0de170decf125726f8c Mon Sep 17 00:00:00 2001
From: Hugh Dickins
Date: Wed, 4 Jun 2014 16:05:33 -0700
Subject: mm: fix sleeping function warning from __put_anon_vma

Trinity reports:

  BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:47
  in_atomic(): 0, irqs_disabled(): 0, pid: 5787, name: trinity-c27
  __might_sleep < down_write < __put_anon_vma < page_get_anon_vma <
  migrate_pages < compact_zone < compact_zone_order < try_to_compact_pages ..

Right, since the conversion to mutex and then to rwsem, we should not
put_anon_vma() from inside an rcu_read_lock()ed section: fix the two
places that did so.  And add might_sleep() to anon_vma_free(), as
suggested by Peter Zijlstra.

Fixes: 88c22088bf23 ("mm: optimize page_lock_anon_vma() fast-path")
Reported-by: Dave Jones
Signed-off-by: Hugh Dickins
Cc: Peter Zijlstra
Cc:
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/rmap.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'mm/rmap.c')

diff --git a/mm/rmap.c b/mm/rmap.c
index 9c3e77396d1a..10aef960d3d0 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -103,6 +103,7 @@ static inline void anon_vma_free(struct anon_vma *anon_vma)
	 * LOCK should suffice since the actual taking of the lock must
	 * happen _before_ what follows.
	 */
+	might_sleep();
	if (rwsem_is_locked(&anon_vma->root->rwsem)) {
		anon_vma_lock_write(anon_vma);
		anon_vma_unlock_write(anon_vma);
@@ -426,8 +427,9 @@ struct anon_vma *page_get_anon_vma(struct page *page)
	 * above cannot corrupt).
	 */
	if (!page_mapped(page)) {
+		rcu_read_unlock();
		put_anon_vma(anon_vma);
-		anon_vma = NULL;
+		return NULL;
	}
 out:
	rcu_read_unlock();
@@ -477,9 +479,9 @@ struct anon_vma *page_lock_anon_vma_read(struct page *page)
	}

	if (!page_mapped(page)) {
+		rcu_read_unlock();
		put_anon_vma(anon_vma);
-		anon_vma = NULL;
-		goto out;
+		return NULL;
	}

	/* we pinned the anon_vma, its safe to sleep */
--
cgit v1.2.3


From b43790eedd31e9535b89bbfa45793919e9504c34 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov
Date: Wed, 4 Jun 2014 16:06:42 -0700
Subject: mm: softdirty: don't forget to save file map softdirty bit on unmap

pte_file_mksoft_dirty() takes its argument by value and returns the
modified result, so we need to assign the return value back to @ptfile
here; otherwise the call is a no-op, which may lead to loss of the
soft-dirty bit.

Signed-off-by: Cyrill Gorcunov
Cc: Pavel Emelyanov
Cc: Hugh Dickins
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/rmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'mm/rmap.c')

diff --git a/mm/rmap.c b/mm/rmap.c
index 10aef960d3d0..7da400d5d98e 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1361,7 +1361,7 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
		if (page->index != linear_page_index(vma, address)) {
			pte_t ptfile = pgoff_to_pte(page->index);
			if (pte_soft_dirty(pteval))
-				pte_file_mksoft_dirty(ptfile);
+				ptfile = pte_file_mksoft_dirty(ptfile);
			set_pte_at(mm, address, pte, ptfile);
		}

--
cgit v1.2.3
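
A brief aside on the bug class fixed above: a helper that takes its
argument by value and returns a modified copy does nothing unless the
caller assigns the result back.  The sketch below is an illustration
only, with made-up toy names standing in for pte_t and
pte_file_mksoft_dirty(); it is not kernel code.

  #include <stdio.h>

  /* Toy stand-in for a pte value. */
  typedef unsigned long toy_pte_t;

  #define TOY_SOFT_DIRTY 0x4UL

  /* Takes the pte by value and returns a modified copy. */
  static toy_pte_t toy_mksoft_dirty(toy_pte_t pte)
  {
          return pte | TOY_SOFT_DIRTY;
  }

  int main(void)
  {
          toy_pte_t ptfile = 0x1000;

          toy_mksoft_dirty(ptfile);             /* no-op: result is discarded */
          printf("without assignment: %#lx\n", ptfile);

          ptfile = toy_mksoft_dirty(ptfile);    /* the fix: assign it back */
          printf("with assignment:    %#lx\n", ptfile);
          return 0;
  }

Annotating such by-value helpers with __attribute__((warn_unused_result))
lets the compiler flag the broken call at build time.
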
From ac7695012a6f3269acd80d6c2b2218a6769edbf3 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov"
Date: Wed, 4 Jun 2014 16:08:17 -0700
Subject: mm/rmap.c: make page_referenced_one() and try_to_unmap_one() static

KSM was converted to use rmap_walk() and now nobody uses these functions
outside mm/rmap.c.  Let's convert them back to static.

Signed-off-by: Kirill A. Shutemov
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/rmap.h | 4 ----
 mm/rmap.c            | 4 ++--
 2 files changed, 2 insertions(+), 6 deletions(-)

(limited to 'mm/rmap.c')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index b66c2110cb1f..9be55c7617da 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -183,14 +183,10 @@ static inline void page_dup_rmap(struct page *page)
  */
 int page_referenced(struct page *, int is_locked,
			struct mem_cgroup *memcg, unsigned long *vm_flags);
-int page_referenced_one(struct page *, struct vm_area_struct *,
-	unsigned long address, void *arg);

 #define TTU_ACTION(x) ((x) & TTU_ACTION_MASK)

 int try_to_unmap(struct page *, enum ttu_flags flags);
-int try_to_unmap_one(struct page *, struct vm_area_struct *,
-			unsigned long address, void *arg);

 /*
  * Called from mm/filemap_xip.c to unmap empty zero page
diff --git a/mm/rmap.c b/mm/rmap.c
index 7da400d5d98e..8754e1fa83b6 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -671,7 +671,7 @@ struct page_referenced_arg {
 /*
  * arg: page_referenced_arg will be passed
  */
-int page_referenced_one(struct page *page, struct vm_area_struct *vma,
+static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
	unsigned long address, void *arg)
 {
	struct mm_struct *mm = vma->vm_mm;
@@ -1114,7 +1114,7 @@ out:
 /*
  * @arg: enum ttu_flags will be passed to this argument
  */
-int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
+static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
	unsigned long address, void *arg)
 {
	struct mm_struct *mm = vma->vm_mm;
--
cgit v1.2.3


From bea04b073292b2acb522c7c1aa67a4fc58151530 Mon Sep 17 00:00:00 2001
From: Jianyu Zhan
Date: Wed, 4 Jun 2014 16:09:51 -0700
Subject: mm: use the light version __mod_zone_page_state in mlocked_vma_newpage()

mlocked_vma_newpage() is called with the pte lock held (a spinlock),
which implies preemption disabled, and the vm stat counter is not
modified from interrupt context, so we need not use the irq-safe
mod_zone_page_state() here; using the light-weight version
__mod_zone_page_state() would be OK.

This patch also documents __mod_zone_page_state() and some of its
callsites.  The comment above __mod_zone_page_state() is from Hugh
Dickins, and acked by Christoph.  Most of the credit for clarifying the
usage of __mod_zone_page_state() goes to Hugh and Christoph.

[akpm@linux-foundation.org: coding-style fixes]
Suggested-by: Andrew Morton
Acked-by: Hugh Dickins
Signed-off-by: Jianyu Zhan
Reviewed-by: Christoph Lameter
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/internal.h |  7 ++++++-
 mm/rmap.c     | 11 +++++++++++
 mm/vmstat.c   |  4 +++-
 3 files changed, 20 insertions(+), 2 deletions(-)

(limited to 'mm/rmap.c')

diff --git a/mm/internal.h b/mm/internal.h
index a25424a24e0c..e067984bafa0 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -201,7 +201,12 @@ static inline int mlocked_vma_newpage(struct vm_area_struct *vma,
		return 0;

	if (!TestSetPageMlocked(page)) {
-		mod_zone_page_state(page_zone(page), NR_MLOCK,
+		/*
+		 * We use the irq-unsafe __mod_zone_page_stat because this
+		 * counter is not modified from interrupt context, and the pte
+		 * lock is held(spinlock), which implies preemption disabled.
+		 */
+		__mod_zone_page_state(page_zone(page), NR_MLOCK,
				    hpage_nr_pages(page));
		count_vm_event(UNEVICTABLE_PGMLOCKED);
	}
diff --git a/mm/rmap.c b/mm/rmap.c
index 8754e1fa83b6..4644e10248f0 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -988,6 +988,12 @@ void do_page_add_anon_rmap(struct page *page,
 {
	int first = atomic_inc_and_test(&page->_mapcount);
	if (first) {
+		/*
+		 * We use the irq-unsafe __{inc|mod}_zone_page_stat because
+		 * these counters are not modified in interrupt context, and
+		 * pte lock(a spinlock) is held, which implies preemption
+		 * disabled.
+		 */
		if (PageTransHuge(page))
			__inc_zone_page_state(page,
					      NR_ANON_TRANSPARENT_HUGEPAGES);
@@ -1079,6 +1085,11 @@ void page_remove_rmap(struct page *page)
	/*
	 * Hugepages are not counted in NR_ANON_PAGES nor NR_FILE_MAPPED
	 * and not charged by memcg for now.
+	 *
+	 * We use the irq-unsafe __{inc|mod}_zone_page_stat because
+	 * these counters are not modified in interrupt context, and
+	 * these counters are not modified in interrupt context, and
+	 * pte lock(a spinlock) is held, which implies preemption disabled.
	 */
	if (unlikely(PageHuge(page)))
		goto out;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 376bd2d21482..b37bd49bfd55 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -207,7 +207,9 @@ void set_pgdat_percpu_threshold(pg_data_t *pgdat,
 }

 /*
- * For use when we know that interrupts are disabled.
+ * For use when we know that interrupts are disabled,
+ * or when we know that preemption is disabled and that
+ * particular counter cannot be updated from interrupt context.
 */
 void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
				int delta)
--
cgit v1.2.3
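
A brief aside on the reasoning used in the patch above: a plain
"counter += delta" is a load/add/store sequence, so it is only safe when
no other context can interleave with it.  __mod_zone_page_state() relies
on the caller guaranteeing exactly that (the counter is never modified
from interrupt context, and preemption is excluded, here by holding the
pte spinlock).  The stand-alone sketch below only illustrates the
lost-update problem such guarantees prevent; the names are made up and it
is not kernel code (build with -pthread).

  #include <pthread.h>
  #include <stdio.h>

  static long nr_stat;                    /* plain counter, no protection */

  static void *worker(void *arg)
  {
          (void)arg;
          for (int i = 0; i < 1000000; i++)
                  nr_stat += 1;           /* non-atomic read-modify-write */
          return NULL;
  }

  int main(void)
  {
          pthread_t a, b;

          pthread_create(&a, NULL, worker, NULL);
          pthread_create(&b, NULL, worker, NULL);
          pthread_join(a, NULL);
          pthread_join(b, NULL);

          /* Usually prints less than 2000000: interleaved updates were lost. */
          printf("nr_stat = %ld (expected 2000000)\n", nr_stat);
          return 0;
  }
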
From 7ee07a44eb53374a73544ae14c71366a02d462e0 Mon Sep 17 00:00:00 2001
From: Jianyu Zhan
Date: Wed, 4 Jun 2014 16:09:52 -0700
Subject: mm: fold mlocked_vma_newpage() into its only call site

The previous commit ("mm: use the light version __mod_zone_page_state in
mlocked_vma_newpage()") switched to the irq-unsafe
__mod_zone_page_state().  As suggested by Andrew, to reduce the risk of
new call sites using mlocked_vma_newpage() incorrectly, without realizing
the races they would be adding, this patch folds mlocked_vma_newpage()
into its only call site, page_add_new_anon_rmap(), making it open-coded
so people can see what is going on.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Jianyu Zhan
Suggested-by: Andrew Morton
Suggested-by: Hugh Dickins
Acked-by: Hugh Dickins
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/internal.h | 29 -----------------------------
 mm/rmap.c     | 20 +++++++++++++++++---
 2 files changed, 17 insertions(+), 32 deletions(-)

(limited to 'mm/rmap.c')

diff --git a/mm/internal.h b/mm/internal.h
index e067984bafa0..802c3a4fc03a 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -188,31 +188,6 @@ static inline void munlock_vma_pages_all(struct vm_area_struct *vma)
	munlock_vma_pages_range(vma, vma->vm_start, vma->vm_end);
 }

-/*
- * Called only in fault path, to determine if a new page is being
- * mapped into a LOCKED vma.  If it is, mark page as mlocked.
- */
-static inline int mlocked_vma_newpage(struct vm_area_struct *vma,
-				    struct page *page)
-{
-	VM_BUG_ON_PAGE(PageLRU(page), page);
-
-	if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED))
-		return 0;
-
-	if (!TestSetPageMlocked(page)) {
-		/*
-		 * We use the irq-unsafe __mod_zone_page_stat because this
-		 * counter is not modified from interrupt context, and the pte
-		 * lock is held(spinlock), which implies preemption disabled.
-		 */
-		__mod_zone_page_state(page_zone(page), NR_MLOCK,
-				    hpage_nr_pages(page));
-		count_vm_event(UNEVICTABLE_PGMLOCKED);
-	}
-	return 1;
-}
-
 /*
  * must be called with vma's mmap_sem held for read or write, and page locked.
  */
@@ -255,10 +230,6 @@ extern unsigned long vma_address(struct page *page,
					struct vm_area_struct *vma);
 #endif
 #else /* !CONFIG_MMU */
-static inline int mlocked_vma_newpage(struct vm_area_struct *v, struct page *p)
-{
-	return 0;
-}
 static inline void clear_page_mlock(struct page *page) { }
 static inline void mlock_vma_page(struct page *page) { }
 static inline void mlock_migrate_page(struct page *new, struct page *old) { }
diff --git a/mm/rmap.c b/mm/rmap.c
index 4644e10248f0..e375ce4bd93e 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1032,11 +1032,25 @@ void page_add_new_anon_rmap(struct page *page,
	__mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
			hpage_nr_pages(page));
	__page_set_anon_rmap(page, vma, address, 1);
-	if (!mlocked_vma_newpage(vma, page)) {
+
+	VM_BUG_ON_PAGE(PageLRU(page), page);
+	if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) {
		SetPageActive(page);
		lru_cache_add(page);
-	} else
-		add_page_to_unevictable_list(page);
+		return;
+	}
+
+	if (!TestSetPageMlocked(page)) {
+		/*
+		 * We use the irq-unsafe __mod_zone_page_stat because this
+		 * counter is not modified from interrupt context, and the pte
+		 * lock is held(spinlock), which implies preemption disabled.
+		 */
+		__mod_zone_page_state(page_zone(page), NR_MLOCK,
+				    hpage_nr_pages(page));
+		count_vm_event(UNEVICTABLE_PGMLOCKED);
+	}
+	add_page_to_unevictable_list(page);
 }

 /**
--
cgit v1.2.3
From 3d92860f979f725a9c10c2fc26c0415a4332adbf Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov
Date: Wed, 4 Jun 2014 16:10:51 -0700
Subject: mm/rmap.c: don't call mmu_notifier_invalidate_page() during munlock

In its munlock mode, try_to_unmap_one() only searches for other mlocked
vmas; it never unmaps pages.  There is no reason for invalidation because
the ptes are left unchanged.

Signed-off-by: Konstantin Khlebnikov
Cc: Rik van Riel
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/rmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'mm/rmap.c')

diff --git a/mm/rmap.c b/mm/rmap.c
index e375ce4bd93e..ab74290d185d 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1252,7 +1252,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,

 out_unmap:
	pte_unmap_unlock(pte, ptl);
-	if (ret != SWAP_FAIL)
+	if (ret != SWAP_FAIL && TTU_ACTION(flags) != TTU_MUNLOCK)
		mmu_notifier_invalidate_page(mm, address);
 out:
	return ret;
--
cgit v1.2.3


From daa5ba768b9e15da8867824d2f1e8d455f1acac2 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov
Date: Wed, 4 Jun 2014 16:10:52 -0700
Subject: mm/rmap.c: cleanup ttu_flags

Transform the action part of ttu_flags into individual bits.  These flags
aren't part of any user-space visible API or even trace events.

Signed-off-by: Konstantin Khlebnikov
Cc: Rik van Riel
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/rmap.h |  7 +++----
 mm/rmap.c            | 10 +++++-----
 2 files changed, 8 insertions(+), 9 deletions(-)

(limited to 'mm/rmap.c')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 9be55c7617da..be574506e6a9 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -72,10 +72,9 @@ struct anon_vma_chain {
 };

 enum ttu_flags {
-	TTU_UNMAP = 0,			/* unmap mode */
-	TTU_MIGRATION = 1,		/* migration mode */
-	TTU_MUNLOCK = 2,		/* munlock mode */
-	TTU_ACTION_MASK = 0xff,
+	TTU_UNMAP = 1,			/* unmap mode */
+	TTU_MIGRATION = 2,		/* migration mode */
+	TTU_MUNLOCK = 4,		/* munlock mode */

	TTU_IGNORE_MLOCK = (1 << 8),	/* ignore mlock */
	TTU_IGNORE_ACCESS = (1 << 9),	/* don't age */
diff --git a/mm/rmap.c b/mm/rmap.c
index ab74290d185d..ea8e20d75b29 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1162,7 +1162,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
		if (vma->vm_flags & VM_LOCKED)
			goto out_mlock;

-		if (TTU_ACTION(flags) == TTU_MUNLOCK)
+		if (flags & TTU_MUNLOCK)
			goto out_unmap;
	}
	if (!(flags & TTU_IGNORE_ACCESS)) {
@@ -1230,7 +1230,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
			 * pte. do_swap_page() will wait until the migration
			 * pte is removed and then restart fault handling.
			 */
-			BUG_ON(TTU_ACTION(flags) != TTU_MIGRATION);
+			BUG_ON(!(flags & TTU_MIGRATION));
			entry = make_migration_entry(page, pte_write(pteval));
		}
		swp_pte = swp_entry_to_pte(entry);
@@ -1239,7 +1239,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
		set_pte_at(mm, address, pte, swp_pte);
		BUG_ON(pte_file(*pte));
	} else if (IS_ENABLED(CONFIG_MIGRATION) &&
-		   (TTU_ACTION(flags) == TTU_MIGRATION)) {
+		   (flags & TTU_MIGRATION)) {
		/* Establish migration entry for a file page */
		swp_entry_t entry;
		entry = make_migration_entry(page, pte_write(pteval));
@@ -1252,7 +1252,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,

 out_unmap:
	pte_unmap_unlock(pte, ptl);
-	if (ret != SWAP_FAIL && TTU_ACTION(flags) != TTU_MUNLOCK)
+	if (ret != SWAP_FAIL && !(flags & TTU_MUNLOCK))
		mmu_notifier_invalidate_page(mm, address);
 out:
	return ret;
@@ -1539,7 +1539,7 @@ int try_to_unmap(struct page *page, enum ttu_flags flags)
	 * locking requirements of exec(), migration skips
	 * temporary VMAs until after exec() completes.
	 */
-	if (flags & TTU_MIGRATION && !PageKsm(page) && PageAnon(page))
+	if ((flags & TTU_MIGRATION) && !PageKsm(page) && PageAnon(page))
		rwc.invalid_vma = invalid_migration_vma;

	ret = rmap_walk(page, &rwc);
--
cgit v1.2.3
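
A closing aside on the ttu_flags cleanup above: turning the packed
"action value + mask" encoding into individual bits lets callers use a
plain bit test instead of masking and comparing.  The stand-alone sketch
below mirrors only the numeric pattern of the patch; the OLD_*/NEW_*
names are made up for illustration and are not the kernel definitions.

  #include <stdio.h>

  /* Old style: the action is a small value kept in the low byte. */
  enum old_flags {
          OLD_UNMAP        = 0,
          OLD_MIGRATION    = 1,
          OLD_MUNLOCK      = 2,
          OLD_ACTION_MASK  = 0xff,
          OLD_IGNORE_MLOCK = (1 << 8),
  };
  #define OLD_ACTION(x) ((x) & OLD_ACTION_MASK)

  /* New style: each action is an individual bit. */
  enum new_flags {
          NEW_UNMAP        = 1,
          NEW_MIGRATION    = 2,
          NEW_MUNLOCK      = 4,
          NEW_IGNORE_MLOCK = (1 << 8),
  };

  int main(void)
  {
          int old = OLD_MUNLOCK | OLD_IGNORE_MLOCK;
          int new = NEW_MUNLOCK | NEW_IGNORE_MLOCK;

          /* The old test needs the mask plus an equality check... */
          printf("old munlock? %d\n", OLD_ACTION(old) == OLD_MUNLOCK);
          /* ...the new test is a plain bit test, as in the patch above. */
          printf("new munlock? %d\n", (new & NEW_MUNLOCK) != 0);
          return 0;
  }

Note also that the old encoding made the default unmap action the value
zero, which cannot be tested with a mask, while in the new scheme every
mode, including plain unmap, is a real, testable bit.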