From 1f25fe20a76af0d960172fb104d4b13697cafa84 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 15 Jan 2016 16:52:24 -0800 Subject: mm, thp: adjust conditions when we can reuse the page on WP fault With new refcounting we will be able to map the same compound page with PTEs and PMDs. It requires adjustment to conditions when we can reuse the page on write-protection fault. For PTE fault we can't reuse the page if it's part of a huge page. For PMD we can only reuse the page if nobody else maps the huge page or any part of it. We can do it by checking page_mapcount() on each sub-page, but it's expensive. The cheaper way is to check that page_count() is equal to 1: every mapcount takes a page reference, so this way we can guarantee that the PMD is the only mapping. This approach can give a false negative if somebody pinned the page, but that doesn't affect correctness. Signed-off-by: Kirill A. Shutemov Tested-by: Sasha Levin Tested-by: Aneesh Kumar K.V Acked-by: Jerome Marchand Acked-by: Vlastimil Babka Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Dave Hansen Cc: Mel Gorman Cc: Rik van Riel Cc: Naoya Horiguchi Cc: Steve Capper Cc: Johannes Weiner Cc: Michal Hocko Cc: Christoph Lameter Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/swap.h') diff --git a/include/linux/swap.h b/include/linux/swap.h index 066bd21765ad..a282933c5bc6 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -538,7 +538,8 @@ static inline int swp_swapcount(swp_entry_t entry) return 0; } -#define reuse_swap_page(page) (page_mapcount(page) == 1) +#define reuse_swap_page(page) \ + (!PageTransCompound(page) && page_mapcount(page) == 1) static inline int try_to_free_swap(struct page *page) { -- cgit v1.2.3 From 10853a039208c4afaa322a7d802456c8dca222f4 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Fri, 15 Jan 2016 16:55:11 -0800 Subject: mm: move lazily freed
pages to inactive list MADV_FREE is a hint that it's okay to discard pages if there is memory pressure, and we use reclaimers (i.e., kswapd and direct reclaim) to free them, so there is no value in keeping them in the active anonymous LRU. This patch therefore moves them to the inactive LRU list's head. This means that MADV_FREE-ed pages which were living on the inactive list are reclaimed first because they are more likely to be cold rather than recently active pages. An arguable issue for the approach would be whether we should put the page to the head or tail of the inactive list. I chose head because the kernel cannot make sure it's really cold or warm for every MADV_FREE usecase but at least we know it's not *hot*, so landing at the inactive head would be a compromise for various usecases. This fixes suboptimal behavior of MADV_FREE when pages living on the active list will sit there for a long time even under memory pressure while the inactive list is reclaimed heavily. This basically breaks the whole purpose of using MADV_FREE to help the system to free memory which might not be used. Signed-off-by: Minchan Kim Acked-by: Hugh Dickins Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Mel Gorman Cc: Rik van Riel Cc: Shaohua Li Cc: "James E.J. Bottomley" Cc: "Kirill A. Shutemov" Cc: Cc: Andrea Arcangeli Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: Chen Gang Cc: Chris Zankel Cc: Daniel Micay Cc: Darrick J. Wong Cc: David S. Miller Cc: Helge Deller Cc: Ivan Kokshaysky Cc: Jason Evans Cc: KOSAKI Motohiro Cc: Kirill A. 
Shutemov Cc: Matt Turner Cc: Max Filippov Cc: Michael Kerrisk Cc: Mika Penttil Cc: Ralf Baechle Cc: Richard Henderson Cc: Roland Dreier Cc: Russell King Cc: Will Deacon Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 1 + mm/madvise.c | 2 ++ mm/swap.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+) (limited to 'include/linux/swap.h') diff --git a/include/linux/swap.h b/include/linux/swap.h index a282933c5bc6..414e101cd061 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -307,6 +307,7 @@ extern void lru_add_drain_cpu(int cpu); extern void lru_add_drain_all(void); extern void rotate_reclaimable_page(struct page *page); extern void deactivate_file_page(struct page *page); +extern void deactivate_page(struct page *page); extern void swap_setup(void); extern void add_page_to_unevictable_list(struct page *page); diff --git a/mm/madvise.c b/mm/madvise.c index 98e28e777ccb..4e9454622801 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -368,6 +368,8 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, ptent = pte_mkold(ptent); ptent = pte_mkclean(ptent); set_pte_at(mm, addr, pte, ptent); + if (PageActive(page)) + deactivate_page(page); tlb_remove_tlb_entry(tlb, pte, addr); } } diff --git a/mm/swap.c b/mm/swap.c index abffc33bb975..674e2c93da4e 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -45,6 +45,7 @@ int page_cluster; static DEFINE_PER_CPU(struct pagevec, lru_add_pvec); static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs); static DEFINE_PER_CPU(struct pagevec, lru_deactivate_file_pvecs); +static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs); /* * This path almost never happens for VM activity - pages are normally @@ -554,6 +555,24 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec, update_page_reclaim_stat(lruvec, file, 0); } + +static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec, + void *arg) +{ + if 
(PageLRU(page) && PageActive(page) && !PageUnevictable(page)) { + int file = page_is_file_cache(page); + int lru = page_lru_base_type(page); + + del_page_from_lru_list(page, lruvec, lru + LRU_ACTIVE); + ClearPageActive(page); + ClearPageReferenced(page); + add_page_to_lru_list(page, lruvec, lru); + + __count_vm_event(PGDEACTIVATE); + update_page_reclaim_stat(lruvec, file, 0); + } +} + /* * Drain pages out of the cpu's pagevecs. * Either "cpu" is the current CPU, and preemption has already been @@ -580,6 +599,10 @@ void lru_add_drain_cpu(int cpu) if (pagevec_count(pvec)) pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL); + pvec = &per_cpu(lru_deactivate_pvecs, cpu); + if (pagevec_count(pvec)) + pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL); + activate_page_drain(cpu); } @@ -609,6 +632,26 @@ void deactivate_file_page(struct page *page) } } +/** + * deactivate_page - deactivate a page + * @page: page to deactivate + * + * deactivate_page() moves @page to the inactive list if @page was on the active + * list and was not an unevictable page. This is done to accelerate the reclaim + * of @page. + */ +void deactivate_page(struct page *page) +{ + if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) { + struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs); + + page_cache_get(page); + if (!pagevec_add(pvec, page)) + pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL); + put_cpu_var(lru_deactivate_pvecs); + } +} + void lru_add_drain(void) { lru_add_drain_cpu(get_cpu()); @@ -638,6 +681,7 @@ void lru_add_drain_all(void) if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) || pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) || pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) || + pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) || need_activate_page_drain(cpu)) { INIT_WORK(work, lru_add_drain_per_cpu); schedule_work_on(cpu, work); -- cgit v1.2.3