diff options
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r-- | mm/vmscan.c | 462 |
1 files changed, 184 insertions, 278 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c index 8deb5f4da4d9..347b3ff2a478 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -78,6 +78,9 @@ struct scan_control { int order; + /* Scan (total_size >> priority) pages at once */ + int priority; + /* * The memory cgroup that hit its limit and as a result is the * primary target of this reclaim invocation. @@ -91,11 +94,6 @@ struct scan_control { nodemask_t *nodemask; }; -struct mem_cgroup_zone { - struct mem_cgroup *mem_cgroup; - struct zone *zone; -}; - #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) #ifdef ARCH_HAS_PREFETCH @@ -147,24 +145,14 @@ static bool global_reclaim(struct scan_control *sc) } #endif -static struct zone_reclaim_stat *get_reclaim_stat(struct mem_cgroup_zone *mz) -{ - return &mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup)->reclaim_stat; -} - -static unsigned long zone_nr_lru_pages(struct mem_cgroup_zone *mz, - enum lru_list lru) +static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru) { if (!mem_cgroup_disabled()) - return mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup, - zone_to_nid(mz->zone), - zone_idx(mz->zone), - BIT(lru)); + return mem_cgroup_get_lru_size(lruvec, lru); - return zone_page_state(mz->zone, NR_LRU_BASE + lru); + return zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru); } - /* * Add a shrinker callback to be called from the vm */ @@ -626,7 +614,6 @@ enum page_references { }; static enum page_references page_check_references(struct page *page, - struct mem_cgroup_zone *mz, struct scan_control *sc) { int referenced_ptes, referenced_page; @@ -685,9 +672,8 @@ static enum page_references page_check_references(struct page *page, * shrink_page_list() returns the number of reclaimed pages */ static unsigned long shrink_page_list(struct list_head *page_list, - struct mem_cgroup_zone *mz, + struct zone *zone, struct scan_control *sc, - int priority, unsigned long *ret_nr_dirty, unsigned long *ret_nr_writeback) { @@ -716,7 +702,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, goto keep; VM_BUG_ON(PageActive(page)); - VM_BUG_ON(page_zone(page) != mz->zone); + VM_BUG_ON(page_zone(page) != zone); sc->nr_scanned++; @@ -739,7 +725,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, goto keep; } - references = page_check_references(page, mz, sc); + references = page_check_references(page, sc); switch (references) { case PAGEREF_ACTIVATE: goto activate_locked; @@ -790,7 +776,8 @@ static unsigned long shrink_page_list(struct list_head *page_list, * unless under significant pressure. */ if (page_is_file_cache(page) && - (!current_is_kswapd() || priority >= DEF_PRIORITY - 2)) { + (!current_is_kswapd() || + sc->priority >= DEF_PRIORITY - 2)) { /* * Immediately reclaim when written back. * Similar in principal to deactivate_page() @@ -928,7 +915,7 @@ keep: * will encounter the same problem */ if (nr_dirty && nr_dirty == nr_congested && global_reclaim(sc)) - zone_set_flag(mz->zone, ZONE_CONGESTED); + zone_set_flag(zone, ZONE_CONGESTED); free_hot_cold_page_list(&free_pages, 1); @@ -949,29 +936,14 @@ keep: * * returns 0 on success, -ve errno on failure. */ -int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file) +int __isolate_lru_page(struct page *page, isolate_mode_t mode) { - bool all_lru_mode; int ret = -EINVAL; /* Only take pages on the LRU. */ if (!PageLRU(page)) return ret; - all_lru_mode = (mode & (ISOLATE_ACTIVE|ISOLATE_INACTIVE)) == - (ISOLATE_ACTIVE|ISOLATE_INACTIVE); - - /* - * When checking the active state, we need to be sure we are - * dealing with comparible boolean values. Take the logical not - * of each. - */ - if (!all_lru_mode && !PageActive(page) != !(mode & ISOLATE_ACTIVE)) - return ret; - - if (!all_lru_mode && !!page_is_file_cache(page) != file) - return ret; - /* Do not give back unevictable pages for compaction */ if (PageUnevictable(page)) return ret; @@ -1039,47 +1011,39 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file) * Appropriate locks must be held before calling this function. * * @nr_to_scan: The number of pages to look through on the list. - * @mz: The mem_cgroup_zone to pull pages from. + * @lruvec: The LRU vector to pull pages from. * @dst: The temp list to put pages on to. * @nr_scanned: The number of pages that were scanned. * @sc: The scan_control struct for this reclaim session * @mode: One of the LRU isolation modes - * @active: True [1] if isolating active pages - * @file: True [1] if isolating file [!anon] pages + * @lru: LRU list id for isolating * * returns how many pages were moved onto *@dst. */ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, - struct mem_cgroup_zone *mz, struct list_head *dst, + struct lruvec *lruvec, struct list_head *dst, unsigned long *nr_scanned, struct scan_control *sc, - isolate_mode_t mode, int active, int file) + isolate_mode_t mode, enum lru_list lru) { - struct lruvec *lruvec; - struct list_head *src; + struct list_head *src = &lruvec->lists[lru]; unsigned long nr_taken = 0; unsigned long scan; - int lru = LRU_BASE; - - lruvec = mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup); - if (active) - lru += LRU_ACTIVE; - if (file) - lru += LRU_FILE; - src = &lruvec->lists[lru]; for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) { struct page *page; + int nr_pages; page = lru_to_page(src); prefetchw_prev_lru_page(page, src, flags); VM_BUG_ON(!PageLRU(page)); - switch (__isolate_lru_page(page, mode, file)) { + switch (__isolate_lru_page(page, mode)) { case 0: - mem_cgroup_lru_del(page); + nr_pages = hpage_nr_pages(page); + mem_cgroup_update_lru_size(lruvec, lru, -nr_pages); list_move(&page->lru, dst); - nr_taken += hpage_nr_pages(page); + nr_taken += nr_pages; break; case -EBUSY: @@ -1093,11 +1057,8 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, } *nr_scanned = scan; - - trace_mm_vmscan_lru_isolate(sc->order, - nr_to_scan, scan, - nr_taken, - mode, file); + trace_mm_vmscan_lru_isolate(sc->order, nr_to_scan, scan, + nr_taken, mode, is_file_lru(lru)); return nr_taken; } @@ -1134,15 +1095,16 @@ int isolate_lru_page(struct page *page) if (PageLRU(page)) { struct zone *zone = page_zone(page); + struct lruvec *lruvec; spin_lock_irq(&zone->lru_lock); + lruvec = mem_cgroup_page_lruvec(page, zone); if (PageLRU(page)) { int lru = page_lru(page); - ret = 0; get_page(page); ClearPageLRU(page); - - del_page_from_lru_list(zone, page, lru); + del_page_from_lru_list(page, lruvec, lru); + ret = 0; } spin_unlock_irq(&zone->lru_lock); } @@ -1175,11 +1137,10 @@ static int too_many_isolated(struct zone *zone, int file, } static noinline_for_stack void -putback_inactive_pages(struct mem_cgroup_zone *mz, - struct list_head *page_list) +putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list) { - struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz); - struct zone *zone = mz->zone; + struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; + struct zone *zone = lruvec_zone(lruvec); LIST_HEAD(pages_to_free); /* @@ -1197,9 +1158,13 @@ putback_inactive_pages(struct mem_cgroup_zone *mz, spin_lock_irq(&zone->lru_lock); continue; } + + lruvec = mem_cgroup_page_lruvec(page, zone); + SetPageLRU(page); lru = page_lru(page); - add_page_to_lru_list(zone, page, lru); + add_page_to_lru_list(page, lruvec, lru); + if (is_active_lru(lru)) { int file = is_file_lru(lru); int numpages = hpage_nr_pages(page); @@ -1208,7 +1173,7 @@ putback_inactive_pages(struct mem_cgroup_zone *mz, if (put_page_testzero(page)) { __ClearPageLRU(page); __ClearPageActive(page); - del_page_from_lru_list(zone, page, lru); + del_page_from_lru_list(page, lruvec, lru); if (unlikely(PageCompound(page))) { spin_unlock_irq(&zone->lru_lock); @@ -1225,71 +1190,24 @@ putback_inactive_pages(struct mem_cgroup_zone *mz, list_splice(&pages_to_free, page_list); } -static noinline_for_stack void -update_isolated_counts(struct mem_cgroup_zone *mz, - struct list_head *page_list, - unsigned long *nr_anon, - unsigned long *nr_file) -{ - struct zone *zone = mz->zone; - unsigned int count[NR_LRU_LISTS] = { 0, }; - unsigned long nr_active = 0; - struct page *page; - int lru; - - /* - * Count pages and clear active flags - */ - list_for_each_entry(page, page_list, lru) { - int numpages = hpage_nr_pages(page); - lru = page_lru_base_type(page); - if (PageActive(page)) { - lru += LRU_ACTIVE; - ClearPageActive(page); - nr_active += numpages; - } - count[lru] += numpages; - } - - preempt_disable(); - __count_vm_events(PGDEACTIVATE, nr_active); - - __mod_zone_page_state(zone, NR_ACTIVE_FILE, - -count[LRU_ACTIVE_FILE]); - __mod_zone_page_state(zone, NR_INACTIVE_FILE, - -count[LRU_INACTIVE_FILE]); - __mod_zone_page_state(zone, NR_ACTIVE_ANON, - -count[LRU_ACTIVE_ANON]); - __mod_zone_page_state(zone, NR_INACTIVE_ANON, - -count[LRU_INACTIVE_ANON]); - - *nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON]; - *nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE]; - - __mod_zone_page_state(zone, NR_ISOLATED_ANON, *nr_anon); - __mod_zone_page_state(zone, NR_ISOLATED_FILE, *nr_file); - preempt_enable(); -} - /* * shrink_inactive_list() is a helper for shrink_zone(). It returns the number * of reclaimed pages */ static noinline_for_stack unsigned long -shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, - struct scan_control *sc, int priority, int file) +shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, + struct scan_control *sc, enum lru_list lru) { LIST_HEAD(page_list); unsigned long nr_scanned; unsigned long nr_reclaimed = 0; unsigned long nr_taken; - unsigned long nr_anon; - unsigned long nr_file; unsigned long nr_dirty = 0; unsigned long nr_writeback = 0; - isolate_mode_t isolate_mode = ISOLATE_INACTIVE; - struct zone *zone = mz->zone; - struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz); + isolate_mode_t isolate_mode = 0; + int file = is_file_lru(lru); + struct zone *zone = lruvec_zone(lruvec); + struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; while (unlikely(too_many_isolated(zone, file, sc))) { congestion_wait(BLK_RW_ASYNC, HZ/10); @@ -1308,31 +1226,30 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, spin_lock_irq(&zone->lru_lock); - nr_taken = isolate_lru_pages(nr_to_scan, mz, &page_list, &nr_scanned, - sc, isolate_mode, 0, file); + nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &page_list, + &nr_scanned, sc, isolate_mode, lru); + + __mod_zone_page_state(zone, NR_LRU_BASE + lru, -nr_taken); + __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken); + if (global_reclaim(sc)) { zone->pages_scanned += nr_scanned; if (current_is_kswapd()) - __count_zone_vm_events(PGSCAN_KSWAPD, zone, - nr_scanned); + __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scanned); else - __count_zone_vm_events(PGSCAN_DIRECT, zone, - nr_scanned); + __count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scanned); } spin_unlock_irq(&zone->lru_lock); if (nr_taken == 0) return 0; - update_isolated_counts(mz, &page_list, &nr_anon, &nr_file); - - nr_reclaimed = shrink_page_list(&page_list, mz, sc, priority, + nr_reclaimed = shrink_page_list(&page_list, zone, sc, &nr_dirty, &nr_writeback); spin_lock_irq(&zone->lru_lock); - reclaim_stat->recent_scanned[0] += nr_anon; - reclaim_stat->recent_scanned[1] += nr_file; + reclaim_stat->recent_scanned[file] += nr_taken; if (global_reclaim(sc)) { if (current_is_kswapd()) @@ -1343,10 +1260,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, nr_reclaimed); } - putback_inactive_pages(mz, &page_list); + putback_inactive_pages(lruvec, &page_list); - __mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon); - __mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file); + __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken); spin_unlock_irq(&zone->lru_lock); @@ -1375,13 +1291,14 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, * DEF_PRIORITY-6 For SWAP_CLUSTER_MAX isolated pages, throttle if any * isolated page is PageWriteback */ - if (nr_writeback && nr_writeback >= (nr_taken >> (DEF_PRIORITY-priority))) + if (nr_writeback && nr_writeback >= + (nr_taken >> (DEF_PRIORITY - sc->priority))) wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10); trace_mm_vmscan_lru_shrink_inactive(zone->zone_pgdat->node_id, zone_idx(zone), nr_scanned, nr_reclaimed, - priority, + sc->priority, trace_shrink_flags(file)); return nr_reclaimed; } @@ -1404,30 +1321,32 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, * But we had to alter page->flags anyway. */ -static void move_active_pages_to_lru(struct zone *zone, +static void move_active_pages_to_lru(struct lruvec *lruvec, struct list_head *list, struct list_head *pages_to_free, enum lru_list lru) { + struct zone *zone = lruvec_zone(lruvec); unsigned long pgmoved = 0; struct page *page; + int nr_pages; while (!list_empty(list)) { - struct lruvec *lruvec; - page = lru_to_page(list); + lruvec = mem_cgroup_page_lruvec(page, zone); VM_BUG_ON(PageLRU(page)); SetPageLRU(page); - lruvec = mem_cgroup_lru_add_list(zone, page, lru); + nr_pages = hpage_nr_pages(page); + mem_cgroup_update_lru_size(lruvec, lru, nr_pages); list_move(&page->lru, &lruvec->lists[lru]); - pgmoved += hpage_nr_pages(page); + pgmoved += nr_pages; if (put_page_testzero(page)) { __ClearPageLRU(page); __ClearPageActive(page); - del_page_from_lru_list(zone, page, lru); + del_page_from_lru_list(page, lruvec, lru); if (unlikely(PageCompound(page))) { spin_unlock_irq(&zone->lru_lock); @@ -1443,9 +1362,9 @@ static void move_active_pages_to_lru(struct zone *zone, } static void shrink_active_list(unsigned long nr_to_scan, - struct mem_cgroup_zone *mz, + struct lruvec *lruvec, struct scan_control *sc, - int priority, int file) + enum lru_list lru) { unsigned long nr_taken; unsigned long nr_scanned; @@ -1454,10 +1373,11 @@ static void shrink_active_list(unsigned long nr_to_scan, LIST_HEAD(l_active); LIST_HEAD(l_inactive); struct page *page; - struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz); + struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; unsigned long nr_rotated = 0; - isolate_mode_t isolate_mode = ISOLATE_ACTIVE; - struct zone *zone = mz->zone; + isolate_mode_t isolate_mode = 0; + int file = is_file_lru(lru); + struct zone *zone = lruvec_zone(lruvec); lru_add_drain(); @@ -1468,18 +1388,15 @@ static void shrink_active_list(unsigned long nr_to_scan, spin_lock_irq(&zone->lru_lock); - nr_taken = isolate_lru_pages(nr_to_scan, mz, &l_hold, &nr_scanned, sc, - isolate_mode, 1, file); + nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold, + &nr_scanned, sc, isolate_mode, lru); if (global_reclaim(sc)) zone->pages_scanned += nr_scanned; reclaim_stat->recent_scanned[file] += nr_taken; __count_zone_vm_events(PGREFILL, zone, nr_scanned); - if (file) - __mod_zone_page_state(zone, NR_ACTIVE_FILE, -nr_taken); - else - __mod_zone_page_state(zone, NR_ACTIVE_ANON, -nr_taken); + __mod_zone_page_state(zone, NR_LRU_BASE + lru, -nr_taken); __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken); spin_unlock_irq(&zone->lru_lock); @@ -1535,10 +1452,8 @@ static void shrink_active_list(unsigned long nr_to_scan, */ reclaim_stat->recent_rotated[file] += nr_rotated; - move_active_pages_to_lru(zone, &l_active, &l_hold, - LRU_ACTIVE + file * LRU_FILE); - move_active_pages_to_lru(zone, &l_inactive, &l_hold, - LRU_BASE + file * LRU_FILE); + move_active_pages_to_lru(lruvec, &l_active, &l_hold, lru); + move_active_pages_to_lru(lruvec, &l_inactive, &l_hold, lru - LRU_ACTIVE); __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken); spin_unlock_irq(&zone->lru_lock); @@ -1561,13 +1476,12 @@ static int inactive_anon_is_low_global(struct zone *zone) /** * inactive_anon_is_low - check if anonymous pages need to be deactivated - * @zone: zone to check - * @sc: scan control of this context + * @lruvec: LRU vector to check * * Returns true if the zone does not have enough inactive anon pages, * meaning some active anon pages need to be deactivated. */ -static int inactive_anon_is_low(struct mem_cgroup_zone *mz) +static int inactive_anon_is_low(struct lruvec *lruvec) { /* * If we don't have swap space, anonymous page deactivation @@ -1577,13 +1491,12 @@ static int inactive_anon_is_low(struct mem_cgroup_zone *mz) return 0; if (!mem_cgroup_disabled()) - return mem_cgroup_inactive_anon_is_low(mz->mem_cgroup, - mz->zone); + return mem_cgroup_inactive_anon_is_low(lruvec); - return inactive_anon_is_low_global(mz->zone); + return inactive_anon_is_low_global(lruvec_zone(lruvec)); } #else -static inline int inactive_anon_is_low(struct mem_cgroup_zone *mz) +static inline int inactive_anon_is_low(struct lruvec *lruvec) { return 0; } @@ -1601,7 +1514,7 @@ static int inactive_file_is_low_global(struct zone *zone) /** * inactive_file_is_low - check if file pages need to be deactivated - * @mz: memory cgroup and zone to check + * @lruvec: LRU vector to check * * When the system is doing streaming IO, memory pressure here * ensures that active file pages get deactivated, until more @@ -1613,44 +1526,39 @@ static int inactive_file_is_low_global(struct zone *zone) * This uses a different ratio than the anonymous pages, because * the page cache uses a use-once replacement algorithm. */ -static int inactive_file_is_low(struct mem_cgroup_zone *mz) +static int inactive_file_is_low(struct lruvec *lruvec) { if (!mem_cgroup_disabled()) - return mem_cgroup_inactive_file_is_low(mz->mem_cgroup, - mz->zone); + return mem_cgroup_inactive_file_is_low(lruvec); - return inactive_file_is_low_global(mz->zone); + return inactive_file_is_low_global(lruvec_zone(lruvec)); } -static int inactive_list_is_low(struct mem_cgroup_zone *mz, int file) +static int inactive_list_is_low(struct lruvec *lruvec, enum lru_list lru) { - if (file) - return inactive_file_is_low(mz); + if (is_file_lru(lru)) + return inactive_file_is_low(lruvec); else - return inactive_anon_is_low(mz); + return inactive_anon_is_low(lruvec); } static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, - struct mem_cgroup_zone *mz, - struct scan_control *sc, int priority) + struct lruvec *lruvec, struct scan_control *sc) { - int file = is_file_lru(lru); - if (is_active_lru(lru)) { - if (inactive_list_is_low(mz, file)) - shrink_active_list(nr_to_scan, mz, sc, priority, file); + if (inactive_list_is_low(lruvec, lru)) + shrink_active_list(nr_to_scan, lruvec, sc, lru); return 0; } - return shrink_inactive_list(nr_to_scan, mz, sc, priority, file); + return shrink_inactive_list(nr_to_scan, lruvec, sc, lru); } -static int vmscan_swappiness(struct mem_cgroup_zone *mz, - struct scan_control *sc) +static int vmscan_swappiness(struct scan_control *sc) { if (global_reclaim(sc)) return vm_swappiness; - return mem_cgroup_swappiness(mz->mem_cgroup); + return mem_cgroup_swappiness(sc->target_mem_cgroup); } /* @@ -1662,17 +1570,18 @@ static int vmscan_swappiness(struct mem_cgroup_zone *mz, * nr[0] = anon inactive pages to scan; nr[1] = anon active pages to scan * nr[2] = file inactive pages to scan; nr[3] = file active pages to scan */ -static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, - unsigned long *nr, int priority) +static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, + unsigned long *nr) { unsigned long anon, file, free; unsigned long anon_prio, file_prio; unsigned long ap, fp; - struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz); + struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; u64 fraction[2], denominator; enum lru_list lru; int noswap = 0; bool force_scan = false; + struct zone *zone = lruvec_zone(lruvec); /* * If the zone or memcg is small, nr[l] can be 0. This @@ -1684,7 +1593,7 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, * latencies, so it's better to scan a minimum amount there as * well. */ - if (current_is_kswapd() && mz->zone->all_unreclaimable) + if (current_is_kswapd() && zone->all_unreclaimable) force_scan = true; if (!global_reclaim(sc)) force_scan = true; @@ -1698,16 +1607,16 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, goto out; } - anon = zone_nr_lru_pages(mz, LRU_ACTIVE_ANON) + - zone_nr_lru_pages(mz, LRU_INACTIVE_ANON); - file = zone_nr_lru_pages(mz, LRU_ACTIVE_FILE) + - zone_nr_lru_pages(mz, LRU_INACTIVE_FILE); + anon = get_lru_size(lruvec, LRU_ACTIVE_ANON) + + get_lru_size(lruvec, LRU_INACTIVE_ANON); + file = get_lru_size(lruvec, LRU_ACTIVE_FILE) + + get_lru_size(lruvec, LRU_INACTIVE_FILE); if (global_reclaim(sc)) { - free = zone_page_state(mz->zone, NR_FREE_PAGES); + free = zone_page_state(zone, NR_FREE_PAGES); /* If we have very few page cache pages, force-scan anon pages. */ - if (unlikely(file + free <= high_wmark_pages(mz->zone))) { + if (unlikely(file + free <= high_wmark_pages(zone))) { fraction[0] = 1; fraction[1] = 0; denominator = 1; @@ -1719,8 +1628,8 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, * With swappiness at 100, anonymous and file have the same priority. * This scanning priority is essentially the inverse of IO cost. */ - anon_prio = vmscan_swappiness(mz, sc); - file_prio = 200 - vmscan_swappiness(mz, sc); + anon_prio = vmscan_swappiness(sc); + file_prio = 200 - anon_prio; /* * OK, so we have swap space and a fair amount of page cache @@ -1733,7 +1642,7 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, * * anon in [0], file in [1] */ - spin_lock_irq(&mz->zone->lru_lock); + spin_lock_irq(&zone->lru_lock); if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) { reclaim_stat->recent_scanned[0] /= 2; reclaim_stat->recent_rotated[0] /= 2; @@ -1754,7 +1663,7 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, fp = file_prio * (reclaim_stat->recent_scanned[1] + 1); fp /= reclaim_stat->recent_rotated[1] + 1; - spin_unlock_irq(&mz->zone->lru_lock); + spin_unlock_irq(&zone->lru_lock); fraction[0] = ap; fraction[1] = fp; @@ -1764,9 +1673,9 @@ out: int file = is_file_lru(lru); unsigned long scan; - scan = zone_nr_lru_pages(mz, lru); - if (priority || noswap || !vmscan_swappiness(mz, sc)) { - scan >>= priority; + scan = get_lru_size(lruvec, lru); + if (sc->priority || noswap || !vmscan_swappiness(sc)) { + scan >>= sc->priority; if (!scan && force_scan) scan = SWAP_CLUSTER_MAX; scan = div64_u64(scan * fraction[file], denominator); @@ -1776,11 +1685,11 @@ out: } /* Use reclaim/compaction for costly allocs or under memory pressure */ -static bool in_reclaim_compaction(int priority, struct scan_control *sc) +static bool in_reclaim_compaction(struct scan_control *sc) { if (COMPACTION_BUILD && sc->order && (sc->order > PAGE_ALLOC_COSTLY_ORDER || - priority < DEF_PRIORITY - 2)) + sc->priority < DEF_PRIORITY - 2)) return true; return false; @@ -1793,17 +1702,16 @@ static bool in_reclaim_compaction(int priority, struct scan_control *sc) * calls try_to_compact_zone() that it will have enough free pages to succeed. * It will give up earlier than that if there is difficulty reclaiming pages. */ -static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz, +static inline bool should_continue_reclaim(struct lruvec *lruvec, unsigned long nr_reclaimed, unsigned long nr_scanned, - int priority, struct scan_control *sc) { unsigned long pages_for_compaction; unsigned long inactive_lru_pages; /* If not in reclaim/compaction mode, stop */ - if (!in_reclaim_compaction(priority, sc)) + if (!in_reclaim_compaction(sc)) return false; /* Consider stopping depending on scan and reclaim activity */ @@ -1834,15 +1742,15 @@ static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz, * inactive lists are large enough, continue reclaiming */ pages_for_compaction = (2UL << sc->order); - inactive_lru_pages = zone_nr_lru_pages(mz, LRU_INACTIVE_FILE); + inactive_lru_pages = get_lru_size(lruvec, LRU_INACTIVE_FILE); if (nr_swap_pages > 0) - inactive_lru_pages += zone_nr_lru_pages(mz, LRU_INACTIVE_ANON); + inactive_lru_pages += get_lru_size(lruvec, LRU_INACTIVE_ANON); if (sc->nr_reclaimed < pages_for_compaction && inactive_lru_pages > pages_for_compaction) return true; /* If compaction would go ahead or the allocation would succeed, stop */ - switch (compaction_suitable(mz->zone, sc->order)) { + switch (compaction_suitable(lruvec_zone(lruvec), sc->order)) { case COMPACT_PARTIAL: case COMPACT_CONTINUE: return false; @@ -1854,8 +1762,7 @@ static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz, /* * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. */ -static void shrink_mem_cgroup_zone(int priority, struct mem_cgroup_zone *mz, - struct scan_control *sc) +static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) { unsigned long nr[NR_LRU_LISTS]; unsigned long nr_to_scan; @@ -1867,7 +1774,7 @@ static void shrink_mem_cgroup_zone(int priority, struct mem_cgroup_zone *mz, restart: nr_reclaimed = 0; nr_scanned = sc->nr_scanned; - get_scan_count(mz, sc, nr, priority); + get_scan_count(lruvec, sc, nr); blk_start_plug(&plug); while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || @@ -1879,7 +1786,7 @@ restart: nr[lru] -= nr_to_scan; nr_reclaimed += shrink_list(lru, nr_to_scan, - mz, sc, priority); + lruvec, sc); } } /* @@ -1890,7 +1797,8 @@ restart: * with multiple processes reclaiming pages, the total * freeing target can get unreasonably large. */ - if (nr_reclaimed >= nr_to_reclaim && priority < DEF_PRIORITY) + if (nr_reclaimed >= nr_to_reclaim && + sc->priority < DEF_PRIORITY) break; } blk_finish_plug(&plug); @@ -1900,36 +1808,33 @@ restart: * Even if we did not try to evict anon pages at all, we want to * rebalance the anon lru active/inactive ratio. */ - if (inactive_anon_is_low(mz)) - shrink_active_list(SWAP_CLUSTER_MAX, mz, sc, priority, 0); + if (inactive_anon_is_low(lruvec)) + shrink_active_list(SWAP_CLUSTER_MAX, lruvec, + sc, LRU_ACTIVE_ANON); /* reclaim/compaction might need reclaim to continue */ - if (should_continue_reclaim(mz, nr_reclaimed, - sc->nr_scanned - nr_scanned, - priority, sc)) + if (should_continue_reclaim(lruvec, nr_reclaimed, + sc->nr_scanned - nr_scanned, sc)) goto restart; throttle_vm_writeout(sc->gfp_mask); } -static void shrink_zone(int priority, struct zone *zone, - struct scan_control *sc) +static void shrink_zone(struct zone *zone, struct scan_control *sc) { struct mem_cgroup *root = sc->target_mem_cgroup; struct mem_cgroup_reclaim_cookie reclaim = { .zone = zone, - .priority = priority, + .priority = sc->priority, }; struct mem_cgroup *memcg; memcg = mem_cgroup_iter(root, NULL, &reclaim); do { - struct mem_cgroup_zone mz = { - .mem_cgroup = memcg, - .zone = zone, - }; + struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg); + + shrink_lruvec(lruvec, sc); - shrink_mem_cgroup_zone(priority, &mz, sc); /* * Limit reclaim has historically picked one memcg and * scanned it with decreasing priority levels until @@ -2005,8 +1910,7 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc) * the caller that it should consider retrying the allocation instead of * further reclaim. */ -static bool shrink_zones(int priority, struct zonelist *zonelist, - struct scan_control *sc) +static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc) { struct zoneref *z; struct zone *zone; @@ -2033,7 +1937,8 @@ static bool shrink_zones(int priority, struct zonelist *zonelist, if (global_reclaim(sc)) { if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) continue; - if (zone->all_unreclaimable && priority != DEF_PRIORITY) + if (zone->all_unreclaimable && + sc->priority != DEF_PRIORITY) continue; /* Let kswapd poll it */ if (COMPACTION_BUILD) { /* @@ -2065,7 +1970,7 @@ static bool shrink_zones(int priority, struct zonelist *zonelist, /* need some check for avoid more shrink_zone() */ } - shrink_zone(priority, zone, sc); + shrink_zone(zone, sc); } return aborted_reclaim; @@ -2116,7 +2021,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, struct scan_control *sc, struct shrink_control *shrink) { - int priority; unsigned long total_scanned = 0; struct reclaim_state *reclaim_state = current->reclaim_state; struct zoneref *z; @@ -2129,9 +2033,9 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, if (global_reclaim(sc)) count_vm_event(ALLOCSTALL); - for (priority = DEF_PRIORITY; priority >= 0; priority--) { + do { sc->nr_scanned = 0; - aborted_reclaim = shrink_zones(priority, zonelist, sc); + aborted_reclaim = shrink_zones(zonelist, sc); /* * Don't shrink slabs when reclaiming memory from @@ -2173,7 +2077,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, /* Take a nap, wait for some writeback to complete */ if (!sc->hibernation_mode && sc->nr_scanned && - priority < DEF_PRIORITY - 2) { + sc->priority < DEF_PRIORITY - 2) { struct zone *preferred_zone; first_zones_zonelist(zonelist, gfp_zone(sc->gfp_mask), @@ -2181,7 +2085,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, &preferred_zone); wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/10); } - } + } while (--sc->priority >= 0); out: delayacct_freepages_end(); @@ -2219,6 +2123,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, .may_unmap = 1, .may_swap = 1, .order = order, + .priority = DEF_PRIORITY, .target_mem_cgroup = NULL, .nodemask = nodemask, }; @@ -2251,17 +2156,15 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg, .may_unmap = 1, .may_swap = !noswap, .order = 0, + .priority = 0, .target_mem_cgroup = memcg, }; - struct mem_cgroup_zone mz = { - .mem_cgroup = memcg, - .zone = zone, - }; + struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg); sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); - trace_mm_vmscan_memcg_softlimit_reclaim_begin(0, + trace_mm_vmscan_memcg_softlimit_reclaim_begin(sc.order, sc.may_writepage, sc.gfp_mask); @@ -2272,7 +2175,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg, * will pick up pages from other mem cgroup's as well. We hack * the priority and make it zero. */ - shrink_mem_cgroup_zone(0, &mz, &sc); + shrink_lruvec(lruvec, &sc); trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); @@ -2293,6 +2196,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, .may_swap = !noswap, .nr_to_reclaim = SWAP_CLUSTER_MAX, .order = 0, + .priority = DEF_PRIORITY, .target_mem_cgroup = memcg, .nodemask = NULL, /* we don't care the placement */ .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | @@ -2323,8 +2227,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, } #endif -static void age_active_anon(struct zone *zone, struct scan_control *sc, - int priority) +static void age_active_anon(struct zone *zone, struct scan_control *sc) { struct mem_cgroup *memcg; @@ -2333,14 +2236,11 @@ static void age_active_anon(struct zone *zone, struct scan_control *sc, memcg = mem_cgroup_iter(NULL, NULL, NULL); do { - struct mem_cgroup_zone mz = { - .mem_cgroup = memcg, - .zone = zone, - }; + struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg); - if (inactive_anon_is_low(&mz)) - shrink_active_list(SWAP_CLUSTER_MAX, &mz, - sc, priority, 0); + if (inactive_anon_is_low(lruvec)) + shrink_active_list(SWAP_CLUSTER_MAX, lruvec, + sc, LRU_ACTIVE_ANON); memcg = mem_cgroup_iter(NULL, memcg, NULL); } while (memcg); @@ -2449,7 +2349,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, { int all_zones_ok; unsigned long balanced; - int priority; int i; int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ unsigned long total_scanned; @@ -2473,11 +2372,12 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, }; loop_again: total_scanned = 0; + sc.priority = DEF_PRIORITY; sc.nr_reclaimed = 0; sc.may_writepage = !laptop_mode; count_vm_event(PAGEOUTRUN); - for (priority = DEF_PRIORITY; priority >= 0; priority--) { + do { unsigned long lru_pages = 0; int has_under_min_watermark_zone = 0; @@ -2494,14 +2394,15 @@ loop_again: if (!populated_zone(zone)) continue; - if (zone->all_unreclaimable && priority != DEF_PRIORITY) + if (zone->all_unreclaimable && + sc.priority != DEF_PRIORITY) continue; /* * Do some background aging of the anon list, to give * pages a chance to be referenced before reclaiming. */ - age_active_anon(zone, &sc, priority); + age_active_anon(zone, &sc); /* * If the number of buffer_heads in the machine @@ -2549,7 +2450,8 @@ loop_again: if (!populated_zone(zone)) continue; - if (zone->all_unreclaimable && priority != DEF_PRIORITY) + if (zone->all_unreclaimable && + sc.priority != DEF_PRIORITY) continue; sc.nr_scanned = 0; @@ -2593,7 +2495,7 @@ loop_again: !zone_watermark_ok_safe(zone, testorder, high_wmark_pages(zone) + balance_gap, end_zone, 0)) { - shrink_zone(priority, zone, &sc); + shrink_zone(zone, &sc); reclaim_state->reclaimed_slab = 0; nr_slab = shrink_slab(&shrink, sc.nr_scanned, lru_pages); @@ -2650,7 +2552,7 @@ loop_again: * OK, kswapd is getting into trouble. Take a nap, then take * another pass across the zones. */ - if (total_scanned && (priority < DEF_PRIORITY - 2)) { + if (total_scanned && (sc.priority < DEF_PRIORITY - 2)) { if (has_under_min_watermark_zone) count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT); else @@ -2665,7 +2567,7 @@ loop_again: */ if (sc.nr_reclaimed >= SWAP_CLUSTER_MAX) break; - } + } while (--sc.priority >= 0); out: /* @@ -2715,7 +2617,8 @@ out: if (!populated_zone(zone)) continue; - if (zone->all_unreclaimable && priority != DEF_PRIORITY) + if (zone->all_unreclaimable && + sc.priority != DEF_PRIORITY) continue; /* Would compaction fail due to lack of free memory? */ @@ -2786,7 +2689,10 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx) * them before going back to sleep. */ set_pgdat_percpu_threshold(pgdat, calculate_normal_threshold); - schedule(); + + if (!kthread_should_stop()) + schedule(); + set_pgdat_percpu_threshold(pgdat, calculate_pressure_threshold); } else { if (remaining) @@ -2982,6 +2888,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim) .nr_to_reclaim = nr_to_reclaim, .hibernation_mode = 1, .order = 0, + .priority = DEF_PRIORITY, }; struct shrink_control shrink = { .gfp_mask = sc.gfp_mask, @@ -3052,14 +2959,17 @@ int kswapd_run(int nid) } /* - * Called by memory hotplug when all memory in a node is offlined. + * Called by memory hotplug when all memory in a node is offlined. Caller must + * hold lock_memory_hotplug(). */ void kswapd_stop(int nid) { struct task_struct *kswapd = NODE_DATA(nid)->kswapd; - if (kswapd) + if (kswapd) { kthread_stop(kswapd); + NODE_DATA(nid)->kswapd = NULL; + } } static int __init kswapd_init(void) @@ -3159,7 +3069,6 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) const unsigned long nr_pages = 1 << order; struct task_struct *p = current; struct reclaim_state reclaim_state; - int priority; struct scan_control sc = { .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE), .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP), @@ -3168,6 +3077,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) SWAP_CLUSTER_MAX), .gfp_mask = gfp_mask, .order = order, + .priority = ZONE_RECLAIM_PRIORITY, }; struct shrink_control shrink = { .gfp_mask = sc.gfp_mask, @@ -3190,11 +3100,9 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) * Free memory by calling shrink zone with increasing * priorities until we have enough memory freed. */ - priority = ZONE_RECLAIM_PRIORITY; do { - shrink_zone(priority, zone, &sc); - priority--; - } while (priority >= 0 && sc.nr_reclaimed < nr_pages); + shrink_zone(zone, &sc); + } while (sc.nr_reclaimed < nr_pages && --sc.priority >= 0); } nr_slab_pages0 = zone_page_state(zone, NR_SLAB_RECLAIMABLE); @@ -3345,6 +3253,7 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages) zone = pagezone; spin_lock_irq(&zone->lru_lock); } + lruvec = mem_cgroup_page_lruvec(page, zone); if (!PageLRU(page) || !PageUnevictable(page)) continue; @@ -3354,11 +3263,8 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages) VM_BUG_ON(PageActive(page)); ClearPageUnevictable(page); - __dec_zone_state(zone, NR_UNEVICTABLE); - lruvec = mem_cgroup_lru_move_lists(zone, page, - LRU_UNEVICTABLE, lru); - list_move(&page->lru, &lruvec->lists[lru]); - __inc_zone_state(zone, NR_INACTIVE_ANON + lru); + del_page_from_lru_list(page, lruvec, LRU_UNEVICTABLE); + add_page_to_lru_list(page, lruvec, lru); pgrescued++; } } |