summaryrefslogtreecommitdiff
path: root/include/linux/mmzone.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/mmzone.h')
-rw-r--r--include/linux/mmzone.h245
1 files changed, 152 insertions, 93 deletions
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index c60df9257cc7..7f2ae99e5daf 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -68,8 +68,10 @@ extern char * const migratetype_names[MIGRATE_TYPES];
#ifdef CONFIG_CMA
# define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA)
+# define is_migrate_cma_page(_page) (get_pageblock_migratetype(_page) == MIGRATE_CMA)
#else
# define is_migrate_cma(migratetype) false
+# define is_migrate_cma_page(_page) false
#endif
#define for_each_migratetype_order(order, type) \
@@ -85,13 +87,6 @@ extern int page_group_by_mobility_disabled;
get_pfnblock_flags_mask(page, page_to_pfn(page), \
PB_migrate_end, MIGRATETYPE_MASK)
-static inline int get_pfnblock_migratetype(struct page *page, unsigned long pfn)
-{
- BUILD_BUG_ON(PB_migrate_end - PB_migrate != 2);
- return get_pfnblock_flags_mask(page, pfn, PB_migrate_end,
- MIGRATETYPE_MASK);
-}
-
struct free_area {
struct list_head free_list[MIGRATE_TYPES];
unsigned long nr_free;
@@ -100,7 +95,7 @@ struct free_area {
struct pglist_data;
/*
- * zone->lock and zone->lru_lock are two of the hottest locks in the kernel.
+ * zone->lock and the zone lru_lock are two of the hottest locks in the kernel.
* So add a wild amount of padding here to ensure that they fall into separate
* cachelines. There are very few zone structures in the machine, so space
* consumption is not a concern here.
@@ -117,36 +112,23 @@ struct zone_padding {
enum zone_stat_item {
/* First 128 byte cacheline (assuming 64 bit words) */
NR_FREE_PAGES,
- NR_ALLOC_BATCH,
- NR_LRU_BASE,
- NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */
- NR_ACTIVE_ANON, /* " " " " " */
- NR_INACTIVE_FILE, /* " " " " " */
- NR_ACTIVE_FILE, /* " " " " " */
- NR_UNEVICTABLE, /* " " " " " */
+ NR_ZONE_LRU_BASE, /* Used only for compaction and reclaim retry */
+ NR_ZONE_INACTIVE_ANON = NR_ZONE_LRU_BASE,
+ NR_ZONE_ACTIVE_ANON,
+ NR_ZONE_INACTIVE_FILE,
+ NR_ZONE_ACTIVE_FILE,
+ NR_ZONE_UNEVICTABLE,
+ NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */
NR_MLOCK, /* mlock()ed pages found and moved off LRU */
- NR_ANON_PAGES, /* Mapped anonymous pages */
- NR_FILE_MAPPED, /* pagecache pages mapped into pagetables.
- only modified from process context */
- NR_FILE_PAGES,
- NR_FILE_DIRTY,
- NR_WRITEBACK,
NR_SLAB_RECLAIMABLE,
NR_SLAB_UNRECLAIMABLE,
NR_PAGETABLE, /* used for pagetables */
- NR_KERNEL_STACK,
+ NR_KERNEL_STACK_KB, /* measured in KiB */
/* Second 128 byte cacheline */
- NR_UNSTABLE_NFS, /* NFS unstable pages */
NR_BOUNCE,
- NR_VMSCAN_WRITE,
- NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */
- NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */
- NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */
- NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */
- NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */
- NR_DIRTIED, /* page dirtyings since bootup */
- NR_WRITTEN, /* page writings since bootup */
- NR_PAGES_SCANNED, /* pages scanned since last reclaim */
+#if IS_ENABLED(CONFIG_ZSMALLOC)
+ NR_ZSPAGES, /* allocated in zsmalloc */
+#endif
#ifdef CONFIG_NUMA
NUMA_HIT, /* allocated in intended node */
NUMA_MISS, /* allocated in non intended node */
@@ -155,12 +137,40 @@ enum zone_stat_item {
NUMA_LOCAL, /* allocation from local node */
NUMA_OTHER, /* allocation from other node */
#endif
+ NR_FREE_CMA_PAGES,
+ NR_VM_ZONE_STAT_ITEMS };
+
+enum node_stat_item {
+ NR_LRU_BASE,
+ NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */
+ NR_ACTIVE_ANON, /* " " " " " */
+ NR_INACTIVE_FILE, /* " " " " " */
+ NR_ACTIVE_FILE, /* " " " " " */
+ NR_UNEVICTABLE, /* " " " " " */
+ NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */
+ NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */
+ NR_PAGES_SCANNED, /* pages scanned since last reclaim */
WORKINGSET_REFAULT,
WORKINGSET_ACTIVATE,
WORKINGSET_NODERECLAIM,
- NR_ANON_TRANSPARENT_HUGEPAGES,
- NR_FREE_CMA_PAGES,
- NR_VM_ZONE_STAT_ITEMS };
+ NR_ANON_MAPPED, /* Mapped anonymous pages */
+ NR_FILE_MAPPED, /* pagecache pages mapped into pagetables.
+ only modified from process context */
+ NR_FILE_PAGES,
+ NR_FILE_DIRTY,
+ NR_WRITEBACK,
+ NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */
+ NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */
+ NR_SHMEM_THPS,
+ NR_SHMEM_PMDMAPPED,
+ NR_ANON_THPS,
+ NR_UNSTABLE_NFS, /* NFS unstable pages */
+ NR_VMSCAN_WRITE,
+ NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */
+ NR_DIRTIED, /* page dirtyings since bootup */
+ NR_WRITTEN, /* page writings since bootup */
+ NR_VM_NODE_STAT_ITEMS
+};
/*
* We do arithmetic on the LRU lists in various places in the code,
@@ -217,7 +227,7 @@ struct lruvec {
/* Evictions & activations on the inactive file list */
atomic_long_t inactive_age;
#ifdef CONFIG_MEMCG
- struct zone *zone;
+ struct pglist_data *pgdat;
#endif
};
@@ -269,6 +279,11 @@ struct per_cpu_pageset {
#endif
};
+struct per_cpu_nodestat {
+ s8 stat_threshold;
+ s8 vm_node_stat_diff[NR_VM_NODE_STAT_ITEMS];
+};
+
#endif /* !__GENERATING_BOUNDS.H */
enum zone_type {
@@ -350,22 +365,9 @@ struct zone {
#ifdef CONFIG_NUMA
int node;
#endif
-
- /*
- * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
- * this zone's LRU. Maintained by the pageout code.
- */
- unsigned int inactive_ratio;
-
struct pglist_data *zone_pgdat;
struct per_cpu_pageset __percpu *pageset;
- /*
- * This is a per-zone reserve of pages that are not available
- * to userspace allocations.
- */
- unsigned long totalreserve_pages;
-
#ifndef CONFIG_SPARSEMEM
/*
* Flags for a pageblock_nr_pages block. See pageblock-flags.h.
@@ -374,14 +376,6 @@ struct zone {
unsigned long *pageblock_flags;
#endif /* CONFIG_SPARSEMEM */
-#ifdef CONFIG_NUMA
- /*
- * zone reclaim becomes active if more unmapped pages exist.
- */
- unsigned long min_unmapped_pages;
- unsigned long min_slab_pages;
-#endif /* CONFIG_NUMA */
-
/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
unsigned long zone_start_pfn;
@@ -474,24 +468,21 @@ struct zone {
unsigned long wait_table_hash_nr_entries;
unsigned long wait_table_bits;
+ /* Write-intensive fields used from the page allocator */
ZONE_PADDING(_pad1_)
+
/* free areas of different sizes */
struct free_area free_area[MAX_ORDER];
/* zone flags, see below */
unsigned long flags;
- /* Write-intensive fields used from the page allocator */
+ /* Primarily protects free_area */
spinlock_t lock;
+ /* Write-intensive fields used by compaction and vmstats. */
ZONE_PADDING(_pad2_)
- /* Write-intensive fields used by page reclaim */
-
- /* Fields commonly accessed by the page reclaim scanner */
- spinlock_t lru_lock;
- struct lruvec lruvec;
-
/*
* When free pages are below this point, additional steps are taken
* when reading the number of free pages to avoid per-cpu counter
@@ -529,20 +520,18 @@ struct zone {
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
} ____cacheline_internodealigned_in_smp;
-enum zone_flags {
- ZONE_RECLAIM_LOCKED, /* prevents concurrent reclaim */
- ZONE_OOM_LOCKED, /* zone is in OOM killer zonelist */
- ZONE_CONGESTED, /* zone has many dirty pages backed by
+enum pgdat_flags {
+ PGDAT_CONGESTED, /* pgdat has many dirty pages backed by
* a congested BDI
*/
- ZONE_DIRTY, /* reclaim scanning has recently found
+ PGDAT_DIRTY, /* reclaim scanning has recently found
* many dirty file pages at the tail
* of the LRU.
*/
- ZONE_WRITEBACK, /* reclaim scanning has recently found
+ PGDAT_WRITEBACK, /* reclaim scanning has recently found
* many pages under writeback
*/
- ZONE_FAIR_DEPLETED, /* fair zone policy batch depleted */
+ PGDAT_RECLAIM_LOCKED, /* prevents concurrent reclaim */
};
static inline unsigned long zone_end_pfn(const struct zone *zone)
@@ -666,8 +655,9 @@ typedef struct pglist_data {
wait_queue_head_t pfmemalloc_wait;
struct task_struct *kswapd; /* Protected by
mem_hotplug_begin/end() */
- int kswapd_max_order;
- enum zone_type classzone_idx;
+ int kswapd_order;
+ enum zone_type kswapd_classzone_idx;
+
#ifdef CONFIG_COMPACTION
int kcompactd_max_order;
enum zone_type kcompactd_classzone_idx;
@@ -684,6 +674,23 @@ typedef struct pglist_data {
/* Number of pages migrated during the rate limiting time interval */
unsigned long numabalancing_migrate_nr_pages;
#endif
+ /*
+ * This is a per-node reserve of pages that are not available
+ * to userspace allocations.
+ */
+ unsigned long totalreserve_pages;
+
+#ifdef CONFIG_NUMA
+ /*
+ * zone reclaim becomes active if more unmapped pages exist.
+ */
+ unsigned long min_unmapped_pages;
+ unsigned long min_slab_pages;
+#endif /* CONFIG_NUMA */
+
+ /* Write-intensive fields used by page reclaim */
+ ZONE_PADDING(_pad1_)
+ spinlock_t lru_lock;
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
/*
@@ -698,6 +705,23 @@ typedef struct pglist_data {
struct list_head split_queue;
unsigned long split_queue_len;
#endif
+
+ /* Fields commonly accessed by the page reclaim scanner */
+ struct lruvec lruvec;
+
+ /*
+ * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
+ * this node's LRU. Maintained by the pageout code.
+ */
+ unsigned int inactive_ratio;
+
+ unsigned long flags;
+
+ ZONE_PADDING(_pad2_)
+
+ /* Per-node vmstats */
+ struct per_cpu_nodestat __percpu *per_cpu_nodestats;
+ atomic_long_t vm_stat[NR_VM_NODE_STAT_ITEMS];
} pg_data_t;
#define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages)
@@ -711,6 +735,15 @@ typedef struct pglist_data {
#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn)
#define node_end_pfn(nid) pgdat_end_pfn(NODE_DATA(nid))
+static inline spinlock_t *zone_lru_lock(struct zone *zone)
+{
+ return &zone->zone_pgdat->lru_lock;
+}
+
+static inline struct lruvec *node_lruvec(struct pglist_data *pgdat)
+{
+ return &pgdat->lruvec;
+}
static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat)
{
@@ -746,8 +779,12 @@ static inline bool is_dev_zone(const struct zone *zone)
extern struct mutex zonelists_mutex;
void build_all_zonelists(pg_data_t *pgdat, struct zone *zone);
void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx);
+bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
+ int classzone_idx, unsigned int alloc_flags,
+ long free_pages);
bool zone_watermark_ok(struct zone *z, unsigned int order,
- unsigned long mark, int classzone_idx, int alloc_flags);
+ unsigned long mark, int classzone_idx,
+ unsigned int alloc_flags);
bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
unsigned long mark, int classzone_idx);
enum memmap_context {
@@ -759,12 +796,12 @@ extern int init_currently_empty_zone(struct zone *zone, unsigned long start_pfn,
extern void lruvec_init(struct lruvec *lruvec);
-static inline struct zone *lruvec_zone(struct lruvec *lruvec)
+static inline struct pglist_data *lruvec_pgdat(struct lruvec *lruvec)
{
#ifdef CONFIG_MEMCG
- return lruvec->zone;
+ return lruvec->pgdat;
#else
- return container_of(lruvec, struct zone, lruvec);
+ return container_of(lruvec, struct pglist_data, lruvec);
#endif
}
@@ -791,9 +828,21 @@ unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long);
*/
#define zone_idx(zone) ((zone) - (zone)->zone_pgdat->node_zones)
-static inline int populated_zone(struct zone *zone)
+/*
+ * Returns true if a zone has pages managed by the buddy allocator.
+ * All the reclaim decisions have to use this function rather than
+ * populated_zone(). If the whole zone is reserved then we can easily
+ * end up with populated_zone() && !managed_zone().
+ */
+static inline bool managed_zone(struct zone *zone)
+{
+ return zone->managed_pages;
+}
+
+/* Returns true if a zone has memory */
+static inline bool populated_zone(struct zone *zone)
{
- return (!!zone->present_pages);
+ return zone->present_pages;
}
extern int movable_zone;
@@ -828,10 +877,7 @@ static inline int is_highmem_idx(enum zone_type idx)
static inline int is_highmem(struct zone *zone)
{
#ifdef CONFIG_HIGHMEM
- int zone_off = (char *)zone - (char *)zone->zone_pgdat->node_zones;
- return zone_off == ZONE_HIGHMEM * sizeof(*zone) ||
- (zone_off == ZONE_MOVABLE * sizeof(*zone) &&
- zone_movable_is_highmem());
+ return is_highmem_idx(zone_idx(zone));
#else
return 0;
#endif
@@ -922,6 +968,10 @@ static inline int zonelist_node_idx(struct zoneref *zoneref)
#endif /* CONFIG_NUMA */
}
+struct zoneref *__next_zones_zonelist(struct zoneref *z,
+ enum zone_type highest_zoneidx,
+ nodemask_t *nodes);
+
/**
* next_zones_zonelist - Returns the next zone at or below highest_zoneidx within the allowed nodemask using a cursor within a zonelist as a starting point
* @z - The cursor used as a starting point for the search
@@ -934,9 +984,14 @@ static inline int zonelist_node_idx(struct zoneref *zoneref)
* being examined. It should be advanced by one before calling
* next_zones_zonelist again.
*/
-struct zoneref *next_zones_zonelist(struct zoneref *z,
+static __always_inline struct zoneref *next_zones_zonelist(struct zoneref *z,
enum zone_type highest_zoneidx,
- nodemask_t *nodes);
+ nodemask_t *nodes)
+{
+ if (likely(!nodes && zonelist_zone_idx(z) <= highest_zoneidx))
+ return z;
+ return __next_zones_zonelist(z, highest_zoneidx, nodes);
+}
/**
* first_zones_zonelist - Returns the first zone at or below highest_zoneidx within the allowed nodemask in a zonelist
@@ -952,13 +1007,10 @@ struct zoneref *next_zones_zonelist(struct zoneref *z,
*/
static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
enum zone_type highest_zoneidx,
- nodemask_t *nodes,
- struct zone **zone)
+ nodemask_t *nodes)
{
- struct zoneref *z = next_zones_zonelist(zonelist->_zonerefs,
+ return next_zones_zonelist(zonelist->_zonerefs,
highest_zoneidx, nodes);
- *zone = zonelist_zone(z);
- return z;
}
/**
@@ -973,10 +1025,17 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
* within a given nodemask
*/
#define for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \
- for (z = first_zones_zonelist(zlist, highidx, nodemask, &zone); \
+ for (z = first_zones_zonelist(zlist, highidx, nodemask), zone = zonelist_zone(z); \
zone; \
z = next_zones_zonelist(++z, highidx, nodemask), \
- zone = zonelist_zone(z)) \
+ zone = zonelist_zone(z))
+
+#define for_next_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \
+ for (zone = z->zone; \
+ zone; \
+ z = next_zones_zonelist(++z, highidx, nodemask), \
+ zone = zonelist_zone(z))
+
/**
* for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index
@@ -1056,7 +1115,7 @@ struct mem_section {
unsigned long *pageblock_flags;
#ifdef CONFIG_PAGE_EXTENSION
/*
- * If !SPARSEMEM, pgdat doesn't have page_ext pointer. We use
+ * If SPARSEMEM, pgdat doesn't have page_ext pointer. We use
* section. (see page_ext.h about this.)
*/
struct page_ext *page_ext;