aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux/mmzone.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/mmzone.h')
-rw-r--r--include/linux/mmzone.h140
1 files changed, 88 insertions, 52 deletions
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 0d53eba1c383..6a1d79d84675 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -20,6 +20,7 @@
#include <linux/atomic.h>
#include <linux/mm_types.h>
#include <linux/page-flags.h>
+#include <linux/local_lock.h>
#include <asm/page.h>
/* Free memory management - zoned buddy allocator. */
@@ -113,7 +114,7 @@ static inline bool free_area_empty(struct free_area *area, int migratetype)
struct pglist_data;
/*
- * Add a wild amount of padding here to ensure datas fall into separate
+ * Add a wild amount of padding here to ensure data fall into separate
* cachelines. There are very few zone structures in the machine, so space
* consumption is not a concern here.
*/
@@ -134,10 +135,10 @@ enum numa_stat_item {
NUMA_INTERLEAVE_HIT, /* interleaver preferred this zone */
NUMA_LOCAL, /* allocation from local node */
NUMA_OTHER, /* allocation from other node */
- NR_VM_NUMA_STAT_ITEMS
+ NR_VM_NUMA_EVENT_ITEMS
};
#else
-#define NR_VM_NUMA_STAT_ITEMS 0
+#define NR_VM_NUMA_EVENT_ITEMS 0
#endif
enum zone_stat_item {
@@ -332,29 +333,55 @@ enum zone_watermarks {
NR_WMARK
};
+/*
+ * One per migratetype for each PAGE_ALLOC_COSTLY_ORDER plus one additional
+ * for pageblock size for THP if configured.
+ */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define NR_PCP_THP 1
+#else
+#define NR_PCP_THP 0
+#endif
+#define NR_PCP_LISTS (MIGRATE_PCPTYPES * (PAGE_ALLOC_COSTLY_ORDER + 1 + NR_PCP_THP))
+
+/*
+ * Shift to encode migratetype and order in the same integer, with order
+ * in the least significant bits.
+ */
+#define NR_PCP_ORDER_WIDTH 8
+#define NR_PCP_ORDER_MASK ((1<<NR_PCP_ORDER_WIDTH) - 1)
+
#define min_wmark_pages(z) (z->_watermark[WMARK_MIN] + z->watermark_boost)
#define low_wmark_pages(z) (z->_watermark[WMARK_LOW] + z->watermark_boost)
#define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost)
#define wmark_pages(z, i) (z->_watermark[i] + z->watermark_boost)
+/* Fields and list protected by pagesets local_lock in page_alloc.c */
struct per_cpu_pages {
int count; /* number of pages in the list */
int high; /* high watermark, emptying needed */
int batch; /* chunk size for buddy add/remove */
+ short free_factor; /* batch scaling factor during free */
+#ifdef CONFIG_NUMA
+ short expire; /* When 0, remote pagesets are drained */
+#endif
/* Lists of pages, one per migrate type stored on the pcp-lists */
- struct list_head lists[MIGRATE_PCPTYPES];
+ struct list_head lists[NR_PCP_LISTS];
};
-struct per_cpu_pageset {
- struct per_cpu_pages pcp;
-#ifdef CONFIG_NUMA
- s8 expire;
- u16 vm_numa_stat_diff[NR_VM_NUMA_STAT_ITEMS];
-#endif
+struct per_cpu_zonestat {
#ifdef CONFIG_SMP
- s8 stat_threshold;
s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS];
+ s8 stat_threshold;
+#endif
+#ifdef CONFIG_NUMA
+ /*
+ * Low priority inaccurate counters that are only folded
+ * on demand. Use a large type to avoid the overhead of
+ * folding during refresh_cpu_vm_stats.
+ */
+ unsigned long vm_numa_event[NR_VM_NUMA_EVENT_ITEMS];
#endif
};
@@ -484,7 +511,8 @@ struct zone {
int node;
#endif
struct pglist_data *zone_pgdat;
- struct per_cpu_pageset __percpu *pageset;
+ struct per_cpu_pages __percpu *per_cpu_pageset;
+ struct per_cpu_zonestat __percpu *per_cpu_zonestats;
/*
* the high and batch values are copied to individual pagesets for
* faster access
@@ -512,6 +540,10 @@ struct zone {
* is calculated as:
* present_pages = spanned_pages - absent_pages(pages in holes);
*
+ * present_early_pages is present pages existing within the zone
+ * located on memory available since early boot, excluding hotplugged
+ * memory.
+ *
* managed_pages is present pages managed by the buddy system, which
* is calculated as (reserved_pages includes pages allocated by the
* bootmem allocator):
@@ -544,6 +576,9 @@ struct zone {
atomic_long_t managed_pages;
unsigned long spanned_pages;
unsigned long present_pages;
+#if defined(CONFIG_MEMORY_HOTPLUG)
+ unsigned long present_early_pages;
+#endif
#ifdef CONFIG_CMA
unsigned long cma_pages;
#endif
@@ -619,7 +654,7 @@ struct zone {
ZONE_PADDING(_pad3_)
/* Zone statistics */
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
- atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS];
+ atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS];
} ____cacheline_internodealigned_in_smp;
enum pgdat_flags {
@@ -637,6 +672,7 @@ enum zone_flags {
ZONE_BOOSTED_WATERMARK, /* zone recently boosted watermarks.
* Cleared when kswapd is woken.
*/
+ ZONE_RECLAIM_ACTIVE, /* kswapd may be scanning the zone. */
};
static inline unsigned long zone_managed_pages(struct zone *zone)
@@ -738,10 +774,12 @@ struct zonelist {
struct zoneref _zonerefs[MAX_ZONES_PER_ZONELIST + 1];
};
-#ifndef CONFIG_DISCONTIGMEM
-/* The array of struct pages - for discontigmem use pgdat->lmem_map */
+/*
+ * The array of struct pages for flatmem.
+ * It must be declared for SPARSEMEM as well because there are configurations
+ * that rely on that.
+ */
extern struct page *mem_map;
-#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
struct deferred_split {
@@ -775,7 +813,7 @@ typedef struct pglist_data {
struct zonelist node_zonelists[MAX_ZONELISTS];
int nr_zones; /* number of populated zones in this node */
-#ifdef CONFIG_FLAT_NODE_MEM_MAP /* means !SPARSEMEM */
+#ifdef CONFIG_FLATMEM /* means !SPARSEMEM */
struct page *node_mem_map;
#ifdef CONFIG_PAGE_EXTENSION
struct page_ext *node_page_ext;
@@ -815,6 +853,7 @@ typedef struct pglist_data {
enum zone_type kcompactd_highest_zoneidx;
wait_queue_head_t kcompactd_wait;
struct task_struct *kcompactd;
+ bool proactive_compact_trigger;
#endif
/*
* This is a per-node reserve of pages that are not available
@@ -865,7 +904,7 @@ typedef struct pglist_data {
#define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages)
#define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages)
-#ifdef CONFIG_FLAT_NODE_MEM_MAP
+#ifdef CONFIG_FLATMEM
#define pgdat_page_nr(pgdat, pagenr) ((pgdat)->node_mem_map + (pagenr))
#else
#define pgdat_page_nr(pgdat, pagenr) pfn_to_page((pgdat)->node_start_pfn + (pagenr))
@@ -982,22 +1021,11 @@ static inline void zone_set_nid(struct zone *zone, int nid) {}
extern int movable_zone;
-#ifdef CONFIG_HIGHMEM
-static inline int zone_movable_is_highmem(void)
-{
-#ifdef CONFIG_NEED_MULTIPLE_NODES
- return movable_zone == ZONE_HIGHMEM;
-#else
- return (ZONE_MOVABLE - 1) == ZONE_HIGHMEM;
-#endif
-}
-#endif
-
static inline int is_highmem_idx(enum zone_type idx)
{
#ifdef CONFIG_HIGHMEM
return (idx == ZONE_HIGHMEM ||
- (idx == ZONE_MOVABLE && zone_movable_is_highmem()));
+ (idx == ZONE_MOVABLE && movable_zone == ZONE_HIGHMEM));
#else
return 0;
#endif
@@ -1029,7 +1057,7 @@ int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, void *,
extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES];
int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, void *,
size_t *, loff_t *);
-int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int,
+int percpu_pagelist_high_fraction_sysctl_handler(struct ctl_table *, int,
void *, size_t *, loff_t *);
int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
void *, size_t *, loff_t *);
@@ -1037,21 +1065,24 @@ int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
void *, size_t *, loff_t *);
int numa_zonelist_order_handler(struct ctl_table *, int,
void *, size_t *, loff_t *);
-extern int percpu_pagelist_fraction;
+extern int percpu_pagelist_high_fraction;
extern char numa_zonelist_order[];
#define NUMA_ZONELIST_ORDER_LEN 16
-#ifndef CONFIG_NEED_MULTIPLE_NODES
+#ifndef CONFIG_NUMA
extern struct pglist_data contig_page_data;
-#define NODE_DATA(nid) (&contig_page_data)
+static inline struct pglist_data *NODE_DATA(int nid)
+{
+ return &contig_page_data;
+}
#define NODE_MEM_MAP(nid) mem_map
-#else /* CONFIG_NEED_MULTIPLE_NODES */
+#else /* CONFIG_NUMA */
#include <asm/mmzone.h>
-#endif /* !CONFIG_NEED_MULTIPLE_NODES */
+#endif /* !CONFIG_NUMA */
extern struct pglist_data *first_online_pgdat(void);
extern struct pglist_data *next_online_pgdat(struct pglist_data *pgdat);
@@ -1200,8 +1231,6 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
#ifdef CONFIG_SPARSEMEM
/*
- * SECTION_SHIFT #bits space required to store a section #
- *
* PA_SECTION_SHIFT physical address to/from section number
* PFN_SECTION_SHIFT pfn to/from section number
*/
@@ -1321,7 +1350,6 @@ static inline struct mem_section *__nr_to_section(unsigned long nr)
return NULL;
return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
}
-extern unsigned long __section_nr(struct mem_section *ms);
extern size_t mem_section_usage_size(void);
/*
@@ -1344,7 +1372,7 @@ extern size_t mem_section_usage_size(void);
#define SECTION_TAINT_ZONE_DEVICE (1UL<<4)
#define SECTION_MAP_LAST_BIT (1UL<<5)
#define SECTION_MAP_MASK (~(SECTION_MAP_LAST_BIT-1))
-#define SECTION_NID_SHIFT 3
+#define SECTION_NID_SHIFT 6
static inline struct page *__section_mem_map_addr(struct mem_section *section)
{
@@ -1427,10 +1455,30 @@ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
#endif
#ifndef CONFIG_HAVE_ARCH_PFN_VALID
+/**
+ * pfn_valid - check if there is a valid memory map entry for a PFN
+ * @pfn: the page frame number to check
+ *
+ * Check if there is a valid memory map entry aka struct page for the @pfn.
+ * Note, that availability of the memory map entry does not imply that
+ * there is actual usable memory at that @pfn. The struct page may
+ * represent a hole or an unusable page frame.
+ *
+ * Return: 1 for PFNs that have memory map entries and 0 otherwise
+ */
static inline int pfn_valid(unsigned long pfn)
{
struct mem_section *ms;
+ /*
+ * Ensure the upper PAGE_SHIFT bits are clear in the
+ * pfn. Else it might lead to false positives when
+ * some of the upper bits are set, but the lower bits
+ * match a valid pfn.
+ */
+ if (PHYS_PFN(PFN_PHYS(pfn)) != pfn)
+ return 0;
+
if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
return 0;
ms = __nr_to_section(pfn_to_section_nr(pfn));
@@ -1484,18 +1532,6 @@ void sparse_init(void);
#define subsection_map_init(_pfn, _nr_pages) do {} while (0)
#endif /* CONFIG_SPARSEMEM */
-/*
- * If it is possible to have holes within a MAX_ORDER_NR_PAGES, then we
- * need to check pfn validity within that MAX_ORDER_NR_PAGES block.
- * pfn_valid_within() should be used in this case; we optimise this away
- * when we have no holes within a MAX_ORDER_NR_PAGES block.
- */
-#ifdef CONFIG_HOLES_IN_ZONE
-#define pfn_valid_within(pfn) pfn_valid(pfn)
-#else
-#define pfn_valid_within(pfn) (1)
-#endif
-
#endif /* !__GENERATING_BOUNDS.H */
#endif /* !__ASSEMBLY__ */
#endif /* _LINUX_MMZONE_H */