aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/balloon_compaction.h272
-rw-r--r--include/linux/bootmem.h4
-rw-r--r--include/linux/fs.h2
-rw-r--r--include/linux/gfp.h2
-rw-r--r--include/linux/huge_mm.h4
-rw-r--r--include/linux/hugetlb.h7
-rw-r--r--include/linux/kernel.h14
-rw-r--r--include/linux/memory.h1
-rw-r--r--include/linux/memory_hotplug.h13
-rw-r--r--include/linux/migrate.h19
-rw-r--r--include/linux/mm.h31
-rw-r--r--include/linux/mm_types.h19
-rw-r--r--include/linux/mmzone.h9
-rw-r--r--include/linux/node.h3
-rw-r--r--include/linux/oom.h21
-rw-r--r--include/linux/page-isolation.h10
-rw-r--r--include/linux/pagemap.h16
-rw-r--r--include/linux/sched.h7
-rw-r--r--include/linux/shm.h15
-rw-r--r--include/linux/types.h1
-rw-r--r--include/linux/writeback.h9
21 files changed, 425 insertions, 54 deletions
diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h
new file mode 100644
index 000000000000..f7f1d7169b11
--- /dev/null
+++ b/include/linux/balloon_compaction.h
@@ -0,0 +1,272 @@
+/*
+ * include/linux/balloon_compaction.h
+ *
+ * Common interface definitions for making balloon pages movable by compaction.
+ *
+ * Despite being perfectly possible to perform ballooned pages migration, they
+ * make a special corner case to compaction scans because balloon pages are not
+ * enlisted at any LRU list like the other pages we do compact / migrate.
+ *
+ * As the page isolation scanning step a compaction thread does is a lockless
+ * procedure (from a page standpoint), it might bring some racy situations while
+ * performing balloon page compaction. In order to sort out these racy scenarios
+ * and safely perform balloon's page compaction and migration we must, always,
+ * ensure following these three simple rules:
+ *
+ * i. when updating a balloon's page ->mapping element, strictly do it under
+ * the following lock order, independently of the far superior
+ * locking scheme (lru_lock, balloon_lock):
+ * +-page_lock(page);
+ * +--spin_lock_irq(&b_dev_info->pages_lock);
+ * ... page->mapping updates here ...
+ *
+ * ii. before isolating or dequeueing a balloon page from the balloon device
+ * pages list, the page reference counter must be raised by one and the
+ * extra refcount must be dropped when the page is enqueued back into
+ * the balloon device page list, thus a balloon page keeps its reference
+ * counter raised only while it is under our special handling;
+ *
+ * iii. after the lockless scan step have selected a potential balloon page for
+ * isolation, re-test the page->mapping flags and the page ref counter
+ * under the proper page lock, to ensure isolating a valid balloon page
+ * (not yet isolated, nor under release procedure)
+ *
+ * The functions provided by this interface are placed to help on coping with
+ * the aforementioned balloon page corner case, as well as to ensure the simple
+ * set of exposed rules are satisfied while we are dealing with balloon pages
+ * compaction / migration.
+ *
+ * Copyright (C) 2012, Red Hat, Inc. Rafael Aquini <aquini@redhat.com>
+ */
+#ifndef _LINUX_BALLOON_COMPACTION_H
+#define _LINUX_BALLOON_COMPACTION_H
+#include <linux/pagemap.h>
+#include <linux/page-flags.h>
+#include <linux/migrate.h>
+#include <linux/gfp.h>
+#include <linux/err.h>
+
+/*
+ * Balloon device information descriptor.
+ * This struct is used to allow the common balloon compaction interface
+ * procedures to find the proper balloon device holding memory pages they'll
+ * have to cope for page compaction / migration, as well as it serves the
+ * balloon driver as a page book-keeper for its registered balloon devices.
+ */
+struct balloon_dev_info {
+ void *balloon_device; /* balloon device descriptor */
+ struct address_space *mapping; /* balloon special page->mapping */
+ unsigned long isolated_pages; /* # of isolated pages for migration */
+ spinlock_t pages_lock; /* Protection to pages list */
+ struct list_head pages; /* Pages enqueued & handled to Host */
+};
+
+extern struct page *balloon_page_enqueue(struct balloon_dev_info *b_dev_info);
+extern struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info);
+extern struct balloon_dev_info *balloon_devinfo_alloc(
+ void *balloon_dev_descriptor);
+
+static inline void balloon_devinfo_free(struct balloon_dev_info *b_dev_info)
+{
+ kfree(b_dev_info);
+}
+
+/*
+ * balloon_page_free - release a balloon page back to the page free lists
+ * @page: ballooned page to be set free
+ *
+ * This function must be used to properly set free an isolated/dequeued balloon
+ * page at the end of a sucessful page migration, or at the balloon driver's
+ * page release procedure.
+ */
+static inline void balloon_page_free(struct page *page)
+{
+ /*
+ * Balloon pages always get an extra refcount before being isolated
+ * and before being dequeued to help on sorting out fortuite colisions
+ * between a thread attempting to isolate and another thread attempting
+ * to release the very same balloon page.
+ *
+ * Before we handle the page back to Buddy, lets drop its extra refcnt.
+ */
+ put_page(page);
+ __free_page(page);
+}
+
+#ifdef CONFIG_BALLOON_COMPACTION
+extern bool balloon_page_isolate(struct page *page);
+extern void balloon_page_putback(struct page *page);
+extern int balloon_page_migrate(struct page *newpage,
+ struct page *page, enum migrate_mode mode);
+extern struct address_space
+*balloon_mapping_alloc(struct balloon_dev_info *b_dev_info,
+ const struct address_space_operations *a_ops);
+
+static inline void balloon_mapping_free(struct address_space *balloon_mapping)
+{
+ kfree(balloon_mapping);
+}
+
+/*
+ * page_flags_cleared - helper to perform balloon @page ->flags tests.
+ *
+ * As balloon pages are obtained from buddy and we do not play with page->flags
+ * at driver level (exception made when we get the page lock for compaction),
+ * we can safely identify a ballooned page by checking if the
+ * PAGE_FLAGS_CHECK_AT_PREP page->flags are all cleared. This approach also
+ * helps us skip ballooned pages that are locked for compaction or release, thus
+ * mitigating their racy check at balloon_page_movable()
+ */
+static inline bool page_flags_cleared(struct page *page)
+{
+ return !(page->flags & PAGE_FLAGS_CHECK_AT_PREP);
+}
+
+/*
+ * __is_movable_balloon_page - helper to perform @page mapping->flags tests
+ */
+static inline bool __is_movable_balloon_page(struct page *page)
+{
+ struct address_space *mapping = page->mapping;
+ return mapping_balloon(mapping);
+}
+
+/*
+ * balloon_page_movable - test page->mapping->flags to identify balloon pages
+ * that can be moved by compaction/migration.
+ *
+ * This function is used at core compaction's page isolation scheme, therefore
+ * most pages exposed to it are not enlisted as balloon pages and so, to avoid
+ * undesired side effects like racing against __free_pages(), we cannot afford
+ * holding the page locked while testing page->mapping->flags here.
+ *
+ * As we might return false positives in the case of a balloon page being just
+ * released under us, the page->mapping->flags need to be re-tested later,
+ * under the proper page lock, at the functions that will be coping with the
+ * balloon page case.
+ */
+static inline bool balloon_page_movable(struct page *page)
+{
+ /*
+ * Before dereferencing and testing mapping->flags, let's make sure
+ * this is not a page that uses ->mapping in a different way
+ */
+ if (page_flags_cleared(page) && !page_mapped(page) &&
+ page_count(page) == 1)
+ return __is_movable_balloon_page(page);
+
+ return false;
+}
+
+/*
+ * balloon_page_insert - insert a page into the balloon's page list and make
+ * the page->mapping assignment accordingly.
+ * @page : page to be assigned as a 'balloon page'
+ * @mapping : allocated special 'balloon_mapping'
+ * @head : balloon's device page list head
+ *
+ * Caller must ensure the page is locked and the spin_lock protecting balloon
+ * pages list is held before inserting a page into the balloon device.
+ */
+static inline void balloon_page_insert(struct page *page,
+ struct address_space *mapping,
+ struct list_head *head)
+{
+ page->mapping = mapping;
+ list_add(&page->lru, head);
+}
+
+/*
+ * balloon_page_delete - delete a page from balloon's page list and clear
+ * the page->mapping assignement accordingly.
+ * @page : page to be released from balloon's page list
+ *
+ * Caller must ensure the page is locked and the spin_lock protecting balloon
+ * pages list is held before deleting a page from the balloon device.
+ */
+static inline void balloon_page_delete(struct page *page)
+{
+ page->mapping = NULL;
+ list_del(&page->lru);
+}
+
+/*
+ * balloon_page_device - get the b_dev_info descriptor for the balloon device
+ * that enqueues the given page.
+ */
+static inline struct balloon_dev_info *balloon_page_device(struct page *page)
+{
+ struct address_space *mapping = page->mapping;
+ if (likely(mapping))
+ return mapping->private_data;
+
+ return NULL;
+}
+
+static inline gfp_t balloon_mapping_gfp_mask(void)
+{
+ return GFP_HIGHUSER_MOVABLE;
+}
+
+static inline bool balloon_compaction_check(void)
+{
+ return true;
+}
+
+#else /* !CONFIG_BALLOON_COMPACTION */
+
+static inline void *balloon_mapping_alloc(void *balloon_device,
+ const struct address_space_operations *a_ops)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void balloon_mapping_free(struct address_space *balloon_mapping)
+{
+ return;
+}
+
+static inline void balloon_page_insert(struct page *page,
+ struct address_space *mapping,
+ struct list_head *head)
+{
+ list_add(&page->lru, head);
+}
+
+static inline void balloon_page_delete(struct page *page)
+{
+ list_del(&page->lru);
+}
+
+static inline bool balloon_page_movable(struct page *page)
+{
+ return false;
+}
+
+static inline bool balloon_page_isolate(struct page *page)
+{
+ return false;
+}
+
+static inline void balloon_page_putback(struct page *page)
+{
+ return;
+}
+
+static inline int balloon_page_migrate(struct page *newpage,
+ struct page *page, enum migrate_mode mode)
+{
+ return 0;
+}
+
+static inline gfp_t balloon_mapping_gfp_mask(void)
+{
+ return GFP_HIGHUSER;
+}
+
+static inline bool balloon_compaction_check(void)
+{
+ return false;
+}
+#endif /* CONFIG_BALLOON_COMPACTION */
+#endif /* _LINUX_BALLOON_COMPACTION_H */
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 6d6795d46a75..7b74452c5317 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -51,8 +51,8 @@ extern unsigned long free_all_bootmem(void);
extern void free_bootmem_node(pg_data_t *pgdat,
unsigned long addr,
unsigned long size);
-extern void free_bootmem(unsigned long addr, unsigned long size);
-extern void free_bootmem_late(unsigned long addr, unsigned long size);
+extern void free_bootmem(unsigned long physaddr, unsigned long size);
+extern void free_bootmem_late(unsigned long physaddr, unsigned long size);
/*
* Flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 75fe9a134803..408fb1e77a0a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -418,7 +418,7 @@ struct address_space {
struct backing_dev_info *backing_dev_info; /* device readahead, etc */
spinlock_t private_lock; /* for use by the address_space */
struct list_head private_list; /* ditto */
- struct address_space *assoc_mapping; /* ditto */
+ void *private_data; /* ditto */
} __attribute__((aligned(sizeof(long))));
/*
* On most architectures that alignment is already the case; but
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index d0a79678f169..31e8041274f6 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -266,7 +266,7 @@ static inline enum zone_type gfp_zone(gfp_t flags)
static inline int gfp_zonelist(gfp_t flags)
{
- if (NUMA_BUILD && unlikely(flags & __GFP_THISNODE))
+ if (IS_ENABLED(CONFIG_NUMA) && unlikely(flags & __GFP_THISNODE))
return 1;
return 0;
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index b31cb7da0346..1af477552459 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -8,6 +8,10 @@ extern int do_huge_pmd_anonymous_page(struct mm_struct *mm,
extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
struct vm_area_struct *vma);
+extern void huge_pmd_set_accessed(struct mm_struct *mm,
+ struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmd,
+ pmd_t orig_pmd, int dirty);
extern int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pmd_t *pmd,
pmd_t orig_pmd);
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 225164842ab6..3e7fa1acf09c 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -183,7 +183,8 @@ extern const struct file_operations hugetlbfs_file_operations;
extern const struct vm_operations_struct hugetlb_vm_ops;
struct file *hugetlb_file_setup(const char *name, unsigned long addr,
size_t size, vm_flags_t acct,
- struct user_struct **user, int creat_flags);
+ struct user_struct **user, int creat_flags,
+ int page_size_log);
static inline int is_file_hugepages(struct file *file)
{
@@ -195,12 +196,14 @@ static inline int is_file_hugepages(struct file *file)
return 0;
}
+
#else /* !CONFIG_HUGETLBFS */
#define is_file_hugepages(file) 0
static inline struct file *
hugetlb_file_setup(const char *name, unsigned long addr, size_t size,
- vm_flags_t acctflag, struct user_struct **user, int creat_flags)
+ vm_flags_t acctflag, struct user_struct **user, int creat_flags,
+ int page_size_log)
{
return ERR_PTR(-ENOSYS);
}
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 7d8dfc7392f1..dd9900cabf89 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -687,20 +687,6 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
/* Trap pasters of __FUNCTION__ at compile-time */
#define __FUNCTION__ (__func__)
-/* This helps us to avoid #ifdef CONFIG_NUMA */
-#ifdef CONFIG_NUMA
-#define NUMA_BUILD 1
-#else
-#define NUMA_BUILD 0
-#endif
-
-/* This helps us avoid #ifdef CONFIG_COMPACTION */
-#ifdef CONFIG_COMPACTION
-#define COMPACTION_BUILD 1
-#else
-#define COMPACTION_BUILD 0
-#endif
-
/* This helps us to avoid #ifdef CONFIG_SYMBOL_PREFIX */
#ifdef CONFIG_SYMBOL_PREFIX
#define SYMBOL_PREFIX CONFIG_SYMBOL_PREFIX
diff --git a/include/linux/memory.h b/include/linux/memory.h
index ff9a9f8e0ed9..a09216d0dcc7 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -53,6 +53,7 @@ int arch_get_memory_phys_device(unsigned long start_pfn);
struct memory_notify {
unsigned long start_pfn;
unsigned long nr_pages;
+ int status_change_nid_normal;
int status_change_nid;
};
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 95573ec4ee6c..4a45c4e50025 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -26,6 +26,13 @@ enum {
MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE = NODE_INFO,
};
+/* Types for control the zone type of onlined memory */
+enum {
+ ONLINE_KEEP,
+ ONLINE_KERNEL,
+ ONLINE_MOVABLE,
+};
+
/*
* pgdat resizing functions
*/
@@ -46,6 +53,10 @@ void pgdat_resize_init(struct pglist_data *pgdat)
}
/*
* Zone resizing functions
+ *
+ * Note: any attempt to resize a zone should has pgdat_resize_lock()
+ * zone_span_writelock() both held. This ensure the size of a zone
+ * can't be changed while pgdat_resize_lock() held.
*/
static inline unsigned zone_span_seqbegin(struct zone *zone)
{
@@ -71,7 +82,7 @@ extern int zone_grow_free_lists(struct zone *zone, unsigned long new_nr_pages);
extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages);
extern int add_one_highpage(struct page *page, int pfn, int bad_ppro);
/* VM interface that may be used by firmware interface */
-extern int online_pages(unsigned long, unsigned long);
+extern int online_pages(unsigned long, unsigned long, int);
extern void __offline_isolated_pages(unsigned long, unsigned long);
typedef void (*online_page_callback_t)(struct page *page);
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index ce7e6671968b..0b5865c61efd 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -7,9 +7,27 @@
typedef struct page *new_page_t(struct page *, unsigned long private, int **);
+/*
+ * Return values from addresss_space_operations.migratepage():
+ * - negative errno on page migration failure;
+ * - zero on page migration success;
+ *
+ * The balloon page migration introduces this special case where a 'distinct'
+ * return code is used to flag a successful page migration to unmap_and_move().
+ * This approach is necessary because page migration can race against balloon
+ * deflation procedure, and for such case we could introduce a nasty page leak
+ * if a successfully migrated balloon page gets released concurrently with
+ * migration's unmap_and_move() wrap-up steps.
+ */
+#define MIGRATEPAGE_SUCCESS 0
+#define MIGRATEPAGE_BALLOON_SUCCESS 1 /* special ret code for balloon page
+ * sucessful migration case.
+ */
+
#ifdef CONFIG_MIGRATION
extern void putback_lru_pages(struct list_head *l);
+extern void putback_movable_pages(struct list_head *l);
extern int migrate_page(struct address_space *,
struct page *, struct page *, enum migrate_mode);
extern int migrate_pages(struct list_head *l, new_page_t x,
@@ -33,6 +51,7 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping,
#else
static inline void putback_lru_pages(struct list_head *l) {}
+static inline void putback_movable_pages(struct list_head *l) {}
static inline int migrate_pages(struct list_head *l, new_page_t x,
unsigned long private, bool offlining,
enum migrate_mode mode) { return -ENOSYS; }
diff --git a/include/linux/mm.h b/include/linux/mm.h
index bcaab4e6fe91..4af4f0b1be4c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1456,6 +1456,37 @@ extern unsigned long vm_mmap(struct file *, unsigned long,
unsigned long, unsigned long,
unsigned long, unsigned long);
+struct vm_unmapped_area_info {
+#define VM_UNMAPPED_AREA_TOPDOWN 1
+ unsigned long flags;
+ unsigned long length;
+ unsigned long low_limit;
+ unsigned long high_limit;
+ unsigned long align_mask;
+ unsigned long align_offset;
+};
+
+extern unsigned long unmapped_area(struct vm_unmapped_area_info *info);
+extern unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info);
+
+/*
+ * Search for an unmapped address range.
+ *
+ * We are looking for a range that:
+ * - does not intersect with any VMA;
+ * - is contained within the [low_limit, high_limit) interval;
+ * - is at least the desired size.
+ * - satisfies (begin_addr & align_mask) == (align_offset & align_mask)
+ */
+static inline unsigned long
+vm_unmapped_area(struct vm_unmapped_area_info *info)
+{
+ if (!(info->flags & VM_UNMAPPED_AREA_TOPDOWN))
+ return unmapped_area(info);
+ else
+ return unmapped_area_topdown(info);
+}
+
/* truncate.c */
extern void truncate_inode_pages(struct address_space *, loff_t);
extern void truncate_inode_pages_range(struct address_space *,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 31f8a3af7d94..7ade2731b5d6 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -224,7 +224,8 @@ struct vm_region {
* library, the executable area etc).
*/
struct vm_area_struct {
- struct mm_struct * vm_mm; /* The address space we belong to. */
+ /* The first cache line has the info for VMA tree walking. */
+
unsigned long vm_start; /* Our start address within vm_mm. */
unsigned long vm_end; /* The first byte after our end address
within vm_mm. */
@@ -232,11 +233,22 @@ struct vm_area_struct {
/* linked list of VM areas per task, sorted by address */
struct vm_area_struct *vm_next, *vm_prev;
+ struct rb_node vm_rb;
+
+ /*
+ * Largest free memory gap in bytes to the left of this VMA.
+ * Either between this VMA and vma->vm_prev, or between one of the
+ * VMAs below us in the VMA rbtree and its ->vm_prev. This helps
+ * get_unmapped_area find a free area of the right size.
+ */
+ unsigned long rb_subtree_gap;
+
+ /* Second cache line starts here. */
+
+ struct mm_struct *vm_mm; /* The address space we belong to. */
pgprot_t vm_page_prot; /* Access permissions of this VMA. */
unsigned long vm_flags; /* Flags, see mm.h. */
- struct rb_node vm_rb;
-
/*
* For areas with an address space and backing store,
* linkage into the address_space->i_mmap interval tree, or
@@ -322,6 +334,7 @@ struct mm_struct {
unsigned long task_size; /* size of task vm space */
unsigned long cached_hole_size; /* if non-zero, the largest hole below free_area_cache */
unsigned long free_area_cache; /* first hole of size cached_hole_size or larger */
+ unsigned long highest_vm_end; /* highest vma end address */
pgd_t * pgd;
atomic_t mm_users; /* How many users with user space? */
atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index a23923ba8263..0c0b1d608a69 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -63,10 +63,8 @@ enum {
#ifdef CONFIG_CMA
# define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA)
-# define cma_wmark_pages(zone) zone->min_cma_pages
#else
# define is_migrate_cma(migratetype) false
-# define cma_wmark_pages(zone) 0
#endif
#define for_each_migratetype_order(order, type) \
@@ -383,13 +381,6 @@ struct zone {
/* see spanned/present_pages for more description */
seqlock_t span_seqlock;
#endif
-#ifdef CONFIG_CMA
- /*
- * CMA needs to increase watermark levels during the allocation
- * process to make sure that the system is not starved.
- */
- unsigned long min_cma_pages;
-#endif
struct free_area free_area[MAX_ORDER];
#ifndef CONFIG_SPARSEMEM
diff --git a/include/linux/node.h b/include/linux/node.h
index 624e53cecc02..2115ad5d6f19 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -27,10 +27,9 @@ struct node {
};
struct memory_block;
-extern struct node node_devices[];
+extern struct node *node_devices[];
typedef void (*node_registration_func_t)(struct node *);
-extern int register_node(struct node *, int, struct node *);
extern void unregister_node(struct node *node);
#ifdef CONFIG_NUMA
extern int register_one_node(int nid);
diff --git a/include/linux/oom.h b/include/linux/oom.h
index fb9826847b89..da60007075b5 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -29,8 +29,23 @@ enum oom_scan_t {
OOM_SCAN_SELECT, /* always select this thread first */
};
-extern void compare_swap_oom_score_adj(int old_val, int new_val);
-extern int test_set_oom_score_adj(int new_val);
+/* Thread is the potential origin of an oom condition; kill first on oom */
+#define OOM_FLAG_ORIGIN ((__force oom_flags_t)0x1)
+
+static inline void set_current_oom_origin(void)
+{
+ current->signal->oom_flags |= OOM_FLAG_ORIGIN;
+}
+
+static inline void clear_current_oom_origin(void)
+{
+ current->signal->oom_flags &= ~OOM_FLAG_ORIGIN;
+}
+
+static inline bool oom_task_origin(const struct task_struct *p)
+{
+ return !!(p->signal->oom_flags & OOM_FLAG_ORIGIN);
+}
extern unsigned long oom_badness(struct task_struct *p,
struct mem_cgroup *memcg, const nodemask_t *nodemask,
@@ -49,8 +64,6 @@ extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
unsigned long totalpages, const nodemask_t *nodemask,
bool force_kill);
-extern void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
- int order);
extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
int order, nodemask_t *mask, bool force_kill);
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index 76a9539cfd3f..a92061e08d48 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -2,7 +2,8 @@
#define __LINUX_PAGEISOLATION_H
-bool has_unmovable_pages(struct zone *zone, struct page *page, int count);
+bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
+ bool skip_hwpoisoned_pages);
void set_pageblock_migratetype(struct page *page, int migratetype);
int move_freepages_block(struct zone *zone, struct page *page,
int migratetype);
@@ -21,7 +22,7 @@ int move_freepages(struct zone *zone,
*/
int
start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
- unsigned migratetype);
+ unsigned migratetype, bool skip_hwpoisoned_pages);
/*
* Changes MIGRATE_ISOLATE to MIGRATE_MOVABLE.
@@ -34,12 +35,13 @@ undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
/*
* Test all pages in [start_pfn, end_pfn) are isolated or not.
*/
-int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn);
+int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
+ bool skip_hwpoisoned_pages);
/*
* Internal functions. Changes pageblock's migrate type.
*/
-int set_migratetype_isolate(struct page *page);
+int set_migratetype_isolate(struct page *page, bool skip_hwpoisoned_pages);
void unset_migratetype_isolate(struct page *page, unsigned migratetype);
struct page *alloc_migrate_target(struct page *page, unsigned long private,
int **resultp);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index e42c762f0dc7..6da609d14c15 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -24,6 +24,7 @@ enum mapping_flags {
AS_ENOSPC = __GFP_BITS_SHIFT + 1, /* ENOSPC on async write */
AS_MM_ALL_LOCKS = __GFP_BITS_SHIFT + 2, /* under mm_take_all_locks() */
AS_UNEVICTABLE = __GFP_BITS_SHIFT + 3, /* e.g., ramdisk, SHM_LOCK */
+ AS_BALLOON_MAP = __GFP_BITS_SHIFT + 4, /* balloon page special map */
};
static inline void mapping_set_error(struct address_space *mapping, int error)
@@ -53,6 +54,21 @@ static inline int mapping_unevictable(struct address_space *mapping)
return !!mapping;
}
+static inline void mapping_set_balloon(struct address_space *mapping)
+{
+ set_bit(AS_BALLOON_MAP, &mapping->flags);
+}
+
+static inline void mapping_clear_balloon(struct address_space *mapping)
+{
+ clear_bit(AS_BALLOON_MAP, &mapping->flags);
+}
+
+static inline int mapping_balloon(struct address_space *mapping)
+{
+ return mapping && test_bit(AS_BALLOON_MAP, &mapping->flags);
+}
+
static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
{
return (__force gfp_t)mapping->flags & __GFP_BITS_MASK;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0dd42a02df2e..3e387df065fc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -631,9 +631,10 @@ struct signal_struct {
struct rw_semaphore group_rwsem;
#endif
- int oom_score_adj; /* OOM kill score adjustment */
- int oom_score_adj_min; /* OOM kill score adjustment minimum value.
- * Only settable by CAP_SYS_RESOURCE. */
+ oom_flags_t oom_flags;
+ short oom_score_adj; /* OOM kill score adjustment */
+ short oom_score_adj_min; /* OOM kill score adjustment min value.
+ * Only settable by CAP_SYS_RESOURCE. */
struct mutex cred_guard_mutex; /* guard against foreign influences on
* credential calculations
diff --git a/include/linux/shm.h b/include/linux/shm.h
index bcf8a6a3ec00..429c1995d756 100644
--- a/include/linux/shm.h
+++ b/include/linux/shm.h
@@ -29,6 +29,21 @@ struct shmid_kernel /* private to the kernel */
#define SHM_HUGETLB 04000 /* segment will use huge TLB pages */
#define SHM_NORESERVE 010000 /* don't check for reservations */
+/* Bits [26:31] are reserved */
+
+/*
+ * When SHM_HUGETLB is set bits [26:31] encode the log2 of the huge page size.
+ * This gives us 6 bits, which is enough until someone invents 128 bit address
+ * spaces.
+ *
+ * Assume these are all power of twos.
+ * When 0 use the default page size.
+ */
+#define SHM_HUGE_SHIFT 26
+#define SHM_HUGE_MASK 0x3f
+#define SHM_HUGE_2MB (21 << SHM_HUGE_SHIFT)
+#define SHM_HUGE_1GB (30 << SHM_HUGE_SHIFT)
+
#ifdef CONFIG_SYSVIPC
long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr,
unsigned long shmlba);
diff --git a/include/linux/types.h b/include/linux/types.h
index 1cc0e4b9a048..4d118ba11349 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -156,6 +156,7 @@ typedef u32 dma_addr_t;
#endif
typedef unsigned __bitwise__ gfp_t;
typedef unsigned __bitwise__ fmode_t;
+typedef unsigned __bitwise__ oom_flags_t;
#ifdef CONFIG_PHYS_ADDR_T_64BIT
typedef u64 phys_addr_t;
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 50c3e8fa06a8..b82a83aba311 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -161,14 +161,7 @@ void __bdi_update_bandwidth(struct backing_dev_info *bdi,
unsigned long start_time);
void page_writeback_init(void);
-void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
- unsigned long nr_pages_dirtied);
-
-static inline void
-balance_dirty_pages_ratelimited(struct address_space *mapping)
-{
- balance_dirty_pages_ratelimited_nr(mapping, 1);
-}
+void balance_dirty_pages_ratelimited(struct address_space *mapping);
typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,
void *data);