aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/filemap.c5
-rw-r--r--mm/huge_memory.c18
-rw-r--r--mm/kasan/report.c3
-rw-r--r--mm/memcontrol.c4
-rw-r--r--mm/memory_hotplug.c56
-rw-r--r--mm/mempolicy.c2
-rw-r--r--mm/page_alloc.c69
-rw-r--r--mm/shmem.c11
-rw-r--r--mm/slub.c23
-rw-r--r--mm/zswap.c30
10 files changed, 165 insertions, 56 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index b772a33ef640..3f9afded581b 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1791,6 +1791,11 @@ static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos,
cond_resched();
find_page:
+ if (fatal_signal_pending(current)) {
+ error = -EINTR;
+ goto out;
+ }
+
page = find_get_page(mapping, index);
if (!page) {
page_cache_sync_readahead(mapping,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9a6bd6c8d55a..5f3ad65c85de 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -783,6 +783,12 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
assert_spin_locked(pmd_lockptr(mm, pmd));
+ /*
+ * When we COW a devmap PMD entry, we split it into PTEs, so we should
+ * not be in this function with `flags & FOLL_COW` set.
+ */
+ WARN_ONCE(flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set");
+
if (flags & FOLL_WRITE && !pmd_write(*pmd))
return NULL;
@@ -1128,6 +1134,16 @@ out_unlock:
return ret;
}
+/*
+ * FOLL_FORCE can write to even unwritable pmd's, but only
+ * after we've gone through a COW cycle and they are dirty.
+ */
+static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags)
+{
+ return pmd_write(pmd) ||
+ ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd));
+}
+
struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
unsigned long addr,
pmd_t *pmd,
@@ -1138,7 +1154,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
assert_spin_locked(pmd_lockptr(mm, pmd));
- if (flags & FOLL_WRITE && !pmd_write(*pmd))
+ if (flags & FOLL_WRITE && !can_follow_write_pmd(*pmd, flags))
goto out;
/* Avoid dumping huge zero page */
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index b82b3e215157..f479365530b6 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -13,6 +13,7 @@
*
*/
+#include <linux/ftrace.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/printk.h>
@@ -300,6 +301,8 @@ void kasan_report(unsigned long addr, size_t size,
if (likely(!kasan_report_enabled()))
return;
+ disable_trace_on_warning();
+
info.access_addr = (void *)addr;
info.access_size = size;
info.is_write = is_write;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a63a8f832664..b822e158b319 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4353,9 +4353,9 @@ static int mem_cgroup_do_precharge(unsigned long count)
return ret;
}
- /* Try charges one by one with reclaim */
+ /* Try charges one by one with reclaim, but do not retry */
while (count--) {
- ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_NORETRY, 1);
+ ret = try_charge(mc.to, GFP_KERNEL | __GFP_NORETRY, 1);
if (ret)
return ret;
mc.precharge++;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index e43142c15631..b8c11e063ff0 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1033,36 +1033,39 @@ static void node_states_set_node(int node, struct memory_notify *arg)
node_set_state(node, N_MEMORY);
}
-int zone_can_shift(unsigned long pfn, unsigned long nr_pages,
- enum zone_type target)
+bool zone_can_shift(unsigned long pfn, unsigned long nr_pages,
+ enum zone_type target, int *zone_shift)
{
struct zone *zone = page_zone(pfn_to_page(pfn));
enum zone_type idx = zone_idx(zone);
int i;
+ *zone_shift = 0;
+
if (idx < target) {
/* pages must be at end of current zone */
if (pfn + nr_pages != zone_end_pfn(zone))
- return 0;
+ return false;
/* no zones in use between current zone and target */
for (i = idx + 1; i < target; i++)
if (zone_is_initialized(zone - idx + i))
- return 0;
+ return false;
}
if (target < idx) {
/* pages must be at beginning of current zone */
if (pfn != zone->zone_start_pfn)
- return 0;
+ return false;
/* no zones in use between current zone and target */
for (i = target + 1; i < idx; i++)
if (zone_is_initialized(zone - idx + i))
- return 0;
+ return false;
}
- return target - idx;
+ *zone_shift = target - idx;
+ return true;
}
/* Must be protected by mem_hotplug_begin() */
@@ -1089,10 +1092,13 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
!can_online_high_movable(zone))
return -EINVAL;
- if (online_type == MMOP_ONLINE_KERNEL)
- zone_shift = zone_can_shift(pfn, nr_pages, ZONE_NORMAL);
- else if (online_type == MMOP_ONLINE_MOVABLE)
- zone_shift = zone_can_shift(pfn, nr_pages, ZONE_MOVABLE);
+ if (online_type == MMOP_ONLINE_KERNEL) {
+ if (!zone_can_shift(pfn, nr_pages, ZONE_NORMAL, &zone_shift))
+ return -EINVAL;
+ } else if (online_type == MMOP_ONLINE_MOVABLE) {
+ if (!zone_can_shift(pfn, nr_pages, ZONE_MOVABLE, &zone_shift))
+ return -EINVAL;
+ }
zone = move_pfn_range(zone_shift, pfn, pfn + nr_pages);
if (!zone)
@@ -1477,17 +1483,20 @@ bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
}
/*
- * Confirm all pages in a range [start, end) is belongs to the same zone.
+ * Confirm all pages in a range [start, end) belong to the same zone.
+ * When true, return its valid [start, end).
*/
-int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn)
+int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
+ unsigned long *valid_start, unsigned long *valid_end)
{
unsigned long pfn, sec_end_pfn;
+ unsigned long start, end;
struct zone *zone = NULL;
struct page *page;
int i;
- for (pfn = start_pfn, sec_end_pfn = SECTION_ALIGN_UP(start_pfn);
+ for (pfn = start_pfn, sec_end_pfn = SECTION_ALIGN_UP(start_pfn + 1);
pfn < end_pfn;
- pfn = sec_end_pfn + 1, sec_end_pfn += PAGES_PER_SECTION) {
+ pfn = sec_end_pfn, sec_end_pfn += PAGES_PER_SECTION) {
/* Make sure the memory section is present first */
if (!present_section_nr(pfn_to_section_nr(pfn)))
continue;
@@ -1503,10 +1512,20 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn)
page = pfn_to_page(pfn + i);
if (zone && page_zone(page) != zone)
return 0;
+ if (!zone)
+ start = pfn + i;
zone = page_zone(page);
+ end = pfn + MAX_ORDER_NR_PAGES;
}
}
- return 1;
+
+ if (zone) {
+ *valid_start = start;
+ *valid_end = end;
+ return 1;
+ } else {
+ return 0;
+ }
}
/*
@@ -1833,6 +1852,7 @@ static int __ref __offline_pages(unsigned long start_pfn,
long offlined_pages;
int ret, drain, retry_max, node;
unsigned long flags;
+ unsigned long valid_start, valid_end;
struct zone *zone;
struct memory_notify arg;
@@ -1843,10 +1863,10 @@ static int __ref __offline_pages(unsigned long start_pfn,
return -EINVAL;
/* This makes hotplug much easier...and readable.
we assume this for now. .*/
- if (!test_pages_in_a_zone(start_pfn, end_pfn))
+ if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start, &valid_end))
return -EINVAL;
- zone = page_zone(pfn_to_page(start_pfn));
+ zone = page_zone(pfn_to_page(valid_start));
node = zone_to_nid(zone);
nr_pages = end_pfn - start_pfn;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 2e346645eb80..1e7873e40c9a 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2017,8 +2017,8 @@ retry_cpuset:
nmask = policy_nodemask(gfp, pol);
zl = policy_zonelist(gfp, pol, node);
- mpol_cond_put(pol);
page = __alloc_pages_nodemask(gfp, order, zl, nmask);
+ mpol_cond_put(pol);
out:
if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
goto retry_cpuset;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d604d2596b7b..f3e0c69a97b7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3523,12 +3523,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
struct page *page = NULL;
unsigned int alloc_flags;
unsigned long did_some_progress;
- enum compact_priority compact_priority = DEF_COMPACT_PRIORITY;
+ enum compact_priority compact_priority;
enum compact_result compact_result;
- int compaction_retries = 0;
- int no_progress_loops = 0;
+ int compaction_retries;
+ int no_progress_loops;
unsigned long alloc_start = jiffies;
unsigned int stall_timeout = 10 * HZ;
+ unsigned int cpuset_mems_cookie;
/*
* In the slowpath, we sanity check order to avoid ever trying to
@@ -3549,6 +3550,23 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
(__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
gfp_mask &= ~__GFP_ATOMIC;
+retry_cpuset:
+ compaction_retries = 0;
+ no_progress_loops = 0;
+ compact_priority = DEF_COMPACT_PRIORITY;
+ cpuset_mems_cookie = read_mems_allowed_begin();
+ /*
+ * We need to recalculate the starting point for the zonelist iterator
+ * because we might have used different nodemask in the fast path, or
+ * there was a cpuset modification and we are retrying - otherwise we
+ * could end up iterating over non-eligible zones endlessly.
+ */
+ ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
+ ac->high_zoneidx, ac->nodemask);
+ if (!ac->preferred_zoneref->zone)
+ goto nopage;
+
+
/*
* The fast path uses conservative alloc_flags to succeed only until
* kswapd needs to be woken up, and to avoid the cost of setting up
@@ -3708,6 +3726,13 @@ retry:
&compaction_retries))
goto retry;
+ /*
+ * It's possible we raced with cpuset update so the OOM would be
+ * premature (see below the nopage: label for full explanation).
+ */
+ if (read_mems_allowed_retry(cpuset_mems_cookie))
+ goto retry_cpuset;
+
/* Reclaim has failed us, start killing things */
page = __alloc_pages_may_oom(gfp_mask, order, ac, &did_some_progress);
if (page)
@@ -3720,6 +3745,16 @@ retry:
}
nopage:
+ /*
+ * When updating a task's mems_allowed or mempolicy nodemask, it is
+ * possible to race with parallel threads in such a way that our
+ * allocation can fail while the mask is being updated. If we are about
+ * to fail, check if the cpuset changed during allocation and if so,
+ * retry.
+ */
+ if (read_mems_allowed_retry(cpuset_mems_cookie))
+ goto retry_cpuset;
+
warn_alloc(gfp_mask,
"page allocation failure: order:%u", order);
got_pg:
@@ -3734,7 +3769,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
struct zonelist *zonelist, nodemask_t *nodemask)
{
struct page *page;
- unsigned int cpuset_mems_cookie;
unsigned int alloc_flags = ALLOC_WMARK_LOW;
gfp_t alloc_mask = gfp_mask; /* The gfp_t that was actually used for allocation */
struct alloc_context ac = {
@@ -3771,9 +3805,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
if (IS_ENABLED(CONFIG_CMA) && ac.migratetype == MIGRATE_MOVABLE)
alloc_flags |= ALLOC_CMA;
-retry_cpuset:
- cpuset_mems_cookie = read_mems_allowed_begin();
-
/* Dirty zone balancing only done in the fast path */
ac.spread_dirty_pages = (gfp_mask & __GFP_WRITE);
@@ -3784,8 +3815,13 @@ retry_cpuset:
*/
ac.preferred_zoneref = first_zones_zonelist(ac.zonelist,
ac.high_zoneidx, ac.nodemask);
- if (!ac.preferred_zoneref) {
+ if (!ac.preferred_zoneref->zone) {
page = NULL;
+ /*
+ * This might be due to race with cpuset_current_mems_allowed
+ * update, so make sure we retry with original nodemask in the
+ * slow path.
+ */
goto no_zone;
}
@@ -3794,6 +3830,7 @@ retry_cpuset:
if (likely(page))
goto out;
+no_zone:
/*
* Runtime PM, block IO and its error handling path can deadlock
* because I/O on the device might not complete.
@@ -3805,21 +3842,10 @@ retry_cpuset:
* Restore the original nodemask if it was potentially replaced with
* &cpuset_current_mems_allowed to optimize the fast-path attempt.
*/
- if (cpusets_enabled())
+ if (unlikely(ac.nodemask != nodemask))
ac.nodemask = nodemask;
- page = __alloc_pages_slowpath(alloc_mask, order, &ac);
-no_zone:
- /*
- * When updating a task's mems_allowed, it is possible to race with
- * parallel threads in such a way that an allocation can fail while
- * the mask is being updated. If a page allocation is about to fail,
- * check if the cpuset changed during allocation and if so, retry.
- */
- if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie))) {
- alloc_mask = gfp_mask;
- goto retry_cpuset;
- }
+ page = __alloc_pages_slowpath(alloc_mask, order, &ac);
out:
if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page &&
@@ -7248,6 +7274,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
.zone = page_zone(pfn_to_page(start)),
.mode = MIGRATE_SYNC,
.ignore_skip_hint = true,
+ .gfp_mask = GFP_KERNEL,
};
INIT_LIST_HEAD(&cc.migratepages);
diff --git a/mm/shmem.c b/mm/shmem.c
index bb53285a1d99..3a7587a0314d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -415,6 +415,7 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
struct shrink_control *sc, unsigned long nr_to_split)
{
LIST_HEAD(list), *pos, *next;
+ LIST_HEAD(to_remove);
struct inode *inode;
struct shmem_inode_info *info;
struct page *page;
@@ -441,9 +442,8 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
/* Check if there's anything to gain */
if (round_up(inode->i_size, PAGE_SIZE) ==
round_up(inode->i_size, HPAGE_PMD_SIZE)) {
- list_del_init(&info->shrinklist);
+ list_move(&info->shrinklist, &to_remove);
removed++;
- iput(inode);
goto next;
}
@@ -454,6 +454,13 @@ next:
}
spin_unlock(&sbinfo->shrinklist_lock);
+ list_for_each_safe(pos, next, &to_remove) {
+ info = list_entry(pos, struct shmem_inode_info, shrinklist);
+ inode = &info->vfs_inode;
+ list_del_init(&info->shrinklist);
+ iput(inode);
+ }
+
list_for_each_safe(pos, next, &list) {
int ret;
diff --git a/mm/slub.c b/mm/slub.c
index 067598a00849..7aa6f433f4de 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -496,10 +496,11 @@ static inline int check_valid_pointer(struct kmem_cache *s,
return 1;
}
-static void print_section(char *text, u8 *addr, unsigned int length)
+static void print_section(char *level, char *text, u8 *addr,
+ unsigned int length)
{
metadata_access_enable();
- print_hex_dump(KERN_ERR, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
+ print_hex_dump(level, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
length, 1);
metadata_access_disable();
}
@@ -636,14 +637,15 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
p, p - addr, get_freepointer(s, p));
if (s->flags & SLAB_RED_ZONE)
- print_section("Redzone ", p - s->red_left_pad, s->red_left_pad);
+ print_section(KERN_ERR, "Redzone ", p - s->red_left_pad,
+ s->red_left_pad);
else if (p > addr + 16)
- print_section("Bytes b4 ", p - 16, 16);
+ print_section(KERN_ERR, "Bytes b4 ", p - 16, 16);
- print_section("Object ", p, min_t(unsigned long, s->object_size,
- PAGE_SIZE));
+ print_section(KERN_ERR, "Object ", p,
+ min_t(unsigned long, s->object_size, PAGE_SIZE));
if (s->flags & SLAB_RED_ZONE)
- print_section("Redzone ", p + s->object_size,
+ print_section(KERN_ERR, "Redzone ", p + s->object_size,
s->inuse - s->object_size);
if (s->offset)
@@ -658,7 +660,8 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
if (off != size_from_object(s))
/* Beginning of the filler is the free pointer */
- print_section("Padding ", p + off, size_from_object(s) - off);
+ print_section(KERN_ERR, "Padding ", p + off,
+ size_from_object(s) - off);
dump_stack();
}
@@ -820,7 +823,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
end--;
slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
- print_section("Padding ", end - remainder, remainder);
+ print_section(KERN_ERR, "Padding ", end - remainder, remainder);
restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end);
return 0;
@@ -973,7 +976,7 @@ static void trace(struct kmem_cache *s, struct page *page, void *object,
page->freelist);
if (!alloc)
- print_section("Object ", (void *)object,
+ print_section(KERN_INFO, "Object ", (void *)object,
s->object_size);
dump_stack();
diff --git a/mm/zswap.c b/mm/zswap.c
index 067a0d62f318..cabf09e0128b 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -78,7 +78,13 @@ static u64 zswap_duplicate_entry;
/* Enable/disable zswap (disabled by default) */
static bool zswap_enabled;
-module_param_named(enabled, zswap_enabled, bool, 0644);
+static int zswap_enabled_param_set(const char *,
+ const struct kernel_param *);
+static struct kernel_param_ops zswap_enabled_param_ops = {
+ .set = zswap_enabled_param_set,
+ .get = param_get_bool,
+};
+module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644);
/* Crypto compressor to use */
#define ZSWAP_COMPRESSOR_DEFAULT "lzo"
@@ -176,6 +182,9 @@ static atomic_t zswap_pools_count = ATOMIC_INIT(0);
/* used by param callback function */
static bool zswap_init_started;
+/* fatal error during init */
+static bool zswap_init_failed;
+
/*********************************
* helpers and fwd declarations
**********************************/
@@ -624,6 +633,11 @@ static int __zswap_param_set(const char *val, const struct kernel_param *kp,
char *s = strstrip((char *)val);
int ret;
+ if (zswap_init_failed) {
+ pr_err("can't set param, initialization failed\n");
+ return -ENODEV;
+ }
+
/* no change required */
if (!strcmp(s, *(char **)kp->arg))
return 0;
@@ -703,6 +717,17 @@ static int zswap_zpool_param_set(const char *val,
return __zswap_param_set(val, kp, NULL, zswap_compressor);
}
+static int zswap_enabled_param_set(const char *val,
+ const struct kernel_param *kp)
+{
+ if (zswap_init_failed) {
+ pr_err("can't enable, initialization failed\n");
+ return -ENODEV;
+ }
+
+ return param_set_bool(val, kp);
+}
+
/*********************************
* writeback code
**********************************/
@@ -1201,6 +1226,9 @@ hp_fail:
dstmem_fail:
zswap_entry_cache_destroy();
cache_fail:
+ /* if built-in, we aren't unloaded on failure; don't allow use */
+ zswap_init_failed = true;
+ zswap_enabled = false;
return -ENOMEM;
}
/* must be late so crypto has time to come up */