aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/backing-dev.c36
-rw-r--r--mm/cma.c4
-rw-r--r--mm/dmapool.c2
-rw-r--r--mm/filemap.c43
-rw-r--r--mm/huge_memory.c3
-rw-r--r--mm/hugetlb.c8
-rw-r--r--mm/kasan/kasan.c3
-rw-r--r--mm/memcontrol.c67
-rw-r--r--mm/memory.c2
-rw-r--r--mm/migrate.c14
-rw-r--r--mm/mmap.c11
-rw-r--r--mm/page-writeback.c54
-rw-r--r--mm/readahead.c8
-rw-r--r--mm/slab.c13
-rw-r--r--mm/vmscan.c2
-rw-r--r--mm/vmstat.c7
16 files changed, 168 insertions, 109 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 2df8ddcb0ca0..619984fc07ec 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -480,6 +480,10 @@ static void cgwb_release_workfn(struct work_struct *work)
release_work);
struct backing_dev_info *bdi = wb->bdi;
+ spin_lock_irq(&cgwb_lock);
+ list_del_rcu(&wb->bdi_node);
+ spin_unlock_irq(&cgwb_lock);
+
wb_shutdown(wb);
css_put(wb->memcg_css);
@@ -575,6 +579,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
ret = radix_tree_insert(&bdi->cgwb_tree, memcg_css->id, wb);
if (!ret) {
atomic_inc(&bdi->usage_cnt);
+ list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list);
list_add(&wb->memcg_node, memcg_cgwb_list);
list_add(&wb->blkcg_node, blkcg_cgwb_list);
css_get(memcg_css);
@@ -676,7 +681,7 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
static void cgwb_bdi_destroy(struct backing_dev_info *bdi)
{
struct radix_tree_iter iter;
- struct bdi_writeback_congested *congested, *congested_n;
+ struct rb_node *rbn;
void **slot;
WARN_ON(test_bit(WB_registered, &bdi->wb.state));
@@ -686,9 +691,11 @@ static void cgwb_bdi_destroy(struct backing_dev_info *bdi)
radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0)
cgwb_kill(*slot);
- rbtree_postorder_for_each_entry_safe(congested, congested_n,
- &bdi->cgwb_congested_tree, rb_node) {
- rb_erase(&congested->rb_node, &bdi->cgwb_congested_tree);
+ while ((rbn = rb_first(&bdi->cgwb_congested_tree))) {
+ struct bdi_writeback_congested *congested =
+ rb_entry(rbn, struct bdi_writeback_congested, rb_node);
+
+ rb_erase(rbn, &bdi->cgwb_congested_tree);
congested->bdi = NULL; /* mark @congested unlinked */
}
@@ -764,15 +771,22 @@ static void cgwb_bdi_destroy(struct backing_dev_info *bdi) { }
int bdi_init(struct backing_dev_info *bdi)
{
+ int ret;
+
bdi->dev = NULL;
bdi->min_ratio = 0;
bdi->max_ratio = 100;
bdi->max_prop_frac = FPROP_FRAC_BASE;
INIT_LIST_HEAD(&bdi->bdi_list);
+ INIT_LIST_HEAD(&bdi->wb_list);
init_waitqueue_head(&bdi->wb_waitq);
- return cgwb_bdi_init(bdi);
+ ret = cgwb_bdi_init(bdi);
+
+ list_add_tail_rcu(&bdi->wb.bdi_node, &bdi->wb_list);
+
+ return ret;
}
EXPORT_SYMBOL(bdi_init);
@@ -823,7 +837,7 @@ static void bdi_remove_from_list(struct backing_dev_info *bdi)
synchronize_rcu_expedited();
}
-void bdi_destroy(struct backing_dev_info *bdi)
+void bdi_unregister(struct backing_dev_info *bdi)
{
/* make sure nobody finds us on the bdi_list anymore */
bdi_remove_from_list(bdi);
@@ -835,9 +849,19 @@ void bdi_destroy(struct backing_dev_info *bdi)
device_unregister(bdi->dev);
bdi->dev = NULL;
}
+}
+void bdi_exit(struct backing_dev_info *bdi)
+{
+ WARN_ON_ONCE(bdi->dev);
wb_exit(&bdi->wb);
}
+
+void bdi_destroy(struct backing_dev_info *bdi)
+{
+ bdi_unregister(bdi);
+ bdi_exit(bdi);
+}
EXPORT_SYMBOL(bdi_destroy);
/*
diff --git a/mm/cma.c b/mm/cma.c
index e7d1db533025..4eb56badf37e 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -361,7 +361,7 @@ err:
* This function allocates part of contiguous memory on specific
* contiguous memory area.
*/
-struct page *cma_alloc(struct cma *cma, unsigned int count, unsigned int align)
+struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align)
{
unsigned long mask, offset, pfn, start = 0;
unsigned long bitmap_maxno, bitmap_no, bitmap_count;
@@ -371,7 +371,7 @@ struct page *cma_alloc(struct cma *cma, unsigned int count, unsigned int align)
if (!cma || !cma->count)
return NULL;
- pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma,
+ pr_debug("%s(cma %p, count %zu, align %d)\n", __func__, (void *)cma,
count, align);
if (!count)
diff --git a/mm/dmapool.c b/mm/dmapool.c
index 71a8998cd03a..312a716fa14c 100644
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -394,7 +394,7 @@ static struct dma_page *pool_find_page(struct dma_pool *pool, dma_addr_t dma)
list_for_each_entry(page, &pool->page_list, page_list) {
if (dma < page->dma)
continue;
- if (dma < (page->dma + pool->allocation))
+ if ((dma - page->dma) < pool->allocation)
return page;
}
return NULL;
diff --git a/mm/filemap.c b/mm/filemap.c
index 72940fb38666..327910c2400c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2473,6 +2473,26 @@ ssize_t generic_perform_write(struct file *file,
iov_iter_count(i));
again:
+ /*
+ * Bring in the user page that we will copy from _first_.
+ * Otherwise there's a nasty deadlock on copying from the
+ * same page as we're writing to, without it being marked
+ * up-to-date.
+ *
+ * Not only is this an optimisation, but it is also required
+ * to check that the address is actually valid, when atomic
+ * usercopies are used, below.
+ */
+ if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
+ status = -EFAULT;
+ break;
+ }
+
+ if (fatal_signal_pending(current)) {
+ status = -EINTR;
+ break;
+ }
+
status = a_ops->write_begin(file, mapping, pos, bytes, flags,
&page, &fsdata);
if (unlikely(status < 0))
@@ -2480,17 +2500,8 @@ again:
if (mapping_writably_mapped(mapping))
flush_dcache_page(page);
- /*
- * 'page' is now locked. If we are trying to copy from a
- * mapping of 'page' in userspace, the copy might fault and
- * would need PageUptodate() to complete. But, page can not be
- * made Uptodate without acquiring the page lock, which we hold.
- * Deadlock. Avoid with pagefault_disable(). Fix up below with
- * iov_iter_fault_in_readable().
- */
- pagefault_disable();
+
copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
- pagefault_enable();
flush_dcache_page(page);
status = a_ops->write_end(file, mapping, pos, bytes, copied,
@@ -2513,24 +2524,12 @@ again:
*/
bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
iov_iter_single_seg_count(i));
- /*
- * This is the fallback to recover if the copy from
- * userspace above faults.
- */
- if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
- status = -EFAULT;
- break;
- }
goto again;
}
pos += copied;
written += copied;
balance_dirty_pages_ratelimited(mapping);
- if (fatal_signal_pending(current)) {
- status = -EINTR;
- break;
- }
} while (iov_iter_count(i));
return written ? written : status;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 4b06b8db9df2..bbac913f96bc 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2206,7 +2206,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
for (_pte = pte; _pte < pte+HPAGE_PMD_NR;
_pte++, address += PAGE_SIZE) {
pte_t pteval = *_pte;
- if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
+ if (pte_none(pteval) || (pte_present(pteval) &&
+ is_zero_pfn(pte_pfn(pteval)))) {
if (!userfaultfd_armed(vma) &&
++none_or_zero <= khugepaged_max_ptes_none)
continue;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 999fb0aef8f1..9cc773483624 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3202,6 +3202,14 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
continue;
/*
+ * Shared VMAs have their own reserves and do not affect
+ * MAP_PRIVATE accounting but it is possible that a shared
+ * VMA is using the same page so check and skip such VMAs.
+ */
+ if (iter_vma->vm_flags & VM_MAYSHARE)
+ continue;
+
+ /*
* Unmap the page from other VMAs without their own reserves.
* They get marked to be SIGKILLed if they fault in these
* areas. This is because a future no-page fault on this VMA
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 7b28e9cdf1c7..8da211411b57 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -135,12 +135,11 @@ static __always_inline bool memory_is_poisoned_16(unsigned long addr)
if (unlikely(*shadow_addr)) {
u16 shadow_first_bytes = *(u16 *)shadow_addr;
- s8 last_byte = (addr + 15) & KASAN_SHADOW_MASK;
if (unlikely(shadow_first_bytes))
return true;
- if (likely(!last_byte))
+ if (likely(IS_ALIGNED(addr, 8)))
return false;
return memory_is_poisoned_1(addr + 15);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6ddaeba34e09..c57c4423c688 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -644,12 +644,14 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
}
/*
+ * Return page count for single (non recursive) @memcg.
+ *
* Implementation Note: reading percpu statistics for memcg.
*
* Both of vmstat[] and percpu_counter has threshold and do periodic
* synchronization to implement "quick" read. There are trade-off between
* reading cost and precision of value. Then, we may have a chance to implement
- * a periodic synchronizion of counter in memcg's counter.
+ * a periodic synchronization of counter in memcg's counter.
*
* But this _read() function is used for user interface now. The user accounts
* memory usage by memory cgroup and he _always_ requires exact value because
@@ -659,17 +661,24 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
*
* If there are kernel internal actions which can make use of some not-exact
* value, and reading all cpu value can be performance bottleneck in some
- * common workload, threashold and synchonization as vmstat[] should be
+ * common workload, threshold and synchronization as vmstat[] should be
* implemented.
*/
-static long mem_cgroup_read_stat(struct mem_cgroup *memcg,
- enum mem_cgroup_stat_index idx)
+static unsigned long
+mem_cgroup_read_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx)
{
long val = 0;
int cpu;
+ /* Per-cpu values can be negative, use a signed accumulator */
for_each_possible_cpu(cpu)
val += per_cpu(memcg->stat->count[idx], cpu);
+ /*
+ * Summing races with updates, so val may be negative. Avoid exposing
+ * transient negative values.
+ */
+ if (val < 0)
+ val = 0;
return val;
}
@@ -1254,7 +1263,7 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
continue;
- pr_cont(" %s:%ldKB", mem_cgroup_stat_names[i],
+ pr_cont(" %s:%luKB", mem_cgroup_stat_names[i],
K(mem_cgroup_read_stat(iter, i)));
}
@@ -2819,14 +2828,11 @@ static unsigned long tree_stat(struct mem_cgroup *memcg,
enum mem_cgroup_stat_index idx)
{
struct mem_cgroup *iter;
- long val = 0;
+ unsigned long val = 0;
- /* Per-cpu values can be negative, use a signed accumulator */
for_each_mem_cgroup_tree(iter, memcg)
val += mem_cgroup_read_stat(iter, idx);
- if (val < 0) /* race ? */
- val = 0;
return val;
}
@@ -3169,7 +3175,7 @@ static int memcg_stat_show(struct seq_file *m, void *v)
for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
continue;
- seq_printf(m, "%s %ld\n", mem_cgroup_stat_names[i],
+ seq_printf(m, "%s %lu\n", mem_cgroup_stat_names[i],
mem_cgroup_read_stat(memcg, i) * PAGE_SIZE);
}
@@ -3194,13 +3200,13 @@ static int memcg_stat_show(struct seq_file *m, void *v)
(u64)memsw * PAGE_SIZE);
for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
- long long val = 0;
+ unsigned long long val = 0;
if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
continue;
for_each_mem_cgroup_tree(mi, memcg)
val += mem_cgroup_read_stat(mi, i) * PAGE_SIZE;
- seq_printf(m, "total_%s %lld\n", mem_cgroup_stat_names[i], val);
+ seq_printf(m, "total_%s %llu\n", mem_cgroup_stat_names[i], val);
}
for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) {
@@ -3381,6 +3387,7 @@ static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg,
ret = page_counter_memparse(args, "-1", &threshold);
if (ret)
return ret;
+ threshold <<= PAGE_SHIFT;
mutex_lock(&memcg->thresholds_lock);
@@ -3734,44 +3741,43 @@ struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb)
/**
* mem_cgroup_wb_stats - retrieve writeback related stats from its memcg
* @wb: bdi_writeback in question
- * @pavail: out parameter for number of available pages
+ * @pfilepages: out parameter for number of file pages
+ * @pheadroom: out parameter for number of allocatable pages according to memcg
* @pdirty: out parameter for number of dirty pages
* @pwriteback: out parameter for number of pages under writeback
*
- * Determine the numbers of available, dirty, and writeback pages in @wb's
- * memcg. Dirty and writeback are self-explanatory. Available is a bit
- * more involved.
+ * Determine the numbers of file, headroom, dirty, and writeback pages in
+ * @wb's memcg. File, dirty and writeback are self-explanatory. Headroom
+ * is a bit more involved.
*
- * A memcg's headroom is "min(max, high) - used". The available memory is
- * calculated as the lowest headroom of itself and the ancestors plus the
- * number of pages already being used for file pages. Note that this
- * doesn't consider the actual amount of available memory in the system.
- * The caller should further cap *@pavail accordingly.
+ * A memcg's headroom is "min(max, high) - used". In the hierarchy, the
+ * headroom is calculated as the lowest headroom of itself and the
+ * ancestors. Note that this doesn't consider the actual amount of
+ * available memory in the system. The caller should further cap
+ * *@pheadroom accordingly.
*/
-void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pavail,
- unsigned long *pdirty, unsigned long *pwriteback)
+void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
+ unsigned long *pheadroom, unsigned long *pdirty,
+ unsigned long *pwriteback)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css);
struct mem_cgroup *parent;
- unsigned long head_room = PAGE_COUNTER_MAX;
- unsigned long file_pages;
*pdirty = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_DIRTY);
/* this should eventually include NR_UNSTABLE_NFS */
*pwriteback = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_WRITEBACK);
+ *pfilepages = mem_cgroup_nr_lru_pages(memcg, (1 << LRU_INACTIVE_FILE) |
+ (1 << LRU_ACTIVE_FILE));
+ *pheadroom = PAGE_COUNTER_MAX;
- file_pages = mem_cgroup_nr_lru_pages(memcg, (1 << LRU_INACTIVE_FILE) |
- (1 << LRU_ACTIVE_FILE));
while ((parent = parent_mem_cgroup(memcg))) {
unsigned long ceiling = min(memcg->memory.limit, memcg->high);
unsigned long used = page_counter_read(&memcg->memory);
- head_room = min(head_room, ceiling - min(ceiling, used));
+ *pheadroom = min(*pheadroom, ceiling - min(ceiling, used));
memcg = parent;
}
-
- *pavail = file_pages + head_room;
}
#else /* CONFIG_CGROUP_WRITEBACK */
@@ -4179,7 +4185,6 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
if (memcg_wb_domain_init(memcg, GFP_KERNEL))
goto out_free_stat;
- spin_lock_init(&memcg->pcp_counter_lock);
return memcg;
out_free_stat:
diff --git a/mm/memory.c b/mm/memory.c
index 9cb27470fee9..deb679c31f2a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2426,6 +2426,8 @@ void unmap_mapping_range(struct address_space *mapping,
if (details.last_index < details.first_index)
details.last_index = ULONG_MAX;
+
+ /* DAX uses i_mmap_lock to serialise file truncate vs page fault */
i_mmap_lock_write(mapping);
if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap)))
unmap_mapping_range_tree(&mapping->i_mmap, &details);
diff --git a/mm/migrate.c b/mm/migrate.c
index c3cb566af3e2..842ecd7aaf7f 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -740,6 +740,15 @@ static int move_to_new_page(struct page *newpage, struct page *page,
if (PageSwapBacked(page))
SetPageSwapBacked(newpage);
+ /*
+ * Indirectly called below, migrate_page_copy() copies PG_dirty and thus
+ * needs newpage's memcg set to transfer memcg dirty page accounting.
+ * So perform memcg migration in two steps:
+ * 1. set newpage->mem_cgroup (here)
+ * 2. clear page->mem_cgroup (below)
+ */
+ set_page_memcg(newpage, page_memcg(page));
+
mapping = page_mapping(page);
if (!mapping)
rc = migrate_page(mapping, newpage, page, mode);
@@ -756,9 +765,10 @@ static int move_to_new_page(struct page *newpage, struct page *page,
rc = fallback_migrate_page(mapping, newpage, page, mode);
if (rc != MIGRATEPAGE_SUCCESS) {
+ set_page_memcg(newpage, NULL);
newpage->mapping = NULL;
} else {
- mem_cgroup_migrate(page, newpage, false);
+ set_page_memcg(page, NULL);
if (page_was_mapped)
remove_migration_ptes(page, newpage);
page->mapping = NULL;
@@ -1075,7 +1085,7 @@ out:
if (rc != MIGRATEPAGE_SUCCESS && put_new_page)
put_new_page(new_hpage, private);
else
- put_page(new_hpage);
+ putback_active_hugepage(new_hpage);
if (result) {
if (rc)
diff --git a/mm/mmap.c b/mm/mmap.c
index 971dd2cb77d2..79bcc9f92e48 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -612,8 +612,6 @@ static unsigned long count_vma_pages_range(struct mm_struct *mm,
void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
struct rb_node **rb_link, struct rb_node *rb_parent)
{
- WARN_ONCE(vma->vm_file && !vma->vm_ops, "missing vma->vm_ops");
-
/* Update tracking information for the gap following the new vma. */
if (vma->vm_next)
vma_gap_update(vma->vm_next);
@@ -1492,13 +1490,14 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
int vma_wants_writenotify(struct vm_area_struct *vma)
{
vm_flags_t vm_flags = vma->vm_flags;
+ const struct vm_operations_struct *vm_ops = vma->vm_ops;
/* If it was private or non-writable, the write bit is already clear */
if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
return 0;
/* The backer wishes to know when pages are first written to? */
- if (vma->vm_ops && vma->vm_ops->page_mkwrite)
+ if (vm_ops && (vm_ops->page_mkwrite || vm_ops->pfn_mkwrite))
return 1;
/* The open routine did something to the protections that pgprot_modify
@@ -1638,12 +1637,6 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
*/
WARN_ON_ONCE(addr != vma->vm_start);
- /* All file mapping must have ->vm_ops set */
- if (!vma->vm_ops) {
- static const struct vm_operations_struct dummy_ops = {};
- vma->vm_ops = &dummy_ops;
- }
-
addr = vma->vm_start;
vm_flags = vma->vm_flags;
} else if (vm_flags & VM_SHARED) {
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 0a931cdd4f6b..2c90357c34ea 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -145,9 +145,6 @@ struct dirty_throttle_control {
unsigned long pos_ratio;
};
-#define DTC_INIT_COMMON(__wb) .wb = (__wb), \
- .wb_completions = &(__wb)->completions
-
/*
* Length of period for aging writeout fractions of bdis. This is an
* arbitrarily chosen number. The longer the period, the slower fractions will
@@ -157,12 +154,16 @@ struct dirty_throttle_control {
#ifdef CONFIG_CGROUP_WRITEBACK
-#define GDTC_INIT(__wb) .dom = &global_wb_domain, \
- DTC_INIT_COMMON(__wb)
+#define GDTC_INIT(__wb) .wb = (__wb), \
+ .dom = &global_wb_domain, \
+ .wb_completions = &(__wb)->completions
+
#define GDTC_INIT_NO_WB .dom = &global_wb_domain
-#define MDTC_INIT(__wb, __gdtc) .dom = mem_cgroup_wb_domain(__wb), \
- .gdtc = __gdtc, \
- DTC_INIT_COMMON(__wb)
+
+#define MDTC_INIT(__wb, __gdtc) .wb = (__wb), \
+ .dom = mem_cgroup_wb_domain(__wb), \
+ .wb_completions = &(__wb)->memcg_completions, \
+ .gdtc = __gdtc
static bool mdtc_valid(struct dirty_throttle_control *dtc)
{
@@ -213,7 +214,8 @@ static void wb_min_max_ratio(struct bdi_writeback *wb,
#else /* CONFIG_CGROUP_WRITEBACK */
-#define GDTC_INIT(__wb) DTC_INIT_COMMON(__wb)
+#define GDTC_INIT(__wb) .wb = (__wb), \
+ .wb_completions = &(__wb)->completions
#define GDTC_INIT_NO_WB
#define MDTC_INIT(__wb, __gdtc)
@@ -682,13 +684,19 @@ static unsigned long hard_dirty_limit(struct wb_domain *dom,
return max(thresh, dom->dirty_limit);
}
-/* memory available to a memcg domain is capped by system-wide clean memory */
-static void mdtc_cap_avail(struct dirty_throttle_control *mdtc)
+/*
+ * Memory which can be further allocated to a memcg domain is capped by
+ * system-wide clean memory excluding the amount being used in the domain.
+ */
+static void mdtc_calc_avail(struct dirty_throttle_control *mdtc,
+ unsigned long filepages, unsigned long headroom)
{
struct dirty_throttle_control *gdtc = mdtc_gdtc(mdtc);
- unsigned long clean = gdtc->avail - min(gdtc->avail, gdtc->dirty);
+ unsigned long clean = filepages - min(filepages, mdtc->dirty);
+ unsigned long global_clean = gdtc->avail - min(gdtc->avail, gdtc->dirty);
+ unsigned long other_clean = global_clean - min(global_clean, clean);
- mdtc->avail = min(mdtc->avail, clean);
+ mdtc->avail = filepages + min(headroom, other_clean);
}
/**
@@ -1562,16 +1570,16 @@ static void balance_dirty_pages(struct address_space *mapping,
}
if (mdtc) {
- unsigned long writeback;
+ unsigned long filepages, headroom, writeback;
/*
* If @wb belongs to !root memcg, repeat the same
* basic calculations for the memcg domain.
*/
- mem_cgroup_wb_stats(wb, &mdtc->avail, &mdtc->dirty,
- &writeback);
- mdtc_cap_avail(mdtc);
+ mem_cgroup_wb_stats(wb, &filepages, &headroom,
+ &mdtc->dirty, &writeback);
mdtc->dirty += writeback;
+ mdtc_calc_avail(mdtc, filepages, headroom);
domain_dirty_limits(mdtc);
@@ -1893,10 +1901,11 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb)
return true;
if (mdtc) {
- unsigned long writeback;
+ unsigned long filepages, headroom, writeback;
- mem_cgroup_wb_stats(wb, &mdtc->avail, &mdtc->dirty, &writeback);
- mdtc_cap_avail(mdtc);
+ mem_cgroup_wb_stats(wb, &filepages, &headroom, &mdtc->dirty,
+ &writeback);
+ mdtc_calc_avail(mdtc, filepages, headroom);
domain_dirty_limits(mdtc); /* ditto, ignore writeback */
if (mdtc->dirty > mdtc->bg_thresh)
@@ -1956,7 +1965,6 @@ void laptop_mode_timer_fn(unsigned long data)
int nr_pages = global_page_state(NR_FILE_DIRTY) +
global_page_state(NR_UNSTABLE_NFS);
struct bdi_writeback *wb;
- struct wb_iter iter;
/*
* We want to write everything out, not just down to the dirty
@@ -1965,10 +1973,12 @@ void laptop_mode_timer_fn(unsigned long data)
if (!bdi_has_dirty_io(&q->backing_dev_info))
return;
- bdi_for_each_wb(wb, &q->backing_dev_info, &iter, 0)
+ rcu_read_lock();
+ list_for_each_entry_rcu(wb, &q->backing_dev_info.wb_list, bdi_node)
if (wb_has_dirty_io(wb))
wb_start_writeback(wb, nr_pages, true,
WB_REASON_LAPTOP_TIMER);
+ rcu_read_unlock();
}
/*
diff --git a/mm/readahead.c b/mm/readahead.c
index 60cd846a9a44..24682f6f4cfd 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -89,8 +89,8 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages,
while (!list_empty(pages)) {
page = list_to_page(pages);
list_del(&page->lru);
- if (add_to_page_cache_lru(page, mapping,
- page->index, GFP_KERNEL)) {
+ if (add_to_page_cache_lru(page, mapping, page->index,
+ GFP_KERNEL & mapping_gfp_mask(mapping))) {
read_cache_pages_invalidate_page(mapping, page);
continue;
}
@@ -127,8 +127,8 @@ static int read_pages(struct address_space *mapping, struct file *filp,
for (page_idx = 0; page_idx < nr_pages; page_idx++) {
struct page *page = list_to_page(pages);
list_del(&page->lru);
- if (!add_to_page_cache_lru(page, mapping,
- page->index, GFP_KERNEL)) {
+ if (!add_to_page_cache_lru(page, mapping, page->index,
+ GFP_KERNEL & mapping_gfp_mask(mapping))) {
mapping->a_ops->readpage(filp, page);
}
page_cache_release(page);
diff --git a/mm/slab.c b/mm/slab.c
index c77ebe6cc87c..4fcc5dd8d5a6 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2190,9 +2190,16 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
size += BYTES_PER_WORD;
}
#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
- if (size >= kmalloc_size(INDEX_NODE + 1)
- && cachep->object_size > cache_line_size()
- && ALIGN(size, cachep->align) < PAGE_SIZE) {
+ /*
+ * To activate debug pagealloc, off-slab management is necessary
+ * requirement. In early phase of initialization, small sized slab
+ * doesn't get initialized so it would not be possible. So, we need
+ * to check size >= 256. It guarantees that all necessary small
+ * sized slab is initialized in current slab initialization sequence.
+ */
+ if (!slab_early_init && size >= kmalloc_size(INDEX_NODE) &&
+ size >= 256 && cachep->object_size > cache_line_size() &&
+ ALIGN(size, cachep->align) < PAGE_SIZE) {
cachep->obj_offset += PAGE_SIZE - ALIGN(size, cachep->align);
size = PAGE_SIZE;
}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 2d978b28a410..7f63a9381f71 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -175,7 +175,7 @@ static bool sane_reclaim(struct scan_control *sc)
if (!memcg)
return true;
#ifdef CONFIG_CGROUP_WRITEBACK
- if (memcg->css.cgroup)
+ if (cgroup_on_dfl(memcg->css.cgroup))
return true;
#endif
return false;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 4f5cd974e11a..fbf14485a049 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1363,15 +1363,16 @@ static cpumask_var_t cpu_stat_off;
static void vmstat_update(struct work_struct *w)
{
- if (refresh_cpu_vm_stats())
+ if (refresh_cpu_vm_stats()) {
/*
* Counters were updated so we expect more updates
* to occur in the future. Keep on running the
* update worker thread.
*/
- schedule_delayed_work(this_cpu_ptr(&vmstat_work),
+ schedule_delayed_work_on(smp_processor_id(),
+ this_cpu_ptr(&vmstat_work),
round_jiffies_relative(sysctl_stat_interval));
- else {
+ } else {
/*
* We did not update any counters so the app may be in
* a mode where it does not cause counter updates.