Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- | mm/memcontrol.c | 449
1 file changed, 407 insertions, 42 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6f5c0c517c49..c313c49074ca 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -25,7 +25,7 @@
 #include <linux/page_counter.h>
 #include <linux/memcontrol.h>
 #include <linux/cgroup.h>
-#include <linux/mm.h>
+#include <linux/pagewalk.h>
 #include <linux/sched/mm.h>
 #include <linux/shmem_fs.h>
 #include <linux/hugetlb.h>
@@ -57,6 +57,7 @@
 #include <linux/lockdep.h>
 #include <linux/file.h>
 #include <linux/tracehook.h>
+#include <linux/psi.h>
 #include <linux/seq_buf.h>
 #include "internal.h"
 #include <net/sock.h>
@@ -87,6 +88,10 @@ int do_swap_account __read_mostly;
 #define do_swap_account 0
 #endif
 
+#ifdef CONFIG_CGROUP_WRITEBACK
+static DECLARE_WAIT_QUEUE_HEAD(memcg_cgwb_frn_waitq);
+#endif
+
 /* Whether legacy memory+swap accounting is active */
 static bool do_memsw_account(void)
 {
@@ -313,6 +318,7 @@ DEFINE_STATIC_KEY_FALSE(memcg_kmem_enabled_key);
 EXPORT_SYMBOL(memcg_kmem_enabled_key);
 
 struct workqueue_struct *memcg_kmem_cache_wq;
+#endif
 
 static int memcg_shrinker_map_size;
 static DEFINE_MUTEX(memcg_shrinker_map_mutex);
@@ -436,14 +442,6 @@ void memcg_set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id)
         }
 }
 
-#else /* CONFIG_MEMCG_KMEM */
-static int memcg_alloc_shrinker_maps(struct mem_cgroup *memcg)
-{
-        return 0;
-}
-static void memcg_free_shrinker_maps(struct mem_cgroup *memcg) { }
-#endif /* CONFIG_MEMCG_KMEM */
-
 /**
  * mem_cgroup_css_from_page - css of the memcg associated with a page
  * @page: page of interest
@@ -752,15 +750,13 @@ void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
         /* Update memcg */
         __mod_memcg_state(memcg, idx, val);
 
+        /* Update lruvec */
+        __this_cpu_add(pn->lruvec_stat_local->count[idx], val);
+
         x = val + __this_cpu_read(pn->lruvec_stat_cpu->count[idx]);
         if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
                 struct mem_cgroup_per_node *pi;
 
-                /*
-                 * Batch local counters to keep them in sync with
-                 * the hierarchical ones.
-                 */
-                __this_cpu_add(pn->lruvec_stat_local->count[idx], x);
                 for (pi = pn; pi; pi = parent_nodeinfo(pi, pgdat->node_id))
                         atomic_long_add(x, &pi->lruvec_stat[idx]);
                 x = 0;
@@ -2268,21 +2264,22 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
         for_each_online_cpu(cpu) {
                 struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
                 struct mem_cgroup *memcg;
+                bool flush = false;
 
+                rcu_read_lock();
                 memcg = stock->cached;
-                if (!memcg || !stock->nr_pages || !css_tryget(&memcg->css))
-                        continue;
-                if (!mem_cgroup_is_descendant(memcg, root_memcg)) {
-                        css_put(&memcg->css);
-                        continue;
-                }
-                if (!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) {
+                if (memcg && stock->nr_pages &&
+                    mem_cgroup_is_descendant(memcg, root_memcg))
+                        flush = true;
+                rcu_read_unlock();
+
+                if (flush &&
+                    !test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) {
                         if (cpu == curcpu)
                                 drain_local_stock(&stock->work);
                         else
                                 schedule_work_on(cpu, &stock->work);
                 }
-                css_put(&memcg->css);
         }
         put_cpu();
         mutex_unlock(&percpu_charge_mutex);
@@ -2357,11 +2354,67 @@ static void high_work_func(struct work_struct *work)
 }
 
 /*
+ * Clamp the maximum sleep time per allocation batch to 2 seconds. This is
+ * enough to cause a significant slowdown in most cases, while still
+ * allowing diagnostics and tracing to proceed without becoming stuck.
+ */
+#define MEMCG_MAX_HIGH_DELAY_JIFFIES (2UL*HZ)
+
+/*
+ * When calculating the delay, we use these on either side of the
+ * exponentiation to maintain precision and to scale the result to a
+ * reasonable number of jiffies (see the table below).
+ *
+ * - MEMCG_DELAY_PRECISION_SHIFT: Extra precision bits while translating the
+ *   overage ratio to a delay.
+ * - MEMCG_DELAY_SCALING_SHIFT: The number of bits to scale down the proposed
+ *   penalty in order to reduce it to a reasonable number of jiffies, and to
+ *   produce a reasonable delay curve.
+ *
+ * MEMCG_DELAY_SCALING_SHIFT just happens to be a number that produces a
+ * reasonable delay curve compared to precision-adjusted overage, not
+ * penalising heavily at first, but still making sure that growth beyond the
+ * limit penalises misbehaving cgroups by slowing them down exponentially. For
+ * example, with a high of 100 megabytes:
+ *
+ *  +-------+------------------------+
+ *  | usage | time to allocate in ms |
+ *  +-------+------------------------+
+ *  | 100M  |                      0 |
+ *  | 101M  |                      6 |
+ *  | 102M  |                     25 |
+ *  | 103M  |                     57 |
+ *  | 104M  |                    102 |
+ *  | 105M  |                    159 |
+ *  | 106M  |                    230 |
+ *  | 107M  |                    313 |
+ *  | 108M  |                    409 |
+ *  | 109M  |                    518 |
+ *  | 110M  |                    639 |
+ *  | 111M  |                    774 |
+ *  | 112M  |                    921 |
+ *  | 113M  |                   1081 |
+ *  | 114M  |                   1254 |
+ *  | 115M  |                   1439 |
+ *  | 116M  |                   1638 |
+ *  | 117M  |                   1849 |
+ *  | 118M  |                   2000 |
+ *  | 119M  |                   2000 |
+ *  | 120M  |                   2000 |
+ *  +-------+------------------------+
+ */
+#define MEMCG_DELAY_PRECISION_SHIFT 20
+#define MEMCG_DELAY_SCALING_SHIFT 14
+
+/*
  * Scheduled by try_charge() to be executed from the userland return path
  * and reclaims memory over the high limit.
  */
 void mem_cgroup_handle_over_high(void)
 {
+        unsigned long usage, high, clamped_high;
+        unsigned long pflags;
+        unsigned long penalty_jiffies, overage;
         unsigned int nr_pages = current->memcg_nr_pages_over_high;
         struct mem_cgroup *memcg;
 
@@ -2370,8 +2423,75 @@ void mem_cgroup_handle_over_high(void)
 
         memcg = get_mem_cgroup_from_mm(current->mm);
         reclaim_high(memcg, nr_pages, GFP_KERNEL);
-        css_put(&memcg->css);
         current->memcg_nr_pages_over_high = 0;
+
+        /*
+         * memory.high is breached and reclaim is unable to keep up. Throttle
+         * allocators proactively to slow down excessive growth.
+         *
+         * We use overage compared to memory.high to calculate the number of
+         * jiffies to sleep (penalty_jiffies). Ideally this value should be
+         * fairly lenient on small overages, and increasingly harsh when the
+         * memcg in question makes it clear that it has no intention of stopping
+         * its crazy behaviour, so we exponentially increase the delay based on
+         * overage amount.
+         */
+
+        usage = page_counter_read(&memcg->memory);
+        high = READ_ONCE(memcg->high);
+
+        if (usage <= high)
+                goto out;
+
+        /*
+         * Prevent division by 0 in the overage calculation by acting as if it
+         * were a threshold of 1 page.
+         */
+        clamped_high = max(high, 1UL);
+
+        overage = div_u64((u64)(usage - high) << MEMCG_DELAY_PRECISION_SHIFT,
+                          clamped_high);
+
+        penalty_jiffies = ((u64)overage * overage * HZ)
+                >> (MEMCG_DELAY_PRECISION_SHIFT + MEMCG_DELAY_SCALING_SHIFT);
+
+        /*
+         * Factor in the task's own contribution to the overage, such that four
+         * N-sized allocations are throttled approximately the same as one
+         * 4N-sized allocation.
+         *
+         * MEMCG_CHARGE_BATCH pages is nominal, so work out how much smaller or
+         * larger the current charge batch is than that.
+         */
+        penalty_jiffies = penalty_jiffies * nr_pages / MEMCG_CHARGE_BATCH;
+
+        /*
+         * Clamp the max delay per usermode return so as to still keep the
+         * application moving forwards and also permit diagnostics, albeit
+         * extremely slowly.
+         */
+        penalty_jiffies = min(penalty_jiffies, MEMCG_MAX_HIGH_DELAY_JIFFIES);
+
+        /*
+         * Don't sleep if the amount of jiffies this memcg owes us is so low
+         * that it's not even worth doing, in an attempt to be nice to those who
+         * go only a small amount over their memory.high value and maybe haven't
+         * been aggressively reclaimed enough yet.
+         */
+        if (penalty_jiffies <= HZ / 100)
+                goto out;
+
+        /*
+         * If we exit early, we're guaranteed to die (since
+         * schedule_timeout_killable sets TASK_KILLABLE). This means we don't
+         * need to account for any ill-begotten jiffies to pay them off later.
+         */
+        psi_memstall_enter(&pflags);
+        schedule_timeout_killable(penalty_jiffies);
+        psi_memstall_leave(&pflags);
+
+out:
+        css_put(&memcg->css);
 }
 
 static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
@@ -2823,6 +2943,16 @@ int __memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
 
         if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) &&
             !page_counter_try_charge(&memcg->kmem, nr_pages, &counter)) {
+
+                /*
+                 * Enforce __GFP_NOFAIL allocation because callers are not
+                 * prepared to see failures and likely do not have any failure
+                 * handling code.
+                 */
+                if (gfp & __GFP_NOFAIL) {
+                        page_counter_charge(&memcg->kmem, nr_pages);
+                        return 0;
+                }
                 cancel_charge(memcg, nr_pages);
                 return -ENOMEM;
         }
@@ -3260,6 +3390,72 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
         }
 }
 
+static void memcg_flush_percpu_vmstats(struct mem_cgroup *memcg, bool slab_only)
+{
+        unsigned long stat[MEMCG_NR_STAT];
+        struct mem_cgroup *mi;
+        int node, cpu, i;
+        int min_idx, max_idx;
+
+        if (slab_only) {
+                min_idx = NR_SLAB_RECLAIMABLE;
+                max_idx = NR_SLAB_UNRECLAIMABLE;
+        } else {
+                min_idx = 0;
+                max_idx = MEMCG_NR_STAT;
+        }
+
+        for (i = min_idx; i < max_idx; i++)
+                stat[i] = 0;
+
+        for_each_online_cpu(cpu)
+                for (i = min_idx; i < max_idx; i++)
+                        stat[i] += per_cpu(memcg->vmstats_percpu->stat[i], cpu);
+
+        for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
+                for (i = min_idx; i < max_idx; i++)
+                        atomic_long_add(stat[i], &mi->vmstats[i]);
+
+        if (!slab_only)
+                max_idx = NR_VM_NODE_STAT_ITEMS;
+
+        for_each_node(node) {
+                struct mem_cgroup_per_node *pn = memcg->nodeinfo[node];
+                struct mem_cgroup_per_node *pi;
+
+                for (i = min_idx; i < max_idx; i++)
+                        stat[i] = 0;
+
+                for_each_online_cpu(cpu)
+                        for (i = min_idx; i < max_idx; i++)
+                                stat[i] += per_cpu(
+                                        pn->lruvec_stat_cpu->count[i], cpu);
+
+                for (pi = pn; pi; pi = parent_nodeinfo(pi, node))
+                        for (i = min_idx; i < max_idx; i++)
+                                atomic_long_add(stat[i], &pi->lruvec_stat[i]);
+        }
+}
+
+static void memcg_flush_percpu_vmevents(struct mem_cgroup *memcg)
+{
+        unsigned long events[NR_VM_EVENT_ITEMS];
+        struct mem_cgroup *mi;
+        int cpu, i;
+
+        for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
+                events[i] = 0;
+
+        for_each_online_cpu(cpu)
+                for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
+                        events[i] += per_cpu(memcg->vmstats_percpu->events[i],
+                                             cpu);
+
+        for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
+                for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
+                        atomic_long_add(events[i], &mi->vmevents[i]);
+}
+
 #ifdef CONFIG_MEMCG_KMEM
 static int memcg_online_kmem(struct mem_cgroup *memcg)
 {
@@ -3309,7 +3505,14 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg)
         if (!parent)
                 parent = root_mem_cgroup;
 
+        /*
+         * Deactivate and reparent kmem_caches. Then flush percpu
+         * slab statistics to have precise values at the parent and
+         * all ancestor levels. This is required to keep slab stats
+         * accurate after the reparenting of kmem_caches.
+         */
         memcg_deactivate_kmem_caches(memcg, parent);
+        memcg_flush_percpu_vmstats(memcg, true);
 
         kmemcg_id = memcg->kmemcg_id;
         BUG_ON(kmemcg_id < 0);
@@ -3437,6 +3640,9 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of,
                         ret = mem_cgroup_resize_max(memcg, nr_pages, true);
                         break;
                 case _KMEM:
+                        pr_warn_once("kmem.limit_in_bytes is deprecated and will be removed. "
+                                     "Please report your usecase to linux-mm@kvack.org if you "
+                                     "depend on this functionality.\n");
                         ret = memcg_update_kmem_max(memcg, nr_pages);
                         break;
                 case _TCP:
@@ -4101,6 +4307,8 @@ static int mem_cgroup_oom_control_write(struct cgroup_subsys_state *css,
 
 #ifdef CONFIG_CGROUP_WRITEBACK
 
+#include <trace/events/writeback.h>
+
 static int memcg_wb_domain_init(struct mem_cgroup *memcg, gfp_t gfp)
 {
         return wb_domain_init(&memcg->cgwb_domain, gfp);
@@ -4184,6 +4392,130 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
         }
 }
 
+/*
+ * Foreign dirty flushing
+ *
+ * There's an inherent mismatch between memcg and writeback. The former
+ * tracks ownership per-page while the latter per-inode. This was a
+ * deliberate design decision because honoring per-page ownership in the
+ * writeback path is complicated, may lead to higher CPU and IO overheads
+ * and is deemed unnecessary given that write-sharing an inode across
+ * different cgroups isn't a common use-case.
+ *
+ * Combined with inode majority-writer ownership switching, this works well
+ * enough in most cases but there are some pathological cases. For
+ * example, let's say there are two cgroups A and B which keep writing to
+ * different but confined parts of the same inode. B owns the inode and
+ * A's memory is limited far below B's. A's dirty ratio can rise enough to
+ * trigger balance_dirty_pages() sleeps but B's can be low enough to avoid
+ * triggering background writeback. A will be slowed down without a way to
+ * make writeback of the dirty pages happen.
+ *
+ * Conditions like the above can lead to a cgroup getting repeatedly and
+ * severely throttled after making some progress after each
+ * dirty_expire_interval while the underlying IO device is almost
+ * completely idle.
+ *
+ * Solving this problem completely requires matching the ownership tracking
+ * granularities between memcg and writeback in either direction. However,
+ * the more egregious behaviors can be avoided by simply remembering the
+ * most recent foreign dirtying events and initiating remote flushes on
+ * them when local writeback isn't enough to keep the memory clean enough.
+ *
+ * The following two functions implement such a mechanism. When a foreign
+ * page - a page whose memcg and writeback ownerships don't match - is
+ * dirtied, mem_cgroup_track_foreign_dirty() records the inode owning
+ * bdi_writeback on the page owning memcg. When balance_dirty_pages()
+ * decides that the memcg needs to sleep due to high dirty ratio, it calls
+ * mem_cgroup_flush_foreign() which queues writeback on the recorded
+ * foreign bdi_writebacks which haven't expired. Both the number of
+ * recorded bdi_writebacks and the number of concurrent in-flight foreign
+ * writebacks are limited to MEMCG_CGWB_FRN_CNT.
+ *
+ * The mechanism only remembers IDs and doesn't hold any object references.
+ * As being wrong occasionally doesn't matter, updates and accesses to the
+ * records are lockless and racy.
+ */
+void mem_cgroup_track_foreign_dirty_slowpath(struct page *page,
+                                             struct bdi_writeback *wb)
+{
+        struct mem_cgroup *memcg = page->mem_cgroup;
+        struct memcg_cgwb_frn *frn;
+        u64 now = get_jiffies_64();
+        u64 oldest_at = now;
+        int oldest = -1;
+        int i;
+
+        trace_track_foreign_dirty(page, wb);
+
+        /*
+         * Pick the slot to use. If there is already a slot for @wb, keep
+         * using it. If not, replace the oldest one which isn't being
+         * written out.
+         */
+        for (i = 0; i < MEMCG_CGWB_FRN_CNT; i++) {
+                frn = &memcg->cgwb_frn[i];
+                if (frn->bdi_id == wb->bdi->id &&
+                    frn->memcg_id == wb->memcg_css->id)
+                        break;
+                if (time_before64(frn->at, oldest_at) &&
+                    atomic_read(&frn->done.cnt) == 1) {
+                        oldest = i;
+                        oldest_at = frn->at;
+                }
+        }
+
+        if (i < MEMCG_CGWB_FRN_CNT) {
+                /*
+                 * Re-using an existing one. Update the timestamp lazily to
+                 * avoid making the cacheline hot. We want them to be
+                 * reasonably up-to-date and significantly shorter than
+                 * dirty_expire_interval as that's what expires the record.
+                 * Use the shorter of 1s and dirty_expire_interval / 8.
+                 */
+                unsigned long update_intv =
+                        min_t(unsigned long, HZ,
+                              msecs_to_jiffies(dirty_expire_interval * 10) / 8);
+
+                if (time_before64(frn->at, now - update_intv))
+                        frn->at = now;
+        } else if (oldest >= 0) {
+                /* replace the oldest free one */
+                frn = &memcg->cgwb_frn[oldest];
+                frn->bdi_id = wb->bdi->id;
+                frn->memcg_id = wb->memcg_css->id;
+                frn->at = now;
+        }
+}
+
+/* issue foreign writeback flushes for recorded foreign dirtying events */
+void mem_cgroup_flush_foreign(struct bdi_writeback *wb)
+{
+        struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css);
+        unsigned long intv = msecs_to_jiffies(dirty_expire_interval * 10);
+        u64 now = jiffies_64;
+        int i;
+
+        for (i = 0; i < MEMCG_CGWB_FRN_CNT; i++) {
+                struct memcg_cgwb_frn *frn = &memcg->cgwb_frn[i];
+
+                /*
+                 * If the record is older than dirty_expire_interval,
+                 * writeback on it has already started. No need to kick it
+                 * off again. Also, don't start a new one if there's
+                 * already one in flight.
+                 */
+                if (time_after64(frn->at, now - intv) &&
+                    atomic_read(&frn->done.cnt) == 1) {
+                        frn->at = 0;
+                        trace_flush_foreign(wb, frn->bdi_id, frn->memcg_id);
+                        cgroup_writeback_by_id(frn->bdi_id, frn->memcg_id, 0,
+                                               WB_REASON_FOREIGN_FLUSH,
+                                               &frn->done);
+                }
+        }
+}
+
 #else   /* CONFIG_CGROUP_WRITEBACK */
 
 static int memcg_wb_domain_init(struct mem_cgroup *memcg, gfp_t gfp)
@@ -4604,11 +4936,6 @@ static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
         }
 }
 
-static inline void mem_cgroup_id_get(struct mem_cgroup *memcg)
-{
-        mem_cgroup_id_get_many(memcg, 1);
-}
-
 static inline void mem_cgroup_id_put(struct mem_cgroup *memcg)
 {
         mem_cgroup_id_put_many(memcg, 1);
 }
@@ -4682,6 +5009,12 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 {
         int node;
 
+        /*
+         * Flush percpu vmstats and vmevents to guarantee correct values at
+         * the parent and all ancestor levels.
+         */
+        memcg_flush_percpu_vmstats(memcg, false);
+        memcg_flush_percpu_vmevents(memcg);
         for_each_node(node)
                 free_mem_cgroup_per_node_info(memcg, node);
         free_percpu(memcg->vmstats_percpu);
@@ -4700,6 +5033,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
         struct mem_cgroup *memcg;
         unsigned int size;
         int node;
+        int __maybe_unused i;
 
         size = sizeof(struct mem_cgroup);
         size += nr_node_ids * sizeof(struct mem_cgroup_per_node *);
@@ -4743,6 +5077,14 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
 #endif
 #ifdef CONFIG_CGROUP_WRITEBACK
         INIT_LIST_HEAD(&memcg->cgwb_list);
+        for (i = 0; i < MEMCG_CGWB_FRN_CNT; i++)
+                memcg->cgwb_frn[i].done =
+                        __WB_COMPLETION_INIT(&memcg_cgwb_frn_waitq);
+#endif
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+        spin_lock_init(&memcg->deferred_split_queue.split_queue_lock);
+        INIT_LIST_HEAD(&memcg->deferred_split_queue.split_queue);
+        memcg->deferred_split_queue.split_queue_len = 0;
 #endif
         idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
         return memcg;
@@ -4872,7 +5214,12 @@ static void mem_cgroup_css_released(struct cgroup_subsys_state *css)
 static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
 {
         struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+        int __maybe_unused i;
 
+#ifdef CONFIG_CGROUP_WRITEBACK
+        for (i = 0; i < MEMCG_CGWB_FRN_CNT; i++)
+                wb_wait_for_completion(&memcg->cgwb_frn[i].done);
+#endif
         if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket)
                 static_branch_dec(&memcg_sockets_enabled_key);
@@ -5117,6 +5464,14 @@ static int mem_cgroup_move_account(struct page *page,
                 __mod_memcg_state(to, NR_WRITEBACK, nr_pages);
         }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+        if (compound && !list_empty(page_deferred_list(page))) {
+                spin_lock(&from->deferred_split_queue.split_queue_lock);
+                list_del_init(page_deferred_list(page));
+                from->deferred_split_queue.split_queue_len--;
+                spin_unlock(&from->deferred_split_queue.split_queue_lock);
+        }
+#endif
         /*
          * It is safe to change page->mem_cgroup here because the page
          * is referenced, charged, and isolated - we can't race with
@@ -5125,6 +5480,17 @@ static int mem_cgroup_move_account(struct page *page,
 
         /* caller should have done css_get */
         page->mem_cgroup = to;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+        if (compound && list_empty(page_deferred_list(page))) {
+                spin_lock(&to->deferred_split_queue.split_queue_lock);
+                list_add_tail(page_deferred_list(page),
+                              &to->deferred_split_queue.split_queue);
+                to->deferred_split_queue.split_queue_len++;
+                spin_unlock(&to->deferred_split_queue.split_queue_lock);
+        }
+#endif
+
         spin_unlock_irqrestore(&from->move_lock, flags);
 
         ret = 0;
@@ -5283,17 +5649,16 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
         return 0;
 }
 
+static const struct mm_walk_ops precharge_walk_ops = {
+        .pmd_entry      = mem_cgroup_count_precharge_pte_range,
+};
+
 static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
 {
         unsigned long precharge;
-        struct mm_walk mem_cgroup_count_precharge_walk = {
-                .pmd_entry = mem_cgroup_count_precharge_pte_range,
-                .mm = mm,
-        };
 
         down_read(&mm->mmap_sem);
-        walk_page_range(0, mm->highest_vm_end,
-                        &mem_cgroup_count_precharge_walk);
+        walk_page_range(mm, 0, mm->highest_vm_end, &precharge_walk_ops, NULL);
         up_read(&mm->mmap_sem);
 
         precharge = mc.precharge;
@@ -5562,13 +5927,12 @@ put:                    /* get_mctgt_type() gets the page */
         return ret;
 }
 
+static const struct mm_walk_ops charge_walk_ops = {
+        .pmd_entry      = mem_cgroup_move_charge_pte_range,
+};
+
 static void mem_cgroup_move_charge(void)
 {
-        struct mm_walk mem_cgroup_move_charge_walk = {
-                .pmd_entry = mem_cgroup_move_charge_pte_range,
-                .mm = mc.mm,
-        };
-
         lru_add_drain_all();
         /*
          * Signal lock_page_memcg() to take the memcg's move_lock
@@ -5594,7 +5958,8 @@ retry:
          * When we have consumed all precharges and failed in doing
          * additional charge, the page walk just aborts.
          */
-        walk_page_range(0, mc.mm->highest_vm_end, &mem_cgroup_move_charge_walk);
+        walk_page_range(mc.mm, 0, mc.mm->highest_vm_end, &charge_walk_ops,
+                        NULL);
 
         up_read(&mc.mm->mmap_sem);
         atomic_dec(&mc.from->moving_account);
@@ -6296,7 +6661,7 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug)
                 unsigned int nr_pages = 1;
 
                 if (PageTransHuge(page)) {
-                        nr_pages <<= compound_order(page);
+                        nr_pages = compound_nr(page);
                         ug->nr_huge += nr_pages;
                 }
                 if (PageAnon(page))
@@ -6308,7 +6673,7 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug)
                 }
                 ug->pgpgout++;
         } else {
-                ug->nr_kmem += 1 << compound_order(page);
+                ug->nr_kmem += compound_nr(page);
                 __ClearPageKmemcg(page);
         }
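
The delay table in the patch comment follows directly from the MEMCG_DELAY_PRECISION_SHIFT / MEMCG_DELAY_SCALING_SHIFT arithmetic added to mem_cgroup_handle_over_high(). The standalone userspace sketch below reproduces that curve with the same constants and integer math; the tick rate (HZ=1000), the 4 KiB page size, and the 32-page MEMCG_CHARGE_BATCH are assumptions made for this illustration only and are not taken from the patch itself.

/*
 * Userspace sketch of the memory.high throttling curve added by this patch.
 * The shift constants and the 2 s clamp mirror the kernel values; HZ, the
 * page size and MEMCG_CHARGE_BATCH below are assumed for illustration.
 */
#include <stdio.h>
#include <stdint.h>

#define MEMCG_DELAY_PRECISION_SHIFT     20
#define MEMCG_DELAY_SCALING_SHIFT       14
#define HZ                              1000UL  /* assumed tick rate */
#define MEMCG_MAX_HIGH_DELAY_JIFFIES    (2UL * HZ)
#define MEMCG_CHARGE_BATCH              32UL    /* assumed charge batch */

/* Mirrors the penalty calculation in mem_cgroup_handle_over_high(). */
static unsigned long penalty_jiffies(unsigned long usage, unsigned long high,
                                     unsigned long nr_pages)
{
        uint64_t overage, penalty;

        if (usage <= high)
                return 0;

        /* act as if memory.high were at least one page to avoid div-by-0 */
        if (!high)
                high = 1;

        overage = ((uint64_t)(usage - high) << MEMCG_DELAY_PRECISION_SHIFT) / high;
        penalty = (overage * overage * HZ)
                >> (MEMCG_DELAY_PRECISION_SHIFT + MEMCG_DELAY_SCALING_SHIFT);

        /* four N-sized charges cost about the same as one 4N-sized charge */
        penalty = penalty * nr_pages / MEMCG_CHARGE_BATCH;

        return penalty < MEMCG_MAX_HIGH_DELAY_JIFFIES ?
                (unsigned long)penalty : MEMCG_MAX_HIGH_DELAY_JIFFIES;
}

int main(void)
{
        unsigned long high = (100UL << 20) >> 12;       /* 100M in 4 KiB pages */
        unsigned long mb;

        for (mb = 100; mb <= 120; mb++) {
                unsigned long usage = (mb << 20) >> 12;
                unsigned long delay = penalty_jiffies(usage, high,
                                                      MEMCG_CHARGE_BATCH);

                printf("| %3luM | %4lu |\n", mb, delay * 1000 / HZ);
        }
        return 0;
}

Built with any C99 compiler, the sketch should print the same 0-2000 ms progression as the table in the patch, with MEMCG_MAX_HIGH_DELAY_JIFFIES clamping the delay from roughly 118M of usage onwards.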