aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/include/linux/memcontrol.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/memcontrol.h')
-rw-r--r--include/linux/memcontrol.h217
1 files changed, 193 insertions, 24 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e77197a62809..d0b036123c6a 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -23,6 +23,7 @@
#include <linux/page-flags.h>
struct mem_cgroup;
+struct obj_cgroup;
struct page;
struct mm_struct;
struct kmem_cache;
@@ -31,8 +32,7 @@ struct kmem_cache;
enum memcg_stat_item {
MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS,
MEMCG_SOCK,
- /* XXX: why are these zone and not node counters? */
- MEMCG_KERNEL_STACK_KB,
+ MEMCG_PERCPU_B,
MEMCG_NR_STAT,
};
@@ -48,12 +48,6 @@ enum memcg_memory_event {
MEMCG_NR_MEMORY_EVENTS,
};
-enum mem_cgroup_protection {
- MEMCG_PROT_NONE,
- MEMCG_PROT_LOW,
- MEMCG_PROT_MIN,
-};
-
struct mem_cgroup_reclaim_cookie {
pg_data_t *pgdat;
unsigned int generation;
@@ -71,8 +65,8 @@ struct mem_cgroup_id {
/*
* Per memcg event counter is incremented at every pagein/pageout. With THP,
- * it will be incremated by the number of pages. This counter is used for
- * for trigger some periodic events. This is straightforward and better
+ * it will be incremented by the number of pages. This counter is used
+ * to trigger some periodic events. This is straightforward and better
* than using jiffies etc. to handle periodic memcg event.
*/
enum mem_cgroup_events_target {
@@ -193,6 +187,22 @@ struct memcg_cgwb_frn {
};
/*
+ * Bucket for arbitrarily byte-sized objects charged to a memory
+ * cgroup. The bucket can be reparented in one piece when the cgroup
+ * is destroyed, without having to round up the individual references
+ * of all live memory objects in the wild.
+ */
+struct obj_cgroup {
+ struct percpu_ref refcnt;
+ struct mem_cgroup *memcg;
+ atomic_t nr_charged_bytes;
+ union {
+ struct list_head list;
+ struct rcu_head rcu;
+ };
+};
+
+/*
* The memory controller data structure. The memory controller controls both
* page cache and RSS per cgroup. We would eventually like to provide
* statistics based on the statistics developed by Rik Van Riel for clock-pro,
@@ -300,7 +310,8 @@ struct mem_cgroup {
/* Index in the kmem_cache->memcg_params.memcg_caches array */
int kmemcg_id;
enum memcg_kmem_state kmem_state;
- struct list_head kmem_caches;
+ struct obj_cgroup __rcu *objcg;
+ struct list_head objcg_list; /* list of inherited objcgs */
#endif
#ifdef CONFIG_CGROUP_WRITEBACK
@@ -329,6 +340,13 @@ struct mem_cgroup {
extern struct mem_cgroup *root_mem_cgroup;
+static __always_inline bool memcg_stat_item_in_bytes(int idx)
+{
+ if (idx == MEMCG_PERCPU_B)
+ return true;
+ return vmstat_item_in_bytes(idx);
+}
+
static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
{
return (memcg == root_mem_cgroup);
@@ -339,12 +357,49 @@ static inline bool mem_cgroup_disabled(void)
return !cgroup_subsys_enabled(memory_cgrp_subsys);
}
-static inline unsigned long mem_cgroup_protection(struct mem_cgroup *memcg,
+static inline unsigned long mem_cgroup_protection(struct mem_cgroup *root,
+ struct mem_cgroup *memcg,
bool in_low_reclaim)
{
if (mem_cgroup_disabled())
return 0;
+ /*
+ * There is no reclaim protection applied to a targeted reclaim.
+ * We are special casing this specific case here because
+ * mem_cgroup_protected calculation is not robust enough to keep
+ * the protection invariant for calculated effective values for
+ * parallel reclaimers with different reclaim target. This is
+ * especially a problem for tail memcgs (as they have pages on LRU)
+ * which would want to have effective values 0 for targeted reclaim
+ * but a different value for external reclaim.
+ *
+ * Example
+ * Let's have global and A's reclaim in parallel:
+ * |
+ * A (low=2G, usage = 3G, max = 3G, children_low_usage = 1.5G)
+ * |\
+ * | C (low = 1G, usage = 2.5G)
+ * B (low = 1G, usage = 0.5G)
+ *
+ * For the global reclaim
+ * A.elow = A.low
+ * B.elow = min(B.usage, B.low) because children_low_usage <= A.elow
+ * C.elow = min(C.usage, C.low)
+ *
+ * With the effective values resetting we have A reclaim
+ * A.elow = 0
+ * B.elow = B.low
+ * C.elow = C.low
+ *
+ * If the global reclaim races with A's reclaim then
+ * B.elow = C.elow = 0 because children_low_usage > A.elow)
+ * is possible and reclaiming B would be violating the protection.
+ *
+ */
+ if (root == memcg)
+ return 0;
+
if (in_low_reclaim)
return READ_ONCE(memcg->memory.emin);
@@ -352,8 +407,36 @@ static inline unsigned long mem_cgroup_protection(struct mem_cgroup *memcg,
READ_ONCE(memcg->memory.elow));
}
-enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
- struct mem_cgroup *memcg);
+void mem_cgroup_calculate_protection(struct mem_cgroup *root,
+ struct mem_cgroup *memcg);
+
+static inline bool mem_cgroup_supports_protection(struct mem_cgroup *memcg)
+{
+ /*
+ * The root memcg doesn't account charges, and doesn't support
+ * protection.
+ */
+ return !mem_cgroup_disabled() && !mem_cgroup_is_root(memcg);
+
+}
+
+static inline bool mem_cgroup_below_low(struct mem_cgroup *memcg)
+{
+ if (!mem_cgroup_supports_protection(memcg))
+ return false;
+
+ return READ_ONCE(memcg->memory.elow) >=
+ page_counter_read(&memcg->memory);
+}
+
+static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg)
+{
+ if (!mem_cgroup_supports_protection(memcg))
+ return false;
+
+ return READ_ONCE(memcg->memory.emin) >=
+ page_counter_read(&memcg->memory);
+}
int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask);
@@ -416,6 +499,33 @@ struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
return css ? container_of(css, struct mem_cgroup, css) : NULL;
}
+static inline bool obj_cgroup_tryget(struct obj_cgroup *objcg)
+{
+ return percpu_ref_tryget(&objcg->refcnt);
+}
+
+static inline void obj_cgroup_get(struct obj_cgroup *objcg)
+{
+ percpu_ref_get(&objcg->refcnt);
+}
+
+static inline void obj_cgroup_put(struct obj_cgroup *objcg)
+{
+ percpu_ref_put(&objcg->refcnt);
+}
+
+/*
+ * After the initialization objcg->memcg is always pointing at
+ * a valid memcg, but can be atomically swapped to the parent memcg.
+ *
+ * The caller must ensure that the returned memcg won't be released:
+ * e.g. acquire the rcu_read_lock or css_set_lock.
+ */
+static inline struct mem_cgroup *obj_cgroup_memcg(struct obj_cgroup *objcg)
+{
+ return READ_ONCE(objcg->memcg);
+}
+
static inline void mem_cgroup_put(struct mem_cgroup *memcg)
{
if (memcg)
@@ -520,7 +630,7 @@ unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec,
struct mem_cgroup_per_node *mz;
mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
- return mz->lru_zone_size[zone_idx][lru];
+ return READ_ONCE(mz->lru_zone_size[zone_idx][lru]);
}
void mem_cgroup_handle_over_high(void);
@@ -679,11 +789,34 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
return x;
}
+void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
+ int val);
void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
int val);
void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val);
+
void mod_memcg_obj_state(void *p, int idx, int val);
+static inline void mod_lruvec_slab_state(void *p, enum node_stat_item idx,
+ int val)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __mod_lruvec_slab_state(p, idx, val);
+ local_irq_restore(flags);
+}
+
+static inline void mod_memcg_lruvec_state(struct lruvec *lruvec,
+ enum node_stat_item idx, int val)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __mod_memcg_lruvec_state(lruvec, idx, val);
+ local_irq_restore(flags);
+}
+
static inline void mod_lruvec_state(struct lruvec *lruvec,
enum node_stat_item idx, int val)
{
@@ -825,16 +958,26 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm,
{
}
-static inline unsigned long mem_cgroup_protection(struct mem_cgroup *memcg,
+static inline unsigned long mem_cgroup_protection(struct mem_cgroup *root,
+ struct mem_cgroup *memcg,
bool in_low_reclaim)
{
return 0;
}
-static inline enum mem_cgroup_protection mem_cgroup_protected(
- struct mem_cgroup *root, struct mem_cgroup *memcg)
+static inline void mem_cgroup_calculate_protection(struct mem_cgroup *root,
+ struct mem_cgroup *memcg)
+{
+}
+
+static inline bool mem_cgroup_below_low(struct mem_cgroup *memcg)
+{
+ return false;
+}
+
+static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg)
{
- return MEMCG_PROT_NONE;
+ return false;
}
static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
@@ -1057,6 +1200,11 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
return node_page_state(lruvec_pgdat(lruvec), idx);
}
+static inline void __mod_memcg_lruvec_state(struct lruvec *lruvec,
+ enum node_stat_item idx, int val)
+{
+}
+
static inline void __mod_lruvec_state(struct lruvec *lruvec,
enum node_stat_item idx, int val)
{
@@ -1089,6 +1237,14 @@ static inline void __mod_lruvec_slab_state(void *p, enum node_stat_item idx,
__mod_node_page_state(page_pgdat(page), idx, val);
}
+static inline void mod_lruvec_slab_state(void *p, enum node_stat_item idx,
+ int val)
+{
+ struct page *page = virt_to_head_page(p);
+
+ mod_node_page_state(page_pgdat(page), idx, val);
+}
+
static inline void mod_memcg_obj_state(void *p, int idx, int val)
{
}
@@ -1341,9 +1497,6 @@ static inline void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
}
#endif
-struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep);
-void memcg_kmem_put_cache(struct kmem_cache *cachep);
-
#ifdef CONFIG_MEMCG_KMEM
int __memcg_kmem_charge(struct mem_cgroup *memcg, gfp_t gfp,
unsigned int nr_pages);
@@ -1351,8 +1504,12 @@ void __memcg_kmem_uncharge(struct mem_cgroup *memcg, unsigned int nr_pages);
int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order);
void __memcg_kmem_uncharge_page(struct page *page, int order);
+struct obj_cgroup *get_obj_cgroup_from_current(void);
+
+int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size);
+void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size);
+
extern struct static_key_false memcg_kmem_enabled_key;
-extern struct workqueue_struct *memcg_kmem_cache_wq;
extern int memcg_nr_cache_ids;
void memcg_get_cache_ids(void);
@@ -1368,7 +1525,19 @@ void memcg_put_cache_ids(void);
static inline bool memcg_kmem_enabled(void)
{
- return static_branch_unlikely(&memcg_kmem_enabled_key);
+ return static_branch_likely(&memcg_kmem_enabled_key);
+}
+
+static inline bool memcg_kmem_bypass(void)
+{
+ if (in_interrupt())
+ return true;
+
+ /* Allow remote memcg charging in kthread contexts. */
+ if ((!current->mm || (current->flags & PF_KTHREAD)) &&
+ !current->active_memcg)
+ return true;
+ return false;
}
static inline int memcg_kmem_charge_page(struct page *page, gfp_t gfp,