From b3d1411b6726ea6930222f8f12587d89762477c6 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Sat, 30 Nov 2019 17:50:30 -0800 Subject: mm: emit tracepoint when RSS changes Useful to track how RSS is changing per TGID to detect spikes in RSS and memory hogs. Several Android teams have been using this patch in various kernel trees for half a year now. Many reported to me it is really useful so I'm posting it upstream. Initial patch developed by Tim Murray. Changes I made from original patch: o Prevent any additional space consumed by mm_struct. Regarding the fact that the RSS may change too often thus flooding the traces - note that, there is some "hysterisis" with this already. That is - We update the counter only if we receive 64 page faults due to SPLIT_RSS_ACCOUNTING. However, during zapping or copying of pte range, the RSS is updated immediately which can become noisy/flooding. In a previous discussion, we agreed that BPF or ftrace can be used to rate limit the signal if this becomes an issue. Also note that I added wrappers to trace_rss_stat to prevent compiler errors where linux/mm.h is included from tracing code, causing errors such as: CC kernel/trace/power-traces.o In file included from ./include/trace/define_trace.h:102, from ./include/trace/events/kmem.h:342, from ./include/linux/mm.h:31, from ./include/linux/ring_buffer.h:5, from ./include/linux/trace_events.h:6, from ./include/trace/events/power.h:12, from kernel/trace/power-traces.c:15: ./include/trace/trace_events.h:113:22: error: field `ent' has incomplete type struct trace_entry ent; \ Link: http://lore.kernel.org/r/20190903200905.198642-1-joel@joelfernandes.org Link: http://lkml.kernel.org/r/20191001172817.234886-1-joel@joelfernandes.org Co-developed-by: Tim Murray Signed-off-by: Tim Murray Signed-off-by: Joel Fernandes (Google) Acked-by: Michal Hocko Cc: Carmen Jackson Cc: Mayank Gupta Cc: Daniel Colascione Cc: Steven Rostedt (VMware) Cc: Minchan Kim Cc: "Aneesh Kumar K.V" Cc: Dan Williams Cc: Jerome Glisse Cc: Matthew Wilcox Cc: Ralph Campbell Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 14 +++++++++++--- include/trace/events/kmem.h | 21 +++++++++++++++++++++ mm/memory.c | 6 ++++++ 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index f6fb714fa851..935383081397 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1643,19 +1643,27 @@ static inline unsigned long get_mm_counter(struct mm_struct *mm, int member) return (unsigned long)val; } +void mm_trace_rss_stat(int member, long count); + static inline void add_mm_counter(struct mm_struct *mm, int member, long value) { - atomic_long_add(value, &mm->rss_stat.count[member]); + long count = atomic_long_add_return(value, &mm->rss_stat.count[member]); + + mm_trace_rss_stat(member, count); } static inline void inc_mm_counter(struct mm_struct *mm, int member) { - atomic_long_inc(&mm->rss_stat.count[member]); + long count = atomic_long_inc_return(&mm->rss_stat.count[member]); + + mm_trace_rss_stat(member, count); } static inline void dec_mm_counter(struct mm_struct *mm, int member) { - atomic_long_dec(&mm->rss_stat.count[member]); + long count = atomic_long_dec_return(&mm->rss_stat.count[member]); + + mm_trace_rss_stat(member, count); } /* Optimized variant when page is already known not to be PageAnon */ diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index 69e8bb8963db..5a0666bfcf85 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -316,6 +316,27 @@ TRACE_EVENT(mm_page_alloc_extfrag, __entry->change_ownership) ); +TRACE_EVENT(rss_stat, + + TP_PROTO(int member, + long count), + + TP_ARGS(member, count), + + TP_STRUCT__entry( + __field(int, member) + __field(long, size) + ), + + TP_fast_assign( + __entry->member = member; + __entry->size = (count << PAGE_SHIFT); + ), + + TP_printk("member=%d size=%ldB", + __entry->member, + __entry->size) + ); #endif /* _TRACE_KMEM_H */ /* This part must be outside protection */ diff --git a/mm/memory.c b/mm/memory.c index 9ea917e28ef4..57c910aaba45 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -72,6 +72,8 @@ #include #include +#include + #include #include #include @@ -152,6 +154,10 @@ static int __init init_zero_pfn(void) } core_initcall(init_zero_pfn); +void mm_trace_rss_stat(int member, long count) +{ + trace_rss_stat(member, count); +} #if defined(SPLIT_RSS_COUNTING) -- cgit v1.2.3-59-g8ed1b