From da885a0e5e0610e011f14a70c2dc7c4ddf79c6d6 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 17 Apr 2023 16:50:02 -0300 Subject: perf cpumap: Add reference count checking Enabled when REFCNT_CHECKING is defined. The change adds a memory allocated pointer that is interposed between the reference counted cpu map at a get and freed by a put. The pointer replaces the original perf_cpu_map struct, so use of the perf_cpu_map via APIs remains unchanged. Any use of the cpu map without the API requires two versions, handled via the RC_CHK_ACCESS macro. This change is intended to catch: - use after put: using a cpumap after you have put it will cause a segv. - unbalanced puts: two puts for a get will result in a double free that can be captured and reported by tools like address sanitizer, including with the associated stack traces of allocation and frees. - missing puts: if a put is missing then the get turns into a memory leak that can be reported by leak sanitizer, including the stack trace at the point the get occurs. Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Bayduraev Cc: Andi Kleen Cc: Andrew Morton Cc: Andy Shevchenko Cc: Darren Hart Cc: Davidlohr Bueso Cc: Dmitriy Vyukov Cc: Eric Dumazet Cc: German Gomez Cc: Hao Luo Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Madhavan Srinivasan Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Miaoqian Lin Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Shunsuke Nakamura Cc: Song Liu Cc: Stephen Brennan Cc: Steven Rostedt (VMware) Cc: Thomas Gleixner Cc: Thomas Richter , Cc: Yury Norov Link: https://lore.kernel.org/lkml/20230407230405.2931830-3-irogers@google.com [ Extracted from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/Makefile | 2 +- tools/lib/perf/cpumap.c | 85 +++++++++++++++++--------------- tools/lib/perf/include/internal/cpumap.h | 5 +- 3 files changed, 50 insertions(+), 42 deletions(-) (limited to 'tools/lib/perf') diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile index d8cad124e4c5..3a9b2140aa04 100644 --- a/tools/lib/perf/Makefile +++ b/tools/lib/perf/Makefile @@ -188,7 +188,7 @@ install_lib: libs cp -fpR $(LIBPERF_ALL) $(DESTDIR)$(libdir_SQ) HDRS := bpf_perf.h core.h cpumap.h threadmap.h evlist.h evsel.h event.h mmap.h -INTERNAL_HDRS := cpumap.h evlist.h evsel.h lib.h mmap.h threadmap.h xyarray.h +INTERNAL_HDRS := cpumap.h evlist.h evsel.h lib.h mmap.h rc_check.h threadmap.h xyarray.h INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/perf INSTALL_HDRS := $(addprefix $(INSTALL_HDRS_PFX)/, $(HDRS)) diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c index 27c3e73c6db2..1229b18bcdb1 100644 --- a/tools/lib/perf/cpumap.c +++ b/tools/lib/perf/cpumap.c @@ -12,19 +12,19 @@ void perf_cpu_map__set_nr(struct perf_cpu_map *map, int nr_cpus) { - map->nr = nr_cpus; + RC_CHK_ACCESS(map)->nr = nr_cpus; } struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus) { - struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(struct perf_cpu) * nr_cpus); + RC_STRUCT(perf_cpu_map) *cpus = malloc(sizeof(*cpus) + sizeof(struct perf_cpu) * nr_cpus); + struct perf_cpu_map *result; - if (cpus != NULL) { + if (ADD_RC_CHK(result, cpus)) { cpus->nr = nr_cpus; refcount_set(&cpus->refcnt, 1); - } - return cpus; + return result; } struct perf_cpu_map *perf_cpu_map__dummy_new(void) @@ -32,7 +32,7 @@ struct perf_cpu_map *perf_cpu_map__dummy_new(void) struct perf_cpu_map *cpus = perf_cpu_map__alloc(1); if (cpus) - cpus->map[0].cpu = -1; + RC_CHK_ACCESS(cpus)->map[0].cpu = -1; return cpus; } @@ -42,21 +42,28 @@ static void cpu_map__delete(struct perf_cpu_map *map) if (map) { WARN_ONCE(refcount_read(perf_cpu_map__refcnt(map)) != 0, "cpu_map refcnt unbalanced\n"); - free(map); + RC_CHK_FREE(map); } } struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map) { - if (map) + struct perf_cpu_map *result; + + if (RC_CHK_GET(result, map)) refcount_inc(perf_cpu_map__refcnt(map)); - return map; + + return result; } void perf_cpu_map__put(struct perf_cpu_map *map) { - if (map && refcount_dec_and_test(perf_cpu_map__refcnt(map))) - cpu_map__delete(map); + if (map) { + if (refcount_dec_and_test(perf_cpu_map__refcnt(map))) + cpu_map__delete(map); + else + RC_CHK_PUT(map); + } } static struct perf_cpu_map *cpu_map__default_new(void) @@ -73,7 +80,7 @@ static struct perf_cpu_map *cpu_map__default_new(void) int i; for (i = 0; i < nr_cpus; ++i) - cpus->map[i].cpu = i; + RC_CHK_ACCESS(cpus)->map[i].cpu = i; } return cpus; @@ -99,15 +106,15 @@ static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu int i, j; if (cpus != NULL) { - memcpy(cpus->map, tmp_cpus, payload_size); - qsort(cpus->map, nr_cpus, sizeof(struct perf_cpu), cmp_cpu); + memcpy(RC_CHK_ACCESS(cpus)->map, tmp_cpus, payload_size); + qsort(RC_CHK_ACCESS(cpus)->map, nr_cpus, sizeof(struct perf_cpu), cmp_cpu); /* Remove dups */ j = 0; for (i = 0; i < nr_cpus; i++) { - if (i == 0 || cpus->map[i].cpu != cpus->map[i - 1].cpu) - cpus->map[j++].cpu = cpus->map[i].cpu; + if (i == 0 || RC_CHK_ACCESS(cpus)->map[i].cpu != RC_CHK_ACCESS(cpus)->map[i - 1].cpu) + RC_CHK_ACCESS(cpus)->map[j++].cpu = RC_CHK_ACCESS(cpus)->map[i].cpu; } - cpus->nr = j; + perf_cpu_map__set_nr(cpus, j); assert(j <= nr_cpus); } return cpus; @@ -268,20 +275,20 @@ struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx) .cpu = -1 }; - if (cpus && idx < cpus->nr) - return cpus->map[idx]; + if (cpus && idx < RC_CHK_ACCESS(cpus)->nr) + return RC_CHK_ACCESS(cpus)->map[idx]; return result; } int perf_cpu_map__nr(const struct perf_cpu_map *cpus) { - return cpus ? cpus->nr : 1; + return cpus ? RC_CHK_ACCESS(cpus)->nr : 1; } bool perf_cpu_map__empty(const struct perf_cpu_map *map) { - return map ? map->map[0].cpu == -1 : true; + return map ? RC_CHK_ACCESS(map)->map[0].cpu == -1 : true; } int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu) @@ -292,10 +299,10 @@ int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu) return -1; low = 0; - high = cpus->nr; + high = RC_CHK_ACCESS(cpus)->nr; while (low < high) { int idx = (low + high) / 2; - struct perf_cpu cpu_at_idx = cpus->map[idx]; + struct perf_cpu cpu_at_idx = RC_CHK_ACCESS(cpus)->map[idx]; if (cpu_at_idx.cpu == cpu.cpu) return idx; @@ -321,7 +328,7 @@ struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map) }; // cpu_map__trim_new() qsort()s it, cpu_map__default_new() sorts it as well. - return map->nr > 0 ? map->map[map->nr - 1] : result; + return RC_CHK_ACCESS(map)->nr > 0 ? RC_CHK_ACCESS(map)->map[RC_CHK_ACCESS(map)->nr - 1] : result; } /** Is 'b' a subset of 'a'. */ @@ -329,15 +336,15 @@ bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu { if (a == b || !b) return true; - if (!a || b->nr > a->nr) + if (!a || RC_CHK_ACCESS(b)->nr > RC_CHK_ACCESS(a)->nr) return false; - for (int i = 0, j = 0; i < a->nr; i++) { - if (a->map[i].cpu > b->map[j].cpu) + for (int i = 0, j = 0; i < RC_CHK_ACCESS(a)->nr; i++) { + if (RC_CHK_ACCESS(a)->map[i].cpu > RC_CHK_ACCESS(b)->map[j].cpu) return false; - if (a->map[i].cpu == b->map[j].cpu) { + if (RC_CHK_ACCESS(a)->map[i].cpu == RC_CHK_ACCESS(b)->map[j].cpu) { j++; - if (j == b->nr) + if (j == RC_CHK_ACCESS(b)->nr) return true; } } @@ -367,27 +374,27 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, return perf_cpu_map__get(other); } - tmp_len = orig->nr + other->nr; + tmp_len = RC_CHK_ACCESS(orig)->nr + RC_CHK_ACCESS(other)->nr; tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu)); if (!tmp_cpus) return NULL; /* Standard merge algorithm from wikipedia */ i = j = k = 0; - while (i < orig->nr && j < other->nr) { - if (orig->map[i].cpu <= other->map[j].cpu) { - if (orig->map[i].cpu == other->map[j].cpu) + while (i < RC_CHK_ACCESS(orig)->nr && j < RC_CHK_ACCESS(other)->nr) { + if (RC_CHK_ACCESS(orig)->map[i].cpu <= RC_CHK_ACCESS(other)->map[j].cpu) { + if (RC_CHK_ACCESS(orig)->map[i].cpu == RC_CHK_ACCESS(other)->map[j].cpu) j++; - tmp_cpus[k++] = orig->map[i++]; + tmp_cpus[k++] = RC_CHK_ACCESS(orig)->map[i++]; } else - tmp_cpus[k++] = other->map[j++]; + tmp_cpus[k++] = RC_CHK_ACCESS(other)->map[j++]; } - while (i < orig->nr) - tmp_cpus[k++] = orig->map[i++]; + while (i < RC_CHK_ACCESS(orig)->nr) + tmp_cpus[k++] = RC_CHK_ACCESS(orig)->map[i++]; - while (j < other->nr) - tmp_cpus[k++] = other->map[j++]; + while (j < RC_CHK_ACCESS(other)->nr) + tmp_cpus[k++] = RC_CHK_ACCESS(other)->map[j++]; assert(k <= tmp_len); merged = cpu_map__trim_new(k, tmp_cpus); diff --git a/tools/lib/perf/include/internal/cpumap.h b/tools/lib/perf/include/internal/cpumap.h index 1e840dd53a11..49649eb51ce4 100644 --- a/tools/lib/perf/include/internal/cpumap.h +++ b/tools/lib/perf/include/internal/cpumap.h @@ -4,6 +4,7 @@ #include #include +#include /** * A sized, reference counted, sorted array of integers representing CPU @@ -12,7 +13,7 @@ * gaps if CPU numbers were used. For events associated with a pid, rather than * a CPU, a single dummy map with an entry of -1 is used. */ -struct perf_cpu_map { +DECLARE_RC_STRUCT(perf_cpu_map) { refcount_t refcnt; /** Length of the map array. */ int nr; @@ -32,6 +33,6 @@ void perf_cpu_map__set_nr(struct perf_cpu_map *map, int nr_cpus); static inline refcount_t *perf_cpu_map__refcnt(struct perf_cpu_map *map) { - return &map->refcnt; + return &RC_CHK_ACCESS(map)->refcnt; } #endif /* __LIBPERF_INTERNAL_CPUMAP_H */ -- cgit v1.2.3-59-g8ed1b