From 3e5deb708c8f3f7d645f567b2ba38d8045fa11ba Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 2 Feb 2024 15:40:54 -0800 Subject: perf cpumap: Clean up use of perf_cpu_map__has_any_cpu_or_is_empty MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Most uses of what was perf_cpu_map__empty but is now perf_cpu_map__has_any_cpu_or_is_empty want to do something with the CPU map if it contains CPUs. Replace uses of perf_cpu_map__has_any_cpu_or_is_empty with other helpers so that CPUs within the map can be handled. Reviewed-by: James Clark Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexandre Ghiti Cc: Andrew Jones Cc: André Almeida Cc: Athira Rajeev Cc: Atish Patra Cc: Changbin Du Cc: Darren Hart Cc: Davidlohr Bueso Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Nick Desaulniers Cc: Paolo Bonzini Cc: Paran Lee Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Suzuki Poulouse Cc: Thomas Gleixner Cc: Will Deacon Cc: Yang Jihong Cc: Yang Li Cc: Yanteng Si Link: https://lore.kernel.org/r/20240202234057.2085863-6-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'tools/perf/builtin-stat.c') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 6bba1a89d030..a47ced077fa6 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1319,10 +1319,9 @@ static int cpu__get_cache_id_from_map(struct perf_cpu cpu, char *map) * be the first online CPU in the cache domain else use the * first online CPU of the cache domain as the ID. 
*/ - if (perf_cpu_map__has_any_cpu_or_is_empty(cpu_map)) + id = perf_cpu_map__min(cpu_map).cpu; + if (id == -1) id = cpu.cpu; - else - id = perf_cpu_map__cpu(cpu_map, 0).cpu; /* Free the perf_cpu_map used to find the cache ID */ perf_cpu_map__put(cpu_map); @@ -1642,7 +1641,7 @@ static int perf_stat_init_aggr_mode(void) * taking the highest cpu number to be the size of * the aggregation translate cpumap. */ - if (!perf_cpu_map__has_any_cpu_or_is_empty(evsel_list->core.user_requested_cpus)) + if (!perf_cpu_map__is_any_cpu_or_is_empty(evsel_list->core.user_requested_cpus)) nr = perf_cpu_map__max(evsel_list->core.user_requested_cpus).cpu; else nr = 0; @@ -2334,7 +2333,7 @@ int process_stat_config_event(struct perf_session *session, perf_event__read_stat_config(&stat_config, &event->stat_config); - if (perf_cpu_map__has_any_cpu_or_is_empty(st->cpus)) { + if (perf_cpu_map__is_empty(st->cpus)) { if (st->aggr_mode != AGGR_UNSET) pr_warning("warning: processing task data, aggregation mode not set\n"); } else if (st->aggr_mode != AGGR_UNSET) { -- cgit v1.2.3-59-g8ed1b From 954ac1b4a79a06736fd85a183f34c18c152286c6 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 2 Feb 2024 15:40:56 -0800 Subject: perf stat: Remove duplicate cpus_map_matched function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use libperf's perf_cpu_map__equal() that performs the same function. Reviewed-by: James Clark Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexandre Ghiti Cc: Andrew Jones Cc: André Almeida Cc: Athira Rajeev Cc: Atish Patra Cc: Changbin Du Cc: Darren Hart Cc: Davidlohr Bueso Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Nick Desaulniers Cc: Paolo Bonzini Cc: Paran Lee Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. 
Gunderson Cc: Suzuki Poulouse Cc: Thomas Gleixner Cc: Will Deacon Cc: Yang Jihong Cc: Yang Li Cc: Yanteng Si Link: https://lore.kernel.org/r/20240202234057.2085863-8-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) (limited to 'tools/perf/builtin-stat.c') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index a47ced077fa6..65388c57bb5d 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -164,26 +164,6 @@ static struct perf_stat_config stat_config = { .iostat_run = false, }; -static bool cpus_map_matched(struct evsel *a, struct evsel *b) -{ - if (!a->core.cpus && !b->core.cpus) - return true; - - if (!a->core.cpus || !b->core.cpus) - return false; - - if (perf_cpu_map__nr(a->core.cpus) != perf_cpu_map__nr(b->core.cpus)) - return false; - - for (int i = 0; i < perf_cpu_map__nr(a->core.cpus); i++) { - if (perf_cpu_map__cpu(a->core.cpus, i).cpu != - perf_cpu_map__cpu(b->core.cpus, i).cpu) - return false; - } - - return true; -} - static void evlist__check_cpu_maps(struct evlist *evlist) { struct evsel *evsel, *warned_leader = NULL; @@ -194,7 +174,7 @@ static void evlist__check_cpu_maps(struct evlist *evlist) /* Check that leader matches cpus with each member. */ if (leader == evsel) continue; - if (cpus_map_matched(leader, evsel)) + if (perf_cpu_map__equal(leader->core.cpus, evsel->core.cpus)) continue; /* If there's mismatch disable the group and warn user. */ -- cgit v1.2.3-59-g8ed1b From 03f2357017c37d68e73d7d8d77abfcb72e12bc86 Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Fri, 12 Apr 2024 14:07:41 -0700 Subject: perf stat: Add new field in stat_config to enable hardware aware grouping Hardware counter and event information could be used to help creating event groups that better utilize hardware counters and improve multiplexing. 
Reviewed-by: Ian Rogers Signed-off-by: Weilin Wang Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Samantha Alt Link: https://lore.kernel.org/r/20240412210756.309828-2-weilin.wang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 5 +++++ tools/perf/util/metricgroup.c | 3 +++ tools/perf/util/metricgroup.h | 1 + tools/perf/util/stat.h | 1 + 4 files changed, 10 insertions(+) (limited to 'tools/perf/builtin-stat.c') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 65388c57bb5d..65a3dd7ffac3 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -2085,6 +2085,7 @@ static int add_default_attributes(void) stat_config.metric_no_threshold, stat_config.user_requested_cpu_list, stat_config.system_wide, + stat_config.hardware_aware_grouping, &stat_config.metric_events); } @@ -2118,6 +2119,7 @@ static int add_default_attributes(void) stat_config.metric_no_threshold, stat_config.user_requested_cpu_list, stat_config.system_wide, + stat_config.hardware_aware_grouping, &stat_config.metric_events); } @@ -2152,6 +2154,7 @@ static int add_default_attributes(void) /*metric_no_threshold=*/true, stat_config.user_requested_cpu_list, stat_config.system_wide, + stat_config.hardware_aware_grouping, &stat_config.metric_events) < 0) return -1; } @@ -2193,6 +2196,7 @@ static int add_default_attributes(void) /*metric_no_threshold=*/true, stat_config.user_requested_cpu_list, stat_config.system_wide, + stat_config.hardware_aware_grouping, &stat_config.metric_events) < 0) return -1; @@ -2727,6 +2731,7 @@ int cmd_stat(int argc, const char **argv) stat_config.metric_no_threshold, stat_config.user_requested_cpu_list, stat_config.system_wide, + stat_config.hardware_aware_grouping, &stat_config.metric_events); zfree(&metrics); diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 
6ec083af14a1..9be406524617 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -1690,12 +1690,15 @@ int metricgroup__parse_groups(struct evlist *perf_evlist, bool metric_no_threshold, const char *user_requested_cpu_list, bool system_wide, + bool hardware_aware_grouping, struct rblist *metric_events) { const struct pmu_metrics_table *table = pmu_metrics_table__find(); if (!table) return -EINVAL; + if (hardware_aware_grouping) + pr_debug("Use hardware aware grouping instead of traditional metric grouping method\n"); return parse_groups(perf_evlist, pmu, str, metric_no_group, metric_no_merge, metric_no_threshold, user_requested_cpu_list, system_wide, diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index d5325c6ec8e1..779f6ede1b51 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -77,6 +77,7 @@ int metricgroup__parse_groups(struct evlist *perf_evlist, bool metric_no_threshold, const char *user_requested_cpu_list, bool system_wide, + bool hardware_aware_grouping, struct rblist *metric_events); int metricgroup__parse_groups_test(struct evlist *evlist, const struct pmu_metrics_table *table, diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index d6e5c8787ba2..fd7a187551bd 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -87,6 +87,7 @@ struct perf_stat_config { bool metric_no_group; bool metric_no_merge; bool metric_no_threshold; + bool hardware_aware_grouping; bool stop_read_counter; bool iostat_run; char *user_requested_cpu_list; -- cgit v1.2.3-59-g8ed1b From a8cd4766d9128b897af6d4e0d22604f3bdaf2f82 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 7 May 2024 11:35:41 -0700 Subject: perf cpumap: Remove refcnt from 'struct cpu_aggr_map' It is assigned a value of 1 and never incremented. Remove and replace puts with delete. 
Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Rajeev Cc: Ben Gainey Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Li Dong Cc: Mark Rutland Cc: Namhyung Kim Cc: Oliver Upton Cc: Paran Lee Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sun Haiyong Cc: Tim Chen Cc: Yanteng Si Cc: Yicong Yang Link: https://lore.kernel.org/r/20240507183545.1236093-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 16 +++------------- tools/perf/util/cpumap.c | 2 -- tools/perf/util/cpumap.h | 2 -- 3 files changed, 3 insertions(+), 17 deletions(-) (limited to 'tools/perf/builtin-stat.c') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 65a3dd7ffac3..35f79b48e8dc 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1631,23 +1631,13 @@ static int perf_stat_init_aggr_mode(void) static void cpu_aggr_map__delete(struct cpu_aggr_map *map) { - if (map) { - WARN_ONCE(refcount_read(&map->refcnt) != 0, - "cpu_aggr_map refcnt unbalanced\n"); - free(map); - } -} - -static void cpu_aggr_map__put(struct cpu_aggr_map *map) -{ - if (map && refcount_dec_and_test(&map->refcnt)) - cpu_aggr_map__delete(map); + free(map); } static void perf_stat__exit_aggr_mode(void) { - cpu_aggr_map__put(stat_config.aggr_map); - cpu_aggr_map__put(stat_config.cpus_aggr_map); + cpu_aggr_map__delete(stat_config.aggr_map); + cpu_aggr_map__delete(stat_config.cpus_aggr_map); stat_config.aggr_map = NULL; stat_config.cpus_aggr_map = NULL; } diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 6a270d640acb..27094211edd8 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -180,8 +180,6 @@ struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr) cpus->nr = nr; for (i = 0; i < nr; i++) cpus->map[i] = aggr_cpu_id__empty(); - - refcount_set(&cpus->refcnt, 1); } return cpus; diff --git a/tools/perf/util/cpumap.h 
b/tools/perf/util/cpumap.h index 26cf76c693f5..ee0f6139b04a 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -5,7 +5,6 @@ #include <stdbool.h> #include <stdio.h> #include <perf/cpumap.h> -#include <linux/refcount.h> /** Identify where counts are aggregated, -1 implies not to aggregate. */ struct aggr_cpu_id { @@ -37,7 +36,6 @@ struct aggr_cpu_id { /** A collection of aggr_cpu_id values, the "built" version is sorted and uniqued. */ struct cpu_aggr_map { - refcount_t refcnt; /** Number of valid entries. */ int nr; /** The entries. */ -- cgit v1.2.3-59-g8ed1b