aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/events
diff options
context:
space:
mode:
author: Marco Elver <elver@google.com>, 2022-08-29 14:47:11 +0200
committer: Peter Zijlstra <peterz@infradead.org>, 2022-08-30 10:56:22 +0200
commit: be3f152568cc7f5f573d21d5f86a2c4f3cc047ab (patch)
tree: 9f5c1c891387d7fc348507d23fd8b89f3d883cec /kernel/events
parent: perf/hw_breakpoint: Mark data __ro_after_init (diff)
download: linux-dev-be3f152568cc7f5f573d21d5f86a2c4f3cc047ab.tar.xz
download: linux-dev-be3f152568cc7f5f573d21d5f86a2c4f3cc047ab.zip
perf/hw_breakpoint: Optimize constant number of breakpoint slots
Optimize internal hw_breakpoint state if the architecture's number of breakpoint slots is constant. This avoids several kmalloc() calls and potentially unnecessary failures if the allocations fail, and it subtly improves code generation and cache locality.

The protocol is that if an architecture defines hw_breakpoint_slots via the preprocessor, it must be constant and the same for all types.

Signed-off-by: Marco Elver <elver@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Dmitry Vyukov <dvyukov@google.com>
Acked-by: Ian Rogers <irogers@google.com>
Link: https://lore.kernel.org/r/20220829124719.675715-7-elver@google.com
Diffstat (limited to 'kernel/events')
-rw-r--r-- kernel/events/hw_breakpoint.c | 94
1 files changed, 61 insertions, 33 deletions
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index 7df46b276452..9fb66d358d81 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -40,13 +40,16 @@ struct bp_cpuinfo {
/* Number of pinned cpu breakpoints in a cpu */
unsigned int cpu_pinned;
/* tsk_pinned[n] is the number of tasks having n+1 breakpoints */
+#ifdef hw_breakpoint_slots
+ unsigned int tsk_pinned[hw_breakpoint_slots(0)];
+#else
unsigned int *tsk_pinned;
+#endif
/* Number of non-pinned cpu/task breakpoints in a cpu */
unsigned int flexible; /* XXX: placeholder, see fetch_this_slot() */
};
static DEFINE_PER_CPU(struct bp_cpuinfo, bp_cpuinfo[TYPE_MAX]);
-static int nr_slots[TYPE_MAX] __ro_after_init;
static struct bp_cpuinfo *get_bp_info(int cpu, enum bp_type_idx type)
{
@@ -73,6 +76,54 @@ struct bp_busy_slots {
/* Serialize accesses to the above constraints */
static DEFINE_MUTEX(nr_bp_mutex);
+#ifdef hw_breakpoint_slots
+/*
+ * Number of breakpoint slots is constant, and the same for all types.
+ */
+static_assert(hw_breakpoint_slots(TYPE_INST) == hw_breakpoint_slots(TYPE_DATA));
+static inline int hw_breakpoint_slots_cached(int type) { return hw_breakpoint_slots(type); }
+static inline int init_breakpoint_slots(void) { return 0; }
+#else
+/*
+ * Dynamic number of breakpoint slots.
+ */
+static int __nr_bp_slots[TYPE_MAX] __ro_after_init;
+
+static inline int hw_breakpoint_slots_cached(int type)
+{
+ return __nr_bp_slots[type];
+}
+
+static __init int init_breakpoint_slots(void)
+{
+ int i, cpu, err_cpu;
+
+ for (i = 0; i < TYPE_MAX; i++)
+ __nr_bp_slots[i] = hw_breakpoint_slots(i);
+
+ for_each_possible_cpu(cpu) {
+ for (i = 0; i < TYPE_MAX; i++) {
+ struct bp_cpuinfo *info = get_bp_info(cpu, i);
+
+ info->tsk_pinned = kcalloc(__nr_bp_slots[i], sizeof(int), GFP_KERNEL);
+ if (!info->tsk_pinned)
+ goto err;
+ }
+ }
+
+ return 0;
+err:
+ for_each_possible_cpu(err_cpu) {
+ for (i = 0; i < TYPE_MAX; i++)
+ kfree(get_bp_info(err_cpu, i)->tsk_pinned);
+ if (err_cpu == cpu)
+ break;
+ }
+
+ return -ENOMEM;
+}
+#endif
+
__weak int hw_breakpoint_weight(struct perf_event *bp)
{
return 1;
@@ -95,7 +146,7 @@ static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
unsigned int *tsk_pinned = get_bp_info(cpu, type)->tsk_pinned;
int i;
- for (i = nr_slots[type] - 1; i >= 0; i--) {
+ for (i = hw_breakpoint_slots_cached(type) - 1; i >= 0; i--) {
if (tsk_pinned[i] > 0)
return i + 1;
}
@@ -312,7 +363,7 @@ static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type)
fetch_this_slot(&slots, weight);
/* Flexible counters need to keep at least one slot */
- if (slots.pinned + (!!slots.flexible) > nr_slots[type])
+ if (slots.pinned + (!!slots.flexible) > hw_breakpoint_slots_cached(type))
return -ENOSPC;
ret = arch_reserve_bp_slot(bp);
@@ -632,7 +683,7 @@ bool hw_breakpoint_is_used(void)
if (info->cpu_pinned)
return true;
- for (int slot = 0; slot < nr_slots[type]; ++slot) {
+ for (int slot = 0; slot < hw_breakpoint_slots_cached(type); ++slot) {
if (info->tsk_pinned[slot])
return true;
}
@@ -716,42 +767,19 @@ static struct pmu perf_breakpoint = {
int __init init_hw_breakpoint(void)
{
- int cpu, err_cpu;
- int i, ret;
-
- for (i = 0; i < TYPE_MAX; i++)
- nr_slots[i] = hw_breakpoint_slots(i);
-
- for_each_possible_cpu(cpu) {
- for (i = 0; i < TYPE_MAX; i++) {
- struct bp_cpuinfo *info = get_bp_info(cpu, i);
-
- info->tsk_pinned = kcalloc(nr_slots[i], sizeof(int),
- GFP_KERNEL);
- if (!info->tsk_pinned) {
- ret = -ENOMEM;
- goto err;
- }
- }
- }
+ int ret;
ret = rhltable_init(&task_bps_ht, &task_bps_ht_params);
if (ret)
- goto err;
+ return ret;
+
+ ret = init_breakpoint_slots();
+ if (ret)
+ return ret;
constraints_initialized = true;
perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT);
return register_die_notifier(&hw_breakpoint_exceptions_nb);
-
-err:
- for_each_possible_cpu(err_cpu) {
- for (i = 0; i < TYPE_MAX; i++)
- kfree(get_bp_info(err_cpu, i)->tsk_pinned);
- if (err_cpu == cpu)
- break;
- }
-
- return ret;
}