diff options
author | 2024-07-03 12:56:26 -0700 | |
---|---|---|
committer | 2024-07-03 12:57:41 -0700 | |
commit | 210ac17dedc9810d744ae52f8780278211a490aa (patch) | |
tree | 869ac309edde592c98f91a2a3556a6deb2c9c378 | |
parent | documentation: Fix riscv cmodx example (diff) | |
parent | perf: RISC-V: Check standard event availability (diff) | |
download | linux-rng-210ac17dedc9810d744ae52f8780278211a490aa.tar.xz linux-rng-210ac17dedc9810d744ae52f8780278211a490aa.zip |
Merge patch series "Assorted fixes in RISC-V PMU driver"
Atish Patra <atishp@rivosinc.com> says:
This series contains 3 fixes out of which the first one is a new fix
for invalid event data reported in lkml[2]. The last two are v3 of Samuel's
patch[1]. I added the RB/TB/Fixes tag and moved 1 unrelated change
to its own patch. I also changed an error message in kvm vcpu_pmu from
pr_err to pr_debug to avoid redundant failure error messages generated
due to the boot time quering of events implemented in the patch[1]
Here is the original cover letter for the patch[1]
Before this patch:
$ perf list hw
List of pre-defined events (to be used in -e or -M):
branch-instructions OR branches [Hardware event]
branch-misses [Hardware event]
bus-cycles [Hardware event]
cache-misses [Hardware event]
cache-references [Hardware event]
cpu-cycles OR cycles [Hardware event]
instructions [Hardware event]
ref-cycles [Hardware event]
stalled-cycles-backend OR idle-cycles-backend [Hardware event]
stalled-cycles-frontend OR idle-cycles-frontend [Hardware event]
$ perf stat -ddd true
Performance counter stats for 'true':
4.36 msec task-clock # 0.744 CPUs utilized
1 context-switches # 229.325 /sec
0 cpu-migrations # 0.000 /sec
38 page-faults # 8.714 K/sec
4,375,694 cycles # 1.003 GHz (60.64%)
728,945 instructions # 0.17 insn per cycle
79,199 branches # 18.162 M/sec
17,709 branch-misses # 22.36% of all branches
181,734 L1-dcache-loads # 41.676 M/sec
5,547 L1-dcache-load-misses # 3.05% of all L1-dcache accesses
<not counted> LLC-loads (0.00%)
<not counted> LLC-load-misses (0.00%)
<not counted> L1-icache-loads (0.00%)
<not counted> L1-icache-load-misses (0.00%)
<not counted> dTLB-loads (0.00%)
<not counted> dTLB-load-misses (0.00%)
<not counted> iTLB-loads (0.00%)
<not counted> iTLB-load-misses (0.00%)
<not counted> L1-dcache-prefetches (0.00%)
<not counted> L1-dcache-prefetch-misses (0.00%)
0.005860375 seconds time elapsed
0.000000000 seconds user
0.010383000 seconds sys
After this patch:
$ perf list hw
List of pre-defined events (to be used in -e or -M):
branch-instructions OR branches [Hardware event]
branch-misses [Hardware event]
cache-misses [Hardware event]
cache-references [Hardware event]
cpu-cycles OR cycles [Hardware event]
instructions [Hardware event]
$ perf stat -ddd true
Performance counter stats for 'true':
5.16 msec task-clock # 0.848 CPUs utilized
1 context-switches # 193.817 /sec
0 cpu-migrations # 0.000 /sec
37 page-faults # 7.171 K/sec
5,183,625 cycles # 1.005 GHz
961,696 instructions # 0.19 insn per cycle
85,853 branches # 16.640 M/sec
20,462 branch-misses # 23.83% of all branches
243,545 L1-dcache-loads # 47.203 M/sec
5,974 L1-dcache-load-misses # 2.45% of all L1-dcache accesses
<not supported> LLC-loads
<not supported> LLC-load-misses
<not supported> L1-icache-loads
<not supported> L1-icache-load-misses
<not supported> dTLB-loads
19,619 dTLB-load-misses
<not supported> iTLB-loads
6,831 iTLB-load-misses
<not supported> L1-dcache-prefetches
<not supported> L1-dcache-prefetch-misses
0.006085625 seconds time elapsed
0.000000000 seconds user
0.013022000 seconds sys
[1] https://lore.kernel.org/linux-riscv/20240418014652.1143466-1-samuel.holland@sifive.com/
[2] https://lore.kernel.org/all/CC51D53B-846C-4D81-86FC-FBF969D0A0D6@pku.edu.cn/
* b4-shazam-merge:
perf: RISC-V: Check standard event availability
drivers/perf: riscv: Reset the counter to hpmevent mapping while starting cpus
drivers/perf: riscv: Do not update the event data if uptodate
Link: https://lore.kernel.org/r/20240628-misc_perf_fixes-v4-0-e01cfddcf035@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
-rw-r--r-- | arch/riscv/kvm/vcpu_pmu.c | 2 | ||||
-rw-r--r-- | drivers/perf/riscv_pmu.c | 2 | ||||
-rw-r--r-- | drivers/perf/riscv_pmu_sbi.c | 44 |
3 files changed, 43 insertions, 5 deletions
diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c index 04db1f993c47..bcf41d6e0df0 100644 --- a/arch/riscv/kvm/vcpu_pmu.c +++ b/arch/riscv/kvm/vcpu_pmu.c @@ -327,7 +327,7 @@ static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_att event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc); if (IS_ERR(event)) { - pr_err("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event)); + pr_debug("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event)); return PTR_ERR(event); } diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c index 78c490e0505a..0a02e85a8951 100644 --- a/drivers/perf/riscv_pmu.c +++ b/drivers/perf/riscv_pmu.c @@ -167,7 +167,7 @@ u64 riscv_pmu_event_update(struct perf_event *event) unsigned long cmask; u64 oldval, delta; - if (!rvpmu->ctr_read) + if (!rvpmu->ctr_read || (hwc->state & PERF_HES_UPTODATE)) return 0; cmask = riscv_pmu_ctr_get_width_mask(event); diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index a2e4005e1fd0..4e842dcedfba 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -20,6 +20,7 @@ #include <linux/cpu_pm.h> #include <linux/sched/clock.h> #include <linux/soc/andes/irq.h> +#include <linux/workqueue.h> #include <asm/errata_list.h> #include <asm/sbi.h> @@ -114,7 +115,7 @@ struct sbi_pmu_event_data { }; }; -static const struct sbi_pmu_event_data pmu_hw_event_map[] = { +static struct sbi_pmu_event_data pmu_hw_event_map[] = { [PERF_COUNT_HW_CPU_CYCLES] = {.hw_gen_event = { SBI_PMU_HW_CPU_CYCLES, SBI_PMU_EVENT_TYPE_HW, 0}}, @@ -148,7 +149,7 @@ static const struct sbi_pmu_event_data pmu_hw_event_map[] = { }; #define C(x) PERF_COUNT_HW_CACHE_##x -static const struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX] +static struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { [C(L1D)] = { @@ -293,6 +294,34 @@ static const struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_M }, }; +static void pmu_sbi_check_event(struct sbi_pmu_event_data *edata) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, + 0, cmask, 0, edata->event_idx, 0, 0); + if (!ret.error) { + sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, + ret.value, 0x1, SBI_PMU_STOP_FLAG_RESET, 0, 0, 0); + } else if (ret.error == SBI_ERR_NOT_SUPPORTED) { + /* This event cannot be monitored by any counter */ + edata->event_idx = -EINVAL; + } +} + +static void pmu_sbi_check_std_events(struct work_struct *work) +{ + for (int i = 0; i < ARRAY_SIZE(pmu_hw_event_map); i++) + pmu_sbi_check_event(&pmu_hw_event_map[i]); + + for (int i = 0; i < ARRAY_SIZE(pmu_cache_event_map); i++) + for (int j = 0; j < ARRAY_SIZE(pmu_cache_event_map[i]); j++) + for (int k = 0; k < ARRAY_SIZE(pmu_cache_event_map[i][j]); k++) + pmu_sbi_check_event(&pmu_cache_event_map[i][j][k]); +} + +static DECLARE_WORK(check_std_events_work, pmu_sbi_check_std_events); + static int pmu_sbi_ctr_get_width(int idx) { return pmu_ctr_list[idx].width; @@ -478,6 +507,12 @@ static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig) u64 raw_config_val; int ret; + /* + * Ensure we are finished checking standard hardware events for + * validity before allowing userspace to configure any events. + */ + flush_work(&check_std_events_work); + switch (type) { case PERF_TYPE_HARDWARE: if (config >= PERF_COUNT_HW_MAX) @@ -762,7 +797,7 @@ static inline void pmu_sbi_stop_all(struct riscv_pmu *pmu) * which may include counters that are not enabled yet. */ sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, - 0, pmu->cmask, 0, 0, 0, 0); + 0, pmu->cmask, SBI_PMU_STOP_FLAG_RESET, 0, 0, 0); } static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu) @@ -1359,6 +1394,9 @@ static int pmu_sbi_device_probe(struct platform_device *pdev) if (ret) goto out_unregister; + /* Asynchronously check which standard events are available */ + schedule_work(&check_std_events_work); + return 0; out_unregister: |