From 1888d3ddc3d6a2511be86045cfb2e7ea5fc67c44 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 15 Feb 2018 18:51:41 +0000 Subject: drivers/bus: Move Arm CCN PMU driver The arm-ccn driver is purely a perf driver for the CCN PMU, not a bus driver in the sense of the other residents of drivers/bus/, so let's move it to the appropriate place for SoC PMU drivers. Not to mention moving the documentation accordingly as well. Acked-by: Pawel Moll Acked-by: Will Deacon Signed-off-by: Robin Murphy Signed-off-by: Arnd Bergmann --- drivers/perf/Kconfig | 7 + drivers/perf/Makefile | 1 + drivers/perf/arm-ccn.c | 1597 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1605 insertions(+) create mode 100644 drivers/perf/arm-ccn.c (limited to 'drivers/perf') diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index da5724cd89cf..331b6d992b5a 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -5,6 +5,13 @@ menu "Performance monitor support" depends on PERF_EVENTS +config ARM_CCN + tristate "ARM CCN driver support" + depends on ARM || ARM64 + help + PMU (perf) driver supporting the ARM CCN (Cache Coherent Network) + interconnect. + config ARM_PMU depends on ARM || ARM64 bool "ARM PMU framework" diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index c2f27419bdf0..5004abee0f3a 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_ARM_CCN) += arm-ccn.o obj-$(CONFIG_ARM_DSU_PMU) += arm_dsu_pmu.o obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c new file mode 100644 index 000000000000..b52332e52ca5 --- /dev/null +++ b/drivers/perf/arm-ccn.c @@ -0,0 +1,1597 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Copyright (C) 2014 ARM Limited + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CCN_NUM_XP_PORTS 2 +#define CCN_NUM_VCS 4 +#define CCN_NUM_REGIONS 256 +#define CCN_REGION_SIZE 0x10000 + +#define CCN_ALL_OLY_ID 0xff00 +#define CCN_ALL_OLY_ID__OLY_ID__SHIFT 0 +#define CCN_ALL_OLY_ID__OLY_ID__MASK 0x1f +#define CCN_ALL_OLY_ID__NODE_ID__SHIFT 8 +#define CCN_ALL_OLY_ID__NODE_ID__MASK 0x3f + +#define CCN_MN_ERRINT_STATUS 0x0008 +#define CCN_MN_ERRINT_STATUS__INTREQ__DESSERT 0x11 +#define CCN_MN_ERRINT_STATUS__ALL_ERRORS__ENABLE 0x02 +#define CCN_MN_ERRINT_STATUS__ALL_ERRORS__DISABLED 0x20 +#define CCN_MN_ERRINT_STATUS__ALL_ERRORS__DISABLE 0x22 +#define CCN_MN_ERRINT_STATUS__CORRECTED_ERRORS_ENABLE 0x04 +#define CCN_MN_ERRINT_STATUS__CORRECTED_ERRORS_DISABLED 0x40 +#define CCN_MN_ERRINT_STATUS__CORRECTED_ERRORS_DISABLE 0x44 +#define CCN_MN_ERRINT_STATUS__PMU_EVENTS__ENABLE 0x08 +#define CCN_MN_ERRINT_STATUS__PMU_EVENTS__DISABLED 0x80 +#define CCN_MN_ERRINT_STATUS__PMU_EVENTS__DISABLE 0x88 +#define CCN_MN_OLY_COMP_LIST_63_0 0x01e0 +#define CCN_MN_ERR_SIG_VAL_63_0 0x0300 +#define CCN_MN_ERR_SIG_VAL_63_0__DT (1 << 1) + +#define CCN_DT_ACTIVE_DSM 0x0000 +#define CCN_DT_ACTIVE_DSM__DSM_ID__SHIFT(n) ((n) * 8) +#define CCN_DT_ACTIVE_DSM__DSM_ID__MASK 0xff +#define CCN_DT_CTL 0x0028 +#define CCN_DT_CTL__DT_EN (1 << 0) +#define CCN_DT_PMEVCNT(n) (0x0100 + (n) * 0x8) +#define CCN_DT_PMCCNTR 0x0140 +#define CCN_DT_PMCCNTRSR 0x0190 +#define CCN_DT_PMOVSR 0x0198 +#define CCN_DT_PMOVSR_CLR 0x01a0 +#define CCN_DT_PMOVSR_CLR__MASK 0x1f +#define CCN_DT_PMCR 0x01a8 +#define CCN_DT_PMCR__OVFL_INTR_EN (1 << 6) +#define CCN_DT_PMCR__PMU_EN (1 << 0) +#define CCN_DT_PMSR 0x01b0 +#define CCN_DT_PMSR_REQ 0x01b8 +#define CCN_DT_PMSR_CLR 0x01c0 + +#define CCN_HNF_PMU_EVENT_SEL 0x0600 +#define CCN_HNF_PMU_EVENT_SEL__ID__SHIFT(n) ((n) * 4) +#define CCN_HNF_PMU_EVENT_SEL__ID__MASK 0xf + +#define CCN_XP_DT_CONFIG 0x0300 +#define CCN_XP_DT_CONFIG__DT_CFG__SHIFT(n) ((n) * 4) +#define CCN_XP_DT_CONFIG__DT_CFG__MASK 0xf +#define CCN_XP_DT_CONFIG__DT_CFG__PASS_THROUGH 0x0 +#define CCN_XP_DT_CONFIG__DT_CFG__WATCHPOINT_0_OR_1 0x1 +#define CCN_XP_DT_CONFIG__DT_CFG__WATCHPOINT(n) (0x2 + (n)) +#define CCN_XP_DT_CONFIG__DT_CFG__XP_PMU_EVENT(n) (0x4 + (n)) +#define CCN_XP_DT_CONFIG__DT_CFG__DEVICE_PMU_EVENT(d, n) (0x8 + (d) * 4 + (n)) +#define CCN_XP_DT_INTERFACE_SEL 0x0308 +#define CCN_XP_DT_INTERFACE_SEL__DT_IO_SEL__SHIFT(n) (0 + (n) * 8) +#define CCN_XP_DT_INTERFACE_SEL__DT_IO_SEL__MASK 0x1 +#define CCN_XP_DT_INTERFACE_SEL__DT_DEV_SEL__SHIFT(n) (1 + (n) * 8) +#define CCN_XP_DT_INTERFACE_SEL__DT_DEV_SEL__MASK 0x1 +#define CCN_XP_DT_INTERFACE_SEL__DT_VC_SEL__SHIFT(n) (2 + (n) * 8) +#define CCN_XP_DT_INTERFACE_SEL__DT_VC_SEL__MASK 0x3 +#define CCN_XP_DT_CMP_VAL_L(n) (0x0310 + (n) * 0x40) +#define CCN_XP_DT_CMP_VAL_H(n) (0x0318 + (n) * 0x40) +#define CCN_XP_DT_CMP_MASK_L(n) (0x0320 + (n) * 0x40) +#define CCN_XP_DT_CMP_MASK_H(n) (0x0328 + (n) * 0x40) +#define CCN_XP_DT_CONTROL 0x0370 +#define CCN_XP_DT_CONTROL__DT_ENABLE (1 << 0) +#define CCN_XP_DT_CONTROL__WP_ARM_SEL__SHIFT(n) (12 + (n) * 4) +#define CCN_XP_DT_CONTROL__WP_ARM_SEL__MASK 0xf +#define CCN_XP_DT_CONTROL__WP_ARM_SEL__ALWAYS 0xf +#define CCN_XP_PMU_EVENT_SEL 0x0600 +#define CCN_XP_PMU_EVENT_SEL__ID__SHIFT(n) ((n) * 7) +#define CCN_XP_PMU_EVENT_SEL__ID__MASK 0x3f + +#define CCN_SBAS_PMU_EVENT_SEL 0x0600 +#define CCN_SBAS_PMU_EVENT_SEL__ID__SHIFT(n) ((n) * 4) +#define CCN_SBAS_PMU_EVENT_SEL__ID__MASK 0xf + +#define CCN_RNI_PMU_EVENT_SEL 0x0600 +#define CCN_RNI_PMU_EVENT_SEL__ID__SHIFT(n) ((n) * 4) +#define CCN_RNI_PMU_EVENT_SEL__ID__MASK 0xf + +#define CCN_TYPE_MN 0x01 +#define CCN_TYPE_DT 0x02 +#define CCN_TYPE_HNF 0x04 +#define CCN_TYPE_HNI 0x05 +#define CCN_TYPE_XP 0x08 +#define CCN_TYPE_SBSX 0x0c +#define CCN_TYPE_SBAS 0x10 +#define CCN_TYPE_RNI_1P 0x14 +#define CCN_TYPE_RNI_2P 0x15 +#define CCN_TYPE_RNI_3P 0x16 +#define CCN_TYPE_RND_1P 0x18 /* RN-D = RN-I + DVM */ +#define CCN_TYPE_RND_2P 0x19 +#define CCN_TYPE_RND_3P 0x1a +#define CCN_TYPE_CYCLES 0xff /* Pseudotype */ + +#define CCN_EVENT_WATCHPOINT 0xfe /* Pseudoevent */ + +#define CCN_NUM_PMU_EVENTS 4 +#define CCN_NUM_XP_WATCHPOINTS 2 /* See DT.dbg_id.num_watchpoints */ +#define CCN_NUM_PMU_EVENT_COUNTERS 8 /* See DT.dbg_id.num_pmucntr */ +#define CCN_IDX_PMU_CYCLE_COUNTER CCN_NUM_PMU_EVENT_COUNTERS + +#define CCN_NUM_PREDEFINED_MASKS 4 +#define CCN_IDX_MASK_ANY (CCN_NUM_PMU_EVENT_COUNTERS + 0) +#define CCN_IDX_MASK_EXACT (CCN_NUM_PMU_EVENT_COUNTERS + 1) +#define CCN_IDX_MASK_ORDER (CCN_NUM_PMU_EVENT_COUNTERS + 2) +#define CCN_IDX_MASK_OPCODE (CCN_NUM_PMU_EVENT_COUNTERS + 3) + +struct arm_ccn_component { + void __iomem *base; + u32 type; + + DECLARE_BITMAP(pmu_events_mask, CCN_NUM_PMU_EVENTS); + union { + struct { + DECLARE_BITMAP(dt_cmp_mask, CCN_NUM_XP_WATCHPOINTS); + } xp; + }; +}; + +#define pmu_to_arm_ccn(_pmu) container_of(container_of(_pmu, \ + struct arm_ccn_dt, pmu), struct arm_ccn, dt) + +struct arm_ccn_dt { + int id; + void __iomem *base; + + spinlock_t config_lock; + + DECLARE_BITMAP(pmu_counters_mask, CCN_NUM_PMU_EVENT_COUNTERS + 1); + struct { + struct arm_ccn_component *source; + struct perf_event *event; + } pmu_counters[CCN_NUM_PMU_EVENT_COUNTERS + 1]; + + struct { + u64 l, h; + } cmp_mask[CCN_NUM_PMU_EVENT_COUNTERS + CCN_NUM_PREDEFINED_MASKS]; + + struct hrtimer hrtimer; + + cpumask_t cpu; + struct hlist_node node; + + struct pmu pmu; +}; + +struct arm_ccn { + struct device *dev; + void __iomem *base; + unsigned int irq; + + unsigned sbas_present:1; + unsigned sbsx_present:1; + + int num_nodes; + struct arm_ccn_component *node; + + int num_xps; + struct arm_ccn_component *xp; + + struct arm_ccn_dt dt; + int mn_id; +}; + +static int arm_ccn_node_to_xp(int node) +{ + return node / CCN_NUM_XP_PORTS; +} + +static int arm_ccn_node_to_xp_port(int node) +{ + return node % CCN_NUM_XP_PORTS; +} + + +/* + * Bit shifts and masks in these defines must be kept in sync with + * arm_ccn_pmu_config_set() and CCN_FORMAT_ATTRs below! + */ +#define CCN_CONFIG_NODE(_config) (((_config) >> 0) & 0xff) +#define CCN_CONFIG_XP(_config) (((_config) >> 0) & 0xff) +#define CCN_CONFIG_TYPE(_config) (((_config) >> 8) & 0xff) +#define CCN_CONFIG_EVENT(_config) (((_config) >> 16) & 0xff) +#define CCN_CONFIG_PORT(_config) (((_config) >> 24) & 0x3) +#define CCN_CONFIG_BUS(_config) (((_config) >> 24) & 0x3) +#define CCN_CONFIG_VC(_config) (((_config) >> 26) & 0x7) +#define CCN_CONFIG_DIR(_config) (((_config) >> 29) & 0x1) +#define CCN_CONFIG_MASK(_config) (((_config) >> 30) & 0xf) + +static void arm_ccn_pmu_config_set(u64 *config, u32 node_xp, u32 type, u32 port) +{ + *config &= ~((0xff << 0) | (0xff << 8) | (0x3 << 24)); + *config |= (node_xp << 0) | (type << 8) | (port << 24); +} + +static ssize_t arm_ccn_pmu_format_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dev_ext_attribute *ea = container_of(attr, + struct dev_ext_attribute, attr); + + return snprintf(buf, PAGE_SIZE, "%s\n", (char *)ea->var); +} + +#define CCN_FORMAT_ATTR(_name, _config) \ + struct dev_ext_attribute arm_ccn_pmu_format_attr_##_name = \ + { __ATTR(_name, S_IRUGO, arm_ccn_pmu_format_show, \ + NULL), _config } + +static CCN_FORMAT_ATTR(node, "config:0-7"); +static CCN_FORMAT_ATTR(xp, "config:0-7"); +static CCN_FORMAT_ATTR(type, "config:8-15"); +static CCN_FORMAT_ATTR(event, "config:16-23"); +static CCN_FORMAT_ATTR(port, "config:24-25"); +static CCN_FORMAT_ATTR(bus, "config:24-25"); +static CCN_FORMAT_ATTR(vc, "config:26-28"); +static CCN_FORMAT_ATTR(dir, "config:29-29"); +static CCN_FORMAT_ATTR(mask, "config:30-33"); +static CCN_FORMAT_ATTR(cmp_l, "config1:0-62"); +static CCN_FORMAT_ATTR(cmp_h, "config2:0-59"); + +static struct attribute *arm_ccn_pmu_format_attrs[] = { + &arm_ccn_pmu_format_attr_node.attr.attr, + &arm_ccn_pmu_format_attr_xp.attr.attr, + &arm_ccn_pmu_format_attr_type.attr.attr, + &arm_ccn_pmu_format_attr_event.attr.attr, + &arm_ccn_pmu_format_attr_port.attr.attr, + &arm_ccn_pmu_format_attr_bus.attr.attr, + &arm_ccn_pmu_format_attr_vc.attr.attr, + &arm_ccn_pmu_format_attr_dir.attr.attr, + &arm_ccn_pmu_format_attr_mask.attr.attr, + &arm_ccn_pmu_format_attr_cmp_l.attr.attr, + &arm_ccn_pmu_format_attr_cmp_h.attr.attr, + NULL +}; + +static const struct attribute_group arm_ccn_pmu_format_attr_group = { + .name = "format", + .attrs = arm_ccn_pmu_format_attrs, +}; + + +struct arm_ccn_pmu_event { + struct device_attribute attr; + u32 type; + u32 event; + int num_ports; + int num_vcs; + const char *def; + int mask; +}; + +#define CCN_EVENT_ATTR(_name) \ + __ATTR(_name, S_IRUGO, arm_ccn_pmu_event_show, NULL) + +/* + * Events defined in TRM for MN, HN-I and SBSX are actually watchpoints set on + * their ports in XP they are connected to. For the sake of usability they are + * explicitly defined here (and translated into a relevant watchpoint in + * arm_ccn_pmu_event_init()) so the user can easily request them without deep + * knowledge of the flit format. + */ + +#define CCN_EVENT_MN(_name, _def, _mask) { .attr = CCN_EVENT_ATTR(mn_##_name), \ + .type = CCN_TYPE_MN, .event = CCN_EVENT_WATCHPOINT, \ + .num_ports = CCN_NUM_XP_PORTS, .num_vcs = CCN_NUM_VCS, \ + .def = _def, .mask = _mask, } + +#define CCN_EVENT_HNI(_name, _def, _mask) { \ + .attr = CCN_EVENT_ATTR(hni_##_name), .type = CCN_TYPE_HNI, \ + .event = CCN_EVENT_WATCHPOINT, .num_ports = CCN_NUM_XP_PORTS, \ + .num_vcs = CCN_NUM_VCS, .def = _def, .mask = _mask, } + +#define CCN_EVENT_SBSX(_name, _def, _mask) { \ + .attr = CCN_EVENT_ATTR(sbsx_##_name), .type = CCN_TYPE_SBSX, \ + .event = CCN_EVENT_WATCHPOINT, .num_ports = CCN_NUM_XP_PORTS, \ + .num_vcs = CCN_NUM_VCS, .def = _def, .mask = _mask, } + +#define CCN_EVENT_HNF(_name, _event) { .attr = CCN_EVENT_ATTR(hnf_##_name), \ + .type = CCN_TYPE_HNF, .event = _event, } + +#define CCN_EVENT_XP(_name, _event) { .attr = CCN_EVENT_ATTR(xp_##_name), \ + .type = CCN_TYPE_XP, .event = _event, \ + .num_ports = CCN_NUM_XP_PORTS, .num_vcs = CCN_NUM_VCS, } + +/* + * RN-I & RN-D (RN-D = RN-I + DVM) nodes have different type ID depending + * on configuration. One of them is picked to represent the whole group, + * as they all share the same event types. + */ +#define CCN_EVENT_RNI(_name, _event) { .attr = CCN_EVENT_ATTR(rni_##_name), \ + .type = CCN_TYPE_RNI_3P, .event = _event, } + +#define CCN_EVENT_SBAS(_name, _event) { .attr = CCN_EVENT_ATTR(sbas_##_name), \ + .type = CCN_TYPE_SBAS, .event = _event, } + +#define CCN_EVENT_CYCLES(_name) { .attr = CCN_EVENT_ATTR(_name), \ + .type = CCN_TYPE_CYCLES } + + +static ssize_t arm_ccn_pmu_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct arm_ccn *ccn = pmu_to_arm_ccn(dev_get_drvdata(dev)); + struct arm_ccn_pmu_event *event = container_of(attr, + struct arm_ccn_pmu_event, attr); + ssize_t res; + + res = snprintf(buf, PAGE_SIZE, "type=0x%x", event->type); + if (event->event) + res += snprintf(buf + res, PAGE_SIZE - res, ",event=0x%x", + event->event); + if (event->def) + res += snprintf(buf + res, PAGE_SIZE - res, ",%s", + event->def); + if (event->mask) + res += snprintf(buf + res, PAGE_SIZE - res, ",mask=0x%x", + event->mask); + + /* Arguments required by an event */ + switch (event->type) { + case CCN_TYPE_CYCLES: + break; + case CCN_TYPE_XP: + res += snprintf(buf + res, PAGE_SIZE - res, + ",xp=?,vc=?"); + if (event->event == CCN_EVENT_WATCHPOINT) + res += snprintf(buf + res, PAGE_SIZE - res, + ",port=?,dir=?,cmp_l=?,cmp_h=?,mask=?"); + else + res += snprintf(buf + res, PAGE_SIZE - res, + ",bus=?"); + + break; + case CCN_TYPE_MN: + res += snprintf(buf + res, PAGE_SIZE - res, ",node=%d", ccn->mn_id); + break; + default: + res += snprintf(buf + res, PAGE_SIZE - res, ",node=?"); + break; + } + + res += snprintf(buf + res, PAGE_SIZE - res, "\n"); + + return res; +} + +static umode_t arm_ccn_pmu_events_is_visible(struct kobject *kobj, + struct attribute *attr, int index) +{ + struct device *dev = kobj_to_dev(kobj); + struct arm_ccn *ccn = pmu_to_arm_ccn(dev_get_drvdata(dev)); + struct device_attribute *dev_attr = container_of(attr, + struct device_attribute, attr); + struct arm_ccn_pmu_event *event = container_of(dev_attr, + struct arm_ccn_pmu_event, attr); + + if (event->type == CCN_TYPE_SBAS && !ccn->sbas_present) + return 0; + if (event->type == CCN_TYPE_SBSX && !ccn->sbsx_present) + return 0; + + return attr->mode; +} + +static struct arm_ccn_pmu_event arm_ccn_pmu_events[] = { + CCN_EVENT_MN(eobarrier, "dir=1,vc=0,cmp_h=0x1c00", CCN_IDX_MASK_OPCODE), + CCN_EVENT_MN(ecbarrier, "dir=1,vc=0,cmp_h=0x1e00", CCN_IDX_MASK_OPCODE), + CCN_EVENT_MN(dvmop, "dir=1,vc=0,cmp_h=0x2800", CCN_IDX_MASK_OPCODE), + CCN_EVENT_HNI(txdatflits, "dir=1,vc=3", CCN_IDX_MASK_ANY), + CCN_EVENT_HNI(rxdatflits, "dir=0,vc=3", CCN_IDX_MASK_ANY), + CCN_EVENT_HNI(txreqflits, "dir=1,vc=0", CCN_IDX_MASK_ANY), + CCN_EVENT_HNI(rxreqflits, "dir=0,vc=0", CCN_IDX_MASK_ANY), + CCN_EVENT_HNI(rxreqflits_order, "dir=0,vc=0,cmp_h=0x8000", + CCN_IDX_MASK_ORDER), + CCN_EVENT_SBSX(txdatflits, "dir=1,vc=3", CCN_IDX_MASK_ANY), + CCN_EVENT_SBSX(rxdatflits, "dir=0,vc=3", CCN_IDX_MASK_ANY), + CCN_EVENT_SBSX(txreqflits, "dir=1,vc=0", CCN_IDX_MASK_ANY), + CCN_EVENT_SBSX(rxreqflits, "dir=0,vc=0", CCN_IDX_MASK_ANY), + CCN_EVENT_SBSX(rxreqflits_order, "dir=0,vc=0,cmp_h=0x8000", + CCN_IDX_MASK_ORDER), + CCN_EVENT_HNF(cache_miss, 0x1), + CCN_EVENT_HNF(l3_sf_cache_access, 0x02), + CCN_EVENT_HNF(cache_fill, 0x3), + CCN_EVENT_HNF(pocq_retry, 0x4), + CCN_EVENT_HNF(pocq_reqs_recvd, 0x5), + CCN_EVENT_HNF(sf_hit, 0x6), + CCN_EVENT_HNF(sf_evictions, 0x7), + CCN_EVENT_HNF(snoops_sent, 0x8), + CCN_EVENT_HNF(snoops_broadcast, 0x9), + CCN_EVENT_HNF(l3_eviction, 0xa), + CCN_EVENT_HNF(l3_fill_invalid_way, 0xb), + CCN_EVENT_HNF(mc_retries, 0xc), + CCN_EVENT_HNF(mc_reqs, 0xd), + CCN_EVENT_HNF(qos_hh_retry, 0xe), + CCN_EVENT_RNI(rdata_beats_p0, 0x1), + CCN_EVENT_RNI(rdata_beats_p1, 0x2), + CCN_EVENT_RNI(rdata_beats_p2, 0x3), + CCN_EVENT_RNI(rxdat_flits, 0x4), + CCN_EVENT_RNI(txdat_flits, 0x5), + CCN_EVENT_RNI(txreq_flits, 0x6), + CCN_EVENT_RNI(txreq_flits_retried, 0x7), + CCN_EVENT_RNI(rrt_full, 0x8), + CCN_EVENT_RNI(wrt_full, 0x9), + CCN_EVENT_RNI(txreq_flits_replayed, 0xa), + CCN_EVENT_XP(upload_starvation, 0x1), + CCN_EVENT_XP(download_starvation, 0x2), + CCN_EVENT_XP(respin, 0x3), + CCN_EVENT_XP(valid_flit, 0x4), + CCN_EVENT_XP(watchpoint, CCN_EVENT_WATCHPOINT), + CCN_EVENT_SBAS(rdata_beats_p0, 0x1), + CCN_EVENT_SBAS(rxdat_flits, 0x4), + CCN_EVENT_SBAS(txdat_flits, 0x5), + CCN_EVENT_SBAS(txreq_flits, 0x6), + CCN_EVENT_SBAS(txreq_flits_retried, 0x7), + CCN_EVENT_SBAS(rrt_full, 0x8), + CCN_EVENT_SBAS(wrt_full, 0x9), + CCN_EVENT_SBAS(txreq_flits_replayed, 0xa), + CCN_EVENT_CYCLES(cycles), +}; + +/* Populated in arm_ccn_init() */ +static struct attribute + *arm_ccn_pmu_events_attrs[ARRAY_SIZE(arm_ccn_pmu_events) + 1]; + +static const struct attribute_group arm_ccn_pmu_events_attr_group = { + .name = "events", + .is_visible = arm_ccn_pmu_events_is_visible, + .attrs = arm_ccn_pmu_events_attrs, +}; + + +static u64 *arm_ccn_pmu_get_cmp_mask(struct arm_ccn *ccn, const char *name) +{ + unsigned long i; + + if (WARN_ON(!name || !name[0] || !isxdigit(name[0]) || !name[1])) + return NULL; + i = isdigit(name[0]) ? name[0] - '0' : 0xa + tolower(name[0]) - 'a'; + + switch (name[1]) { + case 'l': + return &ccn->dt.cmp_mask[i].l; + case 'h': + return &ccn->dt.cmp_mask[i].h; + default: + return NULL; + } +} + +static ssize_t arm_ccn_pmu_cmp_mask_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct arm_ccn *ccn = pmu_to_arm_ccn(dev_get_drvdata(dev)); + u64 *mask = arm_ccn_pmu_get_cmp_mask(ccn, attr->attr.name); + + return mask ? snprintf(buf, PAGE_SIZE, "0x%016llx\n", *mask) : -EINVAL; +} + +static ssize_t arm_ccn_pmu_cmp_mask_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct arm_ccn *ccn = pmu_to_arm_ccn(dev_get_drvdata(dev)); + u64 *mask = arm_ccn_pmu_get_cmp_mask(ccn, attr->attr.name); + int err = -EINVAL; + + if (mask) + err = kstrtoull(buf, 0, mask); + + return err ? err : count; +} + +#define CCN_CMP_MASK_ATTR(_name) \ + struct device_attribute arm_ccn_pmu_cmp_mask_attr_##_name = \ + __ATTR(_name, S_IRUGO | S_IWUSR, \ + arm_ccn_pmu_cmp_mask_show, arm_ccn_pmu_cmp_mask_store) + +#define CCN_CMP_MASK_ATTR_RO(_name) \ + struct device_attribute arm_ccn_pmu_cmp_mask_attr_##_name = \ + __ATTR(_name, S_IRUGO, arm_ccn_pmu_cmp_mask_show, NULL) + +static CCN_CMP_MASK_ATTR(0l); +static CCN_CMP_MASK_ATTR(0h); +static CCN_CMP_MASK_ATTR(1l); +static CCN_CMP_MASK_ATTR(1h); +static CCN_CMP_MASK_ATTR(2l); +static CCN_CMP_MASK_ATTR(2h); +static CCN_CMP_MASK_ATTR(3l); +static CCN_CMP_MASK_ATTR(3h); +static CCN_CMP_MASK_ATTR(4l); +static CCN_CMP_MASK_ATTR(4h); +static CCN_CMP_MASK_ATTR(5l); +static CCN_CMP_MASK_ATTR(5h); +static CCN_CMP_MASK_ATTR(6l); +static CCN_CMP_MASK_ATTR(6h); +static CCN_CMP_MASK_ATTR(7l); +static CCN_CMP_MASK_ATTR(7h); +static CCN_CMP_MASK_ATTR_RO(8l); +static CCN_CMP_MASK_ATTR_RO(8h); +static CCN_CMP_MASK_ATTR_RO(9l); +static CCN_CMP_MASK_ATTR_RO(9h); +static CCN_CMP_MASK_ATTR_RO(al); +static CCN_CMP_MASK_ATTR_RO(ah); +static CCN_CMP_MASK_ATTR_RO(bl); +static CCN_CMP_MASK_ATTR_RO(bh); + +static struct attribute *arm_ccn_pmu_cmp_mask_attrs[] = { + &arm_ccn_pmu_cmp_mask_attr_0l.attr, &arm_ccn_pmu_cmp_mask_attr_0h.attr, + &arm_ccn_pmu_cmp_mask_attr_1l.attr, &arm_ccn_pmu_cmp_mask_attr_1h.attr, + &arm_ccn_pmu_cmp_mask_attr_2l.attr, &arm_ccn_pmu_cmp_mask_attr_2h.attr, + &arm_ccn_pmu_cmp_mask_attr_3l.attr, &arm_ccn_pmu_cmp_mask_attr_3h.attr, + &arm_ccn_pmu_cmp_mask_attr_4l.attr, &arm_ccn_pmu_cmp_mask_attr_4h.attr, + &arm_ccn_pmu_cmp_mask_attr_5l.attr, &arm_ccn_pmu_cmp_mask_attr_5h.attr, + &arm_ccn_pmu_cmp_mask_attr_6l.attr, &arm_ccn_pmu_cmp_mask_attr_6h.attr, + &arm_ccn_pmu_cmp_mask_attr_7l.attr, &arm_ccn_pmu_cmp_mask_attr_7h.attr, + &arm_ccn_pmu_cmp_mask_attr_8l.attr, &arm_ccn_pmu_cmp_mask_attr_8h.attr, + &arm_ccn_pmu_cmp_mask_attr_9l.attr, &arm_ccn_pmu_cmp_mask_attr_9h.attr, + &arm_ccn_pmu_cmp_mask_attr_al.attr, &arm_ccn_pmu_cmp_mask_attr_ah.attr, + &arm_ccn_pmu_cmp_mask_attr_bl.attr, &arm_ccn_pmu_cmp_mask_attr_bh.attr, + NULL +}; + +static const struct attribute_group arm_ccn_pmu_cmp_mask_attr_group = { + .name = "cmp_mask", + .attrs = arm_ccn_pmu_cmp_mask_attrs, +}; + +static ssize_t arm_ccn_pmu_cpumask_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct arm_ccn *ccn = pmu_to_arm_ccn(dev_get_drvdata(dev)); + + return cpumap_print_to_pagebuf(true, buf, &ccn->dt.cpu); +} + +static struct device_attribute arm_ccn_pmu_cpumask_attr = + __ATTR(cpumask, S_IRUGO, arm_ccn_pmu_cpumask_show, NULL); + +static struct attribute *arm_ccn_pmu_cpumask_attrs[] = { + &arm_ccn_pmu_cpumask_attr.attr, + NULL, +}; + +static const struct attribute_group arm_ccn_pmu_cpumask_attr_group = { + .attrs = arm_ccn_pmu_cpumask_attrs, +}; + +/* + * Default poll period is 10ms, which is way over the top anyway, + * as in the worst case scenario (an event every cycle), with 1GHz + * clocked bus, the smallest, 32 bit counter will overflow in + * more than 4s. + */ +static unsigned int arm_ccn_pmu_poll_period_us = 10000; +module_param_named(pmu_poll_period_us, arm_ccn_pmu_poll_period_us, uint, + S_IRUGO | S_IWUSR); + +static ktime_t arm_ccn_pmu_timer_period(void) +{ + return ns_to_ktime((u64)arm_ccn_pmu_poll_period_us * 1000); +} + + +static const struct attribute_group *arm_ccn_pmu_attr_groups[] = { + &arm_ccn_pmu_events_attr_group, + &arm_ccn_pmu_format_attr_group, + &arm_ccn_pmu_cmp_mask_attr_group, + &arm_ccn_pmu_cpumask_attr_group, + NULL +}; + + +static int arm_ccn_pmu_alloc_bit(unsigned long *bitmap, unsigned long size) +{ + int bit; + + do { + bit = find_first_zero_bit(bitmap, size); + if (bit >= size) + return -EAGAIN; + } while (test_and_set_bit(bit, bitmap)); + + return bit; +} + +/* All RN-I and RN-D nodes have identical PMUs */ +static int arm_ccn_pmu_type_eq(u32 a, u32 b) +{ + if (a == b) + return 1; + + switch (a) { + case CCN_TYPE_RNI_1P: + case CCN_TYPE_RNI_2P: + case CCN_TYPE_RNI_3P: + case CCN_TYPE_RND_1P: + case CCN_TYPE_RND_2P: + case CCN_TYPE_RND_3P: + switch (b) { + case CCN_TYPE_RNI_1P: + case CCN_TYPE_RNI_2P: + case CCN_TYPE_RNI_3P: + case CCN_TYPE_RND_1P: + case CCN_TYPE_RND_2P: + case CCN_TYPE_RND_3P: + return 1; + } + break; + } + + return 0; +} + +static int arm_ccn_pmu_event_alloc(struct perf_event *event) +{ + struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu); + struct hw_perf_event *hw = &event->hw; + u32 node_xp, type, event_id; + struct arm_ccn_component *source; + int bit; + + node_xp = CCN_CONFIG_NODE(event->attr.config); + type = CCN_CONFIG_TYPE(event->attr.config); + event_id = CCN_CONFIG_EVENT(event->attr.config); + + /* Allocate the cycle counter */ + if (type == CCN_TYPE_CYCLES) { + if (test_and_set_bit(CCN_IDX_PMU_CYCLE_COUNTER, + ccn->dt.pmu_counters_mask)) + return -EAGAIN; + + hw->idx = CCN_IDX_PMU_CYCLE_COUNTER; + ccn->dt.pmu_counters[CCN_IDX_PMU_CYCLE_COUNTER].event = event; + + return 0; + } + + /* Allocate an event counter */ + hw->idx = arm_ccn_pmu_alloc_bit(ccn->dt.pmu_counters_mask, + CCN_NUM_PMU_EVENT_COUNTERS); + if (hw->idx < 0) { + dev_dbg(ccn->dev, "No more counters available!\n"); + return -EAGAIN; + } + + if (type == CCN_TYPE_XP) + source = &ccn->xp[node_xp]; + else + source = &ccn->node[node_xp]; + ccn->dt.pmu_counters[hw->idx].source = source; + + /* Allocate an event source or a watchpoint */ + if (type == CCN_TYPE_XP && event_id == CCN_EVENT_WATCHPOINT) + bit = arm_ccn_pmu_alloc_bit(source->xp.dt_cmp_mask, + CCN_NUM_XP_WATCHPOINTS); + else + bit = arm_ccn_pmu_alloc_bit(source->pmu_events_mask, + CCN_NUM_PMU_EVENTS); + if (bit < 0) { + dev_dbg(ccn->dev, "No more event sources/watchpoints on node/XP %d!\n", + node_xp); + clear_bit(hw->idx, ccn->dt.pmu_counters_mask); + return -EAGAIN; + } + hw->config_base = bit; + + ccn->dt.pmu_counters[hw->idx].event = event; + + return 0; +} + +static void arm_ccn_pmu_event_release(struct perf_event *event) +{ + struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu); + struct hw_perf_event *hw = &event->hw; + + if (hw->idx == CCN_IDX_PMU_CYCLE_COUNTER) { + clear_bit(CCN_IDX_PMU_CYCLE_COUNTER, ccn->dt.pmu_counters_mask); + } else { + struct arm_ccn_component *source = + ccn->dt.pmu_counters[hw->idx].source; + + if (CCN_CONFIG_TYPE(event->attr.config) == CCN_TYPE_XP && + CCN_CONFIG_EVENT(event->attr.config) == + CCN_EVENT_WATCHPOINT) + clear_bit(hw->config_base, source->xp.dt_cmp_mask); + else + clear_bit(hw->config_base, source->pmu_events_mask); + clear_bit(hw->idx, ccn->dt.pmu_counters_mask); + } + + ccn->dt.pmu_counters[hw->idx].source = NULL; + ccn->dt.pmu_counters[hw->idx].event = NULL; +} + +static int arm_ccn_pmu_event_init(struct perf_event *event) +{ + struct arm_ccn *ccn; + struct hw_perf_event *hw = &event->hw; + u32 node_xp, type, event_id; + int valid; + int i; + struct perf_event *sibling; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + ccn = pmu_to_arm_ccn(event->pmu); + + if (hw->sample_period) { + dev_warn(ccn->dev, "Sampling not supported!\n"); + return -EOPNOTSUPP; + } + + if (has_branch_stack(event) || event->attr.exclude_user || + event->attr.exclude_kernel || event->attr.exclude_hv || + event->attr.exclude_idle || event->attr.exclude_host || + event->attr.exclude_guest) { + dev_warn(ccn->dev, "Can't exclude execution levels!\n"); + return -EINVAL; + } + + if (event->cpu < 0) { + dev_warn(ccn->dev, "Can't provide per-task data!\n"); + return -EOPNOTSUPP; + } + /* + * Many perf core operations (eg. events rotation) operate on a + * single CPU context. This is obvious for CPU PMUs, where one + * expects the same sets of events being observed on all CPUs, + * but can lead to issues for off-core PMUs, like CCN, where each + * event could be theoretically assigned to a different CPU. To + * mitigate this, we enforce CPU assignment to one, selected + * processor (the one described in the "cpumask" attribute). + */ + event->cpu = cpumask_first(&ccn->dt.cpu); + + node_xp = CCN_CONFIG_NODE(event->attr.config); + type = CCN_CONFIG_TYPE(event->attr.config); + event_id = CCN_CONFIG_EVENT(event->attr.config); + + /* Validate node/xp vs topology */ + switch (type) { + case CCN_TYPE_MN: + if (node_xp != ccn->mn_id) { + dev_warn(ccn->dev, "Invalid MN ID %d!\n", node_xp); + return -EINVAL; + } + break; + case CCN_TYPE_XP: + if (node_xp >= ccn->num_xps) { + dev_warn(ccn->dev, "Invalid XP ID %d!\n", node_xp); + return -EINVAL; + } + break; + case CCN_TYPE_CYCLES: + break; + default: + if (node_xp >= ccn->num_nodes) { + dev_warn(ccn->dev, "Invalid node ID %d!\n", node_xp); + return -EINVAL; + } + if (!arm_ccn_pmu_type_eq(type, ccn->node[node_xp].type)) { + dev_warn(ccn->dev, "Invalid type 0x%x for node %d!\n", + type, node_xp); + return -EINVAL; + } + break; + } + + /* Validate event ID vs available for the type */ + for (i = 0, valid = 0; i < ARRAY_SIZE(arm_ccn_pmu_events) && !valid; + i++) { + struct arm_ccn_pmu_event *e = &arm_ccn_pmu_events[i]; + u32 port = CCN_CONFIG_PORT(event->attr.config); + u32 vc = CCN_CONFIG_VC(event->attr.config); + + if (!arm_ccn_pmu_type_eq(type, e->type)) + continue; + if (event_id != e->event) + continue; + if (e->num_ports && port >= e->num_ports) { + dev_warn(ccn->dev, "Invalid port %d for node/XP %d!\n", + port, node_xp); + return -EINVAL; + } + if (e->num_vcs && vc >= e->num_vcs) { + dev_warn(ccn->dev, "Invalid vc %d for node/XP %d!\n", + vc, node_xp); + return -EINVAL; + } + valid = 1; + } + if (!valid) { + dev_warn(ccn->dev, "Invalid event 0x%x for node/XP %d!\n", + event_id, node_xp); + return -EINVAL; + } + + /* Watchpoint-based event for a node is actually set on XP */ + if (event_id == CCN_EVENT_WATCHPOINT && type != CCN_TYPE_XP) { + u32 port; + + type = CCN_TYPE_XP; + port = arm_ccn_node_to_xp_port(node_xp); + node_xp = arm_ccn_node_to_xp(node_xp); + + arm_ccn_pmu_config_set(&event->attr.config, + node_xp, type, port); + } + + /* + * We must NOT create groups containing mixed PMUs, although software + * events are acceptable (for example to create a CCN group + * periodically read when a hrtimer aka cpu-clock leader triggers). + */ + if (event->group_leader->pmu != event->pmu && + !is_software_event(event->group_leader)) + return -EINVAL; + + list_for_each_entry(sibling, &event->group_leader->sibling_list, + group_entry) + if (sibling->pmu != event->pmu && + !is_software_event(sibling)) + return -EINVAL; + + return 0; +} + +static u64 arm_ccn_pmu_read_counter(struct arm_ccn *ccn, int idx) +{ + u64 res; + + if (idx == CCN_IDX_PMU_CYCLE_COUNTER) { +#ifdef readq + res = readq(ccn->dt.base + CCN_DT_PMCCNTR); +#else + /* 40 bit counter, can do snapshot and read in two parts */ + writel(0x1, ccn->dt.base + CCN_DT_PMSR_REQ); + while (!(readl(ccn->dt.base + CCN_DT_PMSR) & 0x1)) + ; + writel(0x1, ccn->dt.base + CCN_DT_PMSR_CLR); + res = readl(ccn->dt.base + CCN_DT_PMCCNTRSR + 4) & 0xff; + res <<= 32; + res |= readl(ccn->dt.base + CCN_DT_PMCCNTRSR); +#endif + } else { + res = readl(ccn->dt.base + CCN_DT_PMEVCNT(idx)); + } + + return res; +} + +static void arm_ccn_pmu_event_update(struct perf_event *event) +{ + struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu); + struct hw_perf_event *hw = &event->hw; + u64 prev_count, new_count, mask; + + do { + prev_count = local64_read(&hw->prev_count); + new_count = arm_ccn_pmu_read_counter(ccn, hw->idx); + } while (local64_xchg(&hw->prev_count, new_count) != prev_count); + + mask = (1LLU << (hw->idx == CCN_IDX_PMU_CYCLE_COUNTER ? 40 : 32)) - 1; + + local64_add((new_count - prev_count) & mask, &event->count); +} + +static void arm_ccn_pmu_xp_dt_config(struct perf_event *event, int enable) +{ + struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu); + struct hw_perf_event *hw = &event->hw; + struct arm_ccn_component *xp; + u32 val, dt_cfg; + + /* Nothing to do for cycle counter */ + if (hw->idx == CCN_IDX_PMU_CYCLE_COUNTER) + return; + + if (CCN_CONFIG_TYPE(event->attr.config) == CCN_TYPE_XP) + xp = &ccn->xp[CCN_CONFIG_XP(event->attr.config)]; + else + xp = &ccn->xp[arm_ccn_node_to_xp( + CCN_CONFIG_NODE(event->attr.config))]; + + if (enable) + dt_cfg = hw->event_base; + else + dt_cfg = CCN_XP_DT_CONFIG__DT_CFG__PASS_THROUGH; + + spin_lock(&ccn->dt.config_lock); + + val = readl(xp->base + CCN_XP_DT_CONFIG); + val &= ~(CCN_XP_DT_CONFIG__DT_CFG__MASK << + CCN_XP_DT_CONFIG__DT_CFG__SHIFT(hw->idx)); + val |= dt_cfg << CCN_XP_DT_CONFIG__DT_CFG__SHIFT(hw->idx); + writel(val, xp->base + CCN_XP_DT_CONFIG); + + spin_unlock(&ccn->dt.config_lock); +} + +static void arm_ccn_pmu_event_start(struct perf_event *event, int flags) +{ + struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu); + struct hw_perf_event *hw = &event->hw; + + local64_set(&event->hw.prev_count, + arm_ccn_pmu_read_counter(ccn, hw->idx)); + hw->state = 0; + + /* Set the DT bus input, engaging the counter */ + arm_ccn_pmu_xp_dt_config(event, 1); +} + +static void arm_ccn_pmu_event_stop(struct perf_event *event, int flags) +{ + struct hw_perf_event *hw = &event->hw; + + /* Disable counting, setting the DT bus to pass-through mode */ + arm_ccn_pmu_xp_dt_config(event, 0); + + if (flags & PERF_EF_UPDATE) + arm_ccn_pmu_event_update(event); + + hw->state |= PERF_HES_STOPPED; +} + +static void arm_ccn_pmu_xp_watchpoint_config(struct perf_event *event) +{ + struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu); + struct hw_perf_event *hw = &event->hw; + struct arm_ccn_component *source = + ccn->dt.pmu_counters[hw->idx].source; + unsigned long wp = hw->config_base; + u32 val; + u64 cmp_l = event->attr.config1; + u64 cmp_h = event->attr.config2; + u64 mask_l = ccn->dt.cmp_mask[CCN_CONFIG_MASK(event->attr.config)].l; + u64 mask_h = ccn->dt.cmp_mask[CCN_CONFIG_MASK(event->attr.config)].h; + + hw->event_base = CCN_XP_DT_CONFIG__DT_CFG__WATCHPOINT(wp); + + /* Direction (RX/TX), device (port) & virtual channel */ + val = readl(source->base + CCN_XP_DT_INTERFACE_SEL); + val &= ~(CCN_XP_DT_INTERFACE_SEL__DT_IO_SEL__MASK << + CCN_XP_DT_INTERFACE_SEL__DT_IO_SEL__SHIFT(wp)); + val |= CCN_CONFIG_DIR(event->attr.config) << + CCN_XP_DT_INTERFACE_SEL__DT_IO_SEL__SHIFT(wp); + val &= ~(CCN_XP_DT_INTERFACE_SEL__DT_DEV_SEL__MASK << + CCN_XP_DT_INTERFACE_SEL__DT_DEV_SEL__SHIFT(wp)); + val |= CCN_CONFIG_PORT(event->attr.config) << + CCN_XP_DT_INTERFACE_SEL__DT_DEV_SEL__SHIFT(wp); + val &= ~(CCN_XP_DT_INTERFACE_SEL__DT_VC_SEL__MASK << + CCN_XP_DT_INTERFACE_SEL__DT_VC_SEL__SHIFT(wp)); + val |= CCN_CONFIG_VC(event->attr.config) << + CCN_XP_DT_INTERFACE_SEL__DT_VC_SEL__SHIFT(wp); + writel(val, source->base + CCN_XP_DT_INTERFACE_SEL); + + /* Comparison values */ + writel(cmp_l & 0xffffffff, source->base + CCN_XP_DT_CMP_VAL_L(wp)); + writel((cmp_l >> 32) & 0x7fffffff, + source->base + CCN_XP_DT_CMP_VAL_L(wp) + 4); + writel(cmp_h & 0xffffffff, source->base + CCN_XP_DT_CMP_VAL_H(wp)); + writel((cmp_h >> 32) & 0x0fffffff, + source->base + CCN_XP_DT_CMP_VAL_H(wp) + 4); + + /* Mask */ + writel(mask_l & 0xffffffff, source->base + CCN_XP_DT_CMP_MASK_L(wp)); + writel((mask_l >> 32) & 0x7fffffff, + source->base + CCN_XP_DT_CMP_MASK_L(wp) + 4); + writel(mask_h & 0xffffffff, source->base + CCN_XP_DT_CMP_MASK_H(wp)); + writel((mask_h >> 32) & 0x0fffffff, + source->base + CCN_XP_DT_CMP_MASK_H(wp) + 4); +} + +static void arm_ccn_pmu_xp_event_config(struct perf_event *event) +{ + struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu); + struct hw_perf_event *hw = &event->hw; + struct arm_ccn_component *source = + ccn->dt.pmu_counters[hw->idx].source; + u32 val, id; + + hw->event_base = CCN_XP_DT_CONFIG__DT_CFG__XP_PMU_EVENT(hw->config_base); + + id = (CCN_CONFIG_VC(event->attr.config) << 4) | + (CCN_CONFIG_BUS(event->attr.config) << 3) | + (CCN_CONFIG_EVENT(event->attr.config) << 0); + + val = readl(source->base + CCN_XP_PMU_EVENT_SEL); + val &= ~(CCN_XP_PMU_EVENT_SEL__ID__MASK << + CCN_XP_PMU_EVENT_SEL__ID__SHIFT(hw->config_base)); + val |= id << CCN_XP_PMU_EVENT_SEL__ID__SHIFT(hw->config_base); + writel(val, source->base + CCN_XP_PMU_EVENT_SEL); +} + +static void arm_ccn_pmu_node_event_config(struct perf_event *event) +{ + struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu); + struct hw_perf_event *hw = &event->hw; + struct arm_ccn_component *source = + ccn->dt.pmu_counters[hw->idx].source; + u32 type = CCN_CONFIG_TYPE(event->attr.config); + u32 val, port; + + port = arm_ccn_node_to_xp_port(CCN_CONFIG_NODE(event->attr.config)); + hw->event_base = CCN_XP_DT_CONFIG__DT_CFG__DEVICE_PMU_EVENT(port, + hw->config_base); + + /* These *_event_sel regs should be identical, but let's make sure... */ + BUILD_BUG_ON(CCN_HNF_PMU_EVENT_SEL != CCN_SBAS_PMU_EVENT_SEL); + BUILD_BUG_ON(CCN_SBAS_PMU_EVENT_SEL != CCN_RNI_PMU_EVENT_SEL); + BUILD_BUG_ON(CCN_HNF_PMU_EVENT_SEL__ID__SHIFT(1) != + CCN_SBAS_PMU_EVENT_SEL__ID__SHIFT(1)); + BUILD_BUG_ON(CCN_SBAS_PMU_EVENT_SEL__ID__SHIFT(1) != + CCN_RNI_PMU_EVENT_SEL__ID__SHIFT(1)); + BUILD_BUG_ON(CCN_HNF_PMU_EVENT_SEL__ID__MASK != + CCN_SBAS_PMU_EVENT_SEL__ID__MASK); + BUILD_BUG_ON(CCN_SBAS_PMU_EVENT_SEL__ID__MASK != + CCN_RNI_PMU_EVENT_SEL__ID__MASK); + if (WARN_ON(type != CCN_TYPE_HNF && type != CCN_TYPE_SBAS && + !arm_ccn_pmu_type_eq(type, CCN_TYPE_RNI_3P))) + return; + + /* Set the event id for the pre-allocated counter */ + val = readl(source->base + CCN_HNF_PMU_EVENT_SEL); + val &= ~(CCN_HNF_PMU_EVENT_SEL__ID__MASK << + CCN_HNF_PMU_EVENT_SEL__ID__SHIFT(hw->config_base)); + val |= CCN_CONFIG_EVENT(event->attr.config) << + CCN_HNF_PMU_EVENT_SEL__ID__SHIFT(hw->config_base); + writel(val, source->base + CCN_HNF_PMU_EVENT_SEL); +} + +static void arm_ccn_pmu_event_config(struct perf_event *event) +{ + struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu); + struct hw_perf_event *hw = &event->hw; + u32 xp, offset, val; + + /* Cycle counter requires no setup */ + if (hw->idx == CCN_IDX_PMU_CYCLE_COUNTER) + return; + + if (CCN_CONFIG_TYPE(event->attr.config) == CCN_TYPE_XP) + xp = CCN_CONFIG_XP(event->attr.config); + else + xp = arm_ccn_node_to_xp(CCN_CONFIG_NODE(event->attr.config)); + + spin_lock(&ccn->dt.config_lock); + + /* Set the DT bus "distance" register */ + offset = (hw->idx / 4) * 4; + val = readl(ccn->dt.base + CCN_DT_ACTIVE_DSM + offset); + val &= ~(CCN_DT_ACTIVE_DSM__DSM_ID__MASK << + CCN_DT_ACTIVE_DSM__DSM_ID__SHIFT(hw->idx % 4)); + val |= xp << CCN_DT_ACTIVE_DSM__DSM_ID__SHIFT(hw->idx % 4); + writel(val, ccn->dt.base + CCN_DT_ACTIVE_DSM + offset); + + if (CCN_CONFIG_TYPE(event->attr.config) == CCN_TYPE_XP) { + if (CCN_CONFIG_EVENT(event->attr.config) == + CCN_EVENT_WATCHPOINT) + arm_ccn_pmu_xp_watchpoint_config(event); + else + arm_ccn_pmu_xp_event_config(event); + } else { + arm_ccn_pmu_node_event_config(event); + } + + spin_unlock(&ccn->dt.config_lock); +} + +static int arm_ccn_pmu_active_counters(struct arm_ccn *ccn) +{ + return bitmap_weight(ccn->dt.pmu_counters_mask, + CCN_NUM_PMU_EVENT_COUNTERS + 1); +} + +static int arm_ccn_pmu_event_add(struct perf_event *event, int flags) +{ + int err; + struct hw_perf_event *hw = &event->hw; + struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu); + + err = arm_ccn_pmu_event_alloc(event); + if (err) + return err; + + /* + * Pin the timer, so that the overflows are handled by the chosen + * event->cpu (this is the same one as presented in "cpumask" + * attribute). + */ + if (!ccn->irq && arm_ccn_pmu_active_counters(ccn) == 1) + hrtimer_start(&ccn->dt.hrtimer, arm_ccn_pmu_timer_period(), + HRTIMER_MODE_REL_PINNED); + + arm_ccn_pmu_event_config(event); + + hw->state = PERF_HES_STOPPED; + + if (flags & PERF_EF_START) + arm_ccn_pmu_event_start(event, PERF_EF_UPDATE); + + return 0; +} + +static void arm_ccn_pmu_event_del(struct perf_event *event, int flags) +{ + struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu); + + arm_ccn_pmu_event_stop(event, PERF_EF_UPDATE); + + arm_ccn_pmu_event_release(event); + + if (!ccn->irq && arm_ccn_pmu_active_counters(ccn) == 0) + hrtimer_cancel(&ccn->dt.hrtimer); +} + +static void arm_ccn_pmu_event_read(struct perf_event *event) +{ + arm_ccn_pmu_event_update(event); +} + +static void arm_ccn_pmu_enable(struct pmu *pmu) +{ + struct arm_ccn *ccn = pmu_to_arm_ccn(pmu); + + u32 val = readl(ccn->dt.base + CCN_DT_PMCR); + val |= CCN_DT_PMCR__PMU_EN; + writel(val, ccn->dt.base + CCN_DT_PMCR); +} + +static void arm_ccn_pmu_disable(struct pmu *pmu) +{ + struct arm_ccn *ccn = pmu_to_arm_ccn(pmu); + + u32 val = readl(ccn->dt.base + CCN_DT_PMCR); + val &= ~CCN_DT_PMCR__PMU_EN; + writel(val, ccn->dt.base + CCN_DT_PMCR); +} + +static irqreturn_t arm_ccn_pmu_overflow_handler(struct arm_ccn_dt *dt) +{ + u32 pmovsr = readl(dt->base + CCN_DT_PMOVSR); + int idx; + + if (!pmovsr) + return IRQ_NONE; + + writel(pmovsr, dt->base + CCN_DT_PMOVSR_CLR); + + BUILD_BUG_ON(CCN_IDX_PMU_CYCLE_COUNTER != CCN_NUM_PMU_EVENT_COUNTERS); + + for (idx = 0; idx < CCN_NUM_PMU_EVENT_COUNTERS + 1; idx++) { + struct perf_event *event = dt->pmu_counters[idx].event; + int overflowed = pmovsr & BIT(idx); + + WARN_ON_ONCE(overflowed && !event && + idx != CCN_IDX_PMU_CYCLE_COUNTER); + + if (!event || !overflowed) + continue; + + arm_ccn_pmu_event_update(event); + } + + return IRQ_HANDLED; +} + +static enum hrtimer_restart arm_ccn_pmu_timer_handler(struct hrtimer *hrtimer) +{ + struct arm_ccn_dt *dt = container_of(hrtimer, struct arm_ccn_dt, + hrtimer); + unsigned long flags; + + local_irq_save(flags); + arm_ccn_pmu_overflow_handler(dt); + local_irq_restore(flags); + + hrtimer_forward_now(hrtimer, arm_ccn_pmu_timer_period()); + return HRTIMER_RESTART; +} + + +static int arm_ccn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct arm_ccn_dt *dt = hlist_entry_safe(node, struct arm_ccn_dt, node); + struct arm_ccn *ccn = container_of(dt, struct arm_ccn, dt); + unsigned int target; + + if (!cpumask_test_and_clear_cpu(cpu, &dt->cpu)) + return 0; + target = cpumask_any_but(cpu_online_mask, cpu); + if (target >= nr_cpu_ids) + return 0; + perf_pmu_migrate_context(&dt->pmu, cpu, target); + cpumask_set_cpu(target, &dt->cpu); + if (ccn->irq) + WARN_ON(irq_set_affinity_hint(ccn->irq, &dt->cpu) != 0); + return 0; +} + +static DEFINE_IDA(arm_ccn_pmu_ida); + +static int arm_ccn_pmu_init(struct arm_ccn *ccn) +{ + int i; + char *name; + int err; + + /* Initialize DT subsystem */ + ccn->dt.base = ccn->base + CCN_REGION_SIZE; + spin_lock_init(&ccn->dt.config_lock); + writel(CCN_DT_PMOVSR_CLR__MASK, ccn->dt.base + CCN_DT_PMOVSR_CLR); + writel(CCN_DT_CTL__DT_EN, ccn->dt.base + CCN_DT_CTL); + writel(CCN_DT_PMCR__OVFL_INTR_EN | CCN_DT_PMCR__PMU_EN, + ccn->dt.base + CCN_DT_PMCR); + writel(0x1, ccn->dt.base + CCN_DT_PMSR_CLR); + for (i = 0; i < ccn->num_xps; i++) { + writel(0, ccn->xp[i].base + CCN_XP_DT_CONFIG); + writel((CCN_XP_DT_CONTROL__WP_ARM_SEL__ALWAYS << + CCN_XP_DT_CONTROL__WP_ARM_SEL__SHIFT(0)) | + (CCN_XP_DT_CONTROL__WP_ARM_SEL__ALWAYS << + CCN_XP_DT_CONTROL__WP_ARM_SEL__SHIFT(1)) | + CCN_XP_DT_CONTROL__DT_ENABLE, + ccn->xp[i].base + CCN_XP_DT_CONTROL); + } + ccn->dt.cmp_mask[CCN_IDX_MASK_ANY].l = ~0; + ccn->dt.cmp_mask[CCN_IDX_MASK_ANY].h = ~0; + ccn->dt.cmp_mask[CCN_IDX_MASK_EXACT].l = 0; + ccn->dt.cmp_mask[CCN_IDX_MASK_EXACT].h = 0; + ccn->dt.cmp_mask[CCN_IDX_MASK_ORDER].l = ~0; + ccn->dt.cmp_mask[CCN_IDX_MASK_ORDER].h = ~(0x1 << 15); + ccn->dt.cmp_mask[CCN_IDX_MASK_OPCODE].l = ~0; + ccn->dt.cmp_mask[CCN_IDX_MASK_OPCODE].h = ~(0x1f << 9); + + /* Get a convenient /sys/event_source/devices/ name */ + ccn->dt.id = ida_simple_get(&arm_ccn_pmu_ida, 0, 0, GFP_KERNEL); + if (ccn->dt.id == 0) { + name = "ccn"; + } else { + name = devm_kasprintf(ccn->dev, GFP_KERNEL, "ccn_%d", + ccn->dt.id); + if (!name) { + err = -ENOMEM; + goto error_choose_name; + } + } + + /* Perf driver registration */ + ccn->dt.pmu = (struct pmu) { + .module = THIS_MODULE, + .attr_groups = arm_ccn_pmu_attr_groups, + .task_ctx_nr = perf_invalid_context, + .event_init = arm_ccn_pmu_event_init, + .add = arm_ccn_pmu_event_add, + .del = arm_ccn_pmu_event_del, + .start = arm_ccn_pmu_event_start, + .stop = arm_ccn_pmu_event_stop, + .read = arm_ccn_pmu_event_read, + .pmu_enable = arm_ccn_pmu_enable, + .pmu_disable = arm_ccn_pmu_disable, + }; + + /* No overflow interrupt? Have to use a timer instead. */ + if (!ccn->irq) { + dev_info(ccn->dev, "No access to interrupts, using timer.\n"); + hrtimer_init(&ccn->dt.hrtimer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + ccn->dt.hrtimer.function = arm_ccn_pmu_timer_handler; + } + + /* Pick one CPU which we will use to collect data from CCN... */ + cpumask_set_cpu(get_cpu(), &ccn->dt.cpu); + + /* Also make sure that the overflow interrupt is handled by this CPU */ + if (ccn->irq) { + err = irq_set_affinity_hint(ccn->irq, &ccn->dt.cpu); + if (err) { + dev_err(ccn->dev, "Failed to set interrupt affinity!\n"); + goto error_set_affinity; + } + } + + err = perf_pmu_register(&ccn->dt.pmu, name, -1); + if (err) + goto error_pmu_register; + + cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_CCN_ONLINE, + &ccn->dt.node); + put_cpu(); + return 0; + +error_pmu_register: +error_set_affinity: + put_cpu(); +error_choose_name: + ida_simple_remove(&arm_ccn_pmu_ida, ccn->dt.id); + for (i = 0; i < ccn->num_xps; i++) + writel(0, ccn->xp[i].base + CCN_XP_DT_CONTROL); + writel(0, ccn->dt.base + CCN_DT_PMCR); + return err; +} + +static void arm_ccn_pmu_cleanup(struct arm_ccn *ccn) +{ + int i; + + cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_CCN_ONLINE, + &ccn->dt.node); + if (ccn->irq) + irq_set_affinity_hint(ccn->irq, NULL); + for (i = 0; i < ccn->num_xps; i++) + writel(0, ccn->xp[i].base + CCN_XP_DT_CONTROL); + writel(0, ccn->dt.base + CCN_DT_PMCR); + perf_pmu_unregister(&ccn->dt.pmu); + ida_simple_remove(&arm_ccn_pmu_ida, ccn->dt.id); +} + +static int arm_ccn_for_each_valid_region(struct arm_ccn *ccn, + int (*callback)(struct arm_ccn *ccn, int region, + void __iomem *base, u32 type, u32 id)) +{ + int region; + + for (region = 0; region < CCN_NUM_REGIONS; region++) { + u32 val, type, id; + void __iomem *base; + int err; + + val = readl(ccn->base + CCN_MN_OLY_COMP_LIST_63_0 + + 4 * (region / 32)); + if (!(val & (1 << (region % 32)))) + continue; + + base = ccn->base + region * CCN_REGION_SIZE; + val = readl(base + CCN_ALL_OLY_ID); + type = (val >> CCN_ALL_OLY_ID__OLY_ID__SHIFT) & + CCN_ALL_OLY_ID__OLY_ID__MASK; + id = (val >> CCN_ALL_OLY_ID__NODE_ID__SHIFT) & + CCN_ALL_OLY_ID__NODE_ID__MASK; + + err = callback(ccn, region, base, type, id); + if (err) + return err; + } + + return 0; +} + +static int arm_ccn_get_nodes_num(struct arm_ccn *ccn, int region, + void __iomem *base, u32 type, u32 id) +{ + + if (type == CCN_TYPE_XP && id >= ccn->num_xps) + ccn->num_xps = id + 1; + else if (id >= ccn->num_nodes) + ccn->num_nodes = id + 1; + + return 0; +} + +static int arm_ccn_init_nodes(struct arm_ccn *ccn, int region, + void __iomem *base, u32 type, u32 id) +{ + struct arm_ccn_component *component; + + dev_dbg(ccn->dev, "Region %d: id=%u, type=0x%02x\n", region, id, type); + + switch (type) { + case CCN_TYPE_MN: + ccn->mn_id = id; + return 0; + case CCN_TYPE_DT: + return 0; + case CCN_TYPE_XP: + component = &ccn->xp[id]; + break; + case CCN_TYPE_SBSX: + ccn->sbsx_present = 1; + component = &ccn->node[id]; + break; + case CCN_TYPE_SBAS: + ccn->sbas_present = 1; + /* Fall-through */ + default: + component = &ccn->node[id]; + break; + } + + component->base = base; + component->type = type; + + return 0; +} + + +static irqreturn_t arm_ccn_error_handler(struct arm_ccn *ccn, + const u32 *err_sig_val) +{ + /* This should be really handled by firmware... */ + dev_err(ccn->dev, "Error reported in %08x%08x%08x%08x%08x%08x.\n", + err_sig_val[5], err_sig_val[4], err_sig_val[3], + err_sig_val[2], err_sig_val[1], err_sig_val[0]); + dev_err(ccn->dev, "Disabling interrupt generation for all errors.\n"); + writel(CCN_MN_ERRINT_STATUS__ALL_ERRORS__DISABLE, + ccn->base + CCN_MN_ERRINT_STATUS); + + return IRQ_HANDLED; +} + + +static irqreturn_t arm_ccn_irq_handler(int irq, void *dev_id) +{ + irqreturn_t res = IRQ_NONE; + struct arm_ccn *ccn = dev_id; + u32 err_sig_val[6]; + u32 err_or; + int i; + + /* PMU overflow is a special case */ + err_or = err_sig_val[0] = readl(ccn->base + CCN_MN_ERR_SIG_VAL_63_0); + if (err_or & CCN_MN_ERR_SIG_VAL_63_0__DT) { + err_or &= ~CCN_MN_ERR_SIG_VAL_63_0__DT; + res = arm_ccn_pmu_overflow_handler(&ccn->dt); + } + + /* Have to read all err_sig_vals to clear them */ + for (i = 1; i < ARRAY_SIZE(err_sig_val); i++) { + err_sig_val[i] = readl(ccn->base + + CCN_MN_ERR_SIG_VAL_63_0 + i * 4); + err_or |= err_sig_val[i]; + } + if (err_or) + res |= arm_ccn_error_handler(ccn, err_sig_val); + + if (res != IRQ_NONE) + writel(CCN_MN_ERRINT_STATUS__INTREQ__DESSERT, + ccn->base + CCN_MN_ERRINT_STATUS); + + return res; +} + + +static int arm_ccn_probe(struct platform_device *pdev) +{ + struct arm_ccn *ccn; + struct resource *res; + unsigned int irq; + int err; + + ccn = devm_kzalloc(&pdev->dev, sizeof(*ccn), GFP_KERNEL); + if (!ccn) + return -ENOMEM; + ccn->dev = &pdev->dev; + platform_set_drvdata(pdev, ccn); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -EINVAL; + + if (!devm_request_mem_region(ccn->dev, res->start, + resource_size(res), pdev->name)) + return -EBUSY; + + ccn->base = devm_ioremap(ccn->dev, res->start, + resource_size(res)); + if (!ccn->base) + return -EFAULT; + + res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!res) + return -EINVAL; + irq = res->start; + + /* Check if we can use the interrupt */ + writel(CCN_MN_ERRINT_STATUS__PMU_EVENTS__DISABLE, + ccn->base + CCN_MN_ERRINT_STATUS); + if (readl(ccn->base + CCN_MN_ERRINT_STATUS) & + CCN_MN_ERRINT_STATUS__PMU_EVENTS__DISABLED) { + /* Can set 'disable' bits, so can acknowledge interrupts */ + writel(CCN_MN_ERRINT_STATUS__PMU_EVENTS__ENABLE, + ccn->base + CCN_MN_ERRINT_STATUS); + err = devm_request_irq(ccn->dev, irq, arm_ccn_irq_handler, + IRQF_NOBALANCING | IRQF_NO_THREAD, + dev_name(ccn->dev), ccn); + if (err) + return err; + + ccn->irq = irq; + } + + + /* Build topology */ + + err = arm_ccn_for_each_valid_region(ccn, arm_ccn_get_nodes_num); + if (err) + return err; + + ccn->node = devm_kcalloc(ccn->dev, ccn->num_nodes, sizeof(*ccn->node), + GFP_KERNEL); + ccn->xp = devm_kcalloc(ccn->dev, ccn->num_xps, sizeof(*ccn->node), + GFP_KERNEL); + if (!ccn->node || !ccn->xp) + return -ENOMEM; + + err = arm_ccn_for_each_valid_region(ccn, arm_ccn_init_nodes); + if (err) + return err; + + return arm_ccn_pmu_init(ccn); +} + +static int arm_ccn_remove(struct platform_device *pdev) +{ + struct arm_ccn *ccn = platform_get_drvdata(pdev); + + arm_ccn_pmu_cleanup(ccn); + + return 0; +} + +static const struct of_device_id arm_ccn_match[] = { + { .compatible = "arm,ccn-502", }, + { .compatible = "arm,ccn-504", }, + {}, +}; +MODULE_DEVICE_TABLE(of, arm_ccn_match); + +static struct platform_driver arm_ccn_driver = { + .driver = { + .name = "arm-ccn", + .of_match_table = arm_ccn_match, + }, + .probe = arm_ccn_probe, + .remove = arm_ccn_remove, +}; + +static int __init arm_ccn_init(void) +{ + int i, ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_CCN_ONLINE, + "perf/arm/ccn:online", NULL, + arm_ccn_pmu_offline_cpu); + if (ret) + return ret; + + for (i = 0; i < ARRAY_SIZE(arm_ccn_pmu_events); i++) + arm_ccn_pmu_events_attrs[i] = &arm_ccn_pmu_events[i].attr.attr; + + ret = platform_driver_register(&arm_ccn_driver); + if (ret) + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_CCN_ONLINE); + return ret; +} + +static void __exit arm_ccn_exit(void) +{ + platform_driver_unregister(&arm_ccn_driver); + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_CCN_ONLINE); +} + +module_init(arm_ccn_init); +module_exit(arm_ccn_exit); + +MODULE_AUTHOR("Pawel Moll "); +MODULE_LICENSE("GPL"); -- cgit v1.2.3-59-g8ed1b From 3de6be7a3dd8934e59d85fc60a170d4ab2f0a0f2 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 15 Feb 2018 18:51:42 +0000 Subject: drivers/bus: Split Arm CCI driver The arm-cci driver is really two entirely separate drivers; one for MCPM port control and the other for the performance monitors. Since they are already relatively self-contained, let's take the plunge and move the PMU parts out to drivers/perf where they belong these days. For non-MCPM systems this leaves a small dependency on the remaining "bus" stub for initial probing and discovery, but we end up with something that still fits the general pattern of its fellow system PMU drivers to ease future maintenance. Moving code to a new file also offers a perfect excuse to modernise the license/copyright headers and clean up some funky linewraps on the way. Cc: Lorenzo Pieralisi Reviewed-by: Suzuki Poulose Acked-by: Punit Agrawal Signed-off-by: Robin Murphy Signed-off-by: Arnd Bergmann --- drivers/bus/Kconfig | 28 - drivers/bus/arm-cci.c | 1745 +---------------------------------------------- drivers/perf/Kconfig | 26 + drivers/perf/Makefile | 1 + drivers/perf/arm-cci.c | 1747 ++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 1776 insertions(+), 1771 deletions(-) create mode 100644 drivers/perf/arm-cci.c (limited to 'drivers/perf') diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig index 116446c42c6b..39ddb63be993 100644 --- a/drivers/bus/Kconfig +++ b/drivers/bus/Kconfig @@ -8,25 +8,10 @@ menu "Bus devices" config ARM_CCI bool -config ARM_CCI_PMU - bool - select ARM_CCI - config ARM_CCI400_COMMON bool select ARM_CCI -config ARM_CCI400_PMU - bool "ARM CCI400 PMU support" - depends on (ARM && CPU_V7) || ARM64 - depends on PERF_EVENTS - select ARM_CCI400_COMMON - select ARM_CCI_PMU - help - Support for PMU events monitoring on the ARM CCI-400 (cache coherent - interconnect). CCI-400 supports counting events related to the - connected slave/master interfaces. - config ARM_CCI400_PORT_CTRL bool depends on ARM && OF && CPU_V7 @@ -35,19 +20,6 @@ config ARM_CCI400_PORT_CTRL Low level power management driver for CCI400 cache coherent interconnect for ARM platforms. -config ARM_CCI5xx_PMU - bool "ARM CCI-500/CCI-550 PMU support" - depends on (ARM && CPU_V7) || ARM64 - depends on PERF_EVENTS - select ARM_CCI_PMU - help - Support for PMU events monitoring on the ARM CCI-500/CCI-550 cache - coherent interconnects. Both of them provide 8 independent event counters, - which can count events pertaining to the slave/master interfaces as well - as the internal events to the CCI. - - If unsure, say Y - config BRCMSTB_GISB_ARB bool "Broadcom STB GISB bus arbiter" depends on ARM || ARM64 || MIPS diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c index 5426c04fe24b..503c1789dd02 100644 --- a/drivers/bus/arm-cci.c +++ b/drivers/bus/arm-cci.c @@ -16,20 +16,17 @@ #include #include -#include #include #include -#include #include -#include #include #include -#include #include #include -static void __iomem *cci_ctrl_base; +/* Referenced read-only by the PMU driver; see drivers/perf/arm-cci.c */ +void __iomem *cci_ctrl_base; static unsigned long cci_ctrl_phys; #ifdef CONFIG_ARM_CCI400_PORT_CTRL @@ -59,1716 +56,7 @@ static const struct of_device_id arm_cci_matches[] = { {}, }; -#ifdef CONFIG_ARM_CCI_PMU - #define DRIVER_NAME "ARM-CCI" -#define DRIVER_NAME_PMU DRIVER_NAME " PMU" - -#define CCI_PMCR 0x0100 -#define CCI_PID2 0x0fe8 - -#define CCI_PMCR_CEN 0x00000001 -#define CCI_PMCR_NCNT_MASK 0x0000f800 -#define CCI_PMCR_NCNT_SHIFT 11 - -#define CCI_PID2_REV_MASK 0xf0 -#define CCI_PID2_REV_SHIFT 4 - -#define CCI_PMU_EVT_SEL 0x000 -#define CCI_PMU_CNTR 0x004 -#define CCI_PMU_CNTR_CTRL 0x008 -#define CCI_PMU_OVRFLW 0x00c - -#define CCI_PMU_OVRFLW_FLAG 1 - -#define CCI_PMU_CNTR_SIZE(model) ((model)->cntr_size) -#define CCI_PMU_CNTR_BASE(model, idx) ((idx) * CCI_PMU_CNTR_SIZE(model)) -#define CCI_PMU_CNTR_MASK ((1ULL << 32) -1) -#define CCI_PMU_CNTR_LAST(cci_pmu) (cci_pmu->num_cntrs - 1) - -#define CCI_PMU_MAX_HW_CNTRS(model) \ - ((model)->num_hw_cntrs + (model)->fixed_hw_cntrs) - -/* Types of interfaces that can generate events */ -enum { - CCI_IF_SLAVE, - CCI_IF_MASTER, -#ifdef CONFIG_ARM_CCI5xx_PMU - CCI_IF_GLOBAL, -#endif - CCI_IF_MAX, -}; - -struct event_range { - u32 min; - u32 max; -}; - -struct cci_pmu_hw_events { - struct perf_event **events; - unsigned long *used_mask; - raw_spinlock_t pmu_lock; -}; - -struct cci_pmu; -/* - * struct cci_pmu_model: - * @fixed_hw_cntrs - Number of fixed event counters - * @num_hw_cntrs - Maximum number of programmable event counters - * @cntr_size - Size of an event counter mapping - */ -struct cci_pmu_model { - char *name; - u32 fixed_hw_cntrs; - u32 num_hw_cntrs; - u32 cntr_size; - struct attribute **format_attrs; - struct attribute **event_attrs; - struct event_range event_ranges[CCI_IF_MAX]; - int (*validate_hw_event)(struct cci_pmu *, unsigned long); - int (*get_event_idx)(struct cci_pmu *, struct cci_pmu_hw_events *, unsigned long); - void (*write_counters)(struct cci_pmu *, unsigned long *); -}; - -static struct cci_pmu_model cci_pmu_models[]; - -struct cci_pmu { - void __iomem *base; - struct pmu pmu; - int nr_irqs; - int *irqs; - unsigned long active_irqs; - const struct cci_pmu_model *model; - struct cci_pmu_hw_events hw_events; - struct platform_device *plat_device; - int num_cntrs; - atomic_t active_events; - struct mutex reserve_mutex; - struct hlist_node node; - cpumask_t cpus; -}; - -#define to_cci_pmu(c) (container_of(c, struct cci_pmu, pmu)) - -enum cci_models { -#ifdef CONFIG_ARM_CCI400_PMU - CCI400_R0, - CCI400_R1, -#endif -#ifdef CONFIG_ARM_CCI5xx_PMU - CCI500_R0, - CCI550_R0, -#endif - CCI_MODEL_MAX -}; - -static void pmu_write_counters(struct cci_pmu *cci_pmu, - unsigned long *mask); -static ssize_t cci_pmu_format_show(struct device *dev, - struct device_attribute *attr, char *buf); -static ssize_t cci_pmu_event_show(struct device *dev, - struct device_attribute *attr, char *buf); - -#define CCI_EXT_ATTR_ENTRY(_name, _func, _config) \ - &((struct dev_ext_attribute[]) { \ - { __ATTR(_name, S_IRUGO, _func, NULL), (void *)_config } \ - })[0].attr.attr - -#define CCI_FORMAT_EXT_ATTR_ENTRY(_name, _config) \ - CCI_EXT_ATTR_ENTRY(_name, cci_pmu_format_show, (char *)_config) -#define CCI_EVENT_EXT_ATTR_ENTRY(_name, _config) \ - CCI_EXT_ATTR_ENTRY(_name, cci_pmu_event_show, (unsigned long)_config) - -/* CCI400 PMU Specific definitions */ - -#ifdef CONFIG_ARM_CCI400_PMU - -/* Port ids */ -#define CCI400_PORT_S0 0 -#define CCI400_PORT_S1 1 -#define CCI400_PORT_S2 2 -#define CCI400_PORT_S3 3 -#define CCI400_PORT_S4 4 -#define CCI400_PORT_M0 5 -#define CCI400_PORT_M1 6 -#define CCI400_PORT_M2 7 - -#define CCI400_R1_PX 5 - -/* - * Instead of an event id to monitor CCI cycles, a dedicated counter is - * provided. Use 0xff to represent CCI cycles and hope that no future revisions - * make use of this event in hardware. - */ -enum cci400_perf_events { - CCI400_PMU_CYCLES = 0xff -}; - -#define CCI400_PMU_CYCLE_CNTR_IDX 0 -#define CCI400_PMU_CNTR0_IDX 1 - -/* - * CCI PMU event id is an 8-bit value made of two parts - bits 7:5 for one of 8 - * ports and bits 4:0 are event codes. There are different event codes - * associated with each port type. - * - * Additionally, the range of events associated with the port types changed - * between Rev0 and Rev1. - * - * The constants below define the range of valid codes for each port type for - * the different revisions and are used to validate the event to be monitored. - */ - -#define CCI400_PMU_EVENT_MASK 0xffUL -#define CCI400_PMU_EVENT_SOURCE_SHIFT 5 -#define CCI400_PMU_EVENT_SOURCE_MASK 0x7 -#define CCI400_PMU_EVENT_CODE_SHIFT 0 -#define CCI400_PMU_EVENT_CODE_MASK 0x1f -#define CCI400_PMU_EVENT_SOURCE(event) \ - ((event >> CCI400_PMU_EVENT_SOURCE_SHIFT) & \ - CCI400_PMU_EVENT_SOURCE_MASK) -#define CCI400_PMU_EVENT_CODE(event) \ - ((event >> CCI400_PMU_EVENT_CODE_SHIFT) & CCI400_PMU_EVENT_CODE_MASK) - -#define CCI400_R0_SLAVE_PORT_MIN_EV 0x00 -#define CCI400_R0_SLAVE_PORT_MAX_EV 0x13 -#define CCI400_R0_MASTER_PORT_MIN_EV 0x14 -#define CCI400_R0_MASTER_PORT_MAX_EV 0x1a - -#define CCI400_R1_SLAVE_PORT_MIN_EV 0x00 -#define CCI400_R1_SLAVE_PORT_MAX_EV 0x14 -#define CCI400_R1_MASTER_PORT_MIN_EV 0x00 -#define CCI400_R1_MASTER_PORT_MAX_EV 0x11 - -#define CCI400_CYCLE_EVENT_EXT_ATTR_ENTRY(_name, _config) \ - CCI_EXT_ATTR_ENTRY(_name, cci400_pmu_cycle_event_show, \ - (unsigned long)_config) - -static ssize_t cci400_pmu_cycle_event_show(struct device *dev, - struct device_attribute *attr, char *buf); - -static struct attribute *cci400_pmu_format_attrs[] = { - CCI_FORMAT_EXT_ATTR_ENTRY(event, "config:0-4"), - CCI_FORMAT_EXT_ATTR_ENTRY(source, "config:5-7"), - NULL -}; - -static struct attribute *cci400_r0_pmu_event_attrs[] = { - /* Slave events */ - CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_any, 0x0), - CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_device, 0x01), - CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_normal_or_nonshareable, 0x2), - CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_inner_or_outershareable, 0x3), - CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_cache_maintenance, 0x4), - CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_mem_barrier, 0x5), - CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_sync_barrier, 0x6), - CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_dvm_msg, 0x7), - CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_dvm_msg_sync, 0x8), - CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_stall_tt_full, 0x9), - CCI_EVENT_EXT_ATTR_ENTRY(si_r_data_last_hs_snoop, 0xA), - CCI_EVENT_EXT_ATTR_ENTRY(si_r_data_stall_rvalids_h_rready_l, 0xB), - CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_any, 0xC), - CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_device, 0xD), - CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_normal_or_nonshareable, 0xE), - CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_inner_or_outershare_wback_wclean, 0xF), - CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_write_unique, 0x10), - CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_write_line_unique, 0x11), - CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_evict, 0x12), - CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_stall_tt_full, 0x13), - /* Master events */ - CCI_EVENT_EXT_ATTR_ENTRY(mi_retry_speculative_fetch, 0x14), - CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_addr_hazard, 0x15), - CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_id_hazard, 0x16), - CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_tt_full, 0x17), - CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_barrier_hazard, 0x18), - CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_barrier_hazard, 0x19), - CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_tt_full, 0x1A), - /* Special event for cycles counter */ - CCI400_CYCLE_EVENT_EXT_ATTR_ENTRY(cycles, 0xff), - NULL -}; - -static struct attribute *cci400_r1_pmu_event_attrs[] = { - /* Slave events */ - CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_any, 0x0), - CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_device, 0x01), - CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_normal_or_nonshareable, 0x2), - CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_inner_or_outershareable, 0x3), - CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_cache_maintenance, 0x4), - CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_mem_barrier, 0x5), - CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_sync_barrier, 0x6), - CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_dvm_msg, 0x7), - CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_dvm_msg_sync, 0x8), - CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_stall_tt_full, 0x9), - CCI_EVENT_EXT_ATTR_ENTRY(si_r_data_last_hs_snoop, 0xA), - CCI_EVENT_EXT_ATTR_ENTRY(si_r_data_stall_rvalids_h_rready_l, 0xB), - CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_any, 0xC), - CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_device, 0xD), - CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_normal_or_nonshareable, 0xE), - CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_inner_or_outershare_wback_wclean, 0xF), - CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_write_unique, 0x10), - CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_write_line_unique, 0x11), - CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_evict, 0x12), - CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_stall_tt_full, 0x13), - CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_stall_slave_id_hazard, 0x14), - /* Master events */ - CCI_EVENT_EXT_ATTR_ENTRY(mi_retry_speculative_fetch, 0x0), - CCI_EVENT_EXT_ATTR_ENTRY(mi_stall_cycle_addr_hazard, 0x1), - CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_master_id_hazard, 0x2), - CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_hi_prio_rtq_full, 0x3), - CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_barrier_hazard, 0x4), - CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_barrier_hazard, 0x5), - CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_wtq_full, 0x6), - CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_low_prio_rtq_full, 0x7), - CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_mid_prio_rtq_full, 0x8), - CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_qvn_vn0, 0x9), - CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_qvn_vn1, 0xA), - CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_qvn_vn2, 0xB), - CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_qvn_vn3, 0xC), - CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_qvn_vn0, 0xD), - CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_qvn_vn1, 0xE), - CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_qvn_vn2, 0xF), - CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_qvn_vn3, 0x10), - CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_unique_or_line_unique_addr_hazard, 0x11), - /* Special event for cycles counter */ - CCI400_CYCLE_EVENT_EXT_ATTR_ENTRY(cycles, 0xff), - NULL -}; - -static ssize_t cci400_pmu_cycle_event_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct dev_ext_attribute *eattr = container_of(attr, - struct dev_ext_attribute, attr); - return snprintf(buf, PAGE_SIZE, "config=0x%lx\n", (unsigned long)eattr->var); -} - -static int cci400_get_event_idx(struct cci_pmu *cci_pmu, - struct cci_pmu_hw_events *hw, - unsigned long cci_event) -{ - int idx; - - /* cycles event idx is fixed */ - if (cci_event == CCI400_PMU_CYCLES) { - if (test_and_set_bit(CCI400_PMU_CYCLE_CNTR_IDX, hw->used_mask)) - return -EAGAIN; - - return CCI400_PMU_CYCLE_CNTR_IDX; - } - - for (idx = CCI400_PMU_CNTR0_IDX; idx <= CCI_PMU_CNTR_LAST(cci_pmu); ++idx) - if (!test_and_set_bit(idx, hw->used_mask)) - return idx; - - /* No counters available */ - return -EAGAIN; -} - -static int cci400_validate_hw_event(struct cci_pmu *cci_pmu, unsigned long hw_event) -{ - u8 ev_source = CCI400_PMU_EVENT_SOURCE(hw_event); - u8 ev_code = CCI400_PMU_EVENT_CODE(hw_event); - int if_type; - - if (hw_event & ~CCI400_PMU_EVENT_MASK) - return -ENOENT; - - if (hw_event == CCI400_PMU_CYCLES) - return hw_event; - - switch (ev_source) { - case CCI400_PORT_S0: - case CCI400_PORT_S1: - case CCI400_PORT_S2: - case CCI400_PORT_S3: - case CCI400_PORT_S4: - /* Slave Interface */ - if_type = CCI_IF_SLAVE; - break; - case CCI400_PORT_M0: - case CCI400_PORT_M1: - case CCI400_PORT_M2: - /* Master Interface */ - if_type = CCI_IF_MASTER; - break; - default: - return -ENOENT; - } - - if (ev_code >= cci_pmu->model->event_ranges[if_type].min && - ev_code <= cci_pmu->model->event_ranges[if_type].max) - return hw_event; - - return -ENOENT; -} - -static int probe_cci400_revision(void) -{ - int rev; - rev = readl_relaxed(cci_ctrl_base + CCI_PID2) & CCI_PID2_REV_MASK; - rev >>= CCI_PID2_REV_SHIFT; - - if (rev < CCI400_R1_PX) - return CCI400_R0; - else - return CCI400_R1; -} - -static const struct cci_pmu_model *probe_cci_model(struct platform_device *pdev) -{ - if (platform_has_secure_cci_access()) - return &cci_pmu_models[probe_cci400_revision()]; - return NULL; -} -#else /* !CONFIG_ARM_CCI400_PMU */ -static inline struct cci_pmu_model *probe_cci_model(struct platform_device *pdev) -{ - return NULL; -} -#endif /* CONFIG_ARM_CCI400_PMU */ - -#ifdef CONFIG_ARM_CCI5xx_PMU - -/* - * CCI5xx PMU event id is an 9-bit value made of two parts. - * bits [8:5] - Source for the event - * bits [4:0] - Event code (specific to type of interface) - * - * - */ - -/* Port ids */ -#define CCI5xx_PORT_S0 0x0 -#define CCI5xx_PORT_S1 0x1 -#define CCI5xx_PORT_S2 0x2 -#define CCI5xx_PORT_S3 0x3 -#define CCI5xx_PORT_S4 0x4 -#define CCI5xx_PORT_S5 0x5 -#define CCI5xx_PORT_S6 0x6 - -#define CCI5xx_PORT_M0 0x8 -#define CCI5xx_PORT_M1 0x9 -#define CCI5xx_PORT_M2 0xa -#define CCI5xx_PORT_M3 0xb -#define CCI5xx_PORT_M4 0xc -#define CCI5xx_PORT_M5 0xd -#define CCI5xx_PORT_M6 0xe - -#define CCI5xx_PORT_GLOBAL 0xf - -#define CCI5xx_PMU_EVENT_MASK 0x1ffUL -#define CCI5xx_PMU_EVENT_SOURCE_SHIFT 0x5 -#define CCI5xx_PMU_EVENT_SOURCE_MASK 0xf -#define CCI5xx_PMU_EVENT_CODE_SHIFT 0x0 -#define CCI5xx_PMU_EVENT_CODE_MASK 0x1f - -#define CCI5xx_PMU_EVENT_SOURCE(event) \ - ((event >> CCI5xx_PMU_EVENT_SOURCE_SHIFT) & CCI5xx_PMU_EVENT_SOURCE_MASK) -#define CCI5xx_PMU_EVENT_CODE(event) \ - ((event >> CCI5xx_PMU_EVENT_CODE_SHIFT) & CCI5xx_PMU_EVENT_CODE_MASK) - -#define CCI5xx_SLAVE_PORT_MIN_EV 0x00 -#define CCI5xx_SLAVE_PORT_MAX_EV 0x1f -#define CCI5xx_MASTER_PORT_MIN_EV 0x00 -#define CCI5xx_MASTER_PORT_MAX_EV 0x06 -#define CCI5xx_GLOBAL_PORT_MIN_EV 0x00 -#define CCI5xx_GLOBAL_PORT_MAX_EV 0x0f - - -#define CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(_name, _config) \ - CCI_EXT_ATTR_ENTRY(_name, cci5xx_pmu_global_event_show, \ - (unsigned long) _config) - -static ssize_t cci5xx_pmu_global_event_show(struct device *dev, - struct device_attribute *attr, char *buf); - -static struct attribute *cci5xx_pmu_format_attrs[] = { - CCI_FORMAT_EXT_ATTR_ENTRY(event, "config:0-4"), - CCI_FORMAT_EXT_ATTR_ENTRY(source, "config:5-8"), - NULL, -}; - -static struct attribute *cci5xx_pmu_event_attrs[] = { - /* Slave events */ - CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_arvalid, 0x0), - CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_dev, 0x1), - CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_nonshareable, 0x2), - CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_shareable_non_alloc, 0x3), - CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_shareable_alloc, 0x4), - CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_invalidate, 0x5), - CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_cache_maint, 0x6), - CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_dvm_msg, 0x7), - CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_rval, 0x8), - CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_rlast_snoop, 0x9), - CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_awalid, 0xA), - CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_dev, 0xB), - CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_non_shareable, 0xC), - CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_share_wb, 0xD), - CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_share_wlu, 0xE), - CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_share_wunique, 0xF), - CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_evict, 0x10), - CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_wrevict, 0x11), - CCI_EVENT_EXT_ATTR_ENTRY(si_w_data_beat, 0x12), - CCI_EVENT_EXT_ATTR_ENTRY(si_srq_acvalid, 0x13), - CCI_EVENT_EXT_ATTR_ENTRY(si_srq_read, 0x14), - CCI_EVENT_EXT_ATTR_ENTRY(si_srq_clean, 0x15), - CCI_EVENT_EXT_ATTR_ENTRY(si_srq_data_transfer_low, 0x16), - CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_stall_arvalid, 0x17), - CCI_EVENT_EXT_ATTR_ENTRY(si_r_data_stall, 0x18), - CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_stall, 0x19), - CCI_EVENT_EXT_ATTR_ENTRY(si_w_data_stall, 0x1A), - CCI_EVENT_EXT_ATTR_ENTRY(si_w_resp_stall, 0x1B), - CCI_EVENT_EXT_ATTR_ENTRY(si_srq_stall, 0x1C), - CCI_EVENT_EXT_ATTR_ENTRY(si_s_data_stall, 0x1D), - CCI_EVENT_EXT_ATTR_ENTRY(si_rq_stall_ot_limit, 0x1E), - CCI_EVENT_EXT_ATTR_ENTRY(si_r_stall_arbit, 0x1F), - - /* Master events */ - CCI_EVENT_EXT_ATTR_ENTRY(mi_r_data_beat_any, 0x0), - CCI_EVENT_EXT_ATTR_ENTRY(mi_w_data_beat_any, 0x1), - CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall, 0x2), - CCI_EVENT_EXT_ATTR_ENTRY(mi_r_data_stall, 0x3), - CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall, 0x4), - CCI_EVENT_EXT_ATTR_ENTRY(mi_w_data_stall, 0x5), - CCI_EVENT_EXT_ATTR_ENTRY(mi_w_resp_stall, 0x6), - - /* Global events */ - CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_0_1, 0x0), - CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_2_3, 0x1), - CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_4_5, 0x2), - CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_6_7, 0x3), - CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_0_1, 0x4), - CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_2_3, 0x5), - CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_4_5, 0x6), - CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_6_7, 0x7), - CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_back_invalidation, 0x8), - CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_stall_alloc_busy, 0x9), - CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_stall_tt_full, 0xA), - CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_wrq, 0xB), - CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_cd_hs, 0xC), - CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_rq_stall_addr_hazard, 0xD), - CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_rq_stall_tt_full, 0xE), - CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_rq_tzmp1_prot, 0xF), - NULL -}; - -static ssize_t cci5xx_pmu_global_event_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct dev_ext_attribute *eattr = container_of(attr, - struct dev_ext_attribute, attr); - /* Global events have single fixed source code */ - return snprintf(buf, PAGE_SIZE, "event=0x%lx,source=0x%x\n", - (unsigned long)eattr->var, CCI5xx_PORT_GLOBAL); -} - -/* - * CCI500 provides 8 independent event counters that can count - * any of the events available. - * CCI500 PMU event source ids - * 0x0-0x6 - Slave interfaces - * 0x8-0xD - Master interfaces - * 0xf - Global Events - * 0x7,0xe - Reserved - */ -static int cci500_validate_hw_event(struct cci_pmu *cci_pmu, - unsigned long hw_event) -{ - u32 ev_source = CCI5xx_PMU_EVENT_SOURCE(hw_event); - u32 ev_code = CCI5xx_PMU_EVENT_CODE(hw_event); - int if_type; - - if (hw_event & ~CCI5xx_PMU_EVENT_MASK) - return -ENOENT; - - switch (ev_source) { - case CCI5xx_PORT_S0: - case CCI5xx_PORT_S1: - case CCI5xx_PORT_S2: - case CCI5xx_PORT_S3: - case CCI5xx_PORT_S4: - case CCI5xx_PORT_S5: - case CCI5xx_PORT_S6: - if_type = CCI_IF_SLAVE; - break; - case CCI5xx_PORT_M0: - case CCI5xx_PORT_M1: - case CCI5xx_PORT_M2: - case CCI5xx_PORT_M3: - case CCI5xx_PORT_M4: - case CCI5xx_PORT_M5: - if_type = CCI_IF_MASTER; - break; - case CCI5xx_PORT_GLOBAL: - if_type = CCI_IF_GLOBAL; - break; - default: - return -ENOENT; - } - - if (ev_code >= cci_pmu->model->event_ranges[if_type].min && - ev_code <= cci_pmu->model->event_ranges[if_type].max) - return hw_event; - - return -ENOENT; -} - -/* - * CCI550 provides 8 independent event counters that can count - * any of the events available. - * CCI550 PMU event source ids - * 0x0-0x6 - Slave interfaces - * 0x8-0xe - Master interfaces - * 0xf - Global Events - * 0x7 - Reserved - */ -static int cci550_validate_hw_event(struct cci_pmu *cci_pmu, - unsigned long hw_event) -{ - u32 ev_source = CCI5xx_PMU_EVENT_SOURCE(hw_event); - u32 ev_code = CCI5xx_PMU_EVENT_CODE(hw_event); - int if_type; - - if (hw_event & ~CCI5xx_PMU_EVENT_MASK) - return -ENOENT; - - switch (ev_source) { - case CCI5xx_PORT_S0: - case CCI5xx_PORT_S1: - case CCI5xx_PORT_S2: - case CCI5xx_PORT_S3: - case CCI5xx_PORT_S4: - case CCI5xx_PORT_S5: - case CCI5xx_PORT_S6: - if_type = CCI_IF_SLAVE; - break; - case CCI5xx_PORT_M0: - case CCI5xx_PORT_M1: - case CCI5xx_PORT_M2: - case CCI5xx_PORT_M3: - case CCI5xx_PORT_M4: - case CCI5xx_PORT_M5: - case CCI5xx_PORT_M6: - if_type = CCI_IF_MASTER; - break; - case CCI5xx_PORT_GLOBAL: - if_type = CCI_IF_GLOBAL; - break; - default: - return -ENOENT; - } - - if (ev_code >= cci_pmu->model->event_ranges[if_type].min && - ev_code <= cci_pmu->model->event_ranges[if_type].max) - return hw_event; - - return -ENOENT; -} - -#endif /* CONFIG_ARM_CCI5xx_PMU */ - -/* - * Program the CCI PMU counters which have PERF_HES_ARCH set - * with the event period and mark them ready before we enable - * PMU. - */ -static void cci_pmu_sync_counters(struct cci_pmu *cci_pmu) -{ - int i; - struct cci_pmu_hw_events *cci_hw = &cci_pmu->hw_events; - - DECLARE_BITMAP(mask, cci_pmu->num_cntrs); - - bitmap_zero(mask, cci_pmu->num_cntrs); - for_each_set_bit(i, cci_pmu->hw_events.used_mask, cci_pmu->num_cntrs) { - struct perf_event *event = cci_hw->events[i]; - - if (WARN_ON(!event)) - continue; - - /* Leave the events which are not counting */ - if (event->hw.state & PERF_HES_STOPPED) - continue; - if (event->hw.state & PERF_HES_ARCH) { - set_bit(i, mask); - event->hw.state &= ~PERF_HES_ARCH; - } - } - - pmu_write_counters(cci_pmu, mask); -} - -/* Should be called with cci_pmu->hw_events->pmu_lock held */ -static void __cci_pmu_enable_nosync(struct cci_pmu *cci_pmu) -{ - u32 val; - - /* Enable all the PMU counters. */ - val = readl_relaxed(cci_ctrl_base + CCI_PMCR) | CCI_PMCR_CEN; - writel(val, cci_ctrl_base + CCI_PMCR); -} - -/* Should be called with cci_pmu->hw_events->pmu_lock held */ -static void __cci_pmu_enable_sync(struct cci_pmu *cci_pmu) -{ - cci_pmu_sync_counters(cci_pmu); - __cci_pmu_enable_nosync(cci_pmu); -} - -/* Should be called with cci_pmu->hw_events->pmu_lock held */ -static void __cci_pmu_disable(void) -{ - u32 val; - - /* Disable all the PMU counters. */ - val = readl_relaxed(cci_ctrl_base + CCI_PMCR) & ~CCI_PMCR_CEN; - writel(val, cci_ctrl_base + CCI_PMCR); -} - -static ssize_t cci_pmu_format_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct dev_ext_attribute *eattr = container_of(attr, - struct dev_ext_attribute, attr); - return snprintf(buf, PAGE_SIZE, "%s\n", (char *)eattr->var); -} - -static ssize_t cci_pmu_event_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct dev_ext_attribute *eattr = container_of(attr, - struct dev_ext_attribute, attr); - /* source parameter is mandatory for normal PMU events */ - return snprintf(buf, PAGE_SIZE, "source=?,event=0x%lx\n", - (unsigned long)eattr->var); -} - -static int pmu_is_valid_counter(struct cci_pmu *cci_pmu, int idx) -{ - return 0 <= idx && idx <= CCI_PMU_CNTR_LAST(cci_pmu); -} - -static u32 pmu_read_register(struct cci_pmu *cci_pmu, int idx, unsigned int offset) -{ - return readl_relaxed(cci_pmu->base + - CCI_PMU_CNTR_BASE(cci_pmu->model, idx) + offset); -} - -static void pmu_write_register(struct cci_pmu *cci_pmu, u32 value, - int idx, unsigned int offset) -{ - writel_relaxed(value, cci_pmu->base + - CCI_PMU_CNTR_BASE(cci_pmu->model, idx) + offset); -} - -static void pmu_disable_counter(struct cci_pmu *cci_pmu, int idx) -{ - pmu_write_register(cci_pmu, 0, idx, CCI_PMU_CNTR_CTRL); -} - -static void pmu_enable_counter(struct cci_pmu *cci_pmu, int idx) -{ - pmu_write_register(cci_pmu, 1, idx, CCI_PMU_CNTR_CTRL); -} - -static bool __maybe_unused -pmu_counter_is_enabled(struct cci_pmu *cci_pmu, int idx) -{ - return (pmu_read_register(cci_pmu, idx, CCI_PMU_CNTR_CTRL) & 0x1) != 0; -} - -static void pmu_set_event(struct cci_pmu *cci_pmu, int idx, unsigned long event) -{ - pmu_write_register(cci_pmu, event, idx, CCI_PMU_EVT_SEL); -} - -/* - * For all counters on the CCI-PMU, disable any 'enabled' counters, - * saving the changed counters in the mask, so that we can restore - * it later using pmu_restore_counters. The mask is private to the - * caller. We cannot rely on the used_mask maintained by the CCI_PMU - * as it only tells us if the counter is assigned to perf_event or not. - * The state of the perf_event cannot be locked by the PMU layer, hence - * we check the individual counter status (which can be locked by - * cci_pm->hw_events->pmu_lock). - * - * @mask should be initialised to empty by the caller. - */ -static void __maybe_unused -pmu_save_counters(struct cci_pmu *cci_pmu, unsigned long *mask) -{ - int i; - - for (i = 0; i < cci_pmu->num_cntrs; i++) { - if (pmu_counter_is_enabled(cci_pmu, i)) { - set_bit(i, mask); - pmu_disable_counter(cci_pmu, i); - } - } -} - -/* - * Restore the status of the counters. Reversal of the pmu_save_counters(). - * For each counter set in the mask, enable the counter back. - */ -static void __maybe_unused -pmu_restore_counters(struct cci_pmu *cci_pmu, unsigned long *mask) -{ - int i; - - for_each_set_bit(i, mask, cci_pmu->num_cntrs) - pmu_enable_counter(cci_pmu, i); -} - -/* - * Returns the number of programmable counters actually implemented - * by the cci - */ -static u32 pmu_get_max_counters(void) -{ - return (readl_relaxed(cci_ctrl_base + CCI_PMCR) & - CCI_PMCR_NCNT_MASK) >> CCI_PMCR_NCNT_SHIFT; -} - -static int pmu_get_event_idx(struct cci_pmu_hw_events *hw, struct perf_event *event) -{ - struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); - unsigned long cci_event = event->hw.config_base; - int idx; - - if (cci_pmu->model->get_event_idx) - return cci_pmu->model->get_event_idx(cci_pmu, hw, cci_event); - - /* Generic code to find an unused idx from the mask */ - for(idx = 0; idx <= CCI_PMU_CNTR_LAST(cci_pmu); idx++) - if (!test_and_set_bit(idx, hw->used_mask)) - return idx; - - /* No counters available */ - return -EAGAIN; -} - -static int pmu_map_event(struct perf_event *event) -{ - struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); - - if (event->attr.type < PERF_TYPE_MAX || - !cci_pmu->model->validate_hw_event) - return -ENOENT; - - return cci_pmu->model->validate_hw_event(cci_pmu, event->attr.config); -} - -static int pmu_request_irq(struct cci_pmu *cci_pmu, irq_handler_t handler) -{ - int i; - struct platform_device *pmu_device = cci_pmu->plat_device; - - if (unlikely(!pmu_device)) - return -ENODEV; - - if (cci_pmu->nr_irqs < 1) { - dev_err(&pmu_device->dev, "no irqs for CCI PMUs defined\n"); - return -ENODEV; - } - - /* - * Register all available CCI PMU interrupts. In the interrupt handler - * we iterate over the counters checking for interrupt source (the - * overflowing counter) and clear it. - * - * This should allow handling of non-unique interrupt for the counters. - */ - for (i = 0; i < cci_pmu->nr_irqs; i++) { - int err = request_irq(cci_pmu->irqs[i], handler, IRQF_SHARED, - "arm-cci-pmu", cci_pmu); - if (err) { - dev_err(&pmu_device->dev, "unable to request IRQ%d for ARM CCI PMU counters\n", - cci_pmu->irqs[i]); - return err; - } - - set_bit(i, &cci_pmu->active_irqs); - } - - return 0; -} - -static void pmu_free_irq(struct cci_pmu *cci_pmu) -{ - int i; - - for (i = 0; i < cci_pmu->nr_irqs; i++) { - if (!test_and_clear_bit(i, &cci_pmu->active_irqs)) - continue; - - free_irq(cci_pmu->irqs[i], cci_pmu); - } -} - -static u32 pmu_read_counter(struct perf_event *event) -{ - struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); - struct hw_perf_event *hw_counter = &event->hw; - int idx = hw_counter->idx; - u32 value; - - if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) { - dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); - return 0; - } - value = pmu_read_register(cci_pmu, idx, CCI_PMU_CNTR); - - return value; -} - -static void pmu_write_counter(struct cci_pmu *cci_pmu, u32 value, int idx) -{ - pmu_write_register(cci_pmu, value, idx, CCI_PMU_CNTR); -} - -static void __pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask) -{ - int i; - struct cci_pmu_hw_events *cci_hw = &cci_pmu->hw_events; - - for_each_set_bit(i, mask, cci_pmu->num_cntrs) { - struct perf_event *event = cci_hw->events[i]; - - if (WARN_ON(!event)) - continue; - pmu_write_counter(cci_pmu, local64_read(&event->hw.prev_count), i); - } -} - -static void pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask) -{ - if (cci_pmu->model->write_counters) - cci_pmu->model->write_counters(cci_pmu, mask); - else - __pmu_write_counters(cci_pmu, mask); -} - -#ifdef CONFIG_ARM_CCI5xx_PMU - -/* - * CCI-500/CCI-550 has advanced power saving policies, which could gate the - * clocks to the PMU counters, which makes the writes to them ineffective. - * The only way to write to those counters is when the global counters - * are enabled and the particular counter is enabled. - * - * So we do the following : - * - * 1) Disable all the PMU counters, saving their current state - * 2) Enable the global PMU profiling, now that all counters are - * disabled. - * - * For each counter to be programmed, repeat steps 3-7: - * - * 3) Write an invalid event code to the event control register for the - counter, so that the counters are not modified. - * 4) Enable the counter control for the counter. - * 5) Set the counter value - * 6) Disable the counter - * 7) Restore the event in the target counter - * - * 8) Disable the global PMU. - * 9) Restore the status of the rest of the counters. - * - * We choose an event which for CCI-5xx is guaranteed not to count. - * We use the highest possible event code (0x1f) for the master interface 0. - */ -#define CCI5xx_INVALID_EVENT ((CCI5xx_PORT_M0 << CCI5xx_PMU_EVENT_SOURCE_SHIFT) | \ - (CCI5xx_PMU_EVENT_CODE_MASK << CCI5xx_PMU_EVENT_CODE_SHIFT)) -static void cci5xx_pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask) -{ - int i; - DECLARE_BITMAP(saved_mask, cci_pmu->num_cntrs); - - bitmap_zero(saved_mask, cci_pmu->num_cntrs); - pmu_save_counters(cci_pmu, saved_mask); - - /* - * Now that all the counters are disabled, we can safely turn the PMU on, - * without syncing the status of the counters - */ - __cci_pmu_enable_nosync(cci_pmu); - - for_each_set_bit(i, mask, cci_pmu->num_cntrs) { - struct perf_event *event = cci_pmu->hw_events.events[i]; - - if (WARN_ON(!event)) - continue; - - pmu_set_event(cci_pmu, i, CCI5xx_INVALID_EVENT); - pmu_enable_counter(cci_pmu, i); - pmu_write_counter(cci_pmu, local64_read(&event->hw.prev_count), i); - pmu_disable_counter(cci_pmu, i); - pmu_set_event(cci_pmu, i, event->hw.config_base); - } - - __cci_pmu_disable(); - - pmu_restore_counters(cci_pmu, saved_mask); -} - -#endif /* CONFIG_ARM_CCI5xx_PMU */ - -static u64 pmu_event_update(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - u64 delta, prev_raw_count, new_raw_count; - - do { - prev_raw_count = local64_read(&hwc->prev_count); - new_raw_count = pmu_read_counter(event); - } while (local64_cmpxchg(&hwc->prev_count, prev_raw_count, - new_raw_count) != prev_raw_count); - - delta = (new_raw_count - prev_raw_count) & CCI_PMU_CNTR_MASK; - - local64_add(delta, &event->count); - - return new_raw_count; -} - -static void pmu_read(struct perf_event *event) -{ - pmu_event_update(event); -} - -static void pmu_event_set_period(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - /* - * The CCI PMU counters have a period of 2^32. To account for the - * possiblity of extreme interrupt latency we program for a period of - * half that. Hopefully we can handle the interrupt before another 2^31 - * events occur and the counter overtakes its previous value. - */ - u64 val = 1ULL << 31; - local64_set(&hwc->prev_count, val); - - /* - * CCI PMU uses PERF_HES_ARCH to keep track of the counters, whose - * values needs to be sync-ed with the s/w state before the PMU is - * enabled. - * Mark this counter for sync. - */ - hwc->state |= PERF_HES_ARCH; -} - -static irqreturn_t pmu_handle_irq(int irq_num, void *dev) -{ - unsigned long flags; - struct cci_pmu *cci_pmu = dev; - struct cci_pmu_hw_events *events = &cci_pmu->hw_events; - int idx, handled = IRQ_NONE; - - raw_spin_lock_irqsave(&events->pmu_lock, flags); - - /* Disable the PMU while we walk through the counters */ - __cci_pmu_disable(); - /* - * Iterate over counters and update the corresponding perf events. - * This should work regardless of whether we have per-counter overflow - * interrupt or a combined overflow interrupt. - */ - for (idx = 0; idx <= CCI_PMU_CNTR_LAST(cci_pmu); idx++) { - struct perf_event *event = events->events[idx]; - - if (!event) - continue; - - /* Did this counter overflow? */ - if (!(pmu_read_register(cci_pmu, idx, CCI_PMU_OVRFLW) & - CCI_PMU_OVRFLW_FLAG)) - continue; - - pmu_write_register(cci_pmu, CCI_PMU_OVRFLW_FLAG, idx, - CCI_PMU_OVRFLW); - - pmu_event_update(event); - pmu_event_set_period(event); - handled = IRQ_HANDLED; - } - - /* Enable the PMU and sync possibly overflowed counters */ - __cci_pmu_enable_sync(cci_pmu); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); - - return IRQ_RETVAL(handled); -} - -static int cci_pmu_get_hw(struct cci_pmu *cci_pmu) -{ - int ret = pmu_request_irq(cci_pmu, pmu_handle_irq); - if (ret) { - pmu_free_irq(cci_pmu); - return ret; - } - return 0; -} - -static void cci_pmu_put_hw(struct cci_pmu *cci_pmu) -{ - pmu_free_irq(cci_pmu); -} - -static void hw_perf_event_destroy(struct perf_event *event) -{ - struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); - atomic_t *active_events = &cci_pmu->active_events; - struct mutex *reserve_mutex = &cci_pmu->reserve_mutex; - - if (atomic_dec_and_mutex_lock(active_events, reserve_mutex)) { - cci_pmu_put_hw(cci_pmu); - mutex_unlock(reserve_mutex); - } -} - -static void cci_pmu_enable(struct pmu *pmu) -{ - struct cci_pmu *cci_pmu = to_cci_pmu(pmu); - struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events; - int enabled = bitmap_weight(hw_events->used_mask, cci_pmu->num_cntrs); - unsigned long flags; - - if (!enabled) - return; - - raw_spin_lock_irqsave(&hw_events->pmu_lock, flags); - __cci_pmu_enable_sync(cci_pmu); - raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags); - -} - -static void cci_pmu_disable(struct pmu *pmu) -{ - struct cci_pmu *cci_pmu = to_cci_pmu(pmu); - struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events; - unsigned long flags; - - raw_spin_lock_irqsave(&hw_events->pmu_lock, flags); - __cci_pmu_disable(); - raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags); -} - -/* - * Check if the idx represents a non-programmable counter. - * All the fixed event counters are mapped before the programmable - * counters. - */ -static bool pmu_fixed_hw_idx(struct cci_pmu *cci_pmu, int idx) -{ - return (idx >= 0) && (idx < cci_pmu->model->fixed_hw_cntrs); -} - -static void cci_pmu_start(struct perf_event *event, int pmu_flags) -{ - struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); - struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events; - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - unsigned long flags; - - /* - * To handle interrupt latency, we always reprogram the period - * regardlesss of PERF_EF_RELOAD. - */ - if (pmu_flags & PERF_EF_RELOAD) - WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); - - hwc->state = 0; - - if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) { - dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); - return; - } - - raw_spin_lock_irqsave(&hw_events->pmu_lock, flags); - - /* Configure the counter unless you are counting a fixed event */ - if (!pmu_fixed_hw_idx(cci_pmu, idx)) - pmu_set_event(cci_pmu, idx, hwc->config_base); - - pmu_event_set_period(event); - pmu_enable_counter(cci_pmu, idx); - - raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags); -} - -static void cci_pmu_stop(struct perf_event *event, int pmu_flags) -{ - struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - if (hwc->state & PERF_HES_STOPPED) - return; - - if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) { - dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); - return; - } - - /* - * We always reprogram the counter, so ignore PERF_EF_UPDATE. See - * cci_pmu_start() - */ - pmu_disable_counter(cci_pmu, idx); - pmu_event_update(event); - hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; -} - -static int cci_pmu_add(struct perf_event *event, int flags) -{ - struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); - struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events; - struct hw_perf_event *hwc = &event->hw; - int idx; - int err = 0; - - perf_pmu_disable(event->pmu); - - /* If we don't have a space for the counter then finish early. */ - idx = pmu_get_event_idx(hw_events, event); - if (idx < 0) { - err = idx; - goto out; - } - - event->hw.idx = idx; - hw_events->events[idx] = event; - - hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; - if (flags & PERF_EF_START) - cci_pmu_start(event, PERF_EF_RELOAD); - - /* Propagate our changes to the userspace mapping. */ - perf_event_update_userpage(event); - -out: - perf_pmu_enable(event->pmu); - return err; -} - -static void cci_pmu_del(struct perf_event *event, int flags) -{ - struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); - struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events; - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - cci_pmu_stop(event, PERF_EF_UPDATE); - hw_events->events[idx] = NULL; - clear_bit(idx, hw_events->used_mask); - - perf_event_update_userpage(event); -} - -static int -validate_event(struct pmu *cci_pmu, - struct cci_pmu_hw_events *hw_events, - struct perf_event *event) -{ - if (is_software_event(event)) - return 1; - - /* - * Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The - * core perf code won't check that the pmu->ctx == leader->ctx - * until after pmu->event_init(event). - */ - if (event->pmu != cci_pmu) - return 0; - - if (event->state < PERF_EVENT_STATE_OFF) - return 1; - - if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec) - return 1; - - return pmu_get_event_idx(hw_events, event) >= 0; -} - -static int -validate_group(struct perf_event *event) -{ - struct perf_event *sibling, *leader = event->group_leader; - struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); - unsigned long mask[BITS_TO_LONGS(cci_pmu->num_cntrs)]; - struct cci_pmu_hw_events fake_pmu = { - /* - * Initialise the fake PMU. We only need to populate the - * used_mask for the purposes of validation. - */ - .used_mask = mask, - }; - memset(mask, 0, BITS_TO_LONGS(cci_pmu->num_cntrs) * sizeof(unsigned long)); - - if (!validate_event(event->pmu, &fake_pmu, leader)) - return -EINVAL; - - list_for_each_entry(sibling, &leader->sibling_list, group_entry) { - if (!validate_event(event->pmu, &fake_pmu, sibling)) - return -EINVAL; - } - - if (!validate_event(event->pmu, &fake_pmu, event)) - return -EINVAL; - - return 0; -} - -static int -__hw_perf_event_init(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - int mapping; - - mapping = pmu_map_event(event); - - if (mapping < 0) { - pr_debug("event %x:%llx not supported\n", event->attr.type, - event->attr.config); - return mapping; - } - - /* - * We don't assign an index until we actually place the event onto - * hardware. Use -1 to signify that we haven't decided where to put it - * yet. - */ - hwc->idx = -1; - hwc->config_base = 0; - hwc->config = 0; - hwc->event_base = 0; - - /* - * Store the event encoding into the config_base field. - */ - hwc->config_base |= (unsigned long)mapping; - - /* - * Limit the sample_period to half of the counter width. That way, the - * new counter value is far less likely to overtake the previous one - * unless you have some serious IRQ latency issues. - */ - hwc->sample_period = CCI_PMU_CNTR_MASK >> 1; - hwc->last_period = hwc->sample_period; - local64_set(&hwc->period_left, hwc->sample_period); - - if (event->group_leader != event) { - if (validate_group(event) != 0) - return -EINVAL; - } - - return 0; -} - -static int cci_pmu_event_init(struct perf_event *event) -{ - struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); - atomic_t *active_events = &cci_pmu->active_events; - int err = 0; - int cpu; - - if (event->attr.type != event->pmu->type) - return -ENOENT; - - /* Shared by all CPUs, no meaningful state to sample */ - if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) - return -EOPNOTSUPP; - - /* We have no filtering of any kind */ - if (event->attr.exclude_user || - event->attr.exclude_kernel || - event->attr.exclude_hv || - event->attr.exclude_idle || - event->attr.exclude_host || - event->attr.exclude_guest) - return -EINVAL; - - /* - * Following the example set by other "uncore" PMUs, we accept any CPU - * and rewrite its affinity dynamically rather than having perf core - * handle cpu == -1 and pid == -1 for this case. - * - * The perf core will pin online CPUs for the duration of this call and - * the event being installed into its context, so the PMU's CPU can't - * change under our feet. - */ - cpu = cpumask_first(&cci_pmu->cpus); - if (event->cpu < 0 || cpu < 0) - return -EINVAL; - event->cpu = cpu; - - event->destroy = hw_perf_event_destroy; - if (!atomic_inc_not_zero(active_events)) { - mutex_lock(&cci_pmu->reserve_mutex); - if (atomic_read(active_events) == 0) - err = cci_pmu_get_hw(cci_pmu); - if (!err) - atomic_inc(active_events); - mutex_unlock(&cci_pmu->reserve_mutex); - } - if (err) - return err; - - err = __hw_perf_event_init(event); - if (err) - hw_perf_event_destroy(event); - - return err; -} - -static ssize_t pmu_cpumask_attr_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct pmu *pmu = dev_get_drvdata(dev); - struct cci_pmu *cci_pmu = to_cci_pmu(pmu); - - int n = scnprintf(buf, PAGE_SIZE - 1, "%*pbl", - cpumask_pr_args(&cci_pmu->cpus)); - buf[n++] = '\n'; - buf[n] = '\0'; - return n; -} - -static struct device_attribute pmu_cpumask_attr = - __ATTR(cpumask, S_IRUGO, pmu_cpumask_attr_show, NULL); - -static struct attribute *pmu_attrs[] = { - &pmu_cpumask_attr.attr, - NULL, -}; - -static struct attribute_group pmu_attr_group = { - .attrs = pmu_attrs, -}; - -static struct attribute_group pmu_format_attr_group = { - .name = "format", - .attrs = NULL, /* Filled in cci_pmu_init_attrs */ -}; - -static struct attribute_group pmu_event_attr_group = { - .name = "events", - .attrs = NULL, /* Filled in cci_pmu_init_attrs */ -}; - -static const struct attribute_group *pmu_attr_groups[] = { - &pmu_attr_group, - &pmu_format_attr_group, - &pmu_event_attr_group, - NULL -}; - -static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev) -{ - const struct cci_pmu_model *model = cci_pmu->model; - char *name = model->name; - u32 num_cntrs; - - pmu_event_attr_group.attrs = model->event_attrs; - pmu_format_attr_group.attrs = model->format_attrs; - - cci_pmu->pmu = (struct pmu) { - .name = cci_pmu->model->name, - .task_ctx_nr = perf_invalid_context, - .pmu_enable = cci_pmu_enable, - .pmu_disable = cci_pmu_disable, - .event_init = cci_pmu_event_init, - .add = cci_pmu_add, - .del = cci_pmu_del, - .start = cci_pmu_start, - .stop = cci_pmu_stop, - .read = pmu_read, - .attr_groups = pmu_attr_groups, - }; - - cci_pmu->plat_device = pdev; - num_cntrs = pmu_get_max_counters(); - if (num_cntrs > cci_pmu->model->num_hw_cntrs) { - dev_warn(&pdev->dev, - "PMU implements more counters(%d) than supported by" - " the model(%d), truncated.", - num_cntrs, cci_pmu->model->num_hw_cntrs); - num_cntrs = cci_pmu->model->num_hw_cntrs; - } - cci_pmu->num_cntrs = num_cntrs + cci_pmu->model->fixed_hw_cntrs; - - return perf_pmu_register(&cci_pmu->pmu, name, -1); -} - -static int cci_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) -{ - struct cci_pmu *cci_pmu = hlist_entry_safe(node, struct cci_pmu, node); - unsigned int target; - - if (!cpumask_test_and_clear_cpu(cpu, &cci_pmu->cpus)) - return 0; - target = cpumask_any_but(cpu_online_mask, cpu); - if (target >= nr_cpu_ids) - return 0; - /* - * TODO: migrate context once core races on event->ctx have - * been fixed. - */ - cpumask_set_cpu(target, &cci_pmu->cpus); - return 0; -} - -static struct cci_pmu_model cci_pmu_models[] = { -#ifdef CONFIG_ARM_CCI400_PMU - [CCI400_R0] = { - .name = "CCI_400", - .fixed_hw_cntrs = 1, /* Cycle counter */ - .num_hw_cntrs = 4, - .cntr_size = SZ_4K, - .format_attrs = cci400_pmu_format_attrs, - .event_attrs = cci400_r0_pmu_event_attrs, - .event_ranges = { - [CCI_IF_SLAVE] = { - CCI400_R0_SLAVE_PORT_MIN_EV, - CCI400_R0_SLAVE_PORT_MAX_EV, - }, - [CCI_IF_MASTER] = { - CCI400_R0_MASTER_PORT_MIN_EV, - CCI400_R0_MASTER_PORT_MAX_EV, - }, - }, - .validate_hw_event = cci400_validate_hw_event, - .get_event_idx = cci400_get_event_idx, - }, - [CCI400_R1] = { - .name = "CCI_400_r1", - .fixed_hw_cntrs = 1, /* Cycle counter */ - .num_hw_cntrs = 4, - .cntr_size = SZ_4K, - .format_attrs = cci400_pmu_format_attrs, - .event_attrs = cci400_r1_pmu_event_attrs, - .event_ranges = { - [CCI_IF_SLAVE] = { - CCI400_R1_SLAVE_PORT_MIN_EV, - CCI400_R1_SLAVE_PORT_MAX_EV, - }, - [CCI_IF_MASTER] = { - CCI400_R1_MASTER_PORT_MIN_EV, - CCI400_R1_MASTER_PORT_MAX_EV, - }, - }, - .validate_hw_event = cci400_validate_hw_event, - .get_event_idx = cci400_get_event_idx, - }, -#endif -#ifdef CONFIG_ARM_CCI5xx_PMU - [CCI500_R0] = { - .name = "CCI_500", - .fixed_hw_cntrs = 0, - .num_hw_cntrs = 8, - .cntr_size = SZ_64K, - .format_attrs = cci5xx_pmu_format_attrs, - .event_attrs = cci5xx_pmu_event_attrs, - .event_ranges = { - [CCI_IF_SLAVE] = { - CCI5xx_SLAVE_PORT_MIN_EV, - CCI5xx_SLAVE_PORT_MAX_EV, - }, - [CCI_IF_MASTER] = { - CCI5xx_MASTER_PORT_MIN_EV, - CCI5xx_MASTER_PORT_MAX_EV, - }, - [CCI_IF_GLOBAL] = { - CCI5xx_GLOBAL_PORT_MIN_EV, - CCI5xx_GLOBAL_PORT_MAX_EV, - }, - }, - .validate_hw_event = cci500_validate_hw_event, - .write_counters = cci5xx_pmu_write_counters, - }, - [CCI550_R0] = { - .name = "CCI_550", - .fixed_hw_cntrs = 0, - .num_hw_cntrs = 8, - .cntr_size = SZ_64K, - .format_attrs = cci5xx_pmu_format_attrs, - .event_attrs = cci5xx_pmu_event_attrs, - .event_ranges = { - [CCI_IF_SLAVE] = { - CCI5xx_SLAVE_PORT_MIN_EV, - CCI5xx_SLAVE_PORT_MAX_EV, - }, - [CCI_IF_MASTER] = { - CCI5xx_MASTER_PORT_MIN_EV, - CCI5xx_MASTER_PORT_MAX_EV, - }, - [CCI_IF_GLOBAL] = { - CCI5xx_GLOBAL_PORT_MIN_EV, - CCI5xx_GLOBAL_PORT_MAX_EV, - }, - }, - .validate_hw_event = cci550_validate_hw_event, - .write_counters = cci5xx_pmu_write_counters, - }, -#endif -}; - -static const struct of_device_id arm_cci_pmu_matches[] = { -#ifdef CONFIG_ARM_CCI400_PMU - { - .compatible = "arm,cci-400-pmu", - .data = NULL, - }, - { - .compatible = "arm,cci-400-pmu,r0", - .data = &cci_pmu_models[CCI400_R0], - }, - { - .compatible = "arm,cci-400-pmu,r1", - .data = &cci_pmu_models[CCI400_R1], - }, -#endif -#ifdef CONFIG_ARM_CCI5xx_PMU - { - .compatible = "arm,cci-500-pmu,r0", - .data = &cci_pmu_models[CCI500_R0], - }, - { - .compatible = "arm,cci-550-pmu,r0", - .data = &cci_pmu_models[CCI550_R0], - }, -#endif - {}, -}; - -static inline const struct cci_pmu_model *get_cci_model(struct platform_device *pdev) -{ - const struct of_device_id *match = of_match_node(arm_cci_pmu_matches, - pdev->dev.of_node); - if (!match) - return NULL; - if (match->data) - return match->data; - - dev_warn(&pdev->dev, "DEPRECATED compatible property," - "requires secure access to CCI registers"); - return probe_cci_model(pdev); -} - -static bool is_duplicate_irq(int irq, int *irqs, int nr_irqs) -{ - int i; - - for (i = 0; i < nr_irqs; i++) - if (irq == irqs[i]) - return true; - - return false; -} - -static struct cci_pmu *cci_pmu_alloc(struct platform_device *pdev) -{ - struct cci_pmu *cci_pmu; - const struct cci_pmu_model *model; - - /* - * All allocations are devm_* hence we don't have to free - * them explicitly on an error, as it would end up in driver - * detach. - */ - model = get_cci_model(pdev); - if (!model) { - dev_warn(&pdev->dev, "CCI PMU version not supported\n"); - return ERR_PTR(-ENODEV); - } - - cci_pmu = devm_kzalloc(&pdev->dev, sizeof(*cci_pmu), GFP_KERNEL); - if (!cci_pmu) - return ERR_PTR(-ENOMEM); - - cci_pmu->model = model; - cci_pmu->irqs = devm_kcalloc(&pdev->dev, CCI_PMU_MAX_HW_CNTRS(model), - sizeof(*cci_pmu->irqs), GFP_KERNEL); - if (!cci_pmu->irqs) - return ERR_PTR(-ENOMEM); - cci_pmu->hw_events.events = devm_kcalloc(&pdev->dev, - CCI_PMU_MAX_HW_CNTRS(model), - sizeof(*cci_pmu->hw_events.events), - GFP_KERNEL); - if (!cci_pmu->hw_events.events) - return ERR_PTR(-ENOMEM); - cci_pmu->hw_events.used_mask = devm_kcalloc(&pdev->dev, - BITS_TO_LONGS(CCI_PMU_MAX_HW_CNTRS(model)), - sizeof(*cci_pmu->hw_events.used_mask), - GFP_KERNEL); - if (!cci_pmu->hw_events.used_mask) - return ERR_PTR(-ENOMEM); - - return cci_pmu; -} - - -static int cci_pmu_probe(struct platform_device *pdev) -{ - struct resource *res; - struct cci_pmu *cci_pmu; - int i, ret, irq; - - cci_pmu = cci_pmu_alloc(pdev); - if (IS_ERR(cci_pmu)) - return PTR_ERR(cci_pmu); - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - cci_pmu->base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(cci_pmu->base)) - return -ENOMEM; - - /* - * CCI PMU has one overflow interrupt per counter; but some may be tied - * together to a common interrupt. - */ - cci_pmu->nr_irqs = 0; - for (i = 0; i < CCI_PMU_MAX_HW_CNTRS(cci_pmu->model); i++) { - irq = platform_get_irq(pdev, i); - if (irq < 0) - break; - - if (is_duplicate_irq(irq, cci_pmu->irqs, cci_pmu->nr_irqs)) - continue; - - cci_pmu->irqs[cci_pmu->nr_irqs++] = irq; - } - - /* - * Ensure that the device tree has as many interrupts as the number - * of counters. - */ - if (i < CCI_PMU_MAX_HW_CNTRS(cci_pmu->model)) { - dev_warn(&pdev->dev, "In-correct number of interrupts: %d, should be %d\n", - i, CCI_PMU_MAX_HW_CNTRS(cci_pmu->model)); - return -EINVAL; - } - - raw_spin_lock_init(&cci_pmu->hw_events.pmu_lock); - mutex_init(&cci_pmu->reserve_mutex); - atomic_set(&cci_pmu->active_events, 0); - cpumask_set_cpu(get_cpu(), &cci_pmu->cpus); - - ret = cci_pmu_init(cci_pmu, pdev); - if (ret) { - put_cpu(); - return ret; - } - - cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_CCI_ONLINE, - &cci_pmu->node); - put_cpu(); - pr_info("ARM %s PMU driver probed", cci_pmu->model->name); - return 0; -} static int cci_platform_probe(struct platform_device *pdev) { @@ -1778,14 +66,6 @@ static int cci_platform_probe(struct platform_device *pdev) return of_platform_populate(pdev->dev.of_node, NULL, NULL, &pdev->dev); } -static struct platform_driver cci_pmu_driver = { - .driver = { - .name = DRIVER_NAME_PMU, - .of_match_table = arm_cci_pmu_matches, - }, - .probe = cci_pmu_probe, -}; - static struct platform_driver cci_platform_driver = { .driver = { .name = DRIVER_NAME, @@ -1796,30 +76,9 @@ static struct platform_driver cci_platform_driver = { static int __init cci_platform_init(void) { - int ret; - - ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_CCI_ONLINE, - "perf/arm/cci:online", NULL, - cci_pmu_offline_cpu); - if (ret) - return ret; - - ret = platform_driver_register(&cci_pmu_driver); - if (ret) - return ret; - return platform_driver_register(&cci_platform_driver); } -#else /* !CONFIG_ARM_CCI_PMU */ - -static int __init cci_platform_init(void) -{ - return 0; -} - -#endif /* CONFIG_ARM_CCI_PMU */ - #ifdef CONFIG_ARM_CCI400_PORT_CTRL #define CCI_PORT_CTRL 0x0 diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index 331b6d992b5a..28bb5a029558 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -5,6 +5,32 @@ menu "Performance monitor support" depends on PERF_EVENTS +config ARM_CCI_PMU + bool + select ARM_CCI + +config ARM_CCI400_PMU + bool "ARM CCI400 PMU support" + depends on (ARM && CPU_V7) || ARM64 + select ARM_CCI400_COMMON + select ARM_CCI_PMU + help + Support for PMU events monitoring on the ARM CCI-400 (cache coherent + interconnect). CCI-400 supports counting events related to the + connected slave/master interfaces. + +config ARM_CCI5xx_PMU + bool "ARM CCI-500/CCI-550 PMU support" + depends on (ARM && CPU_V7) || ARM64 + select ARM_CCI_PMU + help + Support for PMU events monitoring on the ARM CCI-500/CCI-550 cache + coherent interconnects. Both of them provide 8 independent event counters, + which can count events pertaining to the slave/master interfaces as well + as the internal events to the CCI. + + If unsure, say Y + config ARM_CCN tristate "ARM CCN driver support" depends on ARM || ARM64 diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index 5004abee0f3a..b3902bd37d53 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_ARM_CCI_PMU) += arm-cci.o obj-$(CONFIG_ARM_CCN) += arm-ccn.o obj-$(CONFIG_ARM_DSU_PMU) += arm_dsu_pmu.o obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c new file mode 100644 index 000000000000..d5f8c750fd41 --- /dev/null +++ b/drivers/perf/arm-cci.c @@ -0,0 +1,1747 @@ +// SPDX-License-Identifier: GPL-2.0 +// CCI Cache Coherent Interconnect PMU driver +// Copyright (C) 2013-2018 Arm Ltd. +// Author: Punit Agrawal , Suzuki Poulose + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern void __iomem *const cci_ctrl_base; + +#define DRIVER_NAME "ARM-CCI PMU" + +#define CCI_PMCR 0x0100 +#define CCI_PID2 0x0fe8 + +#define CCI_PMCR_CEN 0x00000001 +#define CCI_PMCR_NCNT_MASK 0x0000f800 +#define CCI_PMCR_NCNT_SHIFT 11 + +#define CCI_PID2_REV_MASK 0xf0 +#define CCI_PID2_REV_SHIFT 4 + +#define CCI_PMU_EVT_SEL 0x000 +#define CCI_PMU_CNTR 0x004 +#define CCI_PMU_CNTR_CTRL 0x008 +#define CCI_PMU_OVRFLW 0x00c + +#define CCI_PMU_OVRFLW_FLAG 1 + +#define CCI_PMU_CNTR_SIZE(model) ((model)->cntr_size) +#define CCI_PMU_CNTR_BASE(model, idx) ((idx) * CCI_PMU_CNTR_SIZE(model)) +#define CCI_PMU_CNTR_MASK ((1ULL << 32) -1) +#define CCI_PMU_CNTR_LAST(cci_pmu) (cci_pmu->num_cntrs - 1) + +#define CCI_PMU_MAX_HW_CNTRS(model) \ + ((model)->num_hw_cntrs + (model)->fixed_hw_cntrs) + +/* Types of interfaces that can generate events */ +enum { + CCI_IF_SLAVE, + CCI_IF_MASTER, +#ifdef CONFIG_ARM_CCI5xx_PMU + CCI_IF_GLOBAL, +#endif + CCI_IF_MAX, +}; + +struct event_range { + u32 min; + u32 max; +}; + +struct cci_pmu_hw_events { + struct perf_event **events; + unsigned long *used_mask; + raw_spinlock_t pmu_lock; +}; + +struct cci_pmu; +/* + * struct cci_pmu_model: + * @fixed_hw_cntrs - Number of fixed event counters + * @num_hw_cntrs - Maximum number of programmable event counters + * @cntr_size - Size of an event counter mapping + */ +struct cci_pmu_model { + char *name; + u32 fixed_hw_cntrs; + u32 num_hw_cntrs; + u32 cntr_size; + struct attribute **format_attrs; + struct attribute **event_attrs; + struct event_range event_ranges[CCI_IF_MAX]; + int (*validate_hw_event)(struct cci_pmu *, unsigned long); + int (*get_event_idx)(struct cci_pmu *, struct cci_pmu_hw_events *, unsigned long); + void (*write_counters)(struct cci_pmu *, unsigned long *); +}; + +static struct cci_pmu_model cci_pmu_models[]; + +struct cci_pmu { + void __iomem *base; + struct pmu pmu; + int nr_irqs; + int *irqs; + unsigned long active_irqs; + const struct cci_pmu_model *model; + struct cci_pmu_hw_events hw_events; + struct platform_device *plat_device; + int num_cntrs; + atomic_t active_events; + struct mutex reserve_mutex; + struct hlist_node node; + cpumask_t cpus; +}; + +#define to_cci_pmu(c) (container_of(c, struct cci_pmu, pmu)) + +enum cci_models { +#ifdef CONFIG_ARM_CCI400_PMU + CCI400_R0, + CCI400_R1, +#endif +#ifdef CONFIG_ARM_CCI5xx_PMU + CCI500_R0, + CCI550_R0, +#endif + CCI_MODEL_MAX +}; + +static void pmu_write_counters(struct cci_pmu *cci_pmu, + unsigned long *mask); +static ssize_t cci_pmu_format_show(struct device *dev, + struct device_attribute *attr, char *buf); +static ssize_t cci_pmu_event_show(struct device *dev, + struct device_attribute *attr, char *buf); + +#define CCI_EXT_ATTR_ENTRY(_name, _func, _config) \ + &((struct dev_ext_attribute[]) { \ + { __ATTR(_name, S_IRUGO, _func, NULL), (void *)_config } \ + })[0].attr.attr + +#define CCI_FORMAT_EXT_ATTR_ENTRY(_name, _config) \ + CCI_EXT_ATTR_ENTRY(_name, cci_pmu_format_show, (char *)_config) +#define CCI_EVENT_EXT_ATTR_ENTRY(_name, _config) \ + CCI_EXT_ATTR_ENTRY(_name, cci_pmu_event_show, (unsigned long)_config) + +/* CCI400 PMU Specific definitions */ + +#ifdef CONFIG_ARM_CCI400_PMU + +/* Port ids */ +#define CCI400_PORT_S0 0 +#define CCI400_PORT_S1 1 +#define CCI400_PORT_S2 2 +#define CCI400_PORT_S3 3 +#define CCI400_PORT_S4 4 +#define CCI400_PORT_M0 5 +#define CCI400_PORT_M1 6 +#define CCI400_PORT_M2 7 + +#define CCI400_R1_PX 5 + +/* + * Instead of an event id to monitor CCI cycles, a dedicated counter is + * provided. Use 0xff to represent CCI cycles and hope that no future revisions + * make use of this event in hardware. + */ +enum cci400_perf_events { + CCI400_PMU_CYCLES = 0xff +}; + +#define CCI400_PMU_CYCLE_CNTR_IDX 0 +#define CCI400_PMU_CNTR0_IDX 1 + +/* + * CCI PMU event id is an 8-bit value made of two parts - bits 7:5 for one of 8 + * ports and bits 4:0 are event codes. There are different event codes + * associated with each port type. + * + * Additionally, the range of events associated with the port types changed + * between Rev0 and Rev1. + * + * The constants below define the range of valid codes for each port type for + * the different revisions and are used to validate the event to be monitored. + */ + +#define CCI400_PMU_EVENT_MASK 0xffUL +#define CCI400_PMU_EVENT_SOURCE_SHIFT 5 +#define CCI400_PMU_EVENT_SOURCE_MASK 0x7 +#define CCI400_PMU_EVENT_CODE_SHIFT 0 +#define CCI400_PMU_EVENT_CODE_MASK 0x1f +#define CCI400_PMU_EVENT_SOURCE(event) \ + ((event >> CCI400_PMU_EVENT_SOURCE_SHIFT) & \ + CCI400_PMU_EVENT_SOURCE_MASK) +#define CCI400_PMU_EVENT_CODE(event) \ + ((event >> CCI400_PMU_EVENT_CODE_SHIFT) & CCI400_PMU_EVENT_CODE_MASK) + +#define CCI400_R0_SLAVE_PORT_MIN_EV 0x00 +#define CCI400_R0_SLAVE_PORT_MAX_EV 0x13 +#define CCI400_R0_MASTER_PORT_MIN_EV 0x14 +#define CCI400_R0_MASTER_PORT_MAX_EV 0x1a + +#define CCI400_R1_SLAVE_PORT_MIN_EV 0x00 +#define CCI400_R1_SLAVE_PORT_MAX_EV 0x14 +#define CCI400_R1_MASTER_PORT_MIN_EV 0x00 +#define CCI400_R1_MASTER_PORT_MAX_EV 0x11 + +#define CCI400_CYCLE_EVENT_EXT_ATTR_ENTRY(_name, _config) \ + CCI_EXT_ATTR_ENTRY(_name, cci400_pmu_cycle_event_show, \ + (unsigned long)_config) + +static ssize_t cci400_pmu_cycle_event_show(struct device *dev, + struct device_attribute *attr, char *buf); + +static struct attribute *cci400_pmu_format_attrs[] = { + CCI_FORMAT_EXT_ATTR_ENTRY(event, "config:0-4"), + CCI_FORMAT_EXT_ATTR_ENTRY(source, "config:5-7"), + NULL +}; + +static struct attribute *cci400_r0_pmu_event_attrs[] = { + /* Slave events */ + CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_any, 0x0), + CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_device, 0x01), + CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_normal_or_nonshareable, 0x2), + CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_inner_or_outershareable, 0x3), + CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_cache_maintenance, 0x4), + CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_mem_barrier, 0x5), + CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_sync_barrier, 0x6), + CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_dvm_msg, 0x7), + CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_dvm_msg_sync, 0x8), + CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_stall_tt_full, 0x9), + CCI_EVENT_EXT_ATTR_ENTRY(si_r_data_last_hs_snoop, 0xA), + CCI_EVENT_EXT_ATTR_ENTRY(si_r_data_stall_rvalids_h_rready_l, 0xB), + CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_any, 0xC), + CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_device, 0xD), + CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_normal_or_nonshareable, 0xE), + CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_inner_or_outershare_wback_wclean, 0xF), + CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_write_unique, 0x10), + CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_write_line_unique, 0x11), + CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_evict, 0x12), + CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_stall_tt_full, 0x13), + /* Master events */ + CCI_EVENT_EXT_ATTR_ENTRY(mi_retry_speculative_fetch, 0x14), + CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_addr_hazard, 0x15), + CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_id_hazard, 0x16), + CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_tt_full, 0x17), + CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_barrier_hazard, 0x18), + CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_barrier_hazard, 0x19), + CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_tt_full, 0x1A), + /* Special event for cycles counter */ + CCI400_CYCLE_EVENT_EXT_ATTR_ENTRY(cycles, 0xff), + NULL +}; + +static struct attribute *cci400_r1_pmu_event_attrs[] = { + /* Slave events */ + CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_any, 0x0), + CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_device, 0x01), + CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_normal_or_nonshareable, 0x2), + CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_inner_or_outershareable, 0x3), + CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_cache_maintenance, 0x4), + CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_mem_barrier, 0x5), + CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_sync_barrier, 0x6), + CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_dvm_msg, 0x7), + CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_dvm_msg_sync, 0x8), + CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_stall_tt_full, 0x9), + CCI_EVENT_EXT_ATTR_ENTRY(si_r_data_last_hs_snoop, 0xA), + CCI_EVENT_EXT_ATTR_ENTRY(si_r_data_stall_rvalids_h_rready_l, 0xB), + CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_any, 0xC), + CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_device, 0xD), + CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_normal_or_nonshareable, 0xE), + CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_inner_or_outershare_wback_wclean, 0xF), + CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_write_unique, 0x10), + CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_write_line_unique, 0x11), + CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_evict, 0x12), + CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_stall_tt_full, 0x13), + CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_stall_slave_id_hazard, 0x14), + /* Master events */ + CCI_EVENT_EXT_ATTR_ENTRY(mi_retry_speculative_fetch, 0x0), + CCI_EVENT_EXT_ATTR_ENTRY(mi_stall_cycle_addr_hazard, 0x1), + CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_master_id_hazard, 0x2), + CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_hi_prio_rtq_full, 0x3), + CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_barrier_hazard, 0x4), + CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_barrier_hazard, 0x5), + CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_wtq_full, 0x6), + CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_low_prio_rtq_full, 0x7), + CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_mid_prio_rtq_full, 0x8), + CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_qvn_vn0, 0x9), + CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_qvn_vn1, 0xA), + CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_qvn_vn2, 0xB), + CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_qvn_vn3, 0xC), + CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_qvn_vn0, 0xD), + CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_qvn_vn1, 0xE), + CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_qvn_vn2, 0xF), + CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_qvn_vn3, 0x10), + CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_unique_or_line_unique_addr_hazard, 0x11), + /* Special event for cycles counter */ + CCI400_CYCLE_EVENT_EXT_ATTR_ENTRY(cycles, 0xff), + NULL +}; + +static ssize_t cci400_pmu_cycle_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dev_ext_attribute *eattr = container_of(attr, + struct dev_ext_attribute, attr); + return snprintf(buf, PAGE_SIZE, "config=0x%lx\n", (unsigned long)eattr->var); +} + +static int cci400_get_event_idx(struct cci_pmu *cci_pmu, + struct cci_pmu_hw_events *hw, + unsigned long cci_event) +{ + int idx; + + /* cycles event idx is fixed */ + if (cci_event == CCI400_PMU_CYCLES) { + if (test_and_set_bit(CCI400_PMU_CYCLE_CNTR_IDX, hw->used_mask)) + return -EAGAIN; + + return CCI400_PMU_CYCLE_CNTR_IDX; + } + + for (idx = CCI400_PMU_CNTR0_IDX; idx <= CCI_PMU_CNTR_LAST(cci_pmu); ++idx) + if (!test_and_set_bit(idx, hw->used_mask)) + return idx; + + /* No counters available */ + return -EAGAIN; +} + +static int cci400_validate_hw_event(struct cci_pmu *cci_pmu, unsigned long hw_event) +{ + u8 ev_source = CCI400_PMU_EVENT_SOURCE(hw_event); + u8 ev_code = CCI400_PMU_EVENT_CODE(hw_event); + int if_type; + + if (hw_event & ~CCI400_PMU_EVENT_MASK) + return -ENOENT; + + if (hw_event == CCI400_PMU_CYCLES) + return hw_event; + + switch (ev_source) { + case CCI400_PORT_S0: + case CCI400_PORT_S1: + case CCI400_PORT_S2: + case CCI400_PORT_S3: + case CCI400_PORT_S4: + /* Slave Interface */ + if_type = CCI_IF_SLAVE; + break; + case CCI400_PORT_M0: + case CCI400_PORT_M1: + case CCI400_PORT_M2: + /* Master Interface */ + if_type = CCI_IF_MASTER; + break; + default: + return -ENOENT; + } + + if (ev_code >= cci_pmu->model->event_ranges[if_type].min && + ev_code <= cci_pmu->model->event_ranges[if_type].max) + return hw_event; + + return -ENOENT; +} + +static int probe_cci400_revision(void) +{ + int rev; + rev = readl_relaxed(cci_ctrl_base + CCI_PID2) & CCI_PID2_REV_MASK; + rev >>= CCI_PID2_REV_SHIFT; + + if (rev < CCI400_R1_PX) + return CCI400_R0; + else + return CCI400_R1; +} + +static const struct cci_pmu_model *probe_cci_model(struct platform_device *pdev) +{ + if (platform_has_secure_cci_access()) + return &cci_pmu_models[probe_cci400_revision()]; + return NULL; +} +#else /* !CONFIG_ARM_CCI400_PMU */ +static inline struct cci_pmu_model *probe_cci_model(struct platform_device *pdev) +{ + return NULL; +} +#endif /* CONFIG_ARM_CCI400_PMU */ + +#ifdef CONFIG_ARM_CCI5xx_PMU + +/* + * CCI5xx PMU event id is an 9-bit value made of two parts. + * bits [8:5] - Source for the event + * bits [4:0] - Event code (specific to type of interface) + * + * + */ + +/* Port ids */ +#define CCI5xx_PORT_S0 0x0 +#define CCI5xx_PORT_S1 0x1 +#define CCI5xx_PORT_S2 0x2 +#define CCI5xx_PORT_S3 0x3 +#define CCI5xx_PORT_S4 0x4 +#define CCI5xx_PORT_S5 0x5 +#define CCI5xx_PORT_S6 0x6 + +#define CCI5xx_PORT_M0 0x8 +#define CCI5xx_PORT_M1 0x9 +#define CCI5xx_PORT_M2 0xa +#define CCI5xx_PORT_M3 0xb +#define CCI5xx_PORT_M4 0xc +#define CCI5xx_PORT_M5 0xd +#define CCI5xx_PORT_M6 0xe + +#define CCI5xx_PORT_GLOBAL 0xf + +#define CCI5xx_PMU_EVENT_MASK 0x1ffUL +#define CCI5xx_PMU_EVENT_SOURCE_SHIFT 0x5 +#define CCI5xx_PMU_EVENT_SOURCE_MASK 0xf +#define CCI5xx_PMU_EVENT_CODE_SHIFT 0x0 +#define CCI5xx_PMU_EVENT_CODE_MASK 0x1f + +#define CCI5xx_PMU_EVENT_SOURCE(event) \ + ((event >> CCI5xx_PMU_EVENT_SOURCE_SHIFT) & CCI5xx_PMU_EVENT_SOURCE_MASK) +#define CCI5xx_PMU_EVENT_CODE(event) \ + ((event >> CCI5xx_PMU_EVENT_CODE_SHIFT) & CCI5xx_PMU_EVENT_CODE_MASK) + +#define CCI5xx_SLAVE_PORT_MIN_EV 0x00 +#define CCI5xx_SLAVE_PORT_MAX_EV 0x1f +#define CCI5xx_MASTER_PORT_MIN_EV 0x00 +#define CCI5xx_MASTER_PORT_MAX_EV 0x06 +#define CCI5xx_GLOBAL_PORT_MIN_EV 0x00 +#define CCI5xx_GLOBAL_PORT_MAX_EV 0x0f + + +#define CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(_name, _config) \ + CCI_EXT_ATTR_ENTRY(_name, cci5xx_pmu_global_event_show, \ + (unsigned long) _config) + +static ssize_t cci5xx_pmu_global_event_show(struct device *dev, + struct device_attribute *attr, char *buf); + +static struct attribute *cci5xx_pmu_format_attrs[] = { + CCI_FORMAT_EXT_ATTR_ENTRY(event, "config:0-4"), + CCI_FORMAT_EXT_ATTR_ENTRY(source, "config:5-8"), + NULL, +}; + +static struct attribute *cci5xx_pmu_event_attrs[] = { + /* Slave events */ + CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_arvalid, 0x0), + CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_dev, 0x1), + CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_nonshareable, 0x2), + CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_shareable_non_alloc, 0x3), + CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_shareable_alloc, 0x4), + CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_invalidate, 0x5), + CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_cache_maint, 0x6), + CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_dvm_msg, 0x7), + CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_rval, 0x8), + CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_rlast_snoop, 0x9), + CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_awalid, 0xA), + CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_dev, 0xB), + CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_non_shareable, 0xC), + CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_share_wb, 0xD), + CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_share_wlu, 0xE), + CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_share_wunique, 0xF), + CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_evict, 0x10), + CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_wrevict, 0x11), + CCI_EVENT_EXT_ATTR_ENTRY(si_w_data_beat, 0x12), + CCI_EVENT_EXT_ATTR_ENTRY(si_srq_acvalid, 0x13), + CCI_EVENT_EXT_ATTR_ENTRY(si_srq_read, 0x14), + CCI_EVENT_EXT_ATTR_ENTRY(si_srq_clean, 0x15), + CCI_EVENT_EXT_ATTR_ENTRY(si_srq_data_transfer_low, 0x16), + CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_stall_arvalid, 0x17), + CCI_EVENT_EXT_ATTR_ENTRY(si_r_data_stall, 0x18), + CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_stall, 0x19), + CCI_EVENT_EXT_ATTR_ENTRY(si_w_data_stall, 0x1A), + CCI_EVENT_EXT_ATTR_ENTRY(si_w_resp_stall, 0x1B), + CCI_EVENT_EXT_ATTR_ENTRY(si_srq_stall, 0x1C), + CCI_EVENT_EXT_ATTR_ENTRY(si_s_data_stall, 0x1D), + CCI_EVENT_EXT_ATTR_ENTRY(si_rq_stall_ot_limit, 0x1E), + CCI_EVENT_EXT_ATTR_ENTRY(si_r_stall_arbit, 0x1F), + + /* Master events */ + CCI_EVENT_EXT_ATTR_ENTRY(mi_r_data_beat_any, 0x0), + CCI_EVENT_EXT_ATTR_ENTRY(mi_w_data_beat_any, 0x1), + CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall, 0x2), + CCI_EVENT_EXT_ATTR_ENTRY(mi_r_data_stall, 0x3), + CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall, 0x4), + CCI_EVENT_EXT_ATTR_ENTRY(mi_w_data_stall, 0x5), + CCI_EVENT_EXT_ATTR_ENTRY(mi_w_resp_stall, 0x6), + + /* Global events */ + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_0_1, 0x0), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_2_3, 0x1), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_4_5, 0x2), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_6_7, 0x3), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_0_1, 0x4), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_2_3, 0x5), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_4_5, 0x6), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_6_7, 0x7), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_back_invalidation, 0x8), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_stall_alloc_busy, 0x9), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_stall_tt_full, 0xA), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_wrq, 0xB), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_cd_hs, 0xC), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_rq_stall_addr_hazard, 0xD), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_rq_stall_tt_full, 0xE), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_rq_tzmp1_prot, 0xF), + NULL +}; + +static ssize_t cci5xx_pmu_global_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dev_ext_attribute *eattr = container_of(attr, + struct dev_ext_attribute, attr); + /* Global events have single fixed source code */ + return snprintf(buf, PAGE_SIZE, "event=0x%lx,source=0x%x\n", + (unsigned long)eattr->var, CCI5xx_PORT_GLOBAL); +} + +/* + * CCI500 provides 8 independent event counters that can count + * any of the events available. + * CCI500 PMU event source ids + * 0x0-0x6 - Slave interfaces + * 0x8-0xD - Master interfaces + * 0xf - Global Events + * 0x7,0xe - Reserved + */ +static int cci500_validate_hw_event(struct cci_pmu *cci_pmu, + unsigned long hw_event) +{ + u32 ev_source = CCI5xx_PMU_EVENT_SOURCE(hw_event); + u32 ev_code = CCI5xx_PMU_EVENT_CODE(hw_event); + int if_type; + + if (hw_event & ~CCI5xx_PMU_EVENT_MASK) + return -ENOENT; + + switch (ev_source) { + case CCI5xx_PORT_S0: + case CCI5xx_PORT_S1: + case CCI5xx_PORT_S2: + case CCI5xx_PORT_S3: + case CCI5xx_PORT_S4: + case CCI5xx_PORT_S5: + case CCI5xx_PORT_S6: + if_type = CCI_IF_SLAVE; + break; + case CCI5xx_PORT_M0: + case CCI5xx_PORT_M1: + case CCI5xx_PORT_M2: + case CCI5xx_PORT_M3: + case CCI5xx_PORT_M4: + case CCI5xx_PORT_M5: + if_type = CCI_IF_MASTER; + break; + case CCI5xx_PORT_GLOBAL: + if_type = CCI_IF_GLOBAL; + break; + default: + return -ENOENT; + } + + if (ev_code >= cci_pmu->model->event_ranges[if_type].min && + ev_code <= cci_pmu->model->event_ranges[if_type].max) + return hw_event; + + return -ENOENT; +} + +/* + * CCI550 provides 8 independent event counters that can count + * any of the events available. + * CCI550 PMU event source ids + * 0x0-0x6 - Slave interfaces + * 0x8-0xe - Master interfaces + * 0xf - Global Events + * 0x7 - Reserved + */ +static int cci550_validate_hw_event(struct cci_pmu *cci_pmu, + unsigned long hw_event) +{ + u32 ev_source = CCI5xx_PMU_EVENT_SOURCE(hw_event); + u32 ev_code = CCI5xx_PMU_EVENT_CODE(hw_event); + int if_type; + + if (hw_event & ~CCI5xx_PMU_EVENT_MASK) + return -ENOENT; + + switch (ev_source) { + case CCI5xx_PORT_S0: + case CCI5xx_PORT_S1: + case CCI5xx_PORT_S2: + case CCI5xx_PORT_S3: + case CCI5xx_PORT_S4: + case CCI5xx_PORT_S5: + case CCI5xx_PORT_S6: + if_type = CCI_IF_SLAVE; + break; + case CCI5xx_PORT_M0: + case CCI5xx_PORT_M1: + case CCI5xx_PORT_M2: + case CCI5xx_PORT_M3: + case CCI5xx_PORT_M4: + case CCI5xx_PORT_M5: + case CCI5xx_PORT_M6: + if_type = CCI_IF_MASTER; + break; + case CCI5xx_PORT_GLOBAL: + if_type = CCI_IF_GLOBAL; + break; + default: + return -ENOENT; + } + + if (ev_code >= cci_pmu->model->event_ranges[if_type].min && + ev_code <= cci_pmu->model->event_ranges[if_type].max) + return hw_event; + + return -ENOENT; +} + +#endif /* CONFIG_ARM_CCI5xx_PMU */ + +/* + * Program the CCI PMU counters which have PERF_HES_ARCH set + * with the event period and mark them ready before we enable + * PMU. + */ +static void cci_pmu_sync_counters(struct cci_pmu *cci_pmu) +{ + int i; + struct cci_pmu_hw_events *cci_hw = &cci_pmu->hw_events; + + DECLARE_BITMAP(mask, cci_pmu->num_cntrs); + + bitmap_zero(mask, cci_pmu->num_cntrs); + for_each_set_bit(i, cci_pmu->hw_events.used_mask, cci_pmu->num_cntrs) { + struct perf_event *event = cci_hw->events[i]; + + if (WARN_ON(!event)) + continue; + + /* Leave the events which are not counting */ + if (event->hw.state & PERF_HES_STOPPED) + continue; + if (event->hw.state & PERF_HES_ARCH) { + set_bit(i, mask); + event->hw.state &= ~PERF_HES_ARCH; + } + } + + pmu_write_counters(cci_pmu, mask); +} + +/* Should be called with cci_pmu->hw_events->pmu_lock held */ +static void __cci_pmu_enable_nosync(struct cci_pmu *cci_pmu) +{ + u32 val; + + /* Enable all the PMU counters. */ + val = readl_relaxed(cci_ctrl_base + CCI_PMCR) | CCI_PMCR_CEN; + writel(val, cci_ctrl_base + CCI_PMCR); +} + +/* Should be called with cci_pmu->hw_events->pmu_lock held */ +static void __cci_pmu_enable_sync(struct cci_pmu *cci_pmu) +{ + cci_pmu_sync_counters(cci_pmu); + __cci_pmu_enable_nosync(cci_pmu); +} + +/* Should be called with cci_pmu->hw_events->pmu_lock held */ +static void __cci_pmu_disable(void) +{ + u32 val; + + /* Disable all the PMU counters. */ + val = readl_relaxed(cci_ctrl_base + CCI_PMCR) & ~CCI_PMCR_CEN; + writel(val, cci_ctrl_base + CCI_PMCR); +} + +static ssize_t cci_pmu_format_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dev_ext_attribute *eattr = container_of(attr, + struct dev_ext_attribute, attr); + return snprintf(buf, PAGE_SIZE, "%s\n", (char *)eattr->var); +} + +static ssize_t cci_pmu_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dev_ext_attribute *eattr = container_of(attr, + struct dev_ext_attribute, attr); + /* source parameter is mandatory for normal PMU events */ + return snprintf(buf, PAGE_SIZE, "source=?,event=0x%lx\n", + (unsigned long)eattr->var); +} + +static int pmu_is_valid_counter(struct cci_pmu *cci_pmu, int idx) +{ + return 0 <= idx && idx <= CCI_PMU_CNTR_LAST(cci_pmu); +} + +static u32 pmu_read_register(struct cci_pmu *cci_pmu, int idx, unsigned int offset) +{ + return readl_relaxed(cci_pmu->base + + CCI_PMU_CNTR_BASE(cci_pmu->model, idx) + offset); +} + +static void pmu_write_register(struct cci_pmu *cci_pmu, u32 value, + int idx, unsigned int offset) +{ + writel_relaxed(value, cci_pmu->base + + CCI_PMU_CNTR_BASE(cci_pmu->model, idx) + offset); +} + +static void pmu_disable_counter(struct cci_pmu *cci_pmu, int idx) +{ + pmu_write_register(cci_pmu, 0, idx, CCI_PMU_CNTR_CTRL); +} + +static void pmu_enable_counter(struct cci_pmu *cci_pmu, int idx) +{ + pmu_write_register(cci_pmu, 1, idx, CCI_PMU_CNTR_CTRL); +} + +static bool __maybe_unused +pmu_counter_is_enabled(struct cci_pmu *cci_pmu, int idx) +{ + return (pmu_read_register(cci_pmu, idx, CCI_PMU_CNTR_CTRL) & 0x1) != 0; +} + +static void pmu_set_event(struct cci_pmu *cci_pmu, int idx, unsigned long event) +{ + pmu_write_register(cci_pmu, event, idx, CCI_PMU_EVT_SEL); +} + +/* + * For all counters on the CCI-PMU, disable any 'enabled' counters, + * saving the changed counters in the mask, so that we can restore + * it later using pmu_restore_counters. The mask is private to the + * caller. We cannot rely on the used_mask maintained by the CCI_PMU + * as it only tells us if the counter is assigned to perf_event or not. + * The state of the perf_event cannot be locked by the PMU layer, hence + * we check the individual counter status (which can be locked by + * cci_pm->hw_events->pmu_lock). + * + * @mask should be initialised to empty by the caller. + */ +static void __maybe_unused +pmu_save_counters(struct cci_pmu *cci_pmu, unsigned long *mask) +{ + int i; + + for (i = 0; i < cci_pmu->num_cntrs; i++) { + if (pmu_counter_is_enabled(cci_pmu, i)) { + set_bit(i, mask); + pmu_disable_counter(cci_pmu, i); + } + } +} + +/* + * Restore the status of the counters. Reversal of the pmu_save_counters(). + * For each counter set in the mask, enable the counter back. + */ +static void __maybe_unused +pmu_restore_counters(struct cci_pmu *cci_pmu, unsigned long *mask) +{ + int i; + + for_each_set_bit(i, mask, cci_pmu->num_cntrs) + pmu_enable_counter(cci_pmu, i); +} + +/* + * Returns the number of programmable counters actually implemented + * by the cci + */ +static u32 pmu_get_max_counters(void) +{ + return (readl_relaxed(cci_ctrl_base + CCI_PMCR) & + CCI_PMCR_NCNT_MASK) >> CCI_PMCR_NCNT_SHIFT; +} + +static int pmu_get_event_idx(struct cci_pmu_hw_events *hw, struct perf_event *event) +{ + struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); + unsigned long cci_event = event->hw.config_base; + int idx; + + if (cci_pmu->model->get_event_idx) + return cci_pmu->model->get_event_idx(cci_pmu, hw, cci_event); + + /* Generic code to find an unused idx from the mask */ + for(idx = 0; idx <= CCI_PMU_CNTR_LAST(cci_pmu); idx++) + if (!test_and_set_bit(idx, hw->used_mask)) + return idx; + + /* No counters available */ + return -EAGAIN; +} + +static int pmu_map_event(struct perf_event *event) +{ + struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); + + if (event->attr.type < PERF_TYPE_MAX || + !cci_pmu->model->validate_hw_event) + return -ENOENT; + + return cci_pmu->model->validate_hw_event(cci_pmu, event->attr.config); +} + +static int pmu_request_irq(struct cci_pmu *cci_pmu, irq_handler_t handler) +{ + int i; + struct platform_device *pmu_device = cci_pmu->plat_device; + + if (unlikely(!pmu_device)) + return -ENODEV; + + if (cci_pmu->nr_irqs < 1) { + dev_err(&pmu_device->dev, "no irqs for CCI PMUs defined\n"); + return -ENODEV; + } + + /* + * Register all available CCI PMU interrupts. In the interrupt handler + * we iterate over the counters checking for interrupt source (the + * overflowing counter) and clear it. + * + * This should allow handling of non-unique interrupt for the counters. + */ + for (i = 0; i < cci_pmu->nr_irqs; i++) { + int err = request_irq(cci_pmu->irqs[i], handler, IRQF_SHARED, + "arm-cci-pmu", cci_pmu); + if (err) { + dev_err(&pmu_device->dev, "unable to request IRQ%d for ARM CCI PMU counters\n", + cci_pmu->irqs[i]); + return err; + } + + set_bit(i, &cci_pmu->active_irqs); + } + + return 0; +} + +static void pmu_free_irq(struct cci_pmu *cci_pmu) +{ + int i; + + for (i = 0; i < cci_pmu->nr_irqs; i++) { + if (!test_and_clear_bit(i, &cci_pmu->active_irqs)) + continue; + + free_irq(cci_pmu->irqs[i], cci_pmu); + } +} + +static u32 pmu_read_counter(struct perf_event *event) +{ + struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); + struct hw_perf_event *hw_counter = &event->hw; + int idx = hw_counter->idx; + u32 value; + + if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) { + dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); + return 0; + } + value = pmu_read_register(cci_pmu, idx, CCI_PMU_CNTR); + + return value; +} + +static void pmu_write_counter(struct cci_pmu *cci_pmu, u32 value, int idx) +{ + pmu_write_register(cci_pmu, value, idx, CCI_PMU_CNTR); +} + +static void __pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask) +{ + int i; + struct cci_pmu_hw_events *cci_hw = &cci_pmu->hw_events; + + for_each_set_bit(i, mask, cci_pmu->num_cntrs) { + struct perf_event *event = cci_hw->events[i]; + + if (WARN_ON(!event)) + continue; + pmu_write_counter(cci_pmu, local64_read(&event->hw.prev_count), i); + } +} + +static void pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask) +{ + if (cci_pmu->model->write_counters) + cci_pmu->model->write_counters(cci_pmu, mask); + else + __pmu_write_counters(cci_pmu, mask); +} + +#ifdef CONFIG_ARM_CCI5xx_PMU + +/* + * CCI-500/CCI-550 has advanced power saving policies, which could gate the + * clocks to the PMU counters, which makes the writes to them ineffective. + * The only way to write to those counters is when the global counters + * are enabled and the particular counter is enabled. + * + * So we do the following : + * + * 1) Disable all the PMU counters, saving their current state + * 2) Enable the global PMU profiling, now that all counters are + * disabled. + * + * For each counter to be programmed, repeat steps 3-7: + * + * 3) Write an invalid event code to the event control register for the + counter, so that the counters are not modified. + * 4) Enable the counter control for the counter. + * 5) Set the counter value + * 6) Disable the counter + * 7) Restore the event in the target counter + * + * 8) Disable the global PMU. + * 9) Restore the status of the rest of the counters. + * + * We choose an event which for CCI-5xx is guaranteed not to count. + * We use the highest possible event code (0x1f) for the master interface 0. + */ +#define CCI5xx_INVALID_EVENT ((CCI5xx_PORT_M0 << CCI5xx_PMU_EVENT_SOURCE_SHIFT) | \ + (CCI5xx_PMU_EVENT_CODE_MASK << CCI5xx_PMU_EVENT_CODE_SHIFT)) +static void cci5xx_pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask) +{ + int i; + DECLARE_BITMAP(saved_mask, cci_pmu->num_cntrs); + + bitmap_zero(saved_mask, cci_pmu->num_cntrs); + pmu_save_counters(cci_pmu, saved_mask); + + /* + * Now that all the counters are disabled, we can safely turn the PMU on, + * without syncing the status of the counters + */ + __cci_pmu_enable_nosync(cci_pmu); + + for_each_set_bit(i, mask, cci_pmu->num_cntrs) { + struct perf_event *event = cci_pmu->hw_events.events[i]; + + if (WARN_ON(!event)) + continue; + + pmu_set_event(cci_pmu, i, CCI5xx_INVALID_EVENT); + pmu_enable_counter(cci_pmu, i); + pmu_write_counter(cci_pmu, local64_read(&event->hw.prev_count), i); + pmu_disable_counter(cci_pmu, i); + pmu_set_event(cci_pmu, i, event->hw.config_base); + } + + __cci_pmu_disable(); + + pmu_restore_counters(cci_pmu, saved_mask); +} + +#endif /* CONFIG_ARM_CCI5xx_PMU */ + +static u64 pmu_event_update(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 delta, prev_raw_count, new_raw_count; + + do { + prev_raw_count = local64_read(&hwc->prev_count); + new_raw_count = pmu_read_counter(event); + } while (local64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count); + + delta = (new_raw_count - prev_raw_count) & CCI_PMU_CNTR_MASK; + + local64_add(delta, &event->count); + + return new_raw_count; +} + +static void pmu_read(struct perf_event *event) +{ + pmu_event_update(event); +} + +static void pmu_event_set_period(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + /* + * The CCI PMU counters have a period of 2^32. To account for the + * possiblity of extreme interrupt latency we program for a period of + * half that. Hopefully we can handle the interrupt before another 2^31 + * events occur and the counter overtakes its previous value. + */ + u64 val = 1ULL << 31; + local64_set(&hwc->prev_count, val); + + /* + * CCI PMU uses PERF_HES_ARCH to keep track of the counters, whose + * values needs to be sync-ed with the s/w state before the PMU is + * enabled. + * Mark this counter for sync. + */ + hwc->state |= PERF_HES_ARCH; +} + +static irqreturn_t pmu_handle_irq(int irq_num, void *dev) +{ + unsigned long flags; + struct cci_pmu *cci_pmu = dev; + struct cci_pmu_hw_events *events = &cci_pmu->hw_events; + int idx, handled = IRQ_NONE; + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + + /* Disable the PMU while we walk through the counters */ + __cci_pmu_disable(); + /* + * Iterate over counters and update the corresponding perf events. + * This should work regardless of whether we have per-counter overflow + * interrupt or a combined overflow interrupt. + */ + for (idx = 0; idx <= CCI_PMU_CNTR_LAST(cci_pmu); idx++) { + struct perf_event *event = events->events[idx]; + + if (!event) + continue; + + /* Did this counter overflow? */ + if (!(pmu_read_register(cci_pmu, idx, CCI_PMU_OVRFLW) & + CCI_PMU_OVRFLW_FLAG)) + continue; + + pmu_write_register(cci_pmu, CCI_PMU_OVRFLW_FLAG, idx, + CCI_PMU_OVRFLW); + + pmu_event_update(event); + pmu_event_set_period(event); + handled = IRQ_HANDLED; + } + + /* Enable the PMU and sync possibly overflowed counters */ + __cci_pmu_enable_sync(cci_pmu); + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); + + return IRQ_RETVAL(handled); +} + +static int cci_pmu_get_hw(struct cci_pmu *cci_pmu) +{ + int ret = pmu_request_irq(cci_pmu, pmu_handle_irq); + if (ret) { + pmu_free_irq(cci_pmu); + return ret; + } + return 0; +} + +static void cci_pmu_put_hw(struct cci_pmu *cci_pmu) +{ + pmu_free_irq(cci_pmu); +} + +static void hw_perf_event_destroy(struct perf_event *event) +{ + struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); + atomic_t *active_events = &cci_pmu->active_events; + struct mutex *reserve_mutex = &cci_pmu->reserve_mutex; + + if (atomic_dec_and_mutex_lock(active_events, reserve_mutex)) { + cci_pmu_put_hw(cci_pmu); + mutex_unlock(reserve_mutex); + } +} + +static void cci_pmu_enable(struct pmu *pmu) +{ + struct cci_pmu *cci_pmu = to_cci_pmu(pmu); + struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events; + int enabled = bitmap_weight(hw_events->used_mask, cci_pmu->num_cntrs); + unsigned long flags; + + if (!enabled) + return; + + raw_spin_lock_irqsave(&hw_events->pmu_lock, flags); + __cci_pmu_enable_sync(cci_pmu); + raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags); + +} + +static void cci_pmu_disable(struct pmu *pmu) +{ + struct cci_pmu *cci_pmu = to_cci_pmu(pmu); + struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events; + unsigned long flags; + + raw_spin_lock_irqsave(&hw_events->pmu_lock, flags); + __cci_pmu_disable(); + raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags); +} + +/* + * Check if the idx represents a non-programmable counter. + * All the fixed event counters are mapped before the programmable + * counters. + */ +static bool pmu_fixed_hw_idx(struct cci_pmu *cci_pmu, int idx) +{ + return (idx >= 0) && (idx < cci_pmu->model->fixed_hw_cntrs); +} + +static void cci_pmu_start(struct perf_event *event, int pmu_flags) +{ + struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); + struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events; + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + unsigned long flags; + + /* + * To handle interrupt latency, we always reprogram the period + * regardlesss of PERF_EF_RELOAD. + */ + if (pmu_flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + + hwc->state = 0; + + if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) { + dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); + return; + } + + raw_spin_lock_irqsave(&hw_events->pmu_lock, flags); + + /* Configure the counter unless you are counting a fixed event */ + if (!pmu_fixed_hw_idx(cci_pmu, idx)) + pmu_set_event(cci_pmu, idx, hwc->config_base); + + pmu_event_set_period(event); + pmu_enable_counter(cci_pmu, idx); + + raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags); +} + +static void cci_pmu_stop(struct perf_event *event, int pmu_flags) +{ + struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (hwc->state & PERF_HES_STOPPED) + return; + + if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) { + dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); + return; + } + + /* + * We always reprogram the counter, so ignore PERF_EF_UPDATE. See + * cci_pmu_start() + */ + pmu_disable_counter(cci_pmu, idx); + pmu_event_update(event); + hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; +} + +static int cci_pmu_add(struct perf_event *event, int flags) +{ + struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); + struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events; + struct hw_perf_event *hwc = &event->hw; + int idx; + int err = 0; + + perf_pmu_disable(event->pmu); + + /* If we don't have a space for the counter then finish early. */ + idx = pmu_get_event_idx(hw_events, event); + if (idx < 0) { + err = idx; + goto out; + } + + event->hw.idx = idx; + hw_events->events[idx] = event; + + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + if (flags & PERF_EF_START) + cci_pmu_start(event, PERF_EF_RELOAD); + + /* Propagate our changes to the userspace mapping. */ + perf_event_update_userpage(event); + +out: + perf_pmu_enable(event->pmu); + return err; +} + +static void cci_pmu_del(struct perf_event *event, int flags) +{ + struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); + struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events; + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + cci_pmu_stop(event, PERF_EF_UPDATE); + hw_events->events[idx] = NULL; + clear_bit(idx, hw_events->used_mask); + + perf_event_update_userpage(event); +} + +static int validate_event(struct pmu *cci_pmu, + struct cci_pmu_hw_events *hw_events, + struct perf_event *event) +{ + if (is_software_event(event)) + return 1; + + /* + * Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The + * core perf code won't check that the pmu->ctx == leader->ctx + * until after pmu->event_init(event). + */ + if (event->pmu != cci_pmu) + return 0; + + if (event->state < PERF_EVENT_STATE_OFF) + return 1; + + if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec) + return 1; + + return pmu_get_event_idx(hw_events, event) >= 0; +} + +static int validate_group(struct perf_event *event) +{ + struct perf_event *sibling, *leader = event->group_leader; + struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); + unsigned long mask[BITS_TO_LONGS(cci_pmu->num_cntrs)]; + struct cci_pmu_hw_events fake_pmu = { + /* + * Initialise the fake PMU. We only need to populate the + * used_mask for the purposes of validation. + */ + .used_mask = mask, + }; + memset(mask, 0, BITS_TO_LONGS(cci_pmu->num_cntrs) * sizeof(unsigned long)); + + if (!validate_event(event->pmu, &fake_pmu, leader)) + return -EINVAL; + + list_for_each_entry(sibling, &leader->sibling_list, group_entry) { + if (!validate_event(event->pmu, &fake_pmu, sibling)) + return -EINVAL; + } + + if (!validate_event(event->pmu, &fake_pmu, event)) + return -EINVAL; + + return 0; +} + +static int __hw_perf_event_init(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int mapping; + + mapping = pmu_map_event(event); + + if (mapping < 0) { + pr_debug("event %x:%llx not supported\n", event->attr.type, + event->attr.config); + return mapping; + } + + /* + * We don't assign an index until we actually place the event onto + * hardware. Use -1 to signify that we haven't decided where to put it + * yet. + */ + hwc->idx = -1; + hwc->config_base = 0; + hwc->config = 0; + hwc->event_base = 0; + + /* + * Store the event encoding into the config_base field. + */ + hwc->config_base |= (unsigned long)mapping; + + /* + * Limit the sample_period to half of the counter width. That way, the + * new counter value is far less likely to overtake the previous one + * unless you have some serious IRQ latency issues. + */ + hwc->sample_period = CCI_PMU_CNTR_MASK >> 1; + hwc->last_period = hwc->sample_period; + local64_set(&hwc->period_left, hwc->sample_period); + + if (event->group_leader != event) { + if (validate_group(event) != 0) + return -EINVAL; + } + + return 0; +} + +static int cci_pmu_event_init(struct perf_event *event) +{ + struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); + atomic_t *active_events = &cci_pmu->active_events; + int err = 0; + int cpu; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* Shared by all CPUs, no meaningful state to sample */ + if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) + return -EOPNOTSUPP; + + /* We have no filtering of any kind */ + if (event->attr.exclude_user || + event->attr.exclude_kernel || + event->attr.exclude_hv || + event->attr.exclude_idle || + event->attr.exclude_host || + event->attr.exclude_guest) + return -EINVAL; + + /* + * Following the example set by other "uncore" PMUs, we accept any CPU + * and rewrite its affinity dynamically rather than having perf core + * handle cpu == -1 and pid == -1 for this case. + * + * The perf core will pin online CPUs for the duration of this call and + * the event being installed into its context, so the PMU's CPU can't + * change under our feet. + */ + cpu = cpumask_first(&cci_pmu->cpus); + if (event->cpu < 0 || cpu < 0) + return -EINVAL; + event->cpu = cpu; + + event->destroy = hw_perf_event_destroy; + if (!atomic_inc_not_zero(active_events)) { + mutex_lock(&cci_pmu->reserve_mutex); + if (atomic_read(active_events) == 0) + err = cci_pmu_get_hw(cci_pmu); + if (!err) + atomic_inc(active_events); + mutex_unlock(&cci_pmu->reserve_mutex); + } + if (err) + return err; + + err = __hw_perf_event_init(event); + if (err) + hw_perf_event_destroy(event); + + return err; +} + +static ssize_t pmu_cpumask_attr_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pmu *pmu = dev_get_drvdata(dev); + struct cci_pmu *cci_pmu = to_cci_pmu(pmu); + + int n = scnprintf(buf, PAGE_SIZE - 1, "%*pbl", + cpumask_pr_args(&cci_pmu->cpus)); + buf[n++] = '\n'; + buf[n] = '\0'; + return n; +} + +static struct device_attribute pmu_cpumask_attr = + __ATTR(cpumask, S_IRUGO, pmu_cpumask_attr_show, NULL); + +static struct attribute *pmu_attrs[] = { + &pmu_cpumask_attr.attr, + NULL, +}; + +static struct attribute_group pmu_attr_group = { + .attrs = pmu_attrs, +}; + +static struct attribute_group pmu_format_attr_group = { + .name = "format", + .attrs = NULL, /* Filled in cci_pmu_init_attrs */ +}; + +static struct attribute_group pmu_event_attr_group = { + .name = "events", + .attrs = NULL, /* Filled in cci_pmu_init_attrs */ +}; + +static const struct attribute_group *pmu_attr_groups[] = { + &pmu_attr_group, + &pmu_format_attr_group, + &pmu_event_attr_group, + NULL +}; + +static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev) +{ + const struct cci_pmu_model *model = cci_pmu->model; + char *name = model->name; + u32 num_cntrs; + + pmu_event_attr_group.attrs = model->event_attrs; + pmu_format_attr_group.attrs = model->format_attrs; + + cci_pmu->pmu = (struct pmu) { + .name = cci_pmu->model->name, + .task_ctx_nr = perf_invalid_context, + .pmu_enable = cci_pmu_enable, + .pmu_disable = cci_pmu_disable, + .event_init = cci_pmu_event_init, + .add = cci_pmu_add, + .del = cci_pmu_del, + .start = cci_pmu_start, + .stop = cci_pmu_stop, + .read = pmu_read, + .attr_groups = pmu_attr_groups, + }; + + cci_pmu->plat_device = pdev; + num_cntrs = pmu_get_max_counters(); + if (num_cntrs > cci_pmu->model->num_hw_cntrs) { + dev_warn(&pdev->dev, + "PMU implements more counters(%d) than supported by" + " the model(%d), truncated.", + num_cntrs, cci_pmu->model->num_hw_cntrs); + num_cntrs = cci_pmu->model->num_hw_cntrs; + } + cci_pmu->num_cntrs = num_cntrs + cci_pmu->model->fixed_hw_cntrs; + + return perf_pmu_register(&cci_pmu->pmu, name, -1); +} + +static int cci_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct cci_pmu *cci_pmu = hlist_entry_safe(node, struct cci_pmu, node); + unsigned int target; + + if (!cpumask_test_and_clear_cpu(cpu, &cci_pmu->cpus)) + return 0; + target = cpumask_any_but(cpu_online_mask, cpu); + if (target >= nr_cpu_ids) + return 0; + /* + * TODO: migrate context once core races on event->ctx have + * been fixed. + */ + cpumask_set_cpu(target, &cci_pmu->cpus); + return 0; +} + +static struct cci_pmu_model cci_pmu_models[] = { +#ifdef CONFIG_ARM_CCI400_PMU + [CCI400_R0] = { + .name = "CCI_400", + .fixed_hw_cntrs = 1, /* Cycle counter */ + .num_hw_cntrs = 4, + .cntr_size = SZ_4K, + .format_attrs = cci400_pmu_format_attrs, + .event_attrs = cci400_r0_pmu_event_attrs, + .event_ranges = { + [CCI_IF_SLAVE] = { + CCI400_R0_SLAVE_PORT_MIN_EV, + CCI400_R0_SLAVE_PORT_MAX_EV, + }, + [CCI_IF_MASTER] = { + CCI400_R0_MASTER_PORT_MIN_EV, + CCI400_R0_MASTER_PORT_MAX_EV, + }, + }, + .validate_hw_event = cci400_validate_hw_event, + .get_event_idx = cci400_get_event_idx, + }, + [CCI400_R1] = { + .name = "CCI_400_r1", + .fixed_hw_cntrs = 1, /* Cycle counter */ + .num_hw_cntrs = 4, + .cntr_size = SZ_4K, + .format_attrs = cci400_pmu_format_attrs, + .event_attrs = cci400_r1_pmu_event_attrs, + .event_ranges = { + [CCI_IF_SLAVE] = { + CCI400_R1_SLAVE_PORT_MIN_EV, + CCI400_R1_SLAVE_PORT_MAX_EV, + }, + [CCI_IF_MASTER] = { + CCI400_R1_MASTER_PORT_MIN_EV, + CCI400_R1_MASTER_PORT_MAX_EV, + }, + }, + .validate_hw_event = cci400_validate_hw_event, + .get_event_idx = cci400_get_event_idx, + }, +#endif +#ifdef CONFIG_ARM_CCI5xx_PMU + [CCI500_R0] = { + .name = "CCI_500", + .fixed_hw_cntrs = 0, + .num_hw_cntrs = 8, + .cntr_size = SZ_64K, + .format_attrs = cci5xx_pmu_format_attrs, + .event_attrs = cci5xx_pmu_event_attrs, + .event_ranges = { + [CCI_IF_SLAVE] = { + CCI5xx_SLAVE_PORT_MIN_EV, + CCI5xx_SLAVE_PORT_MAX_EV, + }, + [CCI_IF_MASTER] = { + CCI5xx_MASTER_PORT_MIN_EV, + CCI5xx_MASTER_PORT_MAX_EV, + }, + [CCI_IF_GLOBAL] = { + CCI5xx_GLOBAL_PORT_MIN_EV, + CCI5xx_GLOBAL_PORT_MAX_EV, + }, + }, + .validate_hw_event = cci500_validate_hw_event, + .write_counters = cci5xx_pmu_write_counters, + }, + [CCI550_R0] = { + .name = "CCI_550", + .fixed_hw_cntrs = 0, + .num_hw_cntrs = 8, + .cntr_size = SZ_64K, + .format_attrs = cci5xx_pmu_format_attrs, + .event_attrs = cci5xx_pmu_event_attrs, + .event_ranges = { + [CCI_IF_SLAVE] = { + CCI5xx_SLAVE_PORT_MIN_EV, + CCI5xx_SLAVE_PORT_MAX_EV, + }, + [CCI_IF_MASTER] = { + CCI5xx_MASTER_PORT_MIN_EV, + CCI5xx_MASTER_PORT_MAX_EV, + }, + [CCI_IF_GLOBAL] = { + CCI5xx_GLOBAL_PORT_MIN_EV, + CCI5xx_GLOBAL_PORT_MAX_EV, + }, + }, + .validate_hw_event = cci550_validate_hw_event, + .write_counters = cci5xx_pmu_write_counters, + }, +#endif +}; + +static const struct of_device_id arm_cci_pmu_matches[] = { +#ifdef CONFIG_ARM_CCI400_PMU + { + .compatible = "arm,cci-400-pmu", + .data = NULL, + }, + { + .compatible = "arm,cci-400-pmu,r0", + .data = &cci_pmu_models[CCI400_R0], + }, + { + .compatible = "arm,cci-400-pmu,r1", + .data = &cci_pmu_models[CCI400_R1], + }, +#endif +#ifdef CONFIG_ARM_CCI5xx_PMU + { + .compatible = "arm,cci-500-pmu,r0", + .data = &cci_pmu_models[CCI500_R0], + }, + { + .compatible = "arm,cci-550-pmu,r0", + .data = &cci_pmu_models[CCI550_R0], + }, +#endif + {}, +}; + +static inline const struct cci_pmu_model *get_cci_model(struct platform_device *pdev) +{ + const struct of_device_id *match = of_match_node(arm_cci_pmu_matches, + pdev->dev.of_node); + if (!match) + return NULL; + if (match->data) + return match->data; + + dev_warn(&pdev->dev, "DEPRECATED compatible property," + "requires secure access to CCI registers"); + return probe_cci_model(pdev); +} + +static bool is_duplicate_irq(int irq, int *irqs, int nr_irqs) +{ + int i; + + for (i = 0; i < nr_irqs; i++) + if (irq == irqs[i]) + return true; + + return false; +} + +static struct cci_pmu *cci_pmu_alloc(struct platform_device *pdev) +{ + struct cci_pmu *cci_pmu; + const struct cci_pmu_model *model; + + /* + * All allocations are devm_* hence we don't have to free + * them explicitly on an error, as it would end up in driver + * detach. + */ + model = get_cci_model(pdev); + if (!model) { + dev_warn(&pdev->dev, "CCI PMU version not supported\n"); + return ERR_PTR(-ENODEV); + } + + cci_pmu = devm_kzalloc(&pdev->dev, sizeof(*cci_pmu), GFP_KERNEL); + if (!cci_pmu) + return ERR_PTR(-ENOMEM); + + cci_pmu->model = model; + cci_pmu->irqs = devm_kcalloc(&pdev->dev, CCI_PMU_MAX_HW_CNTRS(model), + sizeof(*cci_pmu->irqs), GFP_KERNEL); + if (!cci_pmu->irqs) + return ERR_PTR(-ENOMEM); + cci_pmu->hw_events.events = devm_kcalloc(&pdev->dev, + CCI_PMU_MAX_HW_CNTRS(model), + sizeof(*cci_pmu->hw_events.events), + GFP_KERNEL); + if (!cci_pmu->hw_events.events) + return ERR_PTR(-ENOMEM); + cci_pmu->hw_events.used_mask = devm_kcalloc(&pdev->dev, + BITS_TO_LONGS(CCI_PMU_MAX_HW_CNTRS(model)), + sizeof(*cci_pmu->hw_events.used_mask), + GFP_KERNEL); + if (!cci_pmu->hw_events.used_mask) + return ERR_PTR(-ENOMEM); + + return cci_pmu; +} + +static int cci_pmu_probe(struct platform_device *pdev) +{ + struct resource *res; + struct cci_pmu *cci_pmu; + int i, ret, irq; + + cci_pmu = cci_pmu_alloc(pdev); + if (IS_ERR(cci_pmu)) + return PTR_ERR(cci_pmu); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + cci_pmu->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(cci_pmu->base)) + return -ENOMEM; + + /* + * CCI PMU has one overflow interrupt per counter; but some may be tied + * together to a common interrupt. + */ + cci_pmu->nr_irqs = 0; + for (i = 0; i < CCI_PMU_MAX_HW_CNTRS(cci_pmu->model); i++) { + irq = platform_get_irq(pdev, i); + if (irq < 0) + break; + + if (is_duplicate_irq(irq, cci_pmu->irqs, cci_pmu->nr_irqs)) + continue; + + cci_pmu->irqs[cci_pmu->nr_irqs++] = irq; + } + + /* + * Ensure that the device tree has as many interrupts as the number + * of counters. + */ + if (i < CCI_PMU_MAX_HW_CNTRS(cci_pmu->model)) { + dev_warn(&pdev->dev, "In-correct number of interrupts: %d, should be %d\n", + i, CCI_PMU_MAX_HW_CNTRS(cci_pmu->model)); + return -EINVAL; + } + + raw_spin_lock_init(&cci_pmu->hw_events.pmu_lock); + mutex_init(&cci_pmu->reserve_mutex); + atomic_set(&cci_pmu->active_events, 0); + cpumask_set_cpu(get_cpu(), &cci_pmu->cpus); + + ret = cci_pmu_init(cci_pmu, pdev); + if (ret) { + put_cpu(); + return ret; + } + + cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_CCI_ONLINE, + &cci_pmu->node); + put_cpu(); + pr_info("ARM %s PMU driver probed", cci_pmu->model->name); + return 0; +} + +static struct platform_driver cci_pmu_driver = { + .driver = { + .name = DRIVER_NAME, + .of_match_table = arm_cci_pmu_matches, + }, + .probe = cci_pmu_probe, +}; + +static int __init cci_platform_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_CCI_ONLINE, + "perf/arm/cci:online", NULL, + cci_pmu_offline_cpu); + if (ret) + return ret; + + return platform_driver_register(&cci_pmu_driver); +} + +device_initcall(cci_platform_init); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ARM CCI PMU support"); -- cgit v1.2.3-59-g8ed1b From 03057f2626e955ebea88a668a6d7d699f836e5c0 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 15 Feb 2018 18:51:43 +0000 Subject: perf/arm-cci: Simplify CPU hotplug Realistically, systems with multiple CCIs are unlikely to ever exist, and since the driver only actually supports a single instance anyway there's really no need to do the multi-instance hotplug state dance. Take the opportunity to simplify the hotplug-related code all over, addressing the context-migration TODO in the process for good measure. Acked-by: Punit Agrawal Acked-by: Will Deacon Signed-off-by: Robin Murphy Signed-off-by: Arnd Bergmann --- drivers/perf/arm-cci.c | 56 +++++++++++++++++--------------------------------- 1 file changed, 19 insertions(+), 37 deletions(-) (limited to 'drivers/perf') diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c index d5f8c750fd41..242623fbce1f 100644 --- a/drivers/perf/arm-cci.c +++ b/drivers/perf/arm-cci.c @@ -90,6 +90,7 @@ static struct cci_pmu_model cci_pmu_models[]; struct cci_pmu { void __iomem *base; struct pmu pmu; + int cpu; int nr_irqs; int *irqs; unsigned long active_irqs; @@ -99,12 +100,12 @@ struct cci_pmu { int num_cntrs; atomic_t active_events; struct mutex reserve_mutex; - struct hlist_node node; - cpumask_t cpus; }; #define to_cci_pmu(c) (container_of(c, struct cci_pmu, pmu)) +static struct cci_pmu *g_cci_pmu; + enum cci_models { #ifdef CONFIG_ARM_CCI400_PMU CCI400_R0, @@ -1325,7 +1326,6 @@ static int cci_pmu_event_init(struct perf_event *event) struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); atomic_t *active_events = &cci_pmu->active_events; int err = 0; - int cpu; if (event->attr.type != event->pmu->type) return -ENOENT; @@ -1352,10 +1352,9 @@ static int cci_pmu_event_init(struct perf_event *event) * the event being installed into its context, so the PMU's CPU can't * change under our feet. */ - cpu = cpumask_first(&cci_pmu->cpus); - if (event->cpu < 0 || cpu < 0) + if (event->cpu < 0) return -EINVAL; - event->cpu = cpu; + event->cpu = cci_pmu->cpu; event->destroy = hw_perf_event_destroy; if (!atomic_inc_not_zero(active_events)) { @@ -1382,11 +1381,7 @@ static ssize_t pmu_cpumask_attr_show(struct device *dev, struct pmu *pmu = dev_get_drvdata(dev); struct cci_pmu *cci_pmu = to_cci_pmu(pmu); - int n = scnprintf(buf, PAGE_SIZE - 1, "%*pbl", - cpumask_pr_args(&cci_pmu->cpus)); - buf[n++] = '\n'; - buf[n] = '\0'; - return n; + return cpumap_print_to_pagebuf(true, buf, cpumask_of(cci_pmu->cpu)); } static struct device_attribute pmu_cpumask_attr = @@ -1455,21 +1450,19 @@ static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev) return perf_pmu_register(&cci_pmu->pmu, name, -1); } -static int cci_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) +static int cci_pmu_offline_cpu(unsigned int cpu) { - struct cci_pmu *cci_pmu = hlist_entry_safe(node, struct cci_pmu, node); - unsigned int target; + int target; - if (!cpumask_test_and_clear_cpu(cpu, &cci_pmu->cpus)) + if (!g_cci_pmu || cpu != g_cci_pmu->cpu) return 0; + target = cpumask_any_but(cpu_online_mask, cpu); if (target >= nr_cpu_ids) return 0; - /* - * TODO: migrate context once core races on event->ctx have - * been fixed. - */ - cpumask_set_cpu(target, &cci_pmu->cpus); + + perf_pmu_migrate_context(&g_cci_pmu->pmu, cpu, target); + g_cci_pmu->cpu = target; return 0; } @@ -1706,7 +1699,7 @@ static int cci_pmu_probe(struct platform_device *pdev) raw_spin_lock_init(&cci_pmu->hw_events.pmu_lock); mutex_init(&cci_pmu->reserve_mutex); atomic_set(&cci_pmu->active_events, 0); - cpumask_set_cpu(get_cpu(), &cci_pmu->cpus); + cci_pmu->cpu = get_cpu(); ret = cci_pmu_init(cci_pmu, pdev); if (ret) { @@ -1714,9 +1707,11 @@ static int cci_pmu_probe(struct platform_device *pdev) return ret; } - cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_CCI_ONLINE, - &cci_pmu->node); + cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_CCI_ONLINE, + "perf/arm/cci:online", NULL, + cci_pmu_offline_cpu); put_cpu(); + g_cci_pmu = cci_pmu; pr_info("ARM %s PMU driver probed", cci_pmu->model->name); return 0; } @@ -1729,19 +1724,6 @@ static struct platform_driver cci_pmu_driver = { .probe = cci_pmu_probe, }; -static int __init cci_platform_init(void) -{ - int ret; - - ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_CCI_ONLINE, - "perf/arm/cci:online", NULL, - cci_pmu_offline_cpu); - if (ret) - return ret; - - return platform_driver_register(&cci_pmu_driver); -} - -device_initcall(cci_platform_init); +builtin_platform_driver(cci_pmu_driver); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("ARM CCI PMU support"); -- cgit v1.2.3-59-g8ed1b From 32837954db462ecc28051923109ef6e4a221f2b2 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 15 Feb 2018 18:51:45 +0000 Subject: perf/arm-cci: Clean up model discovery Since I am the self-appointed of_device_get_match_data() police, it's only right that I should clean up this driver while I'm otherwise touching it. This also reveals that we're passing around a struct platform_device in places where we only ever care about its regular device, so straighten that out in the process. Acked-by: Punit Agrawal Acked-by: Will Deacon Signed-off-by: Robin Murphy Signed-off-by: Arnd Bergmann --- drivers/perf/arm-cci.c | 40 ++++++++++++++++------------------------ 1 file changed, 16 insertions(+), 24 deletions(-) (limited to 'drivers/perf') diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c index 242623fbce1f..336f1455cf96 100644 --- a/drivers/perf/arm-cci.c +++ b/drivers/perf/arm-cci.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -371,14 +372,14 @@ static int probe_cci400_revision(void) return CCI400_R1; } -static const struct cci_pmu_model *probe_cci_model(struct platform_device *pdev) +static const struct cci_pmu_model *probe_cci_model(void) { if (platform_has_secure_cci_access()) return &cci_pmu_models[probe_cci400_revision()]; return NULL; } #else /* !CONFIG_ARM_CCI400_PMU */ -static inline struct cci_pmu_model *probe_cci_model(struct platform_device *pdev) +static inline struct cci_pmu_model *probe_cci_model(void) { return NULL; } @@ -1589,20 +1590,6 @@ static const struct of_device_id arm_cci_pmu_matches[] = { {}, }; -static inline const struct cci_pmu_model *get_cci_model(struct platform_device *pdev) -{ - const struct of_device_id *match = of_match_node(arm_cci_pmu_matches, - pdev->dev.of_node); - if (!match) - return NULL; - if (match->data) - return match->data; - - dev_warn(&pdev->dev, "DEPRECATED compatible property," - "requires secure access to CCI registers"); - return probe_cci_model(pdev); -} - static bool is_duplicate_irq(int irq, int *irqs, int nr_irqs) { int i; @@ -1614,7 +1601,7 @@ static bool is_duplicate_irq(int irq, int *irqs, int nr_irqs) return false; } -static struct cci_pmu *cci_pmu_alloc(struct platform_device *pdev) +static struct cci_pmu *cci_pmu_alloc(struct device *dev) { struct cci_pmu *cci_pmu; const struct cci_pmu_model *model; @@ -1624,28 +1611,33 @@ static struct cci_pmu *cci_pmu_alloc(struct platform_device *pdev) * them explicitly on an error, as it would end up in driver * detach. */ - model = get_cci_model(pdev); + model = of_device_get_match_data(dev); + if (!model) { + dev_warn(dev, + "DEPRECATED compatible property, requires secure access to CCI registers"); + model = probe_cci_model(); + } if (!model) { - dev_warn(&pdev->dev, "CCI PMU version not supported\n"); + dev_warn(dev, "CCI PMU version not supported\n"); return ERR_PTR(-ENODEV); } - cci_pmu = devm_kzalloc(&pdev->dev, sizeof(*cci_pmu), GFP_KERNEL); + cci_pmu = devm_kzalloc(dev, sizeof(*cci_pmu), GFP_KERNEL); if (!cci_pmu) return ERR_PTR(-ENOMEM); cci_pmu->model = model; - cci_pmu->irqs = devm_kcalloc(&pdev->dev, CCI_PMU_MAX_HW_CNTRS(model), + cci_pmu->irqs = devm_kcalloc(dev, CCI_PMU_MAX_HW_CNTRS(model), sizeof(*cci_pmu->irqs), GFP_KERNEL); if (!cci_pmu->irqs) return ERR_PTR(-ENOMEM); - cci_pmu->hw_events.events = devm_kcalloc(&pdev->dev, + cci_pmu->hw_events.events = devm_kcalloc(dev, CCI_PMU_MAX_HW_CNTRS(model), sizeof(*cci_pmu->hw_events.events), GFP_KERNEL); if (!cci_pmu->hw_events.events) return ERR_PTR(-ENOMEM); - cci_pmu->hw_events.used_mask = devm_kcalloc(&pdev->dev, + cci_pmu->hw_events.used_mask = devm_kcalloc(dev, BITS_TO_LONGS(CCI_PMU_MAX_HW_CNTRS(model)), sizeof(*cci_pmu->hw_events.used_mask), GFP_KERNEL); @@ -1661,7 +1653,7 @@ static int cci_pmu_probe(struct platform_device *pdev) struct cci_pmu *cci_pmu; int i, ret, irq; - cci_pmu = cci_pmu_alloc(pdev); + cci_pmu = cci_pmu_alloc(&pdev->dev); if (IS_ERR(cci_pmu)) return PTR_ERR(cci_pmu); -- cgit v1.2.3-59-g8ed1b From e9c112c94b014b581380d370d3fa2f1d23d07cc0 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 15 Feb 2018 18:51:46 +0000 Subject: perf/arm-cci: Untangle global cci_ctrl_base Depending directly on the bus driver's global cci_ctrl_base variable is a little unpleasant, and exporting it to allow the PMU driver to be modular would be even more so. Let's make things a little better abstracted by adding the control register block to the cci_pmu instance data alongside the PMU register block, and communicating the mapped address from the bus driver via platform data. It's not practical to try the same thing for the bus driver itself, given that the globals are entangled with the hairy assembly code for port control, so we leave them be there. It would however be prudent to move them to the __ro_after_init section in passing, since the addresses really should never be changing once set. Signed-off-by: Robin Murphy Signed-off-by: Arnd Bergmann --- drivers/bus/arm-cci.c | 17 +++++++++++++---- drivers/perf/arm-cci.c | 47 ++++++++++++++++++++++++----------------------- 2 files changed, 37 insertions(+), 27 deletions(-) (limited to 'drivers/perf') diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c index 23dc0b890d0c..443e4c3fd357 100644 --- a/drivers/bus/arm-cci.c +++ b/drivers/bus/arm-cci.c @@ -25,9 +25,8 @@ #include #include -/* Referenced read-only by the PMU driver; see drivers/perf/arm-cci.c */ -void __iomem *cci_ctrl_base; -static unsigned long cci_ctrl_phys; +static void __iomem *cci_ctrl_base __ro_after_init; +static unsigned long cci_ctrl_phys __ro_after_init; #ifdef CONFIG_ARM_CCI400_PORT_CTRL struct cci_nb_ports { @@ -56,6 +55,15 @@ static const struct of_device_id arm_cci_matches[] = { {}, }; +static const struct of_dev_auxdata arm_cci_auxdata[] = { + OF_DEV_AUXDATA("arm,cci-400-pmu", 0, NULL, &cci_ctrl_base), + OF_DEV_AUXDATA("arm,cci-400-pmu,r0", 0, NULL, &cci_ctrl_base), + OF_DEV_AUXDATA("arm,cci-400-pmu,r1", 0, NULL, &cci_ctrl_base), + OF_DEV_AUXDATA("arm,cci-500-pmu,r0", 0, NULL, &cci_ctrl_base), + OF_DEV_AUXDATA("arm,cci-550-pmu,r0", 0, NULL, &cci_ctrl_base), + {} +}; + #define DRIVER_NAME "ARM-CCI" static int cci_platform_probe(struct platform_device *pdev) @@ -63,7 +71,8 @@ static int cci_platform_probe(struct platform_device *pdev) if (!cci_probed()) return -ENODEV; - return of_platform_populate(pdev->dev.of_node, NULL, NULL, &pdev->dev); + return of_platform_populate(pdev->dev.of_node, NULL, + arm_cci_auxdata, &pdev->dev); } static struct platform_driver cci_platform_driver = { diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c index 336f1455cf96..67a74c48c7c2 100644 --- a/drivers/perf/arm-cci.c +++ b/drivers/perf/arm-cci.c @@ -16,8 +16,6 @@ #include #include -extern void __iomem *const cci_ctrl_base; - #define DRIVER_NAME "ARM-CCI PMU" #define CCI_PMCR 0x0100 @@ -90,6 +88,7 @@ static struct cci_pmu_model cci_pmu_models[]; struct cci_pmu { void __iomem *base; + void __iomem *ctrl_base; struct pmu pmu; int cpu; int nr_irqs; @@ -360,10 +359,10 @@ static int cci400_validate_hw_event(struct cci_pmu *cci_pmu, unsigned long hw_ev return -ENOENT; } -static int probe_cci400_revision(void) +static int probe_cci400_revision(struct cci_pmu *cci_pmu) { int rev; - rev = readl_relaxed(cci_ctrl_base + CCI_PID2) & CCI_PID2_REV_MASK; + rev = readl_relaxed(cci_pmu->ctrl_base + CCI_PID2) & CCI_PID2_REV_MASK; rev >>= CCI_PID2_REV_SHIFT; if (rev < CCI400_R1_PX) @@ -372,14 +371,14 @@ static int probe_cci400_revision(void) return CCI400_R1; } -static const struct cci_pmu_model *probe_cci_model(void) +static const struct cci_pmu_model *probe_cci_model(struct cci_pmu *cci_pmu) { if (platform_has_secure_cci_access()) - return &cci_pmu_models[probe_cci400_revision()]; + return &cci_pmu_models[probe_cci400_revision(cci_pmu)]; return NULL; } #else /* !CONFIG_ARM_CCI400_PMU */ -static inline struct cci_pmu_model *probe_cci_model(void) +static inline struct cci_pmu_model *probe_cci_model(struct cci_pmu *cci_pmu) { return NULL; } @@ -662,8 +661,8 @@ static void __cci_pmu_enable_nosync(struct cci_pmu *cci_pmu) u32 val; /* Enable all the PMU counters. */ - val = readl_relaxed(cci_ctrl_base + CCI_PMCR) | CCI_PMCR_CEN; - writel(val, cci_ctrl_base + CCI_PMCR); + val = readl_relaxed(cci_pmu->ctrl_base + CCI_PMCR) | CCI_PMCR_CEN; + writel(val, cci_pmu->ctrl_base + CCI_PMCR); } /* Should be called with cci_pmu->hw_events->pmu_lock held */ @@ -674,13 +673,13 @@ static void __cci_pmu_enable_sync(struct cci_pmu *cci_pmu) } /* Should be called with cci_pmu->hw_events->pmu_lock held */ -static void __cci_pmu_disable(void) +static void __cci_pmu_disable(struct cci_pmu *cci_pmu) { u32 val; /* Disable all the PMU counters. */ - val = readl_relaxed(cci_ctrl_base + CCI_PMCR) & ~CCI_PMCR_CEN; - writel(val, cci_ctrl_base + CCI_PMCR); + val = readl_relaxed(cci_pmu->ctrl_base + CCI_PMCR) & ~CCI_PMCR_CEN; + writel(val, cci_pmu->ctrl_base + CCI_PMCR); } static ssize_t cci_pmu_format_show(struct device *dev, @@ -782,9 +781,9 @@ pmu_restore_counters(struct cci_pmu *cci_pmu, unsigned long *mask) * Returns the number of programmable counters actually implemented * by the cci */ -static u32 pmu_get_max_counters(void) +static u32 pmu_get_max_counters(struct cci_pmu *cci_pmu) { - return (readl_relaxed(cci_ctrl_base + CCI_PMCR) & + return (readl_relaxed(cci_pmu->ctrl_base + CCI_PMCR) & CCI_PMCR_NCNT_MASK) >> CCI_PMCR_NCNT_SHIFT; } @@ -965,7 +964,7 @@ static void cci5xx_pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *ma pmu_set_event(cci_pmu, i, event->hw.config_base); } - __cci_pmu_disable(); + __cci_pmu_disable(cci_pmu); pmu_restore_counters(cci_pmu, saved_mask); } @@ -1026,7 +1025,7 @@ static irqreturn_t pmu_handle_irq(int irq_num, void *dev) raw_spin_lock_irqsave(&events->pmu_lock, flags); /* Disable the PMU while we walk through the counters */ - __cci_pmu_disable(); + __cci_pmu_disable(cci_pmu); /* * Iterate over counters and update the corresponding perf events. * This should work regardless of whether we have per-counter overflow @@ -1108,7 +1107,7 @@ static void cci_pmu_disable(struct pmu *pmu) unsigned long flags; raw_spin_lock_irqsave(&hw_events->pmu_lock, flags); - __cci_pmu_disable(); + __cci_pmu_disable(cci_pmu); raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags); } @@ -1438,7 +1437,7 @@ static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev) }; cci_pmu->plat_device = pdev; - num_cntrs = pmu_get_max_counters(); + num_cntrs = pmu_get_max_counters(cci_pmu); if (num_cntrs > cci_pmu->model->num_hw_cntrs) { dev_warn(&pdev->dev, "PMU implements more counters(%d) than supported by" @@ -1611,21 +1610,23 @@ static struct cci_pmu *cci_pmu_alloc(struct device *dev) * them explicitly on an error, as it would end up in driver * detach. */ + cci_pmu = devm_kzalloc(dev, sizeof(*cci_pmu), GFP_KERNEL); + if (!cci_pmu) + return ERR_PTR(-ENOMEM); + + cci_pmu->ctrl_base = *(void __iomem **)dev->platform_data; + model = of_device_get_match_data(dev); if (!model) { dev_warn(dev, "DEPRECATED compatible property, requires secure access to CCI registers"); - model = probe_cci_model(); + model = probe_cci_model(cci_pmu); } if (!model) { dev_warn(dev, "CCI PMU version not supported\n"); return ERR_PTR(-ENODEV); } - cci_pmu = devm_kzalloc(dev, sizeof(*cci_pmu), GFP_KERNEL); - if (!cci_pmu) - return ERR_PTR(-ENOMEM); - cci_pmu->model = model; cci_pmu->irqs = devm_kcalloc(dev, CCI_PMU_MAX_HW_CNTRS(model), sizeof(*cci_pmu->irqs), GFP_KERNEL); -- cgit v1.2.3-59-g8ed1b