// SPDX-License-Identifier: GPL-2.0 /* * Copyright(C) 2015 Linaro Limited. All rights reserved. * Author: Mathieu Poirier */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "coresight-etm-perf.h" #include "coresight-priv.h" static struct pmu etm_pmu; static bool etm_perf_up; static DEFINE_PER_CPU(struct perf_output_handle, ctx_handle); static DEFINE_PER_CPU(struct coresight_device *, csdev_src); /* ETMv3.5/PTM's ETMCR is 'config' */ PMU_FORMAT_ATTR(cycacc, "config:" __stringify(ETM_OPT_CYCACC)); PMU_FORMAT_ATTR(contextid, "config:" __stringify(ETM_OPT_CTXTID)); PMU_FORMAT_ATTR(timestamp, "config:" __stringify(ETM_OPT_TS)); PMU_FORMAT_ATTR(retstack, "config:" __stringify(ETM_OPT_RETSTK)); /* Sink ID - same for all ETMs */ PMU_FORMAT_ATTR(sinkid, "config2:0-31"); static struct attribute *etm_config_formats_attr[] = { &format_attr_cycacc.attr, &format_attr_contextid.attr, &format_attr_timestamp.attr, &format_attr_retstack.attr, &format_attr_sinkid.attr, NULL, }; static const struct attribute_group etm_pmu_format_group = { .name = "format", .attrs = etm_config_formats_attr, }; static struct attribute *etm_config_sinks_attr[] = { NULL, }; static const struct attribute_group etm_pmu_sinks_group = { .name = "sinks", .attrs = etm_config_sinks_attr, }; static const struct attribute_group *etm_pmu_attr_groups[] = { &etm_pmu_format_group, &etm_pmu_sinks_group, NULL, }; static inline struct list_head ** etm_event_cpu_path_ptr(struct etm_event_data *data, int cpu) { return per_cpu_ptr(data->path, cpu); } static inline struct list_head * etm_event_cpu_path(struct etm_event_data *data, int cpu) { return *etm_event_cpu_path_ptr(data, cpu); } static void etm_event_read(struct perf_event *event) {} static int etm_addr_filters_alloc(struct perf_event *event) { struct etm_filters *filters; int node = event->cpu == -1 ? -1 : cpu_to_node(event->cpu); filters = kzalloc_node(sizeof(struct etm_filters), GFP_KERNEL, node); if (!filters) return -ENOMEM; if (event->parent) memcpy(filters, event->parent->hw.addr_filters, sizeof(*filters)); event->hw.addr_filters = filters; return 0; } static void etm_event_destroy(struct perf_event *event) { kfree(event->hw.addr_filters); event->hw.addr_filters = NULL; } static int etm_event_init(struct perf_event *event) { int ret = 0; if (event->attr.type != etm_pmu.type) { ret = -ENOENT; goto out; } ret = etm_addr_filters_alloc(event); if (ret) goto out; event->destroy = etm_event_destroy; out: return ret; } static void free_sink_buffer(struct etm_event_data *event_data) { int cpu; cpumask_t *mask = &event_data->mask; struct coresight_device *sink; if (WARN_ON(cpumask_empty(mask))) return; if (!event_data->snk_config) return; cpu = cpumask_first(mask); sink = coresight_get_sink(etm_event_cpu_path(event_data, cpu)); sink_ops(sink)->free_buffer(event_data->snk_config); } static void free_event_data(struct work_struct *work) { int cpu; cpumask_t *mask; struct etm_event_data *event_data; event_data = container_of(work, struct etm_event_data, work); mask = &event_data->mask; /* Free the sink buffers, if there are any */ free_sink_buffer(event_data); for_each_cpu(cpu, mask) { struct list_head **ppath; ppath = etm_event_cpu_path_ptr(event_data, cpu); if (!(IS_ERR_OR_NULL(*ppath))) coresight_release_path(*ppath); *ppath = NULL; } free_percpu(event_data->path); kfree(event_data); } static void *alloc_event_data(int cpu) { cpumask_t *mask; struct etm_event_data *event_data; /* First get memory for the session's data */ event_data = kzalloc(sizeof(struct etm_event_data), GFP_KERNEL); if (!event_data) return NULL; mask = &event_data->mask; if (cpu != -1) cpumask_set_cpu(cpu, mask); else cpumask_copy(mask, cpu_present_mask); /* * Each CPU has a single path between source and destination. As such * allocate an array using CPU numbers as indexes. That way a path * for any CPU can easily be accessed at any given time. We proceed * the same way for sessions involving a single CPU. The cost of * unused memory when dealing with single CPU trace scenarios is small * compared to the cost of searching through an optimized array. */ event_data->path = alloc_percpu(struct list_head *); if (!event_data->path) { kfree(event_data); return NULL; } return event_data; } static void etm_free_aux(void *data) { struct etm_event_data *event_data = data; schedule_work(&event_data->work); } static void *etm_setup_aux(struct perf_event *event, void **pages, int nr_pages, bool overwrite) { u32 id; int cpu = event->cpu; cpumask_t *mask; struct coresight_device *sink; struct etm_event_data *event_data = NULL; event_data = alloc_event_data(cpu); if (!event_data) return NULL; INIT_WORK(&event_data->work, free_event_data); /* First get the selected sink from user space. */ if (event->attr.config2) { id = (u32)event->attr.config2; sink = coresight_get_sink_by_id(id); } else { sink = coresight_get_enabled_sink(true); } if (!sink) goto err; mask = &event_data->mask; /* * Setup the path for each CPU in a trace session. We try to build * trace path for each CPU in the mask. If we don't find an ETM * for the CPU or fail to build a path, we clear the CPU from the * mask and continue with the rest. If ever we try to trace on those * CPUs, we can handle it and fail the session. */ for_each_cpu(cpu, mask) { struct list_head *path; struct coresight_device *csdev; csdev = per_cpu(csdev_src, cpu); /* * If there is no ETM associated with this CPU clear it from * the mask and continue with the rest. If ever we try to trace * on this CPU, we handle it accordingly. */ if (!csdev) { cpumask_clear_cpu(cpu, mask); continue; } /* * Building a path doesn't enable it, it simply builds a * list of devices from source to sink that can be * referenced later when the path is actually needed. */ path = coresight_build_path(csdev, sink); if (IS_ERR(path)) { cpumask_clear_cpu(cpu, mask); continue; } *etm_event_cpu_path_ptr(event_data, cpu) = path; } /* If we don't have any CPUs ready for tracing, abort */ cpu = cpumask_first(mask); if (cpu >= nr_cpu_ids) goto err; if (!sink_ops(sink)->alloc_buffer || !sink_ops(sink)->free_buffer) goto err; /* Allocate the sink buffer for this session */ event_data->snk_config = sink_ops(sink)->alloc_buffer(sink, event, pages, nr_pages, overwrite); if (!event_data->snk_config) goto err; out: return event_data; err: etm_free_aux(event_data); event_data = NULL; goto out; } static void etm_event_start(struct perf_event *event, int flags) { int cpu = smp_processor_id(); struct etm_event_data *event_data; struct perf_output_handle *handle = this_cpu_ptr(&ctx_handle); struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu); struct list_head *path; if (!csdev) goto fail; /* * Deal with the ring buffer API and get a handle on the * session's information. */ event_data = perf_aux_output_begin(handle, event); if (!event_data) goto fail; path = etm_event_cpu_path(event_data, cpu); /* We need a sink, no need to continue without one */ sink = coresight_get_sink(path); if (WARN_ON_ONCE(!sink)) goto fail_end_stop; /* Nothing will happen without a path */ if (coresight_enable_path(path, CS_MODE_PERF, handle)) goto fail_end_stop; /* Tell the perf core the event is alive */ event->hw.state = 0; /* Finally enable the tracer */ if (source_ops(csdev)->enable(csdev, event, CS_MODE_PERF)) goto fail_disable_path; out: return; fail_disable_path: coresight_disable_path(path); fail_end_stop: perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); perf_aux_output_end(handle, 0); fail: event->hw.state = PERF_HES_STOPPED; goto out; } static void etm_event_stop(struct perf_event *event, int mode) { int cpu = smp_processor_id(); unsigned long size; struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu); struct perf_output_handle *handle = this_cpu_ptr(&ctx_handle); struct etm_event_data *event_data = perf_get_aux(handle); struct list_head *path; if (event->hw.state == PERF_HES_STOPPED) return; if (!csdev) return; path = etm_event_cpu_path(event_data, cpu); if (!path) return; sink = coresight_get_sink(path); if (!sink) return; /* stop tracer */ source_ops(csdev)->disable(csdev, event); /* tell the core */ event->hw.state = PERF_HES_STOPPED; if (mode & PERF_EF_UPDATE) { if (WARN_ON_ONCE(handle->event != event)) return; /* update trace information */ if (!sink_ops(sink)->update_buffer) return; size = sink_ops(sink)->update_buffer(sink, handle, event_data->snk_config); perf_aux_output_end(handle, size); } /* Disabling the path make its elements available to other sessions */ coresight_disable_path(path); } static int etm_event_add(struct perf_event *event, int mode) { int ret = 0; struct hw_perf_event *hwc = &event->hw; if (mode & PERF_EF_START) { etm_event_start(event, 0); if (hwc->state & PERF_HES_STOPPED) ret = -EINVAL; } else { hwc->state = PERF_HES_STOPPED; } return ret; } static void etm_event_del(struct perf_event *event, int mode) { etm_event_stop(event, PERF_EF_UPDATE); } static int etm_addr_filters_validate(struct list_head *filters) { bool range = false, address = false; int index = 0; struct perf_addr_filter *filter; list_for_each_entry(filter, filters, entry) { /* * No need to go further if there's no more * room for filters. */ if (++index > ETM_ADDR_CMP_MAX) return -EOPNOTSUPP; /* filter::size==0 means single address trigger */ if (filter->size) { /* * The existing code relies on START/STOP filters * being address filters. */ if (filter->action == PERF_ADDR_FILTER_ACTION_START || filter->action == PERF_ADDR_FILTER_ACTION_STOP) return -EOPNOTSUPP; range = true; } else address = true; /* * At this time we don't allow range and start/stop filtering * to cohabitate, they have to be mutually exclusive. */ if (range && address) return -EOPNOTSUPP; } return 0; } static void etm_addr_filters_sync(struct perf_event *event) { struct perf_addr_filters_head *head = perf_event_addr_filters(event); unsigned long start, stop; struct perf_addr_filter_range *fr = event->addr_filter_ranges; struct etm_filters *filters = event->hw.addr_filters; struct etm_filter *etm_filter; struct perf_addr_filter *filter; int i = 0; list_for_each_entry(filter, &head->list, entry) { start = fr[i].start; stop = start + fr[i].size; etm_filter = &filters->etm_filter[i]; switch (filter->action) { case PERF_ADDR_FILTER_ACTION_FILTER: etm_filter->start_addr = start; etm_filter->stop_addr = stop; etm_filter->type = ETM_ADDR_TYPE_RANGE; break; case PERF_ADDR_FILTER_ACTION_START: etm_filter->start_addr = start; etm_filter->type = ETM_ADDR_TYPE_START; break; case PERF_ADDR_FILTER_ACTION_STOP: etm_filter->stop_addr = stop; etm_filter->type = ETM_ADDR_TYPE_STOP; break; } i++; } filters->nr_filters = i; } int etm_perf_symlink(struct coresight_device *csdev, bool link) { char entry[sizeof("cpu9999999")]; int ret = 0, cpu = source_ops(csdev)->cpu_id(csdev); struct device *pmu_dev = etm_pmu.dev; struct device *cs_dev = &csdev->dev; sprintf(entry, "cpu%d", cpu); if (!etm_perf_up) return -EPROBE_DEFER; if (link) { ret = sysfs_create_link(&pmu_dev->kobj, &cs_dev->kobj, entry); if (ret) return ret; per_cpu(csdev_src, cpu) = csdev; } else { sysfs_remove_link(&pmu_dev->kobj, entry); per_cpu(csdev_src, cpu) = NULL; } return 0; } static ssize_t etm_perf_sink_name_show(struct device *dev, struct device_attribute *dattr, char *buf) { struct dev_ext_attribute *ea; ea = container_of(dattr, struct dev_ext_attribute, attr); return scnprintf(buf, PAGE_SIZE, "0x%lx\n", (unsigned long)(ea->var)); } int etm_perf_add_symlink_sink(struct coresight_device *csdev) { int ret; unsigned long hash; const char *name; struct device *pmu_dev = etm_pmu.dev; struct device *dev = &csdev->dev; struct dev_ext_attribute *ea; if (csdev->type != CORESIGHT_DEV_TYPE_SINK && csdev->type != CORESIGHT_DEV_TYPE_LINKSINK) return -EINVAL; if (csdev->ea != NULL) return -EINVAL; if (!etm_perf_up) return -EPROBE_DEFER; ea = devm_kzalloc(dev, sizeof(*ea), GFP_KERNEL); if (!ea) return -ENOMEM; name = dev_name(dev); /* See function coresight_get_sink_by_id() to know where this is used */ hash = hashlen_hash(hashlen_string(NULL, name)); ea->attr.attr.name = devm_kstrdup(dev, name, GFP_KERNEL); if (!ea->attr.attr.name) return -ENOMEM; ea->attr.attr.mode = 0444; ea->attr.show = etm_perf_sink_name_show; ea->var = (unsigned long *)hash; ret = sysfs_add_file_to_group(&pmu_dev->kobj, &ea->attr.attr, "sinks"); if (!ret) csdev->ea = ea; return ret; } void etm_perf_del_symlink_sink(struct coresight_device *csdev) { struct device *pmu_dev = etm_pmu.dev; struct dev_ext_attribute *ea = csdev->ea; if (csdev->type != CORESIGHT_DEV_TYPE_SINK && csdev->type != CORESIGHT_DEV_TYPE_LINKSINK) return; if (!ea) return; sysfs_remove_file_from_group(&pmu_dev->kobj, &ea->attr.attr, "sinks"); csdev->ea = NULL; } static int __init etm_perf_init(void) { int ret; etm_pmu.capabilities = (PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE); etm_pmu.attr_groups = etm_pmu_attr_groups; etm_pmu.task_ctx_nr = perf_sw_context; etm_pmu.read = etm_event_read; etm_pmu.event_init = etm_event_init; etm_pmu.setup_aux = etm_setup_aux; etm_pmu.free_aux = etm_free_aux; etm_pmu.start = etm_event_start; etm_pmu.stop = etm_event_stop; etm_pmu.add = etm_event_add; etm_pmu.del = etm_event_del; etm_pmu.addr_filters_sync = etm_addr_filters_sync; etm_pmu.addr_filters_validate = etm_addr_filters_validate; etm_pmu.nr_addr_filters = ETM_ADDR_CMP_MAX; ret = perf_pmu_register(&etm_pmu, CORESIGHT_ETM_PMU_NAME, -1); if (ret == 0) etm_perf_up = true; return ret; } device_initcall(etm_perf_init);