diff options
Diffstat (limited to 'tools')
267 files changed, 40006 insertions, 11390 deletions
diff --git a/tools/Makefile b/tools/Makefile new file mode 100644 index 000000000000..3ae43947a171 --- /dev/null +++ b/tools/Makefile @@ -0,0 +1,77 @@ +include scripts/Makefile.include + +help: + @echo 'Possible targets:' + @echo '' + @echo ' cpupower - a tool for all things x86 CPU power' + @echo ' firewire - the userspace part of nosy, an IEEE-1394 traffic sniffer' + @echo ' lguest - a minimal 32-bit x86 hypervisor' + @echo ' perf - Linux performance measurement and analysis tool' + @echo ' selftests - various kernel selftests' + @echo ' turbostat - Intel CPU idle stats and freq reporting tool' + @echo ' usb - USB testing tools' + @echo ' virtio - vhost test module' + @echo ' vm - misc vm tools' + @echo ' x86_energy_perf_policy - Intel energy policy tool' + @echo '' + @echo 'You can do:' + @echo ' $$ make -C tools/<tool>_install' + @echo '' + @echo ' from the kernel command line to build and install one of' + @echo ' the tools above' + @echo '' + @echo ' $$ make tools/install' + @echo '' + @echo ' installs all tools.' + @echo '' + @echo 'Cleaning targets:' + @echo '' + @echo ' all of the above with the "_clean" string appended cleans' + @echo ' the respective build directory.' + @echo ' clean: a summary clean target to clean _all_ folders' + +cpupower: FORCE + $(QUIET_SUBDIR0)power/$@/ $(QUIET_SUBDIR1) + +firewire lguest perf usb virtio vm: FORCE + $(QUIET_SUBDIR0)$@/ $(QUIET_SUBDIR1) + +selftests: FORCE + $(QUIET_SUBDIR0)testing/$@/ $(QUIET_SUBDIR1) + +turbostat x86_energy_perf_policy: FORCE + $(QUIET_SUBDIR0)power/x86/$@/ $(QUIET_SUBDIR1) + +cpupower_install: + $(QUIET_SUBDIR0)power/$(@:_install=)/ $(QUIET_SUBDIR1) install + +firewire_install lguest_install perf_install usb_install virtio_install vm_install: + $(QUIET_SUBDIR0)$(@:_install=)/ $(QUIET_SUBDIR1) install + +selftests_install: + $(QUIET_SUBDIR0)testing/$(@:_clean=)/ $(QUIET_SUBDIR1) install + +turbostat_install x86_energy_perf_policy_install: + $(QUIET_SUBDIR0)power/x86/$(@:_install=)/ $(QUIET_SUBDIR1) install + +install: cpupower_install firewire_install lguest_install perf_install \ + selftests_install turbostat_install usb_install virtio_install \ + vm_install x86_energy_perf_policy_install + +cpupower_clean: + $(QUIET_SUBDIR0)power/cpupower/ $(QUIET_SUBDIR1) clean + +firewire_clean lguest_clean perf_clean usb_clean virtio_clean vm_clean: + $(QUIET_SUBDIR0)$(@:_clean=)/ $(QUIET_SUBDIR1) clean + +selftests_clean: + $(QUIET_SUBDIR0)testing/$(@:_clean=)/ $(QUIET_SUBDIR1) clean + +turbostat_clean x86_energy_perf_policy_clean: + $(QUIET_SUBDIR0)power/x86/$(@:_clean=)/ $(QUIET_SUBDIR1) clean + +clean: cpupower_clean firewire_clean lguest_clean perf_clean selftests_clean \ + turbostat_clean usb_clean virtio_clean vm_clean \ + x86_energy_perf_policy_clean + +.PHONY: FORCE diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c new file mode 100644 index 000000000000..d9834b362943 --- /dev/null +++ b/tools/hv/hv_kvp_daemon.c @@ -0,0 +1,870 @@ +/* + * An implementation of key value pair (KVP) functionality for Linux. + * + * + * Copyright (C) 2010, Novell, Inc. + * Author : K. Y. Srinivasan <ksrinivasan@novell.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + */ + + +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/poll.h> +#include <sys/utsname.h> +#include <linux/types.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <arpa/inet.h> +#include <linux/connector.h> +#include <linux/hyperv.h> +#include <linux/netlink.h> +#include <ifaddrs.h> +#include <netdb.h> +#include <syslog.h> +#include <sys/stat.h> +#include <fcntl.h> + +/* + * KVP protocol: The user mode component first registers with the + * the kernel component. Subsequently, the kernel component requests, data + * for the specified keys. In response to this message the user mode component + * fills in the value corresponding to the specified key. We overload the + * sequence field in the cn_msg header to define our KVP message types. + * + * We use this infrastructure for also supporting queries from user mode + * application for state that may be maintained in the KVP kernel component. + * + */ + + +enum key_index { + FullyQualifiedDomainName = 0, + IntegrationServicesVersion, /*This key is serviced in the kernel*/ + NetworkAddressIPv4, + NetworkAddressIPv6, + OSBuildNumber, + OSName, + OSMajorVersion, + OSMinorVersion, + OSVersion, + ProcessorArchitecture +}; + +static char kvp_send_buffer[4096]; +static char kvp_recv_buffer[4096]; +static struct sockaddr_nl addr; + +static char *os_name = ""; +static char *os_major = ""; +static char *os_minor = ""; +static char *processor_arch; +static char *os_build; +static char *lic_version; +static struct utsname uts_buf; + + +#define MAX_FILE_NAME 100 +#define ENTRIES_PER_BLOCK 50 + +struct kvp_record { + __u8 key[HV_KVP_EXCHANGE_MAX_KEY_SIZE]; + __u8 value[HV_KVP_EXCHANGE_MAX_VALUE_SIZE]; +}; + +struct kvp_file_state { + int fd; + int num_blocks; + struct kvp_record *records; + int num_records; + __u8 fname[MAX_FILE_NAME]; +}; + +static struct kvp_file_state kvp_file_info[KVP_POOL_COUNT]; + +static void kvp_acquire_lock(int pool) +{ + struct flock fl = {F_WRLCK, SEEK_SET, 0, 0, 0}; + fl.l_pid = getpid(); + + if (fcntl(kvp_file_info[pool].fd, F_SETLKW, &fl) == -1) { + syslog(LOG_ERR, "Failed to acquire the lock pool: %d", pool); + exit(-1); + } +} + +static void kvp_release_lock(int pool) +{ + struct flock fl = {F_UNLCK, SEEK_SET, 0, 0, 0}; + fl.l_pid = getpid(); + + if (fcntl(kvp_file_info[pool].fd, F_SETLK, &fl) == -1) { + perror("fcntl"); + syslog(LOG_ERR, "Failed to release the lock pool: %d", pool); + exit(-1); + } +} + +static void kvp_update_file(int pool) +{ + FILE *filep; + size_t bytes_written; + + /* + * We are going to write our in-memory registry out to + * disk; acquire the lock first. + */ + kvp_acquire_lock(pool); + + filep = fopen(kvp_file_info[pool].fname, "w"); + if (!filep) { + kvp_release_lock(pool); + syslog(LOG_ERR, "Failed to open file, pool: %d", pool); + exit(-1); + } + + bytes_written = fwrite(kvp_file_info[pool].records, + sizeof(struct kvp_record), + kvp_file_info[pool].num_records, filep); + + fflush(filep); + kvp_release_lock(pool); +} + +static void kvp_update_mem_state(int pool) +{ + FILE *filep; + size_t records_read = 0; + struct kvp_record *record = kvp_file_info[pool].records; + struct kvp_record *readp; + int num_blocks = kvp_file_info[pool].num_blocks; + int alloc_unit = sizeof(struct kvp_record) * ENTRIES_PER_BLOCK; + + kvp_acquire_lock(pool); + + filep = fopen(kvp_file_info[pool].fname, "r"); + if (!filep) { + kvp_release_lock(pool); + syslog(LOG_ERR, "Failed to open file, pool: %d", pool); + exit(-1); + } + while (!feof(filep)) { + readp = &record[records_read]; + records_read += fread(readp, sizeof(struct kvp_record), + ENTRIES_PER_BLOCK * num_blocks, + filep); + + if (!feof(filep)) { + /* + * We have more data to read. + */ + num_blocks++; + record = realloc(record, alloc_unit * num_blocks); + + if (record == NULL) { + syslog(LOG_ERR, "malloc failed"); + exit(-1); + } + continue; + } + break; + } + + kvp_file_info[pool].num_blocks = num_blocks; + kvp_file_info[pool].records = record; + kvp_file_info[pool].num_records = records_read; + + kvp_release_lock(pool); +} +static int kvp_file_init(void) +{ + int ret, fd; + FILE *filep; + size_t records_read; + __u8 *fname; + struct kvp_record *record; + struct kvp_record *readp; + int num_blocks; + int i; + int alloc_unit = sizeof(struct kvp_record) * ENTRIES_PER_BLOCK; + + if (access("/var/opt/hyperv", F_OK)) { + if (mkdir("/var/opt/hyperv", S_IRUSR | S_IWUSR | S_IROTH)) { + syslog(LOG_ERR, " Failed to create /var/opt/hyperv"); + exit(-1); + } + } + + for (i = 0; i < KVP_POOL_COUNT; i++) { + fname = kvp_file_info[i].fname; + records_read = 0; + num_blocks = 1; + sprintf(fname, "/var/opt/hyperv/.kvp_pool_%d", i); + fd = open(fname, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR | S_IROTH); + + if (fd == -1) + return 1; + + + filep = fopen(fname, "r"); + if (!filep) + return 1; + + record = malloc(alloc_unit * num_blocks); + if (record == NULL) { + fclose(filep); + return 1; + } + while (!feof(filep)) { + readp = &record[records_read]; + records_read += fread(readp, sizeof(struct kvp_record), + ENTRIES_PER_BLOCK, + filep); + + if (!feof(filep)) { + /* + * We have more data to read. + */ + num_blocks++; + record = realloc(record, alloc_unit * + num_blocks); + if (record == NULL) { + fclose(filep); + return 1; + } + continue; + } + break; + } + kvp_file_info[i].fd = fd; + kvp_file_info[i].num_blocks = num_blocks; + kvp_file_info[i].records = record; + kvp_file_info[i].num_records = records_read; + fclose(filep); + + } + + return 0; +} + +static int kvp_key_delete(int pool, __u8 *key, int key_size) +{ + int i; + int j, k; + int num_records; + struct kvp_record *record; + + /* + * First update the in-memory state. + */ + kvp_update_mem_state(pool); + + num_records = kvp_file_info[pool].num_records; + record = kvp_file_info[pool].records; + + for (i = 0; i < num_records; i++) { + if (memcmp(key, record[i].key, key_size)) + continue; + /* + * Found a match; just move the remaining + * entries up. + */ + if (i == num_records) { + kvp_file_info[pool].num_records--; + kvp_update_file(pool); + return 0; + } + + j = i; + k = j + 1; + for (; k < num_records; k++) { + strcpy(record[j].key, record[k].key); + strcpy(record[j].value, record[k].value); + j++; + } + + kvp_file_info[pool].num_records--; + kvp_update_file(pool); + return 0; + } + return 1; +} + +static int kvp_key_add_or_modify(int pool, __u8 *key, int key_size, __u8 *value, + int value_size) +{ + int i; + int j, k; + int num_records; + struct kvp_record *record; + int num_blocks; + + if ((key_size > HV_KVP_EXCHANGE_MAX_KEY_SIZE) || + (value_size > HV_KVP_EXCHANGE_MAX_VALUE_SIZE)) + return 1; + + /* + * First update the in-memory state. + */ + kvp_update_mem_state(pool); + + num_records = kvp_file_info[pool].num_records; + record = kvp_file_info[pool].records; + num_blocks = kvp_file_info[pool].num_blocks; + + for (i = 0; i < num_records; i++) { + if (memcmp(key, record[i].key, key_size)) + continue; + /* + * Found a match; just update the value - + * this is the modify case. + */ + memcpy(record[i].value, value, value_size); + kvp_update_file(pool); + return 0; + } + + /* + * Need to add a new entry; + */ + if (num_records == (ENTRIES_PER_BLOCK * num_blocks)) { + /* Need to allocate a larger array for reg entries. */ + record = realloc(record, sizeof(struct kvp_record) * + ENTRIES_PER_BLOCK * (num_blocks + 1)); + + if (record == NULL) + return 1; + kvp_file_info[pool].num_blocks++; + + } + memcpy(record[i].value, value, value_size); + memcpy(record[i].key, key, key_size); + kvp_file_info[pool].records = record; + kvp_file_info[pool].num_records++; + kvp_update_file(pool); + return 0; +} + +static int kvp_get_value(int pool, __u8 *key, int key_size, __u8 *value, + int value_size) +{ + int i; + int num_records; + struct kvp_record *record; + + if ((key_size > HV_KVP_EXCHANGE_MAX_KEY_SIZE) || + (value_size > HV_KVP_EXCHANGE_MAX_VALUE_SIZE)) + return 1; + + /* + * First update the in-memory state. + */ + kvp_update_mem_state(pool); + + num_records = kvp_file_info[pool].num_records; + record = kvp_file_info[pool].records; + + for (i = 0; i < num_records; i++) { + if (memcmp(key, record[i].key, key_size)) + continue; + /* + * Found a match; just copy the value out. + */ + memcpy(value, record[i].value, value_size); + return 0; + } + + return 1; +} + +static void kvp_pool_enumerate(int pool, int index, __u8 *key, int key_size, + __u8 *value, int value_size) +{ + struct kvp_record *record; + + /* + * First update our in-memory database. + */ + kvp_update_mem_state(pool); + record = kvp_file_info[pool].records; + + if (index >= kvp_file_info[pool].num_records) { + /* + * This is an invalid index; terminate enumeration; + * - a NULL value will do the trick. + */ + strcpy(value, ""); + return; + } + + memcpy(key, record[index].key, key_size); + memcpy(value, record[index].value, value_size); +} + + +void kvp_get_os_info(void) +{ + FILE *file; + char *p, buf[512]; + + uname(&uts_buf); + os_build = uts_buf.release; + processor_arch = uts_buf.machine; + + /* + * The current windows host (win7) expects the build + * string to be of the form: x.y.z + * Strip additional information we may have. + */ + p = strchr(os_build, '-'); + if (p) + *p = '\0'; + + file = fopen("/etc/SuSE-release", "r"); + if (file != NULL) + goto kvp_osinfo_found; + file = fopen("/etc/redhat-release", "r"); + if (file != NULL) + goto kvp_osinfo_found; + /* + * Add code for other supported platforms. + */ + + /* + * We don't have information about the os. + */ + os_name = uts_buf.sysname; + return; + +kvp_osinfo_found: + /* up to three lines */ + p = fgets(buf, sizeof(buf), file); + if (p) { + p = strchr(buf, '\n'); + if (p) + *p = '\0'; + p = strdup(buf); + if (!p) + goto done; + os_name = p; + + /* second line */ + p = fgets(buf, sizeof(buf), file); + if (p) { + p = strchr(buf, '\n'); + if (p) + *p = '\0'; + p = strdup(buf); + if (!p) + goto done; + os_major = p; + + /* third line */ + p = fgets(buf, sizeof(buf), file); + if (p) { + p = strchr(buf, '\n'); + if (p) + *p = '\0'; + p = strdup(buf); + if (p) + os_minor = p; + } + } + } + +done: + fclose(file); + return; +} + +static int +kvp_get_ip_address(int family, char *buffer, int length) +{ + struct ifaddrs *ifap; + struct ifaddrs *curp; + int ipv4_len = strlen("255.255.255.255") + 1; + int ipv6_len = strlen("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff")+1; + int offset = 0; + const char *str; + char tmp[50]; + int error = 0; + + /* + * On entry into this function, the buffer is capable of holding the + * maximum key value (2048 bytes). + */ + + if (getifaddrs(&ifap)) { + strcpy(buffer, "getifaddrs failed\n"); + return 1; + } + + curp = ifap; + while (curp != NULL) { + if ((curp->ifa_addr != NULL) && + (curp->ifa_addr->sa_family == family)) { + if (family == AF_INET) { + struct sockaddr_in *addr = + (struct sockaddr_in *) curp->ifa_addr; + + str = inet_ntop(family, &addr->sin_addr, + tmp, 50); + if (str == NULL) { + strcpy(buffer, "inet_ntop failed\n"); + error = 1; + goto getaddr_done; + } + if (offset == 0) + strcpy(buffer, tmp); + else + strcat(buffer, tmp); + strcat(buffer, ";"); + + offset += strlen(str) + 1; + if ((length - offset) < (ipv4_len + 1)) + goto getaddr_done; + + } else { + + /* + * We only support AF_INET and AF_INET6 + * and the list of addresses is separated by a ";". + */ + struct sockaddr_in6 *addr = + (struct sockaddr_in6 *) curp->ifa_addr; + + str = inet_ntop(family, + &addr->sin6_addr.s6_addr, + tmp, 50); + if (str == NULL) { + strcpy(buffer, "inet_ntop failed\n"); + error = 1; + goto getaddr_done; + } + if (offset == 0) + strcpy(buffer, tmp); + else + strcat(buffer, tmp); + strcat(buffer, ";"); + offset += strlen(str) + 1; + if ((length - offset) < (ipv6_len + 1)) + goto getaddr_done; + + } + + } + curp = curp->ifa_next; + } + +getaddr_done: + freeifaddrs(ifap); + return error; +} + + +static int +kvp_get_domain_name(char *buffer, int length) +{ + struct addrinfo hints, *info ; + int error = 0; + + gethostname(buffer, length); + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_INET; /*Get only ipv4 addrinfo. */ + hints.ai_socktype = SOCK_STREAM; + hints.ai_flags = AI_CANONNAME; + + error = getaddrinfo(buffer, NULL, &hints, &info); + if (error != 0) { + strcpy(buffer, "getaddrinfo failed\n"); + return error; + } + strcpy(buffer, info->ai_canonname); + freeaddrinfo(info); + return error; +} + +static int +netlink_send(int fd, struct cn_msg *msg) +{ + struct nlmsghdr *nlh; + unsigned int size; + struct msghdr message; + char buffer[64]; + struct iovec iov[2]; + + size = NLMSG_SPACE(sizeof(struct cn_msg) + msg->len); + + nlh = (struct nlmsghdr *)buffer; + nlh->nlmsg_seq = 0; + nlh->nlmsg_pid = getpid(); + nlh->nlmsg_type = NLMSG_DONE; + nlh->nlmsg_len = NLMSG_LENGTH(size - sizeof(*nlh)); + nlh->nlmsg_flags = 0; + + iov[0].iov_base = nlh; + iov[0].iov_len = sizeof(*nlh); + + iov[1].iov_base = msg; + iov[1].iov_len = size; + + memset(&message, 0, sizeof(message)); + message.msg_name = &addr; + message.msg_namelen = sizeof(addr); + message.msg_iov = iov; + message.msg_iovlen = 2; + + return sendmsg(fd, &message, 0); +} + +int main(void) +{ + int fd, len, sock_opt; + int error; + struct cn_msg *message; + struct pollfd pfd; + struct nlmsghdr *incoming_msg; + struct cn_msg *incoming_cn_msg; + struct hv_kvp_msg *hv_msg; + char *p; + char *key_value; + char *key_name; + + daemon(1, 0); + openlog("KVP", 0, LOG_USER); + syslog(LOG_INFO, "KVP starting; pid is:%d", getpid()); + /* + * Retrieve OS release information. + */ + kvp_get_os_info(); + + if (kvp_file_init()) { + syslog(LOG_ERR, "Failed to initialize the pools"); + exit(-1); + } + + fd = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR); + if (fd < 0) { + syslog(LOG_ERR, "netlink socket creation failed; error:%d", fd); + exit(-1); + } + addr.nl_family = AF_NETLINK; + addr.nl_pad = 0; + addr.nl_pid = 0; + addr.nl_groups = CN_KVP_IDX; + + + error = bind(fd, (struct sockaddr *)&addr, sizeof(addr)); + if (error < 0) { + syslog(LOG_ERR, "bind failed; error:%d", error); + close(fd); + exit(-1); + } + sock_opt = addr.nl_groups; + setsockopt(fd, 270, 1, &sock_opt, sizeof(sock_opt)); + /* + * Register ourselves with the kernel. + */ + message = (struct cn_msg *)kvp_send_buffer; + message->id.idx = CN_KVP_IDX; + message->id.val = CN_KVP_VAL; + + hv_msg = (struct hv_kvp_msg *)message->data; + hv_msg->kvp_hdr.operation = KVP_OP_REGISTER; + message->ack = 0; + message->len = sizeof(struct hv_kvp_msg); + + len = netlink_send(fd, message); + if (len < 0) { + syslog(LOG_ERR, "netlink_send failed; error:%d", len); + close(fd); + exit(-1); + } + + pfd.fd = fd; + + while (1) { + struct sockaddr *addr_p = (struct sockaddr *) &addr; + socklen_t addr_l = sizeof(addr); + pfd.events = POLLIN; + pfd.revents = 0; + poll(&pfd, 1, -1); + + len = recvfrom(fd, kvp_recv_buffer, sizeof(kvp_recv_buffer), 0, + addr_p, &addr_l); + + if (len < 0 || addr.nl_pid) { + syslog(LOG_ERR, "recvfrom failed; pid:%u error:%d %s", + addr.nl_pid, errno, strerror(errno)); + close(fd); + return -1; + } + + incoming_msg = (struct nlmsghdr *)kvp_recv_buffer; + incoming_cn_msg = (struct cn_msg *)NLMSG_DATA(incoming_msg); + hv_msg = (struct hv_kvp_msg *)incoming_cn_msg->data; + + switch (hv_msg->kvp_hdr.operation) { + case KVP_OP_REGISTER: + /* + * Driver is registering with us; stash away the version + * information. + */ + p = (char *)hv_msg->body.kvp_register.version; + lic_version = malloc(strlen(p) + 1); + if (lic_version) { + strcpy(lic_version, p); + syslog(LOG_INFO, "KVP LIC Version: %s", + lic_version); + } else { + syslog(LOG_ERR, "malloc failed"); + } + continue; + + /* + * The current protocol with the kernel component uses a + * NULL key name to pass an error condition. + * For the SET, GET and DELETE operations, + * use the existing protocol to pass back error. + */ + + case KVP_OP_SET: + if (kvp_key_add_or_modify(hv_msg->kvp_hdr.pool, + hv_msg->body.kvp_set.data.key, + hv_msg->body.kvp_set.data.key_size, + hv_msg->body.kvp_set.data.value, + hv_msg->body.kvp_set.data.value_size)) + strcpy(hv_msg->body.kvp_set.data.key, ""); + break; + + case KVP_OP_GET: + if (kvp_get_value(hv_msg->kvp_hdr.pool, + hv_msg->body.kvp_set.data.key, + hv_msg->body.kvp_set.data.key_size, + hv_msg->body.kvp_set.data.value, + hv_msg->body.kvp_set.data.value_size)) + strcpy(hv_msg->body.kvp_set.data.key, ""); + break; + + case KVP_OP_DELETE: + if (kvp_key_delete(hv_msg->kvp_hdr.pool, + hv_msg->body.kvp_delete.key, + hv_msg->body.kvp_delete.key_size)) + strcpy(hv_msg->body.kvp_delete.key, ""); + break; + + default: + break; + } + + if (hv_msg->kvp_hdr.operation != KVP_OP_ENUMERATE) + goto kvp_done; + + /* + * If the pool is KVP_POOL_AUTO, dynamically generate + * both the key and the value; if not read from the + * appropriate pool. + */ + if (hv_msg->kvp_hdr.pool != KVP_POOL_AUTO) { + kvp_pool_enumerate(hv_msg->kvp_hdr.pool, + hv_msg->body.kvp_enum_data.index, + hv_msg->body.kvp_enum_data.data.key, + HV_KVP_EXCHANGE_MAX_KEY_SIZE, + hv_msg->body.kvp_enum_data.data.value, + HV_KVP_EXCHANGE_MAX_VALUE_SIZE); + goto kvp_done; + } + + hv_msg = (struct hv_kvp_msg *)incoming_cn_msg->data; + key_name = (char *)hv_msg->body.kvp_enum_data.data.key; + key_value = (char *)hv_msg->body.kvp_enum_data.data.value; + + switch (hv_msg->body.kvp_enum_data.index) { + case FullyQualifiedDomainName: + kvp_get_domain_name(key_value, + HV_KVP_EXCHANGE_MAX_VALUE_SIZE); + strcpy(key_name, "FullyQualifiedDomainName"); + break; + case IntegrationServicesVersion: + strcpy(key_name, "IntegrationServicesVersion"); + strcpy(key_value, lic_version); + break; + case NetworkAddressIPv4: + kvp_get_ip_address(AF_INET, key_value, + HV_KVP_EXCHANGE_MAX_VALUE_SIZE); + strcpy(key_name, "NetworkAddressIPv4"); + break; + case NetworkAddressIPv6: + kvp_get_ip_address(AF_INET6, key_value, + HV_KVP_EXCHANGE_MAX_VALUE_SIZE); + strcpy(key_name, "NetworkAddressIPv6"); + break; + case OSBuildNumber: + strcpy(key_value, os_build); + strcpy(key_name, "OSBuildNumber"); + break; + case OSName: + strcpy(key_value, os_name); + strcpy(key_name, "OSName"); + break; + case OSMajorVersion: + strcpy(key_value, os_major); + strcpy(key_name, "OSMajorVersion"); + break; + case OSMinorVersion: + strcpy(key_value, os_minor); + strcpy(key_name, "OSMinorVersion"); + break; + case OSVersion: + strcpy(key_value, os_build); + strcpy(key_name, "OSVersion"); + break; + case ProcessorArchitecture: + strcpy(key_value, processor_arch); + strcpy(key_name, "ProcessorArchitecture"); + break; + default: + strcpy(key_value, "Unknown Key"); + /* + * We use a null key name to terminate enumeration. + */ + strcpy(key_name, ""); + break; + } + /* + * Send the value back to the kernel. The response is + * already in the receive buffer. Update the cn_msg header to + * reflect the key value that has been added to the message + */ +kvp_done: + + incoming_cn_msg->id.idx = CN_KVP_IDX; + incoming_cn_msg->id.val = CN_KVP_VAL; + incoming_cn_msg->ack = 0; + incoming_cn_msg->len = sizeof(struct hv_kvp_msg); + + len = netlink_send(fd, incoming_cn_msg); + if (len < 0) { + syslog(LOG_ERR, "net_link send failed; error:%d", len); + exit(-1); + } + } + +} diff --git a/tools/include/tools/be_byteshift.h b/tools/include/tools/be_byteshift.h new file mode 100644 index 000000000000..f4912e2668ba --- /dev/null +++ b/tools/include/tools/be_byteshift.h @@ -0,0 +1,70 @@ +#ifndef _TOOLS_BE_BYTESHIFT_H +#define _TOOLS_BE_BYTESHIFT_H + +#include <linux/types.h> + +static inline __u16 __get_unaligned_be16(const __u8 *p) +{ + return p[0] << 8 | p[1]; +} + +static inline __u32 __get_unaligned_be32(const __u8 *p) +{ + return p[0] << 24 | p[1] << 16 | p[2] << 8 | p[3]; +} + +static inline __u64 __get_unaligned_be64(const __u8 *p) +{ + return (__u64)__get_unaligned_be32(p) << 32 | + __get_unaligned_be32(p + 4); +} + +static inline void __put_unaligned_be16(__u16 val, __u8 *p) +{ + *p++ = val >> 8; + *p++ = val; +} + +static inline void __put_unaligned_be32(__u32 val, __u8 *p) +{ + __put_unaligned_be16(val >> 16, p); + __put_unaligned_be16(val, p + 2); +} + +static inline void __put_unaligned_be64(__u64 val, __u8 *p) +{ + __put_unaligned_be32(val >> 32, p); + __put_unaligned_be32(val, p + 4); +} + +static inline __u16 get_unaligned_be16(const void *p) +{ + return __get_unaligned_be16((const __u8 *)p); +} + +static inline __u32 get_unaligned_be32(const void *p) +{ + return __get_unaligned_be32((const __u8 *)p); +} + +static inline __u64 get_unaligned_be64(const void *p) +{ + return __get_unaligned_be64((const __u8 *)p); +} + +static inline void put_unaligned_be16(__u16 val, void *p) +{ + __put_unaligned_be16(val, p); +} + +static inline void put_unaligned_be32(__u32 val, void *p) +{ + __put_unaligned_be32(val, p); +} + +static inline void put_unaligned_be64(__u64 val, void *p) +{ + __put_unaligned_be64(val, p); +} + +#endif /* _TOOLS_BE_BYTESHIFT_H */ diff --git a/tools/include/tools/le_byteshift.h b/tools/include/tools/le_byteshift.h new file mode 100644 index 000000000000..c99d45a68bda --- /dev/null +++ b/tools/include/tools/le_byteshift.h @@ -0,0 +1,70 @@ +#ifndef _TOOLS_LE_BYTESHIFT_H +#define _TOOLS_LE_BYTESHIFT_H + +#include <linux/types.h> + +static inline __u16 __get_unaligned_le16(const __u8 *p) +{ + return p[0] | p[1] << 8; +} + +static inline __u32 __get_unaligned_le32(const __u8 *p) +{ + return p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24; +} + +static inline __u64 __get_unaligned_le64(const __u8 *p) +{ + return (__u64)__get_unaligned_le32(p + 4) << 32 | + __get_unaligned_le32(p); +} + +static inline void __put_unaligned_le16(__u16 val, __u8 *p) +{ + *p++ = val; + *p++ = val >> 8; +} + +static inline void __put_unaligned_le32(__u32 val, __u8 *p) +{ + __put_unaligned_le16(val >> 16, p + 2); + __put_unaligned_le16(val, p); +} + +static inline void __put_unaligned_le64(__u64 val, __u8 *p) +{ + __put_unaligned_le32(val >> 32, p + 4); + __put_unaligned_le32(val, p); +} + +static inline __u16 get_unaligned_le16(const void *p) +{ + return __get_unaligned_le16((const __u8 *)p); +} + +static inline __u32 get_unaligned_le32(const void *p) +{ + return __get_unaligned_le32((const __u8 *)p); +} + +static inline __u64 get_unaligned_le64(const void *p) +{ + return __get_unaligned_le64((const __u8 *)p); +} + +static inline void put_unaligned_le16(__u16 val, void *p) +{ + __put_unaligned_le16(val, p); +} + +static inline void put_unaligned_le32(__u32 val, void *p) +{ + __put_unaligned_le32(val, p); +} + +static inline void put_unaligned_le64(__u64 val, void *p) +{ + __put_unaligned_le64(val, p); +} + +#endif /* _TOOLS_LE_BYTESHIFT_H */ diff --git a/tools/lguest/.gitignore b/tools/lguest/.gitignore new file mode 100644 index 000000000000..115587fd5f65 --- /dev/null +++ b/tools/lguest/.gitignore @@ -0,0 +1 @@ +lguest diff --git a/tools/lguest/Makefile b/tools/lguest/Makefile new file mode 100644 index 000000000000..0ac34206f7a7 --- /dev/null +++ b/tools/lguest/Makefile @@ -0,0 +1,8 @@ +# This creates the demonstration utility "lguest" which runs a Linux guest. +# Missing headers? Add "-I../../../include -I../../../arch/x86/include" +CFLAGS:=-m32 -Wall -Wmissing-declarations -Wmissing-prototypes -O3 -U_FORTIFY_SOURCE + +all: lguest + +clean: + rm -f lguest diff --git a/tools/lguest/extract b/tools/lguest/extract new file mode 100644 index 000000000000..7730bb6e4b94 --- /dev/null +++ b/tools/lguest/extract @@ -0,0 +1,58 @@ +#! /bin/sh + +set -e + +PREFIX=$1 +shift + +trap 'rm -r $TMPDIR' 0 +TMPDIR=`mktemp -d` + +exec 3>/dev/null +for f; do + while IFS=" +" read -r LINE; do + case "$LINE" in + *$PREFIX:[0-9]*:\**) + NUM=`echo "$LINE" | sed "s/.*$PREFIX:\([0-9]*\).*/\1/"` + if [ -f $TMPDIR/$NUM ]; then + echo "$TMPDIR/$NUM already exits prior to $f" + exit 1 + fi + exec 3>>$TMPDIR/$NUM + echo $f | sed 's,\.\./,,g' > $TMPDIR/.$NUM + /bin/echo "$LINE" | sed -e "s/$PREFIX:[0-9]*//" -e "s/:\*/*/" >&3 + ;; + *$PREFIX:[0-9]*) + NUM=`echo "$LINE" | sed "s/.*$PREFIX:\([0-9]*\).*/\1/"` + if [ -f $TMPDIR/$NUM ]; then + echo "$TMPDIR/$NUM already exits prior to $f" + exit 1 + fi + exec 3>>$TMPDIR/$NUM + echo $f | sed 's,\.\./,,g' > $TMPDIR/.$NUM + /bin/echo "$LINE" | sed "s/$PREFIX:[0-9]*//" >&3 + ;; + *:\**) + /bin/echo "$LINE" | sed -e "s/:\*/*/" -e "s,/\*\*/,," >&3 + echo >&3 + exec 3>/dev/null + ;; + *) + /bin/echo "$LINE" >&3 + ;; + esac + done < $f + echo >&3 + exec 3>/dev/null +done + +LASTFILE="" +for f in $TMPDIR/*; do + if [ "$LASTFILE" != $(cat $TMPDIR/.$(basename $f) ) ]; then + LASTFILE=$(cat $TMPDIR/.$(basename $f) ) + echo "[ $LASTFILE ]" + fi + cat $f +done + diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c new file mode 100644 index 000000000000..f759f4f097c7 --- /dev/null +++ b/tools/lguest/lguest.c @@ -0,0 +1,2065 @@ +/*P:100 + * This is the Launcher code, a simple program which lays out the "physical" + * memory for the new Guest by mapping the kernel image and the virtual + * devices, then opens /dev/lguest to tell the kernel about the Guest and + * control it. +:*/ +#define _LARGEFILE64_SOURCE +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <err.h> +#include <stdint.h> +#include <stdlib.h> +#include <elf.h> +#include <sys/mman.h> +#include <sys/param.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <sys/eventfd.h> +#include <fcntl.h> +#include <stdbool.h> +#include <errno.h> +#include <ctype.h> +#include <sys/socket.h> +#include <sys/ioctl.h> +#include <sys/time.h> +#include <time.h> +#include <netinet/in.h> +#include <net/if.h> +#include <linux/sockios.h> +#include <linux/if_tun.h> +#include <sys/uio.h> +#include <termios.h> +#include <getopt.h> +#include <assert.h> +#include <sched.h> +#include <limits.h> +#include <stddef.h> +#include <signal.h> +#include <pwd.h> +#include <grp.h> + +#include <linux/virtio_config.h> +#include <linux/virtio_net.h> +#include <linux/virtio_blk.h> +#include <linux/virtio_console.h> +#include <linux/virtio_rng.h> +#include <linux/virtio_ring.h> +#include <asm/bootparam.h> +#include "../../include/linux/lguest_launcher.h" +/*L:110 + * We can ignore the 43 include files we need for this program, but I do want + * to draw attention to the use of kernel-style types. + * + * As Linus said, "C is a Spartan language, and so should your naming be." I + * like these abbreviations, so we define them here. Note that u64 is always + * unsigned long long, which works on all Linux systems: this means that we can + * use %llu in printf for any u64. + */ +typedef unsigned long long u64; +typedef uint32_t u32; +typedef uint16_t u16; +typedef uint8_t u8; +/*:*/ + +#define BRIDGE_PFX "bridge:" +#ifndef SIOCBRADDIF +#define SIOCBRADDIF 0x89a2 /* add interface to bridge */ +#endif +/* We can have up to 256 pages for devices. */ +#define DEVICE_PAGES 256 +/* This will occupy 3 pages: it must be a power of 2. */ +#define VIRTQUEUE_NUM 256 + +/*L:120 + * verbose is both a global flag and a macro. The C preprocessor allows + * this, and although I wouldn't recommend it, it works quite nicely here. + */ +static bool verbose; +#define verbose(args...) \ + do { if (verbose) printf(args); } while(0) +/*:*/ + +/* The pointer to the start of guest memory. */ +static void *guest_base; +/* The maximum guest physical address allowed, and maximum possible. */ +static unsigned long guest_limit, guest_max; +/* The /dev/lguest file descriptor. */ +static int lguest_fd; + +/* a per-cpu variable indicating whose vcpu is currently running */ +static unsigned int __thread cpu_id; + +/* This is our list of devices. */ +struct device_list { + /* Counter to assign interrupt numbers. */ + unsigned int next_irq; + + /* Counter to print out convenient device numbers. */ + unsigned int device_num; + + /* The descriptor page for the devices. */ + u8 *descpage; + + /* A single linked list of devices. */ + struct device *dev; + /* And a pointer to the last device for easy append. */ + struct device *lastdev; +}; + +/* The list of Guest devices, based on command line arguments. */ +static struct device_list devices; + +/* The device structure describes a single device. */ +struct device { + /* The linked-list pointer. */ + struct device *next; + + /* The device's descriptor, as mapped into the Guest. */ + struct lguest_device_desc *desc; + + /* We can't trust desc values once Guest has booted: we use these. */ + unsigned int feature_len; + unsigned int num_vq; + + /* The name of this device, for --verbose. */ + const char *name; + + /* Any queues attached to this device */ + struct virtqueue *vq; + + /* Is it operational */ + bool running; + + /* Device-specific data. */ + void *priv; +}; + +/* The virtqueue structure describes a queue attached to a device. */ +struct virtqueue { + struct virtqueue *next; + + /* Which device owns me. */ + struct device *dev; + + /* The configuration for this queue. */ + struct lguest_vqconfig config; + + /* The actual ring of buffers. */ + struct vring vring; + + /* Last available index we saw. */ + u16 last_avail_idx; + + /* How many are used since we sent last irq? */ + unsigned int pending_used; + + /* Eventfd where Guest notifications arrive. */ + int eventfd; + + /* Function for the thread which is servicing this virtqueue. */ + void (*service)(struct virtqueue *vq); + pid_t thread; +}; + +/* Remember the arguments to the program so we can "reboot" */ +static char **main_args; + +/* The original tty settings to restore on exit. */ +static struct termios orig_term; + +/* + * We have to be careful with barriers: our devices are all run in separate + * threads and so we need to make sure that changes visible to the Guest happen + * in precise order. + */ +#define wmb() __asm__ __volatile__("" : : : "memory") +#define mb() __asm__ __volatile__("" : : : "memory") + +/* + * Convert an iovec element to the given type. + * + * This is a fairly ugly trick: we need to know the size of the type and + * alignment requirement to check the pointer is kosher. It's also nice to + * have the name of the type in case we report failure. + * + * Typing those three things all the time is cumbersome and error prone, so we + * have a macro which sets them all up and passes to the real function. + */ +#define convert(iov, type) \ + ((type *)_convert((iov), sizeof(type), __alignof__(type), #type)) + +static void *_convert(struct iovec *iov, size_t size, size_t align, + const char *name) +{ + if (iov->iov_len != size) + errx(1, "Bad iovec size %zu for %s", iov->iov_len, name); + if ((unsigned long)iov->iov_base % align != 0) + errx(1, "Bad alignment %p for %s", iov->iov_base, name); + return iov->iov_base; +} + +/* Wrapper for the last available index. Makes it easier to change. */ +#define lg_last_avail(vq) ((vq)->last_avail_idx) + +/* + * The virtio configuration space is defined to be little-endian. x86 is + * little-endian too, but it's nice to be explicit so we have these helpers. + */ +#define cpu_to_le16(v16) (v16) +#define cpu_to_le32(v32) (v32) +#define cpu_to_le64(v64) (v64) +#define le16_to_cpu(v16) (v16) +#define le32_to_cpu(v32) (v32) +#define le64_to_cpu(v64) (v64) + +/* Is this iovec empty? */ +static bool iov_empty(const struct iovec iov[], unsigned int num_iov) +{ + unsigned int i; + + for (i = 0; i < num_iov; i++) + if (iov[i].iov_len) + return false; + return true; +} + +/* Take len bytes from the front of this iovec. */ +static void iov_consume(struct iovec iov[], unsigned num_iov, unsigned len) +{ + unsigned int i; + + for (i = 0; i < num_iov; i++) { + unsigned int used; + + used = iov[i].iov_len < len ? iov[i].iov_len : len; + iov[i].iov_base += used; + iov[i].iov_len -= used; + len -= used; + } + assert(len == 0); +} + +/* The device virtqueue descriptors are followed by feature bitmasks. */ +static u8 *get_feature_bits(struct device *dev) +{ + return (u8 *)(dev->desc + 1) + + dev->num_vq * sizeof(struct lguest_vqconfig); +} + +/*L:100 + * The Launcher code itself takes us out into userspace, that scary place where + * pointers run wild and free! Unfortunately, like most userspace programs, + * it's quite boring (which is why everyone likes to hack on the kernel!). + * Perhaps if you make up an Lguest Drinking Game at this point, it will get + * you through this section. Or, maybe not. + * + * The Launcher sets up a big chunk of memory to be the Guest's "physical" + * memory and stores it in "guest_base". In other words, Guest physical == + * Launcher virtual with an offset. + * + * This can be tough to get your head around, but usually it just means that we + * use these trivial conversion functions when the Guest gives us its + * "physical" addresses: + */ +static void *from_guest_phys(unsigned long addr) +{ + return guest_base + addr; +} + +static unsigned long to_guest_phys(const void *addr) +{ + return (addr - guest_base); +} + +/*L:130 + * Loading the Kernel. + * + * We start with couple of simple helper routines. open_or_die() avoids + * error-checking code cluttering the callers: + */ +static int open_or_die(const char *name, int flags) +{ + int fd = open(name, flags); + if (fd < 0) + err(1, "Failed to open %s", name); + return fd; +} + +/* map_zeroed_pages() takes a number of pages. */ +static void *map_zeroed_pages(unsigned int num) +{ + int fd = open_or_die("/dev/zero", O_RDONLY); + void *addr; + + /* + * We use a private mapping (ie. if we write to the page, it will be + * copied). We allocate an extra two pages PROT_NONE to act as guard + * pages against read/write attempts that exceed allocated space. + */ + addr = mmap(NULL, getpagesize() * (num+2), + PROT_NONE, MAP_PRIVATE, fd, 0); + + if (addr == MAP_FAILED) + err(1, "Mmapping %u pages of /dev/zero", num); + + if (mprotect(addr + getpagesize(), getpagesize() * num, + PROT_READ|PROT_WRITE) == -1) + err(1, "mprotect rw %u pages failed", num); + + /* + * One neat mmap feature is that you can close the fd, and it + * stays mapped. + */ + close(fd); + + /* Return address after PROT_NONE page */ + return addr + getpagesize(); +} + +/* Get some more pages for a device. */ +static void *get_pages(unsigned int num) +{ + void *addr = from_guest_phys(guest_limit); + + guest_limit += num * getpagesize(); + if (guest_limit > guest_max) + errx(1, "Not enough memory for devices"); + return addr; +} + +/* + * This routine is used to load the kernel or initrd. It tries mmap, but if + * that fails (Plan 9's kernel file isn't nicely aligned on page boundaries), + * it falls back to reading the memory in. + */ +static void map_at(int fd, void *addr, unsigned long offset, unsigned long len) +{ + ssize_t r; + + /* + * We map writable even though for some segments are marked read-only. + * The kernel really wants to be writable: it patches its own + * instructions. + * + * MAP_PRIVATE means that the page won't be copied until a write is + * done to it. This allows us to share untouched memory between + * Guests. + */ + if (mmap(addr, len, PROT_READ|PROT_WRITE, + MAP_FIXED|MAP_PRIVATE, fd, offset) != MAP_FAILED) + return; + + /* pread does a seek and a read in one shot: saves a few lines. */ + r = pread(fd, addr, len, offset); + if (r != len) + err(1, "Reading offset %lu len %lu gave %zi", offset, len, r); +} + +/* + * This routine takes an open vmlinux image, which is in ELF, and maps it into + * the Guest memory. ELF = Embedded Linking Format, which is the format used + * by all modern binaries on Linux including the kernel. + * + * The ELF headers give *two* addresses: a physical address, and a virtual + * address. We use the physical address; the Guest will map itself to the + * virtual address. + * + * We return the starting address. + */ +static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr) +{ + Elf32_Phdr phdr[ehdr->e_phnum]; + unsigned int i; + + /* + * Sanity checks on the main ELF header: an x86 executable with a + * reasonable number of correctly-sized program headers. + */ + if (ehdr->e_type != ET_EXEC + || ehdr->e_machine != EM_386 + || ehdr->e_phentsize != sizeof(Elf32_Phdr) + || ehdr->e_phnum < 1 || ehdr->e_phnum > 65536U/sizeof(Elf32_Phdr)) + errx(1, "Malformed elf header"); + + /* + * An ELF executable contains an ELF header and a number of "program" + * headers which indicate which parts ("segments") of the program to + * load where. + */ + + /* We read in all the program headers at once: */ + if (lseek(elf_fd, ehdr->e_phoff, SEEK_SET) < 0) + err(1, "Seeking to program headers"); + if (read(elf_fd, phdr, sizeof(phdr)) != sizeof(phdr)) + err(1, "Reading program headers"); + + /* + * Try all the headers: there are usually only three. A read-only one, + * a read-write one, and a "note" section which we don't load. + */ + for (i = 0; i < ehdr->e_phnum; i++) { + /* If this isn't a loadable segment, we ignore it */ + if (phdr[i].p_type != PT_LOAD) + continue; + + verbose("Section %i: size %i addr %p\n", + i, phdr[i].p_memsz, (void *)phdr[i].p_paddr); + + /* We map this section of the file at its physical address. */ + map_at(elf_fd, from_guest_phys(phdr[i].p_paddr), + phdr[i].p_offset, phdr[i].p_filesz); + } + + /* The entry point is given in the ELF header. */ + return ehdr->e_entry; +} + +/*L:150 + * A bzImage, unlike an ELF file, is not meant to be loaded. You're supposed + * to jump into it and it will unpack itself. We used to have to perform some + * hairy magic because the unpacking code scared me. + * + * Fortunately, Jeremy Fitzhardinge convinced me it wasn't that hard and wrote + * a small patch to jump over the tricky bits in the Guest, so now we just read + * the funky header so we know where in the file to load, and away we go! + */ +static unsigned long load_bzimage(int fd) +{ + struct boot_params boot; + int r; + /* Modern bzImages get loaded at 1M. */ + void *p = from_guest_phys(0x100000); + + /* + * Go back to the start of the file and read the header. It should be + * a Linux boot header (see Documentation/x86/boot.txt) + */ + lseek(fd, 0, SEEK_SET); + read(fd, &boot, sizeof(boot)); + + /* Inside the setup_hdr, we expect the magic "HdrS" */ + if (memcmp(&boot.hdr.header, "HdrS", 4) != 0) + errx(1, "This doesn't look like a bzImage to me"); + + /* Skip over the extra sectors of the header. */ + lseek(fd, (boot.hdr.setup_sects+1) * 512, SEEK_SET); + + /* Now read everything into memory. in nice big chunks. */ + while ((r = read(fd, p, 65536)) > 0) + p += r; + + /* Finally, code32_start tells us where to enter the kernel. */ + return boot.hdr.code32_start; +} + +/*L:140 + * Loading the kernel is easy when it's a "vmlinux", but most kernels + * come wrapped up in the self-decompressing "bzImage" format. With a little + * work, we can load those, too. + */ +static unsigned long load_kernel(int fd) +{ + Elf32_Ehdr hdr; + + /* Read in the first few bytes. */ + if (read(fd, &hdr, sizeof(hdr)) != sizeof(hdr)) + err(1, "Reading kernel"); + + /* If it's an ELF file, it starts with "\177ELF" */ + if (memcmp(hdr.e_ident, ELFMAG, SELFMAG) == 0) + return map_elf(fd, &hdr); + + /* Otherwise we assume it's a bzImage, and try to load it. */ + return load_bzimage(fd); +} + +/* + * This is a trivial little helper to align pages. Andi Kleen hated it because + * it calls getpagesize() twice: "it's dumb code." + * + * Kernel guys get really het up about optimization, even when it's not + * necessary. I leave this code as a reaction against that. + */ +static inline unsigned long page_align(unsigned long addr) +{ + /* Add upwards and truncate downwards. */ + return ((addr + getpagesize()-1) & ~(getpagesize()-1)); +} + +/*L:180 + * An "initial ram disk" is a disk image loaded into memory along with the + * kernel which the kernel can use to boot from without needing any drivers. + * Most distributions now use this as standard: the initrd contains the code to + * load the appropriate driver modules for the current machine. + * + * Importantly, James Morris works for RedHat, and Fedora uses initrds for its + * kernels. He sent me this (and tells me when I break it). + */ +static unsigned long load_initrd(const char *name, unsigned long mem) +{ + int ifd; + struct stat st; + unsigned long len; + + ifd = open_or_die(name, O_RDONLY); + /* fstat() is needed to get the file size. */ + if (fstat(ifd, &st) < 0) + err(1, "fstat() on initrd '%s'", name); + + /* + * We map the initrd at the top of memory, but mmap wants it to be + * page-aligned, so we round the size up for that. + */ + len = page_align(st.st_size); + map_at(ifd, from_guest_phys(mem - len), 0, st.st_size); + /* + * Once a file is mapped, you can close the file descriptor. It's a + * little odd, but quite useful. + */ + close(ifd); + verbose("mapped initrd %s size=%lu @ %p\n", name, len, (void*)mem-len); + + /* We return the initrd size. */ + return len; +} +/*:*/ + +/* + * Simple routine to roll all the commandline arguments together with spaces + * between them. + */ +static void concat(char *dst, char *args[]) +{ + unsigned int i, len = 0; + + for (i = 0; args[i]; i++) { + if (i) { + strcat(dst+len, " "); + len++; + } + strcpy(dst+len, args[i]); + len += strlen(args[i]); + } + /* In case it's empty. */ + dst[len] = '\0'; +} + +/*L:185 + * This is where we actually tell the kernel to initialize the Guest. We + * saw the arguments it expects when we looked at initialize() in lguest_user.c: + * the base of Guest "physical" memory, the top physical page to allow and the + * entry point for the Guest. + */ +static void tell_kernel(unsigned long start) +{ + unsigned long args[] = { LHREQ_INITIALIZE, + (unsigned long)guest_base, + guest_limit / getpagesize(), start }; + verbose("Guest: %p - %p (%#lx)\n", + guest_base, guest_base + guest_limit, guest_limit); + lguest_fd = open_or_die("/dev/lguest", O_RDWR); + if (write(lguest_fd, args, sizeof(args)) < 0) + err(1, "Writing to /dev/lguest"); +} +/*:*/ + +/*L:200 + * Device Handling. + * + * When the Guest gives us a buffer, it sends an array of addresses and sizes. + * We need to make sure it's not trying to reach into the Launcher itself, so + * we have a convenient routine which checks it and exits with an error message + * if something funny is going on: + */ +static void *_check_pointer(unsigned long addr, unsigned int size, + unsigned int line) +{ + /* + * Check if the requested address and size exceeds the allocated memory, + * or addr + size wraps around. + */ + if ((addr + size) > guest_limit || (addr + size) < addr) + errx(1, "%s:%i: Invalid address %#lx", __FILE__, line, addr); + /* + * We return a pointer for the caller's convenience, now we know it's + * safe to use. + */ + return from_guest_phys(addr); +} +/* A macro which transparently hands the line number to the real function. */ +#define check_pointer(addr,size) _check_pointer(addr, size, __LINE__) + +/* + * Each buffer in the virtqueues is actually a chain of descriptors. This + * function returns the next descriptor in the chain, or vq->vring.num if we're + * at the end. + */ +static unsigned next_desc(struct vring_desc *desc, + unsigned int i, unsigned int max) +{ + unsigned int next; + + /* If this descriptor says it doesn't chain, we're done. */ + if (!(desc[i].flags & VRING_DESC_F_NEXT)) + return max; + + /* Check they're not leading us off end of descriptors. */ + next = desc[i].next; + /* Make sure compiler knows to grab that: we don't want it changing! */ + wmb(); + + if (next >= max) + errx(1, "Desc next is %u", next); + + return next; +} + +/* + * This actually sends the interrupt for this virtqueue, if we've used a + * buffer. + */ +static void trigger_irq(struct virtqueue *vq) +{ + unsigned long buf[] = { LHREQ_IRQ, vq->config.irq }; + + /* Don't inform them if nothing used. */ + if (!vq->pending_used) + return; + vq->pending_used = 0; + + /* If they don't want an interrupt, don't send one... */ + if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) { + return; + } + + /* Send the Guest an interrupt tell them we used something up. */ + if (write(lguest_fd, buf, sizeof(buf)) != 0) + err(1, "Triggering irq %i", vq->config.irq); +} + +/* + * This looks in the virtqueue for the first available buffer, and converts + * it to an iovec for convenient access. Since descriptors consist of some + * number of output then some number of input descriptors, it's actually two + * iovecs, but we pack them into one and note how many of each there were. + * + * This function waits if necessary, and returns the descriptor number found. + */ +static unsigned wait_for_vq_desc(struct virtqueue *vq, + struct iovec iov[], + unsigned int *out_num, unsigned int *in_num) +{ + unsigned int i, head, max; + struct vring_desc *desc; + u16 last_avail = lg_last_avail(vq); + + /* There's nothing available? */ + while (last_avail == vq->vring.avail->idx) { + u64 event; + + /* + * Since we're about to sleep, now is a good time to tell the + * Guest about what we've used up to now. + */ + trigger_irq(vq); + + /* OK, now we need to know about added descriptors. */ + vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY; + + /* + * They could have slipped one in as we were doing that: make + * sure it's written, then check again. + */ + mb(); + if (last_avail != vq->vring.avail->idx) { + vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY; + break; + } + + /* Nothing new? Wait for eventfd to tell us they refilled. */ + if (read(vq->eventfd, &event, sizeof(event)) != sizeof(event)) + errx(1, "Event read failed?"); + + /* We don't need to be notified again. */ + vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY; + } + + /* Check it isn't doing very strange things with descriptor numbers. */ + if ((u16)(vq->vring.avail->idx - last_avail) > vq->vring.num) + errx(1, "Guest moved used index from %u to %u", + last_avail, vq->vring.avail->idx); + + /* + * Grab the next descriptor number they're advertising, and increment + * the index we've seen. + */ + head = vq->vring.avail->ring[last_avail % vq->vring.num]; + lg_last_avail(vq)++; + + /* If their number is silly, that's a fatal mistake. */ + if (head >= vq->vring.num) + errx(1, "Guest says index %u is available", head); + + /* When we start there are none of either input nor output. */ + *out_num = *in_num = 0; + + max = vq->vring.num; + desc = vq->vring.desc; + i = head; + + /* + * If this is an indirect entry, then this buffer contains a descriptor + * table which we handle as if it's any normal descriptor chain. + */ + if (desc[i].flags & VRING_DESC_F_INDIRECT) { + if (desc[i].len % sizeof(struct vring_desc)) + errx(1, "Invalid size for indirect buffer table"); + + max = desc[i].len / sizeof(struct vring_desc); + desc = check_pointer(desc[i].addr, desc[i].len); + i = 0; + } + + do { + /* Grab the first descriptor, and check it's OK. */ + iov[*out_num + *in_num].iov_len = desc[i].len; + iov[*out_num + *in_num].iov_base + = check_pointer(desc[i].addr, desc[i].len); + /* If this is an input descriptor, increment that count. */ + if (desc[i].flags & VRING_DESC_F_WRITE) + (*in_num)++; + else { + /* + * If it's an output descriptor, they're all supposed + * to come before any input descriptors. + */ + if (*in_num) + errx(1, "Descriptor has out after in"); + (*out_num)++; + } + + /* If we've got too many, that implies a descriptor loop. */ + if (*out_num + *in_num > max) + errx(1, "Looped descriptor"); + } while ((i = next_desc(desc, i, max)) != max); + + return head; +} + +/* + * After we've used one of their buffers, we tell the Guest about it. Sometime + * later we'll want to send them an interrupt using trigger_irq(); note that + * wait_for_vq_desc() does that for us if it has to wait. + */ +static void add_used(struct virtqueue *vq, unsigned int head, int len) +{ + struct vring_used_elem *used; + + /* + * The virtqueue contains a ring of used buffers. Get a pointer to the + * next entry in that used ring. + */ + used = &vq->vring.used->ring[vq->vring.used->idx % vq->vring.num]; + used->id = head; + used->len = len; + /* Make sure buffer is written before we update index. */ + wmb(); + vq->vring.used->idx++; + vq->pending_used++; +} + +/* And here's the combo meal deal. Supersize me! */ +static void add_used_and_trigger(struct virtqueue *vq, unsigned head, int len) +{ + add_used(vq, head, len); + trigger_irq(vq); +} + +/* + * The Console + * + * We associate some data with the console for our exit hack. + */ +struct console_abort { + /* How many times have they hit ^C? */ + int count; + /* When did they start? */ + struct timeval start; +}; + +/* This is the routine which handles console input (ie. stdin). */ +static void console_input(struct virtqueue *vq) +{ + int len; + unsigned int head, in_num, out_num; + struct console_abort *abort = vq->dev->priv; + struct iovec iov[vq->vring.num]; + + /* Make sure there's a descriptor available. */ + head = wait_for_vq_desc(vq, iov, &out_num, &in_num); + if (out_num) + errx(1, "Output buffers in console in queue?"); + + /* Read into it. This is where we usually wait. */ + len = readv(STDIN_FILENO, iov, in_num); + if (len <= 0) { + /* Ran out of input? */ + warnx("Failed to get console input, ignoring console."); + /* + * For simplicity, dying threads kill the whole Launcher. So + * just nap here. + */ + for (;;) + pause(); + } + + /* Tell the Guest we used a buffer. */ + add_used_and_trigger(vq, head, len); + + /* + * Three ^C within one second? Exit. + * + * This is such a hack, but works surprisingly well. Each ^C has to + * be in a buffer by itself, so they can't be too fast. But we check + * that we get three within about a second, so they can't be too + * slow. + */ + if (len != 1 || ((char *)iov[0].iov_base)[0] != 3) { + abort->count = 0; + return; + } + + abort->count++; + if (abort->count == 1) + gettimeofday(&abort->start, NULL); + else if (abort->count == 3) { + struct timeval now; + gettimeofday(&now, NULL); + /* Kill all Launcher processes with SIGINT, like normal ^C */ + if (now.tv_sec <= abort->start.tv_sec+1) + kill(0, SIGINT); + abort->count = 0; + } +} + +/* This is the routine which handles console output (ie. stdout). */ +static void console_output(struct virtqueue *vq) +{ + unsigned int head, out, in; + struct iovec iov[vq->vring.num]; + + /* We usually wait in here, for the Guest to give us something. */ + head = wait_for_vq_desc(vq, iov, &out, &in); + if (in) + errx(1, "Input buffers in console output queue?"); + + /* writev can return a partial write, so we loop here. */ + while (!iov_empty(iov, out)) { + int len = writev(STDOUT_FILENO, iov, out); + if (len <= 0) { + warn("Write to stdout gave %i (%d)", len, errno); + break; + } + iov_consume(iov, out, len); + } + + /* + * We're finished with that buffer: if we're going to sleep, + * wait_for_vq_desc() will prod the Guest with an interrupt. + */ + add_used(vq, head, 0); +} + +/* + * The Network + * + * Handling output for network is also simple: we get all the output buffers + * and write them to /dev/net/tun. + */ +struct net_info { + int tunfd; +}; + +static void net_output(struct virtqueue *vq) +{ + struct net_info *net_info = vq->dev->priv; + unsigned int head, out, in; + struct iovec iov[vq->vring.num]; + + /* We usually wait in here for the Guest to give us a packet. */ + head = wait_for_vq_desc(vq, iov, &out, &in); + if (in) + errx(1, "Input buffers in net output queue?"); + /* + * Send the whole thing through to /dev/net/tun. It expects the exact + * same format: what a coincidence! + */ + if (writev(net_info->tunfd, iov, out) < 0) + warnx("Write to tun failed (%d)?", errno); + + /* + * Done with that one; wait_for_vq_desc() will send the interrupt if + * all packets are processed. + */ + add_used(vq, head, 0); +} + +/* + * Handling network input is a bit trickier, because I've tried to optimize it. + * + * First we have a helper routine which tells is if from this file descriptor + * (ie. the /dev/net/tun device) will block: + */ +static bool will_block(int fd) +{ + fd_set fdset; + struct timeval zero = { 0, 0 }; + FD_ZERO(&fdset); + FD_SET(fd, &fdset); + return select(fd+1, &fdset, NULL, NULL, &zero) != 1; +} + +/* + * This handles packets coming in from the tun device to our Guest. Like all + * service routines, it gets called again as soon as it returns, so you don't + * see a while(1) loop here. + */ +static void net_input(struct virtqueue *vq) +{ + int len; + unsigned int head, out, in; + struct iovec iov[vq->vring.num]; + struct net_info *net_info = vq->dev->priv; + + /* + * Get a descriptor to write an incoming packet into. This will also + * send an interrupt if they're out of descriptors. + */ + head = wait_for_vq_desc(vq, iov, &out, &in); + if (out) + errx(1, "Output buffers in net input queue?"); + + /* + * If it looks like we'll block reading from the tun device, send them + * an interrupt. + */ + if (vq->pending_used && will_block(net_info->tunfd)) + trigger_irq(vq); + + /* + * Read in the packet. This is where we normally wait (when there's no + * incoming network traffic). + */ + len = readv(net_info->tunfd, iov, in); + if (len <= 0) + warn("Failed to read from tun (%d).", errno); + + /* + * Mark that packet buffer as used, but don't interrupt here. We want + * to wait until we've done as much work as we can. + */ + add_used(vq, head, len); +} +/*:*/ + +/* This is the helper to create threads: run the service routine in a loop. */ +static int do_thread(void *_vq) +{ + struct virtqueue *vq = _vq; + + for (;;) + vq->service(vq); + return 0; +} + +/* + * When a child dies, we kill our entire process group with SIGTERM. This + * also has the side effect that the shell restores the console for us! + */ +static void kill_launcher(int signal) +{ + kill(0, SIGTERM); +} + +static void reset_device(struct device *dev) +{ + struct virtqueue *vq; + + verbose("Resetting device %s\n", dev->name); + + /* Clear any features they've acked. */ + memset(get_feature_bits(dev) + dev->feature_len, 0, dev->feature_len); + + /* We're going to be explicitly killing threads, so ignore them. */ + signal(SIGCHLD, SIG_IGN); + + /* Zero out the virtqueues, get rid of their threads */ + for (vq = dev->vq; vq; vq = vq->next) { + if (vq->thread != (pid_t)-1) { + kill(vq->thread, SIGTERM); + waitpid(vq->thread, NULL, 0); + vq->thread = (pid_t)-1; + } + memset(vq->vring.desc, 0, + vring_size(vq->config.num, LGUEST_VRING_ALIGN)); + lg_last_avail(vq) = 0; + } + dev->running = false; + + /* Now we care if threads die. */ + signal(SIGCHLD, (void *)kill_launcher); +} + +/*L:216 + * This actually creates the thread which services the virtqueue for a device. + */ +static void create_thread(struct virtqueue *vq) +{ + /* + * Create stack for thread. Since the stack grows upwards, we point + * the stack pointer to the end of this region. + */ + char *stack = malloc(32768); + unsigned long args[] = { LHREQ_EVENTFD, + vq->config.pfn*getpagesize(), 0 }; + + /* Create a zero-initialized eventfd. */ + vq->eventfd = eventfd(0, 0); + if (vq->eventfd < 0) + err(1, "Creating eventfd"); + args[2] = vq->eventfd; + + /* + * Attach an eventfd to this virtqueue: it will go off when the Guest + * does an LHCALL_NOTIFY for this vq. + */ + if (write(lguest_fd, &args, sizeof(args)) != 0) + err(1, "Attaching eventfd"); + + /* + * CLONE_VM: because it has to access the Guest memory, and SIGCHLD so + * we get a signal if it dies. + */ + vq->thread = clone(do_thread, stack + 32768, CLONE_VM | SIGCHLD, vq); + if (vq->thread == (pid_t)-1) + err(1, "Creating clone"); + + /* We close our local copy now the child has it. */ + close(vq->eventfd); +} + +static void start_device(struct device *dev) +{ + unsigned int i; + struct virtqueue *vq; + + verbose("Device %s OK: offered", dev->name); + for (i = 0; i < dev->feature_len; i++) + verbose(" %02x", get_feature_bits(dev)[i]); + verbose(", accepted"); + for (i = 0; i < dev->feature_len; i++) + verbose(" %02x", get_feature_bits(dev) + [dev->feature_len+i]); + + for (vq = dev->vq; vq; vq = vq->next) { + if (vq->service) + create_thread(vq); + } + dev->running = true; +} + +static void cleanup_devices(void) +{ + struct device *dev; + + for (dev = devices.dev; dev; dev = dev->next) + reset_device(dev); + + /* If we saved off the original terminal settings, restore them now. */ + if (orig_term.c_lflag & (ISIG|ICANON|ECHO)) + tcsetattr(STDIN_FILENO, TCSANOW, &orig_term); +} + +/* When the Guest tells us they updated the status field, we handle it. */ +static void update_device_status(struct device *dev) +{ + /* A zero status is a reset, otherwise it's a set of flags. */ + if (dev->desc->status == 0) + reset_device(dev); + else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) { + warnx("Device %s configuration FAILED", dev->name); + if (dev->running) + reset_device(dev); + } else { + if (dev->running) + err(1, "Device %s features finalized twice", dev->name); + start_device(dev); + } +} + +/*L:215 + * This is the generic routine we call when the Guest uses LHCALL_NOTIFY. In + * particular, it's used to notify us of device status changes during boot. + */ +static void handle_output(unsigned long addr) +{ + struct device *i; + + /* Check each device. */ + for (i = devices.dev; i; i = i->next) { + struct virtqueue *vq; + + /* + * Notifications to device descriptors mean they updated the + * device status. + */ + if (from_guest_phys(addr) == i->desc) { + update_device_status(i); + return; + } + + /* Devices should not be used before features are finalized. */ + for (vq = i->vq; vq; vq = vq->next) { + if (addr != vq->config.pfn*getpagesize()) + continue; + errx(1, "Notification on %s before setup!", i->name); + } + } + + /* + * Early console write is done using notify on a nul-terminated string + * in Guest memory. It's also great for hacking debugging messages + * into a Guest. + */ + if (addr >= guest_limit) + errx(1, "Bad NOTIFY %#lx", addr); + + write(STDOUT_FILENO, from_guest_phys(addr), + strnlen(from_guest_phys(addr), guest_limit - addr)); +} + +/*L:190 + * Device Setup + * + * All devices need a descriptor so the Guest knows it exists, and a "struct + * device" so the Launcher can keep track of it. We have common helper + * routines to allocate and manage them. + */ + +/* + * The layout of the device page is a "struct lguest_device_desc" followed by a + * number of virtqueue descriptors, then two sets of feature bits, then an + * array of configuration bytes. This routine returns the configuration + * pointer. + */ +static u8 *device_config(const struct device *dev) +{ + return (void *)(dev->desc + 1) + + dev->num_vq * sizeof(struct lguest_vqconfig) + + dev->feature_len * 2; +} + +/* + * This routine allocates a new "struct lguest_device_desc" from descriptor + * table page just above the Guest's normal memory. It returns a pointer to + * that descriptor. + */ +static struct lguest_device_desc *new_dev_desc(u16 type) +{ + struct lguest_device_desc d = { .type = type }; + void *p; + + /* Figure out where the next device config is, based on the last one. */ + if (devices.lastdev) + p = device_config(devices.lastdev) + + devices.lastdev->desc->config_len; + else + p = devices.descpage; + + /* We only have one page for all the descriptors. */ + if (p + sizeof(d) > (void *)devices.descpage + getpagesize()) + errx(1, "Too many devices"); + + /* p might not be aligned, so we memcpy in. */ + return memcpy(p, &d, sizeof(d)); +} + +/* + * Each device descriptor is followed by the description of its virtqueues. We + * specify how many descriptors the virtqueue is to have. + */ +static void add_virtqueue(struct device *dev, unsigned int num_descs, + void (*service)(struct virtqueue *)) +{ + unsigned int pages; + struct virtqueue **i, *vq = malloc(sizeof(*vq)); + void *p; + + /* First we need some memory for this virtqueue. */ + pages = (vring_size(num_descs, LGUEST_VRING_ALIGN) + getpagesize() - 1) + / getpagesize(); + p = get_pages(pages); + + /* Initialize the virtqueue */ + vq->next = NULL; + vq->last_avail_idx = 0; + vq->dev = dev; + + /* + * This is the routine the service thread will run, and its Process ID + * once it's running. + */ + vq->service = service; + vq->thread = (pid_t)-1; + + /* Initialize the configuration. */ + vq->config.num = num_descs; + vq->config.irq = devices.next_irq++; + vq->config.pfn = to_guest_phys(p) / getpagesize(); + + /* Initialize the vring. */ + vring_init(&vq->vring, num_descs, p, LGUEST_VRING_ALIGN); + + /* + * Append virtqueue to this device's descriptor. We use + * device_config() to get the end of the device's current virtqueues; + * we check that we haven't added any config or feature information + * yet, otherwise we'd be overwriting them. + */ + assert(dev->desc->config_len == 0 && dev->desc->feature_len == 0); + memcpy(device_config(dev), &vq->config, sizeof(vq->config)); + dev->num_vq++; + dev->desc->num_vq++; + + verbose("Virtqueue page %#lx\n", to_guest_phys(p)); + + /* + * Add to tail of list, so dev->vq is first vq, dev->vq->next is + * second. + */ + for (i = &dev->vq; *i; i = &(*i)->next); + *i = vq; +} + +/* + * The first half of the feature bitmask is for us to advertise features. The + * second half is for the Guest to accept features. + */ +static void add_feature(struct device *dev, unsigned bit) +{ + u8 *features = get_feature_bits(dev); + + /* We can't extend the feature bits once we've added config bytes */ + if (dev->desc->feature_len <= bit / CHAR_BIT) { + assert(dev->desc->config_len == 0); + dev->feature_len = dev->desc->feature_len = (bit/CHAR_BIT) + 1; + } + + features[bit / CHAR_BIT] |= (1 << (bit % CHAR_BIT)); +} + +/* + * This routine sets the configuration fields for an existing device's + * descriptor. It only works for the last device, but that's OK because that's + * how we use it. + */ +static void set_config(struct device *dev, unsigned len, const void *conf) +{ + /* Check we haven't overflowed our single page. */ + if (device_config(dev) + len > devices.descpage + getpagesize()) + errx(1, "Too many devices"); + + /* Copy in the config information, and store the length. */ + memcpy(device_config(dev), conf, len); + dev->desc->config_len = len; + + /* Size must fit in config_len field (8 bits)! */ + assert(dev->desc->config_len == len); +} + +/* + * This routine does all the creation and setup of a new device, including + * calling new_dev_desc() to allocate the descriptor and device memory. We + * don't actually start the service threads until later. + * + * See what I mean about userspace being boring? + */ +static struct device *new_device(const char *name, u16 type) +{ + struct device *dev = malloc(sizeof(*dev)); + + /* Now we populate the fields one at a time. */ + dev->desc = new_dev_desc(type); + dev->name = name; + dev->vq = NULL; + dev->feature_len = 0; + dev->num_vq = 0; + dev->running = false; + + /* + * Append to device list. Prepending to a single-linked list is + * easier, but the user expects the devices to be arranged on the bus + * in command-line order. The first network device on the command line + * is eth0, the first block device /dev/vda, etc. + */ + if (devices.lastdev) + devices.lastdev->next = dev; + else + devices.dev = dev; + devices.lastdev = dev; + + return dev; +} + +/* + * Our first setup routine is the console. It's a fairly simple device, but + * UNIX tty handling makes it uglier than it could be. + */ +static void setup_console(void) +{ + struct device *dev; + + /* If we can save the initial standard input settings... */ + if (tcgetattr(STDIN_FILENO, &orig_term) == 0) { + struct termios term = orig_term; + /* + * Then we turn off echo, line buffering and ^C etc: We want a + * raw input stream to the Guest. + */ + term.c_lflag &= ~(ISIG|ICANON|ECHO); + tcsetattr(STDIN_FILENO, TCSANOW, &term); + } + + dev = new_device("console", VIRTIO_ID_CONSOLE); + + /* We store the console state in dev->priv, and initialize it. */ + dev->priv = malloc(sizeof(struct console_abort)); + ((struct console_abort *)dev->priv)->count = 0; + + /* + * The console needs two virtqueues: the input then the output. When + * they put something the input queue, we make sure we're listening to + * stdin. When they put something in the output queue, we write it to + * stdout. + */ + add_virtqueue(dev, VIRTQUEUE_NUM, console_input); + add_virtqueue(dev, VIRTQUEUE_NUM, console_output); + + verbose("device %u: console\n", ++devices.device_num); +} +/*:*/ + +/*M:010 + * Inter-guest networking is an interesting area. Simplest is to have a + * --sharenet=<name> option which opens or creates a named pipe. This can be + * used to send packets to another guest in a 1:1 manner. + * + * More sophisticated is to use one of the tools developed for project like UML + * to do networking. + * + * Faster is to do virtio bonding in kernel. Doing this 1:1 would be + * completely generic ("here's my vring, attach to your vring") and would work + * for any traffic. Of course, namespace and permissions issues need to be + * dealt with. A more sophisticated "multi-channel" virtio_net.c could hide + * multiple inter-guest channels behind one interface, although it would + * require some manner of hotplugging new virtio channels. + * + * Finally, we could use a virtio network switch in the kernel, ie. vhost. +:*/ + +static u32 str2ip(const char *ipaddr) +{ + unsigned int b[4]; + + if (sscanf(ipaddr, "%u.%u.%u.%u", &b[0], &b[1], &b[2], &b[3]) != 4) + errx(1, "Failed to parse IP address '%s'", ipaddr); + return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3]; +} + +static void str2mac(const char *macaddr, unsigned char mac[6]) +{ + unsigned int m[6]; + if (sscanf(macaddr, "%02x:%02x:%02x:%02x:%02x:%02x", + &m[0], &m[1], &m[2], &m[3], &m[4], &m[5]) != 6) + errx(1, "Failed to parse mac address '%s'", macaddr); + mac[0] = m[0]; + mac[1] = m[1]; + mac[2] = m[2]; + mac[3] = m[3]; + mac[4] = m[4]; + mac[5] = m[5]; +} + +/* + * This code is "adapted" from libbridge: it attaches the Host end of the + * network device to the bridge device specified by the command line. + * + * This is yet another James Morris contribution (I'm an IP-level guy, so I + * dislike bridging), and I just try not to break it. + */ +static void add_to_bridge(int fd, const char *if_name, const char *br_name) +{ + int ifidx; + struct ifreq ifr; + + if (!*br_name) + errx(1, "must specify bridge name"); + + ifidx = if_nametoindex(if_name); + if (!ifidx) + errx(1, "interface %s does not exist!", if_name); + + strncpy(ifr.ifr_name, br_name, IFNAMSIZ); + ifr.ifr_name[IFNAMSIZ-1] = '\0'; + ifr.ifr_ifindex = ifidx; + if (ioctl(fd, SIOCBRADDIF, &ifr) < 0) + err(1, "can't add %s to bridge %s", if_name, br_name); +} + +/* + * This sets up the Host end of the network device with an IP address, brings + * it up so packets will flow, the copies the MAC address into the hwaddr + * pointer. + */ +static void configure_device(int fd, const char *tapif, u32 ipaddr) +{ + struct ifreq ifr; + struct sockaddr_in sin; + + memset(&ifr, 0, sizeof(ifr)); + strcpy(ifr.ifr_name, tapif); + + /* Don't read these incantations. Just cut & paste them like I did! */ + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = htonl(ipaddr); + memcpy(&ifr.ifr_addr, &sin, sizeof(sin)); + if (ioctl(fd, SIOCSIFADDR, &ifr) != 0) + err(1, "Setting %s interface address", tapif); + ifr.ifr_flags = IFF_UP; + if (ioctl(fd, SIOCSIFFLAGS, &ifr) != 0) + err(1, "Bringing interface %s up", tapif); +} + +static int get_tun_device(char tapif[IFNAMSIZ]) +{ + struct ifreq ifr; + int netfd; + + /* Start with this zeroed. Messy but sure. */ + memset(&ifr, 0, sizeof(ifr)); + + /* + * We open the /dev/net/tun device and tell it we want a tap device. A + * tap device is like a tun device, only somehow different. To tell + * the truth, I completely blundered my way through this code, but it + * works now! + */ + netfd = open_or_die("/dev/net/tun", O_RDWR); + ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; + strcpy(ifr.ifr_name, "tap%d"); + if (ioctl(netfd, TUNSETIFF, &ifr) != 0) + err(1, "configuring /dev/net/tun"); + + if (ioctl(netfd, TUNSETOFFLOAD, + TUN_F_CSUM|TUN_F_TSO4|TUN_F_TSO6|TUN_F_TSO_ECN) != 0) + err(1, "Could not set features for tun device"); + + /* + * We don't need checksums calculated for packets coming in this + * device: trust us! + */ + ioctl(netfd, TUNSETNOCSUM, 1); + + memcpy(tapif, ifr.ifr_name, IFNAMSIZ); + return netfd; +} + +/*L:195 + * Our network is a Host<->Guest network. This can either use bridging or + * routing, but the principle is the same: it uses the "tun" device to inject + * packets into the Host as if they came in from a normal network card. We + * just shunt packets between the Guest and the tun device. + */ +static void setup_tun_net(char *arg) +{ + struct device *dev; + struct net_info *net_info = malloc(sizeof(*net_info)); + int ipfd; + u32 ip = INADDR_ANY; + bool bridging = false; + char tapif[IFNAMSIZ], *p; + struct virtio_net_config conf; + + net_info->tunfd = get_tun_device(tapif); + + /* First we create a new network device. */ + dev = new_device("net", VIRTIO_ID_NET); + dev->priv = net_info; + + /* Network devices need a recv and a send queue, just like console. */ + add_virtqueue(dev, VIRTQUEUE_NUM, net_input); + add_virtqueue(dev, VIRTQUEUE_NUM, net_output); + + /* + * We need a socket to perform the magic network ioctls to bring up the + * tap interface, connect to the bridge etc. Any socket will do! + */ + ipfd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); + if (ipfd < 0) + err(1, "opening IP socket"); + + /* If the command line was --tunnet=bridge:<name> do bridging. */ + if (!strncmp(BRIDGE_PFX, arg, strlen(BRIDGE_PFX))) { + arg += strlen(BRIDGE_PFX); + bridging = true; + } + + /* A mac address may follow the bridge name or IP address */ + p = strchr(arg, ':'); + if (p) { + str2mac(p+1, conf.mac); + add_feature(dev, VIRTIO_NET_F_MAC); + *p = '\0'; + } + + /* arg is now either an IP address or a bridge name */ + if (bridging) + add_to_bridge(ipfd, tapif, arg); + else + ip = str2ip(arg); + + /* Set up the tun device. */ + configure_device(ipfd, tapif, ip); + + /* Expect Guest to handle everything except UFO */ + add_feature(dev, VIRTIO_NET_F_CSUM); + add_feature(dev, VIRTIO_NET_F_GUEST_CSUM); + add_feature(dev, VIRTIO_NET_F_GUEST_TSO4); + add_feature(dev, VIRTIO_NET_F_GUEST_TSO6); + add_feature(dev, VIRTIO_NET_F_GUEST_ECN); + add_feature(dev, VIRTIO_NET_F_HOST_TSO4); + add_feature(dev, VIRTIO_NET_F_HOST_TSO6); + add_feature(dev, VIRTIO_NET_F_HOST_ECN); + /* We handle indirect ring entries */ + add_feature(dev, VIRTIO_RING_F_INDIRECT_DESC); + set_config(dev, sizeof(conf), &conf); + + /* We don't need the socket any more; setup is done. */ + close(ipfd); + + devices.device_num++; + + if (bridging) + verbose("device %u: tun %s attached to bridge: %s\n", + devices.device_num, tapif, arg); + else + verbose("device %u: tun %s: %s\n", + devices.device_num, tapif, arg); +} +/*:*/ + +/* This hangs off device->priv. */ +struct vblk_info { + /* The size of the file. */ + off64_t len; + + /* The file descriptor for the file. */ + int fd; + +}; + +/*L:210 + * The Disk + * + * The disk only has one virtqueue, so it only has one thread. It is really + * simple: the Guest asks for a block number and we read or write that position + * in the file. + * + * Before we serviced each virtqueue in a separate thread, that was unacceptably + * slow: the Guest waits until the read is finished before running anything + * else, even if it could have been doing useful work. + * + * We could have used async I/O, except it's reputed to suck so hard that + * characters actually go missing from your code when you try to use it. + */ +static void blk_request(struct virtqueue *vq) +{ + struct vblk_info *vblk = vq->dev->priv; + unsigned int head, out_num, in_num, wlen; + int ret; + u8 *in; + struct virtio_blk_outhdr *out; + struct iovec iov[vq->vring.num]; + off64_t off; + + /* + * Get the next request, where we normally wait. It triggers the + * interrupt to acknowledge previously serviced requests (if any). + */ + head = wait_for_vq_desc(vq, iov, &out_num, &in_num); + + /* + * Every block request should contain at least one output buffer + * (detailing the location on disk and the type of request) and one + * input buffer (to hold the result). + */ + if (out_num == 0 || in_num == 0) + errx(1, "Bad virtblk cmd %u out=%u in=%u", + head, out_num, in_num); + + out = convert(&iov[0], struct virtio_blk_outhdr); + in = convert(&iov[out_num+in_num-1], u8); + /* + * For historical reasons, block operations are expressed in 512 byte + * "sectors". + */ + off = out->sector * 512; + + /* + * In general the virtio block driver is allowed to try SCSI commands. + * It'd be nice if we supported eject, for example, but we don't. + */ + if (out->type & VIRTIO_BLK_T_SCSI_CMD) { + fprintf(stderr, "Scsi commands unsupported\n"); + *in = VIRTIO_BLK_S_UNSUPP; + wlen = sizeof(*in); + } else if (out->type & VIRTIO_BLK_T_OUT) { + /* + * Write + * + * Move to the right location in the block file. This can fail + * if they try to write past end. + */ + if (lseek64(vblk->fd, off, SEEK_SET) != off) + err(1, "Bad seek to sector %llu", out->sector); + + ret = writev(vblk->fd, iov+1, out_num-1); + verbose("WRITE to sector %llu: %i\n", out->sector, ret); + + /* + * Grr... Now we know how long the descriptor they sent was, we + * make sure they didn't try to write over the end of the block + * file (possibly extending it). + */ + if (ret > 0 && off + ret > vblk->len) { + /* Trim it back to the correct length */ + ftruncate64(vblk->fd, vblk->len); + /* Die, bad Guest, die. */ + errx(1, "Write past end %llu+%u", off, ret); + } + + wlen = sizeof(*in); + *in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR); + } else if (out->type & VIRTIO_BLK_T_FLUSH) { + /* Flush */ + ret = fdatasync(vblk->fd); + verbose("FLUSH fdatasync: %i\n", ret); + wlen = sizeof(*in); + *in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR); + } else { + /* + * Read + * + * Move to the right location in the block file. This can fail + * if they try to read past end. + */ + if (lseek64(vblk->fd, off, SEEK_SET) != off) + err(1, "Bad seek to sector %llu", out->sector); + + ret = readv(vblk->fd, iov+1, in_num-1); + verbose("READ from sector %llu: %i\n", out->sector, ret); + if (ret >= 0) { + wlen = sizeof(*in) + ret; + *in = VIRTIO_BLK_S_OK; + } else { + wlen = sizeof(*in); + *in = VIRTIO_BLK_S_IOERR; + } + } + + /* Finished that request. */ + add_used(vq, head, wlen); +} + +/*L:198 This actually sets up a virtual block device. */ +static void setup_block_file(const char *filename) +{ + struct device *dev; + struct vblk_info *vblk; + struct virtio_blk_config conf; + + /* Creat the device. */ + dev = new_device("block", VIRTIO_ID_BLOCK); + + /* The device has one virtqueue, where the Guest places requests. */ + add_virtqueue(dev, VIRTQUEUE_NUM, blk_request); + + /* Allocate the room for our own bookkeeping */ + vblk = dev->priv = malloc(sizeof(*vblk)); + + /* First we open the file and store the length. */ + vblk->fd = open_or_die(filename, O_RDWR|O_LARGEFILE); + vblk->len = lseek64(vblk->fd, 0, SEEK_END); + + /* We support FLUSH. */ + add_feature(dev, VIRTIO_BLK_F_FLUSH); + + /* Tell Guest how many sectors this device has. */ + conf.capacity = cpu_to_le64(vblk->len / 512); + + /* + * Tell Guest not to put in too many descriptors at once: two are used + * for the in and out elements. + */ + add_feature(dev, VIRTIO_BLK_F_SEG_MAX); + conf.seg_max = cpu_to_le32(VIRTQUEUE_NUM - 2); + + /* Don't try to put whole struct: we have 8 bit limit. */ + set_config(dev, offsetof(struct virtio_blk_config, geometry), &conf); + + verbose("device %u: virtblock %llu sectors\n", + ++devices.device_num, le64_to_cpu(conf.capacity)); +} + +/*L:211 + * Our random number generator device reads from /dev/random into the Guest's + * input buffers. The usual case is that the Guest doesn't want random numbers + * and so has no buffers although /dev/random is still readable, whereas + * console is the reverse. + * + * The same logic applies, however. + */ +struct rng_info { + int rfd; +}; + +static void rng_input(struct virtqueue *vq) +{ + int len; + unsigned int head, in_num, out_num, totlen = 0; + struct rng_info *rng_info = vq->dev->priv; + struct iovec iov[vq->vring.num]; + + /* First we need a buffer from the Guests's virtqueue. */ + head = wait_for_vq_desc(vq, iov, &out_num, &in_num); + if (out_num) + errx(1, "Output buffers in rng?"); + + /* + * Just like the console write, we loop to cover the whole iovec. + * In this case, short reads actually happen quite a bit. + */ + while (!iov_empty(iov, in_num)) { + len = readv(rng_info->rfd, iov, in_num); + if (len <= 0) + err(1, "Read from /dev/random gave %i", len); + iov_consume(iov, in_num, len); + totlen += len; + } + + /* Tell the Guest about the new input. */ + add_used(vq, head, totlen); +} + +/*L:199 + * This creates a "hardware" random number device for the Guest. + */ +static void setup_rng(void) +{ + struct device *dev; + struct rng_info *rng_info = malloc(sizeof(*rng_info)); + + /* Our device's privat info simply contains the /dev/random fd. */ + rng_info->rfd = open_or_die("/dev/random", O_RDONLY); + + /* Create the new device. */ + dev = new_device("rng", VIRTIO_ID_RNG); + dev->priv = rng_info; + + /* The device has one virtqueue, where the Guest places inbufs. */ + add_virtqueue(dev, VIRTQUEUE_NUM, rng_input); + + verbose("device %u: rng\n", devices.device_num++); +} +/* That's the end of device setup. */ + +/*L:230 Reboot is pretty easy: clean up and exec() the Launcher afresh. */ +static void __attribute__((noreturn)) restart_guest(void) +{ + unsigned int i; + + /* + * Since we don't track all open fds, we simply close everything beyond + * stderr. + */ + for (i = 3; i < FD_SETSIZE; i++) + close(i); + + /* Reset all the devices (kills all threads). */ + cleanup_devices(); + + execv(main_args[0], main_args); + err(1, "Could not exec %s", main_args[0]); +} + +/*L:220 + * Finally we reach the core of the Launcher which runs the Guest, serves + * its input and output, and finally, lays it to rest. + */ +static void __attribute__((noreturn)) run_guest(void) +{ + for (;;) { + unsigned long notify_addr; + int readval; + + /* We read from the /dev/lguest device to run the Guest. */ + readval = pread(lguest_fd, ¬ify_addr, + sizeof(notify_addr), cpu_id); + + /* One unsigned long means the Guest did HCALL_NOTIFY */ + if (readval == sizeof(notify_addr)) { + verbose("Notify on address %#lx\n", notify_addr); + handle_output(notify_addr); + /* ENOENT means the Guest died. Reading tells us why. */ + } else if (errno == ENOENT) { + char reason[1024] = { 0 }; + pread(lguest_fd, reason, sizeof(reason)-1, cpu_id); + errx(1, "%s", reason); + /* ERESTART means that we need to reboot the guest */ + } else if (errno == ERESTART) { + restart_guest(); + /* Anything else means a bug or incompatible change. */ + } else + err(1, "Running guest failed"); + } +} +/*L:240 + * This is the end of the Launcher. The good news: we are over halfway + * through! The bad news: the most fiendish part of the code still lies ahead + * of us. + * + * Are you ready? Take a deep breath and join me in the core of the Host, in + * "make Host". +:*/ + +static struct option opts[] = { + { "verbose", 0, NULL, 'v' }, + { "tunnet", 1, NULL, 't' }, + { "block", 1, NULL, 'b' }, + { "rng", 0, NULL, 'r' }, + { "initrd", 1, NULL, 'i' }, + { "username", 1, NULL, 'u' }, + { "chroot", 1, NULL, 'c' }, + { NULL }, +}; +static void usage(void) +{ + errx(1, "Usage: lguest [--verbose] " + "[--tunnet=(<ipaddr>:<macaddr>|bridge:<bridgename>:<macaddr>)\n" + "|--block=<filename>|--initrd=<filename>]...\n" + "<mem-in-mb> vmlinux [args...]"); +} + +/*L:105 The main routine is where the real work begins: */ +int main(int argc, char *argv[]) +{ + /* Memory, code startpoint and size of the (optional) initrd. */ + unsigned long mem = 0, start, initrd_size = 0; + /* Two temporaries. */ + int i, c; + /* The boot information for the Guest. */ + struct boot_params *boot; + /* If they specify an initrd file to load. */ + const char *initrd_name = NULL; + + /* Password structure for initgroups/setres[gu]id */ + struct passwd *user_details = NULL; + + /* Directory to chroot to */ + char *chroot_path = NULL; + + /* Save the args: we "reboot" by execing ourselves again. */ + main_args = argv; + + /* + * First we initialize the device list. We keep a pointer to the last + * device, and the next interrupt number to use for devices (1: + * remember that 0 is used by the timer). + */ + devices.lastdev = NULL; + devices.next_irq = 1; + + /* We're CPU 0. In fact, that's the only CPU possible right now. */ + cpu_id = 0; + + /* + * We need to know how much memory so we can set up the device + * descriptor and memory pages for the devices as we parse the command + * line. So we quickly look through the arguments to find the amount + * of memory now. + */ + for (i = 1; i < argc; i++) { + if (argv[i][0] != '-') { + mem = atoi(argv[i]) * 1024 * 1024; + /* + * We start by mapping anonymous pages over all of + * guest-physical memory range. This fills it with 0, + * and ensures that the Guest won't be killed when it + * tries to access it. + */ + guest_base = map_zeroed_pages(mem / getpagesize() + + DEVICE_PAGES); + guest_limit = mem; + guest_max = mem + DEVICE_PAGES*getpagesize(); + devices.descpage = get_pages(1); + break; + } + } + + /* The options are fairly straight-forward */ + while ((c = getopt_long(argc, argv, "v", opts, NULL)) != EOF) { + switch (c) { + case 'v': + verbose = true; + break; + case 't': + setup_tun_net(optarg); + break; + case 'b': + setup_block_file(optarg); + break; + case 'r': + setup_rng(); + break; + case 'i': + initrd_name = optarg; + break; + case 'u': + user_details = getpwnam(optarg); + if (!user_details) + err(1, "getpwnam failed, incorrect username?"); + break; + case 'c': + chroot_path = optarg; + break; + default: + warnx("Unknown argument %s", argv[optind]); + usage(); + } + } + /* + * After the other arguments we expect memory and kernel image name, + * followed by command line arguments for the kernel. + */ + if (optind + 2 > argc) + usage(); + + verbose("Guest base is at %p\n", guest_base); + + /* We always have a console device */ + setup_console(); + + /* Now we load the kernel */ + start = load_kernel(open_or_die(argv[optind+1], O_RDONLY)); + + /* Boot information is stashed at physical address 0 */ + boot = from_guest_phys(0); + + /* Map the initrd image if requested (at top of physical memory) */ + if (initrd_name) { + initrd_size = load_initrd(initrd_name, mem); + /* + * These are the location in the Linux boot header where the + * start and size of the initrd are expected to be found. + */ + boot->hdr.ramdisk_image = mem - initrd_size; + boot->hdr.ramdisk_size = initrd_size; + /* The bootloader type 0xFF means "unknown"; that's OK. */ + boot->hdr.type_of_loader = 0xFF; + } + + /* + * The Linux boot header contains an "E820" memory map: ours is a + * simple, single region. + */ + boot->e820_entries = 1; + boot->e820_map[0] = ((struct e820entry) { 0, mem, E820_RAM }); + /* + * The boot header contains a command line pointer: we put the command + * line after the boot header. + */ + boot->hdr.cmd_line_ptr = to_guest_phys(boot + 1); + /* We use a simple helper to copy the arguments separated by spaces. */ + concat((char *)(boot + 1), argv+optind+2); + + /* Set kernel alignment to 16M (CONFIG_PHYSICAL_ALIGN) */ + boot->hdr.kernel_alignment = 0x1000000; + + /* Boot protocol version: 2.07 supports the fields for lguest. */ + boot->hdr.version = 0x207; + + /* The hardware_subarch value of "1" tells the Guest it's an lguest. */ + boot->hdr.hardware_subarch = 1; + + /* Tell the entry path not to try to reload segment registers. */ + boot->hdr.loadflags |= KEEP_SEGMENTS; + + /* We tell the kernel to initialize the Guest. */ + tell_kernel(start); + + /* Ensure that we terminate if a device-servicing child dies. */ + signal(SIGCHLD, kill_launcher); + + /* If we exit via err(), this kills all the threads, restores tty. */ + atexit(cleanup_devices); + + /* If requested, chroot to a directory */ + if (chroot_path) { + if (chroot(chroot_path) != 0) + err(1, "chroot(\"%s\") failed", chroot_path); + + if (chdir("/") != 0) + err(1, "chdir(\"/\") failed"); + + verbose("chroot done\n"); + } + + /* If requested, drop privileges */ + if (user_details) { + uid_t u; + gid_t g; + + u = user_details->pw_uid; + g = user_details->pw_gid; + + if (initgroups(user_details->pw_name, g) != 0) + err(1, "initgroups failed"); + + if (setresgid(g, g, g) != 0) + err(1, "setresgid failed"); + + if (setresuid(u, u, u) != 0) + err(1, "setresuid failed"); + + verbose("Dropping privileges completed\n"); + } + + /* Finally, run the Guest. This doesn't return. */ + run_guest(); +} +/*:*/ + +/*M:999 + * Mastery is done: you now know everything I do. + * + * But surely you have seen code, features and bugs in your wanderings which + * you now yearn to attack? That is the real game, and I look forward to you + * patching and forking lguest into the Your-Name-Here-visor. + * + * Farewell, and good coding! + * Rusty Russell. + */ diff --git a/tools/lguest/lguest.txt b/tools/lguest/lguest.txt new file mode 100644 index 000000000000..bff0c554485d --- /dev/null +++ b/tools/lguest/lguest.txt @@ -0,0 +1,129 @@ + __ + (___()'`; Rusty's Remarkably Unreliable Guide to Lguest + /, /` - or, A Young Coder's Illustrated Hypervisor + \\"--\\ http://lguest.ozlabs.org + +Lguest is designed to be a minimal 32-bit x86 hypervisor for the Linux kernel, +for Linux developers and users to experiment with virtualization with the +minimum of complexity. Nonetheless, it should have sufficient features to +make it useful for specific tasks, and, of course, you are encouraged to fork +and enhance it (see drivers/lguest/README). + +Features: + +- Kernel module which runs in a normal kernel. +- Simple I/O model for communication. +- Simple program to create new guests. +- Logo contains cute puppies: http://lguest.ozlabs.org + +Developer features: + +- Fun to hack on. +- No ABI: being tied to a specific kernel anyway, you can change anything. +- Many opportunities for improvement or feature implementation. + +Running Lguest: + +- The easiest way to run lguest is to use same kernel as guest and host. + You can configure them differently, but usually it's easiest not to. + + You will need to configure your kernel with the following options: + + "General setup": + "Prompt for development and/or incomplete code/drivers" = Y + (CONFIG_EXPERIMENTAL=y) + + "Processor type and features": + "Paravirtualized guest support" = Y + "Lguest guest support" = Y + "High Memory Support" = off/4GB + "Alignment value to which kernel should be aligned" = 0x100000 + (CONFIG_PARAVIRT=y, CONFIG_LGUEST_GUEST=y, CONFIG_HIGHMEM64G=n and + CONFIG_PHYSICAL_ALIGN=0x100000) + + "Device Drivers": + "Block devices" + "Virtio block driver (EXPERIMENTAL)" = M/Y + "Network device support" + "Universal TUN/TAP device driver support" = M/Y + "Virtio network driver (EXPERIMENTAL)" = M/Y + (CONFIG_VIRTIO_BLK=m, CONFIG_VIRTIO_NET=m and CONFIG_TUN=m) + + "Virtualization" + "Linux hypervisor example code" = M/Y + (CONFIG_LGUEST=m) + +- A tool called "lguest" is available in this directory: type "make" + to build it. If you didn't build your kernel in-tree, use "make + O=<builddir>". + +- Create or find a root disk image. There are several useful ones + around, such as the xm-test tiny root image at + http://xm-test.xensource.com/ramdisks/initrd-1.1-i386.img + + For more serious work, I usually use a distribution ISO image and + install it under qemu, then make multiple copies: + + dd if=/dev/zero of=rootfile bs=1M count=2048 + qemu -cdrom image.iso -hda rootfile -net user -net nic -boot d + + Make sure that you install a getty on /dev/hvc0 if you want to log in on the + console! + +- "modprobe lg" if you built it as a module. + +- Run an lguest as root: + + Documentation/virtual/lguest/lguest 64 vmlinux --tunnet=192.168.19.1 \ + --block=rootfile root=/dev/vda + + Explanation: + 64: the amount of memory to use, in MB. + + vmlinux: the kernel image found in the top of your build directory. You + can also use a standard bzImage. + + --tunnet=192.168.19.1: configures a "tap" device for networking with this + IP address. + + --block=rootfile: a file or block device which becomes /dev/vda + inside the guest. + + root=/dev/vda: this (and anything else on the command line) are + kernel boot parameters. + +- Configuring networking. I usually have the host masquerade, using + "iptables -t nat -A POSTROUTING -o eth0 -j MASQUERADE" and "echo 1 > + /proc/sys/net/ipv4/ip_forward". In this example, I would configure + eth0 inside the guest at 192.168.19.2. + + Another method is to bridge the tap device to an external interface + using --tunnet=bridge:<bridgename>, and perhaps run dhcp on the guest + to obtain an IP address. The bridge needs to be configured first: + this option simply adds the tap interface to it. + + A simple example on my system: + + ifconfig eth0 0.0.0.0 + brctl addbr lg0 + ifconfig lg0 up + brctl addif lg0 eth0 + dhclient lg0 + + Then use --tunnet=bridge:lg0 when launching the guest. + + See: + + http://www.linuxfoundation.org/collaborate/workgroups/networking/bridge + + for general information on how to get bridging to work. + +- Random number generation. Using the --rng option will provide a + /dev/hwrng in the guest that will read from the host's /dev/random. + Use this option in conjunction with rng-tools (see ../hw_random.txt) + to provide entropy to the guest kernel's /dev/random. + +There is a helpful mailing list at http://ozlabs.org/mailman/listinfo/lguest + +Good luck! +Rusty Russell rusty@rustcorp.com.au. diff --git a/tools/lib/traceevent/.gitignore b/tools/lib/traceevent/.gitignore new file mode 100644 index 000000000000..35f56be5a4cd --- /dev/null +++ b/tools/lib/traceevent/.gitignore @@ -0,0 +1 @@ +TRACEEVENT-CFLAGS diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile new file mode 100644 index 000000000000..14131cb0522d --- /dev/null +++ b/tools/lib/traceevent/Makefile @@ -0,0 +1,319 @@ +# trace-cmd version +EP_VERSION = 1 +EP_PATCHLEVEL = 1 +EP_EXTRAVERSION = 0 + +# file format version +FILE_VERSION = 6 + +MAKEFLAGS += --no-print-directory + + +# Makefiles suck: This macro sets a default value of $(2) for the +# variable named by $(1), unless the variable has been set by +# environment or command line. This is necessary for CC and AR +# because make sets default values, so the simpler ?= approach +# won't work as expected. +define allow-override + $(if $(or $(findstring environment,$(origin $(1))),\ + $(findstring command line,$(origin $(1)))),,\ + $(eval $(1) = $(2))) +endef + +# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix. +$(call allow-override,CC,$(CROSS_COMPILE)gcc) +$(call allow-override,AR,$(CROSS_COMPILE)ar) + +EXT = -std=gnu99 +INSTALL = install + +# Use DESTDIR for installing into a different root directory. +# This is useful for building a package. The program will be +# installed in this directory as if it was the root directory. +# Then the build tool can move it later. +DESTDIR ?= +DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' + +prefix ?= /usr/local +bindir_relative = bin +bindir = $(prefix)/$(bindir_relative) +man_dir = $(prefix)/share/man +man_dir_SQ = '$(subst ','\'',$(man_dir))' +html_install = $(prefix)/share/kernelshark/html +html_install_SQ = '$(subst ','\'',$(html_install))' +img_install = $(prefix)/share/kernelshark/html/images +img_install_SQ = '$(subst ','\'',$(img_install))' + +export man_dir man_dir_SQ html_install html_install_SQ INSTALL +export img_install img_install_SQ +export DESTDIR DESTDIR_SQ + +# copy a bit from Linux kbuild + +ifeq ("$(origin V)", "command line") + VERBOSE = $(V) +endif +ifndef VERBOSE + VERBOSE = 0 +endif + +ifeq ("$(origin O)", "command line") + BUILD_OUTPUT := $(O) +endif + +ifeq ($(BUILD_SRC),) +ifneq ($(BUILD_OUTPUT),) + +define build_output + $(if $(VERBOSE:1=),@)$(MAKE) -C $(BUILD_OUTPUT) \ + BUILD_SRC=$(CURDIR) -f $(CURDIR)/Makefile $1 +endef + +saved-output := $(BUILD_OUTPUT) +BUILD_OUTPUT := $(shell cd $(BUILD_OUTPUT) && /bin/pwd) +$(if $(BUILD_OUTPUT),, \ + $(error output directory "$(saved-output)" does not exist)) + +all: sub-make + +gui: force + $(call build_output, all_cmd) + +$(filter-out gui,$(MAKECMDGOALS)): sub-make + +sub-make: force + $(call build_output, $(MAKECMDGOALS)) + + +# Leave processing to above invocation of make +skip-makefile := 1 + +endif # BUILD_OUTPUT +endif # BUILD_SRC + +# We process the rest of the Makefile if this is the final invocation of make +ifeq ($(skip-makefile),) + +srctree := $(if $(BUILD_SRC),$(BUILD_SRC),$(CURDIR)) +objtree := $(CURDIR) +src := $(srctree) +obj := $(objtree) + +export prefix bindir src obj + +# Shell quotes +bindir_SQ = $(subst ','\'',$(bindir)) +bindir_relative_SQ = $(subst ','\'',$(bindir_relative)) + +LIB_FILE = libtraceevent.a libtraceevent.so + +CONFIG_INCLUDES = +CONFIG_LIBS = +CONFIG_FLAGS = + +VERSION = $(EP_VERSION) +PATCHLEVEL = $(EP_PATCHLEVEL) +EXTRAVERSION = $(EP_EXTRAVERSION) + +OBJ = $@ +N = + +export Q VERBOSE + +EVENT_PARSE_VERSION = $(EP_VERSION).$(EP_PATCHLEVEL).$(EP_EXTRAVERSION) + +INCLUDES = -I. -I/usr/local/include $(CONFIG_INCLUDES) + +# Set compile option CFLAGS if not set elsewhere +CFLAGS ?= -g -Wall + +# Append required CFLAGS +override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ) +override CFLAGS += $(udis86-flags) + +ifeq ($(VERBOSE),1) + Q = + print_compile = + print_app_build = + print_fpic_compile = + print_shared_lib_compile = + print_plugin_obj_compile = + print_plugin_build = + print_install = +else + Q = @ + print_compile = echo ' CC '$(OBJ); + print_app_build = echo ' BUILD '$(OBJ); + print_fpic_compile = echo ' CC FPIC '$(OBJ); + print_shared_lib_compile = echo ' BUILD SHARED LIB '$(OBJ); + print_plugin_obj_compile = echo ' CC PLUGIN OBJ '$(OBJ); + print_plugin_build = echo ' CC PLUGI '$(OBJ); + print_static_lib_build = echo ' BUILD STATIC LIB '$(OBJ); + print_install = echo ' INSTALL '$1' to $(DESTDIR_SQ)$2'; +endif + +do_fpic_compile = \ + ($(print_fpic_compile) \ + $(CC) -c $(CFLAGS) $(EXT) -fPIC $< -o $@) + +do_app_build = \ + ($(print_app_build) \ + $(CC) $^ -rdynamic -o $@ $(CONFIG_LIBS) $(LIBS)) + +do_compile_shared_library = \ + ($(print_shared_lib_compile) \ + $(CC) --shared $^ -o $@) + +do_compile_plugin_obj = \ + ($(print_plugin_obj_compile) \ + $(CC) -c $(CFLAGS) -fPIC -o $@ $<) + +do_plugin_build = \ + ($(print_plugin_build) \ + $(CC) $(CFLAGS) -shared -nostartfiles -o $@ $<) + +do_build_static_lib = \ + ($(print_static_lib_build) \ + $(RM) $@; $(AR) rcs $@ $^) + + +define do_compile + $(print_compile) \ + $(CC) -c $(CFLAGS) $(EXT) $< -o $(obj)/$@; +endef + +$(obj)/%.o: $(src)/%.c + $(Q)$(call do_compile) + +%.o: $(src)/%.c + $(Q)$(call do_compile) + +PEVENT_LIB_OBJS = event-parse.o trace-seq.o parse-filter.o parse-utils.o + +ALL_OBJS = $(PEVENT_LIB_OBJS) + +CMD_TARGETS = $(LIB_FILE) + +TARGETS = $(CMD_TARGETS) + + +all: all_cmd + +all_cmd: $(CMD_TARGETS) + +libtraceevent.so: $(PEVENT_LIB_OBJS) + $(Q)$(do_compile_shared_library) + +libtraceevent.a: $(PEVENT_LIB_OBJS) + $(Q)$(do_build_static_lib) + +$(PEVENT_LIB_OBJS): %.o: $(src)/%.c TRACEEVENT-CFLAGS + $(Q)$(do_fpic_compile) + +define make_version.h + (echo '/* This file is automatically generated. Do not modify. */'; \ + echo \#define VERSION_CODE $(shell \ + expr $(VERSION) \* 256 + $(PATCHLEVEL)); \ + echo '#define EXTRAVERSION ' $(EXTRAVERSION); \ + echo '#define VERSION_STRING "'$(VERSION).$(PATCHLEVEL).$(EXTRAVERSION)'"'; \ + echo '#define FILE_VERSION '$(FILE_VERSION); \ + ) > $1 +endef + +define update_version.h + ($(call make_version.h, $@.tmp); \ + if [ -r $@ ] && cmp -s $@ $@.tmp; then \ + rm -f $@.tmp; \ + else \ + echo ' UPDATE $@'; \ + mv -f $@.tmp $@; \ + fi); +endef + +ep_version.h: force + $(Q)$(N)$(call update_version.h) + +VERSION_FILES = ep_version.h + +define update_dir + (echo $1 > $@.tmp; \ + if [ -r $@ ] && cmp -s $@ $@.tmp; then \ + rm -f $@.tmp; \ + else \ + echo ' UPDATE $@'; \ + mv -f $@.tmp $@; \ + fi); +endef + +## make deps + +all_objs := $(sort $(ALL_OBJS)) +all_deps := $(all_objs:%.o=.%.d) + +# let .d file also depends on the source and header files +define check_deps + @set -e; $(RM) $@; \ + $(CC) -M $(CFLAGS) $< > $@.$$$$; \ + sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \ + $(RM) $@.$$$$ +endef + +$(gui_deps): ks_version.h +$(non_gui_deps): tc_version.h + +$(all_deps): .%.d: $(src)/%.c + $(Q)$(call check_deps) + +$(all_objs) : %.o : .%.d + +dep_includes := $(wildcard $(all_deps)) + +ifneq ($(dep_includes),) + include $(dep_includes) +endif + +### Detect environment changes +TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):$(ARCH):$(CROSS_COMPILE) + +TRACEEVENT-CFLAGS: force + @FLAGS='$(TRACK_CFLAGS)'; \ + if test x"$$FLAGS" != x"`cat TRACEEVENT-CFLAGS 2>/dev/null`" ; then \ + echo 1>&2 " * new build flags or cross compiler"; \ + echo "$$FLAGS" >TRACEEVENT-CFLAGS; \ + fi + +tags: force + $(RM) tags + find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \ + --regex-c++='/_PE\(([^,)]*).*/PEVENT_ERRNO__\1/' + +TAGS: force + $(RM) TAGS + find . -name '*.[ch]' | xargs etags \ + --regex='/_PE(\([^,)]*\).*/PEVENT_ERRNO__\1/' + +define do_install + $(print_install) \ + if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ + fi; \ + $(INSTALL) $1 '$(DESTDIR_SQ)$2' +endef + +install_lib: all_cmd install_plugins install_python + $(Q)$(call do_install,$(LIB_FILE),$(bindir_SQ)) + +install: install_lib + +clean: + $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d + $(RM) TRACEEVENT-CFLAGS tags TAGS + +endif # skip-makefile + +PHONY += force +force: + +# Declare the contents of the .PHONY variable as phony. We keep that +# information in a variable so we can use it in if_changed and friends. +.PHONY: $(PHONY) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c new file mode 100644 index 000000000000..5f34aa371b56 --- /dev/null +++ b/tools/lib/traceevent/event-parse.c @@ -0,0 +1,5270 @@ +/* + * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * The parts for function graph printing was taken and modified from the + * Linux Kernel that were written by + * - Copyright (C) 2009 Frederic Weisbecker, + * Frederic Weisbecker gave his permission to relicense the code to + * the Lesser General Public License. + */ +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> +#include <ctype.h> +#include <errno.h> + +#include "event-parse.h" +#include "event-utils.h" + +static const char *input_buf; +static unsigned long long input_buf_ptr; +static unsigned long long input_buf_siz; + +static int is_flag_field; +static int is_symbolic_field; + +static int show_warning = 1; + +#define do_warning(fmt, ...) \ + do { \ + if (show_warning) \ + warning(fmt, ##__VA_ARGS__); \ + } while (0) + +static void init_input_buf(const char *buf, unsigned long long size) +{ + input_buf = buf; + input_buf_siz = size; + input_buf_ptr = 0; +} + +const char *pevent_get_input_buf(void) +{ + return input_buf; +} + +unsigned long long pevent_get_input_buf_ptr(void) +{ + return input_buf_ptr; +} + +struct event_handler { + struct event_handler *next; + int id; + const char *sys_name; + const char *event_name; + pevent_event_handler_func func; + void *context; +}; + +struct pevent_func_params { + struct pevent_func_params *next; + enum pevent_func_arg_type type; +}; + +struct pevent_function_handler { + struct pevent_function_handler *next; + enum pevent_func_arg_type ret_type; + char *name; + pevent_func_handler func; + struct pevent_func_params *params; + int nr_args; +}; + +static unsigned long long +process_defined_func(struct trace_seq *s, void *data, int size, + struct event_format *event, struct print_arg *arg); + +static void free_func_handle(struct pevent_function_handler *func); + +/** + * pevent_buffer_init - init buffer for parsing + * @buf: buffer to parse + * @size: the size of the buffer + * + * For use with pevent_read_token(), this initializes the internal + * buffer that pevent_read_token() will parse. + */ +void pevent_buffer_init(const char *buf, unsigned long long size) +{ + init_input_buf(buf, size); +} + +void breakpoint(void) +{ + static int x; + x++; +} + +struct print_arg *alloc_arg(void) +{ + struct print_arg *arg; + + arg = malloc_or_die(sizeof(*arg)); + if (!arg) + return NULL; + memset(arg, 0, sizeof(*arg)); + + return arg; +} + +struct cmdline { + char *comm; + int pid; +}; + +static int cmdline_cmp(const void *a, const void *b) +{ + const struct cmdline *ca = a; + const struct cmdline *cb = b; + + if (ca->pid < cb->pid) + return -1; + if (ca->pid > cb->pid) + return 1; + + return 0; +} + +struct cmdline_list { + struct cmdline_list *next; + char *comm; + int pid; +}; + +static int cmdline_init(struct pevent *pevent) +{ + struct cmdline_list *cmdlist = pevent->cmdlist; + struct cmdline_list *item; + struct cmdline *cmdlines; + int i; + + cmdlines = malloc_or_die(sizeof(*cmdlines) * pevent->cmdline_count); + + i = 0; + while (cmdlist) { + cmdlines[i].pid = cmdlist->pid; + cmdlines[i].comm = cmdlist->comm; + i++; + item = cmdlist; + cmdlist = cmdlist->next; + free(item); + } + + qsort(cmdlines, pevent->cmdline_count, sizeof(*cmdlines), cmdline_cmp); + + pevent->cmdlines = cmdlines; + pevent->cmdlist = NULL; + + return 0; +} + +static char *find_cmdline(struct pevent *pevent, int pid) +{ + const struct cmdline *comm; + struct cmdline key; + + if (!pid) + return "<idle>"; + + if (!pevent->cmdlines) + cmdline_init(pevent); + + key.pid = pid; + + comm = bsearch(&key, pevent->cmdlines, pevent->cmdline_count, + sizeof(*pevent->cmdlines), cmdline_cmp); + + if (comm) + return comm->comm; + return "<...>"; +} + +/** + * pevent_pid_is_registered - return if a pid has a cmdline registered + * @pevent: handle for the pevent + * @pid: The pid to check if it has a cmdline registered with. + * + * Returns 1 if the pid has a cmdline mapped to it + * 0 otherwise. + */ +int pevent_pid_is_registered(struct pevent *pevent, int pid) +{ + const struct cmdline *comm; + struct cmdline key; + + if (!pid) + return 1; + + if (!pevent->cmdlines) + cmdline_init(pevent); + + key.pid = pid; + + comm = bsearch(&key, pevent->cmdlines, pevent->cmdline_count, + sizeof(*pevent->cmdlines), cmdline_cmp); + + if (comm) + return 1; + return 0; +} + +/* + * If the command lines have been converted to an array, then + * we must add this pid. This is much slower than when cmdlines + * are added before the array is initialized. + */ +static int add_new_comm(struct pevent *pevent, const char *comm, int pid) +{ + struct cmdline *cmdlines = pevent->cmdlines; + const struct cmdline *cmdline; + struct cmdline key; + + if (!pid) + return 0; + + /* avoid duplicates */ + key.pid = pid; + + cmdline = bsearch(&key, pevent->cmdlines, pevent->cmdline_count, + sizeof(*pevent->cmdlines), cmdline_cmp); + if (cmdline) { + errno = EEXIST; + return -1; + } + + cmdlines = realloc(cmdlines, sizeof(*cmdlines) * (pevent->cmdline_count + 1)); + if (!cmdlines) { + errno = ENOMEM; + return -1; + } + + cmdlines[pevent->cmdline_count].pid = pid; + cmdlines[pevent->cmdline_count].comm = strdup(comm); + if (!cmdlines[pevent->cmdline_count].comm) + die("malloc comm"); + + if (cmdlines[pevent->cmdline_count].comm) + pevent->cmdline_count++; + + qsort(cmdlines, pevent->cmdline_count, sizeof(*cmdlines), cmdline_cmp); + pevent->cmdlines = cmdlines; + + return 0; +} + +/** + * pevent_register_comm - register a pid / comm mapping + * @pevent: handle for the pevent + * @comm: the command line to register + * @pid: the pid to map the command line to + * + * This adds a mapping to search for command line names with + * a given pid. The comm is duplicated. + */ +int pevent_register_comm(struct pevent *pevent, const char *comm, int pid) +{ + struct cmdline_list *item; + + if (pevent->cmdlines) + return add_new_comm(pevent, comm, pid); + + item = malloc_or_die(sizeof(*item)); + item->comm = strdup(comm); + if (!item->comm) + die("malloc comm"); + item->pid = pid; + item->next = pevent->cmdlist; + + pevent->cmdlist = item; + pevent->cmdline_count++; + + return 0; +} + +struct func_map { + unsigned long long addr; + char *func; + char *mod; +}; + +struct func_list { + struct func_list *next; + unsigned long long addr; + char *func; + char *mod; +}; + +static int func_cmp(const void *a, const void *b) +{ + const struct func_map *fa = a; + const struct func_map *fb = b; + + if (fa->addr < fb->addr) + return -1; + if (fa->addr > fb->addr) + return 1; + + return 0; +} + +/* + * We are searching for a record in between, not an exact + * match. + */ +static int func_bcmp(const void *a, const void *b) +{ + const struct func_map *fa = a; + const struct func_map *fb = b; + + if ((fa->addr == fb->addr) || + + (fa->addr > fb->addr && + fa->addr < (fb+1)->addr)) + return 0; + + if (fa->addr < fb->addr) + return -1; + + return 1; +} + +static int func_map_init(struct pevent *pevent) +{ + struct func_list *funclist; + struct func_list *item; + struct func_map *func_map; + int i; + + func_map = malloc_or_die(sizeof(*func_map) * (pevent->func_count + 1)); + funclist = pevent->funclist; + + i = 0; + while (funclist) { + func_map[i].func = funclist->func; + func_map[i].addr = funclist->addr; + func_map[i].mod = funclist->mod; + i++; + item = funclist; + funclist = funclist->next; + free(item); + } + + qsort(func_map, pevent->func_count, sizeof(*func_map), func_cmp); + + /* + * Add a special record at the end. + */ + func_map[pevent->func_count].func = NULL; + func_map[pevent->func_count].addr = 0; + func_map[pevent->func_count].mod = NULL; + + pevent->func_map = func_map; + pevent->funclist = NULL; + + return 0; +} + +static struct func_map * +find_func(struct pevent *pevent, unsigned long long addr) +{ + struct func_map *func; + struct func_map key; + + if (!pevent->func_map) + func_map_init(pevent); + + key.addr = addr; + + func = bsearch(&key, pevent->func_map, pevent->func_count, + sizeof(*pevent->func_map), func_bcmp); + + return func; +} + +/** + * pevent_find_function - find a function by a given address + * @pevent: handle for the pevent + * @addr: the address to find the function with + * + * Returns a pointer to the function stored that has the given + * address. Note, the address does not have to be exact, it + * will select the function that would contain the address. + */ +const char *pevent_find_function(struct pevent *pevent, unsigned long long addr) +{ + struct func_map *map; + + map = find_func(pevent, addr); + if (!map) + return NULL; + + return map->func; +} + +/** + * pevent_find_function_address - find a function address by a given address + * @pevent: handle for the pevent + * @addr: the address to find the function with + * + * Returns the address the function starts at. This can be used in + * conjunction with pevent_find_function to print both the function + * name and the function offset. + */ +unsigned long long +pevent_find_function_address(struct pevent *pevent, unsigned long long addr) +{ + struct func_map *map; + + map = find_func(pevent, addr); + if (!map) + return 0; + + return map->addr; +} + +/** + * pevent_register_function - register a function with a given address + * @pevent: handle for the pevent + * @function: the function name to register + * @addr: the address the function starts at + * @mod: the kernel module the function may be in (NULL for none) + * + * This registers a function name with an address and module. + * The @func passed in is duplicated. + */ +int pevent_register_function(struct pevent *pevent, char *func, + unsigned long long addr, char *mod) +{ + struct func_list *item; + + item = malloc_or_die(sizeof(*item)); + + item->next = pevent->funclist; + item->func = strdup(func); + if (mod) + item->mod = strdup(mod); + else + item->mod = NULL; + item->addr = addr; + + if (!item->func || (mod && !item->mod)) + die("malloc func"); + + pevent->funclist = item; + pevent->func_count++; + + return 0; +} + +/** + * pevent_print_funcs - print out the stored functions + * @pevent: handle for the pevent + * + * This prints out the stored functions. + */ +void pevent_print_funcs(struct pevent *pevent) +{ + int i; + + if (!pevent->func_map) + func_map_init(pevent); + + for (i = 0; i < (int)pevent->func_count; i++) { + printf("%016llx %s", + pevent->func_map[i].addr, + pevent->func_map[i].func); + if (pevent->func_map[i].mod) + printf(" [%s]\n", pevent->func_map[i].mod); + else + printf("\n"); + } +} + +struct printk_map { + unsigned long long addr; + char *printk; +}; + +struct printk_list { + struct printk_list *next; + unsigned long long addr; + char *printk; +}; + +static int printk_cmp(const void *a, const void *b) +{ + const struct printk_map *pa = a; + const struct printk_map *pb = b; + + if (pa->addr < pb->addr) + return -1; + if (pa->addr > pb->addr) + return 1; + + return 0; +} + +static void printk_map_init(struct pevent *pevent) +{ + struct printk_list *printklist; + struct printk_list *item; + struct printk_map *printk_map; + int i; + + printk_map = malloc_or_die(sizeof(*printk_map) * (pevent->printk_count + 1)); + + printklist = pevent->printklist; + + i = 0; + while (printklist) { + printk_map[i].printk = printklist->printk; + printk_map[i].addr = printklist->addr; + i++; + item = printklist; + printklist = printklist->next; + free(item); + } + + qsort(printk_map, pevent->printk_count, sizeof(*printk_map), printk_cmp); + + pevent->printk_map = printk_map; + pevent->printklist = NULL; +} + +static struct printk_map * +find_printk(struct pevent *pevent, unsigned long long addr) +{ + struct printk_map *printk; + struct printk_map key; + + if (!pevent->printk_map) + printk_map_init(pevent); + + key.addr = addr; + + printk = bsearch(&key, pevent->printk_map, pevent->printk_count, + sizeof(*pevent->printk_map), printk_cmp); + + return printk; +} + +/** + * pevent_register_print_string - register a string by its address + * @pevent: handle for the pevent + * @fmt: the string format to register + * @addr: the address the string was located at + * + * This registers a string by the address it was stored in the kernel. + * The @fmt passed in is duplicated. + */ +int pevent_register_print_string(struct pevent *pevent, char *fmt, + unsigned long long addr) +{ + struct printk_list *item; + + item = malloc_or_die(sizeof(*item)); + + item->next = pevent->printklist; + item->printk = strdup(fmt); + item->addr = addr; + + if (!item->printk) + die("malloc fmt"); + + pevent->printklist = item; + pevent->printk_count++; + + return 0; +} + +/** + * pevent_print_printk - print out the stored strings + * @pevent: handle for the pevent + * + * This prints the string formats that were stored. + */ +void pevent_print_printk(struct pevent *pevent) +{ + int i; + + if (!pevent->printk_map) + printk_map_init(pevent); + + for (i = 0; i < (int)pevent->printk_count; i++) { + printf("%016llx %s\n", + pevent->printk_map[i].addr, + pevent->printk_map[i].printk); + } +} + +static struct event_format *alloc_event(void) +{ + struct event_format *event; + + event = malloc(sizeof(*event)); + if (!event) + return NULL; + memset(event, 0, sizeof(*event)); + + return event; +} + +static void add_event(struct pevent *pevent, struct event_format *event) +{ + int i; + + pevent->events = realloc(pevent->events, sizeof(event) * + (pevent->nr_events + 1)); + if (!pevent->events) + die("Can not allocate events"); + + for (i = 0; i < pevent->nr_events; i++) { + if (pevent->events[i]->id > event->id) + break; + } + if (i < pevent->nr_events) + memmove(&pevent->events[i + 1], + &pevent->events[i], + sizeof(event) * (pevent->nr_events - i)); + + pevent->events[i] = event; + pevent->nr_events++; + + event->pevent = pevent; +} + +static int event_item_type(enum event_type type) +{ + switch (type) { + case EVENT_ITEM ... EVENT_SQUOTE: + return 1; + case EVENT_ERROR ... EVENT_DELIM: + default: + return 0; + } +} + +static void free_flag_sym(struct print_flag_sym *fsym) +{ + struct print_flag_sym *next; + + while (fsym) { + next = fsym->next; + free(fsym->value); + free(fsym->str); + free(fsym); + fsym = next; + } +} + +static void free_arg(struct print_arg *arg) +{ + struct print_arg *farg; + + if (!arg) + return; + + switch (arg->type) { + case PRINT_ATOM: + free(arg->atom.atom); + break; + case PRINT_FIELD: + free(arg->field.name); + break; + case PRINT_FLAGS: + free_arg(arg->flags.field); + free(arg->flags.delim); + free_flag_sym(arg->flags.flags); + break; + case PRINT_SYMBOL: + free_arg(arg->symbol.field); + free_flag_sym(arg->symbol.symbols); + break; + case PRINT_HEX: + free_arg(arg->hex.field); + free_arg(arg->hex.size); + break; + case PRINT_TYPE: + free(arg->typecast.type); + free_arg(arg->typecast.item); + break; + case PRINT_STRING: + case PRINT_BSTRING: + free(arg->string.string); + break; + case PRINT_DYNAMIC_ARRAY: + free(arg->dynarray.index); + break; + case PRINT_OP: + free(arg->op.op); + free_arg(arg->op.left); + free_arg(arg->op.right); + break; + case PRINT_FUNC: + while (arg->func.args) { + farg = arg->func.args; + arg->func.args = farg->next; + free_arg(farg); + } + break; + + case PRINT_NULL: + default: + break; + } + + free(arg); +} + +static enum event_type get_type(int ch) +{ + if (ch == '\n') + return EVENT_NEWLINE; + if (isspace(ch)) + return EVENT_SPACE; + if (isalnum(ch) || ch == '_') + return EVENT_ITEM; + if (ch == '\'') + return EVENT_SQUOTE; + if (ch == '"') + return EVENT_DQUOTE; + if (!isprint(ch)) + return EVENT_NONE; + if (ch == '(' || ch == ')' || ch == ',') + return EVENT_DELIM; + + return EVENT_OP; +} + +static int __read_char(void) +{ + if (input_buf_ptr >= input_buf_siz) + return -1; + + return input_buf[input_buf_ptr++]; +} + +static int __peek_char(void) +{ + if (input_buf_ptr >= input_buf_siz) + return -1; + + return input_buf[input_buf_ptr]; +} + +/** + * pevent_peek_char - peek at the next character that will be read + * + * Returns the next character read, or -1 if end of buffer. + */ +int pevent_peek_char(void) +{ + return __peek_char(); +} + +static int extend_token(char **tok, char *buf, int size) +{ + char *newtok = realloc(*tok, size); + + if (!newtok) { + free(*tok); + *tok = NULL; + return -1; + } + + if (!*tok) + strcpy(newtok, buf); + else + strcat(newtok, buf); + *tok = newtok; + + return 0; +} + +static enum event_type force_token(const char *str, char **tok); + +static enum event_type __read_token(char **tok) +{ + char buf[BUFSIZ]; + int ch, last_ch, quote_ch, next_ch; + int i = 0; + int tok_size = 0; + enum event_type type; + + *tok = NULL; + + + ch = __read_char(); + if (ch < 0) + return EVENT_NONE; + + type = get_type(ch); + if (type == EVENT_NONE) + return type; + + buf[i++] = ch; + + switch (type) { + case EVENT_NEWLINE: + case EVENT_DELIM: + *tok = malloc_or_die(2); + (*tok)[0] = ch; + (*tok)[1] = 0; + return type; + + case EVENT_OP: + switch (ch) { + case '-': + next_ch = __peek_char(); + if (next_ch == '>') { + buf[i++] = __read_char(); + break; + } + /* fall through */ + case '+': + case '|': + case '&': + case '>': + case '<': + last_ch = ch; + ch = __peek_char(); + if (ch != last_ch) + goto test_equal; + buf[i++] = __read_char(); + switch (last_ch) { + case '>': + case '<': + goto test_equal; + default: + break; + } + break; + case '!': + case '=': + goto test_equal; + default: /* what should we do instead? */ + break; + } + buf[i] = 0; + *tok = strdup(buf); + return type; + + test_equal: + ch = __peek_char(); + if (ch == '=') + buf[i++] = __read_char(); + goto out; + + case EVENT_DQUOTE: + case EVENT_SQUOTE: + /* don't keep quotes */ + i--; + quote_ch = ch; + last_ch = 0; + concat: + do { + if (i == (BUFSIZ - 1)) { + buf[i] = 0; + tok_size += BUFSIZ; + + if (extend_token(tok, buf, tok_size) < 0) + return EVENT_NONE; + i = 0; + } + last_ch = ch; + ch = __read_char(); + buf[i++] = ch; + /* the '\' '\' will cancel itself */ + if (ch == '\\' && last_ch == '\\') + last_ch = 0; + } while (ch != quote_ch || last_ch == '\\'); + /* remove the last quote */ + i--; + + /* + * For strings (double quotes) check the next token. + * If it is another string, concatinate the two. + */ + if (type == EVENT_DQUOTE) { + unsigned long long save_input_buf_ptr = input_buf_ptr; + + do { + ch = __read_char(); + } while (isspace(ch)); + if (ch == '"') + goto concat; + input_buf_ptr = save_input_buf_ptr; + } + + goto out; + + case EVENT_ERROR ... EVENT_SPACE: + case EVENT_ITEM: + default: + break; + } + + while (get_type(__peek_char()) == type) { + if (i == (BUFSIZ - 1)) { + buf[i] = 0; + tok_size += BUFSIZ; + + if (extend_token(tok, buf, tok_size) < 0) + return EVENT_NONE; + i = 0; + } + ch = __read_char(); + buf[i++] = ch; + } + + out: + buf[i] = 0; + if (extend_token(tok, buf, tok_size + i + 1) < 0) + return EVENT_NONE; + + if (type == EVENT_ITEM) { + /* + * Older versions of the kernel has a bug that + * creates invalid symbols and will break the mac80211 + * parsing. This is a work around to that bug. + * + * See Linux kernel commit: + * 811cb50baf63461ce0bdb234927046131fc7fa8b + */ + if (strcmp(*tok, "LOCAL_PR_FMT") == 0) { + free(*tok); + *tok = NULL; + return force_token("\"\%s\" ", tok); + } else if (strcmp(*tok, "STA_PR_FMT") == 0) { + free(*tok); + *tok = NULL; + return force_token("\" sta:%pM\" ", tok); + } else if (strcmp(*tok, "VIF_PR_FMT") == 0) { + free(*tok); + *tok = NULL; + return force_token("\" vif:%p(%d)\" ", tok); + } + } + + return type; +} + +static enum event_type force_token(const char *str, char **tok) +{ + const char *save_input_buf; + unsigned long long save_input_buf_ptr; + unsigned long long save_input_buf_siz; + enum event_type type; + + /* save off the current input pointers */ + save_input_buf = input_buf; + save_input_buf_ptr = input_buf_ptr; + save_input_buf_siz = input_buf_siz; + + init_input_buf(str, strlen(str)); + + type = __read_token(tok); + + /* reset back to original token */ + input_buf = save_input_buf; + input_buf_ptr = save_input_buf_ptr; + input_buf_siz = save_input_buf_siz; + + return type; +} + +static void free_token(char *tok) +{ + if (tok) + free(tok); +} + +static enum event_type read_token(char **tok) +{ + enum event_type type; + + for (;;) { + type = __read_token(tok); + if (type != EVENT_SPACE) + return type; + + free_token(*tok); + } + + /* not reached */ + *tok = NULL; + return EVENT_NONE; +} + +/** + * pevent_read_token - access to utilites to use the pevent parser + * @tok: The token to return + * + * This will parse tokens from the string given by + * pevent_init_data(). + * + * Returns the token type. + */ +enum event_type pevent_read_token(char **tok) +{ + return read_token(tok); +} + +/** + * pevent_free_token - free a token returned by pevent_read_token + * @token: the token to free + */ +void pevent_free_token(char *token) +{ + free_token(token); +} + +/* no newline */ +static enum event_type read_token_item(char **tok) +{ + enum event_type type; + + for (;;) { + type = __read_token(tok); + if (type != EVENT_SPACE && type != EVENT_NEWLINE) + return type; + free_token(*tok); + *tok = NULL; + } + + /* not reached */ + *tok = NULL; + return EVENT_NONE; +} + +static int test_type(enum event_type type, enum event_type expect) +{ + if (type != expect) { + do_warning("Error: expected type %d but read %d", + expect, type); + return -1; + } + return 0; +} + +static int test_type_token(enum event_type type, const char *token, + enum event_type expect, const char *expect_tok) +{ + if (type != expect) { + do_warning("Error: expected type %d but read %d", + expect, type); + return -1; + } + + if (strcmp(token, expect_tok) != 0) { + do_warning("Error: expected '%s' but read '%s'", + expect_tok, token); + return -1; + } + return 0; +} + +static int __read_expect_type(enum event_type expect, char **tok, int newline_ok) +{ + enum event_type type; + + if (newline_ok) + type = read_token(tok); + else + type = read_token_item(tok); + return test_type(type, expect); +} + +static int read_expect_type(enum event_type expect, char **tok) +{ + return __read_expect_type(expect, tok, 1); +} + +static int __read_expected(enum event_type expect, const char *str, + int newline_ok) +{ + enum event_type type; + char *token; + int ret; + + if (newline_ok) + type = read_token(&token); + else + type = read_token_item(&token); + + ret = test_type_token(type, token, expect, str); + + free_token(token); + + return ret; +} + +static int read_expected(enum event_type expect, const char *str) +{ + return __read_expected(expect, str, 1); +} + +static int read_expected_item(enum event_type expect, const char *str) +{ + return __read_expected(expect, str, 0); +} + +static char *event_read_name(void) +{ + char *token; + + if (read_expected(EVENT_ITEM, "name") < 0) + return NULL; + + if (read_expected(EVENT_OP, ":") < 0) + return NULL; + + if (read_expect_type(EVENT_ITEM, &token) < 0) + goto fail; + + return token; + + fail: + free_token(token); + return NULL; +} + +static int event_read_id(void) +{ + char *token; + int id; + + if (read_expected_item(EVENT_ITEM, "ID") < 0) + return -1; + + if (read_expected(EVENT_OP, ":") < 0) + return -1; + + if (read_expect_type(EVENT_ITEM, &token) < 0) + goto fail; + + id = strtoul(token, NULL, 0); + free_token(token); + return id; + + fail: + free_token(token); + return -1; +} + +static int field_is_string(struct format_field *field) +{ + if ((field->flags & FIELD_IS_ARRAY) && + (strstr(field->type, "char") || strstr(field->type, "u8") || + strstr(field->type, "s8"))) + return 1; + + return 0; +} + +static int field_is_dynamic(struct format_field *field) +{ + if (strncmp(field->type, "__data_loc", 10) == 0) + return 1; + + return 0; +} + +static int field_is_long(struct format_field *field) +{ + /* includes long long */ + if (strstr(field->type, "long")) + return 1; + + return 0; +} + +static int event_read_fields(struct event_format *event, struct format_field **fields) +{ + struct format_field *field = NULL; + enum event_type type; + char *token; + char *last_token; + int count = 0; + + do { + type = read_token(&token); + if (type == EVENT_NEWLINE) { + free_token(token); + return count; + } + + count++; + + if (test_type_token(type, token, EVENT_ITEM, "field")) + goto fail; + free_token(token); + + type = read_token(&token); + /* + * The ftrace fields may still use the "special" name. + * Just ignore it. + */ + if (event->flags & EVENT_FL_ISFTRACE && + type == EVENT_ITEM && strcmp(token, "special") == 0) { + free_token(token); + type = read_token(&token); + } + + if (test_type_token(type, token, EVENT_OP, ":") < 0) + goto fail; + + free_token(token); + if (read_expect_type(EVENT_ITEM, &token) < 0) + goto fail; + + last_token = token; + + field = malloc_or_die(sizeof(*field)); + memset(field, 0, sizeof(*field)); + field->event = event; + + /* read the rest of the type */ + for (;;) { + type = read_token(&token); + if (type == EVENT_ITEM || + (type == EVENT_OP && strcmp(token, "*") == 0) || + /* + * Some of the ftrace fields are broken and have + * an illegal "." in them. + */ + (event->flags & EVENT_FL_ISFTRACE && + type == EVENT_OP && strcmp(token, ".") == 0)) { + + if (strcmp(token, "*") == 0) + field->flags |= FIELD_IS_POINTER; + + if (field->type) { + char *new_type; + new_type = realloc(field->type, + strlen(field->type) + + strlen(last_token) + 2); + if (!new_type) { + free(last_token); + goto fail; + } + field->type = new_type; + strcat(field->type, " "); + strcat(field->type, last_token); + free(last_token); + } else + field->type = last_token; + last_token = token; + continue; + } + + break; + } + + if (!field->type) { + die("no type found"); + goto fail; + } + field->name = last_token; + + if (test_type(type, EVENT_OP)) + goto fail; + + if (strcmp(token, "[") == 0) { + enum event_type last_type = type; + char *brackets = token; + char *new_brackets; + int len; + + field->flags |= FIELD_IS_ARRAY; + + type = read_token(&token); + + if (type == EVENT_ITEM) + field->arraylen = strtoul(token, NULL, 0); + else + field->arraylen = 0; + + while (strcmp(token, "]") != 0) { + if (last_type == EVENT_ITEM && + type == EVENT_ITEM) + len = 2; + else + len = 1; + last_type = type; + + new_brackets = realloc(brackets, + strlen(brackets) + + strlen(token) + len); + if (!new_brackets) { + free(brackets); + goto fail; + } + brackets = new_brackets; + if (len == 2) + strcat(brackets, " "); + strcat(brackets, token); + /* We only care about the last token */ + field->arraylen = strtoul(token, NULL, 0); + free_token(token); + type = read_token(&token); + if (type == EVENT_NONE) { + die("failed to find token"); + goto fail; + } + } + + free_token(token); + + new_brackets = realloc(brackets, strlen(brackets) + 2); + if (!new_brackets) { + free(brackets); + goto fail; + } + brackets = new_brackets; + strcat(brackets, "]"); + + /* add brackets to type */ + + type = read_token(&token); + /* + * If the next token is not an OP, then it is of + * the format: type [] item; + */ + if (type == EVENT_ITEM) { + char *new_type; + new_type = realloc(field->type, + strlen(field->type) + + strlen(field->name) + + strlen(brackets) + 2); + if (!new_type) { + free(brackets); + goto fail; + } + field->type = new_type; + strcat(field->type, " "); + strcat(field->type, field->name); + free_token(field->name); + strcat(field->type, brackets); + field->name = token; + type = read_token(&token); + } else { + char *new_type; + new_type = realloc(field->type, + strlen(field->type) + + strlen(brackets) + 1); + if (!new_type) { + free(brackets); + goto fail; + } + field->type = new_type; + strcat(field->type, brackets); + } + free(brackets); + } + + if (field_is_string(field)) + field->flags |= FIELD_IS_STRING; + if (field_is_dynamic(field)) + field->flags |= FIELD_IS_DYNAMIC; + if (field_is_long(field)) + field->flags |= FIELD_IS_LONG; + + if (test_type_token(type, token, EVENT_OP, ";")) + goto fail; + free_token(token); + + if (read_expected(EVENT_ITEM, "offset") < 0) + goto fail_expect; + + if (read_expected(EVENT_OP, ":") < 0) + goto fail_expect; + + if (read_expect_type(EVENT_ITEM, &token)) + goto fail; + field->offset = strtoul(token, NULL, 0); + free_token(token); + + if (read_expected(EVENT_OP, ";") < 0) + goto fail_expect; + + if (read_expected(EVENT_ITEM, "size") < 0) + goto fail_expect; + + if (read_expected(EVENT_OP, ":") < 0) + goto fail_expect; + + if (read_expect_type(EVENT_ITEM, &token)) + goto fail; + field->size = strtoul(token, NULL, 0); + free_token(token); + + if (read_expected(EVENT_OP, ";") < 0) + goto fail_expect; + + type = read_token(&token); + if (type != EVENT_NEWLINE) { + /* newer versions of the kernel have a "signed" type */ + if (test_type_token(type, token, EVENT_ITEM, "signed")) + goto fail; + + free_token(token); + + if (read_expected(EVENT_OP, ":") < 0) + goto fail_expect; + + if (read_expect_type(EVENT_ITEM, &token)) + goto fail; + + /* add signed type */ + + free_token(token); + if (read_expected(EVENT_OP, ";") < 0) + goto fail_expect; + + if (read_expect_type(EVENT_NEWLINE, &token)) + goto fail; + } + + free_token(token); + + if (field->flags & FIELD_IS_ARRAY) { + if (field->arraylen) + field->elementsize = field->size / field->arraylen; + else if (field->flags & FIELD_IS_STRING) + field->elementsize = 1; + else + field->elementsize = event->pevent->long_size; + } else + field->elementsize = field->size; + + *fields = field; + fields = &field->next; + + } while (1); + + return 0; + +fail: + free_token(token); +fail_expect: + if (field) { + free(field->type); + free(field->name); + free(field); + } + return -1; +} + +static int event_read_format(struct event_format *event) +{ + char *token; + int ret; + + if (read_expected_item(EVENT_ITEM, "format") < 0) + return -1; + + if (read_expected(EVENT_OP, ":") < 0) + return -1; + + if (read_expect_type(EVENT_NEWLINE, &token)) + goto fail; + free_token(token); + + ret = event_read_fields(event, &event->format.common_fields); + if (ret < 0) + return ret; + event->format.nr_common = ret; + + ret = event_read_fields(event, &event->format.fields); + if (ret < 0) + return ret; + event->format.nr_fields = ret; + + return 0; + + fail: + free_token(token); + return -1; +} + +static enum event_type +process_arg_token(struct event_format *event, struct print_arg *arg, + char **tok, enum event_type type); + +static enum event_type +process_arg(struct event_format *event, struct print_arg *arg, char **tok) +{ + enum event_type type; + char *token; + + type = read_token(&token); + *tok = token; + + return process_arg_token(event, arg, tok, type); +} + +static enum event_type +process_op(struct event_format *event, struct print_arg *arg, char **tok); + +static enum event_type +process_cond(struct event_format *event, struct print_arg *top, char **tok) +{ + struct print_arg *arg, *left, *right; + enum event_type type; + char *token = NULL; + + arg = alloc_arg(); + left = alloc_arg(); + right = alloc_arg(); + + arg->type = PRINT_OP; + arg->op.left = left; + arg->op.right = right; + + *tok = NULL; + type = process_arg(event, left, &token); + + again: + /* Handle other operations in the arguments */ + if (type == EVENT_OP && strcmp(token, ":") != 0) { + type = process_op(event, left, &token); + goto again; + } + + if (test_type_token(type, token, EVENT_OP, ":")) + goto out_free; + + arg->op.op = token; + + type = process_arg(event, right, &token); + + top->op.right = arg; + + *tok = token; + return type; + +out_free: + /* Top may point to itself */ + top->op.right = NULL; + free_token(token); + free_arg(arg); + return EVENT_ERROR; +} + +static enum event_type +process_array(struct event_format *event, struct print_arg *top, char **tok) +{ + struct print_arg *arg; + enum event_type type; + char *token = NULL; + + arg = alloc_arg(); + + *tok = NULL; + type = process_arg(event, arg, &token); + if (test_type_token(type, token, EVENT_OP, "]")) + goto out_free; + + top->op.right = arg; + + free_token(token); + type = read_token_item(&token); + *tok = token; + + return type; + +out_free: + free_token(*tok); + *tok = NULL; + free_arg(arg); + return EVENT_ERROR; +} + +static int get_op_prio(char *op) +{ + if (!op[1]) { + switch (op[0]) { + case '~': + case '!': + return 4; + case '*': + case '/': + case '%': + return 6; + case '+': + case '-': + return 7; + /* '>>' and '<<' are 8 */ + case '<': + case '>': + return 9; + /* '==' and '!=' are 10 */ + case '&': + return 11; + case '^': + return 12; + case '|': + return 13; + case '?': + return 16; + default: + do_warning("unknown op '%c'", op[0]); + return -1; + } + } else { + if (strcmp(op, "++") == 0 || + strcmp(op, "--") == 0) { + return 3; + } else if (strcmp(op, ">>") == 0 || + strcmp(op, "<<") == 0) { + return 8; + } else if (strcmp(op, ">=") == 0 || + strcmp(op, "<=") == 0) { + return 9; + } else if (strcmp(op, "==") == 0 || + strcmp(op, "!=") == 0) { + return 10; + } else if (strcmp(op, "&&") == 0) { + return 14; + } else if (strcmp(op, "||") == 0) { + return 15; + } else { + do_warning("unknown op '%s'", op); + return -1; + } + } +} + +static int set_op_prio(struct print_arg *arg) +{ + + /* single ops are the greatest */ + if (!arg->op.left || arg->op.left->type == PRINT_NULL) + arg->op.prio = 0; + else + arg->op.prio = get_op_prio(arg->op.op); + + return arg->op.prio; +} + +/* Note, *tok does not get freed, but will most likely be saved */ +static enum event_type +process_op(struct event_format *event, struct print_arg *arg, char **tok) +{ + struct print_arg *left, *right = NULL; + enum event_type type; + char *token; + + /* the op is passed in via tok */ + token = *tok; + + if (arg->type == PRINT_OP && !arg->op.left) { + /* handle single op */ + if (token[1]) { + die("bad op token %s", token); + goto out_free; + } + switch (token[0]) { + case '~': + case '!': + case '+': + case '-': + break; + default: + do_warning("bad op token %s", token); + goto out_free; + + } + + /* make an empty left */ + left = alloc_arg(); + left->type = PRINT_NULL; + arg->op.left = left; + + right = alloc_arg(); + arg->op.right = right; + + /* do not free the token, it belongs to an op */ + *tok = NULL; + type = process_arg(event, right, tok); + + } else if (strcmp(token, "?") == 0) { + + left = alloc_arg(); + /* copy the top arg to the left */ + *left = *arg; + + arg->type = PRINT_OP; + arg->op.op = token; + arg->op.left = left; + arg->op.prio = 0; + + type = process_cond(event, arg, tok); + + } else if (strcmp(token, ">>") == 0 || + strcmp(token, "<<") == 0 || + strcmp(token, "&") == 0 || + strcmp(token, "|") == 0 || + strcmp(token, "&&") == 0 || + strcmp(token, "||") == 0 || + strcmp(token, "-") == 0 || + strcmp(token, "+") == 0 || + strcmp(token, "*") == 0 || + strcmp(token, "^") == 0 || + strcmp(token, "/") == 0 || + strcmp(token, "<") == 0 || + strcmp(token, ">") == 0 || + strcmp(token, "==") == 0 || + strcmp(token, "!=") == 0) { + + left = alloc_arg(); + + /* copy the top arg to the left */ + *left = *arg; + + arg->type = PRINT_OP; + arg->op.op = token; + arg->op.left = left; + + if (set_op_prio(arg) == -1) { + event->flags |= EVENT_FL_FAILED; + /* arg->op.op (= token) will be freed at out_free */ + arg->op.op = NULL; + goto out_free; + } + + type = read_token_item(&token); + *tok = token; + + /* could just be a type pointer */ + if ((strcmp(arg->op.op, "*") == 0) && + type == EVENT_DELIM && (strcmp(token, ")") == 0)) { + char *new_atom; + + if (left->type != PRINT_ATOM) + die("bad pointer type"); + new_atom = realloc(left->atom.atom, + strlen(left->atom.atom) + 3); + if (!new_atom) + goto out_free; + + left->atom.atom = new_atom; + strcat(left->atom.atom, " *"); + free(arg->op.op); + *arg = *left; + free(left); + + return type; + } + + right = alloc_arg(); + type = process_arg_token(event, right, tok, type); + arg->op.right = right; + + } else if (strcmp(token, "[") == 0) { + + left = alloc_arg(); + *left = *arg; + + arg->type = PRINT_OP; + arg->op.op = token; + arg->op.left = left; + + arg->op.prio = 0; + + type = process_array(event, arg, tok); + + } else { + do_warning("unknown op '%s'", token); + event->flags |= EVENT_FL_FAILED; + /* the arg is now the left side */ + goto out_free; + } + + if (type == EVENT_OP && strcmp(*tok, ":") != 0) { + int prio; + + /* higher prios need to be closer to the root */ + prio = get_op_prio(*tok); + + if (prio > arg->op.prio) + return process_op(event, arg, tok); + + return process_op(event, right, tok); + } + + return type; + + out_free: + free_token(token); + *tok = NULL; + return EVENT_ERROR; +} + +static enum event_type +process_entry(struct event_format *event __unused, struct print_arg *arg, + char **tok) +{ + enum event_type type; + char *field; + char *token; + + if (read_expected(EVENT_OP, "->") < 0) + goto out_err; + + if (read_expect_type(EVENT_ITEM, &token) < 0) + goto out_free; + field = token; + + arg->type = PRINT_FIELD; + arg->field.name = field; + + if (is_flag_field) { + arg->field.field = pevent_find_any_field(event, arg->field.name); + arg->field.field->flags |= FIELD_IS_FLAG; + is_flag_field = 0; + } else if (is_symbolic_field) { + arg->field.field = pevent_find_any_field(event, arg->field.name); + arg->field.field->flags |= FIELD_IS_SYMBOLIC; + is_symbolic_field = 0; + } + + type = read_token(&token); + *tok = token; + + return type; + + out_free: + free_token(token); + out_err: + *tok = NULL; + return EVENT_ERROR; +} + +static char *arg_eval (struct print_arg *arg); + +static unsigned long long +eval_type_str(unsigned long long val, const char *type, int pointer) +{ + int sign = 0; + char *ref; + int len; + + len = strlen(type); + + if (pointer) { + + if (type[len-1] != '*') { + do_warning("pointer expected with non pointer type"); + return val; + } + + ref = malloc_or_die(len); + memcpy(ref, type, len); + + /* chop off the " *" */ + ref[len - 2] = 0; + + val = eval_type_str(val, ref, 0); + free(ref); + return val; + } + + /* check if this is a pointer */ + if (type[len - 1] == '*') + return val; + + /* Try to figure out the arg size*/ + if (strncmp(type, "struct", 6) == 0) + /* all bets off */ + return val; + + if (strcmp(type, "u8") == 0) + return val & 0xff; + + if (strcmp(type, "u16") == 0) + return val & 0xffff; + + if (strcmp(type, "u32") == 0) + return val & 0xffffffff; + + if (strcmp(type, "u64") == 0 || + strcmp(type, "s64")) + return val; + + if (strcmp(type, "s8") == 0) + return (unsigned long long)(char)val & 0xff; + + if (strcmp(type, "s16") == 0) + return (unsigned long long)(short)val & 0xffff; + + if (strcmp(type, "s32") == 0) + return (unsigned long long)(int)val & 0xffffffff; + + if (strncmp(type, "unsigned ", 9) == 0) { + sign = 0; + type += 9; + } + + if (strcmp(type, "char") == 0) { + if (sign) + return (unsigned long long)(char)val & 0xff; + else + return val & 0xff; + } + + if (strcmp(type, "short") == 0) { + if (sign) + return (unsigned long long)(short)val & 0xffff; + else + return val & 0xffff; + } + + if (strcmp(type, "int") == 0) { + if (sign) + return (unsigned long long)(int)val & 0xffffffff; + else + return val & 0xffffffff; + } + + return val; +} + +/* + * Try to figure out the type. + */ +static unsigned long long +eval_type(unsigned long long val, struct print_arg *arg, int pointer) +{ + if (arg->type != PRINT_TYPE) + die("expected type argument"); + + return eval_type_str(val, arg->typecast.type, pointer); +} + +static int arg_num_eval(struct print_arg *arg, long long *val) +{ + long long left, right; + int ret = 1; + + switch (arg->type) { + case PRINT_ATOM: + *val = strtoll(arg->atom.atom, NULL, 0); + break; + case PRINT_TYPE: + ret = arg_num_eval(arg->typecast.item, val); + if (!ret) + break; + *val = eval_type(*val, arg, 0); + break; + case PRINT_OP: + switch (arg->op.op[0]) { + case '|': + ret = arg_num_eval(arg->op.left, &left); + if (!ret) + break; + ret = arg_num_eval(arg->op.right, &right); + if (!ret) + break; + if (arg->op.op[1]) + *val = left || right; + else + *val = left | right; + break; + case '&': + ret = arg_num_eval(arg->op.left, &left); + if (!ret) + break; + ret = arg_num_eval(arg->op.right, &right); + if (!ret) + break; + if (arg->op.op[1]) + *val = left && right; + else + *val = left & right; + break; + case '<': + ret = arg_num_eval(arg->op.left, &left); + if (!ret) + break; + ret = arg_num_eval(arg->op.right, &right); + if (!ret) + break; + switch (arg->op.op[1]) { + case 0: + *val = left < right; + break; + case '<': + *val = left << right; + break; + case '=': + *val = left <= right; + break; + default: + do_warning("unknown op '%s'", arg->op.op); + ret = 0; + } + break; + case '>': + ret = arg_num_eval(arg->op.left, &left); + if (!ret) + break; + ret = arg_num_eval(arg->op.right, &right); + if (!ret) + break; + switch (arg->op.op[1]) { + case 0: + *val = left > right; + break; + case '>': + *val = left >> right; + break; + case '=': + *val = left >= right; + break; + default: + do_warning("unknown op '%s'", arg->op.op); + ret = 0; + } + break; + case '=': + ret = arg_num_eval(arg->op.left, &left); + if (!ret) + break; + ret = arg_num_eval(arg->op.right, &right); + if (!ret) + break; + + if (arg->op.op[1] != '=') { + do_warning("unknown op '%s'", arg->op.op); + ret = 0; + } else + *val = left == right; + break; + case '!': + ret = arg_num_eval(arg->op.left, &left); + if (!ret) + break; + ret = arg_num_eval(arg->op.right, &right); + if (!ret) + break; + + switch (arg->op.op[1]) { + case '=': + *val = left != right; + break; + default: + do_warning("unknown op '%s'", arg->op.op); + ret = 0; + } + break; + case '-': + /* check for negative */ + if (arg->op.left->type == PRINT_NULL) + left = 0; + else + ret = arg_num_eval(arg->op.left, &left); + if (!ret) + break; + ret = arg_num_eval(arg->op.right, &right); + if (!ret) + break; + *val = left - right; + break; + case '+': + if (arg->op.left->type == PRINT_NULL) + left = 0; + else + ret = arg_num_eval(arg->op.left, &left); + if (!ret) + break; + ret = arg_num_eval(arg->op.right, &right); + if (!ret) + break; + *val = left + right; + break; + default: + do_warning("unknown op '%s'", arg->op.op); + ret = 0; + } + break; + + case PRINT_NULL: + case PRINT_FIELD ... PRINT_SYMBOL: + case PRINT_STRING: + case PRINT_BSTRING: + default: + do_warning("invalid eval type %d", arg->type); + ret = 0; + + } + return ret; +} + +static char *arg_eval (struct print_arg *arg) +{ + long long val; + static char buf[20]; + + switch (arg->type) { + case PRINT_ATOM: + return arg->atom.atom; + case PRINT_TYPE: + return arg_eval(arg->typecast.item); + case PRINT_OP: + if (!arg_num_eval(arg, &val)) + break; + sprintf(buf, "%lld", val); + return buf; + + case PRINT_NULL: + case PRINT_FIELD ... PRINT_SYMBOL: + case PRINT_STRING: + case PRINT_BSTRING: + default: + die("invalid eval type %d", arg->type); + break; + } + + return NULL; +} + +static enum event_type +process_fields(struct event_format *event, struct print_flag_sym **list, char **tok) +{ + enum event_type type; + struct print_arg *arg = NULL; + struct print_flag_sym *field; + char *token = *tok; + char *value; + + do { + free_token(token); + type = read_token_item(&token); + if (test_type_token(type, token, EVENT_OP, "{")) + break; + + arg = alloc_arg(); + + free_token(token); + type = process_arg(event, arg, &token); + + if (type == EVENT_OP) + type = process_op(event, arg, &token); + + if (type == EVENT_ERROR) + goto out_free; + + if (test_type_token(type, token, EVENT_DELIM, ",")) + goto out_free; + + field = malloc_or_die(sizeof(*field)); + memset(field, 0, sizeof(*field)); + + value = arg_eval(arg); + if (value == NULL) + goto out_free; + field->value = strdup(value); + if (field->value == NULL) + goto out_free; + + free_arg(arg); + arg = alloc_arg(); + + free_token(token); + type = process_arg(event, arg, &token); + if (test_type_token(type, token, EVENT_OP, "}")) + goto out_free; + + value = arg_eval(arg); + if (value == NULL) + goto out_free; + field->str = strdup(value); + if (field->str == NULL) + goto out_free; + free_arg(arg); + arg = NULL; + + *list = field; + list = &field->next; + + free_token(token); + type = read_token_item(&token); + } while (type == EVENT_DELIM && strcmp(token, ",") == 0); + + *tok = token; + return type; + +out_free: + free_arg(arg); + free_token(token); + *tok = NULL; + + return EVENT_ERROR; +} + +static enum event_type +process_flags(struct event_format *event, struct print_arg *arg, char **tok) +{ + struct print_arg *field; + enum event_type type; + char *token; + + memset(arg, 0, sizeof(*arg)); + arg->type = PRINT_FLAGS; + + field = alloc_arg(); + + type = process_arg(event, field, &token); + + /* Handle operations in the first argument */ + while (type == EVENT_OP) + type = process_op(event, field, &token); + + if (test_type_token(type, token, EVENT_DELIM, ",")) + goto out_free; + free_token(token); + + arg->flags.field = field; + + type = read_token_item(&token); + if (event_item_type(type)) { + arg->flags.delim = token; + type = read_token_item(&token); + } + + if (test_type_token(type, token, EVENT_DELIM, ",")) + goto out_free; + + type = process_fields(event, &arg->flags.flags, &token); + if (test_type_token(type, token, EVENT_DELIM, ")")) + goto out_free; + + free_token(token); + type = read_token_item(tok); + return type; + + out_free: + free_token(token); + *tok = NULL; + return EVENT_ERROR; +} + +static enum event_type +process_symbols(struct event_format *event, struct print_arg *arg, char **tok) +{ + struct print_arg *field; + enum event_type type; + char *token; + + memset(arg, 0, sizeof(*arg)); + arg->type = PRINT_SYMBOL; + + field = alloc_arg(); + + type = process_arg(event, field, &token); + if (test_type_token(type, token, EVENT_DELIM, ",")) + goto out_free; + + arg->symbol.field = field; + + type = process_fields(event, &arg->symbol.symbols, &token); + if (test_type_token(type, token, EVENT_DELIM, ")")) + goto out_free; + + free_token(token); + type = read_token_item(tok); + return type; + + out_free: + free_token(token); + *tok = NULL; + return EVENT_ERROR; +} + +static enum event_type +process_hex(struct event_format *event, struct print_arg *arg, char **tok) +{ + struct print_arg *field; + enum event_type type; + char *token; + + memset(arg, 0, sizeof(*arg)); + arg->type = PRINT_HEX; + + field = alloc_arg(); + type = process_arg(event, field, &token); + + if (test_type_token(type, token, EVENT_DELIM, ",")) + goto out_free; + + arg->hex.field = field; + + free_token(token); + + field = alloc_arg(); + type = process_arg(event, field, &token); + + if (test_type_token(type, token, EVENT_DELIM, ")")) + goto out_free; + + arg->hex.size = field; + + free_token(token); + type = read_token_item(tok); + return type; + + out_free: + free_arg(field); + free_token(token); + *tok = NULL; + return EVENT_ERROR; +} + +static enum event_type +process_dynamic_array(struct event_format *event, struct print_arg *arg, char **tok) +{ + struct format_field *field; + enum event_type type; + char *token; + + memset(arg, 0, sizeof(*arg)); + arg->type = PRINT_DYNAMIC_ARRAY; + + /* + * The item within the parenthesis is another field that holds + * the index into where the array starts. + */ + type = read_token(&token); + *tok = token; + if (type != EVENT_ITEM) + goto out_free; + + /* Find the field */ + + field = pevent_find_field(event, token); + if (!field) + goto out_free; + + arg->dynarray.field = field; + arg->dynarray.index = 0; + + if (read_expected(EVENT_DELIM, ")") < 0) + goto out_free; + + free_token(token); + type = read_token_item(&token); + *tok = token; + if (type != EVENT_OP || strcmp(token, "[") != 0) + return type; + + free_token(token); + arg = alloc_arg(); + type = process_arg(event, arg, &token); + if (type == EVENT_ERROR) + goto out_free_arg; + + if (!test_type_token(type, token, EVENT_OP, "]")) + goto out_free_arg; + + free_token(token); + type = read_token_item(tok); + return type; + + out_free_arg: + free_arg(arg); + out_free: + free_token(token); + *tok = NULL; + return EVENT_ERROR; +} + +static enum event_type +process_paren(struct event_format *event, struct print_arg *arg, char **tok) +{ + struct print_arg *item_arg; + enum event_type type; + char *token; + + type = process_arg(event, arg, &token); + + if (type == EVENT_ERROR) + goto out_free; + + if (type == EVENT_OP) + type = process_op(event, arg, &token); + + if (type == EVENT_ERROR) + goto out_free; + + if (test_type_token(type, token, EVENT_DELIM, ")")) + goto out_free; + + free_token(token); + type = read_token_item(&token); + + /* + * If the next token is an item or another open paren, then + * this was a typecast. + */ + if (event_item_type(type) || + (type == EVENT_DELIM && strcmp(token, "(") == 0)) { + + /* make this a typecast and contine */ + + /* prevous must be an atom */ + if (arg->type != PRINT_ATOM) + die("previous needed to be PRINT_ATOM"); + + item_arg = alloc_arg(); + + arg->type = PRINT_TYPE; + arg->typecast.type = arg->atom.atom; + arg->typecast.item = item_arg; + type = process_arg_token(event, item_arg, &token, type); + + } + + *tok = token; + return type; + + out_free: + free_token(token); + *tok = NULL; + return EVENT_ERROR; +} + + +static enum event_type +process_str(struct event_format *event __unused, struct print_arg *arg, char **tok) +{ + enum event_type type; + char *token; + + if (read_expect_type(EVENT_ITEM, &token) < 0) + goto out_free; + + arg->type = PRINT_STRING; + arg->string.string = token; + arg->string.offset = -1; + + if (read_expected(EVENT_DELIM, ")") < 0) + goto out_err; + + type = read_token(&token); + *tok = token; + + return type; + + out_free: + free_token(token); + out_err: + *tok = NULL; + return EVENT_ERROR; +} + +static struct pevent_function_handler * +find_func_handler(struct pevent *pevent, char *func_name) +{ + struct pevent_function_handler *func; + + for (func = pevent->func_handlers; func; func = func->next) { + if (strcmp(func->name, func_name) == 0) + break; + } + + return func; +} + +static void remove_func_handler(struct pevent *pevent, char *func_name) +{ + struct pevent_function_handler *func; + struct pevent_function_handler **next; + + next = &pevent->func_handlers; + while ((func = *next)) { + if (strcmp(func->name, func_name) == 0) { + *next = func->next; + free_func_handle(func); + break; + } + next = &func->next; + } +} + +static enum event_type +process_func_handler(struct event_format *event, struct pevent_function_handler *func, + struct print_arg *arg, char **tok) +{ + struct print_arg **next_arg; + struct print_arg *farg; + enum event_type type; + char *token; + char *test; + int i; + + arg->type = PRINT_FUNC; + arg->func.func = func; + + *tok = NULL; + + next_arg = &(arg->func.args); + for (i = 0; i < func->nr_args; i++) { + farg = alloc_arg(); + type = process_arg(event, farg, &token); + if (i < (func->nr_args - 1)) + test = ","; + else + test = ")"; + + if (test_type_token(type, token, EVENT_DELIM, test)) { + free_arg(farg); + free_token(token); + return EVENT_ERROR; + } + + *next_arg = farg; + next_arg = &(farg->next); + free_token(token); + } + + type = read_token(&token); + *tok = token; + + return type; +} + +static enum event_type +process_function(struct event_format *event, struct print_arg *arg, + char *token, char **tok) +{ + struct pevent_function_handler *func; + + if (strcmp(token, "__print_flags") == 0) { + free_token(token); + is_flag_field = 1; + return process_flags(event, arg, tok); + } + if (strcmp(token, "__print_symbolic") == 0) { + free_token(token); + is_symbolic_field = 1; + return process_symbols(event, arg, tok); + } + if (strcmp(token, "__print_hex") == 0) { + free_token(token); + return process_hex(event, arg, tok); + } + if (strcmp(token, "__get_str") == 0) { + free_token(token); + return process_str(event, arg, tok); + } + if (strcmp(token, "__get_dynamic_array") == 0) { + free_token(token); + return process_dynamic_array(event, arg, tok); + } + + func = find_func_handler(event->pevent, token); + if (func) { + free_token(token); + return process_func_handler(event, func, arg, tok); + } + + do_warning("function %s not defined", token); + free_token(token); + return EVENT_ERROR; +} + +static enum event_type +process_arg_token(struct event_format *event, struct print_arg *arg, + char **tok, enum event_type type) +{ + char *token; + char *atom; + + token = *tok; + + switch (type) { + case EVENT_ITEM: + if (strcmp(token, "REC") == 0) { + free_token(token); + type = process_entry(event, arg, &token); + break; + } + atom = token; + /* test the next token */ + type = read_token_item(&token); + + /* + * If the next token is a parenthesis, then this + * is a function. + */ + if (type == EVENT_DELIM && strcmp(token, "(") == 0) { + free_token(token); + token = NULL; + /* this will free atom. */ + type = process_function(event, arg, atom, &token); + break; + } + /* atoms can be more than one token long */ + while (type == EVENT_ITEM) { + char *new_atom; + new_atom = realloc(atom, + strlen(atom) + strlen(token) + 2); + if (!new_atom) { + free(atom); + *tok = NULL; + free_token(token); + return EVENT_ERROR; + } + atom = new_atom; + strcat(atom, " "); + strcat(atom, token); + free_token(token); + type = read_token_item(&token); + } + + arg->type = PRINT_ATOM; + arg->atom.atom = atom; + break; + + case EVENT_DQUOTE: + case EVENT_SQUOTE: + arg->type = PRINT_ATOM; + arg->atom.atom = token; + type = read_token_item(&token); + break; + case EVENT_DELIM: + if (strcmp(token, "(") == 0) { + free_token(token); + type = process_paren(event, arg, &token); + break; + } + case EVENT_OP: + /* handle single ops */ + arg->type = PRINT_OP; + arg->op.op = token; + arg->op.left = NULL; + type = process_op(event, arg, &token); + + /* On error, the op is freed */ + if (type == EVENT_ERROR) + arg->op.op = NULL; + + /* return error type if errored */ + break; + + case EVENT_ERROR ... EVENT_NEWLINE: + default: + die("unexpected type %d", type); + } + *tok = token; + + return type; +} + +static int event_read_print_args(struct event_format *event, struct print_arg **list) +{ + enum event_type type = EVENT_ERROR; + struct print_arg *arg; + char *token; + int args = 0; + + do { + if (type == EVENT_NEWLINE) { + type = read_token_item(&token); + continue; + } + + arg = alloc_arg(); + + type = process_arg(event, arg, &token); + + if (type == EVENT_ERROR) { + free_token(token); + free_arg(arg); + return -1; + } + + *list = arg; + args++; + + if (type == EVENT_OP) { + type = process_op(event, arg, &token); + free_token(token); + if (type == EVENT_ERROR) { + *list = NULL; + free_arg(arg); + return -1; + } + list = &arg->next; + continue; + } + + if (type == EVENT_DELIM && strcmp(token, ",") == 0) { + free_token(token); + *list = arg; + list = &arg->next; + continue; + } + break; + } while (type != EVENT_NONE); + + if (type != EVENT_NONE && type != EVENT_ERROR) + free_token(token); + + return args; +} + +static int event_read_print(struct event_format *event) +{ + enum event_type type; + char *token; + int ret; + + if (read_expected_item(EVENT_ITEM, "print") < 0) + return -1; + + if (read_expected(EVENT_ITEM, "fmt") < 0) + return -1; + + if (read_expected(EVENT_OP, ":") < 0) + return -1; + + if (read_expect_type(EVENT_DQUOTE, &token) < 0) + goto fail; + + concat: + event->print_fmt.format = token; + event->print_fmt.args = NULL; + + /* ok to have no arg */ + type = read_token_item(&token); + + if (type == EVENT_NONE) + return 0; + + /* Handle concatenation of print lines */ + if (type == EVENT_DQUOTE) { + char *cat; + + cat = malloc_or_die(strlen(event->print_fmt.format) + + strlen(token) + 1); + strcpy(cat, event->print_fmt.format); + strcat(cat, token); + free_token(token); + free_token(event->print_fmt.format); + event->print_fmt.format = NULL; + token = cat; + goto concat; + } + + if (test_type_token(type, token, EVENT_DELIM, ",")) + goto fail; + + free_token(token); + + ret = event_read_print_args(event, &event->print_fmt.args); + if (ret < 0) + return -1; + + return ret; + + fail: + free_token(token); + return -1; +} + +/** + * pevent_find_common_field - return a common field by event + * @event: handle for the event + * @name: the name of the common field to return + * + * Returns a common field from the event by the given @name. + * This only searchs the common fields and not all field. + */ +struct format_field * +pevent_find_common_field(struct event_format *event, const char *name) +{ + struct format_field *format; + + for (format = event->format.common_fields; + format; format = format->next) { + if (strcmp(format->name, name) == 0) + break; + } + + return format; +} + +/** + * pevent_find_field - find a non-common field + * @event: handle for the event + * @name: the name of the non-common field + * + * Returns a non-common field by the given @name. + * This does not search common fields. + */ +struct format_field * +pevent_find_field(struct event_format *event, const char *name) +{ + struct format_field *format; + + for (format = event->format.fields; + format; format = format->next) { + if (strcmp(format->name, name) == 0) + break; + } + + return format; +} + +/** + * pevent_find_any_field - find any field by name + * @event: handle for the event + * @name: the name of the field + * + * Returns a field by the given @name. + * This searchs the common field names first, then + * the non-common ones if a common one was not found. + */ +struct format_field * +pevent_find_any_field(struct event_format *event, const char *name) +{ + struct format_field *format; + + format = pevent_find_common_field(event, name); + if (format) + return format; + return pevent_find_field(event, name); +} + +/** + * pevent_read_number - read a number from data + * @pevent: handle for the pevent + * @ptr: the raw data + * @size: the size of the data that holds the number + * + * Returns the number (converted to host) from the + * raw data. + */ +unsigned long long pevent_read_number(struct pevent *pevent, + const void *ptr, int size) +{ + switch (size) { + case 1: + return *(unsigned char *)ptr; + case 2: + return data2host2(pevent, ptr); + case 4: + return data2host4(pevent, ptr); + case 8: + return data2host8(pevent, ptr); + default: + /* BUG! */ + return 0; + } +} + +/** + * pevent_read_number_field - read a number from data + * @field: a handle to the field + * @data: the raw data to read + * @value: the value to place the number in + * + * Reads raw data according to a field offset and size, + * and translates it into @value. + * + * Returns 0 on success, -1 otherwise. + */ +int pevent_read_number_field(struct format_field *field, const void *data, + unsigned long long *value) +{ + if (!field) + return -1; + switch (field->size) { + case 1: + case 2: + case 4: + case 8: + *value = pevent_read_number(field->event->pevent, + data + field->offset, field->size); + return 0; + default: + return -1; + } +} + +static int get_common_info(struct pevent *pevent, + const char *type, int *offset, int *size) +{ + struct event_format *event; + struct format_field *field; + + /* + * All events should have the same common elements. + * Pick any event to find where the type is; + */ + if (!pevent->events) + die("no event_list!"); + + event = pevent->events[0]; + field = pevent_find_common_field(event, type); + if (!field) + return -1; + + *offset = field->offset; + *size = field->size; + + return 0; +} + +static int __parse_common(struct pevent *pevent, void *data, + int *size, int *offset, const char *name) +{ + int ret; + + if (!*size) { + ret = get_common_info(pevent, name, offset, size); + if (ret < 0) + return ret; + } + return pevent_read_number(pevent, data + *offset, *size); +} + +static int trace_parse_common_type(struct pevent *pevent, void *data) +{ + return __parse_common(pevent, data, + &pevent->type_size, &pevent->type_offset, + "common_type"); +} + +static int parse_common_pid(struct pevent *pevent, void *data) +{ + return __parse_common(pevent, data, + &pevent->pid_size, &pevent->pid_offset, + "common_pid"); +} + +static int parse_common_pc(struct pevent *pevent, void *data) +{ + return __parse_common(pevent, data, + &pevent->pc_size, &pevent->pc_offset, + "common_preempt_count"); +} + +static int parse_common_flags(struct pevent *pevent, void *data) +{ + return __parse_common(pevent, data, + &pevent->flags_size, &pevent->flags_offset, + "common_flags"); +} + +static int parse_common_lock_depth(struct pevent *pevent, void *data) +{ + return __parse_common(pevent, data, + &pevent->ld_size, &pevent->ld_offset, + "common_lock_depth"); +} + +static int parse_common_migrate_disable(struct pevent *pevent, void *data) +{ + return __parse_common(pevent, data, + &pevent->ld_size, &pevent->ld_offset, + "common_migrate_disable"); +} + +static int events_id_cmp(const void *a, const void *b); + +/** + * pevent_find_event - find an event by given id + * @pevent: a handle to the pevent + * @id: the id of the event + * + * Returns an event that has a given @id. + */ +struct event_format *pevent_find_event(struct pevent *pevent, int id) +{ + struct event_format **eventptr; + struct event_format key; + struct event_format *pkey = &key; + + /* Check cache first */ + if (pevent->last_event && pevent->last_event->id == id) + return pevent->last_event; + + key.id = id; + + eventptr = bsearch(&pkey, pevent->events, pevent->nr_events, + sizeof(*pevent->events), events_id_cmp); + + if (eventptr) { + pevent->last_event = *eventptr; + return *eventptr; + } + + return NULL; +} + +/** + * pevent_find_event_by_name - find an event by given name + * @pevent: a handle to the pevent + * @sys: the system name to search for + * @name: the name of the event to search for + * + * This returns an event with a given @name and under the system + * @sys. If @sys is NULL the first event with @name is returned. + */ +struct event_format * +pevent_find_event_by_name(struct pevent *pevent, + const char *sys, const char *name) +{ + struct event_format *event; + int i; + + if (pevent->last_event && + strcmp(pevent->last_event->name, name) == 0 && + (!sys || strcmp(pevent->last_event->system, sys) == 0)) + return pevent->last_event; + + for (i = 0; i < pevent->nr_events; i++) { + event = pevent->events[i]; + if (strcmp(event->name, name) == 0) { + if (!sys) + break; + if (strcmp(event->system, sys) == 0) + break; + } + } + if (i == pevent->nr_events) + event = NULL; + + pevent->last_event = event; + return event; +} + +static unsigned long long +eval_num_arg(void *data, int size, struct event_format *event, struct print_arg *arg) +{ + struct pevent *pevent = event->pevent; + unsigned long long val = 0; + unsigned long long left, right; + struct print_arg *typearg = NULL; + struct print_arg *larg; + unsigned long offset; + unsigned int field_size; + + switch (arg->type) { + case PRINT_NULL: + /* ?? */ + return 0; + case PRINT_ATOM: + return strtoull(arg->atom.atom, NULL, 0); + case PRINT_FIELD: + if (!arg->field.field) { + arg->field.field = pevent_find_any_field(event, arg->field.name); + if (!arg->field.field) + die("field %s not found", arg->field.name); + } + /* must be a number */ + val = pevent_read_number(pevent, data + arg->field.field->offset, + arg->field.field->size); + break; + case PRINT_FLAGS: + case PRINT_SYMBOL: + case PRINT_HEX: + break; + case PRINT_TYPE: + val = eval_num_arg(data, size, event, arg->typecast.item); + return eval_type(val, arg, 0); + case PRINT_STRING: + case PRINT_BSTRING: + return 0; + case PRINT_FUNC: { + struct trace_seq s; + trace_seq_init(&s); + val = process_defined_func(&s, data, size, event, arg); + trace_seq_destroy(&s); + return val; + } + case PRINT_OP: + if (strcmp(arg->op.op, "[") == 0) { + /* + * Arrays are special, since we don't want + * to read the arg as is. + */ + right = eval_num_arg(data, size, event, arg->op.right); + + /* handle typecasts */ + larg = arg->op.left; + while (larg->type == PRINT_TYPE) { + if (!typearg) + typearg = larg; + larg = larg->typecast.item; + } + + /* Default to long size */ + field_size = pevent->long_size; + + switch (larg->type) { + case PRINT_DYNAMIC_ARRAY: + offset = pevent_read_number(pevent, + data + larg->dynarray.field->offset, + larg->dynarray.field->size); + if (larg->dynarray.field->elementsize) + field_size = larg->dynarray.field->elementsize; + /* + * The actual length of the dynamic array is stored + * in the top half of the field, and the offset + * is in the bottom half of the 32 bit field. + */ + offset &= 0xffff; + offset += right; + break; + case PRINT_FIELD: + if (!larg->field.field) { + larg->field.field = + pevent_find_any_field(event, larg->field.name); + if (!larg->field.field) + die("field %s not found", larg->field.name); + } + field_size = larg->field.field->elementsize; + offset = larg->field.field->offset + + right * larg->field.field->elementsize; + break; + default: + goto default_op; /* oops, all bets off */ + } + val = pevent_read_number(pevent, + data + offset, field_size); + if (typearg) + val = eval_type(val, typearg, 1); + break; + } else if (strcmp(arg->op.op, "?") == 0) { + left = eval_num_arg(data, size, event, arg->op.left); + arg = arg->op.right; + if (left) + val = eval_num_arg(data, size, event, arg->op.left); + else + val = eval_num_arg(data, size, event, arg->op.right); + break; + } + default_op: + left = eval_num_arg(data, size, event, arg->op.left); + right = eval_num_arg(data, size, event, arg->op.right); + switch (arg->op.op[0]) { + case '!': + switch (arg->op.op[1]) { + case 0: + val = !right; + break; + case '=': + val = left != right; + break; + default: + die("unknown op '%s'", arg->op.op); + } + break; + case '~': + val = ~right; + break; + case '|': + if (arg->op.op[1]) + val = left || right; + else + val = left | right; + break; + case '&': + if (arg->op.op[1]) + val = left && right; + else + val = left & right; + break; + case '<': + switch (arg->op.op[1]) { + case 0: + val = left < right; + break; + case '<': + val = left << right; + break; + case '=': + val = left <= right; + break; + default: + die("unknown op '%s'", arg->op.op); + } + break; + case '>': + switch (arg->op.op[1]) { + case 0: + val = left > right; + break; + case '>': + val = left >> right; + break; + case '=': + val = left >= right; + break; + default: + die("unknown op '%s'", arg->op.op); + } + break; + case '=': + if (arg->op.op[1] != '=') + die("unknown op '%s'", arg->op.op); + val = left == right; + break; + case '-': + val = left - right; + break; + case '+': + val = left + right; + break; + case '/': + val = left / right; + break; + case '*': + val = left * right; + break; + default: + die("unknown op '%s'", arg->op.op); + } + break; + default: /* not sure what to do there */ + return 0; + } + return val; +} + +struct flag { + const char *name; + unsigned long long value; +}; + +static const struct flag flags[] = { + { "HI_SOFTIRQ", 0 }, + { "TIMER_SOFTIRQ", 1 }, + { "NET_TX_SOFTIRQ", 2 }, + { "NET_RX_SOFTIRQ", 3 }, + { "BLOCK_SOFTIRQ", 4 }, + { "BLOCK_IOPOLL_SOFTIRQ", 5 }, + { "TASKLET_SOFTIRQ", 6 }, + { "SCHED_SOFTIRQ", 7 }, + { "HRTIMER_SOFTIRQ", 8 }, + { "RCU_SOFTIRQ", 9 }, + + { "HRTIMER_NORESTART", 0 }, + { "HRTIMER_RESTART", 1 }, +}; + +static unsigned long long eval_flag(const char *flag) +{ + int i; + + /* + * Some flags in the format files do not get converted. + * If the flag is not numeric, see if it is something that + * we already know about. + */ + if (isdigit(flag[0])) + return strtoull(flag, NULL, 0); + + for (i = 0; i < (int)(sizeof(flags)/sizeof(flags[0])); i++) + if (strcmp(flags[i].name, flag) == 0) + return flags[i].value; + + return 0; +} + +static void print_str_to_seq(struct trace_seq *s, const char *format, + int len_arg, const char *str) +{ + if (len_arg >= 0) + trace_seq_printf(s, format, len_arg, str); + else + trace_seq_printf(s, format, str); +} + +static void print_str_arg(struct trace_seq *s, void *data, int size, + struct event_format *event, const char *format, + int len_arg, struct print_arg *arg) +{ + struct pevent *pevent = event->pevent; + struct print_flag_sym *flag; + struct format_field *field; + unsigned long long val, fval; + unsigned long addr; + char *str; + unsigned char *hex; + int print; + int i, len; + + switch (arg->type) { + case PRINT_NULL: + /* ?? */ + return; + case PRINT_ATOM: + print_str_to_seq(s, format, len_arg, arg->atom.atom); + return; + case PRINT_FIELD: + field = arg->field.field; + if (!field) { + field = pevent_find_any_field(event, arg->field.name); + if (!field) + die("field %s not found", arg->field.name); + arg->field.field = field; + } + /* Zero sized fields, mean the rest of the data */ + len = field->size ? : size - field->offset; + + /* + * Some events pass in pointers. If this is not an array + * and the size is the same as long_size, assume that it + * is a pointer. + */ + if (!(field->flags & FIELD_IS_ARRAY) && + field->size == pevent->long_size) { + addr = *(unsigned long *)(data + field->offset); + trace_seq_printf(s, "%lx", addr); + break; + } + str = malloc_or_die(len + 1); + memcpy(str, data + field->offset, len); + str[len] = 0; + print_str_to_seq(s, format, len_arg, str); + free(str); + break; + case PRINT_FLAGS: + val = eval_num_arg(data, size, event, arg->flags.field); + print = 0; + for (flag = arg->flags.flags; flag; flag = flag->next) { + fval = eval_flag(flag->value); + if (!val && !fval) { + print_str_to_seq(s, format, len_arg, flag->str); + break; + } + if (fval && (val & fval) == fval) { + if (print && arg->flags.delim) + trace_seq_puts(s, arg->flags.delim); + print_str_to_seq(s, format, len_arg, flag->str); + print = 1; + val &= ~fval; + } + } + break; + case PRINT_SYMBOL: + val = eval_num_arg(data, size, event, arg->symbol.field); + for (flag = arg->symbol.symbols; flag; flag = flag->next) { + fval = eval_flag(flag->value); + if (val == fval) { + print_str_to_seq(s, format, len_arg, flag->str); + break; + } + } + break; + case PRINT_HEX: + field = arg->hex.field->field.field; + if (!field) { + str = arg->hex.field->field.name; + field = pevent_find_any_field(event, str); + if (!field) + die("field %s not found", str); + arg->hex.field->field.field = field; + } + hex = data + field->offset; + len = eval_num_arg(data, size, event, arg->hex.size); + for (i = 0; i < len; i++) { + if (i) + trace_seq_putc(s, ' '); + trace_seq_printf(s, "%02x", hex[i]); + } + break; + + case PRINT_TYPE: + break; + case PRINT_STRING: { + int str_offset; + + if (arg->string.offset == -1) { + struct format_field *f; + + f = pevent_find_any_field(event, arg->string.string); + arg->string.offset = f->offset; + } + str_offset = data2host4(pevent, data + arg->string.offset); + str_offset &= 0xffff; + print_str_to_seq(s, format, len_arg, ((char *)data) + str_offset); + break; + } + case PRINT_BSTRING: + print_str_to_seq(s, format, len_arg, arg->string.string); + break; + case PRINT_OP: + /* + * The only op for string should be ? : + */ + if (arg->op.op[0] != '?') + return; + val = eval_num_arg(data, size, event, arg->op.left); + if (val) + print_str_arg(s, data, size, event, + format, len_arg, arg->op.right->op.left); + else + print_str_arg(s, data, size, event, + format, len_arg, arg->op.right->op.right); + break; + case PRINT_FUNC: + process_defined_func(s, data, size, event, arg); + break; + default: + /* well... */ + break; + } +} + +static unsigned long long +process_defined_func(struct trace_seq *s, void *data, int size, + struct event_format *event, struct print_arg *arg) +{ + struct pevent_function_handler *func_handle = arg->func.func; + struct pevent_func_params *param; + unsigned long long *args; + unsigned long long ret; + struct print_arg *farg; + struct trace_seq str; + struct save_str { + struct save_str *next; + char *str; + } *strings = NULL, *string; + int i; + + if (!func_handle->nr_args) { + ret = (*func_handle->func)(s, NULL); + goto out; + } + + farg = arg->func.args; + param = func_handle->params; + + args = malloc_or_die(sizeof(*args) * func_handle->nr_args); + for (i = 0; i < func_handle->nr_args; i++) { + switch (param->type) { + case PEVENT_FUNC_ARG_INT: + case PEVENT_FUNC_ARG_LONG: + case PEVENT_FUNC_ARG_PTR: + args[i] = eval_num_arg(data, size, event, farg); + break; + case PEVENT_FUNC_ARG_STRING: + trace_seq_init(&str); + print_str_arg(&str, data, size, event, "%s", -1, farg); + trace_seq_terminate(&str); + string = malloc_or_die(sizeof(*string)); + string->next = strings; + string->str = strdup(str.buffer); + if (!string->str) + die("malloc str"); + + args[i] = (unsigned long long)string->str; + strings = string; + trace_seq_destroy(&str); + break; + default: + /* + * Something went totally wrong, this is not + * an input error, something in this code broke. + */ + die("Unexpected end of arguments\n"); + break; + } + farg = farg->next; + param = param->next; + } + + ret = (*func_handle->func)(s, args); + free(args); + while (strings) { + string = strings; + strings = string->next; + free(string->str); + free(string); + } + + out: + /* TBD : handle return type here */ + return ret; +} + +static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struct event_format *event) +{ + struct pevent *pevent = event->pevent; + struct format_field *field, *ip_field; + struct print_arg *args, *arg, **next; + unsigned long long ip, val; + char *ptr; + void *bptr; + int vsize; + + field = pevent->bprint_buf_field; + ip_field = pevent->bprint_ip_field; + + if (!field) { + field = pevent_find_field(event, "buf"); + if (!field) + die("can't find buffer field for binary printk"); + ip_field = pevent_find_field(event, "ip"); + if (!ip_field) + die("can't find ip field for binary printk"); + pevent->bprint_buf_field = field; + pevent->bprint_ip_field = ip_field; + } + + ip = pevent_read_number(pevent, data + ip_field->offset, ip_field->size); + + /* + * The first arg is the IP pointer. + */ + args = alloc_arg(); + arg = args; + arg->next = NULL; + next = &arg->next; + + arg->type = PRINT_ATOM; + arg->atom.atom = malloc_or_die(32); + sprintf(arg->atom.atom, "%lld", ip); + + /* skip the first "%pf : " */ + for (ptr = fmt + 6, bptr = data + field->offset; + bptr < data + size && *ptr; ptr++) { + int ls = 0; + + if (*ptr == '%') { + process_again: + ptr++; + switch (*ptr) { + case '%': + break; + case 'l': + ls++; + goto process_again; + case 'L': + ls = 2; + goto process_again; + case '0' ... '9': + goto process_again; + case '.': + goto process_again; + case 'p': + ls = 1; + /* fall through */ + case 'd': + case 'u': + case 'x': + case 'i': + switch (ls) { + case 0: + vsize = 4; + break; + case 1: + vsize = pevent->long_size; + break; + case 2: + vsize = 8; + break; + default: + vsize = ls; /* ? */ + break; + } + /* fall through */ + case '*': + if (*ptr == '*') + vsize = 4; + + /* the pointers are always 4 bytes aligned */ + bptr = (void *)(((unsigned long)bptr + 3) & + ~3); + val = pevent_read_number(pevent, bptr, vsize); + bptr += vsize; + arg = alloc_arg(); + arg->next = NULL; + arg->type = PRINT_ATOM; + arg->atom.atom = malloc_or_die(32); + sprintf(arg->atom.atom, "%lld", val); + *next = arg; + next = &arg->next; + /* + * The '*' case means that an arg is used as the length. + * We need to continue to figure out for what. + */ + if (*ptr == '*') + goto process_again; + + break; + case 's': + arg = alloc_arg(); + arg->next = NULL; + arg->type = PRINT_BSTRING; + arg->string.string = strdup(bptr); + if (!arg->string.string) + break; + bptr += strlen(bptr) + 1; + *next = arg; + next = &arg->next; + default: + break; + } + } + } + + return args; +} + +static void free_args(struct print_arg *args) +{ + struct print_arg *next; + + while (args) { + next = args->next; + + free_arg(args); + args = next; + } +} + +static char * +get_bprint_format(void *data, int size __unused, struct event_format *event) +{ + struct pevent *pevent = event->pevent; + unsigned long long addr; + struct format_field *field; + struct printk_map *printk; + char *format; + char *p; + + field = pevent->bprint_fmt_field; + + if (!field) { + field = pevent_find_field(event, "fmt"); + if (!field) + die("can't find format field for binary printk"); + pevent->bprint_fmt_field = field; + } + + addr = pevent_read_number(pevent, data + field->offset, field->size); + + printk = find_printk(pevent, addr); + if (!printk) { + format = malloc_or_die(45); + sprintf(format, "%%pf : (NO FORMAT FOUND at %llx)\n", + addr); + return format; + } + + p = printk->printk; + /* Remove any quotes. */ + if (*p == '"') + p++; + format = malloc_or_die(strlen(p) + 10); + sprintf(format, "%s : %s", "%pf", p); + /* remove ending quotes and new line since we will add one too */ + p = format + strlen(format) - 1; + if (*p == '"') + *p = 0; + + p -= 2; + if (strcmp(p, "\\n") == 0) + *p = 0; + + return format; +} + +static void print_mac_arg(struct trace_seq *s, int mac, void *data, int size, + struct event_format *event, struct print_arg *arg) +{ + unsigned char *buf; + char *fmt = "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x"; + + if (arg->type == PRINT_FUNC) { + process_defined_func(s, data, size, event, arg); + return; + } + + if (arg->type != PRINT_FIELD) { + trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", + arg->type); + return; + } + + if (mac == 'm') + fmt = "%.2x%.2x%.2x%.2x%.2x%.2x"; + if (!arg->field.field) { + arg->field.field = + pevent_find_any_field(event, arg->field.name); + if (!arg->field.field) + die("field %s not found", arg->field.name); + } + if (arg->field.field->size != 6) { + trace_seq_printf(s, "INVALIDMAC"); + return; + } + buf = data + arg->field.field->offset; + trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]); +} + +static int is_printable_array(char *p, unsigned int len) +{ + unsigned int i; + + for (i = 0; i < len && p[i]; i++) + if (!isprint(p[i])) + return 0; + return 1; +} + +static void print_event_fields(struct trace_seq *s, void *data, int size, + struct event_format *event) +{ + struct format_field *field; + unsigned long long val; + unsigned int offset, len, i; + + field = event->format.fields; + while (field) { + trace_seq_printf(s, " %s=", field->name); + if (field->flags & FIELD_IS_ARRAY) { + offset = field->offset; + len = field->size; + if (field->flags & FIELD_IS_DYNAMIC) { + val = pevent_read_number(event->pevent, data + offset, len); + offset = val; + len = offset >> 16; + offset &= 0xffff; + } + if (field->flags & FIELD_IS_STRING && + is_printable_array(data + offset, len)) { + trace_seq_printf(s, "%s", (char *)data + offset); + } else { + trace_seq_puts(s, "ARRAY["); + for (i = 0; i < len; i++) { + if (i) + trace_seq_puts(s, ", "); + trace_seq_printf(s, "%02x", + *((unsigned char *)data + offset + i)); + } + trace_seq_putc(s, ']'); + field->flags &= ~FIELD_IS_STRING; + } + } else { + val = pevent_read_number(event->pevent, data + field->offset, + field->size); + if (field->flags & FIELD_IS_POINTER) { + trace_seq_printf(s, "0x%llx", val); + } else if (field->flags & FIELD_IS_SIGNED) { + switch (field->size) { + case 4: + /* + * If field is long then print it in hex. + * A long usually stores pointers. + */ + if (field->flags & FIELD_IS_LONG) + trace_seq_printf(s, "0x%x", (int)val); + else + trace_seq_printf(s, "%d", (int)val); + break; + case 2: + trace_seq_printf(s, "%2d", (short)val); + break; + case 1: + trace_seq_printf(s, "%1d", (char)val); + break; + default: + trace_seq_printf(s, "%lld", val); + } + } else { + if (field->flags & FIELD_IS_LONG) + trace_seq_printf(s, "0x%llx", val); + else + trace_seq_printf(s, "%llu", val); + } + } + field = field->next; + } +} + +static void pretty_print(struct trace_seq *s, void *data, int size, struct event_format *event) +{ + struct pevent *pevent = event->pevent; + struct print_fmt *print_fmt = &event->print_fmt; + struct print_arg *arg = print_fmt->args; + struct print_arg *args = NULL; + const char *ptr = print_fmt->format; + unsigned long long val; + struct func_map *func; + const char *saveptr; + char *bprint_fmt = NULL; + char format[32]; + int show_func; + int len_as_arg; + int len_arg; + int len; + int ls; + + if (event->flags & EVENT_FL_FAILED) { + trace_seq_printf(s, "[FAILED TO PARSE]"); + print_event_fields(s, data, size, event); + return; + } + + if (event->flags & EVENT_FL_ISBPRINT) { + bprint_fmt = get_bprint_format(data, size, event); + args = make_bprint_args(bprint_fmt, data, size, event); + arg = args; + ptr = bprint_fmt; + } + + for (; *ptr; ptr++) { + ls = 0; + if (*ptr == '\\') { + ptr++; + switch (*ptr) { + case 'n': + trace_seq_putc(s, '\n'); + break; + case 't': + trace_seq_putc(s, '\t'); + break; + case 'r': + trace_seq_putc(s, '\r'); + break; + case '\\': + trace_seq_putc(s, '\\'); + break; + default: + trace_seq_putc(s, *ptr); + break; + } + + } else if (*ptr == '%') { + saveptr = ptr; + show_func = 0; + len_as_arg = 0; + cont_process: + ptr++; + switch (*ptr) { + case '%': + trace_seq_putc(s, '%'); + break; + case '#': + /* FIXME: need to handle properly */ + goto cont_process; + case 'h': + ls--; + goto cont_process; + case 'l': + ls++; + goto cont_process; + case 'L': + ls = 2; + goto cont_process; + case '*': + /* The argument is the length. */ + if (!arg) + die("no argument match"); + len_arg = eval_num_arg(data, size, event, arg); + len_as_arg = 1; + arg = arg->next; + goto cont_process; + case '.': + case 'z': + case 'Z': + case '0' ... '9': + goto cont_process; + case 'p': + if (pevent->long_size == 4) + ls = 1; + else + ls = 2; + + if (*(ptr+1) == 'F' || + *(ptr+1) == 'f') { + ptr++; + show_func = *ptr; + } else if (*(ptr+1) == 'M' || *(ptr+1) == 'm') { + print_mac_arg(s, *(ptr+1), data, size, event, arg); + ptr++; + arg = arg->next; + break; + } + + /* fall through */ + case 'd': + case 'i': + case 'x': + case 'X': + case 'u': + if (!arg) + die("no argument match"); + + len = ((unsigned long)ptr + 1) - + (unsigned long)saveptr; + + /* should never happen */ + if (len > 31) + die("bad format!"); + + memcpy(format, saveptr, len); + format[len] = 0; + + val = eval_num_arg(data, size, event, arg); + arg = arg->next; + + if (show_func) { + func = find_func(pevent, val); + if (func) { + trace_seq_puts(s, func->func); + if (show_func == 'F') + trace_seq_printf(s, + "+0x%llx", + val - func->addr); + break; + } + } + if (pevent->long_size == 8 && ls && + sizeof(long) != 8) { + char *p; + + ls = 2; + /* make %l into %ll */ + p = strchr(format, 'l'); + if (p) + memmove(p+1, p, strlen(p)+1); + else if (strcmp(format, "%p") == 0) + strcpy(format, "0x%llx"); + } + switch (ls) { + case -2: + if (len_as_arg) + trace_seq_printf(s, format, len_arg, (char)val); + else + trace_seq_printf(s, format, (char)val); + break; + case -1: + if (len_as_arg) + trace_seq_printf(s, format, len_arg, (short)val); + else + trace_seq_printf(s, format, (short)val); + break; + case 0: + if (len_as_arg) + trace_seq_printf(s, format, len_arg, (int)val); + else + trace_seq_printf(s, format, (int)val); + break; + case 1: + if (len_as_arg) + trace_seq_printf(s, format, len_arg, (long)val); + else + trace_seq_printf(s, format, (long)val); + break; + case 2: + if (len_as_arg) + trace_seq_printf(s, format, len_arg, + (long long)val); + else + trace_seq_printf(s, format, (long long)val); + break; + default: + die("bad count (%d)", ls); + } + break; + case 's': + if (!arg) + die("no matching argument"); + + len = ((unsigned long)ptr + 1) - + (unsigned long)saveptr; + + /* should never happen */ + if (len > 31) + die("bad format!"); + + memcpy(format, saveptr, len); + format[len] = 0; + if (!len_as_arg) + len_arg = -1; + print_str_arg(s, data, size, event, + format, len_arg, arg); + arg = arg->next; + break; + default: + trace_seq_printf(s, ">%c<", *ptr); + + } + } else + trace_seq_putc(s, *ptr); + } + + if (args) { + free_args(args); + free(bprint_fmt); + } +} + +/** + * pevent_data_lat_fmt - parse the data for the latency format + * @pevent: a handle to the pevent + * @s: the trace_seq to write to + * @record: the record to read from + * + * This parses out the Latency format (interrupts disabled, + * need rescheduling, in hard/soft interrupt, preempt count + * and lock depth) and places it into the trace_seq. + */ +void pevent_data_lat_fmt(struct pevent *pevent, + struct trace_seq *s, struct pevent_record *record) +{ + static int check_lock_depth = 1; + static int check_migrate_disable = 1; + static int lock_depth_exists; + static int migrate_disable_exists; + unsigned int lat_flags; + unsigned int pc; + int lock_depth; + int migrate_disable; + int hardirq; + int softirq; + void *data = record->data; + + lat_flags = parse_common_flags(pevent, data); + pc = parse_common_pc(pevent, data); + /* lock_depth may not always exist */ + if (lock_depth_exists) + lock_depth = parse_common_lock_depth(pevent, data); + else if (check_lock_depth) { + lock_depth = parse_common_lock_depth(pevent, data); + if (lock_depth < 0) + check_lock_depth = 0; + else + lock_depth_exists = 1; + } + + /* migrate_disable may not always exist */ + if (migrate_disable_exists) + migrate_disable = parse_common_migrate_disable(pevent, data); + else if (check_migrate_disable) { + migrate_disable = parse_common_migrate_disable(pevent, data); + if (migrate_disable < 0) + check_migrate_disable = 0; + else + migrate_disable_exists = 1; + } + + hardirq = lat_flags & TRACE_FLAG_HARDIRQ; + softirq = lat_flags & TRACE_FLAG_SOFTIRQ; + + trace_seq_printf(s, "%c%c%c", + (lat_flags & TRACE_FLAG_IRQS_OFF) ? 'd' : + (lat_flags & TRACE_FLAG_IRQS_NOSUPPORT) ? + 'X' : '.', + (lat_flags & TRACE_FLAG_NEED_RESCHED) ? + 'N' : '.', + (hardirq && softirq) ? 'H' : + hardirq ? 'h' : softirq ? 's' : '.'); + + if (pc) + trace_seq_printf(s, "%x", pc); + else + trace_seq_putc(s, '.'); + + if (migrate_disable_exists) { + if (migrate_disable < 0) + trace_seq_putc(s, '.'); + else + trace_seq_printf(s, "%d", migrate_disable); + } + + if (lock_depth_exists) { + if (lock_depth < 0) + trace_seq_putc(s, '.'); + else + trace_seq_printf(s, "%d", lock_depth); + } + + trace_seq_terminate(s); +} + +/** + * pevent_data_type - parse out the given event type + * @pevent: a handle to the pevent + * @rec: the record to read from + * + * This returns the event id from the @rec. + */ +int pevent_data_type(struct pevent *pevent, struct pevent_record *rec) +{ + return trace_parse_common_type(pevent, rec->data); +} + +/** + * pevent_data_event_from_type - find the event by a given type + * @pevent: a handle to the pevent + * @type: the type of the event. + * + * This returns the event form a given @type; + */ +struct event_format *pevent_data_event_from_type(struct pevent *pevent, int type) +{ + return pevent_find_event(pevent, type); +} + +/** + * pevent_data_pid - parse the PID from raw data + * @pevent: a handle to the pevent + * @rec: the record to parse + * + * This returns the PID from a raw data. + */ +int pevent_data_pid(struct pevent *pevent, struct pevent_record *rec) +{ + return parse_common_pid(pevent, rec->data); +} + +/** + * pevent_data_comm_from_pid - return the command line from PID + * @pevent: a handle to the pevent + * @pid: the PID of the task to search for + * + * This returns a pointer to the command line that has the given + * @pid. + */ +const char *pevent_data_comm_from_pid(struct pevent *pevent, int pid) +{ + const char *comm; + + comm = find_cmdline(pevent, pid); + return comm; +} + +/** + * pevent_data_comm_from_pid - parse the data into the print format + * @s: the trace_seq to write to + * @event: the handle to the event + * @record: the record to read from + * + * This parses the raw @data using the given @event information and + * writes the print format into the trace_seq. + */ +void pevent_event_info(struct trace_seq *s, struct event_format *event, + struct pevent_record *record) +{ + int print_pretty = 1; + + if (event->pevent->print_raw) + print_event_fields(s, record->data, record->size, event); + else { + + if (event->handler) + print_pretty = event->handler(s, record, event, + event->context); + + if (print_pretty) + pretty_print(s, record->data, record->size, event); + } + + trace_seq_terminate(s); +} + +void pevent_print_event(struct pevent *pevent, struct trace_seq *s, + struct pevent_record *record) +{ + static char *spaces = " "; /* 20 spaces */ + struct event_format *event; + unsigned long secs; + unsigned long usecs; + unsigned long nsecs; + const char *comm; + void *data = record->data; + int type; + int pid; + int len; + int p; + + secs = record->ts / NSECS_PER_SEC; + nsecs = record->ts - secs * NSECS_PER_SEC; + + if (record->size < 0) { + do_warning("ug! negative record size %d", record->size); + return; + } + + type = trace_parse_common_type(pevent, data); + + event = pevent_find_event(pevent, type); + if (!event) { + do_warning("ug! no event found for type %d", type); + return; + } + + pid = parse_common_pid(pevent, data); + comm = find_cmdline(pevent, pid); + + if (pevent->latency_format) { + trace_seq_printf(s, "%8.8s-%-5d %3d", + comm, pid, record->cpu); + pevent_data_lat_fmt(pevent, s, record); + } else + trace_seq_printf(s, "%16s-%-5d [%03d]", comm, pid, record->cpu); + + if (pevent->flags & PEVENT_NSEC_OUTPUT) { + usecs = nsecs; + p = 9; + } else { + usecs = (nsecs + 500) / NSECS_PER_USEC; + p = 6; + } + + trace_seq_printf(s, " %5lu.%0*lu: %s: ", secs, p, usecs, event->name); + + /* Space out the event names evenly. */ + len = strlen(event->name); + if (len < 20) + trace_seq_printf(s, "%.*s", 20 - len, spaces); + + pevent_event_info(s, event, record); +} + +static int events_id_cmp(const void *a, const void *b) +{ + struct event_format * const * ea = a; + struct event_format * const * eb = b; + + if ((*ea)->id < (*eb)->id) + return -1; + + if ((*ea)->id > (*eb)->id) + return 1; + + return 0; +} + +static int events_name_cmp(const void *a, const void *b) +{ + struct event_format * const * ea = a; + struct event_format * const * eb = b; + int res; + + res = strcmp((*ea)->name, (*eb)->name); + if (res) + return res; + + res = strcmp((*ea)->system, (*eb)->system); + if (res) + return res; + + return events_id_cmp(a, b); +} + +static int events_system_cmp(const void *a, const void *b) +{ + struct event_format * const * ea = a; + struct event_format * const * eb = b; + int res; + + res = strcmp((*ea)->system, (*eb)->system); + if (res) + return res; + + res = strcmp((*ea)->name, (*eb)->name); + if (res) + return res; + + return events_id_cmp(a, b); +} + +struct event_format **pevent_list_events(struct pevent *pevent, enum event_sort_type sort_type) +{ + struct event_format **events; + int (*sort)(const void *a, const void *b); + + events = pevent->sort_events; + + if (events && pevent->last_type == sort_type) + return events; + + if (!events) { + events = malloc(sizeof(*events) * (pevent->nr_events + 1)); + if (!events) + return NULL; + + memcpy(events, pevent->events, sizeof(*events) * pevent->nr_events); + events[pevent->nr_events] = NULL; + + pevent->sort_events = events; + + /* the internal events are sorted by id */ + if (sort_type == EVENT_SORT_ID) { + pevent->last_type = sort_type; + return events; + } + } + + switch (sort_type) { + case EVENT_SORT_ID: + sort = events_id_cmp; + break; + case EVENT_SORT_NAME: + sort = events_name_cmp; + break; + case EVENT_SORT_SYSTEM: + sort = events_system_cmp; + break; + default: + return events; + } + + qsort(events, pevent->nr_events, sizeof(*events), sort); + pevent->last_type = sort_type; + + return events; +} + +static struct format_field ** +get_event_fields(const char *type, const char *name, + int count, struct format_field *list) +{ + struct format_field **fields; + struct format_field *field; + int i = 0; + + fields = malloc_or_die(sizeof(*fields) * (count + 1)); + for (field = list; field; field = field->next) { + fields[i++] = field; + if (i == count + 1) { + do_warning("event %s has more %s fields than specified", + name, type); + i--; + break; + } + } + + if (i != count) + do_warning("event %s has less %s fields than specified", + name, type); + + fields[i] = NULL; + + return fields; +} + +/** + * pevent_event_common_fields - return a list of common fields for an event + * @event: the event to return the common fields of. + * + * Returns an allocated array of fields. The last item in the array is NULL. + * The array must be freed with free(). + */ +struct format_field **pevent_event_common_fields(struct event_format *event) +{ + return get_event_fields("common", event->name, + event->format.nr_common, + event->format.common_fields); +} + +/** + * pevent_event_fields - return a list of event specific fields for an event + * @event: the event to return the fields of. + * + * Returns an allocated array of fields. The last item in the array is NULL. + * The array must be freed with free(). + */ +struct format_field **pevent_event_fields(struct event_format *event) +{ + return get_event_fields("event", event->name, + event->format.nr_fields, + event->format.fields); +} + +static void print_fields(struct trace_seq *s, struct print_flag_sym *field) +{ + trace_seq_printf(s, "{ %s, %s }", field->value, field->str); + if (field->next) { + trace_seq_puts(s, ", "); + print_fields(s, field->next); + } +} + +/* for debugging */ +static void print_args(struct print_arg *args) +{ + int print_paren = 1; + struct trace_seq s; + + switch (args->type) { + case PRINT_NULL: + printf("null"); + break; + case PRINT_ATOM: + printf("%s", args->atom.atom); + break; + case PRINT_FIELD: + printf("REC->%s", args->field.name); + break; + case PRINT_FLAGS: + printf("__print_flags("); + print_args(args->flags.field); + printf(", %s, ", args->flags.delim); + trace_seq_init(&s); + print_fields(&s, args->flags.flags); + trace_seq_do_printf(&s); + trace_seq_destroy(&s); + printf(")"); + break; + case PRINT_SYMBOL: + printf("__print_symbolic("); + print_args(args->symbol.field); + printf(", "); + trace_seq_init(&s); + print_fields(&s, args->symbol.symbols); + trace_seq_do_printf(&s); + trace_seq_destroy(&s); + printf(")"); + break; + case PRINT_HEX: + printf("__print_hex("); + print_args(args->hex.field); + printf(", "); + print_args(args->hex.size); + printf(")"); + break; + case PRINT_STRING: + case PRINT_BSTRING: + printf("__get_str(%s)", args->string.string); + break; + case PRINT_TYPE: + printf("(%s)", args->typecast.type); + print_args(args->typecast.item); + break; + case PRINT_OP: + if (strcmp(args->op.op, ":") == 0) + print_paren = 0; + if (print_paren) + printf("("); + print_args(args->op.left); + printf(" %s ", args->op.op); + print_args(args->op.right); + if (print_paren) + printf(")"); + break; + default: + /* we should warn... */ + return; + } + if (args->next) { + printf("\n"); + print_args(args->next); + } +} + +static void parse_header_field(const char *field, + int *offset, int *size, int mandatory) +{ + unsigned long long save_input_buf_ptr; + unsigned long long save_input_buf_siz; + char *token; + int type; + + save_input_buf_ptr = input_buf_ptr; + save_input_buf_siz = input_buf_siz; + + if (read_expected(EVENT_ITEM, "field") < 0) + return; + if (read_expected(EVENT_OP, ":") < 0) + return; + + /* type */ + if (read_expect_type(EVENT_ITEM, &token) < 0) + goto fail; + free_token(token); + + /* + * If this is not a mandatory field, then test it first. + */ + if (mandatory) { + if (read_expected(EVENT_ITEM, field) < 0) + return; + } else { + if (read_expect_type(EVENT_ITEM, &token) < 0) + goto fail; + if (strcmp(token, field) != 0) + goto discard; + free_token(token); + } + + if (read_expected(EVENT_OP, ";") < 0) + return; + if (read_expected(EVENT_ITEM, "offset") < 0) + return; + if (read_expected(EVENT_OP, ":") < 0) + return; + if (read_expect_type(EVENT_ITEM, &token) < 0) + goto fail; + *offset = atoi(token); + free_token(token); + if (read_expected(EVENT_OP, ";") < 0) + return; + if (read_expected(EVENT_ITEM, "size") < 0) + return; + if (read_expected(EVENT_OP, ":") < 0) + return; + if (read_expect_type(EVENT_ITEM, &token) < 0) + goto fail; + *size = atoi(token); + free_token(token); + if (read_expected(EVENT_OP, ";") < 0) + return; + type = read_token(&token); + if (type != EVENT_NEWLINE) { + /* newer versions of the kernel have a "signed" type */ + if (type != EVENT_ITEM) + goto fail; + + if (strcmp(token, "signed") != 0) + goto fail; + + free_token(token); + + if (read_expected(EVENT_OP, ":") < 0) + return; + + if (read_expect_type(EVENT_ITEM, &token)) + goto fail; + + free_token(token); + if (read_expected(EVENT_OP, ";") < 0) + return; + + if (read_expect_type(EVENT_NEWLINE, &token)) + goto fail; + } + fail: + free_token(token); + return; + + discard: + input_buf_ptr = save_input_buf_ptr; + input_buf_siz = save_input_buf_siz; + *offset = 0; + *size = 0; + free_token(token); +} + +/** + * pevent_parse_header_page - parse the data stored in the header page + * @pevent: the handle to the pevent + * @buf: the buffer storing the header page format string + * @size: the size of @buf + * @long_size: the long size to use if there is no header + * + * This parses the header page format for information on the + * ring buffer used. The @buf should be copied from + * + * /sys/kernel/debug/tracing/events/header_page + */ +int pevent_parse_header_page(struct pevent *pevent, char *buf, unsigned long size, + int long_size) +{ + int ignore; + + if (!size) { + /* + * Old kernels did not have header page info. + * Sorry but we just use what we find here in user space. + */ + pevent->header_page_ts_size = sizeof(long long); + pevent->header_page_size_size = long_size; + pevent->header_page_data_offset = sizeof(long long) + long_size; + pevent->old_format = 1; + return -1; + } + init_input_buf(buf, size); + + parse_header_field("timestamp", &pevent->header_page_ts_offset, + &pevent->header_page_ts_size, 1); + parse_header_field("commit", &pevent->header_page_size_offset, + &pevent->header_page_size_size, 1); + parse_header_field("overwrite", &pevent->header_page_overwrite, + &ignore, 0); + parse_header_field("data", &pevent->header_page_data_offset, + &pevent->header_page_data_size, 1); + + return 0; +} + +static int event_matches(struct event_format *event, + int id, const char *sys_name, + const char *event_name) +{ + if (id >= 0 && id != event->id) + return 0; + + if (event_name && (strcmp(event_name, event->name) != 0)) + return 0; + + if (sys_name && (strcmp(sys_name, event->system) != 0)) + return 0; + + return 1; +} + +static void free_handler(struct event_handler *handle) +{ + free((void *)handle->sys_name); + free((void *)handle->event_name); + free(handle); +} + +static int find_event_handle(struct pevent *pevent, struct event_format *event) +{ + struct event_handler *handle, **next; + + for (next = &pevent->handlers; *next; + next = &(*next)->next) { + handle = *next; + if (event_matches(event, handle->id, + handle->sys_name, + handle->event_name)) + break; + } + + if (!(*next)) + return 0; + + pr_stat("overriding event (%d) %s:%s with new print handler", + event->id, event->system, event->name); + + event->handler = handle->func; + event->context = handle->context; + + *next = handle->next; + free_handler(handle); + + return 1; +} + +/** + * pevent_parse_event - parse the event format + * @pevent: the handle to the pevent + * @buf: the buffer storing the event format string + * @size: the size of @buf + * @sys: the system the event belongs to + * + * This parses the event format and creates an event structure + * to quickly parse raw data for a given event. + * + * These files currently come from: + * + * /sys/kernel/debug/tracing/events/.../.../format + */ +int pevent_parse_event(struct pevent *pevent, + const char *buf, unsigned long size, + const char *sys) +{ + struct event_format *event; + int ret; + + init_input_buf(buf, size); + + event = alloc_event(); + if (!event) + return -ENOMEM; + + event->name = event_read_name(); + if (!event->name) { + /* Bad event? */ + free(event); + return -1; + } + + if (strcmp(sys, "ftrace") == 0) { + + event->flags |= EVENT_FL_ISFTRACE; + + if (strcmp(event->name, "bprint") == 0) + event->flags |= EVENT_FL_ISBPRINT; + } + + event->id = event_read_id(); + if (event->id < 0) + die("failed to read event id"); + + event->system = strdup(sys); + if (!event->system) + die("failed to allocate system"); + + /* Add pevent to event so that it can be referenced */ + event->pevent = pevent; + + ret = event_read_format(event); + if (ret < 0) { + do_warning("failed to read event format for %s", event->name); + goto event_failed; + } + + /* + * If the event has an override, don't print warnings if the event + * print format fails to parse. + */ + if (find_event_handle(pevent, event)) + show_warning = 0; + + ret = event_read_print(event); + if (ret < 0) { + do_warning("failed to read event print fmt for %s", + event->name); + show_warning = 1; + goto event_failed; + } + show_warning = 1; + + add_event(pevent, event); + + if (!ret && (event->flags & EVENT_FL_ISFTRACE)) { + struct format_field *field; + struct print_arg *arg, **list; + + /* old ftrace had no args */ + + list = &event->print_fmt.args; + for (field = event->format.fields; field; field = field->next) { + arg = alloc_arg(); + *list = arg; + list = &arg->next; + arg->type = PRINT_FIELD; + arg->field.name = strdup(field->name); + if (!arg->field.name) { + do_warning("failed to allocate field name"); + event->flags |= EVENT_FL_FAILED; + return -1; + } + arg->field.field = field; + } + return 0; + } + +#define PRINT_ARGS 0 + if (PRINT_ARGS && event->print_fmt.args) + print_args(event->print_fmt.args); + + return 0; + + event_failed: + event->flags |= EVENT_FL_FAILED; + /* still add it even if it failed */ + add_event(pevent, event); + return -1; +} + +int get_field_val(struct trace_seq *s, struct format_field *field, + const char *name, struct pevent_record *record, + unsigned long long *val, int err) +{ + if (!field) { + if (err) + trace_seq_printf(s, "<CANT FIND FIELD %s>", name); + return -1; + } + + if (pevent_read_number_field(field, record->data, val)) { + if (err) + trace_seq_printf(s, " %s=INVALID", name); + return -1; + } + + return 0; +} + +/** + * pevent_get_field_raw - return the raw pointer into the data field + * @s: The seq to print to on error + * @event: the event that the field is for + * @name: The name of the field + * @record: The record with the field name. + * @len: place to store the field length. + * @err: print default error if failed. + * + * Returns a pointer into record->data of the field and places + * the length of the field in @len. + * + * On failure, it returns NULL. + */ +void *pevent_get_field_raw(struct trace_seq *s, struct event_format *event, + const char *name, struct pevent_record *record, + int *len, int err) +{ + struct format_field *field; + void *data = record->data; + unsigned offset; + int dummy; + + if (!event) + return NULL; + + field = pevent_find_field(event, name); + + if (!field) { + if (err) + trace_seq_printf(s, "<CANT FIND FIELD %s>", name); + return NULL; + } + + /* Allow @len to be NULL */ + if (!len) + len = &dummy; + + offset = field->offset; + if (field->flags & FIELD_IS_DYNAMIC) { + offset = pevent_read_number(event->pevent, + data + offset, field->size); + *len = offset >> 16; + offset &= 0xffff; + } else + *len = field->size; + + return data + offset; +} + +/** + * pevent_get_field_val - find a field and return its value + * @s: The seq to print to on error + * @event: the event that the field is for + * @name: The name of the field + * @record: The record with the field name. + * @val: place to store the value of the field. + * @err: print default error if failed. + * + * Returns 0 on success -1 on field not found. + */ +int pevent_get_field_val(struct trace_seq *s, struct event_format *event, + const char *name, struct pevent_record *record, + unsigned long long *val, int err) +{ + struct format_field *field; + + if (!event) + return -1; + + field = pevent_find_field(event, name); + + return get_field_val(s, field, name, record, val, err); +} + +/** + * pevent_get_common_field_val - find a common field and return its value + * @s: The seq to print to on error + * @event: the event that the field is for + * @name: The name of the field + * @record: The record with the field name. + * @val: place to store the value of the field. + * @err: print default error if failed. + * + * Returns 0 on success -1 on field not found. + */ +int pevent_get_common_field_val(struct trace_seq *s, struct event_format *event, + const char *name, struct pevent_record *record, + unsigned long long *val, int err) +{ + struct format_field *field; + + if (!event) + return -1; + + field = pevent_find_common_field(event, name); + + return get_field_val(s, field, name, record, val, err); +} + +/** + * pevent_get_any_field_val - find a any field and return its value + * @s: The seq to print to on error + * @event: the event that the field is for + * @name: The name of the field + * @record: The record with the field name. + * @val: place to store the value of the field. + * @err: print default error if failed. + * + * Returns 0 on success -1 on field not found. + */ +int pevent_get_any_field_val(struct trace_seq *s, struct event_format *event, + const char *name, struct pevent_record *record, + unsigned long long *val, int err) +{ + struct format_field *field; + + if (!event) + return -1; + + field = pevent_find_any_field(event, name); + + return get_field_val(s, field, name, record, val, err); +} + +/** + * pevent_print_num_field - print a field and a format + * @s: The seq to print to + * @fmt: The printf format to print the field with. + * @event: the event that the field is for + * @name: The name of the field + * @record: The record with the field name. + * @err: print default error if failed. + * + * Returns: 0 on success, -1 field not found, or 1 if buffer is full. + */ +int pevent_print_num_field(struct trace_seq *s, const char *fmt, + struct event_format *event, const char *name, + struct pevent_record *record, int err) +{ + struct format_field *field = pevent_find_field(event, name); + unsigned long long val; + + if (!field) + goto failed; + + if (pevent_read_number_field(field, record->data, &val)) + goto failed; + + return trace_seq_printf(s, fmt, val); + + failed: + if (err) + trace_seq_printf(s, "CAN'T FIND FIELD \"%s\"", name); + return -1; +} + +static void free_func_handle(struct pevent_function_handler *func) +{ + struct pevent_func_params *params; + + free(func->name); + + while (func->params) { + params = func->params; + func->params = params->next; + free(params); + } + + free(func); +} + +/** + * pevent_register_print_function - register a helper function + * @pevent: the handle to the pevent + * @func: the function to process the helper function + * @ret_type: the return type of the helper function + * @name: the name of the helper function + * @parameters: A list of enum pevent_func_arg_type + * + * Some events may have helper functions in the print format arguments. + * This allows a plugin to dynamically create a way to process one + * of these functions. + * + * The @parameters is a variable list of pevent_func_arg_type enums that + * must end with PEVENT_FUNC_ARG_VOID. + */ +int pevent_register_print_function(struct pevent *pevent, + pevent_func_handler func, + enum pevent_func_arg_type ret_type, + char *name, ...) +{ + struct pevent_function_handler *func_handle; + struct pevent_func_params **next_param; + struct pevent_func_params *param; + enum pevent_func_arg_type type; + va_list ap; + + func_handle = find_func_handler(pevent, name); + if (func_handle) { + /* + * This is most like caused by the users own + * plugins updating the function. This overrides the + * system defaults. + */ + pr_stat("override of function helper '%s'", name); + remove_func_handler(pevent, name); + } + + func_handle = malloc_or_die(sizeof(*func_handle)); + memset(func_handle, 0, sizeof(*func_handle)); + + func_handle->ret_type = ret_type; + func_handle->name = strdup(name); + func_handle->func = func; + if (!func_handle->name) + die("Failed to allocate function name"); + + next_param = &(func_handle->params); + va_start(ap, name); + for (;;) { + type = va_arg(ap, enum pevent_func_arg_type); + if (type == PEVENT_FUNC_ARG_VOID) + break; + + if (type < 0 || type >= PEVENT_FUNC_ARG_MAX_TYPES) { + warning("Invalid argument type %d", type); + goto out_free; + } + + param = malloc_or_die(sizeof(*param)); + param->type = type; + param->next = NULL; + + *next_param = param; + next_param = &(param->next); + + func_handle->nr_args++; + } + va_end(ap); + + func_handle->next = pevent->func_handlers; + pevent->func_handlers = func_handle; + + return 0; + out_free: + va_end(ap); + free_func_handle(func_handle); + return -1; +} + +/** + * pevent_register_event_handler - register a way to parse an event + * @pevent: the handle to the pevent + * @id: the id of the event to register + * @sys_name: the system name the event belongs to + * @event_name: the name of the event + * @func: the function to call to parse the event information + * @context: the data to be passed to @func + * + * This function allows a developer to override the parsing of + * a given event. If for some reason the default print format + * is not sufficient, this function will register a function + * for an event to be used to parse the data instead. + * + * If @id is >= 0, then it is used to find the event. + * else @sys_name and @event_name are used. + */ +int pevent_register_event_handler(struct pevent *pevent, + int id, char *sys_name, char *event_name, + pevent_event_handler_func func, + void *context) +{ + struct event_format *event; + struct event_handler *handle; + + if (id >= 0) { + /* search by id */ + event = pevent_find_event(pevent, id); + if (!event) + goto not_found; + if (event_name && (strcmp(event_name, event->name) != 0)) + goto not_found; + if (sys_name && (strcmp(sys_name, event->system) != 0)) + goto not_found; + } else { + event = pevent_find_event_by_name(pevent, sys_name, event_name); + if (!event) + goto not_found; + } + + pr_stat("overriding event (%d) %s:%s with new print handler", + event->id, event->system, event->name); + + event->handler = func; + event->context = context; + return 0; + + not_found: + /* Save for later use. */ + handle = malloc_or_die(sizeof(*handle)); + memset(handle, 0, sizeof(*handle)); + handle->id = id; + if (event_name) + handle->event_name = strdup(event_name); + if (sys_name) + handle->sys_name = strdup(sys_name); + + if ((event_name && !handle->event_name) || + (sys_name && !handle->sys_name)) { + die("Failed to allocate event/sys name"); + } + + handle->func = func; + handle->next = pevent->handlers; + pevent->handlers = handle; + handle->context = context; + + return -1; +} + +/** + * pevent_alloc - create a pevent handle + */ +struct pevent *pevent_alloc(void) +{ + struct pevent *pevent; + + pevent = malloc(sizeof(*pevent)); + if (!pevent) + return NULL; + memset(pevent, 0, sizeof(*pevent)); + pevent->ref_count = 1; + + return pevent; +} + +void pevent_ref(struct pevent *pevent) +{ + pevent->ref_count++; +} + +static void free_format_fields(struct format_field *field) +{ + struct format_field *next; + + while (field) { + next = field->next; + free(field->type); + free(field->name); + free(field); + field = next; + } +} + +static void free_formats(struct format *format) +{ + free_format_fields(format->common_fields); + free_format_fields(format->fields); +} + +static void free_event(struct event_format *event) +{ + free(event->name); + free(event->system); + + free_formats(&event->format); + + free(event->print_fmt.format); + free_args(event->print_fmt.args); + + free(event); +} + +/** + * pevent_free - free a pevent handle + * @pevent: the pevent handle to free + */ +void pevent_free(struct pevent *pevent) +{ + struct cmdline_list *cmdlist, *cmdnext; + struct func_list *funclist, *funcnext; + struct printk_list *printklist, *printknext; + struct pevent_function_handler *func_handler; + struct event_handler *handle; + int i; + + if (!pevent) + return; + + cmdlist = pevent->cmdlist; + funclist = pevent->funclist; + printklist = pevent->printklist; + + pevent->ref_count--; + if (pevent->ref_count) + return; + + if (pevent->cmdlines) { + for (i = 0; i < pevent->cmdline_count; i++) + free(pevent->cmdlines[i].comm); + free(pevent->cmdlines); + } + + while (cmdlist) { + cmdnext = cmdlist->next; + free(cmdlist->comm); + free(cmdlist); + cmdlist = cmdnext; + } + + if (pevent->func_map) { + for (i = 0; i < pevent->func_count; i++) { + free(pevent->func_map[i].func); + free(pevent->func_map[i].mod); + } + free(pevent->func_map); + } + + while (funclist) { + funcnext = funclist->next; + free(funclist->func); + free(funclist->mod); + free(funclist); + funclist = funcnext; + } + + while (pevent->func_handlers) { + func_handler = pevent->func_handlers; + pevent->func_handlers = func_handler->next; + free_func_handle(func_handler); + } + + if (pevent->printk_map) { + for (i = 0; i < pevent->printk_count; i++) + free(pevent->printk_map[i].printk); + free(pevent->printk_map); + } + + while (printklist) { + printknext = printklist->next; + free(printklist->printk); + free(printklist); + printklist = printknext; + } + + for (i = 0; i < pevent->nr_events; i++) + free_event(pevent->events[i]); + + while (pevent->handlers) { + handle = pevent->handlers; + pevent->handlers = handle->next; + free_handler(handle); + } + + free(pevent->events); + free(pevent->sort_events); + + free(pevent); +} + +void pevent_unref(struct pevent *pevent) +{ + pevent_free(pevent); +} diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h new file mode 100644 index 000000000000..5772ad8cb386 --- /dev/null +++ b/tools/lib/traceevent/event-parse.h @@ -0,0 +1,811 @@ +/* + * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#ifndef _PARSE_EVENTS_H +#define _PARSE_EVENTS_H + +#include <stdarg.h> +#include <regex.h> + +#ifndef __unused +#define __unused __attribute__ ((unused)) +#endif + +/* ----------------------- trace_seq ----------------------- */ + + +#ifndef TRACE_SEQ_BUF_SIZE +#define TRACE_SEQ_BUF_SIZE 4096 +#endif + +#ifndef DEBUG_RECORD +#define DEBUG_RECORD 0 +#endif + +struct pevent_record { + unsigned long long ts; + unsigned long long offset; + long long missed_events; /* buffer dropped events before */ + int record_size; /* size of binary record */ + int size; /* size of data */ + void *data; + int cpu; + int ref_count; + int locked; /* Do not free, even if ref_count is zero */ + void *private; +#if DEBUG_RECORD + struct pevent_record *prev; + struct pevent_record *next; + long alloc_addr; +#endif +}; + +/* + * Trace sequences are used to allow a function to call several other functions + * to create a string of data to use (up to a max of PAGE_SIZE). + */ + +struct trace_seq { + char *buffer; + unsigned int buffer_size; + unsigned int len; + unsigned int readpos; +}; + +void trace_seq_init(struct trace_seq *s); +void trace_seq_destroy(struct trace_seq *s); + +extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); +extern int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args) + __attribute__ ((format (printf, 2, 0))); + +extern int trace_seq_puts(struct trace_seq *s, const char *str); +extern int trace_seq_putc(struct trace_seq *s, unsigned char c); + +extern void trace_seq_terminate(struct trace_seq *s); + +extern int trace_seq_do_printf(struct trace_seq *s); + + +/* ----------------------- pevent ----------------------- */ + +struct pevent; +struct event_format; + +typedef int (*pevent_event_handler_func)(struct trace_seq *s, + struct pevent_record *record, + struct event_format *event, + void *context); + +typedef int (*pevent_plugin_load_func)(struct pevent *pevent); +typedef int (*pevent_plugin_unload_func)(void); + +struct plugin_option { + struct plugin_option *next; + void *handle; + char *file; + char *name; + char *plugin_alias; + char *description; + char *value; + void *private; + int set; +}; + +/* + * Plugin hooks that can be called: + * + * PEVENT_PLUGIN_LOADER: (required) + * The function name to initialized the plugin. + * + * int PEVENT_PLUGIN_LOADER(struct pevent *pevent) + * + * PEVENT_PLUGIN_UNLOADER: (optional) + * The function called just before unloading + * + * int PEVENT_PLUGIN_UNLOADER(void) + * + * PEVENT_PLUGIN_OPTIONS: (optional) + * Plugin options that can be set before loading + * + * struct plugin_option PEVENT_PLUGIN_OPTIONS[] = { + * { + * .name = "option-name", + * .plugin_alias = "overide-file-name", (optional) + * .description = "description of option to show users", + * }, + * { + * .name = NULL, + * }, + * }; + * + * Array must end with .name = NULL; + * + * + * .plugin_alias is used to give a shorter name to access + * the vairable. Useful if a plugin handles more than one event. + * + * PEVENT_PLUGIN_ALIAS: (optional) + * The name to use for finding options (uses filename if not defined) + */ +#define PEVENT_PLUGIN_LOADER pevent_plugin_loader +#define PEVENT_PLUGIN_UNLOADER pevent_plugin_unloader +#define PEVENT_PLUGIN_OPTIONS pevent_plugin_options +#define PEVENT_PLUGIN_ALIAS pevent_plugin_alias +#define _MAKE_STR(x) #x +#define MAKE_STR(x) _MAKE_STR(x) +#define PEVENT_PLUGIN_LOADER_NAME MAKE_STR(PEVENT_PLUGIN_LOADER) +#define PEVENT_PLUGIN_UNLOADER_NAME MAKE_STR(PEVENT_PLUGIN_UNLOADER) +#define PEVENT_PLUGIN_OPTIONS_NAME MAKE_STR(PEVENT_PLUGIN_OPTIONS) +#define PEVENT_PLUGIN_ALIAS_NAME MAKE_STR(PEVENT_PLUGIN_ALIAS) + +#define NSECS_PER_SEC 1000000000ULL +#define NSECS_PER_USEC 1000ULL + +enum format_flags { + FIELD_IS_ARRAY = 1, + FIELD_IS_POINTER = 2, + FIELD_IS_SIGNED = 4, + FIELD_IS_STRING = 8, + FIELD_IS_DYNAMIC = 16, + FIELD_IS_LONG = 32, + FIELD_IS_FLAG = 64, + FIELD_IS_SYMBOLIC = 128, +}; + +struct format_field { + struct format_field *next; + struct event_format *event; + char *type; + char *name; + int offset; + int size; + unsigned int arraylen; + unsigned int elementsize; + unsigned long flags; +}; + +struct format { + int nr_common; + int nr_fields; + struct format_field *common_fields; + struct format_field *fields; +}; + +struct print_arg_atom { + char *atom; +}; + +struct print_arg_string { + char *string; + int offset; +}; + +struct print_arg_field { + char *name; + struct format_field *field; +}; + +struct print_flag_sym { + struct print_flag_sym *next; + char *value; + char *str; +}; + +struct print_arg_typecast { + char *type; + struct print_arg *item; +}; + +struct print_arg_flags { + struct print_arg *field; + char *delim; + struct print_flag_sym *flags; +}; + +struct print_arg_symbol { + struct print_arg *field; + struct print_flag_sym *symbols; +}; + +struct print_arg_hex { + struct print_arg *field; + struct print_arg *size; +}; + +struct print_arg_dynarray { + struct format_field *field; + struct print_arg *index; +}; + +struct print_arg; + +struct print_arg_op { + char *op; + int prio; + struct print_arg *left; + struct print_arg *right; +}; + +struct pevent_function_handler; + +struct print_arg_func { + struct pevent_function_handler *func; + struct print_arg *args; +}; + +enum print_arg_type { + PRINT_NULL, + PRINT_ATOM, + PRINT_FIELD, + PRINT_FLAGS, + PRINT_SYMBOL, + PRINT_HEX, + PRINT_TYPE, + PRINT_STRING, + PRINT_BSTRING, + PRINT_DYNAMIC_ARRAY, + PRINT_OP, + PRINT_FUNC, +}; + +struct print_arg { + struct print_arg *next; + enum print_arg_type type; + union { + struct print_arg_atom atom; + struct print_arg_field field; + struct print_arg_typecast typecast; + struct print_arg_flags flags; + struct print_arg_symbol symbol; + struct print_arg_hex hex; + struct print_arg_func func; + struct print_arg_string string; + struct print_arg_op op; + struct print_arg_dynarray dynarray; + }; +}; + +struct print_fmt { + char *format; + struct print_arg *args; +}; + +struct event_format { + struct pevent *pevent; + char *name; + int id; + int flags; + struct format format; + struct print_fmt print_fmt; + char *system; + pevent_event_handler_func handler; + void *context; +}; + +enum { + EVENT_FL_ISFTRACE = 0x01, + EVENT_FL_ISPRINT = 0x02, + EVENT_FL_ISBPRINT = 0x04, + EVENT_FL_ISFUNCENT = 0x10, + EVENT_FL_ISFUNCRET = 0x20, + + EVENT_FL_FAILED = 0x80000000 +}; + +enum event_sort_type { + EVENT_SORT_ID, + EVENT_SORT_NAME, + EVENT_SORT_SYSTEM, +}; + +enum event_type { + EVENT_ERROR, + EVENT_NONE, + EVENT_SPACE, + EVENT_NEWLINE, + EVENT_OP, + EVENT_DELIM, + EVENT_ITEM, + EVENT_DQUOTE, + EVENT_SQUOTE, +}; + +typedef unsigned long long (*pevent_func_handler)(struct trace_seq *s, + unsigned long long *args); + +enum pevent_func_arg_type { + PEVENT_FUNC_ARG_VOID, + PEVENT_FUNC_ARG_INT, + PEVENT_FUNC_ARG_LONG, + PEVENT_FUNC_ARG_STRING, + PEVENT_FUNC_ARG_PTR, + PEVENT_FUNC_ARG_MAX_TYPES +}; + +enum pevent_flag { + PEVENT_NSEC_OUTPUT = 1, /* output in NSECS */ +}; + +struct cmdline; +struct cmdline_list; +struct func_map; +struct func_list; +struct event_handler; + +struct pevent { + int ref_count; + + int header_page_ts_offset; + int header_page_ts_size; + int header_page_size_offset; + int header_page_size_size; + int header_page_data_offset; + int header_page_data_size; + int header_page_overwrite; + + int file_bigendian; + int host_bigendian; + + int latency_format; + + int old_format; + + int cpus; + int long_size; + + struct cmdline *cmdlines; + struct cmdline_list *cmdlist; + int cmdline_count; + + struct func_map *func_map; + struct func_list *funclist; + unsigned int func_count; + + struct printk_map *printk_map; + struct printk_list *printklist; + unsigned int printk_count; + + + struct event_format **events; + int nr_events; + struct event_format **sort_events; + enum event_sort_type last_type; + + int type_offset; + int type_size; + + int pid_offset; + int pid_size; + + int pc_offset; + int pc_size; + + int flags_offset; + int flags_size; + + int ld_offset; + int ld_size; + + int print_raw; + + int test_filters; + + int flags; + + struct format_field *bprint_ip_field; + struct format_field *bprint_fmt_field; + struct format_field *bprint_buf_field; + + struct event_handler *handlers; + struct pevent_function_handler *func_handlers; + + /* cache */ + struct event_format *last_event; +}; + +static inline void pevent_set_flag(struct pevent *pevent, int flag) +{ + pevent->flags |= flag; +} + +static inline unsigned short +__data2host2(struct pevent *pevent, unsigned short data) +{ + unsigned short swap; + + if (pevent->host_bigendian == pevent->file_bigendian) + return data; + + swap = ((data & 0xffULL) << 8) | + ((data & (0xffULL << 8)) >> 8); + + return swap; +} + +static inline unsigned int +__data2host4(struct pevent *pevent, unsigned int data) +{ + unsigned int swap; + + if (pevent->host_bigendian == pevent->file_bigendian) + return data; + + swap = ((data & 0xffULL) << 24) | + ((data & (0xffULL << 8)) << 8) | + ((data & (0xffULL << 16)) >> 8) | + ((data & (0xffULL << 24)) >> 24); + + return swap; +} + +static inline unsigned long long +__data2host8(struct pevent *pevent, unsigned long long data) +{ + unsigned long long swap; + + if (pevent->host_bigendian == pevent->file_bigendian) + return data; + + swap = ((data & 0xffULL) << 56) | + ((data & (0xffULL << 8)) << 40) | + ((data & (0xffULL << 16)) << 24) | + ((data & (0xffULL << 24)) << 8) | + ((data & (0xffULL << 32)) >> 8) | + ((data & (0xffULL << 40)) >> 24) | + ((data & (0xffULL << 48)) >> 40) | + ((data & (0xffULL << 56)) >> 56); + + return swap; +} + +#define data2host2(pevent, ptr) __data2host2(pevent, *(unsigned short *)(ptr)) +#define data2host4(pevent, ptr) __data2host4(pevent, *(unsigned int *)(ptr)) +#define data2host8(pevent, ptr) \ +({ \ + unsigned long long __val; \ + \ + memcpy(&__val, (ptr), sizeof(unsigned long long)); \ + __data2host8(pevent, __val); \ +}) + +/* taken from kernel/trace/trace.h */ +enum trace_flag_type { + TRACE_FLAG_IRQS_OFF = 0x01, + TRACE_FLAG_IRQS_NOSUPPORT = 0x02, + TRACE_FLAG_NEED_RESCHED = 0x04, + TRACE_FLAG_HARDIRQ = 0x08, + TRACE_FLAG_SOFTIRQ = 0x10, +}; + +int pevent_register_comm(struct pevent *pevent, const char *comm, int pid); +int pevent_register_function(struct pevent *pevent, char *name, + unsigned long long addr, char *mod); +int pevent_register_print_string(struct pevent *pevent, char *fmt, + unsigned long long addr); +int pevent_pid_is_registered(struct pevent *pevent, int pid); + +void pevent_print_event(struct pevent *pevent, struct trace_seq *s, + struct pevent_record *record); + +int pevent_parse_header_page(struct pevent *pevent, char *buf, unsigned long size, + int long_size); + +int pevent_parse_event(struct pevent *pevent, const char *buf, + unsigned long size, const char *sys); + +void *pevent_get_field_raw(struct trace_seq *s, struct event_format *event, + const char *name, struct pevent_record *record, + int *len, int err); + +int pevent_get_field_val(struct trace_seq *s, struct event_format *event, + const char *name, struct pevent_record *record, + unsigned long long *val, int err); +int pevent_get_common_field_val(struct trace_seq *s, struct event_format *event, + const char *name, struct pevent_record *record, + unsigned long long *val, int err); +int pevent_get_any_field_val(struct trace_seq *s, struct event_format *event, + const char *name, struct pevent_record *record, + unsigned long long *val, int err); + +int pevent_print_num_field(struct trace_seq *s, const char *fmt, + struct event_format *event, const char *name, + struct pevent_record *record, int err); + +int pevent_register_event_handler(struct pevent *pevent, int id, char *sys_name, char *event_name, + pevent_event_handler_func func, void *context); +int pevent_register_print_function(struct pevent *pevent, + pevent_func_handler func, + enum pevent_func_arg_type ret_type, + char *name, ...); + +struct format_field *pevent_find_common_field(struct event_format *event, const char *name); +struct format_field *pevent_find_field(struct event_format *event, const char *name); +struct format_field *pevent_find_any_field(struct event_format *event, const char *name); + +const char *pevent_find_function(struct pevent *pevent, unsigned long long addr); +unsigned long long +pevent_find_function_address(struct pevent *pevent, unsigned long long addr); +unsigned long long pevent_read_number(struct pevent *pevent, const void *ptr, int size); +int pevent_read_number_field(struct format_field *field, const void *data, + unsigned long long *value); + +struct event_format *pevent_find_event(struct pevent *pevent, int id); + +struct event_format * +pevent_find_event_by_name(struct pevent *pevent, const char *sys, const char *name); + +void pevent_data_lat_fmt(struct pevent *pevent, + struct trace_seq *s, struct pevent_record *record); +int pevent_data_type(struct pevent *pevent, struct pevent_record *rec); +struct event_format *pevent_data_event_from_type(struct pevent *pevent, int type); +int pevent_data_pid(struct pevent *pevent, struct pevent_record *rec); +const char *pevent_data_comm_from_pid(struct pevent *pevent, int pid); +void pevent_event_info(struct trace_seq *s, struct event_format *event, + struct pevent_record *record); + +struct event_format **pevent_list_events(struct pevent *pevent, enum event_sort_type); +struct format_field **pevent_event_common_fields(struct event_format *event); +struct format_field **pevent_event_fields(struct event_format *event); + +static inline int pevent_get_cpus(struct pevent *pevent) +{ + return pevent->cpus; +} + +static inline void pevent_set_cpus(struct pevent *pevent, int cpus) +{ + pevent->cpus = cpus; +} + +static inline int pevent_get_long_size(struct pevent *pevent) +{ + return pevent->long_size; +} + +static inline void pevent_set_long_size(struct pevent *pevent, int long_size) +{ + pevent->long_size = long_size; +} + +static inline int pevent_is_file_bigendian(struct pevent *pevent) +{ + return pevent->file_bigendian; +} + +static inline void pevent_set_file_bigendian(struct pevent *pevent, int endian) +{ + pevent->file_bigendian = endian; +} + +static inline int pevent_is_host_bigendian(struct pevent *pevent) +{ + return pevent->host_bigendian; +} + +static inline void pevent_set_host_bigendian(struct pevent *pevent, int endian) +{ + pevent->host_bigendian = endian; +} + +static inline int pevent_is_latency_format(struct pevent *pevent) +{ + return pevent->latency_format; +} + +static inline void pevent_set_latency_format(struct pevent *pevent, int lat) +{ + pevent->latency_format = lat; +} + +struct pevent *pevent_alloc(void); +void pevent_free(struct pevent *pevent); +void pevent_ref(struct pevent *pevent); +void pevent_unref(struct pevent *pevent); + +/* access to the internal parser */ +void pevent_buffer_init(const char *buf, unsigned long long size); +enum event_type pevent_read_token(char **tok); +void pevent_free_token(char *token); +int pevent_peek_char(void); +const char *pevent_get_input_buf(void); +unsigned long long pevent_get_input_buf_ptr(void); + +/* for debugging */ +void pevent_print_funcs(struct pevent *pevent); +void pevent_print_printk(struct pevent *pevent); + +/* ----------------------- filtering ----------------------- */ + +enum filter_boolean_type { + FILTER_FALSE, + FILTER_TRUE, +}; + +enum filter_op_type { + FILTER_OP_AND = 1, + FILTER_OP_OR, + FILTER_OP_NOT, +}; + +enum filter_cmp_type { + FILTER_CMP_NONE, + FILTER_CMP_EQ, + FILTER_CMP_NE, + FILTER_CMP_GT, + FILTER_CMP_LT, + FILTER_CMP_GE, + FILTER_CMP_LE, + FILTER_CMP_MATCH, + FILTER_CMP_NOT_MATCH, + FILTER_CMP_REGEX, + FILTER_CMP_NOT_REGEX, +}; + +enum filter_exp_type { + FILTER_EXP_NONE, + FILTER_EXP_ADD, + FILTER_EXP_SUB, + FILTER_EXP_MUL, + FILTER_EXP_DIV, + FILTER_EXP_MOD, + FILTER_EXP_RSHIFT, + FILTER_EXP_LSHIFT, + FILTER_EXP_AND, + FILTER_EXP_OR, + FILTER_EXP_XOR, + FILTER_EXP_NOT, +}; + +enum filter_arg_type { + FILTER_ARG_NONE, + FILTER_ARG_BOOLEAN, + FILTER_ARG_VALUE, + FILTER_ARG_FIELD, + FILTER_ARG_EXP, + FILTER_ARG_OP, + FILTER_ARG_NUM, + FILTER_ARG_STR, +}; + +enum filter_value_type { + FILTER_NUMBER, + FILTER_STRING, + FILTER_CHAR +}; + +struct fliter_arg; + +struct filter_arg_boolean { + enum filter_boolean_type value; +}; + +struct filter_arg_field { + struct format_field *field; +}; + +struct filter_arg_value { + enum filter_value_type type; + union { + char *str; + unsigned long long val; + }; +}; + +struct filter_arg_op { + enum filter_op_type type; + struct filter_arg *left; + struct filter_arg *right; +}; + +struct filter_arg_exp { + enum filter_exp_type type; + struct filter_arg *left; + struct filter_arg *right; +}; + +struct filter_arg_num { + enum filter_cmp_type type; + struct filter_arg *left; + struct filter_arg *right; +}; + +struct filter_arg_str { + enum filter_cmp_type type; + struct format_field *field; + char *val; + char *buffer; + regex_t reg; +}; + +struct filter_arg { + enum filter_arg_type type; + union { + struct filter_arg_boolean boolean; + struct filter_arg_field field; + struct filter_arg_value value; + struct filter_arg_op op; + struct filter_arg_exp exp; + struct filter_arg_num num; + struct filter_arg_str str; + }; +}; + +struct filter_type { + int event_id; + struct event_format *event; + struct filter_arg *filter; +}; + +struct event_filter { + struct pevent *pevent; + int filters; + struct filter_type *event_filters; +}; + +struct event_filter *pevent_filter_alloc(struct pevent *pevent); + +#define FILTER_NONE -2 +#define FILTER_NOEXIST -1 +#define FILTER_MISS 0 +#define FILTER_MATCH 1 + +enum filter_trivial_type { + FILTER_TRIVIAL_FALSE, + FILTER_TRIVIAL_TRUE, + FILTER_TRIVIAL_BOTH, +}; + +int pevent_filter_add_filter_str(struct event_filter *filter, + const char *filter_str, + char **error_str); + + +int pevent_filter_match(struct event_filter *filter, + struct pevent_record *record); + +int pevent_event_filtered(struct event_filter *filter, + int event_id); + +void pevent_filter_reset(struct event_filter *filter); + +void pevent_filter_clear_trivial(struct event_filter *filter, + enum filter_trivial_type type); + +void pevent_filter_free(struct event_filter *filter); + +char *pevent_filter_make_string(struct event_filter *filter, int event_id); + +int pevent_filter_remove_event(struct event_filter *filter, + int event_id); + +int pevent_filter_event_has_trivial(struct event_filter *filter, + int event_id, + enum filter_trivial_type type); + +int pevent_filter_copy(struct event_filter *dest, struct event_filter *source); + +int pevent_update_trivial(struct event_filter *dest, struct event_filter *source, + enum filter_trivial_type type); + +int pevent_filter_compare(struct event_filter *filter1, struct event_filter *filter2); + +#endif /* _PARSE_EVENTS_H */ diff --git a/tools/lib/traceevent/event-utils.h b/tools/lib/traceevent/event-utils.h new file mode 100644 index 000000000000..08296383d1e6 --- /dev/null +++ b/tools/lib/traceevent/event-utils.h @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#ifndef __UTIL_H +#define __UTIL_H + +#include <ctype.h> + +/* Can be overridden */ +void die(const char *fmt, ...); +void *malloc_or_die(unsigned int size); +void warning(const char *fmt, ...); +void pr_stat(const char *fmt, ...); +void vpr_stat(const char *fmt, va_list ap); + +/* Always available */ +void __die(const char *fmt, ...); +void __warning(const char *fmt, ...); +void __pr_stat(const char *fmt, ...); + +void __vdie(const char *fmt, ...); +void __vwarning(const char *fmt, ...); +void __vpr_stat(const char *fmt, ...); + +static inline char *strim(char *string) +{ + char *ret; + + if (!string) + return NULL; + while (*string) { + if (!isspace(*string)) + break; + string++; + } + ret = string; + + string = ret + strlen(ret) - 1; + while (string > ret) { + if (!isspace(*string)) + break; + string--; + } + string[1] = 0; + + return ret; +} + +static inline int has_text(const char *text) +{ + if (!text) + return 0; + + while (*text) { + if (!isspace(*text)) + return 1; + text++; + } + + return 0; +} + +#endif diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c new file mode 100644 index 000000000000..ad17855528f9 --- /dev/null +++ b/tools/lib/traceevent/parse-filter.c @@ -0,0 +1,2289 @@ +/* + * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> +#include <errno.h> +#include <sys/types.h> + +#include "event-parse.h" +#include "event-utils.h" + +#define COMM "COMM" + +static struct format_field comm = { + .name = "COMM", +}; + +struct event_list { + struct event_list *next; + struct event_format *event; +}; + +#define MAX_ERR_STR_SIZE 256 + +static void show_error(char **error_str, const char *fmt, ...) +{ + unsigned long long index; + const char *input; + char *error; + va_list ap; + int len; + int i; + + if (!error_str) + return; + + input = pevent_get_input_buf(); + index = pevent_get_input_buf_ptr(); + len = input ? strlen(input) : 0; + + error = malloc_or_die(MAX_ERR_STR_SIZE + (len*2) + 3); + + if (len) { + strcpy(error, input); + error[len] = '\n'; + for (i = 1; i < len && i < index; i++) + error[len+i] = ' '; + error[len + i] = '^'; + error[len + i + 1] = '\n'; + len += i+2; + } + + va_start(ap, fmt); + vsnprintf(error + len, MAX_ERR_STR_SIZE, fmt, ap); + va_end(ap); + + *error_str = error; +} + +static void free_token(char *token) +{ + pevent_free_token(token); +} + +static enum event_type read_token(char **tok) +{ + enum event_type type; + char *token = NULL; + + do { + free_token(token); + type = pevent_read_token(&token); + } while (type == EVENT_NEWLINE || type == EVENT_SPACE); + + /* If token is = or ! check to see if the next char is ~ */ + if (token && + (strcmp(token, "=") == 0 || strcmp(token, "!") == 0) && + pevent_peek_char() == '~') { + /* append it */ + *tok = malloc_or_die(3); + sprintf(*tok, "%c%c", *token, '~'); + free_token(token); + /* Now remove the '~' from the buffer */ + pevent_read_token(&token); + free_token(token); + } else + *tok = token; + + return type; +} + +static int filter_cmp(const void *a, const void *b) +{ + const struct filter_type *ea = a; + const struct filter_type *eb = b; + + if (ea->event_id < eb->event_id) + return -1; + + if (ea->event_id > eb->event_id) + return 1; + + return 0; +} + +static struct filter_type * +find_filter_type(struct event_filter *filter, int id) +{ + struct filter_type *filter_type; + struct filter_type key; + + key.event_id = id; + + filter_type = bsearch(&key, filter->event_filters, + filter->filters, + sizeof(*filter->event_filters), + filter_cmp); + + return filter_type; +} + +static struct filter_type * +add_filter_type(struct event_filter *filter, int id) +{ + struct filter_type *filter_type; + int i; + + filter_type = find_filter_type(filter, id); + if (filter_type) + return filter_type; + + filter->event_filters = realloc(filter->event_filters, + sizeof(*filter->event_filters) * + (filter->filters + 1)); + if (!filter->event_filters) + die("Could not allocate filter"); + + for (i = 0; i < filter->filters; i++) { + if (filter->event_filters[i].event_id > id) + break; + } + + if (i < filter->filters) + memmove(&filter->event_filters[i+1], + &filter->event_filters[i], + sizeof(*filter->event_filters) * + (filter->filters - i)); + + filter_type = &filter->event_filters[i]; + filter_type->event_id = id; + filter_type->event = pevent_find_event(filter->pevent, id); + filter_type->filter = NULL; + + filter->filters++; + + return filter_type; +} + +/** + * pevent_filter_alloc - create a new event filter + * @pevent: The pevent that this filter is associated with + */ +struct event_filter *pevent_filter_alloc(struct pevent *pevent) +{ + struct event_filter *filter; + + filter = malloc_or_die(sizeof(*filter)); + memset(filter, 0, sizeof(*filter)); + filter->pevent = pevent; + pevent_ref(pevent); + + return filter; +} + +static struct filter_arg *allocate_arg(void) +{ + struct filter_arg *arg; + + arg = malloc_or_die(sizeof(*arg)); + memset(arg, 0, sizeof(*arg)); + + return arg; +} + +static void free_arg(struct filter_arg *arg) +{ + if (!arg) + return; + + switch (arg->type) { + case FILTER_ARG_NONE: + case FILTER_ARG_BOOLEAN: + case FILTER_ARG_NUM: + break; + + case FILTER_ARG_STR: + free(arg->str.val); + regfree(&arg->str.reg); + free(arg->str.buffer); + break; + + case FILTER_ARG_OP: + free_arg(arg->op.left); + free_arg(arg->op.right); + default: + break; + } + + free(arg); +} + +static void add_event(struct event_list **events, + struct event_format *event) +{ + struct event_list *list; + + list = malloc_or_die(sizeof(*list)); + list->next = *events; + *events = list; + list->event = event; +} + +static int event_match(struct event_format *event, + regex_t *sreg, regex_t *ereg) +{ + if (sreg) { + return !regexec(sreg, event->system, 0, NULL, 0) && + !regexec(ereg, event->name, 0, NULL, 0); + } + + return !regexec(ereg, event->system, 0, NULL, 0) || + !regexec(ereg, event->name, 0, NULL, 0); +} + +static int +find_event(struct pevent *pevent, struct event_list **events, + char *sys_name, char *event_name) +{ + struct event_format *event; + regex_t ereg; + regex_t sreg; + int match = 0; + char *reg; + int ret; + int i; + + if (!event_name) { + /* if no name is given, then swap sys and name */ + event_name = sys_name; + sys_name = NULL; + } + + reg = malloc_or_die(strlen(event_name) + 3); + sprintf(reg, "^%s$", event_name); + + ret = regcomp(&ereg, reg, REG_ICASE|REG_NOSUB); + free(reg); + + if (ret) + return -1; + + if (sys_name) { + reg = malloc_or_die(strlen(sys_name) + 3); + sprintf(reg, "^%s$", sys_name); + ret = regcomp(&sreg, reg, REG_ICASE|REG_NOSUB); + free(reg); + if (ret) { + regfree(&ereg); + return -1; + } + } + + for (i = 0; i < pevent->nr_events; i++) { + event = pevent->events[i]; + if (event_match(event, sys_name ? &sreg : NULL, &ereg)) { + match = 1; + add_event(events, event); + } + } + + regfree(&ereg); + if (sys_name) + regfree(&sreg); + + if (!match) + return -1; + + return 0; +} + +static void free_events(struct event_list *events) +{ + struct event_list *event; + + while (events) { + event = events; + events = events->next; + free(event); + } +} + +static struct filter_arg * +create_arg_item(struct event_format *event, const char *token, + enum event_type type, char **error_str) +{ + struct format_field *field; + struct filter_arg *arg; + + arg = allocate_arg(); + + switch (type) { + + case EVENT_SQUOTE: + case EVENT_DQUOTE: + arg->type = FILTER_ARG_VALUE; + arg->value.type = + type == EVENT_DQUOTE ? FILTER_STRING : FILTER_CHAR; + arg->value.str = strdup(token); + if (!arg->value.str) + die("malloc string"); + break; + case EVENT_ITEM: + /* if it is a number, then convert it */ + if (isdigit(token[0])) { + arg->type = FILTER_ARG_VALUE; + arg->value.type = FILTER_NUMBER; + arg->value.val = strtoull(token, NULL, 0); + break; + } + /* Consider this a field */ + field = pevent_find_any_field(event, token); + if (!field) { + if (strcmp(token, COMM) != 0) { + /* not a field, Make it false */ + arg->type = FILTER_ARG_BOOLEAN; + arg->boolean.value = FILTER_FALSE; + break; + } + /* If token is 'COMM' then it is special */ + field = &comm; + } + arg->type = FILTER_ARG_FIELD; + arg->field.field = field; + break; + default: + free_arg(arg); + show_error(error_str, "expected a value but found %s", + token); + return NULL; + } + return arg; +} + +static struct filter_arg * +create_arg_op(enum filter_op_type btype) +{ + struct filter_arg *arg; + + arg = allocate_arg(); + arg->type = FILTER_ARG_OP; + arg->op.type = btype; + + return arg; +} + +static struct filter_arg * +create_arg_exp(enum filter_exp_type etype) +{ + struct filter_arg *arg; + + arg = allocate_arg(); + arg->type = FILTER_ARG_EXP; + arg->op.type = etype; + + return arg; +} + +static struct filter_arg * +create_arg_cmp(enum filter_exp_type etype) +{ + struct filter_arg *arg; + + arg = allocate_arg(); + /* Use NUM and change if necessary */ + arg->type = FILTER_ARG_NUM; + arg->op.type = etype; + + return arg; +} + +static int add_right(struct filter_arg *op, struct filter_arg *arg, + char **error_str) +{ + struct filter_arg *left; + char *str; + int op_type; + int ret; + + switch (op->type) { + case FILTER_ARG_EXP: + if (op->exp.right) + goto out_fail; + op->exp.right = arg; + break; + + case FILTER_ARG_OP: + if (op->op.right) + goto out_fail; + op->op.right = arg; + break; + + case FILTER_ARG_NUM: + if (op->op.right) + goto out_fail; + /* + * The arg must be num, str, or field + */ + switch (arg->type) { + case FILTER_ARG_VALUE: + case FILTER_ARG_FIELD: + break; + default: + show_error(error_str, + "Illegal rvalue"); + return -1; + } + + /* + * Depending on the type, we may need to + * convert this to a string or regex. + */ + switch (arg->value.type) { + case FILTER_CHAR: + /* + * A char should be converted to number if + * the string is 1 byte, and the compare + * is not a REGEX. + */ + if (strlen(arg->value.str) == 1 && + op->num.type != FILTER_CMP_REGEX && + op->num.type != FILTER_CMP_NOT_REGEX) { + arg->value.type = FILTER_NUMBER; + goto do_int; + } + /* fall through */ + case FILTER_STRING: + + /* convert op to a string arg */ + op_type = op->num.type; + left = op->num.left; + str = arg->value.str; + + /* reset the op for the new field */ + memset(op, 0, sizeof(*op)); + + /* + * If left arg was a field not found then + * NULL the entire op. + */ + if (left->type == FILTER_ARG_BOOLEAN) { + free_arg(left); + free_arg(arg); + op->type = FILTER_ARG_BOOLEAN; + op->boolean.value = FILTER_FALSE; + break; + } + + /* Left arg must be a field */ + if (left->type != FILTER_ARG_FIELD) { + show_error(error_str, + "Illegal lvalue for string comparison"); + return -1; + } + + /* Make sure this is a valid string compare */ + switch (op_type) { + case FILTER_CMP_EQ: + op_type = FILTER_CMP_MATCH; + break; + case FILTER_CMP_NE: + op_type = FILTER_CMP_NOT_MATCH; + break; + + case FILTER_CMP_REGEX: + case FILTER_CMP_NOT_REGEX: + ret = regcomp(&op->str.reg, str, REG_ICASE|REG_NOSUB); + if (ret) { + show_error(error_str, + "RegEx '%s' did not compute", + str); + return -1; + } + break; + default: + show_error(error_str, + "Illegal comparison for string"); + return -1; + } + + op->type = FILTER_ARG_STR; + op->str.type = op_type; + op->str.field = left->field.field; + op->str.val = strdup(str); + if (!op->str.val) + die("malloc string"); + /* + * Need a buffer to copy data for tests + */ + op->str.buffer = malloc_or_die(op->str.field->size + 1); + /* Null terminate this buffer */ + op->str.buffer[op->str.field->size] = 0; + + /* We no longer have left or right args */ + free_arg(arg); + free_arg(left); + + break; + + case FILTER_NUMBER: + + do_int: + switch (op->num.type) { + case FILTER_CMP_REGEX: + case FILTER_CMP_NOT_REGEX: + show_error(error_str, + "Op not allowed with integers"); + return -1; + + default: + break; + } + + /* numeric compare */ + op->num.right = arg; + break; + default: + goto out_fail; + } + break; + default: + goto out_fail; + } + + return 0; + + out_fail: + show_error(error_str, + "Syntax error"); + return -1; +} + +static struct filter_arg * +rotate_op_right(struct filter_arg *a, struct filter_arg *b) +{ + struct filter_arg *arg; + + arg = a->op.right; + a->op.right = b; + return arg; +} + +static int add_left(struct filter_arg *op, struct filter_arg *arg) +{ + switch (op->type) { + case FILTER_ARG_EXP: + if (arg->type == FILTER_ARG_OP) + arg = rotate_op_right(arg, op); + op->exp.left = arg; + break; + + case FILTER_ARG_OP: + op->op.left = arg; + break; + case FILTER_ARG_NUM: + if (arg->type == FILTER_ARG_OP) + arg = rotate_op_right(arg, op); + + /* left arg of compares must be a field */ + if (arg->type != FILTER_ARG_FIELD && + arg->type != FILTER_ARG_BOOLEAN) + return -1; + op->num.left = arg; + break; + default: + return -1; + } + return 0; +} + +enum op_type { + OP_NONE, + OP_BOOL, + OP_NOT, + OP_EXP, + OP_CMP, +}; + +static enum op_type process_op(const char *token, + enum filter_op_type *btype, + enum filter_cmp_type *ctype, + enum filter_exp_type *etype) +{ + *btype = FILTER_OP_NOT; + *etype = FILTER_EXP_NONE; + *ctype = FILTER_CMP_NONE; + + if (strcmp(token, "&&") == 0) + *btype = FILTER_OP_AND; + else if (strcmp(token, "||") == 0) + *btype = FILTER_OP_OR; + else if (strcmp(token, "!") == 0) + return OP_NOT; + + if (*btype != FILTER_OP_NOT) + return OP_BOOL; + + /* Check for value expressions */ + if (strcmp(token, "+") == 0) { + *etype = FILTER_EXP_ADD; + } else if (strcmp(token, "-") == 0) { + *etype = FILTER_EXP_SUB; + } else if (strcmp(token, "*") == 0) { + *etype = FILTER_EXP_MUL; + } else if (strcmp(token, "/") == 0) { + *etype = FILTER_EXP_DIV; + } else if (strcmp(token, "%") == 0) { + *etype = FILTER_EXP_MOD; + } else if (strcmp(token, ">>") == 0) { + *etype = FILTER_EXP_RSHIFT; + } else if (strcmp(token, "<<") == 0) { + *etype = FILTER_EXP_LSHIFT; + } else if (strcmp(token, "&") == 0) { + *etype = FILTER_EXP_AND; + } else if (strcmp(token, "|") == 0) { + *etype = FILTER_EXP_OR; + } else if (strcmp(token, "^") == 0) { + *etype = FILTER_EXP_XOR; + } else if (strcmp(token, "~") == 0) + *etype = FILTER_EXP_NOT; + + if (*etype != FILTER_EXP_NONE) + return OP_EXP; + + /* Check for compares */ + if (strcmp(token, "==") == 0) + *ctype = FILTER_CMP_EQ; + else if (strcmp(token, "!=") == 0) + *ctype = FILTER_CMP_NE; + else if (strcmp(token, "<") == 0) + *ctype = FILTER_CMP_LT; + else if (strcmp(token, ">") == 0) + *ctype = FILTER_CMP_GT; + else if (strcmp(token, "<=") == 0) + *ctype = FILTER_CMP_LE; + else if (strcmp(token, ">=") == 0) + *ctype = FILTER_CMP_GE; + else if (strcmp(token, "=~") == 0) + *ctype = FILTER_CMP_REGEX; + else if (strcmp(token, "!~") == 0) + *ctype = FILTER_CMP_NOT_REGEX; + else + return OP_NONE; + + return OP_CMP; +} + +static int check_op_done(struct filter_arg *arg) +{ + switch (arg->type) { + case FILTER_ARG_EXP: + return arg->exp.right != NULL; + + case FILTER_ARG_OP: + return arg->op.right != NULL; + + case FILTER_ARG_NUM: + return arg->num.right != NULL; + + case FILTER_ARG_STR: + /* A string conversion is always done */ + return 1; + + case FILTER_ARG_BOOLEAN: + /* field not found, is ok */ + return 1; + + default: + return 0; + } +} + +enum filter_vals { + FILTER_VAL_NORM, + FILTER_VAL_FALSE, + FILTER_VAL_TRUE, +}; + +void reparent_op_arg(struct filter_arg *parent, struct filter_arg *old_child, + struct filter_arg *arg) +{ + struct filter_arg *other_child; + struct filter_arg **ptr; + + if (parent->type != FILTER_ARG_OP && + arg->type != FILTER_ARG_OP) + die("can not reparent other than OP"); + + /* Get the sibling */ + if (old_child->op.right == arg) { + ptr = &old_child->op.right; + other_child = old_child->op.left; + } else if (old_child->op.left == arg) { + ptr = &old_child->op.left; + other_child = old_child->op.right; + } else + die("Error in reparent op, find other child"); + + /* Detach arg from old_child */ + *ptr = NULL; + + /* Check for root */ + if (parent == old_child) { + free_arg(other_child); + *parent = *arg; + /* Free arg without recussion */ + free(arg); + return; + } + + if (parent->op.right == old_child) + ptr = &parent->op.right; + else if (parent->op.left == old_child) + ptr = &parent->op.left; + else + die("Error in reparent op"); + *ptr = arg; + + free_arg(old_child); +} + +enum filter_vals test_arg(struct filter_arg *parent, struct filter_arg *arg) +{ + enum filter_vals lval, rval; + + switch (arg->type) { + + /* bad case */ + case FILTER_ARG_BOOLEAN: + return FILTER_VAL_FALSE + arg->boolean.value; + + /* good cases: */ + case FILTER_ARG_STR: + case FILTER_ARG_VALUE: + case FILTER_ARG_FIELD: + return FILTER_VAL_NORM; + + case FILTER_ARG_EXP: + lval = test_arg(arg, arg->exp.left); + if (lval != FILTER_VAL_NORM) + return lval; + rval = test_arg(arg, arg->exp.right); + if (rval != FILTER_VAL_NORM) + return rval; + return FILTER_VAL_NORM; + + case FILTER_ARG_NUM: + lval = test_arg(arg, arg->num.left); + if (lval != FILTER_VAL_NORM) + return lval; + rval = test_arg(arg, arg->num.right); + if (rval != FILTER_VAL_NORM) + return rval; + return FILTER_VAL_NORM; + + case FILTER_ARG_OP: + if (arg->op.type != FILTER_OP_NOT) { + lval = test_arg(arg, arg->op.left); + switch (lval) { + case FILTER_VAL_NORM: + break; + case FILTER_VAL_TRUE: + if (arg->op.type == FILTER_OP_OR) + return FILTER_VAL_TRUE; + rval = test_arg(arg, arg->op.right); + if (rval != FILTER_VAL_NORM) + return rval; + + reparent_op_arg(parent, arg, arg->op.right); + return FILTER_VAL_NORM; + + case FILTER_VAL_FALSE: + if (arg->op.type == FILTER_OP_AND) + return FILTER_VAL_FALSE; + rval = test_arg(arg, arg->op.right); + if (rval != FILTER_VAL_NORM) + return rval; + + reparent_op_arg(parent, arg, arg->op.right); + return FILTER_VAL_NORM; + } + } + + rval = test_arg(arg, arg->op.right); + switch (rval) { + case FILTER_VAL_NORM: + break; + case FILTER_VAL_TRUE: + if (arg->op.type == FILTER_OP_OR) + return FILTER_VAL_TRUE; + if (arg->op.type == FILTER_OP_NOT) + return FILTER_VAL_FALSE; + + reparent_op_arg(parent, arg, arg->op.left); + return FILTER_VAL_NORM; + + case FILTER_VAL_FALSE: + if (arg->op.type == FILTER_OP_AND) + return FILTER_VAL_FALSE; + if (arg->op.type == FILTER_OP_NOT) + return FILTER_VAL_TRUE; + + reparent_op_arg(parent, arg, arg->op.left); + return FILTER_VAL_NORM; + } + + return FILTER_VAL_NORM; + default: + die("bad arg in filter tree"); + } + return FILTER_VAL_NORM; +} + +/* Remove any unknown event fields */ +static struct filter_arg *collapse_tree(struct filter_arg *arg) +{ + enum filter_vals ret; + + ret = test_arg(arg, arg); + switch (ret) { + case FILTER_VAL_NORM: + return arg; + + case FILTER_VAL_TRUE: + case FILTER_VAL_FALSE: + free_arg(arg); + arg = allocate_arg(); + arg->type = FILTER_ARG_BOOLEAN; + arg->boolean.value = ret == FILTER_VAL_TRUE; + } + + return arg; +} + +static int +process_filter(struct event_format *event, struct filter_arg **parg, + char **error_str, int not) +{ + enum event_type type; + char *token = NULL; + struct filter_arg *current_op = NULL; + struct filter_arg *current_exp = NULL; + struct filter_arg *left_item = NULL; + struct filter_arg *arg = NULL; + enum op_type op_type; + enum filter_op_type btype; + enum filter_exp_type etype; + enum filter_cmp_type ctype; + int ret; + + *parg = NULL; + + do { + free(token); + type = read_token(&token); + switch (type) { + case EVENT_SQUOTE: + case EVENT_DQUOTE: + case EVENT_ITEM: + arg = create_arg_item(event, token, type, error_str); + if (!arg) + goto fail; + if (!left_item) + left_item = arg; + else if (current_exp) { + ret = add_right(current_exp, arg, error_str); + if (ret < 0) + goto fail; + left_item = NULL; + /* Not's only one one expression */ + if (not) { + arg = NULL; + if (current_op) + goto fail_print; + free(token); + *parg = current_exp; + return 0; + } + } else + goto fail_print; + arg = NULL; + break; + + case EVENT_DELIM: + if (*token == ',') { + show_error(error_str, + "Illegal token ','"); + goto fail; + } + + if (*token == '(') { + if (left_item) { + show_error(error_str, + "Open paren can not come after item"); + goto fail; + } + if (current_exp) { + show_error(error_str, + "Open paren can not come after expression"); + goto fail; + } + + ret = process_filter(event, &arg, error_str, 0); + if (ret != 1) { + if (ret == 0) + show_error(error_str, + "Unbalanced number of '('"); + goto fail; + } + ret = 0; + + /* A not wants just one expression */ + if (not) { + if (current_op) + goto fail_print; + *parg = arg; + return 0; + } + + if (current_op) + ret = add_right(current_op, arg, error_str); + else + current_exp = arg; + + if (ret < 0) + goto fail; + + } else { /* ')' */ + if (!current_op && !current_exp) + goto fail_print; + + /* Make sure everything is finished at this level */ + if (current_exp && !check_op_done(current_exp)) + goto fail_print; + if (current_op && !check_op_done(current_op)) + goto fail_print; + + if (current_op) + *parg = current_op; + else + *parg = current_exp; + return 1; + } + break; + + case EVENT_OP: + op_type = process_op(token, &btype, &ctype, &etype); + + /* All expect a left arg except for NOT */ + switch (op_type) { + case OP_BOOL: + /* Logic ops need a left expression */ + if (!current_exp && !current_op) + goto fail_print; + /* fall through */ + case OP_NOT: + /* logic only processes ops and exp */ + if (left_item) + goto fail_print; + break; + case OP_EXP: + case OP_CMP: + if (!left_item) + goto fail_print; + break; + case OP_NONE: + show_error(error_str, + "Unknown op token %s", token); + goto fail; + } + + ret = 0; + switch (op_type) { + case OP_BOOL: + arg = create_arg_op(btype); + if (current_op) + ret = add_left(arg, current_op); + else + ret = add_left(arg, current_exp); + current_op = arg; + current_exp = NULL; + break; + + case OP_NOT: + arg = create_arg_op(btype); + if (current_op) + ret = add_right(current_op, arg, error_str); + if (ret < 0) + goto fail; + current_exp = arg; + ret = process_filter(event, &arg, error_str, 1); + if (ret < 0) + goto fail; + ret = add_right(current_exp, arg, error_str); + if (ret < 0) + goto fail; + break; + + case OP_EXP: + case OP_CMP: + if (op_type == OP_EXP) + arg = create_arg_exp(etype); + else + arg = create_arg_cmp(ctype); + + if (current_op) + ret = add_right(current_op, arg, error_str); + if (ret < 0) + goto fail; + ret = add_left(arg, left_item); + if (ret < 0) { + arg = NULL; + goto fail_print; + } + current_exp = arg; + break; + default: + break; + } + arg = NULL; + if (ret < 0) + goto fail_print; + break; + case EVENT_NONE: + break; + default: + goto fail_print; + } + } while (type != EVENT_NONE); + + if (!current_op && !current_exp) + goto fail_print; + + if (!current_op) + current_op = current_exp; + + current_op = collapse_tree(current_op); + + *parg = current_op; + + return 0; + + fail_print: + show_error(error_str, "Syntax error"); + fail: + free_arg(current_op); + free_arg(current_exp); + free_arg(arg); + free(token); + return -1; +} + +static int +process_event(struct event_format *event, const char *filter_str, + struct filter_arg **parg, char **error_str) +{ + int ret; + + pevent_buffer_init(filter_str, strlen(filter_str)); + + ret = process_filter(event, parg, error_str, 0); + if (ret == 1) { + show_error(error_str, + "Unbalanced number of ')'"); + return -1; + } + if (ret < 0) + return ret; + + /* If parg is NULL, then make it into FALSE */ + if (!*parg) { + *parg = allocate_arg(); + (*parg)->type = FILTER_ARG_BOOLEAN; + (*parg)->boolean.value = FILTER_FALSE; + } + + return 0; +} + +static int filter_event(struct event_filter *filter, + struct event_format *event, + const char *filter_str, char **error_str) +{ + struct filter_type *filter_type; + struct filter_arg *arg; + int ret; + + if (filter_str) { + ret = process_event(event, filter_str, &arg, error_str); + if (ret < 0) + return ret; + + } else { + /* just add a TRUE arg */ + arg = allocate_arg(); + arg->type = FILTER_ARG_BOOLEAN; + arg->boolean.value = FILTER_TRUE; + } + + filter_type = add_filter_type(filter, event->id); + if (filter_type->filter) + free_arg(filter_type->filter); + filter_type->filter = arg; + + return 0; +} + +/** + * pevent_filter_add_filter_str - add a new filter + * @filter: the event filter to add to + * @filter_str: the filter string that contains the filter + * @error_str: string containing reason for failed filter + * + * Returns 0 if the filter was successfully added + * -1 if there was an error. + * + * On error, if @error_str points to a string pointer, + * it is set to the reason that the filter failed. + * This string must be freed with "free". + */ +int pevent_filter_add_filter_str(struct event_filter *filter, + const char *filter_str, + char **error_str) +{ + struct pevent *pevent = filter->pevent; + struct event_list *event; + struct event_list *events = NULL; + const char *filter_start; + const char *next_event; + char *this_event; + char *event_name = NULL; + char *sys_name = NULL; + char *sp; + int rtn = 0; + int len; + int ret; + + /* clear buffer to reset show error */ + pevent_buffer_init("", 0); + + if (error_str) + *error_str = NULL; + + filter_start = strchr(filter_str, ':'); + if (filter_start) + len = filter_start - filter_str; + else + len = strlen(filter_str); + + + do { + next_event = strchr(filter_str, ','); + if (next_event && + (!filter_start || next_event < filter_start)) + len = next_event - filter_str; + else if (filter_start) + len = filter_start - filter_str; + else + len = strlen(filter_str); + + this_event = malloc_or_die(len + 1); + memcpy(this_event, filter_str, len); + this_event[len] = 0; + + if (next_event) + next_event++; + + filter_str = next_event; + + sys_name = strtok_r(this_event, "/", &sp); + event_name = strtok_r(NULL, "/", &sp); + + if (!sys_name) { + show_error(error_str, "No filter found"); + /* This can only happen when events is NULL, but still */ + free_events(events); + free(this_event); + return -1; + } + + /* Find this event */ + ret = find_event(pevent, &events, strim(sys_name), strim(event_name)); + if (ret < 0) { + if (event_name) + show_error(error_str, + "No event found under '%s.%s'", + sys_name, event_name); + else + show_error(error_str, + "No event found under '%s'", + sys_name); + free_events(events); + free(this_event); + return -1; + } + free(this_event); + } while (filter_str); + + /* Skip the ':' */ + if (filter_start) + filter_start++; + + /* filter starts here */ + for (event = events; event; event = event->next) { + ret = filter_event(filter, event->event, filter_start, + error_str); + /* Failures are returned if a parse error happened */ + if (ret < 0) + rtn = ret; + + if (ret >= 0 && pevent->test_filters) { + char *test; + test = pevent_filter_make_string(filter, event->event->id); + printf(" '%s: %s'\n", event->event->name, test); + free(test); + } + } + + free_events(events); + + if (rtn >= 0 && pevent->test_filters) + exit(0); + + return rtn; +} + +static void free_filter_type(struct filter_type *filter_type) +{ + free_arg(filter_type->filter); +} + +/** + * pevent_filter_remove_event - remove a filter for an event + * @filter: the event filter to remove from + * @event_id: the event to remove a filter for + * + * Removes the filter saved for an event defined by @event_id + * from the @filter. + * + * Returns 1: if an event was removed + * 0: if the event was not found + */ +int pevent_filter_remove_event(struct event_filter *filter, + int event_id) +{ + struct filter_type *filter_type; + unsigned long len; + + if (!filter->filters) + return 0; + + filter_type = find_filter_type(filter, event_id); + + if (!filter_type) + return 0; + + free_filter_type(filter_type); + + /* The filter_type points into the event_filters array */ + len = (unsigned long)(filter->event_filters + filter->filters) - + (unsigned long)(filter_type + 1); + + memmove(filter_type, filter_type + 1, len); + filter->filters--; + + memset(&filter->event_filters[filter->filters], 0, + sizeof(*filter_type)); + + return 1; +} + +/** + * pevent_filter_reset - clear all filters in a filter + * @filter: the event filter to reset + * + * Removes all filters from a filter and resets it. + */ +void pevent_filter_reset(struct event_filter *filter) +{ + int i; + + for (i = 0; i < filter->filters; i++) + free_filter_type(&filter->event_filters[i]); + + free(filter->event_filters); + filter->filters = 0; + filter->event_filters = NULL; +} + +void pevent_filter_free(struct event_filter *filter) +{ + pevent_unref(filter->pevent); + + pevent_filter_reset(filter); + + free(filter); +} + +static char *arg_to_str(struct event_filter *filter, struct filter_arg *arg); + +static int copy_filter_type(struct event_filter *filter, + struct event_filter *source, + struct filter_type *filter_type) +{ + struct filter_arg *arg; + struct event_format *event; + const char *sys; + const char *name; + char *str; + + /* Can't assume that the pevent's are the same */ + sys = filter_type->event->system; + name = filter_type->event->name; + event = pevent_find_event_by_name(filter->pevent, sys, name); + if (!event) + return -1; + + str = arg_to_str(source, filter_type->filter); + if (!str) + return -1; + + if (strcmp(str, "TRUE") == 0 || strcmp(str, "FALSE") == 0) { + /* Add trivial event */ + arg = allocate_arg(); + arg->type = FILTER_ARG_BOOLEAN; + if (strcmp(str, "TRUE") == 0) + arg->boolean.value = 1; + else + arg->boolean.value = 0; + + filter_type = add_filter_type(filter, event->id); + filter_type->filter = arg; + + free(str); + return 0; + } + + filter_event(filter, event, str, NULL); + free(str); + + return 0; +} + +/** + * pevent_filter_copy - copy a filter using another filter + * @dest - the filter to copy to + * @source - the filter to copy from + * + * Returns 0 on success and -1 if not all filters were copied + */ +int pevent_filter_copy(struct event_filter *dest, struct event_filter *source) +{ + int ret = 0; + int i; + + pevent_filter_reset(dest); + + for (i = 0; i < source->filters; i++) { + if (copy_filter_type(dest, source, &source->event_filters[i])) + ret = -1; + } + return ret; +} + + +/** + * pevent_update_trivial - update the trivial filters with the given filter + * @dest - the filter to update + * @source - the filter as the source of the update + * @type - the type of trivial filter to update. + * + * Scan dest for trivial events matching @type to replace with the source. + * + * Returns 0 on success and -1 if there was a problem updating, but + * events may have still been updated on error. + */ +int pevent_update_trivial(struct event_filter *dest, struct event_filter *source, + enum filter_trivial_type type) +{ + struct pevent *src_pevent; + struct pevent *dest_pevent; + struct event_format *event; + struct filter_type *filter_type; + struct filter_arg *arg; + char *str; + int i; + + src_pevent = source->pevent; + dest_pevent = dest->pevent; + + /* Do nothing if either of the filters has nothing to filter */ + if (!dest->filters || !source->filters) + return 0; + + for (i = 0; i < dest->filters; i++) { + filter_type = &dest->event_filters[i]; + arg = filter_type->filter; + if (arg->type != FILTER_ARG_BOOLEAN) + continue; + if ((arg->boolean.value && type == FILTER_TRIVIAL_FALSE) || + (!arg->boolean.value && type == FILTER_TRIVIAL_TRUE)) + continue; + + event = filter_type->event; + + if (src_pevent != dest_pevent) { + /* do a look up */ + event = pevent_find_event_by_name(src_pevent, + event->system, + event->name); + if (!event) + return -1; + } + + str = pevent_filter_make_string(source, event->id); + if (!str) + continue; + + /* Don't bother if the filter is trivial too */ + if (strcmp(str, "TRUE") != 0 && strcmp(str, "FALSE") != 0) + filter_event(dest, event, str, NULL); + free(str); + } + return 0; +} + +/** + * pevent_filter_clear_trivial - clear TRUE and FALSE filters + * @filter: the filter to remove trivial filters from + * @type: remove only true, false, or both + * + * Removes filters that only contain a TRUE or FALES boolean arg. + */ +void pevent_filter_clear_trivial(struct event_filter *filter, + enum filter_trivial_type type) +{ + struct filter_type *filter_type; + int count = 0; + int *ids = NULL; + int i; + + if (!filter->filters) + return; + + /* + * Two steps, first get all ids with trivial filters. + * then remove those ids. + */ + for (i = 0; i < filter->filters; i++) { + filter_type = &filter->event_filters[i]; + if (filter_type->filter->type != FILTER_ARG_BOOLEAN) + continue; + switch (type) { + case FILTER_TRIVIAL_FALSE: + if (filter_type->filter->boolean.value) + continue; + case FILTER_TRIVIAL_TRUE: + if (!filter_type->filter->boolean.value) + continue; + default: + break; + } + + ids = realloc(ids, sizeof(*ids) * (count + 1)); + if (!ids) + die("Can't allocate ids"); + ids[count++] = filter_type->event_id; + } + + if (!count) + return; + + for (i = 0; i < count; i++) + pevent_filter_remove_event(filter, ids[i]); + + free(ids); +} + +/** + * pevent_filter_event_has_trivial - return true event contains trivial filter + * @filter: the filter with the information + * @event_id: the id of the event to test + * @type: trivial type to test for (TRUE, FALSE, EITHER) + * + * Returns 1 if the event contains a matching trivial type + * otherwise 0. + */ +int pevent_filter_event_has_trivial(struct event_filter *filter, + int event_id, + enum filter_trivial_type type) +{ + struct filter_type *filter_type; + + if (!filter->filters) + return 0; + + filter_type = find_filter_type(filter, event_id); + + if (!filter_type) + return 0; + + if (filter_type->filter->type != FILTER_ARG_BOOLEAN) + return 0; + + switch (type) { + case FILTER_TRIVIAL_FALSE: + return !filter_type->filter->boolean.value; + + case FILTER_TRIVIAL_TRUE: + return filter_type->filter->boolean.value; + default: + return 1; + } +} + +static int test_filter(struct event_format *event, + struct filter_arg *arg, struct pevent_record *record); + +static const char * +get_comm(struct event_format *event, struct pevent_record *record) +{ + const char *comm; + int pid; + + pid = pevent_data_pid(event->pevent, record); + comm = pevent_data_comm_from_pid(event->pevent, pid); + return comm; +} + +static unsigned long long +get_value(struct event_format *event, + struct format_field *field, struct pevent_record *record) +{ + unsigned long long val; + + /* Handle our dummy "comm" field */ + if (field == &comm) { + const char *name; + + name = get_comm(event, record); + return (unsigned long)name; + } + + pevent_read_number_field(field, record->data, &val); + + if (!(field->flags & FIELD_IS_SIGNED)) + return val; + + switch (field->size) { + case 1: + return (char)val; + case 2: + return (short)val; + case 4: + return (int)val; + case 8: + return (long long)val; + } + return val; +} + +static unsigned long long +get_arg_value(struct event_format *event, struct filter_arg *arg, struct pevent_record *record); + +static unsigned long long +get_exp_value(struct event_format *event, struct filter_arg *arg, struct pevent_record *record) +{ + unsigned long long lval, rval; + + lval = get_arg_value(event, arg->exp.left, record); + rval = get_arg_value(event, arg->exp.right, record); + + switch (arg->exp.type) { + case FILTER_EXP_ADD: + return lval + rval; + + case FILTER_EXP_SUB: + return lval - rval; + + case FILTER_EXP_MUL: + return lval * rval; + + case FILTER_EXP_DIV: + return lval / rval; + + case FILTER_EXP_MOD: + return lval % rval; + + case FILTER_EXP_RSHIFT: + return lval >> rval; + + case FILTER_EXP_LSHIFT: + return lval << rval; + + case FILTER_EXP_AND: + return lval & rval; + + case FILTER_EXP_OR: + return lval | rval; + + case FILTER_EXP_XOR: + return lval ^ rval; + + case FILTER_EXP_NOT: + default: + die("error in exp"); + } + return 0; +} + +static unsigned long long +get_arg_value(struct event_format *event, struct filter_arg *arg, struct pevent_record *record) +{ + switch (arg->type) { + case FILTER_ARG_FIELD: + return get_value(event, arg->field.field, record); + + case FILTER_ARG_VALUE: + if (arg->value.type != FILTER_NUMBER) + die("must have number field!"); + return arg->value.val; + + case FILTER_ARG_EXP: + return get_exp_value(event, arg, record); + + default: + die("oops in filter"); + } + return 0; +} + +static int test_num(struct event_format *event, + struct filter_arg *arg, struct pevent_record *record) +{ + unsigned long long lval, rval; + + lval = get_arg_value(event, arg->num.left, record); + rval = get_arg_value(event, arg->num.right, record); + + switch (arg->num.type) { + case FILTER_CMP_EQ: + return lval == rval; + + case FILTER_CMP_NE: + return lval != rval; + + case FILTER_CMP_GT: + return lval > rval; + + case FILTER_CMP_LT: + return lval < rval; + + case FILTER_CMP_GE: + return lval >= rval; + + case FILTER_CMP_LE: + return lval <= rval; + + default: + /* ?? */ + return 0; + } +} + +static const char *get_field_str(struct filter_arg *arg, struct pevent_record *record) +{ + struct event_format *event; + struct pevent *pevent; + unsigned long long addr; + const char *val = NULL; + char hex[64]; + + /* If the field is not a string convert it */ + if (arg->str.field->flags & FIELD_IS_STRING) { + val = record->data + arg->str.field->offset; + + /* + * We need to copy the data since we can't be sure the field + * is null terminated. + */ + if (*(val + arg->str.field->size - 1)) { + /* copy it */ + memcpy(arg->str.buffer, val, arg->str.field->size); + /* the buffer is already NULL terminated */ + val = arg->str.buffer; + } + + } else { + event = arg->str.field->event; + pevent = event->pevent; + addr = get_value(event, arg->str.field, record); + + if (arg->str.field->flags & (FIELD_IS_POINTER | FIELD_IS_LONG)) + /* convert to a kernel symbol */ + val = pevent_find_function(pevent, addr); + + if (val == NULL) { + /* just use the hex of the string name */ + snprintf(hex, 64, "0x%llx", addr); + val = hex; + } + } + + return val; +} + +static int test_str(struct event_format *event, + struct filter_arg *arg, struct pevent_record *record) +{ + const char *val; + + if (arg->str.field == &comm) + val = get_comm(event, record); + else + val = get_field_str(arg, record); + + switch (arg->str.type) { + case FILTER_CMP_MATCH: + return strcmp(val, arg->str.val) == 0; + + case FILTER_CMP_NOT_MATCH: + return strcmp(val, arg->str.val) != 0; + + case FILTER_CMP_REGEX: + /* Returns zero on match */ + return !regexec(&arg->str.reg, val, 0, NULL, 0); + + case FILTER_CMP_NOT_REGEX: + return regexec(&arg->str.reg, val, 0, NULL, 0); + + default: + /* ?? */ + return 0; + } +} + +static int test_op(struct event_format *event, + struct filter_arg *arg, struct pevent_record *record) +{ + switch (arg->op.type) { + case FILTER_OP_AND: + return test_filter(event, arg->op.left, record) && + test_filter(event, arg->op.right, record); + + case FILTER_OP_OR: + return test_filter(event, arg->op.left, record) || + test_filter(event, arg->op.right, record); + + case FILTER_OP_NOT: + return !test_filter(event, arg->op.right, record); + + default: + /* ?? */ + return 0; + } +} + +static int test_filter(struct event_format *event, + struct filter_arg *arg, struct pevent_record *record) +{ + switch (arg->type) { + case FILTER_ARG_BOOLEAN: + /* easy case */ + return arg->boolean.value; + + case FILTER_ARG_OP: + return test_op(event, arg, record); + + case FILTER_ARG_NUM: + return test_num(event, arg, record); + + case FILTER_ARG_STR: + return test_str(event, arg, record); + + case FILTER_ARG_EXP: + case FILTER_ARG_VALUE: + case FILTER_ARG_FIELD: + /* + * Expressions, fields and values evaluate + * to true if they return non zero + */ + return !!get_arg_value(event, arg, record); + + default: + die("oops!"); + /* ?? */ + return 0; + } +} + +/** + * pevent_event_filtered - return true if event has filter + * @filter: filter struct with filter information + * @event_id: event id to test if filter exists + * + * Returns 1 if filter found for @event_id + * otherwise 0; + */ +int pevent_event_filtered(struct event_filter *filter, + int event_id) +{ + struct filter_type *filter_type; + + if (!filter->filters) + return 0; + + filter_type = find_filter_type(filter, event_id); + + return filter_type ? 1 : 0; +} + +/** + * pevent_filter_match - test if a record matches a filter + * @filter: filter struct with filter information + * @record: the record to test against the filter + * + * Returns: + * 1 - filter found for event and @record matches + * 0 - filter found for event and @record does not match + * -1 - no filter found for @record's event + * -2 - if no filters exist + */ +int pevent_filter_match(struct event_filter *filter, + struct pevent_record *record) +{ + struct pevent *pevent = filter->pevent; + struct filter_type *filter_type; + int event_id; + + if (!filter->filters) + return FILTER_NONE; + + event_id = pevent_data_type(pevent, record); + + filter_type = find_filter_type(filter, event_id); + + if (!filter_type) + return FILTER_NOEXIST; + + return test_filter(filter_type->event, filter_type->filter, record) ? + FILTER_MATCH : FILTER_MISS; +} + +static char *op_to_str(struct event_filter *filter, struct filter_arg *arg) +{ + char *str = NULL; + char *left = NULL; + char *right = NULL; + char *op = NULL; + int left_val = -1; + int right_val = -1; + int val; + int len; + + switch (arg->op.type) { + case FILTER_OP_AND: + op = "&&"; + /* fall through */ + case FILTER_OP_OR: + if (!op) + op = "||"; + + left = arg_to_str(filter, arg->op.left); + right = arg_to_str(filter, arg->op.right); + if (!left || !right) + break; + + /* Try to consolidate boolean values */ + if (strcmp(left, "TRUE") == 0) + left_val = 1; + else if (strcmp(left, "FALSE") == 0) + left_val = 0; + + if (strcmp(right, "TRUE") == 0) + right_val = 1; + else if (strcmp(right, "FALSE") == 0) + right_val = 0; + + if (left_val >= 0) { + if ((arg->op.type == FILTER_OP_AND && !left_val) || + (arg->op.type == FILTER_OP_OR && left_val)) { + /* Just return left value */ + str = left; + left = NULL; + break; + } + if (right_val >= 0) { + /* just evaluate this. */ + val = 0; + switch (arg->op.type) { + case FILTER_OP_AND: + val = left_val && right_val; + break; + case FILTER_OP_OR: + val = left_val || right_val; + break; + default: + break; + } + str = malloc_or_die(6); + if (val) + strcpy(str, "TRUE"); + else + strcpy(str, "FALSE"); + break; + } + } + if (right_val >= 0) { + if ((arg->op.type == FILTER_OP_AND && !right_val) || + (arg->op.type == FILTER_OP_OR && right_val)) { + /* Just return right value */ + str = right; + right = NULL; + break; + } + /* The right value is meaningless */ + str = left; + left = NULL; + break; + } + + len = strlen(left) + strlen(right) + strlen(op) + 10; + str = malloc_or_die(len); + snprintf(str, len, "(%s) %s (%s)", + left, op, right); + break; + + case FILTER_OP_NOT: + op = "!"; + right = arg_to_str(filter, arg->op.right); + if (!right) + break; + + /* See if we can consolidate */ + if (strcmp(right, "TRUE") == 0) + right_val = 1; + else if (strcmp(right, "FALSE") == 0) + right_val = 0; + if (right_val >= 0) { + /* just return the opposite */ + str = malloc_or_die(6); + if (right_val) + strcpy(str, "FALSE"); + else + strcpy(str, "TRUE"); + break; + } + len = strlen(right) + strlen(op) + 3; + str = malloc_or_die(len); + snprintf(str, len, "%s(%s)", op, right); + break; + + default: + /* ?? */ + break; + } + free(left); + free(right); + return str; +} + +static char *val_to_str(struct event_filter *filter, struct filter_arg *arg) +{ + char *str; + + str = malloc_or_die(30); + + snprintf(str, 30, "%lld", arg->value.val); + + return str; +} + +static char *field_to_str(struct event_filter *filter, struct filter_arg *arg) +{ + return strdup(arg->field.field->name); +} + +static char *exp_to_str(struct event_filter *filter, struct filter_arg *arg) +{ + char *lstr; + char *rstr; + char *op; + char *str = NULL; + int len; + + lstr = arg_to_str(filter, arg->exp.left); + rstr = arg_to_str(filter, arg->exp.right); + if (!lstr || !rstr) + goto out; + + switch (arg->exp.type) { + case FILTER_EXP_ADD: + op = "+"; + break; + case FILTER_EXP_SUB: + op = "-"; + break; + case FILTER_EXP_MUL: + op = "*"; + break; + case FILTER_EXP_DIV: + op = "/"; + break; + case FILTER_EXP_MOD: + op = "%"; + break; + case FILTER_EXP_RSHIFT: + op = ">>"; + break; + case FILTER_EXP_LSHIFT: + op = "<<"; + break; + case FILTER_EXP_AND: + op = "&"; + break; + case FILTER_EXP_OR: + op = "|"; + break; + case FILTER_EXP_XOR: + op = "^"; + break; + default: + die("oops in exp"); + } + + len = strlen(op) + strlen(lstr) + strlen(rstr) + 4; + str = malloc_or_die(len); + snprintf(str, len, "%s %s %s", lstr, op, rstr); +out: + free(lstr); + free(rstr); + + return str; +} + +static char *num_to_str(struct event_filter *filter, struct filter_arg *arg) +{ + char *lstr; + char *rstr; + char *str = NULL; + char *op = NULL; + int len; + + lstr = arg_to_str(filter, arg->num.left); + rstr = arg_to_str(filter, arg->num.right); + if (!lstr || !rstr) + goto out; + + switch (arg->num.type) { + case FILTER_CMP_EQ: + op = "=="; + /* fall through */ + case FILTER_CMP_NE: + if (!op) + op = "!="; + /* fall through */ + case FILTER_CMP_GT: + if (!op) + op = ">"; + /* fall through */ + case FILTER_CMP_LT: + if (!op) + op = "<"; + /* fall through */ + case FILTER_CMP_GE: + if (!op) + op = ">="; + /* fall through */ + case FILTER_CMP_LE: + if (!op) + op = "<="; + + len = strlen(lstr) + strlen(op) + strlen(rstr) + 4; + str = malloc_or_die(len); + sprintf(str, "%s %s %s", lstr, op, rstr); + + break; + + default: + /* ?? */ + break; + } + +out: + free(lstr); + free(rstr); + return str; +} + +static char *str_to_str(struct event_filter *filter, struct filter_arg *arg) +{ + char *str = NULL; + char *op = NULL; + int len; + + switch (arg->str.type) { + case FILTER_CMP_MATCH: + op = "=="; + /* fall through */ + case FILTER_CMP_NOT_MATCH: + if (!op) + op = "!="; + /* fall through */ + case FILTER_CMP_REGEX: + if (!op) + op = "=~"; + /* fall through */ + case FILTER_CMP_NOT_REGEX: + if (!op) + op = "!~"; + + len = strlen(arg->str.field->name) + strlen(op) + + strlen(arg->str.val) + 6; + str = malloc_or_die(len); + snprintf(str, len, "%s %s \"%s\"", + arg->str.field->name, + op, arg->str.val); + break; + + default: + /* ?? */ + break; + } + return str; +} + +static char *arg_to_str(struct event_filter *filter, struct filter_arg *arg) +{ + char *str; + + switch (arg->type) { + case FILTER_ARG_BOOLEAN: + str = malloc_or_die(6); + if (arg->boolean.value) + strcpy(str, "TRUE"); + else + strcpy(str, "FALSE"); + return str; + + case FILTER_ARG_OP: + return op_to_str(filter, arg); + + case FILTER_ARG_NUM: + return num_to_str(filter, arg); + + case FILTER_ARG_STR: + return str_to_str(filter, arg); + + case FILTER_ARG_VALUE: + return val_to_str(filter, arg); + + case FILTER_ARG_FIELD: + return field_to_str(filter, arg); + + case FILTER_ARG_EXP: + return exp_to_str(filter, arg); + + default: + /* ?? */ + return NULL; + } + +} + +/** + * pevent_filter_make_string - return a string showing the filter + * @filter: filter struct with filter information + * @event_id: the event id to return the filter string with + * + * Returns a string that displays the filter contents. + * This string must be freed with free(str). + * NULL is returned if no filter is found. + */ +char * +pevent_filter_make_string(struct event_filter *filter, int event_id) +{ + struct filter_type *filter_type; + + if (!filter->filters) + return NULL; + + filter_type = find_filter_type(filter, event_id); + + if (!filter_type) + return NULL; + + return arg_to_str(filter, filter_type->filter); +} + +/** + * pevent_filter_compare - compare two filters and return if they are the same + * @filter1: Filter to compare with @filter2 + * @filter2: Filter to compare with @filter1 + * + * Returns: + * 1 if the two filters hold the same content. + * 0 if they do not. + */ +int pevent_filter_compare(struct event_filter *filter1, struct event_filter *filter2) +{ + struct filter_type *filter_type1; + struct filter_type *filter_type2; + char *str1, *str2; + int result; + int i; + + /* Do the easy checks first */ + if (filter1->filters != filter2->filters) + return 0; + if (!filter1->filters && !filter2->filters) + return 1; + + /* + * Now take a look at each of the events to see if they have the same + * filters to them. + */ + for (i = 0; i < filter1->filters; i++) { + filter_type1 = &filter1->event_filters[i]; + filter_type2 = find_filter_type(filter2, filter_type1->event_id); + if (!filter_type2) + break; + if (filter_type1->filter->type != filter_type2->filter->type) + break; + switch (filter_type1->filter->type) { + case FILTER_TRIVIAL_FALSE: + case FILTER_TRIVIAL_TRUE: + /* trivial types just need the type compared */ + continue; + default: + break; + } + /* The best way to compare complex filters is with strings */ + str1 = arg_to_str(filter1, filter_type1->filter); + str2 = arg_to_str(filter2, filter_type2->filter); + if (str1 && str2) + result = strcmp(str1, str2) != 0; + else + /* bail out if allocation fails */ + result = 1; + + free(str1); + free(str2); + if (result) + break; + } + + if (i < filter1->filters) + return 0; + return 1; +} + diff --git a/tools/lib/traceevent/parse-utils.c b/tools/lib/traceevent/parse-utils.c new file mode 100644 index 000000000000..f023a133abb6 --- /dev/null +++ b/tools/lib/traceevent/parse-utils.c @@ -0,0 +1,110 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> +#include <errno.h> + +#define __weak __attribute__((weak)) + +void __vdie(const char *fmt, va_list ap) +{ + int ret = errno; + + if (errno) + perror("trace-cmd"); + else + ret = -1; + + fprintf(stderr, " "); + vfprintf(stderr, fmt, ap); + + fprintf(stderr, "\n"); + exit(ret); +} + +void __die(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + __vdie(fmt, ap); + va_end(ap); +} + +void __weak die(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + __vdie(fmt, ap); + va_end(ap); +} + +void __vwarning(const char *fmt, va_list ap) +{ + if (errno) + perror("trace-cmd"); + errno = 0; + + fprintf(stderr, " "); + vfprintf(stderr, fmt, ap); + + fprintf(stderr, "\n"); +} + +void __warning(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + __vwarning(fmt, ap); + va_end(ap); +} + +void __weak warning(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + __vwarning(fmt, ap); + va_end(ap); +} + +void __vpr_stat(const char *fmt, va_list ap) +{ + vprintf(fmt, ap); + printf("\n"); +} + +void __pr_stat(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + __vpr_stat(fmt, ap); + va_end(ap); +} + +void __weak vpr_stat(const char *fmt, va_list ap) +{ + __vpr_stat(fmt, ap); +} + +void __weak pr_stat(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + __vpr_stat(fmt, ap); + va_end(ap); +} + +void __weak *malloc_or_die(unsigned int size) +{ + void *data; + + data = malloc(size); + if (!data) + die("malloc"); + return data; +} diff --git a/tools/lib/traceevent/trace-seq.c b/tools/lib/traceevent/trace-seq.c new file mode 100644 index 000000000000..b1ccc923e8a5 --- /dev/null +++ b/tools/lib/traceevent/trace-seq.c @@ -0,0 +1,200 @@ +/* + * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> + +#include "event-parse.h" +#include "event-utils.h" + +/* + * The TRACE_SEQ_POISON is to catch the use of using + * a trace_seq structure after it was destroyed. + */ +#define TRACE_SEQ_POISON ((void *)0xdeadbeef) +#define TRACE_SEQ_CHECK(s) \ +do { \ + if ((s)->buffer == TRACE_SEQ_POISON) \ + die("Usage of trace_seq after it was destroyed"); \ +} while (0) + +/** + * trace_seq_init - initialize the trace_seq structure + * @s: a pointer to the trace_seq structure to initialize + */ +void trace_seq_init(struct trace_seq *s) +{ + s->len = 0; + s->readpos = 0; + s->buffer_size = TRACE_SEQ_BUF_SIZE; + s->buffer = malloc_or_die(s->buffer_size); +} + +/** + * trace_seq_destroy - free up memory of a trace_seq + * @s: a pointer to the trace_seq to free the buffer + * + * Only frees the buffer, not the trace_seq struct itself. + */ +void trace_seq_destroy(struct trace_seq *s) +{ + if (!s) + return; + TRACE_SEQ_CHECK(s); + free(s->buffer); + s->buffer = TRACE_SEQ_POISON; +} + +static void expand_buffer(struct trace_seq *s) +{ + s->buffer_size += TRACE_SEQ_BUF_SIZE; + s->buffer = realloc(s->buffer, s->buffer_size); + if (!s->buffer) + die("Can't allocate trace_seq buffer memory"); +} + +/** + * trace_seq_printf - sequence printing of trace information + * @s: trace sequence descriptor + * @fmt: printf format string + * + * It returns 0 if the trace oversizes the buffer's free + * space, 1 otherwise. + * + * The tracer may use either sequence operations or its own + * copy to user routines. To simplify formating of a trace + * trace_seq_printf is used to store strings into a special + * buffer (@s). Then the output may be either used by + * the sequencer or pulled into another buffer. + */ +int +trace_seq_printf(struct trace_seq *s, const char *fmt, ...) +{ + va_list ap; + int len; + int ret; + + TRACE_SEQ_CHECK(s); + + try_again: + len = (s->buffer_size - 1) - s->len; + + va_start(ap, fmt); + ret = vsnprintf(s->buffer + s->len, len, fmt, ap); + va_end(ap); + + if (ret >= len) { + expand_buffer(s); + goto try_again; + } + + s->len += ret; + + return 1; +} + +/** + * trace_seq_vprintf - sequence printing of trace information + * @s: trace sequence descriptor + * @fmt: printf format string + * + * The tracer may use either sequence operations or its own + * copy to user routines. To simplify formating of a trace + * trace_seq_printf is used to store strings into a special + * buffer (@s). Then the output may be either used by + * the sequencer or pulled into another buffer. + */ +int +trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args) +{ + int len; + int ret; + + TRACE_SEQ_CHECK(s); + + try_again: + len = (s->buffer_size - 1) - s->len; + + ret = vsnprintf(s->buffer + s->len, len, fmt, args); + + if (ret >= len) { + expand_buffer(s); + goto try_again; + } + + s->len += ret; + + return len; +} + +/** + * trace_seq_puts - trace sequence printing of simple string + * @s: trace sequence descriptor + * @str: simple string to record + * + * The tracer may use either the sequence operations or its own + * copy to user routines. This function records a simple string + * into a special buffer (@s) for later retrieval by a sequencer + * or other mechanism. + */ +int trace_seq_puts(struct trace_seq *s, const char *str) +{ + int len; + + TRACE_SEQ_CHECK(s); + + len = strlen(str); + + while (len > ((s->buffer_size - 1) - s->len)) + expand_buffer(s); + + memcpy(s->buffer + s->len, str, len); + s->len += len; + + return len; +} + +int trace_seq_putc(struct trace_seq *s, unsigned char c) +{ + TRACE_SEQ_CHECK(s); + + while (s->len >= (s->buffer_size - 1)) + expand_buffer(s); + + s->buffer[s->len++] = c; + + return 1; +} + +void trace_seq_terminate(struct trace_seq *s) +{ + TRACE_SEQ_CHECK(s); + + /* There's always one character left on the buffer */ + s->buffer[s->len] = 0; +} + +int trace_seq_do_printf(struct trace_seq *s) +{ + TRACE_SEQ_CHECK(s); + return printf("%.*s", s->len, s->buffer); +} diff --git a/tools/nfsd/inject_fault.sh b/tools/nfsd/inject_fault.sh new file mode 100755 index 000000000000..06a399ac8b2f --- /dev/null +++ b/tools/nfsd/inject_fault.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# +# Copyright (c) 2011 Bryan Schumaker <bjschuma@netapp.com> +# +# Script for easier NFSD fault injection + +# Check that debugfs has been mounted +DEBUGFS=`cat /proc/mounts | grep debugfs` +if [ "$DEBUGFS" == "" ]; then + echo "debugfs does not appear to be mounted!" + echo "Please mount debugfs and try again" + exit 1 +fi + +# Check that the fault injection directory exists +DEBUGDIR=`echo $DEBUGFS | awk '{print $2}'`/nfsd +if [ ! -d "$DEBUGDIR" ]; then + echo "$DEBUGDIR does not exist" + echo "Check that your .config selects CONFIG_NFSD_FAULT_INJECTION" + exit 1 +fi + +function help() +{ + echo "Usage $0 injection_type [count]" + echo "" + echo "Injection types are:" + ls $DEBUGDIR + exit 1 +} + +if [ $# == 0 ]; then + help +elif [ ! -f $DEBUGDIR/$1 ]; then + help +elif [ $# != 2 ]; then + COUNT=0 +else + COUNT=$2 +fi + +BEFORE=`mktemp` +AFTER=`mktemp` +dmesg > $BEFORE +echo $COUNT > $DEBUGDIR/$1 +dmesg > $AFTER +# Capture lines that only exist in the $AFTER file +diff $BEFORE $AFTER | grep ">" +rm -f $BEFORE $AFTER diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index 416684be0ad3..26b823b61aa1 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -19,3 +19,5 @@ TAGS cscope* config.mak config.mak.autogen +*-bison.* +*-flex.* diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index 4626a398836a..ca600e09c8d4 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -1,3 +1,10 @@ +OUTPUT := ./ +ifeq ("$(origin O)", "command line") + ifneq ($(O),) + OUTPUT := $(O)/ + endif +endif + MAN1_TXT= \ $(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \ $(wildcard perf-*.txt)) \ @@ -6,10 +13,11 @@ MAN5_TXT= MAN7_TXT= MAN_TXT = $(MAN1_TXT) $(MAN5_TXT) $(MAN7_TXT) -MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT)) -MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT)) +_MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT)) +_MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT)) -DOC_HTML=$(MAN_HTML) +MAN_XML=$(addprefix $(OUTPUT),$(_MAN_XML)) +MAN_HTML=$(addprefix $(OUTPUT),$(_MAN_HTML)) ARTICLES = # with their own formatting rules. @@ -18,11 +26,17 @@ API_DOCS = $(patsubst %.txt,%,$(filter-out technical/api-index-skel.txt technica SP_ARTICLES += $(API_DOCS) SP_ARTICLES += technical/api-index -DOC_HTML += $(patsubst %,%.html,$(ARTICLES) $(SP_ARTICLES)) +_DOC_HTML = $(_MAN_HTML) +_DOC_HTML+=$(patsubst %,%.html,$(ARTICLES) $(SP_ARTICLES)) +DOC_HTML=$(addprefix $(OUTPUT),$(_DOC_HTML)) -DOC_MAN1=$(patsubst %.txt,%.1,$(MAN1_TXT)) -DOC_MAN5=$(patsubst %.txt,%.5,$(MAN5_TXT)) -DOC_MAN7=$(patsubst %.txt,%.7,$(MAN7_TXT)) +_DOC_MAN1=$(patsubst %.txt,%.1,$(MAN1_TXT)) +_DOC_MAN5=$(patsubst %.txt,%.5,$(MAN5_TXT)) +_DOC_MAN7=$(patsubst %.txt,%.7,$(MAN7_TXT)) + +DOC_MAN1=$(addprefix $(OUTPUT),$(_DOC_MAN1)) +DOC_MAN5=$(addprefix $(OUTPUT),$(_DOC_MAN5)) +DOC_MAN7=$(addprefix $(OUTPUT),$(_DOC_MAN7)) # Make the path relative to DESTDIR, not prefix ifndef DESTDIR @@ -150,9 +164,9 @@ man1: $(DOC_MAN1) man5: $(DOC_MAN5) man7: $(DOC_MAN7) -info: perf.info perfman.info +info: $(OUTPUT)perf.info $(OUTPUT)perfman.info -pdf: user-manual.pdf +pdf: $(OUTPUT)user-manual.pdf install: install-man @@ -166,7 +180,7 @@ install-man: man install-info: info $(INSTALL) -d -m 755 $(DESTDIR)$(infodir) - $(INSTALL) -m 644 perf.info perfman.info $(DESTDIR)$(infodir) + $(INSTALL) -m 644 $(OUTPUT)perf.info $(OUTPUT)perfman.info $(DESTDIR)$(infodir) if test -r $(DESTDIR)$(infodir)/dir; then \ $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perf.info ;\ $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perfman.info ;\ @@ -176,7 +190,7 @@ install-info: info install-pdf: pdf $(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir) - $(INSTALL) -m 644 user-manual.pdf $(DESTDIR)$(pdfdir) + $(INSTALL) -m 644 $(OUTPUT)user-manual.pdf $(DESTDIR)$(pdfdir) #install-html: html # '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir) @@ -189,14 +203,14 @@ install-pdf: pdf # # Determine "include::" file references in asciidoc files. # -doc.dep : $(wildcard *.txt) build-docdep.perl +$(OUTPUT)doc.dep : $(wildcard *.txt) build-docdep.perl $(QUIET_GEN)$(RM) $@+ $@ && \ $(PERL_PATH) ./build-docdep.perl >$@+ $(QUIET_STDERR) && \ mv $@+ $@ --include doc.dep +-include $(OUPTUT)doc.dep -cmds_txt = cmds-ancillaryinterrogators.txt \ +_cmds_txt = cmds-ancillaryinterrogators.txt \ cmds-ancillarymanipulators.txt \ cmds-mainporcelain.txt \ cmds-plumbinginterrogators.txt \ @@ -205,32 +219,36 @@ cmds_txt = cmds-ancillaryinterrogators.txt \ cmds-synchelpers.txt \ cmds-purehelpers.txt \ cmds-foreignscminterface.txt +cmds_txt=$(addprefix $(OUTPUT),$(_cmds_txt)) -$(cmds_txt): cmd-list.made +$(cmds_txt): $(OUTPUT)cmd-list.made -cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT) +$(OUTPUT)cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT) $(QUIET_GEN)$(RM) $@ && \ $(PERL_PATH) ./cmd-list.perl ../command-list.txt $(QUIET_STDERR) && \ date >$@ clean: - $(RM) *.xml *.xml+ *.html *.html+ *.1 *.5 *.7 - $(RM) *.texi *.texi+ *.texi++ perf.info perfman.info - $(RM) howto-index.txt howto/*.html doc.dep - $(RM) technical/api-*.html technical/api-index.txt - $(RM) $(cmds_txt) *.made - -$(MAN_HTML): %.html : %.txt + $(RM) $(MAN_XML) $(addsuffix +,$(MAN_XML)) + $(RM) $(MAN_HTML) $(addsuffix +,$(MAN_HTML)) + $(RM) $(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7) + $(RM) $(OUTPUT)*.texi $(OUTPUT)*.texi+ $(OUTPUT)*.texi++ + $(RM) $(OUTPUT)perf.info $(OUTPUT)perfman.info + $(RM) $(OUTPUT)howto-index.txt $(OUTPUT)howto/*.html $(OUTPUT)doc.dep + $(RM) $(OUTPUT)technical/api-*.html $(OUTPUT)technical/api-index.txt + $(RM) $(cmds_txt) $(OUTPUT)*.made + +$(MAN_HTML): $(OUTPUT)%.html : %.txt $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ $(ASCIIDOC) -b xhtml11 -d manpage -f asciidoc.conf \ $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ mv $@+ $@ -%.1 %.5 %.7 : %.xml +$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml $(QUIET_XMLTO)$(RM) $@ && \ - xmlto -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $< + xmlto -o $(OUTPUT) -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $< -%.xml : %.txt +$(OUTPUT)%.xml : %.txt $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ $(ASCIIDOC) -b docbook -d manpage -f asciidoc.conf \ $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ @@ -239,25 +257,25 @@ $(MAN_HTML): %.html : %.txt XSLT = docbook.xsl XSLTOPTS = --xinclude --stringparam html.stylesheet docbook-xsl.css -user-manual.html: user-manual.xml +$(OUTPUT)user-manual.html: $(OUTPUT)user-manual.xml $(QUIET_XSLTPROC)xsltproc $(XSLTOPTS) -o $@ $(XSLT) $< -perf.info: user-manual.texi - $(QUIET_MAKEINFO)$(MAKEINFO) --no-split -o $@ user-manual.texi +$(OUTPUT)perf.info: $(OUTPUT)user-manual.texi + $(QUIET_MAKEINFO)$(MAKEINFO) --no-split -o $@ $(OUTPUT)user-manual.texi -user-manual.texi: user-manual.xml +$(OUTPUT)user-manual.texi: $(OUTPUT)user-manual.xml $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ - $(DOCBOOK2X_TEXI) user-manual.xml --encoding=UTF-8 --to-stdout >$@++ && \ + $(DOCBOOK2X_TEXI) $(OUTPUT)user-manual.xml --encoding=UTF-8 --to-stdout >$@++ && \ $(PERL_PATH) fix-texi.perl <$@++ >$@+ && \ rm $@++ && \ mv $@+ $@ -user-manual.pdf: user-manual.xml +$(OUTPUT)user-manual.pdf: $(OUTPUT)user-manual.xml $(QUIET_DBLATEX)$(RM) $@+ $@ && \ $(DBLATEX) -o $@+ -p /etc/asciidoc/dblatex/asciidoc-dblatex.xsl -s /etc/asciidoc/dblatex/asciidoc-dblatex.sty $< && \ mv $@+ $@ -perfman.texi: $(MAN_XML) cat-texi.perl +$(OUTPUT)perfman.texi: $(MAN_XML) cat-texi.perl $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ ($(foreach xml,$(MAN_XML),$(DOCBOOK2X_TEXI) --encoding=UTF-8 \ --to-stdout $(xml) &&) true) > $@++ && \ @@ -265,7 +283,7 @@ perfman.texi: $(MAN_XML) cat-texi.perl rm $@++ && \ mv $@+ $@ -perfman.info: perfman.texi +$(OUTPUT)perfman.info: $(OUTPUT)perfman.texi $(QUIET_MAKEINFO)$(MAKEINFO) --no-split --no-validate $*.texi $(patsubst %.txt,%.texi,$(MAN_TXT)): %.texi : %.xml diff --git a/tools/perf/Documentation/examples.txt b/tools/perf/Documentation/examples.txt index 8eb6c489fb15..77f952762426 100644 --- a/tools/perf/Documentation/examples.txt +++ b/tools/perf/Documentation/examples.txt @@ -17,8 +17,8 @@ titan:~> perf list kmem:kmem_cache_alloc_node [Tracepoint event] kmem:kfree [Tracepoint event] kmem:kmem_cache_free [Tracepoint event] - kmem:mm_page_free_direct [Tracepoint event] - kmem:mm_pagevec_free [Tracepoint event] + kmem:mm_page_free [Tracepoint event] + kmem:mm_page_free_batched [Tracepoint event] kmem:mm_page_alloc [Tracepoint event] kmem:mm_page_alloc_zone_locked [Tracepoint event] kmem:mm_page_pcpu_drain [Tracepoint event] @@ -29,15 +29,15 @@ measured. For example the page alloc/free properties of a 'hackbench run' are: titan:~> perf stat -e kmem:mm_page_pcpu_drain -e kmem:mm_page_alloc - -e kmem:mm_pagevec_free -e kmem:mm_page_free_direct ./hackbench 10 + -e kmem:mm_page_free_batched -e kmem:mm_page_free ./hackbench 10 Time: 0.575 Performance counter stats for './hackbench 10': 13857 kmem:mm_page_pcpu_drain 27576 kmem:mm_page_alloc - 6025 kmem:mm_pagevec_free - 20934 kmem:mm_page_free_direct + 6025 kmem:mm_page_free_batched + 20934 kmem:mm_page_free 0.613972165 seconds time elapsed @@ -45,8 +45,8 @@ You can observe the statistical properties as well, by using the 'repeat the workload N times' feature of perf stat: titan:~> perf stat --repeat 5 -e kmem:mm_page_pcpu_drain -e - kmem:mm_page_alloc -e kmem:mm_pagevec_free -e - kmem:mm_page_free_direct ./hackbench 10 + kmem:mm_page_alloc -e kmem:mm_page_free_batched -e + kmem:mm_page_free ./hackbench 10 Time: 0.627 Time: 0.644 Time: 0.564 @@ -57,8 +57,8 @@ You can observe the statistical properties as well, by using the 12920 kmem:mm_page_pcpu_drain ( +- 3.359% ) 25035 kmem:mm_page_alloc ( +- 3.783% ) - 6104 kmem:mm_pagevec_free ( +- 0.934% ) - 18376 kmem:mm_page_free_direct ( +- 4.941% ) + 6104 kmem:mm_page_free_batched ( +- 0.934% ) + 18376 kmem:mm_page_free ( +- 4.941% ) 0.643954516 seconds time elapsed ( +- 2.363% ) @@ -158,15 +158,15 @@ Or you can observe the whole system's page allocations for 10 seconds: titan:~/git> perf stat -a -e kmem:mm_page_pcpu_drain -e -kmem:mm_page_alloc -e kmem:mm_pagevec_free -e -kmem:mm_page_free_direct sleep 10 +kmem:mm_page_alloc -e kmem:mm_page_free_batched -e +kmem:mm_page_free sleep 10 Performance counter stats for 'sleep 10': 171585 kmem:mm_page_pcpu_drain 322114 kmem:mm_page_alloc - 73623 kmem:mm_pagevec_free - 254115 kmem:mm_page_free_direct + 73623 kmem:mm_page_free_batched + 254115 kmem:mm_page_free 10.000591410 seconds time elapsed @@ -174,15 +174,15 @@ Or observe how fluctuating the page allocations are, via statistical analysis done over ten 1-second intervals: titan:~/git> perf stat --repeat 10 -a -e kmem:mm_page_pcpu_drain -e - kmem:mm_page_alloc -e kmem:mm_pagevec_free -e - kmem:mm_page_free_direct sleep 1 + kmem:mm_page_alloc -e kmem:mm_page_free_batched -e + kmem:mm_page_free sleep 1 Performance counter stats for 'sleep 1' (10 runs): 17254 kmem:mm_page_pcpu_drain ( +- 3.709% ) 34394 kmem:mm_page_alloc ( +- 4.617% ) - 7509 kmem:mm_pagevec_free ( +- 4.820% ) - 25653 kmem:mm_page_free_direct ( +- 3.672% ) + 7509 kmem:mm_page_free_batched ( +- 4.820% ) + 25653 kmem:mm_page_free ( +- 3.672% ) 1.058135029 seconds time elapsed ( +- 3.089% ) diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt index 85c5f026930d..c89f9e1453f7 100644 --- a/tools/perf/Documentation/perf-annotate.txt +++ b/tools/perf/Documentation/perf-annotate.txt @@ -22,7 +22,7 @@ OPTIONS ------- -i:: --input=:: - Input file name. (default: perf.data) + Input file name. (default: perf.data unless stdin is a fifo) -d:: --dsos=<dso[,dso...]>:: @@ -66,12 +66,25 @@ OPTIONS used. This interfaces starts by centering on the line with more samples, TAB/UNTAB cycles through the lines with more samples. --c:: +-C:: --cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can be provided as a comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. Default is to report samples on all CPUs. +--asm-raw:: + Show raw instruction encoding of assembly instructions. + +--source:: + Interleave source code with assembly code. Enabled by default, + disable with --no-source. + +--symfs=<directory>:: + Look for files with symbols relative to this directory. + +-M:: +--disassembler-style=:: Set disassembler style for objdump. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1] diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt index a3dbadb26ef5..7065cd6fbdfc 100644 --- a/tools/perf/Documentation/perf-bench.txt +++ b/tools/perf/Documentation/perf-bench.txt @@ -12,7 +12,7 @@ SYNOPSIS DESCRIPTION ----------- -This 'perf bench' command is general framework for benchmark suites. +This 'perf bench' command is a general framework for benchmark suites. COMMON OPTIONS -------------- @@ -45,14 +45,20 @@ SUBSYSTEM 'sched':: Scheduler and IPC mechanisms. +'mem':: + Memory access performance. + +'all':: + All benchmark subsystems. + SUITES FOR 'sched' ~~~~~~~~~~~~~~~~~~ *messaging*:: Suite for evaluating performance of scheduler and IPC mechanisms. Based on hackbench by Rusty Russell. -Options of *pipe* -^^^^^^^^^^^^^^^^^ +Options of *messaging* +^^^^^^^^^^^^^^^^^^^^^^ -p:: --pipe:: Use pipe() instead of socketpair() @@ -115,6 +121,72 @@ Example of *pipe* 59004 ops/sec --------------------- +SUITES FOR 'mem' +~~~~~~~~~~~~~~~~ +*memcpy*:: +Suite for evaluating performance of simple memory copy in various ways. + +Options of *memcpy* +^^^^^^^^^^^^^^^^^^^ +-l:: +--length:: +Specify length of memory to copy (default: 1MB). +Available units are B, KB, MB, GB and TB (case insensitive). + +-r:: +--routine:: +Specify routine to copy (default: default). +Available routines are depend on the architecture. +On x86-64, x86-64-unrolled, x86-64-movsq and x86-64-movsb are supported. + +-i:: +--iterations:: +Repeat memcpy invocation this number of times. + +-c:: +--cycle:: +Use perf's cpu-cycles event instead of gettimeofday syscall. + +-o:: +--only-prefault:: +Show only the result with page faults before memcpy. + +-n:: +--no-prefault:: +Show only the result without page faults before memcpy. + +*memset*:: +Suite for evaluating performance of simple memory set in various ways. + +Options of *memset* +^^^^^^^^^^^^^^^^^^^ +-l:: +--length:: +Specify length of memory to set (default: 1MB). +Available units are B, KB, MB, GB and TB (case insensitive). + +-r:: +--routine:: +Specify routine to set (default: default). +Available routines are depend on the architecture. +On x86-64, x86-64-unrolled, x86-64-stosq and x86-64-stosb are supported. + +-i:: +--iterations:: +Repeat memset invocation this number of times. + +-c:: +--cycle:: +Use perf's cpu-cycles event instead of gettimeofday syscall. + +-o:: +--only-prefault:: +Show only the result with page faults before memset. + +-n:: +--no-prefault:: +Show only the result without page faults before memset. + SEE ALSO -------- linkperf:perf[1] diff --git a/tools/perf/Documentation/perf-buildid-list.txt b/tools/perf/Documentation/perf-buildid-list.txt index 5eaac6f26d51..25c52efcc7f0 100644 --- a/tools/perf/Documentation/perf-buildid-list.txt +++ b/tools/perf/Documentation/perf-buildid-list.txt @@ -16,6 +16,9 @@ This command displays the buildids found in a perf.data file, so that other tools can be used to fetch packages with matching symbol tables for use by perf report. +It can also be used to show the build id of the running kernel or in an ELF +file using -i/--input. + OPTIONS ------- -H:: @@ -23,10 +26,13 @@ OPTIONS Show only DSOs with hits. -i:: --input=:: - Input file name. (default: perf.data) + Input file name. (default: perf.data unless stdin is a fifo) -f:: --force:: Don't do ownership validation. +-k:: +--kernel:: + Show running kernel build id. -v:: --verbose:: Be more verbose. diff --git a/tools/perf/Documentation/perf-evlist.txt b/tools/perf/Documentation/perf-evlist.txt index 0cada9e053dc..15217345c2fa 100644 --- a/tools/perf/Documentation/perf-evlist.txt +++ b/tools/perf/Documentation/perf-evlist.txt @@ -18,7 +18,15 @@ OPTIONS ------- -i:: --input=:: - Input file name. (default: perf.data) + Input file name. (default: perf.data unless stdin is a fifo) + +-F:: +--freq=:: + Show just the sample frequency used for each event. + +-v:: +--verbose=:: + Show all fields. SEE ALSO -------- diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt index a52fcde894c7..7c8fbbf3f61c 100644 --- a/tools/perf/Documentation/perf-kmem.txt +++ b/tools/perf/Documentation/perf-kmem.txt @@ -23,7 +23,7 @@ OPTIONS ------- -i <file>:: --input=<file>:: - Select the input file (default: perf.data) + Select the input file (default: perf.data unless stdin is a fifo) --caller:: Show per-callsite statistics diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 7a527f7e9da9..ddc22525228d 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -21,6 +21,8 @@ EVENT MODIFIERS Events can optionally have a modifer by appending a colon and one or more modifiers. Modifiers allow the user to restrict when events are counted with 'u' for user-space, 'k' for kernel, 'h' for hypervisor. +Additional modifiers are 'G' for guest counting (in KVM guests) and 'H' +for host counting (not in KVM guests). The 'p' modifier can be used for specifying how precise the instruction address should be. The 'p' modifier is currently only implemented for diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt index 4a26a2f3a6a3..c7f5f55634ac 100644 --- a/tools/perf/Documentation/perf-lock.txt +++ b/tools/perf/Documentation/perf-lock.txt @@ -8,7 +8,7 @@ perf-lock - Analyze lock events SYNOPSIS -------- [verse] -'perf lock' {record|report|trace} +'perf lock' {record|report|script|info} DESCRIPTION ----------- @@ -20,16 +20,19 @@ and statistics with this 'perf lock' command. produces the file "perf.data" which contains tracing results of lock events. - 'perf lock trace' shows raw lock events. - 'perf lock report' reports statistical data. + 'perf lock script' shows raw lock events. + + 'perf lock info' shows metadata like threads or addresses + of lock instances. + COMMON OPTIONS -------------- -i:: --input=<file>:: - Input file name. + Input file name. (default: perf.data unless stdin is a fifo) -v:: --verbose:: @@ -47,6 +50,17 @@ REPORT OPTIONS Sorting key. Possible values: acquired (default), contended, wait_total, wait_max, wait_min. +INFO OPTIONS +------------ + +-t:: +--threads:: + dump thread list in perf.data + +-m:: +--map:: + dump map of lock instances (address:name table) + SEE ALSO -------- linkperf:perf[1] diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index 2780d9ce48bf..b715cb71592b 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -77,7 +77,8 @@ OPTIONS -F:: --funcs:: - Show available functions in given module or kernel. + Show available functions in given module or kernel. With -x/--exec, + can also list functions in a user space executable / shared library. --filter=FILTER:: (Only for --vars and --funcs) Set filter. FILTER is a combination of glob @@ -98,6 +99,15 @@ OPTIONS --max-probes:: Set the maximum number of probe points for an event. Default is 128. +-x:: +--exec=PATH:: + Specify path to the executable or shared library file for user + space tracing. Can also be used with --funcs option. + +In absence of -m/-x options, perf probe checks if the first argument after +the options is an absolute path name. If its an absolute path, perf probe +uses it as a target module/target user space binary to probe. + PROBE SYNTAX ------------ Probe points are defined by following syntax. @@ -182,6 +192,13 @@ Delete all probes on schedule(). ./perf probe --del='schedule*' +Add probes at zfree() function on /bin/zsh + + ./perf probe -x /bin/zsh zfree or ./perf probe /bin/zsh zfree + +Add probes at malloc() function on libc + + ./perf probe -x /lib/libc.so.6 malloc or ./perf probe /lib/libc.so.6 malloc SEE ALSO -------- diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 5a520f825295..b38a1f9ad460 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -52,11 +52,15 @@ OPTIONS -p:: --pid=:: - Record events on existing process ID. + Record events on existing process ID (comma separated list). -t:: --tid=:: - Record events on existing thread ID. + Record events on existing thread ID (comma separated list). + +-u:: +--uid=:: + Record events in threads owned by uid. Name or number. -r:: --realtime=:: @@ -89,7 +93,7 @@ OPTIONS -m:: --mmap-pages=:: - Number of mmap data pages. + Number of mmap data pages. Must be a power of two. -g:: --call-graph:: @@ -148,6 +152,36 @@ an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must ha corresponding events, i.e., they always refer to events defined earlier on the command line. +-b:: +--branch-any:: +Enable taken branch stack sampling. Any type of taken branch may be sampled. +This is a shortcut for --branch-filter any. See --branch-filter for more infos. + +-j:: +--branch-filter:: +Enable taken branch stack sampling. Each sample captures a series of consecutive +taken branches. The number of branches captured with each sample depends on the +underlying hardware, the type of branches of interest, and the executed code. +It is possible to select the types of branches captured by enabling filters. The +following filters are defined: + + - any: any type of branches + - any_call: any function call or system call + - any_ret: any function return or system call return + - ind_call: any indirect branch + - u: only when the branch target is at the user level + - k: only when the branch target is in the kernel + - hv: only when the target is at the hypervisor level + ++ +The option requires at least one branch type among any, any_call, any_ret, ind_call. +The privilege levels may be ommitted, in which case, the privilege levels of the associated +event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege +levels are subject to permissions. When sampling on multiple events, branch stack sampling +is enabled for all the sampling events. The sampled branch type is the same for all events. +The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k +Note that this feature may not be available on all processors. + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1] diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 04253c07d19a..495210a612c4 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -19,7 +19,7 @@ OPTIONS ------- -i:: --input=:: - Input file name. (default: perf.data) + Input file name. (default: perf.data unless stdin is a fifo) -v:: --verbose:: @@ -39,7 +39,7 @@ OPTIONS -T:: --threads:: Show per-thread event counters --C:: +-c:: --comms=:: Only consider symbols in these comms. CSV that understands file://filename entries. @@ -48,13 +48,16 @@ OPTIONS Only consider these symbols. CSV that understands file://filename entries. +--symbol-filter=:: + Only show symbols that match (partially) with this filter. + -U:: --hide-unresolved:: Only display entries resolved to a symbol. -s:: --sort=:: - Sort by key(s): pid, comm, dso, symbol, parent. + Sort by key(s): pid, comm, dso, symbol, parent, srcline. -p:: --parent=<regex>:: @@ -80,9 +83,10 @@ OPTIONS --dump-raw-trace:: Dump raw trace in ASCII. --g [type,min,order]:: +-g [type,min[,limit],order]:: --call-graph:: - Display call chains using type, min percent threshold and order. + Display call chains using type, min percent threshold, optional print + limit and order. type can be either: - flat: single column, linear exposure of call chains. - graph: use a graph tree, displaying absolute overhead rates. @@ -109,6 +113,8 @@ OPTIONS requires a tty, if one is not present, as when piping to other commands, the stdio interface is used. +--gtk:: Use the GTK2 interface. + -k:: --vmlinux=<file>:: vmlinux pathname @@ -128,12 +134,40 @@ OPTIONS --symfs=<directory>:: Look for files with symbols relative to this directory. --c:: +-C:: --cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can be provided as a comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. Default is to report samples on all CPUs. +-M:: +--disassembler-style=:: Set disassembler style for objdump. + +--source:: + Interleave source code with assembly code. Enabled by default, + disable with --no-source. + +--asm-raw:: + Show raw instruction encoding of assembly instructions. + +--show-total-period:: Show a column with the sum of periods. + +-I:: +--show-info:: + Display extended information about the perf.data file. This adds + information which may be very large and thus may clutter the display. + It currently includes: cpu and numa topology of the host system. + +-b:: +--branch-stack:: + Use the addresses of sampled taken branches instead of the instruction + address to build the histograms. To generate meaningful output, the + perf.data file must have been obtained using perf record -b or + perf record --branch-filter xxx where xxx is a branch filter option. + perf report is able to auto-detect whether a perf.data file contains + branch stacks and it will automatically switch to the branch view mode, + unless --no-branch-stack is used. + SEE ALSO -------- -linkperf:perf-stat[1] +linkperf:perf-stat[1], linkperf:perf-annotate[1] diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt index 46822d5fde1c..8ff4df956951 100644 --- a/tools/perf/Documentation/perf-sched.txt +++ b/tools/perf/Documentation/perf-sched.txt @@ -8,7 +8,7 @@ perf-sched - Tool to trace/measure scheduler properties (latencies) SYNOPSIS -------- [verse] -'perf sched' {record|latency|map|replay|trace} +'perf sched' {record|latency|map|replay|script} DESCRIPTION ----------- @@ -20,8 +20,8 @@ There are five variants of perf sched: 'perf sched latency' to report the per task scheduling latencies and other scheduling properties of the workload. - 'perf sched trace' to see a detailed trace of the workload that - was recorded. + 'perf sched script' to see a detailed trace of the workload that + was recorded (aliased to 'perf script' for now). 'perf sched replay' to simulate the workload that was recorded via perf sched record. (this is done by starting up mockup threads @@ -40,7 +40,7 @@ OPTIONS ------- -i:: --input=<file>:: - Input file name. (default: perf.data) + Input file name. (default: perf.data unless stdin is a fifo) -v:: --verbose:: diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index db017867d9e8..e9cbfcddfa3f 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -106,7 +106,7 @@ OPTIONS -i:: --input=:: - Input file name. + Input file name. (default: perf.data unless stdin is a fifo) -d:: --debug-mode:: @@ -115,7 +115,7 @@ OPTIONS -f:: --fields:: Comma separated list of fields to print. Options are: - comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr. + comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace @@ -182,12 +182,27 @@ OPTIONS --hide-call-graph:: When printing symbols do not display call chain. --c:: +-C:: --cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can be provided as a comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. Default is to report samples on all CPUs. +-c:: +--comms=:: + Only display events for these comms. CSV that understands + file://filename entries. + +-I:: +--show-info:: + Display extended information about the perf.data file. This adds + information which may be very large and thus may clutter the display. + It currently includes: cpu and numa topology of the host system. + It can only be used with the perf script report mode. + +--show-kernel-path:: + Try to resolve the path of [kernel.kallsyms] + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script-perl[1], diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 918cc38ee6d1..2fa173b51970 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -35,11 +35,11 @@ OPTIONS child tasks do not inherit counters -p:: --pid=<pid>:: - stat events on existing process id + stat events on existing process id (comma separated list) -t:: --tid=<tid>:: - stat events on existing thread id + stat events on existing thread id (comma separated list) -a:: @@ -94,6 +94,22 @@ an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must ha corresponding events, i.e., they always refer to events defined earlier on the command line. +-o file:: +--output file:: +Print the output into the designated file. + +--append:: +Append to the output file designated with the -o option. Ignored if -o is not specified. + +--log-fd:: + +Log output to fd, instead of stderr. Complementary to --output, and mutually exclusive +with it. --append may be used here. Examples: + 3>results perf stat --log-fd 3 -- $cmd + 3>>results perf stat --log-fd 3 --append -- $cmd + + + EXAMPLES -------- diff --git a/tools/perf/Documentation/perf-test.txt b/tools/perf/Documentation/perf-test.txt index 2c3b462f64b0..b24ac40fcd58 100644 --- a/tools/perf/Documentation/perf-test.txt +++ b/tools/perf/Documentation/perf-test.txt @@ -8,13 +8,19 @@ perf-test - Runs sanity tests. SYNOPSIS -------- [verse] -'perf test <options>' +'perf test [<options>] [{list <test-name-fragment>|[<test-name-fragments>|<test-numbers>]}]' DESCRIPTION ----------- This command does assorted sanity tests, initially through linked routines but also will look for a directory with more tests in the form of scripts. +To get a list of available tests use 'perf test list', specifying a test name +fragment will show all tests that have it. + +To run just specific tests, inform test name fragments or the numbers obtained +from 'perf test list'. + OPTIONS ------- -v:: diff --git a/tools/perf/Documentation/perf-timechart.txt b/tools/perf/Documentation/perf-timechart.txt index d7b79e2ba2ad..1632b0efc757 100644 --- a/tools/perf/Documentation/perf-timechart.txt +++ b/tools/perf/Documentation/perf-timechart.txt @@ -27,7 +27,7 @@ OPTIONS Select the output file (default: output.svg) -i:: --input=:: - Select the input file (default: perf.data) + Select the input file (default: perf.data unless stdin is a fifo) -w:: --width=:: Select the width of the SVG file (default: 1000) diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index f6eb1cdafb77..5b80d84d6b4a 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -72,11 +72,15 @@ Default is to monitor all CPUS. -p <pid>:: --pid=<pid>:: - Profile events on existing Process ID. + Profile events on existing Process ID (comma separated list). -t <tid>:: --tid=<tid>:: - Profile events on existing thread ID. + Profile events on existing thread ID (comma separated list). + +-u:: +--uid=:: + Record events in threads owned by uid. Name or number. -r <priority>:: --realtime=<priority>:: @@ -106,6 +110,51 @@ Default is to monitor all CPUS. --zero:: Zero history across display updates. +-s:: +--sort:: + Sort by key(s): pid, comm, dso, symbol, parent, srcline. + +-n:: +--show-nr-samples:: + Show a column with the number of samples. + +--show-total-period:: + Show a column with the sum of periods. + +--dsos:: + Only consider symbols in these dsos. + +--comms:: + Only consider symbols in these comms. + +--symbols:: + Only consider these symbols. + +-M:: +--disassembler-style=:: Set disassembler style for objdump. + +--source:: + Interleave source code with assembly code. Enabled by default, + disable with --no-source. + +--asm-raw:: + Show raw instruction encoding of assembly instructions. + +-G [type,min,order]:: +--call-graph:: + Display call chains using type, min percent threshold and order. + type can be either: + - flat: single column, linear exposure of call chains. + - graph: use a graph tree, displaying absolute overhead rates. + - fractal: like graph, but displays relative rates. Each branch of + the tree is considered as a new profiled object. + + order can be either: + - callee: callee based call graph. + - caller: inverted caller based call graph. + + Default: fractal,0.5,callee. + INTERACTIVE PROMPTING KEYS -------------------------- @@ -130,9 +179,6 @@ INTERACTIVE PROMPTING KEYS [S]:: Stop annotation, return to full profile display. -[w]:: - Toggle between weighted sum and individual count[E]r profile. - [z]:: Toggle event count zeroing across display updates. diff --git a/tools/perf/Documentation/perfconfig.example b/tools/perf/Documentation/perfconfig.example new file mode 100644 index 000000000000..767ea2436e1c --- /dev/null +++ b/tools/perf/Documentation/perfconfig.example @@ -0,0 +1,29 @@ +[colors] + + # These were the old defaults + top = red, lightgray + medium = green, lightgray + normal = black, lightgray + selected = lightgray, magenta + code = blue, lightgray + addr = magenta, lightgray + +[tui] + + # Defaults if linked with libslang + report = on + annotate = on + top = on + +[buildid] + + # Default, disable using /dev/null + dir = /root/.debug + +[annotate] + + # Defaults + hide_src_code = false + use_offset = true + jump_arrows = true + show_nr_jumps = false diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index c12659d8cb26..b4b572e8c100 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -1,4 +1,7 @@ tools/perf +tools/scripts +tools/lib/traceevent +include/linux/const.h include/linux/perf_event.h include/linux/rbtree.h include/linux/list.h @@ -8,6 +11,7 @@ lib/rbtree.c include/linux/swab.h arch/*/include/asm/unistd*.h arch/*/lib/memcpy*.S +arch/*/lib/memset*.S include/linux/poison.h include/linux/magic.h include/linux/hw_breakpoint.h diff --git a/tools/perf/Makefile b/tools/perf/Makefile index e9d5c271db69..35655c3a7b7a 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -1,20 +1,22 @@ -ifeq ("$(origin O)", "command line") - OUTPUT := $(O)/ -endif +include ../scripts/Makefile.include # The default target of this Makefile is... all: include config/utilities.mak -ifneq ($(OUTPUT),) -# check that the output directory actually exists -OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd) -$(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist)) -endif - # Define V to have a more verbose compile. # +# Define O to save output files in a separate directory. +# +# Define ARCH as name of target architecture if you want cross-builds. +# +# Define CROSS_COMPILE as prefix name of compiler if you want cross-builds. +# +# Define NO_LIBPERL to disable perl script extension. +# +# Define NO_LIBPYTHON to disable python script extension. +# # Define PYTHON to point to the python binary if the default # `python' is not correct; for example: PYTHON=python2 # @@ -32,6 +34,10 @@ endif # Define NO_DWARF if you do not want debug-info analysis feature at all. # # Define WERROR=0 to disable treating any warnings as errors. +# +# Define NO_NEWT if you do not want TUI support. +# +# Define NO_DEMANGLE if you do not want C++ symbol demangling. $(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) @@ -61,7 +67,7 @@ ifeq ($(ARCH),x86_64) ifeq (${IS_X86_64}, 1) RAW_ARCH := x86_64 ARCH_CFLAGS := -DARCH_X86_64 - ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S + ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S endif endif @@ -70,41 +76,22 @@ ifneq ($(WERROR),0) CFLAGS_WERROR := -Werror endif -# -# Include saner warnings here, which can catch bugs: -# - -EXTRA_WARNINGS := -Wformat -EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wformat-security -EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wformat-y2k -EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wshadow -EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Winit-self -EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wpacked -EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wredundant-decls -EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wstrict-aliasing=3 -EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wswitch-default -EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wswitch-enum -EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wno-system-headers -EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wundef -EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wwrite-strings -EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wbad-function-cast -EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wmissing-declarations -EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wmissing-prototypes -EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wnested-externs -EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wold-style-definition -EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wstrict-prototypes -EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wdeclaration-after-statement - ifeq ("$(origin DEBUG)", "command line") PERF_DEBUG = $(DEBUG) endif ifndef PERF_DEBUG - CFLAGS_OPTIMIZE = -O6 + CFLAGS_OPTIMIZE = -O6 -D_FORTIFY_SOURCE=2 endif -CFLAGS = -fno-omit-frame-pointer -ggdb3 -Wall -Wextra -std=gnu99 $(CFLAGS_WERROR) $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) +ifdef PARSER_DEBUG + PARSER_DEBUG_BISON := -t + PARSER_DEBUG_FLEX := -d + PARSER_DEBUG_CFLAGS := -DPARSER_DEBUG +endif + +CFLAGS = -fno-omit-frame-pointer -ggdb3 -Wall -Wextra -std=gnu99 $(CFLAGS_WERROR) $(CFLAGS_OPTIMIZE) $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) $(PARSER_DEBUG_CFLAGS) EXTLIBS = -lpthread -lrt -lelf -lm -ALL_CFLAGS = $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 +ALL_CFLAGS = $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE ALL_LDFLAGS = $(LDFLAGS) STRIP ?= strip @@ -168,10 +155,7 @@ endif ### --- END CONFIGURATION SECTION --- -# Those must not be GNU-specific; they are shared with perl/ which may -# be built by a different compiler. (Note that this is an artifact now -# but it still might be nice to keep that distinction.) -BASIC_CFLAGS = -Iutil/include -Iarch/$(ARCH)/include +BASIC_CFLAGS = -Iutil/include -Iarch/$(ARCH)/include -I$(OUTPUT)util -I$(TRACE_EVENT_DIR) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE BASIC_LDFLAGS = # Guard against environment variables @@ -186,7 +170,10 @@ SCRIPT_SH += perf-archive.sh grep-libs = $(filter -l%,$(1)) strip-libs = $(filter-out -l%,$(1)) -$(OUTPUT)python/perf.so: $(PYRF_OBJS) +PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources) +PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py + +$(OUTPUT)python/perf.so: $(PYRF_OBJS) $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(QUIET_GEN)CFLAGS='$(BASIC_CFLAGS)' $(PYTHON_WORD) util/setup.py \ --quiet build_ext; \ mkdir -p $(OUTPUT)python && \ @@ -197,6 +184,17 @@ $(OUTPUT)python/perf.so: $(PYRF_OBJS) SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH)) +TRACE_EVENT_DIR = ../lib/traceevent/ + +ifeq ("$(origin O)", "command line") + TE_PATH=$(OUTPUT)/ +else + TE_PATH=$(TRACE_EVENT_DIR)/ +endif + +LIBTRACEEVENT = $(TE_PATH)libtraceevent.a +TE_LIB := -L$(TE_PATH) -ltraceevent + # # Single 'perf' binary right now: # @@ -220,6 +218,24 @@ endif export PERL_PATH +FLEX = flex +BISON= bison + +$(OUTPUT)util/parse-events-flex.c: util/parse-events.l + $(QUIET_FLEX)$(FLEX) --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) -t util/parse-events.l > $(OUTPUT)util/parse-events-flex.c + +$(OUTPUT)util/parse-events-bison.c: util/parse-events.y + $(QUIET_BISON)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $(OUTPUT)util/parse-events-bison.c + +$(OUTPUT)util/pmu-flex.c: util/pmu.l + $(QUIET_FLEX)$(FLEX) --header-file=$(OUTPUT)util/pmu-flex.h -t util/pmu.l > $(OUTPUT)util/pmu-flex.c + +$(OUTPUT)util/pmu-bison.c: util/pmu.y + $(QUIET_BISON)$(BISON) -v util/pmu.y -d -o $(OUTPUT)util/pmu-bison.c + +$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c +$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c + LIB_FILE=$(OUTPUT)libperf.a LIB_H += ../../include/linux/perf_event.h @@ -235,7 +251,7 @@ LIB_H += util/include/linux/const.h LIB_H += util/include/linux/ctype.h LIB_H += util/include/linux/kernel.h LIB_H += util/include/linux/list.h -LIB_H += util/include/linux/module.h +LIB_H += util/include/linux/export.h LIB_H += util/include/linux/poison.h LIB_H += util/include/linux/prefetch.h LIB_H += util/include/linux/rbtree.h @@ -252,6 +268,8 @@ LIB_H += util/include/asm/uaccess.h LIB_H += util/include/dwarf-regs.h LIB_H += util/include/asm/dwarf2.h LIB_H += util/include/asm/cpufeature.h +LIB_H += util/include/asm/unistd_32.h +LIB_H += util/include/asm/unistd_64.h LIB_H += perf.h LIB_H += util/annotate.h LIB_H += util/cache.h @@ -259,6 +277,8 @@ LIB_H += util/callchain.h LIB_H += util/build-id.h LIB_H += util/debug.h LIB_H += util/debugfs.h +LIB_H += util/sysfs.h +LIB_H += util/pmu.h LIB_H += util/event.h LIB_H += util/evsel.h LIB_H += util/evlist.h @@ -278,6 +298,7 @@ LIB_H += util/strbuf.h LIB_H += util/strlist.h LIB_H += util/strfilter.h LIB_H += util/svghelper.h +LIB_H += util/tool.h LIB_H += util/run-command.h LIB_H += util/sigchain.h LIB_H += util/symbol.h @@ -296,6 +317,10 @@ LIB_H += util/cpumap.h LIB_H += util/top.h LIB_H += $(ARCH_INCLUDE) LIB_H += util/cgroup.h +LIB_H += $(TRACE_EVENT_DIR)event-parse.h +LIB_H += util/target.h +LIB_H += util/rblist.h +LIB_H += util/intlist.h LIB_OBJS += $(OUTPUT)util/abspath.o LIB_OBJS += $(OUTPUT)util/alias.o @@ -304,6 +329,8 @@ LIB_OBJS += $(OUTPUT)util/build-id.o LIB_OBJS += $(OUTPUT)util/config.o LIB_OBJS += $(OUTPUT)util/ctype.o LIB_OBJS += $(OUTPUT)util/debugfs.o +LIB_OBJS += $(OUTPUT)util/sysfs.o +LIB_OBJS += $(OUTPUT)util/pmu.o LIB_OBJS += $(OUTPUT)util/environment.o LIB_OBJS += $(OUTPUT)util/event.o LIB_OBJS += $(OUTPUT)util/evlist.o @@ -313,6 +340,7 @@ LIB_OBJS += $(OUTPUT)util/help.o LIB_OBJS += $(OUTPUT)util/levenshtein.o LIB_OBJS += $(OUTPUT)util/parse-options.o LIB_OBJS += $(OUTPUT)util/parse-events.o +LIB_OBJS += $(OUTPUT)util/parse-events-test.o LIB_OBJS += $(OUTPUT)util/path.o LIB_OBJS += $(OUTPUT)util/rbtree.o LIB_OBJS += $(OUTPUT)util/bitmap.o @@ -328,6 +356,7 @@ LIB_OBJS += $(OUTPUT)util/usage.o LIB_OBJS += $(OUTPUT)util/wrapper.o LIB_OBJS += $(OUTPUT)util/sigchain.o LIB_OBJS += $(OUTPUT)util/symbol.o +LIB_OBJS += $(OUTPUT)util/dso-test-data.o LIB_OBJS += $(OUTPUT)util/color.o LIB_OBJS += $(OUTPUT)util/pager.o LIB_OBJS += $(OUTPUT)util/header.o @@ -340,6 +369,10 @@ LIB_OBJS += $(OUTPUT)util/session.o LIB_OBJS += $(OUTPUT)util/thread.o LIB_OBJS += $(OUTPUT)util/thread_map.o LIB_OBJS += $(OUTPUT)util/trace-event-parse.o +LIB_OBJS += $(OUTPUT)util/parse-events-flex.o +LIB_OBJS += $(OUTPUT)util/parse-events-bison.o +LIB_OBJS += $(OUTPUT)util/pmu-flex.o +LIB_OBJS += $(OUTPUT)util/pmu-bison.o LIB_OBJS += $(OUTPUT)util/trace-event-read.o LIB_OBJS += $(OUTPUT)util/trace-event-info.o LIB_OBJS += $(OUTPUT)util/trace-event-scripting.o @@ -351,6 +384,9 @@ LIB_OBJS += $(OUTPUT)util/util.o LIB_OBJS += $(OUTPUT)util/xyarray.o LIB_OBJS += $(OUTPUT)util/cpumap.o LIB_OBJS += $(OUTPUT)util/cgroup.o +LIB_OBJS += $(OUTPUT)util/target.o +LIB_OBJS += $(OUTPUT)util/rblist.o +LIB_OBJS += $(OUTPUT)util/intlist.o BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o @@ -361,8 +397,10 @@ BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o ifeq ($(RAW_ARCH),x86_64) BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o +BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o endif BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o +BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o BUILTIN_OBJS += $(OUTPUT)builtin-diff.o BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o @@ -384,7 +422,7 @@ BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o BUILTIN_OBJS += $(OUTPUT)builtin-test.o BUILTIN_OBJS += $(OUTPUT)builtin-inject.o -PERFLIBS = $(LIB_FILE) +PERFLIBS = $(LIB_FILE) $(LIBTRACEEVENT) # Files needed for the python binding, perf.so # pyrf is just an internal name needed for all those wrappers. @@ -461,22 +499,48 @@ else # Fedora has /usr/include/slang/slang.h, but ubuntu /usr/include/slang.h BASIC_CFLAGS += -I/usr/include/slang EXTLIBS += -lnewt -lslang - LIB_OBJS += $(OUTPUT)util/ui/setup.o - LIB_OBJS += $(OUTPUT)util/ui/browser.o - LIB_OBJS += $(OUTPUT)util/ui/browsers/annotate.o - LIB_OBJS += $(OUTPUT)util/ui/browsers/hists.o - LIB_OBJS += $(OUTPUT)util/ui/browsers/map.o - LIB_OBJS += $(OUTPUT)util/ui/browsers/top.o - LIB_OBJS += $(OUTPUT)util/ui/helpline.o - LIB_OBJS += $(OUTPUT)util/ui/progress.o - LIB_OBJS += $(OUTPUT)util/ui/util.o - LIB_H += util/ui/browser.h - LIB_H += util/ui/browsers/map.h - LIB_H += util/ui/helpline.h - LIB_H += util/ui/libslang.h - LIB_H += util/ui/progress.h - LIB_H += util/ui/util.h - LIB_H += util/ui/ui.h + LIB_OBJS += $(OUTPUT)ui/setup.o + LIB_OBJS += $(OUTPUT)ui/browser.o + LIB_OBJS += $(OUTPUT)ui/browsers/annotate.o + LIB_OBJS += $(OUTPUT)ui/browsers/hists.o + LIB_OBJS += $(OUTPUT)ui/browsers/map.o + LIB_OBJS += $(OUTPUT)ui/helpline.o + LIB_OBJS += $(OUTPUT)ui/progress.o + LIB_OBJS += $(OUTPUT)ui/util.o + LIB_OBJS += $(OUTPUT)ui/tui/setup.o + LIB_OBJS += $(OUTPUT)ui/tui/util.o + LIB_H += ui/browser.h + LIB_H += ui/browsers/map.h + LIB_H += ui/helpline.h + LIB_H += ui/keysyms.h + LIB_H += ui/libslang.h + LIB_H += ui/progress.h + LIB_H += ui/util.h + LIB_H += ui/ui.h + endif +endif + +ifdef NO_GTK2 + BASIC_CFLAGS += -DNO_GTK2_SUPPORT +else + FLAGS_GTK2=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) $(shell pkg-config --libs --cflags gtk+-2.0) + ifneq ($(call try-cc,$(SOURCE_GTK2),$(FLAGS_GTK2)),y) + msg := $(warning GTK2 not found, disables GTK2 support. Please install gtk2-devel or libgtk2.0-dev); + BASIC_CFLAGS += -DNO_GTK2_SUPPORT + else + ifeq ($(call try-cc,$(SOURCE_GTK2_INFOBAR),$(FLAGS_GTK2)),y) + BASIC_CFLAGS += -DHAVE_GTK_INFO_BAR + endif + BASIC_CFLAGS += $(shell pkg-config --cflags gtk+-2.0) + EXTLIBS += $(shell pkg-config --libs gtk+-2.0) + LIB_OBJS += $(OUTPUT)ui/gtk/browser.o + LIB_OBJS += $(OUTPUT)ui/gtk/setup.o + LIB_OBJS += $(OUTPUT)ui/gtk/util.o + # Make sure that it'd be included only once. + ifneq ($(findstring -DNO_NEWT_SUPPORT,$(BASIC_CFLAGS)),) + LIB_OBJS += $(OUTPUT)ui/setup.o + LIB_OBJS += $(OUTPUT)ui/util.o + endif endif endif @@ -619,16 +683,6 @@ else endif endif -ifneq ($(findstring $(MAKEFLAGS),s),s) -ifndef V - QUIET_CC = @echo ' ' CC $@; - QUIET_AR = @echo ' ' AR $@; - QUIET_LINK = @echo ' ' LINK $@; - QUIET_MKDIR = @echo ' ' MKDIR $@; - QUIET_GEN = @echo ' ' GEN $@; -endif -endif - ifdef ASCIIDOC8 export ASCIIDOC8 endif @@ -706,12 +760,28 @@ $(OUTPUT)perf.o perf.spec \ $(SCRIPTS) \ : $(OUTPUT)PERF-VERSION-FILE +.SUFFIXES: +.SUFFIXES: .o .c .S .s + +# These two need to be here so that when O= is not used they take precedence +# over the general rule for .o + +$(OUTPUT)util/%-flex.o: $(OUTPUT)util/%-flex.c $(OUTPUT)PERF-CFLAGS + $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -Iutil/ -w $< + +$(OUTPUT)util/%-bison.o: $(OUTPUT)util/%-bison.c $(OUTPUT)PERF-CFLAGS + $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -Iutil/ -w $< + $(OUTPUT)%.o: %.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $< +$(OUTPUT)%.i: %.c $(OUTPUT)PERF-CFLAGS + $(QUIET_CC)$(CC) -o $@ -E $(ALL_CFLAGS) $< $(OUTPUT)%.s: %.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -S $(ALL_CFLAGS) $< + $(QUIET_CC)$(CC) -o $@ -S $(ALL_CFLAGS) $< $(OUTPUT)%.o: %.S $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $< +$(OUTPUT)%.s: %.S + $(QUIET_CC)$(CC) -o $@ -E $(ALL_CFLAGS) $< $(OUTPUT)util/exec_cmd.o: util/exec_cmd.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \ @@ -723,24 +793,24 @@ $(OUTPUT)util/exec_cmd.o: util/exec_cmd.c $(OUTPUT)PERF-CFLAGS $(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< -$(OUTPUT)util/ui/browser.o: util/ui/browser.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $< - -$(OUTPUT)util/ui/browsers/annotate.o: util/ui/browsers/annotate.c $(OUTPUT)PERF-CFLAGS +$(OUTPUT)ui/browser.o: ui/browser.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $< -$(OUTPUT)util/ui/browsers/top.o: util/ui/browsers/top.c $(OUTPUT)PERF-CFLAGS +$(OUTPUT)ui/browsers/annotate.o: ui/browsers/annotate.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $< -$(OUTPUT)util/ui/browsers/hists.o: util/ui/browsers/hists.c $(OUTPUT)PERF-CFLAGS +$(OUTPUT)ui/browsers/hists.o: ui/browsers/hists.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $< -$(OUTPUT)util/ui/browsers/map.o: util/ui/browsers/map.c $(OUTPUT)PERF-CFLAGS +$(OUTPUT)ui/browsers/map.o: ui/browsers/map.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $< $(OUTPUT)util/rbtree.o: ../../lib/rbtree.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< +$(OUTPUT)util/parse-events.o: util/parse-events.c $(OUTPUT)PERF-CFLAGS + $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -Wno-redundant-decls $< + $(OUTPUT)util/scripting-engines/trace-event-perl.o: util/scripting-engines/trace-event-perl.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $< @@ -770,6 +840,10 @@ $(sort $(dir $(DIRECTORY_DEPS))): $(LIB_FILE): $(LIB_OBJS) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS) +# libtraceevent.a +$(LIBTRACEEVENT): + $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) $(COMMAND_O) libtraceevent.a + help: @echo 'Perf make targets:' @echo ' doc - make *all* documentation (see below)' @@ -797,7 +871,6 @@ help: @echo ' quick-install-html - install the html documentation quickly' @echo '' @echo 'Perf maintainer targets:' - @echo ' distclean - alias to clean' @echo ' clean - clean all binary objects and build output' doc: @@ -914,8 +987,10 @@ clean: $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(MAKE) -C Documentation/ clean $(RM) $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS + $(RM) $(OUTPUT)util/*-bison* + $(RM) $(OUTPUT)util/*-flex* $(python-clean) -.PHONY: all install clean strip +.PHONY: all install clean strip $(LIBTRACEEVENT) .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell .PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile index 15130b50dfe3..744e629797be 100644 --- a/tools/perf/arch/powerpc/Makefile +++ b/tools/perf/arch/powerpc/Makefile @@ -2,3 +2,4 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o endif +LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o diff --git a/tools/perf/arch/powerpc/util/dwarf-regs.c b/tools/perf/arch/powerpc/util/dwarf-regs.c index 48ae0c5e3f73..7cdd61d0e27c 100644 --- a/tools/perf/arch/powerpc/util/dwarf-regs.c +++ b/tools/perf/arch/powerpc/util/dwarf-regs.c @@ -9,7 +9,10 @@ * 2 of the License, or (at your option) any later version. */ +#include <stdlib.h> +#ifndef __UCLIBC__ #include <libio.h> +#endif #include <dwarf-regs.h> diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c new file mode 100644 index 000000000000..2f7073d107fd --- /dev/null +++ b/tools/perf/arch/powerpc/util/header.c @@ -0,0 +1,36 @@ +#include <sys/types.h> +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "../../util/header.h" + +#define __stringify_1(x) #x +#define __stringify(x) __stringify_1(x) + +#define mfspr(rn) ({unsigned long rval; \ + asm volatile("mfspr %0," __stringify(rn) \ + : "=r" (rval)); rval; }) + +#define SPRN_PVR 0x11F /* Processor Version Register */ +#define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) /* Version field */ +#define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revison field */ + +int +get_cpuid(char *buffer, size_t sz) +{ + unsigned long pvr; + int nb; + + pvr = mfspr(SPRN_PVR); + + nb = scnprintf(buffer, sz, "%lu,%lu$", PVR_VER(pvr), PVR_REV(pvr)); + + /* look for end marker to ensure the entire data fit */ + if (strchr(buffer, '$')) { + buffer[nb-1] = '\0'; + return 0; + } + return -1; +} diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile index 15130b50dfe3..744e629797be 100644 --- a/tools/perf/arch/x86/Makefile +++ b/tools/perf/arch/x86/Makefile @@ -2,3 +2,4 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o endif +LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c new file mode 100644 index 000000000000..146d12a1cec0 --- /dev/null +++ b/tools/perf/arch/x86/util/header.c @@ -0,0 +1,59 @@ +#include <sys/types.h> +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "../../util/header.h" + +static inline void +cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c, + unsigned int *d) +{ + __asm__ __volatile__ (".byte 0x53\n\tcpuid\n\t" + "movl %%ebx, %%esi\n\t.byte 0x5b" + : "=a" (*a), + "=S" (*b), + "=c" (*c), + "=d" (*d) + : "a" (op)); +} + +int +get_cpuid(char *buffer, size_t sz) +{ + unsigned int a, b, c, d, lvl; + int family = -1, model = -1, step = -1; + int nb; + char vendor[16]; + + cpuid(0, &lvl, &b, &c, &d); + strncpy(&vendor[0], (char *)(&b), 4); + strncpy(&vendor[4], (char *)(&d), 4); + strncpy(&vendor[8], (char *)(&c), 4); + vendor[12] = '\0'; + + if (lvl >= 1) { + cpuid(1, &a, &b, &c, &d); + + family = (a >> 8) & 0xf; /* bits 11 - 8 */ + model = (a >> 4) & 0xf; /* Bits 7 - 4 */ + step = a & 0xf; + + /* extended family */ + if (family == 0xf) + family += (a >> 20) & 0xff; + + /* extended model */ + if (family >= 0x6) + model += ((a >> 16) & 0xf) << 4; + } + nb = scnprintf(buffer, sz, "%s,%u,%u,%u$", vendor, family, model, step); + + /* look for end marker to ensure the entire data fit */ + if (strchr(buffer, '$')) { + buffer[nb-1] = '\0'; + return 0; + } + return -1; +} diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index f7781c6267c0..a09bece6dad2 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -4,6 +4,7 @@ extern int bench_sched_messaging(int argc, const char **argv, const char *prefix); extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used); +extern int bench_mem_memset(int argc, const char **argv, const char *prefix); #define BENCH_FORMAT_DEFAULT_STR "default" #define BENCH_FORMAT_DEFAULT 0 diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h index d588b87696fc..d66ab799b35f 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h +++ b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h @@ -2,3 +2,11 @@ MEMCPY_FN(__memcpy, "x86-64-unrolled", "unrolled memcpy() in arch/x86/lib/memcpy_64.S") + +MEMCPY_FN(memcpy_c, + "x86-64-movsq", + "movsq-based memcpy() in arch/x86/lib/memcpy_64.S") + +MEMCPY_FN(memcpy_c_e, + "x86-64-movsb", + "movsb-based memcpy() in arch/x86/lib/memcpy_64.S") diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S index a57b66e853c2..fcd9cf00600a 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm.S +++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S @@ -1,2 +1,12 @@ - +#define memcpy MEMCPY /* don't hide glibc's memcpy() */ +#define altinstr_replacement text +#define globl p2align 4; .globl +#define Lmemcpy_c globl memcpy_c; memcpy_c +#define Lmemcpy_c_e globl memcpy_c_e; memcpy_c_e #include "../../../arch/x86/lib/memcpy_64.S" +/* + * We need to provide note.GNU-stack section, saying that we want + * NOT executable stack. Otherwise the final linking will assume that + * the ELF stack should not be restricted at all and set it RWX. + */ +.section .note.GNU-stack,"",@progbits diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c index db82021f4b91..02dad5d3359b 100644 --- a/tools/perf/bench/mem-memcpy.c +++ b/tools/perf/bench/mem-memcpy.c @@ -5,7 +5,6 @@ * * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> */ -#include <ctype.h> #include "../perf.h" #include "../util/util.h" @@ -24,19 +23,22 @@ static const char *length_str = "1MB"; static const char *routine = "default"; -static bool use_clock; -static int clock_fd; +static int iterations = 1; +static bool use_cycle; +static int cycle_fd; static bool only_prefault; static bool no_prefault; static const struct option options[] = { OPT_STRING('l', "length", &length_str, "1MB", "Specify length of memory to copy. " - "available unit: B, MB, GB (upper and lower)"), + "Available units: B, KB, MB, GB and TB (upper and lower)"), OPT_STRING('r', "routine", &routine, "default", "Specify routine to copy"), - OPT_BOOLEAN('c', "clock", &use_clock, - "Use CPU clock for measuring"), + OPT_INTEGER('i', "iterations", &iterations, + "repeat memcpy() invocation this number of times"), + OPT_BOOLEAN('c', "cycle", &use_cycle, + "Use cycles event instead of gettimeofday() for measuring"), OPT_BOOLEAN('o', "only-prefault", &only_prefault, "Show only the result with page faults before memcpy()"), OPT_BOOLEAN('n', "no-prefault", &no_prefault, @@ -74,27 +76,27 @@ static const char * const bench_mem_memcpy_usage[] = { NULL }; -static struct perf_event_attr clock_attr = { +static struct perf_event_attr cycle_attr = { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }; -static void init_clock(void) +static void init_cycle(void) { - clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0); + cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, 0); - if (clock_fd < 0 && errno == ENOSYS) + if (cycle_fd < 0 && errno == ENOSYS) die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); else - BUG_ON(clock_fd < 0); + BUG_ON(cycle_fd < 0); } -static u64 get_clock(void) +static u64 get_cycle(void) { int ret; u64 clk; - ret = read(clock_fd, &clk, sizeof(u64)); + ret = read(cycle_fd, &clk, sizeof(u64)); BUG_ON(ret != sizeof(u64)); return clk; @@ -117,29 +119,32 @@ static void alloc_mem(void **dst, void **src, size_t length) die("memory allocation failed - maybe length is too large?\n"); } -static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault) +static u64 do_memcpy_cycle(memcpy_t fn, size_t len, bool prefault) { - u64 clock_start = 0ULL, clock_end = 0ULL; + u64 cycle_start = 0ULL, cycle_end = 0ULL; void *src = NULL, *dst = NULL; + int i; alloc_mem(&src, &dst, len); if (prefault) fn(dst, src, len); - clock_start = get_clock(); - fn(dst, src, len); - clock_end = get_clock(); + cycle_start = get_cycle(); + for (i = 0; i < iterations; ++i) + fn(dst, src, len); + cycle_end = get_cycle(); free(src); free(dst); - return clock_end - clock_start; + return cycle_end - cycle_start; } static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault) { struct timeval tv_start, tv_end, tv_diff; void *src = NULL, *dst = NULL; + int i; alloc_mem(&src, &dst, len); @@ -147,7 +152,8 @@ static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault) fn(dst, src, len); BUG_ON(gettimeofday(&tv_start, NULL)); - fn(dst, src, len); + for (i = 0; i < iterations; ++i) + fn(dst, src, len); BUG_ON(gettimeofday(&tv_end, NULL)); timersub(&tv_end, &tv_start, &tv_diff); @@ -176,17 +182,17 @@ int bench_mem_memcpy(int argc, const char **argv, int i; size_t len; double result_bps[2]; - u64 result_clock[2]; + u64 result_cycle[2]; argc = parse_options(argc, argv, options, bench_mem_memcpy_usage, 0); - if (use_clock) - init_clock(); + if (use_cycle) + init_cycle(); len = (size_t)perf_atoll((char *)length_str); - result_clock[0] = result_clock[1] = 0ULL; + result_cycle[0] = result_cycle[1] = 0ULL; result_bps[0] = result_bps[1] = 0.0; if ((s64)len <= 0) { @@ -217,11 +223,11 @@ int bench_mem_memcpy(int argc, const char **argv, if (!only_prefault && !no_prefault) { /* show both of results */ - if (use_clock) { - result_clock[0] = - do_memcpy_clock(routines[i].fn, len, false); - result_clock[1] = - do_memcpy_clock(routines[i].fn, len, true); + if (use_cycle) { + result_cycle[0] = + do_memcpy_cycle(routines[i].fn, len, false); + result_cycle[1] = + do_memcpy_cycle(routines[i].fn, len, true); } else { result_bps[0] = do_memcpy_gettimeofday(routines[i].fn, @@ -231,9 +237,9 @@ int bench_mem_memcpy(int argc, const char **argv, len, true); } } else { - if (use_clock) { - result_clock[pf] = - do_memcpy_clock(routines[i].fn, + if (use_cycle) { + result_cycle[pf] = + do_memcpy_cycle(routines[i].fn, len, only_prefault); } else { result_bps[pf] = @@ -245,12 +251,12 @@ int bench_mem_memcpy(int argc, const char **argv, switch (bench_format) { case BENCH_FORMAT_DEFAULT: if (!only_prefault && !no_prefault) { - if (use_clock) { - printf(" %14lf Clock/Byte\n", - (double)result_clock[0] + if (use_cycle) { + printf(" %14lf Cycle/Byte\n", + (double)result_cycle[0] / (double)len); - printf(" %14lf Clock/Byte (with prefault)\n", - (double)result_clock[1] + printf(" %14lf Cycle/Byte (with prefault)\n", + (double)result_cycle[1] / (double)len); } else { print_bps(result_bps[0]); @@ -259,9 +265,9 @@ int bench_mem_memcpy(int argc, const char **argv, printf(" (with prefault)\n"); } } else { - if (use_clock) { - printf(" %14lf Clock/Byte", - (double)result_clock[pf] + if (use_cycle) { + printf(" %14lf Cycle/Byte", + (double)result_cycle[pf] / (double)len); } else print_bps(result_bps[pf]); @@ -271,17 +277,17 @@ int bench_mem_memcpy(int argc, const char **argv, break; case BENCH_FORMAT_SIMPLE: if (!only_prefault && !no_prefault) { - if (use_clock) { + if (use_cycle) { printf("%lf %lf\n", - (double)result_clock[0] / (double)len, - (double)result_clock[1] / (double)len); + (double)result_cycle[0] / (double)len, + (double)result_cycle[1] / (double)len); } else { printf("%lf %lf\n", result_bps[0], result_bps[1]); } } else { - if (use_clock) { - printf("%lf\n", (double)result_clock[pf] + if (use_cycle) { + printf("%lf\n", (double)result_cycle[pf] / (double)len); } else printf("%lf\n", result_bps[pf]); diff --git a/tools/perf/bench/mem-memset-arch.h b/tools/perf/bench/mem-memset-arch.h new file mode 100644 index 000000000000..a040fa77665b --- /dev/null +++ b/tools/perf/bench/mem-memset-arch.h @@ -0,0 +1,12 @@ + +#ifdef ARCH_X86_64 + +#define MEMSET_FN(fn, name, desc) \ + extern void *fn(void *, int, size_t); + +#include "mem-memset-x86-64-asm-def.h" + +#undef MEMSET_FN + +#endif + diff --git a/tools/perf/bench/mem-memset-x86-64-asm-def.h b/tools/perf/bench/mem-memset-x86-64-asm-def.h new file mode 100644 index 000000000000..a71dff97c1f5 --- /dev/null +++ b/tools/perf/bench/mem-memset-x86-64-asm-def.h @@ -0,0 +1,12 @@ + +MEMSET_FN(__memset, + "x86-64-unrolled", + "unrolled memset() in arch/x86/lib/memset_64.S") + +MEMSET_FN(memset_c, + "x86-64-stosq", + "movsq-based memset() in arch/x86/lib/memset_64.S") + +MEMSET_FN(memset_c_e, + "x86-64-stosb", + "movsb-based memset() in arch/x86/lib/memset_64.S") diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S new file mode 100644 index 000000000000..9e5af89ed13a --- /dev/null +++ b/tools/perf/bench/mem-memset-x86-64-asm.S @@ -0,0 +1,13 @@ +#define memset MEMSET /* don't hide glibc's memset() */ +#define altinstr_replacement text +#define globl p2align 4; .globl +#define Lmemset_c globl memset_c; memset_c +#define Lmemset_c_e globl memset_c_e; memset_c_e +#include "../../../arch/x86/lib/memset_64.S" + +/* + * We need to provide note.GNU-stack section, saying that we want + * NOT executable stack. Otherwise the final linking will assume that + * the ELF stack should not be restricted at all and set it RWX. + */ +.section .note.GNU-stack,"",@progbits diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c new file mode 100644 index 000000000000..350cc9557265 --- /dev/null +++ b/tools/perf/bench/mem-memset.c @@ -0,0 +1,297 @@ +/* + * mem-memset.c + * + * memset: Simple memory set in various ways + * + * Trivial clone of mem-memcpy.c. + */ + +#include "../perf.h" +#include "../util/util.h" +#include "../util/parse-options.h" +#include "../util/header.h" +#include "bench.h" +#include "mem-memset-arch.h" + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/time.h> +#include <errno.h> + +#define K 1024 + +static const char *length_str = "1MB"; +static const char *routine = "default"; +static int iterations = 1; +static bool use_cycle; +static int cycle_fd; +static bool only_prefault; +static bool no_prefault; + +static const struct option options[] = { + OPT_STRING('l', "length", &length_str, "1MB", + "Specify length of memory to set. " + "Available units: B, KB, MB, GB and TB (upper and lower)"), + OPT_STRING('r', "routine", &routine, "default", + "Specify routine to set"), + OPT_INTEGER('i', "iterations", &iterations, + "repeat memset() invocation this number of times"), + OPT_BOOLEAN('c', "cycle", &use_cycle, + "Use cycles event instead of gettimeofday() for measuring"), + OPT_BOOLEAN('o', "only-prefault", &only_prefault, + "Show only the result with page faults before memset()"), + OPT_BOOLEAN('n', "no-prefault", &no_prefault, + "Show only the result without page faults before memset()"), + OPT_END() +}; + +typedef void *(*memset_t)(void *, int, size_t); + +struct routine { + const char *name; + const char *desc; + memset_t fn; +}; + +static const struct routine routines[] = { + { "default", + "Default memset() provided by glibc", + memset }, +#ifdef ARCH_X86_64 + +#define MEMSET_FN(fn, name, desc) { name, desc, fn }, +#include "mem-memset-x86-64-asm-def.h" +#undef MEMSET_FN + +#endif + + { NULL, + NULL, + NULL } +}; + +static const char * const bench_mem_memset_usage[] = { + "perf bench mem memset <options>", + NULL +}; + +static struct perf_event_attr cycle_attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES +}; + +static void init_cycle(void) +{ + cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, 0); + + if (cycle_fd < 0 && errno == ENOSYS) + die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); + else + BUG_ON(cycle_fd < 0); +} + +static u64 get_cycle(void) +{ + int ret; + u64 clk; + + ret = read(cycle_fd, &clk, sizeof(u64)); + BUG_ON(ret != sizeof(u64)); + + return clk; +} + +static double timeval2double(struct timeval *ts) +{ + return (double)ts->tv_sec + + (double)ts->tv_usec / (double)1000000; +} + +static void alloc_mem(void **dst, size_t length) +{ + *dst = zalloc(length); + if (!dst) + die("memory allocation failed - maybe length is too large?\n"); +} + +static u64 do_memset_cycle(memset_t fn, size_t len, bool prefault) +{ + u64 cycle_start = 0ULL, cycle_end = 0ULL; + void *dst = NULL; + int i; + + alloc_mem(&dst, len); + + if (prefault) + fn(dst, -1, len); + + cycle_start = get_cycle(); + for (i = 0; i < iterations; ++i) + fn(dst, i, len); + cycle_end = get_cycle(); + + free(dst); + return cycle_end - cycle_start; +} + +static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault) +{ + struct timeval tv_start, tv_end, tv_diff; + void *dst = NULL; + int i; + + alloc_mem(&dst, len); + + if (prefault) + fn(dst, -1, len); + + BUG_ON(gettimeofday(&tv_start, NULL)); + for (i = 0; i < iterations; ++i) + fn(dst, i, len); + BUG_ON(gettimeofday(&tv_end, NULL)); + + timersub(&tv_end, &tv_start, &tv_diff); + + free(dst); + return (double)((double)len / timeval2double(&tv_diff)); +} + +#define pf (no_prefault ? 0 : 1) + +#define print_bps(x) do { \ + if (x < K) \ + printf(" %14lf B/Sec", x); \ + else if (x < K * K) \ + printf(" %14lfd KB/Sec", x / K); \ + else if (x < K * K * K) \ + printf(" %14lf MB/Sec", x / K / K); \ + else \ + printf(" %14lf GB/Sec", x / K / K / K); \ + } while (0) + +int bench_mem_memset(int argc, const char **argv, + const char *prefix __used) +{ + int i; + size_t len; + double result_bps[2]; + u64 result_cycle[2]; + + argc = parse_options(argc, argv, options, + bench_mem_memset_usage, 0); + + if (use_cycle) + init_cycle(); + + len = (size_t)perf_atoll((char *)length_str); + + result_cycle[0] = result_cycle[1] = 0ULL; + result_bps[0] = result_bps[1] = 0.0; + + if ((s64)len <= 0) { + fprintf(stderr, "Invalid length:%s\n", length_str); + return 1; + } + + /* same to without specifying either of prefault and no-prefault */ + if (only_prefault && no_prefault) + only_prefault = no_prefault = false; + + for (i = 0; routines[i].name; i++) { + if (!strcmp(routines[i].name, routine)) + break; + } + if (!routines[i].name) { + printf("Unknown routine:%s\n", routine); + printf("Available routines...\n"); + for (i = 0; routines[i].name; i++) { + printf("\t%s ... %s\n", + routines[i].name, routines[i].desc); + } + return 1; + } + + if (bench_format == BENCH_FORMAT_DEFAULT) + printf("# Copying %s Bytes ...\n\n", length_str); + + if (!only_prefault && !no_prefault) { + /* show both of results */ + if (use_cycle) { + result_cycle[0] = + do_memset_cycle(routines[i].fn, len, false); + result_cycle[1] = + do_memset_cycle(routines[i].fn, len, true); + } else { + result_bps[0] = + do_memset_gettimeofday(routines[i].fn, + len, false); + result_bps[1] = + do_memset_gettimeofday(routines[i].fn, + len, true); + } + } else { + if (use_cycle) { + result_cycle[pf] = + do_memset_cycle(routines[i].fn, + len, only_prefault); + } else { + result_bps[pf] = + do_memset_gettimeofday(routines[i].fn, + len, only_prefault); + } + } + + switch (bench_format) { + case BENCH_FORMAT_DEFAULT: + if (!only_prefault && !no_prefault) { + if (use_cycle) { + printf(" %14lf Cycle/Byte\n", + (double)result_cycle[0] + / (double)len); + printf(" %14lf Cycle/Byte (with prefault)\n ", + (double)result_cycle[1] + / (double)len); + } else { + print_bps(result_bps[0]); + printf("\n"); + print_bps(result_bps[1]); + printf(" (with prefault)\n"); + } + } else { + if (use_cycle) { + printf(" %14lf Cycle/Byte", + (double)result_cycle[pf] + / (double)len); + } else + print_bps(result_bps[pf]); + + printf("%s\n", only_prefault ? " (with prefault)" : ""); + } + break; + case BENCH_FORMAT_SIMPLE: + if (!only_prefault && !no_prefault) { + if (use_cycle) { + printf("%lf %lf\n", + (double)result_cycle[0] / (double)len, + (double)result_cycle[1] / (double)len); + } else { + printf("%lf %lf\n", + result_bps[0], result_bps[1]); + } + } else { + if (use_cycle) { + printf("%lf\n", (double)result_cycle[pf] + / (double)len); + } else + printf("%lf\n", result_bps[pf]); + } + break; + default: + /* reaching this means there's some disaster: */ + die("unknown format: %d\n", bench_format); + break; + } + + return 0; +} diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 555aefd7fe01..67522cf87405 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -27,32 +27,32 @@ #include "util/sort.h" #include "util/hist.h" #include "util/session.h" +#include "util/tool.h" #include <linux/bitmap.h> -static char const *input_name = "perf.data"; - -static bool force, use_tui, use_stdio; - -static bool full_paths; - -static bool print_line; - -static const char *sym_hist_filter; - -static const char *cpu_list; -static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); +struct perf_annotate { + struct perf_tool tool; + char const *input_name; + bool force, use_tui, use_stdio; + bool full_paths; + bool print_line; + const char *sym_hist_filter; + const char *cpu_list; + DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); +}; -static int perf_evlist__add_sample(struct perf_evlist *evlist, - struct perf_sample *sample, - struct perf_evsel *evsel, - struct addr_location *al) +static int perf_evsel__add_sample(struct perf_evsel *evsel, + struct perf_sample *sample, + struct addr_location *al, + struct perf_annotate *ann) { struct hist_entry *he; int ret; - if (sym_hist_filter != NULL && - (al->sym == NULL || strcmp(sym_hist_filter, al->sym->name) != 0)) { + if (ann->sym_hist_filter != NULL && + (al->sym == NULL || + strcmp(ann->sym_hist_filter, al->sym->name) != 0)) { /* We're only interested in a symbol named sym_hist_filter */ if (al->sym != NULL) { rb_erase(&al->sym->rb_node, @@ -69,8 +69,7 @@ static int perf_evlist__add_sample(struct perf_evlist *evlist, ret = 0; if (he->ms.sym != NULL) { struct annotation *notes = symbol__annotation(he->ms.sym); - if (notes->src == NULL && - symbol__alloc_hist(he->ms.sym, evlist->nr_entries) < 0) + if (notes->src == NULL && symbol__alloc_hist(he->ms.sym) < 0) return -ENOMEM; ret = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); @@ -81,25 +80,26 @@ static int perf_evlist__add_sample(struct perf_evlist *evlist, return ret; } -static int process_sample_event(union perf_event *event, +static int process_sample_event(struct perf_tool *tool, + union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, - struct perf_session *session) + struct machine *machine) { + struct perf_annotate *ann = container_of(tool, struct perf_annotate, tool); struct addr_location al; - if (perf_event__preprocess_sample(event, session, &al, sample, + if (perf_event__preprocess_sample(event, machine, &al, sample, symbol__annotate_init) < 0) { pr_warning("problem processing %d event, skipping it.\n", event->header.type); return -1; } - if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) + if (ann->cpu_list && !test_bit(sample->cpu, ann->cpu_bitmap)) return 0; - if (!al.filtered && - perf_evlist__add_sample(session->evlist, sample, evsel, &al)) { + if (!al.filtered && perf_evsel__add_sample(evsel, sample, &al, ann)) { pr_warning("problem incrementing symbol count, " "skipping event\n"); return -1; @@ -108,16 +108,18 @@ static int process_sample_event(union perf_event *event, return 0; } -static int hist_entry__tty_annotate(struct hist_entry *he, int evidx) +static int hist_entry__tty_annotate(struct hist_entry *he, int evidx, + struct perf_annotate *ann) { return symbol__tty_annotate(he->ms.sym, he->ms.map, evidx, - print_line, full_paths, 0, 0); + ann->print_line, ann->full_paths, 0, 0); } -static void hists__find_annotations(struct hists *self, int evidx) +static void hists__find_annotations(struct hists *self, int evidx, + struct perf_annotate *ann) { struct rb_node *nd = rb_first(&self->entries), *next; - int key = KEY_RIGHT; + int key = K_RIGHT; while (nd) { struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); @@ -129,7 +131,7 @@ static void hists__find_annotations(struct hists *self, int evidx) notes = symbol__annotation(he->ms.sym); if (notes->src == NULL) { find_next: - if (key == KEY_LEFT) + if (key == K_LEFT) nd = rb_prev(nd); else nd = rb_next(nd); @@ -137,12 +139,12 @@ find_next: } if (use_browser > 0) { - key = hist_entry__tui_annotate(he, evidx); + key = hist_entry__tui_annotate(he, evidx, NULL, NULL, 0); switch (key) { - case KEY_RIGHT: + case K_RIGHT: next = rb_next(nd); break; - case KEY_LEFT: + case K_LEFT: next = rb_prev(nd); break; default: @@ -152,7 +154,7 @@ find_next: if (next != NULL) nd = next; } else { - hist_entry__tty_annotate(he, evidx); + hist_entry__tty_annotate(he, evidx, ann); nd = rb_next(nd); /* * Since we have a hist_entry per IP for the same @@ -165,33 +167,26 @@ find_next: } } -static struct perf_event_ops event_ops = { - .sample = process_sample_event, - .mmap = perf_event__process_mmap, - .comm = perf_event__process_comm, - .fork = perf_event__process_task, - .ordered_samples = true, - .ordering_requires_timestamps = true, -}; - -static int __cmd_annotate(void) +static int __cmd_annotate(struct perf_annotate *ann) { int ret; struct perf_session *session; struct perf_evsel *pos; u64 total_nr_samples; - session = perf_session__new(input_name, O_RDONLY, force, false, &event_ops); + session = perf_session__new(ann->input_name, O_RDONLY, + ann->force, false, &ann->tool); if (session == NULL) return -ENOMEM; - if (cpu_list) { - ret = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap); + if (ann->cpu_list) { + ret = perf_session__cpu_bitmap(session, ann->cpu_list, + ann->cpu_bitmap); if (ret) goto out_delete; } - ret = perf_session__process_events(session, &event_ops); + ret = perf_session__process_events(session, &ann->tool); if (ret) goto out_delete; @@ -215,12 +210,12 @@ static int __cmd_annotate(void) total_nr_samples += nr_samples; hists__collapse_resort(hists); hists__output_resort(hists); - hists__find_annotations(hists, pos->idx); + hists__find_annotations(hists, pos->idx, ann); } } if (total_nr_samples == 0) { - ui__warning("The %s file has no samples!\n", input_name); + ui__error("The %s file has no samples!\n", session->filename); goto out_delete; } out_delete: @@ -240,43 +235,61 @@ out_delete: } static const char * const annotate_usage[] = { - "perf annotate [<options>] <command>", + "perf annotate [<options>]", NULL }; -static const struct option options[] = { - OPT_STRING('i', "input", &input_name, "file", +int cmd_annotate(int argc, const char **argv, const char *prefix __used) +{ + struct perf_annotate annotate = { + .tool = { + .sample = process_sample_event, + .mmap = perf_event__process_mmap, + .comm = perf_event__process_comm, + .fork = perf_event__process_task, + .ordered_samples = true, + .ordering_requires_timestamps = true, + }, + }; + const struct option options[] = { + OPT_STRING('i', "input", &annotate.input_name, "file", "input file name"), OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]", "only consider symbols in these dsos"), - OPT_STRING('s', "symbol", &sym_hist_filter, "symbol", + OPT_STRING('s', "symbol", &annotate.sym_hist_filter, "symbol", "symbol to annotate"), - OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), + OPT_BOOLEAN('f', "force", &annotate.force, "don't complain, do it"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), - OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"), - OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"), + OPT_BOOLEAN(0, "tui", &annotate.use_tui, "Use the TUI interface"), + OPT_BOOLEAN(0, "stdio", &annotate.use_stdio, "Use the stdio interface"), OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, "file", "vmlinux pathname"), OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, "load module symbols - WARNING: use only with -k and LIVE kernel"), - OPT_BOOLEAN('l', "print-line", &print_line, + OPT_BOOLEAN('l', "print-line", &annotate.print_line, "print matching source lines (may be slow)"), - OPT_BOOLEAN('P', "full-paths", &full_paths, + OPT_BOOLEAN('P', "full-paths", &annotate.full_paths, "Don't shorten the displayed pathnames"), - OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"), + OPT_STRING('C', "cpu", &annotate.cpu_list, "cpu", "list of cpus to profile"), + OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", + "Look for files with symbols relative to this directory"), + OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src, + "Interleave source code with assembly code (default)"), + OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw, + "Display raw encoding of assembly instructions (default)"), + OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", + "Specify disassembler style (e.g. -M intel for intel syntax)"), OPT_END() -}; + }; -int cmd_annotate(int argc, const char **argv, const char *prefix __used) -{ argc = parse_options(argc, argv, options, annotate_usage, 0); - if (use_stdio) + if (annotate.use_stdio) use_browser = 0; - else if (use_tui) + else if (annotate.use_tui) use_browser = 1; setup_browser(true); @@ -297,13 +310,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used) if (argc > 1) usage_with_options(annotate_usage, options); - sym_hist_filter = argv[0]; - } - - if (field_sep && *field_sep == '.') { - pr_err("'.' is the only non valid --field-separator argument\n"); - return -1; + annotate.sym_hist_filter = argv[0]; } - return __cmd_annotate(); + return __cmd_annotate(&annotate); } diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index fcb96269852a..1f3100216448 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -33,7 +33,7 @@ struct bench_suite { }; \ /* sentinel: easy for help */ -#define suite_all { "all", "test all suite (pseudo suite)", NULL } +#define suite_all { "all", "Test all benchmark suites", NULL } static struct bench_suite sched_suites[] = { { "messaging", @@ -52,6 +52,9 @@ static struct bench_suite mem_suites[] = { { "memcpy", "Simple memory copy in various ways", bench_mem_memcpy }, + { "memset", + "Simple memory set in various ways", + bench_mem_memset }, suite_all, { NULL, NULL, @@ -72,7 +75,7 @@ static struct bench_subsys subsystems[] = { "memory access performance", mem_suites }, { "all", /* sentinel: easy for help */ - "test all subsystem (pseudo subsystem)", + "all benchmark subsystem", NULL }, { NULL, NULL, diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index 5af32ae9031e..6b2bcfbde150 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -1,7 +1,8 @@ /* * builtin-buildid-list.c * - * Builtin buildid-list command: list buildids in perf.data + * Builtin buildid-list command: list buildids in perf.data, in the running + * kernel and in ELF files. * * Copyright (C) 2009, Red Hat Inc. * Copyright (C) 2009, Arnaldo Carvalho de Melo <acme@redhat.com> @@ -15,8 +16,11 @@ #include "util/session.h" #include "util/symbol.h" -static char const *input_name = "perf.data"; +#include <libelf.h> + +static const char *input_name; static bool force; +static bool show_kernel; static bool with_hits; static const char * const buildid_list_usage[] = { @@ -29,29 +33,78 @@ static const struct option options[] = { OPT_STRING('i', "input", &input_name, "file", "input file name"), OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), + OPT_BOOLEAN('k', "kernel", &show_kernel, "Show current kernel build id"), OPT_INCR('v', "verbose", &verbose, "be more verbose"), OPT_END() }; -static int __cmd_buildid_list(void) +static int sysfs__fprintf_build_id(FILE *fp) +{ + u8 kallsyms_build_id[BUILD_ID_SIZE]; + char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + + if (sysfs__read_build_id("/sys/kernel/notes", kallsyms_build_id, + sizeof(kallsyms_build_id)) != 0) + return -1; + + build_id__sprintf(kallsyms_build_id, sizeof(kallsyms_build_id), + sbuild_id); + fprintf(fp, "%s\n", sbuild_id); + return 0; +} + +static int filename__fprintf_build_id(const char *name, FILE *fp) +{ + u8 build_id[BUILD_ID_SIZE]; + char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + + if (filename__read_build_id(name, build_id, + sizeof(build_id)) != sizeof(build_id)) + return 0; + + build_id__sprintf(build_id, sizeof(build_id), sbuild_id); + return fprintf(fp, "%s\n", sbuild_id); +} + +static int perf_session__list_build_ids(void) { struct perf_session *session; + elf_version(EV_CURRENT); + session = perf_session__new(input_name, O_RDONLY, force, false, &build_id__mark_dso_hit_ops); if (session == NULL) return -1; - if (with_hits) + /* + * See if this is an ELF file first: + */ + if (filename__fprintf_build_id(session->filename, stdout)) + goto out; + + /* + * in pipe-mode, the only way to get the buildids is to parse + * the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID + */ + if (with_hits || session->fd_pipe) perf_session__process_events(session, &build_id__mark_dso_hit_ops); perf_session__fprintf_dsos_buildid(session, stdout, with_hits); - +out: perf_session__delete(session); return 0; } +static int __cmd_buildid_list(void) +{ + if (show_kernel) + return sysfs__fprintf_build_id(stdout); + + return perf_session__list_build_ids(); +} + int cmd_buildid_list(int argc, const char **argv, const char *prefix __used) { argc = parse_options(argc, argv, options, buildid_list_usage, 0); diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index e8219990f8b8..d29d350fb2b7 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -9,7 +9,9 @@ #include "util/debug.h" #include "util/event.h" #include "util/hist.h" +#include "util/evsel.h" #include "util/session.h" +#include "util/tool.h" #include "util/sort.h" #include "util/symbol.h" #include "util/util.h" @@ -22,6 +24,11 @@ static char diff__default_sort_order[] = "dso,symbol"; static bool force; static bool show_displacement; +struct perf_diff { + struct perf_tool tool; + struct perf_session *session; +}; + static int hists__add_entry(struct hists *self, struct addr_location *al, u64 period) { @@ -30,14 +37,17 @@ static int hists__add_entry(struct hists *self, return -ENOMEM; } -static int diff__process_sample_event(union perf_event *event, +static int diff__process_sample_event(struct perf_tool *tool, + union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel __used, - struct perf_session *session) + struct machine *machine) { + struct perf_diff *_diff = container_of(tool, struct perf_diff, tool); + struct perf_session *session = _diff->session; struct addr_location al; - if (perf_event__preprocess_sample(event, session, &al, sample, NULL) < 0) { + if (perf_event__preprocess_sample(event, machine, &al, sample, NULL) < 0) { pr_warning("problem processing %d event, skipping it.\n", event->header.type); return -1; @@ -55,15 +65,17 @@ static int diff__process_sample_event(union perf_event *event, return 0; } -static struct perf_event_ops event_ops = { - .sample = diff__process_sample_event, - .mmap = perf_event__process_mmap, - .comm = perf_event__process_comm, - .exit = perf_event__process_task, - .fork = perf_event__process_task, - .lost = perf_event__process_lost, - .ordered_samples = true, - .ordering_requires_timestamps = true, +static struct perf_diff diff = { + .tool = { + .sample = diff__process_sample_event, + .mmap = perf_event__process_mmap, + .comm = perf_event__process_comm, + .exit = perf_event__process_task, + .fork = perf_event__process_task, + .lost = perf_event__process_lost, + .ordered_samples = true, + .ordering_requires_timestamps = true, + }, }; static void perf_session__insert_hist_entry_by_name(struct rb_root *root, @@ -104,12 +116,6 @@ static void hists__resort_entries(struct hists *self) self->entries = tmp; } -static void hists__set_positions(struct hists *self) -{ - hists__output_resort(self); - hists__resort_entries(self); -} - static struct hist_entry *hists__find_entry(struct hists *self, struct hist_entry *he) { @@ -143,30 +149,37 @@ static void hists__match(struct hists *older, struct hists *newer) static int __cmd_diff(void) { int ret, i; +#define older (session[0]) +#define newer (session[1]) struct perf_session *session[2]; - session[0] = perf_session__new(input_old, O_RDONLY, force, false, &event_ops); - session[1] = perf_session__new(input_new, O_RDONLY, force, false, &event_ops); + older = perf_session__new(input_old, O_RDONLY, force, false, + &diff.tool); + newer = perf_session__new(input_new, O_RDONLY, force, false, + &diff.tool); if (session[0] == NULL || session[1] == NULL) return -ENOMEM; for (i = 0; i < 2; ++i) { - ret = perf_session__process_events(session[i], &event_ops); + diff.session = session[i]; + ret = perf_session__process_events(session[i], &diff.tool); if (ret) goto out_delete; + hists__output_resort(&session[i]->hists); } - hists__output_resort(&session[1]->hists); if (show_displacement) - hists__set_positions(&session[0]->hists); + hists__resort_entries(&older->hists); - hists__match(&session[0]->hists, &session[1]->hists); - hists__fprintf(&session[1]->hists, &session[0]->hists, - show_displacement, stdout); + hists__match(&older->hists, &newer->hists); + hists__fprintf(&newer->hists, &older->hists, + show_displacement, true, 0, 0, stdout); out_delete: for (i = 0; i < 2; ++i) perf_session__delete(session[i]); return ret; +#undef older +#undef newer } static const char * const diff_usage[] = { diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c index 4c5e9e04a41f..0dd5a058f766 100644 --- a/tools/perf/builtin-evlist.c +++ b/tools/perf/builtin-evlist.c @@ -15,9 +15,40 @@ #include "util/parse-options.h" #include "util/session.h" -static char const *input_name = "perf.data"; +struct perf_attr_details { + bool freq; + bool verbose; +}; + +static int comma_printf(bool *first, const char *fmt, ...) +{ + va_list args; + int ret = 0; + + if (!*first) { + ret += printf(","); + } else { + ret += printf(":"); + *first = false; + } + + va_start(args, fmt); + ret += vprintf(fmt, args); + va_end(args); + return ret; +} + +static int __if_print(bool *first, const char *field, u64 value) +{ + if (value == 0) + return 0; + + return comma_printf(first, " %s: %" PRIu64, field, value); +} + +#define if_print(field) __if_print(&first, #field, pos->attr.field) -static int __cmd_evlist(void) +static int __cmd_evlist(const char *input_name, struct perf_attr_details *details) { struct perf_session *session; struct perf_evsel *pos; @@ -26,8 +57,52 @@ static int __cmd_evlist(void) if (session == NULL) return -ENOMEM; - list_for_each_entry(pos, &session->evlist->entries, node) - printf("%s\n", event_name(pos)); + list_for_each_entry(pos, &session->evlist->entries, node) { + bool first = true; + + printf("%s", perf_evsel__name(pos)); + + if (details->verbose || details->freq) { + comma_printf(&first, " sample_freq=%" PRIu64, + (u64)pos->attr.sample_freq); + } + + if (details->verbose) { + if_print(type); + if_print(config); + if_print(config1); + if_print(config2); + if_print(size); + if_print(sample_type); + if_print(read_format); + if_print(disabled); + if_print(inherit); + if_print(pinned); + if_print(exclusive); + if_print(exclude_user); + if_print(exclude_kernel); + if_print(exclude_hv); + if_print(exclude_idle); + if_print(mmap); + if_print(comm); + if_print(freq); + if_print(inherit_stat); + if_print(enable_on_exec); + if_print(task); + if_print(watermark); + if_print(precise_ip); + if_print(mmap_data); + if_print(sample_id_all); + if_print(exclude_host); + if_print(exclude_guest); + if_print(__reserved_1); + if_print(wakeup_events); + if_print(bp_type); + if_print(branch_sample_type); + } + + putchar('\n'); + } perf_session__delete(session); return 0; @@ -38,17 +113,23 @@ static const char * const evlist_usage[] = { NULL }; -static const struct option options[] = { - OPT_STRING('i', "input", &input_name, "file", - "input file name"), - OPT_END() -}; - int cmd_evlist(int argc, const char **argv, const char *prefix __used) { + struct perf_attr_details details = { .verbose = false, }; + const char *input_name = NULL; + const struct option options[] = { + OPT_STRING('i', "input", &input_name, "file", + "Input file name"), + OPT_BOOLEAN('F', "freq", &details.freq, + "Show the sample frequency"), + OPT_BOOLEAN('v', "verbose", &details.verbose, + "Show all event attr details"), + OPT_END() + }; + argc = parse_options(argc, argv, options, evlist_usage, 0); if (argc) usage_with_options(evlist_usage, options); - return __cmd_evlist(); + return __cmd_evlist(input_name, &details); } diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 8dfc12bb119b..3beab489afc5 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -9,6 +9,7 @@ #include "perf.h" #include "util/session.h" +#include "util/tool.h" #include "util/debug.h" #include "util/parse-options.h" @@ -16,8 +17,9 @@ static char const *input_name = "-"; static bool inject_build_ids; -static int perf_event__repipe_synth(union perf_event *event, - struct perf_session *session __used) +static int perf_event__repipe_synth(struct perf_tool *tool __used, + union perf_event *event, + struct machine *machine __used) { uint32_t size; void *buf = event; @@ -36,41 +38,75 @@ static int perf_event__repipe_synth(union perf_event *event, return 0; } -static int perf_event__repipe(union perf_event *event, +static int perf_event__repipe_op2_synth(struct perf_tool *tool, + union perf_event *event, + struct perf_session *session __used) +{ + return perf_event__repipe_synth(tool, event, NULL); +} + +static int perf_event__repipe_event_type_synth(struct perf_tool *tool, + union perf_event *event) +{ + return perf_event__repipe_synth(tool, event, NULL); +} + +static int perf_event__repipe_tracing_data_synth(union perf_event *event, + struct perf_session *session __used) +{ + return perf_event__repipe_synth(NULL, event, NULL); +} + +static int perf_event__repipe_attr(union perf_event *event, + struct perf_evlist **pevlist __used) +{ + int ret; + ret = perf_event__process_attr(event, pevlist); + if (ret) + return ret; + + return perf_event__repipe_synth(NULL, event, NULL); +} + +static int perf_event__repipe(struct perf_tool *tool, + union perf_event *event, struct perf_sample *sample __used, - struct perf_session *session) + struct machine *machine) { - return perf_event__repipe_synth(event, session); + return perf_event__repipe_synth(tool, event, machine); } -static int perf_event__repipe_sample(union perf_event *event, +static int perf_event__repipe_sample(struct perf_tool *tool, + union perf_event *event, struct perf_sample *sample __used, struct perf_evsel *evsel __used, - struct perf_session *session) + struct machine *machine) { - return perf_event__repipe_synth(event, session); + return perf_event__repipe_synth(tool, event, machine); } -static int perf_event__repipe_mmap(union perf_event *event, +static int perf_event__repipe_mmap(struct perf_tool *tool, + union perf_event *event, struct perf_sample *sample, - struct perf_session *session) + struct machine *machine) { int err; - err = perf_event__process_mmap(event, sample, session); - perf_event__repipe(event, sample, session); + err = perf_event__process_mmap(tool, event, sample, machine); + perf_event__repipe(tool, event, sample, machine); return err; } -static int perf_event__repipe_task(union perf_event *event, +static int perf_event__repipe_task(struct perf_tool *tool, + union perf_event *event, struct perf_sample *sample, - struct perf_session *session) + struct machine *machine) { int err; - err = perf_event__process_task(event, sample, session); - perf_event__repipe(event, sample, session); + err = perf_event__process_task(tool, event, sample, machine); + perf_event__repipe(tool, event, sample, machine); return err; } @@ -80,7 +116,7 @@ static int perf_event__repipe_tracing_data(union perf_event *event, { int err; - perf_event__repipe_synth(event, session); + perf_event__repipe_synth(NULL, event, NULL); err = perf_event__process_tracing_data(event, session); return err; @@ -100,10 +136,10 @@ static int dso__read_build_id(struct dso *self) return -1; } -static int dso__inject_build_id(struct dso *self, struct perf_session *session) +static int dso__inject_build_id(struct dso *self, struct perf_tool *tool, + struct machine *machine) { u16 misc = PERF_RECORD_MISC_USER; - struct machine *machine; int err; if (dso__read_build_id(self) < 0) { @@ -111,17 +147,11 @@ static int dso__inject_build_id(struct dso *self, struct perf_session *session) return -1; } - machine = perf_session__find_host_machine(session); - if (machine == NULL) { - pr_err("Can't find machine for session\n"); - return -1; - } - if (self->kernel) misc = PERF_RECORD_MISC_KERNEL; - err = perf_event__synthesize_build_id(self, misc, perf_event__repipe, - machine, session); + err = perf_event__synthesize_build_id(tool, self, misc, perf_event__repipe, + machine); if (err) { pr_err("Can't synthesize build_id event for %s\n", self->long_name); return -1; @@ -130,10 +160,11 @@ static int dso__inject_build_id(struct dso *self, struct perf_session *session) return 0; } -static int perf_event__inject_buildid(union perf_event *event, +static int perf_event__inject_buildid(struct perf_tool *tool, + union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel __used, - struct perf_session *session) + struct machine *machine) { struct addr_location al; struct thread *thread; @@ -141,21 +172,21 @@ static int perf_event__inject_buildid(union perf_event *event, cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; - thread = perf_session__findnew(session, event->ip.pid); + thread = machine__findnew_thread(machine, event->ip.pid); if (thread == NULL) { pr_err("problem processing %d event, skipping it.\n", event->header.type); goto repipe; } - thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION, - event->ip.pid, event->ip.ip, &al); + thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION, + event->ip.ip, &al); if (al.map != NULL) { if (!al.map->dso->hit) { al.map->dso->hit = 1; if (map__load(al.map, NULL) >= 0) { - dso__inject_build_id(al.map->dso, session); + dso__inject_build_id(al.map->dso, tool, machine); /* * If this fails, too bad, let the other side * account this as unresolved. @@ -168,24 +199,24 @@ static int perf_event__inject_buildid(union perf_event *event, } repipe: - perf_event__repipe(event, sample, session); + perf_event__repipe(tool, event, sample, machine); return 0; } -struct perf_event_ops inject_ops = { +struct perf_tool perf_inject = { .sample = perf_event__repipe_sample, .mmap = perf_event__repipe, .comm = perf_event__repipe, .fork = perf_event__repipe, .exit = perf_event__repipe, .lost = perf_event__repipe, - .read = perf_event__repipe, + .read = perf_event__repipe_sample, .throttle = perf_event__repipe, .unthrottle = perf_event__repipe, - .attr = perf_event__repipe_synth, - .event_type = perf_event__repipe_synth, - .tracing_data = perf_event__repipe_synth, - .build_id = perf_event__repipe_synth, + .attr = perf_event__repipe_attr, + .event_type = perf_event__repipe_event_type_synth, + .tracing_data = perf_event__repipe_tracing_data_synth, + .build_id = perf_event__repipe_op2_synth, }; extern volatile int session_done; @@ -203,17 +234,17 @@ static int __cmd_inject(void) signal(SIGINT, sig_handler); if (inject_build_ids) { - inject_ops.sample = perf_event__inject_buildid; - inject_ops.mmap = perf_event__repipe_mmap; - inject_ops.fork = perf_event__repipe_task; - inject_ops.tracing_data = perf_event__repipe_tracing_data; + perf_inject.sample = perf_event__inject_buildid; + perf_inject.mmap = perf_event__repipe_mmap; + perf_inject.fork = perf_event__repipe_task; + perf_inject.tracing_data = perf_event__repipe_tracing_data; } - session = perf_session__new(input_name, O_RDONLY, false, true, &inject_ops); + session = perf_session__new(input_name, O_RDONLY, false, true, &perf_inject); if (session == NULL) return -ENOMEM; - ret = perf_session__process_events(session, &inject_ops); + ret = perf_session__process_events(session, &perf_inject); perf_session__delete(session); diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 225e963df105..ce35015f2dc6 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -7,6 +7,7 @@ #include "util/thread.h" #include "util/header.h" #include "util/session.h" +#include "util/tool.h" #include "util/parse-options.h" #include "util/trace-event.h" @@ -18,7 +19,7 @@ struct alloc_stat; typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *); -static char const *input_name = "perf.data"; +static const char *input_name; static int alloc_flag; static int caller_flag; @@ -56,6 +57,11 @@ static unsigned long nr_allocs, nr_cross_allocs; #define PATH_SYS_NODE "/sys/devices/system/node" +struct perf_kmem { + struct perf_tool tool; + struct perf_session *session; +}; + static void init_cpunode_map(void) { FILE *fp; @@ -107,7 +113,9 @@ static void setup_cpunode_map(void) continue; cpunode_map[cpu] = mem; } + closedir(dir2); } + closedir(dir1); } static void insert_alloc_stat(unsigned long call_site, unsigned long ptr, @@ -189,7 +197,7 @@ static void insert_caller_stat(unsigned long call_site, } static void process_alloc_event(void *data, - struct event *event, + struct event_format *event, int cpu, u64 timestamp __used, struct thread *thread __used, @@ -250,7 +258,7 @@ static struct alloc_stat *search_alloc_stat(unsigned long ptr, } static void process_free_event(void *data, - struct event *event, + struct event_format *event, int cpu, u64 timestamp __used, struct thread *thread __used) @@ -275,14 +283,16 @@ static void process_free_event(void *data, s_alloc->alloc_cpu = -1; } -static void process_raw_event(union perf_event *raw_event __used, void *data, +static void process_raw_event(struct perf_tool *tool, + union perf_event *raw_event __used, void *data, int cpu, u64 timestamp, struct thread *thread) { - struct event *event; + struct perf_kmem *kmem = container_of(tool, struct perf_kmem, tool); + struct event_format *event; int type; - type = trace_parse_common_type(data); - event = trace_find_event(type); + type = trace_parse_common_type(kmem->session->pevent, data); + event = pevent_find_event(kmem->session->pevent, type); if (!strcmp(event->name, "kmalloc") || !strcmp(event->name, "kmem_cache_alloc")) { @@ -303,12 +313,13 @@ static void process_raw_event(union perf_event *raw_event __used, void *data, } } -static int process_sample_event(union perf_event *event, +static int process_sample_event(struct perf_tool *tool, + union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel __used, - struct perf_session *session) + struct machine *machine) { - struct thread *thread = perf_session__findnew(session, event->ip.pid); + struct thread *thread = machine__findnew_thread(machine, event->ip.pid); if (thread == NULL) { pr_debug("problem processing %d event, skipping it.\n", @@ -318,16 +329,18 @@ static int process_sample_event(union perf_event *event, dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); - process_raw_event(event, sample->raw_data, sample->cpu, + process_raw_event(tool, event, sample->raw_data, sample->cpu, sample->time, thread); return 0; } -static struct perf_event_ops event_ops = { - .sample = process_sample_event, - .comm = perf_event__process_comm, - .ordered_samples = true, +static struct perf_kmem perf_kmem = { + .tool = { + .sample = process_sample_event, + .comm = perf_event__process_comm, + .ordered_samples = true, + }, }; static double fragmentation(unsigned long n_req, unsigned long n_alloc) @@ -482,11 +495,15 @@ static void sort_result(void) static int __cmd_kmem(void) { int err = -EINVAL; - struct perf_session *session = perf_session__new(input_name, O_RDONLY, - 0, false, &event_ops); + struct perf_session *session; + + session = perf_session__new(input_name, O_RDONLY, 0, false, + &perf_kmem.tool); if (session == NULL) return -ENOMEM; + perf_kmem.session = session; + if (perf_session__create_kernel_maps(session) < 0) goto out_delete; @@ -494,7 +511,7 @@ static int __cmd_kmem(void) goto out_delete; setup_pager(); - err = perf_session__process_events(session, &event_ops); + err = perf_session__process_events(session, &perf_kmem.tool); if (err != 0) goto out_delete; sort_result(); @@ -643,6 +660,7 @@ static int setup_sorting(struct list_head *sort_list, const char *arg) break; if (sort_dimension__add(tok, sort_list) < 0) { error("Unknown --sort key: '%s'", tok); + free(str); return -1; } } diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 34d1e853829d..9fc6e0fa3dce 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -22,9 +22,6 @@ static const char *file_name; static char name_buffer[256]; -bool perf_host = 1; -bool perf_guest; - static const char * const kvm_usage[] = { "perf kvm [<options>] {top|record|report|diff|buildid-list}", NULL @@ -38,7 +35,7 @@ static const struct option kvm_options[] = { OPT_BOOLEAN(0, "guest", &perf_guest, "Collect guest os data"), OPT_BOOLEAN(0, "host", &perf_host, - "Collect guest os data"), + "Collect host os data"), OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory", "guest mount directory under which every guest os" " instance has a subdir"), @@ -107,7 +104,8 @@ static int __cmd_buildid_list(int argc, const char **argv) int cmd_kvm(int argc, const char **argv, const char *prefix __used) { - perf_host = perf_guest = 0; + perf_host = 0; + perf_guest = 1; argc = parse_options(argc, argv, kvm_options, kvm_usage, PARSE_OPT_STOP_AT_NON_OPTION); diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 899080ace267..b3c428548868 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -12,6 +12,7 @@ #include "util/debug.h" #include "util/session.h" +#include "util/tool.h" #include <sys/types.h> #include <sys/prctl.h> @@ -325,7 +326,7 @@ alloc_failed: die("memory allocation failed\n"); } -static char const *input_name = "perf.data"; +static const char *input_name; struct raw_event_sample { u32 size; @@ -355,25 +356,25 @@ struct trace_release_event { struct trace_lock_handler { void (*acquire_event)(struct trace_acquire_event *, - struct event *, + struct event_format *, int cpu, u64 timestamp, struct thread *thread); void (*acquired_event)(struct trace_acquired_event *, - struct event *, + struct event_format *, int cpu, u64 timestamp, struct thread *thread); void (*contended_event)(struct trace_contended_event *, - struct event *, + struct event_format *, int cpu, u64 timestamp, struct thread *thread); void (*release_event)(struct trace_release_event *, - struct event *, + struct event_format *, int cpu, u64 timestamp, struct thread *thread); @@ -415,7 +416,7 @@ enum acquire_flags { static void report_lock_acquire_event(struct trace_acquire_event *acquire_event, - struct event *__event __used, + struct event_format *__event __used, int cpu __used, u64 timestamp __used, struct thread *thread __used) @@ -479,7 +480,7 @@ end: static void report_lock_acquired_event(struct trace_acquired_event *acquired_event, - struct event *__event __used, + struct event_format *__event __used, int cpu __used, u64 timestamp __used, struct thread *thread __used) @@ -535,7 +536,7 @@ end: static void report_lock_contended_event(struct trace_contended_event *contended_event, - struct event *__event __used, + struct event_format *__event __used, int cpu __used, u64 timestamp __used, struct thread *thread __used) @@ -582,7 +583,7 @@ end: static void report_lock_release_event(struct trace_release_event *release_event, - struct event *__event __used, + struct event_format *__event __used, int cpu __used, u64 timestamp __used, struct thread *thread __used) @@ -646,7 +647,7 @@ static struct trace_lock_handler *trace_handler; static void process_lock_acquire_event(void *data, - struct event *event __used, + struct event_format *event __used, int cpu __used, u64 timestamp __used, struct thread *thread __used) @@ -665,7 +666,7 @@ process_lock_acquire_event(void *data, static void process_lock_acquired_event(void *data, - struct event *event __used, + struct event_format *event __used, int cpu __used, u64 timestamp __used, struct thread *thread __used) @@ -683,7 +684,7 @@ process_lock_acquired_event(void *data, static void process_lock_contended_event(void *data, - struct event *event __used, + struct event_format *event __used, int cpu __used, u64 timestamp __used, struct thread *thread __used) @@ -701,7 +702,7 @@ process_lock_contended_event(void *data, static void process_lock_release_event(void *data, - struct event *event __used, + struct event_format *event __used, int cpu __used, u64 timestamp __used, struct thread *thread __used) @@ -720,11 +721,11 @@ process_lock_release_event(void *data, static void process_raw_event(void *data, int cpu, u64 timestamp, struct thread *thread) { - struct event *event; + struct event_format *event; int type; - type = trace_parse_common_type(data); - event = trace_find_event(type); + type = trace_parse_common_type(session->pevent, data); + event = pevent_find_event(session->pevent, type); if (!strcmp(event->name, "lock_acquire")) process_lock_acquire_event(data, event, cpu, timestamp, thread); @@ -845,12 +846,13 @@ static void dump_info(void) die("Unknown type of information\n"); } -static int process_sample_event(union perf_event *event, +static int process_sample_event(struct perf_tool *tool __used, + union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel __used, - struct perf_session *s) + struct machine *machine) { - struct thread *thread = perf_session__findnew(s, sample->tid); + struct thread *thread = machine__findnew_thread(machine, sample->tid); if (thread == NULL) { pr_debug("problem processing %d event, skipping it.\n", @@ -863,7 +865,7 @@ static int process_sample_event(union perf_event *event, return 0; } -static struct perf_event_ops eops = { +static struct perf_tool eops = { .sample = process_sample_event, .comm = perf_event__process_comm, .ordered_samples = true, @@ -920,12 +922,12 @@ static const struct option info_options[] = { OPT_BOOLEAN('t', "threads", &info_threads, "dump thread list in perf.data"), OPT_BOOLEAN('m', "map", &info_map, - "map of lock instances (name:address table)"), + "map of lock instances (address:name table)"), OPT_END() }; static const char * const lock_usage[] = { - "perf lock [<options>] {record|trace|report}", + "perf lock [<options>] {record|report|script|info}", NULL }; diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 710ae3d0a489..e215ae61b2ae 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -20,7 +20,6 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * */ -#define _GNU_SOURCE #include <sys/utsname.h> #include <sys/types.h> #include <sys/stat.h> @@ -31,7 +30,6 @@ #include <stdlib.h> #include <string.h> -#undef _GNU_SOURCE #include "perf.h" #include "builtin.h" #include "util/util.h" @@ -46,7 +44,6 @@ #define DEFAULT_VAR_FILTER "!__k???tab_* & !__crc_*" #define DEFAULT_FUNC_FILTER "!_*" -#define MAX_PATH_LEN 256 /* Session management structure */ static struct { @@ -57,11 +54,12 @@ static struct { bool show_ext_vars; bool show_funcs; bool mod_events; + bool uprobes; int nevents; struct perf_probe_event events[MAX_PROBES]; struct strlist *dellist; struct line_range line_range; - const char *target_module; + const char *target; int max_probe_points; struct strfilter *filter; } params; @@ -78,6 +76,8 @@ static int parse_probe_event(const char *str) return -1; } + pev->uprobes = params.uprobes; + /* Parse a perf-probe command into event */ ret = parse_perf_probe_command(str, pev); pr_debug("%d arguments\n", pev->nargs); @@ -85,21 +85,58 @@ static int parse_probe_event(const char *str) return ret; } +static int set_target(const char *ptr) +{ + int found = 0; + const char *buf; + + /* + * The first argument after options can be an absolute path + * to an executable / library or kernel module. + * + * TODO: Support relative path, and $PATH, $LD_LIBRARY_PATH, + * short module name. + */ + if (!params.target && ptr && *ptr == '/') { + params.target = ptr; + found = 1; + buf = ptr + (strlen(ptr) - 3); + + if (strcmp(buf, ".ko")) + params.uprobes = true; + + } + + return found; +} + static int parse_probe_event_argv(int argc, const char **argv) { - int i, len, ret; + int i, len, ret, found_target; char *buf; + found_target = set_target(argv[0]); + if (found_target && argc == 1) + return 0; + /* Bind up rest arguments */ len = 0; - for (i = 0; i < argc; i++) + for (i = 0; i < argc; i++) { + if (i == 0 && found_target) + continue; + len += strlen(argv[i]) + 1; + } buf = zalloc(len + 1); if (buf == NULL) return -ENOMEM; len = 0; - for (i = 0; i < argc; i++) + for (i = 0; i < argc; i++) { + if (i == 0 && found_target) + continue; + len += sprintf(&buf[len], "%s ", argv[i]); + } params.mod_events = true; ret = parse_probe_event(buf); free(buf); @@ -128,6 +165,28 @@ static int opt_del_probe_event(const struct option *opt __used, return 0; } +static int opt_set_target(const struct option *opt, const char *str, + int unset __used) +{ + int ret = -ENOENT; + + if (str && !params.target) { + if (!strcmp(opt->long_name, "exec")) + params.uprobes = true; +#ifdef DWARF_SUPPORT + else if (!strcmp(opt->long_name, "module")) + params.uprobes = false; +#endif + else + return ret; + + params.target = str; + ret = 0; + } + + return ret; +} + #ifdef DWARF_SUPPORT static int opt_show_lines(const struct option *opt __used, const char *str, int unset __used) @@ -249,9 +308,9 @@ static const struct option options[] = { "file", "vmlinux pathname"), OPT_STRING('s', "source", &symbol_conf.source_prefix, "directory", "path to kernel source"), - OPT_STRING('m', "module", ¶ms.target_module, - "modname|path", - "target module name (for online) or path (for offline)"), + OPT_CALLBACK('m', "module", NULL, "modname|path", + "target module name (for online) or path (for offline)", + opt_set_target), #endif OPT__DRY_RUN(&probe_event_dry_run), OPT_INTEGER('\0', "max-probes", ¶ms.max_probe_points, @@ -263,6 +322,8 @@ static const struct option options[] = { "\t\t\t(default: \"" DEFAULT_VAR_FILTER "\" for --vars,\n" "\t\t\t \"" DEFAULT_FUNC_FILTER "\" for --funcs)", opt_set_filter), + OPT_CALLBACK('x', "exec", NULL, "executable|path", + "target executable name or path", opt_set_target), OPT_END() }; @@ -313,6 +374,10 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) pr_err(" Error: Don't use --list with --funcs.\n"); usage_with_options(probe_usage, options); } + if (params.uprobes) { + pr_warning(" Error: Don't use --list with --exec.\n"); + usage_with_options(probe_usage, options); + } ret = show_perf_probe_events(); if (ret < 0) pr_err(" Error: Failed to show event list. (%d)\n", @@ -336,8 +401,8 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) if (!params.filter) params.filter = strfilter__new(DEFAULT_FUNC_FILTER, NULL); - ret = show_available_funcs(params.target_module, - params.filter); + ret = show_available_funcs(params.target, params.filter, + params.uprobes); strfilter__delete(params.filter); if (ret < 0) pr_err(" Error: Failed to show functions." @@ -346,7 +411,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) } #ifdef DWARF_SUPPORT - if (params.show_lines) { + if (params.show_lines && !params.uprobes) { if (params.mod_events) { pr_err(" Error: Don't use --line with" " --add/--del.\n"); @@ -357,7 +422,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) usage_with_options(probe_usage, options); } - ret = show_line_range(¶ms.line_range, params.target_module); + ret = show_line_range(¶ms.line_range, params.target); if (ret < 0) pr_err(" Error: Failed to show lines. (%d)\n", ret); return ret; @@ -374,7 +439,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) ret = show_available_vars(params.events, params.nevents, params.max_probe_points, - params.target_module, + params.target, params.filter, params.show_ext_vars); strfilter__delete(params.filter); @@ -396,7 +461,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) if (params.nevents) { ret = add_perf_probe_events(params.events, params.nevents, params.max_probe_points, - params.target_module, + params.target, params.force_add); if (ret < 0) { pr_err(" Error: Failed to add events. (%d)\n", ret); diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index f4c3fbee4bad..4db6e1ba54e3 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -22,6 +22,7 @@ #include "util/evsel.h" #include "util/debug.h" #include "util/session.h" +#include "util/tool.h" #include "util/symbol.h" #include "util/cpumap.h" #include "util/thread_map.h" @@ -35,54 +36,36 @@ enum write_mode_t { WRITE_APPEND }; -static u64 user_interval = ULLONG_MAX; -static u64 default_interval = 0; - -static unsigned int page_size; -static unsigned int mmap_pages = UINT_MAX; -static unsigned int user_freq = UINT_MAX; -static int freq = 1000; -static int output; -static int pipe_output = 0; -static const char *output_name = NULL; -static bool group = false; -static int realtime_prio = 0; -static bool nodelay = false; -static bool raw_samples = false; -static bool sample_id_all_avail = true; -static bool system_wide = false; -static pid_t target_pid = -1; -static pid_t target_tid = -1; -static pid_t child_pid = -1; -static bool no_inherit = false; -static enum write_mode_t write_mode = WRITE_FORCE; -static bool call_graph = false; -static bool inherit_stat = false; -static bool no_samples = false; -static bool sample_address = false; -static bool sample_time = false; -static bool no_buildid = false; -static bool no_buildid_cache = false; -static struct perf_evlist *evsel_list; - -static long samples = 0; -static u64 bytes_written = 0; - -static int file_new = 1; -static off_t post_processing_offset; - -static struct perf_session *session; -static const char *cpu_list; - -static void advance_output(size_t size) +struct perf_record { + struct perf_tool tool; + struct perf_record_opts opts; + u64 bytes_written; + const char *output_name; + struct perf_evlist *evlist; + struct perf_session *session; + const char *progname; + int output; + unsigned int page_size; + int realtime_prio; + enum write_mode_t write_mode; + bool no_buildid; + bool no_buildid_cache; + bool force; + bool file_new; + bool append_file; + long samples; + off_t post_processing_offset; +}; + +static void advance_output(struct perf_record *rec, size_t size) { - bytes_written += size; + rec->bytes_written += size; } -static void write_output(void *buf, size_t size) +static void write_output(struct perf_record *rec, void *buf, size_t size) { while (size) { - int ret = write(output, buf, size); + int ret = write(rec->output, buf, size); if (ret < 0) die("failed to write"); @@ -90,30 +73,33 @@ static void write_output(void *buf, size_t size) size -= ret; buf += ret; - bytes_written += ret; + rec->bytes_written += ret; } } -static int process_synthesized_event(union perf_event *event, +static int process_synthesized_event(struct perf_tool *tool, + union perf_event *event, struct perf_sample *sample __used, - struct perf_session *self __used) + struct machine *machine __used) { - write_output(event, event->header.size); + struct perf_record *rec = container_of(tool, struct perf_record, tool); + write_output(rec, event, event->header.size); return 0; } -static void mmap_read(struct perf_mmap *md) +static void perf_record__mmap_read(struct perf_record *rec, + struct perf_mmap *md) { unsigned int head = perf_mmap__read_head(md); unsigned int old = md->prev; - unsigned char *data = md->base + page_size; + unsigned char *data = md->base + rec->page_size; unsigned long size; void *buf; if (old == head) return; - samples++; + rec->samples++; size = head - old; @@ -122,14 +108,14 @@ static void mmap_read(struct perf_mmap *md) size = md->mask + 1 - (old & md->mask); old += size; - write_output(buf, size); + write_output(rec, buf, size); } buf = &data[old & md->mask]; size = head - old; old += size; - write_output(buf, size); + write_output(rec, buf, size); md->prev = old; perf_mmap__write_tail(md, old); @@ -137,17 +123,30 @@ static void mmap_read(struct perf_mmap *md) static volatile int done = 0; static volatile int signr = -1; +static volatile int child_finished = 0; static void sig_handler(int sig) { + if (sig == SIGCHLD) + child_finished = 1; + done = 1; signr = sig; } -static void sig_atexit(void) +static void perf_record__sig_exit(int exit_status __used, void *arg) { - if (child_pid > 0) - kill(child_pid, SIGTERM); + struct perf_record *rec = arg; + int status; + + if (rec->evlist->workload.pid > 0) { + if (!child_finished) + kill(rec->evlist->workload.pid, SIGTERM); + + wait(&status); + if (WIFSIGNALED(status)) + psignal(WTERMSIG(status), rec->progname); + } if (signr == -1 || signr == SIGUSR1) return; @@ -156,78 +155,6 @@ static void sig_atexit(void) kill(getpid(), signr); } -static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist) -{ - struct perf_event_attr *attr = &evsel->attr; - int track = !evsel->idx; /* only the first counter needs these */ - - attr->disabled = 1; - attr->inherit = !no_inherit; - attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | - PERF_FORMAT_TOTAL_TIME_RUNNING | - PERF_FORMAT_ID; - - attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID; - - if (evlist->nr_entries > 1) - attr->sample_type |= PERF_SAMPLE_ID; - - /* - * We default some events to a 1 default interval. But keep - * it a weak assumption overridable by the user. - */ - if (!attr->sample_period || (user_freq != UINT_MAX && - user_interval != ULLONG_MAX)) { - if (freq) { - attr->sample_type |= PERF_SAMPLE_PERIOD; - attr->freq = 1; - attr->sample_freq = freq; - } else { - attr->sample_period = default_interval; - } - } - - if (no_samples) - attr->sample_freq = 0; - - if (inherit_stat) - attr->inherit_stat = 1; - - if (sample_address) { - attr->sample_type |= PERF_SAMPLE_ADDR; - attr->mmap_data = track; - } - - if (call_graph) - attr->sample_type |= PERF_SAMPLE_CALLCHAIN; - - if (system_wide) - attr->sample_type |= PERF_SAMPLE_CPU; - - if (sample_id_all_avail && - (sample_time || system_wide || !no_inherit || cpu_list)) - attr->sample_type |= PERF_SAMPLE_TIME; - - if (raw_samples) { - attr->sample_type |= PERF_SAMPLE_TIME; - attr->sample_type |= PERF_SAMPLE_RAW; - attr->sample_type |= PERF_SAMPLE_CPU; - } - - if (nodelay) { - attr->watermark = 0; - attr->wakeup_events = 1; - } - - attr->mmap = track; - attr->comm = track; - - if (target_pid == -1 && target_tid == -1 && !system_wide) { - attr->disabled = 1; - attr->enable_on_exec = 1; - } -} - static bool perf_evlist__equal(struct perf_evlist *evlist, struct perf_evlist *other) { @@ -247,15 +174,20 @@ static bool perf_evlist__equal(struct perf_evlist *evlist, return true; } -static void open_counters(struct perf_evlist *evlist) +static void perf_record__open(struct perf_record *rec) { - struct perf_evsel *pos; + struct perf_evsel *pos, *first; + struct perf_evlist *evlist = rec->evlist; + struct perf_session *session = rec->session; + struct perf_record_opts *opts = &rec->opts; - if (evlist->cpus->map[0] < 0) - no_inherit = true; + first = list_entry(evlist->entries.next, struct perf_evsel, node); + + perf_evlist__config_attrs(evlist, opts); list_for_each_entry(pos, &evlist->entries, node) { struct perf_event_attr *attr = &pos->attr; + struct xyarray *group_fd = NULL; /* * Check if parse_single_tracepoint_event has already asked for * PERF_SAMPLE_TIME. @@ -270,36 +202,53 @@ static void open_counters(struct perf_evlist *evlist) */ bool time_needed = attr->sample_type & PERF_SAMPLE_TIME; - config_attr(pos, evlist); + if (opts->group && pos != first) + group_fd = first->fd; +fallback_missing_features: + if (opts->exclude_guest_missing) + attr->exclude_guest = attr->exclude_host = 0; retry_sample_id: - attr->sample_id_all = sample_id_all_avail ? 1 : 0; + attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1; try_again: - if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group) < 0) { + if (perf_evsel__open(pos, evlist->cpus, evlist->threads, + opts->group, group_fd) < 0) { int err = errno; if (err == EPERM || err == EACCES) { - ui__warning_paranoid(); + ui__error_paranoid(); exit(EXIT_FAILURE); - } else if (err == ENODEV && cpu_list) { + } else if (err == ENODEV && opts->target.cpu_list) { die("No such device - did you specify" " an out-of-range profile CPU?\n"); - } else if (err == EINVAL && sample_id_all_avail) { - /* - * Old kernel, no attr->sample_id_type_all field - */ - sample_id_all_avail = false; - if (!sample_time && !raw_samples && !time_needed) - attr->sample_type &= ~PERF_SAMPLE_TIME; - - goto retry_sample_id; + } else if (err == EINVAL) { + if (!opts->exclude_guest_missing && + (attr->exclude_guest || attr->exclude_host)) { + pr_debug("Old kernel, cannot exclude " + "guest or host samples.\n"); + opts->exclude_guest_missing = true; + goto fallback_missing_features; + } else if (!opts->sample_id_all_missing) { + /* + * Old kernel, no attr->sample_id_type_all field + */ + opts->sample_id_all_missing = true; + if (!opts->sample_time && !opts->raw_samples && !time_needed) + attr->sample_type &= ~PERF_SAMPLE_TIME; + + goto retry_sample_id; + } } /* * If it's cycles then fall back to hrtimer * based cpu-clock-tick sw counter, which - * is always available even if no PMU support: + * is always available even if no PMU support. + * + * PPC returns ENXIO until 2.6.37 (behavior changed + * with commit b0a873e). */ - if (attr->type == PERF_TYPE_HARDWARE + if ((err == ENOENT || err == ENXIO) + && attr->type == PERF_TYPE_HARDWARE && attr->config == PERF_COUNT_HW_CPU_CYCLES) { if (verbose) @@ -307,12 +256,16 @@ try_again: "trying to fall back to cpu-clock-ticks\n"); attr->type = PERF_TYPE_SOFTWARE; attr->config = PERF_COUNT_SW_CPU_CLOCK; + if (pos->name) { + free(pos->name); + pos->name = NULL; + } goto try_again; } if (err == ENOENT) { - ui__warning("The %s event is not supported.\n", - event_name(pos)); + ui__error("The %s event is not supported.\n", + perf_evsel__name(pos)); exit(EXIT_FAILURE); } @@ -338,10 +291,20 @@ try_again: exit(-1); } - if (perf_evlist__mmap(evlist, mmap_pages, false) < 0) + if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) { + if (errno == EPERM) + die("Permission error mapping pages.\n" + "Consider increasing " + "/proc/sys/kernel/perf_event_mlock_kb,\n" + "or try again with a smaller value of -m/--mmap_pages.\n" + "(current value: %d)\n", opts->mmap_pages); + else if (!is_power_of_2(opts->mmap_pages)) + die("--mmap_pages/-m value must be a power of two."); + die("failed to mmap with %d (%s)\n", errno, strerror(errno)); + } - if (file_new) + if (rec->file_new) session->evlist = evlist; else { if (!perf_evlist__equal(session->evlist, evlist)) { @@ -350,32 +313,35 @@ try_again: } } - perf_session__update_sample_type(session); + perf_session__set_id_hdr_size(session); } -static int process_buildids(void) +static int process_buildids(struct perf_record *rec) { - u64 size = lseek(output, 0, SEEK_CUR); + u64 size = lseek(rec->output, 0, SEEK_CUR); if (size == 0) return 0; - session->fd = output; - return __perf_session__process_events(session, post_processing_offset, - size - post_processing_offset, + rec->session->fd = rec->output; + return __perf_session__process_events(rec->session, rec->post_processing_offset, + size - rec->post_processing_offset, size, &build_id__mark_dso_hit_ops); } -static void atexit_header(void) +static void perf_record__exit(int status __used, void *arg) { - if (!pipe_output) { - session->header.data_size += bytes_written; - - if (!no_buildid) - process_buildids(); - perf_session__write_header(session, evsel_list, output, true); - perf_session__delete(session); - perf_evlist__delete(evsel_list); + struct perf_record *rec = arg; + + if (!rec->opts.pipe_output) { + rec->session->header.data_size += rec->bytes_written; + + if (!rec->no_buildid) + process_buildids(rec); + perf_session__write_header(rec->session, rec->evlist, + rec->output, true); + perf_session__delete(rec->session); + perf_evlist__delete(rec->evlist); symbol__exit(); } } @@ -383,7 +349,7 @@ static void atexit_header(void) static void perf_event__synthesize_guest_os(struct machine *machine, void *data) { int err; - struct perf_session *psession = data; + struct perf_tool *tool = data; if (machine__is_host(machine)) return; @@ -396,8 +362,8 @@ static void perf_event__synthesize_guest_os(struct machine *machine, void *data) *method is used to avoid symbol missing when the first addr is *in module instead of in guest kernel. */ - err = perf_event__synthesize_modules(process_synthesized_event, - psession, machine); + err = perf_event__synthesize_modules(tool, process_synthesized_event, + machine); if (err < 0) pr_err("Couldn't record guest kernel [%d]'s reference" " relocation symbol.\n", machine->pid); @@ -406,12 +372,11 @@ static void perf_event__synthesize_guest_os(struct machine *machine, void *data) * We use _stext for guest kernel because guest kernel's /proc/kallsyms * have no _text sometimes. */ - err = perf_event__synthesize_kernel_mmap(process_synthesized_event, - psession, machine, "_text"); + err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, + machine, "_text"); if (err < 0) - err = perf_event__synthesize_kernel_mmap(process_synthesized_event, - psession, machine, - "_stext"); + err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, + machine, "_stext"); if (err < 0) pr_err("Couldn't record guest kernel [%d]'s reference" " relocation symbol.\n", machine->pid); @@ -422,71 +387,71 @@ static struct perf_event_header finished_round_event = { .type = PERF_RECORD_FINISHED_ROUND, }; -static void mmap_read_all(void) +static void perf_record__mmap_read_all(struct perf_record *rec) { int i; - for (i = 0; i < evsel_list->nr_mmaps; i++) { - if (evsel_list->mmap[i].base) - mmap_read(&evsel_list->mmap[i]); + for (i = 0; i < rec->evlist->nr_mmaps; i++) { + if (rec->evlist->mmap[i].base) + perf_record__mmap_read(rec, &rec->evlist->mmap[i]); } - if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO)) - write_output(&finished_round_event, sizeof(finished_round_event)); + if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA)) + write_output(rec, &finished_round_event, sizeof(finished_round_event)); } -static int __cmd_record(int argc, const char **argv) +static int __cmd_record(struct perf_record *rec, int argc, const char **argv) { struct stat st; int flags; - int err; + int err, output, feat; unsigned long waking = 0; - int child_ready_pipe[2], go_pipe[2]; const bool forks = argc > 0; - char buf; struct machine *machine; + struct perf_tool *tool = &rec->tool; + struct perf_record_opts *opts = &rec->opts; + struct perf_evlist *evsel_list = rec->evlist; + const char *output_name = rec->output_name; + struct perf_session *session; + + rec->progname = argv[0]; - page_size = sysconf(_SC_PAGE_SIZE); + rec->page_size = sysconf(_SC_PAGE_SIZE); - atexit(sig_atexit); + on_exit(perf_record__sig_exit, rec); signal(SIGCHLD, sig_handler); signal(SIGINT, sig_handler); signal(SIGUSR1, sig_handler); - if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) { - perror("failed to create pipes"); - exit(-1); - } - if (!output_name) { if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode)) - pipe_output = 1; + opts->pipe_output = true; else - output_name = "perf.data"; + rec->output_name = output_name = "perf.data"; } if (output_name) { if (!strcmp(output_name, "-")) - pipe_output = 1; + opts->pipe_output = true; else if (!stat(output_name, &st) && st.st_size) { - if (write_mode == WRITE_FORCE) { + if (rec->write_mode == WRITE_FORCE) { char oldname[PATH_MAX]; snprintf(oldname, sizeof(oldname), "%s.old", output_name); unlink(oldname); rename(output_name, oldname); } - } else if (write_mode == WRITE_APPEND) { - write_mode = WRITE_FORCE; + } else if (rec->write_mode == WRITE_APPEND) { + rec->write_mode = WRITE_FORCE; } } flags = O_CREAT|O_RDWR; - if (write_mode == WRITE_APPEND) - file_new = 0; + if (rec->write_mode == WRITE_APPEND) + rec->file_new = 0; else flags |= O_TRUNC; - if (pipe_output) + if (opts->pipe_output) output = STDOUT_FILENO; else output = open(output_name, flags, S_IRUSR | S_IWUSR); @@ -495,113 +460,86 @@ static int __cmd_record(int argc, const char **argv) exit(-1); } + rec->output = output; + session = perf_session__new(output_name, O_WRONLY, - write_mode == WRITE_FORCE, false, NULL); + rec->write_mode == WRITE_FORCE, false, NULL); if (session == NULL) { pr_err("Not enough memory for reading perf file header\n"); return -1; } - if (!no_buildid) - perf_header__set_feat(&session->header, HEADER_BUILD_ID); + rec->session = session; - if (!file_new) { + for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) + perf_header__set_feat(&session->header, feat); + + if (rec->no_buildid) + perf_header__clear_feat(&session->header, HEADER_BUILD_ID); + + if (!have_tracepoints(&evsel_list->entries)) + perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); + + if (!rec->opts.branch_stack) + perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); + + if (!rec->file_new) { err = perf_session__read_header(session, output); if (err < 0) goto out_delete_session; } - if (have_tracepoints(&evsel_list->entries)) - perf_header__set_feat(&session->header, HEADER_TRACE_INFO); - - /* 512 kiB: default amount of unprivileged mlocked memory */ - if (mmap_pages == UINT_MAX) - mmap_pages = (512 * 1024) / page_size; - if (forks) { - child_pid = fork(); - if (child_pid < 0) { - perror("failed to fork"); - exit(-1); - } - - if (!child_pid) { - if (pipe_output) - dup2(2, 1); - close(child_ready_pipe[0]); - close(go_pipe[1]); - fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); - - /* - * Do a dummy execvp to get the PLT entry resolved, - * so we avoid the resolver overhead on the real - * execvp call. - */ - execvp("", (char **)argv); - - /* - * Tell the parent we're ready to go - */ - close(child_ready_pipe[1]); - - /* - * Wait until the parent tells us to go. - */ - if (read(go_pipe[0], &buf, 1) == -1) - perror("unable to read pipe"); - - execvp(argv[0], (char **)argv); - - perror(argv[0]); - kill(getppid(), SIGUSR1); - exit(-1); - } - - if (!system_wide && target_tid == -1 && target_pid == -1) - evsel_list->threads->map[0] = child_pid; - - close(child_ready_pipe[1]); - close(go_pipe[0]); - /* - * wait for child to settle - */ - if (read(child_ready_pipe[0], &buf, 1) == -1) { - perror("unable to read pipe"); - exit(-1); + err = perf_evlist__prepare_workload(evsel_list, opts, argv); + if (err < 0) { + pr_err("Couldn't run the workload!\n"); + goto out_delete_session; } - close(child_ready_pipe[0]); } - open_counters(evsel_list); + perf_record__open(rec); /* - * perf_session__delete(session) will be called at atexit_header() + * perf_session__delete(session) will be called at perf_record__exit() */ - atexit(atexit_header); + on_exit(perf_record__exit, rec); - if (pipe_output) { + if (opts->pipe_output) { err = perf_header__write_pipe(output); if (err < 0) return err; - } else if (file_new) { + } else if (rec->file_new) { err = perf_session__write_header(session, evsel_list, output, false); if (err < 0) return err; } - post_processing_offset = lseek(output, 0, SEEK_CUR); + if (!rec->no_buildid + && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) { + pr_err("Couldn't generate buildids. " + "Use --no-buildid to profile anyway.\n"); + return -1; + } - if (pipe_output) { - err = perf_session__synthesize_attrs(session, - process_synthesized_event); + rec->post_processing_offset = lseek(output, 0, SEEK_CUR); + + machine = perf_session__find_host_machine(session); + if (!machine) { + pr_err("Couldn't find native kernel information.\n"); + return -1; + } + + if (opts->pipe_output) { + err = perf_event__synthesize_attrs(tool, session, + process_synthesized_event); if (err < 0) { pr_err("Couldn't synthesize attrs.\n"); return err; } - err = perf_event__synthesize_event_types(process_synthesized_event, - session); + err = perf_event__synthesize_event_types(tool, process_synthesized_event, + machine); if (err < 0) { pr_err("Couldn't synthesize event_types.\n"); return err; @@ -616,56 +554,49 @@ static int __cmd_record(int argc, const char **argv) * return this more properly and also * propagate errors that now are calling die() */ - err = perf_event__synthesize_tracing_data(output, evsel_list, - process_synthesized_event, - session); + err = perf_event__synthesize_tracing_data(tool, output, evsel_list, + process_synthesized_event); if (err <= 0) { pr_err("Couldn't record tracing data.\n"); return err; } - advance_output(err); + advance_output(rec, err); } } - machine = perf_session__find_host_machine(session); - if (!machine) { - pr_err("Couldn't find native kernel information.\n"); - return -1; - } - - err = perf_event__synthesize_kernel_mmap(process_synthesized_event, - session, machine, "_text"); + err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, + machine, "_text"); if (err < 0) - err = perf_event__synthesize_kernel_mmap(process_synthesized_event, - session, machine, "_stext"); + err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, + machine, "_stext"); if (err < 0) pr_err("Couldn't record kernel reference relocation symbol\n" "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" "Check /proc/kallsyms permission or run as root.\n"); - err = perf_event__synthesize_modules(process_synthesized_event, - session, machine); + err = perf_event__synthesize_modules(tool, process_synthesized_event, + machine); if (err < 0) pr_err("Couldn't record kernel module information.\n" "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" "Check /proc/modules permission or run as root.\n"); if (perf_guest) - perf_session__process_machines(session, + perf_session__process_machines(session, tool, perf_event__synthesize_guest_os); - if (!system_wide) - perf_event__synthesize_thread_map(evsel_list->threads, + if (!opts->target.system_wide) + perf_event__synthesize_thread_map(tool, evsel_list->threads, process_synthesized_event, - session); + machine); else - perf_event__synthesize_threads(process_synthesized_event, - session); + perf_event__synthesize_threads(tool, process_synthesized_event, + machine); - if (realtime_prio) { + if (rec->realtime_prio) { struct sched_param param; - param.sched_priority = realtime_prio; + param.sched_priority = rec->realtime_prio; if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { pr_err("Could not set realtime priority.\n"); exit(-1); @@ -678,14 +609,14 @@ static int __cmd_record(int argc, const char **argv) * Let the child rip */ if (forks) - close(go_pipe[1]); + perf_evlist__start_workload(evsel_list); for (;;) { - int hits = samples; + int hits = rec->samples; - mmap_read_all(); + perf_record__mmap_read_all(rec); - if (hits == samples) { + if (hits == rec->samples) { if (done) break; err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1); @@ -706,9 +637,9 @@ static int __cmd_record(int argc, const char **argv) */ fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n", - (double)bytes_written / 1024.0 / 1024.0, + (double)rec->bytes_written / 1024.0 / 1024.0, output_name, - bytes_written / 24); + rec->bytes_written / 24); return 0; @@ -717,66 +648,191 @@ out_delete_session: return err; } +#define BRANCH_OPT(n, m) \ + { .name = n, .mode = (m) } + +#define BRANCH_END { .name = NULL } + +struct branch_mode { + const char *name; + int mode; +}; + +static const struct branch_mode branch_modes[] = { + BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER), + BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL), + BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV), + BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY), + BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL), + BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN), + BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL), + BRANCH_END +}; + +static int +parse_branch_stack(const struct option *opt, const char *str, int unset) +{ +#define ONLY_PLM \ + (PERF_SAMPLE_BRANCH_USER |\ + PERF_SAMPLE_BRANCH_KERNEL |\ + PERF_SAMPLE_BRANCH_HV) + + uint64_t *mode = (uint64_t *)opt->value; + const struct branch_mode *br; + char *s, *os = NULL, *p; + int ret = -1; + + if (unset) + return 0; + + /* + * cannot set it twice, -b + --branch-filter for instance + */ + if (*mode) + return -1; + + /* str may be NULL in case no arg is passed to -b */ + if (str) { + /* because str is read-only */ + s = os = strdup(str); + if (!s) + return -1; + + for (;;) { + p = strchr(s, ','); + if (p) + *p = '\0'; + + for (br = branch_modes; br->name; br++) { + if (!strcasecmp(s, br->name)) + break; + } + if (!br->name) { + ui__warning("unknown branch filter %s," + " check man page\n", s); + goto error; + } + + *mode |= br->mode; + + if (!p) + break; + + s = p + 1; + } + } + ret = 0; + + /* default to any branch */ + if ((*mode & ~ONLY_PLM) == 0) { + *mode = PERF_SAMPLE_BRANCH_ANY; + } +error: + free(os); + return ret; +} + static const char * const record_usage[] = { "perf record [<options>] [<command>]", "perf record [<options>] -- <command> [<options>]", NULL }; -static bool force, append_file; +/* + * XXX Ideally would be local to cmd_record() and passed to a perf_record__new + * because we need to have access to it in perf_record__exit, that is called + * after cmd_record() exits, but since record_options need to be accessible to + * builtin-script, leave it here. + * + * At least we don't ouch it in all the other functions here directly. + * + * Just say no to tons of global variables, sigh. + */ +static struct perf_record record = { + .opts = { + .mmap_pages = UINT_MAX, + .user_freq = UINT_MAX, + .user_interval = ULLONG_MAX, + .freq = 4000, + .target = { + .uses_mmap = true, + }, + }, + .write_mode = WRITE_FORCE, + .file_new = true, +}; +/* + * XXX Will stay a global variable till we fix builtin-script.c to stop messing + * with it and switch to use the library functions in perf_evlist that came + * from builtin-record.c, i.e. use perf_record_opts, + * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record', + * using pipes, etc. + */ const struct option record_options[] = { - OPT_CALLBACK('e', "event", &evsel_list, "event", + OPT_CALLBACK('e', "event", &record.evlist, "event", "event selector. use 'perf list' to list available events", parse_events_option), - OPT_CALLBACK(0, "filter", &evsel_list, "filter", + OPT_CALLBACK(0, "filter", &record.evlist, "filter", "event filter", parse_filter), - OPT_INTEGER('p', "pid", &target_pid, + OPT_STRING('p', "pid", &record.opts.target.pid, "pid", "record events on existing process id"), - OPT_INTEGER('t', "tid", &target_tid, + OPT_STRING('t', "tid", &record.opts.target.tid, "tid", "record events on existing thread id"), - OPT_INTEGER('r', "realtime", &realtime_prio, + OPT_INTEGER('r', "realtime", &record.realtime_prio, "collect data with this RT SCHED_FIFO priority"), - OPT_BOOLEAN('D', "no-delay", &nodelay, + OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay, "collect data without buffering"), - OPT_BOOLEAN('R', "raw-samples", &raw_samples, + OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples, "collect raw sample records from all opened counters"), - OPT_BOOLEAN('a', "all-cpus", &system_wide, + OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide, "system-wide collection from all CPUs"), - OPT_BOOLEAN('A', "append", &append_file, + OPT_BOOLEAN('A', "append", &record.append_file, "append to the output file to do incremental profiling"), - OPT_STRING('C', "cpu", &cpu_list, "cpu", + OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", "list of cpus to monitor"), - OPT_BOOLEAN('f', "force", &force, + OPT_BOOLEAN('f', "force", &record.force, "overwrite existing data file (deprecated)"), - OPT_U64('c', "count", &user_interval, "event period to sample"), - OPT_STRING('o', "output", &output_name, "file", + OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), + OPT_STRING('o', "output", &record.output_name, "file", "output file name"), - OPT_BOOLEAN('i', "no-inherit", &no_inherit, + OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit, "child tasks do not inherit counters"), - OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"), - OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"), - OPT_BOOLEAN(0, "group", &group, + OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"), + OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages, + "number of mmap data pages"), + OPT_BOOLEAN(0, "group", &record.opts.group, "put the counters into a counter group"), - OPT_BOOLEAN('g', "call-graph", &call_graph, + OPT_BOOLEAN('g', "call-graph", &record.opts.call_graph, "do call-graph (stack chain/backtrace) recording"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"), - OPT_BOOLEAN('s', "stat", &inherit_stat, + OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, "per thread counts"), - OPT_BOOLEAN('d', "data", &sample_address, + OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Sample addresses"), - OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"), - OPT_BOOLEAN('n', "no-samples", &no_samples, + OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"), + OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"), + OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, "don't sample"), - OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache, + OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache, "do not update the buildid cache"), - OPT_BOOLEAN('B', "no-buildid", &no_buildid, + OPT_BOOLEAN('B', "no-buildid", &record.no_buildid, "do not collect buildids in perf.data"), - OPT_CALLBACK('G', "cgroup", &evsel_list, "name", + OPT_CALLBACK('G', "cgroup", &record.evlist, "name", "monitor event in cgroup name only", parse_cgroups), + OPT_STRING('u', "uid", &record.opts.target.uid_str, "user", + "user to profile"), + + OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, + "branch any", "sample any taken branches", + parse_branch_stack), + + OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, + "branch filter mask", "branch stack filter modes", + parse_branch_stack), OPT_END() }; @@ -784,30 +840,34 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) { int err = -ENOMEM; struct perf_evsel *pos; + struct perf_evlist *evsel_list; + struct perf_record *rec = &record; + char errbuf[BUFSIZ]; evsel_list = perf_evlist__new(NULL, NULL); if (evsel_list == NULL) return -ENOMEM; + rec->evlist = evsel_list; + argc = parse_options(argc, argv, record_options, record_usage, PARSE_OPT_STOP_AT_NON_OPTION); - if (!argc && target_pid == -1 && target_tid == -1 && - !system_wide && !cpu_list) + if (!argc && perf_target__none(&rec->opts.target)) usage_with_options(record_usage, record_options); - if (force && append_file) { - fprintf(stderr, "Can't overwrite and append at the same time." - " You need to choose between -f and -A"); + if (rec->force && rec->append_file) { + ui__error("Can't overwrite and append at the same time." + " You need to choose between -f and -A"); usage_with_options(record_usage, record_options); - } else if (append_file) { - write_mode = WRITE_APPEND; + } else if (rec->append_file) { + rec->write_mode = WRITE_APPEND; } else { - write_mode = WRITE_FORCE; + rec->write_mode = WRITE_FORCE; } - if (nr_cgroups && !system_wide) { - fprintf(stderr, "cgroup monitoring only available in" - " system-wide mode\n"); + if (nr_cgroups && !rec->opts.target.system_wide) { + ui__error("cgroup monitoring only available in" + " system-wide mode\n"); usage_with_options(record_usage, record_options); } @@ -823,7 +883,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) "If some relocation was applied (e.g. kexec) symbols may be misresolved\n" "even with a suitable vmlinux or kallsyms file.\n\n"); - if (no_buildid_cache || no_buildid) + if (rec->no_buildid_cache || rec->no_buildid) disable_buildid_cache(); if (evsel_list->nr_entries == 0 && @@ -832,43 +892,51 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) goto out_symbol_exit; } - if (target_pid != -1) - target_tid = target_pid; + err = perf_target__validate(&rec->opts.target); + if (err) { + perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); + ui__warning("%s", errbuf); + } + + err = perf_target__parse_uid(&rec->opts.target); + if (err) { + int saved_errno = errno; + + perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); + ui__error("%s", errbuf); + + err = -saved_errno; + goto out_free_fd; + } - if (perf_evlist__create_maps(evsel_list, target_pid, - target_tid, cpu_list) < 0) + err = -ENOMEM; + if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0) usage_with_options(record_usage, record_options); list_for_each_entry(pos, &evsel_list->entries, node) { - if (perf_evsel__alloc_fd(pos, evsel_list->cpus->nr, - evsel_list->threads->nr) < 0) - goto out_free_fd; - if (perf_header__push_event(pos->attr.config, event_name(pos))) + if (perf_header__push_event(pos->attr.config, perf_evsel__name(pos))) goto out_free_fd; } - if (perf_evlist__alloc_pollfd(evsel_list) < 0) - goto out_free_fd; - - if (user_interval != ULLONG_MAX) - default_interval = user_interval; - if (user_freq != UINT_MAX) - freq = user_freq; + if (rec->opts.user_interval != ULLONG_MAX) + rec->opts.default_interval = rec->opts.user_interval; + if (rec->opts.user_freq != UINT_MAX) + rec->opts.freq = rec->opts.user_freq; /* * User specified count overrides default frequency. */ - if (default_interval) - freq = 0; - else if (freq) { - default_interval = freq; + if (rec->opts.default_interval) + rec->opts.freq = 0; + else if (rec->opts.freq) { + rec->opts.default_interval = rec->opts.freq; } else { - fprintf(stderr, "frequency and count are zero, aborting\n"); + ui__error("frequency and count are zero, aborting\n"); err = -EINVAL; goto out_free_fd; } - err = __cmd_record(argc, argv); + err = __cmd_record(&record, argc, argv); out_free_fd: perf_evlist__delete_maps(evsel_list); out_symbol_exit: diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index d7ff277bdb78..7c88a243b5db 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -25,6 +25,7 @@ #include "util/evsel.h" #include "util/header.h" #include "util/session.h" +#include "util/tool.h" #include "util/parse-options.h" #include "util/parse-events.h" @@ -35,37 +36,112 @@ #include <linux/bitmap.h> -static char const *input_name = "perf.data"; +struct perf_report { + struct perf_tool tool; + struct perf_session *session; + char const *input_name; + bool force, use_tui, use_gtk, use_stdio; + bool hide_unresolved; + bool dont_use_callchains; + bool show_full_info; + bool show_threads; + bool inverted_callchain; + struct perf_read_values show_threads_values; + const char *pretty_printing_style; + symbol_filter_t annotate_init; + const char *cpu_list; + const char *symbol_filter_str; + DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); +}; -static bool force, use_tui, use_stdio; -static bool hide_unresolved; -static bool dont_use_callchains; +static int perf_report__add_branch_hist_entry(struct perf_tool *tool, + struct addr_location *al, + struct perf_sample *sample, + struct perf_evsel *evsel, + struct machine *machine) +{ + struct perf_report *rep = container_of(tool, struct perf_report, tool); + struct symbol *parent = NULL; + int err = 0; + unsigned i; + struct hist_entry *he; + struct branch_info *bi, *bx; -static bool show_threads; -static struct perf_read_values show_threads_values; + if ((sort__has_parent || symbol_conf.use_callchain) + && sample->callchain) { + err = machine__resolve_callchain(machine, al->thread, + sample->callchain, &parent); + if (err) + return err; + } -static const char default_pretty_printing_style[] = "normal"; -static const char *pretty_printing_style = default_pretty_printing_style; + bi = machine__resolve_bstack(machine, al->thread, + sample->branch_stack); + if (!bi) + return -ENOMEM; -static char callchain_default_opt[] = "fractal,0.5,callee"; -static bool inverted_callchain; -static symbol_filter_t annotate_init; + for (i = 0; i < sample->branch_stack->nr; i++) { + if (rep->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym)) + continue; + /* + * The report shows the percentage of total branches captured + * and not events sampled. Thus we use a pseudo period of 1. + */ + he = __hists__add_branch_entry(&evsel->hists, al, parent, + &bi[i], 1); + if (he) { + struct annotation *notes; + err = -ENOMEM; + bx = he->branch_info; + if (bx->from.sym && use_browser > 0) { + notes = symbol__annotation(bx->from.sym); + if (!notes->src + && symbol__alloc_hist(bx->from.sym) < 0) + goto out; + + err = symbol__inc_addr_samples(bx->from.sym, + bx->from.map, + evsel->idx, + bx->from.al_addr); + if (err) + goto out; + } -static const char *cpu_list; -static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); + if (bx->to.sym && use_browser > 0) { + notes = symbol__annotation(bx->to.sym); + if (!notes->src + && symbol__alloc_hist(bx->to.sym) < 0) + goto out; + + err = symbol__inc_addr_samples(bx->to.sym, + bx->to.map, + evsel->idx, + bx->to.al_addr); + if (err) + goto out; + } + evsel->hists.stats.total_period += 1; + hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); + err = 0; + } else + return -ENOMEM; + } +out: + return err; +} -static int perf_session__add_hist_entry(struct perf_session *session, - struct addr_location *al, - struct perf_sample *sample, - struct perf_evsel *evsel) +static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, + struct addr_location *al, + struct perf_sample *sample, + struct machine *machine) { struct symbol *parent = NULL; int err = 0; struct hist_entry *he; if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { - err = perf_session__resolve_callchain(session, al->thread, - sample->callchain, &parent); + err = machine__resolve_callchain(machine, al->thread, + sample->callchain, &parent); if (err) return err; } @@ -75,7 +151,8 @@ static int perf_session__add_hist_entry(struct perf_session *session, return -ENOMEM; if (symbol_conf.use_callchain) { - err = callchain_append(he->callchain, &session->callchain_cursor, + err = callchain_append(he->callchain, + &callchain_cursor, sample->period); if (err) return err; @@ -85,14 +162,13 @@ static int perf_session__add_hist_entry(struct perf_session *session, * so we don't allocated the extra space needed because the stdio * code will not use it. */ - if (al->sym != NULL && use_browser > 0) { + if (he->ms.sym != NULL && use_browser > 0) { struct annotation *notes = symbol__annotation(he->ms.sym); assert(evsel != NULL); err = -ENOMEM; - if (notes->src == NULL && - symbol__alloc_hist(he->ms.sym, session->evlist->nr_entries) < 0) + if (notes->src == NULL && symbol__alloc_hist(he->ms.sym) < 0) goto out; err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); @@ -105,46 +181,57 @@ out: } -static int process_sample_event(union perf_event *event, +static int process_sample_event(struct perf_tool *tool, + union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, - struct perf_session *session) + struct machine *machine) { + struct perf_report *rep = container_of(tool, struct perf_report, tool); struct addr_location al; - if (perf_event__preprocess_sample(event, session, &al, sample, - annotate_init) < 0) { + if (perf_event__preprocess_sample(event, machine, &al, sample, + rep->annotate_init) < 0) { fprintf(stderr, "problem processing %d event, skipping it.\n", event->header.type); return -1; } - if (al.filtered || (hide_unresolved && al.sym == NULL)) + if (al.filtered || (rep->hide_unresolved && al.sym == NULL)) return 0; - if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) + if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) return 0; - if (al.map != NULL) - al.map->dso->hit = 1; + if (sort__branch_mode == 1) { + if (perf_report__add_branch_hist_entry(tool, &al, sample, + evsel, machine)) { + pr_debug("problem adding lbr entry, skipping event\n"); + return -1; + } + } else { + if (al.map != NULL) + al.map->dso->hit = 1; - if (perf_session__add_hist_entry(session, &al, sample, evsel)) { - pr_debug("problem incrementing symbol period, skipping event\n"); - return -1; + if (perf_evsel__add_hist_entry(evsel, &al, sample, machine)) { + pr_debug("problem incrementing symbol period, skipping event\n"); + return -1; + } } - return 0; } -static int process_read_event(union perf_event *event, +static int process_read_event(struct perf_tool *tool, + union perf_event *event, struct perf_sample *sample __used, - struct perf_session *session) + struct perf_evsel *evsel, + struct machine *machine __used) { - struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, - event->read.id); - if (show_threads) { - const char *name = evsel ? event_name(evsel) : "unknown"; - perf_read_values_add_value(&show_threads_values, + struct perf_report *rep = container_of(tool, struct perf_report, tool); + + if (rep->show_threads) { + const char *name = evsel ? perf_evsel__name(evsel) : "unknown"; + perf_read_values_add_value(&rep->show_threads_values, event->read.pid, event->read.tid, event->read.id, name, @@ -152,55 +239,52 @@ static int process_read_event(union perf_event *event, } dump_printf(": %d %d %s %" PRIu64 "\n", event->read.pid, event->read.tid, - evsel ? event_name(evsel) : "FAIL", + evsel ? perf_evsel__name(evsel) : "FAIL", event->read.value); return 0; } -static int perf_session__setup_sample_type(struct perf_session *self) +/* For pipe mode, sample_type is not currently set */ +static int perf_report__setup_sample_type(struct perf_report *rep) { - if (!(self->sample_type & PERF_SAMPLE_CALLCHAIN)) { + struct perf_session *self = rep->session; + u64 sample_type = perf_evlist__sample_type(self->evlist); + + if (!self->fd_pipe && !(sample_type & PERF_SAMPLE_CALLCHAIN)) { if (sort__has_parent) { - ui__warning("Selected --sort parent, but no " + ui__error("Selected --sort parent, but no " "callchain data. Did you call " "'perf record' without -g?\n"); return -EINVAL; } if (symbol_conf.use_callchain) { - ui__warning("Selected -g but no callchain data. Did " + ui__error("Selected -g but no callchain data. Did " "you call 'perf record' without -g?\n"); return -1; } - } else if (!dont_use_callchains && callchain_param.mode != CHAIN_NONE && + } else if (!rep->dont_use_callchains && + callchain_param.mode != CHAIN_NONE && !symbol_conf.use_callchain) { symbol_conf.use_callchain = true; if (callchain_register_param(&callchain_param) < 0) { - ui__warning("Can't register callchain " - "params.\n"); + ui__error("Can't register callchain params.\n"); return -EINVAL; } } + if (sort__branch_mode == 1) { + if (!self->fd_pipe && + !(sample_type & PERF_SAMPLE_BRANCH_STACK)) { + ui__error("Selected -b but no branch data. " + "Did you call perf record without -b?\n"); + return -1; + } + } + return 0; } -static struct perf_event_ops event_ops = { - .sample = process_sample_event, - .mmap = perf_event__process_mmap, - .comm = perf_event__process_comm, - .exit = perf_event__process_task, - .fork = perf_event__process_task, - .lost = perf_event__process_lost, - .read = process_read_event, - .attr = perf_event__process_attr, - .event_type = perf_event__process_event_type, - .tracing_data = perf_event__process_tracing_data, - .build_id = perf_event__process_build_id, - .ordered_samples = true, - .ordering_requires_timestamps = true, -}; - extern volatile int session_done; static void sig_handler(int sig __used) @@ -213,29 +297,30 @@ static size_t hists__fprintf_nr_sample_events(struct hists *self, { size_t ret; char unit; - unsigned long nr_events = self->stats.nr_events[PERF_RECORD_SAMPLE]; + unsigned long nr_samples = self->stats.nr_events[PERF_RECORD_SAMPLE]; + u64 nr_events = self->stats.total_period; - nr_events = convert_unit(nr_events, &unit); - ret = fprintf(fp, "# Events: %lu%c", nr_events, unit); + nr_samples = convert_unit(nr_samples, &unit); + ret = fprintf(fp, "# Samples: %lu%c", nr_samples, unit); if (evname != NULL) - ret += fprintf(fp, " %s", evname); + ret += fprintf(fp, " of event '%s'", evname); + + ret += fprintf(fp, "\n# Event count (approx.): %" PRIu64, nr_events); return ret + fprintf(fp, "\n#\n"); } static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist, + struct perf_report *rep, const char *help) { struct perf_evsel *pos; list_for_each_entry(pos, &evlist->entries, node) { struct hists *hists = &pos->hists; - const char *evname = NULL; - - if (rb_first(&hists->entries) != rb_last(&hists->entries)) - evname = event_name(pos); + const char *evname = perf_evsel__name(pos); hists__fprintf_nr_sample_events(hists, evname, stdout); - hists__fprintf(hists, NULL, false, stdout); + hists__fprintf(hists, NULL, false, true, 0, 0, stdout); fprintf(stdout, "\n\n"); } @@ -243,22 +328,22 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist, parent_pattern == default_parent_pattern) { fprintf(stdout, "#\n# (%s)\n#\n", help); - if (show_threads) { - bool style = !strcmp(pretty_printing_style, "raw"); - perf_read_values_display(stdout, &show_threads_values, + if (rep->show_threads) { + bool style = !strcmp(rep->pretty_printing_style, "raw"); + perf_read_values_display(stdout, &rep->show_threads_values, style); - perf_read_values_destroy(&show_threads_values); + perf_read_values_destroy(&rep->show_threads_values); } } return 0; } -static int __cmd_report(void) +static int __cmd_report(struct perf_report *rep) { int ret = -EINVAL; u64 nr_samples; - struct perf_session *session; + struct perf_session *session = rep->session; struct perf_evsel *pos; struct map *kernel_map; struct kmap *kernel_kmap; @@ -266,24 +351,24 @@ static int __cmd_report(void) signal(SIGINT, sig_handler); - session = perf_session__new(input_name, O_RDONLY, force, false, &event_ops); - if (session == NULL) - return -ENOMEM; - - if (cpu_list) { - ret = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap); + if (rep->cpu_list) { + ret = perf_session__cpu_bitmap(session, rep->cpu_list, + rep->cpu_bitmap); if (ret) goto out_delete; } - if (show_threads) - perf_read_values_init(&show_threads_values); + if (use_browser <= 0) + perf_session__fprintf_info(session, stdout, rep->show_full_info); + + if (rep->show_threads) + perf_read_values_init(&rep->show_threads_values); - ret = perf_session__setup_sample_type(session); + ret = perf_report__setup_sample_type(rep); if (ret) goto out_delete; - ret = perf_session__process_events(session, &event_ops); + ret = perf_session__process_events(session, &rep->tool); if (ret) goto out_delete; @@ -293,16 +378,23 @@ static int __cmd_report(void) (kernel_map->dso->hit && (kernel_kmap->ref_reloc_sym == NULL || kernel_kmap->ref_reloc_sym->addr == 0))) { - const struct dso *kdso = kernel_map->dso; + const char *desc = + "As no suitable kallsyms nor vmlinux was found, kernel samples\n" + "can't be resolved."; + + if (kernel_map) { + const struct dso *kdso = kernel_map->dso; + if (!RB_EMPTY_ROOT(&kdso->symbols[MAP__FUNCTION])) { + desc = "If some relocation was applied (e.g. " + "kexec) symbols may be misresolved."; + } + } ui__warning( "Kernel address maps (/proc/{kallsyms,modules}) were restricted.\n\n" "Check /proc/sys/kernel/kptr_restrict before running 'perf record'.\n\n%s\n\n" "Samples in kernel modules can't be resolved as well.\n\n", - RB_EMPTY_ROOT(&kdso->symbols[MAP__FUNCTION]) ? -"As no suitable kallsyms nor vmlinux was found, kernel samples\n" -"can't be resolved." : -"If some relocation was applied (e.g. kexec) symbols may be misresolved."); + desc); } if (dump_trace) { @@ -320,20 +412,29 @@ static int __cmd_report(void) list_for_each_entry(pos, &session->evlist->entries, node) { struct hists *hists = &pos->hists; + if (pos->idx == 0) + hists->symbol_filter_str = rep->symbol_filter_str; + hists__collapse_resort(hists); hists__output_resort(hists); nr_samples += hists->stats.nr_events[PERF_RECORD_SAMPLE]; } if (nr_samples == 0) { - ui__warning("The %s file has no samples!\n", input_name); + ui__error("The %s file has no samples!\n", session->filename); goto out_delete; } - if (use_browser > 0) - perf_evlist__tui_browse_hists(session->evlist, help); - else - perf_evlist__tty_browse_hists(session->evlist, help); + if (use_browser > 0) { + if (use_browser == 1) { + perf_evlist__tui_browse_hists(session->evlist, help, + NULL, NULL, 0); + } else if (use_browser == 2) { + perf_evlist__gtk_browse_hists(session->evlist, help, + NULL, NULL, 0); + } + } else + perf_evlist__tty_browse_hists(session->evlist, rep, help); out_delete: /* @@ -352,9 +453,9 @@ out_delete: } static int -parse_callchain_opt(const struct option *opt __used, const char *arg, - int unset) +parse_callchain_opt(const struct option *opt, const char *arg, int unset) { + struct perf_report *rep = (struct perf_report *)opt->value; char *tok, *tok2; char *endptr; @@ -362,7 +463,7 @@ parse_callchain_opt(const struct option *opt __used, const char *arg, * --no-call-graph */ if (unset) { - dont_use_callchains = true; + rep->dont_use_callchains = true; return 0; } @@ -410,7 +511,7 @@ parse_callchain_opt(const struct option *opt __used, const char *arg, goto setup; if (tok2[0] != 'c') { - callchain_param.print_limit = strtod(tok2, &endptr); + callchain_param.print_limit = strtoul(tok2, &endptr, 0); tok2 = strtok(NULL, ","); if (!tok2) goto setup; @@ -431,13 +532,44 @@ setup: return 0; } -static const char * const report_usage[] = { - "perf report [<options>] <command>", - NULL -}; +static int +parse_branch_mode(const struct option *opt __used, const char *str __used, int unset) +{ + sort__branch_mode = !unset; + return 0; +} -static const struct option options[] = { - OPT_STRING('i', "input", &input_name, "file", +int cmd_report(int argc, const char **argv, const char *prefix __used) +{ + struct perf_session *session; + struct stat st; + bool has_br_stack = false; + int ret = -1; + char callchain_default_opt[] = "fractal,0.5,callee"; + const char * const report_usage[] = { + "perf report [<options>]", + NULL + }; + struct perf_report report = { + .tool = { + .sample = process_sample_event, + .mmap = perf_event__process_mmap, + .comm = perf_event__process_comm, + .exit = perf_event__process_task, + .fork = perf_event__process_task, + .lost = perf_event__process_lost, + .read = process_read_event, + .attr = perf_event__process_attr, + .event_type = perf_event__process_event_type, + .tracing_data = perf_event__process_tracing_data, + .build_id = perf_event__process_build_id, + .ordered_samples = true, + .ordering_requires_timestamps = true, + }, + .pretty_printing_style = "normal", + }; + const struct option options[] = { + OPT_STRING('i', "input", &report.input_name, "file", "input file name"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), @@ -447,65 +579,116 @@ static const struct option options[] = { "file", "vmlinux pathname"), OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file", "kallsyms pathname"), - OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), + OPT_BOOLEAN('f', "force", &report.force, "don't complain, do it"), OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, "load module symbols - WARNING: use only with -k and LIVE kernel"), OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples, "Show a column with the number of samples"), - OPT_BOOLEAN('T', "threads", &show_threads, + OPT_BOOLEAN('T', "threads", &report.show_threads, "Show per-thread event counters"), - OPT_STRING(0, "pretty", &pretty_printing_style, "key", + OPT_STRING(0, "pretty", &report.pretty_printing_style, "key", "pretty printing style key: normal raw"), - OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"), - OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"), + OPT_BOOLEAN(0, "tui", &report.use_tui, "Use the TUI interface"), + OPT_BOOLEAN(0, "gtk", &report.use_gtk, "Use the GTK2 interface"), + OPT_BOOLEAN(0, "stdio", &report.use_stdio, + "Use the stdio interface"), OPT_STRING('s', "sort", &sort_order, "key[,key2...]", - "sort by key(s): pid, comm, dso, symbol, parent"), + "sort by key(s): pid, comm, dso, symbol, parent, dso_to," + " dso_from, symbol_to, symbol_from, mispredict"), OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, "Show sample percentage for different cpu modes"), OPT_STRING('p', "parent", &parent_pattern, "regex", "regex filter to identify parent, see: '--sort parent'"), OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other, "Only display entries with parent-match"), - OPT_CALLBACK_DEFAULT('g', "call-graph", NULL, "output_type,min_percent, call_order", - "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold and callchain order. " + OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order", + "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit and callchain order. " "Default: fractal,0.5,callee", &parse_callchain_opt, callchain_default_opt), - OPT_BOOLEAN('G', "inverted", &inverted_callchain, "alias for inverted call graph"), + OPT_BOOLEAN('G', "inverted", &report.inverted_callchain, + "alias for inverted call graph"), OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]", "only consider symbols in these dsos"), - OPT_STRING('C', "comms", &symbol_conf.comm_list_str, "comm[,comm...]", + OPT_STRING('c', "comms", &symbol_conf.comm_list_str, "comm[,comm...]", "only consider symbols in these comms"), OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", "only consider these symbols"), + OPT_STRING(0, "symbol-filter", &report.symbol_filter_str, "filter", + "only show symbols that (partially) match with this filter"), OPT_STRING('w', "column-widths", &symbol_conf.col_width_list_str, "width[,width...]", "don't try to adjust column width, use these fixed values"), OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator", "separator for columns, no spaces will be added between " "columns '.' is reserved."), - OPT_BOOLEAN('U', "hide-unresolved", &hide_unresolved, + OPT_BOOLEAN('U', "hide-unresolved", &report.hide_unresolved, "Only display entries resolved to a symbol"), OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", "Look for files with symbols relative to this directory"), - OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"), + OPT_STRING('C', "cpu", &report.cpu_list, "cpu", + "list of cpus to profile"), + OPT_BOOLEAN('I', "show-info", &report.show_full_info, + "Display extended information about perf.data file"), + OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src, + "Interleave source code with assembly code (default)"), + OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw, + "Display raw encoding of assembly instructions (default)"), + OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", + "Specify disassembler style (e.g. -M intel for intel syntax)"), + OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, + "Show a column with the sum of periods"), + OPT_CALLBACK_NOOPT('b', "branch-stack", &sort__branch_mode, "", + "use branch records for histogram filling", parse_branch_mode), OPT_END() -}; + }; -int cmd_report(int argc, const char **argv, const char *prefix __used) -{ argc = parse_options(argc, argv, options, report_usage, 0); - if (use_stdio) + if (report.use_stdio) use_browser = 0; - else if (use_tui) + else if (report.use_tui) use_browser = 1; + else if (report.use_gtk) + use_browser = 2; - if (inverted_callchain) + if (report.inverted_callchain) callchain_param.order = ORDER_CALLER; - if (strcmp(input_name, "-") != 0) + if (!report.input_name || !strlen(report.input_name)) { + if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode)) + report.input_name = "-"; + else + report.input_name = "perf.data"; + } + session = perf_session__new(report.input_name, O_RDONLY, + report.force, false, &report.tool); + if (session == NULL) + return -ENOMEM; + + report.session = session; + + has_br_stack = perf_header__has_feat(&session->header, + HEADER_BRANCH_STACK); + + if (sort__branch_mode == -1 && has_br_stack) + sort__branch_mode = 1; + + /* sort__branch_mode could be 0 if --no-branch-stack */ + if (sort__branch_mode == 1) { + /* + * if no sort_order is provided, then specify + * branch-mode specific order + */ + if (sort_order == default_sort_order) + sort_order = "comm,dso_from,symbol_from," + "dso_to,symbol_to"; + + } + + if (strcmp(report.input_name, "-") != 0) setup_browser(true); else use_browser = 0; + /* * Only in the newt browser we are doing integrated annotation, * so don't allocate extra space that won't be used in the stdio @@ -513,7 +696,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) */ if (use_browser > 0) { symbol_conf.priv_size = sizeof(struct annotation); - annotate_init = symbol__annotate_init; + report.annotate_init = symbol__annotate_init; /* * For searching by name on the "Browse map details". * providing it only in verbose mode not to bloat too @@ -532,13 +715,13 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) } if (symbol__init() < 0) - return -1; + goto error; setup_sorting(report_usage, options); if (parent_pattern != default_parent_pattern) { if (sort_dimension__add("parent") < 0) - return -1; + goto error; /* * Only show the parent fields if we explicitly @@ -550,15 +733,31 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) } else symbol_conf.exclude_other = false; - /* - * Any (unrecognized) arguments left? - */ - if (argc) - usage_with_options(report_usage, options); + if (argc) { + /* + * Special case: if there's an argument left then assume that + * it's a symbol filter: + */ + if (argc > 1) + usage_with_options(report_usage, options); + + report.symbol_filter_str = argv[0]; + } - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout); sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout); - sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout); - return __cmd_report(); + if (sort__branch_mode == 1) { + sort_entry__setup_elide(&sort_dso_from, symbol_conf.dso_from_list, "dso_from", stdout); + sort_entry__setup_elide(&sort_dso_to, symbol_conf.dso_to_list, "dso_to", stdout); + sort_entry__setup_elide(&sort_sym_from, symbol_conf.sym_from_list, "sym_from", stdout); + sort_entry__setup_elide(&sort_sym_to, symbol_conf.sym_to_list, "sym_to", stdout); + } else { + sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout); + sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout); + } + + ret = __cmd_report(&report); +error: + perf_session__delete(session); + return ret; } diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 5177964943e7..7a9ad2b1ee76 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -2,11 +2,14 @@ #include "perf.h" #include "util/util.h" +#include "util/evlist.h" #include "util/cache.h" +#include "util/evsel.h" #include "util/symbol.h" #include "util/thread.h" #include "util/header.h" #include "util/session.h" +#include "util/tool.h" #include "util/parse-options.h" #include "util/trace-event.h" @@ -14,12 +17,13 @@ #include "util/debug.h" #include <sys/prctl.h> +#include <sys/resource.h> #include <semaphore.h> #include <pthread.h> #include <math.h> -static char const *input_name = "perf.data"; +static const char *input_name; static char default_sort_order[] = "avg, max, switch, runtime"; static const char *sort_order = default_sort_order; @@ -39,6 +43,11 @@ static u64 sleep_measurement_overhead; static unsigned long nr_tasks; +struct perf_sched { + struct perf_tool tool; + struct perf_session *session; +}; + struct sched_atom; struct task_desc { @@ -723,35 +732,35 @@ struct trace_migrate_task_event { struct trace_sched_handler { void (*switch_event)(struct trace_switch_event *, - struct perf_session *, - struct event *, + struct machine *, + struct event_format *, int cpu, u64 timestamp, struct thread *thread); void (*runtime_event)(struct trace_runtime_event *, - struct perf_session *, - struct event *, + struct machine *, + struct event_format *, int cpu, u64 timestamp, struct thread *thread); void (*wakeup_event)(struct trace_wakeup_event *, - struct perf_session *, - struct event *, + struct machine *, + struct event_format *, int cpu, u64 timestamp, struct thread *thread); void (*fork_event)(struct trace_fork_event *, - struct event *, + struct event_format *, int cpu, u64 timestamp, struct thread *thread); void (*migrate_task_event)(struct trace_migrate_task_event *, - struct perf_session *session, - struct event *, + struct machine *machine, + struct event_format *, int cpu, u64 timestamp, struct thread *thread); @@ -760,8 +769,8 @@ struct trace_sched_handler { static void replay_wakeup_event(struct trace_wakeup_event *wakeup_event, - struct perf_session *session __used, - struct event *event, + struct machine *machine __used, + struct event_format *event, int cpu __used, u64 timestamp __used, struct thread *thread __used) @@ -787,8 +796,8 @@ static u64 cpu_last_switched[MAX_CPUS]; static void replay_switch_event(struct trace_switch_event *switch_event, - struct perf_session *session __used, - struct event *event, + struct machine *machine __used, + struct event_format *event, int cpu, u64 timestamp, struct thread *thread __used) @@ -831,7 +840,7 @@ replay_switch_event(struct trace_switch_event *switch_event, static void replay_fork_event(struct trace_fork_event *fork_event, - struct event *event, + struct event_format *event, int cpu __used, u64 timestamp __used, struct thread *thread __used) @@ -940,7 +949,7 @@ static void thread_atoms_insert(struct thread *thread) static void latency_fork_event(struct trace_fork_event *fork_event __used, - struct event *event __used, + struct event_format *event __used, int cpu __used, u64 timestamp __used, struct thread *thread __used) @@ -1021,8 +1030,8 @@ add_sched_in_event(struct work_atoms *atoms, u64 timestamp) static void latency_switch_event(struct trace_switch_event *switch_event, - struct perf_session *session, - struct event *event __used, + struct machine *machine, + struct event_format *event __used, int cpu, u64 timestamp, struct thread *thread __used) @@ -1045,8 +1054,8 @@ latency_switch_event(struct trace_switch_event *switch_event, die("hm, delta: %" PRIu64 " < 0 ?\n", delta); - sched_out = perf_session__findnew(session, switch_event->prev_pid); - sched_in = perf_session__findnew(session, switch_event->next_pid); + sched_out = machine__findnew_thread(machine, switch_event->prev_pid); + sched_in = machine__findnew_thread(machine, switch_event->next_pid); out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid); if (!out_events) { @@ -1074,13 +1083,13 @@ latency_switch_event(struct trace_switch_event *switch_event, static void latency_runtime_event(struct trace_runtime_event *runtime_event, - struct perf_session *session, - struct event *event __used, + struct machine *machine, + struct event_format *event __used, int cpu, u64 timestamp, struct thread *this_thread __used) { - struct thread *thread = perf_session__findnew(session, runtime_event->pid); + struct thread *thread = machine__findnew_thread(machine, runtime_event->pid); struct work_atoms *atoms = thread_atoms_search(&atom_root, thread, &cmp_pid); BUG_ON(cpu >= MAX_CPUS || cpu < 0); @@ -1097,8 +1106,8 @@ latency_runtime_event(struct trace_runtime_event *runtime_event, static void latency_wakeup_event(struct trace_wakeup_event *wakeup_event, - struct perf_session *session, - struct event *__event __used, + struct machine *machine, + struct event_format *__event __used, int cpu __used, u64 timestamp, struct thread *thread __used) @@ -1111,7 +1120,7 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event, if (!wakeup_event->success) return; - wakee = perf_session__findnew(session, wakeup_event->pid); + wakee = machine__findnew_thread(machine, wakeup_event->pid); atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid); if (!atoms) { thread_atoms_insert(wakee); @@ -1145,8 +1154,8 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event, static void latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event, - struct perf_session *session, - struct event *__event __used, + struct machine *machine, + struct event_format *__event __used, int cpu __used, u64 timestamp, struct thread *thread __used) @@ -1161,7 +1170,7 @@ latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event, if (profile_cpu == -1) return; - migrant = perf_session__findnew(session, migrate_task_event->pid); + migrant = machine__findnew_thread(machine, migrate_task_event->pid); atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid); if (!atoms) { thread_atoms_insert(migrant); @@ -1356,12 +1365,13 @@ static void sort_lat(void) static struct trace_sched_handler *trace_handler; static void -process_sched_wakeup_event(void *data, struct perf_session *session, - struct event *event, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) +process_sched_wakeup_event(struct perf_tool *tool __used, + struct event_format *event, + struct perf_sample *sample, + struct machine *machine, + struct thread *thread) { + void *data = sample->raw_data; struct trace_wakeup_event wakeup_event; FILL_COMMON_FIELDS(wakeup_event, event, data); @@ -1373,8 +1383,8 @@ process_sched_wakeup_event(void *data, struct perf_session *session, FILL_FIELD(wakeup_event, cpu, event, data); if (trace_handler->wakeup_event) - trace_handler->wakeup_event(&wakeup_event, session, event, - cpu, timestamp, thread); + trace_handler->wakeup_event(&wakeup_event, machine, event, + sample->cpu, sample->time, thread); } /* @@ -1392,8 +1402,8 @@ static char next_shortname2 = '0'; static void map_switch_event(struct trace_switch_event *switch_event, - struct perf_session *session, - struct event *event __used, + struct machine *machine, + struct event_format *event __used, int this_cpu, u64 timestamp, struct thread *thread __used) @@ -1420,8 +1430,8 @@ map_switch_event(struct trace_switch_event *switch_event, die("hm, delta: %" PRIu64 " < 0 ?\n", delta); - sched_out = perf_session__findnew(session, switch_event->prev_pid); - sched_in = perf_session__findnew(session, switch_event->next_pid); + sched_out = machine__findnew_thread(machine, switch_event->prev_pid); + sched_in = machine__findnew_thread(machine, switch_event->next_pid); curr_thread[this_cpu] = sched_in; @@ -1469,14 +1479,15 @@ map_switch_event(struct trace_switch_event *switch_event, } } - static void -process_sched_switch_event(void *data, struct perf_session *session, - struct event *event, - int this_cpu, - u64 timestamp __used, - struct thread *thread __used) +process_sched_switch_event(struct perf_tool *tool __used, + struct event_format *event, + struct perf_sample *sample, + struct machine *machine, + struct thread *thread) { + int this_cpu = sample->cpu; + void *data = sample->raw_data; struct trace_switch_event switch_event; FILL_COMMON_FIELDS(switch_event, event, data); @@ -1498,19 +1509,20 @@ process_sched_switch_event(void *data, struct perf_session *session, nr_context_switch_bugs++; } if (trace_handler->switch_event) - trace_handler->switch_event(&switch_event, session, event, - this_cpu, timestamp, thread); + trace_handler->switch_event(&switch_event, machine, event, + this_cpu, sample->time, thread); curr_pid[this_cpu] = switch_event.next_pid; } static void -process_sched_runtime_event(void *data, struct perf_session *session, - struct event *event, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) +process_sched_runtime_event(struct perf_tool *tool __used, + struct event_format *event, + struct perf_sample *sample, + struct machine *machine, + struct thread *thread) { + void *data = sample->raw_data; struct trace_runtime_event runtime_event; FILL_ARRAY(runtime_event, comm, event, data); @@ -1519,16 +1531,18 @@ process_sched_runtime_event(void *data, struct perf_session *session, FILL_FIELD(runtime_event, vruntime, event, data); if (trace_handler->runtime_event) - trace_handler->runtime_event(&runtime_event, session, event, cpu, timestamp, thread); + trace_handler->runtime_event(&runtime_event, machine, event, + sample->cpu, sample->time, thread); } static void -process_sched_fork_event(void *data, - struct event *event, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) +process_sched_fork_event(struct perf_tool *tool __used, + struct event_format *event, + struct perf_sample *sample, + struct machine *machine __used, + struct thread *thread) { + void *data = sample->raw_data; struct trace_fork_event fork_event; FILL_COMMON_FIELDS(fork_event, event, data); @@ -1540,13 +1554,14 @@ process_sched_fork_event(void *data, if (trace_handler->fork_event) trace_handler->fork_event(&fork_event, event, - cpu, timestamp, thread); + sample->cpu, sample->time, thread); } static void -process_sched_exit_event(struct event *event, - int cpu __used, - u64 timestamp __used, +process_sched_exit_event(struct perf_tool *tool __used, + struct event_format *event, + struct perf_sample *sample __used, + struct machine *machine __used, struct thread *thread __used) { if (verbose) @@ -1554,12 +1569,13 @@ process_sched_exit_event(struct event *event, } static void -process_sched_migrate_task_event(void *data, struct perf_session *session, - struct event *event, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) +process_sched_migrate_task_event(struct perf_tool *tool __used, + struct event_format *event, + struct perf_sample *sample, + struct machine *machine, + struct thread *thread) { + void *data = sample->raw_data; struct trace_migrate_task_event migrate_task_event; FILL_COMMON_FIELDS(migrate_task_event, event, data); @@ -1570,83 +1586,84 @@ process_sched_migrate_task_event(void *data, struct perf_session *session, FILL_FIELD(migrate_task_event, cpu, event, data); if (trace_handler->migrate_task_event) - trace_handler->migrate_task_event(&migrate_task_event, session, - event, cpu, timestamp, thread); + trace_handler->migrate_task_event(&migrate_task_event, machine, + event, sample->cpu, + sample->time, thread); } -static void process_raw_event(union perf_event *raw_event __used, - struct perf_session *session, void *data, int cpu, - u64 timestamp, struct thread *thread) -{ - struct event *event; - int type; - - - type = trace_parse_common_type(data); - event = trace_find_event(type); - - if (!strcmp(event->name, "sched_switch")) - process_sched_switch_event(data, session, event, cpu, timestamp, thread); - if (!strcmp(event->name, "sched_stat_runtime")) - process_sched_runtime_event(data, session, event, cpu, timestamp, thread); - if (!strcmp(event->name, "sched_wakeup")) - process_sched_wakeup_event(data, session, event, cpu, timestamp, thread); - if (!strcmp(event->name, "sched_wakeup_new")) - process_sched_wakeup_event(data, session, event, cpu, timestamp, thread); - if (!strcmp(event->name, "sched_process_fork")) - process_sched_fork_event(data, event, cpu, timestamp, thread); - if (!strcmp(event->name, "sched_process_exit")) - process_sched_exit_event(event, cpu, timestamp, thread); - if (!strcmp(event->name, "sched_migrate_task")) - process_sched_migrate_task_event(data, session, event, cpu, timestamp, thread); -} +typedef void (*tracepoint_handler)(struct perf_tool *tool, struct event_format *event, + struct perf_sample *sample, + struct machine *machine, + struct thread *thread); -static int process_sample_event(union perf_event *event, - struct perf_sample *sample, - struct perf_evsel *evsel __used, - struct perf_session *session) +static int perf_sched__process_tracepoint_sample(struct perf_tool *tool, + union perf_event *event __used, + struct perf_sample *sample, + struct perf_evsel *evsel, + struct machine *machine) { - struct thread *thread; + struct perf_sched *sched = container_of(tool, struct perf_sched, tool); + struct pevent *pevent = sched->session->pevent; + struct thread *thread = machine__findnew_thread(machine, sample->pid); - if (!(session->sample_type & PERF_SAMPLE_RAW)) - return 0; - - thread = perf_session__findnew(session, sample->pid); if (thread == NULL) { - pr_debug("problem processing %d event, skipping it.\n", - event->header.type); + pr_debug("problem processing %s event, skipping it.\n", + perf_evsel__name(evsel)); return -1; } - dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); + evsel->hists.stats.total_period += sample->period; + hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); + + if (evsel->handler.func != NULL) { + tracepoint_handler f = evsel->handler.func; - if (profile_cpu != -1 && profile_cpu != (int)sample->cpu) - return 0; + if (evsel->handler.data == NULL) + evsel->handler.data = pevent_find_event(pevent, + evsel->attr.config); - process_raw_event(event, session, sample->raw_data, sample->cpu, - sample->time, thread); + f(tool, evsel->handler.data, sample, machine, thread); + } return 0; } -static struct perf_event_ops event_ops = { - .sample = process_sample_event, - .comm = perf_event__process_comm, - .lost = perf_event__process_lost, - .fork = perf_event__process_task, - .ordered_samples = true, +static struct perf_sched perf_sched = { + .tool = { + .sample = perf_sched__process_tracepoint_sample, + .comm = perf_event__process_comm, + .lost = perf_event__process_lost, + .fork = perf_event__process_task, + .ordered_samples = true, + }, }; static void read_events(bool destroy, struct perf_session **psession) { int err = -EINVAL; - struct perf_session *session = perf_session__new(input_name, O_RDONLY, - 0, false, &event_ops); + const struct perf_evsel_str_handler handlers[] = { + { "sched:sched_switch", process_sched_switch_event, }, + { "sched:sched_stat_runtime", process_sched_runtime_event, }, + { "sched:sched_wakeup", process_sched_wakeup_event, }, + { "sched:sched_wakeup_new", process_sched_wakeup_event, }, + { "sched:sched_process_fork", process_sched_fork_event, }, + { "sched:sched_process_exit", process_sched_exit_event, }, + { "sched:sched_migrate_task", process_sched_migrate_task_event, }, + }; + struct perf_session *session; + + session = perf_session__new(input_name, O_RDONLY, 0, false, + &perf_sched.tool); if (session == NULL) die("No Memory"); + perf_sched.session = session; + + err = perf_session__set_tracepoints_handlers(session, handlers); + assert(err == 0); + if (perf_session__has_traces(session, "record -R")) { - err = perf_session__process_events(session, &event_ops); + err = perf_session__process_events(session, &perf_sched.tool); if (err) die("Failed to process events, error %d", err); diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 09024ec2ab2e..1e60ab70b2b1 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -7,6 +7,7 @@ #include "util/header.h" #include "util/parse-options.h" #include "util/session.h" +#include "util/tool.h" #include "util/symbol.h" #include "util/thread.h" #include "util/trace-event.h" @@ -22,9 +23,16 @@ static u64 last_timestamp; static u64 nr_unordered; extern const struct option record_options[]; static bool no_callchain; +static bool show_full_info; +static bool system_wide; static const char *cpu_list; static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); +struct perf_script { + struct perf_tool tool; + struct perf_session *session; +}; + enum perf_output_field { PERF_OUTPUT_COMM = 1U << 0, PERF_OUTPUT_TID = 1U << 1, @@ -37,6 +45,7 @@ enum perf_output_field { PERF_OUTPUT_SYM = 1U << 8, PERF_OUTPUT_DSO = 1U << 9, PERF_OUTPUT_ADDR = 1U << 10, + PERF_OUTPUT_SYMOFFSET = 1U << 11, }; struct output_option { @@ -54,6 +63,7 @@ struct output_option { {.str = "sym", .field = PERF_OUTPUT_SYM}, {.str = "dso", .field = PERF_OUTPUT_DSO}, {.str = "addr", .field = PERF_OUTPUT_ADDR}, + {.str = "symoff", .field = PERF_OUTPUT_SYMOFFSET}, }; /* default set to maintain compatibility with current format */ @@ -132,10 +142,11 @@ static const char *output_field2str(enum perf_output_field field) #define PRINT_FIELD(x) (output[attr->type].fields & PERF_OUTPUT_##x) -static int perf_event_attr__check_stype(struct perf_event_attr *attr, - u64 sample_type, const char *sample_msg, - enum perf_output_field field) +static int perf_evsel__check_stype(struct perf_evsel *evsel, + u64 sample_type, const char *sample_msg, + enum perf_output_field field) { + struct perf_event_attr *attr = &evsel->attr; int type = attr->type; const char *evname; @@ -143,7 +154,7 @@ static int perf_event_attr__check_stype(struct perf_event_attr *attr, return 0; if (output[type].user_set) { - evname = __event_name(attr->type, attr->config); + evname = perf_evsel__name(evsel); pr_err("Samples for '%s' event do not have %s attribute set. " "Cannot print '%s' field.\n", evname, sample_msg, output_field2str(field)); @@ -152,7 +163,7 @@ static int perf_event_attr__check_stype(struct perf_event_attr *attr, /* user did not ask for it explicitly so remove from the default list */ output[type].fields &= ~field; - evname = __event_name(attr->type, attr->config); + evname = perf_evsel__name(evsel); pr_debug("Samples for '%s' event do not have %s attribute set. " "Skipping '%s' field.\n", evname, sample_msg, output_field2str(field)); @@ -170,8 +181,8 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, return -EINVAL; if (PRINT_FIELD(IP)) { - if (perf_event_attr__check_stype(attr, PERF_SAMPLE_IP, "IP", - PERF_OUTPUT_IP)) + if (perf_evsel__check_stype(evsel, PERF_SAMPLE_IP, "IP", + PERF_OUTPUT_IP)) return -EINVAL; if (!no_callchain && @@ -180,8 +191,8 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, } if (PRINT_FIELD(ADDR) && - perf_event_attr__check_stype(attr, PERF_SAMPLE_ADDR, "ADDR", - PERF_OUTPUT_ADDR)) + perf_evsel__check_stype(evsel, PERF_SAMPLE_ADDR, "ADDR", + PERF_OUTPUT_ADDR)) return -EINVAL; if (PRINT_FIELD(SYM) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) { @@ -190,6 +201,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, "to symbols.\n"); return -EINVAL; } + if (PRINT_FIELD(SYMOFFSET) && !PRINT_FIELD(SYM)) { + pr_err("Display of offsets requested but symbol is not" + "selected.\n"); + return -EINVAL; + } if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) { pr_err("Display of DSO requested but neither sample IP nor " "sample address\nis selected. Hence, no addresses to convert " @@ -198,18 +214,18 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, } if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) && - perf_event_attr__check_stype(attr, PERF_SAMPLE_TID, "TID", - PERF_OUTPUT_TID|PERF_OUTPUT_PID)) + perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID", + PERF_OUTPUT_TID|PERF_OUTPUT_PID)) return -EINVAL; if (PRINT_FIELD(TIME) && - perf_event_attr__check_stype(attr, PERF_SAMPLE_TIME, "TIME", - PERF_OUTPUT_TIME)) + perf_evsel__check_stype(evsel, PERF_SAMPLE_TIME, "TIME", + PERF_OUTPUT_TIME)) return -EINVAL; if (PRINT_FIELD(CPU) && - perf_event_attr__check_stype(attr, PERF_SAMPLE_CPU, "CPU", - PERF_OUTPUT_CPU)) + perf_evsel__check_stype(evsel, PERF_SAMPLE_CPU, "CPU", + PERF_OUTPUT_CPU)) return -EINVAL; return 0; @@ -246,12 +262,14 @@ static int perf_session__check_output_opt(struct perf_session *session) return 0; } -static void print_sample_start(struct perf_sample *sample, +static void print_sample_start(struct pevent *pevent, + struct perf_sample *sample, struct thread *thread, - struct perf_event_attr *attr) + struct perf_evsel *evsel) { int type; - struct event *event; + struct perf_event_attr *attr = &evsel->attr; + struct event_format *event; const char *evname = NULL; unsigned long secs; unsigned long usecs; @@ -290,17 +308,30 @@ static void print_sample_start(struct perf_sample *sample, if (PRINT_FIELD(EVNAME)) { if (attr->type == PERF_TYPE_TRACEPOINT) { - type = trace_parse_common_type(sample->raw_data); - event = trace_find_event(type); + /* + * XXX Do we really need this here? + * perf_evlist__set_tracepoint_names should have done + * this already + */ + type = trace_parse_common_type(pevent, + sample->raw_data); + event = pevent_find_event(pevent, type); if (event) evname = event->name; } else - evname = __event_name(attr->type, attr->config); + evname = perf_evsel__name(evsel); - printf("%s: ", evname ? evname : "(unknown)"); + printf("%s: ", evname ? evname : "[unknown]"); } } +static bool is_bts_event(struct perf_event_attr *attr) +{ + return ((attr->type == PERF_TYPE_HARDWARE) && + (attr->config & PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && + (attr->sample_period == 1)); +} + static bool sample_addr_correlates_sym(struct perf_event_attr *attr) { if ((attr->type == PERF_TYPE_SOFTWARE) && @@ -309,29 +340,31 @@ static bool sample_addr_correlates_sym(struct perf_event_attr *attr) (attr->config == PERF_COUNT_SW_PAGE_FAULTS_MAJ))) return true; + if (is_bts_event(attr)) + return true; + return false; } static void print_sample_addr(union perf_event *event, struct perf_sample *sample, - struct perf_session *session, + struct machine *machine, struct thread *thread, struct perf_event_attr *attr) { struct addr_location al; u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; - const char *symname, *dsoname; printf("%16" PRIx64, sample->addr); if (!sample_addr_correlates_sym(attr)) return; - thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION, - event->ip.pid, sample->addr, &al); + thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION, + sample->addr, &al); if (!al.map) - thread__find_addr_map(thread, session, cpumode, MAP__VARIABLE, - event->ip.pid, sample->addr, &al); + thread__find_addr_map(thread, machine, cpumode, MAP__VARIABLE, + sample->addr, &al); al.cpu = sample->cpu; al.sym = NULL; @@ -340,28 +373,53 @@ static void print_sample_addr(union perf_event *event, al.sym = map__find_symbol(al.map, al.addr, NULL); if (PRINT_FIELD(SYM)) { - if (al.sym && al.sym->name) - symname = al.sym->name; + printf(" "); + if (PRINT_FIELD(SYMOFFSET)) + symbol__fprintf_symname_offs(al.sym, &al, stdout); else - symname = ""; - - printf(" %16s", symname); + symbol__fprintf_symname(al.sym, stdout); } if (PRINT_FIELD(DSO)) { - if (al.map && al.map->dso && al.map->dso->name) - dsoname = al.map->dso->name; - else - dsoname = ""; + printf(" ("); + map__fprintf_dsoname(al.map, stdout); + printf(")"); + } +} - printf(" (%s)", dsoname); +static void print_sample_bts(union perf_event *event, + struct perf_sample *sample, + struct perf_evsel *evsel, + struct machine *machine, + struct thread *thread) +{ + struct perf_event_attr *attr = &evsel->attr; + + /* print branch_from information */ + if (PRINT_FIELD(IP)) { + if (!symbol_conf.use_callchain) + printf(" "); + else + printf("\n"); + perf_event__print_ip(event, sample, machine, + PRINT_FIELD(SYM), PRINT_FIELD(DSO), + PRINT_FIELD(SYMOFFSET)); } + + printf(" => "); + + /* print branch_to information */ + if (PRINT_FIELD(ADDR)) + print_sample_addr(event, sample, machine, thread, attr); + + printf("\n"); } static void process_event(union perf_event *event __unused, + struct pevent *pevent, struct perf_sample *sample, struct perf_evsel *evsel, - struct perf_session *session, + struct machine *machine, struct thread *thread) { struct perf_event_attr *attr = &evsel->attr; @@ -369,22 +427,28 @@ static void process_event(union perf_event *event __unused, if (output[attr->type].fields == 0) return; - print_sample_start(sample, thread, attr); + print_sample_start(pevent, sample, thread, evsel); + + if (is_bts_event(attr)) { + print_sample_bts(event, sample, evsel, machine, thread); + return; + } if (PRINT_FIELD(TRACE)) - print_trace_event(sample->cpu, sample->raw_data, + print_trace_event(pevent, sample->cpu, sample->raw_data, sample->raw_size); if (PRINT_FIELD(ADDR)) - print_sample_addr(event, sample, session, thread, attr); + print_sample_addr(event, sample, machine, thread, attr); if (PRINT_FIELD(IP)) { if (!symbol_conf.use_callchain) printf(" "); else printf("\n"); - perf_session__print_ip(event, sample, session, - PRINT_FIELD(SYM), PRINT_FIELD(DSO)); + perf_event__print_ip(event, sample, machine, + PRINT_FIELD(SYM), PRINT_FIELD(DSO), + PRINT_FIELD(SYMOFFSET)); } printf("\n"); @@ -402,7 +466,8 @@ static int default_stop_script(void) return 0; } -static int default_generate_script(const char *outfile __unused) +static int default_generate_script(struct pevent *pevent __unused, + const char *outfile __unused) { return 0; } @@ -431,14 +496,17 @@ static int cleanup_scripting(void) return scripting_ops->stop_script(); } -static char const *input_name = "perf.data"; +static const char *input_name; -static int process_sample_event(union perf_event *event, +static int process_sample_event(struct perf_tool *tool __used, + union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, - struct perf_session *session) + struct machine *machine) { - struct thread *thread = perf_session__findnew(session, event->ip.pid); + struct addr_location al; + struct perf_script *scr = container_of(tool, struct perf_script, tool); + struct thread *thread = machine__findnew_thread(machine, event->ip.tid); if (thread == NULL) { pr_debug("problem processing %d event, skipping it.\n", @@ -457,27 +525,39 @@ static int process_sample_event(union perf_event *event, return 0; } + if (perf_event__preprocess_sample(event, machine, &al, sample, 0) < 0) { + pr_err("problem processing %d event, skipping it.\n", + event->header.type); + return -1; + } + + if (al.filtered) + return 0; + if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) return 0; - scripting_ops->process_event(event, sample, evsel, session, thread); + scripting_ops->process_event(event, scr->session->pevent, + sample, evsel, machine, thread); - session->hists.stats.total_period += sample->period; + evsel->hists.stats.total_period += sample->period; return 0; } -static struct perf_event_ops event_ops = { - .sample = process_sample_event, - .mmap = perf_event__process_mmap, - .comm = perf_event__process_comm, - .exit = perf_event__process_task, - .fork = perf_event__process_task, - .attr = perf_event__process_attr, - .event_type = perf_event__process_event_type, - .tracing_data = perf_event__process_tracing_data, - .build_id = perf_event__process_build_id, - .ordered_samples = true, - .ordering_requires_timestamps = true, +static struct perf_script perf_script = { + .tool = { + .sample = process_sample_event, + .mmap = perf_event__process_mmap, + .comm = perf_event__process_comm, + .exit = perf_event__process_task, + .fork = perf_event__process_task, + .attr = perf_event__process_attr, + .event_type = perf_event__process_event_type, + .tracing_data = perf_event__process_tracing_data, + .build_id = perf_event__process_build_id, + .ordered_samples = true, + .ordering_requires_timestamps = true, + }, }; extern volatile int session_done; @@ -493,7 +573,7 @@ static int __cmd_script(struct perf_session *session) signal(SIGINT, sig_handler); - ret = perf_session__process_events(session, &event_ops); + ret = perf_session__process_events(session, &perf_script.tool); if (debug_mode) pr_err("Misordered timestamps: %" PRIu64 "\n", nr_unordered); @@ -522,12 +602,6 @@ static struct script_spec *script_spec__new(const char *spec, return s; } -static void script_spec__delete(struct script_spec *s) -{ - free(s->spec); - free(s); -} - static void script_spec__add(struct script_spec *s) { list_add_tail(&s->node, &script_specs); @@ -553,16 +627,11 @@ static struct script_spec *script_spec__findnew(const char *spec, s = script_spec__new(spec, ops); if (!s) - goto out_delete_spec; + return NULL; script_spec__add(s); return s; - -out_delete_spec: - script_spec__delete(s); - - return NULL; } int script_spec_register(const char *spec, struct scripting_ops *ops) @@ -680,7 +749,8 @@ static int parse_output_fields(const struct option *opt __used, type = PERF_TYPE_RAW; else { fprintf(stderr, "Invalid event type in field string.\n"); - return -EINVAL; + rc = -EINVAL; + goto out; } if (output[type].user_set) @@ -922,6 +992,24 @@ static int read_script_info(struct script_desc *desc, const char *filename) return 0; } +static char *get_script_root(struct dirent *script_dirent, const char *suffix) +{ + char *script_root, *str; + + script_root = strdup(script_dirent->d_name); + if (!script_root) + return NULL; + + str = (char *)ends_with(script_root, suffix); + if (!str) { + free(script_root); + return NULL; + } + + *str = '\0'; + return script_root; +} + static int list_available_scripts(const struct option *opt __used, const char *s __used, int unset __used) { @@ -933,7 +1021,6 @@ static int list_available_scripts(const struct option *opt __used, struct script_desc *desc; char first_half[BUFSIZ]; char *script_root; - char *str; snprintf(scripts_path, MAXPATHLEN, "%s/scripts", perf_exec_path()); @@ -949,16 +1036,14 @@ static int list_available_scripts(const struct option *opt __used, continue; for_each_script(lang_path, lang_dir, script_dirent, script_next) { - script_root = strdup(script_dirent.d_name); - str = (char *)ends_with(script_root, REPORT_SUFFIX); - if (str) { - *str = '\0'; + script_root = get_script_root(&script_dirent, REPORT_SUFFIX); + if (script_root) { desc = script_desc__findnew(script_root); snprintf(script_path, MAXPATHLEN, "%s/%s", lang_path, script_dirent.d_name); read_script_info(desc, script_path); + free(script_root); } - free(script_root); } } @@ -980,8 +1065,7 @@ static char *get_script_path(const char *script_root, const char *suffix) char script_path[MAXPATHLEN]; DIR *scripts_dir, *lang_dir; char lang_path[MAXPATHLEN]; - char *str, *__script_root; - char *path = NULL; + char *__script_root; snprintf(scripts_path, MAXPATHLEN, "%s/scripts", perf_exec_path()); @@ -997,23 +1081,22 @@ static char *get_script_path(const char *script_root, const char *suffix) continue; for_each_script(lang_path, lang_dir, script_dirent, script_next) { - __script_root = strdup(script_dirent.d_name); - str = (char *)ends_with(__script_root, suffix); - if (str) { - *str = '\0'; - if (strcmp(__script_root, script_root)) - continue; + __script_root = get_script_root(&script_dirent, suffix); + if (__script_root && !strcmp(script_root, __script_root)) { + free(__script_root); + closedir(lang_dir); + closedir(scripts_dir); snprintf(script_path, MAXPATHLEN, "%s/%s", lang_path, script_dirent.d_name); - path = strdup(script_path); - free(__script_root); - break; + return strdup(script_path); } free(__script_root); } + closedir(lang_dir); } + closedir(scripts_dir); - return path; + return NULL; } static bool is_top_script(const char *script_path) @@ -1080,9 +1163,20 @@ static const struct option options[] = { OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", "Look for files with symbols relative to this directory"), OPT_CALLBACK('f', "fields", NULL, "str", - "comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace,raw. Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr", + "comma separated output fields prepend with 'type:'. " + "Valid types: hw,sw,trace,raw. " + "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," + "addr,symoff", parse_output_fields), - OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"), + OPT_BOOLEAN('a', "all-cpus", &system_wide, + "system-wide collection from all CPUs"), + OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"), + OPT_STRING('c', "comms", &symbol_conf.comm_list_str, "comm[,comm...]", + "only display events for these comms"), + OPT_BOOLEAN('I', "show-info", &show_full_info, + "display extended information from perf.data file"), + OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path, + "Show the path of [kernel.kallsyms]"), OPT_END() }; @@ -1108,7 +1202,6 @@ int cmd_script(int argc, const char **argv, const char *prefix __used) struct perf_session *session; char *script_path = NULL; const char **__argv; - bool system_wide; int i, j, err; setup_scripting(); @@ -1176,15 +1269,17 @@ int cmd_script(int argc, const char **argv, const char *prefix __used) } if (!pid) { - system_wide = true; j = 0; dup2(live_pipe[1], 1); close(live_pipe[0]); - if (!is_top_script(argv[0])) + if (is_top_script(argv[0])) { + system_wide = true; + } else if (!system_wide) { system_wide = !have_cmd(argc - rep_args, &argv[rep_args]); + } __argv = malloc((argc + 6) * sizeof(const char *)); if (!__argv) @@ -1232,10 +1327,11 @@ int cmd_script(int argc, const char **argv, const char *prefix __used) script_path = rep_script_path; if (script_path) { - system_wide = false; j = 0; - if (rec_script_path) + if (!rec_script_path) + system_wide = false; + else if (!system_wide) system_wide = !have_cmd(argc - 1, &argv[1]); __argv = malloc((argc + 2) * sizeof(const char *)); @@ -1259,15 +1355,20 @@ int cmd_script(int argc, const char **argv, const char *prefix __used) if (!script_name) setup_pager(); - session = perf_session__new(input_name, O_RDONLY, 0, false, &event_ops); + session = perf_session__new(input_name, O_RDONLY, 0, false, + &perf_script.tool); if (session == NULL) return -ENOMEM; + perf_script.session = session; + if (cpu_list) { if (perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap)) return -1; } + perf_session__fprintf_info(session, stdout, show_full_info); + if (!no_callchain) symbol_conf.use_callchain = true; else @@ -1283,7 +1384,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __used) return -1; } - input = open(input_name, O_RDONLY); + input = open(session->filename, O_RDONLY); /* input_name */ if (input < 0) { perror("failed to open file"); exit(-1); @@ -1306,7 +1407,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __used) return -1; } - err = scripting_ops->generate_script("perf-script"); + err = scripting_ops->generate_script(session->pevent, + "perf-script"); goto out; } diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 5deb17d9e795..861f0aec77ae 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -173,27 +173,29 @@ static struct perf_event_attr very_very_detailed_attrs[] = { -struct perf_evlist *evsel_list; +static struct perf_evlist *evsel_list; -static bool system_wide = false; -static int run_idx = 0; +static struct perf_target target = { + .uid = UINT_MAX, +}; +static int run_idx = 0; static int run_count = 1; static bool no_inherit = false; static bool scale = true; static bool no_aggr = false; -static pid_t target_pid = -1; -static pid_t target_tid = -1; static pid_t child_pid = -1; static bool null_run = false; static int detailed_run = 0; static bool sync_run = false; static bool big_num = true; static int big_num_opt = -1; -static const char *cpu_list; static const char *csv_sep = NULL; static bool csv_output = false; static bool group = false; +static const char *output_name = NULL; +static FILE *output = NULL; +static int output_fd; static volatile int done = 0; @@ -251,28 +253,40 @@ static double avg_stats(struct stats *stats) */ static double stddev_stats(struct stats *stats) { - double variance = stats->M2 / (stats->n - 1); - double variance_mean = variance / stats->n; + double variance, variance_mean; + + if (!stats->n) + return 0.0; + + variance = stats->M2 / (stats->n - 1); + variance_mean = variance / stats->n; return sqrt(variance_mean); } -struct stats runtime_nsecs_stats[MAX_NR_CPUS]; -struct stats runtime_cycles_stats[MAX_NR_CPUS]; -struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS]; -struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS]; -struct stats runtime_branches_stats[MAX_NR_CPUS]; -struct stats runtime_cacherefs_stats[MAX_NR_CPUS]; -struct stats runtime_l1_dcache_stats[MAX_NR_CPUS]; -struct stats runtime_l1_icache_stats[MAX_NR_CPUS]; -struct stats runtime_ll_cache_stats[MAX_NR_CPUS]; -struct stats runtime_itlb_cache_stats[MAX_NR_CPUS]; -struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS]; -struct stats walltime_nsecs_stats; - -static int create_perf_stat_counter(struct perf_evsel *evsel) +static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; +static struct stats runtime_cycles_stats[MAX_NR_CPUS]; +static struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS]; +static struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS]; +static struct stats runtime_branches_stats[MAX_NR_CPUS]; +static struct stats runtime_cacherefs_stats[MAX_NR_CPUS]; +static struct stats runtime_l1_dcache_stats[MAX_NR_CPUS]; +static struct stats runtime_l1_icache_stats[MAX_NR_CPUS]; +static struct stats runtime_ll_cache_stats[MAX_NR_CPUS]; +static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS]; +static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS]; +static struct stats walltime_nsecs_stats; + +static int create_perf_stat_counter(struct perf_evsel *evsel, + struct perf_evsel *first) { struct perf_event_attr *attr = &evsel->attr; + struct xyarray *group_fd = NULL; + bool exclude_guest_missing = false; + int ret; + + if (group && evsel != first) + group_fd = first->fd; if (scale) attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | @@ -280,15 +294,39 @@ static int create_perf_stat_counter(struct perf_evsel *evsel) attr->inherit = !no_inherit; - if (system_wide) - return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, group); +retry: + if (exclude_guest_missing) + evsel->attr.exclude_guest = evsel->attr.exclude_host = 0; - if (target_pid == -1 && target_tid == -1) { + if (perf_target__has_cpu(&target)) { + ret = perf_evsel__open_per_cpu(evsel, evsel_list->cpus, + group, group_fd); + if (ret) + goto check_ret; + return 0; + } + + if (!perf_target__has_task(&target) && (!group || evsel == first)) { attr->disabled = 1; attr->enable_on_exec = 1; } - return perf_evsel__open_per_thread(evsel, evsel_list->threads, group); + ret = perf_evsel__open_per_thread(evsel, evsel_list->threads, + group, group_fd); + if (!ret) + return 0; + /* fall through */ +check_ret: + if (ret && errno == EINVAL) { + if (!exclude_guest_missing && + (evsel->attr.exclude_guest || evsel->attr.exclude_host)) { + pr_debug("Old kernel, cannot exclude " + "guest or host samples.\n"); + exclude_guest_missing = true; + goto retry; + } + } + return ret; } /* @@ -352,8 +390,8 @@ static int read_counter_aggr(struct perf_evsel *counter) update_stats(&ps->res_stats[i], count[i]); if (verbose) { - fprintf(stderr, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", - event_name(counter), count[0], count[1], count[2]); + fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", + perf_evsel__name(counter), count[0], count[1], count[2]); } /* @@ -388,7 +426,7 @@ static int read_counter(struct perf_evsel *counter) static int run_perf_stat(int argc __used, const char **argv) { unsigned long long t0, t1; - struct perf_evsel *counter; + struct perf_evsel *counter, *first; int status = 0; int child_ready_pipe[2], go_pipe[2]; const bool forks = (argc > 0); @@ -432,7 +470,7 @@ static int run_perf_stat(int argc __used, const char **argv) exit(-1); } - if (target_tid == -1 && target_pid == -1 && !system_wide) + if (perf_target__none(&target)) evsel_list->threads->map[0] = child_pid; /* @@ -445,12 +483,20 @@ static int run_perf_stat(int argc __used, const char **argv) close(child_ready_pipe[0]); } + first = list_entry(evsel_list->entries.next, struct perf_evsel, node); + list_for_each_entry(counter, &evsel_list->entries, node) { - if (create_perf_stat_counter(counter) < 0) { - if (errno == EINVAL || errno == ENOSYS || errno == ENOENT) { + if (create_perf_stat_counter(counter, first) < 0) { + /* + * PPC returns ENXIO for HW counters until 2.6.37 + * (behavior changed with commit b0a873e). + */ + if (errno == EINVAL || errno == ENOSYS || + errno == ENOENT || errno == EOPNOTSUPP || + errno == ENXIO) { if (verbose) ui__warning("%s event is not supported by the kernel.\n", - event_name(counter)); + perf_evsel__name(counter)); counter->supported = false; continue; } @@ -459,7 +505,7 @@ static int run_perf_stat(int argc __used, const char **argv) error("You may not have permission to collect %sstats.\n" "\t Consider tweaking" " /proc/sys/kernel/perf_event_paranoid or running as root.", - system_wide ? "system-wide " : ""); + target.system_wide ? "system-wide " : ""); } else { error("open_counter returned with %d (%s). " "/bin/dmesg may provide additional information.\n", @@ -487,6 +533,8 @@ static int run_perf_stat(int argc __used, const char **argv) if (forks) { close(go_pipe[1]); wait(&status); + if (WIFSIGNALED(status)) + psignal(WTERMSIG(status), argv[0]); } else { while(!done) sleep(1); } @@ -519,9 +567,9 @@ static void print_noise_pct(double total, double avg) pct = 100.0*total/avg; if (csv_output) - fprintf(stderr, "%s%.2f%%", csv_sep, pct); - else - fprintf(stderr, " ( +-%6.2f%% )", pct); + fprintf(output, "%s%.2f%%", csv_sep, pct); + else if (pct) + fprintf(output, " ( +-%6.2f%% )", pct); } static void print_noise(struct perf_evsel *evsel, double avg) @@ -546,16 +594,46 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg) csv_output ? 0 : -4, evsel_list->cpus->map[cpu], csv_sep); - fprintf(stderr, fmt, cpustr, msecs, csv_sep, event_name(evsel)); + fprintf(output, fmt, cpustr, msecs, csv_sep, perf_evsel__name(evsel)); if (evsel->cgrp) - fprintf(stderr, "%s%s", csv_sep, evsel->cgrp->name); + fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); if (csv_output) return; if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) - fprintf(stderr, " # %8.3f CPUs utilized ", avg / avg_stats(&walltime_nsecs_stats)); + fprintf(output, " # %8.3f CPUs utilized ", + avg / avg_stats(&walltime_nsecs_stats)); + else + fprintf(output, " "); +} + +/* used for get_ratio_color() */ +enum grc_type { + GRC_STALLED_CYCLES_FE, + GRC_STALLED_CYCLES_BE, + GRC_CACHE_MISSES, + GRC_MAX_NR +}; + +static const char *get_ratio_color(enum grc_type type, double ratio) +{ + static const double grc_table[GRC_MAX_NR][3] = { + [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 }, + [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 }, + [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 }, + }; + const char *color = PERF_COLOR_NORMAL; + + if (ratio > grc_table[type][0]) + color = PERF_COLOR_RED; + else if (ratio > grc_table[type][1]) + color = PERF_COLOR_MAGENTA; + else if (ratio > grc_table[type][2]) + color = PERF_COLOR_YELLOW; + + return color; } static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel __used, double avg) @@ -568,17 +646,11 @@ static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel __us if (total) ratio = avg / total * 100.0; - color = PERF_COLOR_NORMAL; - if (ratio > 50.0) - color = PERF_COLOR_RED; - else if (ratio > 30.0) - color = PERF_COLOR_MAGENTA; - else if (ratio > 10.0) - color = PERF_COLOR_YELLOW; + color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio); - fprintf(stderr, " # "); - color_fprintf(stderr, color, "%6.2f%%", ratio); - fprintf(stderr, " frontend cycles idle "); + fprintf(output, " # "); + color_fprintf(output, color, "%6.2f%%", ratio); + fprintf(output, " frontend cycles idle "); } static void print_stalled_cycles_backend(int cpu, struct perf_evsel *evsel __used, double avg) @@ -591,17 +663,11 @@ static void print_stalled_cycles_backend(int cpu, struct perf_evsel *evsel __use if (total) ratio = avg / total * 100.0; - color = PERF_COLOR_NORMAL; - if (ratio > 75.0) - color = PERF_COLOR_RED; - else if (ratio > 50.0) - color = PERF_COLOR_MAGENTA; - else if (ratio > 20.0) - color = PERF_COLOR_YELLOW; + color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio); - fprintf(stderr, " # "); - color_fprintf(stderr, color, "%6.2f%%", ratio); - fprintf(stderr, " backend cycles idle "); + fprintf(output, " # "); + color_fprintf(output, color, "%6.2f%%", ratio); + fprintf(output, " backend cycles idle "); } static void print_branch_misses(int cpu, struct perf_evsel *evsel __used, double avg) @@ -614,17 +680,11 @@ static void print_branch_misses(int cpu, struct perf_evsel *evsel __used, double if (total) ratio = avg / total * 100.0; - color = PERF_COLOR_NORMAL; - if (ratio > 20.0) - color = PERF_COLOR_RED; - else if (ratio > 10.0) - color = PERF_COLOR_MAGENTA; - else if (ratio > 5.0) - color = PERF_COLOR_YELLOW; + color = get_ratio_color(GRC_CACHE_MISSES, ratio); - fprintf(stderr, " # "); - color_fprintf(stderr, color, "%6.2f%%", ratio); - fprintf(stderr, " of all branches "); + fprintf(output, " # "); + color_fprintf(output, color, "%6.2f%%", ratio); + fprintf(output, " of all branches "); } static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __used, double avg) @@ -637,17 +697,11 @@ static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __used, dou if (total) ratio = avg / total * 100.0; - color = PERF_COLOR_NORMAL; - if (ratio > 20.0) - color = PERF_COLOR_RED; - else if (ratio > 10.0) - color = PERF_COLOR_MAGENTA; - else if (ratio > 5.0) - color = PERF_COLOR_YELLOW; + color = get_ratio_color(GRC_CACHE_MISSES, ratio); - fprintf(stderr, " # "); - color_fprintf(stderr, color, "%6.2f%%", ratio); - fprintf(stderr, " of all L1-dcache hits "); + fprintf(output, " # "); + color_fprintf(output, color, "%6.2f%%", ratio); + fprintf(output, " of all L1-dcache hits "); } static void print_l1_icache_misses(int cpu, struct perf_evsel *evsel __used, double avg) @@ -660,17 +714,11 @@ static void print_l1_icache_misses(int cpu, struct perf_evsel *evsel __used, dou if (total) ratio = avg / total * 100.0; - color = PERF_COLOR_NORMAL; - if (ratio > 20.0) - color = PERF_COLOR_RED; - else if (ratio > 10.0) - color = PERF_COLOR_MAGENTA; - else if (ratio > 5.0) - color = PERF_COLOR_YELLOW; + color = get_ratio_color(GRC_CACHE_MISSES, ratio); - fprintf(stderr, " # "); - color_fprintf(stderr, color, "%6.2f%%", ratio); - fprintf(stderr, " of all L1-icache hits "); + fprintf(output, " # "); + color_fprintf(output, color, "%6.2f%%", ratio); + fprintf(output, " of all L1-icache hits "); } static void print_dtlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg) @@ -683,17 +731,11 @@ static void print_dtlb_cache_misses(int cpu, struct perf_evsel *evsel __used, do if (total) ratio = avg / total * 100.0; - color = PERF_COLOR_NORMAL; - if (ratio > 20.0) - color = PERF_COLOR_RED; - else if (ratio > 10.0) - color = PERF_COLOR_MAGENTA; - else if (ratio > 5.0) - color = PERF_COLOR_YELLOW; + color = get_ratio_color(GRC_CACHE_MISSES, ratio); - fprintf(stderr, " # "); - color_fprintf(stderr, color, "%6.2f%%", ratio); - fprintf(stderr, " of all dTLB cache hits "); + fprintf(output, " # "); + color_fprintf(output, color, "%6.2f%%", ratio); + fprintf(output, " of all dTLB cache hits "); } static void print_itlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg) @@ -706,17 +748,11 @@ static void print_itlb_cache_misses(int cpu, struct perf_evsel *evsel __used, do if (total) ratio = avg / total * 100.0; - color = PERF_COLOR_NORMAL; - if (ratio > 20.0) - color = PERF_COLOR_RED; - else if (ratio > 10.0) - color = PERF_COLOR_MAGENTA; - else if (ratio > 5.0) - color = PERF_COLOR_YELLOW; + color = get_ratio_color(GRC_CACHE_MISSES, ratio); - fprintf(stderr, " # "); - color_fprintf(stderr, color, "%6.2f%%", ratio); - fprintf(stderr, " of all iTLB cache hits "); + fprintf(output, " # "); + color_fprintf(output, color, "%6.2f%%", ratio); + fprintf(output, " of all iTLB cache hits "); } static void print_ll_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg) @@ -729,17 +765,11 @@ static void print_ll_cache_misses(int cpu, struct perf_evsel *evsel __used, doub if (total) ratio = avg / total * 100.0; - color = PERF_COLOR_NORMAL; - if (ratio > 20.0) - color = PERF_COLOR_RED; - else if (ratio > 10.0) - color = PERF_COLOR_MAGENTA; - else if (ratio > 5.0) - color = PERF_COLOR_YELLOW; + color = get_ratio_color(GRC_CACHE_MISSES, ratio); - fprintf(stderr, " # "); - color_fprintf(stderr, color, "%6.2f%%", ratio); - fprintf(stderr, " of all LL-cache hits "); + fprintf(output, " # "); + color_fprintf(output, color, "%6.2f%%", ratio); + fprintf(output, " of all LL-cache hits "); } static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) @@ -762,10 +792,10 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) else cpu = 0; - fprintf(stderr, fmt, cpustr, avg, csv_sep, event_name(evsel)); + fprintf(output, fmt, cpustr, avg, csv_sep, perf_evsel__name(evsel)); if (evsel->cgrp) - fprintf(stderr, "%s%s", csv_sep, evsel->cgrp->name); + fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); if (csv_output) return; @@ -776,14 +806,14 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) if (total) ratio = avg / total; - fprintf(stderr, " # %5.2f insns per cycle ", ratio); + fprintf(output, " # %5.2f insns per cycle ", ratio); total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]); total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu])); if (total && avg) { ratio = total / avg; - fprintf(stderr, "\n # %5.2f stalled cycles per insn", ratio); + fprintf(output, "\n # %5.2f stalled cycles per insn", ratio); } } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && @@ -831,7 +861,7 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) if (total) ratio = avg * 100 / total; - fprintf(stderr, " # %8.3f %% of all cache refs ", ratio); + fprintf(output, " # %8.3f %% of all cache refs ", ratio); } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { print_stalled_cycles_frontend(cpu, evsel, avg); @@ -843,16 +873,22 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) if (total) ratio = 1.0 * avg / total; - fprintf(stderr, " # %8.3f GHz ", ratio); + fprintf(output, " # %8.3f GHz ", ratio); } else if (runtime_nsecs_stats[cpu].n != 0) { + char unit = 'M'; + total = avg_stats(&runtime_nsecs_stats[cpu]); if (total) ratio = 1000.0 * avg / total; + if (ratio < 0.001) { + ratio *= 1000; + unit = 'K'; + } - fprintf(stderr, " # %8.3f M/sec ", ratio); + fprintf(output, " # %8.3f %c/sec ", ratio, unit); } else { - fprintf(stderr, " "); + fprintf(output, " "); } } @@ -867,17 +903,17 @@ static void print_counter_aggr(struct perf_evsel *counter) int scaled = counter->counts->scaled; if (scaled == -1) { - fprintf(stderr, "%*s%s%*s", + fprintf(output, "%*s%s%*s", csv_output ? 0 : 18, counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, csv_sep, csv_output ? 0 : -24, - event_name(counter)); + perf_evsel__name(counter)); if (counter->cgrp) - fprintf(stderr, "%s%s", csv_sep, counter->cgrp->name); + fprintf(output, "%s%s", csv_sep, counter->cgrp->name); - fputc('\n', stderr); + fputc('\n', output); return; } @@ -889,7 +925,7 @@ static void print_counter_aggr(struct perf_evsel *counter) print_noise(counter, avg); if (csv_output) { - fputc('\n', stderr); + fputc('\n', output); return; } @@ -899,9 +935,9 @@ static void print_counter_aggr(struct perf_evsel *counter) avg_enabled = avg_stats(&ps->res_stats[1]); avg_running = avg_stats(&ps->res_stats[2]); - fprintf(stderr, " [%5.2f%%]", 100 * avg_running / avg_enabled); + fprintf(output, " [%5.2f%%]", 100 * avg_running / avg_enabled); } - fprintf(stderr, "\n"); + fprintf(output, "\n"); } /* @@ -918,19 +954,20 @@ static void print_counter(struct perf_evsel *counter) ena = counter->counts->cpu[cpu].ena; run = counter->counts->cpu[cpu].run; if (run == 0 || ena == 0) { - fprintf(stderr, "CPU%*d%s%*s%s%*s", + fprintf(output, "CPU%*d%s%*s%s%*s", csv_output ? 0 : -4, evsel_list->cpus->map[cpu], csv_sep, csv_output ? 0 : 18, counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, csv_sep, csv_output ? 0 : -24, - event_name(counter)); + perf_evsel__name(counter)); if (counter->cgrp) - fprintf(stderr, "%s%s", csv_sep, counter->cgrp->name); + fprintf(output, "%s%s", + csv_sep, counter->cgrp->name); - fputc('\n', stderr); + fputc('\n', output); continue; } @@ -943,9 +980,10 @@ static void print_counter(struct perf_evsel *counter) print_noise(counter, 1.0); if (run != ena) - fprintf(stderr, " (%.2f%%)", 100.0 * run / ena); + fprintf(output, " (%.2f%%)", + 100.0 * run / ena); } - fputc('\n', stderr); + fputc('\n', output); } } @@ -957,21 +995,21 @@ static void print_stat(int argc, const char **argv) fflush(stdout); if (!csv_output) { - fprintf(stderr, "\n"); - fprintf(stderr, " Performance counter stats for "); - if(target_pid == -1 && target_tid == -1) { - fprintf(stderr, "\'%s", argv[0]); + fprintf(output, "\n"); + fprintf(output, " Performance counter stats for "); + if (!perf_target__has_task(&target)) { + fprintf(output, "\'%s", argv[0]); for (i = 1; i < argc; i++) - fprintf(stderr, " %s", argv[i]); - } else if (target_pid != -1) - fprintf(stderr, "process id \'%d", target_pid); + fprintf(output, " %s", argv[i]); + } else if (target.pid) + fprintf(output, "process id \'%s", target.pid); else - fprintf(stderr, "thread id \'%d", target_tid); + fprintf(output, "thread id \'%s", target.tid); - fprintf(stderr, "\'"); + fprintf(output, "\'"); if (run_count > 1) - fprintf(stderr, " (%d runs)", run_count); - fprintf(stderr, ":\n\n"); + fprintf(output, " (%d runs)", run_count); + fprintf(output, ":\n\n"); } if (no_aggr) { @@ -984,15 +1022,15 @@ static void print_stat(int argc, const char **argv) if (!csv_output) { if (!null_run) - fprintf(stderr, "\n"); - fprintf(stderr, " %17.9f seconds time elapsed", + fprintf(output, "\n"); + fprintf(output, " %17.9f seconds time elapsed", avg_stats(&walltime_nsecs_stats)/1e9); if (run_count > 1) { - fprintf(stderr, " "); + fprintf(output, " "); print_noise_pct(stddev_stats(&walltime_nsecs_stats), avg_stats(&walltime_nsecs_stats)); } - fprintf(stderr, "\n\n"); + fprintf(output, "\n\n"); } } @@ -1030,6 +1068,8 @@ static int stat__set_big_num(const struct option *opt __used, return 0; } +static bool append_file; + static const struct option options[] = { OPT_CALLBACK('e', "event", &evsel_list, "event", "event selector. use 'perf list' to list available events", @@ -1038,11 +1078,11 @@ static const struct option options[] = { "event filter", parse_filter), OPT_BOOLEAN('i', "no-inherit", &no_inherit, "child tasks do not inherit counters"), - OPT_INTEGER('p', "pid", &target_pid, - "stat events on existing process id"), - OPT_INTEGER('t', "tid", &target_tid, - "stat events on existing thread id"), - OPT_BOOLEAN('a', "all-cpus", &system_wide, + OPT_STRING('p', "pid", &target.pid, "pid", + "stat events on existing process id"), + OPT_STRING('t', "tid", &target.tid, "tid", + "stat events on existing thread id"), + OPT_BOOLEAN('a', "all-cpus", &target.system_wide, "system-wide collection from all CPUs"), OPT_BOOLEAN('g', "group", &group, "put the counters into a counter group"), @@ -1061,7 +1101,7 @@ static const struct option options[] = { OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, "print large numbers with thousands\' separators", stat__set_big_num), - OPT_STRING('C', "cpu", &cpu_list, "cpu", + OPT_STRING('C', "cpu", &target.cpu_list, "cpu", "list of cpus to monitor in system-wide"), OPT_BOOLEAN('A', "no-aggr", &no_aggr, "disable CPU count aggregation"), @@ -1070,6 +1110,11 @@ static const struct option options[] = { OPT_CALLBACK('G', "cgroup", &evsel_list, "name", "monitor event in cgroup name only", parse_cgroups), + OPT_STRING('o', "output", &output_name, "file", + "output file name"), + OPT_BOOLEAN(0, "append", &append_file, "append to the output file"), + OPT_INTEGER(0, "log-fd", &output_fd, + "log output to fd, instead of stderr"), OPT_END() }; @@ -1079,22 +1124,13 @@ static const struct option options[] = { */ static int add_default_attributes(void) { - struct perf_evsel *pos; - size_t attr_nr = 0; - size_t c; - /* Set attrs if no event is selected and !null_run: */ if (null_run) return 0; if (!evsel_list->nr_entries) { - for (c = 0; c < ARRAY_SIZE(default_attrs); c++) { - pos = perf_evsel__new(default_attrs + c, c + attr_nr); - if (pos == NULL) - return -1; - perf_evlist__add(evsel_list, pos); - } - attr_nr += c; + if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0) + return -1; } /* Detailed events get appended to the event list: */ @@ -1103,44 +1139,28 @@ static int add_default_attributes(void) return 0; /* Append detailed run extra attributes: */ - for (c = 0; c < ARRAY_SIZE(detailed_attrs); c++) { - pos = perf_evsel__new(detailed_attrs + c, c + attr_nr); - if (pos == NULL) - return -1; - perf_evlist__add(evsel_list, pos); - } - attr_nr += c; + if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0) + return -1; if (detailed_run < 2) return 0; /* Append very detailed run extra attributes: */ - for (c = 0; c < ARRAY_SIZE(very_detailed_attrs); c++) { - pos = perf_evsel__new(very_detailed_attrs + c, c + attr_nr); - if (pos == NULL) - return -1; - perf_evlist__add(evsel_list, pos); - } + if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0) + return -1; if (detailed_run < 3) return 0; /* Append very, very detailed run extra attributes: */ - for (c = 0; c < ARRAY_SIZE(very_very_detailed_attrs); c++) { - pos = perf_evsel__new(very_very_detailed_attrs + c, c + attr_nr); - if (pos == NULL) - return -1; - perf_evlist__add(evsel_list, pos); - } - - - return 0; + return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs); } int cmd_stat(int argc, const char **argv, const char *prefix __used) { struct perf_evsel *pos; int status = -ENOMEM; + const char *mode; setlocale(LC_ALL, ""); @@ -1151,16 +1171,52 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) argc = parse_options(argc, argv, options, stat_usage, PARSE_OPT_STOP_AT_NON_OPTION); - if (csv_sep) + output = stderr; + if (output_name && strcmp(output_name, "-")) + output = NULL; + + if (output_name && output_fd) { + fprintf(stderr, "cannot use both --output and --log-fd\n"); + usage_with_options(stat_usage, options); + } + + if (output_fd < 0) { + fprintf(stderr, "argument to --log-fd must be a > 0\n"); + usage_with_options(stat_usage, options); + } + + if (!output) { + struct timespec tm; + mode = append_file ? "a" : "w"; + + output = fopen(output_name, mode); + if (!output) { + perror("failed to create output file"); + exit(-1); + } + clock_gettime(CLOCK_REALTIME, &tm); + fprintf(output, "# started on %s\n", ctime(&tm.tv_sec)); + } else if (output_fd > 0) { + mode = append_file ? "a" : "w"; + output = fdopen(output_fd, mode); + if (!output) { + perror("Failed opening logfd"); + return -errno; + } + } + + if (csv_sep) { csv_output = true; - else + if (!strcmp(csv_sep, "\\t")) + csv_sep = "\t"; + } else csv_sep = DEFAULT_SEPARATOR; /* * let the spreadsheet do the pretty-printing */ if (csv_output) { - /* User explicitely passed -B? */ + /* User explicitly passed -B? */ if (big_num_opt == 1) { fprintf(stderr, "-B option not supported with -x\n"); usage_with_options(stat_usage, options); @@ -1169,13 +1225,13 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) } else if (big_num_opt == 0) /* User passed --no-big-num */ big_num = false; - if (!argc && target_pid == -1 && target_tid == -1) + if (!argc && !perf_target__has_task(&target)) usage_with_options(stat_usage, options); if (run_count <= 0) usage_with_options(stat_usage, options); /* no_aggr, cgroup are for system-wide only */ - if ((no_aggr || nr_cgroups) && !system_wide) { + if ((no_aggr || nr_cgroups) && !perf_target__has_cpu(&target)) { fprintf(stderr, "both cgroup and no-aggregation " "modes only available in system-wide mode\n"); @@ -1185,30 +1241,21 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) if (add_default_attributes()) goto out; - if (target_pid != -1) - target_tid = target_pid; + perf_target__validate(&target); - evsel_list->threads = thread_map__new(target_pid, target_tid); - if (evsel_list->threads == NULL) { - pr_err("Problems finding threads of monitor\n"); - usage_with_options(stat_usage, options); - } - - if (system_wide) - evsel_list->cpus = cpu_map__new(cpu_list); - else - evsel_list->cpus = cpu_map__dummy_new(); + if (perf_evlist__create_maps(evsel_list, &target) < 0) { + if (perf_target__has_task(&target)) + pr_err("Problems finding threads of monitor\n"); + if (perf_target__has_cpu(&target)) + perror("failed to parse CPUs map"); - if (evsel_list->cpus == NULL) { - perror("failed to parse CPUs map"); usage_with_options(stat_usage, options); return -1; } list_for_each_entry(pos, &evsel_list->entries, node) { if (perf_evsel__alloc_stat_priv(pos) < 0 || - perf_evsel__alloc_counts(pos, evsel_list->cpus->nr) < 0 || - perf_evsel__alloc_fd(pos, evsel_list->cpus->nr, evsel_list->threads->nr) < 0) + perf_evsel__alloc_counts(pos, evsel_list->cpus->nr) < 0) goto out_free_fd; } @@ -1226,7 +1273,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) status = 0; for (run_idx = 0; run_idx < run_count; run_idx++) { if (run_count != 1 && verbose) - fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx + 1); + fprintf(output, "[ perf stat: executing run #%d ... ]\n", + run_idx + 1); if (sync_run) sync(); diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index efe696f936e2..1d592f5cbea9 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -7,14 +7,16 @@ #include "util/cache.h" #include "util/debug.h" +#include "util/debugfs.h" #include "util/evlist.h" #include "util/parse-options.h" #include "util/parse-events.h" #include "util/symbol.h" #include "util/thread_map.h" +#include "util/pmu.h" #include "../../include/linux/hw_breakpoint.h" -static long page_size; +#include <sys/mman.h> static int vmlinux_matches_kallsyms_filter(struct map *map __used, struct symbol *sym) { @@ -31,6 +33,7 @@ static int test__vmlinux_matches_kallsyms(void) struct map *kallsyms_map, *vmlinux_map; struct machine kallsyms, vmlinux; enum map_type type = MAP__FUNCTION; + long page_size = sysconf(_SC_PAGE_SIZE); struct ref_reloc_sym ref_reloc_sym = { .name = "_stext", }; /* @@ -247,7 +250,7 @@ static int trace_event__id(const char *evname) if (asprintf(&filename, "%s/syscalls/%s/id", - debugfs_path, evname) < 0) + tracing_events_path, evname) < 0) return -1; fd = open(filename, O_RDONLY); @@ -276,7 +279,7 @@ static int test__open_syscall_event(void) return -1; } - threads = thread_map__new(-1, getpid()); + threads = thread_map__new(-1, getpid(), UINT_MAX); if (threads == NULL) { pr_debug("thread_map__new\n"); return -1; @@ -291,7 +294,7 @@ static int test__open_syscall_event(void) goto out_thread_map_delete; } - if (perf_evsel__open_per_thread(evsel, threads, false) < 0) { + if (perf_evsel__open_per_thread(evsel, threads, false, NULL) < 0) { pr_debug("failed to open counter: %s, " "tweak /proc/sys/kernel/perf_event_paranoid?\n", strerror(errno)); @@ -342,7 +345,7 @@ static int test__open_syscall_event_on_all_cpus(void) return -1; } - threads = thread_map__new(-1, getpid()); + threads = thread_map__new(-1, getpid(), UINT_MAX); if (threads == NULL) { pr_debug("thread_map__new\n"); return -1; @@ -366,7 +369,7 @@ static int test__open_syscall_event_on_all_cpus(void) goto out_thread_map_delete; } - if (perf_evsel__open(evsel, cpus, threads, false) < 0) { + if (perf_evsel__open(evsel, cpus, threads, false, NULL) < 0) { pr_debug("failed to open counter: %s, " "tweak /proc/sys/kernel/perf_event_paranoid?\n", strerror(errno)); @@ -475,7 +478,6 @@ static int test__basic_mmap(void) unsigned int nr_events[nsyscalls], expected_nr_events[nsyscalls], i, j; struct perf_evsel *evsels[nsyscalls], *evsel; - int sample_size = __perf_evsel__sample_size(attr.sample_type); for (i = 0; i < nsyscalls; ++i) { char name[64]; @@ -490,7 +492,7 @@ static int test__basic_mmap(void) expected_nr_events[i] = random() % 257; } - threads = thread_map__new(-1, getpid()); + threads = thread_map__new(-1, getpid(), UINT_MAX); if (threads == NULL) { pr_debug("thread_map__new\n"); return -1; @@ -531,7 +533,7 @@ static int test__basic_mmap(void) perf_evlist__add(evlist, evsels[i]); - if (perf_evsel__open(evsels[i], cpus, threads, false) < 0) { + if (perf_evsel__open(evsels[i], cpus, threads, false, NULL) < 0) { pr_debug("failed to open counter: %s, " "tweak /proc/sys/kernel/perf_event_paranoid?\n", strerror(errno)); @@ -560,8 +562,7 @@ static int test__basic_mmap(void) goto out_munmap; } - err = perf_event__parse_sample(event, attr.sample_type, sample_size, - false, &sample, false); + err = perf_evlist__parse_sample(evlist, event, &sample, false); if (err) { pr_err("Can't parse sample, err = %d\n", err); goto out_munmap; @@ -580,7 +581,7 @@ static int test__basic_mmap(void) if (nr_events[evsel->idx] != expected_nr_events[evsel->idx]) { pr_debug("expected %d %s events, got %d\n", expected_nr_events[evsel->idx], - event_name(evsel), nr_events[evsel->idx]); + perf_evsel__name(evsel), nr_events[evsel->idx]); goto out_munmap; } } @@ -601,310 +602,618 @@ out_free_threads: #undef nsyscalls } -#define TEST_ASSERT_VAL(text, cond) \ -do { \ - if (!cond) { \ - pr_debug("FAILED %s:%d %s\n", __FILE__, __LINE__, text); \ - return -1; \ - } \ -} while (0) - -static int test__checkevent_tracepoint(struct perf_evlist *evlist) +static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t **maskp, + size_t *sizep) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); - - TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type); - TEST_ASSERT_VAL("wrong sample_type", - (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | PERF_SAMPLE_CPU) == - evsel->attr.sample_type); - TEST_ASSERT_VAL("wrong sample_period", 1 == evsel->attr.sample_period); - return 0; + cpu_set_t *mask; + size_t size; + int i, cpu = -1, nrcpus = 1024; +realloc: + mask = CPU_ALLOC(nrcpus); + size = CPU_ALLOC_SIZE(nrcpus); + CPU_ZERO_S(size, mask); + + if (sched_getaffinity(pid, size, mask) == -1) { + CPU_FREE(mask); + if (errno == EINVAL && nrcpus < (1024 << 8)) { + nrcpus = nrcpus << 2; + goto realloc; + } + perror("sched_getaffinity"); + return -1; + } + + for (i = 0; i < nrcpus; i++) { + if (CPU_ISSET_S(i, size, mask)) { + if (cpu == -1) { + cpu = i; + *maskp = mask; + *sizep = size; + } else + CPU_CLR_S(i, size, mask); + } + } + + if (cpu == -1) + CPU_FREE(mask); + + return cpu; } -static int test__checkevent_tracepoint_multi(struct perf_evlist *evlist) +static int test__PERF_RECORD(void) { + struct perf_record_opts opts = { + .target = { + .uid = UINT_MAX, + .uses_mmap = true, + }, + .no_delay = true, + .freq = 10, + .mmap_pages = 256, + }; + cpu_set_t *cpu_mask = NULL; + size_t cpu_mask_size = 0; + struct perf_evlist *evlist = perf_evlist__new(NULL, NULL); struct perf_evsel *evsel; + struct perf_sample sample; + const char *cmd = "sleep"; + const char *argv[] = { cmd, "1", NULL, }; + char *bname; + u64 prev_time = 0; + bool found_cmd_mmap = false, + found_libc_mmap = false, + found_vdso_mmap = false, + found_ld_mmap = false; + int err = -1, errs = 0, i, wakeups = 0; + u32 cpu; + int total_events = 0, nr_events[PERF_RECORD_MAX] = { 0, }; + + if (evlist == NULL || argv == NULL) { + pr_debug("Not enough memory to create evlist\n"); + goto out; + } - TEST_ASSERT_VAL("wrong number of entries", evlist->nr_entries > 1); + /* + * We need at least one evsel in the evlist, use the default + * one: "cycles". + */ + err = perf_evlist__add_default(evlist); + if (err < 0) { + pr_debug("Not enough memory to create evsel\n"); + goto out_delete_evlist; + } - list_for_each_entry(evsel, &evlist->entries, node) { - TEST_ASSERT_VAL("wrong type", - PERF_TYPE_TRACEPOINT == evsel->attr.type); - TEST_ASSERT_VAL("wrong sample_type", - (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | PERF_SAMPLE_CPU) - == evsel->attr.sample_type); - TEST_ASSERT_VAL("wrong sample_period", - 1 == evsel->attr.sample_period); + /* + * Create maps of threads and cpus to monitor. In this case + * we start with all threads and cpus (-1, -1) but then in + * perf_evlist__prepare_workload we'll fill in the only thread + * we're monitoring, the one forked there. + */ + err = perf_evlist__create_maps(evlist, &opts.target); + if (err < 0) { + pr_debug("Not enough memory to create thread/cpu maps\n"); + goto out_delete_evlist; } - return 0; -} -static int test__checkevent_raw(struct perf_evlist *evlist) -{ - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + /* + * Prepare the workload in argv[] to run, it'll fork it, and then wait + * for perf_evlist__start_workload() to exec it. This is done this way + * so that we have time to open the evlist (calling sys_perf_event_open + * on all the fds) and then mmap them. + */ + err = perf_evlist__prepare_workload(evlist, &opts, argv); + if (err < 0) { + pr_debug("Couldn't run the workload!\n"); + goto out_delete_evlist; + } - TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type); - TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config); - return 0; -} + /* + * Config the evsels, setting attr->comm on the first one, etc. + */ + evsel = list_entry(evlist->entries.next, struct perf_evsel, node); + evsel->attr.sample_type |= PERF_SAMPLE_CPU; + evsel->attr.sample_type |= PERF_SAMPLE_TID; + evsel->attr.sample_type |= PERF_SAMPLE_TIME; + perf_evlist__config_attrs(evlist, &opts); + + err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask, + &cpu_mask_size); + if (err < 0) { + pr_debug("sched__get_first_possible_cpu: %s\n", strerror(errno)); + goto out_delete_evlist; + } -static int test__checkevent_numeric(struct perf_evlist *evlist) -{ - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + cpu = err; - TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); - TEST_ASSERT_VAL("wrong type", 1 == evsel->attr.type); - TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config); - return 0; + /* + * So that we can check perf_sample.cpu on all the samples. + */ + if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, cpu_mask) < 0) { + pr_debug("sched_setaffinity: %s\n", strerror(errno)); + goto out_free_cpu_mask; + } + + /* + * Call sys_perf_event_open on all the fds on all the evsels, + * grouping them if asked to. + */ + err = perf_evlist__open(evlist, opts.group); + if (err < 0) { + pr_debug("perf_evlist__open: %s\n", strerror(errno)); + goto out_delete_evlist; + } + + /* + * mmap the first fd on a given CPU and ask for events for the other + * fds in the same CPU to be injected in the same mmap ring buffer + * (using ioctl(PERF_EVENT_IOC_SET_OUTPUT)). + */ + err = perf_evlist__mmap(evlist, opts.mmap_pages, false); + if (err < 0) { + pr_debug("perf_evlist__mmap: %s\n", strerror(errno)); + goto out_delete_evlist; + } + + /* + * Now that all is properly set up, enable the events, they will + * count just on workload.pid, which will start... + */ + perf_evlist__enable(evlist); + + /* + * Now! + */ + perf_evlist__start_workload(evlist); + + while (1) { + int before = total_events; + + for (i = 0; i < evlist->nr_mmaps; i++) { + union perf_event *event; + + while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) { + const u32 type = event->header.type; + const char *name = perf_event__name(type); + + ++total_events; + if (type < PERF_RECORD_MAX) + nr_events[type]++; + + err = perf_evlist__parse_sample(evlist, event, &sample, false); + if (err < 0) { + if (verbose) + perf_event__fprintf(event, stderr); + pr_debug("Couldn't parse sample\n"); + goto out_err; + } + + if (verbose) { + pr_info("%" PRIu64" %d ", sample.time, sample.cpu); + perf_event__fprintf(event, stderr); + } + + if (prev_time > sample.time) { + pr_debug("%s going backwards in time, prev=%" PRIu64 ", curr=%" PRIu64 "\n", + name, prev_time, sample.time); + ++errs; + } + + prev_time = sample.time; + + if (sample.cpu != cpu) { + pr_debug("%s with unexpected cpu, expected %d, got %d\n", + name, cpu, sample.cpu); + ++errs; + } + + if ((pid_t)sample.pid != evlist->workload.pid) { + pr_debug("%s with unexpected pid, expected %d, got %d\n", + name, evlist->workload.pid, sample.pid); + ++errs; + } + + if ((pid_t)sample.tid != evlist->workload.pid) { + pr_debug("%s with unexpected tid, expected %d, got %d\n", + name, evlist->workload.pid, sample.tid); + ++errs; + } + + if ((type == PERF_RECORD_COMM || + type == PERF_RECORD_MMAP || + type == PERF_RECORD_FORK || + type == PERF_RECORD_EXIT) && + (pid_t)event->comm.pid != evlist->workload.pid) { + pr_debug("%s with unexpected pid/tid\n", name); + ++errs; + } + + if ((type == PERF_RECORD_COMM || + type == PERF_RECORD_MMAP) && + event->comm.pid != event->comm.tid) { + pr_debug("%s with different pid/tid!\n", name); + ++errs; + } + + switch (type) { + case PERF_RECORD_COMM: + if (strcmp(event->comm.comm, cmd)) { + pr_debug("%s with unexpected comm!\n", name); + ++errs; + } + break; + case PERF_RECORD_EXIT: + goto found_exit; + case PERF_RECORD_MMAP: + bname = strrchr(event->mmap.filename, '/'); + if (bname != NULL) { + if (!found_cmd_mmap) + found_cmd_mmap = !strcmp(bname + 1, cmd); + if (!found_libc_mmap) + found_libc_mmap = !strncmp(bname + 1, "libc", 4); + if (!found_ld_mmap) + found_ld_mmap = !strncmp(bname + 1, "ld", 2); + } else if (!found_vdso_mmap) + found_vdso_mmap = !strcmp(event->mmap.filename, "[vdso]"); + break; + + case PERF_RECORD_SAMPLE: + /* Just ignore samples for now */ + break; + default: + pr_debug("Unexpected perf_event->header.type %d!\n", + type); + ++errs; + } + } + } + + /* + * We don't use poll here because at least at 3.1 times the + * PERF_RECORD_{!SAMPLE} events don't honour + * perf_event_attr.wakeup_events, just PERF_EVENT_SAMPLE does. + */ + if (total_events == before && false) + poll(evlist->pollfd, evlist->nr_fds, -1); + + sleep(1); + if (++wakeups > 5) { + pr_debug("No PERF_RECORD_EXIT event!\n"); + break; + } + } + +found_exit: + if (nr_events[PERF_RECORD_COMM] > 1) { + pr_debug("Excessive number of PERF_RECORD_COMM events!\n"); + ++errs; + } + + if (nr_events[PERF_RECORD_COMM] == 0) { + pr_debug("Missing PERF_RECORD_COMM for %s!\n", cmd); + ++errs; + } + + if (!found_cmd_mmap) { + pr_debug("PERF_RECORD_MMAP for %s missing!\n", cmd); + ++errs; + } + + if (!found_libc_mmap) { + pr_debug("PERF_RECORD_MMAP for %s missing!\n", "libc"); + ++errs; + } + + if (!found_ld_mmap) { + pr_debug("PERF_RECORD_MMAP for %s missing!\n", "ld"); + ++errs; + } + + if (!found_vdso_mmap) { + pr_debug("PERF_RECORD_MMAP for %s missing!\n", "[vdso]"); + ++errs; + } +out_err: + perf_evlist__munmap(evlist); +out_free_cpu_mask: + CPU_FREE(cpu_mask); +out_delete_evlist: + perf_evlist__delete(evlist); +out: + return (err < 0 || errs > 0) ? -1 : 0; } -static int test__checkevent_symbolic_name(struct perf_evlist *evlist) + +#if defined(__x86_64__) || defined(__i386__) + +#define barrier() asm volatile("" ::: "memory") + +static u64 rdpmc(unsigned int counter) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + unsigned int low, high; - TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config); - return 0; + asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter)); + + return low | ((u64)high) << 32; } -static int test__checkevent_symbolic_alias(struct perf_evlist *evlist) +static u64 rdtsc(void) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + unsigned int low, high; - TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_SW_PAGE_FAULTS == evsel->attr.config); - return 0; + asm volatile("rdtsc" : "=a" (low), "=d" (high)); + + return low | ((u64)high) << 32; } -static int test__checkevent_genhw(struct perf_evlist *evlist) +static u64 mmap_read_self(void *addr) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_event_mmap_page *pc = addr; + u32 seq, idx, time_mult = 0, time_shift = 0; + u64 count, cyc = 0, time_offset = 0, enabled, running, delta; + + do { + seq = pc->lock; + barrier(); + + enabled = pc->time_enabled; + running = pc->time_running; + + if (enabled != running) { + cyc = rdtsc(); + time_mult = pc->time_mult; + time_shift = pc->time_shift; + time_offset = pc->time_offset; + } - TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HW_CACHE == evsel->attr.type); - TEST_ASSERT_VAL("wrong config", (1 << 16) == evsel->attr.config); - return 0; + idx = pc->index; + count = pc->offset; + if (idx) + count += rdpmc(idx - 1); + + barrier(); + } while (pc->lock != seq); + + if (enabled != running) { + u64 quot, rem; + + quot = (cyc >> time_shift); + rem = cyc & ((1 << time_shift) - 1); + delta = time_offset + quot * time_mult + + ((rem * time_mult) >> time_shift); + + enabled += delta; + if (idx) + running += delta; + + quot = count / running; + rem = count % running; + count = quot * enabled + (rem * enabled) / running; + } + + return count; } -static int test__checkevent_breakpoint(struct perf_evlist *evlist) +/* + * If the RDPMC instruction faults then signal this back to the test parent task: + */ +static void segfault_handler(int sig __used, siginfo_t *info __used, void *uc __used) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); - - TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->attr.type); - TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config); - TEST_ASSERT_VAL("wrong bp_type", (HW_BREAKPOINT_R | HW_BREAKPOINT_W) == - evsel->attr.bp_type); - TEST_ASSERT_VAL("wrong bp_len", HW_BREAKPOINT_LEN_4 == - evsel->attr.bp_len); - return 0; + exit(-1); } -static int test__checkevent_breakpoint_x(struct perf_evlist *evlist) +static int __test__rdpmc(void) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); - - TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->attr.type); - TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config); - TEST_ASSERT_VAL("wrong bp_type", - HW_BREAKPOINT_X == evsel->attr.bp_type); - TEST_ASSERT_VAL("wrong bp_len", sizeof(long) == evsel->attr.bp_len); + long page_size = sysconf(_SC_PAGE_SIZE); + volatile int tmp = 0; + u64 i, loops = 1000; + int n; + int fd; + void *addr; + struct perf_event_attr attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_INSTRUCTIONS, + .exclude_kernel = 1, + }; + u64 delta_sum = 0; + struct sigaction sa; + + sigfillset(&sa.sa_mask); + sa.sa_sigaction = segfault_handler; + sigaction(SIGSEGV, &sa, NULL); + + fd = sys_perf_event_open(&attr, 0, -1, -1, 0); + if (fd < 0) { + die("Error: sys_perf_event_open() syscall returned " + "with %d (%s)\n", fd, strerror(errno)); + } + + addr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0); + if (addr == (void *)(-1)) { + die("Error: mmap() syscall returned " + "with (%s)\n", strerror(errno)); + } + + for (n = 0; n < 6; n++) { + u64 stamp, now, delta; + + stamp = mmap_read_self(addr); + + for (i = 0; i < loops; i++) + tmp++; + + now = mmap_read_self(addr); + loops *= 10; + + delta = now - stamp; + pr_debug("%14d: %14Lu\n", n, (long long)delta); + + delta_sum += delta; + } + + munmap(addr, page_size); + close(fd); + + pr_debug(" "); + + if (!delta_sum) + return -1; + return 0; } -static int test__checkevent_breakpoint_r(struct perf_evlist *evlist) +static int test__rdpmc(void) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); - - TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); - TEST_ASSERT_VAL("wrong type", - PERF_TYPE_BREAKPOINT == evsel->attr.type); - TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config); - TEST_ASSERT_VAL("wrong bp_type", - HW_BREAKPOINT_R == evsel->attr.bp_type); - TEST_ASSERT_VAL("wrong bp_len", - HW_BREAKPOINT_LEN_4 == evsel->attr.bp_len); + int status = 0; + int wret = 0; + int ret; + int pid; + + pid = fork(); + if (pid < 0) + return -1; + + if (!pid) { + ret = __test__rdpmc(); + + exit(ret); + } + + wret = waitpid(pid, &status, 0); + if (wret < 0 || status) + return -1; + return 0; } -static int test__checkevent_breakpoint_w(struct perf_evlist *evlist) +#endif + +static int test__perf_pmu(void) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); - - TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); - TEST_ASSERT_VAL("wrong type", - PERF_TYPE_BREAKPOINT == evsel->attr.type); - TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config); - TEST_ASSERT_VAL("wrong bp_type", - HW_BREAKPOINT_W == evsel->attr.bp_type); - TEST_ASSERT_VAL("wrong bp_len", - HW_BREAKPOINT_LEN_4 == evsel->attr.bp_len); - return 0; + return perf_pmu__test(); } -static struct test__event_st { - const char *name; - __u32 type; - int (*check)(struct perf_evlist *evlist); -} test__events[] = { - { - .name = "syscalls:sys_enter_open", - .check = test__checkevent_tracepoint, - }, +static struct test { + const char *desc; + int (*func)(void); +} tests[] = { { - .name = "syscalls:*", - .check = test__checkevent_tracepoint_multi, + .desc = "vmlinux symtab matches kallsyms", + .func = test__vmlinux_matches_kallsyms, }, { - .name = "r1", - .check = test__checkevent_raw, + .desc = "detect open syscall event", + .func = test__open_syscall_event, }, { - .name = "1:1", - .check = test__checkevent_numeric, + .desc = "detect open syscall event on all cpus", + .func = test__open_syscall_event_on_all_cpus, }, { - .name = "instructions", - .check = test__checkevent_symbolic_name, + .desc = "read samples using the mmap interface", + .func = test__basic_mmap, }, { - .name = "faults", - .check = test__checkevent_symbolic_alias, + .desc = "parse events tests", + .func = parse_events__test, }, +#if defined(__x86_64__) || defined(__i386__) { - .name = "L1-dcache-load-miss", - .check = test__checkevent_genhw, + .desc = "x86 rdpmc test", + .func = test__rdpmc, }, +#endif { - .name = "mem:0", - .check = test__checkevent_breakpoint, + .desc = "Validate PERF_RECORD_* events & perf_sample fields", + .func = test__PERF_RECORD, }, { - .name = "mem:0:x", - .check = test__checkevent_breakpoint_x, + .desc = "Test perf pmu format parsing", + .func = test__perf_pmu, }, { - .name = "mem:0:r", - .check = test__checkevent_breakpoint_r, + .desc = "Test dso data interface", + .func = dso__test_data, }, { - .name = "mem:0:w", - .check = test__checkevent_breakpoint_w, + .func = NULL, }, }; -#define TEST__EVENTS_CNT (sizeof(test__events) / sizeof(struct test__event_st)) - -static int test__parse_events(void) +static bool perf_test__matches(int curr, int argc, const char *argv[]) { - struct perf_evlist *evlist; - u_int i; - int ret = 0; + int i; - for (i = 0; i < TEST__EVENTS_CNT; i++) { - struct test__event_st *e = &test__events[i]; + if (argc == 0) + return true; - evlist = perf_evlist__new(NULL, NULL); - if (evlist == NULL) - break; + for (i = 0; i < argc; ++i) { + char *end; + long nr = strtoul(argv[i], &end, 10); - ret = parse_events(evlist, e->name, 0); - if (ret) { - pr_debug("failed to parse event '%s', err %d\n", - e->name, ret); - break; + if (*end == '\0') { + if (nr == curr + 1) + return true; + continue; } - ret = e->check(evlist); - if (ret) - break; - - perf_evlist__delete(evlist); + if (strstr(tests[curr].desc, argv[i])) + return true; } - return ret; + return false; } -static struct test { - const char *desc; - int (*func)(void); -} tests[] = { - { - .desc = "vmlinux symtab matches kallsyms", - .func = test__vmlinux_matches_kallsyms, - }, - { - .desc = "detect open syscall event", - .func = test__open_syscall_event, - }, - { - .desc = "detect open syscall event on all cpus", - .func = test__open_syscall_event_on_all_cpus, - }, - { - .desc = "read samples using the mmap interface", - .func = test__basic_mmap, - }, - { - .desc = "parse events tests", - .func = test__parse_events, - }, - { - .func = NULL, - }, -}; -static int __cmd_test(void) +static int __cmd_test(int argc, const char *argv[]) { int i = 0; - page_size = sysconf(_SC_PAGE_SIZE); - while (tests[i].func) { - int err; - pr_info("%2d: %s:", i + 1, tests[i].desc); + int curr = i++, err; + + if (!perf_test__matches(curr, argc, argv)) + continue; + + pr_info("%2d: %s:", i, tests[curr].desc); pr_debug("\n--- start ---\n"); - err = tests[i].func(); - pr_debug("---- end ----\n%s:", tests[i].desc); + err = tests[curr].func(); + pr_debug("---- end ----\n%s:", tests[curr].desc); pr_info(" %s\n", err ? "FAILED!\n" : "Ok"); - ++i; } return 0; } -static const char * const test_usage[] = { - "perf test [<options>]", - NULL, -}; +static int perf_test__list(int argc, const char **argv) +{ + int i = 0; -static const struct option test_options[] = { - OPT_INTEGER('v', "verbose", &verbose, - "be more verbose (show symbol address, etc)"), - OPT_END() -}; + while (tests[i].func) { + int curr = i++; + + if (argc > 1 && !strstr(tests[curr].desc, argv[1])) + continue; + + pr_info("%2d: %s\n", i, tests[curr].desc); + } + + return 0; +} int cmd_test(int argc, const char **argv, const char *prefix __used) { + const char * const test_usage[] = { + "perf test [<options>] [{list <test-name-fragment>|[<test-name-fragments>|<test-numbers>]}]", + NULL, + }; + const struct option test_options[] = { + OPT_INCR('v', "verbose", &verbose, + "be more verbose (show symbol address, etc)"), + OPT_END() + }; + argc = parse_options(argc, argv, test_options, test_usage, 0); - if (argc) - usage_with_options(test_usage, test_options); + if (argc >= 1 && !strcmp(argv[0], "list")) + return perf_test__list(argc, argv); symbol_conf.priv_size = sizeof(int); symbol_conf.sort_by_name = true; @@ -913,7 +1222,5 @@ int cmd_test(int argc, const char **argv, const char *prefix __used) if (symbol__init() < 0) return -1; - setup_pager(); - - return __cmd_test(); + return __cmd_test(argc, argv); } diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index aa26f4d66d10..3b75b2e21ea5 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -19,6 +19,7 @@ #include "util/color.h" #include <linux/list.h> #include "util/cache.h" +#include "util/evsel.h" #include <linux/rbtree.h> #include "util/symbol.h" #include "util/callchain.h" @@ -31,13 +32,14 @@ #include "util/event.h" #include "util/session.h" #include "util/svghelper.h" +#include "util/tool.h" #define SUPPORT_OLD_POWER_EVENTS 1 #define PWR_EVENT_EXIT -1 -static char const *input_name = "perf.data"; -static char const *output_name = "output.svg"; +static const char *input_name; +static const char *output_name = "output.svg"; static unsigned int numcpus; static u64 min_freq; /* Lowest CPU frequency seen */ @@ -273,25 +275,28 @@ static int cpus_cstate_state[MAX_CPUS]; static u64 cpus_pstate_start_times[MAX_CPUS]; static u64 cpus_pstate_state[MAX_CPUS]; -static int process_comm_event(union perf_event *event, +static int process_comm_event(struct perf_tool *tool __used, + union perf_event *event, struct perf_sample *sample __used, - struct perf_session *session __used) + struct machine *machine __used) { pid_set_comm(event->comm.tid, event->comm.comm); return 0; } -static int process_fork_event(union perf_event *event, +static int process_fork_event(struct perf_tool *tool __used, + union perf_event *event, struct perf_sample *sample __used, - struct perf_session *session __used) + struct machine *machine __used) { pid_fork(event->fork.pid, event->fork.ppid, event->fork.time); return 0; } -static int process_exit_event(union perf_event *event, +static int process_exit_event(struct perf_tool *tool __used, + union perf_event *event, struct perf_sample *sample __used, - struct perf_session *session __used) + struct machine *machine __used) { pid_exit(event->fork.pid, event->fork.time); return 0; @@ -486,14 +491,15 @@ static void sched_switch(int cpu, u64 timestamp, struct trace_entry *te) } -static int process_sample_event(union perf_event *event __used, +static int process_sample_event(struct perf_tool *tool __used, + union perf_event *event __used, struct perf_sample *sample, - struct perf_evsel *evsel __used, - struct perf_session *session) + struct perf_evsel *evsel, + struct machine *machine __used) { struct trace_entry *te; - if (session->sample_type & PERF_SAMPLE_TIME) { + if (evsel->attr.sample_type & PERF_SAMPLE_TIME) { if (!first_time || first_time > sample->time) first_time = sample->time; if (last_time < sample->time) @@ -501,7 +507,7 @@ static int process_sample_event(union perf_event *event __used, } te = (void *)sample->raw_data; - if (session->sample_type & PERF_SAMPLE_RAW && sample->raw_size > 0) { + if ((evsel->attr.sample_type & PERF_SAMPLE_RAW) && sample->raw_size > 0) { char *event_str; #ifdef SUPPORT_OLD_POWER_EVENTS struct power_entry_old *peo; @@ -974,7 +980,7 @@ static void write_svg_file(const char *filename) svg_close(); } -static struct perf_event_ops event_ops = { +static struct perf_tool perf_timechart = { .comm = process_comm_event, .fork = process_fork_event, .exit = process_exit_event, @@ -985,7 +991,7 @@ static struct perf_event_ops event_ops = { static int __cmd_timechart(void) { struct perf_session *session = perf_session__new(input_name, O_RDONLY, - 0, false, &event_ops); + 0, false, &perf_timechart); int ret = -EINVAL; if (session == NULL) @@ -994,7 +1000,7 @@ static int __cmd_timechart(void) if (!perf_session__has_traces(session, "timechart record")) goto out_delete; - ret = perf_session__process_events(session, &event_ops); + ret = perf_session__process_events(session, &perf_timechart); if (ret) goto out_delete; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index d28013b7d61c..68cd61ef6ac5 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -5,6 +5,7 @@ * any workload, CPU or specific PID. * * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com> + * 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> * * Improvements and fixes by: * @@ -36,10 +37,13 @@ #include "util/parse-events.h" #include "util/cpumap.h" #include "util/xyarray.h" +#include "util/sort.h" +#include "util/intlist.h" #include "util/debug.h" #include <assert.h> +#include <elf.h> #include <fcntl.h> #include <stdio.h> @@ -57,49 +61,12 @@ #include <sys/prctl.h> #include <sys/wait.h> #include <sys/uio.h> +#include <sys/utsname.h> #include <sys/mman.h> #include <linux/unistd.h> #include <linux/types.h> -static struct perf_top top = { - .count_filter = 5, - .delay_secs = 2, - .display_weighted = -1, - .target_pid = -1, - .target_tid = -1, - .active_symbols = LIST_HEAD_INIT(top.active_symbols), - .active_symbols_lock = PTHREAD_MUTEX_INITIALIZER, - .active_symbols_cond = PTHREAD_COND_INITIALIZER, - .freq = 1000, /* 1 KHz */ -}; - -static bool system_wide = false; - -static bool use_tui, use_stdio; - -static int default_interval = 0; - -static bool kptr_restrict_warned; -static bool vmlinux_warned; -static bool inherit = false; -static int realtime_prio = 0; -static bool group = false; -static unsigned int page_size; -static unsigned int mmap_pages = 128; - -static bool dump_symtab = false; - -static struct winsize winsize; - -static const char *sym_filter = NULL; -struct sym_entry *sym_filter_entry_sched = NULL; -static int sym_pcnt_filter = 5; - -/* - * Source functions - */ - void get_term_dimensions(struct winsize *ws) { char *s = getenv("LINES"); @@ -122,37 +89,44 @@ void get_term_dimensions(struct winsize *ws) ws->ws_col = 80; } -static void update_print_entries(struct winsize *ws) +static void perf_top__update_print_entries(struct perf_top *top) { - top.print_entries = ws->ws_row; - - if (top.print_entries > 9) - top.print_entries -= 9; + if (top->print_entries > 9) + top->print_entries -= 9; } -static void sig_winch_handler(int sig __used) +static void perf_top__sig_winch(int sig __used, siginfo_t *info __used, void *arg) { - get_term_dimensions(&winsize); - update_print_entries(&winsize); + struct perf_top *top = arg; + + get_term_dimensions(&top->winsize); + if (!top->print_entries + || (top->print_entries+4) > top->winsize.ws_row) { + top->print_entries = top->winsize.ws_row; + } else { + top->print_entries += 4; + top->winsize.ws_row = top->print_entries; + } + perf_top__update_print_entries(top); } -static int parse_source(struct sym_entry *syme) +static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) { struct symbol *sym; struct annotation *notes; struct map *map; int err = -1; - if (!syme) + if (!he || !he->ms.sym) return -1; - sym = sym_entry__symbol(syme); - map = syme->map; + sym = he->ms.sym; + map = he->ms.map; /* * We can't annotate with just /proc/kallsyms */ - if (map->dso->symtab_type == SYMTAB__KALLSYMS) { + if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) { pr_err("Can't annotate %s: No vmlinux file was found in the " "path\n", sym->name); sleep(1); @@ -167,7 +141,7 @@ static int parse_source(struct sym_entry *syme) pthread_mutex_lock(¬es->lock); - if (symbol__alloc_hist(sym, top.evlist->nr_entries) < 0) { + if (symbol__alloc_hist(sym) < 0) { pthread_mutex_unlock(¬es->lock); pr_err("Not enough memory for annotating '%s' symbol!\n", sym->name); @@ -175,53 +149,96 @@ static int parse_source(struct sym_entry *syme) return err; } - err = symbol__annotate(sym, syme->map, 0); + err = symbol__annotate(sym, map, 0); if (err == 0) { out_assign: - top.sym_filter_entry = syme; + top->sym_filter_entry = he; } pthread_mutex_unlock(¬es->lock); return err; } -static void __zero_source_counters(struct sym_entry *syme) +static void __zero_source_counters(struct hist_entry *he) { - struct symbol *sym = sym_entry__symbol(syme); + struct symbol *sym = he->ms.sym; symbol__annotate_zero_histograms(sym); } -static void record_precise_ip(struct sym_entry *syme, struct map *map, - int counter, u64 ip) +static void ui__warn_map_erange(struct map *map, struct symbol *sym, u64 ip) +{ + struct utsname uts; + int err = uname(&uts); + + ui__warning("Out of bounds address found:\n\n" + "Addr: %" PRIx64 "\n" + "DSO: %s %c\n" + "Map: %" PRIx64 "-%" PRIx64 "\n" + "Symbol: %" PRIx64 "-%" PRIx64 " %c %s\n" + "Arch: %s\n" + "Kernel: %s\n" + "Tools: %s\n\n" + "Not all samples will be on the annotation output.\n\n" + "Please report to linux-kernel@vger.kernel.org\n", + ip, map->dso->long_name, dso__symtab_origin(map->dso), + map->start, map->end, sym->start, sym->end, + sym->binding == STB_GLOBAL ? 'g' : + sym->binding == STB_LOCAL ? 'l' : 'w', sym->name, + err ? "[unknown]" : uts.machine, + err ? "[unknown]" : uts.release, perf_version_string); + if (use_browser <= 0) + sleep(5); + + map->erange_warned = true; +} + +static void perf_top__record_precise_ip(struct perf_top *top, + struct hist_entry *he, + int counter, u64 ip) { struct annotation *notes; struct symbol *sym; + int err; - if (syme != top.sym_filter_entry) + if (he == NULL || he->ms.sym == NULL || + ((top->sym_filter_entry == NULL || + top->sym_filter_entry->ms.sym != he->ms.sym) && use_browser != 1)) return; - sym = sym_entry__symbol(syme); + sym = he->ms.sym; notes = symbol__annotation(sym); if (pthread_mutex_trylock(¬es->lock)) return; - ip = map->map_ip(map, ip); - symbol__inc_addr_samples(sym, map, counter, ip); + if (notes->src == NULL && symbol__alloc_hist(sym) < 0) { + pthread_mutex_unlock(¬es->lock); + pr_err("Not enough memory for annotating '%s' symbol!\n", + sym->name); + sleep(1); + return; + } + + ip = he->ms.map->map_ip(he->ms.map, ip); + err = symbol__inc_addr_samples(sym, he->ms.map, counter, ip); pthread_mutex_unlock(¬es->lock); + + if (err == -ERANGE && !he->ms.map->erange_warned) + ui__warn_map_erange(he->ms.map, sym, ip); } -static void show_details(struct sym_entry *syme) +static void perf_top__show_details(struct perf_top *top) { + struct hist_entry *he = top->sym_filter_entry; struct annotation *notes; struct symbol *symbol; int more; - if (!syme) + if (!he) return; - symbol = sym_entry__symbol(syme); + symbol = he->ms.sym; notes = symbol__annotation(symbol); pthread_mutex_lock(¬es->lock); @@ -229,15 +246,15 @@ static void show_details(struct sym_entry *syme) if (notes->src == NULL) goto out_unlock; - printf("Showing %s for %s\n", event_name(top.sym_evsel), symbol->name); - printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter); + printf("Showing %s for %s\n", perf_evsel__name(top->sym_evsel), symbol->name); + printf(" Events Pcnt (>=%d%%)\n", top->sym_pcnt_filter); - more = symbol__annotate_printf(symbol, syme->map, top.sym_evsel->idx, - 0, sym_pcnt_filter, top.print_entries, 4); - if (top.zero) - symbol__annotate_zero_histogram(symbol, top.sym_evsel->idx); + more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel->idx, + 0, top->sym_pcnt_filter, top->print_entries, 4); + if (top->zero) + symbol__annotate_zero_histogram(symbol, top->sym_evsel->idx); else - symbol__annotate_decay_histogram(symbol, top.sym_evsel->idx); + symbol__annotate_decay_histogram(symbol, top->sym_evsel->idx); if (more != 0) printf("%d lines not displayed, maybe increase display entries [e]\n", more); out_unlock: @@ -246,94 +263,60 @@ out_unlock: static const char CONSOLE_CLEAR[] = "[H[2J"; -static void __list_insert_active_sym(struct sym_entry *syme) +static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel, + struct addr_location *al, + struct perf_sample *sample) { - list_add(&syme->node, &top.active_symbols); + struct hist_entry *he; + + he = __hists__add_entry(&evsel->hists, al, NULL, sample->period); + if (he == NULL) + return NULL; + + hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); + return he; } -static void print_sym_table(struct perf_session *session) +static void perf_top__print_sym_table(struct perf_top *top) { char bf[160]; int printed = 0; - struct rb_node *nd; - struct sym_entry *syme; - struct rb_root tmp = RB_ROOT; - const int win_width = winsize.ws_col - 1; - int sym_width, dso_width, dso_short_width; - float sum_ksamples = perf_top__decay_samples(&top, &tmp); + const int win_width = top->winsize.ws_col - 1; puts(CONSOLE_CLEAR); - perf_top__header_snprintf(&top, bf, sizeof(bf)); + perf_top__header_snprintf(top, bf, sizeof(bf)); printf("%s\n", bf); - perf_top__reset_sample_counters(&top); + perf_top__reset_sample_counters(top); printf("%-*.*s\n", win_width, win_width, graph_dotted_line); - if (session->hists.stats.total_lost != 0) { - color_fprintf(stdout, PERF_COLOR_RED, "WARNING:"); - printf(" LOST %" PRIu64 " events, Check IO/CPU overload\n", - session->hists.stats.total_lost); + if (top->sym_evsel->hists.stats.nr_lost_warned != + top->sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST]) { + top->sym_evsel->hists.stats.nr_lost_warned = + top->sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST]; + color_fprintf(stdout, PERF_COLOR_RED, + "WARNING: LOST %d chunks, Check IO/CPU overload", + top->sym_evsel->hists.stats.nr_lost_warned); + ++printed; } - if (top.sym_filter_entry) { - show_details(top.sym_filter_entry); + if (top->sym_filter_entry) { + perf_top__show_details(top); return; } - perf_top__find_widths(&top, &tmp, &dso_width, &dso_short_width, - &sym_width); - - if (sym_width + dso_width > winsize.ws_col - 29) { - dso_width = dso_short_width; - if (sym_width + dso_width > winsize.ws_col - 29) - sym_width = winsize.ws_col - dso_width - 29; - } + hists__collapse_resort_threaded(&top->sym_evsel->hists); + hists__output_resort_threaded(&top->sym_evsel->hists); + hists__decay_entries_threaded(&top->sym_evsel->hists, + top->hide_user_symbols, + top->hide_kernel_symbols); + hists__output_recalc_col_len(&top->sym_evsel->hists, + top->winsize.ws_row - 3); putchar('\n'); - if (top.evlist->nr_entries == 1) - printf(" samples pcnt"); - else - printf(" weight samples pcnt"); - - if (verbose) - printf(" RIP "); - printf(" %-*.*s DSO\n", sym_width, sym_width, "function"); - printf(" %s _______ _____", - top.evlist->nr_entries == 1 ? " " : "______"); - if (verbose) - printf(" ________________"); - printf(" %-*.*s", sym_width, sym_width, graph_line); - printf(" %-*.*s", dso_width, dso_width, graph_line); - puts("\n"); - - for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) { - struct symbol *sym; - double pcnt; - - syme = rb_entry(nd, struct sym_entry, rb_node); - sym = sym_entry__symbol(syme); - if (++printed > top.print_entries || - (int)syme->snap_count < top.count_filter) - continue; - - pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) / - sum_ksamples)); - - if (top.evlist->nr_entries == 1 || !top.display_weighted) - printf("%20.2f ", syme->weight); - else - printf("%9.1f %10ld ", syme->weight, syme->snap_count); - - percent_color_fprintf(stdout, "%4.1f%%", pcnt); - if (verbose) - printf(" %016" PRIx64, sym->start); - printf(" %-*.*s", sym_width, sym_width, sym->name); - printf(" %-*.*s\n", dso_width, dso_width, - dso_width >= syme->map->dso->long_name_len ? - syme->map->dso->long_name : - syme->map->dso->short_name); - } + hists__fprintf(&top->sym_evsel->hists, NULL, false, false, + top->winsize.ws_row - 4 - printed, win_width, stdout); } static void prompt_integer(int *target, const char *msg) @@ -371,16 +354,17 @@ static void prompt_percent(int *target, const char *msg) *target = tmp; } -static void prompt_symbol(struct sym_entry **target, const char *msg) +static void perf_top__prompt_symbol(struct perf_top *top, const char *msg) { char *buf = malloc(0), *p; - struct sym_entry *syme = *target, *n, *found = NULL; + struct hist_entry *syme = top->sym_filter_entry, *n, *found = NULL; + struct rb_node *next; size_t dummy = 0; /* zero counters of active symbol */ if (syme) { __zero_source_counters(syme); - *target = NULL; + top->sym_filter_entry = NULL; } fprintf(stdout, "\n%s: ", msg); @@ -391,66 +375,59 @@ static void prompt_symbol(struct sym_entry **target, const char *msg) if (p) *p = 0; - pthread_mutex_lock(&top.active_symbols_lock); - syme = list_entry(top.active_symbols.next, struct sym_entry, node); - pthread_mutex_unlock(&top.active_symbols_lock); - - list_for_each_entry_safe_from(syme, n, &top.active_symbols, node) { - struct symbol *sym = sym_entry__symbol(syme); - - if (!strcmp(buf, sym->name)) { - found = syme; + next = rb_first(&top->sym_evsel->hists.entries); + while (next) { + n = rb_entry(next, struct hist_entry, rb_node); + if (n->ms.sym && !strcmp(buf, n->ms.sym->name)) { + found = n; break; } + next = rb_next(&n->rb_node); } if (!found) { fprintf(stderr, "Sorry, %s is not active.\n", buf); sleep(1); - return; } else - parse_source(found); + perf_top__parse_source(top, found); out_free: free(buf); } -static void print_mapped_keys(void) +static void perf_top__print_mapped_keys(struct perf_top *top) { char *name = NULL; - if (top.sym_filter_entry) { - struct symbol *sym = sym_entry__symbol(top.sym_filter_entry); + if (top->sym_filter_entry) { + struct symbol *sym = top->sym_filter_entry->ms.sym; name = sym->name; } fprintf(stdout, "\nMapped keys:\n"); - fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", top.delay_secs); - fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", top.print_entries); + fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", top->delay_secs); + fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", top->print_entries); - if (top.evlist->nr_entries > 1) - fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(top.sym_evsel)); + if (top->evlist->nr_entries > 1) + fprintf(stdout, "\t[E] active event counter. \t(%s)\n", perf_evsel__name(top->sym_evsel)); - fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", top.count_filter); + fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", top->count_filter); - fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter); + fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", top->sym_pcnt_filter); fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL"); fprintf(stdout, "\t[S] stop annotation.\n"); - if (top.evlist->nr_entries > 1) - fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", top.display_weighted ? 1 : 0); - fprintf(stdout, "\t[K] hide kernel_symbols symbols. \t(%s)\n", - top.hide_kernel_symbols ? "yes" : "no"); + top->hide_kernel_symbols ? "yes" : "no"); fprintf(stdout, "\t[U] hide user symbols. \t(%s)\n", - top.hide_user_symbols ? "yes" : "no"); - fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", top.zero ? 1 : 0); + top->hide_user_symbols ? "yes" : "no"); + fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", top->zero ? 1 : 0); fprintf(stdout, "\t[qQ] quit.\n"); } -static int key_mapped(int c) +static int perf_top__key_mapped(struct perf_top *top, int c) { switch (c) { case 'd': @@ -466,8 +443,7 @@ static int key_mapped(int c) case 'S': return 1; case 'E': - case 'w': - return top.evlist->nr_entries > 1 ? 1 : 0; + return top->evlist->nr_entries > 1 ? 1 : 0; default: break; } @@ -475,13 +451,13 @@ static int key_mapped(int c) return 0; } -static void handle_keypress(struct perf_session *session, int c) +static void perf_top__handle_keypress(struct perf_top *top, int c) { - if (!key_mapped(c)) { + if (!perf_top__key_mapped(top, c)) { struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; struct termios tc, save; - print_mapped_keys(); + perf_top__print_mapped_keys(top); fprintf(stdout, "\nEnter selection, or unmapped key to continue: "); fflush(stdout); @@ -496,114 +472,140 @@ static void handle_keypress(struct perf_session *session, int c) c = getc(stdin); tcsetattr(0, TCSAFLUSH, &save); - if (!key_mapped(c)) + if (!perf_top__key_mapped(top, c)) return; } switch (c) { case 'd': - prompt_integer(&top.delay_secs, "Enter display delay"); - if (top.delay_secs < 1) - top.delay_secs = 1; + prompt_integer(&top->delay_secs, "Enter display delay"); + if (top->delay_secs < 1) + top->delay_secs = 1; break; case 'e': - prompt_integer(&top.print_entries, "Enter display entries (lines)"); - if (top.print_entries == 0) { - sig_winch_handler(SIGWINCH); - signal(SIGWINCH, sig_winch_handler); - } else + prompt_integer(&top->print_entries, "Enter display entries (lines)"); + if (top->print_entries == 0) { + struct sigaction act = { + .sa_sigaction = perf_top__sig_winch, + .sa_flags = SA_SIGINFO, + }; + perf_top__sig_winch(SIGWINCH, NULL, top); + sigaction(SIGWINCH, &act, NULL); + } else { + perf_top__sig_winch(SIGWINCH, NULL, top); signal(SIGWINCH, SIG_DFL); + } break; case 'E': - if (top.evlist->nr_entries > 1) { + if (top->evlist->nr_entries > 1) { /* Select 0 as the default event: */ int counter = 0; fprintf(stderr, "\nAvailable events:"); - list_for_each_entry(top.sym_evsel, &top.evlist->entries, node) - fprintf(stderr, "\n\t%d %s", top.sym_evsel->idx, event_name(top.sym_evsel)); + list_for_each_entry(top->sym_evsel, &top->evlist->entries, node) + fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, perf_evsel__name(top->sym_evsel)); prompt_integer(&counter, "Enter details event counter"); - if (counter >= top.evlist->nr_entries) { - top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node); - fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(top.sym_evsel)); + if (counter >= top->evlist->nr_entries) { + top->sym_evsel = list_entry(top->evlist->entries.next, struct perf_evsel, node); + fprintf(stderr, "Sorry, no such event, using %s.\n", perf_evsel__name(top->sym_evsel)); sleep(1); break; } - list_for_each_entry(top.sym_evsel, &top.evlist->entries, node) - if (top.sym_evsel->idx == counter) + list_for_each_entry(top->sym_evsel, &top->evlist->entries, node) + if (top->sym_evsel->idx == counter) break; } else - top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node); + top->sym_evsel = list_entry(top->evlist->entries.next, struct perf_evsel, node); break; case 'f': - prompt_integer(&top.count_filter, "Enter display event count filter"); + prompt_integer(&top->count_filter, "Enter display event count filter"); break; case 'F': - prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)"); + prompt_percent(&top->sym_pcnt_filter, + "Enter details display event filter (percent)"); break; case 'K': - top.hide_kernel_symbols = !top.hide_kernel_symbols; + top->hide_kernel_symbols = !top->hide_kernel_symbols; break; case 'q': case 'Q': printf("exiting.\n"); - if (dump_symtab) - perf_session__fprintf_dsos(session, stderr); + if (top->dump_symtab) + perf_session__fprintf_dsos(top->session, stderr); exit(0); case 's': - prompt_symbol(&top.sym_filter_entry, "Enter details symbol"); + perf_top__prompt_symbol(top, "Enter details symbol"); break; case 'S': - if (!top.sym_filter_entry) + if (!top->sym_filter_entry) break; else { - struct sym_entry *syme = top.sym_filter_entry; + struct hist_entry *syme = top->sym_filter_entry; - top.sym_filter_entry = NULL; + top->sym_filter_entry = NULL; __zero_source_counters(syme); } break; case 'U': - top.hide_user_symbols = !top.hide_user_symbols; - break; - case 'w': - top.display_weighted = ~top.display_weighted; + top->hide_user_symbols = !top->hide_user_symbols; break; case 'z': - top.zero = !top.zero; + top->zero = !top->zero; break; default: break; } } -static void *display_thread_tui(void *arg __used) +static void perf_top__sort_new_samples(void *arg) { - int err = 0; - pthread_mutex_lock(&top.active_symbols_lock); - while (list_empty(&top.active_symbols)) { - err = pthread_cond_wait(&top.active_symbols_cond, - &top.active_symbols_lock); - if (err) - break; - } - pthread_mutex_unlock(&top.active_symbols_lock); - if (!err) - perf_top__tui_browser(&top); + struct perf_top *t = arg; + perf_top__reset_sample_counters(t); + + if (t->evlist->selected != NULL) + t->sym_evsel = t->evlist->selected; + + hists__collapse_resort_threaded(&t->sym_evsel->hists); + hists__output_resort_threaded(&t->sym_evsel->hists); + hists__decay_entries_threaded(&t->sym_evsel->hists, + t->hide_user_symbols, + t->hide_kernel_symbols); +} + +static void *display_thread_tui(void *arg) +{ + struct perf_evsel *pos; + struct perf_top *top = arg; + const char *help = "For a higher level overview, try: perf top --sort comm,dso"; + + perf_top__sort_new_samples(top); + + /* + * Initialize the uid_filter_str, in the future the TUI will allow + * Zooming in/out UIDs. For now juse use whatever the user passed + * via --uid. + */ + list_for_each_entry(pos, &top->evlist->entries, node) + pos->hists.uid_filter_str = top->target.uid_str; + + perf_evlist__tui_browse_hists(top->evlist, help, + perf_top__sort_new_samples, + top, top->delay_secs); + exit_browser(0); exit(0); return NULL; } -static void *display_thread(void *arg __used) +static void *display_thread(void *arg) { struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; struct termios tc, save; + struct perf_top *top = arg; int delay_msecs, c; - struct perf_session *session = (struct perf_session *) arg; tcgetattr(0, &save); tc = save; @@ -611,20 +613,35 @@ static void *display_thread(void *arg __used) tc.c_cc[VMIN] = 0; tc.c_cc[VTIME] = 0; + pthread__unblock_sigwinch(); repeat: - delay_msecs = top.delay_secs * 1000; + delay_msecs = top->delay_secs * 1000; tcsetattr(0, TCSANOW, &tc); /* trash return*/ getc(stdin); - do { - print_sym_table(session); - } while (!poll(&stdin_poll, 1, delay_msecs) == 1); - + while (1) { + perf_top__print_sym_table(top); + /* + * Either timeout expired or we got an EINTR due to SIGWINCH, + * refresh screen in both cases. + */ + switch (poll(&stdin_poll, 1, delay_msecs)) { + case 0: + continue; + case -1: + if (errno == EINTR) + continue; + /* Fall trhu */ + default: + goto process_hotkey; + } + } +process_hotkey: c = getc(stdin); tcsetattr(0, TCSAFLUSH, &save); - handle_keypress(session, c); + perf_top__handle_keypress(top, c); goto repeat; return NULL; @@ -632,6 +649,7 @@ repeat: /* Tag samples to be skipped. */ static const char *skip_symbols[] = { + "intel_idle", "default_idle", "native_safe_halt", "cpu_idle", @@ -645,9 +663,8 @@ static const char *skip_symbols[] = { NULL }; -static int symbol_filter(struct map *map, struct symbol *sym) +static int symbol_filter(struct map *map __used, struct symbol *sym) { - struct sym_entry *syme; const char *name = sym->name; int i; @@ -667,16 +684,6 @@ static int symbol_filter(struct map *map, struct symbol *sym) strstr(name, "_text_end")) return 1; - syme = symbol__priv(sym); - syme->map = map; - symbol__annotate_init(map, sym); - - if (!top.sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) { - /* schedule initial sym_filter_entry setup */ - sym_filter_entry_sched = syme; - sym_filter = NULL; - } - for (i = 0; skip_symbols[i]; i++) { if (!strcmp(skip_symbols[i], name)) { sym->ignore = true; @@ -687,61 +694,47 @@ static int symbol_filter(struct map *map, struct symbol *sym) return 0; } -static void perf_event__process_sample(const union perf_event *event, +static void perf_event__process_sample(struct perf_tool *tool, + const union perf_event *event, + struct perf_evsel *evsel, struct perf_sample *sample, - struct perf_session *session) + struct machine *machine) { + struct perf_top *top = container_of(tool, struct perf_top, tool); + struct symbol *parent = NULL; u64 ip = event->ip.ip; - struct sym_entry *syme; struct addr_location al; - struct machine *machine; - u8 origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + int err; - ++top.samples; + if (!machine && perf_guest) { + static struct intlist *seen; - switch (origin) { - case PERF_RECORD_MISC_USER: - ++top.us_samples; - if (top.hide_user_symbols) - return; - machine = perf_session__find_host_machine(session); - break; - case PERF_RECORD_MISC_KERNEL: - ++top.kernel_samples; - if (top.hide_kernel_symbols) - return; - machine = perf_session__find_host_machine(session); - break; - case PERF_RECORD_MISC_GUEST_KERNEL: - ++top.guest_kernel_samples; - machine = perf_session__find_machine(session, event->ip.pid); - break; - case PERF_RECORD_MISC_GUEST_USER: - ++top.guest_us_samples; - /* - * TODO: we don't process guest user from host side - * except simple counting. - */ - return; - default: + if (!seen) + seen = intlist__new(); + + if (!intlist__has_entry(seen, event->ip.pid)) { + pr_err("Can't find guest [%d]'s kernel information\n", + event->ip.pid); + intlist__add(seen, event->ip.pid); + } return; } - if (!machine && perf_guest) { - pr_err("Can't find guest [%d]'s kernel information\n", - event->ip.pid); + if (!machine) { + pr_err("%u unprocessable samples recorded.", + top->session->hists.stats.nr_unprocessable_samples++); return; } if (event->header.misc & PERF_RECORD_MISC_EXACT_IP) - top.exact_samples++; + top->exact_samples++; - if (perf_event__preprocess_sample(event, session, &al, sample, + if (perf_event__preprocess_sample(event, machine, &al, sample, symbol_filter) < 0 || al.filtered) return; - if (!kptr_restrict_warned && + if (!top->kptr_restrict_warned && symbol_conf.kptr_restrict && al.cpumode == PERF_RECORD_MISC_KERNEL) { ui__warning( @@ -752,7 +745,7 @@ static void perf_event__process_sample(const union perf_event *event, " modules" : ""); if (use_browser <= 0) sleep(5); - kptr_restrict_warned = true; + top->kptr_restrict_warned = true; } if (al.sym == NULL) { @@ -768,7 +761,7 @@ static void perf_event__process_sample(const union perf_event *event, * --hide-kernel-symbols, even if the user specifies an * invalid --vmlinux ;-) */ - if (!kptr_restrict_warned && !vmlinux_warned && + if (!top->kptr_restrict_warned && !top->vmlinux_warned && al.map == machine->vmlinux_maps[MAP__FUNCTION] && RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) { if (symbol_conf.vmlinux_name) { @@ -781,91 +774,134 @@ static void perf_event__process_sample(const union perf_event *event, if (use_browser <= 0) sleep(5); - vmlinux_warned = true; + top->vmlinux_warned = true; } - - return; } - /* let's see, whether we need to install initial sym_filter_entry */ - if (sym_filter_entry_sched) { - top.sym_filter_entry = sym_filter_entry_sched; - sym_filter_entry_sched = NULL; - if (parse_source(top.sym_filter_entry) < 0) { - struct symbol *sym = sym_entry__symbol(top.sym_filter_entry); - - pr_err("Can't annotate %s", sym->name); - if (top.sym_filter_entry->map->dso->symtab_type == SYMTAB__KALLSYMS) { - pr_err(": No vmlinux file was found in the path:\n"); - machine__fprintf_vmlinux_path(machine, stderr); - } else - pr_err(".\n"); - exit(1); + if (al.sym == NULL || !al.sym->ignore) { + struct hist_entry *he; + + if ((sort__has_parent || symbol_conf.use_callchain) && + sample->callchain) { + err = machine__resolve_callchain(machine, al.thread, + sample->callchain, &parent); + if (err) + return; } - } - syme = symbol__priv(al.sym); - if (!al.sym->ignore) { - struct perf_evsel *evsel; + he = perf_evsel__add_hist_entry(evsel, &al, sample); + if (he == NULL) { + pr_err("Problem incrementing symbol period, skipping event\n"); + return; + } - evsel = perf_evlist__id2evsel(top.evlist, sample->id); - assert(evsel != NULL); - syme->count[evsel->idx]++; - record_precise_ip(syme, al.map, evsel->idx, ip); - pthread_mutex_lock(&top.active_symbols_lock); - if (list_empty(&syme->node) || !syme->node.next) { - static bool first = true; - __list_insert_active_sym(syme); - if (first) { - pthread_cond_broadcast(&top.active_symbols_cond); - first = false; - } + if (symbol_conf.use_callchain) { + err = callchain_append(he->callchain, &callchain_cursor, + sample->period); + if (err) + return; } - pthread_mutex_unlock(&top.active_symbols_lock); + + if (top->sort_has_symbols) + perf_top__record_precise_ip(top, he, evsel->idx, ip); } + + return; } -static void perf_session__mmap_read_idx(struct perf_session *self, int idx) +static void perf_top__mmap_read_idx(struct perf_top *top, int idx) { struct perf_sample sample; + struct perf_evsel *evsel; + struct perf_session *session = top->session; union perf_event *event; + struct machine *machine; + u8 origin; int ret; - while ((event = perf_evlist__mmap_read(top.evlist, idx)) != NULL) { - ret = perf_session__parse_sample(self, event, &sample); + while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) { + ret = perf_evlist__parse_sample(top->evlist, event, &sample, false); if (ret) { pr_err("Can't parse sample, err = %d\n", ret); continue; } + evsel = perf_evlist__id2evsel(session->evlist, sample.id); + assert(evsel != NULL); + + origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + if (event->header.type == PERF_RECORD_SAMPLE) - perf_event__process_sample(event, &sample, self); - else - perf_event__process(event, &sample, self); + ++top->samples; + + switch (origin) { + case PERF_RECORD_MISC_USER: + ++top->us_samples; + if (top->hide_user_symbols) + continue; + machine = perf_session__find_host_machine(session); + break; + case PERF_RECORD_MISC_KERNEL: + ++top->kernel_samples; + if (top->hide_kernel_symbols) + continue; + machine = perf_session__find_host_machine(session); + break; + case PERF_RECORD_MISC_GUEST_KERNEL: + ++top->guest_kernel_samples; + machine = perf_session__find_machine(session, event->ip.pid); + break; + case PERF_RECORD_MISC_GUEST_USER: + ++top->guest_us_samples; + /* + * TODO: we don't process guest user from host side + * except simple counting. + */ + /* Fall thru */ + default: + continue; + } + + + if (event->header.type == PERF_RECORD_SAMPLE) { + perf_event__process_sample(&top->tool, event, evsel, + &sample, machine); + } else if (event->header.type < PERF_RECORD_MAX) { + hists__inc_nr_events(&evsel->hists, event->header.type); + perf_event__process(&top->tool, event, &sample, machine); + } else + ++session->hists.stats.nr_unknown_events; } } -static void perf_session__mmap_read(struct perf_session *self) +static void perf_top__mmap_read(struct perf_top *top) { int i; - for (i = 0; i < top.evlist->nr_mmaps; i++) - perf_session__mmap_read_idx(self, i); + for (i = 0; i < top->evlist->nr_mmaps; i++) + perf_top__mmap_read_idx(top, i); } -static void start_counters(struct perf_evlist *evlist) +static void perf_top__start_counters(struct perf_top *top) { - struct perf_evsel *counter; + struct perf_evsel *counter, *first; + struct perf_evlist *evlist = top->evlist; + + first = list_entry(evlist->entries.next, struct perf_evsel, node); list_for_each_entry(counter, &evlist->entries, node) { struct perf_event_attr *attr = &counter->attr; + struct xyarray *group_fd = NULL; + + if (top->group && counter != first) + group_fd = first->fd; attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; - if (top.freq) { + if (top->freq) { attr->sample_type |= PERF_SAMPLE_PERIOD; attr->freq = 1; - attr->sample_freq = top.freq; + attr->sample_freq = top->freq; } if (evlist->nr_entries > 1) { @@ -873,40 +909,77 @@ static void start_counters(struct perf_evlist *evlist) attr->read_format |= PERF_FORMAT_ID; } + if (perf_target__has_cpu(&top->target)) + attr->sample_type |= PERF_SAMPLE_CPU; + + if (symbol_conf.use_callchain) + attr->sample_type |= PERF_SAMPLE_CALLCHAIN; + attr->mmap = 1; - attr->inherit = inherit; + attr->comm = 1; + attr->inherit = top->inherit; +fallback_missing_features: + if (top->exclude_guest_missing) + attr->exclude_guest = attr->exclude_host = 0; +retry_sample_id: + attr->sample_id_all = top->sample_id_all_missing ? 0 : 1; try_again: - if (perf_evsel__open(counter, top.evlist->cpus, - top.evlist->threads, group) < 0) { + if (perf_evsel__open(counter, top->evlist->cpus, + top->evlist->threads, top->group, + group_fd) < 0) { int err = errno; if (err == EPERM || err == EACCES) { - ui__warning_paranoid(); + ui__error_paranoid(); goto out_err; + } else if (err == EINVAL) { + if (!top->exclude_guest_missing && + (attr->exclude_guest || attr->exclude_host)) { + pr_debug("Old kernel, cannot exclude " + "guest or host samples.\n"); + top->exclude_guest_missing = true; + goto fallback_missing_features; + } else if (!top->sample_id_all_missing) { + /* + * Old kernel, no attr->sample_id_type_all field + */ + top->sample_id_all_missing = true; + goto retry_sample_id; + } } /* * If it's cycles then fall back to hrtimer * based cpu-clock-tick sw counter, which * is always available even if no PMU support: */ - if (attr->type == PERF_TYPE_HARDWARE && - attr->config == PERF_COUNT_HW_CPU_CYCLES) { + if ((err == ENOENT || err == ENXIO) && + (attr->type == PERF_TYPE_HARDWARE) && + (attr->config == PERF_COUNT_HW_CPU_CYCLES)) { + if (verbose) ui__warning("Cycles event not supported,\n" "trying to fall back to cpu-clock-ticks\n"); attr->type = PERF_TYPE_SOFTWARE; attr->config = PERF_COUNT_SW_CPU_CLOCK; + if (counter->name) { + free(counter->name); + counter->name = NULL; + } goto try_again; } if (err == ENOENT) { - ui__warning("The %s event is not supported.\n", - event_name(counter)); + ui__error("The %s event is not supported.\n", + perf_evsel__name(counter)); + goto out_err; + } else if (err == EMFILE) { + ui__error("Too many events are opened.\n" + "Try again after reducing the number of events\n"); goto out_err; } - ui__warning("The sys_perf_event_open() syscall " + ui__error("The sys_perf_event_open() syscall " "returned with %d (%s). /bin/dmesg " "may provide additional information.\n" "No CONFIG_PERF_EVENTS=y kernel support " @@ -915,8 +988,8 @@ try_again: } } - if (perf_evlist__mmap(evlist, mmap_pages, false) < 0) { - ui__warning("Failed to mmap with %d (%s)\n", + if (perf_evlist__mmap(evlist, top->mmap_pages, false) < 0) { + ui__error("Failed to mmap with %d (%s)\n", errno, strerror(errno)); goto out_err; } @@ -928,58 +1001,162 @@ out_err: exit(0); } -static int __cmd_top(void) +static int perf_top__setup_sample_type(struct perf_top *top) +{ + if (!top->sort_has_symbols) { + if (symbol_conf.use_callchain) { + ui__error("Selected -g but \"sym\" not present in --sort/-s."); + return -EINVAL; + } + } else if (!top->dont_use_callchains && callchain_param.mode != CHAIN_NONE) { + if (callchain_register_param(&callchain_param) < 0) { + ui__error("Can't register callchain params.\n"); + return -EINVAL; + } + } + + return 0; +} + +static int __cmd_top(struct perf_top *top) { pthread_t thread; - int ret __used; + int ret; /* * FIXME: perf_session__new should allow passing a O_MMAP, so that all this * mmap reading, etc is encapsulated in it. Use O_WRONLY for now. */ - struct perf_session *session = perf_session__new(NULL, O_WRONLY, false, false, NULL); - if (session == NULL) + top->session = perf_session__new(NULL, O_WRONLY, false, false, NULL); + if (top->session == NULL) return -ENOMEM; - if (top.target_tid != -1) - perf_event__synthesize_thread_map(top.evlist->threads, - perf_event__process, session); - else - perf_event__synthesize_threads(perf_event__process, session); + ret = perf_top__setup_sample_type(top); + if (ret) + goto out_delete; - start_counters(top.evlist); - session->evlist = top.evlist; - perf_session__update_sample_type(session); + if (perf_target__has_task(&top->target)) + perf_event__synthesize_thread_map(&top->tool, top->evlist->threads, + perf_event__process, + &top->session->host_machine); + else + perf_event__synthesize_threads(&top->tool, perf_event__process, + &top->session->host_machine); + perf_top__start_counters(top); + top->session->evlist = top->evlist; + perf_session__set_id_hdr_size(top->session); /* Wait for a minimal set of events before starting the snapshot */ - poll(top.evlist->pollfd, top.evlist->nr_fds, 100); + poll(top->evlist->pollfd, top->evlist->nr_fds, 100); - perf_session__mmap_read(session); + perf_top__mmap_read(top); if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui : - display_thread), session)) { - printf("Could not create display thread.\n"); + display_thread), top)) { + ui__error("Could not create display thread.\n"); exit(-1); } - if (realtime_prio) { + if (top->realtime_prio) { struct sched_param param; - param.sched_priority = realtime_prio; + param.sched_priority = top->realtime_prio; if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { - printf("Could not set realtime priority.\n"); + ui__error("Could not set realtime priority.\n"); exit(-1); } } while (1) { - u64 hits = top.samples; + u64 hits = top->samples; + + perf_top__mmap_read(top); + + if (hits == top->samples) + ret = poll(top->evlist->pollfd, top->evlist->nr_fds, 100); + } + +out_delete: + perf_session__delete(top->session); + top->session = NULL; + + return 0; +} + +static int +parse_callchain_opt(const struct option *opt, const char *arg, int unset) +{ + struct perf_top *top = (struct perf_top *)opt->value; + char *tok, *tok2; + char *endptr; + + /* + * --no-call-graph + */ + if (unset) { + top->dont_use_callchains = true; + return 0; + } + + symbol_conf.use_callchain = true; + + if (!arg) + return 0; + + tok = strtok((char *)arg, ","); + if (!tok) + return -1; + + /* get the output mode */ + if (!strncmp(tok, "graph", strlen(arg))) + callchain_param.mode = CHAIN_GRAPH_ABS; + + else if (!strncmp(tok, "flat", strlen(arg))) + callchain_param.mode = CHAIN_FLAT; - perf_session__mmap_read(session); + else if (!strncmp(tok, "fractal", strlen(arg))) + callchain_param.mode = CHAIN_GRAPH_REL; - if (hits == top.samples) - ret = poll(top.evlist->pollfd, top.evlist->nr_fds, 100); + else if (!strncmp(tok, "none", strlen(arg))) { + callchain_param.mode = CHAIN_NONE; + symbol_conf.use_callchain = false; + + return 0; + } else + return -1; + + /* get the min percentage */ + tok = strtok(NULL, ","); + if (!tok) + goto setup; + + callchain_param.min_percent = strtod(tok, &endptr); + if (tok == endptr) + return -1; + + /* get the print limit */ + tok2 = strtok(NULL, ","); + if (!tok2) + goto setup; + + if (tok2[0] != 'c') { + callchain_param.print_limit = strtod(tok2, &endptr); + tok2 = strtok(NULL, ","); + if (!tok2) + goto setup; } + /* get the call chain order */ + if (!strcmp(tok2, "caller")) + callchain_param.order = ORDER_CALLER; + else if (!strcmp(tok2, "callee")) + callchain_param.order = ORDER_CALLEE; + else + return -1; +setup: + if (callchain_register_param(&callchain_param) < 0) { + fprintf(stderr, "Can't register callchain params\n"); + return -1; + } return 0; } @@ -988,38 +1165,54 @@ static const char * const top_usage[] = { NULL }; -static const struct option options[] = { +int cmd_top(int argc, const char **argv, const char *prefix __used) +{ + struct perf_evsel *pos; + int status; + char errbuf[BUFSIZ]; + struct perf_top top = { + .count_filter = 5, + .delay_secs = 2, + .freq = 4000, /* 4 KHz */ + .mmap_pages = 128, + .sym_pcnt_filter = 5, + .target = { + .uses_mmap = true, + }, + }; + char callchain_default_opt[] = "fractal,0.5,callee"; + const struct option options[] = { OPT_CALLBACK('e', "event", &top.evlist, "event", "event selector. use 'perf list' to list available events", parse_events_option), - OPT_INTEGER('c', "count", &default_interval, + OPT_INTEGER('c', "count", &top.default_interval, "event period to sample"), - OPT_INTEGER('p', "pid", &top.target_pid, + OPT_STRING('p', "pid", &top.target.pid, "pid", "profile events on existing process id"), - OPT_INTEGER('t', "tid", &top.target_tid, + OPT_STRING('t', "tid", &top.target.tid, "tid", "profile events on existing thread id"), - OPT_BOOLEAN('a', "all-cpus", &system_wide, + OPT_BOOLEAN('a', "all-cpus", &top.target.system_wide, "system-wide collection from all CPUs"), - OPT_STRING('C', "cpu", &top.cpu_list, "cpu", + OPT_STRING('C', "cpu", &top.target.cpu_list, "cpu", "list of cpus to monitor"), OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, "file", "vmlinux pathname"), OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols, "hide kernel symbols"), - OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"), - OPT_INTEGER('r', "realtime", &realtime_prio, + OPT_UINTEGER('m', "mmap-pages", &top.mmap_pages, "number of mmap data pages"), + OPT_INTEGER('r', "realtime", &top.realtime_prio, "collect data with this RT SCHED_FIFO priority"), OPT_INTEGER('d', "delay", &top.delay_secs, "number of seconds to delay between refreshes"), - OPT_BOOLEAN('D', "dump-symtab", &dump_symtab, + OPT_BOOLEAN('D', "dump-symtab", &top.dump_symtab, "dump the symbol table used for profiling"), OPT_INTEGER('f', "count-filter", &top.count_filter, "only display functions with more events than this"), - OPT_BOOLEAN('g', "group", &group, + OPT_BOOLEAN('g', "group", &top.group, "put the counters into a counter group"), - OPT_BOOLEAN('i', "inherit", &inherit, + OPT_BOOLEAN('i', "inherit", &top.inherit, "child tasks inherit counters"), - OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name", + OPT_STRING(0, "sym-annotate", &top.sym_filter, "symbol name", "symbol to annotate"), OPT_BOOLEAN('z', "zero", &top.zero, "zero history across updates"), @@ -1029,111 +1222,143 @@ static const struct option options[] = { "display this many functions"), OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols, "hide user symbols"), - OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"), - OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"), + OPT_BOOLEAN(0, "tui", &top.use_tui, "Use the TUI interface"), + OPT_BOOLEAN(0, "stdio", &top.use_stdio, "Use the stdio interface"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), + OPT_STRING('s', "sort", &sort_order, "key[,key2...]", + "sort by key(s): pid, comm, dso, symbol, parent"), + OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples, + "Show a column with the number of samples"), + OPT_CALLBACK_DEFAULT('G', "call-graph", &top, "output_type,min_percent, call_order", + "Display callchains using output_type (graph, flat, fractal, or none), min percent threshold and callchain order. " + "Default: fractal,0.5,callee", &parse_callchain_opt, + callchain_default_opt), + OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, + "Show a column with the sum of periods"), + OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]", + "only consider symbols in these dsos"), + OPT_STRING(0, "comms", &symbol_conf.comm_list_str, "comm[,comm...]", + "only consider symbols in these comms"), + OPT_STRING(0, "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", + "only consider these symbols"), + OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src, + "Interleave source code with assembly code (default)"), + OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw, + "Display raw encoding of assembly instructions (default)"), + OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", + "Specify disassembler style (e.g. -M intel for intel syntax)"), + OPT_STRING('u', "uid", &top.target.uid_str, "user", "user to profile"), OPT_END() -}; - -int cmd_top(int argc, const char **argv, const char *prefix __used) -{ - struct perf_evsel *pos; - int status = -ENOMEM; + }; top.evlist = perf_evlist__new(NULL, NULL); if (top.evlist == NULL) return -ENOMEM; - page_size = sysconf(_SC_PAGE_SIZE); + symbol_conf.exclude_other = false; argc = parse_options(argc, argv, options, top_usage, 0); if (argc) usage_with_options(top_usage, options); - /* - * XXX For now start disabled, only using TUI if explicitely asked for. - * Change that when handle_keys equivalent gets written, live annotation - * done, etc. - */ - use_browser = 0; + if (sort_order == default_sort_order) + sort_order = "dso,symbol"; + + setup_sorting(top_usage, options); - if (use_stdio) + if (top.use_stdio) use_browser = 0; - else if (use_tui) + else if (top.use_tui) use_browser = 1; setup_browser(false); - /* CPU and PID are mutually exclusive */ - if (top.target_tid > 0 && top.cpu_list) { - printf("WARNING: PID switch overriding CPU\n"); - sleep(1); - top.cpu_list = NULL; + status = perf_target__validate(&top.target); + if (status) { + perf_target__strerror(&top.target, status, errbuf, BUFSIZ); + ui__warning("%s", errbuf); + } + + status = perf_target__parse_uid(&top.target); + if (status) { + int saved_errno = errno; + + perf_target__strerror(&top.target, status, errbuf, BUFSIZ); + ui__error("%s", errbuf); + + status = -saved_errno; + goto out_delete_evlist; } - if (top.target_pid != -1) - top.target_tid = top.target_pid; + if (perf_target__none(&top.target)) + top.target.system_wide = true; - if (perf_evlist__create_maps(top.evlist, top.target_pid, - top.target_tid, top.cpu_list) < 0) + if (perf_evlist__create_maps(top.evlist, &top.target) < 0) usage_with_options(top_usage, options); if (!top.evlist->nr_entries && perf_evlist__add_default(top.evlist) < 0) { - pr_err("Not enough memory for event selector list\n"); + ui__error("Not enough memory for event selector list\n"); return -ENOMEM; } + symbol_conf.nr_events = top.evlist->nr_entries; + if (top.delay_secs < 1) top.delay_secs = 1; /* * User specified count overrides default frequency. */ - if (default_interval) + if (top.default_interval) top.freq = 0; else if (top.freq) { - default_interval = top.freq; + top.default_interval = top.freq; } else { - fprintf(stderr, "frequency and count are zero, aborting\n"); + ui__error("frequency and count are zero, aborting\n"); exit(EXIT_FAILURE); } list_for_each_entry(pos, &top.evlist->entries, node) { - if (perf_evsel__alloc_fd(pos, top.evlist->cpus->nr, - top.evlist->threads->nr) < 0) - goto out_free_fd; /* * Fill in the ones not specifically initialized via -c: */ - if (pos->attr.sample_period) - continue; - - pos->attr.sample_period = default_interval; + if (!pos->attr.sample_period) + pos->attr.sample_period = top.default_interval; } - if (perf_evlist__alloc_pollfd(top.evlist) < 0 || - perf_evlist__alloc_mmap(top.evlist) < 0) - goto out_free_fd; - top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node); - symbol_conf.priv_size = (sizeof(struct sym_entry) + sizeof(struct annotation) + - (top.evlist->nr_entries + 1) * sizeof(unsigned long)); + symbol_conf.priv_size = sizeof(struct annotation); symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); if (symbol__init() < 0) return -1; - get_term_dimensions(&winsize); + sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout); + sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout); + sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout); + + /* + * Avoid annotation data structures overhead when symbols aren't on the + * sort list. + */ + top.sort_has_symbols = sort_sym.list.next != NULL; + + get_term_dimensions(&top.winsize); if (top.print_entries == 0) { - update_print_entries(&winsize); - signal(SIGWINCH, sig_winch_handler); + struct sigaction act = { + .sa_sigaction = perf_top__sig_winch, + .sa_flags = SA_SIGINFO, + }; + perf_top__update_print_entries(&top); + sigaction(SIGWINCH, &act, NULL); } - status = __cmd_top(); -out_free_fd: + status = __cmd_top(&top); + +out_delete_evlist: perf_evlist__delete(top.evlist); return status; diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index 4702e2443a8e..b382bd551aac 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -4,7 +4,6 @@ #include "util/util.h" #include "util/strbuf.h" -extern const char perf_version_string[]; extern const char perf_usage_string[]; extern const char perf_more_info_string[]; diff --git a/tools/perf/config/feature-tests.mak b/tools/perf/config/feature-tests.mak index 6170fd2531b5..6c18785a6417 100644 --- a/tools/perf/config/feature-tests.mak +++ b/tools/perf/config/feature-tests.mak @@ -65,6 +65,34 @@ int main(void) endef endif +ifndef NO_GTK2 +define SOURCE_GTK2 +#pragma GCC diagnostic ignored \"-Wstrict-prototypes\" +#include <gtk/gtk.h> +#pragma GCC diagnostic error \"-Wstrict-prototypes\" + +int main(int argc, char *argv[]) +{ + gtk_init(&argc, &argv); + + return 0; +} +endef + +define SOURCE_GTK2_INFOBAR +#pragma GCC diagnostic ignored \"-Wstrict-prototypes\" +#include <gtk/gtk.h> +#pragma GCC diagnostic error \"-Wstrict-prototypes\" + +int main(void) +{ + gtk_info_bar_new(); + + return 0; +} +endef +endif + ifndef NO_LIBPERL define SOURCE_PERL_EMBED #include <EXTERN.h> diff --git a/tools/perf/design.txt b/tools/perf/design.txt index bd0bb1b1279b..67e5d0cace85 100644 --- a/tools/perf/design.txt +++ b/tools/perf/design.txt @@ -409,14 +409,15 @@ Counters can be enabled and disabled in two ways: via ioctl and via prctl. When a counter is disabled, it doesn't count or generate events but does continue to exist and maintain its count value. -An individual counter or counter group can be enabled with +An individual counter can be enabled with - ioctl(fd, PERF_EVENT_IOC_ENABLE); + ioctl(fd, PERF_EVENT_IOC_ENABLE, 0); or disabled with - ioctl(fd, PERF_EVENT_IOC_DISABLE); + ioctl(fd, PERF_EVENT_IOC_DISABLE, 0); +For a counter group, pass PERF_IOC_FLAG_GROUP as the third argument. Enabling or disabling the leader of a group enables or disables the whole group; that is, while the group leader is disabled, none of the counters in the group will count. Enabling or disabling a member of a diff --git a/tools/perf/perf-archive.sh b/tools/perf/perf-archive.sh index 677e59d62a8d..95b6f8b6177a 100644 --- a/tools/perf/perf-archive.sh +++ b/tools/perf/perf-archive.sh @@ -29,13 +29,14 @@ if [ ! -s $BUILDIDS ] ; then fi MANIFEST=$(mktemp /tmp/perf-archive-manifest.XXXXXX) +PERF_BUILDID_LINKDIR=$(readlink -f $PERF_BUILDID_DIR)/ cut -d ' ' -f 1 $BUILDIDS | \ while read build_id ; do linkname=$PERF_BUILDID_DIR.build-id/${build_id:0:2}/${build_id:2} filename=$(readlink -f $linkname) echo ${linkname#$PERF_BUILDID_DIR} >> $MANIFEST - echo ${filename#$PERF_BUILDID_DIR} >> $MANIFEST + echo ${filename#$PERF_BUILDID_LINKDIR} >> $MANIFEST done tar cfj $PERF_DATA.tar.bz2 -C $PERF_BUILDID_DIR -T $MANIFEST diff --git a/tools/perf/perf.c b/tools/perf/perf.c index ec635b7cc8ea..2b2e225a4d4c 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -29,8 +29,6 @@ struct pager_config { int val; }; -static char debugfs_mntpt[MAXPATHLEN]; - static int pager_command_config(const char *var, const char *value, void *data) { struct pager_config *c = data; @@ -81,15 +79,6 @@ static void commit_pager_choice(void) } } -static void set_debugfs_path(void) -{ - char *path; - - path = getenv(PERF_DEBUGFS_ENVIRONMENT); - snprintf(debugfs_path, MAXPATHLEN, "%s/%s", path ?: debugfs_mntpt, - "tracing/events"); -} - static int handle_options(const char ***argv, int *argc, int *envchanged) { int handled = 0; @@ -161,15 +150,14 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) fprintf(stderr, "No directory given for --debugfs-dir.\n"); usage(perf_usage_string); } - strncpy(debugfs_mntpt, (*argv)[1], MAXPATHLEN); - debugfs_mntpt[MAXPATHLEN - 1] = '\0'; + debugfs_set_path((*argv)[1]); if (envchanged) *envchanged = 1; (*argv)++; (*argc)--; } else if (!prefixcmp(cmd, CMD_DEBUGFS_DIR)) { - strncpy(debugfs_mntpt, cmd + strlen(CMD_DEBUGFS_DIR), MAXPATHLEN); - debugfs_mntpt[MAXPATHLEN - 1] = '\0'; + debugfs_set_path(cmd + strlen(CMD_DEBUGFS_DIR)); + fprintf(stderr, "dir: %s\n", debugfs_mountpoint); if (envchanged) *envchanged = 1; } else { @@ -281,7 +269,6 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) if (use_pager == -1 && p->option & USE_PAGER) use_pager = 1; commit_pager_choice(); - set_debugfs_path(); status = p->fn(argc, argv, prefix); exit_browser(status); @@ -416,15 +403,22 @@ static int run_argv(int *argcp, const char ***argv) return done_alias; } -/* mini /proc/mounts parser: searching for "^blah /mount/point debugfs" */ -static void get_debugfs_mntpt(void) +static void pthread__block_sigwinch(void) { - const char *path = debugfs_mount(NULL); + sigset_t set; - if (path) - strncpy(debugfs_mntpt, path, sizeof(debugfs_mntpt)); - else - debugfs_mntpt[0] = '\0'; + sigemptyset(&set); + sigaddset(&set, SIGWINCH); + pthread_sigmask(SIG_BLOCK, &set, NULL); +} + +void pthread__unblock_sigwinch(void) +{ + sigset_t set; + + sigemptyset(&set); + sigaddset(&set, SIGWINCH); + pthread_sigmask(SIG_UNBLOCK, &set, NULL); } int main(int argc, const char **argv) @@ -435,7 +429,7 @@ int main(int argc, const char **argv) if (!cmd) cmd = "perf-help"; /* get debugfs mount point from /proc/mounts */ - get_debugfs_mntpt(); + debugfs_mount(NULL); /* * "perf-xxxx" is the same as "perf xxxx", but we obviously: * @@ -458,7 +452,6 @@ int main(int argc, const char **argv) argc--; handle_options(&argv, &argc, NULL); commit_pager_choice(); - set_debugfs_path(); set_buildid_dir(); if (argc > 0) { @@ -480,6 +473,12 @@ int main(int argc, const char **argv) * time. */ setup_path(); + /* + * Block SIGWINCH notifications so that the thread that wants it can + * unblock and get syscalls like select interrupted instead of waiting + * forever while the signal goes to some other non interested thread. + */ + pthread__block_sigwinch(); while (1) { static int done_help; diff --git a/tools/perf/perf.h b/tools/perf/perf.h index a5fc660c1f12..f960ccb2edc6 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -9,18 +9,27 @@ void get_term_dimensions(struct winsize *ws); #include "../../arch/x86/include/asm/unistd.h" #define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") #define cpu_relax() asm volatile("rep; nop" ::: "memory"); +#define CPUINFO_PROC "model name" +#ifndef __NR_perf_event_open +# define __NR_perf_event_open 336 +#endif #endif #if defined(__x86_64__) #include "../../arch/x86/include/asm/unistd.h" #define rmb() asm volatile("lfence" ::: "memory") #define cpu_relax() asm volatile("rep; nop" ::: "memory"); +#define CPUINFO_PROC "model name" +#ifndef __NR_perf_event_open +# define __NR_perf_event_open 298 +#endif #endif #ifdef __powerpc__ #include "../../arch/powerpc/include/asm/unistd.h" #define rmb() asm volatile ("sync" ::: "memory") #define cpu_relax() asm volatile ("" ::: "memory"); +#define CPUINFO_PROC "cpu" #endif #ifdef __s390__ @@ -37,30 +46,35 @@ void get_term_dimensions(struct winsize *ws); # define rmb() asm volatile("" ::: "memory") #endif #define cpu_relax() asm volatile("" ::: "memory") +#define CPUINFO_PROC "cpu type" #endif #ifdef __hppa__ #include "../../arch/parisc/include/asm/unistd.h" #define rmb() asm volatile("" ::: "memory") #define cpu_relax() asm volatile("" ::: "memory"); +#define CPUINFO_PROC "cpu" #endif #ifdef __sparc__ #include "../../arch/sparc/include/asm/unistd.h" #define rmb() asm volatile("":::"memory") #define cpu_relax() asm volatile("":::"memory") +#define CPUINFO_PROC "cpu" #endif #ifdef __alpha__ #include "../../arch/alpha/include/asm/unistd.h" #define rmb() asm volatile("mb" ::: "memory") #define cpu_relax() asm volatile("" ::: "memory") +#define CPUINFO_PROC "cpu model" #endif #ifdef __ia64__ #include "../../arch/ia64/include/asm/unistd.h" #define rmb() asm volatile ("mf" ::: "memory") #define cpu_relax() asm volatile ("hint @pause" ::: "memory") +#define CPUINFO_PROC "model name" #endif #ifdef __arm__ @@ -71,6 +85,7 @@ void get_term_dimensions(struct winsize *ws); */ #define rmb() ((void(*)(void))0xffff0fa0)() #define cpu_relax() asm volatile("":::"memory") +#define CPUINFO_PROC "Processor" #endif #ifdef __mips__ @@ -83,6 +98,7 @@ void get_term_dimensions(struct winsize *ws); : /* no input */ \ : "memory") #define cpu_relax() asm volatile("" ::: "memory") +#define CPUINFO_PROC "cpu model" #endif #include <time.h> @@ -157,7 +173,6 @@ sys_perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu, int group_fd, unsigned long flags) { - attr->size = sizeof(*attr); return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); } @@ -170,6 +185,51 @@ struct ip_callchain { u64 ips[0]; }; +struct branch_flags { + u64 mispred:1; + u64 predicted:1; + u64 reserved:62; +}; + +struct branch_entry { + u64 from; + u64 to; + struct branch_flags flags; +}; + +struct branch_stack { + u64 nr; + struct branch_entry entries[0]; +}; + extern bool perf_host, perf_guest; +extern const char perf_version_string[]; + +void pthread__unblock_sigwinch(void); + +#include "util/target.h" + +struct perf_record_opts { + struct perf_target target; + bool call_graph; + bool group; + bool inherit_stat; + bool no_delay; + bool no_inherit; + bool no_samples; + bool pipe_output; + bool raw_samples; + bool sample_address; + bool sample_time; + bool sample_id_all_missing; + bool exclude_guest_missing; + bool period; + unsigned int freq; + unsigned int mmap_pages; + unsigned int user_freq; + u64 branch_stack; + u64 default_interval; + u64 user_interval; +}; #endif diff --git a/tools/perf/python/twatch.py b/tools/perf/python/twatch.py index df638c438a9f..b11cca584238 100755 --- a/tools/perf/python/twatch.py +++ b/tools/perf/python/twatch.py @@ -19,7 +19,7 @@ def main(): cpus = perf.cpu_map() threads = perf.thread_map() evsel = perf.evsel(task = 1, comm = 1, mmap = 0, - wakeup_events = 1, sample_period = 1, + wakeup_events = 1, watermark = 1, sample_id_all = 1, sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU | perf.SAMPLE_TID) evsel.open(cpus = cpus, threads = threads); diff --git a/tools/perf/scripts/python/bin/net_dropmonitor-record b/tools/perf/scripts/python/bin/net_dropmonitor-record new file mode 100755 index 000000000000..423fb81dadae --- /dev/null +++ b/tools/perf/scripts/python/bin/net_dropmonitor-record @@ -0,0 +1,2 @@ +#!/bin/bash +perf record -e skb:kfree_skb $@ diff --git a/tools/perf/scripts/python/bin/net_dropmonitor-report b/tools/perf/scripts/python/bin/net_dropmonitor-report new file mode 100755 index 000000000000..8d698f5a06aa --- /dev/null +++ b/tools/perf/scripts/python/bin/net_dropmonitor-report @@ -0,0 +1,4 @@ +#!/bin/bash +# description: display a table of dropped frames + +perf script -s "$PERF_EXEC_PATH"/scripts/python/net_dropmonitor.py $@ diff --git a/tools/perf/scripts/python/net_dropmonitor.py b/tools/perf/scripts/python/net_dropmonitor.py new file mode 100755 index 000000000000..a4ffc9500023 --- /dev/null +++ b/tools/perf/scripts/python/net_dropmonitor.py @@ -0,0 +1,72 @@ +# Monitor the system for dropped packets and proudce a report of drop locations and counts + +import os +import sys + +sys.path.append(os.environ['PERF_EXEC_PATH'] + \ + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + +from perf_trace_context import * +from Core import * +from Util import * + +drop_log = {} +kallsyms = [] + +def get_kallsyms_table(): + global kallsyms + try: + f = open("/proc/kallsyms", "r") + linecount = 0 + for line in f: + linecount = linecount+1 + f.seek(0) + except: + return + + + j = 0 + for line in f: + loc = int(line.split()[0], 16) + name = line.split()[2] + j = j +1 + if ((j % 100) == 0): + print "\r" + str(j) + "/" + str(linecount), + kallsyms.append({ 'loc': loc, 'name' : name}) + + print "\r" + str(j) + "/" + str(linecount) + kallsyms.sort() + return + +def get_sym(sloc): + loc = int(sloc) + for i in kallsyms: + if (i['loc'] >= loc): + return (i['name'], i['loc']-loc) + return (None, 0) + +def print_drop_table(): + print "%25s %25s %25s" % ("LOCATION", "OFFSET", "COUNT") + for i in drop_log.keys(): + (sym, off) = get_sym(i) + if sym == None: + sym = i + print "%25s %25s %25s" % (sym, off, drop_log[i]) + + +def trace_begin(): + print "Starting trace (Ctrl-C to dump results)" + +def trace_end(): + print "Gathering kallsyms data" + get_kallsyms_table() + print_drop_table() + +# called from perf, when it finds a correspoinding event +def skb__kfree_skb(name, context, cpu, sec, nsec, pid, comm, + skbaddr, protocol, location): + slocation = str(location) + try: + drop_log[slocation] = drop_log[slocation] + 1 + except: + drop_log[slocation] = 1 diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c new file mode 100644 index 000000000000..1818a531f1d3 --- /dev/null +++ b/tools/perf/ui/browser.c @@ -0,0 +1,713 @@ +#include "../util.h" +#include "../cache.h" +#include "../../perf.h" +#include "libslang.h" +#include <newt.h> +#include "ui.h" +#include "util.h" +#include <linux/compiler.h> +#include <linux/list.h> +#include <linux/rbtree.h> +#include <stdlib.h> +#include <sys/ttydefaults.h> +#include "browser.h" +#include "helpline.h" +#include "keysyms.h" +#include "../color.h" + +static int ui_browser__percent_color(struct ui_browser *browser, + double percent, bool current) +{ + if (current && (!browser->use_navkeypressed || browser->navkeypressed)) + return HE_COLORSET_SELECTED; + if (percent >= MIN_RED) + return HE_COLORSET_TOP; + if (percent >= MIN_GREEN) + return HE_COLORSET_MEDIUM; + return HE_COLORSET_NORMAL; +} + +int ui_browser__set_color(struct ui_browser *browser, int color) +{ + int ret = browser->current_color; + browser->current_color = color; + SLsmg_set_color(color); + return ret; +} + +void ui_browser__set_percent_color(struct ui_browser *browser, + double percent, bool current) +{ + int color = ui_browser__percent_color(browser, percent, current); + ui_browser__set_color(browser, color); +} + +void ui_browser__gotorc(struct ui_browser *browser, int y, int x) +{ + SLsmg_gotorc(browser->y + y, browser->x + x); +} + +static struct list_head * +ui_browser__list_head_filter_entries(struct ui_browser *browser, + struct list_head *pos) +{ + do { + if (!browser->filter || !browser->filter(browser, pos)) + return pos; + pos = pos->next; + } while (pos != browser->entries); + + return NULL; +} + +static struct list_head * +ui_browser__list_head_filter_prev_entries(struct ui_browser *browser, + struct list_head *pos) +{ + do { + if (!browser->filter || !browser->filter(browser, pos)) + return pos; + pos = pos->prev; + } while (pos != browser->entries); + + return NULL; +} + +void ui_browser__list_head_seek(struct ui_browser *browser, off_t offset, int whence) +{ + struct list_head *head = browser->entries; + struct list_head *pos; + + if (browser->nr_entries == 0) + return; + + switch (whence) { + case SEEK_SET: + pos = ui_browser__list_head_filter_entries(browser, head->next); + break; + case SEEK_CUR: + pos = browser->top; + break; + case SEEK_END: + pos = ui_browser__list_head_filter_prev_entries(browser, head->prev); + break; + default: + return; + } + + assert(pos != NULL); + + if (offset > 0) { + while (offset-- != 0) + pos = ui_browser__list_head_filter_entries(browser, pos->next); + } else { + while (offset++ != 0) + pos = ui_browser__list_head_filter_prev_entries(browser, pos->prev); + } + + browser->top = pos; +} + +void ui_browser__rb_tree_seek(struct ui_browser *browser, off_t offset, int whence) +{ + struct rb_root *root = browser->entries; + struct rb_node *nd; + + switch (whence) { + case SEEK_SET: + nd = rb_first(root); + break; + case SEEK_CUR: + nd = browser->top; + break; + case SEEK_END: + nd = rb_last(root); + break; + default: + return; + } + + if (offset > 0) { + while (offset-- != 0) + nd = rb_next(nd); + } else { + while (offset++ != 0) + nd = rb_prev(nd); + } + + browser->top = nd; +} + +unsigned int ui_browser__rb_tree_refresh(struct ui_browser *browser) +{ + struct rb_node *nd; + int row = 0; + + if (browser->top == NULL) + browser->top = rb_first(browser->entries); + + nd = browser->top; + + while (nd != NULL) { + ui_browser__gotorc(browser, row, 0); + browser->write(browser, nd, row); + if (++row == browser->height) + break; + nd = rb_next(nd); + } + + return row; +} + +bool ui_browser__is_current_entry(struct ui_browser *browser, unsigned row) +{ + return browser->top_idx + row == browser->index; +} + +void ui_browser__refresh_dimensions(struct ui_browser *browser) +{ + browser->width = SLtt_Screen_Cols - 1; + browser->height = SLtt_Screen_Rows - 2; + browser->y = 1; + browser->x = 0; +} + +void ui_browser__handle_resize(struct ui_browser *browser) +{ + ui__refresh_dimensions(false); + ui_browser__show(browser, browser->title, ui_helpline__current); + ui_browser__refresh(browser); +} + +int ui_browser__warning(struct ui_browser *browser, int timeout, + const char *format, ...) +{ + va_list args; + char *text; + int key = 0, err; + + va_start(args, format); + err = vasprintf(&text, format, args); + va_end(args); + + if (err < 0) { + va_start(args, format); + ui_helpline__vpush(format, args); + va_end(args); + } else { + while ((key == ui__question_window("Warning!", text, + "Press any key...", + timeout)) == K_RESIZE) + ui_browser__handle_resize(browser); + free(text); + } + + return key; +} + +int ui_browser__help_window(struct ui_browser *browser, const char *text) +{ + int key; + + while ((key = ui__help_window(text)) == K_RESIZE) + ui_browser__handle_resize(browser); + + return key; +} + +bool ui_browser__dialog_yesno(struct ui_browser *browser, const char *text) +{ + int key; + + while ((key = ui__dialog_yesno(text)) == K_RESIZE) + ui_browser__handle_resize(browser); + + return key == K_ENTER || toupper(key) == 'Y'; +} + +void ui_browser__reset_index(struct ui_browser *browser) +{ + browser->index = browser->top_idx = 0; + browser->seek(browser, 0, SEEK_SET); +} + +void __ui_browser__show_title(struct ui_browser *browser, const char *title) +{ + SLsmg_gotorc(0, 0); + ui_browser__set_color(browser, NEWT_COLORSET_ROOT); + slsmg_write_nstring(title, browser->width + 1); +} + +void ui_browser__show_title(struct ui_browser *browser, const char *title) +{ + pthread_mutex_lock(&ui__lock); + __ui_browser__show_title(browser, title); + pthread_mutex_unlock(&ui__lock); +} + +int ui_browser__show(struct ui_browser *browser, const char *title, + const char *helpline, ...) +{ + int err; + va_list ap; + + ui_browser__refresh_dimensions(browser); + + pthread_mutex_lock(&ui__lock); + __ui_browser__show_title(browser, title); + + browser->title = title; + free(browser->helpline); + browser->helpline = NULL; + + va_start(ap, helpline); + err = vasprintf(&browser->helpline, helpline, ap); + va_end(ap); + if (err > 0) + ui_helpline__push(browser->helpline); + pthread_mutex_unlock(&ui__lock); + return err ? 0 : -1; +} + +void ui_browser__hide(struct ui_browser *browser __used) +{ + pthread_mutex_lock(&ui__lock); + ui_helpline__pop(); + pthread_mutex_unlock(&ui__lock); +} + +static void ui_browser__scrollbar_set(struct ui_browser *browser) +{ + int height = browser->height, h = 0, pct = 0, + col = browser->width, + row = browser->y - 1; + + if (browser->nr_entries > 1) { + pct = ((browser->index * (browser->height - 1)) / + (browser->nr_entries - 1)); + } + + SLsmg_set_char_set(1); + + while (h < height) { + ui_browser__gotorc(browser, row++, col); + SLsmg_write_char(h == pct ? SLSMG_DIAMOND_CHAR : SLSMG_CKBRD_CHAR); + ++h; + } + + SLsmg_set_char_set(0); +} + +static int __ui_browser__refresh(struct ui_browser *browser) +{ + int row; + int width = browser->width; + + row = browser->refresh(browser); + ui_browser__set_color(browser, HE_COLORSET_NORMAL); + + if (!browser->use_navkeypressed || browser->navkeypressed) + ui_browser__scrollbar_set(browser); + else + width += 1; + + SLsmg_fill_region(browser->y + row, browser->x, + browser->height - row, width, ' '); + + return 0; +} + +int ui_browser__refresh(struct ui_browser *browser) +{ + pthread_mutex_lock(&ui__lock); + __ui_browser__refresh(browser); + pthread_mutex_unlock(&ui__lock); + + return 0; +} + +/* + * Here we're updating nr_entries _after_ we started browsing, i.e. we have to + * forget about any reference to any entry in the underlying data structure, + * that is why we do a SEEK_SET. Think about 'perf top' in the hists browser + * after an output_resort and hist decay. + */ +void ui_browser__update_nr_entries(struct ui_browser *browser, u32 nr_entries) +{ + off_t offset = nr_entries - browser->nr_entries; + + browser->nr_entries = nr_entries; + + if (offset < 0) { + if (browser->top_idx < (u64)-offset) + offset = -browser->top_idx; + + browser->index += offset; + browser->top_idx += offset; + } + + browser->top = NULL; + browser->seek(browser, browser->top_idx, SEEK_SET); +} + +int ui_browser__run(struct ui_browser *browser, int delay_secs) +{ + int err, key; + + while (1) { + off_t offset; + + pthread_mutex_lock(&ui__lock); + err = __ui_browser__refresh(browser); + SLsmg_refresh(); + pthread_mutex_unlock(&ui__lock); + if (err < 0) + break; + + key = ui__getch(delay_secs); + + if (key == K_RESIZE) { + ui__refresh_dimensions(false); + ui_browser__refresh_dimensions(browser); + __ui_browser__show_title(browser, browser->title); + ui_helpline__puts(browser->helpline); + continue; + } + + if (browser->use_navkeypressed && !browser->navkeypressed) { + if (key == K_DOWN || key == K_UP || + key == K_PGDN || key == K_PGUP || + key == K_HOME || key == K_END || + key == ' ') { + browser->navkeypressed = true; + continue; + } else + return key; + } + + switch (key) { + case K_DOWN: + if (browser->index == browser->nr_entries - 1) + break; + ++browser->index; + if (browser->index == browser->top_idx + browser->height) { + ++browser->top_idx; + browser->seek(browser, +1, SEEK_CUR); + } + break; + case K_UP: + if (browser->index == 0) + break; + --browser->index; + if (browser->index < browser->top_idx) { + --browser->top_idx; + browser->seek(browser, -1, SEEK_CUR); + } + break; + case K_PGDN: + case ' ': + if (browser->top_idx + browser->height > browser->nr_entries - 1) + break; + + offset = browser->height; + if (browser->index + offset > browser->nr_entries - 1) + offset = browser->nr_entries - 1 - browser->index; + browser->index += offset; + browser->top_idx += offset; + browser->seek(browser, +offset, SEEK_CUR); + break; + case K_PGUP: + if (browser->top_idx == 0) + break; + + if (browser->top_idx < browser->height) + offset = browser->top_idx; + else + offset = browser->height; + + browser->index -= offset; + browser->top_idx -= offset; + browser->seek(browser, -offset, SEEK_CUR); + break; + case K_HOME: + ui_browser__reset_index(browser); + break; + case K_END: + offset = browser->height - 1; + if (offset >= browser->nr_entries) + offset = browser->nr_entries - 1; + + browser->index = browser->nr_entries - 1; + browser->top_idx = browser->index - offset; + browser->seek(browser, -offset, SEEK_END); + break; + default: + return key; + } + } + return -1; +} + +unsigned int ui_browser__list_head_refresh(struct ui_browser *browser) +{ + struct list_head *pos; + struct list_head *head = browser->entries; + int row = 0; + + if (browser->top == NULL || browser->top == browser->entries) + browser->top = ui_browser__list_head_filter_entries(browser, head->next); + + pos = browser->top; + + list_for_each_from(pos, head) { + if (!browser->filter || !browser->filter(browser, pos)) { + ui_browser__gotorc(browser, row, 0); + browser->write(browser, pos, row); + if (++row == browser->height) + break; + } + } + + return row; +} + +static struct ui_browser__colorset { + const char *name, *fg, *bg; + int colorset; +} ui_browser__colorsets[] = { + { + .colorset = HE_COLORSET_TOP, + .name = "top", + .fg = "red", + .bg = "default", + }, + { + .colorset = HE_COLORSET_MEDIUM, + .name = "medium", + .fg = "green", + .bg = "default", + }, + { + .colorset = HE_COLORSET_NORMAL, + .name = "normal", + .fg = "default", + .bg = "default", + }, + { + .colorset = HE_COLORSET_SELECTED, + .name = "selected", + .fg = "black", + .bg = "lightgray", + }, + { + .colorset = HE_COLORSET_CODE, + .name = "code", + .fg = "blue", + .bg = "default", + }, + { + .colorset = HE_COLORSET_ADDR, + .name = "addr", + .fg = "magenta", + .bg = "default", + }, + { + .name = NULL, + } +}; + + +static int ui_browser__color_config(const char *var, const char *value, + void *data __used) +{ + char *fg = NULL, *bg; + int i; + + /* same dir for all commands */ + if (prefixcmp(var, "colors.") != 0) + return 0; + + for (i = 0; ui_browser__colorsets[i].name != NULL; ++i) { + const char *name = var + 7; + + if (strcmp(ui_browser__colorsets[i].name, name) != 0) + continue; + + fg = strdup(value); + if (fg == NULL) + break; + + bg = strchr(fg, ','); + if (bg == NULL) + break; + + *bg = '\0'; + while (isspace(*++bg)); + ui_browser__colorsets[i].bg = bg; + ui_browser__colorsets[i].fg = fg; + return 0; + } + + free(fg); + return -1; +} + +void ui_browser__argv_seek(struct ui_browser *browser, off_t offset, int whence) +{ + switch (whence) { + case SEEK_SET: + browser->top = browser->entries; + break; + case SEEK_CUR: + browser->top = browser->top + browser->top_idx + offset; + break; + case SEEK_END: + browser->top = browser->top + browser->nr_entries + offset; + break; + default: + return; + } +} + +unsigned int ui_browser__argv_refresh(struct ui_browser *browser) +{ + unsigned int row = 0, idx = browser->top_idx; + char **pos; + + if (browser->top == NULL) + browser->top = browser->entries; + + pos = (char **)browser->top; + while (idx < browser->nr_entries) { + if (!browser->filter || !browser->filter(browser, *pos)) { + ui_browser__gotorc(browser, row, 0); + browser->write(browser, pos, row); + if (++row == browser->height) + break; + } + + ++idx; + ++pos; + } + + return row; +} + +void __ui_browser__vline(struct ui_browser *browser, unsigned int column, + u16 start, u16 end) +{ + SLsmg_set_char_set(1); + ui_browser__gotorc(browser, start, column); + SLsmg_draw_vline(end - start + 1); + SLsmg_set_char_set(0); +} + +void ui_browser__write_graph(struct ui_browser *browser __used, int graph) +{ + SLsmg_set_char_set(1); + SLsmg_write_char(graph); + SLsmg_set_char_set(0); +} + +static void __ui_browser__line_arrow_up(struct ui_browser *browser, + unsigned int column, + u64 start, u64 end) +{ + unsigned int row, end_row; + + SLsmg_set_char_set(1); + + if (start < browser->top_idx + browser->height) { + row = start - browser->top_idx; + ui_browser__gotorc(browser, row, column); + SLsmg_write_char(SLSMG_LLCORN_CHAR); + ui_browser__gotorc(browser, row, column + 1); + SLsmg_draw_hline(2); + + if (row-- == 0) + goto out; + } else + row = browser->height - 1; + + if (end > browser->top_idx) + end_row = end - browser->top_idx; + else + end_row = 0; + + ui_browser__gotorc(browser, end_row, column); + SLsmg_draw_vline(row - end_row + 1); + + ui_browser__gotorc(browser, end_row, column); + if (end >= browser->top_idx) { + SLsmg_write_char(SLSMG_ULCORN_CHAR); + ui_browser__gotorc(browser, end_row, column + 1); + SLsmg_write_char(SLSMG_HLINE_CHAR); + ui_browser__gotorc(browser, end_row, column + 2); + SLsmg_write_char(SLSMG_RARROW_CHAR); + } +out: + SLsmg_set_char_set(0); +} + +static void __ui_browser__line_arrow_down(struct ui_browser *browser, + unsigned int column, + u64 start, u64 end) +{ + unsigned int row, end_row; + + SLsmg_set_char_set(1); + + if (start >= browser->top_idx) { + row = start - browser->top_idx; + ui_browser__gotorc(browser, row, column); + SLsmg_write_char(SLSMG_ULCORN_CHAR); + ui_browser__gotorc(browser, row, column + 1); + SLsmg_draw_hline(2); + + if (row++ == 0) + goto out; + } else + row = 0; + + if (end >= browser->top_idx + browser->height) + end_row = browser->height - 1; + else + end_row = end - browser->top_idx;; + + ui_browser__gotorc(browser, row, column); + SLsmg_draw_vline(end_row - row + 1); + + ui_browser__gotorc(browser, end_row, column); + if (end < browser->top_idx + browser->height) { + SLsmg_write_char(SLSMG_LLCORN_CHAR); + ui_browser__gotorc(browser, end_row, column + 1); + SLsmg_write_char(SLSMG_HLINE_CHAR); + ui_browser__gotorc(browser, end_row, column + 2); + SLsmg_write_char(SLSMG_RARROW_CHAR); + } +out: + SLsmg_set_char_set(0); +} + +void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column, + u64 start, u64 end) +{ + if (start > end) + __ui_browser__line_arrow_up(browser, column, start, end); + else + __ui_browser__line_arrow_down(browser, column, start, end); +} + +void ui_browser__init(void) +{ + int i = 0; + + perf_config(ui_browser__color_config, NULL); + + while (ui_browser__colorsets[i].name) { + struct ui_browser__colorset *c = &ui_browser__colorsets[i++]; + sltt_set_color(c->colorset, c->name, c->fg, c->bg); + } + + annotate_browser__init(); +} diff --git a/tools/perf/util/ui/browser.h b/tools/perf/ui/browser.h index fc63dda10910..af70314605e5 100644 --- a/tools/perf/util/ui/browser.h +++ b/tools/perf/ui/browser.h @@ -2,7 +2,6 @@ #define _PERF_UI_BROWSER_H_ 1 #include <stdbool.h> -#include <newt.h> #include <sys/types.h> #include "../types.h" @@ -11,20 +10,26 @@ #define HE_COLORSET_NORMAL 52 #define HE_COLORSET_SELECTED 53 #define HE_COLORSET_CODE 54 +#define HE_COLORSET_ADDR 55 struct ui_browser { - newtComponent form, sb; u64 index, top_idx; void *top, *entries; u16 y, x, width, height; + int current_color; void *priv; + const char *title; + char *helpline; unsigned int (*refresh)(struct ui_browser *self); void (*write)(struct ui_browser *self, void *entry, int row); void (*seek)(struct ui_browser *self, off_t offset, int whence); + bool (*filter)(struct ui_browser *self, void *entry); u32 nr_entries; + bool navkeypressed; + bool use_navkeypressed; }; -void ui_browser__set_color(struct ui_browser *self, int color); +int ui_browser__set_color(struct ui_browser *browser, int color); void ui_browser__set_percent_color(struct ui_browser *self, double percent, bool current); bool ui_browser__is_current_entry(struct ui_browser *self, unsigned row); @@ -32,15 +37,30 @@ void ui_browser__refresh_dimensions(struct ui_browser *self); void ui_browser__reset_index(struct ui_browser *self); void ui_browser__gotorc(struct ui_browser *self, int y, int x); -void ui_browser__add_exit_key(struct ui_browser *self, int key); -void ui_browser__add_exit_keys(struct ui_browser *self, int keys[]); +void ui_browser__write_graph(struct ui_browser *browser, int graph); +void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column, + u64 start, u64 end); void __ui_browser__show_title(struct ui_browser *browser, const char *title); void ui_browser__show_title(struct ui_browser *browser, const char *title); int ui_browser__show(struct ui_browser *self, const char *title, const char *helpline, ...); void ui_browser__hide(struct ui_browser *self); int ui_browser__refresh(struct ui_browser *self); -int ui_browser__run(struct ui_browser *self); +int ui_browser__run(struct ui_browser *browser, int delay_secs); +void ui_browser__update_nr_entries(struct ui_browser *browser, u32 nr_entries); +void ui_browser__handle_resize(struct ui_browser *browser); +void __ui_browser__vline(struct ui_browser *browser, unsigned int column, + u16 start, u16 end); + +int ui_browser__warning(struct ui_browser *browser, int timeout, + const char *format, ...); +int ui_browser__help_window(struct ui_browser *browser, const char *text); +bool ui_browser__dialog_yesno(struct ui_browser *browser, const char *text); +int ui_browser__input_window(const char *title, const char *text, char *input, + const char *exit_msg, int delay_sec); + +void ui_browser__argv_seek(struct ui_browser *browser, off_t offset, int whence); +unsigned int ui_browser__argv_refresh(struct ui_browser *browser); void ui_browser__rb_tree_seek(struct ui_browser *self, off_t offset, int whence); unsigned int ui_browser__rb_tree_refresh(struct ui_browser *self); @@ -49,4 +69,5 @@ void ui_browser__list_head_seek(struct ui_browser *self, off_t offset, int whenc unsigned int ui_browser__list_head_refresh(struct ui_browser *self); void ui_browser__init(void); +void annotate_browser__init(void); #endif /* _PERF_UI_BROWSER_H_ */ diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c new file mode 100644 index 000000000000..67a2703e666a --- /dev/null +++ b/tools/perf/ui/browsers/annotate.c @@ -0,0 +1,953 @@ +#include "../../util/util.h" +#include "../browser.h" +#include "../helpline.h" +#include "../libslang.h" +#include "../ui.h" +#include "../util.h" +#include "../../util/annotate.h" +#include "../../util/hist.h" +#include "../../util/sort.h" +#include "../../util/symbol.h" +#include <pthread.h> +#include <newt.h> + +struct browser_disasm_line { + struct rb_node rb_node; + double percent; + u32 idx; + int idx_asm; + int jump_sources; +}; + +static struct annotate_browser_opt { + bool hide_src_code, + use_offset, + jump_arrows, + show_nr_jumps; +} annotate_browser__opts = { + .use_offset = true, + .jump_arrows = true, +}; + +struct annotate_browser { + struct ui_browser b; + struct rb_root entries; + struct rb_node *curr_hot; + struct disasm_line *selection; + struct disasm_line **offsets; + u64 start; + int nr_asm_entries; + int nr_entries; + int max_jump_sources; + int nr_jumps; + bool searching_backwards; + u8 addr_width; + u8 jumps_width; + u8 target_width; + u8 min_addr_width; + u8 max_addr_width; + char search_bf[128]; +}; + +static inline struct browser_disasm_line *disasm_line__browser(struct disasm_line *dl) +{ + return (struct browser_disasm_line *)(dl + 1); +} + +static bool disasm_line__filter(struct ui_browser *browser __used, void *entry) +{ + if (annotate_browser__opts.hide_src_code) { + struct disasm_line *dl = list_entry(entry, struct disasm_line, node); + return dl->offset == -1; + } + + return false; +} + +static int annotate_browser__jumps_percent_color(struct annotate_browser *browser, + int nr, bool current) +{ + if (current && (!browser->b.use_navkeypressed || browser->b.navkeypressed)) + return HE_COLORSET_SELECTED; + if (nr == browser->max_jump_sources) + return HE_COLORSET_TOP; + if (nr > 1) + return HE_COLORSET_MEDIUM; + return HE_COLORSET_NORMAL; +} + +static int annotate_browser__set_jumps_percent_color(struct annotate_browser *browser, + int nr, bool current) +{ + int color = annotate_browser__jumps_percent_color(browser, nr, current); + return ui_browser__set_color(&browser->b, color); +} + +static void annotate_browser__write(struct ui_browser *browser, void *entry, int row) +{ + struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); + struct disasm_line *dl = list_entry(entry, struct disasm_line, node); + struct browser_disasm_line *bdl = disasm_line__browser(dl); + bool current_entry = ui_browser__is_current_entry(browser, row); + bool change_color = (!annotate_browser__opts.hide_src_code && + (!current_entry || (browser->use_navkeypressed && + !browser->navkeypressed))); + int width = browser->width, printed; + char bf[256]; + + if (dl->offset != -1 && bdl->percent != 0.0) { + ui_browser__set_percent_color(browser, bdl->percent, current_entry); + slsmg_printf("%6.2f ", bdl->percent); + } else { + ui_browser__set_percent_color(browser, 0, current_entry); + slsmg_write_nstring(" ", 7); + } + + SLsmg_write_char(' '); + + /* The scroll bar isn't being used */ + if (!browser->navkeypressed) + width += 1; + + if (!*dl->line) + slsmg_write_nstring(" ", width - 7); + else if (dl->offset == -1) { + printed = scnprintf(bf, sizeof(bf), "%*s ", + ab->addr_width, " "); + slsmg_write_nstring(bf, printed); + slsmg_write_nstring(dl->line, width - printed - 6); + } else { + u64 addr = dl->offset; + int color = -1; + + if (!annotate_browser__opts.use_offset) + addr += ab->start; + + if (!annotate_browser__opts.use_offset) { + printed = scnprintf(bf, sizeof(bf), "%" PRIx64 ": ", addr); + } else { + if (bdl->jump_sources) { + if (annotate_browser__opts.show_nr_jumps) { + int prev; + printed = scnprintf(bf, sizeof(bf), "%*d ", + ab->jumps_width, + bdl->jump_sources); + prev = annotate_browser__set_jumps_percent_color(ab, bdl->jump_sources, + current_entry); + slsmg_write_nstring(bf, printed); + ui_browser__set_color(browser, prev); + } + + printed = scnprintf(bf, sizeof(bf), "%*" PRIx64 ": ", + ab->target_width, addr); + } else { + printed = scnprintf(bf, sizeof(bf), "%*s ", + ab->addr_width, " "); + } + } + + if (change_color) + color = ui_browser__set_color(browser, HE_COLORSET_ADDR); + slsmg_write_nstring(bf, printed); + if (change_color) + ui_browser__set_color(browser, color); + if (dl->ins && dl->ins->ops->scnprintf) { + if (ins__is_jump(dl->ins)) { + bool fwd = dl->ops.target.offset > (u64)dl->offset; + + ui_browser__write_graph(browser, fwd ? SLSMG_DARROW_CHAR : + SLSMG_UARROW_CHAR); + SLsmg_write_char(' '); + } else if (ins__is_call(dl->ins)) { + ui_browser__write_graph(browser, SLSMG_RARROW_CHAR); + SLsmg_write_char(' '); + } else { + slsmg_write_nstring(" ", 2); + } + } else { + if (strcmp(dl->name, "retq")) { + slsmg_write_nstring(" ", 2); + } else { + ui_browser__write_graph(browser, SLSMG_LARROW_CHAR); + SLsmg_write_char(' '); + } + } + + disasm_line__scnprintf(dl, bf, sizeof(bf), !annotate_browser__opts.use_offset); + slsmg_write_nstring(bf, width - 10 - printed); + } + + if (current_entry) + ab->selection = dl; +} + +static void annotate_browser__draw_current_jump(struct ui_browser *browser) +{ + struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); + struct disasm_line *cursor = ab->selection, *target; + struct browser_disasm_line *btarget, *bcursor; + unsigned int from, to; + + if (!cursor || !cursor->ins || !ins__is_jump(cursor->ins) || + !disasm_line__has_offset(cursor)) + return; + + target = ab->offsets[cursor->ops.target.offset]; + if (!target) + return; + + bcursor = disasm_line__browser(cursor); + btarget = disasm_line__browser(target); + + if (annotate_browser__opts.hide_src_code) { + from = bcursor->idx_asm; + to = btarget->idx_asm; + } else { + from = (u64)bcursor->idx; + to = (u64)btarget->idx; + } + + ui_browser__set_color(browser, HE_COLORSET_CODE); + __ui_browser__line_arrow(browser, 9 + ab->addr_width, from, to); +} + +static unsigned int annotate_browser__refresh(struct ui_browser *browser) +{ + int ret = ui_browser__list_head_refresh(browser); + + if (annotate_browser__opts.jump_arrows) + annotate_browser__draw_current_jump(browser); + + ui_browser__set_color(browser, HE_COLORSET_NORMAL); + __ui_browser__vline(browser, 7, 0, browser->height - 1); + return ret; +} + +static double disasm_line__calc_percent(struct disasm_line *dl, struct symbol *sym, int evidx) +{ + double percent = 0.0; + + if (dl->offset != -1) { + int len = sym->end - sym->start; + unsigned int hits = 0; + struct annotation *notes = symbol__annotation(sym); + struct source_line *src_line = notes->src->lines; + struct sym_hist *h = annotation__histogram(notes, evidx); + s64 offset = dl->offset; + struct disasm_line *next; + + next = disasm__get_next_ip_line(¬es->src->source, dl); + while (offset < (s64)len && + (next == NULL || offset < next->offset)) { + if (src_line) { + percent += src_line[offset].percent; + } else + hits += h->addr[offset]; + + ++offset; + } + /* + * If the percentage wasn't already calculated in + * symbol__get_source_line, do it now: + */ + if (src_line == NULL && h->sum) + percent = 100.0 * hits / h->sum; + } + + return percent; +} + +static void disasm_rb_tree__insert(struct rb_root *root, struct browser_disasm_line *bdl) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct browser_disasm_line *l; + + while (*p != NULL) { + parent = *p; + l = rb_entry(parent, struct browser_disasm_line, rb_node); + if (bdl->percent < l->percent) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + rb_link_node(&bdl->rb_node, parent, p); + rb_insert_color(&bdl->rb_node, root); +} + +static void annotate_browser__set_top(struct annotate_browser *browser, + struct disasm_line *pos, u32 idx) +{ + unsigned back; + + ui_browser__refresh_dimensions(&browser->b); + back = browser->b.height / 2; + browser->b.top_idx = browser->b.index = idx; + + while (browser->b.top_idx != 0 && back != 0) { + pos = list_entry(pos->node.prev, struct disasm_line, node); + + if (disasm_line__filter(&browser->b, &pos->node)) + continue; + + --browser->b.top_idx; + --back; + } + + browser->b.top = pos; + browser->b.navkeypressed = true; +} + +static void annotate_browser__set_rb_top(struct annotate_browser *browser, + struct rb_node *nd) +{ + struct browser_disasm_line *bpos; + struct disasm_line *pos; + u32 idx; + + bpos = rb_entry(nd, struct browser_disasm_line, rb_node); + pos = ((struct disasm_line *)bpos) - 1; + idx = bpos->idx; + if (annotate_browser__opts.hide_src_code) + idx = bpos->idx_asm; + annotate_browser__set_top(browser, pos, idx); + browser->curr_hot = nd; +} + +static void annotate_browser__calc_percent(struct annotate_browser *browser, + int evidx) +{ + struct map_symbol *ms = browser->b.priv; + struct symbol *sym = ms->sym; + struct annotation *notes = symbol__annotation(sym); + struct disasm_line *pos; + + browser->entries = RB_ROOT; + + pthread_mutex_lock(¬es->lock); + + list_for_each_entry(pos, ¬es->src->source, node) { + struct browser_disasm_line *bpos = disasm_line__browser(pos); + bpos->percent = disasm_line__calc_percent(pos, sym, evidx); + if (bpos->percent < 0.01) { + RB_CLEAR_NODE(&bpos->rb_node); + continue; + } + disasm_rb_tree__insert(&browser->entries, bpos); + } + pthread_mutex_unlock(¬es->lock); + + browser->curr_hot = rb_last(&browser->entries); +} + +static bool annotate_browser__toggle_source(struct annotate_browser *browser) +{ + struct disasm_line *dl; + struct browser_disasm_line *bdl; + off_t offset = browser->b.index - browser->b.top_idx; + + browser->b.seek(&browser->b, offset, SEEK_CUR); + dl = list_entry(browser->b.top, struct disasm_line, node); + bdl = disasm_line__browser(dl); + + if (annotate_browser__opts.hide_src_code) { + if (bdl->idx_asm < offset) + offset = bdl->idx; + + browser->b.nr_entries = browser->nr_entries; + annotate_browser__opts.hide_src_code = false; + browser->b.seek(&browser->b, -offset, SEEK_CUR); + browser->b.top_idx = bdl->idx - offset; + browser->b.index = bdl->idx; + } else { + if (bdl->idx_asm < 0) { + ui_helpline__puts("Only available for assembly lines."); + browser->b.seek(&browser->b, -offset, SEEK_CUR); + return false; + } + + if (bdl->idx_asm < offset) + offset = bdl->idx_asm; + + browser->b.nr_entries = browser->nr_asm_entries; + annotate_browser__opts.hide_src_code = true; + browser->b.seek(&browser->b, -offset, SEEK_CUR); + browser->b.top_idx = bdl->idx_asm - offset; + browser->b.index = bdl->idx_asm; + } + + return true; +} + +static void annotate_browser__init_asm_mode(struct annotate_browser *browser) +{ + ui_browser__reset_index(&browser->b); + browser->b.nr_entries = browser->nr_asm_entries; +} + +static bool annotate_browser__callq(struct annotate_browser *browser, + int evidx, void (*timer)(void *arg), + void *arg, int delay_secs) +{ + struct map_symbol *ms = browser->b.priv; + struct disasm_line *dl = browser->selection; + struct symbol *sym = ms->sym; + struct annotation *notes; + struct symbol *target; + u64 ip; + + if (!ins__is_call(dl->ins)) + return false; + + ip = ms->map->map_ip(ms->map, dl->ops.target.addr); + target = map__find_symbol(ms->map, ip, NULL); + if (target == NULL) { + ui_helpline__puts("The called function was not found."); + return true; + } + + notes = symbol__annotation(target); + pthread_mutex_lock(¬es->lock); + + if (notes->src == NULL && symbol__alloc_hist(target) < 0) { + pthread_mutex_unlock(¬es->lock); + ui__warning("Not enough memory for annotating '%s' symbol!\n", + target->name); + return true; + } + + pthread_mutex_unlock(¬es->lock); + symbol__tui_annotate(target, ms->map, evidx, timer, arg, delay_secs); + ui_browser__show_title(&browser->b, sym->name); + return true; +} + +static +struct disasm_line *annotate_browser__find_offset(struct annotate_browser *browser, + s64 offset, s64 *idx) +{ + struct map_symbol *ms = browser->b.priv; + struct symbol *sym = ms->sym; + struct annotation *notes = symbol__annotation(sym); + struct disasm_line *pos; + + *idx = 0; + list_for_each_entry(pos, ¬es->src->source, node) { + if (pos->offset == offset) + return pos; + if (!disasm_line__filter(&browser->b, &pos->node)) + ++*idx; + } + + return NULL; +} + +static bool annotate_browser__jump(struct annotate_browser *browser) +{ + struct disasm_line *dl = browser->selection; + s64 idx; + + if (!ins__is_jump(dl->ins)) + return false; + + dl = annotate_browser__find_offset(browser, dl->ops.target.offset, &idx); + if (dl == NULL) { + ui_helpline__puts("Invallid jump offset"); + return true; + } + + annotate_browser__set_top(browser, dl, idx); + + return true; +} + +static +struct disasm_line *annotate_browser__find_string(struct annotate_browser *browser, + char *s, s64 *idx) +{ + struct map_symbol *ms = browser->b.priv; + struct symbol *sym = ms->sym; + struct annotation *notes = symbol__annotation(sym); + struct disasm_line *pos = browser->selection; + + *idx = browser->b.index; + list_for_each_entry_continue(pos, ¬es->src->source, node) { + if (disasm_line__filter(&browser->b, &pos->node)) + continue; + + ++*idx; + + if (pos->line && strstr(pos->line, s) != NULL) + return pos; + } + + return NULL; +} + +static bool __annotate_browser__search(struct annotate_browser *browser) +{ + struct disasm_line *dl; + s64 idx; + + dl = annotate_browser__find_string(browser, browser->search_bf, &idx); + if (dl == NULL) { + ui_helpline__puts("String not found!"); + return false; + } + + annotate_browser__set_top(browser, dl, idx); + browser->searching_backwards = false; + return true; +} + +static +struct disasm_line *annotate_browser__find_string_reverse(struct annotate_browser *browser, + char *s, s64 *idx) +{ + struct map_symbol *ms = browser->b.priv; + struct symbol *sym = ms->sym; + struct annotation *notes = symbol__annotation(sym); + struct disasm_line *pos = browser->selection; + + *idx = browser->b.index; + list_for_each_entry_continue_reverse(pos, ¬es->src->source, node) { + if (disasm_line__filter(&browser->b, &pos->node)) + continue; + + --*idx; + + if (pos->line && strstr(pos->line, s) != NULL) + return pos; + } + + return NULL; +} + +static bool __annotate_browser__search_reverse(struct annotate_browser *browser) +{ + struct disasm_line *dl; + s64 idx; + + dl = annotate_browser__find_string_reverse(browser, browser->search_bf, &idx); + if (dl == NULL) { + ui_helpline__puts("String not found!"); + return false; + } + + annotate_browser__set_top(browser, dl, idx); + browser->searching_backwards = true; + return true; +} + +static bool annotate_browser__search_window(struct annotate_browser *browser, + int delay_secs) +{ + if (ui_browser__input_window("Search", "String: ", browser->search_bf, + "ENTER: OK, ESC: Cancel", + delay_secs * 2) != K_ENTER || + !*browser->search_bf) + return false; + + return true; +} + +static bool annotate_browser__search(struct annotate_browser *browser, int delay_secs) +{ + if (annotate_browser__search_window(browser, delay_secs)) + return __annotate_browser__search(browser); + + return false; +} + +static bool annotate_browser__continue_search(struct annotate_browser *browser, + int delay_secs) +{ + if (!*browser->search_bf) + return annotate_browser__search(browser, delay_secs); + + return __annotate_browser__search(browser); +} + +static bool annotate_browser__search_reverse(struct annotate_browser *browser, + int delay_secs) +{ + if (annotate_browser__search_window(browser, delay_secs)) + return __annotate_browser__search_reverse(browser); + + return false; +} + +static +bool annotate_browser__continue_search_reverse(struct annotate_browser *browser, + int delay_secs) +{ + if (!*browser->search_bf) + return annotate_browser__search_reverse(browser, delay_secs); + + return __annotate_browser__search_reverse(browser); +} + +static void annotate_browser__update_addr_width(struct annotate_browser *browser) +{ + if (annotate_browser__opts.use_offset) + browser->target_width = browser->min_addr_width; + else + browser->target_width = browser->max_addr_width; + + browser->addr_width = browser->target_width; + + if (annotate_browser__opts.show_nr_jumps) + browser->addr_width += browser->jumps_width + 1; +} + +static int annotate_browser__run(struct annotate_browser *browser, int evidx, + void(*timer)(void *arg), + void *arg, int delay_secs) +{ + struct rb_node *nd = NULL; + struct map_symbol *ms = browser->b.priv; + struct symbol *sym = ms->sym; + const char *help = "Press 'h' for help on key bindings"; + int key; + + if (ui_browser__show(&browser->b, sym->name, help) < 0) + return -1; + + annotate_browser__calc_percent(browser, evidx); + + if (browser->curr_hot) { + annotate_browser__set_rb_top(browser, browser->curr_hot); + browser->b.navkeypressed = false; + } + + nd = browser->curr_hot; + + while (1) { + key = ui_browser__run(&browser->b, delay_secs); + + if (delay_secs != 0) { + annotate_browser__calc_percent(browser, evidx); + /* + * Current line focus got out of the list of most active + * lines, NULL it so that if TAB|UNTAB is pressed, we + * move to curr_hot (current hottest line). + */ + if (nd != NULL && RB_EMPTY_NODE(nd)) + nd = NULL; + } + + switch (key) { + case K_TIMER: + if (timer != NULL) + timer(arg); + + if (delay_secs != 0) + symbol__annotate_decay_histogram(sym, evidx); + continue; + case K_TAB: + if (nd != NULL) { + nd = rb_prev(nd); + if (nd == NULL) + nd = rb_last(&browser->entries); + } else + nd = browser->curr_hot; + break; + case K_UNTAB: + if (nd != NULL) + nd = rb_next(nd); + if (nd == NULL) + nd = rb_first(&browser->entries); + else + nd = browser->curr_hot; + break; + case K_F1: + case 'h': + ui_browser__help_window(&browser->b, + "UP/DOWN/PGUP\n" + "PGDN/SPACE Navigate\n" + "q/ESC/CTRL+C Exit\n\n" + "-> Go to target\n" + "<- Exit\n" + "H Cycle thru hottest instructions\n" + "j Toggle showing jump to target arrows\n" + "J Toggle showing number of jump sources on targets\n" + "n Search next string\n" + "o Toggle disassembler output/simplified view\n" + "s Toggle source code view\n" + "/ Search string\n" + "? Search previous string\n"); + continue; + case 'H': + nd = browser->curr_hot; + break; + case 's': + if (annotate_browser__toggle_source(browser)) + ui_helpline__puts(help); + continue; + case 'o': + annotate_browser__opts.use_offset = !annotate_browser__opts.use_offset; + annotate_browser__update_addr_width(browser); + continue; + case 'j': + annotate_browser__opts.jump_arrows = !annotate_browser__opts.jump_arrows; + continue; + case 'J': + annotate_browser__opts.show_nr_jumps = !annotate_browser__opts.show_nr_jumps; + annotate_browser__update_addr_width(browser); + continue; + case '/': + if (annotate_browser__search(browser, delay_secs)) { +show_help: + ui_helpline__puts(help); + } + continue; + case 'n': + if (browser->searching_backwards ? + annotate_browser__continue_search_reverse(browser, delay_secs) : + annotate_browser__continue_search(browser, delay_secs)) + goto show_help; + continue; + case '?': + if (annotate_browser__search_reverse(browser, delay_secs)) + goto show_help; + continue; + case 'D': { + static int seq; + ui_helpline__pop(); + ui_helpline__fpush("%d: nr_ent=%d, height=%d, idx=%d, top_idx=%d, nr_asm_entries=%d", + seq++, browser->b.nr_entries, + browser->b.height, + browser->b.index, + browser->b.top_idx, + browser->nr_asm_entries); + } + continue; + case K_ENTER: + case K_RIGHT: + if (browser->selection == NULL) + ui_helpline__puts("Huh? No selection. Report to linux-kernel@vger.kernel.org"); + else if (browser->selection->offset == -1) + ui_helpline__puts("Actions are only available for assembly lines."); + else if (!browser->selection->ins) { + if (strcmp(browser->selection->name, "retq")) + goto show_sup_ins; + goto out; + } else if (!(annotate_browser__jump(browser) || + annotate_browser__callq(browser, evidx, timer, arg, delay_secs))) { +show_sup_ins: + ui_helpline__puts("Actions are only available for 'callq', 'retq' & jump instructions."); + } + continue; + case K_LEFT: + case K_ESC: + case 'q': + case CTRL('c'): + goto out; + default: + continue; + } + + if (nd != NULL) + annotate_browser__set_rb_top(browser, nd); + } +out: + ui_browser__hide(&browser->b); + return key; +} + +int hist_entry__tui_annotate(struct hist_entry *he, int evidx, + void(*timer)(void *arg), void *arg, int delay_secs) +{ + return symbol__tui_annotate(he->ms.sym, he->ms.map, evidx, + timer, arg, delay_secs); +} + +static void annotate_browser__mark_jump_targets(struct annotate_browser *browser, + size_t size) +{ + u64 offset; + + for (offset = 0; offset < size; ++offset) { + struct disasm_line *dl = browser->offsets[offset], *dlt; + struct browser_disasm_line *bdlt; + + if (!dl || !dl->ins || !ins__is_jump(dl->ins) || + !disasm_line__has_offset(dl)) + continue; + + if (dl->ops.target.offset >= size) { + ui__error("jump to after symbol!\n" + "size: %zx, jump target: %" PRIx64, + size, dl->ops.target.offset); + continue; + } + + dlt = browser->offsets[dl->ops.target.offset]; + /* + * FIXME: Oops, no jump target? Buggy disassembler? Or do we + * have to adjust to the previous offset? + */ + if (dlt == NULL) + continue; + + bdlt = disasm_line__browser(dlt); + if (++bdlt->jump_sources > browser->max_jump_sources) + browser->max_jump_sources = bdlt->jump_sources; + + ++browser->nr_jumps; + } + +} + +static inline int width_jumps(int n) +{ + if (n >= 100) + return 5; + if (n / 10) + return 2; + return 1; +} + +int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx, + void(*timer)(void *arg), void *arg, + int delay_secs) +{ + struct disasm_line *pos, *n; + struct annotation *notes; + size_t size; + struct map_symbol ms = { + .map = map, + .sym = sym, + }; + struct annotate_browser browser = { + .b = { + .refresh = annotate_browser__refresh, + .seek = ui_browser__list_head_seek, + .write = annotate_browser__write, + .filter = disasm_line__filter, + .priv = &ms, + .use_navkeypressed = true, + }, + }; + int ret = -1; + + if (sym == NULL) + return -1; + + size = symbol__size(sym); + + if (map->dso->annotate_warned) + return -1; + + browser.offsets = zalloc(size * sizeof(struct disasm_line *)); + if (browser.offsets == NULL) { + ui__error("Not enough memory!"); + return -1; + } + + if (symbol__annotate(sym, map, sizeof(struct browser_disasm_line)) < 0) { + ui__error("%s", ui_helpline__last_msg); + goto out_free_offsets; + } + + ui_helpline__push("Press <- or ESC to exit"); + + notes = symbol__annotation(sym); + browser.start = map__rip_2objdump(map, sym->start); + + list_for_each_entry(pos, ¬es->src->source, node) { + struct browser_disasm_line *bpos; + size_t line_len = strlen(pos->line); + + if (browser.b.width < line_len) + browser.b.width = line_len; + bpos = disasm_line__browser(pos); + bpos->idx = browser.nr_entries++; + if (pos->offset != -1) { + bpos->idx_asm = browser.nr_asm_entries++; + /* + * FIXME: short term bandaid to cope with assembly + * routines that comes with labels in the same column + * as the address in objdump, sigh. + * + * E.g. copy_user_generic_unrolled + */ + if (pos->offset < (s64)size) + browser.offsets[pos->offset] = pos; + } else + bpos->idx_asm = -1; + } + + annotate_browser__mark_jump_targets(&browser, size); + + browser.addr_width = browser.target_width = browser.min_addr_width = hex_width(size); + browser.max_addr_width = hex_width(sym->end); + browser.jumps_width = width_jumps(browser.max_jump_sources); + browser.b.nr_entries = browser.nr_entries; + browser.b.entries = ¬es->src->source, + browser.b.width += 18; /* Percentage */ + + if (annotate_browser__opts.hide_src_code) + annotate_browser__init_asm_mode(&browser); + + annotate_browser__update_addr_width(&browser); + + ret = annotate_browser__run(&browser, evidx, timer, arg, delay_secs); + list_for_each_entry_safe(pos, n, ¬es->src->source, node) { + list_del(&pos->node); + disasm_line__free(pos); + } + +out_free_offsets: + free(browser.offsets); + return ret; +} + +#define ANNOTATE_CFG(n) \ + { .name = #n, .value = &annotate_browser__opts.n, } + +/* + * Keep the entries sorted, they are bsearch'ed + */ +static struct annotate__config { + const char *name; + bool *value; +} annotate__configs[] = { + ANNOTATE_CFG(hide_src_code), + ANNOTATE_CFG(jump_arrows), + ANNOTATE_CFG(show_nr_jumps), + ANNOTATE_CFG(use_offset), +}; + +#undef ANNOTATE_CFG + +static int annotate_config__cmp(const void *name, const void *cfgp) +{ + const struct annotate__config *cfg = cfgp; + + return strcmp(name, cfg->name); +} + +static int annotate__config(const char *var, const char *value, void *data __used) +{ + struct annotate__config *cfg; + const char *name; + + if (prefixcmp(var, "annotate.") != 0) + return 0; + + name = var + 9; + cfg = bsearch(name, annotate__configs, ARRAY_SIZE(annotate__configs), + sizeof(struct annotate__config), annotate_config__cmp); + + if (cfg == NULL) + return -1; + + *cfg->value = perf_config_bool(name, value); + return 0; +} + +void annotate_browser__init(void) +{ + perf_config(annotate__config, NULL); +} diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c new file mode 100644 index 000000000000..413bd62eedb1 --- /dev/null +++ b/tools/perf/ui/browsers/hists.c @@ -0,0 +1,1533 @@ +#include <stdio.h> +#include "../libslang.h" +#include <stdlib.h> +#include <string.h> +#include <newt.h> +#include <linux/rbtree.h> + +#include "../../util/evsel.h" +#include "../../util/evlist.h" +#include "../../util/hist.h" +#include "../../util/pstack.h" +#include "../../util/sort.h" +#include "../../util/util.h" + +#include "../browser.h" +#include "../helpline.h" +#include "../util.h" +#include "../ui.h" +#include "map.h" + +struct hist_browser { + struct ui_browser b; + struct hists *hists; + struct hist_entry *he_selection; + struct map_symbol *selection; + int print_seq; + bool has_symbols; +}; + +static int hists__browser_title(struct hists *hists, char *bf, size_t size, + const char *ev_name); + +static void hist_browser__refresh_dimensions(struct hist_browser *browser) +{ + /* 3 == +/- toggle symbol before actual hist_entry rendering */ + browser->b.width = 3 + (hists__sort_list_width(browser->hists) + + sizeof("[k]")); +} + +static void hist_browser__reset(struct hist_browser *browser) +{ + browser->b.nr_entries = browser->hists->nr_entries; + hist_browser__refresh_dimensions(browser); + ui_browser__reset_index(&browser->b); +} + +static char tree__folded_sign(bool unfolded) +{ + return unfolded ? '-' : '+'; +} + +static char map_symbol__folded(const struct map_symbol *ms) +{ + return ms->has_children ? tree__folded_sign(ms->unfolded) : ' '; +} + +static char hist_entry__folded(const struct hist_entry *he) +{ + return map_symbol__folded(&he->ms); +} + +static char callchain_list__folded(const struct callchain_list *cl) +{ + return map_symbol__folded(&cl->ms); +} + +static void map_symbol__set_folding(struct map_symbol *ms, bool unfold) +{ + ms->unfolded = unfold ? ms->has_children : false; +} + +static int callchain_node__count_rows_rb_tree(struct callchain_node *node) +{ + int n = 0; + struct rb_node *nd; + + for (nd = rb_first(&node->rb_root); nd; nd = rb_next(nd)) { + struct callchain_node *child = rb_entry(nd, struct callchain_node, rb_node); + struct callchain_list *chain; + char folded_sign = ' '; /* No children */ + + list_for_each_entry(chain, &child->val, list) { + ++n; + /* We need this because we may not have children */ + folded_sign = callchain_list__folded(chain); + if (folded_sign == '+') + break; + } + + if (folded_sign == '-') /* Have children and they're unfolded */ + n += callchain_node__count_rows_rb_tree(child); + } + + return n; +} + +static int callchain_node__count_rows(struct callchain_node *node) +{ + struct callchain_list *chain; + bool unfolded = false; + int n = 0; + + list_for_each_entry(chain, &node->val, list) { + ++n; + unfolded = chain->ms.unfolded; + } + + if (unfolded) + n += callchain_node__count_rows_rb_tree(node); + + return n; +} + +static int callchain__count_rows(struct rb_root *chain) +{ + struct rb_node *nd; + int n = 0; + + for (nd = rb_first(chain); nd; nd = rb_next(nd)) { + struct callchain_node *node = rb_entry(nd, struct callchain_node, rb_node); + n += callchain_node__count_rows(node); + } + + return n; +} + +static bool map_symbol__toggle_fold(struct map_symbol *ms) +{ + if (!ms) + return false; + + if (!ms->has_children) + return false; + + ms->unfolded = !ms->unfolded; + return true; +} + +static void callchain_node__init_have_children_rb_tree(struct callchain_node *node) +{ + struct rb_node *nd = rb_first(&node->rb_root); + + for (nd = rb_first(&node->rb_root); nd; nd = rb_next(nd)) { + struct callchain_node *child = rb_entry(nd, struct callchain_node, rb_node); + struct callchain_list *chain; + bool first = true; + + list_for_each_entry(chain, &child->val, list) { + if (first) { + first = false; + chain->ms.has_children = chain->list.next != &child->val || + !RB_EMPTY_ROOT(&child->rb_root); + } else + chain->ms.has_children = chain->list.next == &child->val && + !RB_EMPTY_ROOT(&child->rb_root); + } + + callchain_node__init_have_children_rb_tree(child); + } +} + +static void callchain_node__init_have_children(struct callchain_node *node) +{ + struct callchain_list *chain; + + list_for_each_entry(chain, &node->val, list) + chain->ms.has_children = !RB_EMPTY_ROOT(&node->rb_root); + + callchain_node__init_have_children_rb_tree(node); +} + +static void callchain__init_have_children(struct rb_root *root) +{ + struct rb_node *nd; + + for (nd = rb_first(root); nd; nd = rb_next(nd)) { + struct callchain_node *node = rb_entry(nd, struct callchain_node, rb_node); + callchain_node__init_have_children(node); + } +} + +static void hist_entry__init_have_children(struct hist_entry *he) +{ + if (!he->init_have_children) { + he->ms.has_children = !RB_EMPTY_ROOT(&he->sorted_chain); + callchain__init_have_children(&he->sorted_chain); + he->init_have_children = true; + } +} + +static bool hist_browser__toggle_fold(struct hist_browser *browser) +{ + if (map_symbol__toggle_fold(browser->selection)) { + struct hist_entry *he = browser->he_selection; + + hist_entry__init_have_children(he); + browser->hists->nr_entries -= he->nr_rows; + + if (he->ms.unfolded) + he->nr_rows = callchain__count_rows(&he->sorted_chain); + else + he->nr_rows = 0; + browser->hists->nr_entries += he->nr_rows; + browser->b.nr_entries = browser->hists->nr_entries; + + return true; + } + + /* If it doesn't have children, no toggling performed */ + return false; +} + +static int callchain_node__set_folding_rb_tree(struct callchain_node *node, bool unfold) +{ + int n = 0; + struct rb_node *nd; + + for (nd = rb_first(&node->rb_root); nd; nd = rb_next(nd)) { + struct callchain_node *child = rb_entry(nd, struct callchain_node, rb_node); + struct callchain_list *chain; + bool has_children = false; + + list_for_each_entry(chain, &child->val, list) { + ++n; + map_symbol__set_folding(&chain->ms, unfold); + has_children = chain->ms.has_children; + } + + if (has_children) + n += callchain_node__set_folding_rb_tree(child, unfold); + } + + return n; +} + +static int callchain_node__set_folding(struct callchain_node *node, bool unfold) +{ + struct callchain_list *chain; + bool has_children = false; + int n = 0; + + list_for_each_entry(chain, &node->val, list) { + ++n; + map_symbol__set_folding(&chain->ms, unfold); + has_children = chain->ms.has_children; + } + + if (has_children) + n += callchain_node__set_folding_rb_tree(node, unfold); + + return n; +} + +static int callchain__set_folding(struct rb_root *chain, bool unfold) +{ + struct rb_node *nd; + int n = 0; + + for (nd = rb_first(chain); nd; nd = rb_next(nd)) { + struct callchain_node *node = rb_entry(nd, struct callchain_node, rb_node); + n += callchain_node__set_folding(node, unfold); + } + + return n; +} + +static void hist_entry__set_folding(struct hist_entry *he, bool unfold) +{ + hist_entry__init_have_children(he); + map_symbol__set_folding(&he->ms, unfold); + + if (he->ms.has_children) { + int n = callchain__set_folding(&he->sorted_chain, unfold); + he->nr_rows = unfold ? n : 0; + } else + he->nr_rows = 0; +} + +static void hists__set_folding(struct hists *hists, bool unfold) +{ + struct rb_node *nd; + + hists->nr_entries = 0; + + for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { + struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); + hist_entry__set_folding(he, unfold); + hists->nr_entries += 1 + he->nr_rows; + } +} + +static void hist_browser__set_folding(struct hist_browser *browser, bool unfold) +{ + hists__set_folding(browser->hists, unfold); + browser->b.nr_entries = browser->hists->nr_entries; + /* Go to the start, we may be way after valid entries after a collapse */ + ui_browser__reset_index(&browser->b); +} + +static void ui_browser__warn_lost_events(struct ui_browser *browser) +{ + ui_browser__warning(browser, 4, + "Events are being lost, check IO/CPU overload!\n\n" + "You may want to run 'perf' using a RT scheduler policy:\n\n" + " perf top -r 80\n\n" + "Or reduce the sampling frequency."); +} + +static int hist_browser__run(struct hist_browser *browser, const char *ev_name, + void(*timer)(void *arg), void *arg, int delay_secs) +{ + int key; + char title[160]; + + browser->b.entries = &browser->hists->entries; + browser->b.nr_entries = browser->hists->nr_entries; + + hist_browser__refresh_dimensions(browser); + hists__browser_title(browser->hists, title, sizeof(title), ev_name); + + if (ui_browser__show(&browser->b, title, + "Press '?' for help on key bindings") < 0) + return -1; + + while (1) { + key = ui_browser__run(&browser->b, delay_secs); + + switch (key) { + case K_TIMER: + timer(arg); + ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries); + + if (browser->hists->stats.nr_lost_warned != + browser->hists->stats.nr_events[PERF_RECORD_LOST]) { + browser->hists->stats.nr_lost_warned = + browser->hists->stats.nr_events[PERF_RECORD_LOST]; + ui_browser__warn_lost_events(&browser->b); + } + + hists__browser_title(browser->hists, title, sizeof(title), ev_name); + ui_browser__show_title(&browser->b, title); + continue; + case 'D': { /* Debug */ + static int seq; + struct hist_entry *h = rb_entry(browser->b.top, + struct hist_entry, rb_node); + ui_helpline__pop(); + ui_helpline__fpush("%d: nr_ent=(%d,%d), height=%d, idx=%d, fve: idx=%d, row_off=%d, nrows=%d", + seq++, browser->b.nr_entries, + browser->hists->nr_entries, + browser->b.height, + browser->b.index, + browser->b.top_idx, + h->row_offset, h->nr_rows); + } + break; + case 'C': + /* Collapse the whole world. */ + hist_browser__set_folding(browser, false); + break; + case 'E': + /* Expand the whole world. */ + hist_browser__set_folding(browser, true); + break; + case K_ENTER: + if (hist_browser__toggle_fold(browser)) + break; + /* fall thru */ + default: + goto out; + } + } +out: + ui_browser__hide(&browser->b); + return key; +} + +static char *callchain_list__sym_name(struct callchain_list *cl, + char *bf, size_t bfsize) +{ + if (cl->ms.sym) + return cl->ms.sym->name; + + snprintf(bf, bfsize, "%#" PRIx64, cl->ip); + return bf; +} + +#define LEVEL_OFFSET_STEP 3 + +static int hist_browser__show_callchain_node_rb_tree(struct hist_browser *browser, + struct callchain_node *chain_node, + u64 total, int level, + unsigned short row, + off_t *row_offset, + bool *is_current_entry) +{ + struct rb_node *node; + int first_row = row, width, offset = level * LEVEL_OFFSET_STEP; + u64 new_total, remaining; + + if (callchain_param.mode == CHAIN_GRAPH_REL) + new_total = chain_node->children_hit; + else + new_total = total; + + remaining = new_total; + node = rb_first(&chain_node->rb_root); + while (node) { + struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node); + struct rb_node *next = rb_next(node); + u64 cumul = callchain_cumul_hits(child); + struct callchain_list *chain; + char folded_sign = ' '; + int first = true; + int extra_offset = 0; + + remaining -= cumul; + + list_for_each_entry(chain, &child->val, list) { + char ipstr[BITS_PER_LONG / 4 + 1], *alloc_str; + const char *str; + int color; + bool was_first = first; + + if (first) + first = false; + else + extra_offset = LEVEL_OFFSET_STEP; + + folded_sign = callchain_list__folded(chain); + if (*row_offset != 0) { + --*row_offset; + goto do_next; + } + + alloc_str = NULL; + str = callchain_list__sym_name(chain, ipstr, sizeof(ipstr)); + if (was_first) { + double percent = cumul * 100.0 / new_total; + + if (asprintf(&alloc_str, "%2.2f%% %s", percent, str) < 0) + str = "Not enough memory!"; + else + str = alloc_str; + } + + color = HE_COLORSET_NORMAL; + width = browser->b.width - (offset + extra_offset + 2); + if (ui_browser__is_current_entry(&browser->b, row)) { + browser->selection = &chain->ms; + color = HE_COLORSET_SELECTED; + *is_current_entry = true; + } + + ui_browser__set_color(&browser->b, color); + ui_browser__gotorc(&browser->b, row, 0); + slsmg_write_nstring(" ", offset + extra_offset); + slsmg_printf("%c ", folded_sign); + slsmg_write_nstring(str, width); + free(alloc_str); + + if (++row == browser->b.height) + goto out; +do_next: + if (folded_sign == '+') + break; + } + + if (folded_sign == '-') { + const int new_level = level + (extra_offset ? 2 : 1); + row += hist_browser__show_callchain_node_rb_tree(browser, child, new_total, + new_level, row, row_offset, + is_current_entry); + } + if (row == browser->b.height) + goto out; + node = next; + } +out: + return row - first_row; +} + +static int hist_browser__show_callchain_node(struct hist_browser *browser, + struct callchain_node *node, + int level, unsigned short row, + off_t *row_offset, + bool *is_current_entry) +{ + struct callchain_list *chain; + int first_row = row, + offset = level * LEVEL_OFFSET_STEP, + width = browser->b.width - offset; + char folded_sign = ' '; + + list_for_each_entry(chain, &node->val, list) { + char ipstr[BITS_PER_LONG / 4 + 1], *s; + int color; + + folded_sign = callchain_list__folded(chain); + + if (*row_offset != 0) { + --*row_offset; + continue; + } + + color = HE_COLORSET_NORMAL; + if (ui_browser__is_current_entry(&browser->b, row)) { + browser->selection = &chain->ms; + color = HE_COLORSET_SELECTED; + *is_current_entry = true; + } + + s = callchain_list__sym_name(chain, ipstr, sizeof(ipstr)); + ui_browser__gotorc(&browser->b, row, 0); + ui_browser__set_color(&browser->b, color); + slsmg_write_nstring(" ", offset); + slsmg_printf("%c ", folded_sign); + slsmg_write_nstring(s, width - 2); + + if (++row == browser->b.height) + goto out; + } + + if (folded_sign == '-') + row += hist_browser__show_callchain_node_rb_tree(browser, node, + browser->hists->stats.total_period, + level + 1, row, + row_offset, + is_current_entry); +out: + return row - first_row; +} + +static int hist_browser__show_callchain(struct hist_browser *browser, + struct rb_root *chain, + int level, unsigned short row, + off_t *row_offset, + bool *is_current_entry) +{ + struct rb_node *nd; + int first_row = row; + + for (nd = rb_first(chain); nd; nd = rb_next(nd)) { + struct callchain_node *node = rb_entry(nd, struct callchain_node, rb_node); + + row += hist_browser__show_callchain_node(browser, node, level, + row, row_offset, + is_current_entry); + if (row == browser->b.height) + break; + } + + return row - first_row; +} + +static int hist_browser__show_entry(struct hist_browser *browser, + struct hist_entry *entry, + unsigned short row) +{ + char s[256]; + double percent; + int printed = 0; + int width = browser->b.width - 6; /* The percentage */ + char folded_sign = ' '; + bool current_entry = ui_browser__is_current_entry(&browser->b, row); + off_t row_offset = entry->row_offset; + + if (current_entry) { + browser->he_selection = entry; + browser->selection = &entry->ms; + } + + if (symbol_conf.use_callchain) { + hist_entry__init_have_children(entry); + folded_sign = hist_entry__folded(entry); + } + + if (row_offset == 0) { + hist_entry__snprintf(entry, s, sizeof(s), browser->hists); + percent = (entry->period * 100.0) / browser->hists->stats.total_period; + + ui_browser__set_percent_color(&browser->b, percent, current_entry); + ui_browser__gotorc(&browser->b, row, 0); + if (symbol_conf.use_callchain) { + slsmg_printf("%c ", folded_sign); + width -= 2; + } + + slsmg_printf(" %5.2f%%", percent); + + /* The scroll bar isn't being used */ + if (!browser->b.navkeypressed) + width += 1; + + if (!current_entry || !browser->b.navkeypressed) + ui_browser__set_color(&browser->b, HE_COLORSET_NORMAL); + + if (symbol_conf.show_nr_samples) { + slsmg_printf(" %11u", entry->nr_events); + width -= 12; + } + + if (symbol_conf.show_total_period) { + slsmg_printf(" %12" PRIu64, entry->period); + width -= 13; + } + + slsmg_write_nstring(s, width); + ++row; + ++printed; + } else + --row_offset; + + if (folded_sign == '-' && row != browser->b.height) { + printed += hist_browser__show_callchain(browser, &entry->sorted_chain, + 1, row, &row_offset, + ¤t_entry); + if (current_entry) + browser->he_selection = entry; + } + + return printed; +} + +static void ui_browser__hists_init_top(struct ui_browser *browser) +{ + if (browser->top == NULL) { + struct hist_browser *hb; + + hb = container_of(browser, struct hist_browser, b); + browser->top = rb_first(&hb->hists->entries); + } +} + +static unsigned int hist_browser__refresh(struct ui_browser *browser) +{ + unsigned row = 0; + struct rb_node *nd; + struct hist_browser *hb = container_of(browser, struct hist_browser, b); + + ui_browser__hists_init_top(browser); + + for (nd = browser->top; nd; nd = rb_next(nd)) { + struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); + + if (h->filtered) + continue; + + row += hist_browser__show_entry(hb, h, row); + if (row == browser->height) + break; + } + + return row; +} + +static struct rb_node *hists__filter_entries(struct rb_node *nd) +{ + while (nd != NULL) { + struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); + if (!h->filtered) + return nd; + + nd = rb_next(nd); + } + + return NULL; +} + +static struct rb_node *hists__filter_prev_entries(struct rb_node *nd) +{ + while (nd != NULL) { + struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); + if (!h->filtered) + return nd; + + nd = rb_prev(nd); + } + + return NULL; +} + +static void ui_browser__hists_seek(struct ui_browser *browser, + off_t offset, int whence) +{ + struct hist_entry *h; + struct rb_node *nd; + bool first = true; + + if (browser->nr_entries == 0) + return; + + ui_browser__hists_init_top(browser); + + switch (whence) { + case SEEK_SET: + nd = hists__filter_entries(rb_first(browser->entries)); + break; + case SEEK_CUR: + nd = browser->top; + goto do_offset; + case SEEK_END: + nd = hists__filter_prev_entries(rb_last(browser->entries)); + first = false; + break; + default: + return; + } + + /* + * Moves not relative to the first visible entry invalidates its + * row_offset: + */ + h = rb_entry(browser->top, struct hist_entry, rb_node); + h->row_offset = 0; + + /* + * Here we have to check if nd is expanded (+), if it is we can't go + * the next top level hist_entry, instead we must compute an offset of + * what _not_ to show and not change the first visible entry. + * + * This offset increments when we are going from top to bottom and + * decreases when we're going from bottom to top. + * + * As we don't have backpointers to the top level in the callchains + * structure, we need to always print the whole hist_entry callchain, + * skipping the first ones that are before the first visible entry + * and stop when we printed enough lines to fill the screen. + */ +do_offset: + if (offset > 0) { + do { + h = rb_entry(nd, struct hist_entry, rb_node); + if (h->ms.unfolded) { + u16 remaining = h->nr_rows - h->row_offset; + if (offset > remaining) { + offset -= remaining; + h->row_offset = 0; + } else { + h->row_offset += offset; + offset = 0; + browser->top = nd; + break; + } + } + nd = hists__filter_entries(rb_next(nd)); + if (nd == NULL) + break; + --offset; + browser->top = nd; + } while (offset != 0); + } else if (offset < 0) { + while (1) { + h = rb_entry(nd, struct hist_entry, rb_node); + if (h->ms.unfolded) { + if (first) { + if (-offset > h->row_offset) { + offset += h->row_offset; + h->row_offset = 0; + } else { + h->row_offset += offset; + offset = 0; + browser->top = nd; + break; + } + } else { + if (-offset > h->nr_rows) { + offset += h->nr_rows; + h->row_offset = 0; + } else { + h->row_offset = h->nr_rows + offset; + offset = 0; + browser->top = nd; + break; + } + } + } + + nd = hists__filter_prev_entries(rb_prev(nd)); + if (nd == NULL) + break; + ++offset; + browser->top = nd; + if (offset == 0) { + /* + * Last unfiltered hist_entry, check if it is + * unfolded, if it is then we should have + * row_offset at its last entry. + */ + h = rb_entry(nd, struct hist_entry, rb_node); + if (h->ms.unfolded) + h->row_offset = h->nr_rows; + break; + } + first = false; + } + } else { + browser->top = nd; + h = rb_entry(nd, struct hist_entry, rb_node); + h->row_offset = 0; + } +} + +static int hist_browser__fprintf_callchain_node_rb_tree(struct hist_browser *browser, + struct callchain_node *chain_node, + u64 total, int level, + FILE *fp) +{ + struct rb_node *node; + int offset = level * LEVEL_OFFSET_STEP; + u64 new_total, remaining; + int printed = 0; + + if (callchain_param.mode == CHAIN_GRAPH_REL) + new_total = chain_node->children_hit; + else + new_total = total; + + remaining = new_total; + node = rb_first(&chain_node->rb_root); + while (node) { + struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node); + struct rb_node *next = rb_next(node); + u64 cumul = callchain_cumul_hits(child); + struct callchain_list *chain; + char folded_sign = ' '; + int first = true; + int extra_offset = 0; + + remaining -= cumul; + + list_for_each_entry(chain, &child->val, list) { + char ipstr[BITS_PER_LONG / 4 + 1], *alloc_str; + const char *str; + bool was_first = first; + + if (first) + first = false; + else + extra_offset = LEVEL_OFFSET_STEP; + + folded_sign = callchain_list__folded(chain); + + alloc_str = NULL; + str = callchain_list__sym_name(chain, ipstr, sizeof(ipstr)); + if (was_first) { + double percent = cumul * 100.0 / new_total; + + if (asprintf(&alloc_str, "%2.2f%% %s", percent, str) < 0) + str = "Not enough memory!"; + else + str = alloc_str; + } + + printed += fprintf(fp, "%*s%c %s\n", offset + extra_offset, " ", folded_sign, str); + free(alloc_str); + if (folded_sign == '+') + break; + } + + if (folded_sign == '-') { + const int new_level = level + (extra_offset ? 2 : 1); + printed += hist_browser__fprintf_callchain_node_rb_tree(browser, child, new_total, + new_level, fp); + } + + node = next; + } + + return printed; +} + +static int hist_browser__fprintf_callchain_node(struct hist_browser *browser, + struct callchain_node *node, + int level, FILE *fp) +{ + struct callchain_list *chain; + int offset = level * LEVEL_OFFSET_STEP; + char folded_sign = ' '; + int printed = 0; + + list_for_each_entry(chain, &node->val, list) { + char ipstr[BITS_PER_LONG / 4 + 1], *s; + + folded_sign = callchain_list__folded(chain); + s = callchain_list__sym_name(chain, ipstr, sizeof(ipstr)); + printed += fprintf(fp, "%*s%c %s\n", offset, " ", folded_sign, s); + } + + if (folded_sign == '-') + printed += hist_browser__fprintf_callchain_node_rb_tree(browser, node, + browser->hists->stats.total_period, + level + 1, fp); + return printed; +} + +static int hist_browser__fprintf_callchain(struct hist_browser *browser, + struct rb_root *chain, int level, FILE *fp) +{ + struct rb_node *nd; + int printed = 0; + + for (nd = rb_first(chain); nd; nd = rb_next(nd)) { + struct callchain_node *node = rb_entry(nd, struct callchain_node, rb_node); + + printed += hist_browser__fprintf_callchain_node(browser, node, level, fp); + } + + return printed; +} + +static int hist_browser__fprintf_entry(struct hist_browser *browser, + struct hist_entry *he, FILE *fp) +{ + char s[8192]; + double percent; + int printed = 0; + char folded_sign = ' '; + + if (symbol_conf.use_callchain) + folded_sign = hist_entry__folded(he); + + hist_entry__snprintf(he, s, sizeof(s), browser->hists); + percent = (he->period * 100.0) / browser->hists->stats.total_period; + + if (symbol_conf.use_callchain) + printed += fprintf(fp, "%c ", folded_sign); + + printed += fprintf(fp, " %5.2f%%", percent); + + if (symbol_conf.show_nr_samples) + printed += fprintf(fp, " %11u", he->nr_events); + + if (symbol_conf.show_total_period) + printed += fprintf(fp, " %12" PRIu64, he->period); + + printed += fprintf(fp, "%s\n", rtrim(s)); + + if (folded_sign == '-') + printed += hist_browser__fprintf_callchain(browser, &he->sorted_chain, 1, fp); + + return printed; +} + +static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp) +{ + struct rb_node *nd = hists__filter_entries(rb_first(browser->b.entries)); + int printed = 0; + + while (nd) { + struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); + + printed += hist_browser__fprintf_entry(browser, h, fp); + nd = hists__filter_entries(rb_next(nd)); + } + + return printed; +} + +static int hist_browser__dump(struct hist_browser *browser) +{ + char filename[64]; + FILE *fp; + + while (1) { + scnprintf(filename, sizeof(filename), "perf.hist.%d", browser->print_seq); + if (access(filename, F_OK)) + break; + /* + * XXX: Just an arbitrary lazy upper limit + */ + if (++browser->print_seq == 8192) { + ui_helpline__fpush("Too many perf.hist.N files, nothing written!"); + return -1; + } + } + + fp = fopen(filename, "w"); + if (fp == NULL) { + char bf[64]; + const char *err = strerror_r(errno, bf, sizeof(bf)); + ui_helpline__fpush("Couldn't write to %s: %s", filename, err); + return -1; + } + + ++browser->print_seq; + hist_browser__fprintf(browser, fp); + fclose(fp); + ui_helpline__fpush("%s written!", filename); + + return 0; +} + +static struct hist_browser *hist_browser__new(struct hists *hists) +{ + struct hist_browser *browser = zalloc(sizeof(*browser)); + + if (browser) { + browser->hists = hists; + browser->b.refresh = hist_browser__refresh; + browser->b.seek = ui_browser__hists_seek; + browser->b.use_navkeypressed = true; + if (sort__branch_mode == 1) + browser->has_symbols = sort_sym_from.list.next != NULL; + else + browser->has_symbols = sort_sym.list.next != NULL; + } + + return browser; +} + +static void hist_browser__delete(struct hist_browser *browser) +{ + free(browser); +} + +static struct hist_entry *hist_browser__selected_entry(struct hist_browser *browser) +{ + return browser->he_selection; +} + +static struct thread *hist_browser__selected_thread(struct hist_browser *browser) +{ + return browser->he_selection->thread; +} + +static int hists__browser_title(struct hists *hists, char *bf, size_t size, + const char *ev_name) +{ + char unit; + int printed; + const struct dso *dso = hists->dso_filter; + const struct thread *thread = hists->thread_filter; + unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE]; + u64 nr_events = hists->stats.total_period; + + nr_samples = convert_unit(nr_samples, &unit); + printed = scnprintf(bf, size, + "Samples: %lu%c of event '%s', Event count (approx.): %lu", + nr_samples, unit, ev_name, nr_events); + + + if (hists->uid_filter_str) + printed += snprintf(bf + printed, size - printed, + ", UID: %s", hists->uid_filter_str); + if (thread) + printed += scnprintf(bf + printed, size - printed, + ", Thread: %s(%d)", + (thread->comm_set ? thread->comm : ""), + thread->pid); + if (dso) + printed += scnprintf(bf + printed, size - printed, + ", DSO: %s", dso->short_name); + return printed; +} + +static inline void free_popup_options(char **options, int n) +{ + int i; + + for (i = 0; i < n; ++i) { + free(options[i]); + options[i] = NULL; + } +} + +static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, + const char *helpline, const char *ev_name, + bool left_exits, + void(*timer)(void *arg), void *arg, + int delay_secs) +{ + struct hists *hists = &evsel->hists; + struct hist_browser *browser = hist_browser__new(hists); + struct branch_info *bi; + struct pstack *fstack; + char *options[16]; + int nr_options = 0; + int key = -1; + char buf[64]; + + if (browser == NULL) + return -1; + + fstack = pstack__new(2); + if (fstack == NULL) + goto out; + + ui_helpline__push(helpline); + + memset(options, 0, sizeof(options)); + + while (1) { + const struct thread *thread = NULL; + const struct dso *dso = NULL; + int choice = 0, + annotate = -2, zoom_dso = -2, zoom_thread = -2, + annotate_f = -2, annotate_t = -2, browse_map = -2; + + nr_options = 0; + + key = hist_browser__run(browser, ev_name, timer, arg, delay_secs); + + if (browser->he_selection != NULL) { + thread = hist_browser__selected_thread(browser); + dso = browser->selection->map ? browser->selection->map->dso : NULL; + } + switch (key) { + case K_TAB: + case K_UNTAB: + if (nr_events == 1) + continue; + /* + * Exit the browser, let hists__browser_tree + * go to the next or previous + */ + goto out_free_stack; + case 'a': + if (!browser->has_symbols) { + ui_browser__warning(&browser->b, delay_secs * 2, + "Annotation is only available for symbolic views, " + "include \"sym*\" in --sort to use it."); + continue; + } + + if (browser->selection == NULL || + browser->selection->sym == NULL || + browser->selection->map->dso->annotate_warned) + continue; + goto do_annotate; + case 'P': + hist_browser__dump(browser); + continue; + case 'd': + goto zoom_dso; + case 't': + goto zoom_thread; + case '/': + if (ui_browser__input_window("Symbol to show", + "Please enter the name of symbol you want to see", + buf, "ENTER: OK, ESC: Cancel", + delay_secs * 2) == K_ENTER) { + hists->symbol_filter_str = *buf ? buf : NULL; + hists__filter_by_symbol(hists); + hist_browser__reset(browser); + } + continue; + case K_F1: + case 'h': + case '?': + ui_browser__help_window(&browser->b, + "h/?/F1 Show this window\n" + "UP/DOWN/PGUP\n" + "PGDN/SPACE Navigate\n" + "q/ESC/CTRL+C Exit browser\n\n" + "For multiple event sessions:\n\n" + "TAB/UNTAB Switch events\n\n" + "For symbolic views (--sort has sym):\n\n" + "-> Zoom into DSO/Threads & Annotate current symbol\n" + "<- Zoom out\n" + "a Annotate current symbol\n" + "C Collapse all callchains\n" + "E Expand all callchains\n" + "d Zoom into current DSO\n" + "t Zoom into current Thread\n" + "P Print histograms to perf.hist.N\n" + "/ Filter symbol by name"); + continue; + case K_ENTER: + case K_RIGHT: + /* menu */ + break; + case K_LEFT: { + const void *top; + + if (pstack__empty(fstack)) { + /* + * Go back to the perf_evsel_menu__run or other user + */ + if (left_exits) + goto out_free_stack; + continue; + } + top = pstack__pop(fstack); + if (top == &browser->hists->dso_filter) + goto zoom_out_dso; + if (top == &browser->hists->thread_filter) + goto zoom_out_thread; + continue; + } + case K_ESC: + if (!left_exits && + !ui_browser__dialog_yesno(&browser->b, + "Do you really want to exit?")) + continue; + /* Fall thru */ + case 'q': + case CTRL('c'): + goto out_free_stack; + default: + continue; + } + + if (!browser->has_symbols) + goto add_exit_option; + + if (sort__branch_mode == 1) { + bi = browser->he_selection->branch_info; + if (browser->selection != NULL && + bi && + bi->from.sym != NULL && + !bi->from.map->dso->annotate_warned && + asprintf(&options[nr_options], "Annotate %s", + bi->from.sym->name) > 0) + annotate_f = nr_options++; + + if (browser->selection != NULL && + bi && + bi->to.sym != NULL && + !bi->to.map->dso->annotate_warned && + (bi->to.sym != bi->from.sym || + bi->to.map->dso != bi->from.map->dso) && + asprintf(&options[nr_options], "Annotate %s", + bi->to.sym->name) > 0) + annotate_t = nr_options++; + } else { + + if (browser->selection != NULL && + browser->selection->sym != NULL && + !browser->selection->map->dso->annotate_warned && + asprintf(&options[nr_options], "Annotate %s", + browser->selection->sym->name) > 0) + annotate = nr_options++; + } + + if (thread != NULL && + asprintf(&options[nr_options], "Zoom %s %s(%d) thread", + (browser->hists->thread_filter ? "out of" : "into"), + (thread->comm_set ? thread->comm : ""), + thread->pid) > 0) + zoom_thread = nr_options++; + + if (dso != NULL && + asprintf(&options[nr_options], "Zoom %s %s DSO", + (browser->hists->dso_filter ? "out of" : "into"), + (dso->kernel ? "the Kernel" : dso->short_name)) > 0) + zoom_dso = nr_options++; + + if (browser->selection != NULL && + browser->selection->map != NULL && + asprintf(&options[nr_options], "Browse map details") > 0) + browse_map = nr_options++; +add_exit_option: + options[nr_options++] = (char *)"Exit"; +retry_popup_menu: + choice = ui__popup_menu(nr_options, options); + + if (choice == nr_options - 1) + break; + + if (choice == -1) { + free_popup_options(options, nr_options - 1); + continue; + } + + if (choice == annotate || choice == annotate_t || choice == annotate_f) { + struct hist_entry *he; + int err; +do_annotate: + he = hist_browser__selected_entry(browser); + if (he == NULL) + continue; + + /* + * we stash the branch_info symbol + map into the + * the ms so we don't have to rewrite all the annotation + * code to use branch_info. + * in branch mode, the ms struct is not used + */ + if (choice == annotate_f) { + he->ms.sym = he->branch_info->from.sym; + he->ms.map = he->branch_info->from.map; + } else if (choice == annotate_t) { + he->ms.sym = he->branch_info->to.sym; + he->ms.map = he->branch_info->to.map; + } + + /* + * Don't let this be freed, say, by hists__decay_entry. + */ + he->used = true; + err = hist_entry__tui_annotate(he, evsel->idx, + timer, arg, delay_secs); + he->used = false; + /* + * offer option to annotate the other branch source or target + * (if they exists) when returning from annotate + */ + if ((err == 'q' || err == CTRL('c')) + && annotate_t != -2 && annotate_f != -2) + goto retry_popup_menu; + + ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries); + if (err) + ui_browser__handle_resize(&browser->b); + + } else if (choice == browse_map) + map__browse(browser->selection->map); + else if (choice == zoom_dso) { +zoom_dso: + if (browser->hists->dso_filter) { + pstack__remove(fstack, &browser->hists->dso_filter); +zoom_out_dso: + ui_helpline__pop(); + browser->hists->dso_filter = NULL; + sort_dso.elide = false; + } else { + if (dso == NULL) + continue; + ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s DSO\"", + dso->kernel ? "the Kernel" : dso->short_name); + browser->hists->dso_filter = dso; + sort_dso.elide = true; + pstack__push(fstack, &browser->hists->dso_filter); + } + hists__filter_by_dso(hists); + hist_browser__reset(browser); + } else if (choice == zoom_thread) { +zoom_thread: + if (browser->hists->thread_filter) { + pstack__remove(fstack, &browser->hists->thread_filter); +zoom_out_thread: + ui_helpline__pop(); + browser->hists->thread_filter = NULL; + sort_thread.elide = false; + } else { + ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"", + thread->comm_set ? thread->comm : "", + thread->pid); + browser->hists->thread_filter = thread; + sort_thread.elide = true; + pstack__push(fstack, &browser->hists->thread_filter); + } + hists__filter_by_thread(hists); + hist_browser__reset(browser); + } + } +out_free_stack: + pstack__delete(fstack); +out: + hist_browser__delete(browser); + free_popup_options(options, nr_options - 1); + return key; +} + +struct perf_evsel_menu { + struct ui_browser b; + struct perf_evsel *selection; + bool lost_events, lost_events_warned; +}; + +static void perf_evsel_menu__write(struct ui_browser *browser, + void *entry, int row) +{ + struct perf_evsel_menu *menu = container_of(browser, + struct perf_evsel_menu, b); + struct perf_evsel *evsel = list_entry(entry, struct perf_evsel, node); + bool current_entry = ui_browser__is_current_entry(browser, row); + unsigned long nr_events = evsel->hists.stats.nr_events[PERF_RECORD_SAMPLE]; + const char *ev_name = perf_evsel__name(evsel); + char bf[256], unit; + const char *warn = " "; + size_t printed; + + ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED : + HE_COLORSET_NORMAL); + + nr_events = convert_unit(nr_events, &unit); + printed = scnprintf(bf, sizeof(bf), "%lu%c%s%s", nr_events, + unit, unit == ' ' ? "" : " ", ev_name); + slsmg_printf("%s", bf); + + nr_events = evsel->hists.stats.nr_events[PERF_RECORD_LOST]; + if (nr_events != 0) { + menu->lost_events = true; + if (!current_entry) + ui_browser__set_color(browser, HE_COLORSET_TOP); + nr_events = convert_unit(nr_events, &unit); + printed += scnprintf(bf, sizeof(bf), ": %ld%c%schunks LOST!", + nr_events, unit, unit == ' ' ? "" : " "); + warn = bf; + } + + slsmg_write_nstring(warn, browser->width - printed); + + if (current_entry) + menu->selection = evsel; +} + +static int perf_evsel_menu__run(struct perf_evsel_menu *menu, + int nr_events, const char *help, + void(*timer)(void *arg), void *arg, int delay_secs) +{ + struct perf_evlist *evlist = menu->b.priv; + struct perf_evsel *pos; + const char *ev_name, *title = "Available samples"; + int key; + + if (ui_browser__show(&menu->b, title, + "ESC: exit, ENTER|->: Browse histograms") < 0) + return -1; + + while (1) { + key = ui_browser__run(&menu->b, delay_secs); + + switch (key) { + case K_TIMER: + timer(arg); + + if (!menu->lost_events_warned && menu->lost_events) { + ui_browser__warn_lost_events(&menu->b); + menu->lost_events_warned = true; + } + continue; + case K_RIGHT: + case K_ENTER: + if (!menu->selection) + continue; + pos = menu->selection; +browse_hists: + perf_evlist__set_selected(evlist, pos); + /* + * Give the calling tool a chance to populate the non + * default evsel resorted hists tree. + */ + if (timer) + timer(arg); + ev_name = perf_evsel__name(pos); + key = perf_evsel__hists_browse(pos, nr_events, help, + ev_name, true, timer, + arg, delay_secs); + ui_browser__show_title(&menu->b, title); + switch (key) { + case K_TAB: + if (pos->node.next == &evlist->entries) + pos = list_entry(evlist->entries.next, struct perf_evsel, node); + else + pos = list_entry(pos->node.next, struct perf_evsel, node); + goto browse_hists; + case K_UNTAB: + if (pos->node.prev == &evlist->entries) + pos = list_entry(evlist->entries.prev, struct perf_evsel, node); + else + pos = list_entry(pos->node.prev, struct perf_evsel, node); + goto browse_hists; + case K_ESC: + if (!ui_browser__dialog_yesno(&menu->b, + "Do you really want to exit?")) + continue; + /* Fall thru */ + case 'q': + case CTRL('c'): + goto out; + default: + continue; + } + case K_LEFT: + continue; + case K_ESC: + if (!ui_browser__dialog_yesno(&menu->b, + "Do you really want to exit?")) + continue; + /* Fall thru */ + case 'q': + case CTRL('c'): + goto out; + default: + continue; + } + } + +out: + ui_browser__hide(&menu->b); + return key; +} + +static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist, + const char *help, + void(*timer)(void *arg), void *arg, + int delay_secs) +{ + struct perf_evsel *pos; + struct perf_evsel_menu menu = { + .b = { + .entries = &evlist->entries, + .refresh = ui_browser__list_head_refresh, + .seek = ui_browser__list_head_seek, + .write = perf_evsel_menu__write, + .nr_entries = evlist->nr_entries, + .priv = evlist, + }, + }; + + ui_helpline__push("Press ESC to exit"); + + list_for_each_entry(pos, &evlist->entries, node) { + const char *ev_name = perf_evsel__name(pos); + size_t line_len = strlen(ev_name) + 7; + + if (menu.b.width < line_len) + menu.b.width = line_len; + } + + return perf_evsel_menu__run(&menu, evlist->nr_entries, help, timer, + arg, delay_secs); +} + +int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, + void(*timer)(void *arg), void *arg, + int delay_secs) +{ + if (evlist->nr_entries == 1) { + struct perf_evsel *first = list_entry(evlist->entries.next, + struct perf_evsel, node); + const char *ev_name = perf_evsel__name(first); + return perf_evsel__hists_browse(first, evlist->nr_entries, help, + ev_name, false, timer, arg, + delay_secs); + } + + return __perf_evlist__tui_browse_hists(evlist, help, + timer, arg, delay_secs); +} diff --git a/tools/perf/util/ui/browsers/map.c b/tools/perf/ui/browsers/map.c index 8462bffe20bc..98851d55a53e 100644 --- a/tools/perf/util/ui/browsers/map.c +++ b/tools/perf/ui/browsers/map.c @@ -1,12 +1,13 @@ #include "../libslang.h" #include <elf.h> +#include <newt.h> #include <inttypes.h> #include <sys/ttydefaults.h> -#include <ctype.h> #include <string.h> #include <linux/bitops.h> -#include "../../debug.h" -#include "../../symbol.h" +#include "../../util/util.h" +#include "../../util/debug.h" +#include "../../util/symbol.h" #include "../browser.h" #include "../helpline.h" #include "map.h" @@ -108,11 +109,8 @@ static int map_browser__run(struct map_browser *self) verbose ? "" : "restart with -v to use") < 0) return -1; - if (verbose) - ui_browser__add_exit_key(&self->b, '/'); - while (1) { - key = ui_browser__run(&self->b); + key = ui_browser__run(&self->b, 0); if (verbose && key == '/') map_browser__search(self); diff --git a/tools/perf/util/ui/browsers/map.h b/tools/perf/ui/browsers/map.h index df8581a43e17..df8581a43e17 100644 --- a/tools/perf/util/ui/browsers/map.h +++ b/tools/perf/ui/browsers/map.h diff --git a/tools/perf/ui/gtk/browser.c b/tools/perf/ui/gtk/browser.c new file mode 100644 index 000000000000..ec12e0b4ded6 --- /dev/null +++ b/tools/perf/ui/gtk/browser.c @@ -0,0 +1,241 @@ +#include "../evlist.h" +#include "../cache.h" +#include "../evsel.h" +#include "../sort.h" +#include "../hist.h" +#include "gtk.h" + +#include <signal.h> + +#define MAX_COLUMNS 32 + +static void perf_gtk__signal(int sig) +{ + perf_gtk__exit(false); + psignal(sig, "perf"); +} + +static void perf_gtk__resize_window(GtkWidget *window) +{ + GdkRectangle rect; + GdkScreen *screen; + int monitor; + int height; + int width; + + screen = gtk_widget_get_screen(window); + + monitor = gdk_screen_get_monitor_at_window(screen, window->window); + + gdk_screen_get_monitor_geometry(screen, monitor, &rect); + + width = rect.width * 3 / 4; + height = rect.height * 3 / 4; + + gtk_window_resize(GTK_WINDOW(window), width, height); +} + +static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists) +{ + GType col_types[MAX_COLUMNS]; + GtkCellRenderer *renderer; + struct sort_entry *se; + GtkListStore *store; + struct rb_node *nd; + u64 total_period; + GtkWidget *view; + int col_idx; + int nr_cols; + + nr_cols = 0; + + /* The percentage column */ + col_types[nr_cols++] = G_TYPE_STRING; + + list_for_each_entry(se, &hist_entry__sort_list, list) { + if (se->elide) + continue; + + col_types[nr_cols++] = G_TYPE_STRING; + } + + store = gtk_list_store_newv(nr_cols, col_types); + + view = gtk_tree_view_new(); + + renderer = gtk_cell_renderer_text_new(); + + col_idx = 0; + + /* The percentage column */ + gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view), + -1, "Overhead (%)", + renderer, "text", + col_idx++, NULL); + + list_for_each_entry(se, &hist_entry__sort_list, list) { + if (se->elide) + continue; + + gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view), + -1, se->se_header, + renderer, "text", + col_idx++, NULL); + } + + gtk_tree_view_set_model(GTK_TREE_VIEW(view), GTK_TREE_MODEL(store)); + + g_object_unref(GTK_TREE_MODEL(store)); + + total_period = hists->stats.total_period; + + for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { + struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); + GtkTreeIter iter; + double percent; + char s[512]; + + if (h->filtered) + continue; + + gtk_list_store_append(store, &iter); + + col_idx = 0; + + percent = (h->period * 100.0) / total_period; + + snprintf(s, ARRAY_SIZE(s), "%.2f", percent); + + gtk_list_store_set(store, &iter, col_idx++, s, -1); + + list_for_each_entry(se, &hist_entry__sort_list, list) { + if (se->elide) + continue; + + se->se_snprintf(h, s, ARRAY_SIZE(s), + hists__col_len(hists, se->se_width_idx)); + + gtk_list_store_set(store, &iter, col_idx++, s, -1); + } + } + + gtk_container_add(GTK_CONTAINER(window), view); +} + +#ifdef HAVE_GTK_INFO_BAR +static GtkWidget *perf_gtk__setup_info_bar(void) +{ + GtkWidget *info_bar; + GtkWidget *label; + GtkWidget *content_area; + + info_bar = gtk_info_bar_new(); + gtk_widget_set_no_show_all(info_bar, TRUE); + + label = gtk_label_new(""); + gtk_widget_show(label); + + content_area = gtk_info_bar_get_content_area(GTK_INFO_BAR(info_bar)); + gtk_container_add(GTK_CONTAINER(content_area), label); + + gtk_info_bar_add_button(GTK_INFO_BAR(info_bar), GTK_STOCK_OK, + GTK_RESPONSE_OK); + g_signal_connect(info_bar, "response", + G_CALLBACK(gtk_widget_hide), NULL); + + pgctx->info_bar = info_bar; + pgctx->message_label = label; + + return info_bar; +} +#endif + +static GtkWidget *perf_gtk__setup_statusbar(void) +{ + GtkWidget *stbar; + unsigned ctxid; + + stbar = gtk_statusbar_new(); + + ctxid = gtk_statusbar_get_context_id(GTK_STATUSBAR(stbar), + "perf report"); + pgctx->statbar = stbar; + pgctx->statbar_ctx_id = ctxid; + + return stbar; +} + +int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, + const char *help __used, + void (*timer) (void *arg)__used, + void *arg __used, int delay_secs __used) +{ + struct perf_evsel *pos; + GtkWidget *vbox; + GtkWidget *notebook; + GtkWidget *info_bar; + GtkWidget *statbar; + GtkWidget *window; + + signal(SIGSEGV, perf_gtk__signal); + signal(SIGFPE, perf_gtk__signal); + signal(SIGINT, perf_gtk__signal); + signal(SIGQUIT, perf_gtk__signal); + signal(SIGTERM, perf_gtk__signal); + + window = gtk_window_new(GTK_WINDOW_TOPLEVEL); + + gtk_window_set_title(GTK_WINDOW(window), "perf report"); + + g_signal_connect(window, "delete_event", gtk_main_quit, NULL); + + pgctx = perf_gtk__activate_context(window); + if (!pgctx) + return -1; + + vbox = gtk_vbox_new(FALSE, 0); + + notebook = gtk_notebook_new(); + + list_for_each_entry(pos, &evlist->entries, node) { + struct hists *hists = &pos->hists; + const char *evname = perf_evsel__name(pos); + GtkWidget *scrolled_window; + GtkWidget *tab_label; + + scrolled_window = gtk_scrolled_window_new(NULL, NULL); + + gtk_scrolled_window_set_policy(GTK_SCROLLED_WINDOW(scrolled_window), + GTK_POLICY_AUTOMATIC, + GTK_POLICY_AUTOMATIC); + + perf_gtk__show_hists(scrolled_window, hists); + + tab_label = gtk_label_new(evname); + + gtk_notebook_append_page(GTK_NOTEBOOK(notebook), scrolled_window, tab_label); + } + + gtk_box_pack_start(GTK_BOX(vbox), notebook, TRUE, TRUE, 0); + + info_bar = perf_gtk__setup_info_bar(); + if (info_bar) + gtk_box_pack_start(GTK_BOX(vbox), info_bar, FALSE, FALSE, 0); + + statbar = perf_gtk__setup_statusbar(); + gtk_box_pack_start(GTK_BOX(vbox), statbar, FALSE, FALSE, 0); + + gtk_container_add(GTK_CONTAINER(window), vbox); + + gtk_widget_show_all(window); + + perf_gtk__resize_window(window); + + gtk_window_set_position(GTK_WINDOW(window), GTK_WIN_POS_CENTER); + + gtk_main(); + + perf_gtk__deactivate_context(&pgctx); + + return 0; +} diff --git a/tools/perf/ui/gtk/gtk.h b/tools/perf/ui/gtk/gtk.h new file mode 100644 index 000000000000..a4d0f2b4a2dc --- /dev/null +++ b/tools/perf/ui/gtk/gtk.h @@ -0,0 +1,39 @@ +#ifndef _PERF_GTK_H_ +#define _PERF_GTK_H_ 1 + +#include <stdbool.h> + +#pragma GCC diagnostic ignored "-Wstrict-prototypes" +#include <gtk/gtk.h> +#pragma GCC diagnostic error "-Wstrict-prototypes" + + +struct perf_gtk_context { + GtkWidget *main_window; + +#ifdef HAVE_GTK_INFO_BAR + GtkWidget *info_bar; + GtkWidget *message_label; +#endif + GtkWidget *statbar; + guint statbar_ctx_id; +}; + +extern struct perf_gtk_context *pgctx; + +static inline bool perf_gtk__is_active_context(struct perf_gtk_context *ctx) +{ + return ctx && ctx->main_window; +} + +struct perf_gtk_context *perf_gtk__activate_context(GtkWidget *window); +int perf_gtk__deactivate_context(struct perf_gtk_context **ctx); + +#ifndef HAVE_GTK_INFO_BAR +static inline GtkWidget *perf_gtk__setup_info_bar(void) +{ + return NULL; +} +#endif + +#endif /* _PERF_GTK_H_ */ diff --git a/tools/perf/ui/gtk/setup.c b/tools/perf/ui/gtk/setup.c new file mode 100644 index 000000000000..92879ce61e2f --- /dev/null +++ b/tools/perf/ui/gtk/setup.c @@ -0,0 +1,17 @@ +#include "gtk.h" +#include "../../util/cache.h" +#include "../../util/debug.h" + +extern struct perf_error_ops perf_gtk_eops; + +int perf_gtk__init(void) +{ + perf_error__register(&perf_gtk_eops); + return gtk_init_check(NULL, NULL) ? 0 : -1; +} + +void perf_gtk__exit(bool wait_for_ok __used) +{ + perf_error__unregister(&perf_gtk_eops); + gtk_main_quit(); +} diff --git a/tools/perf/ui/gtk/util.c b/tools/perf/ui/gtk/util.c new file mode 100644 index 000000000000..0ead373c0dfb --- /dev/null +++ b/tools/perf/ui/gtk/util.c @@ -0,0 +1,129 @@ +#include "../util.h" +#include "../../util/debug.h" +#include "gtk.h" + +#include <string.h> + + +struct perf_gtk_context *pgctx; + +struct perf_gtk_context *perf_gtk__activate_context(GtkWidget *window) +{ + struct perf_gtk_context *ctx; + + ctx = malloc(sizeof(*pgctx)); + if (ctx) + ctx->main_window = window; + + return ctx; +} + +int perf_gtk__deactivate_context(struct perf_gtk_context **ctx) +{ + if (!perf_gtk__is_active_context(*ctx)) + return -1; + + free(*ctx); + *ctx = NULL; + return 0; +} + +static int perf_gtk__error(const char *format, va_list args) +{ + char *msg; + GtkWidget *dialog; + + if (!perf_gtk__is_active_context(pgctx) || + vasprintf(&msg, format, args) < 0) { + fprintf(stderr, "Error:\n"); + vfprintf(stderr, format, args); + fprintf(stderr, "\n"); + return -1; + } + + dialog = gtk_message_dialog_new_with_markup(GTK_WINDOW(pgctx->main_window), + GTK_DIALOG_DESTROY_WITH_PARENT, + GTK_MESSAGE_ERROR, + GTK_BUTTONS_CLOSE, + "<b>Error</b>\n\n%s", msg); + gtk_dialog_run(GTK_DIALOG(dialog)); + + gtk_widget_destroy(dialog); + free(msg); + return 0; +} + +#ifdef HAVE_GTK_INFO_BAR +static int perf_gtk__warning_info_bar(const char *format, va_list args) +{ + char *msg; + + if (!perf_gtk__is_active_context(pgctx) || + vasprintf(&msg, format, args) < 0) { + fprintf(stderr, "Warning:\n"); + vfprintf(stderr, format, args); + fprintf(stderr, "\n"); + return -1; + } + + gtk_label_set_text(GTK_LABEL(pgctx->message_label), msg); + gtk_info_bar_set_message_type(GTK_INFO_BAR(pgctx->info_bar), + GTK_MESSAGE_WARNING); + gtk_widget_show(pgctx->info_bar); + + free(msg); + return 0; +} +#else +static int perf_gtk__warning_statusbar(const char *format, va_list args) +{ + char *msg, *p; + + if (!perf_gtk__is_active_context(pgctx) || + vasprintf(&msg, format, args) < 0) { + fprintf(stderr, "Warning:\n"); + vfprintf(stderr, format, args); + fprintf(stderr, "\n"); + return -1; + } + + gtk_statusbar_pop(GTK_STATUSBAR(pgctx->statbar), + pgctx->statbar_ctx_id); + + /* Only first line can be displayed */ + p = strchr(msg, '\n'); + if (p) + *p = '\0'; + + gtk_statusbar_push(GTK_STATUSBAR(pgctx->statbar), + pgctx->statbar_ctx_id, msg); + + free(msg); + return 0; +} +#endif + +struct perf_error_ops perf_gtk_eops = { + .error = perf_gtk__error, +#ifdef HAVE_GTK_INFO_BAR + .warning = perf_gtk__warning_info_bar, +#else + .warning = perf_gtk__warning_statusbar, +#endif +}; + +/* + * FIXME: Functions below should be implemented properly. + * For now, just add stubs for NO_NEWT=1 build. + */ +#ifdef NO_NEWT_SUPPORT +int ui_helpline__show_help(const char *format __used, va_list ap __used) +{ + return 0; +} + +void ui_progress__update(u64 curr __used, u64 total __used, + const char *title __used) +{ +} +#endif diff --git a/tools/perf/util/ui/helpline.c b/tools/perf/ui/helpline.c index f36d2ff509ed..2f950c2641c8 100644 --- a/tools/perf/util/ui/helpline.c +++ b/tools/perf/ui/helpline.c @@ -1,20 +1,27 @@ -#define _GNU_SOURCE #include <stdio.h> #include <stdlib.h> -#include <newt.h> +#include <string.h> #include "../debug.h" #include "helpline.h" #include "ui.h" +#include "libslang.h" void ui_helpline__pop(void) { - newtPopHelpLine(); } +char ui_helpline__current[512]; + void ui_helpline__push(const char *msg) { - newtPushHelpLine(msg); + const size_t sz = sizeof(ui_helpline__current); + + SLsmg_gotorc(SLtt_Screen_Rows - 1, 0); + SLsmg_set_color(0); + SLsmg_write_nstring((char *)msg, SLtt_Screen_Cols); + SLsmg_refresh(); + strncpy(ui_helpline__current, msg, sz)[sz - 1] = '\0'; } void ui_helpline__vpush(const char *fmt, va_list ap) @@ -57,13 +64,13 @@ int ui_helpline__show_help(const char *format, va_list ap) static int backlog; pthread_mutex_lock(&ui__lock); - ret = vsnprintf(ui_helpline__last_msg + backlog, + ret = vscnprintf(ui_helpline__last_msg + backlog, sizeof(ui_helpline__last_msg) - backlog, format, ap); backlog += ret; if (ui_helpline__last_msg[backlog - 1] == '\n') { ui_helpline__puts(ui_helpline__last_msg); - newtRefresh(); + SLsmg_refresh(); backlog = 0; } pthread_mutex_unlock(&ui__lock); diff --git a/tools/perf/util/ui/helpline.h b/tools/perf/ui/helpline.h index ab6028d0c401..7bab6b34e35e 100644 --- a/tools/perf/util/ui/helpline.h +++ b/tools/perf/ui/helpline.h @@ -1,6 +1,9 @@ #ifndef _PERF_UI_HELPLINE_H_ #define _PERF_UI_HELPLINE_H_ 1 +#include <stdio.h> +#include <stdarg.h> + void ui_helpline__init(void); void ui_helpline__pop(void); void ui_helpline__push(const char *msg); @@ -8,4 +11,6 @@ void ui_helpline__vpush(const char *fmt, va_list ap); void ui_helpline__fpush(const char *fmt, ...); void ui_helpline__puts(const char *msg); +extern char ui_helpline__current[]; + #endif /* _PERF_UI_HELPLINE_H_ */ diff --git a/tools/perf/ui/keysyms.h b/tools/perf/ui/keysyms.h new file mode 100644 index 000000000000..809eca5707fa --- /dev/null +++ b/tools/perf/ui/keysyms.h @@ -0,0 +1,27 @@ +#ifndef _PERF_KEYSYMS_H_ +#define _PERF_KEYSYMS_H_ 1 + +#include "libslang.h" + +#define K_DOWN SL_KEY_DOWN +#define K_END SL_KEY_END +#define K_ENTER '\r' +#define K_ESC 033 +#define K_F1 SL_KEY_F(1) +#define K_HOME SL_KEY_HOME +#define K_LEFT SL_KEY_LEFT +#define K_PGDN SL_KEY_NPAGE +#define K_PGUP SL_KEY_PPAGE +#define K_RIGHT SL_KEY_RIGHT +#define K_TAB '\t' +#define K_UNTAB SL_KEY_UNTAB +#define K_UP SL_KEY_UP +#define K_BKSPC 0x7f +#define K_DEL SL_KEY_DELETE + +/* Not really keys */ +#define K_TIMER -1 +#define K_ERROR -2 +#define K_RESIZE -3 + +#endif /* _PERF_KEYSYMS_H_ */ diff --git a/tools/perf/util/ui/libslang.h b/tools/perf/ui/libslang.h index 2b63e1c9b181..4d54b6450f5b 100644 --- a/tools/perf/util/ui/libslang.h +++ b/tools/perf/ui/libslang.h @@ -24,4 +24,6 @@ #define sltt_set_color SLtt_set_color #endif +#define SL_KEY_UNTAB 0x1000 + #endif /* _PERF_UI_SLANG_H_ */ diff --git a/tools/perf/ui/progress.c b/tools/perf/ui/progress.c new file mode 100644 index 000000000000..13aa64e50e11 --- /dev/null +++ b/tools/perf/ui/progress.c @@ -0,0 +1,32 @@ +#include "../cache.h" +#include "progress.h" +#include "libslang.h" +#include "ui.h" +#include "browser.h" + +void ui_progress__update(u64 curr, u64 total, const char *title) +{ + int bar, y; + /* + * FIXME: We should have a per UI backend way of showing progress, + * stdio will just show a percentage as NN%, etc. + */ + if (use_browser <= 0) + return; + + if (total == 0) + return; + + ui__refresh_dimensions(true); + pthread_mutex_lock(&ui__lock); + y = SLtt_Screen_Rows / 2 - 2; + SLsmg_set_color(0); + SLsmg_draw_box(y, 0, 3, SLtt_Screen_Cols); + SLsmg_gotorc(y++, 1); + SLsmg_write_string((char *)title); + SLsmg_set_color(HE_COLORSET_SELECTED); + bar = ((SLtt_Screen_Cols - 2) * curr) / total; + SLsmg_fill_region(y, 1, 1, bar, ' '); + SLsmg_refresh(); + pthread_mutex_unlock(&ui__lock); +} diff --git a/tools/perf/ui/progress.h b/tools/perf/ui/progress.h new file mode 100644 index 000000000000..d9c205b59aa1 --- /dev/null +++ b/tools/perf/ui/progress.h @@ -0,0 +1,8 @@ +#ifndef _PERF_UI_PROGRESS_H_ +#define _PERF_UI_PROGRESS_H_ 1 + +#include <../types.h> + +void ui_progress__update(u64 curr, u64 total, const char *title); + +#endif diff --git a/tools/perf/ui/setup.c b/tools/perf/ui/setup.c new file mode 100644 index 000000000000..791fb15ce350 --- /dev/null +++ b/tools/perf/ui/setup.c @@ -0,0 +1,46 @@ +#include "../cache.h" +#include "../debug.h" + + +void setup_browser(bool fallback_to_pager) +{ + if (!isatty(1) || dump_trace) + use_browser = 0; + + /* default to TUI */ + if (use_browser < 0) + use_browser = 1; + + switch (use_browser) { + case 2: + if (perf_gtk__init() == 0) + break; + /* fall through */ + case 1: + use_browser = 1; + if (ui__init() == 0) + break; + /* fall through */ + default: + use_browser = 0; + if (fallback_to_pager) + setup_pager(); + break; + } +} + +void exit_browser(bool wait_for_ok) +{ + switch (use_browser) { + case 2: + perf_gtk__exit(wait_for_ok); + break; + + case 1: + ui__exit(wait_for_ok); + break; + + default: + break; + } +} diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c new file mode 100644 index 000000000000..e813c1d17346 --- /dev/null +++ b/tools/perf/ui/tui/setup.c @@ -0,0 +1,146 @@ +#include <newt.h> +#include <signal.h> +#include <stdbool.h> + +#include "../../util/cache.h" +#include "../../util/debug.h" +#include "../browser.h" +#include "../helpline.h" +#include "../ui.h" +#include "../util.h" +#include "../libslang.h" +#include "../keysyms.h" + +pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER; + +static volatile int ui__need_resize; + +extern struct perf_error_ops perf_tui_eops; + +void ui__refresh_dimensions(bool force) +{ + if (force || ui__need_resize) { + ui__need_resize = 0; + pthread_mutex_lock(&ui__lock); + SLtt_get_screen_size(); + SLsmg_reinit_smg(); + pthread_mutex_unlock(&ui__lock); + } +} + +static void ui__sigwinch(int sig __used) +{ + ui__need_resize = 1; +} + +static void ui__setup_sigwinch(void) +{ + static bool done; + + if (done) + return; + + done = true; + pthread__unblock_sigwinch(); + signal(SIGWINCH, ui__sigwinch); +} + +int ui__getch(int delay_secs) +{ + struct timeval timeout, *ptimeout = delay_secs ? &timeout : NULL; + fd_set read_set; + int err, key; + + ui__setup_sigwinch(); + + FD_ZERO(&read_set); + FD_SET(0, &read_set); + + if (delay_secs) { + timeout.tv_sec = delay_secs; + timeout.tv_usec = 0; + } + + err = select(1, &read_set, NULL, NULL, ptimeout); + + if (err == 0) + return K_TIMER; + + if (err == -1) { + if (errno == EINTR) + return K_RESIZE; + return K_ERROR; + } + + key = SLang_getkey(); + if (key != K_ESC) + return key; + + FD_ZERO(&read_set); + FD_SET(0, &read_set); + timeout.tv_sec = 0; + timeout.tv_usec = 20; + err = select(1, &read_set, NULL, NULL, &timeout); + if (err == 0) + return K_ESC; + + SLang_ungetkey(key); + return SLkp_getkey(); +} + +static void newt_suspend(void *d __used) +{ + newtSuspend(); + raise(SIGTSTP); + newtResume(); +} + +static void ui__signal(int sig) +{ + ui__exit(false); + psignal(sig, "perf"); + exit(0); +} + +int ui__init(void) +{ + int err; + + newtInit(); + err = SLkp_init(); + if (err < 0) { + pr_err("TUI initialization failed.\n"); + goto out; + } + + SLkp_define_keysym((char *)"^(kB)", SL_KEY_UNTAB); + + newtSetSuspendCallback(newt_suspend, NULL); + ui_helpline__init(); + ui_browser__init(); + + signal(SIGSEGV, ui__signal); + signal(SIGFPE, ui__signal); + signal(SIGINT, ui__signal); + signal(SIGQUIT, ui__signal); + signal(SIGTERM, ui__signal); + + perf_error__register(&perf_tui_eops); +out: + return err; +} + +void ui__exit(bool wait_for_ok) +{ + if (wait_for_ok) + ui__question_window("Fatal Error", + ui_helpline__last_msg, + "Press any key...", 0); + + SLtt_set_cursor_visibility(1); + SLsmg_refresh(); + SLsmg_reset_smg(); + SLang_reset_tty(); + + perf_error__unregister(&perf_tui_eops); +} diff --git a/tools/perf/ui/tui/util.c b/tools/perf/ui/tui/util.c new file mode 100644 index 000000000000..092902e30cee --- /dev/null +++ b/tools/perf/ui/tui/util.c @@ -0,0 +1,243 @@ +#include "../../util/util.h" +#include <signal.h> +#include <stdbool.h> +#include <string.h> +#include <sys/ttydefaults.h> + +#include "../../util/cache.h" +#include "../../util/debug.h" +#include "../browser.h" +#include "../keysyms.h" +#include "../helpline.h" +#include "../ui.h" +#include "../util.h" +#include "../libslang.h" + +static void ui_browser__argv_write(struct ui_browser *browser, + void *entry, int row) +{ + char **arg = entry; + bool current_entry = ui_browser__is_current_entry(browser, row); + + ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED : + HE_COLORSET_NORMAL); + slsmg_write_nstring(*arg, browser->width); +} + +static int popup_menu__run(struct ui_browser *menu) +{ + int key; + + if (ui_browser__show(menu, " ", "ESC: exit, ENTER|->: Select option") < 0) + return -1; + + while (1) { + key = ui_browser__run(menu, 0); + + switch (key) { + case K_RIGHT: + case K_ENTER: + key = menu->index; + break; + case K_LEFT: + case K_ESC: + case 'q': + case CTRL('c'): + key = -1; + break; + default: + continue; + } + + break; + } + + ui_browser__hide(menu); + return key; +} + +int ui__popup_menu(int argc, char * const argv[]) +{ + struct ui_browser menu = { + .entries = (void *)argv, + .refresh = ui_browser__argv_refresh, + .seek = ui_browser__argv_seek, + .write = ui_browser__argv_write, + .nr_entries = argc, + }; + + return popup_menu__run(&menu); +} + +int ui_browser__input_window(const char *title, const char *text, char *input, + const char *exit_msg, int delay_secs) +{ + int x, y, len, key; + int max_len = 60, nr_lines = 0; + static char buf[50]; + const char *t; + + t = text; + while (1) { + const char *sep = strchr(t, '\n'); + + if (sep == NULL) + sep = strchr(t, '\0'); + len = sep - t; + if (max_len < len) + max_len = len; + ++nr_lines; + if (*sep == '\0') + break; + t = sep + 1; + } + + max_len += 2; + nr_lines += 8; + y = SLtt_Screen_Rows / 2 - nr_lines / 2; + x = SLtt_Screen_Cols / 2 - max_len / 2; + + SLsmg_set_color(0); + SLsmg_draw_box(y, x++, nr_lines, max_len); + if (title) { + SLsmg_gotorc(y, x + 1); + SLsmg_write_string((char *)title); + } + SLsmg_gotorc(++y, x); + nr_lines -= 7; + max_len -= 2; + SLsmg_write_wrapped_string((unsigned char *)text, y, x, + nr_lines, max_len, 1); + y += nr_lines; + len = 5; + while (len--) { + SLsmg_gotorc(y + len - 1, x); + SLsmg_write_nstring((char *)" ", max_len); + } + SLsmg_draw_box(y++, x + 1, 3, max_len - 2); + + SLsmg_gotorc(y + 3, x); + SLsmg_write_nstring((char *)exit_msg, max_len); + SLsmg_refresh(); + + x += 2; + len = 0; + key = ui__getch(delay_secs); + while (key != K_TIMER && key != K_ENTER && key != K_ESC) { + if (key == K_BKSPC) { + if (len == 0) + goto next_key; + SLsmg_gotorc(y, x + --len); + SLsmg_write_char(' '); + } else { + buf[len] = key; + SLsmg_gotorc(y, x + len++); + SLsmg_write_char(key); + } + SLsmg_refresh(); + + /* XXX more graceful overflow handling needed */ + if (len == sizeof(buf) - 1) { + ui_helpline__push("maximum size of symbol name reached!"); + key = K_ENTER; + break; + } +next_key: + key = ui__getch(delay_secs); + } + + buf[len] = '\0'; + strncpy(input, buf, len+1); + return key; +} + +int ui__question_window(const char *title, const char *text, + const char *exit_msg, int delay_secs) +{ + int x, y; + int max_len = 0, nr_lines = 0; + const char *t; + + t = text; + while (1) { + const char *sep = strchr(t, '\n'); + int len; + + if (sep == NULL) + sep = strchr(t, '\0'); + len = sep - t; + if (max_len < len) + max_len = len; + ++nr_lines; + if (*sep == '\0') + break; + t = sep + 1; + } + + max_len += 2; + nr_lines += 4; + y = SLtt_Screen_Rows / 2 - nr_lines / 2, + x = SLtt_Screen_Cols / 2 - max_len / 2; + + SLsmg_set_color(0); + SLsmg_draw_box(y, x++, nr_lines, max_len); + if (title) { + SLsmg_gotorc(y, x + 1); + SLsmg_write_string((char *)title); + } + SLsmg_gotorc(++y, x); + nr_lines -= 2; + max_len -= 2; + SLsmg_write_wrapped_string((unsigned char *)text, y, x, + nr_lines, max_len, 1); + SLsmg_gotorc(y + nr_lines - 2, x); + SLsmg_write_nstring((char *)" ", max_len); + SLsmg_gotorc(y + nr_lines - 1, x); + SLsmg_write_nstring((char *)exit_msg, max_len); + SLsmg_refresh(); + return ui__getch(delay_secs); +} + +int ui__help_window(const char *text) +{ + return ui__question_window("Help", text, "Press any key...", 0); +} + +int ui__dialog_yesno(const char *msg) +{ + return ui__question_window(NULL, msg, "Enter: Yes, ESC: No", 0); +} + +static int __ui__warning(const char *title, const char *format, va_list args) +{ + char *s; + + if (vasprintf(&s, format, args) > 0) { + int key; + + pthread_mutex_lock(&ui__lock); + key = ui__question_window(title, s, "Press any key...", 0); + pthread_mutex_unlock(&ui__lock); + free(s); + return key; + } + + fprintf(stderr, "%s\n", title); + vfprintf(stderr, format, args); + return K_ESC; +} + +static int perf_tui__error(const char *format, va_list args) +{ + return __ui__warning("Error:", format, args); +} + +static int perf_tui__warning(const char *format, va_list args) +{ + return __ui__warning("Warning:", format, args); +} + +struct perf_error_ops perf_tui_eops = { + .error = perf_tui__error, + .warning = perf_tui__warning, +}; diff --git a/tools/perf/util/ui/ui.h b/tools/perf/ui/ui.h index d264e059c829..7b67045479f6 100644 --- a/tools/perf/util/ui/ui.h +++ b/tools/perf/ui/ui.h @@ -2,7 +2,10 @@ #define _PERF_UI_H_ 1 #include <pthread.h> +#include <stdbool.h> extern pthread_mutex_t ui__lock; +void ui__refresh_dimensions(bool force); + #endif /* _PERF_UI_H_ */ diff --git a/tools/perf/ui/util.c b/tools/perf/ui/util.c new file mode 100644 index 000000000000..4f989774c8c6 --- /dev/null +++ b/tools/perf/ui/util.c @@ -0,0 +1,85 @@ +#include "util.h" +#include "../debug.h" + + +/* + * Default error logging functions + */ +static int perf_stdio__error(const char *format, va_list args) +{ + fprintf(stderr, "Error:\n"); + vfprintf(stderr, format, args); + return 0; +} + +static int perf_stdio__warning(const char *format, va_list args) +{ + fprintf(stderr, "Warning:\n"); + vfprintf(stderr, format, args); + return 0; +} + +static struct perf_error_ops default_eops = +{ + .error = perf_stdio__error, + .warning = perf_stdio__warning, +}; + +static struct perf_error_ops *perf_eops = &default_eops; + + +int ui__error(const char *format, ...) +{ + int ret; + va_list args; + + va_start(args, format); + ret = perf_eops->error(format, args); + va_end(args); + + return ret; +} + +int ui__warning(const char *format, ...) +{ + int ret; + va_list args; + + va_start(args, format); + ret = perf_eops->warning(format, args); + va_end(args); + + return ret; +} + + +/** + * perf_error__register - Register error logging functions + * @eops: The pointer to error logging function struct + * + * Register UI-specific error logging functions. Before calling this, + * other logging functions should be unregistered, if any. + */ +int perf_error__register(struct perf_error_ops *eops) +{ + if (perf_eops != &default_eops) + return -1; + + perf_eops = eops; + return 0; +} + +/** + * perf_error__unregister - Unregister error logging functions + * @eops: The pointer to error logging function struct + * + * Unregister already registered error logging functions. + */ +int perf_error__unregister(struct perf_error_ops *eops) +{ + if (perf_eops != eops) + return -1; + + perf_eops = &default_eops; + return 0; +} diff --git a/tools/perf/ui/util.h b/tools/perf/ui/util.h new file mode 100644 index 000000000000..361f08c52d37 --- /dev/null +++ b/tools/perf/ui/util.h @@ -0,0 +1,21 @@ +#ifndef _PERF_UI_UTIL_H_ +#define _PERF_UI_UTIL_H_ 1 + +#include <stdarg.h> + +int ui__getch(int delay_secs); +int ui__popup_menu(int argc, char * const argv[]); +int ui__help_window(const char *text); +int ui__dialog_yesno(const char *msg); +int ui__question_window(const char *title, const char *text, + const char *exit_msg, int delay_secs); + +struct perf_error_ops { + int (*error)(const char *format, va_list args); + int (*warning)(const char *format, va_list args); +}; + +int perf_error__register(struct perf_error_ops *eops); +int perf_error__unregister(struct perf_error_ops *eops); + +#endif /* _PERF_UI_UTIL_H_ */ diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN index ad73300f7bac..95264f304179 100755 --- a/tools/perf/util/PERF-VERSION-GEN +++ b/tools/perf/util/PERF-VERSION-GEN @@ -12,7 +12,7 @@ LF=' # First check if there is a .git to get the version from git describe # otherwise try to get the version from the kernel makefile if test -d ../../.git -o -f ../../.git && - VN=$(git describe --abbrev=4 HEAD 2>/dev/null) && + VN=$(git describe --match 'v[0-9].[0-9]*' --abbrev=4 HEAD 2>/dev/null) && case "$VN" in *$LF*) (exit 1) ;; v[0-9]*) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index e01af2b1a469..3a282c0057d2 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -16,6 +16,405 @@ #include "annotate.h" #include <pthread.h> +const char *disassembler_style; + +static struct ins *ins__find(const char *name); +static int disasm_line__parse(char *line, char **namep, char **rawp); + +static void ins__delete(struct ins_operands *ops) +{ + free(ops->source.raw); + free(ops->source.name); + free(ops->target.raw); + free(ops->target.name); +} + +static int ins__raw_scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops) +{ + return scnprintf(bf, size, "%-6.6s %s", ins->name, ops->raw); +} + +int ins__scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops) +{ + if (ins->ops->scnprintf) + return ins->ops->scnprintf(ins, bf, size, ops); + + return ins__raw_scnprintf(ins, bf, size, ops); +} + +static int call__parse(struct ins_operands *ops) +{ + char *endptr, *tok, *name; + + ops->target.addr = strtoull(ops->raw, &endptr, 16); + + name = strchr(endptr, '<'); + if (name == NULL) + goto indirect_call; + + name++; + + tok = strchr(name, '>'); + if (tok == NULL) + return -1; + + *tok = '\0'; + ops->target.name = strdup(name); + *tok = '>'; + + return ops->target.name == NULL ? -1 : 0; + +indirect_call: + tok = strchr(endptr, '('); + if (tok != NULL) { + ops->target.addr = 0; + return 0; + } + + tok = strchr(endptr, '*'); + if (tok == NULL) + return -1; + + ops->target.addr = strtoull(tok + 1, NULL, 16); + return 0; +} + +static int call__scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops) +{ + if (ops->target.name) + return scnprintf(bf, size, "%-6.6s %s", ins->name, ops->target.name); + + if (ops->target.addr == 0) + return ins__raw_scnprintf(ins, bf, size, ops); + + return scnprintf(bf, size, "%-6.6s *%" PRIx64, ins->name, ops->target.addr); +} + +static struct ins_ops call_ops = { + .parse = call__parse, + .scnprintf = call__scnprintf, +}; + +bool ins__is_call(const struct ins *ins) +{ + return ins->ops == &call_ops; +} + +static int jump__parse(struct ins_operands *ops) +{ + const char *s = strchr(ops->raw, '+'); + + ops->target.addr = strtoll(ops->raw, NULL, 16); + + if (s++ != NULL) + ops->target.offset = strtoll(s, NULL, 16); + else + ops->target.offset = UINT64_MAX; + + return 0; +} + +static int jump__scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops) +{ + return scnprintf(bf, size, "%-6.6s %" PRIx64, ins->name, ops->target.offset); +} + +static struct ins_ops jump_ops = { + .parse = jump__parse, + .scnprintf = jump__scnprintf, +}; + +bool ins__is_jump(const struct ins *ins) +{ + return ins->ops == &jump_ops; +} + +static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep) +{ + char *endptr, *name, *t; + + if (strstr(raw, "(%rip)") == NULL) + return 0; + + *addrp = strtoull(comment, &endptr, 16); + name = strchr(endptr, '<'); + if (name == NULL) + return -1; + + name++; + + t = strchr(name, '>'); + if (t == NULL) + return 0; + + *t = '\0'; + *namep = strdup(name); + *t = '>'; + + return 0; +} + +static int lock__parse(struct ins_operands *ops) +{ + char *name; + + ops->locked.ops = zalloc(sizeof(*ops->locked.ops)); + if (ops->locked.ops == NULL) + return 0; + + if (disasm_line__parse(ops->raw, &name, &ops->locked.ops->raw) < 0) + goto out_free_ops; + + ops->locked.ins = ins__find(name); + if (ops->locked.ins == NULL) + goto out_free_ops; + + if (!ops->locked.ins->ops) + return 0; + + if (ops->locked.ins->ops->parse) + ops->locked.ins->ops->parse(ops->locked.ops); + + return 0; + +out_free_ops: + free(ops->locked.ops); + ops->locked.ops = NULL; + return 0; +} + +static int lock__scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops) +{ + int printed; + + if (ops->locked.ins == NULL) + return ins__raw_scnprintf(ins, bf, size, ops); + + printed = scnprintf(bf, size, "%-6.6s ", ins->name); + return printed + ins__scnprintf(ops->locked.ins, bf + printed, + size - printed, ops->locked.ops); +} + +static void lock__delete(struct ins_operands *ops) +{ + free(ops->locked.ops); + free(ops->target.raw); + free(ops->target.name); +} + +static struct ins_ops lock_ops = { + .free = lock__delete, + .parse = lock__parse, + .scnprintf = lock__scnprintf, +}; + +static int mov__parse(struct ins_operands *ops) +{ + char *s = strchr(ops->raw, ','), *target, *comment, prev; + + if (s == NULL) + return -1; + + *s = '\0'; + ops->source.raw = strdup(ops->raw); + *s = ','; + + if (ops->source.raw == NULL) + return -1; + + target = ++s; + + while (s[0] != '\0' && !isspace(s[0])) + ++s; + prev = *s; + *s = '\0'; + + ops->target.raw = strdup(target); + *s = prev; + + if (ops->target.raw == NULL) + goto out_free_source; + + comment = strchr(s, '#'); + if (comment == NULL) + return 0; + + while (comment[0] != '\0' && isspace(comment[0])) + ++comment; + + comment__symbol(ops->source.raw, comment, &ops->source.addr, &ops->source.name); + comment__symbol(ops->target.raw, comment, &ops->target.addr, &ops->target.name); + + return 0; + +out_free_source: + free(ops->source.raw); + ops->source.raw = NULL; + return -1; +} + +static int mov__scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops) +{ + return scnprintf(bf, size, "%-6.6s %s,%s", ins->name, + ops->source.name ?: ops->source.raw, + ops->target.name ?: ops->target.raw); +} + +static struct ins_ops mov_ops = { + .parse = mov__parse, + .scnprintf = mov__scnprintf, +}; + +static int dec__parse(struct ins_operands *ops) +{ + char *target, *comment, *s, prev; + + target = s = ops->raw; + + while (s[0] != '\0' && !isspace(s[0])) + ++s; + prev = *s; + *s = '\0'; + + ops->target.raw = strdup(target); + *s = prev; + + if (ops->target.raw == NULL) + return -1; + + comment = strchr(s, '#'); + if (comment == NULL) + return 0; + + while (comment[0] != '\0' && isspace(comment[0])) + ++comment; + + comment__symbol(ops->target.raw, comment, &ops->target.addr, &ops->target.name); + + return 0; +} + +static int dec__scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops) +{ + return scnprintf(bf, size, "%-6.6s %s", ins->name, + ops->target.name ?: ops->target.raw); +} + +static struct ins_ops dec_ops = { + .parse = dec__parse, + .scnprintf = dec__scnprintf, +}; + +static int nop__scnprintf(struct ins *ins __used, char *bf, size_t size, + struct ins_operands *ops __used) +{ + return scnprintf(bf, size, "%-6.6s", "nop"); +} + +static struct ins_ops nop_ops = { + .scnprintf = nop__scnprintf, +}; + +/* + * Must be sorted by name! + */ +static struct ins instructions[] = { + { .name = "add", .ops = &mov_ops, }, + { .name = "addl", .ops = &mov_ops, }, + { .name = "addq", .ops = &mov_ops, }, + { .name = "addw", .ops = &mov_ops, }, + { .name = "and", .ops = &mov_ops, }, + { .name = "bts", .ops = &mov_ops, }, + { .name = "call", .ops = &call_ops, }, + { .name = "callq", .ops = &call_ops, }, + { .name = "cmp", .ops = &mov_ops, }, + { .name = "cmpb", .ops = &mov_ops, }, + { .name = "cmpl", .ops = &mov_ops, }, + { .name = "cmpq", .ops = &mov_ops, }, + { .name = "cmpw", .ops = &mov_ops, }, + { .name = "cmpxch", .ops = &mov_ops, }, + { .name = "dec", .ops = &dec_ops, }, + { .name = "decl", .ops = &dec_ops, }, + { .name = "imul", .ops = &mov_ops, }, + { .name = "inc", .ops = &dec_ops, }, + { .name = "incl", .ops = &dec_ops, }, + { .name = "ja", .ops = &jump_ops, }, + { .name = "jae", .ops = &jump_ops, }, + { .name = "jb", .ops = &jump_ops, }, + { .name = "jbe", .ops = &jump_ops, }, + { .name = "jc", .ops = &jump_ops, }, + { .name = "jcxz", .ops = &jump_ops, }, + { .name = "je", .ops = &jump_ops, }, + { .name = "jecxz", .ops = &jump_ops, }, + { .name = "jg", .ops = &jump_ops, }, + { .name = "jge", .ops = &jump_ops, }, + { .name = "jl", .ops = &jump_ops, }, + { .name = "jle", .ops = &jump_ops, }, + { .name = "jmp", .ops = &jump_ops, }, + { .name = "jmpq", .ops = &jump_ops, }, + { .name = "jna", .ops = &jump_ops, }, + { .name = "jnae", .ops = &jump_ops, }, + { .name = "jnb", .ops = &jump_ops, }, + { .name = "jnbe", .ops = &jump_ops, }, + { .name = "jnc", .ops = &jump_ops, }, + { .name = "jne", .ops = &jump_ops, }, + { .name = "jng", .ops = &jump_ops, }, + { .name = "jnge", .ops = &jump_ops, }, + { .name = "jnl", .ops = &jump_ops, }, + { .name = "jnle", .ops = &jump_ops, }, + { .name = "jno", .ops = &jump_ops, }, + { .name = "jnp", .ops = &jump_ops, }, + { .name = "jns", .ops = &jump_ops, }, + { .name = "jnz", .ops = &jump_ops, }, + { .name = "jo", .ops = &jump_ops, }, + { .name = "jp", .ops = &jump_ops, }, + { .name = "jpe", .ops = &jump_ops, }, + { .name = "jpo", .ops = &jump_ops, }, + { .name = "jrcxz", .ops = &jump_ops, }, + { .name = "js", .ops = &jump_ops, }, + { .name = "jz", .ops = &jump_ops, }, + { .name = "lea", .ops = &mov_ops, }, + { .name = "lock", .ops = &lock_ops, }, + { .name = "mov", .ops = &mov_ops, }, + { .name = "movb", .ops = &mov_ops, }, + { .name = "movdqa",.ops = &mov_ops, }, + { .name = "movl", .ops = &mov_ops, }, + { .name = "movq", .ops = &mov_ops, }, + { .name = "movslq", .ops = &mov_ops, }, + { .name = "movzbl", .ops = &mov_ops, }, + { .name = "movzwl", .ops = &mov_ops, }, + { .name = "nop", .ops = &nop_ops, }, + { .name = "nopl", .ops = &nop_ops, }, + { .name = "nopw", .ops = &nop_ops, }, + { .name = "or", .ops = &mov_ops, }, + { .name = "orl", .ops = &mov_ops, }, + { .name = "test", .ops = &mov_ops, }, + { .name = "testb", .ops = &mov_ops, }, + { .name = "testl", .ops = &mov_ops, }, + { .name = "xadd", .ops = &mov_ops, }, +}; + +static int ins__cmp(const void *name, const void *insp) +{ + const struct ins *ins = insp; + + return strcmp(name, ins->name); +} + +static struct ins *ins__find(const char *name) +{ + const int nmemb = ARRAY_SIZE(instructions); + + return bsearch(name, instructions, nmemb, sizeof(struct ins), ins__cmp); +} + int symbol__annotate_init(struct map *map __used, struct symbol *sym) { struct annotation *notes = symbol__annotation(sym); @@ -23,17 +422,28 @@ int symbol__annotate_init(struct map *map __used, struct symbol *sym) return 0; } -int symbol__alloc_hist(struct symbol *sym, int nevents) +int symbol__alloc_hist(struct symbol *sym) { struct annotation *notes = symbol__annotation(sym); - size_t sizeof_sym_hist = (sizeof(struct sym_hist) + - (sym->end - sym->start) * sizeof(u64)); + const size_t size = symbol__size(sym); + size_t sizeof_sym_hist; + + /* Check for overflow when calculating sizeof_sym_hist */ + if (size > (SIZE_MAX - sizeof(struct sym_hist)) / sizeof(u64)) + return -1; + + sizeof_sym_hist = (sizeof(struct sym_hist) + size * sizeof(u64)); + + /* Check for overflow in zalloc argument */ + if (sizeof_sym_hist > (SIZE_MAX - sizeof(*notes->src)) + / symbol_conf.nr_events) + return -1; - notes->src = zalloc(sizeof(*notes->src) + nevents * sizeof_sym_hist); + notes->src = zalloc(sizeof(*notes->src) + symbol_conf.nr_events * sizeof_sym_hist); if (notes->src == NULL) return -1; notes->src->sizeof_sym_hist = sizeof_sym_hist; - notes->src->nr_histograms = nevents; + notes->src->nr_histograms = symbol_conf.nr_events; INIT_LIST_HEAD(¬es->src->source); return 0; } @@ -62,8 +472,8 @@ int symbol__inc_addr_samples(struct symbol *sym, struct map *map, pr_debug3("%s: addr=%#" PRIx64 "\n", __func__, map->unmap_ip(map, addr)); - if (addr >= sym->end) - return 0; + if (addr < sym->start || addr > sym->end) + return -ERANGE; offset = addr - sym->start; h = annotation__histogram(notes, evidx); @@ -76,31 +486,110 @@ int symbol__inc_addr_samples(struct symbol *sym, struct map *map, return 0; } -static struct objdump_line *objdump_line__new(s64 offset, char *line, size_t privsize) +static void disasm_line__init_ins(struct disasm_line *dl) { - struct objdump_line *self = malloc(sizeof(*self) + privsize); + dl->ins = ins__find(dl->name); + + if (dl->ins == NULL) + return; + + if (!dl->ins->ops) + return; - if (self != NULL) { - self->offset = offset; - self->line = line; + if (dl->ins->ops->parse) + dl->ins->ops->parse(&dl->ops); +} + +static int disasm_line__parse(char *line, char **namep, char **rawp) +{ + char *name = line, tmp; + + while (isspace(name[0])) + ++name; + + if (name[0] == '\0') + return -1; + + *rawp = name + 1; + + while ((*rawp)[0] != '\0' && !isspace((*rawp)[0])) + ++*rawp; + + tmp = (*rawp)[0]; + (*rawp)[0] = '\0'; + *namep = strdup(name); + + if (*namep == NULL) + goto out_free_name; + + (*rawp)[0] = tmp; + + if ((*rawp)[0] != '\0') { + (*rawp)++; + while (isspace((*rawp)[0])) + ++(*rawp); } - return self; + return 0; + +out_free_name: + free(*namep); + *namep = NULL; + return -1; +} + +static struct disasm_line *disasm_line__new(s64 offset, char *line, size_t privsize) +{ + struct disasm_line *dl = zalloc(sizeof(*dl) + privsize); + + if (dl != NULL) { + dl->offset = offset; + dl->line = strdup(line); + if (dl->line == NULL) + goto out_delete; + + if (offset != -1) { + if (disasm_line__parse(dl->line, &dl->name, &dl->ops.raw) < 0) + goto out_free_line; + + disasm_line__init_ins(dl); + } + } + + return dl; + +out_free_line: + free(dl->line); +out_delete: + free(dl); + return NULL; +} + +void disasm_line__free(struct disasm_line *dl) +{ + free(dl->line); + free(dl->name); + if (dl->ins && dl->ins->ops->free) + dl->ins->ops->free(&dl->ops); + else + ins__delete(&dl->ops); + free(dl); } -void objdump_line__free(struct objdump_line *self) +int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw) { - free(self->line); - free(self); + if (raw || !dl->ins) + return scnprintf(bf, size, "%-6.6s %s", dl->name, dl->ops.raw); + + return ins__scnprintf(dl->ins, bf, size, &dl->ops); } -static void objdump__add_line(struct list_head *head, struct objdump_line *line) +static void disasm__add(struct list_head *head, struct disasm_line *line) { list_add_tail(&line->node, head); } -struct objdump_line *objdump__get_next_ip_line(struct list_head *head, - struct objdump_line *pos) +struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disasm_line *pos) { list_for_each_entry_continue(pos, head, node) if (pos->offset >= 0) @@ -109,15 +598,14 @@ struct objdump_line *objdump__get_next_ip_line(struct list_head *head, return NULL; } -static int objdump_line__print(struct objdump_line *oline, struct symbol *sym, - int evidx, u64 len, int min_pcnt, - int printed, int max_lines, - struct objdump_line *queue) +static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 start, + int evidx, u64 len, int min_pcnt, int printed, + int max_lines, struct disasm_line *queue) { static const char *prev_line; static const char *prev_color; - if (oline->offset != -1) { + if (dl->offset != -1) { const char *path = NULL; unsigned int hits = 0; double percent = 0.0; @@ -125,10 +613,11 @@ static int objdump_line__print(struct objdump_line *oline, struct symbol *sym, struct annotation *notes = symbol__annotation(sym); struct source_line *src_line = notes->src->lines; struct sym_hist *h = annotation__histogram(notes, evidx); - s64 offset = oline->offset; - struct objdump_line *next; + s64 offset = dl->offset; + const u64 addr = start + offset; + struct disasm_line *next; - next = objdump__get_next_ip_line(¬es->src->source, oline); + next = disasm__get_next_ip_line(¬es->src->source, dl); while (offset < (s64)len && (next == NULL || offset < next->offset)) { @@ -153,9 +642,9 @@ static int objdump_line__print(struct objdump_line *oline, struct symbol *sym, if (queue != NULL) { list_for_each_entry_from(queue, ¬es->src->source, node) { - if (queue == oline) + if (queue == dl) break; - objdump_line__print(queue, sym, evidx, len, + disasm_line__print(queue, sym, start, evidx, len, 0, 0, 1, NULL); } } @@ -178,17 +667,18 @@ static int objdump_line__print(struct objdump_line *oline, struct symbol *sym, color_fprintf(stdout, color, " %7.2f", percent); printf(" : "); - color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", oline->line); + color_fprintf(stdout, PERF_COLOR_MAGENTA, " %" PRIx64 ":", addr); + color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", dl->line); } else if (max_lines && printed >= max_lines) return 1; else { if (queue) return -1; - if (!*oline->line) + if (!*dl->line) printf(" :\n"); else - printf(" : %s\n", oline->line); + printf(" : %s\n", dl->line); } return 0; @@ -198,8 +688,8 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, FILE *file, size_t privsize) { struct annotation *notes = symbol__annotation(sym); - struct objdump_line *objdump_line; - char *line = NULL, *tmp, *tmp2, *c; + struct disasm_line *dl; + char *line = NULL, *parsed_line, *tmp, *tmp2, *c; size_t line_len; s64 line_ip, offset = -1; @@ -217,6 +707,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, *c = 0; line_ip = -1; + parsed_line = line; /* * Strip leading spaces: @@ -244,14 +735,17 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, offset = line_ip - start; if (offset < 0 || (u64)line_ip > end) offset = -1; + else + parsed_line = tmp2 + 1; } - objdump_line = objdump_line__new(offset, line, privsize); - if (objdump_line == NULL) { - free(line); + dl = disasm_line__new(offset, parsed_line, privsize); + free(line); + + if (dl == NULL) return -1; - } - objdump__add_line(¬es->src->source, objdump_line); + + disasm__add(¬es->src->source, dl); return 0; } @@ -294,7 +788,7 @@ fallback: free_filename = false; } - if (dso->symtab_type == SYMTAB__KALLSYMS) { + if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) { char bf[BUILD_ID_SIZE * 2 + 16] = " with build id "; char *build_id_msg = NULL; @@ -308,9 +802,12 @@ fallback: } err = -ENOENT; dso->annotate_warned = 1; - pr_err("Can't annotate %s: No vmlinux file%s was found in the " - "path.\nPlease use 'perf buildid-cache -av vmlinux' or " - "--vmlinux vmlinux.\n", + pr_err("Can't annotate %s:\n\n" + "No vmlinux file%s\nwas found in the path.\n\n" + "Please use:\n\n" + " perf buildid-cache -av vmlinux\n\n" + "or:\n\n" + " --vmlinux vmlinux\n", sym->name, build_id_msg ?: ""); goto out_free_filename; } @@ -323,10 +820,15 @@ fallback: dso, dso->long_name, sym, sym->name); snprintf(command, sizeof(command), - "objdump --start-address=0x%016" PRIx64 - " --stop-address=0x%016" PRIx64 " -dS -C %s|grep -v %s|expand", + "objdump %s%s --start-address=0x%016" PRIx64 + " --stop-address=0x%016" PRIx64 + " -d %s %s -C %s|grep -v %s|expand", + disassembler_style ? "-M " : "", + disassembler_style ? disassembler_style : "", map__rip_2objdump(map, sym->start), - map__rip_2objdump(map, sym->end), + map__rip_2objdump(map, sym->end+1), + symbol_conf.annotate_asm_raw ? "" : "--no-show-raw", + symbol_conf.annotate_src ? "-S" : "", symfs_filename, filename); pr_debug("Executing: %s\n", command); @@ -398,7 +900,7 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map, if (!notes->src->lines) return -1; - start = map->unmap_ip(map, sym->start); + start = map__rip_2objdump(map, sym->start); for (i = 0; i < len; i++) { char *path = NULL; @@ -466,7 +968,7 @@ static void symbol__annotate_hits(struct symbol *sym, int evidx) { struct annotation *notes = symbol__annotation(sym); struct sym_hist *h = annotation__histogram(notes, evidx); - u64 len = sym->end - sym->start, offset; + u64 len = symbol__size(sym), offset; for (offset = 0; offset < len; ++offset) if (h->addr[offset] != 0) @@ -482,7 +984,8 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, int evidx, struct dso *dso = map->dso; const char *filename = dso->long_name, *d_filename; struct annotation *notes = symbol__annotation(sym); - struct objdump_line *pos, *queue = NULL; + struct disasm_line *pos, *queue = NULL; + u64 start = map__rip_2objdump(map, sym->start); int printed = 2, queue_len = 0; int more = 0; u64 len; @@ -492,7 +995,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, int evidx, else d_filename = basename(filename); - len = sym->end - sym->start; + len = symbol__size(sym); printf(" Percent | Source code & Disassembly of %s\n", d_filename); printf("------------------------------------------------\n"); @@ -506,8 +1009,9 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, int evidx, queue_len = 0; } - switch (objdump_line__print(pos, sym, evidx, len, min_pcnt, - printed, max_lines, queue)) { + switch (disasm_line__print(pos, sym, start, evidx, len, + min_pcnt, printed, max_lines, + queue)) { case 0: ++printed; if (context) { @@ -551,29 +1055,53 @@ void symbol__annotate_decay_histogram(struct symbol *sym, int evidx) { struct annotation *notes = symbol__annotation(sym); struct sym_hist *h = annotation__histogram(notes, evidx); - struct objdump_line *pos; - int len = sym->end - sym->start; + int len = symbol__size(sym), offset; h->sum = 0; - - list_for_each_entry(pos, ¬es->src->source, node) { - if (pos->offset != -1 && pos->offset < len) { - h->addr[pos->offset] = h->addr[pos->offset] * 7 / 8; - h->sum += h->addr[pos->offset]; - } + for (offset = 0; offset < len; ++offset) { + h->addr[offset] = h->addr[offset] * 7 / 8; + h->sum += h->addr[offset]; } } -void objdump_line_list__purge(struct list_head *head) +void disasm__purge(struct list_head *head) { - struct objdump_line *pos, *n; + struct disasm_line *pos, *n; list_for_each_entry_safe(pos, n, head, node) { list_del(&pos->node); - objdump_line__free(pos); + disasm_line__free(pos); } } +static size_t disasm_line__fprintf(struct disasm_line *dl, FILE *fp) +{ + size_t printed; + + if (dl->offset == -1) + return fprintf(fp, "%s\n", dl->line); + + printed = fprintf(fp, "%#" PRIx64 " %s", dl->offset, dl->name); + + if (dl->ops.raw[0] != '\0') { + printed += fprintf(fp, "%.*s %s\n", 6 - (int)printed, " ", + dl->ops.raw); + } + + return printed + fprintf(fp, "\n"); +} + +size_t disasm__fprintf(struct list_head *head, FILE *fp) +{ + struct disasm_line *pos; + size_t printed = 0; + + list_for_each_entry(pos, head, node) + printed += disasm_line__fprintf(pos, fp); + + return printed; +} + int symbol__tty_annotate(struct symbol *sym, struct map *map, int evidx, bool print_lines, bool full_paths, int min_pcnt, int max_lines) @@ -586,7 +1114,7 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, int evidx, if (symbol__annotate(sym, map, 0) < 0) return -1; - len = sym->end - sym->start; + len = symbol__size(sym); if (print_lines) { symbol__get_source_line(sym, map, evidx, &source_line, @@ -599,7 +1127,7 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, int evidx, if (print_lines) symbol__free_source_line(sym, len); - objdump_line_list__purge(&symbol__annotation(sym)->src->source); + disasm__purge(&symbol__annotation(sym)->src->source); return 0; } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index c2c286896801..78a5692dd718 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -2,20 +2,69 @@ #define __PERF_ANNOTATE_H #include <stdbool.h> +#include <stdint.h> #include "types.h" #include "symbol.h" #include <linux/list.h> #include <linux/rbtree.h> -struct objdump_line { - struct list_head node; - s64 offset; - char *line; +struct ins; + +struct ins_operands { + char *raw; + struct { + char *raw; + char *name; + u64 addr; + u64 offset; + } target; + union { + struct { + char *raw; + char *name; + u64 addr; + } source; + struct { + struct ins *ins; + struct ins_operands *ops; + } locked; + }; +}; + +struct ins_ops { + void (*free)(struct ins_operands *ops); + int (*parse)(struct ins_operands *ops); + int (*scnprintf)(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops); +}; + +struct ins { + const char *name; + struct ins_ops *ops; +}; + +bool ins__is_jump(const struct ins *ins); +bool ins__is_call(const struct ins *ins); +int ins__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops); + +struct disasm_line { + struct list_head node; + s64 offset; + char *line; + char *name; + struct ins *ins; + struct ins_operands ops; }; -void objdump_line__free(struct objdump_line *self); -struct objdump_line *objdump__get_next_ip_line(struct list_head *head, - struct objdump_line *pos); +static inline bool disasm_line__has_offset(const struct disasm_line *dl) +{ + return dl->ops.target.offset != UINT64_MAX; +} + +void disasm_line__free(struct disasm_line *dl); +struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disasm_line *pos); +int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw); +size_t disasm__fprintf(struct list_head *head, FILE *fp); struct sym_hist { u64 sum; @@ -32,7 +81,7 @@ struct source_line { * * @histogram: Array of addr hit histograms per event being monitored * @lines: If 'print_lines' is specified, per source code line percentages - * @source: source parsed from objdump -dS + * @source: source parsed from a disassembler like objdump -dS * * lines is allocated, percentages calculated and all sorted by percentage * when the annotation is about to be presented, so the percentages are for @@ -72,7 +121,7 @@ static inline struct annotation *symbol__annotation(struct symbol *sym) int symbol__inc_addr_samples(struct symbol *sym, struct map *map, int evidx, u64 addr); -int symbol__alloc_hist(struct symbol *sym, int nevents); +int symbol__alloc_hist(struct symbol *sym); void symbol__annotate_zero_histograms(struct symbol *sym); int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize); @@ -82,7 +131,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, int evidx, int context); void symbol__annotate_zero_histogram(struct symbol *sym, int evidx); void symbol__annotate_decay_histogram(struct symbol *sym, int evidx); -void objdump_line_list__purge(struct list_head *head); +void disasm__purge(struct list_head *head); int symbol__tty_annotate(struct symbol *sym, struct map *map, int evidx, bool print_lines, bool full_paths, int min_pcnt, @@ -91,13 +140,17 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, int evidx, #ifdef NO_NEWT_SUPPORT static inline int symbol__tui_annotate(struct symbol *sym __used, struct map *map __used, - int evidx __used, int refresh __used) + int evidx __used, + void(*timer)(void *arg) __used, + void *arg __used, int delay_secs __used) { return 0; } #else int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx, - int refresh); + void(*timer)(void *arg), void *arg, int delay_secs); #endif +extern const char *disassembler_style; + #endif /* __PERF_ANNOTATE_H */ diff --git a/tools/perf/util/bitmap.c b/tools/perf/util/bitmap.c index 5e230acae1e9..0a1adc1111fd 100644 --- a/tools/perf/util/bitmap.c +++ b/tools/perf/util/bitmap.c @@ -19,3 +19,13 @@ int __bitmap_weight(const unsigned long *bitmap, int bits) return w; } + +void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits) +{ + int k; + int nr = BITS_TO_LONGS(bits); + + for (k = 0; k < nr; k++) + dst[k] = bitmap1[k] | bitmap2[k]; +} diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index a91cd99f26ea..fd9a5944b627 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -13,15 +13,18 @@ #include "symbol.h" #include <linux/kernel.h> #include "debug.h" +#include "session.h" +#include "tool.h" -static int build_id__mark_dso_hit(union perf_event *event, +static int build_id__mark_dso_hit(struct perf_tool *tool __used, + union perf_event *event, struct perf_sample *sample __used, struct perf_evsel *evsel __used, - struct perf_session *session) + struct machine *machine) { struct addr_location al; u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; - struct thread *thread = perf_session__findnew(session, event->ip.pid); + struct thread *thread = machine__findnew_thread(machine, event->ip.pid); if (thread == NULL) { pr_err("problem processing %d event, skipping it.\n", @@ -29,8 +32,8 @@ static int build_id__mark_dso_hit(union perf_event *event, return -1; } - thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION, - event->ip.pid, event->ip.ip, &al); + thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION, + event->ip.ip, &al); if (al.map != NULL) al.map->dso->hit = 1; @@ -38,29 +41,32 @@ static int build_id__mark_dso_hit(union perf_event *event, return 0; } -static int perf_event__exit_del_thread(union perf_event *event, +static int perf_event__exit_del_thread(struct perf_tool *tool __used, + union perf_event *event, struct perf_sample *sample __used, - struct perf_session *session) + struct machine *machine) { - struct thread *thread = perf_session__findnew(session, event->fork.tid); + struct thread *thread = machine__findnew_thread(machine, event->fork.tid); dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid, event->fork.ppid, event->fork.ptid); if (thread) { - rb_erase(&thread->rb_node, &session->threads); - session->last_match = NULL; + rb_erase(&thread->rb_node, &machine->threads); + machine->last_match = NULL; thread__delete(thread); } return 0; } -struct perf_event_ops build_id__mark_dso_hit_ops = { +struct perf_tool build_id__mark_dso_hit_ops = { .sample = build_id__mark_dso_hit, .mmap = perf_event__process_mmap, .fork = perf_event__process_task, .exit = perf_event__exit_del_thread, + .attr = perf_event__process_attr, + .build_id = perf_event__process_build_id, }; char *dso__build_id_filename(struct dso *self, char *bf, size_t size) diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index 5dafb00eaa06..a993ba87d996 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -3,7 +3,7 @@ #include "session.h" -extern struct perf_event_ops build_id__mark_dso_hit_ops; +extern struct perf_tool build_id__mark_dso_hit_ops; char *dso__build_id_filename(struct dso *self, char *bf, size_t size); diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index fc5e5a09d5b9..cff18c617d13 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -33,7 +33,7 @@ extern int pager_use_color; extern int use_browser; -#ifdef NO_NEWT_SUPPORT +#if defined(NO_NEWT_SUPPORT) && defined(NO_GTK2_SUPPORT) static inline void setup_browser(bool fallback_to_pager) { if (fallback_to_pager) @@ -43,7 +43,29 @@ static inline void exit_browser(bool wait_for_ok __used) {} #else void setup_browser(bool fallback_to_pager); void exit_browser(bool wait_for_ok); + +#ifdef NO_NEWT_SUPPORT +static inline int ui__init(void) +{ + return -1; +} +static inline void ui__exit(bool wait_for_ok __used) {} +#else +int ui__init(void); +void ui__exit(bool wait_for_ok); +#endif + +#ifdef NO_GTK2_SUPPORT +static inline int perf_gtk__init(void) +{ + return -1; +} +static inline void perf_gtk__exit(bool wait_for_ok __used) {} +#else +int perf_gtk__init(void); +void perf_gtk__exit(bool wait_for_ok); #endif +#endif /* NO_NEWT_SUPPORT && NO_GTK2_SUPPORT */ char *alias_lookup(const char *alias); int split_cmdline(char *cmdline, const char ***argv); diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 9f7106a8d9a4..3a6bff47614f 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -18,6 +18,8 @@ #include "util.h" #include "callchain.h" +__thread struct callchain_cursor callchain_cursor; + bool ip_callchain__valid(struct ip_callchain *chain, const union perf_event *event) { diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 9b4ff16cac96..3bdb407f9cd9 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -76,6 +76,8 @@ struct callchain_cursor { struct callchain_cursor_node *curr; }; +extern __thread struct callchain_cursor callchain_cursor; + static inline void callchain_init(struct callchain_root *root) { INIT_LIST_HEAD(&root->node.siblings); @@ -101,6 +103,9 @@ int callchain_append(struct callchain_root *root, int callchain_merge(struct callchain_cursor *cursor, struct callchain_root *dst, struct callchain_root *src); +struct ip_callchain; +union perf_event; + bool ip_callchain__valid(struct ip_callchain *chain, const union perf_event *event); /* diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index 96bee5c46008..dbe2f16b1a1a 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -3,7 +3,6 @@ #include "parse-options.h" #include "evsel.h" #include "cgroup.h" -#include "debugfs.h" /* MAX_PATH, STR() */ #include "evlist.h" int nr_cgroups; @@ -12,7 +11,7 @@ static int cgroupfs_find_mountpoint(char *buf, size_t maxlen) { FILE *fp; - char mountpoint[MAX_PATH+1], tokens[MAX_PATH+1], type[MAX_PATH+1]; + char mountpoint[PATH_MAX + 1], tokens[PATH_MAX + 1], type[PATH_MAX + 1]; char *token, *saved_ptr = NULL; int found = 0; @@ -25,8 +24,8 @@ cgroupfs_find_mountpoint(char *buf, size_t maxlen) * and inspect every cgroupfs mount point to find one that has * perf_event subsystem */ - while (fscanf(fp, "%*s %"STR(MAX_PATH)"s %"STR(MAX_PATH)"s %" - STR(MAX_PATH)"s %*d %*d\n", + while (fscanf(fp, "%*s %"STR(PATH_MAX)"s %"STR(PATH_MAX)"s %" + STR(PATH_MAX)"s %*d %*d\n", mountpoint, type, tokens) == 3) { if (!strcmp(type, "cgroup")) { @@ -57,15 +56,15 @@ cgroupfs_find_mountpoint(char *buf, size_t maxlen) static int open_cgroup(char *name) { - char path[MAX_PATH+1]; - char mnt[MAX_PATH+1]; + char path[PATH_MAX + 1]; + char mnt[PATH_MAX + 1]; int fd; - if (cgroupfs_find_mountpoint(mnt, MAX_PATH+1)) + if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1)) return -1; - snprintf(path, MAX_PATH, "%s/%s", mnt, name); + snprintf(path, PATH_MAX, "%s/%s", mnt, name); fd = open(path, O_RDONLY); if (fd == -1) diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c index e191eb9a667f..11e46da17bbb 100644 --- a/tools/perf/util/color.c +++ b/tools/perf/util/color.c @@ -1,3 +1,4 @@ +#include <linux/kernel.h> #include "cache.h" #include "color.h" @@ -182,12 +183,12 @@ static int __color_vsnprintf(char *bf, size_t size, const char *color, } if (perf_use_color_default && *color) - r += snprintf(bf, size, "%s", color); - r += vsnprintf(bf + r, size - r, fmt, args); + r += scnprintf(bf, size, "%s", color); + r += vscnprintf(bf + r, size - r, fmt, args); if (perf_use_color_default && *color) - r += snprintf(bf + r, size - r, "%s", PERF_COLOR_RESET); + r += scnprintf(bf + r, size - r, "%s", PERF_COLOR_RESET); if (trail) - r += snprintf(bf + r, size - r, "%s", trail); + r += scnprintf(bf + r, size - r, "%s", trail); return r; } @@ -200,7 +201,7 @@ static int __color_vfprintf(FILE *fp, const char *color, const char *fmt, * Auto-detect: */ if (perf_use_color_default < 0) { - if (isatty(1) || pager_in_use()) + if (isatty(fileno(fp)) || pager_in_use()) perf_use_color_default = 1; else perf_use_color_default = 0; diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index fe02903f7d0f..6faa3a18bfbd 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -1,5 +1,8 @@ /* - * GIT - The information manager from hell + * config.c + * + * Helper functions for parsing config items. + * Originally copied from GIT source. * * Copyright (C) Linus Torvalds, 2005 * Copyright (C) Johannes Schindelin, 2005 @@ -117,7 +120,7 @@ static char *parse_value(void) static inline int iskeychar(int c) { - return isalnum(c) || c == '-'; + return isalnum(c) || c == '-' || c == '_'; } static int get_value(config_fn_t fn, void *data, char *name, unsigned int len) @@ -341,7 +344,7 @@ const char *perf_config_dirname(const char *name, const char *value) static int perf_default_core_config(const char *var __used, const char *value __used) { - /* Add other config variables here and to Documentation/config.txt. */ + /* Add other config variables here. */ return 0; } @@ -350,7 +353,7 @@ int perf_default_config(const char *var, const char *value, void *dummy __used) if (!prefixcmp(var, "core.")) return perf_default_core_config(var, value); - /* Add other config variables here and to Documentation/config.txt. */ + /* Add other config variables here. */ return 0; } diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 6893eec693ab..adc72f09914d 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -166,6 +166,17 @@ out: return cpus; } +size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp) +{ + int i; + size_t printed = fprintf(fp, "%d cpu%s: ", + map->nr, map->nr > 1 ? "s" : ""); + for (i = 0; i < map->nr; ++i) + printed += fprintf(fp, "%s%d", i ? ", " : "", map->map[i]); + + return printed + fprintf(fp, "\n"); +} + struct cpu_map *cpu_map__dummy_new(void) { struct cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int)); diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 072c0a374794..c41518573c6a 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -1,6 +1,8 @@ #ifndef __PERF_CPUMAP_H #define __PERF_CPUMAP_H +#include <stdio.h> + struct cpu_map { int nr; int map[]; @@ -10,4 +12,6 @@ struct cpu_map *cpu_map__new(const char *cpu_list); struct cpu_map *cpu_map__dummy_new(void); void cpu_map__delete(struct cpu_map *map); +size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp); + #endif /* __PERF_CPUMAP_H */ diff --git a/tools/perf/util/ctype.c b/tools/perf/util/ctype.c index 35073621e5de..aada3ac5e891 100644 --- a/tools/perf/util/ctype.c +++ b/tools/perf/util/ctype.c @@ -3,7 +3,7 @@ * * No surprises, and works with signed and unsigned chars. */ -#include "cache.h" +#include "util.h" enum { S = GIT_SPACE, diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 155749d74350..4dfe0bb3c322 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -11,6 +11,7 @@ #include "event.h" #include "debug.h" #include "util.h" +#include "target.h" int verbose; bool dump_trace = false, quiet = false; @@ -46,20 +47,21 @@ int dump_printf(const char *fmt, ...) return ret; } -#ifdef NO_NEWT_SUPPORT -void ui__warning(const char *format, ...) +#if defined(NO_NEWT_SUPPORT) && defined(NO_GTK2_SUPPORT) +int ui__warning(const char *format, ...) { va_list args; va_start(args, format); vfprintf(stderr, format, args); va_end(args); + return 0; } #endif -void ui__warning_paranoid(void) +int ui__error_paranoid(void) { - ui__warning("Permission error - are you root?\n" + return ui__error("Permission error - are you root?\n" "Consider tweaking /proc/sys/kernel/perf_event_paranoid:\n" " -1 - Not paranoid at all\n" " 0 - Disallow raw tracepoint access for unpriv\n" diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index fd53db47e3de..015c91dbc096 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -12,30 +12,42 @@ int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2))); void trace_event(union perf_event *event); struct ui_progress; +struct perf_error_ops; -#ifdef NO_NEWT_SUPPORT +#if defined(NO_NEWT_SUPPORT) && defined(NO_GTK2_SUPPORT) static inline int ui_helpline__show_help(const char *format __used, va_list ap __used) { return 0; } -static inline struct ui_progress *ui_progress__new(const char *title __used, - u64 total __used) +static inline void ui_progress__update(u64 curr __used, u64 total __used, + const char *title __used) {} + +#define ui__error(format, arg...) ui__warning(format, ##arg) + +static inline int +perf_error__register(struct perf_error_ops *eops __used) +{ + return 0; +} + +static inline int +perf_error__unregister(struct perf_error_ops *eops __used) { - return (struct ui_progress *)1; + return 0; } -static inline void ui_progress__update(struct ui_progress *self __used, - u64 curr __used) {} +#else /* NO_NEWT_SUPPORT && NO_GTK2_SUPPORT */ -static inline void ui_progress__delete(struct ui_progress *self __used) {} -#else extern char ui_helpline__last_msg[]; int ui_helpline__show_help(const char *format, va_list ap); -#include "ui/progress.h" -#endif +#include "../ui/progress.h" +int ui__error(const char *format, ...) __attribute__((format(printf, 1, 2))); +#include "../ui/util.h" + +#endif /* NO_NEWT_SUPPORT && NO_GTK2_SUPPORT */ -void ui__warning(const char *format, ...) __attribute__((format(printf, 1, 2))); -void ui__warning_paranoid(void); +int ui__warning(const char *format, ...) __attribute__((format(printf, 1, 2))); +int ui__error_paranoid(void); #endif /* __PERF_DEBUG_H */ diff --git a/tools/perf/util/debugfs.c b/tools/perf/util/debugfs.c index a88fefc0cc0a..dd8b19319c03 100644 --- a/tools/perf/util/debugfs.c +++ b/tools/perf/util/debugfs.c @@ -2,8 +2,12 @@ #include "debugfs.h" #include "cache.h" +#include <linux/kernel.h> +#include <sys/mount.h> + static int debugfs_premounted; -static char debugfs_mountpoint[MAX_PATH+1]; +char debugfs_mountpoint[PATH_MAX + 1] = "/sys/kernel/debug"; +char tracing_events_path[PATH_MAX + 1] = "/sys/kernel/debug/tracing/events"; static const char *debugfs_known_mountpoints[] = { "/sys/kernel/debug/", @@ -11,32 +15,6 @@ static const char *debugfs_known_mountpoints[] = { 0, }; -/* use this to force a umount */ -void debugfs_force_cleanup(void) -{ - debugfs_find_mountpoint(); - debugfs_premounted = 0; - debugfs_umount(); -} - -/* construct a full path to a debugfs element */ -int debugfs_make_path(const char *element, char *buffer, int size) -{ - int len; - - if (strlen(debugfs_mountpoint) == 0) { - buffer[0] = '\0'; - return -1; - } - - len = strlen(debugfs_mountpoint) + strlen(element) + 1; - if (len >= size) - return len+1; - - snprintf(buffer, size-1, "%s/%s", debugfs_mountpoint, element); - return 0; -} - static int debugfs_found; /* find the path to the mounted debugfs */ @@ -62,11 +40,9 @@ const char *debugfs_find_mountpoint(void) /* give up and parse /proc/mounts */ fp = fopen("/proc/mounts", "r"); if (fp == NULL) - die("Can't open /proc/mounts for read"); + return NULL; - while (fscanf(fp, "%*s %" - STR(MAX_PATH) - "s %99s %*s %*d %*d\n", + while (fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n", debugfs_mountpoint, type) == 2) { if (strcmp(type, "debugfs") == 0) break; @@ -95,15 +71,10 @@ int debugfs_valid_mountpoint(const char *debugfs) return 0; } - -int debugfs_valid_entry(const char *path) +static void debugfs_set_tracing_events_path(const char *mountpoint) { - struct stat st; - - if (stat(path, &st)) - return -errno; - - return 0; + snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s", + mountpoint, "tracing/events"); } /* mount the debugfs somewhere if it's not mounted */ @@ -113,7 +84,7 @@ char *debugfs_mount(const char *mountpoint) /* see if it's already mounted */ if (debugfs_find_mountpoint()) { debugfs_premounted = 1; - return debugfs_mountpoint; + goto out; } /* if not mounted and no argument */ @@ -129,112 +100,15 @@ char *debugfs_mount(const char *mountpoint) return NULL; /* save the mountpoint */ - strncpy(debugfs_mountpoint, mountpoint, sizeof(debugfs_mountpoint)); debugfs_found = 1; - + strncpy(debugfs_mountpoint, mountpoint, sizeof(debugfs_mountpoint)); +out: + debugfs_set_tracing_events_path(debugfs_mountpoint); return debugfs_mountpoint; } -/* umount the debugfs */ - -int debugfs_umount(void) -{ - char umountcmd[128]; - int ret; - - /* if it was already mounted, leave it */ - if (debugfs_premounted) - return 0; - - /* make sure it's a valid mount point */ - ret = debugfs_valid_mountpoint(debugfs_mountpoint); - if (ret) - return ret; - - snprintf(umountcmd, sizeof(umountcmd), - "/bin/umount %s", debugfs_mountpoint); - return system(umountcmd); -} - -int debugfs_write(const char *entry, const char *value) +void debugfs_set_path(const char *mountpoint) { - char path[MAX_PATH+1]; - int ret, count; - int fd; - - /* construct the path */ - snprintf(path, sizeof(path), "%s/%s", debugfs_mountpoint, entry); - - /* verify that it exists */ - ret = debugfs_valid_entry(path); - if (ret) - return ret; - - /* get how many chars we're going to write */ - count = strlen(value); - - /* open the debugfs entry */ - fd = open(path, O_RDWR); - if (fd < 0) - return -errno; - - while (count > 0) { - /* write it */ - ret = write(fd, value, count); - if (ret <= 0) { - if (ret == EAGAIN) - continue; - close(fd); - return -errno; - } - count -= ret; - } - - /* close it */ - close(fd); - - /* return success */ - return 0; -} - -/* - * read a debugfs entry - * returns the number of chars read or a negative errno - */ -int debugfs_read(const char *entry, char *buffer, size_t size) -{ - char path[MAX_PATH+1]; - int ret; - int fd; - - /* construct the path */ - snprintf(path, sizeof(path), "%s/%s", debugfs_mountpoint, entry); - - /* verify that it exists */ - ret = debugfs_valid_entry(path); - if (ret) - return ret; - - /* open the debugfs entry */ - fd = open(path, O_RDONLY); - if (fd < 0) - return -errno; - - do { - /* read it */ - ret = read(fd, buffer, size); - if (ret == 0) { - close(fd); - return EOF; - } - } while (ret < 0 && errno == EAGAIN); - - /* close it */ - close(fd); - - /* make *sure* there's a null character at the end */ - buffer[ret] = '\0'; - - /* return the number of chars read */ - return ret; + snprintf(debugfs_mountpoint, sizeof(debugfs_mountpoint), "%s", mountpoint); + debugfs_set_tracing_events_path(mountpoint); } diff --git a/tools/perf/util/debugfs.h b/tools/perf/util/debugfs.h index 83a02879745f..68f3e87ec57f 100644 --- a/tools/perf/util/debugfs.h +++ b/tools/perf/util/debugfs.h @@ -1,25 +1,12 @@ #ifndef __DEBUGFS_H__ #define __DEBUGFS_H__ -#include <sys/mount.h> +const char *debugfs_find_mountpoint(void); +int debugfs_valid_mountpoint(const char *debugfs); +char *debugfs_mount(const char *mountpoint); +void debugfs_set_path(const char *mountpoint); -#ifndef MAX_PATH -# define MAX_PATH 256 -#endif - -#ifndef STR -# define _STR(x) #x -# define STR(x) _STR(x) -#endif - -extern const char *debugfs_find_mountpoint(void); -extern int debugfs_valid_mountpoint(const char *debugfs); -extern int debugfs_valid_entry(const char *path); -extern char *debugfs_mount(const char *mountpoint); -extern int debugfs_umount(void); -extern int debugfs_write(const char *entry, const char *value); -extern int debugfs_read(const char *entry, char *buffer, size_t size); -extern void debugfs_force_cleanup(void); -extern int debugfs_make_path(const char *element, char *buffer, int size); +extern char debugfs_mountpoint[]; +extern char tracing_events_path[]; #endif /* __DEBUGFS_H__ */ diff --git a/tools/perf/util/dso-test-data.c b/tools/perf/util/dso-test-data.c new file mode 100644 index 000000000000..541cdc72c7df --- /dev/null +++ b/tools/perf/util/dso-test-data.c @@ -0,0 +1,153 @@ +#include "util.h" + +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <string.h> + +#include "symbol.h" + +#define TEST_ASSERT_VAL(text, cond) \ +do { \ + if (!(cond)) { \ + pr_debug("FAILED %s:%d %s\n", __FILE__, __LINE__, text); \ + return -1; \ + } \ +} while (0) + +static char *test_file(int size) +{ + static char buf_templ[] = "/tmp/test-XXXXXX"; + char *templ = buf_templ; + int fd, i; + unsigned char *buf; + + fd = mkostemp(templ, O_CREAT|O_WRONLY|O_TRUNC); + + buf = malloc(size); + if (!buf) { + close(fd); + return NULL; + } + + for (i = 0; i < size; i++) + buf[i] = (unsigned char) ((int) i % 10); + + if (size != write(fd, buf, size)) + templ = NULL; + + close(fd); + return templ; +} + +#define TEST_FILE_SIZE (DSO__DATA_CACHE_SIZE * 20) + +struct test_data_offset { + off_t offset; + u8 data[10]; + int size; +}; + +struct test_data_offset offsets[] = { + /* Fill first cache page. */ + { + .offset = 10, + .data = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }, + .size = 10, + }, + /* Read first cache page. */ + { + .offset = 10, + .data = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }, + .size = 10, + }, + /* Fill cache boundary pages. */ + { + .offset = DSO__DATA_CACHE_SIZE - DSO__DATA_CACHE_SIZE % 10, + .data = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }, + .size = 10, + }, + /* Read cache boundary pages. */ + { + .offset = DSO__DATA_CACHE_SIZE - DSO__DATA_CACHE_SIZE % 10, + .data = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }, + .size = 10, + }, + /* Fill final cache page. */ + { + .offset = TEST_FILE_SIZE - 10, + .data = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }, + .size = 10, + }, + /* Read final cache page. */ + { + .offset = TEST_FILE_SIZE - 10, + .data = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }, + .size = 10, + }, + /* Read final cache page. */ + { + .offset = TEST_FILE_SIZE - 3, + .data = { 7, 8, 9, 0, 0, 0, 0, 0, 0, 0 }, + .size = 3, + }, +}; + +int dso__test_data(void) +{ + struct machine machine; + struct dso *dso; + char *file = test_file(TEST_FILE_SIZE); + size_t i; + + TEST_ASSERT_VAL("No test file", file); + + memset(&machine, 0, sizeof(machine)); + + dso = dso__new((const char *)file); + + /* Basic 10 bytes tests. */ + for (i = 0; i < ARRAY_SIZE(offsets); i++) { + struct test_data_offset *data = &offsets[i]; + ssize_t size; + u8 buf[10]; + + memset(buf, 0, 10); + size = dso__data_read_offset(dso, &machine, data->offset, + buf, 10); + + TEST_ASSERT_VAL("Wrong size", size == data->size); + TEST_ASSERT_VAL("Wrong data", !memcmp(buf, data->data, 10)); + } + + /* Read cross multiple cache pages. */ + { + ssize_t size; + int c; + u8 *buf; + + buf = malloc(TEST_FILE_SIZE); + TEST_ASSERT_VAL("ENOMEM\n", buf); + + /* First iteration to fill caches, second one to read them. */ + for (c = 0; c < 2; c++) { + memset(buf, 0, TEST_FILE_SIZE); + size = dso__data_read_offset(dso, &machine, 10, + buf, TEST_FILE_SIZE); + + TEST_ASSERT_VAL("Wrong size", + size == (TEST_FILE_SIZE - 10)); + + for (i = 0; i < (size_t)size; i++) + TEST_ASSERT_VAL("Wrong data", + buf[i] == (i % 10)); + } + + free(buf); + } + + dso__delete(dso); + unlink(file); + return 0; +} diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 437f8ca679a0..2a6f33cd888c 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1,7 +1,6 @@ #include <linux/types.h> #include "event.h" #include "debug.h" -#include "session.h" #include "sort.h" #include "string.h" #include "strlist.h" @@ -44,36 +43,27 @@ static struct perf_sample synth_sample = { .period = 1, }; -static pid_t perf_event__synthesize_comm(union perf_event *event, pid_t pid, - int full, perf_event__handler_t process, - struct perf_session *session) +static pid_t perf_event__get_comm_tgid(pid_t pid, char *comm, size_t len) { char filename[PATH_MAX]; char bf[BUFSIZ]; FILE *fp; size_t size = 0; - DIR *tasks; - struct dirent dirent, *next; - pid_t tgid = 0; + pid_t tgid = -1; snprintf(filename, sizeof(filename), "/proc/%d/status", pid); fp = fopen(filename, "r"); if (fp == NULL) { -out_race: - /* - * We raced with a task exiting - just return: - */ pr_debug("couldn't open %s\n", filename); return 0; } - memset(&event->comm, 0, sizeof(event->comm)); - - while (!event->comm.comm[0] || !event->comm.pid) { + while (!comm[0] || (tgid < 0)) { if (fgets(bf, sizeof(bf), fp) == NULL) { - pr_warning("couldn't get COMM and pgid, malformed %s\n", filename); - goto out; + pr_warning("couldn't get COMM and pgid, malformed %s\n", + filename); + break; } if (memcmp(bf, "Name:", 5) == 0) { @@ -81,33 +71,66 @@ out_race: while (*name && isspace(*name)) ++name; size = strlen(name) - 1; - memcpy(event->comm.comm, name, size++); + if (size >= len) + size = len - 1; + memcpy(comm, name, size); + comm[size] = '\0'; + } else if (memcmp(bf, "Tgid:", 5) == 0) { char *tgids = bf + 5; while (*tgids && isspace(*tgids)) ++tgids; - tgid = event->comm.pid = atoi(tgids); + tgid = atoi(tgids); } } + fclose(fp); + + return tgid; +} + +static pid_t perf_event__synthesize_comm(struct perf_tool *tool, + union perf_event *event, pid_t pid, + int full, + perf_event__handler_t process, + struct machine *machine) +{ + char filename[PATH_MAX]; + size_t size; + DIR *tasks; + struct dirent dirent, *next; + pid_t tgid; + + memset(&event->comm, 0, sizeof(event->comm)); + + tgid = perf_event__get_comm_tgid(pid, event->comm.comm, + sizeof(event->comm.comm)); + if (tgid < 0) + goto out; + + event->comm.pid = tgid; event->comm.header.type = PERF_RECORD_COMM; + + size = strlen(event->comm.comm) + 1; size = ALIGN(size, sizeof(u64)); - memset(event->comm.comm + size, 0, session->id_hdr_size); + memset(event->comm.comm + size, 0, machine->id_hdr_size); event->comm.header.size = (sizeof(event->comm) - (sizeof(event->comm.comm) - size) + - session->id_hdr_size); + machine->id_hdr_size); if (!full) { event->comm.tid = pid; - process(event, &synth_sample, session); + process(tool, event, &synth_sample, machine); goto out; } snprintf(filename, sizeof(filename), "/proc/%d/task", pid); tasks = opendir(filename); - if (tasks == NULL) - goto out_race; + if (tasks == NULL) { + pr_debug("couldn't open %s\n", filename); + return 0; + } while (!readdir_r(tasks, &dirent, &next) && next) { char *end; @@ -115,22 +138,32 @@ out_race: if (*end) continue; + /* already have tgid; jut want to update the comm */ + (void) perf_event__get_comm_tgid(pid, event->comm.comm, + sizeof(event->comm.comm)); + + size = strlen(event->comm.comm) + 1; + size = ALIGN(size, sizeof(u64)); + memset(event->comm.comm + size, 0, machine->id_hdr_size); + event->comm.header.size = (sizeof(event->comm) - + (sizeof(event->comm.comm) - size) + + machine->id_hdr_size); + event->comm.tid = pid; - process(event, &synth_sample, session); + process(tool, event, &synth_sample, machine); } closedir(tasks); out: - fclose(fp); - return tgid; } -static int perf_event__synthesize_mmap_events(union perf_event *event, +static int perf_event__synthesize_mmap_events(struct perf_tool *tool, + union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, - struct perf_session *session) + struct machine *machine) { char filename[PATH_MAX]; FILE *fp; @@ -193,12 +226,12 @@ static int perf_event__synthesize_mmap_events(union perf_event *event, event->mmap.len -= event->mmap.start; event->mmap.header.size = (sizeof(event->mmap) - (sizeof(event->mmap.filename) - size)); - memset(event->mmap.filename + size, 0, session->id_hdr_size); - event->mmap.header.size += session->id_hdr_size; + memset(event->mmap.filename + size, 0, machine->id_hdr_size); + event->mmap.header.size += machine->id_hdr_size; event->mmap.pid = tgid; event->mmap.tid = pid; - process(event, &synth_sample, session); + process(tool, event, &synth_sample, machine); } } @@ -206,14 +239,14 @@ static int perf_event__synthesize_mmap_events(union perf_event *event, return 0; } -int perf_event__synthesize_modules(perf_event__handler_t process, - struct perf_session *session, +int perf_event__synthesize_modules(struct perf_tool *tool, + perf_event__handler_t process, struct machine *machine) { struct rb_node *nd; struct map_groups *kmaps = &machine->kmaps; union perf_event *event = zalloc((sizeof(event->mmap) + - session->id_hdr_size)); + machine->id_hdr_size)); if (event == NULL) { pr_debug("Not enough memory synthesizing mmap event " "for kernel modules\n"); @@ -243,15 +276,15 @@ int perf_event__synthesize_modules(perf_event__handler_t process, event->mmap.header.type = PERF_RECORD_MMAP; event->mmap.header.size = (sizeof(event->mmap) - (sizeof(event->mmap.filename) - size)); - memset(event->mmap.filename + size, 0, session->id_hdr_size); - event->mmap.header.size += session->id_hdr_size; + memset(event->mmap.filename + size, 0, machine->id_hdr_size); + event->mmap.header.size += machine->id_hdr_size; event->mmap.start = pos->start; event->mmap.len = pos->end - pos->start; event->mmap.pid = machine->pid; memcpy(event->mmap.filename, pos->dso->long_name, pos->dso->long_name_len + 1); - process(event, &synth_sample, session); + process(tool, event, &synth_sample, machine); } free(event); @@ -260,40 +293,69 @@ int perf_event__synthesize_modules(perf_event__handler_t process, static int __event__synthesize_thread(union perf_event *comm_event, union perf_event *mmap_event, - pid_t pid, perf_event__handler_t process, - struct perf_session *session) + pid_t pid, int full, + perf_event__handler_t process, + struct perf_tool *tool, + struct machine *machine) { - pid_t tgid = perf_event__synthesize_comm(comm_event, pid, 1, process, - session); + pid_t tgid = perf_event__synthesize_comm(tool, comm_event, pid, full, + process, machine); if (tgid == -1) return -1; - return perf_event__synthesize_mmap_events(mmap_event, pid, tgid, - process, session); + return perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, + process, machine); } -int perf_event__synthesize_thread_map(struct thread_map *threads, +int perf_event__synthesize_thread_map(struct perf_tool *tool, + struct thread_map *threads, perf_event__handler_t process, - struct perf_session *session) + struct machine *machine) { union perf_event *comm_event, *mmap_event; - int err = -1, thread; + int err = -1, thread, j; - comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size); + comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size); if (comm_event == NULL) goto out; - mmap_event = malloc(sizeof(mmap_event->mmap) + session->id_hdr_size); + mmap_event = malloc(sizeof(mmap_event->mmap) + machine->id_hdr_size); if (mmap_event == NULL) goto out_free_comm; err = 0; for (thread = 0; thread < threads->nr; ++thread) { if (__event__synthesize_thread(comm_event, mmap_event, - threads->map[thread], - process, session)) { + threads->map[thread], 0, + process, tool, machine)) { err = -1; break; } + + /* + * comm.pid is set to thread group id by + * perf_event__synthesize_comm + */ + if ((int) comm_event->comm.pid != threads->map[thread]) { + bool need_leader = true; + + /* is thread group leader in thread_map? */ + for (j = 0; j < threads->nr; ++j) { + if ((int) comm_event->comm.pid == threads->map[j]) { + need_leader = false; + break; + } + } + + /* if not, generate events for it */ + if (need_leader && + __event__synthesize_thread(comm_event, + mmap_event, + comm_event->comm.pid, 0, + process, tool, machine)) { + err = -1; + break; + } + } } free(mmap_event); out_free_comm: @@ -302,19 +364,20 @@ out: return err; } -int perf_event__synthesize_threads(perf_event__handler_t process, - struct perf_session *session) +int perf_event__synthesize_threads(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine) { DIR *proc; struct dirent dirent, *next; union perf_event *comm_event, *mmap_event; int err = -1; - comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size); + comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size); if (comm_event == NULL) goto out; - mmap_event = malloc(sizeof(mmap_event->mmap) + session->id_hdr_size); + mmap_event = malloc(sizeof(mmap_event->mmap) + machine->id_hdr_size); if (mmap_event == NULL) goto out_free_comm; @@ -329,8 +392,8 @@ int perf_event__synthesize_threads(perf_event__handler_t process, if (*end) /* only interested in proper numerical dirents */ continue; - __event__synthesize_thread(comm_event, mmap_event, pid, - process, session); + __event__synthesize_thread(comm_event, mmap_event, pid, 1, + process, tool, machine); } closedir(proc); @@ -365,8 +428,8 @@ static int find_symbol_cb(void *arg, const char *name, char type, return 1; } -int perf_event__synthesize_kernel_mmap(perf_event__handler_t process, - struct perf_session *session, +int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, + perf_event__handler_t process, struct machine *machine, const char *symbol_name) { @@ -383,7 +446,7 @@ int perf_event__synthesize_kernel_mmap(perf_event__handler_t process, */ struct process_symbol_args args = { .name = symbol_name, }; union perf_event *event = zalloc((sizeof(event->mmap) + - session->id_hdr_size)); + machine->id_hdr_size)); if (event == NULL) { pr_debug("Not enough memory synthesizing mmap event " "for kernel modules\n"); @@ -417,25 +480,32 @@ int perf_event__synthesize_kernel_mmap(perf_event__handler_t process, size = ALIGN(size, sizeof(u64)); event->mmap.header.type = PERF_RECORD_MMAP; event->mmap.header.size = (sizeof(event->mmap) - - (sizeof(event->mmap.filename) - size) + session->id_hdr_size); + (sizeof(event->mmap.filename) - size) + machine->id_hdr_size); event->mmap.pgoff = args.start; event->mmap.start = map->start; event->mmap.len = map->end - event->mmap.start; event->mmap.pid = machine->pid; - err = process(event, &synth_sample, session); + err = process(tool, event, &synth_sample, machine); free(event); return err; } -int perf_event__process_comm(union perf_event *event, +size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp) +{ + return fprintf(fp, ": %s:%d\n", event->comm.comm, event->comm.tid); +} + +int perf_event__process_comm(struct perf_tool *tool __used, + union perf_event *event, struct perf_sample *sample __used, - struct perf_session *session) + struct machine *machine) { - struct thread *thread = perf_session__findnew(session, event->comm.tid); + struct thread *thread = machine__findnew_thread(machine, event->comm.tid); - dump_printf(": %s:%d\n", event->comm.comm, event->comm.tid); + if (dump_trace) + perf_event__fprintf_comm(event, stdout); if (thread == NULL || thread__set_comm(thread, event->comm.comm)) { dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); @@ -445,13 +515,13 @@ int perf_event__process_comm(union perf_event *event, return 0; } -int perf_event__process_lost(union perf_event *event, +int perf_event__process_lost(struct perf_tool *tool __used, + union perf_event *event, struct perf_sample *sample __used, - struct perf_session *session) + struct machine *machine __used) { dump_printf(": id:%" PRIu64 ": lost:%" PRIu64 "\n", event->lost.id, event->lost.lost); - session->hists.stats.total_lost += event->lost.lost; return 0; } @@ -468,21 +538,15 @@ static void perf_event__set_kernel_mmap_len(union perf_event *event, maps[MAP__FUNCTION]->end = ~0ULL; } -static int perf_event__process_kernel_mmap(union perf_event *event, - struct perf_session *session) +static int perf_event__process_kernel_mmap(struct perf_tool *tool __used, + union perf_event *event, + struct machine *machine) { struct map *map; char kmmap_prefix[PATH_MAX]; - struct machine *machine; enum dso_kernel_type kernel_type; bool is_kernel_mmap; - machine = perf_session__findnew_machine(session, event->mmap.pid); - if (!machine) { - pr_err("Can't find id %d's machine\n", event->mmap.pid); - goto out_problem; - } - machine__mmap_name(machine, kmmap_prefix, sizeof(kmmap_prefix)); if (machine__is_host(machine)) kernel_type = DSO_TYPE_KERNEL; @@ -491,7 +555,7 @@ static int perf_event__process_kernel_mmap(union perf_event *event, is_kernel_mmap = memcmp(event->mmap.filename, kmmap_prefix, - strlen(kmmap_prefix)) == 0; + strlen(kmmap_prefix) - 1) == 0; if (event->mmap.filename[0] == '/' || (!is_kernel_mmap && event->mmap.filename[0] == '[')) { @@ -549,9 +613,9 @@ static int perf_event__process_kernel_mmap(union perf_event *event, * time /proc/sys/kernel/kptr_restrict was non zero. */ if (event->mmap.pgoff != 0) { - perf_session__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, - symbol_name, - event->mmap.pgoff); + maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, + symbol_name, + event->mmap.pgoff); } if (machine__is_default_guest(machine)) { @@ -567,32 +631,35 @@ out_problem: return -1; } -int perf_event__process_mmap(union perf_event *event, +size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp) +{ + return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %s\n", + event->mmap.pid, event->mmap.tid, event->mmap.start, + event->mmap.len, event->mmap.pgoff, event->mmap.filename); +} + +int perf_event__process_mmap(struct perf_tool *tool, + union perf_event *event, struct perf_sample *sample __used, - struct perf_session *session) + struct machine *machine) { - struct machine *machine; struct thread *thread; struct map *map; u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; int ret = 0; - dump_printf(" %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %s\n", - event->mmap.pid, event->mmap.tid, event->mmap.start, - event->mmap.len, event->mmap.pgoff, event->mmap.filename); + if (dump_trace) + perf_event__fprintf_mmap(event, stdout); if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL || cpumode == PERF_RECORD_MISC_KERNEL) { - ret = perf_event__process_kernel_mmap(event, session); + ret = perf_event__process_kernel_mmap(tool, event, machine); if (ret < 0) goto out_problem; return 0; } - machine = perf_session__find_host_machine(session); - if (machine == NULL) - goto out_problem; - thread = perf_session__findnew(session, event->mmap.pid); + thread = machine__findnew_thread(machine, event->mmap.pid); if (thread == NULL) goto out_problem; map = map__new(&machine->user_dsos, event->mmap.start, @@ -610,18 +677,26 @@ out_problem: return 0; } -int perf_event__process_task(union perf_event *event, +size_t perf_event__fprintf_task(union perf_event *event, FILE *fp) +{ + return fprintf(fp, "(%d:%d):(%d:%d)\n", + event->fork.pid, event->fork.tid, + event->fork.ppid, event->fork.ptid); +} + +int perf_event__process_task(struct perf_tool *tool __used, + union perf_event *event, struct perf_sample *sample __used, - struct perf_session *session) + struct machine *machine) { - struct thread *thread = perf_session__findnew(session, event->fork.tid); - struct thread *parent = perf_session__findnew(session, event->fork.ptid); + struct thread *thread = machine__findnew_thread(machine, event->fork.tid); + struct thread *parent = machine__findnew_thread(machine, event->fork.ptid); - dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid, - event->fork.ppid, event->fork.ptid); + if (dump_trace) + perf_event__fprintf_task(event, stdout); if (event->header.type == PERF_RECORD_EXIT) { - perf_session__remove_thread(session, thread); + machine__remove_thread(machine, thread); return 0; } @@ -634,22 +709,45 @@ int perf_event__process_task(union perf_event *event, return 0; } -int perf_event__process(union perf_event *event, struct perf_sample *sample, - struct perf_session *session) +size_t perf_event__fprintf(union perf_event *event, FILE *fp) +{ + size_t ret = fprintf(fp, "PERF_RECORD_%s", + perf_event__name(event->header.type)); + + switch (event->header.type) { + case PERF_RECORD_COMM: + ret += perf_event__fprintf_comm(event, fp); + break; + case PERF_RECORD_FORK: + case PERF_RECORD_EXIT: + ret += perf_event__fprintf_task(event, fp); + break; + case PERF_RECORD_MMAP: + ret += perf_event__fprintf_mmap(event, fp); + break; + default: + ret += fprintf(fp, "\n"); + } + + return ret; +} + +int perf_event__process(struct perf_tool *tool, union perf_event *event, + struct perf_sample *sample, struct machine *machine) { switch (event->header.type) { case PERF_RECORD_COMM: - perf_event__process_comm(event, sample, session); + perf_event__process_comm(tool, event, sample, machine); break; case PERF_RECORD_MMAP: - perf_event__process_mmap(event, sample, session); + perf_event__process_mmap(tool, event, sample, machine); break; case PERF_RECORD_FORK: case PERF_RECORD_EXIT: - perf_event__process_task(event, sample, session); + perf_event__process_task(tool, event, sample, machine); break; case PERF_RECORD_LOST: - perf_event__process_lost(event, sample, session); + perf_event__process_lost(tool, event, sample, machine); default: break; } @@ -658,36 +756,29 @@ int perf_event__process(union perf_event *event, struct perf_sample *sample, } void thread__find_addr_map(struct thread *self, - struct perf_session *session, u8 cpumode, - enum map_type type, pid_t pid, u64 addr, + struct machine *machine, u8 cpumode, + enum map_type type, u64 addr, struct addr_location *al) { struct map_groups *mg = &self->mg; - struct machine *machine = NULL; al->thread = self; al->addr = addr; al->cpumode = cpumode; al->filtered = false; + if (machine == NULL) { + al->map = NULL; + return; + } + if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) { al->level = 'k'; - machine = perf_session__find_host_machine(session); - if (machine == NULL) { - al->map = NULL; - return; - } mg = &machine->kmaps; } else if (cpumode == PERF_RECORD_MISC_USER && perf_host) { al->level = '.'; - machine = perf_session__find_host_machine(session); } else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL && perf_guest) { al->level = 'g'; - machine = perf_session__find_machine(session, pid); - if (machine == NULL) { - al->map = NULL; - return; - } mg = &machine->kmaps; } else { /* @@ -733,13 +824,12 @@ try_again: al->addr = al->map->map_ip(al->map, al->addr); } -void thread__find_addr_location(struct thread *self, - struct perf_session *session, u8 cpumode, - enum map_type type, pid_t pid, u64 addr, +void thread__find_addr_location(struct thread *thread, struct machine *machine, + u8 cpumode, enum map_type type, u64 addr, struct addr_location *al, symbol_filter_t filter) { - thread__find_addr_map(self, session, cpumode, type, pid, addr, al); + thread__find_addr_map(thread, machine, cpumode, type, addr, al); if (al->map != NULL) al->sym = map__find_symbol(al->map, al->addr, filter); else @@ -747,13 +837,13 @@ void thread__find_addr_location(struct thread *self, } int perf_event__preprocess_sample(const union perf_event *event, - struct perf_session *session, + struct machine *machine, struct addr_location *al, struct perf_sample *sample, symbol_filter_t filter) { u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; - struct thread *thread = perf_session__findnew(session, event->ip.pid); + struct thread *thread = machine__findnew_thread(machine, event->ip.pid); if (thread == NULL) return -1; @@ -764,18 +854,18 @@ int perf_event__preprocess_sample(const union perf_event *event, dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); /* - * Have we already created the kernel maps for the host machine? + * Have we already created the kernel maps for this machine? * * This should have happened earlier, when we processed the kernel MMAP * events, but for older perf.data files there was no such thing, so do * it now. */ if (cpumode == PERF_RECORD_MISC_KERNEL && - session->host_machine.vmlinux_maps[MAP__FUNCTION] == NULL) - machine__create_kernel_maps(&session->host_machine); + machine->vmlinux_maps[MAP__FUNCTION] == NULL) + machine__create_kernel_maps(machine); - thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION, - event->ip.pid, event->ip.ip, al); + thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION, + event->ip.ip, al); dump_printf(" ...... dso: %s\n", al->map ? al->map->dso->long_name : al->level == 'H' ? "[hypervisor]" : "<not found>"); @@ -783,13 +873,14 @@ int perf_event__preprocess_sample(const union perf_event *event, al->cpu = sample->cpu; if (al->map) { + struct dso *dso = al->map->dso; + if (symbol_conf.dso_list && - (!al->map || !al->map->dso || - !(strlist__has_entry(symbol_conf.dso_list, - al->map->dso->short_name) || - (al->map->dso->short_name != al->map->dso->long_name && - strlist__has_entry(symbol_conf.dso_list, - al->map->dso->long_name))))) + (!dso || !(strlist__has_entry(symbol_conf.dso_list, + dso->short_name) || + (dso->short_name != dso->long_name && + strlist__has_entry(symbol_conf.dso_list, + dso->long_name))))) goto out_filtered; al->sym = map__find_symbol(al->map, al->addr, filter); diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 357a85b85248..d84870b06426 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -2,6 +2,7 @@ #define __PERF_RECORD_H #include <limits.h> +#include <stdio.h> #include "../perf.h" #include "map.h" @@ -80,6 +81,7 @@ struct perf_sample { u32 raw_size; void *raw_data; struct ip_callchain *callchain; + struct branch_stack *branch_stack; }; #define BUILD_ID_SIZE 20 @@ -141,51 +143,67 @@ union perf_event { void perf_event__print_totals(void); -struct perf_session; +struct perf_tool; struct thread_map; -typedef int (*perf_event__handler_synth_t)(union perf_event *event, - struct perf_session *session); -typedef int (*perf_event__handler_t)(union perf_event *event, +typedef int (*perf_event__handler_t)(struct perf_tool *tool, + union perf_event *event, struct perf_sample *sample, - struct perf_session *session); + struct machine *machine); -int perf_event__synthesize_thread_map(struct thread_map *threads, +int perf_event__synthesize_thread_map(struct perf_tool *tool, + struct thread_map *threads, perf_event__handler_t process, - struct perf_session *session); -int perf_event__synthesize_threads(perf_event__handler_t process, - struct perf_session *session); -int perf_event__synthesize_kernel_mmap(perf_event__handler_t process, - struct perf_session *session, + struct machine *machine); +int perf_event__synthesize_threads(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine); +int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, + perf_event__handler_t process, struct machine *machine, const char *symbol_name); -int perf_event__synthesize_modules(perf_event__handler_t process, - struct perf_session *session, +int perf_event__synthesize_modules(struct perf_tool *tool, + perf_event__handler_t process, struct machine *machine); -int perf_event__process_comm(union perf_event *event, struct perf_sample *sample, - struct perf_session *session); -int perf_event__process_lost(union perf_event *event, struct perf_sample *sample, - struct perf_session *session); -int perf_event__process_mmap(union perf_event *event, struct perf_sample *sample, - struct perf_session *session); -int perf_event__process_task(union perf_event *event, struct perf_sample *sample, - struct perf_session *session); -int perf_event__process(union perf_event *event, struct perf_sample *sample, - struct perf_session *session); +int perf_event__process_comm(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine); +int perf_event__process_lost(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine); +int perf_event__process_mmap(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine); +int perf_event__process_task(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine); +int perf_event__process(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine); struct addr_location; int perf_event__preprocess_sample(const union perf_event *self, - struct perf_session *session, + struct machine *machine, struct addr_location *al, struct perf_sample *sample, symbol_filter_t filter); const char *perf_event__name(unsigned int id); -int perf_event__parse_sample(const union perf_event *event, u64 type, - int sample_size, bool sample_id_all, - struct perf_sample *sample, bool swapped); +int perf_event__synthesize_sample(union perf_event *event, u64 type, + const struct perf_sample *sample, + bool swapped); + +size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp); +size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp); +size_t perf_event__fprintf_task(union perf_event *event, FILE *fp); +size_t perf_event__fprintf(union perf_event *event, FILE *fp); #endif /* __PERF_RECORD_H */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 72e9f4886b6d..9b38681add9e 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -6,12 +6,17 @@ * * Released under the GPL v2. (and only v2, not any later version) */ +#include "util.h" +#include "debugfs.h" #include <poll.h> #include "cpumap.h" #include "thread_map.h" +#include "target.h" #include "evlist.h" #include "evsel.h" -#include "util.h" +#include <unistd.h> + +#include "parse-events.h" #include <sys/mman.h> @@ -30,6 +35,7 @@ void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus, INIT_HLIST_HEAD(&evlist->heads[i]); INIT_LIST_HEAD(&evlist->entries); perf_evlist__set_maps(evlist, cpus, threads); + evlist->workload.pid = -1; } struct perf_evlist *perf_evlist__new(struct cpu_map *cpus, @@ -43,6 +49,24 @@ struct perf_evlist *perf_evlist__new(struct cpu_map *cpus, return evlist; } +void perf_evlist__config_attrs(struct perf_evlist *evlist, + struct perf_record_opts *opts) +{ + struct perf_evsel *evsel, *first; + + if (evlist->cpus->map[0] < 0) + opts->no_inherit = true; + + first = list_entry(evlist->entries.next, struct perf_evsel, node); + + list_for_each_entry(evsel, &evlist->entries, node) { + perf_evsel__config(evsel, opts, first); + + if (evlist->nr_entries > 1) + evsel->attr.sample_type |= PERF_SAMPLE_ID; + } +} + static void perf_evlist__purge(struct perf_evlist *evlist) { struct perf_evsel *pos, *n; @@ -76,14 +100,25 @@ void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry) ++evlist->nr_entries; } +void perf_evlist__splice_list_tail(struct perf_evlist *evlist, + struct list_head *list, + int nr_entries) +{ + list_splice_tail(list, &evlist->entries); + evlist->nr_entries += nr_entries; +} + int perf_evlist__add_default(struct perf_evlist *evlist) { struct perf_event_attr attr = { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, }; - struct perf_evsel *evsel = perf_evsel__new(&attr, 0); + struct perf_evsel *evsel; + event_attr_init(&attr); + + evsel = perf_evsel__new(&attr, 0); if (evsel == NULL) goto error; @@ -100,6 +135,137 @@ error: return -ENOMEM; } +int perf_evlist__add_attrs(struct perf_evlist *evlist, + struct perf_event_attr *attrs, size_t nr_attrs) +{ + struct perf_evsel *evsel, *n; + LIST_HEAD(head); + size_t i; + + for (i = 0; i < nr_attrs; i++) { + evsel = perf_evsel__new(attrs + i, evlist->nr_entries + i); + if (evsel == NULL) + goto out_delete_partial_list; + list_add_tail(&evsel->node, &head); + } + + perf_evlist__splice_list_tail(evlist, &head, nr_attrs); + + return 0; + +out_delete_partial_list: + list_for_each_entry_safe(evsel, n, &head, node) + perf_evsel__delete(evsel); + return -1; +} + +int __perf_evlist__add_default_attrs(struct perf_evlist *evlist, + struct perf_event_attr *attrs, size_t nr_attrs) +{ + size_t i; + + for (i = 0; i < nr_attrs; i++) + event_attr_init(attrs + i); + + return perf_evlist__add_attrs(evlist, attrs, nr_attrs); +} + +static int trace_event__id(const char *evname) +{ + char *filename, *colon; + int err = -1, fd; + + if (asprintf(&filename, "%s/%s/id", tracing_events_path, evname) < 0) + return -1; + + colon = strrchr(filename, ':'); + if (colon != NULL) + *colon = '/'; + + fd = open(filename, O_RDONLY); + if (fd >= 0) { + char id[16]; + if (read(fd, id, sizeof(id)) > 0) + err = atoi(id); + close(fd); + } + + free(filename); + return err; +} + +int perf_evlist__add_tracepoints(struct perf_evlist *evlist, + const char *tracepoints[], + size_t nr_tracepoints) +{ + int err; + size_t i; + struct perf_event_attr *attrs = zalloc(nr_tracepoints * sizeof(*attrs)); + + if (attrs == NULL) + return -1; + + for (i = 0; i < nr_tracepoints; i++) { + err = trace_event__id(tracepoints[i]); + + if (err < 0) + goto out_free_attrs; + + attrs[i].type = PERF_TYPE_TRACEPOINT; + attrs[i].config = err; + attrs[i].sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | + PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD); + attrs[i].sample_period = 1; + } + + err = perf_evlist__add_attrs(evlist, attrs, nr_tracepoints); +out_free_attrs: + free(attrs); + return err; +} + +struct perf_evsel * +perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id) +{ + struct perf_evsel *evsel; + + list_for_each_entry(evsel, &evlist->entries, node) { + if (evsel->attr.type == PERF_TYPE_TRACEPOINT && + (int)evsel->attr.config == id) + return evsel; + } + + return NULL; +} + +int perf_evlist__set_tracepoints_handlers(struct perf_evlist *evlist, + const struct perf_evsel_str_handler *assocs, + size_t nr_assocs) +{ + struct perf_evsel *evsel; + int err; + size_t i; + + for (i = 0; i < nr_assocs; i++) { + err = trace_event__id(assocs[i].name); + if (err < 0) + goto out; + + evsel = perf_evlist__find_tracepoint_by_id(evlist, err); + if (evsel == NULL) + continue; + + err = -EEXIST; + if (evsel->handler.func != NULL) + goto out; + evsel->handler.func = assocs[i].handler; + } + + err = 0; +out: + return err; +} + void perf_evlist__disable(struct perf_evlist *evlist) { int cpu, thread; @@ -108,7 +274,8 @@ void perf_evlist__disable(struct perf_evlist *evlist) for (cpu = 0; cpu < evlist->cpus->nr; cpu++) { list_for_each_entry(pos, &evlist->entries, node) { for (thread = 0; thread < evlist->threads->nr; thread++) - ioctl(FD(pos, cpu, thread), PERF_EVENT_IOC_DISABLE); + ioctl(FD(pos, cpu, thread), + PERF_EVENT_IOC_DISABLE, 0); } } } @@ -121,12 +288,13 @@ void perf_evlist__enable(struct perf_evlist *evlist) for (cpu = 0; cpu < evlist->cpus->nr; cpu++) { list_for_each_entry(pos, &evlist->entries, node) { for (thread = 0; thread < evlist->threads->nr; thread++) - ioctl(FD(pos, cpu, thread), PERF_EVENT_IOC_ENABLE); + ioctl(FD(pos, cpu, thread), + PERF_EVENT_IOC_ENABLE, 0); } } } -int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) +static int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) { int nfds = evlist->cpus->nr * evlist->threads->nr * evlist->nr_entries; evlist->pollfd = malloc(sizeof(struct pollfd) * nfds); @@ -197,6 +365,10 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id) hlist_for_each_entry(sid, pos, head, node) if (sid->id == id) return sid->evsel; + + if (!perf_evlist__sample_id_all(evlist)) + return list_entry(evlist->entries.next, struct perf_evsel, node); + return NULL; } @@ -282,7 +454,7 @@ void perf_evlist__munmap(struct perf_evlist *evlist) evlist->mmap = NULL; } -int perf_evlist__alloc_mmap(struct perf_evlist *evlist) +static int perf_evlist__alloc_mmap(struct perf_evlist *evlist) { evlist->nr_mmaps = evlist->cpus->nr; if (evlist->cpus->map[0] == -1) @@ -298,8 +470,10 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist, evlist->mmap[idx].mask = mask; evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, prot, MAP_SHARED, fd, 0); - if (evlist->mmap[idx].base == MAP_FAILED) + if (evlist->mmap[idx].base == MAP_FAILED) { + evlist->mmap[idx].base = NULL; return -1; + } perf_evlist__add_pollfd(evlist, fd); return 0; @@ -400,14 +574,22 @@ out_unmap: * * Using perf_evlist__read_on_cpu does this automatically. */ -int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite) +int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, + bool overwrite) { unsigned int page_size = sysconf(_SC_PAGE_SIZE); - int mask = pages * page_size - 1; struct perf_evsel *evsel; const struct cpu_map *cpus = evlist->cpus; const struct thread_map *threads = evlist->threads; - int prot = PROT_READ | (overwrite ? 0 : PROT_WRITE); + int prot = PROT_READ | (overwrite ? 0 : PROT_WRITE), mask; + + /* 512 kiB: default amount of unprivileged mlocked memory */ + if (pages == UINT_MAX) + pages = (512 * 1024) / page_size; + else if (!is_power_of_2(pages)) + return -EINVAL; + + mask = pages * page_size - 1; if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0) return -ENOMEM; @@ -431,18 +613,21 @@ int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite) return perf_evlist__mmap_per_cpu(evlist, prot, mask); } -int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid, - pid_t target_tid, const char *cpu_list) +int perf_evlist__create_maps(struct perf_evlist *evlist, + struct perf_target *target) { - evlist->threads = thread_map__new(target_pid, target_tid); + evlist->threads = thread_map__new_str(target->pid, target->tid, + target->uid); if (evlist->threads == NULL) return -1; - if (cpu_list == NULL && target_tid != -1) + if (perf_target__has_task(target)) + evlist->cpus = cpu_map__dummy_new(); + else if (!perf_target__has_cpu(target) && !target->uses_mmap) evlist->cpus = cpu_map__dummy_new(); else - evlist->cpus = cpu_map__new(cpu_list); + evlist->cpus = cpu_map__new(target->cpu_list); if (evlist->cpus == NULL) goto out_delete_threads; @@ -512,6 +697,38 @@ u64 perf_evlist__sample_type(const struct perf_evlist *evlist) return first->attr.sample_type; } +u16 perf_evlist__id_hdr_size(const struct perf_evlist *evlist) +{ + struct perf_evsel *first; + struct perf_sample *data; + u64 sample_type; + u16 size = 0; + + first = list_entry(evlist->entries.next, struct perf_evsel, node); + + if (!first->attr.sample_id_all) + goto out; + + sample_type = first->attr.sample_type; + + if (sample_type & PERF_SAMPLE_TID) + size += sizeof(data->tid) * 2; + + if (sample_type & PERF_SAMPLE_TIME) + size += sizeof(data->time); + + if (sample_type & PERF_SAMPLE_ID) + size += sizeof(data->id); + + if (sample_type & PERF_SAMPLE_STREAM_ID) + size += sizeof(data->stream_id); + + if (sample_type & PERF_SAMPLE_CPU) + size += sizeof(data->cpu) * 2; +out: + return size; +} + bool perf_evlist__valid_sample_id_all(const struct perf_evlist *evlist) { struct perf_evsel *pos, *first; @@ -533,3 +750,141 @@ bool perf_evlist__sample_id_all(const struct perf_evlist *evlist) first = list_entry(evlist->entries.next, struct perf_evsel, node); return first->attr.sample_id_all; } + +void perf_evlist__set_selected(struct perf_evlist *evlist, + struct perf_evsel *evsel) +{ + evlist->selected = evsel; +} + +int perf_evlist__open(struct perf_evlist *evlist, bool group) +{ + struct perf_evsel *evsel, *first; + int err, ncpus, nthreads; + + first = list_entry(evlist->entries.next, struct perf_evsel, node); + + list_for_each_entry(evsel, &evlist->entries, node) { + struct xyarray *group_fd = NULL; + + if (group && evsel != first) + group_fd = first->fd; + + err = perf_evsel__open(evsel, evlist->cpus, evlist->threads, + group, group_fd); + if (err < 0) + goto out_err; + } + + return 0; +out_err: + ncpus = evlist->cpus ? evlist->cpus->nr : 1; + nthreads = evlist->threads ? evlist->threads->nr : 1; + + list_for_each_entry_reverse(evsel, &evlist->entries, node) + perf_evsel__close(evsel, ncpus, nthreads); + + errno = -err; + return err; +} + +int perf_evlist__prepare_workload(struct perf_evlist *evlist, + struct perf_record_opts *opts, + const char *argv[]) +{ + int child_ready_pipe[2], go_pipe[2]; + char bf; + + if (pipe(child_ready_pipe) < 0) { + perror("failed to create 'ready' pipe"); + return -1; + } + + if (pipe(go_pipe) < 0) { + perror("failed to create 'go' pipe"); + goto out_close_ready_pipe; + } + + evlist->workload.pid = fork(); + if (evlist->workload.pid < 0) { + perror("failed to fork"); + goto out_close_pipes; + } + + if (!evlist->workload.pid) { + if (opts->pipe_output) + dup2(2, 1); + + close(child_ready_pipe[0]); + close(go_pipe[1]); + fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); + + /* + * Do a dummy execvp to get the PLT entry resolved, + * so we avoid the resolver overhead on the real + * execvp call. + */ + execvp("", (char **)argv); + + /* + * Tell the parent we're ready to go + */ + close(child_ready_pipe[1]); + + /* + * Wait until the parent tells us to go. + */ + if (read(go_pipe[0], &bf, 1) == -1) + perror("unable to read pipe"); + + execvp(argv[0], (char **)argv); + + perror(argv[0]); + kill(getppid(), SIGUSR1); + exit(-1); + } + + if (perf_target__none(&opts->target)) + evlist->threads->map[0] = evlist->workload.pid; + + close(child_ready_pipe[1]); + close(go_pipe[0]); + /* + * wait for child to settle + */ + if (read(child_ready_pipe[0], &bf, 1) == -1) { + perror("unable to read pipe"); + goto out_close_pipes; + } + + evlist->workload.cork_fd = go_pipe[1]; + close(child_ready_pipe[0]); + return 0; + +out_close_pipes: + close(go_pipe[0]); + close(go_pipe[1]); +out_close_ready_pipe: + close(child_ready_pipe[0]); + close(child_ready_pipe[1]); + return -1; +} + +int perf_evlist__start_workload(struct perf_evlist *evlist) +{ + if (evlist->workload.cork_fd > 0) { + /* + * Remove the cork, let it rip! + */ + return close(evlist->workload.cork_fd); + } + + return 0; +} + +int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event, + struct perf_sample *sample, bool swapped) +{ + struct perf_evsel *e = list_entry(evlist->entries.next, struct perf_evsel, node); + return perf_evsel__parse_sample(e, event, sample, swapped); +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index f34915002745..528c1acd9298 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -2,12 +2,16 @@ #define __PERF_EVLIST_H 1 #include <linux/list.h> +#include <stdio.h> #include "../perf.h" #include "event.h" +#include "util.h" +#include <unistd.h> struct pollfd; struct thread_map; struct cpu_map; +struct perf_record_opts; #define PERF_EVLIST__HLIST_BITS 8 #define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS) @@ -19,12 +23,22 @@ struct perf_evlist { int nr_fds; int nr_mmaps; int mmap_len; + struct { + int cork_fd; + pid_t pid; + } workload; bool overwrite; union perf_event event_copy; struct perf_mmap *mmap; struct pollfd *pollfd; struct thread_map *threads; struct cpu_map *cpus; + struct perf_evsel *selected; +}; + +struct perf_evsel_str_handler { + const char *name; + void *handler; }; struct perf_evsel; @@ -38,24 +52,59 @@ void perf_evlist__delete(struct perf_evlist *evlist); void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry); int perf_evlist__add_default(struct perf_evlist *evlist); +int perf_evlist__add_attrs(struct perf_evlist *evlist, + struct perf_event_attr *attrs, size_t nr_attrs); +int __perf_evlist__add_default_attrs(struct perf_evlist *evlist, + struct perf_event_attr *attrs, size_t nr_attrs); +int perf_evlist__add_tracepoints(struct perf_evlist *evlist, + const char *tracepoints[], size_t nr_tracepoints); +int perf_evlist__set_tracepoints_handlers(struct perf_evlist *evlist, + const struct perf_evsel_str_handler *assocs, + size_t nr_assocs); + +#define perf_evlist__add_attrs_array(evlist, array) \ + perf_evlist__add_attrs(evlist, array, ARRAY_SIZE(array)) +#define perf_evlist__add_default_attrs(evlist, array) \ + __perf_evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array)) + +#define perf_evlist__add_tracepoints_array(evlist, array) \ + perf_evlist__add_tracepoints(evlist, array, ARRAY_SIZE(array)) + +#define perf_evlist__set_tracepoints_handlers_array(evlist, array) \ + perf_evlist__set_tracepoints_handlers(evlist, array, ARRAY_SIZE(array)) + +struct perf_evsel * +perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id); void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel, int cpu, int thread, u64 id); -int perf_evlist__alloc_pollfd(struct perf_evlist *evlist); void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd); struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id); union perf_event *perf_evlist__mmap_read(struct perf_evlist *self, int idx); -int perf_evlist__alloc_mmap(struct perf_evlist *evlist); -int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite); +int perf_evlist__open(struct perf_evlist *evlist, bool group); + +void perf_evlist__config_attrs(struct perf_evlist *evlist, + struct perf_record_opts *opts); + +int perf_evlist__prepare_workload(struct perf_evlist *evlist, + struct perf_record_opts *opts, + const char *argv[]); +int perf_evlist__start_workload(struct perf_evlist *evlist); + +int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, + bool overwrite); void perf_evlist__munmap(struct perf_evlist *evlist); void perf_evlist__disable(struct perf_evlist *evlist); void perf_evlist__enable(struct perf_evlist *evlist); +void perf_evlist__set_selected(struct perf_evlist *evlist, + struct perf_evsel *evsel); + static inline void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, struct thread_map *threads) @@ -64,14 +113,23 @@ static inline void perf_evlist__set_maps(struct perf_evlist *evlist, evlist->threads = threads; } -int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid, - pid_t target_tid, const char *cpu_list); +int perf_evlist__create_maps(struct perf_evlist *evlist, + struct perf_target *target); void perf_evlist__delete_maps(struct perf_evlist *evlist); int perf_evlist__set_filters(struct perf_evlist *evlist); u64 perf_evlist__sample_type(const struct perf_evlist *evlist); bool perf_evlist__sample_id_all(const const struct perf_evlist *evlist); +u16 perf_evlist__id_hdr_size(const struct perf_evlist *evlist); + +int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event, + struct perf_sample *sample, bool swapped); bool perf_evlist__valid_sample_type(const struct perf_evlist *evlist); bool perf_evlist__valid_sample_id_all(const struct perf_evlist *evlist); + +void perf_evlist__splice_list_tail(struct perf_evlist *evlist, + struct list_head *list, + int nr_entries); + #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index e389815078d3..2eaae140def2 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -14,10 +14,13 @@ #include "util.h" #include "cpumap.h" #include "thread_map.h" +#include "target.h" +#include "../../../include/linux/hw_breakpoint.h" #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) +#define GROUP_FD(group_fd, cpu) (*(int *)xyarray__entry(group_fd, cpu, 0)) -int __perf_evsel__sample_size(u64 sample_type) +static int __perf_evsel__sample_size(u64 sample_type) { u64 mask = sample_type & PERF_SAMPLE_MASK; int size = 0; @@ -33,12 +36,24 @@ int __perf_evsel__sample_size(u64 sample_type) return size; } +void hists__init(struct hists *hists) +{ + memset(hists, 0, sizeof(*hists)); + hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT; + hists->entries_in = &hists->entries_in_array[0]; + hists->entries_collapsed = RB_ROOT; + hists->entries = RB_ROOT; + pthread_mutex_init(&hists->lock, NULL); +} + void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr, int idx) { evsel->idx = idx; evsel->attr = *attr; INIT_LIST_HEAD(&evsel->node); + hists__init(&evsel->hists); + evsel->sample_size = __perf_evsel__sample_size(attr->sample_type); } struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx) @@ -51,6 +66,345 @@ struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx) return evsel; } +static const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX] = { + "cycles", + "instructions", + "cache-references", + "cache-misses", + "branches", + "branch-misses", + "bus-cycles", + "stalled-cycles-frontend", + "stalled-cycles-backend", + "ref-cycles", +}; + +static const char *__perf_evsel__hw_name(u64 config) +{ + if (config < PERF_COUNT_HW_MAX && perf_evsel__hw_names[config]) + return perf_evsel__hw_names[config]; + + return "unknown-hardware"; +} + +static int perf_evsel__add_modifiers(struct perf_evsel *evsel, char *bf, size_t size) +{ + int colon = 0, r = 0; + struct perf_event_attr *attr = &evsel->attr; + bool exclude_guest_default = false; + +#define MOD_PRINT(context, mod) do { \ + if (!attr->exclude_##context) { \ + if (!colon) colon = ++r; \ + r += scnprintf(bf + r, size - r, "%c", mod); \ + } } while(0) + + if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv) { + MOD_PRINT(kernel, 'k'); + MOD_PRINT(user, 'u'); + MOD_PRINT(hv, 'h'); + exclude_guest_default = true; + } + + if (attr->precise_ip) { + if (!colon) + colon = ++r; + r += scnprintf(bf + r, size - r, "%.*s", attr->precise_ip, "ppp"); + exclude_guest_default = true; + } + + if (attr->exclude_host || attr->exclude_guest == exclude_guest_default) { + MOD_PRINT(host, 'H'); + MOD_PRINT(guest, 'G'); + } +#undef MOD_PRINT + if (colon) + bf[colon - 1] = ':'; + return r; +} + +static int perf_evsel__hw_name(struct perf_evsel *evsel, char *bf, size_t size) +{ + int r = scnprintf(bf, size, "%s", __perf_evsel__hw_name(evsel->attr.config)); + return r + perf_evsel__add_modifiers(evsel, bf + r, size - r); +} + +static const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX] = { + "cpu-clock", + "task-clock", + "page-faults", + "context-switches", + "CPU-migrations", + "minor-faults", + "major-faults", + "alignment-faults", + "emulation-faults", +}; + +static const char *__perf_evsel__sw_name(u64 config) +{ + if (config < PERF_COUNT_SW_MAX && perf_evsel__sw_names[config]) + return perf_evsel__sw_names[config]; + return "unknown-software"; +} + +static int perf_evsel__sw_name(struct perf_evsel *evsel, char *bf, size_t size) +{ + int r = scnprintf(bf, size, "%s", __perf_evsel__sw_name(evsel->attr.config)); + return r + perf_evsel__add_modifiers(evsel, bf + r, size - r); +} + +static int __perf_evsel__bp_name(char *bf, size_t size, u64 addr, u64 type) +{ + int r; + + r = scnprintf(bf, size, "mem:0x%" PRIx64 ":", addr); + + if (type & HW_BREAKPOINT_R) + r += scnprintf(bf + r, size - r, "r"); + + if (type & HW_BREAKPOINT_W) + r += scnprintf(bf + r, size - r, "w"); + + if (type & HW_BREAKPOINT_X) + r += scnprintf(bf + r, size - r, "x"); + + return r; +} + +static int perf_evsel__bp_name(struct perf_evsel *evsel, char *bf, size_t size) +{ + struct perf_event_attr *attr = &evsel->attr; + int r = __perf_evsel__bp_name(bf, size, attr->bp_addr, attr->bp_type); + return r + perf_evsel__add_modifiers(evsel, bf + r, size - r); +} + +const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX] + [PERF_EVSEL__MAX_ALIASES] = { + { "L1-dcache", "l1-d", "l1d", "L1-data", }, + { "L1-icache", "l1-i", "l1i", "L1-instruction", }, + { "LLC", "L2", }, + { "dTLB", "d-tlb", "Data-TLB", }, + { "iTLB", "i-tlb", "Instruction-TLB", }, + { "branch", "branches", "bpu", "btb", "bpc", }, + { "node", }, +}; + +const char *perf_evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_EVSEL__MAX_ALIASES] = { + { "load", "loads", "read", }, + { "store", "stores", "write", }, + { "prefetch", "prefetches", "speculative-read", "speculative-load", }, +}; + +const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX] + [PERF_EVSEL__MAX_ALIASES] = { + { "refs", "Reference", "ops", "access", }, + { "misses", "miss", }, +}; + +#define C(x) PERF_COUNT_HW_CACHE_##x +#define CACHE_READ (1 << C(OP_READ)) +#define CACHE_WRITE (1 << C(OP_WRITE)) +#define CACHE_PREFETCH (1 << C(OP_PREFETCH)) +#define COP(x) (1 << x) + +/* + * cache operartion stat + * L1I : Read and prefetch only + * ITLB and BPU : Read-only + */ +static unsigned long perf_evsel__hw_cache_stat[C(MAX)] = { + [C(L1D)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), + [C(L1I)] = (CACHE_READ | CACHE_PREFETCH), + [C(LL)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), + [C(DTLB)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), + [C(ITLB)] = (CACHE_READ), + [C(BPU)] = (CACHE_READ), + [C(NODE)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), +}; + +bool perf_evsel__is_cache_op_valid(u8 type, u8 op) +{ + if (perf_evsel__hw_cache_stat[type] & COP(op)) + return true; /* valid */ + else + return false; /* invalid */ +} + +int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, + char *bf, size_t size) +{ + if (result) { + return scnprintf(bf, size, "%s-%s-%s", perf_evsel__hw_cache[type][0], + perf_evsel__hw_cache_op[op][0], + perf_evsel__hw_cache_result[result][0]); + } + + return scnprintf(bf, size, "%s-%s", perf_evsel__hw_cache[type][0], + perf_evsel__hw_cache_op[op][1]); +} + +static int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size) +{ + u8 op, result, type = (config >> 0) & 0xff; + const char *err = "unknown-ext-hardware-cache-type"; + + if (type > PERF_COUNT_HW_CACHE_MAX) + goto out_err; + + op = (config >> 8) & 0xff; + err = "unknown-ext-hardware-cache-op"; + if (op > PERF_COUNT_HW_CACHE_OP_MAX) + goto out_err; + + result = (config >> 16) & 0xff; + err = "unknown-ext-hardware-cache-result"; + if (result > PERF_COUNT_HW_CACHE_RESULT_MAX) + goto out_err; + + err = "invalid-cache"; + if (!perf_evsel__is_cache_op_valid(type, op)) + goto out_err; + + return __perf_evsel__hw_cache_type_op_res_name(type, op, result, bf, size); +out_err: + return scnprintf(bf, size, "%s", err); +} + +static int perf_evsel__hw_cache_name(struct perf_evsel *evsel, char *bf, size_t size) +{ + int ret = __perf_evsel__hw_cache_name(evsel->attr.config, bf, size); + return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret); +} + +static int perf_evsel__raw_name(struct perf_evsel *evsel, char *bf, size_t size) +{ + int ret = scnprintf(bf, size, "raw 0x%" PRIx64, evsel->attr.config); + return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret); +} + +const char *perf_evsel__name(struct perf_evsel *evsel) +{ + char bf[128]; + + if (evsel->name) + return evsel->name; + + switch (evsel->attr.type) { + case PERF_TYPE_RAW: + perf_evsel__raw_name(evsel, bf, sizeof(bf)); + break; + + case PERF_TYPE_HARDWARE: + perf_evsel__hw_name(evsel, bf, sizeof(bf)); + break; + + case PERF_TYPE_HW_CACHE: + perf_evsel__hw_cache_name(evsel, bf, sizeof(bf)); + break; + + case PERF_TYPE_SOFTWARE: + perf_evsel__sw_name(evsel, bf, sizeof(bf)); + break; + + case PERF_TYPE_TRACEPOINT: + scnprintf(bf, sizeof(bf), "%s", "unknown tracepoint"); + break; + + case PERF_TYPE_BREAKPOINT: + perf_evsel__bp_name(evsel, bf, sizeof(bf)); + break; + + default: + scnprintf(bf, sizeof(bf), "%s", "unknown attr type"); + break; + } + + evsel->name = strdup(bf); + + return evsel->name ?: "unknown"; +} + +void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts, + struct perf_evsel *first) +{ + struct perf_event_attr *attr = &evsel->attr; + int track = !evsel->idx; /* only the first counter needs these */ + + attr->disabled = 1; + attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1; + attr->inherit = !opts->no_inherit; + attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING | + PERF_FORMAT_ID; + + attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID; + + /* + * We default some events to a 1 default interval. But keep + * it a weak assumption overridable by the user. + */ + if (!attr->sample_period || (opts->user_freq != UINT_MAX && + opts->user_interval != ULLONG_MAX)) { + if (opts->freq) { + attr->sample_type |= PERF_SAMPLE_PERIOD; + attr->freq = 1; + attr->sample_freq = opts->freq; + } else { + attr->sample_period = opts->default_interval; + } + } + + if (opts->no_samples) + attr->sample_freq = 0; + + if (opts->inherit_stat) + attr->inherit_stat = 1; + + if (opts->sample_address) { + attr->sample_type |= PERF_SAMPLE_ADDR; + attr->mmap_data = track; + } + + if (opts->call_graph) + attr->sample_type |= PERF_SAMPLE_CALLCHAIN; + + if (perf_target__has_cpu(&opts->target)) + attr->sample_type |= PERF_SAMPLE_CPU; + + if (opts->period) + attr->sample_type |= PERF_SAMPLE_PERIOD; + + if (!opts->sample_id_all_missing && + (opts->sample_time || !opts->no_inherit || + perf_target__has_cpu(&opts->target))) + attr->sample_type |= PERF_SAMPLE_TIME; + + if (opts->raw_samples) { + attr->sample_type |= PERF_SAMPLE_TIME; + attr->sample_type |= PERF_SAMPLE_RAW; + attr->sample_type |= PERF_SAMPLE_CPU; + } + + if (opts->no_delay) { + attr->watermark = 0; + attr->wakeup_events = 1; + } + if (opts->branch_stack) { + attr->sample_type |= PERF_SAMPLE_BRANCH_STACK; + attr->branch_sample_type = opts->branch_stack; + } + + attr->mmap = track; + attr->comm = track; + + if (perf_target__none(&opts->target) && + (!opts->group || evsel == first)) { + attr->enable_on_exec = 1; + } +} + int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) { int cpu, thread; @@ -203,15 +557,16 @@ int __perf_evsel__read(struct perf_evsel *evsel, } static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, - struct thread_map *threads, bool group) + struct thread_map *threads, bool group, + struct xyarray *group_fds) { int cpu, thread; unsigned long flags = 0; - int pid = -1; + int pid = -1, err; if (evsel->fd == NULL && perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0) - return -1; + return -ENOMEM; if (evsel->cgrp) { flags = PERF_FLAG_PID_CGROUP; @@ -219,7 +574,7 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, } for (cpu = 0; cpu < cpus->nr; cpu++) { - int group_fd = -1; + int group_fd = group_fds ? GROUP_FD(group_fds, cpu) : -1; for (thread = 0; thread < threads->nr; thread++) { @@ -230,8 +585,10 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, pid, cpus->map[cpu], group_fd, flags); - if (FD(evsel, cpu, thread) < 0) + if (FD(evsel, cpu, thread) < 0) { + err = -errno; goto out_close; + } if (group && group_fd == -1) group_fd = FD(evsel, cpu, thread); @@ -248,7 +605,17 @@ out_close: } thread = threads->nr; } while (--cpu >= 0); - return -1; + return err; +} + +void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads) +{ + if (evsel->fd == NULL) + return; + + perf_evsel__close_fd(evsel, ncpus, nthreads); + perf_evsel__free_fd(evsel); + evsel->fd = NULL; } static struct { @@ -268,7 +635,8 @@ static struct { }; int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, - struct thread_map *threads, bool group) + struct thread_map *threads, bool group, + struct xyarray *group_fd) { if (cpus == NULL) { /* Work around old compiler warnings about strict aliasing */ @@ -278,32 +646,44 @@ int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, if (threads == NULL) threads = &empty_thread_map.map; - return __perf_evsel__open(evsel, cpus, threads, group); + return __perf_evsel__open(evsel, cpus, threads, group, group_fd); } int perf_evsel__open_per_cpu(struct perf_evsel *evsel, - struct cpu_map *cpus, bool group) + struct cpu_map *cpus, bool group, + struct xyarray *group_fd) { - return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group); + return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group, + group_fd); } int perf_evsel__open_per_thread(struct perf_evsel *evsel, - struct thread_map *threads, bool group) + struct thread_map *threads, bool group, + struct xyarray *group_fd) { - return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group); + return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group, + group_fd); } static int perf_event__parse_id_sample(const union perf_event *event, u64 type, - struct perf_sample *sample) + struct perf_sample *sample, + bool swapped) { const u64 *array = event->sample.array; + union u64_swap u; array += ((event->header.size - sizeof(event->header)) / sizeof(u64)) - 1; if (type & PERF_SAMPLE_CPU) { - u32 *p = (u32 *)array; - sample->cpu = *p; + u.val64 = *array; + if (swapped) { + /* undo swap of u64, then swap on individual u32s */ + u.val64 = bswap_64(u.val64); + u.val32[0] = bswap_32(u.val32[0]); + } + + sample->cpu = u.val32[0]; array--; } @@ -323,9 +703,16 @@ static int perf_event__parse_id_sample(const union perf_event *event, u64 type, } if (type & PERF_SAMPLE_TID) { - u32 *p = (u32 *)array; - sample->pid = p[0]; - sample->tid = p[1]; + u.val64 = *array; + if (swapped) { + /* undo swap of u64, then swap on individual u32s */ + u.val64 = bswap_64(u.val64); + u.val32[0] = bswap_32(u.val32[0]); + u.val32[1] = bswap_32(u.val32[1]); + } + + sample->pid = u.val32[0]; + sample->tid = u.val32[1]; } return 0; @@ -342,34 +729,32 @@ static bool sample_overlap(const union perf_event *event, return false; } -int perf_event__parse_sample(const union perf_event *event, u64 type, - int sample_size, bool sample_id_all, +int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, struct perf_sample *data, bool swapped) { + u64 type = evsel->attr.sample_type; const u64 *array; /* * used for cross-endian analysis. See git commit 65014ab3 * for why this goofiness is needed. */ - union { - u64 val64; - u32 val32[2]; - } u; - + union u64_swap u; + memset(data, 0, sizeof(*data)); data->cpu = data->pid = data->tid = -1; data->stream_id = data->id = data->time = -1ULL; + data->period = 1; if (event->header.type != PERF_RECORD_SAMPLE) { - if (!sample_id_all) + if (!evsel->attr.sample_id_all) return 0; - return perf_event__parse_id_sample(event, type, data); + return perf_event__parse_id_sample(event, type, data, swapped); } array = event->sample.array; - if (sample_size + sizeof(event->header) > event->header.size) + if (evsel->sample_size + sizeof(event->header) > event->header.size) return -EFAULT; if (type & PERF_SAMPLE_IP) { @@ -432,7 +817,7 @@ int perf_event__parse_sample(const union perf_event *event, u64 type, } if (type & PERF_SAMPLE_READ) { - fprintf(stderr, "PERF_SAMPLE_READ is unsuported for now\n"); + fprintf(stderr, "PERF_SAMPLE_READ is unsupported for now\n"); return -1; } @@ -470,6 +855,94 @@ int perf_event__parse_sample(const union perf_event *event, u64 type, return -EFAULT; data->raw_data = (void *) pdata; + + array = (void *)array + data->raw_size + sizeof(u32); + } + + if (type & PERF_SAMPLE_BRANCH_STACK) { + u64 sz; + + data->branch_stack = (struct branch_stack *)array; + array++; /* nr */ + + sz = data->branch_stack->nr * sizeof(struct branch_entry); + sz /= sizeof(u64); + array += sz; + } + return 0; +} + +int perf_event__synthesize_sample(union perf_event *event, u64 type, + const struct perf_sample *sample, + bool swapped) +{ + u64 *array; + + /* + * used for cross-endian analysis. See git commit 65014ab3 + * for why this goofiness is needed. + */ + union u64_swap u; + + array = event->sample.array; + + if (type & PERF_SAMPLE_IP) { + event->ip.ip = sample->ip; + array++; + } + + if (type & PERF_SAMPLE_TID) { + u.val32[0] = sample->pid; + u.val32[1] = sample->tid; + if (swapped) { + /* + * Inverse of what is done in perf_evsel__parse_sample + */ + u.val32[0] = bswap_32(u.val32[0]); + u.val32[1] = bswap_32(u.val32[1]); + u.val64 = bswap_64(u.val64); + } + + *array = u.val64; + array++; + } + + if (type & PERF_SAMPLE_TIME) { + *array = sample->time; + array++; + } + + if (type & PERF_SAMPLE_ADDR) { + *array = sample->addr; + array++; + } + + if (type & PERF_SAMPLE_ID) { + *array = sample->id; + array++; + } + + if (type & PERF_SAMPLE_STREAM_ID) { + *array = sample->stream_id; + array++; + } + + if (type & PERF_SAMPLE_CPU) { + u.val32[0] = sample->cpu; + if (swapped) { + /* + * Inverse of what is done in perf_evsel__parse_sample + */ + u.val32[0] = bswap_32(u.val32[0]); + u.val64 = bswap_64(u.val64); + } + *array = u.val64; + array++; + } + + if (type & PERF_SAMPLE_PERIOD) { + *array = sample->period; + array++; } return 0; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index e9a31554e265..b559929983bb 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -61,12 +61,18 @@ struct perf_evsel { off_t id_offset; }; struct cgroup_sel *cgrp; + struct { + void *func; + void *data; + } handler; + unsigned int sample_size; bool supported; }; struct cpu_map; struct thread_map; struct perf_evlist; +struct perf_record_opts; struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx); void perf_evsel__init(struct perf_evsel *evsel, @@ -74,6 +80,24 @@ void perf_evsel__init(struct perf_evsel *evsel, void perf_evsel__exit(struct perf_evsel *evsel); void perf_evsel__delete(struct perf_evsel *evsel); +void perf_evsel__config(struct perf_evsel *evsel, + struct perf_record_opts *opts, + struct perf_evsel *first); + +bool perf_evsel__is_cache_op_valid(u8 type, u8 op); + +#define PERF_EVSEL__MAX_ALIASES 8 + +extern const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX] + [PERF_EVSEL__MAX_ALIASES]; +extern const char *perf_evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_EVSEL__MAX_ALIASES]; +const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX] + [PERF_EVSEL__MAX_ALIASES]; +int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, + char *bf, size_t size); +const char *perf_evsel__name(struct perf_evsel *evsel); + int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads); int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads); int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus); @@ -82,11 +106,15 @@ void perf_evsel__free_id(struct perf_evsel *evsel); void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads); int perf_evsel__open_per_cpu(struct perf_evsel *evsel, - struct cpu_map *cpus, bool group); + struct cpu_map *cpus, bool group, + struct xyarray *group_fds); int perf_evsel__open_per_thread(struct perf_evsel *evsel, - struct thread_map *threads, bool group); + struct thread_map *threads, bool group, + struct xyarray *group_fds); int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, - struct thread_map *threads, bool group); + struct thread_map *threads, bool group, + struct xyarray *group_fds); +void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads); #define perf_evsel__match(evsel, t, c) \ (evsel->attr.type == PERF_TYPE_##t && \ @@ -150,11 +178,8 @@ static inline int perf_evsel__read_scaled(struct perf_evsel *evsel, return __perf_evsel__read(evsel, ncpus, nthreads, true); } -int __perf_evsel__sample_size(u64 sample_type); - -static inline int perf_evsel__sample_size(struct perf_evsel *evsel) -{ - return __perf_evsel__sample_size(evsel->attr.sample_type); -} +void hists__init(struct hists *hists); +int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, + struct perf_sample *sample, bool swapped); #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index b6c1ad123ca9..74ea3c2f8138 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1,5 +1,6 @@ #define _FILE_OFFSET_BITS 64 +#include "util.h" #include <sys/types.h> #include <byteswap.h> #include <unistd.h> @@ -7,39 +8,39 @@ #include <stdlib.h> #include <linux/list.h> #include <linux/kernel.h> +#include <linux/bitops.h> +#include <sys/utsname.h> #include "evlist.h" #include "evsel.h" -#include "util.h" #include "header.h" #include "../perf.h" #include "trace-event.h" #include "session.h" #include "symbol.h" #include "debug.h" +#include "cpumap.h" static bool no_buildid_cache = false; static int event_count; static struct perf_trace_event_type *events; +static u32 header_argc; +static const char **header_argv; + int perf_header__push_event(u64 id, const char *name) { + struct perf_trace_event_type *nevents; + if (strlen(name) > MAX_EVENT_NAME) pr_warning("Event %s will be truncated\n", name); - if (!events) { - events = malloc(sizeof(struct perf_trace_event_type)); - if (events == NULL) - return -ENOMEM; - } else { - struct perf_trace_event_type *nevents; + nevents = realloc(events, (event_count + 1) * sizeof(*events)); + if (nevents == NULL) + return -ENOMEM; + events = nevents; - nevents = realloc(events, (event_count + 1) * sizeof(*events)); - if (nevents == NULL) - return -ENOMEM; - events = nevents; - } memset(&events[event_count], 0, sizeof(struct perf_trace_event_type)); events[event_count].event_id = id; strncpy(events[event_count].name, name, MAX_EVENT_NAME - 1); @@ -57,9 +58,20 @@ char *perf_header__find_event(u64 id) return NULL; } -static const char *__perf_magic = "PERFFILE"; - -#define PERF_MAGIC (*(u64 *)__perf_magic) +/* + * magic2 = "PERFILE2" + * must be a numerical value to let the endianness + * determine the memory layout. That way we are able + * to detect endianness when reading the perf.data file + * back. + * + * we check for legacy (PERFFILE) format. + */ +static const char *__perf_magic1 = "PERFFILE"; +static const u64 __perf_magic2 = 0x32454c4946524550ULL; +static const u64 __perf_magic2_sw = 0x50455246494c4532ULL; + +#define PERF_MAGIC __perf_magic2 struct perf_file_attr { struct perf_event_attr attr; @@ -110,6 +122,84 @@ static int write_padded(int fd, const void *bf, size_t count, return err; } +static int do_write_string(int fd, const char *str) +{ + u32 len, olen; + int ret; + + olen = strlen(str) + 1; + len = ALIGN(olen, NAME_ALIGN); + + /* write len, incl. \0 */ + ret = do_write(fd, &len, sizeof(len)); + if (ret < 0) + return ret; + + return write_padded(fd, str, olen, len); +} + +static char *do_read_string(int fd, struct perf_header *ph) +{ + ssize_t sz, ret; + u32 len; + char *buf; + + sz = read(fd, &len, sizeof(len)); + if (sz < (ssize_t)sizeof(len)) + return NULL; + + if (ph->needs_swap) + len = bswap_32(len); + + buf = malloc(len); + if (!buf) + return NULL; + + ret = read(fd, buf, len); + if (ret == (ssize_t)len) { + /* + * strings are padded by zeroes + * thus the actual strlen of buf + * may be less than len + */ + return buf; + } + + free(buf); + return NULL; +} + +int +perf_header__set_cmdline(int argc, const char **argv) +{ + int i; + + /* + * If header_argv has already been set, do not override it. + * This allows a command to set the cmdline, parse args and + * then call another builtin function that implements a + * command -- e.g, cmd_kvm calling cmd_record. + */ + if (header_argv) + return 0; + + header_argc = (u32)argc; + + /* do not include NULL termination */ + header_argv = calloc(argc, sizeof(char *)); + if (!header_argv) + return -ENOMEM; + + /* + * must copy argv contents because it gets moved + * around during option parsing + */ + for (i = 0; i < argc ; i++) + header_argv[i] = argv[i]; + + return 0; +} + #define dsos__for_each_with_build_id(pos, head) \ list_for_each_entry(pos, head, node) \ if (!pos->has_build_id) \ @@ -205,12 +295,12 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, if (realname == NULL || filename == NULL || linkname == NULL) goto out_free; - len = snprintf(filename, size, "%s%s%s", + len = scnprintf(filename, size, "%s%s%s", debugdir, is_kallsyms ? "/" : "", realname); if (mkdir_p(filename, 0755)) goto out_free; - snprintf(filename + len, sizeof(filename) - len, "/%s", sbuild_id); + snprintf(filename + len, size - len, "/%s", sbuild_id); if (access(filename, F_OK)) { if (is_kallsyms) { @@ -220,7 +310,7 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, goto out_free; } - len = snprintf(linkname, size, "%s/.build-id/%.2s", + len = scnprintf(linkname, size, "%s/.build-id/%.2s", debugdir, sbuild_id); if (access(linkname, X_OK) && mkdir_p(linkname, 0755)) @@ -267,7 +357,7 @@ int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir) if (access(linkname, F_OK)) goto out_free; - if (readlink(linkname, filename, size) < 0) + if (readlink(linkname, filename, size - 1) < 0) goto out_free; if (unlink(linkname)) @@ -356,292 +446,895 @@ static bool perf_session__read_build_ids(struct perf_session *session, bool with return ret; } -static int perf_header__adds_write(struct perf_header *header, - struct perf_evlist *evlist, int fd) +static int write_tracing_data(int fd, struct perf_header *h __used, + struct perf_evlist *evlist) +{ + return read_tracing_data(fd, &evlist->entries); +} + + +static int write_build_id(int fd, struct perf_header *h, + struct perf_evlist *evlist __used) { - int nr_sections; struct perf_session *session; - struct perf_file_section *feat_sec; - int sec_size; - u64 sec_start; - int idx = 0, err; + int err; - session = container_of(header, struct perf_session, header); + session = container_of(h, struct perf_session, header); - if (perf_header__has_feat(header, HEADER_BUILD_ID && - !perf_session__read_build_ids(session, true))) - perf_header__clear_feat(header, HEADER_BUILD_ID); + if (!perf_session__read_build_ids(session, true)) + return -1; - nr_sections = bitmap_weight(header->adds_features, HEADER_FEAT_BITS); - if (!nr_sections) - return 0; + err = dsos__write_buildid_table(h, fd); + if (err < 0) { + pr_debug("failed to write buildid table\n"); + return err; + } + if (!no_buildid_cache) + perf_session__cache_build_ids(session); - feat_sec = calloc(sizeof(*feat_sec), nr_sections); - if (feat_sec == NULL) - return -ENOMEM; + return 0; +} - sec_size = sizeof(*feat_sec) * nr_sections; +static int write_hostname(int fd, struct perf_header *h __used, + struct perf_evlist *evlist __used) +{ + struct utsname uts; + int ret; - sec_start = header->data_offset + header->data_size; - lseek(fd, sec_start + sec_size, SEEK_SET); + ret = uname(&uts); + if (ret < 0) + return -1; - if (perf_header__has_feat(header, HEADER_TRACE_INFO)) { - struct perf_file_section *trace_sec; + return do_write_string(fd, uts.nodename); +} - trace_sec = &feat_sec[idx++]; +static int write_osrelease(int fd, struct perf_header *h __used, + struct perf_evlist *evlist __used) +{ + struct utsname uts; + int ret; - /* Write trace info */ - trace_sec->offset = lseek(fd, 0, SEEK_CUR); - read_tracing_data(fd, &evlist->entries); - trace_sec->size = lseek(fd, 0, SEEK_CUR) - trace_sec->offset; - } + ret = uname(&uts); + if (ret < 0) + return -1; - if (perf_header__has_feat(header, HEADER_BUILD_ID)) { - struct perf_file_section *buildid_sec; + return do_write_string(fd, uts.release); +} - buildid_sec = &feat_sec[idx++]; +static int write_arch(int fd, struct perf_header *h __used, + struct perf_evlist *evlist __used) +{ + struct utsname uts; + int ret; - /* Write build-ids */ - buildid_sec->offset = lseek(fd, 0, SEEK_CUR); - err = dsos__write_buildid_table(header, fd); - if (err < 0) { - pr_debug("failed to write buildid table\n"); - goto out_free; + ret = uname(&uts); + if (ret < 0) + return -1; + + return do_write_string(fd, uts.machine); +} + +static int write_version(int fd, struct perf_header *h __used, + struct perf_evlist *evlist __used) +{ + return do_write_string(fd, perf_version_string); +} + +static int write_cpudesc(int fd, struct perf_header *h __used, + struct perf_evlist *evlist __used) +{ +#ifndef CPUINFO_PROC +#define CPUINFO_PROC NULL +#endif + FILE *file; + char *buf = NULL; + char *s, *p; + const char *search = CPUINFO_PROC; + size_t len = 0; + int ret = -1; + + if (!search) + return -1; + + file = fopen("/proc/cpuinfo", "r"); + if (!file) + return -1; + + while (getline(&buf, &len, file) > 0) { + ret = strncmp(buf, search, strlen(search)); + if (!ret) + break; + } + + if (ret) + goto done; + + s = buf; + + p = strchr(buf, ':'); + if (p && *(p+1) == ' ' && *(p+2)) + s = p + 2; + p = strchr(s, '\n'); + if (p) + *p = '\0'; + + /* squash extra space characters (branding string) */ + p = s; + while (*p) { + if (isspace(*p)) { + char *r = p + 1; + char *q = r; + *p = ' '; + while (*q && isspace(*q)) + q++; + if (q != (p+1)) + while ((*r++ = *q++)); } - buildid_sec->size = lseek(fd, 0, SEEK_CUR) - - buildid_sec->offset; - if (!no_buildid_cache) - perf_session__cache_build_ids(session); + p++; } + ret = do_write_string(fd, s); +done: + free(buf); + fclose(file); + return ret; +} - lseek(fd, sec_start, SEEK_SET); - err = do_write(fd, feat_sec, sec_size); - if (err < 0) - pr_debug("failed to write feature section\n"); -out_free: - free(feat_sec); - return err; +static int write_nrcpus(int fd, struct perf_header *h __used, + struct perf_evlist *evlist __used) +{ + long nr; + u32 nrc, nra; + int ret; + + nr = sysconf(_SC_NPROCESSORS_CONF); + if (nr < 0) + return -1; + + nrc = (u32)(nr & UINT_MAX); + + nr = sysconf(_SC_NPROCESSORS_ONLN); + if (nr < 0) + return -1; + + nra = (u32)(nr & UINT_MAX); + + ret = do_write(fd, &nrc, sizeof(nrc)); + if (ret < 0) + return ret; + + return do_write(fd, &nra, sizeof(nra)); } -int perf_header__write_pipe(int fd) +static int write_event_desc(int fd, struct perf_header *h __used, + struct perf_evlist *evlist) { - struct perf_pipe_file_header f_header; - int err; + struct perf_evsel *attr; + u32 nre = 0, nri, sz; + int ret; - f_header = (struct perf_pipe_file_header){ - .magic = PERF_MAGIC, - .size = sizeof(f_header), - }; + list_for_each_entry(attr, &evlist->entries, node) + nre++; - err = do_write(fd, &f_header, sizeof(f_header)); - if (err < 0) { - pr_debug("failed to write perf pipe header\n"); - return err; + /* + * write number of events + */ + ret = do_write(fd, &nre, sizeof(nre)); + if (ret < 0) + return ret; + + /* + * size of perf_event_attr struct + */ + sz = (u32)sizeof(attr->attr); + ret = do_write(fd, &sz, sizeof(sz)); + if (ret < 0) + return ret; + + list_for_each_entry(attr, &evlist->entries, node) { + + ret = do_write(fd, &attr->attr, sz); + if (ret < 0) + return ret; + /* + * write number of unique id per event + * there is one id per instance of an event + * + * copy into an nri to be independent of the + * type of ids, + */ + nri = attr->ids; + ret = do_write(fd, &nri, sizeof(nri)); + if (ret < 0) + return ret; + + /* + * write event string as passed on cmdline + */ + ret = do_write_string(fd, perf_evsel__name(attr)); + if (ret < 0) + return ret; + /* + * write unique ids for this event + */ + ret = do_write(fd, attr->id, attr->ids * sizeof(u64)); + if (ret < 0) + return ret; } + return 0; +} + +static int write_cmdline(int fd, struct perf_header *h __used, + struct perf_evlist *evlist __used) +{ + char buf[MAXPATHLEN]; + char proc[32]; + u32 i, n; + int ret; + + /* + * actual atual path to perf binary + */ + sprintf(proc, "/proc/%d/exe", getpid()); + ret = readlink(proc, buf, sizeof(buf)); + if (ret <= 0) + return -1; + + /* readlink() does not add null termination */ + buf[ret] = '\0'; + /* account for binary path */ + n = header_argc + 1; + + ret = do_write(fd, &n, sizeof(n)); + if (ret < 0) + return ret; + + ret = do_write_string(fd, buf); + if (ret < 0) + return ret; + + for (i = 0 ; i < header_argc; i++) { + ret = do_write_string(fd, header_argv[i]); + if (ret < 0) + return ret; + } return 0; } -int perf_session__write_header(struct perf_session *session, - struct perf_evlist *evlist, - int fd, bool at_exit) +#define CORE_SIB_FMT \ + "/sys/devices/system/cpu/cpu%d/topology/core_siblings_list" +#define THRD_SIB_FMT \ + "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list" + +struct cpu_topo { + u32 core_sib; + u32 thread_sib; + char **core_siblings; + char **thread_siblings; +}; + +static int build_cpu_topo(struct cpu_topo *tp, int cpu) { - struct perf_file_header f_header; - struct perf_file_attr f_attr; - struct perf_header *header = &session->header; - struct perf_evsel *attr, *pair = NULL; - int err; + FILE *fp; + char filename[MAXPATHLEN]; + char *buf = NULL, *p; + size_t len = 0; + u32 i = 0; + int ret = -1; + + sprintf(filename, CORE_SIB_FMT, cpu); + fp = fopen(filename, "r"); + if (!fp) + return -1; - lseek(fd, sizeof(f_header), SEEK_SET); + if (getline(&buf, &len, fp) <= 0) + goto done; - if (session->evlist != evlist) - pair = list_entry(session->evlist->entries.next, struct perf_evsel, node); + fclose(fp); - list_for_each_entry(attr, &evlist->entries, node) { - attr->id_offset = lseek(fd, 0, SEEK_CUR); - err = do_write(fd, attr->id, attr->ids * sizeof(u64)); - if (err < 0) { -out_err_write: - pr_debug("failed to write perf header\n"); - return err; - } - if (session->evlist != evlist) { - err = do_write(fd, pair->id, pair->ids * sizeof(u64)); - if (err < 0) - goto out_err_write; - attr->ids += pair->ids; - pair = list_entry(pair->node.next, struct perf_evsel, node); - } + p = strchr(buf, '\n'); + if (p) + *p = '\0'; + + for (i = 0; i < tp->core_sib; i++) { + if (!strcmp(buf, tp->core_siblings[i])) + break; + } + if (i == tp->core_sib) { + tp->core_siblings[i] = buf; + tp->core_sib++; + buf = NULL; + len = 0; } - header->attr_offset = lseek(fd, 0, SEEK_CUR); + sprintf(filename, THRD_SIB_FMT, cpu); + fp = fopen(filename, "r"); + if (!fp) + goto done; - list_for_each_entry(attr, &evlist->entries, node) { - f_attr = (struct perf_file_attr){ - .attr = attr->attr, - .ids = { - .offset = attr->id_offset, - .size = attr->ids * sizeof(u64), - } - }; - err = do_write(fd, &f_attr, sizeof(f_attr)); - if (err < 0) { - pr_debug("failed to write perf header attribute\n"); - return err; - } + if (getline(&buf, &len, fp) <= 0) + goto done; + + p = strchr(buf, '\n'); + if (p) + *p = '\0'; + + for (i = 0; i < tp->thread_sib; i++) { + if (!strcmp(buf, tp->thread_siblings[i])) + break; + } + if (i == tp->thread_sib) { + tp->thread_siblings[i] = buf; + tp->thread_sib++; + buf = NULL; } + ret = 0; +done: + if(fp) + fclose(fp); + free(buf); + return ret; +} - header->event_offset = lseek(fd, 0, SEEK_CUR); - header->event_size = event_count * sizeof(struct perf_trace_event_type); - if (events) { - err = do_write(fd, events, header->event_size); - if (err < 0) { - pr_debug("failed to write perf header events\n"); - return err; - } +static void free_cpu_topo(struct cpu_topo *tp) +{ + u32 i; + + if (!tp) + return; + + for (i = 0 ; i < tp->core_sib; i++) + free(tp->core_siblings[i]); + + for (i = 0 ; i < tp->thread_sib; i++) + free(tp->thread_siblings[i]); + + free(tp); +} + +static struct cpu_topo *build_cpu_topology(void) +{ + struct cpu_topo *tp; + void *addr; + u32 nr, i; + size_t sz; + long ncpus; + int ret = -1; + + ncpus = sysconf(_SC_NPROCESSORS_CONF); + if (ncpus < 0) + return NULL; + + nr = (u32)(ncpus & UINT_MAX); + + sz = nr * sizeof(char *); + + addr = calloc(1, sizeof(*tp) + 2 * sz); + if (!addr) + return NULL; + + tp = addr; + + addr += sizeof(*tp); + tp->core_siblings = addr; + addr += sz; + tp->thread_siblings = addr; + + for (i = 0; i < nr; i++) { + ret = build_cpu_topo(tp, i); + if (ret < 0) + break; } + if (ret) { + free_cpu_topo(tp); + tp = NULL; + } + return tp; +} - header->data_offset = lseek(fd, 0, SEEK_CUR); +static int write_cpu_topology(int fd, struct perf_header *h __used, + struct perf_evlist *evlist __used) +{ + struct cpu_topo *tp; + u32 i; + int ret; - if (at_exit) { - err = perf_header__adds_write(header, evlist, fd); - if (err < 0) - return err; + tp = build_cpu_topology(); + if (!tp) + return -1; + + ret = do_write(fd, &tp->core_sib, sizeof(tp->core_sib)); + if (ret < 0) + goto done; + + for (i = 0; i < tp->core_sib; i++) { + ret = do_write_string(fd, tp->core_siblings[i]); + if (ret < 0) + goto done; } + ret = do_write(fd, &tp->thread_sib, sizeof(tp->thread_sib)); + if (ret < 0) + goto done; - f_header = (struct perf_file_header){ - .magic = PERF_MAGIC, - .size = sizeof(f_header), - .attr_size = sizeof(f_attr), - .attrs = { - .offset = header->attr_offset, - .size = evlist->nr_entries * sizeof(f_attr), - }, - .data = { - .offset = header->data_offset, - .size = header->data_size, - }, - .event_types = { - .offset = header->event_offset, - .size = header->event_size, - }, - }; + for (i = 0; i < tp->thread_sib; i++) { + ret = do_write_string(fd, tp->thread_siblings[i]); + if (ret < 0) + break; + } +done: + free_cpu_topo(tp); + return ret; +} - memcpy(&f_header.adds_features, &header->adds_features, sizeof(header->adds_features)); - lseek(fd, 0, SEEK_SET); - err = do_write(fd, &f_header, sizeof(f_header)); - if (err < 0) { - pr_debug("failed to write perf header\n"); - return err; + +static int write_total_mem(int fd, struct perf_header *h __used, + struct perf_evlist *evlist __used) +{ + char *buf = NULL; + FILE *fp; + size_t len = 0; + int ret = -1, n; + uint64_t mem; + + fp = fopen("/proc/meminfo", "r"); + if (!fp) + return -1; + + while (getline(&buf, &len, fp) > 0) { + ret = strncmp(buf, "MemTotal:", 9); + if (!ret) + break; } - lseek(fd, header->data_offset + header->data_size, SEEK_SET); + if (!ret) { + n = sscanf(buf, "%*s %"PRIu64, &mem); + if (n == 1) + ret = do_write(fd, &mem, sizeof(mem)); + } + free(buf); + fclose(fp); + return ret; +} - header->frozen = 1; - return 0; +static int write_topo_node(int fd, int node) +{ + char str[MAXPATHLEN]; + char field[32]; + char *buf = NULL, *p; + size_t len = 0; + FILE *fp; + u64 mem_total, mem_free, mem; + int ret = -1; + + sprintf(str, "/sys/devices/system/node/node%d/meminfo", node); + fp = fopen(str, "r"); + if (!fp) + return -1; + + while (getline(&buf, &len, fp) > 0) { + /* skip over invalid lines */ + if (!strchr(buf, ':')) + continue; + if (sscanf(buf, "%*s %*d %s %"PRIu64, field, &mem) != 2) + goto done; + if (!strcmp(field, "MemTotal:")) + mem_total = mem; + if (!strcmp(field, "MemFree:")) + mem_free = mem; + } + + fclose(fp); + + ret = do_write(fd, &mem_total, sizeof(u64)); + if (ret) + goto done; + + ret = do_write(fd, &mem_free, sizeof(u64)); + if (ret) + goto done; + + ret = -1; + sprintf(str, "/sys/devices/system/node/node%d/cpulist", node); + + fp = fopen(str, "r"); + if (!fp) + goto done; + + if (getline(&buf, &len, fp) <= 0) + goto done; + + p = strchr(buf, '\n'); + if (p) + *p = '\0'; + + ret = do_write_string(fd, buf); +done: + free(buf); + fclose(fp); + return ret; } -static int perf_header__getbuffer64(struct perf_header *header, - int fd, void *buf, size_t size) +static int write_numa_topology(int fd, struct perf_header *h __used, + struct perf_evlist *evlist __used) { - if (readn(fd, buf, size) <= 0) + char *buf = NULL; + size_t len = 0; + FILE *fp; + struct cpu_map *node_map = NULL; + char *c; + u32 nr, i, j; + int ret = -1; + + fp = fopen("/sys/devices/system/node/online", "r"); + if (!fp) return -1; - if (header->needs_swap) - mem_bswap_64(buf, size); + if (getline(&buf, &len, fp) <= 0) + goto done; + + c = strchr(buf, '\n'); + if (c) + *c = '\0'; + + node_map = cpu_map__new(buf); + if (!node_map) + goto done; + + nr = (u32)node_map->nr; + ret = do_write(fd, &nr, sizeof(nr)); + if (ret < 0) + goto done; + + for (i = 0; i < nr; i++) { + j = (u32)node_map->map[i]; + ret = do_write(fd, &j, sizeof(j)); + if (ret < 0) + break; + + ret = write_topo_node(fd, i); + if (ret < 0) + break; + } +done: + free(buf); + fclose(fp); + free(node_map); + return ret; +} + +/* + * default get_cpuid(): nothing gets recorded + * actual implementation must be in arch/$(ARCH)/util/header.c + */ +int __attribute__((weak)) get_cpuid(char *buffer __used, size_t sz __used) +{ + return -1; +} + +static int write_cpuid(int fd, struct perf_header *h __used, + struct perf_evlist *evlist __used) +{ + char buffer[64]; + int ret; + + ret = get_cpuid(buffer, sizeof(buffer)); + if (!ret) + goto write_it; + + return -1; +write_it: + return do_write_string(fd, buffer); +} + +static int write_branch_stack(int fd __used, struct perf_header *h __used, + struct perf_evlist *evlist __used) +{ return 0; } -int perf_header__process_sections(struct perf_header *header, int fd, - int (*process)(struct perf_file_section *section, - struct perf_header *ph, - int feat, int fd)) +static void print_hostname(struct perf_header *ph, int fd, FILE *fp) { - struct perf_file_section *feat_sec; - int nr_sections; - int sec_size; - int idx = 0; - int err = -1, feat = 1; + char *str = do_read_string(fd, ph); + fprintf(fp, "# hostname : %s\n", str); + free(str); +} - nr_sections = bitmap_weight(header->adds_features, HEADER_FEAT_BITS); - if (!nr_sections) - return 0; +static void print_osrelease(struct perf_header *ph, int fd, FILE *fp) +{ + char *str = do_read_string(fd, ph); + fprintf(fp, "# os release : %s\n", str); + free(str); +} - feat_sec = calloc(sizeof(*feat_sec), nr_sections); - if (!feat_sec) - return -1; +static void print_arch(struct perf_header *ph, int fd, FILE *fp) +{ + char *str = do_read_string(fd, ph); + fprintf(fp, "# arch : %s\n", str); + free(str); +} - sec_size = sizeof(*feat_sec) * nr_sections; +static void print_cpudesc(struct perf_header *ph, int fd, FILE *fp) +{ + char *str = do_read_string(fd, ph); + fprintf(fp, "# cpudesc : %s\n", str); + free(str); +} - lseek(fd, header->data_offset + header->data_size, SEEK_SET); +static void print_nrcpus(struct perf_header *ph, int fd, FILE *fp) +{ + ssize_t ret; + u32 nr; - if (perf_header__getbuffer64(header, fd, feat_sec, sec_size)) - goto out_free; + ret = read(fd, &nr, sizeof(nr)); + if (ret != (ssize_t)sizeof(nr)) + nr = -1; /* interpreted as error */ - err = 0; - while (idx < nr_sections && feat < HEADER_LAST_FEATURE) { - if (perf_header__has_feat(header, feat)) { - struct perf_file_section *sec = &feat_sec[idx++]; + if (ph->needs_swap) + nr = bswap_32(nr); - err = process(sec, header, feat, fd); - if (err < 0) - break; - } - ++feat; + fprintf(fp, "# nrcpus online : %u\n", nr); + + ret = read(fd, &nr, sizeof(nr)); + if (ret != (ssize_t)sizeof(nr)) + nr = -1; /* interpreted as error */ + + if (ph->needs_swap) + nr = bswap_32(nr); + + fprintf(fp, "# nrcpus avail : %u\n", nr); +} + +static void print_version(struct perf_header *ph, int fd, FILE *fp) +{ + char *str = do_read_string(fd, ph); + fprintf(fp, "# perf version : %s\n", str); + free(str); +} + +static void print_cmdline(struct perf_header *ph, int fd, FILE *fp) +{ + ssize_t ret; + char *str; + u32 nr, i; + + ret = read(fd, &nr, sizeof(nr)); + if (ret != (ssize_t)sizeof(nr)) + return; + + if (ph->needs_swap) + nr = bswap_32(nr); + + fprintf(fp, "# cmdline : "); + + for (i = 0; i < nr; i++) { + str = do_read_string(fd, ph); + fprintf(fp, "%s ", str); + free(str); } -out_free: - free(feat_sec); - return err; + fputc('\n', fp); } -int perf_file_header__read(struct perf_file_header *header, - struct perf_header *ph, int fd) +static void print_cpu_topology(struct perf_header *ph, int fd, FILE *fp) { - lseek(fd, 0, SEEK_SET); + ssize_t ret; + u32 nr, i; + char *str; - if (readn(fd, header, sizeof(*header)) <= 0 || - memcmp(&header->magic, __perf_magic, sizeof(header->magic))) - return -1; + ret = read(fd, &nr, sizeof(nr)); + if (ret != (ssize_t)sizeof(nr)) + return; - if (header->attr_size != sizeof(struct perf_file_attr)) { - u64 attr_size = bswap_64(header->attr_size); + if (ph->needs_swap) + nr = bswap_32(nr); - if (attr_size != sizeof(struct perf_file_attr)) - return -1; + for (i = 0; i < nr; i++) { + str = do_read_string(fd, ph); + fprintf(fp, "# sibling cores : %s\n", str); + free(str); + } - mem_bswap_64(header, offsetof(struct perf_file_header, - adds_features)); - ph->needs_swap = true; + ret = read(fd, &nr, sizeof(nr)); + if (ret != (ssize_t)sizeof(nr)) + return; + + if (ph->needs_swap) + nr = bswap_32(nr); + + for (i = 0; i < nr; i++) { + str = do_read_string(fd, ph); + fprintf(fp, "# sibling threads : %s\n", str); + free(str); } +} - if (header->size != sizeof(*header)) { - /* Support the previous format */ - if (header->size == offsetof(typeof(*header), adds_features)) - bitmap_zero(header->adds_features, HEADER_FEAT_BITS); - else - return -1; +static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) +{ + struct perf_event_attr attr; + uint64_t id; + void *buf = NULL; + char *str; + u32 nre, sz, nr, i, j; + ssize_t ret; + size_t msz; + + /* number of events */ + ret = read(fd, &nre, sizeof(nre)); + if (ret != (ssize_t)sizeof(nre)) + goto error; + + if (ph->needs_swap) + nre = bswap_32(nre); + + ret = read(fd, &sz, sizeof(sz)); + if (ret != (ssize_t)sizeof(sz)) + goto error; + + if (ph->needs_swap) + sz = bswap_32(sz); + + memset(&attr, 0, sizeof(attr)); + + /* buffer to hold on file attr struct */ + buf = malloc(sz); + if (!buf) + goto error; + + msz = sizeof(attr); + if (sz < msz) + msz = sz; + + for (i = 0 ; i < nre; i++) { + + /* + * must read entire on-file attr struct to + * sync up with layout. + */ + ret = read(fd, buf, sz); + if (ret != (ssize_t)sz) + goto error; + + if (ph->needs_swap) + perf_event__attr_swap(buf); + + memcpy(&attr, buf, msz); + + ret = read(fd, &nr, sizeof(nr)); + if (ret != (ssize_t)sizeof(nr)) + goto error; + + if (ph->needs_swap) + nr = bswap_32(nr); + + str = do_read_string(fd, ph); + fprintf(fp, "# event : name = %s, ", str); + free(str); + + fprintf(fp, "type = %d, config = 0x%"PRIx64 + ", config1 = 0x%"PRIx64", config2 = 0x%"PRIx64, + attr.type, + (u64)attr.config, + (u64)attr.config1, + (u64)attr.config2); + + fprintf(fp, ", excl_usr = %d, excl_kern = %d", + attr.exclude_user, + attr.exclude_kernel); + + fprintf(fp, ", excl_host = %d, excl_guest = %d", + attr.exclude_host, + attr.exclude_guest); + + fprintf(fp, ", precise_ip = %d", attr.precise_ip); + + if (nr) + fprintf(fp, ", id = {"); + + for (j = 0 ; j < nr; j++) { + ret = read(fd, &id, sizeof(id)); + if (ret != (ssize_t)sizeof(id)) + goto error; + + if (ph->needs_swap) + id = bswap_64(id); + + if (j) + fputc(',', fp); + + fprintf(fp, " %"PRIu64, id); + } + if (nr && j == nr) + fprintf(fp, " }"); + fputc('\n', fp); } + free(buf); + return; +error: + fprintf(fp, "# event desc: not available or unable to read\n"); +} - memcpy(&ph->adds_features, &header->adds_features, - sizeof(ph->adds_features)); - /* - * FIXME: hack that assumes that if we need swap the perf.data file - * may be coming from an arch with a different word-size, ergo different - * DEFINE_BITMAP format, investigate more later, but for now its mostly - * safe to assume that we have a build-id section. Trace files probably - * have several other issues in this realm anyway... - */ - if (ph->needs_swap) { - memset(&ph->adds_features, 0, sizeof(ph->adds_features)); - perf_header__set_feat(ph, HEADER_BUILD_ID); +static void print_total_mem(struct perf_header *h __used, int fd, FILE *fp) +{ + uint64_t mem; + ssize_t ret; + + ret = read(fd, &mem, sizeof(mem)); + if (ret != sizeof(mem)) + goto error; + + if (h->needs_swap) + mem = bswap_64(mem); + + fprintf(fp, "# total memory : %"PRIu64" kB\n", mem); + return; +error: + fprintf(fp, "# total memory : unknown\n"); +} + +static void print_numa_topology(struct perf_header *h __used, int fd, FILE *fp) +{ + ssize_t ret; + u32 nr, c, i; + char *str; + uint64_t mem_total, mem_free; + + /* nr nodes */ + ret = read(fd, &nr, sizeof(nr)); + if (ret != (ssize_t)sizeof(nr)) + goto error; + + if (h->needs_swap) + nr = bswap_32(nr); + + for (i = 0; i < nr; i++) { + + /* node number */ + ret = read(fd, &c, sizeof(c)); + if (ret != (ssize_t)sizeof(c)) + goto error; + + if (h->needs_swap) + c = bswap_32(c); + + ret = read(fd, &mem_total, sizeof(u64)); + if (ret != sizeof(u64)) + goto error; + + ret = read(fd, &mem_free, sizeof(u64)); + if (ret != sizeof(u64)) + goto error; + + if (h->needs_swap) { + mem_total = bswap_64(mem_total); + mem_free = bswap_64(mem_free); + } + + fprintf(fp, "# node%u meminfo : total = %"PRIu64" kB," + " free = %"PRIu64" kB\n", + c, + mem_total, + mem_free); + + str = do_read_string(fd, h); + fprintf(fp, "# node%u cpu list : %s\n", c, str); + free(str); } + return; +error: + fprintf(fp, "# numa topology : not available\n"); +} - ph->event_offset = header->event_types.offset; - ph->event_size = header->event_types.size; - ph->data_offset = header->data.offset; - ph->data_size = header->data.size; - return 0; +static void print_cpuid(struct perf_header *ph, int fd, FILE *fp) +{ + char *str = do_read_string(fd, ph); + fprintf(fp, "# cpuid : %s\n", str); + free(str); +} + +static void print_branch_stack(struct perf_header *ph __used, int fd __used, + FILE *fp) +{ + fprintf(fp, "# contains samples with branch stack\n"); } static int __event_process_build_id(struct build_id_event *bev, @@ -794,9 +1487,523 @@ out: return err; } +static int process_tracing_data(struct perf_file_section *section __unused, + struct perf_header *ph __unused, + int feat __unused, int fd, void *data) +{ + trace_report(fd, data, false); + return 0; +} + +static int process_build_id(struct perf_file_section *section, + struct perf_header *ph, + int feat __unused, int fd, void *data __used) +{ + if (perf_header__read_build_ids(ph, fd, section->offset, section->size)) + pr_debug("Failed to read buildids, continuing...\n"); + return 0; +} + +struct feature_ops { + int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist); + void (*print)(struct perf_header *h, int fd, FILE *fp); + int (*process)(struct perf_file_section *section, + struct perf_header *h, int feat, int fd, void *data); + const char *name; + bool full_only; +}; + +#define FEAT_OPA(n, func) \ + [n] = { .name = #n, .write = write_##func, .print = print_##func } +#define FEAT_OPP(n, func) \ + [n] = { .name = #n, .write = write_##func, .print = print_##func, \ + .process = process_##func } +#define FEAT_OPF(n, func) \ + [n] = { .name = #n, .write = write_##func, .print = print_##func, \ + .full_only = true } + +/* feature_ops not implemented: */ +#define print_tracing_data NULL +#define print_build_id NULL + +static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { + FEAT_OPP(HEADER_TRACING_DATA, tracing_data), + FEAT_OPP(HEADER_BUILD_ID, build_id), + FEAT_OPA(HEADER_HOSTNAME, hostname), + FEAT_OPA(HEADER_OSRELEASE, osrelease), + FEAT_OPA(HEADER_VERSION, version), + FEAT_OPA(HEADER_ARCH, arch), + FEAT_OPA(HEADER_NRCPUS, nrcpus), + FEAT_OPA(HEADER_CPUDESC, cpudesc), + FEAT_OPA(HEADER_CPUID, cpuid), + FEAT_OPA(HEADER_TOTAL_MEM, total_mem), + FEAT_OPA(HEADER_EVENT_DESC, event_desc), + FEAT_OPA(HEADER_CMDLINE, cmdline), + FEAT_OPF(HEADER_CPU_TOPOLOGY, cpu_topology), + FEAT_OPF(HEADER_NUMA_TOPOLOGY, numa_topology), + FEAT_OPA(HEADER_BRANCH_STACK, branch_stack), +}; + +struct header_print_data { + FILE *fp; + bool full; /* extended list of headers */ +}; + +static int perf_file_section__fprintf_info(struct perf_file_section *section, + struct perf_header *ph, + int feat, int fd, void *data) +{ + struct header_print_data *hd = data; + + if (lseek(fd, section->offset, SEEK_SET) == (off_t)-1) { + pr_debug("Failed to lseek to %" PRIu64 " offset for feature " + "%d, continuing...\n", section->offset, feat); + return 0; + } + if (feat >= HEADER_LAST_FEATURE) { + pr_warning("unknown feature %d\n", feat); + return 0; + } + if (!feat_ops[feat].print) + return 0; + + if (!feat_ops[feat].full_only || hd->full) + feat_ops[feat].print(ph, fd, hd->fp); + else + fprintf(hd->fp, "# %s info available, use -I to display\n", + feat_ops[feat].name); + + return 0; +} + +int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full) +{ + struct header_print_data hd; + struct perf_header *header = &session->header; + int fd = session->fd; + hd.fp = fp; + hd.full = full; + + perf_header__process_sections(header, fd, &hd, + perf_file_section__fprintf_info); + return 0; +} + +static int do_write_feat(int fd, struct perf_header *h, int type, + struct perf_file_section **p, + struct perf_evlist *evlist) +{ + int err; + int ret = 0; + + if (perf_header__has_feat(h, type)) { + if (!feat_ops[type].write) + return -1; + + (*p)->offset = lseek(fd, 0, SEEK_CUR); + + err = feat_ops[type].write(fd, h, evlist); + if (err < 0) { + pr_debug("failed to write feature %d\n", type); + + /* undo anything written */ + lseek(fd, (*p)->offset, SEEK_SET); + + return -1; + } + (*p)->size = lseek(fd, 0, SEEK_CUR) - (*p)->offset; + (*p)++; + } + return ret; +} + +static int perf_header__adds_write(struct perf_header *header, + struct perf_evlist *evlist, int fd) +{ + int nr_sections; + struct perf_file_section *feat_sec, *p; + int sec_size; + u64 sec_start; + int feat; + int err; + + nr_sections = bitmap_weight(header->adds_features, HEADER_FEAT_BITS); + if (!nr_sections) + return 0; + + feat_sec = p = calloc(sizeof(*feat_sec), nr_sections); + if (feat_sec == NULL) + return -ENOMEM; + + sec_size = sizeof(*feat_sec) * nr_sections; + + sec_start = header->data_offset + header->data_size; + lseek(fd, sec_start + sec_size, SEEK_SET); + + for_each_set_bit(feat, header->adds_features, HEADER_FEAT_BITS) { + if (do_write_feat(fd, header, feat, &p, evlist)) + perf_header__clear_feat(header, feat); + } + + lseek(fd, sec_start, SEEK_SET); + /* + * may write more than needed due to dropped feature, but + * this is okay, reader will skip the mising entries + */ + err = do_write(fd, feat_sec, sec_size); + if (err < 0) + pr_debug("failed to write feature section\n"); + free(feat_sec); + return err; +} + +int perf_header__write_pipe(int fd) +{ + struct perf_pipe_file_header f_header; + int err; + + f_header = (struct perf_pipe_file_header){ + .magic = PERF_MAGIC, + .size = sizeof(f_header), + }; + + err = do_write(fd, &f_header, sizeof(f_header)); + if (err < 0) { + pr_debug("failed to write perf pipe header\n"); + return err; + } + + return 0; +} + +int perf_session__write_header(struct perf_session *session, + struct perf_evlist *evlist, + int fd, bool at_exit) +{ + struct perf_file_header f_header; + struct perf_file_attr f_attr; + struct perf_header *header = &session->header; + struct perf_evsel *attr, *pair = NULL; + int err; + + lseek(fd, sizeof(f_header), SEEK_SET); + + if (session->evlist != evlist) + pair = list_entry(session->evlist->entries.next, struct perf_evsel, node); + + list_for_each_entry(attr, &evlist->entries, node) { + attr->id_offset = lseek(fd, 0, SEEK_CUR); + err = do_write(fd, attr->id, attr->ids * sizeof(u64)); + if (err < 0) { +out_err_write: + pr_debug("failed to write perf header\n"); + return err; + } + if (session->evlist != evlist) { + err = do_write(fd, pair->id, pair->ids * sizeof(u64)); + if (err < 0) + goto out_err_write; + attr->ids += pair->ids; + pair = list_entry(pair->node.next, struct perf_evsel, node); + } + } + + header->attr_offset = lseek(fd, 0, SEEK_CUR); + + list_for_each_entry(attr, &evlist->entries, node) { + f_attr = (struct perf_file_attr){ + .attr = attr->attr, + .ids = { + .offset = attr->id_offset, + .size = attr->ids * sizeof(u64), + } + }; + err = do_write(fd, &f_attr, sizeof(f_attr)); + if (err < 0) { + pr_debug("failed to write perf header attribute\n"); + return err; + } + } + + header->event_offset = lseek(fd, 0, SEEK_CUR); + header->event_size = event_count * sizeof(struct perf_trace_event_type); + if (events) { + err = do_write(fd, events, header->event_size); + if (err < 0) { + pr_debug("failed to write perf header events\n"); + return err; + } + } + + header->data_offset = lseek(fd, 0, SEEK_CUR); + + if (at_exit) { + err = perf_header__adds_write(header, evlist, fd); + if (err < 0) + return err; + } + + f_header = (struct perf_file_header){ + .magic = PERF_MAGIC, + .size = sizeof(f_header), + .attr_size = sizeof(f_attr), + .attrs = { + .offset = header->attr_offset, + .size = evlist->nr_entries * sizeof(f_attr), + }, + .data = { + .offset = header->data_offset, + .size = header->data_size, + }, + .event_types = { + .offset = header->event_offset, + .size = header->event_size, + }, + }; + + memcpy(&f_header.adds_features, &header->adds_features, sizeof(header->adds_features)); + + lseek(fd, 0, SEEK_SET); + err = do_write(fd, &f_header, sizeof(f_header)); + if (err < 0) { + pr_debug("failed to write perf header\n"); + return err; + } + lseek(fd, header->data_offset + header->data_size, SEEK_SET); + + header->frozen = 1; + return 0; +} + +static int perf_header__getbuffer64(struct perf_header *header, + int fd, void *buf, size_t size) +{ + if (readn(fd, buf, size) <= 0) + return -1; + + if (header->needs_swap) + mem_bswap_64(buf, size); + + return 0; +} + +int perf_header__process_sections(struct perf_header *header, int fd, + void *data, + int (*process)(struct perf_file_section *section, + struct perf_header *ph, + int feat, int fd, void *data)) +{ + struct perf_file_section *feat_sec, *sec; + int nr_sections; + int sec_size; + int feat; + int err; + + nr_sections = bitmap_weight(header->adds_features, HEADER_FEAT_BITS); + if (!nr_sections) + return 0; + + feat_sec = sec = calloc(sizeof(*feat_sec), nr_sections); + if (!feat_sec) + return -1; + + sec_size = sizeof(*feat_sec) * nr_sections; + + lseek(fd, header->data_offset + header->data_size, SEEK_SET); + + err = perf_header__getbuffer64(header, fd, feat_sec, sec_size); + if (err < 0) + goto out_free; + + for_each_set_bit(feat, header->adds_features, HEADER_LAST_FEATURE) { + err = process(sec++, header, feat, fd, data); + if (err < 0) + goto out_free; + } + err = 0; +out_free: + free(feat_sec); + return err; +} + +static const int attr_file_abi_sizes[] = { + [0] = PERF_ATTR_SIZE_VER0, + [1] = PERF_ATTR_SIZE_VER1, + 0, +}; + +/* + * In the legacy file format, the magic number is not used to encode endianness. + * hdr_sz was used to encode endianness. But given that hdr_sz can vary based + * on ABI revisions, we need to try all combinations for all endianness to + * detect the endianness. + */ +static int try_all_file_abis(uint64_t hdr_sz, struct perf_header *ph) +{ + uint64_t ref_size, attr_size; + int i; + + for (i = 0 ; attr_file_abi_sizes[i]; i++) { + ref_size = attr_file_abi_sizes[i] + + sizeof(struct perf_file_section); + if (hdr_sz != ref_size) { + attr_size = bswap_64(hdr_sz); + if (attr_size != ref_size) + continue; + + ph->needs_swap = true; + } + pr_debug("ABI%d perf.data file detected, need_swap=%d\n", + i, + ph->needs_swap); + return 0; + } + /* could not determine endianness */ + return -1; +} + +#define PERF_PIPE_HDR_VER0 16 + +static const size_t attr_pipe_abi_sizes[] = { + [0] = PERF_PIPE_HDR_VER0, + 0, +}; + +/* + * In the legacy pipe format, there is an implicit assumption that endiannesss + * between host recording the samples, and host parsing the samples is the + * same. This is not always the case given that the pipe output may always be + * redirected into a file and analyzed on a different machine with possibly a + * different endianness and perf_event ABI revsions in the perf tool itself. + */ +static int try_all_pipe_abis(uint64_t hdr_sz, struct perf_header *ph) +{ + u64 attr_size; + int i; + + for (i = 0 ; attr_pipe_abi_sizes[i]; i++) { + if (hdr_sz != attr_pipe_abi_sizes[i]) { + attr_size = bswap_64(hdr_sz); + if (attr_size != hdr_sz) + continue; + + ph->needs_swap = true; + } + pr_debug("Pipe ABI%d perf.data file detected\n", i); + return 0; + } + return -1; +} + +static int check_magic_endian(u64 magic, uint64_t hdr_sz, + bool is_pipe, struct perf_header *ph) +{ + int ret; + + /* check for legacy format */ + ret = memcmp(&magic, __perf_magic1, sizeof(magic)); + if (ret == 0) { + pr_debug("legacy perf.data format\n"); + if (is_pipe) + return try_all_pipe_abis(hdr_sz, ph); + + return try_all_file_abis(hdr_sz, ph); + } + /* + * the new magic number serves two purposes: + * - unique number to identify actual perf.data files + * - encode endianness of file + */ + + /* check magic number with one endianness */ + if (magic == __perf_magic2) + return 0; + + /* check magic number with opposite endianness */ + if (magic != __perf_magic2_sw) + return -1; + + ph->needs_swap = true; + + return 0; +} + +int perf_file_header__read(struct perf_file_header *header, + struct perf_header *ph, int fd) +{ + int ret; + + lseek(fd, 0, SEEK_SET); + + ret = readn(fd, header, sizeof(*header)); + if (ret <= 0) + return -1; + + if (check_magic_endian(header->magic, + header->attr_size, false, ph) < 0) { + pr_debug("magic/endian check failed\n"); + return -1; + } + + if (ph->needs_swap) { + mem_bswap_64(header, offsetof(struct perf_file_header, + adds_features)); + } + + if (header->size != sizeof(*header)) { + /* Support the previous format */ + if (header->size == offsetof(typeof(*header), adds_features)) + bitmap_zero(header->adds_features, HEADER_FEAT_BITS); + else + return -1; + } else if (ph->needs_swap) { + /* + * feature bitmap is declared as an array of unsigned longs -- + * not good since its size can differ between the host that + * generated the data file and the host analyzing the file. + * + * We need to handle endianness, but we don't know the size of + * the unsigned long where the file was generated. Take a best + * guess at determining it: try 64-bit swap first (ie., file + * created on a 64-bit host), and check if the hostname feature + * bit is set (this feature bit is forced on as of fbe96f2). + * If the bit is not, undo the 64-bit swap and try a 32-bit + * swap. If the hostname bit is still not set (e.g., older data + * file), punt and fallback to the original behavior -- + * clearing all feature bits and setting buildid. + */ + mem_bswap_64(&header->adds_features, + BITS_TO_U64(HEADER_FEAT_BITS)); + + if (!test_bit(HEADER_HOSTNAME, header->adds_features)) { + /* unswap as u64 */ + mem_bswap_64(&header->adds_features, + BITS_TO_U64(HEADER_FEAT_BITS)); + + /* unswap as u32 */ + mem_bswap_32(&header->adds_features, + BITS_TO_U32(HEADER_FEAT_BITS)); + } + + if (!test_bit(HEADER_HOSTNAME, header->adds_features)) { + bitmap_zero(header->adds_features, HEADER_FEAT_BITS); + set_bit(HEADER_BUILD_ID, header->adds_features); + } + } + + memcpy(&ph->adds_features, &header->adds_features, + sizeof(ph->adds_features)); + + ph->event_offset = header->event_types.offset; + ph->event_size = header->event_types.size; + ph->data_offset = header->data.offset; + ph->data_size = header->data.size; + return 0; +} + static int perf_file_section__process(struct perf_file_section *section, struct perf_header *ph, - int feat, int fd) + int feat, int fd, void *data) { if (lseek(fd, section->offset, SEEK_SET) == (off_t)-1) { pr_debug("Failed to lseek to %" PRIu64 " offset for feature " @@ -804,41 +2011,37 @@ static int perf_file_section__process(struct perf_file_section *section, return 0; } - switch (feat) { - case HEADER_TRACE_INFO: - trace_report(fd, false); - break; - - case HEADER_BUILD_ID: - if (perf_header__read_build_ids(ph, fd, section->offset, section->size)) - pr_debug("Failed to read buildids, continuing...\n"); - break; - default: + if (feat >= HEADER_LAST_FEATURE) { pr_debug("unknown feature %d, continuing...\n", feat); + return 0; } - return 0; + if (!feat_ops[feat].process) + return 0; + + return feat_ops[feat].process(section, ph, feat, fd, data); } static int perf_file_header__read_pipe(struct perf_pipe_file_header *header, struct perf_header *ph, int fd, bool repipe) { - if (readn(fd, header, sizeof(*header)) <= 0 || - memcmp(&header->magic, __perf_magic, sizeof(header->magic))) - return -1; + int ret; - if (repipe && do_write(STDOUT_FILENO, header, sizeof(*header)) < 0) + ret = readn(fd, header, sizeof(*header)); + if (ret <= 0) return -1; - if (header->size != sizeof(*header)) { - u64 size = bswap_64(header->size); + if (check_magic_endian(header->magic, header->size, true, ph) < 0) { + pr_debug("endian/magic failed\n"); + return -1; + } - if (size != sizeof(*header)) - return -1; + if (ph->needs_swap) + header->size = bswap_64(header->size); - ph->needs_swap = true; - } + if (repipe && do_write(STDOUT_FILENO, header, sizeof(*header)) < 0) + return -1; return 0; } @@ -859,6 +2062,84 @@ static int perf_header__read_pipe(struct perf_session *session, int fd) return 0; } +static int read_attr(int fd, struct perf_header *ph, + struct perf_file_attr *f_attr) +{ + struct perf_event_attr *attr = &f_attr->attr; + size_t sz, left; + size_t our_sz = sizeof(f_attr->attr); + int ret; + + memset(f_attr, 0, sizeof(*f_attr)); + + /* read minimal guaranteed structure */ + ret = readn(fd, attr, PERF_ATTR_SIZE_VER0); + if (ret <= 0) { + pr_debug("cannot read %d bytes of header attr\n", + PERF_ATTR_SIZE_VER0); + return -1; + } + + /* on file perf_event_attr size */ + sz = attr->size; + + if (ph->needs_swap) + sz = bswap_32(sz); + + if (sz == 0) { + /* assume ABI0 */ + sz = PERF_ATTR_SIZE_VER0; + } else if (sz > our_sz) { + pr_debug("file uses a more recent and unsupported ABI" + " (%zu bytes extra)\n", sz - our_sz); + return -1; + } + /* what we have not yet read and that we know about */ + left = sz - PERF_ATTR_SIZE_VER0; + if (left) { + void *ptr = attr; + ptr += PERF_ATTR_SIZE_VER0; + + ret = readn(fd, ptr, left); + } + /* read perf_file_section, ids are read in caller */ + ret = readn(fd, &f_attr->ids, sizeof(f_attr->ids)); + + return ret <= 0 ? -1 : 0; +} + +static int perf_evsel__set_tracepoint_name(struct perf_evsel *evsel, + struct pevent *pevent) +{ + struct event_format *event = pevent_find_event(pevent, + evsel->attr.config); + char bf[128]; + + if (event == NULL) + return -1; + + snprintf(bf, sizeof(bf), "%s:%s", event->system, event->name); + evsel->name = strdup(bf); + if (event->name == NULL) + return -1; + + return 0; +} + +static int perf_evlist__set_tracepoint_names(struct perf_evlist *evlist, + struct pevent *pevent) +{ + struct perf_evsel *pos; + + list_for_each_entry(pos, &evlist->entries, node) { + if (pos->attr.type == PERF_TYPE_TRACEPOINT && + perf_evsel__set_tracepoint_name(pos, pevent)) + return -1; + } + + return 0; +} + int perf_session__read_header(struct perf_session *session, int fd) { struct perf_header *header = &session->header; @@ -874,19 +2155,17 @@ int perf_session__read_header(struct perf_session *session, int fd) if (session->fd_pipe) return perf_header__read_pipe(session, fd); - if (perf_file_header__read(&f_header, header, fd) < 0) { - pr_debug("incompatible file format\n"); + if (perf_file_header__read(&f_header, header, fd) < 0) return -EINVAL; - } - nr_attrs = f_header.attrs.size / sizeof(f_attr); + nr_attrs = f_header.attrs.size / f_header.attr_size; lseek(fd, f_header.attrs.offset, SEEK_SET); for (i = 0; i < nr_attrs; i++) { struct perf_evsel *evsel; off_t tmp; - if (readn(fd, &f_attr, sizeof(f_attr)) <= 0) + if (read_attr(fd, header, &f_attr) < 0) goto out_errno; if (header->needs_swap) @@ -924,6 +2203,8 @@ int perf_session__read_header(struct perf_session *session, int fd) lseek(fd, tmp, SEEK_SET); } + symbol_conf.nr_events = nr_attrs; + if (f_header.event_types.size) { lseek(fd, f_header.event_types.offset, SEEK_SET); events = malloc(f_header.event_types.size); @@ -935,10 +2216,14 @@ int perf_session__read_header(struct perf_session *session, int fd) event_count = f_header.event_types.size / sizeof(struct perf_trace_event_type); } - perf_header__process_sections(header, fd, perf_file_section__process); + perf_header__process_sections(header, fd, &session->pevent, + perf_file_section__process); lseek(fd, header->data_offset, SEEK_SET); + if (perf_evlist__set_tracepoint_names(session->evlist, session->pevent)) + goto out_delete_evlist; + header->frozen = 1; return 0; out_errno: @@ -950,9 +2235,9 @@ out_delete_evlist: return -ENOMEM; } -int perf_event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id, - perf_event__handler_t process, - struct perf_session *session) +int perf_event__synthesize_attr(struct perf_tool *tool, + struct perf_event_attr *attr, u16 ids, u64 *id, + perf_event__handler_t process) { union perf_event *ev; size_t size; @@ -974,22 +2259,23 @@ int perf_event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id, ev->attr.header.type = PERF_RECORD_HEADER_ATTR; ev->attr.header.size = size; - err = process(ev, NULL, session); + err = process(tool, ev, NULL, NULL); free(ev); return err; } -int perf_session__synthesize_attrs(struct perf_session *session, +int perf_event__synthesize_attrs(struct perf_tool *tool, + struct perf_session *session, perf_event__handler_t process) { struct perf_evsel *attr; int err = 0; list_for_each_entry(attr, &session->evlist->entries, node) { - err = perf_event__synthesize_attr(&attr->attr, attr->ids, - attr->id, process, session); + err = perf_event__synthesize_attr(tool, &attr->attr, attr->ids, + attr->id, process); if (err) { pr_debug("failed to create perf header attribute\n"); return err; @@ -1000,23 +2286,23 @@ int perf_session__synthesize_attrs(struct perf_session *session, } int perf_event__process_attr(union perf_event *event, - struct perf_session *session) + struct perf_evlist **pevlist) { unsigned int i, ids, n_ids; struct perf_evsel *evsel; + struct perf_evlist *evlist = *pevlist; - if (session->evlist == NULL) { - session->evlist = perf_evlist__new(NULL, NULL); - if (session->evlist == NULL) + if (evlist == NULL) { + *pevlist = evlist = perf_evlist__new(NULL, NULL); + if (evlist == NULL) return -ENOMEM; } - evsel = perf_evsel__new(&event->attr.attr, - session->evlist->nr_entries); + evsel = perf_evsel__new(&event->attr.attr, evlist->nr_entries); if (evsel == NULL) return -ENOMEM; - perf_evlist__add(session->evlist, evsel); + perf_evlist__add(evlist, evsel); ids = event->header.size; ids -= (void *)&event->attr.id - (void *)event; @@ -1030,18 +2316,16 @@ int perf_event__process_attr(union perf_event *event, return -ENOMEM; for (i = 0; i < n_ids; i++) { - perf_evlist__id_add(session->evlist, evsel, 0, i, - event->attr.id[i]); + perf_evlist__id_add(evlist, evsel, 0, i, event->attr.id[i]); } - perf_session__update_sample_type(session); - return 0; } -int perf_event__synthesize_event_type(u64 event_id, char *name, +int perf_event__synthesize_event_type(struct perf_tool *tool, + u64 event_id, char *name, perf_event__handler_t process, - struct perf_session *session) + struct machine *machine) { union perf_event ev; size_t size = 0; @@ -1054,18 +2338,19 @@ int perf_event__synthesize_event_type(u64 event_id, char *name, strncpy(ev.event_type.event_type.name, name, MAX_EVENT_NAME - 1); ev.event_type.header.type = PERF_RECORD_HEADER_EVENT_TYPE; - size = strlen(name); + size = strlen(ev.event_type.event_type.name); size = ALIGN(size, sizeof(u64)); ev.event_type.header.size = sizeof(ev.event_type) - (sizeof(ev.event_type.event_type.name) - size); - err = process(&ev, NULL, session); + err = process(tool, &ev, NULL, machine); return err; } -int perf_event__synthesize_event_types(perf_event__handler_t process, - struct perf_session *session) +int perf_event__synthesize_event_types(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine) { struct perf_trace_event_type *type; int i, err = 0; @@ -1073,9 +2358,9 @@ int perf_event__synthesize_event_types(perf_event__handler_t process, for (i = 0; i < event_count; i++) { type = &events[i]; - err = perf_event__synthesize_event_type(type->event_id, + err = perf_event__synthesize_event_type(tool, type->event_id, type->name, process, - session); + machine); if (err) { pr_debug("failed to create perf header event type\n"); return err; @@ -1085,8 +2370,8 @@ int perf_event__synthesize_event_types(perf_event__handler_t process, return err; } -int perf_event__process_event_type(union perf_event *event, - struct perf_session *session __unused) +int perf_event__process_event_type(struct perf_tool *tool __unused, + union perf_event *event) { if (perf_header__push_event(event->event_type.event_type.event_id, event->event_type.event_type.name) < 0) @@ -1095,28 +2380,47 @@ int perf_event__process_event_type(union perf_event *event, return 0; } -int perf_event__synthesize_tracing_data(int fd, struct perf_evlist *evlist, - perf_event__handler_t process, - struct perf_session *session __unused) +int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, + struct perf_evlist *evlist, + perf_event__handler_t process) { union perf_event ev; + struct tracing_data *tdata; ssize_t size = 0, aligned_size = 0, padding; int err __used = 0; + /* + * We are going to store the size of the data followed + * by the data contents. Since the fd descriptor is a pipe, + * we cannot seek back to store the size of the data once + * we know it. Instead we: + * + * - write the tracing data to the temp file + * - get/write the data size to pipe + * - write the tracing data from the temp file + * to the pipe + */ + tdata = tracing_data_get(&evlist->entries, fd, true); + if (!tdata) + return -1; + memset(&ev, 0, sizeof(ev)); ev.tracing_data.header.type = PERF_RECORD_HEADER_TRACING_DATA; - size = read_tracing_data_size(fd, &evlist->entries); - if (size <= 0) - return size; + size = tdata->size; aligned_size = ALIGN(size, sizeof(u64)); padding = aligned_size - size; ev.tracing_data.header.size = sizeof(ev.tracing_data); ev.tracing_data.size = aligned_size; - process(&ev, NULL, session); + process(tool, &ev, NULL, NULL); + + /* + * The put function will copy all the tracing data + * stored in temp file to the pipe. + */ + tracing_data_put(tdata); - err = read_tracing_data(fd, &evlist->entries); write_padded(fd, NULL, 0, padding); return aligned_size; @@ -1133,8 +2437,8 @@ int perf_event__process_tracing_data(union perf_event *event, lseek(session->fd, offset + sizeof(struct tracing_data_event), SEEK_SET); - size_read = trace_report(session->fd, session->repipe); - + size_read = trace_report(session->fd, &session->pevent, + session->repipe); padding = ALIGN(size_read, sizeof(u64)) - size_read; if (read(session->fd, buf, padding) < 0) @@ -1151,10 +2455,10 @@ int perf_event__process_tracing_data(union perf_event *event, return size_read + padding; } -int perf_event__synthesize_build_id(struct dso *pos, u16 misc, +int perf_event__synthesize_build_id(struct perf_tool *tool, + struct dso *pos, u16 misc, perf_event__handler_t process, - struct machine *machine, - struct perf_session *session) + struct machine *machine) { union perf_event ev; size_t len; @@ -1174,12 +2478,13 @@ int perf_event__synthesize_build_id(struct dso *pos, u16 misc, ev.build_id.header.size = sizeof(ev.build_id) + len; memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len); - err = process(&ev, NULL, session); + err = process(tool, &ev, NULL, machine); return err; } -int perf_event__process_build_id(union perf_event *event, +int perf_event__process_build_id(struct perf_tool *tool __used, + union perf_event *event, struct perf_session *session) { __event_process_build_id(&event->build_id, diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 1886256768a1..2d42b3e1826f 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -10,13 +10,28 @@ #include <linux/bitmap.h> enum { - HEADER_TRACE_INFO = 1, + HEADER_RESERVED = 0, /* always cleared */ + HEADER_FIRST_FEATURE = 1, + HEADER_TRACING_DATA = 1, HEADER_BUILD_ID, + + HEADER_HOSTNAME, + HEADER_OSRELEASE, + HEADER_VERSION, + HEADER_ARCH, + HEADER_NRCPUS, + HEADER_CPUDESC, + HEADER_CPUID, + HEADER_TOTAL_MEM, + HEADER_CMDLINE, + HEADER_EVENT_DESC, + HEADER_CPU_TOPOLOGY, + HEADER_NUMA_TOPOLOGY, + HEADER_BRANCH_STACK, HEADER_LAST_FEATURE, + HEADER_FEAT_BITS = 256, }; -#define HEADER_FEAT_BITS 256 - struct perf_file_section { u64 offset; u64 size; @@ -54,6 +69,7 @@ struct perf_header { }; struct perf_evlist; +struct perf_session; int perf_session__read_header(struct perf_session *session, int fd); int perf_session__write_header(struct perf_session *session, @@ -68,40 +84,55 @@ void perf_header__set_feat(struct perf_header *header, int feat); void perf_header__clear_feat(struct perf_header *header, int feat); bool perf_header__has_feat(const struct perf_header *header, int feat); +int perf_header__set_cmdline(int argc, const char **argv); + int perf_header__process_sections(struct perf_header *header, int fd, + void *data, int (*process)(struct perf_file_section *section, - struct perf_header *ph, - int feat, int fd)); + struct perf_header *ph, + int feat, int fd, void *data)); + +int perf_header__fprintf_info(struct perf_session *s, FILE *fp, bool full); int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, const char *name, bool is_kallsyms); int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir); -int perf_event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id, - perf_event__handler_t process, - struct perf_session *session); -int perf_session__synthesize_attrs(struct perf_session *session, - perf_event__handler_t process); -int perf_event__process_attr(union perf_event *event, struct perf_session *session); +int perf_event__synthesize_attr(struct perf_tool *tool, + struct perf_event_attr *attr, u16 ids, u64 *id, + perf_event__handler_t process); +int perf_event__synthesize_attrs(struct perf_tool *tool, + struct perf_session *session, + perf_event__handler_t process); +int perf_event__process_attr(union perf_event *event, struct perf_evlist **pevlist); -int perf_event__synthesize_event_type(u64 event_id, char *name, +int perf_event__synthesize_event_type(struct perf_tool *tool, + u64 event_id, char *name, perf_event__handler_t process, - struct perf_session *session); -int perf_event__synthesize_event_types(perf_event__handler_t process, - struct perf_session *session); -int perf_event__process_event_type(union perf_event *event, - struct perf_session *session); - -int perf_event__synthesize_tracing_data(int fd, struct perf_evlist *evlist, - perf_event__handler_t process, - struct perf_session *session); + struct machine *machine); +int perf_event__synthesize_event_types(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine); +int perf_event__process_event_type(struct perf_tool *tool, + union perf_event *event); + +int perf_event__synthesize_tracing_data(struct perf_tool *tool, + int fd, struct perf_evlist *evlist, + perf_event__handler_t process); int perf_event__process_tracing_data(union perf_event *event, struct perf_session *session); -int perf_event__synthesize_build_id(struct dso *pos, u16 misc, +int perf_event__synthesize_build_id(struct perf_tool *tool, + struct dso *pos, u16 misc, perf_event__handler_t process, - struct machine *machine, - struct perf_session *session); -int perf_event__process_build_id(union perf_event *event, + struct machine *machine); +int perf_event__process_build_id(struct perf_tool *tool, + union perf_event *event, struct perf_session *session); + +/* + * arch specific callback + */ +int get_cpuid(char *buffer, size_t sz); + #endif /* __PERF_HEADER_H */ diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 677e1da6bb3e..f247ef2789a4 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -6,10 +6,18 @@ #include "sort.h" #include <math.h> +static bool hists__filter_entry_by_dso(struct hists *hists, + struct hist_entry *he); +static bool hists__filter_entry_by_thread(struct hists *hists, + struct hist_entry *he); +static bool hists__filter_entry_by_symbol(struct hists *hists, + struct hist_entry *he); + enum hist_filter { HIST_FILTER__DSO, HIST_FILTER__THREAD, HIST_FILTER__PARENT, + HIST_FILTER__SYMBOL, }; struct callchain_param callchain_param = { @@ -18,80 +26,176 @@ struct callchain_param callchain_param = { .order = ORDER_CALLEE }; -u16 hists__col_len(struct hists *self, enum hist_column col) +u16 hists__col_len(struct hists *hists, enum hist_column col) { - return self->col_len[col]; + return hists->col_len[col]; } -void hists__set_col_len(struct hists *self, enum hist_column col, u16 len) +void hists__set_col_len(struct hists *hists, enum hist_column col, u16 len) { - self->col_len[col] = len; + hists->col_len[col] = len; } -bool hists__new_col_len(struct hists *self, enum hist_column col, u16 len) +bool hists__new_col_len(struct hists *hists, enum hist_column col, u16 len) { - if (len > hists__col_len(self, col)) { - hists__set_col_len(self, col, len); + if (len > hists__col_len(hists, col)) { + hists__set_col_len(hists, col, len); return true; } return false; } -static void hists__reset_col_len(struct hists *self) +static void hists__reset_col_len(struct hists *hists) { enum hist_column col; for (col = 0; col < HISTC_NR_COLS; ++col) - hists__set_col_len(self, col, 0); + hists__set_col_len(hists, col, 0); +} + +static void hists__set_unres_dso_col_len(struct hists *hists, int dso) +{ + const unsigned int unresolved_col_width = BITS_PER_LONG / 4; + + if (hists__col_len(hists, dso) < unresolved_col_width && + !symbol_conf.col_width_list_str && !symbol_conf.field_sep && + !symbol_conf.dso_list) + hists__set_col_len(hists, dso, unresolved_col_width); } -static void hists__calc_col_len(struct hists *self, struct hist_entry *h) +static void hists__calc_col_len(struct hists *hists, struct hist_entry *h) { + const unsigned int unresolved_col_width = BITS_PER_LONG / 4; u16 len; if (h->ms.sym) - hists__new_col_len(self, HISTC_SYMBOL, h->ms.sym->namelen); - else { - const unsigned int unresolved_col_width = BITS_PER_LONG / 4; - - if (hists__col_len(self, HISTC_DSO) < unresolved_col_width && - !symbol_conf.col_width_list_str && !symbol_conf.field_sep && - !symbol_conf.dso_list) - hists__set_col_len(self, HISTC_DSO, - unresolved_col_width); - } + hists__new_col_len(hists, HISTC_SYMBOL, h->ms.sym->namelen + 4); + else + hists__set_unres_dso_col_len(hists, HISTC_DSO); len = thread__comm_len(h->thread); - if (hists__new_col_len(self, HISTC_COMM, len)) - hists__set_col_len(self, HISTC_THREAD, len + 6); + if (hists__new_col_len(hists, HISTC_COMM, len)) + hists__set_col_len(hists, HISTC_THREAD, len + 6); if (h->ms.map) { len = dso__name_len(h->ms.map->dso); - hists__new_col_len(self, HISTC_DSO, len); + hists__new_col_len(hists, HISTC_DSO, len); + } + + if (h->branch_info) { + int symlen; + /* + * +4 accounts for '[x] ' priv level info + * +2 account of 0x prefix on raw addresses + */ + if (h->branch_info->from.sym) { + symlen = (int)h->branch_info->from.sym->namelen + 4; + hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen); + + symlen = dso__name_len(h->branch_info->from.map->dso); + hists__new_col_len(hists, HISTC_DSO_FROM, symlen); + } else { + symlen = unresolved_col_width + 4 + 2; + hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen); + hists__set_unres_dso_col_len(hists, HISTC_DSO_FROM); + } + + if (h->branch_info->to.sym) { + symlen = (int)h->branch_info->to.sym->namelen + 4; + hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen); + + symlen = dso__name_len(h->branch_info->to.map->dso); + hists__new_col_len(hists, HISTC_DSO_TO, symlen); + } else { + symlen = unresolved_col_width + 4 + 2; + hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen); + hists__set_unres_dso_col_len(hists, HISTC_DSO_TO); + } } } -static void hist_entry__add_cpumode_period(struct hist_entry *self, +static void hist_entry__add_cpumode_period(struct hist_entry *he, unsigned int cpumode, u64 period) { switch (cpumode) { case PERF_RECORD_MISC_KERNEL: - self->period_sys += period; + he->period_sys += period; break; case PERF_RECORD_MISC_USER: - self->period_us += period; + he->period_us += period; break; case PERF_RECORD_MISC_GUEST_KERNEL: - self->period_guest_sys += period; + he->period_guest_sys += period; break; case PERF_RECORD_MISC_GUEST_USER: - self->period_guest_us += period; + he->period_guest_us += period; break; default: break; } } +static void hist_entry__decay(struct hist_entry *he) +{ + he->period = (he->period * 7) / 8; + he->nr_events = (he->nr_events * 7) / 8; +} + +static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) +{ + u64 prev_period = he->period; + + if (prev_period == 0) + return true; + + hist_entry__decay(he); + + if (!he->filtered) + hists->stats.total_period -= prev_period - he->period; + + return he->period == 0; +} + +static void __hists__decay_entries(struct hists *hists, bool zap_user, + bool zap_kernel, bool threaded) +{ + struct rb_node *next = rb_first(&hists->entries); + struct hist_entry *n; + + while (next) { + n = rb_entry(next, struct hist_entry, rb_node); + next = rb_next(&n->rb_node); + /* + * We may be annotating this, for instance, so keep it here in + * case some it gets new samples, we'll eventually free it when + * the user stops browsing and it agains gets fully decayed. + */ + if (((zap_user && n->level == '.') || + (zap_kernel && n->level != '.') || + hists__decay_entry(hists, n)) && + !n->used) { + rb_erase(&n->rb_node, &hists->entries); + + if (sort__need_collapse || threaded) + rb_erase(&n->rb_node_in, &hists->entries_collapsed); + + hist_entry__free(n); + --hists->nr_entries; + } + } +} + +void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel) +{ + return __hists__decay_entries(hists, zap_user, zap_kernel, false); +} + +void hists__decay_entries_threaded(struct hists *hists, + bool zap_user, bool zap_kernel) +{ + return __hists__decay_entries(hists, zap_user, zap_kernel, true); +} + /* * histogram, sorted on item, collects periods */ @@ -99,25 +203,26 @@ static void hist_entry__add_cpumode_period(struct hist_entry *self, static struct hist_entry *hist_entry__new(struct hist_entry *template) { size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_root) : 0; - struct hist_entry *self = malloc(sizeof(*self) + callchain_size); + struct hist_entry *he = malloc(sizeof(*he) + callchain_size); - if (self != NULL) { - *self = *template; - self->nr_events = 1; - if (self->ms.map) - self->ms.map->referenced = true; + if (he != NULL) { + *he = *template; + he->nr_events = 1; + if (he->ms.map) + he->ms.map->referenced = true; if (symbol_conf.use_callchain) - callchain_init(self->callchain); + callchain_init(he->callchain); } - return self; + return he; } -static void hists__inc_nr_entries(struct hists *self, struct hist_entry *h) +static void hists__inc_nr_entries(struct hists *hists, struct hist_entry *h) { if (!h->filtered) { - hists__calc_col_len(self, h); - ++self->nr_entries; + hists__calc_col_len(hists, h); + ++hists->nr_entries; + hists->stats.total_period += h->period; } } @@ -128,37 +233,41 @@ static u8 symbol__parent_filter(const struct symbol *parent) return 0; } -struct hist_entry *__hists__add_entry(struct hists *self, +static struct hist_entry *add_hist_entry(struct hists *hists, + struct hist_entry *entry, struct addr_location *al, - struct symbol *sym_parent, u64 period) + u64 period) { - struct rb_node **p = &self->entries.rb_node; + struct rb_node **p; struct rb_node *parent = NULL; struct hist_entry *he; - struct hist_entry entry = { - .thread = al->thread, - .ms = { - .map = al->map, - .sym = al->sym, - }, - .cpu = al->cpu, - .ip = al->addr, - .level = al->level, - .period = period, - .parent = sym_parent, - .filtered = symbol__parent_filter(sym_parent), - }; int cmp; + pthread_mutex_lock(&hists->lock); + + p = &hists->entries_in->rb_node; + while (*p != NULL) { parent = *p; - he = rb_entry(parent, struct hist_entry, rb_node); + he = rb_entry(parent, struct hist_entry, rb_node_in); - cmp = hist_entry__cmp(&entry, he); + cmp = hist_entry__cmp(entry, he); if (!cmp) { he->period += period; ++he->nr_events; + + /* If the map of an existing hist_entry has + * become out-of-date due to an exec() or + * similar, update it. Otherwise we will + * mis-adjust symbol addresses when computing + * the history counter to increment. + */ + if (he->ms.map != entry->ms.map) { + he->ms.map = entry->ms.map; + if (he->ms.map) + he->ms.map->referenced = true; + } goto out; } @@ -168,17 +277,64 @@ struct hist_entry *__hists__add_entry(struct hists *self, p = &(*p)->rb_right; } - he = hist_entry__new(&entry); + he = hist_entry__new(entry); if (!he) - return NULL; - rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, &self->entries); - hists__inc_nr_entries(self, he); + goto out_unlock; + + rb_link_node(&he->rb_node_in, parent, p); + rb_insert_color(&he->rb_node_in, hists->entries_in); out: hist_entry__add_cpumode_period(he, al->cpumode, period); +out_unlock: + pthread_mutex_unlock(&hists->lock); return he; } +struct hist_entry *__hists__add_branch_entry(struct hists *self, + struct addr_location *al, + struct symbol *sym_parent, + struct branch_info *bi, + u64 period) +{ + struct hist_entry entry = { + .thread = al->thread, + .ms = { + .map = bi->to.map, + .sym = bi->to.sym, + }, + .cpu = al->cpu, + .ip = bi->to.addr, + .level = al->level, + .period = period, + .parent = sym_parent, + .filtered = symbol__parent_filter(sym_parent), + .branch_info = bi, + }; + + return add_hist_entry(self, &entry, al, period); +} + +struct hist_entry *__hists__add_entry(struct hists *self, + struct addr_location *al, + struct symbol *sym_parent, u64 period) +{ + struct hist_entry entry = { + .thread = al->thread, + .ms = { + .map = al->map, + .sym = al->sym, + }, + .cpu = al->cpu, + .ip = al->addr, + .level = al->level, + .period = period, + .parent = sym_parent, + .filtered = symbol__parent_filter(sym_parent), + }; + + return add_hist_entry(self, &entry, al, period); +} + int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) { @@ -222,7 +378,7 @@ void hist_entry__free(struct hist_entry *he) * collapse the histogram */ -static bool hists__collapse_insert_entry(struct hists *self, +static bool hists__collapse_insert_entry(struct hists *hists __used, struct rb_root *root, struct hist_entry *he) { @@ -233,15 +389,17 @@ static bool hists__collapse_insert_entry(struct hists *self, while (*p != NULL) { parent = *p; - iter = rb_entry(parent, struct hist_entry, rb_node); + iter = rb_entry(parent, struct hist_entry, rb_node_in); cmp = hist_entry__collapse(iter, he); if (!cmp) { iter->period += he->period; + iter->nr_events += he->nr_events; if (symbol_conf.use_callchain) { - callchain_cursor_reset(&self->callchain_cursor); - callchain_merge(&self->callchain_cursor, iter->callchain, + callchain_cursor_reset(&callchain_cursor); + callchain_merge(&callchain_cursor, + iter->callchain, he->callchain); } hist_entry__free(he); @@ -254,35 +412,69 @@ static bool hists__collapse_insert_entry(struct hists *self, p = &(*p)->rb_right; } - rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, root); + rb_link_node(&he->rb_node_in, parent, p); + rb_insert_color(&he->rb_node_in, root); return true; } -void hists__collapse_resort(struct hists *self) +static struct rb_root *hists__get_rotate_entries_in(struct hists *hists) +{ + struct rb_root *root; + + pthread_mutex_lock(&hists->lock); + + root = hists->entries_in; + if (++hists->entries_in > &hists->entries_in_array[1]) + hists->entries_in = &hists->entries_in_array[0]; + + pthread_mutex_unlock(&hists->lock); + + return root; +} + +static void hists__apply_filters(struct hists *hists, struct hist_entry *he) +{ + hists__filter_entry_by_dso(hists, he); + hists__filter_entry_by_thread(hists, he); + hists__filter_entry_by_symbol(hists, he); +} + +static void __hists__collapse_resort(struct hists *hists, bool threaded) { - struct rb_root tmp; + struct rb_root *root; struct rb_node *next; struct hist_entry *n; - if (!sort__need_collapse) + if (!sort__need_collapse && !threaded) return; - tmp = RB_ROOT; - next = rb_first(&self->entries); - self->nr_entries = 0; - hists__reset_col_len(self); + root = hists__get_rotate_entries_in(hists); + next = rb_first(root); while (next) { - n = rb_entry(next, struct hist_entry, rb_node); - next = rb_next(&n->rb_node); - - rb_erase(&n->rb_node, &self->entries); - if (hists__collapse_insert_entry(self, &tmp, n)) - hists__inc_nr_entries(self, n); + n = rb_entry(next, struct hist_entry, rb_node_in); + next = rb_next(&n->rb_node_in); + + rb_erase(&n->rb_node_in, root); + if (hists__collapse_insert_entry(hists, &hists->entries_collapsed, n)) { + /* + * If it wasn't combined with one of the entries already + * collapsed, we need to apply the filters that may have + * been set by, say, the hist_browser. + */ + hists__apply_filters(hists, n); + } } +} + +void hists__collapse_resort(struct hists *hists) +{ + return __hists__collapse_resort(hists, false); +} - self->entries = tmp; +void hists__collapse_resort_threaded(struct hists *hists) +{ + return __hists__collapse_resort(hists, true); } /* @@ -315,31 +507,44 @@ static void __hists__insert_output_entry(struct rb_root *entries, rb_insert_color(&he->rb_node, entries); } -void hists__output_resort(struct hists *self) +static void __hists__output_resort(struct hists *hists, bool threaded) { - struct rb_root tmp; + struct rb_root *root; struct rb_node *next; struct hist_entry *n; u64 min_callchain_hits; - min_callchain_hits = self->stats.total_period * (callchain_param.min_percent / 100); + min_callchain_hits = hists->stats.total_period * (callchain_param.min_percent / 100); + + if (sort__need_collapse || threaded) + root = &hists->entries_collapsed; + else + root = hists->entries_in; - tmp = RB_ROOT; - next = rb_first(&self->entries); + next = rb_first(root); + hists->entries = RB_ROOT; - self->nr_entries = 0; - hists__reset_col_len(self); + hists->nr_entries = 0; + hists->stats.total_period = 0; + hists__reset_col_len(hists); while (next) { - n = rb_entry(next, struct hist_entry, rb_node); - next = rb_next(&n->rb_node); + n = rb_entry(next, struct hist_entry, rb_node_in); + next = rb_next(&n->rb_node_in); - rb_erase(&n->rb_node, &self->entries); - __hists__insert_output_entry(&tmp, n, min_callchain_hits); - hists__inc_nr_entries(self, n); + __hists__insert_output_entry(&hists->entries, n, min_callchain_hits); + hists__inc_nr_entries(hists, n); } +} + +void hists__output_resort(struct hists *hists) +{ + return __hists__output_resort(hists, false); +} - self->entries = tmp; +void hists__output_resort_threaded(struct hists *hists) +{ + return __hists__output_resort(hists, true); } static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin) @@ -395,7 +600,7 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, if (chain->ms.sym) ret += fprintf(fp, "%s\n", chain->ms.sym->name); else - ret += fprintf(fp, "%p\n", (void *)(long)chain->ip); + ret += fprintf(fp, "0x%0" PRIx64 "\n", chain->ip); return ret; } @@ -415,7 +620,7 @@ static void init_rem_hits(void) rem_hits.ms.sym = rem_sq_bracket; } -static size_t __callchain__fprintf_graph(FILE *fp, struct callchain_node *self, +static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root, u64 total_samples, int depth, int depth_mask, int left_margin) { @@ -423,21 +628,16 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct callchain_node *self, struct callchain_node *child; struct callchain_list *chain; int new_depth_mask = depth_mask; - u64 new_total; u64 remaining; size_t ret = 0; int i; uint entries_printed = 0; - if (callchain_param.mode == CHAIN_GRAPH_REL) - new_total = self->children_hit; - else - new_total = total_samples; + remaining = total_samples; - remaining = new_total; - - node = rb_first(&self->rb_root); + node = rb_first(root); while (node) { + u64 new_total; u64 cumul; child = rb_entry(node, struct callchain_node, rb_node); @@ -465,11 +665,17 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct callchain_node *self, list_for_each_entry(chain, &child->val, list) { ret += ipchain__fprintf_graph(fp, chain, depth, new_depth_mask, i++, - new_total, + total_samples, cumul, left_margin); } - ret += __callchain__fprintf_graph(fp, child, new_total, + + if (callchain_param.mode == CHAIN_GRAPH_REL) + new_total = child->children_hit; + else + new_total = total_samples; + + ret += __callchain__fprintf_graph(fp, &child->rb_root, new_total, depth + 1, new_depth_mask | (1 << depth), left_margin); @@ -479,61 +685,78 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct callchain_node *self, } if (callchain_param.mode == CHAIN_GRAPH_REL && - remaining && remaining != new_total) { + remaining && remaining != total_samples) { if (!rem_sq_bracket) return ret; new_depth_mask &= ~(1 << (depth - 1)); - ret += ipchain__fprintf_graph(fp, &rem_hits, depth, - new_depth_mask, 0, new_total, + new_depth_mask, 0, total_samples, remaining, left_margin); } return ret; } -static size_t callchain__fprintf_graph(FILE *fp, struct callchain_node *self, +static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, u64 total_samples, int left_margin) { + struct callchain_node *cnode; struct callchain_list *chain; + u32 entries_printed = 0; bool printed = false; + struct rb_node *node; int i = 0; int ret = 0; - u32 entries_printed = 0; - - list_for_each_entry(chain, &self->val, list) { - if (!i++ && sort__first_dimension == SORT_SYM) - continue; - if (!printed) { - ret += callchain__fprintf_left_margin(fp, left_margin); - ret += fprintf(fp, "|\n"); - ret += callchain__fprintf_left_margin(fp, left_margin); - ret += fprintf(fp, "---"); - - left_margin += 3; - printed = true; - } else - ret += callchain__fprintf_left_margin(fp, left_margin); - - if (chain->ms.sym) - ret += fprintf(fp, " %s\n", chain->ms.sym->name); - else - ret += fprintf(fp, " %p\n", (void *)(long)chain->ip); + /* + * If have one single callchain root, don't bother printing + * its percentage (100 % in fractal mode and the same percentage + * than the hist in graph mode). This also avoid one level of column. + */ + node = rb_first(root); + if (node && !rb_next(node)) { + cnode = rb_entry(node, struct callchain_node, rb_node); + list_for_each_entry(chain, &cnode->val, list) { + /* + * If we sort by symbol, the first entry is the same than + * the symbol. No need to print it otherwise it appears as + * displayed twice. + */ + if (!i++ && sort__first_dimension == SORT_SYM) + continue; + if (!printed) { + ret += callchain__fprintf_left_margin(fp, left_margin); + ret += fprintf(fp, "|\n"); + ret += callchain__fprintf_left_margin(fp, left_margin); + ret += fprintf(fp, "---"); + left_margin += 3; + printed = true; + } else + ret += callchain__fprintf_left_margin(fp, left_margin); + + if (chain->ms.sym) + ret += fprintf(fp, " %s\n", chain->ms.sym->name); + else + ret += fprintf(fp, " %p\n", (void *)(long)chain->ip); - if (++entries_printed == callchain_param.print_limit) - break; + if (++entries_printed == callchain_param.print_limit) + break; + } + root = &cnode->rb_root; } - ret += __callchain__fprintf_graph(fp, self, total_samples, 1, 1, left_margin); + ret += __callchain__fprintf_graph(fp, root, total_samples, + 1, 1, left_margin); + ret += fprintf(fp, "\n"); return ret; } -static size_t callchain__fprintf_flat(FILE *fp, struct callchain_node *self, - u64 total_samples) +static size_t __callchain__fprintf_flat(FILE *fp, + struct callchain_node *self, + u64 total_samples) { struct callchain_list *chain; size_t ret = 0; @@ -541,7 +764,7 @@ static size_t callchain__fprintf_flat(FILE *fp, struct callchain_node *self, if (!self) return 0; - ret += callchain__fprintf_flat(fp, self->parent, total_samples); + ret += __callchain__fprintf_flat(fp, self->parent, total_samples); list_for_each_entry(chain, &self->val, list) { @@ -557,73 +780,103 @@ static size_t callchain__fprintf_flat(FILE *fp, struct callchain_node *self, return ret; } -static size_t hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self, - u64 total_samples, int left_margin) +static size_t callchain__fprintf_flat(FILE *fp, struct rb_root *self, + u64 total_samples) { - struct rb_node *rb_node; - struct callchain_node *chain; size_t ret = 0; u32 entries_printed = 0; + struct rb_node *rb_node; + struct callchain_node *chain; - rb_node = rb_first(&self->sorted_chain); + rb_node = rb_first(self); while (rb_node) { double percent; chain = rb_entry(rb_node, struct callchain_node, rb_node); percent = chain->hit * 100.0 / total_samples; - switch (callchain_param.mode) { - case CHAIN_FLAT: - ret += percent_color_fprintf(fp, " %6.2f%%\n", - percent); - ret += callchain__fprintf_flat(fp, chain, total_samples); - break; - case CHAIN_GRAPH_ABS: /* Falldown */ - case CHAIN_GRAPH_REL: - ret += callchain__fprintf_graph(fp, chain, total_samples, - left_margin); - case CHAIN_NONE: - default: - break; - } + + ret = percent_color_fprintf(fp, " %6.2f%%\n", percent); + ret += __callchain__fprintf_flat(fp, chain, total_samples); ret += fprintf(fp, "\n"); if (++entries_printed == callchain_param.print_limit) break; + rb_node = rb_next(rb_node); } return ret; } -int hist_entry__snprintf(struct hist_entry *self, char *s, size_t size, - struct hists *hists, struct hists *pair_hists, - bool show_displacement, long displacement, - bool color, u64 session_total) +static size_t hist_entry_callchain__fprintf(struct hist_entry *he, + u64 total_samples, int left_margin, + FILE *fp) +{ + switch (callchain_param.mode) { + case CHAIN_GRAPH_REL: + return callchain__fprintf_graph(fp, &he->sorted_chain, he->period, + left_margin); + break; + case CHAIN_GRAPH_ABS: + return callchain__fprintf_graph(fp, &he->sorted_chain, total_samples, + left_margin); + break; + case CHAIN_FLAT: + return callchain__fprintf_flat(fp, &he->sorted_chain, total_samples); + break; + case CHAIN_NONE: + break; + default: + pr_err("Bad callchain mode\n"); + } + + return 0; +} + +void hists__output_recalc_col_len(struct hists *hists, int max_rows) +{ + struct rb_node *next = rb_first(&hists->entries); + struct hist_entry *n; + int row = 0; + + hists__reset_col_len(hists); + + while (next && row++ < max_rows) { + n = rb_entry(next, struct hist_entry, rb_node); + if (!n->filtered) + hists__calc_col_len(hists, n); + next = rb_next(&n->rb_node); + } +} + +static int hist_entry__pcnt_snprintf(struct hist_entry *he, char *s, + size_t size, struct hists *pair_hists, + bool show_displacement, long displacement, + bool color, u64 total_period) { - struct sort_entry *se; u64 period, total, period_sys, period_us, period_guest_sys, period_guest_us; u64 nr_events; const char *sep = symbol_conf.field_sep; int ret; - if (symbol_conf.exclude_other && !self->parent) + if (symbol_conf.exclude_other && !he->parent) return 0; if (pair_hists) { - period = self->pair ? self->pair->period : 0; - nr_events = self->pair ? self->pair->nr_events : 0; + period = he->pair ? he->pair->period : 0; + nr_events = he->pair ? he->pair->nr_events : 0; total = pair_hists->stats.total_period; - period_sys = self->pair ? self->pair->period_sys : 0; - period_us = self->pair ? self->pair->period_us : 0; - period_guest_sys = self->pair ? self->pair->period_guest_sys : 0; - period_guest_us = self->pair ? self->pair->period_guest_us : 0; + period_sys = he->pair ? he->pair->period_sys : 0; + period_us = he->pair ? he->pair->period_us : 0; + period_guest_sys = he->pair ? he->pair->period_guest_sys : 0; + period_guest_us = he->pair ? he->pair->period_guest_us : 0; } else { - period = self->period; - nr_events = self->nr_events; - total = session_total; - period_sys = self->period_sys; - period_us = self->period_us; - period_guest_sys = self->period_guest_sys; - period_guest_us = self->period_guest_us; + period = he->period; + nr_events = he->nr_events; + total = total_period; + period_sys = he->period_sys; + period_us = he->period_us; + period_guest_sys = he->period_guest_sys; + period_guest_us = he->period_guest_us; } if (total) { @@ -632,7 +885,7 @@ int hist_entry__snprintf(struct hist_entry *self, char *s, size_t size, sep ? "%.2f" : " %6.2f%%", (period * 100.0) / total); else - ret = snprintf(s, size, sep ? "%.2f" : " %6.2f%%", + ret = scnprintf(s, size, sep ? "%.2f" : " %6.2f%%", (period * 100.0) / total); if (symbol_conf.show_cpu_utilization) { ret += percent_color_snprintf(s + ret, size - ret, @@ -655,13 +908,20 @@ int hist_entry__snprintf(struct hist_entry *self, char *s, size_t size, } } } else - ret = snprintf(s, size, sep ? "%" PRIu64 : "%12" PRIu64 " ", period); + ret = scnprintf(s, size, sep ? "%" PRIu64 : "%12" PRIu64 " ", period); if (symbol_conf.show_nr_samples) { if (sep) - ret += snprintf(s + ret, size - ret, "%c%" PRIu64, *sep, nr_events); + ret += scnprintf(s + ret, size - ret, "%c%" PRIu64, *sep, nr_events); + else + ret += scnprintf(s + ret, size - ret, "%11" PRIu64, nr_events); + } + + if (symbol_conf.show_total_period) { + if (sep) + ret += scnprintf(s + ret, size - ret, "%c%" PRIu64, *sep, period); else - ret += snprintf(s + ret, size - ret, "%11" PRIu64, nr_events); + ret += scnprintf(s + ret, size - ret, " %12" PRIu64, period); } if (pair_hists) { @@ -670,60 +930,77 @@ int hist_entry__snprintf(struct hist_entry *self, char *s, size_t size, if (total > 0) old_percent = (period * 100.0) / total; - if (session_total > 0) - new_percent = (self->period * 100.0) / session_total; + if (total_period > 0) + new_percent = (he->period * 100.0) / total_period; diff = new_percent - old_percent; if (fabs(diff) >= 0.01) - snprintf(bf, sizeof(bf), "%+4.2F%%", diff); + scnprintf(bf, sizeof(bf), "%+4.2F%%", diff); else - snprintf(bf, sizeof(bf), " "); + scnprintf(bf, sizeof(bf), " "); if (sep) - ret += snprintf(s + ret, size - ret, "%c%s", *sep, bf); + ret += scnprintf(s + ret, size - ret, "%c%s", *sep, bf); else - ret += snprintf(s + ret, size - ret, "%11.11s", bf); + ret += scnprintf(s + ret, size - ret, "%11.11s", bf); if (show_displacement) { if (displacement) - snprintf(bf, sizeof(bf), "%+4ld", displacement); + scnprintf(bf, sizeof(bf), "%+4ld", displacement); else - snprintf(bf, sizeof(bf), " "); + scnprintf(bf, sizeof(bf), " "); if (sep) - ret += snprintf(s + ret, size - ret, "%c%s", *sep, bf); + ret += scnprintf(s + ret, size - ret, "%c%s", *sep, bf); else - ret += snprintf(s + ret, size - ret, "%6.6s", bf); + ret += scnprintf(s + ret, size - ret, "%6.6s", bf); } } + return ret; +} + +int hist_entry__snprintf(struct hist_entry *he, char *s, size_t size, + struct hists *hists) +{ + const char *sep = symbol_conf.field_sep; + struct sort_entry *se; + int ret = 0; + list_for_each_entry(se, &hist_entry__sort_list, list) { if (se->elide) continue; - ret += snprintf(s + ret, size - ret, "%s", sep ?: " "); - ret += se->se_snprintf(self, s + ret, size - ret, + ret += scnprintf(s + ret, size - ret, "%s", sep ?: " "); + ret += se->se_snprintf(he, s + ret, size - ret, hists__col_len(hists, se->se_width_idx)); } return ret; } -int hist_entry__fprintf(struct hist_entry *self, struct hists *hists, - struct hists *pair_hists, bool show_displacement, - long displacement, FILE *fp, u64 session_total) +static int hist_entry__fprintf(struct hist_entry *he, size_t size, + struct hists *hists, struct hists *pair_hists, + bool show_displacement, long displacement, + u64 total_period, FILE *fp) { char bf[512]; - hist_entry__snprintf(self, bf, sizeof(bf), hists, pair_hists, - show_displacement, displacement, - true, session_total); + int ret; + + if (size == 0 || size > sizeof(bf)) + size = sizeof(bf); + + ret = hist_entry__pcnt_snprintf(he, bf, size, pair_hists, + show_displacement, displacement, + true, total_period); + hist_entry__snprintf(he, bf + ret, size - ret, hists); return fprintf(fp, "%s\n", bf); } -static size_t hist_entry__fprintf_callchain(struct hist_entry *self, - struct hists *hists, FILE *fp, - u64 session_total) +static size_t hist_entry__fprintf_callchain(struct hist_entry *he, + struct hists *hists, + u64 total_period, FILE *fp) { int left_margin = 0; @@ -731,35 +1008,33 @@ static size_t hist_entry__fprintf_callchain(struct hist_entry *self, struct sort_entry *se = list_first_entry(&hist_entry__sort_list, typeof(*se), list); left_margin = hists__col_len(hists, se->se_width_idx); - left_margin -= thread__comm_len(self->thread); + left_margin -= thread__comm_len(he->thread); } - return hist_entry_callchain__fprintf(fp, self, session_total, - left_margin); + return hist_entry_callchain__fprintf(he, total_period, left_margin, fp); } -size_t hists__fprintf(struct hists *self, struct hists *pair, - bool show_displacement, FILE *fp) +size_t hists__fprintf(struct hists *hists, struct hists *pair, + bool show_displacement, bool show_header, int max_rows, + int max_cols, FILE *fp) { struct sort_entry *se; struct rb_node *nd; size_t ret = 0; + u64 total_period; unsigned long position = 1; long displacement = 0; unsigned int width; const char *sep = symbol_conf.field_sep; const char *col_width = symbol_conf.col_width_list_str; + int nr_rows = 0; init_rem_hits(); - fprintf(fp, "# %s", pair ? "Baseline" : "Overhead"); + if (!show_header) + goto print_entries; - if (symbol_conf.show_nr_samples) { - if (sep) - fprintf(fp, "%cSamples", *sep); - else - fputs(" Samples ", fp); - } + fprintf(fp, "# %s", pair ? "Baseline" : "Overhead"); if (symbol_conf.show_cpu_utilization) { if (sep) { @@ -770,8 +1045,8 @@ size_t hists__fprintf(struct hists *self, struct hists *pair, ret += fprintf(fp, "%cguest us", *sep); } } else { - ret += fprintf(fp, " sys "); - ret += fprintf(fp, " us "); + ret += fprintf(fp, " sys "); + ret += fprintf(fp, " us "); if (perf_guest) { ret += fprintf(fp, " guest sys "); ret += fprintf(fp, " guest us "); @@ -779,6 +1054,20 @@ size_t hists__fprintf(struct hists *self, struct hists *pair, } } + if (symbol_conf.show_nr_samples) { + if (sep) + fprintf(fp, "%cSamples", *sep); + else + fputs(" Samples ", fp); + } + + if (symbol_conf.show_total_period) { + if (sep) + ret += fprintf(fp, "%cPeriod", *sep); + else + ret += fprintf(fp, " Period "); + } + if (pair) { if (sep) ret += fprintf(fp, "%cDelta", *sep); @@ -803,25 +1092,32 @@ size_t hists__fprintf(struct hists *self, struct hists *pair, width = strlen(se->se_header); if (symbol_conf.col_width_list_str) { if (col_width) { - hists__set_col_len(self, se->se_width_idx, + hists__set_col_len(hists, se->se_width_idx, atoi(col_width)); col_width = strchr(col_width, ','); if (col_width) ++col_width; } } - if (!hists__new_col_len(self, se->se_width_idx, width)) - width = hists__col_len(self, se->se_width_idx); + if (!hists__new_col_len(hists, se->se_width_idx, width)) + width = hists__col_len(hists, se->se_width_idx); fprintf(fp, " %*s", width, se->se_header); } + fprintf(fp, "\n"); + if (max_rows && ++nr_rows >= max_rows) + goto out; if (sep) goto print_entries; fprintf(fp, "# ........"); + if (symbol_conf.show_cpu_utilization) + fprintf(fp, " ....... ......."); if (symbol_conf.show_nr_samples) fprintf(fp, " .........."); + if (symbol_conf.show_total_period) + fprintf(fp, " ............"); if (pair) { fprintf(fp, " .........."); if (show_displacement) @@ -834,17 +1130,25 @@ size_t hists__fprintf(struct hists *self, struct hists *pair, continue; fprintf(fp, " "); - width = hists__col_len(self, se->se_width_idx); + width = hists__col_len(hists, se->se_width_idx); if (width == 0) width = strlen(se->se_header); for (i = 0; i < width; i++) fprintf(fp, "."); } - fprintf(fp, "\n#\n"); + fprintf(fp, "\n"); + if (max_rows && ++nr_rows >= max_rows) + goto out; + + fprintf(fp, "#\n"); + if (max_rows && ++nr_rows >= max_rows) + goto out; print_entries: - for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) { + total_period = hists->stats.total_period; + + for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); if (h->filtered) @@ -858,19 +1162,21 @@ print_entries: displacement = 0; ++position; } - ret += hist_entry__fprintf(h, self, pair, show_displacement, - displacement, fp, self->stats.total_period); + ret += hist_entry__fprintf(h, max_cols, hists, pair, show_displacement, + displacement, total_period, fp); if (symbol_conf.use_callchain) - ret += hist_entry__fprintf_callchain(h, self, fp, - self->stats.total_period); + ret += hist_entry__fprintf_callchain(h, hists, total_period, fp); + if (max_rows && ++nr_rows >= max_rows) + goto out; + if (h->ms.map == NULL && verbose > 1) { __map_groups__fprintf_maps(&h->thread->mg, MAP__FUNCTION, verbose, fp); fprintf(fp, "%.10s end\n", graph_dotted_line); } } - +out: free(rem_sq_bracket); return ret; @@ -879,7 +1185,7 @@ print_entries: /* * See hists__fprintf to match the column widths */ -unsigned int hists__sort_list_width(struct hists *self) +unsigned int hists__sort_list_width(struct hists *hists) { struct sort_entry *se; int ret = 9; /* total % */ @@ -896,9 +1202,12 @@ unsigned int hists__sort_list_width(struct hists *self) if (symbol_conf.show_nr_samples) ret += 11; + if (symbol_conf.show_total_period) + ret += 13; + list_for_each_entry(se, &hist_entry__sort_list, list) if (!se->elide) - ret += 2 + hists__col_len(self, se->se_width_idx); + ret += 2 + hists__col_len(hists, se->se_width_idx); if (verbose) /* Addr + origin */ ret += 3 + BITS_PER_LONG / 4; @@ -906,63 +1215,115 @@ unsigned int hists__sort_list_width(struct hists *self) return ret; } -static void hists__remove_entry_filter(struct hists *self, struct hist_entry *h, +static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h, enum hist_filter filter) { h->filtered &= ~(1 << filter); if (h->filtered) return; - ++self->nr_entries; + ++hists->nr_entries; if (h->ms.unfolded) - self->nr_entries += h->nr_rows; + hists->nr_entries += h->nr_rows; h->row_offset = 0; - self->stats.total_period += h->period; - self->stats.nr_events[PERF_RECORD_SAMPLE] += h->nr_events; + hists->stats.total_period += h->period; + hists->stats.nr_events[PERF_RECORD_SAMPLE] += h->nr_events; - hists__calc_col_len(self, h); + hists__calc_col_len(hists, h); } -void hists__filter_by_dso(struct hists *self, const struct dso *dso) + +static bool hists__filter_entry_by_dso(struct hists *hists, + struct hist_entry *he) +{ + if (hists->dso_filter != NULL && + (he->ms.map == NULL || he->ms.map->dso != hists->dso_filter)) { + he->filtered |= (1 << HIST_FILTER__DSO); + return true; + } + + return false; +} + +void hists__filter_by_dso(struct hists *hists) { struct rb_node *nd; - self->nr_entries = self->stats.total_period = 0; - self->stats.nr_events[PERF_RECORD_SAMPLE] = 0; - hists__reset_col_len(self); + hists->nr_entries = hists->stats.total_period = 0; + hists->stats.nr_events[PERF_RECORD_SAMPLE] = 0; + hists__reset_col_len(hists); - for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) { + for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); if (symbol_conf.exclude_other && !h->parent) continue; - if (dso != NULL && (h->ms.map == NULL || h->ms.map->dso != dso)) { - h->filtered |= (1 << HIST_FILTER__DSO); + if (hists__filter_entry_by_dso(hists, h)) continue; - } - hists__remove_entry_filter(self, h, HIST_FILTER__DSO); + hists__remove_entry_filter(hists, h, HIST_FILTER__DSO); } } -void hists__filter_by_thread(struct hists *self, const struct thread *thread) +static bool hists__filter_entry_by_thread(struct hists *hists, + struct hist_entry *he) +{ + if (hists->thread_filter != NULL && + he->thread != hists->thread_filter) { + he->filtered |= (1 << HIST_FILTER__THREAD); + return true; + } + + return false; +} + +void hists__filter_by_thread(struct hists *hists) { struct rb_node *nd; - self->nr_entries = self->stats.total_period = 0; - self->stats.nr_events[PERF_RECORD_SAMPLE] = 0; - hists__reset_col_len(self); + hists->nr_entries = hists->stats.total_period = 0; + hists->stats.nr_events[PERF_RECORD_SAMPLE] = 0; + hists__reset_col_len(hists); - for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) { + for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - if (thread != NULL && h->thread != thread) { - h->filtered |= (1 << HIST_FILTER__THREAD); + if (hists__filter_entry_by_thread(hists, h)) + continue; + + hists__remove_entry_filter(hists, h, HIST_FILTER__THREAD); + } +} + +static bool hists__filter_entry_by_symbol(struct hists *hists, + struct hist_entry *he) +{ + if (hists->symbol_filter_str != NULL && + (!he->ms.sym || strstr(he->ms.sym->name, + hists->symbol_filter_str) == NULL)) { + he->filtered |= (1 << HIST_FILTER__SYMBOL); + return true; + } + + return false; +} + +void hists__filter_by_symbol(struct hists *hists) +{ + struct rb_node *nd; + + hists->nr_entries = hists->stats.total_period = 0; + hists->stats.nr_events[PERF_RECORD_SAMPLE] = 0; + hists__reset_col_len(hists); + + for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { + struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); + + if (hists__filter_entry_by_symbol(hists, h)) continue; - } - hists__remove_entry_filter(self, h, HIST_FILTER__THREAD); + hists__remove_entry_filter(hists, h, HIST_FILTER__SYMBOL); } } @@ -976,13 +1337,13 @@ int hist_entry__annotate(struct hist_entry *he, size_t privsize) return symbol__annotate(he->ms.sym, he->ms.map, privsize); } -void hists__inc_nr_events(struct hists *self, u32 type) +void hists__inc_nr_events(struct hists *hists, u32 type) { - ++self->stats.nr_events[0]; - ++self->stats.nr_events[type]; + ++hists->stats.nr_events[0]; + ++hists->stats.nr_events[type]; } -size_t hists__fprintf_nr_events(struct hists *self, FILE *fp) +size_t hists__fprintf_nr_events(struct hists *hists, FILE *fp) { int i; size_t ret = 0; @@ -990,7 +1351,7 @@ size_t hists__fprintf_nr_events(struct hists *self, FILE *fp) for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) { const char *name; - if (self->stats.nr_events[i] == 0) + if (hists->stats.nr_events[i] == 0) continue; name = perf_event__name(i); @@ -998,7 +1359,7 @@ size_t hists__fprintf_nr_events(struct hists *self, FILE *fp) continue; ret += fprintf(fp, "%16s events: %10d\n", name, - self->stats.nr_events[i]); + hists->stats.nr_events[i]); } return ret; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 3beb97c4d822..0b096c27a419 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -2,6 +2,7 @@ #define __PERF_HIST_H #include <linux/types.h> +#include <pthread.h> #include "callchain.h" extern struct callchain_param callchain_param; @@ -27,9 +28,11 @@ struct events_stats { u64 total_lost; u64 total_invalid_chains; u32 nr_events[PERF_RECORD_HEADER_MAX]; + u32 nr_lost_warned; u32 nr_unknown_events; u32 nr_invalid_chains; u32 nr_unknown_id; + u32 nr_unprocessable_samples; }; enum hist_column { @@ -39,47 +42,72 @@ enum hist_column { HISTC_COMM, HISTC_PARENT, HISTC_CPU, + HISTC_MISPREDICT, + HISTC_SYMBOL_FROM, + HISTC_SYMBOL_TO, + HISTC_DSO_FROM, + HISTC_DSO_TO, + HISTC_SRCLINE, HISTC_NR_COLS, /* Last entry */ }; +struct thread; +struct dso; + struct hists { + struct rb_root entries_in_array[2]; + struct rb_root *entries_in; struct rb_root entries; + struct rb_root entries_collapsed; u64 nr_entries; + const struct thread *thread_filter; + const struct dso *dso_filter; + const char *uid_filter_str; + const char *symbol_filter_str; + pthread_mutex_t lock; struct events_stats stats; u64 event_stream; u16 col_len[HISTC_NR_COLS]; - /* Best would be to reuse the session callchain cursor */ - struct callchain_cursor callchain_cursor; }; struct hist_entry *__hists__add_entry(struct hists *self, struct addr_location *al, struct symbol *parent, u64 period); -extern int64_t hist_entry__cmp(struct hist_entry *, struct hist_entry *); -extern int64_t hist_entry__collapse(struct hist_entry *, struct hist_entry *); -int hist_entry__fprintf(struct hist_entry *self, struct hists *hists, - struct hists *pair_hists, bool show_displacement, - long displacement, FILE *fp, u64 total); +int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); +int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); int hist_entry__snprintf(struct hist_entry *self, char *bf, size_t size, - struct hists *hists, struct hists *pair_hists, - bool show_displacement, long displacement, - bool color, u64 total); + struct hists *hists); void hist_entry__free(struct hist_entry *); +struct hist_entry *__hists__add_branch_entry(struct hists *self, + struct addr_location *al, + struct symbol *sym_parent, + struct branch_info *bi, + u64 period); + void hists__output_resort(struct hists *self); +void hists__output_resort_threaded(struct hists *hists); void hists__collapse_resort(struct hists *self); +void hists__collapse_resort_threaded(struct hists *hists); + +void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel); +void hists__decay_entries_threaded(struct hists *hists, bool zap_user, + bool zap_kernel); +void hists__output_recalc_col_len(struct hists *hists, int max_rows); void hists__inc_nr_events(struct hists *self, u32 type); size_t hists__fprintf_nr_events(struct hists *self, FILE *fp); size_t hists__fprintf(struct hists *self, struct hists *pair, - bool show_displacement, FILE *fp); + bool show_displacement, bool show_header, + int max_rows, int max_cols, FILE *fp); int hist_entry__inc_addr_samples(struct hist_entry *self, int evidx, u64 addr); int hist_entry__annotate(struct hist_entry *self, size_t privsize); -void hists__filter_by_dso(struct hists *self, const struct dso *dso); -void hists__filter_by_thread(struct hists *self, const struct thread *thread); +void hists__filter_by_dso(struct hists *hists); +void hists__filter_by_thread(struct hists *hists); +void hists__filter_by_symbol(struct hists *hists); u16 hists__col_len(struct hists *self, enum hist_column col); void hists__set_col_len(struct hists *self, enum hist_column col, u16 len); @@ -90,26 +118,49 @@ struct perf_evlist; #ifdef NO_NEWT_SUPPORT static inline int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __used, - const char *help __used) + const char *help __used, + void(*timer)(void *arg) __used, + void *arg __used, + int refresh __used) { return 0; } static inline int hist_entry__tui_annotate(struct hist_entry *self __used, - int evidx __used) + int evidx __used, + void(*timer)(void *arg) __used, + void *arg __used, + int delay_secs __used) { return 0; } -#define KEY_LEFT -1 -#define KEY_RIGHT -2 +#define K_LEFT -1 +#define K_RIGHT -2 #else -#include <newt.h> -int hist_entry__tui_annotate(struct hist_entry *self, int evidx); +#include "../ui/keysyms.h" +int hist_entry__tui_annotate(struct hist_entry *he, int evidx, + void(*timer)(void *arg), void *arg, int delay_secs); + +int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, + void(*timer)(void *arg), void *arg, + int refresh); +#endif -#define KEY_LEFT NEWT_KEY_LEFT -#define KEY_RIGHT NEWT_KEY_RIGHT +#ifdef NO_GTK2_SUPPORT +static inline +int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist __used, + const char *help __used, + void(*timer)(void *arg) __used, + void *arg __used, + int refresh __used) +{ + return 0; +} -int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help); +#else +int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, const char *help, + void(*timer)(void *arg), void *arg, + int refresh); #endif unsigned int hists__sort_list_width(struct hists *self); diff --git a/tools/perf/util/include/asm/dwarf2.h b/tools/perf/util/include/asm/dwarf2.h index bb4198e7837a..afe38199e922 100644 --- a/tools/perf/util/include/asm/dwarf2.h +++ b/tools/perf/util/include/asm/dwarf2.h @@ -2,10 +2,12 @@ #ifndef PERF_DWARF2_H #define PERF_DWARF2_H -/* dwarf2.h ... dummy header file for including arch/x86/lib/memcpy_64.S */ +/* dwarf2.h ... dummy header file for including arch/x86/lib/mem{cpy,set}_64.S */ #define CFI_STARTPROC #define CFI_ENDPROC +#define CFI_REMEMBER_STATE +#define CFI_RESTORE_STATE #endif /* PERF_DWARF2_H */ diff --git a/tools/perf/util/include/asm/unistd_32.h b/tools/perf/util/include/asm/unistd_32.h new file mode 100644 index 000000000000..8b137891791f --- /dev/null +++ b/tools/perf/util/include/asm/unistd_32.h @@ -0,0 +1 @@ + diff --git a/tools/perf/util/include/asm/unistd_64.h b/tools/perf/util/include/asm/unistd_64.h new file mode 100644 index 000000000000..8b137891791f --- /dev/null +++ b/tools/perf/util/include/asm/unistd_64.h @@ -0,0 +1 @@ + diff --git a/tools/perf/util/include/linux/bitmap.h b/tools/perf/util/include/linux/bitmap.h index eda4416efa0a..bb162e40c76c 100644 --- a/tools/perf/util/include/linux/bitmap.h +++ b/tools/perf/util/include/linux/bitmap.h @@ -5,6 +5,8 @@ #include <linux/bitops.h> int __bitmap_weight(const unsigned long *bitmap, int bits); +void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits); #define BITMAP_LAST_WORD_MASK(nbits) \ ( \ @@ -32,4 +34,13 @@ static inline int bitmap_weight(const unsigned long *src, int nbits) return __bitmap_weight(src, nbits); } +static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, int nbits) +{ + if (small_const_nbits(nbits)) + *dst = *src1 | *src2; + else + __bitmap_or(dst, src1, src2, nbits); +} + #endif /* _PERF_BITOPS_H */ diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h index 305c8484f200..587a230d2075 100644 --- a/tools/perf/util/include/linux/bitops.h +++ b/tools/perf/util/include/linux/bitops.h @@ -8,6 +8,19 @@ #define BITS_PER_LONG __WORDSIZE #define BITS_PER_BYTE 8 #define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) +#define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64)) +#define BITS_TO_U32(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32)) + +#define for_each_set_bit(bit, addr, size) \ + for ((bit) = find_first_bit((addr), (size)); \ + (bit) < (size); \ + (bit) = find_next_bit((addr), (size), (bit) + 1)) + +/* same as for_each_set_bit() but use bit as value to start with */ +#define for_each_set_bit_from(bit, addr, size) \ + for ((bit) = find_next_bit((addr), (size), (bit)); \ + (bit) < (size); \ + (bit) = find_next_bit((addr), (size), (bit) + 1)) static inline void set_bit(int nr, unsigned long *addr) { @@ -30,4 +43,111 @@ static inline unsigned long hweight_long(unsigned long w) return sizeof(w) == 4 ? hweight32(w) : hweight64(w); } +#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) + +/** + * __ffs - find first bit in word. + * @word: The word to search + * + * Undefined if no bit exists, so code should check against 0 first. + */ +static __always_inline unsigned long __ffs(unsigned long word) +{ + int num = 0; + +#if BITS_PER_LONG == 64 + if ((word & 0xffffffff) == 0) { + num += 32; + word >>= 32; + } +#endif + if ((word & 0xffff) == 0) { + num += 16; + word >>= 16; + } + if ((word & 0xff) == 0) { + num += 8; + word >>= 8; + } + if ((word & 0xf) == 0) { + num += 4; + word >>= 4; + } + if ((word & 0x3) == 0) { + num += 2; + word >>= 2; + } + if ((word & 0x1) == 0) + num += 1; + return num; +} + +/* + * Find the first set bit in a memory region. + */ +static inline unsigned long +find_first_bit(const unsigned long *addr, unsigned long size) +{ + const unsigned long *p = addr; + unsigned long result = 0; + unsigned long tmp; + + while (size & ~(BITS_PER_LONG-1)) { + if ((tmp = *(p++))) + goto found; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + + tmp = (*p) & (~0UL >> (BITS_PER_LONG - size)); + if (tmp == 0UL) /* Are any bits set? */ + return result + size; /* Nope. */ +found: + return result + __ffs(tmp); +} + +/* + * Find the next set bit in a memory region. + */ +static inline unsigned long +find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset) +{ + const unsigned long *p = addr + BITOP_WORD(offset); + unsigned long result = offset & ~(BITS_PER_LONG-1); + unsigned long tmp; + + if (offset >= size) + return size; + size -= result; + offset %= BITS_PER_LONG; + if (offset) { + tmp = *(p++); + tmp &= (~0UL << offset); + if (size < BITS_PER_LONG) + goto found_first; + if (tmp) + goto found_middle; + size -= BITS_PER_LONG; + result += BITS_PER_LONG; + } + while (size & ~(BITS_PER_LONG-1)) { + if ((tmp = *(p++))) + goto found_middle; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + tmp = *p; + +found_first: + tmp &= (~0UL >> (BITS_PER_LONG - size)); + if (tmp == 0UL) /* Are any bits set? */ + return result + size; /* Nope. */ +found_middle: + return result + __ffs(tmp); +} + #endif diff --git a/tools/perf/util/include/linux/module.h b/tools/perf/util/include/linux/export.h index b43e2dc21e04..b43e2dc21e04 100644 --- a/tools/perf/util/include/linux/module.h +++ b/tools/perf/util/include/linux/export.h diff --git a/tools/perf/util/include/linux/kernel.h b/tools/perf/util/include/linux/kernel.h index 1eb804fd3fbf..b6842c1d02a8 100644 --- a/tools/perf/util/include/linux/kernel.h +++ b/tools/perf/util/include/linux/kernel.h @@ -108,4 +108,14 @@ int eprintf(int level, #define pr_debug3(fmt, ...) pr_debugN(3, pr_fmt(fmt), ##__VA_ARGS__) #define pr_debug4(fmt, ...) pr_debugN(4, pr_fmt(fmt), ##__VA_ARGS__) +/* + * This looks more complex than it should be. But we need to + * get the type for the ~ right in round_down (it needs to be + * as wide as the result!), and we want to evaluate the macro + * arguments just once each. + */ +#define __round_mask(x, y) ((__typeof__(x))((y)-1)) +#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) +#define round_down(x, y) ((x) & ~__round_mask(x, y)) + #endif diff --git a/tools/perf/util/intlist.c b/tools/perf/util/intlist.c new file mode 100644 index 000000000000..fd530dced9cb --- /dev/null +++ b/tools/perf/util/intlist.c @@ -0,0 +1,101 @@ +/* + * Based on intlist.c by: + * (c) 2009 Arnaldo Carvalho de Melo <acme@redhat.com> + * + * Licensed under the GPLv2. + */ + +#include <errno.h> +#include <stdlib.h> +#include <linux/compiler.h> + +#include "intlist.h" + +static struct rb_node *intlist__node_new(struct rblist *rblist __used, + const void *entry) +{ + int i = (int)((long)entry); + struct rb_node *rc = NULL; + struct int_node *node = malloc(sizeof(*node)); + + if (node != NULL) { + node->i = i; + rc = &node->rb_node; + } + + return rc; +} + +static void int_node__delete(struct int_node *ilist) +{ + free(ilist); +} + +static void intlist__node_delete(struct rblist *rblist __used, + struct rb_node *rb_node) +{ + struct int_node *node = container_of(rb_node, struct int_node, rb_node); + + int_node__delete(node); +} + +static int intlist__node_cmp(struct rb_node *rb_node, const void *entry) +{ + int i = (int)((long)entry); + struct int_node *node = container_of(rb_node, struct int_node, rb_node); + + return node->i - i; +} + +int intlist__add(struct intlist *ilist, int i) +{ + return rblist__add_node(&ilist->rblist, (void *)((long)i)); +} + +void intlist__remove(struct intlist *ilist __used, struct int_node *node) +{ + int_node__delete(node); +} + +struct int_node *intlist__find(struct intlist *ilist, int i) +{ + struct int_node *node = NULL; + struct rb_node *rb_node = rblist__find(&ilist->rblist, (void *)((long)i)); + + if (rb_node) + node = container_of(rb_node, struct int_node, rb_node); + + return node; +} + +struct intlist *intlist__new(void) +{ + struct intlist *ilist = malloc(sizeof(*ilist)); + + if (ilist != NULL) { + rblist__init(&ilist->rblist); + ilist->rblist.node_cmp = intlist__node_cmp; + ilist->rblist.node_new = intlist__node_new; + ilist->rblist.node_delete = intlist__node_delete; + } + + return ilist; +} + +void intlist__delete(struct intlist *ilist) +{ + if (ilist != NULL) + rblist__delete(&ilist->rblist); +} + +struct int_node *intlist__entry(const struct intlist *ilist, unsigned int idx) +{ + struct int_node *node = NULL; + struct rb_node *rb_node; + + rb_node = rblist__entry(&ilist->rblist, idx); + if (rb_node) + node = container_of(rb_node, struct int_node, rb_node); + + return node; +} diff --git a/tools/perf/util/intlist.h b/tools/perf/util/intlist.h new file mode 100644 index 000000000000..6d63ab90db50 --- /dev/null +++ b/tools/perf/util/intlist.h @@ -0,0 +1,75 @@ +#ifndef __PERF_INTLIST_H +#define __PERF_INTLIST_H + +#include <linux/rbtree.h> +#include <stdbool.h> + +#include "rblist.h" + +struct int_node { + struct rb_node rb_node; + int i; +}; + +struct intlist { + struct rblist rblist; +}; + +struct intlist *intlist__new(void); +void intlist__delete(struct intlist *ilist); + +void intlist__remove(struct intlist *ilist, struct int_node *in); +int intlist__add(struct intlist *ilist, int i); + +struct int_node *intlist__entry(const struct intlist *ilist, unsigned int idx); +struct int_node *intlist__find(struct intlist *ilist, int i); + +static inline bool intlist__has_entry(struct intlist *ilist, int i) +{ + return intlist__find(ilist, i) != NULL; +} + +static inline bool intlist__empty(const struct intlist *ilist) +{ + return rblist__empty(&ilist->rblist); +} + +static inline unsigned int intlist__nr_entries(const struct intlist *ilist) +{ + return rblist__nr_entries(&ilist->rblist); +} + +/* For intlist iteration */ +static inline struct int_node *intlist__first(struct intlist *ilist) +{ + struct rb_node *rn = rb_first(&ilist->rblist.entries); + return rn ? rb_entry(rn, struct int_node, rb_node) : NULL; +} +static inline struct int_node *intlist__next(struct int_node *in) +{ + struct rb_node *rn; + if (!in) + return NULL; + rn = rb_next(&in->rb_node); + return rn ? rb_entry(rn, struct int_node, rb_node) : NULL; +} + +/** + * intlist_for_each - iterate over a intlist + * @pos: the &struct int_node to use as a loop cursor. + * @ilist: the &struct intlist for loop. + */ +#define intlist__for_each(pos, ilist) \ + for (pos = intlist__first(ilist); pos; pos = intlist__next(pos)) + +/** + * intlist_for_each_safe - iterate over a intlist safe against removal of + * int_node + * @pos: the &struct int_node to use as a loop cursor. + * @n: another &struct int_node to use as temporary storage. + * @ilist: the &struct intlist for loop. + */ +#define intlist__for_each_safe(pos, n, ilist) \ + for (pos = intlist__first(ilist), n = intlist__next(pos); pos;\ + pos = n, n = intlist__next(n)) +#endif /* __PERF_INTLIST_H */ diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index a16ecab5229d..cc33486ad9e2 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -7,6 +7,8 @@ #include <stdio.h> #include <unistd.h> #include "map.h" +#include "thread.h" +#include "strlist.h" const char *map_type__name[MAP__NR_TYPES] = { [MAP__FUNCTION] = "Functions", @@ -18,6 +20,13 @@ static inline int is_anon_memory(const char *filename) return strcmp(filename, "//anon") == 0; } +static inline int is_no_dso_memory(const char *filename) +{ + return !strcmp(filename, "[stack]") || + !strcmp(filename, "[vdso]") || + !strcmp(filename, "[heap]"); +} + void map__init(struct map *self, enum map_type type, u64 start, u64 end, u64 pgoff, struct dso *dso) { @@ -31,6 +40,7 @@ void map__init(struct map *self, enum map_type type, RB_CLEAR_NODE(&self->rb_node); self->groups = NULL; self->referenced = false; + self->erange_warned = false; } struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, @@ -42,9 +52,10 @@ struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, if (self != NULL) { char newfilename[PATH_MAX]; struct dso *dso; - int anon; + int anon, no_dso; anon = is_anon_memory(filename); + no_dso = is_no_dso_memory(filename); if (anon) { snprintf(newfilename, sizeof(newfilename), "/tmp/perf-%d.map", pid); @@ -57,12 +68,16 @@ struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, map__init(self, type, start, start + len, pgoff, dso); - if (anon) { -set_identity: + if (anon || no_dso) { self->map_ip = self->unmap_ip = identity__map_ip; - } else if (strcmp(filename, "[vdso]") == 0) { - dso__set_loaded(dso, self->type); - goto set_identity; + + /* + * Set memory without DSO as loaded. All map__find_* + * functions still return NULL, and we avoid the + * unnecessary map__load warning. + */ + if (no_dso) + dso__set_loaded(dso, self->type); } } return self; @@ -127,8 +142,8 @@ int map__load(struct map *self, symbol_filter_t filter) if (len > sizeof(DSO__DELETED) && strcmp(name + real_len + 1, DSO__DELETED) == 0) { - pr_warning("%.*s was updated, restart the long " - "running apps that use it!\n", + pr_warning("%.*s was updated (is prelink enabled?). " + "Restart the long running apps that use it!\n", (int)real_len, name); } else { pr_warning("no symbols found in %s, maybe install " @@ -200,6 +215,21 @@ size_t map__fprintf(struct map *self, FILE *fp) self->start, self->end, self->pgoff, self->dso->name); } +size_t map__fprintf_dsoname(struct map *map, FILE *fp) +{ + const char *dsoname; + + if (map && map->dso && (map->dso->name || map->dso->long_name)) { + if (symbol_conf.show_kernel_path && map->dso->long_name) + dsoname = map->dso->long_name; + else if (map->dso->name) + dsoname = map->dso->name; + } else + dsoname = "[unknown]"; + + return fprintf(fp, "%s", dsoname); +} + /* * objdump wants/reports absolute IPs for ET_EXEC, and RIPs for ET_DYN. * map->dso->adjust_symbols==1 for ET_EXEC-like cases. @@ -220,55 +250,55 @@ u64 map__objdump_2ip(struct map *map, u64 addr) return ip; } -void map_groups__init(struct map_groups *self) +void map_groups__init(struct map_groups *mg) { int i; for (i = 0; i < MAP__NR_TYPES; ++i) { - self->maps[i] = RB_ROOT; - INIT_LIST_HEAD(&self->removed_maps[i]); + mg->maps[i] = RB_ROOT; + INIT_LIST_HEAD(&mg->removed_maps[i]); } - self->machine = NULL; + mg->machine = NULL; } -static void maps__delete(struct rb_root *self) +static void maps__delete(struct rb_root *maps) { - struct rb_node *next = rb_first(self); + struct rb_node *next = rb_first(maps); while (next) { struct map *pos = rb_entry(next, struct map, rb_node); next = rb_next(&pos->rb_node); - rb_erase(&pos->rb_node, self); + rb_erase(&pos->rb_node, maps); map__delete(pos); } } -static void maps__delete_removed(struct list_head *self) +static void maps__delete_removed(struct list_head *maps) { struct map *pos, *n; - list_for_each_entry_safe(pos, n, self, node) { + list_for_each_entry_safe(pos, n, maps, node) { list_del(&pos->node); map__delete(pos); } } -void map_groups__exit(struct map_groups *self) +void map_groups__exit(struct map_groups *mg) { int i; for (i = 0; i < MAP__NR_TYPES; ++i) { - maps__delete(&self->maps[i]); - maps__delete_removed(&self->removed_maps[i]); + maps__delete(&mg->maps[i]); + maps__delete_removed(&mg->removed_maps[i]); } } -void map_groups__flush(struct map_groups *self) +void map_groups__flush(struct map_groups *mg) { int type; for (type = 0; type < MAP__NR_TYPES; type++) { - struct rb_root *root = &self->maps[type]; + struct rb_root *root = &mg->maps[type]; struct rb_node *next = rb_first(root); while (next) { @@ -280,17 +310,17 @@ void map_groups__flush(struct map_groups *self) * instance in some hist_entry instances, so * just move them to a separate list. */ - list_add_tail(&pos->node, &self->removed_maps[pos->type]); + list_add_tail(&pos->node, &mg->removed_maps[pos->type]); } } } -struct symbol *map_groups__find_symbol(struct map_groups *self, +struct symbol *map_groups__find_symbol(struct map_groups *mg, enum map_type type, u64 addr, struct map **mapp, symbol_filter_t filter) { - struct map *map = map_groups__find(self, type, addr); + struct map *map = map_groups__find(mg, type, addr); if (map != NULL) { if (mapp != NULL) @@ -301,7 +331,7 @@ struct symbol *map_groups__find_symbol(struct map_groups *self, return NULL; } -struct symbol *map_groups__find_symbol_by_name(struct map_groups *self, +struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg, enum map_type type, const char *name, struct map **mapp, @@ -309,7 +339,7 @@ struct symbol *map_groups__find_symbol_by_name(struct map_groups *self, { struct rb_node *nd; - for (nd = rb_first(&self->maps[type]); nd; nd = rb_next(nd)) { + for (nd = rb_first(&mg->maps[type]); nd; nd = rb_next(nd)) { struct map *pos = rb_entry(nd, struct map, rb_node); struct symbol *sym = map__find_symbol_by_name(pos, name, filter); @@ -323,13 +353,13 @@ struct symbol *map_groups__find_symbol_by_name(struct map_groups *self, return NULL; } -size_t __map_groups__fprintf_maps(struct map_groups *self, +size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type, int verbose, FILE *fp) { size_t printed = fprintf(fp, "%s:\n", map_type__name[type]); struct rb_node *nd; - for (nd = rb_first(&self->maps[type]); nd; nd = rb_next(nd)) { + for (nd = rb_first(&mg->maps[type]); nd; nd = rb_next(nd)) { struct map *pos = rb_entry(nd, struct map, rb_node); printed += fprintf(fp, "Map:"); printed += map__fprintf(pos, fp); @@ -342,22 +372,22 @@ size_t __map_groups__fprintf_maps(struct map_groups *self, return printed; } -size_t map_groups__fprintf_maps(struct map_groups *self, int verbose, FILE *fp) +size_t map_groups__fprintf_maps(struct map_groups *mg, int verbose, FILE *fp) { size_t printed = 0, i; for (i = 0; i < MAP__NR_TYPES; ++i) - printed += __map_groups__fprintf_maps(self, i, verbose, fp); + printed += __map_groups__fprintf_maps(mg, i, verbose, fp); return printed; } -static size_t __map_groups__fprintf_removed_maps(struct map_groups *self, +static size_t __map_groups__fprintf_removed_maps(struct map_groups *mg, enum map_type type, int verbose, FILE *fp) { struct map *pos; size_t printed = 0; - list_for_each_entry(pos, &self->removed_maps[type], node) { + list_for_each_entry(pos, &mg->removed_maps[type], node) { printed += fprintf(fp, "Map:"); printed += map__fprintf(pos, fp); if (verbose > 1) { @@ -368,26 +398,26 @@ static size_t __map_groups__fprintf_removed_maps(struct map_groups *self, return printed; } -static size_t map_groups__fprintf_removed_maps(struct map_groups *self, +static size_t map_groups__fprintf_removed_maps(struct map_groups *mg, int verbose, FILE *fp) { size_t printed = 0, i; for (i = 0; i < MAP__NR_TYPES; ++i) - printed += __map_groups__fprintf_removed_maps(self, i, verbose, fp); + printed += __map_groups__fprintf_removed_maps(mg, i, verbose, fp); return printed; } -size_t map_groups__fprintf(struct map_groups *self, int verbose, FILE *fp) +size_t map_groups__fprintf(struct map_groups *mg, int verbose, FILE *fp) { - size_t printed = map_groups__fprintf_maps(self, verbose, fp); + size_t printed = map_groups__fprintf_maps(mg, verbose, fp); printed += fprintf(fp, "Removed maps:\n"); - return printed + map_groups__fprintf_removed_maps(self, verbose, fp); + return printed + map_groups__fprintf_removed_maps(mg, verbose, fp); } -int map_groups__fixup_overlappings(struct map_groups *self, struct map *map, +int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, int verbose, FILE *fp) { - struct rb_root *root = &self->maps[map->type]; + struct rb_root *root = &mg->maps[map->type]; struct rb_node *next = rb_first(root); int err = 0; @@ -418,7 +448,7 @@ int map_groups__fixup_overlappings(struct map_groups *self, struct map *map, } before->end = map->start - 1; - map_groups__insert(self, before); + map_groups__insert(mg, before); if (verbose >= 2) map__fprintf(before, fp); } @@ -432,7 +462,7 @@ int map_groups__fixup_overlappings(struct map_groups *self, struct map *map, } after->start = map->end + 1; - map_groups__insert(self, after); + map_groups__insert(mg, after); if (verbose >= 2) map__fprintf(after, fp); } @@ -441,7 +471,7 @@ move_map: * If we have references, just move them to a separate list. */ if (pos->referenced) - list_add_tail(&pos->node, &self->removed_maps[map->type]); + list_add_tail(&pos->node, &mg->removed_maps[map->type]); else map__delete(pos); @@ -455,7 +485,7 @@ move_map: /* * XXX This should not really _copy_ te maps, but refcount them. */ -int map_groups__clone(struct map_groups *self, +int map_groups__clone(struct map_groups *mg, struct map_groups *parent, enum map_type type) { struct rb_node *nd; @@ -464,7 +494,7 @@ int map_groups__clone(struct map_groups *self, struct map *new = map__clone(map); if (new == NULL) return -ENOMEM; - map_groups__insert(self, new); + map_groups__insert(mg, new); } return 0; } @@ -550,10 +580,28 @@ int machine__init(struct machine *self, const char *root_dir, pid_t pid) INIT_LIST_HEAD(&self->user_dsos); INIT_LIST_HEAD(&self->kernel_dsos); + self->threads = RB_ROOT; + INIT_LIST_HEAD(&self->dead_threads); + self->last_match = NULL; + self->kmaps.machine = self; self->pid = pid; self->root_dir = strdup(root_dir); - return self->root_dir == NULL ? -ENOMEM : 0; + if (self->root_dir == NULL) + return -ENOMEM; + + if (pid != HOST_KERNEL_ID) { + struct thread *thread = machine__findnew_thread(self, pid); + char comm[64]; + + if (thread == NULL) + return -ENOMEM; + + snprintf(comm, sizeof(comm), "[guest/%d]", pid); + thread__set_comm(thread, comm); + } + + return 0; } static void dsos__delete(struct list_head *self) @@ -637,25 +685,34 @@ struct machine *machines__find(struct rb_root *self, pid_t pid) struct machine *machines__findnew(struct rb_root *self, pid_t pid) { char path[PATH_MAX]; - const char *root_dir; + const char *root_dir = ""; struct machine *machine = machines__find(self, pid); - if (!machine || machine->pid != pid) { - if (pid == HOST_KERNEL_ID || pid == DEFAULT_GUEST_KERNEL_ID) - root_dir = ""; - else { - if (!symbol_conf.guestmount) - goto out; - sprintf(path, "%s/%d", symbol_conf.guestmount, pid); - if (access(path, R_OK)) { + if (machine && (machine->pid == pid)) + goto out; + + if ((pid != HOST_KERNEL_ID) && + (pid != DEFAULT_GUEST_KERNEL_ID) && + (symbol_conf.guestmount)) { + sprintf(path, "%s/%d", symbol_conf.guestmount, pid); + if (access(path, R_OK)) { + static struct strlist *seen; + + if (!seen) + seen = strlist__new(true, NULL); + + if (!strlist__has_entry(seen, path)) { pr_err("Can't access file %s\n", path); - goto out; + strlist__add(seen, path); } - root_dir = path; + machine = NULL; + goto out; } - machine = machines__add(self, pid, root_dir); + root_dir = path; } + machine = machines__add(self, pid, root_dir); + out: return machine; } @@ -681,3 +738,16 @@ char *machine__mmap_name(struct machine *self, char *bf, size_t size) return bf; } + +void machines__set_id_hdr_size(struct rb_root *machines, u16 id_hdr_size) +{ + struct rb_node *node; + struct machine *machine; + + for (node = rb_first(machines); node; node = rb_next(node)) { + machine = rb_entry(node, struct machine, rb_node); + machine->id_hdr_size = id_hdr_size; + } + + return; +} diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index b397c0383728..03a1e9b08b21 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -18,9 +18,11 @@ enum map_type { extern const char *map_type__name[MAP__NR_TYPES]; struct dso; +struct ip_callchain; struct ref_reloc_sym; struct map_groups; struct machine; +struct perf_evsel; struct map { union { @@ -31,6 +33,7 @@ struct map { u64 end; u8 /* enum map_type */ type; bool referenced; + bool erange_warned; u32 priv; u64 pgoff; @@ -61,7 +64,11 @@ struct map_groups { struct machine { struct rb_node rb_node; pid_t pid; + u16 id_hdr_size; char *root_dir; + struct rb_root threads; + struct list_head dead_threads; + struct thread *last_match; struct list_head user_dsos; struct list_head kernel_dsos; struct map_groups kmaps; @@ -112,6 +119,7 @@ void map__delete(struct map *self); struct map *map__clone(struct map *self); int map__overlap(struct map *l, struct map *r); size_t map__fprintf(struct map *self, FILE *fp); +size_t map__fprintf_dsoname(struct map *map, FILE *fp); int map__load(struct map *self, symbol_filter_t filter); struct symbol *map__find_symbol(struct map *self, @@ -123,17 +131,17 @@ void map__fixup_end(struct map *self); void map__reloc_vmlinux(struct map *self); -size_t __map_groups__fprintf_maps(struct map_groups *self, +size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type, int verbose, FILE *fp); void maps__insert(struct rb_root *maps, struct map *map); -void maps__remove(struct rb_root *self, struct map *map); +void maps__remove(struct rb_root *maps, struct map *map); struct map *maps__find(struct rb_root *maps, u64 addr); -void map_groups__init(struct map_groups *self); -void map_groups__exit(struct map_groups *self); -int map_groups__clone(struct map_groups *self, +void map_groups__init(struct map_groups *mg); +void map_groups__exit(struct map_groups *mg); +int map_groups__clone(struct map_groups *mg, struct map_groups *parent, enum map_type type); -size_t map_groups__fprintf(struct map_groups *self, int verbose, FILE *fp); -size_t map_groups__fprintf_maps(struct map_groups *self, int verbose, FILE *fp); +size_t map_groups__fprintf(struct map_groups *mg, int verbose, FILE *fp); +size_t map_groups__fprintf_maps(struct map_groups *mg, int verbose, FILE *fp); typedef void (*machine__process_t)(struct machine *self, void *data); @@ -143,11 +151,19 @@ struct machine *machines__add(struct rb_root *self, pid_t pid, struct machine *machines__find_host(struct rb_root *self); struct machine *machines__find(struct rb_root *self, pid_t pid); struct machine *machines__findnew(struct rb_root *self, pid_t pid); +void machines__set_id_hdr_size(struct rb_root *self, u16 id_hdr_size); char *machine__mmap_name(struct machine *self, char *bf, size_t size); int machine__init(struct machine *self, const char *root_dir, pid_t pid); void machine__exit(struct machine *self); void machine__delete(struct machine *self); +int machine__resolve_callchain(struct machine *machine, + struct thread *thread, + struct ip_callchain *chain, + struct symbol **parent); +int maps__set_kallsyms_ref_reloc_sym(struct map **maps, const char *symbol_name, + u64 addr); + /* * Default guest kernel is defined by parameter --guestkallsyms * and --guestmodules @@ -162,34 +178,40 @@ static inline bool machine__is_host(struct machine *self) return self ? self->pid == HOST_KERNEL_ID : false; } -static inline void map_groups__insert(struct map_groups *self, struct map *map) +static inline void map_groups__insert(struct map_groups *mg, struct map *map) { - maps__insert(&self->maps[map->type], map); - map->groups = self; + maps__insert(&mg->maps[map->type], map); + map->groups = mg; } -static inline void map_groups__remove(struct map_groups *self, struct map *map) +static inline void map_groups__remove(struct map_groups *mg, struct map *map) { - maps__remove(&self->maps[map->type], map); + maps__remove(&mg->maps[map->type], map); } -static inline struct map *map_groups__find(struct map_groups *self, +static inline struct map *map_groups__find(struct map_groups *mg, enum map_type type, u64 addr) { - return maps__find(&self->maps[type], addr); + return maps__find(&mg->maps[type], addr); } -struct symbol *map_groups__find_symbol(struct map_groups *self, +struct symbol *map_groups__find_symbol(struct map_groups *mg, enum map_type type, u64 addr, struct map **mapp, symbol_filter_t filter); -struct symbol *map_groups__find_symbol_by_name(struct map_groups *self, +struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg, enum map_type type, const char *name, struct map **mapp, symbol_filter_t filter); + +struct thread *machine__findnew_thread(struct machine *machine, pid_t pid); +void machine__remove_thread(struct machine *machine, struct thread *th); + +size_t machine__fprintf(struct machine *machine, FILE *fp); + static inline struct symbol *machine__find_kernel_symbol(struct machine *self, enum map_type type, u64 addr, @@ -208,11 +230,11 @@ struct symbol *machine__find_kernel_function(struct machine *self, u64 addr, } static inline -struct symbol *map_groups__find_function_by_name(struct map_groups *self, +struct symbol *map_groups__find_function_by_name(struct map_groups *mg, const char *name, struct map **mapp, symbol_filter_t filter) { - return map_groups__find_symbol_by_name(self, MAP__FUNCTION, name, mapp, filter); + return map_groups__find_symbol_by_name(mg, MAP__FUNCTION, name, mapp, filter); } static inline @@ -225,13 +247,13 @@ struct symbol *machine__find_kernel_function_by_name(struct machine *self, filter); } -int map_groups__fixup_overlappings(struct map_groups *self, struct map *map, +int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, int verbose, FILE *fp); -struct map *map_groups__find_by_name(struct map_groups *self, +struct map *map_groups__find_by_name(struct map_groups *mg, enum map_type type, const char *name); struct map *machine__new_module(struct machine *self, u64 start, const char *filename); -void map_groups__flush(struct map_groups *self); +void map_groups__flush(struct map_groups *mg); #endif /* __PERF_MAP_H */ diff --git a/tools/perf/util/pager.c b/tools/perf/util/pager.c index 1915de20dcac..3322b8446e89 100644 --- a/tools/perf/util/pager.c +++ b/tools/perf/util/pager.c @@ -57,6 +57,10 @@ void setup_pager(void) } if (!pager) pager = getenv("PAGER"); + if (!pager) { + if (!access("/usr/bin/pager", X_OK)) + pager = "/usr/bin/pager"; + } if (!pager) pager = "less"; else if (!*pager || !strcmp(pager, "cat")) diff --git a/tools/perf/util/parse-events-test.c b/tools/perf/util/parse-events-test.c new file mode 100644 index 000000000000..127d648cc548 --- /dev/null +++ b/tools/perf/util/parse-events-test.c @@ -0,0 +1,776 @@ + +#include "parse-events.h" +#include "evsel.h" +#include "evlist.h" +#include "sysfs.h" +#include "../../../include/linux/hw_breakpoint.h" + +#define TEST_ASSERT_VAL(text, cond) \ +do { \ + if (!(cond)) { \ + pr_debug("FAILED %s:%d %s\n", __FILE__, __LINE__, text); \ + return -1; \ + } \ +} while (0) + +#define PERF_TP_SAMPLE_TYPE (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | \ + PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD) + +static int test__checkevent_tracepoint(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = list_entry(evlist->entries.next, + struct perf_evsel, node); + + TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type); + TEST_ASSERT_VAL("wrong sample_type", + PERF_TP_SAMPLE_TYPE == evsel->attr.sample_type); + TEST_ASSERT_VAL("wrong sample_period", 1 == evsel->attr.sample_period); + return 0; +} + +static int test__checkevent_tracepoint_multi(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + TEST_ASSERT_VAL("wrong number of entries", evlist->nr_entries > 1); + + list_for_each_entry(evsel, &evlist->entries, node) { + TEST_ASSERT_VAL("wrong type", + PERF_TYPE_TRACEPOINT == evsel->attr.type); + TEST_ASSERT_VAL("wrong sample_type", + PERF_TP_SAMPLE_TYPE == evsel->attr.sample_type); + TEST_ASSERT_VAL("wrong sample_period", + 1 == evsel->attr.sample_period); + } + return 0; +} + +static int test__checkevent_raw(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = list_entry(evlist->entries.next, + struct perf_evsel, node); + + TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", 0x1a == evsel->attr.config); + return 0; +} + +static int test__checkevent_numeric(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = list_entry(evlist->entries.next, + struct perf_evsel, node); + + TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong type", 1 == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config); + return 0; +} + +static int test__checkevent_symbolic_name(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = list_entry(evlist->entries.next, + struct perf_evsel, node); + + TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config); + return 0; +} + +static int test__checkevent_symbolic_name_config(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = list_entry(evlist->entries.next, + struct perf_evsel, node); + + TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); + TEST_ASSERT_VAL("wrong period", + 100000 == evsel->attr.sample_period); + TEST_ASSERT_VAL("wrong config1", + 0 == evsel->attr.config1); + TEST_ASSERT_VAL("wrong config2", + 1 == evsel->attr.config2); + return 0; +} + +static int test__checkevent_symbolic_alias(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = list_entry(evlist->entries.next, + struct perf_evsel, node); + + TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_SW_PAGE_FAULTS == evsel->attr.config); + return 0; +} + +static int test__checkevent_genhw(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = list_entry(evlist->entries.next, + struct perf_evsel, node); + + TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HW_CACHE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", (1 << 16) == evsel->attr.config); + return 0; +} + +static int test__checkevent_breakpoint(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = list_entry(evlist->entries.next, + struct perf_evsel, node); + + TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config); + TEST_ASSERT_VAL("wrong bp_type", (HW_BREAKPOINT_R | HW_BREAKPOINT_W) == + evsel->attr.bp_type); + TEST_ASSERT_VAL("wrong bp_len", HW_BREAKPOINT_LEN_4 == + evsel->attr.bp_len); + return 0; +} + +static int test__checkevent_breakpoint_x(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = list_entry(evlist->entries.next, + struct perf_evsel, node); + + TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config); + TEST_ASSERT_VAL("wrong bp_type", + HW_BREAKPOINT_X == evsel->attr.bp_type); + TEST_ASSERT_VAL("wrong bp_len", sizeof(long) == evsel->attr.bp_len); + return 0; +} + +static int test__checkevent_breakpoint_r(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = list_entry(evlist->entries.next, + struct perf_evsel, node); + + TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong type", + PERF_TYPE_BREAKPOINT == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config); + TEST_ASSERT_VAL("wrong bp_type", + HW_BREAKPOINT_R == evsel->attr.bp_type); + TEST_ASSERT_VAL("wrong bp_len", + HW_BREAKPOINT_LEN_4 == evsel->attr.bp_len); + return 0; +} + +static int test__checkevent_breakpoint_w(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = list_entry(evlist->entries.next, + struct perf_evsel, node); + + TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong type", + PERF_TYPE_BREAKPOINT == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config); + TEST_ASSERT_VAL("wrong bp_type", + HW_BREAKPOINT_W == evsel->attr.bp_type); + TEST_ASSERT_VAL("wrong bp_len", + HW_BREAKPOINT_LEN_4 == evsel->attr.bp_len); + return 0; +} + +static int test__checkevent_breakpoint_rw(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = list_entry(evlist->entries.next, + struct perf_evsel, node); + + TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong type", + PERF_TYPE_BREAKPOINT == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config); + TEST_ASSERT_VAL("wrong bp_type", + (HW_BREAKPOINT_R|HW_BREAKPOINT_W) == evsel->attr.bp_type); + TEST_ASSERT_VAL("wrong bp_len", + HW_BREAKPOINT_LEN_4 == evsel->attr.bp_len); + return 0; +} + +static int test__checkevent_tracepoint_modifier(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = list_entry(evlist->entries.next, + struct perf_evsel, node); + + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + + return test__checkevent_tracepoint(evlist); +} + +static int +test__checkevent_tracepoint_multi_modifier(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + TEST_ASSERT_VAL("wrong number of entries", evlist->nr_entries > 1); + + list_for_each_entry(evsel, &evlist->entries, node) { + TEST_ASSERT_VAL("wrong exclude_user", + !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", + evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + } + + return test__checkevent_tracepoint_multi(evlist); +} + +static int test__checkevent_raw_modifier(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = list_entry(evlist->entries.next, + struct perf_evsel, node); + + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip); + + return test__checkevent_raw(evlist); +} + +static int test__checkevent_numeric_modifier(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = list_entry(evlist->entries.next, + struct perf_evsel, node); + + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip); + + return test__checkevent_numeric(evlist); +} + +static int test__checkevent_symbolic_name_modifier(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = list_entry(evlist->entries.next, + struct perf_evsel, node); + + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + + return test__checkevent_symbolic_name(evlist); +} + +static int test__checkevent_exclude_host_modifier(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = list_entry(evlist->entries.next, + struct perf_evsel, node); + + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host); + + return test__checkevent_symbolic_name(evlist); +} + +static int test__checkevent_exclude_guest_modifier(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = list_entry(evlist->entries.next, + struct perf_evsel, node); + + TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + + return test__checkevent_symbolic_name(evlist); +} + +static int test__checkevent_symbolic_alias_modifier(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = list_entry(evlist->entries.next, + struct perf_evsel, node); + + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + + return test__checkevent_symbolic_alias(evlist); +} + +static int test__checkevent_genhw_modifier(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = list_entry(evlist->entries.next, + struct perf_evsel, node); + + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip); + + return test__checkevent_genhw(evlist); +} + +static int test__checkevent_breakpoint_modifier(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = list_entry(evlist->entries.next, + struct perf_evsel, node); + + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong name", + !strcmp(perf_evsel__name(evsel), "mem:0x0:rw:u")); + + return test__checkevent_breakpoint(evlist); +} + +static int test__checkevent_breakpoint_x_modifier(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = list_entry(evlist->entries.next, + struct perf_evsel, node); + + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong name", + !strcmp(perf_evsel__name(evsel), "mem:0x0:x:k")); + + return test__checkevent_breakpoint_x(evlist); +} + +static int test__checkevent_breakpoint_r_modifier(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = list_entry(evlist->entries.next, + struct perf_evsel, node); + + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong name", + !strcmp(perf_evsel__name(evsel), "mem:0x0:r:hp")); + + return test__checkevent_breakpoint_r(evlist); +} + +static int test__checkevent_breakpoint_w_modifier(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = list_entry(evlist->entries.next, + struct perf_evsel, node); + + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong name", + !strcmp(perf_evsel__name(evsel), "mem:0x0:w:up")); + + return test__checkevent_breakpoint_w(evlist); +} + +static int test__checkevent_breakpoint_rw_modifier(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = list_entry(evlist->entries.next, + struct perf_evsel, node); + + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong name", + !strcmp(perf_evsel__name(evsel), "mem:0x0:rw:kp")); + + return test__checkevent_breakpoint_rw(evlist); +} + +static int test__checkevent_pmu(struct perf_evlist *evlist) +{ + + struct perf_evsel *evsel = list_entry(evlist->entries.next, + struct perf_evsel, node); + + TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", 10 == evsel->attr.config); + TEST_ASSERT_VAL("wrong config1", 1 == evsel->attr.config1); + TEST_ASSERT_VAL("wrong config2", 3 == evsel->attr.config2); + TEST_ASSERT_VAL("wrong period", 1000 == evsel->attr.sample_period); + + return 0; +} + +static int test__checkevent_list(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->nr_entries); + + /* r1 */ + evsel = list_entry(evlist->entries.next, struct perf_evsel, node); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config); + TEST_ASSERT_VAL("wrong config1", 0 == evsel->attr.config1); + TEST_ASSERT_VAL("wrong config2", 0 == evsel->attr.config2); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + + /* syscalls:sys_enter_open:k */ + evsel = list_entry(evsel->node.next, struct perf_evsel, node); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type); + TEST_ASSERT_VAL("wrong sample_type", + PERF_TP_SAMPLE_TYPE == evsel->attr.sample_type); + TEST_ASSERT_VAL("wrong sample_period", 1 == evsel->attr.sample_period); + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + + /* 1:1:hp */ + evsel = list_entry(evsel->node.next, struct perf_evsel, node); + TEST_ASSERT_VAL("wrong type", 1 == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip); + + return 0; +} + +static int test__checkevent_pmu_name(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + /* cpu/config=1,name=krava/u */ + evsel = list_entry(evlist->entries.next, struct perf_evsel, node); + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config); + TEST_ASSERT_VAL("wrong name", !strcmp(perf_evsel__name(evsel), "krava")); + + /* cpu/config=2/u" */ + evsel = list_entry(evsel->node.next, struct perf_evsel, node); + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", 2 == evsel->attr.config); + TEST_ASSERT_VAL("wrong name", + !strcmp(perf_evsel__name(evsel), "raw 0x2:u")); + + return 0; +} + +static int test__checkterms_simple(struct list_head *terms) +{ + struct parse_events__term *term; + + /* config=10 */ + term = list_entry(terms->next, struct parse_events__term, list); + TEST_ASSERT_VAL("wrong type term", + term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG); + TEST_ASSERT_VAL("wrong type val", + term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + TEST_ASSERT_VAL("wrong val", term->val.num == 10); + TEST_ASSERT_VAL("wrong config", !term->config); + + /* config1 */ + term = list_entry(term->list.next, struct parse_events__term, list); + TEST_ASSERT_VAL("wrong type term", + term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG1); + TEST_ASSERT_VAL("wrong type val", + term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + TEST_ASSERT_VAL("wrong val", term->val.num == 1); + TEST_ASSERT_VAL("wrong config", !term->config); + + /* config2=3 */ + term = list_entry(term->list.next, struct parse_events__term, list); + TEST_ASSERT_VAL("wrong type term", + term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG2); + TEST_ASSERT_VAL("wrong type val", + term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + TEST_ASSERT_VAL("wrong val", term->val.num == 3); + TEST_ASSERT_VAL("wrong config", !term->config); + + /* umask=1*/ + term = list_entry(term->list.next, struct parse_events__term, list); + TEST_ASSERT_VAL("wrong type term", + term->type_term == PARSE_EVENTS__TERM_TYPE_USER); + TEST_ASSERT_VAL("wrong type val", + term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + TEST_ASSERT_VAL("wrong val", term->val.num == 1); + TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "umask")); + + return 0; +} + +struct test__event_st { + const char *name; + __u32 type; + int (*check)(struct perf_evlist *evlist); +}; + +static struct test__event_st test__events[] = { + [0] = { + .name = "syscalls:sys_enter_open", + .check = test__checkevent_tracepoint, + }, + [1] = { + .name = "syscalls:*", + .check = test__checkevent_tracepoint_multi, + }, + [2] = { + .name = "r1a", + .check = test__checkevent_raw, + }, + [3] = { + .name = "1:1", + .check = test__checkevent_numeric, + }, + [4] = { + .name = "instructions", + .check = test__checkevent_symbolic_name, + }, + [5] = { + .name = "cycles/period=100000,config2/", + .check = test__checkevent_symbolic_name_config, + }, + [6] = { + .name = "faults", + .check = test__checkevent_symbolic_alias, + }, + [7] = { + .name = "L1-dcache-load-miss", + .check = test__checkevent_genhw, + }, + [8] = { + .name = "mem:0", + .check = test__checkevent_breakpoint, + }, + [9] = { + .name = "mem:0:x", + .check = test__checkevent_breakpoint_x, + }, + [10] = { + .name = "mem:0:r", + .check = test__checkevent_breakpoint_r, + }, + [11] = { + .name = "mem:0:w", + .check = test__checkevent_breakpoint_w, + }, + [12] = { + .name = "syscalls:sys_enter_open:k", + .check = test__checkevent_tracepoint_modifier, + }, + [13] = { + .name = "syscalls:*:u", + .check = test__checkevent_tracepoint_multi_modifier, + }, + [14] = { + .name = "r1a:kp", + .check = test__checkevent_raw_modifier, + }, + [15] = { + .name = "1:1:hp", + .check = test__checkevent_numeric_modifier, + }, + [16] = { + .name = "instructions:h", + .check = test__checkevent_symbolic_name_modifier, + }, + [17] = { + .name = "faults:u", + .check = test__checkevent_symbolic_alias_modifier, + }, + [18] = { + .name = "L1-dcache-load-miss:kp", + .check = test__checkevent_genhw_modifier, + }, + [19] = { + .name = "mem:0:u", + .check = test__checkevent_breakpoint_modifier, + }, + [20] = { + .name = "mem:0:x:k", + .check = test__checkevent_breakpoint_x_modifier, + }, + [21] = { + .name = "mem:0:r:hp", + .check = test__checkevent_breakpoint_r_modifier, + }, + [22] = { + .name = "mem:0:w:up", + .check = test__checkevent_breakpoint_w_modifier, + }, + [23] = { + .name = "r1,syscalls:sys_enter_open:k,1:1:hp", + .check = test__checkevent_list, + }, + [24] = { + .name = "instructions:G", + .check = test__checkevent_exclude_host_modifier, + }, + [25] = { + .name = "instructions:H", + .check = test__checkevent_exclude_guest_modifier, + }, + [26] = { + .name = "mem:0:rw", + .check = test__checkevent_breakpoint_rw, + }, + [27] = { + .name = "mem:0:rw:kp", + .check = test__checkevent_breakpoint_rw_modifier, + }, +}; + +static struct test__event_st test__events_pmu[] = { + [0] = { + .name = "cpu/config=10,config1,config2=3,period=1000/u", + .check = test__checkevent_pmu, + }, + [1] = { + .name = "cpu/config=1,name=krava/u,cpu/config=2/u", + .check = test__checkevent_pmu_name, + }, +}; + +struct test__term { + const char *str; + __u32 type; + int (*check)(struct list_head *terms); +}; + +static struct test__term test__terms[] = { + [0] = { + .str = "config=10,config1,config2=3,umask=1", + .check = test__checkterms_simple, + }, +}; + +#define TEST__TERMS_CNT (sizeof(test__terms) / \ + sizeof(struct test__term)) + +static int test_event(struct test__event_st *e) +{ + struct perf_evlist *evlist; + int ret; + + evlist = perf_evlist__new(NULL, NULL); + if (evlist == NULL) + return -ENOMEM; + + ret = parse_events(evlist, e->name, 0); + if (ret) { + pr_debug("failed to parse event '%s', err %d\n", + e->name, ret); + return ret; + } + + ret = e->check(evlist); + perf_evlist__delete(evlist); + + return ret; +} + +static int test_events(struct test__event_st *events, unsigned cnt) +{ + int ret = 0; + unsigned i; + + for (i = 0; i < cnt; i++) { + struct test__event_st *e = &events[i]; + + pr_debug("running test %d '%s'\n", i, e->name); + ret = test_event(e); + if (ret) + break; + } + + return ret; +} + +static int test_term(struct test__term *t) +{ + struct list_head *terms; + int ret; + + terms = malloc(sizeof(*terms)); + if (!terms) + return -ENOMEM; + + INIT_LIST_HEAD(terms); + + ret = parse_events_terms(terms, t->str); + if (ret) { + pr_debug("failed to parse terms '%s', err %d\n", + t->str , ret); + return ret; + } + + ret = t->check(terms); + parse_events__free_terms(terms); + + return ret; +} + +static int test_terms(struct test__term *terms, unsigned cnt) +{ + int ret = 0; + unsigned i; + + for (i = 0; i < cnt; i++) { + struct test__term *t = &terms[i]; + + pr_debug("running test %d '%s'\n", i, t->str); + ret = test_term(t); + if (ret) + break; + } + + return ret; +} + +static int test_pmu(void) +{ + struct stat st; + char path[PATH_MAX]; + int ret; + + snprintf(path, PATH_MAX, "%s/bus/event_source/devices/cpu/format/", + sysfs_find_mountpoint()); + + ret = stat(path, &st); + if (ret) + pr_debug("ommiting PMU cpu tests\n"); + return !ret; +} + +int parse_events__test(void) +{ + int ret; + +#define TEST_EVENTS(tests) \ +do { \ + ret = test_events(tests, ARRAY_SIZE(tests)); \ + if (ret) \ + return ret; \ +} while (0) + + TEST_EVENTS(test__events); + + if (test_pmu()) + TEST_EVENTS(test__events_pmu); + + return test_terms(test__terms, ARRAY_SIZE(test__terms)); +} diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 928918b796b2..74a5af4d33ec 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -11,45 +11,103 @@ #include "cache.h" #include "header.h" #include "debugfs.h" +#include "parse-events-bison.h" +#define YY_EXTRA_TYPE int +#include "parse-events-flex.h" +#include "pmu.h" + +#define MAX_NAME_LEN 100 struct event_symbol { - u8 type; - u64 config; const char *symbol; const char *alias; }; -enum event_result { - EVT_FAILED, - EVT_HANDLED, - EVT_HANDLED_ALL +#ifdef PARSER_DEBUG +extern int parse_events_debug; +#endif +int parse_events_parse(void *data, void *scanner); + +static struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = { + .symbol = "cpu-cycles", + .alias = "cycles", + }, + [PERF_COUNT_HW_INSTRUCTIONS] = { + .symbol = "instructions", + .alias = "", + }, + [PERF_COUNT_HW_CACHE_REFERENCES] = { + .symbol = "cache-references", + .alias = "", + }, + [PERF_COUNT_HW_CACHE_MISSES] = { + .symbol = "cache-misses", + .alias = "", + }, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { + .symbol = "branch-instructions", + .alias = "branches", + }, + [PERF_COUNT_HW_BRANCH_MISSES] = { + .symbol = "branch-misses", + .alias = "", + }, + [PERF_COUNT_HW_BUS_CYCLES] = { + .symbol = "bus-cycles", + .alias = "", + }, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = { + .symbol = "stalled-cycles-frontend", + .alias = "idle-cycles-frontend", + }, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = { + .symbol = "stalled-cycles-backend", + .alias = "idle-cycles-backend", + }, + [PERF_COUNT_HW_REF_CPU_CYCLES] = { + .symbol = "ref-cycles", + .alias = "", + }, }; -char debugfs_path[MAXPATHLEN]; - -#define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x -#define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x - -static struct event_symbol event_symbols[] = { - { CHW(CPU_CYCLES), "cpu-cycles", "cycles" }, - { CHW(STALLED_CYCLES_FRONTEND), "stalled-cycles-frontend", "idle-cycles-frontend" }, - { CHW(STALLED_CYCLES_BACKEND), "stalled-cycles-backend", "idle-cycles-backend" }, - { CHW(INSTRUCTIONS), "instructions", "" }, - { CHW(CACHE_REFERENCES), "cache-references", "" }, - { CHW(CACHE_MISSES), "cache-misses", "" }, - { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", "branches" }, - { CHW(BRANCH_MISSES), "branch-misses", "" }, - { CHW(BUS_CYCLES), "bus-cycles", "" }, - - { CSW(CPU_CLOCK), "cpu-clock", "" }, - { CSW(TASK_CLOCK), "task-clock", "" }, - { CSW(PAGE_FAULTS), "page-faults", "faults" }, - { CSW(PAGE_FAULTS_MIN), "minor-faults", "" }, - { CSW(PAGE_FAULTS_MAJ), "major-faults", "" }, - { CSW(CONTEXT_SWITCHES), "context-switches", "cs" }, - { CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" }, - { CSW(ALIGNMENT_FAULTS), "alignment-faults", "" }, - { CSW(EMULATION_FAULTS), "emulation-faults", "" }, +static struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = { + [PERF_COUNT_SW_CPU_CLOCK] = { + .symbol = "cpu-clock", + .alias = "", + }, + [PERF_COUNT_SW_TASK_CLOCK] = { + .symbol = "task-clock", + .alias = "", + }, + [PERF_COUNT_SW_PAGE_FAULTS] = { + .symbol = "page-faults", + .alias = "faults", + }, + [PERF_COUNT_SW_CONTEXT_SWITCHES] = { + .symbol = "context-switches", + .alias = "cs", + }, + [PERF_COUNT_SW_CPU_MIGRATIONS] = { + .symbol = "cpu-migrations", + .alias = "migrations", + }, + [PERF_COUNT_SW_PAGE_FAULTS_MIN] = { + .symbol = "minor-faults", + .alias = "", + }, + [PERF_COUNT_SW_PAGE_FAULTS_MAJ] = { + .symbol = "major-faults", + .alias = "", + }, + [PERF_COUNT_SW_ALIGNMENT_FAULTS] = { + .symbol = "alignment-faults", + .alias = "", + }, + [PERF_COUNT_SW_EMULATION_FAULTS] = { + .symbol = "emulation-faults", + .alias = "", + }, }; #define __PERF_EVENT_FIELD(config, name) \ @@ -60,75 +118,6 @@ static struct event_symbol event_symbols[] = { #define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE) #define PERF_EVENT_ID(config) __PERF_EVENT_FIELD(config, EVENT) -static const char *hw_event_names[PERF_COUNT_HW_MAX] = { - "cycles", - "instructions", - "cache-references", - "cache-misses", - "branches", - "branch-misses", - "bus-cycles", - "stalled-cycles-frontend", - "stalled-cycles-backend", -}; - -static const char *sw_event_names[PERF_COUNT_SW_MAX] = { - "cpu-clock", - "task-clock", - "page-faults", - "context-switches", - "CPU-migrations", - "minor-faults", - "major-faults", - "alignment-faults", - "emulation-faults", -}; - -#define MAX_ALIASES 8 - -static const char *hw_cache[PERF_COUNT_HW_CACHE_MAX][MAX_ALIASES] = { - { "L1-dcache", "l1-d", "l1d", "L1-data", }, - { "L1-icache", "l1-i", "l1i", "L1-instruction", }, - { "LLC", "L2", }, - { "dTLB", "d-tlb", "Data-TLB", }, - { "iTLB", "i-tlb", "Instruction-TLB", }, - { "branch", "branches", "bpu", "btb", "bpc", }, - { "node", }, -}; - -static const char *hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][MAX_ALIASES] = { - { "load", "loads", "read", }, - { "store", "stores", "write", }, - { "prefetch", "prefetches", "speculative-read", "speculative-load", }, -}; - -static const char *hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX] - [MAX_ALIASES] = { - { "refs", "Reference", "ops", "access", }, - { "misses", "miss", }, -}; - -#define C(x) PERF_COUNT_HW_CACHE_##x -#define CACHE_READ (1 << C(OP_READ)) -#define CACHE_WRITE (1 << C(OP_WRITE)) -#define CACHE_PREFETCH (1 << C(OP_PREFETCH)) -#define COP(x) (1 << x) - -/* - * cache operartion stat - * L1I : Read and prefetch only - * ITLB and BPU : Read-only - */ -static unsigned long hw_cache_stat[C(MAX)] = { - [C(L1D)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), - [C(L1I)] = (CACHE_READ | CACHE_PREFETCH), - [C(LL)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), - [C(DTLB)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), - [C(ITLB)] = (CACHE_READ), - [C(BPU)] = (CACHE_READ), - [C(NODE)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), -}; - #define for_each_subsystem(sys_dir, sys_dirent, sys_next) \ while (!readdir_r(sys_dir, &sys_dirent, &sys_next) && sys_next) \ if (sys_dirent.d_type == DT_DIR && \ @@ -140,7 +129,7 @@ static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir) char evt_path[MAXPATHLEN]; int fd; - snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", debugfs_path, + snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", tracing_events_path, sys_dir->d_name, evt_dir->d_name); fd = open(evt_path, O_RDONLY); if (fd < 0) @@ -165,22 +154,22 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) struct tracepoint_path *path = NULL; DIR *sys_dir, *evt_dir; struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent; - char id_buf[4]; + char id_buf[24]; int fd; u64 id; char evt_path[MAXPATHLEN]; char dir_path[MAXPATHLEN]; - if (debugfs_valid_mountpoint(debugfs_path)) + if (debugfs_valid_mountpoint(tracing_events_path)) return NULL; - sys_dir = opendir(debugfs_path); + sys_dir = opendir(tracing_events_path); if (!sys_dir) return NULL; for_each_subsystem(sys_dir, sys_dirent, sys_next) { - snprintf(dir_path, MAXPATHLEN, "%s/%s", debugfs_path, + snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path, sys_dirent.d_name); evt_dir = opendir(dir_path); if (!evt_dir) @@ -228,48 +217,6 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) return NULL; } -#define TP_PATH_LEN (MAX_EVENT_LENGTH * 2 + 1) -static const char *tracepoint_id_to_name(u64 config) -{ - static char buf[TP_PATH_LEN]; - struct tracepoint_path *path; - - path = tracepoint_id_to_path(config); - if (path) { - snprintf(buf, TP_PATH_LEN, "%s:%s", path->system, path->name); - free(path->name); - free(path->system); - free(path); - } else - snprintf(buf, TP_PATH_LEN, "%s:%s", "unknown", "unknown"); - - return buf; -} - -static int is_cache_op_valid(u8 cache_type, u8 cache_op) -{ - if (hw_cache_stat[cache_type] & COP(cache_op)) - return 1; /* valid */ - else - return 0; /* invalid */ -} - -static char *event_cache_name(u8 cache_type, u8 cache_op, u8 cache_result) -{ - static char name[50]; - - if (cache_result) { - sprintf(name, "%s-%s-%s", hw_cache[cache_type][0], - hw_cache_op[cache_op][0], - hw_cache_result[cache_result][0]); - } else { - sprintf(name, "%s-%s", hw_cache[cache_type][0], - hw_cache_op[cache_op][1]); - } - - return name; -} - const char *event_type(int type) { switch (type) { @@ -292,128 +239,93 @@ const char *event_type(int type) return "unknown"; } -const char *event_name(struct perf_evsel *evsel) +static int add_event(struct list_head **_list, int *idx, + struct perf_event_attr *attr, char *name) { - u64 config = evsel->attr.config; - int type = evsel->attr.type; - - if (evsel->name) - return evsel->name; - - return __event_name(type, config); -} - -const char *__event_name(int type, u64 config) -{ - static char buf[32]; - - if (type == PERF_TYPE_RAW) { - sprintf(buf, "raw 0x%" PRIx64, config); - return buf; + struct perf_evsel *evsel; + struct list_head *list = *_list; + + if (!list) { + list = malloc(sizeof(*list)); + if (!list) + return -ENOMEM; + INIT_LIST_HEAD(list); } - switch (type) { - case PERF_TYPE_HARDWARE: - if (config < PERF_COUNT_HW_MAX && hw_event_names[config]) - return hw_event_names[config]; - return "unknown-hardware"; - - case PERF_TYPE_HW_CACHE: { - u8 cache_type, cache_op, cache_result; + event_attr_init(attr); - cache_type = (config >> 0) & 0xff; - if (cache_type > PERF_COUNT_HW_CACHE_MAX) - return "unknown-ext-hardware-cache-type"; - - cache_op = (config >> 8) & 0xff; - if (cache_op > PERF_COUNT_HW_CACHE_OP_MAX) - return "unknown-ext-hardware-cache-op"; - - cache_result = (config >> 16) & 0xff; - if (cache_result > PERF_COUNT_HW_CACHE_RESULT_MAX) - return "unknown-ext-hardware-cache-result"; - - if (!is_cache_op_valid(cache_type, cache_op)) - return "invalid-cache"; - - return event_cache_name(cache_type, cache_op, cache_result); + evsel = perf_evsel__new(attr, (*idx)++); + if (!evsel) { + free(list); + return -ENOMEM; } - case PERF_TYPE_SOFTWARE: - if (config < PERF_COUNT_SW_MAX && sw_event_names[config]) - return sw_event_names[config]; - return "unknown-software"; - - case PERF_TYPE_TRACEPOINT: - return tracepoint_id_to_name(config); - - default: - break; - } - - return "unknown"; + if (name) + evsel->name = strdup(name); + list_add_tail(&evsel->node, list); + *_list = list; + return 0; } -static int parse_aliases(const char **str, const char *names[][MAX_ALIASES], int size) +static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size) { int i, j; int n, longest = -1; for (i = 0; i < size; i++) { - for (j = 0; j < MAX_ALIASES && names[i][j]; j++) { + for (j = 0; j < PERF_EVSEL__MAX_ALIASES && names[i][j]; j++) { n = strlen(names[i][j]); - if (n > longest && !strncasecmp(*str, names[i][j], n)) + if (n > longest && !strncasecmp(str, names[i][j], n)) longest = n; } - if (longest > 0) { - *str += longest; + if (longest > 0) return i; - } } return -1; } -static enum event_result -parse_generic_hw_event(const char **str, struct perf_event_attr *attr) +int parse_events_add_cache(struct list_head **list, int *idx, + char *type, char *op_result1, char *op_result2) { - const char *s = *str; + struct perf_event_attr attr; + char name[MAX_NAME_LEN]; int cache_type = -1, cache_op = -1, cache_result = -1; + char *op_result[2] = { op_result1, op_result2 }; + int i, n; - cache_type = parse_aliases(&s, hw_cache, PERF_COUNT_HW_CACHE_MAX); /* * No fallback - if we cannot get a clear cache type * then bail out: */ + cache_type = parse_aliases(type, perf_evsel__hw_cache, + PERF_COUNT_HW_CACHE_MAX); if (cache_type == -1) - return EVT_FAILED; + return -EINVAL; + + n = snprintf(name, MAX_NAME_LEN, "%s", type); + + for (i = 0; (i < 2) && (op_result[i]); i++) { + char *str = op_result[i]; - while ((cache_op == -1 || cache_result == -1) && *s == '-') { - ++s; + snprintf(name + n, MAX_NAME_LEN - n, "-%s\n", str); if (cache_op == -1) { - cache_op = parse_aliases(&s, hw_cache_op, - PERF_COUNT_HW_CACHE_OP_MAX); + cache_op = parse_aliases(str, perf_evsel__hw_cache_op, + PERF_COUNT_HW_CACHE_OP_MAX); if (cache_op >= 0) { - if (!is_cache_op_valid(cache_type, cache_op)) - return EVT_FAILED; + if (!perf_evsel__is_cache_op_valid(cache_type, cache_op)) + return -EINVAL; continue; } } if (cache_result == -1) { - cache_result = parse_aliases(&s, hw_cache_result, - PERF_COUNT_HW_CACHE_RESULT_MAX); + cache_result = parse_aliases(str, perf_evsel__hw_cache_result, + PERF_COUNT_HW_CACHE_RESULT_MAX); if (cache_result >= 0) continue; } - - /* - * Can't parse this as a cache op or result, so back up - * to the '-'. - */ - --s; - break; } /* @@ -428,324 +340,297 @@ parse_generic_hw_event(const char **str, struct perf_event_attr *attr) if (cache_result == -1) cache_result = PERF_COUNT_HW_CACHE_RESULT_ACCESS; - attr->config = cache_type | (cache_op << 8) | (cache_result << 16); - attr->type = PERF_TYPE_HW_CACHE; - - *str = s; - return EVT_HANDLED; + memset(&attr, 0, sizeof(attr)); + attr.config = cache_type | (cache_op << 8) | (cache_result << 16); + attr.type = PERF_TYPE_HW_CACHE; + return add_event(list, idx, &attr, name); } -static enum event_result -parse_single_tracepoint_event(char *sys_name, - const char *evt_name, - unsigned int evt_length, - struct perf_event_attr *attr, - const char **strp) +static int add_tracepoint(struct list_head **list, int *idx, + char *sys_name, char *evt_name) { + struct perf_event_attr attr; + char name[MAX_NAME_LEN]; char evt_path[MAXPATHLEN]; char id_buf[4]; u64 id; int fd; - snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", debugfs_path, + snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", tracing_events_path, sys_name, evt_name); fd = open(evt_path, O_RDONLY); if (fd < 0) - return EVT_FAILED; + return -1; if (read(fd, id_buf, sizeof(id_buf)) < 0) { close(fd); - return EVT_FAILED; + return -1; } close(fd); id = atoll(id_buf); - attr->config = id; - attr->type = PERF_TYPE_TRACEPOINT; - *strp += strlen(sys_name) + evt_length + 1; /* + 1 for the ':' */ - - attr->sample_type |= PERF_SAMPLE_RAW; - attr->sample_type |= PERF_SAMPLE_TIME; - attr->sample_type |= PERF_SAMPLE_CPU; - - attr->sample_period = 1; - - return EVT_HANDLED; + memset(&attr, 0, sizeof(attr)); + attr.config = id; + attr.type = PERF_TYPE_TRACEPOINT; + attr.sample_type |= PERF_SAMPLE_RAW; + attr.sample_type |= PERF_SAMPLE_TIME; + attr.sample_type |= PERF_SAMPLE_CPU; + attr.sample_type |= PERF_SAMPLE_PERIOD; + attr.sample_period = 1; + + snprintf(name, MAX_NAME_LEN, "%s:%s", sys_name, evt_name); + return add_event(list, idx, &attr, name); } -/* sys + ':' + event + ':' + flags*/ -#define MAX_EVOPT_LEN (MAX_EVENT_LENGTH * 2 + 2 + 128) -static enum event_result -parse_multiple_tracepoint_event(struct perf_evlist *evlist, char *sys_name, - const char *evt_exp, char *flags) +static int add_tracepoint_multi(struct list_head **list, int *idx, + char *sys_name, char *evt_name) { char evt_path[MAXPATHLEN]; struct dirent *evt_ent; DIR *evt_dir; + int ret = 0; - snprintf(evt_path, MAXPATHLEN, "%s/%s", debugfs_path, sys_name); + snprintf(evt_path, MAXPATHLEN, "%s/%s", tracing_events_path, sys_name); evt_dir = opendir(evt_path); - if (!evt_dir) { perror("Can't open event dir"); - return EVT_FAILED; + return -1; } - while ((evt_ent = readdir(evt_dir))) { - char event_opt[MAX_EVOPT_LEN + 1]; - int len; - + while (!ret && (evt_ent = readdir(evt_dir))) { if (!strcmp(evt_ent->d_name, ".") || !strcmp(evt_ent->d_name, "..") || !strcmp(evt_ent->d_name, "enable") || !strcmp(evt_ent->d_name, "filter")) continue; - if (!strglobmatch(evt_ent->d_name, evt_exp)) + if (!strglobmatch(evt_ent->d_name, evt_name)) continue; - len = snprintf(event_opt, MAX_EVOPT_LEN, "%s:%s%s%s", sys_name, - evt_ent->d_name, flags ? ":" : "", - flags ?: ""); - if (len < 0) - return EVT_FAILED; - - if (parse_events(evlist, event_opt, 0)) - return EVT_FAILED; + ret = add_tracepoint(list, idx, sys_name, evt_ent->d_name); } - return EVT_HANDLED_ALL; + return ret; } -static enum event_result -parse_tracepoint_event(struct perf_evlist *evlist, const char **strp, - struct perf_event_attr *attr) +int parse_events_add_tracepoint(struct list_head **list, int *idx, + char *sys, char *event) { - const char *evt_name; - char *flags = NULL, *comma_loc; - char sys_name[MAX_EVENT_LENGTH]; - unsigned int sys_length, evt_length; - - if (debugfs_valid_mountpoint(debugfs_path)) - return 0; + int ret; - evt_name = strchr(*strp, ':'); - if (!evt_name) - return EVT_FAILED; + ret = debugfs_valid_mountpoint(tracing_events_path); + if (ret) + return ret; - sys_length = evt_name - *strp; - if (sys_length >= MAX_EVENT_LENGTH) - return 0; - - strncpy(sys_name, *strp, sys_length); - sys_name[sys_length] = '\0'; - evt_name = evt_name + 1; - - comma_loc = strchr(evt_name, ','); - if (comma_loc) { - /* take the event name up to the comma */ - evt_name = strndup(evt_name, comma_loc - evt_name); - } - flags = strchr(evt_name, ':'); - if (flags) { - /* split it out: */ - evt_name = strndup(evt_name, flags - evt_name); - flags++; - } - - evt_length = strlen(evt_name); - if (evt_length >= MAX_EVENT_LENGTH) - return EVT_FAILED; - if (strpbrk(evt_name, "*?")) { - *strp += strlen(sys_name) + evt_length + 1; /* 1 == the ':' */ - return parse_multiple_tracepoint_event(evlist, sys_name, - evt_name, flags); - } else { - return parse_single_tracepoint_event(sys_name, evt_name, - evt_length, attr, strp); - } + return strpbrk(event, "*?") ? + add_tracepoint_multi(list, idx, sys, event) : + add_tracepoint(list, idx, sys, event); } -static enum event_result -parse_breakpoint_type(const char *type, const char **strp, - struct perf_event_attr *attr) +static int +parse_breakpoint_type(const char *type, struct perf_event_attr *attr) { int i; for (i = 0; i < 3; i++) { - if (!type[i]) + if (!type || !type[i]) break; +#define CHECK_SET_TYPE(bit) \ +do { \ + if (attr->bp_type & bit) \ + return -EINVAL; \ + else \ + attr->bp_type |= bit; \ +} while (0) + switch (type[i]) { case 'r': - attr->bp_type |= HW_BREAKPOINT_R; + CHECK_SET_TYPE(HW_BREAKPOINT_R); break; case 'w': - attr->bp_type |= HW_BREAKPOINT_W; + CHECK_SET_TYPE(HW_BREAKPOINT_W); break; case 'x': - attr->bp_type |= HW_BREAKPOINT_X; + CHECK_SET_TYPE(HW_BREAKPOINT_X); break; default: - return EVT_FAILED; + return -EINVAL; } } + +#undef CHECK_SET_TYPE + if (!attr->bp_type) /* Default */ attr->bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W; - *strp = type + i; - - return EVT_HANDLED; + return 0; } -static enum event_result -parse_breakpoint_event(const char **strp, struct perf_event_attr *attr) +int parse_events_add_breakpoint(struct list_head **list, int *idx, + void *ptr, char *type) { - const char *target; - const char *type; - char *endaddr; - u64 addr; - enum event_result err; - - target = strchr(*strp, ':'); - if (!target) - return EVT_FAILED; - - if (strncmp(*strp, "mem", target - *strp) != 0) - return EVT_FAILED; - - target++; - - addr = strtoull(target, &endaddr, 0); - if (target == endaddr) - return EVT_FAILED; - - attr->bp_addr = addr; - *strp = endaddr; + struct perf_event_attr attr; - type = strchr(target, ':'); + memset(&attr, 0, sizeof(attr)); + attr.bp_addr = (unsigned long) ptr; - /* If no type is defined, just rw as default */ - if (!type) { - attr->bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W; - } else { - err = parse_breakpoint_type(++type, strp, attr); - if (err == EVT_FAILED) - return EVT_FAILED; - } + if (parse_breakpoint_type(type, &attr)) + return -EINVAL; /* * We should find a nice way to override the access length * Provide some defaults for now */ - if (attr->bp_type == HW_BREAKPOINT_X) - attr->bp_len = sizeof(long); + if (attr.bp_type == HW_BREAKPOINT_X) + attr.bp_len = sizeof(long); else - attr->bp_len = HW_BREAKPOINT_LEN_4; + attr.bp_len = HW_BREAKPOINT_LEN_4; - attr->type = PERF_TYPE_BREAKPOINT; + attr.type = PERF_TYPE_BREAKPOINT; + attr.sample_period = 1; - return EVT_HANDLED; + return add_event(list, idx, &attr, NULL); } -static int check_events(const char *str, unsigned int i) +static int config_term(struct perf_event_attr *attr, + struct parse_events__term *term) { - int n; +#define CHECK_TYPE_VAL(type) \ +do { \ + if (PARSE_EVENTS__TERM_TYPE_ ## type != term->type_val) \ + return -EINVAL; \ +} while (0) + + switch (term->type_term) { + case PARSE_EVENTS__TERM_TYPE_CONFIG: + CHECK_TYPE_VAL(NUM); + attr->config = term->val.num; + break; + case PARSE_EVENTS__TERM_TYPE_CONFIG1: + CHECK_TYPE_VAL(NUM); + attr->config1 = term->val.num; + break; + case PARSE_EVENTS__TERM_TYPE_CONFIG2: + CHECK_TYPE_VAL(NUM); + attr->config2 = term->val.num; + break; + case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: + CHECK_TYPE_VAL(NUM); + attr->sample_period = term->val.num; + break; + case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE: + /* + * TODO uncomment when the field is available + * attr->branch_sample_type = term->val.num; + */ + break; + case PARSE_EVENTS__TERM_TYPE_NAME: + CHECK_TYPE_VAL(STR); + break; + default: + return -EINVAL; + } - n = strlen(event_symbols[i].symbol); - if (!strncasecmp(str, event_symbols[i].symbol, n)) - return n; + return 0; +#undef CHECK_TYPE_VAL +} - n = strlen(event_symbols[i].alias); - if (n) { - if (!strncasecmp(str, event_symbols[i].alias, n)) - return n; - } +static int config_attr(struct perf_event_attr *attr, + struct list_head *head, int fail) +{ + struct parse_events__term *term; + + list_for_each_entry(term, head, list) + if (config_term(attr, term) && fail) + return -EINVAL; return 0; } -static enum event_result -parse_symbolic_event(const char **strp, struct perf_event_attr *attr) +int parse_events_add_numeric(struct list_head **list, int *idx, + unsigned long type, unsigned long config, + struct list_head *head_config) { - const char *str = *strp; - unsigned int i; - int n; - - for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { - n = check_events(str, i); - if (n > 0) { - attr->type = event_symbols[i].type; - attr->config = event_symbols[i].config; - *strp = str + n; - return EVT_HANDLED; - } - } - return EVT_FAILED; + struct perf_event_attr attr; + + memset(&attr, 0, sizeof(attr)); + attr.type = type; + attr.config = config; + + if (head_config && + config_attr(&attr, head_config, 1)) + return -EINVAL; + + return add_event(list, idx, &attr, NULL); } -static enum event_result -parse_raw_event(const char **strp, struct perf_event_attr *attr) +static int parse_events__is_name_term(struct parse_events__term *term) { - const char *str = *strp; - u64 config; - int n; - - if (*str != 'r') - return EVT_FAILED; - n = hex2u64(str + 1, &config); - if (n > 0) { - const char *end = str + n + 1; - if (*end != '\0' && *end != ',' && *end != ':') - return EVT_FAILED; - - *strp = end; - attr->type = PERF_TYPE_RAW; - attr->config = config; - return EVT_HANDLED; - } - return EVT_FAILED; + return term->type_term == PARSE_EVENTS__TERM_TYPE_NAME; } -static enum event_result -parse_numeric_event(const char **strp, struct perf_event_attr *attr) +static char *pmu_event_name(struct list_head *head_terms) { - const char *str = *strp; - char *endp; - unsigned long type; - u64 config; - - type = strtoul(str, &endp, 0); - if (endp > str && type < PERF_TYPE_MAX && *endp == ':') { - str = endp + 1; - config = strtoul(str, &endp, 0); - if (endp > str) { - attr->type = type; - attr->config = config; - *strp = endp; - return EVT_HANDLED; - } - } - return EVT_FAILED; + struct parse_events__term *term; + + list_for_each_entry(term, head_terms, list) + if (parse_events__is_name_term(term)) + return term->val.str; + + return NULL; } -static int -parse_event_modifier(const char **strp, struct perf_event_attr *attr) +int parse_events_add_pmu(struct list_head **list, int *idx, + char *name, struct list_head *head_config) { - const char *str = *strp; - int exclude = 0; - int eu = 0, ek = 0, eh = 0, precise = 0; + struct perf_event_attr attr; + struct perf_pmu *pmu; - if (!*str) - return 0; + pmu = perf_pmu__find(name); + if (!pmu) + return -EINVAL; - if (*str == ',') - return 0; + memset(&attr, 0, sizeof(attr)); - if (*str++ != ':') - return -1; + if (perf_pmu__check_alias(pmu, head_config)) + return -EINVAL; + + /* + * Configure hardcoded terms first, no need to check + * return value when called with fail == 0 ;) + */ + config_attr(&attr, head_config, 0); + + if (perf_pmu__config(pmu, &attr, head_config)) + return -EINVAL; + + return add_event(list, idx, &attr, + pmu_event_name(head_config)); +} + +void parse_events_update_lists(struct list_head *list_event, + struct list_head *list_all) +{ + /* + * Called for single event definition. Update the + * 'all event' list, and reinit the 'signle event' + * list, for next event definition. + */ + list_splice_tail(list_event, list_all); + free(list_event); +} + +int parse_events_modifier(struct list_head *list, char *str) +{ + struct perf_evsel *evsel; + int exclude = 0, exclude_GH = 0; + int eu = 0, ek = 0, eh = 0, eH = 0, eG = 0, precise = 0; + + if (str == NULL) + return 0; while (*str) { if (*str == 'u') { @@ -760,6 +645,14 @@ parse_event_modifier(const char **strp, struct perf_event_attr *attr) if (!exclude) exclude = eu = ek = eh = 1; eh = 0; + } else if (*str == 'G') { + if (!exclude_GH) + exclude_GH = eG = eH = 1; + eG = 0; + } else if (*str == 'H') { + if (!exclude_GH) + exclude_GH = eG = eH = 1; + eH = 0; } else if (*str == 'p') { precise++; } else @@ -767,106 +660,99 @@ parse_event_modifier(const char **strp, struct perf_event_attr *attr) ++str; } - if (str < *strp + 2) - return -1; - *strp = str; + /* + * precise ip: + * + * 0 - SAMPLE_IP can have arbitrary skid + * 1 - SAMPLE_IP must have constant skid + * 2 - SAMPLE_IP requested to have 0 skid + * 3 - SAMPLE_IP must have 0 skid + * + * See also PERF_RECORD_MISC_EXACT_IP + */ + if (precise > 3) + return -EINVAL; - attr->exclude_user = eu; - attr->exclude_kernel = ek; - attr->exclude_hv = eh; - attr->precise_ip = precise; + list_for_each_entry(evsel, list, node) { + evsel->attr.exclude_user = eu; + evsel->attr.exclude_kernel = ek; + evsel->attr.exclude_hv = eh; + evsel->attr.precise_ip = precise; + evsel->attr.exclude_host = eH; + evsel->attr.exclude_guest = eG; + } return 0; } -/* - * Each event can have multiple symbolic names. - * Symbolic names are (almost) exactly matched. - */ -static enum event_result -parse_event_symbols(struct perf_evlist *evlist, const char **str, - struct perf_event_attr *attr) +static int parse_events__scanner(const char *str, void *data, int start_token) { - enum event_result ret; - - ret = parse_tracepoint_event(evlist, str, attr); - if (ret != EVT_FAILED) - goto modifier; - - ret = parse_raw_event(str, attr); - if (ret != EVT_FAILED) - goto modifier; + YY_BUFFER_STATE buffer; + void *scanner; + int ret; - ret = parse_numeric_event(str, attr); - if (ret != EVT_FAILED) - goto modifier; + ret = parse_events_lex_init_extra(start_token, &scanner); + if (ret) + return ret; - ret = parse_symbolic_event(str, attr); - if (ret != EVT_FAILED) - goto modifier; + buffer = parse_events__scan_string(str, scanner); - ret = parse_generic_hw_event(str, attr); - if (ret != EVT_FAILED) - goto modifier; +#ifdef PARSER_DEBUG + parse_events_debug = 1; +#endif + ret = parse_events_parse(data, scanner); - ret = parse_breakpoint_event(str, attr); - if (ret != EVT_FAILED) - goto modifier; - - fprintf(stderr, "invalid or unsupported event: '%s'\n", *str); - fprintf(stderr, "Run 'perf list' for a list of valid events\n"); - return EVT_FAILED; - -modifier: - if (parse_event_modifier(str, attr) < 0) { - fprintf(stderr, "invalid event modifier: '%s'\n", *str); - fprintf(stderr, "Run 'perf list' for a list of valid events and modifiers\n"); + parse_events__flush_buffer(buffer, scanner); + parse_events__delete_buffer(buffer, scanner); + parse_events_lex_destroy(scanner); + return ret; +} - return EVT_FAILED; +/* + * parse event config string, return a list of event terms. + */ +int parse_events_terms(struct list_head *terms, const char *str) +{ + struct parse_events_data__terms data = { + .terms = NULL, + }; + int ret; + + ret = parse_events__scanner(str, &data, PE_START_TERMS); + if (!ret) { + list_splice(data.terms, terms); + free(data.terms); + return 0; } + parse_events__free_terms(data.terms); return ret; } -int parse_events(struct perf_evlist *evlist , const char *str, int unset __used) +int parse_events(struct perf_evlist *evlist, const char *str, int unset __used) { - struct perf_event_attr attr; - enum event_result ret; - const char *ostr; - - for (;;) { - ostr = str; - memset(&attr, 0, sizeof(attr)); - ret = parse_event_symbols(evlist, &str, &attr); - if (ret == EVT_FAILED) - return -1; - - if (!(*str == 0 || *str == ',' || isspace(*str))) - return -1; - - if (ret != EVT_HANDLED_ALL) { - struct perf_evsel *evsel; - evsel = perf_evsel__new(&attr, evlist->nr_entries); - if (evsel == NULL) - return -1; - perf_evlist__add(evlist, evsel); - - evsel->name = calloc(str - ostr + 1, 1); - if (!evsel->name) - return -1; - strncpy(evsel->name, ostr, str - ostr); - } - - if (*str == 0) - break; - if (*str == ',') - ++str; - while (isspace(*str)) - ++str; + struct parse_events_data__events data = { + .list = LIST_HEAD_INIT(data.list), + .idx = evlist->nr_entries, + }; + int ret; + + ret = parse_events__scanner(str, &data, PE_START_EVENTS); + if (!ret) { + int entries = data.idx - evlist->nr_entries; + perf_evlist__splice_list_tail(evlist, &data.list, entries); + return 0; } - return 0; + /* + * There are 2 users - builtin-record and builtin-test objects. + * Both call perf_evlist__delete in case of error, so we dont + * need to bother. + */ + fprintf(stderr, "invalid or unsupported event: '%s'\n", str); + fprintf(stderr, "Run 'perf list' for a list of valid events\n"); + return ret; } int parse_events_option(const struct option *opt, const char *str, @@ -920,10 +806,10 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob) char evt_path[MAXPATHLEN]; char dir_path[MAXPATHLEN]; - if (debugfs_valid_mountpoint(debugfs_path)) + if (debugfs_valid_mountpoint(tracing_events_path)) return; - sys_dir = opendir(debugfs_path); + sys_dir = opendir(tracing_events_path); if (!sys_dir) return; @@ -932,7 +818,7 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob) !strglobmatch(sys_dirent.d_name, subsys_glob)) continue; - snprintf(dir_path, MAXPATHLEN, "%s/%s", debugfs_path, + snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path, sys_dirent.d_name); evt_dir = opendir(dir_path); if (!evt_dir) @@ -964,16 +850,16 @@ int is_valid_tracepoint(const char *event_string) char evt_path[MAXPATHLEN]; char dir_path[MAXPATHLEN]; - if (debugfs_valid_mountpoint(debugfs_path)) + if (debugfs_valid_mountpoint(tracing_events_path)) return 0; - sys_dir = opendir(debugfs_path); + sys_dir = opendir(tracing_events_path); if (!sys_dir) return 0; for_each_subsystem(sys_dir, sys_dirent, sys_next) { - snprintf(dir_path, MAXPATHLEN, "%s/%s", debugfs_path, + snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path, sys_dirent.d_name); evt_dir = opendir(dir_path); if (!evt_dir) @@ -994,16 +880,13 @@ int is_valid_tracepoint(const char *event_string) return 0; } -void print_events_type(u8 type) +static void __print_events_type(u8 type, struct event_symbol *syms, + unsigned max) { - struct event_symbol *syms = event_symbols; - unsigned int i; char name[64]; + unsigned i; - for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) { - if (type != syms->type) - continue; - + for (i = 0; i < max ; i++, syms++) { if (strlen(syms->alias)) snprintf(name, sizeof(name), "%s OR %s", syms->symbol, syms->alias); @@ -1015,19 +898,28 @@ void print_events_type(u8 type) } } +void print_events_type(u8 type) +{ + if (type == PERF_TYPE_SOFTWARE) + __print_events_type(type, event_symbols_sw, PERF_COUNT_SW_MAX); + else + __print_events_type(type, event_symbols_hw, PERF_COUNT_HW_MAX); +} + int print_hwcache_events(const char *event_glob) { unsigned int type, op, i, printed = 0; + char name[64]; for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { /* skip invalid cache type */ - if (!is_cache_op_valid(type, op)) + if (!perf_evsel__is_cache_op_valid(type, op)) continue; for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { - char *name = event_cache_name(type, op, i); - + __perf_evsel__hw_cache_type_op_res_name(type, op, i, + name, sizeof(name)); if (event_glob != NULL && !strglobmatch(name, event_glob)) continue; @@ -1041,28 +933,13 @@ int print_hwcache_events(const char *event_glob) return printed; } -#define MAX_NAME_LEN 100 - -/* - * Print the help text for the event symbols: - */ -void print_events(const char *event_glob) +static void print_symbol_events(const char *event_glob, unsigned type, + struct event_symbol *syms, unsigned max) { - unsigned int i, type, prev_type = -1, printed = 0, ntypes_printed = 0; - struct event_symbol *syms = event_symbols; + unsigned i, printed = 0; char name[MAX_NAME_LEN]; - printf("\n"); - printf("List of pre-defined events (to be used in -e):\n"); - - for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) { - type = syms->type; - - if (type != prev_type && printed) { - printf("\n"); - printed = 0; - ntypes_printed++; - } + for (i = 0; i < max; i++, syms++) { if (event_glob != NULL && !(strglobmatch(syms->symbol, event_glob) || @@ -1073,17 +950,31 @@ void print_events(const char *event_glob) snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias); else strncpy(name, syms->symbol, MAX_NAME_LEN); - printf(" %-50s [%s]\n", name, - event_type_descriptors[type]); - prev_type = type; - ++printed; + printf(" %-50s [%s]\n", name, event_type_descriptors[type]); + + printed++; } - if (ntypes_printed) { - printed = 0; + if (printed) printf("\n"); - } +} + +/* + * Print the help text for the event symbols: + */ +void print_events(const char *event_glob) +{ + + printf("\n"); + printf("List of pre-defined events (to be used in -e):\n"); + + print_symbol_events(event_glob, PERF_TYPE_HARDWARE, + event_symbols_hw, PERF_COUNT_HW_MAX); + + print_symbol_events(event_glob, PERF_TYPE_SOFTWARE, + event_symbols_sw, PERF_COUNT_SW_MAX); + print_hwcache_events(event_glob); if (event_glob != NULL) @@ -1091,8 +982,12 @@ void print_events(const char *event_glob) printf("\n"); printf(" %-50s [%s]\n", - "rNNN (see 'perf list --help' on how to encode it)", + "rNNN", + event_type_descriptors[PERF_TYPE_RAW]); + printf(" %-50s [%s]\n", + "cpu/t1=v1[,t2=v2,t3 ...]/modifier", event_type_descriptors[PERF_TYPE_RAW]); + printf(" (see 'perf list --help' on how to encode it)\n"); printf("\n"); printf(" %-50s [%s]\n", @@ -1102,3 +997,69 @@ void print_events(const char *event_glob) print_tracepoint_events(NULL, NULL); } + +int parse_events__is_hardcoded_term(struct parse_events__term *term) +{ + return term->type_term != PARSE_EVENTS__TERM_TYPE_USER; +} + +static int new_term(struct parse_events__term **_term, int type_val, + int type_term, char *config, + char *str, long num) +{ + struct parse_events__term *term; + + term = zalloc(sizeof(*term)); + if (!term) + return -ENOMEM; + + INIT_LIST_HEAD(&term->list); + term->type_val = type_val; + term->type_term = type_term; + term->config = config; + + switch (type_val) { + case PARSE_EVENTS__TERM_TYPE_NUM: + term->val.num = num; + break; + case PARSE_EVENTS__TERM_TYPE_STR: + term->val.str = str; + break; + default: + return -EINVAL; + } + + *_term = term; + return 0; +} + +int parse_events__term_num(struct parse_events__term **term, + int type_term, char *config, long num) +{ + return new_term(term, PARSE_EVENTS__TERM_TYPE_NUM, type_term, + config, NULL, num); +} + +int parse_events__term_str(struct parse_events__term **term, + int type_term, char *config, char *str) +{ + return new_term(term, PARSE_EVENTS__TERM_TYPE_STR, type_term, + config, str, 0); +} + +int parse_events__term_clone(struct parse_events__term **new, + struct parse_events__term *term) +{ + return new_term(new, term->type_val, term->type_term, term->config, + term->val.str, term->val.num); +} + +void parse_events__free_terms(struct list_head *terms) +{ + struct parse_events__term *term, *h; + + list_for_each_entry_safe(term, h, terms, list) + free(term); + + free(terms); +} diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 2f8e375e038d..ee9c218a193c 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -4,7 +4,11 @@ * Parse symbolic events/counts passed in as options: */ +#include <linux/list.h> +#include <stdbool.h> +#include "types.h" #include "../../../include/linux/perf_event.h" +#include "types.h" struct list_head; struct perf_evsel; @@ -22,24 +26,82 @@ extern struct tracepoint_path *tracepoint_id_to_path(u64 config); extern bool have_tracepoints(struct list_head *evlist); const char *event_type(int type); -const char *event_name(struct perf_evsel *event); -extern const char *__event_name(int type, u64 config); extern int parse_events_option(const struct option *opt, const char *str, int unset); extern int parse_events(struct perf_evlist *evlist, const char *str, int unset); +extern int parse_events_terms(struct list_head *terms, const char *str); extern int parse_filter(const struct option *opt, const char *str, int unset); #define EVENTS_HELP_MAX (128*1024) +enum { + PARSE_EVENTS__TERM_TYPE_NUM, + PARSE_EVENTS__TERM_TYPE_STR, +}; + +enum { + PARSE_EVENTS__TERM_TYPE_USER, + PARSE_EVENTS__TERM_TYPE_CONFIG, + PARSE_EVENTS__TERM_TYPE_CONFIG1, + PARSE_EVENTS__TERM_TYPE_CONFIG2, + PARSE_EVENTS__TERM_TYPE_NAME, + PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD, + PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE, +}; + +struct parse_events__term { + char *config; + union { + char *str; + long num; + } val; + int type_val; + int type_term; + struct list_head list; +}; + +struct parse_events_data__events { + struct list_head list; + int idx; +}; + +struct parse_events_data__terms { + struct list_head *terms; +}; + +int parse_events__is_hardcoded_term(struct parse_events__term *term); +int parse_events__term_num(struct parse_events__term **_term, + int type_term, char *config, long num); +int parse_events__term_str(struct parse_events__term **_term, + int type_term, char *config, char *str); +int parse_events__term_clone(struct parse_events__term **new, + struct parse_events__term *term); +void parse_events__free_terms(struct list_head *terms); +int parse_events_modifier(struct list_head *list, char *str); +int parse_events_add_tracepoint(struct list_head **list, int *idx, + char *sys, char *event); +int parse_events_add_numeric(struct list_head **list, int *idx, + unsigned long type, unsigned long config, + struct list_head *head_config); +int parse_events_add_cache(struct list_head **list, int *idx, + char *type, char *op_result1, char *op_result2); +int parse_events_add_breakpoint(struct list_head **list, int *idx, + void *ptr, char *type); +int parse_events_add_pmu(struct list_head **list, int *idx, + char *pmu , struct list_head *head_config); +void parse_events_update_lists(struct list_head *list_event, + struct list_head *list_all); +void parse_events_error(void *data, void *scanner, char const *msg); +int parse_events__test(void); + void print_events(const char *event_glob); void print_events_type(u8 type); void print_tracepoint_events(const char *subsys_glob, const char *event_glob); int print_hwcache_events(const char *event_glob); extern int is_valid_tracepoint(const char *event_string); -extern char debugfs_path[]; extern int valid_debugfs_mount(const char *debugfs); #endif /* __PERF_PARSE_EVENTS_H */ diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l new file mode 100644 index 000000000000..384ca74c6b22 --- /dev/null +++ b/tools/perf/util/parse-events.l @@ -0,0 +1,181 @@ + +%option reentrant +%option bison-bridge +%option prefix="parse_events_" +%option stack + +%{ +#include <errno.h> +#include "../perf.h" +#include "parse-events-bison.h" +#include "parse-events.h" + +char *parse_events_get_text(yyscan_t yyscanner); +YYSTYPE *parse_events_get_lval(yyscan_t yyscanner); + +static int __value(YYSTYPE *yylval, char *str, int base, int token) +{ + long num; + + errno = 0; + num = strtoul(str, NULL, base); + if (errno) + return PE_ERROR; + + yylval->num = num; + return token; +} + +static int value(yyscan_t scanner, int base) +{ + YYSTYPE *yylval = parse_events_get_lval(scanner); + char *text = parse_events_get_text(scanner); + + return __value(yylval, text, base, PE_VALUE); +} + +static int raw(yyscan_t scanner) +{ + YYSTYPE *yylval = parse_events_get_lval(scanner); + char *text = parse_events_get_text(scanner); + + return __value(yylval, text + 1, 16, PE_RAW); +} + +static int str(yyscan_t scanner, int token) +{ + YYSTYPE *yylval = parse_events_get_lval(scanner); + char *text = parse_events_get_text(scanner); + + yylval->str = strdup(text); + return token; +} + +static int sym(yyscan_t scanner, int type, int config) +{ + YYSTYPE *yylval = parse_events_get_lval(scanner); + + yylval->num = (type << 16) + config; + return type == PERF_TYPE_HARDWARE ? PE_VALUE_SYM_HW : PE_VALUE_SYM_SW; +} + +static int term(yyscan_t scanner, int type) +{ + YYSTYPE *yylval = parse_events_get_lval(scanner); + + yylval->num = type; + return PE_TERM; +} + +%} + +%x mem + +num_dec [0-9]+ +num_hex 0x[a-fA-F0-9]+ +num_raw_hex [a-fA-F0-9]+ +name [a-zA-Z_*?][a-zA-Z0-9_*?]* +modifier_event [ukhpGH]{1,8} +modifier_bp [rwx]{1,3} + +%% + +%{ + { + int start_token; + + start_token = (int) parse_events_get_extra(yyscanner); + if (start_token) { + parse_events_set_extra(NULL, yyscanner); + return start_token; + } + } +%} + +cpu-cycles|cycles { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES); } +stalled-cycles-frontend|idle-cycles-frontend { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); } +stalled-cycles-backend|idle-cycles-backend { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); } +instructions { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS); } +cache-references { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES); } +cache-misses { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES); } +branch-instructions|branches { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); } +branch-misses { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES); } +bus-cycles { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES); } +ref-cycles { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES); } +cpu-clock { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK); } +task-clock { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK); } +page-faults|faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS); } +minor-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN); } +major-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ); } +context-switches|cs { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES); } +cpu-migrations|migrations { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS); } +alignment-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); } +emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); } + +L1-dcache|l1-d|l1d|L1-data | +L1-icache|l1-i|l1i|L1-instruction | +LLC|L2 | +dTLB|d-tlb|Data-TLB | +iTLB|i-tlb|Instruction-TLB | +branch|branches|bpu|btb|bpc | +node { return str(yyscanner, PE_NAME_CACHE_TYPE); } + +load|loads|read | +store|stores|write | +prefetch|prefetches | +speculative-read|speculative-load | +refs|Reference|ops|access | +misses|miss { return str(yyscanner, PE_NAME_CACHE_OP_RESULT); } + + /* + * These are event config hardcoded term names to be specified + * within xxx/.../ syntax. So far we dont clash with other names, + * so we can put them here directly. In case the we have a conflict + * in future, this needs to go into '//' condition block. + */ +config { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); } +config1 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); } +config2 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); } +name { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); } +period { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); } +branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); } + +mem: { BEGIN(mem); return PE_PREFIX_MEM; } +r{num_raw_hex} { return raw(yyscanner); } +{num_dec} { return value(yyscanner, 10); } +{num_hex} { return value(yyscanner, 16); } + +{modifier_event} { return str(yyscanner, PE_MODIFIER_EVENT); } +{name} { return str(yyscanner, PE_NAME); } +"/" { return '/'; } +- { return '-'; } +, { return ','; } +: { return ':'; } += { return '='; } +\n { } + +<mem>{ +{modifier_bp} { return str(yyscanner, PE_MODIFIER_BP); } +: { return ':'; } +{num_dec} { return value(yyscanner, 10); } +{num_hex} { return value(yyscanner, 16); } + /* + * We need to separate 'mem:' scanner part, in order to get specific + * modifier bits parsed out. Otherwise we would need to handle PE_NAME + * and we'd need to parse it manually. During the escape from <mem> + * state we need to put the escaping char back, so we dont miss it. + */ +. { unput(*yytext); BEGIN(INITIAL); } + /* + * We destroy the scanner after reaching EOF, + * but anyway just to be sure get back to INIT state. + */ +<<EOF>> { BEGIN(INITIAL); } +} + +%% + +int parse_events_wrap(void *scanner __used) +{ + return 1; +} diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y new file mode 100644 index 000000000000..2bc5fbff2b5d --- /dev/null +++ b/tools/perf/util/parse-events.y @@ -0,0 +1,314 @@ +%pure-parser +%name-prefix "parse_events_" +%parse-param {void *_data} +%parse-param {void *scanner} +%lex-param {void* scanner} + +%{ + +#define YYDEBUG 1 + +#include <linux/compiler.h> +#include <linux/list.h> +#include "types.h" +#include "util.h" +#include "parse-events.h" +#include "parse-events-bison.h" + +extern int parse_events_lex (YYSTYPE* lvalp, void* scanner); + +#define ABORT_ON(val) \ +do { \ + if (val) \ + YYABORT; \ +} while (0) + +%} + +%token PE_START_EVENTS PE_START_TERMS +%token PE_VALUE PE_VALUE_SYM_HW PE_VALUE_SYM_SW PE_RAW PE_TERM +%token PE_NAME +%token PE_MODIFIER_EVENT PE_MODIFIER_BP +%token PE_NAME_CACHE_TYPE PE_NAME_CACHE_OP_RESULT +%token PE_PREFIX_MEM PE_PREFIX_RAW +%token PE_ERROR +%type <num> PE_VALUE +%type <num> PE_VALUE_SYM_HW +%type <num> PE_VALUE_SYM_SW +%type <num> PE_RAW +%type <num> PE_TERM +%type <str> PE_NAME +%type <str> PE_NAME_CACHE_TYPE +%type <str> PE_NAME_CACHE_OP_RESULT +%type <str> PE_MODIFIER_EVENT +%type <str> PE_MODIFIER_BP +%type <num> value_sym +%type <head> event_config +%type <term> event_term +%type <head> event_pmu +%type <head> event_legacy_symbol +%type <head> event_legacy_cache +%type <head> event_legacy_mem +%type <head> event_legacy_tracepoint +%type <head> event_legacy_numeric +%type <head> event_legacy_raw +%type <head> event_def + +%union +{ + char *str; + unsigned long num; + struct list_head *head; + struct parse_events__term *term; +} +%% + +start: +PE_START_EVENTS events +| +PE_START_TERMS terms + +events: +events ',' event | event + +event: +event_def PE_MODIFIER_EVENT +{ + struct parse_events_data__events *data = _data; + + /* + * Apply modifier on all events added by single event definition + * (there could be more events added for multiple tracepoint + * definitions via '*?'. + */ + ABORT_ON(parse_events_modifier($1, $2)); + parse_events_update_lists($1, &data->list); +} +| +event_def +{ + struct parse_events_data__events *data = _data; + + parse_events_update_lists($1, &data->list); +} + +event_def: event_pmu | + event_legacy_symbol | + event_legacy_cache sep_dc | + event_legacy_mem | + event_legacy_tracepoint sep_dc | + event_legacy_numeric sep_dc | + event_legacy_raw sep_dc + +event_pmu: +PE_NAME '/' event_config '/' +{ + struct parse_events_data__events *data = _data; + struct list_head *list = NULL; + + ABORT_ON(parse_events_add_pmu(&list, &data->idx, $1, $3)); + parse_events__free_terms($3); + $$ = list; +} + +value_sym: +PE_VALUE_SYM_HW +| +PE_VALUE_SYM_SW + +event_legacy_symbol: +value_sym '/' event_config '/' +{ + struct parse_events_data__events *data = _data; + struct list_head *list = NULL; + int type = $1 >> 16; + int config = $1 & 255; + + ABORT_ON(parse_events_add_numeric(&list, &data->idx, + type, config, $3)); + parse_events__free_terms($3); + $$ = list; +} +| +value_sym sep_slash_dc +{ + struct parse_events_data__events *data = _data; + struct list_head *list = NULL; + int type = $1 >> 16; + int config = $1 & 255; + + ABORT_ON(parse_events_add_numeric(&list, &data->idx, + type, config, NULL)); + $$ = list; +} + +event_legacy_cache: +PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT +{ + struct parse_events_data__events *data = _data; + struct list_head *list = NULL; + + ABORT_ON(parse_events_add_cache(&list, &data->idx, $1, $3, $5)); + $$ = list; +} +| +PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT +{ + struct parse_events_data__events *data = _data; + struct list_head *list = NULL; + + ABORT_ON(parse_events_add_cache(&list, &data->idx, $1, $3, NULL)); + $$ = list; +} +| +PE_NAME_CACHE_TYPE +{ + struct parse_events_data__events *data = _data; + struct list_head *list = NULL; + + ABORT_ON(parse_events_add_cache(&list, &data->idx, $1, NULL, NULL)); + $$ = list; +} + +event_legacy_mem: +PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc +{ + struct parse_events_data__events *data = _data; + struct list_head *list = NULL; + + ABORT_ON(parse_events_add_breakpoint(&list, &data->idx, + (void *) $2, $4)); + $$ = list; +} +| +PE_PREFIX_MEM PE_VALUE sep_dc +{ + struct parse_events_data__events *data = _data; + struct list_head *list = NULL; + + ABORT_ON(parse_events_add_breakpoint(&list, &data->idx, + (void *) $2, NULL)); + $$ = list; +} + +event_legacy_tracepoint: +PE_NAME ':' PE_NAME +{ + struct parse_events_data__events *data = _data; + struct list_head *list = NULL; + + ABORT_ON(parse_events_add_tracepoint(&list, &data->idx, $1, $3)); + $$ = list; +} + +event_legacy_numeric: +PE_VALUE ':' PE_VALUE +{ + struct parse_events_data__events *data = _data; + struct list_head *list = NULL; + + ABORT_ON(parse_events_add_numeric(&list, &data->idx, $1, $3, NULL)); + $$ = list; +} + +event_legacy_raw: +PE_RAW +{ + struct parse_events_data__events *data = _data; + struct list_head *list = NULL; + + ABORT_ON(parse_events_add_numeric(&list, &data->idx, + PERF_TYPE_RAW, $1, NULL)); + $$ = list; +} + +terms: event_config +{ + struct parse_events_data__terms *data = _data; + data->terms = $1; +} + +event_config: +event_config ',' event_term +{ + struct list_head *head = $1; + struct parse_events__term *term = $3; + + ABORT_ON(!head); + list_add_tail(&term->list, head); + $$ = $1; +} +| +event_term +{ + struct list_head *head = malloc(sizeof(*head)); + struct parse_events__term *term = $1; + + ABORT_ON(!head); + INIT_LIST_HEAD(head); + list_add_tail(&term->list, head); + $$ = head; +} + +event_term: +PE_NAME '=' PE_NAME +{ + struct parse_events__term *term; + + ABORT_ON(parse_events__term_str(&term, PARSE_EVENTS__TERM_TYPE_USER, + $1, $3)); + $$ = term; +} +| +PE_NAME '=' PE_VALUE +{ + struct parse_events__term *term; + + ABORT_ON(parse_events__term_num(&term, PARSE_EVENTS__TERM_TYPE_USER, + $1, $3)); + $$ = term; +} +| +PE_NAME +{ + struct parse_events__term *term; + + ABORT_ON(parse_events__term_num(&term, PARSE_EVENTS__TERM_TYPE_USER, + $1, 1)); + $$ = term; +} +| +PE_TERM '=' PE_NAME +{ + struct parse_events__term *term; + + ABORT_ON(parse_events__term_str(&term, $1, NULL, $3)); + $$ = term; +} +| +PE_TERM '=' PE_VALUE +{ + struct parse_events__term *term; + + ABORT_ON(parse_events__term_num(&term, $1, NULL, $3)); + $$ = term; +} +| +PE_TERM +{ + struct parse_events__term *term; + + ABORT_ON(parse_events__term_num(&term, $1, NULL, 1)); + $$ = term; +} + +sep_dc: ':' | + +sep_slash_dc: '/' | ':' | + +%% + +void parse_events_error(void *data __used, void *scanner __used, + char const *msg __used) +{ +} diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c index 99d02aa57dbf..594f8fad5ecd 100644 --- a/tools/perf/util/parse-options.c +++ b/tools/perf/util/parse-options.c @@ -1,6 +1,7 @@ #include "util.h" #include "parse-options.h" #include "cache.h" +#include "header.h" #define OPT_SHORT 1 #define OPT_UNSET 2 @@ -413,6 +414,8 @@ int parse_options(int argc, const char **argv, const struct option *options, { struct parse_opt_ctx_t ctx; + perf_header__set_cmdline(argc, argv); + parse_options_start(&ctx, argc, argv, flags); switch (parse_options_step(&ctx, options, usagestr)) { case PARSE_OPT_HELP: diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c new file mode 100644 index 000000000000..67715a42cd6d --- /dev/null +++ b/tools/perf/util/pmu.c @@ -0,0 +1,650 @@ + +#include <linux/list.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <stdio.h> +#include <dirent.h> +#include "sysfs.h" +#include "util.h" +#include "pmu.h" +#include "parse-events.h" + +int perf_pmu_parse(struct list_head *list, char *name); +extern FILE *perf_pmu_in; + +static LIST_HEAD(pmus); + +/* + * Parse & process all the sysfs attributes located under + * the directory specified in 'dir' parameter. + */ +static int pmu_format_parse(char *dir, struct list_head *head) +{ + struct dirent *evt_ent; + DIR *format_dir; + int ret = 0; + + format_dir = opendir(dir); + if (!format_dir) + return -EINVAL; + + while (!ret && (evt_ent = readdir(format_dir))) { + char path[PATH_MAX]; + char *name = evt_ent->d_name; + FILE *file; + + if (!strcmp(name, ".") || !strcmp(name, "..")) + continue; + + snprintf(path, PATH_MAX, "%s/%s", dir, name); + + ret = -EINVAL; + file = fopen(path, "r"); + if (!file) + break; + + perf_pmu_in = file; + ret = perf_pmu_parse(head, name); + fclose(file); + } + + closedir(format_dir); + return ret; +} + +/* + * Reading/parsing the default pmu format definition, which should be + * located at: + * /sys/bus/event_source/devices/<dev>/format as sysfs group attributes. + */ +static int pmu_format(char *name, struct list_head *format) +{ + struct stat st; + char path[PATH_MAX]; + const char *sysfs; + + sysfs = sysfs_find_mountpoint(); + if (!sysfs) + return -1; + + snprintf(path, PATH_MAX, + "%s/bus/event_source/devices/%s/format", sysfs, name); + + if (stat(path, &st) < 0) + return 0; /* no error if format does not exist */ + + if (pmu_format_parse(path, format)) + return -1; + + return 0; +} + +static int perf_pmu__new_alias(struct list_head *list, char *name, FILE *file) +{ + struct perf_pmu__alias *alias; + char buf[256]; + int ret; + + ret = fread(buf, 1, sizeof(buf), file); + if (ret == 0) + return -EINVAL; + buf[ret] = 0; + + alias = malloc(sizeof(*alias)); + if (!alias) + return -ENOMEM; + + INIT_LIST_HEAD(&alias->terms); + ret = parse_events_terms(&alias->terms, buf); + if (ret) { + free(alias); + return ret; + } + + alias->name = strdup(name); + list_add_tail(&alias->list, list); + return 0; +} + +/* + * Process all the sysfs attributes located under the directory + * specified in 'dir' parameter. + */ +static int pmu_aliases_parse(char *dir, struct list_head *head) +{ + struct dirent *evt_ent; + DIR *event_dir; + int ret = 0; + + event_dir = opendir(dir); + if (!event_dir) + return -EINVAL; + + while (!ret && (evt_ent = readdir(event_dir))) { + char path[PATH_MAX]; + char *name = evt_ent->d_name; + FILE *file; + + if (!strcmp(name, ".") || !strcmp(name, "..")) + continue; + + snprintf(path, PATH_MAX, "%s/%s", dir, name); + + ret = -EINVAL; + file = fopen(path, "r"); + if (!file) + break; + ret = perf_pmu__new_alias(head, name, file); + fclose(file); + } + + closedir(event_dir); + return ret; +} + +/* + * Reading the pmu event aliases definition, which should be located at: + * /sys/bus/event_source/devices/<dev>/events as sysfs group attributes. + */ +static int pmu_aliases(char *name, struct list_head *head) +{ + struct stat st; + char path[PATH_MAX]; + const char *sysfs; + + sysfs = sysfs_find_mountpoint(); + if (!sysfs) + return -1; + + snprintf(path, PATH_MAX, + "%s/bus/event_source/devices/%s/events", sysfs, name); + + if (stat(path, &st) < 0) + return -1; + + if (pmu_aliases_parse(path, head)) + return -1; + + return 0; +} + +static int pmu_alias_terms(struct perf_pmu__alias *alias, + struct list_head *terms) +{ + struct parse_events__term *term, *clone; + LIST_HEAD(list); + int ret; + + list_for_each_entry(term, &alias->terms, list) { + ret = parse_events__term_clone(&clone, term); + if (ret) { + parse_events__free_terms(&list); + return ret; + } + list_add_tail(&clone->list, &list); + } + list_splice(&list, terms); + return 0; +} + +/* + * Reading/parsing the default pmu type value, which should be + * located at: + * /sys/bus/event_source/devices/<dev>/type as sysfs attribute. + */ +static int pmu_type(char *name, __u32 *type) +{ + struct stat st; + char path[PATH_MAX]; + const char *sysfs; + FILE *file; + int ret = 0; + + sysfs = sysfs_find_mountpoint(); + if (!sysfs) + return -1; + + snprintf(path, PATH_MAX, + "%s/bus/event_source/devices/%s/type", sysfs, name); + + if (stat(path, &st) < 0) + return -1; + + file = fopen(path, "r"); + if (!file) + return -EINVAL; + + if (1 != fscanf(file, "%u", type)) + ret = -1; + + fclose(file); + return ret; +} + +static struct perf_pmu *pmu_lookup(char *name) +{ + struct perf_pmu *pmu; + LIST_HEAD(format); + LIST_HEAD(aliases); + __u32 type; + + /* + * The pmu data we store & need consists of the pmu + * type value and format definitions. Load both right + * now. + */ + if (pmu_format(name, &format)) + return NULL; + + if (pmu_type(name, &type)) + return NULL; + + pmu = zalloc(sizeof(*pmu)); + if (!pmu) + return NULL; + + pmu_aliases(name, &aliases); + + INIT_LIST_HEAD(&pmu->format); + INIT_LIST_HEAD(&pmu->aliases); + list_splice(&format, &pmu->format); + list_splice(&aliases, &pmu->aliases); + pmu->name = strdup(name); + pmu->type = type; + list_add_tail(&pmu->list, &pmus); + return pmu; +} + +static struct perf_pmu *pmu_find(char *name) +{ + struct perf_pmu *pmu; + + list_for_each_entry(pmu, &pmus, list) + if (!strcmp(pmu->name, name)) + return pmu; + + return NULL; +} + +struct perf_pmu *perf_pmu__find(char *name) +{ + struct perf_pmu *pmu; + + /* + * Once PMU is loaded it stays in the list, + * so we keep us from multiple reading/parsing + * the pmu format definitions. + */ + pmu = pmu_find(name); + if (pmu) + return pmu; + + return pmu_lookup(name); +} + +static struct perf_pmu__format* +pmu_find_format(struct list_head *formats, char *name) +{ + struct perf_pmu__format *format; + + list_for_each_entry(format, formats, list) + if (!strcmp(format->name, name)) + return format; + + return NULL; +} + +/* + * Returns value based on the format definition (format parameter) + * and unformated value (value parameter). + * + * TODO maybe optimize a little ;) + */ +static __u64 pmu_format_value(unsigned long *format, __u64 value) +{ + unsigned long fbit, vbit; + __u64 v = 0; + + for (fbit = 0, vbit = 0; fbit < PERF_PMU_FORMAT_BITS; fbit++) { + + if (!test_bit(fbit, format)) + continue; + + if (!(value & (1llu << vbit++))) + continue; + + v |= (1llu << fbit); + } + + return v; +} + +/* + * Setup one of config[12] attr members based on the + * user input data - temr parameter. + */ +static int pmu_config_term(struct list_head *formats, + struct perf_event_attr *attr, + struct parse_events__term *term) +{ + struct perf_pmu__format *format; + __u64 *vp; + + /* + * Support only for hardcoded and numnerial terms. + * Hardcoded terms should be already in, so nothing + * to be done for them. + */ + if (parse_events__is_hardcoded_term(term)) + return 0; + + if (term->type_val != PARSE_EVENTS__TERM_TYPE_NUM) + return -EINVAL; + + format = pmu_find_format(formats, term->config); + if (!format) + return -EINVAL; + + switch (format->value) { + case PERF_PMU_FORMAT_VALUE_CONFIG: + vp = &attr->config; + break; + case PERF_PMU_FORMAT_VALUE_CONFIG1: + vp = &attr->config1; + break; + case PERF_PMU_FORMAT_VALUE_CONFIG2: + vp = &attr->config2; + break; + default: + return -EINVAL; + } + + /* + * XXX If we ever decide to go with string values for + * non-hardcoded terms, here's the place to translate + * them into value. + */ + *vp |= pmu_format_value(format->bits, term->val.num); + return 0; +} + +static int pmu_config(struct list_head *formats, struct perf_event_attr *attr, + struct list_head *head_terms) +{ + struct parse_events__term *term; + + list_for_each_entry(term, head_terms, list) + if (pmu_config_term(formats, attr, term)) + return -EINVAL; + + return 0; +} + +/* + * Configures event's 'attr' parameter based on the: + * 1) users input - specified in terms parameter + * 2) pmu format definitions - specified by pmu parameter + */ +int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr, + struct list_head *head_terms) +{ + attr->type = pmu->type; + return pmu_config(&pmu->format, attr, head_terms); +} + +static struct perf_pmu__alias *pmu_find_alias(struct perf_pmu *pmu, + struct parse_events__term *term) +{ + struct perf_pmu__alias *alias; + char *name; + + if (parse_events__is_hardcoded_term(term)) + return NULL; + + if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) { + if (term->val.num != 1) + return NULL; + if (pmu_find_format(&pmu->format, term->config)) + return NULL; + name = term->config; + } else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) { + if (strcasecmp(term->config, "event")) + return NULL; + name = term->val.str; + } else { + return NULL; + } + + list_for_each_entry(alias, &pmu->aliases, list) { + if (!strcasecmp(alias->name, name)) + return alias; + } + return NULL; +} + +/* + * Find alias in the terms list and replace it with the terms + * defined for the alias + */ +int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms) +{ + struct parse_events__term *term, *h; + struct perf_pmu__alias *alias; + int ret; + + list_for_each_entry_safe(term, h, head_terms, list) { + alias = pmu_find_alias(pmu, term); + if (!alias) + continue; + ret = pmu_alias_terms(alias, &term->list); + if (ret) + return ret; + list_del(&term->list); + free(term); + } + return 0; +} + +int perf_pmu__new_format(struct list_head *list, char *name, + int config, unsigned long *bits) +{ + struct perf_pmu__format *format; + + format = zalloc(sizeof(*format)); + if (!format) + return -ENOMEM; + + format->name = strdup(name); + format->value = config; + memcpy(format->bits, bits, sizeof(format->bits)); + + list_add_tail(&format->list, list); + return 0; +} + +void perf_pmu__set_format(unsigned long *bits, long from, long to) +{ + long b; + + if (!to) + to = from; + + memset(bits, 0, BITS_TO_LONGS(PERF_PMU_FORMAT_BITS)); + for (b = from; b <= to; b++) + set_bit(b, bits); +} + +/* Simulated format definitions. */ +static struct test_format { + const char *name; + const char *value; +} test_formats[] = { + { "krava01", "config:0-1,62-63\n", }, + { "krava02", "config:10-17\n", }, + { "krava03", "config:5\n", }, + { "krava11", "config1:0,2,4,6,8,20-28\n", }, + { "krava12", "config1:63\n", }, + { "krava13", "config1:45-47\n", }, + { "krava21", "config2:0-3,10-13,20-23,30-33,40-43,50-53,60-63\n", }, + { "krava22", "config2:8,18,48,58\n", }, + { "krava23", "config2:28-29,38\n", }, +}; + +#define TEST_FORMATS_CNT (sizeof(test_formats) / sizeof(struct test_format)) + +/* Simulated users input. */ +static struct parse_events__term test_terms[] = { + { + .config = (char *) "krava01", + .val.num = 15, + .type_val = PARSE_EVENTS__TERM_TYPE_NUM, + .type_term = PARSE_EVENTS__TERM_TYPE_USER, + }, + { + .config = (char *) "krava02", + .val.num = 170, + .type_val = PARSE_EVENTS__TERM_TYPE_NUM, + .type_term = PARSE_EVENTS__TERM_TYPE_USER, + }, + { + .config = (char *) "krava03", + .val.num = 1, + .type_val = PARSE_EVENTS__TERM_TYPE_NUM, + .type_term = PARSE_EVENTS__TERM_TYPE_USER, + }, + { + .config = (char *) "krava11", + .val.num = 27, + .type_val = PARSE_EVENTS__TERM_TYPE_NUM, + .type_term = PARSE_EVENTS__TERM_TYPE_USER, + }, + { + .config = (char *) "krava12", + .val.num = 1, + .type_val = PARSE_EVENTS__TERM_TYPE_NUM, + .type_term = PARSE_EVENTS__TERM_TYPE_USER, + }, + { + .config = (char *) "krava13", + .val.num = 2, + .type_val = PARSE_EVENTS__TERM_TYPE_NUM, + .type_term = PARSE_EVENTS__TERM_TYPE_USER, + }, + { + .config = (char *) "krava21", + .val.num = 119, + .type_val = PARSE_EVENTS__TERM_TYPE_NUM, + .type_term = PARSE_EVENTS__TERM_TYPE_USER, + }, + { + .config = (char *) "krava22", + .val.num = 11, + .type_val = PARSE_EVENTS__TERM_TYPE_NUM, + .type_term = PARSE_EVENTS__TERM_TYPE_USER, + }, + { + .config = (char *) "krava23", + .val.num = 2, + .type_val = PARSE_EVENTS__TERM_TYPE_NUM, + .type_term = PARSE_EVENTS__TERM_TYPE_USER, + }, +}; +#define TERMS_CNT (sizeof(test_terms) / sizeof(struct parse_events__term)) + +/* + * Prepare format directory data, exported by kernel + * at /sys/bus/event_source/devices/<dev>/format. + */ +static char *test_format_dir_get(void) +{ + static char dir[PATH_MAX]; + unsigned int i; + + snprintf(dir, PATH_MAX, "/tmp/perf-pmu-test-format-XXXXXX"); + if (!mkdtemp(dir)) + return NULL; + + for (i = 0; i < TEST_FORMATS_CNT; i++) { + static char name[PATH_MAX]; + struct test_format *format = &test_formats[i]; + FILE *file; + + snprintf(name, PATH_MAX, "%s/%s", dir, format->name); + + file = fopen(name, "w"); + if (!file) + return NULL; + + if (1 != fwrite(format->value, strlen(format->value), 1, file)) + break; + + fclose(file); + } + + return dir; +} + +/* Cleanup format directory. */ +static int test_format_dir_put(char *dir) +{ + char buf[PATH_MAX]; + snprintf(buf, PATH_MAX, "rm -f %s/*\n", dir); + if (system(buf)) + return -1; + + snprintf(buf, PATH_MAX, "rmdir %s\n", dir); + return system(buf); +} + +static struct list_head *test_terms_list(void) +{ + static LIST_HEAD(terms); + unsigned int i; + + for (i = 0; i < TERMS_CNT; i++) + list_add_tail(&test_terms[i].list, &terms); + + return &terms; +} + +#undef TERMS_CNT + +int perf_pmu__test(void) +{ + char *format = test_format_dir_get(); + LIST_HEAD(formats); + struct list_head *terms = test_terms_list(); + int ret; + + if (!format) + return -EINVAL; + + do { + struct perf_event_attr attr; + + memset(&attr, 0, sizeof(attr)); + + ret = pmu_format_parse(format, &formats); + if (ret) + break; + + ret = pmu_config(&formats, &attr, terms); + if (ret) + break; + + ret = -EINVAL; + + if (attr.config != 0xc00000000002a823) + break; + if (attr.config1 != 0x8000400000000145) + break; + if (attr.config2 != 0x0400000020041d07) + break; + + ret = 0; + } while (0); + + test_format_dir_put(format); + return ret; +} diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h new file mode 100644 index 000000000000..535f2c5258ab --- /dev/null +++ b/tools/perf/util/pmu.h @@ -0,0 +1,50 @@ +#ifndef __PMU_H +#define __PMU_H + +#include <linux/bitops.h> +#include "../../../include/linux/perf_event.h" + +enum { + PERF_PMU_FORMAT_VALUE_CONFIG, + PERF_PMU_FORMAT_VALUE_CONFIG1, + PERF_PMU_FORMAT_VALUE_CONFIG2, +}; + +#define PERF_PMU_FORMAT_BITS 64 + +struct perf_pmu__format { + char *name; + int value; + DECLARE_BITMAP(bits, PERF_PMU_FORMAT_BITS); + struct list_head list; +}; + +struct perf_pmu__alias { + char *name; + struct list_head terms; + struct list_head list; +}; + +struct perf_pmu { + char *name; + __u32 type; + struct list_head format; + struct list_head aliases; + struct list_head list; +}; + +struct perf_pmu *perf_pmu__find(char *name); +int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr, + struct list_head *head_terms); +int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms); +struct list_head *perf_pmu__alias(struct perf_pmu *pmu, + struct list_head *head_terms); +int perf_pmu_wrap(void); +void perf_pmu_error(struct list_head *list, char *name, char const *msg); + +int perf_pmu__new_format(struct list_head *list, char *name, + int config, unsigned long *bits); +void perf_pmu__set_format(unsigned long *bits, long from, long to); + +int perf_pmu__test(void); +#endif /* __PMU_H */ diff --git a/tools/perf/util/pmu.l b/tools/perf/util/pmu.l new file mode 100644 index 000000000000..a15d9fbd7c0e --- /dev/null +++ b/tools/perf/util/pmu.l @@ -0,0 +1,43 @@ +%option prefix="perf_pmu_" + +%{ +#include <stdlib.h> +#include <linux/bitops.h> +#include "pmu.h" +#include "pmu-bison.h" + +static int value(int base) +{ + long num; + + errno = 0; + num = strtoul(perf_pmu_text, NULL, base); + if (errno) + return PP_ERROR; + + perf_pmu_lval.num = num; + return PP_VALUE; +} + +%} + +num_dec [0-9]+ + +%% + +{num_dec} { return value(10); } +config { return PP_CONFIG; } +config1 { return PP_CONFIG1; } +config2 { return PP_CONFIG2; } +- { return '-'; } +: { return ':'; } +, { return ','; } +. { ; } +\n { ; } + +%% + +int perf_pmu_wrap(void) +{ + return 1; +} diff --git a/tools/perf/util/pmu.y b/tools/perf/util/pmu.y new file mode 100644 index 000000000000..20ea77e93169 --- /dev/null +++ b/tools/perf/util/pmu.y @@ -0,0 +1,93 @@ + +%name-prefix "perf_pmu_" +%parse-param {struct list_head *format} +%parse-param {char *name} + +%{ + +#include <linux/compiler.h> +#include <linux/list.h> +#include <linux/bitmap.h> +#include <string.h> +#include "pmu.h" + +extern int perf_pmu_lex (void); + +#define ABORT_ON(val) \ +do { \ + if (val) \ + YYABORT; \ +} while (0) + +%} + +%token PP_CONFIG PP_CONFIG1 PP_CONFIG2 +%token PP_VALUE PP_ERROR +%type <num> PP_VALUE +%type <bits> bit_term +%type <bits> bits + +%union +{ + unsigned long num; + DECLARE_BITMAP(bits, PERF_PMU_FORMAT_BITS); +} + +%% + +format: +format format_term +| +format_term + +format_term: +PP_CONFIG ':' bits +{ + ABORT_ON(perf_pmu__new_format(format, name, + PERF_PMU_FORMAT_VALUE_CONFIG, + $3)); +} +| +PP_CONFIG1 ':' bits +{ + ABORT_ON(perf_pmu__new_format(format, name, + PERF_PMU_FORMAT_VALUE_CONFIG1, + $3)); +} +| +PP_CONFIG2 ':' bits +{ + ABORT_ON(perf_pmu__new_format(format, name, + PERF_PMU_FORMAT_VALUE_CONFIG2, + $3)); +} + +bits: +bits ',' bit_term +{ + bitmap_or($$, $1, $3, 64); +} +| +bit_term +{ + memcpy($$, $1, sizeof($1)); +} + +bit_term: +PP_VALUE '-' PP_VALUE +{ + perf_pmu__set_format($$, $1, $3); +} +| +PP_VALUE +{ + perf_pmu__set_format($$, $1, 0); +} + +%% + +void perf_pmu_error(struct list_head *list __used, + char *name __used, + char const *msg __used) +{ +} diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 1c7bfa5fe0a8..0dda25d82d06 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -19,7 +19,6 @@ * */ -#define _GNU_SOURCE #include <sys/utsname.h> #include <sys/types.h> #include <sys/stat.h> @@ -33,10 +32,8 @@ #include <limits.h> #include <elf.h> -#undef _GNU_SOURCE #include "util.h" #include "event.h" -#include "string.h" #include "strlist.h" #include "debug.h" #include "cache.h" @@ -47,6 +44,7 @@ #include "trace-event.h" /* For __unused */ #include "probe-event.h" #include "probe-finder.h" +#include "session.h" #define MAX_CMDLEN 256 #define MAX_PROBE_ARGS 128 @@ -73,6 +71,8 @@ static int e_snprintf(char *str, size_t size, const char *format, ...) } static char *synthesize_perf_probe_point(struct perf_probe_point *pp); +static int convert_name_to_addr(struct perf_probe_event *pev, + const char *exec); static struct machine machine; /* Initialize symbol maps and path of vmlinux/modules */ @@ -173,6 +173,34 @@ const char *kernel_get_module_path(const char *module) return (dso) ? dso->long_name : NULL; } +static int init_user_exec(void) +{ + int ret = 0; + + symbol_conf.try_vmlinux_path = false; + symbol_conf.sort_by_name = true; + ret = symbol__init(); + + if (ret < 0) + pr_debug("Failed to init symbol map.\n"); + + return ret; +} + +static int convert_to_perf_probe_point(struct probe_trace_point *tp, + struct perf_probe_point *pp) +{ + pp->function = strdup(tp->symbol); + + if (pp->function == NULL) + return -ENOMEM; + + pp->offset = tp->offset; + pp->retprobe = tp->retprobe; + + return 0; +} + #ifdef DWARF_SUPPORT /* Open new debuginfo of given module */ static struct debuginfo *open_debuginfo(const char *module) @@ -227,10 +255,7 @@ static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp, if (ret <= 0) { pr_debug("Failed to find corresponding probes from " "debuginfo. Use kprobe event information.\n"); - pp->function = strdup(tp->symbol); - if (pp->function == NULL) - return -ENOMEM; - pp->offset = tp->offset; + return convert_to_perf_probe_point(tp, pp); } pp->retprobe = tp->retprobe; @@ -275,12 +300,23 @@ static int add_module_to_probe_trace_events(struct probe_trace_event *tevs, /* Try to find perf_probe_event with debuginfo */ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, struct probe_trace_event **tevs, - int max_tevs, const char *module) + int max_tevs, const char *target) { bool need_dwarf = perf_probe_event_need_dwarf(pev); - struct debuginfo *dinfo = open_debuginfo(module); + struct debuginfo *dinfo; int ntevs, ret = 0; + if (pev->uprobes) { + if (need_dwarf) { + pr_warning("Debuginfo-analysis is not yet supported" + " with -x/--exec option.\n"); + return -ENOSYS; + } + return convert_name_to_addr(pev, target); + } + + dinfo = open_debuginfo(target); + if (!dinfo) { if (need_dwarf) { pr_warning("Failed to open debuginfo file.\n"); @@ -297,9 +333,9 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, if (ntevs > 0) { /* Succeeded to find trace events */ pr_debug("find %d probe_trace_events.\n", ntevs); - if (module) + if (target) ret = add_module_to_probe_trace_events(*tevs, ntevs, - module); + target); return ret < 0 ? ret : ntevs; } @@ -606,23 +642,22 @@ static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp, pr_err("Failed to find symbol %s in kernel.\n", tp->symbol); return -ENOENT; } - pp->function = strdup(tp->symbol); - if (pp->function == NULL) - return -ENOMEM; - pp->offset = tp->offset; - pp->retprobe = tp->retprobe; - return 0; + return convert_to_perf_probe_point(tp, pp); } static int try_to_find_probe_trace_events(struct perf_probe_event *pev, struct probe_trace_event **tevs __unused, - int max_tevs __unused, const char *mod __unused) + int max_tevs __unused, const char *target) { if (perf_probe_event_need_dwarf(pev)) { pr_warning("Debuginfo-analysis is not supported.\n"); return -ENOSYS; } + + if (pev->uprobes) + return convert_name_to_addr(pev, target); + return 0; } @@ -1344,11 +1379,18 @@ char *synthesize_probe_trace_command(struct probe_trace_event *tev) if (buf == NULL) return NULL; - len = e_snprintf(buf, MAX_CMDLEN, "%c:%s/%s %s%s%s+%lu", - tp->retprobe ? 'r' : 'p', - tev->group, tev->event, - tp->module ?: "", tp->module ? ":" : "", - tp->symbol, tp->offset); + if (tev->uprobes) + len = e_snprintf(buf, MAX_CMDLEN, "%c:%s/%s %s:%s", + tp->retprobe ? 'r' : 'p', + tev->group, tev->event, + tp->module, tp->symbol); + else + len = e_snprintf(buf, MAX_CMDLEN, "%c:%s/%s %s%s%s+%lu", + tp->retprobe ? 'r' : 'p', + tev->group, tev->event, + tp->module ?: "", tp->module ? ":" : "", + tp->symbol, tp->offset); + if (len <= 0) goto error; @@ -1367,7 +1409,7 @@ error: } static int convert_to_perf_probe_event(struct probe_trace_event *tev, - struct perf_probe_event *pev) + struct perf_probe_event *pev, bool is_kprobe) { char buf[64] = ""; int i, ret; @@ -1379,7 +1421,11 @@ static int convert_to_perf_probe_event(struct probe_trace_event *tev, return -ENOMEM; /* Convert trace_point to probe_point */ - ret = kprobe_convert_to_perf_probe(&tev->point, &pev->point); + if (is_kprobe) + ret = kprobe_convert_to_perf_probe(&tev->point, &pev->point); + else + ret = convert_to_perf_probe_point(&tev->point, &pev->point); + if (ret < 0) return ret; @@ -1475,7 +1521,26 @@ static void clear_probe_trace_event(struct probe_trace_event *tev) memset(tev, 0, sizeof(*tev)); } -static int open_kprobe_events(bool readwrite) +static void print_warn_msg(const char *file, bool is_kprobe) +{ + + if (errno == ENOENT) { + const char *config; + + if (!is_kprobe) + config = "CONFIG_UPROBE_EVENTS"; + else + config = "CONFIG_KPROBE_EVENTS"; + + pr_warning("%s file does not exist - please rebuild kernel" + " with %s.\n", file, config); + } else + pr_warning("Failed to open %s file: %s\n", file, + strerror(errno)); +} + +static int open_probe_events(const char *trace_file, bool readwrite, + bool is_kprobe) { char buf[PATH_MAX]; const char *__debugfs; @@ -1487,27 +1552,31 @@ static int open_kprobe_events(bool readwrite) return -ENOENT; } - ret = e_snprintf(buf, PATH_MAX, "%stracing/kprobe_events", __debugfs); + ret = e_snprintf(buf, PATH_MAX, "%s/%s", __debugfs, trace_file); if (ret >= 0) { pr_debug("Opening %s write=%d\n", buf, readwrite); if (readwrite && !probe_event_dry_run) ret = open(buf, O_RDWR, O_APPEND); else ret = open(buf, O_RDONLY, 0); - } - if (ret < 0) { - if (errno == ENOENT) - pr_warning("kprobe_events file does not exist - please" - " rebuild kernel with CONFIG_KPROBE_EVENT.\n"); - else - pr_warning("Failed to open kprobe_events file: %s\n", - strerror(errno)); + if (ret < 0) + print_warn_msg(buf, is_kprobe); } return ret; } -/* Get raw string list of current kprobe_events */ +static int open_kprobe_events(bool readwrite) +{ + return open_probe_events("tracing/kprobe_events", readwrite, true); +} + +static int open_uprobe_events(bool readwrite) +{ + return open_probe_events("tracing/uprobe_events", readwrite, false); +} + +/* Get raw string list of current kprobe_events or uprobe_events */ static struct strlist *get_probe_trace_command_rawlist(int fd) { int ret, idx; @@ -1572,36 +1641,26 @@ static int show_perf_probe_event(struct perf_probe_event *pev) return ret; } -/* List up current perf-probe events */ -int show_perf_probe_events(void) +static int __show_perf_probe_events(int fd, bool is_kprobe) { - int fd, ret; + int ret = 0; struct probe_trace_event tev; struct perf_probe_event pev; struct strlist *rawlist; struct str_node *ent; - setup_pager(); - ret = init_vmlinux(); - if (ret < 0) - return ret; - memset(&tev, 0, sizeof(tev)); memset(&pev, 0, sizeof(pev)); - fd = open_kprobe_events(false); - if (fd < 0) - return fd; - rawlist = get_probe_trace_command_rawlist(fd); - close(fd); if (!rawlist) return -ENOENT; strlist__for_each(ent, rawlist) { ret = parse_probe_trace_command(ent->s, &tev); if (ret >= 0) { - ret = convert_to_perf_probe_event(&tev, &pev); + ret = convert_to_perf_probe_event(&tev, &pev, + is_kprobe); if (ret >= 0) ret = show_perf_probe_event(&pev); } @@ -1615,6 +1674,33 @@ int show_perf_probe_events(void) return ret; } +/* List up current perf-probe events */ +int show_perf_probe_events(void) +{ + int fd, ret; + + setup_pager(); + fd = open_kprobe_events(false); + + if (fd < 0) + return fd; + + ret = init_vmlinux(); + if (ret < 0) + return ret; + + ret = __show_perf_probe_events(fd, true); + close(fd); + + fd = open_uprobe_events(false); + if (fd >= 0) { + ret = __show_perf_probe_events(fd, false); + close(fd); + } + + return ret; +} + /* Get current perf-probe event names */ static struct strlist *get_probe_trace_event_names(int fd, bool include_group) { @@ -1720,7 +1806,11 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, const char *event, *group; struct strlist *namelist; - fd = open_kprobe_events(true); + if (pev->uprobes) + fd = open_uprobe_events(true); + else + fd = open_kprobe_events(true); + if (fd < 0) return fd; /* Get current event names */ @@ -1731,7 +1821,7 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, } ret = 0; - printf("Add new event%s\n", (ntevs > 1) ? "s:" : ":"); + printf("Added new event%s\n", (ntevs > 1) ? "s:" : ":"); for (i = 0; i < ntevs; i++) { tev = &tevs[i]; if (pev->event) @@ -1786,7 +1876,7 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, if (ret >= 0) { /* Show how to use the event. */ - printf("\nYou can now use it on all perf tools, such as:\n\n"); + printf("\nYou can now use it in all perf tools, such as:\n\n"); printf("\tperf record -e %s:%s -aR sleep 1\n\n", tev->group, tev->event); } @@ -1798,14 +1888,14 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, static int convert_to_probe_trace_events(struct perf_probe_event *pev, struct probe_trace_event **tevs, - int max_tevs, const char *module) + int max_tevs, const char *target) { struct symbol *sym; int ret = 0, i; struct probe_trace_event *tev; /* Convert perf_probe_event with debuginfo */ - ret = try_to_find_probe_trace_events(pev, tevs, max_tevs, module); + ret = try_to_find_probe_trace_events(pev, tevs, max_tevs, target); if (ret != 0) return ret; /* Found in debuginfo or got an error */ @@ -1821,8 +1911,8 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev, goto error; } - if (module) { - tev->point.module = strdup(module); + if (target) { + tev->point.module = strdup(target); if (tev->point.module == NULL) { ret = -ENOMEM; goto error; @@ -1832,6 +1922,8 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev, tev->point.offset = pev->point.offset; tev->point.retprobe = pev->point.retprobe; tev->nargs = pev->nargs; + tev->uprobes = pev->uprobes; + if (tev->nargs) { tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs); @@ -1862,6 +1954,9 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev, } } + if (pev->uprobes) + return 1; + /* Currently just checking function name from symbol map */ sym = __find_kernel_function_by_name(tev->point.symbol, NULL); if (!sym) { @@ -1869,6 +1964,12 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev, tev->point.symbol); ret = -ENOENT; goto error; + } else if (tev->point.offset > sym->end - sym->start) { + pr_warning("Offset specified is greater than size of %s\n", + tev->point.symbol); + ret = -ENOENT; + goto error; + } return 1; @@ -1886,17 +1987,23 @@ struct __event_package { }; int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, - int max_tevs, const char *module, bool force_add) + int max_tevs, const char *target, bool force_add) { int i, j, ret; struct __event_package *pkgs; + ret = 0; pkgs = zalloc(sizeof(struct __event_package) * npevs); + if (pkgs == NULL) return -ENOMEM; - /* Init vmlinux path */ - ret = init_vmlinux(); + if (!pevs->uprobes) + /* Init vmlinux path */ + ret = init_vmlinux(); + else + ret = init_user_exec(); + if (ret < 0) { free(pkgs); return ret; @@ -1909,7 +2016,7 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, ret = convert_to_probe_trace_events(pkgs[i].pev, &pkgs[i].tevs, max_tevs, - module); + target); if (ret < 0) goto end; pkgs[i].ntevs = ret; @@ -1956,33 +2063,27 @@ static int __del_trace_probe_event(int fd, struct str_node *ent) pr_debug("Writing event: %s\n", buf); ret = write(fd, buf, strlen(buf)); - if (ret < 0) + if (ret < 0) { + ret = -errno; goto error; + } - printf("Remove event: %s\n", ent->s); + printf("Removed event: %s\n", ent->s); return 0; error: pr_warning("Failed to delete event: %s\n", strerror(-ret)); return ret; } -static int del_trace_probe_event(int fd, const char *group, - const char *event, struct strlist *namelist) +static int del_trace_probe_event(int fd, const char *buf, + struct strlist *namelist) { - char buf[128]; struct str_node *ent, *n; - int found = 0, ret = 0; - - ret = e_snprintf(buf, 128, "%s:%s", group, event); - if (ret < 0) { - pr_err("Failed to copy event.\n"); - return ret; - } + int ret = -1; if (strpbrk(buf, "*?")) { /* Glob-exp */ strlist__for_each_safe(ent, n, namelist) if (strglobmatch(ent->s, buf)) { - found++; ret = __del_trace_probe_event(fd, ent); if (ret < 0) break; @@ -1991,40 +2092,43 @@ static int del_trace_probe_event(int fd, const char *group, } else { ent = strlist__find(namelist, buf); if (ent) { - found++; ret = __del_trace_probe_event(fd, ent); if (ret >= 0) strlist__remove(namelist, ent); } } - if (found == 0 && ret >= 0) - pr_info("Info: Event \"%s\" does not exist.\n", buf); return ret; } int del_perf_probe_events(struct strlist *dellist) { - int fd, ret = 0; + int ret = -1, ufd = -1, kfd = -1; + char buf[128]; const char *group, *event; char *p, *str; struct str_node *ent; - struct strlist *namelist; - - fd = open_kprobe_events(true); - if (fd < 0) - return fd; + struct strlist *namelist = NULL, *unamelist = NULL; /* Get current event names */ - namelist = get_probe_trace_event_names(fd, true); - if (namelist == NULL) - return -EINVAL; + kfd = open_kprobe_events(true); + if (kfd < 0) + return kfd; + + namelist = get_probe_trace_event_names(kfd, true); + ufd = open_uprobe_events(true); + + if (ufd >= 0) + unamelist = get_probe_trace_event_names(ufd, true); + + if (namelist == NULL && unamelist == NULL) + goto error; strlist__for_each(ent, dellist) { str = strdup(ent->s); if (str == NULL) { ret = -ENOMEM; - break; + goto error; } pr_debug("Parsing: %s\n", str); p = strchr(str, ':'); @@ -2036,17 +2140,42 @@ int del_perf_probe_events(struct strlist *dellist) group = "*"; event = str; } + + ret = e_snprintf(buf, 128, "%s:%s", group, event); + if (ret < 0) { + pr_err("Failed to copy event."); + free(str); + goto error; + } + pr_debug("Group: %s, Event: %s\n", group, event); - ret = del_trace_probe_event(fd, group, event, namelist); + + if (namelist) + ret = del_trace_probe_event(kfd, buf, namelist); + + if (unamelist && ret != 0) + ret = del_trace_probe_event(ufd, buf, unamelist); + + if (ret != 0) + pr_info("Info: Event \"%s\" does not exist.\n", buf); + free(str); - if (ret < 0) - break; } - strlist__delete(namelist); - close(fd); + +error: + if (kfd >= 0) { + strlist__delete(namelist); + close(kfd); + } + + if (ufd >= 0) { + strlist__delete(unamelist); + close(ufd); + } return ret; } + /* TODO: don't use a global variable for filter ... */ static struct strfilter *available_func_filter; @@ -2063,13 +2192,24 @@ static int filter_available_functions(struct map *map __unused, return 1; } -int show_available_funcs(const char *module, struct strfilter *_filter) +static int __show_available_funcs(struct map *map) +{ + if (map__load(map, filter_available_functions)) { + pr_err("Failed to load map.\n"); + return -EINVAL; + } + if (!dso__sorted_by_name(map->dso, map->type)) + dso__sort_by_name(map->dso, map->type); + + dso__fprintf_symbols_by_name(map->dso, map->type, stdout); + return 0; +} + +static int available_kernel_funcs(const char *module) { struct map *map; int ret; - setup_pager(); - ret = init_vmlinux(); if (ret < 0) return ret; @@ -2079,14 +2219,125 @@ int show_available_funcs(const char *module, struct strfilter *_filter) pr_err("Failed to find %s map.\n", (module) ? : "kernel"); return -EINVAL; } + return __show_available_funcs(map); +} + +static int available_user_funcs(const char *target) +{ + struct map *map; + int ret; + + ret = init_user_exec(); + if (ret < 0) + return ret; + + map = dso__new_map(target); + ret = __show_available_funcs(map); + dso__delete(map->dso); + map__delete(map); + return ret; +} + +int show_available_funcs(const char *target, struct strfilter *_filter, + bool user) +{ + setup_pager(); available_func_filter = _filter; + + if (!user) + return available_kernel_funcs(target); + + return available_user_funcs(target); +} + +/* + * uprobe_events only accepts address: + * Convert function and any offset to address + */ +static int convert_name_to_addr(struct perf_probe_event *pev, const char *exec) +{ + struct perf_probe_point *pp = &pev->point; + struct symbol *sym; + struct map *map = NULL; + char *function = NULL, *name = NULL; + int ret = -EINVAL; + unsigned long long vaddr = 0; + + if (!pp->function) { + pr_warning("No function specified for uprobes"); + goto out; + } + + function = strdup(pp->function); + if (!function) { + pr_warning("Failed to allocate memory by strdup.\n"); + ret = -ENOMEM; + goto out; + } + + name = realpath(exec, NULL); + if (!name) { + pr_warning("Cannot find realpath for %s.\n", exec); + goto out; + } + map = dso__new_map(name); + if (!map) { + pr_warning("Cannot find appropriate DSO for %s.\n", exec); + goto out; + } + available_func_filter = strfilter__new(function, NULL); if (map__load(map, filter_available_functions)) { pr_err("Failed to load map.\n"); - return -EINVAL; + goto out; } - if (!dso__sorted_by_name(map->dso, map->type)) - dso__sort_by_name(map->dso, map->type); - dso__fprintf_symbols_by_name(map->dso, map->type, stdout); - return 0; + sym = map__find_symbol_by_name(map, function, NULL); + if (!sym) { + pr_warning("Cannot find %s in DSO %s\n", function, exec); + goto out; + } + + if (map->start > sym->start) + vaddr = map->start; + vaddr += sym->start + pp->offset + map->pgoff; + pp->offset = 0; + + if (!pev->event) { + pev->event = function; + function = NULL; + } + if (!pev->group) { + char *ptr1, *ptr2; + + pev->group = zalloc(sizeof(char *) * 64); + ptr1 = strdup(basename(exec)); + if (ptr1) { + ptr2 = strpbrk(ptr1, "-._"); + if (ptr2) + *ptr2 = '\0'; + e_snprintf(pev->group, 64, "%s_%s", PERFPROBE_GROUP, + ptr1); + free(ptr1); + } + } + free(pp->function); + pp->function = zalloc(sizeof(char *) * MAX_PROBE_ARGS); + if (!pp->function) { + ret = -ENOMEM; + pr_warning("Failed to allocate memory by zalloc.\n"); + goto out; + } + e_snprintf(pp->function, MAX_PROBE_ARGS, "0x%llx", vaddr); + ret = 0; + +out: + if (map) { + dso__delete(map->dso); + map__delete(map); + } + if (function) + free(function); + if (name) + free(name); + return ret; } diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index a7dee835f49c..f9f3de8b4220 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -7,7 +7,7 @@ extern bool probe_event_dry_run; -/* kprobe-tracer tracing point */ +/* kprobe-tracer and uprobe-tracer tracing point */ struct probe_trace_point { char *symbol; /* Base symbol */ char *module; /* Module name */ @@ -21,7 +21,7 @@ struct probe_trace_arg_ref { long offset; /* Offset value */ }; -/* kprobe-tracer tracing argument */ +/* kprobe-tracer and uprobe-tracer tracing argument */ struct probe_trace_arg { char *name; /* Argument name */ char *value; /* Base value */ @@ -29,12 +29,13 @@ struct probe_trace_arg { struct probe_trace_arg_ref *ref; /* Referencing offset */ }; -/* kprobe-tracer tracing event (point + arg) */ +/* kprobe-tracer and uprobe-tracer tracing event (point + arg) */ struct probe_trace_event { char *event; /* Event name */ char *group; /* Group name */ struct probe_trace_point point; /* Trace point */ int nargs; /* Number of args */ + bool uprobes; /* uprobes only */ struct probe_trace_arg *args; /* Arguments */ }; @@ -70,6 +71,7 @@ struct perf_probe_event { char *group; /* Group name */ struct perf_probe_point point; /* Probe point */ int nargs; /* Number of arguments */ + bool uprobes; struct perf_probe_arg *args; /* Arguments */ }; @@ -129,8 +131,8 @@ extern int show_line_range(struct line_range *lr, const char *module); extern int show_available_vars(struct perf_probe_event *pevs, int npevs, int max_probe_points, const char *module, struct strfilter *filter, bool externs); -extern int show_available_funcs(const char *module, struct strfilter *filter); - +extern int show_available_funcs(const char *module, struct strfilter *filter, + bool user); /* Maximum index number of event-name postfix */ #define MAX_EVENT_INDEX 1024 diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 5d732621a462..d448984ed789 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -30,7 +30,6 @@ #include <stdlib.h> #include <string.h> #include <stdarg.h> -#include <ctype.h> #include <dwarf-regs.h> #include <linux/bitops.h> @@ -672,7 +671,7 @@ static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf) static int convert_to_trace_point(Dwarf_Die *sp_die, Dwarf_Addr paddr, bool retprobe, struct probe_trace_point *tp) { - Dwarf_Addr eaddr; + Dwarf_Addr eaddr, highaddr; const char *name; /* Copy the name of probe point */ @@ -683,6 +682,16 @@ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwarf_Addr paddr, dwarf_diename(sp_die)); return -ENOENT; } + if (dwarf_highpc(sp_die, &highaddr) != 0) { + pr_warning("Failed to get end address of %s\n", + dwarf_diename(sp_die)); + return -ENOENT; + } + if (paddr > highaddr) { + pr_warning("Offset specified is greater than size of %s\n", + dwarf_diename(sp_die)); + return -EINVAL; + } tp->symbol = strdup(name); if (tp->symbol == NULL) return -ENOMEM; @@ -963,10 +972,12 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) struct dwarf_callback_param *param = data; struct probe_finder *pf = param->data; struct perf_probe_point *pp = &pf->pev->point; + Dwarf_Attribute attr; /* Check tag and diename */ if (dwarf_tag(sp_die) != DW_TAG_subprogram || - !die_compare_name(sp_die, pp->function)) + !die_compare_name(sp_die, pp->function) || + dwarf_attr(sp_die, DW_AT_declaration, &attr)) return DWARF_CB_OK; /* Check declared file */ diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 1132c8f0ce89..17e94d0c36f9 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -5,7 +5,6 @@ #include "util.h" #include "probe-event.h" -#define MAX_PATH_LEN 256 #define MAX_PROBE_BUFFER 1024 #define MAX_PROBES 128 diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources new file mode 100644 index 000000000000..213362850abd --- /dev/null +++ b/tools/perf/util/python-ext-sources @@ -0,0 +1,21 @@ +# +# List of files needed by perf python extention +# +# Each source file must be placed on its own line so that it can be +# processed by Makefile and util/setup.py accordingly. +# + +util/python.c +util/ctype.c +util/evlist.c +util/evsel.c +util/cpumap.c +util/hweight.c +util/thread_map.c +util/util.c +util/xyarray.c +util/cgroup.c +util/debugfs.c +util/rblist.c +util/strlist.c +../../lib/rbtree.c diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 7624324efad4..0688bfb6d280 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -425,14 +425,14 @@ struct pyrf_thread_map { static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads, PyObject *args, PyObject *kwargs) { - static char *kwlist[] = { "pid", "tid", NULL }; - int pid = -1, tid = -1; + static char *kwlist[] = { "pid", "tid", "uid", NULL }; + int pid = -1, tid = -1, uid = UINT_MAX; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii", - kwlist, &pid, &tid)) + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iii", + kwlist, &pid, &tid, &uid)) return -1; - pthreads->threads = thread_map__new(pid, tid); + pthreads->threads = thread_map__new(pid, tid, uid); if (pthreads->threads == NULL) return -1; return 0; @@ -623,7 +623,11 @@ static PyObject *pyrf_evsel__open(struct pyrf_evsel *pevsel, cpus = ((struct pyrf_cpu_map *)pcpus)->cpus; evsel->attr.inherit = inherit; - if (perf_evsel__open(evsel, cpus, threads, group) < 0) { + /* + * This will group just the fds for this single evsel, to group + * multiple events, use evlist.open(). + */ + if (perf_evsel__open(evsel, cpus, threads, group, NULL) < 0) { PyErr_SetFromErrno(PyExc_OSError); return NULL; } @@ -793,17 +797,13 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, event = perf_evlist__mmap_read(evlist, cpu); if (event != NULL) { - struct perf_evsel *first; PyObject *pyevent = pyrf_event__new(event); struct pyrf_event *pevent = (struct pyrf_event *)pyevent; if (pyevent == NULL) return PyErr_NoMemory(); - first = list_entry(evlist->entries.next, struct perf_evsel, node); - err = perf_event__parse_sample(event, first->attr.sample_type, - perf_evsel__sample_size(first), - sample_id_all, &pevent->sample, false); + err = perf_evlist__parse_sample(evlist, event, &pevent->sample, false); if (err) return PyErr_Format(PyExc_OSError, "perf: can't parse sample, err=%d", err); @@ -814,6 +814,25 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, return Py_None; } +static PyObject *pyrf_evlist__open(struct pyrf_evlist *pevlist, + PyObject *args, PyObject *kwargs) +{ + struct perf_evlist *evlist = &pevlist->evlist; + int group = 0; + static char *kwlist[] = { "group", NULL }; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|OOii", kwlist, &group)) + return NULL; + + if (perf_evlist__open(evlist, group) < 0) { + PyErr_SetFromErrno(PyExc_OSError); + return NULL; + } + + Py_INCREF(Py_None); + return Py_None; +} + static PyMethodDef pyrf_evlist__methods[] = { { .ml_name = "mmap", @@ -822,6 +841,12 @@ static PyMethodDef pyrf_evlist__methods[] = { .ml_doc = PyDoc_STR("mmap the file descriptor table.") }, { + .ml_name = "open", + .ml_meth = (PyCFunction)pyrf_evlist__open, + .ml_flags = METH_VARARGS | METH_KEYWORDS, + .ml_doc = PyDoc_STR("open the file descriptors.") + }, + { .ml_name = "poll", .ml_meth = (PyCFunction)pyrf_evlist__poll, .ml_flags = METH_VARARGS | METH_KEYWORDS, diff --git a/tools/perf/util/rblist.c b/tools/perf/util/rblist.c new file mode 100644 index 000000000000..0171fb611004 --- /dev/null +++ b/tools/perf/util/rblist.c @@ -0,0 +1,107 @@ +/* + * Based on strlist.c by: + * (c) 2009 Arnaldo Carvalho de Melo <acme@redhat.com> + * + * Licensed under the GPLv2. + */ + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> + +#include "rblist.h" + +int rblist__add_node(struct rblist *rblist, const void *new_entry) +{ + struct rb_node **p = &rblist->entries.rb_node; + struct rb_node *parent = NULL, *new_node; + + while (*p != NULL) { + int rc; + + parent = *p; + + rc = rblist->node_cmp(parent, new_entry); + if (rc > 0) + p = &(*p)->rb_left; + else if (rc < 0) + p = &(*p)->rb_right; + else + return -EEXIST; + } + + new_node = rblist->node_new(rblist, new_entry); + if (new_node == NULL) + return -ENOMEM; + + rb_link_node(new_node, parent, p); + rb_insert_color(new_node, &rblist->entries); + ++rblist->nr_entries; + + return 0; +} + +void rblist__remove_node(struct rblist *rblist, struct rb_node *rb_node) +{ + rb_erase(rb_node, &rblist->entries); + rblist->node_delete(rblist, rb_node); +} + +struct rb_node *rblist__find(struct rblist *rblist, const void *entry) +{ + struct rb_node **p = &rblist->entries.rb_node; + struct rb_node *parent = NULL; + + while (*p != NULL) { + int rc; + + parent = *p; + + rc = rblist->node_cmp(parent, entry); + if (rc > 0) + p = &(*p)->rb_left; + else if (rc < 0) + p = &(*p)->rb_right; + else + return parent; + } + + return NULL; +} + +void rblist__init(struct rblist *rblist) +{ + if (rblist != NULL) { + rblist->entries = RB_ROOT; + rblist->nr_entries = 0; + } + + return; +} + +void rblist__delete(struct rblist *rblist) +{ + if (rblist != NULL) { + struct rb_node *pos, *next = rb_first(&rblist->entries); + + while (next) { + pos = next; + next = rb_next(pos); + rb_erase(pos, &rblist->entries); + rblist->node_delete(rblist, pos); + } + free(rblist); + } +} + +struct rb_node *rblist__entry(const struct rblist *rblist, unsigned int idx) +{ + struct rb_node *node; + + for (node = rb_first(&rblist->entries); node; node = rb_next(node)) { + if (!idx--) + return node; + } + + return NULL; +} diff --git a/tools/perf/util/rblist.h b/tools/perf/util/rblist.h new file mode 100644 index 000000000000..6d0cae5ae83d --- /dev/null +++ b/tools/perf/util/rblist.h @@ -0,0 +1,47 @@ +#ifndef __PERF_RBLIST_H +#define __PERF_RBLIST_H + +#include <linux/rbtree.h> +#include <stdbool.h> + +/* + * create node structs of the form: + * struct my_node { + * struct rb_node rb_node; + * ... my data ... + * }; + * + * create list structs of the form: + * struct mylist { + * struct rblist rblist; + * ... my data ... + * }; + */ + +struct rblist { + struct rb_root entries; + unsigned int nr_entries; + + int (*node_cmp)(struct rb_node *rbn, const void *entry); + struct rb_node *(*node_new)(struct rblist *rlist, const void *new_entry); + void (*node_delete)(struct rblist *rblist, struct rb_node *rb_node); +}; + +void rblist__init(struct rblist *rblist); +void rblist__delete(struct rblist *rblist); +int rblist__add_node(struct rblist *rblist, const void *new_entry); +void rblist__remove_node(struct rblist *rblist, struct rb_node *rb_node); +struct rb_node *rblist__find(struct rblist *rblist, const void *entry); +struct rb_node *rblist__entry(const struct rblist *rblist, unsigned int idx); + +static inline bool rblist__empty(const struct rblist *rblist) +{ + return rblist->nr_entries == 0; +} + +static inline unsigned int rblist__nr_entries(const struct rblist *rblist) +{ + return rblist->nr_entries; +} + +#endif /* __PERF_RBLIST_H */ diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 74350ffb57fe..02dfa19a467f 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -27,7 +27,10 @@ #include "../../perf.h" #include "../util.h" +#include "../thread.h" +#include "../event.h" #include "../trace-event.h" +#include "../evsel.h" #include <EXTERN.h> #include <perl.h> @@ -53,7 +56,7 @@ INTERP my_perl; #define FTRACE_MAX_EVENT \ ((1 << (sizeof(unsigned short) * 8)) - 1) -struct event *events[FTRACE_MAX_EVENT]; +struct event_format *events[FTRACE_MAX_EVENT]; extern struct scripting_context *scripting_context; @@ -178,7 +181,7 @@ static void define_flag_field(const char *ev_name, LEAVE; } -static void define_event_symbols(struct event *event, +static void define_event_symbols(struct event_format *event, const char *ev_name, struct print_arg *args) { @@ -206,6 +209,12 @@ static void define_event_symbols(struct event *event, define_symbolic_values(args->symbol.symbols, ev_name, cur_field_name); break; + case PRINT_HEX: + define_event_symbols(event, ev_name, args->hex.field); + define_event_symbols(event, ev_name, args->hex.size); + break; + case PRINT_BSTRING: + case PRINT_DYNAMIC_ARRAY: case PRINT_STRING: break; case PRINT_TYPE: @@ -217,7 +226,9 @@ static void define_event_symbols(struct event *event, define_event_symbols(event, ev_name, args->op.left); define_event_symbols(event, ev_name, args->op.right); break; + case PRINT_FUNC: default: + pr_err("Unsupported print arg type\n"); /* we should warn... */ return; } @@ -226,15 +237,16 @@ static void define_event_symbols(struct event *event, define_event_symbols(event, ev_name, args->next); } -static inline struct event *find_cache_event(int type) +static inline +struct event_format *find_cache_event(struct pevent *pevent, int type) { static char ev_name[256]; - struct event *event; + struct event_format *event; if (events[type]) return events[type]; - events[type] = event = trace_find_event(type); + events[type] = event = pevent_find_event(pevent, type); if (!event) return NULL; @@ -245,17 +257,18 @@ static inline struct event *find_cache_event(int type) return event; } -static void perl_process_event(union perf_event *pevent __unused, - struct perf_sample *sample, - struct perf_evsel *evsel, - struct perf_session *session __unused, - struct thread *thread) +static void perl_process_tracepoint(union perf_event *perf_event __unused, + struct pevent *pevent, + struct perf_sample *sample, + struct perf_evsel *evsel, + struct machine *machine __unused, + struct thread *thread) { struct format_field *field; static char handler[256]; unsigned long long val; unsigned long s, ns; - struct event *event; + struct event_format *event; int type; int pid; int cpu = sample->cpu; @@ -265,13 +278,16 @@ static void perl_process_event(union perf_event *pevent __unused, dSP; - type = trace_parse_common_type(data); + if (evsel->attr.type != PERF_TYPE_TRACEPOINT) + return; + + type = trace_parse_common_type(pevent, data); - event = find_cache_event(type); + event = find_cache_event(pevent, type); if (!event) die("ug! no event found for type %d", type); - pid = trace_parse_common_pid(data); + pid = trace_parse_common_pid(pevent, data); sprintf(handler, "%s::%s", event->system, event->name); @@ -304,7 +320,8 @@ static void perl_process_event(union perf_event *pevent __unused, offset = field->offset; XPUSHs(sv_2mortal(newSVpv((char *)data + offset, 0))); } else { /* FIELD_IS_NUMERIC */ - val = read_size(data + field->offset, field->size); + val = read_size(pevent, data + field->offset, + field->size); if (field->flags & FIELD_IS_SIGNED) { XPUSHs(sv_2mortal(newSViv(val))); } else { @@ -332,6 +349,43 @@ static void perl_process_event(union perf_event *pevent __unused, LEAVE; } +static void perl_process_event_generic(union perf_event *pevent __unused, + struct perf_sample *sample, + struct perf_evsel *evsel __unused, + struct machine *machine __unused, + struct thread *thread __unused) +{ + dSP; + + if (!get_cv("process_event", 0)) + return; + + ENTER; + SAVETMPS; + PUSHMARK(SP); + XPUSHs(sv_2mortal(newSVpvn((const char *)pevent, pevent->header.size))); + XPUSHs(sv_2mortal(newSVpvn((const char *)&evsel->attr, sizeof(evsel->attr)))); + XPUSHs(sv_2mortal(newSVpvn((const char *)sample, sizeof(*sample)))); + XPUSHs(sv_2mortal(newSVpvn((const char *)sample->raw_data, sample->raw_size))); + PUTBACK; + call_pv("process_event", G_SCALAR); + SPAGAIN; + PUTBACK; + FREETMPS; + LEAVE; +} + +static void perl_process_event(union perf_event *event, + struct pevent *pevent, + struct perf_sample *sample, + struct perf_evsel *evsel, + struct machine *machine, + struct thread *thread) +{ + perl_process_tracepoint(event, pevent, sample, evsel, machine, thread); + perl_process_event_generic(event, sample, evsel, machine, thread); +} + static void run_start_sub(void) { dSP; /* access to Perl stack */ @@ -402,9 +456,9 @@ static int perl_stop_script(void) return 0; } -static int perl_generate_script(const char *outfile) +static int perl_generate_script(struct pevent *pevent, const char *outfile) { - struct event *event = NULL; + struct event_format *event = NULL; struct format_field *f; char fname[PATH_MAX]; int not_first, count; @@ -449,7 +503,7 @@ static int perl_generate_script(const char *outfile) fprintf(ofp, "sub trace_begin\n{\n\t# optional\n}\n\n"); fprintf(ofp, "sub trace_end\n{\n\t# optional\n}\n\n"); - while ((event = trace_find_next_event(event))) { + while ((event = trace_find_next_event(pevent, event))) { fprintf(ofp, "sub %s::%s\n{\n", event->system, event->name); fprintf(ofp, "\tmy ("); @@ -553,7 +607,28 @@ static int perl_generate_script(const char *outfile) fprintf(ofp, "sub print_header\n{\n" "\tmy ($event_name, $cpu, $secs, $nsecs, $pid, $comm) = @_;\n\n" "\tprintf(\"%%-20s %%5u %%05u.%%09u %%8u %%-20s \",\n\t " - "$event_name, $cpu, $secs, $nsecs, $pid, $comm);\n}"); + "$event_name, $cpu, $secs, $nsecs, $pid, $comm);\n}\n"); + + fprintf(ofp, + "\n# Packed byte string args of process_event():\n" + "#\n" + "# $event:\tunion perf_event\tutil/event.h\n" + "# $attr:\tstruct perf_event_attr\tlinux/perf_event.h\n" + "# $sample:\tstruct perf_sample\tutil/event.h\n" + "# $raw_data:\tperf_sample->raw_data\tutil/event.h\n" + "\n" + "sub process_event\n" + "{\n" + "\tmy ($event, $attr, $sample, $raw_data) = @_;\n" + "\n" + "\tmy @event\t= unpack(\"LSS\", $event);\n" + "\tmy @attr\t= unpack(\"LLQQQQQLLQQ\", $attr);\n" + "\tmy @sample\t= unpack(\"QLLQQQQQLL\", $sample);\n" + "\tmy @raw_data\t= unpack(\"C*\", $raw_data);\n" + "\n" + "\tuse Data::Dumper;\n" + "\tprint Dumper \\@event, \\@attr, \\@sample, \\@raw_data;\n" + "}\n"); fclose(ofp); diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 6ccf70e8d8f2..ce4d1b0c3862 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -24,11 +24,12 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <ctype.h> #include <errno.h> #include "../../perf.h" #include "../util.h" +#include "../event.h" +#include "../thread.h" #include "../trace-event.h" PyMODINIT_FUNC initperf_trace_context(void); @@ -36,7 +37,7 @@ PyMODINIT_FUNC initperf_trace_context(void); #define FTRACE_MAX_EVENT \ ((1 << (sizeof(unsigned short) * 8)) - 1) -struct event *events[FTRACE_MAX_EVENT]; +struct event_format *events[FTRACE_MAX_EVENT]; #define MAX_FIELDS 64 #define N_COMMON_FIELDS 7 @@ -135,7 +136,7 @@ static void define_field(enum print_arg_type field_type, Py_DECREF(t); } -static void define_event_symbols(struct event *event, +static void define_event_symbols(struct event_format *event, const char *ev_name, struct print_arg *args) { @@ -165,6 +166,10 @@ static void define_event_symbols(struct event *event, define_values(PRINT_SYMBOL, args->symbol.symbols, ev_name, cur_field_name); break; + case PRINT_HEX: + define_event_symbols(event, ev_name, args->hex.field); + define_event_symbols(event, ev_name, args->hex.size); + break; case PRINT_STRING: break; case PRINT_TYPE: @@ -177,6 +182,10 @@ static void define_event_symbols(struct event *event, define_event_symbols(event, ev_name, args->op.right); break; default: + /* gcc warns for these? */ + case PRINT_BSTRING: + case PRINT_DYNAMIC_ARRAY: + case PRINT_FUNC: /* we should warn... */ return; } @@ -185,15 +194,16 @@ static void define_event_symbols(struct event *event, define_event_symbols(event, ev_name, args->next); } -static inline struct event *find_cache_event(int type) +static inline +struct event_format *find_cache_event(struct pevent *pevent, int type) { static char ev_name[256]; - struct event *event; + struct event_format *event; if (events[type]) return events[type]; - events[type] = event = trace_find_event(type); + events[type] = event = pevent_find_event(pevent, type); if (!event) return NULL; @@ -204,10 +214,11 @@ static inline struct event *find_cache_event(int type) return event; } -static void python_process_event(union perf_event *pevent __unused, +static void python_process_event(union perf_event *perf_event __unused, + struct pevent *pevent, struct perf_sample *sample, struct perf_evsel *evsel __unused, - struct perf_session *session __unused, + struct machine *machine __unused, struct thread *thread) { PyObject *handler, *retval, *context, *t, *obj, *dict = NULL; @@ -215,7 +226,7 @@ static void python_process_event(union perf_event *pevent __unused, struct format_field *field; unsigned long long val; unsigned long s, ns; - struct event *event; + struct event_format *event; unsigned n = 0; int type; int pid; @@ -228,13 +239,13 @@ static void python_process_event(union perf_event *pevent __unused, if (!t) Py_FatalError("couldn't create Python tuple"); - type = trace_parse_common_type(data); + type = trace_parse_common_type(pevent, data); - event = find_cache_event(type); + event = find_cache_event(pevent, type); if (!event) die("ug! no event found for type %d", type); - pid = trace_parse_common_pid(data); + pid = trace_parse_common_pid(pevent, data); sprintf(handler_name, "%s__%s", event->system, event->name); @@ -279,7 +290,8 @@ static void python_process_event(union perf_event *pevent __unused, offset = field->offset; obj = PyString_FromString((char *)data + offset); } else { /* FIELD_IS_NUMERIC */ - val = read_size(data + field->offset, field->size); + val = read_size(pevent, data + field->offset, + field->size); if (field->flags & FIELD_IS_SIGNED) { if ((long long)val >= LONG_MIN && (long long)val <= LONG_MAX) @@ -433,9 +445,9 @@ out: return err; } -static int python_generate_script(const char *outfile) +static int python_generate_script(struct pevent *pevent, const char *outfile) { - struct event *event = NULL; + struct event_format *event = NULL; struct format_field *f; char fname[PATH_MAX]; int not_first, count; @@ -482,7 +494,7 @@ static int python_generate_script(const char *outfile) fprintf(ofp, "def trace_end():\n"); fprintf(ofp, "\tprint \"in trace_end\"\n\n"); - while ((event = trace_find_next_event(event))) { + while ((event = trace_find_next_event(pevent, event))) { fprintf(ofp, "def %s__%s(", event->system, event->name); fprintf(ofp, "event_name, "); fprintf(ofp, "context, "); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 72458d9da5b1..2437fb0b463a 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -10,9 +10,11 @@ #include "evlist.h" #include "evsel.h" #include "session.h" +#include "tool.h" #include "sort.h" #include "util.h" #include "cpumap.h" +#include "event-parse.h" static int perf_session__open(struct perf_session *self, bool force) { @@ -23,7 +25,7 @@ static int perf_session__open(struct perf_session *self, bool force) self->fd = STDIN_FILENO; if (perf_session__read_header(self, self->fd) < 0) - pr_err("incompatible file format"); + pr_err("incompatible file format (rerun with -v to learn more)"); return 0; } @@ -55,7 +57,7 @@ static int perf_session__open(struct perf_session *self, bool force) } if (perf_session__read_header(self, self->fd) < 0) { - pr_err("incompatible file format"); + pr_err("incompatible file format (rerun with -v to learn more)"); goto out_close; } @@ -78,39 +80,12 @@ out_close: return -1; } -static void perf_session__id_header_size(struct perf_session *session) +void perf_session__set_id_hdr_size(struct perf_session *session) { - struct perf_sample *data; - u64 sample_type = session->sample_type; - u16 size = 0; + u16 id_hdr_size = perf_evlist__id_hdr_size(session->evlist); - if (!session->sample_id_all) - goto out; - - if (sample_type & PERF_SAMPLE_TID) - size += sizeof(data->tid) * 2; - - if (sample_type & PERF_SAMPLE_TIME) - size += sizeof(data->time); - - if (sample_type & PERF_SAMPLE_ID) - size += sizeof(data->id); - - if (sample_type & PERF_SAMPLE_STREAM_ID) - size += sizeof(data->stream_id); - - if (sample_type & PERF_SAMPLE_CPU) - size += sizeof(data->cpu) * 2; -out: - session->id_hdr_size = size; -} - -void perf_session__update_sample_type(struct perf_session *self) -{ - self->sample_type = perf_evlist__sample_type(self->evlist); - self->sample_size = __perf_evsel__sample_size(self->sample_type); - self->sample_id_all = perf_evlist__sample_id_all(self->evlist); - perf_session__id_header_size(self); + session->host_machine.id_hdr_size = id_hdr_size; + machines__set_id_hdr_size(&session->machines, id_hdr_size); } int perf_session__create_kernel_maps(struct perf_session *self) @@ -130,18 +105,26 @@ static void perf_session__destroy_kernel_maps(struct perf_session *self) struct perf_session *perf_session__new(const char *filename, int mode, bool force, bool repipe, - struct perf_event_ops *ops) + struct perf_tool *tool) { - size_t len = filename ? strlen(filename) + 1 : 0; - struct perf_session *self = zalloc(sizeof(*self) + len); + struct perf_session *self; + struct stat st; + size_t len; + + if (!filename || !strlen(filename)) { + if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode)) + filename = "-"; + else + filename = "perf.data"; + } + + len = strlen(filename); + self = zalloc(sizeof(*self) + len); if (self == NULL) goto out; memcpy(self->filename, filename, len); - self->threads = RB_ROOT; - INIT_LIST_HEAD(&self->dead_threads); - self->last_match = NULL; /* * On 64bit we can mmap the data file in one go. No need for tiny mmap * slices. On 32bit we use 32MB. @@ -157,11 +140,12 @@ struct perf_session *perf_session__new(const char *filename, int mode, INIT_LIST_HEAD(&self->ordered_samples.sample_cache); INIT_LIST_HEAD(&self->ordered_samples.to_free); machine__init(&self->host_machine, "", HOST_KERNEL_ID); + hists__init(&self->hists); if (mode == O_RDONLY) { if (perf_session__open(self, force) < 0) goto out_delete; - perf_session__update_sample_type(self); + perf_session__set_id_hdr_size(self); } else if (mode == O_WRONLY) { /* * In O_RDONLY mode this will be performed when reading the @@ -171,10 +155,10 @@ struct perf_session *perf_session__new(const char *filename, int mode, goto out_delete; } - if (ops && ops->ordering_requires_timestamps && - ops->ordered_samples && !self->sample_id_all) { + if (tool && tool->ordering_requires_timestamps && + tool->ordered_samples && !perf_evlist__sample_id_all(self->evlist)) { dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n"); - ops->ordered_samples = false; + tool->ordered_samples = false; } out: @@ -184,17 +168,22 @@ out_delete: return NULL; } -static void perf_session__delete_dead_threads(struct perf_session *self) +static void machine__delete_dead_threads(struct machine *machine) { struct thread *n, *t; - list_for_each_entry_safe(t, n, &self->dead_threads, node) { + list_for_each_entry_safe(t, n, &machine->dead_threads, node) { list_del(&t->node); thread__delete(t); } } -static void perf_session__delete_threads(struct perf_session *self) +static void perf_session__delete_dead_threads(struct perf_session *session) +{ + machine__delete_dead_threads(&session->host_machine); +} + +static void machine__delete_threads(struct machine *self) { struct rb_node *nd = rb_first(&self->threads); @@ -207,6 +196,11 @@ static void perf_session__delete_threads(struct perf_session *self) } } +static void perf_session__delete_threads(struct perf_session *session) +{ + machine__delete_threads(&session->host_machine); +} + void perf_session__delete(struct perf_session *self) { perf_session__destroy_kernel_maps(self); @@ -217,7 +211,7 @@ void perf_session__delete(struct perf_session *self) free(self); } -void perf_session__remove_thread(struct perf_session *self, struct thread *th) +void machine__remove_thread(struct machine *self, struct thread *th) { self->last_match = NULL; rb_erase(&th->rb_node, &self->threads); @@ -236,16 +230,79 @@ static bool symbol__match_parent_regex(struct symbol *sym) return 0; } -int perf_session__resolve_callchain(struct perf_session *self, - struct thread *thread, - struct ip_callchain *chain, - struct symbol **parent) +static const u8 cpumodes[] = { + PERF_RECORD_MISC_USER, + PERF_RECORD_MISC_KERNEL, + PERF_RECORD_MISC_GUEST_USER, + PERF_RECORD_MISC_GUEST_KERNEL +}; +#define NCPUMODES (sizeof(cpumodes)/sizeof(u8)) + +static void ip__resolve_ams(struct machine *self, struct thread *thread, + struct addr_map_symbol *ams, + u64 ip) +{ + struct addr_location al; + size_t i; + u8 m; + + memset(&al, 0, sizeof(al)); + + for (i = 0; i < NCPUMODES; i++) { + m = cpumodes[i]; + /* + * We cannot use the header.misc hint to determine whether a + * branch stack address is user, kernel, guest, hypervisor. + * Branches may straddle the kernel/user/hypervisor boundaries. + * Thus, we have to try consecutively until we find a match + * or else, the symbol is unknown + */ + thread__find_addr_location(thread, self, m, MAP__FUNCTION, + ip, &al, NULL); + if (al.sym) + goto found; + } +found: + ams->addr = ip; + ams->al_addr = al.addr; + ams->sym = al.sym; + ams->map = al.map; +} + +struct branch_info *machine__resolve_bstack(struct machine *self, + struct thread *thr, + struct branch_stack *bs) +{ + struct branch_info *bi; + unsigned int i; + + bi = calloc(bs->nr, sizeof(struct branch_info)); + if (!bi) + return NULL; + + for (i = 0; i < bs->nr; i++) { + ip__resolve_ams(self, thr, &bi[i].to, bs->entries[i].to); + ip__resolve_ams(self, thr, &bi[i].from, bs->entries[i].from); + bi[i].flags = bs->entries[i].flags; + } + return bi; +} + +int machine__resolve_callchain(struct machine *self, + struct thread *thread, + struct ip_callchain *chain, + struct symbol **parent) { u8 cpumode = PERF_RECORD_MISC_USER; unsigned int i; int err; - callchain_cursor_reset(&self->callchain_cursor); + callchain_cursor_reset(&callchain_cursor); + + if (chain->nr > PERF_MAX_STACK_DEPTH) { + pr_warning("corrupted callchain. skipping...\n"); + return 0; + } for (i = 0; i < chain->nr; i++) { u64 ip; @@ -265,14 +322,21 @@ int perf_session__resolve_callchain(struct perf_session *self, case PERF_CONTEXT_USER: cpumode = PERF_RECORD_MISC_USER; break; default: - break; + pr_debug("invalid callchain context: " + "%"PRId64"\n", (s64) ip); + /* + * It seems the callchain is corrupted. + * Discard all. + */ + callchain_cursor_reset(&callchain_cursor); + return 0; } continue; } al.filtered = false; thread__find_addr_location(thread, self, cpumode, - MAP__FUNCTION, thread->pid, ip, &al, NULL); + MAP__FUNCTION, ip, &al, NULL); if (al.sym != NULL) { if (sort__has_parent && !*parent && symbol__match_parent_regex(al.sym)) @@ -281,7 +345,7 @@ int perf_session__resolve_callchain(struct perf_session *self, break; } - err = callchain_cursor_append(&self->callchain_cursor, + err = callchain_cursor_append(&callchain_cursor, ip, al.map, al.sym); if (err) return err; @@ -290,75 +354,101 @@ int perf_session__resolve_callchain(struct perf_session *self, return 0; } -static int process_event_synth_stub(union perf_event *event __used, - struct perf_session *session __used) +static int process_event_synth_tracing_data_stub(union perf_event *event __used, + struct perf_session *session __used) { dump_printf(": unhandled!\n"); return 0; } -static int process_event_sample_stub(union perf_event *event __used, +static int process_event_synth_attr_stub(union perf_event *event __used, + struct perf_evlist **pevlist __used) +{ + dump_printf(": unhandled!\n"); + return 0; +} + +static int process_event_sample_stub(struct perf_tool *tool __used, + union perf_event *event __used, struct perf_sample *sample __used, struct perf_evsel *evsel __used, - struct perf_session *session __used) + struct machine *machine __used) { dump_printf(": unhandled!\n"); return 0; } -static int process_event_stub(union perf_event *event __used, +static int process_event_stub(struct perf_tool *tool __used, + union perf_event *event __used, struct perf_sample *sample __used, - struct perf_session *session __used) + struct machine *machine __used) +{ + dump_printf(": unhandled!\n"); + return 0; +} + +static int process_finished_round_stub(struct perf_tool *tool __used, + union perf_event *event __used, + struct perf_session *perf_session __used) { dump_printf(": unhandled!\n"); return 0; } -static int process_finished_round_stub(union perf_event *event __used, - struct perf_session *session __used, - struct perf_event_ops *ops __used) +static int process_event_type_stub(struct perf_tool *tool __used, + union perf_event *event __used) { dump_printf(": unhandled!\n"); return 0; } -static int process_finished_round(union perf_event *event, - struct perf_session *session, - struct perf_event_ops *ops); - -static void perf_event_ops__fill_defaults(struct perf_event_ops *handler) -{ - if (handler->sample == NULL) - handler->sample = process_event_sample_stub; - if (handler->mmap == NULL) - handler->mmap = process_event_stub; - if (handler->comm == NULL) - handler->comm = process_event_stub; - if (handler->fork == NULL) - handler->fork = process_event_stub; - if (handler->exit == NULL) - handler->exit = process_event_stub; - if (handler->lost == NULL) - handler->lost = perf_event__process_lost; - if (handler->read == NULL) - handler->read = process_event_stub; - if (handler->throttle == NULL) - handler->throttle = process_event_stub; - if (handler->unthrottle == NULL) - handler->unthrottle = process_event_stub; - if (handler->attr == NULL) - handler->attr = process_event_synth_stub; - if (handler->event_type == NULL) - handler->event_type = process_event_synth_stub; - if (handler->tracing_data == NULL) - handler->tracing_data = process_event_synth_stub; - if (handler->build_id == NULL) - handler->build_id = process_event_synth_stub; - if (handler->finished_round == NULL) { - if (handler->ordered_samples) - handler->finished_round = process_finished_round; +static int process_finished_round(struct perf_tool *tool, + union perf_event *event, + struct perf_session *session); + +static void perf_tool__fill_defaults(struct perf_tool *tool) +{ + if (tool->sample == NULL) + tool->sample = process_event_sample_stub; + if (tool->mmap == NULL) + tool->mmap = process_event_stub; + if (tool->comm == NULL) + tool->comm = process_event_stub; + if (tool->fork == NULL) + tool->fork = process_event_stub; + if (tool->exit == NULL) + tool->exit = process_event_stub; + if (tool->lost == NULL) + tool->lost = perf_event__process_lost; + if (tool->read == NULL) + tool->read = process_event_sample_stub; + if (tool->throttle == NULL) + tool->throttle = process_event_stub; + if (tool->unthrottle == NULL) + tool->unthrottle = process_event_stub; + if (tool->attr == NULL) + tool->attr = process_event_synth_attr_stub; + if (tool->event_type == NULL) + tool->event_type = process_event_type_stub; + if (tool->tracing_data == NULL) + tool->tracing_data = process_event_synth_tracing_data_stub; + if (tool->build_id == NULL) + tool->build_id = process_finished_round_stub; + if (tool->finished_round == NULL) { + if (tool->ordered_samples) + tool->finished_round = process_finished_round; else - handler->finished_round = process_finished_round_stub; + tool->finished_round = process_finished_round_stub; + } +} + +void mem_bswap_32(void *src, int byte_size) +{ + u32 *m = src; + while (byte_size > 0) { + *m = bswap_32(*m); + byte_size -= sizeof(u32); + ++m; } } @@ -373,37 +463,65 @@ void mem_bswap_64(void *src, int byte_size) } } -static void perf_event__all64_swap(union perf_event *event) +static void swap_sample_id_all(union perf_event *event, void *data) +{ + void *end = (void *) event + event->header.size; + int size = end - data; + + BUG_ON(size % sizeof(u64)); + mem_bswap_64(data, size); +} + +static void perf_event__all64_swap(union perf_event *event, + bool sample_id_all __used) { struct perf_event_header *hdr = &event->header; mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr)); } -static void perf_event__comm_swap(union perf_event *event) +static void perf_event__comm_swap(union perf_event *event, bool sample_id_all) { event->comm.pid = bswap_32(event->comm.pid); event->comm.tid = bswap_32(event->comm.tid); + + if (sample_id_all) { + void *data = &event->comm.comm; + + data += ALIGN(strlen(data) + 1, sizeof(u64)); + swap_sample_id_all(event, data); + } } -static void perf_event__mmap_swap(union perf_event *event) +static void perf_event__mmap_swap(union perf_event *event, + bool sample_id_all) { event->mmap.pid = bswap_32(event->mmap.pid); event->mmap.tid = bswap_32(event->mmap.tid); event->mmap.start = bswap_64(event->mmap.start); event->mmap.len = bswap_64(event->mmap.len); event->mmap.pgoff = bswap_64(event->mmap.pgoff); + + if (sample_id_all) { + void *data = &event->mmap.filename; + + data += ALIGN(strlen(data) + 1, sizeof(u64)); + swap_sample_id_all(event, data); + } } -static void perf_event__task_swap(union perf_event *event) +static void perf_event__task_swap(union perf_event *event, bool sample_id_all) { event->fork.pid = bswap_32(event->fork.pid); event->fork.tid = bswap_32(event->fork.tid); event->fork.ppid = bswap_32(event->fork.ppid); event->fork.ptid = bswap_32(event->fork.ptid); event->fork.time = bswap_64(event->fork.time); + + if (sample_id_all) + swap_sample_id_all(event, &event->fork + 1); } -static void perf_event__read_swap(union perf_event *event) +static void perf_event__read_swap(union perf_event *event, bool sample_id_all) { event->read.pid = bswap_32(event->read.pid); event->read.tid = bswap_32(event->read.tid); @@ -411,6 +529,41 @@ static void perf_event__read_swap(union perf_event *event) event->read.time_enabled = bswap_64(event->read.time_enabled); event->read.time_running = bswap_64(event->read.time_running); event->read.id = bswap_64(event->read.id); + + if (sample_id_all) + swap_sample_id_all(event, &event->read + 1); +} + +static u8 revbyte(u8 b) +{ + int rev = (b >> 4) | ((b & 0xf) << 4); + rev = ((rev & 0xcc) >> 2) | ((rev & 0x33) << 2); + rev = ((rev & 0xaa) >> 1) | ((rev & 0x55) << 1); + return (u8) rev; +} + +/* + * XXX this is hack in attempt to carry flags bitfield + * throught endian village. ABI says: + * + * Bit-fields are allocated from right to left (least to most significant) + * on little-endian implementations and from left to right (most to least + * significant) on big-endian implementations. + * + * The above seems to be byte specific, so we need to reverse each + * byte of the bitfield. 'Internet' also says this might be implementation + * specific and we probably need proper fix and carry perf_event_attr + * bitfield flags in separate data file FEAT_ section. Thought this seems + * to work for now. + */ +static void swap_bitfield(u8 *p, unsigned len) +{ + unsigned i; + + for (i = 0; i < len; i++) { + *p = revbyte(*p); + p++; + } } /* exported for swapping attributes in file header */ @@ -426,9 +579,12 @@ void perf_event__attr_swap(struct perf_event_attr *attr) attr->bp_type = bswap_32(attr->bp_type); attr->bp_addr = bswap_64(attr->bp_addr); attr->bp_len = bswap_64(attr->bp_len); + + swap_bitfield((u8 *) (&attr->read_format + 1), sizeof(u64)); } -static void perf_event__hdr_attr_swap(union perf_event *event) +static void perf_event__hdr_attr_swap(union perf_event *event, + bool sample_id_all __used) { size_t size; @@ -439,18 +595,21 @@ static void perf_event__hdr_attr_swap(union perf_event *event) mem_bswap_64(event->attr.id, size); } -static void perf_event__event_type_swap(union perf_event *event) +static void perf_event__event_type_swap(union perf_event *event, + bool sample_id_all __used) { event->event_type.event_type.event_id = bswap_64(event->event_type.event_type.event_id); } -static void perf_event__tracing_data_swap(union perf_event *event) +static void perf_event__tracing_data_swap(union perf_event *event, + bool sample_id_all __used) { event->tracing_data.size = bswap_32(event->tracing_data.size); } -typedef void (*perf_event__swap_op)(union perf_event *event); +typedef void (*perf_event__swap_op)(union perf_event *event, + bool sample_id_all); static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_MMAP] = perf_event__mmap_swap, @@ -490,11 +649,11 @@ static void perf_session_free_sample_buffers(struct perf_session *session) static int perf_session_deliver_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample, - struct perf_event_ops *ops, + struct perf_tool *tool, u64 file_offset); static void flush_sample_queue(struct perf_session *s, - struct perf_event_ops *ops) + struct perf_tool *tool) { struct ordered_samples *os = &s->ordered_samples; struct list_head *head = &os->samples; @@ -502,25 +661,32 @@ static void flush_sample_queue(struct perf_session *s, struct perf_sample sample; u64 limit = os->next_flush; u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL; + unsigned idx = 0, progress_next = os->nr_samples / 16; int ret; - if (!ops->ordered_samples || !limit) + if (!tool->ordered_samples || !limit) return; list_for_each_entry_safe(iter, tmp, head, list) { if (iter->timestamp > limit) break; - ret = perf_session__parse_sample(s, iter->event, &sample); + ret = perf_evlist__parse_sample(s->evlist, iter->event, &sample, + s->header.needs_swap); if (ret) pr_err("Can't parse sample, err = %d\n", ret); else - perf_session_deliver_event(s, iter->event, &sample, ops, + perf_session_deliver_event(s, iter->event, &sample, tool, iter->file_offset); os->last_flush = iter->timestamp; list_del(&iter->list); list_add(&iter->list, &os->sample_cache); + if (++idx >= progress_next) { + progress_next += os->nr_samples / 16; + ui_progress__update(idx, os->nr_samples, + "Processing time ordered events..."); + } } if (list_empty(head)) { @@ -529,6 +695,8 @@ static void flush_sample_queue(struct perf_session *s, os->last_sample = list_entry(head->prev, struct sample_queue, list); } + + os->nr_samples = 0; } /* @@ -570,11 +738,11 @@ static void flush_sample_queue(struct perf_session *s, * Flush every events below timestamp 7 * etc... */ -static int process_finished_round(union perf_event *event __used, - struct perf_session *session, - struct perf_event_ops *ops) +static int process_finished_round(struct perf_tool *tool, + union perf_event *event __used, + struct perf_session *session) { - flush_sample_queue(session, ops); + flush_sample_queue(session, tool); session->ordered_samples.next_flush = session->ordered_samples.max_timestamp; return 0; @@ -588,6 +756,7 @@ static void __queue_event(struct sample_queue *new, struct perf_session *s) u64 timestamp = new->timestamp; struct list_head *p; + ++os->nr_samples; os->last_sample = new; if (!sample) { @@ -679,20 +848,34 @@ static void callchain__printf(struct perf_sample *sample) i, sample->callchain->ips[i]); } +static void branch_stack__printf(struct perf_sample *sample) +{ + uint64_t i; + + printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr); + + for (i = 0; i < sample->branch_stack->nr; i++) + printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 "\n", + i, sample->branch_stack->entries[i].from, + sample->branch_stack->entries[i].to); +} + static void perf_session__print_tstamp(struct perf_session *session, union perf_event *event, struct perf_sample *sample) { + u64 sample_type = perf_evlist__sample_type(session->evlist); + if (event->header.type != PERF_RECORD_SAMPLE && - !session->sample_id_all) { + !perf_evlist__sample_id_all(session->evlist)) { fputs("-1 -1 ", stdout); return; } - if ((session->sample_type & PERF_SAMPLE_CPU)) + if ((sample_type & PERF_SAMPLE_CPU)) printf("%u ", sample->cpu); - if (session->sample_type & PERF_SAMPLE_TIME) + if (sample_type & PERF_SAMPLE_TIME) printf("%" PRIu64 " ", sample->time); } @@ -717,6 +900,8 @@ static void dump_event(struct perf_session *session, union perf_event *event, static void dump_sample(struct perf_session *session, union perf_event *event, struct perf_sample *sample) { + u64 sample_type; + if (!dump_trace) return; @@ -724,45 +909,98 @@ static void dump_sample(struct perf_session *session, union perf_event *event, event->header.misc, sample->pid, sample->tid, sample->ip, sample->period, sample->addr); - if (session->sample_type & PERF_SAMPLE_CALLCHAIN) + sample_type = perf_evlist__sample_type(session->evlist); + + if (sample_type & PERF_SAMPLE_CALLCHAIN) callchain__printf(sample); + + if (sample_type & PERF_SAMPLE_BRANCH_STACK) + branch_stack__printf(sample); +} + +static struct machine * + perf_session__find_machine_for_cpumode(struct perf_session *session, + union perf_event *event) +{ + const u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + if (perf_guest && + ((cpumode == PERF_RECORD_MISC_GUEST_KERNEL) || + (cpumode == PERF_RECORD_MISC_GUEST_USER))) { + u32 pid; + + if (event->header.type == PERF_RECORD_MMAP) + pid = event->mmap.pid; + else + pid = event->ip.pid; + + return perf_session__findnew_machine(session, pid); + } + + return perf_session__find_host_machine(session); } static int perf_session_deliver_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample, - struct perf_event_ops *ops, + struct perf_tool *tool, u64 file_offset) { struct perf_evsel *evsel; + struct machine *machine; dump_event(session, event, file_offset, sample); + evsel = perf_evlist__id2evsel(session->evlist, sample->id); + if (evsel != NULL && event->header.type != PERF_RECORD_SAMPLE) { + /* + * XXX We're leaving PERF_RECORD_SAMPLE unnacounted here + * because the tools right now may apply filters, discarding + * some of the samples. For consistency, in the future we + * should have something like nr_filtered_samples and remove + * the sample->period from total_sample_period, etc, KISS for + * now tho. + * + * Also testing against NULL allows us to handle files without + * attr.sample_id_all and/or without PERF_SAMPLE_ID. In the + * future probably it'll be a good idea to restrict event + * processing via perf_session to files with both set. + */ + hists__inc_nr_events(&evsel->hists, event->header.type); + } + + machine = perf_session__find_machine_for_cpumode(session, event); + switch (event->header.type) { case PERF_RECORD_SAMPLE: dump_sample(session, event, sample); - evsel = perf_evlist__id2evsel(session->evlist, sample->id); if (evsel == NULL) { ++session->hists.stats.nr_unknown_id; - return -1; + return 0; + } + if (machine == NULL) { + ++session->hists.stats.nr_unprocessable_samples; + return 0; } - return ops->sample(event, sample, evsel, session); + return tool->sample(tool, event, sample, evsel, machine); case PERF_RECORD_MMAP: - return ops->mmap(event, sample, session); + return tool->mmap(tool, event, sample, machine); case PERF_RECORD_COMM: - return ops->comm(event, sample, session); + return tool->comm(tool, event, sample, machine); case PERF_RECORD_FORK: - return ops->fork(event, sample, session); + return tool->fork(tool, event, sample, machine); case PERF_RECORD_EXIT: - return ops->exit(event, sample, session); + return tool->exit(tool, event, sample, machine); case PERF_RECORD_LOST: - return ops->lost(event, sample, session); + if (tool->lost == perf_event__process_lost) + session->hists.stats.total_lost += event->lost.lost; + return tool->lost(tool, event, sample, machine); case PERF_RECORD_READ: - return ops->read(event, sample, session); + return tool->read(tool, event, sample, evsel, machine); case PERF_RECORD_THROTTLE: - return ops->throttle(event, sample, session); + return tool->throttle(tool, event, sample, machine); case PERF_RECORD_UNTHROTTLE: - return ops->unthrottle(event, sample, session); + return tool->unthrottle(tool, event, sample, machine); default: ++session->hists.stats.nr_unknown_events; return -1; @@ -773,7 +1011,7 @@ static int perf_session__preprocess_sample(struct perf_session *session, union perf_event *event, struct perf_sample *sample) { if (event->header.type != PERF_RECORD_SAMPLE || - !(session->sample_type & PERF_SAMPLE_CALLCHAIN)) + !(perf_evlist__sample_type(session->evlist) & PERF_SAMPLE_CALLCHAIN)) return 0; if (!ip_callchain__valid(sample->callchain, event)) { @@ -786,40 +1024,53 @@ static int perf_session__preprocess_sample(struct perf_session *session, } static int perf_session__process_user_event(struct perf_session *session, union perf_event *event, - struct perf_event_ops *ops, u64 file_offset) + struct perf_tool *tool, u64 file_offset) { + int err; + dump_event(session, event, file_offset, NULL); /* These events are processed right away */ switch (event->header.type) { case PERF_RECORD_HEADER_ATTR: - return ops->attr(event, session); + err = tool->attr(event, &session->evlist); + if (err == 0) + perf_session__set_id_hdr_size(session); + return err; case PERF_RECORD_HEADER_EVENT_TYPE: - return ops->event_type(event, session); + return tool->event_type(tool, event); case PERF_RECORD_HEADER_TRACING_DATA: /* setup for reading amidst mmap */ lseek(session->fd, file_offset, SEEK_SET); - return ops->tracing_data(event, session); + return tool->tracing_data(event, session); case PERF_RECORD_HEADER_BUILD_ID: - return ops->build_id(event, session); + return tool->build_id(tool, event, session); case PERF_RECORD_FINISHED_ROUND: - return ops->finished_round(event, session, ops); + return tool->finished_round(tool, event, session); default: return -EINVAL; } } +static void event_swap(union perf_event *event, bool sample_id_all) +{ + perf_event__swap_op swap; + + swap = perf_event__swap_ops[event->header.type]; + if (swap) + swap(event, sample_id_all); +} + static int perf_session__process_event(struct perf_session *session, union perf_event *event, - struct perf_event_ops *ops, + struct perf_tool *tool, u64 file_offset) { struct perf_sample sample; int ret; - if (session->header.needs_swap && - perf_event__swap_ops[event->header.type]) - perf_event__swap_ops[event->header.type](event); + if (session->header.needs_swap) + event_swap(event, perf_evlist__sample_id_all(session->evlist)); if (event->header.type >= PERF_RECORD_HEADER_MAX) return -EINVAL; @@ -827,12 +1078,13 @@ static int perf_session__process_event(struct perf_session *session, hists__inc_nr_events(&session->hists, event->header.type); if (event->header.type >= PERF_RECORD_USER_TYPE_START) - return perf_session__process_user_event(session, event, ops, file_offset); + return perf_session__process_user_event(session, event, tool, file_offset); /* * For all kernel events we get the sample data */ - ret = perf_session__parse_sample(session, event, &sample); + ret = perf_evlist__parse_sample(session->evlist, event, &sample, + session->header.needs_swap); if (ret) return ret; @@ -840,14 +1092,14 @@ static int perf_session__process_event(struct perf_session *session, if (perf_session__preprocess_sample(session, event, &sample)) return 0; - if (ops->ordered_samples) { + if (tool->ordered_samples) { ret = perf_session_queue_event(session, event, &sample, file_offset); if (ret != -ETIME) return ret; } - return perf_session_deliver_event(session, event, &sample, ops, + return perf_session_deliver_event(session, event, &sample, tool, file_offset); } @@ -858,6 +1110,11 @@ void perf_event_header__bswap(struct perf_event_header *self) self->size = bswap_16(self->size); } +struct thread *perf_session__findnew(struct perf_session *session, pid_t pid) +{ + return machine__findnew_thread(&session->host_machine, pid); +} + static struct thread *perf_session__register_idle_thread(struct perf_session *self) { struct thread *thread = perf_session__findnew(self, 0); @@ -871,14 +1128,14 @@ static struct thread *perf_session__register_idle_thread(struct perf_session *se } static void perf_session__warn_about_errors(const struct perf_session *session, - const struct perf_event_ops *ops) -{ - if (ops->lost == perf_event__process_lost && - session->hists.stats.total_lost != 0) { - ui__warning("Processed %" PRIu64 " events and LOST %" PRIu64 - "!\n\nCheck IO/CPU overload!\n\n", - session->hists.stats.total_period, - session->hists.stats.total_lost); + const struct perf_tool *tool) +{ + if (tool->lost == perf_event__process_lost && + session->hists.stats.nr_events[PERF_RECORD_LOST] != 0) { + ui__warning("Processed %d events and lost %d chunks!\n\n" + "Check IO/CPU overload!\n\n", + session->hists.stats.nr_events[0], + session->hists.stats.nr_events[PERF_RECORD_LOST]); } if (session->hists.stats.nr_unknown_events != 0) { @@ -902,26 +1159,39 @@ static void perf_session__warn_about_errors(const struct perf_session *session, session->hists.stats.nr_invalid_chains, session->hists.stats.nr_events[PERF_RECORD_SAMPLE]); } + + if (session->hists.stats.nr_unprocessable_samples != 0) { + ui__warning("%u unprocessable samples recorded.\n" + "Do you have a KVM guest running and not using 'perf kvm'?\n", + session->hists.stats.nr_unprocessable_samples); + } } #define session_done() (*(volatile int *)(&session_done)) volatile int session_done; static int __perf_session__process_pipe_events(struct perf_session *self, - struct perf_event_ops *ops) + struct perf_tool *tool) { - union perf_event event; - uint32_t size; + union perf_event *event; + uint32_t size, cur_size = 0; + void *buf = NULL; int skip = 0; u64 head; int err; void *p; - perf_event_ops__fill_defaults(ops); + perf_tool__fill_defaults(tool); head = 0; + cur_size = sizeof(union perf_event); + + buf = malloc(cur_size); + if (!buf) + return -errno; more: - err = readn(self->fd, &event, sizeof(struct perf_event_header)); + event = buf; + err = readn(self->fd, event, sizeof(struct perf_event_header)); if (err <= 0) { if (err == 0) goto done; @@ -931,13 +1201,23 @@ more: } if (self->header.needs_swap) - perf_event_header__bswap(&event.header); + perf_event_header__bswap(&event->header); - size = event.header.size; + size = event->header.size; if (size == 0) size = 8; - p = &event; + if (size > cur_size) { + void *new = realloc(buf, size); + if (!new) { + pr_err("failed to allocate memory to read event\n"); + goto out_err; + } + buf = new; + cur_size = size; + event = buf; + } + p = event; p += sizeof(struct perf_event_header); if (size - sizeof(struct perf_event_header)) { @@ -953,18 +1233,11 @@ more: } } - if (size == 0 || - (skip = perf_session__process_event(self, &event, ops, head)) < 0) { - dump_printf("%#" PRIx64 " [%#x]: skipping unknown header type: %d\n", - head, event.header.size, event.header.type); - /* - * assume we lost track of the stream, check alignment, and - * increment a single u64 in the hope to catch on again 'soon'. - */ - if (unlikely(head & 7)) - head &= ~7ULL; - - size = 8; + if ((skip = perf_session__process_event(self, event, tool, head)) < 0) { + pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n", + head, event->header.size, event->header.type); + err = -EINVAL; + goto out_err; } head += size; @@ -977,7 +1250,8 @@ more: done: err = 0; out_err: - perf_session__warn_about_errors(self, ops); + free(buf); + perf_session__warn_about_errors(self, tool); perf_session_free_sample_buffers(self); return err; } @@ -1008,17 +1282,16 @@ fetch_mmaped_event(struct perf_session *session, int __perf_session__process_events(struct perf_session *session, u64 data_offset, u64 data_size, - u64 file_size, struct perf_event_ops *ops) + u64 file_size, struct perf_tool *tool) { u64 head, page_offset, file_offset, file_pos, progress_next; int err, mmap_prot, mmap_flags, map_idx = 0; - struct ui_progress *progress; size_t page_size, mmap_size; char *buf, *mmaps[8]; union perf_event *event; uint32_t size; - perf_event_ops__fill_defaults(ops); + perf_tool__fill_defaults(tool); page_size = sysconf(_SC_PAGESIZE); @@ -1030,9 +1303,6 @@ int __perf_session__process_events(struct perf_session *session, file_size = data_offset + data_size; progress_next = file_size / 16; - progress = ui_progress__new("Processing events...", file_size); - if (progress == NULL) - return -1; mmap_size = session->mmap_window; if (mmap_size > file_size) @@ -1076,18 +1346,12 @@ more: size = event->header.size; if (size == 0 || - perf_session__process_event(session, event, ops, file_pos) < 0) { - dump_printf("%#" PRIx64 " [%#x]: skipping unknown header type: %d\n", - file_offset + head, event->header.size, - event->header.type); - /* - * assume we lost track of the stream, check alignment, and - * increment a single u64 in the hope to catch on again 'soon'. - */ - if (unlikely(head & 7)) - head &= ~7ULL; - - size = 8; + perf_session__process_event(session, event, tool, file_pos) < 0) { + pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n", + file_offset + head, event->header.size, + event->header.type); + err = -EINVAL; + goto out_err; } head += size; @@ -1095,7 +1359,8 @@ more: if (file_pos >= progress_next) { progress_next += file_size / 16; - ui_progress__update(progress, file_pos); + ui_progress__update(file_pos, file_size, + "Processing events..."); } if (file_pos < file_size) @@ -1104,16 +1369,15 @@ more: err = 0; /* do the final flush for ordered samples */ session->ordered_samples.next_flush = ULLONG_MAX; - flush_sample_queue(session, ops); + flush_sample_queue(session, tool); out_err: - ui_progress__delete(progress); - perf_session__warn_about_errors(session, ops); + perf_session__warn_about_errors(session, tool); perf_session_free_sample_buffers(session); return err; } int perf_session__process_events(struct perf_session *self, - struct perf_event_ops *ops) + struct perf_tool *tool) { int err; @@ -1124,16 +1388,16 @@ int perf_session__process_events(struct perf_session *self, err = __perf_session__process_events(self, self->header.data_offset, self->header.data_size, - self->size, ops); + self->size, tool); else - err = __perf_session__process_pipe_events(self, ops); + err = __perf_session__process_pipe_events(self, tool); return err; } -bool perf_session__has_traces(struct perf_session *self, const char *msg) +bool perf_session__has_traces(struct perf_session *session, const char *msg) { - if (!(self->sample_type & PERF_SAMPLE_RAW)) { + if (!(perf_evlist__sample_type(session->evlist) & PERF_SAMPLE_RAW)) { pr_err("No trace sample to read. Did you call 'perf %s'?\n", msg); return false; } @@ -1141,9 +1405,8 @@ bool perf_session__has_traces(struct perf_session *self, const char *msg) return true; } -int perf_session__set_kallsyms_ref_reloc_sym(struct map **maps, - const char *symbol_name, - u64 addr) +int maps__set_kallsyms_ref_reloc_sym(struct map **maps, + const char *symbol_name, u64 addr) { char *bracket; enum map_type i; @@ -1195,13 +1458,34 @@ size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp) ret += hists__fprintf_nr_events(&session->hists, fp); list_for_each_entry(pos, &session->evlist->entries, node) { - ret += fprintf(fp, "%s stats:\n", event_name(pos)); + ret += fprintf(fp, "%s stats:\n", perf_evsel__name(pos)); ret += hists__fprintf_nr_events(&pos->hists, fp); } return ret; } +size_t perf_session__fprintf(struct perf_session *session, FILE *fp) +{ + /* + * FIXME: Here we have to actually print all the machines in this + * session, not just the host... + */ + return machine__fprintf(&session->host_machine, fp); +} + +void perf_session__remove_thread(struct perf_session *session, + struct thread *th) +{ + /* + * FIXME: This one makes no sense, we need to remove the thread from + * the machine it belongs to, perf_session can have many machines, so + * doing it always on ->host_machine is wrong. Fix when auditing all + * the 'perf kvm' code. + */ + machine__remove_thread(&session->host_machine, th); +} + struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session, unsigned int type) { @@ -1214,17 +1498,14 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session, return NULL; } -void perf_session__print_ip(union perf_event *event, - struct perf_sample *sample, - struct perf_session *session, - int print_sym, int print_dso) +void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, + struct machine *machine, int print_sym, + int print_dso, int print_symoffset) { struct addr_location al; - const char *symname, *dsoname; - struct callchain_cursor *cursor = &session->callchain_cursor; struct callchain_cursor_node *node; - if (perf_event__preprocess_sample(event, session, &al, sample, + if (perf_event__preprocess_sample(event, machine, &al, sample, NULL) < 0) { error("problem processing %d event, skipping it.\n", event->header.type); @@ -1233,59 +1514,49 @@ void perf_session__print_ip(union perf_event *event, if (symbol_conf.use_callchain && sample->callchain) { - if (perf_session__resolve_callchain(session, al.thread, + if (machine__resolve_callchain(machine, al.thread, sample->callchain, NULL) != 0) { if (verbose) error("Failed to resolve callchain. Skipping\n"); return; } - callchain_cursor_commit(cursor); + callchain_cursor_commit(&callchain_cursor); while (1) { - node = callchain_cursor_current(cursor); + node = callchain_cursor_current(&callchain_cursor); if (!node) break; printf("\t%16" PRIx64, node->ip); if (print_sym) { - if (node->sym && node->sym->name) - symname = node->sym->name; - else - symname = ""; - - printf(" %s", symname); + printf(" "); + symbol__fprintf_symname(node->sym, stdout); } if (print_dso) { - if (node->map && node->map->dso && node->map->dso->name) - dsoname = node->map->dso->name; - else - dsoname = ""; - - printf(" (%s)", dsoname); + printf(" ("); + map__fprintf_dsoname(node->map, stdout); + printf(")"); } printf("\n"); - callchain_cursor_advance(cursor); + callchain_cursor_advance(&callchain_cursor); } } else { printf("%16" PRIx64, sample->ip); if (print_sym) { - if (al.sym && al.sym->name) - symname = al.sym->name; + printf(" "); + if (print_symoffset) + symbol__fprintf_symname_offs(al.sym, &al, + stdout); else - symname = ""; - - printf(" %s", symname); + symbol__fprintf_symname(al.sym, stdout); } if (print_dso) { - if (al.map && al.map->dso && al.map->dso->name) - dsoname = al.map->dso->name; - else - dsoname = ""; - - printf(" (%s)", dsoname); + printf(" ("); + map__fprintf_dsoname(al.map, stdout); + printf(")"); } } } @@ -1311,6 +1582,10 @@ int perf_session__cpu_bitmap(struct perf_session *session, } map = cpu_map__new(cpu_list); + if (map == NULL) { + pr_err("Invalid cpu_list\n"); + return -1; + } for (i = 0; i < map->nr; i++) { int cpu = map->map[i]; @@ -1326,3 +1601,77 @@ int perf_session__cpu_bitmap(struct perf_session *session, return 0; } + +void perf_session__fprintf_info(struct perf_session *session, FILE *fp, + bool full) +{ + struct stat st; + int ret; + + if (session == NULL || fp == NULL) + return; + + ret = fstat(session->fd, &st); + if (ret == -1) + return; + + fprintf(fp, "# ========\n"); + fprintf(fp, "# captured on: %s", ctime(&st.st_ctime)); + perf_header__fprintf_info(session, fp, full); + fprintf(fp, "# ========\n#\n"); +} + + +int __perf_session__set_tracepoints_handlers(struct perf_session *session, + const struct perf_evsel_str_handler *assocs, + size_t nr_assocs) +{ + struct perf_evlist *evlist = session->evlist; + struct event_format *format; + struct perf_evsel *evsel; + char *tracepoint, *name; + size_t i; + int err; + + for (i = 0; i < nr_assocs; i++) { + err = -ENOMEM; + tracepoint = strdup(assocs[i].name); + if (tracepoint == NULL) + goto out; + + err = -ENOENT; + name = strchr(tracepoint, ':'); + if (name == NULL) + goto out_free; + + *name++ = '\0'; + format = pevent_find_event_by_name(session->pevent, + tracepoint, name); + if (format == NULL) { + /* + * Adding a handler for an event not in the session, + * just ignore it. + */ + goto next; + } + + evsel = perf_evlist__find_tracepoint_by_id(evlist, format->id); + if (evsel == NULL) + goto next; + + err = -EEXIST; + if (evsel->handler.func != NULL) + goto out_free; + evsel->handler.func = assocs[i].handler; +next: + free(tracepoint); + } + + err = 0; +out: + return err; + +out_free: + free(tracepoint); + goto out; +} diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 974d0cbee5e9..1f7ec87db7d7 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -23,18 +23,17 @@ struct ordered_samples { struct sample_queue *sample_buffer; struct sample_queue *last_sample; int sample_buffer_idx; + unsigned int nr_samples; }; struct perf_session { struct perf_header header; unsigned long size; unsigned long mmap_window; - struct rb_root threads; - struct list_head dead_threads; - struct thread *last_match; struct machine host_machine; struct rb_root machines; struct perf_evlist *evlist; + struct pevent *pevent; /* * FIXME: Need to split this up further, we need global * stats + per event stats. 'perf diff' also needs @@ -42,81 +41,48 @@ struct perf_session { * perf.data file. */ struct hists hists; - u64 sample_type; - int sample_size; int fd; bool fd_pipe; bool repipe; - bool sample_id_all; - u16 id_hdr_size; int cwdlen; char *cwd; struct ordered_samples ordered_samples; - struct callchain_cursor callchain_cursor; - char filename[0]; + char filename[1]; }; -struct perf_evsel; -struct perf_event_ops; - -typedef int (*event_sample)(union perf_event *event, struct perf_sample *sample, - struct perf_evsel *evsel, struct perf_session *session); -typedef int (*event_op)(union perf_event *self, struct perf_sample *sample, - struct perf_session *session); -typedef int (*event_synth_op)(union perf_event *self, - struct perf_session *session); -typedef int (*event_op2)(union perf_event *self, struct perf_session *session, - struct perf_event_ops *ops); - -struct perf_event_ops { - event_sample sample; - event_op mmap, - comm, - fork, - exit, - lost, - read, - throttle, - unthrottle; - event_synth_op attr, - event_type, - tracing_data, - build_id; - event_op2 finished_round; - bool ordered_samples; - bool ordering_requires_timestamps; -}; +struct perf_tool; struct perf_session *perf_session__new(const char *filename, int mode, bool force, bool repipe, - struct perf_event_ops *ops); + struct perf_tool *tool); void perf_session__delete(struct perf_session *self); void perf_event_header__bswap(struct perf_event_header *self); int __perf_session__process_events(struct perf_session *self, u64 data_offset, u64 data_size, u64 size, - struct perf_event_ops *ops); + struct perf_tool *tool); int perf_session__process_events(struct perf_session *self, - struct perf_event_ops *event_ops); + struct perf_tool *tool); -int perf_session__resolve_callchain(struct perf_session *self, +int perf_session__resolve_callchain(struct perf_session *self, struct perf_evsel *evsel, struct thread *thread, struct ip_callchain *chain, struct symbol **parent); -bool perf_session__has_traces(struct perf_session *self, const char *msg); +struct branch_info *machine__resolve_bstack(struct machine *self, + struct thread *thread, + struct branch_stack *bs); -int perf_session__set_kallsyms_ref_reloc_sym(struct map **maps, - const char *symbol_name, - u64 addr); +bool perf_session__has_traces(struct perf_session *self, const char *msg); void mem_bswap_64(void *src, int byte_size); +void mem_bswap_32(void *src, int byte_size); void perf_event__attr_swap(struct perf_event_attr *attr); int perf_session__create_kernel_maps(struct perf_session *self); -void perf_session__update_sample_type(struct perf_session *self); +void perf_session__set_id_hdr_size(struct perf_session *session); void perf_session__remove_thread(struct perf_session *self, struct thread *th); static inline @@ -143,12 +109,16 @@ struct machine *perf_session__findnew_machine(struct perf_session *self, pid_t p static inline void perf_session__process_machines(struct perf_session *self, + struct perf_tool *tool, machine__process_t process) { - process(&self->host_machine, self); - return machines__process(&self->machines, process, self); + process(&self->host_machine, tool); + return machines__process(&self->machines, process, tool); } +struct thread *perf_session__findnew(struct perf_session *self, pid_t pid); +size_t perf_session__fprintf(struct perf_session *self, FILE *fp); + size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp); size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, @@ -156,25 +126,24 @@ size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp); -static inline int perf_session__parse_sample(struct perf_session *session, - const union perf_event *event, - struct perf_sample *sample) -{ - return perf_event__parse_sample(event, session->sample_type, - session->sample_size, - session->sample_id_all, sample, - session->header.needs_swap); -} - struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session, unsigned int type); -void perf_session__print_ip(union perf_event *event, - struct perf_sample *sample, - struct perf_session *session, - int print_sym, int print_dso); +void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, + struct machine *machine, int print_sym, + int print_dso, int print_symoffset); int perf_session__cpu_bitmap(struct perf_session *session, const char *cpu_list, unsigned long *cpu_bitmap); +void perf_session__fprintf_info(struct perf_session *s, FILE *fp, bool full); + +struct perf_evsel_str_handler; + +int __perf_session__set_tracepoints_handlers(struct perf_session *session, + const struct perf_evsel_str_handler *assocs, + size_t nr_assocs); + +#define perf_session__set_tracepoints_handlers(session, array) \ + __perf_session__set_tracepoints_handlers(session, array, ARRAY_SIZE(array)) #endif /* __PERF_SESSION_H */ diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 95d370074928..d0f9f29cf181 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -24,10 +24,11 @@ cflags += getenv('CFLAGS', '').split() build_lib = getenv('PYTHON_EXTBUILD_LIB') build_tmp = getenv('PYTHON_EXTBUILD_TMP') +ext_sources = [f.strip() for f in file('util/python-ext-sources') + if len(f.strip()) > 0 and f[0] != '#'] + perf = Extension('perf', - sources = ['util/python.c', 'util/ctype.c', 'util/evlist.c', - 'util/evsel.c', 'util/cpumap.c', 'util/thread_map.c', - 'util/util.c', 'util/xyarray.c', 'util/cgroup.c'], + sources = ext_sources, include_dirs = ['util/include'], extra_compile_args = cflags, ) diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 1ee8f1e40f18..0f5a0a496bc4 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -8,6 +8,7 @@ const char default_sort_order[] = "comm,dso,symbol"; const char *sort_order = default_sort_order; int sort__need_collapse = 0; int sort__has_parent = 0; +int sort__branch_mode = -1; /* -1 = means not set */ enum sort_type sort__first_dimension; @@ -33,6 +34,9 @@ static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...) } } va_end(ap); + + if (n >= (int)size) + return size - 1; return n; } @@ -94,6 +98,26 @@ static int hist_entry__comm_snprintf(struct hist_entry *self, char *bf, return repsep_snprintf(bf, size, "%*s", width, self->thread->comm); } +static int64_t _sort__dso_cmp(struct map *map_l, struct map *map_r) +{ + struct dso *dso_l = map_l ? map_l->dso : NULL; + struct dso *dso_r = map_r ? map_r->dso : NULL; + const char *dso_name_l, *dso_name_r; + + if (!dso_l || !dso_r) + return cmp_null(dso_l, dso_r); + + if (verbose) { + dso_name_l = dso_l->long_name; + dso_name_r = dso_r->long_name; + } else { + dso_name_l = dso_l->short_name; + dso_name_r = dso_r->short_name; + } + + return strcmp(dso_name_l, dso_name_r); +} + struct sort_entry sort_comm = { .se_header = "Command", .se_cmp = sort__comm_cmp, @@ -107,36 +131,74 @@ struct sort_entry sort_comm = { static int64_t sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) { - struct dso *dso_l = left->ms.map ? left->ms.map->dso : NULL; - struct dso *dso_r = right->ms.map ? right->ms.map->dso : NULL; - const char *dso_name_l, *dso_name_r; + return _sort__dso_cmp(left->ms.map, right->ms.map); +} - if (!dso_l || !dso_r) - return cmp_null(dso_l, dso_r); - if (verbose) { - dso_name_l = dso_l->long_name; - dso_name_r = dso_r->long_name; - } else { - dso_name_l = dso_l->short_name; - dso_name_r = dso_r->short_name; +static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r, + u64 ip_l, u64 ip_r) +{ + if (!sym_l || !sym_r) + return cmp_null(sym_l, sym_r); + + if (sym_l == sym_r) + return 0; + + if (sym_l) + ip_l = sym_l->start; + if (sym_r) + ip_r = sym_r->start; + + return (int64_t)(ip_r - ip_l); +} + +static int _hist_entry__dso_snprintf(struct map *map, char *bf, + size_t size, unsigned int width) +{ + if (map && map->dso) { + const char *dso_name = !verbose ? map->dso->short_name : + map->dso->long_name; + return repsep_snprintf(bf, size, "%-*s", width, dso_name); } - return strcmp(dso_name_l, dso_name_r); + return repsep_snprintf(bf, size, "%-*s", width, "[unknown]"); } static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf, size_t size, unsigned int width) { - if (self->ms.map && self->ms.map->dso) { - const char *dso_name = !verbose ? self->ms.map->dso->short_name : - self->ms.map->dso->long_name; - return repsep_snprintf(bf, size, "%-*s", width, dso_name); + return _hist_entry__dso_snprintf(self->ms.map, bf, size, width); +} + +static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym, + u64 ip, char level, char *bf, size_t size, + unsigned int width __used) +{ + size_t ret = 0; + + if (verbose) { + char o = map ? dso__symtab_origin(map->dso) : '!'; + ret += repsep_snprintf(bf, size, "%-#*llx %c ", + BITS_PER_LONG / 4, ip, o); } - return repsep_snprintf(bf, size, "%-*s", width, "[unknown]"); + ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level); + if (sym) + ret += repsep_snprintf(bf + ret, size - ret, "%-*s", + width - ret, + sym->name); + else { + size_t len = BITS_PER_LONG / 4; + ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx", + len, ip); + ret += repsep_snprintf(bf + ret, size - ret, "%-*s", + width - ret, ""); + } + + return ret; } + struct sort_entry sort_dso = { .se_header = "Shared Object", .se_cmp = sort__dso_cmp, @@ -144,8 +206,14 @@ struct sort_entry sort_dso = { .se_width_idx = HISTC_DSO, }; -/* --sort symbol */ +static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width __used) +{ + return _hist_entry__sym_snprintf(self->ms.map, self->ms.sym, self->ip, + self->level, bf, size, width); +} +/* --sort symbol */ static int64_t sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) { @@ -163,29 +231,7 @@ sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) ip_l = left->ms.sym->start; ip_r = right->ms.sym->start; - return (int64_t)(ip_r - ip_l); -} - -static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf, - size_t size, unsigned int width __used) -{ - size_t ret = 0; - - if (verbose) { - char o = self->ms.map ? dso__symtab_origin(self->ms.map->dso) : '!'; - ret += repsep_snprintf(bf, size, "%-#*llx %c ", - BITS_PER_LONG / 4, self->ip, o); - } - - ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", self->level); - if (self->ms.sym) - ret += repsep_snprintf(bf + ret, size - ret, "%s", - self->ms.sym->name); - else - ret += repsep_snprintf(bf + ret, size - ret, "%-#*llx", - BITS_PER_LONG / 4, self->ip); - - return ret; + return _sort__sym_cmp(left->ms.sym, right->ms.sym, ip_l, ip_r); } struct sort_entry sort_sym = { @@ -195,6 +241,54 @@ struct sort_entry sort_sym = { .se_width_idx = HISTC_SYMBOL, }; +/* --sort srcline */ + +static int64_t +sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return (int64_t)(right->ip - left->ip); +} + +static int hist_entry__srcline_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width __used) +{ + FILE *fp; + char cmd[PATH_MAX + 2], *path = self->srcline, *nl; + size_t line_len; + + if (path != NULL) + goto out_path; + + snprintf(cmd, sizeof(cmd), "addr2line -e %s %016" PRIx64, + self->ms.map->dso->long_name, self->ip); + fp = popen(cmd, "r"); + if (!fp) + goto out_ip; + + if (getline(&path, &line_len, fp) < 0 || !line_len) + goto out_ip; + fclose(fp); + self->srcline = strdup(path); + if (self->srcline == NULL) + goto out_ip; + + nl = strchr(self->srcline, '\n'); + if (nl != NULL) + *nl = '\0'; + path = self->srcline; +out_path: + return repsep_snprintf(bf, size, "%s", path); +out_ip: + return repsep_snprintf(bf, size, "%-#*llx", BITS_PER_LONG / 4, self->ip); +} + +struct sort_entry sort_srcline = { + .se_header = "Source:Line", + .se_cmp = sort__srcline_cmp, + .se_snprintf = hist_entry__srcline_snprintf, + .se_width_idx = HISTC_SRCLINE, +}; + /* --sort parent */ static int64_t @@ -244,19 +338,156 @@ struct sort_entry sort_cpu = { .se_width_idx = HISTC_CPU, }; +static int64_t +sort__dso_from_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return _sort__dso_cmp(left->branch_info->from.map, + right->branch_info->from.map); +} + +static int hist_entry__dso_from_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width) +{ + return _hist_entry__dso_snprintf(self->branch_info->from.map, + bf, size, width); +} + +struct sort_entry sort_dso_from = { + .se_header = "Source Shared Object", + .se_cmp = sort__dso_from_cmp, + .se_snprintf = hist_entry__dso_from_snprintf, + .se_width_idx = HISTC_DSO_FROM, +}; + +static int64_t +sort__dso_to_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return _sort__dso_cmp(left->branch_info->to.map, + right->branch_info->to.map); +} + +static int hist_entry__dso_to_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width) +{ + return _hist_entry__dso_snprintf(self->branch_info->to.map, + bf, size, width); +} + +static int64_t +sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right) +{ + struct addr_map_symbol *from_l = &left->branch_info->from; + struct addr_map_symbol *from_r = &right->branch_info->from; + + if (!from_l->sym && !from_r->sym) + return right->level - left->level; + + return _sort__sym_cmp(from_l->sym, from_r->sym, from_l->addr, + from_r->addr); +} + +static int64_t +sort__sym_to_cmp(struct hist_entry *left, struct hist_entry *right) +{ + struct addr_map_symbol *to_l = &left->branch_info->to; + struct addr_map_symbol *to_r = &right->branch_info->to; + + if (!to_l->sym && !to_r->sym) + return right->level - left->level; + + return _sort__sym_cmp(to_l->sym, to_r->sym, to_l->addr, to_r->addr); +} + +static int hist_entry__sym_from_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width __used) +{ + struct addr_map_symbol *from = &self->branch_info->from; + return _hist_entry__sym_snprintf(from->map, from->sym, from->addr, + self->level, bf, size, width); + +} + +static int hist_entry__sym_to_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width __used) +{ + struct addr_map_symbol *to = &self->branch_info->to; + return _hist_entry__sym_snprintf(to->map, to->sym, to->addr, + self->level, bf, size, width); + +} + +struct sort_entry sort_dso_to = { + .se_header = "Target Shared Object", + .se_cmp = sort__dso_to_cmp, + .se_snprintf = hist_entry__dso_to_snprintf, + .se_width_idx = HISTC_DSO_TO, +}; + +struct sort_entry sort_sym_from = { + .se_header = "Source Symbol", + .se_cmp = sort__sym_from_cmp, + .se_snprintf = hist_entry__sym_from_snprintf, + .se_width_idx = HISTC_SYMBOL_FROM, +}; + +struct sort_entry sort_sym_to = { + .se_header = "Target Symbol", + .se_cmp = sort__sym_to_cmp, + .se_snprintf = hist_entry__sym_to_snprintf, + .se_width_idx = HISTC_SYMBOL_TO, +}; + +static int64_t +sort__mispredict_cmp(struct hist_entry *left, struct hist_entry *right) +{ + const unsigned char mp = left->branch_info->flags.mispred != + right->branch_info->flags.mispred; + const unsigned char p = left->branch_info->flags.predicted != + right->branch_info->flags.predicted; + + return mp || p; +} + +static int hist_entry__mispredict_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width){ + static const char *out = "N/A"; + + if (self->branch_info->flags.predicted) + out = "N"; + else if (self->branch_info->flags.mispred) + out = "Y"; + + return repsep_snprintf(bf, size, "%-*s", width, out); +} + +struct sort_entry sort_mispredict = { + .se_header = "Branch Mispredicted", + .se_cmp = sort__mispredict_cmp, + .se_snprintf = hist_entry__mispredict_snprintf, + .se_width_idx = HISTC_MISPREDICT, +}; + struct sort_dimension { const char *name; struct sort_entry *entry; int taken; }; +#define DIM(d, n, func) [d] = { .name = n, .entry = &(func) } + static struct sort_dimension sort_dimensions[] = { - { .name = "pid", .entry = &sort_thread, }, - { .name = "comm", .entry = &sort_comm, }, - { .name = "dso", .entry = &sort_dso, }, - { .name = "symbol", .entry = &sort_sym, }, - { .name = "parent", .entry = &sort_parent, }, - { .name = "cpu", .entry = &sort_cpu, }, + DIM(SORT_PID, "pid", sort_thread), + DIM(SORT_COMM, "comm", sort_comm), + DIM(SORT_DSO, "dso", sort_dso), + DIM(SORT_DSO_FROM, "dso_from", sort_dso_from), + DIM(SORT_DSO_TO, "dso_to", sort_dso_to), + DIM(SORT_SYM, "symbol", sort_sym), + DIM(SORT_SYM_TO, "symbol_from", sort_sym_from), + DIM(SORT_SYM_FROM, "symbol_to", sort_sym_to), + DIM(SORT_PARENT, "parent", sort_parent), + DIM(SORT_CPU, "cpu", sort_cpu), + DIM(SORT_MISPREDICT, "mispredict", sort_mispredict), + DIM(SORT_SRCLINE, "srcline", sort_srcline), }; int sort_dimension__add(const char *tok) @@ -268,7 +499,6 @@ int sort_dimension__add(const char *tok) if (strncasecmp(tok, sd->name, strlen(tok))) continue; - if (sd->entry == &sort_parent) { int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED); if (ret) { @@ -300,6 +530,16 @@ int sort_dimension__add(const char *tok) sort__first_dimension = SORT_PARENT; else if (!strcmp(sd->name, "cpu")) sort__first_dimension = SORT_CPU; + else if (!strcmp(sd->name, "symbol_from")) + sort__first_dimension = SORT_SYM_FROM; + else if (!strcmp(sd->name, "symbol_to")) + sort__first_dimension = SORT_SYM_TO; + else if (!strcmp(sd->name, "dso_from")) + sort__first_dimension = SORT_DSO_FROM; + else if (!strcmp(sd->name, "dso_to")) + sort__first_dimension = SORT_DSO_TO; + else if (!strcmp(sd->name, "mispredict")) + sort__first_dimension = SORT_MISPREDICT; } list_add_tail(&sd->entry->list, &hist_entry__sort_list); @@ -307,7 +547,6 @@ int sort_dimension__add(const char *tok) return 0; } - return -ESRCH; } diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 77d0388ad415..e724b26acd51 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -31,11 +31,16 @@ extern const char *parent_pattern; extern const char default_sort_order[]; extern int sort__need_collapse; extern int sort__has_parent; +extern int sort__branch_mode; extern char *field_sep; extern struct sort_entry sort_comm; extern struct sort_entry sort_dso; extern struct sort_entry sort_sym; extern struct sort_entry sort_parent; +extern struct sort_entry sort_dso_from; +extern struct sort_entry sort_dso_to; +extern struct sort_entry sort_sym_from; +extern struct sort_entry sort_sym_to; extern enum sort_type sort__first_dimension; /** @@ -45,6 +50,7 @@ extern enum sort_type sort__first_dimension; * @nr_rows - rows expanded in callchain, recalculated on folding/unfolding */ struct hist_entry { + struct rb_node rb_node_in; struct rb_node rb_node; u64 period; u64 period_sys; @@ -63,13 +69,16 @@ struct hist_entry { bool init_have_children; char level; + bool used; u8 filtered; + char *srcline; struct symbol *parent; union { unsigned long position; struct hist_entry *pair; struct rb_root sorted_chain; }; + struct branch_info *branch_info; struct callchain_root callchain[0]; }; @@ -80,6 +89,12 @@ enum sort_type { SORT_SYM, SORT_PARENT, SORT_CPU, + SORT_DSO_FROM, + SORT_DSO_TO, + SORT_SYM_FROM, + SORT_SYM_TO, + SORT_MISPREDICT, + SORT_SRCLINE, }; /* diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c index 92e068517c1a..2eeb51baf077 100644 --- a/tools/perf/util/strbuf.c +++ b/tools/perf/util/strbuf.c @@ -1,4 +1,5 @@ #include "cache.h" +#include <linux/kernel.h> int prefixcmp(const char *str, const char *prefix) { @@ -89,14 +90,14 @@ void strbuf_addf(struct strbuf *sb, const char *fmt, ...) if (!strbuf_avail(sb)) strbuf_grow(sb, 64); va_start(ap, fmt); - len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); + len = vscnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); va_end(ap); if (len < 0) - die("your vsnprintf is broken"); + die("your vscnprintf is broken"); if (len > strbuf_avail(sb)) { strbuf_grow(sb, len); va_start(ap, fmt); - len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); + len = vscnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); va_end(ap); if (len > strbuf_avail(sb)) { die("this should not happen, your snprintf is broken"); diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index d5836382ff2c..199bc4d8905d 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -313,3 +313,25 @@ int strtailcmp(const char *s1, const char *s2) return 0; } +/** + * rtrim - Removes trailing whitespace from @s. + * @s: The string to be stripped. + * + * Note that the first trailing whitespace is replaced with a %NUL-terminator + * in the given string @s. Returns @s. + */ +char *rtrim(char *s) +{ + size_t size = strlen(s); + char *end; + + if (!size) + return s; + + end = s + size - 1; + while (end >= s && isspace(*end)) + end--; + *(end + 1) = '\0'; + + return s; +} diff --git a/tools/perf/util/strlist.c b/tools/perf/util/strlist.c index 6783a2043555..95856ff3dda4 100644 --- a/tools/perf/util/strlist.c +++ b/tools/perf/util/strlist.c @@ -10,23 +10,28 @@ #include <stdlib.h> #include <string.h> -static struct str_node *str_node__new(const char *s, bool dupstr) +static +struct rb_node *strlist__node_new(struct rblist *rblist, const void *entry) { - struct str_node *self = malloc(sizeof(*self)); + const char *s = entry; + struct rb_node *rc = NULL; + struct strlist *strlist = container_of(rblist, struct strlist, rblist); + struct str_node *snode = malloc(sizeof(*snode)); - if (self != NULL) { - if (dupstr) { + if (snode != NULL) { + if (strlist->dupstr) { s = strdup(s); if (s == NULL) goto out_delete; } - self->s = s; + snode->s = s; + rc = &snode->rb_node; } - return self; + return rc; out_delete: - free(self); + free(snode); return NULL; } @@ -37,36 +42,26 @@ static void str_node__delete(struct str_node *self, bool dupstr) free(self); } -int strlist__add(struct strlist *self, const char *new_entry) +static +void strlist__node_delete(struct rblist *rblist, struct rb_node *rb_node) { - struct rb_node **p = &self->entries.rb_node; - struct rb_node *parent = NULL; - struct str_node *sn; - - while (*p != NULL) { - int rc; - - parent = *p; - sn = rb_entry(parent, struct str_node, rb_node); - rc = strcmp(sn->s, new_entry); - - if (rc > 0) - p = &(*p)->rb_left; - else if (rc < 0) - p = &(*p)->rb_right; - else - return -EEXIST; - } + struct strlist *slist = container_of(rblist, struct strlist, rblist); + struct str_node *snode = container_of(rb_node, struct str_node, rb_node); - sn = str_node__new(new_entry, self->dupstr); - if (sn == NULL) - return -ENOMEM; + str_node__delete(snode, slist->dupstr); +} - rb_link_node(&sn->rb_node, parent, p); - rb_insert_color(&sn->rb_node, &self->entries); - ++self->nr_entries; +static int strlist__node_cmp(struct rb_node *rb_node, const void *entry) +{ + const char *str = entry; + struct str_node *snode = container_of(rb_node, struct str_node, rb_node); + + return strcmp(snode->s, str); +} - return 0; +int strlist__add(struct strlist *self, const char *new_entry) +{ + return rblist__add_node(&self->rblist, new_entry); } int strlist__load(struct strlist *self, const char *filename) @@ -96,34 +91,20 @@ out: return err; } -void strlist__remove(struct strlist *self, struct str_node *sn) +void strlist__remove(struct strlist *slist, struct str_node *snode) { - rb_erase(&sn->rb_node, &self->entries); - str_node__delete(sn, self->dupstr); + str_node__delete(snode, slist->dupstr); } -struct str_node *strlist__find(struct strlist *self, const char *entry) +struct str_node *strlist__find(struct strlist *slist, const char *entry) { - struct rb_node **p = &self->entries.rb_node; - struct rb_node *parent = NULL; - - while (*p != NULL) { - struct str_node *sn; - int rc; - - parent = *p; - sn = rb_entry(parent, struct str_node, rb_node); - rc = strcmp(sn->s, entry); - - if (rc > 0) - p = &(*p)->rb_left; - else if (rc < 0) - p = &(*p)->rb_right; - else - return sn; - } + struct str_node *snode = NULL; + struct rb_node *rb_node = rblist__find(&slist->rblist, entry); - return NULL; + if (rb_node) + snode = container_of(rb_node, struct str_node, rb_node); + + return snode; } static int strlist__parse_list_entry(struct strlist *self, const char *s) @@ -156,9 +137,12 @@ struct strlist *strlist__new(bool dupstr, const char *slist) struct strlist *self = malloc(sizeof(*self)); if (self != NULL) { - self->entries = RB_ROOT; + rblist__init(&self->rblist); + self->rblist.node_cmp = strlist__node_cmp; + self->rblist.node_new = strlist__node_new; + self->rblist.node_delete = strlist__node_delete; + self->dupstr = dupstr; - self->nr_entries = 0; if (slist && strlist__parse_list(self, slist) != 0) goto out_error; } @@ -171,30 +155,18 @@ out_error: void strlist__delete(struct strlist *self) { - if (self != NULL) { - struct str_node *pos; - struct rb_node *next = rb_first(&self->entries); - - while (next) { - pos = rb_entry(next, struct str_node, rb_node); - next = rb_next(&pos->rb_node); - strlist__remove(self, pos); - } - self->entries = RB_ROOT; - free(self); - } + if (self != NULL) + rblist__delete(&self->rblist); } -struct str_node *strlist__entry(const struct strlist *self, unsigned int idx) +struct str_node *strlist__entry(const struct strlist *slist, unsigned int idx) { - struct rb_node *nd; + struct str_node *snode = NULL; + struct rb_node *rb_node; - for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) { - struct str_node *pos = rb_entry(nd, struct str_node, rb_node); + rb_node = rblist__entry(&slist->rblist, idx); + if (rb_node) + snode = container_of(rb_node, struct str_node, rb_node); - if (!idx--) - return pos; - } - - return NULL; + return snode; } diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h index 3ba839007d2c..dd9f922ec67c 100644 --- a/tools/perf/util/strlist.h +++ b/tools/perf/util/strlist.h @@ -4,14 +4,15 @@ #include <linux/rbtree.h> #include <stdbool.h> +#include "rblist.h" + struct str_node { struct rb_node rb_node; const char *s; }; struct strlist { - struct rb_root entries; - unsigned int nr_entries; + struct rblist rblist; bool dupstr; }; @@ -32,18 +33,18 @@ static inline bool strlist__has_entry(struct strlist *self, const char *entry) static inline bool strlist__empty(const struct strlist *self) { - return self->nr_entries == 0; + return rblist__empty(&self->rblist); } static inline unsigned int strlist__nr_entries(const struct strlist *self) { - return self->nr_entries; + return rblist__nr_entries(&self->rblist); } /* For strlist iteration */ static inline struct str_node *strlist__first(struct strlist *self) { - struct rb_node *rn = rb_first(&self->entries); + struct rb_node *rn = rb_first(&self->rblist.entries); return rn ? rb_entry(rn, struct str_node, rb_node) : NULL; } static inline struct str_node *strlist__next(struct str_node *sn) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 40eeaf07725b..8b63b678e127 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1,8 +1,5 @@ -#define _GNU_SOURCE -#include <ctype.h> #include <dirent.h> #include <errno.h> -#include <libgen.h> #include <stdlib.h> #include <stdio.h> #include <string.h> @@ -13,6 +10,7 @@ #include <unistd.h> #include <inttypes.h> #include "build-id.h" +#include "util.h" #include "debug.h" #include "symbol.h" #include "strlist.h" @@ -24,13 +22,14 @@ #include <sys/utsname.h> #ifndef KSYM_NAME_LEN -#define KSYM_NAME_LEN 128 +#define KSYM_NAME_LEN 256 #endif #ifndef NT_GNU_BUILD_ID #define NT_GNU_BUILD_ID 3 #endif +static void dso_cache__free(struct rb_root *root); static bool dso__build_id_equal(const struct dso *dso, u8 *build_id); static int elf_read_build_id(Elf *elf, void *bf, size_t size); static void dsos__add(struct list_head *head, struct dso *dso); @@ -46,11 +45,39 @@ struct symbol_conf symbol_conf = { .exclude_other = true, .use_modules = true, .try_vmlinux_path = true, + .annotate_src = true, .symfs = "", }; +static enum dso_binary_type binary_type_symtab[] = { + DSO_BINARY_TYPE__KALLSYMS, + DSO_BINARY_TYPE__GUEST_KALLSYMS, + DSO_BINARY_TYPE__JAVA_JIT, + DSO_BINARY_TYPE__DEBUGLINK, + DSO_BINARY_TYPE__BUILD_ID_CACHE, + DSO_BINARY_TYPE__FEDORA_DEBUGINFO, + DSO_BINARY_TYPE__UBUNTU_DEBUGINFO, + DSO_BINARY_TYPE__BUILDID_DEBUGINFO, + DSO_BINARY_TYPE__SYSTEM_PATH_DSO, + DSO_BINARY_TYPE__GUEST_KMODULE, + DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE, + DSO_BINARY_TYPE__NOT_FOUND, +}; + +#define DSO_BINARY_TYPE__SYMTAB_CNT ARRAY_SIZE(binary_type_symtab) + +static enum dso_binary_type binary_type_data[] = { + DSO_BINARY_TYPE__BUILD_ID_CACHE, + DSO_BINARY_TYPE__SYSTEM_PATH_DSO, + DSO_BINARY_TYPE__NOT_FOUND, +}; + +#define DSO_BINARY_TYPE__DATA_CNT ARRAY_SIZE(binary_type_data) + int dso__name_len(const struct dso *dso) { + if (!dso) + return strlen("[unknown]"); if (verbose) return dso->long_name_len; @@ -263,6 +290,28 @@ static size_t symbol__fprintf(struct symbol *sym, FILE *fp) sym->name); } +size_t symbol__fprintf_symname_offs(const struct symbol *sym, + const struct addr_location *al, FILE *fp) +{ + unsigned long offset; + size_t length; + + if (sym && sym->name) { + length = fprintf(fp, "%s", sym->name); + if (al) { + offset = al->addr - sym->start; + length += fprintf(fp, "+0x%lx", offset); + } + return length; + } else + return fprintf(fp, "[unknown]"); +} + +size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp) +{ + return symbol__fprintf_symname_offs(sym, NULL, fp); +} + void dso__set_long_name(struct dso *dso, char *name) { if (name == NULL) @@ -295,11 +344,14 @@ struct dso *dso__new(const char *name) dso__set_short_name(dso, dso->name); for (i = 0; i < MAP__NR_TYPES; ++i) dso->symbols[i] = dso->symbol_names[i] = RB_ROOT; - dso->symtab_type = SYMTAB__NOT_FOUND; + dso->cache = RB_ROOT; + dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND; + dso->data_type = DSO_BINARY_TYPE__NOT_FOUND; dso->loaded = 0; dso->sorted_by_name = 0; dso->has_build_id = 0; dso->kernel = DSO_TYPE_USER; + dso->needs_swap = DSO_SWAP__UNSET; INIT_LIST_HEAD(&dso->node); } @@ -328,6 +380,7 @@ void dso__delete(struct dso *dso) free((char *)dso->short_name); if (dso->lname_alloc) free(dso->long_name); + dso_cache__free(&dso->cache); free(dso); } @@ -782,9 +835,9 @@ int dso__load_kallsyms(struct dso *dso, const char *filename, symbols__fixup_end(&dso->symbols[map->type]); if (dso->kernel == DSO_TYPE_GUEST_KERNEL) - dso->symtab_type = SYMTAB__GUEST_KALLSYMS; + dso->symtab_type = DSO_BINARY_TYPE__GUEST_KALLSYMS; else - dso->symtab_type = SYMTAB__KALLSYMS; + dso->symtab_type = DSO_BINARY_TYPE__KALLSYMS; return dso__split_kallsyms(dso, map, filter); } @@ -954,8 +1007,9 @@ static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, * And always look at the original dso, not at debuginfo packages, that * have the PLT data stripped out (shdr_rel_plt.sh_type == SHT_NOBITS). */ -static int dso__synthesize_plt_symbols(struct dso *dso, struct map *map, - symbol_filter_t filter) +static int +dso__synthesize_plt_symbols(struct dso *dso, char *name, struct map *map, + symbol_filter_t filter) { uint32_t nr_rel_entries, idx; GElf_Sym sym; @@ -970,10 +1024,7 @@ static int dso__synthesize_plt_symbols(struct dso *dso, struct map *map, char sympltname[1024]; Elf *elf; int nr = 0, symidx, fd, err = 0; - char name[PATH_MAX]; - snprintf(name, sizeof(name), "%s%s", - symbol_conf.symfs, dso->long_name); fd = open(name, O_RDONLY); if (fd < 0) goto out; @@ -1135,6 +1186,33 @@ static size_t elf_addr_to_index(Elf *elf, GElf_Addr addr) return -1; } +static int dso__swap_init(struct dso *dso, unsigned char eidata) +{ + static unsigned int const endian = 1; + + dso->needs_swap = DSO_SWAP__NO; + + switch (eidata) { + case ELFDATA2LSB: + /* We are big endian, DSO is little endian. */ + if (*(unsigned char const *)&endian != 1) + dso->needs_swap = DSO_SWAP__YES; + break; + + case ELFDATA2MSB: + /* We are little endian, DSO is big endian. */ + if (*(unsigned char const *)&endian != 0) + dso->needs_swap = DSO_SWAP__YES; + break; + + default: + pr_err("unrecognized DSO data encoding %d\n", eidata); + return -EINVAL; + } + + return 0; +} + static int dso__load_sym(struct dso *dso, struct map *map, const char *name, int fd, symbol_filter_t filter, int kmodule, int want_symtab) @@ -1166,6 +1244,9 @@ static int dso__load_sym(struct dso *dso, struct map *map, const char *name, goto out_elf_end; } + if (dso__swap_init(dso, ehdr.e_ident[EI_DATA])) + goto out_elf_end; + /* Always reject images with a mismatched build-id: */ if (dso->has_build_id) { u8 build_id[BUILD_ID_SIZE]; @@ -1251,7 +1332,7 @@ static int dso__load_sym(struct dso *dso, struct map *map, const char *name, if (opdsec && sym.st_shndx == opdidx) { u32 offset = sym.st_value - opdshdr.sh_addr; u64 *opd = opddata->d_buf + offset; - sym.st_value = *opd; + sym.st_value = DSO__SWAP(dso, u64, *opd); sym.st_shndx = elf_addr_to_index(elf, sym.st_value); } @@ -1426,14 +1507,31 @@ static int elf_read_build_id(Elf *elf, void *bf, size_t size) goto out; } - sec = elf_section_by_name(elf, &ehdr, &shdr, - ".note.gnu.build-id", NULL); - if (sec == NULL) { + /* + * Check following sections for notes: + * '.note.gnu.build-id' + * '.notes' + * '.note' (VDSO specific) + */ + do { + sec = elf_section_by_name(elf, &ehdr, &shdr, + ".note.gnu.build-id", NULL); + if (sec) + break; + sec = elf_section_by_name(elf, &ehdr, &shdr, ".notes", NULL); - if (sec == NULL) - goto out; - } + if (sec) + break; + + sec = elf_section_by_name(elf, &ehdr, &shdr, + ".note", NULL); + if (sec) + break; + + return err; + + } while (0); data = elf_getdata(sec, NULL); if (data == NULL) @@ -1538,34 +1636,163 @@ out: return err; } +static int filename__read_debuglink(const char *filename, + char *debuglink, size_t size) +{ + int fd, err = -1; + Elf *elf; + GElf_Ehdr ehdr; + GElf_Shdr shdr; + Elf_Data *data; + Elf_Scn *sec; + Elf_Kind ek; + + fd = open(filename, O_RDONLY); + if (fd < 0) + goto out; + + elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); + if (elf == NULL) { + pr_debug2("%s: cannot read %s ELF file.\n", __func__, filename); + goto out_close; + } + + ek = elf_kind(elf); + if (ek != ELF_K_ELF) + goto out_close; + + if (gelf_getehdr(elf, &ehdr) == NULL) { + pr_err("%s: cannot get elf header.\n", __func__); + goto out_close; + } + + sec = elf_section_by_name(elf, &ehdr, &shdr, + ".gnu_debuglink", NULL); + if (sec == NULL) + goto out_close; + + data = elf_getdata(sec, NULL); + if (data == NULL) + goto out_close; + + /* the start of this section is a zero-terminated string */ + strncpy(debuglink, data->d_buf, size); + + elf_end(elf); + +out_close: + close(fd); +out: + return err; +} + char dso__symtab_origin(const struct dso *dso) { static const char origin[] = { - [SYMTAB__KALLSYMS] = 'k', - [SYMTAB__JAVA_JIT] = 'j', - [SYMTAB__BUILD_ID_CACHE] = 'B', - [SYMTAB__FEDORA_DEBUGINFO] = 'f', - [SYMTAB__UBUNTU_DEBUGINFO] = 'u', - [SYMTAB__BUILDID_DEBUGINFO] = 'b', - [SYMTAB__SYSTEM_PATH_DSO] = 'd', - [SYMTAB__SYSTEM_PATH_KMODULE] = 'K', - [SYMTAB__GUEST_KALLSYMS] = 'g', - [SYMTAB__GUEST_KMODULE] = 'G', + [DSO_BINARY_TYPE__KALLSYMS] = 'k', + [DSO_BINARY_TYPE__JAVA_JIT] = 'j', + [DSO_BINARY_TYPE__DEBUGLINK] = 'l', + [DSO_BINARY_TYPE__BUILD_ID_CACHE] = 'B', + [DSO_BINARY_TYPE__FEDORA_DEBUGINFO] = 'f', + [DSO_BINARY_TYPE__UBUNTU_DEBUGINFO] = 'u', + [DSO_BINARY_TYPE__BUILDID_DEBUGINFO] = 'b', + [DSO_BINARY_TYPE__SYSTEM_PATH_DSO] = 'd', + [DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE] = 'K', + [DSO_BINARY_TYPE__GUEST_KALLSYMS] = 'g', + [DSO_BINARY_TYPE__GUEST_KMODULE] = 'G', }; - if (dso == NULL || dso->symtab_type == SYMTAB__NOT_FOUND) + if (dso == NULL || dso->symtab_type == DSO_BINARY_TYPE__NOT_FOUND) return '!'; return origin[dso->symtab_type]; } +int dso__binary_type_file(struct dso *dso, enum dso_binary_type type, + char *root_dir, char *file, size_t size) +{ + char build_id_hex[BUILD_ID_SIZE * 2 + 1]; + int ret = 0; + + switch (type) { + case DSO_BINARY_TYPE__DEBUGLINK: { + char *debuglink; + + strncpy(file, dso->long_name, size); + debuglink = file + dso->long_name_len; + while (debuglink != file && *debuglink != '/') + debuglink--; + if (*debuglink == '/') + debuglink++; + filename__read_debuglink(dso->long_name, debuglink, + size - (debuglink - file)); + } + break; + case DSO_BINARY_TYPE__BUILD_ID_CACHE: + /* skip the locally configured cache if a symfs is given */ + if (symbol_conf.symfs[0] || + (dso__build_id_filename(dso, file, size) == NULL)) + ret = -1; + break; + + case DSO_BINARY_TYPE__FEDORA_DEBUGINFO: + snprintf(file, size, "%s/usr/lib/debug%s.debug", + symbol_conf.symfs, dso->long_name); + break; + + case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO: + snprintf(file, size, "%s/usr/lib/debug%s", + symbol_conf.symfs, dso->long_name); + break; + + case DSO_BINARY_TYPE__BUILDID_DEBUGINFO: + if (!dso->has_build_id) { + ret = -1; + break; + } + + build_id__sprintf(dso->build_id, + sizeof(dso->build_id), + build_id_hex); + snprintf(file, size, + "%s/usr/lib/debug/.build-id/%.2s/%s.debug", + symbol_conf.symfs, build_id_hex, build_id_hex + 2); + break; + + case DSO_BINARY_TYPE__SYSTEM_PATH_DSO: + snprintf(file, size, "%s%s", + symbol_conf.symfs, dso->long_name); + break; + + case DSO_BINARY_TYPE__GUEST_KMODULE: + snprintf(file, size, "%s%s%s", symbol_conf.symfs, + root_dir, dso->long_name); + break; + + case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE: + snprintf(file, size, "%s%s", symbol_conf.symfs, + dso->long_name); + break; + + default: + case DSO_BINARY_TYPE__KALLSYMS: + case DSO_BINARY_TYPE__GUEST_KALLSYMS: + case DSO_BINARY_TYPE__JAVA_JIT: + case DSO_BINARY_TYPE__NOT_FOUND: + ret = -1; + break; + } + + return ret; +} + int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) { - int size = PATH_MAX; char *name; int ret = -1; int fd; + u_int i; struct machine *machine; - const char *root_dir; + char *root_dir = (char *) ""; int want_symtab; dso__set_loaded(dso, map->type); @@ -1580,7 +1807,7 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) else machine = NULL; - name = malloc(size); + name = malloc(PATH_MAX); if (!name) return -1; @@ -1599,69 +1826,27 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) } ret = dso__load_perf_map(dso, map, filter); - dso->symtab_type = ret > 0 ? SYMTAB__JAVA_JIT : - SYMTAB__NOT_FOUND; + dso->symtab_type = ret > 0 ? DSO_BINARY_TYPE__JAVA_JIT : + DSO_BINARY_TYPE__NOT_FOUND; return ret; } + if (machine) + root_dir = machine->root_dir; + /* Iterate over candidate debug images. * On the first pass, only load images if they have a full symtab. * Failing that, do a second pass where we accept .dynsym also */ want_symtab = 1; restart: - for (dso->symtab_type = SYMTAB__BUILD_ID_CACHE; - dso->symtab_type != SYMTAB__NOT_FOUND; - dso->symtab_type++) { - switch (dso->symtab_type) { - case SYMTAB__BUILD_ID_CACHE: - /* skip the locally configured cache if a symfs is given */ - if (symbol_conf.symfs[0] || - (dso__build_id_filename(dso, name, size) == NULL)) { - continue; - } - break; - case SYMTAB__FEDORA_DEBUGINFO: - snprintf(name, size, "%s/usr/lib/debug%s.debug", - symbol_conf.symfs, dso->long_name); - break; - case SYMTAB__UBUNTU_DEBUGINFO: - snprintf(name, size, "%s/usr/lib/debug%s", - symbol_conf.symfs, dso->long_name); - break; - case SYMTAB__BUILDID_DEBUGINFO: { - char build_id_hex[BUILD_ID_SIZE * 2 + 1]; + for (i = 0; i < DSO_BINARY_TYPE__SYMTAB_CNT; i++) { - if (!dso->has_build_id) - continue; + dso->symtab_type = binary_type_symtab[i]; - build_id__sprintf(dso->build_id, - sizeof(dso->build_id), - build_id_hex); - snprintf(name, size, - "%s/usr/lib/debug/.build-id/%.2s/%s.debug", - symbol_conf.symfs, build_id_hex, build_id_hex + 2); - } - break; - case SYMTAB__SYSTEM_PATH_DSO: - snprintf(name, size, "%s%s", - symbol_conf.symfs, dso->long_name); - break; - case SYMTAB__GUEST_KMODULE: - if (map->groups && machine) - root_dir = machine->root_dir; - else - root_dir = ""; - snprintf(name, size, "%s%s%s", symbol_conf.symfs, - root_dir, dso->long_name); - break; - - case SYMTAB__SYSTEM_PATH_KMODULE: - snprintf(name, size, "%s%s", symbol_conf.symfs, - dso->long_name); - break; - default:; - } + if (dso__binary_type_file(dso, dso->symtab_type, + root_dir, name, PATH_MAX)) + continue; /* Name is now the name of the next image to try */ fd = open(name, O_RDONLY); @@ -1680,8 +1865,9 @@ restart: continue; if (ret > 0) { - int nr_plt = dso__synthesize_plt_symbols(dso, map, - filter); + int nr_plt; + + nr_plt = dso__synthesize_plt_symbols(dso, name, map, filter); if (nr_plt > 0) ret += nr_plt; break; @@ -1756,7 +1942,7 @@ static int map_groups__set_modules_path_dir(struct map_groups *mg, struct stat st; /*sshfs might return bad dent->d_type, so we have to stat*/ - sprintf(path, "%s/%s", dir_name, dent->d_name); + snprintf(path, sizeof(path), "%s/%s", dir_name, dent->d_name); if (stat(path, &st)) continue; @@ -1765,8 +1951,6 @@ static int map_groups__set_modules_path_dir(struct map_groups *mg, !strcmp(dent->d_name, "..")) continue; - snprintf(path, sizeof(path), "%s/%s", - dir_name, dent->d_name); ret = map_groups__set_modules_path_dir(mg, path); if (ret < 0) goto out; @@ -1787,9 +1971,6 @@ static int map_groups__set_modules_path_dir(struct map_groups *mg, if (map == NULL) continue; - snprintf(path, sizeof(path), "%s/%s", - dir_name, dent->d_name); - long_name = strdup(path); if (long_name == NULL) { ret = -1; @@ -1882,9 +2063,9 @@ struct map *machine__new_module(struct machine *machine, u64 start, return NULL; if (machine__is_host(machine)) - dso->symtab_type = SYMTAB__SYSTEM_PATH_KMODULE; + dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE; else - dso->symtab_type = SYMTAB__GUEST_KMODULE; + dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE; map_groups__insert(&machine->kmaps, map); return map; } @@ -2436,8 +2617,15 @@ int machine__create_kernel_maps(struct machine *machine) __machine__create_kernel_maps(machine, kernel) < 0) return -1; - if (symbol_conf.use_modules && machine__create_modules(machine) < 0) - pr_debug("Problems creating module maps, continuing anyway...\n"); + if (symbol_conf.use_modules && machine__create_modules(machine) < 0) { + if (machine__is_host(machine)) + pr_debug("Problems creating module maps, " + "continuing anyway...\n"); + else + pr_debug("Problems creating module maps for guest %d, " + "continuing anyway...\n", machine->pid); + } + /* * Now that we have all the maps created, just set the ->end of them: */ @@ -2608,10 +2796,10 @@ int symbol__init(void) symbol_conf.initialized = true; return 0; -out_free_dso_list: - strlist__delete(symbol_conf.dso_list); out_free_comm_list: strlist__delete(symbol_conf.comm_list); +out_free_dso_list: + strlist__delete(symbol_conf.dso_list); return -1; } @@ -2687,6 +2875,7 @@ int machines__create_guest_kernel_maps(struct rb_root *machines) int i, items = 0; char path[PATH_MAX]; pid_t pid; + char *endp; if (symbol_conf.default_guest_vmlinux_name || symbol_conf.default_guest_modules || @@ -2703,7 +2892,14 @@ int machines__create_guest_kernel_maps(struct rb_root *machines) /* Filter out . and .. */ continue; } - pid = atoi(namelist[i]->d_name); + pid = (pid_t)strtol(namelist[i]->d_name, &endp, 10); + if ((*endp != '\0') || + (endp == namelist[i]->d_name) || + (errno == ERANGE)) { + pr_debug("invalid directory (%s). Skipping.\n", + namelist[i]->d_name); + continue; + } sprintf(path, "%s/%s/proc/kallsyms", symbol_conf.guestmount, namelist[i]->d_name); @@ -2766,3 +2962,229 @@ int machine__load_vmlinux_path(struct machine *machine, enum map_type type, return ret; } + +struct map *dso__new_map(const char *name) +{ + struct map *map = NULL; + struct dso *dso = dso__new(name); + + if (dso) + map = map__new2(0, dso, MAP__FUNCTION); + + return map; +} + +static int open_dso(struct dso *dso, struct machine *machine) +{ + char *root_dir = (char *) ""; + char *name; + int fd; + + name = malloc(PATH_MAX); + if (!name) + return -ENOMEM; + + if (machine) + root_dir = machine->root_dir; + + if (dso__binary_type_file(dso, dso->data_type, + root_dir, name, PATH_MAX)) { + free(name); + return -EINVAL; + } + + fd = open(name, O_RDONLY); + free(name); + return fd; +} + +int dso__data_fd(struct dso *dso, struct machine *machine) +{ + int i = 0; + + if (dso->data_type != DSO_BINARY_TYPE__NOT_FOUND) + return open_dso(dso, machine); + + do { + int fd; + + dso->data_type = binary_type_data[i++]; + + fd = open_dso(dso, machine); + if (fd >= 0) + return fd; + + } while (dso->data_type != DSO_BINARY_TYPE__NOT_FOUND); + + return -EINVAL; +} + +static void +dso_cache__free(struct rb_root *root) +{ + struct rb_node *next = rb_first(root); + + while (next) { + struct dso_cache *cache; + + cache = rb_entry(next, struct dso_cache, rb_node); + next = rb_next(&cache->rb_node); + rb_erase(&cache->rb_node, root); + free(cache); + } +} + +static struct dso_cache* +dso_cache__find(struct rb_root *root, u64 offset) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct dso_cache *cache; + + while (*p != NULL) { + u64 end; + + parent = *p; + cache = rb_entry(parent, struct dso_cache, rb_node); + end = cache->offset + DSO__DATA_CACHE_SIZE; + + if (offset < cache->offset) + p = &(*p)->rb_left; + else if (offset >= end) + p = &(*p)->rb_right; + else + return cache; + } + return NULL; +} + +static void +dso_cache__insert(struct rb_root *root, struct dso_cache *new) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct dso_cache *cache; + u64 offset = new->offset; + + while (*p != NULL) { + u64 end; + + parent = *p; + cache = rb_entry(parent, struct dso_cache, rb_node); + end = cache->offset + DSO__DATA_CACHE_SIZE; + + if (offset < cache->offset) + p = &(*p)->rb_left; + else if (offset >= end) + p = &(*p)->rb_right; + } + + rb_link_node(&new->rb_node, parent, p); + rb_insert_color(&new->rb_node, root); +} + +static ssize_t +dso_cache__memcpy(struct dso_cache *cache, u64 offset, + u8 *data, u64 size) +{ + u64 cache_offset = offset - cache->offset; + u64 cache_size = min(cache->size - cache_offset, size); + + memcpy(data, cache->data + cache_offset, cache_size); + return cache_size; +} + +static ssize_t +dso_cache__read(struct dso *dso, struct machine *machine, + u64 offset, u8 *data, ssize_t size) +{ + struct dso_cache *cache; + ssize_t ret; + int fd; + + fd = dso__data_fd(dso, machine); + if (fd < 0) + return -1; + + do { + u64 cache_offset; + + ret = -ENOMEM; + + cache = zalloc(sizeof(*cache) + DSO__DATA_CACHE_SIZE); + if (!cache) + break; + + cache_offset = offset & DSO__DATA_CACHE_MASK; + ret = -EINVAL; + + if (-1 == lseek(fd, cache_offset, SEEK_SET)) + break; + + ret = read(fd, cache->data, DSO__DATA_CACHE_SIZE); + if (ret <= 0) + break; + + cache->offset = cache_offset; + cache->size = ret; + dso_cache__insert(&dso->cache, cache); + + ret = dso_cache__memcpy(cache, offset, data, size); + + } while (0); + + if (ret <= 0) + free(cache); + + close(fd); + return ret; +} + +static ssize_t dso_cache_read(struct dso *dso, struct machine *machine, + u64 offset, u8 *data, ssize_t size) +{ + struct dso_cache *cache; + + cache = dso_cache__find(&dso->cache, offset); + if (cache) + return dso_cache__memcpy(cache, offset, data, size); + else + return dso_cache__read(dso, machine, offset, data, size); +} + +ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine, + u64 offset, u8 *data, ssize_t size) +{ + ssize_t r = 0; + u8 *p = data; + + do { + ssize_t ret; + + ret = dso_cache_read(dso, machine, offset, p, size); + if (ret < 0) + return ret; + + /* Reached EOF, return what we have. */ + if (!ret) + break; + + BUG_ON(ret > size); + + r += ret; + p += ret; + offset += ret; + size -= ret; + + } while (size); + + return r; +} + +ssize_t dso__data_read_addr(struct dso *dso, struct map *map, + struct machine *machine, u64 addr, + u8 *data, ssize_t size) +{ + u64 offset = map->map_ip(map, addr); + return dso__data_read_offset(dso, machine, offset, data, size); +} diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 4f377d92e75a..1fe733a1e21f 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -5,9 +5,11 @@ #include <stdbool.h> #include <stdint.h> #include "map.h" +#include "../perf.h" #include <linux/list.h> #include <linux/rbtree.h> #include <stdio.h> +#include <byteswap.h> #ifdef HAVE_CPLUS_DEMANGLE extern char *cplus_demangle(const char *, int); @@ -64,19 +66,29 @@ struct symbol { void symbol__delete(struct symbol *sym); +static inline size_t symbol__size(const struct symbol *sym) +{ + return sym->end - sym->start + 1; +} + struct strlist; struct symbol_conf { unsigned short priv_size; + unsigned short nr_events; bool try_vmlinux_path, + show_kernel_path, use_modules, sort_by_name, show_nr_samples, + show_total_period, use_callchain, exclude_other, show_cpu_utilization, initialized, - kptr_restrict; + kptr_restrict, + annotate_asm_raw, + annotate_src; const char *vmlinux_name, *kallsyms_name, *source_prefix, @@ -91,7 +103,11 @@ struct symbol_conf { *col_width_list_str; struct strlist *dso_list, *comm_list, - *sym_list; + *sym_list, + *dso_from_list, + *dso_to_list, + *sym_from_list, + *sym_to_list; const char *symfs; }; @@ -115,6 +131,19 @@ struct map_symbol { bool has_children; }; +struct addr_map_symbol { + struct map *map; + struct symbol *sym; + u64 addr; + u64 al_addr; +}; + +struct branch_info { + struct addr_map_symbol from; + struct addr_map_symbol to; + struct branch_flags flags; +}; + struct addr_location { struct thread *thread; struct map *map; @@ -126,24 +155,58 @@ struct addr_location { s32 cpu; }; +enum dso_binary_type { + DSO_BINARY_TYPE__KALLSYMS = 0, + DSO_BINARY_TYPE__GUEST_KALLSYMS, + DSO_BINARY_TYPE__JAVA_JIT, + DSO_BINARY_TYPE__DEBUGLINK, + DSO_BINARY_TYPE__BUILD_ID_CACHE, + DSO_BINARY_TYPE__FEDORA_DEBUGINFO, + DSO_BINARY_TYPE__UBUNTU_DEBUGINFO, + DSO_BINARY_TYPE__BUILDID_DEBUGINFO, + DSO_BINARY_TYPE__SYSTEM_PATH_DSO, + DSO_BINARY_TYPE__GUEST_KMODULE, + DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE, + DSO_BINARY_TYPE__NOT_FOUND, +}; + enum dso_kernel_type { DSO_TYPE_USER = 0, DSO_TYPE_KERNEL, DSO_TYPE_GUEST_KERNEL }; +enum dso_swap_type { + DSO_SWAP__UNSET, + DSO_SWAP__NO, + DSO_SWAP__YES, +}; + +#define DSO__DATA_CACHE_SIZE 4096 +#define DSO__DATA_CACHE_MASK ~(DSO__DATA_CACHE_SIZE - 1) + +struct dso_cache { + struct rb_node rb_node; + u64 offset; + u64 size; + char data[0]; +}; + struct dso { struct list_head node; struct rb_root symbols[MAP__NR_TYPES]; struct rb_root symbol_names[MAP__NR_TYPES]; + struct rb_root cache; enum dso_kernel_type kernel; + enum dso_swap_type needs_swap; + enum dso_binary_type symtab_type; + enum dso_binary_type data_type; u8 adjust_symbols:1; u8 has_build_id:1; u8 hit:1; u8 annotate_warned:1; u8 sname_alloc:1; u8 lname_alloc:1; - unsigned char symtab_type; u8 sorted_by_name; u8 loaded; u8 build_id[BUILD_ID_SIZE]; @@ -154,6 +217,28 @@ struct dso { char name[0]; }; +#define DSO__SWAP(dso, type, val) \ +({ \ + type ____r = val; \ + BUG_ON(dso->needs_swap == DSO_SWAP__UNSET); \ + if (dso->needs_swap == DSO_SWAP__YES) { \ + switch (sizeof(____r)) { \ + case 2: \ + ____r = bswap_16(val); \ + break; \ + case 4: \ + ____r = bswap_32(val); \ + break; \ + case 8: \ + ____r = bswap_64(val); \ + break; \ + default: \ + BUG_ON(1); \ + } \ + } \ + ____r; \ +}) + struct dso *dso__new(const char *name); void dso__delete(struct dso *dso); @@ -195,25 +280,12 @@ size_t dso__fprintf_symbols_by_name(struct dso *dso, enum map_type type, FILE *fp); size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp); -enum symtab_type { - SYMTAB__KALLSYMS = 0, - SYMTAB__GUEST_KALLSYMS, - SYMTAB__JAVA_JIT, - SYMTAB__BUILD_ID_CACHE, - SYMTAB__FEDORA_DEBUGINFO, - SYMTAB__UBUNTU_DEBUGINFO, - SYMTAB__BUILDID_DEBUGINFO, - SYMTAB__SYSTEM_PATH_DSO, - SYMTAB__GUEST_KMODULE, - SYMTAB__SYSTEM_PATH_KMODULE, - SYMTAB__NOT_FOUND, -}; - char dso__symtab_origin(const struct dso *dso); void dso__set_long_name(struct dso *dso, char *name); void dso__set_build_id(struct dso *dso, void *build_id); void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine); +struct map *dso__new_map(const char *name); struct symbol *dso__find_symbol(struct dso *dso, enum map_type type, u64 addr); struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type, @@ -237,8 +309,21 @@ void machines__destroy_guest_kernel_maps(struct rb_root *machines); int symbol__init(void); void symbol__exit(void); +size_t symbol__fprintf_symname_offs(const struct symbol *sym, + const struct addr_location *al, FILE *fp); +size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp); bool symbol_type__is_a(char symbol_type, enum map_type map_type); size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp); +int dso__binary_type_file(struct dso *dso, enum dso_binary_type type, + char *root_dir, char *file, size_t size); + +int dso__data_fd(struct dso *dso, struct machine *machine); +ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine, + u64 offset, u8 *data, ssize_t size); +ssize_t dso__data_read_addr(struct dso *dso, struct map *map, + struct machine *machine, u64 addr, + u8 *data, ssize_t size); +int dso__test_data(void); #endif /* __PERF_SYMBOL */ diff --git a/tools/perf/util/sysfs.c b/tools/perf/util/sysfs.c new file mode 100644 index 000000000000..48c6902e749f --- /dev/null +++ b/tools/perf/util/sysfs.c @@ -0,0 +1,60 @@ + +#include "util.h" +#include "sysfs.h" + +static const char * const sysfs_known_mountpoints[] = { + "/sys", + 0, +}; + +static int sysfs_found; +char sysfs_mountpoint[PATH_MAX]; + +static int sysfs_valid_mountpoint(const char *sysfs) +{ + struct statfs st_fs; + + if (statfs(sysfs, &st_fs) < 0) + return -ENOENT; + else if (st_fs.f_type != (long) SYSFS_MAGIC) + return -ENOENT; + + return 0; +} + +const char *sysfs_find_mountpoint(void) +{ + const char * const *ptr; + char type[100]; + FILE *fp; + + if (sysfs_found) + return (const char *) sysfs_mountpoint; + + ptr = sysfs_known_mountpoints; + while (*ptr) { + if (sysfs_valid_mountpoint(*ptr) == 0) { + sysfs_found = 1; + strcpy(sysfs_mountpoint, *ptr); + return sysfs_mountpoint; + } + ptr++; + } + + /* give up and parse /proc/mounts */ + fp = fopen("/proc/mounts", "r"); + if (fp == NULL) + return NULL; + + while (!sysfs_found && + fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n", + sysfs_mountpoint, type) == 2) { + + if (strcmp(type, "sysfs") == 0) + sysfs_found = 1; + } + + fclose(fp); + + return sysfs_found ? sysfs_mountpoint : NULL; +} diff --git a/tools/perf/util/sysfs.h b/tools/perf/util/sysfs.h new file mode 100644 index 000000000000..a813b7203938 --- /dev/null +++ b/tools/perf/util/sysfs.h @@ -0,0 +1,6 @@ +#ifndef __SYSFS_H__ +#define __SYSFS_H__ + +const char *sysfs_find_mountpoint(void); + +#endif /* __DEBUGFS_H__ */ diff --git a/tools/perf/util/target.c b/tools/perf/util/target.c new file mode 100644 index 000000000000..051eaa68095e --- /dev/null +++ b/tools/perf/util/target.c @@ -0,0 +1,151 @@ +/* + * Helper functions for handling target threads/cpus + * + * Copyright (C) 2012, LG Electronics, Namhyung Kim <namhyung.kim@lge.com> + * + * Released under the GPL v2. + */ + +#include "target.h" +#include "debug.h" + +#include <pwd.h> +#include <string.h> + + +enum perf_target_errno perf_target__validate(struct perf_target *target) +{ + enum perf_target_errno ret = PERF_ERRNO_TARGET__SUCCESS; + + if (target->pid) + target->tid = target->pid; + + /* CPU and PID are mutually exclusive */ + if (target->tid && target->cpu_list) { + target->cpu_list = NULL; + if (ret == PERF_ERRNO_TARGET__SUCCESS) + ret = PERF_ERRNO_TARGET__PID_OVERRIDE_CPU; + } + + /* UID and PID are mutually exclusive */ + if (target->tid && target->uid_str) { + target->uid_str = NULL; + if (ret == PERF_ERRNO_TARGET__SUCCESS) + ret = PERF_ERRNO_TARGET__PID_OVERRIDE_UID; + } + + /* UID and CPU are mutually exclusive */ + if (target->uid_str && target->cpu_list) { + target->cpu_list = NULL; + if (ret == PERF_ERRNO_TARGET__SUCCESS) + ret = PERF_ERRNO_TARGET__UID_OVERRIDE_CPU; + } + + /* PID and SYSTEM are mutually exclusive */ + if (target->tid && target->system_wide) { + target->system_wide = false; + if (ret == PERF_ERRNO_TARGET__SUCCESS) + ret = PERF_ERRNO_TARGET__PID_OVERRIDE_SYSTEM; + } + + /* UID and SYSTEM are mutually exclusive */ + if (target->uid_str && target->system_wide) { + target->system_wide = false; + if (ret == PERF_ERRNO_TARGET__SUCCESS) + ret = PERF_ERRNO_TARGET__UID_OVERRIDE_SYSTEM; + } + + return ret; +} + +enum perf_target_errno perf_target__parse_uid(struct perf_target *target) +{ + struct passwd pwd, *result; + char buf[1024]; + const char *str = target->uid_str; + + target->uid = UINT_MAX; + if (str == NULL) + return PERF_ERRNO_TARGET__SUCCESS; + + /* Try user name first */ + getpwnam_r(str, &pwd, buf, sizeof(buf), &result); + + if (result == NULL) { + /* + * The user name not found. Maybe it's a UID number. + */ + char *endptr; + int uid = strtol(str, &endptr, 10); + + if (*endptr != '\0') + return PERF_ERRNO_TARGET__INVALID_UID; + + getpwuid_r(uid, &pwd, buf, sizeof(buf), &result); + + if (result == NULL) + return PERF_ERRNO_TARGET__USER_NOT_FOUND; + } + + target->uid = result->pw_uid; + return PERF_ERRNO_TARGET__SUCCESS; +} + +/* + * This must have a same ordering as the enum perf_target_errno. + */ +static const char *perf_target__error_str[] = { + "PID/TID switch overriding CPU", + "PID/TID switch overriding UID", + "UID switch overriding CPU", + "PID/TID switch overriding SYSTEM", + "UID switch overriding SYSTEM", + "Invalid User: %s", + "Problems obtaining information for user %s", +}; + +int perf_target__strerror(struct perf_target *target, int errnum, + char *buf, size_t buflen) +{ + int idx; + const char *msg; + + BUG_ON(buflen == 0); + + if (errnum >= 0) { + const char *err = strerror_r(errnum, buf, buflen); + + if (err != buf) { + size_t len = strlen(err); + char *c = mempcpy(buf, err, min(buflen - 1, len)); + *c = '\0'; + } + + return 0; + } + + if (errnum < __PERF_ERRNO_TARGET__START || + errnum >= __PERF_ERRNO_TARGET__END) + return -1; + + idx = errnum - __PERF_ERRNO_TARGET__START; + msg = perf_target__error_str[idx]; + + switch (errnum) { + case PERF_ERRNO_TARGET__PID_OVERRIDE_CPU + ... PERF_ERRNO_TARGET__UID_OVERRIDE_SYSTEM: + snprintf(buf, buflen, "%s", msg); + break; + + case PERF_ERRNO_TARGET__INVALID_UID: + case PERF_ERRNO_TARGET__USER_NOT_FOUND: + snprintf(buf, buflen, msg, target->uid_str); + break; + + default: + /* cannot reach here */ + break; + } + + return 0; +} diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h new file mode 100644 index 000000000000..a4be8575fda5 --- /dev/null +++ b/tools/perf/util/target.h @@ -0,0 +1,65 @@ +#ifndef _PERF_TARGET_H +#define _PERF_TARGET_H + +#include <stdbool.h> +#include <sys/types.h> + +struct perf_target { + const char *pid; + const char *tid; + const char *cpu_list; + const char *uid_str; + uid_t uid; + bool system_wide; + bool uses_mmap; +}; + +enum perf_target_errno { + PERF_ERRNO_TARGET__SUCCESS = 0, + + /* + * Choose an arbitrary negative big number not to clash with standard + * errno since SUS requires the errno has distinct positive values. + * See 'Issue 6' in the link below. + * + * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html + */ + __PERF_ERRNO_TARGET__START = -10000, + + + /* for perf_target__validate() */ + PERF_ERRNO_TARGET__PID_OVERRIDE_CPU = __PERF_ERRNO_TARGET__START, + PERF_ERRNO_TARGET__PID_OVERRIDE_UID, + PERF_ERRNO_TARGET__UID_OVERRIDE_CPU, + PERF_ERRNO_TARGET__PID_OVERRIDE_SYSTEM, + PERF_ERRNO_TARGET__UID_OVERRIDE_SYSTEM, + + /* for perf_target__parse_uid() */ + PERF_ERRNO_TARGET__INVALID_UID, + PERF_ERRNO_TARGET__USER_NOT_FOUND, + + __PERF_ERRNO_TARGET__END, +}; + +enum perf_target_errno perf_target__validate(struct perf_target *target); +enum perf_target_errno perf_target__parse_uid(struct perf_target *target); + +int perf_target__strerror(struct perf_target *target, int errnum, char *buf, + size_t buflen); + +static inline bool perf_target__has_task(struct perf_target *target) +{ + return target->tid || target->pid || target->uid_str; +} + +static inline bool perf_target__has_cpu(struct perf_target *target) +{ + return target->system_wide || target->cpu_list; +} + +static inline bool perf_target__none(struct perf_target *target) +{ + return !perf_target__has_task(target) && !perf_target__has_cpu(target); +} + +#endif /* _PERF_TARGET_H */ diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index d5d3b22250f3..fb4b7ea6752f 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -61,7 +61,7 @@ static size_t thread__fprintf(struct thread *self, FILE *fp) map_groups__fprintf(&self->mg, verbose, fp); } -struct thread *perf_session__findnew(struct perf_session *self, pid_t pid) +struct thread *machine__findnew_thread(struct machine *self, pid_t pid) { struct rb_node **p = &self->threads.rb_node; struct rb_node *parent = NULL; @@ -125,12 +125,12 @@ int thread__fork(struct thread *self, struct thread *parent) return 0; } -size_t perf_session__fprintf(struct perf_session *self, FILE *fp) +size_t machine__fprintf(struct machine *machine, FILE *fp) { size_t ret = 0; struct rb_node *nd; - for (nd = rb_first(&self->threads); nd; nd = rb_next(nd)) { + for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) { struct thread *pos = rb_entry(nd, struct thread, rb_node); ret += thread__fprintf(pos, fp); diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index e5f2401c1b5e..70c2c13ff679 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -18,16 +18,14 @@ struct thread { int comm_len; }; -struct perf_session; +struct machine; void thread__delete(struct thread *self); int thread__set_comm(struct thread *self, const char *comm); int thread__comm_len(struct thread *self); -struct thread *perf_session__findnew(struct perf_session *self, pid_t pid); void thread__insert_map(struct thread *self, struct map *map); int thread__fork(struct thread *self, struct thread *parent); -size_t perf_session__fprintf(struct perf_session *self, FILE *fp); static inline struct map *thread__find_map(struct thread *self, enum map_type type, u64 addr) @@ -35,14 +33,12 @@ static inline struct map *thread__find_map(struct thread *self, return self ? map_groups__find(&self->mg, type, addr) : NULL; } -void thread__find_addr_map(struct thread *self, - struct perf_session *session, u8 cpumode, - enum map_type type, pid_t pid, u64 addr, +void thread__find_addr_map(struct thread *thread, struct machine *machine, + u8 cpumode, enum map_type type, u64 addr, struct addr_location *al); -void thread__find_addr_location(struct thread *self, - struct perf_session *session, u8 cpumode, - enum map_type type, pid_t pid, u64 addr, +void thread__find_addr_location(struct thread *thread, struct machine *machine, + u8 cpumode, enum map_type type, u64 addr, struct addr_location *al, symbol_filter_t filter); #endif /* __PERF_THREAD_H */ diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index a5df131b77c3..9b5f856cc280 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -1,6 +1,13 @@ #include <dirent.h> +#include <limits.h> +#include <stdbool.h> #include <stdlib.h> #include <stdio.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include "strlist.h" +#include <string.h> #include "thread_map.h" /* Skip "." and ".." directories */ @@ -23,7 +30,7 @@ struct thread_map *thread_map__new_by_pid(pid_t pid) sprintf(name, "/proc/%d/task", pid); items = scandir(name, &namelist, filter, NULL); if (items <= 0) - return NULL; + return NULL; threads = malloc(sizeof(*threads) + sizeof(pid_t) * items); if (threads != NULL) { @@ -51,14 +58,239 @@ struct thread_map *thread_map__new_by_tid(pid_t tid) return threads; } -struct thread_map *thread_map__new(pid_t pid, pid_t tid) +struct thread_map *thread_map__new_by_uid(uid_t uid) +{ + DIR *proc; + int max_threads = 32, items, i; + char path[256]; + struct dirent dirent, *next, **namelist = NULL; + struct thread_map *threads = malloc(sizeof(*threads) + + max_threads * sizeof(pid_t)); + if (threads == NULL) + goto out; + + proc = opendir("/proc"); + if (proc == NULL) + goto out_free_threads; + + threads->nr = 0; + + while (!readdir_r(proc, &dirent, &next) && next) { + char *end; + bool grow = false; + struct stat st; + pid_t pid = strtol(dirent.d_name, &end, 10); + + if (*end) /* only interested in proper numerical dirents */ + continue; + + snprintf(path, sizeof(path), "/proc/%s", dirent.d_name); + + if (stat(path, &st) != 0) + continue; + + if (st.st_uid != uid) + continue; + + snprintf(path, sizeof(path), "/proc/%d/task", pid); + items = scandir(path, &namelist, filter, NULL); + if (items <= 0) + goto out_free_closedir; + + while (threads->nr + items >= max_threads) { + max_threads *= 2; + grow = true; + } + + if (grow) { + struct thread_map *tmp; + + tmp = realloc(threads, (sizeof(*threads) + + max_threads * sizeof(pid_t))); + if (tmp == NULL) + goto out_free_namelist; + + threads = tmp; + } + + for (i = 0; i < items; i++) + threads->map[threads->nr + i] = atoi(namelist[i]->d_name); + + for (i = 0; i < items; i++) + free(namelist[i]); + free(namelist); + + threads->nr += items; + } + +out_closedir: + closedir(proc); +out: + return threads; + +out_free_threads: + free(threads); + return NULL; + +out_free_namelist: + for (i = 0; i < items; i++) + free(namelist[i]); + free(namelist); + +out_free_closedir: + free(threads); + threads = NULL; + goto out_closedir; +} + +struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid) { if (pid != -1) return thread_map__new_by_pid(pid); + + if (tid == -1 && uid != UINT_MAX) + return thread_map__new_by_uid(uid); + return thread_map__new_by_tid(tid); } +static struct thread_map *thread_map__new_by_pid_str(const char *pid_str) +{ + struct thread_map *threads = NULL, *nt; + char name[256]; + int items, total_tasks = 0; + struct dirent **namelist = NULL; + int i, j = 0; + pid_t pid, prev_pid = INT_MAX; + char *end_ptr; + struct str_node *pos; + struct strlist *slist = strlist__new(false, pid_str); + + if (!slist) + return NULL; + + strlist__for_each(pos, slist) { + pid = strtol(pos->s, &end_ptr, 10); + + if (pid == INT_MIN || pid == INT_MAX || + (*end_ptr != '\0' && *end_ptr != ',')) + goto out_free_threads; + + if (pid == prev_pid) + continue; + + sprintf(name, "/proc/%d/task", pid); + items = scandir(name, &namelist, filter, NULL); + if (items <= 0) + goto out_free_threads; + + total_tasks += items; + nt = realloc(threads, (sizeof(*threads) + + sizeof(pid_t) * total_tasks)); + if (nt == NULL) + goto out_free_namelist; + + threads = nt; + + for (i = 0; i < items; i++) { + threads->map[j++] = atoi(namelist[i]->d_name); + free(namelist[i]); + } + threads->nr = total_tasks; + free(namelist); + } + +out: + strlist__delete(slist); + return threads; + +out_free_namelist: + for (i = 0; i < items; i++) + free(namelist[i]); + free(namelist); + +out_free_threads: + free(threads); + threads = NULL; + goto out; +} + +static struct thread_map *thread_map__new_by_tid_str(const char *tid_str) +{ + struct thread_map *threads = NULL, *nt; + int ntasks = 0; + pid_t tid, prev_tid = INT_MAX; + char *end_ptr; + struct str_node *pos; + struct strlist *slist; + + /* perf-stat expects threads to be generated even if tid not given */ + if (!tid_str) { + threads = malloc(sizeof(*threads) + sizeof(pid_t)); + if (threads != NULL) { + threads->map[0] = -1; + threads->nr = 1; + } + return threads; + } + + slist = strlist__new(false, tid_str); + if (!slist) + return NULL; + + strlist__for_each(pos, slist) { + tid = strtol(pos->s, &end_ptr, 10); + + if (tid == INT_MIN || tid == INT_MAX || + (*end_ptr != '\0' && *end_ptr != ',')) + goto out_free_threads; + + if (tid == prev_tid) + continue; + + ntasks++; + nt = realloc(threads, sizeof(*threads) + sizeof(pid_t) * ntasks); + + if (nt == NULL) + goto out_free_threads; + + threads = nt; + threads->map[ntasks - 1] = tid; + threads->nr = ntasks; + } +out: + return threads; + +out_free_threads: + free(threads); + threads = NULL; + goto out; +} + +struct thread_map *thread_map__new_str(const char *pid, const char *tid, + uid_t uid) +{ + if (pid) + return thread_map__new_by_pid_str(pid); + + if (!tid && uid != UINT_MAX) + return thread_map__new_by_uid(uid); + + return thread_map__new_by_tid_str(tid); +} + void thread_map__delete(struct thread_map *threads) { free(threads); } + +size_t thread_map__fprintf(struct thread_map *threads, FILE *fp) +{ + int i; + size_t printed = fprintf(fp, "%d thread%s: ", + threads->nr, threads->nr > 1 ? "s" : ""); + for (i = 0; i < threads->nr; ++i) + printed += fprintf(fp, "%s%d", i ? ", " : "", threads->map[i]); + + return printed + fprintf(fp, "\n"); +} diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index 3cb907311409..f718df8a3c59 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -2,14 +2,23 @@ #define __PERF_THREAD_MAP_H #include <sys/types.h> +#include <stdio.h> struct thread_map { int nr; - int map[]; + pid_t map[]; }; struct thread_map *thread_map__new_by_pid(pid_t pid); struct thread_map *thread_map__new_by_tid(pid_t tid); -struct thread_map *thread_map__new(pid_t pid, pid_t tid); +struct thread_map *thread_map__new_by_uid(uid_t uid); +struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid); + +struct thread_map *thread_map__new_str(const char *pid, + const char *tid, uid_t uid); + void thread_map__delete(struct thread_map *threads); + +size_t thread_map__fprintf(struct thread_map *threads, FILE *fp); + #endif /* __PERF_THREAD_MAP_H */ diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h new file mode 100644 index 000000000000..b0e1aadba8d5 --- /dev/null +++ b/tools/perf/util/tool.h @@ -0,0 +1,50 @@ +#ifndef __PERF_TOOL_H +#define __PERF_TOOL_H + +#include <stdbool.h> + +struct perf_session; +union perf_event; +struct perf_evlist; +struct perf_evsel; +struct perf_sample; +struct perf_tool; +struct machine; + +typedef int (*event_sample)(struct perf_tool *tool, union perf_event *event, + struct perf_sample *sample, + struct perf_evsel *evsel, struct machine *machine); + +typedef int (*event_op)(struct perf_tool *tool, union perf_event *event, + struct perf_sample *sample, struct machine *machine); + +typedef int (*event_attr_op)(union perf_event *event, + struct perf_evlist **pevlist); +typedef int (*event_simple_op)(struct perf_tool *tool, union perf_event *event); + +typedef int (*event_synth_op)(union perf_event *event, + struct perf_session *session); + +typedef int (*event_op2)(struct perf_tool *tool, union perf_event *event, + struct perf_session *session); + +struct perf_tool { + event_sample sample, + read; + event_op mmap, + comm, + fork, + exit, + lost, + throttle, + unthrottle; + event_attr_op attr; + event_synth_op tracing_data; + event_simple_op event_type; + event_op2 finished_round, + build_id; + bool ordered_samples; + bool ordering_requires_timestamps; +}; + +#endif /* __PERF_TOOL_H */ diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index a11f60735a18..7eeebcee291c 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c @@ -15,52 +15,6 @@ #include "top.h" #include <inttypes.h> -/* - * Ordering weight: count-1 * count-2 * ... / count-n - */ -static double sym_weight(const struct sym_entry *sym, struct perf_top *top) -{ - double weight = sym->snap_count; - int counter; - - if (!top->display_weighted) - return weight; - - for (counter = 1; counter < top->evlist->nr_entries - 1; counter++) - weight *= sym->count[counter]; - - weight /= (sym->count[counter] + 1); - - return weight; -} - -static void perf_top__remove_active_sym(struct perf_top *top, struct sym_entry *syme) -{ - pthread_mutex_lock(&top->active_symbols_lock); - list_del_init(&syme->node); - pthread_mutex_unlock(&top->active_symbols_lock); -} - -static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se) -{ - struct rb_node **p = &tree->rb_node; - struct rb_node *parent = NULL; - struct sym_entry *iter; - - while (*p != NULL) { - parent = *p; - iter = rb_entry(parent, struct sym_entry, rb_node); - - if (se->weight > iter->weight) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - rb_link_node(&se->rb_node, parent, p); - rb_insert_color(&se->rb_node, tree); -} - #define SNPRINTF(buf, size, fmt, args...) \ ({ \ size_t r = snprintf(buf, size, fmt, ## args); \ @@ -69,7 +23,6 @@ static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se) size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) { - struct perf_evsel *counter; float samples_per_sec = top->samples / top->delay_secs; float ksamples_per_sec = top->kernel_samples / top->delay_secs; float esamples_percent = (100.0 * top->exact_samples) / top->samples; @@ -104,7 +57,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) esamples_percent); } - if (top->evlist->nr_entries == 1 || !top->display_weighted) { + if (top->evlist->nr_entries == 1) { struct perf_evsel *first; first = list_entry(top->evlist->entries.next, struct perf_evsel, node); ret += SNPRINTF(bf + ret, size - ret, "%" PRIu64 "%s ", @@ -112,44 +65,28 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) top->freq ? "Hz" : ""); } - if (!top->display_weighted) { - ret += SNPRINTF(bf + ret, size - ret, "%s", - event_name(top->sym_evsel)); - } else { - /* - * Don't let events eat all the space. Leaving 30 bytes - * for the rest should be enough. - */ - size_t last_pos = size - 30; - - list_for_each_entry(counter, &top->evlist->entries, node) { - ret += SNPRINTF(bf + ret, size - ret, "%s%s", - counter->idx ? "/" : "", - event_name(counter)); - if (ret > last_pos) { - sprintf(bf + last_pos - 3, ".."); - ret = last_pos - 1; - break; - } - } - } + ret += SNPRINTF(bf + ret, size - ret, "%s", perf_evsel__name(top->sym_evsel)); ret += SNPRINTF(bf + ret, size - ret, "], "); - if (top->target_pid != -1) - ret += SNPRINTF(bf + ret, size - ret, " (target_pid: %d", - top->target_pid); - else if (top->target_tid != -1) - ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %d", - top->target_tid); + if (top->target.pid) + ret += SNPRINTF(bf + ret, size - ret, " (target_pid: %s", + top->target.pid); + else if (top->target.tid) + ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %s", + top->target.tid); + else if (top->target.uid_str != NULL) + ret += SNPRINTF(bf + ret, size - ret, " (uid: %s", + top->target.uid_str); else ret += SNPRINTF(bf + ret, size - ret, " (all"); - if (top->cpu_list) + if (top->target.cpu_list) ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)", - top->evlist->cpus->nr > 1 ? "s" : "", top->cpu_list); + top->evlist->cpus->nr > 1 ? "s" : "", + top->target.cpu_list); else { - if (top->target_tid != -1) + if (top->target.tid) ret += SNPRINTF(bf + ret, size - ret, ")"); else ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)", @@ -166,73 +103,3 @@ void perf_top__reset_sample_counters(struct perf_top *top) top->exact_samples = top->guest_kernel_samples = top->guest_us_samples = 0; } - -float perf_top__decay_samples(struct perf_top *top, struct rb_root *root) -{ - struct sym_entry *syme, *n; - float sum_ksamples = 0.0; - int snap = !top->display_weighted ? top->sym_evsel->idx : 0, j; - - /* Sort the active symbols */ - pthread_mutex_lock(&top->active_symbols_lock); - syme = list_entry(top->active_symbols.next, struct sym_entry, node); - pthread_mutex_unlock(&top->active_symbols_lock); - - top->rb_entries = 0; - list_for_each_entry_safe_from(syme, n, &top->active_symbols, node) { - syme->snap_count = syme->count[snap]; - if (syme->snap_count != 0) { - - if ((top->hide_user_symbols && - syme->map->dso->kernel == DSO_TYPE_USER) || - (top->hide_kernel_symbols && - syme->map->dso->kernel == DSO_TYPE_KERNEL)) { - perf_top__remove_active_sym(top, syme); - continue; - } - syme->weight = sym_weight(syme, top); - - if ((int)syme->snap_count >= top->count_filter) { - rb_insert_active_sym(root, syme); - ++top->rb_entries; - } - sum_ksamples += syme->snap_count; - - for (j = 0; j < top->evlist->nr_entries; j++) - syme->count[j] = top->zero ? 0 : syme->count[j] * 7 / 8; - } else - perf_top__remove_active_sym(top, syme); - } - - return sum_ksamples; -} - -/* - * Find the longest symbol name that will be displayed - */ -void perf_top__find_widths(struct perf_top *top, struct rb_root *root, - int *dso_width, int *dso_short_width, int *sym_width) -{ - struct rb_node *nd; - int printed = 0; - - *sym_width = *dso_width = *dso_short_width = 0; - - for (nd = rb_first(root); nd; nd = rb_next(nd)) { - struct sym_entry *syme = rb_entry(nd, struct sym_entry, rb_node); - struct symbol *sym = sym_entry__symbol(syme); - - if (++printed > top->print_entries || - (int)syme->snap_count < top->count_filter) - continue; - - if (syme->map->dso->long_name_len > *dso_width) - *dso_width = syme->map->dso->long_name_len; - - if (syme->map->dso->short_name_len > *dso_short_width) - *dso_short_width = syme->map->dso->short_name_len; - - if (sym->namelen > *sym_width) - *sym_width = sym->namelen; - } -} diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index bfbf95bcc603..33347ca89ee4 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -1,64 +1,51 @@ #ifndef __PERF_TOP_H #define __PERF_TOP_H 1 +#include "tool.h" #include "types.h" -#include "../perf.h" #include <stddef.h> -#include <pthread.h> -#include <linux/list.h> -#include <linux/rbtree.h> +#include <stdbool.h> struct perf_evlist; struct perf_evsel; - -struct sym_entry { - struct rb_node rb_node; - struct list_head node; - unsigned long snap_count; - double weight; - struct map *map; - unsigned long count[0]; -}; - -static inline struct symbol *sym_entry__symbol(struct sym_entry *self) -{ - return ((void *)self) + symbol_conf.priv_size; -} +struct perf_session; struct perf_top { + struct perf_tool tool; struct perf_evlist *evlist; + struct perf_target target; /* * Symbols will be added here in perf_event__process_sample and will * get out after decayed. */ - struct list_head active_symbols; - pthread_mutex_t active_symbols_lock; - pthread_cond_t active_symbols_cond; u64 samples; u64 kernel_samples, us_samples; u64 exact_samples; u64 guest_us_samples, guest_kernel_samples; int print_entries, count_filter, delay_secs; - int display_weighted, freq, rb_entries; - pid_t target_pid, target_tid; + int freq; bool hide_kernel_symbols, hide_user_symbols, zero; - const char *cpu_list; - struct sym_entry *sym_filter_entry; + bool use_tui, use_stdio; + bool sort_has_symbols; + bool dont_use_callchains; + bool kptr_restrict_warned; + bool vmlinux_warned; + bool inherit; + bool group; + bool sample_id_all_missing; + bool exclude_guest_missing; + bool dump_symtab; + struct hist_entry *sym_filter_entry; struct perf_evsel *sym_evsel; + struct perf_session *session; + struct winsize winsize; + unsigned int mmap_pages; + int default_interval; + int realtime_prio; + int sym_pcnt_filter; + const char *sym_filter; }; size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size); void perf_top__reset_sample_counters(struct perf_top *top); -float perf_top__decay_samples(struct perf_top *top, struct rb_root *root); -void perf_top__find_widths(struct perf_top *top, struct rb_root *root, - int *dso_width, int *dso_short_width, int *sym_width); - -#ifdef NO_NEWT_SUPPORT -static inline int perf_top__tui_browser(struct perf_top *top __used) -{ - return 0; -} -#else -int perf_top__tui_browser(struct perf_top *top); -#endif #endif /* __PERF_TOP_H */ diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index 3403f814ad72..a8d81c35ef66 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -18,7 +18,7 @@ * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -#define _GNU_SOURCE +#include "util.h" #include <dirent.h> #include <mntent.h> #include <stdio.h> @@ -31,7 +31,6 @@ #include <pthread.h> #include <fcntl.h> #include <unistd.h> -#include <ctype.h> #include <errno.h> #include <stdbool.h> #include <linux/list.h> @@ -44,10 +43,6 @@ #define VERSION "0.5" -#define _STR(x) #x -#define STR(x) _STR(x) -#define MAX_PATH 256 - #define TRACE_CTRL "tracing_on" #define TRACE "trace" #define AVAILABLE "available_tracers" @@ -73,27 +68,7 @@ struct events { }; - -static void die(const char *fmt, ...) -{ - va_list ap; - int ret = errno; - - if (errno) - perror("trace-cmd"); - else - ret = -1; - - va_start(ap, fmt); - fprintf(stderr, " "); - vfprintf(stderr, fmt, ap); - va_end(ap); - - fprintf(stderr, "\n"); - exit(ret); -} - -void *malloc_or_die(unsigned int size) +static void *malloc_or_die(unsigned int size) { void *data; @@ -196,7 +171,8 @@ static void record_file(const char *file, size_t hdr_sz) die("Can't read '%s'", file); /* put in zeros for file size, then fill true size later */ - write_or_die(&size, hdr_sz); + if (hdr_sz) + write_or_die(&size, hdr_sz); do { r = read(fd, buf, BUFSIZ); @@ -212,7 +188,7 @@ static void record_file(const char *file, size_t hdr_sz) if (bigendian()) sizep += sizeof(u64) - hdr_sz; - if (pwrite(output_fd, sizep, hdr_sz, hdr_pos) < 0) + if (hdr_sz && pwrite(output_fd, sizep, hdr_sz, hdr_pos) < 0) die("writing to %s", output_file); } @@ -428,6 +404,19 @@ get_tracepoints_path(struct list_head *pattrs) return nr_tracepoints > 0 ? path.next : NULL; } +static void +put_tracepoints_path(struct tracepoint_path *tps) +{ + while (tps) { + struct tracepoint_path *t = tps; + + tps = tps->next; + free(t->name); + free(t->system); + free(t); + } +} + bool have_tracepoints(struct list_head *pattrs) { struct perf_evsel *pos; @@ -439,19 +428,11 @@ bool have_tracepoints(struct list_head *pattrs) return false; } -int read_tracing_data(int fd, struct list_head *pattrs) +static void tracing_data_header(void) { - char buf[BUFSIZ]; - struct tracepoint_path *tps = get_tracepoints_path(pattrs); - - /* - * What? No tracepoints? No sense writing anything here, bail out. - */ - if (tps == NULL) - return -1; - - output_fd = fd; + char buf[20]; + /* just guessing this is someone's birthday.. ;) */ buf[0] = 23; buf[1] = 8; buf[2] = 68; @@ -467,6 +448,8 @@ int read_tracing_data(int fd, struct list_head *pattrs) else buf[0] = 0; + read_trace_init(buf[0], buf[0]); + write_or_die(buf, 1); /* save size of long */ @@ -476,28 +459,86 @@ int read_tracing_data(int fd, struct list_head *pattrs) /* save page_size */ page_size = sysconf(_SC_PAGESIZE); write_or_die(&page_size, 4); +} +struct tracing_data *tracing_data_get(struct list_head *pattrs, + int fd, bool temp) +{ + struct tracepoint_path *tps; + struct tracing_data *tdata; + + output_fd = fd; + + tps = get_tracepoints_path(pattrs); + if (!tps) + return NULL; + + tdata = malloc_or_die(sizeof(*tdata)); + tdata->temp = temp; + tdata->size = 0; + + if (temp) { + int temp_fd; + + snprintf(tdata->temp_file, sizeof(tdata->temp_file), + "/tmp/perf-XXXXXX"); + if (!mkstemp(tdata->temp_file)) + die("Can't make temp file"); + + temp_fd = open(tdata->temp_file, O_RDWR); + if (temp_fd < 0) + die("Can't read '%s'", tdata->temp_file); + + /* + * Set the temp file the default output, so all the + * tracing data are stored into it. + */ + output_fd = temp_fd; + } + + tracing_data_header(); read_header_files(); read_ftrace_files(tps); read_event_files(tps); read_proc_kallsyms(); read_ftrace_printk(); - return 0; + /* + * All tracing data are stored by now, we can restore + * the default output file in case we used temp file. + */ + if (temp) { + tdata->size = lseek(output_fd, 0, SEEK_CUR); + close(output_fd); + output_fd = fd; + } + + put_tracepoints_path(tps); + return tdata; } -ssize_t read_tracing_data_size(int fd, struct list_head *pattrs) +void tracing_data_put(struct tracing_data *tdata) { - ssize_t size; - int err = 0; + if (tdata->temp) { + record_file(tdata->temp_file, 0); + unlink(tdata->temp_file); + } + + free(tdata); +} - calc_data_size = 1; - err = read_tracing_data(fd, pattrs); - size = calc_data_size - 1; - calc_data_size = 0; +int read_tracing_data(int fd, struct list_head *pattrs) +{ + struct tracing_data *tdata; - if (err < 0) - return err; + /* + * We work over the real file, so we can write data + * directly, no temp file is needed. + */ + tdata = tracing_data_get(pattrs, fd, false); + if (!tdata) + return -ENOMEM; - return size; + tracing_data_put(tdata); + return 0; } diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 0a7ed5b5e281..0715c843c2e7 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -17,2159 +17,298 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * - * The parts for function graph printing was taken and modified from the - * Linux Kernel that were written by Frederic Weisbecker. */ -#define _GNU_SOURCE #include <stdio.h> #include <stdlib.h> #include <string.h> #include <ctype.h> #include <errno.h> -#undef _GNU_SOURCE #include "../perf.h" #include "util.h" #include "trace-event.h" -int header_page_ts_offset; -int header_page_ts_size; -int header_page_size_offset; int header_page_size_size; -int header_page_overwrite_offset; -int header_page_overwrite_size; +int header_page_ts_size; int header_page_data_offset; -int header_page_data_size; bool latency_format; -static char *input_buf; -static unsigned long long input_buf_ptr; -static unsigned long long input_buf_siz; - -static int cpus; -static int long_size; -static int is_flag_field; -static int is_symbolic_field; - -static struct format_field * -find_any_field(struct event *event, const char *name); - -static void init_input_buf(char *buf, unsigned long long size) -{ - input_buf = buf; - input_buf_siz = size; - input_buf_ptr = 0; -} - -struct cmdline { - char *comm; - int pid; -}; - -static struct cmdline *cmdlines; -static int cmdline_count; - -static int cmdline_cmp(const void *a, const void *b) -{ - const struct cmdline *ca = a; - const struct cmdline *cb = b; - - if (ca->pid < cb->pid) - return -1; - if (ca->pid > cb->pid) - return 1; - - return 0; -} - -void parse_cmdlines(char *file, int size __unused) -{ - struct cmdline_list { - struct cmdline_list *next; - char *comm; - int pid; - } *list = NULL, *item; - char *line; - char *next = NULL; - int i; - - line = strtok_r(file, "\n", &next); - while (line) { - item = malloc_or_die(sizeof(*item)); - sscanf(line, "%d %as", &item->pid, - (float *)(void *)&item->comm); /* workaround gcc warning */ - item->next = list; - list = item; - line = strtok_r(NULL, "\n", &next); - cmdline_count++; - } - - cmdlines = malloc_or_die(sizeof(*cmdlines) * cmdline_count); - - i = 0; - while (list) { - cmdlines[i].pid = list->pid; - cmdlines[i].comm = list->comm; - i++; - item = list; - list = list->next; - free(item); - } - - qsort(cmdlines, cmdline_count, sizeof(*cmdlines), cmdline_cmp); -} - -static struct func_map { - unsigned long long addr; - char *func; - char *mod; -} *func_list; -static unsigned int func_count; - -static int func_cmp(const void *a, const void *b) -{ - const struct func_map *fa = a; - const struct func_map *fb = b; - - if (fa->addr < fb->addr) - return -1; - if (fa->addr > fb->addr) - return 1; - - return 0; -} - -void parse_proc_kallsyms(char *file, unsigned int size __unused) -{ - struct func_list { - struct func_list *next; - unsigned long long addr; - char *func; - char *mod; - } *list = NULL, *item; - char *line; - char *next = NULL; - char *addr_str; - char ch; - int ret __used; - int i; - - line = strtok_r(file, "\n", &next); - while (line) { - item = malloc_or_die(sizeof(*item)); - item->mod = NULL; - ret = sscanf(line, "%as %c %as\t[%as", - (float *)(void *)&addr_str, /* workaround gcc warning */ - &ch, - (float *)(void *)&item->func, - (float *)(void *)&item->mod); - item->addr = strtoull(addr_str, NULL, 16); - free(addr_str); - - /* truncate the extra ']' */ - if (item->mod) - item->mod[strlen(item->mod) - 1] = 0; - - - item->next = list; - list = item; - line = strtok_r(NULL, "\n", &next); - func_count++; - } - - func_list = malloc_or_die(sizeof(*func_list) * (func_count + 1)); - - i = 0; - while (list) { - func_list[i].func = list->func; - func_list[i].addr = list->addr; - func_list[i].mod = list->mod; - i++; - item = list; - list = list->next; - free(item); - } - - qsort(func_list, func_count, sizeof(*func_list), func_cmp); - - /* - * Add a special record at the end. - */ - func_list[func_count].func = NULL; - func_list[func_count].addr = 0; - func_list[func_count].mod = NULL; -} - -/* - * We are searching for a record in between, not an exact - * match. - */ -static int func_bcmp(const void *a, const void *b) -{ - const struct func_map *fa = a; - const struct func_map *fb = b; - - if ((fa->addr == fb->addr) || - - (fa->addr > fb->addr && - fa->addr < (fb+1)->addr)) - return 0; - - if (fa->addr < fb->addr) - return -1; - - return 1; -} - -static struct func_map *find_func(unsigned long long addr) -{ - struct func_map *func; - struct func_map key; - - key.addr = addr; - - func = bsearch(&key, func_list, func_count, sizeof(*func_list), - func_bcmp); - - return func; -} - -void print_funcs(void) -{ - int i; - - for (i = 0; i < (int)func_count; i++) { - printf("%016llx %s", - func_list[i].addr, - func_list[i].func); - if (func_list[i].mod) - printf(" [%s]\n", func_list[i].mod); - else - printf("\n"); - } -} - -static struct printk_map { - unsigned long long addr; - char *printk; -} *printk_list; -static unsigned int printk_count; - -static int printk_cmp(const void *a, const void *b) -{ - const struct func_map *fa = a; - const struct func_map *fb = b; - - if (fa->addr < fb->addr) - return -1; - if (fa->addr > fb->addr) - return 1; - - return 0; -} - -static struct printk_map *find_printk(unsigned long long addr) -{ - struct printk_map *printk; - struct printk_map key; - - key.addr = addr; - - printk = bsearch(&key, printk_list, printk_count, sizeof(*printk_list), - printk_cmp); - - return printk; -} - -void parse_ftrace_printk(char *file, unsigned int size __unused) -{ - struct printk_list { - struct printk_list *next; - unsigned long long addr; - char *printk; - } *list = NULL, *item; - char *line; - char *next = NULL; - char *addr_str; - int i; - - line = strtok_r(file, "\n", &next); - while (line) { - addr_str = strsep(&line, ":"); - if (!line) { - warning("error parsing print strings"); - break; - } - item = malloc_or_die(sizeof(*item)); - item->addr = strtoull(addr_str, NULL, 16); - /* fmt still has a space, skip it */ - item->printk = strdup(line+1); - item->next = list; - list = item; - line = strtok_r(NULL, "\n", &next); - printk_count++; - } - - printk_list = malloc_or_die(sizeof(*printk_list) * printk_count + 1); - - i = 0; - while (list) { - printk_list[i].printk = list->printk; - printk_list[i].addr = list->addr; - i++; - item = list; - list = list->next; - free(item); - } - - qsort(printk_list, printk_count, sizeof(*printk_list), printk_cmp); -} - -void print_printk(void) +struct pevent *read_trace_init(int file_bigendian, int host_bigendian) { - int i; + struct pevent *pevent = pevent_alloc(); - for (i = 0; i < (int)printk_count; i++) { - printf("%016llx %s\n", - printk_list[i].addr, - printk_list[i].printk); + if (pevent != NULL) { + pevent_set_flag(pevent, PEVENT_NSEC_OUTPUT); + pevent_set_file_bigendian(pevent, file_bigendian); + pevent_set_host_bigendian(pevent, host_bigendian); } -} - -static struct event *alloc_event(void) -{ - struct event *event; - event = malloc_or_die(sizeof(*event)); - memset(event, 0, sizeof(*event)); - - return event; + return pevent; } -enum event_type { - EVENT_ERROR, - EVENT_NONE, - EVENT_SPACE, - EVENT_NEWLINE, - EVENT_OP, - EVENT_DELIM, - EVENT_ITEM, - EVENT_DQUOTE, - EVENT_SQUOTE, -}; - -static struct event *event_list; - -static void add_event(struct event *event) +static int get_common_field(struct scripting_context *context, + int *offset, int *size, const char *type) { - event->next = event_list; - event_list = event; -} - -static int event_item_type(enum event_type type) -{ - switch (type) { - case EVENT_ITEM ... EVENT_SQUOTE: - return 1; - case EVENT_ERROR ... EVENT_DELIM: - default: - return 0; - } -} - -static void free_arg(struct print_arg *arg) -{ - if (!arg) - return; - - switch (arg->type) { - case PRINT_ATOM: - if (arg->atom.atom) - free(arg->atom.atom); - break; - case PRINT_NULL: - case PRINT_FIELD ... PRINT_OP: - default: - /* todo */ - break; - } - - free(arg); -} - -static enum event_type get_type(int ch) -{ - if (ch == '\n') - return EVENT_NEWLINE; - if (isspace(ch)) - return EVENT_SPACE; - if (isalnum(ch) || ch == '_') - return EVENT_ITEM; - if (ch == '\'') - return EVENT_SQUOTE; - if (ch == '"') - return EVENT_DQUOTE; - if (!isprint(ch)) - return EVENT_NONE; - if (ch == '(' || ch == ')' || ch == ',') - return EVENT_DELIM; - - return EVENT_OP; -} - -static int __read_char(void) -{ - if (input_buf_ptr >= input_buf_siz) - return -1; - - return input_buf[input_buf_ptr++]; -} - -static int __peek_char(void) -{ - if (input_buf_ptr >= input_buf_siz) - return -1; - - return input_buf[input_buf_ptr]; -} - -static enum event_type __read_token(char **tok) -{ - char buf[BUFSIZ]; - int ch, last_ch, quote_ch, next_ch; - int i = 0; - int tok_size = 0; - enum event_type type; - - *tok = NULL; - - - ch = __read_char(); - if (ch < 0) - return EVENT_NONE; - - type = get_type(ch); - if (type == EVENT_NONE) - return type; - - buf[i++] = ch; - - switch (type) { - case EVENT_NEWLINE: - case EVENT_DELIM: - *tok = malloc_or_die(2); - (*tok)[0] = ch; - (*tok)[1] = 0; - return type; - - case EVENT_OP: - switch (ch) { - case '-': - next_ch = __peek_char(); - if (next_ch == '>') { - buf[i++] = __read_char(); - break; - } - /* fall through */ - case '+': - case '|': - case '&': - case '>': - case '<': - last_ch = ch; - ch = __peek_char(); - if (ch != last_ch) - goto test_equal; - buf[i++] = __read_char(); - switch (last_ch) { - case '>': - case '<': - goto test_equal; - default: - break; - } - break; - case '!': - case '=': - goto test_equal; - default: /* what should we do instead? */ - break; - } - buf[i] = 0; - *tok = strdup(buf); - return type; - - test_equal: - ch = __peek_char(); - if (ch == '=') - buf[i++] = __read_char(); - break; - - case EVENT_DQUOTE: - case EVENT_SQUOTE: - /* don't keep quotes */ - i--; - quote_ch = ch; - last_ch = 0; - do { - if (i == (BUFSIZ - 1)) { - buf[i] = 0; - if (*tok) { - *tok = realloc(*tok, tok_size + BUFSIZ); - if (!*tok) - return EVENT_NONE; - strcat(*tok, buf); - } else - *tok = strdup(buf); - - if (!*tok) - return EVENT_NONE; - tok_size += BUFSIZ; - i = 0; - } - last_ch = ch; - ch = __read_char(); - buf[i++] = ch; - /* the '\' '\' will cancel itself */ - if (ch == '\\' && last_ch == '\\') - last_ch = 0; - } while (ch != quote_ch || last_ch == '\\'); - /* remove the last quote */ - i--; - goto out; - - case EVENT_ERROR ... EVENT_SPACE: - case EVENT_ITEM: - default: - break; - } - - while (get_type(__peek_char()) == type) { - if (i == (BUFSIZ - 1)) { - buf[i] = 0; - if (*tok) { - *tok = realloc(*tok, tok_size + BUFSIZ); - if (!*tok) - return EVENT_NONE; - strcat(*tok, buf); - } else - *tok = strdup(buf); - - if (!*tok) - return EVENT_NONE; - tok_size += BUFSIZ; - i = 0; - } - ch = __read_char(); - buf[i++] = ch; - } - - out: - buf[i] = 0; - if (*tok) { - *tok = realloc(*tok, tok_size + i); - if (!*tok) - return EVENT_NONE; - strcat(*tok, buf); - } else - *tok = strdup(buf); - if (!*tok) - return EVENT_NONE; - - return type; -} - -static void free_token(char *tok) -{ - if (tok) - free(tok); -} - -static enum event_type read_token(char **tok) -{ - enum event_type type; - - for (;;) { - type = __read_token(tok); - if (type != EVENT_SPACE) - return type; - - free_token(*tok); - } - - /* not reached */ - return EVENT_NONE; -} - -/* no newline */ -static enum event_type read_token_item(char **tok) -{ - enum event_type type; - - for (;;) { - type = __read_token(tok); - if (type != EVENT_SPACE && type != EVENT_NEWLINE) - return type; - - free_token(*tok); - } - - /* not reached */ - return EVENT_NONE; -} - -static int test_type(enum event_type type, enum event_type expect) -{ - if (type != expect) { - warning("Error: expected type %d but read %d", - expect, type); - return -1; - } - return 0; -} + struct pevent *pevent = context->pevent; + struct event_format *event; + struct format_field *field; -static int __test_type_token(enum event_type type, char *token, - enum event_type expect, const char *expect_tok, - bool warn) -{ - if (type != expect) { - if (warn) - warning("Error: expected type %d but read %d", - expect, type); - return -1; - } + if (!*size) { + if (!pevent->events) + return 0; - if (strcmp(token, expect_tok) != 0) { - if (warn) - warning("Error: expected '%s' but read '%s'", - expect_tok, token); - return -1; + event = pevent->events[0]; + field = pevent_find_common_field(event, type); + if (!field) + return 0; + *offset = field->offset; + *size = field->size; } - return 0; -} -static int test_type_token(enum event_type type, char *token, - enum event_type expect, const char *expect_tok) -{ - return __test_type_token(type, token, expect, expect_tok, true); + return pevent_read_number(pevent, context->event_data + *offset, *size); } -static int __read_expect_type(enum event_type expect, char **tok, int newline_ok) -{ - enum event_type type; - - if (newline_ok) - type = read_token(tok); - else - type = read_token_item(tok); - return test_type(type, expect); -} - -static int read_expect_type(enum event_type expect, char **tok) -{ - return __read_expect_type(expect, tok, 1); -} - -static int __read_expected(enum event_type expect, const char *str, - int newline_ok, bool warn) +int common_lock_depth(struct scripting_context *context) { - enum event_type type; - char *token; + static int offset; + static int size; int ret; - if (newline_ok) - type = read_token(&token); - else - type = read_token_item(&token); - - ret = __test_type_token(type, token, expect, str, warn); - - free_token(token); + ret = get_common_field(context, &size, &offset, + "common_lock_depth"); + if (ret < 0) + return -1; return ret; } -static int read_expected(enum event_type expect, const char *str) -{ - return __read_expected(expect, str, 1, true); -} - -static int read_expected_item(enum event_type expect, const char *str) -{ - return __read_expected(expect, str, 0, true); -} - -static char *event_read_name(void) -{ - char *token; - - if (read_expected(EVENT_ITEM, "name") < 0) - return NULL; - - if (read_expected(EVENT_OP, ":") < 0) - return NULL; - - if (read_expect_type(EVENT_ITEM, &token) < 0) - goto fail; - - return token; - - fail: - free_token(token); - return NULL; -} - -static int event_read_id(void) +int common_flags(struct scripting_context *context) { - char *token; - int id; - - if (read_expected_item(EVENT_ITEM, "ID") < 0) - return -1; + static int offset; + static int size; + int ret; - if (read_expected(EVENT_OP, ":") < 0) + ret = get_common_field(context, &size, &offset, + "common_flags"); + if (ret < 0) return -1; - if (read_expect_type(EVENT_ITEM, &token) < 0) - goto fail; - - id = strtoul(token, NULL, 0); - free_token(token); - return id; - - fail: - free_token(token); - return -1; -} - -static int field_is_string(struct format_field *field) -{ - if ((field->flags & FIELD_IS_ARRAY) && - (!strstr(field->type, "char") || !strstr(field->type, "u8") || - !strstr(field->type, "s8"))) - return 1; - - return 0; -} - -static int field_is_dynamic(struct format_field *field) -{ - if (!strncmp(field->type, "__data_loc", 10)) - return 1; - - return 0; -} - -static int event_read_fields(struct event *event, struct format_field **fields) -{ - struct format_field *field = NULL; - enum event_type type; - char *token; - char *last_token; - int count = 0; - - do { - type = read_token(&token); - if (type == EVENT_NEWLINE) { - free_token(token); - return count; - } - - count++; - - if (test_type_token(type, token, EVENT_ITEM, "field")) - goto fail; - free_token(token); - - type = read_token(&token); - /* - * The ftrace fields may still use the "special" name. - * Just ignore it. - */ - if (event->flags & EVENT_FL_ISFTRACE && - type == EVENT_ITEM && strcmp(token, "special") == 0) { - free_token(token); - type = read_token(&token); - } - - if (test_type_token(type, token, EVENT_OP, ":") < 0) - return -1; - - if (read_expect_type(EVENT_ITEM, &token) < 0) - goto fail; - - last_token = token; - - field = malloc_or_die(sizeof(*field)); - memset(field, 0, sizeof(*field)); - - /* read the rest of the type */ - for (;;) { - type = read_token(&token); - if (type == EVENT_ITEM || - (type == EVENT_OP && strcmp(token, "*") == 0) || - /* - * Some of the ftrace fields are broken and have - * an illegal "." in them. - */ - (event->flags & EVENT_FL_ISFTRACE && - type == EVENT_OP && strcmp(token, ".") == 0)) { - - if (strcmp(token, "*") == 0) - field->flags |= FIELD_IS_POINTER; - - if (field->type) { - field->type = realloc(field->type, - strlen(field->type) + - strlen(last_token) + 2); - strcat(field->type, " "); - strcat(field->type, last_token); - } else - field->type = last_token; - last_token = token; - continue; - } - - break; - } - - if (!field->type) { - die("no type found"); - goto fail; - } - field->name = last_token; - - if (test_type(type, EVENT_OP)) - goto fail; - - if (strcmp(token, "[") == 0) { - enum event_type last_type = type; - char *brackets = token; - int len; - - field->flags |= FIELD_IS_ARRAY; - - type = read_token(&token); - while (strcmp(token, "]") != 0) { - if (last_type == EVENT_ITEM && - type == EVENT_ITEM) - len = 2; - else - len = 1; - last_type = type; - - brackets = realloc(brackets, - strlen(brackets) + - strlen(token) + len); - if (len == 2) - strcat(brackets, " "); - strcat(brackets, token); - free_token(token); - type = read_token(&token); - if (type == EVENT_NONE) { - die("failed to find token"); - goto fail; - } - } - - free_token(token); - - brackets = realloc(brackets, strlen(brackets) + 2); - strcat(brackets, "]"); - - /* add brackets to type */ - - type = read_token(&token); - /* - * If the next token is not an OP, then it is of - * the format: type [] item; - */ - if (type == EVENT_ITEM) { - field->type = realloc(field->type, - strlen(field->type) + - strlen(field->name) + - strlen(brackets) + 2); - strcat(field->type, " "); - strcat(field->type, field->name); - free_token(field->name); - strcat(field->type, brackets); - field->name = token; - type = read_token(&token); - } else { - field->type = realloc(field->type, - strlen(field->type) + - strlen(brackets) + 1); - strcat(field->type, brackets); - } - free(brackets); - } - - if (field_is_string(field)) { - field->flags |= FIELD_IS_STRING; - if (field_is_dynamic(field)) - field->flags |= FIELD_IS_DYNAMIC; - } - - if (test_type_token(type, token, EVENT_OP, ";")) - goto fail; - free_token(token); - - if (read_expected(EVENT_ITEM, "offset") < 0) - goto fail_expect; - - if (read_expected(EVENT_OP, ":") < 0) - goto fail_expect; - - if (read_expect_type(EVENT_ITEM, &token)) - goto fail; - field->offset = strtoul(token, NULL, 0); - free_token(token); - - if (read_expected(EVENT_OP, ";") < 0) - goto fail_expect; - - if (read_expected(EVENT_ITEM, "size") < 0) - goto fail_expect; - - if (read_expected(EVENT_OP, ":") < 0) - goto fail_expect; - - if (read_expect_type(EVENT_ITEM, &token)) - goto fail; - field->size = strtoul(token, NULL, 0); - free_token(token); - - if (read_expected(EVENT_OP, ";") < 0) - goto fail_expect; - - type = read_token(&token); - if (type != EVENT_NEWLINE) { - /* newer versions of the kernel have a "signed" type */ - if (test_type_token(type, token, EVENT_ITEM, "signed")) - goto fail; - - free_token(token); - - if (read_expected(EVENT_OP, ":") < 0) - goto fail_expect; - - if (read_expect_type(EVENT_ITEM, &token)) - goto fail; - - if (strtoul(token, NULL, 0)) - field->flags |= FIELD_IS_SIGNED; - - free_token(token); - if (read_expected(EVENT_OP, ";") < 0) - goto fail_expect; - - if (read_expect_type(EVENT_NEWLINE, &token)) - goto fail; - } - - free_token(token); - - *fields = field; - fields = &field->next; - - } while (1); - - return 0; - -fail: - free_token(token); -fail_expect: - if (field) - free(field); - return -1; + return ret; } -static int event_read_format(struct event *event) +int common_pc(struct scripting_context *context) { - char *token; + static int offset; + static int size; int ret; - if (read_expected_item(EVENT_ITEM, "format") < 0) - return -1; - - if (read_expected(EVENT_OP, ":") < 0) - return -1; - - if (read_expect_type(EVENT_NEWLINE, &token)) - goto fail; - free_token(token); - - ret = event_read_fields(event, &event->format.common_fields); - if (ret < 0) - return ret; - event->format.nr_common = ret; - - ret = event_read_fields(event, &event->format.fields); + ret = get_common_field(context, &size, &offset, + "common_preempt_count"); if (ret < 0) - return ret; - event->format.nr_fields = ret; - - return 0; - - fail: - free_token(token); - return -1; -} - -enum event_type -process_arg_token(struct event *event, struct print_arg *arg, - char **tok, enum event_type type); - -static enum event_type -process_arg(struct event *event, struct print_arg *arg, char **tok) -{ - enum event_type type; - char *token; - - type = read_token(&token); - *tok = token; - - return process_arg_token(event, arg, tok, type); -} - -static enum event_type -process_cond(struct event *event, struct print_arg *top, char **tok) -{ - struct print_arg *arg, *left, *right; - enum event_type type; - char *token = NULL; - - arg = malloc_or_die(sizeof(*arg)); - memset(arg, 0, sizeof(*arg)); - - left = malloc_or_die(sizeof(*left)); - - right = malloc_or_die(sizeof(*right)); - - arg->type = PRINT_OP; - arg->op.left = left; - arg->op.right = right; - - *tok = NULL; - type = process_arg(event, left, &token); - if (test_type_token(type, token, EVENT_OP, ":")) - goto out_free; - - arg->op.op = token; - - type = process_arg(event, right, &token); - - top->op.right = arg; - - *tok = token; - return type; - -out_free: - free_token(*tok); - free(right); - free(left); - free_arg(arg); - return EVENT_ERROR; -} - -static enum event_type -process_array(struct event *event, struct print_arg *top, char **tok) -{ - struct print_arg *arg; - enum event_type type; - char *token = NULL; - - arg = malloc_or_die(sizeof(*arg)); - memset(arg, 0, sizeof(*arg)); - - *tok = NULL; - type = process_arg(event, arg, &token); - if (test_type_token(type, token, EVENT_OP, "]")) - goto out_free; - - top->op.right = arg; - - free_token(token); - type = read_token_item(&token); - *tok = token; - - return type; + return -1; -out_free: - free_token(*tok); - free_arg(arg); - return EVENT_ERROR; + return ret; } -static int get_op_prio(char *op) +unsigned long long +raw_field_value(struct event_format *event, const char *name, void *data) { - if (!op[1]) { - switch (op[0]) { - case '*': - case '/': - case '%': - return 6; - case '+': - case '-': - return 7; - /* '>>' and '<<' are 8 */ - case '<': - case '>': - return 9; - /* '==' and '!=' are 10 */ - case '&': - return 11; - case '^': - return 12; - case '|': - return 13; - case '?': - return 16; - default: - die("unknown op '%c'", op[0]); - return -1; - } - } else { - if (strcmp(op, "++") == 0 || - strcmp(op, "--") == 0) { - return 3; - } else if (strcmp(op, ">>") == 0 || - strcmp(op, "<<") == 0) { - return 8; - } else if (strcmp(op, ">=") == 0 || - strcmp(op, "<=") == 0) { - return 9; - } else if (strcmp(op, "==") == 0 || - strcmp(op, "!=") == 0) { - return 10; - } else if (strcmp(op, "&&") == 0) { - return 14; - } else if (strcmp(op, "||") == 0) { - return 15; - } else { - die("unknown op '%s'", op); - return -1; - } - } -} + struct format_field *field; + unsigned long long val; -static void set_op_prio(struct print_arg *arg) -{ + field = pevent_find_any_field(event, name); + if (!field) + return 0ULL; - /* single ops are the greatest */ - if (!arg->op.left || arg->op.left->type == PRINT_NULL) { - arg->op.prio = 0; - return; - } + pevent_read_number_field(field, data, &val); - arg->op.prio = get_op_prio(arg->op.op); + return val; } -static enum event_type -process_op(struct event *event, struct print_arg *arg, char **tok) +void *raw_field_ptr(struct event_format *event, const char *name, void *data) { - struct print_arg *left, *right = NULL; - enum event_type type; - char *token; - - /* the op is passed in via tok */ - token = *tok; - - if (arg->type == PRINT_OP && !arg->op.left) { - /* handle single op */ - if (token[1]) { - die("bad op token %s", token); - return EVENT_ERROR; - } - switch (token[0]) { - case '!': - case '+': - case '-': - break; - default: - die("bad op token %s", token); - return EVENT_ERROR; - } - - /* make an empty left */ - left = malloc_or_die(sizeof(*left)); - left->type = PRINT_NULL; - arg->op.left = left; - - right = malloc_or_die(sizeof(*right)); - arg->op.right = right; - - type = process_arg(event, right, tok); - - } else if (strcmp(token, "?") == 0) { - - left = malloc_or_die(sizeof(*left)); - /* copy the top arg to the left */ - *left = *arg; - - arg->type = PRINT_OP; - arg->op.op = token; - arg->op.left = left; - arg->op.prio = 0; - - type = process_cond(event, arg, tok); - - } else if (strcmp(token, ">>") == 0 || - strcmp(token, "<<") == 0 || - strcmp(token, "&") == 0 || - strcmp(token, "|") == 0 || - strcmp(token, "&&") == 0 || - strcmp(token, "||") == 0 || - strcmp(token, "-") == 0 || - strcmp(token, "+") == 0 || - strcmp(token, "*") == 0 || - strcmp(token, "^") == 0 || - strcmp(token, "/") == 0 || - strcmp(token, "<") == 0 || - strcmp(token, ">") == 0 || - strcmp(token, "==") == 0 || - strcmp(token, "!=") == 0) { - - left = malloc_or_die(sizeof(*left)); - - /* copy the top arg to the left */ - *left = *arg; - - arg->type = PRINT_OP; - arg->op.op = token; - arg->op.left = left; - - set_op_prio(arg); - - right = malloc_or_die(sizeof(*right)); - - type = read_token_item(&token); - *tok = token; - - /* could just be a type pointer */ - if ((strcmp(arg->op.op, "*") == 0) && - type == EVENT_DELIM && (strcmp(token, ")") == 0)) { - if (left->type != PRINT_ATOM) - die("bad pointer type"); - left->atom.atom = realloc(left->atom.atom, - sizeof(left->atom.atom) + 3); - strcat(left->atom.atom, " *"); - *arg = *left; - free(arg); - - return type; - } - - type = process_arg_token(event, right, tok, type); - - arg->op.right = right; - - } else if (strcmp(token, "[") == 0) { - - left = malloc_or_die(sizeof(*left)); - *left = *arg; - - arg->type = PRINT_OP; - arg->op.op = token; - arg->op.left = left; - - arg->op.prio = 0; - type = process_array(event, arg, tok); - - } else { - warning("unknown op '%s'", token); - event->flags |= EVENT_FL_FAILED; - /* the arg is now the left side */ - return EVENT_NONE; - } + struct format_field *field; - if (type == EVENT_OP) { - int prio; + field = pevent_find_any_field(event, name); + if (!field) + return NULL; - /* higher prios need to be closer to the root */ - prio = get_op_prio(*tok); + if (field->flags & FIELD_IS_DYNAMIC) { + int offset; - if (prio > arg->op.prio) - return process_op(event, arg, tok); + offset = *(int *)(data + field->offset); + offset &= 0xffff; - return process_op(event, right, tok); + return data + offset; } - return type; + return data + field->offset; } -static enum event_type -process_entry(struct event *event __unused, struct print_arg *arg, - char **tok) +int trace_parse_common_type(struct pevent *pevent, void *data) { - enum event_type type; - char *field; - char *token; - - if (read_expected(EVENT_OP, "->") < 0) - return EVENT_ERROR; - - if (read_expect_type(EVENT_ITEM, &token) < 0) - goto fail; - field = token; - - arg->type = PRINT_FIELD; - arg->field.name = field; - - if (is_flag_field) { - arg->field.field = find_any_field(event, arg->field.name); - arg->field.field->flags |= FIELD_IS_FLAG; - is_flag_field = 0; - } else if (is_symbolic_field) { - arg->field.field = find_any_field(event, arg->field.name); - arg->field.field->flags |= FIELD_IS_SYMBOLIC; - is_symbolic_field = 0; - } - - type = read_token(&token); - *tok = token; - - return type; + struct pevent_record record; -fail: - free_token(token); - return EVENT_ERROR; + record.data = data; + return pevent_data_type(pevent, &record); } -static char *arg_eval (struct print_arg *arg); - -static long long arg_num_eval(struct print_arg *arg) +int trace_parse_common_pid(struct pevent *pevent, void *data) { - long long left, right; - long long val = 0; - - switch (arg->type) { - case PRINT_ATOM: - val = strtoll(arg->atom.atom, NULL, 0); - break; - case PRINT_TYPE: - val = arg_num_eval(arg->typecast.item); - break; - case PRINT_OP: - switch (arg->op.op[0]) { - case '|': - left = arg_num_eval(arg->op.left); - right = arg_num_eval(arg->op.right); - if (arg->op.op[1]) - val = left || right; - else - val = left | right; - break; - case '&': - left = arg_num_eval(arg->op.left); - right = arg_num_eval(arg->op.right); - if (arg->op.op[1]) - val = left && right; - else - val = left & right; - break; - case '<': - left = arg_num_eval(arg->op.left); - right = arg_num_eval(arg->op.right); - switch (arg->op.op[1]) { - case 0: - val = left < right; - break; - case '<': - val = left << right; - break; - case '=': - val = left <= right; - break; - default: - die("unknown op '%s'", arg->op.op); - } - break; - case '>': - left = arg_num_eval(arg->op.left); - right = arg_num_eval(arg->op.right); - switch (arg->op.op[1]) { - case 0: - val = left > right; - break; - case '>': - val = left >> right; - break; - case '=': - val = left >= right; - break; - default: - die("unknown op '%s'", arg->op.op); - } - break; - case '=': - left = arg_num_eval(arg->op.left); - right = arg_num_eval(arg->op.right); - - if (arg->op.op[1] != '=') - die("unknown op '%s'", arg->op.op); + struct pevent_record record; - val = left == right; - break; - case '!': - left = arg_num_eval(arg->op.left); - right = arg_num_eval(arg->op.right); - - switch (arg->op.op[1]) { - case '=': - val = left != right; - break; - default: - die("unknown op '%s'", arg->op.op); - } - break; - default: - die("unknown op '%s'", arg->op.op); - } - break; - - case PRINT_NULL: - case PRINT_FIELD ... PRINT_SYMBOL: - case PRINT_STRING: - default: - die("invalid eval type %d", arg->type); - - } - return val; -} - -static char *arg_eval (struct print_arg *arg) -{ - long long val; - static char buf[20]; - - switch (arg->type) { - case PRINT_ATOM: - return arg->atom.atom; - case PRINT_TYPE: - return arg_eval(arg->typecast.item); - case PRINT_OP: - val = arg_num_eval(arg); - sprintf(buf, "%lld", val); - return buf; - - case PRINT_NULL: - case PRINT_FIELD ... PRINT_SYMBOL: - case PRINT_STRING: - default: - die("invalid eval type %d", arg->type); - break; - } - - return NULL; + record.data = data; + return pevent_data_pid(pevent, &record); } -static enum event_type -process_fields(struct event *event, struct print_flag_sym **list, char **tok) +unsigned long long read_size(struct pevent *pevent, void *ptr, int size) { - enum event_type type; - struct print_arg *arg = NULL; - struct print_flag_sym *field; - char *token = NULL; - char *value; - - do { - free_token(token); - type = read_token_item(&token); - if (test_type_token(type, token, EVENT_OP, "{")) - break; - - arg = malloc_or_die(sizeof(*arg)); - - free_token(token); - type = process_arg(event, arg, &token); - if (test_type_token(type, token, EVENT_DELIM, ",")) - goto out_free; - - field = malloc_or_die(sizeof(*field)); - memset(field, 0, sizeof(*field)); - - value = arg_eval(arg); - field->value = strdup(value); - - free_token(token); - type = process_arg(event, arg, &token); - if (test_type_token(type, token, EVENT_OP, "}")) - goto out_free; - - value = arg_eval(arg); - field->str = strdup(value); - free_arg(arg); - arg = NULL; - - *list = field; - list = &field->next; - - free_token(token); - type = read_token_item(&token); - } while (type == EVENT_DELIM && strcmp(token, ",") == 0); - - *tok = token; - return type; - -out_free: - free_arg(arg); - free_token(token); - - return EVENT_ERROR; + return pevent_read_number(pevent, ptr, size); } -static enum event_type -process_flags(struct event *event, struct print_arg *arg, char **tok) +void print_trace_event(struct pevent *pevent, int cpu, void *data, int size) { - struct print_arg *field; - enum event_type type; - char *token; - - memset(arg, 0, sizeof(*arg)); - arg->type = PRINT_FLAGS; - - if (read_expected_item(EVENT_DELIM, "(") < 0) - return EVENT_ERROR; - - field = malloc_or_die(sizeof(*field)); - - type = process_arg(event, field, &token); - if (test_type_token(type, token, EVENT_DELIM, ",")) - goto out_free; + struct event_format *event; + struct pevent_record record; + struct trace_seq s; + int type; - arg->flags.field = field; + type = trace_parse_common_type(pevent, data); - type = read_token_item(&token); - if (event_item_type(type)) { - arg->flags.delim = token; - type = read_token_item(&token); + event = pevent_find_event(pevent, type); + if (!event) { + warning("ug! no event found for type %d", type); + return; } - if (test_type_token(type, token, EVENT_DELIM, ",")) - goto out_free; + memset(&record, 0, sizeof(record)); + record.cpu = cpu; + record.size = size; + record.data = data; - type = process_fields(event, &arg->flags.flags, &token); - if (test_type_token(type, token, EVENT_DELIM, ")")) - goto out_free; - - free_token(token); - type = read_token_item(tok); - return type; - -out_free: - free_token(token); - return EVENT_ERROR; + trace_seq_init(&s); + pevent_event_info(&s, event, &record); + trace_seq_do_printf(&s); } -static enum event_type -process_symbols(struct event *event, struct print_arg *arg, char **tok) +void print_event(struct pevent *pevent, int cpu, void *data, int size, + unsigned long long nsecs, char *comm) { - struct print_arg *field; - enum event_type type; - char *token; - - memset(arg, 0, sizeof(*arg)); - arg->type = PRINT_SYMBOL; - - if (read_expected_item(EVENT_DELIM, "(") < 0) - return EVENT_ERROR; - - field = malloc_or_die(sizeof(*field)); - - type = process_arg(event, field, &token); - if (test_type_token(type, token, EVENT_DELIM, ",")) - goto out_free; + struct pevent_record record; + struct trace_seq s; + int pid; - arg->symbol.field = field; + pevent->latency_format = latency_format; - type = process_fields(event, &arg->symbol.symbols, &token); - if (test_type_token(type, token, EVENT_DELIM, ")")) - goto out_free; + record.ts = nsecs; + record.cpu = cpu; + record.size = size; + record.data = data; + pid = pevent_data_pid(pevent, &record); - free_token(token); - type = read_token_item(tok); - return type; + if (!pevent_pid_is_registered(pevent, pid)) + pevent_register_comm(pevent, comm, pid); -out_free: - free_token(token); - return EVENT_ERROR; + trace_seq_init(&s); + pevent_print_event(pevent, &s, &record); + trace_seq_do_printf(&s); + printf("\n"); } -static enum event_type -process_paren(struct event *event, struct print_arg *arg, char **tok) +void parse_proc_kallsyms(struct pevent *pevent, + char *file, unsigned int size __unused) { - struct print_arg *item_arg; - enum event_type type; - char *token; - - type = process_arg(event, arg, &token); - - if (type == EVENT_ERROR) - return EVENT_ERROR; - - if (type == EVENT_OP) - type = process_op(event, arg, &token); - - if (type == EVENT_ERROR) - return EVENT_ERROR; - - if (test_type_token(type, token, EVENT_DELIM, ")")) { - free_token(token); - return EVENT_ERROR; - } - - free_token(token); - type = read_token_item(&token); - - /* - * If the next token is an item or another open paren, then - * this was a typecast. - */ - if (event_item_type(type) || - (type == EVENT_DELIM && strcmp(token, "(") == 0)) { - - /* make this a typecast and contine */ + unsigned long long addr; + char *func; + char *line; + char *next = NULL; + char *addr_str; + char *mod; + char ch; - /* prevous must be an atom */ - if (arg->type != PRINT_ATOM) - die("previous needed to be PRINT_ATOM"); + line = strtok_r(file, "\n", &next); + while (line) { + mod = NULL; + sscanf(line, "%as %c %as\t[%as", + (float *)(void *)&addr_str, /* workaround gcc warning */ + &ch, (float *)(void *)&func, (float *)(void *)&mod); + addr = strtoull(addr_str, NULL, 16); + free(addr_str); - item_arg = malloc_or_die(sizeof(*item_arg)); + /* truncate the extra ']' */ + if (mod) + mod[strlen(mod) - 1] = 0; - arg->type = PRINT_TYPE; - arg->typecast.type = arg->atom.atom; - arg->typecast.item = item_arg; - type = process_arg_token(event, item_arg, &token, type); + pevent_register_function(pevent, func, addr, mod); + free(func); + free(mod); + line = strtok_r(NULL, "\n", &next); } - - *tok = token; - return type; } - -static enum event_type -process_str(struct event *event __unused, struct print_arg *arg, char **tok) +void parse_ftrace_printk(struct pevent *pevent, + char *file, unsigned int size __unused) { - enum event_type type; - char *token; - - if (read_expected(EVENT_DELIM, "(") < 0) - return EVENT_ERROR; - - if (read_expect_type(EVENT_ITEM, &token) < 0) - goto fail; - - arg->type = PRINT_STRING; - arg->string.string = token; - arg->string.offset = -1; - - if (read_expected(EVENT_DELIM, ")") < 0) - return EVENT_ERROR; - - type = read_token(&token); - *tok = token; - - return type; -fail: - free_token(token); - return EVENT_ERROR; -} + unsigned long long addr; + char *printk; + char *line; + char *next = NULL; + char *addr_str; + char *fmt; -enum event_type -process_arg_token(struct event *event, struct print_arg *arg, - char **tok, enum event_type type) -{ - char *token; - char *atom; - - token = *tok; - - switch (type) { - case EVENT_ITEM: - if (strcmp(token, "REC") == 0) { - free_token(token); - type = process_entry(event, arg, &token); - } else if (strcmp(token, "__print_flags") == 0) { - free_token(token); - is_flag_field = 1; - type = process_flags(event, arg, &token); - } else if (strcmp(token, "__print_symbolic") == 0) { - free_token(token); - is_symbolic_field = 1; - type = process_symbols(event, arg, &token); - } else if (strcmp(token, "__get_str") == 0) { - free_token(token); - type = process_str(event, arg, &token); - } else { - atom = token; - /* test the next token */ - type = read_token_item(&token); - - /* atoms can be more than one token long */ - while (type == EVENT_ITEM) { - atom = realloc(atom, strlen(atom) + strlen(token) + 2); - strcat(atom, " "); - strcat(atom, token); - free_token(token); - type = read_token_item(&token); - } - - /* todo, test for function */ - - arg->type = PRINT_ATOM; - arg->atom.atom = atom; - } - break; - case EVENT_DQUOTE: - case EVENT_SQUOTE: - arg->type = PRINT_ATOM; - arg->atom.atom = token; - type = read_token_item(&token); - break; - case EVENT_DELIM: - if (strcmp(token, "(") == 0) { - free_token(token); - type = process_paren(event, arg, &token); + line = strtok_r(file, "\n", &next); + while (line) { + addr_str = strtok_r(line, ":", &fmt); + if (!addr_str) { + warning("printk format with empty entry"); break; } - case EVENT_OP: - /* handle single ops */ - arg->type = PRINT_OP; - arg->op.op = token; - arg->op.left = NULL; - type = process_op(event, arg, &token); - - break; - - case EVENT_ERROR ... EVENT_NEWLINE: - default: - die("unexpected type %d", type); - } - *tok = token; - - return type; -} - -static int event_read_print_args(struct event *event, struct print_arg **list) -{ - enum event_type type = EVENT_ERROR; - struct print_arg *arg; - char *token; - int args = 0; - - do { - if (type == EVENT_NEWLINE) { - free_token(token); - type = read_token_item(&token); - continue; - } - - arg = malloc_or_die(sizeof(*arg)); - memset(arg, 0, sizeof(*arg)); - - type = process_arg(event, arg, &token); - - if (type == EVENT_ERROR) { - free_arg(arg); - return -1; - } - - *list = arg; - args++; - - if (type == EVENT_OP) { - type = process_op(event, arg, &token); - list = &arg->next; - continue; - } - - if (type == EVENT_DELIM && strcmp(token, ",") == 0) { - free_token(token); - *list = arg; - list = &arg->next; - continue; - } - break; - } while (type != EVENT_NONE); - - if (type != EVENT_NONE) - free_token(token); - - return args; -} - -static int event_read_print(struct event *event) -{ - enum event_type type; - char *token; - int ret; - - if (read_expected_item(EVENT_ITEM, "print") < 0) - return -1; - - if (read_expected(EVENT_ITEM, "fmt") < 0) - return -1; - - if (read_expected(EVENT_OP, ":") < 0) - return -1; - - if (read_expect_type(EVENT_DQUOTE, &token) < 0) - goto fail; - - concat: - event->print_fmt.format = token; - event->print_fmt.args = NULL; - - /* ok to have no arg */ - type = read_token_item(&token); - - if (type == EVENT_NONE) - return 0; - - /* Handle concatination of print lines */ - if (type == EVENT_DQUOTE) { - char *cat; - - cat = malloc_or_die(strlen(event->print_fmt.format) + - strlen(token) + 1); - strcpy(cat, event->print_fmt.format); - strcat(cat, token); - free_token(token); - free_token(event->print_fmt.format); - event->print_fmt.format = NULL; - token = cat; - goto concat; - } - - if (test_type_token(type, token, EVENT_DELIM, ",")) - goto fail; - - free_token(token); - - ret = event_read_print_args(event, &event->print_fmt.args); - if (ret < 0) - return -1; - - return ret; - - fail: - free_token(token); - return -1; -} - -static struct format_field * -find_common_field(struct event *event, const char *name) -{ - struct format_field *format; - - for (format = event->format.common_fields; - format; format = format->next) { - if (strcmp(format->name, name) == 0) - break; - } - - return format; -} - -static struct format_field * -find_field(struct event *event, const char *name) -{ - struct format_field *format; - - for (format = event->format.fields; - format; format = format->next) { - if (strcmp(format->name, name) == 0) - break; + addr = strtoull(addr_str, NULL, 16); + /* fmt still has a space, skip it */ + printk = strdup(fmt+1); + line = strtok_r(NULL, "\n", &next); + pevent_register_print_string(pevent, printk, addr); } - - return format; } -static struct format_field * -find_any_field(struct event *event, const char *name) +int parse_ftrace_file(struct pevent *pevent, char *buf, unsigned long size) { - struct format_field *format; - - format = find_common_field(event, name); - if (format) - return format; - return find_field(event, name); + return pevent_parse_event(pevent, buf, size, "ftrace"); } -unsigned long long read_size(void *ptr, int size) +int parse_event_file(struct pevent *pevent, + char *buf, unsigned long size, char *sys) { - switch (size) { - case 1: - return *(unsigned char *)ptr; - case 2: - return data2host2(ptr); - case 4: - return data2host4(ptr); - case 8: - return data2host8(ptr); - default: - /* BUG! */ - return 0; - } -} - -unsigned long long -raw_field_value(struct event *event, const char *name, void *data) -{ - struct format_field *field; - - field = find_any_field(event, name); - if (!field) - return 0ULL; - - return read_size(data + field->offset, field->size); + return pevent_parse_event(pevent, buf, size, sys); } -void *raw_field_ptr(struct event *event, const char *name, void *data) +struct event_format *trace_find_next_event(struct pevent *pevent, + struct event_format *event) { - struct format_field *field; + static int idx; - field = find_any_field(event, name); - if (!field) + if (!pevent->events) return NULL; - if (field->flags & FIELD_IS_DYNAMIC) { - int offset; - - offset = *(int *)(data + field->offset); - offset &= 0xffff; - - return data + offset; - } - - return data + field->offset; -} - -static int get_common_info(const char *type, int *offset, int *size) -{ - struct event *event; - struct format_field *field; - - /* - * All events should have the same common elements. - * Pick any event to find where the type is; - */ - if (!event_list) - die("no event_list!"); - - event = event_list; - field = find_common_field(event, type); - if (!field) - die("field '%s' not found", type); - - *offset = field->offset; - *size = field->size; - - return 0; -} - -static int __parse_common(void *data, int *size, int *offset, - const char *name) -{ - int ret; - - if (!*size) { - ret = get_common_info(name, offset, size); - if (ret < 0) - return ret; + if (!event) { + idx = 0; + return pevent->events[0]; } - return read_size(data + *offset, *size); -} - -int trace_parse_common_type(void *data) -{ - static int type_offset; - static int type_size; - return __parse_common(data, &type_size, &type_offset, - "common_type"); -} - -int trace_parse_common_pid(void *data) -{ - static int pid_offset; - static int pid_size; - - return __parse_common(data, &pid_size, &pid_offset, - "common_pid"); -} - -int parse_common_pc(void *data) -{ - static int pc_offset; - static int pc_size; - - return __parse_common(data, &pc_size, &pc_offset, - "common_preempt_count"); -} - -int parse_common_flags(void *data) -{ - static int flags_offset; - static int flags_size; - - return __parse_common(data, &flags_size, &flags_offset, - "common_flags"); -} - -int parse_common_lock_depth(void *data) -{ - static int ld_offset; - static int ld_size; - int ret; - - ret = __parse_common(data, &ld_size, &ld_offset, - "common_lock_depth"); - if (ret < 0) - return -1; - - return ret; -} - -struct event *trace_find_event(int id) -{ - struct event *event; - - for (event = event_list; event; event = event->next) { - if (event->id == id) - break; + if (idx < pevent->nr_events && event == pevent->events[idx]) { + idx++; + if (idx == pevent->nr_events) + return NULL; + return pevent->events[idx]; } - return event; -} - -struct event *trace_find_next_event(struct event *event) -{ - if (!event) - return event_list; - return event->next; -} - -static unsigned long long eval_num_arg(void *data, int size, - struct event *event, struct print_arg *arg) -{ - unsigned long long val = 0; - unsigned long long left, right; - struct print_arg *larg; - - switch (arg->type) { - case PRINT_NULL: - /* ?? */ - return 0; - case PRINT_ATOM: - return strtoull(arg->atom.atom, NULL, 0); - case PRINT_FIELD: - if (!arg->field.field) { - arg->field.field = find_any_field(event, arg->field.name); - if (!arg->field.field) - die("field %s not found", arg->field.name); - } - /* must be a number */ - val = read_size(data + arg->field.field->offset, - arg->field.field->size); - break; - case PRINT_FLAGS: - case PRINT_SYMBOL: - break; - case PRINT_TYPE: - return eval_num_arg(data, size, event, arg->typecast.item); - case PRINT_STRING: - return 0; - break; - case PRINT_OP: - if (strcmp(arg->op.op, "[") == 0) { - /* - * Arrays are special, since we don't want - * to read the arg as is. - */ - if (arg->op.left->type != PRINT_FIELD) - goto default_op; /* oops, all bets off */ - larg = arg->op.left; - if (!larg->field.field) { - larg->field.field = - find_any_field(event, larg->field.name); - if (!larg->field.field) - die("field %s not found", larg->field.name); - } - right = eval_num_arg(data, size, event, arg->op.right); - val = read_size(data + larg->field.field->offset + - right * long_size, long_size); - break; - } - default_op: - left = eval_num_arg(data, size, event, arg->op.left); - right = eval_num_arg(data, size, event, arg->op.right); - switch (arg->op.op[0]) { - case '|': - if (arg->op.op[1]) - val = left || right; - else - val = left | right; - break; - case '&': - if (arg->op.op[1]) - val = left && right; - else - val = left & right; - break; - case '<': - switch (arg->op.op[1]) { - case 0: - val = left < right; - break; - case '<': - val = left << right; - break; - case '=': - val = left <= right; - break; - default: - die("unknown op '%s'", arg->op.op); - } - break; - case '>': - switch (arg->op.op[1]) { - case 0: - val = left > right; - break; - case '>': - val = left >> right; - break; - case '=': - val = left >= right; - break; - default: - die("unknown op '%s'", arg->op.op); - } - break; - case '=': - if (arg->op.op[1] != '=') - die("unknown op '%s'", arg->op.op); - val = left == right; - break; - case '-': - val = left - right; - break; - case '+': - val = left + right; - break; - default: - die("unknown op '%s'", arg->op.op); - } - break; - default: /* not sure what to do there */ - return 0; + for (idx = 1; idx < pevent->nr_events; idx++) { + if (event == pevent->events[idx - 1]) + return pevent->events[idx]; } - return val; + return NULL; } struct flag { @@ -2211,933 +350,3 @@ unsigned long long eval_flag(const char *flag) return 0; } - -static void print_str_arg(void *data, int size, - struct event *event, struct print_arg *arg) -{ - struct print_flag_sym *flag; - unsigned long long val, fval; - char *str; - int print; - - switch (arg->type) { - case PRINT_NULL: - /* ?? */ - return; - case PRINT_ATOM: - printf("%s", arg->atom.atom); - return; - case PRINT_FIELD: - if (!arg->field.field) { - arg->field.field = find_any_field(event, arg->field.name); - if (!arg->field.field) - die("field %s not found", arg->field.name); - } - str = malloc_or_die(arg->field.field->size + 1); - memcpy(str, data + arg->field.field->offset, - arg->field.field->size); - str[arg->field.field->size] = 0; - printf("%s", str); - free(str); - break; - case PRINT_FLAGS: - val = eval_num_arg(data, size, event, arg->flags.field); - print = 0; - for (flag = arg->flags.flags; flag; flag = flag->next) { - fval = eval_flag(flag->value); - if (!val && !fval) { - printf("%s", flag->str); - break; - } - if (fval && (val & fval) == fval) { - if (print && arg->flags.delim) - printf("%s", arg->flags.delim); - printf("%s", flag->str); - print = 1; - val &= ~fval; - } - } - break; - case PRINT_SYMBOL: - val = eval_num_arg(data, size, event, arg->symbol.field); - for (flag = arg->symbol.symbols; flag; flag = flag->next) { - fval = eval_flag(flag->value); - if (val == fval) { - printf("%s", flag->str); - break; - } - } - break; - - case PRINT_TYPE: - break; - case PRINT_STRING: { - int str_offset; - - if (arg->string.offset == -1) { - struct format_field *f; - - f = find_any_field(event, arg->string.string); - arg->string.offset = f->offset; - } - str_offset = *(int *)(data + arg->string.offset); - str_offset &= 0xffff; - printf("%s", ((char *)data) + str_offset); - break; - } - case PRINT_OP: - /* - * The only op for string should be ? : - */ - if (arg->op.op[0] != '?') - return; - val = eval_num_arg(data, size, event, arg->op.left); - if (val) - print_str_arg(data, size, event, arg->op.right->op.left); - else - print_str_arg(data, size, event, arg->op.right->op.right); - break; - default: - /* well... */ - break; - } -} - -static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struct event *event) -{ - static struct format_field *field, *ip_field; - struct print_arg *args, *arg, **next; - unsigned long long ip, val; - char *ptr; - void *bptr; - - if (!field) { - field = find_field(event, "buf"); - if (!field) - die("can't find buffer field for binary printk"); - ip_field = find_field(event, "ip"); - if (!ip_field) - die("can't find ip field for binary printk"); - } - - ip = read_size(data + ip_field->offset, ip_field->size); - - /* - * The first arg is the IP pointer. - */ - args = malloc_or_die(sizeof(*args)); - arg = args; - arg->next = NULL; - next = &arg->next; - - arg->type = PRINT_ATOM; - arg->atom.atom = malloc_or_die(32); - sprintf(arg->atom.atom, "%lld", ip); - - /* skip the first "%pf : " */ - for (ptr = fmt + 6, bptr = data + field->offset; - bptr < data + size && *ptr; ptr++) { - int ls = 0; - - if (*ptr == '%') { - process_again: - ptr++; - switch (*ptr) { - case '%': - break; - case 'l': - ls++; - goto process_again; - case 'L': - ls = 2; - goto process_again; - case '0' ... '9': - goto process_again; - case 'p': - ls = 1; - /* fall through */ - case 'd': - case 'u': - case 'x': - case 'i': - /* the pointers are always 4 bytes aligned */ - bptr = (void *)(((unsigned long)bptr + 3) & - ~3); - switch (ls) { - case 0: - case 1: - ls = long_size; - break; - case 2: - ls = 8; - default: - break; - } - val = read_size(bptr, ls); - bptr += ls; - arg = malloc_or_die(sizeof(*arg)); - arg->next = NULL; - arg->type = PRINT_ATOM; - arg->atom.atom = malloc_or_die(32); - sprintf(arg->atom.atom, "%lld", val); - *next = arg; - next = &arg->next; - break; - case 's': - arg = malloc_or_die(sizeof(*arg)); - arg->next = NULL; - arg->type = PRINT_STRING; - arg->string.string = strdup(bptr); - bptr += strlen(bptr) + 1; - *next = arg; - next = &arg->next; - default: - break; - } - } - } - - return args; -} - -static void free_args(struct print_arg *args) -{ - struct print_arg *next; - - while (args) { - next = args->next; - - if (args->type == PRINT_ATOM) - free(args->atom.atom); - else - free(args->string.string); - free(args); - args = next; - } -} - -static char *get_bprint_format(void *data, int size __unused, struct event *event) -{ - unsigned long long addr; - static struct format_field *field; - struct printk_map *printk; - char *format; - char *p; - - if (!field) { - field = find_field(event, "fmt"); - if (!field) - die("can't find format field for binary printk"); - printf("field->offset = %d size=%d\n", field->offset, field->size); - } - - addr = read_size(data + field->offset, field->size); - - printk = find_printk(addr); - if (!printk) { - format = malloc_or_die(45); - sprintf(format, "%%pf : (NO FORMAT FOUND at %llx)\n", - addr); - return format; - } - - p = printk->printk; - /* Remove any quotes. */ - if (*p == '"') - p++; - format = malloc_or_die(strlen(p) + 10); - sprintf(format, "%s : %s", "%pf", p); - /* remove ending quotes and new line since we will add one too */ - p = format + strlen(format) - 1; - if (*p == '"') - *p = 0; - - p -= 2; - if (strcmp(p, "\\n") == 0) - *p = 0; - - return format; -} - -static void pretty_print(void *data, int size, struct event *event) -{ - struct print_fmt *print_fmt = &event->print_fmt; - struct print_arg *arg = print_fmt->args; - struct print_arg *args = NULL; - const char *ptr = print_fmt->format; - unsigned long long val; - struct func_map *func; - const char *saveptr; - char *bprint_fmt = NULL; - char format[32]; - int show_func; - int len; - int ls; - - if (event->flags & EVENT_FL_ISFUNC) - ptr = " %pF <-- %pF"; - - if (event->flags & EVENT_FL_ISBPRINT) { - bprint_fmt = get_bprint_format(data, size, event); - args = make_bprint_args(bprint_fmt, data, size, event); - arg = args; - ptr = bprint_fmt; - } - - for (; *ptr; ptr++) { - ls = 0; - if (*ptr == '\\') { - ptr++; - switch (*ptr) { - case 'n': - printf("\n"); - break; - case 't': - printf("\t"); - break; - case 'r': - printf("\r"); - break; - case '\\': - printf("\\"); - break; - default: - printf("%c", *ptr); - break; - } - - } else if (*ptr == '%') { - saveptr = ptr; - show_func = 0; - cont_process: - ptr++; - switch (*ptr) { - case '%': - printf("%%"); - break; - case 'l': - ls++; - goto cont_process; - case 'L': - ls = 2; - goto cont_process; - case 'z': - case 'Z': - case '0' ... '9': - goto cont_process; - case 'p': - if (long_size == 4) - ls = 1; - else - ls = 2; - - if (*(ptr+1) == 'F' || - *(ptr+1) == 'f') { - ptr++; - show_func = *ptr; - } - - /* fall through */ - case 'd': - case 'i': - case 'x': - case 'X': - case 'u': - if (!arg) - die("no argument match"); - - len = ((unsigned long)ptr + 1) - - (unsigned long)saveptr; - - /* should never happen */ - if (len > 32) - die("bad format!"); - - memcpy(format, saveptr, len); - format[len] = 0; - - val = eval_num_arg(data, size, event, arg); - arg = arg->next; - - if (show_func) { - func = find_func(val); - if (func) { - printf("%s", func->func); - if (show_func == 'F') - printf("+0x%llx", - val - func->addr); - break; - } - } - switch (ls) { - case 0: - printf(format, (int)val); - break; - case 1: - printf(format, (long)val); - break; - case 2: - printf(format, (long long)val); - break; - default: - die("bad count (%d)", ls); - } - break; - case 's': - if (!arg) - die("no matching argument"); - - print_str_arg(data, size, event, arg); - arg = arg->next; - break; - default: - printf(">%c<", *ptr); - - } - } else - printf("%c", *ptr); - } - - if (args) { - free_args(args); - free(bprint_fmt); - } -} - -static inline int log10_cpu(int nb) -{ - if (nb / 100) - return 3; - if (nb / 10) - return 2; - return 1; -} - -static void print_lat_fmt(void *data, int size __unused) -{ - unsigned int lat_flags; - unsigned int pc; - int lock_depth; - int hardirq; - int softirq; - - lat_flags = parse_common_flags(data); - pc = parse_common_pc(data); - lock_depth = parse_common_lock_depth(data); - - hardirq = lat_flags & TRACE_FLAG_HARDIRQ; - softirq = lat_flags & TRACE_FLAG_SOFTIRQ; - - printf("%c%c%c", - (lat_flags & TRACE_FLAG_IRQS_OFF) ? 'd' : - (lat_flags & TRACE_FLAG_IRQS_NOSUPPORT) ? - 'X' : '.', - (lat_flags & TRACE_FLAG_NEED_RESCHED) ? - 'N' : '.', - (hardirq && softirq) ? 'H' : - hardirq ? 'h' : softirq ? 's' : '.'); - - if (pc) - printf("%x", pc); - else - printf("."); - - if (lock_depth < 0) - printf(". "); - else - printf("%d ", lock_depth); -} - -#define TRACE_GRAPH_INDENT 2 - -static struct record * -get_return_for_leaf(int cpu, int cur_pid, unsigned long long cur_func, - struct record *next) -{ - struct format_field *field; - struct event *event; - unsigned long val; - int type; - int pid; - - type = trace_parse_common_type(next->data); - event = trace_find_event(type); - if (!event) - return NULL; - - if (!(event->flags & EVENT_FL_ISFUNCRET)) - return NULL; - - pid = trace_parse_common_pid(next->data); - field = find_field(event, "func"); - if (!field) - die("function return does not have field func"); - - val = read_size(next->data + field->offset, field->size); - - if (cur_pid != pid || cur_func != val) - return NULL; - - /* this is a leaf, now advance the iterator */ - return trace_read_data(cpu); -} - -/* Signal a overhead of time execution to the output */ -static void print_graph_overhead(unsigned long long duration) -{ - /* Non nested entry or return */ - if (duration == ~0ULL) - return (void)printf(" "); - - /* Duration exceeded 100 msecs */ - if (duration > 100000ULL) - return (void)printf("! "); - - /* Duration exceeded 10 msecs */ - if (duration > 10000ULL) - return (void)printf("+ "); - - printf(" "); -} - -static void print_graph_duration(unsigned long long duration) -{ - unsigned long usecs = duration / 1000; - unsigned long nsecs_rem = duration % 1000; - /* log10(ULONG_MAX) + '\0' */ - char msecs_str[21]; - char nsecs_str[5]; - int len; - int i; - - sprintf(msecs_str, "%lu", usecs); - - /* Print msecs */ - len = printf("%lu", usecs); - - /* Print nsecs (we don't want to exceed 7 numbers) */ - if (len < 7) { - snprintf(nsecs_str, 8 - len, "%03lu", nsecs_rem); - len += printf(".%s", nsecs_str); - } - - printf(" us "); - - /* Print remaining spaces to fit the row's width */ - for (i = len; i < 7; i++) - printf(" "); - - printf("| "); -} - -static void -print_graph_entry_leaf(struct event *event, void *data, struct record *ret_rec) -{ - unsigned long long rettime, calltime; - unsigned long long duration, depth; - unsigned long long val; - struct format_field *field; - struct func_map *func; - struct event *ret_event; - int type; - int i; - - type = trace_parse_common_type(ret_rec->data); - ret_event = trace_find_event(type); - - field = find_field(ret_event, "rettime"); - if (!field) - die("can't find rettime in return graph"); - rettime = read_size(ret_rec->data + field->offset, field->size); - - field = find_field(ret_event, "calltime"); - if (!field) - die("can't find rettime in return graph"); - calltime = read_size(ret_rec->data + field->offset, field->size); - - duration = rettime - calltime; - - /* Overhead */ - print_graph_overhead(duration); - - /* Duration */ - print_graph_duration(duration); - - field = find_field(event, "depth"); - if (!field) - die("can't find depth in entry graph"); - depth = read_size(data + field->offset, field->size); - - /* Function */ - for (i = 0; i < (int)(depth * TRACE_GRAPH_INDENT); i++) - printf(" "); - - field = find_field(event, "func"); - if (!field) - die("can't find func in entry graph"); - val = read_size(data + field->offset, field->size); - func = find_func(val); - - if (func) - printf("%s();", func->func); - else - printf("%llx();", val); -} - -static void print_graph_nested(struct event *event, void *data) -{ - struct format_field *field; - unsigned long long depth; - unsigned long long val; - struct func_map *func; - int i; - - /* No overhead */ - print_graph_overhead(-1); - - /* No time */ - printf(" | "); - - field = find_field(event, "depth"); - if (!field) - die("can't find depth in entry graph"); - depth = read_size(data + field->offset, field->size); - - /* Function */ - for (i = 0; i < (int)(depth * TRACE_GRAPH_INDENT); i++) - printf(" "); - - field = find_field(event, "func"); - if (!field) - die("can't find func in entry graph"); - val = read_size(data + field->offset, field->size); - func = find_func(val); - - if (func) - printf("%s() {", func->func); - else - printf("%llx() {", val); -} - -static void -pretty_print_func_ent(void *data, int size, struct event *event, - int cpu, int pid) -{ - struct format_field *field; - struct record *rec; - void *copy_data; - unsigned long val; - - if (latency_format) { - print_lat_fmt(data, size); - printf(" | "); - } - - field = find_field(event, "func"); - if (!field) - die("function entry does not have func field"); - - val = read_size(data + field->offset, field->size); - - /* - * peek_data may unmap the data pointer. Copy it first. - */ - copy_data = malloc_or_die(size); - memcpy(copy_data, data, size); - data = copy_data; - - rec = trace_peek_data(cpu); - if (rec) { - rec = get_return_for_leaf(cpu, pid, val, rec); - if (rec) { - print_graph_entry_leaf(event, data, rec); - goto out_free; - } - } - print_graph_nested(event, data); -out_free: - free(data); -} - -static void -pretty_print_func_ret(void *data, int size __unused, struct event *event) -{ - unsigned long long rettime, calltime; - unsigned long long duration, depth; - struct format_field *field; - int i; - - if (latency_format) { - print_lat_fmt(data, size); - printf(" | "); - } - - field = find_field(event, "rettime"); - if (!field) - die("can't find rettime in return graph"); - rettime = read_size(data + field->offset, field->size); - - field = find_field(event, "calltime"); - if (!field) - die("can't find calltime in return graph"); - calltime = read_size(data + field->offset, field->size); - - duration = rettime - calltime; - - /* Overhead */ - print_graph_overhead(duration); - - /* Duration */ - print_graph_duration(duration); - - field = find_field(event, "depth"); - if (!field) - die("can't find depth in entry graph"); - depth = read_size(data + field->offset, field->size); - - /* Function */ - for (i = 0; i < (int)(depth * TRACE_GRAPH_INDENT); i++) - printf(" "); - - printf("}"); -} - -static void -pretty_print_func_graph(void *data, int size, struct event *event, - int cpu, int pid) -{ - if (event->flags & EVENT_FL_ISFUNCENT) - pretty_print_func_ent(data, size, event, cpu, pid); - else if (event->flags & EVENT_FL_ISFUNCRET) - pretty_print_func_ret(data, size, event); - printf("\n"); -} - -void print_trace_event(int cpu, void *data, int size) -{ - struct event *event; - int type; - int pid; - - type = trace_parse_common_type(data); - - event = trace_find_event(type); - if (!event) { - warning("ug! no event found for type %d", type); - return; - } - - pid = trace_parse_common_pid(data); - - if (event->flags & (EVENT_FL_ISFUNCENT | EVENT_FL_ISFUNCRET)) - return pretty_print_func_graph(data, size, event, cpu, pid); - - if (latency_format) - print_lat_fmt(data, size); - - if (event->flags & EVENT_FL_FAILED) { - printf("EVENT '%s' FAILED TO PARSE\n", - event->name); - return; - } - - pretty_print(data, size, event); -} - -static void print_fields(struct print_flag_sym *field) -{ - printf("{ %s, %s }", field->value, field->str); - if (field->next) { - printf(", "); - print_fields(field->next); - } -} - -static void print_args(struct print_arg *args) -{ - int print_paren = 1; - - switch (args->type) { - case PRINT_NULL: - printf("null"); - break; - case PRINT_ATOM: - printf("%s", args->atom.atom); - break; - case PRINT_FIELD: - printf("REC->%s", args->field.name); - break; - case PRINT_FLAGS: - printf("__print_flags("); - print_args(args->flags.field); - printf(", %s, ", args->flags.delim); - print_fields(args->flags.flags); - printf(")"); - break; - case PRINT_SYMBOL: - printf("__print_symbolic("); - print_args(args->symbol.field); - printf(", "); - print_fields(args->symbol.symbols); - printf(")"); - break; - case PRINT_STRING: - printf("__get_str(%s)", args->string.string); - break; - case PRINT_TYPE: - printf("(%s)", args->typecast.type); - print_args(args->typecast.item); - break; - case PRINT_OP: - if (strcmp(args->op.op, ":") == 0) - print_paren = 0; - if (print_paren) - printf("("); - print_args(args->op.left); - printf(" %s ", args->op.op); - print_args(args->op.right); - if (print_paren) - printf(")"); - break; - default: - /* we should warn... */ - return; - } - if (args->next) { - printf("\n"); - print_args(args->next); - } -} - -int parse_ftrace_file(char *buf, unsigned long size) -{ - struct format_field *field; - struct print_arg *arg, **list; - struct event *event; - int ret; - - init_input_buf(buf, size); - - event = alloc_event(); - if (!event) - return -ENOMEM; - - event->flags |= EVENT_FL_ISFTRACE; - - event->name = event_read_name(); - if (!event->name) - die("failed to read ftrace event name"); - - if (strcmp(event->name, "function") == 0) - event->flags |= EVENT_FL_ISFUNC; - - else if (strcmp(event->name, "funcgraph_entry") == 0) - event->flags |= EVENT_FL_ISFUNCENT; - - else if (strcmp(event->name, "funcgraph_exit") == 0) - event->flags |= EVENT_FL_ISFUNCRET; - - else if (strcmp(event->name, "bprint") == 0) - event->flags |= EVENT_FL_ISBPRINT; - - event->id = event_read_id(); - if (event->id < 0) - die("failed to read ftrace event id"); - - add_event(event); - - ret = event_read_format(event); - if (ret < 0) - die("failed to read ftrace event format"); - - ret = event_read_print(event); - if (ret < 0) - die("failed to read ftrace event print fmt"); - - /* New ftrace handles args */ - if (ret > 0) - return 0; - /* - * The arguments for ftrace files are parsed by the fields. - * Set up the fields as their arguments. - */ - list = &event->print_fmt.args; - for (field = event->format.fields; field; field = field->next) { - arg = malloc_or_die(sizeof(*arg)); - memset(arg, 0, sizeof(*arg)); - *list = arg; - list = &arg->next; - arg->type = PRINT_FIELD; - arg->field.name = field->name; - arg->field.field = field; - } - return 0; -} - -int parse_event_file(char *buf, unsigned long size, char *sys) -{ - struct event *event; - int ret; - - init_input_buf(buf, size); - - event = alloc_event(); - if (!event) - return -ENOMEM; - - event->name = event_read_name(); - if (!event->name) - die("failed to read event name"); - - event->id = event_read_id(); - if (event->id < 0) - die("failed to read event id"); - - ret = event_read_format(event); - if (ret < 0) { - warning("failed to read event format for %s", event->name); - goto event_failed; - } - - ret = event_read_print(event); - if (ret < 0) { - warning("failed to read event print fmt for %s", event->name); - goto event_failed; - } - - event->system = strdup(sys); - -#define PRINT_ARGS 0 - if (PRINT_ARGS && event->print_fmt.args) - print_args(event->print_fmt.args); - - add_event(event); - return 0; - - event_failed: - event->flags |= EVENT_FL_FAILED; - /* still add it even if it failed */ - add_event(event); - return -1; -} - -void parse_set_info(int nr_cpus, int long_sz) -{ - cpus = nr_cpus; - long_size = long_sz; -} - -int common_pc(struct scripting_context *context) -{ - return parse_common_pc(context->event_data); -} - -int common_flags(struct scripting_context *context) -{ - return parse_common_flags(context->event_data); -} - -int common_lock_depth(struct scripting_context *context) -{ - return parse_common_lock_depth(context->event_data); -} diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index f55cc3a765a1..719ed74a8565 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -33,7 +33,6 @@ #include <pthread.h> #include <fcntl.h> #include <unistd.h> -#include <ctype.h> #include <errno.h> #include "../perf.h" @@ -53,6 +52,16 @@ static unsigned long page_size; static ssize_t calc_data_size; static bool repipe; +static void *malloc_or_die(int size) +{ + void *ret; + + ret = malloc(size); + if (!ret) + die("malloc"); + return ret; +} + static int do_read(int fd, void *buf, int size) { int rsize = size; @@ -105,20 +114,20 @@ static void skip(int size) }; } -static unsigned int read4(void) +static unsigned int read4(struct pevent *pevent) { unsigned int data; read_or_die(&data, 4); - return __data2host4(data); + return __data2host4(pevent, data); } -static unsigned long long read8(void) +static unsigned long long read8(struct pevent *pevent) { unsigned long long data; read_or_die(&data, 8); - return __data2host8(data); + return __data2host8(pevent, data); } static char *read_string(void) @@ -159,12 +168,12 @@ static char *read_string(void) return str; } -static void read_proc_kallsyms(void) +static void read_proc_kallsyms(struct pevent *pevent) { unsigned int size; char *buf; - size = read4(); + size = read4(pevent); if (!size) return; @@ -172,29 +181,29 @@ static void read_proc_kallsyms(void) read_or_die(buf, size); buf[size] = '\0'; - parse_proc_kallsyms(buf, size); + parse_proc_kallsyms(pevent, buf, size); free(buf); } -static void read_ftrace_printk(void) +static void read_ftrace_printk(struct pevent *pevent) { unsigned int size; char *buf; - size = read4(); + size = read4(pevent); if (!size) return; buf = malloc_or_die(size); read_or_die(buf, size); - parse_ftrace_printk(buf, size); + parse_ftrace_printk(pevent, buf, size); free(buf); } -static void read_header_files(void) +static void read_header_files(struct pevent *pevent) { unsigned long long size; char *header_event; @@ -205,7 +214,7 @@ static void read_header_files(void) if (memcmp(buf, "header_page", 12) != 0) die("did not read header page"); - size = read8(); + size = read8(pevent); skip(size); /* @@ -218,47 +227,48 @@ static void read_header_files(void) if (memcmp(buf, "header_event", 13) != 0) die("did not read header event"); - size = read8(); + size = read8(pevent); header_event = malloc_or_die(size); read_or_die(header_event, size); free(header_event); } -static void read_ftrace_file(unsigned long long size) +static void read_ftrace_file(struct pevent *pevent, unsigned long long size) { char *buf; buf = malloc_or_die(size); read_or_die(buf, size); - parse_ftrace_file(buf, size); + parse_ftrace_file(pevent, buf, size); free(buf); } -static void read_event_file(char *sys, unsigned long long size) +static void read_event_file(struct pevent *pevent, char *sys, + unsigned long long size) { char *buf; buf = malloc_or_die(size); read_or_die(buf, size); - parse_event_file(buf, size, sys); + parse_event_file(pevent, buf, size, sys); free(buf); } -static void read_ftrace_files(void) +static void read_ftrace_files(struct pevent *pevent) { unsigned long long size; int count; int i; - count = read4(); + count = read4(pevent); for (i = 0; i < count; i++) { - size = read8(); - read_ftrace_file(size); + size = read8(pevent); + read_ftrace_file(pevent, size); } } -static void read_event_files(void) +static void read_event_files(struct pevent *pevent) { unsigned long long size; char *sys; @@ -266,15 +276,15 @@ static void read_event_files(void) int count; int i,x; - systems = read4(); + systems = read4(pevent); for (i = 0; i < systems; i++) { sys = read_string(); - count = read4(); + count = read4(pevent); for (x=0; x < count; x++) { - size = read8(); - read_event_file(sys, size); + size = read8(pevent); + read_event_file(pevent, sys, size); } } } @@ -283,7 +293,7 @@ struct cpu_data { unsigned long long offset; unsigned long long size; unsigned long long timestamp; - struct record *next; + struct pevent_record *next; char *page; int cpu; int index; @@ -368,9 +378,9 @@ static int calc_index(void *ptr, int cpu) return (unsigned long)ptr - (unsigned long)cpu_data[cpu].page; } -struct record *trace_peek_data(int cpu) +struct pevent_record *trace_peek_data(struct pevent *pevent, int cpu) { - struct record *data; + struct pevent_record *data; void *page = cpu_data[cpu].page; int idx = cpu_data[cpu].index; void *ptr = page + idx; @@ -390,15 +400,15 @@ struct record *trace_peek_data(int cpu) /* FIXME: handle header page */ if (header_page_ts_size != 8) die("expected a long long type for timestamp"); - cpu_data[cpu].timestamp = data2host8(ptr); + cpu_data[cpu].timestamp = data2host8(pevent, ptr); ptr += 8; switch (header_page_size_size) { case 4: - cpu_data[cpu].page_size = data2host4(ptr); + cpu_data[cpu].page_size = data2host4(pevent, ptr); ptr += 4; break; case 8: - cpu_data[cpu].page_size = data2host8(ptr); + cpu_data[cpu].page_size = data2host8(pevent, ptr); ptr += 8; break; default: @@ -412,10 +422,10 @@ read_again: if (idx >= cpu_data[cpu].page_size) { get_next_page(cpu); - return trace_peek_data(cpu); + return trace_peek_data(pevent, cpu); } - type_len_ts = data2host4(ptr); + type_len_ts = data2host4(pevent, ptr); ptr += 4; type_len = type_len4host(type_len_ts); @@ -425,14 +435,14 @@ read_again: case RINGBUF_TYPE_PADDING: if (!delta) die("error, hit unexpected end of page"); - length = data2host4(ptr); + length = data2host4(pevent, ptr); ptr += 4; length *= 4; ptr += length; goto read_again; case RINGBUF_TYPE_TIME_EXTEND: - extend = data2host4(ptr); + extend = data2host4(pevent, ptr); ptr += 4; extend <<= TS_SHIFT; extend += delta; @@ -443,7 +453,7 @@ read_again: ptr += 12; break; case 0: - length = data2host4(ptr); + length = data2host4(pevent, ptr); ptr += 4; die("here! length=%d", length); break; @@ -468,17 +478,17 @@ read_again: return data; } -struct record *trace_read_data(int cpu) +struct pevent_record *trace_read_data(struct pevent *pevent, int cpu) { - struct record *data; + struct pevent_record *data; - data = trace_peek_data(cpu); + data = trace_peek_data(pevent, cpu); cpu_data[cpu].next = NULL; return data; } -ssize_t trace_report(int fd, bool __repipe) +ssize_t trace_report(int fd, struct pevent **ppevent, bool __repipe) { char buf[BUFSIZ]; char test[] = { 23, 8, 68 }; @@ -510,28 +520,32 @@ ssize_t trace_report(int fd, bool __repipe) file_bigendian = buf[0]; host_bigendian = bigendian(); + *ppevent = read_trace_init(file_bigendian, host_bigendian); + if (*ppevent == NULL) + die("read_trace_init failed"); + read_or_die(buf, 1); long_size = buf[0]; - page_size = read4(); + page_size = read4(*ppevent); - read_header_files(); + read_header_files(*ppevent); - read_ftrace_files(); - read_event_files(); - read_proc_kallsyms(); - read_ftrace_printk(); + read_ftrace_files(*ppevent); + read_event_files(*ppevent); + read_proc_kallsyms(*ppevent); + read_ftrace_printk(*ppevent); size = calc_data_size - 1; calc_data_size = 0; repipe = false; if (show_funcs) { - print_funcs(); + pevent_print_funcs(*ppevent); return size; } if (show_printk) { - print_printk(); + pevent_print_printk(*ppevent); return size; } diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index c9dcbec7d800..474aa7a7df43 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -22,7 +22,6 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <ctype.h> #include <errno.h> #include "../perf.h" @@ -37,9 +36,10 @@ static int stop_script_unsupported(void) } static void process_event_unsupported(union perf_event *event __unused, + struct pevent *pevent __unused, struct perf_sample *sample __unused, struct perf_evsel *evsel __unused, - struct perf_session *session __unused, + struct machine *machine __unused, struct thread *thread __unused) { } @@ -62,7 +62,8 @@ static int python_start_script_unsupported(const char *script __unused, return -1; } -static int python_generate_script_unsupported(const char *outfile __unused) +static int python_generate_script_unsupported(struct pevent *pevent __unused, + const char *outfile __unused) { print_python_unsupported_msg(); @@ -123,7 +124,8 @@ static int perl_start_script_unsupported(const char *script __unused, return -1; } -static int perl_generate_script_unsupported(const char *outfile __unused) +static int perl_generate_script_unsupported(struct pevent *pevent __unused, + const char *outfile __unused) { print_perl_unsupported_msg(); diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index f674dda3363b..8fef1d6687b7 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -1,16 +1,22 @@ -#ifndef __PERF_TRACE_EVENTS_H -#define __PERF_TRACE_EVENTS_H +#ifndef _PERF_UTIL_TRACE_EVENT_H +#define _PERF_UTIL_TRACE_EVENT_H -#include <stdbool.h> #include "parse-events.h" +#include "event-parse.h" #include "session.h" -#define __unused __attribute__((unused)) +struct machine; +struct perf_sample; +union perf_event; +struct perf_tool; +struct thread; +extern int header_page_size_size; +extern int header_page_ts_size; +extern int header_page_data_offset; -#ifndef PAGE_MASK -#define PAGE_MASK (page_size - 1) -#endif +extern bool latency_format; +extern struct pevent *perf_pevent; enum { RINGBUF_TYPE_PADDING = 29, @@ -22,268 +28,63 @@ enum { #define TS_SHIFT 27 #endif -#define NSECS_PER_SEC 1000000000ULL -#define NSECS_PER_USEC 1000ULL - -enum format_flags { - FIELD_IS_ARRAY = 1, - FIELD_IS_POINTER = 2, - FIELD_IS_SIGNED = 4, - FIELD_IS_STRING = 8, - FIELD_IS_DYNAMIC = 16, - FIELD_IS_FLAG = 32, - FIELD_IS_SYMBOLIC = 64, -}; - -struct format_field { - struct format_field *next; - char *type; - char *name; - int offset; - int size; - unsigned long flags; -}; - -struct format { - int nr_common; - int nr_fields; - struct format_field *common_fields; - struct format_field *fields; -}; - -struct print_arg_atom { - char *atom; -}; - -struct print_arg_string { - char *string; - int offset; -}; - -struct print_arg_field { - char *name; - struct format_field *field; -}; - -struct print_flag_sym { - struct print_flag_sym *next; - char *value; - char *str; -}; - -struct print_arg_typecast { - char *type; - struct print_arg *item; -}; - -struct print_arg_flags { - struct print_arg *field; - char *delim; - struct print_flag_sym *flags; -}; - -struct print_arg_symbol { - struct print_arg *field; - struct print_flag_sym *symbols; -}; - -struct print_arg; - -struct print_arg_op { - char *op; - int prio; - struct print_arg *left; - struct print_arg *right; -}; - -struct print_arg_func { - char *name; - struct print_arg *args; -}; - -enum print_arg_type { - PRINT_NULL, - PRINT_ATOM, - PRINT_FIELD, - PRINT_FLAGS, - PRINT_SYMBOL, - PRINT_TYPE, - PRINT_STRING, - PRINT_OP, -}; - -struct print_arg { - struct print_arg *next; - enum print_arg_type type; - union { - struct print_arg_atom atom; - struct print_arg_field field; - struct print_arg_typecast typecast; - struct print_arg_flags flags; - struct print_arg_symbol symbol; - struct print_arg_func func; - struct print_arg_string string; - struct print_arg_op op; - }; -}; - -struct print_fmt { - char *format; - struct print_arg *args; -}; - -struct event { - struct event *next; - char *name; - int id; - int flags; - struct format format; - struct print_fmt print_fmt; - char *system; -}; - -enum { - EVENT_FL_ISFTRACE = 0x01, - EVENT_FL_ISPRINT = 0x02, - EVENT_FL_ISBPRINT = 0x04, - EVENT_FL_ISFUNC = 0x08, - EVENT_FL_ISFUNCENT = 0x10, - EVENT_FL_ISFUNCRET = 0x20, - - EVENT_FL_FAILED = 0x80000000 -}; - -struct record { - unsigned long long ts; - int size; - void *data; -}; - -struct record *trace_peek_data(int cpu); -struct record *trace_read_data(int cpu); - -void parse_set_info(int nr_cpus, int long_sz); - -ssize_t trace_report(int fd, bool repipe); - -void *malloc_or_die(unsigned int size); - -void parse_cmdlines(char *file, int size); -void parse_proc_kallsyms(char *file, unsigned int size); -void parse_ftrace_printk(char *file, unsigned int size); - -void print_funcs(void); -void print_printk(void); - -int parse_ftrace_file(char *buf, unsigned long size); -int parse_event_file(char *buf, unsigned long size, char *sys); -void print_trace_event(int cpu, void *data, int size); - -extern int file_bigendian; -extern int host_bigendian; - int bigendian(void); -static inline unsigned short __data2host2(unsigned short data) -{ - unsigned short swap; - - if (host_bigendian == file_bigendian) - return data; - - swap = ((data & 0xffULL) << 8) | - ((data & (0xffULL << 8)) >> 8); - - return swap; -} - -static inline unsigned int __data2host4(unsigned int data) -{ - unsigned int swap; - - if (host_bigendian == file_bigendian) - return data; - - swap = ((data & 0xffULL) << 24) | - ((data & (0xffULL << 8)) << 8) | - ((data & (0xffULL << 16)) >> 8) | - ((data & (0xffULL << 24)) >> 24); - - return swap; -} +struct pevent *read_trace_init(int file_bigendian, int host_bigendian); +void print_trace_event(struct pevent *pevent, int cpu, void *data, int size); -static inline unsigned long long __data2host8(unsigned long long data) -{ - unsigned long long swap; +void print_event(struct pevent *pevent, int cpu, void *data, int size, + unsigned long long nsecs, char *comm); - if (host_bigendian == file_bigendian) - return data; +int parse_ftrace_file(struct pevent *pevent, char *buf, unsigned long size); +int parse_event_file(struct pevent *pevent, + char *buf, unsigned long size, char *sys); - swap = ((data & 0xffULL) << 56) | - ((data & (0xffULL << 8)) << 40) | - ((data & (0xffULL << 16)) << 24) | - ((data & (0xffULL << 24)) << 8) | - ((data & (0xffULL << 32)) >> 8) | - ((data & (0xffULL << 40)) >> 24) | - ((data & (0xffULL << 48)) >> 40) | - ((data & (0xffULL << 56)) >> 56); +struct pevent_record *trace_peek_data(struct pevent *pevent, int cpu); - return swap; -} +unsigned long long +raw_field_value(struct event_format *event, const char *name, void *data); +void *raw_field_ptr(struct event_format *event, const char *name, void *data); -#define data2host2(ptr) __data2host2(*(unsigned short *)ptr) -#define data2host4(ptr) __data2host4(*(unsigned int *)ptr) -#define data2host8(ptr) ({ \ - unsigned long long __val; \ - \ - memcpy(&__val, (ptr), sizeof(unsigned long long)); \ - __data2host8(__val); \ -}) +void parse_proc_kallsyms(struct pevent *pevent, char *file, unsigned int size); +void parse_ftrace_printk(struct pevent *pevent, char *file, unsigned int size); -extern int header_page_ts_offset; -extern int header_page_ts_size; -extern int header_page_size_offset; -extern int header_page_size_size; -extern int header_page_data_offset; -extern int header_page_data_size; +ssize_t trace_report(int fd, struct pevent **pevent, bool repipe); -extern bool latency_format; +int trace_parse_common_type(struct pevent *pevent, void *data); +int trace_parse_common_pid(struct pevent *pevent, void *data); -int trace_parse_common_type(void *data); -int trace_parse_common_pid(void *data); -int parse_common_pc(void *data); -int parse_common_flags(void *data); -int parse_common_lock_depth(void *data); -struct event *trace_find_event(int id); -struct event *trace_find_next_event(struct event *event); -unsigned long long read_size(void *ptr, int size); -unsigned long long -raw_field_value(struct event *event, const char *name, void *data); -void *raw_field_ptr(struct event *event, const char *name, void *data); +struct event_format *trace_find_next_event(struct pevent *pevent, + struct event_format *event); +unsigned long long read_size(struct pevent *pevent, void *ptr, int size); unsigned long long eval_flag(const char *flag); +struct pevent_record *trace_read_data(struct pevent *pevent, int cpu); int read_tracing_data(int fd, struct list_head *pattrs); -ssize_t read_tracing_data_size(int fd, struct list_head *pattrs); -/* taken from kernel/trace/trace.h */ -enum trace_flag_type { - TRACE_FLAG_IRQS_OFF = 0x01, - TRACE_FLAG_IRQS_NOSUPPORT = 0x02, - TRACE_FLAG_NEED_RESCHED = 0x04, - TRACE_FLAG_HARDIRQ = 0x08, - TRACE_FLAG_SOFTIRQ = 0x10, +struct tracing_data { + /* size is only valid if temp is 'true' */ + ssize_t size; + bool temp; + char temp_file[50]; }; +struct tracing_data *tracing_data_get(struct list_head *pattrs, + int fd, bool temp); +void tracing_data_put(struct tracing_data *tdata); + + struct scripting_ops { const char *name; int (*start_script) (const char *script, int argc, const char **argv); int (*stop_script) (void); void (*process_event) (union perf_event *event, + struct pevent *pevent, struct perf_sample *sample, struct perf_evsel *evsel, - struct perf_session *session, + struct machine *machine, struct thread *thread); - int (*generate_script) (const char *outfile); + int (*generate_script) (struct pevent *pevent, const char *outfile); }; int script_spec_register(const char *spec, struct scripting_ops *ops); @@ -292,6 +93,7 @@ void setup_perl_scripting(void); void setup_python_scripting(void); struct scripting_context { + struct pevent *pevent; void *event_data; }; @@ -299,4 +101,4 @@ int common_pc(struct scripting_context *context); int common_flags(struct scripting_context *context); int common_lock_depth(struct scripting_context *context); -#endif /* __PERF_TRACE_EVENTS_H */ +#endif /* _PERF_UTIL_TRACE_EVENT_H */ diff --git a/tools/perf/util/types.h b/tools/perf/util/types.h index 5f3689a3d085..c51fa6b70a28 100644 --- a/tools/perf/util/types.h +++ b/tools/perf/util/types.h @@ -16,4 +16,9 @@ typedef signed short s16; typedef unsigned char u8; typedef signed char s8; +union u64_swap { + u64 val64; + u32 val32[2]; +}; + #endif /* __PERF_TYPES_H */ diff --git a/tools/perf/util/ui/browser.c b/tools/perf/util/ui/browser.c deleted file mode 100644 index 611219f80680..000000000000 --- a/tools/perf/util/ui/browser.c +++ /dev/null @@ -1,356 +0,0 @@ -#include "libslang.h" -#include "ui.h" -#include <linux/compiler.h> -#include <linux/list.h> -#include <linux/rbtree.h> -#include <stdlib.h> -#include <sys/ttydefaults.h> -#include "browser.h" -#include "helpline.h" -#include "../color.h" -#include "../util.h" -#include <stdio.h> - -static int ui_browser__percent_color(double percent, bool current) -{ - if (current) - return HE_COLORSET_SELECTED; - if (percent >= MIN_RED) - return HE_COLORSET_TOP; - if (percent >= MIN_GREEN) - return HE_COLORSET_MEDIUM; - return HE_COLORSET_NORMAL; -} - -void ui_browser__set_color(struct ui_browser *self __used, int color) -{ - SLsmg_set_color(color); -} - -void ui_browser__set_percent_color(struct ui_browser *self, - double percent, bool current) -{ - int color = ui_browser__percent_color(percent, current); - ui_browser__set_color(self, color); -} - -void ui_browser__gotorc(struct ui_browser *self, int y, int x) -{ - SLsmg_gotorc(self->y + y, self->x + x); -} - -void ui_browser__list_head_seek(struct ui_browser *self, off_t offset, int whence) -{ - struct list_head *head = self->entries; - struct list_head *pos; - - switch (whence) { - case SEEK_SET: - pos = head->next; - break; - case SEEK_CUR: - pos = self->top; - break; - case SEEK_END: - pos = head->prev; - break; - default: - return; - } - - if (offset > 0) { - while (offset-- != 0) - pos = pos->next; - } else { - while (offset++ != 0) - pos = pos->prev; - } - - self->top = pos; -} - -void ui_browser__rb_tree_seek(struct ui_browser *self, off_t offset, int whence) -{ - struct rb_root *root = self->entries; - struct rb_node *nd; - - switch (whence) { - case SEEK_SET: - nd = rb_first(root); - break; - case SEEK_CUR: - nd = self->top; - break; - case SEEK_END: - nd = rb_last(root); - break; - default: - return; - } - - if (offset > 0) { - while (offset-- != 0) - nd = rb_next(nd); - } else { - while (offset++ != 0) - nd = rb_prev(nd); - } - - self->top = nd; -} - -unsigned int ui_browser__rb_tree_refresh(struct ui_browser *self) -{ - struct rb_node *nd; - int row = 0; - - if (self->top == NULL) - self->top = rb_first(self->entries); - - nd = self->top; - - while (nd != NULL) { - ui_browser__gotorc(self, row, 0); - self->write(self, nd, row); - if (++row == self->height) - break; - nd = rb_next(nd); - } - - return row; -} - -bool ui_browser__is_current_entry(struct ui_browser *self, unsigned row) -{ - return self->top_idx + row == self->index; -} - -void ui_browser__refresh_dimensions(struct ui_browser *self) -{ - int cols, rows; - newtGetScreenSize(&cols, &rows); - - self->width = cols - 1; - self->height = rows - 2; - self->y = 1; - self->x = 0; -} - -void ui_browser__reset_index(struct ui_browser *self) -{ - self->index = self->top_idx = 0; - self->seek(self, 0, SEEK_SET); -} - -void ui_browser__add_exit_key(struct ui_browser *self, int key) -{ - newtFormAddHotKey(self->form, key); -} - -void ui_browser__add_exit_keys(struct ui_browser *self, int keys[]) -{ - int i = 0; - - while (keys[i] && i < 64) { - ui_browser__add_exit_key(self, keys[i]); - ++i; - } -} - -void __ui_browser__show_title(struct ui_browser *browser, const char *title) -{ - SLsmg_gotorc(0, 0); - ui_browser__set_color(browser, NEWT_COLORSET_ROOT); - slsmg_write_nstring(title, browser->width); -} - -void ui_browser__show_title(struct ui_browser *browser, const char *title) -{ - pthread_mutex_lock(&ui__lock); - __ui_browser__show_title(browser, title); - pthread_mutex_unlock(&ui__lock); -} - -int ui_browser__show(struct ui_browser *self, const char *title, - const char *helpline, ...) -{ - va_list ap; - int keys[] = { NEWT_KEY_UP, NEWT_KEY_DOWN, NEWT_KEY_PGUP, - NEWT_KEY_PGDN, NEWT_KEY_HOME, NEWT_KEY_END, ' ', - NEWT_KEY_LEFT, NEWT_KEY_ESCAPE, 'q', CTRL('c'), 0 }; - - if (self->form != NULL) - newtFormDestroy(self->form); - - ui_browser__refresh_dimensions(self); - self->form = newtForm(NULL, NULL, 0); - if (self->form == NULL) - return -1; - - self->sb = newtVerticalScrollbar(self->width, 1, self->height, - HE_COLORSET_NORMAL, - HE_COLORSET_SELECTED); - if (self->sb == NULL) - return -1; - - pthread_mutex_lock(&ui__lock); - __ui_browser__show_title(self, title); - - ui_browser__add_exit_keys(self, keys); - newtFormAddComponent(self->form, self->sb); - - va_start(ap, helpline); - ui_helpline__vpush(helpline, ap); - va_end(ap); - pthread_mutex_unlock(&ui__lock); - return 0; -} - -void ui_browser__hide(struct ui_browser *self) -{ - pthread_mutex_lock(&ui__lock); - newtFormDestroy(self->form); - self->form = NULL; - ui_helpline__pop(); - pthread_mutex_unlock(&ui__lock); -} - -int ui_browser__refresh(struct ui_browser *self) -{ - int row; - - pthread_mutex_lock(&ui__lock); - newtScrollbarSet(self->sb, self->index, self->nr_entries - 1); - row = self->refresh(self); - ui_browser__set_color(self, HE_COLORSET_NORMAL); - SLsmg_fill_region(self->y + row, self->x, - self->height - row, self->width, ' '); - pthread_mutex_unlock(&ui__lock); - - return 0; -} - -int ui_browser__run(struct ui_browser *self) -{ - struct newtExitStruct es; - - if (ui_browser__refresh(self) < 0) - return -1; - - while (1) { - off_t offset; - - newtFormRun(self->form, &es); - - if (es.reason != NEWT_EXIT_HOTKEY) - break; - switch (es.u.key) { - case NEWT_KEY_DOWN: - if (self->index == self->nr_entries - 1) - break; - ++self->index; - if (self->index == self->top_idx + self->height) { - ++self->top_idx; - self->seek(self, +1, SEEK_CUR); - } - break; - case NEWT_KEY_UP: - if (self->index == 0) - break; - --self->index; - if (self->index < self->top_idx) { - --self->top_idx; - self->seek(self, -1, SEEK_CUR); - } - break; - case NEWT_KEY_PGDN: - case ' ': - if (self->top_idx + self->height > self->nr_entries - 1) - break; - - offset = self->height; - if (self->index + offset > self->nr_entries - 1) - offset = self->nr_entries - 1 - self->index; - self->index += offset; - self->top_idx += offset; - self->seek(self, +offset, SEEK_CUR); - break; - case NEWT_KEY_PGUP: - if (self->top_idx == 0) - break; - - if (self->top_idx < self->height) - offset = self->top_idx; - else - offset = self->height; - - self->index -= offset; - self->top_idx -= offset; - self->seek(self, -offset, SEEK_CUR); - break; - case NEWT_KEY_HOME: - ui_browser__reset_index(self); - break; - case NEWT_KEY_END: - offset = self->height - 1; - if (offset >= self->nr_entries) - offset = self->nr_entries - 1; - - self->index = self->nr_entries - 1; - self->top_idx = self->index - offset; - self->seek(self, -offset, SEEK_END); - break; - default: - return es.u.key; - } - if (ui_browser__refresh(self) < 0) - return -1; - } - return -1; -} - -unsigned int ui_browser__list_head_refresh(struct ui_browser *self) -{ - struct list_head *pos; - struct list_head *head = self->entries; - int row = 0; - - if (self->top == NULL || self->top == self->entries) - self->top = head->next; - - pos = self->top; - - list_for_each_from(pos, head) { - ui_browser__gotorc(self, row, 0); - self->write(self, pos, row); - if (++row == self->height) - break; - } - - return row; -} - -static struct newtPercentTreeColors { - const char *topColorFg, *topColorBg; - const char *mediumColorFg, *mediumColorBg; - const char *normalColorFg, *normalColorBg; - const char *selColorFg, *selColorBg; - const char *codeColorFg, *codeColorBg; -} defaultPercentTreeColors = { - "red", "lightgray", - "green", "lightgray", - "black", "lightgray", - "lightgray", "magenta", - "blue", "lightgray", -}; - -void ui_browser__init(void) -{ - struct newtPercentTreeColors *c = &defaultPercentTreeColors; - - sltt_set_color(HE_COLORSET_TOP, NULL, c->topColorFg, c->topColorBg); - sltt_set_color(HE_COLORSET_MEDIUM, NULL, c->mediumColorFg, c->mediumColorBg); - sltt_set_color(HE_COLORSET_NORMAL, NULL, c->normalColorFg, c->normalColorBg); - sltt_set_color(HE_COLORSET_SELECTED, NULL, c->selColorFg, c->selColorBg); - sltt_set_color(HE_COLORSET_CODE, NULL, c->codeColorFg, c->codeColorBg); -} diff --git a/tools/perf/util/ui/browsers/annotate.c b/tools/perf/util/ui/browsers/annotate.c deleted file mode 100644 index 0229723aceb3..000000000000 --- a/tools/perf/util/ui/browsers/annotate.c +++ /dev/null @@ -1,302 +0,0 @@ -#include "../browser.h" -#include "../helpline.h" -#include "../libslang.h" -#include "../../annotate.h" -#include "../../hist.h" -#include "../../sort.h" -#include "../../symbol.h" -#include <pthread.h> - -static void ui__error_window(const char *fmt, ...) -{ - va_list ap; - - va_start(ap, fmt); - newtWinMessagev((char *)"Error", (char *)"Ok", (char *)fmt, ap); - va_end(ap); -} - -struct annotate_browser { - struct ui_browser b; - struct rb_root entries; - struct rb_node *curr_hot; -}; - -struct objdump_line_rb_node { - struct rb_node rb_node; - double percent; - u32 idx; -}; - -static inline -struct objdump_line_rb_node *objdump_line__rb(struct objdump_line *self) -{ - return (struct objdump_line_rb_node *)(self + 1); -} - -static void annotate_browser__write(struct ui_browser *self, void *entry, int row) -{ - struct objdump_line *ol = rb_entry(entry, struct objdump_line, node); - bool current_entry = ui_browser__is_current_entry(self, row); - int width = self->width; - - if (ol->offset != -1) { - struct objdump_line_rb_node *olrb = objdump_line__rb(ol); - ui_browser__set_percent_color(self, olrb->percent, current_entry); - slsmg_printf(" %7.2f ", olrb->percent); - } else { - ui_browser__set_percent_color(self, 0, current_entry); - slsmg_write_nstring(" ", 9); - } - - SLsmg_write_char(':'); - slsmg_write_nstring(" ", 8); - if (!*ol->line) - slsmg_write_nstring(" ", width - 18); - else - slsmg_write_nstring(ol->line, width - 18); - - if (!current_entry) - ui_browser__set_color(self, HE_COLORSET_CODE); -} - -static double objdump_line__calc_percent(struct objdump_line *self, - struct symbol *sym, int evidx) -{ - double percent = 0.0; - - if (self->offset != -1) { - int len = sym->end - sym->start; - unsigned int hits = 0; - struct annotation *notes = symbol__annotation(sym); - struct source_line *src_line = notes->src->lines; - struct sym_hist *h = annotation__histogram(notes, evidx); - s64 offset = self->offset; - struct objdump_line *next; - - next = objdump__get_next_ip_line(¬es->src->source, self); - while (offset < (s64)len && - (next == NULL || offset < next->offset)) { - if (src_line) { - percent += src_line[offset].percent; - } else - hits += h->addr[offset]; - - ++offset; - } - /* - * If the percentage wasn't already calculated in - * symbol__get_source_line, do it now: - */ - if (src_line == NULL && h->sum) - percent = 100.0 * hits / h->sum; - } - - return percent; -} - -static void objdump__insert_line(struct rb_root *self, - struct objdump_line_rb_node *line) -{ - struct rb_node **p = &self->rb_node; - struct rb_node *parent = NULL; - struct objdump_line_rb_node *l; - - while (*p != NULL) { - parent = *p; - l = rb_entry(parent, struct objdump_line_rb_node, rb_node); - if (line->percent < l->percent) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - rb_link_node(&line->rb_node, parent, p); - rb_insert_color(&line->rb_node, self); -} - -static void annotate_browser__set_top(struct annotate_browser *self, - struct rb_node *nd) -{ - struct objdump_line_rb_node *rbpos; - struct objdump_line *pos; - unsigned back; - - ui_browser__refresh_dimensions(&self->b); - back = self->b.height / 2; - rbpos = rb_entry(nd, struct objdump_line_rb_node, rb_node); - pos = ((struct objdump_line *)rbpos) - 1; - self->b.top_idx = self->b.index = rbpos->idx; - - while (self->b.top_idx != 0 && back != 0) { - pos = list_entry(pos->node.prev, struct objdump_line, node); - - --self->b.top_idx; - --back; - } - - self->b.top = pos; - self->curr_hot = nd; -} - -static void annotate_browser__calc_percent(struct annotate_browser *browser, - int evidx) -{ - struct symbol *sym = browser->b.priv; - struct annotation *notes = symbol__annotation(sym); - struct objdump_line *pos; - - browser->entries = RB_ROOT; - - pthread_mutex_lock(¬es->lock); - - list_for_each_entry(pos, ¬es->src->source, node) { - struct objdump_line_rb_node *rbpos = objdump_line__rb(pos); - rbpos->percent = objdump_line__calc_percent(pos, sym, evidx); - if (rbpos->percent < 0.01) { - RB_CLEAR_NODE(&rbpos->rb_node); - continue; - } - objdump__insert_line(&browser->entries, rbpos); - } - pthread_mutex_unlock(¬es->lock); - - browser->curr_hot = rb_last(&browser->entries); -} - -static int annotate_browser__run(struct annotate_browser *self, int evidx, - int refresh) -{ - struct rb_node *nd = NULL; - struct symbol *sym = self->b.priv; - /* - * RIGHT To allow builtin-annotate to cycle thru multiple symbols by - * examining the exit key for this function. - */ - int exit_keys[] = { 'H', NEWT_KEY_TAB, NEWT_KEY_UNTAB, - NEWT_KEY_RIGHT, 0 }; - int key; - - if (ui_browser__show(&self->b, sym->name, - "<-, -> or ESC: exit, TAB/shift+TAB: " - "cycle hottest lines, H: Hottest") < 0) - return -1; - - ui_browser__add_exit_keys(&self->b, exit_keys); - annotate_browser__calc_percent(self, evidx); - - if (self->curr_hot) - annotate_browser__set_top(self, self->curr_hot); - - nd = self->curr_hot; - - if (refresh != 0) - newtFormSetTimer(self->b.form, refresh); - - while (1) { - key = ui_browser__run(&self->b); - - if (refresh != 0) { - annotate_browser__calc_percent(self, evidx); - /* - * Current line focus got out of the list of most active - * lines, NULL it so that if TAB|UNTAB is pressed, we - * move to curr_hot (current hottest line). - */ - if (nd != NULL && RB_EMPTY_NODE(nd)) - nd = NULL; - } - - switch (key) { - case -1: - /* - * FIXME we need to check if it was - * es.reason == NEWT_EXIT_TIMER - */ - if (refresh != 0) - symbol__annotate_decay_histogram(sym, evidx); - continue; - case NEWT_KEY_TAB: - if (nd != NULL) { - nd = rb_prev(nd); - if (nd == NULL) - nd = rb_last(&self->entries); - } else - nd = self->curr_hot; - break; - case NEWT_KEY_UNTAB: - if (nd != NULL) - nd = rb_next(nd); - if (nd == NULL) - nd = rb_first(&self->entries); - else - nd = self->curr_hot; - break; - case 'H': - nd = self->curr_hot; - break; - default: - goto out; - } - - if (nd != NULL) - annotate_browser__set_top(self, nd); - } -out: - ui_browser__hide(&self->b); - return key; -} - -int hist_entry__tui_annotate(struct hist_entry *he, int evidx) -{ - return symbol__tui_annotate(he->ms.sym, he->ms.map, evidx, 0); -} - -int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx, - int refresh) -{ - struct objdump_line *pos, *n; - struct annotation *notes; - struct annotate_browser browser = { - .b = { - .refresh = ui_browser__list_head_refresh, - .seek = ui_browser__list_head_seek, - .write = annotate_browser__write, - .priv = sym, - }, - }; - int ret; - - if (sym == NULL) - return -1; - - if (map->dso->annotate_warned) - return -1; - - if (symbol__annotate(sym, map, sizeof(struct objdump_line_rb_node)) < 0) { - ui__error_window(ui_helpline__last_msg); - return -1; - } - - ui_helpline__push("Press <- or ESC to exit"); - - notes = symbol__annotation(sym); - - list_for_each_entry(pos, ¬es->src->source, node) { - struct objdump_line_rb_node *rbpos; - size_t line_len = strlen(pos->line); - - if (browser.b.width < line_len) - browser.b.width = line_len; - rbpos = objdump_line__rb(pos); - rbpos->idx = browser.b.nr_entries++; - } - - browser.b.entries = ¬es->src->source, - browser.b.width += 18; /* Percentage */ - ret = annotate_browser__run(&browser, evidx, refresh); - list_for_each_entry_safe(pos, n, ¬es->src->source, node) { - list_del(&pos->node); - objdump_line__free(pos); - } - return ret; -} diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c deleted file mode 100644 index 5d767c622dfc..000000000000 --- a/tools/perf/util/ui/browsers/hists.c +++ /dev/null @@ -1,1138 +0,0 @@ -#define _GNU_SOURCE -#include <stdio.h> -#undef _GNU_SOURCE -#include "../libslang.h" -#include <stdlib.h> -#include <string.h> -#include <newt.h> -#include <linux/rbtree.h> - -#include "../../evsel.h" -#include "../../evlist.h" -#include "../../hist.h" -#include "../../pstack.h" -#include "../../sort.h" -#include "../../util.h" - -#include "../browser.h" -#include "../helpline.h" -#include "../util.h" -#include "map.h" - -struct hist_browser { - struct ui_browser b; - struct hists *hists; - struct hist_entry *he_selection; - struct map_symbol *selection; -}; - -static void hist_browser__refresh_dimensions(struct hist_browser *self) -{ - /* 3 == +/- toggle symbol before actual hist_entry rendering */ - self->b.width = 3 + (hists__sort_list_width(self->hists) + - sizeof("[k]")); -} - -static void hist_browser__reset(struct hist_browser *self) -{ - self->b.nr_entries = self->hists->nr_entries; - hist_browser__refresh_dimensions(self); - ui_browser__reset_index(&self->b); -} - -static char tree__folded_sign(bool unfolded) -{ - return unfolded ? '-' : '+'; -} - -static char map_symbol__folded(const struct map_symbol *self) -{ - return self->has_children ? tree__folded_sign(self->unfolded) : ' '; -} - -static char hist_entry__folded(const struct hist_entry *self) -{ - return map_symbol__folded(&self->ms); -} - -static char callchain_list__folded(const struct callchain_list *self) -{ - return map_symbol__folded(&self->ms); -} - -static void map_symbol__set_folding(struct map_symbol *self, bool unfold) -{ - self->unfolded = unfold ? self->has_children : false; -} - -static int callchain_node__count_rows_rb_tree(struct callchain_node *self) -{ - int n = 0; - struct rb_node *nd; - - for (nd = rb_first(&self->rb_root); nd; nd = rb_next(nd)) { - struct callchain_node *child = rb_entry(nd, struct callchain_node, rb_node); - struct callchain_list *chain; - char folded_sign = ' '; /* No children */ - - list_for_each_entry(chain, &child->val, list) { - ++n; - /* We need this because we may not have children */ - folded_sign = callchain_list__folded(chain); - if (folded_sign == '+') - break; - } - - if (folded_sign == '-') /* Have children and they're unfolded */ - n += callchain_node__count_rows_rb_tree(child); - } - - return n; -} - -static int callchain_node__count_rows(struct callchain_node *node) -{ - struct callchain_list *chain; - bool unfolded = false; - int n = 0; - - list_for_each_entry(chain, &node->val, list) { - ++n; - unfolded = chain->ms.unfolded; - } - - if (unfolded) - n += callchain_node__count_rows_rb_tree(node); - - return n; -} - -static int callchain__count_rows(struct rb_root *chain) -{ - struct rb_node *nd; - int n = 0; - - for (nd = rb_first(chain); nd; nd = rb_next(nd)) { - struct callchain_node *node = rb_entry(nd, struct callchain_node, rb_node); - n += callchain_node__count_rows(node); - } - - return n; -} - -static bool map_symbol__toggle_fold(struct map_symbol *self) -{ - if (!self->has_children) - return false; - - self->unfolded = !self->unfolded; - return true; -} - -static void callchain_node__init_have_children_rb_tree(struct callchain_node *self) -{ - struct rb_node *nd = rb_first(&self->rb_root); - - for (nd = rb_first(&self->rb_root); nd; nd = rb_next(nd)) { - struct callchain_node *child = rb_entry(nd, struct callchain_node, rb_node); - struct callchain_list *chain; - bool first = true; - - list_for_each_entry(chain, &child->val, list) { - if (first) { - first = false; - chain->ms.has_children = chain->list.next != &child->val || - !RB_EMPTY_ROOT(&child->rb_root); - } else - chain->ms.has_children = chain->list.next == &child->val && - !RB_EMPTY_ROOT(&child->rb_root); - } - - callchain_node__init_have_children_rb_tree(child); - } -} - -static void callchain_node__init_have_children(struct callchain_node *self) -{ - struct callchain_list *chain; - - list_for_each_entry(chain, &self->val, list) - chain->ms.has_children = !RB_EMPTY_ROOT(&self->rb_root); - - callchain_node__init_have_children_rb_tree(self); -} - -static void callchain__init_have_children(struct rb_root *self) -{ - struct rb_node *nd; - - for (nd = rb_first(self); nd; nd = rb_next(nd)) { - struct callchain_node *node = rb_entry(nd, struct callchain_node, rb_node); - callchain_node__init_have_children(node); - } -} - -static void hist_entry__init_have_children(struct hist_entry *self) -{ - if (!self->init_have_children) { - self->ms.has_children = !RB_EMPTY_ROOT(&self->sorted_chain); - callchain__init_have_children(&self->sorted_chain); - self->init_have_children = true; - } -} - -static bool hist_browser__toggle_fold(struct hist_browser *self) -{ - if (map_symbol__toggle_fold(self->selection)) { - struct hist_entry *he = self->he_selection; - - hist_entry__init_have_children(he); - self->hists->nr_entries -= he->nr_rows; - - if (he->ms.unfolded) - he->nr_rows = callchain__count_rows(&he->sorted_chain); - else - he->nr_rows = 0; - self->hists->nr_entries += he->nr_rows; - self->b.nr_entries = self->hists->nr_entries; - - return true; - } - - /* If it doesn't have children, no toggling performed */ - return false; -} - -static int callchain_node__set_folding_rb_tree(struct callchain_node *self, bool unfold) -{ - int n = 0; - struct rb_node *nd; - - for (nd = rb_first(&self->rb_root); nd; nd = rb_next(nd)) { - struct callchain_node *child = rb_entry(nd, struct callchain_node, rb_node); - struct callchain_list *chain; - bool has_children = false; - - list_for_each_entry(chain, &child->val, list) { - ++n; - map_symbol__set_folding(&chain->ms, unfold); - has_children = chain->ms.has_children; - } - - if (has_children) - n += callchain_node__set_folding_rb_tree(child, unfold); - } - - return n; -} - -static int callchain_node__set_folding(struct callchain_node *node, bool unfold) -{ - struct callchain_list *chain; - bool has_children = false; - int n = 0; - - list_for_each_entry(chain, &node->val, list) { - ++n; - map_symbol__set_folding(&chain->ms, unfold); - has_children = chain->ms.has_children; - } - - if (has_children) - n += callchain_node__set_folding_rb_tree(node, unfold); - - return n; -} - -static int callchain__set_folding(struct rb_root *chain, bool unfold) -{ - struct rb_node *nd; - int n = 0; - - for (nd = rb_first(chain); nd; nd = rb_next(nd)) { - struct callchain_node *node = rb_entry(nd, struct callchain_node, rb_node); - n += callchain_node__set_folding(node, unfold); - } - - return n; -} - -static void hist_entry__set_folding(struct hist_entry *self, bool unfold) -{ - hist_entry__init_have_children(self); - map_symbol__set_folding(&self->ms, unfold); - - if (self->ms.has_children) { - int n = callchain__set_folding(&self->sorted_chain, unfold); - self->nr_rows = unfold ? n : 0; - } else - self->nr_rows = 0; -} - -static void hists__set_folding(struct hists *self, bool unfold) -{ - struct rb_node *nd; - - self->nr_entries = 0; - - for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) { - struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); - hist_entry__set_folding(he, unfold); - self->nr_entries += 1 + he->nr_rows; - } -} - -static void hist_browser__set_folding(struct hist_browser *self, bool unfold) -{ - hists__set_folding(self->hists, unfold); - self->b.nr_entries = self->hists->nr_entries; - /* Go to the start, we may be way after valid entries after a collapse */ - ui_browser__reset_index(&self->b); -} - -static int hist_browser__run(struct hist_browser *self, const char *title) -{ - int key; - int exit_keys[] = { 'a', '?', 'h', 'C', 'd', 'D', 'E', 't', - NEWT_KEY_ENTER, NEWT_KEY_RIGHT, NEWT_KEY_LEFT, - NEWT_KEY_TAB, NEWT_KEY_UNTAB, 0, }; - - self->b.entries = &self->hists->entries; - self->b.nr_entries = self->hists->nr_entries; - - hist_browser__refresh_dimensions(self); - - if (ui_browser__show(&self->b, title, - "Press '?' for help on key bindings") < 0) - return -1; - - ui_browser__add_exit_keys(&self->b, exit_keys); - - while (1) { - key = ui_browser__run(&self->b); - - switch (key) { - case 'D': { /* Debug */ - static int seq; - struct hist_entry *h = rb_entry(self->b.top, - struct hist_entry, rb_node); - ui_helpline__pop(); - ui_helpline__fpush("%d: nr_ent=(%d,%d), height=%d, idx=%d, fve: idx=%d, row_off=%d, nrows=%d", - seq++, self->b.nr_entries, - self->hists->nr_entries, - self->b.height, - self->b.index, - self->b.top_idx, - h->row_offset, h->nr_rows); - } - break; - case 'C': - /* Collapse the whole world. */ - hist_browser__set_folding(self, false); - break; - case 'E': - /* Expand the whole world. */ - hist_browser__set_folding(self, true); - break; - case NEWT_KEY_ENTER: - if (hist_browser__toggle_fold(self)) - break; - /* fall thru */ - default: - goto out; - } - } -out: - ui_browser__hide(&self->b); - return key; -} - -static char *callchain_list__sym_name(struct callchain_list *self, - char *bf, size_t bfsize) -{ - if (self->ms.sym) - return self->ms.sym->name; - - snprintf(bf, bfsize, "%#" PRIx64, self->ip); - return bf; -} - -#define LEVEL_OFFSET_STEP 3 - -static int hist_browser__show_callchain_node_rb_tree(struct hist_browser *self, - struct callchain_node *chain_node, - u64 total, int level, - unsigned short row, - off_t *row_offset, - bool *is_current_entry) -{ - struct rb_node *node; - int first_row = row, width, offset = level * LEVEL_OFFSET_STEP; - u64 new_total, remaining; - - if (callchain_param.mode == CHAIN_GRAPH_REL) - new_total = chain_node->children_hit; - else - new_total = total; - - remaining = new_total; - node = rb_first(&chain_node->rb_root); - while (node) { - struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node); - struct rb_node *next = rb_next(node); - u64 cumul = callchain_cumul_hits(child); - struct callchain_list *chain; - char folded_sign = ' '; - int first = true; - int extra_offset = 0; - - remaining -= cumul; - - list_for_each_entry(chain, &child->val, list) { - char ipstr[BITS_PER_LONG / 4 + 1], *alloc_str; - const char *str; - int color; - bool was_first = first; - - if (first) - first = false; - else - extra_offset = LEVEL_OFFSET_STEP; - - folded_sign = callchain_list__folded(chain); - if (*row_offset != 0) { - --*row_offset; - goto do_next; - } - - alloc_str = NULL; - str = callchain_list__sym_name(chain, ipstr, sizeof(ipstr)); - if (was_first) { - double percent = cumul * 100.0 / new_total; - - if (asprintf(&alloc_str, "%2.2f%% %s", percent, str) < 0) - str = "Not enough memory!"; - else - str = alloc_str; - } - - color = HE_COLORSET_NORMAL; - width = self->b.width - (offset + extra_offset + 2); - if (ui_browser__is_current_entry(&self->b, row)) { - self->selection = &chain->ms; - color = HE_COLORSET_SELECTED; - *is_current_entry = true; - } - - ui_browser__set_color(&self->b, color); - ui_browser__gotorc(&self->b, row, 0); - slsmg_write_nstring(" ", offset + extra_offset); - slsmg_printf("%c ", folded_sign); - slsmg_write_nstring(str, width); - free(alloc_str); - - if (++row == self->b.height) - goto out; -do_next: - if (folded_sign == '+') - break; - } - - if (folded_sign == '-') { - const int new_level = level + (extra_offset ? 2 : 1); - row += hist_browser__show_callchain_node_rb_tree(self, child, new_total, - new_level, row, row_offset, - is_current_entry); - } - if (row == self->b.height) - goto out; - node = next; - } -out: - return row - first_row; -} - -static int hist_browser__show_callchain_node(struct hist_browser *self, - struct callchain_node *node, - int level, unsigned short row, - off_t *row_offset, - bool *is_current_entry) -{ - struct callchain_list *chain; - int first_row = row, - offset = level * LEVEL_OFFSET_STEP, - width = self->b.width - offset; - char folded_sign = ' '; - - list_for_each_entry(chain, &node->val, list) { - char ipstr[BITS_PER_LONG / 4 + 1], *s; - int color; - - folded_sign = callchain_list__folded(chain); - - if (*row_offset != 0) { - --*row_offset; - continue; - } - - color = HE_COLORSET_NORMAL; - if (ui_browser__is_current_entry(&self->b, row)) { - self->selection = &chain->ms; - color = HE_COLORSET_SELECTED; - *is_current_entry = true; - } - - s = callchain_list__sym_name(chain, ipstr, sizeof(ipstr)); - ui_browser__gotorc(&self->b, row, 0); - ui_browser__set_color(&self->b, color); - slsmg_write_nstring(" ", offset); - slsmg_printf("%c ", folded_sign); - slsmg_write_nstring(s, width - 2); - - if (++row == self->b.height) - goto out; - } - - if (folded_sign == '-') - row += hist_browser__show_callchain_node_rb_tree(self, node, - self->hists->stats.total_period, - level + 1, row, - row_offset, - is_current_entry); -out: - return row - first_row; -} - -static int hist_browser__show_callchain(struct hist_browser *self, - struct rb_root *chain, - int level, unsigned short row, - off_t *row_offset, - bool *is_current_entry) -{ - struct rb_node *nd; - int first_row = row; - - for (nd = rb_first(chain); nd; nd = rb_next(nd)) { - struct callchain_node *node = rb_entry(nd, struct callchain_node, rb_node); - - row += hist_browser__show_callchain_node(self, node, level, - row, row_offset, - is_current_entry); - if (row == self->b.height) - break; - } - - return row - first_row; -} - -static int hist_browser__show_entry(struct hist_browser *self, - struct hist_entry *entry, - unsigned short row) -{ - char s[256]; - double percent; - int printed = 0; - int color, width = self->b.width; - char folded_sign = ' '; - bool current_entry = ui_browser__is_current_entry(&self->b, row); - off_t row_offset = entry->row_offset; - - if (current_entry) { - self->he_selection = entry; - self->selection = &entry->ms; - } - - if (symbol_conf.use_callchain) { - hist_entry__init_have_children(entry); - folded_sign = hist_entry__folded(entry); - } - - if (row_offset == 0) { - hist_entry__snprintf(entry, s, sizeof(s), self->hists, NULL, false, - 0, false, self->hists->stats.total_period); - percent = (entry->period * 100.0) / self->hists->stats.total_period; - - color = HE_COLORSET_SELECTED; - if (!current_entry) { - if (percent >= MIN_RED) - color = HE_COLORSET_TOP; - else if (percent >= MIN_GREEN) - color = HE_COLORSET_MEDIUM; - else - color = HE_COLORSET_NORMAL; - } - - ui_browser__set_color(&self->b, color); - ui_browser__gotorc(&self->b, row, 0); - if (symbol_conf.use_callchain) { - slsmg_printf("%c ", folded_sign); - width -= 2; - } - slsmg_write_nstring(s, width); - ++row; - ++printed; - } else - --row_offset; - - if (folded_sign == '-' && row != self->b.height) { - printed += hist_browser__show_callchain(self, &entry->sorted_chain, - 1, row, &row_offset, - ¤t_entry); - if (current_entry) - self->he_selection = entry; - } - - return printed; -} - -static unsigned int hist_browser__refresh(struct ui_browser *self) -{ - unsigned row = 0; - struct rb_node *nd; - struct hist_browser *hb = container_of(self, struct hist_browser, b); - - if (self->top == NULL) - self->top = rb_first(&hb->hists->entries); - - for (nd = self->top; nd; nd = rb_next(nd)) { - struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - - if (h->filtered) - continue; - - row += hist_browser__show_entry(hb, h, row); - if (row == self->height) - break; - } - - return row; -} - -static struct rb_node *hists__filter_entries(struct rb_node *nd) -{ - while (nd != NULL) { - struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - if (!h->filtered) - return nd; - - nd = rb_next(nd); - } - - return NULL; -} - -static struct rb_node *hists__filter_prev_entries(struct rb_node *nd) -{ - while (nd != NULL) { - struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - if (!h->filtered) - return nd; - - nd = rb_prev(nd); - } - - return NULL; -} - -static void ui_browser__hists_seek(struct ui_browser *self, - off_t offset, int whence) -{ - struct hist_entry *h; - struct rb_node *nd; - bool first = true; - - if (self->nr_entries == 0) - return; - - switch (whence) { - case SEEK_SET: - nd = hists__filter_entries(rb_first(self->entries)); - break; - case SEEK_CUR: - nd = self->top; - goto do_offset; - case SEEK_END: - nd = hists__filter_prev_entries(rb_last(self->entries)); - first = false; - break; - default: - return; - } - - /* - * Moves not relative to the first visible entry invalidates its - * row_offset: - */ - h = rb_entry(self->top, struct hist_entry, rb_node); - h->row_offset = 0; - - /* - * Here we have to check if nd is expanded (+), if it is we can't go - * the next top level hist_entry, instead we must compute an offset of - * what _not_ to show and not change the first visible entry. - * - * This offset increments when we are going from top to bottom and - * decreases when we're going from bottom to top. - * - * As we don't have backpointers to the top level in the callchains - * structure, we need to always print the whole hist_entry callchain, - * skipping the first ones that are before the first visible entry - * and stop when we printed enough lines to fill the screen. - */ -do_offset: - if (offset > 0) { - do { - h = rb_entry(nd, struct hist_entry, rb_node); - if (h->ms.unfolded) { - u16 remaining = h->nr_rows - h->row_offset; - if (offset > remaining) { - offset -= remaining; - h->row_offset = 0; - } else { - h->row_offset += offset; - offset = 0; - self->top = nd; - break; - } - } - nd = hists__filter_entries(rb_next(nd)); - if (nd == NULL) - break; - --offset; - self->top = nd; - } while (offset != 0); - } else if (offset < 0) { - while (1) { - h = rb_entry(nd, struct hist_entry, rb_node); - if (h->ms.unfolded) { - if (first) { - if (-offset > h->row_offset) { - offset += h->row_offset; - h->row_offset = 0; - } else { - h->row_offset += offset; - offset = 0; - self->top = nd; - break; - } - } else { - if (-offset > h->nr_rows) { - offset += h->nr_rows; - h->row_offset = 0; - } else { - h->row_offset = h->nr_rows + offset; - offset = 0; - self->top = nd; - break; - } - } - } - - nd = hists__filter_prev_entries(rb_prev(nd)); - if (nd == NULL) - break; - ++offset; - self->top = nd; - if (offset == 0) { - /* - * Last unfiltered hist_entry, check if it is - * unfolded, if it is then we should have - * row_offset at its last entry. - */ - h = rb_entry(nd, struct hist_entry, rb_node); - if (h->ms.unfolded) - h->row_offset = h->nr_rows; - break; - } - first = false; - } - } else { - self->top = nd; - h = rb_entry(nd, struct hist_entry, rb_node); - h->row_offset = 0; - } -} - -static struct hist_browser *hist_browser__new(struct hists *hists) -{ - struct hist_browser *self = zalloc(sizeof(*self)); - - if (self) { - self->hists = hists; - self->b.refresh = hist_browser__refresh; - self->b.seek = ui_browser__hists_seek; - } - - return self; -} - -static void hist_browser__delete(struct hist_browser *self) -{ - free(self); -} - -static struct hist_entry *hist_browser__selected_entry(struct hist_browser *self) -{ - return self->he_selection; -} - -static struct thread *hist_browser__selected_thread(struct hist_browser *self) -{ - return self->he_selection->thread; -} - -static int hists__browser_title(struct hists *self, char *bf, size_t size, - const char *ev_name, const struct dso *dso, - const struct thread *thread) -{ - char unit; - int printed; - unsigned long nr_events = self->stats.nr_events[PERF_RECORD_SAMPLE]; - - nr_events = convert_unit(nr_events, &unit); - printed = snprintf(bf, size, "Events: %lu%c %s", nr_events, unit, ev_name); - - if (thread) - printed += snprintf(bf + printed, size - printed, - ", Thread: %s(%d)", - (thread->comm_set ? thread->comm : ""), - thread->pid); - if (dso) - printed += snprintf(bf + printed, size - printed, - ", DSO: %s", dso->short_name); - return printed; -} - -static int perf_evsel__hists_browse(struct perf_evsel *evsel, - const char *helpline, const char *ev_name, - bool left_exits) -{ - struct hists *self = &evsel->hists; - struct hist_browser *browser = hist_browser__new(self); - struct pstack *fstack; - const struct thread *thread_filter = NULL; - const struct dso *dso_filter = NULL; - char msg[160]; - int key = -1; - - if (browser == NULL) - return -1; - - fstack = pstack__new(2); - if (fstack == NULL) - goto out; - - ui_helpline__push(helpline); - - hists__browser_title(self, msg, sizeof(msg), ev_name, - dso_filter, thread_filter); - while (1) { - const struct thread *thread = NULL; - const struct dso *dso = NULL; - char *options[16]; - int nr_options = 0, choice = 0, i, - annotate = -2, zoom_dso = -2, zoom_thread = -2, - browse_map = -2; - - key = hist_browser__run(browser, msg); - - if (browser->he_selection != NULL) { - thread = hist_browser__selected_thread(browser); - dso = browser->selection->map ? browser->selection->map->dso : NULL; - } - - switch (key) { - case NEWT_KEY_TAB: - case NEWT_KEY_UNTAB: - /* - * Exit the browser, let hists__browser_tree - * go to the next or previous - */ - goto out_free_stack; - case 'a': - if (browser->selection == NULL || - browser->selection->sym == NULL || - browser->selection->map->dso->annotate_warned) - continue; - goto do_annotate; - case 'd': - goto zoom_dso; - case 't': - goto zoom_thread; - case NEWT_KEY_F1: - case 'h': - case '?': - ui__help_window("-> Zoom into DSO/Threads & Annotate current symbol\n" - "<- Zoom out\n" - "a Annotate current symbol\n" - "h/?/F1 Show this window\n" - "C Collapse all callchains\n" - "E Expand all callchains\n" - "d Zoom into current DSO\n" - "t Zoom into current Thread\n" - "TAB/UNTAB Switch events\n" - "q/CTRL+C Exit browser"); - continue; - case NEWT_KEY_ENTER: - case NEWT_KEY_RIGHT: - /* menu */ - break; - case NEWT_KEY_LEFT: { - const void *top; - - if (pstack__empty(fstack)) { - /* - * Go back to the perf_evsel_menu__run or other user - */ - if (left_exits) - goto out_free_stack; - continue; - } - top = pstack__pop(fstack); - if (top == &dso_filter) - goto zoom_out_dso; - if (top == &thread_filter) - goto zoom_out_thread; - continue; - } - case NEWT_KEY_ESCAPE: - if (!left_exits && - !ui__dialog_yesno("Do you really want to exit?")) - continue; - /* Fall thru */ - default: - goto out_free_stack; - } - - if (browser->selection != NULL && - browser->selection->sym != NULL && - !browser->selection->map->dso->annotate_warned && - asprintf(&options[nr_options], "Annotate %s", - browser->selection->sym->name) > 0) - annotate = nr_options++; - - if (thread != NULL && - asprintf(&options[nr_options], "Zoom %s %s(%d) thread", - (thread_filter ? "out of" : "into"), - (thread->comm_set ? thread->comm : ""), - thread->pid) > 0) - zoom_thread = nr_options++; - - if (dso != NULL && - asprintf(&options[nr_options], "Zoom %s %s DSO", - (dso_filter ? "out of" : "into"), - (dso->kernel ? "the Kernel" : dso->short_name)) > 0) - zoom_dso = nr_options++; - - if (browser->selection != NULL && - browser->selection->map != NULL && - asprintf(&options[nr_options], "Browse map details") > 0) - browse_map = nr_options++; - - options[nr_options++] = (char *)"Exit"; - - choice = ui__popup_menu(nr_options, options); - - for (i = 0; i < nr_options - 1; ++i) - free(options[i]); - - if (choice == nr_options - 1) - break; - - if (choice == -1) - continue; - - if (choice == annotate) { - struct hist_entry *he; -do_annotate: - he = hist_browser__selected_entry(browser); - if (he == NULL) - continue; - - hist_entry__tui_annotate(he, evsel->idx); - } else if (choice == browse_map) - map__browse(browser->selection->map); - else if (choice == zoom_dso) { -zoom_dso: - if (dso_filter) { - pstack__remove(fstack, &dso_filter); -zoom_out_dso: - ui_helpline__pop(); - dso_filter = NULL; - } else { - if (dso == NULL) - continue; - ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s DSO\"", - dso->kernel ? "the Kernel" : dso->short_name); - dso_filter = dso; - pstack__push(fstack, &dso_filter); - } - hists__filter_by_dso(self, dso_filter); - hists__browser_title(self, msg, sizeof(msg), ev_name, - dso_filter, thread_filter); - hist_browser__reset(browser); - } else if (choice == zoom_thread) { -zoom_thread: - if (thread_filter) { - pstack__remove(fstack, &thread_filter); -zoom_out_thread: - ui_helpline__pop(); - thread_filter = NULL; - } else { - ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"", - thread->comm_set ? thread->comm : "", - thread->pid); - thread_filter = thread; - pstack__push(fstack, &thread_filter); - } - hists__filter_by_thread(self, thread_filter); - hists__browser_title(self, msg, sizeof(msg), ev_name, - dso_filter, thread_filter); - hist_browser__reset(browser); - } - } -out_free_stack: - pstack__delete(fstack); -out: - hist_browser__delete(browser); - return key; -} - -struct perf_evsel_menu { - struct ui_browser b; - struct perf_evsel *selection; -}; - -static void perf_evsel_menu__write(struct ui_browser *browser, - void *entry, int row) -{ - struct perf_evsel_menu *menu = container_of(browser, - struct perf_evsel_menu, b); - struct perf_evsel *evsel = list_entry(entry, struct perf_evsel, node); - bool current_entry = ui_browser__is_current_entry(browser, row); - unsigned long nr_events = evsel->hists.stats.nr_events[PERF_RECORD_SAMPLE]; - const char *ev_name = event_name(evsel); - char bf[256], unit; - - ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED : - HE_COLORSET_NORMAL); - - nr_events = convert_unit(nr_events, &unit); - snprintf(bf, sizeof(bf), "%lu%c%s%s", nr_events, - unit, unit == ' ' ? "" : " ", ev_name); - slsmg_write_nstring(bf, browser->width); - - if (current_entry) - menu->selection = evsel; -} - -static int perf_evsel_menu__run(struct perf_evsel_menu *menu, const char *help) -{ - int exit_keys[] = { NEWT_KEY_ENTER, NEWT_KEY_RIGHT, 0, }; - struct perf_evlist *evlist = menu->b.priv; - struct perf_evsel *pos; - const char *ev_name, *title = "Available samples"; - int key; - - if (ui_browser__show(&menu->b, title, - "ESC: exit, ENTER|->: Browse histograms") < 0) - return -1; - - ui_browser__add_exit_keys(&menu->b, exit_keys); - - while (1) { - key = ui_browser__run(&menu->b); - - switch (key) { - case NEWT_KEY_RIGHT: - case NEWT_KEY_ENTER: - if (!menu->selection) - continue; - pos = menu->selection; -browse_hists: - ev_name = event_name(pos); - key = perf_evsel__hists_browse(pos, help, ev_name, true); - ui_browser__show_title(&menu->b, title); - break; - case NEWT_KEY_LEFT: - continue; - case NEWT_KEY_ESCAPE: - if (!ui__dialog_yesno("Do you really want to exit?")) - continue; - /* Fall thru */ - default: - goto out; - } - - switch (key) { - case NEWT_KEY_TAB: - if (pos->node.next == &evlist->entries) - pos = list_entry(evlist->entries.next, struct perf_evsel, node); - else - pos = list_entry(pos->node.next, struct perf_evsel, node); - goto browse_hists; - case NEWT_KEY_UNTAB: - if (pos->node.prev == &evlist->entries) - pos = list_entry(evlist->entries.prev, struct perf_evsel, node); - else - pos = list_entry(pos->node.prev, struct perf_evsel, node); - goto browse_hists; - case 'q': - case CTRL('c'): - goto out; - default: - break; - } - } - -out: - ui_browser__hide(&menu->b); - return key; -} - -static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist, - const char *help) -{ - struct perf_evsel *pos; - struct perf_evsel_menu menu = { - .b = { - .entries = &evlist->entries, - .refresh = ui_browser__list_head_refresh, - .seek = ui_browser__list_head_seek, - .write = perf_evsel_menu__write, - .nr_entries = evlist->nr_entries, - .priv = evlist, - }, - }; - - ui_helpline__push("Press ESC to exit"); - - list_for_each_entry(pos, &evlist->entries, node) { - const char *ev_name = event_name(pos); - size_t line_len = strlen(ev_name) + 7; - - if (menu.b.width < line_len) - menu.b.width = line_len; - /* - * Cache the evsel name, tracepoints have a _high_ cost per - * event_name() call. - */ - if (pos->name == NULL) - pos->name = strdup(ev_name); - } - - return perf_evsel_menu__run(&menu, help); -} - -int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help) -{ - - if (evlist->nr_entries == 1) { - struct perf_evsel *first = list_entry(evlist->entries.next, - struct perf_evsel, node); - const char *ev_name = event_name(first); - return perf_evsel__hists_browse(first, help, ev_name, false); - } - - return __perf_evlist__tui_browse_hists(evlist, help); -} diff --git a/tools/perf/util/ui/browsers/top.c b/tools/perf/util/ui/browsers/top.c deleted file mode 100644 index 88403cf8396a..000000000000 --- a/tools/perf/util/ui/browsers/top.c +++ /dev/null @@ -1,212 +0,0 @@ -/* - * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> - * - * Parts came from builtin-{top,stat,record}.c, see those files for further - * copyright notes. - * - * Released under the GPL v2. (and only v2, not any later version) - */ -#include "../browser.h" -#include "../../annotate.h" -#include "../helpline.h" -#include "../libslang.h" -#include "../util.h" -#include "../../evlist.h" -#include "../../hist.h" -#include "../../sort.h" -#include "../../symbol.h" -#include "../../top.h" - -struct perf_top_browser { - struct ui_browser b; - struct rb_root root; - struct sym_entry *selection; - float sum_ksamples; - int dso_width; - int dso_short_width; - int sym_width; -}; - -static void perf_top_browser__write(struct ui_browser *browser, void *entry, int row) -{ - struct perf_top_browser *top_browser = container_of(browser, struct perf_top_browser, b); - struct sym_entry *syme = rb_entry(entry, struct sym_entry, rb_node); - bool current_entry = ui_browser__is_current_entry(browser, row); - struct symbol *symbol = sym_entry__symbol(syme); - struct perf_top *top = browser->priv; - int width = browser->width; - double pcnt; - - pcnt = 100.0 - (100.0 * ((top_browser->sum_ksamples - syme->snap_count) / - top_browser->sum_ksamples)); - ui_browser__set_percent_color(browser, pcnt, current_entry); - - if (top->evlist->nr_entries == 1 || !top->display_weighted) { - slsmg_printf("%20.2f ", syme->weight); - width -= 24; - } else { - slsmg_printf("%9.1f %10ld ", syme->weight, syme->snap_count); - width -= 23; - } - - slsmg_printf("%4.1f%%", pcnt); - width -= 7; - - if (verbose) { - slsmg_printf(" %016" PRIx64, symbol->start); - width -= 17; - } - - slsmg_printf(" %-*.*s ", top_browser->sym_width, top_browser->sym_width, - symbol->name); - width -= top_browser->sym_width; - slsmg_write_nstring(width >= syme->map->dso->long_name_len ? - syme->map->dso->long_name : - syme->map->dso->short_name, width); - - if (current_entry) - top_browser->selection = syme; -} - -static void perf_top_browser__update_rb_tree(struct perf_top_browser *browser) -{ - struct perf_top *top = browser->b.priv; - u64 top_idx = browser->b.top_idx; - - browser->root = RB_ROOT; - browser->b.top = NULL; - browser->sum_ksamples = perf_top__decay_samples(top, &browser->root); - /* - * No active symbols - */ - if (top->rb_entries == 0) - return; - - perf_top__find_widths(top, &browser->root, &browser->dso_width, - &browser->dso_short_width, - &browser->sym_width); - if (browser->sym_width + browser->dso_width > browser->b.width - 29) { - browser->dso_width = browser->dso_short_width; - if (browser->sym_width + browser->dso_width > browser->b.width - 29) - browser->sym_width = browser->b.width - browser->dso_width - 29; - } - - /* - * Adjust the ui_browser indexes since the entries in the browser->root - * rb_tree may have changed, then seek it from start, so that we get a - * possible new top of the screen. - */ - browser->b.nr_entries = top->rb_entries; - - if (top_idx >= browser->b.nr_entries) { - if (browser->b.height >= browser->b.nr_entries) - top_idx = browser->b.nr_entries - browser->b.height; - else - top_idx = 0; - } - - if (browser->b.index >= top_idx + browser->b.height) - browser->b.index = top_idx + browser->b.index - browser->b.top_idx; - - if (browser->b.index >= browser->b.nr_entries) - browser->b.index = browser->b.nr_entries - 1; - - browser->b.top_idx = top_idx; - browser->b.seek(&browser->b, top_idx, SEEK_SET); -} - -static void perf_top_browser__annotate(struct perf_top_browser *browser) -{ - struct sym_entry *syme = browser->selection; - struct symbol *sym = sym_entry__symbol(syme); - struct annotation *notes = symbol__annotation(sym); - struct perf_top *top = browser->b.priv; - - if (notes->src != NULL) - goto do_annotation; - - pthread_mutex_lock(¬es->lock); - - top->sym_filter_entry = NULL; - - if (symbol__alloc_hist(sym, top->evlist->nr_entries) < 0) { - pr_err("Not enough memory for annotating '%s' symbol!\n", - sym->name); - pthread_mutex_unlock(¬es->lock); - return; - } - - top->sym_filter_entry = syme; - - pthread_mutex_unlock(¬es->lock); -do_annotation: - symbol__tui_annotate(sym, syme->map, 0, top->delay_secs * 1000); -} - -static int perf_top_browser__run(struct perf_top_browser *browser) -{ - int key; - char title[160]; - struct perf_top *top = browser->b.priv; - int delay_msecs = top->delay_secs * 1000; - int exit_keys[] = { 'a', NEWT_KEY_ENTER, NEWT_KEY_RIGHT, 0, }; - - perf_top_browser__update_rb_tree(browser); - perf_top__header_snprintf(top, title, sizeof(title)); - perf_top__reset_sample_counters(top); - - if (ui_browser__show(&browser->b, title, - "ESC: exit, ENTER|->|a: Live Annotate") < 0) - return -1; - - newtFormSetTimer(browser->b.form, delay_msecs); - ui_browser__add_exit_keys(&browser->b, exit_keys); - - while (1) { - key = ui_browser__run(&browser->b); - - switch (key) { - case -1: - /* FIXME we need to check if it was es.reason == NEWT_EXIT_TIMER */ - perf_top_browser__update_rb_tree(browser); - perf_top__header_snprintf(top, title, sizeof(title)); - perf_top__reset_sample_counters(top); - ui_browser__set_color(&browser->b, NEWT_COLORSET_ROOT); - SLsmg_gotorc(0, 0); - slsmg_write_nstring(title, browser->b.width); - break; - case 'a': - case NEWT_KEY_RIGHT: - case NEWT_KEY_ENTER: - if (browser->selection) - perf_top_browser__annotate(browser); - break; - case NEWT_KEY_LEFT: - continue; - case NEWT_KEY_ESCAPE: - if (!ui__dialog_yesno("Do you really want to exit?")) - continue; - /* Fall thru */ - default: - goto out; - } - } -out: - ui_browser__hide(&browser->b); - return key; -} - -int perf_top__tui_browser(struct perf_top *top) -{ - struct perf_top_browser browser = { - .b = { - .entries = &browser.root, - .refresh = ui_browser__rb_tree_refresh, - .seek = ui_browser__rb_tree_seek, - .write = perf_top_browser__write, - .priv = top, - }, - }; - - return perf_top_browser__run(&browser); -} diff --git a/tools/perf/util/ui/progress.c b/tools/perf/util/ui/progress.c deleted file mode 100644 index d7fc399d36b3..000000000000 --- a/tools/perf/util/ui/progress.c +++ /dev/null @@ -1,60 +0,0 @@ -#include <stdlib.h> -#include <newt.h> -#include "../cache.h" -#include "progress.h" - -struct ui_progress { - newtComponent form, scale; -}; - -struct ui_progress *ui_progress__new(const char *title, u64 total) -{ - struct ui_progress *self = malloc(sizeof(*self)); - - if (self != NULL) { - int cols; - - if (use_browser <= 0) - return self; - newtGetScreenSize(&cols, NULL); - cols -= 4; - newtCenteredWindow(cols, 1, title); - self->form = newtForm(NULL, NULL, 0); - if (self->form == NULL) - goto out_free_self; - self->scale = newtScale(0, 0, cols, total); - if (self->scale == NULL) - goto out_free_form; - newtFormAddComponent(self->form, self->scale); - newtRefresh(); - } - - return self; - -out_free_form: - newtFormDestroy(self->form); -out_free_self: - free(self); - return NULL; -} - -void ui_progress__update(struct ui_progress *self, u64 curr) -{ - /* - * FIXME: We should have a per UI backend way of showing progress, - * stdio will just show a percentage as NN%, etc. - */ - if (use_browser <= 0) - return; - newtScaleSet(self->scale, curr); - newtRefresh(); -} - -void ui_progress__delete(struct ui_progress *self) -{ - if (use_browser > 0) { - newtFormDestroy(self->form); - newtPopWindow(); - } - free(self); -} diff --git a/tools/perf/util/ui/progress.h b/tools/perf/util/ui/progress.h deleted file mode 100644 index a3820a0beb5b..000000000000 --- a/tools/perf/util/ui/progress.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef _PERF_UI_PROGRESS_H_ -#define _PERF_UI_PROGRESS_H_ 1 - -struct ui_progress; - -struct ui_progress *ui_progress__new(const char *title, u64 total); -void ui_progress__delete(struct ui_progress *self); - -void ui_progress__update(struct ui_progress *self, u64 curr); - -#endif diff --git a/tools/perf/util/ui/setup.c b/tools/perf/util/ui/setup.c deleted file mode 100644 index ee46d671db59..000000000000 --- a/tools/perf/util/ui/setup.c +++ /dev/null @@ -1,46 +0,0 @@ -#include <newt.h> -#include <signal.h> -#include <stdbool.h> - -#include "../cache.h" -#include "../debug.h" -#include "browser.h" -#include "helpline.h" -#include "ui.h" - -pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER; - -static void newt_suspend(void *d __used) -{ - newtSuspend(); - raise(SIGTSTP); - newtResume(); -} - -void setup_browser(bool fallback_to_pager) -{ - if (!isatty(1) || !use_browser || dump_trace) { - use_browser = 0; - if (fallback_to_pager) - setup_pager(); - return; - } - - use_browser = 1; - newtInit(); - newtCls(); - newtSetSuspendCallback(newt_suspend, NULL); - ui_helpline__init(); - ui_browser__init(); -} - -void exit_browser(bool wait_for_ok) -{ - if (use_browser > 0) { - if (wait_for_ok) { - char title[] = "Fatal Error", ok[] = "Ok"; - newtWinMessage(title, ok, ui_helpline__last_msg); - } - newtFinished(); - } -} diff --git a/tools/perf/util/ui/util.c b/tools/perf/util/ui/util.c deleted file mode 100644 index fdf1fc8f08bc..000000000000 --- a/tools/perf/util/ui/util.c +++ /dev/null @@ -1,130 +0,0 @@ -#include <newt.h> -#include <signal.h> -#include <stdio.h> -#include <stdbool.h> -#include <string.h> -#include <sys/ttydefaults.h> - -#include "../cache.h" -#include "../debug.h" -#include "browser.h" -#include "helpline.h" -#include "ui.h" -#include "util.h" - -static void newt_form__set_exit_keys(newtComponent self) -{ - newtFormAddHotKey(self, NEWT_KEY_LEFT); - newtFormAddHotKey(self, NEWT_KEY_ESCAPE); - newtFormAddHotKey(self, 'Q'); - newtFormAddHotKey(self, 'q'); - newtFormAddHotKey(self, CTRL('c')); -} - -static newtComponent newt_form__new(void) -{ - newtComponent self = newtForm(NULL, NULL, 0); - if (self) - newt_form__set_exit_keys(self); - return self; -} - -int ui__popup_menu(int argc, char * const argv[]) -{ - struct newtExitStruct es; - int i, rc = -1, max_len = 5; - newtComponent listbox, form = newt_form__new(); - - if (form == NULL) - return -1; - - listbox = newtListbox(0, 0, argc, NEWT_FLAG_RETURNEXIT); - if (listbox == NULL) - goto out_destroy_form; - - newtFormAddComponent(form, listbox); - - for (i = 0; i < argc; ++i) { - int len = strlen(argv[i]); - if (len > max_len) - max_len = len; - if (newtListboxAddEntry(listbox, argv[i], (void *)(long)i)) - goto out_destroy_form; - } - - newtCenteredWindow(max_len, argc, NULL); - newtFormRun(form, &es); - rc = newtListboxGetCurrent(listbox) - NULL; - if (es.reason == NEWT_EXIT_HOTKEY) - rc = -1; - newtPopWindow(); -out_destroy_form: - newtFormDestroy(form); - return rc; -} - -int ui__help_window(const char *text) -{ - struct newtExitStruct es; - newtComponent tb, form = newt_form__new(); - int rc = -1; - int max_len = 0, nr_lines = 0; - const char *t; - - if (form == NULL) - return -1; - - t = text; - while (1) { - const char *sep = strchr(t, '\n'); - int len; - - if (sep == NULL) - sep = strchr(t, '\0'); - len = sep - t; - if (max_len < len) - max_len = len; - ++nr_lines; - if (*sep == '\0') - break; - t = sep + 1; - } - - tb = newtTextbox(0, 0, max_len, nr_lines, 0); - if (tb == NULL) - goto out_destroy_form; - - newtTextboxSetText(tb, text); - newtFormAddComponent(form, tb); - newtCenteredWindow(max_len, nr_lines, NULL); - newtFormRun(form, &es); - newtPopWindow(); - rc = 0; -out_destroy_form: - newtFormDestroy(form); - return rc; -} - -static const char yes[] = "Yes", no[] = "No", - warning_str[] = "Warning!", ok[] = "Ok"; - -bool ui__dialog_yesno(const char *msg) -{ - /* newtWinChoice should really be accepting const char pointers... */ - return newtWinChoice(NULL, (char *)yes, (char *)no, (char *)msg) == 1; -} - -void ui__warning(const char *format, ...) -{ - va_list args; - - va_start(args, format); - if (use_browser > 0) { - pthread_mutex_lock(&ui__lock); - newtWinMessagev((char *)warning_str, (char *)ok, - (char *)format, args); - pthread_mutex_unlock(&ui__lock); - } else - vfprintf(stderr, format, args); - va_end(args); -} diff --git a/tools/perf/util/ui/util.h b/tools/perf/util/ui/util.h deleted file mode 100644 index afcbc1d99531..000000000000 --- a/tools/perf/util/ui/util.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef _PERF_UI_UTIL_H_ -#define _PERF_UI_UTIL_H_ 1 - -#include <stdbool.h> - -int ui__popup_menu(int argc, char * const argv[]); -int ui__help_window(const char *text); -bool ui__dialog_yesno(const char *msg); - -#endif /* _PERF_UI_UTIL_H_ */ diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c index e16bf9a707e8..4007aca8e0ca 100644 --- a/tools/perf/util/usage.c +++ b/tools/perf/util/usage.c @@ -1,9 +1,13 @@ /* - * GIT - The information manager from hell + * usage.c + * + * Various reporting routines. + * Originally copied from GIT source. * * Copyright (C) Linus Torvalds, 2005 */ #include "util.h" +#include "debug.h" static void report(const char *prefix, const char *err, va_list params) { diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 5b3ea49aa63e..d03599fbe78b 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -1,6 +1,23 @@ +#include "../perf.h" #include "util.h" #include <sys/mman.h> +/* + * XXX We need to find a better place for these things... + */ +bool perf_host = true; +bool perf_guest = false; + +void event_attr_init(struct perf_event_attr *attr) +{ + if (!perf_host) + attr->exclude_host = 1; + if (!perf_guest) + attr->exclude_guest = 1; + /* to capture ABI version */ + attr->size = sizeof(*attr); +} + int mkdir_p(char *path, mode_t mode) { struct stat st; @@ -131,3 +148,13 @@ int readn(int fd, void *buf, size_t n) return buf - buf_start; } + +size_t hex_width(u64 v) +{ + size_t n = 1; + + while ((v >>= 4)) + ++n; + + return n; +} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 0128906bac88..b13c7331eaf8 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -40,7 +40,6 @@ #define decimal_length(x) ((int)(sizeof(x) * 2.56 + 0.5) + 1) #define _ALL_SOURCE 1 -#define _GNU_SOURCE 1 #define _BSD_SOURCE 1 #define HAS_BOOL @@ -75,7 +74,6 @@ #include <netinet/tcp.h> #include <arpa/inet.h> #include <netdb.h> -#include <pwd.h> #include <inttypes.h> #include "../../../include/linux/magic.h" #include "types.h" @@ -200,6 +198,8 @@ static inline int has_extension(const char *filename, const char *ext) #undef isalpha #undef isprint #undef isalnum +#undef islower +#undef isupper #undef tolower #undef toupper @@ -220,6 +220,8 @@ extern unsigned char sane_ctype[256]; #define isalpha(x) sane_istest(x,GIT_ALPHA) #define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) #define isprint(x) sane_istest(x,GIT_PRINT) +#define islower(x) (sane_istest(x,GIT_ALPHA) && sane_istest(x,0x20)) +#define isupper(x) (sane_istest(x,GIT_ALPHA) && !sane_istest(x,0x20)) #define tolower(x) sane_case((unsigned char)(x), 0x20) #define toupper(x) sane_case((unsigned char)(x), 0) @@ -242,7 +244,26 @@ int strtailcmp(const char *s1, const char *s2); unsigned long convert_unit(unsigned long value, char *unit); int readn(int fd, void *buf, size_t size); +struct perf_event_attr; + +void event_attr_init(struct perf_event_attr *attr); + #define _STR(x) #x #define STR(x) _STR(x) +/* + * Determine whether some value is a power of two, where zero is + * *not* considered a power of two. + */ + +static inline __attribute__((const)) +bool is_power_of_2(unsigned long n) +{ + return (n != 0 && ((n & (n - 1)) == 0)); +} + +size_t hex_width(u64 v); + +char *rtrim(char *s); + #endif diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c index bdd33470b235..697c8b4e59cc 100644 --- a/tools/perf/util/values.c +++ b/tools/perf/util/values.c @@ -32,6 +32,7 @@ void perf_read_values_destroy(struct perf_read_values *values) for (i = 0; i < values->threads; i++) free(values->value[i]); + free(values->value); free(values->pid); free(values->tid); free(values->counterrawid); diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index e8a03aceceb1..a93e06cfcc2a 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -19,6 +19,16 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # +OUTPUT=./ +ifeq ("$(origin O)", "command line") + OUTPUT := $(O)/ +endif + +ifneq ($(OUTPUT),) +# check that the output directory actually exists +OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd) +$(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist)) +endif # --- CONFIGURATION BEGIN --- @@ -87,6 +97,7 @@ AR = $(CROSS)ar STRIP = $(CROSS)strip RANLIB = $(CROSS)ranlib HOSTCC = gcc +MKDIR = mkdir # Now we set up the build system @@ -95,7 +106,7 @@ HOSTCC = gcc # set up PWD so that older versions of make will work with our build. PWD = $(shell pwd) -GMO_FILES = ${shell for HLANG in ${LANGUAGES}; do echo po/$$HLANG.gmo; done;} +GMO_FILES = ${shell for HLANG in ${LANGUAGES}; do echo $(OUTPUT)po/$$HLANG.gmo; done;} export CROSS CC AR STRIP RANLIB CFLAGS LDFLAGS LIB_OBJS @@ -122,15 +133,18 @@ UTIL_OBJS = utils/helpers/amd.o utils/helpers/topology.o utils/helpers/msr.o \ utils/cpupower.o utils/cpufreq-info.o utils/cpufreq-set.o \ utils/cpupower-set.o utils/cpupower-info.o utils/cpuidle-info.o +UTIL_SRC := $(UTIL_OBJS:.o=.c) + +UTIL_OBJS := $(addprefix $(OUTPUT),$(UTIL_OBJS)) + UTIL_HEADERS = utils/helpers/helpers.h utils/idle_monitor/cpupower-monitor.h \ utils/helpers/bitmask.h \ utils/idle_monitor/idle_monitors.h utils/idle_monitor/idle_monitors.def -UTIL_SRC := $(UTIL_OBJS:.o=.c) - LIB_HEADERS = lib/cpufreq.h lib/sysfs.h LIB_SRC = lib/cpufreq.c lib/sysfs.c LIB_OBJS = lib/cpufreq.o lib/sysfs.o +LIB_OBJS := $(addprefix $(OUTPUT),$(LIB_OBJS)) CFLAGS += -pipe @@ -168,83 +182,90 @@ endif # the actual make rules -all: libcpupower cpupower $(COMPILE_NLS) $(COMPILE_BENCH) +all: libcpupower $(OUTPUT)cpupower $(COMPILE_NLS) $(COMPILE_BENCH) -lib/%.o: $(LIB_SRC) $(LIB_HEADERS) +$(OUTPUT)lib/%.o: $(LIB_SRC) $(LIB_HEADERS) $(ECHO) " CC " $@ $(QUIET) $(CC) $(CFLAGS) -fPIC -o $@ -c lib/$*.c -libcpupower.so.$(LIB_MAJ): $(LIB_OBJS) +$(OUTPUT)libcpupower.so.$(LIB_MAJ): $(LIB_OBJS) $(ECHO) " LD " $@ $(QUIET) $(CC) -shared $(CFLAGS) $(LDFLAGS) -o $@ \ -Wl,-soname,libcpupower.so.$(LIB_MIN) $(LIB_OBJS) - @ln -sf $@ libcpupower.so - @ln -sf $@ libcpupower.so.$(LIB_MIN) + @ln -sf $(@F) $(OUTPUT)libcpupower.so + @ln -sf $(@F) $(OUTPUT)libcpupower.so.$(LIB_MIN) -libcpupower: libcpupower.so.$(LIB_MAJ) +libcpupower: $(OUTPUT)libcpupower.so.$(LIB_MAJ) # Let all .o files depend on its .c file and all headers # Might be worth to put this into utils/Makefile at some point of time $(UTIL_OBJS): $(UTIL_HEADERS) -.c.o: +$(OUTPUT)%.o: %.c $(ECHO) " CC " $@ $(QUIET) $(CC) $(CFLAGS) -I./lib -I ./utils -o $@ -c $*.c -cpupower: $(UTIL_OBJS) libcpupower.so.$(LIB_MAJ) +$(OUTPUT)cpupower: $(UTIL_OBJS) $(OUTPUT)libcpupower.so.$(LIB_MAJ) $(ECHO) " CC " $@ - $(QUIET) $(CC) $(CFLAGS) $(LDFLAGS) -lcpupower -lrt -lpci -L. -o $@ $(UTIL_OBJS) + $(QUIET) $(CC) $(CFLAGS) $(LDFLAGS) $(UTIL_OBJS) -lcpupower -lrt -lpci -L$(OUTPUT) -o $@ $(QUIET) $(STRIPCMD) $@ -po/$(PACKAGE).pot: $(UTIL_SRC) +$(OUTPUT)po/$(PACKAGE).pot: $(UTIL_SRC) $(ECHO) " GETTEXT " $@ $(QUIET) xgettext --default-domain=$(PACKAGE) --add-comments \ - --keyword=_ --keyword=N_ $(UTIL_SRC) && \ - test -f $(PACKAGE).po && \ - mv -f $(PACKAGE).po po/$(PACKAGE).pot + --keyword=_ --keyword=N_ $(UTIL_SRC) -p $(@D) -o $(@F) -po/%.gmo: po/%.po +$(OUTPUT)po/%.gmo: po/%.po $(ECHO) " MSGFMT " $@ $(QUIET) msgfmt -o $@ po/$*.po create-gmo: ${GMO_FILES} -update-po: po/$(PACKAGE).pot +update-po: $(OUTPUT)po/$(PACKAGE).pot $(ECHO) " MSGMRG " $@ $(QUIET) @for HLANG in $(LANGUAGES); do \ echo -n "Updating $$HLANG "; \ - if msgmerge po/$$HLANG.po po/$(PACKAGE).pot -o \ - po/$$HLANG.new.po; then \ - mv -f po/$$HLANG.new.po po/$$HLANG.po; \ + if msgmerge po/$$HLANG.po $< -o \ + $(OUTPUT)po/$$HLANG.new.po; then \ + mv -f $(OUTPUT)po/$$HLANG.new.po $(OUTPUT)po/$$HLANG.po; \ else \ echo "msgmerge for $$HLANG failed!"; \ - rm -f po/$$HLANG.new.po; \ + rm -f $(OUTPUT)po/$$HLANG.new.po; \ fi; \ done; -compile-bench: libcpupower.so.$(LIB_MAJ) - @V=$(V) confdir=$(confdir) $(MAKE) -C bench +compile-bench: $(OUTPUT)libcpupower.so.$(LIB_MAJ) + @V=$(V) confdir=$(confdir) $(MAKE) -C bench O=$(OUTPUT) + +# we compile into subdirectories. if the target directory is not the +# source directory, they might not exists. So we depend the various +# files onto their directories. +DIRECTORY_DEPS = $(LIB_OBJS) $(UTIL_OBJS) $(GMO_FILES) +$(DIRECTORY_DEPS): | $(sort $(dir $(DIRECTORY_DEPS))) + +# In the second step, we make a rule to actually create these directories +$(sort $(dir $(DIRECTORY_DEPS))): + $(ECHO) " MKDIR " $@ + $(QUIET) $(MKDIR) -p $@ 2>/dev/null clean: - -find . \( -not -type d \) -and \( -name '*~' -o -name '*.[oas]' \) -type f -print \ + -find $(OUTPUT) \( -not -type d \) -and \( -name '*~' -o -name '*.[oas]' \) -type f -print \ | xargs rm -f - -rm -f $(UTIL_BINS) - -rm -f $(IDLE_OBJS) - -rm -f cpupower - -rm -f libcpupower.so* - -rm -rf po/*.gmo po/*.pot - $(MAKE) -C bench clean + -rm -f $(OUTPUT)cpupower + -rm -f $(OUTPUT)libcpupower.so* + -rm -rf $(OUTPUT)po/*.{gmo,pot} + $(MAKE) -C bench O=$(OUTPUT) clean install-lib: $(INSTALL) -d $(DESTDIR)${libdir} - $(CP) libcpupower.so* $(DESTDIR)${libdir}/ + $(CP) $(OUTPUT)libcpupower.so* $(DESTDIR)${libdir}/ $(INSTALL) -d $(DESTDIR)${includedir} $(INSTALL_DATA) lib/cpufreq.h $(DESTDIR)${includedir}/cpufreq.h install-tools: $(INSTALL) -d $(DESTDIR)${bindir} - $(INSTALL_PROGRAM) cpupower $(DESTDIR)${bindir} + $(INSTALL_PROGRAM) $(OUTPUT)cpupower $(DESTDIR)${bindir} install-man: $(INSTALL_DATA) -D man/cpupower.1 $(DESTDIR)${mandir}/man1/cpupower.1 @@ -257,13 +278,13 @@ install-man: install-gmo: $(INSTALL) -d $(DESTDIR)${localedir} for HLANG in $(LANGUAGES); do \ - echo '$(INSTALL_DATA) -D po/$$HLANG.gmo $(DESTDIR)${localedir}/$$HLANG/LC_MESSAGES/cpupower.mo'; \ - $(INSTALL_DATA) -D po/$$HLANG.gmo $(DESTDIR)${localedir}/$$HLANG/LC_MESSAGES/cpupower.mo; \ + echo '$(INSTALL_DATA) -D $(OUTPUT)po/$$HLANG.gmo $(DESTDIR)${localedir}/$$HLANG/LC_MESSAGES/cpupower.mo'; \ + $(INSTALL_DATA) -D $(OUTPUT)po/$$HLANG.gmo $(DESTDIR)${localedir}/$$HLANG/LC_MESSAGES/cpupower.mo; \ done; install-bench: @#DESTDIR must be set from outside to survive - @sbindir=$(sbindir) bindir=$(bindir) docdir=$(docdir) confdir=$(confdir) $(MAKE) -C bench install + @sbindir=$(sbindir) bindir=$(bindir) docdir=$(docdir) confdir=$(confdir) $(MAKE) -C bench O=$(OUTPUT) install install: all install-lib install-tools install-man $(INSTALL_NLS) $(INSTALL_BENCH) diff --git a/tools/power/cpupower/bench/Makefile b/tools/power/cpupower/bench/Makefile index 2b67606fc3e3..7ec7021a29cd 100644 --- a/tools/power/cpupower/bench/Makefile +++ b/tools/power/cpupower/bench/Makefile @@ -1,29 +1,36 @@ -LIBS = -L../ -lm -lcpupower +OUTPUT := ./ +ifeq ("$(origin O)", "command line") +ifneq ($(O),) + OUTPUT := $(O)/ +endif +endif -OBJS = main.o parse.o system.o benchmark.o +LIBS = -L../ -L$(OUTPUT) -lm -lcpupower + +OBJS = $(OUTPUT)main.o $(OUTPUT)parse.o $(OUTPUT)system.o $(OUTPUT)benchmark.o CFLAGS += -D_GNU_SOURCE -I../lib -DDEFAULT_CONFIG_FILE=\"$(confdir)/cpufreq-bench.conf\" -%.o : %.c +$(OUTPUT)%.o : %.c $(ECHO) " CC " $@ $(QUIET) $(CC) -c $(CFLAGS) $< -o $@ -cpufreq-bench: $(OBJS) +$(OUTPUT)cpufreq-bench: $(OBJS) $(ECHO) " CC " $@ $(QUIET) $(CC) -o $@ $(CFLAGS) $(OBJS) $(LIBS) -all: cpufreq-bench +all: $(OUTPUT)cpufreq-bench install: mkdir -p $(DESTDIR)/$(sbindir) mkdir -p $(DESTDIR)/$(bindir) mkdir -p $(DESTDIR)/$(docdir) mkdir -p $(DESTDIR)/$(confdir) - install -m 755 cpufreq-bench $(DESTDIR)/$(sbindir)/cpufreq-bench + install -m 755 $(OUTPUT)cpufreq-bench $(DESTDIR)/$(sbindir)/cpufreq-bench install -m 755 cpufreq-bench_plot.sh $(DESTDIR)/$(bindir)/cpufreq-bench_plot.sh install -m 644 README-BENCH $(DESTDIR)/$(docdir)/README-BENCH install -m 755 cpufreq-bench_script.sh $(DESTDIR)/$(docdir)/cpufreq-bench_script.sh install -m 644 example.cfg $(DESTDIR)/$(confdir)/cpufreq-bench.conf clean: - rm -f *.o - rm -f cpufreq-bench + rm -f $(OUTPUT)*.o + rm -f $(OUTPUT)cpufreq-bench diff --git a/tools/power/cpupower/debug/i386/Makefile b/tools/power/cpupower/debug/i386/Makefile index d08cc1ead9bc..3ba158f0e287 100644 --- a/tools/power/cpupower/debug/i386/Makefile +++ b/tools/power/cpupower/debug/i386/Makefile @@ -1,20 +1,38 @@ +OUTPUT=./ +ifeq ("$(origin O)", "command line") + OUTPUT := $(O)/ +endif + +DESTDIR = +bindir = /usr/bin + +INSTALL = /usr/bin/install + + default: all -centrino-decode: centrino-decode.c - $(CC) $(CFLAGS) -o centrino-decode centrino-decode.c +$(OUTPUT)centrino-decode: centrino-decode.c + $(CC) $(CFLAGS) -o $@ centrino-decode.c -dump_psb: dump_psb.c - $(CC) $(CFLAGS) -o dump_psb dump_psb.c +$(OUTPUT)dump_psb: dump_psb.c + $(CC) $(CFLAGS) -o $@ dump_psb.c -intel_gsic: intel_gsic.c - $(CC) $(CFLAGS) -o intel_gsic -llrmi intel_gsic.c +$(OUTPUT)intel_gsic: intel_gsic.c + $(CC) $(CFLAGS) -o $@ -llrmi intel_gsic.c -powernow-k8-decode: powernow-k8-decode.c - $(CC) $(CFLAGS) -o powernow-k8-decode powernow-k8-decode.c +$(OUTPUT)powernow-k8-decode: powernow-k8-decode.c + $(CC) $(CFLAGS) -o $@ powernow-k8-decode.c -all: centrino-decode dump_psb intel_gsic powernow-k8-decode +all: $(OUTPUT)centrino-decode $(OUTPUT)dump_psb $(OUTPUT)intel_gsic $(OUTPUT)powernow-k8-decode clean: - rm -rf centrino-decode dump_psb intel_gsic powernow-k8-decode + rm -rf $(OUTPUT){centrino-decode,dump_psb,intel_gsic,powernow-k8-decode} + +install: + $(INSTALL) -d $(DESTDIR)${bindir} + $(INSTALL) $(OUTPUT)centrino-decode $(DESTDIR)${bindir} + $(INSTALL) $(OUTPUT)powernow-k8-decode $(DESTDIR)${bindir} + $(INSTALL) $(OUTPUT)dump_psb $(DESTDIR)${bindir} + $(INSTALL) $(OUTPUT)intel_gsic $(DESTDIR)${bindir} -.PHONY: all default clean +.PHONY: all default clean install diff --git a/tools/power/cpupower/debug/x86_64/Makefile b/tools/power/cpupower/debug/x86_64/Makefile index 3326217dd311..1c5214526716 100644 --- a/tools/power/cpupower/debug/x86_64/Makefile +++ b/tools/power/cpupower/debug/x86_64/Makefile @@ -1,14 +1,30 @@ +OUTPUT=./ +ifeq ("$(origin O)", "command line") + OUTPUT := $(O)/ +endif + +DESTDIR = +bindir = /usr/bin + +INSTALL = /usr/bin/install + + default: all -centrino-decode: ../i386/centrino-decode.c +$(OUTPUT)centrino-decode: ../i386/centrino-decode.c $(CC) $(CFLAGS) -o $@ $< -powernow-k8-decode: ../i386/powernow-k8-decode.c +$(OUTPUT)powernow-k8-decode: ../i386/powernow-k8-decode.c $(CC) $(CFLAGS) -o $@ $< -all: centrino-decode powernow-k8-decode +all: $(OUTPUT)centrino-decode $(OUTPUT)powernow-k8-decode clean: - rm -rf centrino-decode powernow-k8-decode + rm -rf $(OUTPUT)centrino-decode $(OUTPUT)powernow-k8-decode + +install: + $(INSTALL) -d $(DESTDIR)${bindir} + $(INSTALL) $(OUTPUT)centrino-decode $(DESTDIR)${bindir} + $(INSTALL) $(OUTPUT)powernow-k8-decode $(DESTDIR)${bindir} -.PHONY: all default clean +.PHONY: all default clean install diff --git a/tools/power/cpupower/man/cpupower-frequency-info.1 b/tools/power/cpupower/man/cpupower-frequency-info.1 index bb60a8d1e45a..4a1918ea8f9c 100644 --- a/tools/power/cpupower/man/cpupower-frequency-info.1 +++ b/tools/power/cpupower/man/cpupower-frequency-info.1 @@ -1,4 +1,4 @@ -.TH "cpupower-frequency-info" "1" "0.1" "Mattia Dongili" "" +.TH "CPUPOWER\-FREQUENCY\-INFO" "1" "0.1" "" "cpupower Manual" .SH "NAME" .LP cpupower frequency\-info \- Utility to retrieve cpufreq kernel information @@ -50,8 +50,6 @@ Prints out information like provided by the /proc/cpufreq interface in 2.4. and \fB\-m\fR \fB\-\-human\fR human\-readable output for the \-f, \-w, \-s and \-y parameters. .TP -\fB\-h\fR \fB\-\-help\fR -Prints out the help screen. .SH "REMARKS" .LP By default only values of core zero are displayed. How to display settings of diff --git a/tools/power/cpupower/man/cpupower-frequency-set.1 b/tools/power/cpupower/man/cpupower-frequency-set.1 index 685f469093ad..3eacc8d03d1a 100644 --- a/tools/power/cpupower/man/cpupower-frequency-set.1 +++ b/tools/power/cpupower/man/cpupower-frequency-set.1 @@ -1,4 +1,4 @@ -.TH "cpupower-freqency-set" "1" "0.1" "Mattia Dongili" "" +.TH "CPUPOWER\-FREQUENCY\-SET" "1" "0.1" "" "cpupower Manual" .SH "NAME" .LP cpupower frequency\-set \- A small tool which allows to modify cpufreq settings. @@ -26,8 +26,6 @@ specific frequency to be set. Requires userspace governor to be available and lo \fB\-r\fR \fB\-\-related\fR modify all hardware-related CPUs at the same time .TP -\fB\-h\fR \fB\-\-help\fR -Prints out the help screen. .SH "REMARKS" .LP By default values are applied on all cores. How to modify single core diff --git a/tools/power/cpupower/man/cpupower-idle-info.1 b/tools/power/cpupower/man/cpupower-idle-info.1 new file mode 100644 index 000000000000..4178effd9e99 --- /dev/null +++ b/tools/power/cpupower/man/cpupower-idle-info.1 @@ -0,0 +1,90 @@ +.TH "CPUPOWER-IDLE-INFO" "1" "0.1" "" "cpupower Manual" +.SH "NAME" +.LP +cpupower idle\-info \- Utility to retrieve cpu idle kernel information +.SH "SYNTAX" +.LP +cpupower [ \-c cpulist ] idle\-info [\fIoptions\fP] +.SH "DESCRIPTION" +.LP +A tool which prints out per cpu idle information helpful to developers and interested users. +.SH "OPTIONS" +.LP +.TP +\fB\-f\fR \fB\-\-silent\fR +Only print a summary of all available C-states in the system. +.TP +\fB\-e\fR \fB\-\-proc\fR +deprecated. +Prints out idle information in old /proc/acpi/processor/*/power format. This +interface has been removed from the kernel for quite some time, do not let +further code depend on this option, best do not use it. + +.SH IDLE\-INFO DESCRIPTIONS +CPU sleep state statistics and descriptions are retrieved from sysfs files, +exported by the cpuidle kernel subsystem. The kernel only updates these +statistics when it enters or leaves an idle state, therefore on a very idle or +a very busy system, these statistics may not be accurate. They still provide a +good overview about the usage and availability of processor sleep states on +the platform. + +Be aware that the sleep states as exported by the hardware or BIOS and used by +the Linux kernel may not exactly reflect the capabilities of the +processor. This often is the case on the X86 architecture when the acpi_idle +driver is used. It is also possible that the hardware overrules the kernel +requests, due to internal activity monitors or other reasons. +On recent X86 platforms it is often possible to read out hardware registers +which monitor the duration of sleep states the processor resided in. The +cpupower monitor tool (cpupower\-monitor(1)) can be used to show real sleep +state residencies. Please refer to the architecture specific description +section below. + +.SH IDLE\-INFO ARCHITECTURE SPECIFIC DESCRIPTIONS +.SS "X86" +POLL idle state + +If cpuidle is active, X86 platforms have one special idle state. +The POLL idle state is not a real idle state, it does not save any +power. Instead, a busy\-loop is executed doing nothing for a short period of +time. This state is used if the kernel knows that work has to be processed +very soon and entering any real hardware idle state may result in a slight +performance penalty. + +There exist two different cpuidle drivers on the X86 architecture platform: + +"acpi_idle" cpuidle driver + +The acpi_idle cpuidle driver retrieves available sleep states (C\-states) from +the ACPI BIOS tables (from the _CST ACPI function on recent platforms or from +the FADT BIOS table on older ones). +The C1 state is not retrieved from ACPI tables. If the C1 state is entered, +the kernel will call the hlt instruction (or mwait on Intel). + +"intel_idle" cpuidle driver + +In kernel 2.6.36 the intel_idle driver was introduced. +It only serves recent Intel CPUs (Nehalem, Westmere, Sandybridge, Atoms or +newer). On older Intel CPUs the acpi_idle driver is still used (if the BIOS +provides C\-state ACPI tables). +The intel_idle driver knows the sleep state capabilities of the processor and +ignores ACPI BIOS exported processor sleep states tables. + +.SH "REMARKS" +.LP +By default only values of core zero are displayed. How to display settings of +other cores is described in the cpupower(1) manpage in the \-\-cpu option +section. +.SH REFERENCES +http://www.acpi.info/spec.htm +.SH "FILES" +.nf +\fI/sys/devices/system/cpu/cpu*/cpuidle/state*\fP +\fI/sys/devices/system/cpu/cpuidle/*\fP +.fi +.SH "AUTHORS" +.nf +Thomas Renninger <trenn@suse.de> +.fi +.SH "SEE ALSO" +.LP +cpupower(1), cpupower\-monitor(1), cpupower\-info(1), cpupower\-set(1) diff --git a/tools/power/cpupower/man/cpupower-monitor.1 b/tools/power/cpupower/man/cpupower-monitor.1 index d5cfa265c3d3..1141c2073719 100644 --- a/tools/power/cpupower/man/cpupower-monitor.1 +++ b/tools/power/cpupower/man/cpupower-monitor.1 @@ -107,7 +107,7 @@ Deepest package sleep states may in reality show up as machine/platform wide sleep states and can only be entered if all cores are idle. Look up Intel manuals (some are provided in the References section) for further details. -.SS "Ontario" "Liano" +.SS "Fam_12h" "Fam_14h" AMD laptop and desktop processor (family 12h and 14h) sleep state counters. The registers are accessed via PCI and therefore can still be read out while cores have been offlined. diff --git a/tools/power/cpupower/man/cpupower-set.1 b/tools/power/cpupower/man/cpupower-set.1 index c4954a9fe4e7..9dbd536518ab 100644 --- a/tools/power/cpupower/man/cpupower-set.1 +++ b/tools/power/cpupower/man/cpupower-set.1 @@ -85,15 +85,6 @@ Possible values are: savings .RE -sched_mc_power_savings is dependent upon SCHED_MC, which is -itself architecture dependent. - -sched_smt_power_savings is dependent upon SCHED_SMT, which -is itself architecture dependent. - -The two files are independent of each other. It is possible -that one file may be present without the other. - .SH "SEE ALSO" cpupower-info(1), cpupower-monitor(1), powertop(1) .PP diff --git a/tools/power/cpupower/utils/cpuidle-info.c b/tools/power/cpupower/utils/cpuidle-info.c index b028267c1376..8145af5f93a6 100644 --- a/tools/power/cpupower/utils/cpuidle-info.c +++ b/tools/power/cpupower/utils/cpuidle-info.c @@ -35,17 +35,9 @@ static void cpuidle_cpu_output(unsigned int cpu, int verbose) printf(_("CPU %u: Can't read idle state info\n"), cpu); return; } - tmp = sysfs_get_idlestate_name(cpu, idlestates - 1); - if (!tmp) { - printf(_("Could not determine max idle state %u\n"), - idlestates - 1); - return; - } - printf(_("Number of idle states: %d\n"), idlestates); - printf(_("Available idle states:")); - for (idlestate = 1; idlestate < idlestates; idlestate++) { + for (idlestate = 0; idlestate < idlestates; idlestate++) { tmp = sysfs_get_idlestate_name(cpu, idlestate); if (!tmp) continue; @@ -57,7 +49,7 @@ static void cpuidle_cpu_output(unsigned int cpu, int verbose) if (!verbose) return; - for (idlestate = 1; idlestate < idlestates; idlestate++) { + for (idlestate = 0; idlestate < idlestates; idlestate++) { tmp = sysfs_get_idlestate_name(cpu, idlestate); if (!tmp) continue; diff --git a/tools/power/cpupower/utils/helpers/amd.c b/tools/power/cpupower/utils/helpers/amd.c index 87d5605bdda8..6437ef39aeea 100644 --- a/tools/power/cpupower/utils/helpers/amd.c +++ b/tools/power/cpupower/utils/helpers/amd.c @@ -112,14 +112,12 @@ int decode_pstates(unsigned int cpu, unsigned int cpu_family, int amd_pci_get_num_boost_states(int *active, int *states) { struct pci_access *pci_acc; - int vendor_id = 0x1022; - int boost_dev_ids[4] = {0x1204, 0x1604, 0x1704, 0}; struct pci_dev *device; uint8_t val = 0; *active = *states = 0; - device = pci_acc_init(&pci_acc, vendor_id, boost_dev_ids); + device = pci_slot_func_init(&pci_acc, 0x18, 4); if (device == NULL) return -ENODEV; diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h index 2747e738efb0..2eb584cf2f55 100644 --- a/tools/power/cpupower/utils/helpers/helpers.h +++ b/tools/power/cpupower/utils/helpers/helpers.h @@ -66,8 +66,8 @@ enum cpupower_cpu_vendor {X86_VENDOR_UNKNOWN = 0, X86_VENDOR_INTEL, #define CPUPOWER_CAP_AMD_CBP 0x00000004 #define CPUPOWER_CAP_PERF_BIAS 0x00000008 #define CPUPOWER_CAP_HAS_TURBO_RATIO 0x00000010 -#define CPUPOWER_CAP_IS_SNB 0x00000011 -#define CPUPOWER_CAP_INTEL_IDA 0x00000012 +#define CPUPOWER_CAP_IS_SNB 0x00000020 +#define CPUPOWER_CAP_INTEL_IDA 0x00000040 #define MAX_HW_PSTATES 10 @@ -132,8 +132,11 @@ extern unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu); /* PCI stuff ****************************/ extern int amd_pci_get_num_boost_states(int *active, int *states); -extern struct pci_dev *pci_acc_init(struct pci_access **pacc, int vendor_id, - int *dev_ids); +extern struct pci_dev *pci_acc_init(struct pci_access **pacc, int domain, + int bus, int slot, int func, int vendor, + int dev); +extern struct pci_dev *pci_slot_func_init(struct pci_access **pacc, + int slot, int func); /* PCI stuff ****************************/ diff --git a/tools/power/cpupower/utils/helpers/pci.c b/tools/power/cpupower/utils/helpers/pci.c index cd2eb6fe41c4..9690798e6446 100644 --- a/tools/power/cpupower/utils/helpers/pci.c +++ b/tools/power/cpupower/utils/helpers/pci.c @@ -10,19 +10,24 @@ * **pacc : if a valid pci_dev is returned * *pacc must be passed to pci_acc_cleanup to free it * - * vendor_id : the pci vendor id matching the pci device to access - * dev_ids : device ids matching the pci device to access + * domain: domain + * bus: bus + * slot: slot + * func: func + * vendor: vendor + * device: device + * Pass -1 for one of the six above to match any * * Returns : * struct pci_dev which can be used with pci_{read,write}_* functions * to access the PCI config space of matching pci devices */ -struct pci_dev *pci_acc_init(struct pci_access **pacc, int vendor_id, - int *dev_ids) +struct pci_dev *pci_acc_init(struct pci_access **pacc, int domain, int bus, + int slot, int func, int vendor, int dev) { - struct pci_filter filter_nb_link = { -1, -1, -1, -1, vendor_id, 0}; + struct pci_filter filter_nb_link = { domain, bus, slot, func, + vendor, dev }; struct pci_dev *device; - unsigned int i; *pacc = pci_alloc(); if (*pacc == NULL) @@ -31,14 +36,20 @@ struct pci_dev *pci_acc_init(struct pci_access **pacc, int vendor_id, pci_init(*pacc); pci_scan_bus(*pacc); - for (i = 0; dev_ids[i] != 0; i++) { - filter_nb_link.device = dev_ids[i]; - for (device = (*pacc)->devices; device; device = device->next) { - if (pci_filter_match(&filter_nb_link, device)) - return device; - } + for (device = (*pacc)->devices; device; device = device->next) { + if (pci_filter_match(&filter_nb_link, device)) + return device; } pci_cleanup(*pacc); return NULL; } + +/* Typically one wants to get a specific slot(device)/func of the root domain + and bus */ +struct pci_dev *pci_slot_func_init(struct pci_access **pacc, int slot, + int func) +{ + return pci_acc_init(pacc, 0, 0, slot, func, -1, -1); +} + #endif /* defined(__i386__) || defined(__x86_64__) */ diff --git a/tools/power/cpupower/utils/helpers/sysfs.c b/tools/power/cpupower/utils/helpers/sysfs.c index c6343024a611..96e28c124b5c 100644 --- a/tools/power/cpupower/utils/helpers/sysfs.c +++ b/tools/power/cpupower/utils/helpers/sysfs.c @@ -362,22 +362,7 @@ char *sysfs_get_cpuidle_driver(void) */ int sysfs_get_sched(const char *smt_mc) { - unsigned long value; - char linebuf[MAX_LINE_LEN]; - char *endp; - char path[SYSFS_PATH_MAX]; - - if (strcmp("mc", smt_mc) && strcmp("smt", smt_mc)) - return -EINVAL; - - snprintf(path, sizeof(path), - PATH_TO_CPU "sched_%s_power_savings", smt_mc); - if (sysfs_read_file(path, linebuf, MAX_LINE_LEN) == 0) - return -1; - value = strtoul(linebuf, &endp, 0); - if (endp == linebuf || errno == ERANGE) - return -1; - return value; + return -ENODEV; } /* @@ -388,21 +373,5 @@ int sysfs_get_sched(const char *smt_mc) */ int sysfs_set_sched(const char *smt_mc, int val) { - char linebuf[MAX_LINE_LEN]; - char path[SYSFS_PATH_MAX]; - struct stat statbuf; - - if (strcmp("mc", smt_mc) && strcmp("smt", smt_mc)) - return -EINVAL; - - snprintf(path, sizeof(path), - PATH_TO_CPU "sched_%s_power_savings", smt_mc); - sprintf(linebuf, "%d", val); - - if (stat(path, &statbuf) != 0) - return -ENODEV; - - if (sysfs_write_file(path, linebuf, MAX_LINE_LEN) == 0) - return -1; - return 0; + return -ENODEV; } diff --git a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c index 202e555988be..2116df9ad832 100644 --- a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c +++ b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c @@ -20,8 +20,6 @@ #include "idle_monitor/cpupower-monitor.h" #include "helpers/helpers.h" -/******** PCI parts could go into own file and get shared ***************/ - #define PCI_NON_PC0_OFFSET 0xb0 #define PCI_PC1_OFFSET 0xb4 #define PCI_PC6_OFFSET 0xb8 @@ -82,10 +80,7 @@ static cstate_t amd_fam14h_cstates[AMD_FAM14H_STATE_NUM] = { }; static struct pci_access *pci_acc; -static int pci_vendor_id = 0x1022; -static int pci_dev_ids[2] = {0x1716, 0}; static struct pci_dev *amd_fam14h_pci_dev; - static int nbp1_entered; struct timespec start_time; @@ -286,13 +281,13 @@ struct cpuidle_monitor *amd_fam14h_register(void) if (cpupower_cpu_info.vendor != X86_VENDOR_AMD) return NULL; - if (cpupower_cpu_info.family == 0x14) { - if (cpu_count <= 0 || cpu_count > 2) { - fprintf(stderr, "AMD fam14h: Invalid cpu count: %d\n", - cpu_count); - return NULL; - } - } else + if (cpupower_cpu_info.family == 0x14) + strncpy(amd_fam14h_monitor.name, "Fam_14h", + MONITOR_NAME_LEN - 1); + else if (cpupower_cpu_info.family == 0x12) + strncpy(amd_fam14h_monitor.name, "Fam_12h", + MONITOR_NAME_LEN - 1); + else return NULL; /* We do not alloc for nbp1 machine wide counter */ @@ -303,7 +298,9 @@ struct cpuidle_monitor *amd_fam14h_register(void) sizeof(unsigned long long)); } - amd_fam14h_pci_dev = pci_acc_init(&pci_acc, pci_vendor_id, pci_dev_ids); + /* We need PCI device: Slot 18, Func 6, compare with BKDG + for fam 12h/14h */ + amd_fam14h_pci_dev = pci_slot_func_init(&pci_acc, 0x18, 6); if (amd_fam14h_pci_dev == NULL || pci_acc == NULL) return NULL; @@ -325,7 +322,7 @@ static void amd_fam14h_unregister(void) } struct cpuidle_monitor amd_fam14h_monitor = { - .name = "Ontario", + .name = "", .hw_states = amd_fam14h_cstates, .hw_states_num = AMD_FAM14H_STATE_NUM, .start = amd_fam14h_start, diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index fd8e1f1297aa..f85649554191 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -1,4 +1,5 @@ turbostat : turbostat.c +CFLAGS += -Wall clean : rm -f turbostat diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index ff75125deed0..74e44507dfe9 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -4,11 +4,13 @@ turbostat \- Report processor frequency and idle statistics .SH SYNOPSIS .ft B .B turbostat +.RB [ "\-s" ] .RB [ "\-v" ] .RB [ "\-M MSR#" ] .RB command .br .B turbostat +.RB [ "\-s" ] .RB [ "\-v" ] .RB [ "\-M MSR#" ] .RB [ "\-i interval_sec" ] @@ -25,6 +27,12 @@ supports an "invariant" TSC, plus the APERF and MPERF MSRs. on processors that additionally support C-state residency counters. .SS Options +The \fB-s\fP option limits output to a 1-line system summary for each interval. +.PP +The \fB-c\fP option limits output to the 1st thread in each core. +.PP +The \fB-p\fP option limits output to the 1st thread in each package. +.PP The \fB-v\fP option increases verbosity. .PP The \fB-M MSR#\fP option dumps the specified MSR, @@ -38,14 +46,15 @@ displays the statistics gathered since it was forked. .PP .SH FIELD DESCRIPTIONS .nf -\fBpkg\fP processor package number. -\fBcore\fP processor core number. +\fBpk\fP processor package number. +\fBcor\fP processor core number. \fBCPU\fP Linux CPU (logical processor) number. +Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology. \fB%c0\fP percent of the interval that the CPU retired instructions. \fBGHz\fP average clock rate while the CPU was in c0 state. \fBTSC\fP average GHz that the TSC ran during the entire interval. -\fB%c1, %c3, %c6\fP show the percentage residency in hardware core idle states. -\fB%pc3, %pc6\fP percentage residency in hardware package idle states. +\fB%c1, %c3, %c6, %c7\fP show the percentage residency in hardware core idle states. +\fB%pc2, %pc3, %pc6, %pc7\fP percentage residency in hardware package idle states. .fi .PP .SH EXAMPLE @@ -53,25 +62,38 @@ Without any parameters, turbostat prints out counters ever 5 seconds. (override interval with "-i sec" option, or specify a command for turbostat to fork). -The first row of statistics reflect the average for the entire system. +The first row of statistics is a summary for the entire system. +Note that the summary is a weighted average. Subsequent rows show per-CPU statistics. .nf [root@x980]# ./turbostat -core CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 - 0.04 1.62 3.38 0.11 0.00 99.85 0.00 95.07 - 0 0 0.04 1.62 3.38 0.06 0.00 99.90 0.00 95.07 - 0 6 0.02 1.62 3.38 0.08 0.00 99.90 0.00 95.07 - 1 2 0.10 1.62 3.38 0.29 0.00 99.61 0.00 95.07 - 1 8 0.11 1.62 3.38 0.28 0.00 99.61 0.00 95.07 - 2 4 0.01 1.62 3.38 0.01 0.00 99.98 0.00 95.07 - 2 10 0.01 1.61 3.38 0.02 0.00 99.98 0.00 95.07 - 8 1 0.07 1.62 3.38 0.15 0.00 99.78 0.00 95.07 - 8 7 0.03 1.62 3.38 0.19 0.00 99.78 0.00 95.07 - 9 3 0.01 1.62 3.38 0.02 0.00 99.98 0.00 95.07 - 9 9 0.01 1.62 3.38 0.02 0.00 99.98 0.00 95.07 - 10 5 0.01 1.62 3.38 0.13 0.00 99.86 0.00 95.07 - 10 11 0.08 1.62 3.38 0.05 0.00 99.86 0.00 95.07 +cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 + 0.09 1.62 3.38 1.83 0.32 97.76 1.26 83.61 + 0 0 0.15 1.62 3.38 10.23 0.05 89.56 1.26 83.61 + 0 6 0.05 1.62 3.38 10.34 + 1 2 0.03 1.62 3.38 0.07 0.05 99.86 + 1 8 0.03 1.62 3.38 0.06 + 2 4 0.21 1.62 3.38 0.10 1.49 98.21 + 2 10 0.02 1.62 3.38 0.29 + 8 1 0.04 1.62 3.38 0.04 0.08 99.84 + 8 7 0.01 1.62 3.38 0.06 + 9 3 0.53 1.62 3.38 0.10 0.20 99.17 + 9 9 0.02 1.62 3.38 0.60 + 10 5 0.01 1.62 3.38 0.02 0.04 99.92 + 10 11 0.02 1.62 3.38 0.02 +.fi +.SH SUMMARY EXAMPLE +The "-s" option prints the column headers just once, +and then the one line system summary for each sample interval. + +.nf +[root@x980]# ./turbostat -s + %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 + 0.23 1.67 3.38 2.00 0.30 97.47 1.07 82.12 + 0.10 1.62 3.38 1.87 2.25 95.77 12.02 72.60 + 0.20 1.64 3.38 1.98 0.11 97.72 0.30 83.36 + 0.11 1.70 3.38 1.86 1.81 96.22 9.71 74.90 .fi .SH VERBOSE EXAMPLE The "-v" option adds verbosity to the output: @@ -101,33 +123,31 @@ until ^C while the other CPUs are mostly idle: .nf [root@x980 lenb]# ./turbostat cat /dev/zero > /dev/null - -^Ccore CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 - 8.49 3.63 3.38 16.23 0.66 74.63 0.00 0.00 - 0 0 1.22 3.62 3.38 32.18 0.00 66.60 0.00 0.00 - 0 6 0.40 3.61 3.38 33.00 0.00 66.60 0.00 0.00 - 1 2 0.11 3.14 3.38 0.19 3.95 95.75 0.00 0.00 - 1 8 0.05 2.88 3.38 0.25 3.95 95.75 0.00 0.00 - 2 4 0.00 3.13 3.38 0.02 0.00 99.98 0.00 0.00 - 2 10 0.00 3.09 3.38 0.02 0.00 99.98 0.00 0.00 - 8 1 0.04 3.50 3.38 14.43 0.00 85.54 0.00 0.00 - 8 7 0.03 2.98 3.38 14.43 0.00 85.54 0.00 0.00 - 9 3 0.00 3.16 3.38 100.00 0.00 0.00 0.00 0.00 - 9 9 99.93 3.63 3.38 0.06 0.00 0.00 0.00 0.00 - 10 5 0.01 2.82 3.38 0.08 0.00 99.91 0.00 0.00 - 10 11 0.02 3.36 3.38 0.06 0.00 99.91 0.00 0.00 -6.950866 sec - +^C +cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 + 8.86 3.61 3.38 15.06 31.19 44.89 0.00 0.00 + 0 0 1.46 3.22 3.38 16.84 29.48 52.22 0.00 0.00 + 0 6 0.21 3.06 3.38 18.09 + 1 2 0.53 3.33 3.38 2.80 46.40 50.27 + 1 8 0.89 3.47 3.38 2.44 + 2 4 1.36 3.43 3.38 9.04 23.71 65.89 + 2 10 0.18 2.86 3.38 10.22 + 8 1 0.04 2.87 3.38 99.96 0.01 0.00 + 8 7 99.72 3.63 3.38 0.27 + 9 3 0.31 3.21 3.38 7.64 56.55 35.50 + 9 9 0.08 2.95 3.38 7.88 + 10 5 1.42 3.43 3.38 2.14 30.99 65.44 + 10 11 0.16 2.88 3.38 3.40 .fi -Above the cycle soaker drives cpu9 up 3.6 Ghz turbo limit +Above the cycle soaker drives cpu7 up its 3.6 Ghz turbo limit while the other processors are generally in various states of idle. -Note that cpu3 is an HT sibling sharing core9 -with cpu9, and thus it is unable to get to an idle state -deeper than c1 while cpu9 is busy. +Note that cpu1 and cpu7 are HT siblings within core8. +As cpu7 is very busy, it prevents its sibling, cpu1, +from entering a c-state deeper than c1. -Note that turbostat reports average GHz of 3.61, while -the arithmetic average of the GHz column above is 3.24. +Note that turbostat reports average GHz of 3.63, while +the arithmetic average of the GHz column above is lower. This is a weighted average, where the weight is %c0. ie. it is the total number of un-halted cycles elapsed per time divided by the number of CPUs. .SH NOTES @@ -167,6 +187,6 @@ http://www.intel.com/products/processor/manuals/ .SH "SEE ALSO" msr(4), vmstat(8) .PP -.SH AUTHORS +.SH AUTHOR .nf Written by Len Brown <len.brown@intel.com> diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 8b2d37b59c9e..861d77190206 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2,7 +2,7 @@ * turbostat -- show CPU frequency and C-state residency * on modern Intel turbo-capable processors. * - * Copyright (c) 2010, Intel Corporation. + * Copyright (c) 2012 Intel Corporation. * Len Brown <len.brown@intel.com> * * This program is free software; you can redistribute it and/or modify it @@ -19,6 +19,7 @@ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. */ +#define _GNU_SOURCE #include <stdio.h> #include <unistd.h> #include <sys/types.h> @@ -32,6 +33,7 @@ #include <dirent.h> #include <string.h> #include <ctype.h> +#include <sched.h> #define MSR_TSC 0x10 #define MSR_NEHALEM_PLATFORM_INFO 0xCE @@ -49,6 +51,7 @@ char *proc_stat = "/proc/stat"; unsigned int interval_sec = 5; /* set with -i interval_sec */ unsigned int verbose; /* set with -v */ +unsigned int summary_only; /* set with -s */ unsigned int skip_c0; unsigned int skip_c1; unsigned int do_nhm_cstates; @@ -64,408 +67,607 @@ double bclk; unsigned int show_pkg; unsigned int show_core; unsigned int show_cpu; +unsigned int show_pkg_only; +unsigned int show_core_only; +char *output_buffer, *outp; int aperf_mperf_unstable; int backwards_count; char *progname; -int need_reinitialize; - -int num_cpus; - -struct counters { - unsigned long long tsc; /* per thread */ - unsigned long long aperf; /* per thread */ - unsigned long long mperf; /* per thread */ - unsigned long long c1; /* per thread (calculated) */ - unsigned long long c3; /* per core */ - unsigned long long c6; /* per core */ - unsigned long long c7; /* per core */ - unsigned long long pc2; /* per package */ - unsigned long long pc3; /* per package */ - unsigned long long pc6; /* per package */ - unsigned long long pc7; /* per package */ - unsigned long long extra_msr; /* per thread */ - int pkg; - int core; - int cpu; - struct counters *next; -}; - -struct counters *cnt_even; -struct counters *cnt_odd; -struct counters *cnt_delta; -struct counters *cnt_average; -struct timeval tv_even; -struct timeval tv_odd; -struct timeval tv_delta; - -unsigned long long get_msr(int cpu, off_t offset) + +cpu_set_t *cpu_present_set, *cpu_affinity_set; +size_t cpu_present_setsize, cpu_affinity_setsize; + +struct thread_data { + unsigned long long tsc; + unsigned long long aperf; + unsigned long long mperf; + unsigned long long c1; /* derived */ + unsigned long long extra_msr; + unsigned int cpu_id; + unsigned int flags; +#define CPU_IS_FIRST_THREAD_IN_CORE 0x2 +#define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4 +} *thread_even, *thread_odd; + +struct core_data { + unsigned long long c3; + unsigned long long c6; + unsigned long long c7; + unsigned int core_id; +} *core_even, *core_odd; + +struct pkg_data { + unsigned long long pc2; + unsigned long long pc3; + unsigned long long pc6; + unsigned long long pc7; + unsigned int package_id; +} *package_even, *package_odd; + +#define ODD_COUNTERS thread_odd, core_odd, package_odd +#define EVEN_COUNTERS thread_even, core_even, package_even + +#define GET_THREAD(thread_base, thread_no, core_no, pkg_no) \ + (thread_base + (pkg_no) * topo.num_cores_per_pkg * \ + topo.num_threads_per_core + \ + (core_no) * topo.num_threads_per_core + (thread_no)) +#define GET_CORE(core_base, core_no, pkg_no) \ + (core_base + (pkg_no) * topo.num_cores_per_pkg + (core_no)) +#define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no) + +struct system_summary { + struct thread_data threads; + struct core_data cores; + struct pkg_data packages; +} sum, average; + + +struct topo_params { + int num_packages; + int num_cpus; + int num_cores; + int max_cpu_num; + int num_cores_per_pkg; + int num_threads_per_core; +} topo; + +struct timeval tv_even, tv_odd, tv_delta; + +void setup_all_buffers(void); + +int cpu_is_not_present(int cpu) +{ + return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set); +} +/* + * run func(thread, core, package) in topology order + * skip non-present cpus + */ + +int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *), + struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base) +{ + int retval, pkg_no, core_no, thread_no; + + for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { + for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) { + for (thread_no = 0; thread_no < + topo.num_threads_per_core; ++thread_no) { + struct thread_data *t; + struct core_data *c; + struct pkg_data *p; + + t = GET_THREAD(thread_base, thread_no, core_no, pkg_no); + + if (cpu_is_not_present(t->cpu_id)) + continue; + + c = GET_CORE(core_base, core_no, pkg_no); + p = GET_PKG(pkg_base, pkg_no); + + retval = func(t, c, p); + if (retval) + return retval; + } + } + } + return 0; +} + +int cpu_migrate(int cpu) +{ + CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); + CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set); + if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1) + return -1; + else + return 0; +} + +int get_msr(int cpu, off_t offset, unsigned long long *msr) { ssize_t retval; - unsigned long long msr; char pathname[32]; int fd; sprintf(pathname, "/dev/cpu/%d/msr", cpu); fd = open(pathname, O_RDONLY); - if (fd < 0) { - perror(pathname); - need_reinitialize = 1; - return 0; - } - - retval = pread(fd, &msr, sizeof msr, offset); - if (retval != sizeof msr) { - fprintf(stderr, "cpu%d pread(..., 0x%zx) = %jd\n", - cpu, offset, retval); - exit(-2); - } + if (fd < 0) + return -1; + retval = pread(fd, msr, sizeof *msr, offset); close(fd); - return msr; + + if (retval != sizeof *msr) + return -1; + + return 0; } void print_header(void) { if (show_pkg) - fprintf(stderr, "pk"); + outp += sprintf(outp, "pk"); + if (show_pkg) + outp += sprintf(outp, " "); if (show_core) - fprintf(stderr, " cr"); + outp += sprintf(outp, "cor"); if (show_cpu) - fprintf(stderr, " CPU"); + outp += sprintf(outp, " CPU"); + if (show_pkg || show_core || show_cpu) + outp += sprintf(outp, " "); if (do_nhm_cstates) - fprintf(stderr, " %%c0 "); + outp += sprintf(outp, " %%c0"); if (has_aperf) - fprintf(stderr, " GHz"); - fprintf(stderr, " TSC"); + outp += sprintf(outp, " GHz"); + outp += sprintf(outp, " TSC"); if (do_nhm_cstates) - fprintf(stderr, " %%c1"); + outp += sprintf(outp, " %%c1"); if (do_nhm_cstates) - fprintf(stderr, " %%c3"); + outp += sprintf(outp, " %%c3"); if (do_nhm_cstates) - fprintf(stderr, " %%c6"); + outp += sprintf(outp, " %%c6"); if (do_snb_cstates) - fprintf(stderr, " %%c7"); + outp += sprintf(outp, " %%c7"); if (do_snb_cstates) - fprintf(stderr, " %%pc2"); + outp += sprintf(outp, " %%pc2"); if (do_nhm_cstates) - fprintf(stderr, " %%pc3"); + outp += sprintf(outp, " %%pc3"); if (do_nhm_cstates) - fprintf(stderr, " %%pc6"); + outp += sprintf(outp, " %%pc6"); if (do_snb_cstates) - fprintf(stderr, " %%pc7"); + outp += sprintf(outp, " %%pc7"); if (extra_msr_offset) - fprintf(stderr, " MSR 0x%x ", extra_msr_offset); + outp += sprintf(outp, " MSR 0x%x ", extra_msr_offset); - putc('\n', stderr); + outp += sprintf(outp, "\n"); } -void dump_cnt(struct counters *cnt) +int dump_counters(struct thread_data *t, struct core_data *c, + struct pkg_data *p) { - fprintf(stderr, "package: %d ", cnt->pkg); - fprintf(stderr, "core:: %d ", cnt->core); - fprintf(stderr, "CPU: %d ", cnt->cpu); - fprintf(stderr, "TSC: %016llX\n", cnt->tsc); - fprintf(stderr, "c3: %016llX\n", cnt->c3); - fprintf(stderr, "c6: %016llX\n", cnt->c6); - fprintf(stderr, "c7: %016llX\n", cnt->c7); - fprintf(stderr, "aperf: %016llX\n", cnt->aperf); - fprintf(stderr, "pc2: %016llX\n", cnt->pc2); - fprintf(stderr, "pc3: %016llX\n", cnt->pc3); - fprintf(stderr, "pc6: %016llX\n", cnt->pc6); - fprintf(stderr, "pc7: %016llX\n", cnt->pc7); - fprintf(stderr, "msr0x%x: %016llX\n", extra_msr_offset, cnt->extra_msr); -} + fprintf(stderr, "t %p, c %p, p %p\n", t, c, p); + + if (t) { + fprintf(stderr, "CPU: %d flags 0x%x\n", t->cpu_id, t->flags); + fprintf(stderr, "TSC: %016llX\n", t->tsc); + fprintf(stderr, "aperf: %016llX\n", t->aperf); + fprintf(stderr, "mperf: %016llX\n", t->mperf); + fprintf(stderr, "c1: %016llX\n", t->c1); + fprintf(stderr, "msr0x%x: %016llX\n", + extra_msr_offset, t->extra_msr); + } -void dump_list(struct counters *cnt) -{ - printf("dump_list 0x%p\n", cnt); + if (c) { + fprintf(stderr, "core: %d\n", c->core_id); + fprintf(stderr, "c3: %016llX\n", c->c3); + fprintf(stderr, "c6: %016llX\n", c->c6); + fprintf(stderr, "c7: %016llX\n", c->c7); + } - for (; cnt; cnt = cnt->next) - dump_cnt(cnt); + if (p) { + fprintf(stderr, "package: %d\n", p->package_id); + fprintf(stderr, "pc2: %016llX\n", p->pc2); + fprintf(stderr, "pc3: %016llX\n", p->pc3); + fprintf(stderr, "pc6: %016llX\n", p->pc6); + fprintf(stderr, "pc7: %016llX\n", p->pc7); + } + return 0; } -void print_cnt(struct counters *p) +/* + * column formatting convention & formats + * package: "pk" 2 columns %2d + * core: "cor" 3 columns %3d + * CPU: "CPU" 3 columns %3d + * GHz: "GHz" 3 columns %3.2 + * TSC: "TSC" 3 columns %3.2 + * percentage " %pc3" %6.2 + */ +int format_counters(struct thread_data *t, struct core_data *c, + struct pkg_data *p) { double interval_float; + /* if showing only 1st thread in core and this isn't one, bail out */ + if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) + return 0; + + /* if showing only 1st thread in pkg and this isn't one, bail out */ + if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + return 0; + interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0; - /* topology columns, print blanks on 1st (average) line */ - if (p == cnt_average) { + /* topo columns, print blanks on 1st (average) line */ + if (t == &average.threads) { if (show_pkg) - fprintf(stderr, " "); + outp += sprintf(outp, " "); + if (show_pkg && show_core) + outp += sprintf(outp, " "); if (show_core) - fprintf(stderr, " "); + outp += sprintf(outp, " "); if (show_cpu) - fprintf(stderr, " "); + outp += sprintf(outp, " " " "); } else { - if (show_pkg) - fprintf(stderr, "%d", p->pkg); - if (show_core) - fprintf(stderr, "%4d", p->core); + if (show_pkg) { + if (p) + outp += sprintf(outp, "%2d", p->package_id); + else + outp += sprintf(outp, " "); + } + if (show_pkg && show_core) + outp += sprintf(outp, " "); + if (show_core) { + if (c) + outp += sprintf(outp, "%3d", c->core_id); + else + outp += sprintf(outp, " "); + } if (show_cpu) - fprintf(stderr, "%4d", p->cpu); + outp += sprintf(outp, " %3d", t->cpu_id); } /* %c0 */ if (do_nhm_cstates) { + if (show_pkg || show_core || show_cpu) + outp += sprintf(outp, " "); if (!skip_c0) - fprintf(stderr, "%7.2f", 100.0 * p->mperf/p->tsc); + outp += sprintf(outp, "%6.2f", 100.0 * t->mperf/t->tsc); else - fprintf(stderr, " ****"); + outp += sprintf(outp, " ****"); } /* GHz */ if (has_aperf) { if (!aperf_mperf_unstable) { - fprintf(stderr, "%5.2f", - 1.0 * p->tsc / units * p->aperf / - p->mperf / interval_float); + outp += sprintf(outp, " %3.2f", + 1.0 * t->tsc / units * t->aperf / + t->mperf / interval_float); } else { - if (p->aperf > p->tsc || p->mperf > p->tsc) { - fprintf(stderr, " ****"); + if (t->aperf > t->tsc || t->mperf > t->tsc) { + outp += sprintf(outp, " ***"); } else { - fprintf(stderr, "%4.1f*", - 1.0 * p->tsc / - units * p->aperf / - p->mperf / interval_float); + outp += sprintf(outp, "%3.1f*", + 1.0 * t->tsc / + units * t->aperf / + t->mperf / interval_float); } } } /* TSC */ - fprintf(stderr, "%5.2f", 1.0 * p->tsc/units/interval_float); + outp += sprintf(outp, "%5.2f", 1.0 * t->tsc/units/interval_float); if (do_nhm_cstates) { if (!skip_c1) - fprintf(stderr, "%7.2f", 100.0 * p->c1/p->tsc); + outp += sprintf(outp, " %6.2f", 100.0 * t->c1/t->tsc); else - fprintf(stderr, " ****"); + outp += sprintf(outp, " ****"); } + + /* print per-core data only for 1st thread in core */ + if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) + goto done; + if (do_nhm_cstates) - fprintf(stderr, " %6.2f", 100.0 * p->c3/p->tsc); + outp += sprintf(outp, " %6.2f", 100.0 * c->c3/t->tsc); if (do_nhm_cstates) - fprintf(stderr, " %6.2f", 100.0 * p->c6/p->tsc); + outp += sprintf(outp, " %6.2f", 100.0 * c->c6/t->tsc); if (do_snb_cstates) - fprintf(stderr, " %6.2f", 100.0 * p->c7/p->tsc); + outp += sprintf(outp, " %6.2f", 100.0 * c->c7/t->tsc); + + /* print per-package data only for 1st core in package */ + if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + goto done; + if (do_snb_cstates) - fprintf(stderr, " %5.2f", 100.0 * p->pc2/p->tsc); + outp += sprintf(outp, " %6.2f", 100.0 * p->pc2/t->tsc); if (do_nhm_cstates) - fprintf(stderr, " %5.2f", 100.0 * p->pc3/p->tsc); + outp += sprintf(outp, " %6.2f", 100.0 * p->pc3/t->tsc); if (do_nhm_cstates) - fprintf(stderr, " %5.2f", 100.0 * p->pc6/p->tsc); + outp += sprintf(outp, " %6.2f", 100.0 * p->pc6/t->tsc); if (do_snb_cstates) - fprintf(stderr, " %5.2f", 100.0 * p->pc7/p->tsc); + outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc); +done: if (extra_msr_offset) - fprintf(stderr, " 0x%016llx", p->extra_msr); - putc('\n', stderr); + outp += sprintf(outp, " 0x%016llx", t->extra_msr); + outp += sprintf(outp, "\n"); + + return 0; } -void print_counters(struct counters *counters) +void flush_stdout() +{ + fputs(output_buffer, stdout); + outp = output_buffer; +} +void flush_stderr() { - struct counters *cnt; + fputs(output_buffer, stderr); + outp = output_buffer; +} +void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + static int printed; - print_header(); + if (!printed || !summary_only) + print_header(); - if (num_cpus > 1) - print_cnt(cnt_average); + if (topo.num_cpus > 1) + format_counters(&average.threads, &average.cores, + &average.packages); - for (cnt = counters; cnt != NULL; cnt = cnt->next) - print_cnt(cnt); + printed = 1; + if (summary_only) + return; + + for_all_cpus(format_counters, t, c, p); } -#define SUBTRACT_COUNTER(after, before, delta) (delta = (after - before), (before > after)) +void +delta_package(struct pkg_data *new, struct pkg_data *old) +{ + old->pc2 = new->pc2 - old->pc2; + old->pc3 = new->pc3 - old->pc3; + old->pc6 = new->pc6 - old->pc6; + old->pc7 = new->pc7 - old->pc7; +} -int compute_delta(struct counters *after, - struct counters *before, struct counters *delta) +void +delta_core(struct core_data *new, struct core_data *old) { - int errors = 0; - int perf_err = 0; + old->c3 = new->c3 - old->c3; + old->c6 = new->c6 - old->c6; + old->c7 = new->c7 - old->c7; +} - skip_c0 = skip_c1 = 0; +/* + * old = new - old + */ +void +delta_thread(struct thread_data *new, struct thread_data *old, + struct core_data *core_delta) +{ + old->tsc = new->tsc - old->tsc; + + /* check for TSC < 1 Mcycles over interval */ + if (old->tsc < (1000 * 1000)) { + fprintf(stderr, "Insanely slow TSC rate, TSC stops in idle?\n"); + fprintf(stderr, "You can disable all c-states by booting with \"idle=poll\"\n"); + fprintf(stderr, "or just the deep ones with \"processor.max_cstate=1\"\n"); + exit(-3); + } - for ( ; after && before && delta; - after = after->next, before = before->next, delta = delta->next) { - if (before->cpu != after->cpu) { - printf("cpu configuration changed: %d != %d\n", - before->cpu, after->cpu); - return -1; - } + old->c1 = new->c1 - old->c1; - if (SUBTRACT_COUNTER(after->tsc, before->tsc, delta->tsc)) { - fprintf(stderr, "cpu%d TSC went backwards %llX to %llX\n", - before->cpu, before->tsc, after->tsc); - errors++; - } - /* check for TSC < 1 Mcycles over interval */ - if (delta->tsc < (1000 * 1000)) { - fprintf(stderr, "Insanely slow TSC rate," - " TSC stops in idle?\n"); - fprintf(stderr, "You can disable all c-states" - " by booting with \"idle=poll\"\n"); - fprintf(stderr, "or just the deep ones with" - " \"processor.max_cstate=1\"\n"); - exit(-3); - } - if (SUBTRACT_COUNTER(after->c3, before->c3, delta->c3)) { - fprintf(stderr, "cpu%d c3 counter went backwards %llX to %llX\n", - before->cpu, before->c3, after->c3); - errors++; - } - if (SUBTRACT_COUNTER(after->c6, before->c6, delta->c6)) { - fprintf(stderr, "cpu%d c6 counter went backwards %llX to %llX\n", - before->cpu, before->c6, after->c6); - errors++; - } - if (SUBTRACT_COUNTER(after->c7, before->c7, delta->c7)) { - fprintf(stderr, "cpu%d c7 counter went backwards %llX to %llX\n", - before->cpu, before->c7, after->c7); - errors++; - } - if (SUBTRACT_COUNTER(after->pc2, before->pc2, delta->pc2)) { - fprintf(stderr, "cpu%d pc2 counter went backwards %llX to %llX\n", - before->cpu, before->pc2, after->pc2); - errors++; - } - if (SUBTRACT_COUNTER(after->pc3, before->pc3, delta->pc3)) { - fprintf(stderr, "cpu%d pc3 counter went backwards %llX to %llX\n", - before->cpu, before->pc3, after->pc3); - errors++; - } - if (SUBTRACT_COUNTER(after->pc6, before->pc6, delta->pc6)) { - fprintf(stderr, "cpu%d pc6 counter went backwards %llX to %llX\n", - before->cpu, before->pc6, after->pc6); - errors++; - } - if (SUBTRACT_COUNTER(after->pc7, before->pc7, delta->pc7)) { - fprintf(stderr, "cpu%d pc7 counter went backwards %llX to %llX\n", - before->cpu, before->pc7, after->pc7); - errors++; - } + if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) { + old->aperf = new->aperf - old->aperf; + old->mperf = new->mperf - old->mperf; + } else { - perf_err = SUBTRACT_COUNTER(after->aperf, before->aperf, delta->aperf); - if (perf_err) { - fprintf(stderr, "cpu%d aperf counter went backwards %llX to %llX\n", - before->cpu, before->aperf, after->aperf); - } - perf_err |= SUBTRACT_COUNTER(after->mperf, before->mperf, delta->mperf); - if (perf_err) { - fprintf(stderr, "cpu%d mperf counter went backwards %llX to %llX\n", - before->cpu, before->mperf, after->mperf); - } - if (perf_err) { - if (!aperf_mperf_unstable) { - fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname); - fprintf(stderr, "* Frequency results do not cover entire interval *\n"); - fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n"); + if (!aperf_mperf_unstable) { + fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname); + fprintf(stderr, "* Frequency results do not cover entire interval *\n"); + fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n"); - aperf_mperf_unstable = 1; - } - /* - * mperf delta is likely a huge "positive" number - * can not use it for calculating c0 time - */ - skip_c0 = 1; - skip_c1 = 1; + aperf_mperf_unstable = 1; } - /* - * As mperf and tsc collection are not atomic, - * it is possible for mperf's non-halted cycles - * to exceed TSC's all cycles: show c1 = 0% in that case. + * mperf delta is likely a huge "positive" number + * can not use it for calculating c0 time */ - if (delta->mperf > delta->tsc) - delta->c1 = 0; - else /* normal case, derive c1 */ - delta->c1 = delta->tsc - delta->mperf - - delta->c3 - delta->c6 - delta->c7; + skip_c0 = 1; + skip_c1 = 1; + } - if (delta->mperf == 0) - delta->mperf = 1; /* divide by 0 protection */ - /* - * for "extra msr", just copy the latest w/o subtracting - */ - delta->extra_msr = after->extra_msr; - if (errors) { - fprintf(stderr, "ERROR cpu%d before:\n", before->cpu); - dump_cnt(before); - fprintf(stderr, "ERROR cpu%d after:\n", before->cpu); - dump_cnt(after); - errors = 0; - } + /* + * As counter collection is not atomic, + * it is possible for mperf's non-halted cycles + idle states + * to exceed TSC's all cycles: show c1 = 0% in that case. + */ + if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > old->tsc) + old->c1 = 0; + else { + /* normal case, derive c1 */ + old->c1 = old->tsc - old->mperf - core_delta->c3 + - core_delta->c6 - core_delta->c7; } + + if (old->mperf == 0) { + if (verbose > 1) fprintf(stderr, "cpu%d MPERF 0!\n", old->cpu_id); + old->mperf = 1; /* divide by 0 protection */ + } + + /* + * for "extra msr", just copy the latest w/o subtracting + */ + old->extra_msr = new->extra_msr; +} + +int delta_cpu(struct thread_data *t, struct core_data *c, + struct pkg_data *p, struct thread_data *t2, + struct core_data *c2, struct pkg_data *p2) +{ + /* calculate core delta only for 1st thread in core */ + if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE) + delta_core(c, c2); + + /* always calculate thread delta */ + delta_thread(t, t2, c2); /* c2 is core delta */ + + /* calculate package delta only for 1st core in package */ + if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE) + delta_package(p, p2); + return 0; } -void compute_average(struct counters *delta, struct counters *avg) +void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + t->tsc = 0; + t->aperf = 0; + t->mperf = 0; + t->c1 = 0; + + /* tells format_counters to dump all fields from this set */ + t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE; + + c->c3 = 0; + c->c6 = 0; + c->c7 = 0; + + p->pc2 = 0; + p->pc3 = 0; + p->pc6 = 0; + p->pc7 = 0; +} +int sum_counters(struct thread_data *t, struct core_data *c, + struct pkg_data *p) +{ + average.threads.tsc += t->tsc; + average.threads.aperf += t->aperf; + average.threads.mperf += t->mperf; + average.threads.c1 += t->c1; + + /* sum per-core values only for 1st thread in core */ + if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) + return 0; + + average.cores.c3 += c->c3; + average.cores.c6 += c->c6; + average.cores.c7 += c->c7; + + /* sum per-pkg values only for 1st core in pkg */ + if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + return 0; + + average.packages.pc2 += p->pc2; + average.packages.pc3 += p->pc3; + average.packages.pc6 += p->pc6; + average.packages.pc7 += p->pc7; + + return 0; +} +/* + * sum the counters for all cpus in the system + * compute the weighted average + */ +void compute_average(struct thread_data *t, struct core_data *c, + struct pkg_data *p) { - struct counters *sum; + clear_counters(&average.threads, &average.cores, &average.packages); - sum = calloc(1, sizeof(struct counters)); - if (sum == NULL) { - perror("calloc sum"); - exit(1); - } + for_all_cpus(sum_counters, t, c, p); - for (; delta; delta = delta->next) { - sum->tsc += delta->tsc; - sum->c1 += delta->c1; - sum->c3 += delta->c3; - sum->c6 += delta->c6; - sum->c7 += delta->c7; - sum->aperf += delta->aperf; - sum->mperf += delta->mperf; - sum->pc2 += delta->pc2; - sum->pc3 += delta->pc3; - sum->pc6 += delta->pc6; - sum->pc7 += delta->pc7; - } - avg->tsc = sum->tsc/num_cpus; - avg->c1 = sum->c1/num_cpus; - avg->c3 = sum->c3/num_cpus; - avg->c6 = sum->c6/num_cpus; - avg->c7 = sum->c7/num_cpus; - avg->aperf = sum->aperf/num_cpus; - avg->mperf = sum->mperf/num_cpus; - avg->pc2 = sum->pc2/num_cpus; - avg->pc3 = sum->pc3/num_cpus; - avg->pc6 = sum->pc6/num_cpus; - avg->pc7 = sum->pc7/num_cpus; - - free(sum); + average.threads.tsc /= topo.num_cpus; + average.threads.aperf /= topo.num_cpus; + average.threads.mperf /= topo.num_cpus; + average.threads.c1 /= topo.num_cpus; + + average.cores.c3 /= topo.num_cores; + average.cores.c6 /= topo.num_cores; + average.cores.c7 /= topo.num_cores; + + average.packages.pc2 /= topo.num_packages; + average.packages.pc3 /= topo.num_packages; + average.packages.pc6 /= topo.num_packages; + average.packages.pc7 /= topo.num_packages; } -void get_counters(struct counters *cnt) +static unsigned long long rdtsc(void) { - for ( ; cnt; cnt = cnt->next) { - cnt->tsc = get_msr(cnt->cpu, MSR_TSC); - if (do_nhm_cstates) - cnt->c3 = get_msr(cnt->cpu, MSR_CORE_C3_RESIDENCY); - if (do_nhm_cstates) - cnt->c6 = get_msr(cnt->cpu, MSR_CORE_C6_RESIDENCY); - if (do_snb_cstates) - cnt->c7 = get_msr(cnt->cpu, MSR_CORE_C7_RESIDENCY); - if (has_aperf) - cnt->aperf = get_msr(cnt->cpu, MSR_APERF); - if (has_aperf) - cnt->mperf = get_msr(cnt->cpu, MSR_MPERF); - if (do_snb_cstates) - cnt->pc2 = get_msr(cnt->cpu, MSR_PKG_C2_RESIDENCY); - if (do_nhm_cstates) - cnt->pc3 = get_msr(cnt->cpu, MSR_PKG_C3_RESIDENCY); - if (do_nhm_cstates) - cnt->pc6 = get_msr(cnt->cpu, MSR_PKG_C6_RESIDENCY); - if (do_snb_cstates) - cnt->pc7 = get_msr(cnt->cpu, MSR_PKG_C7_RESIDENCY); - if (extra_msr_offset) - cnt->extra_msr = get_msr(cnt->cpu, extra_msr_offset); + unsigned int low, high; + + asm volatile("rdtsc" : "=a" (low), "=d" (high)); + + return low | ((unsigned long long)high) << 32; +} + + +/* + * get_counters(...) + * migrate to cpu + * acquire and record local counters for that cpu + */ +int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + int cpu = t->cpu_id; + + if (cpu_migrate(cpu)) + return -1; + + t->tsc = rdtsc(); /* we are running on local CPU of interest */ + + if (has_aperf) { + if (get_msr(cpu, MSR_APERF, &t->aperf)) + return -3; + if (get_msr(cpu, MSR_MPERF, &t->mperf)) + return -4; } + + if (extra_msr_offset) + if (get_msr(cpu, extra_msr_offset, &t->extra_msr)) + return -5; + + /* collect core counters only for 1st thread in core */ + if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) + return 0; + + if (do_nhm_cstates) { + if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) + return -6; + if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6)) + return -7; + } + + if (do_snb_cstates) + if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7)) + return -8; + + /* collect package counters only for 1st core in package */ + if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + return 0; + + if (do_nhm_cstates) { + if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3)) + return -9; + if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6)) + return -10; + } + if (do_snb_cstates) { + if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2)) + return -11; + if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7)) + return -12; + } + return 0; } -void print_nehalem_info(void) +void print_verbose_header(void) { unsigned long long msr; unsigned int ratio; @@ -473,7 +675,7 @@ void print_nehalem_info(void) if (!do_nehalem_platform_info) return; - msr = get_msr(0, MSR_NEHALEM_PLATFORM_INFO); + get_msr(0, MSR_NEHALEM_PLATFORM_INFO, &msr); ratio = (msr >> 40) & 0xFF; fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n", @@ -489,7 +691,7 @@ void print_nehalem_info(void) if (!do_nehalem_turbo_ratio_limit) return; - msr = get_msr(0, MSR_NEHALEM_TURBO_RATIO_LIMIT); + get_msr(0, MSR_NEHALEM_TURBO_RATIO_LIMIT, &msr); ratio = (msr >> 24) & 0xFF; if (ratio) @@ -513,140 +715,82 @@ void print_nehalem_info(void) } -void free_counter_list(struct counters *list) +void free_all_buffers(void) { - struct counters *p; + CPU_FREE(cpu_present_set); + cpu_present_set = NULL; + cpu_present_set = 0; - for (p = list; p; ) { - struct counters *free_me; + CPU_FREE(cpu_affinity_set); + cpu_affinity_set = NULL; + cpu_affinity_setsize = 0; - free_me = p; - p = p->next; - free(free_me); - } -} + free(thread_even); + free(core_even); + free(package_even); -void free_all_counters(void) -{ - free_counter_list(cnt_even); - cnt_even = NULL; + thread_even = NULL; + core_even = NULL; + package_even = NULL; - free_counter_list(cnt_odd); - cnt_odd = NULL; + free(thread_odd); + free(core_odd); + free(package_odd); - free_counter_list(cnt_delta); - cnt_delta = NULL; + thread_odd = NULL; + core_odd = NULL; + package_odd = NULL; - free_counter_list(cnt_average); - cnt_average = NULL; + free(output_buffer); + output_buffer = NULL; + outp = NULL; } -void insert_counters(struct counters **list, - struct counters *new) +/* + * cpu_is_first_sibling_in_core(cpu) + * return 1 if given CPU is 1st HT sibling in the core + */ +int cpu_is_first_sibling_in_core(int cpu) { - struct counters *prev; - - /* - * list was empty - */ - if (*list == NULL) { - new->next = *list; - *list = new; - return; - } - - show_cpu = 1; /* there is more than one CPU */ - - /* - * insert on front of list. - * It is sorted by ascending package#, core#, cpu# - */ - if (((*list)->pkg > new->pkg) || - (((*list)->pkg == new->pkg) && ((*list)->core > new->core)) || - (((*list)->pkg == new->pkg) && ((*list)->core == new->core) && ((*list)->cpu > new->cpu))) { - new->next = *list; - *list = new; - return; - } - - prev = *list; - - while (prev->next && (prev->next->pkg < new->pkg)) { - prev = prev->next; - show_pkg = 1; /* there is more than 1 package */ - } - - while (prev->next && (prev->next->pkg == new->pkg) - && (prev->next->core < new->core)) { - prev = prev->next; - show_core = 1; /* there is more than 1 core */ - } + char path[64]; + FILE *filep; + int first_cpu; - while (prev->next && (prev->next->pkg == new->pkg) - && (prev->next->core == new->core) - && (prev->next->cpu < new->cpu)) { - prev = prev->next; + sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); + filep = fopen(path, "r"); + if (filep == NULL) { + perror(path); + exit(1); } - - /* - * insert after "prev" - */ - new->next = prev->next; - prev->next = new; + fscanf(filep, "%d", &first_cpu); + fclose(filep); + return (cpu == first_cpu); } -void alloc_new_counters(int pkg, int core, int cpu) +/* + * cpu_is_first_core_in_package(cpu) + * return 1 if given CPU is 1st core in package + */ +int cpu_is_first_core_in_package(int cpu) { - struct counters *new; - - if (verbose > 1) - printf("pkg%d core%d, cpu%d\n", pkg, core, cpu); - - new = (struct counters *)calloc(1, sizeof(struct counters)); - if (new == NULL) { - perror("calloc"); - exit(1); - } - new->pkg = pkg; - new->core = core; - new->cpu = cpu; - insert_counters(&cnt_odd, new); - - new = (struct counters *)calloc(1, - sizeof(struct counters)); - if (new == NULL) { - perror("calloc"); - exit(1); - } - new->pkg = pkg; - new->core = core; - new->cpu = cpu; - insert_counters(&cnt_even, new); - - new = (struct counters *)calloc(1, sizeof(struct counters)); - if (new == NULL) { - perror("calloc"); - exit(1); - } - new->pkg = pkg; - new->core = core; - new->cpu = cpu; - insert_counters(&cnt_delta, new); + char path[64]; + FILE *filep; + int first_cpu; - new = (struct counters *)calloc(1, sizeof(struct counters)); - if (new == NULL) { - perror("calloc"); + sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu); + filep = fopen(path, "r"); + if (filep == NULL) { + perror(path); exit(1); } - new->pkg = pkg; - new->core = core; - new->cpu = cpu; - cnt_average = new; + fscanf(filep, "%d", &first_cpu); + fclose(filep); + return (cpu == first_cpu); } int get_physical_package_id(int cpu) { - char path[64]; + char path[80]; FILE *filep; int pkg; @@ -663,7 +807,7 @@ int get_physical_package_id(int cpu) int get_core_id(int cpu) { - char path[64]; + char path[80]; FILE *filep; int core; @@ -678,14 +822,87 @@ int get_core_id(int cpu) return core; } +int get_num_ht_siblings(int cpu) +{ + char path[80]; + FILE *filep; + int sib1, sib2; + int matches; + char character; + + sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); + filep = fopen(path, "r"); + if (filep == NULL) { + perror(path); + exit(1); + } + /* + * file format: + * if a pair of number with a character between: 2 siblings (eg. 1-2, or 1,4) + * otherwinse 1 sibling (self). + */ + matches = fscanf(filep, "%d%c%d\n", &sib1, &character, &sib2); + + fclose(filep); + + if (matches == 3) + return 2; + else + return 1; +} + /* - * run func(index, cpu) on every cpu in /proc/stat + * run func(thread, core, package) in topology order + * skip non-present cpus */ -int for_all_cpus(void (func)(int, int, int)) +int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *, + struct pkg_data *, struct thread_data *, struct core_data *, + struct pkg_data *), struct thread_data *thread_base, + struct core_data *core_base, struct pkg_data *pkg_base, + struct thread_data *thread_base2, struct core_data *core_base2, + struct pkg_data *pkg_base2) +{ + int retval, pkg_no, core_no, thread_no; + + for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { + for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) { + for (thread_no = 0; thread_no < + topo.num_threads_per_core; ++thread_no) { + struct thread_data *t, *t2; + struct core_data *c, *c2; + struct pkg_data *p, *p2; + + t = GET_THREAD(thread_base, thread_no, core_no, pkg_no); + + if (cpu_is_not_present(t->cpu_id)) + continue; + + t2 = GET_THREAD(thread_base2, thread_no, core_no, pkg_no); + + c = GET_CORE(core_base, core_no, pkg_no); + c2 = GET_CORE(core_base2, core_no, pkg_no); + + p = GET_PKG(pkg_base, pkg_no); + p2 = GET_PKG(pkg_base2, pkg_no); + + retval = func(t, c, p, t2, c2, p2); + if (retval) + return retval; + } + } + } + return 0; +} + +/* + * run func(cpu) on every cpu in /proc/stat + * return max_cpu number + */ +int for_all_proc_cpus(int (func)(int)) { FILE *fp; - int cpu_count; + int cpu_num; int retval; fp = fopen(proc_stat, "r"); @@ -700,77 +917,88 @@ int for_all_cpus(void (func)(int, int, int)) exit(1); } - for (cpu_count = 0; ; cpu_count++) { - int cpu; - - retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu); + while (1) { + retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num); if (retval != 1) break; - func(get_physical_package_id(cpu), get_core_id(cpu), cpu); + retval = func(cpu_num); + if (retval) { + fclose(fp); + return(retval); + } } fclose(fp); - return cpu_count; + return 0; } void re_initialize(void) { - printf("turbostat: topology changed, re-initializing.\n"); - free_all_counters(); - num_cpus = for_all_cpus(alloc_new_counters); - need_reinitialize = 0; - printf("num_cpus is now %d\n", num_cpus); + free_all_buffers(); + setup_all_buffers(); + printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus); } -void dummy(int pkg, int core, int cpu) { return; } + /* - * check to see if a cpu came on-line + * count_cpus() + * remember the last one seen, it will be the max */ -void verify_num_cpus(void) +int count_cpus(int cpu) { - int new_num_cpus; + if (topo.max_cpu_num < cpu) + topo.max_cpu_num = cpu; - new_num_cpus = for_all_cpus(dummy); - - if (new_num_cpus != num_cpus) { - if (verbose) - printf("num_cpus was %d, is now %d\n", - num_cpus, new_num_cpus); - need_reinitialize = 1; - } + topo.num_cpus += 1; + return 0; +} +int mark_cpu_present(int cpu) +{ + CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set); + return 0; } void turbostat_loop() { + int retval; + restart: - get_counters(cnt_even); + retval = for_all_cpus(get_counters, EVEN_COUNTERS); + if (retval) { + re_initialize(); + goto restart; + } gettimeofday(&tv_even, (struct timezone *)NULL); while (1) { - verify_num_cpus(); - if (need_reinitialize) { + if (for_all_proc_cpus(cpu_is_not_present)) { re_initialize(); goto restart; } sleep(interval_sec); - get_counters(cnt_odd); + retval = for_all_cpus(get_counters, ODD_COUNTERS); + if (retval) { + re_initialize(); + goto restart; + } gettimeofday(&tv_odd, (struct timezone *)NULL); - - compute_delta(cnt_odd, cnt_even, cnt_delta); timersub(&tv_odd, &tv_even, &tv_delta); - compute_average(cnt_delta, cnt_average); - print_counters(cnt_delta); - if (need_reinitialize) { + for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); + compute_average(EVEN_COUNTERS); + format_all_counters(EVEN_COUNTERS); + flush_stdout(); + sleep(interval_sec); + retval = for_all_cpus(get_counters, EVEN_COUNTERS); + if (retval) { re_initialize(); goto restart; } - sleep(interval_sec); - get_counters(cnt_even); gettimeofday(&tv_even, (struct timezone *)NULL); - compute_delta(cnt_even, cnt_odd, cnt_delta); timersub(&tv_even, &tv_odd, &tv_delta); - compute_average(cnt_delta, cnt_average); - print_counters(cnt_delta); + for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS); + compute_average(ODD_COUNTERS); + format_all_counters(ODD_COUNTERS); + flush_stdout(); } } @@ -809,6 +1037,8 @@ int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model) case 0x2C: /* Westmere EP - Gulftown */ case 0x2A: /* SNB */ case 0x2D: /* SNB Xeon */ + case 0x3A: /* IVB */ + case 0x3D: /* IVB Xeon */ return 1; case 0x2E: /* Nehalem-EX Xeon - Beckton */ case 0x2F: /* Westmere-EX Xeon - Eagleton */ @@ -825,6 +1055,8 @@ int is_snb(unsigned int family, unsigned int model) switch (model) { case 0x2A: case 0x2D: + case 0x3A: /* IVB */ + case 0x3D: /* IVB Xeon */ return 1; } return 0; @@ -941,6 +1173,208 @@ int open_dev_cpu_msr(int dummy1) return 0; } +void topology_probe() +{ + int i; + int max_core_id = 0; + int max_package_id = 0; + int max_siblings = 0; + struct cpu_topology { + int core_id; + int physical_package_id; + } *cpus; + + /* Initialize num_cpus, max_cpu_num */ + topo.num_cpus = 0; + topo.max_cpu_num = 0; + for_all_proc_cpus(count_cpus); + if (!summary_only && topo.num_cpus > 1) + show_cpu = 1; + + if (verbose > 1) + fprintf(stderr, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); + + cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology)); + if (cpus == NULL) { + perror("calloc cpus"); + exit(1); + } + + /* + * Allocate and initialize cpu_present_set + */ + cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1)); + if (cpu_present_set == NULL) { + perror("CPU_ALLOC"); + exit(3); + } + cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); + CPU_ZERO_S(cpu_present_setsize, cpu_present_set); + for_all_proc_cpus(mark_cpu_present); + + /* + * Allocate and initialize cpu_affinity_set + */ + cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1)); + if (cpu_affinity_set == NULL) { + perror("CPU_ALLOC"); + exit(3); + } + cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); + CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); + + + /* + * For online cpus + * find max_core_id, max_package_id + */ + for (i = 0; i <= topo.max_cpu_num; ++i) { + int siblings; + + if (cpu_is_not_present(i)) { + if (verbose > 1) + fprintf(stderr, "cpu%d NOT PRESENT\n", i); + continue; + } + cpus[i].core_id = get_core_id(i); + if (cpus[i].core_id > max_core_id) + max_core_id = cpus[i].core_id; + + cpus[i].physical_package_id = get_physical_package_id(i); + if (cpus[i].physical_package_id > max_package_id) + max_package_id = cpus[i].physical_package_id; + + siblings = get_num_ht_siblings(i); + if (siblings > max_siblings) + max_siblings = siblings; + if (verbose > 1) + fprintf(stderr, "cpu %d pkg %d core %d\n", + i, cpus[i].physical_package_id, cpus[i].core_id); + } + topo.num_cores_per_pkg = max_core_id + 1; + if (verbose > 1) + fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n", + max_core_id, topo.num_cores_per_pkg); + if (!summary_only && topo.num_cores_per_pkg > 1) + show_core = 1; + + topo.num_packages = max_package_id + 1; + if (verbose > 1) + fprintf(stderr, "max_package_id %d, sizing for %d packages\n", + max_package_id, topo.num_packages); + if (!summary_only && topo.num_packages > 1) + show_pkg = 1; + + topo.num_threads_per_core = max_siblings; + if (verbose > 1) + fprintf(stderr, "max_siblings %d\n", max_siblings); + + free(cpus); +} + +void +allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p) +{ + int i; + + *t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg * + topo.num_packages, sizeof(struct thread_data)); + if (*t == NULL) + goto error; + + for (i = 0; i < topo.num_threads_per_core * + topo.num_cores_per_pkg * topo.num_packages; i++) + (*t)[i].cpu_id = -1; + + *c = calloc(topo.num_cores_per_pkg * topo.num_packages, + sizeof(struct core_data)); + if (*c == NULL) + goto error; + + for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++) + (*c)[i].core_id = -1; + + *p = calloc(topo.num_packages, sizeof(struct pkg_data)); + if (*p == NULL) + goto error; + + for (i = 0; i < topo.num_packages; i++) + (*p)[i].package_id = i; + + return; +error: + perror("calloc counters"); + exit(1); +} +/* + * init_counter() + * + * set cpu_id, core_num, pkg_num + * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE + * + * increment topo.num_cores when 1st core in pkg seen + */ +void init_counter(struct thread_data *thread_base, struct core_data *core_base, + struct pkg_data *pkg_base, int thread_num, int core_num, + int pkg_num, int cpu_id) +{ + struct thread_data *t; + struct core_data *c; + struct pkg_data *p; + + t = GET_THREAD(thread_base, thread_num, core_num, pkg_num); + c = GET_CORE(core_base, core_num, pkg_num); + p = GET_PKG(pkg_base, pkg_num); + + t->cpu_id = cpu_id; + if (thread_num == 0) { + t->flags |= CPU_IS_FIRST_THREAD_IN_CORE; + if (cpu_is_first_core_in_package(cpu_id)) + t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE; + } + + c->core_id = core_num; + p->package_id = pkg_num; +} + + +int initialize_counters(int cpu_id) +{ + int my_thread_id, my_core_id, my_package_id; + + my_package_id = get_physical_package_id(cpu_id); + my_core_id = get_core_id(cpu_id); + + if (cpu_is_first_sibling_in_core(cpu_id)) { + my_thread_id = 0; + topo.num_cores++; + } else { + my_thread_id = 1; + } + + init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); + init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); + return 0; +} + +void allocate_output_buffer() +{ + output_buffer = calloc(1, (1 + topo.num_cpus) * 128); + outp = output_buffer; + if (outp == NULL) { + perror("calloc"); + exit(-1); + } +} + +void setup_all_buffers(void) +{ + topology_probe(); + allocate_counters(&thread_even, &core_even, &package_even); + allocate_counters(&thread_odd, &core_odd, &package_odd); + allocate_output_buffer(); + for_all_proc_cpus(initialize_counters); +} void turbostat_init() { check_cpuid(); @@ -948,17 +1382,19 @@ void turbostat_init() check_dev_msr(); check_super_user(); - num_cpus = for_all_cpus(alloc_new_counters); + setup_all_buffers(); if (verbose) - print_nehalem_info(); + print_verbose_header(); } int fork_it(char **argv) { - int retval; pid_t child_pid; - get_counters(cnt_even); + + for_all_cpus(get_counters, EVEN_COUNTERS); + /* clear affinity side-effect of get_counters() */ + sched_setaffinity(0, cpu_present_setsize, cpu_present_set); gettimeofday(&tv_even, (struct timezone *)NULL); child_pid = fork(); @@ -981,14 +1417,17 @@ int fork_it(char **argv) exit(1); } } - get_counters(cnt_odd); + /* + * n.b. fork_it() does not check for errors from for_all_cpus() + * because re-starting is problematic when forking + */ + for_all_cpus(get_counters, ODD_COUNTERS); gettimeofday(&tv_odd, (struct timezone *)NULL); - retval = compute_delta(cnt_odd, cnt_even, cnt_delta); - timersub(&tv_odd, &tv_even, &tv_delta); - compute_average(cnt_delta, cnt_average); - if (!retval) - print_counters(cnt_delta); + for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); + compute_average(EVEN_COUNTERS); + format_all_counters(EVEN_COUNTERS); + flush_stderr(); fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0); @@ -1001,8 +1440,17 @@ void cmdline(int argc, char **argv) progname = argv[0]; - while ((opt = getopt(argc, argv, "+vi:M:")) != -1) { + while ((opt = getopt(argc, argv, "+cpsvi:M:")) != -1) { switch (opt) { + case 'c': + show_core_only++; + break; + case 'p': + show_pkg_only++; + break; + case 's': + summary_only++; + break; case 'v': verbose++; break; @@ -1025,10 +1473,8 @@ int main(int argc, char **argv) cmdline(argc, argv); if (verbose > 1) - fprintf(stderr, "turbostat Dec 6, 2010" + fprintf(stderr, "turbostat v2.0 May 16, 2012" " - Len Brown <lenb@kernel.org>\n"); - if (verbose > 1) - fprintf(stderr, "http://userweb.kernel.org/~lenb/acpi/utils/pmtools/turbostat/\n"); turbostat_init(); diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include new file mode 100644 index 000000000000..bde8521d56bb --- /dev/null +++ b/tools/scripts/Makefile.include @@ -0,0 +1,58 @@ +ifeq ("$(origin O)", "command line") + OUTPUT := $(O)/ + COMMAND_O := O=$(O) +endif + +ifneq ($(OUTPUT),) +# check that the output directory actually exists +OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd) +$(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist)) +endif + +# +# Include saner warnings here, which can catch bugs: +# +EXTRA_WARNINGS := -Wbad-function-cast +EXTRA_WARNINGS += -Wdeclaration-after-statement +EXTRA_WARNINGS += -Wformat-security +EXTRA_WARNINGS += -Wformat-y2k +EXTRA_WARNINGS += -Winit-self +EXTRA_WARNINGS += -Wmissing-declarations +EXTRA_WARNINGS += -Wmissing-prototypes +EXTRA_WARNINGS += -Wnested-externs +EXTRA_WARNINGS += -Wno-system-headers +EXTRA_WARNINGS += -Wold-style-definition +EXTRA_WARNINGS += -Wpacked +EXTRA_WARNINGS += -Wredundant-decls +EXTRA_WARNINGS += -Wshadow +EXTRA_WARNINGS += -Wstrict-aliasing=3 +EXTRA_WARNINGS += -Wstrict-prototypes +EXTRA_WARNINGS += -Wswitch-default +EXTRA_WARNINGS += -Wswitch-enum +EXTRA_WARNINGS += -Wundef +EXTRA_WARNINGS += -Wwrite-strings +EXTRA_WARNINGS += -Wformat + +ifneq ($(findstring $(MAKEFLAGS), w),w) +PRINT_DIR = --no-print-directory +else +NO_SUBDIR = : +endif + +QUIET_SUBDIR0 = +$(MAKE) -C # space to separate -C and subdir +QUIET_SUBDIR1 = + +ifneq ($(findstring $(MAKEFLAGS),s),s) +ifndef V + QUIET_CC = @echo ' ' CC $@; + QUIET_AR = @echo ' ' AR $@; + QUIET_LINK = @echo ' ' LINK $@; + QUIET_MKDIR = @echo ' ' MKDIR $@; + QUIET_GEN = @echo ' ' GEN $@; + QUIET_SUBDIR0 = +@subdir= + QUIET_SUBDIR1 = ;$(NO_SUBDIR) echo ' ' SUBDIR $$subdir; \ + $(MAKE) $(PRINT_DIR) -C $$subdir + QUIET_FLEX = @echo ' ' FLEX $@; + QUIET_BISON = @echo ' ' BISON $@; +endif +endif diff --git a/tools/testing/fault-injection/failcmd.sh b/tools/testing/fault-injection/failcmd.sh new file mode 100644 index 000000000000..78a9ed7fecdb --- /dev/null +++ b/tools/testing/fault-injection/failcmd.sh @@ -0,0 +1,219 @@ +#!/bin/bash +# +# NAME +# failcmd.sh - run a command with injecting slab/page allocation failures +# +# SYNOPSIS +# failcmd.sh --help +# failcmd.sh [<options>] command [arguments] +# +# DESCRIPTION +# Run command with injecting slab/page allocation failures by fault +# injection. +# +# NOTE: you need to run this script as root. +# + +usage() +{ + cat >&2 <<EOF +Usage: $0 [options] command [arguments] + +OPTIONS + -p percent + --probability=percent + likelihood of failure injection, in percent. + Default value is 1 + + -t value + --times=value + specifies how many times failures may happen at most. + Default value is 1 + + --oom-kill-allocating-task=value + set /proc/sys/vm/oom_kill_allocating_task to specified value + before running the command. + Default value is 1 + + -h, --help + Display a usage message and exit + + --interval=value, --space=value, --verbose=value, --task-filter=value, + --stacktrace-depth=value, --require-start=value, --require-end=value, + --reject-start=value, --reject-end=value, --ignore-gfp-wait=value + See Documentation/fault-injection/fault-injection.txt for more + information + + failslab options: + --cache-filter=value + + fail_page_alloc options: + --ignore-gfp-highmem=value, --min-order=value + +ENVIRONMENT + FAILCMD_TYPE + The following values for FAILCMD_TYPE are recognized: + + failslab + inject slab allocation failures + fail_page_alloc + inject page allocation failures + + If FAILCMD_TYPE is not defined, then failslab is used. +EOF +} + +if [ $UID != 0 ]; then + echo must be run as root >&2 + exit 1 +fi + +DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3}'` + +if [ ! -d "$DEBUGFS" ]; then + echo debugfs is not mounted >&2 + exit 1 +fi + +FAILCMD_TYPE=${FAILCMD_TYPE:-failslab} +FAULTATTR=$DEBUGFS/$FAILCMD_TYPE + +if [ ! -d $FAULTATTR ]; then + echo $FAILCMD_TYPE is not available >&2 + exit 1 +fi + +LONGOPTS=probability:,interval:,times:,space:,verbose:,task-filter: +LONGOPTS=$LONGOPTS,stacktrace-depth:,require-start:,require-end: +LONGOPTS=$LONGOPTS,reject-start:,reject-end:,oom-kill-allocating-task:,help + +if [ $FAILCMD_TYPE = failslab ]; then + LONGOPTS=$LONGOPTS,ignore-gfp-wait:,cache-filter: +elif [ $FAILCMD_TYPE = fail_page_alloc ]; then + LONGOPTS=$LONGOPTS,ignore-gfp-wait:,ignore-gfp-highmem:,min-order: +fi + +TEMP=`getopt -o p:i:t:s:v:h --long $LONGOPTS -n 'failcmd.sh' -- "$@"` + +if [ $? != 0 ]; then + usage + exit 1 +fi + +eval set -- "$TEMP" + +fault_attr_default() +{ + echo N > $FAULTATTR/task-filter + echo 0 > $FAULTATTR/probability + echo 1 > $FAULTATTR/times +} + +fault_attr_default + +oom_kill_allocating_task_saved=`cat /proc/sys/vm/oom_kill_allocating_task` + +restore_values() +{ + fault_attr_default + echo $oom_kill_allocating_task_saved \ + > /proc/sys/vm/oom_kill_allocating_task +} + +# +# Default options +# +declare -i oom_kill_allocating_task=1 +declare task_filter=Y +declare -i probability=1 +declare -i times=1 + +while true; do + case "$1" in + -p|--probability) + probability=$2 + shift 2 + ;; + -i|--interval) + echo $2 > $FAULTATTR/interval + shift 2 + ;; + -t|--times) + times=$2 + shift 2 + ;; + -s|--space) + echo $2 > $FAULTATTR/space + shift 2 + ;; + -v|--verbose) + echo $2 > $FAULTATTR/verbose + shift 2 + ;; + --task-filter) + task_filter=$2 + shift 2 + ;; + --stacktrace-depth) + echo $2 > $FAULTATTR/stacktrace-depth + shift 2 + ;; + --require-start) + echo $2 > $FAULTATTR/require-start + shift 2 + ;; + --require-end) + echo $2 > $FAULTATTR/require-end + shift 2 + ;; + --reject-start) + echo $2 > $FAULTATTR/reject-start + shift 2 + ;; + --reject-end) + echo $2 > $FAULTATTR/reject-end + shift 2 + ;; + --oom-kill-allocating-task) + oom_kill_allocating_task=$2 + shift 2 + ;; + --ignore-gfp-wait) + echo $2 > $FAULTATTR/ignore-gfp-wait + shift 2 + ;; + --cache-filter) + echo $2 > $FAULTATTR/cache_filter + shift 2 + ;; + --ignore-gfp-highmem) + echo $2 > $FAULTATTR/ignore-gfp-highmem + shift 2 + ;; + --min-order) + echo $2 > $FAULTATTR/min-order + shift 2 + ;; + -h|--help) + usage + exit 0 + shift + ;; + --) + shift + break + ;; + esac +done + +[ -z "$1" ] && exit 0 + +echo $oom_kill_allocating_task > /proc/sys/vm/oom_kill_allocating_task +echo $task_filter > $FAULTATTR/task-filter +echo $probability > $FAULTATTR/probability +echo $times > $FAULTATTR/times + +trap "restore_values" SIGINT SIGTERM EXIT + +cmd="echo 1 > /proc/self/make-it-fail && exec $@" +bash -c "$cmd" diff --git a/tools/testing/ktest/compare-ktest-sample.pl b/tools/testing/ktest/compare-ktest-sample.pl index 9a571e71683c..a373a5bfff68 100755 --- a/tools/testing/ktest/compare-ktest-sample.pl +++ b/tools/testing/ktest/compare-ktest-sample.pl @@ -2,7 +2,9 @@ open (IN,"ktest.pl"); while (<IN>) { + # hashes are now used if (/\$opt\{"?([A-Z].*?)(\[.*\])?"?\}/ || + /^\s*"?([A-Z].*?)"?\s*=>\s*/ || /set_test_option\("(.*?)"/) { $opt{$1} = 1; } @@ -11,7 +13,7 @@ close IN; open (IN, "sample.conf"); while (<IN>) { - if (/^\s*#?\s*(\S+)\s*=/) { + if (/^\s*#?\s*([A-Z]\S*)\s*=/) { $samp{$1} = 1; } } diff --git a/tools/testing/ktest/examples/README b/tools/testing/ktest/examples/README new file mode 100644 index 000000000000..a12d295a09d8 --- /dev/null +++ b/tools/testing/ktest/examples/README @@ -0,0 +1,32 @@ +This directory contains example configs to use ktest for various tasks. +The configs still need to be customized for your environment, but it +is broken up by task which makes it easier to understand how to set up +ktest. + +The configs are based off of real working configs but have been modified +and commented to show more generic use cases that are more helpful for +developers. + +crosstests.conf - this config shows an example of testing a git repo against + lots of different architectures. It only does build tests, but makes + it easy to compile test different archs. You can download the arch + cross compilers from: + http://kernel.org/pub/tools/crosstool/files/bin/x86_64/ + +test.conf - A generic example of a config. This is based on an actual config + used to perform real testing. + +kvm.conf - A example of a config that is used to test a virtual guest running + on a host. + +snowball.conf - An example config that was used to demo ktest.pl against + a snowball ARM board. + +include/ - The include directory holds default configs that can be + included into other configs. This is a real use example that shows how + to reuse configs for various machines or set ups. The files here + are included by other config files, where the other config files define + options and variables that will make the included config work for the + given environment. + + diff --git a/tools/testing/ktest/examples/crosstests.conf b/tools/testing/ktest/examples/crosstests.conf new file mode 100644 index 000000000000..46736604c26c --- /dev/null +++ b/tools/testing/ktest/examples/crosstests.conf @@ -0,0 +1,260 @@ +# +# Example config for cross compiling +# +# In this config, it is expected that the tool chains from: +# +# http://kernel.org/pub/tools/crosstool/files/bin/x86_64/ +# +# running on a x86_64 system have been downloaded and installed into: +# +# /usr/local/ +# +# such that the compiler binaries are something like: +# +# /usr/local/gcc-4.5.2-nolibc/mips-linux/bin/mips-linux-gcc +# +# Some of the archs will use gcc-4.5.1 instead of gcc-4.5.2 +# this config uses variables to differentiate them. +# +# Comments describe some of the options, but full descriptions of +# options are described in the samples.conf file. + +# ${PWD} is defined by ktest.pl to be the directory that the user +# was in when they executed ktest.pl. It may be better to hardcode the +# path name here. THIS_DIR is the variable used through out the config file +# in case you want to change it. + +THIS_DIR := ${PWD} + +# Update the BUILD_DIR option to the location of your git repo you want to test. +BUILD_DIR = ${THIS_DIR}/linux.git + +# The build will go into this directory. It will be created when you run the test. +OUTPUT_DIR = ${THIS_DIR}/cross-compile + +# The build will be compiled with -j8 +BUILD_OPTIONS = -j8 + +# The test will not stop when it hits a failure. +DIE_ON_FAILURE = 0 + +# If you want to have ktest.pl store the failure somewhere, uncomment this option +# and change the directory where ktest should store the failures. +#STORE_FAILURES = ${THIS_DIR}/failures + +# The log file is stored in the OUTPUT_DIR called cross.log +# If you enable this, you need to create the OUTPUT_DIR. It wont be created for you. +LOG_FILE = ${OUTPUT_DIR}/cross.log + +# The log file will be cleared each time you run ktest. +CLEAR_LOG = 1 + +# As some archs do not build with the defconfig, they have been marked +# to be ignored. If you want to test them anyway, change DO_FAILED to 1. +# If a test that has been marked as DO_FAILED passes, then you should change +# that test to be DO_DEFAULT + +DO_FAILED := 0 +DO_DEFAULT := 1 + +# By setting both DO_FAILED and DO_DEFAULT to zero, you can pick a single +# arch that you want to test. (uncomment RUN and chose your arch) +#RUN := m32r + +# At the bottom of the config file exists a bisect test. You can update that +# test and set DO_FAILED and DO_DEFAULT to zero, and uncomment this variable +# to run the bisect on the arch. +#RUN := bisect + +# By default all tests will be running gcc 4.5.2. Some tests are using 4.5.1 +# and they select that in the test. +# Note: GCC_VER is declared as on option and not a variable ('=' instead of ':=') +# This is important. A variable is used only in the config file and if it is set +# it stays that way for the rest of the config file until it is change again. +# Here we want GCC_VER to remain persistent and change for each test, as it is used in +# the MAKE_CMD. By using '=' instead of ':=' we achieve our goal. + +GCC_VER = 4.5.2 +MAKE_CMD = PATH=/usr/local/gcc-${GCC_VER}-nolibc/${CROSS}/bin:$PATH CROSS_COMPILE=${CROSS}- make ARCH=${ARCH} + +# all tests are only doing builds. +TEST_TYPE = build + +# If you want to add configs on top of the defconfig, you can add those configs into +# the add-config file and uncomment this option. This is useful if you want to test +# all cross compiles with PREEMPT set, or TRACING on, etc. +#ADD_CONFIG = ${THIS_DIR}/add-config + +# All tests are using defconfig +BUILD_TYPE = defconfig + +# The test names will have the arch and cross compiler used. This will be shown in +# the results. +TEST_NAME = ${ARCH} ${CROSS} + +# alpha +TEST_START IF ${RUN} == alpha || ${DO_DEFAULT} +# Notice that CROSS and ARCH are also options and not variables (again '=' instead +# of ':='). This is because TEST_NAME and MAKE_CMD wil use them for each test. +# Only options are available during runs. Variables are only present in parsing the +# config file. +CROSS = alpha-linux +ARCH = alpha + +# arm +TEST_START IF ${RUN} == arm || ${DO_DEFAULT} +CROSS = arm-unknown-linux-gnueabi +ARCH = arm + +# black fin +TEST_START IF ${RUN} == bfin || ${DO_DEFAULT} +CROSS = bfin-uclinux +ARCH = blackfin +BUILD_OPTIONS = -j8 vmlinux + +# cris - FAILS? +TEST_START IF ${RUN} == cris || ${RUN} == cris64 || ${DO_FAILED} +CROSS = cris-linux +ARCH = cris + +# cris32 - not right arch? +TEST_START IF ${RUN} == cris || ${RUN} == cris32 || ${DO_FAILED} +CROSS = crisv32-linux +ARCH = cris + +# ia64 +TEST_START IF ${RUN} == ia64 || ${DO_DEFAULT} +CROSS = ia64-linux +ARCH = ia64 + +# frv +TEST_START IF ${RUN} == frv || ${DO_FAILED} +CROSS = frv-linux +ARCH = frv +GCC_VER = 4.5.1 + +# h8300 - failed make defconfig?? +TEST_START IF ${RUN} == h8300 || ${DO_FAILED} +CROSS = h8300-elf +ARCH = h8300 +GCC_VER = 4.5.1 + +# m68k fails with error? +TEST_START IF ${RUN} == m68k || ${DO_DEFAULT} +CROSS = m68k-linux +ARCH = m68k + +# mips64 +TEST_START IF ${RUN} == mips || ${RUN} == mips64 || ${DO_DEFAULT} +CROSS = mips64-linux +ARCH = mips + +# mips32 +TEST_START IF ${RUN} == mips || ${RUN} == mips32 || ${DO_DEFAULT} +CROSS = mips-linux +ARCH = mips + +# m32r +TEST_START IF ${RUN} == m32r || ${DO_FAILED} +CROSS = m32r-linux +ARCH = m32r +GCC_VER = 4.5.1 +BUILD_OPTIONS = -j8 vmlinux + +# parisc64 failed? +TEST_START IF ${RUN} == hppa || ${RUN} == hppa64 || ${DO_FAILED} +CROSS = hppa64-linux +ARCH = parisc + +# parisc +TEST_START IF ${RUN} == hppa || ${RUN} == hppa32 || ${DO_FAILED} +CROSS = hppa-linux +ARCH = parisc + +# ppc +TEST_START IF ${RUN} == ppc || ${RUN} == ppc32 || ${DO_DEFAULT} +CROSS = powerpc-linux +ARCH = powerpc + +# ppc64 +TEST_START IF ${RUN} == ppc || ${RUN} == ppc64 || ${DO_DEFAULT} +CROSS = powerpc64-linux +ARCH = powerpc + +# s390 +TEST_START IF ${RUN} == s390 || ${DO_DEFAULT} +CROSS = s390x-linux +ARCH = s390 + +# sh +TEST_START IF ${RUN} == sh || ${DO_DEFAULT} +CROSS = sh4-linux +ARCH = sh + +# sparc64 +TEST_START IF ${RUN} == sparc || ${RUN} == sparc64 || ${DO_DEFAULT} +CROSS = sparc64-linux +ARCH = sparc64 + +# sparc +TEST_START IF ${RUN} == sparc || ${RUN} == sparc32 || ${DO_DEFAULT} +CROSS = sparc-linux +ARCH = sparc + +# xtensa failed +TEST_START IF ${RUN} == xtensa || ${DO_FAILED} +CROSS = xtensa-linux +ARCH = xtensa + +# UML +TEST_START IF ${RUN} == uml || ${DO_DEFAULT} +MAKE_CMD = make ARCH=um SUBARCH=x86_64 +ARCH = uml +CROSS = + +TEST_START IF ${RUN} == x86 || ${RUN} == i386 || ${DO_DEFAULT} +MAKE_CMD = make ARCH=i386 +ARCH = i386 +CROSS = + +TEST_START IF ${RUN} == x86 || ${RUN} == x86_64 || ${DO_DEFAULT} +MAKE_CMD = make ARCH=x86_64 +ARCH = x86_64 +CROSS = + +################################# + +# This is a bisect if needed. You need to give it a MIN_CONFIG that +# will be the config file it uses. Basically, just copy the created defconfig +# for the arch someplace and point MIN_CONFIG to it. +TEST_START IF ${RUN} == bisect +MIN_CONFIG = ${THIS_DIR}/min-config +CROSS = s390x-linux +ARCH = s390 +TEST_TYPE = bisect +BISECT_TYPE = build +BISECT_GOOD = v3.1 +BISECT_BAD = v3.2 +CHECKOUT = v3.2 + +################################# + +# These defaults are needed to keep ktest.pl from complaining. They are +# ignored because the test does not go pass the build. No install or +# booting of the target images. + +DEFAULTS +MACHINE = crosstest +SSH_USER = root +BUILD_TARGET = cross +TARGET_IMAGE = image +POWER_CYCLE = cycle +CONSOLE = console +LOCALVERSION = version +GRUB_MENU = grub + +REBOOT_ON_ERROR = 0 +POWEROFF_ON_ERROR = 0 +POWEROFF_ON_SUCCESS = 0 +REBOOT_ON_SUCCESS = 0 + diff --git a/tools/testing/ktest/examples/include/bisect.conf b/tools/testing/ktest/examples/include/bisect.conf new file mode 100644 index 000000000000..009bea65bfb6 --- /dev/null +++ b/tools/testing/ktest/examples/include/bisect.conf @@ -0,0 +1,90 @@ +# +# This example shows the bisect tests (git bisect and config bisect) +# + + +# The config that includes this file may define a RUN_TEST +# variable that will tell this config what test to run. +# (what to set the TEST option to). +# +DEFAULTS IF NOT DEFINED RUN_TEST +# Requires that hackbench is in the PATH +RUN_TEST := ${SSH} hackbench 50 + + +# Set TEST to 'bisect' to do a normal git bisect. You need +# to modify the options below to make it bisect the exact +# commits you are interested in. +# +TEST_START IF ${TEST} == bisect +TEST_TYPE = bisect +# You must set the commit that was considered good (git bisect good) +BISECT_GOOD = v3.3 +# You must set the commit that was considered bad (git bisect bad) +BISECT_BAD = HEAD +# It's best to specify the branch to checkout before starting the bisect. +CHECKOUT = origin/master +# This can be build, boot, or test. Here we are doing a bisect +# that requires to run a test to know if the bisect was good or bad. +# The test should exit with 0 on good, non-zero for bad. But see +# the BISECT_RET_* options in samples.conf to override this. +BISECT_TYPE = test +TEST = ${RUN_TEST} +# It is usually a good idea to confirm that the GOOD and the BAD +# commits are truly good and bad respectively. Having BISECT_CHECK +# set to 1 will check both that the good commit works and the bad +# commit fails. If you only want to check one or the other, +# set BISECT_CHECK to 'good' or to 'bad'. +BISECT_CHECK = 1 +#BISECT_CHECK = good +#BISECT_CHECK = bad + +# Usually it's a good idea to specify the exact config you +# want to use throughout the entire bisect. Here we placed +# it in the directory we called ktest.pl from and named it +# 'config-bisect'. +MIN_CONFIG = ${THIS_DIR}/config-bisect +# By default, if we are doing a BISECT_TYPE = test run but the +# build or boot fails, ktest.pl will do a 'git bisect skip'. +# Uncomment the below option to make ktest stop testing on such +# an error. +#BISECT_SKIP = 0 +# Now if you had BISECT_SKIP = 0 and the test fails, you can +# examine what happened and then do 'git bisect log > /tmp/replay' +# Set BISECT_REPLAY to /tmp/replay and ktest.pl will run the +# 'git bisect replay /tmp/replay' before continuing the bisect test. +#BISECT_REPLAY = /tmp/replay +# If you used BISECT_REPLAY after the bisect test failed, you may +# not want to continue the bisect on that commit that failed. +# By setting BISECT_START to a new commit. ktest.pl will checkout +# that commit after it has performed the 'git bisect replay' but +# before it continues running the bisect test. +#BISECT_START = 2545eb6198e7e1ec50daa0cfc64a4cdfecf24ec9 + +# Now if you don't trust ktest.pl to make the decisions for you, then +# set BISECT_MANUAL to 1. This will cause ktest.pl not to decide +# if the commit was good or bad. Instead, it will ask you to tell +# it if the current commit was good. In the mean time, you could +# take the result, load it on any machine you want. Run several tests, +# or whatever you feel like. Then, when you are happy, you can tell +# ktest if you think it was good or not and ktest.pl will continue +# the git bisect. You can even change what commit it is currently at. +#BISECT_MANUAL = 1 + + +# One of the unique tests that ktest does is the config bisect. +# Currently (which hopefully will be fixed soon), the bad config +# must be a superset of the good config. This is because it only +# searches for a config that causes the target to fail. If the +# good config is not a subset of the bad config, or if the target +# fails because of a lack of a config, then it will not find +# the config for you. +TEST_START IF ${TEST} == config-bisect +TEST_TYPE = config_bisect +# set to build, boot, test +CONFIG_BISECT_TYPE = boot +# Set the config that is considered bad. +CONFIG_BISECT = ${THIS_DIR}/config-bad +# This config is optional. By default it uses the +# MIN_CONFIG as the good config. +CONFIG_BISECT_GOOD = ${THIS_DIR}/config-good diff --git a/tools/testing/ktest/examples/include/defaults.conf b/tools/testing/ktest/examples/include/defaults.conf new file mode 100644 index 000000000000..323a552ce642 --- /dev/null +++ b/tools/testing/ktest/examples/include/defaults.conf @@ -0,0 +1,157 @@ +# This file holds defaults for most the tests. It defines the options that +# are most common to tests that are likely to be shared. +# +# Note, after including this file, a config file may override any option +# with a DEFAULTS OVERRIDE section. +# + +# For those cases that use the same machine to boot a 64 bit +# and a 32 bit version. The MACHINE is the DNS name to get to the +# box (usually different if it was 64 bit or 32 bit) but the +# BOX here is defined as a variable that will be the name of the box +# itself. It is useful for calling scripts that will power cycle +# the box, as only one script needs to be created to power cycle +# even though the box itself has multiple operating systems on it. +# By default, BOX and MACHINE are the same. + +DEFAULTS IF NOT DEFINED BOX +BOX := ${MACHINE} + + +# Consider each box as 64 bit box, unless the config including this file +# has defined BITS = 32 + +DEFAULTS IF NOT DEFINED BITS +BITS := 64 + + +DEFAULTS + +# THIS_DIR is used through out the configs and defaults to ${PWD} which +# is the directory that ktest.pl was called from. + +THIS_DIR := ${PWD} + + +# to orginize your configs, having each machine save their configs +# into a separate directly is useful. +CONFIG_DIR := ${THIS_DIR}/configs/${MACHINE} + +# Reset the log before running each test. +CLEAR_LOG = 1 + +# As installing kernels usually requires root privilege, default the +# user on the target as root. It is also required that the target +# allows ssh to root from the host without asking for a password. + +SSH_USER = root + +# For accesing the machine, we will ssh to root@machine. +SSH := ssh ${SSH_USER}@${MACHINE} + +# Update this. The default here is ktest will ssh to the target box +# and run a script called 'run-test' located on that box. +TEST = ${SSH} run-test + +# Point build dir to the git repo you use +BUILD_DIR = ${THIS_DIR}/linux.git + +# Each machine will have its own output build directory. +OUTPUT_DIR = ${THIS_DIR}/build/${MACHINE} + +# Yes this config is focused on x86 (but ktest works for other archs too) +BUILD_TARGET = arch/x86/boot/bzImage +TARGET_IMAGE = /boot/vmlinuz-test + +# have directory for the scripts to reboot and power cycle the boxes +SCRIPTS_DIR := ${THIS_DIR}/scripts + +# You can have each box/machine have a script to power cycle it. +# Name your script <box>-cycle. +POWER_CYCLE = ${SCRIPTS_DIR}/${BOX}-cycle + +# This script is used to power off the box. +POWER_OFF = ${SCRIPTS_DIR}/${BOX}-poweroff + +# Keep your test kernels separate from your other kernels. +LOCALVERSION = -test + +# The /boot/grub/menu.lst is searched for the line: +# title Test Kernel +# and ktest will use that kernel to reboot into. +# For grub2 or other boot loaders, you need to set BOOT_TYPE +# to 'script' and define other ways to load the kernel. +# See snowball.conf example. +# +GRUB_MENU = Test Kernel + +# The kernel build will use this option. +BUILD_OPTIONS = -j8 + +# Keeping the log file with the output dir is convenient. +LOG_FILE = ${OUTPUT_DIR}/${MACHINE}.log + +# Each box should have their own minum configuration +# See min-config.conf +MIN_CONFIG = ${CONFIG_DIR}/config-min + +# For things like randconfigs, there may be configs you find that +# are already broken, or there may be some configs that you always +# want set. Uncomment ADD_CONFIG and point it to the make config files +# that set the configs you want to keep on (or off) in your build. +# ADD_CONFIG is usually something to add configs to all machines, +# where as, MIN_CONFIG is specific per machine. +#ADD_CONFIG = ${THIS_DIR}/config-broken ${THIS_DIR}/config-general + +# To speed up reboots for bisects and patchcheck, instead of +# waiting 60 seconds for the console to be idle, if this line is +# seen in the console output, ktest will know the good kernel has +# finished rebooting and it will be able to continue the tests. +REBOOT_SUCCESS_LINE = ${MACHINE} login: + +# The following is different ways to end the test. +# by setting the variable REBOOT to: none, error, fail or +# something else, ktest will power cycle or reboot the target box +# at the end of the tests. +# +# REBOOT := none +# Don't do anything at the end of the test. +# +# REBOOT := error +# Reboot the box if ktest detects an error +# +# REBOOT := fail +# Do not stop on failure, and after all tests are complete +# power off the box (for both success and error) +# This is good to run over a weekend and you don't want to waste +# electricity. +# + +DEFAULTS IF ${REBOOT} == none +REBOOT_ON_SUCCESS = 0 +REBOOT_ON_ERROR = 0 +POWEROFF_ON_ERROR = 0 +POWEROFF_ON_SUCCESS = 0 + +DEFAULTS ELSE IF ${REBOOT} == error +REBOOT_ON_SUCCESS = 0 +REBOOT_ON_ERROR = 1 +POWEROFF_ON_ERROR = 0 +POWEROFF_ON_SUCCESS = 0 + +DEFAULTS ELSE IF ${REBOOT} == fail +REBOOT_ON_SUCCESS = 0 +POWEROFF_ON_ERROR = 1 +POWEROFF_ON_SUCCESS = 1 +POWEROFF_AFTER_HALT = 120 +DIE_ON_FAILURE = 0 + +# Store the failure information into this directory +# such as the .config, dmesg, and build log. +STORE_FAILURES = ${THIS_DIR}/failures + +DEFAULTS ELSE +REBOOT_ON_SUCCESS = 1 +REBOOT_ON_ERROR = 1 +POWEROFF_ON_ERROR = 0 +POWEROFF_ON_SUCCESS = 0 diff --git a/tools/testing/ktest/examples/include/min-config.conf b/tools/testing/ktest/examples/include/min-config.conf new file mode 100644 index 000000000000..c703cc46d151 --- /dev/null +++ b/tools/testing/ktest/examples/include/min-config.conf @@ -0,0 +1,60 @@ +# +# This file has some examples for creating a MIN_CONFIG. +# (A .config file that is the minimum for a machine to boot, or +# to boot and make a network connection.) +# +# A MIN_CONFIG is very useful as it is the minimum configuration +# needed to boot a given machine. You can debug someone else's +# .config by only setting the configs in your MIN_CONFIG. The closer +# your MIN_CONFIG is to the true minimum set of configs needed to +# boot your machine, the closer the config you test with will be +# to the users config that had the failure. +# +# The make_min_config test allows you to create a MIN_CONFIG that +# is truly the minimum set of configs needed to boot a box. +# +# In this example, the final config will reside in +# ${CONFIG_DIR}/config-new-min and ${CONFIG_DIR}/config-new-min-net. +# Just move one to the location you have set for MIN_CONFIG. +# +# The first test creates a MIN_CONFIG that will be the minimum +# configuration to boot ${MACHINE} and be able to ssh to it. +# +# The second test creates a MIN_CONFIG that will only boot +# the target and most likely will not let you ssh to it. (Notice +# how the second test uses the first test's result to continue with. +# This is because the second test config is a subset of the first). +# +# The ${CONFIG_DIR}/config-skip (and -net) will hold the configs +# that ktest.pl found would not boot the target without them set. +# The config-new-min holds configs that ktest.pl could not test +# directly because another config that was needed to boot the box +# selected them. Sometimes it is possible that this file will hold +# the true minimum configuration. You can test to see if this is +# the case by running the boot test with BOOT_TYPE = allnoconfig and +# setting setting the MIN_CONFIG to ${CONFIG_DIR}/config-skip. If the +# machine still boots, then you can use the config-skip as your MIN_CONFIG. +# +# These tests can run for several hours (and perhaps days). +# It's OK to kill the test with a Ctrl^C. By restarting without +# modifying this config, ktest.pl will notice that the config-new-min(-net) +# exists, and will use that instead as the starting point. +# The USE_OUTPUT_MIN_CONFIG is set to 1 to keep ktest.pl from asking +# you if you want to use the OUTPUT_MIN_CONFIG as the starting point. +# By using the OUTPUT_MIN_CONFIG as the starting point will allow ktest.pl to +# start almost where it left off. +# +TEST_START IF ${TEST} == min-config +TEST_TYPE = make_min_config +OUTPUT_MIN_CONFIG = ${CONFIG_DIR}/config-new-min-net +IGNORE_CONFIG = ${CONFIG_DIR}/config-skip-net +MIN_CONFIG_TYPE = test +TEST = ${SSH} echo hi +USE_OUTPUT_MIN_CONFIG = 1 + +TEST_START IF ${TEST} == min-config && ${MULTI} +TEST_TYPE = make_min_config +OUTPUT_MIN_CONFIG = ${CONFIG_DIR}/config-new-min +IGNORE_CONFIG = ${CONFIG_DIR}/config-skip +MIN_CONFIG = ${CONFIG_DIR}/config-new-min-net +USE_OUTPUT_MIN_CONFIG = 1 diff --git a/tools/testing/ktest/examples/include/patchcheck.conf b/tools/testing/ktest/examples/include/patchcheck.conf new file mode 100644 index 000000000000..339d3e1700ff --- /dev/null +++ b/tools/testing/ktest/examples/include/patchcheck.conf @@ -0,0 +1,74 @@ +# patchcheck.conf +# +# This contains a test that takes two git commits and will test each +# commit between the two. The build test will look at what files the +# commit has touched, and if any of those files produce a warning, then +# the build will fail. + + +# PATCH_START is the commit to begin with and PATCH_END is the commit +# to end with (inclusive). This is similar to doing a git rebase -i PATCH_START~1 +# and then testing each commit and doing a git rebase --continue. +# You can use a SHA1, a git tag, or anything that git will accept for a checkout + +PATCH_START := HEAD~3 +PATCH_END := HEAD + +# Change PATCH_CHECKOUT to be the branch you want to test. The test will +# do a git checkout of this branch before starting. Obviously both +# PATCH_START and PATCH_END must be in this branch (and PATCH_START must +# be contained by PATCH_END). + +PATCH_CHECKOUT := test/branch + +# Usually it's a good idea to have a set config to use for testing individual +# patches. +PATCH_CONFIG := ${CONFIG_DIR}/config-patchcheck + +# Change PATCH_TEST to run some test for each patch. Each commit that is +# tested, after it is built and installed on the test machine, this command +# will be executed. Usually what is done is to ssh to the target box and +# run some test scripts. If you just want to boot test your patches +# comment PATCH_TEST out. +PATCH_TEST := ${SSH} "/usr/local/bin/ktest-test-script" + +DEFAULTS IF DEFINED PATCH_TEST +PATCH_TEST_TYPE := test + +DEFAULTS ELSE +PATCH_TEST_TYPE := boot + +# If for some reason a file has a warning that one of your patches touch +# but you do not care about it, set IGNORE_WARNINGS to that commit(s) +# (space delimited) +#IGNORE_WARNINGS = 39eaf7ef884dcc44f7ff1bac803ca2a1dcf43544 6edb2a8a385f0cdef51dae37ff23e74d76d8a6ce + +# If you are running a multi test, and the test failed on the first +# test but on, say the 5th patch. If you want to restart on the +# fifth patch, set PATCH_START1. This will make the first test start +# from this commit instead of the PATCH_START commit. +# Note, do not change this option. Just define PATCH_START1 in the +# top config (the one you pass to ktest.pl), and this will use it, +# otherwise it will just use PATCH_START if PATCH_START1 is not defined. +DEFAULTS IF NOT DEFINED PATCH_START1 +PATCH_START1 := ${PATCH_START} + +TEST_START IF ${TEST} == patchcheck +TEST_TYPE = patchcheck +MIN_CONFIG = ${PATCH_CONFIG} +TEST = ${PATCH_TEST} +PATCHCHECK_TYPE = ${PATCH_TEST_TYPE} +PATCHCHECK_START = ${PATCH_START1} +PATCHCHECK_END = ${PATCH_END} +CHECKOUT = ${PATCH_CHECKOUT} + +TEST_START IF ${TEST} == patchcheck && ${MULTI} +TEST_TYPE = patchcheck +MIN_CONFIG = ${PATCH_CONFIG} +TEST = ${PATCH_TEST} +PATCHCHECK_TYPE = ${PATCH_TEST_TYPE} +PATCHCHECK_START = ${PATCH_START} +PATCHCHECK_END = ${PATCH_END} +CHECKOUT = ${PATCH_CHECKOUT} +# Use multi to test different compilers? +MAKE_CMD = CC=gcc-4.5.1 make diff --git a/tools/testing/ktest/examples/include/tests.conf b/tools/testing/ktest/examples/include/tests.conf new file mode 100644 index 000000000000..4fdb811bd810 --- /dev/null +++ b/tools/testing/ktest/examples/include/tests.conf @@ -0,0 +1,74 @@ +# +# This is an example of various tests that you can run +# +# The variable TEST can be of boot, build, randconfig, or test. +# +# Note that TEST is a variable created with ':=' and only exists +# throughout the config processing (not during the tests itself). +# +# The TEST option (defined with '=') is used to tell ktest.pl +# what test to run after a successful boot. The TEST option is +# persistent into the test runs. +# + +# The config that includes this file may define a BOOT_TYPE +# variable that tells this config what type of boot test to run. +# If it's not defined, the below DEFAULTS will set the default +# to 'oldconfig'. +# +DEFAULTS IF NOT DEFINED BOOT_TYPE +BOOT_TYPE := oldconfig + +# The config that includes this file may define a RUN_TEST +# variable that will tell this config what test to run. +# (what to set the TEST option to). +# +DEFAULTS IF NOT DEFINED RUN_TEST +# Requires that hackbench is in the PATH +RUN_TEST := ${SSH} hackbench 50 + + +# If TEST is set to 'boot' then just build a kernel and boot +# the target. +TEST_START IF ${TEST} == boot +TEST_TYPE = boot +# Notice how we set the BUILD_TYPE option to the BOOT_TYPE variable. +BUILD_TYPE = ${BOOT_TYPE} +# Do not do a make mrproper. +BUILD_NOCLEAN = 1 + +# If you only want to build the kernel, and perhaps install +# and test it yourself, then just set TEST to build. +TEST_START IF ${TEST} == build +TEST_TYPE = build +BUILD_TYPE = ${BOOT_TYPE} +BUILD_NOCLEAN = 1 + +# Build, install, boot and test with a randconfg 10 times. +# It is important that you have set MIN_CONFIG in the config +# that includes this file otherwise it is likely that the +# randconfig will not have the neccessary configs needed to +# boot your box. This version of the test requires a min +# config that has enough to make sure the target has network +# working. +TEST_START ITERATE 10 IF ${TEST} == randconfig +MIN_CONFIG = ${CONFIG_DIR}/config-min-net +TEST_TYPE = test +BUILD_TYPE = randconfig +TEST = ${RUN_TEST} + +# This is the same as above, but only tests to a boot prompt. +# The MIN_CONFIG used here does not need to have networking +# working. +TEST_START ITERATE 10 IF ${TEST} == randconfig && ${MULTI} +TEST_TYPE = boot +BUILD_TYPE = randconfig +MIN_CONFIG = ${CONFIG_DIR}/config-min +MAKE_CMD = make + +# This builds, installs, boots and tests the target. +TEST_START IF ${TEST} == test +TEST_TYPE = test +BUILD_TYPE = ${BOOT_TYPE} +TEST = ${RUN_TEST} +BUILD_NOCLEAN = 1 diff --git a/tools/testing/ktest/examples/kvm.conf b/tools/testing/ktest/examples/kvm.conf new file mode 100644 index 000000000000..831c7c5395f1 --- /dev/null +++ b/tools/testing/ktest/examples/kvm.conf @@ -0,0 +1,88 @@ +# +# This config is an example usage of ktest.pl with a kvm guest +# +# The guest is called 'Guest' and this would be something that +# could be run on the host to test a virtual machine target. + +MACHINE = Guest + + +# Use virsh to read the serial console of the guest +CONSOLE = virsh console ${MACHINE} + +#*************************************# +# This part is the same as test.conf # +#*************************************# + +# The include files will set up the type of test to run. Just set TEST to +# which test you want to run. +# +# TESTS = patchcheck, randconfig, boot, test, config-bisect, bisect, min-config +# +# See the include/*.conf files that define these tests +# +TEST := patchcheck + +# Some tests may have more than one test to run. Define MULTI := 1 to run +# the extra tests. +MULTI := 0 + +# In case you want to differentiate which type of system you are testing +BITS := 64 + +# REBOOT = none, error, fail, empty +# See include/defaults.conf +REBOOT := empty + + +# The defaults file will set up various settings that can be used by all +# machine configs. +INCLUDE include/defaults.conf + + +#*************************************# +# Now we are different from test.conf # +#*************************************# + + +# The example here assumes that Guest is running a Fedora release +# that uses dracut for its initfs. The POST_INSTALL will be executed +# after the install of the kernel and modules are complete. +# +POST_INSTALL = ${SSH} /sbin/dracut -f /boot/initramfs-test.img $KERNEL_VERSION + +# Guests sometimes get stuck on reboot. We wait 3 seconds after running +# the reboot command and then do a full power-cycle of the guest. +# This forces the guest to restart. +# +POWERCYCLE_AFTER_REBOOT = 3 + +# We do the same after the halt command, but this time we wait 20 seconds. +POWEROFF_AFTER_HALT = 20 + + +# As the defaults.conf file has a POWER_CYCLE option already defined, +# and options can not be defined in the same section more than once +# (all DEFAULTS sections are considered the same). We use the +# DEFAULTS OVERRIDE to tell ktest.pl to ignore the previous defined +# options, for the options set in the OVERRIDE section. +# +DEFAULTS OVERRIDE + +# Instead of using the default POWER_CYCLE option defined in +# defaults.conf, we use virsh to cycle it. To do so, we destroy +# the guest, wait 5 seconds, and then start it up again. +# Crude, but effective. +# +POWER_CYCLE = virsh destroy ${MACHINE}; sleep 5; virsh start ${MACHINE} + + +DEFAULTS + +# The following files each handle a different test case. +# Having them included allows you to set up more than one machine and share +# the same tests. +INCLUDE include/patchcheck.conf +INCLUDE include/tests.conf +INCLUDE include/bisect.conf +INCLUDE include/min-config.conf diff --git a/tools/testing/ktest/examples/snowball.conf b/tools/testing/ktest/examples/snowball.conf new file mode 100644 index 000000000000..a82a3c5bc2b7 --- /dev/null +++ b/tools/testing/ktest/examples/snowball.conf @@ -0,0 +1,53 @@ +# This example was used to boot the snowball ARM board. +# See http://people.redhat.com/srostedt/ktest-embedded-2012/ + +# PWD is a ktest.pl variable that will result in the process working +# directory that ktest.pl is executed in. + +# THIS_DIR is automatically assigned the PWD of the path that generated +# the config file. It is best to use this variable when assigning other +# directory paths within this directory. This allows you to easily +# move the test cases to other locations or to other machines. +# +THIS_DIR := /home/rostedt/work/demo/ktest-embed +LOG_FILE = ${OUTPUT_DIR}/snowball.log +CLEAR_LOG = 1 +MAKE_CMD = PATH=/usr/local/gcc-4.5.2-nolibc/arm-unknown-linux-gnueabi/bin:$PATH CROSS_COMPILE=arm-unknown-linux-gnueabi- make ARCH=arm +ADD_CONFIG = ${THIS_DIR}/addconfig + +SCP_TO_TARGET = echo "don't do scp" + +TFTPBOOT := /var/lib/tftpboot +TFTPDEF := ${TFTPBOOT}/snowball-default +TFTPTEST := ${OUTPUT_DIR}/${BUILD_TARGET} + +SWITCH_TO_GOOD = cp ${TFTPDEF} ${TARGET_IMAGE} +SWITCH_TO_TEST = cp ${TFTPTEST} ${TARGET_IMAGE} + +# Define each test with TEST_START +# The config options below it will override the defaults +TEST_START SKIP +TEST_TYPE = boot +BUILD_TYPE = u8500_defconfig +BUILD_NOCLEAN = 1 + +TEST_START +TEST_TYPE = make_min_config +OUTPUT_MIN_CONFIG = ${THIS_DIR}/config.newmin +START_MIN_CONFIG = ${THIS_DIR}/config.orig +IGNORE_CONFIG = ${THIS_DIR}/config.ignore +BUILD_NOCLEAN = 1 + + +DEFAULTS +LOCALVERSION = -test +POWER_CYCLE = echo use the thumb luke; read a +CONSOLE = cat ${THIS_DIR}/snowball-cat +REBOOT_TYPE = script +SSH_USER = root +BUILD_OPTIONS = -j8 uImage +BUILD_DIR = ${THIS_DIR}/linux.git +OUTPUT_DIR = ${THIS_DIR}/snowball-build +MACHINE = snowball +TARGET_IMAGE = /var/lib/tftpboot/snowball-image +BUILD_TARGET = arch/arm/boot/uImage diff --git a/tools/testing/ktest/examples/test.conf b/tools/testing/ktest/examples/test.conf new file mode 100644 index 000000000000..b725210efb7f --- /dev/null +++ b/tools/testing/ktest/examples/test.conf @@ -0,0 +1,62 @@ +# +# Generic config for a machine +# + +# Name your machine (the DNS name, what you ssh to) +MACHINE = foo + +# BOX can be different than foo, if the machine BOX has +# multiple partitions with different systems installed. For example, +# you may have a i386 and x86_64 installation on a test box. +# If this is the case, MACHINE defines the way to connect to the +# machine, which may be different between which system the machine +# is booting into. BOX is used for the scripts to reboot and power cycle +# the machine, where it does not matter which system the machine boots into. +# +#BOX := bar + +# Define a way to read the console +CONSOLE = stty -F /dev/ttyS0 115200 parodd; cat /dev/ttyS0 + +# The include files will set up the type of test to run. Just set TEST to +# which test you want to run. +# +# TESTS = patchcheck, randconfig, boot, test, config-bisect, bisect, min-config +# +# See the include/*.conf files that define these tests +# +TEST := patchcheck + +# Some tests may have more than one test to run. Define MULTI := 1 to run +# the extra tests. +MULTI := 0 + +# In case you want to differentiate which type of system you are testing +BITS := 64 + +# REBOOT = none, error, fail, empty +# See include/defaults.conf +REBOOT := empty + +# The defaults file will set up various settings that can be used by all +# machine configs. +INCLUDE include/defaults.conf + +# In case you need to add a patch for a bisect or something +#PRE_BUILD = patch -p1 < ${THIS_DIR}/fix.patch + +# Reset the repo after the build and remove all 'test' modules from the target +# Notice that DO_POST_BUILD is a variable (defined by ':=') and POST_BUILD +# is the option (defined by '=') + +DO_POST_BUILD := git reset --hard +POST_BUILD = ${SSH} 'rm -rf /lib/modules/*-test*'; ${DO_POST_BUILD} + +# The following files each handle a different test case. +# Having them included allows you to set up more than one machine and share +# the same tests. +INCLUDE include/patchcheck.conf +INCLUDE include/tests.conf +INCLUDE include/bisect.conf +INCLUDE include/min-config.conf + diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 8d02ccb10c59..52b7959cd513 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -18,42 +18,57 @@ $| = 1; my %opt; my %repeat_tests; my %repeats; -my %default; #default opts -$default{"NUM_TESTS"} = 1; -$default{"REBOOT_TYPE"} = "grub"; -$default{"TEST_TYPE"} = "test"; -$default{"BUILD_TYPE"} = "randconfig"; -$default{"MAKE_CMD"} = "make"; -$default{"TIMEOUT"} = 120; -$default{"TMP_DIR"} = "/tmp/ktest/\${MACHINE}"; -$default{"SLEEP_TIME"} = 60; # sleep time between tests -$default{"BUILD_NOCLEAN"} = 0; -$default{"REBOOT_ON_ERROR"} = 0; -$default{"POWEROFF_ON_ERROR"} = 0; -$default{"REBOOT_ON_SUCCESS"} = 1; -$default{"POWEROFF_ON_SUCCESS"} = 0; -$default{"BUILD_OPTIONS"} = ""; -$default{"BISECT_SLEEP_TIME"} = 60; # sleep time between bisects -$default{"PATCHCHECK_SLEEP_TIME"} = 60; # sleep time between patch checks -$default{"CLEAR_LOG"} = 0; -$default{"BISECT_MANUAL"} = 0; -$default{"BISECT_SKIP"} = 1; -$default{"SUCCESS_LINE"} = "login:"; -$default{"DETECT_TRIPLE_FAULT"} = 1; -$default{"BOOTED_TIMEOUT"} = 1; -$default{"DIE_ON_FAILURE"} = 1; -$default{"SSH_EXEC"} = "ssh \$SSH_USER\@\$MACHINE \$SSH_COMMAND"; -$default{"SCP_TO_TARGET"} = "scp \$SRC_FILE \$SSH_USER\@\$MACHINE:\$DST_FILE"; -$default{"REBOOT"} = "ssh \$SSH_USER\@\$MACHINE reboot"; -$default{"STOP_AFTER_SUCCESS"} = 10; -$default{"STOP_AFTER_FAILURE"} = 60; -$default{"STOP_TEST_AFTER"} = 600; -$default{"LOCALVERSION"} = "-test"; +my %default = ( + "NUM_TESTS" => 1, + "TEST_TYPE" => "build", + "BUILD_TYPE" => "randconfig", + "MAKE_CMD" => "make", + "TIMEOUT" => 120, + "TMP_DIR" => "/tmp/ktest/\${MACHINE}", + "SLEEP_TIME" => 60, # sleep time between tests + "BUILD_NOCLEAN" => 0, + "REBOOT_ON_ERROR" => 0, + "POWEROFF_ON_ERROR" => 0, + "REBOOT_ON_SUCCESS" => 1, + "POWEROFF_ON_SUCCESS" => 0, + "BUILD_OPTIONS" => "", + "BISECT_SLEEP_TIME" => 60, # sleep time between bisects + "PATCHCHECK_SLEEP_TIME" => 60, # sleep time between patch checks + "CLEAR_LOG" => 0, + "BISECT_MANUAL" => 0, + "BISECT_SKIP" => 1, + "MIN_CONFIG_TYPE" => "boot", + "SUCCESS_LINE" => "login:", + "DETECT_TRIPLE_FAULT" => 1, + "NO_INSTALL" => 0, + "BOOTED_TIMEOUT" => 1, + "DIE_ON_FAILURE" => 1, + "SSH_EXEC" => "ssh \$SSH_USER\@\$MACHINE \$SSH_COMMAND", + "SCP_TO_TARGET" => "scp \$SRC_FILE \$SSH_USER\@\$MACHINE:\$DST_FILE", + "SCP_TO_TARGET_INSTALL" => "\${SCP_TO_TARGET}", + "REBOOT" => "ssh \$SSH_USER\@\$MACHINE reboot", + "STOP_AFTER_SUCCESS" => 10, + "STOP_AFTER_FAILURE" => 60, + "STOP_TEST_AFTER" => 600, + "MAX_MONITOR_WAIT" => 1800, + +# required, and we will ask users if they don't have them but we keep the default +# value something that is common. + "REBOOT_TYPE" => "grub", + "LOCALVERSION" => "-test", + "SSH_USER" => "root", + "BUILD_TARGET" => "arch/x86/boot/bzImage", + "TARGET_IMAGE" => "/boot/vmlinuz-test", + + "LOG_FILE" => undef, + "IGNORE_UNUSED" => 0, +); my $ktest_config; my $version; +my $have_version = 0; my $machine; my $ssh_user; my $tmpdir; @@ -63,6 +78,11 @@ my $output_config; my $test_type; my $build_type; my $build_options; +my $final_post_ktest; +my $pre_ktest; +my $post_ktest; +my $pre_test; +my $post_test; my $pre_build; my $post_build; my $pre_build_die; @@ -72,35 +92,51 @@ my $reboot_script; my $power_cycle; my $reboot; my $reboot_on_error; +my $switch_to_good; +my $switch_to_test; my $poweroff_on_error; +my $reboot_on_success; my $die_on_failure; my $powercycle_after_reboot; my $poweroff_after_halt; +my $max_monitor_wait; my $ssh_exec; my $scp_to_target; +my $scp_to_target_install; my $power_off; my $grub_menu; my $grub_number; my $target; my $make; +my $pre_install; my $post_install; +my $no_install; my $noclean; my $minconfig; my $start_minconfig; my $start_minconfig_defined; my $output_minconfig; +my $minconfig_type; +my $use_output_minconfig; my $ignore_config; +my $ignore_errors; my $addconfig; my $in_bisect = 0; -my $bisect_bad = ""; +my $bisect_bad_commit = ""; my $reverse_bisect; my $bisect_manual; my $bisect_skip; my $config_bisect_good; +my $bisect_ret_good; +my $bisect_ret_bad; +my $bisect_ret_skip; +my $bisect_ret_abort; +my $bisect_ret_default; my $in_patchcheck = 0; my $run_test; my $redirect; my $buildlog; +my $testlog; my $dmesg; my $monitor_fp; my $monitor_pid; @@ -110,28 +146,166 @@ my $bisect_sleep_time; my $patchcheck_sleep_time; my $ignore_warnings; my $store_failures; +my $store_successes; my $test_name; my $timeout; my $booted_timeout; my $detect_triplefault; my $console; +my $reboot_success_line; my $success_line; my $stop_after_success; my $stop_after_failure; my $stop_test_after; my $build_target; my $target_image; +my $checkout; my $localversion; my $iteration = 0; my $successes = 0; +my $bisect_good; +my $bisect_bad; +my $bisect_type; +my $bisect_start; +my $bisect_replay; +my $bisect_files; +my $bisect_reverse; +my $bisect_check; + +my $config_bisect; +my $config_bisect_type; +my $config_bisect_check; + +my $patchcheck_type; +my $patchcheck_start; +my $patchcheck_end; + +# set when a test is something other that just building or install +# which would require more options. +my $buildonly = 1; + +# set when creating a new config +my $newconfig = 0; + my %entered_configs; my %config_help; my %variable; + +# force_config is the list of configs that we force enabled (or disabled) +# in a .config file. The MIN_CONFIG and ADD_CONFIG configs. my %force_config; +# do not force reboots on config problems +my $no_reboot = 1; + +# reboot on success +my $reboot_success = 0; + +my %option_map = ( + "MACHINE" => \$machine, + "SSH_USER" => \$ssh_user, + "TMP_DIR" => \$tmpdir, + "OUTPUT_DIR" => \$outputdir, + "BUILD_DIR" => \$builddir, + "TEST_TYPE" => \$test_type, + "PRE_KTEST" => \$pre_ktest, + "POST_KTEST" => \$post_ktest, + "PRE_TEST" => \$pre_test, + "POST_TEST" => \$post_test, + "BUILD_TYPE" => \$build_type, + "BUILD_OPTIONS" => \$build_options, + "PRE_BUILD" => \$pre_build, + "POST_BUILD" => \$post_build, + "PRE_BUILD_DIE" => \$pre_build_die, + "POST_BUILD_DIE" => \$post_build_die, + "POWER_CYCLE" => \$power_cycle, + "REBOOT" => \$reboot, + "BUILD_NOCLEAN" => \$noclean, + "MIN_CONFIG" => \$minconfig, + "OUTPUT_MIN_CONFIG" => \$output_minconfig, + "START_MIN_CONFIG" => \$start_minconfig, + "MIN_CONFIG_TYPE" => \$minconfig_type, + "USE_OUTPUT_MIN_CONFIG" => \$use_output_minconfig, + "IGNORE_CONFIG" => \$ignore_config, + "TEST" => \$run_test, + "ADD_CONFIG" => \$addconfig, + "REBOOT_TYPE" => \$reboot_type, + "GRUB_MENU" => \$grub_menu, + "PRE_INSTALL" => \$pre_install, + "POST_INSTALL" => \$post_install, + "NO_INSTALL" => \$no_install, + "REBOOT_SCRIPT" => \$reboot_script, + "REBOOT_ON_ERROR" => \$reboot_on_error, + "SWITCH_TO_GOOD" => \$switch_to_good, + "SWITCH_TO_TEST" => \$switch_to_test, + "POWEROFF_ON_ERROR" => \$poweroff_on_error, + "REBOOT_ON_SUCCESS" => \$reboot_on_success, + "DIE_ON_FAILURE" => \$die_on_failure, + "POWER_OFF" => \$power_off, + "POWERCYCLE_AFTER_REBOOT" => \$powercycle_after_reboot, + "POWEROFF_AFTER_HALT" => \$poweroff_after_halt, + "MAX_MONITOR_WAIT" => \$max_monitor_wait, + "SLEEP_TIME" => \$sleep_time, + "BISECT_SLEEP_TIME" => \$bisect_sleep_time, + "PATCHCHECK_SLEEP_TIME" => \$patchcheck_sleep_time, + "IGNORE_WARNINGS" => \$ignore_warnings, + "IGNORE_ERRORS" => \$ignore_errors, + "BISECT_MANUAL" => \$bisect_manual, + "BISECT_SKIP" => \$bisect_skip, + "CONFIG_BISECT_GOOD" => \$config_bisect_good, + "BISECT_RET_GOOD" => \$bisect_ret_good, + "BISECT_RET_BAD" => \$bisect_ret_bad, + "BISECT_RET_SKIP" => \$bisect_ret_skip, + "BISECT_RET_ABORT" => \$bisect_ret_abort, + "BISECT_RET_DEFAULT" => \$bisect_ret_default, + "STORE_FAILURES" => \$store_failures, + "STORE_SUCCESSES" => \$store_successes, + "TEST_NAME" => \$test_name, + "TIMEOUT" => \$timeout, + "BOOTED_TIMEOUT" => \$booted_timeout, + "CONSOLE" => \$console, + "DETECT_TRIPLE_FAULT" => \$detect_triplefault, + "SUCCESS_LINE" => \$success_line, + "REBOOT_SUCCESS_LINE" => \$reboot_success_line, + "STOP_AFTER_SUCCESS" => \$stop_after_success, + "STOP_AFTER_FAILURE" => \$stop_after_failure, + "STOP_TEST_AFTER" => \$stop_test_after, + "BUILD_TARGET" => \$build_target, + "SSH_EXEC" => \$ssh_exec, + "SCP_TO_TARGET" => \$scp_to_target, + "SCP_TO_TARGET_INSTALL" => \$scp_to_target_install, + "CHECKOUT" => \$checkout, + "TARGET_IMAGE" => \$target_image, + "LOCALVERSION" => \$localversion, + + "BISECT_GOOD" => \$bisect_good, + "BISECT_BAD" => \$bisect_bad, + "BISECT_TYPE" => \$bisect_type, + "BISECT_START" => \$bisect_start, + "BISECT_REPLAY" => \$bisect_replay, + "BISECT_FILES" => \$bisect_files, + "BISECT_REVERSE" => \$bisect_reverse, + "BISECT_CHECK" => \$bisect_check, + + "CONFIG_BISECT" => \$config_bisect, + "CONFIG_BISECT_TYPE" => \$config_bisect_type, + "CONFIG_BISECT_CHECK" => \$config_bisect_check, + + "PATCHCHECK_TYPE" => \$patchcheck_type, + "PATCHCHECK_START" => \$patchcheck_start, + "PATCHCHECK_END" => \$patchcheck_end, +); + +# Options may be used by other options, record them. +my %used_options; + +# default variables that can be used +chomp ($variable{"PWD"} = `pwd`); + $config_help{"MACHINE"} = << "EOF" The machine hostname that you will test. + For build only tests, it is still needed to differentiate log files. EOF ; $config_help{"SSH_USER"} = << "EOF" @@ -141,11 +315,15 @@ EOF ; $config_help{"BUILD_DIR"} = << "EOF" The directory that contains the Linux source code (full path). + You can use \${PWD} that will be the path where ktest.pl is run, or use + \${THIS_DIR} which is assigned \${PWD} but may be changed later. EOF ; $config_help{"OUTPUT_DIR"} = << "EOF" The directory that the objects will be built (full path). (can not be same as BUILD_DIR) + You can use \${PWD} that will be the path where ktest.pl is run, or use + \${THIS_DIR} which is assigned \${PWD} but may be changed later. EOF ; $config_help{"BUILD_TARGET"} = << "EOF" @@ -153,6 +331,11 @@ $config_help{"BUILD_TARGET"} = << "EOF" (relative to OUTPUT_DIR) EOF ; +$config_help{"BUILD_OPTIONS"} = << "EOF" + Options to add to \"make\" when building. + i.e. -j20 +EOF + ; $config_help{"TARGET_IMAGE"} = << "EOF" The place to put your image on the test machine. EOF @@ -218,20 +401,36 @@ $config_help{"REBOOT_SCRIPT"} = << "EOF" EOF ; -sub read_yn { - my ($prompt) = @_; +sub read_prompt { + my ($cancel, $prompt) = @_; my $ans; for (;;) { - print "$prompt [Y/n] "; + if ($cancel) { + print "$prompt [y/n/C] "; + } else { + print "$prompt [Y/n] "; + } $ans = <STDIN>; chomp $ans; if ($ans =~ /^\s*$/) { - $ans = "y"; + if ($cancel) { + $ans = "c"; + } else { + $ans = "y"; + } } last if ($ans =~ /^y$/i || $ans =~ /^n$/i); - print "Please answer either 'y' or 'n'.\n"; + if ($cancel) { + last if ($ans =~ /^c$/i); + print "Please answer either 'y', 'n' or 'c'.\n"; + } else { + print "Please answer either 'y' or 'n'.\n"; + } + } + if ($ans =~ /^c/i) { + exit; } if ($ans !~ /^y$/i) { return 0; @@ -239,8 +438,21 @@ sub read_yn { return 1; } +sub read_yn { + my ($prompt) = @_; + + return read_prompt 0, $prompt; +} + +sub read_ync { + my ($prompt) = @_; + + return read_prompt 1, $prompt; +} + sub get_ktest_config { my ($config) = @_; + my $ans; return if (defined($opt{$config})); @@ -251,34 +463,50 @@ sub get_ktest_config { for (;;) { print "$config = "; - if (defined($default{$config})) { + if (defined($default{$config}) && length($default{$config})) { print "\[$default{$config}\] "; } - $entered_configs{$config} = <STDIN>; - $entered_configs{$config} =~ s/^\s*(.*\S)\s*$/$1/; - if ($entered_configs{$config} =~ /^\s*$/) { + $ans = <STDIN>; + $ans =~ s/^\s*(.*\S)\s*$/$1/; + if ($ans =~ /^\s*$/) { if ($default{$config}) { - $entered_configs{$config} = $default{$config}; + $ans = $default{$config}; } else { print "Your answer can not be blank\n"; next; } } + $entered_configs{$config} = ${ans}; last; } } sub get_ktest_configs { get_ktest_config("MACHINE"); - get_ktest_config("SSH_USER"); get_ktest_config("BUILD_DIR"); get_ktest_config("OUTPUT_DIR"); - get_ktest_config("BUILD_TARGET"); - get_ktest_config("TARGET_IMAGE"); - get_ktest_config("POWER_CYCLE"); - get_ktest_config("CONSOLE"); + + if ($newconfig) { + get_ktest_config("BUILD_OPTIONS"); + } + + # options required for other than just building a kernel + if (!$buildonly) { + get_ktest_config("POWER_CYCLE"); + get_ktest_config("CONSOLE"); + } + + # options required for install and more + if ($buildonly != 1) { + get_ktest_config("SSH_USER"); + get_ktest_config("BUILD_TARGET"); + get_ktest_config("TARGET_IMAGE"); + } + get_ktest_config("LOCALVERSION"); + return if ($buildonly); + my $rtype = $opt{"REBOOT_TYPE"}; if (!defined($rtype)) { @@ -292,13 +520,11 @@ sub get_ktest_configs { if ($rtype eq "grub") { get_ktest_config("GRUB_MENU"); - } else { - get_ktest_config("REBOOT_SCRIPT"); } } sub process_variables { - my ($value) = @_; + my ($value, $remove_undef) = @_; my $retval = ""; # We want to check for '\', and it is just easier @@ -316,9 +542,17 @@ sub process_variables { $retval = "$retval$begin"; if (defined($variable{$var})) { $retval = "$retval$variable{$var}"; + } elsif (defined($remove_undef) && $remove_undef) { + # for if statements, any variable that is not defined, + # we simple convert to 0 + $retval = "${retval}0"; } else { # put back the origin piece. $retval = "$retval\$\{$var\}"; + # This could be an option that is used later, save + # it so we don't warn if this option is not one of + # ktests options. + $used_options{$var} = 1; } $value = $end; } @@ -331,16 +565,35 @@ sub process_variables { } sub set_value { - my ($lvalue, $rvalue) = @_; + my ($lvalue, $rvalue, $override, $overrides, $name) = @_; + + my $prvalue = process_variables($rvalue); + + if ($buildonly && $lvalue =~ /^TEST_TYPE(\[.*\])?$/ && $prvalue ne "build") { + # Note if a test is something other than build, then we + # will need other manditory options. + if ($prvalue ne "install") { + $buildonly = 0; + } else { + # install still limits some manditory options. + $buildonly = 2; + } + } if (defined($opt{$lvalue})) { - die "Error: Option $lvalue defined more than once!\n"; + if (!$override || defined(${$overrides}{$lvalue})) { + my $extra = ""; + if ($override) { + $extra = "In the same override section!\n"; + } + die "$name: $.: Option $lvalue defined more than once!\n$extra"; + } + ${$overrides}{$lvalue} = $prvalue; } if ($rvalue =~ /^\s*$/) { delete $opt{$lvalue}; } else { - $rvalue = process_variables($rvalue); - $opt{$lvalue} = $rvalue; + $opt{$lvalue} = $prvalue; } } @@ -355,86 +608,278 @@ sub set_variable { } } -sub read_config { - my ($config) = @_; +sub process_compare { + my ($lval, $cmp, $rval) = @_; + + # remove whitespace - open(IN, $config) || die "can't read file $config"; + $lval =~ s/^\s*//; + $lval =~ s/\s*$//; + + $rval =~ s/^\s*//; + $rval =~ s/\s*$//; + + if ($cmp eq "==") { + return $lval eq $rval; + } elsif ($cmp eq "!=") { + return $lval ne $rval; + } elsif ($cmp eq "=~") { + return $lval =~ m/$rval/; + } elsif ($cmp eq "!~") { + return $lval !~ m/$rval/; + } + + my $statement = "$lval $cmp $rval"; + my $ret = eval $statement; + + # $@ stores error of eval + if ($@) { + return -1; + } + + return $ret; +} + +sub value_defined { + my ($val) = @_; + + return defined($variable{$2}) || + defined($opt{$2}); +} + +my $d = 0; +sub process_expression { + my ($name, $val) = @_; + + my $c = $d++; + + while ($val =~ s/\(([^\(]*?)\)/\&\&\&\&VAL\&\&\&\&/) { + my $express = $1; + + if (process_expression($name, $express)) { + $val =~ s/\&\&\&\&VAL\&\&\&\&/ 1 /; + } else { + $val =~ s/\&\&\&\&VAL\&\&\&\&/ 0 /; + } + } + + $d--; + my $OR = "\\|\\|"; + my $AND = "\\&\\&"; + + while ($val =~ s/^(.*?)($OR|$AND)//) { + my $express = $1; + my $op = $2; + + if (process_expression($name, $express)) { + if ($op eq "||") { + return 1; + } + } else { + if ($op eq "&&") { + return 0; + } + } + } + + if ($val =~ /(.*)(==|\!=|>=|<=|>|<|=~|\!~)(.*)/) { + my $ret = process_compare($1, $2, $3); + if ($ret < 0) { + die "$name: $.: Unable to process comparison\n"; + } + return $ret; + } + + if ($val =~ /^\s*(NOT\s*)?DEFINED\s+(\S+)\s*$/) { + if (defined $1) { + return !value_defined($2); + } else { + return value_defined($2); + } + } + + if ($val =~ /^\s*0\s*$/) { + return 0; + } elsif ($val =~ /^\s*\d+\s*$/) { + return 1; + } + + die ("$name: $.: Undefined content $val in if statement\n"); +} + +sub process_if { + my ($name, $value) = @_; + + # Convert variables and replace undefined ones with 0 + my $val = process_variables($value, 1); + my $ret = process_expression $name, $val; + + return $ret; +} + +sub __read_config { + my ($config, $current_test_num) = @_; + + my $in; + open($in, $config) || die "can't read file $config"; my $name = $config; $name =~ s,.*/(.*),$1,; - my $test_num = 0; + my $test_num = $$current_test_num; my $default = 1; my $repeat = 1; my $num_tests_set = 0; my $skip = 0; my $rest; + my $line; my $test_case = 0; + my $if = 0; + my $if_set = 0; + my $override = 0; - while (<IN>) { + my %overrides; + + while (<$in>) { # ignore blank lines and comments next if (/^\s*$/ || /\s*\#/); - if (/^\s*TEST_START(.*)/) { + if (/^\s*(TEST_START|DEFAULTS)\b(.*)/) { - $rest = $1; + my $type = $1; + $rest = $2; + $line = $2; - if ($num_tests_set) { - die "$name: $.: Can not specify both NUM_TESTS and TEST_START\n"; - } + my $old_test_num; + my $old_repeat; + $override = 0; + + if ($type eq "TEST_START") { + + if ($num_tests_set) { + die "$name: $.: Can not specify both NUM_TESTS and TEST_START\n"; + } - my $old_test_num = $test_num; - my $old_repeat = $repeat; + $old_test_num = $test_num; + $old_repeat = $repeat; - $test_num += $repeat; - $default = 0; - $repeat = 1; + $test_num += $repeat; + $default = 0; + $repeat = 1; + } else { + $default = 1; + } - if ($rest =~ /\s+SKIP(.*)/) { - $rest = $1; + # If SKIP is anywhere in the line, the command will be skipped + if ($rest =~ s/\s+SKIP\b//) { $skip = 1; } else { $test_case = 1; $skip = 0; } - if ($rest =~ /\s+ITERATE\s+(\d+)(.*)$/) { - $repeat = $1; - $rest = $2; - $repeat_tests{"$test_num"} = $repeat; + if ($rest =~ s/\sELSE\b//) { + if (!$if) { + die "$name: $.: ELSE found with out matching IF section\n$_"; + } + $if = 0; + + if ($if_set) { + $skip = 1; + } else { + $skip = 0; + } + } + + if ($rest =~ s/\sIF\s+(.*)//) { + if (process_if($name, $1)) { + $if_set = 1; + } else { + $skip = 1; + } + $if = 1; + } else { + $if = 0; + $if_set = 0; } - if ($rest =~ /\s+SKIP(.*)/) { - $rest = $1; - $skip = 1; + if (!$skip) { + if ($type eq "TEST_START") { + if ($rest =~ s/\s+ITERATE\s+(\d+)//) { + $repeat = $1; + $repeat_tests{"$test_num"} = $repeat; + } + } elsif ($rest =~ s/\sOVERRIDE\b//) { + # DEFAULT only + $override = 1; + # Clear previous overrides + %overrides = (); + } } - if ($rest !~ /^\s*$/) { - die "$name: $.: Gargbage found after TEST_START\n$_"; + if (!$skip && $rest !~ /^\s*$/) { + die "$name: $.: Gargbage found after $type\n$_"; } - if ($skip) { + if ($skip && $type eq "TEST_START") { $test_num = $old_test_num; $repeat = $old_repeat; } - } elsif (/^\s*DEFAULTS(.*)$/) { - $default = 1; - + } elsif (/^\s*ELSE\b(.*)$/) { + if (!$if) { + die "$name: $.: ELSE found with out matching IF section\n$_"; + } $rest = $1; - - if ($rest =~ /\s+SKIP(.*)/) { - $rest = $1; + if ($if_set) { $skip = 1; + $rest = ""; } else { $skip = 0; + + if ($rest =~ /\sIF\s+(.*)/) { + # May be a ELSE IF section. + if (!process_if($name, $1)) { + $skip = 1; + } + $rest = ""; + } else { + $if = 0; + } } if ($rest !~ /^\s*$/) { die "$name: $.: Gargbage found after DEFAULTS\n$_"; } + } elsif (/^\s*INCLUDE\s+(\S+)/) { + + next if ($skip); + + if (!$default) { + die "$name: $.: INCLUDE can only be done in default sections\n$_"; + } + + my $file = process_variables($1); + + if ($file !~ m,^/,) { + # check the path of the config file first + if ($config =~ m,(.*)/,) { + if (-f "$1/$file") { + $file = "$1/$file"; + } + } + } + + if ( ! -r $file ) { + die "$name: $.: Can't read file $file\n$_"; + } + + if (__read_config($file, \$test_num)) { + $test_case = 1; + } + } elsif (/^\s*([A-Z_\[\]\d]+)\s*=\s*(.*?)\s*$/) { next if ($skip); @@ -460,10 +905,10 @@ sub read_config { } if ($default || $lvalue =~ /\[\d+\]$/) { - set_value($lvalue, $rvalue); + set_value($lvalue, $rvalue, $override, \%overrides, $name); } else { my $val = "$lvalue\[$test_num\]"; - set_value($val, $rvalue); + set_value($val, $rvalue, $override, \%overrides, $name); if ($repeat > 1) { $repeats{$val} = $repeat; @@ -490,23 +935,42 @@ sub read_config { } } - close(IN); - if ($test_num) { $test_num += $repeat - 1; $opt{"NUM_TESTS"} = $test_num; } + close($in); + + $$current_test_num = $test_num; + + return $test_case; +} + +sub get_test_case { + print "What test case would you like to run?\n"; + print " (build, install or boot)\n"; + print " Other tests are available but require editing the config file\n"; + my $ans = <STDIN>; + chomp $ans; + $default{"TEST_TYPE"} = $ans; +} + +sub read_config { + my ($config) = @_; + + my $test_case; + my $test_num = 0; + + $test_case = __read_config $config, \$test_num; + # make sure we have all mandatory configs get_ktest_configs; # was a test specified? if (!$test_case) { print "No test case specified.\n"; - print "What test case would you like to run?\n"; - my $ans = <STDIN>; - chomp $ans; - $default{"TEST_TYPE"} = $ans; + get_test_case; } # set any defaults @@ -516,6 +980,37 @@ sub read_config { $opt{$default} = $default{$default}; } } + + if ($opt{"IGNORE_UNUSED"} == 1) { + return; + } + + my %not_used; + + # check if there are any stragglers (typos?) + foreach my $option (keys %opt) { + my $op = $option; + # remove per test labels. + $op =~ s/\[.*\]//; + if (!exists($option_map{$op}) && + !exists($default{$op}) && + !exists($used_options{$op})) { + $not_used{$op} = 1; + } + } + + if (%not_used) { + my $s = "s are"; + $s = " is" if (keys %not_used == 1); + print "The following option$s not used; could be a typo:\n"; + foreach my $option (keys %not_used) { + print "$option\n"; + } + print "Set IGRNORE_UNUSED = 1 to have ktest ignore unused variables\n"; + if (!read_yn "Do you want to continue?") { + exit -1; + } + } } sub __eval_option { @@ -524,6 +1019,18 @@ sub __eval_option { # Add space to evaluate the character before $ $option = " $option"; my $retval = ""; + my $repeated = 0; + my $parent = 0; + + foreach my $test (keys %repeat_tests) { + if ($i >= $test && + $i < $test + $repeat_tests{$test}) { + + $repeated = 1; + $parent = $test; + last; + } + } while ($option =~ /(.*?[^\\])\$\{(.*?)\}(.*)/) { my $start = $1; @@ -537,10 +1044,14 @@ sub __eval_option { # otherwise see if the default OPT (without [$i]) exists. my $o = "$var\[$i\]"; + my $parento = "$var\[$parent\]"; if (defined($opt{$o})) { $o = $opt{$o}; $retval = "$retval$o"; + } elsif ($repeated && defined($opt{$parento})) { + $o = $opt{$parento}; + $retval = "$retval$o"; } elsif (defined($opt{$var})) { $o = $opt{$var}; $retval = "$retval$o"; @@ -603,8 +1114,20 @@ sub doprint { } sub run_command; +sub start_monitor; +sub end_monitor; +sub wait_for_monitor; sub reboot { + my ($time) = @_; + + if (defined($time)) { + start_monitor; + # flush out current monitor + # May contain the reboot success line + wait_for_monitor 1; + } + # try to reboot normally if (run_command $reboot) { if (defined($powercycle_after_reboot)) { @@ -615,12 +1138,31 @@ sub reboot { # nope? power cycle it. run_command "$power_cycle"; } + + if (defined($time)) { + if (wait_for_monitor($time, $reboot_success_line)) { + # reboot got stuck? + doprint "Reboot did not finish. Forcing power cycle\n"; + run_command "$power_cycle"; + } + end_monitor; + } +} + +sub reboot_to_good { + my ($time) = @_; + + if (defined($switch_to_good)) { + run_command $switch_to_good; + } + + reboot $time; } sub do_not_reboot { my $i = $iteration; - return $test_type eq "build" || + return $test_type eq "build" || $no_reboot || ($test_type eq "patchcheck" && $opt{"PATCHCHECK_TYPE[$i]"} eq "build") || ($test_type eq "bisect" && $opt{"BISECT_TYPE[$i]"} eq "build"); } @@ -633,7 +1175,7 @@ sub dodie { if ($reboot_on_error && !do_not_reboot) { doprint "REBOOTING\n"; - reboot; + reboot_to_good; } elsif ($poweroff_on_error && defined($power_off)) { doprint "POWERING OFF\n"; @@ -693,21 +1235,108 @@ sub end_monitor { } sub wait_for_monitor { - my ($time) = @_; + my ($time, $stop) = @_; + my $full_line = ""; my $line; + my $booted = 0; + my $start_time = time; + my $skip_call_trace = 0; + my $bug = 0; + my $bug_ignored = 0; + my $now; doprint "** Wait for monitor to settle down **\n"; # read the monitor and wait for the system to calm down - do { + while (!$booted) { $line = wait_for_input($monitor_fp, $time); - print "$line" if (defined($line)); - } while (defined($line)); + last if (!defined($line)); + print "$line"; + $full_line .= $line; + + if (defined($stop) && $full_line =~ /$stop/) { + doprint "wait for monitor detected $stop\n"; + $booted = 1; + } + + if ($full_line =~ /\[ backtrace testing \]/) { + $skip_call_trace = 1; + } + + if ($full_line =~ /call trace:/i) { + if (!$bug && !$skip_call_trace) { + if ($ignore_errors) { + $bug_ignored = 1; + } else { + $bug = 1; + } + } + } + + if ($full_line =~ /\[ end of backtrace testing \]/) { + $skip_call_trace = 0; + } + + if ($full_line =~ /Kernel panic -/) { + $bug = 1; + } + + if ($line =~ /\n/) { + $full_line = ""; + } + $now = time; + if ($now - $start_time >= $max_monitor_wait) { + doprint "Exiting monitor flush due to hitting MAX_MONITOR_WAIT\n"; + return 1; + } + } print "** Monitor flushed **\n"; + return $bug; +} + +sub save_logs { + my ($result, $basedir) = @_; + my @t = localtime; + my $date = sprintf "%04d%02d%02d%02d%02d%02d", + 1900+$t[5],$t[4],$t[3],$t[2],$t[1],$t[0]; + + my $type = $build_type; + if ($type =~ /useconfig/) { + $type = "useconfig"; + } + + my $dir = "$machine-$test_type-$type-$result-$date"; + + $dir = "$basedir/$dir"; + + if (!-d $dir) { + mkpath($dir) or + die "can't create $dir"; + } + + my %files = ( + "config" => $output_config, + "buildlog" => $buildlog, + "dmesg" => $dmesg, + "testlog" => $testlog, + ); + + while (my ($name, $source) = each(%files)) { + if (-f "$source") { + cp "$source", "$dir/$name" or + die "failed to copy $source"; + } + } + + doprint "*** Saved info to $dir ***\n"; } sub fail { + if (defined($post_test)) { + run_command $post_test; + } + if ($die_on_failure) { dodie @_; } @@ -719,10 +1348,7 @@ sub fail { # no need to reboot for just building. if (!do_not_reboot) { doprint "REBOOTING\n"; - reboot; - start_monitor; - wait_for_monitor $sleep_time; - end_monitor; + reboot_to_good $sleep_time; } my $name = ""; @@ -737,38 +1363,9 @@ sub fail { doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; - return 1 if (!defined($store_failures)); - - my @t = localtime; - my $date = sprintf "%04d%02d%02d%02d%02d%02d", - 1900+$t[5],$t[4],$t[3],$t[2],$t[1],$t[0]; - - my $type = $build_type; - if ($type =~ /useconfig/) { - $type = "useconfig"; - } - - my $dir = "$machine-$test_type-$type-fail-$date"; - my $faildir = "$store_failures/$dir"; - - if (!-d $faildir) { - mkpath($faildir) or - die "can't create $faildir"; - } - if (-f "$output_config") { - cp "$output_config", "$faildir/config" or - die "failed to copy .config"; - } - if (-f $buildlog) { - cp $buildlog, "$faildir/buildlog" or - die "failed to move $buildlog"; - } - if (-f $dmesg) { - cp $dmesg, "$faildir/dmesg" or - die "failed to move $dmesg"; - } - - doprint "*** Saved info to $faildir ***\n"; + if (defined($store_failures)) { + save_logs "fail", $store_failures; + } return 1; } @@ -829,8 +1426,7 @@ sub run_ssh { } sub run_scp { - my ($src, $dst) = @_; - my $cp_scp = $scp_to_target; + my ($src, $dst, $cp_scp) = @_; $cp_scp =~ s/\$SRC_FILE/$src/g; $cp_scp =~ s/\$DST_FILE/$dst/g; @@ -838,6 +1434,22 @@ sub run_scp { return run_command "$cp_scp"; } +sub run_scp_install { + my ($src, $dst) = @_; + + my $cp_scp = $scp_to_target_install; + + return run_scp($src, $dst, $cp_scp); +} + +sub run_scp_mod { + my ($src, $dst) = @_; + + my $cp_scp = $scp_to_target; + + return run_scp($src, $dst, $cp_scp); +} + sub get_grub_index { if ($reboot_type ne "grub") { @@ -854,9 +1466,12 @@ sub get_grub_index { open(IN, "$ssh_grub |") or die "unable to get menu.lst"; + my $found = 0; + while (<IN>) { if (/^\s*title\s+$grub_menu\s*$/) { $grub_number++; + $found = 1; last; } elsif (/^\s*title\s/) { $grub_number++; @@ -865,7 +1480,7 @@ sub get_grub_index { close(IN); die "Could not find '$grub_menu' in /boot/grub/menu on $machine" - if ($grub_number < 0); + if (!$found); doprint "$grub_number\n"; } @@ -901,12 +1516,16 @@ sub wait_for_input } sub reboot_to { - if ($reboot_type eq "grub") { - run_ssh "'(echo \"savedefault --default=$grub_number --once\" | grub --batch && reboot)'"; - return; + if (defined($switch_to_test)) { + run_command $switch_to_test; } - run_command "$reboot_script"; + if ($reboot_type eq "grub") { + run_ssh "'(echo \"savedefault --default=$grub_number --once\" | grub --batch)'"; + } elsif (defined $reboot_script) { + run_command "$reboot_script"; + } + reboot; } sub get_sha1 { @@ -933,6 +1552,7 @@ sub get_sha1 { sub monitor { my $booted = 0; my $bug = 0; + my $bug_ignored = 0; my $skip_call_trace = 0; my $loops; @@ -1005,8 +1625,12 @@ sub monitor { if ($full_line =~ /call trace:/i) { if (!$bug && !$skip_call_trace) { - $bug = 1; - $failure_start = time; + if ($ignore_errors) { + $bug_ignored = 1; + } else { + $bug = 1; + $failure_start = time; + } } } @@ -1068,22 +1692,43 @@ sub monitor { fail "failed - never got a boot prompt." and return 0; } + if ($bug_ignored) { + doprint "WARNING: Call Trace detected but ignored due to IGNORE_ERRORS=1\n"; + } + return 1; } +sub eval_kernel_version { + my ($option) = @_; + + $option =~ s/\$KERNEL_VERSION/$version/g; + + return $option; +} + sub do_post_install { return if (!defined($post_install)); - my $cp_post_install = $post_install; - $cp_post_install =~ s/\$KERNEL_VERSION/$version/g; + my $cp_post_install = eval_kernel_version $post_install; run_command "$cp_post_install" or dodie "Failed to run post install"; } sub install { - run_scp "$outputdir/$build_target", "$target_image" or + return if ($no_install); + + if (defined($pre_install)) { + my $cp_pre_install = eval_kernel_version $pre_install; + run_command "$cp_pre_install" or + dodie "Failed to run pre install"; + } + + my $cp_target = eval_kernel_version $target_image; + + run_scp_install "$outputdir/$build_target", "$cp_target" or dodie "failed to copy image"; my $install_mods = 0; @@ -1105,7 +1750,7 @@ sub install { return; } - run_command "$make INSTALL_MOD_PATH=$tmpdir modules_install" or + run_command "$make INSTALL_MOD_STRIP=1 INSTALL_MOD_PATH=$tmpdir modules_install" or dodie "Failed to install modules"; my $modlib = "/lib/modules/$version"; @@ -1118,7 +1763,7 @@ sub install { run_command "cd $tmpdir && tar -cjf $modtar lib/modules/$version" or dodie "making tarball"; - run_scp "$tmpdir/$modtar", "/tmp" or + run_scp_mod "$tmpdir/$modtar", "/tmp" or dodie "failed to copy modules"; unlink "$tmpdir/$modtar"; @@ -1133,13 +1778,20 @@ sub install { sub get_version { # get the release name + return if ($have_version); doprint "$make kernelrelease ... "; $version = `$make kernelrelease | tail -1`; chomp($version); doprint "$version\n"; + $have_version = 1; } sub start_monitor_and_boot { + # Make sure the stable kernel has finished booting + start_monitor; + wait_for_monitor 5; + end_monitor; + get_grub_index; get_version; install; @@ -1232,6 +1884,7 @@ sub make_oldconfig { sub load_force_config { my ($config) = @_; + doprint "Loading force configs from $config\n"; open(IN, $config) or dodie "failed to read $config"; while (<IN>) { @@ -1250,6 +1903,13 @@ sub build { unlink $buildlog; + # Failed builds should not reboot the target + my $save_no_reboot = $no_reboot; + $no_reboot = 1; + + # Calculate a new version from here. + $have_version = 0; + if (defined($pre_build)) { my $ret = run_command $pre_build; if (!$ret && defined($pre_build_die) && @@ -1272,15 +1932,15 @@ sub build { # allow for empty configs run_command "touch $output_config"; - run_command "mv $output_config $outputdir/config_temp" or - dodie "moving .config"; + if (!$noclean) { + run_command "mv $output_config $outputdir/config_temp" or + dodie "moving .config"; - if (!$noclean && !run_command "$make mrproper") { - dodie "make mrproper"; - } + run_command "$make mrproper" or dodie "make mrproper"; - run_command "mv $outputdir/config_temp $output_config" or - dodie "moving config_temp"; + run_command "mv $outputdir/config_temp $output_config" or + dodie "moving config_temp"; + } } elsif (!$noclean) { unlink "$output_config"; @@ -1309,6 +1969,9 @@ sub build { undef $redirect; if (defined($post_build)) { + # Because a post build may change the kernel version + # do it now. + get_version; my $ret = run_command $post_build; if (!$ret && defined($post_build_die) && $post_build_die) { @@ -1318,10 +1981,15 @@ sub build { if (!$build_ret) { # bisect may need this to pass - return 0 if ($in_bisect); + if ($in_bisect) { + $no_reboot = $save_no_reboot; + return 0; + } fail "failed build" and return 0; } + $no_reboot = $save_no_reboot; + return 1; } @@ -1340,6 +2008,10 @@ sub halt { sub success { my ($i) = @_; + if (defined($post_test)) { + run_command $post_test; + } + $successes++; my $name = ""; @@ -1354,12 +2026,13 @@ sub success { doprint "*******************************************\n"; doprint "*******************************************\n"; + if (defined($store_successes)) { + save_logs "success", $store_successes; + } + if ($i != $opt{"NUM_TESTS"} && !do_not_reboot) { doprint "Reboot and wait $sleep_time seconds\n"; - reboot; - start_monitor; - wait_for_monitor $sleep_time; - end_monitor; + reboot_to_good $sleep_time; } } @@ -1386,7 +2059,10 @@ sub child_run_test { $poweroff_on_error = 0; $die_on_failure = 1; + $redirect = "$testlog"; run_command $run_test or $failed = 1; + undef $redirect; + exit $failed; } @@ -1402,6 +2078,7 @@ sub do_run_test { my $line; my $full_line; my $bug = 0; + my $bug_ignored = 0; wait_for_monitor 1; @@ -1426,7 +2103,11 @@ sub do_run_test { doprint $line; if ($full_line =~ /call trace:/i) { - $bug = 1; + if ($ignore_errors) { + $bug_ignored = 1; + } else { + $bug = 1; + } } if ($full_line =~ /Kernel panic -/) { @@ -1439,6 +2120,10 @@ sub do_run_test { } } while (!$child_done && !$bug); + if (!$bug && $bug_ignored) { + doprint "WARNING: Call Trace detected but ignored due to IGNORE_ERRORS=1\n"; + } + if ($bug) { my $failure_start = time; my $now; @@ -1461,6 +2146,43 @@ sub do_run_test { waitpid $child_pid, 0; $child_exit = $?; + if (!$bug && $in_bisect) { + if (defined($bisect_ret_good)) { + if ($child_exit == $bisect_ret_good) { + return 1; + } + } + if (defined($bisect_ret_skip)) { + if ($child_exit == $bisect_ret_skip) { + return -1; + } + } + if (defined($bisect_ret_abort)) { + if ($child_exit == $bisect_ret_abort) { + fail "test abort" and return -2; + } + } + if (defined($bisect_ret_bad)) { + if ($child_exit == $bisect_ret_skip) { + return 0; + } + } + if (defined($bisect_ret_default)) { + if ($bisect_ret_default eq "good") { + return 1; + } elsif ($bisect_ret_default eq "bad") { + return 0; + } elsif ($bisect_ret_default eq "skip") { + return -1; + } elsif ($bisect_ret_default eq "abort") { + return -2; + } else { + fail "unknown default action: $bisect_ret_default" + and return -2; + } + } + } + if ($bug || $child_exit) { return 0 if $in_bisect; fail "test failed" and return 0; @@ -1487,7 +2209,7 @@ sub run_git_bisect { if ($output =~ m/^(Bisecting: .*\(roughly \d+ steps?\))\s+\[([[:xdigit:]]+)\]/) { doprint "$1 [$2]\n"; } elsif ($output =~ m/^([[:xdigit:]]+) is the first bad commit/) { - $bisect_bad = $1; + $bisect_bad_commit = $1; doprint "Found bad commit... $1\n"; return 0; } else { @@ -1500,10 +2222,7 @@ sub run_git_bisect { sub bisect_reboot { doprint "Reboot and sleep $bisect_sleep_time seconds\n"; - reboot; - start_monitor; - wait_for_monitor $bisect_sleep_time; - end_monitor; + reboot_to_good $bisect_sleep_time; } # returns 1 on success, 0 on failure, -1 on skip @@ -1574,7 +2293,7 @@ sub run_bisect { } # Are we looking for where it worked, not failed? - if ($reverse_bisect) { + if ($reverse_bisect && $ret >= 0) { $ret = !$ret; } @@ -1588,21 +2307,28 @@ sub run_bisect { } } +sub update_bisect_replay { + my $tmp_log = "$tmpdir/ktest_bisect_log"; + run_command "git bisect log > $tmp_log" or + die "can't create bisect log"; + return $tmp_log; +} + sub bisect { my ($i) = @_; my $result; - die "BISECT_GOOD[$i] not defined\n" if (!defined($opt{"BISECT_GOOD[$i]"})); - die "BISECT_BAD[$i] not defined\n" if (!defined($opt{"BISECT_BAD[$i]"})); - die "BISECT_TYPE[$i] not defined\n" if (!defined($opt{"BISECT_TYPE[$i]"})); + die "BISECT_GOOD[$i] not defined\n" if (!defined($bisect_good)); + die "BISECT_BAD[$i] not defined\n" if (!defined($bisect_bad)); + die "BISECT_TYPE[$i] not defined\n" if (!defined($bisect_type)); - my $good = $opt{"BISECT_GOOD[$i]"}; - my $bad = $opt{"BISECT_BAD[$i]"}; - my $type = $opt{"BISECT_TYPE[$i]"}; - my $start = $opt{"BISECT_START[$i]"}; - my $replay = $opt{"BISECT_REPLAY[$i]"}; - my $start_files = $opt{"BISECT_FILES[$i]"}; + my $good = $bisect_good; + my $bad = $bisect_bad; + my $type = $bisect_type; + my $start = $bisect_start; + my $replay = $bisect_replay; + my $start_files = $bisect_files; if (defined($start_files)) { $start_files = " -- " . $start_files; @@ -1614,8 +2340,7 @@ sub bisect { $good = get_sha1($good); $bad = get_sha1($bad); - if (defined($opt{"BISECT_REVERSE[$i]"}) && - $opt{"BISECT_REVERSE[$i]"} == 1) { + if (defined($bisect_reverse) && $bisect_reverse == 1) { doprint "Performing a reverse bisect (bad is good, good is bad!)\n"; $reverse_bisect = 1; } else { @@ -1627,8 +2352,31 @@ sub bisect { $type = "boot"; } - my $check = $opt{"BISECT_CHECK[$i]"}; - if (defined($check) && $check ne "0") { + # Check if a bisect was running + my $bisect_start_file = "$builddir/.git/BISECT_START"; + + my $check = $bisect_check; + my $do_check = defined($check) && $check ne "0"; + + if ( -f $bisect_start_file ) { + print "Bisect in progress found\n"; + if ($do_check) { + print " If you say yes, then no checks of good or bad will be done\n"; + } + if (defined($replay)) { + print "** BISECT_REPLAY is defined in config file **"; + print " Ignore config option and perform new git bisect log?\n"; + if (read_ync " (yes, no, or cancel) ") { + $replay = update_bisect_replay; + $do_check = 0; + } + } elsif (read_yn "read git log and continue?") { + $replay = update_bisect_replay; + $do_check = 0; + } + } + + if ($do_check) { # get current HEAD my $head = get_sha1("HEAD"); @@ -1693,14 +2441,29 @@ sub bisect { run_command "git bisect reset" or dodie "could not reset git bisect"; - doprint "Bad commit was [$bisect_bad]\n"; + doprint "Bad commit was [$bisect_bad_commit]\n"; success $i; } +# config_ignore holds the configs that were set (or unset) for +# a good config and we will ignore these configs for the rest +# of a config bisect. These configs stay as they were. my %config_ignore; + +# config_set holds what all configs were set as. my %config_set; +# config_off holds the set of configs that the bad config had disabled. +# We need to record them and set them in the .config when running +# oldnoconfig, because oldnoconfig does not turn off new symbols, but +# instead just keeps the defaults. +my %config_off; + +# config_off_tmp holds a set of configs to turn off for now +my @config_off_tmp; + +# config_list is the set of configs that are being tested my %config_list; my %null_config; @@ -1779,12 +2542,21 @@ sub create_config { } } + # turn off configs to keep off + foreach my $config (keys %config_off) { + print OUT "# $config is not set\n"; + } + + # turn off configs that should be off for now + foreach my $config (@config_off_tmp) { + print OUT "# $config is not set\n"; + } + foreach my $config (keys %config_ignore) { print OUT "$config_ignore{$config}\n"; } close(OUT); -# exit; make_oldconfig; } @@ -1849,7 +2621,7 @@ sub run_config_bisect { } doprint "***** RUN TEST ***\n"; - my $type = $opt{"CONFIG_BISECT_TYPE[$iteration]"}; + my $type = $config_bisect_type; my $ret; my %current_config; @@ -1861,6 +2633,13 @@ sub run_config_bisect { do { my @tophalf = @start_list[0 .. $half]; + # keep the bottom half off + if ($half < $#start_list) { + @config_off_tmp = @start_list[$half + 1 .. $#start_list]; + } else { + @config_off_tmp = (); + } + create_config @tophalf; read_current_config \%current_config; @@ -1877,7 +2656,11 @@ sub run_config_bisect { if (!$found) { # try the other half doprint "Top half produced no set configs, trying bottom half\n"; + + # keep the top half off + @config_off_tmp = @tophalf; @tophalf = @start_list[$half + 1 .. $#start_list]; + create_config @tophalf; read_current_config \%current_config; foreach my $config (@tophalf) { @@ -1953,7 +2736,7 @@ sub run_config_bisect { sub config_bisect { my ($i) = @_; - my $start_config = $opt{"CONFIG_BISECT[$i]"}; + my $start_config = $config_bisect; my $tmpconfig = "$tmpdir/use_config"; @@ -1982,7 +2765,7 @@ sub config_bisect { # read directly what we want to check my %config_check; open (IN, $output_config) - or dodie "faied to open $output_config"; + or dodie "failed to open $output_config"; while (<IN>) { if (/^((CONFIG\S*)=.*)/) { @@ -2015,6 +2798,10 @@ sub config_bisect { $added_configs{$2} = $1; $config_list{$2} = $1; } + } elsif (/^# ((CONFIG\S*).*)/) { + # Keep these configs disabled + $config_set{$2} = $1; + $config_off{$2} = $1; } } close(IN); @@ -2037,6 +2824,8 @@ sub config_bisect { my %config_test; my $once = 0; + @config_off_tmp = (); + # Sometimes kconfig does weird things. We must make sure # that the config we autocreate has everything we need # to test, otherwise we may miss testing configs, or @@ -2055,6 +2844,18 @@ sub config_bisect { } } my $ret; + + if (defined($config_bisect_check) && $config_bisect_check) { + doprint " Checking to make sure bad config with min config fails\n"; + create_config keys %config_list; + $ret = run_config_bisect_test $config_bisect_type; + if ($ret) { + doprint " FAILED! Bad config with min config boots fine\n"; + return -1; + } + doprint " Bad config with min config fails as expected\n"; + } + do { $ret = run_config_bisect; } while (!$ret); @@ -2066,32 +2867,29 @@ sub config_bisect { sub patchcheck_reboot { doprint "Reboot and sleep $patchcheck_sleep_time seconds\n"; - reboot; - start_monitor; - wait_for_monitor $patchcheck_sleep_time; - end_monitor; + reboot_to_good $patchcheck_sleep_time; } sub patchcheck { my ($i) = @_; die "PATCHCHECK_START[$i] not defined\n" - if (!defined($opt{"PATCHCHECK_START[$i]"})); + if (!defined($patchcheck_start)); die "PATCHCHECK_TYPE[$i] not defined\n" - if (!defined($opt{"PATCHCHECK_TYPE[$i]"})); + if (!defined($patchcheck_type)); - my $start = $opt{"PATCHCHECK_START[$i]"}; + my $start = $patchcheck_start; my $end = "HEAD"; - if (defined($opt{"PATCHCHECK_END[$i]"})) { - $end = $opt{"PATCHCHECK_END[$i]"}; + if (defined($patchcheck_end)) { + $end = $patchcheck_end; } # Get the true sha1's since we can use things like HEAD~3 $start = get_sha1($start); $end = get_sha1($end); - my $type = $opt{"PATCHCHECK_TYPE[$i]"}; + my $type = $patchcheck_type; # Can't have a test without having a test to run if ($type eq "test" && !defined($run_test)) { @@ -2178,12 +2976,31 @@ sub patchcheck { } my %depends; +my %depcount; my $iflevel = 0; my @ifdeps; # prevent recursion my %read_kconfigs; +sub add_dep { + # $config depends on $dep + my ($config, $dep) = @_; + + if (defined($depends{$config})) { + $depends{$config} .= " " . $dep; + } else { + $depends{$config} = $dep; + } + + # record the number of configs depending on $dep + if (defined $depcount{$dep}) { + $depcount{$dep}++; + } else { + $depcount{$dep} = 1; + } +} + # taken from streamline_config.pl sub read_kconfig { my ($kconfig) = @_; @@ -2230,30 +3047,19 @@ sub read_kconfig { $config = $2; for (my $i = 0; $i < $iflevel; $i++) { - if ($i) { - $depends{$config} .= " " . $ifdeps[$i]; - } else { - $depends{$config} = $ifdeps[$i]; - } - $state = "DEP"; + add_dep $config, $ifdeps[$i]; } # collect the depends for the config } elsif ($state eq "NEW" && /^\s*depends\s+on\s+(.*)$/) { - if (defined($depends{$1})) { - $depends{$config} .= " " . $1; - } else { - $depends{$config} = $1; - } + add_dep $config, $1; # Get the configs that select this config - } elsif ($state ne "NONE" && /^\s*select\s+(\S+)/) { - if (defined($depends{$1})) { - $depends{$1} .= " " . $config; - } else { - $depends{$1} = $config; - } + } elsif ($state eq "NEW" && /^\s*select\s+(\S+)/) { + + # selected by depends on config + add_dep $1, $config; # Check for if statements } elsif (/^if\s+(.*\S)\s*$/) { @@ -2365,11 +3171,18 @@ sub make_new_config { close OUT; } +sub chomp_config { + my ($config) = @_; + + $config =~ s/CONFIG_//; + + return $config; +} + sub get_depends { my ($dep) = @_; - my $kconfig = $dep; - $kconfig =~ s/CONFIG_//; + my $kconfig = chomp_config $dep; $dep = $depends{"$kconfig"}; @@ -2419,8 +3232,7 @@ sub test_this_config { return undef; } - my $kconfig = $config; - $kconfig =~ s/CONFIG_//; + my $kconfig = chomp_config $config; # Test dependencies first if (defined($depends{"$kconfig"})) { @@ -2458,6 +3270,12 @@ sub test_this_config { sub make_min_config { my ($i) = @_; + my $type = $minconfig_type; + if ($type ne "boot" && $type ne "test") { + fail "Invalid MIN_CONFIG_TYPE '$minconfig_type'\n" . + " make_min_config works only with 'boot' and 'test'\n" and return; + } + if (!defined($output_minconfig)) { fail "OUTPUT_MIN_CONFIG not defined" and return; } @@ -2467,8 +3285,15 @@ sub make_min_config { # that instead. if (-f $output_minconfig && !$start_minconfig_defined) { print "$output_minconfig exists\n"; - if (read_yn " Use it as minconfig?") { + if (!defined($use_output_minconfig)) { + if (read_yn " Use it as minconfig?") { + $start_minconfig = $output_minconfig; + } + } elsif ($use_output_minconfig > 0) { + doprint "Using $output_minconfig as MIN_CONFIG\n"; $start_minconfig = $output_minconfig; + } else { + doprint "Set to still use MIN_CONFIG as starting point\n"; } } @@ -2510,6 +3335,14 @@ sub make_min_config { my @config_keys = keys %min_configs; + # All configs need a depcount + foreach my $config (@config_keys) { + my $kconfig = chomp_config $config; + if (!defined $depcount{$kconfig}) { + $depcount{$kconfig} = 0; + } + } + # Remove anything that was set by the make allnoconfig # we shouldn't need them as they get set for us anyway. foreach my $config (@config_keys) { @@ -2548,8 +3381,13 @@ sub make_min_config { # Now disable each config one by one and do a make oldconfig # till we find a config that changes our list. - # Put configs that did not modify the config at the end. my @test_configs = keys %min_configs; + + # Sort keys by who is most dependent on + @test_configs = sort { $depcount{chomp_config($b)} <=> $depcount{chomp_config($a)} } + @test_configs ; + + # Put configs that did not modify the config at the end. my $reset = 1; for (my $i = 0; $i < $#test_configs; $i++) { if (!defined($nochange_config{$test_configs[0]})) { @@ -2601,9 +3439,16 @@ sub make_min_config { $in_bisect = 1; my $failed = 0; - build "oldconfig"; - start_monitor_and_boot or $failed = 1; - end_monitor; + build "oldconfig" or $failed = 1; + if (!$failed) { + start_monitor_and_boot or $failed = 1; + + if ($type eq "test" && !$failed) { + do_run_test or $failed = 1; + } + + end_monitor; + } $in_bisect = 0; @@ -2659,10 +3504,7 @@ sub make_min_config { } doprint "Reboot and wait $sleep_time seconds\n"; - reboot; - start_monitor; - wait_for_monitor $sleep_time; - end_monitor; + reboot_to_good $sleep_time; } success $i; @@ -2684,13 +3526,27 @@ if ($#ARGV == 0) { } if (! -f $ktest_config) { + $newconfig = 1; + get_test_case; open(OUT, ">$ktest_config") or die "Can not create $ktest_config"; print OUT << "EOF" # Generated by ktest.pl # + +# PWD is a ktest.pl variable that will result in the process working +# directory that ktest.pl is executed in. + +# THIS_DIR is automatically assigned the PWD of the path that generated +# the config file. It is best to use this variable when assigning other +# directory paths within this directory. This allows you to easily +# move the test cases to other locations or to other machines. +# +THIS_DIR := $variable{"PWD"} + # Define each test with TEST_START # The config options below it will override the defaults TEST_START +TEST_TYPE = $default{"TEST_TYPE"} DEFAULTS EOF @@ -2710,7 +3566,7 @@ if ($#new_configs >= 0) { open(OUT, ">>$ktest_config") or die "Can not append to $ktest_config"; foreach my $config (@new_configs) { print OUT "$config = $entered_configs{$config}\n"; - $opt{$config} = $entered_configs{$config}; + $opt{$config} = process_variables($entered_configs{$config}); } } @@ -2783,66 +3639,37 @@ sub set_test_option { # First we need to do is the builds for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { + # Do not reboot on failing test options + $no_reboot = 1; + $reboot_success = 0; + + $have_version = 0; + $iteration = $i; + undef %force_config; + my $makecmd = set_test_option("MAKE_CMD", $i); - $machine = set_test_option("MACHINE", $i); - $ssh_user = set_test_option("SSH_USER", $i); - $tmpdir = set_test_option("TMP_DIR", $i); - $outputdir = set_test_option("OUTPUT_DIR", $i); - $builddir = set_test_option("BUILD_DIR", $i); - $test_type = set_test_option("TEST_TYPE", $i); - $build_type = set_test_option("BUILD_TYPE", $i); - $build_options = set_test_option("BUILD_OPTIONS", $i); - $pre_build = set_test_option("PRE_BUILD", $i); - $post_build = set_test_option("POST_BUILD", $i); - $pre_build_die = set_test_option("PRE_BUILD_DIE", $i); - $post_build_die = set_test_option("POST_BUILD_DIE", $i); - $power_cycle = set_test_option("POWER_CYCLE", $i); - $reboot = set_test_option("REBOOT", $i); - $noclean = set_test_option("BUILD_NOCLEAN", $i); - $minconfig = set_test_option("MIN_CONFIG", $i); - $output_minconfig = set_test_option("OUTPUT_MIN_CONFIG", $i); - $start_minconfig = set_test_option("START_MIN_CONFIG", $i); - $ignore_config = set_test_option("IGNORE_CONFIG", $i); - $run_test = set_test_option("TEST", $i); - $addconfig = set_test_option("ADD_CONFIG", $i); - $reboot_type = set_test_option("REBOOT_TYPE", $i); - $grub_menu = set_test_option("GRUB_MENU", $i); - $post_install = set_test_option("POST_INSTALL", $i); - $reboot_script = set_test_option("REBOOT_SCRIPT", $i); - $reboot_on_error = set_test_option("REBOOT_ON_ERROR", $i); - $poweroff_on_error = set_test_option("POWEROFF_ON_ERROR", $i); - $die_on_failure = set_test_option("DIE_ON_FAILURE", $i); - $power_off = set_test_option("POWER_OFF", $i); - $powercycle_after_reboot = set_test_option("POWERCYCLE_AFTER_REBOOT", $i); - $poweroff_after_halt = set_test_option("POWEROFF_AFTER_HALT", $i); - $sleep_time = set_test_option("SLEEP_TIME", $i); - $bisect_sleep_time = set_test_option("BISECT_SLEEP_TIME", $i); - $patchcheck_sleep_time = set_test_option("PATCHCHECK_SLEEP_TIME", $i); - $ignore_warnings = set_test_option("IGNORE_WARNINGS", $i); - $bisect_manual = set_test_option("BISECT_MANUAL", $i); - $bisect_skip = set_test_option("BISECT_SKIP", $i); - $config_bisect_good = set_test_option("CONFIG_BISECT_GOOD", $i); - $store_failures = set_test_option("STORE_FAILURES", $i); - $test_name = set_test_option("TEST_NAME", $i); - $timeout = set_test_option("TIMEOUT", $i); - $booted_timeout = set_test_option("BOOTED_TIMEOUT", $i); - $console = set_test_option("CONSOLE", $i); - $detect_triplefault = set_test_option("DETECT_TRIPLE_FAULT", $i); - $success_line = set_test_option("SUCCESS_LINE", $i); - $stop_after_success = set_test_option("STOP_AFTER_SUCCESS", $i); - $stop_after_failure = set_test_option("STOP_AFTER_FAILURE", $i); - $stop_test_after = set_test_option("STOP_TEST_AFTER", $i); - $build_target = set_test_option("BUILD_TARGET", $i); - $ssh_exec = set_test_option("SSH_EXEC", $i); - $scp_to_target = set_test_option("SCP_TO_TARGET", $i); - $target_image = set_test_option("TARGET_IMAGE", $i); - $localversion = set_test_option("LOCALVERSION", $i); + # Load all the options into their mapped variable names + foreach my $opt (keys %option_map) { + ${$option_map{$opt}} = set_test_option($opt, $i); + } $start_minconfig_defined = 1; + # The first test may override the PRE_KTEST option + if (defined($pre_ktest) && $i == 1) { + doprint "\n"; + run_command $pre_ktest; + } + + # Any test can override the POST_KTEST option + # The last test takes precedence. + if (defined($post_ktest)) { + $final_post_ktest = $post_ktest; + } + if (!defined($start_minconfig)) { $start_minconfig_defined = 0; $start_minconfig = $minconfig; @@ -2850,34 +3677,36 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { chdir $builddir || die "can't change directory to $builddir"; - if (!-d $tmpdir) { - mkpath($tmpdir) or - die "can't create $tmpdir"; + foreach my $dir ($tmpdir, $outputdir) { + if (!-d $dir) { + mkpath($dir) or + die "can't create $dir"; + } } $ENV{"SSH_USER"} = $ssh_user; $ENV{"MACHINE"} = $machine; - $target = "$ssh_user\@$machine"; - $buildlog = "$tmpdir/buildlog-$machine"; + $testlog = "$tmpdir/testlog-$machine"; $dmesg = "$tmpdir/dmesg-$machine"; $make = "$makecmd O=$outputdir"; $output_config = "$outputdir/.config"; - if ($reboot_type eq "grub") { - dodie "GRUB_MENU not defined" if (!defined($grub_menu)); - } elsif (!defined($reboot_script)) { - dodie "REBOOT_SCRIPT not defined" + if (!$buildonly) { + $target = "$ssh_user\@$machine"; + if ($reboot_type eq "grub") { + dodie "GRUB_MENU not defined" if (!defined($grub_menu)); + } } my $run_type = $build_type; if ($test_type eq "patchcheck") { - $run_type = $opt{"PATCHCHECK_TYPE[$i]"}; + $run_type = $patchcheck_type; } elsif ($test_type eq "bisect") { - $run_type = $opt{"BISECT_TYPE[$i]"}; + $run_type = $bisect_type; } elsif ($test_type eq "config_bisect") { - $run_type = $opt{"CONFIG_BISECT_TYPE[$i]"}; + $run_type = $config_bisect_type; } if ($test_type eq "make_min_config") { @@ -2889,11 +3718,19 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { $run_type = "ERROR"; } + my $installme = ""; + $installme = " no_install" if ($no_install); + doprint "\n\n"; - doprint "RUNNING TEST $i of $opt{NUM_TESTS} with option $test_type $run_type\n\n"; + doprint "RUNNING TEST $i of $opt{NUM_TESTS} with option $test_type $run_type$installme\n\n"; + + if (defined($pre_test)) { + run_command $pre_test; + } unlink $dmesg; unlink $buildlog; + unlink $testlog; if (defined($addconfig)) { my $min = $minconfig; @@ -2905,12 +3742,18 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { $minconfig = "$tmpdir/add_config"; } - my $checkout = $opt{"CHECKOUT[$i]"}; if (defined($checkout)) { run_command "git checkout $checkout" or die "failed to checkout $checkout"; } + $no_reboot = 0; + + # A test may opt to not reboot the box + if ($reboot_on_success) { + $reboot_success = 1; + } + if ($test_type eq "bisect") { bisect $i; next; @@ -2929,6 +3772,13 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { build $build_type or next; } + if ($test_type eq "install") { + get_version; + install; + success $i; + next; + } + if ($test_type ne "build") { my $failed = 0; start_monitor_and_boot or $failed = 1; @@ -2943,12 +3793,20 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { success $i; } +if (defined($final_post_ktest)) { + run_command $final_post_ktest; +} + if ($opt{"POWEROFF_ON_SUCCESS"}) { halt; -} elsif ($opt{"REBOOT_ON_SUCCESS"} && !do_not_reboot) { - reboot; +} elsif ($opt{"REBOOT_ON_SUCCESS"} && !do_not_reboot && $reboot_success) { + reboot_to_good; +} elsif (defined($switch_to_good)) { + # still need to get to the good kernel + run_command $switch_to_good; } + doprint "\n $successes of $opt{NUM_TESTS} tests were successful\n\n"; exit 0; diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf index b8bcd14b5a4d..de28a0a3b8fc 100644 --- a/tools/testing/ktest/sample.conf +++ b/tools/testing/ktest/sample.conf @@ -72,6 +72,128 @@ # the same option name under the same test or as default # ktest will fail to execute, and no tests will run. # +# DEFAULTS OVERRIDE +# +# Options defined in the DEFAULTS section can not be duplicated +# even if they are defined in two different DEFAULT sections. +# This is done to catch mistakes where an option is added but +# the previous option was forgotten about and not commented. +# +# The OVERRIDE keyword can be added to a section to allow this +# section to override other DEFAULT sections values that have +# been defined previously. It will only override options that +# have been defined before its use. Options defined later +# in a non override section will still error. The same option +# can not be defined in the same section even if that section +# is marked OVERRIDE. +# +# +# +# Both TEST_START and DEFAULTS sections can also have the IF keyword +# The value after the IF must evaluate into a 0 or non 0 positive +# integer, and can use the config variables (explained below). +# +# DEFAULTS IF ${IS_X86_32} +# +# The above will process the DEFAULTS section if the config +# variable IS_X86_32 evaluates to a non zero positive integer +# otherwise if it evaluates to zero, it will act the same +# as if the SKIP keyword was used. +# +# The ELSE keyword can be used directly after a section with +# a IF statement. +# +# TEST_START IF ${RUN_NET_TESTS} +# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-network +# +# ELSE +# +# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-normal +# +# +# The ELSE keyword can also contain an IF statement to allow multiple +# if then else sections. But all the sections must be either +# DEFAULT or TEST_START, they can not be a mixture. +# +# TEST_START IF ${RUN_NET_TESTS} +# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-network +# +# ELSE IF ${RUN_DISK_TESTS} +# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-tests +# +# ELSE IF ${RUN_CPU_TESTS} +# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-cpu +# +# ELSE +# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-network +# +# The if statement may also have comparisons that will and for +# == and !=, strings may be used for both sides. +# +# BOX_TYPE := x86_32 +# +# DEFAULTS IF ${BOX_TYPE} == x86_32 +# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-32 +# ELSE +# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-64 +# +# The DEFINED keyword can be used by the IF statements too. +# It returns true if the given config variable or option has been defined +# or false otherwise. +# +# +# DEFAULTS IF DEFINED USE_CC +# CC := ${USE_CC} +# ELSE +# CC := gcc +# +# +# As well as NOT DEFINED. +# +# DEFAULTS IF NOT DEFINED MAKE_CMD +# MAKE_CMD := make ARCH=x86 +# +# +# And/or ops (&&,||) may also be used to make complex conditionals. +# +# TEST_START IF (DEFINED ALL_TESTS || ${MYTEST} == boottest) && ${MACHINE} == gandalf +# +# Notice the use of parentheses. Without any parentheses the above would be +# processed the same as: +# +# TEST_START IF DEFINED ALL_TESTS || (${MYTEST} == boottest && ${MACHINE} == gandalf) +# +# +# +# INCLUDE file +# +# The INCLUDE keyword may be used in DEFAULT sections. This will +# read another config file and process that file as well. The included +# file can include other files, add new test cases or default +# statements. Config variables will be passed to these files and changes +# to config variables will be seen by top level config files. Including +# a file is processed just like the contents of the file was cut and pasted +# into the top level file, except, that include files that end with +# TEST_START sections will have that section ended at the end of +# the include file. That is, an included file is included followed +# by another DEFAULT keyword. +# +# Unlike other files referenced in this config, the file path does not need +# to be absolute. If the file does not start with '/', then the directory +# that the current config file was located in is used. If no config by the +# given name is found there, then the current directory is searched. +# +# INCLUDE myfile +# DEFAULT +# +# is the same as: +# +# INCLUDE myfile +# +# Note, if the include file does not contain a full path, the file is +# searched first by the location of the original include file, and then +# by the location that ktest.pl was executed in. +# #### Config variables #### # @@ -224,7 +346,10 @@ #GRUB_MENU = Test Kernel # A script to reboot the target into the test kernel -# (Only mandatory if REBOOT_TYPE = script) +# This and SWITCH_TO_TEST are about the same, except +# SWITCH_TO_TEST is run even for REBOOT_TYPE = grub. +# This may be left undefined. +# (default undefined) #REBOOT_SCRIPT = #### Optional Config Options (all have defaults) #### @@ -251,11 +376,30 @@ # DEFAULTS # DEFAULTS SKIP +# If you want to execute some command before the first test runs +# you can set this option. Note, it can be set as a default option +# or an option in the first test case. All other test cases will +# ignore it. If both the default and first test have this option +# set, then the first test will take precedence. +# +# default (undefined) +#PRE_KTEST = ${SSH} ~/set_up_test + +# If you want to execute some command after all the tests have +# completed, you can set this option. Note, it can be set as a +# default or any test case can override it. If multiple test cases +# set this option, then the last test case that set it will take +# precedence +# +# default (undefined) +#POST_KTEST = ${SSH} ~/dismantle_test + # The default test type (default test) # The test types may be: -# build - only build the kernel, do nothing else -# boot - build and boot the kernel -# test - build, boot and if TEST is set, run the test script +# build - only build the kernel, do nothing else +# install - build and install, but do nothing else (does not reboot) +# boot - build, install, and boot the kernel +# test - build, boot and if TEST is set, run the test script # (If TEST is not set, it defaults back to boot) # bisect - Perform a bisect on the kernel (see BISECT_TYPE below) # patchcheck - Do a test on a series of commits in git (see PATCHCHECK below) @@ -282,6 +426,14 @@ # (default "") #BUILD_OPTIONS = -j20 +# If you need to do some special handling before installing +# you can add a script with this option. +# The environment variable KERNEL_VERSION will be set to the +# kernel version that is used. +# +# default (undefined) +#PRE_INSTALL = ssh user@target rm -rf '/lib/modules/*-test*' + # If you need an initrd, you can add a script or code here to install # it. The environment variable KERNEL_VERSION will be set to the # kernel version that is used. Remember to add the initrd line @@ -293,6 +445,25 @@ # or on some systems: #POST_INSTALL = ssh user@target /sbin/dracut -f /boot/initramfs-test.img $KERNEL_VERSION +# If for some reason you just want to boot the kernel and you do not +# want the test to install anything new. For example, you may just want +# to boot test the same kernel over and over and do not want to go through +# the hassle of installing anything, you can set this option to 1 +# (default 0) +#NO_INSTALL = 1 + +# If there is a command that you want to run before the individual test +# case executes, then you can set this option +# +# default (undefined) +#PRE_TEST = ${SSH} reboot_to_special_kernel + +# If there is a command you want to run after the individual test case +# completes, then you can set this option. +# +# default (undefined) +#POST_TEST = cd ${BUILD_DIR}; git reset --hard + # If there is a script that you require to run before the build is done # you can specify it with PRE_BUILD. # @@ -338,6 +509,27 @@ # The test will not modify that file. #REBOOT_TYPE = grub +# If you are using a machine that doesn't boot with grub, and +# perhaps gets its kernel from a remote server (tftp), then +# you can use this option to update the target image with the +# test image. +# +# You could also do the same with POST_INSTALL, but the difference +# between that option and this option is that POST_INSTALL runs +# after the install, where this one runs just before a reboot. +# (default undefined) +#SWITCH_TO_TEST = cp ${OUTPUT_DIR}/${BUILD_TARGET} ${TARGET_IMAGE} + +# If you are using a machine that doesn't boot with grub, and +# perhaps gets its kernel from a remote server (tftp), then +# you can use this option to update the target image with the +# the known good image to reboot safely back into. +# +# This option holds a command that will execute before needing +# to reboot to a good known image. +# (default undefined) +#SWITCH_TO_GOOD = ssh ${SSH_USER}/${MACHINE} cp good_image ${TARGET_IMAGE} + # The min config that is needed to build for the machine # A nice way to create this is with the following: # @@ -415,6 +607,14 @@ # (default "login:") #SUCCESS_LINE = login: +# To speed up between reboots, defining a line that the +# default kernel produces that represents that the default +# kernel has successfully booted and can be used to pass +# a new test kernel to it. Otherwise ktest.pl will wait till +# SLEEP_TIME to continue. +# (default undefined) +#REBOOT_SUCCESS_LINE = login: + # In case the console constantly fills the screen, having # a specified time to stop the test after success is recommended. # (in seconds) @@ -451,6 +651,12 @@ # (default undefined) #STORE_FAILURES = /home/test/failures +# Directory to store success directories on success. If this is not +# set, the .config, dmesg and bootlog will not be saved if a +# test succeeds. +# (default undefined) +#STORE_SUCCESSES = /home/test/successes + # Build without doing a make mrproper, or removing .config # (default 0) #BUILD_NOCLEAN = 0 @@ -480,6 +686,8 @@ # another test. If a reboot to the reliable kernel happens, # we wait SLEEP_TIME for the console to stop producing output # before starting the next test. +# +# You can speed up reboot times even more by setting REBOOT_SUCCESS_LINE. # (default 60) #SLEEP_TIME = 60 @@ -487,6 +695,14 @@ # (default 60) #BISECT_SLEEP_TIME = 60 +# The max wait time (in seconds) for waiting for the console to finish. +# If for some reason, the console is outputting content without +# ever finishing, this will cause ktest to get stuck. This +# option is the max time ktest will wait for the monitor (console) +# to settle down before continuing. +# (default 1800) +#MAX_MONITOR_WAIT + # The time in between patch checks to sleep (in seconds) # (default 60) #PATCHCHECK_SLEEP_TIME = 60 @@ -540,10 +756,18 @@ # The variables SSH_USER, MACHINE and SSH_COMMAND are defined #SSH_EXEC = ssh $SSH_USER@$MACHINE $SSH_COMMAND"; -# The way to copy a file to the target +# The way to copy a file to the target (install and modules) # (default scp $SRC_FILE $SSH_USER@$MACHINE:$DST_FILE) -# The variables SSH_USER, MACHINE, SRC_FILE and DST_FILE are defined. -#SCP_TO_TARGET = scp $SRC_FILE $SSH_USER@$MACHINE:$DST_FILE +# The variables SSH_USER, MACHINE are defined by the config +# SRC_FILE and DST_FILE are ktest internal variables and +# should only have '$' and not the '${}' notation. +# (default scp $SRC_FILE ${SSH_USER}@${MACHINE}:$DST_FILE) +#SCP_TO_TARGET = echo skip scp for $SRC_FILE $DST_FILE + +# If install needs to be different than modules, then this +# option will override the SCP_TO_TARGET for installation. +# (default ${SCP_TO_TARGET} ) +#SCP_TO_TARGET_INSTALL = scp $SRC_FILE tftp@tftpserver:$DST_FILE # The nice way to reboot the target # (default ssh $SSH_USER@$MACHINE reboot) @@ -560,6 +784,25 @@ # (default 1) #DETECT_TRIPLE_FAULT = 0 +# All options in the config file should be either used by ktest +# or could be used within a value of another option. If an option +# in the config file is not used, ktest will warn about it and ask +# if you want to continue. +# +# If you don't care if there are non-used options, enable this +# option. Be careful though, a non-used option is usually a sign +# of an option name being typed incorrectly. +# (default 0) +#IGNORE_UNUSED = 1 + +# When testing a kernel that happens to have WARNINGs, and call +# traces, ktest.pl will detect these and fail a boot or test run +# due to warnings. By setting this option, ktest will ignore +# call traces, and will not fail a test if the kernel produces +# an oops. Use this option with care. +# (default 0) +#IGNORE_ERRORS = 1 + #### Per test run options #### # The following options are only allowed in TEST_START sections. # They are ignored in the DEFAULTS sections. @@ -722,6 +965,42 @@ # BISECT_BAD with BISECT_CHECK = good or # BISECT_CHECK = bad, respectively. # +# BISECT_RET_GOOD = 0 (optional, default undefined) +# +# In case the specificed test returns something other than just +# 0 for good, and non-zero for bad, you can override 0 being +# good by defining BISECT_RET_GOOD. +# +# BISECT_RET_BAD = 1 (optional, default undefined) +# +# In case the specificed test returns something other than just +# 0 for good, and non-zero for bad, you can override non-zero being +# bad by defining BISECT_RET_BAD. +# +# BISECT_RET_ABORT = 255 (optional, default undefined) +# +# If you need to abort the bisect if the test discovers something +# that was wrong, you can define BISECT_RET_ABORT to be the error +# code returned by the test in order to abort the bisect. +# +# BISECT_RET_SKIP = 2 (optional, default undefined) +# +# If the test detects that the current commit is neither good +# nor bad, but something else happened (another bug detected) +# you can specify BISECT_RET_SKIP to an error code that the +# test returns when it should skip the current commit. +# +# BISECT_RET_DEFAULT = good (optional, default undefined) +# +# You can override the default of what to do when the above +# options are not hit. This may be one of, "good", "bad", +# "abort" or "skip" (without the quotes). +# +# Note, if you do not define any of the previous BISECT_RET_* +# and define BISECT_RET_DEFAULT, all bisects results will do +# what the BISECT_RET_DEFAULT has. +# +# # Example: # TEST_START # TEST_TYPE = bisect @@ -806,11 +1085,17 @@ # can specify it with CONFIG_BISECT_GOOD. Otherwise # the MIN_CONFIG is the base. # +# CONFIG_BISECT_CHECK (optional) +# Set this to 1 if you want to confirm that the config ktest +# generates (the bad config with the min config) is still bad. +# It may be that the min config fixes what broke the bad config +# and the test will not return a result. +# # Example: # TEST_START # TEST_TYPE = config_bisect # CONFIG_BISECT_TYPE = build -# CONFIG_BISECT = /home/test/¢onfig-bad +# CONFIG_BISECT = /home/test/config-bad # MIN_CONFIG = /home/test/config-min # BISECT_MANUAL = 1 # @@ -872,10 +1157,26 @@ # and will not be tested again in later runs. # (optional) # +# MIN_CONFIG_TYPE can be either 'boot' or 'test'. With 'boot' it will +# test if the created config can just boot the machine. If this is +# set to 'test', then the TEST option must be defined and the created +# config will not only boot the target, but also make sure that the +# config lets the test succeed. This is useful to make sure the final +# config that is generated allows network activity (ssh). +# (optional) +# +# USE_OUTPUT_MIN_CONFIG set this to 1 if you do not want to be prompted +# about using the OUTPUT_MIN_CONFIG as the MIN_CONFIG as the starting +# point. Set it to 0 if you want to always just use the given MIN_CONFIG. +# If it is not defined, it will prompt you to pick which config +# to start with (MIN_CONFIG or OUTPUT_MIN_CONFIG). +# # Example: # # TEST_TYPE = make_min_config # OUTPUT_MIN_CONFIG = /path/to/config-new-min # START_MIN_CONFIG = /path/to/config-min # IGNORE_CONFIG = /path/to/config-tested +# MIN_CONFIG_TYPE = test +# TEST = ssh ${USER}@${MACHINE} echo hi # diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile new file mode 100644 index 000000000000..85baf11e2acd --- /dev/null +++ b/tools/testing/selftests/Makefile @@ -0,0 +1,16 @@ +TARGETS = breakpoints kcmp mqueue vm cpu-hotplug memory-hotplug + +all: + for TARGET in $(TARGETS); do \ + make -C $$TARGET; \ + done; + +run_tests: all + for TARGET in $(TARGETS); do \ + make -C $$TARGET run_tests; \ + done; + +clean: + for TARGET in $(TARGETS); do \ + make -C $$TARGET clean; \ + done; diff --git a/tools/testing/selftests/breakpoints/Makefile b/tools/testing/selftests/breakpoints/Makefile new file mode 100644 index 000000000000..931278035f5c --- /dev/null +++ b/tools/testing/selftests/breakpoints/Makefile @@ -0,0 +1,23 @@ +# Taken from perf makefile +uname_M := $(shell uname -m 2>/dev/null || echo not) +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/) +ifeq ($(ARCH),i386) + ARCH := x86 +endif +ifeq ($(ARCH),x86_64) + ARCH := x86 +endif + + +all: +ifeq ($(ARCH),x86) + gcc breakpoint_test.c -o breakpoint_test +else + echo "Not an x86 target, can't build breakpoints selftests" +endif + +run_tests: + ./breakpoint_test + +clean: + rm -fr breakpoint_test diff --git a/tools/testing/selftests/breakpoints/breakpoint_test.c b/tools/testing/selftests/breakpoints/breakpoint_test.c new file mode 100644 index 000000000000..a0743f3b2b57 --- /dev/null +++ b/tools/testing/selftests/breakpoints/breakpoint_test.c @@ -0,0 +1,394 @@ +/* + * Copyright (C) 2011 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com> + * + * Licensed under the terms of the GNU GPL License version 2 + * + * Selftests for breakpoints (and more generally the do_debug() path) in x86. + */ + + +#include <sys/ptrace.h> +#include <unistd.h> +#include <stddef.h> +#include <sys/user.h> +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <sys/types.h> +#include <sys/wait.h> + + +/* Breakpoint access modes */ +enum { + BP_X = 1, + BP_RW = 2, + BP_W = 4, +}; + +static pid_t child_pid; + +/* + * Ensures the child and parent are always "talking" about + * the same test sequence. (ie: that we haven't forgotten + * to call check_trapped() somewhere). + */ +static int nr_tests; + +static void set_breakpoint_addr(void *addr, int n) +{ + int ret; + + ret = ptrace(PTRACE_POKEUSER, child_pid, + offsetof(struct user, u_debugreg[n]), addr); + if (ret) { + perror("Can't set breakpoint addr\n"); + exit(-1); + } +} + +static void toggle_breakpoint(int n, int type, int len, + int local, int global, int set) +{ + int ret; + + int xtype, xlen; + unsigned long vdr7, dr7; + + switch (type) { + case BP_X: + xtype = 0; + break; + case BP_W: + xtype = 1; + break; + case BP_RW: + xtype = 3; + break; + } + + switch (len) { + case 1: + xlen = 0; + break; + case 2: + xlen = 4; + break; + case 4: + xlen = 0xc; + break; + case 8: + xlen = 8; + break; + } + + dr7 = ptrace(PTRACE_PEEKUSER, child_pid, + offsetof(struct user, u_debugreg[7]), 0); + + vdr7 = (xlen | xtype) << 16; + vdr7 <<= 4 * n; + + if (local) { + vdr7 |= 1 << (2 * n); + vdr7 |= 1 << 8; + } + if (global) { + vdr7 |= 2 << (2 * n); + vdr7 |= 1 << 9; + } + + if (set) + dr7 |= vdr7; + else + dr7 &= ~vdr7; + + ret = ptrace(PTRACE_POKEUSER, child_pid, + offsetof(struct user, u_debugreg[7]), dr7); + if (ret) { + perror("Can't set dr7"); + exit(-1); + } +} + +/* Dummy variables to test read/write accesses */ +static unsigned long long dummy_var[4]; + +/* Dummy functions to test execution accesses */ +static void dummy_func(void) { } +static void dummy_func1(void) { } +static void dummy_func2(void) { } +static void dummy_func3(void) { } + +static void (*dummy_funcs[])(void) = { + dummy_func, + dummy_func1, + dummy_func2, + dummy_func3, +}; + +static int trapped; + +static void check_trapped(void) +{ + /* + * If we haven't trapped, wake up the parent + * so that it notices the failure. + */ + if (!trapped) + kill(getpid(), SIGUSR1); + trapped = 0; + + nr_tests++; +} + +static void write_var(int len) +{ + char *pcval; short *psval; int *pival; long long *plval; + int i; + + for (i = 0; i < 4; i++) { + switch (len) { + case 1: + pcval = (char *)&dummy_var[i]; + *pcval = 0xff; + break; + case 2: + psval = (short *)&dummy_var[i]; + *psval = 0xffff; + break; + case 4: + pival = (int *)&dummy_var[i]; + *pival = 0xffffffff; + break; + case 8: + plval = (long long *)&dummy_var[i]; + *plval = 0xffffffffffffffffLL; + break; + } + check_trapped(); + } +} + +static void read_var(int len) +{ + char cval; short sval; int ival; long long lval; + int i; + + for (i = 0; i < 4; i++) { + switch (len) { + case 1: + cval = *(char *)&dummy_var[i]; + break; + case 2: + sval = *(short *)&dummy_var[i]; + break; + case 4: + ival = *(int *)&dummy_var[i]; + break; + case 8: + lval = *(long long *)&dummy_var[i]; + break; + } + check_trapped(); + } +} + +/* + * Do the r/w/x accesses to trigger the breakpoints. And run + * the usual traps. + */ +static void trigger_tests(void) +{ + int len, local, global, i; + char val; + int ret; + + ret = ptrace(PTRACE_TRACEME, 0, NULL, 0); + if (ret) { + perror("Can't be traced?\n"); + return; + } + + /* Wake up father so that it sets up the first test */ + kill(getpid(), SIGUSR1); + + /* Test instruction breakpoints */ + for (local = 0; local < 2; local++) { + for (global = 0; global < 2; global++) { + if (!local && !global) + continue; + + for (i = 0; i < 4; i++) { + dummy_funcs[i](); + check_trapped(); + } + } + } + + /* Test write watchpoints */ + for (len = 1; len <= sizeof(long); len <<= 1) { + for (local = 0; local < 2; local++) { + for (global = 0; global < 2; global++) { + if (!local && !global) + continue; + write_var(len); + } + } + } + + /* Test read/write watchpoints (on read accesses) */ + for (len = 1; len <= sizeof(long); len <<= 1) { + for (local = 0; local < 2; local++) { + for (global = 0; global < 2; global++) { + if (!local && !global) + continue; + read_var(len); + } + } + } + + /* Icebp trap */ + asm(".byte 0xf1\n"); + check_trapped(); + + /* Int 3 trap */ + asm("int $3\n"); + check_trapped(); + + kill(getpid(), SIGUSR1); +} + +static void check_success(const char *msg) +{ + const char *msg2; + int child_nr_tests; + int status; + + /* Wait for the child to SIGTRAP */ + wait(&status); + + msg2 = "Failed"; + + if (WSTOPSIG(status) == SIGTRAP) { + child_nr_tests = ptrace(PTRACE_PEEKDATA, child_pid, + &nr_tests, 0); + if (child_nr_tests == nr_tests) + msg2 = "Ok"; + if (ptrace(PTRACE_POKEDATA, child_pid, &trapped, 1)) { + perror("Can't poke\n"); + exit(-1); + } + } + + nr_tests++; + + printf("%s [%s]\n", msg, msg2); +} + +static void launch_instruction_breakpoints(char *buf, int local, int global) +{ + int i; + + for (i = 0; i < 4; i++) { + set_breakpoint_addr(dummy_funcs[i], i); + toggle_breakpoint(i, BP_X, 1, local, global, 1); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + sprintf(buf, "Test breakpoint %d with local: %d global: %d", + i, local, global); + check_success(buf); + toggle_breakpoint(i, BP_X, 1, local, global, 0); + } +} + +static void launch_watchpoints(char *buf, int mode, int len, + int local, int global) +{ + const char *mode_str; + int i; + + if (mode == BP_W) + mode_str = "write"; + else + mode_str = "read"; + + for (i = 0; i < 4; i++) { + set_breakpoint_addr(&dummy_var[i], i); + toggle_breakpoint(i, mode, len, local, global, 1); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + sprintf(buf, "Test %s watchpoint %d with len: %d local: " + "%d global: %d", mode_str, i, len, local, global); + check_success(buf); + toggle_breakpoint(i, mode, len, local, global, 0); + } +} + +/* Set the breakpoints and check the child successfully trigger them */ +static void launch_tests(void) +{ + char buf[1024]; + int len, local, global, i; + + /* Instruction breakpoints */ + for (local = 0; local < 2; local++) { + for (global = 0; global < 2; global++) { + if (!local && !global) + continue; + launch_instruction_breakpoints(buf, local, global); + } + } + + /* Write watchpoint */ + for (len = 1; len <= sizeof(long); len <<= 1) { + for (local = 0; local < 2; local++) { + for (global = 0; global < 2; global++) { + if (!local && !global) + continue; + launch_watchpoints(buf, BP_W, len, + local, global); + } + } + } + + /* Read-Write watchpoint */ + for (len = 1; len <= sizeof(long); len <<= 1) { + for (local = 0; local < 2; local++) { + for (global = 0; global < 2; global++) { + if (!local && !global) + continue; + launch_watchpoints(buf, BP_RW, len, + local, global); + } + } + } + + /* Icebp traps */ + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success("Test icebp"); + + /* Int 3 traps */ + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success("Test int 3 trap"); + + ptrace(PTRACE_CONT, child_pid, NULL, 0); +} + +int main(int argc, char **argv) +{ + pid_t pid; + int ret; + + pid = fork(); + if (!pid) { + trigger_tests(); + return 0; + } + + child_pid = pid; + + wait(NULL); + + launch_tests(); + + wait(NULL); + + return 0; +} diff --git a/tools/testing/selftests/cpu-hotplug/Makefile b/tools/testing/selftests/cpu-hotplug/Makefile new file mode 100644 index 000000000000..7c9c20ff578a --- /dev/null +++ b/tools/testing/selftests/cpu-hotplug/Makefile @@ -0,0 +1,6 @@ +all: + +run_tests: + ./on-off-test.sh + +clean: diff --git a/tools/testing/selftests/cpu-hotplug/on-off-test.sh b/tools/testing/selftests/cpu-hotplug/on-off-test.sh new file mode 100644 index 000000000000..bdde7cf428bb --- /dev/null +++ b/tools/testing/selftests/cpu-hotplug/on-off-test.sh @@ -0,0 +1,221 @@ +#!/bin/bash + +SYSFS= + +prerequisite() +{ + msg="skip all tests:" + + if [ $UID != 0 ]; then + echo $msg must be run as root >&2 + exit 0 + fi + + SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'` + + if [ ! -d "$SYSFS" ]; then + echo $msg sysfs is not mounted >&2 + exit 0 + fi + + if ! ls $SYSFS/devices/system/cpu/cpu* > /dev/null 2>&1; then + echo $msg cpu hotplug is not supported >&2 + exit 0 + fi +} + +# +# list all hot-pluggable CPUs +# +hotpluggable_cpus() +{ + local state=${1:-.\*} + + for cpu in $SYSFS/devices/system/cpu/cpu*; do + if [ -f $cpu/online ] && grep -q $state $cpu/online; then + echo ${cpu##/*/cpu} + fi + done +} + +hotplaggable_offline_cpus() +{ + hotpluggable_cpus 0 +} + +hotpluggable_online_cpus() +{ + hotpluggable_cpus 1 +} + +cpu_is_online() +{ + grep -q 1 $SYSFS/devices/system/cpu/cpu$1/online +} + +cpu_is_offline() +{ + grep -q 0 $SYSFS/devices/system/cpu/cpu$1/online +} + +online_cpu() +{ + echo 1 > $SYSFS/devices/system/cpu/cpu$1/online +} + +offline_cpu() +{ + echo 0 > $SYSFS/devices/system/cpu/cpu$1/online +} + +online_cpu_expect_success() +{ + local cpu=$1 + + if ! online_cpu $cpu; then + echo $FUNCNAME $cpu: unexpected fail >&2 + elif ! cpu_is_online $cpu; then + echo $FUNCNAME $cpu: unexpected offline >&2 + fi +} + +online_cpu_expect_fail() +{ + local cpu=$1 + + if online_cpu $cpu 2> /dev/null; then + echo $FUNCNAME $cpu: unexpected success >&2 + elif ! cpu_is_offline $cpu; then + echo $FUNCNAME $cpu: unexpected online >&2 + fi +} + +offline_cpu_expect_success() +{ + local cpu=$1 + + if ! offline_cpu $cpu; then + echo $FUNCNAME $cpu: unexpected fail >&2 + elif ! cpu_is_offline $cpu; then + echo $FUNCNAME $cpu: unexpected offline >&2 + fi +} + +offline_cpu_expect_fail() +{ + local cpu=$1 + + if offline_cpu $cpu 2> /dev/null; then + echo $FUNCNAME $cpu: unexpected success >&2 + elif ! cpu_is_online $cpu; then + echo $FUNCNAME $cpu: unexpected offline >&2 + fi +} + +error=-12 +priority=0 + +while getopts e:hp: opt; do + case $opt in + e) + error=$OPTARG + ;; + h) + echo "Usage $0 [ -e errno ] [ -p notifier-priority ]" + exit + ;; + p) + priority=$OPTARG + ;; + esac +done + +if ! [ "$error" -ge -4095 -a "$error" -lt 0 ]; then + echo "error code must be -4095 <= errno < 0" >&2 + exit 1 +fi + +prerequisite + +# +# Online all hot-pluggable CPUs +# +for cpu in `hotplaggable_offline_cpus`; do + online_cpu_expect_success $cpu +done + +# +# Offline all hot-pluggable CPUs +# +for cpu in `hotpluggable_online_cpus`; do + offline_cpu_expect_success $cpu +done + +# +# Online all hot-pluggable CPUs again +# +for cpu in `hotplaggable_offline_cpus`; do + online_cpu_expect_success $cpu +done + +# +# Test with cpu notifier error injection +# + +DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3 }'` +NOTIFIER_ERR_INJECT_DIR=$DEBUGFS/notifier-error-inject/cpu + +prerequisite_extra() +{ + msg="skip extra tests:" + + /sbin/modprobe -q -r cpu-notifier-error-inject + /sbin/modprobe -q cpu-notifier-error-inject priority=$priority + + if [ ! -d "$DEBUGFS" ]; then + echo $msg debugfs is not mounted >&2 + exit 0 + fi + + if [ ! -d $NOTIFIER_ERR_INJECT_DIR ]; then + echo $msg cpu-notifier-error-inject module is not available >&2 + exit 0 + fi +} + +prerequisite_extra + +# +# Offline all hot-pluggable CPUs +# +echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error +for cpu in `hotpluggable_online_cpus`; do + offline_cpu_expect_success $cpu +done + +# +# Test CPU hot-add error handling (offline => online) +# +echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_UP_PREPARE/error +for cpu in `hotplaggable_offline_cpus`; do + online_cpu_expect_fail $cpu +done + +# +# Online all hot-pluggable CPUs +# +echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_UP_PREPARE/error +for cpu in `hotplaggable_offline_cpus`; do + online_cpu_expect_success $cpu +done + +# +# Test CPU hot-remove error handling (online => offline) +# +echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error +for cpu in `hotpluggable_online_cpus`; do + offline_cpu_expect_fail $cpu +done + +echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error +/sbin/modprobe -q -r cpu-notifier-error-inject diff --git a/tools/testing/selftests/kcmp/Makefile b/tools/testing/selftests/kcmp/Makefile new file mode 100644 index 000000000000..dc79b86ea65c --- /dev/null +++ b/tools/testing/selftests/kcmp/Makefile @@ -0,0 +1,29 @@ +uname_M := $(shell uname -m 2>/dev/null || echo not) +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/) +ifeq ($(ARCH),i386) + ARCH := X86 + CFLAGS := -DCONFIG_X86_32 -D__i386__ +endif +ifeq ($(ARCH),x86_64) + ARCH := X86 + CFLAGS := -DCONFIG_X86_64 -D__x86_64__ +endif + +CFLAGS += -I../../../../arch/x86/include/generated/ +CFLAGS += -I../../../../include/ +CFLAGS += -I../../../../usr/include/ +CFLAGS += -I../../../../arch/x86/include/ + +all: +ifeq ($(ARCH),X86) + gcc $(CFLAGS) kcmp_test.c -o run_test +else + echo "Not an x86 target, can't build kcmp selftest" +endif + +run-tests: all + ./kcmp_test + +clean: + rm -fr ./run_test + rm -fr ./test-file diff --git a/tools/testing/selftests/kcmp/kcmp_test.c b/tools/testing/selftests/kcmp/kcmp_test.c new file mode 100644 index 000000000000..358cc6bfa35d --- /dev/null +++ b/tools/testing/selftests/kcmp/kcmp_test.c @@ -0,0 +1,94 @@ +#define _GNU_SOURCE + +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <limits.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <fcntl.h> + +#include <linux/unistd.h> +#include <linux/kcmp.h> + +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/wait.h> + +static long sys_kcmp(int pid1, int pid2, int type, int fd1, int fd2) +{ + return syscall(__NR_kcmp, pid1, pid2, type, fd1, fd2); +} + +int main(int argc, char **argv) +{ + const char kpath[] = "kcmp-test-file"; + int pid1, pid2; + int fd1, fd2; + int status; + + fd1 = open(kpath, O_RDWR | O_CREAT | O_TRUNC, 0644); + pid1 = getpid(); + + if (fd1 < 0) { + perror("Can't create file"); + exit(1); + } + + pid2 = fork(); + if (pid2 < 0) { + perror("fork failed"); + exit(1); + } + + if (!pid2) { + int pid2 = getpid(); + int ret; + + fd2 = open(kpath, O_RDWR, 0644); + if (fd2 < 0) { + perror("Can't open file"); + exit(1); + } + + /* An example of output and arguments */ + printf("pid1: %6d pid2: %6d FD: %2ld FILES: %2ld VM: %2ld " + "FS: %2ld SIGHAND: %2ld IO: %2ld SYSVSEM: %2ld " + "INV: %2ld\n", + pid1, pid2, + sys_kcmp(pid1, pid2, KCMP_FILE, fd1, fd2), + sys_kcmp(pid1, pid2, KCMP_FILES, 0, 0), + sys_kcmp(pid1, pid2, KCMP_VM, 0, 0), + sys_kcmp(pid1, pid2, KCMP_FS, 0, 0), + sys_kcmp(pid1, pid2, KCMP_SIGHAND, 0, 0), + sys_kcmp(pid1, pid2, KCMP_IO, 0, 0), + sys_kcmp(pid1, pid2, KCMP_SYSVSEM, 0, 0), + + /* This one should fail */ + sys_kcmp(pid1, pid2, KCMP_TYPES + 1, 0, 0)); + + /* This one should return same fd */ + ret = sys_kcmp(pid1, pid2, KCMP_FILE, fd1, fd1); + if (ret) { + printf("FAIL: 0 expected but %d returned\n", ret); + ret = -1; + } else + printf("PASS: 0 returned as expected\n"); + + /* Compare with self */ + ret = sys_kcmp(pid1, pid1, KCMP_VM, 0, 0); + if (ret) { + printf("FAIL: 0 expected but %li returned\n", ret); + ret = -1; + } else + printf("PASS: 0 returned as expected\n"); + + exit(ret); + } + + waitpid(pid2, &status, P_ALL); + + return 0; +} diff --git a/tools/testing/selftests/memory-hotplug/Makefile b/tools/testing/selftests/memory-hotplug/Makefile new file mode 100644 index 000000000000..7c9c20ff578a --- /dev/null +++ b/tools/testing/selftests/memory-hotplug/Makefile @@ -0,0 +1,6 @@ +all: + +run_tests: + ./on-off-test.sh + +clean: diff --git a/tools/testing/selftests/memory-hotplug/on-off-test.sh b/tools/testing/selftests/memory-hotplug/on-off-test.sh new file mode 100644 index 000000000000..a2816f631542 --- /dev/null +++ b/tools/testing/selftests/memory-hotplug/on-off-test.sh @@ -0,0 +1,230 @@ +#!/bin/bash + +SYSFS= + +prerequisite() +{ + msg="skip all tests:" + + if [ $UID != 0 ]; then + echo $msg must be run as root >&2 + exit 0 + fi + + SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'` + + if [ ! -d "$SYSFS" ]; then + echo $msg sysfs is not mounted >&2 + exit 0 + fi + + if ! ls $SYSFS/devices/system/memory/memory* > /dev/null 2>&1; then + echo $msg memory hotplug is not supported >&2 + exit 0 + fi +} + +# +# list all hot-pluggable memory +# +hotpluggable_memory() +{ + local state=${1:-.\*} + + for memory in $SYSFS/devices/system/memory/memory*; do + if grep -q 1 $memory/removable && + grep -q $state $memory/state; then + echo ${memory##/*/memory} + fi + done +} + +hotplaggable_offline_memory() +{ + hotpluggable_memory offline +} + +hotpluggable_online_memory() +{ + hotpluggable_memory online +} + +memory_is_online() +{ + grep -q online $SYSFS/devices/system/memory/memory$1/state +} + +memory_is_offline() +{ + grep -q offline $SYSFS/devices/system/memory/memory$1/state +} + +online_memory() +{ + echo online > $SYSFS/devices/system/memory/memory$1/state +} + +offline_memory() +{ + echo offline > $SYSFS/devices/system/memory/memory$1/state +} + +online_memory_expect_success() +{ + local memory=$1 + + if ! online_memory $memory; then + echo $FUNCNAME $memory: unexpected fail >&2 + elif ! memory_is_online $memory; then + echo $FUNCNAME $memory: unexpected offline >&2 + fi +} + +online_memory_expect_fail() +{ + local memory=$1 + + if online_memory $memory 2> /dev/null; then + echo $FUNCNAME $memory: unexpected success >&2 + elif ! memory_is_offline $memory; then + echo $FUNCNAME $memory: unexpected online >&2 + fi +} + +offline_memory_expect_success() +{ + local memory=$1 + + if ! offline_memory $memory; then + echo $FUNCNAME $memory: unexpected fail >&2 + elif ! memory_is_offline $memory; then + echo $FUNCNAME $memory: unexpected offline >&2 + fi +} + +offline_memory_expect_fail() +{ + local memory=$1 + + if offline_memory $memory 2> /dev/null; then + echo $FUNCNAME $memory: unexpected success >&2 + elif ! memory_is_online $memory; then + echo $FUNCNAME $memory: unexpected offline >&2 + fi +} + +error=-12 +priority=0 +ratio=10 + +while getopts e:hp:r: opt; do + case $opt in + e) + error=$OPTARG + ;; + h) + echo "Usage $0 [ -e errno ] [ -p notifier-priority ] [ -r percent-of-memory-to-offline ]" + exit + ;; + p) + priority=$OPTARG + ;; + r) + ratio=$OPTARG + ;; + esac +done + +if ! [ "$error" -ge -4095 -a "$error" -lt 0 ]; then + echo "error code must be -4095 <= errno < 0" >&2 + exit 1 +fi + +prerequisite + +# +# Online all hot-pluggable memory +# +for memory in `hotplaggable_offline_memory`; do + online_memory_expect_success $memory +done + +# +# Offline $ratio percent of hot-pluggable memory +# +for memory in `hotpluggable_online_memory`; do + if [ $((RANDOM % 100)) -lt $ratio ]; then + offline_memory_expect_success $memory + fi +done + +# +# Online all hot-pluggable memory again +# +for memory in `hotplaggable_offline_memory`; do + online_memory_expect_success $memory +done + +# +# Test with memory notifier error injection +# + +DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3 }'` +NOTIFIER_ERR_INJECT_DIR=$DEBUGFS/notifier-error-inject/memory + +prerequisite_extra() +{ + msg="skip extra tests:" + + /sbin/modprobe -q -r memory-notifier-error-inject + /sbin/modprobe -q memory-notifier-error-inject priority=$priority + + if [ ! -d "$DEBUGFS" ]; then + echo $msg debugfs is not mounted >&2 + exit 0 + fi + + if [ ! -d $NOTIFIER_ERR_INJECT_DIR ]; then + echo $msg memory-notifier-error-inject module is not available >&2 + exit 0 + fi +} + +prerequisite_extra + +# +# Offline $ratio percent of hot-pluggable memory +# +echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error +for memory in `hotpluggable_online_memory`; do + if [ $((RANDOM % 100)) -lt $ratio ]; then + offline_memory_expect_success $memory + fi +done + +# +# Test memory hot-add error handling (offline => online) +# +echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_ONLINE/error +for memory in `hotplaggable_offline_memory`; do + online_memory_expect_fail $memory +done + +# +# Online all hot-pluggable memory +# +echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_ONLINE/error +for memory in `hotplaggable_offline_memory`; do + online_memory_expect_success $memory +done + +# +# Test memory hot-remove error handling (online => offline) +# +echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error +for memory in `hotpluggable_online_memory`; do + offline_memory_expect_fail $memory +done + +echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error +/sbin/modprobe -q -r memory-notifier-error-inject diff --git a/tools/testing/selftests/mqueue/.gitignore b/tools/testing/selftests/mqueue/.gitignore new file mode 100644 index 000000000000..d8d42377205a --- /dev/null +++ b/tools/testing/selftests/mqueue/.gitignore @@ -0,0 +1,2 @@ +mq_open_tests +mq_perf_tests diff --git a/tools/testing/selftests/mqueue/Makefile b/tools/testing/selftests/mqueue/Makefile new file mode 100644 index 000000000000..54c0aad2b47c --- /dev/null +++ b/tools/testing/selftests/mqueue/Makefile @@ -0,0 +1,10 @@ +all: + gcc -O2 -lrt mq_open_tests.c -o mq_open_tests + gcc -O2 -lrt -lpthread -lpopt -o mq_perf_tests mq_perf_tests.c + +run_tests: + ./mq_open_tests /test1 + ./mq_perf_tests + +clean: + rm -f mq_open_tests mq_perf_tests diff --git a/tools/testing/selftests/mqueue/mq_open_tests.c b/tools/testing/selftests/mqueue/mq_open_tests.c new file mode 100644 index 000000000000..711cc2923047 --- /dev/null +++ b/tools/testing/selftests/mqueue/mq_open_tests.c @@ -0,0 +1,492 @@ +/* + * This application is Copyright 2012 Red Hat, Inc. + * Doug Ledford <dledford@redhat.com> + * + * mq_open_tests is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3. + * + * mq_open_tests is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * For the full text of the license, see <http://www.gnu.org/licenses/>. + * + * mq_open_tests.c + * Tests the various situations that should either succeed or fail to + * open a posix message queue and then reports whether or not they + * did as they were supposed to. + * + */ +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <string.h> +#include <limits.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <sys/stat.h> +#include <mqueue.h> + +static char *usage = +"Usage:\n" +" %s path\n" +"\n" +" path Path name of the message queue to create\n" +"\n" +" Note: this program must be run as root in order to enable all tests\n" +"\n"; + +char *DEF_MSGS = "/proc/sys/fs/mqueue/msg_default"; +char *DEF_MSGSIZE = "/proc/sys/fs/mqueue/msgsize_default"; +char *MAX_MSGS = "/proc/sys/fs/mqueue/msg_max"; +char *MAX_MSGSIZE = "/proc/sys/fs/mqueue/msgsize_max"; + +int default_settings; +struct rlimit saved_limits, cur_limits; +int saved_def_msgs, saved_def_msgsize, saved_max_msgs, saved_max_msgsize; +int cur_def_msgs, cur_def_msgsize, cur_max_msgs, cur_max_msgsize; +FILE *def_msgs, *def_msgsize, *max_msgs, *max_msgsize; +char *queue_path; +mqd_t queue = -1; + +static inline void __set(FILE *stream, int value, char *err_msg); +void shutdown(int exit_val, char *err_cause, int line_no); +static inline int get(FILE *stream); +static inline void set(FILE *stream, int value); +static inline void getr(int type, struct rlimit *rlim); +static inline void setr(int type, struct rlimit *rlim); +void validate_current_settings(); +static inline void test_queue(struct mq_attr *attr, struct mq_attr *result); +static inline int test_queue_fail(struct mq_attr *attr, struct mq_attr *result); + +static inline void __set(FILE *stream, int value, char *err_msg) +{ + rewind(stream); + if (fprintf(stream, "%d", value) < 0) + perror(err_msg); +} + + +void shutdown(int exit_val, char *err_cause, int line_no) +{ + static int in_shutdown = 0; + + /* In case we get called recursively by a set() call below */ + if (in_shutdown++) + return; + + seteuid(0); + + if (queue != -1) + if (mq_close(queue)) + perror("mq_close() during shutdown"); + if (queue_path) + /* + * Be silent if this fails, if we cleaned up already it's + * expected to fail + */ + mq_unlink(queue_path); + if (default_settings) { + if (saved_def_msgs) + __set(def_msgs, saved_def_msgs, + "failed to restore saved_def_msgs"); + if (saved_def_msgsize) + __set(def_msgsize, saved_def_msgsize, + "failed to restore saved_def_msgsize"); + } + if (saved_max_msgs) + __set(max_msgs, saved_max_msgs, + "failed to restore saved_max_msgs"); + if (saved_max_msgsize) + __set(max_msgsize, saved_max_msgsize, + "failed to restore saved_max_msgsize"); + if (exit_val) + error(exit_val, errno, "%s at %d", err_cause, line_no); + exit(0); +} + +static inline int get(FILE *stream) +{ + int value; + rewind(stream); + if (fscanf(stream, "%d", &value) != 1) + shutdown(4, "Error reading /proc entry", __LINE__ - 1); + return value; +} + +static inline void set(FILE *stream, int value) +{ + int new_value; + + rewind(stream); + if (fprintf(stream, "%d", value) < 0) + return shutdown(5, "Failed writing to /proc file", + __LINE__ - 1); + new_value = get(stream); + if (new_value != value) + return shutdown(5, "We didn't get what we wrote to /proc back", + __LINE__ - 1); +} + +static inline void getr(int type, struct rlimit *rlim) +{ + if (getrlimit(type, rlim)) + shutdown(6, "getrlimit()", __LINE__ - 1); +} + +static inline void setr(int type, struct rlimit *rlim) +{ + if (setrlimit(type, rlim)) + shutdown(7, "setrlimit()", __LINE__ - 1); +} + +void validate_current_settings() +{ + int rlim_needed; + + if (cur_limits.rlim_cur < 4096) { + printf("Current rlimit value for POSIX message queue bytes is " + "unreasonably low,\nincreasing.\n\n"); + cur_limits.rlim_cur = 8192; + cur_limits.rlim_max = 16384; + setr(RLIMIT_MSGQUEUE, &cur_limits); + } + + if (default_settings) { + rlim_needed = (cur_def_msgs + 1) * (cur_def_msgsize + 1 + + 2 * sizeof(void *)); + if (rlim_needed > cur_limits.rlim_cur) { + printf("Temporarily lowering default queue parameters " + "to something that will work\n" + "with the current rlimit values.\n\n"); + set(def_msgs, 10); + cur_def_msgs = 10; + set(def_msgsize, 128); + cur_def_msgsize = 128; + } + } else { + rlim_needed = (cur_max_msgs + 1) * (cur_max_msgsize + 1 + + 2 * sizeof(void *)); + if (rlim_needed > cur_limits.rlim_cur) { + printf("Temporarily lowering maximum queue parameters " + "to something that will work\n" + "with the current rlimit values in case this is " + "a kernel that ties the default\n" + "queue parameters to the maximum queue " + "parameters.\n\n"); + set(max_msgs, 10); + cur_max_msgs = 10; + set(max_msgsize, 128); + cur_max_msgsize = 128; + } + } +} + +/* + * test_queue - Test opening a queue, shutdown if we fail. This should + * only be called in situations that should never fail. We clean up + * after ourselves and return the queue attributes in *result. + */ +static inline void test_queue(struct mq_attr *attr, struct mq_attr *result) +{ + int flags = O_RDWR | O_EXCL | O_CREAT; + int perms = DEFFILEMODE; + + if ((queue = mq_open(queue_path, flags, perms, attr)) == -1) + shutdown(1, "mq_open()", __LINE__); + if (mq_getattr(queue, result)) + shutdown(1, "mq_getattr()", __LINE__); + if (mq_close(queue)) + shutdown(1, "mq_close()", __LINE__); + queue = -1; + if (mq_unlink(queue_path)) + shutdown(1, "mq_unlink()", __LINE__); +} + +/* + * Same as test_queue above, but failure is not fatal. + * Returns: + * 0 - Failed to create a queue + * 1 - Created a queue, attributes in *result + */ +static inline int test_queue_fail(struct mq_attr *attr, struct mq_attr *result) +{ + int flags = O_RDWR | O_EXCL | O_CREAT; + int perms = DEFFILEMODE; + + if ((queue = mq_open(queue_path, flags, perms, attr)) == -1) + return 0; + if (mq_getattr(queue, result)) + shutdown(1, "mq_getattr()", __LINE__); + if (mq_close(queue)) + shutdown(1, "mq_close()", __LINE__); + queue = -1; + if (mq_unlink(queue_path)) + shutdown(1, "mq_unlink()", __LINE__); + return 1; +} + +int main(int argc, char *argv[]) +{ + struct mq_attr attr, result; + + if (argc != 2) { + fprintf(stderr, "Must pass a valid queue name\n\n"); + fprintf(stderr, usage, argv[0]); + exit(1); + } + + /* + * Although we can create a msg queue with a non-absolute path name, + * unlink will fail. So, if the name doesn't start with a /, add one + * when we save it. + */ + if (*argv[1] == '/') + queue_path = strdup(argv[1]); + else { + queue_path = malloc(strlen(argv[1]) + 2); + if (!queue_path) { + perror("malloc()"); + exit(1); + } + queue_path[0] = '/'; + queue_path[1] = 0; + strcat(queue_path, argv[1]); + } + + if (getuid() != 0) { + fprintf(stderr, "Not running as root, but almost all tests " + "require root in order to modify\nsystem settings. " + "Exiting.\n"); + exit(1); + } + + /* Find out what files there are for us to make tweaks in */ + def_msgs = fopen(DEF_MSGS, "r+"); + def_msgsize = fopen(DEF_MSGSIZE, "r+"); + max_msgs = fopen(MAX_MSGS, "r+"); + max_msgsize = fopen(MAX_MSGSIZE, "r+"); + + if (!max_msgs) + shutdown(2, "Failed to open msg_max", __LINE__); + if (!max_msgsize) + shutdown(2, "Failed to open msgsize_max", __LINE__); + if (def_msgs || def_msgsize) + default_settings = 1; + + /* Load up the current system values for everything we can */ + getr(RLIMIT_MSGQUEUE, &saved_limits); + cur_limits = saved_limits; + if (default_settings) { + saved_def_msgs = cur_def_msgs = get(def_msgs); + saved_def_msgsize = cur_def_msgsize = get(def_msgsize); + } + saved_max_msgs = cur_max_msgs = get(max_msgs); + saved_max_msgsize = cur_max_msgsize = get(max_msgsize); + + /* Tell the user our initial state */ + printf("\nInitial system state:\n"); + printf("\tUsing queue path:\t\t%s\n", queue_path); + printf("\tRLIMIT_MSGQUEUE(soft):\t\t%d\n", saved_limits.rlim_cur); + printf("\tRLIMIT_MSGQUEUE(hard):\t\t%d\n", saved_limits.rlim_max); + printf("\tMaximum Message Size:\t\t%d\n", saved_max_msgsize); + printf("\tMaximum Queue Size:\t\t%d\n", saved_max_msgs); + if (default_settings) { + printf("\tDefault Message Size:\t\t%d\n", saved_def_msgsize); + printf("\tDefault Queue Size:\t\t%d\n", saved_def_msgs); + } else { + printf("\tDefault Message Size:\t\tNot Supported\n"); + printf("\tDefault Queue Size:\t\tNot Supported\n"); + } + printf("\n"); + + validate_current_settings(); + + printf("Adjusted system state for testing:\n"); + printf("\tRLIMIT_MSGQUEUE(soft):\t\t%d\n", cur_limits.rlim_cur); + printf("\tRLIMIT_MSGQUEUE(hard):\t\t%d\n", cur_limits.rlim_max); + printf("\tMaximum Message Size:\t\t%d\n", cur_max_msgsize); + printf("\tMaximum Queue Size:\t\t%d\n", cur_max_msgs); + if (default_settings) { + printf("\tDefault Message Size:\t\t%d\n", cur_def_msgsize); + printf("\tDefault Queue Size:\t\t%d\n", cur_def_msgs); + } + + printf("\n\nTest series 1, behavior when no attr struct " + "passed to mq_open:\n"); + if (!default_settings) { + test_queue(NULL, &result); + printf("Given sane system settings, mq_open without an attr " + "struct succeeds:\tPASS\n"); + if (result.mq_maxmsg != cur_max_msgs || + result.mq_msgsize != cur_max_msgsize) { + printf("Kernel does not support setting the default " + "mq attributes,\nbut also doesn't tie the " + "defaults to the maximums:\t\t\tPASS\n"); + } else { + set(max_msgs, ++cur_max_msgs); + set(max_msgsize, ++cur_max_msgsize); + test_queue(NULL, &result); + if (result.mq_maxmsg == cur_max_msgs && + result.mq_msgsize == cur_max_msgsize) + printf("Kernel does not support setting the " + "default mq attributes and\n" + "also ties system wide defaults to " + "the system wide maximums:\t\t" + "FAIL\n"); + else + printf("Kernel does not support setting the " + "default mq attributes,\n" + "but also doesn't tie the defaults to " + "the maximums:\t\t\tPASS\n"); + } + } else { + printf("Kernel supports setting defaults separately from " + "maximums:\t\tPASS\n"); + /* + * While we are here, go ahead and test that the kernel + * properly follows the default settings + */ + test_queue(NULL, &result); + printf("Given sane values, mq_open without an attr struct " + "succeeds:\t\tPASS\n"); + if (result.mq_maxmsg != cur_def_msgs || + result.mq_msgsize != cur_def_msgsize) + printf("Kernel supports setting defaults, but does " + "not actually honor them:\tFAIL\n\n"); + else { + set(def_msgs, ++cur_def_msgs); + set(def_msgsize, ++cur_def_msgsize); + /* In case max was the same as the default */ + set(max_msgs, ++cur_max_msgs); + set(max_msgsize, ++cur_max_msgsize); + test_queue(NULL, &result); + if (result.mq_maxmsg != cur_def_msgs || + result.mq_msgsize != cur_def_msgsize) + printf("Kernel supports setting defaults, but " + "does not actually honor them:\t" + "FAIL\n"); + else + printf("Kernel properly honors default setting " + "knobs:\t\t\t\tPASS\n"); + } + set(def_msgs, cur_max_msgs + 1); + cur_def_msgs = cur_max_msgs + 1; + set(def_msgsize, cur_max_msgsize + 1); + cur_def_msgsize = cur_max_msgsize + 1; + if (cur_def_msgs * (cur_def_msgsize + 2 * sizeof(void *)) >= + cur_limits.rlim_cur) { + cur_limits.rlim_cur = (cur_def_msgs + 2) * + (cur_def_msgsize + 2 * sizeof(void *)); + cur_limits.rlim_max = 2 * cur_limits.rlim_cur; + setr(RLIMIT_MSGQUEUE, &cur_limits); + } + if (test_queue_fail(NULL, &result)) { + if (result.mq_maxmsg == cur_max_msgs && + result.mq_msgsize == cur_max_msgsize) + printf("Kernel properly limits default values " + "to lesser of default/max:\t\tPASS\n"); + else + printf("Kernel does not properly set default " + "queue parameters when\ndefaults > " + "max:\t\t\t\t\t\t\t\tFAIL\n"); + } else + printf("Kernel fails to open mq because defaults are " + "greater than maximums:\tFAIL\n"); + set(def_msgs, --cur_def_msgs); + set(def_msgsize, --cur_def_msgsize); + cur_limits.rlim_cur = cur_limits.rlim_max = cur_def_msgs * + cur_def_msgsize; + setr(RLIMIT_MSGQUEUE, &cur_limits); + if (test_queue_fail(NULL, &result)) + printf("Kernel creates queue even though defaults " + "would exceed\nrlimit setting:" + "\t\t\t\t\t\t\t\tFAIL\n"); + else + printf("Kernel properly fails to create queue when " + "defaults would\nexceed rlimit:" + "\t\t\t\t\t\t\t\tPASS\n"); + } + + /* + * Test #2 - open with an attr struct that exceeds rlimit + */ + printf("\n\nTest series 2, behavior when attr struct is " + "passed to mq_open:\n"); + cur_max_msgs = 32; + cur_max_msgsize = cur_limits.rlim_max >> 4; + set(max_msgs, cur_max_msgs); + set(max_msgsize, cur_max_msgsize); + attr.mq_maxmsg = cur_max_msgs; + attr.mq_msgsize = cur_max_msgsize; + if (test_queue_fail(&attr, &result)) + printf("Queue open in excess of rlimit max when euid = 0 " + "succeeded:\t\tFAIL\n"); + else + printf("Queue open in excess of rlimit max when euid = 0 " + "failed:\t\tPASS\n"); + attr.mq_maxmsg = cur_max_msgs + 1; + attr.mq_msgsize = 10; + if (test_queue_fail(&attr, &result)) + printf("Queue open with mq_maxmsg > limit when euid = 0 " + "succeeded:\t\tPASS\n"); + else + printf("Queue open with mq_maxmsg > limit when euid = 0 " + "failed:\t\tFAIL\n"); + attr.mq_maxmsg = 1; + attr.mq_msgsize = cur_max_msgsize + 1; + if (test_queue_fail(&attr, &result)) + printf("Queue open with mq_msgsize > limit when euid = 0 " + "succeeded:\t\tPASS\n"); + else + printf("Queue open with mq_msgsize > limit when euid = 0 " + "failed:\t\tFAIL\n"); + attr.mq_maxmsg = 65536; + attr.mq_msgsize = 65536; + if (test_queue_fail(&attr, &result)) + printf("Queue open with total size > 2GB when euid = 0 " + "succeeded:\t\tFAIL\n"); + else + printf("Queue open with total size > 2GB when euid = 0 " + "failed:\t\t\tPASS\n"); + seteuid(99); + attr.mq_maxmsg = cur_max_msgs; + attr.mq_msgsize = cur_max_msgsize; + if (test_queue_fail(&attr, &result)) + printf("Queue open in excess of rlimit max when euid = 99 " + "succeeded:\t\tFAIL\n"); + else + printf("Queue open in excess of rlimit max when euid = 99 " + "failed:\t\tPASS\n"); + attr.mq_maxmsg = cur_max_msgs + 1; + attr.mq_msgsize = 10; + if (test_queue_fail(&attr, &result)) + printf("Queue open with mq_maxmsg > limit when euid = 99 " + "succeeded:\t\tFAIL\n"); + else + printf("Queue open with mq_maxmsg > limit when euid = 99 " + "failed:\t\tPASS\n"); + attr.mq_maxmsg = 1; + attr.mq_msgsize = cur_max_msgsize + 1; + if (test_queue_fail(&attr, &result)) + printf("Queue open with mq_msgsize > limit when euid = 99 " + "succeeded:\t\tFAIL\n"); + else + printf("Queue open with mq_msgsize > limit when euid = 99 " + "failed:\t\tPASS\n"); + attr.mq_maxmsg = 65536; + attr.mq_msgsize = 65536; + if (test_queue_fail(&attr, &result)) + printf("Queue open with total size > 2GB when euid = 99 " + "succeeded:\t\tFAIL\n"); + else + printf("Queue open with total size > 2GB when euid = 99 " + "failed:\t\t\tPASS\n"); + + shutdown(0,"",0); +} diff --git a/tools/testing/selftests/mqueue/mq_perf_tests.c b/tools/testing/selftests/mqueue/mq_perf_tests.c new file mode 100644 index 000000000000..2fadd4b97045 --- /dev/null +++ b/tools/testing/selftests/mqueue/mq_perf_tests.c @@ -0,0 +1,741 @@ +/* + * This application is Copyright 2012 Red Hat, Inc. + * Doug Ledford <dledford@redhat.com> + * + * mq_perf_tests is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3. + * + * mq_perf_tests is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * For the full text of the license, see <http://www.gnu.org/licenses/>. + * + * mq_perf_tests.c + * Tests various types of message queue workloads, concentrating on those + * situations that invole large message sizes, large message queue depths, + * or both, and reports back useful metrics about kernel message queue + * performance. + * + */ +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <string.h> +#include <limits.h> +#include <errno.h> +#include <signal.h> +#include <pthread.h> +#include <sched.h> +#include <sys/types.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <sys/stat.h> +#include <mqueue.h> +#include <popt.h> + +static char *usage = +"Usage:\n" +" %s [-c #[,#..] -f] path\n" +"\n" +" -c # Skip most tests and go straight to a high queue depth test\n" +" and then run that test continuously (useful for running at\n" +" the same time as some other workload to see how much the\n" +" cache thrashing caused by adding messages to a very deep\n" +" queue impacts the performance of other programs). The number\n" +" indicates which CPU core we should bind the process to during\n" +" the run. If you have more than one physical CPU, then you\n" +" will need one copy per physical CPU package, and you should\n" +" specify the CPU cores to pin ourself to via a comma separated\n" +" list of CPU values.\n" +" -f Only usable with continuous mode. Pin ourself to the CPUs\n" +" as requested, then instead of looping doing a high mq\n" +" workload, just busy loop. This will allow us to lock up a\n" +" single CPU just like we normally would, but without actually\n" +" thrashing the CPU cache. This is to make it easier to get\n" +" comparable numbers from some other workload running on the\n" +" other CPUs. One set of numbers with # CPUs locked up running\n" +" an mq workload, and another set of numbers with those same\n" +" CPUs locked away from the test workload, but not doing\n" +" anything to trash the cache like the mq workload might.\n" +" path Path name of the message queue to create\n" +"\n" +" Note: this program must be run as root in order to enable all tests\n" +"\n"; + +char *MAX_MSGS = "/proc/sys/fs/mqueue/msg_max"; +char *MAX_MSGSIZE = "/proc/sys/fs/mqueue/msgsize_max"; + +#define min(a, b) ((a) < (b) ? (a) : (b)) +#define MAX_CPUS 64 +char *cpu_option_string; +int cpus_to_pin[MAX_CPUS]; +int num_cpus_to_pin; +pthread_t cpu_threads[MAX_CPUS]; +pthread_t main_thread; +cpu_set_t *cpu_set; +int cpu_set_size; +int cpus_online; + +#define MSG_SIZE 16 +#define TEST1_LOOPS 10000000 +#define TEST2_LOOPS 100000 +int continuous_mode; +int continuous_mode_fake; + +struct rlimit saved_limits, cur_limits; +int saved_max_msgs, saved_max_msgsize; +int cur_max_msgs, cur_max_msgsize; +FILE *max_msgs, *max_msgsize; +int cur_nice; +char *queue_path = "/mq_perf_tests"; +mqd_t queue = -1; +struct mq_attr result; +int mq_prio_max; + +const struct poptOption options[] = { + { + .longName = "continuous", + .shortName = 'c', + .argInfo = POPT_ARG_STRING, + .arg = &cpu_option_string, + .val = 'c', + .descrip = "Run continuous tests at a high queue depth in " + "order to test the effects of cache thrashing on " + "other tasks on the system. This test is intended " + "to be run on one core of each physical CPU while " + "some other CPU intensive task is run on all the other " + "cores of that same physical CPU and the other task " + "is timed. It is assumed that the process of adding " + "messages to the message queue in a tight loop will " + "impact that other task to some degree. Once the " + "tests are performed in this way, you should then " + "re-run the tests using fake mode in order to check " + "the difference in time required to perform the CPU " + "intensive task", + .argDescrip = "cpu[,cpu]", + }, + { + .longName = "fake", + .shortName = 'f', + .argInfo = POPT_ARG_NONE, + .arg = &continuous_mode_fake, + .val = 0, + .descrip = "Tie up the CPUs that we would normally tie up in" + "continuous mode, but don't actually do any mq stuff, " + "just keep the CPU busy so it can't be used to process " + "system level tasks as this would free up resources on " + "the other CPU cores and skew the comparison between " + "the no-mqueue work and mqueue work tests", + .argDescrip = NULL, + }, + { + .longName = "path", + .shortName = 'p', + .argInfo = POPT_ARG_STRING | POPT_ARGFLAG_SHOW_DEFAULT, + .arg = &queue_path, + .val = 'p', + .descrip = "The name of the path to use in the mqueue " + "filesystem for our tests", + .argDescrip = "pathname", + }, + POPT_AUTOHELP + POPT_TABLEEND +}; + +static inline void __set(FILE *stream, int value, char *err_msg); +void shutdown(int exit_val, char *err_cause, int line_no); +void sig_action_SIGUSR1(int signum, siginfo_t *info, void *context); +void sig_action(int signum, siginfo_t *info, void *context); +static inline int get(FILE *stream); +static inline void set(FILE *stream, int value); +static inline int try_set(FILE *stream, int value); +static inline void getr(int type, struct rlimit *rlim); +static inline void setr(int type, struct rlimit *rlim); +static inline void open_queue(struct mq_attr *attr); +void increase_limits(void); + +static inline void __set(FILE *stream, int value, char *err_msg) +{ + rewind(stream); + if (fprintf(stream, "%d", value) < 0) + perror(err_msg); +} + + +void shutdown(int exit_val, char *err_cause, int line_no) +{ + static int in_shutdown = 0; + int errno_at_shutdown = errno; + int i; + + /* In case we get called by multiple threads or from an sighandler */ + if (in_shutdown++) + return; + + for (i = 0; i < num_cpus_to_pin; i++) + if (cpu_threads[i]) { + pthread_kill(cpu_threads[i], SIGUSR1); + pthread_join(cpu_threads[i], NULL); + } + + if (queue != -1) + if (mq_close(queue)) + perror("mq_close() during shutdown"); + if (queue_path) + /* + * Be silent if this fails, if we cleaned up already it's + * expected to fail + */ + mq_unlink(queue_path); + if (saved_max_msgs) + __set(max_msgs, saved_max_msgs, + "failed to restore saved_max_msgs"); + if (saved_max_msgsize) + __set(max_msgsize, saved_max_msgsize, + "failed to restore saved_max_msgsize"); + if (exit_val) + error(exit_val, errno_at_shutdown, "%s at %d", + err_cause, line_no); + exit(0); +} + +void sig_action_SIGUSR1(int signum, siginfo_t *info, void *context) +{ + if (pthread_self() != main_thread) + pthread_exit(0); + else { + fprintf(stderr, "Caught signal %d in SIGUSR1 handler, " + "exiting\n", signum); + shutdown(0, "", 0); + fprintf(stderr, "\n\nReturned from shutdown?!?!\n\n"); + exit(0); + } +} + +void sig_action(int signum, siginfo_t *info, void *context) +{ + if (pthread_self() != main_thread) + pthread_kill(main_thread, signum); + else { + fprintf(stderr, "Caught signal %d, exiting\n", signum); + shutdown(0, "", 0); + fprintf(stderr, "\n\nReturned from shutdown?!?!\n\n"); + exit(0); + } +} + +static inline int get(FILE *stream) +{ + int value; + rewind(stream); + if (fscanf(stream, "%d", &value) != 1) + shutdown(4, "Error reading /proc entry", __LINE__); + return value; +} + +static inline void set(FILE *stream, int value) +{ + int new_value; + + rewind(stream); + if (fprintf(stream, "%d", value) < 0) + return shutdown(5, "Failed writing to /proc file", __LINE__); + new_value = get(stream); + if (new_value != value) + return shutdown(5, "We didn't get what we wrote to /proc back", + __LINE__); +} + +static inline int try_set(FILE *stream, int value) +{ + int new_value; + + rewind(stream); + fprintf(stream, "%d", value); + new_value = get(stream); + return new_value == value; +} + +static inline void getr(int type, struct rlimit *rlim) +{ + if (getrlimit(type, rlim)) + shutdown(6, "getrlimit()", __LINE__); +} + +static inline void setr(int type, struct rlimit *rlim) +{ + if (setrlimit(type, rlim)) + shutdown(7, "setrlimit()", __LINE__); +} + +/** + * open_queue - open the global queue for testing + * @attr - An attr struct specifying the desired queue traits + * @result - An attr struct that lists the actual traits the queue has + * + * This open is not allowed to fail, failure will result in an orderly + * shutdown of the program. The global queue_path is used to set what + * queue to open, the queue descriptor is saved in the global queue + * variable. + */ +static inline void open_queue(struct mq_attr *attr) +{ + int flags = O_RDWR | O_EXCL | O_CREAT | O_NONBLOCK; + int perms = DEFFILEMODE; + + queue = mq_open(queue_path, flags, perms, attr); + if (queue == -1) + shutdown(1, "mq_open()", __LINE__); + if (mq_getattr(queue, &result)) + shutdown(1, "mq_getattr()", __LINE__); + printf("\n\tQueue %s created:\n", queue_path); + printf("\t\tmq_flags:\t\t\t%s\n", result.mq_flags & O_NONBLOCK ? + "O_NONBLOCK" : "(null)"); + printf("\t\tmq_maxmsg:\t\t\t%d\n", result.mq_maxmsg); + printf("\t\tmq_msgsize:\t\t\t%d\n", result.mq_msgsize); + printf("\t\tmq_curmsgs:\t\t\t%d\n", result.mq_curmsgs); +} + +void *fake_cont_thread(void *arg) +{ + int i; + + for (i = 0; i < num_cpus_to_pin; i++) + if (cpu_threads[i] == pthread_self()) + break; + printf("\tStarted fake continuous mode thread %d on CPU %d\n", i, + cpus_to_pin[i]); + while (1) + ; +} + +void *cont_thread(void *arg) +{ + char buff[MSG_SIZE]; + int i, priority; + + for (i = 0; i < num_cpus_to_pin; i++) + if (cpu_threads[i] == pthread_self()) + break; + printf("\tStarted continuous mode thread %d on CPU %d\n", i, + cpus_to_pin[i]); + while (1) { + while (mq_send(queue, buff, sizeof(buff), 0) == 0) + ; + mq_receive(queue, buff, sizeof(buff), &priority); + } +} + +#define drain_queue() \ + while (mq_receive(queue, buff, MSG_SIZE, &prio_in) == MSG_SIZE) + +#define do_untimed_send() \ + do { \ + if (mq_send(queue, buff, MSG_SIZE, prio_out)) \ + shutdown(3, "Test send failure", __LINE__); \ + } while (0) + +#define do_send_recv() \ + do { \ + clock_gettime(clock, &start); \ + if (mq_send(queue, buff, MSG_SIZE, prio_out)) \ + shutdown(3, "Test send failure", __LINE__); \ + clock_gettime(clock, &middle); \ + if (mq_receive(queue, buff, MSG_SIZE, &prio_in) != MSG_SIZE) \ + shutdown(3, "Test receive failure", __LINE__); \ + clock_gettime(clock, &end); \ + nsec = ((middle.tv_sec - start.tv_sec) * 1000000000) + \ + (middle.tv_nsec - start.tv_nsec); \ + send_total.tv_nsec += nsec; \ + if (send_total.tv_nsec >= 1000000000) { \ + send_total.tv_sec++; \ + send_total.tv_nsec -= 1000000000; \ + } \ + nsec = ((end.tv_sec - middle.tv_sec) * 1000000000) + \ + (end.tv_nsec - middle.tv_nsec); \ + recv_total.tv_nsec += nsec; \ + if (recv_total.tv_nsec >= 1000000000) { \ + recv_total.tv_sec++; \ + recv_total.tv_nsec -= 1000000000; \ + } \ + } while (0) + +struct test { + char *desc; + void (*func)(int *); +}; + +void const_prio(int *prio) +{ + return; +} + +void inc_prio(int *prio) +{ + if (++*prio == mq_prio_max) + *prio = 0; +} + +void dec_prio(int *prio) +{ + if (--*prio < 0) + *prio = mq_prio_max - 1; +} + +void random_prio(int *prio) +{ + *prio = random() % mq_prio_max; +} + +struct test test2[] = { + {"\n\tTest #2a: Time send/recv message, queue full, constant prio\n", + const_prio}, + {"\n\tTest #2b: Time send/recv message, queue full, increasing prio\n", + inc_prio}, + {"\n\tTest #2c: Time send/recv message, queue full, decreasing prio\n", + dec_prio}, + {"\n\tTest #2d: Time send/recv message, queue full, random prio\n", + random_prio}, + {NULL, NULL} +}; + +/** + * Tests to perform (all done with MSG_SIZE messages): + * + * 1) Time to add/remove message with 0 messages on queue + * 1a) with constant prio + * 2) Time to add/remove message when queue close to capacity: + * 2a) with constant prio + * 2b) with increasing prio + * 2c) with decreasing prio + * 2d) with random prio + * 3) Test limits of priorities honored (double check _SC_MQ_PRIO_MAX) + */ +void *perf_test_thread(void *arg) +{ + char buff[MSG_SIZE]; + int prio_out, prio_in; + int i; + clockid_t clock; + pthread_t *t; + struct timespec res, start, middle, end, send_total, recv_total; + unsigned long long nsec; + struct test *cur_test; + + t = &cpu_threads[0]; + printf("\n\tStarted mqueue performance test thread on CPU %d\n", + cpus_to_pin[0]); + mq_prio_max = sysconf(_SC_MQ_PRIO_MAX); + if (mq_prio_max == -1) + shutdown(2, "sysconf(_SC_MQ_PRIO_MAX)", __LINE__); + if (pthread_getcpuclockid(cpu_threads[0], &clock) != 0) + shutdown(2, "pthread_getcpuclockid", __LINE__); + + if (clock_getres(clock, &res)) + shutdown(2, "clock_getres()", __LINE__); + + printf("\t\tMax priorities:\t\t\t%d\n", mq_prio_max); + printf("\t\tClock resolution:\t\t%d nsec%s\n", res.tv_nsec, + res.tv_nsec > 1 ? "s" : ""); + + + + printf("\n\tTest #1: Time send/recv message, queue empty\n"); + printf("\t\t(%d iterations)\n", TEST1_LOOPS); + prio_out = 0; + send_total.tv_sec = 0; + send_total.tv_nsec = 0; + recv_total.tv_sec = 0; + recv_total.tv_nsec = 0; + for (i = 0; i < TEST1_LOOPS; i++) + do_send_recv(); + printf("\t\tSend msg:\t\t\t%d.%ds total time\n", + send_total.tv_sec, send_total.tv_nsec); + nsec = ((unsigned long long)send_total.tv_sec * 1000000000 + + send_total.tv_nsec) / TEST1_LOOPS; + printf("\t\t\t\t\t\t%d nsec/msg\n", nsec); + printf("\t\tRecv msg:\t\t\t%d.%ds total time\n", + recv_total.tv_sec, recv_total.tv_nsec); + nsec = ((unsigned long long)recv_total.tv_sec * 1000000000 + + recv_total.tv_nsec) / TEST1_LOOPS; + printf("\t\t\t\t\t\t%d nsec/msg\n", nsec); + + + for (cur_test = test2; cur_test->desc != NULL; cur_test++) { + printf(cur_test->desc); + printf("\t\t(%d iterations)\n", TEST2_LOOPS); + prio_out = 0; + send_total.tv_sec = 0; + send_total.tv_nsec = 0; + recv_total.tv_sec = 0; + recv_total.tv_nsec = 0; + printf("\t\tFilling queue..."); + fflush(stdout); + clock_gettime(clock, &start); + for (i = 0; i < result.mq_maxmsg - 1; i++) { + do_untimed_send(); + cur_test->func(&prio_out); + } + clock_gettime(clock, &end); + nsec = ((unsigned long long)(end.tv_sec - start.tv_sec) * + 1000000000) + (end.tv_nsec - start.tv_nsec); + printf("done.\t\t%lld.%llds\n", nsec / 1000000000, + nsec % 1000000000); + printf("\t\tTesting..."); + fflush(stdout); + for (i = 0; i < TEST2_LOOPS; i++) { + do_send_recv(); + cur_test->func(&prio_out); + } + printf("done.\n"); + printf("\t\tSend msg:\t\t\t%d.%ds total time\n", + send_total.tv_sec, send_total.tv_nsec); + nsec = ((unsigned long long)send_total.tv_sec * 1000000000 + + send_total.tv_nsec) / TEST2_LOOPS; + printf("\t\t\t\t\t\t%d nsec/msg\n", nsec); + printf("\t\tRecv msg:\t\t\t%d.%ds total time\n", + recv_total.tv_sec, recv_total.tv_nsec); + nsec = ((unsigned long long)recv_total.tv_sec * 1000000000 + + recv_total.tv_nsec) / TEST2_LOOPS; + printf("\t\t\t\t\t\t%d nsec/msg\n", nsec); + printf("\t\tDraining queue..."); + fflush(stdout); + clock_gettime(clock, &start); + drain_queue(); + clock_gettime(clock, &end); + nsec = ((unsigned long long)(end.tv_sec - start.tv_sec) * + 1000000000) + (end.tv_nsec - start.tv_nsec); + printf("done.\t\t%lld.%llds\n", nsec / 1000000000, + nsec % 1000000000); + } + return 0; +} + +void increase_limits(void) +{ + cur_limits.rlim_cur = RLIM_INFINITY; + cur_limits.rlim_max = RLIM_INFINITY; + setr(RLIMIT_MSGQUEUE, &cur_limits); + while (try_set(max_msgs, cur_max_msgs += 10)) + ; + cur_max_msgs = get(max_msgs); + while (try_set(max_msgsize, cur_max_msgsize += 1024)) + ; + cur_max_msgsize = get(max_msgsize); + if (setpriority(PRIO_PROCESS, 0, -20) != 0) + shutdown(2, "setpriority()", __LINE__); + cur_nice = -20; +} + +int main(int argc, char *argv[]) +{ + struct mq_attr attr; + char *option, *next_option; + int i, cpu; + struct sigaction sa; + poptContext popt_context; + char rc; + void *retval; + + main_thread = pthread_self(); + num_cpus_to_pin = 0; + + if (sysconf(_SC_NPROCESSORS_ONLN) == -1) { + perror("sysconf(_SC_NPROCESSORS_ONLN)"); + exit(1); + } + cpus_online = min(MAX_CPUS, sysconf(_SC_NPROCESSORS_ONLN)); + cpu_set = CPU_ALLOC(cpus_online); + if (cpu_set == NULL) { + perror("CPU_ALLOC()"); + exit(1); + } + cpu_set_size = CPU_ALLOC_SIZE(cpus_online); + CPU_ZERO_S(cpu_set_size, cpu_set); + + popt_context = poptGetContext(NULL, argc, (const char **)argv, + options, 0); + + while ((rc = poptGetNextOpt(popt_context)) > 0) { + switch (rc) { + case 'c': + continuous_mode = 1; + option = cpu_option_string; + do { + next_option = strchr(option, ','); + if (next_option) + *next_option = '\0'; + cpu = atoi(option); + if (cpu >= cpus_online) + fprintf(stderr, "CPU %d exceeds " + "cpus online, ignoring.\n", + cpu); + else + cpus_to_pin[num_cpus_to_pin++] = cpu; + if (next_option) + option = ++next_option; + } while (next_option && num_cpus_to_pin < MAX_CPUS); + /* Double check that they didn't give us the same CPU + * more than once */ + for (cpu = 0; cpu < num_cpus_to_pin; cpu++) { + if (CPU_ISSET_S(cpus_to_pin[cpu], cpu_set_size, + cpu_set)) { + fprintf(stderr, "Any given CPU may " + "only be given once.\n"); + exit(1); + } else + CPU_SET_S(cpus_to_pin[cpu], + cpu_set_size, cpu_set); + } + break; + case 'p': + /* + * Although we can create a msg queue with a + * non-absolute path name, unlink will fail. So, + * if the name doesn't start with a /, add one + * when we save it. + */ + option = queue_path; + if (*option != '/') { + queue_path = malloc(strlen(option) + 2); + if (!queue_path) { + perror("malloc()"); + exit(1); + } + queue_path[0] = '/'; + queue_path[1] = 0; + strcat(queue_path, option); + free(option); + } + break; + } + } + + if (continuous_mode && num_cpus_to_pin == 0) { + fprintf(stderr, "Must pass at least one CPU to continuous " + "mode.\n"); + poptPrintUsage(popt_context, stderr, 0); + exit(1); + } else if (!continuous_mode) { + num_cpus_to_pin = 1; + cpus_to_pin[0] = cpus_online - 1; + } + + if (getuid() != 0) { + fprintf(stderr, "Not running as root, but almost all tests " + "require root in order to modify\nsystem settings. " + "Exiting.\n"); + exit(1); + } + + max_msgs = fopen(MAX_MSGS, "r+"); + max_msgsize = fopen(MAX_MSGSIZE, "r+"); + if (!max_msgs) + shutdown(2, "Failed to open msg_max", __LINE__); + if (!max_msgsize) + shutdown(2, "Failed to open msgsize_max", __LINE__); + + /* Load up the current system values for everything we can */ + getr(RLIMIT_MSGQUEUE, &saved_limits); + cur_limits = saved_limits; + saved_max_msgs = cur_max_msgs = get(max_msgs); + saved_max_msgsize = cur_max_msgsize = get(max_msgsize); + errno = 0; + cur_nice = getpriority(PRIO_PROCESS, 0); + if (errno) + shutdown(2, "getpriority()", __LINE__); + + /* Tell the user our initial state */ + printf("\nInitial system state:\n"); + printf("\tUsing queue path:\t\t\t%s\n", queue_path); + printf("\tRLIMIT_MSGQUEUE(soft):\t\t\t%d\n", saved_limits.rlim_cur); + printf("\tRLIMIT_MSGQUEUE(hard):\t\t\t%d\n", saved_limits.rlim_max); + printf("\tMaximum Message Size:\t\t\t%d\n", saved_max_msgsize); + printf("\tMaximum Queue Size:\t\t\t%d\n", saved_max_msgs); + printf("\tNice value:\t\t\t\t%d\n", cur_nice); + printf("\n"); + + increase_limits(); + + printf("Adjusted system state for testing:\n"); + if (cur_limits.rlim_cur == RLIM_INFINITY) { + printf("\tRLIMIT_MSGQUEUE(soft):\t\t\t(unlimited)\n"); + printf("\tRLIMIT_MSGQUEUE(hard):\t\t\t(unlimited)\n"); + } else { + printf("\tRLIMIT_MSGQUEUE(soft):\t\t\t%d\n", + cur_limits.rlim_cur); + printf("\tRLIMIT_MSGQUEUE(hard):\t\t\t%d\n", + cur_limits.rlim_max); + } + printf("\tMaximum Message Size:\t\t\t%d\n", cur_max_msgsize); + printf("\tMaximum Queue Size:\t\t\t%d\n", cur_max_msgs); + printf("\tNice value:\t\t\t\t%d\n", cur_nice); + printf("\tContinuous mode:\t\t\t(%s)\n", continuous_mode ? + (continuous_mode_fake ? "fake mode" : "enabled") : + "disabled"); + printf("\tCPUs to pin:\t\t\t\t%d", cpus_to_pin[0]); + for (cpu = 1; cpu < num_cpus_to_pin; cpu++) + printf(",%d", cpus_to_pin[cpu]); + printf("\n"); + + sa.sa_sigaction = sig_action_SIGUSR1; + sigemptyset(&sa.sa_mask); + sigaddset(&sa.sa_mask, SIGHUP); + sigaddset(&sa.sa_mask, SIGINT); + sigaddset(&sa.sa_mask, SIGQUIT); + sigaddset(&sa.sa_mask, SIGTERM); + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGUSR1, &sa, NULL) == -1) + shutdown(1, "sigaction(SIGUSR1)", __LINE__); + sa.sa_sigaction = sig_action; + if (sigaction(SIGHUP, &sa, NULL) == -1) + shutdown(1, "sigaction(SIGHUP)", __LINE__); + if (sigaction(SIGINT, &sa, NULL) == -1) + shutdown(1, "sigaction(SIGINT)", __LINE__); + if (sigaction(SIGQUIT, &sa, NULL) == -1) + shutdown(1, "sigaction(SIGQUIT)", __LINE__); + if (sigaction(SIGTERM, &sa, NULL) == -1) + shutdown(1, "sigaction(SIGTERM)", __LINE__); + + if (!continuous_mode_fake) { + attr.mq_flags = O_NONBLOCK; + attr.mq_maxmsg = cur_max_msgs; + attr.mq_msgsize = MSG_SIZE; + open_queue(&attr); + } + for (i = 0; i < num_cpus_to_pin; i++) { + pthread_attr_t thread_attr; + void *thread_func; + + if (continuous_mode_fake) + thread_func = &fake_cont_thread; + else if (continuous_mode) + thread_func = &cont_thread; + else + thread_func = &perf_test_thread; + + CPU_ZERO_S(cpu_set_size, cpu_set); + CPU_SET_S(cpus_to_pin[i], cpu_set_size, cpu_set); + pthread_attr_init(&thread_attr); + pthread_attr_setaffinity_np(&thread_attr, cpu_set_size, + cpu_set); + if (pthread_create(&cpu_threads[i], &thread_attr, thread_func, + NULL)) + shutdown(1, "pthread_create()", __LINE__); + pthread_attr_destroy(&thread_attr); + } + + if (!continuous_mode) { + pthread_join(cpu_threads[0], &retval); + shutdown((long)retval, "perf_test_thread()", __LINE__); + } else { + while (1) + sleep(1); + } + shutdown(0, "", 0); +} diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile new file mode 100644 index 000000000000..b336b24aa6c0 --- /dev/null +++ b/tools/testing/selftests/vm/Makefile @@ -0,0 +1,14 @@ +# Makefile for vm selftests + +CC = $(CROSS_COMPILE)gcc +CFLAGS = -Wall -Wextra + +all: hugepage-mmap hugepage-shm map_hugetlb +%: %.c + $(CC) $(CFLAGS) -o $@ $^ + +run_tests: all + /bin/sh ./run_vmtests + +clean: + $(RM) hugepage-mmap hugepage-shm map_hugetlb diff --git a/tools/testing/selftests/vm/hugepage-mmap.c b/tools/testing/selftests/vm/hugepage-mmap.c new file mode 100644 index 000000000000..a10f310d2362 --- /dev/null +++ b/tools/testing/selftests/vm/hugepage-mmap.c @@ -0,0 +1,92 @@ +/* + * hugepage-mmap: + * + * Example of using huge page memory in a user application using the mmap + * system call. Before running this application, make sure that the + * administrator has mounted the hugetlbfs filesystem (on some directory + * like /mnt) using the command mount -t hugetlbfs nodev /mnt. In this + * example, the app is requesting memory of size 256MB that is backed by + * huge pages. + * + * For the ia64 architecture, the Linux kernel reserves Region number 4 for + * huge pages. That means that if one requires a fixed address, a huge page + * aligned address starting with 0x800000... will be required. If a fixed + * address is not required, the kernel will select an address in the proper + * range. + * Other architectures, such as ppc64, i386 or x86_64 are not so constrained. + */ + +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/mman.h> +#include <fcntl.h> + +#define FILE_NAME "huge/hugepagefile" +#define LENGTH (256UL*1024*1024) +#define PROTECTION (PROT_READ | PROT_WRITE) + +/* Only ia64 requires this */ +#ifdef __ia64__ +#define ADDR (void *)(0x8000000000000000UL) +#define FLAGS (MAP_SHARED | MAP_FIXED) +#else +#define ADDR (void *)(0x0UL) +#define FLAGS (MAP_SHARED) +#endif + +static void check_bytes(char *addr) +{ + printf("First hex is %x\n", *((unsigned int *)addr)); +} + +static void write_bytes(char *addr) +{ + unsigned long i; + + for (i = 0; i < LENGTH; i++) + *(addr + i) = (char)i; +} + +static int read_bytes(char *addr) +{ + unsigned long i; + + check_bytes(addr); + for (i = 0; i < LENGTH; i++) + if (*(addr + i) != (char)i) { + printf("Mismatch at %lu\n", i); + return 1; + } + return 0; +} + +int main(void) +{ + void *addr; + int fd, ret; + + fd = open(FILE_NAME, O_CREAT | O_RDWR, 0755); + if (fd < 0) { + perror("Open failed"); + exit(1); + } + + addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, fd, 0); + if (addr == MAP_FAILED) { + perror("mmap"); + unlink(FILE_NAME); + exit(1); + } + + printf("Returned address is %p\n", addr); + check_bytes(addr); + write_bytes(addr); + ret = read_bytes(addr); + + munmap(addr, LENGTH); + close(fd); + unlink(FILE_NAME); + + return ret; +} diff --git a/tools/testing/selftests/vm/hugepage-shm.c b/tools/testing/selftests/vm/hugepage-shm.c new file mode 100644 index 000000000000..0d0ef4fc0c04 --- /dev/null +++ b/tools/testing/selftests/vm/hugepage-shm.c @@ -0,0 +1,100 @@ +/* + * hugepage-shm: + * + * Example of using huge page memory in a user application using Sys V shared + * memory system calls. In this example the app is requesting 256MB of + * memory that is backed by huge pages. The application uses the flag + * SHM_HUGETLB in the shmget system call to inform the kernel that it is + * requesting huge pages. + * + * For the ia64 architecture, the Linux kernel reserves Region number 4 for + * huge pages. That means that if one requires a fixed address, a huge page + * aligned address starting with 0x800000... will be required. If a fixed + * address is not required, the kernel will select an address in the proper + * range. + * Other architectures, such as ppc64, i386 or x86_64 are not so constrained. + * + * Note: The default shared memory limit is quite low on many kernels, + * you may need to increase it via: + * + * echo 268435456 > /proc/sys/kernel/shmmax + * + * This will increase the maximum size per shared memory segment to 256MB. + * The other limit that you will hit eventually is shmall which is the + * total amount of shared memory in pages. To set it to 16GB on a system + * with a 4kB pagesize do: + * + * echo 4194304 > /proc/sys/kernel/shmall + */ + +#include <stdlib.h> +#include <stdio.h> +#include <sys/types.h> +#include <sys/ipc.h> +#include <sys/shm.h> +#include <sys/mman.h> + +#ifndef SHM_HUGETLB +#define SHM_HUGETLB 04000 +#endif + +#define LENGTH (256UL*1024*1024) + +#define dprintf(x) printf(x) + +/* Only ia64 requires this */ +#ifdef __ia64__ +#define ADDR (void *)(0x8000000000000000UL) +#define SHMAT_FLAGS (SHM_RND) +#else +#define ADDR (void *)(0x0UL) +#define SHMAT_FLAGS (0) +#endif + +int main(void) +{ + int shmid; + unsigned long i; + char *shmaddr; + + shmid = shmget(2, LENGTH, SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W); + if (shmid < 0) { + perror("shmget"); + exit(1); + } + printf("shmid: 0x%x\n", shmid); + + shmaddr = shmat(shmid, ADDR, SHMAT_FLAGS); + if (shmaddr == (char *)-1) { + perror("Shared memory attach failure"); + shmctl(shmid, IPC_RMID, NULL); + exit(2); + } + printf("shmaddr: %p\n", shmaddr); + + dprintf("Starting the writes:\n"); + for (i = 0; i < LENGTH; i++) { + shmaddr[i] = (char)(i); + if (!(i % (1024 * 1024))) + dprintf("."); + } + dprintf("\n"); + + dprintf("Starting the Check..."); + for (i = 0; i < LENGTH; i++) + if (shmaddr[i] != (char)i) { + printf("\nIndex %lu mismatched\n", i); + exit(3); + } + dprintf("Done.\n"); + + if (shmdt((const void *)shmaddr) != 0) { + perror("Detach failure"); + shmctl(shmid, IPC_RMID, NULL); + exit(4); + } + + shmctl(shmid, IPC_RMID, NULL); + + return 0; +} diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c new file mode 100644 index 000000000000..ac56639dd4a9 --- /dev/null +++ b/tools/testing/selftests/vm/map_hugetlb.c @@ -0,0 +1,79 @@ +/* + * Example of using hugepage memory in a user application using the mmap + * system call with MAP_HUGETLB flag. Before running this program make + * sure the administrator has allocated enough default sized huge pages + * to cover the 256 MB allocation. + * + * For ia64 architecture, Linux kernel reserves Region number 4 for hugepages. + * That means the addresses starting with 0x800000... will need to be + * specified. Specifying a fixed address is not required on ppc64, i386 + * or x86_64. + */ +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/mman.h> +#include <fcntl.h> + +#define LENGTH (256UL*1024*1024) +#define PROTECTION (PROT_READ | PROT_WRITE) + +#ifndef MAP_HUGETLB +#define MAP_HUGETLB 0x40000 /* arch specific */ +#endif + +/* Only ia64 requires this */ +#ifdef __ia64__ +#define ADDR (void *)(0x8000000000000000UL) +#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_FIXED) +#else +#define ADDR (void *)(0x0UL) +#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB) +#endif + +static void check_bytes(char *addr) +{ + printf("First hex is %x\n", *((unsigned int *)addr)); +} + +static void write_bytes(char *addr) +{ + unsigned long i; + + for (i = 0; i < LENGTH; i++) + *(addr + i) = (char)i; +} + +static int read_bytes(char *addr) +{ + unsigned long i; + + check_bytes(addr); + for (i = 0; i < LENGTH; i++) + if (*(addr + i) != (char)i) { + printf("Mismatch at %lu\n", i); + return 1; + } + return 0; +} + +int main(void) +{ + void *addr; + int ret; + + addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, 0, 0); + if (addr == MAP_FAILED) { + perror("mmap"); + exit(1); + } + + printf("Returned address is %p\n", addr); + check_bytes(addr); + write_bytes(addr); + ret = read_bytes(addr); + + munmap(addr, LENGTH); + + return ret; +} diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests new file mode 100644 index 000000000000..8b40bd5e5cc2 --- /dev/null +++ b/tools/testing/selftests/vm/run_vmtests @@ -0,0 +1,77 @@ +#!/bin/bash +#please run as root + +#we need 256M, below is the size in kB +needmem=262144 +mnt=./huge + +#get pagesize and freepages from /proc/meminfo +while read name size unit; do + if [ "$name" = "HugePages_Free:" ]; then + freepgs=$size + fi + if [ "$name" = "Hugepagesize:" ]; then + pgsize=$size + fi +done < /proc/meminfo + +#set proper nr_hugepages +if [ -n "$freepgs" ] && [ -n "$pgsize" ]; then + nr_hugepgs=`cat /proc/sys/vm/nr_hugepages` + needpgs=`expr $needmem / $pgsize` + if [ $freepgs -lt $needpgs ]; then + lackpgs=$(( $needpgs - $freepgs )) + echo $(( $lackpgs + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages + if [ $? -ne 0 ]; then + echo "Please run this test as root" + exit 1 + fi + fi +else + echo "no hugetlbfs support in kernel?" + exit 1 +fi + +mkdir $mnt +mount -t hugetlbfs none $mnt + +echo "--------------------" +echo "runing hugepage-mmap" +echo "--------------------" +./hugepage-mmap +if [ $? -ne 0 ]; then + echo "[FAIL]" +else + echo "[PASS]" +fi + +shmmax=`cat /proc/sys/kernel/shmmax` +shmall=`cat /proc/sys/kernel/shmall` +echo 268435456 > /proc/sys/kernel/shmmax +echo 4194304 > /proc/sys/kernel/shmall +echo "--------------------" +echo "runing hugepage-shm" +echo "--------------------" +./hugepage-shm +if [ $? -ne 0 ]; then + echo "[FAIL]" +else + echo "[PASS]" +fi +echo $shmmax > /proc/sys/kernel/shmmax +echo $shmall > /proc/sys/kernel/shmall + +echo "--------------------" +echo "runing map_hugetlb" +echo "--------------------" +./map_hugetlb +if [ $? -ne 0 ]; then + echo "[FAIL]" +else + echo "[PASS]" +fi + +#cleanup +umount $mnt +rm -rf $mnt +echo $nr_hugepgs > /proc/sys/vm/nr_hugepages diff --git a/tools/usb/Makefile b/tools/usb/Makefile index 8b704af14349..396d6c44e9d7 100644 --- a/tools/usb/Makefile +++ b/tools/usb/Makefile @@ -3,7 +3,7 @@ CC = $(CROSS_COMPILE)gcc PTHREAD_LIBS = -lpthread WARNINGS = -Wall -Wextra -CFLAGS = $(WARNINGS) -g $(PTHREAD_LIBS) +CFLAGS = $(WARNINGS) -g $(PTHREAD_LIBS) -I../include all: testusb ffs-test %: %.c diff --git a/tools/usb/ffs-test.c b/tools/usb/ffs-test.c index b9c798631699..8674b9ec14f6 100644 --- a/tools/usb/ffs-test.c +++ b/tools/usb/ffs-test.c @@ -2,7 +2,7 @@ * ffs-test.c.c -- user mode filesystem api for usb composite function * * Copyright (C) 2010 Samsung Electronics - * Author: Michal Nazarewicz <m.nazarewicz@samsung.com> + * Author: Michal Nazarewicz <mina86@mina86.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -36,6 +36,7 @@ #include <sys/stat.h> #include <sys/types.h> #include <unistd.h> +#include <tools/le_byteshift.h> #include "../../include/linux/usb/functionfs.h" @@ -47,34 +48,6 @@ #define le32_to_cpu(x) le32toh(x) #define le16_to_cpu(x) le16toh(x) -static inline __u16 get_unaligned_le16(const void *_ptr) -{ - const __u8 *ptr = _ptr; - return ptr[0] | (ptr[1] << 8); -} - -static inline __u32 get_unaligned_le32(const void *_ptr) -{ - const __u8 *ptr = _ptr; - return ptr[0] | (ptr[1] << 8) | (ptr[2] << 16) | (ptr[3] << 24); -} - -static inline void put_unaligned_le16(__u16 val, void *_ptr) -{ - __u8 *ptr = _ptr; - *ptr++ = val; - *ptr++ = val >> 8; -} - -static inline void put_unaligned_le32(__u32 val, void *_ptr) -{ - __u8 *ptr = _ptr; - *ptr++ = val; - *ptr++ = val >> 8; - *ptr++ = val >> 16; - *ptr++ = val >> 24; -} - /******************** Messages and Errors ***********************************/ @@ -324,7 +297,7 @@ static void *start_thread_helper(void *arg) ret = t->in(t, t->buf, t->buf_size); if (ret > 0) { - ret = t->out(t, t->buf, t->buf_size); + ret = t->out(t, t->buf, ret); name = out_name; op = "write"; } else { diff --git a/tools/usb/testusb.c b/tools/usb/testusb.c index f08e89463842..b0adb2710c02 100644 --- a/tools/usb/testusb.c +++ b/tools/usb/testusb.c @@ -3,7 +3,7 @@ /* * Copyright (c) 2002 by David Brownell * Copyright (c) 2010 by Samsung Electronics - * Author: Michal Nazarewicz <m.nazarewicz@samsung.com> + * Author: Michal Nazarewicz <mina86@mina86.com> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -358,6 +358,7 @@ static const char *usbfs_dir_find(void) { static char usbfs_path_0[] = "/dev/usb/devices"; static char usbfs_path_1[] = "/proc/bus/usb/devices"; + static char udev_usb_path[] = "/dev/bus/usb"; static char *const usbfs_paths[] = { usbfs_path_0, usbfs_path_1 @@ -376,6 +377,10 @@ static const char *usbfs_dir_find(void) } } while (++it != end); + /* real device-nodes managed by udev */ + if (access(udev_usb_path, F_OK) == 0) + return udev_usb_path; + return NULL; } @@ -420,7 +425,7 @@ int main (int argc, char **argv) /* for easy use when hotplugging */ device = getenv ("DEVICE"); - while ((c = getopt (argc, argv, "D:aA:c:g:hns:t:v:")) != EOF) + while ((c = getopt (argc, argv, "D:aA:c:g:hlns:t:v:")) != EOF) switch (c) { case 'D': /* device, if only one */ device = optarg; @@ -463,10 +468,21 @@ int main (int argc, char **argv) case 'h': default: usage: - fprintf (stderr, "usage: %s [-n] [-D dev | -a | -A usbfs-dir]\n" - "\t[-c iterations] [-t testnum]\n" - "\t[-s packetsize] [-g sglen] [-v vary]\n", - argv [0]); + fprintf (stderr, + "usage: %s [options]\n" + "Options:\n" + "\t-D dev only test specific device\n" + "\t-A usbfs-dir\n" + "\t-a test all recognized devices\n" + "\t-l loop forever(for stress test)\n" + "\t-t testnum only run specified case\n" + "\t-n no test running, show devices to be tested\n" + "Case arguments:\n" + "\t-c iterations default 1000\n" + "\t-s packetsize default 512\n" + "\t-g sglen default 32\n" + "\t-v vary default 512\n", + argv[0]); return 1; } if (optind != argc) diff --git a/tools/virtio/linux/hrtimer.h b/tools/virtio/linux/hrtimer.h new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/tools/virtio/linux/hrtimer.h diff --git a/tools/virtio/linux/module.h b/tools/virtio/linux/module.h new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/tools/virtio/linux/module.h diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h index 669bcdd45805..81847dd08bd0 100644 --- a/tools/virtio/linux/virtio.h +++ b/tools/virtio/linux/virtio.h @@ -181,26 +181,20 @@ struct virtqueue { #define smp_mb() mb() # define smp_rmb() barrier() # define smp_wmb() barrier() +/* Weak barriers should be used. If not - it's a bug */ +# define rmb() abort() +# define wmb() abort() #else #error Please fill in barrier macros #endif /* Interfaces exported by virtio_ring. */ -int virtqueue_add_buf_gfp(struct virtqueue *vq, - struct scatterlist sg[], - unsigned int out_num, - unsigned int in_num, - void *data, - gfp_t gfp); - -static inline int virtqueue_add_buf(struct virtqueue *vq, - struct scatterlist sg[], - unsigned int out_num, - unsigned int in_num, - void *data) -{ - return virtqueue_add_buf_gfp(vq, sg, out_num, in_num, data, GFP_ATOMIC); -} +int virtqueue_add_buf(struct virtqueue *vq, + struct scatterlist sg[], + unsigned int out_num, + unsigned int in_num, + void *data, + gfp_t gfp); void virtqueue_kick(struct virtqueue *vq); @@ -209,11 +203,13 @@ void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len); void virtqueue_disable_cb(struct virtqueue *vq); bool virtqueue_enable_cb(struct virtqueue *vq); +bool virtqueue_enable_cb_delayed(struct virtqueue *vq); void *virtqueue_detach_unused_buf(struct virtqueue *vq); struct virtqueue *vring_new_virtqueue(unsigned int num, unsigned int vring_align, struct virtio_device *vdev, + bool weak_barriers, void *pages, void (*notify)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq), diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c index 74d3331bdaf9..e626fa553c5a 100644 --- a/tools/virtio/virtio_test.c +++ b/tools/virtio/virtio_test.c @@ -92,7 +92,8 @@ static void vq_info_add(struct vdev_info *dev, int num) assert(r >= 0); memset(info->ring, 0, vring_size(num, 4096)); vring_init(&info->vring, num, info->ring, 4096); - info->vq = vring_new_virtqueue(info->vring.num, 4096, &dev->vdev, info->ring, + info->vq = vring_new_virtqueue(info->vring.num, 4096, &dev->vdev, + true, info->ring, vq_notify, vq_callback, "test"); assert(info->vq); info->vq->priv = info; @@ -143,7 +144,8 @@ static void wait_for_interrupt(struct vdev_info *dev) } } -static void run_test(struct vdev_info *dev, struct vq_info *vq, int bufs) +static void run_test(struct vdev_info *dev, struct vq_info *vq, + bool delayed, int bufs) { struct scatterlist sl; long started = 0, completed = 0; @@ -160,7 +162,8 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq, int bufs) if (started < bufs) { sg_init_one(&sl, dev->buf, dev->buf_size); r = virtqueue_add_buf(vq->vq, &sl, 1, 0, - dev->buf + started); + dev->buf + started, + GFP_ATOMIC); if (likely(r >= 0)) { ++started; virtqueue_kick(vq->vq); @@ -181,8 +184,12 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq, int bufs) assert(started <= bufs); if (completed == bufs) break; - if (virtqueue_enable_cb(vq->vq)) { - wait_for_interrupt(dev); + if (delayed) { + if (virtqueue_enable_cb_delayed(vq->vq)) + wait_for_interrupt(dev); + } else { + if (virtqueue_enable_cb(vq->vq)) + wait_for_interrupt(dev); } } test = 0; @@ -214,6 +221,14 @@ const struct option longopts[] = { .val = 'i', }, { + .name = "delayed-interrupt", + .val = 'D', + }, + { + .name = "no-delayed-interrupt", + .val = 'd', + }, + { } }; @@ -222,6 +237,7 @@ static void help() fprintf(stderr, "Usage: virtio_test [--help]" " [--no-indirect]" " [--no-event-idx]" + " [--delayed-interrupt]" "\n"); } @@ -231,6 +247,7 @@ int main(int argc, char **argv) unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | (1ULL << VIRTIO_RING_F_EVENT_IDX); int o; + bool delayed = false; for (;;) { o = getopt_long(argc, argv, optstring, longopts, NULL); @@ -249,6 +266,9 @@ int main(int argc, char **argv) case 'i': features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC); break; + case 'D': + delayed = true; + break; default: assert(0); break; @@ -258,6 +278,6 @@ int main(int argc, char **argv) done: vdev_info_init(&dev, features); vq_info_add(&dev, 256); - run_test(&dev, &dev.vqs[0], 0x100000); + run_test(&dev, &dev.vqs[0], delayed, 0x100000); return 0; } diff --git a/tools/vm/Makefile b/tools/vm/Makefile new file mode 100644 index 000000000000..8e30e5c40f8a --- /dev/null +++ b/tools/vm/Makefile @@ -0,0 +1,11 @@ +# Makefile for vm tools + +CC = $(CROSS_COMPILE)gcc +CFLAGS = -Wall -Wextra + +all: page-types slabinfo +%: %.c + $(CC) $(CFLAGS) -o $@ $^ + +clean: + $(RM) page-types slabinfo diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c new file mode 100644 index 000000000000..f576971f6556 --- /dev/null +++ b/tools/vm/page-types.c @@ -0,0 +1,1076 @@ +/* + * page-types: Tool for querying page flags + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; version 2. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should find a copy of v2 of the GNU General Public License somewhere on + * your Linux system; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA. + * + * Copyright (C) 2009 Intel corporation + * + * Authors: Wu Fengguang <fengguang.wu@intel.com> + */ + +#define _LARGEFILE64_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <stdint.h> +#include <stdarg.h> +#include <string.h> +#include <getopt.h> +#include <limits.h> +#include <assert.h> +#include <sys/types.h> +#include <sys/errno.h> +#include <sys/fcntl.h> +#include <sys/mount.h> +#include <sys/statfs.h> +#include "../../include/linux/magic.h" +#include "../../include/linux/kernel-page-flags.h" + + +#ifndef MAX_PATH +# define MAX_PATH 256 +#endif + +#ifndef STR +# define _STR(x) #x +# define STR(x) _STR(x) +#endif + +/* + * pagemap kernel ABI bits + */ + +#define PM_ENTRY_BYTES sizeof(uint64_t) +#define PM_STATUS_BITS 3 +#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS) +#define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET) +#define PM_STATUS(nr) (((nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK) +#define PM_PSHIFT_BITS 6 +#define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS) +#define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET) +#define PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK) +#define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1) +#define PM_PFRAME(x) ((x) & PM_PFRAME_MASK) + +#define PM_PRESENT PM_STATUS(4LL) +#define PM_SWAP PM_STATUS(2LL) + + +/* + * kernel page flags + */ + +#define KPF_BYTES 8 +#define PROC_KPAGEFLAGS "/proc/kpageflags" + +/* [32-] kernel hacking assistances */ +#define KPF_RESERVED 32 +#define KPF_MLOCKED 33 +#define KPF_MAPPEDTODISK 34 +#define KPF_PRIVATE 35 +#define KPF_PRIVATE_2 36 +#define KPF_OWNER_PRIVATE 37 +#define KPF_ARCH 38 +#define KPF_UNCACHED 39 + +/* [48-] take some arbitrary free slots for expanding overloaded flags + * not part of kernel API + */ +#define KPF_READAHEAD 48 +#define KPF_SLOB_FREE 49 +#define KPF_SLUB_FROZEN 50 +#define KPF_SLUB_DEBUG 51 + +#define KPF_ALL_BITS ((uint64_t)~0ULL) +#define KPF_HACKERS_BITS (0xffffULL << 32) +#define KPF_OVERLOADED_BITS (0xffffULL << 48) +#define BIT(name) (1ULL << KPF_##name) +#define BITS_COMPOUND (BIT(COMPOUND_HEAD) | BIT(COMPOUND_TAIL)) + +static const char * const page_flag_names[] = { + [KPF_LOCKED] = "L:locked", + [KPF_ERROR] = "E:error", + [KPF_REFERENCED] = "R:referenced", + [KPF_UPTODATE] = "U:uptodate", + [KPF_DIRTY] = "D:dirty", + [KPF_LRU] = "l:lru", + [KPF_ACTIVE] = "A:active", + [KPF_SLAB] = "S:slab", + [KPF_WRITEBACK] = "W:writeback", + [KPF_RECLAIM] = "I:reclaim", + [KPF_BUDDY] = "B:buddy", + + [KPF_MMAP] = "M:mmap", + [KPF_ANON] = "a:anonymous", + [KPF_SWAPCACHE] = "s:swapcache", + [KPF_SWAPBACKED] = "b:swapbacked", + [KPF_COMPOUND_HEAD] = "H:compound_head", + [KPF_COMPOUND_TAIL] = "T:compound_tail", + [KPF_HUGE] = "G:huge", + [KPF_UNEVICTABLE] = "u:unevictable", + [KPF_HWPOISON] = "X:hwpoison", + [KPF_NOPAGE] = "n:nopage", + [KPF_KSM] = "x:ksm", + [KPF_THP] = "t:thp", + + [KPF_RESERVED] = "r:reserved", + [KPF_MLOCKED] = "m:mlocked", + [KPF_MAPPEDTODISK] = "d:mappedtodisk", + [KPF_PRIVATE] = "P:private", + [KPF_PRIVATE_2] = "p:private_2", + [KPF_OWNER_PRIVATE] = "O:owner_private", + [KPF_ARCH] = "h:arch", + [KPF_UNCACHED] = "c:uncached", + + [KPF_READAHEAD] = "I:readahead", + [KPF_SLOB_FREE] = "P:slob_free", + [KPF_SLUB_FROZEN] = "A:slub_frozen", + [KPF_SLUB_DEBUG] = "E:slub_debug", +}; + + +static const char * const debugfs_known_mountpoints[] = { + "/sys/kernel/debug", + "/debug", + 0, +}; + +/* + * data structures + */ + +static int opt_raw; /* for kernel developers */ +static int opt_list; /* list pages (in ranges) */ +static int opt_no_summary; /* don't show summary */ +static pid_t opt_pid; /* process to walk */ + +#define MAX_ADDR_RANGES 1024 +static int nr_addr_ranges; +static unsigned long opt_offset[MAX_ADDR_RANGES]; +static unsigned long opt_size[MAX_ADDR_RANGES]; + +#define MAX_VMAS 10240 +static int nr_vmas; +static unsigned long pg_start[MAX_VMAS]; +static unsigned long pg_end[MAX_VMAS]; + +#define MAX_BIT_FILTERS 64 +static int nr_bit_filters; +static uint64_t opt_mask[MAX_BIT_FILTERS]; +static uint64_t opt_bits[MAX_BIT_FILTERS]; + +static int page_size; + +static int pagemap_fd; +static int kpageflags_fd; + +static int opt_hwpoison; +static int opt_unpoison; + +static char hwpoison_debug_fs[MAX_PATH+1]; +static int hwpoison_inject_fd; +static int hwpoison_forget_fd; + +#define HASH_SHIFT 13 +#define HASH_SIZE (1 << HASH_SHIFT) +#define HASH_MASK (HASH_SIZE - 1) +#define HASH_KEY(flags) (flags & HASH_MASK) + +static unsigned long total_pages; +static unsigned long nr_pages[HASH_SIZE]; +static uint64_t page_flags[HASH_SIZE]; + + +/* + * helper functions + */ + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +#define min_t(type, x, y) ({ \ + type __min1 = (x); \ + type __min2 = (y); \ + __min1 < __min2 ? __min1 : __min2; }) + +#define max_t(type, x, y) ({ \ + type __max1 = (x); \ + type __max2 = (y); \ + __max1 > __max2 ? __max1 : __max2; }) + +static unsigned long pages2mb(unsigned long pages) +{ + return (pages * page_size) >> 20; +} + +static void fatal(const char *x, ...) +{ + va_list ap; + + va_start(ap, x); + vfprintf(stderr, x, ap); + va_end(ap); + exit(EXIT_FAILURE); +} + +static int checked_open(const char *pathname, int flags) +{ + int fd = open(pathname, flags); + + if (fd < 0) { + perror(pathname); + exit(EXIT_FAILURE); + } + + return fd; +} + +/* + * pagemap/kpageflags routines + */ + +static unsigned long do_u64_read(int fd, char *name, + uint64_t *buf, + unsigned long index, + unsigned long count) +{ + long bytes; + + if (index > ULONG_MAX / 8) + fatal("index overflow: %lu\n", index); + + if (lseek(fd, index * 8, SEEK_SET) < 0) { + perror(name); + exit(EXIT_FAILURE); + } + + bytes = read(fd, buf, count * 8); + if (bytes < 0) { + perror(name); + exit(EXIT_FAILURE); + } + if (bytes % 8) + fatal("partial read: %lu bytes\n", bytes); + + return bytes / 8; +} + +static unsigned long kpageflags_read(uint64_t *buf, + unsigned long index, + unsigned long pages) +{ + return do_u64_read(kpageflags_fd, PROC_KPAGEFLAGS, buf, index, pages); +} + +static unsigned long pagemap_read(uint64_t *buf, + unsigned long index, + unsigned long pages) +{ + return do_u64_read(pagemap_fd, "/proc/pid/pagemap", buf, index, pages); +} + +static unsigned long pagemap_pfn(uint64_t val) +{ + unsigned long pfn; + + if (val & PM_PRESENT) + pfn = PM_PFRAME(val); + else + pfn = 0; + + return pfn; +} + + +/* + * page flag names + */ + +static char *page_flag_name(uint64_t flags) +{ + static char buf[65]; + int present; + size_t i, j; + + for (i = 0, j = 0; i < ARRAY_SIZE(page_flag_names); i++) { + present = (flags >> i) & 1; + if (!page_flag_names[i]) { + if (present) + fatal("unknown flag bit %d\n", i); + continue; + } + buf[j++] = present ? page_flag_names[i][0] : '_'; + } + + return buf; +} + +static char *page_flag_longname(uint64_t flags) +{ + static char buf[1024]; + size_t i, n; + + for (i = 0, n = 0; i < ARRAY_SIZE(page_flag_names); i++) { + if (!page_flag_names[i]) + continue; + if ((flags >> i) & 1) + n += snprintf(buf + n, sizeof(buf) - n, "%s,", + page_flag_names[i] + 2); + } + if (n) + n--; + buf[n] = '\0'; + + return buf; +} + + +/* + * page list and summary + */ + +static void show_page_range(unsigned long voffset, + unsigned long offset, uint64_t flags) +{ + static uint64_t flags0; + static unsigned long voff; + static unsigned long index; + static unsigned long count; + + if (flags == flags0 && offset == index + count && + (!opt_pid || voffset == voff + count)) { + count++; + return; + } + + if (count) { + if (opt_pid) + printf("%lx\t", voff); + printf("%lx\t%lx\t%s\n", + index, count, page_flag_name(flags0)); + } + + flags0 = flags; + index = offset; + voff = voffset; + count = 1; +} + +static void show_page(unsigned long voffset, + unsigned long offset, uint64_t flags) +{ + if (opt_pid) + printf("%lx\t", voffset); + printf("%lx\t%s\n", offset, page_flag_name(flags)); +} + +static void show_summary(void) +{ + size_t i; + + printf(" flags\tpage-count MB" + " symbolic-flags\t\t\tlong-symbolic-flags\n"); + + for (i = 0; i < ARRAY_SIZE(nr_pages); i++) { + if (nr_pages[i]) + printf("0x%016llx\t%10lu %8lu %s\t%s\n", + (unsigned long long)page_flags[i], + nr_pages[i], + pages2mb(nr_pages[i]), + page_flag_name(page_flags[i]), + page_flag_longname(page_flags[i])); + } + + printf(" total\t%10lu %8lu\n", + total_pages, pages2mb(total_pages)); +} + + +/* + * page flag filters + */ + +static int bit_mask_ok(uint64_t flags) +{ + int i; + + for (i = 0; i < nr_bit_filters; i++) { + if (opt_bits[i] == KPF_ALL_BITS) { + if ((flags & opt_mask[i]) == 0) + return 0; + } else { + if ((flags & opt_mask[i]) != opt_bits[i]) + return 0; + } + } + + return 1; +} + +static uint64_t expand_overloaded_flags(uint64_t flags) +{ + /* SLOB/SLUB overload several page flags */ + if (flags & BIT(SLAB)) { + if (flags & BIT(PRIVATE)) + flags ^= BIT(PRIVATE) | BIT(SLOB_FREE); + if (flags & BIT(ACTIVE)) + flags ^= BIT(ACTIVE) | BIT(SLUB_FROZEN); + if (flags & BIT(ERROR)) + flags ^= BIT(ERROR) | BIT(SLUB_DEBUG); + } + + /* PG_reclaim is overloaded as PG_readahead in the read path */ + if ((flags & (BIT(RECLAIM) | BIT(WRITEBACK))) == BIT(RECLAIM)) + flags ^= BIT(RECLAIM) | BIT(READAHEAD); + + return flags; +} + +static uint64_t well_known_flags(uint64_t flags) +{ + /* hide flags intended only for kernel hacker */ + flags &= ~KPF_HACKERS_BITS; + + /* hide non-hugeTLB compound pages */ + if ((flags & BITS_COMPOUND) && !(flags & BIT(HUGE))) + flags &= ~BITS_COMPOUND; + + return flags; +} + +static uint64_t kpageflags_flags(uint64_t flags) +{ + flags = expand_overloaded_flags(flags); + + if (!opt_raw) + flags = well_known_flags(flags); + + return flags; +} + +/* verify that a mountpoint is actually a debugfs instance */ +static int debugfs_valid_mountpoint(const char *debugfs) +{ + struct statfs st_fs; + + if (statfs(debugfs, &st_fs) < 0) + return -ENOENT; + else if (st_fs.f_type != (long) DEBUGFS_MAGIC) + return -ENOENT; + + return 0; +} + +/* find the path to the mounted debugfs */ +static const char *debugfs_find_mountpoint(void) +{ + const char *const *ptr; + char type[100]; + FILE *fp; + + ptr = debugfs_known_mountpoints; + while (*ptr) { + if (debugfs_valid_mountpoint(*ptr) == 0) { + strcpy(hwpoison_debug_fs, *ptr); + return hwpoison_debug_fs; + } + ptr++; + } + + /* give up and parse /proc/mounts */ + fp = fopen("/proc/mounts", "r"); + if (fp == NULL) + perror("Can't open /proc/mounts for read"); + + while (fscanf(fp, "%*s %" + STR(MAX_PATH) + "s %99s %*s %*d %*d\n", + hwpoison_debug_fs, type) == 2) { + if (strcmp(type, "debugfs") == 0) + break; + } + fclose(fp); + + if (strcmp(type, "debugfs") != 0) + return NULL; + + return hwpoison_debug_fs; +} + +/* mount the debugfs somewhere if it's not mounted */ + +static void debugfs_mount(void) +{ + const char *const *ptr; + + /* see if it's already mounted */ + if (debugfs_find_mountpoint()) + return; + + ptr = debugfs_known_mountpoints; + while (*ptr) { + if (mount(NULL, *ptr, "debugfs", 0, NULL) == 0) { + /* save the mountpoint */ + strcpy(hwpoison_debug_fs, *ptr); + break; + } + ptr++; + } + + if (*ptr == NULL) { + perror("mount debugfs"); + exit(EXIT_FAILURE); + } +} + +/* + * page actions + */ + +static void prepare_hwpoison_fd(void) +{ + char buf[MAX_PATH + 1]; + + debugfs_mount(); + + if (opt_hwpoison && !hwpoison_inject_fd) { + snprintf(buf, MAX_PATH, "%s/hwpoison/corrupt-pfn", + hwpoison_debug_fs); + hwpoison_inject_fd = checked_open(buf, O_WRONLY); + } + + if (opt_unpoison && !hwpoison_forget_fd) { + snprintf(buf, MAX_PATH, "%s/hwpoison/unpoison-pfn", + hwpoison_debug_fs); + hwpoison_forget_fd = checked_open(buf, O_WRONLY); + } +} + +static int hwpoison_page(unsigned long offset) +{ + char buf[100]; + int len; + + len = sprintf(buf, "0x%lx\n", offset); + len = write(hwpoison_inject_fd, buf, len); + if (len < 0) { + perror("hwpoison inject"); + return len; + } + return 0; +} + +static int unpoison_page(unsigned long offset) +{ + char buf[100]; + int len; + + len = sprintf(buf, "0x%lx\n", offset); + len = write(hwpoison_forget_fd, buf, len); + if (len < 0) { + perror("hwpoison forget"); + return len; + } + return 0; +} + +/* + * page frame walker + */ + +static size_t hash_slot(uint64_t flags) +{ + size_t k = HASH_KEY(flags); + size_t i; + + /* Explicitly reserve slot 0 for flags 0: the following logic + * cannot distinguish an unoccupied slot from slot (flags==0). + */ + if (flags == 0) + return 0; + + /* search through the remaining (HASH_SIZE-1) slots */ + for (i = 1; i < ARRAY_SIZE(page_flags); i++, k++) { + if (!k || k >= ARRAY_SIZE(page_flags)) + k = 1; + if (page_flags[k] == 0) { + page_flags[k] = flags; + return k; + } + if (page_flags[k] == flags) + return k; + } + + fatal("hash table full: bump up HASH_SHIFT?\n"); + exit(EXIT_FAILURE); +} + +static void add_page(unsigned long voffset, + unsigned long offset, uint64_t flags) +{ + flags = kpageflags_flags(flags); + + if (!bit_mask_ok(flags)) + return; + + if (opt_hwpoison) + hwpoison_page(offset); + if (opt_unpoison) + unpoison_page(offset); + + if (opt_list == 1) + show_page_range(voffset, offset, flags); + else if (opt_list == 2) + show_page(voffset, offset, flags); + + nr_pages[hash_slot(flags)]++; + total_pages++; +} + +#define KPAGEFLAGS_BATCH (64 << 10) /* 64k pages */ +static void walk_pfn(unsigned long voffset, + unsigned long index, + unsigned long count) +{ + uint64_t buf[KPAGEFLAGS_BATCH]; + unsigned long batch; + unsigned long pages; + unsigned long i; + + while (count) { + batch = min_t(unsigned long, count, KPAGEFLAGS_BATCH); + pages = kpageflags_read(buf, index, batch); + if (pages == 0) + break; + + for (i = 0; i < pages; i++) + add_page(voffset + i, index + i, buf[i]); + + index += pages; + count -= pages; + } +} + +#define PAGEMAP_BATCH (64 << 10) +static void walk_vma(unsigned long index, unsigned long count) +{ + uint64_t buf[PAGEMAP_BATCH]; + unsigned long batch; + unsigned long pages; + unsigned long pfn; + unsigned long i; + + while (count) { + batch = min_t(unsigned long, count, PAGEMAP_BATCH); + pages = pagemap_read(buf, index, batch); + if (pages == 0) + break; + + for (i = 0; i < pages; i++) { + pfn = pagemap_pfn(buf[i]); + if (pfn) + walk_pfn(index + i, pfn, 1); + } + + index += pages; + count -= pages; + } +} + +static void walk_task(unsigned long index, unsigned long count) +{ + const unsigned long end = index + count; + unsigned long start; + int i = 0; + + while (index < end) { + + while (pg_end[i] <= index) + if (++i >= nr_vmas) + return; + if (pg_start[i] >= end) + return; + + start = max_t(unsigned long, pg_start[i], index); + index = min_t(unsigned long, pg_end[i], end); + + assert(start < index); + walk_vma(start, index - start); + } +} + +static void add_addr_range(unsigned long offset, unsigned long size) +{ + if (nr_addr_ranges >= MAX_ADDR_RANGES) + fatal("too many addr ranges\n"); + + opt_offset[nr_addr_ranges] = offset; + opt_size[nr_addr_ranges] = min_t(unsigned long, size, ULONG_MAX-offset); + nr_addr_ranges++; +} + +static void walk_addr_ranges(void) +{ + int i; + + kpageflags_fd = checked_open(PROC_KPAGEFLAGS, O_RDONLY); + + if (!nr_addr_ranges) + add_addr_range(0, ULONG_MAX); + + for (i = 0; i < nr_addr_ranges; i++) + if (!opt_pid) + walk_pfn(0, opt_offset[i], opt_size[i]); + else + walk_task(opt_offset[i], opt_size[i]); + + close(kpageflags_fd); +} + + +/* + * user interface + */ + +static const char *page_flag_type(uint64_t flag) +{ + if (flag & KPF_HACKERS_BITS) + return "(r)"; + if (flag & KPF_OVERLOADED_BITS) + return "(o)"; + return " "; +} + +static void usage(void) +{ + size_t i, j; + + printf( +"page-types [options]\n" +" -r|--raw Raw mode, for kernel developers\n" +" -d|--describe flags Describe flags\n" +" -a|--addr addr-spec Walk a range of pages\n" +" -b|--bits bits-spec Walk pages with specified bits\n" +" -p|--pid pid Walk process address space\n" +#if 0 /* planned features */ +" -f|--file filename Walk file address space\n" +#endif +" -l|--list Show page details in ranges\n" +" -L|--list-each Show page details one by one\n" +" -N|--no-summary Don't show summary info\n" +" -X|--hwpoison hwpoison pages\n" +" -x|--unpoison unpoison pages\n" +" -h|--help Show this usage message\n" +"flags:\n" +" 0x10 bitfield format, e.g.\n" +" anon bit-name, e.g.\n" +" 0x10,anon comma-separated list, e.g.\n" +"addr-spec:\n" +" N one page at offset N (unit: pages)\n" +" N+M pages range from N to N+M-1\n" +" N,M pages range from N to M-1\n" +" N, pages range from N to end\n" +" ,M pages range from 0 to M-1\n" +"bits-spec:\n" +" bit1,bit2 (flags & (bit1|bit2)) != 0\n" +" bit1,bit2=bit1 (flags & (bit1|bit2)) == bit1\n" +" bit1,~bit2 (flags & (bit1|bit2)) == bit1\n" +" =bit1,bit2 flags == (bit1|bit2)\n" +"bit-names:\n" + ); + + for (i = 0, j = 0; i < ARRAY_SIZE(page_flag_names); i++) { + if (!page_flag_names[i]) + continue; + printf("%16s%s", page_flag_names[i] + 2, + page_flag_type(1ULL << i)); + if (++j > 3) { + j = 0; + putchar('\n'); + } + } + printf("\n " + "(r) raw mode bits (o) overloaded bits\n"); +} + +static unsigned long long parse_number(const char *str) +{ + unsigned long long n; + + n = strtoll(str, NULL, 0); + + if (n == 0 && str[0] != '0') + fatal("invalid name or number: %s\n", str); + + return n; +} + +static void parse_pid(const char *str) +{ + FILE *file; + char buf[5000]; + + opt_pid = parse_number(str); + + sprintf(buf, "/proc/%d/pagemap", opt_pid); + pagemap_fd = checked_open(buf, O_RDONLY); + + sprintf(buf, "/proc/%d/maps", opt_pid); + file = fopen(buf, "r"); + if (!file) { + perror(buf); + exit(EXIT_FAILURE); + } + + while (fgets(buf, sizeof(buf), file) != NULL) { + unsigned long vm_start; + unsigned long vm_end; + unsigned long long pgoff; + int major, minor; + char r, w, x, s; + unsigned long ino; + int n; + + n = sscanf(buf, "%lx-%lx %c%c%c%c %llx %x:%x %lu", + &vm_start, + &vm_end, + &r, &w, &x, &s, + &pgoff, + &major, &minor, + &ino); + if (n < 10) { + fprintf(stderr, "unexpected line: %s\n", buf); + continue; + } + pg_start[nr_vmas] = vm_start / page_size; + pg_end[nr_vmas] = vm_end / page_size; + if (++nr_vmas >= MAX_VMAS) { + fprintf(stderr, "too many VMAs\n"); + break; + } + } + fclose(file); +} + +static void parse_file(const char *name) +{ +} + +static void parse_addr_range(const char *optarg) +{ + unsigned long offset; + unsigned long size; + char *p; + + p = strchr(optarg, ','); + if (!p) + p = strchr(optarg, '+'); + + if (p == optarg) { + offset = 0; + size = parse_number(p + 1); + } else if (p) { + offset = parse_number(optarg); + if (p[1] == '\0') + size = ULONG_MAX; + else { + size = parse_number(p + 1); + if (*p == ',') { + if (size < offset) + fatal("invalid range: %lu,%lu\n", + offset, size); + size -= offset; + } + } + } else { + offset = parse_number(optarg); + size = 1; + } + + add_addr_range(offset, size); +} + +static void add_bits_filter(uint64_t mask, uint64_t bits) +{ + if (nr_bit_filters >= MAX_BIT_FILTERS) + fatal("too much bit filters\n"); + + opt_mask[nr_bit_filters] = mask; + opt_bits[nr_bit_filters] = bits; + nr_bit_filters++; +} + +static uint64_t parse_flag_name(const char *str, int len) +{ + size_t i; + + if (!*str || !len) + return 0; + + if (len <= 8 && !strncmp(str, "compound", len)) + return BITS_COMPOUND; + + for (i = 0; i < ARRAY_SIZE(page_flag_names); i++) { + if (!page_flag_names[i]) + continue; + if (!strncmp(str, page_flag_names[i] + 2, len)) + return 1ULL << i; + } + + return parse_number(str); +} + +static uint64_t parse_flag_names(const char *str, int all) +{ + const char *p = str; + uint64_t flags = 0; + + while (1) { + if (*p == ',' || *p == '=' || *p == '\0') { + if ((*str != '~') || (*str == '~' && all && *++str)) + flags |= parse_flag_name(str, p - str); + if (*p != ',') + break; + str = p + 1; + } + p++; + } + + return flags; +} + +static void parse_bits_mask(const char *optarg) +{ + uint64_t mask; + uint64_t bits; + const char *p; + + p = strchr(optarg, '='); + if (p == optarg) { + mask = KPF_ALL_BITS; + bits = parse_flag_names(p + 1, 0); + } else if (p) { + mask = parse_flag_names(optarg, 0); + bits = parse_flag_names(p + 1, 0); + } else if (strchr(optarg, '~')) { + mask = parse_flag_names(optarg, 1); + bits = parse_flag_names(optarg, 0); + } else { + mask = parse_flag_names(optarg, 0); + bits = KPF_ALL_BITS; + } + + add_bits_filter(mask, bits); +} + +static void describe_flags(const char *optarg) +{ + uint64_t flags = parse_flag_names(optarg, 0); + + printf("0x%016llx\t%s\t%s\n", + (unsigned long long)flags, + page_flag_name(flags), + page_flag_longname(flags)); +} + +static const struct option opts[] = { + { "raw" , 0, NULL, 'r' }, + { "pid" , 1, NULL, 'p' }, + { "file" , 1, NULL, 'f' }, + { "addr" , 1, NULL, 'a' }, + { "bits" , 1, NULL, 'b' }, + { "describe" , 1, NULL, 'd' }, + { "list" , 0, NULL, 'l' }, + { "list-each" , 0, NULL, 'L' }, + { "no-summary", 0, NULL, 'N' }, + { "hwpoison" , 0, NULL, 'X' }, + { "unpoison" , 0, NULL, 'x' }, + { "help" , 0, NULL, 'h' }, + { NULL , 0, NULL, 0 } +}; + +int main(int argc, char *argv[]) +{ + int c; + + page_size = getpagesize(); + + while ((c = getopt_long(argc, argv, + "rp:f:a:b:d:lLNXxh", opts, NULL)) != -1) { + switch (c) { + case 'r': + opt_raw = 1; + break; + case 'p': + parse_pid(optarg); + break; + case 'f': + parse_file(optarg); + break; + case 'a': + parse_addr_range(optarg); + break; + case 'b': + parse_bits_mask(optarg); + break; + case 'd': + describe_flags(optarg); + exit(0); + case 'l': + opt_list = 1; + break; + case 'L': + opt_list = 2; + break; + case 'N': + opt_no_summary = 1; + break; + case 'X': + opt_hwpoison = 1; + prepare_hwpoison_fd(); + break; + case 'x': + opt_unpoison = 1; + prepare_hwpoison_fd(); + break; + case 'h': + usage(); + exit(0); + default: + usage(); + exit(1); + } + } + + if (opt_list && opt_pid) + printf("voffset\t"); + if (opt_list == 1) + printf("offset\tlen\tflags\n"); + if (opt_list == 2) + printf("offset\tflags\n"); + + walk_addr_ranges(); + + if (opt_list == 1) + show_page_range(0, 0, 0); /* drain the buffer */ + + if (opt_no_summary) + return 0; + + if (opt_list) + printf("\n\n"); + + show_summary(); + + return 0; +} diff --git a/tools/slub/slabinfo.c b/tools/vm/slabinfo.c index 868cc93f7ac2..808d5a9d5dcf 100644 --- a/tools/slub/slabinfo.c +++ b/tools/vm/slabinfo.c @@ -42,6 +42,7 @@ struct slabinfo { unsigned long deactivate_remote_frees, order_fallback; unsigned long cmpxchg_double_cpu_fail, cmpxchg_double_fail; unsigned long alloc_node_mismatch, deactivate_bypass; + unsigned long cpu_partial_alloc, cpu_partial_free; int numa[MAX_NODES]; int numa_partial[MAX_NODES]; } slabinfo[MAX_SLABS]; @@ -436,29 +437,34 @@ static void slab_stats(struct slabinfo *s) printf("Fastpath %8lu %8lu %3lu %3lu\n", s->alloc_fastpath, s->free_fastpath, s->alloc_fastpath * 100 / total_alloc, - s->free_fastpath * 100 / total_free); + total_free ? s->free_fastpath * 100 / total_free : 0); printf("Slowpath %8lu %8lu %3lu %3lu\n", total_alloc - s->alloc_fastpath, s->free_slowpath, (total_alloc - s->alloc_fastpath) * 100 / total_alloc, - s->free_slowpath * 100 / total_free); + total_free ? s->free_slowpath * 100 / total_free : 0); printf("Page Alloc %8lu %8lu %3lu %3lu\n", s->alloc_slab, s->free_slab, s->alloc_slab * 100 / total_alloc, - s->free_slab * 100 / total_free); + total_free ? s->free_slab * 100 / total_free : 0); printf("Add partial %8lu %8lu %3lu %3lu\n", s->deactivate_to_head + s->deactivate_to_tail, s->free_add_partial, (s->deactivate_to_head + s->deactivate_to_tail) * 100 / total_alloc, - s->free_add_partial * 100 / total_free); + total_free ? s->free_add_partial * 100 / total_free : 0); printf("Remove partial %8lu %8lu %3lu %3lu\n", s->alloc_from_partial, s->free_remove_partial, s->alloc_from_partial * 100 / total_alloc, - s->free_remove_partial * 100 / total_free); + total_free ? s->free_remove_partial * 100 / total_free : 0); + + printf("Cpu partial list %8lu %8lu %3lu %3lu\n", + s->cpu_partial_alloc, s->cpu_partial_free, + s->cpu_partial_alloc * 100 / total_alloc, + total_free ? s->cpu_partial_free * 100 / total_free : 0); printf("RemoteObj/SlabFrozen %8lu %8lu %3lu %3lu\n", s->deactivate_remote_frees, s->free_frozen, s->deactivate_remote_frees * 100 / total_alloc, - s->free_frozen * 100 / total_free); + total_free ? s->free_frozen * 100 / total_free : 0); printf("Total %8lu %8lu\n\n", total_alloc, total_free); @@ -1145,7 +1151,7 @@ static void read_slab_dir(void) switch (de->d_type) { case DT_LNK: alias->name = strdup(de->d_name); - count = readlink(de->d_name, buffer, sizeof(buffer)); + count = readlink(de->d_name, buffer, sizeof(buffer)-1); if (count < 0) fatal("Cannot read symlink %s\n", de->d_name); @@ -1209,6 +1215,8 @@ static void read_slab_dir(void) slab->order_fallback = get_obj("order_fallback"); slab->cmpxchg_double_cpu_fail = get_obj("cmpxchg_double_cpu_fail"); slab->cmpxchg_double_fail = get_obj("cmpxchg_double_fail"); + slab->cpu_partial_alloc = get_obj("cpu_partial_alloc"); + slab->cpu_partial_free = get_obj("cpu_partial_free"); slab->alloc_node_mismatch = get_obj("alloc_node_mismatch"); slab->deactivate_bypass = get_obj("deactivate_bypass"); chdir(".."); |